From 065b4ac3198da96f281dc5be01c88435841f3473 Mon Sep 17 00:00:00 2001 From: ydy0615 Date: Thu, 19 Feb 2026 10:34:31 +0800 Subject: [PATCH] feat(llm): add thinking parameter support for Ollama API calls Add optional thinking parameter to the call_ollama function and pass it from the request. Also enhance timezone handling in prompt generation to support configurable timezone preferences. --- backend/llm.py | 21 +++++++++++++-------- backend/main.py | 7 ++++++- backend/prompt.py | 35 ++++++++++++++++++++++------------- 3 files changed, 41 insertions(+), 22 deletions(-) diff --git a/backend/llm.py b/backend/llm.py index 4f88721..e4aeb2d 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -54,31 +54,36 @@ def _extract_message(response) -> tuple[str, str]: return content, thinking -async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict: +async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7, thinking: str = None) -> dict: """ 调用 Ollama API 并返回 content 和 thinking。 """ start = time.perf_counter() start_dt = datetime.now() logger.info( - "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f", + "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f thinking=%s", tag, OLLAMA_MODEL, OLLAMA_HOST, len(prompt), temperature, + thinking, ) try: - response = await client.chat( - model=OLLAMA_MODEL, - messages=[{'role': 'user', 'content': prompt}], - stream=False, - options={ + kwargs = { + "model": OLLAMA_MODEL, + "messages": [{'role': 'user', 'content': prompt}], + "stream": False, + "options": { 'temperature': temperature, 'repeat_penalty': 1.1, }, - ) + } + if thinking: + kwargs["thinking"] = thinking + + response = await client.chat(**kwargs) except Exception: elapsed_ms = (time.perf_counter() - start) * 1000 end_dt = datetime.now() diff --git a/backend/main.py b/backend/main.py index fc6f399..b8a9884 100644 --- a/backend/main.py +++ b/backend/main.py @@ -94,7 +94,12 @@ async def create_completion(request: Request, req: CompletionRequest): thinking_level=req.model_thinking, preferences=req.user_preferences ) - result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7) + result = await call_ollama( + prompt, + tag=f"{request_id}-primary", + temperature=0.7, + thinking=req.model_thinking if req.model_thinking != "none" else None + ) content = result["content"] or "" if not content.strip(): diff --git a/backend/prompt.py b/backend/prompt.py index 4acfeff..6c35132 100644 --- a/backend/prompt.py +++ b/backend/prompt.py @@ -1,11 +1,27 @@ from typing import Tuple from datetime import datetime, timezone, timedelta -def _get_current_datetime() -> str: - now = datetime.now(timezone(timedelta(hours=8))) +def _get_current_datetime(timezone_pref: str = "auto") -> str: + # Default to UTC+8 if auto or not specified + offset = 8 + tz_info = " (UTC+8)" + + if timezone_pref and timezone_pref != 'auto': + # Try to parse something like "UTC+8" or "GMT+8" + import re + match = re.search(r'([+-])(\d+)', timezone_pref) + if match: + sign = match.group(1) + hours = int(match.group(2)) + offset = hours if sign == '+' else -hours + tz_info = f" ({timezone_pref})" + else: + tz_info = f" ({timezone_pref})" + + now = datetime.now(timezone(timedelta(hours=offset))) weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"] weekday = weekdays[now.weekday()] - return f"{now.year}年{now.month}月{now.day}日 {weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d}" + return f"{now.year}年{now.month}月{now.day}日 {weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d}{tz_info}" def _sanitize_language_id(language_id: str) -> str: if not language_id: @@ -39,23 +55,16 @@ def build_prompt( ) -> str: safe_language_id = _sanitize_language_id(language_id) recent_prefix, recent_suffix = _prepare_context(prefix, suffix) - current_time = _get_current_datetime() + tz_pref = preferences.timezone if preferences else "auto" + current_time = _get_current_datetime(tz_pref) location_info = f"\nUser location: {location}" if location else "" - thinking_instruction = "" - if thinking_level == "medium": - thinking_instruction = "\n- Briefly analyze the context before suggesting." - elif thinking_level == "high": - thinking_instruction = "\n- Deeply analyze the context, structure, and intent before suggesting. Think step-by-step." - pref_info = [] if preferences: if preferences.language and preferences.language != 'auto': pref_info.append(f"Preferred language: {preferences.language}") if preferences.currency and preferences.currency != 'auto': pref_info.append(f"Preferred currency: {preferences.currency}") - if preferences.timezone and preferences.timezone != 'auto': - pref_info.append(f"User timezone: {preferences.timezone}") preferences_instruction = "\n".join(pref_info) if preferences_instruction: @@ -68,7 +77,7 @@ You are an inline completion engine for a {safe_language_id} editor with ghost-t Your job: - Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX. - Prefer a meaningful, non-empty insertion with moderate length. -- Avoid overly short outputs with little information value.{thinking_instruction} +- Avoid overly short outputs with little information value. Important context: - PREFIX may contain OCR metadata inline after images, e.g. ![alt](url) .