From 065b4ac3198da96f281dc5be01c88435841f3473 Mon Sep 17 00:00:00 2001
From: ydy0615 <allenyuan410@gmail.com>
Date: Thu, 19 Feb 2026 10:34:31 +0800
Subject: [PATCH] feat(llm): add thinking parameter support for Ollama API
 calls

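Add an optional thinking parameter to call_ollama and pass the request's
model_thinking value through to it (nothing is passed when the level is
"none"). Remove the prompt-level thinking instructions from build_prompt.

Also make the prompt timestamp honor the user's timezone preference: UTC/GMT
offsets such as "UTC+8" are parsed, with UTC+8 as the fallback, and the
timestamp is now labelled with its timezone. This replaces the separate
"User timezone" line in the preferences section of the prompt.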
---
 backend/llm.py    | 21 +++++++++++++--------
 backend/main.py   |  7 ++++++-
 backend/prompt.py | 35 ++++++++++++++++++++++-------------
 3 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/backend/llm.py b/backend/llm.py
index 4f88721..e4aeb2d 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -54,31 +54,36 @@ def _extract_message(response) -> tuple[str, str]:
     return content, thinking
 
 
-async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict:
+async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7, thinking: str = None) -> dict:
     """
     调用 Ollama API 并返回 content 和 thinking。
     """
     start = time.perf_counter()
     start_dt = datetime.now()
     logger.info(
-        "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f",
+        "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f thinking=%s",
         tag,
         OLLAMA_MODEL,
         OLLAMA_HOST,
         len(prompt),
         temperature,
+        thinking,
     )
 
     try:
-        response = await client.chat(
-            model=OLLAMA_MODEL,
-            messages=[{'role': 'user', 'content': prompt}],
-            stream=False,
-            options={
+        kwargs = {
+            "model": OLLAMA_MODEL,
+            "messages": [{'role': 'user', 'content': prompt}],
+            "stream": False,
+            "options": {
                 'temperature': temperature,
                 'repeat_penalty': 1.1,
             },
-        )
+        }
+        if thinking:
+            kwargs["think"] = thinking
+
+        response = await client.chat(**kwargs)
     except Exception:
         elapsed_ms = (time.perf_counter() - start) * 1000
         end_dt = datetime.now()
diff --git a/backend/main.py b/backend/main.py
index fc6f399..b8a9884 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -94,7 +94,12 @@ async def create_completion(request: Request, req: CompletionRequest):
             thinking_level=req.model_thinking,
             preferences=req.user_preferences
         )
-        result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
+        result = await call_ollama(
+            prompt,
+            tag=f"{request_id}-primary",
+            temperature=0.7,
+            thinking=req.model_thinking if req.model_thinking != "none" else None,
+        )
 
         content = result["content"] or ""
         if not content.strip():
diff --git a/backend/prompt.py b/backend/prompt.py
index 4acfeff..6c35132 100644
--- a/backend/prompt.py
+++ b/backend/prompt.py
@@ -1,11 +1,27 @@
 from typing import Tuple
 from datetime import datetime, timezone, timedelta
 
-def _get_current_datetime() -> str:
-    now = datetime.now(timezone(timedelta(hours=8)))
+def _get_current_datetime(timezone_pref: str = "auto") -> str:
+    # Default to UTC+8 when the preference is "auto", empty, or unusable
+    offset = 8
+    tz_info = " (UTC+8)"
+
+    if timezone_pref and timezone_pref != 'auto':
+        # Parse offsets such as "UTC+8", "GMT-5" or "UTC+05:30"
+        import re
+        match = re.search(r'([+-])(\d{1,2})(?::?([0-5]\d))?(?!\d)', timezone_pref)
+        if match:
+            hours = int(match.group(2)) + int(match.group(3) or 0) / 60
+            # timezone() raises ValueError for offsets of 24 hours or more
+            if hours < 24:
+                offset = hours if match.group(1) == '+' else -hours
+                tz_info = f" ({timezone_pref})"
+        # Otherwise keep the UTC+8 label so it matches the time actually shown
+
+    now = datetime.now(timezone(timedelta(hours=offset)))
     weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
     weekday = weekdays[now.weekday()]
-    return f"{now.year}年{now.month}月{now.day}日 {weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d}"
+    return f"{now.year}年{now.month}月{now.day}日 {weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d}{tz_info}"
 
 def _sanitize_language_id(language_id: str) -> str:
     if not language_id:
@@ -39,23 +55,16 @@ def build_prompt(
 ) -> str:
     safe_language_id = _sanitize_language_id(language_id)
     recent_prefix, recent_suffix = _prepare_context(prefix, suffix)
-    current_time = _get_current_datetime()
+    tz_pref = preferences.timezone if preferences else "auto"
+    current_time = _get_current_datetime(tz_pref)
     location_info = f"\nUser location: {location}" if location else ""
     
-    thinking_instruction = ""
-    if thinking_level == "medium":
-        thinking_instruction = "\n- Briefly analyze the context before suggesting."
-    elif thinking_level == "high":
-        thinking_instruction = "\n- Deeply analyze the context, structure, and intent before suggesting. Think step-by-step."
-
     pref_info = []
     if preferences:
         if preferences.language and preferences.language != 'auto':
             pref_info.append(f"Preferred language: {preferences.language}")
         if preferences.currency and preferences.currency != 'auto':
             pref_info.append(f"Preferred currency: {preferences.currency}")
-        if preferences.timezone and preferences.timezone != 'auto':
-            pref_info.append(f"User timezone: {preferences.timezone}")
     
     preferences_instruction = "\n".join(pref_info)
     if preferences_instruction:
@@ -68,7 +77,7 @@ You are an inline completion engine for a {safe_language_id} editor with ghost-t
 Your job:
 - Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX.
 - Prefer a meaningful, non-empty insertion with moderate length.
-- Avoid overly short outputs with little information value.{thinking_instruction}
+- Avoid overly short outputs with little information value.
 
 Important context:
 - PREFIX may contain OCR metadata inline after images, e.g. ![alt](url) <OCR:description>.