feat(llm): add thinking parameter support for Ollama API calls

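Add an optional `thinking` parameter to the `call_ollama` function and forward the request's `model_thinking` value to it (nothing is forwarded when the value is "none"). Also enhance timezone handling in prompt generation: the current-time string now honors the user's configurable timezone preference (e.g. "UTC+8") and falls back to UTC+8 when the preference is "auto" or unset.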
This commit is contained in:
2026-02-19 10:34:31 +08:00
parent aa6133e3ed
commit 065b4ac319
3 changed files with 41 additions and 22 deletions

View File

@@ -54,31 +54,36 @@ def _extract_message(response) -> tuple[str, str]:
return content, thinking
async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict:
async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7, thinking: str = None) -> dict:
"""
调用 Ollama API 并返回 content 和 thinking。
"""
start = time.perf_counter()
start_dt = datetime.now()
logger.info(
"[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f",
"[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f thinking=%s",
tag,
OLLAMA_MODEL,
OLLAMA_HOST,
len(prompt),
temperature,
thinking,
)
try:
response = await client.chat(
model=OLLAMA_MODEL,
messages=[{'role': 'user', 'content': prompt}],
stream=False,
options={
kwargs = {
"model": OLLAMA_MODEL,
"messages": [{'role': 'user', 'content': prompt}],
"stream": False,
"options": {
'temperature': temperature,
'repeat_penalty': 1.1,
},
)
}
if thinking:
kwargs["thinking"] = thinking
response = await client.chat(**kwargs)
except Exception:
elapsed_ms = (time.perf_counter() - start) * 1000
end_dt = datetime.now()

View File

@@ -94,7 +94,12 @@ async def create_completion(request: Request, req: CompletionRequest):
thinking_level=req.model_thinking,
preferences=req.user_preferences
)
result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
result = await call_ollama(
prompt,
tag=f"{request_id}-primary",
temperature=0.7,
thinking=req.model_thinking if req.model_thinking != "none" else None
)
content = result["content"] or ""
if not content.strip():

View File

@@ -1,11 +1,27 @@
from typing import Tuple
from datetime import datetime, timezone, timedelta
def _resolve_timezone(timezone_pref: str):
    """Translate a timezone preference into ``(tzinfo, label)``.

    Accepts either a named zone known to the ``zoneinfo`` database
    (e.g. ``"Asia/Tokyo"``, ``"UTC"``) or any string containing a signed
    offset such as ``"UTC+8"``, ``"GMT-5"``, ``"UTC+5:30"`` or ``"+0800"``.

    Returns:
        A ``(tzinfo, label)`` pair where ``label`` is the preference exactly as
        supplied, or ``None`` when the preference cannot be interpreted.
    """
    import re

    # Named zones are tried first so that database names are honoured as-is
    # instead of having stray digits in them misread as an offset.
    try:
        from zoneinfo import ZoneInfo  # Python 3.9+; absence is tolerated below.

        return ZoneInfo(timezone_pref.strip()), timezone_pref
    except (ImportError, KeyError, ValueError, OSError):
        # Unknown/invalid key, missing tz database, or no zoneinfo module:
        # fall through to the numeric-offset parser.
        pass

    # Sign, 1-2 hour digits, optional 2 minute digits (with or without ':').
    # The trailing lookahead rejects longer digit runs rather than guessing.
    match = re.search(r'([+-])(\d{1,2})(?::?(\d{2}))?(?!\d)', timezone_pref)
    if not match:
        return None

    hours = int(match.group(2))
    minutes = int(match.group(3) or 0)
    # datetime.timezone only accepts offsets strictly inside +/-24h; reject
    # anything else here instead of letting it raise on user-supplied input.
    if hours > 23 or minutes > 59:
        return None

    delta = timedelta(hours=hours, minutes=minutes)
    if match.group(1) == '-':
        delta = -delta
    return timezone(delta), timezone_pref


def _get_current_datetime(timezone_pref: str = "auto") -> str:
    """Return the current date, weekday and time as text, tagged with its timezone.

    Args:
        timezone_pref: User timezone preference. ``"auto"``, an empty value, or
            a value that cannot be interpreted selects the default, UTC+8.
            Otherwise a zone name or a signed offset string is accepted (see
            ``_resolve_timezone``).

    Returns:
        A string of the form ``"<year><month><day><weekday> HH:MM:SS (<label>)"``.
        ``<label>`` is the preference as supplied when it was understood, and
        ``"UTC+8"`` whenever the default clock is used, so the label always
        describes the clock that produced the time.
    """
    resolved = None
    if timezone_pref and timezone_pref != 'auto':
        resolved = _resolve_timezone(timezone_pref)
    if resolved is None:
        # Default to UTC+8 if auto, not specified, or not understood.
        resolved = (timezone(timedelta(hours=8)), "UTC+8")
    tz, label = resolved

    now = datetime.now(tz)
    weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
    weekday = weekdays[now.weekday()]
    # NOTE(review): year/month/day are concatenated without separators, exactly
    # as in the original format string — confirm this is intended.
    return f"{now.year}{now.month}{now.day}{weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d} ({label})"
def _sanitize_language_id(language_id: str) -> str:
if not language_id:
@@ -39,23 +55,16 @@ def build_prompt(
) -> str:
safe_language_id = _sanitize_language_id(language_id)
recent_prefix, recent_suffix = _prepare_context(prefix, suffix)
current_time = _get_current_datetime()
tz_pref = preferences.timezone if preferences else "auto"
current_time = _get_current_datetime(tz_pref)
location_info = f"\nUser location: {location}" if location else ""
thinking_instruction = ""
if thinking_level == "medium":
thinking_instruction = "\n- Briefly analyze the context before suggesting."
elif thinking_level == "high":
thinking_instruction = "\n- Deeply analyze the context, structure, and intent before suggesting. Think step-by-step."
pref_info = []
if preferences:
if preferences.language and preferences.language != 'auto':
pref_info.append(f"Preferred language: {preferences.language}")
if preferences.currency and preferences.currency != 'auto':
pref_info.append(f"Preferred currency: {preferences.currency}")
if preferences.timezone and preferences.timezone != 'auto':
pref_info.append(f"User timezone: {preferences.timezone}")
preferences_instruction = "\n".join(pref_info)
if preferences_instruction:
@@ -68,7 +77,7 @@ You are an inline completion engine for a {safe_language_id} editor with ghost-t
Your job:
- Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX.
- Prefer a meaningful, non-empty insertion with moderate length.
- Avoid overly short outputs with little information value.{thinking_instruction}
- Avoid overly short outputs with little information value.
Important context:
- PREFIX may contain OCR metadata inline after images, e.g. ![alt](url) <OCR:description>.