feat(llm): add thinking parameter support for Ollama API calls
Add optional thinking parameter to the call_ollama function and pass it from the request. Also enhance timezone handling in prompt generation to support configurable timezone preferences.
This commit is contained in:
@@ -54,31 +54,36 @@ def _extract_message(response) -> tuple[str, str]:
|
||||
return content, thinking
|
||||
|
||||
|
||||
async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict:
|
||||
async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7, thinking: str = None) -> dict:
|
||||
"""
|
||||
调用 Ollama API 并返回 content 和 thinking。
|
||||
"""
|
||||
start = time.perf_counter()
|
||||
start_dt = datetime.now()
|
||||
logger.info(
|
||||
"[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f",
|
||||
"[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f thinking=%s",
|
||||
tag,
|
||||
OLLAMA_MODEL,
|
||||
OLLAMA_HOST,
|
||||
len(prompt),
|
||||
temperature,
|
||||
thinking,
|
||||
)
|
||||
|
||||
try:
|
||||
response = await client.chat(
|
||||
model=OLLAMA_MODEL,
|
||||
messages=[{'role': 'user', 'content': prompt}],
|
||||
stream=False,
|
||||
options={
|
||||
kwargs = {
|
||||
"model": OLLAMA_MODEL,
|
||||
"messages": [{'role': 'user', 'content': prompt}],
|
||||
"stream": False,
|
||||
"options": {
|
||||
'temperature': temperature,
|
||||
'repeat_penalty': 1.1,
|
||||
},
|
||||
)
|
||||
}
|
||||
if thinking:
|
||||
kwargs["thinking"] = thinking
|
||||
|
||||
response = await client.chat(**kwargs)
|
||||
except Exception:
|
||||
elapsed_ms = (time.perf_counter() - start) * 1000
|
||||
end_dt = datetime.now()
|
||||
|
||||
@@ -94,7 +94,12 @@ async def create_completion(request: Request, req: CompletionRequest):
|
||||
thinking_level=req.model_thinking,
|
||||
preferences=req.user_preferences
|
||||
)
|
||||
result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
|
||||
result = await call_ollama(
|
||||
prompt,
|
||||
tag=f"{request_id}-primary",
|
||||
temperature=0.7,
|
||||
thinking=req.model_thinking if req.model_thinking != "none" else None
|
||||
)
|
||||
|
||||
content = result["content"] or ""
|
||||
if not content.strip():
|
||||
|
||||
@@ -1,11 +1,27 @@
|
||||
from typing import Tuple
|
||||
from datetime import datetime, timezone, timedelta
|
||||
|
||||
def _resolve_timezone(timezone_pref: str) -> tuple:
    """Translate a timezone preference into a ``(tzinfo, label)`` pair.

    Resolution order for anything other than an empty value or ``"auto"``:

    1. A bare ``UTC`` / ``GMT`` / ``Z`` means a zero offset.
    2. An IANA zone name (e.g. ``Asia/Tokyo``) is looked up with
       ``zoneinfo`` when that module and its time zone data are available.
    3. A numeric offset such as ``UTC+8``, ``GMT-5``, ``+05:30`` or ``+0800``
       is parsed as hours and optional minutes.

    If nothing matches, or the parsed offset is out of range, the UTC+8
    default is returned together with a ``" (UTC+8)"`` label so that the label
    always describes the zone the time was actually computed in.
    """
    import re

    default = (timezone(timedelta(hours=8)), " (UTC+8)")
    if not timezone_pref or timezone_pref == "auto":
        return default

    name = timezone_pref.strip()
    if not name:
        return default
    label = f" ({timezone_pref})"

    if name.upper() in ("UTC", "GMT", "Z"):
        return timezone.utc, label

    try:
        # Local import: zoneinfo only exists on Python 3.9+ and needs tz data.
        from zoneinfo import ZoneInfo

        return ZoneInfo(name), label
    except (ImportError, KeyError, ValueError, OSError):
        # Not an IANA key (or no tz database): fall through to offset parsing.
        pass

    # The trailing (?!\d) forces "+800" to be read as 8:00 rather than 80 hours.
    match = re.search(r"([+-])(\d{1,2})(?::?(\d{2}))?(?!\d)", name)
    if match:
        hours = int(match.group(2))
        minutes = int(match.group(3) or 0)
        # datetime.timezone rejects offsets of 24 hours or more with ValueError.
        if hours <= 23 and minutes <= 59:
            delta = timedelta(hours=hours, minutes=minutes)
            if match.group(1) == "-":
                delta = -delta
            return timezone(delta), label

    return default


def _get_current_datetime(timezone_pref: str = "auto") -> str:
    """Return the current date and time as a Chinese-formatted string.

    Args:
        timezone_pref: ``"auto"`` (or an empty value) for the UTC+8 default,
            an offset such as ``"UTC+8"`` / ``"GMT-5"`` / ``"+05:30"``, or an
            IANA zone name such as ``"Asia/Tokyo"``.

    Returns:
        A string of the form ``"2024年1月2日 星期二 03:04:05 (UTC+8)"``. The
        parenthesised suffix is the preference as given when it could be
        resolved, and ``(UTC+8)`` when the default zone was used instead.
        An unusable preference never raises; it falls back to UTC+8.
    """
    tz, tz_info = _resolve_timezone(timezone_pref)
    now = datetime.now(tz)
    weekdays = ["星期一", "星期二", "星期三", "星期四", "星期五", "星期六", "星期日"]
    weekday = weekdays[now.weekday()]
    return f"{now.year}年{now.month}月{now.day}日 {weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d}{tz_info}"
|
||||
|
||||
def _sanitize_language_id(language_id: str) -> str:
|
||||
if not language_id:
|
||||
@@ -39,23 +55,16 @@ def build_prompt(
|
||||
) -> str:
|
||||
safe_language_id = _sanitize_language_id(language_id)
|
||||
recent_prefix, recent_suffix = _prepare_context(prefix, suffix)
|
||||
current_time = _get_current_datetime()
|
||||
tz_pref = preferences.timezone if preferences else "auto"
|
||||
current_time = _get_current_datetime(tz_pref)
|
||||
location_info = f"\nUser location: {location}" if location else ""
|
||||
|
||||
thinking_instruction = ""
|
||||
if thinking_level == "medium":
|
||||
thinking_instruction = "\n- Briefly analyze the context before suggesting."
|
||||
elif thinking_level == "high":
|
||||
thinking_instruction = "\n- Deeply analyze the context, structure, and intent before suggesting. Think step-by-step."
|
||||
|
||||
pref_info = []
|
||||
if preferences:
|
||||
if preferences.language and preferences.language != 'auto':
|
||||
pref_info.append(f"Preferred language: {preferences.language}")
|
||||
if preferences.currency and preferences.currency != 'auto':
|
||||
pref_info.append(f"Preferred currency: {preferences.currency}")
|
||||
if preferences.timezone and preferences.timezone != 'auto':
|
||||
pref_info.append(f"User timezone: {preferences.timezone}")
|
||||
|
||||
preferences_instruction = "\n".join(pref_info)
|
||||
if preferences_instruction:
|
||||
@@ -68,7 +77,7 @@ You are an inline completion engine for a {safe_language_id} editor with ghost-t
|
||||
Your job:
|
||||
- Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX.
|
||||
- Prefer a meaningful, non-empty insertion with moderate length.
|
||||
- Avoid overly short outputs with little information value.{thinking_instruction}
|
||||
- Avoid overly short outputs with little information value.
|
||||
|
||||
Important context:
|
||||
- PREFIX may contain OCR metadata inline after images, e.g.  <OCR:description>.
|
||||
|
||||
Reference in New Issue
Block a user