# llm-in-text/backend/llm.py

import os
import ollama
from dotenv import load_dotenv

# Read a local .env file into the process environment first, so the
# lookups below can see any values it defines.
load_dotenv()

# Ollama server URL and model tag; each falls back to a built-in default
# when the corresponding environment variable is not set.
# NOTE(review): the fallback host is a private-network (192.168.x.x) address,
# so deployments on another network need OLLAMA_HOST set explicitly.
OLLAMA_HOST = os.environ.get('OLLAMA_HOST', 'http://192.168.0.120:11434')
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'gpt-oss:20b')

# Single async client shared by every call made from this module.
client = ollama.AsyncClient(host=OLLAMA_HOST)

def _message_field(message, name: str):
    """Read `name` from a chat message given as a mapping or an object.

    Returns "" when the field is missing, None, or otherwise falsy, so
    callers never have to distinguish "absent" from "empty".
    """
    if isinstance(message, dict):
        value = message.get(name)
    else:
        value = getattr(message, name, None)
    return value or ""


async def call_ollama(prompt: str) -> dict:
    """
    Send `prompt` to Ollama as a single user message and return the reply.

    Issues one non-streaming chat request through the module-level `client`
    using `OLLAMA_MODEL`, with temperature 0.7, repeat_penalty 1.1 and
    think='high'.

    Args:
        prompt: Text sent as the content of the only (user) message.

    Returns:
        A dict with exactly two keys: "content" and "thinking", taken from
        the same-named fields of the reply message. Either value is ""
        when the response does not carry that field (or carries it empty).

    Errors raised by `client.chat` are not caught here and propagate to
    the caller.
    """
    response = await client.chat(
        model=OLLAMA_MODEL,
        messages=[{'role': 'user', 'content': prompt}],
        stream=False,
        options={
            'temperature': 0.7,
            'repeat_penalty': 1.1,
        },
        think='high'
    )

    # The reply may expose its message as an attribute (response object) or,
    # for a plain dict, as a key. Prefer the attribute, then fall back.
    message = getattr(response, 'message', None)
    if not message and isinstance(response, dict):
        # .get() without a default: a present-but-null "message" is handled
        # by the emptiness check below instead of raising AttributeError.
        message = response.get('message')

    if not message:
        return {"content": "", "thinking": ""}

    return {
        "content": _message_field(message, 'content'),
        "thinking": _message_field(message, 'thinking'),
    }