Files
llm-in-text/backend/llm.py

70 lines
2.6 KiB
Python
Raw Normal View History

import os
from typing import AsyncGenerator
from openai import AsyncOpenAI
import json
import time
# Connection settings come from the environment; the fallbacks point at an
# Ollama server that exposes an OpenAI-compatible API.
api_key = os.environ.get("OPENAI_API_KEY", "ollama")
base_url = os.environ.get("OLLAMA_BASE_URL", "http://192.168.0.120:11434/v1/")
model = os.environ.get("OLLAMA_MODEL", "gpt-oss:120b")

# Log the effective configuration once at import time. Only whether a key is
# set is reported, never the key itself.
print("[LLM] API key configured: " + ("Yes" if api_key else "No"))
print("[LLM] Base URL: " + base_url)
print("[LLM] Model: " + model)

# Shared async client used by every request made from this module.
client = AsyncOpenAI(base_url=base_url, api_key=api_key)
async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
    """Call the OpenAI/Ollama API and stream back the completion content.

    Based on the streaming logic of the completions-sample-code example.

    The prompt is sent as a single user message through the module-level
    ``client`` using the module-level ``model`` (``stream=True``,
    ``max_tokens=128``, ``temperature=0.2``). Progress and timing details
    are printed to stdout.

    Args:
        prompt: Text used as the content of the user message.

    Yields:
        JSON-encoded strings. Every chunk that carries text produces
        ``{"content": "<text>"}``. If an exception is raised while creating
        or reading the stream, one ``{"error": "<message>", "type":
        "<exception class name>"}`` object is yielded and the generator
        finishes; the exception is not propagated to the caller.
    """
    start_time = time.time()
    print("[LLM] ========== API Call Start ==========")
    print(f"[LLM] Prompt length: {len(prompt)}")
    print(f"[LLM] Model: {model}")
    try:
        print("[LLM] Creating streaming chat completion...")
        stream = await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            stream=True,
            max_tokens=128,
            temperature=0.2,
        )
        print("[LLM] Stream created successfully, iterating...")
        chunk_count = 0
        # Seconds from call start to the first chunk; stays None when the
        # stream ends without producing any chunk.
        first_chunk_time = None
        async for chunk in stream:
            current_time = time.time()
            if first_chunk_time is None:
                first_chunk_time = current_time - start_time
            chunk_count += 1
            choice = chunk.choices[0] if chunk.choices else None
            if choice and choice.delta.content:
                content = choice.delta.content
                print(f"[LLM] Chunk {chunk_count}: '{content}' (latency: {current_time - start_time:.3f}s)")
                yield json.dumps({"content": content})
            elif chunk.choices and hasattr(choice, 'finish_reason'):
                finish_reason = choice.finish_reason
                print(f"[LLM] Chunk {chunk_count}: finish_reason={finish_reason}")
                # A non-empty finish reason marks the end of the completion.
                if finish_reason:
                    break
            else:
                print(f"[LLM] Chunk {chunk_count}: empty or no content")
        total_time = time.time() - start_time
        # Formatting None with ':.3f' raises TypeError, which the handler
        # below would turn into an error payload for an otherwise valid
        # (empty) stream, so only format the latency when a chunk arrived.
        if first_chunk_time is None:
            first_chunk_latency = "n/a"
        else:
            first_chunk_latency = f"{first_chunk_time:.3f}s"
        print(f"[LLM] Stream complete - chunks: {chunk_count}, first chunk latency: {first_chunk_latency}, total time: {total_time:.3f}s")
        print("[LLM] ========== API Call End ==========")
    except Exception as e:
        # Boundary handler: report the failure to the consumer as a JSON
        # payload instead of raising out of the generator.
        error_msg = f"Error: {str(e)}"
        print(f"[LLM] Error: {error_msg}")
        import traceback
        traceback.print_exc()
        yield json.dumps({"error": str(e), "type": type(e).__name__})