Files
llm-in-text/backend/llm.py
“ydy0615” 65d4a57d33 refactor(editor): migrate to ProseMirror Mark-based ghost text system
- Replace overlay-based GhostTextOverlay.vue with ProseMirror Mark system
- Add AI toggle button with enable/disable functionality
- Implement new copilotPlugin.ts using copilotGhostMark for inline suggestions
- Fix cursor position offset in prompt.py by moving first suffix char to prefix
- Improve API error handling with abort signal support and debug logging
- Update model configuration from gpt-oss:120b to gpt-oss:20b
- Add button tooltips and improve editor styling
- Remove deprecated inlineSuggestionPlugin.ts
- Update README with new architecture diagram and feature documentation
2026-02-13 09:24:50 +08:00

53 lines
1.7 KiB
Python

import os
import json
import ollama
from typing import AsyncGenerator
# Model name and server address are read from the environment, falling back
# to the defaults below when the variables are unset.
OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b')
OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://192.168.0.120:11434')

# Strip a trailing "/v1/" or "/v1" suffix (if present), because the Ollama
# Python package uses the native API.
if OLLAMA_HOST.endswith(('/v1/', '/v1')):
    # A host ending in "/" here must end in "/v1/" (4 chars); otherwise it
    # ends in "/v1" (3 chars).
    OLLAMA_HOST = OLLAMA_HOST[:-4] if OLLAMA_HOST.endswith('/') else OLLAMA_HOST[:-3]

# Write the normalized host back into the process environment.
os.environ['OLLAMA_HOST'] = OLLAMA_HOST

print(f"[LLM] Ollama host: {OLLAMA_HOST}")
print(f"[LLM] Model: {OLLAMA_MODEL}")

# Shared asynchronous client used for requests made from this module.
client = ollama.AsyncClient(host=OLLAMA_HOST)
async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
    """Stream a chat reply for *prompt* from Ollama as JSON-encoded strings.

    The prompt is sent as a single user message to ``OLLAMA_MODEL`` through
    the module-level ``client`` with streaming enabled. Every chunk that
    carries non-empty message content is yielded as ``{"content": ...}``.
    If an ``Exception`` is raised while requesting or iterating the stream,
    the traceback is printed and one ``{"error": ...}`` string is yielded
    instead of propagating the exception.

    Args:
        prompt: Text used as the content of the user message.

    Yields:
        JSON strings holding either a ``content`` key or an ``error`` key.
    """
    print(f"[LLM] Calling Ollama API with prompt length: {len(prompt)}")
    try:
        print(f"[LLM] Awaiting client.chat...")
        user_messages = [{'role': 'user', 'content': prompt}]
        generation_options = {
            'num_predict': 8192,
            'temperature': 0.2,
        }
        response = await client.chat(
            model=OLLAMA_MODEL,
            messages=user_messages,
            stream=True,
            options=generation_options,
        )
        print(f"[LLM] Got stream object, starting iteration...")

        emitted = 0
        async for part in response:
            # Skip chunks that have no message or an empty content string.
            if not part['message']:
                continue
            text = part['message']['content']
            if not text:
                continue

            emitted += 1
            print(f"[LLM] Chunk {emitted}: {text}")
            yield json.dumps({"content": text})

        print(f"[LLM] Stream complete, total chunks: {emitted}")
    except Exception as exc:
        # Boundary handler: report the failure to the consumer as a payload
        # rather than letting the generator raise.
        reason = str(exc)
        error_msg = f"Error: {reason}"
        print(f"[LLM] Error: {error_msg}")
        import traceback
        traceback.print_exc()
        yield json.dumps({"error": reason})