Files
llm-in-text/backend/llm.py
“ydy0615” 7cddfaba30 feat(editor): add Markdown rendering for ghost text and optimize prompt system
- Implement Markdown parsing for ghost text using Milkdown parser
- Add support for both block and inline content in ghost text
- Refactor prompt system with comprehensive rules and examples
- Adjust LLM parameters: increase temperature to 0.7, add repeat_penalty
- Add CSS styles for formatted ghost text (bold, italic, code, links)
- Add documentation for Copilot prompt system and ghost text rendering
2026-02-13 21:17:45 +08:00

53 lines
1.7 KiB
Python

import os
import json
import ollama
from typing import AsyncGenerator
# Model name and server address are read from the environment, falling back
# to the defaults below when the variables are unset.
OLLAMA_MODEL = os.environ.get('OLLAMA_MODEL', 'gpt-oss:20b')
OLLAMA_HOST = os.environ.get('OLLAMA_HOST', 'http://192.168.0.120:11434')

# Remove one trailing "/v1/" or "/v1" so the client receives the address
# without that path. The longer suffix is tested first; at most one suffix is
# removed.
# NOTE(review): presumably this tolerates an OpenAI-style base URL in the
# environment variable -- confirm with whoever sets OLLAMA_HOST.
OLLAMA_HOST = (
    OLLAMA_HOST[:-4] if OLLAMA_HOST.endswith('/v1/')
    else OLLAMA_HOST[:-3] if OLLAMA_HOST.endswith('/v1')
    else OLLAMA_HOST
)

# Write the normalised address back to the process environment.
os.environ['OLLAMA_HOST'] = OLLAMA_HOST

print(f"[LLM] Ollama host: {OLLAMA_HOST}")
print(f"[LLM] Model: {OLLAMA_MODEL}")

# Shared async client used by the streaming helper in this module.
client = ollama.AsyncClient(host=OLLAMA_HOST)
async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
    """Stream a chat completion for *prompt* as JSON-encoded strings.

    Despite its name, this sends the request through the module-level Ollama
    ``client`` using ``OLLAMA_MODEL``. The prompt is sent as a single
    ``user`` message with streaming enabled.

    Args:
        prompt: The complete prompt text to send to the model.

    Yields:
        ``json.dumps({"content": <text>})`` for each streamed chunk whose
        message content is non-empty, in arrival order. If any exception is
        raised while requesting or consuming the stream, it is logged and a
        final ``json.dumps({"error": <message>})`` is yielded instead of the
        exception propagating to the consumer.
    """
    print(f"[LLM] Calling Ollama API with prompt length: {len(prompt)}")
    try:
        print("[LLM] Awaiting client.chat...")
        stream = await client.chat(
            model=OLLAMA_MODEL,
            messages=[{'role': 'user', 'content': prompt}],
            stream=True,
            options={
                'temperature': 0.7,
                'repeat_penalty': 1.1,
            },
            # Passed straight through to the Ollama client; see its
            # documentation for the accepted values.
            think='high',
        )
        print("[LLM] Got stream object, starting iteration...")
        chunk_count = 0
        async for chunk in stream:
            message = chunk['message']
            content = message['content'] if message else None
            if not content:
                # Only the message content is forwarded; chunks without any
                # are skipped and not counted.
                continue
            chunk_count += 1
            print(f"[LLM] Chunk {chunk_count}: {content}")
            yield json.dumps({"content": content})
        print(f"[LLM] Stream complete, total chunks: {chunk_count}")
    except Exception as e:
        # Streaming boundary: report the failure in-band so the consumer
        # receives an error payload rather than a broken stream.
        print(f"[LLM] Error: {e}")
        import traceback
        traceback.print_exc()
        yield json.dumps({"error": str(e)})