Files
llm-in-text/backend/main.py
“ydy0615” 65d4a57d33 refactor(editor): migrate to ProseMirror Mark-based ghost text system
- Replace overlay-based GhostTextOverlay.vue with ProseMirror Mark system
- Add AI toggle button with enable/disable functionality
- Implement new copilotPlugin.ts using copilotGhostMark for inline suggestions
- Fix cursor position offset in prompt.py by moving first suffix char to prefix
- Improve API error handling with abort signal support and debug logging
- Update model configuration from gpt-oss:120b to gpt-oss:20b
- Add button tooltips and improve editor styling
- Remove deprecated inlineSuggestionPlugin.ts
- Update README with new architecture diagram and feature documentation
2026-02-13 09:24:50 +08:00

143 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel
import os
import json
import re
# Single ASGI application instance; the route decorator and the uvicorn
# entry point further down in this file both refer to this object.
app = FastAPI()
# Register CORS handling with every origin, method and header allowed.
# NOTE(review): a wildcard origin combined with allow_credentials=True is the
# most permissive setting available — confirm this is intended outside of
# local development, and prefer an explicit origin list if cookies are used.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class CompletionRequest(BaseModel):
    """Request body accepted by the ``POST /v1/completions`` endpoint."""

    # Passed as the first argument to build_prompt(); presumably the document
    # text before the cursor — verify against the frontend caller.
    prefix: str
    # Passed as the second argument to build_prompt(); presumably the document
    # text after the cursor — verify against the frontend caller.
    suffix: str
    # Defaults to 'markdown'; not read by the request handler in this file.
    languageId: str = 'markdown'
def extract_completion_from_thinking(thinking: str) -> str:
    """Extract the actual continuation text from a model's ``thinking`` output.

    The reasoning text is stripped heuristically, trying three strategies in
    order and returning the result of the first one that applies:

    1. Everything after a ``Continuation`` marker (case-insensitive), with a
       trailing "It seems like ..." remark removed.
    2. The text starting at the first capitalised sentence that follows a
       lead-in such as "We need to", "So we" or "Thus", with a leading
       "Probably " removed and anything from "It seems like" onwards dropped.
    3. The whole text, cut at the first " Probably " and at the first
       " The instruction:".

    Args:
        thinking: Raw reasoning text returned by the model; may be empty.

    Returns:
        The extracted continuation with surrounding whitespace stripped, or
        an empty string when ``thinking`` is empty.
    """
    if not thinking:
        return ""

    # Strategy 1: an explicit "Continuation:" (or similar) marker. The capture
    # group uses [\s\S]* so it takes everything up to the end of the text.
    continuation_match = re.search(r'Continuation[:\s]*([\s\S]*)', thinking, re.IGNORECASE)
    if continuation_match:
        result = continuation_match.group(1).strip()
        # Drop any trailing reasoning remark. DOTALL is required: without it
        # ".*$" cannot cross a newline, so a remark followed by further lines
        # was never removed.
        result = re.sub(
            r'\s*It seems like.*$',
            '',
            result,
            flags=re.IGNORECASE | re.DOTALL,
        )
        return result.strip()

    # Strategy 2: no marker. Look for a reasoning lead-in followed by a
    # capitalised sentence and treat that sentence as the start of the
    # continuation.
    continue_match = re.search(
        r'(?:We need to|Then we should|So we|I will|The|Thus)[,\s]+([A-Z][^.!?]*(?:[.!?]|$))',
        thinking,
    )
    if continue_match:
        # Keep the matched sentence and everything after it.
        start_idx = continue_match.start(1)
        result = thinking[start_idx:].strip()
        # Remove a leading "Probably " hedge word.
        result = re.sub(r'^Probably\s+', '', result)
        # Truncate at the first "It seems like" remark.
        result = re.split(r'\s*It seems like\s', result, flags=re.IGNORECASE)[0]
        return result.strip()

    # Strategy 3 (fallback): return the text itself, minus trailing reasoning.
    result = thinking.strip()
    # Cut at the first " Probably ".
    result = re.split(r'\s+Probably\s', result, flags=re.IGNORECASE, maxsplit=1)[0]
    # Cut at the first " The instruction:".
    result = re.split(r'\s+The instruction:', result, flags=re.IGNORECASE, maxsplit=1)[0]
    return result.strip()
def _split_message(response):
    """Return ``(content, thinking)`` strings taken from an Ollama chat response.

    Supports both attribute-style response objects and plain dicts. Missing or
    falsy fields are normalised to empty strings; an unrecognised response
    shape yields ``("", "")``.
    """
    if hasattr(response, 'message') and response.message:
        message = response.message
        return message.content or "", getattr(message, 'thinking', '') or ""
    if isinstance(response, dict):
        message = response.get('message', {})
        return message.get('content', '') or "", message.get('thinking', '') or ""
    return "", ""


@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Generate a completion for the given prefix/suffix via Ollama.

    Builds a prompt with ``build_prompt(prefix, suffix)``, performs one
    non-streaming chat call against the Ollama server, and falls back to
    extracting the completion from the model's ``thinking`` field when
    ``content`` comes back empty.

    The model and host are read from the ``OLLAMA_MODEL`` and ``OLLAMA_HOST``
    environment variables on every request, with hard-coded defaults.

    Returns:
        A ``text/event-stream`` StreamingResponse that emits at most one
        ``data: {"content": ...}`` event followed by ``data: {"done": true}``.
        If anything inside the ``try`` raises, a JSONResponse with status 500
        and body ``{"error": "<exception text>"}`` is returned instead.
    """
    # Function-scope imports, as in the original: they are resolved on the
    # first request rather than at module import time.
    from prompt import build_prompt
    import ollama

    print("[Backend] POST /v1/completions called")
    print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")

    ollama_model = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b')
    ollama_host = os.getenv('OLLAMA_HOST', 'http://192.168.0.120:11434')
    print(f"[LLM] Using host: {ollama_host}, model: {ollama_model}")

    try:
        prompt = build_prompt(request.prefix, request.suffix)
        print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")
        print(f"[LLM] Full prompt:\n{prompt}\n")

        # Use the non-streaming API so the complete response is available
        # before anything is sent to the client.
        print("[LLM] Calling Ollama API (non-streaming)...")
        client = ollama.AsyncClient(host=ollama_host)
        response = await client.chat(
            model=ollama_model,
            messages=[{'role': 'user', 'content': prompt}],
            stream=False,
            options={
                'num_predict': 8192,
                'temperature': 0.2,
            },
        )
        print(f"[LLM] Response type: {type(response)}")

        # Pull out both the answer and the reasoning text.
        content, thinking = _split_message(response)
        print(f"[LLM] Original content: {repr(content[:100] if content else '')}...")
        print(f"[LLM] Thinking length: {len(thinking)}")
        print(f"[LLM] Thinking (first 200): {thinking[:200]}...")

        # When the model produced no content, try to recover the completion
        # from its reasoning output.
        if not content and thinking:
            print("[LLM] Content is empty, extracting from thinking...")
            content = extract_completion_from_thinking(thinking)
            print(f"[LLM] Extracted completion: {repr(content[:100])}...")
        print(f"[LLM] Final content length: {len(content)}")

        # Send the whole completion as a single SSE event, then a terminator
        # event, so the client can keep using its streaming reader.
        async def generate():
            if content:
                print(f"[LLM] Yielding full content: {repr(content)}")
                yield f"data: {json.dumps({'content': content})}\n\n"
            yield f"data: {json.dumps({'done': True})}\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    except Exception as e:
        # Top-level boundary: log the failure with its traceback and report
        # it to the client as a JSON 500 rather than letting it propagate.
        print(f"[Backend] Error: {e}")
        import traceback
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)
# Start the server only when this file is executed directly as a script.
if __name__ == "__main__":
    # uvicorn is imported here, so it is only needed when running as a script.
    import uvicorn
    print("[Backend] Starting server on http://0.0.0.0:8000")
    # Host 0.0.0.0 listens on all network interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)