- Replace overlay-based GhostTextOverlay.vue with ProseMirror Mark system - Add AI toggle button with enable/disable functionality - Implement new copilotPlugin.ts using copilotGhostMark for inline suggestions - Fix cursor position offset in prompt.py by moving first suffix char to prefix - Improve API error handling with abort signal support and debug logging - Update model configuration from gpt-oss:120b to gpt-oss:20b - Add button tooltips and improve editor styling - Remove deprecated inlineSuggestionPlugin.ts - Update README with new architecture diagram and feature documentation
143 lines
5.4 KiB
Python
143 lines
5.4 KiB
Python
from fastapi import FastAPI, HTTPException
|
||
from fastapi.middleware.cors import CORSMiddleware
|
||
from fastapi.responses import StreamingResponse, JSONResponse
|
||
from pydantic import BaseModel
|
||
import os
|
||
import json
|
||
import re
|
||
|
||
# ASGI application object; the route handler below registers itself on it.
app = FastAPI()

# Cross-origin policy: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is
# very permissive -- confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
|
||
|
||
class CompletionRequest(BaseModel):
    """Request body accepted by ``POST /v1/completions``.

    ``prefix`` and ``suffix`` are forwarded to ``build_prompt``; presumably
    they are the document text before and after the cursor (confirm against
    the caller). ``languageId`` defaults to ``'markdown'`` and is not read
    by the handler visible in this file.
    """

    prefix: str
    suffix: str
    languageId: str = "markdown"
|
||
|
||
def extract_completion_from_thinking(thinking: str) -> str:
    """Extract the actual continuation text from a model's ``thinking`` output.

    The reasoning text is heuristically stripped so that only the proposed
    continuation remains. Three strategies are tried in order:

    1. Everything after a ``Continuation`` marker (case-insensitive).
    2. Everything from the first capitalised sentence that follows a
       reasoning lead-in such as ``We need to`` or ``Thus``.
    3. The whole text, cut at a trailing ``Probably`` / ``The instruction:``.

    Args:
        thinking: Raw reasoning text returned by the model; may be empty.

    Returns:
        The extracted continuation with surrounding whitespace removed, or
        an empty string when ``thinking`` is empty.
    """
    if not thinking:
        return ""

    # Strategy 1: take whatever follows an explicit "Continuation:" marker.
    continuation_match = re.search(
        r'Continuation[:\s]*([\s\S]*)', thinking, re.IGNORECASE
    )
    if continuation_match:
        result = continuation_match.group(1).strip()
        # Drop trailing commentary. [\s\S] (rather than '.') is required so
        # that commentary spanning several lines is removed as well; with
        # '.*$' a newline after "It seems like" prevented any match.
        result = re.sub(
            r'\s*It seems like[\s\S]*$', '', result, flags=re.IGNORECASE
        )
        return result.strip()

    # Strategy 2: no marker -- skip a descriptive lead-in such as
    # "We need to" and keep the text from the following capitalised sentence.
    continue_match = re.search(
        r'(?:We need to|Then we should|So we|I will|The|Thus)[,\s]+([A-Z][^.!?]*(?:[.!?]|$))',
        thinking,
    )
    if continue_match:
        # Keep the matched sentence and everything after it.
        result = thinking[continue_match.start(1):].strip()
        # Remove a leading "Probably " hedge word.
        result = re.sub(r'^Probably\s+', '', result)
        # Cut at "It seems like" when present.
        result = re.split(r'\s*It seems like\s', result, flags=re.IGNORECASE)[0]
        return result.strip()

    # Strategy 3: return the text itself, minus trailing reasoning remarks.
    result = thinking.strip()
    # Cut at "Probably" and everything after it.
    result = re.split(r'\s+Probably\s', result, flags=re.IGNORECASE, maxsplit=1)[0]
    # Cut at "The instruction:" and everything after it.
    result = re.split(r'\s+The instruction:', result, flags=re.IGNORECASE, maxsplit=1)[0]

    return result.strip()
|
||
|
||
def _read_message_fields(response):
    """Return ``(content, thinking)`` strings from a chat response.

    Handles both an object exposing a ``message`` attribute and a plain
    dict with a ``'message'`` key; any other shape yields two empty strings.
    Missing or ``None`` fields are normalised to ``""``.
    """
    if hasattr(response, 'message') and response.message:
        message = response.message
        return message.content or "", getattr(message, 'thinking', '') or ""
    if isinstance(response, dict):
        message = response.get('message', {})
        return message.get('content', '') or "", message.get('thinking', '') or ""
    return "", ""


@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Produce a completion for the text between ``prefix`` and ``suffix``.

    Builds a prompt with ``build_prompt``, sends it to Ollama as a single
    non-streaming chat call, and falls back to extracting the completion
    from the model's ``thinking`` field when ``content`` is empty.

    Args:
        request: Parsed request body carrying ``prefix`` and ``suffix``.

    Returns:
        A ``text/event-stream`` response that emits one ``data:`` event with
        the content (only when it is non-empty) followed by a
        ``{"done": true}`` event. If anything inside the ``try`` raises, a
        JSON response ``{"error": ...}`` with status 500 is returned instead.
    """
    # Imported at call time, as in the original; these run outside the
    # ``try`` so an import failure is not converted into the JSON 500 body.
    from prompt import build_prompt
    import ollama

    print("[Backend] POST /v1/completions called")
    print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")

    # Model and host are read from the environment on every request.
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b')
    OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://192.168.0.120:11434')

    print(f"[LLM] Using host: {OLLAMA_HOST}, model: {OLLAMA_MODEL}")

    try:
        prompt = build_prompt(request.prefix, request.suffix)
        print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")
        print(f"[LLM] Full prompt:\n{prompt}\n")

        # Use the non-streaming API to obtain the complete response at once.
        print("[LLM] Calling Ollama API (non-streaming)...")
        client = ollama.AsyncClient(host=OLLAMA_HOST)
        response = await client.chat(
            model=OLLAMA_MODEL,
            messages=[{'role': 'user', 'content': prompt}],
            stream=False,
            options={
                'num_predict': 8192,
                'temperature': 0.2,
            },
        )

        print(f"[LLM] Response type: {type(response)}")

        # Pull the content and thinking text out of the response.
        content, thinking = _read_message_fields(response)

        print(f"[LLM] Original content: {repr(content[:100] if content else '')}...")
        print(f"[LLM] Thinking length: {len(thinking)}")
        print(f"[LLM] Thinking (first 200): {thinking[:200]}...")

        # When content is empty, try to recover the completion from thinking.
        if not content and thinking:
            print("[LLM] Content is empty, extracting from thinking...")
            content = extract_completion_from_thinking(thinking)
            print(f"[LLM] Extracted completion: {repr(content[:100])}...")

        print(f"[LLM] Final content length: {len(content)}")

        # Emit the full content as a single event, then the done marker.
        async def generate():
            if content:
                print(f"[LLM] Yielding full content: {repr(content)}")
                yield f"data: {json.dumps({'content': content})}\n\n"
            yield f"data: {json.dumps({'done': True})}\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")

    except Exception as e:
        # Top-level boundary: log the failure and report it to the client.
        print(f"[Backend] Error: {e}")
        import traceback
        traceback.print_exc()
        return JSONResponse(content={"error": str(e)}, status_code=500)
|
||
|
||
if __name__ == "__main__":
    # uvicorn is only needed when this file is executed directly.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 8000
    print("[Backend] Starting server on http://0.0.0.0:8000")
    uvicorn.run(app, host=bind_host, port=bind_port)
|