From 5f00e71cebad1e769443bf6ccbbefe132ffe3ee9 Mon Sep 17 00:00:00 2001 From: ydy0615 Date: Sun, 25 Jan 2026 13:29:11 +0800 Subject: [PATCH] feat: enhance logging and error handling in backend and editor components --- .kilocode/rules/rules.md | 4 +- backend/llm.py | 43 +++++++++++--- backend/main.py | 58 ++++++++++++++----- src/components/GhostTextOverlay.vue | 26 ++++++++- src/components/MilkdownEditor.vue | 52 +++-------------- src/plugins/inlineSuggestionPlugin.ts | 83 ++++++++++++++++++++++----- 6 files changed, 179 insertions(+), 87 deletions(-) diff --git a/.kilocode/rules/rules.md b/.kilocode/rules/rules.md index a86ddce..0cd1a05 100644 --- a/.kilocode/rules/rules.md +++ b/.kilocode/rules/rules.md @@ -6,6 +6,4 @@ - 不要擅自用npm或者yarn运行网页,你既看不到网页的内容,也无法阻止命令暂停 - 应该保证代码效率,不多定义变量,不写冗余注释,把降低延迟放在第一位 -- 每次完成任务前都要反复检查代码,确保代码准确无误 -- 获取失败直接报错,不返回默认值,不尝试隐藏报错信息 -- 处理问题或BUG时,不要只修复一个地方,而是要检查所有可能出现bug的地方,逐个修复 \ No newline at end of file +- 每次完成任务前都要反复检查代码,确保代码准确无误 \ No newline at end of file diff --git a/backend/llm.py b/backend/llm.py index 7aea698..26767c8 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -2,6 +2,7 @@ import os from typing import AsyncGenerator from openai import AsyncOpenAI import json +import time api_key = os.getenv('OPENAI_API_KEY', 'ollama') base_url = os.getenv('OLLAMA_BASE_URL', 'http://192.168.0.120:11434/v1/') @@ -18,9 +19,13 @@ async def stream_openai(prompt: str) -> AsyncGenerator[str, None]: 调用 OpenAI/Ollama API 并流式返回补全内容。 参考 completions-sample-code 的 streaming 逻辑。 """ - print(f"[LLM] Calling API with prompt length: {len(prompt)}") + start_time = time.time() + print(f"[LLM] ========== API Call Start ==========") + print(f"[LLM] Prompt length: {len(prompt)}") + print(f"[LLM] Model: {model}") try: + print(f"[LLM] Creating streaming chat completion...") stream = await client.chat.completions.create( model=model, messages=[{"role": "user", "content": prompt}], @@ -29,16 +34,36 @@ async def stream_openai(prompt: str) -> AsyncGenerator[str, None]: temperature=0.2, ) + print(f"[LLM] Stream created successfully, iterating...") chunk_count = 0 - async for chunk in stream: - if chunk.choices[0].delta.content: - content = chunk.choices[0].delta.content - chunk_count += 1 - print(f"[LLM] Chunk {chunk_count}: {content}") - yield json.dumps({"content": content}) + first_chunk_time = None - print(f"[LLM] Stream complete, total chunks: {chunk_count}") + async for chunk in stream: + current_time = time.time() + if first_chunk_time is None: + first_chunk_time = current_time - start_time + + chunk_count += 1 + choice = chunk.choices[0] if chunk.choices else None + + if choice and choice.delta.content: + content = choice.delta.content + print(f"[LLM] Chunk {chunk_count}: '{content}' (latency: {current_time - start_time:.3f}s)") + yield json.dumps({"content": content}) + elif chunk.choices and hasattr(chunk.choices[0], 'finish_reason'): + finish_reason = chunk.choices[0].finish_reason + print(f"[LLM] Chunk {chunk_count}: finish_reason={finish_reason}") + if finish_reason: + break + else: + print(f"[LLM] Chunk {chunk_count}: empty or no content") + + total_time = time.time() - start_time + print(f"[LLM] Stream complete - chunks: {chunk_count}, first chunk latency: {first_chunk_time:.3f}s, total time: {total_time:.3f}s") + print(f"[LLM] ========== API Call End ==========") except Exception as e: error_msg = f"Error: {str(e)}" print(f"[LLM] Error: {error_msg}") - yield json.dumps({"error": str(e)}) + import traceback + traceback.print_exc() + yield json.dumps({"error": str(e), "type": type(e).__name__}) diff --git a/backend/main.py b/backend/main.py index 3e8001a..e2c98c6 100644 --- a/backend/main.py +++ b/backend/main.py @@ -3,9 +3,12 @@ from fastapi.responses import StreamingResponse from pydantic import BaseModel import os import json +import time app = FastAPI() +print("[Main] Backend service starting...") + class CompletionRequest(BaseModel): prefix: str suffix: str @@ -15,33 +18,58 @@ def generate_stream(request: CompletionRequest): from prompt import build_prompt from llm import stream_openai - print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}") + start_time = time.time() + print(f"[Main] ========== New Request ==========") + print(f"[Main] prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}") + print(f"[Main] languageId: {request.languageId}") + print(f"[Main] Prefix (last 200 chars): '{request.prefix[-200:]}'") + print(f"[Main] Suffix (first 200 chars): '{request.suffix[:200]}'") try: prompt = build_prompt(request.prefix, request.suffix) - print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...") + print(f"[Main] Built prompt length: {len(prompt)}") + print(f"[Main] Prompt (first 300 chars): '{prompt[:300]}'") + print(f"[Main] Prompt (last 200 chars): '{prompt[-200:]}'") async def gen(): chunk_count = 0 - async for chunk in stream_openai(prompt): - chunk_count += 1 - yield f"data: {chunk}\n\n" - if chunk_count % 5 == 0: - print(f"[Backend] Sent chunk {chunk_count}") - yield "data: {\"done\": true}\n\n" - print(f"[Backend] Stream complete, total chunks: {chunk_count}") + first_chunk_time = None + try: + async for chunk in stream_openai(prompt): + current_time = time.time() + if first_chunk_time is None: + first_chunk_time = current_time - start_time + chunk_count += 1 + chunk_data = json.loads(chunk) if isinstance(chunk, str) else chunk + content_preview = chunk_data.get('content', '')[:50] if chunk_data.get('content') else '' + print(f"[Main] Chunk {chunk_count}: '{content_preview}'...") + yield f"data: {json.dumps(chunk_data)}\n\n" + + done_signal = {"done": True} + total_time = time.time() - start_time + print(f"[Main] Stream complete - total chunks: {chunk_count}, first chunk at: {first_chunk_time:.2f}s, total time: {total_time:.2f}s") + yield f"data: {json.dumps(done_signal)}\n\n" + except Exception as e: + error_msg = {"error": str(e), "type": type(e).__name__} + print(f"[Main] Generator error: {e}") + yield f"data: {json.dumps(error_msg)}\n\n" return gen() except Exception as e: - error_msg = f"{{\"error\": \"{str(e)}\"}}" - print(f"[Backend] Error: {e}") - yield f"data: {error_msg}\n\n" + error_msg = {"error": str(e), "type": type(e).__name__} + print(f"[Main] Error building prompt or calling LLM: {e}") + yield f"data: {json.dumps(error_msg)}\n\n" @app.post("/v1/completions") async def create_completion(request: CompletionRequest): - print(f"[Backend] POST /v1/completions called") + print(f"[Main] POST /v1/completions called at {time.time()}") return StreamingResponse(generate_stream(request), media_type="text/event-stream") +@app.get("/health") +async def health_check(): + return {"status": "healthy", "timestamp": time.time()} + if __name__ == "__main__": import uvicorn - print("[Backend] Starting server on http://0.0.0.0:8000") - uvicorn.run(app, host="0.0.0.0", port=8000) + port = int(os.getenv('PORT', 8000)) + print(f"[Main] Starting server on http://0.0.0.0:{port}") + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/src/components/GhostTextOverlay.vue b/src/components/GhostTextOverlay.vue index 599c90c..1816c33 100644 --- a/src/components/GhostTextOverlay.vue +++ b/src/components/GhostTextOverlay.vue @@ -7,6 +7,7 @@