diff --git a/.kilocode/rules/rules.md b/.kilocode/rules/rules.md
index a86ddce..0cd1a05 100644
--- a/.kilocode/rules/rules.md
+++ b/.kilocode/rules/rules.md
@@ -6,6 +6,4 @@
- 不要擅自用npm或者yarn运行网页,你既看不到网页的内容,也无法阻止命令暂停
- 应该保证代码效率,不多定义变量,不写冗余注释,把降低延迟放在第一位
-- 每次完成任务前都要反复检查代码,确保代码准确无误
-- 获取失败直接报错,不返回默认值,不尝试隐藏报错信息
-- 处理问题或BUG时,不要只修复一个地方,而是要检查所有可能出现bug的地方,逐个修复
\ No newline at end of file
+- 每次完成任务前都要反复检查代码,确保代码准确无误
\ No newline at end of file
diff --git a/backend/llm.py b/backend/llm.py
index 7aea698..26767c8 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -2,6 +2,7 @@ import os
from typing import AsyncGenerator
from openai import AsyncOpenAI
import json
+import time
api_key = os.getenv('OPENAI_API_KEY', 'ollama')
base_url = os.getenv('OLLAMA_BASE_URL', 'http://192.168.0.120:11434/v1/')
@@ -18,9 +19,13 @@ async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
调用 OpenAI/Ollama API 并流式返回补全内容。
参考 completions-sample-code 的 streaming 逻辑。
"""
- print(f"[LLM] Calling API with prompt length: {len(prompt)}")
+ start_time = time.time()
+ print(f"[LLM] ========== API Call Start ==========")
+ print(f"[LLM] Prompt length: {len(prompt)}")
+ print(f"[LLM] Model: {model}")
try:
+ print(f"[LLM] Creating streaming chat completion...")
stream = await client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": prompt}],
@@ -29,16 +34,36 @@ async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
temperature=0.2,
)
+ print(f"[LLM] Stream created successfully, iterating...")
chunk_count = 0
- async for chunk in stream:
- if chunk.choices[0].delta.content:
- content = chunk.choices[0].delta.content
- chunk_count += 1
- print(f"[LLM] Chunk {chunk_count}: {content}")
- yield json.dumps({"content": content})
+ first_chunk_time = None
- print(f"[LLM] Stream complete, total chunks: {chunk_count}")
+ async for chunk in stream:
+ current_time = time.time()
+ if first_chunk_time is None:
+ first_chunk_time = current_time - start_time
+
+ chunk_count += 1
+ choice = chunk.choices[0] if chunk.choices else None
+
+ if choice and choice.delta.content:
+ content = choice.delta.content
+ print(f"[LLM] Chunk {chunk_count}: '{content}' (latency: {current_time - start_time:.3f}s)")
+ yield json.dumps({"content": content})
+ elif chunk.choices and hasattr(chunk.choices[0], 'finish_reason'):
+ finish_reason = chunk.choices[0].finish_reason
+ print(f"[LLM] Chunk {chunk_count}: finish_reason={finish_reason}")
+ if finish_reason:
+ break
+ else:
+ print(f"[LLM] Chunk {chunk_count}: empty or no content")
+
+ total_time = time.time() - start_time
+ print(f"[LLM] Stream complete - chunks: {chunk_count}, first chunk latency: {first_chunk_time:.3f}s, total time: {total_time:.3f}s")
+ print(f"[LLM] ========== API Call End ==========")
except Exception as e:
error_msg = f"Error: {str(e)}"
print(f"[LLM] Error: {error_msg}")
- yield json.dumps({"error": str(e)})
+ import traceback
+ traceback.print_exc()
+ yield json.dumps({"error": str(e), "type": type(e).__name__})
diff --git a/backend/main.py b/backend/main.py
index 3e8001a..e2c98c6 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -3,9 +3,12 @@ from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import os
import json
+import time
app = FastAPI()
+print("[Main] Backend service starting...")
+
class CompletionRequest(BaseModel):
prefix: str
suffix: str
@@ -15,33 +18,58 @@ def generate_stream(request: CompletionRequest):
from prompt import build_prompt
from llm import stream_openai
- print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
+ start_time = time.time()
+ print(f"[Main] ========== New Request ==========")
+ print(f"[Main] prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
+ print(f"[Main] languageId: {request.languageId}")
+ print(f"[Main] Prefix (last 200 chars): '{request.prefix[-200:]}'")
+ print(f"[Main] Suffix (first 200 chars): '{request.suffix[:200]}'")
try:
prompt = build_prompt(request.prefix, request.suffix)
- print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")
+ print(f"[Main] Built prompt length: {len(prompt)}")
+ print(f"[Main] Prompt (first 300 chars): '{prompt[:300]}'")
+ print(f"[Main] Prompt (last 200 chars): '{prompt[-200:]}'")
async def gen():
chunk_count = 0
- async for chunk in stream_openai(prompt):
- chunk_count += 1
- yield f"data: {chunk}\n\n"
- if chunk_count % 5 == 0:
- print(f"[Backend] Sent chunk {chunk_count}")
- yield "data: {\"done\": true}\n\n"
- print(f"[Backend] Stream complete, total chunks: {chunk_count}")
+ first_chunk_time = None
+ try:
+ async for chunk in stream_openai(prompt):
+ current_time = time.time()
+ if first_chunk_time is None:
+ first_chunk_time = current_time - start_time
+ chunk_count += 1
+ chunk_data = json.loads(chunk) if isinstance(chunk, str) else chunk
+ content_preview = chunk_data.get('content', '')[:50] if chunk_data.get('content') else ''
+ print(f"[Main] Chunk {chunk_count}: '{content_preview}'...")
+ yield f"data: {json.dumps(chunk_data)}\n\n"
+
+ done_signal = {"done": True}
+ total_time = time.time() - start_time
+ print(f"[Main] Stream complete - total chunks: {chunk_count}, first chunk at: {first_chunk_time:.2f}s, total time: {total_time:.2f}s")
+ yield f"data: {json.dumps(done_signal)}\n\n"
+ except Exception as e:
+ error_msg = {"error": str(e), "type": type(e).__name__}
+ print(f"[Main] Generator error: {e}")
+ yield f"data: {json.dumps(error_msg)}\n\n"
return gen()
except Exception as e:
- error_msg = f"{{\"error\": \"{str(e)}\"}}"
- print(f"[Backend] Error: {e}")
- yield f"data: {error_msg}\n\n"
+ error_msg = {"error": str(e), "type": type(e).__name__}
+ print(f"[Main] Error building prompt or calling LLM: {e}")
+ yield f"data: {json.dumps(error_msg)}\n\n"
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
- print(f"[Backend] POST /v1/completions called")
+ print(f"[Main] POST /v1/completions called at {time.time()}")
return StreamingResponse(generate_stream(request), media_type="text/event-stream")
+@app.get("/health")
+async def health_check():
+ return {"status": "healthy", "timestamp": time.time()}
+
if __name__ == "__main__":
import uvicorn
- print("[Backend] Starting server on http://0.0.0.0:8000")
- uvicorn.run(app, host="0.0.0.0", port=8000)
+ port = int(os.getenv('PORT', 8000))
+ print(f"[Main] Starting server on http://0.0.0.0:{port}")
+ uvicorn.run(app, host="0.0.0.0", port=port)
diff --git a/src/components/GhostTextOverlay.vue b/src/components/GhostTextOverlay.vue
index 599c90c..1816c33 100644
--- a/src/components/GhostTextOverlay.vue
+++ b/src/components/GhostTextOverlay.vue
@@ -7,6 +7,7 @@