Files
llm-in-text/backend/main.py

76 lines
3.1 KiB
Python

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import os
import json
import time
# Application instance; the @app.post / @app.get decorators in this module
# register their handlers on it.
app = FastAPI()
# Module-level banner: executes whenever this module is loaded, not only when
# it is run as a script.
print("[Main] Backend service starting...")
class CompletionRequest(BaseModel):
    # Request body accepted by POST /v1/completions.
    #
    # Documented with comments rather than a docstring so the generated
    # schema description is left exactly as it was.

    # Passed as the first argument to build_prompt
    # (presumably the text preceding the insertion point -- confirm with client).
    prefix: str
    # Passed as the second argument to build_prompt
    # (presumably the text following the insertion point -- confirm with client).
    suffix: str
    # Optional; defaults to 'markdown'. Within this module it is only logged.
    languageId: str = "markdown"
def generate_stream(request: CompletionRequest):
    """Build the prompt for *request* and return an async SSE event stream.

    The returned object is always an async generator of ``data: <json>\\n\\n``
    strings, suitable for passing straight to ``StreamingResponse``:

    * one event per chunk produced by ``stream_openai(prompt)``,
    * followed by a final ``{"done": true}`` event, or
    * a single ``{"error": ..., "type": ...}`` event if building the prompt
      fails or the upstream stream raises.

    Previously this function contained both ``return gen()`` and a ``yield``,
    which made it a *generator function*: the ``return gen()`` value was
    discarded and the success path streamed nothing. It is now a plain
    function that returns an async generator on every path.

    Args:
        request: The parsed completion request (``prefix``, ``suffix``,
            ``languageId``).

    Returns:
        An async generator yielding server-sent-event formatted strings.
    """
    # Imported lazily, as in the original, so the module can be loaded without
    # these project modules being importable at import time.
    from prompt import build_prompt
    from llm import stream_openai

    start_time = time.time()
    print("[Main] ========== New Request ==========")
    print(f"[Main] prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
    print(f"[Main] languageId: {request.languageId}")
    print(f"[Main] Prefix (last 200 chars): '{request.prefix[-200:]}'")
    print(f"[Main] Suffix (first 200 chars): '{request.suffix[:200]}'")

    try:
        prompt = build_prompt(request.prefix, request.suffix)
        print(f"[Main] Built prompt length: {len(prompt)}")
        print(f"[Main] Prompt (first 300 chars): '{prompt[:300]}'")
        print(f"[Main] Prompt (last 200 chars): '{prompt[-200:]}'")
    except Exception as e:
        error_msg = {"error": str(e), "type": type(e).__name__}
        print(f"[Main] Error building prompt or calling LLM: {e}")
        # Format the event now: `e` is unbound once the except block exits.
        error_event = f"data: {json.dumps(error_msg)}\n\n"

        async def error_stream():
            # Report the failure in-band so the client sees an SSE error event.
            yield error_event

        return error_stream()

    async def gen():
        chunk_count = 0
        first_chunk_time = None
        try:
            async for chunk in stream_openai(prompt):
                if first_chunk_time is None:
                    # Latency from request start to the first upstream chunk.
                    first_chunk_time = time.time() - start_time
                chunk_count += 1
                # Chunks may arrive as JSON text or as already-decoded objects.
                chunk_data = json.loads(chunk) if isinstance(chunk, str) else chunk
                content_preview = (chunk_data.get('content') or '')[:50]
                print(f"[Main] Chunk {chunk_count}: '{content_preview}'...")
                yield f"data: {json.dumps(chunk_data)}\n\n"

            done_signal = {"done": True}
            total_time = time.time() - start_time
            # An empty upstream stream leaves first_chunk_time as None; formatting
            # None with ':.2f' would raise and replace the done event with an error.
            if first_chunk_time is None:
                first_chunk_label = "n/a"
            else:
                first_chunk_label = f"{first_chunk_time:.2f}s"
            print(f"[Main] Stream complete - total chunks: {chunk_count}, first chunk at: {first_chunk_label}, total time: {total_time:.2f}s")
            yield f"data: {json.dumps(done_signal)}\n\n"
        except Exception as e:
            # Mid-stream failures are reported in-band; the HTTP status has
            # already been sent by the time streaming starts.
            error_msg = {"error": str(e), "type": type(e).__name__}
            print(f"[Main] Generator error: {e}")
            yield f"data: {json.dumps(error_msg)}\n\n"

    return gen()
# POST /v1/completions: log the call time, then stream whatever
# generate_stream(request) produces as a text/event-stream response.
# (Comment instead of docstring so the route's OpenAPI description is unchanged.)
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    called_at = time.time()
    print(f"[Main] POST /v1/completions called at {called_at}")
    event_stream = generate_stream(request)
    return StreamingResponse(event_stream, media_type="text/event-stream")
# GET /health: report a fixed "healthy" status together with the current
# time.time() value.
# (Comment instead of docstring so the route's OpenAPI description is unchanged.)
@app.get("/health")
async def health_check():
    now = time.time()
    payload = {"status": "healthy", "timestamp": now}
    return payload
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on all interfaces.
    import uvicorn

    # The PORT environment variable overrides the default of 8000.
    port = int(os.environ.get('PORT', 8000))
    print(f"[Main] Starting server on http://0.0.0.0:{port}")
    uvicorn.run(app, host="0.0.0.0", port=port)