"""FastAPI backend that streams LLM completions as server-sent events."""

import json
import os
import time
from typing import AsyncIterator

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

print("[Main] Backend service starting...")


class CompletionRequest(BaseModel):
    """Request body accepted by ``POST /v1/completions``."""

    # First argument passed to ``build_prompt``.
    prefix: str
    # Second argument passed to ``build_prompt``.
    suffix: str
    # Only logged by this module; it is not forwarded to ``build_prompt``.
    languageId: str = 'markdown'


def _sse_event(payload) -> str:
    """Serialize *payload* to JSON and frame it as one SSE ``data:`` event."""
    return f"data: {json.dumps(payload)}\n\n"


async def generate_stream(request: CompletionRequest) -> AsyncIterator[str]:
    """Yield SSE-framed events for one completion request.

    Builds a prompt from ``request.prefix`` / ``request.suffix``, iterates
    ``stream_openai(prompt)`` and yields each chunk as a ``data: <json>``
    event, followed by a final ``{"done": true}`` event.

    Failures are reported in-band: any exception raised while building the
    prompt or while streaming is turned into a single
    ``{"error": ..., "type": ...}`` event and the stream ends.

    This is an async generator. The previous implementation mixed ``yield``
    and ``return gen()`` in one synchronous function, which made it a sync
    generator that finished immediately without ever iterating the inner
    async generator, so successful requests produced an empty body.
    """
    # Imported lazily, as before, so the module can be imported without
    # these project modules being loaded up front.
    from prompt import build_prompt
    from llm import stream_openai

    # perf_counter is monotonic, so elapsed-time logs are not affected by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    print(f"[Main] ========== New Request ==========")
    print(f"[Main] prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
    print(f"[Main] languageId: {request.languageId}")
    print(f"[Main] Prefix (last 200 chars): '{request.prefix[-200:]}'")
    print(f"[Main] Suffix (first 200 chars): '{request.suffix[:200]}'")

    try:
        prompt = build_prompt(request.prefix, request.suffix)
        print(f"[Main] Built prompt length: {len(prompt)}")
        print(f"[Main] Prompt (first 300 chars): '{prompt[:300]}'")
        print(f"[Main] Prompt (last 200 chars): '{prompt[-200:]}'")
    except Exception as e:
        error_msg = {"error": str(e), "type": type(e).__name__}
        print(f"[Main] Error building prompt or calling LLM: {e}")
        yield _sse_event(error_msg)
        return

    chunk_count = 0
    first_chunk_time = None

    try:
        async for chunk in stream_openai(prompt):
            if first_chunk_time is None:
                first_chunk_time = time.perf_counter() - start_time

            chunk_count += 1
            # Chunks may arrive either as JSON text or as already-decoded
            # objects; normalise to the decoded form before re-encoding.
            chunk_data = json.loads(chunk) if isinstance(chunk, str) else chunk
            content = chunk_data.get('content')
            content_preview = content[:50] if content else ''
            print(f"[Main] Chunk {chunk_count}: '{content_preview}'...")
            yield _sse_event(chunk_data)

        total_time = time.perf_counter() - start_time
        # first_chunk_time stays None when the upstream stream is empty;
        # formatting None with ':.2f' would raise TypeError and turn a
        # clean (empty) completion into an error event.
        if first_chunk_time is None:
            first_chunk_label = "n/a"
        else:
            first_chunk_label = f"{first_chunk_time:.2f}s"
        print(f"[Main] Stream complete - total chunks: {chunk_count}, first chunk at: {first_chunk_label}, total time: {total_time:.2f}s")
        yield _sse_event({"done": True})
    except Exception as e:
        # The HTTP response has already started, so the failure is sent to
        # the client as an event instead of being raised.
        error_msg = {"error": str(e), "type": type(e).__name__}
        print(f"[Main] Generator error: {e}")
        yield _sse_event(error_msg)


@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Stream the completion for *request* as ``text/event-stream``."""
    print(f"[Main] POST /v1/completions called at {time.time()}")
    return StreamingResponse(generate_stream(request), media_type="text/event-stream")


@app.get("/health")
async def health_check():
    """Return a static health status together with the current timestamp."""
    return {"status": "healthy", "timestamp": time.time()}


if __name__ == "__main__":
    import uvicorn

    # PORT environment variable overrides the default port 8000.
    port = int(os.getenv('PORT', 8000))
    print(f"[Main] Starting server on http://0.0.0.0:{port}")
    uvicorn.run(app, host="0.0.0.0", port=port)