# Source listing metadata: 76 lines, 3.1 KiB, Python
from fastapi import FastAPI, HTTPException
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel
|
|
import os
|
|
import json
|
|
import time
|
|
|
|
# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# Printed once, when this module is imported or executed.
print("[Main] Backend service starting...")
|
|
|
|
class CompletionRequest(BaseModel):
    """Request body accepted by the completion endpoint."""

    # Text passed to the prompt builder as the leading context.
    prefix: str
    # Text passed to the prompt builder as the trailing context.
    suffix: str
    # Language identifier; optional, falls back to markdown when omitted.
    languageId: str = "markdown"
|
|
|
|
def generate_stream(request: CompletionRequest):
    """Log the request and return an async stream of SSE frames for it.

    This is deliberately a plain function that *returns* an async generator.
    It must not contain a ``yield`` of its own: the previous version yielded
    in its outer ``except`` branch, which turned the whole function into a
    sync generator, so ``return gen()`` merely ended the iteration and no
    chunk was ever delivered on the success path.

    Args:
        request: Completion request. ``prefix`` and ``suffix`` are passed to
            ``build_prompt``; ``languageId`` is only logged.

    Returns:
        An async generator of strings, each of the form ``data: <json>``
        followed by a blank line. It yields one frame per chunk produced by
        ``stream_openai`` and finishes with a ``{"done": true}`` frame. If
        building the prompt fails, or streaming fails part-way, the last
        frame is ``{"error": ..., "type": ...}`` instead and the stream ends.
    """
    # Function-scope imports of the project modules, kept as in the original.
    from prompt import build_prompt
    from llm import stream_openai

    start_time = time.time()
    print("[Main] ========== New Request ==========")
    print(f"[Main] prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
    print(f"[Main] languageId: {request.languageId}")
    print(f"[Main] Prefix (last 200 chars): '{request.prefix[-200:]}'")
    print(f"[Main] Suffix (first 200 chars): '{request.suffix[:200]}'")

    def sse_frame(payload):
        # Encode one JSON-serialisable payload as a Server-Sent-Events frame.
        return f"data: {json.dumps(payload)}\n\n"

    async def gen():
        # Phase 1: build the prompt. A failure here is reported to the client
        # as a single error frame rather than as an exception mid-response.
        try:
            prompt = build_prompt(request.prefix, request.suffix)
            print(f"[Main] Built prompt length: {len(prompt)}")
            print(f"[Main] Prompt (first 300 chars): '{prompt[:300]}'")
            print(f"[Main] Prompt (last 200 chars): '{prompt[-200:]}'")
        except Exception as e:
            error_msg = {"error": str(e), "type": type(e).__name__}
            print(f"[Main] Error building prompt or calling LLM: {e}")
            yield sse_frame(error_msg)
            return

        # Phase 2: relay upstream chunks, then signal completion.
        chunk_count = 0
        first_chunk_time = None
        try:
            async for chunk in stream_openai(prompt):
                if first_chunk_time is None:
                    first_chunk_time = time.time() - start_time
                chunk_count += 1
                # Chunks may arrive as JSON text or as already-decoded objects.
                chunk_data = json.loads(chunk) if isinstance(chunk, str) else chunk
                content = chunk_data.get('content')
                content_preview = content[:50] if content else ''
                print(f"[Main] Chunk {chunk_count}: '{content_preview}'...")
                yield sse_frame(chunk_data)

            total_time = time.time() - start_time
            # first_chunk_time stays None when the upstream produced nothing;
            # formatting None with ':.2f' would raise TypeError and replace
            # the done signal with a spurious error frame.
            if first_chunk_time is None:
                first_chunk_desc = "n/a"
            else:
                first_chunk_desc = f"{first_chunk_time:.2f}s"
            print(f"[Main] Stream complete - total chunks: {chunk_count}, first chunk at: {first_chunk_desc}, total time: {total_time:.2f}s")
            yield sse_frame({"done": True})
        except Exception as e:
            error_msg = {"error": str(e), "type": type(e).__name__}
            print(f"[Main] Generator error: {e}")
            yield sse_frame(error_msg)

    return gen()
|
|
|
|
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Handle POST /v1/completions by wrapping the stream in an SSE response.

    Args:
        request: Parsed completion request body.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream`` whose
        body is whatever ``generate_stream(request)`` produces.
    """
    called_at = time.time()
    print(f"[Main] POST /v1/completions called at {called_at}")
    event_stream = generate_stream(request)
    return StreamingResponse(event_stream, media_type="text/event-stream")
|
|
|
|
@app.get("/health")
async def health_check():
    """Return a fixed "healthy" status together with the current Unix time."""
    now = time.time()
    return {"status": "healthy", "timestamp": now}
|
|
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    # PORT comes from the environment; 8000 is used when it is unset.
    port = int(os.environ.get('PORT', 8000))
    print(f"[Main] Starting server on http://0.0.0.0:{port}")
    uvicorn.run(app, host="0.0.0.0", port=port)
|