Files
llm-in-text/backend/main.py

48 lines
1.6 KiB
Python

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import os
import json
# Application instance: the completions route is registered on it via
# @app.post, and it is handed to uvicorn when this file is run as a script.
app = FastAPI()
class CompletionRequest(BaseModel):
    # Request body for POST /v1/completions.
    # NOTE: documented with comments rather than a docstring so the generated
    # schema description stays exactly as it was.

    # Text surrounding the insertion point; both are passed to build_prompt().
    # Presumably "before the cursor" / "after the cursor" - confirm with client.
    prefix: str
    suffix: str

    # Optional; falls back to 'markdown' when the client omits it.
    # Not read anywhere in the visible part of this file.
    languageId: str = "markdown"
def generate_stream(request: CompletionRequest):
    """Return an async generator of Server-Sent-Event frames for *request*.

    The returned generator yields strings of the form ``"data: <chunk>\\n\\n"``
    for every chunk produced by ``stream_openai`` and finishes with
    ``'data: {"done": true}\\n\\n'``.

    If building the prompt or streaming from the model raises, a single
    ``'data: {"error": "<message>"}\\n\\n'`` frame is emitted instead and the
    stream ends without a ``done`` frame.

    Args:
        request: Parsed request body; only ``prefix`` and ``suffix`` are read.

    Returns:
        An async generator suitable for ``StreamingResponse``.

    Raises:
        ImportError: If the project-local ``prompt`` or ``llm`` module cannot
            be imported (raised at call time, before any streaming starts).
    """
    # Kept as function-local imports, as in the original, so importing this
    # module does not require the prompt/LLM modules to be importable.
    from prompt import build_prompt
    from llm import stream_openai

    print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")

    # This outer function must NOT contain a `yield`: that would turn it into
    # a sync generator whose `return gen()` merely stops iteration, so the
    # client would receive an empty stream. All yielding happens in gen().
    async def gen():
        chunk_count = 0
        try:
            prompt = build_prompt(request.prefix, request.suffix)
            print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")
            # Chunks are interpolated verbatim into the frame.
            # NOTE(review): assumes a chunk never contains a newline, which
            # would break SSE framing - confirm against stream_openai.
            async for chunk in stream_openai(prompt):
                chunk_count += 1
                yield f"data: {chunk}\n\n"
                if chunk_count % 5 == 0:
                    print(f"[Backend] Sent chunk {chunk_count}")
        except Exception as e:
            # The try block covers the streaming loop too, so failures that
            # happen mid-stream are reported to the client as well.
            print(f"[Backend] Error: {e}")
            # json.dumps escapes quotes/backslashes/newlines in the message,
            # keeping the payload valid JSON on a single SSE data line.
            error_msg = json.dumps({"error": str(e)})
            yield f"data: {error_msg}\n\n"
            return
        yield "data: {\"done\": true}\n\n"
        print(f"[Backend] Stream complete, total chunks: {chunk_count}")

    return gen()
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    # Route handler: validates the JSON body as a CompletionRequest and answers
    # with a text/event-stream response fed by generate_stream().
    # (Comment instead of a docstring so the generated API description is
    # unchanged.)
    print("[Backend] POST /v1/completions called")
    event_stream = generate_stream(request)
    return StreamingResponse(event_stream, media_type="text/event-stream")
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 8000
    print(f"[Backend] Starting server on http://{bind_host}:{bind_port}")
    uvicorn.run(app, host=bind_host, port=bind_port)