# llm-in-text/backend/main.py

from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import os
import json

# Application instance; the route decorator further down registers handlers on it
# and the script entry point passes it to uvicorn.
app = FastAPI()

class CompletionRequest(BaseModel):
    """Request body accepted by the POST /v1/completions endpoint."""

    # Passed as the first argument to build_prompt.
    # NOTE(review): presumably the document text before the cursor — confirm.
    prefix: str
    # Passed as the second argument to build_prompt.
    # NOTE(review): presumably the document text after the cursor — confirm.
    suffix: str
    # Optional; defaults to 'markdown'. Not read anywhere in the visible code.
    languageId: str = 'markdown'

async def generate_stream(request: CompletionRequest):
    """Stream a completion for *request* as Server-Sent Events.

    Builds a prompt from the request's prefix and suffix, passes it to
    ``stream_openai`` and relays every chunk it produces as one
    ``data: <chunk>`` event, followed by a final ``{"done": true}`` event.

    This function is itself the async generator that produces the events.
    It must not ``return`` an inner generator while also containing a
    ``yield``: any ``yield`` in the body makes the function a generator, the
    returned object is then discarded, and the client gets an empty stream.

    Args:
        request: Completion request carrying ``prefix`` and ``suffix``.

    Yields:
        SSE-framed strings, each terminated by a blank line. If building the
        prompt or streaming fails, a single ``{"error": "<message>"}`` event
        is emitted and no ``done`` event follows.
    """
    # Function-local imports: resolved when the stream is first iterated.
    from prompt import build_prompt
    from llm import stream_openai

    print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")

    chunk_count = 0
    try:
        prompt = build_prompt(request.prefix, request.suffix)
        print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")

        # Streaming happens inside the try so that upstream failures raised
        # mid-stream are reported to the client as an error event.
        async for chunk in stream_openai(prompt):
            chunk_count += 1
            yield f"data: {chunk}\n\n"
            if chunk_count % 5 == 0:
                print(f"[Backend] Sent chunk {chunk_count}")
    except Exception as e:
        print(f"[Backend] Error: {e}")
        # json.dumps escapes quotes, backslashes and newlines in the message,
        # keeping the payload valid JSON on a single SSE data line.
        error_payload = json.dumps({"error": str(e)})
        yield f"data: {error_payload}\n\n"
    else:
        yield "data: {\"done\": true}\n\n"
        print(f"[Backend] Stream complete, total chunks: {chunk_count}")

@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Handle POST /v1/completions by streaming the completion back.

    The response body is whatever ``generate_stream`` produces for
    *request*, served with the ``text/event-stream`` media type.
    """
    print("[Backend] POST /v1/completions called")
    event_source = generate_stream(request)
    return StreamingResponse(event_source, media_type="text/event-stream")

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 8000.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    print("[Backend] Starting server on http://0.0.0.0:8000")
    uvicorn.run(app, **server_options)