48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
from fastapi import FastAPI, HTTPException
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel
|
|
import os
|
|
import json
|
|
|
|
# Module-level FastAPI application object; the route decorator in this file
# (`@app.post`) and the uvicorn launcher in the `__main__` guard both use it.
app = FastAPI()
|
|
|
|
# Request body accepted by the completion endpoint.
#
# `#` comments are used instead of a class docstring on purpose: pydantic
# exposes a model's docstring as the schema description, and this rewrite
# must not alter the generated schema.
class CompletionRequest(BaseModel):
    # Text preceding the completion point (its length is logged and it is
    # passed to the prompt builder).
    prefix: str
    # Text following the completion point (handled the same way as prefix).
    suffix: str
    # Language identifier of the document; optional, defaults to markdown.
    # NOTE(review): not read anywhere in the visible code - confirm whether
    # the prompt builder is meant to receive it.
    languageId: str = "markdown"
|
|
|
|
def generate_stream(request: CompletionRequest):
    """Return an async iterator of Server-Sent-Events frames for a completion.

    The returned async generator builds a prompt from ``request.prefix`` and
    ``request.suffix``, forwards every chunk produced by ``stream_openai`` as
    a ``data: ...`` frame, and finishes with a ``{"done": true}`` frame.

    If building the prompt or streaming raises, a single
    ``data: {"error": "<message>"}`` frame is emitted and the stream ends
    without the ``done`` frame.

    Args:
        request: The parsed completion request (uses ``prefix`` and
            ``suffix``).

    Returns:
        An async generator yielding ``str`` frames, suitable for passing to
        ``StreamingResponse``.
    """
    # Imported lazily, as in the original, so the module can be loaded
    # without these project modules being importable yet.
    from prompt import build_prompt
    from llm import stream_openai

    print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")

    async def gen():
        chunk_count = 0
        # The try/except lives INSIDE the generator: the body only runs once
        # the response starts iterating, so a handler placed around the
        # `gen()` call would never see streaming errors.
        try:
            prompt = build_prompt(request.prefix, request.suffix)
            print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")

            # NOTE(review): assumes each chunk is a single-line string; an
            # embedded newline would break the SSE frame - confirm against
            # stream_openai.
            async for chunk in stream_openai(prompt):
                chunk_count += 1
                yield f"data: {chunk}\n\n"
                if chunk_count % 5 == 0:
                    print(f"[Backend] Sent chunk {chunk_count}")
        except Exception as e:
            print(f"[Backend] Error: {e}")
            # json.dumps escapes quotes/backslashes/newlines in the message,
            # so the payload is always valid JSON on a single line.
            error_msg = json.dumps({"error": str(e)})
            yield f"data: {error_msg}\n\n"
            return

        yield "data: {\"done\": true}\n\n"
        print(f"[Backend] Stream complete, total chunks: {chunk_count}")

    # This function must not contain `yield` itself: that would turn it into
    # a sync generator and `return gen()` would silently produce an empty
    # stream.
    return gen()
|
|
|
|
# POST /v1/completions: log the call and stream the completion back to the
# client as Server-Sent Events (media type text/event-stream).
# (A `#` comment is used rather than a docstring so the route's generated
# OpenAPI description is left untouched.)
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    print("[Backend] POST /v1/completions called")
    event_source = generate_stream(request)
    return StreamingResponse(event_source, media_type="text/event-stream")
|
|
|
|
# Script entry point: when the file is executed directly, serve `app` with
# uvicorn on all interfaces, port 8000.
if __name__ == "__main__":
    # Imported here so uvicorn is only required when running as a script.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 8000
    print("[Backend] Starting server on http://0.0.0.0:8000")
    uvicorn.run(app, host=bind_host, port=bind_port)
|