"""HTTP API exposing an LLM text-completion endpoint and a VLM OCR endpoint."""

import base64
import json
import logging
import uuid

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel

from prompt import build_prompt, prepare_prompt_context
from llm import call_ollama, call_vlm_ocr

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
)
logger = logging.getLogger("api")

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive (the CORS spec forbids a literal "*" origin on credentialed
# responses). Left unchanged to preserve behavior -- confirm this is intended
# and consider listing explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class CompletionRequest(BaseModel):
    """Request body for ``POST /v1/completions``."""

    # Text before the insertion point.
    prefix: str
    # Text after the insertion point.
    suffix: str
    # Language identifier forwarded to ``build_prompt``.
    languageId: str = 'markdown'


class OCRRequest(BaseModel):
    """Request body for ``POST /v1/ocr``."""

    # Base64-encoded image; a ``data:...;base64,`` URL is also accepted.
    image: str
    # Echoed back in the response and used in log lines.
    filename: str = "image.jpg"
    # Language hint forwarded to ``call_vlm_ocr``.
    language: str = 'auto'


def _preview(text: str, limit: int = 80) -> str:
    """Return a single-line, length-capped rendering of *text* for log output.

    Newlines are replaced by a literal ``\\n``; anything longer than *limit*
    characters is cut at *limit* and suffixed with ``...``. ``None`` or an
    empty string yields ``""``.
    """
    value = (text or "").replace("\n", "\\n")
    if len(value) <= limit:
        return value
    return value[:limit] + "..."


def _decode_image(payload: str) -> bytes:
    """Decode a base64 image payload, tolerating a ``data:`` URL prefix.

    Args:
        payload: Base64 text, optionally prefixed with ``data:<mime>;base64,``.

    Returns:
        The decoded, non-empty image bytes.

    Raises:
        ValueError: If the payload is not decodable base64, is a ``data:`` URL
            without a ``,`` separator, or decodes to zero bytes.
    """
    data = (payload or "").strip()
    if data.startswith("data:"):
        # b64decode() discards non-alphabet characters by default, so a data
        # URL header would otherwise be partially decoded as garbage bytes
        # instead of being rejected. Drop everything up to the comma.
        _, separator, data = data.partition(",")
        if not separator:
            raise ValueError("malformed data URL: missing ',' separator")
    try:
        # binascii.Error (bad padding) is a ValueError subclass; non-ASCII
        # input raises a plain ValueError.
        image_bytes = base64.b64decode(data)
    except ValueError as exc:
        raise ValueError(f"invalid base64 image data: {exc}") from exc
    if not image_bytes:
        raise ValueError("image payload is empty")
    return image_bytes


@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Generate a completion for the text between ``prefix`` and ``suffix``.

    Builds a prompt, calls the LLM once, and returns the result as a
    ``text/event-stream`` response with two events: one carrying
    ``{"content": ...}`` and a final ``{"done": true}``. Any exception is
    logged and reported as a 500 JSON response ``{"error": ...}``.
    """
    request_id = str(uuid.uuid4())[:8]
    try:
        # Normalize once so logging and prompt construction see the same text.
        prefix = request.prefix or ""
        suffix = request.suffix or ""
        logger.info(
            "[%s] /v1/completions prefix_chars=%d suffix_chars=%d lang=%s prefix_tail='%s' suffix_head='%s'",
            request_id,
            len(prefix),
            len(suffix),
            request.languageId,
            _preview(prefix[-120:]),
            _preview(suffix[:120]),
        )

        # NOTE(review): llm_prefix/llm_suffix are only logged here; the prompt
        # below is built from the raw prefix/suffix. Presumably build_prompt
        # applies the same preparation internally -- confirm.
        llm_prefix, llm_suffix = prepare_prompt_context(prefix, suffix)
        logger.info("[%s] llm_input_prefix=%r", request_id, llm_prefix)
        logger.info("[%s] llm_input_suffix=%r", request_id, llm_suffix)

        prompt = build_prompt(prefix, suffix, request.languageId)
        result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
        content = result["content"] or ""

        if not content.strip():
            logger.warning("[%s] primary returned empty content, returning empty result", request_id)

        logger.info(
            "[%s] completion resolved source=primary content_chars=%d content_preview='%s'",
            request_id,
            len(content),
            _preview(content, 120),
        )

        async def generate():
            # The completion is already fully materialized; emit it as a
            # single SSE event followed by the terminating "done" event.
            yield f"data: {json.dumps({'content': content})}\n\n"
            yield f"data: {json.dumps({'done': True})}\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    except Exception as e:
        logger.exception("[%s] /v1/completions failed: %s", request_id, e)
        return JSONResponse(content={"error": str(e)}, status_code=500)


@app.post("/v1/ocr")
async def ocr_image(request: OCRRequest):
    """Run OCR on a base64-encoded image and return the recognized text.

    Returns ``{"text": ..., "filename": ...}`` on success, a 400 JSON error
    when the image payload cannot be decoded, and a 500 JSON error for any
    other failure.
    """
    request_id = str(uuid.uuid4())[:8]
    logger.info(
        "[%s] /v1/ocr filename=%s language=%s image_base64_chars=%d",
        request_id,
        request.filename,
        request.language,
        len(request.image or ""),
    )

    # A payload that cannot be decoded is the client's fault: answer 400
    # rather than letting it fall through to the generic 500 handler.
    try:
        image_bytes = _decode_image(request.image)
    except ValueError as e:
        logger.warning("[%s] /v1/ocr rejected image payload: %s", request_id, e)
        return JSONResponse(content={"error": str(e)}, status_code=400)

    try:
        logger.info("[%s] /v1/ocr decoded image_bytes=%d", request_id, len(image_bytes))

        result = await call_vlm_ocr(image_bytes, request.language)
        logger.info(
            "[%s] /v1/ocr success text_chars=%d text_preview='%s'",
            request_id,
            len(result or ""),
            _preview(result or "", 120),
        )
        return {"text": result, "filename": request.filename}
    except Exception as e:
        logger.exception("[%s] /v1/ocr failed: %s", request_id, e)
        return JSONResponse(content={"error": str(e)}, status_code=500)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8001)