"""HTTP API exposing text-completion and image-OCR endpoints.

Endpoints:
    POST /v1/completions  -- builds a prompt from prefix/suffix, calls the
                             model, and streams the result as two SSE events.
    POST /v1/ocr          -- decodes a base64 image and forwards it to the
                             vision model for text extraction.
"""

import base64
import binascii
import json
import logging
import uuid

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel

from prompt import build_prompt
from llm import call_ollama, call_vlm_ocr

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
)
logger = logging.getLogger("api")

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is a very
# permissive CORS policy; restrict allow_origins to known clients before
# exposing this service beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class CompletionRequest(BaseModel):
    """Request body for POST /v1/completions."""

    # Text before the insertion point.
    prefix: str
    # Text after the insertion point.
    suffix: str
    # Language identifier passed through to build_prompt.
    languageId: str = 'markdown'


class OCRRequest(BaseModel):
    """Request body for POST /v1/ocr."""

    # Base64-encoded image; a leading "data:...;base64," prefix is tolerated.
    image: str
    # Echoed back in the response; not used to read anything from disk.
    filename: str = "image.jpg"
    # Language hint passed through to call_vlm_ocr.
    language: str = 'auto'


def _preview(text: str, limit: int = 80) -> str:
    """Return a single-line, length-capped rendering of *text* for log output.

    Newlines are replaced by a literal backslash-n; text longer than *limit*
    characters is cut to *limit* and suffixed with "...". ``None`` or empty
    input yields an empty string.
    """
    value = (text or "").replace("\n", "\\n")
    if len(value) <= limit:
        return value
    return value[:limit] + "..."


def _build_force_non_empty_prompt(base_prompt: str) -> str:
    """Return *base_prompt* with extra instructions demanding non-empty output."""
    return (
        base_prompt
        + "\n\nStrict override for this request:\n"
        + "- Output must be non-empty.\n"
        + "- If you would otherwise output empty, output a single space.\n"
        + "- Keep it short and do not repeat SUFFIX.\n"
    )


def _decode_image(data: str) -> bytes:
    """Decode a base64 image payload, tolerating a ``data:`` URL prefix.

    Raises:
        binascii.Error / ValueError: if the payload is not decodable base64.
    """
    payload = data or ""
    # Without this, the non-strict decoder would drop ':' ';' ',' and decode the
    # letters of the "data:<mime>;base64," header as if they were image data.
    if payload.startswith("data:") and "," in payload:
        payload = payload.split(",", 1)[1]
    return base64.b64decode(payload)


@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Generate a completion and stream it as server-sent events.

    The model is called once; if it returns only whitespace, it is called a
    second time with a stricter prompt and a lower temperature. If that is
    still empty, a single space is returned so the response is never empty.

    Returns:
        A ``text/event-stream`` response carrying one ``{"content": ...}``
        event followed by one ``{"done": true}`` event, or a JSON
        ``{"error": ...}`` body with status 500 if anything raises.
    """
    request_id = str(uuid.uuid4())[:8]  # short id to correlate log lines
    try:
        logger.info(
            "[%s] /v1/completions prefix_chars=%d suffix_chars=%d lang=%s prefix_tail='%s' suffix_head='%s'",
            request_id,
            len(request.prefix or ""),
            len(request.suffix or ""),
            request.languageId,
            _preview((request.prefix or "")[-120:]),
            _preview((request.suffix or "")[:120]),
        )

        prompt = build_prompt(request.prefix, request.suffix, request.languageId)
        result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
        content = result["content"] or ""
        source = "primary"

        if not content.strip():
            logger.warning("[%s] primary returned empty content, starting retry", request_id)
            retry_prompt = _build_force_non_empty_prompt(prompt)
            retry_result = await call_ollama(
                retry_prompt, tag=f"{request_id}-retry1", temperature=0.4
            )
            content = retry_result["content"] or ""
            source = "retry1"

        if not content.strip():
            # Last resort: guarantee the client receives a non-empty payload.
            content = " "
            source = "fallback-space"
            logger.warning("[%s] retry still empty, forcing single-space fallback", request_id)

        logger.info(
            "[%s] completion resolved source=%s content_chars=%d content_preview='%s'",
            request_id,
            source,
            len(content),
            _preview(content, 120),
        )

        async def generate():
            # The full content is sent in one event, then a terminator event.
            yield f"data: {json.dumps({'content': content})}\n\n"
            yield f"data: {json.dumps({'done': True})}\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    except Exception as e:
        logger.exception("[%s] /v1/completions failed: %s", request_id, e)
        return JSONResponse(content={"error": str(e)}, status_code=500)


@app.post("/v1/ocr")
async def ocr_image(request: OCRRequest):
    """Decode the submitted image and return the text the vision model extracts.

    Returns:
        ``{"text": ..., "filename": ...}`` on success; a JSON ``{"error": ...}``
        body with status 400 when the image is not valid base64, or status 500
        for any other failure.
    """
    request_id = str(uuid.uuid4())[:8]  # short id to correlate log lines
    try:
        logger.info(
            "[%s] /v1/ocr filename=%s language=%s image_base64_chars=%d",
            request_id,
            request.filename,
            request.language,
            len(request.image or ""),
        )

        try:
            image_bytes = _decode_image(request.image)
        except (binascii.Error, ValueError) as e:
            # Malformed input is the caller's fault: report 400, not 500.
            logger.warning("[%s] /v1/ocr invalid base64 image: %s", request_id, e)
            return JSONResponse(
                content={"error": f"image is not valid base64: {e}"},
                status_code=400,
            )

        logger.info("[%s] /v1/ocr decoded image_bytes=%d", request_id, len(image_bytes))
        result = await call_vlm_ocr(image_bytes, request.language)
        logger.info(
            "[%s] /v1/ocr success text_chars=%d text_preview='%s'",
            request_id,
            len(result or ""),
            _preview(result or "", 120),
        )
        return {"text": result, "filename": request.filename}
    except Exception as e:
        logger.exception("[%s] /v1/ocr failed: %s", request_id, e)
        return JSONResponse(content={"error": str(e)}, status_code=500)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)