Files
llm-in-text/backend/main.py
“ydy0615” 64cfa58376 feat(editor): add image insertion with OCR support and size limit handling
Add image button with dropdown menu for uploading local images or inserting from URL.
Integrate VLM-based OCR to extract text context from images and include in AI suggestions.
Implement document size limits to disable AI when exceeding threshold.
Refactor copilot plugin with per-view runtime state and OCR context injection.
Add OCR cache utility for managing image metadata.
Add code splitting configuration for optimized bundle size.
2026-02-14 18:28:37 +08:00

133 lines
4.2 KiB
Python

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse
from pydantic import BaseModel
import json
import base64
import uuid
import logging
from prompt import build_prompt
from llm import call_ollama, call_vlm_ocr
# Root logging setup: timestamped records at INFO level and above.
logging.basicConfig(
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
    level=logging.INFO,
)
# All request handlers in this module log through the "api" logger.
logger = logging.getLogger("api")

app = FastAPI()

# CORS is fully open: every origin, method and header is accepted.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm whether
# credentialed cross-origin requests are needed, and if so list the
# allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
class CompletionRequest(BaseModel):
    """JSON body accepted by ``POST /v1/completions``."""

    # Presumably the document text before the insertion point -- confirm
    # against the editor client. Required.
    prefix: str
    # Presumably the document text after the insertion point. Required.
    suffix: str
    # Language identifier passed along to the prompt builder.
    languageId: str = "markdown"
class OCRRequest(BaseModel):
    """JSON body accepted by ``POST /v1/ocr``."""

    # Base64-encoded image data; decoded by the /v1/ocr handler. Required.
    image: str
    # Returned unchanged in the OCR response so the caller can match results.
    filename: str = "image.jpg"
    # Language setting handed to the OCR call.
    language: str = "auto"
def _preview(text: str, limit: int = 80) -> str:
value = (text or "").replace("\n", "\\n")
if len(value) <= limit:
return value
return value[:limit] + "..."
def _build_force_non_empty_prompt(base_prompt: str) -> str:
return (
base_prompt
+ "\n\nStrict override for this request:\n"
+ "- Output must be non-empty.\n"
+ "- If you would otherwise output empty, output a single space.\n"
+ "- Keep it short and do not repeat SUFFIX.\n"
)
@app.post("/v1/completions")
async def create_completion(request: CompletionRequest):
    """Generate one completion for the given prefix/suffix pair.

    The prompt produced by ``build_prompt`` is sent to ``call_ollama``. When
    the reply is blank, a second call is made with the forced-non-empty prompt
    at a lower temperature; when that is blank as well, a single space is used.

    Returns:
        A ``text/event-stream`` response carrying one ``content`` event
        followed by one ``done`` event. If anything raises, the error is
        logged and a JSON ``{"error": ...}`` body with status 500 is returned.
    """
    request_id = str(uuid.uuid4())[:8]
    try:
        prefix_text = request.prefix or ""
        suffix_text = request.suffix or ""
        logger.info(
            "[%s] /v1/completions prefix_chars=%d suffix_chars=%d lang=%s prefix_tail='%s' suffix_head='%s'",
            request_id,
            len(prefix_text),
            len(suffix_text),
            request.languageId,
            _preview(prefix_text[-120:]),
            _preview(suffix_text[:120]),
        )

        prompt = build_prompt(request.prefix, request.suffix, request.languageId)
        primary = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
        content, source = primary["content"] or "", "primary"

        if not content.strip():
            logger.warning("[%s] primary returned empty content, starting retry", request_id)
            retry = await call_ollama(
                _build_force_non_empty_prompt(prompt),
                tag=f"{request_id}-retry1",
                temperature=0.4,
            )
            content, source = retry["content"] or "", "retry1"
            if not content.strip():
                # Last resort: never hand the client an empty completion.
                content, source = " ", "fallback-space"
                logger.warning("[%s] retry still empty, forcing single-space fallback", request_id)

        logger.info(
            "[%s] completion resolved source=%s content_chars=%d content_preview='%s'",
            request_id,
            source,
            len(content),
            _preview(content, 120),
        )

        async def event_stream():
            # Two SSE frames: the completion text, then the end marker.
            for payload in ({"content": content}, {"done": True}):
                yield f"data: {json.dumps(payload)}\n\n"

        return StreamingResponse(event_stream(), media_type="text/event-stream")
    except Exception as exc:
        logger.exception("[%s] /v1/completions failed: %s", request_id, exc)
        return JSONResponse(content={"error": str(exc)}, status_code=500)
def _decode_image_payload(image: str) -> bytes:
    """Decode the base64 image text sent by the client.

    Accepts either bare base64 or a ``data:`` URL of the form
    ``data:<mediatype>;base64,<payload>``; for the latter, everything up to
    and including the first comma is dropped before decoding.

    Raises:
        ValueError: if the payload cannot be decoded as base64
            (``binascii.Error`` is a ``ValueError`` subclass).
    """
    payload = image or ""
    if payload[:5].lower() == "data:":
        # b64decode() skips characters outside the base64 alphabet by default,
        # so a data-URL header left in place would not raise -- its letters
        # would be decoded into junk bytes in front of the real image.
        payload = payload.partition(",")[2]
    return base64.b64decode(payload)


@app.post("/v1/ocr")
async def ocr_image(request: OCRRequest):
    """Run OCR on a base64-encoded image and return the extracted text.

    The image is decoded (bare base64 or a ``data:`` URL) and passed to
    ``call_vlm_ocr`` together with the requested language.

    Returns:
        ``{"text": ..., "filename": ...}`` on success. An undecodable image
        payload yields a JSON ``{"error": ...}`` body with status 400; any
        other failure is logged and yields the same shape with status 500.
    """
    request_id = str(uuid.uuid4())[:8]
    try:
        logger.info(
            "[%s] /v1/ocr filename=%s language=%s image_base64_chars=%d",
            request_id,
            request.filename,
            request.language,
            len(request.image or ""),
        )
        try:
            image_bytes = _decode_image_payload(request.image)
        except ValueError as exc:
            # Malformed input is the caller's fault, not a server failure.
            logger.warning("[%s] /v1/ocr rejected undecodable image payload: %s", request_id, exc)
            return JSONResponse(
                content={"error": f"Invalid base64 image data: {exc}"},
                status_code=400,
            )
        logger.info("[%s] /v1/ocr decoded image_bytes=%d", request_id, len(image_bytes))
        result = await call_vlm_ocr(image_bytes, request.language)
        logger.info(
            "[%s] /v1/ocr success text_chars=%d text_preview='%s'",
            request_id,
            len(result or ""),
            _preview(result or "", 120),
        )
        return {"text": result, "filename": request.filename}
    except Exception as e:
        logger.exception("[%s] /v1/ocr failed: %s", request_id, e)
        return JSONResponse(content={"error": str(e)}, status_code=500)
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on every interface, port 8000.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")