From 794fbf8493a814d83ddfd84d1493880ca97a6e9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cydy0615=E2=80=9D?= <“allenyuan410@gmail.com”> Date: Sat, 14 Feb 2026 21:21:06 +0800 Subject: [PATCH] feat(config): add OCR URL configuration and improve image node handling - Add VITE_OCR_URL environment variable with fallback URL construction - Define IMAGE_NODE_TYPES constant to support 'image', 'image-block', and 'imageBlock' node types - Add helper functions for safer image attribute access (getImageSrc, isImageNodeWithSrc, getImageLabel) - Improve OCR error handling with HTTP status checking and error details - Wrap OCR context in HTML comments to prevent prompt injection issues - Update MilkdownEditor to use centralized OCR_URL configuration --- .env.example | 1 + src/components/MilkdownEditor.vue | 20 ++++++++++------- src/plugins/copilotPlugin.ts | 36 +++++++++++++++++++++++++------ src/utils/config.js | 16 ++++++++++++++ 4 files changed, 58 insertions(+), 15 deletions(-) diff --git a/.env.example b/.env.example index e392145..214d718 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,5 @@ VITE_API_URL=http://localhost:8000/v1/completions +VITE_OCR_URL=http://localhost:8000/v1/ocr # Ollama 配置 OLLAMA_HOST=http://192.168.0.120:11434 diff --git a/src/components/MilkdownEditor.vue b/src/components/MilkdownEditor.vue index 81e5bff..860fae7 100644 --- a/src/components/MilkdownEditor.vue +++ b/src/components/MilkdownEditor.vue @@ -102,7 +102,7 @@ import { Crepe } from '@milkdown/crepe' import { editorViewCtx } from '@milkdown/kit/core' import { copilotPlugin, copilotConfigCtx, copilotGhostMark, setCopilotEnabled, COPILOT_PLUGIN_KEY, SIZE_LIMIT, checkSizeLimit } from '../plugins/copilotPlugin' import { fetchSuggestion } from '../utils/api.js' -import { DEBUG, API_URL } from '../utils/config.js' +import { DEBUG, OCR_URL } from '../utils/config.js' import { setOcrCache, clearOcrCache, clearAllOcrCache } from '../utils/ocrCache.js' const emit = 
defineEmits(['update:markdown']) @@ -125,6 +125,7 @@ const aiButtonLabel = computed(() => { let crepe = null let markdownSyncTimer = null const objectUrls = new Set() +const IMAGE_NODE_TYPES = new Set(['image', 'image-block', 'imageBlock']) const revokeObjectUrl = (url) => { if (!objectUrls.has(url)) return @@ -136,12 +137,12 @@ const revokeObjectUrl = (url) => { const collectImageObjectUrls = (doc) => { const activeUrls = new Set() doc.descendants((node) => { + const src = typeof node.attrs?.src === 'string' ? node.attrs.src : '' if ( - node.type?.name === 'image' && - typeof node.attrs?.src === 'string' && - node.attrs.src.startsWith('blob:') + IMAGE_NODE_TYPES.has(node.type?.name) && + src.startsWith('blob:') ) { - activeUrls.add(node.attrs.src) + activeUrls.add(src) } }) return activeUrls @@ -219,8 +220,7 @@ const performOCR = async (file, cacheKey) => { const base64 = dataUrl.slice(splitIndex + 1) try { - const ocrUrl = API_URL.replace('/v1/completions', '/v1/ocr') - const res = await fetch(ocrUrl, { + const res = await fetch(OCR_URL, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ @@ -229,6 +229,10 @@ const performOCR = async (file, cacheKey) => { language: 'auto' }) }) + if (!res.ok) { + const errorText = await res.text() + throw new Error(`HTTP ${res.status}: ${errorText}`) + } const data = await res.json() if (data.text) { setOcrCache(cacheKey, data.text) @@ -240,7 +244,7 @@ const performOCR = async (file, cacheKey) => { } } } catch (e) { - if (DEBUG) console.error('[OCR] Error:', e) + console.error('[OCR] Error:', e) } } reader.readAsDataURL(file) diff --git a/src/plugins/copilotPlugin.ts b/src/plugins/copilotPlugin.ts index ed6979c..140d59d 100644 --- a/src/plugins/copilotPlugin.ts +++ b/src/plugins/copilotPlugin.ts @@ -9,6 +9,7 @@ import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT } from const COPILOT_PLUGIN_KEY = new PluginKey('milkdown-copilot') const DEBOUNCE_MS = 1000 const SIZE_LIMIT 
= OCR_SIZE_LIMIT +const IMAGE_NODE_TYPES = new Set(['image', 'image-block', 'imageBlock']) interface CopilotState { from: number @@ -251,11 +252,28 @@ function insertPlainText(view: EditorView, suggestion: string, from: number, mar view.dispatch(tr) } +function getImageSrc(node: ProseNode): string { + const src = node.attrs?.src + return typeof src === 'string' ? src : '' +} + +function isImageNodeWithSrc(node: ProseNode): boolean { + return IMAGE_NODE_TYPES.has(node.type.name) && Boolean(getImageSrc(node)) +} + +function getImageLabel(node: ProseNode): string { + const candidates = [node.attrs?.alt, node.attrs?.title, node.attrs?.caption] + for (const value of candidates) { + if (typeof value === 'string' && value.trim()) return value.trim() + } + return 'untitled' +} + function extractImageFilenames(doc: ProseNode): string[] { const filenames: string[] = [] doc.descendants((node: ProseNode) => { - if (node.type.name === 'image' && node.attrs.src) { - filenames.push(node.attrs.src) + if (isImageNodeWithSrc(node)) { + filenames.push(getImageSrc(node)) } }) return filenames @@ -266,18 +284,22 @@ function buildPrefixWithOCR(prefix: string, doc: ProseNode, cursorPos: number): doc.descendants((node: ProseNode, pos) => { if (pos >= cursorPos) return false - if (node.type.name !== 'image' || !node.attrs.src) return true + if (!isImageNodeWithSrc(node)) return true - const ocrText = getOcrCache(node.attrs.src) + const src = getImageSrc(node) + const ocrText = getOcrCache(src) if (!ocrText) return true - const altText = typeof node.attrs.alt === 'string' ? 
node.attrs.alt : '' - ocrEntries.push(`image(${altText || 'untitled'}): ${ocrText}`) + const label = getImageLabel(node) + const safeOcrText = ocrText.replace(/<!--|-->/g, '').trim() + if (!safeOcrText) return true + + ocrEntries.push(`image(${label}): ${safeOcrText}`) return true }) if (!ocrEntries.length) return prefix - return `${prefix}\n\n[OCR Context]\n${ocrEntries.join('\n')}` + return `${prefix}\n\n<!-- [OCR Context]\n${ocrEntries.join('\n')}\n-->` } function doFetchSuggestion(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) { diff --git a/src/utils/config.js b/src/utils/config.js index 6152c7e..d216c9f 100644 --- a/src/utils/config.js +++ b/src/utils/config.js @@ -1,2 +1,18 @@ export const DEBUG = import.meta.env.DEV export const API_URL = import.meta.env.VITE_API_URL || 'http://localhost:8000/v1/completions' + +const buildDefaultOcrUrl = (apiUrl) => { + if (typeof window === 'undefined') { + return 'http://localhost:8000/v1/ocr' + } + + try { + const url = new URL(apiUrl, window.location.origin) + url.pathname = '/v1/ocr' + return url.toString() + } catch { + return 'http://localhost:8000/v1/ocr' + } +} + +export const OCR_URL = import.meta.env.VITE_OCR_URL || buildDefaultOcrUrl(API_URL)