feat(copilot): enhance OCR handling with inline tags and document serializer

- Replace HTML comment OCR metadata with inline `<OCR:...>` tags
- Implement serializer-based markdown conversion for prefix/suffix content
- Add extractTextFromOCR utility function for text extraction
- Enable Table, Diagram, and ListCheck features in MilkdownEditor
- Add periodic debug logging for document state analysis
This commit is contained in:
2026-02-14 23:53:26 +08:00
parent 794fbf8493
commit 03bb21d5c6
5 changed files with 3496 additions and 225 deletions

View File

@@ -38,10 +38,10 @@ Your job:
- Avoid overly short outputs with little information value.
Important context:
- PREFIX may contain hidden OCR metadata in HTML comments such as <!--OCR:...-->.
- These comments are non-visible context only.
- Never copy, rewrite, or emit HTML comments in output.
- Never output <!-- or -->.
- PREFIX may contain OCR metadata inline after images, e.g. ![alt](url) <OCR:description>.
- The <OCR:...> is hidden context describing image content.
- Never copy, rewrite, or emit OCR tags in output.
- Never output the sequence <OCR: or any <OCR:...> tag.
Hard rules:
1. Seamless join:

3519
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -99,7 +99,7 @@
import { onMounted, onUnmounted, ref, computed } from 'vue'
import { replaceAll } from '@milkdown/kit/utils'
import { Crepe } from '@milkdown/crepe'
import { editorViewCtx } from '@milkdown/kit/core'
import { editorViewCtx, serializerCtx } from '@milkdown/kit/core'
import { copilotPlugin, copilotConfigCtx, copilotGhostMark, setCopilotEnabled, COPILOT_PLUGIN_KEY, SIZE_LIMIT, checkSizeLimit } from '../plugins/copilotPlugin'
import { fetchSuggestion } from '../utils/api.js'
import { DEBUG, OCR_URL } from '../utils/config.js'
@@ -124,6 +124,7 @@ const aiButtonLabel = computed(() => {
let crepe = null
let markdownSyncTimer = null
let debugLogTimer = null
const objectUrls = new Set()
const IMAGE_NODE_TYPES = new Set(['image', 'image-block', 'imageBlock'])
@@ -199,6 +200,57 @@ const scheduleMarkdownSync = () => {
}, 120)
}
/**
 * Print a snapshot of the editor state to the console for debugging.
 *
 * Logs three things: the Markdown before the selection (PREFIX), the Markdown
 * after the selection (SUFFIX), and the full document Markdown. PREFIX and
 * SUFFIX are produced by serialising a slice of the document; when that yields
 * nothing, or the serializer throws, plain text from `textBetween` is used
 * instead.
 *
 * This is diagnostics only, so it does nothing unless the DEBUG flag is set
 * (consistent with the other `[Debug]` logs in this file) or when no editor
 * instance exists.
 */
const logDebugInfo = async () => {
  // Skip all serialisation work and console noise outside debug mode.
  if (!DEBUG || !crepe) return
  try {
    const markdown = await crepe.getMarkdown()
    // `crepe` is a mutable module-level binding; re-check it after the await.
    if (!crepe) return
    crepe.editor.action((ctx) => {
      const view = ctx.get(editorViewCtx)
      const { doc, schema, selection } = view.state
      const { from, to } = selection
      const serializer = ctx.get(serializerCtx)

      // Plain-text fallback, using newlines as block and leaf separators.
      const plainText = (start, end) => doc.textBetween(start, end, '\n', '\n')

      // Serialise doc[start, end) to Markdown by wrapping the slice content in
      // a fresh top-level node; fall back to plain text when nothing comes out.
      const toMarkdown = (start, end) => {
        const slice = doc.slice(start, end)
        let result = ''
        if (slice.content.size > 0) {
          const partialDoc = schema.topNodeType.createAndFill(undefined, slice.content)
          if (partialDoc) {
            result = serializer(partialDoc)
          }
        }
        return result || plainText(start, end)
      }

      let prefixMarkdown = ''
      let suffixMarkdown = ''
      try {
        prefixMarkdown = toMarkdown(0, from)
        suffixMarkdown = toMarkdown(to, doc.content.size)
      } catch (e) {
        // Any serializer failure downgrades both sides to plain text.
        console.error('[Debug] Serializer error:', e)
        prefixMarkdown = plainText(0, from)
        suffixMarkdown = plainText(to, doc.content.size)
      }

      console.log('[Debug] ===== Document State =====')
      console.log('[Debug] PREFIX:', prefixMarkdown)
      console.log('[Debug] SUFFIX:', suffixMarkdown)
      console.log('[Debug] FULL MARKDOWN:', markdown)
      console.log('[Debug] ==========================')
    })
  } catch (e) {
    console.error('[Debug] Log failed:', e)
  }
}
const clearCurrentSuggestion = (view) => {
const state = COPILOT_PLUGIN_KEY.getState(view.state)
if (state?.suggestion && state.from < state.to) {
@@ -261,6 +313,9 @@ onMounted(async () => {
features: {
[Crepe.Feature.Latex]: true,
[Crepe.Feature.ImageBlock]: true,
[Crepe.Feature.Table]: true,
[Crepe.Feature.Diagram]: true,
[Crepe.Feature.ListCheck]: true,
},
featureConfigs: {
[Crepe.Feature.Latex]: {
@@ -308,6 +363,7 @@ onMounted(async () => {
refreshSizeAndLimit(ctx)
})
scheduleMarkdownSync()
debugLogTimer = setInterval(logDebugInfo, 20000)
if (DEBUG) console.log('[Debug] Crepe editor created with copilot plugin')
})
@@ -418,6 +474,10 @@ onUnmounted(() => {
clearTimeout(markdownSyncTimer)
markdownSyncTimer = null
}
if (debugLogTimer) {
clearInterval(debugLogTimer)
debugLogTimer = null
}
for (const url of Array.from(objectUrls)) {
revokeObjectUrl(url)

View File

@@ -1,14 +1,15 @@
import { Plugin, PluginKey, Selection } from '@milkdown/prose/state'
import { $prose, $ctx, $markSchema } from '@milkdown/kit/utils'
import { parserCtx } from '@milkdown/kit/core'
import { parserCtx, serializerCtx } from '@milkdown/kit/core'
import { Node as ProseNode, Fragment } from '@milkdown/prose/model'
import type { Ctx } from '@milkdown/kit/core'
import type { EditorView } from '@milkdown/prose/view'
import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT } from '../utils/ocrCache'
import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT, extractTextFromOCR } from '../utils/ocrCache'
const COPILOT_PLUGIN_KEY = new PluginKey('milkdown-copilot')
const DEBOUNCE_MS = 1000
const SIZE_LIMIT = OCR_SIZE_LIMIT
const DEBUG = true
const IMAGE_NODE_TYPES = new Set(['image', 'image-block', 'imageBlock'])
interface CopilotState {
@@ -279,27 +280,57 @@ function extractImageFilenames(doc: ProseNode): string[] {
return filenames
}
function buildPrefixWithOCR(prefix: string, doc: ProseNode, cursorPos: number): string {
const ocrEntries: string[] = []
function buildPrefixWithOCRFromMarkdown(
doc: ProseNode,
cursorPos: number,
prefixMarkdown: string,
serializer: any,
schema: any
): string {
const imageNodes: Array<{pos: number, src: string, label: string}> = []
doc.descendants((node: ProseNode, pos) => {
if (pos >= cursorPos) return false
if (!isImageNodeWithSrc(node)) return true
if (!isImageNodeWithSrc(node)) return pos < cursorPos
const src = getImageSrc(node)
const ocrText = getOcrCache(src)
if (!ocrText) return true
const label = getImageLabel(node)
const safeOcrText = ocrText.replace(/<!--|-->/g, '').trim()
if (!safeOcrText) return true
ocrEntries.push(`image(${label}): ${safeOcrText}`)
return true
imageNodes.push({ pos, src, label })
return pos < cursorPos
})
if (!ocrEntries.length) return prefix
return `${prefix}\n\n<!--OCR:\n${ocrEntries.join('\n')}\n-->`
if (imageNodes.length === 0) {
return prefixMarkdown
}
imageNodes.sort((a, b) => a.pos - b.pos)
const parts: string[] = []
let lastPos = 0
for (const img of imageNodes) {
if (img.pos > lastPos) {
const slice = doc.slice(lastPos, img.pos)
const sliceDoc = schema.topNodeType.createAndFill(undefined, slice.content)
parts.push(sliceDoc ? serializer(sliceDoc) : doc.textBetween(lastPos, img.pos))
}
const imageSyntax = `![${img.label}](${img.src})`
parts.push(imageSyntax)
const ocrText = getOcrCache(img.src)
if (ocrText) {
const textOnly = extractTextFromOCR(ocrText, 100)
if (textOnly) {
parts.push(` <OCR:${textOnly}>`)
}
}
lastPos = img.pos + 1
}
if (lastPos < cursorPos) {
const slice = doc.slice(lastPos, cursorPos)
const sliceDoc = schema.topNodeType.createAndFill(undefined, slice.content)
parts.push(sliceDoc ? serializer(sliceDoc) : doc.textBetween(lastPos, cursorPos))
}
return parts.join('')
}
function doFetchSuggestion(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) {
@@ -339,6 +370,7 @@ function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, p
if (!runtime.enabled) return
const doc = view.state.doc
const schema = view.state.schema
const imageFilenames = extractImageFilenames(doc)
const { overLimit } = checkOcrSizeLimit(doc.content.size, imageFilenames)
@@ -347,7 +379,61 @@ function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, p
return
}
const prefixWithOCR = buildPrefixWithOCR(prefix, doc, pos)
const serializer = runtime.ctx.get(serializerCtx)
// 尝试使用 serializer 将文档切片转换为 Markdown
let prefixMarkdown = ''
let suffixMarkdown = ''
try {
// 方法1: 使用 slice 创建文档节点
const prefixSlice = doc.slice(0, pos)
if (prefixSlice.content.size > 0) {
const prefixDoc = schema.topNodeType.createAndFill(undefined, prefixSlice.content)
if (prefixDoc) {
prefixMarkdown = serializer(prefixDoc)
}
}
if (!prefixMarkdown) {
// 方法2: 直接序列化整个文档然后截取
const fullMarkdown = serializer(doc)
const fullDoc = view.state.doc
const totalLen = fullDoc.content.size
if (totalLen > 0 && pos < totalLen) {
// 简单估算位置
prefixMarkdown = fullMarkdown.substring(0, Math.floor(fullMarkdown.length * pos / totalLen))
}
}
if (!prefixMarkdown) {
// 回退到 textBetween 但添加换行符
prefixMarkdown = doc.textBetween(0, pos, '\n', '\n')
}
// Suffix
const suffixSlice = doc.slice(pos)
if (suffixSlice.content.size > 0) {
const suffixDoc = schema.topNodeType.createAndFill(undefined, suffixSlice.content)
if (suffixDoc) {
suffixMarkdown = serializer(suffixDoc)
}
}
if (!suffixMarkdown) {
suffixMarkdown = doc.textBetween(pos, doc.content.size, '\n', '\n')
}
} catch (e) {
console.error('[Copilot] Serializer error:', e)
prefixMarkdown = doc.textBetween(0, pos, '\n', '\n')
suffixMarkdown = doc.textBetween(pos, doc.content.size, '\n', '\n')
}
const prefixWithOCR = buildPrefixWithOCRFromMarkdown(doc, pos, prefixMarkdown, serializer, schema)
if (DEBUG) {
console.log('[Copilot] ===== LLM Request =====')
console.log('[Copilot] PREFIX:', prefixWithOCR)
console.log('[Copilot] SUFFIX:', suffixMarkdown)
console.log('[Copilot] ======================')
}
if (runtime.debounceTimer) {
clearTimeout(runtime.debounceTimer)
@@ -357,7 +443,7 @@ function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, p
const debounceMs = runtime.ctx.get(copilotConfigCtx.key).debounceMs ?? DEBOUNCE_MS
runtime.debounceTimer = setTimeout(() => {
runtime.debounceTimer = null
doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffix)
doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffixMarkdown)
}, debounceMs)
}

View File

@@ -43,3 +43,11 @@ export function checkSizeLimit(docTextSize, imageFilenames) {
}
export const OCR_SIZE_LIMIT = SIZE_LIMIT
/**
 * Extract the literal text portion of an OCR result, truncated for prompt use.
 *
 * When the OCR result contains a `TEXT:` section, only that section is
 * returned; it ends at the next `KEY_DETAILS:`, `LANGUAGE:` or `SUMMARY:`
 * label, or at the end of the string. Without a `TEXT:` section the whole
 * trimmed input is used.
 *
 * NOTE(review): terminator labels are assumed to be followed by a colon, like
 * the `TEXT:` label itself. Requiring the colon stops ordinary words such as
 * "language" or "summary" inside the recognised text from cutting it short.
 *
 * @param {string} ocrText - Raw OCR result.
 * @param {number} [maxLen=100] - Maximum number of characters kept before
 *   `...` is appended.
 * @returns {string} The extracted text, or `''` when the input is empty, not
 *   a string, or the extracted text is the placeholder `(none)`.
 */
export function extractTextFromOCR(ocrText, maxLen = 100) {
  if (!ocrText || typeof ocrText !== 'string') return ''
  const match = ocrText.match(/TEXT:\s*([\s\S]*?)(?:(?:KEY_DETAILS|LANGUAGE|SUMMARY)\s*:|$)/i)
  const text = match ? match[1].trim() : ocrText.trim()
  // "(none)" is the placeholder for "no text found in the image".
  if (text.toLowerCase() === '(none)') return ''
  return text.length > maxLen ? text.substring(0, maxLen) + '...' : text
}