feat(copilot): enhance OCR handling with inline tags and document serializer
- Replace HTML comment OCR metadata with inline `<OCR:...>` tags
- Implement serializer-based markdown conversion for prefix/suffix content
- Add extractTextFromOCR utility function for text extraction
- Enable Table, Diagram, and ListCheck features in MilkdownEditor
- Add periodic debug logging for document state analysis
This commit is contained in:
@@ -38,10 +38,10 @@ Your job:
|
||||
- Avoid overly short outputs with little information value.
|
||||
|
||||
Important context:
|
||||
- PREFIX may contain hidden OCR metadata in HTML comments such as <!--OCR:...-->.
|
||||
- These comments are non-visible context only.
|
||||
- Never copy, rewrite, or emit HTML comments in output.
|
||||
- Never output <!-- or -->.
|
||||
- PREFIX may contain OCR metadata inline after images, e.g. ![alt](src) <OCR:description>.
|
||||
- The <OCR:...> is hidden context describing image content.
|
||||
- Never copy, rewrite, or emit OCR tags in output.
|
||||
- Never output <OCR: or >.
|
||||
|
||||
Hard rules:
|
||||
1. Seamless join:
|
||||
|
||||
3519
package-lock.json
generated
3519
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -99,7 +99,7 @@
|
||||
import { onMounted, onUnmounted, ref, computed } from 'vue'
|
||||
import { replaceAll } from '@milkdown/kit/utils'
|
||||
import { Crepe } from '@milkdown/crepe'
|
||||
import { editorViewCtx } from '@milkdown/kit/core'
|
||||
import { editorViewCtx, serializerCtx } from '@milkdown/kit/core'
|
||||
import { copilotPlugin, copilotConfigCtx, copilotGhostMark, setCopilotEnabled, COPILOT_PLUGIN_KEY, SIZE_LIMIT, checkSizeLimit } from '../plugins/copilotPlugin'
|
||||
import { fetchSuggestion } from '../utils/api.js'
|
||||
import { DEBUG, OCR_URL } from '../utils/config.js'
|
||||
@@ -124,6 +124,7 @@ const aiButtonLabel = computed(() => {
|
||||
|
||||
let crepe = null
|
||||
let markdownSyncTimer = null
|
||||
let debugLogTimer = null
|
||||
const objectUrls = new Set()
|
||||
const IMAGE_NODE_TYPES = new Set(['image', 'image-block', 'imageBlock'])
|
||||
|
||||
@@ -199,6 +200,57 @@ const scheduleMarkdownSync = () => {
|
||||
}, 120)
|
||||
}
|
||||
|
||||
/**
 * Debug helper: logs the Markdown before the selection (PREFIX), the Markdown
 * after it (SUFFIX) and the full document Markdown to the console.
 *
 * Does nothing unless DEBUG is enabled and the editor instance exists, so
 * document content is never dumped to the console outside debug sessions.
 * All failures are caught and reported with console.error.
 *
 * @returns {Promise<void>}
 */
const logDebugInfo = async () => {
  // Gate on DEBUG like the other '[Debug]' logs in this file.
  if (!DEBUG || !crepe) return

  try {
    const markdown = await crepe.getMarkdown()
    // The editor may have been torn down while we were awaiting.
    if (!crepe) return

    crepe.editor.action((ctx) => {
      const view = ctx.get(editorViewCtx)
      const serializer = ctx.get(serializerCtx)
      const { doc, schema, selection } = view.state
      const { from, to } = selection

      // Serialize the document range [start, end) to Markdown. Falls back to
      // plain text (newline-separated blocks/leaves) when the range is empty,
      // cannot be wrapped in a top-level node, serializes to an empty string,
      // or the serializer throws. Each side falls back independently, so a
      // failure on the suffix no longer discards a good prefix.
      const rangeToMarkdown = (start, end) => {
        const asPlainText = () => doc.textBetween(start, end, '\n', '\n')
        try {
          const { content } = doc.slice(start, end)
          if (content.size === 0) return asPlainText()
          const partialDoc = schema.topNodeType.createAndFill(undefined, content)
          return (partialDoc && serializer(partialDoc)) || asPlainText()
        } catch (e) {
          console.error('[Debug] Serializer error:', e)
          return asPlainText()
        }
      }

      const prefixMarkdown = rangeToMarkdown(0, from)
      const suffixMarkdown = rangeToMarkdown(to, doc.content.size)

      console.log('[Debug] ===== Document State =====')
      console.log('[Debug] PREFIX:', prefixMarkdown)
      console.log('[Debug] SUFFIX:', suffixMarkdown)
      console.log('[Debug] FULL MARKDOWN:', markdown)
      console.log('[Debug] ==========================')
    })
  } catch (e) {
    console.error('[Debug] Log failed:', e)
  }
}
|
||||
|
||||
const clearCurrentSuggestion = (view) => {
|
||||
const state = COPILOT_PLUGIN_KEY.getState(view.state)
|
||||
if (state?.suggestion && state.from < state.to) {
|
||||
@@ -261,6 +313,9 @@ onMounted(async () => {
|
||||
features: {
|
||||
[Crepe.Feature.Latex]: true,
|
||||
[Crepe.Feature.ImageBlock]: true,
|
||||
[Crepe.Feature.Table]: true,
|
||||
[Crepe.Feature.Diagram]: true,
|
||||
[Crepe.Feature.ListCheck]: true,
|
||||
},
|
||||
featureConfigs: {
|
||||
[Crepe.Feature.Latex]: {
|
||||
@@ -308,6 +363,7 @@ onMounted(async () => {
|
||||
refreshSizeAndLimit(ctx)
|
||||
})
|
||||
scheduleMarkdownSync()
|
||||
debugLogTimer = setInterval(logDebugInfo, 20000)
|
||||
|
||||
if (DEBUG) console.log('[Debug] Crepe editor created with copilot plugin')
|
||||
})
|
||||
@@ -418,6 +474,10 @@ onUnmounted(() => {
|
||||
clearTimeout(markdownSyncTimer)
|
||||
markdownSyncTimer = null
|
||||
}
|
||||
if (debugLogTimer) {
|
||||
clearInterval(debugLogTimer)
|
||||
debugLogTimer = null
|
||||
}
|
||||
|
||||
for (const url of Array.from(objectUrls)) {
|
||||
revokeObjectUrl(url)
|
||||
|
||||
@@ -1,14 +1,15 @@
|
||||
import { Plugin, PluginKey, Selection } from '@milkdown/prose/state'
|
||||
import { $prose, $ctx, $markSchema } from '@milkdown/kit/utils'
|
||||
import { parserCtx } from '@milkdown/kit/core'
|
||||
import { parserCtx, serializerCtx } from '@milkdown/kit/core'
|
||||
import { Node as ProseNode, Fragment } from '@milkdown/prose/model'
|
||||
import type { Ctx } from '@milkdown/kit/core'
|
||||
import type { EditorView } from '@milkdown/prose/view'
|
||||
import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT } from '../utils/ocrCache'
|
||||
import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT, extractTextFromOCR } from '../utils/ocrCache'
|
||||
|
||||
const COPILOT_PLUGIN_KEY = new PluginKey('milkdown-copilot')
|
||||
const DEBOUNCE_MS = 1000
|
||||
const SIZE_LIMIT = OCR_SIZE_LIMIT
|
||||
const DEBUG = true
|
||||
const IMAGE_NODE_TYPES = new Set(['image', 'image-block', 'imageBlock'])
|
||||
|
||||
interface CopilotState {
|
||||
@@ -279,27 +280,57 @@ function extractImageFilenames(doc: ProseNode): string[] {
|
||||
return filenames
|
||||
}
|
||||
|
||||
/**
 * Build the PREFIX text for a completion request: the content before the
 * cursor as Markdown, with an inline ` <OCR:...>` tag placed right after each
 * image that has cached OCR text.
 *
 * @param doc            Document to read from.
 * @param cursorPos      Document position of the cursor; only nodes that start
 *                       before it are considered.
 * @param prefixMarkdown Already-serialized Markdown for the prefix; returned
 *                       unchanged when no image precedes the cursor.
 * @param serializer     Function that turns a document node into Markdown.
 * @param schema         Schema used to wrap slices in a top-level node.
 * @returns The prefix Markdown, with OCR tags when images are present.
 */
function buildPrefixWithOCRFromMarkdown(
  doc: ProseNode,
  cursorPos: number,
  prefixMarkdown: string,
  serializer: any,
  schema: any
): string {
  const imageNodes: Array<{ pos: number, end: number, src: string, label: string }> = []

  doc.descendants((node: ProseNode, pos) => {
    // Nothing at or after the cursor belongs to the prefix.
    if (pos >= cursorPos) return false
    if (isImageNodeWithSrc(node)) {
      imageNodes.push({
        pos,
        // Use the node's real size instead of assuming every image occupies
        // exactly one position.
        end: pos + node.nodeSize,
        src: getImageSrc(node),
        label: getImageLabel(node),
      })
    }
    return true
  })

  if (imageNodes.length === 0) {
    return prefixMarkdown
  }

  imageNodes.sort((a, b) => a.pos - b.pos)

  // Serialize [from, to) to Markdown, falling back to plain text when the
  // slice cannot be wrapped in a top-level node.
  const serializeRange = (from: number, to: number): string => {
    const sliceDoc = schema.topNodeType.createAndFill(undefined, doc.slice(from, to).content)
    return sliceDoc ? serializer(sliceDoc) : doc.textBetween(from, to)
  }

  const parts: string[] = []
  let lastPos = 0

  for (const img of imageNodes) {
    if (img.pos > lastPos) {
      parts.push(serializeRange(lastPos, img.pos))
    }

    // NOTE(review): this template literal was empty in the reviewed source
    // (the image Markdown appears to have been stripped); reconstructed from
    // the label/src collected above. Confirm against the original.
    parts.push(`![${img.label}](${img.src})`)

    const ocrText = getOcrCache(img.src)
    if (ocrText) {
      // Keep the tag well-formed and on one line: drop angle brackets (they
      // would open/close the tag early) and collapse whitespace and newlines.
      const textOnly = extractTextFromOCR(ocrText, 100)
        .replace(/[<>]/g, '')
        .replace(/\s+/g, ' ')
        .trim()
      if (textOnly) {
        parts.push(` <OCR:${textOnly}>`)
      }
    }

    lastPos = img.end
  }

  if (lastPos < cursorPos) {
    parts.push(serializeRange(lastPos, cursorPos))
  }

  return parts.join('')
}
|
||||
|
||||
function doFetchSuggestion(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) {
|
||||
@@ -339,6 +370,7 @@ function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, p
|
||||
if (!runtime.enabled) return
|
||||
|
||||
const doc = view.state.doc
|
||||
const schema = view.state.schema
|
||||
const imageFilenames = extractImageFilenames(doc)
|
||||
const { overLimit } = checkOcrSizeLimit(doc.content.size, imageFilenames)
|
||||
|
||||
@@ -347,7 +379,61 @@ function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, p
|
||||
return
|
||||
}
|
||||
|
||||
const prefixWithOCR = buildPrefixWithOCR(prefix, doc, pos)
|
||||
const serializer = runtime.ctx.get(serializerCtx)
|
||||
|
||||
// 尝试使用 serializer 将文档切片转换为 Markdown
|
||||
let prefixMarkdown = ''
|
||||
let suffixMarkdown = ''
|
||||
|
||||
try {
|
||||
// 方法1: 使用 slice 创建文档节点
|
||||
const prefixSlice = doc.slice(0, pos)
|
||||
if (prefixSlice.content.size > 0) {
|
||||
const prefixDoc = schema.topNodeType.createAndFill(undefined, prefixSlice.content)
|
||||
if (prefixDoc) {
|
||||
prefixMarkdown = serializer(prefixDoc)
|
||||
}
|
||||
}
|
||||
if (!prefixMarkdown) {
|
||||
// 方法2: 直接序列化整个文档然后截取
|
||||
const fullMarkdown = serializer(doc)
|
||||
const fullDoc = view.state.doc
|
||||
const totalLen = fullDoc.content.size
|
||||
if (totalLen > 0 && pos < totalLen) {
|
||||
// 简单估算位置
|
||||
prefixMarkdown = fullMarkdown.substring(0, Math.floor(fullMarkdown.length * pos / totalLen))
|
||||
}
|
||||
}
|
||||
if (!prefixMarkdown) {
|
||||
// 回退到 textBetween 但添加换行符
|
||||
prefixMarkdown = doc.textBetween(0, pos, '\n', '\n')
|
||||
}
|
||||
|
||||
// Suffix
|
||||
const suffixSlice = doc.slice(pos)
|
||||
if (suffixSlice.content.size > 0) {
|
||||
const suffixDoc = schema.topNodeType.createAndFill(undefined, suffixSlice.content)
|
||||
if (suffixDoc) {
|
||||
suffixMarkdown = serializer(suffixDoc)
|
||||
}
|
||||
}
|
||||
if (!suffixMarkdown) {
|
||||
suffixMarkdown = doc.textBetween(pos, doc.content.size, '\n', '\n')
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('[Copilot] Serializer error:', e)
|
||||
prefixMarkdown = doc.textBetween(0, pos, '\n', '\n')
|
||||
suffixMarkdown = doc.textBetween(pos, doc.content.size, '\n', '\n')
|
||||
}
|
||||
|
||||
const prefixWithOCR = buildPrefixWithOCRFromMarkdown(doc, pos, prefixMarkdown, serializer, schema)
|
||||
|
||||
if (DEBUG) {
|
||||
console.log('[Copilot] ===== LLM Request =====')
|
||||
console.log('[Copilot] PREFIX:', prefixWithOCR)
|
||||
console.log('[Copilot] SUFFIX:', suffixMarkdown)
|
||||
console.log('[Copilot] ======================')
|
||||
}
|
||||
|
||||
if (runtime.debounceTimer) {
|
||||
clearTimeout(runtime.debounceTimer)
|
||||
@@ -357,7 +443,7 @@ function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, p
|
||||
const debounceMs = runtime.ctx.get(copilotConfigCtx.key).debounceMs ?? DEBOUNCE_MS
|
||||
runtime.debounceTimer = setTimeout(() => {
|
||||
runtime.debounceTimer = null
|
||||
doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffix)
|
||||
doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffixMarkdown)
|
||||
}, debounceMs)
|
||||
}
|
||||
|
||||
|
||||
@@ -43,3 +43,11 @@ export function checkSizeLimit(docTextSize, imageFilenames) {
|
||||
}
|
||||
|
||||
export const OCR_SIZE_LIMIT = SIZE_LIMIT
|
||||
|
||||
/**
 * Extract the plain-text part of an OCR result and cap its length.
 *
 * When the input contains a `TEXT:` label, only what follows it (up to the
 * next `KEY_DETAILS`, `LANGUAGE` or `SUMMARY` marker, or the end of the input)
 * is kept; otherwise the whole input is used. The result is trimmed, and a
 * value of `(none)` (any letter case) is treated as no text.
 *
 * @param {string} ocrText - Raw OCR result.
 * @param {number} [maxLen=100] - Maximum number of UTF-16 code units kept
 *   before `...` is appended.
 * @returns {string} The extracted text, or '' when there is none or the input
 *   is not a non-empty string.
 */
export function extractTextFromOCR(ocrText, maxLen = 100) {
  // Non-string input would throw on .match(); treat it like missing text.
  if (typeof ocrText !== 'string' || ocrText === '') return ''

  const match = ocrText.match(/TEXT:\s*([\s\S]*?)(?:KEY_DETAILS|LANGUAGE|SUMMARY|$)/i)
  const text = (match ? match[1] : ocrText).trim()
  if (text.toLowerCase() === '(none)') return ''
  if (text.length <= maxLen) return text

  // Do not cut a surrogate pair in half: if the last kept code unit is a high
  // surrogate, drop it so the output never ends in a lone surrogate.
  let end = maxLen
  const lastUnit = text.charCodeAt(end - 1)
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1

  return `${text.substring(0, end)}...`
}
|
||||
|
||||
Reference in New Issue
Block a user