diff --git a/README.md b/README.md index 9ace79e..b3e8097 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # LLM in Text - 智能写作助手 -基于 Vue3 和 FastAPI 的智能 Markdown 编辑器,集成大语言模型(LLM)实时补全建议功能。 +基于 Vue3 和 FastAPI 的智能 Markdown 编辑器,集成大语言模型(LLM)实时补全建议功能,提供类似 GitHub Copilot 的 Ghost Text 体验。 ## 功能特性 @@ -26,20 +26,24 @@ ## 技术架构 ```mermaid -flowchart LR - subgraph Frontend - A[Vue3] --> B[Milkdown Editor] - B --> C[ProseMirror Plugin] - C --> D[Ghost Text Mark] +flowchart TB + subgraph Frontend["前端 (Vue3 + Vite)"] + A[App.vue] --> B[MilkdownEditor.vue] + B --> C[Crepe Editor] + C --> D[ProseMirror] + D --> E[copilotPlugin.ts] + E --> F[copilotGhostMark] + E --> G[api.js] end - subgraph Backend - E[FastAPI] --> F[LLM API] - F --> G[Stream Response] + subgraph Backend["后端 (FastAPI + Python)"] + H[main.py
FastAPI Server] --> I[prompt.py
Prompt 构建] + H --> J[llm.py
Ollama 调用] + J --> K[Ollama API] end - D -->|SSE| E - G -->|text| D + G -->|POST /v1/completions
SSE 流式响应| H + K -->|LLM 响应| J ``` ## 项目结构 @@ -50,7 +54,9 @@ llm-in-text/ │ ├── components/ │ │ └── MilkdownEditor.vue # 主编辑器组件 │ ├── plugins/ -│ │ └── copilotPlugin.ts # ProseMirror AI 补全插件 +│ │ ├── copilotPlugin.ts # ProseMirror AI 补全插件 +│ │ ├── types.ts # 类型定义 +│ │ └── index.ts # 插件导出 │ ├── utils/ │ │ ├── api.js # API 调用封装 │ │ └── config.js # 配置文件 @@ -69,7 +75,7 @@ llm-in-text/ ### 环境要求 - Node.js 18+ - Python 3.8+ -- OpenAI API Key 或 Ollama 服务 +- Ollama 服务(或其他兼容 OpenAI API 的服务) ### 安装 @@ -87,9 +93,8 @@ pip install -r requirements.txt 在 `backend/.env` 中配置: ```env -OPENAI_API_KEY=your_api_key -OLLAMA_BASE_URL=http://localhost:11434/v1/ -OLLAMA_MODEL=gpt-4 +OLLAMA_MODEL=gpt-oss:20b +OLLAMA_HOST=http://localhost:11434 ``` ### 启动 @@ -129,7 +134,34 @@ data: {"done": true} ## 核心实现 -### ProseMirror Mark 系统 +### 后端设计 + +#### main.py - FastAPI 服务器 +- 定义 `/v1/completions` 端点 +- 使用 `StreamingResponse` 返回 SSE 流式响应 +- CORS 配置允许跨域请求 + +#### llm.py - LLM 调用封装 +- 使用 `ollama.AsyncClient` 异步调用 +- 支持按请求指定 `temperature`(调用时不再传入 `think` 参数) +- 返回 `content` 和 `thinking` 字段 + +#### prompt.py - Prompt 工程 +精心设计的 Prompt 模板,包含 7 条核心规则: + +| 规则 | 说明 | +|------|------| +| RULE #1 | 无缝连接 - PREFIX + 输出 + SUFFIX 必须读起来是一篇连贯的文档 | +| RULE #2 | 不重复 suffix - 不重复 SUFFIX 开头已有的文本 | +| RULE #3 | 长度均衡 - 默认 20-120 个字符、1-3 行,仅在语法需要时更短 | +| RULE #4 | 避免琐碎输出 - 不只输出标点或填充符,非必要不只输出单个 token | +| RULE #5 | 保持局部风格 - 匹配附近的语言、语气、标点、空格和缩进 | +| RULE #6 | Markdown 感知 - 延续列表/任务列表/有序列表,必要时闭合未完成的行内标记 | +| RULE #7 | 严格输出格式 - 仅输出插入文本,无解释、标签、引号或代码围栏 | + +### 前端设计 + +#### ProseMirror Mark 系统 使用 ProseMirror 的 Mark 系统实现灰色建议文本: @@ -151,12 +183,80 @@ export const copilotGhostMark = $markSchema('copilot_ghost', () => ({ } ``` -### 交互处理 +#### copilotPlugin 核心逻辑 -- 点击灰色文本区域:接受建议(移除 mark,保留文本) -- 点击其他区域:拒绝建议(删除灰色文本) -- Tab 键:接受建议 -- Esc 键:拒绝建议 +```mermaid +flowchart LR + A[用户输入] --> B{文档变化?} + B -->|是| C[清除旧建议] + C --> D[防抖 500ms] + D --> E[发送 API 请求] + E --> F[收到建议] + F --> G[插入 Ghost Text] + + G --> H{用户操作} + H -->|Tab| I[接受建议
移除 mark] + H -->|Esc| J[拒绝建议
删除文本] + H -->|点击 Ghost| I + H -->|继续输入| J +``` + +#### 关键函数 + +| 函数 | 作用 | +|------|------| +| `scheduleFetch` | 防抖调度 API 请求 | +| `insertGhostText` | 插入带 mark 的建议文本 | +| `acceptSuggestion` | Tab 接受建议 | +| `rejectSuggestion` | Esc 拒绝建议 | +| `clearGhostText` | 清除当前建议 | + +### 数据流 + +```mermaid +sequenceDiagram + participant U as 用户 + participant E as Editor (ProseMirror) + participant P as copilotPlugin + participant A as api.js + participant B as Backend + participant L as LLM + + U->>E: 输入文本 + E->>P: view.update() + P->>P: 清除旧建议 + P->>P: 防抖 500ms + P->>A: fetchSuggestion(prefix, suffix) + A->>B: POST /v1/completions + B->>B: build_prompt() + B->>L: ollama.chat() + L-->>B: {content, thinking} + B-->>A: SSE stream + A-->>P: suggestion text + P->>E: insertGhostText() + E-->>U: 显示灰色建议 + + alt Tab 键 + U->>P: Tab + P->>E: acceptSuggestion() + E-->>U: 建议变为正常文本 + else Esc 键 + U->>P: Esc + P->>E: rejectSuggestion() + E-->>U: 建议消失 + else 继续输入 + U->>E: 输入其他字符 + E->>P: handleKeyDown() + P->>E: clearGhostText() + end +``` + +## 设计亮点 + +1. **前后端分离**:前端只负责渲染和数据回传,后端负责 LLM 调用、Prompt 构建和数据解析 +2. **低延迟优化**:防抖机制 (500ms) + SSE 流式响应 + AbortController 取消过期请求 +3. **ProseMirror Mark 系统**:与编辑器状态完美集成,支持 Undo/Redo +4. 
**多种交互方式**:Tab/Esc/点击/输入,用户体验友好 ## 许可证 diff --git a/backend/.env b/backend/.env index 1431614..37307ec 100644 --- a/backend/.env +++ b/backend/.env @@ -1,3 +1,4 @@ OPENAI_API_KEY=ollama OLLAMA_HOST=http://192.168.0.120:11434 OLLAMA_MODEL=gpt-oss:20b +VLM_MODEL=qwen3-vl:30b diff --git a/backend/.env.example b/backend/.env.example index 7877999..c74dfba 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,3 +1,4 @@ OPENAI_API_KEY=ollama OLLAMA_BASE_URL=http://192.168.0.120:11434/v1/ OLLAMA_MODEL=gpt-oss:120b +VLM_MODEL=qwen3-vl:30b diff --git a/backend/__pycache__/llm.cpython-313.pyc b/backend/__pycache__/llm.cpython-313.pyc index e6c7c38..bd49ef5 100644 Binary files a/backend/__pycache__/llm.cpython-313.pyc and b/backend/__pycache__/llm.cpython-313.pyc differ diff --git a/backend/__pycache__/main.cpython-313.pyc b/backend/__pycache__/main.cpython-313.pyc new file mode 100644 index 0000000..5954b67 Binary files /dev/null and b/backend/__pycache__/main.cpython-313.pyc differ diff --git a/backend/__pycache__/prompt.cpython-313.pyc b/backend/__pycache__/prompt.cpython-313.pyc index 4b8e849..430125b 100644 Binary files a/backend/__pycache__/prompt.cpython-313.pyc and b/backend/__pycache__/prompt.cpython-313.pyc differ diff --git a/backend/llm.py b/backend/llm.py index 0385aa9..2a4f236 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -1,4 +1,6 @@ import os +import time +import logging import ollama from dotenv import load_dotenv @@ -6,27 +8,40 @@ load_dotenv() OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b') OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://192.168.0.120:11434') +VLM_MODEL = os.getenv('VLM_MODEL', 'qwen3-vl:30b') client = ollama.AsyncClient(host=OLLAMA_HOST) +logger = logging.getLogger("llm") -async def call_ollama(prompt: str) -> dict: - """ - 调用 Ollama API 并返回 content 和 thinking。 - """ - response = await client.chat( - model=OLLAMA_MODEL, - messages=[{'role': 'user', 'content': prompt}], - stream=False, - options={ - 'temperature': 
0.7, - 'repeat_penalty': 1.1, - }, - think='high' - ) - +VLM_OCR_CONTEXT_PROMPT = """You are an OCR and visual-context extractor for markdown writing assistance. + +Your output will be embedded inside an HTML comment as hidden context for a text-completion model. + +Requirements: +- Keep output compact: maximum 120 words. +- Use plain text only (no markdown code fences). +- Never output . +- Do not invent unreadable text; mark uncertain characters with ?. +- Preserve original script for recognized text (do not forcibly translate). + +Return exactly this format: + +TEXT: + + +KEY_DETAILS: +- <3-5 short factual bullets about relevant objects/layout> + +LANGUAGE: + + +SUMMARY: +""" + +def _extract_message(response) -> tuple[str, str]: content = "" thinking = "" - + if hasattr(response, 'message') and response.message: content = response.message.content or "" thinking = getattr(response.message, 'thinking', '') or "" @@ -34,5 +49,92 @@ async def call_ollama(prompt: str) -> dict: msg = response.get('message', {}) content = msg.get('content', '') or "" thinking = msg.get('thinking', '') or "" - + + return content, thinking + + +async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict: + """ + 调用 Ollama API 并返回 content 和 thinking。 + """ + start = time.perf_counter() + logger.info( + "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f", + tag, + OLLAMA_MODEL, + OLLAMA_HOST, + len(prompt), + temperature, + ) + + try: + response = await client.chat( + model=OLLAMA_MODEL, + messages=[{'role': 'user', 'content': prompt}], + stream=False, + options={ + 'temperature': temperature, + 'repeat_penalty': 1.1, + }, + ) + except Exception: + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.exception("[LLM][%s] request failed after %.1fms", tag, elapsed_ms) + raise + + content, thinking = _extract_message(response) + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.info( + "[LLM][%s] response in %.1fms response_type=%s 
content_chars=%d thinking_chars=%d", + tag, + elapsed_ms, + type(response).__name__, + len(content), + len(thinking), + ) + + if not content.strip(): + logger.warning("[LLM][%s] empty content returned by model", tag) + return {"content": content, "thinking": thinking} + +async def call_vlm_ocr(image_bytes: bytes, language: str = 'auto') -> str: + start = time.perf_counter() + logger.info( + "[VLM][ocr] request model=%s host=%s image_bytes=%d language=%s", + VLM_MODEL, + OLLAMA_HOST, + len(image_bytes), + language, + ) + + try: + response = await client.chat( + model=VLM_MODEL, + messages=[{ + 'role': 'user', + 'content': VLM_OCR_CONTEXT_PROMPT, + 'images': [image_bytes] + }], + stream=False, + options={'temperature': 0.3} + ) + except Exception: + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.exception("[VLM][ocr] request failed after %.1fms", elapsed_ms) + raise + + content, thinking = _extract_message(response) + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.info( + "[VLM][ocr] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d", + elapsed_ms, + type(response).__name__, + len(content), + len(thinking), + ) + + if not content.strip(): + logger.warning("[VLM][ocr] empty content returned by model") + + return content diff --git a/backend/main.py b/backend/main.py index 018c4d4..72b9ba4 100644 --- a/backend/main.py +++ b/backend/main.py @@ -3,9 +3,18 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse from pydantic import BaseModel import json +import base64 +import uuid +import logging from prompt import build_prompt -from llm import call_ollama +from llm import call_ollama, call_vlm_ocr + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s - %(message)s", +) +logger = logging.getLogger("api") app = FastAPI() @@ -22,24 +31,100 @@ class CompletionRequest(BaseModel): suffix: str languageId: str = 'markdown' +class 
OCRRequest(BaseModel): + image: str + filename: str = "image.jpg" + language: str = 'auto' + + +def _preview(text: str, limit: int = 80) -> str: + value = (text or "").replace("\n", "\\n") + if len(value) <= limit: + return value + return value[:limit] + "..." + + +def _build_force_non_empty_prompt(base_prompt: str) -> str: + return ( + base_prompt + + "\n\nStrict override for this request:\n" + + "- Output must be non-empty.\n" + + "- If you would otherwise output empty, output a single space.\n" + + "- Keep it short and do not repeat SUFFIX.\n" + ) + + @app.post("/v1/completions") async def create_completion(request: CompletionRequest): + request_id = str(uuid.uuid4())[:8] try: - prompt = build_prompt(request.prefix, request.suffix) - result = await call_ollama(prompt) - - content = result["content"] - + logger.info( + "[%s] /v1/completions prefix_chars=%d suffix_chars=%d lang=%s prefix_tail='%s' suffix_head='%s'", + request_id, + len(request.prefix or ""), + len(request.suffix or ""), + request.languageId, + _preview((request.prefix or "")[-120:]), + _preview((request.suffix or "")[:120]), + ) + prompt = build_prompt(request.prefix, request.suffix, request.languageId) + result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7) + + content = result["content"] or "" + source = "primary" + if not content.strip(): + logger.warning("[%s] primary returned empty content, starting retry", request_id) + retry_prompt = _build_force_non_empty_prompt(prompt) + retry_result = await call_ollama(retry_prompt, tag=f"{request_id}-retry1", temperature=0.4) + content = retry_result["content"] or "" + source = "retry1" + + if not content.strip(): + content = " " + source = "fallback-space" + logger.warning("[%s] retry still empty, forcing single-space fallback", request_id) + + logger.info( + "[%s] completion resolved source=%s content_chars=%d content_preview='%s'", + request_id, + source, + len(content), + _preview(content, 120), + ) + async def generate(): 
- if content: - yield f"data: {json.dumps({'content': content})}\n\n" + yield f"data: {json.dumps({'content': content})}\n\n" yield f"data: {json.dumps({'done': True})}\n\n" - + return StreamingResponse(generate(), media_type="text/event-stream") - + except Exception as e: - import traceback - traceback.print_exc() + logger.exception("[%s] /v1/completions failed: %s", request_id, e) + return JSONResponse(content={"error": str(e)}, status_code=500) + +@app.post("/v1/ocr") +async def ocr_image(request: OCRRequest): + request_id = str(uuid.uuid4())[:8] + try: + logger.info( + "[%s] /v1/ocr filename=%s language=%s image_base64_chars=%d", + request_id, + request.filename, + request.language, + len(request.image or ""), + ) + image_bytes = base64.b64decode(request.image) + logger.info("[%s] /v1/ocr decoded image_bytes=%d", request_id, len(image_bytes)) + result = await call_vlm_ocr(image_bytes, request.language) + logger.info( + "[%s] /v1/ocr success text_chars=%d text_preview='%s'", + request_id, + len(result or ""), + _preview(result or "", 120), + ) + return {"text": result, "filename": request.filename} + except Exception as e: + logger.exception("[%s] /v1/ocr failed: %s", request_id, e) return JSONResponse(content={"error": str(e)}, status_code=500) if __name__ == "__main__": diff --git a/backend/prompt.py b/backend/prompt.py index bcf02c7..5acbc5a 100644 --- a/backend/prompt.py +++ b/backend/prompt.py @@ -1,202 +1,84 @@ -import os from typing import Tuple -def build_prompt(prefix: str, suffix: str) -> str: +MAX_PREFIX_CHARS = 12000 +MAX_SUFFIX_CHARS = 4000 + + +def _sanitize_language_id(language_id: str) -> str: + if not language_id: + return "markdown" + allowed = [] + for ch in language_id.strip(): + if ch.isalnum() or ch in "-_+.": + allowed.append(ch) + value = "".join(allowed)[:32] + return value or "markdown" + + +def _prepare_context(prefix: str, suffix: str) -> Tuple[str, str]: """ - 优化后的提示词构建函数。 - 使用明确的分隔符区分指令部分和实际的 prefix/suffix 内容。 + Prepare 
prefix/suffix for model completion context. + Keep the historical one-char lookahead behavior to reduce boundary drift. """ - # 修正:把suffix的第一个字符移到prefix末尾(解决光标位置偏差) if suffix: - first_char = suffix[0] - prefix = prefix + first_char + prefix = prefix + suffix[0] suffix = suffix[1:] - - recent_prefix = prefix - recent_suffix = suffix + return prefix[-MAX_PREFIX_CHARS:], suffix[:MAX_SUFFIX_CHARS] - prompt = f"""You are an expert writing assistant integrated into a text editor. Your task is to complete the text at the cursor position. -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -RULES -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +def build_prompt(prefix: str, suffix: str, language_id: str = "markdown") -> str: + safe_language_id = _sanitize_language_id(language_id) + recent_prefix, recent_suffix = _prepare_context(prefix, suffix) -RULE #1: SEAMLESS CONNECTION (MOST CRITICAL) + prompt = f"""You are an inline completion engine for a {safe_language_id} editor with ghost-text suggestions. -Your continuation MUST seamlessly bridge the prefix and suffix. This is the MOST IMPORTANT rule. +Your job: +- Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX. +- Prefer a meaningful, non-empty insertion with moderate length. +- Avoid overly short outputs with little information value. -The "复读机" (Parrot) Error is when you repeat content that already exists in the suffix. This is the WORST mistake you can make. +Important context: +- PREFIX may contain hidden OCR metadata in HTML comments such as . +- These comments are non-visible context only. +- Never copy, rewrite, or emit HTML comments in output. +- Never output . 
-Requirements: -- Your output must connect prefix to suffix smoothly -- NEVER repeat content that already exists in the suffix -- If prefix already flows naturally into suffix, output NOTHING (empty string) -- The result should read as one coherent text, as if you never interrupted it +Hard rules: +1. Seamless join: + PREFIX + OUTPUT + SUFFIX must read naturally as one continuous document. +2. No suffix repetition: + Do NOT repeat text that already appears at the start of SUFFIX. +3. Balanced length: + Prefer concise but meaningful continuation, not ultra-short fragments. + Default target is 20-120 characters and 1-3 lines. + You may go shorter only when syntax requires it. +4. Avoid trivial output: + Do not output only punctuation or filler such as ".", ",", ";", ":". + Do not output just one token unless it is structurally necessary. +5. Preserve local style: + Match nearby language, tone, punctuation, spacing, and indentation. +6. Markdown awareness: + Continue active list/checkbox/ordered-list patterns when applicable. + Preserve indentation in nested list/code contexts. + Close obvious unclosed inline markdown markers only when needed to bridge. +7. Strict output format: + Output insertion text only. + No explanations, labels, quotes, or code fences. -RULE #2: WHITESPACE & PUNCTUATION +Decision policy: +- If PREFIX already connects naturally to SUFFIX, add a brief but useful continuation when possible. +- If uncertain, prefer a complete short phrase or sentence with clear meaning. -You must carefully check the LAST character of prefix and FIRST character of suffix to ensure perfect docking. 
- -Requirements: -- If prefix ends with space, do NOT start your output with space (prevents double spaces) -- If prefix does NOT end with space and suffix starts with a letter, you may need to add a space -- If suffix starts with punctuation, do NOT end your output with the same punctuation -- Check for existing spaces around operators before adding more - -RULE #3: INDENTATION ALIGNMENT - -You MUST match the indentation level of the current context. - -Requirements: -- Look at the line where cursor is positioned -- Count the leading spaces/tabs on that line -- Match that indentation for new lines -- Use the SAME type of indentation (spaces OR tabs) as the existing code -- For nested blocks, increase indentation appropriately -- For closing braces, match the opening brace's indentation - -RULE #4: LIST MAINTENANCE - -When the prefix ends with a list marker, you MUST recognize the pattern and continue it appropriately. - -Requirements: -- "- [ ] " indicates an unchecked task → continue with task description -- "- [x] " indicates a checked task → continue with completed task description -- "1. ", "2. ", etc. indicates ordered list → increment the number -- "* " or "- " indicates bullet list → continue with same marker style -- "> " indicates blockquote → continue quoted text -- Maintain the same list format and indentation level - -RULE #5: SYNTAX CLOSURE - -Before generating content, CHECK if there are unclosed syntax elements. If so, you MUST close them FIRST. 
- -Requirements: -- Scan prefix for opening markers: **, *, `, [, ![, ``` -- Check if each opening has a corresponding closing -- If unclosed, add the closing marker FIRST before continuing with content -- Markdown syntax pairs to check: - - Bold: ** must have closing ** - - Italic: * must have closing * - - Bold + Italic: *** must have closing *** - - Inline code: ` must have closing ` - - Code block: ``` must have closing ``` - - Link: [text must have closing ](url) - - Image: ![alt must have closing ](url) - -RULE #6: OUTPUT FORMAT - -Your output will be directly inserted into the document. Output ONLY the continuation text. - -Requirements: -- Output ONLY the text that should appear at the cursor position -- NO explanations, NO comments, NO meta-text -- NO code blocks wrapping your output -- NO phrases like "Here's the continuation:" or "I'll complete this for you:" -- Your output is inserted DIRECTLY into the user's document - -RULE #7: ALWAYS OUTPUT SOMETHING (MANDATORY) - -You MUST always output some content. Empty output is NOT allowed. - -Requirements: -- Even if the prefix seems complete, you should suggest a natural continuation -- If the prefix ends mid-sentence, complete the sentence -- If the prefix ends at a natural break point, suggest the next logical content -- Examples of valid continuations: - - Add the next word or phrase - - Complete an incomplete thought - - Add a relevant follow-up sentence - - Continue a list with the next item - - Add closing punctuation if missing -- NEVER output an empty string - always provide some useful continuation - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -EXAMPLES -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -EXAMPLE 1 - Seamless Connection: +Examples: The quick brown fox jumps over the lazy dog. -Output: "" (empty - nothing needed, prefix already connects to suffix) -Result: "The quick brown fox jumps over the lazy dog." 
+Output: "moved quietly and then " -EXAMPLE 2 - Seamless Connection with Space: -Hello -world! -Output: " " -Result: "Hello world!" - -EXAMPLE 3 - Whitespace Docking: -const a = -1; -Output: "1;" -Result: "const a = 1;" - -EXAMPLE 4 - Indentation Alignment: -function test() {{\\n if (true) {{\\n console.log('hi');\\n -\\n}} -Output: "}}\\n}}" -Result: " }}\\n}}" (correctly closes if with 4 spaces, then function) - -EXAMPLE 5 - Task List: -## TODO\\n- [ ] Buy groceries\\n- [ ] +## TODO\\n- [ ] Buy milk\\n- [ ] -Output: "Call mom" -Result: "## TODO\\n- [ ] Buy groceries\\n- [ ] Call mom" +Output: "Write release notes and share draft with team" -EXAMPLE 6 - Ordered List: -1. First item\\n2. Second item\\n - -Output: "3. Third item" -Result: "1. First item\\n2. Second item\\n3. Third item" - -EXAMPLE 7 - Bullet List: -* Apple\\n* Banana\\n* - -Output: "Cherry" -Result: "* Apple\\n* Banana\\n* Cherry" - -EXAMPLE 8 - Unclosed Bold: -This is **important - text continues here. -Output: "** " -Result: "This is **important** text continues here." - -EXAMPLE 9 - Unclosed Link: -Click [here for more - information. -Output: "](https://example.com)" -Result: "Click [here for more](https://example.com) information." - -EXAMPLE 10 - Unclosed Code Block: -```python\\ndef hello(): -\\nprint('done') -Output: "\\n print('hello')\\n```" -Result: Code block properly closed with ``` - -EXAMPLE 11 - Clean Output: -For any completion, output ONLY the continuation text: -Output: "Hello world!" -NOT: "Here's what comes next: Hello world!" -NOT: "```Hello world```" -NOT: "I'll complete this for you: Hello world!" - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -FINAL CHECKLIST -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Before outputting, verify: -□ Does my output connect prefix and suffix WITHOUT repeating suffix content? -□ Are there no double spaces or missing spaces between prefix and suffix? 
-□ Does my indentation match the context? -□ If there's a list marker, did I continue the list pattern? -□ Did I close any unclosed Markdown syntax? -□ Is my output ONLY the continuation text, nothing else? - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -NOW COMPLETE THE FOLLOWING TEXT -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Now produce the insertion. {recent_prefix} @@ -207,5 +89,5 @@ NOW COMPLETE THE FOLLOWING TEXT Output:""" - + return prompt.strip() diff --git a/plans/image-button-plan.md b/plans/image-button-plan.md new file mode 100644 index 0000000..b39d27e --- /dev/null +++ b/plans/image-button-plan.md @@ -0,0 +1,269 @@ +# Image Button Implementation Plan + +## Overview + +Add an image button to the MilkdownEditor that allows users to insert images at the cursor position. The button will provide a dropdown menu with two options: upload local file or input image URL. + +## Current Architecture Analysis + +### Existing Image Handling + +The editor already has image support through `@milkdown/crepe`: + +```javascript +// From MilkdownEditor.vue lines 217-231 +features: { + [Crepe.Feature.Latex]: true, + [Crepe.Feature.ImageBlock]: true, +}, +featureConfigs: { + [Crepe.Feature.ImageBlock]: { + onUpload: (file) => { + const objectUrl = URL.createObjectURL(file) + objectUrls.add(objectUrl) + performOCR(file, objectUrl) + return objectUrl + } + } +} +``` + +### Editor Access Pattern + +The code uses `editorViewCtx` to access the ProseMirror editor view: + +```javascript +crepe.editor.action((ctx) => { + const view = ctx.get(editorViewCtx) + // manipulate editor state +}) +``` + +## Implementation Plan + +### 1. Template Changes + +Add new button with dropdown menu in the `action-buttons` section: + +```html + +
+ + + +
+ + +
+
+ + + + + +
+
+ + + +
+
+``` + +### 2. Script Changes + +Add new refs and methods: + +```javascript +// New refs +const imageInputRef = ref(null) +const showImageDropdown = ref(false) +const showUrlDialog = ref(false) +const imageUrl = ref('') + +// Toggle dropdown +const toggleImageDropdown = () => { + showImageDropdown.value = !showImageDropdown.value +} + +// Trigger file input +const triggerImageUpload = () => { + showImageDropdown.value = false + imageInputRef.value?.click() +} + +// Handle file upload - reuse existing onUpload logic +const handleImageUpload = async (event) => { + const file = event.target.files?.[0] + if (!file) return + + const objectUrl = URL.createObjectURL(file) + objectUrls.add(objectUrl) + performOCR(file, objectUrl) + + // Insert image at cursor + insertImageAtCursor(objectUrl) + event.target.value = '' +} + +// Insert image from URL +const insertImageFromUrl = () => { + if (!imageUrl.value.trim()) return + insertImageAtCursor(imageUrl.value.trim()) + imageUrl.value = '' + showUrlDialog.value = false +} + +// Core function: insert image at cursor position +const insertImageAtCursor = (src) => { + if (!crepe) return + + crepe.editor.action((ctx) => { + const view = ctx.get(editorViewCtx) + const { state } = view + const { selection, schema } = state + + // Get image node type from schema + const imageType = schema.nodes.image + if (!imageType) return + + // Create image node + const imageNode = imageType.create({ src }) + + // Create transaction to insert at cursor + let tr = state.tr + tr = tr.replaceSelectionWith(imageNode) + + view.dispatch(tr) + }) +} +``` + +### 3. 
Style Changes + +Add styles for dropdown and dialog: + +```css +/* Image button wrapper */ +.image-btn-wrapper { + position: relative; +} + +/* Dropdown menu */ +.image-dropdown { + position: absolute; + bottom: 100%; + right: 0; + margin-bottom: 8px; + background: #fff; + border: 1px solid #ddd; + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0,0,0,0.15); + overflow: hidden; + z-index: 10000; + min-width: 160px; +} + +.image-dropdown button { + display: block; + width: 100%; + padding: 10px 16px; + border: none; + background: none; + text-align: left; + cursor: pointer; + font-size: 14px; + color: #333; +} + +.image-dropdown button:hover { + background: #f5f5f5; +} + +/* URL dialog overlay */ +.url-dialog-overlay { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0,0,0,0.3); + display: flex; + align-items: center; + justify-content: center; + z-index: 10001; +} + +.url-dialog { + background: #fff; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 16px rgba(0,0,0,0.2); +} + +.url-dialog input { + width: 300px; + padding: 8px 12px; + border: 1px solid #ddd; + border-radius: 4px; + margin-bottom: 12px; +} + +.url-dialog button { + padding: 8px 16px; + margin-right: 8px; + border: 1px solid #ddd; + border-radius: 4px; + cursor: pointer; +} +``` + +## Workflow Diagram + +```mermaid +flowchart TD + A[Click Image Button] --> B{Toggle Dropdown} + B --> C[Show Dropdown Menu] + C --> D{User Choice} + D -->|Upload Local| E[Open File Picker] + D -->|From URL| F[Show URL Dialog] + E --> G[Select Image File] + G --> H[Create Object URL] + H --> I[Perform OCR] + I --> J[Insert Image at Cursor] + F --> K[Enter URL] + K --> L[Click Insert] + L --> J + J --> M[Image Appears in Editor] +``` + +## Key Implementation Notes + +1. **Reuse existing logic**: The `onUpload` callback logic for `Crepe.Feature.ImageBlock` should be reused for local file uploads to maintain consistency with OCR processing. + +2. 
**ProseMirror API**: Use `schema.nodes.image.create()` and `replaceSelectionWith()` to insert images at cursor position. + +3. **Click outside to close**: The dropdown should close when clicking outside. This can be achieved with a click-outside directive or by listening to document clicks. + +4. **Accessibility**: Ensure proper ARIA labels and keyboard navigation support. + +## Files to Modify + +- `src/components/MilkdownEditor.vue` - All changes will be in this single file + +## Dependencies + +No new dependencies required. All functionality uses existing: +- Vue 3 Composition API +- Milkdown/ProseMirror APIs +- Native browser APIs (URL.createObjectURL, FileReader) \ No newline at end of file diff --git a/src/App.vue b/src/App.vue index c330dc6..2844c9b 100644 --- a/src/App.vue +++ b/src/App.vue @@ -1,6 +1,7 @@ @@ -176,6 +440,7 @@ onUnmounted(() => { bottom: 20px; right: 20px; display: flex; + flex-direction: column; gap: 8px; z-index: 9999; } @@ -193,12 +458,14 @@ onUnmounted(() => { align-items: center; justify-content: center; box-shadow: 0 2px 8px rgba(0,0,0,0.1); + opacity: 0.5; } .action-btn:hover { background-color: #4a90d9; color: white; border-color: #4a90d9; + opacity: 1; } .action-btn.ai-disabled { @@ -213,15 +480,42 @@ onUnmounted(() => { border-color: #4a90d9; } +.action-btn.force-disabled { + background-color: #ccc; + color: #999; + border-color: #ccc; + cursor: not-allowed; + opacity: 0.6; +} + +.action-btn.force-disabled:hover { + background-color: #ccc; + color: #999; + border-color: #ccc; + opacity: 0.6; +} + +.size-indicator { + font-size: 10px; + color: #999; + text-align: center; + margin-top: 4px; +} + +.size-indicator.over-limit { + color: #e74c3c; +} + .action-btn { position: relative; } .btn-tooltip { position: absolute; - top: -32px; - left: 50%; - transform: translateX(-50%); + top: 50%; + right: 100%; + transform: translateY(-50%); + margin-right: 8px; background: #333; color: #fff; font-size: 12px; @@ -237,6 +531,116 @@ 
onUnmounted(() => { opacity: 1; } +.action-btn:focus-visible .btn-tooltip { + opacity: 1; +} + +.image-btn-wrapper { + position: relative; +} + +.image-dropdown { + position: absolute; + bottom: 100%; + right: 0; + margin-bottom: 8px; + background: #fff; + border: 1px solid #ddd; + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0,0,0,0.15); + overflow: hidden; + z-index: 10000; + min-width: 160px; +} + +.image-dropdown button { + display: block; + width: 100%; + padding: 10px 16px; + border: none; + background: none; + text-align: left; + cursor: pointer; + font-size: 14px; + color: #333; +} + +.image-dropdown button:hover { + background: #f5f5f5; +} + +.url-dialog-overlay { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0,0,0,0.3); + display: flex; + align-items: center; + justify-content: center; + z-index: 10001; +} + +.url-dialog { + background: #fff; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 16px rgba(0,0,0,0.2); + min-width: 320px; +} + +.url-dialog h3 { + margin: 0 0 12px 0; + font-size: 16px; + color: #333; +} + +.url-dialog input { + width: 100%; + box-sizing: border-box; + padding: 10px 12px; + border: 1px solid #ddd; + border-radius: 4px; + font-size: 14px; + margin-bottom: 16px; +} + +.url-dialog input:focus { + outline: none; + border-color: #4a90d9; +} + +.url-dialog-buttons { + display: flex; + justify-content: flex-end; + gap: 8px; +} + +.dialog-btn { + padding: 8px 16px; + border: 1px solid #ddd; + border-radius: 4px; + cursor: pointer; + font-size: 14px; + background: #fff; + color: #333; +} + +.dialog-btn:hover { + background: #f5f5f5; +} + +.dialog-btn.primary { + background: #4a90d9; + color: #fff; + border-color: #4a90d9; +} + +.dialog-btn.primary:hover { + background: #3a80c9; +} + .milkdown-editor { width: 100%; height: 100%; @@ -247,7 +651,7 @@ onUnmounted(() => { .milkdown-editor :deep(.milkdown) { max-width: none; margin: 0 !important; - padding: 20px 40px !important; + padding: 0 
40px !important; min-height: 100%; } @@ -262,6 +666,25 @@ onUnmounted(() => { padding: 0 !important; } +.milkdown-editor :deep(.milkdown > *:first-child) { + margin-top: 0 !important; + padding-top: 0 !important; +} + +.milkdown-editor :deep(.ProseMirror) { + margin: 0 !important; + padding: 0 !important; +} + +.milkdown-editor :deep(.ProseMirror img) { + max-width: 60%; + height: auto; +} + +.milkdown-editor :deep(.ProseMirror > *:first-child) { + margin-top: 0 !important; +} + .milkdown-editor :deep(.milkdown__aside), .milkdown-editor :deep(.milkdown__aside-wrapper), .milkdown-editor :deep([class*="aside"]), @@ -314,7 +737,7 @@ onUnmounted(() => { .copilot-ghost-text { color: #999; opacity: 0.6; - pointer-events: none; + pointer-events: auto; } .copilot-ghost-text.copilot-loading { diff --git a/src/plugins/copilotPlugin.ts b/src/plugins/copilotPlugin.ts index 1131992..ed6979c 100644 --- a/src/plugins/copilotPlugin.ts +++ b/src/plugins/copilotPlugin.ts @@ -1,14 +1,14 @@ import { Plugin, PluginKey, Selection } from '@milkdown/prose/state' import { $prose, $ctx, $markSchema } from '@milkdown/kit/utils' import { parserCtx } from '@milkdown/kit/core' -import { Node as ProseNode, Fragment, Slice } from '@milkdown/prose/model' +import { Node as ProseNode, Fragment } from '@milkdown/prose/model' import type { Ctx } from '@milkdown/kit/core' import type { EditorView } from '@milkdown/prose/view' +import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT } from '../utils/ocrCache' const COPILOT_PLUGIN_KEY = new PluginKey('milkdown-copilot') -const DEBOUNCE_MS = 500 - -let enabled = true +const DEBOUNCE_MS = 1000 +const SIZE_LIMIT = OCR_SIZE_LIMIT interface CopilotState { from: number @@ -21,12 +21,21 @@ interface CopilotConfig { debounceMs?: number } +interface CopilotRuntime { + enabled: boolean + debounceTimer: ReturnType | null + abortController: AbortController | null + ctx: Ctx +} + const initialState: CopilotState = { from: 0, to: 0, suggestion: '' } 
+const runtimeByView = new WeakMap() + export const copilotConfigCtx = $ctx({ fetchSuggestion: async () => '', debounceMs: DEBOUNCE_MS @@ -36,21 +45,68 @@ export const copilotGhostMark = $markSchema('copilot_ghost', () => ({ excludes: '_', inclusive: true, parseDOM: [{ tag: 'span[data-copilot-ghost]' }], - toDOM: () => ['span', { 'data-copilot-ghost': '', class: 'copilot-ghost-text' }, 0] + toDOM: () => ['span', { 'data-copilot-ghost': '', class: 'copilot-ghost-text' }, 0], + parseMarkdown: { + match: () => false, + runner: () => {} + }, + toMarkdown: { + match: (mark) => mark.type.name === 'copilot_ghost', + runner: () => {} + } })) -let debounceTimer: ReturnType | null = null -let abortController: AbortController | null = null -let currentCtx: Ctx | null = null +function clearRuntimeRequests(runtime: CopilotRuntime) { + if (runtime.debounceTimer) { + clearTimeout(runtime.debounceTimer) + runtime.debounceTimer = null + } + + if (runtime.abortController) { + runtime.abortController.abort() + runtime.abortController = null + } +} + +function findGhostRangeByMarks(view: EditorView): { from: number; to: number } | null { + const markType = view.state.schema.marks.copilot_ghost + if (!markType) return null + + let from = Number.POSITIVE_INFINITY + let to = -1 + + view.state.doc.descendants((node, pos) => { + if (node.isText && node.marks.some((m: any) => m.type === markType)) { + from = Math.min(from, pos) + to = Math.max(to, pos + node.nodeSize) + } + return true + }) + + if (!Number.isFinite(from) || to <= from) return null + return { from, to } +} + +function getGhostRange(view: EditorView): { from: number; to: number } | null { + const state = COPILOT_PLUGIN_KEY.getState(view.state) + if (state && state.from < state.to) { + return { from: state.from, to: state.to } + } + return findGhostRangeByMarks(view) +} + +function hasGhostText(view: EditorView): boolean { + return getGhostRange(view) !== null +} function clearGhostText(view: EditorView) { - const state = 
COPILOT_PLUGIN_KEY.getState(view.state) - if (state && state.suggestion && state.from < state.to) { - const tr = view.state.tr - .delete(state.from, state.to) - .setMeta(COPILOT_PLUGIN_KEY, { ...initialState }) - view.dispatch(tr) - } + const range = getGhostRange(view) + if (!range) return + + const tr = view.state.tr + .delete(range.from, range.to) + .setMeta(COPILOT_PLUGIN_KEY, { ...initialState }) + view.dispatch(tr) } function isBlockNode(node: ProseNode): boolean { @@ -67,39 +123,24 @@ function hasBlockNodes(doc: ProseNode): boolean { return hasBlock } -function addGhostMarkToNode(node: ProseNode, ghostMarkType: any): ProseNode { - if (node.isText) { - return node.mark(node.marks.concat(ghostMarkType.create())) - } - if (node.isLeaf) { - return node - } - const newContent: ProseNode[] = [] - node.forEach((child) => { - newContent.push(addGhostMarkToNode(child, ghostMarkType)) - }) - return node.copy(Fragment.from(newContent)) -} - -function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): Fragment { +function extractInlineContent(doc: ProseNode, schema: any): Fragment { const nodes: ProseNode[] = [] let isFirstBlock = true - + doc.forEach((blockNode) => { if (!isFirstBlock) { const hardBreak = schema.nodes.hard_break?.create() if (hardBreak) { nodes.push(hardBreak) } else { - nodes.push(schema.text('\n', [ghostMarkType.create()])) + nodes.push(schema.text('\n')) } } isFirstBlock = false - + blockNode.forEach((inlineNode) => { if (inlineNode.isText) { - const combinedMarks = inlineNode.marks.concat(ghostMarkType.create()) - nodes.push(inlineNode.mark(combinedMarks)) + nodes.push(inlineNode) } else if (inlineNode.type.name === 'hard_break') { nodes.push(inlineNode) } else if (inlineNode.isLeaf) { @@ -107,8 +148,7 @@ function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): } else if (inlineNode.content.size > 0) { inlineNode.forEach((nestedNode) => { if (nestedNode.isText) { - const combinedMarks = 
nestedNode.marks.concat(ghostMarkType.create()) - nodes.push(nestedNode.mark(combinedMarks)) + nodes.push(nestedNode) } else if (nestedNode.isLeaf) { nodes.push(nestedNode) } @@ -116,52 +156,83 @@ function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): } }) }) - + return Fragment.from(nodes) } -async function insertGhostText(view: EditorView, suggestion: string, from: number) { - if (!currentCtx || !suggestion) return - +function normalizeSuggestionText(raw: string): string { + if (!raw) return raw + + let text = raw.replace(/\r\n?/g, '\n') + const trimmed = text.trim() + + // Some models may return a JSON-encoded string literal, decode it if so. + if (trimmed.startsWith('"') && trimmed.endsWith('"')) { + try { + const parsed = JSON.parse(trimmed) + if (typeof parsed === 'string') { + text = parsed.replace(/\r\n?/g, '\n') + } + } catch { + // Keep original text when not valid JSON. + } + } + + // If newlines are escaped literally, convert them back. + if (!text.includes('\n') && text.includes('\\n')) { + text = text.replace(/\\n/g, '\n') + } + if (text.includes('\\t')) { + text = text.replace(/\\t/g, '\t') + } + + return text +} + +async function insertGhostText(view: EditorView, suggestion: string, from: number, ctx: Ctx) { + if (!suggestion) return + const schema = view.state.schema const markType = schema.marks.copilot_ghost - + if (!markType) { console.error('[Copilot] copilot_ghost mark not found in schema') return } - + try { - const parser = currentCtx.get(parserCtx) + const parser = ctx.get(parserCtx) const parsedDoc = await parser(suggestion) - + if (!parsedDoc) { insertPlainText(view, suggestion, from, markType) return } - + const containsBlocks = hasBlockNodes(parsedDoc) - + if (containsBlocks) { const $from = view.state.doc.resolve(from) const insertPos = $from.after($from.depth) - + const blockNodes: ProseNode[] = [] parsedDoc.forEach((node) => { - blockNodes.push(addGhostMarkToNode(node, markType)) + blockNodes.push(node) }) - 
+ const fragment = Fragment.from(blockNodes) const tr = view.state.tr tr.insert(insertPos, fragment) const endPos = insertPos + fragment.size + tr.addMark(insertPos, endPos, markType.create()) tr.setMeta(COPILOT_PLUGIN_KEY, { from: insertPos, to: endPos, suggestion }) view.dispatch(tr) } else { - const inlineFragment = extractInlineContent(parsedDoc, markType, schema) + const inlineFragment = extractInlineContent(parsedDoc, schema) const tr = view.state.tr tr.insert(from, inlineFragment) const endPos = from + inlineFragment.size + tr.addMark(from, endPos, markType.create()) tr.setMeta(COPILOT_PLUGIN_KEY, { from, to: endPos, suggestion }) view.dispatch(tr) } @@ -180,66 +251,112 @@ function insertPlainText(view: EditorView, suggestion: string, from: number, mar view.dispatch(tr) } -function doFetchSuggestion(view: EditorView, pos: number, prefix: string, suffix: string) { - if (!currentCtx) return - - const config = currentCtx.get(copilotConfigCtx.key) - - if (abortController) { - abortController.abort() - abortController = null +function extractImageFilenames(doc: ProseNode): string[] { + const filenames: string[] = [] + doc.descendants((node: ProseNode) => { + if (node.type.name === 'image' && node.attrs.src) { + filenames.push(node.attrs.src) + } + }) + return filenames +} + +function buildPrefixWithOCR(prefix: string, doc: ProseNode, cursorPos: number): string { + const ocrEntries: string[] = [] + + doc.descendants((node: ProseNode, pos) => { + if (pos >= cursorPos) return false + if (node.type.name !== 'image' || !node.attrs.src) return true + + const ocrText = getOcrCache(node.attrs.src) + if (!ocrText) return true + + const altText = typeof node.attrs.alt === 'string' ? 
node.attrs.alt : '' + ocrEntries.push(`image(${altText || 'untitled'}): ${ocrText}`) + return true + }) + + if (!ocrEntries.length) return prefix + return `${prefix}\n\n[OCR Context]\n${ocrEntries.join('\n')}` +} + +function doFetchSuggestion(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) { + const config = runtime.ctx.get(copilotConfigCtx.key) + + if (runtime.abortController) { + runtime.abortController.abort() + runtime.abortController = null } - - abortController = new AbortController() - - config.fetchSuggestion(prefix, suffix, abortController.signal) - .then(suggestion => { - if (view.state.selection.from !== pos) return - - if (suggestion) { - insertGhostText(view, suggestion, pos) + + const controller = new AbortController() + runtime.abortController = controller + + config.fetchSuggestion(prefix, suffix, controller.signal) + .then((suggestion) => { + if (!runtime.enabled) return + if (view.state.selection.from !== pos || view.state.selection.to !== pos) return + + const normalizedSuggestion = normalizeSuggestionText(suggestion) + if (normalizedSuggestion) { + insertGhostText(view, normalizedSuggestion, pos, runtime.ctx) } }) - .catch(e => { - if (e.name !== 'AbortError') { + .catch((e: any) => { + if (e?.name !== 'AbortError') { console.error('[Copilot] Error:', e) } }) .finally(() => { - abortController = null + if (runtime.abortController === controller) { + runtime.abortController = null + } }) } -function scheduleFetch(view: EditorView, pos: number, prefix: string, suffix: string) { - if (!enabled) return - - if (debounceTimer) { - clearTimeout(debounceTimer) - debounceTimer = null +function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) { + if (!runtime.enabled) return + + const doc = view.state.doc + const imageFilenames = extractImageFilenames(doc) + const { overLimit } = checkOcrSizeLimit(doc.content.size, imageFilenames) + + if (overLimit) { + 
setCopilotEnabled(view, false) + return } - - debounceTimer = setTimeout(() => { - debounceTimer = null - doFetchSuggestion(view, pos, prefix, suffix) - }, DEBOUNCE_MS) + + const prefixWithOCR = buildPrefixWithOCR(prefix, doc, pos) + + if (runtime.debounceTimer) { + clearTimeout(runtime.debounceTimer) + runtime.debounceTimer = null + } + + const debounceMs = runtime.ctx.get(copilotConfigCtx.key).debounceMs ?? DEBOUNCE_MS + runtime.debounceTimer = setTimeout(() => { + runtime.debounceTimer = null + doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffix) + }, debounceMs) } function acceptSuggestion(view: EditorView) { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - if (!state?.suggestion || state.from >= state.to) return false - + const range = getGhostRange(view) + if (!range) return false + const tr = view.state.tr const doc = tr.doc - const from = state.from - const to = state.to - + const from = range.from + const to = range.to + const markType = view.state.schema.marks.copilot_ghost + if (!markType) return false + doc.nodesBetween(from, to, (node, pos) => { - if (node.marks.some((m: any) => m.type.name === 'copilot_ghost')) { - tr.removeMark(pos, pos + node.nodeSize, view.state.schema.marks.copilot_ghost) + if (node.marks.some((m: any) => m.type === markType)) { + tr.removeMark(pos, pos + node.nodeSize, markType) } }) - - const endPos = Math.min(state.to, tr.doc.content.size) + + const endPos = Math.min(to, tr.doc.content.size) tr.setSelection(Selection.near(tr.doc.resolve(endPos))) tr.setMeta(COPILOT_PLUGIN_KEY, { ...initialState }) view.dispatch(tr) @@ -247,108 +364,180 @@ function acceptSuggestion(view: EditorView) { } function rejectSuggestion(view: EditorView) { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - if (!state?.suggestion) return false - + if (!hasGhostText(view)) return false + clearGhostText(view) return true } -export const copilotPlugin = $prose((ctx) => { - currentCtx = ctx - - return new Plugin({ - key: 
COPILOT_PLUGIN_KEY, - state: { - init: () => ({ ...initialState }), - apply: (tr, value) => { - const meta = tr.getMeta(COPILOT_PLUGIN_KEY) - if (meta !== undefined) { - return meta - } - - if (tr.docChanged && value.suggestion) { - return { ...initialState } - } - - return value +export const copilotPlugin = $prose((ctx) => new Plugin({ + key: COPILOT_PLUGIN_KEY, + state: { + init: () => ({ ...initialState }), + apply: (tr, value) => { + const meta = tr.getMeta(COPILOT_PLUGIN_KEY) + if (meta !== undefined) { + return meta } - }, - props: { - handleKeyDown: (view, event) => { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - - if (event.key === 'Tab' && state?.suggestion) { - event.preventDefault() - return acceptSuggestion(view) - } - - if (event.key === 'Escape' && state?.suggestion) { - event.preventDefault() - return rejectSuggestion(view) - } - - if (state?.suggestion && event.key !== 'Shift' && event.key !== 'Control' && event.key !== 'Alt' && event.key !== 'Meta') { - clearGhostText(view) - } - - return false - }, - handleClick: (view, pos) => { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - if (!state?.suggestion) return false - - if (pos >= state.from && pos < state.to) { - return acceptSuggestion(view) - } - + + if (tr.docChanged && value.suggestion) { + return { ...initialState } + } + + return value + } + }, + props: { + handleKeyDown: (view, event) => { + const hasGhost = hasGhostText(view) + + if (event.key === 'Tab' && hasGhost) { + event.preventDefault() + return acceptSuggestion(view) + } + + if (event.key === 'Escape' && hasGhost) { + event.preventDefault() + return rejectSuggestion(view) + } + + if (hasGhost && event.key !== 'Shift' && event.key !== 'Control' && event.key !== 'Alt' && event.key !== 'Meta') { clearGhostText(view) - return false } + + return false }, - view: () => ({ - update: (view, prevState) => { - if (view.state.doc.eq(prevState.doc) && view.state.selection.eq(prevState.selection)) { - return - } - - const 
state = COPILOT_PLUGIN_KEY.getState(view.state) - if (state?.suggestion) { - return - } - - if (!view.state.doc.eq(prevState.doc)) { - const { from, to } = view.state.selection - if (from !== to) return - - const doc = view.state.doc - const prefix = doc.textBetween(0, from) - const suffix = doc.textBetween(to, doc.content.size) - - scheduleFetch(view, from, prefix, suffix) - } + handleClick: (view, pos) => { + const range = getGhostRange(view) + if (!range) return false + + if (pos >= range.from && pos <= range.to) { + return acceptSuggestion(view) } - }) - }) -}) + + clearGhostText(view) + return false + } + }, + view: (view) => { + let activeView = view + let activeDom = view.dom + const runtime: CopilotRuntime = { + enabled: true, + debounceTimer: null, + abortController: null, + ctx + } + runtimeByView.set(view, runtime) + + const onKeydownCapture = (event: KeyboardEvent) => { + if (!hasGhostText(activeView)) return + + if (event.key === 'Tab') { + event.preventDefault() + event.stopPropagation() + event.stopImmediatePropagation?.() + acceptSuggestion(activeView) + return + } + + if (event.key === 'Escape') { + event.preventDefault() + event.stopPropagation() + event.stopImmediatePropagation?.() + rejectSuggestion(activeView) + } + } + + const onPointerDownCapture = (event: MouseEvent) => { + if (!hasGhostText(activeView)) return + const targetNode = event.target instanceof Node ? event.target : null + const target = targetNode instanceof Element ? targetNode : targetNode?.parentElement + if (!target) return + + // Accept suggestion when user clicks any rendered ghost-text fragment. 
+ if (target.closest('[data-copilot-ghost]')) { + event.preventDefault() + event.stopPropagation() + event.stopImmediatePropagation?.() + acceptSuggestion(activeView) + } + } + + const bindDomListeners = (dom: HTMLElement) => { + dom.addEventListener('keydown', onKeydownCapture, true) + dom.addEventListener('mousedown', onPointerDownCapture, true) + } + + const unbindDomListeners = (dom: HTMLElement) => { + dom.removeEventListener('keydown', onKeydownCapture, true) + dom.removeEventListener('mousedown', onPointerDownCapture, true) + } + + bindDomListeners(activeDom) + + return { + update: (nextView, prevState) => { + if (nextView.dom !== activeDom) { + unbindDomListeners(activeDom) + activeDom = nextView.dom + bindDomListeners(activeDom) + } + + activeView = nextView + const docChanged = !nextView.state.doc.eq(prevState.doc) + const selectionChanged = !nextView.state.selection.eq(prevState.selection) + + if (!docChanged && !selectionChanged) { + return + } + + if (hasGhostText(nextView)) { + return + } + + const { from, to } = nextView.state.selection + if (from !== to) { + clearRuntimeRequests(runtime) + return + } + + const doc = nextView.state.doc + const prefix = doc.textBetween(0, from) + const suffix = doc.textBetween(to, doc.content.size) + + scheduleFetch(nextView, runtime, from, prefix, suffix) + }, + destroy: () => { + unbindDomListeners(activeDom) + clearRuntimeRequests(runtime) + runtimeByView.delete(view) + } + } + } +})) export { COPILOT_PLUGIN_KEY } -export function isCopilotEnabled(): boolean { - return enabled +export function isCopilotEnabled(view: EditorView): boolean { + return runtimeByView.get(view)?.enabled ?? 
true } -export function setCopilotEnabled(value: boolean): void { - enabled = value - +export function setCopilotEnabled(view: EditorView, value: boolean): void { + const runtime = runtimeByView.get(view) + if (!runtime) return + + runtime.enabled = value if (!value) { - if (debounceTimer) { - clearTimeout(debounceTimer) - debounceTimer = null - } - if (abortController) { - abortController.abort() - abortController = null - } + clearRuntimeRequests(runtime) } } + +export function checkSizeLimit(view: EditorView): { size: number; overLimit: boolean } { + const doc = view.state.doc + const imageFilenames = extractImageFilenames(doc) + const result = checkOcrSizeLimit(doc.content.size, imageFilenames) + return { size: result.size, overLimit: result.overLimit } +} + +export { SIZE_LIMIT } diff --git a/src/style.css b/src/style.css index 6f0bdb8..bd53d5a 100644 --- a/src/style.css +++ b/src/style.css @@ -72,5 +72,6 @@ body { padding: 0; width: 100%; height: 100%; - overflow: hidden; + overflow-x: hidden; + overflow-y: auto; } diff --git a/src/utils/ocrCache.js b/src/utils/ocrCache.js new file mode 100644 index 0000000..e1ed4ed --- /dev/null +++ b/src/utils/ocrCache.js @@ -0,0 +1,45 @@ +const SIZE_LIMIT = 64 * 1024 + +const ocrCache = new Map() + +export function setOcrCache(filename, text) { + ocrCache.set(filename, text) +} + +export function getOcrCache(filename) { + return ocrCache.get(filename) || '' +} + +export function clearOcrCache(filename) { + ocrCache.delete(filename) +} + +export function hasOcrCache(filename) { + return ocrCache.has(filename) +} + +export function clearAllOcrCache() { + ocrCache.clear() +} + +export function calculateOcrSize(imageFilenames) { + let total = 0 + for (const name of imageFilenames) { + const text = ocrCache.get(name) + if (text) total += new Blob([text]).size + } + return total +} + +export function checkSizeLimit(docTextSize, imageFilenames) { + const ocrSize = calculateOcrSize(imageFilenames) + const total = docTextSize + 
ocrSize + return { + size: total, + docSize: docTextSize, + ocrSize: ocrSize, + overLimit: total > SIZE_LIMIT + } +} + +export const OCR_SIZE_LIMIT = SIZE_LIMIT diff --git a/vite.config.js b/vite.config.js index ebda1c7..526c009 100644 --- a/vite.config.js +++ b/vite.config.js @@ -7,6 +7,42 @@ export default defineConfig({ host: true, port: 5173 }, + build: { + rollupOptions: { + output: { + manualChunks(id) { + if (!id.includes('node_modules')) return + + const modulePath = id.split('node_modules/')[1] + const segments = modulePath.split('/') + const packageName = segments[0].startsWith('@') + ? `${segments[0]}/${segments[1]}` + : segments[0] + + if (packageName.startsWith('@milkdown')) return 'milkdown' + if (packageName.startsWith('prosemirror')) return 'prosemirror' + + if (packageName.startsWith('@codemirror')) { + const langMatch = modulePath.match(/@codemirror\/lang-([^/]+)/) + if (langMatch) return `cm-lang-${langMatch[1]}` + return `cm-${segments[1]}` + } + + if (packageName === 'refractor') { + const langMatch = modulePath.match(/refractor\/lang\/([^./]+)/) + if (langMatch) return `refractor-lang-${langMatch[1]}` + return 'refractor-core' + } + + if (packageName.startsWith('katex')) return 'katex' + if (packageName.startsWith('markdown-it')) return 'markdown' + if (packageName === 'vue' || packageName.startsWith('@vue')) return 'vue' + + return `vendor-${packageName.replace('@', '').replace('/', '-')}` + } + } + } + }, optimizeDeps: { include: [ '@milkdown/crepe',