diff --git a/README.md b/README.md
index 9ace79e..b3e8097 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # LLM in Text - 智能写作助手
 
-基于 Vue3 和 FastAPI 的智能 Markdown 编辑器，集成大语言模型（LLM）实时补全建议功能。
+基于 Vue3 和 FastAPI 的智能 Markdown 编辑器，集成大语言模型（LLM）实时补全建议功能，提供类似 GitHub Copilot 的 Ghost Text 体验。
 
 ## 功能特性
 
@@ -26,20 +26,24 @@
 ## 技术架构
 
 ```mermaid
-flowchart LR
-    subgraph Frontend
-        A[Vue3] --> B[Milkdown Editor]
-        B --> C[ProseMirror Plugin]
-        C --> D[Ghost Text Mark]
+flowchart TB
+    subgraph Frontend["前端 (Vue3 + Vite)"]
+        A[App.vue] --> B[MilkdownEditor.vue]
+        B --> C[Crepe Editor]
+        C --> D[ProseMirror]
+        D --> E[copilotPlugin.ts]
+        E --> F[copilotGhostMark]
+        E --> G[api.js]
     end
     
-    subgraph Backend
-        E[FastAPI] --> F[LLM API]
-        F --> G[Stream Response]
+    subgraph Backend["后端 (FastAPI + Python)"]
+        H[main.py<br/>FastAPI Server] --> I[prompt.py<br/>Prompt 构建]
+        H --> J[llm.py<br/>Ollama 调用]
+        J --> K[Ollama API]
     end
     
-    D -->|SSE| E
-    G -->|text| D
+    G -->|POST /v1/completions<br/>SSE 流式响应| H
+    K -->|LLM 响应| J
 ```
 
 ## 项目结构
@@ -50,7 +54,9 @@ llm-in-text/
 │   ├── components/
 │   │   └── MilkdownEditor.vue    # 主编辑器组件
 │   ├── plugins/
-│   │   └── copilotPlugin.ts      # ProseMirror AI 补全插件
+│   │   ├── copilotPlugin.ts      # ProseMirror AI 补全插件
+│   │   ├── types.ts              # 类型定义
+│   │   └── index.ts              # 插件导出
 │   ├── utils/
 │   │   ├── api.js                # API 调用封装
 │   │   └── config.js             # 配置文件
@@ -69,7 +75,7 @@ llm-in-text/
 ### 环境要求
 - Node.js 18+
 - Python 3.8+
-- OpenAI API Key 或 Ollama 服务
+- Ollama 服务（后端通过 `ollama` Python 客户端连接 `OLLAMA_HOST`，需使用 Ollama 原生 API）
 
 ### 安装
 
@@ -87,9 +93,8 @@ pip install -r requirements.txt
 在 `backend/.env` 中配置：
 
 ```env
-OPENAI_API_KEY=your_api_key
-OLLAMA_BASE_URL=http://localhost:11434/v1/
-OLLAMA_MODEL=gpt-4
+OLLAMA_MODEL=gpt-oss:20b
+OLLAMA_HOST=http://localhost:11434
 ```
 
 ### 启动
@@ -129,7 +134,34 @@ data: {"done": true}
 
 ## 核心实现
 
-### ProseMirror Mark 系统
+### 后端设计
+
+#### main.py - FastAPI 服务器
+- 定义 `/v1/completions` 端点
+- 使用 `StreamingResponse` 返回 SSE 流式响应
+- CORS 配置允许跨域请求
+
+#### llm.py - LLM 调用封装
+- 使用 `ollama.AsyncClient` 异步调用
+- 以非流式（`stream=False`）方式请求，并记录请求耗时与响应长度日志
+- 返回 `content` 和 `thinking` 字段
+
+#### prompt.py - Prompt 工程
+精心设计的 Prompt 模板，包含 7 条核心规则：
+
+| 规则 | 说明 |
+|------|------|
+| RULE #1 | 无缝衔接 - PREFIX + 输出 + SUFFIX 必须读起来是一篇连贯的文档 |
+| RULE #2 | 不重复 suffix - 不复述 SUFFIX 开头已有的文本，避免"复读机"错误 |
+| RULE #3 | 长度适中 - 默认 20-120 字符、1-3 行，仅在语法需要时更短 |
+| RULE #4 | 避免无意义输出 - 不只输出标点或填充符，非必要不只输出单个 token |
+| RULE #5 | 保持局部风格 - 匹配附近的语言、语气、标点、空格与缩进 |
+| RULE #6 | Markdown 感知 - 延续列表/任务列表/有序列表，保留缩进，必要时闭合行内标记 |
+| RULE #7 | 严格输出格式 - 仅输出插入文本，无解释、标签、引号或代码围栏 |
+
+### 前端设计
+
+#### ProseMirror Mark 系统
 
 使用 ProseMirror 的 Mark 系统实现灰色建议文本：
 
@@ -151,12 +183,80 @@ export const copilotGhostMark = $markSchema('copilot_ghost', () => ({
 }
 ```
 
-### 交互处理
+#### copilotPlugin 核心逻辑
 
-- 点击灰色文本区域：接受建议（移除 mark，保留文本）
-- 点击其他区域：拒绝建议（删除灰色文本）
-- Tab 键：接受建议
-- Esc 键：拒绝建议
+```mermaid
+flowchart LR
+    A[用户输入] --> B{文档变化?}
+    B -->|是| C[清除旧建议]
+    C --> D[防抖 500ms]
+    D --> E[发送 API 请求]
+    E --> F[收到建议]
+    F --> G[插入 Ghost Text]
+    
+    G --> H{用户操作}
+    H -->|Tab| I[接受建议<br/>移除 mark]
+    H -->|Esc| J[拒绝建议<br/>删除文本]
+    H -->|点击 Ghost| I
+    H -->|继续输入| J
+```
+
+#### 关键函数
+
+| 函数 | 作用 |
+|------|------|
+| `scheduleFetch` | 防抖调度 API 请求 |
+| `insertGhostText` | 插入带 mark 的建议文本 |
+| `acceptSuggestion` | Tab 接受建议 |
+| `rejectSuggestion` | Esc 拒绝建议 |
+| `clearGhostText` | 清除当前建议 |
+
+### 数据流
+
+```mermaid
+sequenceDiagram
+    participant U as 用户
+    participant E as Editor (ProseMirror)
+    participant P as copilotPlugin
+    participant A as api.js
+    participant B as Backend
+    participant L as LLM
+    
+    U->>E: 输入文本
+    E->>P: view.update()
+    P->>P: 清除旧建议
+    P->>P: 防抖 500ms
+    P->>A: fetchSuggestion(prefix, suffix)
+    A->>B: POST /v1/completions
+    B->>B: build_prompt()
+    B->>L: ollama.chat()
+    L-->>B: {content, thinking}
+    B-->>A: SSE stream
+    A-->>P: suggestion text
+    P->>E: insertGhostText()
+    E-->>U: 显示灰色建议
+    
+    alt Tab 键
+        U->>P: Tab
+        P->>E: acceptSuggestion()
+        E-->>U: 建议变为正常文本
+    else Esc 键
+        U->>P: Esc
+        P->>E: rejectSuggestion()
+        E-->>U: 建议消失
+    else 继续输入
+        U->>E: 输入其他字符
+        E->>P: handleKeyDown()
+        P->>E: clearGhostText()
+    end
+```
+
+## 设计亮点
+
+1. **前后端分离**：前端只负责渲染和数据回传，后端负责 LLM 调用、Prompt 构建和数据解析
+2. **低延迟优化**：防抖机制 (500ms) + SSE 流式响应 + AbortController 取消过期请求
+3. **ProseMirror Mark 系统**：与编辑器状态完美集成，支持 Undo/Redo
+4. **多种交互方式**：Tab/Esc/点击/输入，用户体验友好
 
 ## 许可证
 
diff --git a/backend/.env b/backend/.env
index 1431614..37307ec 100644
--- a/backend/.env
+++ b/backend/.env
@@ -1,3 +1,4 @@
 OPENAI_API_KEY=ollama
 OLLAMA_HOST=http://192.168.0.120:11434
 OLLAMA_MODEL=gpt-oss:20b
+VLM_MODEL=qwen3-vl:30b
diff --git a/backend/.env.example b/backend/.env.example
index 7877999..c74dfba 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -1,3 +1,4 @@
 OPENAI_API_KEY=ollama
 OLLAMA_BASE_URL=http://192.168.0.120:11434/v1/
 OLLAMA_MODEL=gpt-oss:120b
+VLM_MODEL=qwen3-vl:30b
diff --git a/backend/__pycache__/llm.cpython-313.pyc b/backend/__pycache__/llm.cpython-313.pyc
index e6c7c38..bd49ef5 100644
Binary files a/backend/__pycache__/llm.cpython-313.pyc and b/backend/__pycache__/llm.cpython-313.pyc differ
diff --git a/backend/__pycache__/main.cpython-313.pyc b/backend/__pycache__/main.cpython-313.pyc
new file mode 100644
index 0000000..5954b67
Binary files /dev/null and b/backend/__pycache__/main.cpython-313.pyc differ
diff --git a/backend/__pycache__/prompt.cpython-313.pyc b/backend/__pycache__/prompt.cpython-313.pyc
index 4b8e849..430125b 100644
Binary files a/backend/__pycache__/prompt.cpython-313.pyc and b/backend/__pycache__/prompt.cpython-313.pyc differ
diff --git a/backend/llm.py b/backend/llm.py
index 0385aa9..2a4f236 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -1,4 +1,6 @@
 import os
+import time
+import logging
 import ollama
 from dotenv import load_dotenv
 
@@ -6,27 +8,40 @@ load_dotenv()
 
 OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b')
 OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://192.168.0.120:11434')
+VLM_MODEL = os.getenv('VLM_MODEL', 'qwen3-vl:30b')
 
 client = ollama.AsyncClient(host=OLLAMA_HOST)
+logger = logging.getLogger("llm")
 
-async def call_ollama(prompt: str) -> dict:
-    """
-    调用 Ollama API 并返回 content 和 thinking。
-    """
-    response = await client.chat(
-        model=OLLAMA_MODEL,
-        messages=[{'role': 'user', 'content': prompt}],
-        stream=False,
-        options={
-            'temperature': 0.7,
-            'repeat_penalty': 1.1,
-        },
-        think='high'
-    )
-    
+VLM_OCR_CONTEXT_PROMPT = """You are an OCR and visual-context extractor for markdown writing assistance.
+
+Your output will be embedded inside an HTML comment as hidden context for a text-completion model.
+
+Requirements:
+- Keep output compact: maximum 120 words.
+- Use plain text only (no markdown code fences).
+- Never output <!-- or -->.
+- Do not invent unreadable text; mark uncertain characters with ?.
+- Preserve original script for recognized text (do not forcibly translate).
+
+Return exactly this format:
+
+TEXT:
+<exact transcription of visible text; use " | " for line breaks; write "(none)" if no readable text>
+
+KEY_DETAILS:
+- <3-5 short factual bullets about relevant objects/layout>
+
+LANGUAGE:
+<dominant language(s) in visible text, e.g. English / Chinese / Mixed>
+
+SUMMARY:
+<one short sentence, <= 20 words>"""
+
+def _extract_message(response) -> tuple[str, str]:
     content = ""
     thinking = ""
-    
+
     if hasattr(response, 'message') and response.message:
         content = response.message.content or ""
         thinking = getattr(response.message, 'thinking', '') or ""
@@ -34,5 +49,92 @@ async def call_ollama(prompt: str) -> dict:
         msg = response.get('message', {})
         content = msg.get('content', '') or ""
         thinking = msg.get('thinking', '') or ""
-    
+
+    return content, thinking
+
+
+async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict:
+    """
+    调用 Ollama API 并返回 content 和 thinking。
+    """
+    start = time.perf_counter()
+    logger.info(
+        "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f",
+        tag,
+        OLLAMA_MODEL,
+        OLLAMA_HOST,
+        len(prompt),
+        temperature,
+    )
+
+    try:
+        response = await client.chat(
+            model=OLLAMA_MODEL,
+            messages=[{'role': 'user', 'content': prompt}],
+            stream=False,
+            options={
+                'temperature': temperature,
+                'repeat_penalty': 1.1,
+            },
+        )
+    except Exception:
+        elapsed_ms = (time.perf_counter() - start) * 1000
+        logger.exception("[LLM][%s] request failed after %.1fms", tag, elapsed_ms)
+        raise
+
+    content, thinking = _extract_message(response)
+    elapsed_ms = (time.perf_counter() - start) * 1000
+    logger.info(
+        "[LLM][%s] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d",
+        tag,
+        elapsed_ms,
+        type(response).__name__,
+        len(content),
+        len(thinking),
+    )
+
+    if not content.strip():
+        logger.warning("[LLM][%s] empty content returned by model", tag)
+
     return {"content": content, "thinking": thinking}
+
+async def call_vlm_ocr(image_bytes: bytes, language: str = 'auto') -> str:
+    start = time.perf_counter()
+    logger.info(
+        "[VLM][ocr] request model=%s host=%s image_bytes=%d language=%s",
+        VLM_MODEL,
+        OLLAMA_HOST,
+        len(image_bytes),
+        language,
+    )
+
+    try:
+        response = await client.chat(
+            model=VLM_MODEL,
+            messages=[{
+                'role': 'user',
+                'content': VLM_OCR_CONTEXT_PROMPT,
+                'images': [image_bytes]
+            }],
+            stream=False,
+            options={'temperature': 0.3}
+        )
+    except Exception:
+        elapsed_ms = (time.perf_counter() - start) * 1000
+        logger.exception("[VLM][ocr] request failed after %.1fms", elapsed_ms)
+        raise
+
+    content, thinking = _extract_message(response)
+    elapsed_ms = (time.perf_counter() - start) * 1000
+    logger.info(
+        "[VLM][ocr] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d",
+        elapsed_ms,
+        type(response).__name__,
+        len(content),
+        len(thinking),
+    )
+
+    if not content.strip():
+        logger.warning("[VLM][ocr] empty content returned by model")
+
+    return content
diff --git a/backend/main.py b/backend/main.py
index 018c4d4..72b9ba4 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -3,9 +3,18 @@ from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
 import json
+import base64
+import uuid
+import logging
 
 from prompt import build_prompt
-from llm import call_ollama
+from llm import call_ollama, call_vlm_ocr
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
+)
+logger = logging.getLogger("api")
 
 app = FastAPI()
 
@@ -22,24 +31,100 @@ class CompletionRequest(BaseModel):
     suffix: str
     languageId: str = 'markdown'
 
+class OCRRequest(BaseModel):
+    image: str
+    filename: str = "image.jpg"
+    language: str = 'auto'
+
+
+def _preview(text: str, limit: int = 80) -> str:
+    value = (text or "").replace("\n", "\\n")
+    if len(value) <= limit:
+        return value
+    return value[:limit] + "..."
+
+
+def _build_force_non_empty_prompt(base_prompt: str) -> str:
+    return (
+        base_prompt
+        + "\n\nStrict override for this request:\n"
+        + "- Output must be non-empty.\n"
+        + "- If you would otherwise output empty, output a single space.\n"
+        + "- Keep it short and do not repeat SUFFIX.\n"
+    )
+
+
 @app.post("/v1/completions")
 async def create_completion(request: CompletionRequest):
+    request_id = str(uuid.uuid4())[:8]
     try:
-        prompt = build_prompt(request.prefix, request.suffix)
-        result = await call_ollama(prompt)
-        
-        content = result["content"]
-        
+        logger.info(
+            "[%s] /v1/completions prefix_chars=%d suffix_chars=%d lang=%s prefix_tail='%s' suffix_head='%s'",
+            request_id,
+            len(request.prefix or ""),
+            len(request.suffix or ""),
+            request.languageId,
+            _preview((request.prefix or "")[-120:]),
+            _preview((request.suffix or "")[:120]),
+        )
+        prompt = build_prompt(request.prefix, request.suffix, request.languageId)
+        result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)
+
+        content = result["content"] or ""
+        source = "primary"
+        if not content.strip():
+            logger.warning("[%s] primary returned empty content, starting retry", request_id)
+            retry_prompt = _build_force_non_empty_prompt(prompt)
+            retry_result = await call_ollama(retry_prompt, tag=f"{request_id}-retry1", temperature=0.4)
+            content = retry_result["content"] or ""
+            source = "retry1"
+
+        if not content.strip():
+            content = " "
+            source = "fallback-space"
+            logger.warning("[%s] retry still empty, forcing single-space fallback", request_id)
+
+        logger.info(
+            "[%s] completion resolved source=%s content_chars=%d content_preview='%s'",
+            request_id,
+            source,
+            len(content),
+            _preview(content, 120),
+        )
+
         async def generate():
-            if content:
-                yield f"data: {json.dumps({'content': content})}\n\n"
+            yield f"data: {json.dumps({'content': content})}\n\n"
             yield f"data: {json.dumps({'done': True})}\n\n"
-        
+
         return StreamingResponse(generate(), media_type="text/event-stream")
-        
+
     except Exception as e:
-        import traceback
-        traceback.print_exc()
+        logger.exception("[%s] /v1/completions failed: %s", request_id, e)
+        return JSONResponse(content={"error": str(e)}, status_code=500)
+
+@app.post("/v1/ocr")
+async def ocr_image(request: OCRRequest):
+    request_id = str(uuid.uuid4())[:8]
+    try:
+        logger.info(
+            "[%s] /v1/ocr filename=%s language=%s image_base64_chars=%d",
+            request_id,
+            request.filename,
+            request.language,
+            len(request.image or ""),
+        )
+        image_bytes = base64.b64decode(request.image)
+        logger.info("[%s] /v1/ocr decoded image_bytes=%d", request_id, len(image_bytes))
+        result = await call_vlm_ocr(image_bytes, request.language)
+        logger.info(
+            "[%s] /v1/ocr success text_chars=%d text_preview='%s'",
+            request_id,
+            len(result or ""),
+            _preview(result or "", 120),
+        )
+        return {"text": result, "filename": request.filename}
+    except Exception as e:
+        logger.exception("[%s] /v1/ocr failed: %s", request_id, e)
         return JSONResponse(content={"error": str(e)}, status_code=500)
 
 if __name__ == "__main__":
diff --git a/backend/prompt.py b/backend/prompt.py
index bcf02c7..5acbc5a 100644
--- a/backend/prompt.py
+++ b/backend/prompt.py
@@ -1,202 +1,84 @@
-import os
 from typing import Tuple
 
-def build_prompt(prefix: str, suffix: str) -> str:
+MAX_PREFIX_CHARS = 12000
+MAX_SUFFIX_CHARS = 4000
+
+
+def _sanitize_language_id(language_id: str) -> str:
+    if not language_id:
+        return "markdown"
+    allowed = []
+    for ch in language_id.strip():
+        if ch.isalnum() or ch in "-_+.":
+            allowed.append(ch)
+    value = "".join(allowed)[:32]
+    return value or "markdown"
+
+
+def _prepare_context(prefix: str, suffix: str) -> Tuple[str, str]:
     """
-    优化后的提示词构建函数。
-    使用明确的分隔符区分指令部分和实际的 prefix/suffix 内容。
+    Prepare prefix/suffix for model completion context.
+    Keep the historical one-char lookahead behavior to reduce boundary drift.
     """
-    # 修正：把suffix的第一个字符移到prefix末尾（解决光标位置偏差）
     if suffix:
-        first_char = suffix[0]
-        prefix = prefix + first_char
+        prefix = prefix + suffix[0]
         suffix = suffix[1:]
-    
-    recent_prefix = prefix
-    recent_suffix = suffix
+    return prefix[-MAX_PREFIX_CHARS:], suffix[:MAX_SUFFIX_CHARS]
 
-    prompt = f"""You are an expert writing assistant integrated into a text editor. Your task is to complete the text at the cursor position.
 
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-RULES
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+def build_prompt(prefix: str, suffix: str, language_id: str = "markdown") -> str:
+    safe_language_id = _sanitize_language_id(language_id)
+    recent_prefix, recent_suffix = _prepare_context(prefix, suffix)
 
-RULE #1: SEAMLESS CONNECTION (MOST CRITICAL)
+    prompt = f"""You are an inline completion engine for a {safe_language_id} editor with ghost-text suggestions.
 
-Your continuation MUST seamlessly bridge the prefix and suffix. This is the MOST IMPORTANT rule.
+Your job:
+- Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX.
+- Prefer a meaningful, non-empty insertion with moderate length.
+- Avoid overly short outputs with little information value.
 
-The "复读机" (Parrot) Error is when you repeat content that already exists in the suffix. This is the WORST mistake you can make.
+Important context:
+- PREFIX may contain hidden OCR metadata in HTML comments such as <!--OCR:...-->.
+- These comments are non-visible context only.
+- Never copy, rewrite, or emit HTML comments in output.
+- Never output <!-- or -->.
 
-Requirements:
-- Your output must connect prefix to suffix smoothly
-- NEVER repeat content that already exists in the suffix
-- If prefix already flows naturally into suffix, output NOTHING (empty string)
-- The result should read as one coherent text, as if you never interrupted it
+Hard rules:
+1. Seamless join:
+   PREFIX + OUTPUT + SUFFIX must read naturally as one continuous document.
+2. No suffix repetition:
+   Do NOT repeat text that already appears at the start of SUFFIX.
+3. Balanced length:
+   Prefer concise but meaningful continuation, not ultra-short fragments.
+   Default target is 20-120 characters and 1-3 lines.
+   You may go shorter only when syntax requires it.
+4. Avoid trivial output:
+   Do not output only punctuation or filler such as ".", ",", ";", ":".
+   Do not output just one token unless it is structurally necessary.
+5. Preserve local style:
+   Match nearby language, tone, punctuation, spacing, and indentation.
+6. Markdown awareness:
+   Continue active list/checkbox/ordered-list patterns when applicable.
+   Preserve indentation in nested list/code contexts.
+   Close obvious unclosed inline markdown markers only when needed to bridge.
+7. Strict output format:
+   Output insertion text only.
+   No explanations, labels, quotes, or code fences.
 
-RULE #2: WHITESPACE & PUNCTUATION
+Decision policy:
+- If PREFIX already connects naturally to SUFFIX, add a brief but useful continuation when possible.
+- If uncertain, prefer a complete short phrase or sentence with clear meaning.
 
-You must carefully check the LAST character of prefix and FIRST character of suffix to ensure perfect docking.
-
-Requirements:
-- If prefix ends with space, do NOT start your output with space (prevents double spaces)
-- If prefix does NOT end with space and suffix starts with a letter, you may need to add a space
-- If suffix starts with punctuation, do NOT end your output with the same punctuation
-- Check for existing spaces around operators before adding more
-
-RULE #3: INDENTATION ALIGNMENT
-
-You MUST match the indentation level of the current context.
-
-Requirements:
-- Look at the line where cursor is positioned
-- Count the leading spaces/tabs on that line
-- Match that indentation for new lines
-- Use the SAME type of indentation (spaces OR tabs) as the existing code
-- For nested blocks, increase indentation appropriately
-- For closing braces, match the opening brace's indentation
-
-RULE #4: LIST MAINTENANCE
-
-When the prefix ends with a list marker, you MUST recognize the pattern and continue it appropriately.
-
-Requirements:
-- "- [ ] " indicates an unchecked task → continue with task description
-- "- [x] " indicates a checked task → continue with completed task description  
-- "1. ", "2. ", etc. indicates ordered list → increment the number
-- "* " or "- " indicates bullet list → continue with same marker style
-- "> " indicates blockquote → continue quoted text
-- Maintain the same list format and indentation level
-
-RULE #5: SYNTAX CLOSURE
-
-Before generating content, CHECK if there are unclosed syntax elements. If so, you MUST close them FIRST.
-
-Requirements:
-- Scan prefix for opening markers: **, *, `, [, ![, ```
-- Check if each opening has a corresponding closing
-- If unclosed, add the closing marker FIRST before continuing with content
-- Markdown syntax pairs to check:
-  - Bold: ** must have closing **
-  - Italic: * must have closing *
-  - Bold + Italic: *** must have closing ***
-  - Inline code: ` must have closing `
-  - Code block: ``` must have closing ```
-  - Link: [text must have closing ](url)
-  - Image: ![alt must have closing ](url)
-
-RULE #6: OUTPUT FORMAT
-
-Your output will be directly inserted into the document. Output ONLY the continuation text.
-
-Requirements:
-- Output ONLY the text that should appear at the cursor position
-- NO explanations, NO comments, NO meta-text
-- NO code blocks wrapping your output
-- NO phrases like "Here's the continuation:" or "I'll complete this for you:"
-- Your output is inserted DIRECTLY into the user's document
-
-RULE #7: ALWAYS OUTPUT SOMETHING (MANDATORY)
-
-You MUST always output some content. Empty output is NOT allowed.
-       
-Requirements:
-- Even if the prefix seems complete, you should suggest a natural continuation
-- If the prefix ends mid-sentence, complete the sentence
-- If the prefix ends at a natural break point, suggest the next logical content
-- Examples of valid continuations:
-  - Add the next word or phrase
-  - Complete an incomplete thought
-  - Add a relevant follow-up sentence
-  - Continue a list with the next item
-  - Add closing punctuation if missing
-- NEVER output an empty string - always provide some useful continuation
-
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-EXAMPLES
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-EXAMPLE 1 - Seamless Connection:
+Examples:
 <PREFIX>The quick brown fox </PREFIX>
 <SUFFIX>jumps over the lazy dog.</SUFFIX>
-Output: "" (empty - nothing needed, prefix already connects to suffix)
-Result: "The quick brown fox jumps over the lazy dog."
+Output: "moved quietly and then "
 
-EXAMPLE 2 - Seamless Connection with Space:
-<PREFIX>Hello</PREFIX>
-<SUFFIX>world!</SUFFIX>
-Output: " "
-Result: "Hello world!"
-
-EXAMPLE 3 - Whitespace Docking:
-<PREFIX>const a = </PREFIX>
-<SUFFIX>1;</SUFFIX>
-Output: "1;"
-Result: "const a = 1;"
-
-EXAMPLE 4 - Indentation Alignment:
-<PREFIX>function test() {{\\n    if (true) {{\\n        console.log('hi');\\n    </PREFIX>
-<SUFFIX>\\n}}</SUFFIX>
-Output: "}}\\n}}"
-Result: "    }}\\n}}" (correctly closes if with 4 spaces, then function)
-
-EXAMPLE 5 - Task List:
-<PREFIX>## TODO\\n- [ ] Buy groceries\\n- [ ] </PREFIX>
+<PREFIX>## TODO\\n- [ ] Buy milk\\n- [ ] </PREFIX>
 <SUFFIX></SUFFIX>
-Output: "Call mom"
-Result: "## TODO\\n- [ ] Buy groceries\\n- [ ] Call mom"
+Output: "Write release notes and share draft with team"
 
-EXAMPLE 6 - Ordered List:
-<PREFIX>1. First item\\n2. Second item\\n</PREFIX>
-<SUFFIX></SUFFIX>
-Output: "3. Third item"
-Result: "1. First item\\n2. Second item\\n3. Third item"
-
-EXAMPLE 7 - Bullet List:
-<PREFIX>* Apple\\n* Banana\\n* </PREFIX>
-<SUFFIX></SUFFIX>
-Output: "Cherry"
-Result: "* Apple\\n* Banana\\n* Cherry"
-
-EXAMPLE 8 - Unclosed Bold:
-<PREFIX>This is **important</PREFIX>
-<SUFFIX> text continues here.</SUFFIX>
-Output: "** "
-Result: "This is **important** text continues here."
-
-EXAMPLE 9 - Unclosed Link:
-<PREFIX>Click [here for more</PREFIX>
-<SUFFIX> information.</SUFFIX>
-Output: "](https://example.com)"
-Result: "Click [here for more](https://example.com) information."
-
-EXAMPLE 10 - Unclosed Code Block:
-<PREFIX>```python\\ndef hello():</PREFIX>
-<SUFFIX>\\nprint('done')</SUFFIX>
-Output: "\\n    print('hello')\\n```"
-Result: Code block properly closed with ```
-
-EXAMPLE 11 - Clean Output:
-For any completion, output ONLY the continuation text:
-Output: "Hello world!"
-NOT: "Here's what comes next: Hello world!"
-NOT: "```Hello world```"
-NOT: "I'll complete this for you: Hello world!"
-
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-FINAL CHECKLIST
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-Before outputting, verify:
-□ Does my output connect prefix and suffix WITHOUT repeating suffix content?
-□ Are there no double spaces or missing spaces between prefix and suffix?
-□ Does my indentation match the context?
-□ If there's a list marker, did I continue the list pattern?
-□ Did I close any unclosed Markdown syntax?
-□ Is my output ONLY the continuation text, nothing else?
-
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-NOW COMPLETE THE FOLLOWING TEXT
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+Now produce the insertion.
 
 <PREFIX>
 {recent_prefix}
@@ -207,5 +89,5 @@ NOW COMPLETE THE FOLLOWING TEXT
 </SUFFIX>
 
 Output:"""
-    
+
     return prompt.strip()
diff --git a/plans/image-button-plan.md b/plans/image-button-plan.md
new file mode 100644
index 0000000..b39d27e
--- /dev/null
+++ b/plans/image-button-plan.md
@@ -0,0 +1,269 @@
+# Image Button Implementation Plan
+
+## Overview
+
+Add an image button to the MilkdownEditor that allows users to insert images at the cursor position. The button will provide a dropdown menu with two options: upload local file or input image URL.
+
+## Current Architecture Analysis
+
+### Existing Image Handling
+
+The editor already has image support through `@milkdown/crepe`:
+
+```javascript
+// From MilkdownEditor.vue lines 217-231
+features: {
+    [Crepe.Feature.Latex]: true,
+    [Crepe.Feature.ImageBlock]: true,
+},
+featureConfigs: {
+    [Crepe.Feature.ImageBlock]: {
+        onUpload: (file) => {
+            const objectUrl = URL.createObjectURL(file)
+            objectUrls.add(objectUrl)
+            performOCR(file, objectUrl)
+            return objectUrl
+        }
+    }
+}
+```
+
+### Editor Access Pattern
+
+The code uses `editorViewCtx` to access the ProseMirror editor view:
+
+```javascript
+crepe.editor.action((ctx) => {
+    const view = ctx.get(editorViewCtx)
+    // manipulate editor state
+})
+```
+
+## Implementation Plan
+
+### 1. Template Changes
+
+Add new button with dropdown menu in the `action-buttons` section:
+
+```html
+<!-- Image button with dropdown -->
+<div class="image-btn-wrapper">
+  <button
+    type="button"
+    class="action-btn"
+    aria-label="Insert Image"
+    title="Insert Image"
+    @click="toggleImageDropdown"
+  >
+    <!-- Image SVG icon -->
+    <svg>...</svg>
+    <span class="btn-tooltip">Insert Image</span>
+  </button>
+  
+  <!-- Dropdown menu -->
+  <div v-if="showImageDropdown" class="image-dropdown">
+    <button @click="triggerImageUpload">Upload Local Image</button>
+    <button @click="showUrlDialog = true">Insert from URL</button>
+  </div>
+</div>
+
+<!-- Hidden file input for image upload -->
+<input type="file" ref="imageInputRef" @change="handleImageUpload" accept="image/*" style="display:none">
+
+<!-- URL input dialog -->
+<div v-if="showUrlDialog" class="url-dialog-overlay" @click.self="showUrlDialog = false">
+  <div class="url-dialog">
+    <input v-model="imageUrl" placeholder="Enter image URL" />
+    <button @click="insertImageFromUrl">Insert</button>
+    <button @click="showUrlDialog = false">Cancel</button>
+  </div>
+</div>
+```
+
+### 2. Script Changes
+
+Add new refs and methods:
+
+```javascript
+// New refs
+const imageInputRef = ref(null)
+const showImageDropdown = ref(false)
+const showUrlDialog = ref(false)
+const imageUrl = ref('')
+
+// Toggle dropdown
+const toggleImageDropdown = () => {
+    showImageDropdown.value = !showImageDropdown.value
+}
+
+// Trigger file input
+const triggerImageUpload = () => {
+    showImageDropdown.value = false
+    imageInputRef.value?.click()
+}
+
+// Handle file upload - reuse existing onUpload logic
+const handleImageUpload = async (event) => {
+    const file = event.target.files?.[0]
+    if (!file) return
+    
+    const objectUrl = URL.createObjectURL(file)
+    objectUrls.add(objectUrl)
+    performOCR(file, objectUrl)
+    
+    // Insert image at cursor
+    insertImageAtCursor(objectUrl)
+    event.target.value = ''
+}
+
+// Insert image from URL
+const insertImageFromUrl = () => {
+    if (!imageUrl.value.trim()) return
+    insertImageAtCursor(imageUrl.value.trim())
+    imageUrl.value = ''
+    showUrlDialog.value = false
+}
+
+// Core function: insert image at cursor position
+const insertImageAtCursor = (src) => {
+    if (!crepe) return
+    
+    crepe.editor.action((ctx) => {
+        const view = ctx.get(editorViewCtx)
+        const { state } = view
+        const { selection, schema } = state
+        
+        // Get image node type from schema
+        const imageType = schema.nodes.image
+        if (!imageType) return
+        
+        // Create image node
+        const imageNode = imageType.create({ src })
+        
+        // Create transaction to insert at cursor
+        // A `const` binding cannot be reassigned, so build the transaction in one expression
+        const tr = state.tr.replaceSelectionWith(imageNode)
+        
+        view.dispatch(tr)
+    })
+}
+```
+
+### 3. Style Changes
+
+Add styles for dropdown and dialog:
+
+```css
+/* Image button wrapper */
+.image-btn-wrapper {
+    position: relative;
+}
+
+/* Dropdown menu */
+.image-dropdown {
+    position: absolute;
+    bottom: 100%;
+    right: 0;
+    margin-bottom: 8px;
+    background: #fff;
+    border: 1px solid #ddd;
+    border-radius: 8px;
+    box-shadow: 0 2px 8px rgba(0,0,0,0.15);
+    overflow: hidden;
+    z-index: 10000;
+    min-width: 160px;
+}
+
+.image-dropdown button {
+    display: block;
+    width: 100%;
+    padding: 10px 16px;
+    border: none;
+    background: none;
+    text-align: left;
+    cursor: pointer;
+    font-size: 14px;
+    color: #333;
+}
+
+.image-dropdown button:hover {
+    background: #f5f5f5;
+}
+
+/* URL dialog overlay */
+.url-dialog-overlay {
+    position: fixed;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background: rgba(0,0,0,0.3);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 10001;
+}
+
+.url-dialog {
+    background: #fff;
+    padding: 20px;
+    border-radius: 8px;
+    box-shadow: 0 4px 16px rgba(0,0,0,0.2);
+}
+
+.url-dialog input {
+    width: 300px;
+    padding: 8px 12px;
+    border: 1px solid #ddd;
+    border-radius: 4px;
+    margin-bottom: 12px;
+}
+
+.url-dialog button {
+    padding: 8px 16px;
+    margin-right: 8px;
+    border: 1px solid #ddd;
+    border-radius: 4px;
+    cursor: pointer;
+}
+```
+
+## Workflow Diagram
+
+```mermaid
+flowchart TD
+    A[Click Image Button] --> B{Toggle Dropdown}
+    B --> C[Show Dropdown Menu]
+    C --> D{User Choice}
+    D -->|Upload Local| E[Open File Picker]
+    D -->|From URL| F[Show URL Dialog]
+    E --> G[Select Image File]
+    G --> H[Create Object URL]
+    H --> I[Perform OCR]
+    I --> J[Insert Image at Cursor]
+    F --> K[Enter URL]
+    K --> L[Click Insert]
+    L --> J
+    J --> M[Image Appears in Editor]
+```
+
+## Key Implementation Notes
+
+1. **Reuse existing logic**: The `onUpload` callback logic for `Crepe.Feature.ImageBlock` should be reused for local file uploads to maintain consistency with OCR processing.
+
+2. **ProseMirror API**: Use `schema.nodes.image.create()` and `replaceSelectionWith()` to insert images at cursor position.
+
+3. **Click outside to close**: The dropdown should close when clicking outside. This can be achieved with a click-outside directive or by listening to document clicks.
+
+4. **Accessibility**: Ensure proper ARIA labels and keyboard navigation support.
+
+## Files to Modify
+
+- `src/components/MilkdownEditor.vue` - All changes will be in this single file
+
+## Dependencies
+
+No new dependencies required. All functionality uses existing:
+- Vue 3 Composition API
+- Milkdown/ProseMirror APIs
+- Native browser APIs (URL.createObjectURL, `<input type="file">`)
\ No newline at end of file
diff --git a/src/App.vue b/src/App.vue
index c330dc6..2844c9b 100644
--- a/src/App.vue
+++ b/src/App.vue
@@ -1,6 +1,7 @@
 <script setup>
-import MilkdownEditor from './components/MilkdownEditor.vue'
-import { ref } from 'vue'
+import { defineAsyncComponent, ref } from 'vue'
+
+const MilkdownEditor = defineAsyncComponent(() => import('./components/MilkdownEditor.vue'))
 
 const markdown = ref('')
 const emit = defineEmits(['update:markdown'])
diff --git a/src/components/MilkdownEditor.vue b/src/components/MilkdownEditor.vue
index de57d34..81e5bff 100644
--- a/src/components/MilkdownEditor.vue
+++ b/src/components/MilkdownEditor.vue
@@ -1,9 +1,15 @@
 <template>
-  <div class="editor-container" ref="containerRef">
+  <div class="editor-container">
     <div ref="root" class="milkdown-editor"></div>
 
     <div class="action-buttons">
-      <button class="action-btn" @click="triggerUpload">
+      <button
+        type="button"
+        class="action-btn"
+        aria-label="导入 Markdown 文件"
+        title="导入 Markdown"
+        @click="triggerUpload"
+      >
         <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
           <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
           <polyline points="17 8 12 3 7 8"/>
@@ -13,7 +19,13 @@
       </button>
       <input type="file" ref="fileInputRef" @change="handleFileUpload" accept=".md" style="display:none">
       
-      <button class="action-btn" @click="exportMarkdown">
+      <button
+        type="button"
+        class="action-btn"
+        aria-label="导出 Markdown 文件"
+        title="导出 Markdown"
+        @click="exportMarkdown"
+      >
         <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
           <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
           <polyline points="7 10 12 15 17 10"/>
@@ -22,34 +34,217 @@
         <span class="btn-tooltip">导出 Markdown</span>
       </button>
       
-      <button 
-        class="action-btn ai-toggle" 
-        :class="{ 'ai-disabled': !aiEnabled }"
+      <div class="image-btn-wrapper">
+        <button
+          type="button"
+          class="action-btn"
+          aria-label="Insert Image"
+          title="Insert Image"
+          @click="toggleImageDropdown"
+        >
+          <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+            <rect x="3" y="3" width="18" height="18" rx="2" ry="2"/>
+            <circle cx="8.5" cy="8.5" r="1.5"/>
+            <polyline points="21 15 16 10 5 21"/>
+          </svg>
+          <span class="btn-tooltip">Insert Image</span>
+        </button>
+        <div v-if="showImageDropdown" class="image-dropdown">
+          <button type="button" @click="triggerImageUpload">Upload Local Image</button>
+          <button type="button" @click="showUrlDialog = true; showImageDropdown = false">Insert from URL</button>
+        </div>
+      </div>
+      <input type="file" ref="imageInputRef" @change="handleImageUpload" accept="image/*" style="display:none">
+      
+      <button
+        type="button"
+        class="action-btn ai-toggle"
+        :class="{ 'ai-disabled': !aiEnabled, 'force-disabled': isOverLimit }"
         @click="toggleAI"
+        :disabled="isOverLimit"
+        :aria-label="aiButtonLabel"
+        :title="aiButtonLabel"
       >
         <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
           <path d="M12 2a10 10 0 1 0 10 10A10 10 0 0 0 12 2z"/>
           <path d="M12 6v6l4 2"/>
         </svg>
-        <span class="btn-tooltip">{{ aiEnabled ? '禁用 AI' : '启用 AI' }}</span>
+        <span class="btn-tooltip">{{ aiButtonLabel }}</span>
       </button>
+      
+      <div class="size-indicator" :class="{ 'over-limit': isOverLimit }" aria-live="polite">
+        {{ sizeInKB }} KB
+      </div>
+    </div>
+    
+    <div v-if="showUrlDialog" class="url-dialog-overlay" @click.self="showUrlDialog = false">
+      <div class="url-dialog">
+        <h3>Insert Image from URL</h3>
+        <input 
+          v-model="imageUrl" 
+          type="url" 
+          placeholder="Enter image URL"
+          @keyup.enter="insertImageFromUrl"
+        />
+        <div class="url-dialog-buttons">
+          <button type="button" class="dialog-btn primary" @click="insertImageFromUrl">Insert</button>
+          <button type="button" class="dialog-btn" @click="showUrlDialog = false; imageUrl = ''">Cancel</button>
+        </div>
+      </div>
     </div>
   </div>
 </template>
 
 <script setup>
-import { onMounted, onUnmounted, ref } from 'vue'
+import { onMounted, onUnmounted, ref, computed } from 'vue'
 import { replaceAll } from '@milkdown/kit/utils'
 import { Crepe } from '@milkdown/crepe'
-import { copilotPlugin, copilotConfigCtx, copilotGhostMark, isCopilotEnabled, setCopilotEnabled, COPILOT_PLUGIN_KEY } from '../plugins/copilotPlugin'
+import { editorViewCtx } from '@milkdown/kit/core'
+import { copilotPlugin, copilotConfigCtx, copilotGhostMark, setCopilotEnabled, COPILOT_PLUGIN_KEY, SIZE_LIMIT, checkSizeLimit } from '../plugins/copilotPlugin'
 import { fetchSuggestion } from '../utils/api.js'
-import { DEBUG } from '../utils/config.js'
+import { DEBUG, API_URL } from '../utils/config.js'
+import { setOcrCache, clearOcrCache, clearAllOcrCache } from '../utils/ocrCache.js'
+
+const emit = defineEmits(['update:markdown'])
 
 const root = ref(null)
-const containerRef = ref(null)
 const fileInputRef = ref(null)
+const imageInputRef = ref(null)
 const aiEnabled = ref(true)
+const contentSize = ref(0)
+const showImageDropdown = ref(false)
+const showUrlDialog = ref(false)
+const imageUrl = ref('')
+const isOverLimit = computed(() => contentSize.value > SIZE_LIMIT)
+const sizeInKB = computed(() => Math.floor(contentSize.value / 1024))
+const aiButtonLabel = computed(() => {
+  if (isOverLimit.value) return '文档过大，AI已禁用'
+  return aiEnabled.value ? '禁用 AI' : '启用 AI'
+})
+
 let crepe = null
+let markdownSyncTimer = null
+const objectUrls = new Set()
+
+const revokeObjectUrl = (url) => {
+  if (!objectUrls.has(url)) return
+  URL.revokeObjectURL(url)
+  objectUrls.delete(url)
+  clearOcrCache(url)
+}
+
+// Collect the blob: URLs still referenced by image nodes in `doc`; returns a Set of src strings.
+const collectImageObjectUrls = (doc) => {
+  // Inline images use the `image` node; uploads made through Crepe's ImageBlock use `image-block`.
+  const imageNodeNames = ['image', 'image-block']
+  const activeUrls = new Set()
+  doc.descendants((node) => {
+    const src = node.attrs?.src
+    if (imageNodeNames.includes(node.type?.name) && typeof src === 'string' && src.startsWith('blob:')) {
+      activeUrls.add(src)
+    }
+  })
+  return activeUrls
+}
+
+const syncObjectUrls = (doc) => {
+  const activeUrls = collectImageObjectUrls(doc)
+  for (const url of Array.from(objectUrls)) {
+    if (!activeUrls.has(url)) {
+      revokeObjectUrl(url)
+    }
+  }
+}
+
+const refreshSizeAndLimit = (ctx) => {
+  const view = ctx.get(editorViewCtx)
+  const { size, overLimit } = checkSizeLimit(view)
+  contentSize.value = size
+
+  if (overLimit && aiEnabled.value) {
+    aiEnabled.value = false
+    setCopilotEnabled(view, false)
+  }
+}
+
+const scheduleMarkdownSync = () => {
+  if (!crepe) return
+
+  if (markdownSyncTimer) {
+    clearTimeout(markdownSyncTimer)
+    markdownSyncTimer = null
+  }
+
+  markdownSyncTimer = setTimeout(async () => {
+    markdownSyncTimer = null
+    if (!crepe) return
+
+    try {
+      let hasGhostSuggestion = false
+      crepe.editor.action((ctx) => {
+        const view = ctx.get(editorViewCtx)
+        const state = COPILOT_PLUGIN_KEY.getState(view.state)
+        hasGhostSuggestion = Boolean(state?.suggestion && state.from < state.to)
+      })
+
+      // Ghost text is transient UI state and should not leak to emitted markdown.
+      if (hasGhostSuggestion) return
+
+      const markdown = await crepe.getMarkdown()
+      emit('update:markdown', markdown)
+    } catch (e) {
+      if (DEBUG) console.error('[Markdown] Sync failed:', e)
+    }
+  }, 120)
+}
+
+const clearCurrentSuggestion = (view) => {
+  const state = COPILOT_PLUGIN_KEY.getState(view.state)
+  if (state?.suggestion && state.from < state.to) {
+    const tr = view.state.tr
+      .delete(state.from, state.to)
+      .setMeta(COPILOT_PLUGIN_KEY, { from: 0, to: 0, suggestion: '' })
+    view.dispatch(tr)
+  }
+}
+
+const performOCR = async (file, cacheKey) => {
+  if (!aiEnabled.value) return
+  
+  const reader = new FileReader()
+  reader.onload = async () => {
+    const dataUrl = typeof reader.result === 'string' ? reader.result : ''
+    const splitIndex = dataUrl.indexOf(',')
+    if (splitIndex === -1) return
+
+    const base64 = dataUrl.slice(splitIndex + 1)
+    try {
+      const ocrUrl = API_URL.replace('/v1/completions', '/v1/ocr')
+      const res = await fetch(ocrUrl, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ 
+          image: base64, 
+          filename: file.name,
+          language: 'auto'
+        })
+      })
+      const data = await res.json()
+      if (data.text) {
+        setOcrCache(cacheKey, data.text)
+        setOcrCache(file.name, data.text)
+        if (crepe?.editor) {
+          crepe.editor.action((ctx) => {
+            refreshSizeAndLimit(ctx)
+          })
+        }
+      }
+    } catch (e) {
+      if (DEBUG) console.error('[OCR] Error:', e)
+    }
+  }
+  reader.readAsDataURL(file)
+}
 
 onMounted(async () => {
     if (DEBUG) console.log('[Debug] onMounted called')
@@ -61,11 +256,20 @@ onMounted(async () => {
         defaultValue: '# Welcome to LLM in text\n\nStart writing your content here...',
         features: {
             [Crepe.Feature.Latex]: true,
+            [Crepe.Feature.ImageBlock]: true,
         },
         featureConfigs: {
             [Crepe.Feature.Latex]: {
                 katexOptions: {},
                 inlineEditConfirm: 'Escape'
+            },
+            [Crepe.Feature.ImageBlock]: {
+                onUpload: (file) => {
+                    const objectUrl = URL.createObjectURL(file)
+                    objectUrls.add(objectUrl)
+                    performOCR(file, objectUrl)
+                    return objectUrl
+                }
             }
         },
         config: {
@@ -76,7 +280,7 @@ onMounted(async () => {
     crepe.editor.config((ctx) => {
         ctx.set(copilotConfigCtx.key, {
             fetchSuggestion,
-            debounceMs: 500
+            debounceMs: 1000
         })
     })
     
@@ -86,24 +290,30 @@ onMounted(async () => {
     
     await crepe.create()
     
+    crepe.on((listener) => {
+        listener.updated((ctx, doc) => {
+            syncObjectUrls(doc)
+            refreshSizeAndLimit(ctx)
+            scheduleMarkdownSync()
+        })
+    })
+
+    crepe.editor.action((ctx) => {
+        const view = ctx.get(editorViewCtx)
+        setCopilotEnabled(view, aiEnabled.value)
+        refreshSizeAndLimit(ctx)
+    })
+    scheduleMarkdownSync()
+    
     if (DEBUG) console.log('[Debug] Crepe editor created with copilot plugin')
 })
 
 const exportMarkdown = async () => {
     if (!crepe) return
-    
-    const { editorViewCtx } = await import('@milkdown/kit/core')
-    const { COPILOT_PLUGIN_KEY } = await import('../plugins/copilotPlugin')
-    
+
     crepe.editor.action((ctx) => {
         const view = ctx.get(editorViewCtx)
-        const state = COPILOT_PLUGIN_KEY.getState(view.state)
-        if (state?.suggestion && state.from < state.to) {
-            const tr = view.state.tr
-                .delete(state.from, state.to)
-                .setMeta(COPILOT_PLUGIN_KEY, { from: 0, to: 0, suggestion: '' })
-            view.dispatch(tr)
-        }
+        clearCurrentSuggestion(view)
     })
     
     const markdown = await crepe.getMarkdown()
@@ -112,7 +322,9 @@ const exportMarkdown = async () => {
     const a = document.createElement('a')
     a.href = url
     a.download = `document-${Date.now()}.md`
+    document.body.appendChild(a)
     a.click()
+    a.remove()
     URL.revokeObjectURL(url)
 }
 
@@ -137,28 +349,80 @@ const handleFileUpload = async (event) => {
 }
 
 const toggleAI = async () => {
-    aiEnabled.value = !aiEnabled.value
-    setCopilotEnabled(aiEnabled.value)
+    if (isOverLimit.value || !crepe) return
     
-    if (!aiEnabled.value && crepe) {
-        const { editorViewCtx } = await import('@milkdown/kit/core')
+    aiEnabled.value = !aiEnabled.value
+
+    crepe.editor.action((ctx) => {
+        const view = ctx.get(editorViewCtx)
+        setCopilotEnabled(view, aiEnabled.value)
+        if (!aiEnabled.value) {
+            clearCurrentSuggestion(view)
+        }
+    })
+}
+
+const toggleImageDropdown = () => {
+    showImageDropdown.value = !showImageDropdown.value
+}
+
+const triggerImageUpload = () => {
+    showImageDropdown.value = false
+    imageInputRef.value?.click()
+}
+
+const insertImageAtCursor = (src) => {
+    if (!crepe || !src) return
+    
+    crepe.editor.action((ctx) => {
+        const view = ctx.get(editorViewCtx)
+        const { state } = view
+        const { schema } = state
         
-        crepe.editor.action((ctx) => {
-            const view = ctx.get(editorViewCtx)
-            const state = COPILOT_PLUGIN_KEY.getState(view.state)
-            if (state?.suggestion && state.from < state.to) {
-                const tr = view.state.tr
-                    .delete(state.from, state.to)
-                    .setMeta(COPILOT_PLUGIN_KEY, { from: 0, to: 0, suggestion: '' })
-                view.dispatch(tr)
-            }
-        })
-    }
+        const imageType = schema.nodes.image
+        if (!imageType) return
+        
+        const imageNode = imageType.create({ src })
+        const tr = state.tr.replaceSelectionWith(imageNode)
+        view.dispatch(tr)
+    })
+}
+
+const handleImageUpload = async (event) => {
+    const file = event.target.files?.[0]
+    if (!file) return
+    
+    const objectUrl = URL.createObjectURL(file)
+    objectUrls.add(objectUrl)
+    performOCR(file, objectUrl)
+    insertImageAtCursor(objectUrl)
+    
+    event.target.value = ''
+}
+
+const insertImageFromUrl = () => {
+    const url = imageUrl.value.trim()
+    if (!url) return
+    
+    insertImageAtCursor(url)
+    imageUrl.value = ''
+    showUrlDialog.value = false
 }
 
 onUnmounted(() => {
+    if (markdownSyncTimer) {
+        clearTimeout(markdownSyncTimer)
+        markdownSyncTimer = null
+    }
+
+    for (const url of Array.from(objectUrls)) {
+        revokeObjectUrl(url)
+    }
+
+    clearAllOcrCache()
     if (crepe) {
         crepe.destroy()
+        crepe = null
     }
 })
 </script>
@@ -176,6 +440,7 @@ onUnmounted(() => {
     bottom: 20px;
     right: 20px;
     display: flex;
+    flex-direction: column;
     gap: 8px;
     z-index: 9999;
 }
@@ -193,12 +458,14 @@ onUnmounted(() => {
     align-items: center;
     justify-content: center;
     box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+    opacity: 0.5;
 }
 
 .action-btn:hover {
     background-color: #4a90d9;
     color: white;
     border-color: #4a90d9;
+    opacity: 1;
 }
 
 .action-btn.ai-disabled {
@@ -213,15 +480,42 @@ onUnmounted(() => {
     border-color: #4a90d9;
 }
 
+.action-btn.force-disabled {
+    background-color: #ccc;
+    color: #999;
+    border-color: #ccc;
+    cursor: not-allowed;
+    opacity: 0.6;
+}
+
+.action-btn.force-disabled:hover {
+    background-color: #ccc;
+    color: #999;
+    border-color: #ccc;
+    opacity: 0.6;
+}
+
+.size-indicator {
+    font-size: 10px;
+    color: #999;
+    text-align: center;
+    margin-top: 4px;
+}
+
+.size-indicator.over-limit {
+    color: #e74c3c;
+}
+
 .action-btn {
     position: relative;
 }
 
 .btn-tooltip {
     position: absolute;
-    top: -32px;
-    left: 50%;
-    transform: translateX(-50%);
+    top: 50%;
+    right: 100%;
+    transform: translateY(-50%);
+    margin-right: 8px;
     background: #333;
     color: #fff;
     font-size: 12px;
@@ -237,6 +531,116 @@ onUnmounted(() => {
     opacity: 1;
 }
 
+.action-btn:focus-visible .btn-tooltip {
+    opacity: 1;
+}
+
+.image-btn-wrapper {
+    position: relative;
+}
+
+.image-dropdown {
+    position: absolute;
+    bottom: 100%;
+    right: 0;
+    margin-bottom: 8px;
+    background: #fff;
+    border: 1px solid #ddd;
+    border-radius: 8px;
+    box-shadow: 0 2px 8px rgba(0,0,0,0.15);
+    overflow: hidden;
+    z-index: 10000;
+    min-width: 160px;
+}
+
+.image-dropdown button {
+    display: block;
+    width: 100%;
+    padding: 10px 16px;
+    border: none;
+    background: none;
+    text-align: left;
+    cursor: pointer;
+    font-size: 14px;
+    color: #333;
+}
+
+.image-dropdown button:hover {
+    background: #f5f5f5;
+}
+
+.url-dialog-overlay {
+    position: fixed;
+    top: 0;
+    left: 0;
+    right: 0;
+    bottom: 0;
+    background: rgba(0,0,0,0.3);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 10001;
+}
+
+.url-dialog {
+    background: #fff;
+    padding: 20px;
+    border-radius: 8px;
+    box-shadow: 0 4px 16px rgba(0,0,0,0.2);
+    min-width: 320px;
+}
+
+.url-dialog h3 {
+    margin: 0 0 12px 0;
+    font-size: 16px;
+    color: #333;
+}
+
+.url-dialog input {
+    width: 100%;
+    box-sizing: border-box;
+    padding: 10px 12px;
+    border: 1px solid #ddd;
+    border-radius: 4px;
+    font-size: 14px;
+    margin-bottom: 16px;
+}
+
+.url-dialog input:focus {
+    outline: none;
+    border-color: #4a90d9;
+}
+
+.url-dialog-buttons {
+    display: flex;
+    justify-content: flex-end;
+    gap: 8px;
+}
+
+.dialog-btn {
+    padding: 8px 16px;
+    border: 1px solid #ddd;
+    border-radius: 4px;
+    cursor: pointer;
+    font-size: 14px;
+    background: #fff;
+    color: #333;
+}
+
+.dialog-btn:hover {
+    background: #f5f5f5;
+}
+
+.dialog-btn.primary {
+    background: #4a90d9;
+    color: #fff;
+    border-color: #4a90d9;
+}
+
+.dialog-btn.primary:hover {
+    background: #3a80c9;
+}
+
 .milkdown-editor {
     width: 100%;
     height: 100%;
@@ -247,7 +651,7 @@ onUnmounted(() => {
 .milkdown-editor :deep(.milkdown) {
     max-width: none;
     margin: 0 !important;
-    padding: 20px 40px !important;
+    padding: 0 40px !important;
     min-height: 100%;
 }
 
@@ -262,6 +666,25 @@ onUnmounted(() => {
     padding: 0 !important;
 }
 
+.milkdown-editor :deep(.milkdown > *:first-child) {
+    margin-top: 0 !important;
+    padding-top: 0 !important;
+}
+
+.milkdown-editor :deep(.ProseMirror) {
+    margin: 0 !important;
+    padding: 0 !important;
+}
+
+.milkdown-editor :deep(.ProseMirror img) {
+    max-width: 60%;
+    height: auto;
+}
+
+.milkdown-editor :deep(.ProseMirror > *:first-child) {
+    margin-top: 0 !important;
+}
+
 .milkdown-editor :deep(.milkdown__aside),
 .milkdown-editor :deep(.milkdown__aside-wrapper),
 .milkdown-editor :deep([class*="aside"]),
@@ -314,7 +737,7 @@ onUnmounted(() => {
 .copilot-ghost-text {
     color: #999;
     opacity: 0.6;
-    pointer-events: none;
+    pointer-events: auto;
 }
 
 .copilot-ghost-text.copilot-loading {
diff --git a/src/plugins/copilotPlugin.ts b/src/plugins/copilotPlugin.ts
index 1131992..ed6979c 100644
--- a/src/plugins/copilotPlugin.ts
+++ b/src/plugins/copilotPlugin.ts
@@ -1,14 +1,14 @@
 import { Plugin, PluginKey, Selection } from '@milkdown/prose/state'
 import { $prose, $ctx, $markSchema } from '@milkdown/kit/utils'
 import { parserCtx } from '@milkdown/kit/core'
-import { Node as ProseNode, Fragment, Slice } from '@milkdown/prose/model'
+import { Node as ProseNode, Fragment } from '@milkdown/prose/model'
 import type { Ctx } from '@milkdown/kit/core'
 import type { EditorView } from '@milkdown/prose/view'
+import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT } from '../utils/ocrCache'
 
 const COPILOT_PLUGIN_KEY = new PluginKey('milkdown-copilot')
-const DEBOUNCE_MS = 500
-
-let enabled = true
+const DEBOUNCE_MS = 1000
+const SIZE_LIMIT = OCR_SIZE_LIMIT
 
 interface CopilotState {
   from: number
@@ -21,12 +21,21 @@ interface CopilotConfig {
   debounceMs?: number
 }
 
+interface CopilotRuntime {
+  enabled: boolean
+  debounceTimer: ReturnType<typeof setTimeout> | null
+  abortController: AbortController | null
+  ctx: Ctx
+}
+
 const initialState: CopilotState = {
   from: 0,
   to: 0,
   suggestion: ''
 }
 
+const runtimeByView = new WeakMap<EditorView, CopilotRuntime>()
+
 export const copilotConfigCtx = $ctx<CopilotConfig, 'copilotConfig'>({
   fetchSuggestion: async () => '',
   debounceMs: DEBOUNCE_MS
@@ -36,21 +45,68 @@ export const copilotGhostMark = $markSchema('copilot_ghost', () => ({
   excludes: '_',
   inclusive: true,
   parseDOM: [{ tag: 'span[data-copilot-ghost]' }],
-  toDOM: () => ['span', { 'data-copilot-ghost': '', class: 'copilot-ghost-text' }, 0]
+  toDOM: () => ['span', { 'data-copilot-ghost': '', class: 'copilot-ghost-text' }, 0],
+  parseMarkdown: {
+    match: () => false,
+    runner: () => {}
+  },
+  toMarkdown: {
+    match: (mark) => mark.type.name === 'copilot_ghost',
+    runner: () => {}
+  }
 }))
 
-let debounceTimer: ReturnType<typeof setTimeout> | null = null
-let abortController: AbortController | null = null
-let currentCtx: Ctx | null = null
+function clearRuntimeRequests(runtime: CopilotRuntime) {
+  if (runtime.debounceTimer) {
+    clearTimeout(runtime.debounceTimer)
+    runtime.debounceTimer = null
+  }
+
+  if (runtime.abortController) {
+    runtime.abortController.abort()
+    runtime.abortController = null
+  }
+}
+
+function findGhostRangeByMarks(view: EditorView): { from: number; to: number } | null {
+  const markType = view.state.schema.marks.copilot_ghost
+  if (!markType) return null
+
+  let from = Number.POSITIVE_INFINITY
+  let to = -1
+
+  view.state.doc.descendants((node, pos) => {
+    if (node.isText && node.marks.some((m: any) => m.type === markType)) {
+      from = Math.min(from, pos)
+      to = Math.max(to, pos + node.nodeSize)
+    }
+    return true
+  })
+
+  if (!Number.isFinite(from) || to <= from) return null
+  return { from, to }
+}
+
+function getGhostRange(view: EditorView): { from: number; to: number } | null {
+  const state = COPILOT_PLUGIN_KEY.getState(view.state)
+  if (state && state.from < state.to) {
+    return { from: state.from, to: state.to }
+  }
+  return findGhostRangeByMarks(view)
+}
+
+function hasGhostText(view: EditorView): boolean {
+  return getGhostRange(view) !== null
+}
 
 function clearGhostText(view: EditorView) {
-  const state = COPILOT_PLUGIN_KEY.getState(view.state)
-  if (state && state.suggestion && state.from < state.to) {
-    const tr = view.state.tr
-      .delete(state.from, state.to)
-      .setMeta(COPILOT_PLUGIN_KEY, { ...initialState })
-    view.dispatch(tr)
-  }
+  const range = getGhostRange(view)
+  if (!range) return
+
+  const tr = view.state.tr
+    .delete(range.from, range.to)
+    .setMeta(COPILOT_PLUGIN_KEY, { ...initialState })
+  view.dispatch(tr)
 }
 
 function isBlockNode(node: ProseNode): boolean {
@@ -67,39 +123,24 @@ function hasBlockNodes(doc: ProseNode): boolean {
   return hasBlock
 }
 
-function addGhostMarkToNode(node: ProseNode, ghostMarkType: any): ProseNode {
-  if (node.isText) {
-    return node.mark(node.marks.concat(ghostMarkType.create()))
-  }
-  if (node.isLeaf) {
-    return node
-  }
-  const newContent: ProseNode[] = []
-  node.forEach((child) => {
-    newContent.push(addGhostMarkToNode(child, ghostMarkType))
-  })
-  return node.copy(Fragment.from(newContent))
-}
-
-function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): Fragment {
+function extractInlineContent(doc: ProseNode, schema: any): Fragment {
   const nodes: ProseNode[] = []
   let isFirstBlock = true
-  
+
   doc.forEach((blockNode) => {
     if (!isFirstBlock) {
       const hardBreak = schema.nodes.hard_break?.create()
       if (hardBreak) {
         nodes.push(hardBreak)
       } else {
-        nodes.push(schema.text('\n', [ghostMarkType.create()]))
+        nodes.push(schema.text('\n'))
       }
     }
     isFirstBlock = false
-    
+
     blockNode.forEach((inlineNode) => {
       if (inlineNode.isText) {
-        const combinedMarks = inlineNode.marks.concat(ghostMarkType.create())
-        nodes.push(inlineNode.mark(combinedMarks))
+        nodes.push(inlineNode)
       } else if (inlineNode.type.name === 'hard_break') {
         nodes.push(inlineNode)
       } else if (inlineNode.isLeaf) {
@@ -107,8 +148,7 @@ function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any):
       } else if (inlineNode.content.size > 0) {
         inlineNode.forEach((nestedNode) => {
           if (nestedNode.isText) {
-            const combinedMarks = nestedNode.marks.concat(ghostMarkType.create())
-            nodes.push(nestedNode.mark(combinedMarks))
+            nodes.push(nestedNode)
           } else if (nestedNode.isLeaf) {
             nodes.push(nestedNode)
           }
@@ -116,52 +156,83 @@ function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any):
       }
     })
   })
-  
+
   return Fragment.from(nodes)
 }
 
-async function insertGhostText(view: EditorView, suggestion: string, from: number) {
-  if (!currentCtx || !suggestion) return
-  
+function normalizeSuggestionText(raw: string): string {
+  // Normalises raw model output before it is parsed as Markdown: unifies line endings, decodes an
+  // accidentally JSON-quoted string and undoes literally escaped \n / \t. Empty input is returned as is.
+  if (!raw) return raw
+
+  let text = raw.replace(/\r\n?/g, '\n')
+  const trimmed = text.trim()
+  // Some models may return a JSON-encoded string literal, decode it if so.
+  if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
+    try {
+      const parsed = JSON.parse(trimmed)
+      if (typeof parsed === 'string') {
+        text = parsed.replace(/\r\n?/g, '\n')
+      }
+    } catch {
+      // Keep original text when not valid JSON.
+    }
+  }
+
+  // Escaped newlines are restored only when no real newline exists. NOTE(review): "\nabla"-style LaTeX is still ambiguous here.
+  if (!text.includes('\n') && text.includes('\\n')) {
+    text = text.replace(/\\n/g, '\n')
+  }
+  // Unescape tabs only when no ASCII letter follows, so LaTeX commands (\theta, \times, \text) survive.
+  text = text.replace(/\\t(?![A-Za-z])/g, '\t')
+
+  return text
+}
+
+async function insertGhostText(view: EditorView, suggestion: string, from: number, ctx: Ctx) {
+  if (!suggestion) return
+
   const schema = view.state.schema
   const markType = schema.marks.copilot_ghost
-  
+
   if (!markType) {
     console.error('[Copilot] copilot_ghost mark not found in schema')
     return
   }
-  
+
   try {
-    const parser = currentCtx.get(parserCtx)
+    const parser = ctx.get(parserCtx)
     const parsedDoc = await parser(suggestion)
-    
+
     if (!parsedDoc) {
       insertPlainText(view, suggestion, from, markType)
       return
     }
-    
+
     const containsBlocks = hasBlockNodes(parsedDoc)
-    
+
     if (containsBlocks) {
       const $from = view.state.doc.resolve(from)
       const insertPos = $from.after($from.depth)
-      
+
       const blockNodes: ProseNode[] = []
       parsedDoc.forEach((node) => {
-        blockNodes.push(addGhostMarkToNode(node, markType))
+        blockNodes.push(node)
       })
-      
+
       const fragment = Fragment.from(blockNodes)
       const tr = view.state.tr
       tr.insert(insertPos, fragment)
       const endPos = insertPos + fragment.size
+      tr.addMark(insertPos, endPos, markType.create())
       tr.setMeta(COPILOT_PLUGIN_KEY, { from: insertPos, to: endPos, suggestion })
       view.dispatch(tr)
     } else {
-      const inlineFragment = extractInlineContent(parsedDoc, markType, schema)
+      const inlineFragment = extractInlineContent(parsedDoc, schema)
       const tr = view.state.tr
       tr.insert(from, inlineFragment)
       const endPos = from + inlineFragment.size
+      tr.addMark(from, endPos, markType.create())
       tr.setMeta(COPILOT_PLUGIN_KEY, { from, to: endPos, suggestion })
       view.dispatch(tr)
     }
@@ -180,66 +251,112 @@ function insertPlainText(view: EditorView, suggestion: string, from: number, mar
   view.dispatch(tr)
 }
 
-function doFetchSuggestion(view: EditorView, pos: number, prefix: string, suffix: string) {
-  if (!currentCtx) return
-  
-  const config = currentCtx.get(copilotConfigCtx.key)
-  
-  if (abortController) {
-    abortController.abort()
-    abortController = null
+// Returns the `src` of every inline (`image`) and block (`image-block`) image node found in `doc`.
+function extractImageFilenames(doc: ProseNode): string[] {
+  const filenames: string[] = []
+  doc.descendants((node: ProseNode) => {
+    const isImage = node.type.name === 'image' || node.type.name === 'image-block'
+    if (isImage && node.attrs.src) filenames.push(node.attrs.src)
+  })
+  return filenames
+}
+
+// Appends the cached OCR text of every image located before `cursorPos` to `prefix`.
+function buildPrefixWithOCR(prefix: string, doc: ProseNode, cursorPos: number): string {
+  const ocrEntries: string[] = []
+  doc.descendants((node: ProseNode, pos) => {
+    if (pos >= cursorPos) return false // nodes starting at/after the cursor are not part of the prefix
+    // Inline images are `image` nodes; uploads made through Crepe's ImageBlock are `image-block` nodes.
+    const isImage = node.type.name === 'image' || node.type.name === 'image-block'
+    if (!isImage || !node.attrs.src) return true
+    const ocrText = getOcrCache(node.attrs.src)
+    if (!ocrText) return true
+    // `image` nodes carry their label in `alt`; `image-block` nodes carry it in `caption`.
+    const label = [node.attrs.alt, node.attrs.caption].find((v) => typeof v === 'string' && v)
+    ocrEntries.push(`image(${label || 'untitled'}): ${ocrText}`)
+    return true
+  })
+  if (!ocrEntries.length) return prefix
+  return `${prefix}\n\n[OCR Context]\n${ocrEntries.join('\n')}`
+}
+
+function doFetchSuggestion(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) {
+  const config = runtime.ctx.get(copilotConfigCtx.key)
+
+  if (runtime.abortController) {
+    runtime.abortController.abort()
+    runtime.abortController = null
   }
-  
-  abortController = new AbortController()
-  
-  config.fetchSuggestion(prefix, suffix, abortController.signal)
-    .then(suggestion => {
-      if (view.state.selection.from !== pos) return
-      
-      if (suggestion) {
-        insertGhostText(view, suggestion, pos)
+
+  const controller = new AbortController()
+  runtime.abortController = controller
+
+  config.fetchSuggestion(prefix, suffix, controller.signal)
+    .then((suggestion) => {
+      if (!runtime.enabled) return
+      if (view.state.selection.from !== pos || view.state.selection.to !== pos) return
+
+      const normalizedSuggestion = normalizeSuggestionText(suggestion)
+      if (normalizedSuggestion) {
+        insertGhostText(view, normalizedSuggestion, pos, runtime.ctx)
       }
     })
-    .catch(e => {
-      if (e.name !== 'AbortError') {
+    .catch((e: any) => {
+      if (e?.name !== 'AbortError') {
         console.error('[Copilot] Error:', e)
       }
     })
     .finally(() => {
-      abortController = null
+      if (runtime.abortController === controller) {
+        runtime.abortController = null
+      }
     })
 }
 
-function scheduleFetch(view: EditorView, pos: number, prefix: string, suffix: string) {
-  if (!enabled) return
-  
-  if (debounceTimer) {
-    clearTimeout(debounceTimer)
-    debounceTimer = null
+function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) {
+  if (!runtime.enabled) return
+
+  const doc = view.state.doc
+  const imageFilenames = extractImageFilenames(doc)
+  const { overLimit } = checkOcrSizeLimit(doc.content.size, imageFilenames)
+
+  if (overLimit) {
+    setCopilotEnabled(view, false)
+    return
   }
-  
-  debounceTimer = setTimeout(() => {
-    debounceTimer = null
-    doFetchSuggestion(view, pos, prefix, suffix)
-  }, DEBOUNCE_MS)
+
+  const prefixWithOCR = buildPrefixWithOCR(prefix, doc, pos)
+
+  if (runtime.debounceTimer) {
+    clearTimeout(runtime.debounceTimer)
+    runtime.debounceTimer = null
+  }
+
+  const debounceMs = runtime.ctx.get(copilotConfigCtx.key).debounceMs ?? DEBOUNCE_MS
+  runtime.debounceTimer = setTimeout(() => {
+    runtime.debounceTimer = null
+    doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffix)
+  }, debounceMs)
 }
 
 function acceptSuggestion(view: EditorView) {
-  const state = COPILOT_PLUGIN_KEY.getState(view.state)
-  if (!state?.suggestion || state.from >= state.to) return false
-  
+  const range = getGhostRange(view)
+  if (!range) return false
+
   const tr = view.state.tr
   const doc = tr.doc
-  const from = state.from
-  const to = state.to
-  
+  const from = range.from
+  const to = range.to
+  const markType = view.state.schema.marks.copilot_ghost
+  if (!markType) return false
+
   doc.nodesBetween(from, to, (node, pos) => {
-    if (node.marks.some((m: any) => m.type.name === 'copilot_ghost')) {
-      tr.removeMark(pos, pos + node.nodeSize, view.state.schema.marks.copilot_ghost)
+    if (node.marks.some((m: any) => m.type === markType)) {
+      tr.removeMark(pos, pos + node.nodeSize, markType)
     }
   })
-  
-  const endPos = Math.min(state.to, tr.doc.content.size)
+
+  const endPos = Math.min(to, tr.doc.content.size)
   tr.setSelection(Selection.near(tr.doc.resolve(endPos)))
   tr.setMeta(COPILOT_PLUGIN_KEY, { ...initialState })
   view.dispatch(tr)
@@ -247,108 +364,180 @@ function acceptSuggestion(view: EditorView) {
 }
 
 function rejectSuggestion(view: EditorView) {
-  const state = COPILOT_PLUGIN_KEY.getState(view.state)
-  if (!state?.suggestion) return false
-  
+  if (!hasGhostText(view)) return false // nothing displayed, nothing to reject
+  // Dismiss the displayed suggestion; returns true so callers can tell a suggestion was actually removed.
   clearGhostText(view)
   return true
 }
 
-export const copilotPlugin = $prose((ctx) => {
-  currentCtx = ctx
-  
-  return new Plugin<CopilotState>({
-    key: COPILOT_PLUGIN_KEY,
-    state: {
-      init: () => ({ ...initialState }),
-      apply: (tr, value) => {
-        const meta = tr.getMeta(COPILOT_PLUGIN_KEY)
-        if (meta !== undefined) {
-          return meta
-        }
-        
-        if (tr.docChanged && value.suggestion) {
-          return { ...initialState }
-        }
-        
-        return value
+export const copilotPlugin = $prose((ctx) => new Plugin<CopilotState>({
+  key: COPILOT_PLUGIN_KEY,
+  state: {
+    init: () => ({ ...initialState }),
+    apply: (tr, value) => { // Derives the next plugin state from a transaction and the previous state.
+      const meta = tr.getMeta(COPILOT_PLUGIN_KEY)
+      if (meta !== undefined) { // state attached to the transaction under this plugin key replaces the old state as-is
+        return meta
       }
-    },
-    props: {
-      handleKeyDown: (view, event) => {
-        const state = COPILOT_PLUGIN_KEY.getState(view.state)
-        
-        if (event.key === 'Tab' && state?.suggestion) {
-          event.preventDefault()
-          return acceptSuggestion(view)
-        }
-        
-        if (event.key === 'Escape' && state?.suggestion) {
-          event.preventDefault()
-          return rejectSuggestion(view)
-        }
-        
-        if (state?.suggestion && event.key !== 'Shift' && event.key !== 'Control' && event.key !== 'Alt' && event.key !== 'Meta') {
-          clearGhostText(view)
-        }
-        
-        return false
-      },
-      handleClick: (view, pos) => {
-        const state = COPILOT_PLUGIN_KEY.getState(view.state)
-        if (!state?.suggestion) return false
-        
-        if (pos >= state.from && pos < state.to) {
-          return acceptSuggestion(view)
-        }
-        
+      // A document change while a suggestion is recorded resets the state to a copy of initialState.
+      if (tr.docChanged && value.suggestion) {
+        return { ...initialState }
+      }
+      // Otherwise the previous state object is carried forward unchanged.
+      return value
+    }
+  },
+  props: {
+    handleKeyDown: (view, event) => { // Key handling while ghost text may be visible.
+      const hasGhost = hasGhostText(view)
+      // Tab accepts the visible suggestion.
+      if (event.key === 'Tab' && hasGhost) {
+        event.preventDefault()
+        return acceptSuggestion(view)
+      }
+      // Escape rejects the visible suggestion.
+      if (event.key === 'Escape' && hasGhost) {
+        event.preventDefault()
+        return rejectSuggestion(view)
+      }
+      // Any other key except a bare modifier removes the ghost text; the key itself is not consumed.
+      if (hasGhost && event.key !== 'Shift' && event.key !== 'Control' && event.key !== 'Alt' && event.key !== 'Meta') {
         clearGhostText(view)
-        return false
       }
+      // Returning false reports the key as not handled by this prop.
+      return false
     },
-    view: () => ({
-      update: (view, prevState) => {
-        if (view.state.doc.eq(prevState.doc) && view.state.selection.eq(prevState.selection)) {
-          return
-        }
-        
-        const state = COPILOT_PLUGIN_KEY.getState(view.state)
-        if (state?.suggestion) {
-          return
-        }
-        
-        if (!view.state.doc.eq(prevState.doc)) {
-          const { from, to } = view.state.selection
-          if (from !== to) return
-          
-          const doc = view.state.doc
-          const prefix = doc.textBetween(0, from)
-          const suffix = doc.textBetween(to, doc.content.size)
-          
-          scheduleFetch(view, from, prefix, suffix)
-        }
+    handleClick: (view, pos) => { // Click handling while ghost text may be visible.
+      const range = getGhostRange(view)
+      if (!range) return false
+      // A click inside the ghost range (both ends inclusive) accepts the suggestion.
+      if (pos >= range.from && pos <= range.to) {
+        return acceptSuggestion(view)
       }
-    })
-  })
-})
+      // A click anywhere else removes the ghost text and reports the click as not handled.
+      clearGhostText(view)
+      return false
+    }
+  },
+  view: (view) => { // Per-view setup: registers a runtime record, binds capture-phase DOM listeners, returns update/destroy hooks.
+    let activeView = view
+    let activeDom = view.dom
+    const runtime: CopilotRuntime = {
+      enabled: true,
+      debounceTimer: null,
+      abortController: null,
+      ctx
+    }
+    runtimeByView.set(view, runtime)
+    // Capture-phase key handler: with ghost text visible, Tab accepts and Escape rejects, and the event is stopped from propagating.
+    const onKeydownCapture = (event: KeyboardEvent) => {
+      if (!hasGhostText(activeView)) return
+
+      if (event.key === 'Tab') {
+        event.preventDefault()
+        event.stopPropagation()
+        event.stopImmediatePropagation?.()
+        acceptSuggestion(activeView)
+        return
+      }
+
+      if (event.key === 'Escape') {
+        event.preventDefault()
+        event.stopPropagation()
+        event.stopImmediatePropagation?.()
+        rejectSuggestion(activeView)
+      }
+    }
+    // Capture-phase mousedown handler; non-Element targets (e.g. text nodes) are resolved to their parent element first.
+    const onPointerDownCapture = (event: MouseEvent) => {
+      if (!hasGhostText(activeView)) return
+      const targetNode = event.target instanceof Node ? event.target : null
+      const target = targetNode instanceof Element ? targetNode : targetNode?.parentElement
+      if (!target) return
+
+      // Accept suggestion when user clicks any rendered ghost-text fragment.
+      if (target.closest('[data-copilot-ghost]')) {
+        event.preventDefault()
+        event.stopPropagation()
+        event.stopImmediatePropagation?.()
+        acceptSuggestion(activeView)
+      }
+    }
+    // Both listeners are added with capture=true, so removal passes the same flag.
+    const bindDomListeners = (dom: HTMLElement) => {
+      dom.addEventListener('keydown', onKeydownCapture, true)
+      dom.addEventListener('mousedown', onPointerDownCapture, true)
+    }
+
+    const unbindDomListeners = (dom: HTMLElement) => {
+      dom.removeEventListener('keydown', onKeydownCapture, true)
+      dom.removeEventListener('mousedown', onPointerDownCapture, true)
+    }
+
+    bindDomListeners(activeDom)
+
+    return {
+      update: (nextView, prevState) => {
+        if (nextView.dom !== activeDom) { // the view's DOM node changed: move the listeners to the new node
+          unbindDomListeners(activeDom)
+          activeDom = nextView.dom
+          bindDomListeners(activeDom)
+        }
+
+        activeView = nextView
+        const docChanged = !nextView.state.doc.eq(prevState.doc)
+        const selectionChanged = !nextView.state.selection.eq(prevState.selection)
+        // NOTE(review): a selection-only change (no edit) also falls through to scheduleFetch below — confirm this is intended.
+        if (!docChanged && !selectionChanged) {
+          return
+        }
+        // While ghost text is displayed, no new request is scheduled.
+        if (hasGhostText(nextView)) {
+          return
+        }
+        // Only a collapsed cursor schedules a fetch; a range selection calls clearRuntimeRequests (presumably cancels pending work — confirm).
+        const { from, to } = nextView.state.selection
+        if (from !== to) {
+          clearRuntimeRequests(runtime)
+          return
+        }
+        // Prefix/suffix are the document text before and after the cursor position.
+        const doc = nextView.state.doc
+        const prefix = doc.textBetween(0, from)
+        const suffix = doc.textBetween(to, doc.content.size)
+
+        scheduleFetch(nextView, runtime, from, prefix, suffix)
+      },
+      destroy: () => { // teardown: detach listeners, clear runtime requests, drop the runtime entry for this view
+        unbindDomListeners(activeDom)
+        clearRuntimeRequests(runtime)
+        runtimeByView.delete(view)
+      }
+    }
+  }
+}))
 
 export { COPILOT_PLUGIN_KEY }
 
-export function isCopilotEnabled(): boolean {
-  return enabled
+export function isCopilotEnabled(view: EditorView): boolean { // Reports the per-view enabled flag kept in runtimeByView.
+  return runtimeByView.get(view)?.enabled ?? true // a view with no runtime entry is reported as enabled
 }
 
-export function setCopilotEnabled(value: boolean): void {
-  enabled = value
-  
+export function setCopilotEnabled(view: EditorView, value: boolean): void { // Sets the per-view enabled flag.
+  const runtime = runtimeByView.get(view)
+  if (!runtime) return // no runtime entry for this view: silently a no-op
+  // Disabling also calls clearRuntimeRequests on this view's runtime.
+  runtime.enabled = value
   if (!value) {
-    if (debounceTimer) {
-      clearTimeout(debounceTimer)
-      debounceTimer = null
-    }
-    if (abortController) {
-      abortController.abort()
-      abortController = null
-    }
+    clearRuntimeRequests(runtime)
   }
 }
+
+export function checkSizeLimit(view: EditorView): { size: number; overLimit: boolean } {
+  // Runs the size check for this view's document and exposes only `size` and `overLimit`.
+  const { doc } = view.state
+  const { size, overLimit } = checkOcrSizeLimit(doc.content.size, extractImageFilenames(doc))
+  return { size, overLimit }
+}
+
+export { SIZE_LIMIT }
diff --git a/src/style.css b/src/style.css
index 6f0bdb8..bd53d5a 100644
--- a/src/style.css
+++ b/src/style.css
@@ -72,5 +72,6 @@ body {
   padding: 0;
   width: 100%;
   height: 100%;
-  overflow: hidden;
+  overflow-x: hidden;
+  overflow-y: auto;
 }
diff --git a/src/utils/ocrCache.js b/src/utils/ocrCache.js
new file mode 100644
index 0000000..e1ed4ed
--- /dev/null
+++ b/src/utils/ocrCache.js
@@ -0,0 +1,45 @@
+// Threshold that checkSizeLimit compares (docTextSize + OCR byte size) against.
+const SIZE_LIMIT = 64 * 1024
+// Module-level store: image filename -> OCR text.
+const ocrCache = new Map()
+
+/** Stores (or overwrites) the OCR text for an image filename. */
+export function setOcrCache(filename, text) {
+  ocrCache.set(filename, text)
+}
+/** Returns the cached OCR text, or '' when the entry is missing or falsy. */
+export function getOcrCache(filename) {
+  const cached = ocrCache.get(filename)
+  return cached || ''
+}
+/** Removes the cache entry for one filename. */
+export function clearOcrCache(filename) {
+  ocrCache.delete(filename)
+}
+/** True when an entry exists for the filename (even an empty one). */
+export function hasOcrCache(filename) {
+  return ocrCache.has(filename)
+}
+/** Empties the whole cache. */
+export function clearAllOcrCache() {
+  ocrCache.clear()
+}
+/** Sums the Blob byte size of the cached text for each given filename; uncached or empty entries add 0. */
+export function calculateOcrSize(imageFilenames) {
+  let bytes = 0
+  for (const filename of imageFilenames) {
+    const cached = ocrCache.get(filename)
+    if (!cached) continue
+    bytes += new Blob([cached]).size
+  }
+  return bytes
+}
+/** Adds docTextSize to the OCR size of the given images and reports whether the sum exceeds SIZE_LIMIT. */
+export function checkSizeLimit(docTextSize, imageFilenames) {
+  const ocrSize = calculateOcrSize(imageFilenames)
+  const size = docTextSize + ocrSize
+  const overLimit = size > SIZE_LIMIT
+  return { size, docSize: docTextSize, ocrSize, overLimit }
+}
+// Exported name for the limit.
+export const OCR_SIZE_LIMIT = SIZE_LIMIT
diff --git a/vite.config.js b/vite.config.js
index ebda1c7..526c009 100644
--- a/vite.config.js
+++ b/vite.config.js
@@ -7,6 +7,42 @@ export default defineConfig({
     host: true,
     port: 5173
   },
+  build: {
+    rollupOptions: {
+      output: {
+        manualChunks(id) {
+          // Maps a module id to a chunk name. Uses the LAST node_modules/ segment so nested installs
+          // (e.g. pnpm's node_modules/.pnpm/<pkg>@<ver>/node_modules/<pkg>) resolve to the real package.
+          const marker = 'node_modules/'
+          const markerAt = id.lastIndexOf(marker)
+          if (markerAt === -1) return // not a dependency: leave chunking to Rollup
+          const modulePath = id.slice(markerAt + marker.length)
+          const segments = modulePath.split('/')
+          // Scoped packages (@scope/name) span two path segments.
+          const packageName = segments[0].startsWith('@') ? `${segments[0]}/${segments[1]}` : segments[0]
+          if (packageName.startsWith('@milkdown')) return 'milkdown'
+          if (packageName.startsWith('prosemirror')) return 'prosemirror'
+          // CodeMirror: one chunk per language pack, one per remaining @codemirror package.
+          if (packageName.startsWith('@codemirror')) {
+            const langMatch = modulePath.match(/@codemirror\/lang-([^/]+)/)
+            if (langMatch) return `cm-lang-${langMatch[1]}`
+            return `cm-${segments[1]}`
+          }
+          // Refractor: one chunk per language grammar, everything else in a core chunk.
+          if (packageName === 'refractor') {
+            const langMatch = modulePath.match(/refractor\/lang\/([^./]+)/)
+            if (langMatch) return `refractor-lang-${langMatch[1]}`
+            return 'refractor-core'
+          }
+          if (packageName.startsWith('katex')) return 'katex'
+          if (packageName.startsWith('markdown-it')) return 'markdown'
+          if (packageName === 'vue' || packageName.startsWith('@vue')) return 'vue'
+          // Any other dependency gets its own vendor-<package> chunk.
+          return `vendor-${packageName.replace('@', '').replace('/', '-')}`
+        }
+      }
+    }
+  },
   optimizeDeps: {
     include: [
       '@milkdown/crepe',