From 64cfa58376f1c4ff955b37de8230f4458dff929e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cydy0615=E2=80=9D?= <“allenyuan410@gmail.com”> Date: Sat, 14 Feb 2026 18:28:37 +0800 Subject: [PATCH] feat(editor): add image insertion with OCR support and size limit handling Add image button with dropdown menu for uploading local images or inserting from URL. Integrate VLM-based OCR to extract text context from images and include in AI suggestions. Implement document size limits to disable AI when exceeding threshold. Refactor copilot plugin with per-view runtime state and OCR context injection. Add OCR cache utility for managing image metadata. Add code splitting configuration for optimized bundle size. --- README.md | 144 +++++- backend/.env | 1 + backend/.env.example | 1 + backend/__pycache__/llm.cpython-313.pyc | Bin 2609 -> 5743 bytes backend/__pycache__/main.cpython-313.pyc | Bin 0 -> 6745 bytes backend/__pycache__/prompt.cpython-313.pyc | Bin 9003 -> 3755 bytes backend/llm.py | 136 ++++- backend/main.py | 109 +++- backend/prompt.py | 244 +++------ plans/image-button-plan.md | 269 ++++++++++ src/App.vue | 5 +- src/components/MilkdownEditor.vue | 509 +++++++++++++++++-- src/plugins/copilotPlugin.ts | 549 ++++++++++++++------- src/style.css | 3 +- src/utils/ocrCache.js | 45 ++ vite.config.js | 36 ++ 16 files changed, 1593 insertions(+), 458 deletions(-) create mode 100644 backend/__pycache__/main.cpython-313.pyc create mode 100644 plans/image-button-plan.md create mode 100644 src/utils/ocrCache.js diff --git a/README.md b/README.md index 9ace79e..b3e8097 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # LLM in Text - 智能写作助手 -基于 Vue3 和 FastAPI 的智能 Markdown 编辑器,集成大语言模型(LLM)实时补全建议功能。 +基于 Vue3 和 FastAPI 的智能 Markdown 编辑器,集成大语言模型(LLM)实时补全建议功能,提供类似 GitHub Copilot 的 Ghost Text 体验。 ## 功能特性 @@ -26,20 +26,24 @@ ## 技术架构 ```mermaid -flowchart LR - subgraph Frontend - A[Vue3] --> B[Milkdown Editor] - B --> C[ProseMirror Plugin] - C --> D[Ghost Text Mark] +flowchart TB + 
subgraph Frontend["前端 (Vue3 + Vite)"] + A[App.vue] --> B[MilkdownEditor.vue] + B --> C[Crepe Editor] + C --> D[ProseMirror] + D --> E[copilotPlugin.ts] + E --> F[copilotGhostMark] + E --> G[api.js] end - subgraph Backend - E[FastAPI] --> F[LLM API] - F --> G[Stream Response] + subgraph Backend["后端 (FastAPI + Python)"] + H[main.py
FastAPI Server] --> I[prompt.py
Prompt 构建] + H --> J[llm.py
Ollama 调用] + J --> K[Ollama API] end - D -->|SSE| E - G -->|text| D + G -->|POST /v1/completions
SSE 流式响应| H + K -->|LLM 响应| J ``` ## 项目结构 @@ -50,7 +54,9 @@ llm-in-text/ │ ├── components/ │ │ └── MilkdownEditor.vue # 主编辑器组件 │ ├── plugins/ -│ │ └── copilotPlugin.ts # ProseMirror AI 补全插件 +│ │ ├── copilotPlugin.ts # ProseMirror AI 补全插件 +│ │ ├── types.ts # 类型定义 +│ │ └── index.ts # 插件导出 │ ├── utils/ │ │ ├── api.js # API 调用封装 │ │ └── config.js # 配置文件 @@ -69,7 +75,7 @@ llm-in-text/ ### 环境要求 - Node.js 18+ - Python 3.8+ -- OpenAI API Key 或 Ollama 服务 +- Ollama 服务(或其他兼容 OpenAI API 的服务) ### 安装 @@ -87,9 +93,8 @@ pip install -r requirements.txt 在 `backend/.env` 中配置: ```env -OPENAI_API_KEY=your_api_key -OLLAMA_BASE_URL=http://localhost:11434/v1/ -OLLAMA_MODEL=gpt-4 +OLLAMA_MODEL=gpt-oss:20b +OLLAMA_HOST=http://localhost:11434 ``` ### 启动 @@ -129,7 +134,34 @@ data: {"done": true} ## 核心实现 -### ProseMirror Mark 系统 +### 后端设计 + +#### main.py - FastAPI 服务器 +- 定义 `/v1/completions` 端点 +- 使用 `StreamingResponse` 返回 SSE 流式响应 +- CORS 配置允许跨域请求 + +#### llm.py - LLM 调用封装 +- 使用 `ollama.AsyncClient` 异步调用 +- 支持 `think='high'` 思考模式 +- 返回 `content` 和 `thinking` 字段 + +#### prompt.py - Prompt 工程 +精心设计的 Prompt 模板,包含 7 条核心规则: + +| 规则 | 说明 | +|------|------| +| RULE #1 | 无缝连接 - 不重复 suffix 内容,避免"复读机"错误 | +| RULE #2 | 空白处理 - 避免双空格,正确对接标点 | +| RULE #3 | 缩进对齐 - 匹配当前缩进级别和类型 | +| RULE #4 | 列表维护 - 识别并继续任务列表、有序列表、无序列表 | +| RULE #5 | 语法闭合 - 自动闭合未完成的 Markdown 语法 | +| RULE #6 | 输出格式 - 仅输出续写文本,无解释无注释 | +| RULE #7 | 必须输出 - 始终提供有用的续写建议 | + +### 前端设计 + +#### ProseMirror Mark 系统 使用 ProseMirror 的 Mark 系统实现灰色建议文本: @@ -151,12 +183,80 @@ export const copilotGhostMark = $markSchema('copilot_ghost', () => ({ } ``` -### 交互处理 +#### copilotPlugin 核心逻辑 -- 点击灰色文本区域:接受建议(移除 mark,保留文本) -- 点击其他区域:拒绝建议(删除灰色文本) -- Tab 键:接受建议 -- Esc 键:拒绝建议 +```mermaid +flowchart LR + A[用户输入] --> B{文档变化?} + B -->|是| C[清除旧建议] + C --> D[防抖 500ms] + D --> E[发送 API 请求] + E --> F[收到建议] + F --> G[插入 Ghost Text] + + G --> H{用户操作} + H -->|Tab| I[接受建议
移除 mark] + H -->|Esc| J[拒绝建议
删除文本] + H -->|点击 Ghost| I + H -->|继续输入| J +``` + +#### 关键函数 + +| 函数 | 作用 | +|------|------| +| `scheduleFetch` | 防抖调度 API 请求 | +| `insertGhostText` | 插入带 mark 的建议文本 | +| `acceptSuggestion` | Tab 接受建议 | +| `rejectSuggestion` | Esc 拒绝建议 | +| `clearGhostText` | 清除当前建议 | + +### 数据流 + +```mermaid +sequenceDiagram + participant U as 用户 + participant E as Editor (ProseMirror) + participant P as copilotPlugin + participant A as api.js + participant B as Backend + participant L as LLM + + U->>E: 输入文本 + E->>P: view.update() + P->>P: 清除旧建议 + P->>P: 防抖 500ms + P->>A: fetchSuggestion(prefix, suffix) + A->>B: POST /v1/completions + B->>B: build_prompt() + B->>L: ollama.chat() + L-->>B: {content, thinking} + B-->>A: SSE stream + A-->>P: suggestion text + P->>E: insertGhostText() + E-->>U: 显示灰色建议 + + alt Tab 键 + U->>P: Tab + P->>E: acceptSuggestion() + E-->>U: 建议变为正常文本 + else Esc 键 + U->>P: Esc + P->>E: rejectSuggestion() + E-->>U: 建议消失 + else 继续输入 + U->>E: 输入其他字符 + E->>P: handleKeyDown() + P->>E: clearGhostText() + end +``` + +## 设计亮点 + +1. **前后端分离**:前端只负责渲染和数据回传,后端负责 LLM 调用、Prompt 构建和数据解析 +2. **低延迟优化**:防抖机制 (500ms) + SSE 流式响应 + AbortController 取消过期请求 +3. **ProseMirror Mark 系统**:与编辑器状态完美集成,支持 Undo/Redo +4. 
**多种交互方式**:Tab/Esc/点击/输入,用户体验友好 ## 许可证 diff --git a/backend/.env b/backend/.env index 1431614..37307ec 100644 --- a/backend/.env +++ b/backend/.env @@ -1,3 +1,4 @@ OPENAI_API_KEY=ollama OLLAMA_HOST=http://192.168.0.120:11434 OLLAMA_MODEL=gpt-oss:20b +VLM_MODEL=qwen3-vl:30b diff --git a/backend/.env.example b/backend/.env.example index 7877999..c74dfba 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,3 +1,4 @@ OPENAI_API_KEY=ollama OLLAMA_BASE_URL=http://192.168.0.120:11434/v1/ OLLAMA_MODEL=gpt-oss:120b +VLM_MODEL=qwen3-vl:30b diff --git a/backend/__pycache__/llm.cpython-313.pyc b/backend/__pycache__/llm.cpython-313.pyc index e6c7c38000e0e456acced6f75517708c4fc7da02..bd49ef595a624405d490efee5b27a0a4201ba710 100644 GIT binary patch literal 5743 zcmb_gU2qfE6~3$0&+5m*mTh5duopW93y}N+4%o4w1`HGVCnIwDs^$hm*9yxqOcfLA4QBvS}nA9AZL{`QEpXB3`VCa6y&0c*ryVwNXC6tD_$ z(Pr5bM8!ocr~;l7K&zM%WnN;{1Qs)z5ojzVl$0zkK;VpJ=t>q1LU0md96X0rkyVM; z1aK9CqGhoRYXwHgyID~sTGG5W=yFYsUYZ(o4H++s!wfSdsLBjkZqR4?orM^+v)_e_ ztb!!U0vm^^T~%K)RvcbJ#AG4R&WbY-0QQk-A9uOVk6wkE5VAYEFXuB0^=oNj)w>;@z;ffrglsGJoJO6@sY{1k&&~b;CWI>LwG<) z;^kD9PYGRW03!d0>xWn&lnSwn>LIQG75jp;FV$j3)idaa=6i3OHXjvu#=x5?j7a0MC z=MajZa0%gxeZ{&gaq?*rVQaN~@qU;x$0b203wSJ3QHLqFy(Pd{s-JicWt;JqOTHtP z7f{s1dhj=DFanW(Vm@Prw>9M5qlL5dl%-`lLWj4Lv4SV1SZw17^^{S>|EG^C`5&-N z>PUB_1yzL|qBC#?-Gs_kVcw-J{EcD|o&BK)#+BWA3|sMU4-^VF+PTnBlXMmYOljUrq!pI<4_3 z-K`1fjDUH-CPBAnFrc%Bn_&oUfvSAC$kLJx_ZD0ae*{k*u(3ZtQFttt?T$|+xsd5- z`JjZ$N5LG6;D|)1C}TcXwsy31E77pGV^#MI;^B~z8C2MVzh}E>2g&Aj3-*&bUIta1ioI!vi z+4hW?yCYP@7Txuv%`jPo$4XT^?51y3eGap)5}}Aayyr0`@MtD|WuWpSROM&elOs4! zqL@)! 
z1-w}@CD&KC-}>wOtLz0y;?q1ka`7Dd@S4^AF2xK-Tw5^&> zDIVpXg+N6kOjVmV$AB`D#()Fy`C^!0l4Z~h)aq7EObfaPFl8o|P_jVXShq_`Dg~`o zlp$pyNPr1bLy9ZNx;-I@!0@^if~)Dy(d!Ap2!ZYt%v%F~K{L3oGqIS=r-fKdw?W9n zjPAI`V;M*!z~J3Pa~Zsd0FO0H{$l7ZLEdRRh7te0^{CQu;U0=t#uXELw^964=f2s>C#Pao@vW5ZN!(eH$9{oyIE862e{_e@QvYHCvKeBV7#}yH@s^*Pu@B8 zIWu;DXKT*gzh>=!FaR!mv*kmc#)pUo|NYH*|NguF{pS)g2dwVL^II!e`@p+~x z$26_&`p%t>&zW$6so8YEfPbmI5Vki*s8JjBsr7}^HuPy1b(*$*8nBZ4;7LEc_>89D z;WOLtsD=KWg@zv8Ev6wch{fkMLDd;El}wZp3s8v=NrC=b7P3@f+_o%K;frT*5YoX$Yidds9 z#hF18S+*D^C+o3N#XC0Bv#P!_KUE?Xv4yu`mM6Ffu)oU7y^ooNN6u~9V^PFru+Wcz zh0322o|}c97NK$`Q1O9tLRL`Pk590WXL8|q6#my=i7E;FAK0X7mlHBE_c=f_ZUs{T z8k0B`lV7_bte!ELBz4#*>nAV?3*HCS!oF=RZpRU?JR?$<2 zubf(I=-R9^;NMB$-)X>q>+`{X^cmP+^lCfo2jo8u{}vV}9}IxyZ?+nq1NBGl41Uf` zK92f`pgtWSNPoyc{h)#Ry(fL!qCWZJ#b-4ouv51vsDw%|_k97BlIxi+vm2@WvjvKj0anHR(M%%P3e6Z4KNFR!cg>42t z?rE50u?FSqHiLkSP4EfAocECX9`fEp zHGf2{e?so>*WBMxcWeIb`Q_HtJ?lG;E_&c^<1O)Rak*w~|EYEF=|y+J05N(ckakLTkbkrR$6k-;G(_Ytj#-{?>d{8``4X&7wwzYoz0zV$iLa=q4unF zuF|XAD)Wo(Ed-C7t*Gy%wO6M8h7M9ME+;lgvwGNQe&1*`YnQM5o%B9xb0_f+@?=c*-@2Xi{BMAXH4P-O6r}V+Lbl&$xGnG`Rag zS1T>bPv}Zf-F82awyM}aJ|pe^WRs8GKYK%kJE0QQe(^^{S6ykl%APy66PIkYSDJI~ zdEE25=iE7QI&BEn$6x*?^^FCge-NU&fHv333Wd;pBq52KLAZ=5Oqn!|X~6Uh^K%xn zB8s5}(_Ym?8FwIwk=P-+OkvAUYH^a~g_bpDU8wu99YRUgA&7>o*UJdSAct&jS^+K@N_X)rAJ=svXFdaGf#BDK5RxSYx2F z?I1C_Mkwr%+%c!*iLn5wK^lTUgRE57f>v4VN>Tyu5jk-qKpC#Ewyq?mWktquUB!lD zes(rI7has3pPZV_Ig(kuPt~;1{!15c8t#hhT3DJ|j4aINT2^#DI~of0k6a4&55Ez- z7zDl1{{F#%!5ka9-5)yo;ra9DIR>^z|AC!huUNP~FTI^R6P=x%dsm!Kr{kHpxT0$M zsF-t>nRDuroE{Y+K$Zv$OBSn{EMP3_YgkECuE)%q2gty^pZp3yClaBJqA%nU6=h5< z01;Fz>yL}sR;yoA`Y2mHxnQmCUg9Y6Yc7e2Ac+mqAxeVglB^ORq$Od5eG}Sv1-##; z?lU(y2U4h-xNOx}L?qc2IzoAn_q09}VI&8+ylNXRL56b;kV%CvIbV@>ot7$WgqB== zPJ|S?#=kp4yO89Guuu#3Wh~JbqqX3uq*sGTD@NFhR8{{ndmG)SL^QXE5RYzP_+qx& zYSs}iwe6fl?I`N1nzW+$dk_~Vd804JFv3->laq?2CgwAdQ)3|6B{U-_)`)chs&aY_ zRTv`rzggA)FaNLR>4zIqxGE#5KrkuKaoz!iXKd?#Pwh>Xs~OVj03d6Vl^!r)`Y6) zAb$I312Ri~_9#j;3LZ?4J-y%)$hHsea^#2{!xRG7T6hP7f3yMa0)^Okc| 
za$i|fZizV>((5jBB-5dofDum1y4)-3svb{^38JWhnso|e6~CPR3TbdM;Roynm&GYX zHyEg!VJX+hU`dfM8Bv2>)l|jcmew*^%@C&UBxEx)gMoAmLC5igd^4W7WeBF3Mfpx5 zz+=+rm~@R{Hv?HrsB7SZGYK;*gOUxw3}G>&C2uYf( zlul>*Qc9mL-_b+FrK~IwUNF0EILozNRI{=YPvHO0xQfL@0ooPDsca;1c z&p4*Rbu6If&SS(k+rCG<*S2Z-&Wi+B!E<$w|Jh+<)297vU;Fon6E1Icm)!MR@9ew% zMYn%@Y9~YnvU-XDu_5)8t-mx9sp4;{A zPVbHs8prlLR}UInK3VULn~7lrnrCym2jd4`{Lg7at1;3q%)oy&}1X!q9cy9L+8 zSN!C^4(#@Wj=#&-WDgo_`;y$W?4YiVTOM{YWPeT|`~f?_ztC3*9H)T)h;qaJk%s~^kLqlbBg`XT zxPEewc^u+^^>~m2*5eTd_5rs+s~Tb@yD+L2e%VIy5x`O1R%oXCt&bbV4T;|@>M(lBpEyH zuK&xu7&d@02K`&x zN4CweB0Bpm;tISkKa(FTSORWPwv1Q4TeOfVAEuM)rxuVx0 zm%Q1f91#f4Jp&iH{a{^NBfdj{)1heV;0~xi`s3l8L;S5&=sWT@3gV(^0e_P#9dJbp zq;G~?QMTnIpSlv?yqSG7^XARl_kJ@QH8mat5C7F|?UOo${+(3pN2o9xP6tAFkc?zz z5=}6ekqGaYbWE_Ar93;yP4JkXaAGH|bCa$K0SlDpC*9Z$^8BO+duZv!URsJ+gwi=# zGf|6csjO?VZlWI7Px!D;LJ@Jg$5y3U21)y#YTf2!L3R&0Xs_4pwrUwfvS*Mf*Ku=% z38KNuOYD#Mr}tMSZ7#yf-u;X$QcAd*AznhQ2u7+6Ah|9g46>E+T4`%NwEDKSw#nUc zLnI(KM%v}3NMOJzHwVx)?izzTWIwc%HcoCKZFm>xFk;VKE#+2QXSprXI^d84M0!i} zu2=MG-`1-QdUZV4ONw;MyM|{GQd$t%M}9JhgOT9$>FT^T2e$UhopP5f4Z%Jq_yivc zJ|!dR@V4$YJ?p6RS!}(-YIGkxA=@YKar_`6;^w zo!loCLwzS5SCdmv6R%{oWIUR|=~Tvq0lYCKnT)2BNhPINBCRYWQ_*w`=MMC~t{5>> zOQ|74>U}+_E~rUesnjY=AG}DZhM^=t2!`XSl;xaHO zYY9y^EMK`A!)jdBO-)G}wnR!bucqT$)vKx!SFsVZO-0IzKcJ1HP`QI52s1Lm4w=Dh zFN$yxUUs|&Zz#tSPUf(a)_7WTVpqZy5-i^sSRtvJT3Vl0f0$Jb^XV>-wwxKP&TH>m zP9r-{N+G4-_u}d6y5&hKdLj#}JsbZRO~DXCG)hbnjar^)G?k8LlY|$e(H~|NYAvYM zMWgc?Hq4}^t9lw5Si{7mRnhgdsgUW7XcV`RshguwgEYmWifLl)O4d}#y8yh~fFH8z zt|+nhR6V{wrD%G8W)U|*D z6$^6If;@Indpc>&g@uF=a$BDBW7D=F$!r#{rNCA>H!b?FWfGP!uO(GtF^f~OX4(>L zgVH5Vh1|HEsM$dXSrG0dq>B)V5E63mUP8JFA$BP*Y&YS0Z9--@$VgxdZ|%*AQd8HB z@ugUS>nb$`2=@mrQ0_;%Se&W8Z-Vn`Db2K;ST(a)k2&Ct{cyk#uiLl=5ylRF zagc+U%1l?Hp529%vO{JEE92;nIooeh*kMPGFf%+15dvhwjvn~QD9mu+uYJ(F2Zi`4 z3t5aO=ZNSzw!gn0J~0kb&IZhfZlF?)@AjqT>7ppDiqcBcO12>O-FSP=y`#rf6ebU)^X)&FZ zuBWrfxRf@ps`$EQs8YJzi3;wsYl>t*xJ;^&kpU}0|Gj3Z09+%>2 zn8O4eGOA)qa`^P=v$Opk>Te+ilgwa{Wml}cD_Rb#pnFV>f^tzhXT?DH4Ui<6&o}_& 
z26`eimOO`_v5b4*i6``ocewYiJCXMs^UTG%&oS!)@*aKW=G;fuStswLZknsxzi@DW zY%5$D#0_Y~dd~VaF#b*WZH$A%UUu&W$Gn5)w)SVP;))1O6Ni4`sitb%IU#9*GwMty znseI1({7uqma-$v%z5p47)^^dUoBtyU|4&W=X%ywd)VBod%{fhwp6v8Yqa_As)@|W zh(lZYZ14Y_^dZT;+JDeho?k+9&EKpE=(of8JFm(HK@dg&PL!Y7?plC#haIv@7KYff z05Yz^x3|FDVW)Tz>1>#l-8_iCY{8UU_UPP*G}-=e+mIqkpM(;LD} zn5%m1jQeG;nQ_a!ZH+-M}MxU^&`qjxza`O-*0Rc^Y3BR9o zqdBP}hFnO2ZPA5oD+LR99O##>rit>otBLy%30`GFFPlt}56V>5WTe%v7zD1V7Si z*m9<#B}ZQCHD0p?3BaQbn1o1e>Fck*e)>L{PUy>E01kLDF;AcWd=s)CYbpuLYeINc zu>xsYwOp~ZZUTn1Tov)$VX9}P1|gheb2To}kStZC_elV=unC!f2;jw>tB;lk6F>hM zdH(n&#Q}Knpv6eJ+IfJh1bFq)q>&q>Gtdq&j+RW82j54MSB#8VPBCR^$?lvRr`@Vi z9HutX$pz5ONM|8)5HnTutJ=?QsN`{Ksu zCUUxR%%B5|?F{fqvWM9>xCL~FCurtddvh}Xs@@4i-NreUbH)`@8J6$~m^kO~cq}fS z*3}uD!)EgDpl2CQB!K$?zjtEdjt5)kjo=n3*#W>M#3q?uq zQ6PVQ3h5OP-ro=WV)*C7D~tDE%kLR3v>(d{jz2h+Zx}6zqt&+y_skW&&_0+C9JwFQ zHynE`9{X>DeEl~^(J0I0d;}Ekhf|Mk_5omrV-}pBJSjD;m||Qgv9NQY!b?{Pg){%^9q!)Yh@V>nE~LbTHZc#zX7fYP==0gSX z@JsD-zWHE5JhWLSxILQHN&a)t1?!Z+6M~q@ne0+iJt^Vnb*JFS$y-z>>36=fC~Itq*Sh=+=+c#D?2b zx2E##7xL4Q$Knsx+K)c)7uw%iI=9xn?_T6L9gi;N-wfwt^Q+y7VtZogT*)6?8Cv!C z75#_rH!PhgHSbzF{iHfx$=^mAdzVgcjXa&7x%^m+er22LT`*NiY@o*Z>vLqq7w^Uj zVsO*zbbJ0(?{FXA6i|KlFI)d1kUu)PT6ZoloO?Ee8vcTLR*O2WGJoFeM`H5^a+K{~ z6k1n>)_mLWgNet CeZ%Ee#D?sxxcKHoF=o5QPhXMQi7`JZ*3NdK=v0>{nZc#!$0 zG5;9}Jv`2w>EIrYj0T|cyB6k5C-=M7lPpv|ZD&C0(+-C4os>$R@h$oUKC@W6U1d z$pSLpU&b-V0K>G}YpGzEZ$o3w*{16FRV*pHwqckpO#s8t>&Oa*;YK-%VW4~g!+54J zIrrvzwqiE;kYry*ki=_WL;#-)J||^lV==hnl1M(-%F7h3gdFALH-XBzmghQE(*QmT zZ}9{nlR!ds zLYzQCJdNl$47}x~_bt&Yi*R{_FA@>*RT?BI(CA!_&2F$jluBX+-e^Q*6$r_puMEjT zH6-VoARrXSXIIB(^Ouz3rE9B~u00msTl04m{d+$1?^(J2&ma8qgMxo>$-CCFdu6hC zaCG(HXrbj~(RUJpX*pQsn~px%`$_E4o`2n!pLw@%`r>0@w$$2L+;@Bx{_95a!pJio zHH<+_a_$t94&8^Cbn;tb(ivFQkAtTK<{w9Qk2%qA1!j!ne(N3v{vpdi`$LW)ywh`v zXCKz@J%!kZ!wlh%@$mK&!~_2c&jL>_{K(B)G@5fA=%;_$TL9+?q`8_lOpD9F6+(!4 zO7=6_?+5cVAM8Y9l4<%C+AyA*8H#S;qEUnLL35z>Bb^5&JPN?QN 
ziZ-YVlH1yhRjb6~(W-jo>qT;sC}R~?)6Fshd4o&>s|EN$Qabe^dU;PS+Gd*7{nCGW_8@hszj`x1|@=nj154%{6qxVvvS|IJx*Q@g=FL2MBuNvL9yv!nfB#2!nZ#YIi`F0@H&Fxu0e}? zV}ByG^;#F>ya!EC+&4B!vEBsf^264@Y|VSx3rxqlqmLP9^1Vkk5Mef0bf%kVWZ6Hz-x#n`ydMo0@rCfn+`yABgydO^fcygJ&8@kt1;P$WcsB(e% Ee=+*hLjV8( literal 0 HcmV?d00001 diff --git a/backend/__pycache__/prompt.cpython-313.pyc b/backend/__pycache__/prompt.cpython-313.pyc index 4b8e849448cc06eae52f1c10345c28a3732d1dbb..430125b624f374ed7578607e66e856b75f0589c9 100644 GIT binary patch literal 3755 zcmZu!%WoUU8J{I7ilN?C{6y=FB?qlyk%}$5MP*g7;&)woGsP^;Z5XP*Ixsc#n#4{St-jUJ zcclF)&zSvmgt69uBiw2zs&d`7T4^P9s9e*a)tx7WK8fF(@6kMACHCvigoRaAV}`j# ziLD&o(V4&0>Brfv^NhKPQeuV9dFSb_>YbWltduAwE@PdQU~Gxw+gKrc-+4-jOC714 zEzL4Ez~I86>*w+< z_L}2_sZ^^k(8~5({dOQ6H>|hP!uO?XwNe|N?M|f>`Ki?}9LL+1Rx4rFT76r>31vVN z98YVkEX+-8nemlZ_jTTHw2rO^!nL)%E7uRjK^?&svLjeWv&X(4ICd~{@?dy$@8-eq z$!~{0crpCJH^XPXOdJdhJ(_zs_h|m%{QiZf$NoBhFgX6G@UXCd;hVwJKlHJYiFTR| zjXb*f@aE&2F9(0IU)(P@2hZ$XKNuMOcHs2m&zh;zZynCSVgepd?fxlyS4m$e z$zv5}Vhwo3Q@rk3(%~k&sHsk2m|__$pxI+TW?5$VVC?wQ zvA;~c7@PbujrVpH z7m&6r=)Zh*1zKuh2`z9QzqPt_humD3E`$Z4S>qz$^Jj87tenf|^SRstX}DUGK@^X8 zA%%$y+_D3^49z;&M7B5$T3(Q_1*3jr8Y!}^Y%Qlb+$ig|en%#hh;4nlFaD0SpiPkq z*;_(cT!oGd=CbGWyeLH-?*U?KyK`AkVaMome7UrGx3tR7bt1&;VW6P}GS?NjiNK}} zG7^~`>215A7Y5w&%#e)BXD{S=!GkY|v#`$xOOVSWA=f-!SYC}r2_S^G2tdLi0XP@@ zEmY7&PoM?GveE;~Y@UBAz{(~N7RNu_B}2zF8G5n8ISo8X7w zQ7HL#4VVcUt`_8pFG5?%fZOoV#e4^}xWBe+;>ifFT_2H-ohU}<_@Qg+Fh(vSTCp7m zBD!#!$WKi3iD_ysQ!_Uao8m>bcFx(LC{oO{hx?9)ZWMjM2`1te4RL-PT34FD1;{v` zy_BciL_%*N+dQK4KsOv2X}Kgc!0sYif~$y$ic;fq>6mdQ~!`$%3BB`l8TQ7p>#7a4(Cs`yy8D79)tb z+9W)@4Yez!g@w4-%F4E?(BZd8HN@EL#WSX$$jId=<~WupllU#lhnNH410<2$@!{IY zTfsEET$T>{FG5dC^gx11x>qFffynxr1pMhB--8nxL}j-t-Nf#u7?K1b23x(&X6PHU zF=Awax=DLkiP9(xBBl$^I04k9vbwowS9?&3T$S!!>aGn=cL}Wn^7;sA0 z71`aR*|)-bwr6+8G2FGYYv@!ms#>FxN727OQ~httq_{X94MgQ9b#7}&fdE`hbrThz z-OrfMsV1d5IiNxZ7^NpJvZdwKkK&D8tfP0oIG z5TEg)1b}JJq!LzUHe$ZSTy#FG`46w vn~eLfK75{hUVNVU`wT5wG=4aC#V}6okGC21PtCZ0o{al{7JrP!Bc=WaFr+hX literal 9003 
zcmd^F+ixRR8MkxmV%qJE3RoUE%~r$?jzhZJ7TmCv<8>O-*p6bGY_zGGvFF&HW<2Ab znc2h(6$|vT7269{TP+o;s8pdMp_W&+RQwC@Rs|uUZL`~bi?o6#9{7Fd%-EB}3*|Xc ze3|h%-|c+g@4MJH@39*KNP7s20{|K?^2Z{CcQBjxB55kap*Pegw( zB!*a|JiIns9>HUDZTRF^dF)u^=@Bu~j3#coL7R(-==NyE>zI6dtRr~C*xDX*yoP)g z-1UT2AM%;d%P1bhc-(B@>CH$r8pkXBX-6ZtDo1I7HFRcZieBl?P}A;iHlAXmte7V<$$f(dy`Olql@|d&s|hAd`#|>F1@w$+AG)Je(u_< zuU&ud%I6=xeC?g*c0RhY^Ww+X-u%O-7k(D|?BhRQf8+gYmtVR5?gv=6bMb{6Z@+Q< z_rKk_bOrCPU3&4ekKViS^Y`(7=k-fFAG~wpSI=V^Q<_gJ#o5ja&+mNjMrP99yuC?SO-ArDCzGZ~XL-{KuB7i@EY) zQdnxtj?bi7IhQFw`7+Cvip5;Ek}nlmyih7vSavyI$!9Z*iC9c}QQfxSd!9z_!3tHZ zaJbesx#O5!RuhKaR9>aL8&0b;<>4t-X~Fx*0pWC#cD}GwTCQY@6(&5B!`UkMWPInn zmp}jTqiesuGS1>lnh>^|U^%=9+Pm4}7V9D+1n+P-Es4Vs0B#Exnkl%ZcM&EC3$U^z zt3m8nO3RSEP0_Q#CA~VrtBngtvCN!f2-Dtc_s?RYM0dyZsk@w)3rFa?w! z#7^6`-Im$Kref}gx#j;`N^HzG0=j`Z4b$Fqn56+2H4|2r5w18UnJ-tdR9VOuA7^pi z?zmm%xB>u?068+L;Erdy%xT%4spCr10UgY?ILz1LLfQ`?kfggtLz=_l=Qs=w;|k$* zB(Auy3?hUbe=yBf7V?!`c`1|4v2U@ZYB5`>X2@xzllVpgW;HyLx8CCQ4S5uc8CbpE z(u7tAVP^MLg`3Rh=}h)~>YEh?=9c3D0|9>xvb=8BH=y8v!|I!lTRQy3a9hmjXmy^% zCI&OXHkxorNeG}0wZDkP;kW1L;IZ1PnOyC4$dY}^y3HL)3M@h#Le8$3`0Oggd^mlhWtM00J>#0C|4=6xk4~UrjTQ9x5G*J@P@c= zz0xu=YRySd(2#OH>(*_Z_{~eg4uww5gu6OPkf|g1z#vRd>vV+O5eAaC1P zO>&s=Q+o-J(>&wZsM7ZAm$#r002`YM8Za6UDU+lsIl`;qGxD@T9s(RfGzNb15YqlC zBKi!=E|$vGWq8S)0)QsB2olK8{QR0^*@ay83Ca#|BEkqfaEkC#n8@iuhAqZTl`~TW zm5#08PWqBuM?2dJo9q8HwpA#pnh4U-zSan5Uo25WW@@O+08xNx+yLUp zx=~)VxvNe2fhs=>?2XheK%M+4l*SdwPD>y~I6w|mrsE3;Ho&}e(2!1P1v&W%*eC!w zEJA{$*a6M-IH;|LY8-rISiS*I=a*6A!&E&>diVrv0)qw+@kkn}ekHS74i2hZD&+h^ zvydsC&QwavtEw|ly?|zJYF(sWbsUb3YQ&F~6wArV)c*+LP%Vaf*52fL3eF?{4%FT` z6k|T>1nA&!-gbIgE0pn1%<-B{4#3rdYFTm*+7(y?kx!-!C&9L%PdRWRZc&XGGA61i z|MuXGgs(k5H6->8R41@#65W6jgIHPAU7L0j$&Zdob}OKl+tR27hp8Iu9D-lpBPrSr z3cHLCO_IhY5)$PZ}nvTsbE$8O*XHHe%hiJ&rLe>Oj+J?Qw zW~coRW3%PzJgs`#Yj+S$)U{IBncDd-;H;UNo%R>R6yB%VIOYnWTjeSsrf59$giF!lQf zXvW6-;>o>CEe)|h0ro2`gtTm9*lE>B_gO+^383E}VB?ub2k@MEG{m&On{M}XJ_)Az 
zey74RrqQ%0PxkpXsA;8{5+rlu3HGBki!uVBCGHB3_ioTl;$mZ91Xztvw2XQ{8gfc!NJeNh=*AZ7&W`U}ZQ%O4{Eb!VZLIy@d+I>&xyo<;8OVEuabl&5^EM1^i}G2OUM#5 zGo#6->VG9dU~wM|W+%b#x(av}RaCdp+mNLL3Hx|Lny2jm2js)y?aD8TqRv=n<=+z&dO14`*Th$86?t0${R7W?K54sUG z{**(tj+%k`oYW!$02$W7KnWAMJrg0dIv+OJt;0RpD@eKN@W&2S6I3j%;M+&)gDN>z zS;%21w76JWp)a*6xig0=0E5;icK*ItKb`e@o!I$%aXaX1f)qhNs-W-vI*Bncf?p2^ z`dP$wtU(3cQ}ngk_8pj{A?%(~!PC1`K}kQwKx%Rw`h5qL#EfmHrtx)%aHdgua;xiU z*0gE1rwmIDwWeztM);OKt@fol-R;|Jo?+^sPlUvsAVh*VI-V{>{&jog$k9*k`^M$W zFIO+dKDqm=f4%#Gi(`1b9KAgB+Odmc|M<$W3x!YUX?J96=$`0qXEZW&=Y>0WBO`Z3 zcaQk*hwnPF+a3MM(9thnnBScni`;(CtFtf7{^HbMM!tR%d?Ja_?IGLQ9>aGxKpZiQ zTie61ixlT~5pfr82>fs)vVCN>ZR_~r`INX1m-K@RCxNHk;b=7ax5$_N9{JktNNG5F W?DD<45qkYiUA?|v|EIi`r2Yfy-vVs_ diff --git a/backend/llm.py b/backend/llm.py index 0385aa9..2a4f236 100644 --- a/backend/llm.py +++ b/backend/llm.py @@ -1,4 +1,6 @@ import os +import time +import logging import ollama from dotenv import load_dotenv @@ -6,27 +8,40 @@ load_dotenv() OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'gpt-oss:20b') OLLAMA_HOST = os.getenv('OLLAMA_HOST', 'http://192.168.0.120:11434') +VLM_MODEL = os.getenv('VLM_MODEL', 'qwen3-vl:30b') client = ollama.AsyncClient(host=OLLAMA_HOST) +logger = logging.getLogger("llm") -async def call_ollama(prompt: str) -> dict: - """ - 调用 Ollama API 并返回 content 和 thinking。 - """ - response = await client.chat( - model=OLLAMA_MODEL, - messages=[{'role': 'user', 'content': prompt}], - stream=False, - options={ - 'temperature': 0.7, - 'repeat_penalty': 1.1, - }, - think='high' - ) - +VLM_OCR_CONTEXT_PROMPT = """You are an OCR and visual-context extractor for markdown writing assistance. + +Your output will be embedded inside an HTML comment as hidden context for a text-completion model. + +Requirements: +- Keep output compact: maximum 120 words. +- Use plain text only (no markdown code fences). +- Never output . +- Do not invent unreadable text; mark uncertain characters with ?. 
+- Preserve original script for recognized text (do not forcibly translate). + +Return exactly this format: + +TEXT: + + +KEY_DETAILS: +- <3-5 short factual bullets about relevant objects/layout> + +LANGUAGE: + + +SUMMARY: +""" + +def _extract_message(response) -> tuple[str, str]: content = "" thinking = "" - + if hasattr(response, 'message') and response.message: content = response.message.content or "" thinking = getattr(response.message, 'thinking', '') or "" @@ -34,5 +49,92 @@ async def call_ollama(prompt: str) -> dict: msg = response.get('message', {}) content = msg.get('content', '') or "" thinking = msg.get('thinking', '') or "" - + + return content, thinking + + +async def call_ollama(prompt: str, *, tag: str = "default", temperature: float = 0.7) -> dict: + """ + 调用 Ollama API 并返回 content 和 thinking。 + """ + start = time.perf_counter() + logger.info( + "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f", + tag, + OLLAMA_MODEL, + OLLAMA_HOST, + len(prompt), + temperature, + ) + + try: + response = await client.chat( + model=OLLAMA_MODEL, + messages=[{'role': 'user', 'content': prompt}], + stream=False, + options={ + 'temperature': temperature, + 'repeat_penalty': 1.1, + }, + ) + except Exception: + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.exception("[LLM][%s] request failed after %.1fms", tag, elapsed_ms) + raise + + content, thinking = _extract_message(response) + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.info( + "[LLM][%s] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d", + tag, + elapsed_ms, + type(response).__name__, + len(content), + len(thinking), + ) + + if not content.strip(): + logger.warning("[LLM][%s] empty content returned by model", tag) + return {"content": content, "thinking": thinking} + +async def call_vlm_ocr(image_bytes: bytes, language: str = 'auto') -> str: + start = time.perf_counter() + logger.info( + "[VLM][ocr] request model=%s host=%s image_bytes=%d language=%s", 
+ VLM_MODEL, + OLLAMA_HOST, + len(image_bytes), + language, + ) + + try: + response = await client.chat( + model=VLM_MODEL, + messages=[{ + 'role': 'user', + 'content': VLM_OCR_CONTEXT_PROMPT, + 'images': [image_bytes] + }], + stream=False, + options={'temperature': 0.3} + ) + except Exception: + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.exception("[VLM][ocr] request failed after %.1fms", elapsed_ms) + raise + + content, thinking = _extract_message(response) + elapsed_ms = (time.perf_counter() - start) * 1000 + logger.info( + "[VLM][ocr] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d", + elapsed_ms, + type(response).__name__, + len(content), + len(thinking), + ) + + if not content.strip(): + logger.warning("[VLM][ocr] empty content returned by model") + + return content diff --git a/backend/main.py b/backend/main.py index 018c4d4..72b9ba4 100644 --- a/backend/main.py +++ b/backend/main.py @@ -3,9 +3,18 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import StreamingResponse, JSONResponse from pydantic import BaseModel import json +import base64 +import uuid +import logging from prompt import build_prompt -from llm import call_ollama +from llm import call_ollama, call_vlm_ocr + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s - %(message)s", +) +logger = logging.getLogger("api") app = FastAPI() @@ -22,24 +31,100 @@ class CompletionRequest(BaseModel): suffix: str languageId: str = 'markdown' +class OCRRequest(BaseModel): + image: str + filename: str = "image.jpg" + language: str = 'auto' + + +def _preview(text: str, limit: int = 80) -> str: + value = (text or "").replace("\n", "\\n") + if len(value) <= limit: + return value + return value[:limit] + "..." 
+ + +def _build_force_non_empty_prompt(base_prompt: str) -> str: + return ( + base_prompt + + "\n\nStrict override for this request:\n" + + "- Output must be non-empty.\n" + + "- If you would otherwise output empty, output a single space.\n" + + "- Keep it short and do not repeat SUFFIX.\n" + ) + + @app.post("/v1/completions") async def create_completion(request: CompletionRequest): + request_id = str(uuid.uuid4())[:8] try: - prompt = build_prompt(request.prefix, request.suffix) - result = await call_ollama(prompt) - - content = result["content"] - + logger.info( + "[%s] /v1/completions prefix_chars=%d suffix_chars=%d lang=%s prefix_tail='%s' suffix_head='%s'", + request_id, + len(request.prefix or ""), + len(request.suffix or ""), + request.languageId, + _preview((request.prefix or "")[-120:]), + _preview((request.suffix or "")[:120]), + ) + prompt = build_prompt(request.prefix, request.suffix, request.languageId) + result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7) + + content = result["content"] or "" + source = "primary" + if not content.strip(): + logger.warning("[%s] primary returned empty content, starting retry", request_id) + retry_prompt = _build_force_non_empty_prompt(prompt) + retry_result = await call_ollama(retry_prompt, tag=f"{request_id}-retry1", temperature=0.4) + content = retry_result["content"] or "" + source = "retry1" + + if not content.strip(): + content = " " + source = "fallback-space" + logger.warning("[%s] retry still empty, forcing single-space fallback", request_id) + + logger.info( + "[%s] completion resolved source=%s content_chars=%d content_preview='%s'", + request_id, + source, + len(content), + _preview(content, 120), + ) + async def generate(): - if content: - yield f"data: {json.dumps({'content': content})}\n\n" + yield f"data: {json.dumps({'content': content})}\n\n" yield f"data: {json.dumps({'done': True})}\n\n" - + return StreamingResponse(generate(), media_type="text/event-stream") - + except 
Exception as e: - import traceback - traceback.print_exc() + logger.exception("[%s] /v1/completions failed: %s", request_id, e) + return JSONResponse(content={"error": str(e)}, status_code=500) + +@app.post("/v1/ocr") +async def ocr_image(request: OCRRequest): + request_id = str(uuid.uuid4())[:8] + try: + logger.info( + "[%s] /v1/ocr filename=%s language=%s image_base64_chars=%d", + request_id, + request.filename, + request.language, + len(request.image or ""), + ) + image_bytes = base64.b64decode(request.image) + logger.info("[%s] /v1/ocr decoded image_bytes=%d", request_id, len(image_bytes)) + result = await call_vlm_ocr(image_bytes, request.language) + logger.info( + "[%s] /v1/ocr success text_chars=%d text_preview='%s'", + request_id, + len(result or ""), + _preview(result or "", 120), + ) + return {"text": result, "filename": request.filename} + except Exception as e: + logger.exception("[%s] /v1/ocr failed: %s", request_id, e) return JSONResponse(content={"error": str(e)}, status_code=500) if __name__ == "__main__": diff --git a/backend/prompt.py b/backend/prompt.py index bcf02c7..5acbc5a 100644 --- a/backend/prompt.py +++ b/backend/prompt.py @@ -1,202 +1,84 @@ -import os from typing import Tuple -def build_prompt(prefix: str, suffix: str) -> str: +MAX_PREFIX_CHARS = 12000 +MAX_SUFFIX_CHARS = 4000 + + +def _sanitize_language_id(language_id: str) -> str: + if not language_id: + return "markdown" + allowed = [] + for ch in language_id.strip(): + if ch.isalnum() or ch in "-_+.": + allowed.append(ch) + value = "".join(allowed)[:32] + return value or "markdown" + + +def _prepare_context(prefix: str, suffix: str) -> Tuple[str, str]: """ - 优化后的提示词构建函数。 - 使用明确的分隔符区分指令部分和实际的 prefix/suffix 内容。 + Prepare prefix/suffix for model completion context. + Keep the historical one-char lookahead behavior to reduce boundary drift. 
""" - # 修正:把suffix的第一个字符移到prefix末尾(解决光标位置偏差) if suffix: - first_char = suffix[0] - prefix = prefix + first_char + prefix = prefix + suffix[0] suffix = suffix[1:] - - recent_prefix = prefix - recent_suffix = suffix + return prefix[-MAX_PREFIX_CHARS:], suffix[:MAX_SUFFIX_CHARS] - prompt = f"""You are an expert writing assistant integrated into a text editor. Your task is to complete the text at the cursor position. -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -RULES -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +def build_prompt(prefix: str, suffix: str, language_id: str = "markdown") -> str: + safe_language_id = _sanitize_language_id(language_id) + recent_prefix, recent_suffix = _prepare_context(prefix, suffix) -RULE #1: SEAMLESS CONNECTION (MOST CRITICAL) + prompt = f"""You are an inline completion engine for a {safe_language_id} editor with ghost-text suggestions. -Your continuation MUST seamlessly bridge the prefix and suffix. This is the MOST IMPORTANT rule. +Your job: +- Return ONLY the text that should be inserted at the cursor between PREFIX and SUFFIX. +- Prefer a meaningful, non-empty insertion with moderate length. +- Avoid overly short outputs with little information value. -The "复读机" (Parrot) Error is when you repeat content that already exists in the suffix. This is the WORST mistake you can make. +Important context: +- PREFIX may contain hidden OCR metadata in HTML comments such as . +- These comments are non-visible context only. +- Never copy, rewrite, or emit HTML comments in output. +- Never output . -Requirements: -- Your output must connect prefix to suffix smoothly -- NEVER repeat content that already exists in the suffix -- If prefix already flows naturally into suffix, output NOTHING (empty string) -- The result should read as one coherent text, as if you never interrupted it +Hard rules: +1. 
Seamless join: + PREFIX + OUTPUT + SUFFIX must read naturally as one continuous document. +2. No suffix repetition: + Do NOT repeat text that already appears at the start of SUFFIX. +3. Balanced length: + Prefer concise but meaningful continuation, not ultra-short fragments. + Default target is 20-120 characters and 1-3 lines. + You may go shorter only when syntax requires it. +4. Avoid trivial output: + Do not output only punctuation or filler such as ".", ",", ";", ":". + Do not output just one token unless it is structurally necessary. +5. Preserve local style: + Match nearby language, tone, punctuation, spacing, and indentation. +6. Markdown awareness: + Continue active list/checkbox/ordered-list patterns when applicable. + Preserve indentation in nested list/code contexts. + Close obvious unclosed inline markdown markers only when needed to bridge. +7. Strict output format: + Output insertion text only. + No explanations, labels, quotes, or code fences. -RULE #2: WHITESPACE & PUNCTUATION +Decision policy: +- If PREFIX already connects naturally to SUFFIX, add a brief but useful continuation when possible. +- If uncertain, prefer a complete short phrase or sentence with clear meaning. -You must carefully check the LAST character of prefix and FIRST character of suffix to ensure perfect docking. - -Requirements: -- If prefix ends with space, do NOT start your output with space (prevents double spaces) -- If prefix does NOT end with space and suffix starts with a letter, you may need to add a space -- If suffix starts with punctuation, do NOT end your output with the same punctuation -- Check for existing spaces around operators before adding more - -RULE #3: INDENTATION ALIGNMENT - -You MUST match the indentation level of the current context. 
- -Requirements: -- Look at the line where cursor is positioned -- Count the leading spaces/tabs on that line -- Match that indentation for new lines -- Use the SAME type of indentation (spaces OR tabs) as the existing code -- For nested blocks, increase indentation appropriately -- For closing braces, match the opening brace's indentation - -RULE #4: LIST MAINTENANCE - -When the prefix ends with a list marker, you MUST recognize the pattern and continue it appropriately. - -Requirements: -- "- [ ] " indicates an unchecked task → continue with task description -- "- [x] " indicates a checked task → continue with completed task description -- "1. ", "2. ", etc. indicates ordered list → increment the number -- "* " or "- " indicates bullet list → continue with same marker style -- "> " indicates blockquote → continue quoted text -- Maintain the same list format and indentation level - -RULE #5: SYNTAX CLOSURE - -Before generating content, CHECK if there are unclosed syntax elements. If so, you MUST close them FIRST. - -Requirements: -- Scan prefix for opening markers: **, *, `, [, ![, ``` -- Check if each opening has a corresponding closing -- If unclosed, add the closing marker FIRST before continuing with content -- Markdown syntax pairs to check: - - Bold: ** must have closing ** - - Italic: * must have closing * - - Bold + Italic: *** must have closing *** - - Inline code: ` must have closing ` - - Code block: ``` must have closing ``` - - Link: [text must have closing ](url) - - Image: ![alt must have closing ](url) - -RULE #6: OUTPUT FORMAT - -Your output will be directly inserted into the document. Output ONLY the continuation text. 
- -Requirements: -- Output ONLY the text that should appear at the cursor position -- NO explanations, NO comments, NO meta-text -- NO code blocks wrapping your output -- NO phrases like "Here's the continuation:" or "I'll complete this for you:" -- Your output is inserted DIRECTLY into the user's document - -RULE #7: ALWAYS OUTPUT SOMETHING (MANDATORY) - -You MUST always output some content. Empty output is NOT allowed. - -Requirements: -- Even if the prefix seems complete, you should suggest a natural continuation -- If the prefix ends mid-sentence, complete the sentence -- If the prefix ends at a natural break point, suggest the next logical content -- Examples of valid continuations: - - Add the next word or phrase - - Complete an incomplete thought - - Add a relevant follow-up sentence - - Continue a list with the next item - - Add closing punctuation if missing -- NEVER output an empty string - always provide some useful continuation - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -EXAMPLES -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -EXAMPLE 1 - Seamless Connection: +Examples: The quick brown fox jumps over the lazy dog. -Output: "" (empty - nothing needed, prefix already connects to suffix) -Result: "The quick brown fox jumps over the lazy dog." +Output: "moved quietly and then " -EXAMPLE 2 - Seamless Connection with Space: -Hello -world! -Output: " " -Result: "Hello world!" 
- -EXAMPLE 3 - Whitespace Docking: -const a = -1; -Output: "1;" -Result: "const a = 1;" - -EXAMPLE 4 - Indentation Alignment: -function test() {{\\n if (true) {{\\n console.log('hi');\\n -\\n}} -Output: "}}\\n}}" -Result: " }}\\n}}" (correctly closes if with 4 spaces, then function) - -EXAMPLE 5 - Task List: -## TODO\\n- [ ] Buy groceries\\n- [ ] +## TODO\\n- [ ] Buy milk\\n- [ ] -Output: "Call mom" -Result: "## TODO\\n- [ ] Buy groceries\\n- [ ] Call mom" +Output: "Write release notes and share draft with team" -EXAMPLE 6 - Ordered List: -1. First item\\n2. Second item\\n - -Output: "3. Third item" -Result: "1. First item\\n2. Second item\\n3. Third item" - -EXAMPLE 7 - Bullet List: -* Apple\\n* Banana\\n* - -Output: "Cherry" -Result: "* Apple\\n* Banana\\n* Cherry" - -EXAMPLE 8 - Unclosed Bold: -This is **important - text continues here. -Output: "** " -Result: "This is **important** text continues here." - -EXAMPLE 9 - Unclosed Link: -Click [here for more - information. -Output: "](https://example.com)" -Result: "Click [here for more](https://example.com) information." - -EXAMPLE 10 - Unclosed Code Block: -```python\\ndef hello(): -\\nprint('done') -Output: "\\n print('hello')\\n```" -Result: Code block properly closed with ``` - -EXAMPLE 11 - Clean Output: -For any completion, output ONLY the continuation text: -Output: "Hello world!" -NOT: "Here's what comes next: Hello world!" -NOT: "```Hello world```" -NOT: "I'll complete this for you: Hello world!" - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -FINAL CHECKLIST -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -Before outputting, verify: -□ Does my output connect prefix and suffix WITHOUT repeating suffix content? -□ Are there no double spaces or missing spaces between prefix and suffix? -□ Does my indentation match the context? -□ If there's a list marker, did I continue the list pattern? -□ Did I close any unclosed Markdown syntax? 
-□ Is my output ONLY the continuation text, nothing else? - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -NOW COMPLETE THE FOLLOWING TEXT -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Now produce the insertion. {recent_prefix} @@ -207,5 +89,5 @@ NOW COMPLETE THE FOLLOWING TEXT Output:""" - + return prompt.strip() diff --git a/plans/image-button-plan.md b/plans/image-button-plan.md new file mode 100644 index 0000000..b39d27e --- /dev/null +++ b/plans/image-button-plan.md @@ -0,0 +1,269 @@ +# Image Button Implementation Plan + +## Overview + +Add an image button to the MilkdownEditor that allows users to insert images at the cursor position. The button will provide a dropdown menu with two options: upload local file or input image URL. + +## Current Architecture Analysis + +### Existing Image Handling + +The editor already has image support through `@milkdown/crepe`: + +```javascript +// From MilkdownEditor.vue lines 217-231 +features: { + [Crepe.Feature.Latex]: true, + [Crepe.Feature.ImageBlock]: true, +}, +featureConfigs: { + [Crepe.Feature.ImageBlock]: { + onUpload: (file) => { + const objectUrl = URL.createObjectURL(file) + objectUrls.add(objectUrl) + performOCR(file, objectUrl) + return objectUrl + } + } +} +``` + +### Editor Access Pattern + +The code uses `editorViewCtx` to access the ProseMirror editor view: + +```javascript +crepe.editor.action((ctx) => { + const view = ctx.get(editorViewCtx) + // manipulate editor state +}) +``` + +## Implementation Plan + +### 1. Template Changes + +Add new button with dropdown menu in the `action-buttons` section: + +```html + +
+ + + +
+ + +
+
+ + + + + +
+
+ + + +
+
+``` + +### 2. Script Changes + +Add new refs and methods: + +```javascript +// New refs +const imageInputRef = ref(null) +const showImageDropdown = ref(false) +const showUrlDialog = ref(false) +const imageUrl = ref('') + +// Toggle dropdown +const toggleImageDropdown = () => { + showImageDropdown.value = !showImageDropdown.value +} + +// Trigger file input +const triggerImageUpload = () => { + showImageDropdown.value = false + imageInputRef.value?.click() +} + +// Handle file upload - reuse existing onUpload logic +const handleImageUpload = async (event) => { + const file = event.target.files?.[0] + if (!file) return + + const objectUrl = URL.createObjectURL(file) + objectUrls.add(objectUrl) + performOCR(file, objectUrl) + + // Insert image at cursor + insertImageAtCursor(objectUrl) + event.target.value = '' +} + +// Insert image from URL +const insertImageFromUrl = () => { + if (!imageUrl.value.trim()) return + insertImageAtCursor(imageUrl.value.trim()) + imageUrl.value = '' + showUrlDialog.value = false +} + +// Core function: insert image at cursor position +const insertImageAtCursor = (src) => { + if (!crepe) return + + crepe.editor.action((ctx) => { + const view = ctx.get(editorViewCtx) + const { state } = view + const { selection, schema } = state + + // Get image node type from schema + const imageType = schema.nodes.image + if (!imageType) return + + // Create image node + const imageNode = imageType.create({ src }) + + // Create transaction to insert at cursor + let tr = state.tr + tr = tr.replaceSelectionWith(imageNode) + + view.dispatch(tr) + }) +} +``` + +### 3.
Style Changes + +Add styles for dropdown and dialog: + +```css +/* Image button wrapper */ +.image-btn-wrapper { + position: relative; +} + +/* Dropdown menu */ +.image-dropdown { + position: absolute; + bottom: 100%; + right: 0; + margin-bottom: 8px; + background: #fff; + border: 1px solid #ddd; + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0,0,0,0.15); + overflow: hidden; + z-index: 10000; + min-width: 160px; +} + +.image-dropdown button { + display: block; + width: 100%; + padding: 10px 16px; + border: none; + background: none; + text-align: left; + cursor: pointer; + font-size: 14px; + color: #333; +} + +.image-dropdown button:hover { + background: #f5f5f5; +} + +/* URL dialog overlay */ +.url-dialog-overlay { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0,0,0,0.3); + display: flex; + align-items: center; + justify-content: center; + z-index: 10001; +} + +.url-dialog { + background: #fff; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 16px rgba(0,0,0,0.2); +} + +.url-dialog input { + width: 300px; + padding: 8px 12px; + border: 1px solid #ddd; + border-radius: 4px; + margin-bottom: 12px; +} + +.url-dialog button { + padding: 8px 16px; + margin-right: 8px; + border: 1px solid #ddd; + border-radius: 4px; + cursor: pointer; +} +``` + +## Workflow Diagram + +```mermaid +flowchart TD + A[Click Image Button] --> B{Toggle Dropdown} + B --> C[Show Dropdown Menu] + C --> D{User Choice} + D -->|Upload Local| E[Open File Picker] + D -->|From URL| F[Show URL Dialog] + E --> G[Select Image File] + G --> H[Create Object URL] + H --> I[Perform OCR] + I --> J[Insert Image at Cursor] + F --> K[Enter URL] + K --> L[Click Insert] + L --> J + J --> M[Image Appears in Editor] +``` + +## Key Implementation Notes + +1. **Reuse existing logic**: The `onUpload` callback logic for `Crepe.Feature.ImageBlock` should be reused for local file uploads to maintain consistency with OCR processing. + +2. 
**ProseMirror API**: Use `schema.nodes.image.create()` and `replaceSelectionWith()` to insert images at cursor position. + +3. **Click outside to close**: The dropdown should close when clicking outside. This can be achieved with a click-outside directive or by listening to document clicks. + +4. **Accessibility**: Ensure proper ARIA labels and keyboard navigation support. + +## Files to Modify + +- `src/components/MilkdownEditor.vue` - All changes will be in this single file + +## Dependencies + +No new dependencies required. All functionality uses existing: +- Vue 3 Composition API +- Milkdown/ProseMirror APIs +- Native browser APIs (URL.createObjectURL, FileReader) \ No newline at end of file diff --git a/src/App.vue b/src/App.vue index c330dc6..2844c9b 100644 --- a/src/App.vue +++ b/src/App.vue @@ -1,6 +1,7 @@ @@ -176,6 +440,7 @@ onUnmounted(() => { bottom: 20px; right: 20px; display: flex; + flex-direction: column; gap: 8px; z-index: 9999; } @@ -193,12 +458,14 @@ onUnmounted(() => { align-items: center; justify-content: center; box-shadow: 0 2px 8px rgba(0,0,0,0.1); + opacity: 0.5; } .action-btn:hover { background-color: #4a90d9; color: white; border-color: #4a90d9; + opacity: 1; } .action-btn.ai-disabled { @@ -213,15 +480,42 @@ onUnmounted(() => { border-color: #4a90d9; } +.action-btn.force-disabled { + background-color: #ccc; + color: #999; + border-color: #ccc; + cursor: not-allowed; + opacity: 0.6; +} + +.action-btn.force-disabled:hover { + background-color: #ccc; + color: #999; + border-color: #ccc; + opacity: 0.6; +} + +.size-indicator { + font-size: 10px; + color: #999; + text-align: center; + margin-top: 4px; +} + +.size-indicator.over-limit { + color: #e74c3c; +} + .action-btn { position: relative; } .btn-tooltip { position: absolute; - top: -32px; - left: 50%; - transform: translateX(-50%); + top: 50%; + right: 100%; + transform: translateY(-50%); + margin-right: 8px; background: #333; color: #fff; font-size: 12px; @@ -237,6 +531,116 @@ 
onUnmounted(() => { opacity: 1; } +.action-btn:focus-visible .btn-tooltip { + opacity: 1; +} + +.image-btn-wrapper { + position: relative; +} + +.image-dropdown { + position: absolute; + bottom: 100%; + right: 0; + margin-bottom: 8px; + background: #fff; + border: 1px solid #ddd; + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0,0,0,0.15); + overflow: hidden; + z-index: 10000; + min-width: 160px; +} + +.image-dropdown button { + display: block; + width: 100%; + padding: 10px 16px; + border: none; + background: none; + text-align: left; + cursor: pointer; + font-size: 14px; + color: #333; +} + +.image-dropdown button:hover { + background: #f5f5f5; +} + +.url-dialog-overlay { + position: fixed; + top: 0; + left: 0; + right: 0; + bottom: 0; + background: rgba(0,0,0,0.3); + display: flex; + align-items: center; + justify-content: center; + z-index: 10001; +} + +.url-dialog { + background: #fff; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 16px rgba(0,0,0,0.2); + min-width: 320px; +} + +.url-dialog h3 { + margin: 0 0 12px 0; + font-size: 16px; + color: #333; +} + +.url-dialog input { + width: 100%; + box-sizing: border-box; + padding: 10px 12px; + border: 1px solid #ddd; + border-radius: 4px; + font-size: 14px; + margin-bottom: 16px; +} + +.url-dialog input:focus { + outline: none; + border-color: #4a90d9; +} + +.url-dialog-buttons { + display: flex; + justify-content: flex-end; + gap: 8px; +} + +.dialog-btn { + padding: 8px 16px; + border: 1px solid #ddd; + border-radius: 4px; + cursor: pointer; + font-size: 14px; + background: #fff; + color: #333; +} + +.dialog-btn:hover { + background: #f5f5f5; +} + +.dialog-btn.primary { + background: #4a90d9; + color: #fff; + border-color: #4a90d9; +} + +.dialog-btn.primary:hover { + background: #3a80c9; +} + .milkdown-editor { width: 100%; height: 100%; @@ -247,7 +651,7 @@ onUnmounted(() => { .milkdown-editor :deep(.milkdown) { max-width: none; margin: 0 !important; - padding: 20px 40px !important; + padding: 0 
40px !important; min-height: 100%; } @@ -262,6 +666,25 @@ onUnmounted(() => { padding: 0 !important; } +.milkdown-editor :deep(.milkdown > *:first-child) { + margin-top: 0 !important; + padding-top: 0 !important; +} + +.milkdown-editor :deep(.ProseMirror) { + margin: 0 !important; + padding: 0 !important; +} + +.milkdown-editor :deep(.ProseMirror img) { + max-width: 60%; + height: auto; +} + +.milkdown-editor :deep(.ProseMirror > *:first-child) { + margin-top: 0 !important; +} + .milkdown-editor :deep(.milkdown__aside), .milkdown-editor :deep(.milkdown__aside-wrapper), .milkdown-editor :deep([class*="aside"]), @@ -314,7 +737,7 @@ onUnmounted(() => { .copilot-ghost-text { color: #999; opacity: 0.6; - pointer-events: none; + pointer-events: auto; } .copilot-ghost-text.copilot-loading { diff --git a/src/plugins/copilotPlugin.ts b/src/plugins/copilotPlugin.ts index 1131992..ed6979c 100644 --- a/src/plugins/copilotPlugin.ts +++ b/src/plugins/copilotPlugin.ts @@ -1,14 +1,14 @@ import { Plugin, PluginKey, Selection } from '@milkdown/prose/state' import { $prose, $ctx, $markSchema } from '@milkdown/kit/utils' import { parserCtx } from '@milkdown/kit/core' -import { Node as ProseNode, Fragment, Slice } from '@milkdown/prose/model' +import { Node as ProseNode, Fragment } from '@milkdown/prose/model' import type { Ctx } from '@milkdown/kit/core' import type { EditorView } from '@milkdown/prose/view' +import { getOcrCache, checkSizeLimit as checkOcrSizeLimit, OCR_SIZE_LIMIT } from '../utils/ocrCache' const COPILOT_PLUGIN_KEY = new PluginKey('milkdown-copilot') -const DEBOUNCE_MS = 500 - -let enabled = true +const DEBOUNCE_MS = 1000 +const SIZE_LIMIT = OCR_SIZE_LIMIT interface CopilotState { from: number @@ -21,12 +21,21 @@ interface CopilotConfig { debounceMs?: number } +interface CopilotRuntime { + enabled: boolean + debounceTimer: ReturnType | null + abortController: AbortController | null + ctx: Ctx +} + const initialState: CopilotState = { from: 0, to: 0, suggestion: '' } 
+const runtimeByView = new WeakMap() + export const copilotConfigCtx = $ctx({ fetchSuggestion: async () => '', debounceMs: DEBOUNCE_MS @@ -36,21 +45,68 @@ export const copilotGhostMark = $markSchema('copilot_ghost', () => ({ excludes: '_', inclusive: true, parseDOM: [{ tag: 'span[data-copilot-ghost]' }], - toDOM: () => ['span', { 'data-copilot-ghost': '', class: 'copilot-ghost-text' }, 0] + toDOM: () => ['span', { 'data-copilot-ghost': '', class: 'copilot-ghost-text' }, 0], + parseMarkdown: { + match: () => false, + runner: () => {} + }, + toMarkdown: { + match: (mark) => mark.type.name === 'copilot_ghost', + runner: () => {} + } })) -let debounceTimer: ReturnType | null = null -let abortController: AbortController | null = null -let currentCtx: Ctx | null = null +function clearRuntimeRequests(runtime: CopilotRuntime) { + if (runtime.debounceTimer) { + clearTimeout(runtime.debounceTimer) + runtime.debounceTimer = null + } + + if (runtime.abortController) { + runtime.abortController.abort() + runtime.abortController = null + } +} + +function findGhostRangeByMarks(view: EditorView): { from: number; to: number } | null { + const markType = view.state.schema.marks.copilot_ghost + if (!markType) return null + + let from = Number.POSITIVE_INFINITY + let to = -1 + + view.state.doc.descendants((node, pos) => { + if (node.isText && node.marks.some((m: any) => m.type === markType)) { + from = Math.min(from, pos) + to = Math.max(to, pos + node.nodeSize) + } + return true + }) + + if (!Number.isFinite(from) || to <= from) return null + return { from, to } +} + +function getGhostRange(view: EditorView): { from: number; to: number } | null { + const state = COPILOT_PLUGIN_KEY.getState(view.state) + if (state && state.from < state.to) { + return { from: state.from, to: state.to } + } + return findGhostRangeByMarks(view) +} + +function hasGhostText(view: EditorView): boolean { + return getGhostRange(view) !== null +} function clearGhostText(view: EditorView) { - const state = 
COPILOT_PLUGIN_KEY.getState(view.state) - if (state && state.suggestion && state.from < state.to) { - const tr = view.state.tr - .delete(state.from, state.to) - .setMeta(COPILOT_PLUGIN_KEY, { ...initialState }) - view.dispatch(tr) - } + const range = getGhostRange(view) + if (!range) return + + const tr = view.state.tr + .delete(range.from, range.to) + .setMeta(COPILOT_PLUGIN_KEY, { ...initialState }) + view.dispatch(tr) } function isBlockNode(node: ProseNode): boolean { @@ -67,39 +123,24 @@ function hasBlockNodes(doc: ProseNode): boolean { return hasBlock } -function addGhostMarkToNode(node: ProseNode, ghostMarkType: any): ProseNode { - if (node.isText) { - return node.mark(node.marks.concat(ghostMarkType.create())) - } - if (node.isLeaf) { - return node - } - const newContent: ProseNode[] = [] - node.forEach((child) => { - newContent.push(addGhostMarkToNode(child, ghostMarkType)) - }) - return node.copy(Fragment.from(newContent)) -} - -function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): Fragment { +function extractInlineContent(doc: ProseNode, schema: any): Fragment { const nodes: ProseNode[] = [] let isFirstBlock = true - + doc.forEach((blockNode) => { if (!isFirstBlock) { const hardBreak = schema.nodes.hard_break?.create() if (hardBreak) { nodes.push(hardBreak) } else { - nodes.push(schema.text('\n', [ghostMarkType.create()])) + nodes.push(schema.text('\n')) } } isFirstBlock = false - + blockNode.forEach((inlineNode) => { if (inlineNode.isText) { - const combinedMarks = inlineNode.marks.concat(ghostMarkType.create()) - nodes.push(inlineNode.mark(combinedMarks)) + nodes.push(inlineNode) } else if (inlineNode.type.name === 'hard_break') { nodes.push(inlineNode) } else if (inlineNode.isLeaf) { @@ -107,8 +148,7 @@ function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): } else if (inlineNode.content.size > 0) { inlineNode.forEach((nestedNode) => { if (nestedNode.isText) { - const combinedMarks = 
nestedNode.marks.concat(ghostMarkType.create()) - nodes.push(nestedNode.mark(combinedMarks)) + nodes.push(nestedNode) } else if (nestedNode.isLeaf) { nodes.push(nestedNode) } @@ -116,52 +156,83 @@ function extractInlineContent(doc: ProseNode, ghostMarkType: any, schema: any): } }) }) - + return Fragment.from(nodes) } -async function insertGhostText(view: EditorView, suggestion: string, from: number) { - if (!currentCtx || !suggestion) return - +function normalizeSuggestionText(raw: string): string { + if (!raw) return raw + + let text = raw.replace(/\r\n?/g, '\n') + const trimmed = text.trim() + + // Some models may return a JSON-encoded string literal, decode it if so. + if (trimmed.startsWith('"') && trimmed.endsWith('"')) { + try { + const parsed = JSON.parse(trimmed) + if (typeof parsed === 'string') { + text = parsed.replace(/\r\n?/g, '\n') + } + } catch { + // Keep original text when not valid JSON. + } + } + + // If newlines are escaped literally, convert them back. + if (!text.includes('\n') && text.includes('\\n')) { + text = text.replace(/\\n/g, '\n') + } + if (text.includes('\\t')) { + text = text.replace(/\\t/g, '\t') + } + + return text +} + +async function insertGhostText(view: EditorView, suggestion: string, from: number, ctx: Ctx) { + if (!suggestion) return + const schema = view.state.schema const markType = schema.marks.copilot_ghost - + if (!markType) { console.error('[Copilot] copilot_ghost mark not found in schema') return } - + try { - const parser = currentCtx.get(parserCtx) + const parser = ctx.get(parserCtx) const parsedDoc = await parser(suggestion) - + if (!parsedDoc) { insertPlainText(view, suggestion, from, markType) return } - + const containsBlocks = hasBlockNodes(parsedDoc) - + if (containsBlocks) { const $from = view.state.doc.resolve(from) const insertPos = $from.after($from.depth) - + const blockNodes: ProseNode[] = [] parsedDoc.forEach((node) => { - blockNodes.push(addGhostMarkToNode(node, markType)) + blockNodes.push(node) }) - 
+ const fragment = Fragment.from(blockNodes) const tr = view.state.tr tr.insert(insertPos, fragment) const endPos = insertPos + fragment.size + tr.addMark(insertPos, endPos, markType.create()) tr.setMeta(COPILOT_PLUGIN_KEY, { from: insertPos, to: endPos, suggestion }) view.dispatch(tr) } else { - const inlineFragment = extractInlineContent(parsedDoc, markType, schema) + const inlineFragment = extractInlineContent(parsedDoc, schema) const tr = view.state.tr tr.insert(from, inlineFragment) const endPos = from + inlineFragment.size + tr.addMark(from, endPos, markType.create()) tr.setMeta(COPILOT_PLUGIN_KEY, { from, to: endPos, suggestion }) view.dispatch(tr) } @@ -180,66 +251,112 @@ function insertPlainText(view: EditorView, suggestion: string, from: number, mar view.dispatch(tr) } -function doFetchSuggestion(view: EditorView, pos: number, prefix: string, suffix: string) { - if (!currentCtx) return - - const config = currentCtx.get(copilotConfigCtx.key) - - if (abortController) { - abortController.abort() - abortController = null +function extractImageFilenames(doc: ProseNode): string[] { + const filenames: string[] = [] + doc.descendants((node: ProseNode) => { + if (node.type.name === 'image' && node.attrs.src) { + filenames.push(node.attrs.src) + } + }) + return filenames +} + +function buildPrefixWithOCR(prefix: string, doc: ProseNode, cursorPos: number): string { + const ocrEntries: string[] = [] + + doc.descendants((node: ProseNode, pos) => { + if (pos >= cursorPos) return false + if (node.type.name !== 'image' || !node.attrs.src) return true + + const ocrText = getOcrCache(node.attrs.src) + if (!ocrText) return true + + const altText = typeof node.attrs.alt === 'string' ? 
node.attrs.alt : '' + ocrEntries.push(`image(${altText || 'untitled'}): ${ocrText}`) + return true + }) + + if (!ocrEntries.length) return prefix + return `${prefix}\n\n[OCR Context]\n${ocrEntries.join('\n')}` +} + +function doFetchSuggestion(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) { + const config = runtime.ctx.get(copilotConfigCtx.key) + + if (runtime.abortController) { + runtime.abortController.abort() + runtime.abortController = null } - - abortController = new AbortController() - - config.fetchSuggestion(prefix, suffix, abortController.signal) - .then(suggestion => { - if (view.state.selection.from !== pos) return - - if (suggestion) { - insertGhostText(view, suggestion, pos) + + const controller = new AbortController() + runtime.abortController = controller + + config.fetchSuggestion(prefix, suffix, controller.signal) + .then((suggestion) => { + if (!runtime.enabled) return + if (view.state.selection.from !== pos || view.state.selection.to !== pos) return + + const normalizedSuggestion = normalizeSuggestionText(suggestion) + if (normalizedSuggestion) { + insertGhostText(view, normalizedSuggestion, pos, runtime.ctx) } }) - .catch(e => { - if (e.name !== 'AbortError') { + .catch((e: any) => { + if (e?.name !== 'AbortError') { console.error('[Copilot] Error:', e) } }) .finally(() => { - abortController = null + if (runtime.abortController === controller) { + runtime.abortController = null + } }) } -function scheduleFetch(view: EditorView, pos: number, prefix: string, suffix: string) { - if (!enabled) return - - if (debounceTimer) { - clearTimeout(debounceTimer) - debounceTimer = null +function scheduleFetch(view: EditorView, runtime: CopilotRuntime, pos: number, prefix: string, suffix: string) { + if (!runtime.enabled) return + + const doc = view.state.doc + const imageFilenames = extractImageFilenames(doc) + const { overLimit } = checkOcrSizeLimit(doc.content.size, imageFilenames) + + if (overLimit) { + 
setCopilotEnabled(view, false) + return } - - debounceTimer = setTimeout(() => { - debounceTimer = null - doFetchSuggestion(view, pos, prefix, suffix) - }, DEBOUNCE_MS) + + const prefixWithOCR = buildPrefixWithOCR(prefix, doc, pos) + + if (runtime.debounceTimer) { + clearTimeout(runtime.debounceTimer) + runtime.debounceTimer = null + } + + const debounceMs = runtime.ctx.get(copilotConfigCtx.key).debounceMs ?? DEBOUNCE_MS + runtime.debounceTimer = setTimeout(() => { + runtime.debounceTimer = null + doFetchSuggestion(view, runtime, pos, prefixWithOCR, suffix) + }, debounceMs) } function acceptSuggestion(view: EditorView) { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - if (!state?.suggestion || state.from >= state.to) return false - + const range = getGhostRange(view) + if (!range) return false + const tr = view.state.tr const doc = tr.doc - const from = state.from - const to = state.to - + const from = range.from + const to = range.to + const markType = view.state.schema.marks.copilot_ghost + if (!markType) return false + doc.nodesBetween(from, to, (node, pos) => { - if (node.marks.some((m: any) => m.type.name === 'copilot_ghost')) { - tr.removeMark(pos, pos + node.nodeSize, view.state.schema.marks.copilot_ghost) + if (node.marks.some((m: any) => m.type === markType)) { + tr.removeMark(pos, pos + node.nodeSize, markType) } }) - - const endPos = Math.min(state.to, tr.doc.content.size) + + const endPos = Math.min(to, tr.doc.content.size) tr.setSelection(Selection.near(tr.doc.resolve(endPos))) tr.setMeta(COPILOT_PLUGIN_KEY, { ...initialState }) view.dispatch(tr) @@ -247,108 +364,180 @@ function acceptSuggestion(view: EditorView) { } function rejectSuggestion(view: EditorView) { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - if (!state?.suggestion) return false - + if (!hasGhostText(view)) return false + clearGhostText(view) return true } -export const copilotPlugin = $prose((ctx) => { - currentCtx = ctx - - return new Plugin({ - key: 
COPILOT_PLUGIN_KEY, - state: { - init: () => ({ ...initialState }), - apply: (tr, value) => { - const meta = tr.getMeta(COPILOT_PLUGIN_KEY) - if (meta !== undefined) { - return meta - } - - if (tr.docChanged && value.suggestion) { - return { ...initialState } - } - - return value +export const copilotPlugin = $prose((ctx) => new Plugin({ + key: COPILOT_PLUGIN_KEY, + state: { + init: () => ({ ...initialState }), + apply: (tr, value) => { + const meta = tr.getMeta(COPILOT_PLUGIN_KEY) + if (meta !== undefined) { + return meta } - }, - props: { - handleKeyDown: (view, event) => { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - - if (event.key === 'Tab' && state?.suggestion) { - event.preventDefault() - return acceptSuggestion(view) - } - - if (event.key === 'Escape' && state?.suggestion) { - event.preventDefault() - return rejectSuggestion(view) - } - - if (state?.suggestion && event.key !== 'Shift' && event.key !== 'Control' && event.key !== 'Alt' && event.key !== 'Meta') { - clearGhostText(view) - } - - return false - }, - handleClick: (view, pos) => { - const state = COPILOT_PLUGIN_KEY.getState(view.state) - if (!state?.suggestion) return false - - if (pos >= state.from && pos < state.to) { - return acceptSuggestion(view) - } - + + if (tr.docChanged && value.suggestion) { + return { ...initialState } + } + + return value + } + }, + props: { + handleKeyDown: (view, event) => { + const hasGhost = hasGhostText(view) + + if (event.key === 'Tab' && hasGhost) { + event.preventDefault() + return acceptSuggestion(view) + } + + if (event.key === 'Escape' && hasGhost) { + event.preventDefault() + return rejectSuggestion(view) + } + + if (hasGhost && event.key !== 'Shift' && event.key !== 'Control' && event.key !== 'Alt' && event.key !== 'Meta') { clearGhostText(view) - return false } + + return false }, - view: () => ({ - update: (view, prevState) => { - if (view.state.doc.eq(prevState.doc) && view.state.selection.eq(prevState.selection)) { - return - } - - const 
state = COPILOT_PLUGIN_KEY.getState(view.state) - if (state?.suggestion) { - return - } - - if (!view.state.doc.eq(prevState.doc)) { - const { from, to } = view.state.selection - if (from !== to) return - - const doc = view.state.doc - const prefix = doc.textBetween(0, from) - const suffix = doc.textBetween(to, doc.content.size) - - scheduleFetch(view, from, prefix, suffix) - } + handleClick: (view, pos) => { + const range = getGhostRange(view) + if (!range) return false + + if (pos >= range.from && pos <= range.to) { + return acceptSuggestion(view) } - }) - }) -}) + + clearGhostText(view) + return false + } + }, + view: (view) => { + let activeView = view + let activeDom = view.dom + const runtime: CopilotRuntime = { + enabled: true, + debounceTimer: null, + abortController: null, + ctx + } + runtimeByView.set(view, runtime) + + const onKeydownCapture = (event: KeyboardEvent) => { + if (!hasGhostText(activeView)) return + + if (event.key === 'Tab') { + event.preventDefault() + event.stopPropagation() + event.stopImmediatePropagation?.() + acceptSuggestion(activeView) + return + } + + if (event.key === 'Escape') { + event.preventDefault() + event.stopPropagation() + event.stopImmediatePropagation?.() + rejectSuggestion(activeView) + } + } + + const onPointerDownCapture = (event: MouseEvent) => { + if (!hasGhostText(activeView)) return + const targetNode = event.target instanceof Node ? event.target : null + const target = targetNode instanceof Element ? targetNode : targetNode?.parentElement + if (!target) return + + // Accept suggestion when user clicks any rendered ghost-text fragment. 
+ if (target.closest('[data-copilot-ghost]')) { + event.preventDefault() + event.stopPropagation() + event.stopImmediatePropagation?.() + acceptSuggestion(activeView) + } + } + + const bindDomListeners = (dom: HTMLElement) => { + dom.addEventListener('keydown', onKeydownCapture, true) + dom.addEventListener('mousedown', onPointerDownCapture, true) + } + + const unbindDomListeners = (dom: HTMLElement) => { + dom.removeEventListener('keydown', onKeydownCapture, true) + dom.removeEventListener('mousedown', onPointerDownCapture, true) + } + + bindDomListeners(activeDom) + + return { + update: (nextView, prevState) => { + if (nextView.dom !== activeDom) { + unbindDomListeners(activeDom) + activeDom = nextView.dom + bindDomListeners(activeDom) + } + + activeView = nextView + const docChanged = !nextView.state.doc.eq(prevState.doc) + const selectionChanged = !nextView.state.selection.eq(prevState.selection) + + if (!docChanged && !selectionChanged) { + return + } + + if (hasGhostText(nextView)) { + return + } + + const { from, to } = nextView.state.selection + if (from !== to) { + clearRuntimeRequests(runtime) + return + } + + const doc = nextView.state.doc + const prefix = doc.textBetween(0, from) + const suffix = doc.textBetween(to, doc.content.size) + + scheduleFetch(nextView, runtime, from, prefix, suffix) + }, + destroy: () => { + unbindDomListeners(activeDom) + clearRuntimeRequests(runtime) + runtimeByView.delete(view) + } + } + } +})) export { COPILOT_PLUGIN_KEY } -export function isCopilotEnabled(): boolean { - return enabled +export function isCopilotEnabled(view: EditorView): boolean { + return runtimeByView.get(view)?.enabled ?? 
true } -export function setCopilotEnabled(value: boolean): void { - enabled = value - +export function setCopilotEnabled(view: EditorView, value: boolean): void { + const runtime = runtimeByView.get(view) + if (!runtime) return + + runtime.enabled = value if (!value) { - if (debounceTimer) { - clearTimeout(debounceTimer) - debounceTimer = null - } - if (abortController) { - abortController.abort() - abortController = null - } + clearRuntimeRequests(runtime) } } + +export function checkSizeLimit(view: EditorView): { size: number; overLimit: boolean } { + const doc = view.state.doc + const imageFilenames = extractImageFilenames(doc) + const result = checkOcrSizeLimit(doc.content.size, imageFilenames) + return { size: result.size, overLimit: result.overLimit } +} + +export { SIZE_LIMIT } diff --git a/src/style.css b/src/style.css index 6f0bdb8..bd53d5a 100644 --- a/src/style.css +++ b/src/style.css @@ -72,5 +72,6 @@ body { padding: 0; width: 100%; height: 100%; - overflow: hidden; + overflow-x: hidden; + overflow-y: auto; } diff --git a/src/utils/ocrCache.js b/src/utils/ocrCache.js new file mode 100644 index 0000000..e1ed4ed --- /dev/null +++ b/src/utils/ocrCache.js @@ -0,0 +1,45 @@ +const SIZE_LIMIT = 64 * 1024 + +const ocrCache = new Map() + +export function setOcrCache(filename, text) { + ocrCache.set(filename, text) +} + +export function getOcrCache(filename) { + return ocrCache.get(filename) || '' +} + +export function clearOcrCache(filename) { + ocrCache.delete(filename) +} + +export function hasOcrCache(filename) { + return ocrCache.has(filename) +} + +export function clearAllOcrCache() { + ocrCache.clear() +} + +export function calculateOcrSize(imageFilenames) { + let total = 0 + for (const name of imageFilenames) { + const text = ocrCache.get(name) + if (text) total += new Blob([text]).size + } + return total +} + +export function checkSizeLimit(docTextSize, imageFilenames) { + const ocrSize = calculateOcrSize(imageFilenames) + const total = docTextSize + 
ocrSize + return { + size: total, + docSize: docTextSize, + ocrSize: ocrSize, + overLimit: total > SIZE_LIMIT + } +} + +export const OCR_SIZE_LIMIT = SIZE_LIMIT diff --git a/vite.config.js b/vite.config.js index ebda1c7..526c009 100644 --- a/vite.config.js +++ b/vite.config.js @@ -7,6 +7,42 @@ export default defineConfig({ host: true, port: 5173 }, + build: { + rollupOptions: { + output: { + manualChunks(id) { + if (!id.includes('node_modules')) return + + const modulePath = id.split('node_modules/')[1] + const segments = modulePath.split('/') + const packageName = segments[0].startsWith('@') + ? `${segments[0]}/${segments[1]}` + : segments[0] + + if (packageName.startsWith('@milkdown')) return 'milkdown' + if (packageName.startsWith('prosemirror')) return 'prosemirror' + + if (packageName.startsWith('@codemirror')) { + const langMatch = modulePath.match(/@codemirror\/lang-([^/]+)/) + if (langMatch) return `cm-lang-${langMatch[1]}` + return `cm-${segments[1]}` + } + + if (packageName === 'refractor') { + const langMatch = modulePath.match(/refractor\/lang\/([^./]+)/) + if (langMatch) return `refractor-lang-${langMatch[1]}` + return 'refractor-core' + } + + if (packageName.startsWith('katex')) return 'katex' + if (packageName.startsWith('markdown-it')) return 'markdown' + if (packageName === 'vue' || packageName.startsWith('@vue')) return 'vue' + + return `vendor-${packageName.replace('@', '').replace('/', '-')}` + } + } + } + }, optimizeDeps: { include: [ '@milkdown/crepe',