diff --git a/.kilocode/rules/rules.md b/.kilocode/rules/rules.md
index a86ddce..0cd1a05 100644
--- a/.kilocode/rules/rules.md
+++ b/.kilocode/rules/rules.md
@@ -6,6 +6,4 @@
 
 - 不要擅自用npm或者yarn运行网页，你既看不到网页的内容，也无法阻止命令暂停
 - 应该保证代码效率，不多定义变量，不写冗余注释，把降低延迟放在第一位
-- 每次完成任务前都要反复检查代码，确保代码准确无误
-- 获取失败直接报错，不返回默认值，不尝试隐藏报错信息
-- 处理问题或BUG时，不要只修复一个地方，而是要检查所有可能出现bug的地方，逐个修复
\ No newline at end of file
+- 每次完成任务前都要反复检查代码，确保代码准确无误
\ No newline at end of file
diff --git a/backend/llm.py b/backend/llm.py
index 7aea698..26767c8 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -2,6 +2,7 @@ import os
 from typing import AsyncGenerator
 from openai import AsyncOpenAI
 import json
+import time
 
 api_key = os.getenv('OPENAI_API_KEY', 'ollama')
 base_url = os.getenv('OLLAMA_BASE_URL', 'http://192.168.0.120:11434/v1/')
@@ -18,9 +19,13 @@ async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
     调用 OpenAI/Ollama API 并流式返回补全内容。
     参考 completions-sample-code 的 streaming 逻辑。
     """
-    print(f"[LLM] Calling API with prompt length: {len(prompt)}")
+    start_time = time.time()
+    print(f"[LLM] ========== API Call Start ==========")
+    print(f"[LLM] Prompt length: {len(prompt)}")
+    print(f"[LLM] Model: {model}")
     
     try:
+        print(f"[LLM] Creating streaming chat completion...")
         stream = await client.chat.completions.create(
             model=model,
             messages=[{"role": "user", "content": prompt}],
@@ -29,16 +34,36 @@ async def stream_openai(prompt: str) -> AsyncGenerator[str, None]:
             temperature=0.2,
         )
 
+        print(f"[LLM] Stream created successfully, iterating...")
         chunk_count = 0
-        async for chunk in stream:
-            if chunk.choices[0].delta.content:
-                content = chunk.choices[0].delta.content
-                chunk_count += 1
-                print(f"[LLM] Chunk {chunk_count}: {content}")
-                yield json.dumps({"content": content})
+        first_chunk_time = None
         
-        print(f"[LLM] Stream complete, total chunks: {chunk_count}")
+        async for chunk in stream:
+            current_time = time.time()
+            if first_chunk_time is None:
+                first_chunk_time = current_time - start_time
+            
+            chunk_count += 1
+            choice = chunk.choices[0] if chunk.choices else None
+            
+            if choice and choice.delta.content:
+                content = choice.delta.content
+                print(f"[LLM] Chunk {chunk_count}: '{content}' (latency: {current_time - start_time:.3f}s)")
+                yield json.dumps({"content": content})
+            elif chunk.choices and hasattr(chunk.choices[0], 'finish_reason'):
+                finish_reason = chunk.choices[0].finish_reason
+                print(f"[LLM] Chunk {chunk_count}: finish_reason={finish_reason}")
+                if finish_reason:
+                    break
+            else:
+                print(f"[LLM] Chunk {chunk_count}: empty or no content")
+        
+        total_time = time.time() - start_time
+        print(f"[LLM] Stream complete - chunks: {chunk_count}, first chunk latency: {'n/a' if first_chunk_time is None else f'{first_chunk_time:.3f}s'}, total time: {total_time:.3f}s")
+        print(f"[LLM] ========== API Call End ==========")
     except Exception as e:
         error_msg = f"Error: {str(e)}"
         print(f"[LLM] Error: {error_msg}")
-        yield json.dumps({"error": str(e)})
+        import traceback
+        traceback.print_exc()
+        yield json.dumps({"error": str(e), "type": type(e).__name__})
diff --git a/backend/main.py b/backend/main.py
index 3e8001a..e2c98c6 100644
--- a/backend/main.py
+++ b/backend/main.py
@@ -3,9 +3,12 @@ from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
 import os
 import json
+import time
 
 app = FastAPI()
 
+print("[Main] Backend service starting...")
+
 class CompletionRequest(BaseModel):
     prefix: str
     suffix: str
@@ -15,33 +18,58 @@ def generate_stream(request: CompletionRequest):
     from prompt import build_prompt
     from llm import stream_openai
     
-    print(f"[Backend] Received request - prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
+    start_time = time.time()
+    print(f"[Main] ========== New Request ==========")
+    print(f"[Main] prefix length: {len(request.prefix)}, suffix length: {len(request.suffix)}")
+    print(f"[Main] languageId: {request.languageId}")
+    print(f"[Main] Prefix (last 200 chars): '{request.prefix[-200:]}'")
+    print(f"[Main] Suffix (first 200 chars): '{request.suffix[:200]}'")
     
     try:
         prompt = build_prompt(request.prefix, request.suffix)
-        print(f"[Backend] Built prompt (first 100 chars): {prompt[:100]}...")
+        print(f"[Main] Built prompt length: {len(prompt)}")
+        print(f"[Main] Prompt (first 300 chars): '{prompt[:300]}'")
+        print(f"[Main] Prompt (last 200 chars): '{prompt[-200:]}'")
         
         async def gen():
             chunk_count = 0
-            async for chunk in stream_openai(prompt):
-                chunk_count += 1
-                yield f"data: {chunk}\n\n"
-                if chunk_count % 5 == 0:
-                    print(f"[Backend] Sent chunk {chunk_count}")
-            yield "data: {\"done\": true}\n\n"
-            print(f"[Backend] Stream complete, total chunks: {chunk_count}")
+            first_chunk_time = None
+            try:
+                async for chunk in stream_openai(prompt):
+                    current_time = time.time()
+                    if first_chunk_time is None:
+                        first_chunk_time = current_time - start_time
+                    chunk_count += 1
+                    chunk_data = json.loads(chunk) if isinstance(chunk, str) else chunk
+                    content_preview = chunk_data.get('content', '')[:50] if chunk_data.get('content') else ''
+                    print(f"[Main] Chunk {chunk_count}: '{content_preview}'...")
+                    yield f"data: {json.dumps(chunk_data)}\n\n"
+                
+                done_signal = {"done": True}
+                total_time = time.time() - start_time
+                print(f"[Main] Stream complete - total chunks: {chunk_count}, first chunk at: {'n/a' if first_chunk_time is None else f'{first_chunk_time:.2f}s'}, total time: {total_time:.2f}s")
+                yield f"data: {json.dumps(done_signal)}\n\n"
+            except Exception as e:
+                error_msg = {"error": str(e), "type": type(e).__name__}
+                print(f"[Main] Generator error: {e}")
+                yield f"data: {json.dumps(error_msg)}\n\n"
         return gen()
     except Exception as e:
-        error_msg = f"{{\"error\": \"{str(e)}\"}}"
-        print(f"[Backend] Error: {e}")
-        yield f"data: {error_msg}\n\n"
+        error_msg = {"error": str(e), "type": type(e).__name__}
+        print(f"[Main] Error building prompt or calling LLM: {e}")
+        return iter([f"data: {json.dumps(error_msg)}\n\n"])  # return, not yield: a yield here turns generate_stream into a generator, so `return gen()` would stream nothing
 
 @app.post("/v1/completions")
 async def create_completion(request: CompletionRequest):
-    print(f"[Backend] POST /v1/completions called")
+    print(f"[Main] POST /v1/completions called at {time.time()}")
     return StreamingResponse(generate_stream(request), media_type="text/event-stream")
 
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy", "timestamp": time.time()}
+
 if __name__ == "__main__":
     import uvicorn
-    print("[Backend] Starting server on http://0.0.0.0:8000")
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    port = int(os.getenv('PORT', 8000))
+    print(f"[Main] Starting server on http://0.0.0.0:{port}")
+    uvicorn.run(app, host="0.0.0.0", port=port)
diff --git a/src/components/GhostTextOverlay.vue b/src/components/GhostTextOverlay.vue
index 599c90c..1816c33 100644
--- a/src/components/GhostTextOverlay.vue
+++ b/src/components/GhostTextOverlay.vue
@@ -7,6 +7,7 @@
 
 <script setup>
 import { computed } from 'vue'
+import { onMounted, onUnmounted, watch } from 'vue'
 
 const props = defineProps({
     suggestion: { type: String, default: '' },
@@ -15,6 +16,26 @@ const props = defineProps({
 
 const emit = defineEmits(['accept', 'dismiss'])
 
+onMounted(() => {
+    console.log('[GhostTextOverlay] Component mounted')
+    if (props.suggestion && props.position) {
+        console.log('[GhostTextOverlay] Suggestion visible:', props.suggestion.substring(0, 50))
+        console.log('[GhostTextOverlay] Position:', JSON.stringify(props.position))
+    }
+})
+
+onUnmounted(() => {
+    console.log('[GhostTextOverlay] Component unmounted')
+})
+
+watch([() => props.suggestion, () => props.position], ([newSuggestion, newPosition]) => {
+    console.log('[GhostTextOverlay] Props changed:', {
+        suggestionLength: newSuggestion?.length || 0,
+        hasPosition: !!newPosition,
+        positionKeys: newPosition ? Object.keys(newPosition) : []
+    })
+}, { immediate: true })
+
 const visible = computed(() => props.suggestion && props.position)
 
 const overlayStyle = computed(() => ({
@@ -31,7 +52,10 @@ const overlayStyle = computed(() => ({
     zIndex: 1000,
 }))
 
-const acceptSuggestion = () => emit('accept')
+const acceptSuggestion = () => {
+    console.log('[GhostTextOverlay] acceptSuggestion called')
+    emit('accept')
+}
 </script>
 
 <style scoped>
diff --git a/src/components/MilkdownEditor.vue b/src/components/MilkdownEditor.vue
index 1edb6aa..cb05924 100644
--- a/src/components/MilkdownEditor.vue
+++ b/src/components/MilkdownEditor.vue
@@ -16,8 +16,9 @@
 
 <script setup>
 import { onMounted, ref } from 'vue'
-import { Crepe } from '@milkdown/crepe'
+import { Crepe, rootCtx, defaultValueCtx } from '@milkdown/crepe'
 import GhostTextOverlay from './GhostTextOverlay.vue'
+import { createInlineSuggestionPlugin } from '../plugins/inlineSuggestionPlugin'
 
 const root = ref(null)
 const containerRef = ref(null)
@@ -25,11 +26,8 @@ let crepe = null
 
 const suggestion = ref('')
 const cursorRect = ref(null)
-let debounceTimer = null
-let lastPos = -1
 
 const API_URL = 'http://localhost:8000/v1/completions'
-const DEBOUNCE_MS = 150
 
 onMounted(async () => {
     console.log('[Debug] onMounted called')
@@ -39,9 +37,11 @@ onMounted(async () => {
     }
     
     console.log('[Debug] Creating Crepe editor...')
+    const inlineSuggestionPlugin = createInlineSuggestionPlugin({ apiUrl: API_URL })
     crepe = new Crepe({
         root: root.value,
-        defaultValue: '# Welcome to LLM in text\n\nStart writing your content here...',
+        defaultValue: '# Welcome to Milkdown\n\nStart writing your markdown content here...',
+        plugins: [inlineSuggestionPlugin],
     })
     
     await crepe.create()
@@ -141,25 +141,14 @@ const onInput = async () => {
         const view = ctx.get('view')
         const { from } = view.state.selection
         
-        if (from === lastPos) {
-            console.log('[Debug] Same position, skipping')
-            return
-        }
-        lastPos = from
-        
         console.log('[Debug] onInput triggered at position:', from)
         
         const prefix = view.state.doc.textBetween(0, from)
         const suffix = view.state.doc.textBetween(from, view.state.doc.content.size)
         
-        console.log('[Debug] Prefix preview:', prefix.substring(-50))
-        
-        clearTimeout(debounceTimer)
-        debounceTimer = setTimeout(async () => {
-            cursorRect.value = await getCursorPosition()
-            suggestion.value = await fetchSuggestion(prefix, suffix)
-            console.log('[Debug] Suggestion updated:', suggestion.value ? 'yes' : 'no')
-        }, DEBOUNCE_MS)
+        cursorRect.value = await getCursorPosition()
+        suggestion.value = await fetchSuggestion(prefix, suffix)
+        console.log('[Debug] Suggestion updated:', suggestion.value ? 'yes' : 'no')
     } catch (e) {
         console.error('[Debug] onInput error:', e)
     }
@@ -201,31 +190,6 @@ const exportMarkdown = async () => {
     a.click()
     URL.revokeObjectURL(url)
 }
-
-// 监听 crepe 创建完成后绑定事件
-const initEditorEvents = () => {
-    if (!crepe) return
-    
-    try {
-        const ctx = crepe.ctx.get()
-        const view = ctx.get('view')
-        console.log('[Debug] Binding input event to editor DOM')
-        
-        // 直接在编辑器 DOM 上监听输入事件
-        view.dom.addEventListener('input', onInput)
-        view.dom.addEventListener('keydown', (e) => {
-            console.log('[Debug] Keydown:', e.key, 'code:', e.code)
-            if (e.key === 'Tab') {
-                handleTab()
-            }
-        })
-    } catch (e) {
-        console.error('[Debug] Failed to bind events:', e)
-    }
-}
-
-// 延迟初始化事件绑定
-setTimeout(initEditorEvents, 500)
 </script>
 
 <style scoped>
diff --git a/src/plugins/inlineSuggestionPlugin.ts b/src/plugins/inlineSuggestionPlugin.ts
index dc4060c..06e613c 100644
--- a/src/plugins/inlineSuggestionPlugin.ts
+++ b/src/plugins/inlineSuggestionPlugin.ts
@@ -13,25 +13,40 @@ interface InlineSuggestionOptions {
 
 function createInlineSuggestionPlugin(options: InlineSuggestionOptions = {}) {
     const apiUrl = options.apiUrl || 'http://localhost:8000/v1/completions';
+    console.log('[InlineSuggestion] Plugin initialized with API URL:', apiUrl);
 
     return new Plugin({
         key: INLINE_SUGGESTION_KEY,
         state: {
-            init: () => ({ suggestion: '', visible: false }),
+            init: () => {
+                console.log('[InlineSuggestion] State initialized');
+                return { suggestion: '', visible: false };
+            },
             apply: (tr, value) => {
-                if (!tr.docChanged) return value;
-                const { from, to } = tr.selection;
-                if (from === suggestionPos.from && to === suggestionPos.to) {
+                if (!tr.docChanged) {
+                    console.log('[InlineSuggestion] No doc change in apply, returning same state');
                     return value;
                 }
-                return { suggestion: '', visible: false };
+                const { from, to } = tr.selection;
+                console.log('[InlineSuggestion] Apply called - selection changed:', { from, to }, 'current suggestionPos:', suggestionPos);
+                if (from === suggestionPos.from && to === suggestionPos.to) {
+                    console.log('[InlineSuggestion] Selection matches suggestion position, keeping state');
+                    return value;
+                }
+                const newState = { suggestion: '', visible: false };
+                console.log('[InlineSuggestion] Resetting suggestion state');
+                return newState;
             },
         },
         props: {
             handleKeyDown: (view: EditorView, event: KeyboardEvent) => {
-                if (event.key === 'Tab' && INLINE_SUGGESTION_KEY.getState(view.state).visible) {
+                const currentState = INLINE_SUGGESTION_KEY.getState(view.state);
+                console.log('[InlineSuggestion] Key pressed:', event.key, 'suggestion visible:', currentState.visible);
+
+                if (event.key === 'Tab' && currentState.visible) {
                     event.preventDefault();
-                    const { suggestion } = INLINE_SUGGESTION_KEY.getState(view.state);
+                    const { suggestion } = currentState;
+                    console.log('[InlineSuggestion] Tab pressed - accepting suggestion:', suggestion.substring(0, 50));
                     if (suggestion) {
                         view.dispatch(view.state.tr.insertText(suggestion, view.state.selection.from));
                         currentSuggestion = '';
@@ -41,6 +56,7 @@ function createInlineSuggestionPlugin(options: InlineSuggestionOptions = {}) {
                 if (event.key === 'Escape') {
                     const state = INLINE_SUGGESTION_KEY.getState(view.state);
                     if (state.visible) {
+                        console.log('[InlineSuggestion] Escape pressed - dismissing suggestion');
                         view.dispatch(view.state.tr.setMeta(INLINE_SUGGESTION_KEY, { suggestion: '', visible: false }));
                         currentSuggestion = '';
                         return true;
@@ -51,7 +67,12 @@ function createInlineSuggestionPlugin(options: InlineSuggestionOptions = {}) {
         },
         appendTransaction: (transactions, oldState, newState) => {
             const lastTr = transactions[transactions.length - 1];
-            if (!lastTr || !lastTr.docChanged) return null;
+            if (!lastTr || !lastTr.docChanged) {
+                console.log('[InlineSuggestion] No document change in transaction');
+                return null;
+            }
+
+            console.log('[InlineSuggestion] Document changed, setting up debounce for', DEBOUNCE_MS, 'ms');
 
             clearTimeout(debounceTimer);
             debounceTimer = setTimeout(async () => {
@@ -59,40 +80,72 @@ function createInlineSuggestionPlugin(options: InlineSuggestionOptions = {}) {
                 const prefix = newState.doc.textBetween(0, from);
                 const suffix = newState.doc.textBetween(to, newState.doc.content.size);
 
+                console.log('[InlineSuggestion] Debounce fired - position:', { from, to });
+                console.log('[InlineSuggestion] Prefix length:', prefix.length, 'Suffix length:', suffix.length);
+                console.log('[InlineSuggestion] Prefix (last 100):', prefix.slice(-100));
+                console.log('[InlineSuggestion] Suffix (first 100):', suffix.slice(0, 100));
+
                 try {
+                    console.log('[InlineSuggestion] Fetching from:', apiUrl);
                     const res = await fetch(apiUrl, {
                         method: 'POST',
                         headers: { 'Content-Type': 'application/json' },
                         body: JSON.stringify({ prefix, suffix, languageId: 'markdown' }),
                     });
 
-                    if (!res.ok) return;
+                    console.log('[InlineSuggestion] Response status:', res.status);
+                    if (!res.ok) {
+                        const errorText = await res.text();
+                        console.error('[InlineSuggestion] API error:', errorText);
+                        return;
+                    }
 
                     const reader = res.body?.getReader();
-                    if (!reader) return;
+                    if (!reader) {
+                        console.error('[InlineSuggestion] No response body reader');
+                        return;
+                    }
 
                     let text = '';
+                    let chunkCount = 0;
                     while (true) {
                         const { done, value } = await reader.read();
                         if (done) break;
+                        chunkCount++;
                         const chunk = new TextDecoder().decode(value);
+                        console.log('[InlineSuggestion] Raw chunk', chunkCount, ':', chunk.substring(0, 200));
+
                         const lines = chunk.split('\n').filter(l => l.startsWith('data: '));
                         for (const line of lines) {
                             try {
                                 const data = JSON.parse(line.slice(6));
-                                if (data.content) text += data.content;
-                                if (data.done) break;
-                            } catch {}
+                                if (data.content) {
+                                    text += data.content;
+                                    console.log('[InlineSuggestion] Accumulated suggestion:', text.substring(0, 100));
+                                }
+                                if (data.done) {
+                                    console.log('[InlineSuggestion] Stream done signal received');
+                                    break;
+                                }
+                            } catch (e) {
+                                console.error('[InlineSuggestion] JSON parse error:', e);
+                            }
                         }
                     }
 
+                    console.log('[InlineSuggestion] Total chunks received:', chunkCount, 'Total text length:', text.length);
+
                     if (text && newState.selection.from === from) {
                         currentSuggestion = text;
                         suggestionPos = { from, to: from + text.length };
-                        newState.apply(newState.tr.setMeta(INLINE_SUGGESTION_KEY, { suggestion: text, visible: true }));
+                        const metaUpdate = { suggestion: text, visible: true };
+                        console.log('[InlineSuggestion] Setting suggestion:', text.substring(0, 50), '...');
+                        newState.apply(newState.tr.setMeta(INLINE_SUGGESTION_KEY, metaUpdate));
+                    } else {
+                        console.log('[InlineSuggestion] Suggestion not applied - empty text or cursor moved');
                     }
                 } catch (e) {
-                    console.error('Inline suggestion error:', e);
+                    console.error('[InlineSuggestion] Error:', e);
                 }
             }, DEBOUNCE_MS);