feat: add theme management with light and dark modes

- Implemented a new composable `useTheme` for managing theme state.
- Added functions to read and write the theme preference in local storage.
- Applied theme styles to the DOM based on user preference.
- Introduced a toggle function to switch between light and dark themes.

refactor: enhance copilot plugin functionality

- Improved request handling with sequence and document versioning.
- Refactored ghost text handling to improve clarity and efficiency.
- Updated markdown insertion logic to handle parsed content more robustly.
- Enhanced error handling and logging for better debugging.

style: update global styles for light and dark themes

- Defined CSS variables for light and dark themes to streamline styling.
- Improved overall styling consistency and responsiveness.
- Added transitions for smoother theme changes and interactions.
2026-02-15 15:44:09 +08:00
parent 03bb21d5c6
commit 838eec30a8
205 changed files with 1868 additions and 344 deletions
--- a/backend/pycache/llm.cpython-310.pyc
+++ b/backend/pycache/llm.cpython-310.pyc
--- a/backend/pycache/prompt.cpython-310.pyc
+++ b/backend/pycache/prompt.cpython-310.pyc
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -1,6 +1,7 @@
 import os
 import time
 import logging
+from datetime import datetime
 import ollama
 from dotenv import load_dotenv

@@ -58,6 +59,7 @@ async def call_ollama(prompt: str, *, tag: str = "default", temperature: float =
    调用 Ollama API 并返回 content 和 thinking。
    """
    start = time.perf_counter()
+    start_dt = datetime.now()
    logger.info(
        "[LLM][%s] request model=%s host=%s prompt_chars=%d temp=%.2f",
        tag,
@@ -79,11 +81,25 @@ async def call_ollama(prompt: str, *, tag: str = "default", temperature: float =
        )
    except Exception:
        elapsed_ms = (time.perf_counter() - start) * 1000
+        end_dt = datetime.now()
+        logger.info(
+            "[LLM][%s] call_time [%s --> %s]",
+            tag,
+            start_dt.strftime("%H:%M:%S"),
+            end_dt.strftime("%H:%M:%S"),
+        )
        logger.exception("[LLM][%s] request failed after %.1fms", tag, elapsed_ms)
        raise

    content, thinking = _extract_message(response)
    elapsed_ms = (time.perf_counter() - start) * 1000
+    end_dt = datetime.now()
+    logger.info(
+        "[LLM][%s] call_time [%s --> %s]",
+        tag,
+        start_dt.strftime("%H:%M:%S"),
+        end_dt.strftime("%H:%M:%S"),
+    )
    logger.info(
        "[LLM][%s] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d",
        tag,
@@ -100,6 +116,7 @@ async def call_ollama(prompt: str, *, tag: str = "default", temperature: float =

 async def call_vlm_ocr(image_bytes: bytes, language: str = 'auto') -> str:
    start = time.perf_counter()
+    start_dt = datetime.now()
    logger.info(
        "[VLM][ocr] request model=%s host=%s image_bytes=%d language=%s",
        VLM_MODEL,
@@ -121,11 +138,23 @@ async def call_vlm_ocr(image_bytes: bytes, language: str = 'auto') -> str:
        )
    except Exception:
        elapsed_ms = (time.perf_counter() - start) * 1000
+        end_dt = datetime.now()
+        logger.info(
+            "[VLM][ocr] call_time [%s --> %s]",
+            start_dt.strftime("%H:%M:%S"),
+            end_dt.strftime("%H:%M:%S"),
+        )
        logger.exception("[VLM][ocr] request failed after %.1fms", elapsed_ms)
        raise

    content, thinking = _extract_message(response)
    elapsed_ms = (time.perf_counter() - start) * 1000
+    end_dt = datetime.now()
+    logger.info(
+        "[VLM][ocr] call_time [%s --> %s]",
+        start_dt.strftime("%H:%M:%S"),
+        end_dt.strftime("%H:%M:%S"),
+    )
    logger.info(
        "[VLM][ocr] response in %.1fms response_type=%s content_chars=%d thinking_chars=%d",
        elapsed_ms,
--- a/backend/main.py
+++ b/backend/main.py
@@ -7,7 +7,7 @@ import base64
 import uuid
 import logging

-from prompt import build_prompt
+from prompt import build_prompt, prepare_prompt_context
 from llm import call_ollama, call_vlm_ocr

 logging.basicConfig(
@@ -43,17 +43,6 @@ def _preview(text: str, limit: int = 80) -> str:
        return value
    return value[:limit] + "..."

-
-def _build_force_non_empty_prompt(base_prompt: str) -> str:
-    return (
-        base_prompt
-        + "\n\nStrict override for this request:\n"
-        + "- Output must be non-empty.\n"
-        + "- If you would otherwise output empty, output a single space.\n"
-        + "- Keep it short and do not repeat SUFFIX.\n"
-    )
-
-
@app.post("/v1/completions")
 async def create_completion(request: CompletionRequest):
    request_id = str(uuid.uuid4())[:8]
@@ -67,27 +56,18 @@ async def create_completion(request: CompletionRequest):
            _preview((request.prefix or "")[-120:]),
            _preview((request.suffix or "")[:120]),
        )
+        llm_prefix, llm_suffix = prepare_prompt_context(request.prefix or "", request.suffix or "")
+        logger.info("[%s] llm_input_prefix=%r", request_id, llm_prefix)
+        logger.info("[%s] llm_input_suffix=%r", request_id, llm_suffix)
        prompt = build_prompt(request.prefix, request.suffix, request.languageId)
        result = await call_ollama(prompt, tag=f"{request_id}-primary", temperature=0.7)

        content = result["content"] or ""
-        source = "primary"
        if not content.strip():
-            logger.warning("[%s] primary returned empty content, starting retry", request_id)
-            retry_prompt = _build_force_non_empty_prompt(prompt)
-            retry_result = await call_ollama(retry_prompt, tag=f"{request_id}-retry1", temperature=0.4)
-            content = retry_result["content"] or ""
-            source = "retry1"
-
-        if not content.strip():
-            content = " "
-            source = "fallback-space"
-            logger.warning("[%s] retry still empty, forcing single-space fallback", request_id)
-
+            logger.warning("[%s] primary returned empty content, returning empty result", request_id)
        logger.info(
-            "[%s] completion resolved source=%s content_chars=%d content_preview='%s'",
+            "[%s] completion resolved source=primary content_chars=%d content_preview='%s'",
            request_id,
-            source,
            len(content),
            _preview(content, 120),
        )
--- a/backend/prompt.py
+++ b/backend/prompt.py
@@ -1,9 +1,5 @@
 from typing import Tuple

-MAX_PREFIX_CHARS = 12000
-MAX_SUFFIX_CHARS = 4000
-
-
 def _sanitize_language_id(language_id: str) -> str:
    if not language_id:
        return "markdown"
@@ -18,12 +14,12 @@ def _sanitize_language_id(language_id: str) -> str:
 def _prepare_context(prefix: str, suffix: str) -> Tuple[str, str]:
    """
    Prepare prefix/suffix for model completion context.
-    Keep the historical one-char lookahead behavior to reduce boundary drift.
    """
-    if suffix:
-        prefix = prefix + suffix[0]
-        suffix = suffix[1:]
-    return prefix[-MAX_PREFIX_CHARS:], suffix[:MAX_SUFFIX_CHARS]
+    return prefix, suffix
+
+
+def prepare_prompt_context(prefix: str, suffix: str) -> Tuple[str, str]:
+    return _prepare_context(prefix, suffix)


 def build_prompt(prefix: str, suffix: str, language_id: str = "markdown") -> str:
@@ -50,8 +46,8 @@ Hard rules:
   Do NOT repeat text that already appears at the start of SUFFIX.
 3. Balanced length:
   Prefer concise but meaningful continuation, not ultra-short fragments.
-   Default target is 20-120 characters and 1-3 lines.
-   You may go shorter only when syntax requires it.
+   Default target is 20-120 characters and 1-3 lines for plain prose.
+   You may be longer when structure requires it (lists, tables, code blocks, math blocks).
 4. Avoid trivial output:
   Do not output only punctuation or filler such as ".", ",", ";", ":".
   Do not output just one token unless it is structurally necessary.
@@ -60,10 +56,12 @@ Hard rules:
 6. Markdown awareness:
   Continue active list/checkbox/ordered-list patterns when applicable.
   Preserve indentation in nested list/code contexts.
+   You may output full markdown structures when context needs them: headings, lists, tables, fenced code blocks, blockquotes, and LaTeX ($...$ / $$...$$).
   Close obvious unclosed inline markdown markers only when needed to bridge.
 7. Strict output format:
   Output insertion text only.
-   No explanations, labels, quotes, or code fences.
+   No explanations, labels, or wrapper quotes around the whole output.
+   Markdown syntax is allowed when it is the intended insertion (including fenced code blocks and LaTeX).

 Decision policy:
 - If PREFIX already connects naturally to SUFFIX, add a brief but useful continuation when possible.
@@ -91,3 +89,6 @@ Now produce the insertion.
 Output:"""

    return prompt.strip()
+
+
+