2026-02-23 15:17:36 +08:00
|
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
import re
|
2026-01-18 19:42:58 +08:00
|
|
|
from typing import Tuple
|
2026-02-23 15:17:36 +08:00
|
|
|
|
2026-02-16 22:49:32 +08:00
|
|
|
|
2026-02-19 10:34:31 +08:00
|
|
|
def _get_current_datetime(timezone_pref: str = "auto") -> str:
    """Return the current wall-clock time as a prompt-friendly string.

    The result has the shape ``YYYY-MM-DD Weekday HH:MM:SS (label)``.

    Args:
        timezone_pref: ``"auto"`` or an empty value selects the UTC+8
            default. Otherwise either a fixed offset such as ``"UTC+8"``,
            ``"GMT-5"``, ``"UTC+5:30"`` or ``"+0530"``, or an IANA zone name
            such as ``"Europe/Berlin"``.

    Returns:
        The formatted timestamp. When the preference cannot be resolved
        (offset out of range, unknown zone name, no zone database) the time
        is reported in UTC+8 and labelled ``(UTC+8)`` so the label never
        contradicts the time that is shown.
    """
    # Default to UTC+8 if auto or not specified.
    tz = timezone(timedelta(hours=8))
    tz_info = " (UTC+8)"

    if timezone_pref and timezone_pref != "auto":
        pref = str(timezone_pref)
        # Parse values like "UTC+8", "GMT-5", "UTC+5:30" or "+0530".
        match = re.search(r"([+-])(\d{1,2})(?::?(\d{2}))?(?!\d)", pref)
        if match:
            minutes = int(match.group(3) or 0)
            offset = timedelta(hours=int(match.group(2)), minutes=minutes)
            if match.group(1) == "-":
                offset = -offset
            if minutes < 60:
                try:
                    # timezone() rejects offsets of 24 hours or more.
                    tz = timezone(offset)
                    tz_info = f" ({timezone_pref})"
                except ValueError:
                    pass  # keep the UTC+8 default and its label
        else:
            try:
                # Local import: zoneinfo only exists on Python 3.9+.
                from zoneinfo import ZoneInfo

                tz = ZoneInfo(pref.strip())
                tz_info = f" ({timezone_pref})"
            except (ImportError, KeyError, ValueError, OSError):
                pass  # unknown zone or no tz database: keep the default

    now = datetime.now(tz)

    # Explicit table keeps weekday names English regardless of locale.
    weekdays = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]
    weekday = weekdays[now.weekday()]

    return (
        f"{now.year}-{now.month:02d}-{now.day:02d} "
        f"{weekday} {now.hour:02d}:{now.minute:02d}:{now.second:02d}{tz_info}"
    )
|
|
|
|
|
|
2026-01-18 19:42:58 +08:00
|
|
|
|
2026-02-14 18:28:37 +08:00
|
|
|
def _sanitize_language_id(language_id: str) -> str:
    """Reduce a language identifier to a short, safe token.

    Surrounding whitespace is stripped, every character that is neither
    alphanumeric nor one of ``-``, ``_``, ``+``, ``.`` is dropped, and the
    result is cut to 32 characters. An empty input or an empty result
    yields ``"markdown"``.
    """
    fallback = "markdown"
    if not language_id:
        return fallback
    kept = "".join(
        c for c in language_id.strip() if c.isalnum() or c in "-_+."
    )
    cleaned = kept[:32]
    return cleaned if cleaned else fallback
|
|
|
|
|
|
|
|
|
|
|
2026-02-23 15:17:36 +08:00
|
|
|
def _normalize_newlines(text: str) -> str:
    """Convert CRLF and lone CR line endings to LF; a falsy input gives ""."""
    if not text:
        return ""
    # A carriage return, with or without a following line feed, is one break.
    return re.sub(r"\r\n?", "\n", text)
|
|
|
|
|
|
|
|
|
|
|
2026-02-14 18:28:37 +08:00
|
|
|
def _prepare_context(prefix: str, suffix: str) -> Tuple[str, str]:
    """Clean the text on both sides of the cursor before prompting.

    HTML line-break tags (``<br>``, ``<br/>``, ``<br />`` and the
    backslash variant) are removed case-insensitively. A falsy prefix or
    suffix is treated as an empty string.

    Returns:
        ``(clean_prefix, clean_suffix)``.
    """
    strip_br = re.compile(r"<br\s*/?\s*\\?>", re.IGNORECASE).sub
    cleaned = [strip_br("", part or "") for part in (prefix, suffix)]
    return cleaned[0], cleaned[1]
|
2026-02-15 15:44:09 +08:00
|
|
|
|
|
|
|
|
|
2026-02-23 15:17:36 +08:00
|
|
|
# A fence line: optional indentation followed by three backticks.
FENCE_LINE_RE = re.compile(r"^[ \t]*```.*$")
# Same shape, capturing everything after the backticks (the info string).
FENCE_INFO_RE = re.compile(r"^[ \t]*```[ \t]*(.*)$")
# Heuristic for "the nearby text is about a Mermaid diagram".
# Group 1 holds a distinctive diagram keyword, group 2 a `graph` direction.
# "journey", "timeline" and "pie" are ordinary English words, so they only
# count when they form a diagram declaration line by themselves (optionally
# `pie title ...` / `pie showData ...`) rather than anywhere in prose.
MERMAID_CONTEXT_RE = re.compile(
    r"```[ \t]*mermaid\b|"
    r"\b(flowchart|sequencediagram|classdiagram|statediagram(?:-v2)?|"
    r"erdiagram|gantt|mindmap|gitgraph|quadrantchart|xychart-beta)\b|"
    r"^[ \t]*(?:journey|timeline|pie(?:[ \t]+(?:showdata|title)\b.*)?)[ \t]*$|"
    r"\bgraph[ \t]+(TD|TB|BT|RL|LR)\b",
    # MULTILINE lets the declaration-line branch anchor on every line.
    re.IGNORECASE | re.MULTILINE,
)
|
2026-02-23 15:17:36 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _cursor_in_fenced_code_block(prefix: str) -> bool:
    """Tell whether the text before the cursor leaves a code fence open.

    Delegates to ``_active_fence_language`` and reports ``True`` for every
    result other than the ``"none"`` sentinel.
    """
    fence_language = _active_fence_language(prefix)
    return not (fence_language == "none")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _active_fence_language(prefix: str) -> str:
    """Return the language of the code fence that is open at the cursor.

    The prefix is scanned line by line; every line matching
    ``FENCE_LINE_RE`` toggles the fence state. On an opening fence the
    first whitespace-separated token of the info string is reduced to
    alphanumerics plus ``-_+.``, cut to 32 characters and lower-cased.

    Returns:
        - ``"none"``: the cursor is outside any fenced code block
        - ``"unknown"``: inside a fence with no usable language tag
        - ``"<language>"``: inside a fence carrying that language tag
    """
    in_fence = False
    active_language = "none"
    for line in _normalize_newlines(prefix).split("\n"):
        if not FENCE_LINE_RE.match(line):
            continue
        if in_fence:
            # Any fence line while a block is open closes it.
            in_fence = False
            active_language = "none"
            continue
        in_fence = True
        info_match = FENCE_INFO_RE.match(line)
        info = info_match.group(1).strip() if info_match else ""
        first_token = info.split()[0] if info else ""
        tag = "".join(
            ch for ch in first_token if ch.isalnum() or ch in "-_+."
        )[:32].lower()
        # "none" is reserved as the outside-a-fence sentinel; a fence that is
        # literally tagged "none" must still be reported as an open fence.
        active_language = tag if tag and tag != "none" else "unknown"
    return active_language if in_fence else "none"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_mermaid_context(prefix: str, suffix: str, cursor_fence_language: str) -> bool:
    """Decide whether the completion should be treated as Mermaid-related.

    True when the cursor sits in a ``mermaid`` fence, or when
    ``MERMAID_CONTEXT_RE`` finds a hit in the last 1200 characters of the
    prefix joined by a newline with the first 400 characters of the suffix.
    """
    if cursor_fence_language != "mermaid":
        window = "\n".join(((prefix or "")[-1200:], (suffix or "")[:400]))
        return bool(MERMAID_CONTEXT_RE.search(window))
    return True
|
2026-02-23 15:17:36 +08:00
|
|
|
|
|
|
|
|
|
2026-02-15 15:44:09 +08:00
|
|
|
def prepare_prompt_context(prefix: str, suffix: str) -> Tuple[str, str]:
    """Public wrapper around ``_prepare_context``; returns its cleaned pair."""
    cleaned_prefix, cleaned_suffix = _prepare_context(prefix, suffix)
    return cleaned_prefix, cleaned_suffix
|
2026-02-14 18:28:37 +08:00
|
|
|
|
|
|
|
|
|
2026-03-14 18:20:39 +08:00
|
|
|
# Maps a lower-case language alias (editor id, fence tag or file extension)
# to the canonical language name used when building prompts. Canonical
# names map to themselves.
LANGUAGE_SYNONYMS = {
    # Prose formats
    "md": "markdown",
    "markdown": "markdown",
    "txt": "text",
    "text": "text",
    "plain": "text",
    "plaintext": "text",
    # Python / JavaScript / TypeScript
    "py": "python",
    "python": "python",
    "js": "javascript",
    "javascript": "javascript",
    "jsx": "javascript",
    "node": "javascript",
    "ts": "typescript",
    "tsx": "typescript",
    "typescript": "typescript",
    # Data and configuration formats
    "json": "json",
    "jsonc": "json",
    "json5": "json",
    "yaml": "yaml",
    "yml": "yaml",
    "toml": "toml",
    "ini": "ini",
    "cfg": "ini",
    # Shells
    "bash": "bash",
    "shell": "bash",
    "sh": "bash",
    "zsh": "bash",
    "fish": "bash",
    "ps": "powershell",
    "ps1": "powershell",
    "powershell": "powershell",
    # SQL dialects
    "sql": "sql",
    "postgres": "sql",
    "postgresql": "sql",
    "mysql": "sql",
    "sqlite": "sql",
    # Markup and styling
    "html": "html",
    "xml": "xml",
    "svg": "xml",
    "css": "css",
    "scss": "css",
    "less": "css",
    # Math and diagrams
    "latex": "latex",
    "tex": "latex",
    "katex": "latex",
    "mermaid": "mermaid",
    # C family
    "c": "c",
    "c++": "cpp",
    "cpp": "cpp",
    "cxx": "cpp",
    "h": "c",
    "hpp": "cpp",
    "c#": "csharp",
    "cs": "csharp",
    "csharp": "csharp",
    # Other general-purpose languages
    "go": "go",
    "golang": "go",
    "rust": "rust",
    "rs": "rust",
    "java": "java",
    "kotlin": "kotlin",
    "swift": "swift",
    "ruby": "ruby",
    "rb": "ruby",
    "php": "php",
    "lua": "lua",
    "r": "r",
    "matlab": "matlab",
    "dart": "dart",
    # Build, tooling and miscellaneous formats
    "docker": "dockerfile",
    "dockerfile": "dockerfile",
    "make": "makefile",
    "makefile": "makefile",
    "diff": "diff",
    "patch": "diff",
    "regex": "regex",
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _canonical_language_id(language_id: str) -> str:
    """Resolve a language identifier to its canonical lower-case name.

    The stripped, lower-cased input is looked up in ``LANGUAGE_SYNONYMS``
    first. If it is not a known alias, the identifier is sanitised with
    ``_sanitize_language_id``, lower-cased and looked up again; unknown
    identifiers are returned in their sanitised form.

    Returns:
        The canonical name, or ``"markdown"`` for an empty identifier.
    """
    raw = (language_id or "").strip().lower()
    # Consult the alias table before sanitising: sanitising drops "#", which
    # would turn "c#" into "c" and make the "c#" alias unreachable.
    if raw in LANGUAGE_SYNONYMS:
        return LANGUAGE_SYNONYMS[raw]
    safe = _sanitize_language_id(language_id).lower()
    if not safe:
        return "markdown"
    return LANGUAGE_SYNONYMS.get(safe, safe)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _language_guidance(language_id: str) -> str:
    """Build the language-specific addendum for the system prompt.

    The identifier is canonicalised first. Markdown gets no addendum (an
    empty string). Every other language gets a block that starts with a
    newline, then a ``Language-specific guidance (<name>):`` header, then
    one bullet per line. Languages without a dedicated entry receive the
    generic "valid code" bullets.
    """
    lang = _canonical_language_id(language_id)
    if lang == "markdown":
        return ""

    # Shared bullet sets for general-purpose programming languages.
    generic_code = (
        f"- Output valid {lang} code.",
        "- Avoid prose unless context clearly expects comments or docstrings.",
    )
    script_code = (
        f"- Output valid {lang} code.",
        "- Prefer modern syntax and avoid prose unless comments are needed.",
    )
    bullets_by_language = {
        "mermaid": (
            "- Output valid Mermaid syntax only.",
            "- Prefer concise, syntactically correct diagram statements.",
            "- Avoid prose unless the user prompt explicitly requires it.",
        ),
        "latex": (
            "- Output LaTeX math content only when completing LaTeX.",
            "- If CURSOR_IN_FENCED_CODE_BLOCK=true and CURSOR_FENCE_LANGUAGE is latex/tex/katex:",
            "- Output raw LaTeX lines only.",
            "- Do not wrap with $ or $$.",
        ),
        "json": (
            "- Output strict JSON only (no comments, no trailing commas).",
            "- Ensure valid quotes and braces.",
        ),
        "yaml": (
            "- Output valid YAML only.",
            "- Use consistent indentation and avoid tabs.",
        ),
        "toml": (
            "- Output valid TOML only.",
            "- Keep key types consistent.",
        ),
        "ini": (
            "- Output valid INI only.",
            "- Keep section headers and key=value pairs consistent.",
        ),
        "sql": (
            "- Output a single, valid SQL statement unless context requires multiple.",
            "- Prefer ANSI SQL when dialect is unclear.",
        ),
        "bash": (
            "- Output POSIX-compatible shell when possible.",
            "- Avoid interactive prompts or destructive commands unless requested.",
        ),
        "powershell": (
            "- Output valid PowerShell commands.",
            "- Avoid destructive commands unless explicitly requested.",
        ),
        "html": (
            "- Output valid HTML only.",
            "- Keep markup minimal and well-formed.",
        ),
        "css": (
            "- Output valid CSS only.",
            "- Use concise, readable selectors.",
        ),
        "diff": (
            "- Output a unified diff only.",
            "- Ensure @@ hunk headers and +/- lines are consistent.",
        ),
        "regex": (
            "- Output the regex pattern only.",
            "- Avoid delimiters unless explicitly requested.",
        ),
        "javascript": script_code,
        "typescript": script_code,
        "text": (
            "- Output plain text only.",
            "- Avoid markdown formatting unless explicitly asked.",
        ),
        "xml": (
            "- Output well-formed XML only.",
            "- Ensure matching tags and proper escaping.",
        ),
        "dockerfile": (
            "- Output valid Dockerfile instructions only.",
            "- Keep layers minimal and ordered logically.",
        ),
        "makefile": (
            "- Output valid Makefile syntax only.",
            "- Use tabs for recipe lines.",
        ),
    }
    bullets = bullets_by_language.get(lang, generic_code)
    # The leading empty element reproduces the newline that opens the block.
    return "\n".join(("", f"Language-specific guidance ({lang}):", *bullets))
|
|
|
|
|
|
|
|
|
|
|
2026-02-23 15:17:36 +08:00
|
|
|
def build_inline_system_prompt(language_id: str = "markdown") -> str:
    """Assemble the system prompt for the inline completion model.

    The prompt names the canonical editor language in its opening sentence,
    lists the fixed hard constraints (output contract, math, code fences,
    Mermaid, boundary newlines, context stitching, OCR safety) and, when
    ``_language_guidance`` returns text for the language, appends that text
    on the line after the last rule.

    Returns:
        The complete system prompt with surrounding whitespace stripped.
    """
    editor_language = _canonical_language_id(language_id)
    opening = (
        f"You are an inline completion engine for a {editor_language} "
        "editor with ghost-text suggestions."
    )
    # Static part of the prompt; a plain string, so braces are literal.
    rules = """Return only the insertion text that should be placed between PREFIX and SUFFIX.

Hard constraints you must follow:
1) Output-only contract:
- Output insertion text only.
- No explanations, no meta labels, no wrapper quotes around the whole answer.

2) Strict math formatting (KaTeX):
- If you output any math expression, it must be strict KaTeX-compatible math.
- Every formula must be wrapped with either $...$ (inline) or $$...$$ (block).
- Never output bare formulas without $ or $$ wrappers.
- Exception: If CURSOR_IN_FENCED_CODE_BLOCK=true and CURSOR_FENCE_LANGUAGE is latex/tex/katex,
output raw LaTeX without $ or $$ wrappers.

3) Strict code formatting:
- Read CURSOR_IN_FENCED_CODE_BLOCK from the user prompt.
- If CURSOR_IN_FENCED_CODE_BLOCK=true:
- You are already inside a fenced code block.
- Never output triple backticks.
- Output code lines only.
- If CURSOR_IN_FENCED_CODE_BLOCK=false:
- Any code output must be in a fenced code block with a language tag:
```{language}
...
```
- Do not output code snippets as inline backticks.
- Choose the language tag from context (no default fallback tag instruction).

4) Mermaid-specific completion rules:
- Read CURSOR_FENCE_LANGUAGE and MERMAID_CONTEXT from the user prompt.
- If CURSOR_FENCE_LANGUAGE=mermaid:
- Output Mermaid statements only.
- Never output triple backticks.
- Never output prose explanations.
- If CURSOR_IN_FENCED_CODE_BLOCK=false and MERMAID_CONTEXT=true:
- Output a complete Mermaid fenced block:
```mermaid
...
```
- Keep Mermaid syntax valid and concise.
- Never mix Mermaid code and explanatory narration in one output.

5) Boundary newline repair:
- Read PREFIX_ENDS_WITH_NEWLINE and SUFFIX_STARTS_WITH_NEWLINE from the user prompt.
- Carefully reason about whether OUTPUT should start or end with a newline.
- If PREFIX lacks a required boundary newline, add it at OUTPUT start.
- If SUFFIX lacks a required boundary newline, add it at OUTPUT end.
- Ensure PREFIX + OUTPUT + SUFFIX is structurally natural.

6) Context stitching:
- Do not repeat text that already appears at the start of SUFFIX.
- Preserve nearby language, tone, punctuation, indentation, and markdown structure.
- Continue existing structures naturally (lists, tables, block quotes, headings).

7) OCR safety:
- PREFIX may include hidden OCR metadata tags like <OCR:...>.
- Never output any OCR tag.
- Never output OCR tag fragments such as <OCR:...>."""
    prompt = f"{opening}\n\n{rules}"

    extra = _language_guidance(editor_language)
    if extra:
        prompt = f"{prompt.rstrip()}\n{extra.strip()}"

    return prompt.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
INLINE_EXAMPLES = """[EX01] Prose continuation
|
|
|
|
|
<PREFIX>The quick brown fox </PREFIX>
|
|
|
|
|
<SUFFIX>jumps over the lazy dog.</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
moved quietly and then
|
|
|
|
|
|
|
|
|
|
[EX02] Avoid repeating suffix beginning
|
|
|
|
|
<PREFIX>Our launch plan starts with </PREFIX>
|
|
|
|
|
<SUFFIX>phase one, followed by phase two.</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
careful internal testing before
|
|
|
|
|
|
|
|
|
|
[EX03] Continue markdown checklist
|
|
|
|
|
<PREFIX>## TODO
|
|
|
|
|
- [ ] Buy milk
|
|
|
|
|
- [ ] </PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
Write release notes and share draft with team
|
|
|
|
|
|
|
|
|
|
[EX04] Cursor outside code block, code must use fenced block
|
|
|
|
|
CURSOR_IN_FENCED_CODE_BLOCK=false
|
|
|
|
|
<PREFIX>Parse this JSON payload in Python:</PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
```python
|
|
|
|
|
import json
|
|
|
|
|
data = json.loads(payload)
|
|
|
|
|
```
|
|
|
|
|
|
|
|
|
|
[EX05] Cursor inside fenced code block, do not output fences
|
|
|
|
|
CURSOR_IN_FENCED_CODE_BLOCK=true
|
|
|
|
|
<PREFIX>```python
|
|
|
|
|
def add(a, b):
|
|
|
|
|
return </PREFIX>
|
|
|
|
|
<SUFFIX>
|
|
|
|
|
```</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
a + b
|
|
|
|
|
|
|
|
|
|
[EX06] Inline math must use $...$
|
|
|
|
|
<PREFIX>The derivative of x^2 is </PREFIX>
|
|
|
|
|
<SUFFIX>.</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
$2x$
|
|
|
|
|
|
|
|
|
|
[EX07] Block math must use $$...$$
|
|
|
|
|
<PREFIX>We can write the Gaussian integral as:</PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
$$
|
|
|
|
|
\\int_{-\\infty}^{\\infty} e^{-x^2}\\,dx = \\sqrt{\\pi}
|
|
|
|
|
$$
|
|
|
|
|
|
|
|
|
|
[EX08] Prefix misses boundary newline; add newline at output start
|
|
|
|
|
PREFIX_ENDS_WITH_NEWLINE=false
|
|
|
|
|
<PREFIX>Deployment steps:</PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
|
|
|
|
|
- Build artifact
|
|
|
|
|
- Deploy service
|
|
|
|
|
|
|
|
|
|
[EX09] Suffix misses boundary newline; add newline at output end
|
|
|
|
|
SUFFIX_STARTS_WITH_NEWLINE=false
|
|
|
|
|
<PREFIX>Summary paragraph complete.</PREFIX>
|
|
|
|
|
<SUFFIX>## Next Section</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
[EX10] OCR metadata exists but must never be emitted
|
|
|
|
|
<PREFIX> <OCR:equation y = mx + b>
|
|
|
|
|
The relationship is </PREFIX>
|
|
|
|
|
<SUFFIX>.</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
$y = mx + b$
|
|
|
|
|
|
|
|
|
|
[EX11] Continue markdown table with correct row shape
|
|
|
|
|
<PREFIX>| Name | Score |
|
|
|
|
|
| --- | --- |
|
|
|
|
|
| Alice | 92 |
|
|
|
|
|
| Bob | </PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
88 |
|
|
|
|
|
|
|
|
|
|
[EX12] Mixed text + math + code in one insertion
|
|
|
|
|
CURSOR_IN_FENCED_CODE_BLOCK=false
|
|
|
|
|
<PREFIX>Use the area formula and provide a tiny JS helper.</PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
The area is $A = \\pi r^2$.
|
|
|
|
|
|
|
|
|
|
```javascript
|
|
|
|
|
const area = (r) => Math.PI * r * r;
|
2026-02-25 19:00:17 +08:00
|
|
|
```
|
|
|
|
|
|
|
|
|
|
[EX13] Cursor inside mermaid fence: no backticks, mermaid lines only
|
|
|
|
|
CURSOR_IN_FENCED_CODE_BLOCK=true
|
|
|
|
|
CURSOR_FENCE_LANGUAGE=mermaid
|
|
|
|
|
<PREFIX>```mermaid
|
|
|
|
|
flowchart TD
|
|
|
|
|
A[Start] --> </PREFIX>
|
|
|
|
|
<SUFFIX>
|
|
|
|
|
```</SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
B{Valid?}
|
|
|
|
|
B -->|Yes| C[Done]
|
|
|
|
|
|
|
|
|
|
[EX14] Mermaid context outside fence: return full mermaid block
|
|
|
|
|
CURSOR_IN_FENCED_CODE_BLOCK=false
|
|
|
|
|
MERMAID_CONTEXT=true
|
|
|
|
|
<PREFIX>Please provide a simple release pipeline diagram.</PREFIX>
|
|
|
|
|
<SUFFIX></SUFFIX>
|
|
|
|
|
Expected OUTPUT:
|
|
|
|
|
```mermaid
|
|
|
|
|
flowchart LR
|
|
|
|
|
Build --> Test --> Deploy
|
2026-02-23 15:17:36 +08:00
|
|
|
```"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_completion_prompts(
    prefix: str,
    suffix: str,
    language_id: str = "markdown",
    location: str = "",
    thinking_level: str = "low",
    preferences: object = None,
) -> Tuple[str, str]:
    """Build the system and user prompts for one inline completion request.

    Args:
        prefix: Document text before the cursor.
        suffix: Document text after the cursor.
        language_id: Editor language id; canonicalised before use.
        location: Optional user location, added to the prompt when non-empty.
        thinking_level: Reasoning hint, echoed into the user prompt.
        preferences: Optional object or mapping that may provide
            ``timezone``, ``language`` and ``currency``. Missing entries
            and the value ``"auto"`` are ignored.

    Returns:
        ``(system_prompt, user_prompt)``, both stripped of surrounding
        whitespace.
    """

    def _pref(name: str):
        # Read one preference without assuming the container's shape, so a
        # dict or a partially populated object does not raise AttributeError.
        if not preferences:
            return None
        if isinstance(preferences, dict):
            return preferences.get(name)
        return getattr(preferences, name, None)

    safe_language_id = _canonical_language_id(language_id)
    recent_prefix, recent_suffix = _prepare_context(prefix, suffix)
    recent_prefix = _normalize_newlines(recent_prefix)
    recent_suffix = _normalize_newlines(recent_suffix)

    # State flags the system prompt tells the model to read.
    cursor_fence_language = _active_fence_language(recent_prefix)
    cursor_in_fenced_code_block = cursor_fence_language != "none"
    mermaid_context = _is_mermaid_context(
        recent_prefix, recent_suffix, cursor_fence_language
    )
    prefix_ends_with_newline = recent_prefix.endswith("\n")
    suffix_starts_with_newline = recent_suffix.startswith("\n")

    tz_pref = _pref("timezone") or "auto"
    current_time = _get_current_datetime(tz_pref)
    location_info = f"\nUser location: {location}" if location else ""

    pref_info = []
    preferred_language = _pref("language")
    if preferred_language and preferred_language != "auto":
        pref_info.append(f"Preferred language: {preferred_language}")
    preferred_currency = _pref("currency")
    if preferred_currency and preferred_currency != "auto":
        pref_info.append(f"Preferred currency: {preferred_currency}")

    preferences_instruction = "\n".join(pref_info)
    if preferences_instruction:
        preferences_instruction = f"\nUser Preferences:\n{preferences_instruction}"

    user_prompt = f"""Current time: {current_time}{location_info}{preferences_instruction}
Reasoning hint: {thinking_level}
Editor language id: {safe_language_id}

Completion state flags:
- CURSOR_IN_FENCED_CODE_BLOCK: {"true" if cursor_in_fenced_code_block else "false"}
- CURSOR_FENCE_LANGUAGE: {cursor_fence_language}
- MERMAID_CONTEXT: {"true" if mermaid_context else "false"}
- PREFIX_ENDS_WITH_NEWLINE: {"true" if prefix_ends_with_newline else "false"}
- SUFFIX_STARTS_WITH_NEWLINE: {"true" if suffix_starts_with_newline else "false"}

Task:
- Produce the best insertion text at the cursor between PREFIX and SUFFIX.
- Keep insertion meaningful and non-empty.
- Keep insertion concise unless structure requires more content.

Context notes:
- PREFIX may include OCR metadata after image markdown, e.g.  <OCR:description>.
- OCR metadata is hidden context and must never be copied into output.
- Preserve local style and formatting.

Decision policy:
- Prioritize seamless join: PREFIX + OUTPUT + SUFFIX must read naturally.
- Do not repeat SUFFIX-leading text.
- If uncertain, prefer a complete short phrase/sentence with clear meaning.

Comprehensive examples:
{INLINE_EXAMPLES}

Now produce the insertion.

<PREFIX>
{recent_prefix}
</PREFIX>

<SUFFIX>
{recent_suffix}
</SUFFIX>

Output:"""

    system_prompt = build_inline_system_prompt(safe_language_id)
    return system_prompt.strip(), user_prompt.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_prompt(
    prefix: str,
    suffix: str,
    language_id: str = "markdown",
    location: str = "",
    thinking_level: str = "low",
    preferences: object = None,
) -> str:
    """Legacy entry point kept for older callers.

    Forwards every argument to ``build_completion_prompts`` and returns only
    the second element of its result, the user prompt.
    """
    prompts = build_completion_prompts(
        prefix,
        suffix,
        language_id=language_id,
        location=location,
        thinking_level=thinking_level,
        preferences=preferences,
    )
    return prompts[1]
|