Add tokenization and context provider API types

- Implemented window delineation tests for indentation-based tokenization.
- Created tokenizer module with various tokenization strategies including TTokenizer and ApproximateTokenizer.
- Added type definitions for authentication parameters and code citation notifications.
- Introduced context provider API for extensions to supply additional context items to Copilot.
- Defined core types and schemas for position and range.
- Established status types for agent status management in IDEs.
This commit is contained in:
2026-01-18 10:24:32 +08:00
parent 55c1b180f7
commit ba49f82953
345 changed files with 0 additions and 13 deletions

View File

@@ -0,0 +1,163 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { DataConsumer, Dispatch, StateUpdater, TypePredicate } from './hooks';
import { TokenizerName } from '../tokenization';
import { CancellationToken } from 'vscode-languageserver-protocol';
// --------- Prompt component types
/**
 * Anything a component may contain or return: a nested element, literal text,
 * a number, or `undefined` for "nothing here".
 */
export type PromptComponentChild = PromptElement | string | number | undefined;
/** Either a single child or a list of children. */
type PromptComponentChildren = PromptComponentChild[] | PromptComponentChild;
/**
 * Attributes every prompt element accepts in addition to its own props.
 */
interface PromptAttributes {
// Open-ended: elements may carry arbitrary extra props.
[key: string]: unknown;
// Identifier for the element among its siblings.
key?: string | number;
// Relative weight of the element.
// NOTE(review): the default snapshot-walker transformer clamps this to [0, 1] — confirm other consumers agree.
weight?: number;
// Opaque origin marker; its shape is not defined by this type.
source?: unknown;
}
/**
 * Props received by a component: its own props `P` intersected with the
 * read-only shared attributes and an optional `children` entry.
 */
export type PromptElementProps<P = object> = P & Readonly<PromptAttributes & { children?: PromptComponentChildren }>;
/**
 * Hooks made available to a function component. An object of this type is the
 * second argument of every `FunctionComponent` call.
 */
export interface ComponentContext {
/**
 * Hook to manage component state that can change over time.
 * When called without an argument the state starts out as `undefined`.
 * @param initialState - Initial state value or function that returns initial state
 * @returns A tuple containing current state and setter function
 * @example
 * function Counter(props: PromptElementProps, context: ComponentContext) {
 * const [count, setCount] = context.useState(0);
 * return <Text>Count: {count}</Text>;
 * }
 */
useState<S = undefined>(): [S | undefined, Dispatch<StateUpdater<S | undefined>>];
useState<S>(initialState: S | (() => S)): [S, Dispatch<StateUpdater<S>>];
/**
 * Hook to subscribe to typed external data streams with type checking.
 * The consumer only receives data for which the predicate returns `true`.
 * @param typePredicate - TypeScript type predicate function for runtime type checking
 * @param consumer - Callback function that receives type-checked data; it may be async
 * @example
 * function DataViewer(props: PromptElementProps, context: ComponentContext) {
 * interface MessageData {
 * message: string;
 * }
 *
 * function isMessageData(data: unknown): data is MessageData {
 * return typeof data === 'object' && data !== null &&
 * 'message' in data && typeof (data as any).message === 'string';
 * }
 *
 * context.useData(
 * isMessageData,
 * (data) => console.log(data.message)
 * );
 * }
 */
useData<T>(typePredicate: TypePredicate<T>, consumer: DataConsumer<T>): void;
}
/**
 * A fragment: a grouping node, tagged `'f'`, that only carries a list of children.
 */
export interface PromptFragment {
type: 'f';
children: PromptComponentChild[];
}
/**
 * Function that wraps children into a `PromptFragment`.
 */
export interface FragmentFunction {
(children: PromptComponentChildren): PromptFragment;
}
/**
 * A prompt component: a function that receives its props and a
 * `ComponentContext` (for the `useState` / `useData` hooks) and returns the
 * children it renders to.
 */
export interface FunctionComponent<P = PromptAttributes> {
(props: PromptElementProps<P>, context: ComponentContext): PromptComponentChildren;
}
/**
 * Data structure returned by prompt component functions and used by the `virtualize` function to construct a virtual prompt.
 * `type` is the component (or fragment function) to invoke and `props` is what it is invoked with.
 */
export interface PromptElement<P = PromptAttributes> {
type: FunctionComponent<P> | FragmentFunction;
props: P & { children: PromptComponentChildren };
}
// --------- Prompt snapshot and rendering types
/** Per-node measurements attached to a snapshot node. */
export interface PromptSnapshotNodeStatistics {
// Time in milliseconds spent updating the node's data, when measured.
updateDataTimeMs?: number;
}
/**
 * A prompt snapshot node is a node in the virtual prompt tree in its immutable form.
 */
export interface PromptSnapshotNode {
// Name of the node.
name: string;
// Path of the node within the tree.
path: string;
// Text value of the node, if it has one.
value?: string;
// Props of the node, if it has any.
props?: PromptElementProps;
children?: PromptSnapshotNode[];
statistics: PromptSnapshotNodeStatistics;
}
/**
 * Turns a prompt snapshot into a prompt of type `T`, honoring the given render options.
 */
export interface PromptRenderer<T extends Prompt, P extends PromptRenderOptions> {
render(snapshot: PromptSnapshotNode, options: P, cancellationToken?: CancellationToken): T;
}
/**
 * Metadata attached to a successfully rendered prompt.
 * NOTE(review): the timing fields are presumably wall-clock milliseconds for each phase — confirm with the renderer implementations.
 */
export type PromptMetadata = {
renderId: number;
rendererName?: string;
tokenizer: string;
elisionTimeMs: number;
renderTimeMs: number;
updateDataTimeMs: number;
componentStatistics: ComponentStatistics[];
};
/** Statistics reported for a single component, identified by its path. */
export type ComponentStatistics = {
componentPath: string;
expectedTokens?: number;
actualTokens?: number;
updateDataTimeMs?: number;
// This field is only used internally; even though we send it to CTS, it is not included in telemetry.
source?: unknown;
};
// Status of an operation: it succeeded, was cancelled, or failed with an error.
type StatusOk = { status: 'ok' };
export type StatusNotOk = { status: 'cancelled' } | { status: 'error'; error: Error };
export type Status = StatusOk | StatusNotOk;
/** A successful prompt result together with its metadata. */
export type PromptOk = StatusOk & {
metadata: PromptMetadata;
};
// What a renderer produces: a successful prompt, or a cancelled / error status.
type Prompt = PromptOk | StatusNotOk;
/** Options accepted by prompt renderers. */
export interface PromptRenderOptions {
// Tokenizer to use, by name.
tokenizer?: TokenizerName;
// NOTE(review): presumably the string placed between rendered parts — confirm with the renderer implementations.
delimiter?: string;
}
// --------- Components
// Children accepted by `Text`: literal text or numbers only, no nested elements.
type TextPromptComponentChild = string | number | undefined;
// Props of `Text`: the regular element props with `children` narrowed to text-like values.
interface TextPromptElementProps extends PromptElementProps {
children?: TextPromptComponentChild[] | TextPromptComponentChild;
}
/**
 * Basic component to represent text in a prompt.
 *
 * @param props - Element props; `children` holds the text content.
 * @returns The children joined into one string when `children` is an array,
 * the single child as-is when it is a non-empty string or a number, and
 * `undefined` when there is no content (missing, `null` or empty string).
 */
export function Text(props: TextPromptElementProps) {
	const { children } = props;
	if (Array.isArray(children)) {
		return children.join('');
	}
	// Compare explicitly instead of testing truthiness: `0` is valid text content
	// and must not be dropped.
	if (children == null || children === '') {
		return;
	}
	return children;
}
/**
 * Grouping component: everything placed inside a chunk is elided as one unit,
 * either completely or not at all. The component itself adds nothing and
 * simply hands its children back.
 */
export function Chunk(props: PromptElementProps) {
	const { children } = props;
	return children;
}

View File

@@ -0,0 +1,67 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
/** Function that accepts a value and returns nothing; the shape of a state setter. */
export type Dispatch<A> = (value: A) => void;
/** Either the next state itself or a function deriving it from the previous state. */
export type StateUpdater<S> = S | ((prevState: S) => S);

/**
 * Implementation of the `useState` hook for one component.
 *
 * State values live in the `states` array handed to the constructor and are
 * matched to `useState` calls by call order: the first call uses slot 0, the
 * second slot 1, and so on. Creating a new `UseState` over the same array
 * therefore continues with the previously stored values.
 */
export class UseState {
	private currentIndex: number = 0;
	private stateChanged: boolean = false;

	constructor(private readonly states: unknown[]) { }

	/**
	 * Returns the current value of the next state slot together with its setter.
	 *
	 * @param initialState - Value (or function producing the value) stored in the
	 * slot the first time it is used. It is ignored once the slot exists, even if
	 * the stored value is `undefined`.
	 * @returns A tuple of the current state and a setter. The setter accepts a new
	 * value or an updater function receiving the previous value; calling it marks
	 * this hook as changed.
	 */
	useState<S = undefined>(): [S | undefined, Dispatch<StateUpdater<S | undefined>>];
	useState<S>(initialState: S | (() => S)): [S, Dispatch<StateUpdater<S>>];
	useState<S>(initialState?: S | (() => S)): [S | undefined, Dispatch<StateUpdater<S | undefined>>] {
		const index = this.currentIndex++;

		// Initialize only when the slot has never been written. Checking the stored
		// value against `undefined` would re-apply the initial state (and re-run a
		// lazy initializer) after the state was deliberately set to `undefined`.
		if (!(index in this.states)) {
			this.states[index] = typeof initialState === 'function' ? (initialState as () => S)() : initialState;
		}

		const setState = (newState: StateUpdater<S | undefined>) => {
			const nextState =
				typeof newState === 'function' ? (newState as (prevState: S) => S)(this.states[index] as S) : newState;
			this.states[index] = nextState;
			this.stateChanged = true;
		};

		return [this.states[index] as S, setState];
	}

	/** Whether any setter handed out by this instance has been called. */
	hasChanged(): boolean {
		return this.stateChanged;
	}
}
/** Runtime check that narrows unknown data to `T`. */
export type TypePredicate<T> = (data: unknown) => data is T;
/** Callback receiving data of type `T`; may be synchronous or asynchronous. */
export type DataConsumer<T> = (data: T) => void | Promise<void>;

/**
 * Implementation of the `useData` hook for one component: keeps the registered
 * consumers and feeds incoming data to those whose predicate accepts it.
 */
export class UseData {
	private consumers: DataConsumer<unknown>[] = [];

	/**
	 * @param measureUpdateTime - Called after each update that had at least one
	 * consumer, with the elapsed time in milliseconds.
	 */
	constructor(private readonly measureUpdateTime: (updateTimeMs: number) => void) { }

	/**
	 * Registers a consumer that is only invoked for data accepted by `typePredicate`.
	 */
	useData<T>(typePredicate: TypePredicate<T>, consumer: DataConsumer<T>): void {
		const guardedConsumer: DataConsumer<unknown> = (candidate: unknown) => {
			if (!typePredicate(candidate)) {
				return;
			}
			return consumer(candidate);
		};
		this.consumers.push(guardedConsumer);
	}

	/**
	 * Offers `data` to every registered consumer, one after the other, awaiting
	 * each before moving on, and then reports how long the whole pass took.
	 * Does nothing (and reports nothing) when no consumer is registered.
	 */
	async updateData(data: unknown) {
		if (this.consumers.length === 0) {
			return;
		}
		const startedAt = performance.now();
		for (const notify of this.consumers) {
			await notify(data);
		}
		this.measureUpdateTime(performance.now() - startedAt);
	}
}

View File

@@ -0,0 +1,270 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { CancellationToken } from 'vscode-languageserver-protocol';
import {
FragmentFunction,
FunctionComponent,
type ComponentContext,
type PromptComponentChild,
type PromptElement,
type PromptElementProps,
} from './components';
import { DataConsumer, Dispatch, StateUpdater, TypePredicate, UseData, UseState } from './hooks';
import { DataPipe } from './virtualPrompt';
/**
 * A virtual prompt node is an in-memory representation of a prompt component in its rendered form.
 * It is constructed from a `PromptElement` and contains the name of the component that it was constructed from, and resolved external context and state.
 */
export type VirtualPromptNode = {
// Component function name, fragment type, or `'string'` / `'number'` for literal children.
name: string;
// Address of the node in the tree, built from the parent's path, the node's key or index and its name.
path: string;
// Props of the originating element; literal children carry `{ value }` instead.
props?: PromptElementProps;
// Virtualized output of the component; absent on literal nodes.
children?: VirtualPromptNode[];
// The element or literal this node was created from; reused to virtualize the node again.
component?: PromptComponentChild;
// Hooks (state and data) of the component; only present on function component nodes.
lifecycle?: PromptElementLifecycle;
};
// Result of virtualizing a child: a node, or `undefined` when the child was `undefined`.
type VirtualPromptNodeChild = VirtualPromptNode | undefined;
/**
 * Translates a `PromptComponentChild` object into a tree of virtual prompt nodes
 * and keeps that tree up to date when component state changes.
 *
 * Every function component is addressed by a path made of its parent's path, its
 * `key` (or, without a key, the position it occupied in its parent's output) and
 * the function name. Component state is stored per path, so a component must get
 * the same path every time it is virtualized; otherwise its state is lost.
 */
export class VirtualPromptReconciler {
	private lifecycleData: Map<string, PromptElementLifecycleData> = new Map();
	private vTree: VirtualPromptNode | undefined;
	/**
	 * Index each node was virtualized with, i.e. its position in the parent's
	 * output *before* `undefined` children were filtered out. Needed to rebuild
	 * the exact same path when the node is virtualized again.
	 */
	private readonly slotIndices = new WeakMap<VirtualPromptNode, number>();

	/**
	 * @param prompt - Root element of the prompt; it is virtualized immediately.
	 */
	constructor(prompt: PromptElement) {
		// Initial virtualization
		this.vTree = this.virtualizeElement(prompt, '$', 0);
	}

	/**
	 * Brings the tree up to date by virtualizing again every component whose
	 * state changed since it was last virtualized.
	 *
	 * @param cancellationToken - When cancellation is already requested, the tree is returned untouched.
	 * @returns The (possibly updated) root of the virtual tree.
	 * @throws Error when there is no tree to reconcile.
	 */
	reconcile(cancellationToken?: CancellationToken): VirtualPromptNode | undefined {
		if (!this.vTree) {
			throw new Error('No tree to reconcile, make sure to pass a valid prompt');
		}
		if (cancellationToken?.isCancellationRequested) {
			return this.vTree;
		}
		this.vTree = this.reconcileNode(this.vTree, '$', 0, cancellationToken);
		return this.vTree;
	}

	/**
	 * Reconciles one node: virtualizes it again when its state changed, otherwise
	 * recurses into its children.
	 */
	private reconcileNode(
		node: VirtualPromptNode,
		parentNodePath: string,
		nodeIndex: number,
		cancellationToken?: CancellationToken
	): VirtualPromptNodeChild {
		// If the node has no children or does not have a lifecycle, return it as is (primitive nodes)
		if (!node.children && !node.lifecycle) { return node; }

		let newNode: VirtualPromptNodeChild = node;
		const needsReconciliation = node.lifecycle?.isRemountRequired();

		if (needsReconciliation) {
			// Virtualize the node again and drop the state of descendants that no longer exist
			const oldChildrenPaths = this.collectChildPaths(node);
			newNode = this.virtualizeElement(node.component, parentNodePath, nodeIndex);
			const newChildrenPaths = this.collectChildPaths(newNode);
			this.cleanupState(oldChildrenPaths, newChildrenPaths);
		} else if (node.children) {
			// Otherwise, check if the children need reconciliation
			const children: VirtualPromptNode[] = [];
			for (let i = 0; i < node.children.length; i++) {
				const child = node.children[i];
				if (child) {
					// `node.children` has `undefined` entries filtered out, so `i` can differ
					// from the index the child's path was built with. Use the recorded index
					// so that virtualizing the child again yields the same path (and state).
					const slotIndex = this.slotIndices.get(child) ?? i;
					const reconciledChild = this.reconcileNode(child, node.path, slotIndex, cancellationToken);
					if (reconciledChild !== undefined) {
						children.push(reconciledChild);
					}
				}
			}
			newNode.children = children;
		}
		return newNode;
	}

	/**
	 * Virtualizes a child and remembers the index it was virtualized with.
	 */
	private virtualizeElement(
		component: PromptComponentChild,
		parentNodePath: string,
		nodeIndex: number
	): VirtualPromptNodeChild {
		const node = this.createNode(component, parentNodePath, nodeIndex);
		if (node) {
			this.slotIndices.set(node, nodeIndex);
		}
		return node;
	}

	/**
	 * Builds the virtual node for a child: nothing for `undefined`, a leaf for
	 * strings and numbers, a grouping node for fragments, and the rendered
	 * subtree for function components.
	 */
	private createNode(
		component: PromptComponentChild,
		parentNodePath: string,
		nodeIndex: number
	): VirtualPromptNodeChild {
		if (typeof component === 'undefined') {
			return undefined;
		}
		if (typeof component === 'string' || typeof component === 'number') {
			return {
				name: typeof component,
				path: `${parentNodePath}[${nodeIndex}]`,
				props: { value: component },
				component,
			};
		}
		if (isFragmentFunction(component.type)) {
			const fragment = component.type(component.props.children);
			const indexIndicator = parentNodePath !== '$' ? `[${nodeIndex}]` : ``;
			const componentPath = `${parentNodePath}${indexIndicator}.${fragment.type}`;
			const children = fragment.children.map((c, i) => this.virtualizeElement(c, componentPath, i));
			this.ensureUniqueKeys(children);
			return {
				name: fragment.type,
				path: componentPath,
				children: children.flat().filter(c => c !== undefined),
				component,
			};
		}
		return this.virtualizeFunctionComponent(parentNodePath, nodeIndex, component, component.type);
	}

	/**
	 * Invokes a function component with its props and hooks and virtualizes
	 * whatever it returns.
	 */
	private virtualizeFunctionComponent(
		parentNodePath: string,
		nodeIndex: number,
		component: PromptElement,
		functionComponent: FunctionComponent
	) {
		const key = component.props.key;
		const indexIndicator = VirtualPromptReconciler.hasKey(key) ? `["${key}"]` : `[${nodeIndex}]`;
		const componentPath = `${parentNodePath}${indexIndicator}.${functionComponent.name}`;
		const lifecycle = new PromptElementLifecycle(this.getOrCreateLifecycleData(componentPath));
		const element = functionComponent(component.props, lifecycle);
		const elementToVirtualize = Array.isArray(element) ? element : [element];
		const virtualizedChildren = elementToVirtualize.map((e, i) => this.virtualizeElement(e, componentPath, i));
		const children = virtualizedChildren.flat().filter(e => e !== undefined);
		this.ensureUniqueKeys(children);
		return {
			name: functionComponent.name,
			path: componentPath,
			props: component.props,
			children,
			component,
			lifecycle,
		};
	}

	/**
	 * Whether `key` identifies an element. Any number counts, including `0`, which
	 * a truthiness check would wrongly discard; empty strings do not.
	 */
	private static hasKey(key: unknown): key is string | number {
		return typeof key === 'number' || (typeof key === 'string' && key !== '');
	}

	/**
	 * Throws when two siblings use the same key. Keys are compared in the textual
	 * form they take in a path, so `1` and `'1'` count as the same key.
	 *
	 * @throws Error listing the duplicated keys.
	 */
	private ensureUniqueKeys(nodes: VirtualPromptNodeChild[]) {
		const keyCount = new Map<string, number>();
		for (const node of nodes) {
			const key = node?.props?.key;
			if (VirtualPromptReconciler.hasKey(key)) {
				const pathKey = String(key);
				keyCount.set(pathKey, (keyCount.get(pathKey) ?? 0) + 1);
			}
		}
		// Find all duplicates
		const duplicates = Array.from(keyCount.entries())
			.filter(([, count]) => count > 1)
			.map(([key]) => key);
		if (duplicates.length > 0) {
			throw new Error(`Duplicate keys found: ${duplicates.join(', ')}`);
		}
	}

	/** Collects the paths of all descendants of `node`, depth first. */
	private collectChildPaths(node: VirtualPromptNode | undefined) {
		const paths: string[] = [];
		if (node?.children) {
			for (const child of node.children) {
				if (child) {
					paths.push(child.path);
					paths.push(...this.collectChildPaths(child));
				}
			}
		}
		return paths;
	}

	/** Forgets the stored state of every path that existed before but does not exist anymore. */
	private cleanupState(oldChildrenPaths: string[], newChildrenPaths: string[]) {
		const survivingPaths = new Set(newChildrenPaths);
		for (const path of oldChildrenPaths) {
			if (!survivingPaths.has(path)) {
				this.lifecycleData.delete(path);
			}
		}
	}

	/** Returns the state container stored for `path`, creating an empty one on first use. */
	private getOrCreateLifecycleData(path: string) {
		let data = this.lifecycleData.get(path);
		if (data === undefined) {
			data = new PromptElementLifecycleData([]);
			this.lifecycleData.set(path, data);
		}
		return data;
	}

	/**
	 * Creates a pipe whose `pump` offers data to the data hooks of every
	 * component in the tree.
	 */
	createPipe(): DataPipe {
		return {
			pump: async (data: unknown) => {
				await this.pumpData(data);
			},
		};
	}

	private async pumpData<T>(data: T) {
		if (!this.vTree) {
			throw new Error('No tree to pump data into. Pumping data before initializing?');
		}
		await this.recursivelyPumpData(data, this.vTree);
	}

	/** Offers `data` to `node` first and then to its children, one at a time. */
	private async recursivelyPumpData<T>(data: T, node: VirtualPromptNode) {
		if (!node) {
			throw new Error(`Can't pump data into undefined node.`);
		}
		await node.lifecycle?.dataHook.updateData(data);
		for (const child of node.children || []) {
			await this.recursivelyPumpData(data, child);
		}
	}
}
/**
 * Data kept for one component across virtualizations: its hook state slots and
 * the most recently recorded data update time.
 */
class PromptElementLifecycleData {
	state: unknown[];
	_updateTimeMs: number;

	constructor(state: unknown[]) {
		this.state = state;
		this._updateTimeMs = 0;
	}

	/**
	 * Hands out the recorded update time and clears it, so the same measurement
	 * is not reported twice.
	 */
	getUpdateTimeMsAndReset() {
		const { _updateTimeMs: recordedMs } = this;
		this._updateTimeMs = 0;
		return recordedMs;
	}
}
/**
 * The `ComponentContext` handed to a function component. It wires the
 * component's hooks to the lifecycle data that belongs to it.
 */
class PromptElementLifecycle implements ComponentContext {
	private readonly stateHook: UseState;
	readonly dataHook: UseData;

	/**
	 * @param lifecycleData - Backing store for the component's state slots; also
	 * receives the duration reported by the data hook.
	 */
	constructor(readonly lifecycleData: PromptElementLifecycleData) {
		const recordUpdateTime = (elapsedMs: number) => {
			lifecycleData._updateTimeMs = elapsedMs;
		};
		this.stateHook = new UseState(lifecycleData.state);
		this.dataHook = new UseData(recordUpdateTime);
	}

	useState<S = undefined>(): [S | undefined, Dispatch<StateUpdater<S | undefined>>];
	useState<S>(initialState: S | (() => S)): [S, Dispatch<StateUpdater<S>>];
	useState<S>(initialState?: S | (() => S)): [S | undefined, Dispatch<StateUpdater<S | undefined>>] {
		// Plain delegation to the state hook
		const stateAndSetter = this.stateHook.useState(initialState);
		return stateAndSetter;
	}

	useData<T>(typePredicate: TypePredicate<T>, consumer: DataConsumer<T>): void {
		// Plain delegation to the data hook
		this.dataHook.useData(typePredicate, consumer);
	}

	/** A component has to be virtualized again once any of its state setters was called. */
	isRemountRequired(): boolean {
		const { stateHook } = this;
		return stateHook.hasChanged();
	}
}
/**
 * Tells fragment functions apart from regular function components: a fragment
 * function is a function that carries an `isFragmentFunction` property.
 */
function isFragmentFunction(element: FragmentFunction | FunctionComponent): element is FragmentFunction {
	if (typeof element !== 'function') {
		return false;
	}
	return 'isFragmentFunction' in element;
}

View File

@@ -0,0 +1,90 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import type { PromptElement, PromptSnapshotNode, Status } from './components';
import { VirtualPromptNode, VirtualPromptReconciler } from './reconciler';
import { CancellationToken } from 'vscode-languageserver-protocol';
/** Result of taking a snapshot: a status plus the snapshot tree, which is only set when the status is `'ok'`. */
type PromptSnapshot = Status & { snapshot: PromptSnapshotNode | undefined };

/**
 * The `VirtualPrompt` class holds the in-memory representation of the prompt, and is responsible for updating it with context, and generating immutable snapshots which can be passed to a prompt renderer.
 */
export class VirtualPrompt {
	private reconciler: VirtualPromptReconciler;

	/**
	 * @param prompt - Root element of the prompt.
	 */
	constructor(prompt: PromptElement) {
		this.reconciler = new VirtualPromptReconciler(prompt);
	}

	/**
	 * Converts a virtual node and its descendants into snapshot nodes.
	 *
	 * @returns The snapshot node, `'cancelled'` as soon as cancellation is
	 * observed anywhere in the subtree, or `undefined` when there is no node.
	 */
	private snapshotNode(
		node: VirtualPromptNode,
		cancellationToken?: CancellationToken
	): PromptSnapshotNode | 'cancelled' | undefined {
		if (!node) {
			return;
		}
		if (cancellationToken?.isCancellationRequested) {
			return 'cancelled';
		}

		const children: PromptSnapshotNode[] = [];
		for (const child of node.children ?? []) {
			const result = this.snapshotNode(child, cancellationToken);
			if (result === 'cancelled') {
				return 'cancelled';
			}
			if (result !== undefined) {
				children.push(result);
			}
		}

		return {
			value: node.props?.value?.toString(),
			name: node.name,
			path: node.path,
			props: node.props,
			children,
			statistics: {
				// Reading the update time also resets it on the lifecycle data
				updateDataTimeMs: node.lifecycle?.lifecycleData.getUpdateTimeMsAndReset(),
			},
		};
	}

	/**
	 * Reconciles the virtual prompt and captures it as an immutable snapshot.
	 * Never throws: failures are reported through the returned status.
	 *
	 * @param cancellationToken - Checked before, during and after the snapshot is built.
	 * @returns `{ status: 'ok', snapshot }` on success, `{ status: 'cancelled' }`
	 * when cancellation was requested, or `{ status: 'error', error }` when
	 * reconciling or snapshotting threw. `error` is always an `Error` instance.
	 */
	snapshot(cancellationToken?: CancellationToken): PromptSnapshot {
		try {
			const vTree = this.reconciler.reconcile(cancellationToken);
			if (cancellationToken?.isCancellationRequested) {
				return { snapshot: undefined, status: 'cancelled' };
			}
			if (!vTree) {
				throw new Error('Invalid virtual prompt tree');
			}
			const snapshotNode = this.snapshotNode(vTree, cancellationToken);
			if (snapshotNode === 'cancelled' || cancellationToken?.isCancellationRequested) {
				return { snapshot: undefined, status: 'cancelled' };
			}
			return { snapshot: snapshotNode, status: 'ok' };
		} catch (e) {
			// Components may throw anything; wrap non-Error values so that `error`
			// really is an `Error`, as the status type promises.
			const error = e instanceof Error ? e : new Error(String(e));
			return { snapshot: undefined, status: 'error', error };
		}
	}

	/**
	 * Creates a pipe for pumping external data into the prompt.
	 */
	createPipe(): DataPipe {
		return this.reconciler.createPipe();
	}
}
/**
 * A data pipe is a one-way channel to get external data into the prompt. Pumping unsupported data types into the pipe will result in no-op.
 */
export interface DataPipe {
/**
 * Sends `data` into the prompt.
 * @param data - Arbitrary external data.
 * @returns A promise that settles once the data has been handed over.
 */
pump(data: unknown): Promise<void>;
}

View File

@@ -0,0 +1,115 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Chunk, PromptSnapshotNode } from './components';
/**
 * Represents the context during the traversal of a prompt snapshot tree.
 * This context is passed to every node and can be modified by transformers.
 * The context computed for a node is also the starting context of its children.
 */
interface WalkContext {
/**
 * Context properties that can be added by custom transformers.
 */
[key: string]: unknown;
}
/**
 * A function that transforms the walking context as the tree is traversed.
 * Transformers are applied in sequence before visiting each node; each one
 * receives the context returned by the previous one.
 *
 * @param node - The current node being visited
 * @param parent - The parent of the current node (undefined for root)
 * @param context - The current context
 * @returns A new context to be used for this node and its children
 */
export type WalkContextTransformer = (
node: PromptSnapshotNode,
parent: PromptSnapshotNode | undefined,
context: WalkContext
) => WalkContext;
/**
 * Depth-first, parent-before-children traversal of a prompt snapshot tree.
 * For every node the walker first derives the node's context by running the
 * transformers, and then hands node, parent and context to a visitor.
 */
export class SnapshotWalker {
	/**
	 * @param snapshot - Root of the snapshot tree to traverse
	 * @param transformers - Context transformers run for every node, in order; defaults to `defaultTransformers()`
	 */
	constructor(
		private readonly snapshot: PromptSnapshotNode,
		private readonly transformers: WalkContextTransformer[] = defaultTransformers()
	) { }

	/**
	 * Traverses the whole tree, starting at the root with an empty context.
	 *
	 * @param visitor - Invoked once per visited node. Returning a falsy value prunes the node's children.
	 */
	walkSnapshot(
		visitor: (n: PromptSnapshotNode, parent: PromptSnapshotNode | undefined, context: WalkContext) => boolean
	) {
		const rootContext: WalkContext = {};
		this.walkSnapshotNode(this.snapshot, undefined, visitor, rootContext);
	}

	/**
	 * Visits `node` and, unless the visitor declines, its children.
	 */
	private walkSnapshotNode(
		node: PromptSnapshotNode,
		parent: PromptSnapshotNode | undefined,
		visitor: (n: PromptSnapshotNode, parent: PromptSnapshotNode | undefined, context: WalkContext) => boolean,
		context: WalkContext
	) {
		// Work on a copy of the incoming context and thread it through every transformer
		let nodeContext: WalkContext = { ...context };
		this.transformers.forEach(transform => {
			nodeContext = transform(node, parent, nodeContext);
		});

		// Descend only when the visitor accepts the node; children inherit this node's context
		if (visitor(node, parent, nodeContext)) {
			for (const child of node.children ?? []) {
				this.walkSnapshotNode(child, node, visitor, nodeContext);
			}
		}
	}
}
/**
 * Builds the context transformers used when none are given explicitly.
 * None of them modifies the context it receives; each returns either the same
 * context or an extended copy.
 *
 * @returns Three transformers, applied in this order:
 * - weight: sets `context.weight` to the node's weight multiplied by the weight inherited from its ancestors,
 * - chunk: for `Chunk` nodes, sets `context.chunks` to a set holding the paths of all enclosing chunks including the node itself,
 * - source: sets `context.source` to the node's `source` prop when the node defines one.
 */
export function defaultTransformers(): WalkContextTransformer[] {
	return [
		// Weight transformer - computes the weight of the current node relative to its parent
		(node, _, context) => {
			// Read the inherited weight without writing a default back into the caller's context
			const inheritedWeight = context.weight === undefined ? 1 : (context.weight as number);
			const weight = node.props?.weight ?? 1;
			// Anything that is not a usable number (including NaN, which would otherwise
			// spread to every descendant) counts as full weight; numbers are clamped to [0, 1]
			const ownWeight =
				typeof weight === 'number' && !Number.isNaN(weight) ? Math.max(0, Math.min(1, weight)) : 1;
			return { ...context, weight: ownWeight * inheritedWeight };
		},
		// Chunk transformer
		(node, _, context) => {
			if (node.name === Chunk.name) {
				// Copy the inherited set so that sibling subtrees do not see each other's chunks
				const chunks = context.chunks ? new Set<string>(context.chunks as Set<string>) : new Set<string>();
				// Add current node path to the set
				chunks.add(node.path);
				return { ...context, chunks };
			}
			return context;
		},
		// Source transformer
		(node, _, context) => {
			if (node.props?.source !== undefined) {
				return { ...context, source: node.props.source };
			}
			return context;
		},
	];
}