Files
llm-in-text/completions-sample-code/prompt/src/snippetInclusion/jaccardMatching.ts
ydy0615 ba49f82953 Add tokenization and context provider API types
- Implemented window delineation tests for indentation-based tokenization.
- Created tokenizer module with various tokenization strategies including TTokenizer and ApproximateTokenizer.
- Added type definitions for authentication parameters and code citation notifications.
- Introduced context provider API for extensions to supply additional context items to Copilot.
- Defined core types and schemas for position and range.
- Established status types for agent status management in IDEs.
2026-01-18 10:24:32 +08:00

57 lines
1.8 KiB
TypeScript

/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { DocumentInfoWithOffset } from '../prompt';
import { CursorContextInfo, getCursorContext } from './cursorContext';
import { WindowedMatcher } from './selectRelevance';
import { getBasicWindowDelineations } from './windowDelineations';
/**
 * A `WindowedMatcher` whose windows are all derived from one fixed
 * `windowLength`, and whose similarity measure is the Jaccard score
 * implemented by `computeScore`.
 *
 * Instances cannot be constructed directly (the constructor is private);
 * obtain them through `FixedWindowSizeJaccardMatcher.FACTORY(windowLength).to(doc)`.
 */
export class FixedWindowSizeJaccardMatcher extends WindowedMatcher {
	/**
	 * @param referenceDoc Document handed to the `WindowedMatcher` base class.
	 * @param windowLength Window size forwarded to the window delineation
	 * helper and used as the maximum line count of the cursor context.
	 */
	private constructor(
		referenceDoc: DocumentInfoWithOffset,
		private windowLength: number
	) {
		super(referenceDoc);
	}

	/**
	 * Binds a window length and returns an object whose `to` method creates a
	 * matcher for a given reference document.
	 */
	static FACTORY = (windowLength: number) => ({
		to: (referenceDoc: DocumentInfoWithOffset) =>
			new FixedWindowSizeJaccardMatcher(referenceDoc, windowLength),
	});

	/** Identifier of this matcher configuration: `fixed:` followed by the window length. */
	protected id(): string {
		return `fixed:${this.windowLength}`;
	}

	/** Delegates to `getBasicWindowDelineations` using the configured window length. */
	protected getWindowsDelineations(lines: string[]): [number, number][] {
		const length = this.windowLength;
		return getBasicWindowDelineations(length, lines);
	}

	/** Requests the cursor context of `referenceDoc`, limited to `windowLength` lines. */
	protected _getCursorContextInfo(referenceDoc: DocumentInfoWithOffset): CursorContextInfo {
		const options = { maxLineCount: this.windowLength };
		return getCursorContext(referenceDoc, options);
	}

	/** Scores two token sets with the Jaccard metric (see `computeScore`). */
	protected similarityScore(a: Set<string>, b: Set<string>): number {
		const score = computeScore(a, b);
		return score;
	}
}
/**
 * Compute the Jaccard similarity of two sets: the number of elements in the
 * intersection divided by the number of elements in the union.
 *
 * @param a First set of tokens.
 * @param b Second set of tokens.
 * @returns A value in `[0, 1]`. When both sets are empty the union is empty
 * and the ratio is undefined; `0` is returned in that case instead of `NaN`,
 * so callers can always compare and sort scores.
 */
export function computeScore(a: Set<string>, b: Set<string>): number {
	// Walk the smaller set and probe the larger one: fewer lookups, same count.
	const smaller = a.size <= b.size ? a : b;
	const larger = smaller === a ? b : a;

	let sharedCount = 0;
	smaller.forEach(element => {
		if (larger.has(element)) {
			sharedCount++;
		}
	});

	// |A ∪ B| = |A| + |B| - |A ∩ B|
	const unionSize = a.size + b.size - sharedCount;

	// Guard against 0 / 0 (both sets empty), which would otherwise yield NaN.
	return unionSize === 0 ? 0 : sharedCount / unionSize;
}