Add tokenization and context provider API types

- Implemented window delineation tests for indentation-based tokenization.
- Created tokenizer module with various tokenization strategies including TTokenizer and ApproximateTokenizer.
- Added type definitions for authentication parameters and code citation notifications.
- Introduced context provider API for extensions to supply additional context items to Copilot.
- Defined core types and schemas for position and range.
- Established status types for agent status management in IDEs.
This commit is contained in:
2026-01-18 10:24:32 +08:00
parent 55c1b180f7
commit ba49f82953
345 changed files with 0 additions and 13 deletions

View File

@@ -0,0 +1,749 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
// This file is generated by running 'npm run generate_languages'
// a map of all known languages (see languageMarkers) with their extensions and filenames as they are defined in linguist
/**
 * Lookup table from language id to the file extensions (leading dot included) and, optionally,
 * the exact file names that identify that language.
 *
 * - The same extension may be listed under several languages (for example `.h` appears under
 *   `c`, `cpp` and `objective-c`); consumers have to resolve such ambiguities themselves.
 * - Extensions keep the casing of their upstream definition (for example `.4DForm`, `.Dsr`).
 * - Within a single `extensions` or `filenames` list every entry is unique.
 *
 * NOTE(review): this file is generated; the generator should emit de-duplicated lists as well,
 * otherwise the duplicate `Jenkinsfile` entry under `groovy` will reappear on regeneration.
 */
export const knownLanguages: { [language: string]: { extensions: string[]; filenames?: string[] } } = {
	abap: {
		extensions: ['.abap'],
	},
	aspdotnet: {
		extensions: ['.asax', '.ascx', '.ashx', '.asmx', '.aspx', '.axd'],
	},
	bat: {
		extensions: ['.bat', '.cmd'],
	},
	bibtex: {
		extensions: ['.bib', '.bibtex'],
	},
	blade: {
		extensions: ['.blade', '.blade.php'],
	},
	BluespecSystemVerilog: {
		extensions: ['.bsv'],
	},
	c: {
		extensions: ['.c', '.cats', '.h', '.h.in', '.idc'],
	},
	csharp: {
		extensions: ['.cake', '.cs', '.cs.pp', '.csx', '.linq'],
	},
	cpp: {
		extensions: [
			'.c++',
			'.cc',
			'.cp',
			'.cpp',
			'.cppm',
			'.cxx',
			'.h',
			'.h++',
			'.hh',
			'.hpp',
			'.hxx',
			'.idl',
			'.inc',
			'.inl',
			'.ino',
			'.ipp',
			'.ixx',
			'.rc',
			'.re',
			'.tcc',
			'.tpp',
			'.txx',
			'.i',
		],
	},
	cobol: {
		extensions: ['.cbl', '.ccp', '.cob', '.cobol', '.cpy'],
	},
	css: {
		extensions: ['.css', '.wxss'],
	},
	clojure: {
		extensions: ['.bb', '.boot', '.cl2', '.clj', '.cljc', '.cljs', '.cljs.hl', '.cljscm', '.cljx', '.edn', '.hic'],
		filenames: ['riemann.config'],
	},
	ql: {
		extensions: ['.ql', '.qll'],
	},
	coffeescript: {
		extensions: ['._coffee', '.cake', '.cjsx', '.coffee', '.iced'],
		filenames: ['Cakefile'],
	},
	cuda: {
		extensions: ['.cu', '.cuh'],
	},
	dart: {
		extensions: ['.dart'],
	},
	dockerfile: {
		extensions: ['.containerfile', '.dockerfile'],
		filenames: ['Containerfile', 'Dockerfile'],
	},
	dotenv: {
		extensions: ['.env'],
		filenames: [
			'.env',
			'.env.ci',
			'.env.dev',
			'.env.development',
			'.env.development.local',
			'.env.example',
			'.env.local',
			'.env.prod',
			'.env.production',
			'.env.sample',
			'.env.staging',
			'.env.test',
			'.env.testing',
		],
	},
	html: {
		extensions: [
			'.ect',
			'.ejs',
			'.ejs.t',
			'.jst',
			'.hta',
			'.htm',
			'.html',
			'.html.hl',
			'.html5',
			'.inc',
			'.jsp',
			'.njk',
			'.tpl',
			'.twig',
			'.wxml',
			'.xht',
			'.xhtml',
			'.phtml',
			'.liquid',
		],
	},
	elixir: {
		extensions: ['.ex', '.exs'],
		filenames: ['mix.lock'],
	},
	erlang: {
		extensions: ['.app', '.app.src', '.erl', '.es', '.escript', '.hrl', '.xrl', '.yrl'],
		filenames: ['Emakefile', 'rebar.config', 'rebar.config.lock', 'rebar.lock'],
	},
	fsharp: {
		extensions: ['.fs', '.fsi', '.fsx'],
	},
	go: {
		extensions: ['.go'],
	},
	groovy: {
		extensions: ['.gradle', '.groovy', '.grt', '.gtpl', '.gvy', '.jenkinsfile'],
		filenames: ['Jenkinsfile'],
	},
	graphql: {
		extensions: ['.gql', '.graphql', '.graphqls'],
	},
	terraform: {
		extensions: ['.hcl', '.nomad', '.tf', '.tfvars', '.workflow'],
	},
	hlsl: {
		extensions: ['.cginc', '.fx', '.fxh', '.hlsl', '.hlsli'],
	},
	erb: {
		extensions: ['.erb', '.erb.deface', '.rhtml'],
	},
	razor: {
		extensions: ['.cshtml', '.razor'],
	},
	haml: {
		extensions: ['.haml', '.haml.deface'],
	},
	handlebars: {
		extensions: ['.handlebars', '.hbs'],
	},
	haskell: {
		extensions: ['.hs', '.hs-boot', '.hsc'],
	},
	ini: {
		extensions: ['.cfg', '.cnf', '.dof', '.ini', '.lektorproject', '.prefs', '.pro', '.properties', '.url'],
		filenames: [
			'.buckconfig',
			'.coveragerc',
			'.flake8',
			'.pylintrc',
			'HOSTS',
			'buildozer.spec',
			'hosts',
			'pylintrc',
			'vlcrc',
		],
	},
	json: {
		extensions: [
			'.4DForm',
			'.4DProject',
			'.JSON-tmLanguage',
			'.avsc',
			'.geojson',
			'.gltf',
			'.har',
			'.ice',
			'.json',
			'.json.example',
			'.jsonl',
			'.mcmeta',
			'.sarif',
			'.tact',
			'.tfstate',
			'.tfstate.backup',
			'.topojson',
			'.webapp',
			'.webmanifest',
			'.yy',
			'.yyp',
		],
		filenames: [
			'.all-contributorsrc',
			'.arcconfig',
			'.auto-changelog',
			'.c8rc',
			'.htmlhintrc',
			'.imgbotconfig',
			'.nycrc',
			'.tern-config',
			'.tern-project',
			'.watchmanconfig',
			'MODULE.bazel.lock',
			'Package.resolved',
			'Pipfile.lock',
			'bun.lock',
			'composer.lock',
			'deno.lock',
			'flake.lock',
			'mcmod.info',
		],
	},
	jsonc: {
		extensions: [
			'.code-snippets',
			'.code-workspace',
			'.jsonc',
			'.sublime-build',
			'.sublime-color-scheme',
			'.sublime-commands',
			'.sublime-completions',
			'.sublime-keymap',
			'.sublime-macro',
			'.sublime-menu',
			'.sublime-mousemap',
			'.sublime-project',
			'.sublime-settings',
			'.sublime-theme',
			'.sublime-workspace',
			'.sublime_metrics',
			'.sublime_session',
		],
		filenames: [
			'.babelrc',
			'.devcontainer.json',
			'.eslintrc.json',
			'.jscsrc',
			'.jshintrc',
			'.jslintrc',
			'.swcrc',
			'api-extractor.json',
			'argv.json',
			'devcontainer.json',
			'extensions.json',
			'jsconfig.json',
			'keybindings.json',
			'language-configuration.json',
			'launch.json',
			'profiles.json',
			'settings.json',
			'tasks.json',
			'tsconfig.json',
			'tslint.json',
		],
	},
	java: {
		extensions: ['.jav', '.java', '.jsh'],
	},
	javascript: {
		extensions: [
			'._js',
			'.bones',
			'.cjs',
			'.es',
			'.es6',
			'.frag',
			'.gs',
			'.jake',
			'.javascript',
			'.js',
			'.jsb',
			'.jscad',
			'.jsfl',
			'.jslib',
			'.jsm',
			'.jspre',
			'.jss',
			'.mjs',
			'.njs',
			'.pac',
			'.sjs',
			'.ssjs',
			'.xsjs',
			'.xsjslib',
		],
		filenames: ['Jakefile'],
	},
	julia: {
		extensions: ['.jl'],
	},
	kotlin: {
		extensions: ['.kt', '.ktm', '.kts'],
	},
	less: {
		extensions: ['.less'],
	},
	lua: {
		extensions: ['.fcgi', '.lua', '.luau', '.nse', '.p8', '.pd_lua', '.rbxs', '.rockspec', '.wlua'],
		filenames: ['.luacheckrc'],
	},
	makefile: {
		extensions: ['.d', '.mak', '.make', '.makefile', '.mk', '.mkfile'],
		filenames: [
			'BSDmakefile',
			'GNUmakefile',
			'Kbuild',
			'Makefile',
			'Makefile.am',
			'Makefile.boot',
			'Makefile.frag',
			'Makefile.in',
			'Makefile.inc',
			'Makefile.wat',
			'makefile',
			'makefile.sco',
			'mkfile',
		],
	},
	markdown: {
		extensions: [
			'.livemd',
			'.markdown',
			'.md',
			'.mdown',
			'.mdwn',
			'.mdx',
			'.mkd',
			'.mkdn',
			'.mkdown',
			'.ronn',
			'.scd',
			'.workbook',
		],
		filenames: ['contents.lr'],
	},
	'objective-c': {
		extensions: ['.h', '.m'],
	},
	'objective-cpp': {
		extensions: ['.mm'],
	},
	php: {
		extensions: [
			'.aw',
			'.ctp',
			'.fcgi',
			'.inc',
			'.install',
			'.module',
			'.php',
			'.php3',
			'.php4',
			'.php5',
			'.phps',
			'.phpt',
			'.theme',
		],
		filenames: ['.php', '.php_cs', '.php_cs.dist', 'Phakefile'],
	},
	perl: {
		extensions: ['.al', '.cgi', '.fcgi', '.perl', '.ph', '.pl', '.plx', '.pm', '.psgi', '.t'],
		filenames: ['.latexmkrc', 'Makefile.PL', 'Rexfile', 'ack', 'cpanfile', 'latexmkrc'],
	},
	powershell: {
		extensions: ['.ps1', '.psd1', '.psm1'],
	},
	pug: {
		extensions: ['.jade', '.pug'],
	},
	python: {
		extensions: [
			'.cgi',
			'.codon',
			'.fcgi',
			'.gyp',
			'.gypi',
			'.lmi',
			'.py',
			'.py3',
			'.pyde',
			'.pyi',
			'.pyp',
			'.pyt',
			'.pyw',
			'.rpy',
			'.sage',
			'.spec',
			'.tac',
			'.wsgi',
			'.xpy',
		],
		filenames: ['.gclient', 'DEPS', 'SConscript', 'SConstruct', 'wscript'],
	},
	r: {
		extensions: ['.r', '.rd', '.rsx'],
		filenames: ['.Rprofile', 'expr-dist'],
	},
	ruby: {
		extensions: [
			'.builder',
			'.eye',
			'.fcgi',
			'.gemspec',
			'.god',
			'.jbuilder',
			'.mspec',
			'.pluginspec',
			'.podspec',
			'.prawn',
			'.rabl',
			'.rake',
			'.rb',
			'.rbi',
			'.rbuild',
			'.rbw',
			'.rbx',
			'.ru',
			'.ruby',
			'.spec',
			'.thor',
			'.watchr',
		],
		filenames: [
			'.irbrc',
			'.pryrc',
			'.simplecov',
			'Appraisals',
			'Berksfile',
			'Brewfile',
			'Buildfile',
			'Capfile',
			'Dangerfile',
			'Deliverfile',
			'Fastfile',
			'Gemfile',
			'Guardfile',
			'Jarfile',
			'Mavenfile',
			'Podfile',
			'Puppetfile',
			'Rakefile',
			'Snapfile',
			'Steepfile',
			'Thorfile',
			'Vagrantfile',
			'buildfile',
		],
	},
	rust: {
		extensions: ['.rs', '.rs.in'],
	},
	scss: {
		extensions: ['.scss'],
	},
	sql: {
		extensions: ['.cql', '.ddl', '.inc', '.mysql', '.prc', '.sql', '.tab', '.udf', '.viw'],
	},
	sass: {
		extensions: ['.sass'],
	},
	scala: {
		extensions: ['.kojo', '.sbt', '.sc', '.scala'],
	},
	shellscript: {
		extensions: [
			'.bash',
			'.bats',
			'.cgi',
			'.command',
			'.fcgi',
			'.fish',
			'.ksh',
			'.sh',
			'.sh.in',
			'.tmux',
			'.tool',
			'.trigger',
			'.zsh',
			'.zsh-theme',
		],
		filenames: [
			'.bash_aliases',
			'.bash_functions',
			'.bash_history',
			'.bash_logout',
			'.bash_profile',
			'.bashrc',
			'.cshrc',
			'.envrc',
			'.flaskenv',
			'.kshrc',
			'.login',
			'.profile',
			'.tmux.conf',
			'.zlogin',
			'.zlogout',
			'.zprofile',
			'.zshenv',
			'.zshrc',
			'9fs',
			'PKGBUILD',
			'bash_aliases',
			'bash_logout',
			'bash_profile',
			'bashrc',
			'cshrc',
			'gradlew',
			'kshrc',
			'login',
			'man',
			'profile',
			'tmux.conf',
			'zlogin',
			'zlogout',
			'zprofile',
			'zshenv',
			'zshrc',
		],
	},
	slang: {
		extensions: ['.fxc', '.hlsl', '.s', '.slang', '.slangh', '.usf', '.ush', '.vfx'],
	},
	slim: {
		extensions: ['.slim'],
	},
	solidity: {
		extensions: ['.sol'],
	},
	stylus: {
		extensions: ['.styl'],
	},
	svelte: {
		extensions: ['.svelte'],
	},
	swift: {
		extensions: ['.swift'],
	},
	systemverilog: {
		extensions: ['.sv', '.svh', '.vh'],
	},
	typescriptreact: {
		extensions: ['.tsx'],
	},
	latex: {
		extensions: [
			'.aux',
			'.bbx',
			'.cbx',
			'.cls',
			'.dtx',
			'.ins',
			'.lbx',
			'.ltx',
			'.mkii',
			'.mkiv',
			'.mkvi',
			'.sty',
			'.tex',
			'.toc',
		],
	},
	typescript: {
		extensions: ['.cts', '.mts', '.ts'],
	},
	verilog: {
		extensions: ['.v', '.veo'],
	},
	vim: {
		extensions: ['.vba', '.vim', '.vimrc', '.vmb'],
		filenames: ['.exrc', '.gvimrc', '.nvimrc', '.vimrc', '_vimrc', 'gvimrc', 'nvimrc', 'vimrc'],
	},
	vb: {
		extensions: ['.vb', '.vbhtml', '.Dsr', '.bas', '.cls', '.ctl', '.frm', '.vbs'],
	},
	vue: {
		extensions: ['.nvue', '.vue'],
	},
	xml: {
		extensions: [
			'.adml',
			'.admx',
			'.ant',
			'.axaml',
			'.axml',
			'.builds',
			'.ccproj',
			'.ccxml',
			'.clixml',
			'.cproject',
			'.cscfg',
			'.csdef',
			'.csl',
			'.csproj',
			'.ct',
			'.depproj',
			'.dita',
			'.ditamap',
			'.ditaval',
			'.dll.config',
			'.dotsettings',
			'.filters',
			'.fsproj',
			'.fxml',
			'.glade',
			'.gml',
			'.gmx',
			'.gpx',
			'.grxml',
			'.gst',
			'.hzp',
			'.iml',
			'.ivy',
			'.jelly',
			'.jsproj',
			'.kml',
			'.launch',
			'.mdpolicy',
			'.mjml',
			'.mod',
			'.mojo',
			'.mxml',
			'.natvis',
			'.ncl',
			'.ndproj',
			'.nproj',
			'.nuspec',
			'.odd',
			'.osm',
			'.pkgproj',
			'.plist',
			'.pluginspec',
			'.proj',
			'.props',
			'.ps1xml',
			'.psc1',
			'.pt',
			'.pubxml',
			'.qhelp',
			'.rdf',
			'.res',
			'.resx',
			'.rss',
			'.sch',
			'.scxml',
			'.sfproj',
			'.shproj',
			'.srdf',
			'.storyboard',
			'.sublime-snippet',
			'.svg',
			'.sw',
			'.targets',
			'.tml',
			'.typ',
			'.ui',
			'.urdf',
			'.ux',
			'.vbproj',
			'.vcxproj',
			'.vsixmanifest',
			'.vssettings',
			'.vstemplate',
			'.vxml',
			'.wixproj',
			'.workflow',
			'.wsdl',
			'.wsf',
			'.wxi',
			'.wxl',
			'.wxs',
			'.x3d',
			'.xacro',
			'.xaml',
			'.xib',
			'.xlf',
			'.xliff',
			'.xmi',
			'.xml',
			'.xml.dist',
			'.xmp',
			'.xproj',
			'.xsd',
			'.xspec',
			'.xul',
			'.zcml',
		],
		filenames: [
			'.classpath',
			'.cproject',
			'.project',
			'App.config',
			'NuGet.config',
			'Settings.StyleCop',
			'Web.Debug.config',
			'Web.Release.config',
			'Web.config',
			'packages.config',
		],
	},
	xsl: {
		extensions: ['.xsl', '.xslt'],
	},
	yaml: {
		extensions: [
			'.mir',
			'.reek',
			'.rviz',
			'.sublime-syntax',
			'.syntax',
			'.yaml',
			'.yaml-tmlanguage',
			'.yaml.sed',
			'.yml',
			'.yml.mysql',
		],
		filenames: [
			'.clang-format',
			'.clang-tidy',
			'.clangd',
			'.gemrc',
			'CITATION.cff',
			'glide.lock',
			'pixi.lock',
			'yarn.lock',
		],
	},
	javascriptreact: {
		extensions: ['.jsx'],
	},
	legend: {
		extensions: ['.pure'],
	},
};

View File

@@ -0,0 +1,147 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { knownLanguages } from './generatedLanguages';
import {
knownFileExtensions,
knownTemplateLanguageExtensions,
templateLanguageLimitations,
} from './languages';
import { basename } from '../util/uri';
import * as path from 'node:path';
/**
 * Immutable result of a language detection run.
 */
export class Language {
	/** Language id that was selected for the document. */
	readonly languageId: string;
	/** Whether the id was not determined unambiguously (ambiguous match or fallback value). */
	readonly isGuess: boolean;
	/** File extension reported for the document; the empty string when there is none. */
	readonly fileExtension: string;

	constructor(languageId: string, isGuess: boolean, fileExtension: string) {
		this.languageId = languageId;
		this.isGuess = isGuess;
		this.fileExtension = fileExtension;
	}
}
/**
 * The two pieces of document information that language detection reads.
 */
interface LanguageDetectionInput {
/** Language id that came with the document; detectors fall back to it when they find nothing better. */
languageId: string;
/** URI of the document; its scheme and basename drive the detection. */
uri: string;
}
/**
 * Common contract of all language detectors in this module.
 * Concrete detectors can wrap each other (see the `delegate` constructor parameters below).
 */
export abstract class LanguageDetection {
/** Maps a document (language id + URI) to the detected {@link Language}. */
abstract detectLanguage(doc: LanguageDetectionInput): Language;
}
// Intermediate detection result: the selected language id plus a flag telling whether
// that selection was ambiguous (more than one language matched).
type LanguageIdWithGuessing = { languageId: string; isGuess: boolean };
/**
 * Adds `languageId` to the candidate list stored under `key`, creating the list on first use.
 * Ids already present are skipped so a repeated table entry cannot make a match look ambiguous.
 */
function addLanguageCandidate(index: Map<string, string[]>, key: string, languageId: string): void {
	const candidates = index.get(key);
	if (candidates === undefined) {
		index.set(key, [languageId]);
	} else if (!candidates.includes(languageId)) {
		candidates.push(languageId);
	}
}

// Reverse indexes over `knownLanguages`, built once when the module is loaded.
// Candidate lists keep the declaration order of `knownLanguages`; the first entry wins on lookup.

/** Lower-cased file extension -> ids of all languages that list this extension. */
const knownExtensions = new Map<string, string[]>();
/** Exact (case-sensitive) file name -> ids of all languages that list this file name. */
const knownFilenames = new Map<string, string[]>();

for (const [languageId, { extensions, filenames }] of Object.entries(knownLanguages)) {
	for (const extension of extensions) {
		// Detection looks extensions up in lower case, so the keys must be lower-cased as well;
		// otherwise mixed-case table entries such as '.4DForm' or '.Dsr' could never match.
		addLanguageCandidate(knownExtensions, extension.toLowerCase(), languageId);
	}
	for (const filename of filenames ?? []) {
		addLanguageCandidate(knownFilenames, filename, languageId);
	}
}
/**
 * Detects the language from the basename of the document URI, using the known file names
 * and file extensions. Falls back to the language id supplied with the document (flagged as
 * a guess) when nothing matches.
 *
 * NOTE(review): only the last extension segment (or, for template files, the one before it)
 * is looked up, so multi-segment entries of the table such as '.tfstate.backup' look
 * unreachable through the extension index — confirm whether that is intended.
 */
class FilenameAndExensionLanguageDetection extends LanguageDetection {
	/**
	 * @param doc document whose URI is inspected and whose language id serves as fallback
	 * @returns the detected language together with the extension reported for the file
	 */
	detectLanguage(doc: LanguageDetectionInput): Language {
		const name = basename(doc.uri);
		const rawExtension = path.extname(name).toLowerCase();
		const effectiveExtension = this.extensionWithoutTemplateLanguage(name, rawExtension);
		const detected = this.detectLanguageId(name, effectiveExtension);
		const reportedExtension = this.computeFullyQualifiedExtension(rawExtension, effectiveExtension);
		return detected === undefined
			? new Language(doc.languageId, true, reportedExtension)
			: new Language(detected.languageId, detected.isGuess, reportedExtension);
	}

	/**
	 * For template files such as `x.yml.erb`, returns the extension of the templated content
	 * (`.yml`); for every other file returns `extension` unchanged.
	 */
	private extensionWithoutTemplateLanguage(filename: string, extension: string): string {
		if (!knownTemplateLanguageExtensions.includes(extension)) {
			return extension;
		}
		// Drop the template extension and look at what precedes it.
		const stem = filename.substring(0, filename.lastIndexOf('.'));
		const innerExtension = path.extname(stem).toLowerCase();
		if (innerExtension.length === 0) {
			return extension;
		}
		if (!knownFileExtensions.includes(innerExtension)) {
			return extension;
		}
		return this.isExtensionValidForTemplateLanguage(extension, innerExtension) ? innerExtension : extension;
	}

	/**
	 * A template extension without an entry in `templateLanguageLimitations` accepts any inner
	 * extension; one with an entry accepts only the listed inner extensions.
	 */
	private isExtensionValidForTemplateLanguage(extension: string, extensionWithoutTemplate: string): boolean {
		const allowed = templateLanguageLimitations[extension];
		if (!allowed) {
			return true;
		}
		return allowed.includes(extensionWithoutTemplate);
	}

	/**
	 * Resolution order: exact file name, then extension, then the file name with trailing
	 * `.segment`s stripped one at a time (so `Dockerfile.local` matches `Dockerfile`).
	 *
	 * @returns `undefined` when neither the name nor the extension is known
	 */
	private detectLanguageId(filename: string, extension: string): LanguageIdWithGuessing | undefined {
		const exactMatch = knownFilenames.get(filename);
		if (exactMatch !== undefined) {
			return { languageId: exactMatch[0], isGuess: false };
		}

		const candidates = knownExtensions.get(extension);
		if (candidates !== undefined && candidates.length > 0) {
			// Several languages sharing the extension: take the first one, but mark it as a guess.
			return { languageId: candidates[0], isGuess: candidates.length > 1 };
		}

		let stem = filename;
		while (stem.includes('.')) {
			stem = stem.replace(/\.[^.]*$/, '');
			const stemMatch = knownFilenames.get(stem);
			if (stemMatch !== undefined) {
				return { languageId: stemMatch[0], isGuess: false };
			}
		}
		return undefined;
	}

	/**
	 * Re-attaches the template extension, e.g. (`.erb`, `.yml`) yields `.yml.erb`; when both
	 * arguments are equal the extension is returned as is.
	 */
	private computeFullyQualifiedExtension(extension: string, extensionWithoutTemplate: string): string {
		return extension === extensionWithoutTemplate ? extension : extensionWithoutTemplate + extension;
	}
}
// Groups similar languages under one language id (currently 'c' and 'cpp' both become 'cpp').
// Keeping such languages apart has a main drawback: for related files (e.g. header files) the
// detected language might be wrong, and features like neighbor tabs might then not work as expected.
// Eventually this grouping should move to neighborTabs.ts (but that's hard to do behind a feature flag).
class GroupingLanguageDetection extends LanguageDetection {
	constructor(private readonly delegate: LanguageDetection) {
		super();
	}

	// Delegates the detection, then folds the C family into 'cpp'; the guess flag and the
	// file extension of the delegate's result are carried over unchanged.
	detectLanguage(doc: LanguageDetectionInput): Language {
		const detected = this.delegate.detectLanguage(doc);
		switch (detected.languageId) {
			case 'c':
			case 'cpp':
				return new Language('cpp', detected.isGuess, detected.fileExtension);
			default:
				return detected;
		}
	}
}
// Short-circuits detection for documents whose URI uses the 'untitled:' or
// 'vscode-notebook-cell:' scheme: the language id supplied with the document is returned
// (flagged as a guess, with an empty file extension). All other documents go to the delegate.
class ClientProvidedLanguageDetection extends LanguageDetection {
	constructor(private readonly delegate: LanguageDetection) {
		super();
	}

	detectLanguage(doc: LanguageDetectionInput): Language {
		const usesClientLanguage = ['untitled:', 'vscode-notebook-cell:'].some(scheme => doc.uri.startsWith(scheme));
		return usesClientLanguage ? new Language(doc.languageId, true, '') : this.delegate.detectLanguage(doc);
	}
}
// Shared detector chain. A call passes through the detectors from the outside in:
// GroupingLanguageDetection -> ClientProvidedLanguageDetection -> FilenameAndExensionLanguageDetection,
// and the grouping step is applied to whatever the inner detectors return.
export const languageDetection = new GroupingLanguageDetection(
new ClientProvidedLanguageDetection(new FilenameAndExensionLanguageDetection())
);
/**
 * Detects the language id for `uri`.
 *
 * Detection runs with a sentinel language id; when the sentinel comes back (nothing could be
 * derived from the URI) the caller's `languageId` is returned as is, which is `undefined` if
 * none was passed.
 */
export function detectLanguage({ uri, languageId }: { uri: string; languageId: string }): string;
export function detectLanguage({ uri }: { uri: string }): string | undefined;
export function detectLanguage({ uri, languageId }: { uri: string; languageId?: string }) {
	const sentinel = 'UNKNOWN';
	const { languageId: detectedId } = languageDetection.detectLanguage({ uri, languageId: sentinel });
	return detectedId === sentinel ? languageId : detectedId;
}

View File

@@ -0,0 +1,34 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { knownLanguages } from './generatedLanguages';
// Extensions of template languages. A file name ending in one of these may carry a second,
// inner extension that names the templated content (e.g. `config.yml.erb` -> `.yml`).
// All entries are lower case with a leading dot.
export const knownTemplateLanguageExtensions = [
'.ejs',
'.erb',
'.haml',
'.hbs',
'.j2',
'.jinja',
'.jinja2',
'.liquid',
'.mustache',
'.njk',
'.php',
'.pug',
'.slim',
'.webc',
];
// Per template extension, the only inner extensions for which the file is treated as a template.
// Template extensions without an entry here accept any known inner extension.
// Example: `.php` only acts as a template extension in `*.blade.php`.
export const templateLanguageLimitations: { [extension: string]: string[] } = {
'.php': ['.blade'],
};
// Extensions and optional file names belonging to one language.
// NOTE(review): has the same shape as the values of `knownLanguages`, which declares it inline — confirm whether the two are meant to be shared.
export type LanguageInfo = {
extensions: string[];
filenames?: string[];
};
// Every extension of every known language in one flat list, in declaration order.
// Extensions shared by several languages appear once per language.
export const knownFileExtensions = Object.values(knownLanguages).flatMap(({ extensions }) => extensions);

View File

@@ -0,0 +1,26 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { knownLanguages } from '../generatedLanguages';
import { languageMarkers } from '../../../../prompt/src/languageMarker';
import * as assert from 'assert';
suite('generated languages', function () {
	// Markers that are intentionally absent from the generated language map:
	// - tex: exists as both latex and tex in the language markers
	// - jsx: exists as both jsx and javascriptreact in the language markers; however jsx is
	//   never detected according to telemetry data
	// - vue-html: will be detected as html
	const ignoredMappings = ['jsx', 'tex', 'vue-html'];

	// Registers one test per remaining marker so a missing language is reported by name.
	for (const marker in languageMarkers) {
		if (ignoredMappings.includes(marker)) {
			continue;
		}
		test(`'${marker}' is generated`, function () {
			const isGenerated = marker in knownLanguages;
			assert.ok(isGenerated, `language for comment marker ${marker} has not been generated`);
		});
	}
});

View File

@@ -0,0 +1,212 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import { createTextDocument } from '../../test/textDocument';
import { Language, LanguageDetection, languageDetection } from '../languageDetection';
suite('language detection', function () {
	// --- helpers -----------------------------------------------------------------------------
	// Every helper builds a document whose client-provided language id is the placeholder
	// 'clientProvidedLanguageId', so a result carrying that id means "nothing was detected".

	const detectForUri = (uri: string): Language =>
		languageDetection.detectLanguage(createTextDocument(uri, 'clientProvidedLanguageId', 1, 'test content'));

	const assertExtension = (uri: string, expectedExtension: string): void => {
		assert.deepStrictEqual(detectForUri(uri).fileExtension, expectedExtension);
	};

	const assertLanguageId = (uri: string, expectedLanguageId: string): void => {
		assert.deepStrictEqual(detectForUri(uri).languageId, expectedLanguageId);
	};

	const detect = (filename: string, detector: LanguageDetection): Language =>
		detector.detectLanguage(
			createTextDocument(`file:///${filename}`, 'clientProvidedLanguageId', 1, 'test content')
		);

	// --- documents without a file -------------------------------------------------------------

	test('reuse languages for untitled documents', function () {
		const detected = languageDetection.detectLanguage({ uri: 'untitled:///abc', languageId: 'typescript' });
		assert.deepStrictEqual(detected, new Language('typescript', true, ''));
	});

	test('normalizes "c" to "cpp" for untitled documents', function () {
		const detected = languageDetection.detectLanguage({ uri: 'untitled:///abc', languageId: 'c' });
		assert.deepStrictEqual(detected, new Language('cpp', true, ''));
	});

	test('reuse languages for notebook documents', function () {
		const detected = languageDetection.detectLanguage({
			uri: 'vscode-notebook-cell:/abc',
			languageId: 'typescript',
		});
		assert.deepStrictEqual(detected.languageId, 'typescript');
	});

	// --- detection by extension ---------------------------------------------------------------
	// [suffix appended to 'file:///test', expected language id]

	const toDetectByExtension: [string, string][] = [
		['.ts', 'typescript'],
		['.js', 'javascript'],
		['.jsx', 'javascriptreact'],
		['.tsx', 'typescriptreact'],
		['.html', 'html'],
		['.html5', 'html'],
		['.css', 'css'],
		['.scss', 'scss'],
		['.less', 'less'],
		['.jsonc', 'jsonc'],
		['.json', 'json'],
		['.xml', 'xml'],
		['.yml', 'yaml'],
		['.yaml', 'yaml'],
		['.php', 'php'],
		['.py', 'python'],
		['.rb', 'ruby'],
		['.go', 'go'],
		['.java', 'java'],
		['.cs', 'csharp'],
		['.cpp', 'cpp'],
		['.c', 'cpp'],
		['.C', 'cpp'],
		['.h', 'cpp'],
		['.sh', 'shellscript'],
		['.bash', 'shellscript'],
		['.sql', 'sql'],
		['.swift', 'swift'],
		['.vb', 'vb'],
		['.frm', 'vb'],
		['.lua', 'lua'],
		['.tex', 'latex'],
		['.md', 'markdown'],
		['.markdown', 'markdown'],
		['.r', 'r'],
		['.R', 'r'],
		['.blade.php', 'blade'],
		['.BLADE.php', 'blade'],
		['.gradle', 'groovy'],
		['.gradle.kts', 'kotlin'],
		['.ejs', 'html'],
		['.liquid', 'html'],
		['.yml.erb', 'yaml'],
		['.yml.njk', 'yaml'],
		['.some.file.yml.njk', 'yaml'],
		['.phtml', 'html'],
		['f.sourcecode.php', 'php'],
		['.plist', 'xml'],
		['.svg', 'xml'],
		['.jsp', 'html'],
		['.code-workspace', 'jsonc'],
		['.wxss', 'css'],
		['.luau', 'lua'],
		['.codon', 'python'],
		['.edn', 'clojure'],
		['.tpl', 'html'],
		['.rs', 'rust'],
		['.bas', 'vb'],
		['.wxml', 'html'],
		['.nvue', 'vue'],
		['.jenkinsfile', 'groovy'],
		['.twig', 'html'],
		['.inc.php', 'php'],
		['.mm', 'objective-cpp'],
		['.module', 'php'],
		['.install', 'php'],
		['.theme', 'php'],
		['.rc', 'cpp'],
		['.idl', 'cpp'],
		['.pubxml', 'xml'],
		['.njk', 'html'],
		['.fish', 'shellscript'],
		['.vbs', 'vb'],
		['.sage', 'python'],
		['.mdx', 'markdown'],
		['.somethingelse', 'clientProvidedLanguageId'],
	];
	for (const [extension, languageId] of toDetectByExtension) {
		test(`detect ${languageId} by file extension ${extension}`, function () {
			assertLanguageId(`file:///test${extension}`, languageId);
		});
	}

	// --- detection by file name ---------------------------------------------------------------
	// [file name, expected language id]

	const toDetectByFilename: [string, string][] = [
		['.bash_history', 'shellscript'],
		['.bashrc', 'shellscript'],
		['.zshrc', 'shellscript'],
		['.irbrc', 'ruby'],
		['Gemfile', 'ruby'],
		['riemann.config', 'clojure'],
		['Dockerfile', 'dockerfile'],
		['Dockerfile.local', 'dockerfile'],
		['.env.production', 'dotenv'],
		['.env.development.local', 'dotenv'],
		['Jenkinsfile', 'groovy'],
		['Makefile', 'makefile'],
		['.classpath', 'xml'],
		['.gemrc', 'yaml'],
		['tsconfig.json', 'jsonc'],
		['.eslintrc.json', 'jsonc'],
		['settings.json', 'jsonc'],
		['tasks.json', 'jsonc'],
		['keybindings.json', 'jsonc'],
		['extensions.json', 'jsonc'],
		['argv.json', 'jsonc'],
		['profiles.json', 'jsonc'],
		['devcontainer.json', 'jsonc'],
		['.devcontainer.json', 'jsonc'],
	];
	for (const [filename, languageId] of toDetectByFilename) {
		test(`detect ${languageId} by filename ${filename}`, function () {
			assertLanguageId(`file:///${filename}`, languageId);
		});
	}

	// --- detection by full URL ----------------------------------------------------------------

	const urls: [string, string][] = [
		['file:///some/path/test.ts', 'typescript'],
		['untitled:///some/path/test', 'clientProvidedLanguageId'],
		['file:////server-name/shared-resource-pathname/test.sh', 'shellscript'],
	];
	for (const [url, languageId] of urls) {
		test(`detect ${languageId} by url ${url}`, function () {
			assertLanguageId(url, languageId);
		});
	}

	// --- reported file extension --------------------------------------------------------------
	// [suffix appended to 'file:///test', expected reported extension]

	const extensionsToDetect: [string, string][] = [
		['', ''],
		['.ts', '.ts'],
		['a.longer.path.ts', '.ts'],
		['.sh', '.sh'],
		['.html.erb', '.html.erb'],
		['.html.slim', '.html.slim'],
		['.unknown.erb', '.erb'],
		['.yaml.njk', '.yaml.njk'],
		['.unknown', '.unknown'],
	];
	for (const [filename, extension] of extensionsToDetect) {
		test(`detect extension ${extension} by filename test${filename}`, function () {
			assertExtension(`file:///test${filename}`, extension);
		});
	}

	test(`has no extension for filename without extension`, function () {
		assertExtension(`file:///.secretproduct`, '');
	});

	// --- grouping of ambiguous C-family files -------------------------------------------------

	test('detected languages for ambiguous options will be re-detected', function () {
		// The header file is checked both before and after a .cpp file has been detected.
		for (const filename of ['testfile.c', 'testfile.h', 'testfile.cpp', 'testfile.h']) {
			assert.deepStrictEqual(detect(filename, languageDetection).languageId, 'cpp');
		}
	});
});