grammar-registry.js 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832
  1. const _ = require('underscore-plus');
  2. const Grim = require('grim');
  3. const CSON = require('season');
  4. const SecondMate = require('second-mate');
  5. const { Disposable, CompositeDisposable, Emitter } = require('event-kit');
  6. const TextMateLanguageMode = require('./text-mate-language-mode');
  7. const NodeTreeSitterLanguageMode = require('./tree-sitter-language-mode');
  8. const WASMTreeSitterLanguageMode = require('./wasm-tree-sitter-language-mode');
  9. const TreeSitterGrammar = require('./tree-sitter-grammar');
  10. const WASMTreeSitterGrammar = require('./wasm-tree-sitter-grammar');
  11. const ScopeDescriptor = require('./scope-descriptor');
  12. const Token = require('./token');
  13. const fs = require('fs-plus');
  14. const { Point, Range } = require('text-buffer');
  15. const PATH_SPLIT_REGEX = new RegExp('[/.]');
  16. // Extended: This class holds the grammars used for tokenizing.
  17. //
  18. // An instance of this class is always available as the `atom.grammars` global.
  19. module.exports = class GrammarRegistry {
  20. constructor({ config } = {}) {
  21. this.config = config;
  22. this.subscriptions = new CompositeDisposable();
  23. this.textmateRegistry = new SecondMate.GrammarRegistry({
  24. maxTokensPerLine: 100,
  25. maxLineLength: 1000
  26. });
  27. this.emitter = new Emitter();
  28. this.clear();
  29. }
  30. clear() {
  31. this.textmateRegistry.clear();
  32. this.wasmTreeSitterGrammarsById = {};
  33. this.treeSitterGrammarsById = {};
  34. if (this.subscriptions) this.subscriptions.dispose();
  35. this.subscriptions = new CompositeDisposable();
  36. this.languageOverridesByBufferId = new Map();
  37. this.grammarScoresByBuffer = new Map();
  38. this.textMateScopeNamesByTreeSitterLanguageId = new Map();
  39. this.treeSitterLanguageIdsByTextMateScopeName = new Map();
  40. const grammarAddedOrUpdated = this.grammarAddedOrUpdated.bind(this);
  41. this.textmateRegistry.onDidAddGrammar(grammarAddedOrUpdated);
  42. this.textmateRegistry.onDidUpdateGrammar(grammarAddedOrUpdated);
  43. let onLanguageModeChange = () => {
  44. this.grammarScoresByBuffer.forEach((_score, buffer) => {
  45. if (!this.languageOverridesByBufferId.has(buffer.id)) {
  46. this.autoAssignLanguageMode(buffer);
  47. }
  48. });
  49. };
  50. this.subscriptions.add(
  51. this.config.onDidChange('core.useTreeSitterParsers', onLanguageModeChange),
  52. this.config.onDidChange('core.useLegacyTreeSitter', onLanguageModeChange)
  53. );
  54. }
  55. serialize() {
  56. const languageOverridesByBufferId = {};
  57. this.languageOverridesByBufferId.forEach((languageId, bufferId) => {
  58. languageOverridesByBufferId[bufferId] = languageId;
  59. });
  60. return { languageOverridesByBufferId };
  61. }
  62. deserialize(params) {
  63. for (const bufferId in params.languageOverridesByBufferId || {}) {
  64. this.languageOverridesByBufferId.set(
  65. bufferId,
  66. params.languageOverridesByBufferId[bufferId]
  67. );
  68. }
  69. }
  70. createToken(value, scopes) {
  71. return new Token({ value, scopes });
  72. }
  73. // Extended: set a {TextBuffer}'s language mode based on its path and content,
  74. // and continue to update its language mode as grammars are added or updated, or
  75. // the buffer's file path changes.
  76. //
  77. // * `buffer` The {TextBuffer} whose language mode will be maintained.
  78. //
  79. // Returns a {Disposable} that can be used to stop updating the buffer's
  80. // language mode.
  81. maintainLanguageMode(buffer) {
  82. this.grammarScoresByBuffer.set(buffer, null);
  83. const languageOverride = this.languageOverridesByBufferId.get(buffer.id);
  84. if (languageOverride) {
  85. this.assignLanguageMode(buffer, languageOverride);
  86. } else {
  87. this.autoAssignLanguageMode(buffer);
  88. }
  89. const pathChangeSubscription = buffer.onDidChangePath(() => {
  90. this.grammarScoresByBuffer.delete(buffer);
  91. if (!this.languageOverridesByBufferId.has(buffer.id)) {
  92. this.autoAssignLanguageMode(buffer);
  93. }
  94. });
  95. const destroySubscription = buffer.onDidDestroy(() => {
  96. this.grammarScoresByBuffer.delete(buffer);
  97. this.languageOverridesByBufferId.delete(buffer.id);
  98. this.subscriptions.remove(destroySubscription);
  99. this.subscriptions.remove(pathChangeSubscription);
  100. });
  101. this.subscriptions.add(pathChangeSubscription, destroySubscription);
  102. return new Disposable(() => {
  103. destroySubscription.dispose();
  104. pathChangeSubscription.dispose();
  105. this.subscriptions.remove(pathChangeSubscription);
  106. this.subscriptions.remove(destroySubscription);
  107. this.grammarScoresByBuffer.delete(buffer);
  108. this.languageOverridesByBufferId.delete(buffer.id);
  109. });
  110. }
  111. // Extended: Force a {TextBuffer} to use a different grammar than the
  112. // one that would otherwise be selected for it.
  113. //
  114. // * `buffer` The {TextBuffer} whose grammar will be set.
  115. // * `languageId` The {String} id of the desired language.
  116. //
  117. // Returns a {Boolean} that indicates whether the language was successfully
  118. // found.
  119. assignLanguageMode(buffer, languageId) {
  120. if (buffer.getBuffer) buffer = buffer.getBuffer();
  121. let grammar = null;
  122. if (languageId != null) {
  123. grammar = this.grammarForId(languageId);
  124. if (!grammar || !grammar.scopeName) return false;
  125. this.languageOverridesByBufferId.set(buffer.id, languageId);
  126. } else {
  127. this.languageOverridesByBufferId.set(buffer.id, null);
  128. grammar = this.textmateRegistry.nullGrammar;
  129. }
  130. this.grammarScoresByBuffer.set(buffer, null);
  131. if (grammar !== buffer.getLanguageMode().grammar) {
  132. buffer.setLanguageMode(
  133. this.languageModeForGrammarAndBuffer(grammar, buffer)
  134. );
  135. }
  136. return true;
  137. }
  138. // Extended: Force a {TextBuffer} to use a different grammar than the
  139. // one that would otherwise be selected for it.
  140. //
  141. // * `buffer` The {TextBuffer} whose grammar will be set.
  142. // * `grammar` The desired {Grammar}.
  143. //
  144. // Returns a {Boolean} that indicates whether the assignment was successful
  145. assignGrammar(buffer, grammar) {
  146. if (!grammar) return false;
  147. if (buffer.getBuffer) buffer = buffer.getBuffer();
  148. this.languageOverridesByBufferId.set(buffer.id, grammar.scopeName || null);
  149. this.grammarScoresByBuffer.set(buffer, null);
  150. if (grammar !== buffer.getLanguageMode().grammar) {
  151. buffer.setLanguageMode(
  152. this.languageModeForGrammarAndBuffer(grammar, buffer)
  153. );
  154. }
  155. return true;
  156. }
  157. // Extended: Get the `languageId` that has been explicitly assigned to
  158. // the given buffer, if any.
  159. //
  160. // Returns a {String} id of the language
  161. getAssignedLanguageId(buffer) {
  162. return this.languageOverridesByBufferId.get(buffer.id);
  163. }
  164. // Extended: Remove any language mode override that has been set for the
  165. // given {TextBuffer}. This will assign to the buffer the best language
  166. // mode available.
  167. //
  168. // * `buffer` The {TextBuffer}.
  169. autoAssignLanguageMode(buffer) {
  170. const result = this.selectGrammarWithScore(
  171. buffer.getPath(),
  172. getGrammarSelectionContent(buffer)
  173. );
  174. this.languageOverridesByBufferId.delete(buffer.id);
  175. this.grammarScoresByBuffer.set(buffer, result.score);
  176. if (result.grammar !== buffer.getLanguageMode().grammar) {
  177. buffer.setLanguageMode(
  178. this.languageModeForGrammarAndBuffer(result.grammar, buffer)
  179. );
  180. }
  181. }
  182. languageModeForGrammarAndBuffer(grammar, buffer) {
  183. if (grammar instanceof WASMTreeSitterGrammar) {
  184. return new WASMTreeSitterLanguageMode({
  185. grammar,
  186. buffer,
  187. config: this.config,
  188. grammars: this
  189. });
  190. } else if (grammar instanceof TreeSitterGrammar) {
  191. return new NodeTreeSitterLanguageMode({
  192. grammar,
  193. buffer,
  194. config: this.config,
  195. grammars: this
  196. });
  197. } else {
  198. return new TextMateLanguageMode({ grammar, buffer, config: this.config });
  199. }
  200. }
  201. // Extended: Select a grammar for the given file path and file contents.
  202. //
  203. // This picks the best match by checking the file path and contents against
  204. // each grammar.
  205. //
  206. // * `filePath` A {String} file path.
  207. // * `fileContents` A {String} of text for the file path.
  208. //
  209. // Returns a {Grammar}, never null.
  210. selectGrammar(filePath, fileContents) {
  211. return this.selectGrammarWithScore(filePath, fileContents).grammar;
  212. }
  213. selectGrammarWithScore(filePath, fileContents) {
  214. let bestMatch = null;
  215. let highestScore = -Infinity;
  216. this.forEachGrammar(grammar => {
  217. const score = this.getGrammarScore(grammar, filePath, fileContents);
  218. if (score > highestScore || bestMatch == null) {
  219. bestMatch = grammar;
  220. highestScore = score;
  221. }
  222. });
  223. return { grammar: bestMatch, score: highestScore };
  224. }
  225. getLanguageParserForScope(scope) {
  226. if (typeof scope === 'string') {
  227. scope = new ScopeDescriptor({ scopes: [scope] })
  228. }
  229. let useTreeSitterParsers = this.config.get('core.useTreeSitterParsers', { scope });
  230. let useLegacyTreeSitter = this.config.get('core.useLegacyTreeSitter', { scope });
  231. if (!useTreeSitterParsers) return 'textmate';
  232. return useLegacyTreeSitter ? 'node-tree-sitter' : 'wasm-tree-sitter';
  233. }
  234. // Extended: Evaluates a grammar's fitness for use for a certain file.
  235. //
  236. // By analyzing the file's extension and contents — plus other criteria, like
  237. // the user's configuration — Pulsar will assign a score to this grammar that
  238. // represents how suitable it is for the given file.
  239. //
  240. // Ultimately, whichever grammar scores highest for this file will be used
  241. // to highlight it.
  242. //
  243. // * `grammar`: A given {Grammar}.
  244. // * `filePath`: A {String} path to the file.
  245. // * `contents`: The {String} contents of the file.
  246. //
  247. // Returns a {Number}.
  248. getGrammarScore(grammar, filePath, contents) {
  249. if (contents == null && fs.isFileSync(filePath)) {
  250. contents = fs.readFileSync(filePath, 'utf8');
  251. }
  252. // Initially identify matching grammars based on the filename and the first
  253. // line of the file.
  254. let score = this.getGrammarPathScore(grammar, filePath);
  255. if (this.grammarMatchesPrefix(grammar, contents)) score += 0.5;
  256. // If multiple grammars match by one of the above criteria, break ties.
  257. if (score > 0) {
  258. const isNewTreeSitter = grammar instanceof WASMTreeSitterGrammar;
  259. const isOldTreeSitter = grammar instanceof TreeSitterGrammar;
  260. const isTreeSitter = isNewTreeSitter || isOldTreeSitter;
  261. let scope = new ScopeDescriptor({ scopes: [grammar.scopeName] });
  262. let parserConfig = this.getLanguageParserForScope(scope);
  263. // Prefer either TextMate or Tree-sitter grammars based on the user's
  264. // settings.
  265. //
  266. // TODO: This logic is a bit convoluted temporarily as we transition away
  267. // from legacy tree-sitter grammars; it can be vastly simplified once the
  268. // transition is complete.
  269. if (isNewTreeSitter) {
  270. if (parserConfig === 'wasm-tree-sitter') {
  271. score += 0.1;
  272. } else if (parserConfig === 'textmate') {
  273. score = -1;
  274. }
  275. } else if (isOldTreeSitter) {
  276. if (parserConfig === 'node-tree-sitter') {
  277. score += 0.1;
  278. } else if (parserConfig === 'wasm-tree-sitter') {
  279. // If `useLegacyTreeSitter` isn't checked, we probably still prefer a
  280. // legacy Tree-sitter grammar over a TextMate-style grammar. Bump the
  281. // score, but just a bit less than we'd bump it if this were a
  282. // modern Tree-sitter grammar.
  283. score += 0.09;
  284. } else if (parserConfig === 'textmate') {
  285. score = -1;
  286. }
  287. }
  288. // Prefer grammars with matching content regexes. Prefer a grammar with
  289. // no content regex over one with a non-matching content regex.
  290. if (grammar.contentRegex) {
  291. const contentMatch = isTreeSitter
  292. ? grammar.contentRegex.test(contents)
  293. : grammar.contentRegex.findNextMatchSync(contents);
  294. if (contentMatch) {
  295. score += 0.05;
  296. } else {
  297. score -= 0.05;
  298. }
  299. }
  300. // Prefer grammars that the user has manually installed over bundled
  301. // grammars.
  302. if (!grammar.bundledPackage) score += 0.01;
  303. }
  304. return score;
  305. }
  306. getGrammarPathScore(grammar, filePath) {
  307. if (!filePath) return -1;
  308. if (process.platform === 'win32') {
  309. filePath = filePath.replace(/\\/g, '/');
  310. }
  311. const pathComponents = filePath.toLowerCase().split(PATH_SPLIT_REGEX);
  312. let pathScore = 0;
  313. let customFileTypes;
  314. if (this.config.get('core.customFileTypes')) {
  315. customFileTypes = this.config.get('core.customFileTypes')[
  316. grammar.scopeName
  317. ];
  318. }
  319. let { fileTypes } = grammar;
  320. if (customFileTypes) {
  321. fileTypes = fileTypes.concat(customFileTypes);
  322. }
  323. for (let i = 0; i < fileTypes.length; i++) {
  324. const fileType = fileTypes[i];
  325. const fileTypeComponents = fileType.toLowerCase().split(PATH_SPLIT_REGEX);
  326. const pathSuffix = pathComponents.slice(-fileTypeComponents.length);
  327. if (_.isEqual(pathSuffix, fileTypeComponents)) {
  328. pathScore = Math.max(pathScore, fileType.length);
  329. if (i >= grammar.fileTypes.length) {
  330. pathScore += 0.5;
  331. }
  332. }
  333. }
  334. return pathScore;
  335. }
  336. grammarMatchesPrefix(grammar, contents) {
  337. if (contents && grammar.firstLineRegex) {
  338. let escaped = false;
  339. let numberOfNewlinesInRegex = 0;
  340. for (let character of grammar.firstLineRegex.source) {
  341. switch (character) {
  342. case '\\':
  343. escaped = !escaped;
  344. break;
  345. case 'n':
  346. if (escaped) {
  347. numberOfNewlinesInRegex++;
  348. }
  349. escaped = false;
  350. break;
  351. default:
  352. escaped = false;
  353. }
  354. }
  355. const prefix = contents
  356. .split('\n')
  357. .slice(0, numberOfNewlinesInRegex + 1)
  358. .join('\n');
  359. if (grammar.firstLineRegex.findNextMatchSync) {
  360. return grammar.firstLineRegex.findNextMatchSync(prefix);
  361. } else {
  362. return grammar.firstLineRegex.test(prefix);
  363. }
  364. } else {
  365. return false;
  366. }
  367. }
  368. forEachGrammar(callback) {
  369. this.getGrammars({ includeTreeSitter: true }).forEach(callback);
  370. }
  371. grammarForId(languageId) {
  372. if (!languageId) return null;
  373. const config = this.getLanguageParserForScope(
  374. new ScopeDescriptor({ scopes: [languageId] })
  375. );
  376. let getTreeSitterGrammar = (table, languageId) => {
  377. let grammar = table[languageId];
  378. if (grammar?.scopeName) {
  379. return grammar;
  380. }
  381. return null;
  382. };
  383. if (config === 'wasm-tree-sitter') {
  384. return (
  385. getTreeSitterGrammar(
  386. this.wasmTreeSitterGrammarsById,
  387. languageId
  388. ) ||
  389. this.textmateRegistry.grammarForScopeName(languageId)
  390. );
  391. } else if (config === 'node-tree-sitter') {
  392. return (
  393. getTreeSitterGrammar(
  394. this.treeSitterGrammarsById,
  395. languageId
  396. ) ||
  397. this.textmateRegistry.grammarForScopeName(languageId)
  398. );
  399. } else {
  400. return (
  401. this.textmateRegistry.grammarForScopeName(languageId) ||
  402. this.wasmTreeSitterGrammarsById[languageId] ||
  403. this.treeSitterGrammarsById[languageId]
  404. );
  405. }
  406. }
  407. // Deprecated: Get the grammar override for the given file path.
  408. //
  409. // * `filePath` A {String} file path.
  410. //
  411. // Returns a {String} such as `"source.js"`.
  412. grammarOverrideForPath(filePath) {
  413. Grim.deprecate('Use buffer.getLanguageMode().getLanguageId() instead');
  414. const buffer = atom.project.findBufferForPath(filePath);
  415. if (buffer) return this.getAssignedLanguageId(buffer);
  416. }
  417. // Deprecated: Set the grammar override for the given file path.
  418. //
  419. // * `filePath` A non-empty {String} file path.
  420. // * `languageId` A {String} such as `"source.js"`.
  421. //
  422. // Returns undefined.
  423. setGrammarOverrideForPath(filePath, languageId) {
  424. Grim.deprecate(
  425. 'Use atom.grammars.assignLanguageMode(buffer, languageId) instead'
  426. );
  427. const buffer = atom.project.findBufferForPath(filePath);
  428. if (buffer) {
  429. const grammar = this.grammarForScopeName(languageId);
  430. if (grammar)
  431. this.languageOverridesByBufferId.set(buffer.id, grammar.name);
  432. }
  433. }
  434. // Deprecated: Remove the grammar override for the given file path.
  435. //
  436. // * `filePath` A {String} file path.
  437. //
  438. // Returns undefined.
  439. clearGrammarOverrideForPath(filePath) {
  440. Grim.deprecate('Use atom.grammars.autoAssignLanguageMode(buffer) instead');
  441. const buffer = atom.project.findBufferForPath(filePath);
  442. if (buffer) this.languageOverridesByBufferId.delete(buffer.id);
  443. }
  444. grammarAddedOrUpdated(grammar) {
  445. if (grammar.scopeName && !grammar.id) grammar.id = grammar.scopeName;
  446. this.grammarScoresByBuffer.forEach((score, buffer) => {
  447. const languageMode = buffer.getLanguageMode();
  448. const languageOverride = this.languageOverridesByBufferId.get(buffer.id);
  449. if (
  450. grammar === buffer.getLanguageMode().grammar ||
  451. grammar === this.grammarForId(languageOverride)
  452. ) {
  453. buffer.setLanguageMode(
  454. this.languageModeForGrammarAndBuffer(grammar, buffer)
  455. );
  456. return;
  457. } else if (!languageOverride) {
  458. const score = this.getGrammarScore(
  459. grammar,
  460. buffer.getPath(),
  461. getGrammarSelectionContent(buffer)
  462. );
  463. const currentScore = this.grammarScoresByBuffer.get(buffer);
  464. if (currentScore == null || score > currentScore) {
  465. buffer.setLanguageMode(
  466. this.languageModeForGrammarAndBuffer(grammar, buffer)
  467. );
  468. this.grammarScoresByBuffer.set(buffer, score);
  469. return;
  470. }
  471. }
  472. languageMode.updateForInjection(grammar);
  473. });
  474. }
  475. // Extended: Invoke the given callback when a grammar is added to the registry.
  476. //
  477. // * `callback` {Function} to call when a grammar is added.
  478. // * `grammar` {Grammar} that was added.
  479. //
  480. // Returns a {Disposable} on which `.dispose()` can be called to unsubscribe.
  481. onDidAddGrammar(callback) {
  482. let disposable = new CompositeDisposable();
  483. disposable.add(
  484. this.textmateRegistry.onDidAddGrammar(callback),
  485. this.emitter.on('did-add-grammar', callback)
  486. );
  487. return disposable;
  488. }
  489. // Extended: Invoke the given callback when a grammar is updated due to a grammar
  490. // it depends on being added or removed from the registry.
  491. //
  492. // * `callback` {Function} to call when a grammar is updated.
  493. // * `grammar` {Grammar} that was updated.
  494. //
  495. // Returns a {Disposable} on which `.dispose()` can be called to unsubscribe.
  496. onDidUpdateGrammar(callback) {
  497. let disposable = new CompositeDisposable();
  498. disposable.add(
  499. this.textmateRegistry.onDidUpdateGrammar(callback),
  500. this.emitter.on('did-update-grammar', callback)
  501. );
  502. return disposable;
  503. }
  504. // Public: Specify a type of syntax node that may embed other languages.
  505. //
  506. // * `grammarId` The {String} id of the parent language
  507. // * `injectionPoint` An {Object} with the following keys:
  508. // * `type` The {String} type of syntax node that may embed other languages
  509. // * `language` A {Function} that is called with syntax nodes of the specified `type` and
  510. // returns a {String} that will be tested against other grammars' `injectionRegex` in
  511. // order to determine what language should be embedded.
  512. // * `content` A {Function} that is called with syntax nodes of the specified `type` and
  513. // returns another syntax node or array of syntax nodes that contain the embedded source
  514. // code. Depending on the settings below, the content node(s) will be converted into a
  515. // series of buffer ranges; when this injection is parsed, anything not inside those
  516. // ranges will be invisible to the parser.
  517. // * `includeChildren` A {Boolean} that indicates whether the children (and, in fact, all
  518. // descendants) of the nodes returned by `content` should be included in the injection's
  519. // buffer range(s). Defaults to `false`.
  520. // * `newlinesBetween` A {Boolean} that indicates whether each node returned from `content`
  521. // should be separated by at least one newline character so that the parser understands
  522. // them to be logically separated. Embedded languages like ERB and EJS need this. Defaults
  523. // to {false}.
  524. // * `languageScope` A {String} or {Function} that returns the desired scope name to apply
  525. // to each of the injection's buffer ranges. Defaults to the injected grammar's own language
  526. // scope — e.g., `source.js` for the JavaScript grammar. Set to `null` if the language scope
  527. // should be omitted. If a {Function}, will be called with the grammar instance as an
  528. // argument, and should return either a {String} or `null`.
  529. // * `coverShallowerScopes` A {Boolean} that indicates whether this injection should prevent
  530. // shallower layers (including the layer that created this injection) from adding scopes
  531. // within any of this injection's buffer ranges. Useful for injecting languages into
  532. // themselves — for instance, injecting Rust into Rust macro definitions.
  533. // * `includeAdjacentWhitespace` A {Boolean} that indicates whether the injection's buffer
  534. // range(s) should include whitespace that occurs between two adjacent ranges. Defaults to
  535. // `false`. When `true`, if two consecutive injection buffer ranges are separated _only_ by
  536. // whitespace, those ranges will be consolidated into one range along with that whitespace.
  537. //
  538. addInjectionPoint(grammarId, injectionPoint, { only = null } = {}) {
  539. let grammarsToDispose = [];
  540. const addOrCreateInjectionPoint = (table, grammarId) => {
  541. let grammar = table[grammarId];
  542. if (grammar) {
  543. if (grammar.addInjectionPoint) {
  544. grammar.addInjectionPoint(injectionPoint);
  545. // This is a grammar that's already loaded — not just a stub. Editors
  546. // that already use this grammar will want to know that we added an
  547. // injection.
  548. this.emitter.emit('did-update-grammar', grammar);
  549. } else {
  550. grammar.injectionPoints.push(injectionPoint);
  551. }
  552. grammarsToDispose.push(grammar);
  553. } else {
  554. table[grammarId] = { injectionPoints: [injectionPoint] }
  555. }
  556. };
  557. // TEMP: By default, an injection point will be added for both kinds of
  558. // tree-sitter grammars, but the optional keyword argument `only` lets us
  559. // target one or the other. We'll only need this option until we transition
  560. // away from legacy tree-sitter.
  561. if (!only || only === 'legacy') {
  562. addOrCreateInjectionPoint(this.treeSitterGrammarsById, grammarId);
  563. }
  564. if (!only || only === 'modern') {
  565. addOrCreateInjectionPoint(this.wasmTreeSitterGrammarsById, grammarId);
  566. }
  567. return new Disposable(() => {
  568. for (let grammar of grammarsToDispose) {
  569. grammar.removeInjectionPoint(injectionPoint);
  570. }
  571. });
  572. }
  573. get nullGrammar() {
  574. return this.textmateRegistry.nullGrammar;
  575. }
  576. get grammars() {
  577. return this.getGrammars();
  578. }
  579. decodeTokens() {
  580. return this.textmateRegistry.decodeTokens.apply(
  581. this.textmateRegistry,
  582. arguments
  583. );
  584. }
  585. grammarForScopeName(scopeName) {
  586. return this.grammarForId(scopeName);
  587. }
  588. addGrammar(grammar) {
  589. if (grammar instanceof WASMTreeSitterGrammar) {
  590. const existingParams =
  591. this.wasmTreeSitterGrammarsById[grammar.scopeName] || {};
  592. if (grammar.scopeName)
  593. this.wasmTreeSitterGrammarsById[grammar.scopeName] = grammar;
  594. if (existingParams.injectionPoints) {
  595. for (const injectionPoint of existingParams.injectionPoints) {
  596. grammar.addInjectionPoint(injectionPoint);
  597. }
  598. }
  599. this.grammarAddedOrUpdated(grammar);
  600. this.emitter.emit('did-add-grammar', grammar);
  601. return new Disposable(() => this.removeGrammar(grammar));
  602. } else if (grammar instanceof TreeSitterGrammar) {
  603. const existingParams =
  604. this.treeSitterGrammarsById[grammar.scopeName] || {};
  605. if (grammar.scopeName)
  606. this.treeSitterGrammarsById[grammar.scopeName] = grammar;
  607. if (existingParams.injectionPoints) {
  608. for (const injectionPoint of existingParams.injectionPoints) {
  609. grammar.addInjectionPoint(injectionPoint);
  610. }
  611. }
  612. this.grammarAddedOrUpdated(grammar);
  613. this.emitter.emit('did-add-grammar', grammar);
  614. return new Disposable(() => this.removeGrammar(grammar));
  615. } else {
  616. return this.textmateRegistry.addGrammar(grammar);
  617. }
  618. }
  619. removeGrammar(grammar) {
  620. if (grammar instanceof WASMTreeSitterGrammar) {
  621. delete this.wasmTreeSitterGrammarsById[grammar.scopeName];
  622. } else if (grammar instanceof TreeSitterGrammar) {
  623. delete this.treeSitterGrammarsById[grammar.scopeName];
  624. } else {
  625. return this.textmateRegistry.removeGrammar(grammar);
  626. }
  627. }
  628. removeGrammarForScopeName(scopeName) {
  629. return this.textmateRegistry.removeGrammarForScopeName(scopeName);
  630. }
  631. // Extended: Read a grammar asynchronously and add it to the registry.
  632. //
  633. // * `grammarPath` A {String} absolute file path to a grammar file.
  634. // * `callback` A {Function} to call when loaded with the following arguments:
  635. // * `error` An {Error}, may be null.
  636. // * `grammar` A {Grammar} or null if an error occurred.
  637. loadGrammar(grammarPath, callback) {
  638. this.readGrammar(grammarPath, (error, grammar) => {
  639. if (error) return callback(error);
  640. this.addGrammar(grammar);
  641. callback(null, grammar);
  642. });
  643. }
  644. // Extended: Read a grammar synchronously and add it to this registry.
  645. //
  646. // * `grammarPath` A {String} absolute file path to a grammar file.
  647. //
  648. // Returns a {Grammar}.
  649. loadGrammarSync(grammarPath) {
  650. const grammar = this.readGrammarSync(grammarPath);
  651. this.addGrammar(grammar);
  652. return grammar;
  653. }
  654. // Extended: Read a grammar asynchronously but don't add it to the registry.
  655. //
  656. // * `grammarPath` A {String} absolute file path to a grammar file.
  657. // * `callback` A {Function} to call when read with the following arguments:
  658. // * `error` An {Error}, may be null.
  659. // * `grammar` A {Grammar} or null if an error occurred.
  660. //
  661. // Returns undefined.
  662. readGrammar(grammarPath, callback) {
  663. if (!callback) callback = () => {};
  664. CSON.readFile(grammarPath, (error, params = {}) => {
  665. if (error) return callback(error);
  666. try {
  667. callback(null, this.createGrammar(grammarPath, params));
  668. } catch (error) {
  669. callback(error);
  670. }
  671. });
  672. }
  673. // Extended: Read a grammar synchronously but don't add it to the registry.
  674. //
  675. // * `grammarPath` A {String} absolute file path to a grammar file.
  676. //
  677. // Returns a {Grammar}.
  678. readGrammarSync(grammarPath) {
  679. return this.createGrammar(
  680. grammarPath,
  681. CSON.readFileSync(grammarPath) || {}
  682. );
  683. }
  684. createGrammar(grammarPath, params) {
  685. if (params.type === 'modern-tree-sitter') {
  686. return new WASMTreeSitterGrammar(this, grammarPath, params)
  687. } else if (params.type === 'tree-sitter') {
  688. return new TreeSitterGrammar(this, grammarPath, params);
  689. } else {
  690. if (
  691. typeof params.scopeName !== 'string' ||
  692. params.scopeName.length === 0
  693. ) {
  694. throw new Error(
  695. `Grammar missing required scopeName property: ${grammarPath}`
  696. );
  697. }
  698. return this.textmateRegistry.createGrammar(grammarPath, params);
  699. }
  700. }
  701. // Extended: Get all the grammars in this registry.
  702. //
  703. // * `options` (optional) {Object}
  704. // * `includeTreeSitter` (optional) {Boolean} Set to include
  705. // [Tree-sitter](https://github.blog/2018-10-31-atoms-new-parsing-system/) grammars
  706. //
  707. // Returns a non-empty {Array} of {Grammar} instances.
  708. getGrammars(params) {
  709. let result = this.textmateRegistry.getGrammars();
  710. if (!(params && params.includeTreeSitter)) return result;
  711. let modernTsGrammars = Object.values(this.wasmTreeSitterGrammarsById)
  712. .filter(g => g.scopeName);
  713. result = result.concat(modernTsGrammars);
  714. // We must include all legacy Tree-sitter grammars here just in case the
  715. // user has opted into `useTreeSitterGrammars` via a scope-specific
  716. // setting.
  717. const legacyTsGrammars = Object.values(this.treeSitterGrammarsById)
  718. .filter(g => g.scopeName);
  719. result = result.concat(legacyTsGrammars);
  720. return result;
  721. }
  722. scopeForId(id) {
  723. return this.textmateRegistry.scopeForId(id);
  724. }
  725. // Match up a language string (of the sort generated by an injection point)
  726. // with a grammar. Checks the `injectionRegex` property on grammars and
  727. // returns the one with the longest match.
  728. treeSitterGrammarForLanguageString(languageString, type = 'wasm') {
  729. let longestMatchLength = 0;
  730. let grammarWithLongestMatch = null;
  731. let table = type === 'original' ? this.treeSitterGrammarsById : this.wasmTreeSitterGrammarsById;
  732. for (const id in table) {
  733. const grammar = table[id];
  734. if (grammar.injectionRegex) {
  735. const match = languageString.match(grammar.injectionRegex);
  736. if (match) {
  737. const { length } = match[0];
  738. if (length > longestMatchLength) {
  739. grammarWithLongestMatch = grammar;
  740. longestMatchLength = length;
  741. }
  742. }
  743. }
  744. }
  745. return grammarWithLongestMatch;
  746. }
  747. };
  // Returns the text from the start of the buffer up to the position of
  // character index 1024, which is the sample used for grammar selection.
  function getGrammarSelectionContent(buffer) {
    const start = Point(0, 0);
    const end = buffer.positionForCharacterIndex(1024);
    return buffer.getTextInRange(Range(start, end));
  }