// lexer.spec.js — tests for the search query lexer (lex service)
  1. const lex = require('../../src/services/search/services/lex');
  2. describe("Lexer fulltext", () => {
  3. it("simple lexing", () => {
  4. expect(lex("hello world").fulltextTokens.map(t => t.token))
  5. .toEqual(["hello", "world"]);
  6. expect(lex("hello, world").fulltextTokens.map(t => t.token))
  7. .toEqual(["hello", "world"]);
  8. });
  9. it("use quotes to keep words together", () => {
  10. expect(lex("'hello world' my friend").fulltextTokens.map(t => t.token))
  11. .toEqual(["hello world", "my", "friend"]);
  12. expect(lex('"hello world" my friend').fulltextTokens.map(t => t.token))
  13. .toEqual(["hello world", "my", "friend"]);
  14. expect(lex('`hello world` my friend').fulltextTokens.map(t => t.token))
  15. .toEqual(["hello world", "my", "friend"]);
  16. });
  17. it("you can use different quotes and other special characters inside quotes", () => {
  18. expect(lex("'i can use \" or ` or #~=*' without problem").fulltextTokens.map(t => t.token))
  19. .toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
  20. });
  21. it("I can use backslash to escape quotes", () => {
  22. expect(lex("hello \\\"world\\\"").fulltextTokens.map(t => t.token))
  23. .toEqual(["hello", '"world"']);
  24. expect(lex("hello \\\'world\\\'").fulltextTokens.map(t => t.token))
  25. .toEqual(["hello", "'world'"]);
  26. expect(lex("hello \\\`world\\\`").fulltextTokens.map(t => t.token))
  27. .toEqual(["hello", '`world`']);
  28. expect(lex('"hello \\\"world\\\"').fulltextTokens.map(t => t.token))
  29. .toEqual(['hello "world"']);
  30. expect(lex("'hello \\\'world\\\''").fulltextTokens.map(t => t.token))
  31. .toEqual(["hello 'world'"]);
  32. expect(lex("`hello \\\`world\\\``").fulltextTokens.map(t => t.token))
  33. .toEqual(["hello `world`"]);
  34. expect(lex("\\#token").fulltextTokens.map(t => t.token))
  35. .toEqual(["#token"]);
  36. });
  37. it("quote inside a word does not have a special meaning", () => {
  38. const lexResult = lex("d'Artagnan is dead #hero = d'Artagnan");
  39. expect(lexResult.fulltextTokens.map(t => t.token))
  40. .toEqual(["d'artagnan", "is", "dead"]);
  41. expect(lexResult.expressionTokens.map(t => t.token))
  42. .toEqual(['#hero', '=', "d'artagnan"]);
  43. });
  44. it("if quote is not ended then it's just one long token", () => {
  45. expect(lex("'unfinished quote").fulltextTokens.map(t => t.token))
  46. .toEqual(["unfinished quote"]);
  47. });
  48. it("parenthesis and symbols in fulltext section are just normal characters", () => {
  49. expect(lex("what's u=p <b(r*t)h>").fulltextTokens.map(t => t.token))
  50. .toEqual(["what's", "u=p", "<b(r*t)h>"]);
  51. });
  52. it("operator characters in expressions are separate tokens", () => {
  53. expect(lex("# abc+=-def**-+d").expressionTokens.map(t => t.token))
  54. .toEqual(["#", "abc", "+=-", "def", "**-+", "d"]);
  55. });
  56. it("escaping special characters", () => {
  57. expect(lex("hello \\#\\~\\'").fulltextTokens.map(t => t.token))
  58. .toEqual(["hello", "#~'"]);
  59. });
  60. });
  61. describe("Lexer expression", () => {
  62. it("simple attribute existence", () => {
  63. expect(lex("#label ~relation").expressionTokens.map(t => t.token))
  64. .toEqual(["#label", "~relation"]);
  65. });
  66. it("simple label operators", () => {
  67. expect(lex("#label*=*text").expressionTokens.map(t => t.token))
  68. .toEqual(["#label", "*=*", "text"]);
  69. });
  70. it("simple label operator with in quotes", () => {
  71. expect(lex("#label*=*'text'").expressionTokens)
  72. .toEqual([
  73. {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
  74. {token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
  75. {token: "text", inQuotes: true, startIndex: 10, endIndex: 13}
  76. ]);
  77. });
  78. it("simple label operator with param without quotes", () => {
  79. expect(lex("#label*=*text").expressionTokens)
  80. .toEqual([
  81. {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
  82. {token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
  83. {token: "text", inQuotes: false, startIndex: 9, endIndex: 12}
  84. ]);
  85. });
  86. it("simple label operator with empty string param", () => {
  87. expect(lex("#label = ''").expressionTokens)
  88. .toEqual([
  89. {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
  90. {token: "=", inQuotes: false, startIndex: 7, endIndex: 7},
  91. // weird case for empty strings which ends up with endIndex < startIndex :-(
  92. {token: "", inQuotes: true, startIndex: 10, endIndex: 9}
  93. ]);
  94. });
  95. it("note. prefix also separates fulltext from expression", () => {
  96. expect(lex(`hello fulltext note.labels.capital = Prague`).expressionTokens.map(t => t.token))
  97. .toEqual(["note", ".", "labels", ".", "capital", "=", "prague"]);
  98. });
  99. it("note. prefix in quotes will note start expression", () => {
  100. expect(lex(`hello fulltext "note.txt"`).expressionTokens.map(t => t.token))
  101. .toEqual([]);
  102. expect(lex(`hello fulltext "note.txt"`).fulltextTokens.map(t => t.token))
  103. .toEqual(["hello", "fulltext", "note.txt"]);
  104. });
  105. it("complex expressions with and, or and parenthesis", () => {
  106. expect(lex(`# (#label=text OR #second=text) AND ~relation`).expressionTokens.map(t => t.token))
  107. .toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]);
  108. });
  109. it("dot separated properties", () => {
  110. expect(lex(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens.map(t => t.token))
  111. .toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]);
  112. });
  113. it("negation of label and relation", () => {
  114. expect(lex(`#!capital ~!neighbor`).expressionTokens.map(t => t.token))
  115. .toEqual(["#!capital", "~!neighbor"]);
  116. });
  117. it("negation of sub-expression", () => {
  118. expect(lex(`# not(#capital) and note.noteId != "root"`).expressionTokens.map(t => t.token))
  119. .toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]);
  120. });
  121. it("order by multiple labels", () => {
  122. expect(lex(`# orderby #a,#b`).expressionTokens.map(t => t.token))
  123. .toEqual(["#", "orderby", "#a", ",", "#b"]);
  124. });
  125. });
  126. describe("Lexer invalid queries and edge cases", () => {
  127. it("concatenated attributes", () => {
  128. expect(lex("#label~relation").expressionTokens.map(t => t.token))
  129. .toEqual(["#label", "~relation"]);
  130. });
  131. it("trailing escape \\", () => {
  132. expect(lex('abc \\').fulltextTokens.map(t => t.token))
  133. .toEqual(["abc", "\\"]);
  134. });
  135. });