From c27bacf275afb059ca21c2a943886962d115efff Mon Sep 17 00:00:00 2001 From: Matt Gibson Date: Mon, 17 Mar 2025 17:04:10 -0700 Subject: [PATCH] start testing grammar TODO: I'm sure there are issues with the position markers, but those are best dealt with when we actually use them. --- libs/common/src/vault/search/ast.ts | 2 +- .../search/bitwarde-query-grammar.spec.ts | 381 ++++++++++++++++++ .../vault/search/bitwarden-query-grammar.ne | 48 +-- .../vault/search/bitwarden-query-grammar.ts | 45 +-- 4 files changed, 425 insertions(+), 51 deletions(-) create mode 100644 libs/common/src/vault/search/bitwarde-query-grammar.spec.ts diff --git a/libs/common/src/vault/search/ast.ts b/libs/common/src/vault/search/ast.ts index 8346a5abc81..8f31940ad80 100644 --- a/libs/common/src/vault/search/ast.ts +++ b/libs/common/src/vault/search/ast.ts @@ -45,7 +45,7 @@ export type AstNode = | WebsiteMatchFilter | OrderBy; -type AstNodeBase = { +export type AstNodeBase = { d: object[]; type: AstNodeType; start: number; diff --git a/libs/common/src/vault/search/bitwarde-query-grammar.spec.ts b/libs/common/src/vault/search/bitwarde-query-grammar.spec.ts new file mode 100644 index 00000000000..717a9521fb3 --- /dev/null +++ b/libs/common/src/vault/search/bitwarde-query-grammar.spec.ts @@ -0,0 +1,381 @@ +import { Grammar, Parser } from "nearley"; + +import { AstNodeBase } from "./ast"; +import compiledGrammar from "./bitwarden-query-grammar"; + +const This = { + query: "this", + expectedResults: [ + { + contents: { + type: "term", + value: "this", + }, + type: "search", + }, + ], +}; + +const ThisOrThat = { + query: "this OR that", + expectedResults: [ + { + contents: { + left: { + type: "term", + value: "this", + }, + right: { + type: "term", + value: "that", + }, + type: "or", + }, + type: "search", + }, + ], +}; + +const QuotedReservedWord = { + query: '"quoted reserved word NOT"', + expectedResults: [ + { + contents: { + type: "term", + value: '"quoted reserved word NOT"', + }, + type: "search", + }, + ], +}; + +const CaseSensitiveReservedWord = { + query: "this or that", + expectedResults: [ + { + contents: { + type: "and", + left: { + type: "and", + left: { + type: "term", + value: "this", + }, + right: { + type: "term", + value: "or", + }, + }, + right: { + type: "term", + value: "that", + }, + }, + type: "search", + }, + ], +}; + +const OrOperator = { + query: "this OR that", + expectedResults: [ + { + contents: { + type: "or", + left: { + type: "term", + value: "this", + }, + right: { + type: "term", + value: "that", + }, + }, + type: "search", + }, + ], +}; + +const Parenthesis = { + query: "some (times you) need", + expectedResults: [ + { + contents: { + type: "and", + left: { + type: "and", + left: { + type: "term", + value: "some", + }, + right: { + type: "parentheses", + inner: { + type: "and", + left: { + type: "term", + value: "times", + }, + right: { + type: "term", + value: "you", + }, + }, + }, + }, + right: { + type: "term", + value: "need", + }, + }, + type: "search", + }, + ], +}; + +const QuotedFieldTerm = { + query: '"custom field":fizz', + expectedResults: [ + { + contents: { + type: "field term", + field: '"custom field"', + term: "fizz", + }, + type: "search", + }, + ], +}; + +const HasAttachment = { + query: "has:attachment", + expectedResults: [ + { + contents: { + type: "hasAttachment", + }, + type: "search", + }, + ], +}; + +const HasUri = { + query: "has:uri", + expectedResults: [ + { + contents: { + type: "hasUri", + }, + type: "search", + }, + ], +}; + +const HasFolder = { + query: "has:folder", + expectedResults: [ + { + contents: { + type: "hasFolder", + }, + type: "search", + }, + ], +}; + +const HasCollection = { + query: "has:collection", + expectedResults: [ + { + contents: { + type: "hasCollection", + }, + type: "search", + }, + ], +}; + +const InFolder = { + query: "in:folder:fizz", + expectedResults: [ + { + contents: { + type: "inFolder", + folder: "fizz", + }, + type: "search", + }, + ], +}; + +const InCollection = { + query: "in:collection:fizz", + expectedResults: [ + { + contents: { + type: "inCollection", + collection: "fizz", + }, + type: "search", + }, + ], +}; + +const NotLogic = { + query: "this AND NOT that", + expectedResults: [ + { + contents: { + type: "and", + left: { + type: "term", + value: "this", + }, + right: { + type: "not", + value: { + type: "term", + value: "that", + }, + }, + }, + type: "search", + }, + ], +}; + +const AdvancedBoolean = { + query: "this OR that AND that", + expectedResults: [ + { + contents: { + type: "or", + left: { + type: "term", + value: "this", + }, + right: { + type: "and", + left: { + type: "term", + value: "that", + }, + right: { + type: "term", + value: "that", + }, + }, + }, + type: "search", + }, + ], +}; + +const FunctionNot = { + query: "this NOT(that)", + expectedResults: [ + { + type: "search", + contents: { + type: "and", + left: { + type: "term", + value: "this", + }, + right: { + type: "not", + value: { + type: "parentheses", + inner: { + type: "term", + value: "that", + }, + }, + }, + }, + }, + ], +}; + +const PartialQuoteBegin = { + query: '"this', + expectedResults: [ + { + contents: { + type: "term", + value: '"this', + }, + type: "search", + }, + ], +}; + +const PartialQuoteEnd = { + query: 'this"', + expectedResults: [ + { + contents: { + type: "term", + value: 'this"', + }, + type: "search", + }, + ], +}; + +describe("search query grammar", () => { + const grammar = Grammar.fromCompiled(compiledGrammar); + let parser: Parser; + + beforeEach(() => { + parser = new Parser(grammar); + }); + + it.each([ + This, + ThisOrThat, + QuotedReservedWord, + CaseSensitiveReservedWord, + OrOperator, + Parenthesis, + QuotedFieldTerm, + HasAttachment, + HasFolder, + HasCollection, + HasUri, + InFolder, + InCollection, + NotLogic, + AdvancedBoolean, + FunctionNot, + PartialQuoteBegin, + PartialQuoteEnd, + ])("$query", ({ query, expectedResults }) => { + parser.feed(query); + expect(parser.results.length).toEqual(expectedResults.length); + + for (let i = 0; i < parser.results.length; i++) { + expect(purgeAstPositionMarkers(parser.results[i])).toEqual(expectedResults[i]); + } + }); +}); + +function purgeAstPositionMarkers(ast: AstNodeBase): Partial { + if (ast == null) { + return ast; + } + + const result: Partial = { ...ast }; + + delete result.start; + delete result.end; + delete result.length; + + for (const key in result) { + const k = key as keyof AstNodeBase; + if (result[k] != null && typeof result[k] === "object") { + result[k] = purgeAstPositionMarkers(result[k] as any) as any; + } + } + return result; +} diff --git a/libs/common/src/vault/search/bitwarden-query-grammar.ne b/libs/common/src/vault/search/bitwarden-query-grammar.ne index cc7553912d8..6836b1c6a2e 100644 --- a/libs/common/src/vault/search/bitwarden-query-grammar.ne +++ b/libs/common/src/vault/search/bitwarden-query-grammar.ne @@ -19,7 +19,7 @@ let lexer = moo.compile({ func_website: 'website:', // Ordering functions and parameters func_order: 'order:', - param_dir: /:(?:asc|desc)/, + param_dir: {match: /:(?:asc|desc|ASC|DESC)/, value: (s: string) => s.substring(1,s.length).toLowerCase()}, // function parameter separator access: ':', // string match, includes quoted strings with escaped quotes and backslashes @@ -29,54 +29,54 @@ let lexer = moo.compile({ @lexer lexer -search -> _ OR _ {% function(d) { return { type: 'search', d: d, contents: d[1], start: d[1].start, end: d[1].end, length: d[1].length } } %} +search -> _ OR _ {% function(d) { return { type: 'search', contents: d[1], start: d[1].start, end: d[1].end, length: d[1].length } } %} -PARENTHESES -> %lparen _ OR _ %rparen {% function(d) { const start = d[0].offset; const end = d[4].offset; return { type: 'parentheses', inner: d[2], d: d, start, end, length: end - start + 1 } } %} +PARENTHESES -> %lparen _ OR _ %rparen {% function(d) { const start = d[0].offset; const end = d[4].offset; return { type: 'parentheses', inner: d[2], start, end, length: end - start + 1 } } %} | TERM {% id %} -AND -> AND _ %AND _ PARENTHESES {% function(d) { return { type: 'and', left: d[0], right: d[4], d: d, start: d[0].start, end: d[4].end, length: d[4].end - d[0].start + 1 } } %} - | AND _ PARENTHESES {% function(d) { return { type: 'and', left: d[0], right: d[2], d: d, start: d[0].start, end: d[2].end, length: d[2].end - d[0].start + 1 }} %} +AND -> AND _ %AND _ PARENTHESES {% function(d) { return { type: 'and', left: d[0], right: d[4], start: d[0].start, end: d[4].end, length: d[4].end - d[0].start + 1 } } %} + | AND _ PARENTHESES {% function(d) { return { type: 'and', left: d[0], right: d[2], start: d[0].start, end: d[2].end, length: d[2].end - d[0].start + 1 }} %} | PARENTHESES {% id %} -OR -> OR _ %OR _ AND {% function(d) { return { type: 'or', left: d[0], right: d[4], d: d, start: d[0].start, end: d[4].end, length: d[4].end - d[0].start + 1 } } %} +OR -> OR _ %OR _ AND {% function(d) { return { type: 'or', left: d[0], right: d[4], start: d[0].start, end: d[4].end, length: d[4].end - d[0].start + 1 } } %} | AND {% id %} TERM -> # naked string search term, search all fields - %string {% function(d) { const start = d[0].offset; const end = d[0].offset + d[0].value.length; return { type: 'term', value: d[0].value, d: d[0], start, end, length: d[0].value.length } } %} + %string {% function(d) { const start = d[0].offset; const end = d[0].offset + d[0].value.length; return { type: 'term', value: d[0].value, start, end, length: d[0].value.length } } %} # specified field search term - | %string %access %string {% function(d) { const start = d[0].offset; const end = d[2].offset + d[2].value.length; return { type: 'field term', field: d[0].value, term: d[2].value, d: d, start, end, length: end - start + 1 } } %} + | %string %access %string {% function(d) { const start = d[0].offset; const end = d[2].offset + d[2].value.length; return { type: 'field term', field: d[0].value, term: d[2].value, start, end, length: end - start + 1 } } %} # only items with attachments - | %func_has "attachment" {% function(d) { const start = d[0].offset; const length = 14; return { type: 'hasAttachment', d: d, start, end: d[0].offset + length, length } } %} + | %func_has "attachment" {% function(d) { const start = d[0].offset; const length = 14; return { type: 'hasAttachment', start, end: d[0].offset + length, length } } %} # only items with URIs - | %func_has "uri" {% function(d) { const start = d[0].offset; const length = 7; return { type: 'hasUri', d: d, start, end: d[0].offset + length, length } } %} + | %func_has "uri" {% function(d) { const start = d[0].offset; const length = 7; return { type: 'hasUri', start, end: d[0].offset + length, length } } %} # only items assigned to a folder - | %func_has "folder" {% function(d) { const start = d[0].offset; const length = 10; return { type: 'hasFolder', d: d, start, end: d[0].offset + length, length } } %} + | %func_has "folder" {% function(d) { const start = d[0].offset; const length = 10; return { type: 'hasFolder', start, end: d[0].offset + length, length } } %} # only items assigned to a collection - | %func_has "collection" {% function(d) { const start = d[0].offset; const length = 14; return { type: 'hasCollection', d: d, start, end: d[0].offset + length, length } } %} + | %func_has "collection" {% function(d) { const start = d[0].offset; const length = 14; return { type: 'hasCollection', start, end: d[0].offset + length, length } } %} # only items assigned to a specified folder - | %func_in "folder" %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'inFolder', folder: d[3].value, d: d, start, end, length: end - start } } %} + | %func_in "folder" %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'inFolder', folder: d[3].value, start, end, length: end - start } } %} # only items assigned to a specified collection - | %func_in "collection" %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'inCollection', collection: d[3].value, d: d, start, end, length: end - start + 1 } } %} + | %func_in "collection" %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'inCollection', collection: d[3].value, start, end, length: end - start + 1 } } %} # only items assigned to a specified organization - | %func_in "org" %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'inOrg', org: d[3].value, d: d, start, end, length: end - start + 1 } } %} + | %func_in "org" %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'inOrg', org: d[3].value, start, end, length: end - start + 1 } } %} # only items in personal vault - | %func_in "my_vault" {% function(d) { const start = d[0].offset; const length = 11; return { type: 'inMyVault', d: d, start, end: start + length, length } } %} + | %func_in "my_vault" {% function(d) { const start = d[0].offset; const length = 11; return { type: 'inMyVault', start, end: start + length, length } } %} # only items in trash - | %func_in "trash" {% function(d) { const start = d[0].offset; const length = 8; return { type: 'inTrash', d: d, start, end: start + length, length } } %} + | %func_in "trash" {% function(d) { const start = d[0].offset; const length = 8; return { type: 'inTrash', start, end: start + length, length } } %} # only items marked as favorites - | %func_is "favorite" {% function(d) { const start = d[0].offset; const length = 11; return { type: 'isFavorite', d: d, start, end: start + length, length } } %} + | %func_is "favorite" {% function(d) { const start = d[0].offset; const length = 11; return { type: 'isFavorite', start, end: start + length, length } } %} # only items of given type type - | %func_type %string {% function(d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; return { type: 'type', d:d, cipherType: d[1].value, start, end, length: end - start + 1 } } %} + | %func_type %string {% function(d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; return { type: 'type', cipherType: d[1].value, start, end, length: end - start + 1 } } %} # only items with a specified website - | %func_website %string {% function(d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; return { type: 'website', d: d, website: d[1].value, start, end, length: end - start + 1 } } %} + | %func_website %string {% function(d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; return { type: 'website', website: d[1].value, start, end, length: end - start + 1 } } %} # only items with a specified website and a given match pattern - | %func_website %string %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'websiteMatch', d: d, website: d[1].value, matchType: d[3].value, start, end, length: end - start + 1 } } %} + | %func_website %string %access %string {% function(d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; return { type: 'websiteMatch', website: d[1].value, matchType: d[3].value, start, end, length: end - start + 1 } } %} # order by name - | %func_order %param_dir {% function(d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; return { type: 'orderBy', d: d, field: 'name', direction: d[1].value.substring(1,d[1].value.length).toLowerCase(), start, end, length: end - start + 1 } } %} + | %func_order %param_dir {% function(d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; return { type: 'orderBy', field: 'name', direction: d[1].value, start, end, length: end - start + 1 } } %} # order by a specified field - | %func_order %string %param_dir {% function(d) { const start = d[0].offset; const end = d[2].offset + d[2].value.length; return { type: 'orderBy', d: d, field: d[1].value, direction: d[2].value.substring(1,d[2].value.length).toLowerCase(), start, end, length: end - start + 1 } } %} + | %func_order %string %param_dir {% function(d) { const start = d[0].offset; const end = d[2].offset + d[2].value.length; return { type: 'orderBy', field: d[1].value, direction: d[2].value, start, end, length: end - start + 1 } } %} # Boolean NOT operator - | %NOT _ PARENTHESES {% function(d) { const start = d[0].offset; return { type: 'not', value: d[2], d: d, start, end: d[2].end, length: d[2].end - d[0].offset + 1 } } %} + | %NOT _ PARENTHESES {% function(d) { const start = d[0].offset; return { type: 'not', value: d[2], start, end: d[2].end, length: d[2].end - d[0].offset + 1 } } %} _ -> %WS:* {% function(d) {return null } %} diff --git a/libs/common/src/vault/search/bitwarden-query-grammar.ts b/libs/common/src/vault/search/bitwarden-query-grammar.ts index feaabb7a401..5fdf61f609a 100644 --- a/libs/common/src/vault/search/bitwarden-query-grammar.ts +++ b/libs/common/src/vault/search/bitwarden-query-grammar.ts @@ -40,7 +40,10 @@ let lexer = moo.compile({ func_website: "website:", // Ordering functions and parameters func_order: "order:", - param_dir: /:(?:asc|desc)/, + param_dir: { + match: /:(?:asc|desc|ASC|DESC)/, + value: (s: string) => s.substring(1, s.length).toLowerCase(), + }, // function parameter separator access: ":", // string match, includes quoted strings with escaped quotes and backslashes @@ -83,7 +86,6 @@ const grammar: Grammar = { postprocess: function (d) { return { type: "search", - d: d, contents: d[1], start: d[1].start, end: d[1].end, @@ -103,7 +105,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const end = d[4].offset; - return { type: "parentheses", inner: d[2], d: d, start, end, length: end - start + 1 }; + return { type: "parentheses", inner: d[2], start, end, length: end - start + 1 }; }, }, { name: "PARENTHESES", symbols: ["TERM"], postprocess: id }, @@ -115,7 +117,6 @@ const grammar: Grammar = { type: "and", left: d[0], right: d[4], - d: d, start: d[0].start, end: d[4].end, length: d[4].end - d[0].start + 1, @@ -130,7 +131,6 @@ const grammar: Grammar = { type: "and", left: d[0], right: d[2], - d: d, start: d[0].start, end: d[2].end, length: d[2].end - d[0].start + 1, @@ -146,7 +146,6 @@ const grammar: Grammar = { type: "or", left: d[0], right: d[4], - d: d, start: d[0].start, end: d[4].end, length: d[4].end - d[0].start + 1, @@ -160,7 +159,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const end = d[0].offset + d[0].value.length; - return { type: "term", value: d[0].value, d: d[0], start, end, length: d[0].value.length }; + return { type: "term", value: d[0].value, start, end, length: d[0].value.length }; }, }, { @@ -177,7 +176,6 @@ const grammar: Grammar = { type: "field term", field: d[0].value, term: d[2].value, - d: d, start, end, length: end - start + 1, @@ -190,7 +188,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 14; - return { type: "hasAttachment", d: d, start, end: d[0].offset + length, length }; + return { type: "hasAttachment", start, end: d[0].offset + length, length }; }, }, { @@ -199,7 +197,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 7; - return { type: "hasUri", d: d, start, end: d[0].offset + length, length }; + return { type: "hasUri", start, end: d[0].offset + length, length }; }, }, { @@ -208,7 +206,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 10; - return { type: "hasFolder", d: d, start, end: d[0].offset + length, length }; + return { type: "hasFolder", start, end: d[0].offset + length, length }; }, }, { @@ -217,7 +215,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 14; - return { type: "hasCollection", d: d, start, end: d[0].offset + length, length }; + return { type: "hasCollection", start, end: d[0].offset + length, length }; }, }, { @@ -231,7 +229,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; - return { type: "inFolder", folder: d[3].value, d: d, start, end, length: end - start }; + return { type: "inFolder", folder: d[3].value, start, end, length: end - start }; }, }, { @@ -248,7 +246,6 @@ const grammar: Grammar = { return { type: "inCollection", collection: d[3].value, - d: d, start, end, length: end - start + 1, @@ -266,7 +263,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const end = d[3].offset + d[3].value.length; - return { type: "inOrg", org: d[3].value, d: d, start, end, length: end - start + 1 }; + return { type: "inOrg", org: d[3].value, start, end, length: end - start + 1 }; }, }, { @@ -275,7 +272,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 11; - return { type: "inMyVault", d: d, start, end: start + length, length }; + return { type: "inMyVault", start, end: start + length, length }; }, }, { @@ -284,7 +281,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 8; - return { type: "inTrash", d: d, start, end: start + length, length }; + return { type: "inTrash", start, end: start + length, length }; }, }, { @@ -293,7 +290,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const length = 11; - return { type: "isFavorite", d: d, start, end: start + length, length }; + return { type: "isFavorite", start, end: start + length, length }; }, }, { @@ -305,7 +302,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; - return { type: "type", d: d, cipherType: d[1].value, start, end, length: end - start + 1 }; + return { type: "type", cipherType: d[1].value, start, end, length: end - start + 1 }; }, }, { @@ -317,7 +314,7 @@ const grammar: Grammar = { postprocess: function (d) { const start = d[0].offset; const end = d[1].offset + d[1].value.length; - return { type: "website", d: d, website: d[1].value, start, end, length: end - start + 1 }; + return { type: "website", website: d[1].value, start, end, length: end - start + 1 }; }, }, { @@ -333,7 +330,6 @@ const grammar: Grammar = { const end = d[3].offset + d[3].value.length; return { type: "websiteMatch", - d: d, website: d[1].value, matchType: d[3].value, start, @@ -353,9 +349,8 @@ const grammar: Grammar = { const end = d[1].offset + d[1].value.length; return { type: "orderBy", - d: d, field: "name", - direction: d[1].value.substring(1, d[1].value.length).toLowerCase(), + direction: d[1].value, start, end, length: end - start + 1, @@ -374,9 +369,8 @@ const grammar: Grammar = { const end = d[2].offset + d[2].value.length; return { type: "orderBy", - d: d, field: d[1].value, - direction: d[2].value.substring(1, d[2].value.length).toLowerCase(), + direction: d[2].value, start, end, length: end - start + 1, @@ -391,7 +385,6 @@ const grammar: Grammar = { return { type: "not", value: d[2], - d: d, start, end: d[2].end, length: d[2].end - d[0].offset + 1,