diff options
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/paca.mjs | 881 |
1 files changed, 864 insertions, 17 deletions
diff --git a/tests/paca.mjs b/tests/paca.mjs index 6390204..7c807fe 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -1,4 +1,4 @@ -import { explode, runTests } from "sjs"; +import { explode, reduced, runTests } from "sjs"; import { SyntaxError, @@ -9,6 +9,7 @@ import { tokenizeRegexFn, tokenizeRegex, shouldPush, + findLowerPrecedenceItem, toPostfixStep, toPostfix, emptyNFA, @@ -496,6 +497,13 @@ const test_shouldPush = t => { t.assertEq(shouldPush([], null), true); }); + t.testing(`push when operator or stack top is "(" without ")"`, () => { + t.assertEq( + shouldPush([{ operator: "("}], { operator: ")"}), + false, + ); + }); + t.testing(`push when operator or stack top is "("`, () => { t.assertEq(shouldPush([{}], { operator: "(" }), true); t.assertEq(shouldPush([{ operator: "("}], {}), true); @@ -506,6 +514,49 @@ const test_shouldPush = t => { }); }; +const test_findLowerPrecedenceItem = t => { + t.start("findLowerPrecedenceItem()"); + + t.testing("open parens is always stronger", () => { + t.assertEq( + findLowerPrecedenceItem([{ operator: "("}], {}), + 0, + ); + t.assertEq( + findLowerPrecedenceItem([ + { operator: "*" }, + { operator: "concat" }, + { operator: "(" }, + { operator: "(" }, + ], { operator: "|" }), + 2, + ); + }); + + t.testing("-1 if equal precedence", () => { + t.assertEq( + findLowerPrecedenceItem([ + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + ], { operator: "concat" }), + -1, + ); + }); + + t.testing("index if lower precedence exists", () => { + t.assertEq( + findLowerPrecedenceItem([ + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + { operator: "|" }, + ], { operator: "concat" }), + 3, + ); + }); +}; + const test_toPostfixStep = t => { t.start("toPostfixStep()"); @@ -529,7 +580,171 @@ const test_toPostfixStep = t => { ); }); - // FIXME: more tests + t.testing("error when parens are mismatched", () => { + try { + toPostfixStep( + { stack: ["a"] }, + { operator: ")" }, + null, + "TOKENS", + ); + t.assertEq(true, false); + } catch (e) { + const msg = "close parens without opening: TOKENS"; + t.assertEq(e.message, msg); + t.assertEq(e instanceof SyntaxError, true); + } + }); + + t.testing("the matched parens splits the stack", () => { + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: ["a", { operator: "(" }, "b"], + }, + { operator: ")" }, + ), + { + out: ["0", "a"], + stack: ["b"], + }, + ); + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: ["a", { operator: "(" }], + }, + { operator: ")" }, + ), + { + out: ["0", "a"], + stack: [], + }, + ); + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: [{ operator: "(" }, "b"], + }, + { operator: ")" }, + ), + { + out: ["0"], + stack: ["b"], + }, + ); + }); + + t.testing("flush stack if nothing of lower precedence", () => { + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: [ + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + { operator: "concat" }, + ], + }, + { operator: "|" }, + ), + { + out: [ + "0", + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + { operator: "concat" }, + ], + stack: [{operator: "|"}], + }, + ); + }); + + t.testing("split the stack if lower precedence exists", () => { + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: [ + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + { operator: "|" }, + ], + }, + { operator: "concat" }, + ), + { + out: [ + "0", + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + ], + stack: [ + { operator: "concat" }, + { operator: "|" }, + ], + }, + ); + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: [ + { operator: "|" }, + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + ], + }, + { operator: "concat" }, + ), + { + out: [ + "0", + ], + stack: [ + { operator: "concat" }, + { operator: "|" }, + { operator: "*" }, + { operator: "+" }, + { operator: "?" }, + ], + }, + ); + t.assertEq( + toPostfixStep( + { + out: ["0"], + stack: [ + { operator: "*" }, + { operator: "+" }, + { operator: "|" }, + { operator: "?" }, + ], + }, + { operator: "concat" }, + ), + { + out: [ + "0", + { operator: "*" }, + { operator: "+" }, + ], + stack: [ + { operator: "concat" }, + { operator: "|" }, + { operator: "?" }, + ], + }, + ); + }); }; const test_toPostfix = t => { @@ -594,6 +809,13 @@ const test_toPostfix = t => { "c", concat, ], + }, { + in: [ + { operator: "(" }, + { operator: ")" }, + ], + expected: [ + ], }]; for (const test of table) { t.assertEq( @@ -823,7 +1045,7 @@ const test_zeroOrMore = t => { transitions: { a: 2 }, }, 2: { - direct: [ 1, 4 ], + direct: [ 3 ], transitions: {}, }, 3: { @@ -1107,7 +1329,7 @@ const test_buildNFA = t => { transitions: {}, }, 6: { - direct: [ 5, 8 ], + direct: [ 7 ], transitions: {}, }, 7: { @@ -1155,28 +1377,135 @@ const test_allDirects = t => { t.start("allDirects()"); t.testing("empty directs", () => { + const input = { + 1: {}, + 2: {}, + 3: { direct: [] }, + 4: {}, + 5: {}, + }; + t.assertEq(allDirects(3, input), [3]); }); t.testing("single level", () => { + const input = { + 1: { direct: [3] }, + 2: { direct: [4] }, + 3: { direct: [] }, + 4: { direct: [] }, + }; + t.assertEq(allDirects(1, input), [3]); }); t.testing("deep recursion", () => { + const input = { + 1: { direct: [2] }, + 2: { direct: [3] }, + 3: { direct: [4] }, + 4: { direct: [5] }, + 5: { direct: [] }, + }; + t.assertEq(allDirects(1, input), [5]); + t.assertEq(allDirects(2, input), [5]); + t.assertEq(allDirects(3, input), [5]); + t.assertEq(allDirects(4, input), [5]); + t.assertEq(allDirects(5, input), [5]); }); t.testing("plenty of duplicates", () => { + const input = { + 1: { direct: [2, 3, 4] }, + 2: { direct: [3, 4, 5] }, + 3: { direct: [4, 5, 6] }, + 4: { direct: [5, 6] }, + 5: { direct: [] }, + 6: { direct: [] }, + }; + t.assertEq(allDirects(1, input), [5, 6]); }); }; const test_searchNFAStep = t => { t.start("searchNFAStep()"); - t.testing("FIXME", () => {}); + t.testing("empty on empty input", () => { + t.assertEq(searchNFAStep({})([], null), []); + }); + + t.testing("empty on empty transitions", () => { + const nodes = { + 1: { transitions: {} }, + 2: { transitions: {} }, + 3: { transitions: {} }, + }; + t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a"), []); + }); + + t.testing("empty on non-matching transitions", () => { + const nodes = { + 1: { transitions: { b: 2 } }, + 2: { transitions: { c: 3 } }, + 3: { transitions: { d: 1 } }, + }; + t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a"), []); + }); + + t.testing("leaves given by allDirects() otherwise", () => { + const nodes = { + 1: { + direct: [], + transitions: { a: 5 }, + }, + 2: { + direct: [], + transitions: { b: 5 }, + }, + 3: { + direct: [], + transitions: { a: 4 }, + }, + 4: { + direct: [6, 7, 8], + transitions: {}, + }, + 5: { + direct: [], + transitions: { a: 2 }, + }, + 6: { + direct: [], + transitions: { b: 7 }, + }, + 7: { + direct: [], + transitions: { c: 8 }, + }, + 8: { + direct : [], + transitions: {}, + }, + }; + t.assertEq(searchNFAStep(nodes)([1], "a"), [5]); + t.assertEq(searchNFAStep(nodes)([1], "b"), []); + t.assertEq(searchNFAStep(nodes)([3, 2, 1], "a"), [5, 6, 7, 8]); + t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "a"), [2]); + t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "b"), [7]); + t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "c"), [8]); + t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "d"), []); + }); }; const test_searchNFAFn = t => { t.start("searchNFAFn()"); - t.testing("FIXME", () => {}); + t.testing("on composed sample regex", () => { + const regex = "(a|b)*c"; + const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex)))); + t.assertEq(searchNFAFn(nfa, "c"), [10]); + t.assertEq(searchNFAFn(nfa, "cc"), []); + t.assertEq(searchNFAFn(nfa, "aaabbbc"), [10]); + t.assertEq(searchNFAFn(nfa, "a"), [1, 3, 9]); + }); }; const test_searchNFA = t => { @@ -1188,6 +1517,9 @@ const test_searchNFA = t => { t.assertEq(searchNFA(nfa, "c"), true); t.assertEq(searchNFA(nfa, "cc"), false); t.assertEq(searchNFA(nfa, "aaabbbc"), true); + t.assertEq(searchNFA(nfa, "ababac"), true); + t.assertEq(searchNFA(nfa, "babac"), true); + t.assertEq(searchNFA(nfa, "babaca"), false); }); }; @@ -1226,43 +1558,368 @@ const test_unNodeID = t => { const test_collectTransitions = t => { t.start("collectTransitions()"); - t.testing("FIXME", () => {}); + t.testing("empty values", () => { + t.assertEq(collectTransitions({}, []), {}); + t.assertEq(collectTransitions({ a: 1 }, []), {}); + }); + + t.testing("no transitions in states", () => { + t.assertEq( + collectTransitions( + { + 1: { transitions: {} }, + 2: { transitions: {} }, + 3: { transitions: {} }, + }, + [1, 3], + ), + {}, + ); + }); + + t.testing("error on transitions have more than one possibility", () => { + try { + collectTransitions( + { 1: { transitions: { b: 2, c: 3 }}}, + [1], + ); + t.assertEq(true, false); + } catch (e) { + const msg = + "unexpected transitions with more than 1 key"; + t.assertEq(e.message, msg); + } + }); + + t.testing("many values grouped under a single transition key", () => { + const nodes = { + 1: { + direct: [], + transitions: { a: 2 }, + }, + 2: { + direct: [], + transitions: { a: 3 }, + }, + 3: { + direct: [], + transitions: { a: 4 }, + }, + 4: { + direct: [], + transitions: {}, + }, + }; + t.assertEq(collectTransitions(nodes, []), {}); + t.assertEq(collectTransitions(nodes, [1]), { a: [2]}); + t.assertEq(collectTransitions(nodes, [1, 2]), { a: [2, 3]}); + t.assertEq( + collectTransitions(nodes, [1, 2, 3]), + { a: [2, 3, 4]}, + ); + t.assertEq( + collectTransitions(nodes, [1, 2, 3, 4]), + { a: [2, 3, 4]}, + ); + }); + + t.testing("transition with multiple direct values", () => { + const nodes = { + 1: { + direct: [], + transitions: { a: 2 }, + }, + 2: { + direct: [ 3 ], + transitions: {}, + }, + 3: { + direct: [ 4 ], + transitions: {}, + }, + 4: { + direct: [], + transitions: { a: 2 }, + }, + }; + t.assertEq(collectTransitions(nodes, [1]), { a: [4]}); + t.assertEq(collectTransitions(nodes, [1, 2, 3, 4]), { a: [4]}); + }); }; const test_computePending = t => { t.start("computePending()"); - t.testing("FIXME", () => {}); + t.testing("empty values", () => { + t.assertEq(computePending([], []), []) + t.assertEq(computePending([["IGNORED"]], []), []) + }); + + t.testing("deduped states", () => { + t.assertEq( + computePending( + [null, [1, 2], [2, 3], [1, 3], [1, 2]], + [], + ), + [[1, 2], [1, 3], [2, 3]], + ); + t.assertEq( + computePending( + [null, [1, 2], [1, 3]], + [[2, 3], [1, 2], [1, 3]], + ), + [[1, 2], [1, 3], [2, 3]], + ); + }); }; const test_buildDFANodes = t => { t.start("buildDFANodes()"); - t.testing("FIXME", () => {}); + t.testing("empty values", () => { + t.assertEq(buildDFANodes(null, null, [], "RET"), "RET"); + }); + + t.testing("ignore states that were already built", () => { + const nodes = { + "1-2-3": true, + }; + const states = [[1, 2, 3], [3, 2, 1]]; + t.assertEq(buildDFANodes(null, null, states, nodes), nodes); + }); + + t.testing("recursively build the DFA", () => { + const end = 4; + const nfaNodes = { + 1: { + direct: [], + transitions: { a: 2 }, + }, + 2: { + direct: [ 1, 3 ], + transitions: {}, + }, + 3: { + direct: [], + transitions: { b: 4 }, + }, + 4: { + direct: [], + transitions: {}, + }, + }; + const pending = [[1], [1, 2], [1, 3, 4], [4], [1, 2]]; + const dfaNodes = {}; + const expected = { + "1": { + end: false, + transitions: { a: "1-3" }, + }, + "1-2": { + end: false, + transitions: { a: "1-3" }, + }, + "1-3": { + end: false, + transitions: { a: "1-3", b: "4" } + }, + "1-3-4": { + end: true, + transitions: { a: "1-3", b: "4" }, + }, + "4": { + end: true, + transitions: {}, + }, + }; + t.assertEq( + buildDFANodes(end, nfaNodes, pending, dfaNodes), + expected, + ); + }); }; const test_buildDFA = t => { t.start("buildDFA()"); - t.testing("FIXME", () => {}); + t.testing("minimal NFA", () => { + const nfa = { + start: 1, + end: 2, + nextID: 3, + nodes: { + 1: { + direct: [ 2 ], + transitions: {}, + }, + 2: { + direct: [], + transitions: {}, + }, + }, + }; + const expected = { + start: "2", + nodes: { + "2": { + end: true, + transitions: {}, + }, + }, + }; + t.assertEq(buildDFA(nfa), expected); + }); + + t.testing("sample regex NFA", () => { + const nfa = { + start: 7, + end: 10, + nextID: 11, + nodes: { + 1: { + direct: [], + transitions: { a: 2 }, + }, + 2: { + direct: [ 6 ], + transitions: {}, + }, + 3: { + direct: [], + transitions: { b: 4 }, + }, + 4: { + direct: [ 6 ], + transitions: {}, + }, + 5: { + direct: [ 1, 3 ], + transitions: {}, + }, + 6: { + direct: [ 7 ], + transitions: {}, + }, + 7: { + direct: [ 5, 8 ], + transitions: {}, + }, + 8: { + direct: [ 9 ], + transitions: {}, + }, + 9: { + direct: [], + transitions: { c: 10 }, + }, + 10: { + direct: [], + transitions: {}, + }, + }, + }; + const expected = { + start: "1-3-9", + nodes: { + "1-3-9": { + end: false, + transitions: { + a: "1-3-9", + b: "1-3-9", + c: "10", + }, + }, + "10": { + end: true, + transitions: {}, + }, + }, + }; + t.assertEq(nfa, compileNFA("(a|b)*c")); + t.assertEq(buildDFA(nfa), expected); + }); }; const test_searchDFAStep = t => { t.start("searchDFAStep()"); - t.testing("FIXME", () => {}); + t.testing("transition must exist", () => { + const nodes = { + "1": { + transitions: { + a: "1", + b: "1-2", + c: "1-3-9", + }, + }, + "1-2": { + transitions: { + x: "1-3-9", + }, + }, + "1-3-9": { + transitions: {}, + }, + }; + const fn = searchDFAStep(nodes); + t.assertEq(fn("1", "a"), "1"); + t.assertEq(fn("1", "b"), "1-2"); + t.assertEq(fn("1", "c"), "1-3-9"); + t.assertEq(fn("1", "d"), reduced(false)); + t.assertEq(fn("1-2", "x"), "1-3-9"); + t.assertEq(fn("1-2", "y"), reduced(false)); + t.assertEq(fn("1-3-9", "a"), reduced(false)); + }); + + t.testing("error on non-existent state", () => { + try { + searchDFAStep({})("non-existent", null); + t.assertEq(true, false); + } catch (e) { + const msg = + "Cannot read properties of undefined " + + "(reading 'transitions')"; + t.assertEq(e.message, msg); + } + }); }; const test_searchDFAFn = t => { t.start("searchDFAFn()"); - t.testing("FIXME", () => {}); + t.testing("on composed sample regex", () => { + const regex = "(a|b)*c"; + const dfa = buildDFA( + buildNFA(toPostfix(tokenizeRegex(explode(regex)))), + ); + t.assertEq(searchDFAFn(dfa, "a"), "1-3-9"); + t.assertEq(searchDFAFn(dfa, "b"), "1-3-9"); + t.assertEq(searchDFAFn(dfa, "c"), "10"); + t.assertEq(searchDFAFn(dfa, "cc"), false); + t.assertEq(searchDFAFn(dfa, "aaabbbc"), "10"); + t.assertEq(searchDFAFn(dfa, "ababac"), "10"); + t.assertEq(searchDFAFn(dfa, "babac"), "10"); + t.assertEq(searchDFAFn(dfa, "babaca"), false); + }); }; const test_searchDFA = t => { t.start("searchDFA()"); - t.testing("FIXME", () => {}); + t.testing("on composed sample regex", () => { + const regex = "(a|b)*c"; + const dfa = buildDFA( + buildNFA(toPostfix(tokenizeRegex(explode(regex)))), + ); + t.assertEq(searchDFA(dfa, "a"), false); + t.assertEq(searchDFA(dfa, "b"), false); + t.assertEq(searchDFA(dfa, "c"), true); + t.assertEq(searchDFA(dfa, "cc"), false); + t.assertEq(searchDFA(dfa, "aaabbbc"), true); + t.assertEq(searchDFA(dfa, "ababac"), true); + t.assertEq(searchDFA(dfa, "babac"), true); + t.assertEq(searchDFA(dfa, "babaca"), false); + }); }; const test_compileNFA = t => { @@ -1286,6 +1943,138 @@ const test_compileNFA = t => { }; t.assertEq(compileNFA(""), expected); }); + t.testing("compiled sample regex", () => { + const nfa1 = compileNFA("(a|b)*c"); + const expected1 = { + start: 7, + end: 10, + nextID: 11, + nodes: { + 1: { + direct: [], + transitions: { a: 2 }, + }, + 2: { + direct: [ 6 ], + transitions: {}, + }, + 3: { + direct: [], + transitions: { b: 4 }, + }, + 4: { + direct: [ 6 ], + transitions: {}, + }, + 5: { + direct: [ 1, 3 ], + transitions: {}, + }, + 6: { + direct: [ 7 ], + transitions: {}, + }, + 7: { + direct: [ 5, 8 ], + transitions: {}, + }, + 8: { + direct: [ 9 ], + transitions: {}, + }, + 9: { + direct: [], + transitions: { c: 10 }, + }, + 10: { + direct: [], + transitions: {}, + }, + }, + }; + const nfa2 = compileNFA("((a+b)?c|d)*e"); + const expected2 = { + start: 15, + end: 18, + nextID: 19, + nodes: { + 1: { + direct: [], + transitions: { a: 2 }, + }, + 2: { + direct: [ 3 ], + transitions: {}, + }, + 3: { + direct: [ 1, 4 ], + transitions: {}, + }, + 4: { + direct: [ 5 ], + transitions: {}, + }, + 5: { + direct: [], + transitions: { b: 6 }, + }, + 6: { + direct: [ 8 ], + transitions: {}, + }, + 7: { + direct: [ 1, 8 ], + transitions: {}, + }, + 8: { + direct: [ 9 ], + transitions: {}, + }, + 9: { + direct: [], + transitions: { c: 10 }, + }, + 10: { + direct: [ 14 ], + transitions: {}, + }, + 11: { + direct: [], + transitions: { d: 12 }, + }, + 12: { + direct: [ 14 ], + transitions: {}, + }, + 13: { + direct: [ 7, 11 ], + transitions: {}, + }, + 14: { + direct: [ 15 ], + transitions: {}, + }, + 15: { + direct: [ 13, 16 ], + transitions: {}, + }, + 16: { + direct: [ 17 ], + transitions: {}, + }, + 17: { + direct: [], + transitions: { e: 18 }, + }, + 18: { + direct: [], + transitions: {}, + }, + }, + }; + t.assertEq(nfa1, expected1); + t.assertEq(nfa2, expected2); + }); t.testing("composed sample regex", () => { const nfa = compileNFA("(a|b)*c"); @@ -1298,16 +2087,73 @@ const test_compileNFA = t => { const test_compileDFA = t => { t.start("compileDFA()"); - t.testing("FIXME", () => {}); + t.testing("compiled sample regex", () => { + const dfa1 = compileDFA("(a|b)*c"); + const dfa2 = compileDFA("((a+b)?c|d)*e"); + const expected1 = { + start: "1-3-9", + nodes: { + "1-3-9": { + end: false, + transitions: { + a: "1-3-9", + b: "1-3-9", + c: "10", + }, + }, + "10": { + end: true, + transitions: {}, + }, + }, + }; + const expected2 = { + start: "1-9-11-17", + nodes: { + "1-5": { + end: false, + transitions: { + a: "1-5", + b: "9", + }, + }, + "1-9-11-17": { + end: false, + transitions: { + a: "1-5", + d: "1-9-11-17", + e: "18", + c: "1-9-11-17", + }, + }, + "9": { + end: false, + transitions: { + c: "1-9-11-17", + }, + }, + "18": { + end: true, + transitions: {}, + }, + }, + }; + t.assertEq(dfa1, expected1); + t.assertEq(dfa2, expected2); + }); }; const test_compile = t => { t.start("compile()"); t.testing("composed sample regex", () => { - const matcher = compile("(a|b)*c"); - t.assertEq(matcher("ababac"), true); - t.assertEq(matcher("ababaca"), false); + const matcher1 = compile("(a|b)*c"); + t.assertEq(matcher1("ababac"), true); + t.assertEq(matcher1("ababaca"), false); + + const matcher2 = compile("((a+)b)*c"); + t.assertEq(matcher2("aaababc"), true); + t.assertEq(matcher2("aaabbc"), false); }); }; @@ -1320,6 +2166,7 @@ runTests([ test_tokenizeRegexFn, test_tokenizeRegex, test_shouldPush, + test_findLowerPrecedenceItem, test_toPostfixStep, test_toPostfix, test_emptyNFA, |
