import { explode, runTests } from "sjs"; import { SyntaxError, ConcatStep, shouldConcat, isOperator, tokenizeRegexStep, tokenizeRegexFn, tokenizeRegex, shouldPush, toPostfixStep, toPostfix, emptyNFA, baseNFA, concat, union, zeroOrMore, oneOrMore, zeroOrOne, OPERATORS_FNS, buildNFAStep, buildNFA, allDirects, searchNFAStep, searchNFAFn, searchNFA, nodeID, unNodeID, collectTransitions, computePending, buildDFANodes, buildDFA, searchDFAStep, searchDFAFn, searchDFA, compileNFA, compileDFA, compile, } from "../src/paca.exported.mjs"; const test_shouldConcat = t => { t.start("shouldConcat()"); t.testing("do concat literal chars", () => { t.assertEq(shouldConcat("a", "b"), true); }); t.testing("but not when they're the last one", () => { t.assertEq(shouldConcat("a", undefined), false); t.assertEq(shouldConcat("b", undefined), false); }); t.testing("do not concat when starting a parens group", () => { t.assertEq(shouldConcat("(", "a"), false); t.assertEq(shouldConcat("(", "b"), false); }); t.testing("do not concat when starting a union", () => { t.assertEq(shouldConcat("|", "a"), false); t.assertEq(shouldConcat("|", "b"), false); }); t.testing("do not concat when next is special", () => { t.assertEq(shouldConcat("a", "*"), false); t.assertEq(shouldConcat("a", "|"), false); t.assertEq(shouldConcat("a", ")"), false); }); }; const test_isOperator = t => { t.start("isOperator()"); t.testing("operators and open parens are true", () => { t.assertEq(isOperator("*"), true); t.assertEq(isOperator("|"), true); t.assertEq(isOperator("+"), true); t.assertEq(isOperator("?"), true); t.assertEq(isOperator("("), true); t.assertEq(isOperator(")"), true); }); t.testing("false for everyday non-meta chars", () => { t.assertEq(isOperator("a"), false); t.assertEq(isOperator("_"), false); }); }; const test_tokenizeRegexStep = t => { t.start("tokenizeRegexStep()"); t.testing("when escaping we get whatever the char is", () => { const stepFn = tokenizeRegexStep("ab|(cd)*"); const steps = [{ char: "a", index: 0, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, }, }, { char: "b", index: 1, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["b"], state: ConcatStep.ACCEPTING, }, }, { char: "|", index: 2, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["|"], state: ConcatStep.ACCEPTING, }, }, { char: "(", index: 3, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["("], state: ConcatStep.ACCEPTING, }, }, { char: "c", index: 4, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["c", {operator: "concat"}], state: ConcatStep.ACCEPTING, }, }, { char: "d", index: 5, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["d"], state: ConcatStep.ACCEPTING, }, }, { char: ")", index: 6, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: [")"], state: ConcatStep.ACCEPTING, }, }, { char: "*", index: 7, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["*"], state: ConcatStep.ACCEPTING, }, }]; for (const step of steps) { t.assertEq( stepFn(step.in, step.char, step.index), step.out, ); } }); t.testing("trailing escape errors", () => { try { t.assertEq(tokenizeRegexStep("\\")({ out: [], index: 0, state: ConcatStep.ACCEPTING, }, "\\"), null); t.assertEq(true, false); } catch (e) { const message = "bad trailing escape character: "; t.assertEq( e.message.substring(0, message.length), message, ); t.assertEq(e instanceof SyntaxError, true); } }); t.testing("escape makes it enter escaping mode", () => { const stepFn = tokenizeRegexStep("\\a\\*"); const steps = [{ char: "\\", index: 0, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: [], state: ConcatStep.ESCAPING, }, }, { char: "a", index: 1, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, }, }, { char: "\\", index: 2, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: [], state: ConcatStep.ESCAPING, }, }, { char: "*", index: 3, in: { out: [], state: ConcatStep.ESCAPING, }, out: { out: ["*"], state: ConcatStep.ACCEPTING, }, }]; for (const step of steps) { t.assertEq( stepFn(step.in, step.char, step.index), step.out, ); } }); t.testing("operators get detected as such", () => { const stepFn = tokenizeRegexStep("ab|(cd)*"); const steps = [{ char: "a", index: 0, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, }, }, { char: "b", index: 1, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: ["b"], state: ConcatStep.ACCEPTING, }, }, { char: "|", index: 2, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: [{ operator: "|" }], state: ConcatStep.ACCEPTING, }, }, { char: "(", index: 3, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: [{ operator: "(" }], state: ConcatStep.ACCEPTING, }, }, { char: "c", index: 4, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: ["c", { operator: "concat" }], state: ConcatStep.ACCEPTING, }, }, { char: "d", index: 5, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: ["d"], state: ConcatStep.ACCEPTING, }, }, { char: ")", index: 6, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: [{ operator: ")" }], state: ConcatStep.ACCEPTING, }, }, { char: "*", index: 7, in: { out: [], state: ConcatStep.ACCEPTING, }, out: { out: [{ operator: "*" }], state: ConcatStep.ACCEPTING, }, }]; for (const step of steps) { t.assertEq( stepFn(step.in, step.char, step.index), step.out, ); } }); }; const test_tokenizeRegexFn = t => { t.start("tokenizeRegexFn"); t.testing("regex table", () => { const concat = { operator: "concat" }; const star = { operator: "*" }; const table = [{ in: "", expected: { out: [], state: ConcatStep.ACCEPTING, }, }, { in: "a", expected: { out: ["a"], state: ConcatStep.ACCEPTING, }, }, { in: "a*", expected: { out: ["a", star], state: ConcatStep.ACCEPTING, }, }, { in: "a*b", expected: { out: ["a", star, concat, "b"], state: ConcatStep.ACCEPTING, }, }]; for (const test of table) { t.assertEq( tokenizeRegexFn(explode(test.in)), test.expected, ); } }); }; const test_tokenizeRegex = t => { t.start("tokenizeRegex()"); t.testing("regex table", () => { const concat = { operator: "concat" }; const star = { operator: "*" }; const table = [{ in: "", expected: [] }, { in: "a", expected: [ "a" ], }, { in: "ab", expected: [ "a", concat, "b" ], }, { in: "abc", expected: [ "a", concat, "b", concat, "c" ], }, { in: "a*", expected: [ "a", star], }, { in: "ab*", expected: [ "a", concat, "b", star], }, { in: "a*b*", expected: [ "a", star, concat, "b", star], }, { in: "a|b", expected: [ "a", { operator: "|" }, "b" ], }, { in: "a(bc(d))ef", expected: [ "a", concat, { operator: "(" }, "b", concat, "c", concat, { operator: "(" }, "d", { operator: ")" }, { operator: ")" }, concat, "e", concat, "f", ], }, { in: "ab|(cd)*", expected: [ "a", concat, "b", { operator: "|" }, { operator: "(" }, "c", concat, "d", { operator: ")" }, star, ], }, { in: "(a|b)*c", expected: [ { operator: "(" }, "a", { operator: "|" }, "b", { operator: ")" }, star, concat, "c", ], }]; for (const test of table) { t.assertEq( tokenizeRegex(explode(test.in)), test.expected, ); } }); }; const test_shouldPush = t => { t.start("shouldPush()"); t.testing("always push on empty stack", () => { t.assertEq(shouldPush([], "a"), true); t.assertEq(shouldPush([], null), true); }); t.testing(`push when operator or stack top is "("`, () => { t.assertEq(shouldPush([{}], { operator: "(" }), true); t.assertEq(shouldPush([{ operator: "("}], {}), true); }); t.testing("don't push otherwise", () => { t.assertEq(shouldPush([{}], {}), false); }); }; const test_toPostfixStep = t => { t.start("toPostfixStep()"); t.testing("string literals go directly to out", () => { t.assertEq( toPostfixStep({ out: ["a"], stack: "stack" }, "t"), { out: ["a", "t"], stack: "stack" }, ); }); t.testing("parens put things on the stack", () => { t.assertEq( toPostfixStep({ out: "out", stack: [{ operator: "(" }], }, { operator: "X" }), { out: "out", stack: [ { operator: "X" }, { operator: "(" }, ]}, ); }); // FIXME: more tests }; const test_toPostfix = t => { t.start("toPostfix()"); t.testing("regex table", () => { const concat = { operator: "concat" }; const table = [{ in: [], expected: [] }, { in: [ "a" ], expected: [ "a" ], }, { in: [ "a", concat, "b", concat, "c" ], expected: [ "a", "b", concat, "c", concat ], }, { in: [ "a", { operator: "*" }], expected: [ "a", { operator: "*" }], }, { in: [ "a", { operator: "|" }, "b" ], expected: [ "a", "b", { operator: "|" } ], }, { in: [ "a", concat, "b", { operator: "|" }, { operator: "(" }, "c", concat, "d", { operator: ")" }, { operator: "*" }, ], expected: [ "a", "b", concat, "c", "d", concat, { operator: "*" }, { operator: "|" }, ], }, { in: [ { operator: "(" }, "a", { operator: "|" }, "b", { operator: ")" }, { operator: "*" }, concat, "c", ], expected: [ "a", "b", { operator: "|" }, { operator: "*" }, "c", concat, ], }]; for (const test of table) { t.assertEq( toPostfix(test.in), test.expected, ); } }); }; const test_emptyNFA = t => { t.start("emptyNFA()"); t.testing("we deterministically build out of the ID", () => { const expected = { start: 1, end: 2, nextID: 3, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(emptyNFA(), expected); }); }; const test_baseNFA = t => { t.start("baseNFA()"); t.testing("the given edge connects the nodes", () => { const expected = { start: 123, end: 124, nextID: 125, nodes: { 123: { direct: [], transitions: { X: 124 }, }, 124: { direct: [], transitions: {}, }, }, }; t.assertEq(baseNFA("X", 123), expected); }); }; const test_concat = t => { t.start("concat()"); t.testing("on baseNFA() and on more complex NFAs", () => { const a = baseNFA("a", 1); const b = baseNFA("b", a.nextID); const a_ = baseNFA("a", b.nextID); const expected1 = { start: 1, end: 4, nextID: 5, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(concat(a, b), expected1); const expected2 = { start: 1, end: 6, nextID: 7, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 5 ], transitions: {}, }, 5: { direct: [], transitions: { a: 6 }, }, 6: { direct: [], transitions: {}, }, }, }; t.assertEq(concat(concat(a, b), a_), expected2); }); }; const test_union = t => { t.start("union()"); t.testing("on baseNFA() and on more complex NFAs", () => { const a = baseNFA("a", 1); const b = baseNFA("b", a.nextID); const expected1 = { start: 5, end: 6, nextID: 7, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [], transitions: {}, }, }, }; t.assertEq(union(a, b), expected1); const c = baseNFA("c", expected1.nextID); const expected2 = { start: 9, end: 10, nextID: 11, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [ 10 ], transitions: {}, }, 7: { direct: [], transitions: { c: 8 }, }, 8: { direct: [ 10 ], transitions: {}, }, 9: { direct: [ 5, 7 ], transitions: {}, }, 10: { direct: [], transitions: {}, }, }, }; t.assertEq(union(union(a, b), c), expected2); }); }; const test_zeroOrMore = t => { t.start("zeroOrMore()"); t.testing("on baseNFA()", () => { const a = baseNFA("a", 1); const expected = { start: 3, end: 4, nextID: 5, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 1, 4 ], transitions: {}, }, 3: { direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(zeroOrMore(a), expected); }); }; const test_oneOrMore = t => { t.start("oneOrMore()"); t.testing("on baseNFA()", () => { const a = baseNFA("a", 1); const expected = { start: 1, end: 4, nextID: 5, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(oneOrMore(a), expected); }); }; const test_zeroOrOne = t => { t.start("zeroOrOne()"); t.testing("on baseNFA()", () => { const a = baseNFA("a", 1); const expected = { start: 3, end: 4, nextID: 5, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 4 ], transitions: {}, }, 3: { direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(zeroOrOne(a), expected); }); }; const test_OPERATORS_FNS = t => { t.start("OPERATORS_FNS"); const arr = []; const fn = ret => (...args) => { arr.push({ args, ret, }); return ret; }; const input = [1, 2, 3]; t.testing("star", () => { arr.splice(0); t.assertEq( OPERATORS_FNS({ zeroOrMoreFn: fn(111) })["*"](input), [1, 2, 111], ); t.assertEq(arr, [{ args: [3], ret: 111 }]); }); t.testing("plus", () => { arr.splice(0); t.assertEq( OPERATORS_FNS({ oneOrMoreFn: fn(222) })["+"](input), [1, 2, 222], ); t.assertEq(arr, [{ args: [3], ret: 222 }]); }); t.testing("question mark", () => { arr.splice(0); t.assertEq( OPERATORS_FNS({ zeroOrOneFn: fn(333) })["?"](input), [1, 2, 333], ); t.assertEq(arr, [{ args: [3], ret: 333 }]); }); t.testing("pipe", () => { arr.splice(0); t.assertEq( OPERATORS_FNS({ unionFn: fn(444) })["|"](input), [1, 444], ); t.assertEq(arr, [{ args: [2, 3], ret: 444 }]); }); t.testing("concat", () => { arr.splice(0); t.assertEq( OPERATORS_FNS({ concatFn: fn(555) })["concat"](input), [1, 555], ); t.assertEq(arr, [{ args: [2, 3], ret: 555 }]); }); }; const test_buildNFAStep = t => { t.start("buildNFAStep()"); t.testing("literal char without existing nextID", () => { const expected = [{ start: 1, end: 2, nextID: 3, nodes: { 1: { direct: [], transitions: { t: 2 }, }, 2: { direct: [], transitions: {}, }, }, }]; t.assertEq(buildNFAStep([], "t"), expected); }); t.testing("literal char with existing nextID", () => { const input = baseNFA("a", 1); const expected = [{ start: 1, end: 2, nextID: 3, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [], transitions: {}, }, }, }, { start: 3, end: 4, nextID: 5, nodes: { 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }, }]; t.assertEq(buildNFAStep([input], "b"), expected); t.assertEq( buildNFAStep([input], "b"), [ baseNFA("a", 1), baseNFA("b", 3) ] ); }); t.testing("operator", () => { const input = [ baseNFA("a", 1), baseNFA("b", 3) ]; const expected = [{ start: 1, end: 4, nextID: 5, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }, }]; t.assertEq( buildNFAStep(input, { operator: "concat" }), expected, ); }); }; const test_buildNFA = t => { t.start("buildNFA()"); t.testing("empty regex pattern", () => { const expected = { start: 1, end: 2, nextID: 3, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(buildNFA([]), expected); }); t.testing("sample composed regex", () => { const regex = "(a|b)*cde"; const expected = { start: 7, end: 14, nextID: 15, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [ 5, 8 ], transitions: {}, }, 7: { direct: [ 5, 8 ], transitions: {}, }, 8: { direct: [ 9 ], transitions: {}, }, 9: { direct: [], transitions: { c: 10 }, }, 10: { direct: [ 11 ], transitions: {}, }, 11: { direct: [], transitions: { d: 12 }, }, 12: { direct: [ 13 ], transitions: {}, }, 13: { direct: [], transitions: { e: 14 }, }, 14: { direct: [], transitions: {}, }, }, }; t.assertEq( buildNFA(toPostfix(tokenizeRegex(explode(regex)))), expected, ); }); }; const test_allDirects = t => { t.start("allDirects()"); t.testing("empty directs", () => { }); t.testing("single level", () => { }); t.testing("deep recursion", () => { }); t.testing("plenty of duplicates", () => { }); }; const test_searchNFAStep = t => { t.start("searchNFAStep()"); t.testing("FIXME", () => {}); }; const test_searchNFAFn = t => { t.start("searchNFAFn()"); t.testing("FIXME", () => {}); }; const test_searchNFA = t => { t.start("searchNFA()"); t.testing("on composed sample regex", () => { const regex = "(a|b)*c"; const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex)))); t.assertEq(searchNFA(nfa, "c"), true); t.assertEq(searchNFA(nfa, "cc"), false); t.assertEq(searchNFA(nfa, "aaabbbc"), true); }); }; const test_nodeID = t => { t.start("nodeID()"); t.testing("single number", () => { t.assertEq(nodeID([1]), "1"); }); t.testing("plenty of duplicates", () => { t.assertEq(nodeID([1, 1, 1, 1, 1, 1]), "1"); }); t.testing("unsorted input", () => { t.assertEq( nodeID([5, 5, 11, 11, 11, 3, 4, 5, 123]), "3-4-5-11-123", ); }); }; const test_unNodeID = t => { t.start("unNodeID()"); t.testing("no hyphes", () => { t.assertEq(unNodeID("123"), [123]); }); t.testing("with hyphens", () => { t.assertEq(unNodeID("1-2"), [1, 2]); t.assertEq(unNodeID("11-3-0"), [11, 3, 0]); }); }; const test_collectTransitions = t => { t.start("collectTransitions()"); t.testing("FIXME", () => {}); }; const test_computePending = t => { t.start("computePending()"); t.testing("FIXME", () => {}); }; const test_buildDFANodes = t => { t.start("buildDFANodes()"); t.testing("FIXME", () => {}); }; const test_buildDFA = t => { t.start("buildDFA()"); t.testing("FIXME", () => {}); }; const test_searchDFAStep = t => { t.start("searchDFAStep()"); t.testing("FIXME", () => {}); }; const test_searchDFAFn = t => { t.start("searchDFAFn()"); t.testing("FIXME", () => {}); }; const test_searchDFA = t => { t.start("searchDFA()"); t.testing("FIXME", () => {}); }; const test_compileNFA = t => { t.start("compileNFA()"); t.testing("empty regex", () => { const expected = { start: 1, end: 2, nextID: 3, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(compileNFA(""), expected); }); t.testing("composed sample regex", () => { const nfa = compileNFA("(a|b)*c"); t.assertEq(searchNFA(nfa, "ababac"), true); t.assertEq(searchNFA(nfa, "babac"), true); t.assertEq(searchNFA(nfa, "babaca"), false); }); }; const test_compileDFA = t => { t.start("compileDFA()"); t.testing("FIXME", () => {}); }; const test_compile = t => { t.start("compile()"); t.testing("composed sample regex", () => { const matcher = compile("(a|b)*c"); t.assertEq(matcher("ababac"), true); t.assertEq(matcher("ababaca"), false); }); }; runTests([ test_shouldConcat, test_isOperator, test_tokenizeRegexStep, test_tokenizeRegexFn, test_tokenizeRegex, test_shouldPush, test_toPostfixStep, test_toPostfix, test_emptyNFA, test_baseNFA, test_concat, test_union, test_zeroOrMore, test_oneOrMore, test_zeroOrOne, test_OPERATORS_FNS, test_buildNFAStep, test_buildNFA, test_allDirects, test_searchNFAStep, test_searchNFAFn, test_searchNFA, test_nodeID, test_unNodeID, test_collectTransitions, test_computePending, test_buildDFANodes, test_buildDFA, test_searchDFAStep, test_searchDFAFn, test_searchDFA, test_compileNFA, test_compileDFA, test_compile, ]);