import { explode, reduced, reductions, runTests } from "sjs"; import { SyntaxError, ValueError, InternalError, ConcatStep, shouldConcat, isOperator, numFromDigits, escapingStateStep, rangeStateStep, classStateStep, TRANSITION_FNS, ANCHOR_FNS, isAnchor, isTransition, tokenizeRegexStep, tokenizeRegexFn, tokenizeRegex, shouldPush, findLowerPrecedenceItem, toPostfixStep, toPostfix, emptyNFA, literal, concat, union, zeroOrMore, oneOrMore, zeroOrOne, compareRange, compressRangeStep, compressCharacterRanges, inRange, characterClass, wildcard, caret, dollar, OPERATORS_FNS, baseNFA, buildNFAStep, buildNFA, allDirects, interpretMetacharacter, performTransition, searchNFAStep, searchNFAFn, searchNFA, nodeID, unNodeID, collectTransitions, computePending, toDFANodes, toDFA, searchDFAStep, searchDFAFn, searchDFA, parse, compileNFA, compileDFA, compile, } from "../src/paca.exported.mjs"; const test_shouldConcat = t => { t.start("shouldConcat()"); t.testing("do concat literal chars", () => { t.assertEq(shouldConcat("a", "b"), true); }); t.testing("but not when they're the last one", () => { t.assertEq(shouldConcat("a", undefined), false); t.assertEq(shouldConcat("b", undefined), false); }); t.testing("do not concat when starting a parens group", () => { t.assertEq(shouldConcat("(", "a"), false); t.assertEq(shouldConcat("(", "b"), false); }); t.testing("do not concat when starting a union", () => { t.assertEq(shouldConcat("|", "a"), false); t.assertEq(shouldConcat("|", "b"), false); }); t.testing("do not concat when next is special", () => { t.assertEq(shouldConcat("a", "*"), false); t.assertEq(shouldConcat("a", "|"), false); t.assertEq(shouldConcat("a", ")"), false); }); t.testing("only consider the `next` char", () => { t.assertEq(shouldConcat(null, "\\"), true); }); }; const test_isOperator = t => { t.start("isOperator()"); t.testing("operators and open parens are true", () => { t.assertEq(isOperator("*"), true); t.assertEq(isOperator("|"), true); t.assertEq(isOperator("+"), 
true); t.assertEq(isOperator("?"), true); t.assertEq(isOperator("("), true); t.assertEq(isOperator(")"), true); }); t.testing("false for everyday non-meta chars", () => { t.assertEq(isOperator("a"), false); t.assertEq(isOperator("_"), false); }); }; const test_numFromDigits = t => { t.start("numFromDigits()"); t.testing("-1 on empty array", () => { t.assertEq(numFromDigits([]), -1); }); t.testing("the number from the digits", () => { t.assertEq(numFromDigits([ "0" ]), 0); t.assertEq(numFromDigits([ "1" ]), 1); t.assertEq(numFromDigits([ "0", "1" ]), 1); t.assertEq(numFromDigits([ "1", "2", "3" ]), 123); }); }; const test_escapingStateStep = t => { t.start("escapingStateStep()"); t.testing("add a concat when applicable", () => { const given = escapingStateStep( { out: [ 1, 2, 3 ] }, "*", null, "b", ); const expected = { out: [ 1, 2, 3, "*", { operator: "concat" } ], state: "accepting", context: undefined, }; t.assertEq(given, expected); }); t.testing("without a concat when not applicable", () => { const given = escapingStateStep( { out: [ 1, 2, 3 ] }, "$", null, ")", ); const expected = { out: [ 1, 2, 3, "$" ], state: "accepting", context: undefined, }; t.assertEq(given, expected); }); t.testing("error when escaping a non-escapeable char", () => { const { value: { error }} = escapingStateStep( {}, "a", null, null, ); t.assertEq(error.message, "unknown escape sequence: \\a"), t.assertEq(error instanceof SyntaxError, true); }); }; const test_rangeStateStep = t => { t.start("rangeStateStep()"); t.testing("error when comma is missing", () => { const { value: { error }} = rangeStateStep( { context: { where: "from" }}, "}", null, null, ); t.assertEq(error.message, "missing comma in range operator"); t.assertEq(error instanceof SyntaxError, true); }); t.testing("error when range decreases", () => { const { value: { error }} = rangeStateStep( { context: { from: [ "3" ], to: [ "1" ], where: "to", }, }, "}", null, null, ); t.assertEq(error.message, "bad range values: {3,1}"); 
t.assertEq(error instanceof ValueError, true); }); t.testing("OK when \"to\" is unbounded", () => { const given = rangeStateStep( { out: [ 1, 2, 3 ], context: { from: [ "1", "0" ], to: [], where: "to", }, }, "}", ); const expected = { out: [ 1, 2, 3, { operator: "range", from: 10, to: -1, }], state: "accepting", context: null, }; t.assertEq(given, expected); }); t.testing("error when we have more than 1 comma", () => { const { value: { error }} = rangeStateStep( { context: { where: "to" } }, ",", null, null, ); t.assertEq( error.message, "extraneous comma in range expression", ); t.assertEq(error instanceof SyntaxError, true); }); t.testing("a comma change the `where` - `from` => `to`", () => { const given = rangeStateStep( { out: 123, state: 321, context: { x: 1, y: 2, where: "ignored" }, }, ",", null, null ); const expected = { out: 123, state: 321, context: { x: 1, y: 2, where: "to", }, }; t.assertEq(given, expected); }); t.testing("error on non-numeric component chars", () => { const { value: { error }} = rangeStateStep({}, "a", null, null); t.assertEq(error.message, "bad char in range expression: a"); t.assertEq(error instanceof SyntaxError, true); }); t.testing("digit gets added to the key that `where` indicates", () => { const given1 = rangeStateStep( { out: 1, state: 2, context: { a: "bc", from: [ "1", "3" ], to: [], where: "from", }, }, "7", null, null, ); const expected1 = { out: 1, state: 2, context: { a: "bc", from: [ "1", "3", "7" ], to: [], where: "from", }, }; t.assertEq(given1, expected1); const given2 = rangeStateStep( { context: { to: [], where: "to", }, }, "0", null, null, ); const expected2 = { out: undefined, state: undefined, context: { to: [ "0" ], where: "to" , }, }; t.assertEq(given2, expected2); }); }; const test_classStateStep = t => { t.start("classStateStep()"); t.testing("error when range is unfinished", () => { const { value: { error }} = classStateStep( { context: { range: { where: "to" }}}, "]", null, null, ); 
t.assertEq(error.message, "unfinished character class range"); t.assertEq(error instanceof SyntaxError, true); }); t.testing("error when class is empty", () => { const { value: { error }} = classStateStep( { context: { range: {}, set: [] }}, "]", null, null, ); t.assertEq(error.message, "empty character class"); t.assertEq(error instanceof ValueError, true); }); t.testing("OK when class in non-empty", () => { const given = classStateStep( { out: [ 1, 2, 3 ], context: { range: {}, set: [ 4, 5, 6 ], }, }, "]", null, null, ); const expected = { out: [ 1, 2, 3, { meta: "class", set: [ 4, 5, 6 ], caret: false, }], state: "accepting", context: null, }; t.assertEq(given, expected); }); t.testing("error on descending range", () => { const { value: { error }} = classStateStep( { context: { range: { from: "c", where: "to" }}}, "b", null, null, ); const message = "bad class range values: [c-b]"; t.assertEq(error.message, message); t.assertEq(error instanceof ValueError, true); }); t.testing("OK when adding ending to range", () => { const given = classStateStep( { context: { range: { from: "a", where: "to", }, set: [ "a", "b" ], x: 1, }, }, "z", null, null, ); const expected = { out: undefined, state: undefined, context: { range: { from: null, where: "from", }, set: [ "a", "b", { from: "a", to: "z" }], x: 1, }, }; t.assertEq(given, expected); }); t.testing("a backslash enters escaping state", () => { const given = classStateStep( { context: { what: "ever" }}, "\\", null, null, ); const expected = { out: undefined, state: undefined, context: { what: "ever", escaping: true, }, }; t.assertEq(given, expected); }); t.testing("when escaping, special chars get added to the set", () => { const given = classStateStep( { context: { set: [ "a" ], escaping: true }}, "]", null, null, ); const expected = { out: undefined, state: undefined, context: { set: [ "a", "]" ] }, }; t.assertEq(given, expected); }); t.testing("a hyphen changes the last char as a range start", () => { const given = 
classStateStep( { context: { range: "IGNORED", set: [ "0" ], x: 1, }, }, "-", null, null, ); const expected = { out: undefined, state: undefined, context: { range: { from: "0", where: "to", }, set: [], x: 1, }, }; t.assertEq(given, expected); }); t.testing("hyphen as the first char is taken literally", () => { const given = classStateStep( { context: { range: {}, set: [], x: 1, }, }, "-", null, null, ); const expected = { out: undefined, state: undefined, context: { range: {}, set: [ "-" ], x: 1, }, }; t.assertEq(given, expected); }); t.testing("caret as the first char toggles the boolean", () => { const given = classStateStep( { context: { x: 1, set: [], range: {}}}, "^", null, null, ); const expected = { out: undefined, state: undefined, context: { x: 1, set: [], range: {}, caret: true, }, }; t.assertEq(given, expected); }); t.testing("other chars are just added to the set", () => { const given = classStateStep( { context: { x: 1, set: [], range: {}}}, "_", null, null, ); const expected = { out: undefined, state: undefined, context: { x: 1, set: [ "_" ], range: {}, }, }; t.assertEq(given, expected); }); t.testing("caret as not the first char is taken literally", () => { // FIXME }); }; const test_TRANSITION_FNS = t => { t.start("TRANSITION_FNS"); t.testing("\"\\\\\" depends on `out` and `context`", () => { t.assertEq( TRANSITION_FNS["\\"]( { out: 1, context: 2 }, null, null, null, ), { out: 1, context: 2, state: "escaping" }, ); }); t.testing("\"{\" only depends on `out`", () => { t.assertEq( TRANSITION_FNS["{"]({ out: "" }, null, null, null), { out: "", state: "range", context: { from: [], to: [], where: "from", }, }, ); }); t.testing("\"[\" only depends on `out`", () => { t.assertEq( TRANSITION_FNS["["]({ out: 123 }, null, null, null), { out: 123, state: "class", context: { set: [], range: { from: null, where: "from", }, }, }, ); }); }; const test_ANCHOR_FNS = t => { t.start("ANCHOR_FNS"); t.testing(`"^" error when not the first char`, () => { const { value: { 
error }} = ANCHOR_FNS["^"]({}, null, 1, null); const message = "^ not at the start of the expression" t.assertEq(error.message, message); t.assertEq(error instanceof SyntaxError, true); }); t.testing(`"$" error when not the last char`, () => { const { value: { error }} = ANCHOR_FNS["$"]( {}, null, null, "a", ); const message = "$ not at the end of the expression"; t.assertEq(error.message, message); t.assertEq(error instanceof SyntaxError, true); }); t.testing("caret operator gets added to output", () => { const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null); const expected = { out: [ 1, { meta: "^" }, { operator: "concat" } ], state: undefined, context: undefined, }; t.assertEq(given, expected); }); t.testing("dollar operator gets added to output", () => { const given = ANCHOR_FNS["$"]( { out: [ 2 ] }, null, null, undefined, ); const expected = { out: [ 2, { meta: "$" } ], state: undefined, context: undefined, }; t.assertEq(given, expected); }); }; const test_isAnchor = t => { t.start("isAnchor()"); t.testing("anchors are true", () => { t.assertEq(isAnchor("^"), true); t.assertEq(isAnchor("$"), true); }); t.testing("false for everything else", () => { t.assertEq(isAnchor("*"), false); t.assertEq(isAnchor("\\"), false); t.assertEq(isAnchor("a"), false); t.assertEq(isAnchor("_"), false); }); }; const test_isTransition = t => { t.start("isTransition()"); t.testing("transition chars are true", () => { t.assertEq(isTransition("\\"), true); t.assertEq(isTransition("["), true); t.assertEq(isTransition("{"), true); }); t.testing("false for everything else", () => { t.assertEq(isTransition("."), false); t.assertEq(isTransition("*"), false); t.assertEq(isTransition("a"), false); }); }; const test_tokenizeRegexStep = t => { t.start("tokenizeRegexStep()"); const cat = { operator: "concat" }; const union = { operator: "|" }; const oparen = { operator: "(" }; const cparen = { operator: ")" }; const star = { operator: "*" }; const caret = { meta: "^" }; const dollar = { 
meta: "$" }; t.testing("when escaping we get whatever the char is", () => { const regex = "ab\\|\\(cd\\)\\*"; const stepFn = tokenizeRegexStep(regex); const steps = [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b", cat], state: ConcatStep.ESCAPING, context: null, }, { out: ["a", cat, "b", cat, "|", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b", cat, "|", cat], state: ConcatStep.ESCAPING, context: null, }, { out: ["a", cat, "b", cat, "|", cat, "(", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: [ "a", cat, "b", cat, "|", cat, "(", cat, "c", cat, ], state: ConcatStep.ACCEPTING, context: null, }, { out: [ "a", cat, "b", cat, "|", cat, "(", cat, "c", cat, "d", cat, ], state: ConcatStep.ACCEPTING, context: null, }, { out: [ "a", cat, "b", cat, "|", cat, "(", cat, "c", cat, "d", cat, ], state: ConcatStep.ESCAPING, context: null, }, { out: [ "a", cat, "b", cat, "|", cat, "(", cat, "c", cat, "d", cat, ")", cat, ], state: ConcatStep.ACCEPTING, context: null, }, { out: [ "a", cat, "b", cat, "|", cat, "(", cat, "c", cat, "d", cat, ")", cat, ], state: ConcatStep.ESCAPING, context: null, }, { out: [ "a", cat, "b", cat, "|", cat, "(", cat, "c", cat, "d", cat, ")", cat, "*", ], state: ConcatStep.ACCEPTING, context: null, }]; const given = reductions( steps, (acc, el, i) => { const ret = stepFn(acc, regex[i], i); t.assertEq(ret, el); return ret; }, ); t.assertEq(given, steps); }); t.testing("operators get detected as such", () => { const regex = "ab|(cd)*"; const stepFn = tokenizeRegexStep(regex); const steps = [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b"], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b", union], state: ConcatStep.ACCEPTING, 
context: null, }, { out: ["a", cat, "b", union, oparen], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b", union, oparen, "c", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat, "b", union, oparen, "c", cat, "d"], state: ConcatStep.ACCEPTING, context: null, }, { out: [ "a", cat, "b", union, oparen, "c", cat, "d", cparen, ], state: ConcatStep.ACCEPTING, context: null, }, { out: [ "a", cat, "b", union, oparen, "c", cat, "d", cparen, star ], state: ConcatStep.ACCEPTING, context: null, }]; const given = reductions( steps, (acc, el, i) => { const ret = stepFn(acc, regex[i], i); t.assertEq(ret, el); return ret; }, ); t.assertEq(given, steps); }); t.testing("anchors get detected as such", () => { const regex = "^[behilos]*$"; const stepFn = tokenizeRegexStep(regex); const steps = [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: [caret, cat], state: ConcatStep.ACCEPTING, context: null, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: [], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b"], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b", "e"], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b", "e", "h"], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b", "e", "h", "i"], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b", "e", "h", "i", "l"], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b", "e", "h", "i", "l", "o"], }, }, { out: [caret, cat], state: ConcatStep.CLASS, context: { range: { from: null, where: "from", }, set: ["b", "e", "h", "i", "l", "o", "s"], }, }, { out: [caret, cat, { meta: 
"class", set: [ "b", "e", "h", "i", "l", "o", "s" ], caret: false, }], state: "accepting", context: null, }, { out: [caret, cat, { meta: "class", set: [ "b", "e", "h", "i", "l", "o", "s" ], caret: false, }, star, cat], state: "accepting", context: null, }, { out: [caret, cat, { meta: "class", set: [ "b", "e", "h", "i", "l", "o", "s" ], caret: false, }, star, cat, dollar], state: "accepting", context: null, }]; const given = reductions( steps, (acc, el, i) => { const ret = stepFn(acc, regex[i], i); t.assertEq(ret, el); return ret; }, ); t.assertEq(given, steps); }); t.testing("multichar range operator {m,n} is parsed right", () => { const table = [{ regex: "a{1,2}", steps: [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [], to: [], where: "from", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1" ], to: [], where: "from", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1" ], to: [], where: "to", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1" ], to: [ "2" ], where: "to", }, }, { out: ["a", cat, { operator: "range", from: 1, to: 2, }], state: ConcatStep.ACCEPTING, context: null, }], }, { regex: "a{,2}", steps: [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [], to: [], where: "from", }, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [], to: [], where: "to", }, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [], to: [ "2" ], where: "to", }, }, { out: ["a", cat, { operator: "range", from: -1, to: 2, }], state: ConcatStep.ACCEPTING, context: null, }], }, { regex: "a{1,}", steps: [{ out: [], state: 
ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [], to: [], where: "from", }, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [ "1" ], to: [], where: "from", }, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [ "1" ], to: [], where: "to", }, }, { out: ["a", cat, { operator: "range", from: 1, to: -1, }], state: ConcatStep.ACCEPTING, context: null, }], }, { regex: "a{,}", steps: [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [], to: [], where: "from", }, }, { out: ["a", { operator: "concat" }], state: ConcatStep.RANGE, context: { from: [], to: [], where: "to", }, }, { out: ["a", cat, { operator: "range", from: -1, to: -1, }], state: ConcatStep.ACCEPTING, context: null, }], }, { regex: "a{123,456}", steps: [{ out: [], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat], state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [], to: [], where: "from", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1" ], to: [], where: "from", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1", "2" ], to: [], where: "from", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1", "2", "3" ], to: [], where: "from", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1", "2", "3" ], to: [], where: "to", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1", "2", "3" ], to: [ "4" ], where: "to", }, }, { out: ["a", cat], state: ConcatStep.RANGE, context: { from: [ "1", "2", "3" ], to: [ "4", "5" ], where: "to", }, }, { out: ["a", 
cat], state: ConcatStep.RANGE, context: { from: [ "1", "2", "3" ], to: [ "4", "5", "6" ], where: "to", }, }, { out: ["a", cat, { operator: "range", from: 123, to: 456, }], state: ConcatStep.ACCEPTING, context: null, }], }]; for (const { regex, steps } of table) { const stepFn = tokenizeRegexStep(regex); const given = reductions( steps, (acc, el, i) => { const ret = stepFn(acc, regex[i], i); t.assertEq(ret, el); return ret; }, ); t.assertEq(given, steps); } }); }; const test_tokenizeRegexFn = t => { t.start("tokenizeRegexFn"); t.testing("regex table", () => { const concat = { operator: "concat" }; const star = { operator: "*" }; const table = [{ in: "", expected: { out: [], state: ConcatStep.ACCEPTING, context: null, }, }, { in: "a", expected: { out: ["a"], state: ConcatStep.ACCEPTING, context: null, }, }, { in: "a*", expected: { out: ["a", star], state: ConcatStep.ACCEPTING, context: null, }, }, { in: "a*b", expected: { out: ["a", star, concat, "b"], state: ConcatStep.ACCEPTING, context: null, }, }]; for (const test of table) { t.assertEq( tokenizeRegexFn(explode(test.in)), test.expected, ); } }); }; const test_tokenizeRegex = t => { t.start("tokenizeRegex()"); t.testing("regex table", () => { const concat = { operator: "concat" }; const star = { operator: "*" }; const table = [{ in: "", expected: [] }, { in: "a", expected: [ "a" ], }, { in: "ab", expected: [ "a", concat, "b" ], }, { in: "abc", expected: [ "a", concat, "b", concat, "c" ], }, { in: "a*", expected: [ "a", star], }, { in: "ab*", expected: [ "a", concat, "b", star], }, { in: "a*b*", expected: [ "a", star, concat, "b", star], }, { in: "a|b", expected: [ "a", { operator: "|" }, "b" ], }, { in: "a(bc(d))ef", expected: [ "a", concat, { operator: "(" }, "b", concat, "c", concat, { operator: "(" }, "d", { operator: ")" }, { operator: ")" }, concat, "e", concat, "f", ], }, { in: "ab|(cd)*", expected: [ "a", concat, "b", { operator: "|" }, { operator: "(" }, "c", concat, "d", { operator: ")" }, star, ], }, { 
in: "(a|b)*c", expected: [ { operator: "(" }, "a", { operator: "|" }, "b", { operator: ")" }, star, concat, "c", ], }]; for (const test of table) { t.assertEq( tokenizeRegex(explode(test.in)), test.expected, ); } }); t.testing("trailing escape errors", () => { try { t.assertEq(tokenizeRegex(explode("\\")), null); t.assertEq(true, false); } catch (e) { const message = "bad ending state: escaping"; t.assertEq(e.message, message); t.assertEq(e instanceof SyntaxError, true); } }); t.testing("trailing range errors", () => { try { t.assertEq(tokenizeRegex(explode("{")), null); t.assertEq(true, false); } catch (e) { const message = "bad ending state: range"; t.assertEq(e.message, message); t.assertEq(e instanceof SyntaxError, true); } }); }; const test_shouldPush = t => { t.start("shouldPush()"); t.testing("always push on empty stack", () => { t.assertEq(shouldPush([], "a"), true); t.assertEq(shouldPush([], null), true); }); t.testing(`push when operator or stack top is "(" without ")"`, () => { t.assertEq( shouldPush([{ operator: "("}], { operator: ")"}), false, ); }); t.testing(`push when operator or stack top is "("`, () => { t.assertEq(shouldPush([{}], { operator: "(" }), true); t.assertEq(shouldPush([{ operator: "("}], {}), true); }); t.testing("don't push otherwise", () => { t.assertEq(shouldPush([{}], {}), false); }); }; const test_findLowerPrecedenceItem = t => { t.start("findLowerPrecedenceItem()"); t.testing("open parens is always stronger", () => { t.assertEq( findLowerPrecedenceItem([{ operator: "("}], {}), 0, ); t.assertEq( findLowerPrecedenceItem([ { operator: "*" }, { operator: "concat" }, { operator: "(" }, { operator: "(" }, ], { operator: "|" }), 2, ); }); t.testing("-1 if equal precedence", () => { t.assertEq( findLowerPrecedenceItem([ { operator: "*" }, { operator: "+" }, { operator: "?" 
}, ], { operator: "concat" }), -1, ); }); t.testing("index if lower precedence exists", () => { t.assertEq( findLowerPrecedenceItem([ { operator: "*" }, { operator: "+" }, { operator: "?" }, { operator: "|" }, ], { operator: "concat" }), 3, ); }); }; const test_toPostfixStep = t => { t.start("toPostfixStep()"); t.testing("string literals go directly to out", () => { t.assertEq( toPostfixStep({ out: ["a"], stack: "stack" }, "t"), { out: ["a", "t"], stack: "stack" }, ); }); t.testing("non-operators go directly to out", () => { t.assertEq( toPostfixStep( { out: ["a"], stack: [{ operator: "(" }] }, { meta: "." }, ), { out: ["a", { meta: "." }], stack: [{ operator: "(" }], }, ); t.assertEq( toPostfixStep( { out: ["a"], stack: [{ operator: "*" }] }, { meta: "class" }, ), { out: ["a", { meta: "class" }], stack: [{ operator: "*" }], }, ); }); t.testing("parens put things on the stack", () => { t.assertEq( toPostfixStep({ out: "out", stack: [{ operator: "(" }], }, { operator: "X" }), { out: "out", stack: [ { operator: "X" }, { operator: "(" }, ]}, ); }); t.testing("error when parens are mismatched", () => { try { toPostfixStep( { stack: ["a"] }, { operator: ")" }, null, "TOKENS", ); t.assertEq(true, false); } catch (e) { const msg = "close parens without opening: TOKENS"; t.assertEq(e.message, msg); t.assertEq(e instanceof SyntaxError, true); } }); t.testing("the matched parens splits the stack", () => { t.assertEq( toPostfixStep( { out: ["0"], stack: ["a", { operator: "(" }, "b"], }, { operator: ")" }, ), { out: ["0", "a"], stack: ["b"], }, ); t.assertEq( toPostfixStep( { out: ["0"], stack: ["a", { operator: "(" }], }, { operator: ")" }, ), { out: ["0", "a"], stack: [], }, ); t.assertEq( toPostfixStep( { out: ["0"], stack: [{ operator: "(" }, "b"], }, { operator: ")" }, ), { out: ["0"], stack: ["b"], }, ); }); t.testing("flush stack if nothing of lower precedence", () => { t.assertEq( toPostfixStep( { out: ["0"], stack: [ { operator: "*" }, { operator: "+" }, { operator: 
"?" }, { operator: "concat" }, ], }, { operator: "|" }, ), { out: [ "0", { operator: "*" }, { operator: "+" }, { operator: "?" }, { operator: "concat" }, ], stack: [{operator: "|"}], }, ); }); t.testing("split the stack if lower precedence exists", () => { t.assertEq( toPostfixStep( { out: ["0"], stack: [ { operator: "*" }, { operator: "+" }, { operator: "?" }, { operator: "|" }, ], }, { operator: "concat" }, ), { out: [ "0", { operator: "*" }, { operator: "+" }, { operator: "?" }, ], stack: [ { operator: "concat" }, { operator: "|" }, ], }, ); t.assertEq( toPostfixStep( { out: ["0"], stack: [ { operator: "|" }, { operator: "*" }, { operator: "+" }, { operator: "?" }, ], }, { operator: "concat" }, ), { out: [ "0", ], stack: [ { operator: "concat" }, { operator: "|" }, { operator: "*" }, { operator: "+" }, { operator: "?" }, ], }, ); t.assertEq( toPostfixStep( { out: ["0"], stack: [ { operator: "*" }, { operator: "+" }, { operator: "|" }, { operator: "?" }, ], }, { operator: "concat" }, ), { out: [ "0", { operator: "*" }, { operator: "+" }, ], stack: [ { operator: "concat" }, { operator: "|" }, { operator: "?" }, ], }, ); }); }; const test_toPostfix = t => { t.start("toPostfix()"); t.testing("regex table", () => { const concat = { operator: "concat" }; const wildcard = { meta: "." 
// Test suite (tail) for the "paca" regex engine: postfix conversion, Thompson-style
// NFA construction, NFA->DFA subset conversion, and search over both machines.
// NOTE(review): this chunk was stored with newlines collapsed; the code below is
// token-identical to the original — only line breaks and these comments were added.
// Each test_* function receives a harness `t` (t.start / t.testing / t.assertEq)
// and is registered with runTests() at the bottom of the file.
// In the expected NFA tables, `direct` holds epsilon-like (input-free) edges,
// `transitions` maps a literal char to a target node id, and `meta` describes
// metacharacter behavior (wildcard / class / anchors) — as exercised below.
//
// Tail of test_toPostfix() (the function opens before this chunk): table-driven
// cases checking that infix token streams come out in postfix order, with parens
// removed and operators following their operands.
}; const table = [{ in: [], expected: [] }, { in: [ "a" ], expected: [ "a" ], }, { in: [ "a", concat, "b", concat, "c" ], expected: [ "a", "b", concat, "c", concat ], }, { in: [ "a", { operator: "*" }], expected: [ "a", { operator: "*" }], }, { in: [ "a", { operator: "|" }, "b" ], expected: [ "a", "b", { operator: "|" } ], }, { in: [ "a", concat, "b", { operator: "|" }, { operator: "(" }, "c", concat, "d", { operator: ")" }, { operator: "*" }, ], expected: [ "a", "b", concat, "c", "d", concat, { operator: "*" }, { operator: "|" }, ], }, { in: [ { operator: "(" }, "a", { operator: "|" }, "b", { operator: ")" }, { operator: "*" }, concat, "c", ], expected: [ "a", "b", { operator: "|" }, { operator: "*" }, "c", concat, ], }, { in: [ { operator: "(" }, { operator: ")" }, ], expected: [ ], }, { in: [ "a", concat, "b", concat, wildcard, concat, "c", ], expected: [ "a", "b", concat, wildcard, concat, "c", concat, ], }, { in: [ { meta: "^" }, "a", concat, wildcard, { operator: "*" }, concat, "$", concat, "b", { operator: "range", from: 3, to: 5 }, ], expected: [ { meta: "^" }, "a", wildcard, { operator: "*" }, concat, "$", concat, "b", { operator: "range", from: 3, to: 5 }, concat, ], }]; for (const test of table) { t.assertEq( toPostfix(test.in), test.expected, ); } }); };
// emptyNFA(): smallest machine — two nodes joined by one epsilon ("direct") edge.
const test_emptyNFA = t => { t.start("emptyNFA()"); t.testing("we deterministically build out of the ID", () => { const expected = { start: 1, end: 2, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(emptyNFA(), expected); }); };
// literal(char, id): one labelled transition between two consecutively-numbered nodes.
const test_literal = t => { t.start("literal()"); t.testing("the given edge connects the nodes", () => { const expected = { start: 123, end: 124, nodes: { 123: { direct: [], transitions: { X: 124 }, }, 124: { direct: [], transitions: {}, }, }, }; t.assertEq(literal("X", 123), expected); }); };
// concat(a, b): sequences two NFAs by adding an epsilon edge a.end -> b.start.
const test_concat = t => { t.start("concat()"); t.testing("on literal() and on more complex NFAs", () => { const a = literal("a", 1);
const b = literal("b", a.end + 1); const a_ = literal("a", b.end + 1); const expected1 = { start: 1, end: 4, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(concat(a, b), expected1); const expected2 = { start: 1, end: 6, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 5 ], transitions: {}, }, 5: { direct: [], transitions: { a: 6 }, }, 6: { direct: [], transitions: {}, }, }, }; t.assertEq(concat(concat(a, b), a_), expected2); }); };
// union(a, b): fresh start node forks (epsilon) into both operands; both ends
// rejoin at a fresh end node — the classic alternation construction.
const test_union = t => { t.start("union()"); t.testing("on literal() and on more complex NFAs", () => { const a = literal("a", 1); const b = literal("b", a.end + 1); const expected1 = { start: 5, end: 6, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [], transitions: {}, }, }, }; t.assertEq(union(a, b), expected1); const c = literal("c", expected1.end + 1); const expected2 = { start: 9, end: 10, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [ 10 ], transitions: {}, }, 7: { direct: [], transitions: { c: 8 }, }, 8: { direct: [ 10 ], transitions: {}, }, 9: { direct: [ 5, 7 ], transitions: {}, }, 10: { direct: [], transitions: {}, }, }, }; t.assertEq(union(union(a, b), c), expected2); }); };
// zeroOrMore (Kleene star): new start node can skip the inner NFA entirely or
// loop back through it; note node 3's direct edges [1, 4].
const test_zeroOrMore = t => { t.start("zeroOrMore()"); t.testing("on literal()", () => { const a = literal("a", 1); const expected = { start: 3, end: 4, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: {
direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(zeroOrMore(a), expected); }); };
// oneOrMore (+): same loop node as star, but `start` stays at the inner NFA's
// start (1), so at least one traversal is required.
const test_oneOrMore = t => { t.start("oneOrMore()"); t.testing("on literal()", () => { const a = literal("a", 1); const expected = { start: 1, end: 4, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [ 1, 4 ], transitions: {}, }, }, }; t.assertEq(oneOrMore(a), expected); }); };
// zeroOrOne (?): new start may bypass straight to the end; no back-loop.
const test_zeroOrOne = t => { t.start("zeroOrOne()"); t.testing("on literal()", () => { const a = literal("a", 1); const expected = { start: 3, end: 4, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 4 ], transitions: {}, }, 3: { direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(zeroOrOne(a), expected); }); };
// compareRange: comparator for [from, to] pairs — ascending by from, then by to.
const test_compareRange = t => { t.start("compareRange()"); t.testing("sorts ranges in ascending order", () => { const input = [ [3, 5], [1, 3], [1, 5], [1, 2], [2, 3], [2, 1], [5, 3], [5, 1], ]; const expected = [ [1, 2], [1, 3], [1, 5], [2, 1], [2, 3], [3, 5], [5, 1], [5, 3], ]; t.assertEq(input.toSorted(compareRange), expected); }); };
// compressRangeStep: reducer folding one range into an accumulated list —
// appends when disjoint, otherwise merges into the widest covering interval.
const test_compressRangeStep = t => { t.start("compressRangeStep()"); t.testing("concat when not overlapping", () => { t.assertEq( compressRangeStep([[1, 10]], [20, 30]), [[1, 10], [20, 30]], ); }); t.testing("merge ranges when overlapping", () => { t.assertEq( compressRangeStep([[1, 10]], [10, 30]), [[1, 30]], ); }); t.testing("merge with the largest intervals", () => { t.assertEq( compressRangeStep([[1, 10]], [2, 8]), [[1, 10]], ); }); };
// compressCharacterRanges: sorts, then merges overlapping/adjacent ranges.
const test_compressCharacterRanges = t => { t.start("compressCharacterRanges()"); t.testing("noop when no overlaps", () => { t.assertEq( compressCharacterRanges([[1, 5], [11, 15], [7, 9]]), [[1, 5], [7, 9], [11, 15]], ); }); t.testing("smash all ranges into a single one", () => { t.assertEq( compressCharacterRanges( [[1, 5], [11, 15], [7,
9], [5, 7], [9, 11]], ), [[1, 15]], ); }); };
// inRange: membership of a character's code point in a {from: to} range map.
const test_inRange = t => { t.start("inRange()"); t.testing("always false on empty ranges", () => { t.assertEq(inRange({}, "a"), false); }); t.testing("false when outside range", () => { t.assertEq(inRange({ 10: 11 }, "a"), false); t.assertEq(inRange({ 1: 96 }, "a"), false); t.assertEq(inRange({ 96: 96 }, "a"), false); t.assertEq(inRange({ 98: 98 }, "a"), false); t.assertEq(inRange({ 98: 127 }, "a"), false); }); t.testing("true when within range", () => { t.assertEq(inRange({ 48: 57 }, "0"), true); t.assertEq(inRange({ 65: 90 }, "X"), true); t.assertEq(inRange({ 97: 122 }, "a"), true); t.assertEq(inRange({ 97: 97 }, "a"), true); }); };
// characterClass: builds a meta node for [...] / [^...] — singles go into the
// `matches` Set (deduped), {from,to} entries become char-code `ranges`, caret
// flips op between "includes" and "excludes"; overlapping ranges collapse.
const test_characterClass = t => { t.start("characterClass()"); t.testing("when caret is true, op is excludes", () => { const expected = { start: 3, end: 4, nodes: { 3: { direct: [], transitions: {}, meta: { op: "excludes", to: 4, matches: new Set([ "-", "a", "b", "c", ".", "_", "$", "^", ]), ranges: { 102: 112, 116: 122, }, }, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(characterClass({ set: [ "-", "a", "b", "c", { from: "f", to: "p" }, ".", { from: "t", to: "z" }, "_", "$", "^", "a", "_", ], caret: true, }, 3), expected); }); t.testing("when caret is false, op is includes", () => { const expected = { start: 7, end: 8, nodes: { 7: { direct: [], transitions: {}, meta: { op: "includes", to: 8, matches: new Set([]), ranges: { 48: 57, 65: 90, 97: 122, }, }, }, 8: { direct: [], transitions: {}, }, }, }; t.assertEq(characterClass({ set: [ { from: "a", to: "z" }, { from: "A", to: "Z" }, { from: "0", to: "9" }, ], }, 7), expected); }); t.testing("overlapping ranges collapse into a single one", () => { const expected = { start: 3, end: 4, nodes: { 3: { direct: [], transitions: {}, meta: { op: "excludes", to: 4, matches: new Set(["-"]), ranges: { 97: 122, }, }, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(characterClass({ set: [ "-", "a", "b", "c", { from: "f", to: "p" },
{ from: "t", to: "z" }, "a", "d", "f", "x", "y", "z", { from: "p", to: "t" }, { from: "a", to: "f" }, ], caret: true, }, 3), expected); }); };
// wildcard (.): meta op `true` — matches any character; first arg is ignored.
const test_wildcard = t => { t.start("wildcard()"); t.testing("the returned nfa always matches", () => { const expected = { start: 5, end: 6, nodes: { 5: { direct: [], transitions: {}, meta: { op: true, to: 6, } }, 6: { direct: [], transitions: {}, }, }, }; t.assertEq(wildcard("IGNORED", 5), expected); }); };
// caret (^): anchor node carrying meta op "caret"; first arg is ignored.
const test_caret = t => { t.start("caret()"); t.testing("we get the NFA with the caret meta attribute", () => { const expected = { start: 3, end: 4, nodes: { 3: { direct: [], transitions: {}, meta: { op: "caret", to: 4, }, }, 4: { direct: [], transitions: {}, }, }, }; t.assertEq(caret("IGNORED", 3), expected); }); };
// dollar ($): anchor node carrying meta op "dollar"; first arg is ignored.
const test_dollar = t => { t.start("dollar()"); t.testing("we get the NFA that matches the end of string", () => { const expected = { start: 2, end: 3, nodes: { 2: { direct: [], transitions: {}, meta: { op: "dollar", to: 3, }, }, 3: { direct: [], transitions: {}, }, }, }; t.assertEq(dollar("IGNORED", 2), expected); }); };
// OPERATORS_FNS: dependency-injected table mapping operator symbols to stack
// functions — unary ops (* + ?) consume the top operand, binary ops (| concat)
// consume the top two. `fn` records call args to verify the injection is used.
const test_OPERATORS_FNS = t => { t.start("OPERATORS_FNS"); let arr = []; const fn = ret => (...args) => { arr.push({ args, ret, }); return ret; }; const input = [1, 2, 3]; t.testing("star", () => { arr = []; t.assertEq( OPERATORS_FNS({ zeroOrMoreFn: fn(111) })["*"](input), [1, 2, 111], ); t.assertEq(arr, [{ args: [3], ret: 111 }]); }); t.testing("plus", () => { arr = []; t.assertEq( OPERATORS_FNS({ oneOrMoreFn: fn(222) })["+"](input), [1, 2, 222], ); t.assertEq(arr, [{ args: [3], ret: 222 }]); }); t.testing("question mark", () => { arr = []; t.assertEq( OPERATORS_FNS({ zeroOrOneFn: fn(333) })["?"](input), [1, 2, 333], ); t.assertEq(arr, [{ args: [3], ret: 333 }]); }); t.testing("pipe", () => { arr = []; t.assertEq( OPERATORS_FNS({ unionFn: fn(444) })["|"](input), [1, 444], ); t.assertEq(arr, [{ args: [2, 3], ret: 444 }]); }); t.testing("concat", () => { arr = []; t.assertEq(
OPERATORS_FNS({ concatFn: fn(555) })["concat"](input), [1, 555], ); t.assertEq(arr, [{ args: [2, 3], ret: 555 }]); }); };
// baseNFA: dispatches a token either to literal() or to a metacharacter NFA.
const test_baseNFA = t => { t.start("baseNFA()"); t.testing("decide between metacharacter and literal", () => { const expected1 = { start: 1, end: 2, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [], transitions: {}, }, }, }; const expected2 = { start: 1, end: 2, nodes: { 1: { direct: [], transitions: {}, meta: { op: true, to: 2, }, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(baseNFA("a", 1), expected1); t.assertEq(baseNFA({ meta: "." }, 1), expected2); }); };
// buildNFAStep: reducer over postfix tokens — literals/metas push a fresh NFA
// (ids continue from the stack top), operator tokens fold the stack.
const test_buildNFAStep = t => { t.start("buildNFAStep()"); t.testing("literal char without existing nextID", () => { const expected = [{ start: 1, end: 2, nodes: { 1: { direct: [], transitions: { t: 2 }, }, 2: { direct: [], transitions: {}, }, }, }]; t.assertEq(buildNFAStep([], "t"), expected); }); t.testing("literal char with existing nextID", () => { const input = [ literal("a", 1) ]; const expected = [{ start: 1, end: 2, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [], transitions: {}, }, }, }, { start: 3, end: 4, nodes: { 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }, }]; t.assertEq(buildNFAStep(input, "b"), expected); t.assertEq( buildNFAStep(input, "b"), [ literal("a", 1), literal("b", 3) ] ); }); t.testing("metacharacter", () => { const input = [ literal("a", 1) ]; const expected = [{ start: 1, end: 2, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [], transitions: {}, }, }, }, { start: 3, end: 4, nodes: { 3: { direct: [], transitions: {}, meta: { op: true, to: 4, }, }, 4: { direct: [], transitions: {}, }, }, }]; t.assertEq( buildNFAStep(input, { meta: "."
}), expected, ); }); t.testing("operator", () => { const input = [ literal("a", 1), literal("b", 3) ]; const expected = [{ start: 1, end: 4, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }, }]; t.assertEq( buildNFAStep(input, { operator: "concat" }), expected, ); }); };
// buildNFA: folds a whole postfix token list into one NFA; exercised on the
// empty pattern, "(a|b)*cde", and the anchored class "^[behilos]*$".
const test_buildNFA = t => { t.start("buildNFA()"); t.testing("empty regex pattern", () => { const expected = { start: 1, end: 2, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(buildNFA([]), expected); }); t.testing("sample composed regex", () => { const regex = "(a|b)*cde"; const expected = { start: 7, end: 14, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [ 7 ], transitions: {}, }, 7: { direct: [ 5, 8 ], transitions: {}, }, 8: { direct: [ 9 ], transitions: {}, }, 9: { direct: [], transitions: { c: 10 }, }, 10: { direct: [ 11 ], transitions: {}, }, 11: { direct: [], transitions: { d: 12 }, }, 12: { direct: [ 13 ], transitions: {}, }, 13: { direct: [], transitions: { e: 14 }, }, 14: { direct: [], transitions: {}, }, }, }; t.assertEq( buildNFA(toPostfix(tokenizeRegex(explode(regex)))), expected, ); }); t.testing("example with metacharacters", () => { const regex = "^[behilos]*$"; const expected = { start: 1, end: 8, nodes: { 1: { direct: [], transitions: {}, meta: { op: "caret", to: 2, }, }, 2: { direct: [5], transitions: {}, }, 3: { direct: [], transitions: {}, meta: { op: "includes", to: 4, matches: new Set([ "b", "e", "h", "i", "l", "o", "s", ]), ranges: {}, }, }, 4: { direct: [5], transitions: {}, }, 5: { direct: [3, 6], transitions: {}, }, 6: { direct: [7], transitions: {}, }, 7: { direct: [], transitions: {}, meta: { op: "dollar", to:
8, }, }, 8: { direct: [], transitions: {}, }, }, }; t.assertEq( buildNFA(toPostfix(tokenizeRegex(explode(regex)))), expected, ); }); };
// allDirects: follows epsilon ("direct") edges transitively from a node and
// returns the deduped set of leaves (nodes with no further direct edges).
const test_allDirects = t => { t.start("allDirects()"); t.testing("empty directs", () => { const input = { 1: {}, 2: {}, 3: { direct: [] }, 4: {}, 5: {}, }; t.assertEq(allDirects(3, input), [3]); }); t.testing("single level", () => { const input = { 1: { direct: [3] }, 2: { direct: [4] }, 3: { direct: [] }, 4: { direct: [] }, }; t.assertEq(allDirects(1, input), [3]); }); t.testing("deep recursion", () => { const input = { 1: { direct: [2] }, 2: { direct: [3] }, 3: { direct: [4] }, 4: { direct: [5] }, 5: { direct: [] }, }; t.assertEq(allDirects(1, input), [5]); t.assertEq(allDirects(2, input), [5]); t.assertEq(allDirects(3, input), [5]); t.assertEq(allDirects(4, input), [5]); t.assertEq(allDirects(5, input), [5]); }); t.testing("plenty of duplicates", () => { const input = { 1: { direct: [2, 3, 4] }, 2: { direct: [3, 4, 5] }, 3: { direct: [4, 5, 6] }, 4: { direct: [5, 6] }, 5: { direct: [] }, 6: { direct: [] }, }; t.assertEq(allDirects(1, input), [5, 6]); }); };
// interpretMetacharacter: evaluates a node's `meta` against the current char —
// op true = wildcard, includes/excludes consult `matches` then `ranges`, and
// anchors (caret shown here) also depend on the current string position.
// NOTE(review): the first label says "null on missing meta attribute" but the
// asserted value is [] — the label looks stale; verify against the implementation.
const test_interpretMetacharacter = t => { t.start("interpretMetacharacter()"); t.testing("null on missing meta attribute", () => { t.assertEq(interpretMetacharacter({}, "IGNORED"), []); }); t.testing("error on invalid op", () => { try { interpretMetacharacter( { op: "OP" }, null, null, null, null, ); t.assertEq(true, false); } catch (e) { const message = "Unsupported op: OP"; t.assertEq(e.message, message); t.assertEq(e instanceof InternalError, true); } }); t.testing("wildcard always matches", () => { const input = { op: true, to: "ret", }; t.assertEq(interpretMetacharacter(input, "IGNORED"), ["ret"]); }); t.testing("matches returns depending on the op", () => { const input1 = { op: "includes", to: "ret", matches: new Set(["a"]), }; const input2 = { ...input1, op: "excludes", }; t.assertEq(interpretMetacharacter(input1, "a"), ["ret"]);
t.assertEq(interpretMetacharacter(input2, "a"), []); }); t.testing("inRange returns depending on op", () => { const input1 = { op: "includes", to: "ret", matches: new Set([]), ranges: { 97: 122 }, }; const input2 = { ...input1, op: "excludes", }; t.assertEq(interpretMetacharacter(input1, "x"), ["ret"]); t.assertEq(interpretMetacharacter(input2, "x"), []); }); t.testing("when nothing matches, we look at the op again", () => { const input1 = { op: "includes", to: "ret", matches: new Set([]), ranges: {}, }; const input2 = { ...input1, op: "excludes", }; t.assertEq(interpretMetacharacter(input1, "a"), []); t.assertEq(interpretMetacharacter(input2, "a"), ["ret"]); }); t.testing("caret only matches when at the start of the string", () => { const input = { op: "caret", to: 3, }; const nodes = { 3: { direct: [ 4 ], transitions: {}, }, 4: { direct: [ 5 ], transitions: {}, }, 5: { direct: [], transitions: { a: 6 }, }, 6: { direct: [ 7 ], transitions: {}, }, 7: { direct: [], transitions: { x: null }, }, }; t.assertEq( interpretMetacharacter(input, "a", 1, nodes), [], ); t.assertEq( interpretMetacharacter(input, "a", 0, nodes, [ "a" ]), [ 7 ], ); }); };
// performTransition: curried stepper — char-labelled transition if present,
// otherwise the node's meta is interpreted; empty array means no move.
const test_performTransition = t => { t.start("performTransition()"); t.testing("either get the direct transition", () => { const nodes = { 1: { transitions: { a: 2 }, }, 2: { transitions: {}, }, }; t.assertEq(performTransition(nodes, "a")(1), [2]); t.assertEq(performTransition(nodes, "a")(2), []); }); t.testing("or execute the meta attribute", () => { const nodes = { 1: { transitions: {}, meta: { op: true, to: 2, }, }, 2: { transitions: {}, }, }; t.assertEq(performTransition(nodes, "a")(1), [2]); t.assertEq(performTransition(nodes, "a")(2), []); }); };
// searchNFAStep: advances the whole frontier of active NFA states by one input
// character, expanding results through allDirects(); also covers the anchored
// class pattern where stepping recurses via interpretMetacharacter.
const test_searchNFAStep = t => { t.start("searchNFAStep()"); t.testing("empty on empty input", () => { t.assertEq(searchNFAStep({})([], null, null, []), []); }); t.testing("empty on empty transitions", () => { const nodes = { 1: { transitions: {} }, 2: { transitions: {} }, 3: {
transitions: {} }, }; t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a", null, []), []); }); t.testing("empty on non-matching transitions", () => { const nodes = { 1: { transitions: { b: 2 } }, 2: { transitions: { c: 3 } }, 3: { transitions: { d: 1 } }, }; t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a", null, []), []); }); t.testing("leaves given by allDirects() otherwise", () => { const nodes = { 1: { direct: [], transitions: { a: 5 }, }, 2: { direct: [], transitions: { b: 5 }, }, 3: { direct: [], transitions: { a: 4 }, }, 4: { direct: [6, 7, 8], transitions: {}, }, 5: { direct: [], transitions: { a: 2 }, }, 6: { direct: [], transitions: { b: 7 }, }, 7: { direct: [], transitions: { c: 8 }, }, 8: { direct : [], transitions: {}, }, }; const fn = searchNFAStep(nodes); t.assertEq(fn([1], "a", null, []), [5]); t.assertEq(fn([1], "b", null, []), []); t.assertEq(fn([3, 2, 1], "a", null, []), [5, 6, 7, 8]); t.assertEq(fn([5, 6, 7, 8], "a", null, []), [2]); t.assertEq(fn([5, 6, 7, 8], "b", null, []), [7]); t.assertEq(fn([5, 6, 7, 8], "c", null, []), [8]); t.assertEq(fn([5, 6, 7, 8], "d", null, []), []); }); t.testing("recursive call to self via interpretMetacharacter", () => { const regex = "^[behilos]*$"; const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex)))); const str = "sebo"; const fn = searchNFAStep(nfa.nodes, str.length); t.assertEq(fn([1], str[0], 0), [3, 7]); t.assertEq(fn([1], str[0], 1), []); t.assertEq(fn([1], " ", 0), []); t.assertEq(fn([3], str[0], 1), [3, 7]); t.assertEq(fn([3], " ", 1), []); t.assertEq(fn([3, 7], str[3], 4), [3, 7]); t.assertEq(fn([3, 7], " ", 3), []); t.assertEq(fn([1], str[0], 0), [3, 7]); t.assertEq(fn([3, 7], str[1], 1), [3, 7]); t.assertEq(fn([3, 7], str[2], 2), [3, 7]); t.assertEq(fn([3, 7], str[3], 3), [3, 7]); }); };
// searchNFAFn: runs the NFA over a whole string, returning the final frontier
// of active state ids (empty array = dead end).
const test_searchNFAFn = t => { t.start("searchNFAFn()"); t.testing("on composed sample regex", () => { const regex = "(a|b)*c"; const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex))));
t.assertEq(searchNFAFn(nfa, "c"), [10]); t.assertEq(searchNFAFn(nfa, "cc"), []); t.assertEq(searchNFAFn(nfa, "aaabbbc"), [10]); t.assertEq(searchNFAFn(nfa, "a"), [1, 3, 9]); }); };
// searchNFA: boolean acceptance — true iff the final frontier reaches nfa.end.
const test_searchNFA = t => { t.start("searchNFA()"); t.testing("on composed sample regex", () => { const regex = "(a|b)*c"; const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex)))); t.assertEq(searchNFA(nfa, "c"), true); t.assertEq(searchNFA(nfa, "cc"), false); t.assertEq(searchNFA(nfa, "aaabbbc"), true); t.assertEq(searchNFA(nfa, "ababac"), true); t.assertEq(searchNFA(nfa, "babac"), true); t.assertEq(searchNFA(nfa, "babaca"), false); }); t.testing("regex with metacharacters", () => { const regex = "^[behilos]*$"; const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex)))); t.assertEq(searchNFA(nfa, "helios"), true); t.assertEq(searchNFA(nfa, " helios"), false); t.assertEq(searchNFA(nfa, "helios "), false); t.assertEq(searchNFA(nfa, "abc"), false); }); };
// nodeID: canonical DFA-state name — sorted, deduped NFA ids joined by "-".
const test_nodeID = t => { t.start("nodeID()"); t.testing("single number", () => { t.assertEq(nodeID([1]), "1"); }); t.testing("plenty of duplicates", () => { t.assertEq(nodeID([1, 1, 1, 1, 1, 1]), "1"); }); t.testing("unsorted input", () => { t.assertEq( nodeID([5, 5, 11, 11, 11, 3, 4, 5, 123]), "3-4-5-11-123", ); }); };
// unNodeID: inverse of nodeID — splits "a-b-c" back into numeric ids.
// NOTE(review): typo "hyphes" in the label (should be "hyphens").
const test_unNodeID = t => { t.start("unNodeID()"); t.testing("no hyphes", () => { t.assertEq(unNodeID("123"), [123]); }); t.testing("with hyphens", () => { t.assertEq(unNodeID("1-2"), [1, 2]); t.assertEq(unNodeID("11-3-0"), [11, 3, 0]); }); };
// collectTransitions: for a set of NFA states, groups reachable targets (after
// following direct edges) under their single transition char; throws when a
// node carries more than one transition key.
const test_collectTransitions = t => { t.start("collectTransitions()"); t.testing("empty values", () => { t.assertEq(collectTransitions({}, []), {}); t.assertEq(collectTransitions({ a: 1 }, []), {}); }); t.testing("no transitions in states", () => { t.assertEq( collectTransitions( { 1: { transitions: {} }, 2: { transitions: {} }, 3: { transitions: {} }, }, [1, 3], ), {}, ); }); t.testing("error on transitions have more than one possibility", () => { try {
collectTransitions( { 1: { transitions: { b: 2, c: 3 }}}, [1], ); t.assertEq(true, false); } catch (e) { const msg = "unexpected transitions with more than 1 key"; t.assertEq(e.message, msg); } }); t.testing("many values grouped under a single transition key", () => { const nodes = { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [], transitions: { a: 3 }, }, 3: { direct: [], transitions: { a: 4 }, }, 4: { direct: [], transitions: {}, }, }; t.assertEq(collectTransitions(nodes, []), {}); t.assertEq(collectTransitions(nodes, [1]), { a: [2]}); t.assertEq(collectTransitions(nodes, [1, 2]), { a: [2, 3]}); t.assertEq( collectTransitions(nodes, [1, 2, 3]), { a: [2, 3, 4]}, ); t.assertEq( collectTransitions(nodes, [1, 2, 3, 4]), { a: [2, 3, 4]}, ); }); t.testing("transition with multiple direct values", () => { const nodes = { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [ 4 ], transitions: {}, }, 4: { direct: [], transitions: { a: 2 }, }, }; t.assertEq(collectTransitions(nodes, [1]), { a: [4]}); t.assertEq(collectTransitions(nodes, [1, 2, 3, 4]), { a: [4]}); }); };
// computePending: dedupes and sorts candidate DFA state sets, skipping falsy
// entries, merging the two input lists.
// NOTE(review): the first two assertEq statements below lack semicolons and
// rely on ASI — harmless here, but worth normalizing.
const test_computePending = t => { t.start("computePending()"); t.testing("empty values", () => { t.assertEq(computePending([], []), []) t.assertEq(computePending([["IGNORED"]], []), []) }); t.testing("deduped states", () => { t.assertEq( computePending( [null, [1, 2], [2, 3], [1, 3], [1, 2]], [], ), [[1, 2], [1, 3], [2, 3]], ); t.assertEq( computePending( [null, [1, 2], [1, 3]], [[2, 3], [1, 2], [1, 3]], ), [[1, 2], [1, 3], [2, 3]], ); }); };
// toDFANodes: worklist-driven subset construction — builds DFA nodes keyed by
// nodeID(), marking `end` when the NFA accept state is in the subset; returns
// the accumulator untouched for empty/already-built pending states.
const test_toDFANodes = t => { t.start("toDFANodes()"); t.testing("empty values", () => { t.assertEq(toDFANodes(null, null, [], "RET"), "RET"); }); t.testing("ignore states that were already built", () => { const nodes = { "1-2-3": true, }; const states = [[1, 2, 3], [3, 2, 1]]; t.assertEq(toDFANodes(null, null, states, nodes), nodes); }); t.testing("recursively build the DFA", () => { const end = 4; const
nfaNodes = { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 1, 3 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [], transitions: {}, }, }; const pending = [[1], [1, 2], [1, 3, 4], [4], [1, 2]]; const dfaNodes = {}; const expected = { "1": { end: false, transitions: { a: "1-3" }, }, "1-2": { end: false, transitions: { a: "1-3" }, }, "1-3": { end: false, transitions: { a: "1-3", b: "4" } }, "1-3-4": { end: true, transitions: { a: "1-3", b: "4" }, }, "4": { end: true, transitions: {}, }, }; t.assertEq( toDFANodes(end, nfaNodes, pending, dfaNodes), expected, ); }); };
// toDFA: full NFA->DFA conversion entry point; the sample NFA is also checked
// to be exactly what compileNFA("(a|b)*c") produces.
const test_toDFA = t => { t.start("toDFA()"); t.testing("minimal NFA", () => { const nfa = { start: 1, end: 2, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; const expected = { start: "2", nodes: { "2": { end: true, transitions: {}, }, }, }; t.assertEq(toDFA(nfa), expected); }); t.testing("sample regex NFA", () => { const nfa = { start: 7, end: 10, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [ 7 ], transitions: {}, }, 7: { direct: [ 5, 8 ], transitions: {}, }, 8: { direct: [ 9 ], transitions: {}, }, 9: { direct: [], transitions: { c: 10 }, }, 10: { direct: [], transitions: {}, }, }, }; const expected = { start: "1-3-9", nodes: { "1-3-9": { end: false, transitions: { a: "1-3-9", b: "1-3-9", c: "10", }, }, "10": { end: true, transitions: {}, }, }, }; t.assertEq(nfa, compileNFA("(a|b)*c")); t.assertEq(toDFA(nfa), expected); }); };
// searchDFAStep: single DFA step — follows the labelled transition or returns
// reduced(false) to short-circuit the fold when no transition exists; an
// unknown state id surfaces as the raw TypeError pinned below.
const test_searchDFAStep = t => { t.start("searchDFAStep()"); t.testing("transition must exist", () => { const nodes = { "1": { transitions: { a: "1", b: "1-2", c: "1-3-9", }, }, "1-2": { transitions: { x: "1-3-9", }, }, "1-3-9": { transitions: {}, }, }; const fn = searchDFAStep(nodes); t.assertEq(fn("1", "a"),
"1"); t.assertEq(fn("1", "b"), "1-2"); t.assertEq(fn("1", "c"), "1-3-9"); t.assertEq(fn("1", "d"), reduced(false)); t.assertEq(fn("1-2", "x"), "1-3-9"); t.assertEq(fn("1-2", "y"), reduced(false)); t.assertEq(fn("1-3-9", "a"), reduced(false)); }); t.testing("error on non-existent state", () => { try { searchDFAStep({})("non-existent", null); t.assertEq(true, false); } catch (e) { const msg = "Cannot read properties of undefined " + "(reading 'transitions')"; t.assertEq(e.message, msg); } }); };
// searchDFAFn: runs the DFA over a string; returns the final state id, or
// false when a step dead-ended.
const test_searchDFAFn = t => { t.start("searchDFAFn()"); t.testing("on composed sample regex", () => { const regex = "(a|b)*c"; const dfa = toDFA( buildNFA(toPostfix(tokenizeRegex(explode(regex)))), ); t.assertEq(searchDFAFn(dfa, "a"), "1-3-9"); t.assertEq(searchDFAFn(dfa, "b"), "1-3-9"); t.assertEq(searchDFAFn(dfa, "c"), "10"); t.assertEq(searchDFAFn(dfa, "cc"), false); t.assertEq(searchDFAFn(dfa, "aaabbbc"), "10"); t.assertEq(searchDFAFn(dfa, "ababac"), "10"); t.assertEq(searchDFAFn(dfa, "babac"), "10"); t.assertEq(searchDFAFn(dfa, "babaca"), false); }); };
// searchDFA: boolean acceptance — true only when the final state is an end state.
const test_searchDFA = t => { t.start("searchDFA()"); t.testing("on composed sample regex", () => { const regex = "(a|b)*c"; const dfa = toDFA( buildNFA(toPostfix(tokenizeRegex(explode(regex)))), ); t.assertEq(searchDFA(dfa, "a"), false); t.assertEq(searchDFA(dfa, "b"), false); t.assertEq(searchDFA(dfa, "c"), true); t.assertEq(searchDFA(dfa, "cc"), false); t.assertEq(searchDFA(dfa, "aaabbbc"), true); t.assertEq(searchDFA(dfa, "ababac"), true); t.assertEq(searchDFA(dfa, "babac"), true); t.assertEq(searchDFA(dfa, "babaca"), false); }); };
// parse: string pattern -> postfix token list (tokenize + toPostfix combined);
// the FIXME comments below were in the original expected data (suspected
// missing/extra concat placement for anchors) and are kept as-is.
const test_parse = t => { t.start("parse()"); t.testing("regex table", () => { const table = [{ in: "", expected: [], }, { in: "a", expected: ["a"], }, { in: "a|b", expected: ["a", "b", { operator: "|" }], }, { in: "(a|b)*a?(a|b)+", expected: [ "a", "b", { operator: "|" }, { operator: "*" }, "a", { operator: "?"
}, { operator: "concat" }, "a", "b", { operator: "|" }, { operator: "+" }, { operator: "concat" }, ], }, { in: "^[behilos]*$", expected: [ { meta: "^" }, { meta: "class", set: [ "b", "e", "h", "i", "l", "o", "s", ], caret: false, }, { operator: "*" }, { operator: "concat" }, // FIXME
{ meta: "$" }, { operator: "concat" }, // FIXME
], }]; for (const tcase of table) { t.assertEq(parse(tcase.in), tcase.expected); } }); };
// compileNFA: parse + buildNFA end-to-end; expected node tables spelled out
// for "(a|b)*c" and "((a+b)?c|d)*e", then acceptance re-checked via searchNFA.
const test_compileNFA = t => { t.start("compileNFA()"); t.testing("empty regex", () => { const expected = { start: 1, end: 2, nodes: { 1: { direct: [ 2 ], transitions: {}, }, 2: { direct: [], transitions: {}, }, }, }; t.assertEq(compileNFA(""), expected); }); t.testing("compiled sample regex", () => { const nfa1 = compileNFA("(a|b)*c"); const expected1 = { start: 7, end: 10, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 6 ], transitions: {}, }, 3: { direct: [], transitions: { b: 4 }, }, 4: { direct: [ 6 ], transitions: {}, }, 5: { direct: [ 1, 3 ], transitions: {}, }, 6: { direct: [ 7 ], transitions: {}, }, 7: { direct: [ 5, 8 ], transitions: {}, }, 8: { direct: [ 9 ], transitions: {}, }, 9: { direct: [], transitions: { c: 10 }, }, 10: { direct: [], transitions: {}, }, }, }; const nfa2 = compileNFA("((a+b)?c|d)*e"); const expected2 = { start: 15, end: 18, nodes: { 1: { direct: [], transitions: { a: 2 }, }, 2: { direct: [ 3 ], transitions: {}, }, 3: { direct: [ 1, 4 ], transitions: {}, }, 4: { direct: [ 5 ], transitions: {}, }, 5: { direct: [], transitions: { b: 6 }, }, 6: { direct: [ 8 ], transitions: {}, }, 7: { direct: [ 1, 8 ], transitions: {}, }, 8: { direct: [ 9 ], transitions: {}, }, 9: { direct: [], transitions: { c: 10 }, }, 10: { direct: [ 14 ], transitions: {}, }, 11: { direct: [], transitions: { d: 12 }, }, 12: { direct: [ 14 ], transitions: {}, }, 13: { direct: [ 7, 11 ], transitions: {}, }, 14: { direct: [ 15 ], transitions: {}, }, 15: { direct: [ 13, 16 ], transitions: {}, }, 16: { direct: [ 17 ], transitions: {},
}, 17: { direct: [], transitions: { e: 18 }, }, 18: { direct: [], transitions: {}, }, }, }; t.assertEq(nfa1, expected1); t.assertEq(nfa2, expected2); }); t.testing("composed sample regex", () => { const nfa = compileNFA("(a|b)*c"); t.assertEq(searchNFA(nfa, "ababac"), true); t.assertEq(searchNFA(nfa, "babac"), true); t.assertEq(searchNFA(nfa, "babaca"), false); }); };
// compileDFA: parse + buildNFA + toDFA end-to-end, with full expected DFA tables.
const test_compileDFA = t => { t.start("compileDFA()"); t.testing("empty regex", () => { const expected = { start: "2", nodes: { "2": { end: true, transitions: {}, }, }, }; t.assertEq(compileDFA(""), expected); }); t.testing("compiled sample regex", () => { const dfa1 = compileDFA("(a|b)*c"); const dfa2 = compileDFA("((a+b)?c|d)*e"); const expected1 = { start: "1-3-9", nodes: { "1-3-9": { end: false, transitions: { a: "1-3-9", b: "1-3-9", c: "10", }, }, "10": { end: true, transitions: {}, }, }, }; const expected2 = { start: "1-9-11-17", nodes: { "1-5": { end: false, transitions: { a: "1-5", b: "9", }, }, "1-9-11-17": { end: false, transitions: { a: "1-5", d: "1-9-11-17", e: "18", c: "1-9-11-17", }, }, "9": { end: false, transitions: { c: "1-9-11-17", }, }, "18": { end: true, transitions: {}, }, }, }; t.assertEq(dfa1, expected1); t.assertEq(dfa2, expected2); }); };
// compile: top-level API — returns a matcher function for a pattern string.
const test_compile = t => { t.start("compile()"); t.testing("composed sample regex", () => { const matcher1 = compile("(a|b)*c"); t.assertEq(matcher1("ababac"), true); t.assertEq(matcher1("ababaca"), false); const matcher2 = compile("((a+)b)*c"); t.assertEq(matcher2("aaababc"), true); t.assertEq(matcher2("aaabbc"), false); }); };
// Register every test with the sjs harness (includes tests defined earlier in
// the file, before this chunk).
runTests([ test_shouldConcat, test_isOperator, test_numFromDigits, test_escapingStateStep, test_rangeStateStep, test_classStateStep, test_TRANSITION_FNS, test_ANCHOR_FNS, test_isAnchor, test_isTransition, test_tokenizeRegexStep, test_tokenizeRegexFn, test_tokenizeRegex, test_shouldPush, test_findLowerPrecedenceItem, test_toPostfixStep, test_toPostfix, test_emptyNFA, test_literal, test_concat, test_union,
test_zeroOrMore, test_oneOrMore, test_zeroOrOne, test_compareRange, test_compressRangeStep, test_compressCharacterRanges, test_inRange, test_characterClass, test_wildcard, test_caret, test_dollar, test_OPERATORS_FNS, test_baseNFA, test_buildNFAStep, test_buildNFA, test_allDirects, test_interpretMetacharacter, test_performTransition, test_searchNFAStep, test_searchNFAFn, test_searchNFA, test_nodeID, test_unNodeID, test_collectTransitions, test_computePending, test_toDFANodes, test_toDFA, test_searchDFAStep, test_searchDFAFn, test_searchDFA, test_parse, test_compileNFA, test_compileDFA, test_compile, ]);