diff options
Diffstat (limited to '')
| -rw-r--r-- | src/paca.mjs | 49 | ||||
| -rw-r--r-- | tests/paca.mjs | 194 |
2 files changed, 242 insertions, 1 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index ae14538..42c2f88 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -15,7 +15,7 @@ const ConcatStep = { CLASS: "class", }; -const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]); +const nonConcatOperators = new Set(["*", "+", "?", "|", ")", "$"]); const shouldConcat = (char, next) => next !== undefined && @@ -272,6 +272,44 @@ const TRANSITION_FNS = { }), }; +const ANCHOR_FNS = { + "^": ({ out, state, context }, _char, index, _next) => + index !== 0 + ? reduced({ + out, + state, + context, + error: new SyntaxError( + "^ not at the start of the expression", + ), + }) + : { + out: out.concat({ operator: "caret" }), + state, + context, + }, + "$": ({ out, state, context }, _char, _index, next) => + next !== undefined + ? reduced({ + out, + state, + context, + error: new SyntaxError( + "$ not at the end of the expression", + ), + }) + : { + out: out.concat({ operator: "dollar" }), + state, + context, + }, +}; + +const anchors = new Set(Object.keys(ANCHOR_FNS)); + +const isAnchor = char => + anchors.has(char); + const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS)); const isOperator = char => @@ -298,6 +336,15 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { ); } + if (isAnchor(char)) { + return ANCHOR_FNS[char]( + { out, state, context }, + char, + index, + next, + ); + } + const op = isOperator(char) ? { operator: char } : char; return { out: out.concat( diff --git a/tests/paca.mjs b/tests/paca.mjs index 14a6cf2..134fb77 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -10,6 +10,8 @@ import { rangeStateStep, classStateStep, TRANSITION_FNS, + ANCHOR_FNS, + isAnchor, isOperator, tokenizeRegexStep, tokenizeRegexFn, @@ -552,6 +554,70 @@ const test_TRANSITION_FNS = t => { }); }; +const test_ANCHOR_FNS = t => { + t.start("ANCHOR_FNS"); + + t.testing(`"^" error when not the first char`, () => { + const { value: { error }} = ANCHOR_FNS["^"]({}, null, 1, null); + const message = "^ not at the start of the expression" + t.assertEq(error.message, message); + t.assertEq(error instanceof SyntaxError, true); + }); + + t.testing(`"$" error when not the last char`, () => { + const { value: { error }} = ANCHOR_FNS["$"]( + {}, + null, + null, + "a", + ); + const message = "$ not at the end of the expression"; + t.assertEq(error.message, message); + t.assertEq(error instanceof SyntaxError, true); + }); + + t.testing("caret operator gets added to output", () => { + const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null); + const expected = { + out: [ 1, { operator: "caret" } ], + state: undefined, + context: undefined, + }; + t.assertEq(given, expected); + }); + + t.testing("dollar operator gets added to output", () => { + const given = ANCHOR_FNS["$"]( + { out: [ 2 ] }, + null, + null, + undefined, + ); + const expected = { + out: [ 2, { operator: "dollar" } ], + state: undefined, + context: undefined, + }; + t.assertEq(given, expected); + }); +}; + +const test_isAnchor = t => { + t.start("isAnchor()"); + + t.testing("anchors are true", () => { + t.assertEq(isAnchor("^"), true); + t.assertEq(isAnchor("$"), true); + }); + + t.testing("false for everything else", () => { + t.assertEq(isAnchor("*"), false); + t.assertEq(isAnchor("\\"), false); + t.assertEq(isAnchor("a"), false); + t.assertEq(isAnchor("_"), false); + }); +}; + const test_isOperator = t => { t.start("isOperator()"); @@ -578,6 +644,8 @@ const test_tokenizeRegexStep = t => { const oparen = { operator: "(" }; const cparen = { operator: ")" }; const star = { operator: "*" }; + const caret = { operator: "caret" }; + const dollar = { operator: "dollar" }; t.testing("when escaping we get whatever the char is", () => { @@ -722,6 +790,130 @@ const test_tokenizeRegexStep = t => { t.assertEq(given, steps); }); + t.testing("anchors get detected as such", () => { + const regex = "^[behilos]*$"; + const stepFn = tokenizeRegexStep(regex); + const steps = [{ + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, { + out: [caret], + state: ConcatStep.ACCEPTING, + context: null, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: [], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i", "l"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i", "l", "o"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i", "l", "o", "s"], + }, + }, { + out: [caret, { + operator: "class", + set: [ "b", "e", "h", "i", "l", "o", "s" ], + }], + state: "accepting", + context: null, + }, { + out: [caret, { + operator: "class", + set: [ "b", "e", "h", "i", "l", "o", "s" ], + }, star], + state: "accepting", + context: null, + }, { + out: [caret, { + operator: "class", + set: [ "b", "e", "h", "i", "l", "o", "s" ], + }, star, dollar], + state: "accepting", + context: null, + }]; + const given = reductions( + steps, + (acc, el, i) => { + const ret = stepFn(acc, regex[i], i); + t.assertEq(ret, el); + return ret; + }, + ); + t.assertEq(given, steps); + }); + t.testing("multichar range operator {m,n} is parsed right", () => { const table = [{ regex: "a{1,2}", @@ -2836,6 +3028,8 @@ runTests([ test_rangeStateStep, test_classStateStep, test_TRANSITION_FNS, + test_ANCHOR_FNS, + test_isAnchor, test_isOperator, test_tokenizeRegexStep, test_tokenizeRegexFn, |
