diff options
| author | EuAndreh <eu@euandre.org> | 2025-07-15 20:43:57 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2025-07-15 20:43:57 -0300 |
| commit | 43946ca0c9e19f904b7f763b9dc590d8095e6472 (patch) | |
| tree | a5549d003fff4459876b23539e428fc42c007508 /tests/paca.mjs | |
| parent | tests/paca.mjs (test_shouldConcat): Also hoist import, definition and positio... (diff) | |
| download | paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.gz paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.xz | |
Support tokenizing `^` and `$` anchors
* src/paca.mjs
(ANCHOR_FNS): Add simple handlers for the ^ and $ anchors, which only
validate the position of the character in the pattern during
tokenization.
(isAnchor): Add simple boolean function to identify anchor characters.
(tokenizeRegexStep): Include check if character `isAnchor()`, and call
the appropriate `ANCHOR_FNS[char]` when true.
* tests/paca.mjs
(test_ANCHOR_FNS): Add test with 4 cases - 2 for success and 2 for
errors for ^ and $.
(test_isAnchor): Add obligatory simple test cases.
(test_tokenizeRegexStep): Include test case for tokenizing patterns
with `^` and `$` anchors around a character class.
Diffstat (limited to '')
| -rw-r--r-- | tests/paca.mjs | 194 |
1 files changed, 194 insertions, 0 deletions
diff --git a/tests/paca.mjs b/tests/paca.mjs index 14a6cf2..134fb77 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -10,6 +10,8 @@ import { rangeStateStep, classStateStep, TRANSITION_FNS, + ANCHOR_FNS, + isAnchor, isOperator, tokenizeRegexStep, tokenizeRegexFn, @@ -552,6 +554,70 @@ const test_TRANSITION_FNS = t => { }); }; +const test_ANCHOR_FNS = t => { + t.start("ANCHOR_FNS"); + + t.testing(`"^" error when not the first char`, () => { + const { value: { error }} = ANCHOR_FNS["^"]({}, null, 1, null); + const message = "^ not at the start of the expression" + t.assertEq(error.message, message); + t.assertEq(error instanceof SyntaxError, true); + }); + + t.testing(`"$" error when not the last char`, () => { + const { value: { error }} = ANCHOR_FNS["$"]( + {}, + null, + null, + "a", + ); + const message = "$ not at the end of the expression"; + t.assertEq(error.message, message); + t.assertEq(error instanceof SyntaxError, true); + }); + + t.testing("caret operator gets added to output", () => { + const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null); + const expected = { + out: [ 1, { operator: "caret" } ], + state: undefined, + context: undefined, + }; + t.assertEq(given, expected); + }); + + t.testing("dollar operator gets added to output", () => { + const given = ANCHOR_FNS["$"]( + { out: [ 2 ] }, + null, + null, + undefined, + ); + const expected = { + out: [ 2, { operator: "dollar" } ], + state: undefined, + context: undefined, + }; + t.assertEq(given, expected); + }); +}; + +const test_isAnchor = t => { + t.start("isAnchor()"); + + t.testing("anchors are true", () => { + t.assertEq(isAnchor("^"), true); + t.assertEq(isAnchor("$"), true); + }); + + t.testing("false for everything else", () => { + t.assertEq(isAnchor("*"), false); + t.assertEq(isAnchor("\\"), false); + t.assertEq(isAnchor("a"), false); + t.assertEq(isAnchor("_"), false); + }); +}; + const test_isOperator = t => { t.start("isOperator()"); @@ -578,6 +644,8 @@ const test_tokenizeRegexStep 
= t => { const oparen = { operator: "(" }; const cparen = { operator: ")" }; const star = { operator: "*" }; + const caret = { operator: "caret" }; + const dollar = { operator: "dollar" }; t.testing("when escaping we get whatever the char is", () => { @@ -722,6 +790,130 @@ const test_tokenizeRegexStep = t => { t.assertEq(given, steps); }); + t.testing("anchors get detected as such", () => { + const regex = "^[behilos]*$"; + const stepFn = tokenizeRegexStep(regex); + const steps = [{ + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, { + out: [caret], + state: ConcatStep.ACCEPTING, + context: null, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: [], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i", "l"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i", "l", "o"], + }, + }, { + out: [caret], + state: ConcatStep.CLASS, + context: { + range: { + from: null, + where: "from", + }, + set: ["b", "e", "h", "i", "l", "o", "s"], + }, + }, { + out: [caret, { + operator: "class", + set: [ "b", "e", "h", "i", "l", "o", "s" ], + }], + state: "accepting", + context: null, + }, { + out: [caret, { + operator: "class", + set: [ "b", "e", "h", "i", "l", "o", "s" ], + }, star], + state: 
"accepting", + context: null, + }, { + out: [caret, { + operator: "class", + set: [ "b", "e", "h", "i", "l", "o", "s" ], + }, star, dollar], + state: "accepting", + context: null, + }]; + const given = reductions( + steps, + (acc, el, i) => { + const ret = stepFn(acc, regex[i], i); + t.assertEq(ret, el); + return ret; + }, + ); + t.assertEq(given, steps); + }); + t.testing("multichar range operator {m,n} is parsed right", () => { const table = [{ regex: "a{1,2}", @@ -2836,6 +3028,8 @@ runTests([ test_rangeStateStep, test_classStateStep, test_TRANSITION_FNS, + test_ANCHOR_FNS, + test_isAnchor, test_isOperator, test_tokenizeRegexStep, test_tokenizeRegexFn, |
