From 43946ca0c9e19f904b7f763b9dc590d8095e6472 Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Tue, 15 Jul 2025 20:43:57 -0300 Subject: Support tokenizing `^` and `$` anchors * src/paca.mjs (ANCHOR_FNS): Add simple handlers for the ^ and $ anchors that validate only the position of the character in the pattern during tokenization. (isAnchor): Add simple boolean function to identify anchor characters. (tokenizeRegexStep): Check whether the character `isAnchor()`, and call the appropriate `ANCHOR_FNS[char]` when true. * tests/paca.mjs (test_ANCHOR_FNS): Add test with 4 cases - 2 for success and 2 for errors for ^ and $. (test_isAnchor): Add obligatory simple test cases. (test_tokenizeRegexStep): Include test case for tokenizing patterns with character class. --- src/paca.mjs | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'src/paca.mjs') diff --git a/src/paca.mjs b/src/paca.mjs index ae14538..42c2f88 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -15,7 +15,7 @@ const ConcatStep = { CLASS: "class", }; -const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]); +const nonConcatOperators = new Set(["*", "+", "?", "|", ")", "$"]); const shouldConcat = (char, next) => next !== undefined && @@ -272,6 +272,44 @@ const TRANSITION_FNS = { }), }; +const ANCHOR_FNS = { + "^": ({ out, state, context }, _char, index, _next) => + index !== 0 + ? reduced({ + out, + state, + context, + error: new SyntaxError( + "^ not at the start of the expression", + ), + }) + : { + out: out.concat({ operator: "caret" }), + state, + context, + }, + "$": ({ out, state, context }, _char, _index, next) => + next !== undefined + ?
reduced({ + out, + state, + context, + error: new SyntaxError( + "$ not at the end of the expression", + ), + }) + : { + out: out.concat({ operator: "dollar" }), + state, + context, + }, +}; + +const anchors = new Set(Object.keys(ANCHOR_FNS)); + +const isAnchor = char => + anchors.has(char); + const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS)); const isOperator = char => @@ -298,6 +336,15 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { ); } + if (isAnchor(char)) { + return ANCHOR_FNS[char]( + { out, state, context }, + char, + index, + next, + ); + } + const op = isOperator(char) ? { operator: char } : char; return { out: out.concat( -- cgit v1.2.3