From e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Tue, 15 Jul 2025 21:11:37 -0300 Subject: Support tokenizing `.` wildcard operator. * src/paca.mjs (isTransition): Add new function as an improved version of the raw usage of `stateTransitionOperators`, equivalent to `isAnchor()` and `isOperator()`. (operatorChars, isOperator): Add new static set `operatorChars` as backing data of `isOperator()`, instead of ad-hoc conditional in its implementation. Also now add the `.` character as an operator by including it in the `operatorChars` set. (tokenizeRegexStep): Use the new `isTransition()` function instead of checking the set directly. Also tweak ternary to fit in 80 columns. (PRECEDENCE): Add `.` operator with lowest precedence, as it is not really operating on anything, and is instead a target to be operated on. * tests/paca.mjs (test_isTransition): Add obligatory test cases. (test_isOperator): Include test case for `.` wildcard operator. --- src/paca.mjs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/paca.mjs b/src/paca.mjs index 42c2f88..5d11b05 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -306,14 +306,16 @@ const ANCHOR_FNS = { }; const anchors = new Set(Object.keys(ANCHOR_FNS)); - const isAnchor = char => anchors.has(char); -const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS)); +const transitionChars = new Set(Object.keys(TRANSITION_FNS)); +const isTransition = char => + transitionChars.has(char); +const operatorChars = new Set([...nonConcatOperators, "(", "."]); const isOperator = char => - nonConcatOperators.has(char) || char == "("; + operatorChars.has(char); const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { const next = chars[index + 1]; @@ -327,7 +329,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { ); } - if (stateTransitionOperators.has(char)) { + if (isTransition(char)) { return TRANSITION_FNS[char]( { out, state, context }, char, @@ -349,7 +351,9 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { return { out: out.concat( op, - shouldConcat(char, next) ? [{ operator: "concat" }] : [], + shouldConcat(char, next) + ? [{ operator: "concat" }] + : [], ), state, context, @@ -385,6 +389,7 @@ const PRECEDENCE = { "concat": 2, "|": 1, "class": 1, + ".": 1, }; const shouldPush = (stack, token) => -- cgit v1.2.3