diff options
| author | EuAndreh <eu@euandre.org> | 2025-07-15 21:11:37 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2025-07-15 21:44:44 -0300 |
| commit | e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea (patch) | |
| tree | c2e5fd66f72ec2a1e11ba49005ad7d6dc1b0b428 | |
| parent | Support tokenizing `^` and `$` anchors (diff) | |
| download | paca-e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea.tar.gz paca-e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea.tar.xz | |
Support tokenizing `.` wildcard operator.
* src/paca.mjs
(isTransition): Add new function as an improved version of the raw
usage of `stateTransitionOperators`, analogous to `isAnchor()` and
`isOperator()`.
(operatorChars, isOperator): Add new static set `operatorChars` as
backing data of `isOperator()`, instead of an ad-hoc conditional in its
implementation. Also add the `.` character as an operator by
including it in the `operatorChars` set.
(tokenizeRegexStep): Use the new `isTransition()` function instead of
checking the set directly. Also tweak ternary to fit in 80 columns.
(PRECEDENCE): Add `.` operator with lowest precedence, as it is not
really operating on anything, and is instead a target to be operated
on.
* tests/paca.mjs
(test_isTransition): Add obligatory test cases.
(test_isOperator): Include test case for `.` wildcard operator.
Diffstat (limited to '')
| -rw-r--r-- | src/paca.mjs | 15 | ||||
| -rw-r--r-- | tests/paca.mjs | 19 |
2 files changed, 29 insertions, 5 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index 42c2f88..5d11b05 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -306,14 +306,16 @@ const ANCHOR_FNS = { }; const anchors = new Set(Object.keys(ANCHOR_FNS)); - const isAnchor = char => anchors.has(char); -const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS)); +const transitionChars = new Set(Object.keys(TRANSITION_FNS)); +const isTransition = char => + transitionChars.has(char); +const operatorChars = new Set([...nonConcatOperators, "(", "."]); const isOperator = char => - nonConcatOperators.has(char) || char == "("; + operatorChars.has(char); const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { const next = chars[index + 1]; @@ -327,7 +329,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { ); } - if (stateTransitionOperators.has(char)) { + if (isTransition(char)) { return TRANSITION_FNS[char]( { out, state, context }, char, @@ -349,7 +351,9 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { return { out: out.concat( op, - shouldConcat(char, next) ? [{ operator: "concat" }] : [], + shouldConcat(char, next) + ? 
[{ operator: "concat" }] + : [], ), state, context, @@ -385,6 +389,7 @@ const PRECEDENCE = { "concat": 2, "|": 1, "class": 1, + ".": 1, }; const shouldPush = (stack, token) => diff --git a/tests/paca.mjs b/tests/paca.mjs index 134fb77..bc6e1d0 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -12,6 +12,7 @@ import { TRANSITION_FNS, ANCHOR_FNS, isAnchor, + isTransition, isOperator, tokenizeRegexStep, tokenizeRegexFn, @@ -618,6 +619,22 @@ const test_isAnchor = t => { }); }; +const test_isTransition = t => { + t.start("isTransition()"); + + t.testing("transition chars are true", () => { + t.assertEq(isTransition("\\"), true); + t.assertEq(isTransition("["), true); + t.assertEq(isTransition("{"), true); + }); + + t.testing("false for everything else", () => { + t.assertEq(isTransition("."), false); + t.assertEq(isTransition("*"), false); + t.assertEq(isTransition("a"), false); + }); +}; + const test_isOperator = t => { t.start("isOperator()"); @@ -628,6 +645,7 @@ const test_isOperator = t => { t.assertEq(isOperator("?"), true); t.assertEq(isOperator("("), true); t.assertEq(isOperator(")"), true); + t.assertEq(isOperator("."), true); }); t.testing("false for everyday non-meta chars", () => { @@ -3030,6 +3048,7 @@ runTests([ test_TRANSITION_FNS, test_ANCHOR_FNS, test_isAnchor, + test_isTransition, test_isOperator, test_tokenizeRegexStep, test_tokenizeRegexFn, |
