summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: EuAndreh <eu@euandre.org>, 2025-07-15 21:11:37 -0300
committer: EuAndreh <eu@euandre.org>, 2025-07-15 21:44:44 -0300
commit: e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea (patch)
tree: c2e5fd66f72ec2a1e11ba49005ad7d6dc1b0b428
parent: Support tokenizing `^` and `$` anchors (diff)
download: paca-e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea.tar.gz
download: paca-e808ad60c1a2b4f7793fd4ba5b70db37039fb1ea.tar.xz
Support tokenizing `.` wildcard operator.
* src/paca.mjs (isTransition): Add new function as an improved version of the raw usage of `stateTransitionOperators`, equivalent to `isAnchor()` and `isOperator()`.
(operatorChars, isOperator): Add new static set `operatorChars` as backing data of `isOperator()`, instead of ad-hoc conditional in its implementation. Also now add the `.` character as an operator by including it in the `operatorChars` set.
(tokenizeRegexStep): Use the new `isTransition()` function instead of checking the set directly. Also tweak ternary to fit in 80 columns.
(PRECEDENCE): Add `.` operator with lowest precedence, as it is not really operating on anything, and is instead a target to be operated on.
* tests/paca.mjs (test_isTransition): Add obligatory test cases.
(test_isOperator): Include test case for `.` wildcard operator.
Diffstat (limited to '')
-rw-r--r-- src/paca.mjs 15
-rw-r--r-- tests/paca.mjs 19
2 files changed, 29 insertions, 5 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 42c2f88..5d11b05 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -306,14 +306,16 @@ const ANCHOR_FNS = {
};
const anchors = new Set(Object.keys(ANCHOR_FNS));
-
const isAnchor = char =>
anchors.has(char);
-const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS));
+const transitionChars = new Set(Object.keys(TRANSITION_FNS));
+const isTransition = char =>
+ transitionChars.has(char);
+const operatorChars = new Set([...nonConcatOperators, "(", "."]);
const isOperator = char =>
- nonConcatOperators.has(char) || char == "(";
+ operatorChars.has(char);
const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
const next = chars[index + 1];
@@ -327,7 +329,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
);
}
- if (stateTransitionOperators.has(char)) {
+ if (isTransition(char)) {
return TRANSITION_FNS[char](
{ out, state, context },
char,
@@ -349,7 +351,9 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
return {
out: out.concat(
op,
- shouldConcat(char, next) ? [{ operator: "concat" }] : [],
+ shouldConcat(char, next)
+ ? [{ operator: "concat" }]
+ : [],
),
state,
context,
@@ -385,6 +389,7 @@ const PRECEDENCE = {
"concat": 2,
"|": 1,
"class": 1,
+ ".": 1,
};
const shouldPush = (stack, token) =>
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 134fb77..bc6e1d0 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -12,6 +12,7 @@ import {
TRANSITION_FNS,
ANCHOR_FNS,
isAnchor,
+ isTransition,
isOperator,
tokenizeRegexStep,
tokenizeRegexFn,
@@ -618,6 +619,22 @@ const test_isAnchor = t => {
});
};
+const test_isTransition = t => {
+ t.start("isTransition()");
+
+ t.testing("transition chars are true", () => {
+ t.assertEq(isTransition("\\"), true);
+ t.assertEq(isTransition("["), true);
+ t.assertEq(isTransition("{"), true);
+ });
+
+ t.testing("false for everything else", () => {
+ t.assertEq(isTransition("."), false);
+ t.assertEq(isTransition("*"), false);
+ t.assertEq(isTransition("a"), false);
+ });
+};
+
const test_isOperator = t => {
t.start("isOperator()");
@@ -628,6 +645,7 @@ const test_isOperator = t => {
t.assertEq(isOperator("?"), true);
t.assertEq(isOperator("("), true);
t.assertEq(isOperator(")"), true);
+ t.assertEq(isOperator("."), true);
});
t.testing("false for everyday non-meta chars", () => {
@@ -3030,6 +3048,7 @@ runTests([
test_TRANSITION_FNS,
test_ANCHOR_FNS,
test_isAnchor,
+ test_isTransition,
test_isOperator,
test_tokenizeRegexStep,
test_tokenizeRegexFn,