summary refs log tree commit diff
path: root/src/paca.mjs
diff options
context:
space:
mode:
author EuAndreh <eu@euandre.org> 2025-07-15 20:43:57 -0300
committer EuAndreh <eu@euandre.org> 2025-07-15 20:43:57 -0300
commit 43946ca0c9e19f904b7f763b9dc590d8095e6472 (patch)
tree a5549d003fff4459876b23539e428fc42c007508 /src/paca.mjs
parent tests/paca.mjs (test_shouldConcat): Also hoist import, definition and positio... (diff)
download paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.gz
paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.xz
Support tokenizing `^` and `$` anchors
* src/paca.mjs (ANCHOR_FNS): Add simple handlers for the ^ and $ anchors, which only check the position of the character in the pattern as validation during tokenization.
(isAnchor): Add simple boolean function to identify anchor characters.
(tokenizeRegexStep): Check whether the character `isAnchor()`, and call the appropriate `ANCHOR_FNS[char]` when true.
* tests/paca.mjs (test_ANCHOR_FNS): Add test with 4 cases - 2 for success and 2 for errors, for ^ and $.
(test_isAnchor): Add obligatory simple test cases.
(test_tokenizeRegexStep): Include test case for tokenizing patterns with character class.
Diffstat (limited to 'src/paca.mjs')
-rw-r--r-- src/paca.mjs 49
1 files changed, 48 insertions, 1 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index ae14538..42c2f88 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -15,7 +15,7 @@ const ConcatStep = {
CLASS: "class",
};
-const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);
+const nonConcatOperators = new Set(["*", "+", "?", "|", ")", "$"]); // "$" is newly added to this set; presumably so no concat is inserted before a trailing "$" - confirm against shouldConcat.
const shouldConcat = (char, next) =>
next !== undefined &&
@@ -272,6 +272,44 @@ const TRANSITION_FNS = {
}),
};
+const ANCHOR_FNS = { // Tokenizer handlers for anchor characters, keyed by the character; each takes ({ out, state, context }, char, index, next) and returns an object of the same shape.
+ "^": ({ out, state, context }, _char, index, _next) => // "^" is accepted only at index 0 of the pattern.
+ index !== 0
+ ? reduced({ // Error branch: the SyntaxError is stored in `error`, not thrown. reduced() is defined outside this hunk; presumably it stops the fold early - confirm.
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "^ not at the start of the expression",
+ ),
+ })
+ : {
+ out: out.concat({ operator: "caret" }), // Append a "caret" operator token; state and context pass through unchanged.
+ state,
+ context,
+ },
+ "$": ({ out, state, context }, _char, _index, next) => // "$" is accepted only when there is no following character.
+ next !== undefined // `next` is supplied by the caller; presumably undefined only at the end of the pattern - confirm in tokenizeRegexStep.
+ ? reduced({ // Error branch: same shape as for "^", with the SyntaxError carried in `error`.
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "$ not at the end of the expression",
+ ),
+ })
+ : {
+ out: out.concat({ operator: "dollar" }), // Append a "dollar" operator token; state and context pass through unchanged.
+ state,
+ context,
+ },
+};
+
+const anchors = new Set(Object.keys(ANCHOR_FNS)); // Anchor characters, derived from the keys of the handler table so the two cannot drift apart.
+
+const isAnchor = char => // True when `char` has a handler in ANCHOR_FNS (currently "^" or "$").
+ anchors.has(char);
+
const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS)); // Set of the keys of TRANSITION_FNS, i.e. the characters that have a transition handler.
const isOperator = char =>
@@ -298,6 +336,15 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
);
}
+ if (isAnchor(char)) {
+ return ANCHOR_FNS[char](
+ { out, state, context },
+ char,
+ index,
+ next,
+ );
+ }
+
const op = isOperator(char) ? { operator: char } : char;
return {
out: out.concat(