author     EuAndreh <eu@euandre.org>  2025-07-20 09:31:16 -0300
committer  EuAndreh <eu@euandre.org>  2025-07-20 09:31:16 -0300
commit     846adfdbb0931b9bda93cf38383103b44d7bd62b (patch)
tree       5b321562a45af19f713bac31fcbea3cac35ffa26
parent     .gitignore: Remove trailing slash from node_modules rule (diff)
Add initial support for caret and dollar metacharacters
-rw-r--r--  src/paca.mjs     59
-rw-r--r--  tests/paca.mjs  163
2 files changed, 200 insertions, 22 deletions
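
In short: "^" and "$" stop being tokenized as operators and become meta tokens, mirroring "." and character classes, each with its own NFA constructor (caret() and dollar()) registered in METACHARACTERS_FNS. The caret handler appends the concat glue itself, while "$" is dropped from nonConcatOperators so the ordinary concatenation rule inserts the glue before it. A rough sketch of the token stream this should yield for "^[behilos]*$", lifted from the updated test expectations below; the relative import path is an assumption, and tokenizeRegex is assumed to return the final accumulated out of the step function:

    import { explode, tokenizeRegex } from "./src/paca.mjs";

    // Tokenize an anchored pattern; the expected result is taken from the
    // step-by-step expectations in test_tokenizeRegexStep.
    const tokens = tokenizeRegex(explode("^[behilos]*$"));
    // tokens should be:
    // [
    //   { meta: "^" },
    //   { operator: "concat" },
    //   { meta: "class", set: ["b", "e", "h", "i", "l", "o", "s"], caret: false },
    //   { operator: "*" },
    //   { operator: "concat" },
    //   { meta: "$" },
    // ]
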
diff --git a/src/paca.mjs b/src/paca.mjs
index 4a85067..21a28e8 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -15,7 +15,7 @@ const ConcatStep = {
CLASS: "class",
};
-const nonConcatOperators = new Set(["*", "+", "?", "|", ")", "$"]);
+const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);
const shouldConcat = (char, next) =>
next !== undefined &&
@@ -24,7 +24,7 @@ const shouldConcat = (char, next) =>
char !== "{" &&
!nonConcatOperators.has(next);
-const operatorChars = new Set([...nonConcatOperators, "("]);
+const operatorChars = new Set([...nonConcatOperators, "(", "$"]);
const isOperator = char =>
operatorChars.has(char);
@@ -301,7 +301,10 @@ const ANCHOR_FNS = {
),
})
: {
- out: out.concat({ operator: "caret" }),
+ out: out.concat([
+ { meta: "^" },
+ { operator: "concat" },
+ ]),
state,
context,
},
@@ -316,7 +319,7 @@ const ANCHOR_FNS = {
),
})
: {
- out: out.concat({ operator: "dollar" }),
+ out: out.concat([{ meta: "$" }]),
state,
context,
},
@@ -693,6 +696,52 @@ const wildcard = (_edge, id) => {
};
};
+const caret = (_edge, id) => {
+ const start = id + 0;
+ const end = id + 1;
+ return {
+ start,
+ end,
+ nodes: {
+ [start]: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "caret",
+ to: end,
+ },
+ },
+ [end]: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+};
+
+const dollar = (_edge, id) => {
+ const start = id + 0;
+ const end = id + 1;
+ return {
+ start,
+ end,
+ nodes: {
+ [start]: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "dollar",
+ to: end,
+ },
+ },
+ [end]: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+};
+
const OPERATORS_FNS = ({
zeroOrMoreFn = zeroOrMore,
oneOrMoreFn = oneOrMore,
@@ -717,6 +766,8 @@ const OPERATORS = OPERATORS_FNS();
const METACHARACTERS_FNS = {
"class": characterClass,
".": wildcard,
+ "^": caret,
+ "$": dollar,
};
const baseNFA = (token, id) => (
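
Both new constructors follow the same fragment convention as wildcard(): a two-node fragment whose start node carries a meta marker ({ op, to }) for the matcher to interpret, and whose end node is empty. A minimal sketch of what caret() returns, matching the unit test added below; the import path is an assumption, the first argument is unused, and the node IDs are just the id offset and id + 1:

    import { caret } from "./src/paca.mjs";

    const fragment = caret(null, 3);  // edge argument is ignored
    // fragment:
    // {
    //   start: 3,
    //   end: 4,
    //   nodes: {
    //     3: { direct: [], transitions: {}, meta: { op: "caret", to: 4 } },
    //     4: { direct: [], transitions: {} },
    //   },
    // }
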
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 36ab2a7..6913b9e 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -34,6 +34,8 @@ import {
inRange,
characterClass,
wildcard,
+ caret,
+ dollar,
OPERATORS_FNS,
baseNFA,
buildNFAStep,
@@ -619,7 +621,7 @@ const test_ANCHOR_FNS = t => {
t.testing("caret operator gets added to output", () => {
const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null);
const expected = {
- out: [ 1, { operator: "caret" } ],
+ out: [ 1, { meta: "^" }, { operator: "concat" } ],
state: undefined,
context: undefined,
};
@@ -634,7 +636,7 @@ const test_ANCHOR_FNS = t => {
undefined,
);
const expected = {
- out: [ 2, { operator: "dollar" } ],
+ out: [ 2, { meta: "$" } ],
state: undefined,
context: undefined,
};
@@ -682,8 +684,8 @@ const test_tokenizeRegexStep = t => {
const oparen = { operator: "(" };
const cparen = { operator: ")" };
const star = { operator: "*" };
- const caret = { operator: "caret" };
- const dollar = { operator: "dollar" };
+ const caret = { meta: "^" };
+ const dollar = { meta: "$" };
t.testing("when escaping we get whatever the char is", () => {
@@ -836,11 +838,11 @@ const test_tokenizeRegexStep = t => {
state: ConcatStep.ACCEPTING,
context: null,
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.ACCEPTING,
context: null,
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -850,7 +852,7 @@ const test_tokenizeRegexStep = t => {
set: [],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -860,7 +862,7 @@ const test_tokenizeRegexStep = t => {
set: ["b"],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -870,7 +872,7 @@ const test_tokenizeRegexStep = t => {
set: ["b", "e"],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -880,7 +882,7 @@ const test_tokenizeRegexStep = t => {
set: ["b", "e", "h"],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -890,7 +892,7 @@ const test_tokenizeRegexStep = t => {
set: ["b", "e", "h", "i"],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -900,7 +902,7 @@ const test_tokenizeRegexStep = t => {
set: ["b", "e", "h", "i", "l"],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -910,7 +912,7 @@ const test_tokenizeRegexStep = t => {
set: ["b", "e", "h", "i", "l", "o"],
},
}, {
- out: [caret],
+ out: [caret, cat],
state: ConcatStep.CLASS,
context: {
range: {
@@ -920,7 +922,7 @@ const test_tokenizeRegexStep = t => {
set: ["b", "e", "h", "i", "l", "o", "s"],
},
}, {
- out: [caret, {
+ out: [caret, cat, {
meta: "class",
set: [ "b", "e", "h", "i", "l", "o", "s" ],
caret: false,
@@ -928,19 +930,19 @@ const test_tokenizeRegexStep = t => {
state: "accepting",
context: null,
}, {
- out: [caret, {
+ out: [caret, cat, {
meta: "class",
set: [ "b", "e", "h", "i", "l", "o", "s" ],
caret: false,
- }, star],
+ }, star, cat],
state: "accepting",
context: null,
}, {
- out: [caret, {
+ out: [caret, cat, {
meta: "class",
set: [ "b", "e", "h", "i", "l", "o", "s" ],
caret: false,
- }, star, dollar],
+ }, star, cat, dollar],
state: "accepting",
context: null,
}];
@@ -2281,6 +2283,58 @@ const test_wildcard = t => {
});
};
+const test_caret = t => {
+ t.start("caret()");
+
+ t.testing("we get the NFA with the caret meta attribute", () => {
+ const expected = {
+ start: 3,
+ end: 4,
+ nodes: {
+ 3: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "caret",
+ to: 4,
+ },
+ },
+ 4: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+ t.assertEq(caret("IGNORED", 3), expected);
+ });
+};
+
+const test_dollar = t => {
+ t.start("dollar()");
+
+ t.testing("we get the NFA that matches the end of string", () => {
+ const expected = {
+ start: 2,
+ end: 3,
+ nodes: {
+ 2: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "dollar",
+ to: 3,
+ },
+ },
+ 3: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+ t.assertEq(dollar("IGNORED", 2), expected);
+ });
+};
+
const test_OPERATORS_FNS = t => {
t.start("OPERATORS_FNS");
@@ -2600,6 +2654,69 @@ const test_buildNFA = t => {
expected,
);
});
+
+ t.testing("example with metacharacters", () => {
+ const regex = "^[behilos]*$";
+ const expected = {
+ start: 1,
+ end: 8,
+ nodes: {
+ 1: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "caret",
+ to: 2,
+ },
+ },
+ 2: {
+ direct: [5],
+ transitions: {},
+ },
+ 3: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "includes",
+ to: 4,
+ matches: new Set([
+ "b", "e", "h", "i", "l",
+ "o", "s",
+ ]),
+ ranges: {},
+ },
+ },
+ 4: {
+ direct: [5],
+ transitions: {},
+ },
+ 5: {
+ direct: [3, 6],
+ transitions: {},
+ },
+ 6: {
+ direct: [7],
+ transitions: {},
+ },
+ 7: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "dollar",
+ to: 8,
+ },
+ },
+ 8: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+ t.assertEq(
+ buildNFA(toPostfix(tokenizeRegex(explode(regex)))),
+ expected,
+ );
+ });
};
const test_allDirects = t => {
@@ -2844,6 +2961,14 @@ const test_searchNFA = t => {
t.assertEq(searchNFA(nfa, "babac"), true);
t.assertEq(searchNFA(nfa, "babaca"), false);
});
+
+ t.testing("regex with metacharacters", () => {
+ const regex = "^[behilos]*";
+ const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex))));
+ t.assertEq(searchNFA(nfa, "helios"), true);
+ t.assertEq(searchNFA(nfa, "helios "), false);
+ t.assertEq(searchNFA(nfa, "abc"), false);
+ });
};
const test_nodeID = t => {
@@ -3522,6 +3647,8 @@ runTests([
test_inRange,
test_characterClass,
test_wildcard,
+ test_caret,
+ test_dollar,
test_OPERATORS_FNS,
test_baseNFA,
test_buildNFAStep,
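
Taken together, the change is exercised end to end by the new searchNFA test. A sketch of that usage, mirroring the assertions added above; the relative import path is an assumption, and the functions are the ones the tests already import:

    import {
    	explode,
    	tokenizeRegex,
    	toPostfix,
    	buildNFA,
    	searchNFA,
    } from "./src/paca.mjs";

    // Anchored prefix match, as checked in test_searchNFA.
    const nfa = buildNFA(toPostfix(tokenizeRegex(explode("^[behilos]*"))));
    searchNFA(nfa, "helios");   // => true
    searchNFA(nfa, "helios ");  // => false
    searchNFA(nfa, "abc");      // => false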