summaryrefslogtreecommitdiff
path: root/src/paca.mjs
diff options
context:
space:
mode:
author EuAndreh <eu@euandre.org> 2025-07-16 10:32:04 -0300
committer EuAndreh <eu@euandre.org> 2025-07-16 10:32:04 -0300
commit 69e36152600599126d95ae1085b0777a830bc97b (patch)
tree 43527bb37c38cde9921f579153e85edf915ce97f /src/paca.mjs
parent Differentiate an "operator" from a "meta" character (diff)
download paca-69e36152600599126d95ae1085b0777a830bc97b.tar.gz
paca-69e36152600599126d95ae1085b0777a830bc97b.tar.xz
Build NFA nodes for "." and "class" metacharacters
* src/paca.mjs (characterClass): Add function that builds the NFA node for `{ meta: "class" }`. This node leaves the "direct" and "transitions" keys empty, and adds its data under the "meta" key. One option was to use an inline function that could simply be called directly during the search to check for a match, but I chose a data representation instead, in order to keep the NFA literal as obvious and self-representing as possible. Later, the searching part will have to properly interpret the data of "meta", instead of blindly executing an opaque function. This does separate the compilation from the execution logic, but keeps the NFA clean of opaque closures. (wildcard): Add function that builds the NFA node for `{ meta: "." }`. Similar to `characterClass()`, the new "meta" key contains pure data that represents the execution of the metacharacter during search. (baseNFA, literal): Rename the existing `baseNFA()` to `literal()`. Then add a new `baseNFA()` function that decides between a character literal and a metacharacter. (buildNFAStep): Instead of checking the type of `token`, we check if `token` has the "operator" attribute, since we now have metacharacters, which, like operators, aren't strings. (classStateStep): Add missing "caret" key to the final metacharacter output. It was already being detected, just not included in the result. (escapingStateStep): Stick to 80 columns. * tests/paca.mjs (test_characterClass, test_wildcard, test_baseNFA): Add obligatory test cases. (test_buildNFAStep): Include test case for metacharacter.
Diffstat (limited to 'src/paca.mjs')
-rw-r--r-- src/paca.mjs 82
1 files changed, 76 insertions, 6 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index ccff1ca..0625d5c 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -46,7 +46,9 @@ const escapingStateStep = ({ out, state, context }, char, _index, next) =>
: {
out: out.concat(
char,
- shouldConcat(null, next) ? [{ operator: "concat" }] : [],
+ shouldConcat(null, next)
+ ? [{ operator: "concat" }]
+ : [],
),
state: ConcatStep.ACCEPTING,
context,
@@ -167,8 +169,9 @@ const classStateStep = ({ out, state, context }, char, _index, _next) => {
return {
out: out.concat({
- meta: "class",
- set: context.set,
+ meta: "class",
+ set: context.set,
+ caret: !!context.caret,
}),
state: ConcatStep.ACCEPTING,
context: null,
@@ -495,7 +498,7 @@ const emptyNFA = () => {
};
};
-const baseNFA = (edge, id) => {
+const literal = (edge, id) => {
const startID = id + 0;
const endID = id + 1;
const nextID = id + 2;
@@ -620,6 +623,63 @@ const zeroOrOne = nfa => {
};
};
+const characterClass = ({ set, caret }, id) => {
+ const start = id + 0;
+ const end = id + 1;
+ const nextID = id + 2;
+ const { string, object } = Object.groupBy(set, x => typeof x);
+ const matches = new Set(string);
+ const ranges = Object.fromEntries(object.map(
+ ({ from, to }) => [ from.charCodeAt(0), to.charCodeAt(0) ],
+ ));
+ return {
+ start,
+ end,
+ nextID,
+ nodes: {
+ [start]: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: caret ? "excludes" : "includes",
+ to: end,
+ matches,
+ ranges,
+ },
+ },
+ [end]: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+};
+
+const wildcard = (_edge, id) => {
+ const start = id + 0;
+ const end = id + 1;
+ const nextID = id + 2;
+ return {
+ start,
+ end,
+ nextID,
+ nodes: {
+ [start]: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: true,
+ to: end,
+ },
+ },
+ [end]: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+};
+
const OPERATORS_FNS = ({
zeroOrMoreFn = zeroOrMore,
oneOrMoreFn = oneOrMore,
@@ -639,11 +699,21 @@ const OPERATORS_FNS = ({
last(stack),
)),
});
-
const OPERATORS = OPERATORS_FNS();
+const METACHARACTERS_FNS = {
+ "class": characterClass,
+ ".": wildcard,
+};
+
+const baseNFA = (token, id) => (
+ !token.meta
+ ? literal
+ : METACHARACTERS_FNS[token.meta]
+)(token, id);
+
const buildNFAStep = (stack, token) =>
- typeof token === "string"
+ !token.operator
? stack.concat(baseNFA(token, last(stack)?.nextID || 1))
: OPERATORS[token.operator](stack);