summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2025-08-01 15:01:58 -0300
committerEuAndreh <eu@euandre.org>2025-08-01 15:01:58 -0300
commitba385ba7b748d21f329a6ba9509532d86270de7c (patch)
tree3a1b8971fa2afc4f4f079fa3bc6d24892f163d6a
parentMakefile: Install like a Node.js package (diff)
downloadpaca-main.tar.gz
paca-main.tar.xz
src/paca.mjs: Improve implementation of interpretMetacharactersHEADmain
-rw-r--r--src/paca.mjs61
-rw-r--r--tests/paca.mjs200
2 files changed, 220 insertions, 41 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 38100ff..4f50db4 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -5,8 +5,9 @@ import {
-export class SyntaxError extends Error {}
-export class ValueError extends Error {}
+export class SyntaxError extends Error {}
+export class ValueError extends Error {}
+export class InternalError extends Error {}
const ConcatStep = {
ACCEPTING: "accepting",
@@ -795,28 +796,56 @@ const allDirects = (stateID, nodes) =>
).flat(),
)].toSorted();
-const interpretMetacharacter = ({ op, to, matches, ranges }, char, index, nodes) => {
+const VALID_OPS = new Set([
+ "caret", "dollar", true, "includes", "excludes",
+]);
+
+const isValidOp = op =>
+ VALID_OPS.has(op);
+
+const interpretMetacharacter = (
+ { op, to, matches, ranges },
+ char,
+ index,
+ nodes,
+) => {
if (!op) {
- return null;
+ return [];
+ }
+
+ if (!isValidOp(op)) {
+ throw new InternalError("Unsupported op: " + op);
+ }
+
+ if (op === "caret") {
+ return index === 0 ? searchNFAStep(nodes)(
+ allDirects(to, nodes),
+ char,
+ index,
+ ) : [];
+ }
+
+ if (op === "dollar") {
+ return [];
}
if (op === true) {
- return to;
+ return [to];
}
if (matches.has(char)) {
- return op === "includes" && to;
+ return op === "includes" ? [to] : [];
}
if (inRange(ranges, char)) {
- return op === "includes" && to;
+ return op === "includes" ? [to] : [];
}
- return op === "excludes" && to;
+ return op === "excludes" ? [to] : [];
};
const performTransition = (nodes, char, index) => id =>
- nodes[id].transitions[char] ||
+ (nodes[id].transitions[char] && [nodes[id].transitions[char]]) ||
interpretMetacharacter(nodes[id].meta || {}, char, index, nodes);
const searchNFAStep = nodes => (directStates, char, index) =>
@@ -824,16 +853,20 @@ const searchNFAStep = nodes => (directStates, char, index) =>
directStates
.map(performTransition(nodes, char, index))
.flat()
- .filter(x => !!x)
.map(id => allDirects(id, nodes))
.flat()
)].toSorted();
+const anchorDollar = (nfa, ids) =>
+ nfa.nodes[last(ids)]?.meta?.op === "dollar"
+ ? allDirects(nfa.nodes[last(ids)].meta.to, nfa.nodes)
+ : ids;
+
const searchNFAFn = (nfa, string) =>
- explode(string).reduce(
+ anchorDollar(nfa, explode(string).reduce(
searchNFAStep(nfa.nodes),
allDirects(nfa.start, nfa.nodes),
- );
+ ));
const searchNFA = (nfa, string) =>
searchNFAFn(nfa, string).includes(nfa.end);
@@ -852,7 +885,9 @@ const collectTransitions = (nfaNodes, states) => {
id => Object.entries(nfaNodes[id].transitions),
).filter(t => t.length !== 0);
if (!transitions.every(t => t.length === 1)) {
- throw new Error("unexpected transitions with more than 1 key");
+ throw new InternalError(
+ "unexpected transitions with more than 1 key",
+ );
}
const groupedTransitions = Object.groupBy(
transitions.map(t => t[0]),
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 7b20e88..378fba3 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -3,6 +3,7 @@ import { explode, reduced, reductions, runTests } from "sjs";
import {
SyntaxError,
ValueError,
+ InternalError,
ConcatStep,
shouldConcat,
isOperator,
@@ -55,6 +56,7 @@ import {
searchDFAStep,
searchDFAFn,
searchDFA,
+ parse,
compileNFA,
compileDFA,
compile,
@@ -1751,6 +1753,31 @@ const test_toPostfix = t => {
"c",
concat,
],
+ }, {
+ in: [
+ { meta: "^" },
+ "a",
+ concat,
+ wildcard,
+ { operator: "*" },
+ concat,
+ "$",
+ concat,
+ "b",
+ { operator: "range", from: 3, to: 5 },
+ ],
+ expected: [
+ { meta: "^" },
+ "a",
+ wildcard,
+ { operator: "*" },
+ concat,
+ "$",
+ concat,
+ "b",
+ { operator: "range", from: 3, to: 5 },
+ concat,
+ ],
}];
for (const test of table) {
t.assertEq(
@@ -2776,7 +2803,20 @@ const test_interpretMetacharacter = t => {
t.start("interpretMetacharacter()");
t.testing("null on missing meta attribute", () => {
- t.assertEq(interpretMetacharacter({}, "IGNORED"), null);
+ t.assertEq(interpretMetacharacter({}, "IGNORED"), []);
+ });
+
+ t.testing("error on invalid op", () => {
+ try {
+ interpretMetacharacter(
+ { op: "OP" }, null, null, null, null,
+ );
+ t.assertEq(true, false);
+ } catch (e) {
+ const message = "Unsupported op: OP";
+ t.assertEq(e.message, message);
+ t.assertEq(e instanceof InternalError, true);
+ }
});
t.testing("wildcard always matches", () => {
@@ -2784,7 +2824,7 @@ const test_interpretMetacharacter = t => {
op: true,
to: "ret",
};
- t.assertEq(interpretMetacharacter(input, "IGNORED"), "ret");
+ t.assertEq(interpretMetacharacter(input, "IGNORED"), ["ret"]);
});
t.testing("matches returns depending on the op", () => {
@@ -2797,8 +2837,8 @@ const test_interpretMetacharacter = t => {
...input1,
op: "excludes",
};
- t.assertEq(interpretMetacharacter(input1, "a"), "ret");
- t.assertEq(interpretMetacharacter(input2, "a"), false);
+ t.assertEq(interpretMetacharacter(input1, "a"), ["ret"]);
+ t.assertEq(interpretMetacharacter(input2, "a"), []);
});
t.testing("inRange returns depending on op", () => {
@@ -2812,8 +2852,8 @@ const test_interpretMetacharacter = t => {
...input1,
op: "excludes",
};
- t.assertEq(interpretMetacharacter(input1, "x"), "ret");
- t.assertEq(interpretMetacharacter(input2, "x"), false);
+ t.assertEq(interpretMetacharacter(input1, "x"), ["ret"]);
+ t.assertEq(interpretMetacharacter(input2, "x"), []);
});
t.testing("when nothing matches, we look at the op again", () => {
@@ -2827,16 +2867,45 @@ const test_interpretMetacharacter = t => {
...input1,
op: "excludes",
};
- t.assertEq(interpretMetacharacter(input1, "a"), false);
- t.assertEq(interpretMetacharacter(input2, "a"), "ret");
+ t.assertEq(interpretMetacharacter(input1, "a"), []);
+ t.assertEq(interpretMetacharacter(input2, "a"), ["ret"]);
});
t.testing("caret only matches when at the start of the string", () => {
- // FIXME
- });
-
- t.testing("dollar matches end of string", () => {
- // FIXME
+ const input = {
+ op: "caret",
+ to: 3,
+ };
+ const nodes = {
+ 3: {
+ direct: [ 4 ],
+ transitions: {},
+ },
+ 4: {
+ direct: [ 5 ],
+ transitions: {},
+ },
+ 5: {
+ direct: [],
+ transitions: { a: 6 },
+ },
+ 6: {
+ direct: [ 7 ],
+ transitions: {},
+ },
+ 7: {
+ direct: [],
+ transitions: { x: null },
+ },
+ };
+ t.assertEq(
+ interpretMetacharacter(input, "a", 1, nodes),
+ [],
+ );
+ t.assertEq(
+ interpretMetacharacter(input, "a", 0, nodes, [ "a" ]),
+ [ 7 ],
+ );
});
};
@@ -2852,8 +2921,8 @@ const test_performTransition = t => {
transitions: {},
},
};
- t.assertEq(performTransition(nodes, "a")(1), 2);
- t.assertEq(performTransition(nodes, "a")(2), null);
+ t.assertEq(performTransition(nodes, "a")(1), [2]);
+ t.assertEq(performTransition(nodes, "a")(2), []);
});
t.testing("or execute the meta attribute", () => {
@@ -2869,8 +2938,8 @@ const test_performTransition = t => {
transitions: {},
},
};
- t.assertEq(performTransition(nodes, "a")(1), 2);
- t.assertEq(performTransition(nodes, "a")(2), null);
+ t.assertEq(performTransition(nodes, "a")(1), [2]);
+ t.assertEq(performTransition(nodes, "a")(2), []);
});
};
@@ -2878,7 +2947,7 @@ const test_searchNFAStep = t => {
t.start("searchNFAStep()");
t.testing("empty on empty input", () => {
- t.assertEq(searchNFAStep({})([], null), []);
+ t.assertEq(searchNFAStep({})([], null, null, []), []);
});
t.testing("empty on empty transitions", () => {
@@ -2887,7 +2956,7 @@ const test_searchNFAStep = t => {
2: { transitions: {} },
3: { transitions: {} },
};
- t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a"), []);
+ t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a", null, []), []);
});
t.testing("empty on non-matching transitions", () => {
@@ -2896,7 +2965,7 @@ const test_searchNFAStep = t => {
2: { transitions: { c: 3 } },
3: { transitions: { d: 1 } },
};
- t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a"), []);
+ t.assertEq(searchNFAStep(nodes)([1, 2, 3], "a", null, []), []);
});
t.testing("leaves given by allDirects() otherwise", () => {
@@ -2934,13 +3003,33 @@ const test_searchNFAStep = t => {
transitions: {},
},
};
- t.assertEq(searchNFAStep(nodes)([1], "a"), [5]);
- t.assertEq(searchNFAStep(nodes)([1], "b"), []);
- t.assertEq(searchNFAStep(nodes)([3, 2, 1], "a"), [5, 6, 7, 8]);
- t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "a"), [2]);
- t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "b"), [7]);
- t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "c"), [8]);
- t.assertEq(searchNFAStep(nodes)([5, 6, 7, 8], "d"), []);
+ const fn = searchNFAStep(nodes);
+ t.assertEq(fn([1], "a", null, []), [5]);
+ t.assertEq(fn([1], "b", null, []), []);
+ t.assertEq(fn([3, 2, 1], "a", null, []), [5, 6, 7, 8]);
+ t.assertEq(fn([5, 6, 7, 8], "a", null, []), [2]);
+ t.assertEq(fn([5, 6, 7, 8], "b", null, []), [7]);
+ t.assertEq(fn([5, 6, 7, 8], "c", null, []), [8]);
+ t.assertEq(fn([5, 6, 7, 8], "d", null, []), []);
+ });
+
+ t.testing("recursive call to self via interpretMetacharacter", () => {
+ const regex = "^[behilos]*$";
+ const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex))));
+ const str = "sebo";
+ const fn = searchNFAStep(nfa.nodes, str.length);
+ t.assertEq(fn([1], str[0], 0), [3, 7]);
+ t.assertEq(fn([1], str[0], 1), []);
+ t.assertEq(fn([1], " ", 0), []);
+ t.assertEq(fn([3], str[0], 1), [3, 7]);
+ t.assertEq(fn([3], " ", 1), []);
+ t.assertEq(fn([3, 7], str[3], 4), [3, 7]);
+ t.assertEq(fn([3, 7], " ", 3), []);
+
+ t.assertEq(fn([1], str[0], 0), [3, 7]);
+ t.assertEq(fn([3, 7], str[1], 1), [3, 7]);
+ t.assertEq(fn([3, 7], str[2], 2), [3, 7]);
+ t.assertEq(fn([3, 7], str[3], 3), [3, 7]);
});
};
@@ -2972,9 +3061,10 @@ const test_searchNFA = t => {
});
t.testing("regex with metacharacters", () => {
- const regex = "^[behilos]*";
+ const regex = "^[behilos]*$";
const nfa = buildNFA(toPostfix(tokenizeRegex(explode(regex))));
t.assertEq(searchNFA(nfa, "helios"), true);
+ t.assertEq(searchNFA(nfa, " helios"), false);
t.assertEq(searchNFA(nfa, "helios "), false);
t.assertEq(searchNFA(nfa, "abc"), false);
});
@@ -3377,6 +3467,59 @@ const test_searchDFA = t => {
});
};
+const test_parse = t => {
+ t.start("parse()");
+
+ t.testing("regex table", () => {
+ const table = [{
+ in: "",
+ expected: [],
+ }, {
+ in: "a",
+ expected: ["a"],
+ }, {
+ in: "a|b",
+ expected: ["a", "b", { operator: "|" }],
+ }, {
+ in: "(a|b)*a?(a|b)+",
+ expected: [
+ "a",
+ "b",
+ { operator: "|" },
+ { operator: "*" },
+ "a",
+ { operator: "?" },
+ { operator: "concat" },
+ "a",
+ "b",
+ { operator: "|" },
+ { operator: "+" },
+ { operator: "concat" },
+ ],
+ }, {
+ in: "^[behilos]*$",
+ expected: [
+ { meta: "^" },
+ {
+ meta: "class",
+ set: [
+ "b", "e", "h", "i",
+ "l", "o", "s",
+ ],
+ caret: false,
+ },
+ { operator: "*" },
+ { operator: "concat" }, // FIXME
+ { meta: "$" },
+ { operator: "concat" }, // FIXME
+ ],
+ }];
+ for (const tcase of table) {
+ t.assertEq(parse(tcase.in), tcase.expected);
+ }
+ });
+};
+
const test_compileNFA = t => {
t.start("compileNFA()");
@@ -3677,6 +3820,7 @@ runTests([
test_searchDFAStep,
test_searchDFAFn,
test_searchDFA,
+ test_parse,
test_compileNFA,
test_compileDFA,
test_compile,