summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/paca.mjs36
-rw-r--r--tests/paca.mjs141
2 files changed, 174 insertions, 3 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 0625d5c..77a7a4b 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -623,14 +623,44 @@ const zeroOrOne = nfa => {
};
};
+const compareRange = (lhs, rhs) =>
+ lhs[0] !== rhs[0] ? lhs[0] - rhs[0] : lhs[1] - rhs[1];
+
+const compressRangeStep = (ranges, [from, to]) => {
+ const curr = last(ranges);
+ if (!curr) {
+ return [[from, to]];
+ }
+ return from <= curr[1]
+ ? butlast(ranges).concat([[curr[0], max(to, curr[1])]])
+ : ranges.concat([[from, to]]);
+};
+
+const compressCharacterRanges = ranges =>
+ ranges.toSorted(compareRange).reduce(compressRangeStep, []);
+
+const inRange = (ranges, char) => {
+ const code = char.charCodeAt(0);
+ const froms = Object.keys(ranges).map(Number).filter(
+ from => from <= code,
+ );
+ return froms.some(from => code <= ranges[from]);
+};
+
const characterClass = ({ set, caret }, id) => {
const start = id + 0;
const end = id + 1;
const nextID = id + 2;
const { string, object } = Object.groupBy(set, x => typeof x);
- const matches = new Set(string);
- const ranges = Object.fromEntries(object.map(
- ({ from, to }) => [ from.charCodeAt(0), to.charCodeAt(0) ],
+ const ranges = Object.fromEntries(
+ compressCharacterRanges((object || []).map(
+ ({ from, to }) => [
+ from.charCodeAt(0), to.charCodeAt(0)
+ ],
+ ),
+ ));
+ const matches = new Set((string || []).filter(
+ char => !inRange(ranges, char),
));
return {
start,
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 06b3c8b..efee446 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -28,6 +28,10 @@ import {
zeroOrMore,
oneOrMore,
zeroOrOne,
+ compareRange,
+ compressRangeStep,
+ compressCharacterRanges,
+ inRange,
characterClass,
wildcard,
OPERATORS_FNS,
@@ -2052,6 +2056,102 @@ const test_zeroOrOne = t => {
});
};
+const test_compareRange = t => {
+ t.start("compareRange()");
+
+ t.testing("sorts ranges in ascending order", () => {
+ const input = [
+ [3, 5],
+ [1, 3],
+ [1, 5],
+ [1, 2],
+ [2, 3],
+ [2, 1],
+ [5, 3],
+ [5, 1],
+ ];
+ const expected = [
+ [1, 2],
+ [1, 3],
+ [1, 5],
+ [2, 1],
+ [2, 3],
+ [3, 5],
+ [5, 1],
+ [5, 3],
+ ];
+ t.assertEq(input.toSorted(compareRange), expected);
+ });
+};
+
+const test_compressRangeStep = t => {
+ t.start("compressRangeStep()");
+
+ t.testing("concat when not overlapping", () => {
+ t.assertEq(
+ compressRangeStep([[1, 10]], [20, 30]),
+ [[1, 10], [20, 30]],
+ );
+ });
+
+ t.testing("merge ranges when overlapping", () => {
+ t.assertEq(
+ compressRangeStep([[1, 10]], [10, 30]),
+ [[1, 30]],
+ );
+ });
+
+ t.testing("merge with the largest intervals", () => {
+ t.assertEq(
+ compressRangeStep([[1, 10]], [2, 8]),
+ [[1, 10]],
+ );
+ });
+};
+
+const test_compressCharacterRanges = t => {
+ t.start("compressCharacterRanges()");
+
+ t.testing("noop when no overlaps", () => {
+ t.assertEq(
+ compressCharacterRanges([[1, 5], [11, 15], [7, 9]]),
+ [[1, 5], [7, 9], [11, 15]],
+ );
+ });
+
+ t.testing("smash all ranges into a single one", () => {
+ t.assertEq(
+ compressCharacterRanges(
+ [[1, 5], [11, 15], [7, 9], [5, 7], [9, 11]],
+ ),
+ [[1, 15]],
+ );
+ });
+};
+
+const test_inRange = t => {
+ t.start("inRange()");
+
+ t.testing("always false on empty ranges", () => {
+ t.assertEq(inRange({}, "a"), false);
+ });
+
+ t.testing("false when outside range", () => {
+ t.assertEq(inRange({ 10: 11 }, "a"), false);
+ t.assertEq(inRange({ 1: 96 }, "a"), false);
+ t.assertEq(inRange({ 96: 96 }, "a"), false);
+ t.assertEq(inRange({ 98: 98 }, "a"), false);
+ t.assertEq(inRange({ 98: 127 }, "a"), false);
+ });
+
+ t.testing("true when within range", () => {
+ t.assertEq(inRange({ 48: 57 }, "0"), true);
+ t.assertEq(inRange({ 65: 90 }, "X"), true);
+ t.assertEq(inRange({ 97: 122 }, "a"), true);
+ t.assertEq(inRange({ 97: 97 }, "a"), true);
+ });
+};
+
const test_characterClass = t => {
t.start("characterClass()");
@@ -2126,6 +2226,43 @@ const test_characterClass = t => {
],
}, 7), expected);
});
+
+ t.testing("overlapping ranges collapse into a single one", () => {
+ const expected = {
+ start: 3,
+ end: 4,
+ nextID: 5,
+ nodes: {
+ 3: {
+ direct: [],
+ transitions: {},
+ meta: {
+ op: "excludes",
+ to: 4,
+ matches: new Set(["-"]),
+ ranges: {
+ 97: 122,
+ },
+ },
+ },
+ 4: {
+ direct: [],
+ transitions: {},
+ },
+ },
+ };
+ t.assertEq(characterClass({
+ set: [
+ "-", "a", "b", "c",
+ { from: "f", to: "p" },
+ { from: "t", to: "z" },
+ "a", "d", "f", "x", "y", "z",
+ { from: "p", to: "t" },
+ { from: "a", to: "f" },
+ ],
+ caret: true,
+ }, 3), expected);
+ });
};
const test_wildcard = t => {
@@ -3311,6 +3448,10 @@ runTests([
test_zeroOrMore,
test_oneOrMore,
test_zeroOrOne,
+ test_compareRange,
+ test_compressRangeStep,
+ test_compressCharacterRanges,
+ test_inRange,
test_characterClass,
test_wildcard,
test_OPERATORS_FNS,