diff options
Diffstat (limited to '')
| -rw-r--r-- | src/paca.mjs | 36 | ||||
| -rw-r--r-- | tests/paca.mjs | 141 |
2 files changed, 174 insertions, 3 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index 0625d5c..77a7a4b 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -623,14 +623,44 @@ const zeroOrOne = nfa => { }; }; +const compareRange = (lhs, rhs) => + lhs[0] !== rhs[0] ? lhs[0] - rhs[0] : lhs[1] - rhs[1]; + +const compressRangeStep = (ranges, [from, to]) => { + const curr = last(ranges); + if (!curr) { + return [[from, to]]; + } + return from <= curr[1] + ? butlast(ranges).concat([[curr[0], max(to, curr[1])]]) + : ranges.concat([[from, to]]); +}; + +const compressCharacterRanges = ranges => + ranges.toSorted(compareRange).reduce(compressRangeStep, []); + +const inRange = (ranges, char) => { + const code = char.charCodeAt(0); + const froms = Object.keys(ranges).map(Number).filter( + from => from <= code, + ); + return froms.some(from => code <= ranges[from]); +}; + const characterClass = ({ set, caret }, id) => { const start = id + 0; const end = id + 1; const nextID = id + 2; const { string, object } = Object.groupBy(set, x => typeof x); - const matches = new Set(string); - const ranges = Object.fromEntries(object.map( - ({ from, to }) => [ from.charCodeAt(0), to.charCodeAt(0) ], + const ranges = Object.fromEntries( + compressCharacterRanges((object || []).map( + ({ from, to }) => [ + from.charCodeAt(0), to.charCodeAt(0) + ], + ), + )); + const matches = new Set((string || []).filter( + char => !inRange(ranges, char), )); return { start, diff --git a/tests/paca.mjs b/tests/paca.mjs index 06b3c8b..efee446 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -28,6 +28,10 @@ import { zeroOrMore, oneOrMore, zeroOrOne, + compareRange, + compressRangeStep, + compressCharacterRanges, + inRange, characterClass, wildcard, OPERATORS_FNS, @@ -2052,6 +2056,102 @@ const test_zeroOrOne = t => { }); }; +const test_compareRange = t => { + t.start("compareRange()"); + + t.testing("sorts ranges in ascending order", () => { + const input = [ + [3, 5], + [1, 3], + [1, 5], + [1, 2], + [2, 3], + [2, 1], + [5, 3], + [5, 1], + ]; + const expected = [ + [1, 2], + [1, 3], + [1, 5], + [2, 1], + [2, 3], + [3, 5], + [5, 1], + [5, 3], + ]; + t.assertEq(input.toSorted(compareRange), expected); + }); +}; + +const test_compressRangeStep = t => { + t.start("compressRangeStep()"); + + t.testing("concat when not overlapping", () => { + t.assertEq( + compressRangeStep([[1, 10]], [20, 30]), + [[1, 10], [20, 30]], + ); + }); + + t.testing("merge ranges when overlapping", () => { + t.assertEq( + compressRangeStep([[1, 10]], [10, 30]), + [[1, 30]], + ); + }); + + t.testing("merge with the largest intervals", () => { + t.assertEq( + compressRangeStep([[1, 10]], [2, 8]), + [[1, 10]], + ); + }); +}; + +const test_compressCharacterRanges = t => { + t.start("compressCharacterRanges()"); + + t.testing("noop when no overlaps", () => { + t.assertEq( + compressCharacterRanges([[1, 5], [11, 15], [7, 9]]), + [[1, 5], [7, 9], [11, 15]], + ); + }); + + t.testing("smash all ranges into a single one", () => { + t.assertEq( + compressCharacterRanges( + [[1, 5], [11, 15], [7, 9], [5, 7], [9, 11]], + ), + [[1, 15]], + ); + }); +}; + +const test_inRange = t => { + t.start("inRange()"); + + t.testing("always false on empty ranges", () => { + t.assertEq(inRange({}, "a"), false); + }); + + t.testing("false when outside range", () => { + t.assertEq(inRange({ 10: 11 }, "a"), false); + t.assertEq(inRange({ 1: 96 }, "a"), false); + t.assertEq(inRange({ 96: 96 }, "a"), false); + t.assertEq(inRange({ 98: 98 }, "a"), false); + t.assertEq(inRange({ 98: 127 }, "a"), false); + }); + + t.testing("true when within range", () => { + t.assertEq(inRange({ 48: 57 }, "0"), true); + t.assertEq(inRange({ 65: 90 }, "X"), true); + t.assertEq(inRange({ 97: 122 }, "a"), true); + t.assertEq(inRange({ 97: 97 }, "a"), true); + }); +}; + const test_characterClass = t => { t.start("characterClass()"); @@ -2126,6 +2226,43 @@ const test_characterClass = t => { ], }, 7), expected); }); + + t.testing("overlapping ranges collapse into a single one", () => { + const expected = { + start: 3, + end: 4, + nextID: 5, + nodes: { + 3: { + direct: [], + transitions: {}, + meta: { + op: "excludes", + to: 4, + matches: new Set(["-"]), + ranges: { + 97: 122, + }, + }, + }, + 4: { + direct: [], + transitions: {}, + }, + }, + }; + t.assertEq(characterClass({ + set: [ + "-", "a", "b", "c", + { from: "f", to: "p" }, + { from: "t", to: "z" }, + "a", "d", "f", "x", "y", "z", + { from: "p", to: "t" }, + { from: "a", to: "f" }, + ], + caret: true, + }, 3), expected); + }); }; const test_wildcard = t => { @@ -3311,6 +3448,10 @@ runTests([ test_zeroOrMore, test_oneOrMore, test_zeroOrOne, + test_compareRange, + test_compressRangeStep, + test_compressCharacterRanges, + test_inRange, test_characterClass, test_wildcard, test_OPERATORS_FNS, |
