diff options
Diffstat (limited to '')
| -rw-r--r-- | src/paca.mjs | 128 | ||||
| -rw-r--r-- | tests/paca.mjs | 226 |
2 files changed, 349 insertions, 5 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index 0b64a87..b92bcdc 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -1,5 +1,6 @@ import { - butlast, explode, isNumeric, last, mapValues, max, reduce, reduced, + butlast, dissoc, explode, isNumeric, last, mapValues, max, reduce, + reduced, } from "sjs"; @@ -11,6 +12,7 @@ const ConcatStep = { ACCEPTING: "accepting", ESCAPING: "escaping", RANGE: "range", + CLASS: "class", }; const numFromDigits = digits => @@ -107,9 +109,130 @@ const rangeStateStep = ({ out, state, context }, char, _index, _next) => { }; }; +const classStateStep = ({ out, state, context }, char, _index, _next) => { + if (context.escaping) { + return { + out, + state, + context: dissoc({ + ...context, + set: context.set.concat(char), + }, "escaping"), + }; + } + + if (char === "]") { + if (context.range.where === "to") { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "unfinished character class range", + ), + }); + } + + if (context.set.length === 0) { + return reduced({ + out, + state, + context, + error: new ValueError("empty character class"), + }); + } + + return { + out: out.concat({ + operator: "class", + set: context.set, + }), + state: ConcatStep.ACCEPTING, + context: null, + }; + } + + if (char === "\\") { + return { + out, + state, + context: { + ...context, + escaping: true, + }, + }; + } + + if (context.range.where === "to") { + const from = context.range.from; + const to = char; + + if (from.charCodeAt(0) > to.charCodeAt(0)) { + return reduced({ + out, + state, + context, + error: new ValueError( + "bad class range values: " + + `[${from}-${to}]`, + ), + }); + } + + return { + out, + state, + context: { + ...context, + set: context.set.concat({ from, to }), + range: { + from: null, + where: "from", + }, + }, + }; + } + + if (char === "-" && context.set.length !== 0) { + return { + out, + state, + context: { + ...context, + set: butlast(context.set), + range: { + from: last(context.set), + where: "to", + }, + }, + }; + } + + if (char === "^" && context.set.length === 0) { + return { + out, + state, + context: { + ...context, + caret: true, + }, + }; + } + + return { + out, + state, + context: { + ...context, + set: context.set.concat(char), + }, + }; +}; + const STATE_FNS = { [ConcatStep.ESCAPING]: escapingStateStep, [ConcatStep.RANGE ]: rangeStateStep, + [ConcatStep.CLASS ]: classStateStep, }; const TRANSITION_FNS = { @@ -133,8 +256,7 @@ const TRANSITION_FNS = { context: { set: [], range: { - from: [], - to: [], + from: null, where: "from", }, }, diff --git a/tests/paca.mjs b/tests/paca.mjs index 739fe80..e9c3a6b 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -6,6 +6,7 @@ import { ConcatStep, numFromDigits, rangeStateStep, + classStateStep, TRANSITION_FNS, shouldConcat, isOperator, @@ -214,6 +215,227 @@ const test_rangeStateStep = t => { }); }; +const test_classStateStep = t => { + t.start("classStateStep()"); + + t.testing("error when range is unfinished", () => { + const { value: { error }} = classStateStep( + { context: { range: { where: "to" }}}, + "]", + null, + null, + ); + t.assertEq(error.message, "unfinished character class range"); + t.assertEq(error instanceof SyntaxError, true); + }); + + t.testing("error when class is empty", () => { + const { value: { error }} = classStateStep( + { context: { range: {}, set: [] }}, + "]", + null, + null, + ); + t.assertEq(error.message, "empty character class"); + t.assertEq(error instanceof ValueError, true); + }); + + t.testing("OK when class in non-empty", () => { + const given = classStateStep( + { + out: [ 1, 2, 3 ], + context: { + range: {}, + set: [ 4, 5, 6 ], + }, + }, + "]", + null, + null, + ); + const expected = { + out: [ 1, 2, 3, { + operator: "class", + set: [ 4, 5, 6 ], + }], + state: "accepting", + context: null, + }; + t.assertEq(given, expected); + }); + + t.testing("error on descending range", () => { + const { value: { error }} = classStateStep( + { context: { range: { from: "c", where: "to" }}}, + "b", + null, + null, + ); + const message = "bad class range values: [c-b]"; + t.assertEq(error.message, message); + t.assertEq(error instanceof ValueError, true); + }); + + t.testing("OK when adding ending to range", () => { + const given = classStateStep( + { + context: { + range: { + from: "a", + where: "to", + }, + set: [ "a", "b" ], + x: 1, + }, + }, + "z", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { + range: { + from: null, + where: "from", + }, + set: [ "a", "b", { from: "a", to: "z" }], + x: 1, + }, + }; + t.assertEq(given, expected); + }); + + t.testing("a backslash enters escaping state", () => { + const given = classStateStep( + { context: { what: "ever" }}, + "\\", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { + what: "ever", + escaping: true, + }, + }; + t.assertEq(given, expected); + }); + + t.testing("when escaping, special chars get added to the set", () => { + const given = classStateStep( + { context: { set: [ "a" ], escaping: true }}, + "]", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { set: [ "a", "]" ] }, + }; + t.assertEq(given, expected); + }); + + t.testing("a hyphen changes the last char as a range start", () => { + const given = classStateStep( + { + context: { + range: "IGNORED", + set: [ "0" ], + x: 1, + }, + }, + "-", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { + range: { + from: "0", + where: "to", + }, + set: [], + x: 1, + }, + }; + t.assertEq(given, expected); + }); + + t.testing("hyphen as the first char is taken literally", () => { + const given = classStateStep( + { + context: { + range: {}, + set: [], + x: 1, + }, + }, + "-", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { + range: {}, + set: [ "-" ], + x: 1, + }, + }; + t.assertEq(given, expected); + }); + + t.testing("caret as the first char toggles the boolean", () => { + const given = classStateStep( + { context: { x: 1, set: [], range: {}}}, + "^", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { + x: 1, + set: [], + range: {}, + caret: true, + }, + }; + t.assertEq(given, expected); + }); + + t.testing("other chars are just added to the set", () => { + const given = classStateStep( + { context: { x: 1, set: [], range: {}}}, + "_", + null, + null, + ); + const expected = { + out: undefined, + state: undefined, + context: { + x: 1, + set: [ "_" ], + range: {}, + }, + }; + t.assertEq(given, expected); + }); + + t.testing("caret as not the first char is taken literally", () => { + }); +}; + const test_TRANSITION_FNS = t => { t.start("TRANSITION_FNS"); @@ -253,8 +475,7 @@ const test_TRANSITION_FNS = t => { context: { set: [], range: { - from: [], - to: [], + from: null, where: "from", }, }, @@ -2558,6 +2779,7 @@ const test_compile = t => { runTests([ test_numFromDigits, test_rangeStateStep, + test_classStateStep, test_TRANSITION_FNS, test_shouldConcat, test_isOperator, |
