summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/paca.mjs128
-rw-r--r--tests/paca.mjs226
2 files changed, 349 insertions, 5 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 0b64a87..b92bcdc 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -1,5 +1,6 @@
import {
- butlast, explode, isNumeric, last, mapValues, max, reduce, reduced,
+ butlast, dissoc, explode, isNumeric, last, mapValues, max, reduce,
+ reduced,
} from "sjs";
@@ -11,6 +12,7 @@ const ConcatStep = {
ACCEPTING: "accepting",
ESCAPING: "escaping",
RANGE: "range",
+ CLASS: "class",
};
const numFromDigits = digits =>
@@ -107,9 +109,130 @@ const rangeStateStep = ({ out, state, context }, char, _index, _next) => {
};
};
+const classStateStep = ({ out, state, context }, char, _index, _next) => {
+ if (context.escaping) {
+ return {
+ out,
+ state,
+ context: dissoc({
+ ...context,
+ set: context.set.concat(char),
+ }, "escaping"),
+ };
+ }
+
+ if (char === "]") {
+ if (context.range.where === "to") {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "unfinished character class range",
+ ),
+ });
+ }
+
+ if (context.set.length === 0) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new ValueError("empty character class"),
+ });
+ }
+
+ return {
+ out: out.concat({
+ operator: "class",
+ set: context.set,
+ }),
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ };
+ }
+
+ if (char === "\\") {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ escaping: true,
+ },
+ };
+ }
+
+ if (context.range.where === "to") {
+ const from = context.range.from;
+ const to = char;
+
+ if (from.charCodeAt(0) > to.charCodeAt(0)) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new ValueError(
+ "bad class range values: " +
+ `[${from}-${to}]`,
+ ),
+ });
+ }
+
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ set: context.set.concat({ from, to }),
+ range: {
+ from: null,
+ where: "from",
+ },
+ },
+ };
+ }
+
+ if (char === "-" && context.set.length !== 0) {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ set: butlast(context.set),
+ range: {
+ from: last(context.set),
+ where: "to",
+ },
+ },
+ };
+ }
+
+ if (char === "^" && context.set.length === 0) {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ caret: true,
+ },
+ };
+ }
+
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ set: context.set.concat(char),
+ },
+ };
+};
+
const STATE_FNS = { // dispatch table: keys are ConcatStep states, values are the matching *StateStep functions
[ConcatStep.ESCAPING]: escapingStateStep,
[ConcatStep.RANGE ]: rangeStateStep,
+ [ConcatStep.CLASS ]: classStateStep,
};
const TRANSITION_FNS = {
@@ -133,8 +256,7 @@ const TRANSITION_FNS = {
context: {
set: [],
range: {
- from: [],
- to: [],
+ from: null,
where: "from",
},
},
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 739fe80..e9c3a6b 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -6,6 +6,7 @@ import {
ConcatStep,
numFromDigits,
rangeStateStep,
+ classStateStep,
TRANSITION_FNS,
shouldConcat,
isOperator,
@@ -214,6 +215,227 @@ const test_rangeStateStep = t => {
});
};
+const test_classStateStep = t => {
+ t.start("classStateStep()");
+
+ t.testing("error when range is unfinished", () => {
+ const { value: { error }} = classStateStep(
+ { context: { range: { where: "to" }}},
+ "]",
+ null,
+ null,
+ );
+ t.assertEq(error.message, "unfinished character class range");
+ t.assertEq(error instanceof SyntaxError, true);
+ });
+
+ t.testing("error when class is empty", () => {
+ const { value: { error }} = classStateStep(
+ { context: { range: {}, set: [] }},
+ "]",
+ null,
+ null,
+ );
+ t.assertEq(error.message, "empty character class");
+ t.assertEq(error instanceof ValueError, true);
+ });
+
+ t.testing("OK when class in non-empty", () => {
+ const given = classStateStep(
+ {
+ out: [ 1, 2, 3 ],
+ context: {
+ range: {},
+ set: [ 4, 5, 6 ],
+ },
+ },
+ "]",
+ null,
+ null,
+ );
+ const expected = {
+ out: [ 1, 2, 3, {
+ operator: "class",
+ set: [ 4, 5, 6 ],
+ }],
+ state: "accepting",
+ context: null,
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("error on descending range", () => {
+ const { value: { error }} = classStateStep(
+ { context: { range: { from: "c", where: "to" }}},
+ "b",
+ null,
+ null,
+ );
+ const message = "bad class range values: [c-b]";
+ t.assertEq(error.message, message);
+ t.assertEq(error instanceof ValueError, true);
+ });
+
+ t.testing("OK when adding ending to range", () => {
+ const given = classStateStep(
+ {
+ context: {
+ range: {
+ from: "a",
+ where: "to",
+ },
+ set: [ "a", "b" ],
+ x: 1,
+ },
+ },
+ "z",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: [ "a", "b", { from: "a", to: "z" }],
+ x: 1,
+ },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("a backslash enters escaping state", () => {
+ const given = classStateStep(
+ { context: { what: "ever" }},
+ "\\",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: {
+ what: "ever",
+ escaping: true,
+ },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("when escaping, special chars get added to the set", () => {
+ const given = classStateStep(
+ { context: { set: [ "a" ], escaping: true }},
+ "]",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: { set: [ "a", "]" ] },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("a hyphen changes the last char as a range start", () => {
+ const given = classStateStep(
+ {
+ context: {
+ range: "IGNORED",
+ set: [ "0" ],
+ x: 1,
+ },
+ },
+ "-",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: {
+ range: {
+ from: "0",
+ where: "to",
+ },
+ set: [],
+ x: 1,
+ },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("hyphen as the first char is taken literally", () => {
+ const given = classStateStep(
+ {
+ context: {
+ range: {},
+ set: [],
+ x: 1,
+ },
+ },
+ "-",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: {
+ range: {},
+ set: [ "-" ],
+ x: 1,
+ },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("caret as the first char toggles the boolean", () => {
+ const given = classStateStep(
+ { context: { x: 1, set: [], range: {}}},
+ "^",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: {
+ x: 1,
+ set: [],
+ range: {},
+ caret: true,
+ },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("other chars are just added to the set", () => {
+ const given = classStateStep(
+ { context: { x: 1, set: [], range: {}}},
+ "_",
+ null,
+ null,
+ );
+ const expected = {
+ out: undefined,
+ state: undefined,
+ context: {
+ x: 1,
+ set: [ "_" ],
+ range: {},
+ },
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("caret as not the first char is taken literally", () => {
+ });
+};
+
const test_TRANSITION_FNS = t => {
t.start("TRANSITION_FNS");
@@ -253,8 +475,7 @@ const test_TRANSITION_FNS = t => {
context: {
set: [],
range: {
- from: [],
- to: [],
+ from: null,
where: "from",
},
},
@@ -2558,6 +2779,7 @@ const test_compile = t => {
runTests([
test_numFromDigits,
test_rangeStateStep,
+ test_classStateStep,
test_TRANSITION_FNS,
test_shouldConcat,
test_isOperator,