summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/paca.mjs 100
-rw-r--r-- tests/paca.mjs 540
2 files changed, 638 insertions, 2 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index e68cd77..53a9c11 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -1,5 +1,5 @@
import {
- butlast, explode, last, mapValues, max, reduce, reduced,
+ butlast, explode, isNumeric, last, mapValues, max, reduce, reduced,
} from "sjs";
@@ -9,6 +9,7 @@ export class SyntaxError extends Error {}
const ConcatStep = {
ACCEPTING: "accepting",
ESCAPING: "escaping",
+ RANGE: "range", // tokenizer is between "{" and "}", collecting the bounds of a range operator
};
const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);
@@ -23,6 +24,11 @@ const shouldConcat = (char, next) =>
const isOperator = char =>
nonConcatOperators.has(char) || char == "(";
+const numFromDigits = digits => // digits: array of digit characters collected for one range bound
+ digits.length === 0
+ ? -1 // sentinel for an omitted bound, e.g. the lower bound of "{,2}"
+ : Number(digits.join("")); // ["1", "2", "3"] -> 123
+
const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
const next = chars[index + 1];
const maybeConcat = shouldConcat(char, next)
@@ -37,6 +43,84 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
};
}
+ if (state === ConcatStep.RANGE) {
+ if (char === "}") {
+ if (context.where !== "to") {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "missing comma in range operator",
+ ),
+ });
+ }
+
+ const from = numFromDigits(context.from);
+ const to = numFromDigits(context.to);
+ if (from > to && to != -1) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new Error(`bad range values: {${from},${to}}`),
+ });
+ }
+ return {
+ out: out.concat({
+ operator: "range",
+ from,
+ to,
+ }),
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ };
+ }
+
+ if (char === ",") {
+ if (context.where === "to") {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "extraneuos comma in range expression",
+ ),
+ });
+ } else {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ where: "to",
+ },
+ };
+ }
+ }
+
+ if (!isNumeric(char)) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "bad char in range expression: " +
+ char,
+ ),
+ });
+ }
+
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ [context.where]: context[context.where].concat(char),
+ },
+ };
+ }
+
if (char === "\\") {
return {
out,
@@ -45,6 +129,18 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
};
}
+ if (char === "{") { // opening brace: switch to the RANGE state without emitting a token
+ return {
+ out,
+ state: ConcatStep.RANGE,
+ context: {
+ from: [], // digit characters of the lower bound
+ to: [], // digit characters of the upper bound
+ where: "from", // names the bound that incoming digits are appended to
+ },
+ };
+ }
+
const op = isOperator(char) ? { operator: char } : char;
return {
out: out.concat(op, maybeConcat),
@@ -54,7 +150,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
};
const tokenizeRegexFn = chars =>
- chars.reduce(tokenizeRegexStep(chars), {
+ reduce(chars, tokenizeRegexStep(chars), {
out: [],
state: ConcatStep.ACCEPTING,
context: null,
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 82102f1..4f085e4 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -353,6 +353,546 @@ const test_tokenizeRegexStep = t => {
);
}
});
+
+ t.testing("multichar range operator {m,n} is parsed", () => {
+ const table = [{
+ regex: "a{1,2}",
+ steps: [{
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: ["a", { operator: "concat" }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "to",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [ "2" ],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [ "2" ],
+ where: "to",
+ },
+ },
+ out: {
+ out: [{
+ operator: "range",
+ from: 1,
+ to: 2,
+ }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }],
+ }, {
+ regex: "a{,2}",
+ steps: [{
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: ["a", { operator: "concat" }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "to",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [ "2" ],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [ "2" ],
+ where: "to",
+ },
+ },
+ out: {
+ out: [{
+ operator: "range",
+ from: -1,
+ to: 2,
+ }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }],
+ }, {
+ regex: "a{1,}",
+ steps: [{
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: ["a", { operator: "concat" }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "to",
+ },
+ },
+ out: {
+ out: [{
+ operator: "range",
+ from: 1,
+ to: -1,
+ }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }],
+ }, {
+ regex: "a{,}",
+ steps: [{
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: ["a", { operator: "concat" }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "to",
+ },
+ },
+ out: {
+ out: [{
+ operator: "range",
+ from: -1,
+ to: -1,
+ }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }],
+ }, {
+ regex: "a{123,456}",
+ steps: [{
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: ["a", { operator: "concat" }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1" ],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2" ],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2" ],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [],
+ where: "from",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [],
+ where: "from",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [],
+ where: "to",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [ "4" ],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [ "4" ],
+ where: "to",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [ "4", "5" ],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [ "4", "5" ],
+ where: "to",
+ },
+ },
+ out: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [ "4", "5", "6" ],
+ where: "to",
+ },
+ },
+ }, {
+ in: {
+ out: [],
+ state: ConcatStep.RANGE,
+ context: {
+ from: [ "1", "2", "3" ],
+ to: [ "4", "5", "6" ],
+ where: "to",
+ },
+ },
+ out: {
+ out: [{
+ operator: "range",
+ from: 123,
+ to: 456,
+ }],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ },
+ }],
+ }];
+ for (const case_ of table) {
+ const stepFn = tokenizeRegexStep(case_.regex);
+ for (const i in case_.regex) {
+ const step = case_.steps[i];
+ const char = case_.regex[i];
+ t.assertEq(
+ stepFn(step.in, char, Number(i)),
+ step.out,
+ );
+ }
+ }
+ });
};
const test_tokenizeRegexFn = t => {