diff options
Diffstat (limited to '')
| -rw-r--r-- | src/paca.mjs | 100 | ||||
| -rw-r--r-- | tests/paca.mjs | 540 |
2 files changed, 638 insertions, 2 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index e68cd77..53a9c11 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -1,5 +1,5 @@ import { - butlast, explode, last, mapValues, max, reduce, reduced, + butlast, explode, isNumeric, last, mapValues, max, reduce, reduced, } from "sjs"; @@ -9,6 +9,7 @@ export class SyntaxError extends Error {} const ConcatStep = { ACCEPTING: "accepting", ESCAPING: "escaping", + RANGE: "range", }; const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]); @@ -23,6 +24,11 @@ const shouldConcat = (char, next) => const isOperator = char => nonConcatOperators.has(char) || char == "("; +const numFromDigits = digits => + digits.length === 0 + ? -1 + : Number(digits.join("")); + const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { const next = chars[index + 1]; const maybeConcat = shouldConcat(char, next) @@ -37,6 +43,84 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { }; } + if (state === ConcatStep.RANGE) { + if (char === "}") { + if (context.where !== "to") { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "missing comma in range operator", + ), + }); + } + + const from = numFromDigits(context.from); + const to = numFromDigits(context.to); + if (from > to && to != -1) { + return reduced({ + out, + state, + context, + error: new Error(`bad range values: {${from},${to}}`), + }); + } + return { + out: out.concat({ + operator: "range", + from, + to, + }), + state: ConcatStep.ACCEPTING, + context: null, + }; + } + + if (char === ",") { + if (context.where === "to") { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "extraneuos comma in range expression", + ), + }); + } else { + return { + out, + state, + context: { + ...context, + where: "to", + }, + }; + } + } + + if (!isNumeric(char)) { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "bad char in range expression: " + + char, + ), + }); + } + + return { + out, + state, + context: { + ...context, + [context.where]: context[context.where].concat(char), + }, + }; + } + if (char === "\\") { return { out, @@ -45,6 +129,18 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { }; } + if (char === "{") { + return { + out, + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }; + } + const op = isOperator(char) ? { operator: char } : char; return { out: out.concat(op, maybeConcat), @@ -54,7 +150,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { }; const tokenizeRegexFn = chars => - chars.reduce(tokenizeRegexStep(chars), { + reduce(chars, tokenizeRegexStep(chars), { out: [], state: ConcatStep.ACCEPTING, context: null, diff --git a/tests/paca.mjs b/tests/paca.mjs index 82102f1..4f085e4 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -353,6 +353,546 @@ const test_tokenizeRegexStep = t => { ); } }); + + t.testing("multichar range operator {m,n} is parsed", () => { + const table = [{ + regex: "a{1,2}", + steps: [{ + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }, { + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "to", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [ "2" ], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [ "2" ], + where: "to", + }, + }, + out: { + out: [{ + operator: "range", + from: 1, + to: 2, + }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }], + }, { + regex: "a{,2}", + steps: [{ + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }, { + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "to", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [ "2" ], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [ "2" ], + where: "to", + }, + }, + out: { + out: [{ + operator: "range", + from: -1, + to: 2, + }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }], + }, { + regex: "a{1,}", + steps: [{ + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }, { + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "to", + }, + }, + out: { + out: [{ + operator: "range", + from: 1, + to: -1, + }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }], + }, { + regex: "a{,}", + steps: [{ + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }, { + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "to", + }, + }, + out: { + out: [{ + operator: "range", + from: -1, + to: -1, + }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }], + }, { + regex: "a{123,456}", + steps: [{ + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }, { + in: { + out: [], + state: ConcatStep.ACCEPTING, + context: null, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1" ], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2" ], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2" ], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [], + where: "from", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [], + where: "from", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [], + where: "to", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [ "4" ], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [ "4" ], + where: "to", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [ "4", "5" ], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [ "4", "5" ], + where: "to", + }, + }, + out: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [ "4", "5", "6" ], + where: "to", + }, + }, + }, { + in: { + out: [], + state: ConcatStep.RANGE, + context: { + from: [ "1", "2", "3" ], + to: [ "4", "5", "6" ], + where: "to", + }, + }, + out: { + out: [{ + operator: "range", + from: 123, + to: 456, + }], + state: ConcatStep.ACCEPTING, + context: null, + }, + }], + }]; + for (const case_ of table) { + const stepFn = tokenizeRegexStep(case_.regex); + for (const i in case_.regex) { + const step = case_.steps[i]; + const char = case_.regex[i]; + t.assertEq( + stepFn(step.in, char, Number(i)), + step.out, + ); + } + } + }); }; const test_tokenizeRegexFn = t => { |
