diff options
| author | EuAndreh <eu@euandre.org> | 2025-07-11 15:41:22 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2025-07-11 15:41:22 -0300 |
| commit | fc67be3d926d21194fca5e8ff733e0921f6e141c (patch) | |
| tree | 6b67f16d2687ce2e3d5103f21102b6668e22aa6a /src/paca.mjs | |
| parent | src/paca.mjs (tokenizeRegexStep): Include `context` key in reduced state (diff) | |
| download | paca-fc67be3d926d21194fca5e8ff733e0921f6e141c.tar.gz paca-fc67be3d926d21194fca5e8ff733e0921f6e141c.tar.xz | |
src/paca.mjs (tokenizeRegexStep): Support tokenizing range exps {m,n}
Diffstat (limited to 'src/paca.mjs')
| -rw-r--r-- | src/paca.mjs | 100 |
1 file changed, 98 insertions, 2 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index e68cd77..53a9c11 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -1,5 +1,5 @@ import { - butlast, explode, last, mapValues, max, reduce, reduced, + butlast, explode, isNumeric, last, mapValues, max, reduce, reduced, } from "sjs"; @@ -9,6 +9,7 @@ export class SyntaxError extends Error {} const ConcatStep = { ACCEPTING: "accepting", ESCAPING: "escaping", + RANGE: "range", }; const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]); @@ -23,6 +24,11 @@ const shouldConcat = (char, next) => const isOperator = char => nonConcatOperators.has(char) || char == "("; +const numFromDigits = digits => + digits.length === 0 + ? -1 + : Number(digits.join("")); + const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { const next = chars[index + 1]; const maybeConcat = shouldConcat(char, next) @@ -37,6 +43,84 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { }; } + if (state === ConcatStep.RANGE) { + if (char === "}") { + if (context.where !== "to") { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "missing comma in range operator", + ), + }); + } + + const from = numFromDigits(context.from); + const to = numFromDigits(context.to); + if (from > to && to != -1) { + return reduced({ + out, + state, + context, + error: new Error(`bad range values: {${from},${to}}`), + }); + } + return { + out: out.concat({ + operator: "range", + from, + to, + }), + state: ConcatStep.ACCEPTING, + context: null, + }; + } + + if (char === ",") { + if (context.where === "to") { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "extraneuos comma in range expression", + ), + }); + } else { + return { + out, + state, + context: { + ...context, + where: "to", + }, + }; + } + } + + if (!isNumeric(char)) { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "bad char in range expression: " + + char, + ), + }); + } + + return { + out, + 
state, + context: { + ...context, + [context.where]: context[context.where].concat(char), + }, + }; + } + if (char === "\\") { return { out, @@ -45,6 +129,18 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { }; } + if (char === "{") { + return { + out, + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }; + } + const op = isOperator(char) ? { operator: char } : char; return { out: out.concat(op, maybeConcat), @@ -54,7 +150,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { }; const tokenizeRegexFn = chars => - chars.reduce(tokenizeRegexStep(chars), { + reduce(chars, tokenizeRegexStep(chars), { out: [], state: ConcatStep.ACCEPTING, context: null, |
