diff options
| author | EuAndreh <eu@euandre.org> | 2025-07-11 21:49:57 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2025-07-11 21:50:04 -0300 |
| commit | 42001c72522293fe9ba2ba17901eda89add6f3fa (patch) | |
| tree | 5f513170bb984f22e092c6aeb36d859ffaaf5040 /src | |
| parent | tests/paca.mjs (test_tokenizeRegexStep): Simplify table values (diff) | |
| download | paca-42001c72522293fe9ba2ba17901eda89add6f3fa.tar.gz paca-42001c72522293fe9ba2ba17901eda89add6f3fa.tar.xz | |
src/paca.mjs (tokenizeRegexStep): Simplify body
When handling a custom state, dispatch it to the appropriate function in
`STATE_FNS`; and when looking for chars that enter these custom states,
dispatch them to the appropriate function in `TRANSITION_FNS`.
The body of each part didn't change, so no tests had to be modified.
But now we can write specific tests for each case, and remove the bulk
of the logic out of `tokenizeRegexStep()`.
Diffstat (limited to '')
| -rw-r--r-- | src/paca.mjs | 232 |
1 files changed, 127 insertions, 105 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index b30d4cd..e8ec542 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -12,133 +12,155 @@ const ConcatStep = { RANGE: "range", }; -const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]); - -const shouldConcat = (char, next) => - next !== undefined && - char !== "(" && - char !== "|" && - char !== "{" && - !nonConcatOperators.has(next); - -const isOperator = char => - nonConcatOperators.has(char) || char == "("; - -const numFromDigits = digits => - digits.length === 0 - ? -1 - : Number(digits.join("")); +const escapingStateStep = ({ out, state, context }, char, index, next) => ({ + out: out.concat( + char, + next !== undefined ? {operator: "concat"} : [], + ), + state: ConcatStep.ACCEPTING, + context, +}); -const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { - const next = chars[index + 1]; +const rangeStateStep = ({ out, state, context }, char, index, next) => { + if (char === "}") { + if (context.where !== "to") { + return reduced({ + out, + state, + context, + error: new SyntaxError( + "missing comma in range operator", + ), + }); + } - if (state === ConcatStep.ESCAPING) { + const from = numFromDigits(context.from); + const to = numFromDigits(context.to); + if (from > to && to != -1) { + return reduced({ + out, + state, + context, + error: new Error( + `bad range values: {${from},${to}}`, + ), + }); + } return { - out: out.concat( - char, - next !== undefined ? 
{operator: "concat"} : [], - ), + out: out.concat({ + operator: "range", + from, + to, + }), state: ConcatStep.ACCEPTING, - context, + context: null, }; } - if (state === ConcatStep.RANGE) { - if (char === "}") { - if (context.where !== "to") { - return reduced({ - out, - state, - context, - error: new SyntaxError( - "missing comma in range operator", - ), - }); - } - - const from = numFromDigits(context.from); - const to = numFromDigits(context.to); - if (from > to && to != -1) { - return reduced({ - out, - state, - context, - error: new Error(`bad range values: {${from},${to}}`), - }); - } - return { - out: out.concat({ - operator: "range", - from, - to, - }), - state: ConcatStep.ACCEPTING, - context: null, - }; - } - - if (char === ",") { - if (context.where === "to") { - return reduced({ - out, - state, - context, - error: new SyntaxError( - "extraneuos comma in range expression", - ), - }); - } else { - return { - out, - state, - context: { - ...context, - where: "to", - }, - }; - } - } - - if (!isNumeric(char)) { + if (char === ",") { + if (context.where === "to") { return reduced({ out, state, context, error: new SyntaxError( - "bad char in range expression: " + - char, + "extraneuos comma in range expression", ), }); + } else { + return { + out, + state, + context: { + ...context, + where: "to", + }, + }; } + } - return { + if (!isNumeric(char)) { + return reduced({ out, state, - context: { - ...context, - [context.where]: context[context.where].concat(char), - }, - }; + context, + error: new SyntaxError( + "bad char in range expression: " + + char, + ), + }); } - if (char === "\\") { - return { - out, - state: ConcatStep.ESCAPING, - context, - }; + return { + out, + state, + context: { + ...context, + [context.where]: context[context.where].concat(char), + }, + }; +}; + +const STATE_FNS = { + [ConcatStep.ESCAPING]: escapingStateStep, + [ConcatStep.RANGE ]: rangeStateStep, +}; + +const TRANSITION_FNS = { + "\\": ({ out, state, context }, char, index, next) 
=> ({ + out, + state: ConcatStep.ESCAPING, + context, + }), + "{": ({ out, state, context }, char, index, next) => ({ + out, + state: ConcatStep.RANGE, + context: { + from: [], + to: [], + where: "from", + }, + }), +}; + +const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS)); + +const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]); + +const shouldConcat = (char, next) => + next !== undefined && + char !== "(" && + char !== "|" && + char !== "{" && + !nonConcatOperators.has(next); + +const isOperator = char => + nonConcatOperators.has(char) || char == "("; + +const numFromDigits = digits => + digits.length === 0 + ? -1 + : Number(digits.join("")); + +const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { + const next = chars[index + 1]; + + if (state !== ConcatStep.ACCEPTING) { + return STATE_FNS[state]( + { out, state, context }, + char, + index, + next, + ); } - if (char === "{") { - return { - out, - state: ConcatStep.RANGE, - context: { - from: [], - to: [], - where: "from", - }, - }; + if (stateTransitionOperators.has(char)) { + return TRANSITION_FNS[char]( + { out, state, context }, + char, + index, + next, + ); } const op = isOperator(char) ? { operator: char } : char; |
