From 1ce80e005a374488c186d0f545af33096f6523d5 Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Tue, 15 Jul 2025 21:37:16 -0300 Subject: Only tolerate escaping of special chars * src/paca.mjs (escapingStateStep): Return an error when escaping non-metacharacters. This way, sequences like \d (syntax for [0-9], which will eventually be recognized) will not change their behaviour from a no-op escape of "d" to matching digits. (operatorChars, isOperator): Hoist both of these up before their usage in `escapingStateStep()`. * tests/paca.mjs (test_isOperator): Hoist its definition and position inside the `runTests([...])` array to match src/paca.mjs. (test_escapingStateStep): Adjust existing cases and add test case for good/bad escapes. (test_tokenizeRegexStep): Fix the bad starting escape, which broke because it was escaping a non-metacharacter. --- src/paca.mjs | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/paca.mjs b/src/paca.mjs index 5d11b05..7a01407 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -24,19 +24,33 @@ const shouldConcat = (char, next) => char !== "{" && !nonConcatOperators.has(next); +const operatorChars = new Set([...nonConcatOperators, "(", "."]); +const isOperator = char => + operatorChars.has(char); + const numFromDigits = digits => digits.length === 0 ? -1 : Number(digits.join("")); -const escapingStateStep = ({ out, _state, context }, char, _index, next) => ({ - out: out.concat( - char, - shouldConcat(null, next) ? [{ operator: "concat" }] : [], - ), - state: ConcatStep.ACCEPTING, - context, -}); +const escapingStateStep = ({ out, state, context }, char, _index, next) => + !(isOperator(char) || char === "\\") + ? reduced({ + out, + state, + context, + error: new SyntaxError( + "unknown escape sequence: \\" + char, + ), + }) + : { + out: out.concat( + char, + shouldConcat(null, next) ? 
[{ operator: "concat" }] : [], + ), + state: ConcatStep.ACCEPTING, + context, + }; const rangeStateStep = ({ out, state, context }, char, _index, _next) => { if (char === "}") { @@ -313,10 +327,6 @@ const transitionChars = new Set(Object.keys(TRANSITION_FNS)); const isTransition = char => transitionChars.has(char); -const operatorChars = new Set([...nonConcatOperators, "(", "."]); -const isOperator = char => - operatorChars.has(char); - const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { const next = chars[index + 1]; -- cgit v1.2.3