From 1ce80e005a374488c186d0f545af33096f6523d5 Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Tue, 15 Jul 2025 21:37:16 -0300 Subject: Only tolerate escaping of special chars * src/paca.mjs (escapingStateStep): Return an error when escaping non-metacharacters. This way, escapes like \d (syntax for [0-9] that will eventually be recognized) will not change their behaviour from a no-op escape of "d" to matching digits. (operatorChars, isOperator): Hoist both of these up before their usage in `escapingStateStep()`. * tests/paca.mjs (test_isOperator): Hoist its definition and position inside the `runTests([...])` array to match src/paca.mjs. (test_escapingStateStep): Adjust existing cases and add test case for good/bad escapes. (test_tokenizeRegexStep): Fix the bad starting escape, which broke because it was escaping a non-metacharacter. --- tests/paca.mjs | 63 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 26 deletions(-) (limited to 'tests/paca.mjs') diff --git a/tests/paca.mjs b/tests/paca.mjs index bc6e1d0..789f959 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -5,6 +5,7 @@ import { ValueError, ConcatStep, shouldConcat, + isOperator, numFromDigits, escapingStateStep, rangeStateStep, @@ -13,7 +14,6 @@ import { ANCHOR_FNS, isAnchor, isTransition, - isOperator, tokenizeRegexStep, tokenizeRegexFn, tokenizeRegex, @@ -84,6 +84,25 @@ const test_shouldConcat = t => { }); }; +const test_isOperator = t => { + t.start("isOperator()"); + + t.testing("operators and open parens are true", () => { + t.assertEq(isOperator("*"), true); + t.assertEq(isOperator("|"), true); + t.assertEq(isOperator("+"), true); + t.assertEq(isOperator("?"), true); + t.assertEq(isOperator("("), true); + t.assertEq(isOperator(")"), true); + t.assertEq(isOperator("."), true); + }); + + t.testing("false for everyday non-meta chars", () => { + t.assertEq(isOperator("a"), false); + t.assertEq(isOperator("_"), false); + }); +}; + const test_numFromDigits = t => 
{ t.start("numFromDigits()"); @@ -105,12 +124,12 @@ const test_escapingStateStep = t => { t.testing("add a concat when applicable", () => { const given = escapingStateStep( { out: [ 1, 2, 3 ] }, - "a", + "*", null, "b", ); const expected = { - out: [ 1, 2, 3, "a", { operator: "concat" } ], + out: [ 1, 2, 3, "*", { operator: "concat" } ], state: "accepting", context: undefined, }; @@ -120,17 +139,28 @@ const test_escapingStateStep = t => { t.testing("without a concat when not applicable", () => { const given = escapingStateStep( { out: [ 1, 2, 3 ] }, - "a", + "$", null, ")", ); const expected = { - out: [ 1, 2, 3, "a" ], + out: [ 1, 2, 3, "$" ], state: "accepting", context: undefined, }; t.assertEq(given, expected); }); + + t.testing("error when escaping a non-escapeable char", () => { + const { value: { error }} = escapingStateStep( + {}, + "a", + null, + null, + ); + t.assertEq(error.message, "unknown escape sequence: \\a"), + t.assertEq(error instanceof SyntaxError, true); + }); }; const test_rangeStateStep = t => { @@ -635,25 +665,6 @@ const test_isTransition = t => { }); }; -const test_isOperator = t => { - t.start("isOperator()"); - - t.testing("operators and open parens are true", () => { - t.assertEq(isOperator("*"), true); - t.assertEq(isOperator("|"), true); - t.assertEq(isOperator("+"), true); - t.assertEq(isOperator("?"), true); - t.assertEq(isOperator("("), true); - t.assertEq(isOperator(")"), true); - t.assertEq(isOperator("."), true); - }); - - t.testing("false for everyday non-meta chars", () => { - t.assertEq(isOperator("a"), false); - t.assertEq(isOperator("_"), false); - }); -}; - const test_tokenizeRegexStep = t => { t.start("tokenizeRegexStep()"); @@ -671,7 +682,7 @@ const test_tokenizeRegexStep = t => { const stepFn = tokenizeRegexStep(regex); const steps = [{ out: [], - state: ConcatStep.ESCAPING, + state: ConcatStep.ACCEPTING, context: null, }, { out: ["a", cat], @@ -3041,6 +3052,7 @@ const test_compile = t => { runTests([ test_shouldConcat, 
+ test_isOperator, test_numFromDigits, test_escapingStateStep, test_rangeStateStep, @@ -3049,7 +3061,6 @@ runTests([ test_ANCHOR_FNS, test_isAnchor, test_isTransition, - test_isOperator, test_tokenizeRegexStep, test_tokenizeRegexFn, test_tokenizeRegex, -- cgit v1.2.3