summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author EuAndreh <eu@euandre.org> 2025-07-15 21:37:16 -0300
committer EuAndreh <eu@euandre.org> 2025-07-15 21:44:55 -0300
commit 1ce80e005a374488c186d0f545af33096f6523d5 (patch)
tree 997f02f05a40d1fab2c9854dfdf5c687b8bb47c7
parent Support tokenizing `.` wildcard operator. (diff)
download paca-1ce80e005a374488c186d0f545af33096f6523d5.tar.gz
paca-1ce80e005a374488c186d0f545af33096f6523d5.tar.xz
Only tolerate escaping of special chars
* src/paca.mjs (escapingStateStep): Return an error when escaping non-metacharacters. This way, sequences like \d (syntax for [0-9] that will eventually be recognized) will not later change behaviour from a no-op escape of "d" to matching digits.
(operatorChars, isOperator): Hoist both of these up, before their usage in `escapingStateStep()`.
* tests/paca.mjs (test_isOperator): Hoist its definition and its position inside the `runTests([...])` array to match src/paca.mjs.
(test_escapingStateStep): Adjust existing cases and add a test case for good/bad escapes.
(test_tokenizeRegexStep): Fix the bad starting escape, which broke because it was escaping a non-metacharacter.
-rw-r--r-- src/paca.mjs 34
-rw-r--r-- tests/paca.mjs 63
2 files changed, 59 insertions, 38 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 5d11b05..7a01407 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -24,19 +24,33 @@ const shouldConcat = (char, next) =>
char !== "{" &&
!nonConcatOperators.has(next);
+const operatorChars = new Set([...nonConcatOperators, "(", "."]);
+const isOperator = char =>
+ operatorChars.has(char);
+
const numFromDigits = digits =>
digits.length === 0
? -1
: Number(digits.join(""));
-const escapingStateStep = ({ out, _state, context }, char, _index, next) => ({
- out: out.concat(
- char,
- shouldConcat(null, next) ? [{ operator: "concat" }] : [],
- ),
- state: ConcatStep.ACCEPTING,
- context,
-});
+const escapingStateStep = ({ out, state, context }, char, _index, next) =>
+ !(isOperator(char) || char === "\\")
+ ? reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "unknown escape sequence: \\" + char,
+ ),
+ })
+ : {
+ out: out.concat(
+ char,
+ shouldConcat(null, next) ? [{ operator: "concat" }] : [],
+ ),
+ state: ConcatStep.ACCEPTING,
+ context,
+ };
const rangeStateStep = ({ out, state, context }, char, _index, _next) => {
if (char === "}") {
@@ -313,10 +327,6 @@ const transitionChars = new Set(Object.keys(TRANSITION_FNS));
const isTransition = char =>
transitionChars.has(char);
-const operatorChars = new Set([...nonConcatOperators, "(", "."]);
-const isOperator = char =>
- operatorChars.has(char);
-
const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
const next = chars[index + 1];
diff --git a/tests/paca.mjs b/tests/paca.mjs
index bc6e1d0..789f959 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -5,6 +5,7 @@ import {
ValueError,
ConcatStep,
shouldConcat,
+ isOperator,
numFromDigits,
escapingStateStep,
rangeStateStep,
@@ -13,7 +14,6 @@ import {
ANCHOR_FNS,
isAnchor,
isTransition,
- isOperator,
tokenizeRegexStep,
tokenizeRegexFn,
tokenizeRegex,
@@ -84,6 +84,25 @@ const test_shouldConcat = t => {
});
};
+const test_isOperator = t => {
+ t.start("isOperator()");
+
+ t.testing("operators and open parens are true", () => {
+ t.assertEq(isOperator("*"), true);
+ t.assertEq(isOperator("|"), true);
+ t.assertEq(isOperator("+"), true);
+ t.assertEq(isOperator("?"), true);
+ t.assertEq(isOperator("("), true);
+ t.assertEq(isOperator(")"), true);
+ t.assertEq(isOperator("."), true);
+ });
+
+ t.testing("false for everyday non-meta chars", () => {
+ t.assertEq(isOperator("a"), false);
+ t.assertEq(isOperator("_"), false);
+ });
+};
+
const test_numFromDigits = t => {
t.start("numFromDigits()");
@@ -105,12 +124,12 @@ const test_escapingStateStep = t => {
t.testing("add a concat when applicable", () => {
const given = escapingStateStep(
{ out: [ 1, 2, 3 ] },
- "a",
+ "*",
null,
"b",
);
const expected = {
- out: [ 1, 2, 3, "a", { operator: "concat" } ],
+ out: [ 1, 2, 3, "*", { operator: "concat" } ],
state: "accepting",
context: undefined,
};
@@ -120,17 +139,28 @@ const test_escapingStateStep = t => {
t.testing("without a concat when not applicable", () => {
const given = escapingStateStep(
{ out: [ 1, 2, 3 ] },
- "a",
+ "$",
null,
")",
);
const expected = {
- out: [ 1, 2, 3, "a" ],
+ out: [ 1, 2, 3, "$" ],
state: "accepting",
context: undefined,
};
t.assertEq(given, expected);
});
+
+ t.testing("error when escaping a non-escapeable char", () => {
+ const { value: { error }} = escapingStateStep(
+ {},
+ "a",
+ null,
+ null,
+ );
+ t.assertEq(error.message, "unknown escape sequence: \\a"),
+ t.assertEq(error instanceof SyntaxError, true);
+ });
};
const test_rangeStateStep = t => {
@@ -635,25 +665,6 @@ const test_isTransition = t => {
});
};
-const test_isOperator = t => {
- t.start("isOperator()");
-
- t.testing("operators and open parens are true", () => {
- t.assertEq(isOperator("*"), true);
- t.assertEq(isOperator("|"), true);
- t.assertEq(isOperator("+"), true);
- t.assertEq(isOperator("?"), true);
- t.assertEq(isOperator("("), true);
- t.assertEq(isOperator(")"), true);
- t.assertEq(isOperator("."), true);
- });
-
- t.testing("false for everyday non-meta chars", () => {
- t.assertEq(isOperator("a"), false);
- t.assertEq(isOperator("_"), false);
- });
-};
-
const test_tokenizeRegexStep = t => {
t.start("tokenizeRegexStep()");
@@ -671,7 +682,7 @@ const test_tokenizeRegexStep = t => {
const stepFn = tokenizeRegexStep(regex);
const steps = [{
out: [],
- state: ConcatStep.ESCAPING,
+ state: ConcatStep.ACCEPTING,
context: null,
}, {
out: ["a", cat],
@@ -3041,6 +3052,7 @@ const test_compile = t => {
runTests([
test_shouldConcat,
+ test_isOperator,
test_numFromDigits,
test_escapingStateStep,
test_rangeStateStep,
@@ -3049,7 +3061,6 @@ runTests([
test_ANCHOR_FNS,
test_isAnchor,
test_isTransition,
- test_isOperator,
test_tokenizeRegexStep,
test_tokenizeRegexFn,
test_tokenizeRegex,