summaryrefslogtreecommitdiff
path: root/src/paca.mjs
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2025-07-15 21:37:16 -0300
committerEuAndreh <eu@euandre.org>2025-07-15 21:44:55 -0300
commit1ce80e005a374488c186d0f545af33096f6523d5 (patch)
tree997f02f05a40d1fab2c9854dfdf5c687b8bb47c7 /src/paca.mjs
parentSupport tokenizing `.` wildcard operator. (diff)
downloadpaca-1ce80e005a374488c186d0f545af33096f6523d5.tar.gz
paca-1ce80e005a374488c186d0f545af33096f6523d5.tar.xz
Only tolerate escaping of special chars
* src/paca.mjs (escapingStateStep): Return an error when escaping non-metacharacters. This way, cases like \d (syntax for [0-9] that will eventually be recognized) will not change their behaviour from a no-op escape of "d" to matching digits. (operatorChars, isOperator): Hoist both of these up before their usage in `escapingStateStep()`. * tests/paca.mjs (test_isOperator): Hoist its definition and position inside the `runTests([...])` array to match src/paca.mjs. (test_escapingStateStep): Adjust existing cases and add test cases for good/bad escapes. (test_tokenizeRegexStep): Fix bad starting escape, which broke because it was escaping a non-metacharacter.
Diffstat (limited to 'src/paca.mjs')
-rw-r--r--src/paca.mjs34
1 files changed, 22 insertions, 12 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 5d11b05..7a01407 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -24,19 +24,33 @@ const shouldConcat = (char, next) =>
char !== "{" &&
!nonConcatOperators.has(next);
+const operatorChars = new Set([...nonConcatOperators, "(", "."]);
+const isOperator = char =>
+ operatorChars.has(char);
+
const numFromDigits = digits =>
digits.length === 0
? -1
: Number(digits.join(""));
-const escapingStateStep = ({ out, _state, context }, char, _index, next) => ({
- out: out.concat(
- char,
- shouldConcat(null, next) ? [{ operator: "concat" }] : [],
- ),
- state: ConcatStep.ACCEPTING,
- context,
-});
+const escapingStateStep = ({ out, state, context }, char, _index, next) =>
+ !(isOperator(char) || char === "\\")
+ ? reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "unknown escape sequence: \\" + char,
+ ),
+ })
+ : {
+ out: out.concat(
+ char,
+ shouldConcat(null, next) ? [{ operator: "concat" }] : [],
+ ),
+ state: ConcatStep.ACCEPTING,
+ context,
+ };
const rangeStateStep = ({ out, state, context }, char, _index, _next) => {
if (char === "}") {
@@ -313,10 +327,6 @@ const transitionChars = new Set(Object.keys(TRANSITION_FNS));
const isTransition = char =>
transitionChars.has(char);
-const operatorChars = new Set([...nonConcatOperators, "(", "."]);
-const isOperator = char =>
- operatorChars.has(char);
-
const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
const next = chars[index + 1];