summaryrefslogtreecommitdiff
path: root/src/paca.mjs
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2025-07-16 07:18:42 -0300
committerEuAndreh <eu@euandre.org>2025-07-16 07:18:42 -0300
commit5d135ee551fa29574cbd558b4feaa46328d54bd4 (patch)
treed2a3d79b563227b92d40b9184c1c70f9f2185be3 /src/paca.mjs
parentOnly tolerate escaping of special chars (diff)
downloadpaca-5d135ee551fa29574cbd558b4feaa46328d54bd4.tar.gz
paca-5d135ee551fa29574cbd558b4feaa46328d54bd4.tar.xz
Differentiate an "operator" from a "meta" character
The character class `[a-z]`, and especially the wildcard `.`, aren't operators: they really do represent themselves with their own special semantics, and they take no operands. So instead of having the "operator" type behave in two ways, with and without arguments, we introduce a new type, the "meta" character. Like a literal character, a metacharacter represents itself and takes no argument. This also lets us leave the precedence parsing of operators untouched, rather than tainting it with special conditions for "." and "class", since they should behave just like literal characters: be pushed directly onto the stack. As of now, there are only 2 meta characters: "class" and ".". * src/paca.mjs (operatorChars): Remove "." from the set of operator characters. (classStateStep): Return `{ meta: "class" }` instead of `{ operator: "class" }`. (isMeta): Add equivalent to `isTransition()` and `isOperator()`. (opFor, tokenizeRegexStep): Add new `opFor()` function for classifying a given character, choosing between an operator, a metacharacter and a literal character, and use it in the body of `tokenizeRegexStep()`. (PRECEDENCE): Remove early entry of precedence values for "class" and ".". (toPostfixStep): Instead of just checking if a character is a literal one before pushing it onto the stack, check that it isn't an operator just by checking if it is an object that has the `operator` attribute. * tests/paca.mjs (test_isOperator): Remove test case for ".", as it is no longer considered an operator. (classStateStep): Update to rename from `{ operator: "class" }` to `{ meta: "class" }`. (test_toPostfixStep, test_toPostfix): Add test cases for meta characters. (test_OPERATOR_FNS): BONUS - use direct assignment to reset the array to an empty value instead of `arr.splice(0)`.
Diffstat (limited to 'src/paca.mjs')
-rw-r--r--src/paca.mjs22
1 files changed, 14 insertions, 8 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 7a01407..ccff1ca 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -24,7 +24,7 @@ const shouldConcat = (char, next) =>
char !== "{" &&
!nonConcatOperators.has(next);
-const operatorChars = new Set([...nonConcatOperators, "(", "."]);
+const operatorChars = new Set([...nonConcatOperators, "("]);
const isOperator = char =>
operatorChars.has(char);
@@ -167,8 +167,8 @@ const classStateStep = ({ out, state, context }, char, _index, _next) => {
return {
out: out.concat({
- operator: "class",
- set: context.set,
+ meta: "class",
+ set: context.set,
}),
state: ConcatStep.ACCEPTING,
context: null,
@@ -327,6 +327,15 @@ const transitionChars = new Set(Object.keys(TRANSITION_FNS));
const isTransition = char =>
transitionChars.has(char);
+const metaChars = new Set(["."]);
+const isMeta = char =>
+ metaChars.has(char);
+
+const opFor = char =>
+ isOperator(char) ? { operator: char }
+ : isMeta(char) ? { meta: char }
+ : char;
+
const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
const next = chars[index + 1];
@@ -357,10 +366,9 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
);
}
- const op = isOperator(char) ? { operator: char } : char;
return {
out: out.concat(
- op,
+ opFor(char),
shouldConcat(char, next)
? [{ operator: "concat" }]
: [],
@@ -398,8 +406,6 @@ const PRECEDENCE = {
"range": 3,
"concat": 2,
"|": 1,
- "class": 1,
- ".": 1,
};
const shouldPush = (stack, token) =>
@@ -414,7 +420,7 @@ const findLowerPrecedenceItem = (stack, token) =>
);
const toPostfixStep = ({ out, stack }, token, _index, tokens) => {
- if (typeof token === "string") {
+ if (!token.operator) {
return {
out: out.concat(token),
stack,