summary | refs | log | tree | commit | diff
path: root/src/paca.mjs
diff options
context:
space:
mode:
author: EuAndreh <eu@euandre.org> 2025-07-11 15:41:22 -0300
committer: EuAndreh <eu@euandre.org> 2025-07-11 15:41:22 -0300
commit: fc67be3d926d21194fca5e8ff733e0921f6e141c (patch)
tree: 6b67f16d2687ce2e3d5103f21102b6668e22aa6a /src/paca.mjs
parent: src/paca.mjs (tokenizeRegexStep): Include `context` key in reduced state (diff)
download: paca-fc67be3d926d21194fca5e8ff733e0921f6e141c.tar.gz
paca-fc67be3d926d21194fca5e8ff733e0921f6e141c.tar.xz
src/paca.mjs (tokenizeRegexStep): Support tokenizing range exps {m,n}
Diffstat (limited to '')
-rw-r--r-- src/paca.mjs 100
1 files changed, 98 insertions, 2 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index e68cd77..53a9c11 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -1,5 +1,5 @@
import {
- butlast, explode, last, mapValues, max, reduce, reduced,
+ butlast, explode, isNumeric, last, mapValues, max, reduce, reduced,
} from "sjs";
@@ -9,6 +9,7 @@ export class SyntaxError extends Error {}
const ConcatStep = { // States the tokenizer step function can be in between characters.
ACCEPTING: "accepting", // Initial state of the reduction; also restored when a range expression closes.
ESCAPING: "escaping", // NOTE(review): presumably entered after a backslash -- the assignment is not visible in this view.
+ RANGE: "range", // Entered on "{"; digits and "," are collected until the closing "}".
};
// Operator characters looked up by isOperator(); "(" is handled there separately.
// Spreading the string yields one Set entry per character, in this order.
const nonConcatOperators = new Set([..."*+?|)"]);
@@ -23,6 +24,11 @@ const shouldConcat = (char, next) =>
/**
 * Tell whether `char` is one of the regex operator characters.
 *
 * A character is an operator when it is a member of `nonConcatOperators`
 * or is the opening parenthesis "(".
 *
 * Strict equality is used so that non-string values which merely coerce
 * to "(" (for example the array ["("]) are not mistaken for an operator.
 *
 * @param {string} char - the single character to classify.
 * @returns {boolean} true when `char` is an operator character.
 */
const isOperator = char =>
	nonConcatOperators.has(char) || char === "(";
+const numFromDigits = digits => // Turn the digit characters collected for one range bound into a number.
+ digits.length === 0
+ ? -1 // No digits were collected for this bound; -1 is the sentinel for an omitted value.
+ : Number(digits.join("")); // Concatenate the digits and convert the resulting string numerically.
+
const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
const next = chars[index + 1];
const maybeConcat = shouldConcat(char, next)
@@ -37,6 +43,84 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
};
}
+ if (state === ConcatStep.RANGE) {
+ if (char === "}") {
+ if (context.where !== "to") {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "missing comma in range operator",
+ ),
+ });
+ }
+
+ const from = numFromDigits(context.from);
+ const to = numFromDigits(context.to);
+ if (from > to && to != -1) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new Error(`bad range values: {${from},${to}}`),
+ });
+ }
+ return {
+ out: out.concat({
+ operator: "range",
+ from,
+ to,
+ }),
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ };
+ }
+
+ if (char === ",") {
+ if (context.where === "to") {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "extraneuos comma in range expression",
+ ),
+ });
+ } else {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ where: "to",
+ },
+ };
+ }
+ }
+
+ if (!isNumeric(char)) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "bad char in range expression: " +
+ char,
+ ),
+ });
+ }
+
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ [context.where]: context[context.where].concat(char),
+ },
+ };
+ }
+
if (char === "\\") {
return {
out,
@@ -45,6 +129,18 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
};
}
+ if (char === "{") {
+ return {
+ out,
+ state: ConcatStep.RANGE,
+ context: {
+ from: [],
+ to: [],
+ where: "from",
+ },
+ };
+ }
+
const op = isOperator(char) ? { operator: char } : char;
return {
out: out.concat(op, maybeConcat),
@@ -54,7 +150,7 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
};
const tokenizeRegexFn = chars =>
- chars.reduce(tokenizeRegexStep(chars), {
+ reduce(chars, tokenizeRegexStep(chars), {
out: [],
state: ConcatStep.ACCEPTING,
context: null,