src/paca.mjs (tokenizeRegexStep): Simplify body

When handling a custom state, dispatch to the appropriate function in `STATE_FNS`; and when encountering a char that enters one of these custom states, dispatch to the appropriate function in `TRANSITION_FNS`. The body of each part didn't change, so no tests had to be modified. But now we can write specific tests for each case, and move the bulk of the logic out of `tokenizeRegexFn()`.
author: EuAndreh <eu@euandre.org> 2025-07-11 21:49:57 -0300
committer: EuAndreh <eu@euandre.org> 2025-07-11 21:50:04 -0300
commit: 42001c72522293fe9ba2ba17901eda89add6f3fa (patch)
tree: 5f513170bb984f22e092c6aeb36d859ffaaf5040 /src
parent: tests/paca.mjs (test_tokenizeRegexStep): Simplify table values (diff)
download: paca-42001c72522293fe9ba2ba17901eda89add6f3fa.tar.gz
paca-42001c72522293fe9ba2ba17901eda89add6f3fa.tar.xz
1 files changed, 127 insertions, 105 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index b30d4cd..e8ec542 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -12,133 +12,155 @@ const ConcatStep = {
 	RANGE:     "range",
 };
 
-const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);
-
-const shouldConcat = (char, next) =>
-	next !== undefined &&
-	char !== "(" &&
-	char !== "|" &&
-	char !== "{" &&
-	!nonConcatOperators.has(next);
-
-const isOperator = char =>
-	nonConcatOperators.has(char) || char == "(";
-
-const numFromDigits = digits =>
-	digits.length === 0
-		? -1
-		: Number(digits.join(""));
+const escapingStateStep = ({ out, state, context }, char, index, next) => ({
+	out: out.concat(
+		char,
+		next !== undefined ? {operator: "concat"} : [],
+	),
+	state: ConcatStep.ACCEPTING,
+	context,
+});
 
-const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
-	const next = chars[index + 1];
+const rangeStateStep = ({ out, state, context }, char, index, next) => {
+	if (char === "}") {
+		if (context.where !== "to") {
+			return reduced({
+				out,
+				state,
+				context,
+				error: new SyntaxError(
+					"missing comma in range operator",
+				),
+			});
+		}
 
-	if (state === ConcatStep.ESCAPING) {
+		const from = numFromDigits(context.from);
+		const to   = numFromDigits(context.to);
+		if (from > to && to != -1) {
+			return reduced({
+				out,
+				state,
+				context,
+				error: new Error(
+					`bad range values: {${from},${to}}`,
+				),
+			});
+		}
 		return {
-			out: out.concat(
-				char,
-				next !== undefined ? {operator: "concat"} : [],
-			),
+			out: out.concat({
+				operator: "range",
+				from,
+				to,
+				}),
 			state: ConcatStep.ACCEPTING,
-			context,
+			context: null,
 		};
 	}
 
-	if (state === ConcatStep.RANGE) {
-		if (char === "}") {
-			if (context.where !== "to") {
-				return reduced({
-					out,
-					state,
-					context,
-					error: new SyntaxError(
-						"missing comma in range operator",
-					),
-				});
-			}
-
-			const from = numFromDigits(context.from);
-			const to   = numFromDigits(context.to);
-			if (from > to && to != -1) {
-				return reduced({
-					out,
-					state,
-					context,
-					error: new Error(`bad range values: {${from},${to}}`),
-				});
-			}
-			return {
-				out: out.concat({
-					operator: "range",
-					from,
-					to,
-				}),
-				state: ConcatStep.ACCEPTING,
-				context: null,
-			};
-		}
-
-		if (char === ",") {
-			if (context.where === "to") {
-				return reduced({
-					out,
-					state,
-					context,
-					error: new SyntaxError(
-						"extraneuos comma in range expression",
-					),
-				});
-			} else {
-				return {
-					out,
-					state,
-					context: {
-						...context,
-						where: "to",
-					},
-				};
-			}
-		}
-
-		if (!isNumeric(char)) {
+	if (char === ",") {
+		if (context.where === "to") {
 			return reduced({
 				out,
 				state,
 				context,
 				error: new SyntaxError(
-					"bad char in range expression: " +
-						char,
+					"extraneuos comma in range expression",
 				),
 			});
+		} else {
+			return {
+				out,
+				state,
+				context: {
+					...context,
+					where: "to",
+				},
+			};
 		}
+	}
 
-		return {
+	if (!isNumeric(char)) {
+		return reduced({
 			out,
 			state,
-			context: {
-				...context,
-				[context.where]: context[context.where].concat(char),
-			},
-		};
+			context,
+			error: new SyntaxError(
+				"bad char in range expression: " +
+					char,
+			),
+		});
 	}
 
-	if (char === "\\") {
-		return {
-			out,
-			state: ConcatStep.ESCAPING,
-			context,
-		};
+	return {
+		out,
+		state,
+		context: {
+			...context,
+			[context.where]: context[context.where].concat(char),
+		},
+	};
+};
+
+const STATE_FNS = {
+	[ConcatStep.ESCAPING]: escapingStateStep,
+	[ConcatStep.RANGE   ]:    rangeStateStep,
+};
+
+const TRANSITION_FNS = {
+	"\\": ({ out, state, context }, char, index, next) => ({
+		out,
+		state: ConcatStep.ESCAPING,
+		context,
+	}),
+	"{": ({ out, state, context }, char, index, next) => ({
+		out,
+		state: ConcatStep.RANGE,
+		context: {
+			from:  [],
+			to:    [],
+			where: "from",
+		},
+	}),
+};
+
+const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS));
+
+const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);
+
+const shouldConcat = (char, next) =>
+	next !== undefined &&
+	char !== "(" &&
+	char !== "|" &&
+	char !== "{" &&
+	!nonConcatOperators.has(next);
+
+const isOperator = char =>
+	nonConcatOperators.has(char) || char == "(";
+
+const numFromDigits = digits =>
+	digits.length === 0
+		? -1
+		: Number(digits.join(""));
+
+const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
+	const next = chars[index + 1];
+
+	if (state !== ConcatStep.ACCEPTING) {
+		return STATE_FNS[state](
+			{ out, state, context },
+			char,
+			index,
+			next,
+		);
 	}
 
-	if (char === "{") {
-		return {
-			out,
-			state: ConcatStep.RANGE,
-			context: {
-				from:  [],
-				to:    [],
-				where: "from",
-			},
-		};
+	if (stateTransitionOperators.has(char)) {
+		return TRANSITION_FNS[char](
+			{ out, state, context },
+			char,
+			index,
+			next,
+		);
 	}
 
 	const op = isOperator(char) ? { operator: char } : char;
author	EuAndreh <eu@euandre.org>	2025-07-11 21:49:57 -0300
committer	EuAndreh <eu@euandre.org>	2025-07-11 21:50:04 -0300
commit	42001c72522293fe9ba2ba17901eda89add6f3fa (patch)
tree	5f513170bb984f22e092c6aeb36d859ffaaf5040 /src
parent	tests/paca.mjs (test_tokenizeRegexStep): Simplify table values (diff)
download	paca-42001c72522293fe9ba2ba17901eda89add6f3fa.tar.gz paca-42001c72522293fe9ba2ba17901eda89add6f3fa.tar.xz