Support tokenizing `^` and `$` anchors

* src/paca.mjs (ANCHOR_FNS): Add simple handlers for the ^ and $ anchors that validate, during tokenization, only the position of the character in the pattern. (isAnchor): Add simple boolean function to identify anchor characters. (tokenizeRegexStep): Check whether the character `isAnchor()`, and call the appropriate `ANCHOR_FNS[char]` when true. * tests/paca.mjs (test_ANCHOR_FNS): Add test with 4 cases: 2 for success and 2 for errors, covering ^ and $. (test_isAnchor): Add obligatory simple test cases. (test_tokenizeRegexStep): Include test case for tokenizing a pattern with anchors around a character class.
author: EuAndreh <eu@euandre.org> 2025-07-15 20:43:57 -0300
committer: EuAndreh <eu@euandre.org> 2025-07-15 20:43:57 -0300
commit: 43946ca0c9e19f904b7f763b9dc590d8095e6472 (patch)
tree: a5549d003fff4459876b23539e428fc42c007508 /tests/paca.mjs
parent: tests/paca.mjs (test_shouldConcat): Also hoist import, definition and positio... (diff)
download: paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.gz
paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.xz
1 files changed, 194 insertions, 0 deletions
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 14a6cf2..134fb77 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -10,6 +10,8 @@ import {
 	rangeStateStep,
 	classStateStep,
 	TRANSITION_FNS,
+	ANCHOR_FNS,
+	isAnchor,
 	isOperator,
 	tokenizeRegexStep,
 	tokenizeRegexFn,
@@ -552,6 +554,70 @@ const test_TRANSITION_FNS = t => {
 	});
 };
 
+const test_ANCHOR_FNS = t => {
+	t.start("ANCHOR_FNS");
+
+	t.testing(`"^" error when not the first char`, () => {
+		const { value: { error }} = ANCHOR_FNS["^"]({}, null, 1, null);
+		const message = "^ not at the start of the expression"
+		t.assertEq(error.message, message);
+		t.assertEq(error instanceof SyntaxError, true);
+	});
+
+	t.testing(`"$" error when not the last char`, () => {
+		const { value: { error }} = ANCHOR_FNS["$"](
+			{},
+			null,
+			null,
+			"a",
+		);
+		const message = "$ not at the end of the expression";
+		t.assertEq(error.message, message);
+		t.assertEq(error instanceof SyntaxError, true);
+	});
+
+	t.testing("caret operator gets added to output", () => {
+		const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null);
+		const expected = {
+			out:     [ 1, { operator: "caret" } ],
+			state:   undefined,
+			context: undefined,
+		};
+		t.assertEq(given, expected);
+	});
+
+	t.testing("dollar operator gets added to output", () => {
+		const given = ANCHOR_FNS["$"](
+			{ out: [ 2 ] },
+			null,
+			null,
+			undefined,
+		);
+		const expected = {
+			out:     [ 2, { operator: "dollar" } ],
+			state:   undefined,
+			context: undefined,
+		};
+		t.assertEq(given, expected);
+	});
+};
+
+const test_isAnchor = t => {
+	t.start("isAnchor()");
+
+	t.testing("anchors are true", () => {
+		t.assertEq(isAnchor("^"), true);
+		t.assertEq(isAnchor("$"), true);
+	});
+
+	t.testing("false for everything else", () => {
+		t.assertEq(isAnchor("*"),  false);
+		t.assertEq(isAnchor("\\"), false);
+		t.assertEq(isAnchor("a"),  false);
+		t.assertEq(isAnchor("_"),  false);
+	});
+};
+
 const test_isOperator = t => {
 	t.start("isOperator()");
 
@@ -578,6 +644,8 @@ const test_tokenizeRegexStep = t => {
 	const oparen = { operator: "(" };
 	const cparen = { operator: ")" };
 	const star   = { operator: "*" };
+	const caret  = { operator: "caret"  };
+	const dollar = { operator: "dollar" };
 
 
 	t.testing("when escaping we get whatever the char is", () => {
@@ -722,6 +790,130 @@ const test_tokenizeRegexStep = t => {
 		t.assertEq(given, steps);
 	});
 
+	t.testing("anchors get detected as such", () => {
+		const regex = "^[behilos]*$";
+		const stepFn = tokenizeRegexStep(regex);
+		const steps = [{
+			out:     [],
+			state:   ConcatStep.ACCEPTING,
+			context: null,
+		}, {
+			out:     [caret],
+			state:   ConcatStep.ACCEPTING,
+			context: null,
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: [],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b"],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b", "e"],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b", "e", "h"],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b", "e", "h", "i"],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b", "e", "h", "i", "l"],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b", "e", "h", "i", "l", "o"],
+			},
+		}, {
+			out:     [caret],
+			state:   ConcatStep.CLASS,
+			context: {
+				range: {
+					from:  null,
+					where: "from",
+				},
+				set: ["b", "e", "h", "i", "l", "o", "s"],
+			},
+		}, {
+			out: [caret, {
+				operator: "class",
+				set: [ "b", "e", "h", "i", "l", "o", "s" ],
+			}],
+			state:   "accepting",
+			context: null,
+		}, {
+			out: [caret, {
+				operator: "class",
+				set: [ "b", "e", "h", "i", "l", "o", "s" ],
+			}, star],
+			state:   "accepting",
+			context: null,
+		}, {
+			out: [caret, {
+				operator: "class",
+				set: [ "b", "e", "h", "i", "l", "o", "s" ],
+			}, star, dollar],
+			state:   "accepting",
+			context: null,
+		}];
+		const given = reductions(
+			steps,
+			(acc, el, i) => {
+				const ret = stepFn(acc, regex[i], i);
+				t.assertEq(ret, el);
+				return ret;
+			},
+		);
+		t.assertEq(given, steps);
+	});
+
 	t.testing("multichar range operator {m,n} is parsed right", () => {
 		const table = [{
 			regex: "a{1,2}",
@@ -2836,6 +3028,8 @@ runTests([
 	test_rangeStateStep,
 	test_classStateStep,
 	test_TRANSITION_FNS,
+	test_ANCHOR_FNS,
+	test_isAnchor,
 	test_isOperator,
 	test_tokenizeRegexStep,
 	test_tokenizeRegexFn,
author	EuAndreh <eu@euandre.org>	2025-07-15 20:43:57 -0300
committer	EuAndreh <eu@euandre.org>	2025-07-15 20:43:57 -0300
commit	43946ca0c9e19f904b7f763b9dc590d8095e6472 (patch)
tree	a5549d003fff4459876b23539e428fc42c007508 /tests/paca.mjs
parent	tests/paca.mjs (test_shouldConcat): Also hoist import, definition and positio... (diff)
download	paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.gz paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.xz