summaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2025-07-15 20:43:57 -0300
committerEuAndreh <eu@euandre.org>2025-07-15 20:43:57 -0300
commit43946ca0c9e19f904b7f763b9dc590d8095e6472 (patch)
treea5549d003fff4459876b23539e428fc42c007508 /tests
parenttests/paca.mjs (test_shouldConcat): Also hoist import, definition and positio... (diff)
downloadpaca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.gz
paca-43946ca0c9e19f904b7f763b9dc590d8095e6472.tar.xz
Support tokenizing `^` and `$` anchors
* src/paca.mjs (ANCHOR_FNS): Add simple handlers for the `^` and `$` anchors that, as validation during tokenization, only check the position of the character in the pattern.
(isAnchor): Add simple boolean function to identify anchor characters.
(tokenizeRegexStep): Check whether the character `isAnchor()`, and call the appropriate `ANCHOR_FNS[char]` when true.
* tests/paca.mjs (test_ANCHOR_FNS): Add test with 4 cases: 2 for success and 2 for errors, covering `^` and `$`.
(test_isAnchor): Add obligatory simple test cases.
(test_tokenizeRegexStep): Include test case for tokenizing a pattern with anchors around a character class.
Diffstat (limited to '')
-rw-r--r--tests/paca.mjs194
1 files changed, 194 insertions, 0 deletions
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 14a6cf2..134fb77 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -10,6 +10,8 @@ import {
rangeStateStep,
classStateStep,
TRANSITION_FNS,
+ ANCHOR_FNS,
+ isAnchor,
isOperator,
tokenizeRegexStep,
tokenizeRegexFn,
@@ -552,6 +554,70 @@ const test_TRANSITION_FNS = t => {
});
};
+const test_ANCHOR_FNS = t => {
+ t.start("ANCHOR_FNS");
+
+ t.testing(`"^" error when not the first char`, () => {
+ const { value: { error }} = ANCHOR_FNS["^"]({}, null, 1, null);
+ const message = "^ not at the start of the expression"
+ t.assertEq(error.message, message);
+ t.assertEq(error instanceof SyntaxError, true);
+ });
+
+ t.testing(`"$" error when not the last char`, () => {
+ const { value: { error }} = ANCHOR_FNS["$"](
+ {},
+ null,
+ null,
+ "a",
+ );
+ const message = "$ not at the end of the expression";
+ t.assertEq(error.message, message);
+ t.assertEq(error instanceof SyntaxError, true);
+ });
+
+ t.testing("caret operator gets added to output", () => {
+ const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null);
+ const expected = {
+ out: [ 1, { operator: "caret" } ],
+ state: undefined,
+ context: undefined,
+ };
+ t.assertEq(given, expected);
+ });
+
+ t.testing("dollar operator gets added to output", () => {
+ const given = ANCHOR_FNS["$"](
+ { out: [ 2 ] },
+ null,
+ null,
+ undefined,
+ );
+ const expected = {
+ out: [ 2, { operator: "dollar" } ],
+ state: undefined,
+ context: undefined,
+ };
+ t.assertEq(given, expected);
+ });
+};
+
+const test_isAnchor = t => {
+ t.start("isAnchor()");
+
+ t.testing("anchors are true", () => {
+ t.assertEq(isAnchor("^"), true);
+ t.assertEq(isAnchor("$"), true);
+ });
+
+ t.testing("false for everything else", () => {
+ t.assertEq(isAnchor("*"), false);
+ t.assertEq(isAnchor("\\"), false);
+ t.assertEq(isAnchor("a"), false);
+ t.assertEq(isAnchor("_"), false);
+ });
+};
+
const test_isOperator = t => {
t.start("isOperator()");
@@ -578,6 +644,8 @@ const test_tokenizeRegexStep = t => {
const oparen = { operator: "(" };
const cparen = { operator: ")" };
const star = { operator: "*" };
+ const caret = { operator: "caret" };
+ const dollar = { operator: "dollar" };
t.testing("when escaping we get whatever the char is", () => {
@@ -722,6 +790,130 @@ const test_tokenizeRegexStep = t => {
t.assertEq(given, steps);
});
+ t.testing("anchors get detected as such", () => {
+ const regex = "^[behilos]*$";
+ const stepFn = tokenizeRegexStep(regex);
+ const steps = [{
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ }, {
+ out: [caret],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: [],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i", "l"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i", "l", "o"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i", "l", "o", "s"],
+ },
+ }, {
+ out: [caret, {
+ operator: "class",
+ set: [ "b", "e", "h", "i", "l", "o", "s" ],
+ }],
+ state: "accepting",
+ context: null,
+ }, {
+ out: [caret, {
+ operator: "class",
+ set: [ "b", "e", "h", "i", "l", "o", "s" ],
+ }, star],
+ state: "accepting",
+ context: null,
+ }, {
+ out: [caret, {
+ operator: "class",
+ set: [ "b", "e", "h", "i", "l", "o", "s" ],
+ }, star, dollar],
+ state: "accepting",
+ context: null,
+ }];
+ const given = reductions(
+ steps,
+ (acc, el, i) => {
+ const ret = stepFn(acc, regex[i], i);
+ t.assertEq(ret, el);
+ return ret;
+ },
+ );
+ t.assertEq(given, steps);
+ });
+
t.testing("multichar range operator {m,n} is parsed right", () => {
const table = [{
regex: "a{1,2}",
@@ -2836,6 +3028,8 @@ runTests([
test_rangeStateStep,
test_classStateStep,
test_TRANSITION_FNS,
+ test_ANCHOR_FNS,
+ test_isAnchor,
test_isOperator,
test_tokenizeRegexStep,
test_tokenizeRegexFn,