summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/paca.mjs 49
-rw-r--r-- tests/paca.mjs 194
2 files changed, 242 insertions, 1 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index ae14538..42c2f88 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -15,7 +15,7 @@ const ConcatStep = {
CLASS: "class",
};
-const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);
+// "$" joins the existing postfix, alternation and close-paren entries.
+// NOTE(review): presumably shouldConcat() tests the *next* char against this
+// set, so no implicit concat is emitted in front of "$" — confirm.
+const nonConcatOperators = new Set(["*", "+", "?", "|", ")", "$"]);
const shouldConcat = (char, next) =>
next !== undefined &&
@@ -272,6 +272,44 @@ const TRANSITION_FNS = {
}),
};
+/**
+ * Tokenizer handlers for the anchor characters, keyed by the character.
+ *
+ * Each handler takes the accumulator `{ out, state, context }`, the current
+ * char, its index and the following char.  A misplaced anchor returns
+ * `reduced(...)` over the accumulator plus a SyntaxError under `error`; a
+ * well placed one returns a new accumulator whose `out` has the anchor
+ * operator appended, with `state` and `context` passed through unchanged.
+ */
+const ANCHOR_FNS = {
+    "^": (acc, _char, index, _next) => {
+        const { out, state, context } = acc;
+        // "^" is only accepted as the very first character.
+        if (index === 0) {
+            const caret = { operator: "caret" };
+            return { out: out.concat(caret), state, context };
+        }
+        const error = new SyntaxError(
+            "^ not at the start of the expression",
+        );
+        return reduced({ out, state, context, error });
+    },
+    "$": (acc, _char, _index, next) => {
+        const { out, state, context } = acc;
+        // "$" is only accepted when no character follows it.
+        if (next === undefined) {
+            const dollar = { operator: "dollar" };
+            return { out: out.concat(dollar), state, context };
+        }
+        const error = new SyntaxError(
+            "$ not at the end of the expression",
+        );
+        return reduced({ out, state, context, error });
+    },
+};
+
+// Every character that has a handler registered in ANCHOR_FNS.
+const anchors = new Set(Object.keys(ANCHOR_FNS));
+
+/** Tells whether `char` is one of the anchor characters. */
+const isAnchor = char => anchors.has(char);
+
const stateTransitionOperators = new Set(Object.keys(TRANSITION_FNS));
const isOperator = char =>
@@ -298,6 +336,15 @@ const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
);
}
+ if (isAnchor(char)) {
+ return ANCHOR_FNS[char](
+ { out, state, context },
+ char,
+ index,
+ next,
+ );
+ }
+
const op = isOperator(char) ? { operator: char } : char;
return {
out: out.concat(
diff --git a/tests/paca.mjs b/tests/paca.mjs
index 14a6cf2..134fb77 100644
--- a/tests/paca.mjs
+++ b/tests/paca.mjs
@@ -10,6 +10,8 @@ import {
rangeStateStep,
classStateStep,
TRANSITION_FNS,
+ ANCHOR_FNS,
+ isAnchor,
isOperator,
tokenizeRegexStep,
tokenizeRegexFn,
@@ -552,6 +554,70 @@ const test_TRANSITION_FNS = t => {
});
};
+/**
+ * Unit tests for ANCHOR_FNS: the two misplaced-anchor error paths and the
+ * two paths that append the anchor operator to `out`.
+ *
+ * @param t test harness object providing start(), testing() and assertEq()
+ */
+const test_ANCHOR_FNS = t => {
+    t.start("ANCHOR_FNS");
+
+    t.testing(`"^" error when not the first char`, () => {
+        // index 1 is not 0, so the handler takes its error branch
+        const { value: { error }} = ANCHOR_FNS["^"]({}, null, 1, null);
+        const message = "^ not at the start of the expression";
+        t.assertEq(error.message, message);
+        t.assertEq(error instanceof SyntaxError, true);
+    });
+
+    t.testing(`"$" error when not the last char`, () => {
+        // a defined `next` ("a") means "$" is not the last character
+        const { value: { error }} = ANCHOR_FNS["$"](
+            {},
+            null,
+            null,
+            "a",
+        );
+        const message = "$ not at the end of the expression";
+        t.assertEq(error.message, message);
+        t.assertEq(error instanceof SyntaxError, true);
+    });
+
+    t.testing("caret operator gets added to output", () => {
+        const given = ANCHOR_FNS["^"]({ out: [ 1 ] }, null, 0, null);
+        // state and context were absent from the input, so they come
+        // back as undefined
+        const expected = {
+            out: [ 1, { operator: "caret" } ],
+            state: undefined,
+            context: undefined,
+        };
+        t.assertEq(given, expected);
+    });
+
+    t.testing("dollar operator gets added to output", () => {
+        // `next` undefined stands for "nothing follows the $"
+        const given = ANCHOR_FNS["$"](
+            { out: [ 2 ] },
+            null,
+            null,
+            undefined,
+        );
+        const expected = {
+            out: [ 2, { operator: "dollar" } ],
+            state: undefined,
+            context: undefined,
+        };
+        t.assertEq(given, expected);
+    });
+};
+
+/**
+ * Unit tests for isAnchor(): true for "^" and "$", false for a sample of
+ * other characters.
+ */
+const test_isAnchor = t => {
+    t.start("isAnchor()");
+
+    t.testing("anchors are true", () => {
+        for (const char of ["^", "$"]) {
+            t.assertEq(isAnchor(char), true);
+        }
+    });
+
+    t.testing("false for everything else", () => {
+        for (const char of ["*", "\\", "a", "_"]) {
+            t.assertEq(isAnchor(char), false);
+        }
+    });
+};
+
const test_isOperator = t => {
t.start("isOperator()");
@@ -578,6 +644,8 @@ const test_tokenizeRegexStep = t => {
const oparen = { operator: "(" };
const cparen = { operator: ")" };
const star = { operator: "*" };
+ const caret = { operator: "caret" };
+ const dollar = { operator: "dollar" };
t.testing("when escaping we get whatever the char is", () => {
@@ -722,6 +790,130 @@ const test_tokenizeRegexStep = t => {
t.assertEq(given, steps);
});
+ // Feeds "^[behilos]*$" through tokenizeRegexStep and compares every
+ // returned accumulator against the matching entry of `steps`.
+ // NOTE(review): presumably reductions() seeds the fold with steps[0] and
+ // calls back once per remaining entry, so steps[k] would be the
+ // accumulator after k chars — confirm against reductions().
+ t.testing("anchors get detected as such", () => {
+ const regex = "^[behilos]*$";
+ const stepFn = tokenizeRegexStep(regex);
+ const steps = [{
+ out: [],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ }, {
+ // caret operator appended; state and context unchanged
+ out: [caret],
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ }, {
+ // class state with an empty set; the next entries add one char each
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: [],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i", "l"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i", "l", "o"],
+ },
+ }, {
+ out: [caret],
+ state: ConcatStep.CLASS,
+ context: {
+ range: {
+ from: null,
+ where: "from",
+ },
+ set: ["b", "e", "h", "i", "l", "o", "s"],
+ },
+ }, {
+ // the collected set shows up in out as a single "class" operator
+ out: [caret, {
+ operator: "class",
+ set: [ "b", "e", "h", "i", "l", "o", "s" ],
+ }],
+ state: "accepting",
+ context: null,
+ }, {
+ out: [caret, {
+ operator: "class",
+ set: [ "b", "e", "h", "i", "l", "o", "s" ],
+ }, star],
+ state: "accepting",
+ context: null,
+ }, {
+ // last entry: the dollar operator closes the token list
+ out: [caret, {
+ operator: "class",
+ set: [ "b", "e", "h", "i", "l", "o", "s" ],
+ }, star, dollar],
+ state: "accepting",
+ context: null,
+ }];
+ const given = reductions(
+ steps,
+ (acc, el, i) => {
+ // step once with the i-th char and check it against the entry
+ const ret = stepFn(acc, regex[i], i);
+ t.assertEq(ret, el);
+ return ret;
+ },
+ );
+ t.assertEq(given, steps);
+ });
+
t.testing("multichar range operator {m,n} is parsed right", () => {
const table = [{
regex: "a{1,2}",
@@ -2836,6 +3028,8 @@ runTests([
test_rangeStateStep,
test_classStateStep,
test_TRANSITION_FNS,
+ test_ANCHOR_FNS,
+ test_isAnchor,
test_isOperator,
test_tokenizeRegexStep,
test_tokenizeRegexFn,