diff options
| author | EuAndreh <eu@euandre.org> | 2025-07-11 15:29:22 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2025-07-11 15:29:22 -0300 |
| commit | ebcaf686e938e31e160bc0610df2a0c52c472ff9 (patch) | |
| tree | 55167c21269a8ce7c8e6cda79b0959cc3f03febf | |
| parent | src/paca.mjs: Move error detection from tokenizeRegexStep => tokenizeRegex (diff) | |
| download | paca-ebcaf686e938e31e160bc0610df2a0c52c472ff9.tar.gz paca-ebcaf686e938e31e160bc0610df2a0c52c472ff9.tar.xz | |
src/paca.mjs (tokenizeRegexStep): Include `context` key in reduced state
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/paca.mjs | 11 |
| -rw-r--r-- | tests/paca.mjs | 262 |
2 files changed, 152 insertions, 121 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index 380200a..e68cd77 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -17,12 +17,13 @@ const shouldConcat = (char, next) => next !== undefined && char !== "(" && char !== "|" && + char !== "{" && !nonConcatOperators.has(next); const isOperator = char => nonConcatOperators.has(char) || char == "("; -const tokenizeRegexStep = chars => ({ out, state }, char, index) => { +const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => { const next = chars[index + 1]; const maybeConcat = shouldConcat(char, next) ? [{operator: "concat"}] @@ -32,6 +33,7 @@ const tokenizeRegexStep = chars => ({ out, state }, char, index) => { return { out: out.concat(char, maybeConcat), state: ConcatStep.ACCEPTING, + context, }; } @@ -39,6 +41,7 @@ const tokenizeRegexStep = chars => ({ out, state }, char, index) => { return { out, state: ConcatStep.ESCAPING, + context, }; } @@ -46,13 +49,15 @@ const tokenizeRegexStep = chars => ({ out, state }, char, index) => { return { out: out.concat(op, maybeConcat), state, + context, }; }; const tokenizeRegexFn = chars => chars.reduce(tokenizeRegexStep(chars), { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }); const tokenizeRegex = chars => { diff --git a/tests/paca.mjs b/tests/paca.mjs index aa7333b..82102f1 100644 --- a/tests/paca.mjs +++ b/tests/paca.mjs @@ -97,75 +97,91 @@ const test_tokenizeRegexStep = t => { const stepFn = tokenizeRegexStep(regex); const steps = [{ in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["a", { operator: "concat" }], - state: ConcatStep.ACCEPTING, + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["b"], - state: ConcatStep.ACCEPTING, + out: ["b"], + state: 
ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["|"], - state: ConcatStep.ACCEPTING, + out: ["|"], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["("], - state: ConcatStep.ACCEPTING, + out: ["("], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["c", { operator: "concat" }], - state: ConcatStep.ACCEPTING, + out: ["c", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["d"], - state: ConcatStep.ACCEPTING, + out: ["d"], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: [")"], - state: ConcatStep.ACCEPTING, + out: [")"], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["*"], - state: ConcatStep.ACCEPTING, + out: ["*"], + state: ConcatStep.ACCEPTING, + context: null, }, }]; for (const i in regex) { @@ -179,154 +195,160 @@ const test_tokenizeRegexStep = t => { }); t.testing("escape makes it enter escaping mode", () => { - const stepFn = tokenizeRegexStep("\\a\\*"); + const regex = "\\a\\*"; + const stepFn = tokenizeRegexStep(regex); const steps = [{ - char: "\\", - index: 0, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, 
}, { - char: "a", - index: 1, in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["a", { operator: "concat" }], - state: ConcatStep.ACCEPTING, + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "\\", - index: 2, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, }, { - char: "*", - index: 3, in: { - out: [], - state: ConcatStep.ESCAPING, + out: [], + state: ConcatStep.ESCAPING, + context: null, }, out: { - out: ["*"], - state: ConcatStep.ACCEPTING, + out: ["*"], + state: ConcatStep.ACCEPTING, + context: null, }, }]; - for (const step of steps) { + for (const i in regex) { + const step = steps[i]; + const char = regex[i]; t.assertEq( - stepFn(step.in, step.char, step.index), + stepFn(step.in, char, Number(i)), step.out, ); } }); t.testing("operators get detected as such", () => { - const stepFn = tokenizeRegexStep("ab|(cd)*"); + const regex = "ab|(cd)*"; + const stepFn = tokenizeRegexStep(regex); const steps = [{ - char: "a", - index: 0, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: ["a", { operator: "concat" }], - state: ConcatStep.ACCEPTING, + out: ["a", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "b", - index: 1, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: ["b"], - state: ConcatStep.ACCEPTING, + out: ["b"], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "|", - index: 2, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: [{ operator: "|" }], - state: ConcatStep.ACCEPTING, + out: [{ operator: 
"|" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "(", - index: 3, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: [{ operator: "(" }], - state: ConcatStep.ACCEPTING, + out: [{ operator: "(" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "c", - index: 4, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: ["c", { operator: "concat" }], - state: ConcatStep.ACCEPTING, + out: ["c", { operator: "concat" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "d", - index: 5, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: ["d"], - state: ConcatStep.ACCEPTING, + out: ["d"], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: ")", - index: 6, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: [{ operator: ")" }], - state: ConcatStep.ACCEPTING, + out: [{ operator: ")" }], + state: ConcatStep.ACCEPTING, + context: null, }, }, { - char: "*", - index: 7, in: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, out: { - out: [{ operator: "*" }], - state: ConcatStep.ACCEPTING, + out: [{ operator: "*" }], + state: ConcatStep.ACCEPTING, + context: null, }, }]; - for (const step of steps) { + for (const i in regex) { + const step = steps[i]; + const char = regex[i]; t.assertEq( - stepFn(step.in, step.char, step.index), + stepFn(step.in, char, Number(i)), step.out, ); } @@ -342,26 +364,30 @@ const test_tokenizeRegexFn = t => { const table = [{ in: "", expected: { - out: [], - state: ConcatStep.ACCEPTING, + out: [], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: "a", expected: { - out: ["a"], - state: ConcatStep.ACCEPTING, + out: ["a"], + state: 
ConcatStep.ACCEPTING, + context: null, }, }, { in: "a*", expected: { - out: ["a", star], - state: ConcatStep.ACCEPTING, + out: ["a", star], + state: ConcatStep.ACCEPTING, + context: null, }, }, { in: "a*b", expected: { - out: ["a", star, concat, "b"], - state: ConcatStep.ACCEPTING, + out: ["a", star, concat, "b"], + state: ConcatStep.ACCEPTING, + context: null, }, }]; for (const test of table) { |
