summaryrefslogtreecommitdiff
path: root/src/paca.mjs
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2025-07-15 13:55:36 -0300
committerEuAndreh <eu@euandre.org>2025-07-15 13:55:36 -0300
commitc4f360baf07107468667832be66e5e9c3f92d4b6 (patch)
tree321c4f36f43543491c5bd4a5f9ff33ec8086da20 /src/paca.mjs
parenttests/paca.mjs (test_rangeStateStep): Finish test cases for rangeStateStep (diff)
downloadpaca-c4f360baf07107468667832be66e5e9c3f92d4b6.tar.gz
paca-c4f360baf07107468667832be66e5e9c3f92d4b6.tar.xz
Support tokenizing character class expressions [a-z]
* src/paca.mjs (classStateStep): New function equivalent to `rangeStateStep()` for character class expressions. For now it knows how to handle escaping ([abc\-_]), simple ranges ([a-z]), negation ([^abc]) and the hyphen literal as the first char ([-a-z_]). * tests/paca.mjs (test_classStateStep): New test entry has a test case for each scenario described above.
Diffstat (limited to 'src/paca.mjs')
-rw-r--r--src/paca.mjs128
1 files changed, 125 insertions, 3 deletions
diff --git a/src/paca.mjs b/src/paca.mjs
index 0b64a87..b92bcdc 100644
--- a/src/paca.mjs
+++ b/src/paca.mjs
@@ -1,5 +1,6 @@
import {
- butlast, explode, isNumeric, last, mapValues, max, reduce, reduced,
+ butlast, dissoc, explode, isNumeric, last, mapValues, max, reduce,
+ reduced,
} from "sjs";
@@ -11,6 +12,7 @@ const ConcatStep = {
ACCEPTING: "accepting",
ESCAPING: "escaping",
RANGE: "range",
+ CLASS: "class",
};
const numFromDigits = digits =>
@@ -107,9 +109,130 @@ const rangeStateStep = ({ out, state, context }, char, _index, _next) => {
};
};
+const classStateStep = ({ out, state, context }, char, _index, _next) => {
+ if (context.escaping) {
+ return {
+ out,
+ state,
+ context: dissoc({
+ ...context,
+ set: context.set.concat(char),
+ }, "escaping"),
+ };
+ }
+
+ if (char === "]") {
+ if (context.range.where === "to") {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new SyntaxError(
+ "unfinished character class range",
+ ),
+ });
+ }
+
+ if (context.set.length === 0) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new ValueError("empty character class"),
+ });
+ }
+
+ return {
+ out: out.concat({
+ operator: "class",
+ set: context.set,
+ }),
+ state: ConcatStep.ACCEPTING,
+ context: null,
+ };
+ }
+
+ if (char === "\\") {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ escaping: true,
+ },
+ };
+ }
+
+ if (context.range.where === "to") {
+ const from = context.range.from;
+ const to = char;
+
+ if (from.charCodeAt(0) > to.charCodeAt(0)) {
+ return reduced({
+ out,
+ state,
+ context,
+ error: new ValueError(
+ "bad class range values: " +
+ `[${from}-${to}]`,
+ ),
+ });
+ }
+
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ set: context.set.concat({ from, to }),
+ range: {
+ from: null,
+ where: "from",
+ },
+ },
+ };
+ }
+
+ if (char === "-" && context.set.length !== 0) {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ set: butlast(context.set),
+ range: {
+ from: last(context.set),
+ where: "to",
+ },
+ },
+ };
+ }
+
+ if (char === "^" && context.set.length === 0) {
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ caret: true,
+ },
+ };
+ }
+
+ return {
+ out,
+ state,
+ context: {
+ ...context,
+ set: context.set.concat(char),
+ },
+ };
+};
+
// Lookup table keyed by ConcatStep state value; each entry is the step
// function for that state.  How the table is consulted lies outside this
// excerpt.
const STATE_FNS = {
[ConcatStep.ESCAPING]: escapingStateStep,
[ConcatStep.RANGE ]: rangeStateStep,
+ [ConcatStep.CLASS ]: classStateStep,
};
const TRANSITION_FNS = {
@@ -133,8 +256,7 @@ const TRANSITION_FNS = {
context: {
set: [],
range: {
- from: [],
- to: [],
+ from: null,
where: "from",
},
},