diff options
| author | EuAndreh <eu@euandre.org> | 2025-07-16 10:32:04 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2025-07-16 10:32:04 -0300 |
| commit | 69e36152600599126d95ae1085b0777a830bc97b (patch) | |
| tree | 43527bb37c38cde9921f579153e85edf915ce97f /src/paca.mjs | |
| parent | Differentiate an "operator" from a "meta" character (diff) | |
| download | paca-69e36152600599126d95ae1085b0777a830bc97b.tar.gz paca-69e36152600599126d95ae1085b0777a830bc97b.tar.xz | |
Build NFA nodes for "." and "class" metacharacters
* src/paca.mjs
(characterClass): Add function that builds the NFA node for
`{ meta: "class" }`. This node leaves the "direct" and
"transitions" keys empty, and adds its data under the "meta" key.
One option was to use an inline function that could simply be
called directly during the search to check for a match, but I chose
a data representation instead, in order to keep the NFA
literal as obvious and self-representing as possible. Later, the
searching part will have to properly interpret the data of "meta",
instead of blindly executing an opaque function. This
does separate the compilation from execution logic, but keeps the NFA
clean of opaque closures.
(wildcard): Add function that builds the NFA node for `{ meta: "." }`.
Similar to `characterClass()`, the new "meta" key contains pure data
that represents the execution of the metacharacter during search.
(baseNFA, literal): Rename the existing `baseNFA()` to `literal()`.
Then add a new `baseNFA()` function that decides between a character
literal and a metacharacter.
(buildNFAStep): Instead of checking the type of `token`, we check if
`token` has the "operator" attribute, since we now have
metacharacters that also aren't strings.
(classStateStep): Add missing "caret" key to the final metacharacter
output. It was already being detected, just not included in the
result.
(escapingStateStep): Stick to 80 columns.
* tests/paca.mjs
(test_characterClass, test_wildcard, test_baseNFA): Add obligatory
test cases.
(test_buildNFAStep): Include test case for metacharacter.
Diffstat (limited to 'src/paca.mjs')
| -rw-r--r-- | src/paca.mjs | 82 |
1 files changed, 76 insertions, 6 deletions
diff --git a/src/paca.mjs b/src/paca.mjs index ccff1ca..0625d5c 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -46,7 +46,9 @@ const escapingStateStep = ({ out, state, context }, char, _index, next) => : { out: out.concat( char, - shouldConcat(null, next) ? [{ operator: "concat" }] : [], + shouldConcat(null, next) + ? [{ operator: "concat" }] + : [], ), state: ConcatStep.ACCEPTING, context, @@ -167,8 +169,9 @@ const classStateStep = ({ out, state, context }, char, _index, _next) => { return { out: out.concat({ - meta: "class", - set: context.set, + meta: "class", + set: context.set, + caret: !!context.caret, }), state: ConcatStep.ACCEPTING, context: null, @@ -495,7 +498,7 @@ const emptyNFA = () => { }; }; -const baseNFA = (edge, id) => { +const literal = (edge, id) => { const startID = id + 0; const endID = id + 1; const nextID = id + 2; @@ -620,6 +623,63 @@ const zeroOrOne = nfa => { }; }; +const characterClass = ({ set, caret }, id) => { + const start = id + 0; + const end = id + 1; + const nextID = id + 2; + const { string, object } = Object.groupBy(set, x => typeof x); + const matches = new Set(string); + const ranges = Object.fromEntries(object.map( + ({ from, to }) => [ from.charCodeAt(0), to.charCodeAt(0) ], + )); + return { + start, + end, + nextID, + nodes: { + [start]: { + direct: [], + transitions: {}, + meta: { + op: caret ? 
"excludes" : "includes", + to: end, + matches, + ranges, + }, + }, + [end]: { + direct: [], + transitions: {}, + }, + }, + }; +}; + +const wildcard = (_edge, id) => { + const start = id + 0; + const end = id + 1; + const nextID = id + 2; + return { + start, + end, + nextID, + nodes: { + [start]: { + direct: [], + transitions: {}, + meta: { + op: true, + to: end, + }, + }, + [end]: { + direct: [], + transitions: {}, + }, + }, + }; +}; + const OPERATORS_FNS = ({ zeroOrMoreFn = zeroOrMore, oneOrMoreFn = oneOrMore, @@ -639,11 +699,21 @@ const OPERATORS_FNS = ({ last(stack), )), }); - const OPERATORS = OPERATORS_FNS(); +const METACHARACTERS_FNS = { + "class": characterClass, + ".": wildcard, +}; + +const baseNFA = (token, id) => ( + !token.meta + ? literal + : METACHARACTERS_FNS[token.meta] +)(token, id); + const buildNFAStep = (stack, token) => - typeof token === "string" + !token.operator ? stack.concat(baseNFA(token, last(stack)?.nextID || 1)) : OPERATORS[token.operator](stack); |
