From ba385ba7b748d21f329a6ba9509532d86270de7c Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Fri, 1 Aug 2025 15:01:58 -0300 Subject: src/paca.mjs: Improve implementation of interpretMetacharacters --- src/paca.mjs | 61 +++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/paca.mjs b/src/paca.mjs index 38100ff..4f50db4 100644 --- a/src/paca.mjs +++ b/src/paca.mjs @@ -5,8 +5,9 @@ import { -export class SyntaxError extends Error {} -export class ValueError extends Error {} +export class SyntaxError extends Error {} +export class ValueError extends Error {} +export class InternalError extends Error {} const ConcatStep = { ACCEPTING: "accepting", @@ -795,28 +796,56 @@ const allDirects = (stateID, nodes) => ).flat(), )].toSorted(); -const interpretMetacharacter = ({ op, to, matches, ranges }, char, index, nodes) => { +const VALID_OPS = new Set([ + "caret", "dollar", true, "includes", "excludes", +]); + +const isValidOp = op => + VALID_OPS.has(op); + +const interpretMetacharacter = ( + { op, to, matches, ranges }, + char, + index, + nodes, +) => { if (!op) { - return null; + return []; + } + + if (!isValidOp(op)) { + throw new InternalError("Unsupported op: " + op); + } + + if (op === "caret") { + return index === 0 ? searchNFAStep(nodes)( + allDirects(to, nodes), + char, + index, + ) : []; + } + + if (op === "dollar") { + return []; } if (op === true) { - return to; + return [to]; } if (matches.has(char)) { - return op === "includes" && to; + return op === "includes" ? [to] : []; } if (inRange(ranges, char)) { - return op === "includes" && to; + return op === "includes" ? [to] : []; } - return op === "excludes" && to; + return op === "excludes" ? [to] : []; }; const performTransition = (nodes, char, index) => id => - nodes[id].transitions[char] || + (nodes[id].transitions[char] && [nodes[id].transitions[char]]) || interpretMetacharacter(nodes[id].meta || {}, char, index, nodes); const searchNFAStep = nodes => (directStates, char, index) => @@ -824,16 +853,20 @@ const searchNFAStep = nodes => (directStates, char, index) => directStates .map(performTransition(nodes, char, index)) .flat() - .filter(x => !!x) .map(id => allDirects(id, nodes)) .flat() )].toSorted(); +const anchorDollar = (nfa, ids) => + nfa.nodes[last(ids)]?.meta?.op === "dollar" + ? allDirects(nfa.nodes[last(ids)].meta.to, nfa.nodes) + : ids; + const searchNFAFn = (nfa, string) => - explode(string).reduce( + anchorDollar(nfa, explode(string).reduce( searchNFAStep(nfa.nodes), allDirects(nfa.start, nfa.nodes), - ); + )); const searchNFA = (nfa, string) => searchNFAFn(nfa, string).includes(nfa.end); @@ -852,7 +885,9 @@ const collectTransitions = (nfaNodes, states) => { id => Object.entries(nfaNodes[id].transitions), ).filter(t => t.length !== 0); if (!transitions.every(t => t.length === 1)) { - throw new Error("unexpected transitions with more than 1 key"); + throw new InternalError( + "unexpected transitions with more than 1 key", + ); } const groupedTransitions = Object.groupBy( transitions.map(t => t[0]), -- cgit v1.2.3