import {
	butlast, dissoc, explode, isNumeric, last, mapValues, max, reduce,
	reduced,
} from "sjs";


export class   SyntaxError extends Error {}
export class    ValueError extends Error {}
export class InternalError extends Error {}

const ConcatStep = {
	ACCEPTING: "accepting",
	ESCAPING:  "escaping",
	RANGE:     "range",
	CLASS:     "class",
};

const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);

const shouldConcat = (char, next) =>
	next !== undefined &&
	char !== "(" &&
	char !== "|" &&
	char !== "{" &&
	!nonConcatOperators.has(next);

const operatorChars = new Set([...nonConcatOperators, "(", "$"]);
const isOperator = char =>
	operatorChars.has(char);

const numFromDigits = digits =>
	digits.length === 0
		? -1
		: Number(digits.join(""));

const escapingStateStep = ({ out, state, context }, char, _index, next) => 
	!(isOperator(char) || char === "\\")
		? reduced({
			out,
			state,
			context,
			error: new SyntaxError(
				"unknown escape sequence: \\" + char,
			),
		})
		: {
			out: out.concat(
				char,
				shouldConcat(null, next)
					? [{ operator: "concat" }]
					: [],
			),
			state: ConcatStep.ACCEPTING,
			context,
		};

const rangeStateStep = ({ out, state, context }, char, _index, _next) => {
	if (char === "}") {
		if (context.where !== "to") {
			return reduced({
				out,
				state,
				context,
				error: new SyntaxError(
					"missing comma in range operator",
				),
			});
		}

		const from = numFromDigits(context.from);
		const to   = numFromDigits(context.to);
		if (from > to && to != -1) {
			return reduced({
				out,
				state,
				context,
				error: new ValueError(
					`bad range values: {${from},${to}}`,
				),
			});
		}

		return {
			out: out.concat({
				operator: "range",
				from,
				to,
			}),
			state: ConcatStep.ACCEPTING,
			context: null,
		};
	}

	if (char === ",") {
		if (context.where === "to") {
			return reduced({
				out,
				state,
				context,
				error: new SyntaxError(
					"extraneous comma in range expression",
				),
			});
		} else {
			return {
				out,
				state,
				context: {
					...context,
					where: "to",
				},
			};
		}
	}

	if (!isNumeric(char)) {
		return reduced({
			out,
			state,
			context,
			error: new SyntaxError(
				"bad char in range expression: " + char,
			),
		});
	}

	return {
		out,
		state,
		context: {
			...context,
			[context.where]: context[context.where].concat(char),
		},
	};
};

const classStateStep = ({ out, state, context }, char, _index, _next) => {
	if (context.escaping) {
		return {
			out,
			state,
			context: dissoc({
				...context,
				set: context.set.concat(char),
			}, "escaping"),
		};
	}

	if (char === "]") {
		if (context.range.where === "to") {
			return reduced({
				out,
				state,
				context,
				error: new SyntaxError(
					"unfinished character class range",
				),
			});
		}

		if (context.set.length === 0) {
			return reduced({
				out,
				state,
				context,
				error: new ValueError("empty character class"),
			});
		}

		return {
			out: out.concat({
				meta:  "class",
				set:   context.set,
				caret: !!context.caret,
			}),
			state:   ConcatStep.ACCEPTING,
			context: null,
		};
	}

	if (char === "\\") {
		return {
			out,
			state,
			context: {
				...context,
				escaping: true,
			},
		};
	}

	if (context.range.where === "to") {
		const from = context.range.from;
		const to   = char;

		if (from.charCodeAt(0) > to.charCodeAt(0)) {
			return reduced({
				out,
				state,
				context,
				error: new ValueError(
					"bad class range values: " +
						`[${from}-${to}]`,
				),
			});
		}

		return {
			out,
			state,
			context: {
				...context,
				set:   context.set.concat({ from, to }),
				range: {
					from:  null,
					where: "from",
				},
			},
		};
	}

	if (char === "-" && context.set.length !== 0) {
		return {
			out,
			state,
			context: {
				...context,
				set: butlast(context.set),
				range: {
					from:  last(context.set),
					where: "to",
				},
			},
		};
	}

	if (char === "^" && context.set.length === 0) {
		return {
			out,
			state,
			context: {
				...context,
				caret: true,
			},
		};
	}

	return {
		out,
		state,
		context: {
			...context,
			set: context.set.concat(char),
		},
	};
};

const STATE_FNS = {
	[ConcatStep.ESCAPING]: escapingStateStep,
	[ConcatStep.RANGE   ]:    rangeStateStep,
	[ConcatStep.CLASS   ]:    classStateStep,
};

const TRANSITION_FNS = {
	"\\": ({ out, _state, context }, _char, _index, _next) => ({
		out,
		state: ConcatStep.ESCAPING,
		context,
	}),
	"{": ({ out, _state, _context }, _char, _index, _next) => ({
		out,
		state: ConcatStep.RANGE,
		context: {
			from:  [],
			to:    [],
			where: "from",
		},
	}),
	"[": ({ out, _state, _context }, _char, _index, _next) => ({
		out,
		state: ConcatStep.CLASS,
		context: {
			set:   [],
			range: {
				from:  null,
				where: "from",
			},
		},
	}),
};

const ANCHOR_FNS = {
	"^": ({ out, state, context }, _char, index, _next) =>
		index !== 0
			? reduced({
				out,
				state,
				context,
				error: new SyntaxError(
					"^ not at the start of the expression",
				),
			})
			: {
				out: out.concat([
					{ meta: "^" },
					{ operator: "concat" },
				]),
				state,
				context,
			},
	"$": ({ out, state, context }, _char, _index, next) =>
		next !== undefined
			? reduced({
				out,
				state,
				context,
				error: new SyntaxError(
					"$ not at the end of the expression",
				),
			})
			: {
				out: out.concat([{ meta: "$" }]),
				state,
				context,
			},
};

const anchors = new Set(Object.keys(ANCHOR_FNS));
const isAnchor = char =>
	anchors.has(char);

const transitionChars = new Set(Object.keys(TRANSITION_FNS));
const isTransition = char =>
	transitionChars.has(char);

const metaChars = new Set(["."]);
const isMeta = char =>
	metaChars.has(char);

const opFor = char =>
	isOperator(char) ? { operator: char }
		: isMeta(char) ? { meta: char }
			: char;

const tokenizeRegexStep = chars => ({ out, state, context }, char, index) => {
	const next = chars[index + 1];

	if (state !== ConcatStep.ACCEPTING) {
		return STATE_FNS[state](
			{ out, state, context },
			char,
			index,
			next,
		);
	}

	if (isTransition(char)) {
		return TRANSITION_FNS[char](
			{ out, state, context },
			char,
			index,
			next,
		);
	}

	if (isAnchor(char)) {
		return ANCHOR_FNS[char](
			{ out, state, context },
			char,
			index,
			next,
		);
	}

	return {
		out: out.concat(
			opFor(char),
			shouldConcat(char, next)
				? [{ operator: "concat" }]
				: [],
		),
		state,
		context,
	};
};

const tokenizeRegexFn = chars =>
	reduce(chars, tokenizeRegexStep(chars), {
		out:     [],
		state:   ConcatStep.ACCEPTING,
		context: null,
	});

const tokenizeRegex = chars => {
	const tokens = tokenizeRegexFn(chars);
	if (!!tokens.error) {
		throw tokens.error;
	}

	if (tokens.state !== ConcatStep.ACCEPTING) {
		throw new SyntaxError("bad ending state: " + tokens.state);
	}

	return tokens.out;
};

const PRECEDENCE = {
	"(":      4,
	"*":      3,
	"+":      3,
	"?":      3,
	"range":  3,
	"concat": 2,
	"|":      1,
};

const shouldPush = (stack, token) =>
	stack.length       === 0   ||
	token.operator     === "(" ||
	(stack[0].operator === "(" && token.operator !== ")");

const findLowerPrecedenceItem = (stack, token) =>
	stack.findIndex(el =>
		el.operator === "(" ||
			PRECEDENCE[el.operator] < PRECEDENCE[token.operator],
	);

const toPostfixStep = ({ out, stack }, token, _index, tokens) => {
	if (!token.operator) {
		return {
			out: out.concat(token),
			stack,
		};
	}

	if (shouldPush(stack, token)) {
		return {
			out,
			stack: [token].concat(stack),
		};
	}

	if (token.operator === ")") {
		const openParenIndex = stack.findIndex(o => o.operator === "(");
		if (openParenIndex === -1) {
			throw new SyntaxError(
				`close parens without opening: ${tokens}`,
			);
		}

		const ops = stack.slice(0, openParenIndex);
		const leftover = stack.slice(openParenIndex + 1);
		return {
			out:   out.concat(ops),
			stack: leftover,
		};
	}

	const idx = findLowerPrecedenceItem(stack, token);
	if (idx < 0) {
		return {
			out:   out.concat(stack),
			stack: [token],
		};
	} else {
		const ops = stack.slice(0, idx);
		const leftover = stack.slice(idx);
		return {
			out:   out.concat(ops),
			stack: [token].concat(leftover),
		};
	}
};

const toPostfix = tokens => {
	const { out, stack } = tokens.reduce(toPostfixStep, {
		out:   [],
		stack: [],
	});
	return out.concat(stack);
};

const emptyNFA = () => {
	const start = 1;
	const end   = 2;
	return {
		start,
		end,
		nodes: {
			[start]: {
				direct:      [end],
				transitions: {},
			},
			[end]:   {
				direct:      [],
				transitions: {},
			},
		}
	};
};

const literal = (edge, id) => {
	const start = id + 0;
	const end   = id + 1;
	return {
		start,
		end,
		nodes: {
			[start]: {
				direct:      [],
				transitions: {
					[edge]: end,
				},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const concat = (lhs, rhs) => ({
	start:  lhs.start,
	end:    rhs.end,
	nodes: {
		...lhs.nodes,
		...rhs.nodes,
		[lhs.end]: {
			...lhs.nodes[lhs.end],
			direct: lhs.nodes[lhs.end].direct.concat(rhs.start),
		}
	},
});

const union = (lhs, rhs) => {
	const start = max(lhs.end, rhs.end) + 1;
	const end   = start + 1;
	return {
		start,
		end,
		nodes: {
			...lhs.nodes,
			...rhs.nodes,
			[lhs.end]: {
				...lhs.nodes[lhs.end],
				direct: lhs.nodes[lhs.end].direct.concat(end),
			},
			[rhs.end]: {
				...rhs.nodes[rhs.end],
				direct: lhs.nodes[lhs.end].direct.concat(end),
			},
			[start]: {
				direct:      [lhs.start, rhs.start],
				transitions: {},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const zeroOrMore = nfa => {
	const start = nfa.end + 1;
	const end   = nfa.end + 2;
	return {
		start,
		end,
		nodes: {
			...nfa.nodes,
			[nfa.end]: {
				...nfa.nodes[nfa.end],
				direct: nfa.nodes[nfa.end].direct.concat(start),
			},
			[start]: {
				direct:      [nfa.start, end],
				transitions: {},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const oneOrMore = nfa =>
	concat(nfa, zeroOrMore(nfa));

const zeroOrOne = nfa => {
	const start = nfa.end + 1;
	const end   = nfa.end + 2;
	return {
		start,
		end,
		nodes: {
			...nfa.nodes,
			[nfa.end]: {
				...nfa.nodes[nfa.end],
				direct: nfa.nodes[nfa.end].direct.concat(end),
			},
			[start]: {
				direct:      [nfa.start, end],
				transitions: {},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const compareRange = (lhs, rhs) =>
	lhs[0] !== rhs[0] ? lhs[0] - rhs[0] : lhs[1] - rhs[1];

const compressRangeStep = (ranges, [from, to]) => {
	const curr = last(ranges);
	if (!curr) {
		return [[from, to]];
	}
	return from <= curr[1]
		? butlast(ranges).concat([[curr[0], max(to, curr[1])]])
		: ranges.concat([[from, to]]);
};

const compressCharacterRanges = ranges =>
	ranges.toSorted(compareRange).reduce(compressRangeStep, []);

const inRange = (ranges, char) => {
	const code = char.charCodeAt(0);
	const froms = Object.keys(ranges).map(Number).filter(
		from => from <= code,
	);
	return froms.some(from => code <= ranges[from]);
};

const characterClass = ({ set, caret }, id) => {
	const start = id + 0;
	const end   = id + 1;
	const { string, object } = Object.groupBy(set, x => typeof x);
	const ranges = Object.fromEntries(
		compressCharacterRanges((object || []).map(
			({ from, to }) => [
				from.charCodeAt(0), to.charCodeAt(0)
			],
		),
	));
	const matches = new Set((string || []).filter(
		char => !inRange(ranges, char),
	));
	return {
		start,
		end,
		nodes: {
			[start]: {
				direct:      [],
				transitions: {},
				meta: {
					op: caret ? "excludes" : "includes",
					to: end,
					matches,
					ranges,
				},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const wildcard = (_edge, id) => {
	const start = id + 0;
	const end   = id + 1;
	return {
		start,
		end,
		nodes: {
			[start]: {
				direct:      [],
				transitions: {},
				meta: {
					op: true,
					to: end,
				},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const caret = (_edge, id) => {
	const start = id + 0;
	const end   = id + 1;
	return {
		start,
		end,
		nodes: {
			[start]: {
				direct:      [],
				transitions: {},
				meta: {
					op: "caret",
					to: end,
				},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const dollar = (_edge, id) => {
	const start = id + 0;
	const end   = id + 1;
	return {
		start,
		end,
		nodes: {
			[start]: {
				direct:      [],
				transitions: {},
				meta: {
					op: "dollar",
					to: end,
				},
			},
			[end]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const OPERATORS_FNS = ({
	zeroOrMoreFn = zeroOrMore,
	oneOrMoreFn  = oneOrMore,
	zeroOrOneFn  = zeroOrOne,
	unionFn      = union,
	concatFn     = concat,
} = {}) => ({
	"*": stack => butlast(stack).concat(zeroOrMoreFn(last(stack))),
	"+": stack => butlast(stack).concat( oneOrMoreFn(last(stack))),
	"?": stack => butlast(stack).concat(zeroOrOneFn (last(stack))),
	"|": stack => butlast(butlast(stack)).concat(unionFn(
		last(butlast(stack)),
		last(stack),
	)),
	"concat": stack => butlast(butlast(stack)).concat(concatFn(
		last(butlast(stack)),
		last(stack),
	)),
});
const OPERATORS = OPERATORS_FNS();

const METACHARACTERS_FNS = {
	"class": characterClass,
	".":     wildcard,
	"^":     caret,
	"$":     dollar,
};

const baseNFA = (token, id) => (
	!token.meta
		? literal
		: METACHARACTERS_FNS[token.meta]
)(token, id);

const buildNFAStep = (stack, token) =>
	!token.operator
		? stack.concat(baseNFA(token, (last(stack)?.end || 0) + 1))
		: OPERATORS[token.operator](stack);

const buildNFA = tokens =>
	tokens.length === 0
		? emptyNFA()
		: last(tokens.reduce(buildNFAStep, []));

const allDirects = (stateID, nodes) =>
	nodes[stateID].direct.length === 0
		? [ stateID ]
		: [...new Set(
			nodes[stateID].direct.map(
				id => allDirects(id, nodes),
			).flat(),
		)].toSorted();

const VALID_OPS = new Set([
	"caret", "dollar", true, "includes", "excludes",
]);

const isValidOp = op =>
	VALID_OPS.has(op);

const interpretMetacharacter = (
	{ op, to, matches, ranges },
	char,
	index,
	nodes,
) => {
	if (!op) {
		return [];
	}

	if (!isValidOp(op)) {
		throw new InternalError("Unsupported op: " + op);
	}

	if (op === "caret") {
		return index === 0 ? searchNFAStep(nodes)(
			allDirects(to, nodes),
			char,
			index,
		) : [];
	}

	if (op === "dollar") {
		return [];
	}

	if (op === true) {
		return [to];
	}

	if (matches.has(char)) {
		return op === "includes" ? [to] : [];
	}

	if (inRange(ranges, char)) {
		return op === "includes" ? [to] : [];
	}

	return op === "excludes" ? [to] : [];
};

const performTransition = (nodes, char, index) => id =>
	(nodes[id].transitions[char] && [nodes[id].transitions[char]]) ||
	interpretMetacharacter(nodes[id].meta || {}, char, index, nodes);

const searchNFAStep = nodes => (directStates, char, index) =>
	[...new Set(
		directStates
			.map(performTransition(nodes, char, index))
			.flat()
			.map(id => allDirects(id, nodes))
			.flat()
	)].toSorted();

const anchorDollar = (nfa, ids) =>
	nfa.nodes[last(ids)]?.meta?.op === "dollar"
		? allDirects(nfa.nodes[last(ids)].meta.to, nfa.nodes)
		: ids;

const searchNFAFn = (nfa, string) =>
	anchorDollar(nfa, explode(string).reduce(
		searchNFAStep(nfa.nodes),
		allDirects(nfa.start, nfa.nodes),
	));

const searchNFA = (nfa, string) =>
	searchNFAFn(nfa, string).includes(nfa.end);

const nodeID = states =>
	[...new Set(states)]
		.toSorted((a, b) => a - b)
		.map(x => x.toString())
		.join("-");

const unNodeID = stateID =>
	stateID.split("-").map(Number);

const collectTransitions = (nfaNodes, states) => {
	const transitions = states.map(
		id => Object.entries(nfaNodes[id].transitions),
	).filter(t => t.length !== 0);
	if (!transitions.every(t => t.length === 1)) {
		throw new InternalError(
			"unexpected transitions with more than 1 key",
		);
	}
	const groupedTransitions = Object.groupBy(
		transitions.map(t => t[0]),
		([key, stateID]) => key,
	);
	return mapValues(
		groupedTransitions,
		entry =>
			[
				...new Set(entry.map(([_, id]) =>
					allDirects(id, nfaNodes)).flat()),
			].toSorted(),
	);
};

const computePending = (pending, transitions) =>
	[...new Set(
		pending.slice(1).concat(transitions)
			.map(nodeID))
	].toSorted().map(unNodeID);

const toDFANodes = (end, nfaNodes, pending, dfaNodes) => {
	if (pending.length === 0) {
		return dfaNodes;
	}

	const states  = pending[0];
	const stateID = nodeID(states);
	if (!!dfaNodes[stateID]) {
		return toDFANodes(end, nfaNodes, pending.slice(1), dfaNodes);
	}

	const transitions = collectTransitions(nfaNodes, states);
	const newPending = computePending(pending, Object.values(transitions));
	return toDFANodes(end, nfaNodes, newPending, {
		...dfaNodes,
		[stateID]: {
			end: states.includes(end),
			transitions: mapValues(transitions, nodeID),
		},
	});
};

const toDFA = nfa => {
	const start = allDirects(nfa.start, nfa.nodes);
	return {
		start: nodeID(start),
		nodes: toDFANodes(nfa.end, nfa.nodes, [start], {}),
	};
};

const searchDFAStep = nodes => (state, char) =>
	nodes[state].transitions[char] || reduced(false);

const searchDFAFn = (dfa, string) =>
	reduce(explode(string), searchDFAStep(dfa.nodes), dfa.start);

const searchDFA = (dfa, string) =>
	!!dfa.nodes[searchDFAFn(dfa, string)]?.end

const parse = regex =>
	toPostfix(tokenizeRegex(explode(regex)));

const compileNFA = regex =>
	buildNFA(parse(regex));

const compileDFA = regex =>
	toDFA(compileNFA(regex));

export const compile = regex => {
	const nfa = compileDFA(regex);
	return string => searchDFA(nfa, string);
};