import {
	butlast, explode, last, mapValues, max, reduce, reduced, repeat,
} from "sjs";


export class SyntaxError extends Error {}

const ConcatStep = {
	ACCEPTING: "accepting",
	ESCAPING:  "escaping",
};

const nonConcatOperators = new Set(["*", "+", "?", "|", ")"]);

const shouldConcat = (char, next) =>
	next !== undefined &&
	char !== "(" &&
	char !== "|" &&
	!nonConcatOperators.has(next);

const isOperator = char =>
	nonConcatOperators.has(char) || char == "(";

const tokenizeRegexStep = chars => ({ out, state }, char, index) => {
	const next = chars[index + 1];
	const maybeConcat = shouldConcat(char, next)
		? [{operator: "concat"}]
		: [];

	if (state === ConcatStep.ESCAPING) {
		return {
			out: out.concat(char, maybeConcat),
			state: ConcatStep.ACCEPTING,
		};
	}

	if (char === "\\") {
		if (next === undefined) {
			throw new SyntaxError(
				`bad trailing escape character: ${chars}`,
			);
		}

		return {
			out,
			state: ConcatStep.ESCAPING,
		};
	}

	const op = isOperator(char) ? { operator: char } : char;
	return {
		out: out.concat(op, maybeConcat),
		state,
	};
};

const tokenizeRegexFn = chars =>
	chars.reduce(tokenizeRegexStep(chars), {
		out:   [],
		state: ConcatStep.ACCEPTING,
	});

const tokenizeRegex = chars =>
	tokenizeRegexFn(chars).out;

const PRECEDENCE = {
	"(":      4,
	"*":      3,
	"+":      3,
	"?":      3,
	"concat": 2,
	"|":      1,
};

const shouldPush = (stack, token) =>
	stack.length       === 0   ||
	token.operator     === "(" ||
	(stack[0].operator === "(" && token.operator !== ")");

const findLowerPrecedenceItem = (stack, token) =>
	stack.findIndex(el =>
		el.operator === "(" ||
			PRECEDENCE[el.operator] < PRECEDENCE[token.operator],
	);

const toPostfixStep = ({ out, stack }, token, _index, tokens) => {
	if (typeof token === "string") {
		return {
			out: out.concat(token),
			stack,
		};
	}

	if (shouldPush(stack, token)) {
		return {
			out,
			stack: [token].concat(stack),
		};
	}

	if (token.operator === ")") {
		const openParenIndex = stack.findIndex(o => o.operator === "(");
		if (openParenIndex === -1) {
			throw new SyntaxError(
				`close parens without opening: ${tokens}`,
			);
		}

		const ops = stack.slice(0, openParenIndex);
		const leftover = stack.slice(openParenIndex + 1);
		return {
			out:   out.concat(ops),
			stack: leftover,
		};
	}

	const idx = findLowerPrecedenceItem(stack, token);
	if (idx < 0) {
		return {
			out:   out.concat(stack),
			stack: [token],
		};
	} else {
		const ops = stack.slice(0, idx);
		const leftover = stack.slice(idx);
		return {
			out:   out.concat(ops),
			stack: [token].concat(leftover),
		};
	}
};

const toPostfix = tokens => {
	const { out, stack } = tokens.reduce(toPostfixStep, {
		out:   [],
		stack: [],
	});
	return out.concat(stack);
};

const emptyNFA = () => {
	const startID = 1;
	const endID   = 2;
	const nextID  = 3;
	return {
		start: startID,
		end:   endID,
		nextID,
		nodes: {
			[startID]: {
				direct:      [ endID ],
				transitions: {},
			},
			[endID]:   {
				direct:      [],
				transitions: {},
			},
		}
	};
};

const baseNFA = (edge, id) => {
	const startID = id + 0;
	const endID   = id + 1;
	const nextID  = id + 2;
	return {
		start: startID,
		end:   endID,
		nextID,
		nodes: {
			[startID]: {
				direct:      [],
				transitions: {
					[edge]: endID,
				},
			},
			[endID]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const concat = (lhs, rhs) => ({
	start:  lhs.start,
	end:    rhs.end,
	nextID: max(lhs.nextID, rhs.nextID),
	nodes: {
		...lhs.nodes,
		...rhs.nodes,
		[lhs.end]: {
			...lhs.nodes[lhs.end],
			direct: lhs.nodes[lhs.end].direct.concat([
				rhs.start,
			]),
		}
	},
});

const union = (lhs, rhs) => {
	const startID = max(lhs.nextID, rhs.nextID);
	const endID   = startID + 1;
	const nextID  = startID + 2;
	return {
		start: startID,
		end:   endID,
		nextID,
		nodes: {
			...lhs.nodes,
			...rhs.nodes,
			[lhs.end]: {
				...lhs.nodes[lhs.end],
				direct: lhs.nodes[lhs.end].direct.concat([
					endID,
				]),
			},
			[rhs.end]: {
				...rhs.nodes[rhs.end],
				direct: lhs.nodes[lhs.end].direct.concat([
					endID,
				]),
			},
			[startID]: {
				direct:      [lhs.start, rhs.start],
				transitions: {},
			},
			[endID]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const zeroOrMore = nfa => {
	const startID = nfa.nextID;
	const endID   = startID + 1;
	const nextID  = startID + 2;
	return {
		start: startID,
		end:   endID,
		nextID,
		nodes: {
			...nfa.nodes,
			[nfa.end]: {
				...nfa.nodes[nfa.end],
				direct: nfa.nodes[nfa.end].direct.concat([
					startID,
				]),
			},
			[startID]: {
				direct:      [ nfa.start, endID ],
				transitions: {},
			},
			[endID]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const oneOrMore = nfa =>
	concat(nfa, zeroOrMore(nfa));

const zeroOrOne = nfa => {
	const startID = nfa.nextID;
	const endID   = startID + 1;
	const nextID  = startID + 2;
	return {
		start: startID,
		end:   endID,
		nextID,
		nodes: {
			...nfa.nodes,
			[nfa.end]: {
				...nfa.nodes[nfa.end],
				direct: nfa.nodes[nfa.end].direct.concat([
					endID,
				]),
			},
			[startID]: {
				direct:      [ nfa.start, endID ],
				transitions: {},
			},
			[endID]: {
				direct:      [],
				transitions: {},
			},
		},
	};
};

const OPERATORS_FNS = ({
	zeroOrMoreFn = zeroOrMore,
	oneOrMoreFn  = oneOrMore,
	zeroOrOneFn  = zeroOrOne,
	unionFn      = union,
	concatFn     = concat,
} = {}) => ({
	"*": stack => butlast(stack).concat(zeroOrMoreFn(last(stack))),
	"+": stack => butlast(stack).concat( oneOrMoreFn(last(stack))),
	"?": stack => butlast(stack).concat(zeroOrOneFn (last(stack))),
	"|": stack => butlast(butlast(stack)).concat(unionFn(
		last(butlast(stack)),
		last(stack),
	)),
	"concat": stack => butlast(butlast(stack)).concat(concatFn(
		last(butlast(stack)),
		last(stack),
	)),
});

const OPERATORS = OPERATORS_FNS();

const buildNFAStep = (stack, token) =>
	typeof token === "string"
		? stack.concat(baseNFA(token, last(stack)?.nextID || 1))
		: OPERATORS[token.operator](stack);

const buildNFA = tokens =>
	tokens.length === 0
		? emptyNFA()
		: last(tokens.reduce(buildNFAStep, []));

const allDirects = (stateID, nodes) =>
	nodes[stateID].direct.length === 0
		? [ stateID ]
		: [...new Set(
			nodes[stateID].direct.map(
				id => allDirects(id, nodes),
			).flat(),
		)].toSorted();

const searchNFAStep = nodes => (directStates, char) =>
	[...new Set(
		directStates
			.map(id => nodes[id].transitions[char])
			.filter(x => !!x)
			.map(id => allDirects(id, nodes))
			.flat()
	)].toSorted();

const searchNFAFn = (nfa, string) =>
	explode(string).reduce(
		searchNFAStep(nfa.nodes),
		allDirects(nfa.start, nfa.nodes),
	);

const searchNFA = (nfa, string) =>
	searchNFAFn(nfa, string).includes(nfa.end);

const nodeID = states =>
	[...new Set(states)]
		.toSorted((a, b) => a - b)
		.map(x => x.toString())
		.join("-");

const unNodeID = stateID =>
	stateID.split("-").map(Number);

const collectTransitions = (nfaNodes, states) => {
	const transitions = states.map(
		id => Object.entries(nfaNodes[id].transitions),
	).filter(t => t.length !== 0);
	if (!transitions.every(t => t.length === 1)) {
		throw new Error("unexpected transitions with more than 1 key");
	}
	const groupedTransitions = Object.groupBy(
		transitions.map(t => t[0]),
		([key, stateID]) => key,
	);
	return mapValues(
		groupedTransitions,
		entry =>
			[
				...new Set(entry.map(([_, id]) =>
					allDirects(id, nfaNodes)).flat()),
			].toSorted(),
	);
};

const computePending = (pending, transitions) =>
	[...new Set(
		pending.slice(1).concat(transitions)
			.map(nodeID))
	].toSorted().map(unNodeID);

const buildDFANodes = (end, nfaNodes, pending, dfaNodes) => {
	if (pending.length === 0) {
		return dfaNodes;
	}

	const states  = pending[0];
	const stateID = nodeID(states);
	if (!!dfaNodes[stateID]) {
		return buildDFANodes(end, nfaNodes, pending.slice(1), dfaNodes);
	}

	const transitions = collectTransitions(nfaNodes, states);
	const newPending = computePending(pending, Object.values(transitions));
	return buildDFANodes(end, nfaNodes, newPending, {
		...dfaNodes,
		[stateID]: {
			end: states.includes(end),
			transitions: mapValues(transitions, nodeID),
		},
	});
};

const buildDFA = nfa => {
	const start = allDirects(nfa.start, nfa.nodes);
	return {
		start: nodeID(start),
		nodes: buildDFANodes(nfa.end, nfa.nodes, [start], {}),
	};
};

const searchDFAStep = nodes => (state, char) =>
	nodes[state].transitions[char] || reduced(false);

const searchDFAFn = (dfa, string) =>
	reduce(explode(string), searchDFAStep(dfa.nodes), dfa.start);

const searchDFA = (dfa, string) =>
	!!dfa.nodes[searchDFAFn(dfa, string)]?.end

const compileNFA = regex =>
	buildNFA(toPostfix(tokenizeRegex(explode(regex))));

const compileDFA = regex =>
	buildDFA(compileNFA(regex));

export const compile = regex => {
	const nfa = compileDFA(regex);
	return string => searchDFA(nfa, string);
};