import {
	TRIM_EXTENSION,
	CLEAN_CHARS,
	IS_NUMERIC_ID,
	IS_ROUND_NUMBER,
	IS_NUMERIC_LEADING_CHAR_ID,
	IS_NUMERIC_TRAILING_CHAR_ID,
	IS_HASH_ID,
	IS_YEAR,
	IS_VIDEO_ID,
} from './regexp.js';

/**
 * Derives an external article ID from a URL path.
 *
 * Matches of TRIM_EXTENSION are removed from the path, matches of CLEAN_CHARS
 * are turned into spaces, and the result is split on spaces. Every part is
 * screened with `findCandidates`; the surviving parts are then considered in
 * reverse order, so parts later in the path come first.
 * @param {string} host - Host that is prefixed to the resulting ID.
 * @param {string} path - URL path to search for an ID.
 * @returns {string} `<host>/<id>` when an ID was found, otherwise an empty string.
 */
export default function getPathId(host, path) {
	const parts = path
		.replace(TRIM_EXTENSION, '')
		.replace(CLEAN_CHARS, ' ')
		.split(' ');

	// Each part is screened against the original (uncleaned) path.
	const candidates = parts
		.filter((part) => findCandidates(part, path))
		.reverse();

	// A lone candidate is used as-is; several candidates are narrowed by pickId.
	let rawId = '';
	if (candidates.length === 1) {
		[rawId] = candidates;
	} else if (candidates.length > 1) {
		rawId = pickId(candidates);
	}

	// No candidates at all, or an empty candidate/pick: no ID in this path.
	if (!rawId) {
		return '';
	}

	// Cleaning can still reject the candidate by returning an empty string.
	const id = cleanId(rawId, path);
	return id ? `${host}/${id}` : '';
}

/**
 * Decides whether a single path part looks like something that could be an ID.
 *
 * Rejections are evaluated first, then the accepting checks in a fixed order;
 * the first check that matches decides the outcome.
 * @param {string} pathPart - One part of the cleaned path.
 * @param {string} path - The full path the part was taken from.
 * @returns {boolean} `true` when the part is an ID candidate.
 */
function findCandidates(pathPart, path) {
	// Round numbers are most likely not IDs, and year-like values (2000-2099)
	// are usually date segments. Rejecting them means a fallback ID is
	// generated instead of an incorrect one.
	if (IS_ROUND_NUMBER.test(pathPart) || IS_YEAR.test(pathPart)) {
		return false;
	}

	// Sites using the ../<marker>/<combination_id>/.. format, paired with the
	// ID length to read after the marker.
	const combinationFormats = [
		['/i/', 6],
		['/a/', 6],
		['/video/', 11],
	];

	return (
		// Looks like a hash.
		IS_HASH_ID.test(pathPart) ||
		// Matches the numeric ID pattern and is not embedded in a dashed title.
		(IS_NUMERIC_ID.test(pathPart) && isInIdPosition(pathPart, path)) ||
		// Is the combination ID that follows one of the known markers.
		combinationFormats.some(([marker, idLength]) =>
			isCombinationId(pathPart, path, marker, idLength),
		) ||
		// Video ID pattern, only trusted when the path has a /video/ segment.
		(path.includes('/video/') && IS_VIDEO_ID.test(pathPart))
	);
}

/**
 * Normalises an ID candidate into the form that is used as the final ID.
 *
 * Dotted candidates are handled first and never reach the pattern checks.
 * For all other candidates the patterns are consulted in priority order:
 * hash, numeric with trailing char, numeric with leading char, video
 * (only for /video/ paths), plain numeric. If nothing matches, the candidate
 * is returned untouched.
 * @param {string} id - The ID candidate to clean.
 * @param {string} path - The path the candidate was found in; a falsy value
 *   is treated as an empty path.
 * @returns {string} The cleaned ID, or an empty string when a dotted
 *   candidate turns out not to be an ID.
 */
function cleanId(id, path) {
	// Work on a local copy instead of reassigning the parameter.
	const safePath = path || '';

	// NRK-style ID: 1.14061418 - kept verbatim.
	if (id.includes('1.') && id.charAt(1) === '.') {
		return id;
	}

	// Svalbardposten-style ID: 19.9886 - the segment after the first dot is
	// the ID; fall back to the whole candidate when that segment is empty.
	if (id.includes('19.') && id.charAt(2) === '.') {
		return id.split('.')[1] || id;
	}

	// Any other dotted candidate: only the last dot-separated segment is
	// evaluated. It could be a plain number (100.000), so it must be at least
	// five characters long, otherwise the candidate is not considered an ID.
	if (id.includes('.')) {
		const segments = id.split('.');
		const lastSegment = segments[segments.length - 1];
		return lastSegment.length >= 5 ? lastSegment : '';
	}

	// Run every pattern up front, in a fixed order, then choose by priority.
	// exec() yields either null or an array with at least one entry, so a
	// truthiness check on the match is sufficient.
	const hashMatch = IS_HASH_ID.exec(id);
	const numericMatch = IS_NUMERIC_ID.exec(id);
	const videoMatch = IS_VIDEO_ID.exec(id);
	const trailingCharMatch = IS_NUMERIC_TRAILING_CHAR_ID.exec(id);
	const leadingCharMatch = IS_NUMERIC_LEADING_CHAR_ID.exec(id);

	// Hash IDs are kept in their entirety.
	if (hashMatch) {
		return hashMatch[0];
	}

	// IDs with a single trailing char (12345b) are OK.
	if (trailingCharMatch) {
		return trailingCharMatch[0];
	}

	// IDs with a single leading char (b12345) are OK, but only when the match
	// spans the whole candidate.
	if (leadingCharMatch && leadingCharMatch[0].length === id.length) {
		return leadingCharMatch[0];
	}

	// Video IDs are OK if the path includes /video/.
	if (videoMatch && safePath.includes('/video/')) {
		return videoMatch[0];
	}

	// Plain numeric IDs: keep only the matched part.
	if (numericMatch) {
		return numericMatch[0];
	}

	// The ID is already fine.
	return id;
}

/**
 * Chooses the most promising ID out of several candidates.
 *
 * The first candidate is the starting choice. It is replaced whenever the
 * IS_NUMERIC_ID match of a candidate is strictly longer than the current
 * choice, so ties keep the earlier choice.
 * @param {string[]} candidates - ID candidates, in order of preference.
 * @returns {string} The chosen ID, or an empty string when there is none.
 */
function pickId(candidates) {
	const [firstCandidate] = candidates;

	const chosen = candidates.reduce((best, candidate) => {
		const [numericPart] = IS_NUMERIC_ID.exec(candidate) || [];
		if (!numericPart) {
			return best;
		}

		// A missing or empty current choice counts as length 0.
		const bestLength = best ? best.length : 0;
		return numericPart.length > bestLength ? numericPart : best;
	}, firstCandidate);

	return chosen || '';
}

/**
 * Recognises combination IDs on sites whose paths follow the
 * ../<idIdentification>/<combination_id>/.. format.
 *
 * Examples:
 * https://www.vg.no/sport/alpint/i/Xwe06W/jesper-saltvik-pedersen-tok-norges-foerste-gull => vg.no/Xwe06W
 * http://www.aftonbladet.se/sportbladet/hockey/a/xREzoR/rankning-25-basta-varvningarna-i-shl => aftonbladet.se/xREzoR
 * https://www.dagbladet.no/video/bmJszChBR5M => dagbladet.no/bmJszChBR5M
 * @param {string} pathPart - The path part being evaluated.
 * @param {string} path - The full path the part was taken from.
 * @param {string} idIdentification - Marker that precedes the ID, e.g. '/i/'.
 * @param {number} idLength - Number of characters to read after the marker.
 * @returns {boolean} `true` when the characters right after the first marker
 *   occurrence equal `pathPart`.
 */
function isCombinationId(pathPart, path, idIdentification, idLength) {
	// Only the first occurrence of the marker is inspected.
	const markerIndex = path.indexOf(idIdentification);
	if (markerIndex === -1) {
		return false;
	}

	const idStart = markerIndex + idIdentification.length;
	const segment = path.slice(idStart, idStart + idLength);

	// A dash or underscore in the segment disqualifies it as a combination ID.
	if (/[-_]/.test(segment)) {
		return false;
	}

	// Match on the full segment, or on its first five characters since in
	// some cases the ID is only 5 chars.
	return pathPart === segment || pathPart === segment.slice(0, 5);
}

/**
 * Tells whether a candidate sits at a place in the path where an ID is
 * plausible, as opposed to being embedded in a dashed title.
 *
 * The IS_NUMERIC_ID match of the candidate is located in the path (or the
 * whole candidate when the pattern does not match), using its first
 * occurrence.
 * @param {string} pathPart - The candidate path part.
 * @param {string} path - The full path the part was taken from.
 * @returns {boolean} `false` only when the located text has a dash directly
 *   before and directly after it.
 */
function isInIdPosition(pathPart, path) {
	const match = IS_NUMERIC_ID.exec(pathPart);
	const needle = match ? match[0] : pathPart;

	const start = path.indexOf(needle);
	const before = path.charAt(start - 1);
	const after = path.charAt(start + needle.length);

	// Surrounded by dashes, most likely part of a title:
	// /music/2014/may/12/fan-led-aphex-twin-kickstarter-campaign-raises-67424-caustic-window
	return !(before === '-' && after === '-');
}
