import { IS_NUMERIC_ID, IS_VIDEO_ID } from './regexp.js';

/**
 * Derives an external article ID from a URL's query parameters.
 *
 * Parameter names are compared case-insensitively. Known third-party
 * tracking parameters are discarded first. An `allerid` parameter, when
 * present, is used verbatim; otherwise the first parameter whose name is
 * accepted by `findCandidates` is taken and its value is passed through
 * `cleanId`.
 *
 * @param {string} host - Prefix placed in front of the ID in the result.
 * @param {URLSearchParams} query - Parsed query string to inspect.
 * @returns {string} `<host>/<id>`, or an empty string when no usable ID is found.
 */
export default function getQueryId(host, query) {
	// IDs set by other tracking services must never be used to build the
	// external-id.
	const trackingParams = new Set([
		'utm_id', // google analytics
		'fbclid', // facebook
		'mc_cid', // mailchimp
		'mc_eid', // mailchimp
		'igshid', // instagram
		'msclkid', // microsoft
		'omnisendcontractid', // omnisend
		's_cid', // adobe
		'ef_id', // adobe
		'twclid', // twitter
	]);

	// Query parameter names are case sensitive but our matching is not, so
	// every name is lowercased once up front instead of testing each case
	// combination. When a name repeats, the last value wins.
	const params = new Map();
	for (const [rawKey, value] of query.entries()) {
		const key = rawKey.toLowerCase();
		if (!trackingParams.has(key)) {
			params.set(key, value);
		}
	}

	if (params.size === 0) {
		return '';
	}

	// An explicit allerid always takes precedence and is used as-is.
	if (params.has('allerid')) {
		return `${host}/${params.get('allerid')}`;
	}

	// picking only the first candidate found here as per original code
	// TODO: Look into selecting the best match
	const firstCandidate = [...params.keys()].find((key) => findCandidates(key));
	if (!firstCandidate) {
		return '';
	}

	const cleanedId = cleanId(params.get(firstCandidate));
	return cleanedId ? `${host}/${cleanedId}` : '';
}

/**
 * Decides whether a query parameter name looks like it carries an ID.
 *
 * Explicitly rejected names are checked first, so they lose even though
 * they contain an accepted fragment such as `id`.
 *
 * @param {string} key - The (already lowercased) query parameter name
 * @returns {boolean} - `true` when the name is an ID candidate
 */
function findCandidates(key) {
	const rejectedNames = ['srwid', 'fylkeid', 'wt.mc_id'];
	if (rejectedNames.includes(key)) {
		return false;
	}

	// Names accepted only on an exact match.
	const acceptedNames = ['id', 'p', 'post', 'playvideo'];
	// Substrings that make any name containing them a candidate.
	const acceptedFragments = ['id', 'artikkel'];

	return (
		acceptedNames.includes(key) ||
		acceptedFragments.some((fragment) => key.includes(fragment))
	);
}

/**
 * Tries to clean up an ID candidate taken from a query parameter value.
 *
 * Checks are applied in order:
 *  1. a match of `IS_NUMERIC_ID` is returned,
 *  2. a match of `IS_VIDEO_ID` is returned,
 *  3. if the value contains `?` (a broken query such as `4216?xtor=RSS-2`),
 *     the longest `?`-separated part that parses as a number is returned,
 *     or an empty string when no part does,
 *  4. otherwise the value is returned unchanged.
 *
 * @param {string} id - Raw query parameter value
 * @returns {string} The cleaned ID; may be an empty string when nothing usable remains
 */
function cleanId(id) {
	// Match the correct part of an ID candidate to clean it. Both patterns are
	// executed up front so each is run exactly once per call.
	const numericId = IS_NUMERIC_ID.exec(id);
	const videoId = IS_VIDEO_ID.exec(id);

	// Plain numeric IDs are OK
	if (numericId) {
		return numericId[0];
	}

	// Video IDs are OK
	if (videoId) {
		return videoId[0];
	}

	// Cleans IDs from paths with broken queries: ?p=4216?xtor=RSS-2
	if (id.includes('?')) {
		// The '' seed keeps reduce from throwing when no part is numeric
		// (e.g. "abc?def"); the caller treats an empty result as "no ID".
		// On equal length the earliest part is kept.
		return id
			.split('?')
			.filter((part) => !Number.isNaN(Number(part)))
			.reduce((longest, part) => (part.length > longest.length ? part : longest), '');
	}

	// The ID is already fine.
	return id;
}
