import titleTokenizer from '../search/tokenizers/title-tokenizer';

/**
 * Lower-case English stop words (function words and common contractions).
 * Membership is checked with an exact, case-sensitive `Set#has` lookup.
 * Entries are grouped by initial letter and kept in ascending order.
 */
const STOP_WORDS = new Set([
  // a
  'a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an',
  'and', 'any', "aren't", 'as', 'at',
  // b
  'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both',
  'but', 'by',
  // c
  'can', "can't", 'cannot', 'could', "couldn't",
  // d
  'did', "didn't", 'do', 'does', "doesn't", 'doing', "don't", 'down',
  'during',
  // e
  'each',
  // f
  'few', 'for', 'from', 'further',
  // g
  'go',
  // h
  'had', "hadn't", 'has', "hasn't", 'have', "haven't", 'having', 'he',
  "he'd", "he'll", "he's", 'her', 'here', "here's", 'hers', 'herself',
  'him', 'himself', 'his', 'how', "how's",
  // i
  'i', "i'd", "i'll", "i'm", "i've", 'if', 'in', 'into', 'is', "isn't",
  'it', "it's", 'its', 'itself',
  // l
  "let's",
  // m
  'me', 'more', 'most', "mustn't", 'my', 'myself',
  // n
  'no', 'nor', 'not',
  // o
  'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours',
  'ourselves', 'out', 'over', 'own',
  // s
  'same', "shan't", 'she', "she'd", "she'll", "she's", 'should',
  "shouldn't", 'so', 'some', 'such',
  // t
  'than', 'that', "that's", 'the', 'their', 'theirs', 'them', 'themselves',
  'then', 'there', "there's", 'these', 'they', "they'd", "they'll",
  "they're", "they've", 'this', 'those', 'through', 'to', 'too',
  // u
  'under', 'until', 'up',
  // v
  'very',
  // w
  'was', "wasn't", 'we', "we'd", "we'll", "we're", "we've", 'were',
  "weren't", 'what', "what's", 'when', "when's", 'where', "where's",
  'which', 'while', 'who', "who's", 'whom', 'whose', 'why', "why's", 'will',
  'with', "won't", 'would', "wouldn't",
  // y
  'you', "you'd", "you'll", "you're", "you've", 'your', 'yours', 'yourself',
  'yourselves',
]);

/** Minimum `string.length` a word needs before it is treated as meaningful. */
const MIN_WORD_LENGTH = 2;

/**
 * Reports whether `word` meets the minimum length requirement.
 *
 * @param word - Candidate token to measure.
 * @returns `true` when `word.length` is at least `MIN_WORD_LENGTH`.
 */
function isValidLength(word: string): boolean {
  const tooShort = word.length < MIN_WORD_LENGTH;
  return !tooShort;
}

/**
 * Decides whether a token is meaningful enough to stand on its own.
 *
 * A word qualifies when it passes `isValidLength` and is not a member of
 * `STOP_WORDS`. The stop-word lookup is an exact match, so casing matters.
 *
 * @param word - Candidate token.
 * @returns `true` for a sufficiently long non-stop word, otherwise `false`.
 */
function isValidWord(word: string): boolean {
  if (!isValidLength(word)) {
    return false;
  }

  return !STOP_WORDS.has(word);
}

/**
 * Builds the list of strings to highlight for a search query.
 *
 * The query is split with `titleTokenizer`, then every token is handled as
 * follows:
 *
 * - A token accepted by `isValidWord` is run through `titleTokenizer` again.
 *   If that yields more than one piece, the pieces are processed by the same
 *   rules, using the piece list as their neighbourhood. Otherwise the single
 *   piece is emitted, or the token itself when the tokenizer returned nothing.
 * - A token rejected by `isValidWord` is never emitted alone. It is emitted
 *   joined by a single space with its following neighbour and/or its
 *   preceding neighbour, whenever that neighbour is accepted by `isValidWord`.
 *
 * Results keep encounter order and are not de-duplicated.
 *
 * @param query - Raw search text.
 * @returns Words and two-word phrases, in the order they were produced.
 */
export function highlightTokenizer(query: string): Array<string> {
  const highlights: string[] = [];

  // Walks one token list; recursion re-enters with the sub-token list so that
  // neighbour lookups stay within the list the token came from.
  const collect = (tokens: string[]): void => {
    tokens.forEach((token, index) => {
      if (!isValidWord(token)) {
        // Out-of-range neighbours read as '' which `isValidWord` rejects.
        const following = tokens[index + 1] ?? '';
        const preceding = tokens[index - 1] ?? '';

        // Forward phrase is emitted before the backward phrase.
        if (isValidWord(following)) {
          highlights.push(`${token} ${following}`);
        }
        if (isValidWord(preceding)) {
          highlights.push(`${preceding} ${token}`);
        }
        return;
      }

      const pieces = titleTokenizer(token);
      if (pieces.length > 1) {
        collect(pieces);
        return;
      }

      // Zero or one piece: prefer the tokenizer's output, else the raw token.
      highlights.push(pieces[0] ?? token);
    });
  };

  collect(titleTokenizer(query));

  return highlights;
}
