import distance from 'jaro-winkler';

import { Utterance } from '@autocut/types/Deepgram';

// Number of consecutive dissimilar sentences that are skipped over while
// building a group; the next dissimilar sentence after that closes the group.
const DISSIMILARITY_COUNTER_TRESHOLD = 2;
// A sentence is considered similar to a group only when its combined
// similarity score is strictly greater than this value.
const DISSIMILARITY_SCORE_TRESHOLD = 0.725;
// Weight given to the embedding-based (semantic) score in the combined score;
// the Jaro-Winkler score receives the complementary weight (1 - this value).
const SEMANTIC_SCORE_COEFF = 0.275;

/**
 * Dot product of two numeric vectors.
 *
 * Iterates over the indices of `a`; `b` is expected to have at least as many
 * components (a shorter `b` yields `NaN`, as `undefined` components are
 * multiplied in).
 *
 * @param a - First vector; its length drives the iteration.
 * @param b - Second vector.
 * @returns The sum of the pairwise products, or `0` when `a` is empty.
 */
const dot = (a: number[], b: number[]) =>
  // Seeding the reduction with 0 keeps empty vectors from throwing a TypeError
  a.reduce((sum, value, i) => sum + value * b[i], 0);

/**
 * Decides whether a sentence is similar enough to a group of sentences.
 *
 * Despite its name, this returns a boolean decision, not the score itself.
 * Two signals are blended:
 *  - a semantic score: the dot product between the normalized sum of the
 *    group's embeddings and the sentence's embedding;
 *  - a lexical score: the mean of the Jaro-Winkler values between the
 *    sentence's transcript and each transcript of the group.
 *
 * @param group - Sentences already gathered in the group; must contain at
 * least one element since the first embedding seeds the group vector.
 * @param sentenceToCompare - Candidate sentence compared against the group.
 * @returns `true` when the weighted score is strictly greater than
 * `DISSIMILARITY_SCORE_TRESHOLD`, `false` otherwise.
 */
const computeSimilarityScore = (
  group: Array<Utterance>,
  sentenceToCompare: Utterance
) => {
  // Build one vector for the whole group: start from a copy of the first
  // embedding (so the input is left untouched) and add the others onto it
  const groupVector = group[0].embedding.slice();
  for (const member of group.slice(1)) {
    for (let k = 0; k < member.embedding.length; k++) {
      groupVector[k] += member.embedding[k];
    }
  }

  // Scale the group vector to unit length before comparing it to the sentence
  let sumOfSquares = 0;
  for (const component of groupVector) {
    sumOfSquares += component * component;
  }
  const norm = Math.sqrt(sumOfSquares);
  const unitGroupVector = groupVector.map(component => component / norm);
  const semanticScore = dot(unitGroupVector, sentenceToCompare.embedding);

  // Average the Jaro-Winkler value of the sentence against every group member
  const jaroWinklerTotal = group.reduce(
    (total, member) =>
      total + distance(member.transcript, sentenceToCompare.transcript),
    0
  );
  const jaroWinklerScore = jaroWinklerTotal / group.length;

  // Blend both signals and compare the result against the decision threshold
  const weightedScore =
    semanticScore * SEMANTIC_SCORE_COEFF +
    jaroWinklerScore * (1 - SEMANTIC_SCORE_COEFF);

  return weightedScore > DISSIMILARITY_SCORE_TRESHOLD;
};

/**
 * Flags every word of every utterance with an `isCut` boolean, in place.
 *
 * Within each group, the words of the last utterance get `isCut = false`
 * and the words of all preceding utterances get `isCut = true`.
 *
 * @param groupedUtterances - Groups of utterances; the word objects inside
 * are mutated.
 */
const addIsCutProperty = (groupedUtterances: Array<Utterance[]>) => {
  for (const take of groupedUtterances) {
    const lastPosition = take.length - 1;
    for (let position = 0; position < take.length; position++) {
      // Only the final utterance of a group is kept
      const shouldBeCut = position !== lastPosition;
      for (const word of take[position].words) {
        word.isCut = shouldBeCut;
      }
    }
  }
};

/**
 * Splits a sequence of utterances into consecutive groups of similar
 * sentences ("takes") and flags the words of every utterance via
 * `addIsCutProperty`.
 *
 * A group starts with one utterance and grows while following utterances are
 * judged similar by `computeSimilarityScore`. Up to
 * `DISSIMILARITY_COUNTER_TRESHOLD` consecutive dissimilar utterances are
 * skipped over; they end up inside the group only if a later similar
 * utterance extends the group past them. The next group starts right after
 * the last utterance that was accepted into the current one.
 *
 * @param utterances - Utterances in their original order.
 * @returns The groups, in order, covering every input utterance exactly once.
 * An empty input yields an empty array and a single utterance yields one
 * group containing it.
 */
export const delimitTakes = (utterances: Array<Utterance>) => {
  const groups: Array<Utterance[]> = [];
  let indexFirstSentenceOfGroup = 0;

  // The loop bound also covers empty input: nothing to group, nothing to read
  while (indexFirstSentenceOfGroup < utterances.length) {
    // A lone trailing utterance has nothing left to be compared with, so it
    // forms its own group (this is also the single-utterance input case)
    if (indexFirstSentenceOfGroup === utterances.length - 1) {
      groups.push([utterances[indexFirstSentenceOfGroup]]);
      break;
    }

    // Iteratively building a group of takes based on sentence similarity
    let currentGroup = [utterances[indexFirstSentenceOfGroup]];
    let indexMarkerEndOfGroup = indexFirstSentenceOfGroup + 1;
    let dissimilarityCounter = 0;
    let indexComparison = indexFirstSentenceOfGroup + 1;

    while (indexComparison < utterances.length) {
      const isSimilar = computeSimilarityScore(
        currentGroup,
        utterances[indexComparison]
      );

      if (isSimilar) {
        // The sentence joins the group, along with any sentence skipped
        // between it and the previous end of the group
        indexComparison++;
        indexMarkerEndOfGroup = indexComparison;
        dissimilarityCounter = 0;
        currentGroup = utterances.slice(
          indexFirstSentenceOfGroup,
          indexMarkerEndOfGroup
        );
      } else if (dissimilarityCounter < DISSIMILARITY_COUNTER_TRESHOLD) {
        // Not similar, but too few misses in a row to give up: skip it
        dissimilarityCounter++;
        indexComparison++;
      } else {
        // Too many dissimilar sentences in a row: the group is complete
        break;
      }
    }

    // Close the group at the last similar sentence found and restart from there
    groups.push(currentGroup);
    indexFirstSentenceOfGroup = indexMarkerEndOfGroup;
  }

  addIsCutProperty(groups);

  return groups;
};
