import { extractImageUrl, hashString } from './utils';
import { kmeans } from './kmeans';

// Lower-case words that are skipped during tokenization; tokens found in this
// set never become vocabulary entries or vector dimensions.
const stopWords = new Set([
  'i', 'to', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves',
  'you', 'your', 'yours', 'yourself', 'yourselves',
  'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself',
  'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves',
  'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those',
  'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
  'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing',
  'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while',
  'of', 'at', 'by', 'for', 'with', 'about', 'into',
  'again', 'then', 'here', 'there', 'when', 'where', 'why', 'how',
  'such', 'so', 'than', 'too', 'very', 'will', 'just', 'should', 'now',
]);

/**
 * Builds an array filled with the number 0.
 *
 * @param {number} [n=0] - Number of entries wanted; the array grows while its
 *   length is still below `n`, so values at or below zero give an empty array.
 * @returns {number[]} A fresh array containing only zeros.
 */
function zeros(n=0) {
  const filled = [];
  while (filled.length < n) {
    filled.push(0);
  }
  return filled;
}

/**
 * Reports whether a string is empty once surrounding whitespace is trimmed.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} `true` when nothing but whitespace (or nothing at all) is present.
 */
function isSpaces(str) {
  const trimmed = str.trim();
  return trimmed.length === 0;
}

/**
 * Sends the `textContent` of every message to the remote embedding endpoint
 * and returns the parsed JSON reply.
 *
 * @param {Array<{textContent: string}>} messages - Messages to embed; only the
 *   `textContent` property of each entry is transmitted.
 * @returns {Promise<*>} The decoded JSON body. Presumably one vector per
 *   message (the caller passes it to k-means as points) — the response schema
 *   is not visible here, so confirm against the cloud function.
 * @throws {Error} When the endpoint answers with a non-2xx HTTP status.
 */
async function getEmbedding(messages) {
  console.log('sending request to the cloud')
  const response = await fetch('https://us-central1-prismia.cloudfunctions.net/embed', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      messages: messages.map((message) => message.textContent),
    }),
  });
  console.log('reply received');
  // fetch only rejects on network failure; an HTTP error still resolves, so
  // fail loudly here instead of handing an error payload to the clustering step.
  if (!response.ok) {
    throw new Error(
      `Embedding request failed: ${response.status} ${response.statusText}`,
    );
  }
  return response.json();
}

/**
 * Splits `docs` into groups by running k-means over one vector per document.
 *
 * Vectors come either from the remote embedding endpoint (`cloud`) or from a
 * local bag-of-words model built over the documents plus any seed documents.
 * The caller's `docs`, `seedDocs` and `clusterIndices` arrays are never
 * modified.
 *
 * @param {Object[]} docs - Documents to cluster. Text is read from `doc[key]`;
 *   when `doc.quillDelta` (a JSON string) yields an image URL, a hash of that
 *   URL is appended to the text as an extra token.
 * @param {Array<Object[]>} [seedDocs=[]] - `seedDocs[i]` holds example
 *   documents pinned to cluster `i`. They influence the vectors and the
 *   k-means input but are not part of the returned groups.
 * @param {string} [key='text'] - Property holding each document's text.
 * @param {number} [k=3] - Number of clusters; lowered to `docs.length` when
 *   there are fewer documents than clusters.
 * @param {number[]} [clusterIndices=[]] - Current cluster index per document,
 *   `-1` when omitted. An index equal to `k` is passed to k-means as `-1` and
 *   the document is then placed in the extra last group (judging by the
 *   variable naming this marks documents held in an inbox — confirm with the
 *   caller).
 * @param {boolean} [fast=false] - Use a single k-means attempt instead of 10.
 * @param {boolean} [cloud=false] - Fetch embeddings remotely instead of
 *   computing bag-of-words vectors locally.
 * @returns {Promise<Array<Object[]>|undefined>} `k + 1` groups of the original
 *   documents (the last one for index-`k` documents), or `undefined` when
 *   `docs` is missing or empty.
 */
export async function cluster(docs, seedDocs=[], key='text', k=3, clusterIndices=[], fast=false, cloud=false) {
  if (!docs || !docs.length) return;
  const clusterCount = docs.length < k ? docs.length : k;
  const docsLength = docs.length;

  // Work on copies: appending the seed documents to the caller's arrays would
  // make them grow on every call.
  const allDocs = [...docs];
  const allIndices = clusterIndices?.length ? [...clusterIndices] : docs.map(() => -1);
  const seedGroupCount = seedDocs?.length || 0;
  for (let i = 0; i < seedGroupCount; i++) {
    const group = seedDocs[i];
    const groupSize = group?.length || 0;
    for (let j = 0; j < groupSize; j++) {
      allIndices.push(i);
      allDocs.push(group[j]);
    }
  }

  // One group per cluster plus a trailing group for index-`k` documents.
  const clusters = [];
  for (let i = 0; i < clusterCount + 1; i++) {
    clusters.push([]);
  }

  const vectors = cloud
    ? await getEmbedding(allDocs)
    : bagOfWordsVectors(allDocs);

  // Index-`k` documents go to k-means as -1 and are routed to the last group
  // below, regardless of what k-means returns for them.
  const indicesWithInboxHoldsErased = allIndices.map(
    (idx) => (idx === clusterCount ? -1 : idx),
  );
  const kMeansClusters = kmeans({
    points: vectors,
    clusters: indicesWithInboxHoldsErased,
    k: clusterCount,
    maxiter: 10,
    numTries: fast ? 1 : 10,
  });

  // Only the caller's own documents (the first docsLength) are returned.
  for (let i = 0; i < docsLength; i++) {
    const docIndex = allIndices[i] === clusterCount
      ? clusterCount
      : kMeansClusters.clusters[i];
    if (clusters[docIndex]) {
      clusters[docIndex].push(allDocs[i]);
    }
  }
  return clusters;

  // Helpers below are function declarations, so they are hoisted and usable above.

  /** Builds one bag-of-words vector per document over a shared vocabulary. */
  function bagOfWordsVectors(documents) {
    // Tokenize once; the lists are needed for counting and for vector filling.
    const tokenLists = documents.map((doc) => tokensOf(doc));
    // Maps instead of plain objects: a token such as "constructor" would
    // otherwise collide with properties inherited from Object.prototype.
    const tokenCounts = new Map();
    const wordKeys = new Map();
    for (const tokens of tokenLists) {
      for (const token of tokens) {
        if (!wordKeys.has(token)) {
          wordKeys.set(token, wordKeys.size);
        }
        tokenCounts.set(token, (tokenCounts.get(token) ?? 0) + 1);
      }
    }
    return tokenLists.map((tokens) => {
      const vector = zeros(wordKeys.size);
      for (const token of tokens) {
        // Each present token carries its corpus-wide count (not the count
        // inside this document), as in the original implementation.
        vector[wordKeys.get(token)] = tokenCounts.get(token);
      }
      return vector;
    });
  }

  /** Lower-cased, non-blank, non-stop-word tokens of one document. */
  function tokensOf(doc) {
    // NOTE(review): this pattern is anchored at the start of the text and only
    // matches a single character followed by what appears to be an emoji
    // variation selector, so in practice tokens are split on single spaces
    // only. A negated class may have been intended — confirm before changing.
    return stringifyMessage(doc)
      .replace(/^[\w|🕔|🤷]️/g, ' ')
      .split(' ')
      .map((word) => word.toLowerCase())
      .filter((token) => !isSpaces(token) && !stopWords.has(token));
  }

  /** Text of a document plus, when it embeds an image, a hash of the image URL. */
  function stringifyMessage(doc) {
    // A missing text property becomes '' rather than crashing or producing the
    // token "undefined".
    const text = String(doc[key] ?? '');
    if (!doc.quillDelta) return text;
    const url = extractImageUrl(JSON.parse(doc.quillDelta));
    return url ? `${text} ${hashString(url)}` : text;
  }
}
