

import { replaceAll } from './util';

// Splits a CamelCase word into its capitalized pieces, where a piece is one
// ASCII uppercase letter followed by one or more ASCII lowercase letters.
// When a split happens, characters outside such pieces (digits, all-caps
// runs, leading lowercase letters) are not part of the result. If fewer than
// two pieces are found, the input is returned whole as a one-element array.
// tokenizeByLeadingCapitalization("ThePeteSantilli")
// returns ["The", "Pete", "Santilli"]
function tokenizeByLeadingCapitalization(input_str) {
    const pieces = input_str.match(/[A-Z][a-z]+/g) || [];
    return pieces.length > 1 ? pieces : [input_str];
}


// Returns the fraction (0..1) of characters in input_str that are uppercase
// letters; an empty string yields 0.
// A character counts only if it is cased and already in its uppercase form, so
// digits, punctuation and whitespace (which toUpperCase() leaves unchanged)
// are not mistaken for capitals. They still count toward the total length.
// calcCapitalRatio("SGTReport") returns 4 / 9
function calcCapitalRatio(input_str) {
    const n = input_str.length;
    if (n === 0) {
        return 0;
    }
    let cnt = 0;
    for (let i = 0; i < n; ++i) {
        const c = input_str.charAt(i);
        // c !== c.toLowerCase() rules out caseless characters such as "1" or "-".
        if (c === c.toUpperCase() && c !== c.toLowerCase()) {
            cnt++;
        }
    }
    return cnt / n;
}

// Splits a word made of an acronym glued to a capitalized word at the point
// where the acronym ends and the word begins.
// tokenizeByGroupCaps("SGTReport") returns ["SGT", "Report"]
//
// The input is returned unsplit (as a one-element array) when:
//   - more than half of it is capitals (treated as an all-caps word),
//   - its first run of ASCII capitals is shorter than 4 letters, i.e. fewer
//     than 3 acronym letters plus the capital that starts the next word, or
//   - that run is not followed by a lowercase letter, so its last capital
//     does not start a word ("reportABCD" must not become "reportABC" + "D").
function tokenizeByGroupCaps(input_str) {
    if (calcCapitalRatio(input_str) > 0.5) {
        return [input_str];
    }
    const caps_run = /[A-Z]+/.exec(input_str);
    if (caps_run === null || caps_run[0].length < 4) {
        return [input_str];
    }
    // The last capital of the run belongs to the word that follows it.
    const split_index = caps_run.index + caps_run[0].length - 1;
    if (!/[a-z]/.test(input_str.charAt(split_index + 1))) {
        return [input_str];
    }
    return [input_str.substring(0, split_index), input_str.substring(split_index)];
}


// Breaks free text into lowercase search tokens.
// '#', '_' and '"' are treated as whitespace; each whitespace-separated word
// is then split on CamelCase boundaries (tokenizeByLeadingCapitalization) and
// on acronym/word boundaries (tokenizeByGroupCaps).
// Returns an array of non-empty lowercase strings; text with no word
// characters yields [].
function tokenize(input_str) {
    const words = input_str.replace(/[#_"]/g, ' ').split(/\s+/);
    const out_tokens = [];
    words.forEach((word) => {
        // split() produces "" for leading/trailing separators. An empty token
        // must not survive: "".includes-style matching succeeds against
        // every string and would boost unrelated results.
        if (word === '') {
            return;
        }
        tokenizeByLeadingCapitalization(word).forEach((piece) => {
            tokenizeByGroupCaps(piece).forEach((tok) => {
                out_tokens.push(tok.toLowerCase());
            });
        });
    });
    return out_tokens;
}

// Returns a Map from each character of the lowercased input to the number of
// times it occurs, ignoring '#', '_', ' ', '"', ',' and '.':
// denormalize('aab') returns a Map({'a': 2, 'b': 1})
// Characters are counted per Unicode code point, so a character outside the
// Basic Multilingual Plane (e.g. an emoji) is one key rather than two
// surrogate halves.
function denormalize(input_str) {
    let cleaned = input_str.toLowerCase();
    // NOTE(review): assumes replaceAll (from ./util) treats its search argument
    // as a literal string, in particular for '.' -- confirm against ./util.
    ['#', '_', ' ', '"', ',', '.'].forEach((ignored) => {
        cleaned = replaceAll(cleaned, ignored, '');
    });
    const out_map = new Map();
    // Array.from walks the string by code point, unlike split('').
    Array.from(cleaned).forEach((c) => {
        out_map.set(c, (out_map.get(c) || 0) + 1);
    });
    return out_map;
}

// Sorts arr in place by the first element of each entry, largest first.
// Returns nothing.
// After sortRankArray(arr) with arr = [[1, "c"], [4, "a"]],
// arr is [[4, "a"], [1, "c"]].
function sortRankArray(arr) {
    const byFirstDescending = (left, right) => right[0] - left[0];
    arr.sort(byFirstDescending);
}

// Scores how well the character counts of a query overlap with those of a
// piece of data. Both arguments are Maps of character -> count, as built by
// denormalize(). Every character of the query contributes
// min(data count, query count) / (data count + query count), which is 0 when
// the data lacks the character and 0.5 when both counts are equal.
// Returns the sum of those contributions.
function compareDenormalized(query_dn, data_dn) {
    return Array.from(query_dn).reduce((total, [ch, query_cnt]) => {
        const data_cnt = data_dn.get(ch) || 0;
        return total + Math.min(data_cnt, query_cnt) / (data_cnt + query_cnt);
    }, 0);
}

// Keeps only the rows of a video table that were published inside a date
// range.
//
// search_json: table as an array of rows; row 0 holds the column names and is
//              always copied to the output. Dates are read from the
//              'date_published' column and parsed with Date.parse().
// date_range:  [begin, end], both bounds inclusive. String bounds are parsed
//              with Date.parse(); other values are compared as given. The
//              array itself is left untouched.
//
// Returns a new table (header row followed by the matching rows), or [] for
// an empty table. Rows whose date cannot be parsed never match, because
// comparisons against NaN are false.
export function filterByDate(search_json, date_range) {
    if (search_json.length === 0) {
        return [];
    }
    // Parse into locals instead of writing the timestamps back into the
    // caller's array.
    const toTimestamp = bound => (typeof bound === 'string' ? Date.parse(bound) : bound);
    const begin_date = toTimestamp(date_range[0]);
    const end_date = toTimestamp(date_range[1]);
    const date_published_idx = search_json[0].indexOf('date_published');
    const matching_rows = search_json.slice(1).filter((row) => {
        const vid_date = Date.parse(row[date_published_idx]);
        return begin_date <= vid_date && vid_date <= end_date;
    });
    return search_json.slice(0, 1).concat(matching_rows);
}

// Ranks the rows of a video table against a free-text query.
//
// query:       search text; anything shorter than 2 characters yields [].
// search_json: table as an array of rows whose first row holds the column
//              names (expected to include 'title' and 'channel_name').
//              null or undefined means the data has not loaded yet and
//              yields [].
// date_range:  optional [begin, end] handed to filterByDate(); falsy to skip
//              date filtering.
//
// Returns [] when there is nothing to search. Otherwise returns a new table:
// the column-name row followed by every row scoring above MIN_THRESHOLD,
// best match first.
//
// Scoring: each row starts at 1. For the title (weight 1) and the channel
// name (weight 2), every query token adds half of the character-overlap
// similarity to a running subtotal, which is then multiplied by 4 if the
// lowercased text contains the token as a substring and by 16 if the token
// equals one of the text's own tokens.
export function searchVideos(query, search_json, date_range) {
    const MIN_THRESHOLD = 3;  // Experimentally chosen.
    if (query.length < 2) {
        return [];
    }
    if (search_json == null) {  // Hasn't come in yet (null or undefined).
        console.log("Search cancelled because data hasn't loaded yet.");
        return [];
    }
    const query_tokens = tokenize(query);
    if (!query_tokens.length || search_json.length < 1) {
        return [];
    }
    const query_dn = denormalize(query);
    const column_names = search_json[0];
    const channel_name_idx = column_names.indexOf('channel_name');
    const title_idx = column_names.indexOf('title');
    console.assert(channel_name_idx >= 0);
    console.assert(title_idx >= 0);

    const table = date_range ? filterByDate(search_json, date_range) : search_json;

    const rankArray = [];  // Array of [score, row]
    for (let i = 1; i < table.length; ++i) {
        const row = table[i];
        const word_soup = [row[title_idx], row[channel_name_idx]];
        let score = 1;
        for (let j = 0; j < word_soup.length; ++j) {
            const multiplier = (j === 0) ? 1 : 2;
            // A missing cell is scored as empty text instead of crashing.
            const raw_words = word_soup[j] == null ? '' : String(word_soup[j]);
            // Tokenize before lowercasing: token boundaries depend on capitals.
            const vid_tokens = tokenize(raw_words);
            const vid_words = raw_words.toLowerCase();
            // Same for every query token, so compute it once per text.
            const similarity = compareDenormalized(query_dn, denormalize(vid_words)) / 2;
            let inner_score = 0;
            for (let k = 0; k < query_tokens.length; ++k) {
                const tok = query_tokens[k];
                inner_score += similarity;
                if (vid_words.includes(tok)) {
                    inner_score *= 4;
                }
                if (vid_tokens.includes(tok)) {
                    inner_score *= 16;
                }
            }
            score += inner_score * multiplier;
        }
        rankArray.push([score, row]);
    }
    sortRankArray(rankArray);
    const out_table = [column_names];
    rankArray.forEach((tup) => {
        if (tup[0] > MIN_THRESHOLD) {
            out_table.push(tup[1]);
        }
    });
    return out_table;
}

// Entry point for a plain search: runs searchVideos() over the whole table
// with no date filtering and caps the size of the result.
//
// Returns whatever searchVideos() returns, truncated to its first 80 entries.
// Because a non-empty result starts with the column-name row, that is the
// header plus at most 79 ranked rows.
//
// Note: this does NOT return the grouped shape
//    { today: [...], this_week: [...], trending: [...] }
// that earlier notes here sketched; restricting results to a time window
// would go through the date_range argument of searchVideos().
export function searchAlgorithm(query, search_json) {
    const MAX_ROWS = 80;
    return searchVideos(query, search_json, null).slice(0, MAX_ROWS);
}
