app.service("gpt3Encoder", [
    "skyword",
    function(skyword) {
        // ---- Data loaded by init() ------------------------------------------
        // Parsed contents of encoder.json; encode() looks tokens up in it.
        let encoder;
        // Raw text of vocab.bpe, that text split into lines, and the merge
        // entries parsed from those lines.
        let bpe_file = "";
        let lines;
        let bpe_merges;

        // ---- Lookup tables derived from the loaded data -----------------------
        // Inverse of `encoder` (encoder value -> encoder key).
        const decoder = {};
        // Result of bytes_to_unicode() and its inverse.
        let byte_encoder;
        const byte_decoder = {};
        // Merge pair -> position of that pair in `bpe_merges`.
        let bpe_ranks;
        // Memo of bpe() results, keyed by the token passed to bpe().
        const cache = new Map();

        // ---- Stateless helpers -------------------------------------------------
        const textEncoder = new TextEncoder("utf-8");
        const textDecoder = new TextDecoder("utf-8");
        // Splits text into chunks before BPE is applied: a few English
        // contraction suffixes ('s, 't, 're, ...), runs of letters, runs of
        // digits, runs of other non-space characters (each optionally preceded
        // by one space), and runs of whitespace.
        const pat = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
        /**
         * Lists the integers from `x` (inclusive) up to `y` (exclusive).
         *
         * @param {number} x - first value
         * @param {number} y - value to stop before
         * @returns {number[]} `[x, x + 1, ..., y - 1]`, or `[]` when `y <= x`
         */
        function range(x, y) {
            return Array.from({ length: Math.max(y - x, 0) }, (_, i) => x + i);
        }

        /**
         * @param {string} x
         * @returns {number} UTF-16 code unit of the first character of `x`
         */
        function ord(x) {
            return x.charCodeAt(0);
        }

        /**
         * @param {number} x - UTF-16 code unit
         * @returns {string} the one-character string for `x`
         */
        function chr(x) {
            return String.fromCharCode(x);
        }

        /**
         * UTF-8 encodes `str` and returns every byte as a decimal string.
         *
         * @param {string} str
         * @returns {string[]} e.g. `"hi"` -> `["104", "105"]`
         */
        function encodeStr(str) {
            return Array.from(textEncoder.encode(str), (byte) => byte.toString());
        }

        /**
         * Decodes a list of byte values as UTF-8 text.
         *
         * @param {Array<number|string>} arr - byte values; numeric strings are
         *     converted to numbers by the Uint8Array constructor
         * @returns {string}
         */
        function decodeStr(arr) {
            return textDecoder.decode(new Uint8Array(arr));
        }

        /**
         * Builds a plain object mapping `x[i]` to `y[i]`.
         *
         * Keys are converted to strings by property assignment, so an array
         * key such as `["a", "b"]` is stored under `"a,b"`.
         *
         * @param {Array} x - keys
         * @param {Array} y - values, index-aligned with `x`
         * @returns {Object}
         */
        function dictZip(x, y) {
            const result = {};
            x.forEach((key, i) => {
                result[key] = y[i];
            });
            return result;
        }

        /**
         * Maps each byte value 0..255 to a single character.
         *
         * Bytes inside the ranges "!".."~", "¡".."¬" and "®".."ÿ" map to the
         * character with the same code. Every other byte maps, in ascending
         * order, to the characters with codes 256, 257, ...
         *
         * @returns {Object<number, string>} byte value -> character
         */
        function bytes_to_unicode() {
            const keptAsIs = new Set([
                ...range(ord("!"), ord("~") + 1),
                ...range(ord("¡"), ord("¬") + 1),
                ...range(ord("®"), ord("ÿ") + 1)
            ]);

            const result = {};
            let remapped = 0;
            for (let b = 0; b < 2 ** 8; b++) {
                if (keptAsIs.has(b)) {
                    result[b] = chr(b);
                } else {
                    result[b] = chr(2 ** 8 + remapped);
                    remapped += 1;
                }
            }
            return result;
        }

        /**
         * Collects the distinct adjacent pairs of symbols in `word`.
         *
         * @param {string[]} word - sequence of symbols
         * @returns {Set<string[]>} one `[left, right]` array per distinct pair,
         *     in order of first appearance; empty when `word` has fewer than
         *     two symbols
         */
        function get_pairs(word) {
            const pairs = new Set();
            // A Set compares arrays by identity, so equal pairs are detected
            // through an unambiguous string key instead.
            const seen = new Set();
            for (let i = 1; i < word.length; i++) {
                const pair = [word[i - 1], word[i]];
                const key = JSON.stringify(pair);
                if (!seen.has(key)) {
                    seen.add(key);
                    pairs.add(pair);
                }
            }
            return pairs;
        }

        /**
         * Applies the byte-pair merges in `bpe_ranks` to one token.
         *
         * The token is split into single characters. The adjacent pair with the
         * lowest rank is merged repeatedly until no adjacent pair has a rank or
         * a single symbol remains. Results are memoised in `cache`.
         *
         * @param {string} token - token to split
         * @returns {string} the resulting symbols joined with single spaces
         */
        function bpe(token) {
            if (cache.has(token)) {
                return cache.get(token);
            }

            let word = token.split("");
            let pairs = get_pairs(word);

            // Nothing to merge for empty or single-character tokens.
            if (pairs.size === 0) {
                return token;
            }

            while (true) {
                // Pick the adjacent pair that has the lowest rank, if any.
                let bigram = null;
                let bestRank = Infinity;
                for (const pair of pairs) {
                    // "first,second" is the key form dictZip produces when it
                    // is given [first, second] arrays as keys.
                    const rank = bpe_ranks[pair.join(",")];
                    if (typeof rank === "number" && rank < bestRank) {
                        bestRank = rank;
                        bigram = pair;
                    }
                }
                if (bigram === null) {
                    break;
                }

                const [first, second] = bigram;
                const merged = [];
                let i = 0;

                while (i < word.length) {
                    const j = word.indexOf(first, i);
                    // Copy everything before the next `first` (or the whole
                    // rest when there is none) unchanged.
                    const stop = j === -1 ? word.length : j;
                    for (let k = i; k < stop; k++) {
                        merged.push(word[k]);
                    }
                    if (j === -1) {
                        break;
                    }
                    i = j;

                    if (i < word.length - 1 && word[i + 1] === second) {
                        merged.push(first + second);
                        i += 2;
                    } else {
                        merged.push(word[i]);
                        i += 1;
                    }
                }

                word = merged;
                if (word.length === 1) {
                    break;
                }
                pairs = get_pairs(word);
            }

            const result = word.join(" ");
            cache.set(token, result);
            return result;
        }

        return {
            init: function() {
                fetch("/styles/gpt3Encoder/encoder.json")
                    .then(function(res) {
                        return res.text();
                    })
                    .then(function(data) {
                        encoder = JSON.parse(data);
                    })
                    .catch(function(error) {
                        console.log(
                            "Could not load encoder file::::::::",
                            error
                        );
                    });
                var myHeaders = new Headers();
                myHeaders.append("Content-Type", "text/plain; charset=UTF-8");
                fetch("/styles/gpt3Encoder/vocab.bpe", myHeaders)
                    .then(function(res) {
                        return res.text();
                    })
                    .then(function(data) {
                        bpe_file = data;
                        lines = bpe_file.split("\n");

                        // bpe_merges = [tuple(merge_str.split()) for merge_str in bpe_data.split("\n")[1:-1]]
                        bpe_merges = lines
                            .slice(1, lines.length - 1)
                            .map((x) => {
                                return x.split(/(\s+)/).filter(function(e) {
                                    return e.trim().length > 0;
                                });
                            });
                    })
                    .catch(function(error) {
                        console.log("Could not load vocab file::::::::", error);
                    });
            },
            encode: function(text) {
                byte_encoder = bytes_to_unicode();
                Object.keys(encoder).map((x) => {
                    decoder[encoder[x]] = x;
                });
                Object.keys(byte_encoder).map((x) => {
                    byte_decoder[byte_encoder[x]] = x;
                });
                bpe_ranks = dictZip(bpe_merges, range(0, bpe_merges.length));
                let bpe_tokens = [];
                const matches = Array.from(text.matchAll(pat)).map((x) => x[0]);
                for (let token of matches) {
                    token = encodeStr(token)
                        .map((x) => {
                            return byte_encoder[x];
                        })
                        .join("");

                    const new_tokens = bpe(token)
                        .split(" ")
                        .map((x) => encoder[x]);
                    bpe_tokens = bpe_tokens.concat(new_tokens);
                }
                return bpe_tokens;
            },
            decode: function(tokens) {
                let text = tokens.map((x) => decoder[x]).join("");
                text = decodeStr(text.split("").map((x) => byte_decoder[x]));
                return text;
            }
        };
    }
]);
