#include "parse.h"

#include <algorithm>
#include <iostream>
#include <fstream>
#include <ranges>


/* Reads the file at filePath and returns its contents joined into a single line.
 * All line breaks are removed: '\n' is consumed by std::getline, and every '\r'
 * (left behind by CRLF line endings) is stripped as well. A raw carriage return
 * is not allowed inside a JSON string, so removing it cannot alter a value.
 * If the file cannot be opened, nothing is read and an empty string is returned.
 */
std::string Parse::loadJSONFile(const std::filesystem::path& filePath) {
    std::ifstream file(filePath);
    std::string jsonTextCombined;
    std::string line;

    while (std::getline(file, line)) {
        // std::getline only drops '\n'; remove the '\r' of Windows line endings too.
        line.erase(std::remove(line.begin(), line.end(), '\r'), line.end());
        jsonTextCombined += line;
    }

    return jsonTextCombined;
}

/* Returns jsonTextRaw with all line breaks removed, so the JSON text becomes a
 * single line. Both '\n' and '\r' are dropped; a raw line break is not allowed
 * inside a JSON string, so only layout characters are affected.
 * Escaped line breaks (the two characters backslash + n) are left untouched.
 */
std::string Parse::loadJSONText(const std::string& jsonTextRaw) {
    std::string jsonTextCombined;
    jsonTextCombined.reserve(jsonTextRaw.size());

    for (const char c : jsonTextRaw) {
        if (c != '\n' && c != '\r') {
            jsonTextCombined += c;
        }
    }

    return jsonTextCombined;
}

/* As JSON uses double quotation marks to enclose key names and string values,
 * a double quotation mark inside a string value is written as \" (escaped).
 * The library splits the JSON text by using " as a separator, so before splitting
 * every escaped double quotation mark \" is replaced with its Unicode escape \u0022.
 *
 * The text is scanned one escape sequence at a time: a backslash always consumes
 * the character that follows it. This way an escaped backslash directly before a
 * closing quotation mark (a string value that ends with a backslash) is not
 * mistaken for an escaped quotation mark, and the first character of the text is
 * never compared against a character in front of it.
 *
 * Returns the rewritten text; all other characters are copied unchanged.
 */
std::string Parse::escapeDoubleQuotationMarks(const std::string& jsonText) {
    std::string escapedJSONText;
    escapedJSONText.reserve(jsonText.size());

    std::size_t i = 0;
    while (i < jsonText.size()) {
        const char current = jsonText[i];

        if (current == '\\' && i + 1 < jsonText.size()) {
            const char escaped = jsonText[i + 1];

            if (escaped == '"') {
                // The six characters: backslash, u, 0, 0, 2, 2.
                escapedJSONText += "\\u0022";
            } else {
                // Any other escape sequence (including an escaped backslash) is kept as is.
                escapedJSONText += current;
                escapedJSONText += escaped;
            }

            i += 2;
        } else {
            escapedJSONText += current;
            ++i;
        }
    }

    return escapedJSONText;
}

/* Turns the escape sequences kept in a query result back into real characters:
 * the six characters backslash + u0022 become a double quotation mark, and the
 * two characters backslash + n become a line break.
 * Every other character is copied unchanged.
 */
std::string Parse::parseCharacters(const std::string& text) {
    const std::size_t length = text.size();
    std::string decoded;

    for (std::size_t pos = 0; pos < length;) {
        const bool atBackslash = text[pos] == '\\';

        // Backslash followed by the five characters u0022.
        if (atBackslash && pos + 5 < length && text.compare(pos + 1, 5, "u0022") == 0) {
            decoded.push_back('"');
            pos += 6;
            continue;
        }

        // Backslash followed by n.
        if (atBackslash && pos + 1 < length && text[pos + 1] == 'n') {
            decoded.push_back('\n');
            pos += 2;
            continue;
        }

        decoded.push_back(text[pos]);
        ++pos;
    }

    return decoded;
}

/* Flattens a JSON text into an ordered list of (first, second) string pairs.
 *
 * - A key with a scalar or string value becomes {key, value}.
 * - A key whose value is an empty array or object becomes {key, "[]"} or {key, "{}"}.
 * - A key that opens an array or object becomes {key, ""}, followed by one entry
 *   per structural mark.
 * - Each structural square bracket or brace becomes its own entry {mark, ""}.
 * - Commas and layout whitespace never appear in the result.
 *
 * The outermost pair of brackets of the document is removed before processing.
 * An empty (or whitespace-only) text yields an empty vector.
 */
std::vector<std::pair<std::string, std::string>> Parse::generateDict(const std::string& jsonText) {
    std::vector<std::pair<std::string, std::string>> dict;

    /* Removes layout whitespace (space, tab, carriage return, line feed) from both
     * ends of a string. Empty and whitespace-only strings give an empty result.
     */
    auto cleanString = [](const std::string& string) {
        constexpr const char* whitespace = " \t\r\n";

        const auto first = string.find_first_not_of(whitespace);
        if (first == std::string::npos) {
            return std::string{};
        }

        const auto last = string.find_last_not_of(whitespace);
        return string.substr(first, last - first + 1);
    };

    auto splitJSON = [](const std::string& text) {
        auto splitJSONVector = text
        | std::views::split('"')
        | std::ranges::to<std::vector<std::string>>();

        return splitJSONVector;
    };

    /* For the first step, the JSON text is split into a vector by using " as a separator.
     * After the step, square brackets, braces and commas that perform functions in JSON
     * are exposed at the beginning or the end of the elements of the vector.
     * For the second step, square brackets and braces are stripped step by step.
     * Each square bracket or brace becomes a separate element of the vector, but
     * [] or {} directly after a key becomes an element as a whole, because there
     * [] and {} act as a value instead of a structural mark.
     * Commas that perform a function, as well as whitespace used for keeping the layout,
     * are all removed.
     * After the above process, the vector dict shows all keys and values with a clear and
     * simple hierarchy.
     * Each square bracket or brace that performs a function becomes a separate element of
     * the vector, making later queries simple and easy to handle.
     */
    const std::vector<std::string> marks = {"[", "]", "{", "}", ","};

    auto isMark = [&marks](char c) {
        return std::ranges::contains(marks, std::string(1, c));
    };

    /* Peels structural marks off the end of text, one at a time.
     * The result holds the remaining leading text (if any) followed by the marks
     * in their original order. The loop stops as soon as the text is used up.
     */
    auto splitNest = [&isMark, &cleanString](std::string text) {
        std::vector<std::string> nest;

        while (!text.empty() && isMark(text.back())) {
            nest.emplace(nest.begin(), 1, text.back());
            text.pop_back();
            text = cleanString(text);
        }

        if (!text.empty()) {
            nest.emplace(nest.begin(), text);
        }

        return nest;
    };

    /* A JSON text may contain line breaks that are not escaped, so the JSON text
     * is combined into one line to make further processing easier.
     * A text loaded through loadJSONFile has already been combined into one line,
     * but a text from any other source may still contain raw line breaks.
     * To keep the processing simple, the text is always passed through loadJSONText,
     * no matter which source it comes from.
     * Escaped line breaks (backslash + n) are not handled at this step.
     */
    auto jsonTextCombined = loadJSONText(jsonText);

    auto escapedJSONText = cleanString(escapeDoubleQuotationMarks(jsonTextCombined));

    // Drop the outermost opening and closing bracket; guarded so that empty or
    // one-character input does not erase out of range.
    if (!escapedJSONText.empty()) {
        escapedJSONText.erase(0, 1);
    }
    if (!escapedJSONText.empty()) {
        escapedJSONText.pop_back();
    }

    auto splitJSONVector = splitJSON(escapedJSONText);

    std::size_t j = 0;
    while (j < splitJSONVector.size()) {
        auto cleanI = cleanString(splitJSONVector[j]);

        std::string cleanII;
        if (j + 1 < splitJSONVector.size()) {
            cleanII = cleanString(splitJSONVector[j + 1]);
        }

        std::string cleanIII;
        if (j + 2 < splitJSONVector.size()) {
            cleanIII = cleanString(splitJSONVector[j + 2]);
        }

        // key, a lone colon, then a quoted (string) value.
        if (cleanII == ":" && !cleanI.empty() && !cleanIII.empty()) {
            dict.emplace_back(cleanI, cleanIII);

            j += 3;
        }

        // key followed by ":" plus unquoted content: a scalar and/or structural marks.
        else if (!cleanI.empty() && !cleanII.empty() && cleanII.front() == ':') {
            std::string s = cleanII.substr(1);

            if (!s.empty() && s.back() == ',') {
                s.pop_back();
            }

            s = cleanString(s);

            if (!s.empty()) {
                if (isMark(s.back())) {
                    auto nest = splitNest(s);

                    if (!nest.empty()) {
                        std::pair<std::string, std::string> pairKey;
                        if (!std::ranges::contains(marks, nest[0])) {
                            // A scalar value followed by closing marks.
                            pairKey = {cleanI, nest[0]};
                            nest.erase(nest.begin());
                        } else if (
                            nest.size() > 1
                            && ((nest[0] == "[" && nest[1] == "]") || (nest[0] == "{" && nest[1] == "}"))
                            ) {
                            // An empty array or object is the key's value as a whole.
                            pairKey = {cleanI, nest[0] + nest[1]};
                            nest.erase(nest.begin(), nest.begin() + 2);
                        } else {
                            // The key opens a nested array or object.
                            pairKey = {cleanI, ""};
                        }

                        dict.emplace_back(pairKey);

                        for (const auto& part : nest) {
                            if (part != ",") {
                                dict.emplace_back(part, "");
                            }
                        }
                    }
                } else {
                    dict.emplace_back(cleanI, s);
                }
            }

            j += 2;
        }

        // Anything else: content that does not start a key/value pair.
        else {
            if (!cleanI.empty()) {
                if (cleanI.back() == ',') {
                    cleanI.pop_back();
                }

                cleanI = cleanString(cleanI);

                if (!cleanI.empty()) {
                    const auto nest = splitNest(cleanI);

                    for (const auto& part : nest) {
                        // Commas only separate elements and are not kept.
                        if (part != ",") {
                            dict.emplace_back(part, "");
                        }
                    }
                }
            }

            ++j;
        }
    }

    return dict;
}

/* Looks up a value in a JSON text by a dot-separated path such as
 * "output.0.content.0.text", where a numeric element selects the n-th {...}
 * element (counting from 0) of an array.
 *
 * jsonText     the JSON text to search; it is flattened with generateDict.
 * queryString  the dot-separated path.
 *
 * Returns the value stored for the last path element, with escaped quotation
 * marks and escaped line breaks decoded by parseCharacters. Returns an empty
 * string when the path is empty or nothing matches.
 * NOTE: a numeric path element too large for int makes std::stoi throw
 * std::out_of_range.
 */
std::string Parse::dictQuery(
    const std::string& jsonText, const std::string& queryString
    ) {
    // True only for a non-empty string made of decimal digits.
    auto isDigit = [](const std::string& string) {
        return !string.empty()
            && std::ranges::all_of(string, [](unsigned char c) { return std::isdigit(c) != 0; });
    };

    /* The query's format is like "output.0.content.0.text".
     * The string is split into the vector queryVector by using a dot as a separator.
     * queryVectorIndex is the index of an element in queryVector.
     * The number of numeric elements in queryVector equals the number of array
     * levels (starting with [ and ending with ]) that are accessed.
     * For the numeric element at queryVectorIndex, the lambda returns how many
     * numeric elements precede it, which is the level of the array it indexes.
     * The first array's level is 0.
     * It is only called for numeric elements.
     */
    auto nestedArrayIndex = [&isDigit](const std::vector<std::string>& queryVector, std::size_t queryVectorIndex) {
        const auto first = queryVector.begin();
        const auto last = first + static_cast<std::ptrdiff_t>(queryVectorIndex);
        return static_cast<int>(std::count_if(first, last, isDigit));
    };

    auto queryVector = queryString
    | std::views::split('.')
    | std::ranges::to<std::vector<std::string>>();

    // An empty query has no element to match against.
    if (queryVector.empty()) {
        return {};
    }

    auto dict = Parse::generateDict(jsonText);

    /* When the mark [ repeatedly appears without the separation of the mark ],
     * it means multiple levels of arrays are accessed. The first array's level is 0.
     * An array may have many pairs of { and }. pairIndexInArray indicates the index of a pair in an array.
     * In an array, the index of the first pair is 0, and the mark { indicates the beginning of a pair.
     * When entering or leaving an array, pairIndexInArray is reset to -1.
     * Each time an element of queryVector is satisfied, queryVectorIndex is advanced by 1
     * so that the next element is checked from then on; the last element is never passed.
     * As [ and ] or { and } appear in pairs, top-level keys are recognized when both
     * squareBracketIndex and braceIndex keep or return to their initial value.
     * If an element of queryVector is a number (the index of a pair of { and } in an array),
     * and the array's level is confirmed, the contents of the pair of { and } are checked
     * when pairIndexInArray matches the number.
     */
    int squareBracketIndex = -1;
    int braceIndex = -1;
    int pairIndexInArray = -1;
    bool topLevelKeyFound = false;
    std::string queryResult;

    std::size_t queryVectorIndex = 0;
    const std::size_t lastQueryIndex = queryVector.size() - 1;

    for (std::size_t i = 0; i < dict.size(); ++i) {
        const std::string& entryKey = dict[i].first;

        if (squareBracketIndex == -1 && braceIndex == -1 && queryVector[queryVectorIndex] == entryKey) {
            topLevelKeyFound = true;
            if (queryVectorIndex < lastQueryIndex) {
                ++queryVectorIndex;
            }
        }

        // Nothing is tracked until the first path element has matched a top-level key.
        if (!topLevelKeyFound) {
            continue;
        }

        if (entryKey == "{") {
            ++braceIndex;
            ++pairIndexInArray;
        } else if (entryKey == "}") {
            --braceIndex;
        } else if (entryKey == "[") {
            ++squareBracketIndex;
            pairIndexInArray = -1;
        } else if (entryKey == "]") {
            --squareBracketIndex;
            pairIndexInArray = -1;
        }

        const std::string& wanted = queryVector[queryVectorIndex];
        const bool satisfied = isDigit(wanted)
            ? (nestedArrayIndex(queryVector, queryVectorIndex) == squareBracketIndex
               && pairIndexInArray == std::stoi(wanted))
            : (entryKey == wanted);

        if (satisfied && queryVectorIndex < lastQueryIndex) {
            ++queryVectorIndex;
        }

        if (queryVectorIndex == lastQueryIndex && entryKey == queryVector[queryVectorIndex]) {
            queryResult = dict[i].second;
            break;
        }
    }

    // The characters that escape " or \n are now parsed to the actual character for better visual layout.
    return parseCharacters(queryResult);
}