// Helper that runs a generator function as an asynchronous function and returns a promise
// for its final value (this looks like the standard TypeScript-emitted `__awaiter`; confirm
// against the build configuration). An already-defined `this.__awaiter` is reused if present.
//   thisArg    - `this` value the generator is invoked with
//   _arguments - arguments passed to the generator (defaults to an empty list)
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - generator function whose yielded values are awaited
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass through values that are already instances of P; wrap anything else in a resolved P.
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with a fulfilled value; an exception escaping it rejects the outer promise.
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        // Throw the rejection reason into the generator so its own try/catch can handle it.
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        // Resolve when the generator is done, otherwise wait for the yielded value to settle.
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        // Instantiate the generator and kick off the first step.
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import api from 'api';
import sanitize from 'sanitize-html';
import { getCloudConvertOnCallFn } from './callableFunctions';
import { createTreeWalker } from './treeWalker';
// Upper bound on space-separated words in one cell; text with more words than this is
// not treated as table content when inferring tables or merging missed rows.
const MAX_WORDS_IN_CELL = 25;
// Maps the `id` attribute given to each inferred table row to the paragraph element the
// row was built from, so a rejected table can be swapped back for the original paragraph.
// NOTE(review): nothing in this file clears the map, so it grows across calls.
const originalElements = {};
// Monotonic counter used to generate the row ids stored in `originalElements`.
let id = 0;
// Result of getCloudConvertOnCallFn(); presumably a callable that triggers the
// Word conversion - confirm against './callableFunctions'.
export const convertWord = getCloudConvertOnCallFn();
/**
 * Tells whether a row holds no visible text.
 *
 * A row counts as empty when its `textContent` is a string made up solely of
 * whitespace (the empty string included). The check is on the type rather than on
 * truthiness so that `''` is handled; a missing row or a non-string `textContent`
 * yields `false`, which lets callers pass possibly-absent rows without extra guards.
 *
 * @param r Row (or any node-like object) to inspect; may be null or undefined
 * @returns true only for whitespace-only / empty string content
 */
const isEmptyRow = (r) => {
    const content = r === null || r === undefined ? undefined : r.textContent;
    return typeof content === 'string' && content.search(/\S/) === -1;
};
/**
 * Normalises a parsed document in place before tables are inferred.
 *
 * For every <p> reached from the body:
 * - whitespace after a dollar sign is deleted, leading and trailing whitespace of the
 *   paragraph is trimmed, and a space is added between adjacent text nodes when the
 *   earlier one ends in a non-whitespace character;
 * - paragraphs whose text consists of parentheses only are removed;
 * - whitespace-only paragraphs are removed as long as no paragraph with real text has
 *   been seen yet.
 * For every <table>: it is tagged with lawyer="true" when any cell contains
 * parentheses only. Other elements are traversed recursively.
 *
 * @param fakeDOM Document to preprocess
 */
export const preprocess = (fakeDOM) => {
    // Stays true until the first paragraph containing non-whitespace text is met.
    let noContent = true;
    // typeof check (instead of truthiness) so that '' is classified as "no visible text".
    const hasNoVisibleText = (value) => typeof value === 'string' && value.search(/\S/) === -1;
    // True when, ignoring whitespace, the text is one or more '(' / ')' and nothing else.
    const isParenthesesOnly = (value) => !!value && /^[()]+$/.test(value.replace(/\s+/g, ''));
    const trimParagraph = (paragraphElem) => {
        const walker = createTreeWalker(fakeDOM, paragraphElem, NodeFilter.SHOW_TEXT, null, false);
        // Last non-empty text node processed so far; null until the first one is found.
        let previous = null;
        for (let current = walker.nextNode(); current; current = walker.nextNode()) {
            if (!current.nodeValue) {
                continue;
            }
            // delete spaces between dollar signs and numbers
            current.nodeValue = current.nodeValue.replace(/\$\s+/g, '$');
            if (previous === null) {
                // first text of the paragraph: drop leading whitespace
                current.nodeValue = current.nodeValue.replace(/^\s+/g, '');
            }
            else if (previous.textContent && /\S$/.test(previous.textContent)) {
                // keep neighbouring text nodes from running together
                previous.nodeValue += ' ';
            }
            previous = current;
        }
        if (previous && previous.textContent) {
            previous.nodeValue = previous.textContent.replace(/\s+$/g, '');
        }
    };
    const tableHasParenthesesCell = (table) => {
        for (const tr of table.rows) {
            for (const td of tr.cells) {
                if (isParenthesesOnly(td.textContent)) {
                    return true;
                }
            }
        }
        return false;
    };
    const visit = (ele) => {
        if (ele.tagName === 'P') {
            trimParagraph(ele);
            // remove lines with parentheses only
            if (isParenthesesOnly(ele.textContent)) {
                ele.remove();
            }
            if (!hasNoVisibleText(ele.textContent)) {
                noContent = false;
                return;
            }
            // remove empty paragraphs at the beginning of the file
            if (noContent) {
                ele.remove();
            }
            return;
        }
        if (ele.tagName === 'TABLE') {
            if (tableHasParenthesesCell(ele)) {
                ele.setAttribute('lawyer', 'true');
            }
            return;
        }
        // Snapshot the children: paragraphs may be removed while we iterate.
        for (const child of Array.from(ele.children)) {
            visit(child);
        }
    };
    visit(fakeDOM.body);
};
/**
 * Turns runs of consecutive <p> elements that look like table rows into real tables.
 *
 * A paragraph is split into cells on runs of non-breaking spaces. Consecutive row-like
 * paragraphs are copied into a new <table parsed="true"> inserted after the first
 * paragraph of the run; the source paragraphs are removed once the run ends. Each
 * generated row gets an `id` attribute that keys the source paragraph in the
 * module-level `originalElements` map.
 *
 * The document is modified in place.
 *
 * @param fakeDOM Document whose body is scanned
 */
export const inferTables = (fakeDOM) => {
    // Table being built for the current run, or null when no run is open.
    let tableElement = null;
    // Source paragraphs already copied into the open table; removed when it is closed.
    let elementsToBeRemoved = [];
    // Column count fixed by the first row of the open table (0 = not known yet).
    let columns = 0;
    // Picks the cell separator: normally two or more NBSPs, but a single NBSP is accepted
    // when the open table already has rows, the previous row is not blank and a
    // single-NBSP split produces exactly the expected number of columns.
    // `created` is true when the row for this text has already been inserted, so the
    // "previous" row is then the second to last one.
    const getRegex = (innerText, created) => {
        const normal = /\u00A0{2,}\s*/g;
        const special = /\u00A0{1,}\s*/g;
        const lastRow = tableElement &&
            tableElement.rows[tableElement.rows.length - (created ? 2 : 1)];
        // Needs more refining
        let specialCaseSplit = false;
        for (const s of innerText.split(special))
            specialCaseSplit = specialCaseSplit || s.split(' ').length < 10;
        specialCaseSplit = !!(specialCaseSplit &&
            tableElement &&
            tableElement.rows.length > 1 &&
            !isEmptyRow(lastRow) &&
            innerText.split(special).length === columns);
        return specialCaseSplit ? special : normal;
    };
    const hasTooManyWords = (cells) => cells.some(cell => cell.split(' ').length > MAX_WORDS_IN_CELL);
    // A paragraph is a row when it splits into several short cells and does not start like
    // a list item or section heading, or - while a table is open - when it is blank or
    // starts with a lowercase letter.
    const isTableRow = (innerText) => {
        const cleanedText = innerText.replace(/^\s+/, '').replace(/\s+$/, '');
        const cells = cleanedText.split(getRegex(cleanedText, false));
        return ((cells.length > 1 &&
            !hasTooManyWords(cells) &&
            !/^\([a-zA-Z0-9]\)/.test(cells[0]) &&
            !/^[a-zA-Z0-9]\./.test(cells[0]) &&
            !/^Section [0-9]/.test(cells[0])) ||
            (tableElement && !/\S/g.test(cleanedText)) ||
            (tableElement && /^[a-z]/.test(cleanedText)));
    };
    // Splits a paragraph into cell texts. When the row has fewer cells than the table has
    // columns, empty cells are inserted after the widest separators and then appended at
    // the end until the row is full.
    const splitText = (innerText) => {
        const cleanedText = innerText.replace(/^\s+/, '').replace(/\s+$/, '');
        const regex = getRegex(cleanedText, true);
        const texts = cleanedText.split(regex);
        if (!columns) {
            columns = texts.length;
        }
        else if (texts.length < columns) {
            const spaces = [];
            let match;
            while ((match = regex.exec(cleanedText))) {
                spaces.push({ length: match[0].length, i: spaces.length });
            }
            spaces.sort((a, b) => b.length - a.length);
            spaces.splice(columns - texts.length);
            // insert from the back so earlier indices stay valid
            spaces.sort((a, b) => b.i - a.i);
            for (const space of spaces) {
                texts.splice(space.i + 1, 0, '');
            }
            while (texts.length < columns)
                texts.push('');
        }
        return texts;
    };
    // Throws away the table under construction. The paragraphs copied so far stay in the
    // document, and all per-table state is reset exactly as endTable() would reset it.
    const discardTable = () => {
        if (tableElement)
            tableElement.remove();
        tableElement = null;
        elementsToBeRemoved = [];
        columns = 0;
    };
    // Appends a row built from `paragraphElem` to the open table.
    // Returns true when the row was kept, false when a second cell with more than ten
    // words made the whole table implausible and it was discarded.
    const constructRow = (paragraphElem) => {
        const row = tableElement.insertRow();
        // remember original element
        row.setAttribute('id', `${id}`);
        originalElements[`${id}`] = paragraphElem;
        id++;
        // typeof check rather than truthiness: an empty string is a valid (blank) row.
        const cellsText = typeof paragraphElem.textContent === 'string'
            ? splitText(paragraphElem.textContent)
            : [];
        let badCells = 0;
        for (const cellText of cellsText) {
            if (cellText.split(' ').length > 10) {
                if (badCells > 0) {
                    discardTable();
                    return false;
                }
                badCells++;
            }
            if (/^(\(|\))+$/.test(cellText))
                tableElement.setAttribute('lawyer', 'true');
            const cell = row.insertCell();
            const clone = paragraphElem.cloneNode();
            let cur = paragraphElem.firstElementChild;
            let deepestChild = clone;
            // NOTE(review): every first-child descendant is appended directly to `clone`,
            // so the original nesting is flattened into siblings - confirm this is intended.
            while (cur) {
                deepestChild = clone.insertBefore(cur.cloneNode(), null);
                cur = cur.firstElementChild;
            }
            // cellText comes from textContent, i.e. it is plain text: insert it as text so
            // characters such as '<' or '&' are not re-parsed as markup.
            deepestChild.insertAdjacentText('afterbegin', cellText);
            cell.insertBefore(clone, null);
        }
        return true;
    };
    const startTable = (ele) => {
        tableElement = fakeDOM.createElement('table');
        tableElement.setAttribute('parsed', 'true');
        ele.insertAdjacentElement('afterend', tableElement);
    };
    // Closes the open table: the paragraphs it was built from are removed from the document.
    const endTable = () => {
        tableElement = null;
        columns = 0;
        for (const ele of elementsToBeRemoved)
            ele.remove();
        elementsToBeRemoved = [];
    };
    const handleChild = (ele) => {
        if (ele.tagName === 'P') {
            const elemText = ele.textContent;
            // typeof check rather than truthiness: blank paragraphs may continue a table.
            if (typeof elemText === 'string' && isTableRow(elemText)) {
                if (!tableElement) {
                    startTable(ele);
                }
                // Only schedule the paragraph for removal when its row really ended up in
                // a table; if the table was discarded the paragraph must stay in place.
                if (constructRow(ele)) {
                    elementsToBeRemoved.push(ele);
                }
            }
            else if (tableElement) {
                endTable();
            }
        }
        else if (ele.tagName === 'TABLE') {
            endTable();
        }
        else if (ele.children.length > 0) {
            const children = [...ele.children];
            for (const child of children)
                handleChild(child);
        }
        // A run of rows never spans a non-paragraph element.
        if (tableElement && ele.tagName !== 'P') {
            endTable();
        }
    };
    handleChild(fakeDOM.body);
    if (tableElement)
        endTable();
};
/**
 * Cleans up the document after table inference. Works in place.
 *
 * 1. Walks every node: non-breaking spaces in text become regular spaces, leading ')'
 *    characters are stripped from text nodes, cells containing parentheses only are
 *    tagged with parenthesis="true", and all tables are collected.
 * 2. For each table: trailing blank rows are dropped, short rows are padded, and tables
 *    that are blank, have at most one row, or carry lawyer="true" are replaced by
 *    paragraphs (or removed).
 * 3. A paragraph sitting between two inferred tables is merged, together with the second
 *    table, into the first one.
 * 4. Rows are padded again and columns that are blank in every row are removed.
 *
 * @param fakeDOM Document to post-process
 */
export const postprocess = (fakeDOM) => {
    // Appends a cell holding a <p> with `text` to `row`.
    const appendTextCell = (row, text) => {
        const paragraph = row
            .insertCell()
            .insertAdjacentElement('afterbegin', fakeDOM.createElement('p'));
        if (paragraph !== null && paragraph !== undefined)
            paragraph.insertAdjacentText('afterbegin', text);
    };
    const addMissedRows = () => {
        for (const paragraphElem of fakeDOM.querySelectorAll('p')) {
            // nearest preceding element sibling
            let pre = paragraphElem;
            while ((pre = pre.previousSibling)) {
                if (pre.nodeType === Node.ELEMENT_NODE)
                    break;
            }
            // nearest following element sibling with visible text; blank elements passed
            // on the way are counted so they can be re-created as empty rows
            let next = paragraphElem;
            let lines = 0;
            while ((next = next.nextSibling)) {
                if (next.textContent &&
                    next.nodeType === Node.ELEMENT_NODE &&
                    /\S/.test(next.textContent))
                    break;
                if (next.nodeType === Node.ELEMENT_NODE)
                    lines++;
            }
            if (pre &&
                pre.tagName === 'TABLE' &&
                next &&
                next.tagName === 'TABLE' &&
                pre.getAttribute('parsed') &&
                next.getAttribute('parsed')) {
                if (!paragraphElem.textContent)
                    continue;
                // the last whitespace-separated token becomes the second cell
                const match = paragraphElem.textContent.match(/\s\S+$/);
                if (!match)
                    continue;
                const newRowTexts = [
                    paragraphElem.textContent.substring(0, match.index),
                    match[0].substring(1)
                ];
                if (newRowTexts[0].split(' ').length > MAX_WORDS_IN_CELL ||
                    newRowTexts[1].split(' ').length > MAX_WORDS_IN_CELL)
                    continue;
                const newRow = pre.insertRow();
                appendTextCell(newRow, newRowTexts[0]);
                appendTextCell(newRow, newRowTexts[1]);
                while (lines-- > 0)
                    pre
                        .insertRow()
                        .insertCell()
                        .insertAdjacentElement('afterend', fakeDOM.createElement('td'));
                for (const r of next.rows) {
                    pre.insertRow().replaceWith(r.cloneNode(true));
                }
                paragraphElem.remove();
                next.remove();
            }
        }
    };
    // detects weird parentheses string in a cell
    const isParenthesesCell = (td) => {
        return ((td.textContent &&
            /^(\(|\))+$/.test(td.textContent.replace(/\s/g, ''))) ||
            !!td.getAttribute('parenthesis'));
    };
    // True when the cell is missing, has no text, or has whitespace only.
    const isEmptyCell = (td) => {
        if (!td || !td.textContent)
            return true;
        return !/\S/.test(td.textContent);
    };
    // Removes blank rows from the end of the table; the element children of each removed
    // row's first cell are moved to just after the table.
    const trimTable = (table) => {
        const { rows } = table;
        for (let i = rows.length - 1; i > -1 && isEmptyRow(rows[i]); i--) {
            const firstCell = rows[i].cells[0];
            if (firstCell) {
                // `children` is live: snapshot it, otherwise moving a child out shifts the
                // collection and every other child is skipped.
                for (const ch of [...firstCell.children])
                    table.insertAdjacentElement('afterend', ch);
            }
            rows[i].remove();
        }
    };
    const removeFalseTable = (table) => {
        // Blank table (no text at all, or whitespace only): nothing worth keeping.
        // typeof check rather than truthiness so that '' is treated as blank too.
        if (typeof table.textContent === 'string' && !/\S/.test(table.textContent)) {
            table.remove();
            return;
        }
        const isLawyerTable = table.getAttribute('lawyer') === 'true';
        const badTable = table.rows.length <= 1 || isLawyerTable;
        if (!badTable)
            return;
        const replacements = [];
        if (table.rows.length <= 1) {
            const firstRow = table.rows[0];
            // Text but no rows (e.g. only a caption): nothing to unwrap, leave it alone.
            if (!firstRow)
                return;
            const rowId = firstRow.getAttribute('id');
            // Own-property check so ids such as "constructor" cannot hit Object.prototype.
            const original = rowId && Object.prototype.hasOwnProperty.call(originalElements, rowId)
                ? originalElements[rowId]
                : undefined;
            if (original)
                replacements.push(original);
            else
                for (const td of firstRow.cells) {
                    if (!isParenthesesCell(td) && !isEmptyCell(td)) {
                        replacements.push(...td.children);
                        replacements.push(fakeDOM.createElement('br'));
                    }
                }
        }
        else if (isLawyerTable) {
            // Cells before the first parentheses cell of a row go to `parties`, cells after
            // the last one go to `meta`; all parties are emitted before all meta lines.
            const parties = [];
            const meta = [];
            for (const tr of table.rows) {
                const before = [];
                const after = [];
                for (const td of tr.cells) {
                    if (isParenthesesCell(td))
                        break;
                    if (!isEmptyCell(td) && td.textContent)
                        before.push(td.textContent);
                }
                for (const td of [...tr.cells].reverse()) {
                    if (isParenthesesCell(td))
                        break;
                    if (!isEmptyCell(td) && td.textContent)
                        after.unshift(td.textContent);
                }
                if (before.length) {
                    const paragraphElem = fakeDOM.createElement('p');
                    paragraphElem.insertAdjacentText('afterbegin', before.join('\u00A0'.repeat(2)));
                    parties.push(paragraphElem);
                    parties.push(fakeDOM.createElement('br'));
                }
                if (after.length) {
                    const paragraphElem = fakeDOM.createElement('p');
                    paragraphElem.insertAdjacentText('afterbegin', after.join('\u00A0'.repeat(2)));
                    meta.push(paragraphElem);
                    meta.push(fakeDOM.createElement('br'));
                }
            }
            replacements.push(...parties, ...meta);
        }
        table.replaceWith(...replacements);
    };
    // Pads every row with empty cells up to the width of the widest row.
    const adjustShortRows = (table) => {
        let maxCells = 0;
        for (const row of table.rows)
            maxCells = Math.max(maxCells, row.cells.length);
        for (const row of table.rows)
            while (row.cells.length < maxCells)
                row.insertCell();
    };
    // Removes every column whose cells are blank in all rows.
    const removeEmptyColumns = (table) => {
        const allCells = [...table.rows].map(row => [...row.cells].map(td => !!td.textContent && /\S/.test(td.textContent)));
        // reduce() without an initial value throws on an empty array
        if (allCells.length === 0)
            return;
        const colNotEmpty = allCells.reduce((pre, cur) => {
            return pre.map((val, i) => val || cur[i]);
        });
        for (const tr of table.rows)
            for (const [i, td] of [...tr.cells].entries()) {
                if (!colNotEmpty[i])
                    td.remove();
            }
    };
    const trimParantheses = (text) => {
        if (text && text.nodeValue)
            // eslint-disable-next-line no-param-reassign
            text.nodeValue = text.nodeValue.replace(/^\)+/g, '');
    };
    const removeNonBreakingSpaces = (node) => {
        if (node && node.nodeValue)
            // eslint-disable-next-line no-param-reassign
            node.nodeValue = node.nodeValue.replace(/\u00A0{1,}/g, ' ');
    };
    const tables = [];
    const walker = createTreeWalker(fakeDOM, fakeDOM.body, NodeFilter.SHOW_ALL, null, false);
    let node;
    while ((node = walker.nextNode())) {
        if (node.nodeType === Node.TEXT_NODE) {
            removeNonBreakingSpaces(node);
            trimParantheses(node);
        }
        else if (node.tagName === 'TD') {
            // Tag the cell now: later text clean-up must not hide that it held parentheses.
            if (isParenthesesCell(node))
                node.setAttribute('parenthesis', 'true');
        }
        else if (node.tagName === 'TABLE') {
            tables.push(node);
        }
    }
    for (const table of tables) {
        // earlier steps may already have detached this table
        if (!table.isConnected)
            continue;
        trimTable(table);
        adjustShortRows(table);
        removeFalseTable(table);
    }
    addMissedRows();
    for (const table of tables) {
        if (!table.isConnected)
            continue;
        adjustShortRows(table);
        removeEmptyColumns(table);
    }
};
/**
 * Parses `html` and strips trailing text-less children from the last top-level element
 * of the body.
 *
 * Children of that element are inspected from the end; each one whose `textContent` is
 * the empty string is removed, and the scan stops at the first child that has text.
 *
 * @param html Markup to parse
 * @returns The serialized <body> of the parsed document, or undefined when the body
 *          has no element children
 */
const removeEmptySpacesfromEnd = (html) => {
    const doc = new DOMParser().parseFromString(html, 'text/html');
    const lastTopLevel = doc.body.lastElementChild;
    if (lastTopLevel === null || lastTopLevel === undefined) {
        return;
    }
    const trailing = lastTopLevel.children;
    let index = trailing.length - 1;
    while (index >= 0) {
        const candidate = trailing.item(index);
        const text = candidate === null || candidate === undefined
            ? undefined
            : candidate.textContent;
        if (text && text.length > 0) {
            break;
        }
        if (text === '') {
            // parentNode.removeChild() instead of remove(): works in every browser.
            const parent = candidate.parentNode;
            if (parent !== null && parent !== undefined) {
                parent.removeChild(candidate);
            }
        }
        index -= 1;
    }
    return doc.body.outerHTML;
};
/**
 * Trims empty trailing elements from `html` and runs the result through `sanitize`.
 *
 * The options passed to `sanitize` keep only the `style` attribute (on any tag), restrict
 * styles to `text-align` and `text-transform`, and rewrite <pre> and <h1>-<h6> to <p>
 * and <sup> to <span>. `allowedTags: false` presumably disables tag filtering - confirm
 * against the sanitize-html documentation.
 *
 * @param html Markup to clean
 * @returns Sanitized markup, or '' when trimming produced nothing
 */
const addSanitize = (html) => {
    const trimmedHtml = removeEmptySpacesfromEnd(html);
    if (!trimmedHtml) {
        return '';
    }
    const allowedStyles = {
        '*': {
            'text-align': [/^.*$/],
            'text-transform': [/^.*$/]
        }
    };
    const transformTags = {
        pre: 'p',
        sup: 'span',
        h1: 'p',
        h2: 'p',
        h3: 'p',
        h4: 'p',
        h5: 'p',
        h6: 'p'
    };
    const options = {
        allowedTags: false,
        allowedAttributes: { '*': ['style'] },
        allowedStyles,
        transformTags
    };
    return sanitize(trimmedHtml, options);
};
/**
 * Flattens every table of the document into a <div> of paragraphs, in place.
 *
 * Each <td> with non-empty `innerText` becomes one <p> holding that text; the table is
 * then replaced by the <div>. Cells without text are skipped.
 *
 * This is best effort: any error is logged to the console and not rethrown.
 *
 * @param fakeDOM Document whose tables are squashed
 */
const squashWord = (fakeDOM) => {
    try {
        for (const table of Array.from(fakeDOM.querySelectorAll('table'))) {
            const outer = fakeDOM.createElement('div');
            for (const cell of Array.from(table.querySelectorAll('td'))) {
                if (!cell.innerText) {
                    continue;
                }
                const tag = fakeDOM.createElement('p');
                // Create the text node from the same document as the elements rather than
                // the global `document`, which may not exist or may be another document.
                tag.appendChild(fakeDOM.createTextNode(cell.innerText));
                outer.appendChild(tag);
            }
            if (table.parentNode) {
                table.parentNode.replaceChild(outer, table);
            }
        }
    }
    catch (err) {
        // String() also copes with thrown values that have no toString (null/undefined).
        console.error(String(err));
    }
};
/**
 * Removes blank lines from the document in place: every <p> whose `innerText` is
 * missing or whitespace-only, followed by every <br>.
 *
 * @param fakeDOM Document to strip
 */
const stripBlankLines = (fakeDOM) => {
    const isBlank = (text) => text === null || text === undefined || !text.trim();
    // remove empty paragraphs
    Array.from(fakeDOM.querySelectorAll('p')).forEach((paragraph) => {
        if (isBlank(paragraph.innerText)) {
            paragraph.remove();
        }
    });
    // remove newlines
    Array.from(fakeDOM.querySelectorAll('br')).forEach((lineBreak) => {
        lineBreak.remove();
    });
};
/**
 * Reports whether the document contains at least one <img> element.
 *
 * @param fakeDOM Document to inspect
 * @returns true when `querySelector('img')` finds a match, false otherwise
 */
const checkImage = (fakeDOM) => Boolean(fakeDOM.querySelector('img'));
/**
 * Runs the full clean-up pipeline on an HTML string.
 *
 * The markup is parsed, then passed through `preprocess`, `inferTables` and
 * `postprocess` in that order. Depending on `cleanVariant`, tables are flattened
 * ('squash') or blank paragraphs and <br> elements are removed ('strip'); any other
 * value applies no extra step.
 *
 * @param html Markup to process
 * @param cleanVariant Optional extra clean-up: 'squash' or 'strip'
 * @returns `{ html, hasImage }` - the sanitized body markup and whether the processed
 *          document contains an <img>
 */
export const parseHtml = (html, cleanVariant) => {
    const parser = new DOMParser();
    const fakeDOM = parser.parseFromString(html, 'text/html');
    for (const stage of [preprocess, inferTables, postprocess]) {
        stage(fakeDOM);
    }
    switch (cleanVariant) {
        case 'squash':
            squashWord(fakeDOM);
            break;
        case 'strip':
            stripBlankLines(fakeDOM);
            break;
        default:
            break;
    }
    const sanitizedHtml = addSanitize(fakeDOM.body.outerHTML);
    const hasImage = checkImage(fakeDOM);
    return { html: sanitizedHtml, hasImage };
};
/**
 * Posts `storageId` to the 'documents/convert-word' endpoint and runs the `html` field
 * of the response through `parseHtml`.
 *
 * @param storageId Identifier sent to the conversion endpoint
 * @param cleanVariant Forwarded unchanged to `parseHtml`
 * @returns Promise resolving to the `parseHtml` result
 */
export const wordToHtml = (storageId, cleanVariant) => __awaiter(void 0, void 0, void 0, function* () {
    const payload = { storageId };
    const response = yield api.post('documents/convert-word', payload);
    return parseHtml(response.html, cleanVariant);
});
