package org.luwrain.reader.builders.html;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.luwrain.core.Log;
import org.luwrain.core.NullCheck;
import org.luwrain.reader.DocumentBuilder;
import org.luwrain.reader.Node;
import org.luwrain.reader.NodeBuilder;
import org.luwrain.reader.Paragraph;
import org.luwrain.reader.Run;
import org.luwrain.reader.Section;
import org.luwrain.reader.TextRun;

/* loaded from: input_file:org/luwrain/reader/builders/html/Builder.class */
final class Builder extends Base implements DocumentBuilder {
    /** Charset used when the "charset" property is missing or empty. */
    private static final String DEFAULT_CHARSET = "UTF-8";
    /** The jsoup document produced by the most recent parse. */
    private Document jsoupDoc = null;
    /** URL of the document being built; relative hrefs are resolved against it. */
    private URL docUrl = null;
    /** Hrefs of the anchors currently being traversed; the last entry is attached to new text runs. */
    private final LinkedList<String> hrefStack = new LinkedList<>();
    /** Raw href attribute values collected during traversal, in encounter order. */
    private final List<String> allHrefs = new LinkedList<>();

    /**
     * Builds a document from an HTML file.
     *
     * @param file the file to read, must not be null
     * @param properties build properties (passed on to the stream-based overload), must not be null
     * @return the constructed document
     * @throws IOException if the file cannot be opened or read, or the stream-based overload fails
     */
    @Override // org.luwrain.reader.DocumentBuilder
    public org.luwrain.reader.Document buildDoc(File file, Properties properties) throws IOException {
        NullCheck.notNull(file, "file");
        NullCheck.notNull(properties, "props");
        // try-with-resources closes the stream on every path and keeps the
        // primary exception if close() fails as well (it becomes suppressed).
        try (InputStream is = new FileInputStream(file)) {
            return buildDoc(is, properties);
        }
    }

    /**
     * Builds a document from HTML text held in a string.
     *
     * <p>The text is converted to bytes and handed to the stream-based overload.
     * The bytes are produced with the same charset that overload will use for
     * decoding (the "charset" property, or the default when it is missing or
     * empty), so the text survives the round trip regardless of the platform
     * default charset. Characters the chosen charset cannot represent are
     * still lost in the encoding step.
     *
     * @param str the HTML text, must not be null
     * @param properties build properties, must not be null
     * @return the constructed document, or {@code null} if an {@link IOException} occurred
     */
    @Override // org.luwrain.reader.DocumentBuilder
    public org.luwrain.reader.Document buildDoc(String str, Properties properties) {
        NullCheck.notNull(str, "text");
        NullCheck.notNull(properties, "props");
        final String requested = properties.getProperty("charset");
        final String charsetName = (requested == null || requested.isEmpty()) ? DEFAULT_CHARSET : requested;
        // An unknown charset name raises UnsupportedEncodingException, which is
        // an IOException and is therefore reported by the catch clause below.
        try (InputStream is = new ByteArrayInputStream(str.getBytes(charsetName))) {
            return buildDoc(is, properties);
        } catch (IOException e) {
            Log.error("reader", "unable to read HTML from a string:" + e.getClass().getName() + ":" + e.getMessage());
            return null;
        }
    }

    /**
     * Builds a document from an HTML stream.
     *
     * <p>The "url" property is mandatory and is used as the base URI for
     * parsing; the "charset" property is optional and falls back to the
     * default charset. The resulting document gets the "url", "contenttype"
     * and "charset" properties set.
     *
     * @param inputStream the stream to parse, must not be null
     * @param properties build properties, must not be null
     * @return the constructed document
     * @throws IOException if the "url" property is missing or empty, the URL is malformed, or parsing fails
     */
    @Override // org.luwrain.reader.DocumentBuilder
    public org.luwrain.reader.Document buildDoc(InputStream inputStream, Properties properties) throws IOException {
        NullCheck.notNull(inputStream, "is");
        NullCheck.notNull(properties, "props");
        final String urlValue = properties.getProperty("url");
        if (urlValue == null || urlValue.isEmpty()) {
            throw new IOException("no 'url' property");
        }
        this.docUrl = new URL(urlValue);
        final String baseUri = this.docUrl.toString();
        String charsetName = properties.getProperty("charset");
        if (charsetName == null || charsetName.isEmpty()) {
            charsetName = DEFAULT_CHARSET;
        }
        this.jsoupDoc = Jsoup.parse(inputStream, charsetName, baseUri);
        final org.luwrain.reader.Document result = constructDoc();
        result.setProperty("url", baseUri);
        result.setProperty("contenttype", "text/html");
        result.setProperty("charset", charsetName);
        return result;
    }

    /**
     * Converts the parsed jsoup document into a reader document.
     *
     * <p>Collects the metadata of the head, translates the body into a node
     * tree and attaches all hrefs found on the way.
     *
     * @return the newly created document
     */
    private org.luwrain.reader.Document constructDoc() {
        // Start from a clean traversal state so that a reused builder does not
        // report links collected while building an earlier document.
        this.hrefStack.clear();
        this.allHrefs.clear();
        final NodeBuilder nodeBuilder = new NodeBuilder();
        collectMeta(this.jsoupDoc.head(), new HashMap());
        nodeBuilder.addSubnodes(onNode(this.jsoupDoc.body(), false));
        final org.luwrain.reader.Document document = new org.luwrain.reader.Document(this.jsoupDoc.title(), nodeBuilder.newRoot());
        document.setHrefs(this.allHrefs.toArray(new String[this.allHrefs.size()]));
        return document;
    }

    /**
     * Translates the children of a jsoup node into reader nodes.
     *
     * <p>Text nodes and elements are dispatched to their handlers, comments
     * are ignored and any other node kind is logged. Runs still pending after
     * the last child are committed as a final paragraph.
     *
     * @param node the jsoup node whose children are processed, must not be null
     * @param z {@code true} when processing preformatted content (line breaks in text start new paragraphs)
     * @return the resulting nodes, possibly empty
     */
    private Node[] onNode(org.jsoup.nodes.Node node, boolean z) {
        NullCheck.notNull(node, "node");
        final List<Node> nodes = new LinkedList<>();
        final List<Run> runs = new LinkedList<>();
        final List<org.jsoup.nodes.Node> children = node.childNodes();
        if (children == null) {
            return new Node[0];
        }
        for (org.jsoup.nodes.Node child : children) {
            if (child instanceof TextNode) {
                onTextNode((TextNode) child, nodes, runs, z);
            } else if (child instanceof Element) {
                onElement((Element) child, nodes, runs, z);
            } else if (!(child instanceof Comment)) {
                Log.warning("reader", "unprocessed node of class " + child.getClass().getName());
            }
        }
        commitParagraph(nodes, runs);
        return nodes.toArray(new Node[nodes.size()]);
    }

    /**
     * Processes an inline element, i.e. one whose content stays in the current paragraph.
     *
     * <p>An {@code img} is turned into a text run by {@code onImg}. For an
     * {@code a} element the href is pushed onto the href stack for the
     * duration of the traversal of its children, so the text runs created
     * inside pick it up.
     *
     * @param element the inline element, must not be null
     * @param list the nodes collected so far, must not be null
     * @param list2 the runs of the paragraph under construction, must not be null
     * @param z {@code true} when processing preformatted content
     */
    private void onElementInPara(Element element, List<Node> list, List<Run> list2, boolean z) {
        NullCheck.notNull(element, "el");
        NullCheck.notNull(list, "nodes");
        NullCheck.notNull(list2, "runs");
        final String nodeName = element.nodeName();
        if (nodeName == null || nodeName.isEmpty()) {
            return;
        }
        // Locale.ROOT keeps tag matching independent of the default locale
        // (e.g. the Turkish dotless i).
        final String tagName = nodeName.trim().toLowerCase(java.util.Locale.ROOT);
        if (tagName.equals("img")) {
            onImg(element, list2);
            return;
        }
        final String href = tagName.equals("a") ? extractHref(element) : null;
        if (href != null) {
            this.hrefStack.add(href);
        }
        try {
            final List<org.jsoup.nodes.Node> children = element.childNodes();
            if (children == null) {
                return;
            }
            for (org.jsoup.nodes.Node child : children) {
                if (child instanceof TextNode) {
                    onTextNode((TextNode) child, list, list2, z);
                } else if (child instanceof Element) {
                    onElement((Element) child, list, list2, z);
                } else if (!(child instanceof Comment)) {
                    Log.warning("reader", "encountering unexpected node of class " + child.getClass().getName());
                }
            }
        } finally {
            // Pop exactly once, on every exit path, so the stack stays balanced.
            if (href != null) {
                this.hrefStack.pollLast();
            }
        }
    }

    /**
     * Processes a single HTML element, dispatching on its lower-cased tag name.
     *
     * <p>The tags fall into the following groups:
     * <ul>
     * <li>ignored entirely: tags prefixed with {@code g:}, {@code g-} or
     * {@code fb:}, and {@code script}, {@code style}, {@code hr},
     * {@code input}, {@code button}, {@code nobr}, {@code wbr}, {@code map},
     * {@code svg};</li>
     * <li>{@code pre}: handled by {@code onPre};</li>
     * <li>{@code br}: ends the current paragraph;</li>
     * <li>transparent block containers ({@code p}, {@code div}, ...): the
     * current paragraph is ended and the nodes of the children are appended
     * directly;</li>
     * <li>headings {@code h1}..{@code h9}: become a section whose level is the
     * digit of the tag;</li>
     * <li>lists and tables: become the corresponding structural node created
     * by {@code createNode};</li>
     * <li>inline tags ({@code a}, {@code b}, {@code span}, ...): handled by
     * {@code onElementInPara} without ending the paragraph.</li>
     * </ul>
     * Any other tag is logged and skipped.
     *
     * @param element the element to process, must not be null
     * @param list the nodes collected so far, must not be null
     * @param list2 the runs of the paragraph under construction, must not be null
     * @param z {@code true} when processing preformatted content
     */
    private void onElement(Element element, List<Node> list, List<Run> list2, boolean z) {
        NullCheck.notNull(element, "el");
        NullCheck.notNull(list, "nodes");
        NullCheck.notNull(list2, "runs");
        final String nodeName = element.nodeName();
        if (nodeName == null || nodeName.trim().isEmpty()) {
            return;
        }
        // Locale.ROOT keeps tag matching independent of the default locale
        // (e.g. the Turkish dotless i).
        final String tagName = nodeName.trim().toLowerCase(java.util.Locale.ROOT);
        if (tagName.startsWith("g:") || tagName.startsWith("g-") || tagName.startsWith("fb:")) {
            return;
        }
        switch (tagName) {
            // Tags without readable content.
            case "script":
            case "style":
            case "hr":
            case "input":
            case "button":
            case "nobr":
            case "wbr":
            case "map":
            case "svg":
                return;

            case "pre":
                onPre(element, list, list2);
                return;

            case "br":
                commitParagraph(list, list2);
                return;

            // Block containers which do not get a node of their own.
            case "p":
            case "div":
            case "main":
            case "noscript":
            case "header":
            case "footer":
            case "center":
            case "blockquote":
            case "tbody":
            case "figure":
            case "figcaption":
            case "caption":
            case "address":
            case "nav":
            case "article":
            case "noindex":
            case "iframe":
            case "form":
            case "section":
            case "dl":
            case "dt":
            case "dd":
            case "time":
            case "aside": {
                commitParagraph(list, list2);
                addExtraInfo(element);
                final Node[] children = onNode(element, z);
                releaseExtraInfo();
                for (Node child : children) {
                    list.add(child);
                }
                return;
            }

            // Headings; "h6" was previously misspelled as "h66", so real <h6>
            // elements ended up in the "unprocessed tag" branch.
            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
            case "h7":
            case "h8":
            case "h9": {
                commitParagraph(list, list2);
                addExtraInfo(element);
                final NodeBuilder sectionBuilder = new NodeBuilder();
                sectionBuilder.addSubnodes(onNode(element, z));
                // The second character of the tag is the heading level.
                final Section section = sectionBuilder.newSection(tagName.charAt(1) - '0');
                section.extraInfo = getCurrentExtraInfo();
                releaseExtraInfo();
                list.add(section);
                return;
            }

            // Lists and tables.
            case "ul":
            case "ol":
            case "li":
            case "table":
            case "th":
            case "tr":
            case "td": {
                commitParagraph(list, list2);
                addExtraInfo(element);
                final NodeBuilder structBuilder = new NodeBuilder();
                structBuilder.addSubnodes(onNode(element, z));
                final Node structNode = createNode(tagName, structBuilder);
                structNode.extraInfo = getCurrentExtraInfo();
                releaseExtraInfo();
                list.add(structNode);
                return;
            }

            // Inline content which stays in the current paragraph.
            case "img":
            case "a":
            case "tt":
            case "code":
            case "b":
            case "s":
            case "ins":
            case "em":
            case "i":
            case "u":
            case "big":
            case "small":
            case "strong":
            case "span":
            case "cite":
            case "font":
            case "sup":
            case "label":
                addExtraInfo(element);
                onElementInPara(element, list, list2, z);
                releaseExtraInfo();
                return;

            default:
                Log.warning("reader", "unprocessed tag:" + tagName);
                return;
        }
    }

    /**
     * Adds the content of a text node to the paragraph under construction.
     *
     * <p>In regular mode the whitespace-normalised text becomes a single run.
     * In preformatted mode the raw text is used, because the normalised form
     * has its line breaks collapsed already, and every line break ends the
     * current paragraph. Each run carries the href on top of the href stack,
     * or an empty string if the stack is empty.
     *
     * @param textNode the text node, must not be null
     * @param list the nodes collected so far, must not be null
     * @param list2 the runs of the paragraph under construction, must not be null
     * @param z {@code true} when processing preformatted content
     */
    private void onTextNode(TextNode textNode, List<Node> list, List<Run> list2, boolean z) {
        NullCheck.notNull(textNode, "textNode");
        NullCheck.notNull(list, "nodes");
        NullCheck.notNull(list2, "runs");
        final String text = z ? textNode.getWholeText() : textNode.text();
        if (text == null || text.isEmpty()) {
            return;
        }
        final String href = this.hrefStack.isEmpty() ? "" : this.hrefStack.getLast();
        if (!z) {
            list2.add(new TextRun(text, href, getCurrentExtraInfo()));
            return;
        }
        // With a negative limit split() always yields at least one element
        // and keeps trailing empty lines.
        final String[] lines = text.split("\n", -1);
        list2.add(new TextRun(lines[0], href, getCurrentExtraInfo()));
        for (int i = 1; i < lines.length; i++) {
            commitParagraph(list, list2);
            list2.add(new TextRun(lines[i], href, getCurrentExtraInfo()));
        }
    }

    /**
     * Turns the pending runs into a paragraph and appends it to the node list.
     *
     * <p>Does nothing if there are no pending runs; otherwise the run list is
     * emptied after the paragraph has been added.
     *
     * @param list the node list receiving the paragraph, must not be null
     * @param list2 the pending runs, must not be null
     */
    private void commitParagraph(List<Node> list, List<Run> list2) {
        NullCheck.notNull(list, "nodes");
        NullCheck.notNull(list2, "runs");
        if (!list2.isEmpty()) {
            final Run[] pending = list2.toArray(new Run[list2.size()]);
            final Paragraph para = NodeBuilder.newParagraph(pending);
            para.extraInfo = getCurrentExtraInfo();
            list.add(para);
            list2.clear();
        }
    }

    /**
     * Creates the structural node corresponding to a list or table tag.
     *
     * @param str the lower-cased tag name, must not be empty
     * @param nodeBuilder the builder holding the subnodes of the new node
     * @return the created node, or {@code null} (after logging a warning) if the tag is not supported
     */
    private Node createNode(String str, NodeBuilder nodeBuilder) {
        NullCheck.notEmpty(str, "tagName");
        switch (str) {
            case "ul":
                return nodeBuilder.newUnorderedList();
            case "ol":
                return nodeBuilder.newOrderedList();
            case "li":
                return nodeBuilder.newListItem();
            case "table":
                return nodeBuilder.newTable();
            case "tr":
                return nodeBuilder.newTableRow();
            case "th":
            case "td":
                return nodeBuilder.newTableCell();
            default:
                Log.warning("reader", "unable to create the node for tag '" + str + "'");
                return null;
        }
    }

    /**
     * Represents an image by its alternative text.
     *
     * <p>An image with a non-empty {@code alt} attribute becomes a text run
     * containing the alternative text in square brackets; other images are
     * skipped.
     *
     * @param element the {@code img} element, must not be null
     * @param list the runs of the paragraph under construction, must not be null
     */
    private void onImg(Element element, List<Run> list) {
        NullCheck.notNull(element, "el");
        NullCheck.notNull(list, "runs");
        final String altText = element.attr("alt");
        if (altText != null && !altText.isEmpty()) {
            final String href = this.hrefStack.isEmpty() ? "" : this.hrefStack.getLast();
            list.add(new TextRun("[" + altText + "]", href, getCurrentExtraInfo()));
        }
    }

    /**
     * Extracts the link target of an anchor element.
     *
     * <p>The raw attribute value is recorded in the list of all hrefs; the
     * returned value is resolved against the document URL where possible.
     *
     * @param element the anchor element
     * @return the resolved href, the raw href if it cannot be resolved, or
     *         {@code null} if the element has no {@code href} attribute
     */
    private String extractHref(Element element) {
        // attr() yields an empty string rather than null for a missing
        // attribute, so presence must be tested explicitly; otherwise a plain
        // named anchor would be treated as a link to the document itself.
        if (!element.hasAttr("href")) {
            return null;
        }
        final String href = element.attr("href");
        this.allHrefs.add(href);
        try {
            return new URL(this.docUrl, href).toString();
        } catch (MalformedURLException e) {
            // Not resolvable against the document URL; fall back to the raw value.
            return href;
        }
    }

    /**
     * Processes a {@code pre} element.
     *
     * <p>The current paragraph is ended, then the children are translated in
     * preformatted mode and appended to the node list. The extra info pushed
     * for the element is released on every exit path.
     *
     * @param element the {@code pre} element, must not be null
     * @param list the nodes collected so far, must not be null
     * @param list2 the runs of the paragraph under construction, must not be null
     */
    private void onPre(Element element, List<Node> list, List<Run> list2) {
        NullCheck.notNull(element, "el");
        NullCheck.notNull(list, "nodes");
        NullCheck.notNull(list2, "runs");
        commitParagraph(list, list2);
        addExtraInfo(element);
        try {
            final Node[] preNodes = onNode(element, true);
            for (int i = 0; i < preNodes.length; i++) {
                list.add(preNodes[i]);
            }
            commitParagraph(list, list2);
        } finally {
            releaseExtraInfo();
        }
    }
}
