// XmlParser.java

package docsite.util;

import java.io.*;
import java.util.*;
import java.util.stream.*;
import javax.xml.XMLConstants;
import javax.xml.parsers.*;
import docsite.*;
import org.w3c.dom.*;
import org.xml.sax.*;

/**
 * Parses an XML document into a nested structure of {@code Map}, {@code List} and
 * {@code String} values.
 *
 * <p>Each element is converted according to the first rule that matches:
 * <ol>
 *   <li>no children and no attributes: the empty string;</li>
 *   <li>a single text child and no attributes: the text;</li>
 *   <li>a single text child and attributes: a map holding the attributes plus the text
 *       under the key {@code "value"};</li>
 *   <li>two or more element children that all share one tag name: a list with one entry
 *       per child element;</li>
 *   <li>anything else: a map holding the attributes and the converted child elements,
 *       keyed by tag name.</li>
 * </ol>
 *
 * <p>Maps are {@link TreeMap}s, so keys are sorted. Within a map, a later child with the
 * same tag name as an earlier one replaces it.
 *
 * <p>The underlying {@link DocumentBuilder} is shared by all instances; access to it is
 * serialized, so {@link #parse(InputStream)} may be called from several threads.
 */
public final class XmlParser {

    /** Shared, hardened builder. Always lock on it before use: JAXP builders are not guaranteed thread-safe. */
    private static final DocumentBuilder DOCUMENT_BUILDER = newDocumentBuilder();

    /**
     * Creates the shared builder with DOCTYPE declarations and external entity/DTD/schema
     * access disabled (XXE hardening).
     *
     * @throws DocsiteException if the parser cannot be configured
     */
    private static DocumentBuilder newDocumentBuilder() {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
            factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
            // Turn CDATA sections into plain text nodes and drop comments. Otherwise
            // <p><![CDATA[v]]></p> or <p><!-- c -->v</p> is not seen as "a single text child"
            // and the value is silently lost.
            factory.setCoalescing(true);
            factory.setIgnoringComments(true);
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            throw new DocsiteException(e);
        }
    }


    /**
     * Reads the whole stream as XML and converts it to a nested map.
     *
     * <p>Whitespace located between a closing {@code >} and the next {@code <} is removed
     * before parsing, so indentation never shows up as text nodes.
     *
     * @param inputStream source of the XML text
     * @return a sorted map with a single entry: the root element's tag name mapped to its
     *         converted content
     * @throws IOException if the XML is not well-formed (the {@link SAXException} is the cause)
     */
    public Map<String, Object> parse(InputStream inputStream) throws IOException {
        try {
            String xml = ResourceUtil.read(inputStream).replaceAll(">[\\s]*<","><");
            InputSource inputSource = new InputSource(new StringReader(xml));
            Document document;
            // The builder is static and shared by every XmlParser instance.
            synchronized (DOCUMENT_BUILDER) {
                document = DOCUMENT_BUILDER.parse(inputSource);
            }
            return documentToMap(document);
        } catch (SAXException e) {
            throw new IOException(e);
        }
    }


    /** Converts the document's root element into a one-entry sorted map. */
    private Map<String, Object> documentToMap(Document document) {
        Map<String, Object> root = new TreeMap<>();
        processNode(document.getDocumentElement(), root, null);
        return root;
    }


    /**
     * Converts one element and stores the result in the enclosing container.
     * Non-element nodes are ignored.
     *
     * @param node the node to convert
     * @param map  enclosing map; used only when {@code list} is {@code null}
     * @param list enclosing list, or {@code null} when the parent is a map
     */
    private void processNode(Node node, Map<String, Object> map, List<Object> list) {

        if (!isElement(node)) {
            return;
        }

        Element element = (Element) node;
        String tag = element.getTagName();
        boolean hasAttributes = element.hasAttributes();
        boolean onlyText = childCount(element) == 1 && isText(child(element, 0));

        // <property></property>
        if (!element.hasChildNodes() && !hasAttributes) {
            addToContainer(map, list, tag, "");
        }
        // <property>value</property>
        else if (onlyText && !hasAttributes) {
            addToContainer(map, list, tag, child(element, 0).getNodeValue());
        }
        // <property attr="a" ... >value</property>
        else if (onlyText) {
            Map<String, Object> value = new TreeMap<>();
            attributes(element).forEach(attr -> value.put(attr.getName(), attr.getValue()));
            // Stored last, so the text wins over an attribute that is itself named "value".
            value.put("value", child(element, 0).getNodeValue());
            addToContainer(map, list, tag, value);
        }
        // <property><item>A</item><item>B</item>...</property>
        else if (isListPropertyNode(element)) {
            List<Object> items = new ArrayList<>();
            addToContainer(map, list, tag, items);
            // Children go into the list, so no enclosing map is needed.
            children(element).forEach(child -> processNode(child, null, items));
        }
        // <entity attr="a"...><propertyA>...</propertyA><propertyB>...</propertyB> ..</entity>
        else {
            Map<String, Object> value = new TreeMap<>();
            addToContainer(map, list, tag, value);
            attributes(element).forEach(attr -> value.put(attr.getName(), attr.getValue()));
            children(element).forEach(child -> processNode(child, value, null));
        }

    }


    /**
     * Appends {@code value} to {@code list} when a list is given; otherwise stores it in
     * {@code map} under {@code key}, replacing any previous entry.
     */
    private void addToContainer(
        Map<String, Object> map,
        List<Object> list,
        String key,
        Object value
    ) {
        if (list != null) {
            list.add(value);
        } else {
            map.put(key, value);
        }
    }


    /**
     * Tells whether the node is an element with at least two element children that all
     * have the same node name. Non-element children are not taken into account.
     */
    private boolean isListPropertyNode(Node node) {
        if (!isElement(node)) {
            return false;
        }
        NodeList nodes = node.getChildNodes();
        String sharedTag = null;
        int elementCount = 0;
        for (int i = 0; i < nodes.getLength(); i++) {
            Node child = nodes.item(i);
            if (!isElement(child)) {
                continue;
            }
            elementCount++;
            if (sharedTag == null) {
                sharedTag = child.getNodeName();
            } else if (!sharedTag.equals(child.getNodeName())) {
                return false;
            }
        }
        return elementCount > 1;
    }


    private boolean isElement(Node node) {
        return node.getNodeType() == Node.ELEMENT_NODE;
    }


    private boolean isText(Node node) {
        return node.getNodeType() == Node.TEXT_NODE;
    }


    private int childCount(Node node) {
        return node.getChildNodes().getLength();
    }


    private Node child(Node node, int index) {
        return node.getChildNodes().item(index);
    }


    /** Streams the direct children of the node in document order. */
    private Stream<Node> children(Node node) {
        NodeList nodes = node.getChildNodes();
        return IntStream.range(0, nodes.getLength()).mapToObj(nodes::item);
    }


    /** Streams the attributes of an element node. */
    private Stream<Attr> attributes(Node node) {
        NamedNodeMap attrs = node.getAttributes();
        return IntStream.range(0, attrs.getLength())
            .mapToObj(attrs::item)
            .map(Attr.class::cast);
    }


}