From c9726ff072bed4cc437dc504b3bf7041401b8418 Mon Sep 17 00:00:00 2001 From: Melissa Linkert Date: Thu, 5 Feb 2026 09:44:04 -0600 Subject: [PATCH 1/5] Add new `parseDOM(String, String)` signature to accept an encoding --- src/main/java/loci/common/xml/XMLTools.java | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main/java/loci/common/xml/XMLTools.java b/src/main/java/loci/common/xml/XMLTools.java index d5078d23..a864272e 100644 --- a/src/main/java/loci/common/xml/XMLTools.java +++ b/src/main/java/loci/common/xml/XMLTools.java @@ -196,7 +196,7 @@ public static Document parseDOM(File file) } /** - * Parses a DOM from the given XML string. + * Parses a DOM from the given XML string, using UTF-8 encoding. * * @param xml XML data * @return a {@link Document} reflecting the XML string @@ -207,7 +207,23 @@ public static Document parseDOM(File file) public static Document parseDOM(String xml) throws ParserConfigurationException, SAXException, IOException { - byte[] bytes = xml.getBytes(Constants.ENCODING); + return parseDOM(xml, Constants.ENCODING); + } + + /** + * Parses a DOM from the given XML string, using the given encoding. 
+ * + * @param xml XML data + * @param encoding charset name + * @return a {@link Document} reflecting the XML string + * @throws ParserConfigurationException if the XML parser cannot be created + * @throws SAXException if there is an error parsing the XML + * @throws IOException if there is an error reading from the file + */ + public static Document parseDOM(String xml, String encoding) + throws ParserConfigurationException, SAXException, IOException + { + byte[] bytes = xml.getBytes(encoding); try (InputStream is = new ByteArrayInputStream(bytes)) { Document doc = parseDOM(is); return doc; From 7ba2ce289caf6a58fd5477f5bc70a730fd40c39d Mon Sep 17 00:00:00 2001 From: Melissa Linkert Date: Thu, 5 Feb 2026 09:44:21 -0600 Subject: [PATCH 2/5] Refactor so that DocumentBuilderFactory is only used in one place --- src/main/java/loci/common/xml/XMLTools.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/loci/common/xml/XMLTools.java b/src/main/java/loci/common/xml/XMLTools.java index a864272e..355c5a50 100644 --- a/src/main/java/loci/common/xml/XMLTools.java +++ b/src/main/java/loci/common/xml/XMLTools.java @@ -247,8 +247,7 @@ public static Document parseDOM(InputStream is) checkUTF8(in); // Java XML factories are not declared to be thread safe - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder db = factory.newDocumentBuilder(); + DocumentBuilder db = createBuilder(); db.setErrorHandler(new ParserErrorHandler()); return db.parse(in); } From 51a5e466c23c5337d90e5d475a02fa8882ec8d5f Mon Sep 17 00:00:00 2001 From: Melissa Linkert Date: Thu, 5 Feb 2026 11:20:47 -0600 Subject: [PATCH 3/5] Configure safer parsing features in DocumentBuilderFactory --- src/main/java/loci/common/xml/XMLTools.java | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/main/java/loci/common/xml/XMLTools.java b/src/main/java/loci/common/xml/XMLTools.java index 355c5a50..c7ac9d90 100644 
--- a/src/main/java/loci/common/xml/XMLTools.java +++ b/src/main/java/loci/common/xml/XMLTools.java @@ -50,11 +50,13 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Hashtable; +import java.util.Map; import java.util.Set; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; @@ -118,6 +120,18 @@ private static TransformerFactory createTransformFactory() { return factory; }; + private static final Map<String, Boolean> FEATURES = createXMLParserFeatures(); + + private static Map<String, Boolean> createXMLParserFeatures() { + HashMap<String, Boolean> features = new HashMap<String, Boolean>(); + features.put(XMLConstants.FEATURE_SECURE_PROCESSING, true); + features.put("http://apache.org/xml/features/disallow-doctype-decl", true); + features.put("http://xml.org/sax/features/external-general-entities", false); + features.put("http://xml.org/sax/features/external-parameter-entities", false); + features.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); + return features; + }; + // -- Interfaces -- /** @@ -159,7 +173,17 @@ private XMLTools() { } */ public static DocumentBuilder createBuilder() { try { - return DocumentBuilderFactory.newInstance().newDocumentBuilder(); + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setXIncludeAware(false); + for (String feature : FEATURES.keySet()) { + try { + factory.setFeature(feature, FEATURES.get(feature)); + } + catch (ParserConfigurationException e) { + LOGGER.debug("Parser does not support feature " + feature, e); + } + } + return factory.newDocumentBuilder(); } catch (ParserConfigurationException e) { LOGGER.error("Cannot create DocumentBuilder", e); From 28c0b759ac8885794ceea85842343444f42d3d90 Mon Sep 17 00:00:00 2001 From: Melissa Linkert Date: Thu, 5 Feb 2026 11:32:35 -0600 Subject: [PATCH 
4/5] Update SAXParserFactory configuration to match DocumentBuilderFactory --- src/main/java/loci/common/xml/XMLTools.java | 31 +++++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/main/java/loci/common/xml/XMLTools.java b/src/main/java/loci/common/xml/XMLTools.java index c7ac9d90..e833bf73 100644 --- a/src/main/java/loci/common/xml/XMLTools.java +++ b/src/main/java/loci/common/xml/XMLTools.java @@ -508,6 +508,29 @@ public static String indentXML(String xml, int spacing, // -- Parsing -- + /** + * Create a new SAX parser. + * + * @throws ParserConfigurationException + * @throws SAXException + */ + public static SAXParser createSAXParser() + throws ParserConfigurationException, SAXException + { + // Java XML factories are not declared to be thread safe + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setXIncludeAware(false); + for (String feature : FEATURES.keySet()) { + try { + factory.setFeature(feature, FEATURES.get(feature)); + } + catch (ParserConfigurationException e) { + LOGGER.debug("Parser does not support feature " + feature, e); + } + } + return factory.newSAXParser(); + } + /** * Parses the given XML string into a list of key/value pairs. 
* @@ -578,9 +601,7 @@ public static void parseXML(InputStream xml, DefaultHandler handler) throws IOException { try { - // Java XML factories are not declared to be thread safe - SAXParserFactory factory = SAXParserFactory.newInstance(); - SAXParser parser = factory.newSAXParser(); + SAXParser parser = createSAXParser(); parser.parse(xml, handler); } catch (ParserConfigurationException exc) { @@ -836,9 +857,7 @@ public static boolean validateXML(String xml, String label, LOGGER.info("Parsing schema path"); ValidationSAXHandler saxHandler = new ValidationSAXHandler(); try { - // Java XML factories are not declared to be thread safe - SAXParserFactory factory = SAXParserFactory.newInstance(); - SAXParser saxParser = factory.newSAXParser(); + SAXParser saxParser = createSAXParser(); InputStream is = new ByteArrayInputStream(xml.getBytes(Constants.ENCODING)); saxParser.parse(is, saxHandler); From 710a91908ae8783baf53f49b2803cf4bc7c4e02a Mon Sep 17 00:00:00 2001 From: Melissa Linkert Date: Fri, 6 Feb 2026 09:54:58 -0600 Subject: [PATCH 5/5] Don't completely disable DTD parsing --- src/main/java/loci/common/xml/XMLTools.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/loci/common/xml/XMLTools.java b/src/main/java/loci/common/xml/XMLTools.java index e833bf73..862116ff 100644 --- a/src/main/java/loci/common/xml/XMLTools.java +++ b/src/main/java/loci/common/xml/XMLTools.java @@ -125,7 +125,6 @@ private static TransformerFactory createTransformFactory() { private static Map createXMLParserFeatures() { HashMap features = new HashMap(); features.put(XMLConstants.FEATURE_SECURE_PROCESSING, true); - features.put("http://apache.org/xml/features/disallow-doctype-decl", true); features.put("http://xml.org/sax/features/external-general-entities", false); features.put("http://xml.org/sax/features/external-parameter-entities", false); features.put("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); @@ -175,6 +174,7 @@ public 
static DocumentBuilder createBuilder() { try { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setXIncludeAware(false); + factory.setExpandEntityReferences(false); for (String feature : FEATURES.keySet()) { try { factory.setFeature(feature, FEATURES.get(feature));