Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8292689

XMLEvent::writeAsEncodedUnicode un-escapes forbidden entities in attribute value

    XMLWordPrintable

Details

    Description

      ADDITIONAL SYSTEM INFORMATION :
      Mac / OpenJDK 18.0.2 (also tried OpenJDK 17.0.1 and 11).

      A DESCRIPTION OF THE PROBLEM :
      The XML spec forbids the following characters from appearing in their literal form in XML documents:

      <!ENTITY lt "&#38;#60;">
      <!ENTITY gt "&#62;">
      <!ENTITY amp "&#38;#38;">
      <!ENTITY apos "&#39;">
      <!ENTITY quot "&#34;">

      https://www.w3.org/TR/xml11/#sec-predefined-ent

      However, the toString() implementation of the com.sun.xml.internal.stream.events.AttributeImpl class, which is used by the writeAsEncodedUnicodeEx(java.io.Writer writer) method to write out attribute events, appears to unescape the predefined XML entities when writing out attribute values.

      Thus, if you have an attribute value with an ampersand in it, like so:

      <attribution_url data='https://www.redacted.com/?contact_type=redacted&amp;url=REDACTED_VALUE&amp;client=REDACTED_VALUE&amp;videoid=REDACTED_VALUE'></attribution_url>

      The &amp; entity will be written out as "&" ... which is incorrect.

      STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
      package com.bugs;

      import java.io.StringReader;
      import java.io.StringWriter;
      import javax.xml.stream.XMLEventReader;
      import javax.xml.stream.XMLInputFactory;
      import javax.xml.stream.XMLStreamException;
      import javax.xml.stream.events.XMLEvent;

      /**
       * Reproducer for JDK-8292689: reads a small XML document through an
       * {@link XMLEventReader}, writes every event back out with
       * {@link XMLEvent#writeAsEncodedUnicode(java.io.Writer)}, and then tries to
       * parse that output again.
       */
      public class XmlMeltdown {
          /**
           * Runs three passes: serialize the sample document, re-read the serialized
           * text (reporting a failure instead of aborting), and re-read it once more
           * after replacing every {@code &} with {@code &amp;}.
           *
           * @param args unused
           * @throws XMLStreamException if the first or the third pass fails
           */
          public static void main(String[] args) throws XMLStreamException {
              // The data attribute of <attribution_url> carries '&' escaped as &amp;.
              String xml = " <UI>\n" +
                  " <config>\n" +
                  " <context data='default'></context>\n" +
                  " <params>\n" +
                  " <attribution_text data='data'></attribution_text>\n" +
                  " <enable_companion_banner bool='true'></enable_companion_banner>\n" +
                  " <attribution_url\n" +
                  " data='https://www.redacted.com/?contact_type=redacted&amp;url=redacted&amp;client=redacted&amp;videoid=redacted'></attribution_url>\n" +
                  " </params>\n" +
                  " </config>\n" +
                  " </UI>";

              XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
              // All extractors can assume IS_COALESCING; otherwise it's just too hard.
              // FYI: this flag ensures all CDATA text inside one node is presented as one element.
              xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

              // Pass 1: serialize the original document event by event.
              String firstPass = copyEvents(xmlInputFactory, xml);
              System.out.println(firstPass);

              // Pass 2: the serialized text should be readable again. A failure here is
              // the symptom being reported, so it is caught and reported, not rethrown.
              try {
                  copyEvents(xmlInputFactory, firstPass);
              } catch (Exception e) {
                  System.out.println("Failed to read what it wrote.");
                  // Keep the cause visible without changing what is printed on stdout.
                  System.err.println(e);
              }

              // Pass 3: the workaround - re-escape every '&' by hand before re-reading.
              String workaround = firstPass.replace("&", "&amp;");
              System.out.println("workaround:" + workaround);
              String thirdPass = copyEvents(xmlInputFactory, workaround);
              System.out.println(thirdPass);
              System.out.println("Work-around success!");
          }

          /**
           * Parses {@code xml} with an event reader created by {@code factory} and
           * returns the concatenation of what each event writes via
           * {@code writeAsEncodedUnicode}, skipping the start/end document events.
           *
           * @param factory the factory used to create the event reader
           * @param xml the XML text to parse
           * @return the text written by all copied events
           * @throws XMLStreamException if reading or writing an event fails
           */
          private static String copyEvents(XMLInputFactory factory, String xml) throws XMLStreamException {
              StringWriter out = new StringWriter();
              XMLEventReader reader = factory.createXMLEventReader(new StringReader(xml));
              try {
                  while (reader.hasNext()) {
                      XMLEvent event = reader.nextEvent();
                      if (event.isStartDocument() || event.isEndDocument()) {
                          continue;
                      }
                      event.writeAsEncodedUnicode(out);
                  }
              } finally {
                  reader.close();
              }
              return out.toString();
          }
      }

      EXPECTED VERSUS ACTUAL BEHAVIOR :
      EXPECTED -
      The program doesn't print out: "Failed to read what it wrote."


      ACTUAL -
      The program does print out: "Failed to read what it wrote."

      ---------- BEGIN SOURCE ----------
      package com.bugs;

      import java.io.StringReader;
      import java.io.StringWriter;
      import javax.xml.stream.XMLEventReader;
      import javax.xml.stream.XMLInputFactory;
      import javax.xml.stream.XMLStreamException;
      import javax.xml.stream.events.XMLEvent;

      /**
       * Reproducer for JDK-8292689: reads a small XML document through an
       * {@link XMLEventReader}, writes every event back out with
       * {@link XMLEvent#writeAsEncodedUnicode(java.io.Writer)}, and then tries to
       * parse that output again.
       */
      public class XmlMeltdown {
          /**
           * Runs three passes: serialize the sample document, re-read the serialized
           * text (reporting a failure instead of aborting), and re-read it once more
           * after replacing every {@code &} with {@code &amp;}.
           *
           * @param args unused
           * @throws XMLStreamException if the first or the third pass fails
           */
          public static void main(String[] args) throws XMLStreamException {
              // The data attribute of <attribution_url> carries '&' escaped as &amp;.
              String xml = " <UI>\n" +
                  " <config>\n" +
                  " <context data='default'></context>\n" +
                  " <params>\n" +
                  " <attribution_text data='data'></attribution_text>\n" +
                  " <enable_companion_banner bool='true'></enable_companion_banner>\n" +
                  " <attribution_url\n" +
                  " data='https://www.redacted.com/?contact_type=redacted&amp;url=redacted&amp;client=redacted&amp;videoid=redacted'></attribution_url>\n" +
                  " </params>\n" +
                  " </config>\n" +
                  " </UI>";

              XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
              // All extractors can assume IS_COALESCING; otherwise it's just too hard.
              // FYI: this flag ensures all CDATA text inside one node is presented as one element.
              xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

              // Pass 1: serialize the original document event by event.
              String firstPass = copyEvents(xmlInputFactory, xml);
              System.out.println(firstPass);

              // Pass 2: the serialized text should be readable again. A failure here is
              // the symptom being reported, so it is caught and reported, not rethrown.
              try {
                  copyEvents(xmlInputFactory, firstPass);
              } catch (Exception e) {
                  System.out.println("Failed to read what it wrote.");
                  // Keep the cause visible without changing what is printed on stdout.
                  System.err.println(e);
              }

              // Pass 3: the workaround - re-escape every '&' by hand before re-reading.
              String workaround = firstPass.replace("&", "&amp;");
              System.out.println("workaround:" + workaround);
              String thirdPass = copyEvents(xmlInputFactory, workaround);
              System.out.println(thirdPass);
              System.out.println("Work-around success!");
          }

          /**
           * Parses {@code xml} with an event reader created by {@code factory} and
           * returns the concatenation of what each event writes via
           * {@code writeAsEncodedUnicode}, skipping the start/end document events.
           *
           * @param factory the factory used to create the event reader
           * @param xml the XML text to parse
           * @return the text written by all copied events
           * @throws XMLStreamException if reading or writing an event fails
           */
          private static String copyEvents(XMLInputFactory factory, String xml) throws XMLStreamException {
              StringWriter out = new StringWriter();
              XMLEventReader reader = factory.createXMLEventReader(new StringReader(xml));
              try {
                  while (reader.hasNext()) {
                      XMLEvent event = reader.nextEvent();
                      if (event.isStartDocument() || event.isEndDocument()) {
                          continue;
                      }
                      event.writeAsEncodedUnicode(out);
                  }
              } finally {
                  reader.close();
              }
              return out.toString();
          }
      }
      ---------- END SOURCE ----------

      CUSTOMER SUBMITTED WORKAROUND :
      See code: manually replace entities

      FREQUENCY : always


      Attachments

        Activity

          People

            joehw Joe Wang
            webbuggrp Webbug Group
            Votes:
            0 Vote for this issue
            Watchers:
            3 Start watching this issue

            Dates

              Created:
              Updated: