-
Bug
-
Resolution: Unresolved
-
P4
-
8, 11, 17, 18, 19, 20
-
generic
-
generic
ADDITIONAL SYSTEM INFORMATION :
Mac / OpenJDK 18.0.2 (also tried OpenJDK 17.0.1 and 11).
A DESCRIPTION OF THE PROBLEM :
The XML spec forbids the following characters from appearing in their literal form in XML documents:
<!ENTITY lt "&#38;#60;">
<!ENTITY gt "&#62;">
<!ENTITY amp "&#38;#38;">
<!ENTITY apos "&#39;">
<!ENTITY quot "&#34;">
https://www.w3.org/TR/xml11/#sec-predefined-ent
However, the toString() implementation of the com.sun.xml.internal.stream.events.AttributeImpl class, which is used by the writeAsEncodedUnicodeEx(java.io.Writer writer) method to write out attribute events, appears to unescape HTML entities when writing out attribute values.
Thus, if you have an attribute value with an ampersand in it, like so:
<attribution_url data='https://www.redacted.com/?contact_type=redacted&amp;url=REDACTED_VALUE&amp;client=REDACTED_VALUE&amp;videoid=REDACTED_VALUE'></attribution_url>
The &amp; entity will be written out as "&" ... which is incorrect.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
package com.bugs
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
/**
 * Reproducer: round-trips a small XML document through the StAX event API and checks whether
 * the text produced by {@code XMLEvent.writeAsEncodedUnicode} can be parsed again.
 *
 * <p>The input contains an attribute value with ampersands escaped as {@code &amp;}. The
 * program serializes the parsed events, tries to re-parse that output, and finally re-parses
 * a copy in which every {@code &} has been replaced by {@code &amp;} (the workaround).
 */
public class XmlMeltdown {

    /**
     * Runs the three passes: parse and serialize, re-parse the serialized text, re-parse the
     * patched text.
     *
     * @param args unused
     * @throws XMLStreamException if the original input or the patched output cannot be
     *     processed; a failure while re-reading the unpatched output is caught and reported
     */
    public static void main(String[] args) throws XMLStreamException {
        // The ampersands in the attribute value are escaped as &amp;. A bare '&' here would
        // make the input itself malformed and the first pass would fail before reaching
        // the behaviour under test.
        String xml = " <UI>\n" +
                " <config>\n" +
                " <context data='default'></context>\n" +
                " <params>\n" +
                " <attribution_text data='data'></attribution_text>\n" +
                " <enable_companion_banner bool='true'></enable_companion_banner>\n" +
                " <attribution_url\n" +
                " data='https://www.redacted.com/?contact_type=redacted&amp;url=redacted&amp;client=redacted&amp;videoid=redacted'></attribution_url>\n" +
                " </params>\n" +
                " </config>\n" +
                " </UI>";

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        //All Extractors can assume IS_COALESCING.. otherwise it's just too hard.
        //FYI: this flag ensures all CDATA text inside one Node is presented as one element
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        // Pass 1: parse the input and serialize every event back to text.
        String firstPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(xml)));
        System.out.println(firstPass);

        // Pass 2: feed the serialized text back in. The reader is created outside the try so
        // that only failures while reading/writing events are reported as a round-trip failure.
        XMLEventReader eventReader2 = xmlInputFactory.createXMLEventReader(new StringReader(firstPass));
        try {
            writeEvents(eventReader2);
        } catch (Exception e) {
            System.out.println("Failed to read what it wrote.");
        }

        // Pass 3: workaround - re-escape every ampersand before parsing again.
        // NOTE(review): this is a blunt textual replace; it also re-escapes ampersands that
        // are already part of an entity reference.
        String workaround = firstPass.replace("&", "&amp;");
        System.out.println("workaround:" + workaround);
        String thirdPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(workaround)));
        System.out.println(thirdPass);
        System.out.println("Work-around success!");
    }

    /**
     * Drains {@code eventReader}, writing every event except the start-document and
     * end-document events via {@code writeAsEncodedUnicode}.
     *
     * @param eventReader source of events; read until {@code hasNext()} returns false
     * @return the concatenated text written for the events
     * @throws XMLStreamException if reading or writing an event fails
     */
    private static String writeEvents(XMLEventReader eventReader) throws XMLStreamException {
        StringWriter out = new StringWriter();
        while (eventReader.hasNext()) {
            XMLEvent curEvent = eventReader.nextEvent();
            if (curEvent.isStartDocument() || curEvent.isEndDocument()) {
                continue;
            }
            curEvent.writeAsEncodedUnicode(out);
        }
        return out.toString();
    }
}
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
The program doesn't print out: "Failed to read what it wrote."
ACTUAL -
The program does print out: "Failed to read what it wrote."
---------- BEGIN SOURCE ----------
package com.bugs
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
/**
 * Reproducer: round-trips a small XML document through the StAX event API and checks whether
 * the text produced by {@code XMLEvent.writeAsEncodedUnicode} can be parsed again.
 *
 * <p>The input contains an attribute value with ampersands escaped as {@code &amp;}. The
 * program serializes the parsed events, tries to re-parse that output, and finally re-parses
 * a copy in which every {@code &} has been replaced by {@code &amp;} (the workaround).
 */
public class XmlMeltdown {

    /**
     * Runs the three passes: parse and serialize, re-parse the serialized text, re-parse the
     * patched text.
     *
     * @param args unused
     * @throws XMLStreamException if the original input or the patched output cannot be
     *     processed; a failure while re-reading the unpatched output is caught and reported
     */
    public static void main(String[] args) throws XMLStreamException {
        // The ampersands in the attribute value are escaped as &amp;. A bare '&' here would
        // make the input itself malformed and the first pass would fail before reaching
        // the behaviour under test.
        String xml = " <UI>\n" +
                " <config>\n" +
                " <context data='default'></context>\n" +
                " <params>\n" +
                " <attribution_text data='data'></attribution_text>\n" +
                " <enable_companion_banner bool='true'></enable_companion_banner>\n" +
                " <attribution_url\n" +
                " data='https://www.redacted.com/?contact_type=redacted&amp;url=redacted&amp;client=redacted&amp;videoid=redacted'></attribution_url>\n" +
                " </params>\n" +
                " </config>\n" +
                " </UI>";

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        //All Extractors can assume IS_COALESCING.. otherwise it's just too hard.
        //FYI: this flag ensures all CDATA text inside one Node is presented as one element
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        // Pass 1: parse the input and serialize every event back to text.
        String firstPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(xml)));
        System.out.println(firstPass);

        // Pass 2: feed the serialized text back in. The reader is created outside the try so
        // that only failures while reading/writing events are reported as a round-trip failure.
        XMLEventReader eventReader2 = xmlInputFactory.createXMLEventReader(new StringReader(firstPass));
        try {
            writeEvents(eventReader2);
        } catch (Exception e) {
            System.out.println("Failed to read what it wrote.");
        }

        // Pass 3: workaround - re-escape every ampersand before parsing again.
        // NOTE(review): this is a blunt textual replace; it also re-escapes ampersands that
        // are already part of an entity reference.
        String workaround = firstPass.replace("&", "&amp;");
        System.out.println("workaround:" + workaround);
        String thirdPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(workaround)));
        System.out.println(thirdPass);
        System.out.println("Work-around success!");
    }

    /**
     * Drains {@code eventReader}, writing every event except the start-document and
     * end-document events via {@code writeAsEncodedUnicode}.
     *
     * @param eventReader source of events; read until {@code hasNext()} returns false
     * @return the concatenated text written for the events
     * @throws XMLStreamException if reading or writing an event fails
     */
    private static String writeEvents(XMLEventReader eventReader) throws XMLStreamException {
        StringWriter out = new StringWriter();
        while (eventReader.hasNext()) {
            XMLEvent curEvent = eventReader.nextEvent();
            if (curEvent.isStartDocument() || curEvent.isEndDocument()) {
                continue;
            }
            curEvent.writeAsEncodedUnicode(out);
        }
        return out.toString();
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
See code: manually replace entities
FREQUENCY : always
Mac / OpenJDK 18.0.2 (also tried OpenJDK 17.0.1 and 11).
A DESCRIPTION OF THE PROBLEM :
The XML spec forbids the following characters from appearing in their literal form in XML documents:
<!ENTITY lt "&#38;#60;">
<!ENTITY gt "&#62;">
<!ENTITY amp "&#38;#38;">
<!ENTITY apos "&#39;">
<!ENTITY quot "&#34;">
https://www.w3.org/TR/xml11/#sec-predefined-ent
However, the toString() implementation of the com.sun.xml.internal.stream.events.AttributeImpl class, which is used by the writeAsEncodedUnicodeEx(java.io.Writer writer) method to write out attribute events, appears to unescape HTML entities when writing out attribute values.
Thus, if you have an attribute value with an ampersand in it, like so:
<attribution_url data='https://www.redacted.com/?contact_type=redacted&amp;url=REDACTED_VALUE&amp;client=REDACTED_VALUE&amp;videoid=REDACTED_VALUE'></attribution_url>
The &amp; entity will be written out as "&" ... which is incorrect.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
package com.bugs
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
/**
 * Reproducer: round-trips a small XML document through the StAX event API and checks whether
 * the text produced by {@code XMLEvent.writeAsEncodedUnicode} can be parsed again.
 *
 * <p>The input contains an attribute value with ampersands escaped as {@code &amp;}. The
 * program serializes the parsed events, tries to re-parse that output, and finally re-parses
 * a copy in which every {@code &} has been replaced by {@code &amp;} (the workaround).
 */
public class XmlMeltdown {

    /**
     * Runs the three passes: parse and serialize, re-parse the serialized text, re-parse the
     * patched text.
     *
     * @param args unused
     * @throws XMLStreamException if the original input or the patched output cannot be
     *     processed; a failure while re-reading the unpatched output is caught and reported
     */
    public static void main(String[] args) throws XMLStreamException {
        // The ampersands in the attribute value are escaped as &amp;. A bare '&' here would
        // make the input itself malformed and the first pass would fail before reaching
        // the behaviour under test.
        String xml = " <UI>\n" +
                " <config>\n" +
                " <context data='default'></context>\n" +
                " <params>\n" +
                " <attribution_text data='data'></attribution_text>\n" +
                " <enable_companion_banner bool='true'></enable_companion_banner>\n" +
                " <attribution_url\n" +
                " data='https://www.redacted.com/?contact_type=redacted&amp;url=redacted&amp;client=redacted&amp;videoid=redacted'></attribution_url>\n" +
                " </params>\n" +
                " </config>\n" +
                " </UI>";

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        //All Extractors can assume IS_COALESCING.. otherwise it's just too hard.
        //FYI: this flag ensures all CDATA text inside one Node is presented as one element
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        // Pass 1: parse the input and serialize every event back to text.
        String firstPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(xml)));
        System.out.println(firstPass);

        // Pass 2: feed the serialized text back in. The reader is created outside the try so
        // that only failures while reading/writing events are reported as a round-trip failure.
        XMLEventReader eventReader2 = xmlInputFactory.createXMLEventReader(new StringReader(firstPass));
        try {
            writeEvents(eventReader2);
        } catch (Exception e) {
            System.out.println("Failed to read what it wrote.");
        }

        // Pass 3: workaround - re-escape every ampersand before parsing again.
        // NOTE(review): this is a blunt textual replace; it also re-escapes ampersands that
        // are already part of an entity reference.
        String workaround = firstPass.replace("&", "&amp;");
        System.out.println("workaround:" + workaround);
        String thirdPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(workaround)));
        System.out.println(thirdPass);
        System.out.println("Work-around success!");
    }

    /**
     * Drains {@code eventReader}, writing every event except the start-document and
     * end-document events via {@code writeAsEncodedUnicode}.
     *
     * @param eventReader source of events; read until {@code hasNext()} returns false
     * @return the concatenated text written for the events
     * @throws XMLStreamException if reading or writing an event fails
     */
    private static String writeEvents(XMLEventReader eventReader) throws XMLStreamException {
        StringWriter out = new StringWriter();
        while (eventReader.hasNext()) {
            XMLEvent curEvent = eventReader.nextEvent();
            if (curEvent.isStartDocument() || curEvent.isEndDocument()) {
                continue;
            }
            curEvent.writeAsEncodedUnicode(out);
        }
        return out.toString();
    }
}
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
The program doesn't print out: "Failed to read what it wrote."
ACTUAL -
The program does print out: "Failed to read what it wrote."
---------- BEGIN SOURCE ----------
package com.bugs
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
/**
 * Reproducer: round-trips a small XML document through the StAX event API and checks whether
 * the text produced by {@code XMLEvent.writeAsEncodedUnicode} can be parsed again.
 *
 * <p>The input contains an attribute value with ampersands escaped as {@code &amp;}. The
 * program serializes the parsed events, tries to re-parse that output, and finally re-parses
 * a copy in which every {@code &} has been replaced by {@code &amp;} (the workaround).
 */
public class XmlMeltdown {

    /**
     * Runs the three passes: parse and serialize, re-parse the serialized text, re-parse the
     * patched text.
     *
     * @param args unused
     * @throws XMLStreamException if the original input or the patched output cannot be
     *     processed; a failure while re-reading the unpatched output is caught and reported
     */
    public static void main(String[] args) throws XMLStreamException {
        // The ampersands in the attribute value are escaped as &amp;. A bare '&' here would
        // make the input itself malformed and the first pass would fail before reaching
        // the behaviour under test.
        String xml = " <UI>\n" +
                " <config>\n" +
                " <context data='default'></context>\n" +
                " <params>\n" +
                " <attribution_text data='data'></attribution_text>\n" +
                " <enable_companion_banner bool='true'></enable_companion_banner>\n" +
                " <attribution_url\n" +
                " data='https://www.redacted.com/?contact_type=redacted&amp;url=redacted&amp;client=redacted&amp;videoid=redacted'></attribution_url>\n" +
                " </params>\n" +
                " </config>\n" +
                " </UI>";

        XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
        //All Extractors can assume IS_COALESCING.. otherwise it's just too hard.
        //FYI: this flag ensures all CDATA text inside one Node is presented as one element
        xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);

        // Pass 1: parse the input and serialize every event back to text.
        String firstPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(xml)));
        System.out.println(firstPass);

        // Pass 2: feed the serialized text back in. The reader is created outside the try so
        // that only failures while reading/writing events are reported as a round-trip failure.
        XMLEventReader eventReader2 = xmlInputFactory.createXMLEventReader(new StringReader(firstPass));
        try {
            writeEvents(eventReader2);
        } catch (Exception e) {
            System.out.println("Failed to read what it wrote.");
        }

        // Pass 3: workaround - re-escape every ampersand before parsing again.
        // NOTE(review): this is a blunt textual replace; it also re-escapes ampersands that
        // are already part of an entity reference.
        String workaround = firstPass.replace("&", "&amp;");
        System.out.println("workaround:" + workaround);
        String thirdPass = writeEvents(xmlInputFactory.createXMLEventReader(new StringReader(workaround)));
        System.out.println(thirdPass);
        System.out.println("Work-around success!");
    }

    /**
     * Drains {@code eventReader}, writing every event except the start-document and
     * end-document events via {@code writeAsEncodedUnicode}.
     *
     * @param eventReader source of events; read until {@code hasNext()} returns false
     * @return the concatenated text written for the events
     * @throws XMLStreamException if reading or writing an event fails
     */
    private static String writeEvents(XMLEventReader eventReader) throws XMLStreamException {
        StringWriter out = new StringWriter();
        while (eventReader.hasNext()) {
            XMLEvent curEvent = eventReader.nextEvent();
            if (curEvent.isStartDocument() || curEvent.isEndDocument()) {
                continue;
            }
            curEvent.writeAsEncodedUnicode(out);
        }
        return out.toString();
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
See code: manually replace entities
FREQUENCY : always