-
Bug
-
Resolution: Incomplete
-
P4
-
None
-
8u212
-
x86_64
-
linux
ADDITIONAL SYSTEM INFORMATION :
Java 8 and 11 on Debian and Windows
A DESCRIPTION OF THE PROBLEM :
When transforming from a DOMSource to a StreamResult two sources are taken into account for the encoding of the produced document:
a. Document.getXmlEncoding()
b. the value of the OutputKeys.ENCODING parameter
The produced XML document uses (a) in the XML declaration, while its characters are actually encoded using (b), which results in a mis-encoded document.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
1. Parse an XML document encoded with encoding A into a DOM Document.
2. Get a Transformer with OutputKeys.ENCODING parameter set to B.
3. Transform the DOM Document into an OutputStream.
4. The OutputStream is miscoded.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
According to https://www.w3.org/TR/xslt-10/#output , the resulting document should have B (the value of OutputKeys.ENCODING, i.e. the encoding actually used) in the XML declaration.
ACTUAL -
The document declares A as encoding, but encodes using B.
---------- BEGIN SOURCE ----------
package pl.copernik.trax;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.BlockJUnit4ClassRunner;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
@RunWith(BlockJUnit4ClassRunner.class)
public class TraXTest {
private static final String textValue = "áâãäÃ¥æçèéêëìîïðñòóôõö÷øùúûüýþÿÃÂÃÂÃÂÃÂÃÂàÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂàáâãäÃ¥æçèéê"
+ "ëìÃÂîïðñòóôõö÷øùúûüýþÿ";
private static final String xml = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?><root>" + textValue
+ "</root>";
private static final byte xmlBytes[] = xml.getBytes(Charset.forName("ISO-8859-1"));
/**
 * Parses the sample bytes into a DOM {@link Document} and runs the
 * transcoding round-trip check with that tree wrapped in a {@link DOMSource}.
 *
 * @throws Exception if parsing or the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_DOMSource() throws Exception {
    final InputStream encodedInput = new ByteArrayInputStream(xmlBytes);
    final Document parsed = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(encodedInput);
    final Source domSource = new DOMSource(parsed);
    checkTranscoding(domSource);
}
/**
 * Runs the transcoding round-trip check with the raw sample bytes exposed
 * through a {@link StreamSource}.
 *
 * @throws Exception if the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_StreamSource() throws Exception {
    final Source streamSource = new StreamSource(new ByteArrayInputStream(xmlBytes));
    checkTranscoding(streamSource);
}
/**
 * Runs the transcoding round-trip check with the sample bytes read through
 * a StAX {@link XMLEventReader} wrapped in a {@link StAXSource}.
 *
 * @throws Exception if the reader cannot be created or the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_EventReader() throws Exception {
    final XMLInputFactory staxFactory = XMLInputFactory.newFactory();
    final XMLEventReader events =
            staxFactory.createXMLEventReader(new ByteArrayInputStream(xmlBytes));
    checkTranscoding(new StAXSource(events));
}
/**
 * Runs the transcoding round-trip check with the sample bytes read through
 * a StAX {@link XMLStreamReader} wrapped in a {@link StAXSource}.
 *
 * @throws Exception if the reader cannot be created or the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_StreamReader() throws Exception {
    final XMLInputFactory staxFactory = XMLInputFactory.newFactory();
    final XMLStreamReader cursor =
            staxFactory.createXMLStreamReader(new ByteArrayInputStream(xmlBytes));
    checkTranscoding(new StAXSource(cursor));
}
/**
 * Runs the transcoding round-trip check with the sample bytes supplied as a
 * SAX {@link InputSource} wrapped in a {@link SAXSource}.
 *
 * @throws Exception if the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_SAXSource() throws Exception {
    final Source saxSource =
            new SAXSource(new InputSource(new ByteArrayInputStream(xmlBytes)));
    checkTranscoding(saxSource);
}
/**
 * Round-trips {@code source} through a byte stream and verifies the text survives.
 *
 * <p>The source is first serialized to bytes by a stylesheet-less
 * {@link Transformer} whose output method is {@code xml} and whose output
 * encoding is {@code UTF-8}. Those bytes are then transformed back into a
 * fresh DOM document, and the text content of its document element is
 * compared with {@code textValue}.
 *
 * @param source the XML input to serialize
 * @throws Exception if either transformation fails
 */
private static void checkTranscoding(Source source) throws Exception {
    final TransformerFactory transformerFactory = TransformerFactory.newInstance();

    // Phase 1: serialize the source, explicitly requesting UTF-8 output.
    final Transformer serializer = transformerFactory.newTransformer();
    serializer.setOutputProperty(OutputKeys.METHOD, "xml");
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    final ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    serializer.transform(source, new StreamResult(serialized));

    // Phase 2: read the produced bytes back into an empty DOM document.
    final Document reparsed = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .newDocument();
    final InputStream producedBytes = new ByteArrayInputStream(serialized.toByteArray());
    transformerFactory.newTransformer()
            .transform(new StreamSource(producedBytes), new DOMResult(reparsed));

    final String roundTripped = reparsed.getDocumentElement().getTextContent();
    Assert.assertEquals(textValue, roundTripped);
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
There are plenty of workarounds:
1. Omit the XML declaration and add a custom one.
2. Use the original Xalan, which unfortunately lacks support for StAX Sources and Results.
IMHO, adding a check in com.sun.org.apache.xalan.internal.xsltc.trax.DOM2TO.setEncoding() for whether the SerializationHandler already has an encoding set should be enough to eliminate this bug.
FREQUENCY : always
Java 8 and 11 on Debian and Windows
A DESCRIPTION OF THE PROBLEM :
When transforming from a DOMSource to a StreamResult two sources are taken into account for the encoding of the produced document:
a. Document.getXmlEncoding()
b. the value of the OutputKeys.ENCODING parameter
The produced XML document uses (a) in the XML declaration, while its characters are actually encoded using (b), which results in a mis-encoded document.
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
1. Parse an XML document encoded with encoding A into a DOM Document.
2. Get a Transformer with OutputKeys.ENCODING parameter set to B.
3. Transform the DOM Document into an OutputStream.
4. The OutputStream is miscoded.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
According to https://www.w3.org/TR/xslt-10/#output , the resulting document should have B (the value of OutputKeys.ENCODING, i.e. the encoding actually used) in the XML declaration.
ACTUAL -
The document declares A as encoding, but encodes using B.
---------- BEGIN SOURCE ----------
package pl.copernik.trax;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.nio.charset.Charset;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.BlockJUnit4ClassRunner;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
@RunWith(BlockJUnit4ClassRunner.class)
public class TraXTest {
private static final String textValue = "áâãäÃ¥æçèéêëìîïðñòóôõö÷øùúûüýþÿÃÂÃÂÃÂÃÂÃÂàÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂÃÂàáâãäÃ¥æçèéê"
+ "ëìÃÂîïðñòóôõö÷øùúûüýþÿ";
private static final String xml = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?><root>" + textValue
+ "</root>";
private static final byte xmlBytes[] = xml.getBytes(Charset.forName("ISO-8859-1"));
/**
 * Parses the sample bytes into a DOM {@link Document} and runs the
 * transcoding round-trip check with that tree wrapped in a {@link DOMSource}.
 *
 * @throws Exception if parsing or the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_DOMSource() throws Exception {
    final InputStream encodedInput = new ByteArrayInputStream(xmlBytes);
    final Document parsed = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(encodedInput);
    final Source domSource = new DOMSource(parsed);
    checkTranscoding(domSource);
}
/**
 * Runs the transcoding round-trip check with the raw sample bytes exposed
 * through a {@link StreamSource}.
 *
 * @throws Exception if the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_StreamSource() throws Exception {
    final Source streamSource = new StreamSource(new ByteArrayInputStream(xmlBytes));
    checkTranscoding(streamSource);
}
/**
 * Runs the transcoding round-trip check with the sample bytes read through
 * a StAX {@link XMLEventReader} wrapped in a {@link StAXSource}.
 *
 * @throws Exception if the reader cannot be created or the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_EventReader() throws Exception {
    final XMLInputFactory staxFactory = XMLInputFactory.newFactory();
    final XMLEventReader events =
            staxFactory.createXMLEventReader(new ByteArrayInputStream(xmlBytes));
    checkTranscoding(new StAXSource(events));
}
/**
 * Runs the transcoding round-trip check with the sample bytes read through
 * a StAX {@link XMLStreamReader} wrapped in a {@link StAXSource}.
 *
 * @throws Exception if the reader cannot be created or the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_StreamReader() throws Exception {
    final XMLInputFactory staxFactory = XMLInputFactory.newFactory();
    final XMLStreamReader cursor =
            staxFactory.createXMLStreamReader(new ByteArrayInputStream(xmlBytes));
    checkTranscoding(new StAXSource(cursor));
}
/**
 * Runs the transcoding round-trip check with the sample bytes supplied as a
 * SAX {@link InputSource} wrapped in a {@link SAXSource}.
 *
 * @throws Exception if the round-trip check fails
 */
@Test
@SuppressWarnings("static-method")
public void transcoding_from_SAXSource() throws Exception {
    final Source saxSource =
            new SAXSource(new InputSource(new ByteArrayInputStream(xmlBytes)));
    checkTranscoding(saxSource);
}
/**
 * Round-trips {@code source} through a byte stream and verifies the text survives.
 *
 * <p>The source is first serialized to bytes by a stylesheet-less
 * {@link Transformer} whose output method is {@code xml} and whose output
 * encoding is {@code UTF-8}. Those bytes are then transformed back into a
 * fresh DOM document, and the text content of its document element is
 * compared with {@code textValue}.
 *
 * @param source the XML input to serialize
 * @throws Exception if either transformation fails
 */
private static void checkTranscoding(Source source) throws Exception {
    final TransformerFactory transformerFactory = TransformerFactory.newInstance();

    // Phase 1: serialize the source, explicitly requesting UTF-8 output.
    final Transformer serializer = transformerFactory.newTransformer();
    serializer.setOutputProperty(OutputKeys.METHOD, "xml");
    serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
    final ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    serializer.transform(source, new StreamResult(serialized));

    // Phase 2: read the produced bytes back into an empty DOM document.
    final Document reparsed = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .newDocument();
    final InputStream producedBytes = new ByteArrayInputStream(serialized.toByteArray());
    transformerFactory.newTransformer()
            .transform(new StreamSource(producedBytes), new DOMResult(reparsed));

    final String roundTripped = reparsed.getDocumentElement().getTextContent();
    Assert.assertEquals(textValue, roundTripped);
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
There are plenty of workarounds:
1. Omit the XML declaration and add a custom one.
2. Use the original Xalan, which unfortunately lacks support for StAX Sources and Results.
IMHO, adding a check in com.sun.org.apache.xalan.internal.xsltc.trax.DOM2TO.setEncoding() for whether the SerializationHandler already has an encoding set should be enough to eliminate this bug.
FREQUENCY : always