-
Bug
-
Resolution: Not an Issue
-
P3
-
6
-
x86
-
windows_xp
FULL PRODUCT VERSION :
java version "1.6.0-beta"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.6.0-beta-b59g)
Java HotSpot(TM) Client VM (build 1.6.0-beta-b59g, mixed mode, sharing)
ADDITIONAL OS VERSION INFORMATION :
Microsoft Windows XP [versie 5.1.2600]
A DESCRIPTION OF THE PROBLEM :
When setting the output method to "html" and declaring the output encoding to be "utf-8", the resulting document contains named entities for characters not in the ASCII-range.
The XSLT specification is clear on this one and says that ONLY when the chosen encoding lacks the character, a numeric or named entity reference should be written to the output. (http://www.w3.org/TR/xslt-xquery-serialization/#HTML_ENCODING)
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run the included code. It writes a document to System.out that contains one paragraph with the characters "é à ù".
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
A HTML document with a paragraph: <p>é à ù</p>
ACTUAL -
A HTML document with a paragraph: <p>&eacute; &agrave; &ugrave;</p>
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Reproduction case for the HTML output method: a small in-memory HTML
 * document containing non-ASCII text is run through a stylesheet that
 * declares {@code method="html"} and {@code encoding="UTF-8"}, and the
 * serialised result is printed to standard output.
 */
public class EntitiesOutput {

    /**
     * Program entry point; simply instantiates the class, whose constructor
     * performs the whole reproduction.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new EntitiesOutput();
    }

    /**
     * Builds the input page and the stylesheet, transforms the page into a
     * {@link StringWriter} and prints the writer's contents to
     * {@code System.out}. Any exception raised along the way is reported via
     * {@code printStackTrace()} and otherwise ignored.
     */
    public EntitiesOutput() {
        try {
            DOMImplementation domImpl = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder()
                    .getDOMImplementation();

            Document page = buildHtmlPage(domImpl);
            Document stylesheet = this.getHTMLTransform(domImpl);

            Transformer transformer = TransformerFactory.newInstance()
                    .newTransformer(new DOMSource(stylesheet));

            StringWriter buffer = new StringWriter();
            transformer.transform(new DOMSource(page), new StreamResult(buffer));
            System.out.println(buffer.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the input document: an {@code html} root holding a
     * {@code head/title} and a {@code body/p} whose text contains the
     * accented characters under test.
     *
     * @param impl DOM implementation used to create the document
     * @return the freshly built input document
     */
    private Document buildHtmlPage(DOMImplementation impl) {
        Document page = impl.createDocument(null, "html", null);
        Element root = page.getDocumentElement();

        Element headElement = page.createElement("head");
        Element titleElement = page.createElement("title");
        titleElement.appendChild(page.createTextNode("HTML output"));
        headElement.appendChild(titleElement);
        root.appendChild(headElement);

        Element bodyElement = page.createElement("body");
        Element paragraph = page.createElement("p");
        paragraph.appendChild(page.createTextNode("é à ù"));
        bodyElement.appendChild(paragraph);
        root.appendChild(bodyElement);

        return page;
    }

    /**
     * Creates the stylesheet as a DOM tree: an {@code output} element
     * requesting the html method, version 4.0, UTF-8 encoding and the
     * HTML 4.01 Strict doctype, followed by a single template matching
     * {@code /} that copies the current node with {@code copy-of}.
     *
     * @param impl DOM implementation used to create the document
     * @return the stylesheet document
     */
    private Document getHTMLTransform(DOMImplementation impl) {
        final String xslNamespace = "http://www.w3.org/1999/XSL/Transform";

        Document sheet = impl.createDocument(xslNamespace, "stylesheet", null);
        Element sheetRoot = sheet.getDocumentElement();
        sheetRoot.setAttribute("version", "2.0");

        // Serialisation settings under test.
        Element outputSpec = sheet.createElementNS(xslNamespace, "output");
        outputSpec.setAttribute("method", "html");
        outputSpec.setAttribute("version", "4.0");
        outputSpec.setAttribute("encoding", "UTF-8");
        outputSpec.setAttribute("doctype-public", "-//W3C//DTD HTML 4.01 Strict//EN");
        outputSpec.setAttribute("doctype-system", "http://www.w3.org/TR/html4/strict.dtd");
        sheetRoot.appendChild(outputSpec);

        // Single root template that copies the input through unchanged.
        Element rootTemplate = sheet.createElementNS(xslNamespace, "template");
        rootTemplate.setAttribute("match", "/");
        Element copyAll = sheet.createElementNS(xslNamespace, "copy-of");
        copyAll.setAttribute("select", ".");
        rootTemplate.appendChild(copyAll);
        sheetRoot.appendChild(rootTemplate);

        return sheet;
    }
}
---------- END SOURCE ----------
java version "1.6.0-beta"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.6.0-beta-b59g)
Java HotSpot(TM) Client VM (build 1.6.0-beta-b59g, mixed mode, sharing)
ADDITIONAL OS VERSION INFORMATION :
Microsoft Windows XP [versie 5.1.2600]
A DESCRIPTION OF THE PROBLEM :
When setting the output method to "html" and declaring the output encoding to be "utf-8", the resulting document contains named entities for characters not in the ASCII-range.
The XSLT specification is clear on this one and says that ONLY when the chosen encoding lacks the character, a numeric or named entity reference should be written to the output. (http://www.w3.org/TR/xslt-xquery-serialization/#HTML_ENCODING)
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run the included code. It writes a document to System.out that contains one paragraph with the characters "é à ù".
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
A HTML document with a paragraph: <p>é à ù</p>
ACTUAL -
A HTML document with a paragraph: <p>&eacute; &agrave; &ugrave;</p>
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Reproduction case for the HTML output method: a small in-memory HTML
 * document containing non-ASCII text is run through a stylesheet that
 * declares {@code method="html"} and {@code encoding="UTF-8"}, and the
 * serialised result is printed to standard output.
 */
public class EntitiesOutput {

    /**
     * Program entry point; simply instantiates the class, whose constructor
     * performs the whole reproduction.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new EntitiesOutput();
    }

    /**
     * Builds the input page and the stylesheet, transforms the page into a
     * {@link StringWriter} and prints the writer's contents to
     * {@code System.out}. Any exception raised along the way is reported via
     * {@code printStackTrace()} and otherwise ignored.
     */
    public EntitiesOutput() {
        try {
            DOMImplementation domImpl = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder()
                    .getDOMImplementation();

            Document page = buildHtmlPage(domImpl);
            Document stylesheet = this.getHTMLTransform(domImpl);

            Transformer transformer = TransformerFactory.newInstance()
                    .newTransformer(new DOMSource(stylesheet));

            StringWriter buffer = new StringWriter();
            transformer.transform(new DOMSource(page), new StreamResult(buffer));
            System.out.println(buffer.toString());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the input document: an {@code html} root holding a
     * {@code head/title} and a {@code body/p} whose text contains the
     * accented characters under test.
     *
     * @param impl DOM implementation used to create the document
     * @return the freshly built input document
     */
    private Document buildHtmlPage(DOMImplementation impl) {
        Document page = impl.createDocument(null, "html", null);
        Element root = page.getDocumentElement();

        Element headElement = page.createElement("head");
        Element titleElement = page.createElement("title");
        titleElement.appendChild(page.createTextNode("HTML output"));
        headElement.appendChild(titleElement);
        root.appendChild(headElement);

        Element bodyElement = page.createElement("body");
        Element paragraph = page.createElement("p");
        paragraph.appendChild(page.createTextNode("é à ù"));
        bodyElement.appendChild(paragraph);
        root.appendChild(bodyElement);

        return page;
    }

    /**
     * Creates the stylesheet as a DOM tree: an {@code output} element
     * requesting the html method, version 4.0, UTF-8 encoding and the
     * HTML 4.01 Strict doctype, followed by a single template matching
     * {@code /} that copies the current node with {@code copy-of}.
     *
     * @param impl DOM implementation used to create the document
     * @return the stylesheet document
     */
    private Document getHTMLTransform(DOMImplementation impl) {
        final String xslNamespace = "http://www.w3.org/1999/XSL/Transform";

        Document sheet = impl.createDocument(xslNamespace, "stylesheet", null);
        Element sheetRoot = sheet.getDocumentElement();
        sheetRoot.setAttribute("version", "2.0");

        // Serialisation settings under test.
        Element outputSpec = sheet.createElementNS(xslNamespace, "output");
        outputSpec.setAttribute("method", "html");
        outputSpec.setAttribute("version", "4.0");
        outputSpec.setAttribute("encoding", "UTF-8");
        outputSpec.setAttribute("doctype-public", "-//W3C//DTD HTML 4.01 Strict//EN");
        outputSpec.setAttribute("doctype-system", "http://www.w3.org/TR/html4/strict.dtd");
        sheetRoot.appendChild(outputSpec);

        // Single root template that copies the input through unchanged.
        Element rootTemplate = sheet.createElementNS(xslNamespace, "template");
        rootTemplate.setAttribute("match", "/");
        Element copyAll = sheet.createElementNS(xslNamespace, "copy-of");
        copyAll.setAttribute("select", ".");
        rootTemplate.appendChild(copyAll);
        sheetRoot.appendChild(rootTemplate);

        return sheet;
    }
}
---------- END SOURCE ----------