-
Bug
-
Resolution: Fixed
-
P5
-
1.1
-
beta
-
generic
-
generic
Name: bsT130419 Date: 09/19/2001
java version "1.3.0_02"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.3.0_02)
Java HotSpot(TM) Client VM (build 1.3.0_02, mixed mode)
The problem occurs when trying to use a transformer to work on Latin-Greek
texts with its OutputKeys.ENCODING property set to "ISO-8859-7" or
"windows-1253".
For instance, consider the following program:
import java.io.*;
import java.util.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
/**
 * XmlTest converts the text file named on the command line to XML and
 * writes the result to standard output.
 * <p>
 * Each input line becomes one element named after the second command-line
 * argument; each '|'-separated field of the line becomes a child element
 * named column0, column1, ... holding the trimmed field text.
 * <p>
 * Example: XmlTest example.txt example
 */
public class XmlTest
{
    /**
     * Document instance to hold the xml tree.
     */
    private Document document;

    /** Element name used for every row (second command-line argument). */
    private String resultName;

    /** Reader over the input file (first command-line argument). */
    private BufferedReader br;

    /**
     * Entry point.
     *
     * @param argv argv[0] is the input file name, argv[1] the element name
     *             used for each row; any other argument count prints a
     *             usage message to standard error
     */
    public static void main(String argv[])
    {
        if (argv.length != 2) {
            System.err.println("Usage: java -classpath <classpath> XmlTest <file name> <result name>");
            return;
        }

        XmlTest xmlTest = new XmlTest();
        xmlTest.resultName = argv[1];
        try {
            // NOTE(review): FileReader decodes with the platform default
            // charset. That is left unchanged on purpose, since this program
            // is a reproducer and its input decoding may matter - confirm
            // before switching to an explicit charset.
            xmlTest.br = new BufferedReader(new FileReader(argv[0]));
        } catch (java.io.FileNotFoundException fne) {
            System.err.println(fne.getMessage());
            System.exit(1);
        }
        xmlTest.buildDom();
        xmlTest.outputDom();
    } // main

    /**
     * buildDom() reads the file passed in the command line and puts the
     * results in a DOM tree rooted at a "table" element. The reader is
     * closed once the whole file has been consumed.
     * <p>
     * Terminates the JVM with status 1 if no DocumentBuilder can be created
     * or if reading the file fails.
     */
    public void buildDom()
    {
        System.err.println("Building DOM...");
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException pce) {
            // Parser with specified options can't be built. Without a
            // builder there is nothing to do, so stop here instead of
            // failing later with a NullPointerException.
            pce.printStackTrace();
            System.exit(1);
            return; // unreachable; satisfies definite assignment of builder
        }

        document = builder.newDocument(); // Create from scratch
        Element root = document.createElement("table");
        document.appendChild(root);

        // Fill the xml tree
        String line;
        try {
            while ((line = br.readLine()) != null) {
                // One row element per input line.
                Element row = document.createElement(resultName);
                root.appendChild(row);

                // One columnN element per '|'-separated field.
                StringTokenizer st = new StringTokenizer(line, "|");
                for (int i = 0; st.hasMoreTokens(); i++) {
                    Element column = document.createElement("column" + i);
                    row.appendChild(column);
                    // Text node for the data in this row/column.
                    Text columnData = document.createTextNode(st.nextToken().trim());
                    column.appendChild(columnData);
                }
            }
        } catch (java.io.IOException ioe) {
            System.err.println(ioe.getMessage());
            System.exit(1);
        } finally {
            try {
                br.close();
            } catch (java.io.IOException ignored) {
                // The input has already been read; a failure to close it
                // cannot affect the DOM that was built.
            }
        }
    } // buildDom

    /**
     * Outputs a DOM tree to the standard output.
     * <p>
     * Serialises through an identity Transformer with the ENCODING output
     * property set to "ISO-8859-1" and INDENT set to "yes". Failures are
     * reported on standard error.
     */
    public void outputDom()
    {
        System.err.println("Outputting DOM...");
        // Use a transformer for output
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer;
        try {
            transformer = tFactory.newTransformer();
        } catch (TransformerConfigurationException tce) {
            // Without a transformer nothing can be written; report and stop
            // instead of failing later with a NullPointerException.
            reportTransformerFailure("\n** Transformer Factory error", tce);
            return;
        }
        transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");

        DOMSource source = new DOMSource(document);
        try {
            transformer.transform(source, new StreamResult(System.out));
        } catch (TransformerException te) {
            reportTransformerFailure("\n** Transformation error", te);
        }
    }

    /**
     * Prints a heading, the exception message and a stack trace to standard
     * error, preferring the wrapped exception's stack trace when one exists.
     */
    private static void reportTransformerFailure(String heading, TransformerException failure)
    {
        System.err.println(heading);
        System.err.println(" " + failure.getMessage());
        // Use the contained exception, if any
        Throwable x = failure;
        if (failure.getException() != null) {
            x = failure.getException();
        }
        x.printStackTrace();
    }
}
When this program is run on the file example.txt, which consists of
Latin-Greek characters:
| 0|????? |????? |
| 1|??? |????? |
| 2|??? |??????? |
| 3|???? |????? |
| 4|??????? |??????? |
| 5|????? |?????? |
| 6|??? |???? |
| 7|???? |?????? |
| 8|???? |????? |
| 9|????? |?????? |
| 10|???? |?????? |
| 11|?????? |???????? |
| 12|?????? |???????? |
| 13|???????? |??????????? |
| 14|??????????? |????????????? |
| 15|????????? |???????????? |
| 16|??????? |?????????? |
| 17|???????? |???????????? |
| 18|???????? |??????????? |
| 19|????????? |???????????? |
| 20|?????? |??????? |
| 21|?????? ??? |??????? ????? |
| 22|?????? ??? |??????? ??????? |
| 23|?????? ???? |??????? ????? |
| 24|?????? ??????? |??????? ??????? |
| 25|?????? ????? |??????? ?????? |
| 26|?????? ??? |??????? ???? |
| 27|?????? ???? |??????? ?????? |
| 28|?????? ???? |??????? ????? |
| 29|?????? ????? |??????? ?????? |
| 30|??????? |????????? |
| 31|??????? ??? |????????? ????? |
| 32|??????? ??? |????????? ??????? |
| 33|??????? ???? |????????? ????? |
| 34|??????? ????????|????????? ??????? |
| 35|??????? ????? |????????? ?????? |
| 36|??????? ??? |????????? ???? |
| 37|??????? ???? |????????? ?????? |
| 38|??????? ???? |????????? ????? |
| 39|??????? ????? |????????? ?????? |
| 1000|????? |???????? |
it will work properly only if the transformer is set with
transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
which is not the correct encoding for the document.
If the transformer is set with
transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-7"); // latin-greek
or
transformer.setOutputProperty(OutputKeys.ENCODING, "windows-1253"); // windows-greek
the output is problematic. In the case of ISO-8859-7 it produces question marks
in lieu of all greek characters. In the case of windows-1253 it produces
character entities in lieu of all greek characters.
(Review ID: 131458)
======================================================================