Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-4504745

javax.xml.transform.Transformer encoding does not work properly

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Fixed
    • Icon: P5 P5
    • 5.0
    • 1.1
    • xml



      Name: bsT130419 Date: 09/19/2001


      java version "1.3.0_02"
      Java(TM) 2 Runtime Environment, Standard Edition (build 1.3.0_02)
      Java HotSpot(TM) Client VM (build 1.3.0_02, mixed mode)

      The problem occurs when trying to use a transformer to work on Latin-Greek
      texts with its OutputKeys.ENCODING property set to "ISO-8859-7" or
      "windows-1253".

      For instance, consider the following program:

      import java.io.*;
      import java.util.*;

      import javax.xml.parsers.*;
      import org.xml.sax.*;
      import org.w3c.dom.*;

      import javax.xml.transform.*;

      import javax.xml.transform.dom.DOMSource;

      import javax.xml.transform.stream.StreamResult;

      /**
       * XmlTest converts the pipe-delimited text file named on the command line
       * to XML and writes the result to standard output.
       *
       * Each input line becomes one element named after the second command-line
       * argument; each '|'-separated field of that line becomes a child element
       * named column0, column1, ... holding the trimmed field text.
       *
       * Example: XmlTest example.txt example
       */
      public class XmlTest
      {
          /**
           * Document instance to hold the xml tree.
           */
          private Document document;

          /**
           * Element name used for every row (second command-line argument).
           */
          private String resultName;

          /**
           * Reader over the input file (first command-line argument).
           */
          private BufferedReader br;

          /**
           * Entry point. Expects exactly two arguments: the input file name and
           * the element name to use for each row. With any other argument count
           * a usage message is printed to standard error.
           *
           * @param argv command-line arguments
           */
          public static void main(String[] argv)
          {
              if (argv.length != 2) {
                  System.err.println("Usage: java -classpath <classpath> XmlTest <file name> <result name>");
                  return;
              }

              XmlTest xmlTest = new XmlTest();
              xmlTest.resultName = argv[1];

              try {
                  xmlTest.br = new BufferedReader(new FileReader(argv[0]));
              } catch (FileNotFoundException fne) {
                  System.err.println(fne.getMessage());
                  System.exit(1);
              }

              xmlTest.buildDom();
              xmlTest.outputDom();
          } // main


          /**
           * buildDom() reads the file passed in the command line and puts the
           * results in a DOM tree. The reader is closed once the file has been
           * consumed. The process exits with status 1 if no parser can be
           * created or if reading fails.
           */
          public void buildDom()
          {
              System.err.println("Building DOM...");
              DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
              DocumentBuilder builder = null;
              try {
                  builder = factory.newDocumentBuilder();
              } catch (ParserConfigurationException pce) {
                  // Parser with specified options can't be built; continuing
                  // would only fail later with a NullPointerException.
                  pce.printStackTrace();
                  System.exit(1);
              }

              document = builder.newDocument(); // Create from scratch

              Element root = document.createElement("table");
              document.appendChild(root);

              // Fill the xml tree
              try {
                  String line;
                  while ((line = br.readLine()) != null) {
                      // One element (named by resultName) per input line.
                      Element row = document.createElement(resultName);
                      root.appendChild(row);

                      // One <columnN> element per '|'-separated field.
                      StringTokenizer st = new StringTokenizer(line, "|");
                      for (int i = 0; st.hasMoreTokens(); i++) {
                          Element column = document.createElement("column" + i);
                          row.appendChild(column);
                          // Text node holding the trimmed data for this row/column.
                          Text columnData = document.createTextNode(st.nextToken().trim());
                          column.appendChild(columnData);
                      }
                  }
              } catch (IOException ioe) {
                  System.err.println(ioe.getMessage());
                  System.exit(1);
              } finally {
                  try {
                      br.close();
                  } catch (IOException ignored) {
                      // Nothing useful can be done if closing the input fails.
                  }
              }
          } // buildDom

          /**
           * Outputs a DOM tree to the standard output, indented, using the
           * encoding set through OutputKeys.ENCODING. The process exits with
           * status 1 if no transformer can be created.
           */
          public void outputDom()
          {
              System.err.println("Outputting DOM...");
              // Use a transformer for output
              TransformerFactory tFactory = TransformerFactory.newInstance();
              Transformer transformer = null;
              try {
                  transformer = tFactory.newTransformer();
                  transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
                  transformer.setOutputProperty(OutputKeys.INDENT, "yes");
              } catch (TransformerConfigurationException tce) {
                  // Without a transformer nothing can be written; continuing
                  // would only fail later with a NullPointerException.
                  reportTransformerError("\n** Transformer Factory error", tce);
                  System.exit(1);
              }

              DOMSource source = new DOMSource(document);
              try {
                  transformer.transform(source, new StreamResult(System.out));
              } catch (TransformerException te) {
                  reportTransformerError("\n** Transformation error", te);
              }
          }

          /**
           * Prints a heading and the exception message to standard error, then
           * the stack trace of the contained exception if there is one, or of
           * the exception itself otherwise.
           *
           * @param heading line printed before the exception message
           * @param error   the transformer exception to report
           */
          private static void reportTransformerError(String heading, TransformerException error)
          {
              System.err.println(heading);
              System.err.println(" " + error.getMessage());

              // Use the contained exception, if any
              Throwable x = error;
              if (error.getException() != null) {
                  x = error.getException();
              }
              x.printStackTrace();
          }

      }

      When this program is run on the file example.txt, comprised of latin-greek
      characters:

      | 0|????? |????? |
      | 1|??? |????? |
      | 2|??? |??????? |
      | 3|???? |????? |
      | 4|??????? |??????? |
      | 5|????? |?????? |
      | 6|??? |???? |
      | 7|???? |?????? |
      | 8|???? |????? |
      | 9|????? |?????? |
      | 10|???? |?????? |
      | 11|?????? |???????? |
      | 12|?????? |???????? |
      | 13|???????? |??????????? |
      | 14|??????????? |????????????? |
      | 15|????????? |???????????? |
      | 16|??????? |?????????? |
      | 17|???????? |???????????? |
      | 18|???????? |??????????? |
      | 19|????????? |???????????? |
      | 20|?????? |??????? |
      | 21|?????? ??? |??????? ????? |
      | 22|?????? ??? |??????? ??????? |
      | 23|?????? ???? |??????? ????? |
      | 24|?????? ??????? |??????? ??????? |
      | 25|?????? ????? |??????? ?????? |
      | 26|?????? ??? |??????? ???? |
      | 27|?????? ???? |??????? ?????? |
      | 28|?????? ???? |??????? ????? |
      | 29|?????? ????? |??????? ?????? |
      | 30|??????? |????????? |
      | 31|??????? ??? |????????? ????? |
      | 32|??????? ??? |????????? ??????? |
      | 33|??????? ???? |????????? ????? |
      | 34|??????? ????????|????????? ??????? |
      | 35|??????? ????? |????????? ?????? |
      | 36|??????? ??? |????????? ???? |
      | 37|??????? ???? |????????? ?????? |
      | 38|??????? ???? |????????? ????? |
      | 39|??????? ????? |????????? ?????? |
      | 1000|????? |???????? |

      it will work properly only if the transformer is set with
      transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
      which is not the correct encoding for the document.

      If the transformer is set with
      transformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-7"); // latin-greek

      or
      transformer.setOutputProperty(OutputKeys.ENCODING, "windows-1253"); // windows-greek

      the output is wrong. In the case of ISO-8859-7 it produces question marks
      in place of all Greek characters. In the case of windows-1253 it produces
      character entities in place of all Greek characters.
      (Review ID: 131458)
      ======================================================================

            duke J. Duke
            bstrathesunw Bill Strathearn (Inactive)
            Votes:
            0 Vote for this issue
            Watchers:
            0 Start watching this issue

              Created:
              Updated:
              Resolved:
              Imported:
              Indexed: