Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8237456

Transform filtered through SAX filter mishandles character entities

XMLWordPrintable

      A DESCRIPTION OF THE PROBLEM :
      Character entities appear to bypass any ContentHandler events on the way to the output.
      Attached is a simple test in which a subclass of org.xml.sax.helpers.XMLFilterImpl is chained to an org.xml.sax.XMLReader, wrapped in a javax.xml.transform.sax.SAXSource, and passed to a javax.xml.transform.Transformer.transform call.

      The filter class should emit only a portion of the XML (the first thing2 element and its subtree). However, character entities in other parts of the document are also emitted. This does not happen when running with Java 8. It also does not happen if Xalan is substituted for Java's built-in transform implementation.

      STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
      Compile and run the provided source.

      EXPECTED VERSUS ACTUAL BEHAVIOR :
      EXPECTED -
      The output should be:

      SAXParserFactory class: com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl
      XmlReader class: com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser
      TransformerFactory class: com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl
      <?xml version="1.0" encoding="UTF-8"?><thing2 xmlns="things">
                      <name>The Other</name>
              </thing2>
      ACTUAL -
      Actual output (note the occurrence of the amp entity just before the root element):

      SAXParserFactory class: com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl
      XmlReader class: com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser
      TransformerFactory class: com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl
      <?xml version="1.0" encoding="UTF-8"?>&amp;<thing2 xmlns="things">
                      <name>The Other</name>
              </thing2>

      ---------- BEGIN SOURCE ----------
      TestCase1.java:

      package com.wrycan.testcase;

      import javax.xml.parsers.ParserConfigurationException;
      import javax.xml.parsers.SAXParser;
      import javax.xml.parsers.SAXParserFactory;
      import javax.xml.transform.OutputKeys;
      import javax.xml.transform.Transformer;
      import javax.xml.transform.TransformerFactory;
      import javax.xml.transform.sax.SAXSource;
      import javax.xml.transform.stream.StreamResult;
      import javax.xml.transform.TransformerFactoryConfigurationError;
      import javax.xml.transform.TransformerConfigurationException;
      import javax.xml.transform.TransformerException;

      import org.xml.sax.InputSource;
      import org.xml.sax.SAXException;
      import org.xml.sax.XMLReader;
      import org.xml.sax.XMLFilter;


      public class TestCase1 {
          public static void main(String[] args) {
              TestCase1 testCase = new TestCase1();
              XMLReader reader = testCase.getParser();

              final TransformerFactory transformerFactory = TransformerFactory.newInstance();
              System.out.println("TransformerFactory class: " + transformerFactory.getClass().getName());

              Transformer t;
              try {
                  t = transformerFactory.newTransformer();
              } catch(TransformerConfigurationException e){
                  e.printStackTrace();
                  return;
              }

              XMLFilter filter = new Thing2Filter(reader);

              try {
                  t.transform(new SAXSource(filter, new InputSource("things.xml")),
                  new StreamResult(System.out));
              } catch(TransformerException e) {
                  e.printStackTrace();
              }
          }

          private XMLReader getParser() {
              XMLReader reader = null;
              try {
                  SAXParserFactory factory = SAXParserFactory.newInstance();

                  String saxParserFactoryClassName = factory.getClass().getName();
                  System.out.println("SAXParserFactory class: " + saxParserFactoryClassName);

                  factory.setValidating(false);
                  factory.setNamespaceAware(true);

                  SAXParser parser = factory.newSAXParser();
                  reader = parser.getXMLReader();
                  System.out.println("XmlReader class: " + reader.getClass().getName());
              }
              catch (ParserConfigurationException pce)
              {
                  pce.printStackTrace();
              }
              catch (SAXException se)
              {
                  se.printStackTrace();
              }
              return reader;
          }

      }

      Thing2Filter.java

      package com.wrycan.testcase;


      import java.util.ArrayList;
      import java.util.List;

      import org.xml.sax.Attributes;
      import org.xml.sax.SAXException;
      import org.xml.sax.XMLReader;
      import org.xml.sax.helpers.XMLFilterImpl;

      import java.io.IOException;
      import org.xml.sax.InputSource;



      /**
       * SAX filter that forwards only the first {@code thing2} element (matched
       * by local name) together with its complete subtree, and suppresses every
       * other element and character event of the document.
       *
       * <p>Document-level events and prefix mappings are always forwarded.
       * Matching state is reset on every {@code startDocument}, so one filter
       * instance can be used for several parses in sequence.
       */
      public class Thing2Filter extends XMLFilterImpl {

          /** Local name of the element whose first occurrence is passed through. */
          private static final String TARGET_LOCAL_NAME = "thing2";

          /**
           * Nesting depth inside the matched subtree: 0 while outside it, 1 while
           * directly inside the matched element, and so on. Tracking depth rather
           * than a flag keeps the forwarded events balanced when the matched
           * element itself contains a nested {@code thing2}.
           */
          private int matchDepth = 0;

          /** Set once the matched element has been closed; later ones are skipped. */
          private boolean doneMatching = false;

          /**
           * Creates a filter reading from the given parent reader.
           *
           * @param parent the upstream XML reader
           */
          public Thing2Filter(XMLReader parent) {
              super(parent);
          }

          /**
           * Resets all matching state and forwards the event.
           *
           * @throws SAXException if the downstream handler reports an error
           */
          @Override
          public void startDocument() throws SAXException {
              // Reset both fields: a previous parse may have been aborted mid-match.
              matchDepth = 0;
              doneMatching = false;
              super.startDocument();
          }

          /**
           * Forwards the event if it opens the first {@code thing2} element or
           * lies inside it; otherwise drops it.
           *
           * @throws SAXException if the downstream handler reports an error
           */
          @Override
          public void startElement(String namespaceURI, String localName, String qName, Attributes attrs) throws SAXException
          {
              if (matchDepth > 0) {
                  matchDepth++; // descendant of the matched element
              } else if (!doneMatching && TARGET_LOCAL_NAME.equals(localName)) {
                  matchDepth = 1; // start matching when the first thing2 is hit
              }

              if (matchDepth > 0) {
                  super.startElement(namespaceURI, localName, qName, attrs);
              }
          }

          /**
           * Forwards the event if it lies inside the matched subtree. The match is
           * over once the end tag that balances the matched start tag is seen.
           *
           * @throws SAXException if the downstream handler reports an error
           */
          @Override
          public void endElement(String namespaceURI, String localName, String qName) throws SAXException
          {
              if (matchDepth == 0) {
                  return;
              }
              super.endElement(namespaceURI, localName, qName);
              matchDepth--;
              if (matchDepth == 0) {
                  doneMatching = true;
              }
          }

          /**
           * Forwards character data only while inside the matched subtree.
           *
           * @throws SAXException if the downstream handler reports an error
           */
          @Override
          public void characters(char[] ch, int start, int length) throws SAXException {
              if (matchDepth > 0) {
                  super.characters(ch, start, length);
              }
          }

          /**
           * Always forwards prefix mappings, including those declared outside the
           * matched subtree.
           *
           * @throws SAXException if the downstream handler reports an error
           */
          @Override
           public void startPrefixMapping(java.lang.String prefix, java.lang.String uri) throws SAXException{
               super.startPrefixMapping(prefix, uri);
           }

          /**
           * Always forwards the end of a prefix mapping.
           *
           * @throws SAXException if the downstream handler reports an error
           */
          @Override
           public void endPrefixMapping(java.lang.String prefix) throws SAXException{
               super.endPrefixMapping(prefix);
           }

      }

      Test XML file for input, things.xml:

      <?xml version="1.0" encoding="UTF-8"?>
      <thing1 xmlns="things">
      <name>This &amp; That</name>
      <thing2>
      <name>The Other</name>
      </thing2>
      <thing2>
      <name>Whatever</name>
      </thing2>
      </thing1>
      ---------- END SOURCE ----------

      FREQUENCY : always


        1. things.xml
          0.2 kB
        2. Thing2Filter.java
          2 kB
        3. TestCase1.java
          2 kB

            joehw Joe Wang
            webbuggrp Webbug Group
            Votes:
            0 Vote for this issue
            Watchers:
            5 Start watching this issue

              Created:
              Updated:
              Resolved: