Uploaded image for project: 'JDK'
  1. JDK
  2. JDK-8028616

HTMLEditorKit parser doesn't handle a leading slash (/)

XMLWordPrintable


        Issue with Java version 6u41

        We use HTMLEditorKit to parse pre-existing HTML. Parsing this
        document works as expected:
        <body>/ at start inside body is okay</body>

        But if we don't have the <body> tag, then we get only a handleEndOfLineString
        callback, and do not get the actual text. This is the example document:
        / at start is bad

        Testcase:
        import javax.swing.text.MutableAttributeSet;
        import javax.swing.text.html.HTML;
        import javax.swing.text.html.HTMLDocument;
        import javax.swing.text.html.HTMLEditorKit;
        import javax.swing.text.html.parser.DTD;
        import javax.swing.text.html.parser.DocumentParser;
        import javax.swing.text.html.parser.Entity;
        import javax.swing.text.html.parser.ParserDelegator;
        import java.io.IOException;
        import java.io.Reader;
        import java.io.StringReader;

        /**
         * Reproducer for the HTML parser dropping a document whose first
         * character is a slash.
         *
         * <p>Two documents are parsed and every parser callback is echoed to
         * standard output: one that begins directly with {@code /} and one in
         * which the same text is wrapped in a {@code <body>} element.
         */
        public class ParserTest {
            /**
             * Parses each sample document in turn and prints the callbacks received.
             *
             * @param args ignored
             * @throws IOException if the parser fails while reading a document
             */
            public static void main(String[] args) throws IOException {
                String[] samples = {
                    "/ at start is bad",
                    "<body>/ at start inside body is okay</body>"
                };
                for (String sample : samples) {
                    doTest(sample);
                }
            }

            /**
             * Feeds {@code text} through the parser of a freshly created default
             * HTML document, printing a header line first and a blank line after.
             *
             * @param text the HTML source to parse
             * @throws IOException if the parser fails while reading {@code text}
             */
            private static void doTest(String text) throws IOException {
                say("doTest: " + text);

                HTMLEditorKit kit = new HTMLEditorKit();
                HTMLDocument document = (HTMLDocument) kit.createDefaultDocument();
                HTMLEditorKit.Parser parser = document.getParser();
                StringReader source = new StringReader(text);

                // The third argument is ignoreCharSet; it is passed as true.
                parser.parse(source, new ParserCB(), true);
                System.out.println();
            }

            /** Writes a single line to standard output. */
            private static void say(String line) {
                System.out.println(line);
            }

            /** Callback that prints one line per parser event it receives. */
            private static class ParserCB extends HTMLEditorKit.ParserCallback {
                @Override
                public void handleText(char[] data, int pos) {
                    say("handleText: " + String.valueOf(data));
                }

                @Override
                public void handleComment(char[] data, int pos) {
                    say("handleComment: " + String.valueOf(data));
                }

                @Override
                public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                    say("handleStartTag: " + t);
                }

                @Override
                public void handleEndTag(HTML.Tag t, int pos) {
                    say("handleEndTag: " + t);
                }

                @Override
                public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                    say("handleSimpleTag: " + t);
                }

                @Override
                public void handleError(String errorMsg, int pos) {
                    say("handleError: " + errorMsg);
                }

                @Override
                public void handleEndOfLineString(String eol) {
                    say("handleEndOfLineString: " + eol);
                }
            }
        }

        Output from the test case:
        doTest: / at start is bad
        handleEndOfLineString:


        doTest: <body>/ at start inside body is okay</body>
        handleStartTag: html
        handleStartTag: head
        handleEndTag: head
        handleStartTag: body
        handleText: / at start inside body is okay
        handleEndTag: body
        handleEndTag: html
        handleEndOfLineString:

              dmarkov Dmitry Markov
              asaha Abhijit Saha
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

                Created:
                Updated:
                Resolved: