-
Bug
-
Resolution: Not an Issue
-
P4
-
6
-
x86
-
windows_xp
FULL PRODUCT VERSION :
java version "1.6.0-rc"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.6.0-rc-b63) Java HotSpot(TM) Client VM (build 1.6.0-rc-b63, mixed mode)
ADDITIONAL OS VERSION INFORMATION :
Microsoft Windows XP [Version 5.1.2600]
A DESCRIPTION OF THE PROBLEM :
With the latest drop of Mustang, we conducted some performance comparisons of
sjsxp (the StAX parser) relative to the default SAX parser of Sun's JDK 1.5.
We understand that performance comparisons between two different XML parser APIs
are not trivial.
This test program is designed so that the program retrieves the same data (local names, prefixes, character content...), so that the comparison is as fair as possible.
The comparison is done by parsing different input file sizes. The actual XML input files are
those generated by XMark (http://monetdb.cwi.nl/xml/). Each file was parsed a
number of times (500) using SAX and using sjsxp.
Timing results are:
xmark_0.00001.xml - sjsxp: 661 ms
xmark_0.00001.xml - SAX: 571 ms
xmark_0.0001.xml - sjsxp: 771 ms
xmark_0.0001.xml - SAX: 691 ms
xmark_0.001.xml - sjsxp: 2534 ms
xmark_0.001.xml - SAX: 1912 ms
xmark_0.01.xml - sjsxp: 24970 ms
xmark_0.01.xml - SAX: 18491 ms
The conclusion is that there's a general overhead in sjsxp that grows as the XML input file grows. We would have thought that the parser would perform on par with
the SAX parser, as both are Xerces based. Is this performance issue known or
at least explainable?
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run source code and see Description
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Relatively similar behavior between two parsers.
ACTUAL -
See description
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
import java.io.File;
import java.io.FileInputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Micro-benchmark that repeatedly parses a fixed list of XML files with the
 * StAX ({@link XMLStreamReader}) and SAX ({@link SAXParser}) APIs and prints
 * the elapsed wall-clock time for each file and parser to standard output.
 */
public class XmlParserPerformance {
    /** Number of times a file is parsed inside one timed measurement. */
    private final int nrOfRuns = 500;

    /** Paths of the input files that are measured, in this order. */
    private final String[] xmlFileNames = new String[] {
        "xml/xmark_0.00001.xml"
        , "xml/xmark_0.0001.xml"
        , "xml/xmark_0.001.xml"
        , "xml/xmark_0.01.xml"
        , "xml/xmark_0.1.xml"
        , "xml/xmark_1.xml"
    };

    /** File parsed by the next call to {@link #testSTAX()} or {@link #testSAX()}. */
    private File xmlFile;

    /**
     * Runs the whole benchmark: initialises both parsers, performs one warm-up
     * pass on the first file (its timings are printed before the banner), then
     * measures every file in {@link #xmlFileNames} with StAX followed by SAX.
     *
     * @throws Exception if a parser cannot be created or a file cannot be
     *                   read or parsed
     */
    private void go() throws Exception {
        // Init
        initSTAX();
        initSAX();
        // Warm up run
        xmlFile = new File(xmlFileNames[0]);
        testSTAX();
        testSAX();
        // Actual timings
        System.out.println("===== Start timings =====");
        for (int fileIx = 0; fileIx < xmlFileNames.length; fileIx++) {
            xmlFile = new File(xmlFileNames[fileIx]);
            // STAX
            testSTAX();
            // SAX
            testSAX();
        }
    }

    // -------------------- STAX --------------------
    /** Factory reused for every StAX reader created by {@link #testSTAX()}. */
    private XMLInputFactory xmlInputFactory;

    /**
     * Creates the StAX input factory.
     *
     * @throws Exception if no factory implementation can be instantiated
     */
    private void initSTAX() throws Exception {
        xmlInputFactory = XMLInputFactory.newInstance();
    }

    /**
     * Parses {@link #xmlFile} {@link #nrOfRuns} times with a fresh
     * {@link XMLStreamReader} per run, touching the name/namespace/text
     * accessors of each event (results are discarded), and prints the total
     * elapsed milliseconds.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    private void testSTAX() throws Exception {
        long begin = System.currentTimeMillis();
        for (int i = 0; i < nrOfRuns; i++) {
            FileInputStream in = new FileInputStream(xmlFile);
            try {
                XMLStreamReader xsr = xmlInputFactory.createXMLStreamReader(in);
                try {
                    while (xsr.hasNext()) {
                        int event = xsr.next();
                        xsr.getEventType();
                        switch (event) {
                            case XMLStreamReader.START_ELEMENT:
                                xsr.getLocalName();
                                xsr.getPrefix();
                                xsr.getNamespaceURI();
                                xsr.getAttributeCount();
                                xsr.getNamespaceCount();
                                break;
                            case XMLStreamReader.END_ELEMENT:
                                xsr.getLocalName();
                                xsr.getPrefix();
                                xsr.getNamespaceURI();
                                break;
                            case XMLStreamReader.CHARACTERS:
                            case XMLStreamReader.SPACE:
                                xsr.getText();
                                break;
                            default:
                                break;
                        }
                    }
                } finally {
                    // Releases the reader's own resources only; per the StAX
                    // API it does not close the underlying input source.
                    xsr.close();
                }
            } finally {
                // Without this, every run leaks one open file descriptor
                // until the stream happens to be garbage collected.
                in.close();
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(xmlFile.getName() + " - sjsxp: " + (end - begin) + " ms");
    }

    // -------------------- SAX --------------------
    /** Factory used once to create {@link #saxParser}. */
    private SAXParserFactory saxParserFactory;

    /** Parser instance reused for every SAX run. */
    private SAXParser saxParser;

    /**
     * Creates the SAX parser factory and a single parser from it.
     *
     * @throws Exception if the factory or parser cannot be created
     */
    private void initSAX() throws Exception {
        saxParserFactory = SAXParserFactory.newInstance();
        saxParser = saxParserFactory.newSAXParser();
    }

    /**
     * Parses {@link #xmlFile} {@link #nrOfRuns} times with the shared SAX
     * parser and a new no-op {@link DefaultHandler} per run, then prints the
     * total elapsed milliseconds.
     *
     * @throws Exception if the file cannot be read or parsed
     */
    private void testSAX() throws Exception {
        long begin = System.currentTimeMillis();
        for (int i = 0; i < nrOfRuns; i++) {
            saxParser.parse(xmlFile, new DefaultHandler());
        }
        long end = System.currentTimeMillis();
        System.out.println(xmlFile.getName() + " - SAX: " + (end - begin) + " ms");
    }

    /**
     * Program entry point; runs the benchmark and prints the stack trace of
     * any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new XmlParserPerformance().go();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
---------- END SOURCE ----------
java version "1.6.0-rc"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.6.0-rc-b63) Java HotSpot(TM) Client VM (build 1.6.0-rc-b63, mixed mode)
ADDITIONAL OS VERSION INFORMATION :
Microsoft Windows XP [Version 5.1.2600]
A DESCRIPTION OF THE PROBLEM :
With the latest drop of Mustang, we conducted some performance comparisons of
sjsxp (the StAX parser) relative to the default SAX parser of Sun's JDK 1.5.
We understand that performance comparisons between two different XML parser APIs
are not trivial.
This test program is designed so that the program retrieves the same data (local names, prefixes, character content...), so that the comparison is as fair as possible.
The comparison is done by parsing different input file sizes. The actual XML input files are
those generated by XMark (http://monetdb.cwi.nl/xml/). Each file was parsed a
number of times (500) using SAX and using sjsxp.
Timing results are:
xmark_0.00001.xml - sjsxp: 661 ms
xmark_0.00001.xml - SAX: 571 ms
xmark_0.0001.xml - sjsxp: 771 ms
xmark_0.0001.xml - SAX: 691 ms
xmark_0.001.xml - sjsxp: 2534 ms
xmark_0.001.xml - SAX: 1912 ms
xmark_0.01.xml - sjsxp: 24970 ms
xmark_0.01.xml - SAX: 18491 ms
The conclusion is that there's a general overhead in sjsxp that grows as the XML input file grows. We would have thought that the parser would perform on par with
the SAX parser, as both are Xerces based. Is this performance issue known or
at least explainable?
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
Run source code and see Description
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Relatively similar behavior between two parsers.
ACTUAL -
See description
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
import java.io.File;
import java.io.FileInputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Micro-benchmark that repeatedly parses a fixed list of XML files with the
 * StAX ({@link XMLStreamReader}) and SAX ({@link SAXParser}) APIs and prints
 * the elapsed wall-clock time for each file and parser to standard output.
 */
public class XmlParserPerformance {
    /** Number of times a file is parsed inside one timed measurement. */
    private final int nrOfRuns = 500;

    /** Paths of the input files that are measured, in this order. */
    private final String[] xmlFileNames = new String[] {
        "xml/xmark_0.00001.xml"
        , "xml/xmark_0.0001.xml"
        , "xml/xmark_0.001.xml"
        , "xml/xmark_0.01.xml"
        , "xml/xmark_0.1.xml"
        , "xml/xmark_1.xml"
    };

    /** File parsed by the next call to {@link #testSTAX()} or {@link #testSAX()}. */
    private File xmlFile;

    /**
     * Runs the whole benchmark: initialises both parsers, performs one warm-up
     * pass on the first file (its timings are printed before the banner), then
     * measures every file in {@link #xmlFileNames} with StAX followed by SAX.
     *
     * @throws Exception if a parser cannot be created or a file cannot be
     *                   read or parsed
     */
    private void go() throws Exception {
        // Init
        initSTAX();
        initSAX();
        // Warm up run
        xmlFile = new File(xmlFileNames[0]);
        testSTAX();
        testSAX();
        // Actual timings
        System.out.println("===== Start timings =====");
        for (int fileIx = 0; fileIx < xmlFileNames.length; fileIx++) {
            xmlFile = new File(xmlFileNames[fileIx]);
            // STAX
            testSTAX();
            // SAX
            testSAX();
        }
    }

    // -------------------- STAX --------------------
    /** Factory reused for every StAX reader created by {@link #testSTAX()}. */
    private XMLInputFactory xmlInputFactory;

    /**
     * Creates the StAX input factory.
     *
     * @throws Exception if no factory implementation can be instantiated
     */
    private void initSTAX() throws Exception {
        xmlInputFactory = XMLInputFactory.newInstance();
    }

    /**
     * Parses {@link #xmlFile} {@link #nrOfRuns} times with a fresh
     * {@link XMLStreamReader} per run, touching the name/namespace/text
     * accessors of each event (results are discarded), and prints the total
     * elapsed milliseconds.
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    private void testSTAX() throws Exception {
        long begin = System.currentTimeMillis();
        for (int i = 0; i < nrOfRuns; i++) {
            FileInputStream in = new FileInputStream(xmlFile);
            try {
                XMLStreamReader xsr = xmlInputFactory.createXMLStreamReader(in);
                try {
                    while (xsr.hasNext()) {
                        int event = xsr.next();
                        xsr.getEventType();
                        switch (event) {
                            case XMLStreamReader.START_ELEMENT:
                                xsr.getLocalName();
                                xsr.getPrefix();
                                xsr.getNamespaceURI();
                                xsr.getAttributeCount();
                                xsr.getNamespaceCount();
                                break;
                            case XMLStreamReader.END_ELEMENT:
                                xsr.getLocalName();
                                xsr.getPrefix();
                                xsr.getNamespaceURI();
                                break;
                            case XMLStreamReader.CHARACTERS:
                            case XMLStreamReader.SPACE:
                                xsr.getText();
                                break;
                            default:
                                break;
                        }
                    }
                } finally {
                    // Releases the reader's own resources only; per the StAX
                    // API it does not close the underlying input source.
                    xsr.close();
                }
            } finally {
                // Without this, every run leaks one open file descriptor
                // until the stream happens to be garbage collected.
                in.close();
            }
        }
        long end = System.currentTimeMillis();
        System.out.println(xmlFile.getName() + " - sjsxp: " + (end - begin) + " ms");
    }

    // -------------------- SAX --------------------
    /** Factory used once to create {@link #saxParser}. */
    private SAXParserFactory saxParserFactory;

    /** Parser instance reused for every SAX run. */
    private SAXParser saxParser;

    /**
     * Creates the SAX parser factory and a single parser from it.
     *
     * @throws Exception if the factory or parser cannot be created
     */
    private void initSAX() throws Exception {
        saxParserFactory = SAXParserFactory.newInstance();
        saxParser = saxParserFactory.newSAXParser();
    }

    /**
     * Parses {@link #xmlFile} {@link #nrOfRuns} times with the shared SAX
     * parser and a new no-op {@link DefaultHandler} per run, then prints the
     * total elapsed milliseconds.
     *
     * @throws Exception if the file cannot be read or parsed
     */
    private void testSAX() throws Exception {
        long begin = System.currentTimeMillis();
        for (int i = 0; i < nrOfRuns; i++) {
            saxParser.parse(xmlFile, new DefaultHandler());
        }
        long end = System.currentTimeMillis();
        System.out.println(xmlFile.getName() + " - SAX: " + (end - begin) + " ms");
    }

    /**
     * Program entry point; runs the benchmark and prints the stack trace of
     * any failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            new XmlParserPerformance().go();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
---------- END SOURCE ----------