A DESCRIPTION OF THE REQUEST :
If a monitor is owned by another thread, the HotSpot code goes into a wait state. Some simple spinlock code based on the Intel "pause" instruction should be added to that code to greatly increase performance. This will at least start to make Sun's JVM look a bit more respectable compared with JRockit's performance.
The function void ObjectMonitor::EnterI (TRAPS) in os/linux/vm/objectMonitor_linux.cpp could easily be modified as shown below (the comment is already there; I'm just filling in the details):
// Consider spinning here ... see comments above ...
// Proposed fixed-length busy-wait: 20 outer x 50 inner iterations, i.e. 1000
// executions of the "pause" instruction, before control continues past here.
// NOTE(review): i and j are not declared in this fragment; presumably they are
// integer locals of the enclosing function -- confirm.
// NOTE(review): the loop body contains only "pause"; as written it never
// re-tests whether the monitor has been released while spinning.
for (i=0; i<20; i++) {
for (j=0; j<50; j++) {
__asm__("pause");
}
}
JUSTIFICATION :
Performance of synchronized code is very poor. See the attached example. Making this simple change to the hotspot source would greatly help.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Faster result when running test.
ACTUAL -
Takes too long to complete test.
Run as follows (this is on a 2-way Xeon with HT enabled):
java SyncFifo 4 100000
Elapsed time = 7631 msecs
By making the change shown to the hotspot code, this changes to:
java SyncFifo 4 100000
Elapsed time = 4462 msecs
---------- BEGIN SOURCE ----------
class SFifo {
private static final int MAXNUM = 65535;
private int head;
private Object[] nodes;
private int tail;
public SFifo() {
nodes = new Object[MAXNUM+1];
head = 0;
tail = 0;
for (int i=0; i<=MAXNUM; i++)
nodes[i] = null;
}
public synchronized boolean push(Object o) {
if (((tail+1)&MAXNUM)==head)
return false; // Queue full
nodes[tail++] = o;
tail &= MAXNUM;
return true;
}
public synchronized Object pop() {
if (head==tail)
return null;
Object retVal = nodes[head++];
head &= MAXNUM;
return retVal;
}
}
/**
 * Monitor-contention benchmark: several threads repeatedly pop {@link #PUSHPOP}
 * elements from one shared {@code SFifo} and push them back.
 *
 * <p>Usage: {@code java SyncFifo <numThreads> <loopSz>}. The program prints the elapsed
 * time of the threaded phase in milliseconds and then every element left in the queue.
 */
public class SyncFifo extends Thread {
    /** Queue shared by all workers; assigned in {@code main} before any worker is started. */
    static SFifo fifo;

    /** Number of elements each worker pops, then pushes back, per iteration. */
    static final int PUSHPOP = 6;

    /** {@code loopSz}: iterations per worker. {@code colls}: reset in main, not otherwise used here. */
    static int loopSz, colls;

    /** Runs {@code loopSz} iterations of "pop PUSHPOP elements, push them back". */
    @Override
    public void run() {
        Object[] tmp = new Object[PUSHPOP];
        int n = loopSz;
        while (n-- != 0) {
            for (int i = 0; i < PUSHPOP; i++) {
                tmp[i] = fifo.pop();
            }
            for (int i = 0; i < PUSHPOP; i++) {
                fifo.push(tmp[i]);
            }
        }
    }

    /**
     * Prefills the queue with {@code numThreads * PUSHPOP} integers, runs the workers,
     * and reports the elapsed time followed by the remaining queue contents.
     *
     * @param args {@code args[0]} = number of threads, {@code args[1]} = iterations per thread
     * @throws IllegalArgumentException if an argument is missing, not an integer, or negative
     * @throws IllegalStateException if the queue cannot hold the prefill elements
     * @throws Exception if the main thread is interrupted while joining the workers
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException("usage: java SyncFifo <numThreads> <loopSz>");
        }
        int numThreads = Integer.parseInt(args[0]);
        int iterations = Integer.parseInt(args[1]);
        if (numThreads < 0 || iterations < 0) {
            // A negative loop count would make run() spin until the counter wraps around.
            throw new IllegalArgumentException(
                    "numThreads and loopSz must be non-negative: " + numThreads + ", " + iterations);
        }
        loopSz = iterations;
        SyncFifo[] threads = new SyncFifo[numThreads];
        colls = 0;
        fifo = new SFifo();
        for (int i = 0; i < numThreads * PUSHPOP; i++) {
            if (!fifo.push(Integer.valueOf(i))) {
                throw new IllegalStateException("FIFO full while prefilling element " + i);
            }
        }
        for (int i = 0; i < numThreads; i++) {
            threads[i] = new SyncFifo();
        }
        // nanoTime is meant for elapsed-time measurement and is unaffected by clock changes.
        long startTime = System.nanoTime();
        for (int i = 0; i < numThreads; i++) {
            threads[i].start();
        }
        for (int i = 0; i < numThreads; i++) {
            threads[i].join();
        }
        long elapsedMillis = (System.nanoTime() - startTime) / 1000000L;
        System.out.println("Elapsed time = " + elapsedMillis + " msecs");
        int counter = 0;
        Integer o;
        while ((o = (Integer) fifo.pop()) != null) {
            System.out.println("FIFO element " + counter + " Has value " + o);
            counter++;
        }
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
Use jrockit!
###@###.### 2005-04-05 08:38:52 GMT
If a monitor is owned by another thread, the HotSpot code goes into a wait state. Some simple spinlock code based on the Intel "pause" instruction should be added to that code to greatly increase performance. This will at least start to make Sun's JVM look a bit more respectable compared with JRockit's performance.
The function void ObjectMonitor::EnterI (TRAPS) in os/linux/vm/objectMonitor_linux.cpp could easily be modified as shown below (the comment is already there; I'm just filling in the details):
// Consider spinning here ... see comments above ...
// Proposed fixed-length busy-wait: 20 outer x 50 inner iterations, i.e. 1000
// executions of the "pause" instruction, before control continues past here.
// NOTE(review): i and j are not declared in this fragment; presumably they are
// integer locals of the enclosing function -- confirm.
// NOTE(review): the loop body contains only "pause"; as written it never
// re-tests whether the monitor has been released while spinning.
for (i=0; i<20; i++) {
for (j=0; j<50; j++) {
__asm__("pause");
}
}
JUSTIFICATION :
Performance of synchronized code is very poor. See the attached example. Making this simple change to the hotspot source would greatly help.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Faster result when running test.
ACTUAL -
Takes too long to complete test.
Run as follows (this is on a 2-way Xeon with HT enabled):
java SyncFifo 4 100000
Elapsed time = 7631 msecs
By making the change shown to the hotspot code, this changes to:
java SyncFifo 4 100000
Elapsed time = 4462 msecs
---------- BEGIN SOURCE ----------
class SFifo {
private static final int MAXNUM = 65535;
private int head;
private Object[] nodes;
private int tail;
public SFifo() {
nodes = new Object[MAXNUM+1];
head = 0;
tail = 0;
for (int i=0; i<=MAXNUM; i++)
nodes[i] = null;
}
public synchronized boolean push(Object o) {
if (((tail+1)&MAXNUM)==head)
return false; // Queue full
nodes[tail++] = o;
tail &= MAXNUM;
return true;
}
public synchronized Object pop() {
if (head==tail)
return null;
Object retVal = nodes[head++];
head &= MAXNUM;
return retVal;
}
}
/**
 * Monitor-contention benchmark: several threads repeatedly pop {@link #PUSHPOP}
 * elements from one shared {@code SFifo} and push them back.
 *
 * <p>Usage: {@code java SyncFifo <numThreads> <loopSz>}. The program prints the elapsed
 * time of the threaded phase in milliseconds and then every element left in the queue.
 */
public class SyncFifo extends Thread {
    /** Queue shared by all workers; assigned in {@code main} before any worker is started. */
    static SFifo fifo;

    /** Number of elements each worker pops, then pushes back, per iteration. */
    static final int PUSHPOP = 6;

    /** {@code loopSz}: iterations per worker. {@code colls}: reset in main, not otherwise used here. */
    static int loopSz, colls;

    /** Runs {@code loopSz} iterations of "pop PUSHPOP elements, push them back". */
    @Override
    public void run() {
        Object[] tmp = new Object[PUSHPOP];
        int n = loopSz;
        while (n-- != 0) {
            for (int i = 0; i < PUSHPOP; i++) {
                tmp[i] = fifo.pop();
            }
            for (int i = 0; i < PUSHPOP; i++) {
                fifo.push(tmp[i]);
            }
        }
    }

    /**
     * Prefills the queue with {@code numThreads * PUSHPOP} integers, runs the workers,
     * and reports the elapsed time followed by the remaining queue contents.
     *
     * @param args {@code args[0]} = number of threads, {@code args[1]} = iterations per thread
     * @throws IllegalArgumentException if an argument is missing, not an integer, or negative
     * @throws IllegalStateException if the queue cannot hold the prefill elements
     * @throws Exception if the main thread is interrupted while joining the workers
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            throw new IllegalArgumentException("usage: java SyncFifo <numThreads> <loopSz>");
        }
        int numThreads = Integer.parseInt(args[0]);
        int iterations = Integer.parseInt(args[1]);
        if (numThreads < 0 || iterations < 0) {
            // A negative loop count would make run() spin until the counter wraps around.
            throw new IllegalArgumentException(
                    "numThreads and loopSz must be non-negative: " + numThreads + ", " + iterations);
        }
        loopSz = iterations;
        SyncFifo[] threads = new SyncFifo[numThreads];
        colls = 0;
        fifo = new SFifo();
        for (int i = 0; i < numThreads * PUSHPOP; i++) {
            if (!fifo.push(Integer.valueOf(i))) {
                throw new IllegalStateException("FIFO full while prefilling element " + i);
            }
        }
        for (int i = 0; i < numThreads; i++) {
            threads[i] = new SyncFifo();
        }
        // nanoTime is meant for elapsed-time measurement and is unaffected by clock changes.
        long startTime = System.nanoTime();
        for (int i = 0; i < numThreads; i++) {
            threads[i].start();
        }
        for (int i = 0; i < numThreads; i++) {
            threads[i].join();
        }
        long elapsedMillis = (System.nanoTime() - startTime) / 1000000L;
        System.out.println("Elapsed time = " + elapsedMillis + " msecs");
        int counter = 0;
        Integer o;
        while ((o = (Integer) fifo.pop()) != null) {
            System.out.println("FIFO element " + counter + " Has value " + o);
            counter++;
        }
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
Use jrockit!
###@###.### 2005-04-05 08:38:52 GMT