Add MOVD SSE2 instructions for long volatile moves, d2l, l2d conversions
and long raw moves.
Use 'fst STn' in place of the 'fld ST0; fstp STn' sequence in the FPU unit.
And a few fixes:
MachSpillCopy - use 'lea' instead of 'sub/add' to preserve flags.
MODX and MODXD - move popFPU to the end of the instruction.
The following Java methods were used to verify the generated code:
/**
 * Stand-alone driver that exercises three kinds of operations:
 * check0 - read-modify-write of a volatile long,
 * check1 - a chain of floating-point remainder, conversion and Math calls,
 * check2 - raw bit round trips between double/long and float/int.
 * main() first calls each test with small limits, then runs each test for
 * about ten seconds and prints the number of loops per second.
 */
public class mtest {
// Loop counters for test0/test1/test2. Each one is also read by the timer
// thread started in main() to compute throughput, and is then set to LIMIT
// by that thread so that the corresponding loop terminates.
// NOTE(review): these fields are not volatile, so the Java memory model does
// not guarantee that the timer thread's write becomes visible to the looping
// thread - confirm this is acceptable for the benchmark.
static int ops0;
static int ops1;
static int ops2;
// Operands for the arithmetic in check1. They are non-final static fields.
static double DVAL = 13.d;
static float FVAL = 13.f;
// Iteration bound for the timed runs; also the value stored into opsN to stop a run.
static final int LIMIT = 2000000000;
// The volatile long updated and re-read by check0.
static volatile long L = 0L;
/**
 * Adds 32 to the volatile field L and returns the value read back from L.
 */
static long check0() {
L += 32L;
return L; // L still live here
}
/**
 * Runs a fixed chain of floating-point operations starting from dval:
 * remainders, double/float conversions, Math.sin/cos/tan/log/log10 and
 * casts to long. The marker comments name the operation on the next line.
 *
 * @param dval starting value for the chain
 * @return the sum of (long)d1 and (long)f1, narrowed to int
 */
static int check1(double dval) {
//modD
double d1 = dval % DVAL;
//ConvD2F
float f1 = (float)d1;
//modF
float f2 = f1 % FVAL;
//ConvF2D
double d2 = (double)f2;
//sinD
d1 = Math.sin(d2);
d2 = d1 * DVAL;
//cosD
d1 = Math.cos(d2);
d2 = d1 / DVAL;
//tanD
d1 = Math.tan(d2);
d2 = DVAL - d1;
//logD
d1 = Math.log(d2);
d2 = d1 + DVAL;
//log10D
d1 = Math.log10(d2);
//ConvD2L (double -> long cast; this marker previously read ConvL2D)
long l = (long)d1;
//ConvF2L (float -> long cast; this marker previously read ConvL2F)
l += (long)f1;
return (int)l;
}
/**
 * Round-trips d through its raw long bits and (float)d through its raw int
 * bits, checking after each step that the value and the bits are unchanged.
 * Because the value checks use !=, a NaN argument fails the first check.
 *
 * @param d value to round-trip
 * @return the raw int bits of (float)d
 * @throws InternalError if any round trip changes the value or the bits
 */
static int check2(double d) {
// double -> long bits -> double
long longbits = Double.doubleToRawLongBits(d);
double d2 = Double.longBitsToDouble(longbits);
if (d != d2) {
throw new InternalError("value mismatch");
}
long longbits2 = Double.doubleToRawLongBits(d2);
if (longbits != longbits2) {
throw new InternalError("value mismatch");
}
// float -> int bits -> float
float f = (float) d;
int intbits = Float.floatToRawIntBits(f);
float f2 = Float.intBitsToFloat(intbits);
if (f != f2) {
throw new InternalError("value mismatch");
}
int intbits2 = Float.floatToRawIntBits(f2);
if (intbits != intbits2) {
throw new InternalError("value mismatch");
}
return intbits2;
}
/**
 * Resets L to 0 and calls check0() while ops0 < limit, using the static
 * field ops0 as the loop counter.
 *
 * @param limit loop bound compared against ops0
 * @return the int sum of the (int)-narrowed results of check0()
 */
static int test0(int limit) {
int i = 0;
L = 0;
for (ops0 = 0; ops0 < limit; ops0++) {
i += (int)check0();
}
return i;
}
/**
 * Calls check1(d) while ops1 < limit, using the static field ops1 as the
 * loop counter.
 *
 * @param d argument passed to every check1 call
 * @param limit loop bound compared against ops1
 * @return the int sum of the check1 results
 */
static int test1(double d, int limit) {
int i = 0;
for (ops1 = 0; ops1 < limit; ops1++) {
i += check1(d);
}
return i;
}
/**
 * Calls check2(d) while ops2 < limit, using the static field ops2 as the
 * loop counter.
 *
 * @param d argument passed to every check2 call
 * @param limit loop bound compared against ops2
 * @return the int sum of the check2 results
 */
static int test2(double d, int limit) {
int i = 0;
for (ops2 = 0; ops2 < limit; ops2++) {
i += check2(d);
}
return i;
}
/**
 * Entry point: two short warmup calls per test, then one timed run per test.
 * For each timed run an anonymous thread sleeps 10 seconds, prints
 * opsN / 10 as "loops per second" and stores LIMIT into opsN.
 *
 * @param args not used
 */
public static void main(String[] args) {
double d = 0.0123456789d;
// Results of the test calls are assigned to i but never read.
int i = test0(11000); // warmup
i = test0(10000); // warmup
i = test1(d, 11000); // warmup
i = test1(d, 10000); // warmup
i = test2(d, 11000); // warmup
i = test2(d, 10000); // warmup
System.out.println("Start check0!");
// Timer thread for test0: report after the sleep, then force the loop condition false.
new Thread() {
public void run() {
int seconds = 10;
try {
Thread.sleep(seconds * 1000);
} catch (Exception e) { } // any exception from sleep is ignored; the report is still printed
System.out.println("check0 (volatile long): " + (ops0 / seconds) + " loops per second");
ops0 = LIMIT;
}
}.start();
i = test0(LIMIT); // run
System.out.println("Start check1!");
// Timer thread for test1, same scheme as above.
new Thread() {
public void run() {
int seconds = 10;
try {
Thread.sleep(seconds * 1000);
} catch (Exception e) { } // any exception from sleep is ignored; the report is still printed
System.out.println("check1 (math on FPU): " + (ops1 / seconds) + " loops per second");
ops1 = LIMIT;
}
}.start();
i = test1(d, LIMIT); // run
System.out.println("Start check2!");
// Timer thread for test2, same scheme as above.
new Thread() {
public void run() {
int seconds = 10;
try {
Thread.sleep(seconds * 1000);
} catch (Exception e) { } // any exception from sleep is ignored; the report is still printed
System.out.println("check2 (raw 64 bits): " + (ops2 / seconds) + " loops per second");
ops2 = LIMIT;
}
}.start();
i = test2(d, LIMIT); // run
}
}
###@###.### 2004-12-10 01:01:42 GMT
###@###.### 2004-12-14 00:30:33 GMT
Add MOVD SSE2 instructions for long volatile moves, d2l, l2d conversions
and long raw moves.
Use 'fst STn' in place of the 'fld ST0; fstp STn' sequence in the FPU unit.
And a few fixes:
MachSpillCopy - use 'lea' instead of 'sub/add' to preserve flags.
MODX and MODXD - move popFPU to the end of the instruction.
The following Java methods were used to verify the generated code:
/**
 * Stand-alone driver that exercises three kinds of operations:
 * check0 - read-modify-write of a volatile long,
 * check1 - a chain of floating-point remainder, conversion and Math calls,
 * check2 - raw bit round trips between double/long and float/int.
 * main() first calls each test with small limits, then runs each test for
 * about ten seconds and prints the number of loops per second.
 */
public class mtest {
// Loop counters for test0/test1/test2. Each one is also read by the timer
// thread started in main() to compute throughput, and is then set to LIMIT
// by that thread so that the corresponding loop terminates.
// NOTE(review): these fields are not volatile, so the Java memory model does
// not guarantee that the timer thread's write becomes visible to the looping
// thread - confirm this is acceptable for the benchmark.
static int ops0;
static int ops1;
static int ops2;
// Operands for the arithmetic in check1. They are non-final static fields.
static double DVAL = 13.d;
static float FVAL = 13.f;
// Iteration bound for the timed runs; also the value stored into opsN to stop a run.
static final int LIMIT = 2000000000;
// The volatile long updated and re-read by check0.
static volatile long L = 0L;
/**
 * Adds 32 to the volatile field L and returns the value read back from L.
 */
static long check0() {
L += 32L;
return L; // L still live here
}
/**
 * Runs a fixed chain of floating-point operations starting from dval:
 * remainders, double/float conversions, Math.sin/cos/tan/log/log10 and
 * casts to long. The marker comments name the operation on the next line.
 *
 * @param dval starting value for the chain
 * @return the sum of (long)d1 and (long)f1, narrowed to int
 */
static int check1(double dval) {
//modD
double d1 = dval % DVAL;
//ConvD2F
float f1 = (float)d1;
//modF
float f2 = f1 % FVAL;
//ConvF2D
double d2 = (double)f2;
//sinD
d1 = Math.sin(d2);
d2 = d1 * DVAL;
//cosD
d1 = Math.cos(d2);
d2 = d1 / DVAL;
//tanD
d1 = Math.tan(d2);
d2 = DVAL - d1;
//logD
d1 = Math.log(d2);
d2 = d1 + DVAL;
//log10D
d1 = Math.log10(d2);
//ConvD2L (double -> long cast; this marker previously read ConvL2D)
long l = (long)d1;
//ConvF2L (float -> long cast; this marker previously read ConvL2F)
l += (long)f1;
return (int)l;
}
/**
 * Round-trips d through its raw long bits and (float)d through its raw int
 * bits, checking after each step that the value and the bits are unchanged.
 * Because the value checks use !=, a NaN argument fails the first check.
 *
 * @param d value to round-trip
 * @return the raw int bits of (float)d
 * @throws InternalError if any round trip changes the value or the bits
 */
static int check2(double d) {
// double -> long bits -> double
long longbits = Double.doubleToRawLongBits(d);
double d2 = Double.longBitsToDouble(longbits);
if (d != d2) {
throw new InternalError("value mismatch");
}
long longbits2 = Double.doubleToRawLongBits(d2);
if (longbits != longbits2) {
throw new InternalError("value mismatch");
}
// float -> int bits -> float
float f = (float) d;
int intbits = Float.floatToRawIntBits(f);
float f2 = Float.intBitsToFloat(intbits);
if (f != f2) {
throw new InternalError("value mismatch");
}
int intbits2 = Float.floatToRawIntBits(f2);
if (intbits != intbits2) {
throw new InternalError("value mismatch");
}
return intbits2;
}
/**
 * Resets L to 0 and calls check0() while ops0 < limit, using the static
 * field ops0 as the loop counter.
 *
 * @param limit loop bound compared against ops0
 * @return the int sum of the (int)-narrowed results of check0()
 */
static int test0(int limit) {
int i = 0;
L = 0;
for (ops0 = 0; ops0 < limit; ops0++) {
i += (int)check0();
}
return i;
}
/**
 * Calls check1(d) while ops1 < limit, using the static field ops1 as the
 * loop counter.
 *
 * @param d argument passed to every check1 call
 * @param limit loop bound compared against ops1
 * @return the int sum of the check1 results
 */
static int test1(double d, int limit) {
int i = 0;
for (ops1 = 0; ops1 < limit; ops1++) {
i += check1(d);
}
return i;
}
/**
 * Calls check2(d) while ops2 < limit, using the static field ops2 as the
 * loop counter.
 *
 * @param d argument passed to every check2 call
 * @param limit loop bound compared against ops2
 * @return the int sum of the check2 results
 */
static int test2(double d, int limit) {
int i = 0;
for (ops2 = 0; ops2 < limit; ops2++) {
i += check2(d);
}
return i;
}
/**
 * Entry point: two short warmup calls per test, then one timed run per test.
 * For each timed run an anonymous thread sleeps 10 seconds, prints
 * opsN / 10 as "loops per second" and stores LIMIT into opsN.
 *
 * @param args not used
 */
public static void main(String[] args) {
double d = 0.0123456789d;
// Results of the test calls are assigned to i but never read.
int i = test0(11000); // warmup
i = test0(10000); // warmup
i = test1(d, 11000); // warmup
i = test1(d, 10000); // warmup
i = test2(d, 11000); // warmup
i = test2(d, 10000); // warmup
System.out.println("Start check0!");
// Timer thread for test0: report after the sleep, then force the loop condition false.
new Thread() {
public void run() {
int seconds = 10;
try {
Thread.sleep(seconds * 1000);
} catch (Exception e) { } // any exception from sleep is ignored; the report is still printed
System.out.println("check0 (volatile long): " + (ops0 / seconds) + " loops per second");
ops0 = LIMIT;
}
}.start();
i = test0(LIMIT); // run
System.out.println("Start check1!");
// Timer thread for test1, same scheme as above.
new Thread() {
public void run() {
int seconds = 10;
try {
Thread.sleep(seconds * 1000);
} catch (Exception e) { } // any exception from sleep is ignored; the report is still printed
System.out.println("check1 (math on FPU): " + (ops1 / seconds) + " loops per second");
ops1 = LIMIT;
}
}.start();
i = test1(d, LIMIT); // run
System.out.println("Start check2!");
// Timer thread for test2, same scheme as above.
new Thread() {
public void run() {
int seconds = 10;
try {
Thread.sleep(seconds * 1000);
} catch (Exception e) { } // any exception from sleep is ignored; the report is still printed
System.out.println("check2 (raw 64 bits): " + (ops2 / seconds) + " loops per second");
ops2 = LIMIT;
}
}.start();
i = test2(d, LIMIT); // run
}
}
###@###.### 2004-12-10 01:01:42 GMT
###@###.### 2004-12-14 00:30:33 GMT