A DESCRIPTION OF THE REQUEST :
1.) Default hash to non-zero constant value except for String.length() == 0.
2.) Use positive constant in 8-bit range, ideally less than 5.
3.) Drop comparison of strings length.
4.) Only increment 1 variable in loop.
5.) Use '!=' instead of '<' to determine end of loop.
JUSTIFICATION :
1.) Eliminates need of changes from Bug ID 6921374
2.) Minimizes byte code and compiled machine code footprint + should perform better.
3.) Makes hashCode() as fast as before for all cases.
4.) In a small micro-benchmark I have proven a speed gain about ~15 %.
5.) Could be faster on some CPU code because no hidden subtraction must be performed.
--> In compiled mode, actually needs only 21 bytes against 40 bytes for inner loop.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Inner loop compiles to (needs only 21 bytes of machine code):
0x00b8729c: sub %edx,%ebx
0x00b8729e: mov %ebx,%edx ;*imul
; - java.lang.String::hashCode7@40 (line 1824)
0x00b872a0: movzwl 0xc(%esi,%edi,2),%eax
0x00b872a5: add %eax,%edx ;*iadd
; - java.lang.String::hashCode7@47 (line 1824)
0x00b872a7: inc %edi ;*iinc
; - java.lang.String::hashCode7@43 (line 1824)
0x00b872a8: mov %edx,%ebx
0x00b872aa: shl $0x5,%ebx ;*imul
; - java.lang.String::hashCode7@40 (line 1824)
0x00b872ad: cmp %ebp,%edi
0x00b872af: jl 0x00b8729c
0x00b872b1:
ACTUAL -
Inner loop compiles to (needs *40* bytes of machine code):
0x00b86f1a: movzwl 0xc(%ebp),%ebp
0x00b86f1e: sub %edi,%ebx
0x00b86f20: add %ebp,%ebx ;*iadd
; - java.lang.String::hashCode@45 (line 1727)
0x00b86f22: mov %eax,%esi
0x00b86f24: inc %esi ;*iinc
; - java.lang.String::hashCode@47 (line 1726)
0x00b86f25: cmp %edx,%esi
0x00b86f27: jl 0x00b86f2d
0x00b86f29: mov %ebx,%eax
0x00b86f2b: jmp 0x00b86eea
0x00b86f2d: mov 0x4(%esp),%edi
0x00b86f31: lea 0x2(%edi,%eax,2),%ebp ;*caload
; - java.lang.String::hashCode@44 (line 1727)
0x00b86f35: mov %ebx,%eax
0x00b86f37: shl $0x5,%eax ;*imul
; - java.lang.String::hashCode@38 (line 1727)
0x00b86f3a: mov %ebx,%edi
0x00b86f3c: mov %eax,%ebx
0x00b86f3e: mov %esi,%eax
0x00b86f40: jmp 0x00b86f1a
0x00b86f42:
---------- BEGIN SOURCE ----------
public class String {
private static final int UNKNOWN_HASH = 1;
// private static final int UNKNOWN_HASH = Integer.MIN_VALUE; // alternative
public String(xxx) {
...;
hash = count == 0 ? 0 : UNKNOWN_HASH;
}
public int hashCode() {
int h = hash;
if (h == UNKNOWN_HASH) {
h = 0;
char[] val = value;
for (int i = offset, limit = i + count; i != limit; )
h = h * 31 + val[i++];
hash = h;
}
return h;
}
}
---------- END SOURCE ----------
1.) Default hash to non-zero constant value except for String.length() == 0.
2.) Use positive constant in 8-bit range, ideally less than 5.
3.) Drop comparison of strings length.
4.) Only increment 1 variable in loop.
5.) Use '!=' instead of '<' to determine end of loop.
JUSTIFICATION :
1.) Eliminates need of changes from Bug ID 6921374
2.) Minimizes byte code and compiled machine code footprint + should perform better.
3.) Makes hashCode() as fast as before for all cases.
4.) In a small micro-benchmark I have proven a speed gain about ~15 %.
5.) Could be faster on some CPU code because no hidden subtraction must be performed.
--> In compiled mode, actually needs only 21 bytes against 40 bytes for inner loop.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
Inner loop compiles to (needs only 21 bytes of machine code):
0x00b8729c: sub %edx,%ebx
0x00b8729e: mov %ebx,%edx ;*imul
; - java.lang.String::hashCode7@40 (line 1824)
0x00b872a0: movzwl 0xc(%esi,%edi,2),%eax
0x00b872a5: add %eax,%edx ;*iadd
; - java.lang.String::hashCode7@47 (line 1824)
0x00b872a7: inc %edi ;*iinc
; - java.lang.String::hashCode7@43 (line 1824)
0x00b872a8: mov %edx,%ebx
0x00b872aa: shl $0x5,%ebx ;*imul
; - java.lang.String::hashCode7@40 (line 1824)
0x00b872ad: cmp %ebp,%edi
0x00b872af: jl 0x00b8729c
0x00b872b1:
ACTUAL -
Inner loop compiles to (needs *40* bytes of machine code):
0x00b86f1a: movzwl 0xc(%ebp),%ebp
0x00b86f1e: sub %edi,%ebx
0x00b86f20: add %ebp,%ebx ;*iadd
; - java.lang.String::hashCode@45 (line 1727)
0x00b86f22: mov %eax,%esi
0x00b86f24: inc %esi ;*iinc
; - java.lang.String::hashCode@47 (line 1726)
0x00b86f25: cmp %edx,%esi
0x00b86f27: jl 0x00b86f2d
0x00b86f29: mov %ebx,%eax
0x00b86f2b: jmp 0x00b86eea
0x00b86f2d: mov 0x4(%esp),%edi
0x00b86f31: lea 0x2(%edi,%eax,2),%ebp ;*caload
; - java.lang.String::hashCode@44 (line 1727)
0x00b86f35: mov %ebx,%eax
0x00b86f37: shl $0x5,%eax ;*imul
; - java.lang.String::hashCode@38 (line 1727)
0x00b86f3a: mov %ebx,%edi
0x00b86f3c: mov %eax,%ebx
0x00b86f3e: mov %esi,%eax
0x00b86f40: jmp 0x00b86f1a
0x00b86f42:
---------- BEGIN SOURCE ----------
public class String {
private static final int UNKNOWN_HASH = 1;
// private static final int UNKNOWN_HASH = Integer.MIN_VALUE; // alternative
public String(xxx) {
...;
hash = count == 0 ? 0 : UNKNOWN_HASH;
}
public int hashCode() {
int h = hash;
if (h == UNKNOWN_HASH) {
h = 0;
char[] val = value;
for (int i = offset, limit = i + count; i != limit; )
h = h * 31 + val[i++];
hash = h;
}
return h;
}
}
---------- END SOURCE ----------
- duplicates
-
JDK-5022385 (str) String.hashCode() performance improvement
-
- Closed
-
- relates to
-
JDK-6921374 java.lang.String::hashCode() should check for count == 0 to avoid repeated stores hash = 0
-
- Resolved
-