-
Bug
-
Resolution: Fixed
-
P4
-
7u51
-
b20
-
x86
-
windows_7
FULL PRODUCT VERSION :
java version "1.7.0_51"
Java(TM) SE Runtime Environment (build 1.7.0_51-b13)
Java HotSpot(TM) Client VM (build 24.51-b03, mixed mode, sharing)
A DESCRIPTION OF THE PROBLEM :
This bug requires 2 conditions to be triggered:
- One or more consecutive nested character classes (surrounded by [ ]) immediately follow an intersection operator &&
- At least one "flat" (non-nested) character class element immediately follows that series of nested character classes
When both conditions hold, the series of nested character classes is lost during compilation.
For example:
// Working cases
String ra = "[A-Z&&[0-9A-Z]]"; // Nothing after the nested character class
String rb = "[A-Z&&0-9A-Z]"; // Everything is "flat"
String rc = "[A-Z&&0-9[A-Z]]"; // [A-Z] is after "flat" character class 0-9
// Failing cases
String rx = "[A-Z&&[A-Z]0-9]"; // [A-Z] is lost
String ry = "[A-Z&&[A-F][G-Z]0-9]"; // [A-F][G-Z] is lost
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
/* Author: Hong Dai Thanh/nhahtdh */
import java.lang.reflect.*;
import java.util.regex.*;
/**
 * Reproducer for character classes that are lost when nested classes placed
 * right after an intersection ({@code &&}) are followed by a "flat" class.
 *
 * <p>The compiled pattern is inspected through reflection on the internal
 * {@code java.util.regex.Pattern} node classes, then the same expressions are
 * exercised with {@link String#matches(String)}.
 */
class BugLostCharacterClass {

    /** Handle on the internal {@code Pattern$CharProperty} class, looked up by name. */
    private static final Class<?> CHAR_PROPERTY_TYPE;

    /** Handle on the internal {@code Pattern$Node} class, looked up by name. */
    private static final Class<?> NODE_TYPE;

    static {
        try {
            CHAR_PROPERTY_TYPE = Class.forName("java.util.regex.Pattern$CharProperty");
            NODE_TYPE = Class.forName("java.util.regex.Pattern$Node");
        } catch (Exception cause) {
            // Nothing below can work without these two classes.
            throw new RuntimeException(cause);
        }
    }

    /**
     * Prints three spaces per indentation level, without a trailing newline.
     *
     * @param level number of indentation levels to emit
     */
    private static void indent(int level) {
        char[] padding = new char[level * 3];
        for (int i = 0; i < padding.length; i++) {
            padding[i] = ' ';
        }
        System.out.print(new String(padding));
    }

    /**
     * Prints the runtime class name of {@code o} (prefixed by the name of its
     * enclosing method, when it has one) followed by every field it declares.
     * Fields whose declared type is assignable to {@code Pattern$CharProperty}
     * are dissected recursively one level deeper; any other field is printed
     * as {@code name:value}.
     *
     * @param indent indentation level of the header line
     * @param o      object to inspect
     * @throws Exception if reflective access fails
     */
    private static void dissectCharClass(int indent, Object o) throws Exception {
        Class<?> runtimeType = o.getClass();
        Field[] declared = runtimeType.getDeclaredFields();
        Method owner = runtimeType.getEnclosingMethod();

        String prefix = "";
        if (owner != null) {
            prefix = owner.getName() + " ";
        }
        indent(indent);
        System.out.println(prefix + runtimeType.getName());

        int childLevel = indent + 1;
        for (int i = 0; i < declared.length; i++) {
            Field field = declared[i];
            field.setAccessible(true);
            Object value = field.get(o);
            if (!CHAR_PROPERTY_TYPE.isAssignableFrom(field.getType())) {
                indent(childLevel);
                System.out.println(field.getName() + ":" + value);
                continue;
            }
            dissectCharClass(childLevel, value);
        }
    }

    /**
     * Follows the {@code next} links starting at {@code o}. Instances of
     * {@code Pattern$CharProperty} are dissected in detail; for every other
     * node only the class name is printed.
     *
     * @param o first node of the chain, may be {@code null}
     * @throws Exception if reflective access fails
     */
    private static void dissectNode(Object o) throws Exception {
        Field link = NODE_TYPE.getDeclaredField("next");
        link.setAccessible(true);
        for (Object current = o; current != null; current = link.get(current)) {
            if (CHAR_PROPERTY_TYPE.isInstance(current)) {
                dissectCharClass(0, current);
            } else {
                System.out.println(current.getClass().getName());
            }
        }
    }

    /**
     * Dumps the node chain stored in the {@code root} field of a compiled
     * pattern, followed by an empty line.
     *
     * @param p compiled pattern to inspect
     * @throws Exception if reflective access fails
     */
    private static void dissectPattern(Pattern p) throws Exception {
        Field rootHandle = Pattern.class.getDeclaredField("root");
        rootHandle.setAccessible(true);
        dissectNode(rootHandle.get(p));
        System.out.println();
    }

    /**
     * Runs the structural dump and then the black-box match for each
     * expression, comparison cases first.
     *
     * @param args unused
     * @throws Exception if reflective access fails
     */
    public static void main(String[] args) throws Exception {
        // COMPARISON CASES: these three character classes work as expected.
        String ra = "[A-Z&&[0-9A-Z]]";
        String rb = "[A-Z&&0-9A-Z]";
        String rc = "[A-Z&&0-9[A-Z]]";
        // EXPECTED: equivalent to the three comparison cases.
        // ACTUAL: the nested class [A-Z] is lost during compilation; the regex can't match anything.
        String rx = "[A-Z&&[A-Z]0-9]";
        // EXPECTED: equivalent to the three comparison cases.
        // ACTUAL: the nested classes [A-F] and [G-Z] are lost during compilation; the regex can't match anything.
        String ry = "[A-Z&&[A-F][G-Z]0-9]";

        String[] expressions = {ra, rb, rc, rx, ry};

        // Structure verification via reflection.
        for (String expression : expressions) {
            dissectPattern(Pattern.compile(expression));
        }

        // Normal black-box testing.
        // ra, rb, rc -> ACTUAL = EXPECTED: true
        // rx, ry     -> EXPECTED: true, ACTUAL: false
        for (String expression : expressions) {
            System.out.println("A".matches(expression));
        }
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
* To fix this bug:
In clazz() method of Pattern class
} else { // abc&&def
unread();
if (rightNode == null) { //L
rightNode = clazz(false);
} else { //L
rightNode = union(rightNode, clazz(false)); //L
}//L
}
Lines ending with //L are lines to be added
* To workaround:
Enclose the whole thing inside a nested character class, instead of leaving some "flat" character class outside.
java version "1.7.0_51"
Java(TM) SE Runtime Environment (build 1.7.0_51-b13)
Java HotSpot(TM) Client VM (build 24.51-b03, mixed mode, sharing)
A DESCRIPTION OF THE PROBLEM :
This bug requires 2 conditions to be triggered:
- One or more consecutive nested character classes (surrounded by [ ]) immediately follow an intersection operator &&
- At least one "flat" (non-nested) character class element immediately follows that series of nested character classes
When both conditions hold, the series of nested character classes is lost during compilation.
For example:
// Working cases
String ra = "[A-Z&&[0-9A-Z]]"; // Nothing after the nested character class
String rb = "[A-Z&&0-9A-Z]"; // Everything is "flat"
String rc = "[A-Z&&0-9[A-Z]]"; // [A-Z] is after "flat" character class 0-9
// Failing cases
String rx = "[A-Z&&[A-Z]0-9]"; // [A-Z] is lost
String ry = "[A-Z&&[A-F][G-Z]0-9]"; // [A-F][G-Z] is lost
REPRODUCIBILITY :
This bug can be reproduced always.
---------- BEGIN SOURCE ----------
/* Author: Hong Dai Thanh/nhahtdh */
import java.lang.reflect.*;
import java.util.regex.*;
/**
 * Reproducer for character classes that are lost when nested classes placed
 * right after an intersection ({@code &&}) are followed by a "flat" class.
 *
 * <p>The compiled pattern is inspected through reflection on the internal
 * {@code java.util.regex.Pattern} node classes, then the same expressions are
 * exercised with {@link String#matches(String)}.
 */
class BugLostCharacterClass {

    /** Handle on the internal {@code Pattern$CharProperty} class, looked up by name. */
    private static final Class<?> CHAR_PROPERTY_TYPE;

    /** Handle on the internal {@code Pattern$Node} class, looked up by name. */
    private static final Class<?> NODE_TYPE;

    static {
        try {
            CHAR_PROPERTY_TYPE = Class.forName("java.util.regex.Pattern$CharProperty");
            NODE_TYPE = Class.forName("java.util.regex.Pattern$Node");
        } catch (Exception cause) {
            // Nothing below can work without these two classes.
            throw new RuntimeException(cause);
        }
    }

    /**
     * Prints three spaces per indentation level, without a trailing newline.
     *
     * @param level number of indentation levels to emit
     */
    private static void indent(int level) {
        char[] padding = new char[level * 3];
        for (int i = 0; i < padding.length; i++) {
            padding[i] = ' ';
        }
        System.out.print(new String(padding));
    }

    /**
     * Prints the runtime class name of {@code o} (prefixed by the name of its
     * enclosing method, when it has one) followed by every field it declares.
     * Fields whose declared type is assignable to {@code Pattern$CharProperty}
     * are dissected recursively one level deeper; any other field is printed
     * as {@code name:value}.
     *
     * @param indent indentation level of the header line
     * @param o      object to inspect
     * @throws Exception if reflective access fails
     */
    private static void dissectCharClass(int indent, Object o) throws Exception {
        Class<?> runtimeType = o.getClass();
        Field[] declared = runtimeType.getDeclaredFields();
        Method owner = runtimeType.getEnclosingMethod();

        String prefix = "";
        if (owner != null) {
            prefix = owner.getName() + " ";
        }
        indent(indent);
        System.out.println(prefix + runtimeType.getName());

        int childLevel = indent + 1;
        for (int i = 0; i < declared.length; i++) {
            Field field = declared[i];
            field.setAccessible(true);
            Object value = field.get(o);
            if (!CHAR_PROPERTY_TYPE.isAssignableFrom(field.getType())) {
                indent(childLevel);
                System.out.println(field.getName() + ":" + value);
                continue;
            }
            dissectCharClass(childLevel, value);
        }
    }

    /**
     * Follows the {@code next} links starting at {@code o}. Instances of
     * {@code Pattern$CharProperty} are dissected in detail; for every other
     * node only the class name is printed.
     *
     * @param o first node of the chain, may be {@code null}
     * @throws Exception if reflective access fails
     */
    private static void dissectNode(Object o) throws Exception {
        Field link = NODE_TYPE.getDeclaredField("next");
        link.setAccessible(true);
        for (Object current = o; current != null; current = link.get(current)) {
            if (CHAR_PROPERTY_TYPE.isInstance(current)) {
                dissectCharClass(0, current);
            } else {
                System.out.println(current.getClass().getName());
            }
        }
    }

    /**
     * Dumps the node chain stored in the {@code root} field of a compiled
     * pattern, followed by an empty line.
     *
     * @param p compiled pattern to inspect
     * @throws Exception if reflective access fails
     */
    private static void dissectPattern(Pattern p) throws Exception {
        Field rootHandle = Pattern.class.getDeclaredField("root");
        rootHandle.setAccessible(true);
        dissectNode(rootHandle.get(p));
        System.out.println();
    }

    /**
     * Runs the structural dump and then the black-box match for each
     * expression, comparison cases first.
     *
     * @param args unused
     * @throws Exception if reflective access fails
     */
    public static void main(String[] args) throws Exception {
        // COMPARISON CASES: these three character classes work as expected.
        String ra = "[A-Z&&[0-9A-Z]]";
        String rb = "[A-Z&&0-9A-Z]";
        String rc = "[A-Z&&0-9[A-Z]]";
        // EXPECTED: equivalent to the three comparison cases.
        // ACTUAL: the nested class [A-Z] is lost during compilation; the regex can't match anything.
        String rx = "[A-Z&&[A-Z]0-9]";
        // EXPECTED: equivalent to the three comparison cases.
        // ACTUAL: the nested classes [A-F] and [G-Z] are lost during compilation; the regex can't match anything.
        String ry = "[A-Z&&[A-F][G-Z]0-9]";

        String[] expressions = {ra, rb, rc, rx, ry};

        // Structure verification via reflection.
        for (String expression : expressions) {
            dissectPattern(Pattern.compile(expression));
        }

        // Normal black-box testing.
        // ra, rb, rc -> ACTUAL = EXPECTED: true
        // rx, ry     -> EXPECTED: true, ACTUAL: false
        for (String expression : expressions) {
            System.out.println("A".matches(expression));
        }
    }
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
* To fix this bug:
In clazz() method of Pattern class
} else { // abc&&def
unread();
if (rightNode == null) { //L
rightNode = clazz(false);
} else { //L
rightNode = union(rightNode, clazz(false)); //L
}//L
}
Lines ending with //L are lines to be added
* To workaround:
Enclose the whole thing inside a nested character class, instead of leaving some "flat" character class outside.
- csr for
-
JDK-8264547 RegEx pattern matching loses character class after intersection (&&) operator
- Closed