JDK Version: jdk118
OS: Solaris 2.6
Locale: C, zh
In this build, under some conditions, BreakIterator.getSentenceInstance()
has a problem in the zh_CN locale. For instance, if a sentence contains a pair
of quotation marks ("), and the text between the two quotation marks mixes
Chinese words with uppercase English letters, then
BreakIterator.getSentenceInstance() does not work correctly.
See the following program to reproduce it:
================================================================================
import java.util.*;
import java.text.*;
/**
 * Reproduction program for the sentence BreakIterator problem described in
 * this report.
 *
 * It walks the sentence boundaries of {@link #text} forwards (first to last)
 * and then backwards (last to first) and prints every segment found, so the
 * two traversals can be compared by eye.
 *
 * Only APIs and syntax available on JDK 1.1 are used (hence StringBuffer),
 * so the program can still be run on the JDK named in the report.
 */
public class CheckBreakIterator {
    /**
     * Sample text: Chinese characters, a fullwidth colon, a quotation opened
     * with U+201C that starts with the uppercase word "JAVA", a fullwidth
     * comma, the closing quote U+201D and an ideographic full stop.
     */
    static String text = "\u5de5\u4e1a\u754c\u4e0d\u5c11\u4eba\u9884\u8a00\uff1a\u201cJAVA\u8bed\u8a00\u7684\u51fa\u73b0\uff0c\u5c06\u4f1a\u5f15\u8d77\u4e00\u573a\u8f6f\u4ef6\u9769\u547d\u201d\u3002";

    /** Entry point; all the work happens in the constructor. */
    public static void main(String args[]) {
        new CheckBreakIterator();
    }

    /**
     * Runs the check with the default-locale sentence iterator and prints
     * the report to standard output. Nothing is printed to standard output
     * when the traversal failed (getResult returned null).
     */
    public CheckBreakIterator() {
        BreakIterator boundary = BreakIterator.getSentenceInstance();
        String runtimeResult = getResult(boundary);
        if (runtimeResult != null) {
            System.out.println("Tested BreakIterator.getSentenceInstance() ");
            System.out.println(" " + runtimeResult);
        }
    }

    /**
     * Builds the textual report of both traversals.
     *
     * @param boundary the iterator to exercise; its text is replaced
     * @return the report, or null if the traversal threw an exception
     */
    private String getResult(BreakIterator boundary) {
        // The iterator's offsets refer to the trimmed string, so the segments
        // must be cut from that same string; slicing the untrimmed text would
        // shift every segment when the text has leading whitespace.
        String source = text.trim();
        boundary.setText(source);

        StringBuffer result = new StringBuffer("From First to Last\n");
        try {
            int start = boundary.first();
            int end;
            int i = 1;
            for (end = boundary.next(); end != BreakIterator.DONE;
                    start = end, end = boundary.next()) {
                appendSentence(result, i, source, start, end);
                i++;
            }

            result.append("From End to First\n");
            i = 1;
            end = boundary.last();
            for (start = boundary.previous(); start != BreakIterator.DONE;
                    end = start, start = boundary.previous()) {
                appendSentence(result, i, source, start, end);
                i++;
            }
        } catch (RuntimeException ex) {
            // Still signal failure with null, but say why instead of
            // leaving the run completely silent.
            System.err.println("BreakIterator traversal failed: " + ex);
            return null;
        }
        return result.toString();
    }

    /** Appends one "Sentence N: ..." line for source[start, end) to out. */
    private static void appendSentence(StringBuffer out, int number,
            String source, int start, int end) {
        out.append("Sentence ").append(number).append(": ")
           .append(source.substring(start, end).trim()).append("\n");
    }
}
==============================================================================
jim.hu@prc 1998-12-11
OS: Solaris 2.6
Locale: C, zh
In this build, under some conditions, BreakIterator.getSentenceInstance()
has a problem in the zh_CN locale. For instance, if a sentence contains a pair
of quotation marks ("), and the text between the two quotation marks mixes
Chinese words with uppercase English letters, then
BreakIterator.getSentenceInstance() does not work correctly.
See the following program to reproduce it:
================================================================================
import java.util.*;
import java.text.*;
/**
 * Reproduction program for the sentence BreakIterator problem described in
 * this report.
 *
 * It walks the sentence boundaries of {@link #text} forwards (first to last)
 * and then backwards (last to first) and prints every segment found, so the
 * two traversals can be compared by eye.
 *
 * Only APIs and syntax available on JDK 1.1 are used (hence StringBuffer),
 * so the program can still be run on the JDK named in the report.
 */
public class CheckBreakIterator {
    /**
     * Sample text: Chinese characters, a fullwidth colon, a quotation opened
     * with U+201C that starts with the uppercase word "JAVA", a fullwidth
     * comma, the closing quote U+201D and an ideographic full stop.
     */
    static String text = "\u5de5\u4e1a\u754c\u4e0d\u5c11\u4eba\u9884\u8a00\uff1a\u201cJAVA\u8bed\u8a00\u7684\u51fa\u73b0\uff0c\u5c06\u4f1a\u5f15\u8d77\u4e00\u573a\u8f6f\u4ef6\u9769\u547d\u201d\u3002";

    /** Entry point; all the work happens in the constructor. */
    public static void main(String args[]) {
        new CheckBreakIterator();
    }

    /**
     * Runs the check with the default-locale sentence iterator and prints
     * the report to standard output. Nothing is printed to standard output
     * when the traversal failed (getResult returned null).
     */
    public CheckBreakIterator() {
        BreakIterator boundary = BreakIterator.getSentenceInstance();
        String runtimeResult = getResult(boundary);
        if (runtimeResult != null) {
            System.out.println("Tested BreakIterator.getSentenceInstance() ");
            System.out.println(" " + runtimeResult);
        }
    }

    /**
     * Builds the textual report of both traversals.
     *
     * @param boundary the iterator to exercise; its text is replaced
     * @return the report, or null if the traversal threw an exception
     */
    private String getResult(BreakIterator boundary) {
        // The iterator's offsets refer to the trimmed string, so the segments
        // must be cut from that same string; slicing the untrimmed text would
        // shift every segment when the text has leading whitespace.
        String source = text.trim();
        boundary.setText(source);

        StringBuffer result = new StringBuffer("From First to Last\n");
        try {
            int start = boundary.first();
            int end;
            int i = 1;
            for (end = boundary.next(); end != BreakIterator.DONE;
                    start = end, end = boundary.next()) {
                appendSentence(result, i, source, start, end);
                i++;
            }

            result.append("From End to First\n");
            i = 1;
            end = boundary.last();
            for (start = boundary.previous(); start != BreakIterator.DONE;
                    end = start, start = boundary.previous()) {
                appendSentence(result, i, source, start, end);
                i++;
            }
        } catch (RuntimeException ex) {
            // Still signal failure with null, but say why instead of
            // leaving the run completely silent.
            System.err.println("BreakIterator traversal failed: " + ex);
            return null;
        }
        return result.toString();
    }

    /** Appends one "Sentence N: ..." line for source[start, end) to out. */
    private static void appendSentence(StringBuffer out, int number,
            String source, int start, int end) {
        out.append("Sentence ").append(number).append(": ")
           .append(source.substring(start, end).trim()).append("\n");
    }
}
==============================================================================
jim.hu@prc 1998-12-11