masayoshi.okutsu@Eng 1997-01-13
TextBoundary doesn't work with some characters, such as HALFWIDTH KATAKANA
letters. To reproduce the problem, run the following test program with a
HALFWIDTH KATAKANA letter as its argument.
import java.io.*;
import java.text.*;
/**
 * Reproduction driver: splits a string at the boundaries reported by
 * {@code TextBoundary.getLineBreak()} and prints every segment, first walking
 * forward from the start of the text and then backward from its end.
 */
public class BoundaryCheck {

    /**
     * Command-line entry point; forwards the arguments to
     * {@link #run(String[], PrintStream, PrintStream)} using the standard
     * output and error streams.
     *
     * @param args command-line arguments
     */
    public static void main(String args[]) {
        new BoundaryCheck().run(args, System.out, System.err);
    }

    /** Creates a checker; there is no state to initialise. */
    public BoundaryCheck() {
    }

    /**
     * Prints the line-break segments of {@code args[0]} in both directions.
     * Does nothing unless exactly one argument is supplied.
     *
     * @param args command-line arguments; {@code args[0]} is the text to examine
     * @param out  stream that receives the printed segments
     * @param err  not used by this method
     */
    public void run(String args[], PrintStream out, PrintStream err) {
        if (args.length != 1) {
            return;
        }
        String text = args[0];

        // Forward pass over the line-break boundaries.
        TextBoundary forward = TextBoundary.getLineBreak();
        forward.setText(text);
        printEachForward(forward, out);

        // Backward pass, using a separately obtained line-break boundary.
        TextBoundary backward = TextBoundary.getLineBreak();
        backward.setText(text);
        printEachbackward(backward, out);
    }

    /**
     * Prints each segment between consecutive boundaries, front to back, one
     * per line with the prefix {@code "Fw:"}.
     *
     * @param boundary boundary object whose text has already been set
     * @param out      stream that receives the output
     */
    public void printEachForward(TextBoundary boundary, PrintStream out) {
        int from = boundary.first();
        int to = boundary.next();
        while (to != TextBoundary.DONE) {
            out.println("Fw:" + boundary.getText().substring(from, to));
            // The end of this segment is the start of the next one.
            from = to;
            to = boundary.next();
        }
    }

    /**
     * Prints each segment between consecutive boundaries, back to front, one
     * per line with the prefix {@code "Bk:"}.
     *
     * @param boundary boundary object whose text has already been set
     * @param out      stream that receives the output
     */
    public void printEachbackward(TextBoundary boundary, PrintStream out) {
        int to = boundary.last();
        int from = boundary.previous();
        while (from != TextBoundary.DONE) {
            out.println("Bk:" + boundary.getText().substring(from, to));
            // The start of this segment is the end of the preceding one.
            to = from;
            from = boundary.previous();
        }
    }
}
Example of execution (includes EUCJIS halfwidth kana):
% java BoundaryCheck ÄĂăĄÄÁ
java.lang.ArrayIndexOutOfBoundsException: 19
at java.text.UnicodeClassMapping.mappedChar(UnicodeClassMapping.java:87)
at java.text.SimpleTextBoundary.mappedChar(SimpleTextBoundary.java:296)
at java.text.SimpleTextBoundary.nextPosition(SimpleTextBoundary.java:278)
at java.text.SimpleTextBoundary.next(SimpleTextBoundary.java:214)
at BoundaryCheck.printEachForward(BoundaryCheck.java:27)
at BoundaryCheck.run(BoundaryCheck.java:18)
at BoundaryCheck.main(BoundaryCheck.java:7)