import java.text.BreakIterator;

/**
 * Timing check for {@link BreakIterator#getCharacterInstance()}.
 *
 * <p>Strings of increasing length are iterated character by character and the
 * elapsed time is recorded. The run is accepted when the total time correlates
 * with the string length while the time per character does not grow with it;
 * otherwise {@link #main(String[])} throws.
 */
public class BreakIteratorCharacterInstanceTimePerChar {

    static final int STEPS = 10;
    static final int LIMIT = 1000000; // maximum string size tested

    /** Lowest accepted correlation between string size and total time. */
    static final double MIN_TOTAL_TIME_CORRELATION = .5;

    /** Highest accepted correlation between string size and time per character. */
    static final double MAX_PER_CHAR_CORRELATION = .5;

    /**
     * Walks over every character boundary of {@code value} with a fresh
     * character-instance {@link BreakIterator}.
     *
     * @param value the text to iterate over
     */
    static void matchPerformance(String value) {
        BreakIterator breakIterator = BreakIterator.getCharacterInstance();
        breakIterator.setText(value);
        while (breakIterator.next() != BreakIterator.DONE) {
            // Only the traversal itself is of interest; boundaries are discarded.
        }
    }

    /**
     * Runs the measurement for {@link #STEPS} string sizes up to {@link #LIMIT},
     * prints one line per size plus the two correlation factors, and fails if
     * the factors are outside the accepted range.
     *
     * @param args ignored
     * @throws Exception if the measured timings do not show the expected scaling
     */
    public static void main(String[] args) throws Exception {
        matchPerformance("warmup");

        double[][] strTimesByNChars = new double[STEPS][];
        double[][] chrTimesByNChars = new double[STEPS][];
        // The second column is printed as nanoseconds / 1_000_000, i.e. milliseconds.
        System.out.println("number of chars, time string [ms], time per char [ns]");
        for (int i = 0; i < STEPS; i++) {
            // Multiply in long so a larger LIMIT cannot overflow the intermediate product.
            int chunkSize = (int) ((long) (i + 1) * LIMIT / STEPS);
            String value = "x".repeat(chunkSize);
            long start = System.nanoTime();
            matchPerformance(value);
            long end = System.nanoTime();
            long t = end - start;
            strTimesByNChars[i] = new double[] { chunkSize, t };
            double timePerChar = (double) t / (double) chunkSize;
            chrTimesByNChars[i] = new double[] { chunkSize, timePerChar };
            System.out.printf("%7d %6d %10.3f%n", chunkSize, (t / 1000000), timePerChar);
        }
        double cStr = correlate(strTimesByNChars);
        double cChr = correlate(chrTimesByNChars);
        System.out.printf("correlation factor between string size and total time (expected 1): %10.3f%n", cStr);
        System.out.printf("correlation factor between string size and time per character (expected 0 or less): %10.3f%n", cChr);
        if (!isExpectedScaling(cStr, cChr)) {
            throw new Exception(String.format(
                    "test failed: correlation size/total time = %.3f (expected >= %.1f), "
                            + "correlation size/time per char = %.3f (expected <= %.1f)",
                    cStr, MIN_TOTAL_TIME_CORRELATION, cChr, MAX_PER_CHAR_CORRELATION));
        }
    }

    /**
     * Decides whether a pair of correlation factors is accepted.
     *
     * <p>A {@code NaN} total-time correlation is rejected: it means the total
     * times had no variance, so no growth with the string size was observed.
     * A {@code NaN} per-character correlation is accepted: it means the time
     * per character had no variance, i.e. it did not grow with the size.
     *
     * @param totalTimeCorrelation correlation between string size and total time
     * @param perCharCorrelation correlation between string size and time per character
     * @return {@code true} if both factors are within the accepted range
     */
    static boolean isExpectedScaling(double totalTimeCorrelation, double perCharCorrelation) {
        // Any comparison with NaN is false, so NaN fails this check as intended.
        boolean totalTimeGrowsWithSize = totalTimeCorrelation >= MIN_TOTAL_TIME_CORRELATION;
        boolean perCharDoesNotGrow = Double.isNaN(perCharCorrelation)
                || perCharCorrelation <= MAX_PER_CHAR_CORRELATION;
        return totalTimeGrowsWithSize && perCharDoesNotGrow;
    }

    /**
     * Computes the correlation coefficient of a list of (x, y) pairs as
     * {@code sxy / sqrt(sxx * syy)}, where the sums are taken over the
     * deviations from the respective averages.
     *
     * @param data rows of the form {@code { x, y }}
     * @return the correlation coefficient; {@code NaN} if {@code data} is empty
     *         or if all x values or all y values are equal (zero variance)
     */
    static double correlate(double[][] data) {
        double sumx = 0, sumy = 0;
        for (double[] point : data) {
            sumx += point[0];
            sumy += point[1];
        }
        double avgx = sumx / data.length;
        double avgy = sumy / data.length;
        double sxy = 0, sxx = 0, syy = 0;
        for (double[] point : data) {
            double dx = point[0] - avgx;
            double dy = point[1] - avgy;
            sxy += dx * dy;
            sxx += dx * dx;
            syy += dy * dy;
        }
        return sxy / Math.sqrt(sxx * syy);
    }

}
