import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.Arrays; import java.util.SplittableRandom; import java.util.function.DoubleSupplier; import org.apache.commons.math3.distribution.AbstractRealDistribution; import org.apache.commons.math3.distribution.ExponentialDistribution; import org.apache.commons.math3.distribution.NormalDistribution; import org.apache.commons.math3.stat.inference.ChiSquareTest; /** * Test for {@link RandomSupport}. */ class RandomSupportTest { /** * Test Gaussian samples using a large number of bins based on uniformly spaced quantiles. */ @Test void testGaussianSamplesWithQuantiles() { final int bins = 2000; final NormalDistribution dist = new NormalDistribution(null, 0.0, 1.0); final double[] quantiles = new double[bins]; for (int i = 0; i < bins; i++) { quantiles[i] = dist.inverseCumulativeProbability((i + 1.0) / bins); } testSamples(quantiles, false); } /** * Test Gaussian samples using a large number of bins uniformly spaced in a range. */ @Test void testGaussianSamplesWithUniformValues() { final int bins = 2000; final double[] values = new double[bins]; final double minx = -8; final double maxx = 8; for (int i = 0; i < bins; i++) { values[i] = minx + (maxx - minx) * (i + 1.0) / bins; } // Ensure upper bound is the support limit values[bins - 1] = Double.POSITIVE_INFINITY; testSamples(values, false); } /** * Test exponential samples using a large number of bins based on uniformly spaced quantiles. */ @Test void testExponentialSamplesWithQuantiles() { final int bins = 2000; final ExponentialDistribution dist = new ExponentialDistribution(null, 1.0); final double[] quantiles = new double[bins]; for (int i = 0; i < bins; i++) { quantiles[i] = dist.inverseCumulativeProbability((i + 1.0) / bins); } testSamples(quantiles, true); } /** * Test exponential samples using a large number of bins uniformly spaced in a range. */ @Test void testExponentialSamplesWithUniformValues() { final int bins = 2000; final double[] values = new double[bins]; final double minx = 0; // Enter the tail of the distribution final double maxx = 12; for (int i = 0; i < bins; i++) { values[i] = minx + (maxx - minx) * (i + 1.0) / bins; } // Ensure upper bound is the support limit values[bins - 1] = Double.POSITIVE_INFINITY; testSamples(values, true); } /** * Test samples using the provided bins. Values correspond to the bin upper limit. It * is assumed the values span most of the distribution. Additional tests are performed * using a region of the distribution sampled using the edge of the ziggurat. * * @param values Bin upper limits * @param exponential Set the true to use an exponential sampler */ private static void testSamples(double[] values, boolean exponential) { final int bins = values.length; final int samples = 10000000; final long[] observed = new long[bins]; final SplittableRandom rng = new SplittableRandom(); final DoubleSupplier sampler = exponential ? () -> RandomSupport.computeNextExponential(rng) : () -> RandomSupport.computeNextGaussian(rng); for (int i = 0; i < samples; i++) { final double x = sampler.getAsDouble(); final int index = findIndex(values, x); observed[index]++; } // Compute expected final AbstractRealDistribution dist = exponential ? new ExponentialDistribution(null, 1.0) : new NormalDistribution(null, 0.0, 1.0); final double[] expected = new double[bins]; double x0 = Double.NEGATIVE_INFINITY; for (int i = 0; i < bins; i++) { final double x1 = values[i]; expected[i] = dist.probability(x0, x1); x0 = x1; } final double significanceLevel = 0.001; final double lowerBound = dist.getSupportLowerBound(); final ChiSquareTest chiSquareTest = new ChiSquareTest(); // Pass if we cannot reject null hypothesis that the distributions are the same. final double pValue = chiSquareTest.chiSquareTest(expected, observed); Assertions.assertFalse(pValue < 0.001, () -> String.format("(%s <= x < %s) Chi-square p-value = %s", lowerBound, values[bins - 1], pValue)); // Test bins sampled from the edge of the ziggurat. This is always around zero. for (final double range : new double[] {0.5, 0.25, 0.1, 0.05}) { final int min = findIndex(values, -range); final int max = findIndex(values, range); final long[] observed2 = Arrays.copyOfRange(observed, min, max + 1); final double[] expected2 = Arrays.copyOfRange(expected, min, max + 1); final double pValue2 = chiSquareTest.chiSquareTest(expected2, observed2); Assertions.assertFalse(pValue2 < significanceLevel, () -> String.format("(%s <= x < %s) Chi-square p-value = %s", min == 0 ? lowerBound : values[min - 1], values[max], pValue2)); } } /** * Find the index of the value in the data such that: *
* data[index - 1] <= x < data[index] ** *
This is a specialised binary search that assumes the bounds of the data are the * extremes of the support, and the upper support is infinite. Thus an index cannot * be returned as equal to the data length. * * @param data the data * @param x the value * @return the index */ private static int findIndex(double[] data, double x) { int low = 0; int high = data.length - 1; // Bracket so that low is just above the value x while (low <= high) { final int mid = (low + high) >>> 1; final double midVal = data[mid]; if (x < midVal) { // Reduce search range high = mid - 1; } else { // Set data[low] above the value low = mid + 1; } } // Verify the index is correct Assertions.assertTrue(x < data[low]); if (low != 0) { Assertions.assertTrue(x >= data[low - 1]); } return low; } }