-
Enhancement
-
Resolution: Fixed
-
P4
-
None
-
b15
JavacFileManager.list() has a few O(N) code paths, where N is the number of jars on the compile-time class path. In very large compilations with annotation processing, these code paths can be repeatedly hit, incurring performance penalties.
In some large compilations at Google, these paths have been measured to take over 30s/compilation.
JavacFileManager.list() is called once within JavaCompiler.enterTrees(), but then again repeatedly for all calls of ClassSymbol.complete() (via ClassFinder). Types that are already resolved are cached and returned quickly, but types that have not been loaded or do not exist in the compilation, such as those requested with Elements.getTypeElement(), repeatedly hit this branch. Each call to list() calls getContainer(directory) for each item in getLocationAsPaths(Location), which shouldn't change across calls. Additionally, each Container is listed, even if the compiler knows that it can't possibly match the requested package.
Repro:
./gen.sh 20000
./run.sh 20000 <stock javac>
./run.sh 20000 <javac with patch>
Here's the data I collected with my patch (forthcoming) that shows the bottleneck+improvement: https://docs.google.com/spreadsheets/d/121U52kf4NUdhrMD9NMOxPHsQUzHx0SyMfFqDaoZnG5Y/edit#gid=0
====
gen.sh
====
#!/bin/bash
# gen.sh: generate <count> single-class jars (p1.jar .. p<count>.jar) in the
# current directory. Jar N contains exactly one class, pN/PN.class, so the
# jars together form a very long compile-time class path.
#
# Usage: ./gen.sh <count>
#
# WARNING: this compiles and then deletes every *.java file in the current
# directory; run it in an empty scratch directory.
set -eu
if [[ $# -lt 1 ]]; then
  echo "usage: $0 <count>"
  exit 1
fi
COUNT=$1
# Emit one trivial source file per package.
for i in $(seq 1 "$COUNT"); do
  java_file="P$i.java"
  echo "package p$i; class P$i {}" > "$java_file"
done
# Compile all sources in a single javac invocation, then discard them.
javac *.java
rm *.java
# Move each class file under its package directory, zip it into its own jar,
# and remove the temporary directory again.
for i in $(seq 1 "$COUNT"); do
  mkdir "p$i"
  mv "P$i.class" "p$i/"
  zip -q "p$i.jar" "p$i/P$i.class"
  rm -rf "p$i"
done
=====
run.sh
=====
#!/bin/bash
# run.sh: time one javac run against the jars produced by gen.sh.
#
# Usage: ./run.sh <count> <javac>
#   <count>  number of jars (p1.jar .. p<count>.jar) to put on the class path
#   <javac>  the javac launcher to measure
#
# The script compiles an annotation processor (Proc) that looks up
# p1.P1 .. p<count>.P<count> through Elements.getTypeElement(), then times the
# compilation of a small annotated class (Tester) with that processor and all
# <count> jars on the class path.
set -eu
if [[ 2 -ne $# ]]; then
  echo "usage: $0 <count> <javac>"
  exit 1
fi
COUNT=$1
JAVAC=$2
# The heredoc delimiter is unquoted, so the shell substitutes $COUNT into the
# generated Java source as the loop bound.
cat <<EOF > Proc.java
import java.util.*;
import javax.annotation.processing.*;
import javax.lang.model.*;
import javax.lang.model.element.*;
@SupportedAnnotationTypes("*")
@SupportedSourceVersion(SourceVersion.RELEASE_8)
public class Proc extends AbstractProcessor {
boolean run;
@Override
public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) {
if (run) return false;
run = true;
for (int i = 1; i <= $COUNT; i++) {
String fqcn = String.format("p%s.P%s", i, i);
TypeElement resolved = processingEnv.getElementUtils().getTypeElement(fqcn);
if (resolved == null) throw new AssertionError(fqcn + " was not found");
}
return false;
}
}
EOF
# $JAVAC stays unquoted, as in the original script, so a value that carries
# extra launcher flags still word-splits into separate arguments.
$JAVAC Proc.java
cat <<EOF > Tester.java
class Tester {
@interface Anno {}
@Anno void foo() {}
}
EOF
# Collect the compiler arguments in an argument file, one per line.
echo "-processor" > argfile
echo Proc >> argfile
echo "-cp" >> argfile
# Class path: the current directory (for Proc.class) followed by every jar.
cp="."
for i in $(seq 1 "$COUNT"); do
  cp="${cp}:p$i.jar"
done
echo "$cp" >> argfile
echo Tester.java >> argfile
$JAVAC -version
echo "===================="
echo "Starting compilation"
echo "===================="
time $JAVAC @argfile
# Remove everything this script generated, including the argument file.
rm Proc* Tester* argfile
In some large compilations at Google, these paths have been measured to take over 30s/compilation.
JavacFileManager.list() is called once within JavaCompiler.enterTrees(), but then again repeatedly for all calls of ClassSymbol.complete() (via ClassFinder). Types that are already resolved are cached and returned quickly, but types that have not been loaded or do not exist in the compilation, such as those requested with Elements.getTypeElement(), repeatedly hit this branch. Each call to list() calls getContainer(directory) for each item in getLocationAsPaths(Location), which shouldn't change across calls. Additionally, each Container is listed, even if the compiler knows that it can't possibly match the requested package.
Repro:
./gen.sh 20000
./run.sh 20000 <stock javac>
./run.sh 20000 <javac with patch>
Here's the data I collected with my patch (forthcoming) that shows the bottleneck+improvement: https://docs.google.com/spreadsheets/d/121U52kf4NUdhrMD9NMOxPHsQUzHx0SyMfFqDaoZnG5Y/edit#gid=0
====
gen.sh
====
#!/bin/bash
# gen.sh: generate <count> single-class jars (p1.jar .. p<count>.jar) in the
# current directory. Jar N contains exactly one class, pN/PN.class, so the
# jars together form a very long compile-time class path.
#
# Usage: ./gen.sh <count>
#
# WARNING: this compiles and then deletes every *.java file in the current
# directory; run it in an empty scratch directory.
set -eu
if [[ $# -lt 1 ]]; then
  echo "usage: $0 <count>"
  exit 1
fi
COUNT=$1
# Emit one trivial source file per package.
for i in $(seq 1 "$COUNT"); do
  java_file="P$i.java"
  echo "package p$i; class P$i {}" > "$java_file"
done
# Compile all sources in a single javac invocation, then discard them.
javac *.java
rm *.java
# Move each class file under its package directory, zip it into its own jar,
# and remove the temporary directory again.
for i in $(seq 1 "$COUNT"); do
  mkdir "p$i"
  mv "P$i.class" "p$i/"
  zip -q "p$i.jar" "p$i/P$i.class"
  rm -rf "p$i"
done
=====
run.sh
=====
#!/bin/bash
# run.sh: time one javac run against the jars produced by gen.sh.
#
# Usage: ./run.sh <count> <javac>
#   <count>  number of jars (p1.jar .. p<count>.jar) to put on the class path
#   <javac>  the javac launcher to measure
#
# The script compiles an annotation processor (Proc) that looks up
# p1.P1 .. p<count>.P<count> through Elements.getTypeElement(), then times the
# compilation of a small annotated class (Tester) with that processor and all
# <count> jars on the class path.
set -eu
if [[ 2 -ne $# ]]; then
  echo "usage: $0 <count> <javac>"
  exit 1
fi
COUNT=$1
JAVAC=$2
# The heredoc delimiter is unquoted, so the shell substitutes $COUNT into the
# generated Java source as the loop bound.
cat <<EOF > Proc.java
import java.util.*;
import javax.annotation.processing.*;
import javax.lang.model.*;
import javax.lang.model.element.*;
@SupportedAnnotationTypes("*")
@SupportedSourceVersion(SourceVersion.RELEASE_8)
public class Proc extends AbstractProcessor {
boolean run;
@Override
public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment roundEnv) {
if (run) return false;
run = true;
for (int i = 1; i <= $COUNT; i++) {
String fqcn = String.format("p%s.P%s", i, i);
TypeElement resolved = processingEnv.getElementUtils().getTypeElement(fqcn);
if (resolved == null) throw new AssertionError(fqcn + " was not found");
}
return false;
}
}
EOF
# $JAVAC stays unquoted, as in the original script, so a value that carries
# extra launcher flags still word-splits into separate arguments.
$JAVAC Proc.java
cat <<EOF > Tester.java
class Tester {
@interface Anno {}
@Anno void foo() {}
}
EOF
# Collect the compiler arguments in an argument file, one per line.
echo "-processor" > argfile
echo Proc >> argfile
echo "-cp" >> argfile
# Class path: the current directory (for Proc.class) followed by every jar.
cp="."
for i in $(seq 1 "$COUNT"); do
  cp="${cp}:p$i.jar"
done
echo "$cp" >> argfile
echo Tester.java >> argfile
$JAVAC -version
echo "===================="
echo "Starting compilation"
echo "===================="
time $JAVAC @argfile
# Remove everything this script generated, including the argument file.
rm Proc* Tester* argfile