-
Bug
-
Resolution: Unresolved
-
P4
-
14
-
x86_64
-
linux
ADDITIONAL SYSTEM INFORMATION :
Linux 4.15.0-101-generic
Java 14+
A DESCRIPTION OF THE PROBLEM :
JDK 14 included support for containers in OperatingSystemMXBean.getSystemCpuLoad():
https://bugs.openjdk.java.net/browse/JDK-8228428
Unfortunately, this does not work correctly and reports numbers that are flawed:
1. It is not returning "recent" usage, it returns avg "usage" since start of
container
2. It returns 1.0 when the container uses 1 core 100%, also with quota 4.0
3. It does not really return avg cpu usage since start of container, it only
returns average cpu usage of the cfs periods where the container had any threads
ready to run.
A test program to demonstrate this has been created here:
https://github.com/henningandersen/cpuload
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
See:
https://github.com/henningandersen/cpuload
and follow the instructions in the README file. Needs to run on a 4+ core machine.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
We expected that 1 thread of activity with cpu quota 4 should result in a cpu usage of approximately 25%. We expected that (with some minor delay), the activity reported would be recent, not an average of activity since start of cgroup. We expected the avg cpu usage returned to be relative to elapsed time and not only consider the cfs periods where there were runnable threads in the cgroup.
ACTUAL -
cgroup: /docker/fa278332b03db96757d3335f2d4b00649e9a830d8121fcf5d3a975da3b3565c6
0.6106482766666667
nr_periods 6
nr_throttled 0
throttled_time 0
0.22229851588235294
nr_periods 17
nr_throttled 0
throttled_time 0
Starting thread
0.1240554615625
nr_periods 32
nr_throttled 0
throttled_time 0
Starting thread
0.6591970347560976
nr_periods 82
nr_throttled 0
throttled_time 0
Starting thread
1.0
nr_periods 132
nr_throttled 0
throttled_time 0
Starting thread
1.0
nr_periods 182
nr_throttled 0
throttled_time 0
Done in thread
1.0
nr_periods 232
nr_throttled 6
throttled_time 18923556
Done in thread
1.0
nr_periods 282
nr_throttled 6
throttled_time 18923556
Done in thread
1.0
nr_periods 332
nr_throttled 6
throttled_time 18923556
Done in thread
1.0
nr_periods 382
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 392
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 406
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 422
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 432
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 448
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 464
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 476
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 492
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 506
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 513
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 531
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 542
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 550
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 565
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 579
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 593
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 607
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 622
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 636
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 648
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 658
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 674
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 686
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 701
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 714
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 722
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 740
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 751
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 763
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 781
nr_throttled 6
throttled_time 18923556
---------- BEGIN SOURCE ----------
import com.sun.management.OperatingSystemMXBean;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.util.List;
public class CpuLoad {
public static void main(String[] args) {
OperatingSystemMXBean osBean = (com.sun.management.OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean();
String cgroup = findCgroup();
System.out.println("cgroup: " + cgroup);
int threadCount = args.length > 0 ? Integer.parseInt(args[0]) : 4;
boolean firstTime = true;
while (true) {
System.out.println(osBean.getCpuLoad());
dumpCpuStat(cgroup);
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
if (!firstTime && threadCount-- > 0) {
startThread();
}
firstTime = false;
}
}
private static void dumpCpuStat(String cgroup) {
try {
List<String> strings = Files.readAllLines(FileSystems.getDefault().getPath("/sys/fs/cgroup/cpu/" + cgroup, "cpu.stat"));
strings.forEach(s -> System.out.println(" " + s));
} catch (IOException e) {
e.printStackTrace();
}
}
private static String findCgroup() {
try {
List<String> strings = Files.readAllLines(FileSystems.getDefault().getPath("/proc/self/cgroup"));
return strings.stream().filter(s -> s.contains("cpuacct")).map(s -> s.split(":")[2]).findFirst().get();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private static void startThread() {
System.out.println("Starting thread");
new Thread(() -> {
long now = System.currentTimeMillis();
while (System.currentTimeMillis() < now + 20000) {
}
System.out.println("Done in thread");
}).start();
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
It is possible to grab the cgroup stats and do this calculation. At least a previous record of information should be kept to fix that the cpu load is recent and not since cgroup creation.
FREQUENCY : always
Linux 4.15.0-101-generic
Java 14+
A DESCRIPTION OF THE PROBLEM :
JDK 14 included support for containers in OperatingSystemMXBean.getSystemCpuLoad():
https://bugs.openjdk.java.net/browse/JDK-8228428
Unfortunately, this does not work correctly and reports numbers that are flawed:
1. It is not returning "recent" usage, it returns avg "usage" since start of
container
2. It returns 1.0 when the container uses 1 core 100%, also with quota 4.0
3. It does not really return avg cpu usage since start of container, it only
returns average cpu usage of the cfs periods where the container had any threads
ready to run.
A test program to demonstrate this has been created here:
https://github.com/henningandersen/cpuload
STEPS TO FOLLOW TO REPRODUCE THE PROBLEM :
See:
https://github.com/henningandersen/cpuload
and follow the instructions in the README file. Needs to run on a 4+ core machine.
EXPECTED VERSUS ACTUAL BEHAVIOR :
EXPECTED -
We expected that 1 thread of activity with cpu quota 4 should result in a cpu usage of approximately 25%. We expected that (with some minor delay), the activity reported would be recent, not an average of activity since start of cgroup. We expected the avg cpu usage returned to be relative to elapsed time and not only consider the cfs periods where there were runnable threads in the cgroup.
ACTUAL -
cgroup: /docker/fa278332b03db96757d3335f2d4b00649e9a830d8121fcf5d3a975da3b3565c6
0.6106482766666667
nr_periods 6
nr_throttled 0
throttled_time 0
0.22229851588235294
nr_periods 17
nr_throttled 0
throttled_time 0
Starting thread
0.1240554615625
nr_periods 32
nr_throttled 0
throttled_time 0
Starting thread
0.6591970347560976
nr_periods 82
nr_throttled 0
throttled_time 0
Starting thread
1.0
nr_periods 132
nr_throttled 0
throttled_time 0
Starting thread
1.0
nr_periods 182
nr_throttled 0
throttled_time 0
Done in thread
1.0
nr_periods 232
nr_throttled 6
throttled_time 18923556
Done in thread
1.0
nr_periods 282
nr_throttled 6
throttled_time 18923556
Done in thread
1.0
nr_periods 332
nr_throttled 6
throttled_time 18923556
Done in thread
1.0
nr_periods 382
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 392
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 406
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 422
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 432
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 448
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 464
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 476
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 492
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 506
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 513
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 531
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 542
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 550
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 565
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 579
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 593
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 607
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 622
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 636
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 648
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 658
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 674
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 686
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 701
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 714
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 722
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 740
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 751
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 763
nr_throttled 6
throttled_time 18923556
1.0
nr_periods 781
nr_throttled 6
throttled_time 18923556
---------- BEGIN SOURCE ----------
import com.sun.management.OperatingSystemMXBean;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.util.List;
public class CpuLoad {
public static void main(String[] args) {
OperatingSystemMXBean osBean = (com.sun.management.OperatingSystemMXBean) ManagementFactory.getOperatingSystemMXBean();
String cgroup = findCgroup();
System.out.println("cgroup: " + cgroup);
int threadCount = args.length > 0 ? Integer.parseInt(args[0]) : 4;
boolean firstTime = true;
while (true) {
System.out.println(osBean.getCpuLoad());
dumpCpuStat(cgroup);
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
if (!firstTime && threadCount-- > 0) {
startThread();
}
firstTime = false;
}
}
private static void dumpCpuStat(String cgroup) {
try {
List<String> strings = Files.readAllLines(FileSystems.getDefault().getPath("/sys/fs/cgroup/cpu/" + cgroup, "cpu.stat"));
strings.forEach(s -> System.out.println(" " + s));
} catch (IOException e) {
e.printStackTrace();
}
}
private static String findCgroup() {
try {
List<String> strings = Files.readAllLines(FileSystems.getDefault().getPath("/proc/self/cgroup"));
return strings.stream().filter(s -> s.contains("cpuacct")).map(s -> s.split(":")[2]).findFirst().get();
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private static void startThread() {
System.out.println("Starting thread");
new Thread(() -> {
long now = System.currentTimeMillis();
while (System.currentTimeMillis() < now + 20000) {
}
System.out.println("Done in thread");
}).start();
}
}
---------- END SOURCE ----------
CUSTOMER SUBMITTED WORKAROUND :
It is possible to grab the cgroup stats and do this calculation. At least a previous record of information should be kept to fix that the cpu load is recent and not since cgroup creation.
FREQUENCY : always