Skip to content
This repository was archived by the owner on Mar 11, 2019. It is now read-only.

Commit 544ff2c

Browse files
committed
refactoring(docker): use the cgroup statistics to retrieve the cpu usage of a docker container
The LinuxHelper used the docker-java API to retrieve the targets that belong to a container, and each target was monitored to obtain the power consumption of the container. We now use the cgroup capabilities to obtain the statistics directly at the container level, thereby reducing the runtime overhead.
1 parent 630e8e9 commit 544ff2c

File tree

8 files changed

+118
-70
lines changed

8 files changed

+118
-70
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ We all stand on the shoulders of giants and get by with a little help from our f
5757
* [Saddle](http://saddle.github.io/) (version 1.3.4 under [Apache 2 license](http://www.apache.org/licenses/LICENSE-2.0)), for data manipulation.
5858
* [Sigar](https://support.hyperic.com/display/SIGAR/Home) (version 1.6.5 under [Apache 2 license](http://www.apache.org/licenses/LICENSE-2.0)), for providing a portable interface for gathering system information.
5959
* [spray-json](http://spray.io/) (version 1.3.2 under [Apache 2 license](http://www.apache.org/licenses/LICENSE-2.0)), for (de)serializing JSON.
60-
* [docker-java](https://github.com/docker-java/docker-java) (version 2.1.4 under [Apache 2 license](http://www.apache.org/licenses/LICENSE-2.0)), for using the JAVA Docker API.
6160
* [scala-influxdb-client](https://github.com/paulgoldbaum/scala-influxdb-client) (version 0.4.5 under [MIT license](https://github.com/paulgoldbaum/scala-influxdb-client/blob/master/LICENSE)), for using an asynchronous scala API for InfluxDB.
6261

6362
# License

powerapi-core/build.sbt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ libraryDependencies ++= Seq(
2323
"org.hyperic" % "sigar" % "1.6.5.132",
2424
"net.java.dev.jna" % "jna" % "4.2.1",
2525
"io.spray" %% "spray-json" % "1.3.2",
26-
"com.github.docker-java" % "docker-java" % "2.1.4",
2726
"com.paulgoldbaum" %% "scala-influxdb-client" % "0.4.5"
2827
)
2928

powerapi-core/src/main/scala/org/powerapi/core/OSHelper.scala

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ package org.powerapi.core
2525
import java.io.{File, IOException}
2626

2727
import scala.collection.JavaConversions._
28+
import scala.io.Source
2829
import scala.sys.process.stringSeqToProcess
2930

30-
import com.github.dockerjava.core.DockerClientBuilder
3131
import com.typesafe.config.Config
3232
import org.apache.logging.log4j.LogManager
3333
import org.hyperic.sigar.ptql.ProcessFinder
@@ -106,6 +106,13 @@ trait OSHelper {
106106
*/
107107
def getProcessCpuTime(process: Process): Long
108108

109+
/**
110+
* Get the docker container execution time on the cpu.
111+
*
112+
* @param container targeted docker container
113+
*/
114+
def getDockerContainerCpuTime(container: Container): Long
115+
109116
/**
110117
* Get the global execution times for the cpu.
111118
*/
@@ -122,8 +129,9 @@ trait OSHelper {
122129
def getTargetCpuTime(target: Target): Long = {
123130
target match {
124131
case process: Process => getProcessCpuTime(process)
125-
case wrapper: Target if wrapper.isInstanceOf[Application] || wrapper.isInstanceOf[Container] =>
126-
getProcesses(wrapper).toSeq.map(process => getProcessCpuTime(process)).sum
132+
case container: Container => getDockerContainerCpuTime(container)
133+
case application: Application =>
134+
getProcesses(application).toSeq.map(process => getProcessCpuTime(process)).sum
127135
case _ => 0L
128136
}
129137
}
@@ -253,14 +261,31 @@ class LinuxHelper extends Configuration(None) with OSHelper {
253261
case ConfigValue(values) => values
254262
case _ => Map()
255263
}
256-
lazy val docker = DockerClientBuilder.getInstance("unix:///var/run/docker.sock").build()
264+
/**
265+
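* Path of the file listing the mounted filesystems (read from `powerapi.procfs.mounts-path`, defaults to `/proc/mounts`).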
266+
*/
267+
lazy val mountsPath = load {
268+
_.getString("powerapi.procfs.mounts-path")
269+
} match {
270+
case ConfigValue(p) => p
271+
case _ => "/proc/mounts"
272+
}
273+
257274
private val log = LogManager.getLogger
258275
private val PSFormat = """^\s*(\d+)\s*""".r
259276
private val GlobalStatFormat = """cpu\s+([\d\s]+)""".r
260277
private val TimeInStateFormat = """(\d+)\s+(\d+)""".r
261-
private val procUserTimeIndex = 13
262-
private val procSysTimeIndex = 14
263-
private val procGlobalIdleTime = 3
278+
private val MountFormat = """^.+\s+(.+)\s+(.+)\s+(.+)\s+.+\s+.+$""".r
279+
280+
/**
281+
* Returns the mount path of a given cgroup if it exists.
282+
*/
283+
def cgroupMntPoint(name: String): Option[String] = {
284+
Source.fromFile(mountsPath).getLines().collectFirst {
285+
case MountFormat(mnt, typ, tokens) if typ == "cgroup" && tokens.contains(name) =>
286+
mnt
287+
}
288+
}
264289

265290
def getCPUFrequencies: Set[Long] = {
266291
(for (index <- topology.values.flatten) yield {
@@ -281,10 +306,6 @@ class LinuxHelper extends Configuration(None) with OSHelper {
281306
Seq("ps", "-C", app.name, "-o", "pid", "--no-headers").lineStream_!.map {
282307
case PSFormat(pid) => Process(pid.toInt)
283308
}.toSet
284-
case cont: Container =>
285-
docker.topContainerCmd(cont.id).withPsArgs("-Aopid").exec.getProcesses.flatten.map(
286-
process => Process(process.toInt)
287-
).toSet
288309
case process: Process =>
289310
Set(process)
290311
case _ =>
@@ -310,7 +331,7 @@ class LinuxHelper extends Configuration(None) with OSHelper {
310331

311332
val statLine = source.getLines.toIndexedSeq(0).split("\\s")
312333
// User time + System time
313-
statLine(procUserTimeIndex).toLong + statLine(procSysTimeIndex).toLong
334+
statLine(13).toLong + statLine(14).toLong
314335
})
315336
}
316337
catch {
@@ -320,6 +341,20 @@ class LinuxHelper extends Configuration(None) with OSHelper {
320341
}
321342
}
322343

344+
/**
345+
* Use the cpuacct cgroup to retrieve the overall cpu statistics for a docker container.
346+
*/
347+
def getDockerContainerCpuTime(container: Container): Long = {
348+
// TODO: Could also be replaced by a direct call to the Docker API with stream = 0. The main benefit would be the ability to handle a remote Docker server.
349+
cgroupMntPoint("cpuacct") match {
350+
case Some(path) if new File(s"$path/docker/${container.id}").isDirectory =>
351+
Source.fromFile(s"$path/docker/${container.id}/cpuacct.stat").getLines().map(_.split("\\s")(1).toLong).sum
352+
case _ =>
353+
log.warn("i/o exception, cpuacct cgroup not mounted for the container {}", s"${container.id}")
354+
0l
355+
}
356+
}
357+
323358
def getGlobalCpuTimes: GlobalCpuTimes = {
324359
try {
325360
using(globalStatPath)(source => {
@@ -332,7 +367,7 @@ class LinuxHelper extends Configuration(None) with OSHelper {
332367
* @see http://lxr.free-electrons.com/source/kernel/sched/cputime.c#L165
333368
*/
334369
case GlobalStatFormat(times) =>
335-
val idleTime = times.split("\\s")(procGlobalIdleTime).toLong
370+
val idleTime = times.split("\\s")(3).toLong
336371
val activeTime = times.split("\\s").slice(0, 8).map(_.toLong).sum - idleTime
337372

338373
GlobalCpuTimes(idleTime, activeTime)
@@ -503,6 +538,8 @@ class SigarHelper(sigar: SigarProxy) extends OSHelper {
503538
}
504539
}
505540

541+
def getDockerContainerCpuTime(container: Container): Long = throw new SigarException("not yet implemented with sigar")
542+
506543
def getGlobalCpuTimes: GlobalCpuTimes = {
507544
try {
508545
val idleTime = sigar.getCpu.getIdle

powerapi-core/src/main/scala/org/powerapi/core/target/Target.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,10 @@ case class Application(name: String) extends Target {
5656
* Monitoring targets for a specific container.
5757
*
5858
* @param id id of the container.
59+
* @param name name of the container.
5960
* @author <a href="mailto:l.huertas.pro@gmail.com">Loïc Huertas</a>
6061
*/
61-
case class Container(id: String) extends Target {
62+
case class Container(id: String, name: String = "unknown") extends Target {
6263
override def toString: String = id
6364
}
6465

powerapi-core/src/test/resources/configuration-suite.conf

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -116,52 +116,3 @@ rapl.supported-architectures = [
116116
{id = 63, model = "Haswell-EP"}
117117
{id = 61, model = "Broadwell"}
118118
]
119-
120-
121-
powerapi.disk.formulae = [
122-
{ name = "sda",
123-
models = {
124-
read = [
125-
{ condition = "<= 78966784", coeffs = [ 0.00, 1.01e-07 ] }
126-
{ condition = "> 78966784", coeffs = [ 7.62, 1.72e-10 ] }
127-
],
128-
write = [
129-
{ condition = "<= 66674688", coeffs = [ 0.00, 1.13e-07 ] }
130-
{ condition = "> 66674688", coeffs = [ 8.33, 1.79e-09 ] }
131-
]
132-
}
133-
}
134-
135-
{ name = "sdb",
136-
models = {
137-
read = [
138-
{ condition = "<= 10", coeffs = [ 0.00, 0.003 ] }
139-
{ condition = "> 10", coeffs = [ 2, 0.15 ] }
140-
],
141-
write = [
142-
{ condition = "<= 5", coeffs = [ 0.00, 0.001 ] }
143-
{ condition = "> 5", coeffs = [ 3, 0.25 ] }
144-
]
145-
}
146-
}
147-
]
148-
powerapi.disk.interval = 1s
149-
150-
disk-test {
151-
powerapi.disk.formulae = [
152-
{ name = "sdb",
153-
models = {
154-
read = [
155-
{ condition = "<= 100", coeffs = [ 0.00, 0.0013 ] }
156-
{ condition = "> 100", coeffs = [ 2.13, 0.28 ] }
157-
],
158-
write = [
159-
{ condition = "<= 1000", coeffs = [ 0.00, 0.0016 ] }
160-
{ condition = "> 1000", coeffs = [ 3.15, 0.40 ] }
161-
]
162-
}
163-
}
164-
]
165-
166-
powerapi.disk.interval = 250ms
167-
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
rootfs / rootfs rw 0 0
2+
sysfs /sys sysfs rw,nosuid,nodev,noexec,relatime 0 0
3+
proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0
4+
udev /dev devtmpfs rw,relatime,size=8196944k,nr_inodes=2049236,mode=755 0 0
5+
devpts /dev/pts devpts rw,nosuid,noexec,relatime,gid=5,mode=620,ptmxmode=000 0 0
6+
tmpfs /run tmpfs rw,nosuid,noexec,relatime,size=1641624k,mode=755 0 0
7+
/dev/disk/by-uuid/8141fc2e-ec4b-4475-a334-91678f8f1fe9 / ext4 rw,relatime,errors=remount-ro,data=ordered 0 0
8+
none /sys/fs/cgroup tmpfs rw,relatime,size=4k,mode=755 0 0
9+
none /sys/fs/fuse/connections fusectl rw,relatime 0 0
10+
none /sys/kernel/debug debugfs rw,relatime 0 0
11+
none /sys/kernel/security securityfs rw,relatime 0 0
12+
none /run/lock tmpfs rw,nosuid,nodev,noexec,relatime,size=5120k 0 0
13+
none /run/shm tmpfs rw,nosuid,nodev,relatime 0 0
14+
cgroup /sys/fs/cgroup/cpuset cgroup rw,relatime,cpuset 0 0
15+
none /run/user tmpfs rw,nosuid,nodev,noexec,relatime,size=102400k,mode=755 0 0
16+
none /sys/fs/pstore pstore rw,relatime 0 0
17+
cgroup /sys/fs/cgroup/cpu cgroup rw,relatime,cpu 0 0
18+
cgroup /sys/fs/cgroup/cpuacct cgroup rw,relatime,cpuacct 0 0
19+
cgroup /sys/fs/cgroup/memory cgroup rw,relatime,memory 0 0
20+
cgroup /sys/fs/cgroup/devices cgroup rw,relatime,devices 0 0
21+
cgroup /sys/fs/cgroup/freezer cgroup rw,relatime,freezer 0 0
22+
cgroup /sys/fs/cgroup/blkio cgroup rw,relatime,blkio 0 0
23+
cgroup /sys/fs/cgroup/perf_event cgroup rw,relatime,perf_event 0 0
24+
cgroup /sys/fs/cgroup/hugetlb cgroup rw,relatime,hugetlb 0 0
25+
systemd /sys/fs/cgroup/systemd cgroup rw,nosuid,nodev,noexec,relatime,name=systemd 0 0
26+
/dev/disk/by-uuid/8141fc2e-ec4b-4475-a334-91678f8f1fe9 /var/lib/docker/aufs ext4 rw,relatime,errors=remount-ro,data=ordered 0 0
27+
gvfsd-fuse /run/user/1000/gvfs fuse.gvfsd-fuse rw,nosuid,nodev,relatime,user_id=1000,group_id=1000 0 0
28+
none /var/lib/docker/aufs/mnt/797e80d132c3c82ab9891574a358d4614097d1315945441a0bc04a713c490421 aufs rw,relatime,si=6b8a6d0dfd2c1c66,dio 0 0
29+
shm /var/lib/docker/containers/797e80d132c3c82ab9891574a358d4614097d1315945441a0bc04a713c490421/shm tmpfs rw,nosuid,nodev,noexec,relatime,size=65536k 0 0
30+
mqueue /var/lib/docker/containers/797e80d132c3c82ab9891574a358d4614097d1315945441a0bc04a713c490421/mqueue mqueue rw,nosuid,nodev,noexec,relatime 0 0
31+
proc /run/docker/netns/628d2274152b proc rw,nosuid,nodev,noexec,relatime 0 0
32+
none /var/lib/docker/aufs/mnt/b20b6f81720ba6686b36d8a43727025cd239bfda45203a7710851fea853bda4c aufs rw,relatime,si=6b8a6d0d66fa0c66,dio 0 0
33+
shm /var/lib/docker/containers/b20b6f81720ba6686b36d8a43727025cd239bfda45203a7710851fea853bda4c/shm tmpfs rw,nosuid,nodev,noexec,relatime,size=65536k 0 0
34+
mqueue /var/lib/docker/containers/b20b6f81720ba6686b36d8a43727025cd239bfda45203a7710851fea853bda4c/mqueue mqueue rw,nosuid,nodev,noexec,relatime 0 0
35+
proc /run/docker/netns/2bff8e7c00b5 proc rw,nosuid,nodev,noexec,relatime 0 0
36+
proc /run/docker/netns/default proc rw,nosuid,nodev,noexec,relatime 0 0
37+
none /var/lib/docker/aufs/mnt/98f216c23b2f3f643577b18660825144a69fa5af141cc05e76e8c1c2b4f21658 aufs rw,relatime,si=6b8a6d0ac968fc66,dio 0 0
38+
shm /var/lib/docker/containers/98f216c23b2f3f643577b18660825144a69fa5af141cc05e76e8c1c2b4f21658/shm tmpfs rw,nosuid,nodev,noexec,relatime,size=65536k 0 0
39+
mqueue /var/lib/docker/containers/98f216c23b2f3f643577b18660825144a69fa5af141cc05e76e8c1c2b4f21658/mqueue mqueue rw,nosuid,nodev,noexec,relatime 0 0
40+
proc /run/docker/netns/a8ec38ebc3f0 proc rw,nosuid,nodev,noexec,relatime 0 0
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
user 2502902
2+
system 277405

powerapi-core/src/test/scala/org/powerapi/core/OSHelperSuite.scala

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,16 +62,17 @@ class OSHelperSuite extends UnitTest {
6262
def getProcessCpuTime(process: Process): Long = process match {
6363
case Process(1) => 10
6464
case Process(2) => 11
65-
case Process(3) => 20
66-
case Process(4) => 21
6765
case Process(10) => 30
6866
}
6967

68+
def getDockerContainerCpuTime(container: Container): Long = container match {
69+
case Container("abcd", "n") => 20 + 21
70+
}
71+
7072
def getGlobalCpuTimes: GlobalCpuTimes = ???
7173

7274
def getProcesses(target: Target): Set[Process] = target match {
7375
case Application("firefox") => Set(1, 2)
74-
case Container("docker") => Set(3, 4)
7576
case Process(10) => Set(10)
7677
}
7778

@@ -81,11 +82,10 @@ class OSHelperSuite extends UnitTest {
8182
}
8283

8384
helper.getProcesses(Application("firefox")) should equal(Set(Process(1), Process(2)))
84-
helper.getProcesses(Container("docker")) should equal(Set(Process(3), Process(4)))
8585
helper.getProcesses(Process(10)) should equal(Set(Process(10)))
8686
helper.getTargetCpuTime(Process(10)) should equal(30)
8787
helper.getTargetCpuTime(Application("firefox")) should equal(10 + 11)
88-
helper.getTargetCpuTime(Container("docker")) should equal(20 + 21)
88+
helper.getTargetCpuTime(Container("abcd", "n")) should equal(20 + 21)
8989
helper.getTargetCpuTime(All) should equal(0)
9090
helper.getAllDirectories(new File(s"${basepath}/sys/fs/cgroup/blkio")) should contain theSameElementsAs Seq(
9191
new File(s"${basepath}/sys/fs/cgroup/blkio/powerapi"),
@@ -106,6 +106,7 @@ class OSHelperSuite extends UnitTest {
106106
linuxHelper.cgroupSysFSPath should equal("p6")
107107
linuxHelper.diskStatPath should equal("p7")
108108
linuxHelper.topology should equal(Map(0 -> Set(0, 4), 1 -> Set(1, 5), 2 -> Set(2, 6), 3 -> Set(3, 7)))
109+
linuxHelper.mountsPath should equal("p1/mounts")
109110
}
110111

111112
it should "return the list of available frequencies" in {
@@ -134,6 +135,24 @@ class OSHelperSuite extends UnitTest {
134135
helper.getProcessCpuTime(10) should equal(0)
135136
}
136137

138+
it should "return the cgroup mount point if it exists" in {
139+
val helper = new LinuxHelper {
140+
override lazy val mountsPath = s"${basepath}proc/mounts"
141+
}
142+
143+
helper.cgroupMntPoint("cpuacct") should equal (Some("/sys/fs/cgroup/cpuacct"))
144+
helper.cgroupMntPoint("test") should equal (None)
145+
}
146+
147+
it should "return the cpu time of a given docker container" in {
148+
val helper = new LinuxHelper {
149+
override def cgroupMntPoint(name: String): Option[String] = Some(s"${basepath}sys/fs/cgroup/cpuacct")
150+
}
151+
152+
helper.getDockerContainerCpuTime(Container("abcd", "n")) should equal(2502902 + 277405)
153+
helper.getDockerContainerCpuTime(Container("test", "n2")) should equal(0)
154+
}
155+
137156
it should "return the global cpu time" in {
138157
val helper = new LinuxHelper {
139158
override lazy val globalStatPath = s"${basepath}proc/stat"

0 commit comments

Comments
 (0)