Update godeps

This commit is contained in:
Manuel de Brito Fontes 2016-07-11 23:42:47 -04:00
parent 8b25cc67a5
commit a736fba0e1
769 changed files with 15495 additions and 7996 deletions

View file

@ -9,7 +9,7 @@ import (
)
type Manager interface {
// Apply cgroup configuration to the process with the specified pid
// Applies cgroup configuration to the process with the specified pid
Apply(pid int) error
// Returns the PIDs inside the cgroup set

View file

@ -14,6 +14,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
)
var (
@ -30,6 +31,7 @@ var (
&NetPrioGroup{},
&PerfEventGroup{},
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
@ -94,11 +96,10 @@ func getCgroupRoot() (string, error) {
}
type cgroupData struct {
root string
parent string
name string
config *configs.Cgroup
pid int
root string
innerPath string
config *configs.Cgroup
pid int
}
func (m *Manager) Apply(pid int) (err error) {
@ -129,12 +130,9 @@ func (m *Manager) Apply(pid int) (err error) {
return cgroups.EnterPid(m.Paths, pid)
}
m.mu.Lock()
defer m.mu.Unlock()
paths := make(map[string]string)
defer func() {
if err != nil {
cgroups.RemovePaths(paths)
}
}()
for _, sys := range subsystems {
if err := sys.Apply(d); err != nil {
return err
@ -144,7 +142,9 @@ func (m *Manager) Apply(pid int) (err error) {
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(sys.Name())
if err != nil {
if cgroups.IsNotFound(err) {
// The non-presence of the devices subsystem is
// considered fatal for security reasons.
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
continue
}
return err
@ -267,45 +267,31 @@ func getCgroupPath(c *configs.Cgroup) (string, error) {
return d.path("devices")
}
// pathClean makes a path safe for use with filepath.Join. This is done by not
// only cleaning the path, but also (if the path is relative) adding a leading
// '/' and cleaning it (then removing the leading '/'). This ensures that a
// path resulting from prepending another path will always resolve to lexically
// be a subdirectory of the prefixed path. This is all done lexically, so paths
// that include symlinks won't be safe as a result of using pathClean.
func pathClean(path string) string {
// Ensure that all paths are cleaned (especially problematic ones like
// "/../../../../../" which can cause lots of issues).
path = filepath.Clean(path)
// If the path isn't absolute, we need to do more processing to fix paths
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
// paths to relative ones.
if !filepath.IsAbs(path) {
path = filepath.Clean(string(os.PathSeparator) + path)
// This can't fail, as (by definition) all paths are relative to root.
path, _ = filepath.Rel(string(os.PathSeparator), path)
}
// Clean the path again for good measure.
return filepath.Clean(path)
}
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
root, err := getCgroupRoot()
if err != nil {
return nil, err
}
// Clean the parent slice path.
c.Parent = pathClean(c.Parent)
if (c.Name != "" || c.Parent != "") && c.Path != "" {
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
}
// XXX: Do not remove this code. Path safety is important! -- cyphar
cgPath := libcontainerUtils.CleanPath(c.Path)
cgParent := libcontainerUtils.CleanPath(c.Parent)
cgName := libcontainerUtils.CleanPath(c.Name)
innerPath := cgPath
if innerPath == "" {
innerPath = filepath.Join(cgParent, cgName)
}
return &cgroupData{
root: root,
parent: c.Parent,
name: c.Name,
config: c,
pid: pid,
root: root,
innerPath: innerPath,
config: c,
pid: pid,
}, nil
}
@ -333,11 +319,10 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return "", err
}
cgPath := filepath.Join(raw.parent, raw.name)
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(cgPath) {
if filepath.IsAbs(raw.innerPath) {
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
return filepath.Join(raw.root, filepath.Base(mnt), cgPath), nil
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
}
parentPath, err := raw.parentPath(subsystem, mnt, root)
@ -345,7 +330,7 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
return "", err
}
return filepath.Join(parentPath, cgPath), nil
return filepath.Join(parentPath, raw.innerPath), nil
}
func (raw *cgroupData) join(subsystem string) (string, error) {
@ -366,9 +351,12 @@ func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
return fmt.Errorf("no such directory for %s", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
}
return nil
}
func readFile(dir, file string) (string, error) {

View file

@ -12,6 +12,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
)
type CpusetGroup struct {
@ -88,7 +89,7 @@ func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []b
// it's parent.
func (s *CpusetGroup) ensureParent(current, root string) error {
parent := filepath.Dir(current)
if filepath.Clean(parent) == root {
if libcontainerUtils.CleanPath(parent) == root {
return nil
}
// Avoid infinite recursion.

View file

@ -5,6 +5,7 @@ package fs
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
)
type DevicesGroup struct {
@ -25,6 +26,23 @@ func (s *DevicesGroup) Apply(d *cgroupData) error {
}
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
if system.RunningInUserNS() {
return nil
}
devices := cgroup.Resources.Devices
if len(devices) > 0 {
for _, dev := range devices {
file := "devices.deny"
if dev.Allow {
file = "devices.allow"
}
if err := writeFile(path, file, dev.CgroupString()); err != nil {
return err
}
}
return nil
}
if !cgroup.Resources.AllowAllDevices {
if err := writeFile(path, "devices.deny", "a"); err != nil {
return err

View file

@ -9,6 +9,7 @@ import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
@ -26,38 +27,75 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if memoryAssigned(d.config) {
if path != "" {
if err := os.MkdirAll(path, 0755); err != nil {
return err
}
}
// We have to set kernel memory here, as we can't change it once
// processes have been attached.
if err := s.SetKernelMemory(path, d.config); err != nil {
return err
}
// reset error.
err = nil
if path == "" {
// Invalid input.
return fmt.Errorf("invalid path for memory cgroups: %+v", d)
}
defer func() {
if err != nil {
os.RemoveAll(path)
}
}()
if !cgroups.PathExists(path) {
if err = os.MkdirAll(path, 0755); err != nil {
return err
}
}
if memoryAssigned(d.config) {
// We have to set kernel memory here, as we can't change it once
// processes have been attached to the cgroup.
if err = s.SetKernelMemory(path, d.config); err != nil {
return err
}
}
// We need to join memory cgroup after set memory limits, because
// kmem.limit_in_bytes can only be set when the cgroup is empty.
_, err = d.join("memory")
if err != nil && !cgroups.IsNotFound(err) {
if _, jerr := d.join("memory"); jerr != nil && !cgroups.IsNotFound(jerr) {
err = jerr
return err
}
return nil
}
func getModifyTime(path string) (time.Time, error) {
stat, err := os.Stat(path)
if err != nil {
return time.Time{}, fmt.Errorf("failed to get memory cgroups creation time: %v", err)
}
return stat.ModTime(), nil
}
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
// This has to be done separately because it has special constraints (it
// can't be done after there are processes attached to the cgroup).
if cgroup.Resources.KernelMemory > 0 {
// This has to be done separately because it has special
// constraints (it can only be initialized before setting up a
// hierarchy or adding a task to the cgroups. However, if
// sucessfully initialized, it can be updated anytime afterwards)
if cgroup.Resources.KernelMemory != 0 {
// Is kmem.limit_in_bytes already set?
// memory.kmem.max_usage_in_bytes is a read-only file. Use it to get cgroups creation time.
kmemCreationTime, err := getModifyTime(filepath.Join(path, "memory.kmem.max_usage_in_bytes"))
if err != nil {
return err
}
kmemLimitsUpdateTime, err := getModifyTime(filepath.Join(path, "memory.kmem.limit_in_bytes"))
if err != nil {
return err
}
// kmem.limit_in_bytes has already been set if its update time is after that of creation time.
// We use `!=` op instead of `>` because updates are losing precision compared to creation.
kmemInitialized := !kmemLimitsUpdateTime.Equal(kmemCreationTime)
if !kmemInitialized {
// If there's already tasks in the cgroup, we can't change the limit either
tasks, err := getCgroupParamString(path, "tasks")
if err != nil {
return err
}
if tasks != "" {
return fmt.Errorf("cannot set kmem.limit_in_bytes after task have joined this cgroup")
}
}
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
return err
}
@ -65,19 +103,65 @@ func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error
return nil
}
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// When memory and swap memory are both set, we need to handle the cases
// for updating container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap > 0 {
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return err
}
// When update memory limit, we should adapt the write sequence
// for memory and swap memory, so it won't fail because the new
// value and the old value don't fit kernel's validation.
if memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
}
return nil
}
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
if err := setMemoryAndSwap(path, cgroup); err != nil {
return err
}
if err := s.SetKernelMemory(path, cgroup); err != nil {
return err
}
if cgroup.Resources.MemoryReservation != 0 {
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if cgroup.Resources.KernelMemoryTCP != 0 {
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
return err
}
}
@ -86,14 +170,14 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
if cgroup.Resources.MemorySwappiness >= 0 && cgroup.Resources.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.Resources.MemorySwappiness, 10)); err != nil {
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
return nil
} else if int64(*cgroup.Resources.MemorySwappiness) >= 0 && int64(*cgroup.Resources.MemorySwappiness) <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
return err
}
} else if cgroup.Resources.MemorySwappiness == -1 {
return nil
} else {
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", cgroup.Resources.MemorySwappiness)
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", int64(*cgroup.Resources.MemorySwappiness))
}
return nil
@ -139,6 +223,11 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
return err
}
stats.MemoryStats.KernelUsage = kernelUsage
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
if err != nil {
return err
}
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
return nil
}
@ -148,8 +237,9 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
cgroup.Resources.MemoryReservation != 0 ||
cgroup.Resources.MemorySwap > 0 ||
cgroup.Resources.KernelMemory > 0 ||
cgroup.Resources.KernelMemoryTCP > 0 ||
cgroup.Resources.OomKillDisable ||
cgroup.Resources.MemorySwappiness != -1
(cgroup.Resources.MemorySwappiness != nil && *cgroup.Resources.MemorySwappiness != -1)
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
@ -162,6 +252,7 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
value, err := getCgroupParamUint(path, usage)
if err != nil {
@ -187,6 +278,14 @@ func getMemoryData(path, name string) (cgroups.MemoryData, error) {
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
}
memoryData.Failcnt = value
value, err = getCgroupParamUint(path, limit)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
}
memoryData.Limit = value
return memoryData, nil
}

View file

@ -9,6 +9,7 @@ import (
type NameGroup struct {
GroupName string
Join bool
}
func (s *NameGroup) Name() string {
@ -16,6 +17,10 @@ func (s *NameGroup) Name() string {
}
func (s *NameGroup) Apply(d *cgroupData) error {
if s.Join {
// ignore errors if the named cgroup does not exist
d.join(s.GroupName)
}
return nil
}
@ -24,6 +29,9 @@ func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
}
func (s *NameGroup) Remove(d *cgroupData) error {
if s.Join {
removePath(d.path(s.GroupName))
}
return nil
}

View file

@ -4,6 +4,7 @@ package fs
import (
"fmt"
"path/filepath"
"strconv"
"github.com/opencontainers/runc/libcontainer/cgroups"
@ -47,11 +48,26 @@ func (s *PidsGroup) Remove(d *cgroupData) error {
}
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
value, err := getCgroupParamUint(path, "pids.current")
current, err := getCgroupParamUint(path, "pids.current")
if err != nil {
return fmt.Errorf("failed to parse pids.current - %s", err)
}
stats.PidsStats.Current = value
maxString, err := getCgroupParamString(path, "pids.max")
if err != nil {
return fmt.Errorf("failed to parse pids.max - %s", err)
}
// Default if pids.max == "max" is 0 -- which represents "no limit".
var max uint64
if maxString != "max" {
max, err = parseUint(maxString, 10, 64)
if err != nil {
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
}
}
stats.PidsStats.Current = current
stats.PidsStats.Limit = max
return nil
}

View file

@ -12,7 +12,6 @@ import (
)
var (
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
ErrNotValidFormat = errors.New("line is not a valid key value format")
)

View file

@ -11,6 +11,7 @@ type ThrottlingData struct {
ThrottledTime uint64 `json:"throttled_time,omitempty"`
}
// CpuUsage denotes the usage of a CPU.
// All CPU stats are aggregate since container inception.
type CpuUsage struct {
// Total CPU time consumed.
@ -36,7 +37,9 @@ type MemoryData struct {
Usage uint64 `json:"usage,omitempty"`
MaxUsage uint64 `json:"max_usage,omitempty"`
Failcnt uint64 `json:"failcnt"`
Limit uint64 `json:"limit"`
}
type MemoryStats struct {
// memory used for cache
Cache uint64 `json:"cache,omitempty"`
@ -44,14 +47,19 @@ type MemoryStats struct {
Usage MemoryData `json:"usage,omitempty"`
// usage of memory + swap
SwapUsage MemoryData `json:"swap_usage,omitempty"`
// usafe of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
// usage of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
// usage of kernel TCP memory
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
}
type PidsStats struct {
// number of pids in the cgroup
Current uint64 `json:"current,omitempty"`
// active pids hard limit
Limit uint64 `json:"limit,omitempty"`
}
type BlkioStatEntry struct {
@ -78,7 +86,7 @@ type HugetlbStats struct {
Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// number of times htgetlb usage allocation failure.
// number of times hugetlb usage allocation failure.
Failcnt uint64 `json:"failcnt"`
}

View file

@ -5,6 +5,7 @@ package cgroups
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
@ -12,17 +13,19 @@ import (
"strings"
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/go-units"
)
const cgroupNamePrefix = "name="
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
// We are not using mount.GetMounts() because it's super-inefficient,
// parsing it directly sped up x10 times because of not using Sscanf.
// It was one of two major performance drawbacks in container start.
if !isSubsystemAvailable(subsystem) {
return "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
@ -47,6 +50,9 @@ func FindCgroupMountpoint(subsystem string) (string, error) {
}
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
if !isSubsystemAvailable(subsystem) {
return "", "", NewNotFoundError(subsystem)
}
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", "", err
@ -70,6 +76,15 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
return "", "", NewNotFoundError(subsystem)
}
func isSubsystemAvailable(subsystem string) bool {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return false
}
_, avail := cgroups[subsystem]
return avail
}
func FindCgroupMountpointDir() (string, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
@ -121,42 +136,63 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
return getControllerPath(m.Subsystems[0], cgroups)
}
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
res := make([]Mount, 0, len(ss))
scanner := bufio.NewScanner(mi)
numFound := 0
for scanner.Scan() && numFound < len(ss) {
txt := scanner.Text()
sepIdx := strings.Index(txt, " - ")
if sepIdx == -1 {
return nil, fmt.Errorf("invalid mountinfo format")
}
if txt[sepIdx+3:sepIdx+9] != "cgroup" {
continue
}
fields := strings.Split(txt, " ")
m := Mount{
Mountpoint: fields[4],
Root: fields[3],
}
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if !ss[opt] {
continue
}
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
} else {
m.Subsystems = append(m.Subsystems, opt)
}
numFound++
}
res = append(res, m)
}
if err := scanner.Err(); err != nil {
return nil, err
}
return res, nil
}
func GetCgroupMounts() ([]Mount, error) {
mounts, err := mount.GetMounts()
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
}
defer f.Close()
all, err := GetAllSubsystems()
all, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
return nil, err
}
allMap := make(map[string]bool)
for _, s := range all {
for s := range all {
allMap[s] = true
}
res := []Mount{}
for _, mount := range mounts {
if mount.Fstype == "cgroup" {
m := Mount{Mountpoint: mount.Mountpoint, Root: mount.Root}
for _, opt := range strings.Split(mount.VfsOpts, ",") {
if strings.HasPrefix(opt, cgroupNamePrefix) {
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
}
if allMap[opt] {
m.Subsystems = append(m.Subsystems, opt)
}
}
res = append(res, m)
}
}
return res, nil
return getCgroupMountsHelper(allMap, f)
}
// Returns all the cgroup subsystems supported by the kernel
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
func GetAllSubsystems() ([]string, error) {
f, err := os.Open("/proc/cgroups")
if err != nil {
@ -182,7 +218,7 @@ func GetAllSubsystems() ([]string, error) {
return subsystems, nil
}
// Returns the relative path to the cgroup docker is running in.
// GetThisCgroupDir returns the relative path to the cgroup docker is running in.
func GetThisCgroupDir(subsystem string) (string, error) {
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
if err != nil {
@ -226,6 +262,8 @@ func readProcsFile(dir string) ([]int, error) {
return out, nil
}
// ParseCgroupFile parses the given cgroup file, typically from
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
func ParseCgroupFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
@ -233,7 +271,12 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
defer f.Close()
s := bufio.NewScanner(f)
return parseCgroupFromReader(f)
}
// helper function for ParseCgroupFile to make testing easier
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
s := bufio.NewScanner(r)
cgroups := make(map[string]string)
for s.Scan() {
@ -242,7 +285,16 @@ func ParseCgroupFile(path string) (map[string]string, error) {
}
text := s.Text()
parts := strings.Split(text, ":")
// from cgroups(7):
// /proc/[pid]/cgroup
// ...
// For each cgroup hierarchy ... there is one entry
// containing three colon-separated fields of the form:
// hierarchy-ID:subsystem-list:cgroup-path
parts := strings.SplitN(text, ":", 3)
if len(parts) < 3 {
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
}
for _, subs := range strings.Split(parts[1], ",") {
cgroups[subs] = parts[2]
@ -309,7 +361,7 @@ func RemovePaths(paths map[string]string) (err error) {
return nil
}
}
return fmt.Errorf("Failed to remove paths: %s", paths)
return fmt.Errorf("Failed to remove paths: %v", paths)
}
func GetHugePageSize() ([]string, error) {

View file

@ -11,15 +11,22 @@ const (
)
type Cgroup struct {
Name string `json:"name"`
// Deprecated, use Path instead
Name string `json:"name,omitempty"`
// name of parent cgroup or slice
Parent string `json:"parent"`
// name of parent of cgroup or slice
// Deprecated, use Path instead
Parent string `json:"parent,omitempty"`
// Path specifies the path to cgroups that are created and/or joined by the container.
// The path is assumed to be relative to the host system cgroup mountpoint.
Path string `json:"path"`
// ScopePrefix decribes prefix for the scope name
ScopePrefix string `json:"scope_prefix"`
// Paths represent the cgroups paths to join
// Paths represent the absolute cgroups paths to join.
// This takes precedence over Path.
Paths map[string]string
// Resources contains various cgroups settings to apply
@ -28,11 +35,14 @@ type Cgroup struct {
type Resources struct {
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
AllowAllDevices bool `json:"allow_all_devices"`
// Deprecated
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
// Deprecated
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
// Deprecated
DeniedDevices []*Device `json:"denied_devices,omitempty"`
AllowedDevices []*Device `json:"allowed_devices"`
DeniedDevices []*Device `json:"denied_devices"`
Devices []*Device `json:"devices"`
// Memory limit (in bytes)
Memory int64 `json:"memory"`
@ -46,6 +56,9 @@ type Resources struct {
// Kernel memory limit (in bytes)
KernelMemory int64 `json:"kernel_memory"`
// Kernel memory limit for TCP use (in bytes)
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
@ -101,7 +114,7 @@ type Resources struct {
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness int64 `json:"memory_swappiness"`
MemorySwappiness *int64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`

View file

@ -3,7 +3,11 @@ package configs
import (
"bytes"
"encoding/json"
"fmt"
"os/exec"
"time"
"github.com/Sirupsen/logrus"
)
type Rlimit struct {
@ -29,7 +33,7 @@ type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"`
}
// An action to be taken upon rule match in Seccomp
// Action is taken upon rule match in Seccomp
type Action int
const (
@ -40,7 +44,7 @@ const (
Trace
)
// A comparison operator to be used when matching syscall arguments in Seccomp
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
type Operator int
const (
@ -53,7 +57,7 @@ const (
MaskEqualTo
)
// A rule to match a specific syscall argument in Seccomp
// Arg is a rule to match a specific syscall argument in Seccomp
type Arg struct {
Index uint `json:"index"`
Value uint64 `json:"value"`
@ -61,7 +65,7 @@ type Arg struct {
Op Operator `json:"op"`
}
// An rule to match a syscall in Seccomp
// Syscall is a rule to match a syscall in Seccomp
type Syscall struct {
Name string `json:"name"`
Action Action `json:"action"`
@ -128,15 +132,15 @@ type Config struct {
// AppArmorProfile specifies the profile to apply to the process running in the container and is
// change at the time the process is execed
AppArmorProfile string `json:"apparmor_profile"`
AppArmorProfile string `json:"apparmor_profile,omitempty"`
// ProcessLabel specifies the label to apply to the process running in the container. It is
// commonly used by selinux
ProcessLabel string `json:"process_label"`
ProcessLabel string `json:"process_label,omitempty"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits"`
Rlimits []Rlimit `json:"rlimits,omitempty"`
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
// for a process. Valid values are between the range [-1000, '1000'], where processes with
@ -171,12 +175,22 @@ type Config struct {
// A default action to be taken if no rules match is also given.
Seccomp *Seccomp `json:"seccomp"`
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
// Hooks are a collection of actions to perform at various container lifecycle events.
// Hooks are not able to be marshaled to json but they are also not needed to.
Hooks *Hooks `json:"-"`
// CommandHooks are serialized to JSON, but other hooks are not.
Hooks *Hooks
// Version is the version of opencontainer specification that is supported.
Version string `json:"version"`
// Labels are user defined metadata that is stored in the config and populated on the state
Labels []string `json:"labels"`
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
// callers keyring in this case.
NoNewKeyring bool `json:"no_new_keyring"`
}
type Hooks struct {
@ -191,12 +205,59 @@ type Hooks struct {
Poststop []Hook
}
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
var state struct {
Prestart []CommandHook
Poststart []CommandHook
Poststop []CommandHook
}
if err := json.Unmarshal(b, &state); err != nil {
return err
}
deserialize := func(shooks []CommandHook) (hooks []Hook) {
for _, shook := range shooks {
hooks = append(hooks, shook)
}
return hooks
}
hooks.Prestart = deserialize(state.Prestart)
hooks.Poststart = deserialize(state.Poststart)
hooks.Poststop = deserialize(state.Poststop)
return nil
}
func (hooks Hooks) MarshalJSON() ([]byte, error) {
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
for _, hook := range hooks {
switch chook := hook.(type) {
case CommandHook:
serializableHooks = append(serializableHooks, chook)
default:
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
}
}
return serializableHooks
}
return json.Marshal(map[string]interface{}{
"prestart": serialize(hooks.Prestart),
"poststart": serialize(hooks.Poststart),
"poststop": serialize(hooks.Poststop),
})
}
// HookState is the payload provided to a hook on execution.
type HookState struct {
Version string `json:"version"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
Version string `json:"ociVersion"`
ID string `json:"id"`
Pid int `json:"pid"`
Root string `json:"root"`
BundlePath string `json:"bundlePath"`
}
type Hook interface {
@ -204,7 +265,7 @@ type Hook interface {
Run(HookState) error
}
// NewFunctionHooks will call the provided function when the hook is run.
// NewFunctionHook will call the provided function when the hook is run.
func NewFunctionHook(f func(HookState) error) FuncHook {
return FuncHook{
run: f,
@ -220,13 +281,14 @@ func (f FuncHook) Run(s HookState) error {
}
type Command struct {
Path string `json:"path"`
Args []string `json:"args"`
Env []string `json:"env"`
Dir string `json:"dir"`
Path string `json:"path"`
Args []string `json:"args"`
Env []string `json:"env"`
Dir string `json:"dir"`
Timeout *time.Duration `json:"timeout"`
}
// NewCommandHooks will execute the provided command when the hook is run.
// NewCommandHook will execute the provided command when the hook is run.
func NewCommandHook(cmd Command) CommandHook {
return CommandHook{
Command: cmd,
@ -248,5 +310,23 @@ func (c Command) Run(s HookState) error {
Env: c.Env,
Stdin: bytes.NewReader(b),
}
return cmd.Run()
errC := make(chan error, 1)
go func() {
out, err := cmd.CombinedOutput()
if err != nil {
err = fmt.Errorf("%s: %s", err, out)
}
errC <- err
}()
if c.Timeout != nil {
select {
case err := <-errC:
return err
case <-time.After(*c.Timeout):
cmd.Process.Kill()
cmd.Wait()
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
}
}
return <-errC
}

View file

@ -4,7 +4,7 @@ package configs
import "fmt"
// Gets the root uid for the process on host which could be non-zero
// HostUID gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
@ -21,7 +21,7 @@ func (c Config) HostUID() (int, error) {
return 0, nil
}
// Gets the root gid for the process on host which could be non-zero
// HostGID gets the root gid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {

View file

@ -35,6 +35,9 @@ type Device struct {
// Gid of the device.
Gid uint32 `json:"gid"`
// Write the file to the allowed list
Allow bool `json:"allow"`
}
func (d *Device) CgroupString() string {

View file

@ -3,7 +3,7 @@
package configs
var (
// These are devices that are to be both allowed and created.
// DefaultSimpleDevices are devices that are to be both allowed and created.
DefaultSimpleDevices = []*Device{
// /dev/null and zero
{

View file

@ -18,7 +18,7 @@ var namespaceInfo = map[NamespaceType]int{
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This functions returns flags only for new namespaces.
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
var flag int
for _, v := range *n {

View file

@ -8,7 +8,7 @@ func (n *Namespace) Syscall() int {
}
// CloneFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare. This functions returns flags only for new namespaces.
// flags on clone, unshare. This function returns flags only for new namespaces.
func (n *Namespaces) CloneFlags() uintptr {
panic("No namespace syscall support")
return uintptr(0)

View file

@ -2,7 +2,11 @@
package configs
import "fmt"
import (
"fmt"
"os"
"sync"
)
const (
NEWNET NamespaceType = "NEWNET"
@ -13,6 +17,51 @@ const (
NEWUSER NamespaceType = "NEWUSER"
)
var (
nsLock sync.Mutex
supportedNamespaces = make(map[NamespaceType]bool)
)
// nsToFile converts the namespace type to its filename
func nsToFile(ns NamespaceType) string {
switch ns {
case NEWNET:
return "net"
case NEWNS:
return "mnt"
case NEWPID:
return "pid"
case NEWIPC:
return "ipc"
case NEWUSER:
return "user"
case NEWUTS:
return "uts"
}
return ""
}
// IsNamespaceSupported returns whether a namespace is available or
// not
func IsNamespaceSupported(ns NamespaceType) bool {
nsLock.Lock()
defer nsLock.Unlock()
supported, ok := supportedNamespaces[ns]
if ok {
return supported
}
nsFile := nsToFile(ns)
// if the namespace type is unknown, just return false
if nsFile == "" {
return false
}
_, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile))
// a namespace is supported if it exists and we have permissions to read it
supported = err == nil
supportedNamespaces[ns] = supported
return supported
}
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWNET,
@ -35,26 +84,7 @@ func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
}
func (n *Namespace) file() string {
file := ""
switch n.Type {
case NEWNET:
file = "net"
case NEWNS:
file = "mnt"
case NEWPID:
file = "pid"
case NEWIPC:
file = "ipc"
case NEWUSER:
file = "user"
case NEWUTS:
file = "uts"
}
return file
return fmt.Sprintf("/proc/%d/ns/%s", pid, nsToFile(n.Type))
}
func (n *Namespaces) Remove(t NamespaceType) bool {
@ -87,3 +117,11 @@ func (n *Namespaces) index(t NamespaceType) int {
func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}
func (n *Namespaces) PathOf(t NamespaceType) string {
i := n.index(t)
if i == -1 {
return ""
}
return (*n)[i].Path
}

View file

@ -11,6 +11,19 @@ import (
"unsafe"
)
// If arg2 is nonzero, set the "child subreaper" attribute of the
// calling process; if arg2 is zero, unset the attribute. When a
// process is marked as a child subreaper, all of the children
// that it creates, and their descendants, will be marked as
// having a subreaper. In effect, a subreaper fulfills the role
// of init(1) for its descendant processes. Upon termination of
// a process that is orphaned (i.e., its immediate parent has
// already terminated) and marked as having a subreaper, the
// nearest still living ancestor subreaper will receive a SIGCHLD
// signal and be able to wait(2) on the process to discover its
// termination status.
const PR_SET_CHILD_SUBREAPER = 36
type ParentDeathSignal int
func (p ParentDeathSignal) Restore() error {
@ -40,6 +53,14 @@ func Execv(cmd string, args []string, env []string) error {
return syscall.Exec(name, args, env)
}
func Prlimit(pid, resource int, limit syscall.Rlimit) error {
_, _, err := syscall.RawSyscall6(syscall.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
if err != 0 {
return err
}
return nil
}
func SetParentDeathSignal(sig uintptr) error {
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
return err
@ -79,17 +100,12 @@ func Setctty() error {
return nil
}
/*
* Detect whether we are currently running in a user namespace.
* Copied from github.com/lxc/lxd/shared/util.go
*/
// RunningInUserNS detects whether we are currently running in a user namespace.
// Copied from github.com/lxc/lxd/shared/util.go
func RunningInUserNS() bool {
file, err := os.Open("/proc/self/uid_map")
if err != nil {
/*
* This kernel-provided file only exists if user namespaces are
* supported
*/
// This kernel-provided file only exists if user namespaces are supported
return false
}
defer file.Close()
@ -112,3 +128,16 @@ func RunningInUserNS() bool {
}
return true
}
// SetSubreaper sets the value i as the subreaper setting for the calling process
func SetSubreaper(i int) error {
return Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
}
func Prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) {
_, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0)
if e1 != 0 {
err = e1
}
return
}

View file

@ -0,0 +1,9 @@
// +build !linux
package system
// RunningInUserNS is a stub for non-Linux systems
// Always returns false
func RunningInUserNS() bool {
return false
}

View file

@ -0,0 +1,121 @@
package utils
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"io"
"os"
"path/filepath"
"strings"
"syscall"
)
const (
exitSignalOffset = 128
)
// GenerateRandomName returns a new name joined with a prefix. This size
// specified is used to truncate the randomly generated value
func GenerateRandomName(prefix string, size int) (string, error) {
id := make([]byte, 32)
if _, err := io.ReadFull(rand.Reader, id); err != nil {
return "", err
}
if size > 64 {
size = 64
}
return prefix + hex.EncodeToString(id)[:size], nil
}
// ResolveRootfs ensures that the current working directory is
// not a symlink and returns the absolute path to the rootfs
func ResolveRootfs(uncleanRootfs string) (string, error) {
rootfs, err := filepath.Abs(uncleanRootfs)
if err != nil {
return "", err
}
return filepath.EvalSymlinks(rootfs)
}
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly
func ExitStatus(status syscall.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}
return status.ExitStatus()
}
// WriteJSON writes the provided struct v to w using standard json marshaling
func WriteJSON(w io.Writer, v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
return err
}
_, err = w.Write(data)
return err
}
// CleanPath makes a path safe for use with filepath.Join. This is done by not
// only cleaning the path, but also (if the path is relative) adding a leading
// '/' and cleaning it (then removing the leading '/'). This ensures that a
// path resulting from prepending another path will always resolve to lexically
// be a subdirectory of the prefixed path. This is all done lexically, so paths
// that include symlinks won't be safe as a result of using CleanPath.
func CleanPath(path string) string {
// Deal with empty strings nicely.
if path == "" {
return ""
}
// Ensure that all paths are cleaned (especially problematic ones like
// "/../../../../../" which can cause lots of issues).
path = filepath.Clean(path)
// If the path isn't absolute, we need to do more processing to fix paths
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
// paths to relative ones.
if !filepath.IsAbs(path) {
path = filepath.Clean(string(os.PathSeparator) + path)
// This can't fail, as (by definition) all paths are relative to root.
path, _ = filepath.Rel(string(os.PathSeparator), path)
}
// Clean the path again for good measure.
return filepath.Clean(path)
}
// SearchLabels searches a list of key-value pairs for the provided key and
// returns the corresponding value. The pairs must be separated with '='.
func SearchLabels(labels []string, query string) string {
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == query {
return parts[1]
}
}
return ""
}
// Annotations returns the bundle path and user defined annotations from the
// libcontianer state. We need to remove the bundle because that is a label
// added by libcontainer.
func Annotations(labels []string) (bundle string, userAnnotations map[string]string) {
userAnnotations = make(map[string]string)
for _, l := range labels {
parts := strings.SplitN(l, "=", 2)
if len(parts) < 2 {
continue
}
if parts[0] == "bundle" {
bundle = parts[1]
} else {
userAnnotations[parts[0]] = parts[1]
}
}
return
}

View file

@ -0,0 +1,33 @@
// +build !windows
package utils
import (
"io/ioutil"
"strconv"
"syscall"
)
func CloseExecFrom(minFd int) error {
fdList, err := ioutil.ReadDir("/proc/self/fd")
if err != nil {
return err
}
for _, fi := range fdList {
fd, err := strconv.Atoi(fi.Name())
if err != nil {
// ignore non-numeric file names
continue
}
if fd < minFd {
// ignore descriptors lower than our specified minimum
continue
}
// intentionally ignore errors from syscall.CloseOnExec
syscall.CloseOnExec(fd)
// the cases where this might fail are basically file descriptors that have already been closed (including and especially the one that was created when ioutil.ReadDir did the "opendir" syscall)
}
return nil
}