Update go dependencies
This commit is contained in:
parent
432f534383
commit
f4a4daed84
1299 changed files with 71186 additions and 91183 deletions
268
vendor/github.com/ncabatoff/process-exporter/proc/grouper.go
generated
vendored
268
vendor/github.com/ncabatoff/process-exporter/proc/grouper.go
generated
vendored
|
|
@ -1,173 +1,179 @@
|
|||
package proc
|
||||
|
||||
import (
|
||||
common "github.com/ncabatoff/process-exporter"
|
||||
"time"
|
||||
|
||||
seq "github.com/ncabatoff/go-seq/seq"
|
||||
common "github.com/ncabatoff/process-exporter"
|
||||
)
|
||||
|
||||
type (
|
||||
// Grouper is the top-level interface to the process metrics. All tracked
|
||||
// procs sharing the same group name are aggregated.
|
||||
Grouper struct {
|
||||
namer common.MatchNamer
|
||||
trackChildren bool
|
||||
// track how much was seen last time so we can report the delta
|
||||
GroupStats map[string]Counts
|
||||
tracker *Tracker
|
||||
// groupAccum records the historical accumulation of a group so that
|
||||
// we can avoid ever decreasing the counts we return.
|
||||
groupAccum map[string]Counts
|
||||
tracker *Tracker
|
||||
threadAccum map[string]map[string]Threads
|
||||
debug bool
|
||||
}
|
||||
|
||||
GroupCountMap map[string]GroupCounts
|
||||
// GroupByName maps group name to group metrics.
|
||||
GroupByName map[string]Group
|
||||
|
||||
GroupCounts struct {
|
||||
// Threads collects metrics for threads in a group sharing a thread name.
|
||||
Threads struct {
|
||||
Name string
|
||||
NumThreads int
|
||||
Counts
|
||||
Procs int
|
||||
Memresident uint64
|
||||
Memvirtual uint64
|
||||
}
|
||||
|
||||
// Group describes the metrics of a single group.
|
||||
Group struct {
|
||||
Counts
|
||||
States
|
||||
Wchans map[string]int
|
||||
Procs int
|
||||
Memory
|
||||
OldestStartTime time.Time
|
||||
OpenFDs uint64
|
||||
WorstFDratio float64
|
||||
NumThreads uint64
|
||||
Threads []Threads
|
||||
}
|
||||
)
|
||||
|
||||
func NewGrouper(trackChildren bool, namer common.MatchNamer) *Grouper {
|
||||
// Returns true if x < y. Test designers should ensure they always have
|
||||
// a unique name/numthreads combination for each group.
|
||||
func lessThreads(x, y Threads) bool { return seq.Compare(x, y) < 0 }
|
||||
|
||||
// NewGrouper creates a grouper.
|
||||
func NewGrouper(namer common.MatchNamer, trackChildren, alwaysRecheck, debug bool) *Grouper {
|
||||
g := Grouper{
|
||||
trackChildren: trackChildren,
|
||||
namer: namer,
|
||||
GroupStats: make(map[string]Counts),
|
||||
tracker: NewTracker(),
|
||||
groupAccum: make(map[string]Counts),
|
||||
threadAccum: make(map[string]map[string]Threads),
|
||||
tracker: NewTracker(namer, trackChildren, alwaysRecheck, debug),
|
||||
debug: debug,
|
||||
}
|
||||
return &g
|
||||
}
|
||||
|
||||
func (g *Grouper) checkAncestry(idinfo ProcIdInfo, newprocs map[ProcId]ProcIdInfo) string {
|
||||
ppid := idinfo.ParentPid
|
||||
pProcId := g.tracker.ProcIds[ppid]
|
||||
if pProcId.Pid < 1 {
|
||||
// Reached root of process tree without finding a tracked parent.
|
||||
g.tracker.Ignore(idinfo.ProcId)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Is the parent already known to the tracker?
|
||||
if ptproc, ok := g.tracker.Tracked[pProcId]; ok {
|
||||
if ptproc != nil {
|
||||
// We've found a tracked parent.
|
||||
g.tracker.Track(ptproc.GroupName, idinfo)
|
||||
return ptproc.GroupName
|
||||
} else {
|
||||
// We've found an untracked parent.
|
||||
g.tracker.Ignore(idinfo.ProcId)
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Is the parent another new process?
|
||||
if pinfoid, ok := newprocs[pProcId]; ok {
|
||||
if name := g.checkAncestry(pinfoid, newprocs); name != "" {
|
||||
// We've found a tracked parent, which implies this entire lineage should be tracked.
|
||||
g.tracker.Track(name, idinfo)
|
||||
return name
|
||||
}
|
||||
}
|
||||
|
||||
// Parent is dead, i.e. we never saw it, or there's no tracked proc in our ancestry.
|
||||
g.tracker.Ignore(idinfo.ProcId)
|
||||
return ""
|
||||
|
||||
}
|
||||
|
||||
// Update tracks any new procs that should be according to policy, and updates
|
||||
// the metrics for already tracked procs. Permission errors are returned as a
|
||||
// count, and will not affect the error return value.
|
||||
func (g *Grouper) Update(iter ProcIter) (int, error) {
|
||||
newProcs, permErrs, err := g.tracker.Update(iter)
|
||||
if err != nil {
|
||||
return permErrs, err
|
||||
}
|
||||
|
||||
// Step 1: track any new proc that should be tracked based on its name and cmdline.
|
||||
untracked := make(map[ProcId]ProcIdInfo)
|
||||
for _, idinfo := range newProcs {
|
||||
wanted, gname := g.namer.MatchAndName(common.NameAndCmdline{Name: idinfo.Name, Cmdline: idinfo.Cmdline})
|
||||
if !wanted {
|
||||
untracked[idinfo.ProcId] = idinfo
|
||||
continue
|
||||
}
|
||||
|
||||
g.tracker.Track(gname, idinfo)
|
||||
}
|
||||
|
||||
// Step 2: track any untracked new proc that should be tracked because its parent is tracked.
|
||||
if !g.trackChildren {
|
||||
return permErrs, nil
|
||||
}
|
||||
|
||||
for _, idinfo := range untracked {
|
||||
if _, ok := g.tracker.Tracked[idinfo.ProcId]; ok {
|
||||
// Already tracked or ignored
|
||||
continue
|
||||
}
|
||||
|
||||
g.checkAncestry(idinfo, untracked)
|
||||
}
|
||||
return permErrs, nil
|
||||
}
|
||||
|
||||
// groups returns the aggregate metrics for all groups tracked. This reflects
|
||||
// solely what's currently running.
|
||||
func (g *Grouper) groups() GroupCountMap {
|
||||
gcounts := make(GroupCountMap)
|
||||
|
||||
func groupadd(grp Group, ts Update) Group {
|
||||
var zeroTime time.Time
|
||||
for _, tinfo := range g.tracker.Tracked {
|
||||
if tinfo == nil {
|
||||
continue
|
||||
}
|
||||
cur := gcounts[tinfo.GroupName]
|
||||
cur.Procs++
|
||||
tstats := tinfo.GetStats()
|
||||
cur.Memresident += tstats.Memory.Resident
|
||||
cur.Memvirtual += tstats.Memory.Virtual
|
||||
cur.OpenFDs += tstats.Filedesc.Open
|
||||
openratio := float64(tstats.Filedesc.Open) / float64(tstats.Filedesc.Limit)
|
||||
if cur.WorstFDratio < openratio {
|
||||
cur.WorstFDratio = openratio
|
||||
}
|
||||
cur.Counts.Cpu += tstats.latest.Cpu
|
||||
cur.Counts.ReadBytes += tstats.latest.ReadBytes
|
||||
cur.Counts.WriteBytes += tstats.latest.WriteBytes
|
||||
if cur.OldestStartTime == zeroTime || tstats.start.Before(cur.OldestStartTime) {
|
||||
cur.OldestStartTime = tstats.start
|
||||
}
|
||||
gcounts[tinfo.GroupName] = cur
|
||||
|
||||
grp.Procs++
|
||||
grp.Memory.ResidentBytes += ts.Memory.ResidentBytes
|
||||
grp.Memory.VirtualBytes += ts.Memory.VirtualBytes
|
||||
grp.Memory.VmSwapBytes += ts.Memory.VmSwapBytes
|
||||
if ts.Filedesc.Open != -1 {
|
||||
grp.OpenFDs += uint64(ts.Filedesc.Open)
|
||||
}
|
||||
openratio := float64(ts.Filedesc.Open) / float64(ts.Filedesc.Limit)
|
||||
if grp.WorstFDratio < openratio {
|
||||
grp.WorstFDratio = openratio
|
||||
}
|
||||
grp.NumThreads += ts.NumThreads
|
||||
grp.Counts.Add(ts.Latest)
|
||||
grp.States.Add(ts.States)
|
||||
if grp.OldestStartTime == zeroTime || ts.Start.Before(grp.OldestStartTime) {
|
||||
grp.OldestStartTime = ts.Start
|
||||
}
|
||||
|
||||
return gcounts
|
||||
if grp.Wchans == nil {
|
||||
grp.Wchans = make(map[string]int)
|
||||
}
|
||||
for wchan, count := range ts.Wchans {
|
||||
grp.Wchans[wchan] += count
|
||||
}
|
||||
|
||||
return grp
|
||||
}
|
||||
|
||||
// Groups returns GroupCounts with Counts that never decrease in value from one
|
||||
// call to the next. Even if processes exit, their CPU and IO contributions up
|
||||
// to that point are included in the results. Even if no processes remain
|
||||
// in a group it will still be included in the results.
|
||||
func (g *Grouper) Groups() GroupCountMap {
|
||||
groups := g.groups()
|
||||
// Update asks the tracker to report on each tracked process by name.
|
||||
// These are aggregated by groupname, augmented by accumulated counts
|
||||
// from the past, and returned. Note that while the Tracker reports
|
||||
// only what counts have changed since last cycle, Grouper.Update
|
||||
// returns counts that never decrease. Even once the last process
|
||||
// with name X disappears, name X will still appear in the results
|
||||
// with the same counts as before; of course, all non-count metrics
|
||||
// will be zero.
|
||||
func (g *Grouper) Update(iter Iter) (CollectErrors, GroupByName, error) {
|
||||
cerrs, tracked, err := g.tracker.Update(iter)
|
||||
if err != nil {
|
||||
return cerrs, nil, err
|
||||
}
|
||||
return cerrs, g.groups(tracked), nil
|
||||
}
|
||||
|
||||
// First add any accumulated counts to what was just observed,
|
||||
// Translate the updates into a new GroupByName and update internal history.
|
||||
func (g *Grouper) groups(tracked []Update) GroupByName {
|
||||
groups := make(GroupByName)
|
||||
threadsByGroup := make(map[string][]ThreadUpdate)
|
||||
|
||||
for _, update := range tracked {
|
||||
groups[update.GroupName] = groupadd(groups[update.GroupName], update)
|
||||
if update.Threads != nil {
|
||||
threadsByGroup[update.GroupName] =
|
||||
append(threadsByGroup[update.GroupName], update.Threads...)
|
||||
}
|
||||
}
|
||||
|
||||
// Add any accumulated counts to what was just observed,
|
||||
// and update the accumulators.
|
||||
for gname, group := range groups {
|
||||
if oldcounts, ok := g.GroupStats[gname]; ok {
|
||||
group.Counts.Cpu += oldcounts.Cpu
|
||||
group.Counts.ReadBytes += oldcounts.ReadBytes
|
||||
group.Counts.WriteBytes += oldcounts.WriteBytes
|
||||
if oldcounts, ok := g.groupAccum[gname]; ok {
|
||||
group.Counts.Add(Delta(oldcounts))
|
||||
}
|
||||
g.GroupStats[gname] = group.Counts
|
||||
g.groupAccum[gname] = group.Counts
|
||||
group.Threads = g.threads(gname, threadsByGroup[gname])
|
||||
groups[gname] = group
|
||||
}
|
||||
|
||||
// Now add any groups that were observed in the past but aren't running now.
|
||||
for gname, gcounts := range g.GroupStats {
|
||||
for gname, gcounts := range g.groupAccum {
|
||||
if _, ok := groups[gname]; !ok {
|
||||
groups[gname] = GroupCounts{Counts: gcounts}
|
||||
groups[gname] = Group{Counts: gcounts}
|
||||
}
|
||||
}
|
||||
|
||||
return groups
|
||||
}
|
||||
|
||||
func (g *Grouper) threads(gname string, tracked []ThreadUpdate) []Threads {
|
||||
if len(tracked) == 0 {
|
||||
delete(g.threadAccum, gname)
|
||||
return nil
|
||||
}
|
||||
|
||||
ret := make([]Threads, 0, len(tracked))
|
||||
threads := make(map[string]Threads)
|
||||
|
||||
// First aggregate the thread metrics by thread name.
|
||||
for _, nc := range tracked {
|
||||
curthr := threads[nc.ThreadName]
|
||||
curthr.NumThreads++
|
||||
curthr.Counts.Add(nc.Latest)
|
||||
curthr.Name = nc.ThreadName
|
||||
threads[nc.ThreadName] = curthr
|
||||
}
|
||||
|
||||
// Add any accumulated counts to what was just observed,
|
||||
// and update the accumulators.
|
||||
if history := g.threadAccum[gname]; history != nil {
|
||||
for tname := range threads {
|
||||
if oldcounts, ok := history[tname]; ok {
|
||||
counts := threads[tname]
|
||||
counts.Add(Delta(oldcounts.Counts))
|
||||
threads[tname] = counts
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g.threadAccum[gname] = threads
|
||||
|
||||
for _, thr := range threads {
|
||||
ret = append(ret, thr)
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
|
|
|||
539
vendor/github.com/ncabatoff/process-exporter/proc/read.go
generated
vendored
539
vendor/github.com/ncabatoff/process-exporter/proc/read.go
generated
vendored
|
|
@ -2,18 +2,21 @@ package proc
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/procfs"
|
||||
"github.com/ncabatoff/procfs"
|
||||
)
|
||||
|
||||
func newProcIdStatic(pid, ppid int, startTime uint64, name string, cmdline []string) ProcIdStatic {
|
||||
return ProcIdStatic{ProcId{pid, startTime}, ProcStatic{name, cmdline, ppid, time.Time{}}}
|
||||
}
|
||||
// ErrProcNotExist indicates a process couldn't be read because it doesn't exist,
|
||||
// typically because it disappeared while we were reading it.
|
||||
var ErrProcNotExist = fmt.Errorf("process does not exist")
|
||||
|
||||
type (
|
||||
// ProcId uniquely identifies a process.
|
||||
ProcId struct {
|
||||
// ID uniquely identifies a process.
|
||||
ID struct {
|
||||
// UNIX process id
|
||||
Pid int
|
||||
// The time the process started after system boot, the value is expressed
|
||||
|
|
@ -21,82 +24,138 @@ type (
|
|||
StartTimeRel uint64
|
||||
}
|
||||
|
||||
// ProcStatic contains data read from /proc/pid/*
|
||||
ProcStatic struct {
|
||||
Name string
|
||||
Cmdline []string
|
||||
ParentPid int
|
||||
StartTime time.Time
|
||||
ThreadID ID
|
||||
|
||||
// Static contains data read from /proc/pid/*
|
||||
Static struct {
|
||||
Name string
|
||||
Cmdline []string
|
||||
ParentPid int
|
||||
StartTime time.Time
|
||||
EffectiveUID int
|
||||
}
|
||||
|
||||
// ProcMetrics contains data read from /proc/pid/*
|
||||
ProcMetrics struct {
|
||||
CpuTime float64
|
||||
ReadBytes uint64
|
||||
WriteBytes uint64
|
||||
// Counts are metric counters common to threads and processes and groups.
|
||||
Counts struct {
|
||||
CPUUserTime float64
|
||||
CPUSystemTime float64
|
||||
ReadBytes uint64
|
||||
WriteBytes uint64
|
||||
MajorPageFaults uint64
|
||||
MinorPageFaults uint64
|
||||
CtxSwitchVoluntary uint64
|
||||
CtxSwitchNonvoluntary uint64
|
||||
}
|
||||
|
||||
// Memory describes a proc's memory usage.
|
||||
Memory struct {
|
||||
ResidentBytes uint64
|
||||
VirtualBytes uint64
|
||||
OpenFDs uint64
|
||||
MaxFDs uint64
|
||||
VmSwapBytes uint64
|
||||
}
|
||||
|
||||
ProcIdStatic struct {
|
||||
ProcId
|
||||
ProcStatic
|
||||
// Filedesc describes a proc's file descriptor usage and soft limit.
|
||||
Filedesc struct {
|
||||
// Open is the count of open file descriptors, -1 if unknown.
|
||||
Open int64
|
||||
// Limit is the fd soft limit for the process.
|
||||
Limit uint64
|
||||
}
|
||||
|
||||
ProcInfo struct {
|
||||
ProcStatic
|
||||
ProcMetrics
|
||||
// States counts how many threads are in each state.
|
||||
States struct {
|
||||
Running int
|
||||
Sleeping int
|
||||
Waiting int
|
||||
Zombie int
|
||||
Other int
|
||||
}
|
||||
|
||||
ProcIdInfo struct {
|
||||
ProcId
|
||||
ProcStatic
|
||||
ProcMetrics
|
||||
// Metrics contains data read from /proc/pid/*
|
||||
Metrics struct {
|
||||
Counts
|
||||
Memory
|
||||
Filedesc
|
||||
NumThreads uint64
|
||||
States
|
||||
Wchan string
|
||||
}
|
||||
|
||||
// Thread contains per-thread data.
|
||||
Thread struct {
|
||||
ThreadID
|
||||
ThreadName string
|
||||
Counts
|
||||
Wchan string
|
||||
States
|
||||
}
|
||||
|
||||
// IDInfo groups all info for a single process.
|
||||
IDInfo struct {
|
||||
ID
|
||||
Static
|
||||
Metrics
|
||||
Threads []Thread
|
||||
}
|
||||
|
||||
// ProcIdInfoThreads struct {
|
||||
// ProcIdInfo
|
||||
// Threads []ProcThread
|
||||
// }
|
||||
|
||||
// Proc wraps the details of the underlying procfs-reading library.
|
||||
// Any of these methods may fail if the process has disappeared.
|
||||
// We try to return as much as possible rather than an error, e.g.
|
||||
// if some /proc files are unreadable.
|
||||
Proc interface {
|
||||
// GetPid() returns the POSIX PID (process id). They may be reused over time.
|
||||
GetPid() int
|
||||
// GetProcId() returns (pid,starttime), which can be considered a unique process id.
|
||||
// It may fail if the caller doesn't have permission to read /proc/<pid>/stat, or if
|
||||
// the process has disappeared.
|
||||
GetProcId() (ProcId, error)
|
||||
// GetProcID() returns (pid,starttime), which can be considered a unique process id.
|
||||
GetProcID() (ID, error)
|
||||
// GetStatic() returns various details read from files under /proc/<pid>/. Technically
|
||||
// name may not be static, but we'll pretend it is.
|
||||
// It may fail if the caller doesn't have permission to read those files, or if
|
||||
// the process has disappeared.
|
||||
GetStatic() (ProcStatic, error)
|
||||
GetStatic() (Static, error)
|
||||
// GetMetrics() returns various metrics read from files under /proc/<pid>/.
|
||||
// It may fail if the caller doesn't have permission to read those files, or if
|
||||
// the process has disappeared.
|
||||
GetMetrics() (ProcMetrics, error)
|
||||
// It returns an error on complete failure. Otherwise, it returns metrics
|
||||
// and 0 on complete success, 1 if some (like I/O) couldn't be read.
|
||||
GetMetrics() (Metrics, int, error)
|
||||
GetStates() (States, error)
|
||||
GetWchan() (string, error)
|
||||
GetCounts() (Counts, int, error)
|
||||
GetThreads() ([]Thread, error)
|
||||
}
|
||||
|
||||
// proc is a wrapper for procfs.Proc that caches results of some reads and implements Proc.
|
||||
proc struct {
|
||||
// proccache implements the Proc interface by acting as wrapper for procfs.Proc
|
||||
// that caches results of some reads.
|
||||
proccache struct {
|
||||
procfs.Proc
|
||||
procid *ProcId
|
||||
stat *procfs.ProcStat
|
||||
cmdline []string
|
||||
io *procfs.ProcIO
|
||||
bootTime uint64
|
||||
procid *ID
|
||||
stat *procfs.ProcStat
|
||||
status *procfs.ProcStatus
|
||||
cmdline []string
|
||||
io *procfs.ProcIO
|
||||
fs *FS
|
||||
wchan *string
|
||||
}
|
||||
|
||||
proc struct {
|
||||
proccache
|
||||
}
|
||||
|
||||
// procs is a fancier []Proc that saves on some copying.
|
||||
procs interface {
|
||||
get(int) Proc
|
||||
length() int
|
||||
}
|
||||
|
||||
// procfsprocs implements procs using procfs.
|
||||
procfsprocs struct {
|
||||
Procs []procfs.Proc
|
||||
bootTime uint64
|
||||
Procs []procfs.Proc
|
||||
fs *FS
|
||||
}
|
||||
|
||||
// ProcIter is an iterator over a sequence of procs.
|
||||
ProcIter interface {
|
||||
// Iter is an iterator over a sequence of procs.
|
||||
Iter interface {
|
||||
// Next returns true if the iterator is not exhausted.
|
||||
Next() bool
|
||||
// Close releases any resources the iterator uses.
|
||||
|
|
@ -105,7 +164,7 @@ type (
|
|||
Proc
|
||||
}
|
||||
|
||||
// procIterator implements the ProcIter interface using procfs.
|
||||
// procIterator implements the Iter interface
|
||||
procIterator struct {
|
||||
// procs is the list of Proc we're iterating over.
|
||||
procs
|
||||
|
|
@ -119,66 +178,101 @@ type (
|
|||
Proc
|
||||
}
|
||||
|
||||
procIdInfos []ProcIdInfo
|
||||
// Source is a source of procs.
|
||||
Source interface {
|
||||
// AllProcs returns all the processes in this source at this moment in time.
|
||||
AllProcs() Iter
|
||||
}
|
||||
|
||||
// FS implements Source.
|
||||
FS struct {
|
||||
procfs.FS
|
||||
BootTime uint64
|
||||
MountPoint string
|
||||
debug bool
|
||||
}
|
||||
)
|
||||
|
||||
func procInfoIter(ps ...ProcIdInfo) ProcIter {
|
||||
return &procIterator{procs: procIdInfos(ps), idx: -1}
|
||||
func (ii IDInfo) String() string {
|
||||
return fmt.Sprintf("%+v:%+v", ii.ID, ii.Static)
|
||||
}
|
||||
|
||||
func Info(p Proc) (ProcIdInfo, error) {
|
||||
id, err := p.GetProcId()
|
||||
if err != nil {
|
||||
return ProcIdInfo{}, err
|
||||
}
|
||||
static, err := p.GetStatic()
|
||||
if err != nil {
|
||||
return ProcIdInfo{}, err
|
||||
}
|
||||
metrics, err := p.GetMetrics()
|
||||
if err != nil {
|
||||
return ProcIdInfo{}, err
|
||||
}
|
||||
return ProcIdInfo{id, static, metrics}, nil
|
||||
// Add adds c2 to the counts.
|
||||
func (c *Counts) Add(c2 Delta) {
|
||||
c.CPUUserTime += c2.CPUUserTime
|
||||
c.CPUSystemTime += c2.CPUSystemTime
|
||||
c.ReadBytes += c2.ReadBytes
|
||||
c.WriteBytes += c2.WriteBytes
|
||||
c.MajorPageFaults += c2.MajorPageFaults
|
||||
c.MinorPageFaults += c2.MinorPageFaults
|
||||
c.CtxSwitchVoluntary += c2.CtxSwitchVoluntary
|
||||
c.CtxSwitchNonvoluntary += c2.CtxSwitchNonvoluntary
|
||||
}
|
||||
|
||||
func (p procIdInfos) get(i int) Proc {
|
||||
return &p[i]
|
||||
// Sub subtracts c2 from the counts.
|
||||
func (c Counts) Sub(c2 Counts) Delta {
|
||||
c.CPUUserTime -= c2.CPUUserTime
|
||||
c.CPUSystemTime -= c2.CPUSystemTime
|
||||
c.ReadBytes -= c2.ReadBytes
|
||||
c.WriteBytes -= c2.WriteBytes
|
||||
c.MajorPageFaults -= c2.MajorPageFaults
|
||||
c.MinorPageFaults -= c2.MinorPageFaults
|
||||
c.CtxSwitchVoluntary -= c2.CtxSwitchVoluntary
|
||||
c.CtxSwitchNonvoluntary -= c2.CtxSwitchNonvoluntary
|
||||
return Delta(c)
|
||||
}
|
||||
|
||||
func (p procIdInfos) length() int {
|
||||
return len(p)
|
||||
func (s *States) Add(s2 States) {
|
||||
s.Other += s2.Other
|
||||
s.Running += s2.Running
|
||||
s.Sleeping += s2.Sleeping
|
||||
s.Waiting += s2.Waiting
|
||||
s.Zombie += s2.Zombie
|
||||
}
|
||||
|
||||
func (p ProcIdInfo) GetPid() int {
|
||||
return p.ProcId.Pid
|
||||
func (p IDInfo) GetThreads() ([]Thread, error) {
|
||||
return p.Threads, nil
|
||||
}
|
||||
|
||||
func (p ProcIdInfo) GetProcId() (ProcId, error) {
|
||||
return p.ProcId, nil
|
||||
// GetPid implements Proc.
|
||||
func (p IDInfo) GetPid() int {
|
||||
return p.ID.Pid
|
||||
}
|
||||
|
||||
func (p ProcIdInfo) GetStatic() (ProcStatic, error) {
|
||||
return p.ProcStatic, nil
|
||||
// GetProcID implements Proc.
|
||||
func (p IDInfo) GetProcID() (ID, error) {
|
||||
return p.ID, nil
|
||||
}
|
||||
|
||||
func (p ProcIdInfo) GetMetrics() (ProcMetrics, error) {
|
||||
return p.ProcMetrics, nil
|
||||
// GetStatic implements Proc.
|
||||
func (p IDInfo) GetStatic() (Static, error) {
|
||||
return p.Static, nil
|
||||
}
|
||||
|
||||
func (p procfsprocs) get(i int) Proc {
|
||||
return &proc{Proc: p.Procs[i], bootTime: p.bootTime}
|
||||
// GetCounts implements Proc.
|
||||
func (p IDInfo) GetCounts() (Counts, int, error) {
|
||||
return p.Metrics.Counts, 0, nil
|
||||
}
|
||||
|
||||
func (p procfsprocs) length() int {
|
||||
return len(p.Procs)
|
||||
// GetMetrics implements Proc.
|
||||
func (p IDInfo) GetMetrics() (Metrics, int, error) {
|
||||
return p.Metrics, 0, nil
|
||||
}
|
||||
|
||||
func (p *proc) GetPid() int {
|
||||
// GetStates implements Proc.
|
||||
func (p IDInfo) GetStates() (States, error) {
|
||||
return p.States, nil
|
||||
}
|
||||
|
||||
func (p IDInfo) GetWchan() (string, error) {
|
||||
return p.Wchan, nil
|
||||
}
|
||||
|
||||
func (p *proccache) GetPid() int {
|
||||
return p.Proc.PID
|
||||
}
|
||||
|
||||
func (p *proc) GetStat() (procfs.ProcStat, error) {
|
||||
func (p *proccache) getStat() (procfs.ProcStat, error) {
|
||||
if p.stat == nil {
|
||||
stat, err := p.Proc.NewStat()
|
||||
if err != nil {
|
||||
|
|
@ -190,19 +284,32 @@ func (p *proc) GetStat() (procfs.ProcStat, error) {
|
|||
return *p.stat, nil
|
||||
}
|
||||
|
||||
func (p *proc) GetProcId() (ProcId, error) {
|
||||
if p.procid == nil {
|
||||
stat, err := p.GetStat()
|
||||
func (p *proccache) getStatus() (procfs.ProcStatus, error) {
|
||||
if p.status == nil {
|
||||
status, err := p.Proc.NewStatus()
|
||||
if err != nil {
|
||||
return ProcId{}, err
|
||||
return procfs.ProcStatus{}, err
|
||||
}
|
||||
p.procid = &ProcId{Pid: p.GetPid(), StartTimeRel: stat.Starttime}
|
||||
p.status = &status
|
||||
}
|
||||
|
||||
return *p.status, nil
|
||||
}
|
||||
|
||||
// GetProcID implements Proc.
|
||||
func (p *proccache) GetProcID() (ID, error) {
|
||||
if p.procid == nil {
|
||||
stat, err := p.getStat()
|
||||
if err != nil {
|
||||
return ID{}, err
|
||||
}
|
||||
p.procid = &ID{Pid: p.GetPid(), StartTimeRel: stat.Starttime}
|
||||
}
|
||||
|
||||
return *p.procid, nil
|
||||
}
|
||||
|
||||
func (p *proc) GetCmdLine() ([]string, error) {
|
||||
func (p *proccache) getCmdLine() ([]string, error) {
|
||||
if p.cmdline == nil {
|
||||
cmdline, err := p.Proc.CmdLine()
|
||||
if err != nil {
|
||||
|
|
@ -213,7 +320,18 @@ func (p *proc) GetCmdLine() ([]string, error) {
|
|||
return p.cmdline, nil
|
||||
}
|
||||
|
||||
func (p *proc) GetIo() (procfs.ProcIO, error) {
|
||||
func (p *proccache) getWchan() (string, error) {
|
||||
if p.wchan == nil {
|
||||
wchan, err := p.Proc.Wchan()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
p.wchan = &wchan
|
||||
}
|
||||
return *p.wchan, nil
|
||||
}
|
||||
|
||||
func (p *proccache) getIo() (procfs.ProcIO, error) {
|
||||
if p.io == nil {
|
||||
io, err := p.Proc.NewIO()
|
||||
if err != nil {
|
||||
|
|
@ -224,56 +342,199 @@ func (p *proc) GetIo() (procfs.ProcIO, error) {
|
|||
return *p.io, nil
|
||||
}
|
||||
|
||||
func (p proc) GetStatic() (ProcStatic, error) {
|
||||
cmdline, err := p.GetCmdLine()
|
||||
// GetStatic returns the ProcStatic corresponding to this proc.
|
||||
func (p *proccache) GetStatic() (Static, error) {
|
||||
// /proc/<pid>/cmdline is normally world-readable.
|
||||
cmdline, err := p.getCmdLine()
|
||||
if err != nil {
|
||||
return ProcStatic{}, err
|
||||
return Static{}, err
|
||||
}
|
||||
stat, err := p.GetStat()
|
||||
|
||||
// /proc/<pid>/stat is normally world-readable.
|
||||
stat, err := p.getStat()
|
||||
if err != nil {
|
||||
return ProcStatic{}, err
|
||||
return Static{}, err
|
||||
}
|
||||
startTime := time.Unix(int64(p.bootTime), 0)
|
||||
startTime := time.Unix(int64(p.fs.BootTime), 0).UTC()
|
||||
startTime = startTime.Add(time.Second / userHZ * time.Duration(stat.Starttime))
|
||||
return ProcStatic{
|
||||
Name: stat.Comm,
|
||||
Cmdline: cmdline,
|
||||
ParentPid: stat.PPID,
|
||||
StartTime: startTime,
|
||||
|
||||
// /proc/<pid>/status is normally world-readable.
|
||||
status, err := p.getStatus()
|
||||
if err != nil {
|
||||
return Static{}, err
|
||||
}
|
||||
|
||||
return Static{
|
||||
Name: stat.Comm,
|
||||
Cmdline: cmdline,
|
||||
ParentPid: stat.PPID,
|
||||
StartTime: startTime,
|
||||
EffectiveUID: status.UIDEffective,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p proc) GetMetrics() (ProcMetrics, error) {
|
||||
io, err := p.GetIo()
|
||||
func (p proc) GetCounts() (Counts, int, error) {
|
||||
stat, err := p.getStat()
|
||||
if err != nil {
|
||||
return ProcMetrics{}, err
|
||||
if err == os.ErrNotExist {
|
||||
err = ErrProcNotExist
|
||||
}
|
||||
return Counts{}, 0, err
|
||||
}
|
||||
stat, err := p.GetStat()
|
||||
|
||||
status, err := p.getStatus()
|
||||
if err != nil {
|
||||
return ProcMetrics{}, err
|
||||
if err == os.ErrNotExist {
|
||||
err = ErrProcNotExist
|
||||
}
|
||||
return Counts{}, 0, err
|
||||
}
|
||||
|
||||
io, err := p.getIo()
|
||||
softerrors := 0
|
||||
if err != nil {
|
||||
softerrors++
|
||||
}
|
||||
return Counts{
|
||||
CPUUserTime: float64(stat.UTime) / userHZ,
|
||||
CPUSystemTime: float64(stat.STime) / userHZ,
|
||||
ReadBytes: io.ReadBytes,
|
||||
WriteBytes: io.WriteBytes,
|
||||
MajorPageFaults: uint64(stat.MajFlt),
|
||||
MinorPageFaults: uint64(stat.MinFlt),
|
||||
CtxSwitchVoluntary: uint64(status.VoluntaryCtxtSwitches),
|
||||
CtxSwitchNonvoluntary: uint64(status.NonvoluntaryCtxtSwitches),
|
||||
}, softerrors, nil
|
||||
}
|
||||
|
||||
func (p proc) GetWchan() (string, error) {
|
||||
return p.getWchan()
|
||||
}
|
||||
|
||||
func (p proc) GetStates() (States, error) {
|
||||
stat, err := p.getStat()
|
||||
if err != nil {
|
||||
return States{}, err
|
||||
}
|
||||
|
||||
var s States
|
||||
switch stat.State {
|
||||
case "R":
|
||||
s.Running++
|
||||
case "S":
|
||||
s.Sleeping++
|
||||
case "D":
|
||||
s.Waiting++
|
||||
case "Z":
|
||||
s.Zombie++
|
||||
default:
|
||||
s.Other++
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// GetMetrics returns the current metrics for the proc. The results are
|
||||
// not cached.
|
||||
func (p proc) GetMetrics() (Metrics, int, error) {
|
||||
counts, softerrors, err := p.GetCounts()
|
||||
if err != nil {
|
||||
return Metrics{}, 0, err
|
||||
}
|
||||
|
||||
// We don't need to check for error here because p will have cached
|
||||
// the successful result of calling getStat in GetCounts.
|
||||
// Since GetMetrics isn't a pointer receiver method, our callers
|
||||
// won't see the effect of the caching between calls.
|
||||
stat, _ := p.getStat()
|
||||
|
||||
// Ditto for states
|
||||
states, _ := p.GetStates()
|
||||
|
||||
status, err := p.getStatus()
|
||||
if err != nil {
|
||||
return Metrics{}, 0, err
|
||||
}
|
||||
|
||||
numfds, err := p.Proc.FileDescriptorsLen()
|
||||
if err != nil {
|
||||
return ProcMetrics{}, err
|
||||
numfds = -1
|
||||
softerrors |= 1
|
||||
}
|
||||
limits, err := p.NewLimits()
|
||||
|
||||
limits, err := p.Proc.NewLimits()
|
||||
if err != nil {
|
||||
return ProcMetrics{}, err
|
||||
return Metrics{}, 0, err
|
||||
}
|
||||
return ProcMetrics{
|
||||
CpuTime: stat.CPUTime(),
|
||||
ReadBytes: io.ReadBytes,
|
||||
WriteBytes: io.WriteBytes,
|
||||
ResidentBytes: uint64(stat.ResidentMemory()),
|
||||
VirtualBytes: uint64(stat.VirtualMemory()),
|
||||
OpenFDs: uint64(numfds),
|
||||
MaxFDs: uint64(limits.OpenFiles),
|
||||
}, nil
|
||||
|
||||
wchan, err := p.getWchan()
|
||||
if err != nil {
|
||||
softerrors |= 1
|
||||
}
|
||||
|
||||
return Metrics{
|
||||
Counts: counts,
|
||||
Memory: Memory{
|
||||
ResidentBytes: uint64(stat.ResidentMemory()),
|
||||
VirtualBytes: uint64(stat.VirtualMemory()),
|
||||
VmSwapBytes: uint64(status.VmSwapKB * 1024),
|
||||
},
|
||||
Filedesc: Filedesc{
|
||||
Open: int64(numfds),
|
||||
Limit: uint64(limits.OpenFiles),
|
||||
},
|
||||
NumThreads: uint64(stat.NumThreads),
|
||||
States: states,
|
||||
Wchan: wchan,
|
||||
}, softerrors, nil
|
||||
}
|
||||
|
||||
type FS struct {
|
||||
procfs.FS
|
||||
BootTime uint64
|
||||
func (p proc) GetThreads() ([]Thread, error) {
|
||||
fs, err := p.fs.threadFs(p.PID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
threads := []Thread{}
|
||||
iter := fs.AllProcs()
|
||||
for iter.Next() {
|
||||
var id ID
|
||||
id, err = iter.GetProcID()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
var static Static
|
||||
static, err = iter.GetStatic()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
var counts Counts
|
||||
counts, _, err = iter.GetCounts()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
wchan, _ := iter.GetWchan()
|
||||
states, _ := iter.GetStates()
|
||||
|
||||
threads = append(threads, Thread{
|
||||
ThreadID: ThreadID(id),
|
||||
ThreadName: static.Name,
|
||||
Counts: counts,
|
||||
Wchan: wchan,
|
||||
States: states,
|
||||
})
|
||||
}
|
||||
err = iter.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(threads) < 2 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return threads, nil
|
||||
}
|
||||
|
||||
// See https://github.com/prometheus/procfs/blob/master/proc_stat.go for details on userHZ.
|
||||
|
|
@ -281,7 +542,7 @@ const userHZ = 100
|
|||
|
||||
// NewFS returns a new FS mounted under the given mountPoint. It will error
|
||||
// if the mount point can't be read.
|
||||
func NewFS(mountPoint string) (*FS, error) {
|
||||
func NewFS(mountPoint string, debug bool) (*FS, error) {
|
||||
fs, err := procfs.NewFS(mountPoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -290,17 +551,38 @@ func NewFS(mountPoint string) (*FS, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &FS{fs, stat.BootTime}, nil
|
||||
return &FS{fs, stat.BootTime, mountPoint, debug}, nil
|
||||
}
|
||||
|
||||
func (fs *FS) AllProcs() ProcIter {
|
||||
func (fs *FS) threadFs(pid int) (*FS, error) {
|
||||
mountPoint := filepath.Join(fs.MountPoint, strconv.Itoa(pid), "task")
|
||||
tfs, err := procfs.NewFS(mountPoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &FS{tfs, fs.BootTime, mountPoint, false}, nil
|
||||
}
|
||||
|
||||
// AllProcs implements Source.
|
||||
func (fs *FS) AllProcs() Iter {
|
||||
procs, err := fs.FS.AllProcs()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("Error reading procs: %v", err)
|
||||
}
|
||||
return &procIterator{procs: procfsprocs{procs, fs.BootTime}, err: err, idx: -1}
|
||||
return &procIterator{procs: procfsprocs{procs, fs}, err: err, idx: -1}
|
||||
}
|
||||
|
||||
// get implements procs.
|
||||
func (p procfsprocs) get(i int) Proc {
|
||||
return &proc{proccache{Proc: p.Procs[i], fs: p.fs}}
|
||||
}
|
||||
|
||||
// length implements procs.
|
||||
func (p procfsprocs) length() int {
|
||||
return len(p.Procs)
|
||||
}
|
||||
|
||||
// Next implements Iter.
|
||||
func (pi *procIterator) Next() bool {
|
||||
pi.idx++
|
||||
if pi.idx < pi.procs.length() {
|
||||
|
|
@ -311,6 +593,7 @@ func (pi *procIterator) Next() bool {
|
|||
return pi.idx < pi.procs.length()
|
||||
}
|
||||
|
||||
// Close implements Iter.
|
||||
func (pi *procIterator) Close() error {
|
||||
pi.Next()
|
||||
pi.procs = nil
|
||||
|
|
|
|||
499
vendor/github.com/ncabatoff/process-exporter/proc/tracker.go
generated
vendored
499
vendor/github.com/ncabatoff/process-exporter/proc/tracker.go
generated
vendored
|
|
@ -2,179 +2,432 @@ package proc
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"log"
|
||||
"os/user"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
seq "github.com/ncabatoff/go-seq/seq"
|
||||
common "github.com/ncabatoff/process-exporter"
|
||||
)
|
||||
|
||||
type (
|
||||
Counts struct {
|
||||
Cpu float64
|
||||
ReadBytes uint64
|
||||
WriteBytes uint64
|
||||
}
|
||||
|
||||
Memory struct {
|
||||
Resident uint64
|
||||
Virtual uint64
|
||||
}
|
||||
|
||||
Filedesc struct {
|
||||
Open uint64
|
||||
Limit uint64
|
||||
}
|
||||
|
||||
// Tracker tracks processes and records metrics.
|
||||
Tracker struct {
|
||||
// Tracked holds the processes are being monitored. Processes
|
||||
// namer determines what processes to track and names them
|
||||
namer common.MatchNamer
|
||||
// tracked holds the processes that are being monitored. Processes
|
||||
// may be blacklisted such that they no longer get tracked by
|
||||
// setting their value in the Tracked map to nil.
|
||||
Tracked map[ProcId]*TrackedProc
|
||||
// ProcIds is a map from pid to ProcId. This is a convenience
|
||||
// setting their value in the tracked map to nil.
|
||||
tracked map[ID]*trackedProc
|
||||
// procIds is a map from pid to ProcId. This is a convenience
|
||||
// to allow finding the Tracked entry of a parent process.
|
||||
ProcIds map[int]ProcId
|
||||
procIds map[int]ID
|
||||
// trackChildren makes Tracker track descendants of procs the
|
||||
// namer wanted tracked.
|
||||
trackChildren bool
|
||||
// never ignore processes, i.e. always re-check untracked processes in case comm has changed
|
||||
alwaysRecheck bool
|
||||
username map[int]string
|
||||
debug bool
|
||||
}
|
||||
|
||||
// TrackedProc accumulates metrics for a process, as well as
|
||||
// Delta is an alias of Counts used to signal that its contents are not
|
||||
// totals, but rather the result of subtracting two totals.
|
||||
Delta Counts
|
||||
|
||||
trackedThread struct {
|
||||
name string
|
||||
accum Counts
|
||||
latest Delta
|
||||
lastUpdate time.Time
|
||||
wchan string
|
||||
}
|
||||
|
||||
// trackedProc accumulates metrics for a process, as well as
|
||||
// remembering an optional GroupName tag associated with it.
|
||||
TrackedProc struct {
|
||||
trackedProc struct {
|
||||
// lastUpdate is used internally during the update cycle to find which procs have exited
|
||||
lastUpdate time.Time
|
||||
// info is the most recently obtained info for this proc
|
||||
info ProcInfo
|
||||
// accum is the total CPU and IO accrued since we started tracking this proc
|
||||
accum Counts
|
||||
// lastaccum is the CPU and IO accrued in the last Update()
|
||||
lastaccum Counts
|
||||
// GroupName is an optional tag for this proc.
|
||||
GroupName string
|
||||
// static
|
||||
static Static
|
||||
metrics Metrics
|
||||
// lastaccum is the increment to the counters seen in the last update.
|
||||
lastaccum Delta
|
||||
// groupName is the tag for this proc given by the namer.
|
||||
groupName string
|
||||
threads map[ThreadID]trackedThread
|
||||
}
|
||||
|
||||
trackedStats struct {
|
||||
aggregate, latest Counts
|
||||
// ThreadUpdate describes what's changed for a thread since the last cycle.
|
||||
ThreadUpdate struct {
|
||||
// ThreadName is the name of the thread based on field of stat.
|
||||
ThreadName string
|
||||
// Latest is how much the counts increased since last cycle.
|
||||
Latest Delta
|
||||
}
|
||||
|
||||
// Update reports on the latest stats for a process.
|
||||
Update struct {
|
||||
// GroupName is the name given by the namer to the process.
|
||||
GroupName string
|
||||
// Latest is how much the counts increased since last cycle.
|
||||
Latest Delta
|
||||
// Memory is the current memory usage.
|
||||
Memory
|
||||
// Filedesc is the current fd usage/limit.
|
||||
Filedesc
|
||||
start time.Time
|
||||
// Start is the time the process started.
|
||||
Start time.Time
|
||||
// NumThreads is the number of threads.
|
||||
NumThreads uint64
|
||||
// States is how many processes are in which run state.
|
||||
States
|
||||
// Wchans is how many threads are in each non-zero wchan.
|
||||
Wchans map[string]int
|
||||
// Threads are the thread updates for this process.
|
||||
Threads []ThreadUpdate
|
||||
}
|
||||
|
||||
// CollectErrors describes non-fatal errors found while collecting proc
|
||||
// metrics.
|
||||
CollectErrors struct {
|
||||
// Read is incremented every time GetMetrics() returns an error.
|
||||
// This means we failed to load even the basics for the process,
|
||||
// and not just because it disappeared on us.
|
||||
Read int
|
||||
// Partial is incremented every time we're unable to collect
|
||||
// some metrics (e.g. I/O) for a tracked proc, but we're still able
|
||||
// to get the basic stuff like cmdline and core stats.
|
||||
Partial int
|
||||
}
|
||||
)
|
||||
|
||||
func (tp *TrackedProc) GetName() string {
|
||||
return tp.info.Name
|
||||
// lessUpdateGroupName reports whether x's group name sorts strictly before
// y's group name.
func lessUpdateGroupName(x, y Update) bool {
	return y.GroupName > x.GroupName
}
|
||||
|
||||
// lessThreadUpdate reports whether seq.Compare orders x before y.
func lessThreadUpdate(x, y ThreadUpdate) bool {
	order := seq.Compare(x, y)
	return order < 0
}
|
||||
|
||||
// lessCounts reports whether seq.Compare orders x before y.
func lessCounts(x, y Counts) bool {
	order := seq.Compare(x, y)
	return order < 0
}
|
||||
|
||||
func (tp *trackedProc) getUpdate() Update {
|
||||
u := Update{
|
||||
GroupName: tp.groupName,
|
||||
Latest: tp.lastaccum,
|
||||
Memory: tp.metrics.Memory,
|
||||
Filedesc: tp.metrics.Filedesc,
|
||||
Start: tp.static.StartTime,
|
||||
NumThreads: tp.metrics.NumThreads,
|
||||
States: tp.metrics.States,
|
||||
Wchans: make(map[string]int),
|
||||
}
|
||||
if tp.metrics.Wchan != "" {
|
||||
u.Wchans[tp.metrics.Wchan] = 1
|
||||
}
|
||||
if len(tp.threads) > 1 {
|
||||
for _, tt := range tp.threads {
|
||||
u.Threads = append(u.Threads, ThreadUpdate{tt.name, tt.latest})
|
||||
if tt.wchan != "" {
|
||||
u.Wchans[tt.wchan]++
|
||||
}
|
||||
}
|
||||
}
|
||||
return u
|
||||
}
|
||||
|
||||
func (tp *TrackedProc) GetCmdLine() []string {
|
||||
return tp.info.Cmdline
|
||||
}
|
||||
|
||||
func (tp *TrackedProc) GetStats() trackedStats {
|
||||
mem := Memory{Resident: tp.info.ResidentBytes, Virtual: tp.info.VirtualBytes}
|
||||
fd := Filedesc{Open: tp.info.OpenFDs, Limit: tp.info.MaxFDs}
|
||||
return trackedStats{
|
||||
aggregate: tp.accum,
|
||||
latest: tp.lastaccum,
|
||||
Memory: mem,
|
||||
Filedesc: fd,
|
||||
start: tp.info.StartTime,
|
||||
// NewTracker creates a Tracker.
|
||||
func NewTracker(namer common.MatchNamer, trackChildren, alwaysRecheck, debug bool) *Tracker {
|
||||
return &Tracker{
|
||||
namer: namer,
|
||||
tracked: make(map[ID]*trackedProc),
|
||||
procIds: make(map[int]ID),
|
||||
trackChildren: trackChildren,
|
||||
alwaysRecheck: alwaysRecheck,
|
||||
username: make(map[int]string),
|
||||
debug: debug,
|
||||
}
|
||||
}
|
||||
|
||||
func NewTracker() *Tracker {
|
||||
return &Tracker{Tracked: make(map[ProcId]*TrackedProc), ProcIds: make(map[int]ProcId)}
|
||||
func (t *Tracker) track(groupName string, idinfo IDInfo) {
|
||||
tproc := trackedProc{
|
||||
groupName: groupName,
|
||||
static: idinfo.Static,
|
||||
metrics: idinfo.Metrics,
|
||||
}
|
||||
if len(idinfo.Threads) > 0 {
|
||||
tproc.threads = make(map[ThreadID]trackedThread)
|
||||
for _, thr := range idinfo.Threads {
|
||||
tproc.threads[thr.ThreadID] = trackedThread{
|
||||
thr.ThreadName, thr.Counts, Delta{}, time.Time{}, thr.Wchan}
|
||||
}
|
||||
}
|
||||
t.tracked[idinfo.ID] = &tproc
|
||||
}
|
||||
|
||||
func (t *Tracker) Track(groupName string, idinfo ProcIdInfo) {
|
||||
info := ProcInfo{idinfo.ProcStatic, idinfo.ProcMetrics}
|
||||
t.Tracked[idinfo.ProcId] = &TrackedProc{GroupName: groupName, info: info}
|
||||
func (t *Tracker) ignore(id ID) {
|
||||
// only ignore ID if we didn't set recheck to true
|
||||
if t.alwaysRecheck == false {
|
||||
t.tracked[id] = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tracker) Ignore(id ProcId) {
|
||||
t.Tracked[id] = nil
|
||||
func (tp *trackedProc) update(metrics Metrics, now time.Time, cerrs *CollectErrors, threads []Thread) {
|
||||
// newcounts: resource consumption since last cycle
|
||||
newcounts := metrics.Counts
|
||||
tp.lastaccum = newcounts.Sub(tp.metrics.Counts)
|
||||
tp.metrics = metrics
|
||||
tp.lastUpdate = now
|
||||
if len(threads) > 1 {
|
||||
if tp.threads == nil {
|
||||
tp.threads = make(map[ThreadID]trackedThread)
|
||||
}
|
||||
for _, thr := range threads {
|
||||
tt := trackedThread{thr.ThreadName, thr.Counts, Delta{}, now, thr.Wchan}
|
||||
if old, ok := tp.threads[thr.ThreadID]; ok {
|
||||
tt.latest, tt.accum = thr.Counts.Sub(old.accum), thr.Counts
|
||||
}
|
||||
tp.threads[thr.ThreadID] = tt
|
||||
}
|
||||
for id, tt := range tp.threads {
|
||||
if tt.lastUpdate != now {
|
||||
delete(tp.threads, id)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
tp.threads = nil
|
||||
}
|
||||
}
|
||||
|
||||
// Scan procs and update metrics for those which are tracked. Processes that have gone
|
||||
// away get removed from the Tracked map. New processes are returned, along with the count
|
||||
// of permission errors.
|
||||
func (t *Tracker) Update(procs ProcIter) ([]ProcIdInfo, int, error) {
|
||||
now := time.Now()
|
||||
var newProcs []ProcIdInfo
|
||||
var permissionErrors int
|
||||
// handleProc updates the tracker if it's a known and not ignored proc.
|
||||
// If it's neither known nor ignored, newProc will be non-nil.
|
||||
// It is not an error if the process disappears while we are reading
|
||||
// its info out of /proc, it just means nothing will be returned and
|
||||
// the tracker will be unchanged.
|
||||
func (t *Tracker) handleProc(proc Proc, updateTime time.Time) (*IDInfo, CollectErrors) {
|
||||
var cerrs CollectErrors
|
||||
procID, err := proc.GetProcID()
|
||||
if err != nil {
|
||||
return nil, cerrs
|
||||
}
|
||||
|
||||
// Do nothing if we're ignoring this proc.
|
||||
last, known := t.tracked[procID]
|
||||
if known && last == nil {
|
||||
return nil, cerrs
|
||||
}
|
||||
|
||||
metrics, softerrors, err := proc.GetMetrics()
|
||||
if err != nil {
|
||||
if t.debug {
|
||||
log.Printf("error reading metrics for %+v: %v", procID, err)
|
||||
}
|
||||
// This usually happens due to the proc having exited, i.e.
|
||||
// we lost the race. We don't count that as an error.
|
||||
if err != ErrProcNotExist {
|
||||
cerrs.Read++
|
||||
}
|
||||
return nil, cerrs
|
||||
}
|
||||
|
||||
var threads []Thread
|
||||
threads, err = proc.GetThreads()
|
||||
if err != nil {
|
||||
softerrors |= 1
|
||||
}
|
||||
cerrs.Partial += softerrors
|
||||
|
||||
if len(threads) > 0 {
|
||||
metrics.Counts.CtxSwitchNonvoluntary, metrics.Counts.CtxSwitchVoluntary = 0, 0
|
||||
for _, thread := range threads {
|
||||
metrics.Counts.CtxSwitchNonvoluntary += thread.Counts.CtxSwitchNonvoluntary
|
||||
metrics.Counts.CtxSwitchVoluntary += thread.Counts.CtxSwitchVoluntary
|
||||
metrics.States.Add(thread.States)
|
||||
}
|
||||
}
|
||||
|
||||
var newProc *IDInfo
|
||||
if known {
|
||||
last.update(metrics, updateTime, &cerrs, threads)
|
||||
} else {
|
||||
static, err := proc.GetStatic()
|
||||
if err != nil {
|
||||
if t.debug {
|
||||
log.Printf("error reading static details for %+v: %v", procID, err)
|
||||
}
|
||||
return nil, cerrs
|
||||
}
|
||||
newProc = &IDInfo{procID, static, metrics, threads}
|
||||
if t.debug {
|
||||
log.Printf("found new proc: %s", newProc)
|
||||
}
|
||||
|
||||
// Is this a new process with the same pid as one we already know?
|
||||
// Then delete it from the known map, otherwise the cleanup in Update()
|
||||
// will remove the ProcIds entry we're creating here.
|
||||
if oldProcID, ok := t.procIds[procID.Pid]; ok {
|
||||
delete(t.tracked, oldProcID)
|
||||
}
|
||||
t.procIds[procID.Pid] = procID
|
||||
}
|
||||
return newProc, cerrs
|
||||
}
|
||||
|
||||
// update scans procs and updates metrics for those which are tracked. Processes
|
||||
// that have gone away get removed from the Tracked map. New processes are
|
||||
// returned, along with the count of nonfatal errors.
|
||||
func (t *Tracker) update(procs Iter) ([]IDInfo, CollectErrors, error) {
|
||||
var newProcs []IDInfo
|
||||
var colErrs CollectErrors
|
||||
var now = time.Now()
|
||||
|
||||
for procs.Next() {
|
||||
procId, err := procs.GetProcId()
|
||||
if err != nil {
|
||||
continue
|
||||
newProc, cerrs := t.handleProc(procs, now)
|
||||
if newProc != nil {
|
||||
newProcs = append(newProcs, *newProc)
|
||||
}
|
||||
|
||||
last, known := t.Tracked[procId]
|
||||
|
||||
// Are we ignoring this proc?
|
||||
if known && last == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// TODO if just the io file is unreadable, should we still return the other metrics?
|
||||
metrics, err := procs.GetMetrics()
|
||||
if err != nil {
|
||||
if os.IsPermission(err) {
|
||||
permissionErrors++
|
||||
t.Ignore(procId)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if known {
|
||||
var newaccum, lastaccum Counts
|
||||
dcpu := metrics.CpuTime - last.info.CpuTime
|
||||
drbytes := metrics.ReadBytes - last.info.ReadBytes
|
||||
dwbytes := metrics.WriteBytes - last.info.WriteBytes
|
||||
|
||||
lastaccum = Counts{Cpu: dcpu, ReadBytes: drbytes, WriteBytes: dwbytes}
|
||||
newaccum = Counts{
|
||||
Cpu: last.accum.Cpu + lastaccum.Cpu,
|
||||
ReadBytes: last.accum.ReadBytes + lastaccum.ReadBytes,
|
||||
WriteBytes: last.accum.WriteBytes + lastaccum.WriteBytes,
|
||||
}
|
||||
|
||||
last.info.ProcMetrics = metrics
|
||||
last.lastUpdate = now
|
||||
last.accum = newaccum
|
||||
last.lastaccum = lastaccum
|
||||
} else {
|
||||
static, err := procs.GetStatic()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
newProcs = append(newProcs, ProcIdInfo{procId, static, metrics})
|
||||
|
||||
// Is this a new process with the same pid as one we already know?
|
||||
if oldProcId, ok := t.ProcIds[procId.Pid]; ok {
|
||||
// Delete it from known, otherwise the cleanup below will remove the
|
||||
// ProcIds entry we're about to create
|
||||
delete(t.Tracked, oldProcId)
|
||||
}
|
||||
t.ProcIds[procId.Pid] = procId
|
||||
}
|
||||
|
||||
colErrs.Read += cerrs.Read
|
||||
colErrs.Partial += cerrs.Partial
|
||||
}
|
||||
|
||||
err := procs.Close()
|
||||
if err != nil {
|
||||
return nil, permissionErrors, fmt.Errorf("Error reading procs: %v", err)
|
||||
return nil, colErrs, fmt.Errorf("Error reading procs: %v", err)
|
||||
}
|
||||
|
||||
// Rather than allocating a new map each time to detect procs that have
|
||||
// disappeared, we bump the last update time on those that are still
|
||||
// present. Then as a second pass we traverse the map looking for
|
||||
// stale procs and removing them.
|
||||
for procId, pinfo := range t.Tracked {
|
||||
for procID, pinfo := range t.tracked {
|
||||
if pinfo == nil {
|
||||
// TODO is this a bug? we're not tracking the proc so we don't see it go away so ProcIds
|
||||
// and Tracked are leaking?
|
||||
continue
|
||||
}
|
||||
if pinfo.lastUpdate != now {
|
||||
delete(t.Tracked, procId)
|
||||
delete(t.ProcIds, procId.Pid)
|
||||
delete(t.tracked, procID)
|
||||
delete(t.procIds, procID.Pid)
|
||||
}
|
||||
}
|
||||
|
||||
return newProcs, permissionErrors, nil
|
||||
return newProcs, colErrs, nil
|
||||
}
|
||||
|
||||
// checkAncestry walks the process tree recursively towards the root,
|
||||
// stopping at pid 1 or upon finding a parent that's already tracked
|
||||
// or ignored. If we find a tracked parent track this one too; if not,
|
||||
// ignore this one.
|
||||
func (t *Tracker) checkAncestry(idinfo IDInfo, newprocs map[ID]IDInfo) string {
|
||||
ppid := idinfo.ParentPid
|
||||
pProcID := t.procIds[ppid]
|
||||
if pProcID.Pid < 1 {
|
||||
if t.debug {
|
||||
log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo)
|
||||
}
|
||||
// Reached root of process tree without finding a tracked parent.
|
||||
t.ignore(idinfo.ID)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Is the parent already known to the tracker?
|
||||
if ptproc, ok := t.tracked[pProcID]; ok {
|
||||
if ptproc != nil {
|
||||
if t.debug {
|
||||
log.Printf("matched as %q because child of %+v: %+v",
|
||||
ptproc.groupName, pProcID, idinfo)
|
||||
}
|
||||
// We've found a tracked parent.
|
||||
t.track(ptproc.groupName, idinfo)
|
||||
return ptproc.groupName
|
||||
}
|
||||
// We've found an untracked parent.
|
||||
t.ignore(idinfo.ID)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Is the parent another new process?
|
||||
if pinfoid, ok := newprocs[pProcID]; ok {
|
||||
if name := t.checkAncestry(pinfoid, newprocs); name != "" {
|
||||
if t.debug {
|
||||
log.Printf("matched as %q because child of %+v: %+v",
|
||||
name, pProcID, idinfo)
|
||||
}
|
||||
// We've found a tracked parent, which implies this entire lineage should be tracked.
|
||||
t.track(name, idinfo)
|
||||
return name
|
||||
}
|
||||
}
|
||||
|
||||
// Parent is dead, i.e. we never saw it, or there's no tracked proc in our ancestry.
|
||||
if t.debug {
|
||||
log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo)
|
||||
}
|
||||
t.ignore(idinfo.ID)
|
||||
return ""
|
||||
}
|
||||
|
||||
func (t *Tracker) lookupUid(uid int) string {
|
||||
if name, ok := t.username[uid]; ok {
|
||||
return name
|
||||
}
|
||||
|
||||
var name string
|
||||
uidstr := strconv.Itoa(uid)
|
||||
u, err := user.LookupId(uidstr)
|
||||
if err != nil {
|
||||
name = uidstr
|
||||
} else {
|
||||
name = u.Username
|
||||
}
|
||||
t.username[uid] = name
|
||||
return name
|
||||
}
|
||||
|
||||
// Update modifies the tracker's internal state based on what it reads from
|
||||
// iter. Tracks any new procs the namer wants tracked, and updates
|
||||
// its metrics for existing tracked procs. Returns nonfatal errors
|
||||
// and the status of all tracked procs, or an error if fatal.
|
||||
func (t *Tracker) Update(iter Iter) (CollectErrors, []Update, error) {
|
||||
newProcs, colErrs, err := t.update(iter)
|
||||
if err != nil {
|
||||
return colErrs, nil, err
|
||||
}
|
||||
|
||||
// Step 1: track any new proc that should be tracked based on its name and cmdline.
|
||||
untracked := make(map[ID]IDInfo)
|
||||
for _, idinfo := range newProcs {
|
||||
nacl := common.ProcAttributes{
|
||||
Name: idinfo.Name,
|
||||
Cmdline: idinfo.Cmdline,
|
||||
Username: t.lookupUid(idinfo.EffectiveUID),
|
||||
}
|
||||
wanted, gname := t.namer.MatchAndName(nacl)
|
||||
if wanted {
|
||||
if t.debug {
|
||||
log.Printf("matched as %q: %+v", gname, idinfo)
|
||||
}
|
||||
t.track(gname, idinfo)
|
||||
} else {
|
||||
untracked[idinfo.ID] = idinfo
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: track any untracked new proc that should be tracked because its parent is tracked.
|
||||
if t.trackChildren {
|
||||
for _, idinfo := range untracked {
|
||||
if _, ok := t.tracked[idinfo.ID]; ok {
|
||||
// Already tracked or ignored in an earlier iteration
|
||||
continue
|
||||
}
|
||||
|
||||
t.checkAncestry(idinfo, untracked)
|
||||
}
|
||||
}
|
||||
|
||||
tp := []Update{}
|
||||
for _, tproc := range t.tracked {
|
||||
if tproc != nil {
|
||||
tp = append(tp, tproc.getUpdate())
|
||||
}
|
||||
}
|
||||
return colErrs, tp, nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue