Update go dependencies

This commit is contained in:
Manuel Alejandro de Brito Fontes 2018-12-05 13:27:09 -03:00
parent 432f534383
commit f4a4daed84
1299 changed files with 71186 additions and 91183 deletions

View file

@ -1,4 +1,5 @@
.*.sw?
process-exporter
.tarballs
process-exporter-*.tar.gz
load-generator
integration-tester
dist

View file

@ -0,0 +1,39 @@
builds:
- main: cmd/process-exporter/main.go
binary: process-exporter
flags: -tags netgo
goos:
- linux
goarch:
- amd64
- 386
- arm
- arm64
- ppc64
- ppc64le
archive:
name_template: "process-exporter-{{ .Version }}.{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
wrap_in_directory: true
nfpm:
homepage: https://github.com/ncabatoff/process-exporter
maintainer: nick.cabatoff+procexp@gmail.com
description: Prometheus exporter to report on processes running
license: MIT
formats:
- deb
- rpm
bindir: /usr/bin
files:
"packaging/process-exporter.service": "/lib/systemd/system/process-exporter.service"
config_files:
"packaging/conf/all.yaml": "/etc/process-exporter/all.yaml"
scripts:
postinstall: "packaging/scripts/postinstall.sh"
postremove: "packaging/scripts/postremove.sh"
preremove: "packaging/scripts/preremove.sh"
release:
github:
owner: ncabatoff
name: process-exporter
draft: false
prerelease: true

View file

@ -1,35 +0,0 @@
repository:
path: github.com/ncabatoff/process-exporter
build:
binaries:
- name: process-exporter
path: ./cmd/process-exporter
flags: -a -tags netgo
tarball:
files:
- LICENSE
crossbuild:
platforms:
- linux/amd64
- linux/386
- darwin/amd64
- darwin/386
- freebsd/amd64
- freebsd/386
- openbsd/amd64
- openbsd/386
- netbsd/amd64
- netbsd/386
- dragonfly/amd64
- linux/arm
- linux/arm64
- freebsd/arm
# Temporarily deactivated as golang.org/x/sys does not have syscalls
# implemented for that os/platform combination.
#- openbsd/arm
#- linux/mips64
#- linux/mips64le
- netbsd/arm
- linux/ppc64
- linux/ppc64le

View file

@ -0,0 +1,29 @@
services:
- docker
language: go
env:
- IMAGE_TAG=`echo $TRAVIS_TAG|sed s/v//`
go:
- 1.10.x
before_install:
- sudo apt-get -qq update
- sudo apt-get install -y rpm
go_import_path: github.com/ncabatoff/process-exporter
script:
- make style vet test build smoke docker
- if [ -n "$IMAGE_TAG" ]; then make docker DOCKER_IMAGE_TAG=$IMAGE_TAG; fi
after_success:
- docker login -u $DOCKER_USER -p "$DOCKER_PASSWORD"
- >
test -n "$TRAVIS_TAG" &&
docker tag ncabatoff/process-exporter:$IMAGE_TAG ncabatoff/process-exporter:latest &&
docker push ncabatoff/process-exporter:$IMAGE_TAG &&
docker push ncabatoff/process-exporter:latest &&
curl -sL http://git.io/goreleaser | bash

View file

@ -1,17 +1,21 @@
# Start from a Debian image with the latest version of Go installed
# and a workspace (GOPATH) configured at /go.
FROM golang
# Copy the local package files to the container's workspace.
ADD . /go/src/github.com/ncabatoff/process-exporter
FROM golang:1.10 AS build
#RUN curl -L -s https://github.com/golang/dep/releases/download/v0.5.0/dep-linux-amd64 -o $GOPATH/bin/dep
#RUN chmod +x $GOPATH/bin/dep
WORKDIR /go/src/github.com/ncabatoff/process-exporter
ADD . .
#RUN dep ensure
# Build the process-exporter command inside the container.
RUN make -C /go/src/github.com/ncabatoff/process-exporter
RUN make
USER root
FROM scratch
COPY --from=build /go/src/github.com/ncabatoff/process-exporter/process-exporter /bin/process-exporter
# Run the process-exporter command by default when the container starts.
ENTRYPOINT ["/go/src/github.com/ncabatoff/process-exporter/process-exporter"]
ENTRYPOINT ["/bin/process-exporter"]
# Document that the service listens on port 9256.
EXPOSE 9256

View file

@ -0,0 +1,4 @@
FROM scratch
COPY gopath/bin/process-exporter /process-exporter
ENTRYPOINT ["/process-exporter"]
EXPOSE 9256

View file

@ -3,73 +3,156 @@
[[projects]]
branch = "master"
digest = "1:d6afaeed1502aa28e80a4ed0981d570ad91b2579193404256ce672ed0a609e0d"
name = "github.com/beorn7/perks"
packages = ["quantile"]
revision = "4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9"
pruneopts = "UT"
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
[[projects]]
branch = "master"
digest = "1:15042ad3498153684d09f393bbaec6b216c8eec6d61f63dff711de7d64ed8861"
name = "github.com/golang/protobuf"
packages = ["proto"]
revision = "17ce1425424ab154092bbb43af630bd647f3bb0d"
pruneopts = "UT"
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
version = "v1.1.0"
[[projects]]
branch = "master"
name = "github.com/kylelemons/godebug"
packages = ["diff","pretty"]
revision = "d65d576e9348f5982d7f6d83682b694e731a45c6"
digest = "1:d2754cafcab0d22c13541618a8029a70a8959eb3525ff201fe971637e2274cd0"
name = "github.com/google/go-cmp"
packages = [
"cmp",
"cmp/cmpopts",
"cmp/internal/diff",
"cmp/internal/function",
"cmp/internal/value",
]
pruneopts = "UT"
revision = "3af367b6b30c263d47e8895973edcca9a49cf029"
version = "v0.2.0"
[[projects]]
digest = "1:ca955a9cd5b50b0f43d2cc3aeb35c951473eeca41b34eb67507f1dbcc0542394"
name = "github.com/kr/pretty"
packages = ["."]
pruneopts = "UT"
revision = "73f6ac0b30a98e433b289500d779f50c1a6f0712"
version = "v0.1.0"
[[projects]]
digest = "1:15b5cc79aad436d47019f814fde81a10221c740dc8ddf769221a65097fb6c2e9"
name = "github.com/kr/text"
packages = ["."]
pruneopts = "UT"
revision = "e2ffdb16a802fe2bb95e2e35ff34f0e53aeef34f"
version = "v0.1.0"
[[projects]]
digest = "1:ff5ebae34cfbf047d505ee150de27e60570e8c394b3b8fdbb720ff6ac71985fc"
name = "github.com/matttproud/golang_protobuf_extensions"
packages = ["pbutil"]
revision = "3247c84500bff8d9fb6d579d800f20b3e091582c"
version = "v1.0.0"
pruneopts = "UT"
revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c"
version = "v1.0.1"
[[projects]]
branch = "master"
digest = "1:71520363c3acc43c35a2a53f79f6c61f110a026326c8b16dbdd351164765feac"
name = "github.com/ncabatoff/fakescraper"
packages = ["."]
pruneopts = "UT"
revision = "15938421d91a82d197de7fc59aebcac65c43407d"
[[projects]]
branch = "master"
digest = "1:9e33629d4ec9e9344715a54fa0a107f23ce800deb13999b0190df04c3540ccb5"
name = "github.com/ncabatoff/go-seq"
packages = ["seq"]
pruneopts = "UT"
revision = "b08ef85ed83364cba413c98a94bbd4169a0ce70b"
[[projects]]
branch = "add-proc-status"
digest = "1:df5079557e0fa0fe9fb973f84fffd52e32ef26ada655900fdeea9b0848766c74"
name = "github.com/ncabatoff/procfs"
packages = [
".",
"internal/util",
"nfs",
"xfs",
]
pruneopts = "UT"
revision = "e1a38cb53622f65e073c5e750e6498a44ebfbd2a"
[[projects]]
digest = "1:b6221ec0f8903b556e127c449e7106b63e6867170c2d10a7c058623d086f2081"
name = "github.com/prometheus/client_golang"
packages = ["prometheus"]
pruneopts = "UT"
revision = "c5b7fccd204277076155f10851dad72b76a49317"
version = "v0.8.0"
[[projects]]
branch = "master"
digest = "1:2d5cd61daa5565187e1d96bae64dbbc6080dacf741448e9629c64fd93203b0d4"
name = "github.com/prometheus/client_model"
packages = ["go"]
revision = "6f3806018612930941127f2a7c6c453ba2c527d2"
pruneopts = "UT"
revision = "5c3871d89910bfb32f5fcab2aa4b9ec68e65a99f"
[[projects]]
branch = "master"
digest = "1:63b68062b8968092eb86bedc4e68894bd096ea6b24920faca8b9dcf451f54bb5"
name = "github.com/prometheus/common"
packages = ["expfmt","internal/bitbucket.org/ww/goautoneg","model"]
revision = "2f17f4a9d485bf34b4bfaccc273805040e4f86c8"
packages = [
"expfmt",
"internal/bitbucket.org/ww/goautoneg",
"model",
]
pruneopts = "UT"
revision = "c7de2306084e37d54b8be01f3541a8464345e9a5"
[[projects]]
branch = "master"
digest = "1:8c49953a1414305f2ff5465147ee576dd705487c35b15918fcd4efdc0cb7a290"
name = "github.com/prometheus/procfs"
packages = [".","xfs"]
revision = "e645f4e5aaa8506fc71d6edbc5c4ff02c04c46f2"
packages = [
".",
"internal/util",
"nfs",
"xfs",
]
pruneopts = "UT"
revision = "05ee40e3a273f7245e8777337fc7b46e533a9a92"
[[projects]]
branch = "v1"
digest = "1:af715ae33cc1f5695c4b2a4e4b21d008add8802a99e15bb467ac7c32edb5000d"
name = "gopkg.in/check.v1"
packages = ["."]
revision = "20d25e2804050c1cd24a7eea1e7a6447dd0e74ec"
pruneopts = "UT"
revision = "788fd78401277ebd861206a03c884797c6ec5541"
[[projects]]
branch = "v2"
digest = "1:342378ac4dcb378a5448dd723f0784ae519383532f5e70ade24132c4c8693202"
name = "gopkg.in/yaml.v2"
packages = ["."]
revision = "eb3733d160e74a9c7e442f435eb3bea458e1d19f"
pruneopts = "UT"
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
version = "v2.2.1"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "abd920f891c3e5fe2ee27ce40acbdde66e0799704d160b01f22530df003adfe1"
input-imports = [
"github.com/google/go-cmp/cmp",
"github.com/google/go-cmp/cmp/cmpopts",
"github.com/ncabatoff/fakescraper",
"github.com/ncabatoff/go-seq/seq",
"github.com/ncabatoff/procfs",
"github.com/prometheus/client_golang/prometheus",
"gopkg.in/check.v1",
"gopkg.in/yaml.v2",
]
solver-name = "gps-cdcl"
solver-version = 1

View file

@ -1,4 +1,3 @@
# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
@ -17,30 +16,39 @@
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
# name = "github.com/x/y"
# version = "2.4.0"
#
# [prune]
# non-go = false
# go-tests = true
# unused-packages = true
[[constraint]]
branch = "master"
name = "github.com/kylelemons/godebug"
name = "github.com/google/go-cmp"
version = "0.2.0"
[[constraint]]
branch = "master"
name = "github.com/ncabatoff/fakescraper"
[[constraint]]
name = "github.com/prometheus/client_golang"
version = "0.8.0"
branch = "add-proc-status"
name = "github.com/ncabatoff/procfs"
[[constraint]]
branch = "master"
name = "github.com/prometheus/procfs"
name = "github.com/prometheus/client_golang"
version = "0.8.0"
[[constraint]]
branch = "v1"
name = "gopkg.in/check.v1"
[[constraint]]
branch = "v2"
name = "gopkg.in/yaml.v2"
version = "2.2.1"
[prune]
go-tests = true
unused-packages = true

View file

@ -1,32 +1,12 @@
# Copyright 2015 The Prometheus Authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
GO := GO15VENDOREXPERIMENT=1 go
FIRST_GOPATH := $(firstword $(subst :, ,$(shell $(GO) env GOPATH)))
PROMU := $(FIRST_GOPATH)/bin/promu
pkgs = $(shell $(GO) list ./... | grep -v /vendor/)
pkgs = $(shell go list ./... | grep -v /vendor/)
PREFIX ?= $(shell pwd)
BIN_DIR ?= $(shell pwd)
DOCKER_IMAGE_NAME ?= process-exporter
DOCKER_IMAGE_NAME ?= ncabatoff/process-exporter
DOCKER_IMAGE_TAG ?= $(subst /,-,$(shell git rev-parse --abbrev-ref HEAD))
SMOKE_TEST = -config.path packaging/conf/all.yaml -once-to-stdout-delay 1s |grep -q 'namedprocess_namegroup_memory_bytes{groupname="process-exporte",memtype="virtual"}'
ifdef DEBUG
bindata_flags = -debug
endif
all: format vet build test
all: format vet test build smoke
style:
@echo ">> checking code style"
@ -34,38 +14,37 @@ style:
test:
@echo ">> running short tests"
@$(GO) test -short $(pkgs)
go test -short $(pkgs)
format:
@echo ">> formatting code"
@$(GO) fmt $(pkgs)
go fmt $(pkgs)
vet:
@echo ">> vetting code"
@$(GO) vet $(pkgs)
go vet $(pkgs)
build: promu
@echo ">> building binaries"
@$(PROMU) build --prefix $(PREFIX)
build:
@echo ">> building code"
cd cmd/process-exporter; CGO_ENABLED=0 go build -o ../../process-exporter -a -tags netgo
tarball: promu
@echo ">> building release tarball"
@$(PROMU) tarball --prefix $(PREFIX) $(BIN_DIR)
smoke:
@echo ">> smoke testing process-exporter"
./process-exporter $(SMOKE_TEST)
crossbuild: promu
@echo ">> cross-building"
@$(PROMU) crossbuild
@$(PROMU) crossbuild tarballs
integ:
@echo ">> integration testing process-exporter"
go build -o integration-tester cmd/integration-tester/main.go
go build -o load-generator cmd/load-generator/main.go
./integration-tester -write-size-bytes 65536
install:
@echo ">> installing binary"
cd cmd/process-exporter; CGO_ENABLED=0 go install -a -tags netgo
docker:
@echo ">> building docker image"
@docker build -t "$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .
docker build -t "$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" .
docker run --rm -v `pwd`/packaging:/packaging "$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG)" $(SMOKE_TEST)
promu:
@echo ">> fetching promu"
@GOOS=$(shell uname -s | tr A-Z a-z) \
GOARCH=$(subst x86_64,amd64,$(patsubst i%86,386,$(patsubst arm%,arm,$(shell uname -m)))) \
$(GO) get -u github.com/prometheus/promu
.PHONY: all style format build test vet tarball crossbuild docker promu
.PHONY: all style format test vet build integ docker

View file

@ -1,64 +1,111 @@
# process-exporter
Prometheus exporter that mines /proc to report on selected processes.
The premise for this exporter is that sometimes you have apps that are
impractical to instrument directly, either because you don't control the code
or they're written in a language that isn't easy to instrument with Prometheus.
A fair bit of information can be gleaned from /proc, especially for
long-running programs.
[release]: https://github.com/ncabatoff/process-exporter/releases/latest
For most systems it won't be beneficial to create metrics for every process by
name: there are just too many of them and most don't do enough to merit it.
Various command-line options are provided to control how processes are grouped
and the groups are named. Run "process-exporter -man" to see a help page
giving details.
[![Release](https://img.shields.io/github/release/ncabatoff/process-exporter.svg?style=flat-square)][release]
[![Build Status](https://travis-ci.org/ncabatoff/process-exporter.svg?branch=master)](https://travis-ci.org/ncabatoff/process-exporter)
[![Powered By: GoReleaser](https://img.shields.io/badge/powered%20by-goreleaser-green.svg?branch=master)](https://github.com/goreleaser)
Metrics available currently include CPU usage, bytes written and read, and
number of processes in each group.
Some apps are impractical to instrument directly, either because you
don't control the code or they're written in a language that isn't easy to
instrument with Prometheus. We must instead resort to mining /proc.
Bytes read and written come from /proc/[pid]/io in recent enough kernels.
These correspond to the fields `read_bytes` and `write_bytes` respectively.
These IO stats come with plenty of caveats, see either the Linux kernel
documentation or man 5 proc.
## Installation
CPU usage comes from /proc/[pid]/stat fields utime (user time) and stime (system
time.) It has been translated into fractional seconds of CPU consumed. Since
it is a counter, using rate() will tell you how many fractional cores were running
code from this process during the interval given.
Either grab a package for your OS from the [Releases][release] page, or
install via [docker](https://hub.docker.com/r/ncabatoff/process-exporter/).
An example Grafana dashboard to view the metrics is available at https://grafana.net/dashboards/249
## Running
## Instrumentation cost
Usage:
process-exporter will consume CPU in proportion to the number of processes in
the system and the rate at which new ones are created. The most expensive
parts - applying regexps and executing templates - are only applied once per
process seen. If you have mostly long-running processes process-exporter
should be lightweight: each time a scrape occurs, it parses
/proc/$pid/stat and /proc/$pid/cmdline for every process being monitored
and adds a few numbers.
```
process-exporter [options] -config.path filename.yml
```
## Config
or via docker:
```
docker run -d --rm -p 9256:9256 --privileged -v /proc:/host/proc -v `pwd`:/config ncabatoff/process-exporter --procfs /host/proc -config.path /config/filename.yml
```
Important options (run process-exporter --help for full list):
-children (default:true) makes it so that any process that otherwise
isn't part of its own group becomes part of the first group found (if any) when
walking the process tree upwards. In other words, resource usage of
subprocesses is added to their parent's usage unless the subprocess identifies
as a different group name.
-recheck (default:false) means that on each scrape the process names are
re-evaluated. This is disabled by default as an optimization, but since
processes can choose to change their names, this may result in a process
falling into the wrong group if we happen to see it for the first time before
it has assumed its proper name.
-procnames is intended as a quick alternative to using a config file. Details
in the following section.
## Configuration and group naming
To select and group the processes to monitor, either provide command-line
arguments or use a YAML configuration file.
To avoid confusion with the cmdline YAML element, we'll refer to the
null-delimited contents of `/proc/<pid>/cmdline` as the array `argv[]`.
The recommended option is to use a config file via -config.path, but for
convenience and backwards compatibility the -procnames/-namemapping options
exist as an alternative.
### Using a config file
The general format of the -config.path YAML file is a top-level
`process_names` section, containing a list of name matchers:
```
process_names:
- matcher1
- matcher2
...
- matcherN
```
The default config shipped with the deb/rpm packages is:
```
process_names:
- name: "{{.Comm}}"
cmdline:
- '.+'
```
A process may only belong to one group: even if multiple items would match, the
first one listed in the file wins.
(Side note: to avoid confusion with the cmdline YAML element, we'll refer to
the command-line arguments of a process `/proc/<pid>/cmdline` as the array
`argv[]`.)
#### Using a config file: group name
Each item in `process_names` gives a recipe for identifying and naming
processes. The optional `name` tag defines a template to use to name
matching processes; if not specified, `name` defaults to `{{.ExeBase}}`.
Template variables available:
- `{{.Comm}}` contains the basename of the original executable, i.e. 2nd field in `/proc/<pid>/stat`
- `{{.ExeBase}}` contains the basename of the executable
- `{{.ExeFull}}` contains the fully qualified path of the executable
- `{{.Username}}` contains the username of the effective user
- `{{.Matches}}` map contains all the matches resulting from applying cmdline regexps
#### Using a config file: process selectors
Each item in `process_names` must contain one or more selectors (`comm`, `exe`
or `cmdline`); if more than one selector is present, they must all match. Each
selector is a list of strings to match against a process's `comm`, `argv[0]`,
or in the case of `cmdline`, a regexp to apply to the command line.
or in the case of `cmdline`, a regexp to apply to the command line. The cmdline
regexp uses the [Go syntax](https://golang.org/pkg/regexp).
For `comm` and `exe`, the list of strings is an OR, meaning any process
matching any of the strings will be added to the item's group.
@ -67,10 +114,7 @@ For `cmdline`, the list of regexes is an AND, meaning they all must match. Any
capturing groups in a regexp must use the `?P<name>` option to assign a name to
the capture, which is used to populate `.Matches`.
A process may only belong to one group: even if multiple items would match, the
first one listed in the file wins.
Other performance tips: give an exe or comm clause in addition to any cmdline
Performance tip: give an exe or comm clause in addition to any cmdline
clause, so you avoid executing the regexp when the executable name doesn't
match.
@ -95,8 +139,7 @@ process_names:
exe:
- /usr/local/bin/process-exporter
cmdline:
- -config.path\\s+(?P<Cfgfile>\\S+)
- -config.path\s+(?P<Cfgfile>\S+)
```
@ -118,43 +161,195 @@ process_names:
```
## Docker
### Using -procnames/-namemapping instead of config.path
A docker image can be created with
Every name in the procnames list becomes a process group. The default name of
a process is the value found in the second field of /proc/<pid>/stat
("comm"), which is truncated at 15 chars. Usually this is the same as the
name of the executable.
If -namemapping isn't provided, every process with a comm value present
in -procnames is assigned to a group based on that name, and any other
processes are ignored.
The -namemapping option is a comma-separated list of alternating
name,regexp values. It allows assigning a name to a process based on a
combination of the process name and command line. For example, using
-namemapping "python2,([^/]+)\.py,java,-jar\s+([^/]+).jar"
will make it so that each different python2 and java -jar invocation will be
tracked with distinct metrics. Processes whose remapped name is absent from
the procnames list will be ignored. On an Ubuntu Xenial machine being used as
a workstation, here's a good way of tracking resource usage for a few
different key user apps:
process-exporter -namemapping "upstart,(--user)" \
-procnames chromium-browse,bash,gvim,prometheus,process-exporter,upstart:-user
Since upstart --user is the parent process of the X11 session, this will
make all apps started by the user fall into the group named "upstart:-user",
unless they're one of the others named explicitly with -procnames, like gvim.
## Group Metrics
There's no meaningful way to name a process that will only ever name a single process, so process-exporter assumes that every metric will be attached
to a group of processes - not a
[process group](https://en.wikipedia.org/wiki/Process_group) in the technical
sense, just one or more processes that meet a configuration's specification
of what should be monitored and how to name it.
All these metrics start with `namedprocess_namegroup_` and have at minimum
the label `groupname`.
### num_procs gauge
Number of processes in this group.
### cpu_user_seconds_total counter
CPU usage based on /proc/[pid]/stat field utime(14) i.e. user time.
A value of 1 indicates that the processes in this group have been scheduled
in user mode for a total of 1 second on a single virtual CPU.
### cpu_system_seconds_total counter
CPU usage based on /proc/[pid]/stat field stime(15) i.e. system time.
### read_bytes_total counter
Bytes read based on /proc/[pid]/io field read_bytes. The man page
says
> Attempt to count the number of bytes which this process really did cause to be fetched from the storage layer. This is accurate for block-backed filesystems.
but I would take it with a grain of salt.
### write_bytes_total counter
Bytes written based on /proc/[pid]/io field write_bytes. As with
read_bytes, somewhat dubious. May be useful for isolating which processes
are doing the most I/O, but probably not measuring just how much I/O is happening.
### major_page_faults_total counter
Number of major page faults based on /proc/[pid]/stat field majflt(12).
### minor_page_faults_total counter
Number of minor page faults based on /proc/[pid]/stat field minflt(10).
### context_switches_total counter
Number of context switches based on /proc/[pid]/status fields voluntary_ctxt_switches
and nonvoluntary_ctxt_switches. The extra label `ctxswitchtype` can have two values:
`voluntary` and `nonvoluntary`.
### memory_bytes gauge
Number of bytes of memory used. The extra label `memtype` can have two values:
*resident*: Field rss(24) from /proc/[pid]/stat, whose doc says:
> This is just the pages which count toward text, data, or stack space. This does not include pages which have not been demand-loaded in, or which are swapped out.
*virtual*: Field vsize(23) from /proc/[pid]/stat, virtual memory size.
*swapped*: Field VmSwap from /proc/[pid]/status, translated from KB to bytes.
### open_filedesc gauge
Number of file descriptors, based on counting how many entries are in the directory
/proc/[pid]/fd.
### worst_fd_ratio gauge
Worst ratio of open filedescs to filedesc limit, amongst all the procs in the
group. The limit is the fd soft limit based on /proc/[pid]/limits.
Normally Prometheus metrics ought to be as "basic" as possible (i.e. the raw
values rather than a derived ratio), but we use a ratio here because nothing
else makes sense. Suppose there are 10 procs in a given group, each with a
soft limit of 4096, and one of them has 4000 open fds and the others all have
40, their total fdcount is 4360 and total soft limit is 40960, so the ratio
is 1:10, but in fact one of the procs is about to run out of fds. With
worst_fd_ratio we're able to know this: in the above example it would be
0.97, rather than the 0.10 you'd see if you computed sum(open_filedesc) /
sum(limit_filedesc).
### oldest_start_time_seconds gauge
Epoch time (seconds since 1970/1/1) at which the oldest process in the group
started. This is derived from field starttime(22) from /proc/[pid]/stat, added
to boot time to make it relative to epoch.
### num_threads gauge
Sum of number of threads of all process in the group. Based on field num_threads(20)
from /proc/[pid]/stat.
### states gauge
Number of threads in the group in each of various states, based on the field
state(3) from /proc/[pid]/stat.
The extra label `state` can have these values: `Running`, `Sleeping`, `Waiting`, `Zombie`, `Other`.
## Group Thread Metrics
All these metrics start with `namedprocess_namegroup_` and have at minimum
the labels `groupname` and `threadname`. `threadname` is field comm(2) from
/proc/[pid]/stat. Just as groupname breaks the set of processes down into
groups, threadname breaks a given process group down into subgroups.
### thread_count gauge
Number of threads in this thread subgroup.
### thread_cpu_seconds_total counter
Same as cpu_user_seconds_total and cpu_system_seconds_total, but broken down
per-thread subgroup. Unlike cpu_user_seconds_total/cpu_system_seconds_total,
the label `cpumode` is used to distinguish between `user` and `system` time.
### thread_io_bytes_total counter
Same as read_bytes_total and write_bytes_total, but broken down
per-thread subgroup. Unlike read_bytes_total/write_bytes_total,
the label `iomode` is used to distinguish between `read` and `write` bytes.
### thread_major_page_faults_total counter
Same as major_page_faults_total, but broken down per-thread subgroup.
### thread_minor_page_faults_total counter
Same as minor_page_faults_total, but broken down per-thread subgroup.
### thread_context_switches_total counter
Same as context_switches_total, but broken down per-thread subgroup.
## Instrumentation cost
process-exporter will consume CPU in proportion to the number of processes in
the system and the rate at which new ones are created. The most expensive
parts - applying regexps and executing templates - are only applied once per
process seen, unless the command-line option -recheck is provided.
If you have mostly long-running processes process-exporter overhead should be
minimal: each time a scrape occurs, it will parse /proc/$pid/stat and
/proc/$pid/cmdline for every process being monitored and add a few numbers.
## Dashboards
An example Grafana dashboard to view the metrics is available at https://grafana.net/dashboards/249
## Building
Install [dep](https://github.com/golang/dep), then:
```
make docker
dep ensure
make
```
Then run the docker, e.g.
```
docker run --privileged --name pexporter -d -v /proc:/host/proc -p 127.0.0.1:9256:9256 process-exporter:master -procfs /host/proc -procnames chromium-browse,bash,prometheus,gvim,upstart:-user -namemapping "upstart,(-user)"
```
This will expose metrics on http://localhost:9256/metrics. Leave off the
`127.0.0.1:` to publish on all interfaces. Leave off the --privileged and
add the --user docker run argument if you only need to monitor processes
belonging to a single user.
## History
An earlier version of this exporter had options to enable auto-discovery of
which processes were consuming resources. This functionality has been removed.
These options were based on a percentage of resource usage, e.g. if an
untracked process consumed X% of CPU during a scrape, start tracking processes
with that name. However during any given scrape it's likely that most
processes are idle, so we could add a process that consumes minimal resources
but which happened to be active during the interval preceding the current
scrape. Over time this means that a great many processes wind up being
scraped, which becomes unmanageable to visualize. This could be mitigated by
looking at resource usage over longer intervals, but ultimately I didn't feel
this feature was important enough to invest more time in at this point. It may
re-appear at some point in the future, but no promises.
Another lost feature: the "other" group was used to count usage by non-tracked
procs. This was useful to get an idea of what wasn't being monitored. But it
comes at a high cost: if you know what processes you care about, you're wasting
a lot of CPU to compute the usage of everything else that you don't care about.
The new approach is to minimize resources expended on non-tracked processes and
to require the user to whitelist the processes to track.

View file

@ -1 +0,0 @@
0.1.0

View file

@ -0,0 +1,49 @@
steps:
# - name: string
# args: string
# env: string
# dir: string
# id: string
# waitFor: string
# entrypoint: string
# secretEnv: string
# Setup the workspace
- name: gcr.io/cloud-builders/go
env: ['PROJECT_ROOT=github.com/ncabatoff/process-exporter']
args: ['env']
# Build project
- name: gcr.io/cloud-builders/docker
entrypoint: 'bash'
args: ['-c', 'docker build -t ncabatoff/process-exporter:`echo $TAG_NAME|sed s/^v//` .']
# Login to docker hub
- name: gcr.io/cloud-builders/docker
entrypoint: 'bash'
args: ['-c', 'docker login --username=ncabatoff --password=$$DOCKER_PASSWORD']
secretEnv: ['DOCKER_PASSWORD']
# Push to docker hub
- name: gcr.io/cloud-builders/docker
entrypoint: 'bash'
args: ['-c', 'docker push ncabatoff/process-exporter:`echo $TAG_NAME|sed s/^v//`']
# Create github release
- name: goreleaser/goreleaser
entrypoint: /bin/sh
dir: gopath/src/github.com
env: ['GOPATH=/workspace/gopath']
args: ['-c', 'cd ncabatoff/process-exporter && git tag $TAG_NAME && /goreleaser' ]
secretEnv: ['GITHUB_TOKEN']
secrets:
- kmsKeyName: projects/process-exporter/locations/global/keyRings/cloudbuild/cryptoKeys/mykey
secretEnv:
DOCKER_PASSWORD: |
CiQAeHUuEinm1h2j9mp8r0NjPw1l1bBwzDG+JHPUPf3GvtmdjXESMAD3wUauaxWrxid/zPunG67x
5+1CYedV5exh0XwQ32eu4UkniS7HHJNWBudklaG0JA==
GITHUB_TOKEN: |
CiQAeHUuEhEKAvfIHlUZrCgHNScm0mDKI8Z1w/N3OzDk8Ql6kAUSUQD3wUau7qRc+H7OnTUo6b2Z
DKA1eMKHNg729KfHj2ZMqZXinrJloYMbZcZRXP9xv91xCq6QJB5UoFoyYDnXGdvgXC08YUstR6UB
H0bwHhe1GQ==

View file

@ -0,0 +1,32 @@
steps:
# - name: string
# args: string
# env: string
# dir: string
# id: string
# waitFor: string
# entrypoint: string
# secretEnv: string
# - name: gcr.io/cloud-builders/curl
# args: ['-L', '-s', '-o', 'dep', 'https://github.com/golang/dep/releases/download/v0.5.0/dep-linux-amd64']
# - name: ubuntu
# args: ['chmod', '+x', 'dep']
# Setup the workspace
- name: gcr.io/cloud-builders/go
env: ['PROJECT_ROOT=github.com/ncabatoff/process-exporter']
args: ['env']
# Run dep in the workspace created in previous step
# - name: gcr.io/cloud-builders/go
# entrypoint: /bin/sh
# dir: gopath/src/github.com
# env: ['GOPATH=/workspace/gopath']
# args: ['-c', 'cd ncabatoff/process-exporter && /workspace/dep ensure -vendor-only' ]
- name: gcr.io/cloud-builders/go
entrypoint: /bin/sh
dir: gopath/src/github.com
env: ['GOPATH=/workspace/gopath']
args: ['-c', 'make -C ncabatoff/process-exporter style vet test build integ install' ]
- name: gcr.io/cloud-builders/docker
args: ['build', '--tag=gcr.io/$PROJECT_ID/process-exporter', '.', '-f', 'Dockerfile.cloudbuild']
images: ['gcr.io/$PROJECT_ID/process-exporter']

View file

@ -1,14 +1,18 @@
package common
import "fmt"
type (
NameAndCmdline struct {
Name string
Cmdline []string
ProcAttributes struct {
Name string
Cmdline []string
Username string
}
MatchNamer interface {
// MatchAndName returns false if the match failed, otherwise
// true and the resulting name.
MatchAndName(NameAndCmdline) (bool, string)
MatchAndName(ProcAttributes) (bool, string)
fmt.Stringer
}
)

View file

@ -1,173 +1,179 @@
package proc
import (
common "github.com/ncabatoff/process-exporter"
"time"
seq "github.com/ncabatoff/go-seq/seq"
common "github.com/ncabatoff/process-exporter"
)
type (
// Grouper is the top-level interface to the process metrics. All tracked
// procs sharing the same group name are aggregated.
Grouper struct {
namer common.MatchNamer
trackChildren bool
// track how much was seen last time so we can report the delta
GroupStats map[string]Counts
tracker *Tracker
// groupAccum records the historical accumulation of a group so that
// we can avoid ever decreasing the counts we return.
groupAccum map[string]Counts
tracker *Tracker
threadAccum map[string]map[string]Threads
debug bool
}
GroupCountMap map[string]GroupCounts
// GroupByName maps group name to group metrics.
GroupByName map[string]Group
GroupCounts struct {
// Threads collects metrics for threads in a group sharing a thread name.
Threads struct {
Name string
NumThreads int
Counts
Procs int
Memresident uint64
Memvirtual uint64
}
// Group describes the metrics of a single group.
Group struct {
Counts
States
Wchans map[string]int
Procs int
Memory
OldestStartTime time.Time
OpenFDs uint64
WorstFDratio float64
NumThreads uint64
Threads []Threads
}
)
func NewGrouper(trackChildren bool, namer common.MatchNamer) *Grouper {
// Returns true if x < y. Test designers should ensure they always have
// a unique name/numthreads combination for each group.
func lessThreads(x, y Threads) bool { return seq.Compare(x, y) < 0 }
// NewGrouper creates a grouper.
func NewGrouper(namer common.MatchNamer, trackChildren, alwaysRecheck, debug bool) *Grouper {
g := Grouper{
trackChildren: trackChildren,
namer: namer,
GroupStats: make(map[string]Counts),
tracker: NewTracker(),
groupAccum: make(map[string]Counts),
threadAccum: make(map[string]map[string]Threads),
tracker: NewTracker(namer, trackChildren, alwaysRecheck, debug),
debug: debug,
}
return &g
}
func (g *Grouper) checkAncestry(idinfo ProcIdInfo, newprocs map[ProcId]ProcIdInfo) string {
ppid := idinfo.ParentPid
pProcId := g.tracker.ProcIds[ppid]
if pProcId.Pid < 1 {
// Reached root of process tree without finding a tracked parent.
g.tracker.Ignore(idinfo.ProcId)
return ""
}
// Is the parent already known to the tracker?
if ptproc, ok := g.tracker.Tracked[pProcId]; ok {
if ptproc != nil {
// We've found a tracked parent.
g.tracker.Track(ptproc.GroupName, idinfo)
return ptproc.GroupName
} else {
// We've found an untracked parent.
g.tracker.Ignore(idinfo.ProcId)
return ""
}
}
// Is the parent another new process?
if pinfoid, ok := newprocs[pProcId]; ok {
if name := g.checkAncestry(pinfoid, newprocs); name != "" {
// We've found a tracked parent, which implies this entire lineage should be tracked.
g.tracker.Track(name, idinfo)
return name
}
}
// Parent is dead, i.e. we never saw it, or there's no tracked proc in our ancestry.
g.tracker.Ignore(idinfo.ProcId)
return ""
}
// Update tracks any new procs that should be according to policy, and updates
// the metrics for already tracked procs. Permission errors are returned as a
// count, and will not affect the error return value.
func (g *Grouper) Update(iter ProcIter) (int, error) {
newProcs, permErrs, err := g.tracker.Update(iter)
if err != nil {
return permErrs, err
}
// Step 1: track any new proc that should be tracked based on its name and cmdline.
untracked := make(map[ProcId]ProcIdInfo)
for _, idinfo := range newProcs {
wanted, gname := g.namer.MatchAndName(common.NameAndCmdline{Name: idinfo.Name, Cmdline: idinfo.Cmdline})
if !wanted {
untracked[idinfo.ProcId] = idinfo
continue
}
g.tracker.Track(gname, idinfo)
}
// Step 2: track any untracked new proc that should be tracked because its parent is tracked.
if !g.trackChildren {
return permErrs, nil
}
for _, idinfo := range untracked {
if _, ok := g.tracker.Tracked[idinfo.ProcId]; ok {
// Already tracked or ignored
continue
}
g.checkAncestry(idinfo, untracked)
}
return permErrs, nil
}
// groups returns the aggregate metrics for all groups tracked. This reflects
// solely what's currently running.
func (g *Grouper) groups() GroupCountMap {
gcounts := make(GroupCountMap)
func groupadd(grp Group, ts Update) Group {
var zeroTime time.Time
for _, tinfo := range g.tracker.Tracked {
if tinfo == nil {
continue
}
cur := gcounts[tinfo.GroupName]
cur.Procs++
tstats := tinfo.GetStats()
cur.Memresident += tstats.Memory.Resident
cur.Memvirtual += tstats.Memory.Virtual
cur.OpenFDs += tstats.Filedesc.Open
openratio := float64(tstats.Filedesc.Open) / float64(tstats.Filedesc.Limit)
if cur.WorstFDratio < openratio {
cur.WorstFDratio = openratio
}
cur.Counts.Cpu += tstats.latest.Cpu
cur.Counts.ReadBytes += tstats.latest.ReadBytes
cur.Counts.WriteBytes += tstats.latest.WriteBytes
if cur.OldestStartTime == zeroTime || tstats.start.Before(cur.OldestStartTime) {
cur.OldestStartTime = tstats.start
}
gcounts[tinfo.GroupName] = cur
grp.Procs++
grp.Memory.ResidentBytes += ts.Memory.ResidentBytes
grp.Memory.VirtualBytes += ts.Memory.VirtualBytes
grp.Memory.VmSwapBytes += ts.Memory.VmSwapBytes
if ts.Filedesc.Open != -1 {
grp.OpenFDs += uint64(ts.Filedesc.Open)
}
openratio := float64(ts.Filedesc.Open) / float64(ts.Filedesc.Limit)
if grp.WorstFDratio < openratio {
grp.WorstFDratio = openratio
}
grp.NumThreads += ts.NumThreads
grp.Counts.Add(ts.Latest)
grp.States.Add(ts.States)
if grp.OldestStartTime == zeroTime || ts.Start.Before(grp.OldestStartTime) {
grp.OldestStartTime = ts.Start
}
return gcounts
if grp.Wchans == nil {
grp.Wchans = make(map[string]int)
}
for wchan, count := range ts.Wchans {
grp.Wchans[wchan] += count
}
return grp
}
// Groups returns GroupCounts with Counts that never decrease in value from one
// call to the next. Even if processes exit, their CPU and IO contributions up
// to that point are included in the results. Even if no processes remain
// in a group it will still be included in the results.
func (g *Grouper) Groups() GroupCountMap {
groups := g.groups()
// Update asks the tracker to report on each tracked process by name.
// These are aggregated by groupname, augmented by accumulated counts
// from the past, and returned. Note that while the Tracker reports
// only what counts have changed since last cycle, Grouper.Update
// returns counts that never decrease. Even once the last process
// with name X disappears, name X will still appear in the results
// with the same counts as before; of course, all non-count metrics
// will be zero.
func (g *Grouper) Update(iter Iter) (CollectErrors, GroupByName, error) {
cerrs, tracked, err := g.tracker.Update(iter)
if err != nil {
return cerrs, nil, err
}
return cerrs, g.groups(tracked), nil
}
// First add any accumulated counts to what was just observed,
// Translate the updates into a new GroupByName and update internal history.
func (g *Grouper) groups(tracked []Update) GroupByName {
groups := make(GroupByName)
threadsByGroup := make(map[string][]ThreadUpdate)
for _, update := range tracked {
groups[update.GroupName] = groupadd(groups[update.GroupName], update)
if update.Threads != nil {
threadsByGroup[update.GroupName] =
append(threadsByGroup[update.GroupName], update.Threads...)
}
}
// Add any accumulated counts to what was just observed,
// and update the accumulators.
for gname, group := range groups {
if oldcounts, ok := g.GroupStats[gname]; ok {
group.Counts.Cpu += oldcounts.Cpu
group.Counts.ReadBytes += oldcounts.ReadBytes
group.Counts.WriteBytes += oldcounts.WriteBytes
if oldcounts, ok := g.groupAccum[gname]; ok {
group.Counts.Add(Delta(oldcounts))
}
g.GroupStats[gname] = group.Counts
g.groupAccum[gname] = group.Counts
group.Threads = g.threads(gname, threadsByGroup[gname])
groups[gname] = group
}
// Now add any groups that were observed in the past but aren't running now.
for gname, gcounts := range g.GroupStats {
for gname, gcounts := range g.groupAccum {
if _, ok := groups[gname]; !ok {
groups[gname] = GroupCounts{Counts: gcounts}
groups[gname] = Group{Counts: gcounts}
}
}
return groups
}
func (g *Grouper) threads(gname string, tracked []ThreadUpdate) []Threads {
if len(tracked) == 0 {
delete(g.threadAccum, gname)
return nil
}
ret := make([]Threads, 0, len(tracked))
threads := make(map[string]Threads)
// First aggregate the thread metrics by thread name.
for _, nc := range tracked {
curthr := threads[nc.ThreadName]
curthr.NumThreads++
curthr.Counts.Add(nc.Latest)
curthr.Name = nc.ThreadName
threads[nc.ThreadName] = curthr
}
// Add any accumulated counts to what was just observed,
// and update the accumulators.
if history := g.threadAccum[gname]; history != nil {
for tname := range threads {
if oldcounts, ok := history[tname]; ok {
counts := threads[tname]
counts.Add(Delta(oldcounts.Counts))
threads[tname] = counts
}
}
}
g.threadAccum[gname] = threads
for _, thr := range threads {
ret = append(ret, thr)
}
return ret
}

View file

@ -2,18 +2,21 @@ package proc
import (
"fmt"
"os"
"path/filepath"
"strconv"
"time"
"github.com/prometheus/procfs"
"github.com/ncabatoff/procfs"
)
func newProcIdStatic(pid, ppid int, startTime uint64, name string, cmdline []string) ProcIdStatic {
return ProcIdStatic{ProcId{pid, startTime}, ProcStatic{name, cmdline, ppid, time.Time{}}}
}
// ErrProcNotExist indicates a process couldn't be read because it doesn't exist,
// typically because it disappeared while we were reading it.
var ErrProcNotExist = fmt.Errorf("process does not exist")
type (
// ProcId uniquely identifies a process.
ProcId struct {
// ID uniquely identifies a process.
ID struct {
// UNIX process id
Pid int
// The time the process started after system boot, the value is expressed
@ -21,82 +24,138 @@ type (
StartTimeRel uint64
}
// ProcStatic contains data read from /proc/pid/*
ProcStatic struct {
Name string
Cmdline []string
ParentPid int
StartTime time.Time
ThreadID ID
// Static contains data read from /proc/pid/*
Static struct {
Name string
Cmdline []string
ParentPid int
StartTime time.Time
EffectiveUID int
}
// ProcMetrics contains data read from /proc/pid/*
ProcMetrics struct {
CpuTime float64
ReadBytes uint64
WriteBytes uint64
// Counts are metric counters common to threads and processes and groups.
Counts struct {
CPUUserTime float64
CPUSystemTime float64
ReadBytes uint64
WriteBytes uint64
MajorPageFaults uint64
MinorPageFaults uint64
CtxSwitchVoluntary uint64
CtxSwitchNonvoluntary uint64
}
// Memory describes a proc's memory usage.
Memory struct {
ResidentBytes uint64
VirtualBytes uint64
OpenFDs uint64
MaxFDs uint64
VmSwapBytes uint64
}
ProcIdStatic struct {
ProcId
ProcStatic
// Filedesc describes a proc's file descriptor usage and soft limit.
Filedesc struct {
// Open is the count of open file descriptors, -1 if unknown.
Open int64
// Limit is the fd soft limit for the process.
Limit uint64
}
ProcInfo struct {
ProcStatic
ProcMetrics
// States counts how many threads are in each state.
States struct {
Running int
Sleeping int
Waiting int
Zombie int
Other int
}
ProcIdInfo struct {
ProcId
ProcStatic
ProcMetrics
// Metrics contains data read from /proc/pid/*
Metrics struct {
Counts
Memory
Filedesc
NumThreads uint64
States
Wchan string
}
// Thread contains per-thread data.
Thread struct {
ThreadID
ThreadName string
Counts
Wchan string
States
}
// IDInfo groups all info for a single process.
IDInfo struct {
ID
Static
Metrics
Threads []Thread
}
// ProcIdInfoThreads struct {
// ProcIdInfo
// Threads []ProcThread
// }
// Proc wraps the details of the underlying procfs-reading library.
// Any of these methods may fail if the process has disapeared.
// We try to return as much as possible rather than an error, e.g.
// if some /proc files are unreadable.
Proc interface {
// GetPid() returns the POSIX PID (process id). They may be reused over time.
GetPid() int
// GetProcId() returns (pid,starttime), which can be considered a unique process id.
// It may fail if the caller doesn't have permission to read /proc/<pid>/stat, or if
// the process has disapeared.
GetProcId() (ProcId, error)
// GetProcID() returns (pid,starttime), which can be considered a unique process id.
GetProcID() (ID, error)
// GetStatic() returns various details read from files under /proc/<pid>/. Technically
// name may not be static, but we'll pretend it is.
// It may fail if the caller doesn't have permission to read those files, or if
// the process has disapeared.
GetStatic() (ProcStatic, error)
GetStatic() (Static, error)
// GetMetrics() returns various metrics read from files under /proc/<pid>/.
// It may fail if the caller doesn't have permission to read those files, or if
// the process has disapeared.
GetMetrics() (ProcMetrics, error)
// It returns an error on complete failure. Otherwise, it returns metrics
// and 0 on complete success, 1 if some (like I/O) couldn't be read.
GetMetrics() (Metrics, int, error)
GetStates() (States, error)
GetWchan() (string, error)
GetCounts() (Counts, int, error)
GetThreads() ([]Thread, error)
}
// proc is a wrapper for procfs.Proc that caches results of some reads and implements Proc.
proc struct {
// proccache implements the Proc interface by acting as wrapper for procfs.Proc
// that caches results of some reads.
proccache struct {
procfs.Proc
procid *ProcId
stat *procfs.ProcStat
cmdline []string
io *procfs.ProcIO
bootTime uint64
procid *ID
stat *procfs.ProcStat
status *procfs.ProcStatus
cmdline []string
io *procfs.ProcIO
fs *FS
wchan *string
}
proc struct {
proccache
}
// procs is a fancier []Proc that saves on some copying.
procs interface {
get(int) Proc
length() int
}
// procfsprocs implements procs using procfs.
procfsprocs struct {
Procs []procfs.Proc
bootTime uint64
Procs []procfs.Proc
fs *FS
}
// ProcIter is an iterator over a sequence of procs.
ProcIter interface {
// Iter is an iterator over a sequence of procs.
Iter interface {
// Next returns true if the iterator is not exhausted.
Next() bool
// Close releases any resources the iterator uses.
@ -105,7 +164,7 @@ type (
Proc
}
// procIterator implements the ProcIter interface using procfs.
// procIterator implements the Iter interface
procIterator struct {
// procs is the list of Proc we're iterating over.
procs
@ -119,66 +178,101 @@ type (
Proc
}
procIdInfos []ProcIdInfo
// Source is a source of procs.
Source interface {
// AllProcs returns all the processes in this source at this moment in time.
AllProcs() Iter
}
// FS implements Source.
FS struct {
procfs.FS
BootTime uint64
MountPoint string
debug bool
}
)
func procInfoIter(ps ...ProcIdInfo) ProcIter {
return &procIterator{procs: procIdInfos(ps), idx: -1}
func (ii IDInfo) String() string {
return fmt.Sprintf("%+v:%+v", ii.ID, ii.Static)
}
func Info(p Proc) (ProcIdInfo, error) {
id, err := p.GetProcId()
if err != nil {
return ProcIdInfo{}, err
}
static, err := p.GetStatic()
if err != nil {
return ProcIdInfo{}, err
}
metrics, err := p.GetMetrics()
if err != nil {
return ProcIdInfo{}, err
}
return ProcIdInfo{id, static, metrics}, nil
// Add adds c2 to the counts.
func (c *Counts) Add(c2 Delta) {
c.CPUUserTime += c2.CPUUserTime
c.CPUSystemTime += c2.CPUSystemTime
c.ReadBytes += c2.ReadBytes
c.WriteBytes += c2.WriteBytes
c.MajorPageFaults += c2.MajorPageFaults
c.MinorPageFaults += c2.MinorPageFaults
c.CtxSwitchVoluntary += c2.CtxSwitchVoluntary
c.CtxSwitchNonvoluntary += c2.CtxSwitchNonvoluntary
}
func (p procIdInfos) get(i int) Proc {
return &p[i]
// Sub subtracts c2 from the counts.
func (c Counts) Sub(c2 Counts) Delta {
c.CPUUserTime -= c2.CPUUserTime
c.CPUSystemTime -= c2.CPUSystemTime
c.ReadBytes -= c2.ReadBytes
c.WriteBytes -= c2.WriteBytes
c.MajorPageFaults -= c2.MajorPageFaults
c.MinorPageFaults -= c2.MinorPageFaults
c.CtxSwitchVoluntary -= c2.CtxSwitchVoluntary
c.CtxSwitchNonvoluntary -= c2.CtxSwitchNonvoluntary
return Delta(c)
}
func (p procIdInfos) length() int {
return len(p)
func (s *States) Add(s2 States) {
s.Other += s2.Other
s.Running += s2.Running
s.Sleeping += s2.Sleeping
s.Waiting += s2.Waiting
s.Zombie += s2.Zombie
}
func (p ProcIdInfo) GetPid() int {
return p.ProcId.Pid
func (p IDInfo) GetThreads() ([]Thread, error) {
return p.Threads, nil
}
func (p ProcIdInfo) GetProcId() (ProcId, error) {
return p.ProcId, nil
// GetPid implements Proc.
func (p IDInfo) GetPid() int {
return p.ID.Pid
}
func (p ProcIdInfo) GetStatic() (ProcStatic, error) {
return p.ProcStatic, nil
// GetProcID implements Proc.
func (p IDInfo) GetProcID() (ID, error) {
return p.ID, nil
}
func (p ProcIdInfo) GetMetrics() (ProcMetrics, error) {
return p.ProcMetrics, nil
// GetStatic implements Proc.
func (p IDInfo) GetStatic() (Static, error) {
return p.Static, nil
}
func (p procfsprocs) get(i int) Proc {
return &proc{Proc: p.Procs[i], bootTime: p.bootTime}
// GetCounts implements Proc.
func (p IDInfo) GetCounts() (Counts, int, error) {
return p.Metrics.Counts, 0, nil
}
func (p procfsprocs) length() int {
return len(p.Procs)
// GetMetrics implements Proc.
func (p IDInfo) GetMetrics() (Metrics, int, error) {
return p.Metrics, 0, nil
}
func (p *proc) GetPid() int {
// GetStates implements Proc.
func (p IDInfo) GetStates() (States, error) {
return p.States, nil
}
func (p IDInfo) GetWchan() (string, error) {
return p.Wchan, nil
}
func (p *proccache) GetPid() int {
return p.Proc.PID
}
func (p *proc) GetStat() (procfs.ProcStat, error) {
func (p *proccache) getStat() (procfs.ProcStat, error) {
if p.stat == nil {
stat, err := p.Proc.NewStat()
if err != nil {
@ -190,19 +284,32 @@ func (p *proc) GetStat() (procfs.ProcStat, error) {
return *p.stat, nil
}
func (p *proc) GetProcId() (ProcId, error) {
if p.procid == nil {
stat, err := p.GetStat()
func (p *proccache) getStatus() (procfs.ProcStatus, error) {
if p.status == nil {
status, err := p.Proc.NewStatus()
if err != nil {
return ProcId{}, err
return procfs.ProcStatus{}, err
}
p.procid = &ProcId{Pid: p.GetPid(), StartTimeRel: stat.Starttime}
p.status = &status
}
return *p.status, nil
}
// GetProcID implements Proc.
func (p *proccache) GetProcID() (ID, error) {
if p.procid == nil {
stat, err := p.getStat()
if err != nil {
return ID{}, err
}
p.procid = &ID{Pid: p.GetPid(), StartTimeRel: stat.Starttime}
}
return *p.procid, nil
}
func (p *proc) GetCmdLine() ([]string, error) {
func (p *proccache) getCmdLine() ([]string, error) {
if p.cmdline == nil {
cmdline, err := p.Proc.CmdLine()
if err != nil {
@ -213,7 +320,18 @@ func (p *proc) GetCmdLine() ([]string, error) {
return p.cmdline, nil
}
func (p *proc) GetIo() (procfs.ProcIO, error) {
func (p *proccache) getWchan() (string, error) {
if p.wchan == nil {
wchan, err := p.Proc.Wchan()
if err != nil {
return "", err
}
p.wchan = &wchan
}
return *p.wchan, nil
}
func (p *proccache) getIo() (procfs.ProcIO, error) {
if p.io == nil {
io, err := p.Proc.NewIO()
if err != nil {
@ -224,56 +342,199 @@ func (p *proc) GetIo() (procfs.ProcIO, error) {
return *p.io, nil
}
func (p proc) GetStatic() (ProcStatic, error) {
cmdline, err := p.GetCmdLine()
// GetStatic returns the ProcStatic corresponding to this proc.
func (p *proccache) GetStatic() (Static, error) {
// /proc/<pid>/cmdline is normally world-readable.
cmdline, err := p.getCmdLine()
if err != nil {
return ProcStatic{}, err
return Static{}, err
}
stat, err := p.GetStat()
// /proc/<pid>/stat is normally world-readable.
stat, err := p.getStat()
if err != nil {
return ProcStatic{}, err
return Static{}, err
}
startTime := time.Unix(int64(p.bootTime), 0)
startTime := time.Unix(int64(p.fs.BootTime), 0).UTC()
startTime = startTime.Add(time.Second / userHZ * time.Duration(stat.Starttime))
return ProcStatic{
Name: stat.Comm,
Cmdline: cmdline,
ParentPid: stat.PPID,
StartTime: startTime,
// /proc/<pid>/status is normally world-readable.
status, err := p.getStatus()
if err != nil {
return Static{}, err
}
return Static{
Name: stat.Comm,
Cmdline: cmdline,
ParentPid: stat.PPID,
StartTime: startTime,
EffectiveUID: status.UIDEffective,
}, nil
}
func (p proc) GetMetrics() (ProcMetrics, error) {
io, err := p.GetIo()
func (p proc) GetCounts() (Counts, int, error) {
stat, err := p.getStat()
if err != nil {
return ProcMetrics{}, err
if err == os.ErrNotExist {
err = ErrProcNotExist
}
return Counts{}, 0, err
}
stat, err := p.GetStat()
status, err := p.getStatus()
if err != nil {
return ProcMetrics{}, err
if err == os.ErrNotExist {
err = ErrProcNotExist
}
return Counts{}, 0, err
}
io, err := p.getIo()
softerrors := 0
if err != nil {
softerrors++
}
return Counts{
CPUUserTime: float64(stat.UTime) / userHZ,
CPUSystemTime: float64(stat.STime) / userHZ,
ReadBytes: io.ReadBytes,
WriteBytes: io.WriteBytes,
MajorPageFaults: uint64(stat.MajFlt),
MinorPageFaults: uint64(stat.MinFlt),
CtxSwitchVoluntary: uint64(status.VoluntaryCtxtSwitches),
CtxSwitchNonvoluntary: uint64(status.NonvoluntaryCtxtSwitches),
}, softerrors, nil
}
func (p proc) GetWchan() (string, error) {
return p.getWchan()
}
func (p proc) GetStates() (States, error) {
stat, err := p.getStat()
if err != nil {
return States{}, err
}
var s States
switch stat.State {
case "R":
s.Running++
case "S":
s.Sleeping++
case "D":
s.Waiting++
case "Z":
s.Zombie++
default:
s.Other++
}
return s, nil
}
// GetMetrics returns the current metrics for the proc. The results are
// not cached.
func (p proc) GetMetrics() (Metrics, int, error) {
counts, softerrors, err := p.GetCounts()
if err != nil {
return Metrics{}, 0, err
}
// We don't need to check for error here because p will have cached
// the successful result of calling getStat in GetCounts.
// Since GetMetrics isn't a pointer receiver method, our callers
// won't see the effect of the caching between calls.
stat, _ := p.getStat()
// Ditto for states
states, _ := p.GetStates()
status, err := p.getStatus()
if err != nil {
return Metrics{}, 0, err
}
numfds, err := p.Proc.FileDescriptorsLen()
if err != nil {
return ProcMetrics{}, err
numfds = -1
softerrors |= 1
}
limits, err := p.NewLimits()
limits, err := p.Proc.NewLimits()
if err != nil {
return ProcMetrics{}, err
return Metrics{}, 0, err
}
return ProcMetrics{
CpuTime: stat.CPUTime(),
ReadBytes: io.ReadBytes,
WriteBytes: io.WriteBytes,
ResidentBytes: uint64(stat.ResidentMemory()),
VirtualBytes: uint64(stat.VirtualMemory()),
OpenFDs: uint64(numfds),
MaxFDs: uint64(limits.OpenFiles),
}, nil
wchan, err := p.getWchan()
if err != nil {
softerrors |= 1
}
return Metrics{
Counts: counts,
Memory: Memory{
ResidentBytes: uint64(stat.ResidentMemory()),
VirtualBytes: uint64(stat.VirtualMemory()),
VmSwapBytes: uint64(status.VmSwapKB * 1024),
},
Filedesc: Filedesc{
Open: int64(numfds),
Limit: uint64(limits.OpenFiles),
},
NumThreads: uint64(stat.NumThreads),
States: states,
Wchan: wchan,
}, softerrors, nil
}
type FS struct {
procfs.FS
BootTime uint64
func (p proc) GetThreads() ([]Thread, error) {
fs, err := p.fs.threadFs(p.PID)
if err != nil {
return nil, err
}
threads := []Thread{}
iter := fs.AllProcs()
for iter.Next() {
var id ID
id, err = iter.GetProcID()
if err != nil {
continue
}
var static Static
static, err = iter.GetStatic()
if err != nil {
continue
}
var counts Counts
counts, _, err = iter.GetCounts()
if err != nil {
continue
}
wchan, _ := iter.GetWchan()
states, _ := iter.GetStates()
threads = append(threads, Thread{
ThreadID: ThreadID(id),
ThreadName: static.Name,
Counts: counts,
Wchan: wchan,
States: states,
})
}
err = iter.Close()
if err != nil {
return nil, err
}
if len(threads) < 2 {
return nil, nil
}
return threads, nil
}
// See https://github.com/prometheus/procfs/blob/master/proc_stat.go for details on userHZ.
@ -281,7 +542,7 @@ const userHZ = 100
// NewFS returns a new FS mounted under the given mountPoint. It will error
// if the mount point can't be read.
func NewFS(mountPoint string) (*FS, error) {
func NewFS(mountPoint string, debug bool) (*FS, error) {
fs, err := procfs.NewFS(mountPoint)
if err != nil {
return nil, err
@ -290,17 +551,38 @@ func NewFS(mountPoint string) (*FS, error) {
if err != nil {
return nil, err
}
return &FS{fs, stat.BootTime}, nil
return &FS{fs, stat.BootTime, mountPoint, debug}, nil
}
func (fs *FS) AllProcs() ProcIter {
func (fs *FS) threadFs(pid int) (*FS, error) {
mountPoint := filepath.Join(fs.MountPoint, strconv.Itoa(pid), "task")
tfs, err := procfs.NewFS(mountPoint)
if err != nil {
return nil, err
}
return &FS{tfs, fs.BootTime, mountPoint, false}, nil
}
// AllProcs implements Source.
func (fs *FS) AllProcs() Iter {
procs, err := fs.FS.AllProcs()
if err != nil {
err = fmt.Errorf("Error reading procs: %v", err)
}
return &procIterator{procs: procfsprocs{procs, fs.BootTime}, err: err, idx: -1}
return &procIterator{procs: procfsprocs{procs, fs}, err: err, idx: -1}
}
// get implements procs.
func (p procfsprocs) get(i int) Proc {
return &proc{proccache{Proc: p.Procs[i], fs: p.fs}}
}
// length implements procs.
func (p procfsprocs) length() int {
return len(p.Procs)
}
// Next implements Iter.
func (pi *procIterator) Next() bool {
pi.idx++
if pi.idx < pi.procs.length() {
@ -311,6 +593,7 @@ func (pi *procIterator) Next() bool {
return pi.idx < pi.procs.length()
}
// Close implements Iter.
func (pi *procIterator) Close() error {
pi.Next()
pi.procs = nil

View file

@ -2,179 +2,432 @@ package proc
import (
"fmt"
"os"
"log"
"os/user"
"strconv"
"time"
seq "github.com/ncabatoff/go-seq/seq"
common "github.com/ncabatoff/process-exporter"
)
type (
Counts struct {
Cpu float64
ReadBytes uint64
WriteBytes uint64
}
Memory struct {
Resident uint64
Virtual uint64
}
Filedesc struct {
Open uint64
Limit uint64
}
// Tracker tracks processes and records metrics.
Tracker struct {
// Tracked holds the processes are being monitored. Processes
// namer determines what processes to track and names them
namer common.MatchNamer
// tracked holds the processes are being monitored. Processes
// may be blacklisted such that they no longer get tracked by
// setting their value in the Tracked map to nil.
Tracked map[ProcId]*TrackedProc
// ProcIds is a map from pid to ProcId. This is a convenience
// setting their value in the tracked map to nil.
tracked map[ID]*trackedProc
// procIds is a map from pid to ProcId. This is a convenience
// to allow finding the Tracked entry of a parent process.
ProcIds map[int]ProcId
procIds map[int]ID
// trackChildren makes Tracker track descendants of procs the
// namer wanted tracked.
trackChildren bool
// never ignore processes, i.e. always re-check untracked processes in case comm has changed
alwaysRecheck bool
username map[int]string
debug bool
}
// TrackedProc accumulates metrics for a process, as well as
// Delta is an alias of Counts used to signal that its contents are not
// totals, but rather the result of subtracting two totals.
Delta Counts
trackedThread struct {
name string
accum Counts
latest Delta
lastUpdate time.Time
wchan string
}
// trackedProc accumulates metrics for a process, as well as
// remembering an optional GroupName tag associated with it.
TrackedProc struct {
trackedProc struct {
// lastUpdate is used internally during the update cycle to find which procs have exited
lastUpdate time.Time
// info is the most recently obtained info for this proc
info ProcInfo
// accum is the total CPU and IO accrued since we started tracking this proc
accum Counts
// lastaccum is the CPU and IO accrued in the last Update()
lastaccum Counts
// GroupName is an optional tag for this proc.
GroupName string
// static
static Static
metrics Metrics
// lastaccum is the increment to the counters seen in the last update.
lastaccum Delta
// groupName is the tag for this proc given by the namer.
groupName string
threads map[ThreadID]trackedThread
}
trackedStats struct {
aggregate, latest Counts
// ThreadUpdate describes what's changed for a thread since the last cycle.
ThreadUpdate struct {
// ThreadName is the name of the thread based on field of stat.
ThreadName string
// Latest is how much the counts increased since last cycle.
Latest Delta
}
// Update reports on the latest stats for a process.
Update struct {
// GroupName is the name given by the namer to the process.
GroupName string
// Latest is how much the counts increased since last cycle.
Latest Delta
// Memory is the current memory usage.
Memory
// Filedesc is the current fd usage/limit.
Filedesc
start time.Time
// Start is the time the process started.
Start time.Time
// NumThreads is the number of threads.
NumThreads uint64
// States is how many processes are in which run state.
States
// Wchans is how many threads are in each non-zero wchan.
Wchans map[string]int
// Threads are the thread updates for this process.
Threads []ThreadUpdate
}
// CollectErrors describes non-fatal errors found while collecting proc
// metrics.
CollectErrors struct {
// Read is incremented every time GetMetrics() returns an error.
// This means we failed to load even the basics for the process,
// and not just because it disappeared on us.
Read int
// Partial is incremented every time we're unable to collect
// some metrics (e.g. I/O) for a tracked proc, but we're still able
// to get the basic stuff like cmdline and core stats.
Partial int
}
)
func (tp *TrackedProc) GetName() string {
return tp.info.Name
func lessUpdateGroupName(x, y Update) bool { return x.GroupName < y.GroupName }
func lessThreadUpdate(x, y ThreadUpdate) bool { return seq.Compare(x, y) < 0 }
func lessCounts(x, y Counts) bool { return seq.Compare(x, y) < 0 }
func (tp *trackedProc) getUpdate() Update {
u := Update{
GroupName: tp.groupName,
Latest: tp.lastaccum,
Memory: tp.metrics.Memory,
Filedesc: tp.metrics.Filedesc,
Start: tp.static.StartTime,
NumThreads: tp.metrics.NumThreads,
States: tp.metrics.States,
Wchans: make(map[string]int),
}
if tp.metrics.Wchan != "" {
u.Wchans[tp.metrics.Wchan] = 1
}
if len(tp.threads) > 1 {
for _, tt := range tp.threads {
u.Threads = append(u.Threads, ThreadUpdate{tt.name, tt.latest})
if tt.wchan != "" {
u.Wchans[tt.wchan]++
}
}
}
return u
}
func (tp *TrackedProc) GetCmdLine() []string {
return tp.info.Cmdline
}
func (tp *TrackedProc) GetStats() trackedStats {
mem := Memory{Resident: tp.info.ResidentBytes, Virtual: tp.info.VirtualBytes}
fd := Filedesc{Open: tp.info.OpenFDs, Limit: tp.info.MaxFDs}
return trackedStats{
aggregate: tp.accum,
latest: tp.lastaccum,
Memory: mem,
Filedesc: fd,
start: tp.info.StartTime,
// NewTracker creates a Tracker.
func NewTracker(namer common.MatchNamer, trackChildren, alwaysRecheck, debug bool) *Tracker {
return &Tracker{
namer: namer,
tracked: make(map[ID]*trackedProc),
procIds: make(map[int]ID),
trackChildren: trackChildren,
alwaysRecheck: alwaysRecheck,
username: make(map[int]string),
debug: debug,
}
}
func NewTracker() *Tracker {
return &Tracker{Tracked: make(map[ProcId]*TrackedProc), ProcIds: make(map[int]ProcId)}
func (t *Tracker) track(groupName string, idinfo IDInfo) {
tproc := trackedProc{
groupName: groupName,
static: idinfo.Static,
metrics: idinfo.Metrics,
}
if len(idinfo.Threads) > 0 {
tproc.threads = make(map[ThreadID]trackedThread)
for _, thr := range idinfo.Threads {
tproc.threads[thr.ThreadID] = trackedThread{
thr.ThreadName, thr.Counts, Delta{}, time.Time{}, thr.Wchan}
}
}
t.tracked[idinfo.ID] = &tproc
}
func (t *Tracker) Track(groupName string, idinfo ProcIdInfo) {
info := ProcInfo{idinfo.ProcStatic, idinfo.ProcMetrics}
t.Tracked[idinfo.ProcId] = &TrackedProc{GroupName: groupName, info: info}
func (t *Tracker) ignore(id ID) {
// only ignore ID if we didn't set recheck to true
if t.alwaysRecheck == false {
t.tracked[id] = nil
}
}
func (t *Tracker) Ignore(id ProcId) {
t.Tracked[id] = nil
func (tp *trackedProc) update(metrics Metrics, now time.Time, cerrs *CollectErrors, threads []Thread) {
// newcounts: resource consumption since last cycle
newcounts := metrics.Counts
tp.lastaccum = newcounts.Sub(tp.metrics.Counts)
tp.metrics = metrics
tp.lastUpdate = now
if len(threads) > 1 {
if tp.threads == nil {
tp.threads = make(map[ThreadID]trackedThread)
}
for _, thr := range threads {
tt := trackedThread{thr.ThreadName, thr.Counts, Delta{}, now, thr.Wchan}
if old, ok := tp.threads[thr.ThreadID]; ok {
tt.latest, tt.accum = thr.Counts.Sub(old.accum), thr.Counts
}
tp.threads[thr.ThreadID] = tt
}
for id, tt := range tp.threads {
if tt.lastUpdate != now {
delete(tp.threads, id)
}
}
} else {
tp.threads = nil
}
}
// Scan procs and update metrics for those which are tracked. Processes that have gone
// away get removed from the Tracked map. New processes are returned, along with the count
// of permission errors.
func (t *Tracker) Update(procs ProcIter) ([]ProcIdInfo, int, error) {
now := time.Now()
var newProcs []ProcIdInfo
var permissionErrors int
// handleProc updates the tracker if it's a known and not ignored proc.
// If it's neither known nor ignored, newProc will be non-nil.
// It is not an error if the process disappears while we are reading
// its info out of /proc, it just means nothing will be returned and
// the tracker will be unchanged.
func (t *Tracker) handleProc(proc Proc, updateTime time.Time) (*IDInfo, CollectErrors) {
var cerrs CollectErrors
procID, err := proc.GetProcID()
if err != nil {
return nil, cerrs
}
// Do nothing if we're ignoring this proc.
last, known := t.tracked[procID]
if known && last == nil {
return nil, cerrs
}
metrics, softerrors, err := proc.GetMetrics()
if err != nil {
if t.debug {
log.Printf("error reading metrics for %+v: %v", procID, err)
}
// This usually happens due to the proc having exited, i.e.
// we lost the race. We don't count that as an error.
if err != ErrProcNotExist {
cerrs.Read++
}
return nil, cerrs
}
var threads []Thread
threads, err = proc.GetThreads()
if err != nil {
softerrors |= 1
}
cerrs.Partial += softerrors
if len(threads) > 0 {
metrics.Counts.CtxSwitchNonvoluntary, metrics.Counts.CtxSwitchVoluntary = 0, 0
for _, thread := range threads {
metrics.Counts.CtxSwitchNonvoluntary += thread.Counts.CtxSwitchNonvoluntary
metrics.Counts.CtxSwitchVoluntary += thread.Counts.CtxSwitchVoluntary
metrics.States.Add(thread.States)
}
}
var newProc *IDInfo
if known {
last.update(metrics, updateTime, &cerrs, threads)
} else {
static, err := proc.GetStatic()
if err != nil {
if t.debug {
log.Printf("error reading static details for %+v: %v", procID, err)
}
return nil, cerrs
}
newProc = &IDInfo{procID, static, metrics, threads}
if t.debug {
log.Printf("found new proc: %s", newProc)
}
// Is this a new process with the same pid as one we already know?
// Then delete it from the known map, otherwise the cleanup in Update()
// will remove the ProcIds entry we're creating here.
if oldProcID, ok := t.procIds[procID.Pid]; ok {
delete(t.tracked, oldProcID)
}
t.procIds[procID.Pid] = procID
}
return newProc, cerrs
}
// update scans procs and updates metrics for those which are tracked. Processes
// that have gone away get removed from the Tracked map. New processes are
// returned, along with the count of nonfatal errors.
func (t *Tracker) update(procs Iter) ([]IDInfo, CollectErrors, error) {
var newProcs []IDInfo
var colErrs CollectErrors
var now = time.Now()
for procs.Next() {
procId, err := procs.GetProcId()
if err != nil {
continue
newProc, cerrs := t.handleProc(procs, now)
if newProc != nil {
newProcs = append(newProcs, *newProc)
}
last, known := t.Tracked[procId]
// Are we ignoring this proc?
if known && last == nil {
continue
}
// TODO if just the io file is unreadable, should we still return the other metrics?
metrics, err := procs.GetMetrics()
if err != nil {
if os.IsPermission(err) {
permissionErrors++
t.Ignore(procId)
}
continue
}
if known {
var newaccum, lastaccum Counts
dcpu := metrics.CpuTime - last.info.CpuTime
drbytes := metrics.ReadBytes - last.info.ReadBytes
dwbytes := metrics.WriteBytes - last.info.WriteBytes
lastaccum = Counts{Cpu: dcpu, ReadBytes: drbytes, WriteBytes: dwbytes}
newaccum = Counts{
Cpu: last.accum.Cpu + lastaccum.Cpu,
ReadBytes: last.accum.ReadBytes + lastaccum.ReadBytes,
WriteBytes: last.accum.WriteBytes + lastaccum.WriteBytes,
}
last.info.ProcMetrics = metrics
last.lastUpdate = now
last.accum = newaccum
last.lastaccum = lastaccum
} else {
static, err := procs.GetStatic()
if err != nil {
continue
}
newProcs = append(newProcs, ProcIdInfo{procId, static, metrics})
// Is this a new process with the same pid as one we already know?
if oldProcId, ok := t.ProcIds[procId.Pid]; ok {
// Delete it from known, otherwise the cleanup below will remove the
// ProcIds entry we're about to create
delete(t.Tracked, oldProcId)
}
t.ProcIds[procId.Pid] = procId
}
colErrs.Read += cerrs.Read
colErrs.Partial += cerrs.Partial
}
err := procs.Close()
if err != nil {
return nil, permissionErrors, fmt.Errorf("Error reading procs: %v", err)
return nil, colErrs, fmt.Errorf("Error reading procs: %v", err)
}
// Rather than allocating a new map each time to detect procs that have
// disappeared, we bump the last update time on those that are still
// present. Then as a second pass we traverse the map looking for
// stale procs and removing them.
for procId, pinfo := range t.Tracked {
for procID, pinfo := range t.tracked {
if pinfo == nil {
// TODO is this a bug? we're not tracking the proc so we don't see it go away so ProcIds
// and Tracked are leaking?
continue
}
if pinfo.lastUpdate != now {
delete(t.Tracked, procId)
delete(t.ProcIds, procId.Pid)
delete(t.tracked, procID)
delete(t.procIds, procID.Pid)
}
}
return newProcs, permissionErrors, nil
return newProcs, colErrs, nil
}
// checkAncestry walks the process tree recursively towards the root,
// stopping at pid 1 or upon finding a parent that's already tracked or
// ignored.  If a tracked parent is found, this proc is tracked under the
// parent's group and that group name is returned; otherwise the proc is
// ignored and "" is returned.
func (t *Tracker) checkAncestry(idinfo IDInfo, newprocs map[ID]IDInfo) string {
	parentID := t.procIds[idinfo.ParentPid]
	if parentID.Pid < 1 {
		// Reached root of process tree without finding a tracked parent.
		if t.debug {
			log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo)
		}
		t.ignore(idinfo.ID)
		return ""
	}

	// Is the parent already known to the tracker?
	if ptproc, ok := t.tracked[parentID]; ok {
		if ptproc == nil {
			// The parent is known but untracked (ignored); ignore this one too.
			t.ignore(idinfo.ID)
			return ""
		}
		if t.debug {
			log.Printf("matched as %q because child of %+v: %+v",
				ptproc.groupName, parentID, idinfo)
		}
		// We've found a tracked parent.
		t.track(ptproc.groupName, idinfo)
		return ptproc.groupName
	}

	// Is the parent another new process?
	if pinfo, ok := newprocs[parentID]; ok {
		if name := t.checkAncestry(pinfo, newprocs); name != "" {
			if t.debug {
				log.Printf("matched as %q because child of %+v: %+v",
					name, parentID, idinfo)
			}
			// A tracked ancestor exists, so this entire lineage is tracked.
			t.track(name, idinfo)
			return name
		}
	}

	// Parent is dead, i.e. we never saw it, or there's no tracked proc
	// in our ancestry.
	if t.debug {
		log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo)
	}
	t.ignore(idinfo.ID)
	return ""
}
// lookupUid resolves a numeric uid to a username, caching results in
// t.username.  When the lookup fails, the decimal uid string is used
// (and cached) as the name.
func (t *Tracker) lookupUid(uid int) string {
	if cached, ok := t.username[uid]; ok {
		return cached
	}

	uidstr := strconv.Itoa(uid)
	name := uidstr
	if u, err := user.LookupId(uidstr); err == nil {
		name = u.Username
	}
	t.username[uid] = name
	return name
}
// Update modifies the tracker's internal state based on what it reads from
// iter. Tracks any new procs the namer wants tracked, and updates
// its metrics for existing tracked procs. Returns nonfatal errors
// and the status of all tracked procs, or an error if fatal.
func (t *Tracker) Update(iter Iter) (CollectErrors, []Update, error) {
	newProcs, colErrs, err := t.update(iter)
	if err != nil {
		return colErrs, nil, err
	}

	// Step 1: track any new proc that should be tracked based on its
	// name and cmdline.
	untracked := make(map[ID]IDInfo)
	for _, idinfo := range newProcs {
		attrs := common.ProcAttributes{
			Name:     idinfo.Name,
			Cmdline:  idinfo.Cmdline,
			Username: t.lookupUid(idinfo.EffectiveUID),
		}
		wanted, gname := t.namer.MatchAndName(attrs)
		if !wanted {
			untracked[idinfo.ID] = idinfo
			continue
		}
		if t.debug {
			log.Printf("matched as %q: %+v", gname, idinfo)
		}
		t.track(gname, idinfo)
	}

	// Step 2: track any untracked new proc that should be tracked
	// because its parent is tracked.
	if t.trackChildren {
		for _, idinfo := range untracked {
			if _, ok := t.tracked[idinfo.ID]; ok {
				// Already tracked or ignored in an earlier iteration.
				continue
			}
			t.checkAncestry(idinfo, untracked)
		}
	}

	// Collect the current status of everything still tracked.
	updates := []Update{}
	for _, tproc := range t.tracked {
		if tproc != nil {
			updates = append(updates, tproc.getUpdate())
		}
	}
	return colErrs, updates, nil
}