From dd78c1a036440b33bc5c224b1af817ba4fd6d643 Mon Sep 17 00:00:00 2001 From: huanggze Date: Mon, 13 Apr 2020 20:20:21 +0800 Subject: [PATCH] feat: custom monitoring Signed-off-by: huanggze --- go.mod | 15 +- go.sum | 27 +- pkg/constants/constants.go | 1 + pkg/kapis/monitoring/v1alpha3/handler.go | 33 + pkg/kapis/monitoring/v1alpha3/helper.go | 2 + pkg/kapis/monitoring/v1alpha3/register.go | 23 + .../expressions/prometheus/label_replace.go | 99 + .../prometheus/label_replace_test.go | 51 + pkg/models/monitoring/expressions/registry.go | 9 + pkg/models/monitoring/monitoring.go | 37 +- pkg/models/monitoring/types.go | 4 + pkg/simple/client/monitoring/interface.go | 5 +- .../monitoring/prometheus/prometheus.go | 58 +- .../monitoring/prometheus/prometheus_test.go | 56 +- .../testdata/metadata-notfound-prom.json | 5 + .../testdata/metadata-notfound-res.json | 1 + .../prometheus/testdata/metadata-prom.json | 25 + .../prometheus/testdata/metadata-res.json | 12 + .../testdata/metrics-error-res.json | 2 +- pkg/simple/client/monitoring/types.go | 6 + vendor/github.com/golang/snappy/.gitignore | 16 + vendor/github.com/golang/snappy/AUTHORS | 15 + vendor/github.com/golang/snappy/CONTRIBUTORS | 37 + vendor/github.com/golang/snappy/LICENSE | 27 + vendor/github.com/golang/snappy/README | 107 + vendor/github.com/golang/snappy/decode.go | 237 ++ .../github.com/golang/snappy/decode_amd64.go | 14 + .../github.com/golang/snappy/decode_amd64.s | 490 ++++ .../github.com/golang/snappy/decode_other.go | 101 + vendor/github.com/golang/snappy/encode.go | 285 +++ .../github.com/golang/snappy/encode_amd64.go | 29 + .../github.com/golang/snappy/encode_amd64.s | 730 ++++++ .../github.com/golang/snappy/encode_other.go | 238 ++ vendor/github.com/golang/snappy/snappy.go | 98 + .../opentracing/opentracing-go/.gitignore | 1 + .../opentracing/opentracing-go/.travis.yml | 20 + .../opentracing/opentracing-go/CHANGELOG.md | 46 + .../opentracing/opentracing-go/LICENSE | 201 ++ 
.../opentracing/opentracing-go/Makefile | 20 + .../opentracing/opentracing-go/README.md | 171 ++ .../opentracing-go/globaltracer.go | 42 + .../opentracing/opentracing-go/gocontext.go | 60 + .../opentracing/opentracing-go/log/field.go | 269 +++ .../opentracing/opentracing-go/log/util.go | 54 + .../opentracing/opentracing-go/noop.go | 64 + .../opentracing/opentracing-go/propagation.go | 176 ++ .../opentracing/opentracing-go/span.go | 189 ++ .../opentracing/opentracing-go/tracer.go | 304 +++ .../prometheus/client_golang/api/client.go | 55 +- .../client_golang/api/prometheus/v1/api.go | 364 ++- .../client_golang/prometheus/build_info.go | 29 + .../prometheus/build_info_pre_1.12.go | 22 + .../client_golang/prometheus/go_collector.go | 32 +- .../prometheus/process_collector.go | 6 +- .../client_golang/prometheus/promhttp/http.go | 47 +- .../client_golang/prometheus/summary.go | 8 +- vendor/github.com/prometheus/procfs/Makefile | 1 + .../prometheus/procfs/Makefile.common | 2 +- vendor/github.com/prometheus/procfs/README.md | 44 +- .../github.com/prometheus/procfs/buddyinfo.go | 12 +- .../prometheus/procfs/fixtures.ttar | 144 +- vendor/github.com/prometheus/procfs/fs.go | 8 +- vendor/github.com/prometheus/procfs/ipvs.go | 28 +- vendor/github.com/prometheus/procfs/mdstat.go | 74 +- .../prometheus/procfs/mountstats.go | 41 +- .../github.com/prometheus/procfs/net_dev.go | 36 +- .../github.com/prometheus/procfs/net_unix.go | 275 +++ vendor/github.com/prometheus/procfs/proc.go | 11 +- .../github.com/prometheus/procfs/proc_io.go | 4 +- .../prometheus/procfs/proc_limits.go | 7 + .../github.com/prometheus/procfs/proc_ns.go | 4 +- .../github.com/prometheus/procfs/proc_psi.go | 17 +- .../github.com/prometheus/procfs/proc_stat.go | 9 +- .../prometheus/procfs/proc_status.go | 162 ++ vendor/github.com/prometheus/procfs/stat.go | 36 +- vendor/github.com/prometheus/procfs/ttar | 42 +- .../github.com/prometheus/prometheus/LICENSE | 201 ++ .../github.com/prometheus/prometheus/NOTICE | 87 + 
.../prometheus/prometheus/promql/ast.go | 317 +++ .../prometheus/prometheus/promql/engine.go | 1436 ++++++++++++ .../prometheus/prometheus/promql/functions.go | 1338 +++++++++++ .../prometheus/prometheus/promql/fuzz.go | 87 + .../prometheus/prometheus/promql/lex.go | 908 ++++++++ .../prometheus/prometheus/promql/parse.go | 1146 ++++++++++ .../prometheus/prometheus/promql/printer.go | 236 ++ .../prometheus/prometheus/promql/quantile.go | 185 ++ .../prometheus/prometheus/promql/test.go | 525 +++++ .../prometheus/storage/local/chunk/chunk.go | 494 ++++ .../prometheus/storage/local/chunk/delta.go | 379 +++ .../storage/local/chunk/delta_helpers.go | 84 + .../storage/local/chunk/doubledelta.go | 525 +++++ .../storage/local/chunk/instrumentation.go | 90 + .../prometheus/storage/local/chunk/varbit.go | 1210 ++++++++++ .../storage/local/chunk/varbit_helpers.go | 75 + .../storage/local/codable/codable.go | 467 ++++ .../prometheus/storage/local/crashrecovery.go | 559 +++++ .../prometheus/storage/local/heads.go | 261 +++ .../prometheus/storage/local/index/index.go | 303 +++ .../storage/local/index/interface.go | 61 + .../prometheus/storage/local/index/leveldb.go | 210 ++ .../storage/local/instrumentation.go | 46 + .../prometheus/storage/local/interface.go | 106 + .../prometheus/storage/local/locker.go | 79 + .../prometheus/storage/local/mapper.go | 218 ++ .../prometheus/storage/local/noop_storage.go | 100 + .../prometheus/storage/local/persistence.go | 1722 ++++++++++++++ .../prometheus/storage/local/series.go | 728 ++++++ .../prometheus/storage/local/storage.go | 2029 +++++++++++++++++ .../prometheus/storage/local/test_helpers.go | 72 + .../prometheus/storage/metric/matcher.go | 209 ++ .../prometheus/storage/metric/metric.go | 63 + .../prometheus/storage/metric/sample.go | 22 + .../prometheus/prometheus/storage/storage.go | 76 + .../prometheus/prometheus/util/flock/flock.go | 46 + .../prometheus/util/flock/flock_plan9.go | 32 + .../prometheus/util/flock/flock_solaris.go | 59 
+ .../prometheus/util/flock/flock_unix.go | 54 + .../prometheus/util/flock/flock_windows.go | 36 + .../prometheus/util/stats/query_stats.go | 48 + .../prometheus/prometheus/util/stats/timer.go | 108 + .../prometheus/util/strutil/quote.go | 223 ++ .../prometheus/util/strutil/strconv.go | 44 + .../prometheus/util/testutil/directory.go | 129 ++ .../prometheus/util/testutil/error.go | 31 + .../prometheus/util/testutil/roundtrip.go | 47 + vendor/github.com/syndtr/goleveldb/LICENSE | 24 + .../syndtr/goleveldb/leveldb/batch.go | 349 +++ .../syndtr/goleveldb/leveldb/cache/cache.go | 704 ++++++ .../syndtr/goleveldb/leveldb/cache/lru.go | 195 ++ .../syndtr/goleveldb/leveldb/comparer.go | 67 + .../leveldb/comparer/bytes_comparer.go | 51 + .../goleveldb/leveldb/comparer/comparer.go | 57 + .../github.com/syndtr/goleveldb/leveldb/db.go | 1179 ++++++++++ .../syndtr/goleveldb/leveldb/db_compaction.go | 854 +++++++ .../syndtr/goleveldb/leveldb/db_iter.go | 360 +++ .../syndtr/goleveldb/leveldb/db_snapshot.go | 187 ++ .../syndtr/goleveldb/leveldb/db_state.go | 239 ++ .../goleveldb/leveldb/db_transaction.go | 329 +++ .../syndtr/goleveldb/leveldb/db_util.go | 102 + .../syndtr/goleveldb/leveldb/db_write.go | 464 ++++ .../syndtr/goleveldb/leveldb/doc.go | 92 + .../syndtr/goleveldb/leveldb/errors.go | 20 + .../syndtr/goleveldb/leveldb/errors/errors.go | 78 + .../syndtr/goleveldb/leveldb/filter.go | 31 + .../syndtr/goleveldb/leveldb/filter/bloom.go | 116 + .../syndtr/goleveldb/leveldb/filter/filter.go | 60 + .../goleveldb/leveldb/iterator/array_iter.go | 184 ++ .../leveldb/iterator/indexed_iter.go | 242 ++ .../syndtr/goleveldb/leveldb/iterator/iter.go | 132 ++ .../goleveldb/leveldb/iterator/merged_iter.go | 304 +++ .../goleveldb/leveldb/journal/journal.go | 524 +++++ .../syndtr/goleveldb/leveldb/key.go | 143 ++ .../syndtr/goleveldb/leveldb/memdb/memdb.go | 479 ++++ .../syndtr/goleveldb/leveldb/opt/options.go | 697 ++++++ .../syndtr/goleveldb/leveldb/options.go | 107 + 
.../syndtr/goleveldb/leveldb/session.go | 210 ++ .../goleveldb/leveldb/session_compaction.go | 302 +++ .../goleveldb/leveldb/session_record.go | 323 +++ .../syndtr/goleveldb/leveldb/session_util.go | 271 +++ .../syndtr/goleveldb/leveldb/storage.go | 63 + .../goleveldb/leveldb/storage/file_storage.go | 671 ++++++ .../leveldb/storage/file_storage_nacl.go | 34 + .../leveldb/storage/file_storage_plan9.go | 63 + .../leveldb/storage/file_storage_solaris.go | 81 + .../leveldb/storage/file_storage_unix.go | 98 + .../leveldb/storage/file_storage_windows.go | 78 + .../goleveldb/leveldb/storage/mem_storage.go | 222 ++ .../goleveldb/leveldb/storage/storage.go | 187 ++ .../syndtr/goleveldb/leveldb/table.go | 531 +++++ .../syndtr/goleveldb/leveldb/table/reader.go | 1139 +++++++++ .../syndtr/goleveldb/leveldb/table/table.go | 177 ++ .../syndtr/goleveldb/leveldb/table/writer.go | 375 +++ .../syndtr/goleveldb/leveldb/util.go | 98 + .../syndtr/goleveldb/leveldb/util/buffer.go | 293 +++ .../goleveldb/leveldb/util/buffer_pool.go | 239 ++ .../syndtr/goleveldb/leveldb/util/crc32.go | 30 + .../syndtr/goleveldb/leveldb/util/hash.go | 48 + .../syndtr/goleveldb/leveldb/util/range.go | 32 + .../syndtr/goleveldb/leveldb/util/util.go | 73 + .../syndtr/goleveldb/leveldb/version.go | 528 +++++ vendor/modules.txt | 36 +- 181 files changed, 37758 insertions(+), 357 deletions(-) create mode 100644 pkg/models/monitoring/expressions/prometheus/label_replace.go create mode 100644 pkg/models/monitoring/expressions/prometheus/label_replace_test.go create mode 100644 pkg/models/monitoring/expressions/registry.go create mode 100644 pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-prom.json create mode 100644 pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-res.json create mode 100644 pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json create mode 100644 pkg/simple/client/monitoring/prometheus/testdata/metadata-res.json create mode 100644 
vendor/github.com/golang/snappy/.gitignore create mode 100644 vendor/github.com/golang/snappy/AUTHORS create mode 100644 vendor/github.com/golang/snappy/CONTRIBUTORS create mode 100644 vendor/github.com/golang/snappy/LICENSE create mode 100644 vendor/github.com/golang/snappy/README create mode 100644 vendor/github.com/golang/snappy/decode.go create mode 100644 vendor/github.com/golang/snappy/decode_amd64.go create mode 100644 vendor/github.com/golang/snappy/decode_amd64.s create mode 100644 vendor/github.com/golang/snappy/decode_other.go create mode 100644 vendor/github.com/golang/snappy/encode.go create mode 100644 vendor/github.com/golang/snappy/encode_amd64.go create mode 100644 vendor/github.com/golang/snappy/encode_amd64.s create mode 100644 vendor/github.com/golang/snappy/encode_other.go create mode 100644 vendor/github.com/golang/snappy/snappy.go create mode 100644 vendor/github.com/opentracing/opentracing-go/.gitignore create mode 100644 vendor/github.com/opentracing/opentracing-go/.travis.yml create mode 100644 vendor/github.com/opentracing/opentracing-go/CHANGELOG.md create mode 100644 vendor/github.com/opentracing/opentracing-go/LICENSE create mode 100644 vendor/github.com/opentracing/opentracing-go/Makefile create mode 100644 vendor/github.com/opentracing/opentracing-go/README.md create mode 100644 vendor/github.com/opentracing/opentracing-go/globaltracer.go create mode 100644 vendor/github.com/opentracing/opentracing-go/gocontext.go create mode 100644 vendor/github.com/opentracing/opentracing-go/log/field.go create mode 100644 vendor/github.com/opentracing/opentracing-go/log/util.go create mode 100644 vendor/github.com/opentracing/opentracing-go/noop.go create mode 100644 vendor/github.com/opentracing/opentracing-go/propagation.go create mode 100644 vendor/github.com/opentracing/opentracing-go/span.go create mode 100644 vendor/github.com/opentracing/opentracing-go/tracer.go create mode 100644 
vendor/github.com/prometheus/client_golang/prometheus/build_info.go create mode 100644 vendor/github.com/prometheus/client_golang/prometheus/build_info_pre_1.12.go create mode 100644 vendor/github.com/prometheus/procfs/net_unix.go create mode 100644 vendor/github.com/prometheus/procfs/proc_status.go create mode 100644 vendor/github.com/prometheus/prometheus/LICENSE create mode 100644 vendor/github.com/prometheus/prometheus/NOTICE create mode 100644 vendor/github.com/prometheus/prometheus/promql/ast.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/engine.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/functions.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/fuzz.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/lex.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/parse.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/printer.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/quantile.go create mode 100644 vendor/github.com/prometheus/prometheus/promql/test.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/chunk.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/delta.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/delta_helpers.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/doubledelta.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/instrumentation.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit_helpers.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/codable/codable.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/crashrecovery.go create mode 100644 
vendor/github.com/prometheus/prometheus/storage/local/heads.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/index/index.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/index/interface.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/index/leveldb.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/instrumentation.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/interface.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/locker.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/mapper.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/noop_storage.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/persistence.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/series.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/storage.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/local/test_helpers.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/metric/matcher.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/metric/metric.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/metric/sample.go create mode 100644 vendor/github.com/prometheus/prometheus/storage/storage.go create mode 100644 vendor/github.com/prometheus/prometheus/util/flock/flock.go create mode 100644 vendor/github.com/prometheus/prometheus/util/flock/flock_plan9.go create mode 100644 vendor/github.com/prometheus/prometheus/util/flock/flock_solaris.go create mode 100644 vendor/github.com/prometheus/prometheus/util/flock/flock_unix.go create mode 100644 vendor/github.com/prometheus/prometheus/util/flock/flock_windows.go create mode 100644 vendor/github.com/prometheus/prometheus/util/stats/query_stats.go create mode 100644 
vendor/github.com/prometheus/prometheus/util/stats/timer.go create mode 100644 vendor/github.com/prometheus/prometheus/util/strutil/quote.go create mode 100644 vendor/github.com/prometheus/prometheus/util/strutil/strconv.go create mode 100644 vendor/github.com/prometheus/prometheus/util/testutil/directory.go create mode 100644 vendor/github.com/prometheus/prometheus/util/testutil/error.go create mode 100644 vendor/github.com/prometheus/prometheus/util/testutil/roundtrip.go create mode 100644 vendor/github.com/syndtr/goleveldb/LICENSE create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/batch.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/comparer.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_state.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_util.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/db_write.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/doc.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/errors.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/filter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go create mode 100644 
vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/key.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/options.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/session.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/session_record.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/session_util.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/table.go create mode 100644 
vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/table/table.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/range.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/util/util.go create mode 100644 vendor/github.com/syndtr/goleveldb/leveldb/version.go diff --git a/go.mod b/go.mod index 4a41dfc01..2a1c7564b 100644 --- a/go.mod +++ b/go.mod @@ -66,16 +66,19 @@ require ( github.com/opencontainers/go-digest v1.0.0-rc1 github.com/opencontainers/image-spec v1.0.1 // indirect github.com/openshift/api v0.0.0-20180801171038-322a19404e37 // indirect + github.com/opentracing/opentracing-go v1.1.0 // indirect github.com/pkg/errors v0.8.1 github.com/projectcalico/libcalico-go v1.7.2-0.20191104213956-8f81e1e344ce - github.com/prometheus/client_golang v0.9.3 - github.com/prometheus/common v0.4.0 + github.com/prometheus/client_golang v0.9.4 + github.com/prometheus/common v0.4.1 + github.com/prometheus/prometheus v1.8.2 github.com/sony/sonyflake v0.0.0-20181109022403-6d5bd6181009 github.com/speps/go-hashids v2.0.0+incompatible github.com/spf13/cobra v0.0.5 github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.4.0 github.com/stretchr/testify v1.4.0 + github.com/syndtr/goleveldb v1.0.0 // indirect github.com/xanzy/ssh-agent v0.2.1 // indirect golang.org/x/crypto v0.0.0-20190923035154-9ee001bba392 golang.org/x/net v0.0.0-20191004110552-13f9640d40b9 @@ -210,6 +213,7 @@ replace ( github.com/golang/groupcache => 
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 github.com/golang/mock => github.com/golang/mock v1.2.0 github.com/golang/protobuf => github.com/golang/protobuf v1.3.2 + github.com/golang/snappy => github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db github.com/google/btree => github.com/google/btree v1.0.0 github.com/google/go-cmp => github.com/google/go-cmp v0.3.0 github.com/google/go-querystring => github.com/google/go-querystring v1.0.0 @@ -297,6 +301,7 @@ replace ( github.com/opencontainers/image-spec => github.com/opencontainers/image-spec v1.0.1 github.com/openshift/api => github.com/openshift/api v0.0.0-20180801171038-322a19404e37 github.com/openshift/build-machinery-go => github.com/openshift/build-machinery-go v0.0.0-20200211121458-5e3d6e570160 + github.com/opentracing/opentracing-go => github.com/opentracing/opentracing-go v1.1.0 github.com/pborman/uuid => github.com/pborman/uuid v1.2.0 github.com/pelletier/go-buffruneio => github.com/pelletier/go-buffruneio v0.2.0 github.com/pelletier/go-toml => github.com/pelletier/go-toml v1.2.0 @@ -311,10 +316,11 @@ replace ( github.com/projectcalico/go-yaml => github.com/projectcalico/go-yaml v0.0.0-20161201183616-955bc3e451ef github.com/projectcalico/go-yaml-wrapper => github.com/projectcalico/go-yaml-wrapper v0.0.0-20161127220527-598e54215bee github.com/projectcalico/libcalico-go => github.com/projectcalico/libcalico-go v1.7.2-0.20191104213956-8f81e1e344ce - github.com/prometheus/client_golang => github.com/prometheus/client_golang v0.9.3 + github.com/prometheus/client_golang => github.com/prometheus/client_golang v0.9.4 github.com/prometheus/client_model => github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 github.com/prometheus/common => github.com/prometheus/common v0.4.0 - github.com/prometheus/procfs => github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084 + github.com/prometheus/procfs => github.com/prometheus/procfs v0.0.2 + 
github.com/prometheus/prometheus => github.com/prometheus/prometheus v1.8.2 github.com/prometheus/tsdb => github.com/prometheus/tsdb v0.7.1 github.com/rcrowley/go-metrics => github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a github.com/remyoudompheng/bigfft => github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446 @@ -339,6 +345,7 @@ replace ( github.com/src-d/gcfg => github.com/src-d/gcfg v1.4.0 github.com/stretchr/objx => github.com/stretchr/objx v0.2.0 github.com/stretchr/testify => github.com/stretchr/testify v1.4.0 + github.com/syndtr/goleveldb => github.com/syndtr/goleveldb v1.0.0 github.com/tinylib/msgp => github.com/tinylib/msgp v1.1.0 github.com/tmc/grpc-websocket-proxy => github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 github.com/ugorji/go => github.com/ugorji/go v1.1.4 diff --git a/go.sum b/go.sum index e5ab35c79..3e9a3bfd0 100644 --- a/go.sum +++ b/go.sum @@ -80,7 +80,6 @@ github.com/deckarep/golang-set v1.7.1/go.mod h1:93vsz/8Wt4joVM7c2AVqh+YRMiUSc14y github.com/denisenkom/go-mssqldb v0.0.0-20190204142019-df6d76eb9289/go.mod h1:xN/JuLBIz4bjkxNmByTiV1IbhfnYb6oo99phBn4Eqhc= github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug= github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= github.com/docker/engine v1.4.2-0.20190822205725-ed20165a37b4 h1:+VAGRKyn9Ca+ckzV/PJsaRO7UXO9KQjFmSffcSDrWdE= @@ -187,6 +186,8 @@ github.com/golang/mock v1.2.0 h1:28o5sBqPkBsMGnC6b4MvE2TzSr5/AT4c/1fLqVGIwlk= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf 
v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= +github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= @@ -301,7 +302,6 @@ github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8m github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= github.com/olekukonko/tablewriter v0.0.1/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.8.0 h1:VkHVNpR4iVnU8XQR6DBm8BqYjN7CRzw+xKUbVVbbW9w= github.com/onsi/ginkgo v1.8.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= @@ -315,13 +315,14 @@ github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVo github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/openshift/api v0.0.0-20180801171038-322a19404e37 h1:05irGU4HK4IauGGDbsk+ZHrm1wOzMLYjMlfaiqMrBYc= github.com/openshift/api v0.0.0-20180801171038-322a19404e37/go.mod h1:dh9o4Fs58gpFXGSYfnVxGR9PnV53I8TW84pQaJDdGiY= +github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU= +github.com/opentracing/opentracing-go v1.1.0/go.mod 
h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pborman/uuid v1.2.0 h1:J7Q5mO4ysT1dv8hyrUGHb9+ooztCXu1D8MY8DZYsu3g= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-buffruneio v0.2.0 h1:U4t4R6YkofJ5xHm3dJzuRpPZ0mr5MMCoAWooScCR7aA= github.com/pelletier/go-buffruneio v0.2.0/go.mod h1:JkE26KsDizTr40EUHkXVtNPvgGtbSNq5BcowyYOWdKo= github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181zc= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= -github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/peterh/liner v0.0.0-20170211195444-bf27d3ba8e1d/go.mod h1:xIteQHvHuaLYG9IFj6mSxM0fCKrs34IrEQUhOYuGPHc= github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= @@ -339,22 +340,21 @@ github.com/projectcalico/go-yaml-wrapper v0.0.0-20161127220527-598e54215bee h1:y github.com/projectcalico/go-yaml-wrapper v0.0.0-20161127220527-598e54215bee/go.mod h1:UgC0aTQ2KMDxlX3lU/stndk7DMUBJqzN40yFiILHgxc= github.com/projectcalico/libcalico-go v1.7.2-0.20191104213956-8f81e1e344ce h1:O/R67iwUe8TvZwgKbDB2cvF2/8L8PR4zVOcBtYEHD5Y= github.com/projectcalico/libcalico-go v1.7.2-0.20191104213956-8f81e1e344ce/go.mod h1:z4tuFqrAg/423AMSaDamY5LgqeOZ5ETui6iOxDwJ/ag= -github.com/prometheus/client_golang v0.9.3 h1:9iH4JKXLzFbOAdtqv/a+j8aewx2Y8lAjAydhbaScPF8= -github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v0.9.4 h1:Y8E/JaaPbmFSW2V81Ab/d8yZFYQQGbni1b1jPcG9Y6A= +github.com/prometheus/client_golang v0.9.4/go.mod h1:oCXIBxdI62A4cR6aTRJCgetEjecSIYzOEaeAn4iYEpM= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 h1:S/YWwWx/RA8rT8tKFRuGUZhuA90OyIBpPCXkcbwU8DE= 
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.0 h1:7etb9YClo3a6HjLzfl6rIQaU+FDfi0VSX39io3aQ+DM= github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084 h1:sofwID9zm4tzrgykg80hfFph1mryUeLRsUfoocVVmRY= -github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/tsdb v0.7.1 h1:YZcsG11NqnK4czYLrWd9mpEuAJIHVQLwdrleYfszMAA= -github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/prometheus v1.8.2 h1:PAL466mnJw1VolZPm1OarpdUpqukUy/eX4tagia17DM= +github.com/prometheus/prometheus v1.8.2/go.mod h1:oAIUtOny2rjMX0OWN5vPR5/q/twIROJvdqnQKDdil/s= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ= github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4= github.com/remyoudompheng/bigfft v0.0.0-20170806203942-52369c62f446/go.mod h1:uYEyJGbgTkfkS4+E/PavXkNJcbFIpEtjt2B0KDQ5+9M= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc= -github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod 
h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= @@ -390,6 +390,8 @@ github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/syndtr/goleveldb v1.0.0 h1:fBdIW9lB4Iz0n9khmH8w27SJ3QEJ7+IgjPEwGSZiFdE= +github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ= github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 h1:LnC5Kc/wtumK+WB441p7ynQJzVuNRJiqddSIE3IlSEQ= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= @@ -414,15 +416,12 @@ go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM= go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/exp v0.0.0-20190121172915-509febef88a4 h1:c2HOrn5iMezYjSlGPncknSEr/8x5LELb/ilJbXi9DEA= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f h1:hX65Cu3JDlGH3uEdK7I99Ii+9kjD6mvnnpfLdEAH0x4= golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a 
h1:tImsplftrFpALCYumobsd0K86vlAs/eXGFms2txfJfA= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190228124157-a34e9553db1e h1:ZytStCyV048ZqDsWHiYDdoI2Vd4msMcrDECFxS+tL9c= golang.org/x/sys v0.0.0-20190228124157-a34e9553db1e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -436,7 +435,6 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+y golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.0.1 h1:xyiBuvkD2g5n7cYzx6u2sxQvsAy4QJsZFCzGVdzOXZ0= gomodules.xyz/jsonpatch/v2 v2.0.1/go.mod h1:IhYNNY4jnS53ZnfE4PAmpKtDpTCj1JFXc+3mwe7XcUU= -gonum.org/v1/gonum v0.0.0-20190331200053-3d26580ed485 h1:OB/uP/Puiu5vS5QMRPrXCDWUPb+kt8f1KW8oQzFejQw= gonum.org/v1/gonum v0.0.0-20190331200053-3d26580ed485/go.mod h1:2ltnJ7xHfj0zHS40VVPYEAAMTa3ZGguvHGBSJeRWqE0= gonum.org/v1/netlib v0.0.0-20190331212654-76723241ea4e/go.mod h1:kS+toOQn6AQKjmKJ7gzohV1XkqsFehRA2FbsbkopSuQ= google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= @@ -465,7 +463,6 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= -gopkg.in/square/go-jose.v2 v2.3.1 h1:SK5KegNXmKmqE342YYN2qPHEnUYeoMiXXl1poUlI+o4= gopkg.in/square/go-jose.v2 v2.3.1/go.mod h1:M9dMgbHiYLoDGQrXy7OpJDJWiKiU//h+vD76mk0e1AI= gopkg.in/src-d/go-billy.v4 v4.3.0 h1:KtlZ4c1OWbIs4jCv5ZXrTqG8EQocr0g/d4DjNg70aek= 
gopkg.in/src-d/go-billy.v4 v4.3.0/go.mod h1:tm33zBoOwxjYHZIE+OV8bxTWFMJLrconzFMd38aARFk= diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index a72338ba7..8104b4dcf 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -75,6 +75,7 @@ const ( WorkloadMetricsTag = "Workload Metrics" WorkspaceMetricsTag = "Workspace Metrics" ComponentMetricsTag = "Component Metrics" + CustomMetricsTag = "Custom Metrics" LogQueryTag = "Log Query" TerminalTag = "Terminal" ) diff --git a/pkg/kapis/monitoring/v1alpha3/handler.go b/pkg/kapis/monitoring/v1alpha3/handler.go index 9f7220876..8bf4916cc 100644 --- a/pkg/kapis/monitoring/v1alpha3/handler.go +++ b/pkg/kapis/monitoring/v1alpha3/handler.go @@ -192,3 +192,36 @@ func (h handler) handleNamedMetricsQuery(resp *restful.Response, q queryOptions) } resp.WriteAsJson(res) } + +func (h handler) handleMetadataQuery(req *restful.Request, resp *restful.Response) { + res := h.mo.GetMetadata(req.PathParameter("namespace")) + resp.WriteAsJson(res) +} + +func (h handler) handleAdhocQuery(req *restful.Request, resp *restful.Response) { + var res monitoring.Metric + + params := parseRequestParams(req) + opt, err := h.makeQueryOptions(params, 0) + if err != nil { + if err.Error() == ErrNoHit { + resp.WriteAsJson(res) + return + } + + api.HandleBadRequest(resp, nil, err) + return + } + + if opt.isRangeQuery() { + res, err = h.mo.GetMetricOverTime(params.expression, params.namespaceName, opt.start, opt.end, opt.step) + } else { + res, err = h.mo.GetMetric(params.expression, params.namespaceName, opt.time) + } + + if err != nil { + api.HandleBadRequest(resp, nil, err) + } else { + resp.WriteAsJson(res) + } +} diff --git a/pkg/kapis/monitoring/v1alpha3/helper.go b/pkg/kapis/monitoring/v1alpha3/helper.go index d5442b7e0..f4a3ad27e 100644 --- a/pkg/kapis/monitoring/v1alpha3/helper.go +++ b/pkg/kapis/monitoring/v1alpha3/helper.go @@ -49,6 +49,7 @@ type reqParams struct { pvcName string storageClassName string 
componentType string + expression string } type queryOptions struct { @@ -99,6 +100,7 @@ func parseRequestParams(req *restful.Request) reqParams { r.pvcName = req.PathParameter("pvc") r.storageClassName = req.PathParameter("storageclass") r.componentType = req.PathParameter("component") + r.expression = req.QueryParameter("expr") return r } diff --git a/pkg/kapis/monitoring/v1alpha3/register.go b/pkg/kapis/monitoring/v1alpha3/register.go index 92110eeba..14e6239c8 100644 --- a/pkg/kapis/monitoring/v1alpha3/register.go +++ b/pkg/kapis/monitoring/v1alpha3/register.go @@ -400,6 +400,29 @@ func AddToContainer(c *restful.Container, k8sClient kubernetes.Interface, monito Returns(http.StatusOK, RespOK, model.Metrics{})). Produces(restful.MIME_JSON) + ws.Route(ws.GET("/namespaces/{namespace}/targets/metadata"). + To(h.handleMetadataQuery). + Doc("Get metadata of metrics for the specific namespace."). + Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.CustomMetricsTag}). + Writes(model.Metadata{}). + Returns(http.StatusOK, RespOK, model.Metadata{})). + Produces(restful.MIME_JSON) + + ws.Route(ws.GET("/namespaces/{namespace}/targets/query"). + To(h.handleAdhocQuery). + Doc("Make an ad-hoc query in the specific namespace."). + Param(ws.PathParameter("namespace", "The name of the namespace.").DataType("string").Required(true)). + Param(ws.QueryParameter("expr", "The expression to be evaluated.").DataType("string").Required(false)). + Param(ws.QueryParameter("start", "Start time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1559347200. ").DataType("string").Required(true)). + Param(ws.QueryParameter("end", "End time of query. Use **start** and **end** to retrieve metric data over a time span. It is a string with Unix time format, eg. 1561939200. ").DataType("string").Required(false)). 
+ Param(ws.QueryParameter("step", "Time interval. Retrieve metric data at a fixed interval within the time range of start and end. It requires both **start** and **end** are provided. The format is [0-9]+[smhdwy]. Defaults to 10m (i.e. 10 min).").DataType("string").DefaultValue("10m").Required(false)). + Param(ws.QueryParameter("time", "A timestamp in Unix time format. Retrieve metric data at a single point in time. Defaults to now. Time and the combination of start, end, step are mutually exclusive.").DataType("string").Required(false)). + Metadata(restfulspec.KeyOpenAPITags, []string{constants.CustomMetricsTag}). + Writes(monitoring.Metric{}). + Returns(http.StatusOK, RespOK, monitoring.Metric{})). + Produces(restful.MIME_JSON) + c.Add(ws) return nil } diff --git a/pkg/models/monitoring/expressions/prometheus/label_replace.go b/pkg/models/monitoring/expressions/prometheus/label_replace.go new file mode 100644 index 000000000..6e541598f --- /dev/null +++ b/pkg/models/monitoring/expressions/prometheus/label_replace.go @@ -0,0 +1,99 @@ +package prometheus + +import ( + "fmt" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/storage/metric" + "kubesphere.io/kubesphere/pkg/models/monitoring/expressions" +) + +func init() { + expressions.Register("prometheus", labelReplace) +} + +func labelReplace(input, ns string) (string, error) { + root, err := promql.ParseExpr(input) + if err != nil { + return "", err + } + + SetRecursive(root, ns) + if err != nil { + return "", err + } + + return root.String(), nil +} + +// Inspired by https://github.com/openshift/prom-label-proxy +func SetRecursive(node promql.Node, namespace string) (err error) { + switch n := node.(type) { + case *promql.EvalStmt: + if err := SetRecursive(n.Expr, namespace); err != nil { + return err + } + case promql.Expressions: + for _, e := range n { + if err := SetRecursive(e, namespace); err != nil { + return err + } + } + case 
*promql.AggregateExpr: + if err := SetRecursive(n.Expr, namespace); err != nil { + return err + } + case *promql.BinaryExpr: + if err := SetRecursive(n.LHS, namespace); err != nil { + return err + } + if err := SetRecursive(n.RHS, namespace); err != nil { + return err + } + case *promql.Call: + if err := SetRecursive(n.Args, namespace); err != nil { + return err + } + case *promql.ParenExpr: + if err := SetRecursive(n.Expr, namespace); err != nil { + return err + } + case *promql.UnaryExpr: + if err := SetRecursive(n.Expr, namespace); err != nil { + return err + } + case *promql.NumberLiteral, *promql.StringLiteral: + // nothing to do + case *promql.MatrixSelector: + n.LabelMatchers = enforceLabelMatchers(n.LabelMatchers, namespace) + case *promql.VectorSelector: + n.LabelMatchers = enforceLabelMatchers(n.LabelMatchers, namespace) + default: + return fmt.Errorf("promql.Walk: unhandled node type %T", node) + } + return err +} + +func enforceLabelMatchers(matchers metric.LabelMatchers, namespace string) metric.LabelMatchers { + var found bool + for i, m := range matchers { + if m.Name == "namespace" { + matchers[i] = &metric.LabelMatcher{ + Name: "namespace", + Type: metric.Equal, + Value: model.LabelValue(namespace), + } + found = true + break + } + } + + if !found { + matchers = append(matchers, &metric.LabelMatcher{ + Name: "namespace", + Type: metric.Equal, + Value: model.LabelValue(namespace), + }) + } + return matchers +} diff --git a/pkg/models/monitoring/expressions/prometheus/label_replace_test.go b/pkg/models/monitoring/expressions/prometheus/label_replace_test.go new file mode 100644 index 000000000..d265dc093 --- /dev/null +++ b/pkg/models/monitoring/expressions/prometheus/label_replace_test.go @@ -0,0 +1,51 @@ +package prometheus + +import ( + "fmt" + "github.com/google/go-cmp/cmp" + "testing" +) + +func TestLabelReplace(t *testing.T) { + tests := []struct { + expr string + expected string + expectedErr bool + }{ + { + expr: "up", + expected: 
`up{namespace="default"}`, + expectedErr: false, + }, + { + expr: `up{namespace="random"}`, + expected: `up{namespace="default"}`, + expectedErr: false, + }, + { + expr: `up{namespace="random"} + up{job="test"}`, + expected: `up{namespace="default"} + up{job="test",namespace="default"}`, + expectedErr: false, + }, + { + expr: `@@@@`, + expectedErr: true, + }, + } + + for i, tt := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + result, err := labelReplace(tt.expr, "default") + if err != nil { + if !tt.expectedErr { + t.Fatal(err) + } + return + } + + if diff := cmp.Diff(result, tt.expected); diff != "" { + t.Fatalf("%T differ (-got, +want): %s", tt.expected, diff) + } + }) + } +} diff --git a/pkg/models/monitoring/expressions/registry.go b/pkg/models/monitoring/expressions/registry.go new file mode 100644 index 000000000..12a606ad4 --- /dev/null +++ b/pkg/models/monitoring/expressions/registry.go @@ -0,0 +1,9 @@ +package expressions + +type labelReplaceFn func(expr, ns string) (string, error) + +var ReplaceNamespaceFns = make(map[string]labelReplaceFn) + +func Register(name string, fn labelReplaceFn) { + ReplaceNamespaceFns[name] = fn +} diff --git a/pkg/models/monitoring/monitoring.go b/pkg/models/monitoring/monitoring.go index b7443f1bd..795678932 100644 --- a/pkg/models/monitoring/monitoring.go +++ b/pkg/models/monitoring/monitoring.go @@ -19,15 +19,17 @@ package monitoring import ( + "kubesphere.io/kubesphere/pkg/models/monitoring/expressions" "kubesphere.io/kubesphere/pkg/simple/client/monitoring" "time" ) type MonitoringOperator interface { - GetMetrics(stmts []string, time time.Time) Metrics - GetMetricsOverTime(stmts []string, start, end time.Time, step time.Duration) Metrics + GetMetric(expr, namespace string, time time.Time) (monitoring.Metric, error) + GetMetricOverTime(expr, namespace string, start, end time.Time, step time.Duration) (monitoring.Metric, error) GetNamedMetrics(metrics []string, time time.Time, opt 
monitoring.QueryOption) Metrics GetNamedMetricsOverTime(metrics []string, start, end time.Time, step time.Duration, opt monitoring.QueryOption) Metrics + GetMetadata(namespace string) Metadata } type monitoringOperator struct { @@ -38,14 +40,28 @@ func NewMonitoringOperator(client monitoring.Interface) MonitoringOperator { return &monitoringOperator{client} } -// TODO(huanggze): reserve for custom monitoring -func (mo monitoringOperator) GetMetrics(stmts []string, time time.Time) Metrics { - panic("implement me") +func (mo monitoringOperator) GetMetric(expr, namespace string, time time.Time) (monitoring.Metric, error) { + // Different monitoring backend implementations have different ways to enforce namespace isolation. + // Each implementation should register itself to `ReplaceNamespaceFns` during init(). + // We hard code "prometheus" here because we only support this datasource so far. + // In the future, maybe the value should be returned from a method like `mo.c.GetMonitoringServiceName()`. + expr, err := expressions.ReplaceNamespaceFns["prometheus"](expr, namespace) + if err != nil { + return monitoring.Metric{}, err + } + return mo.c.GetMetric(expr, time), nil } -// TODO(huanggze): reserve for custom monitoring -func (mo monitoringOperator) GetMetricsOverTime(stmts []string, start, end time.Time, step time.Duration) Metrics { - panic("implement me") +func (mo monitoringOperator) GetMetricOverTime(expr, namespace string, start, end time.Time, step time.Duration) (monitoring.Metric, error) { + // Different monitoring backend implementations have different ways to enforce namespace isolation. + // Each implementation should register itself to `ReplaceNamespaceFns` during init(). + // We hard code "prometheus" here because we only support this datasource so far. + // In the future, maybe the value should be returned from a method like `mo.c.GetMonitoringServiceName()`. 
+ expr, err := expressions.ReplaceNamespaceFns["prometheus"](expr, namespace) + if err != nil { + return monitoring.Metric{}, err + } + return mo.c.GetMetricOverTime(expr, start, end, step), nil } func (mo monitoringOperator) GetNamedMetrics(metrics []string, time time.Time, opt monitoring.QueryOption) Metrics { @@ -57,3 +73,8 @@ func (mo monitoringOperator) GetNamedMetricsOverTime(metrics []string, start, en ress := mo.c.GetNamedMetricsOverTime(metrics, start, end, step, opt) return Metrics{Results: ress} } + +func (mo monitoringOperator) GetMetadata(namespace string) Metadata { + data := mo.c.GetMetadata(namespace) + return Metadata{Data: data} +} diff --git a/pkg/models/monitoring/types.go b/pkg/models/monitoring/types.go index 22364cfc1..7e5c2c6c5 100644 --- a/pkg/models/monitoring/types.go +++ b/pkg/models/monitoring/types.go @@ -8,3 +8,7 @@ type Metrics struct { TotalPages int `json:"total_page,omitempty" description:"total number of pages"` TotalItems int `json:"total_item,omitempty" description:"page size"` } + +type Metadata struct { + Data []monitoring.Metadata `json:"data" description:"actual array of results"` +} diff --git a/pkg/simple/client/monitoring/interface.go b/pkg/simple/client/monitoring/interface.go index 488161dfd..68a471317 100644 --- a/pkg/simple/client/monitoring/interface.go +++ b/pkg/simple/client/monitoring/interface.go @@ -3,8 +3,9 @@ package monitoring import "time" type Interface interface { - GetMetrics(exprs []string, time time.Time) []Metric - GetMetricsOverTime(exprs []string, start, end time.Time, step time.Duration) []Metric + GetMetric(expr string, time time.Time) Metric + GetMetricOverTime(expr string, start, end time.Time, step time.Duration) Metric GetNamedMetrics(metrics []string, time time.Time, opt QueryOption) []Metric GetNamedMetricsOverTime(metrics []string, start, end time.Time, step time.Duration, opt QueryOption) []Metric + GetMetadata(namespace string) []Metadata } diff --git 
a/pkg/simple/client/monitoring/prometheus/prometheus.go b/pkg/simple/client/monitoring/prometheus/prometheus.go index 0cb3bc8b3..a40571c13 100644 --- a/pkg/simple/client/monitoring/prometheus/prometheus.go +++ b/pkg/simple/client/monitoring/prometheus/prometheus.go @@ -2,6 +2,7 @@ package prometheus import ( "context" + "fmt" "github.com/prometheus/client_golang/api" apiv1 "github.com/prometheus/client_golang/api/prometheus/v1" "github.com/prometheus/common/model" @@ -24,14 +25,35 @@ func NewPrometheus(options *Options) (monitoring.Interface, error) { return prometheus{client: apiv1.NewAPI(client)}, err } -// TODO(huanggze): reserve for custom monitoring -func (p prometheus) GetMetrics(stmts []string, time time.Time) []monitoring.Metric { - panic("implement me") +func (p prometheus) GetMetric(expr string, ts time.Time) monitoring.Metric { + var parsedResp monitoring.Metric + + value, err := p.client.Query(context.Background(), expr, ts) + if err != nil { + parsedResp.Error = err.Error() + } else { + parsedResp.MetricData = parseQueryResp(value) + } + + return parsedResp } -// TODO(huanggze): reserve for custom monitoring -func (p prometheus) GetMetricsOverTime(stmts []string, start, end time.Time, step time.Duration) []monitoring.Metric { - panic("implement me") +func (p prometheus) GetMetricOverTime(expr string, start, end time.Time, step time.Duration) monitoring.Metric { + timeRange := apiv1.Range{ + Start: start, + End: end, + Step: step, + } + + value, err := p.client.QueryRange(context.Background(), expr, timeRange) + + var parsedResp monitoring.Metric + if err != nil { + parsedResp.Error = err.Error() + } else { + parsedResp.MetricData = parseQueryRangeResp(value) + } + return parsedResp } func (p prometheus) GetNamedMetrics(metrics []string, ts time.Time, o monitoring.QueryOption) []monitoring.Metric { @@ -49,7 +71,7 @@ func (p prometheus) GetNamedMetrics(metrics []string, ts time.Time, o monitoring value, err := p.client.Query(context.Background(), 
makeExpr(metric, *opts), ts) if err != nil { - parsedResp.Error = err.(*apiv1.Error).Msg + parsedResp.Error = err.Error() } else { parsedResp.MetricData = parseQueryResp(value) } @@ -88,7 +110,7 @@ func (p prometheus) GetNamedMetricsOverTime(metrics []string, start, end time.Ti value, err := p.client.QueryRange(context.Background(), makeExpr(metric, *opts), timeRange) if err != nil { - parsedResp.Error = err.(*apiv1.Error).Msg + parsedResp.Error = err.Error() } else { parsedResp.MetricData = parseQueryRangeResp(value) } @@ -106,6 +128,26 @@ func (p prometheus) GetNamedMetricsOverTime(metrics []string, start, end time.Ti return res } +func (p prometheus) GetMetadata(namespace string) []monitoring.Metadata { + var meta []monitoring.Metadata + + // Filter metrics available to members of this namespace + matchTarget := fmt.Sprintf("{namespace=\"%s\"}", namespace) + items, err := p.client.TargetsMetadata(context.Background(), matchTarget, "", "") + if err != nil { + return meta + } + + for _, item := range items { + meta = append(meta, monitoring.Metadata{ + Metric: item.Metric, + Type: string(item.Type), + Help: item.Help, + }) + } + return meta +} + func parseQueryRangeResp(value model.Value) monitoring.MetricData { res := monitoring.MetricData{MetricType: monitoring.MetricTypeMatrix} diff --git a/pkg/simple/client/monitoring/prometheus/prometheus_test.go b/pkg/simple/client/monitoring/prometheus/prometheus_test.go index 043175b3d..16c60ee0e 100644 --- a/pkg/simple/client/monitoring/prometheus/prometheus_test.go +++ b/pkg/simple/client/monitoring/prometheus/prometheus_test.go @@ -24,7 +24,8 @@ func TestGetNamedMetrics(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - expected, err := jsonFromFile(tt.expected) + expected := make([]monitoring.Metric, 0) + err := jsonFromFile(tt.expected, &expected) if err != nil { t.Fatal(err) } @@ -53,7 +54,8 @@ func TestGetNamedMetricsOverTime(t *testing.T) { for _, tt := range tests { t.Run(tt.name, 
func(t *testing.T) { - expected, err := jsonFromFile(tt.expected) + expected := make([]monitoring.Metric, 0) + err := jsonFromFile(tt.expected, &expected) if err != nil { t.Fatal(err) } @@ -70,6 +72,44 @@ func TestGetNamedMetricsOverTime(t *testing.T) { } } +func TestGetMetadata(t *testing.T) { + tests := []struct { + fakeResp string + expected string + }{ + { + fakeResp: "metadata-prom.json", + expected: "metadata-res.json", + }, + { + fakeResp: "metadata-notfound-prom.json", + expected: "metadata-notfound-res.json", + }, + } + + for i, tt := range tests { + t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + expected := make([]monitoring.Metadata, 0) + err := jsonFromFile(tt.expected, &expected) + if err != nil { + t.Fatal(err) + } + if len(expected) == 0 { + expected = nil + } + + srv := mockPrometheusService("/api/v1/targets/metadata", tt.fakeResp) + defer srv.Close() + + client, _ := NewPrometheus(&Options{Endpoint: srv.URL}) + result := client.GetMetadata("default") + if diff := cmp.Diff(result, expected); diff != "" { + t.Fatalf("%T differ (-got, +want): %s", expected, diff) + } + }) + } +} + func mockPrometheusService(pattern, fakeResp string) *httptest.Server { mux := http.NewServeMux() mux.HandleFunc(pattern, func(res http.ResponseWriter, req *http.Request) { @@ -79,17 +119,15 @@ func mockPrometheusService(pattern, fakeResp string) *httptest.Server { return httptest.NewServer(mux) } -func jsonFromFile(expectedFile string) ([]monitoring.Metric, error) { - expectedJson := []monitoring.Metric{} - +func jsonFromFile(expectedFile string, expectedJsonPtr interface{}) error { json, err := ioutil.ReadFile(fmt.Sprintf("./testdata/%s", expectedFile)) if err != nil { - return expectedJson, err + return err } - err = jsoniter.Unmarshal(json, &expectedJson) + err = jsoniter.Unmarshal(json, expectedJsonPtr) if err != nil { - return expectedJson, err + return err } - return expectedJson, nil + return nil } diff --git 
a/pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-prom.json b/pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-prom.json new file mode 100644 index 000000000..1ffb57de7 --- /dev/null +++ b/pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-prom.json @@ -0,0 +1,5 @@ +{ + "status":"error", + "errorType":"not_found", + "error":"specified metadata not found" +} \ No newline at end of file diff --git a/pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-res.json b/pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-res.json new file mode 100644 index 000000000..0637a088a --- /dev/null +++ b/pkg/simple/client/monitoring/prometheus/testdata/metadata-notfound-res.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json b/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json new file mode 100644 index 000000000..e9fca8153 --- /dev/null +++ b/pkg/simple/client/monitoring/prometheus/testdata/metadata-prom.json @@ -0,0 +1,25 @@ +{ + "status": "success", + "data": [ + { + "target": { + "instance": "127.0.0.1:9090", + "job": "prometheus" + }, + "metric": "prometheus_treecache_zookeeper_failures_total", + "type": "counter", + "help": "The total number of ZooKeeper failures.", + "unit": "" + }, + { + "target": { + "instance": "127.0.0.1:9090", + "job": "prometheus" + }, + "metric": "prometheus_tsdb_reloads_total", + "type": "counter", + "help": "Number of times the database reloaded block data from disk.", + "unit": "" + } + ] +} \ No newline at end of file diff --git a/pkg/simple/client/monitoring/prometheus/testdata/metadata-res.json b/pkg/simple/client/monitoring/prometheus/testdata/metadata-res.json new file mode 100644 index 000000000..915a0646b --- /dev/null +++ b/pkg/simple/client/monitoring/prometheus/testdata/metadata-res.json @@ -0,0 +1,12 @@ +[ + { + "metric": 
"prometheus_treecache_zookeeper_failures_total", + "type": "counter", + "help": "The total number of ZooKeeper failures." + }, + { + "metric": "prometheus_tsdb_reloads_total", + "type": "counter", + "help": "Number of times the database reloaded block data from disk." + } +] \ No newline at end of file diff --git a/pkg/simple/client/monitoring/prometheus/testdata/metrics-error-res.json b/pkg/simple/client/monitoring/prometheus/testdata/metrics-error-res.json index b580a35f2..5bd92c3a5 100644 --- a/pkg/simple/client/monitoring/prometheus/testdata/metrics-error-res.json +++ b/pkg/simple/client/monitoring/prometheus/testdata/metrics-error-res.json @@ -1,6 +1,6 @@ [ { "metric_name": "cluster_cpu_utilisation", - "error": "inconsistent body for response code" + "error": "bad_response: inconsistent body for response code" } ] \ No newline at end of file diff --git a/pkg/simple/client/monitoring/types.go b/pkg/simple/client/monitoring/types.go index a9bc915de..baf732434 100644 --- a/pkg/simple/client/monitoring/types.go +++ b/pkg/simple/client/monitoring/types.go @@ -5,6 +5,12 @@ const ( MetricTypeVector = "vector" ) +type Metadata struct { + Metric string `json:"metric,omitempty" description:"metric name"` + Type string `json:"type,omitempty" description:"metric type"` + Help string `json:"help,omitempty" description:"metric description"` +} + type Metric struct { MetricName string `json:"metric_name,omitempty" description:"metric name, eg. scheduler_up_sum"` MetricData `json:"data,omitempty" description:"actual metric result"` diff --git a/vendor/github.com/golang/snappy/.gitignore b/vendor/github.com/golang/snappy/.gitignore new file mode 100644 index 000000000..042091d9b --- /dev/null +++ b/vendor/github.com/golang/snappy/.gitignore @@ -0,0 +1,16 @@ +cmd/snappytool/snappytool +testdata/bench + +# These explicitly listed benchmark data files are for an obsolete version of +# snappy_test.go. 
+testdata/alice29.txt +testdata/asyoulik.txt +testdata/fireworks.jpeg +testdata/geo.protodata +testdata/html +testdata/html_x_4 +testdata/kppkn.gtb +testdata/lcet10.txt +testdata/paper-100k.pdf +testdata/plrabn12.txt +testdata/urls.10K diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS new file mode 100644 index 000000000..bcfa19520 --- /dev/null +++ b/vendor/github.com/golang/snappy/AUTHORS @@ -0,0 +1,15 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization +# The email address is not required for organizations. + +# Please keep the list sorted. + +Damian Gryski +Google Inc. +Jan Mercl <0xjnml@gmail.com> +Rodolfo Carvalho +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS new file mode 100644 index 000000000..931ae3160 --- /dev/null +++ b/vendor/github.com/golang/snappy/CONTRIBUTORS @@ -0,0 +1,37 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. 
+# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name + +# Please keep the list sorted. + +Damian Gryski +Jan Mercl <0xjnml@gmail.com> +Kai Backman +Marc-Antoine Ruel +Nigel Tao +Rob Pike +Rodolfo Carvalho +Russ Cox +Sebastien Binet diff --git a/vendor/github.com/golang/snappy/LICENSE b/vendor/github.com/golang/snappy/LICENSE new file mode 100644 index 000000000..6050c10f4 --- /dev/null +++ b/vendor/github.com/golang/snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/golang/snappy/README b/vendor/github.com/golang/snappy/README new file mode 100644 index 000000000..cea12879a --- /dev/null +++ b/vendor/github.com/golang/snappy/README @@ -0,0 +1,107 @@ +The Snappy compression format in the Go programming language. + +To download and install from source: +$ go get github.com/golang/snappy + +Unless otherwise noted, the Snappy-Go source files are distributed +under the BSD-style license found in the LICENSE file. + + + +Benchmarks. + +The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten +or so files, the same set used by the C++ Snappy code (github.com/google/snappy +and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ +3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: + +"go test -test.bench=." 
+ +_UFlat0-8 2.19GB/s ± 0% html +_UFlat1-8 1.41GB/s ± 0% urls +_UFlat2-8 23.5GB/s ± 2% jpg +_UFlat3-8 1.91GB/s ± 0% jpg_200 +_UFlat4-8 14.0GB/s ± 1% pdf +_UFlat5-8 1.97GB/s ± 0% html4 +_UFlat6-8 814MB/s ± 0% txt1 +_UFlat7-8 785MB/s ± 0% txt2 +_UFlat8-8 857MB/s ± 0% txt3 +_UFlat9-8 719MB/s ± 1% txt4 +_UFlat10-8 2.84GB/s ± 0% pb +_UFlat11-8 1.05GB/s ± 0% gaviota + +_ZFlat0-8 1.04GB/s ± 0% html +_ZFlat1-8 534MB/s ± 0% urls +_ZFlat2-8 15.7GB/s ± 1% jpg +_ZFlat3-8 740MB/s ± 3% jpg_200 +_ZFlat4-8 9.20GB/s ± 1% pdf +_ZFlat5-8 991MB/s ± 0% html4 +_ZFlat6-8 379MB/s ± 0% txt1 +_ZFlat7-8 352MB/s ± 0% txt2 +_ZFlat8-8 396MB/s ± 1% txt3 +_ZFlat9-8 327MB/s ± 1% txt4 +_ZFlat10-8 1.33GB/s ± 1% pb +_ZFlat11-8 605MB/s ± 1% gaviota + + + +"go test -test.bench=. -tags=noasm" + +_UFlat0-8 621MB/s ± 2% html +_UFlat1-8 494MB/s ± 1% urls +_UFlat2-8 23.2GB/s ± 1% jpg +_UFlat3-8 1.12GB/s ± 1% jpg_200 +_UFlat4-8 4.35GB/s ± 1% pdf +_UFlat5-8 609MB/s ± 0% html4 +_UFlat6-8 296MB/s ± 0% txt1 +_UFlat7-8 288MB/s ± 0% txt2 +_UFlat8-8 309MB/s ± 1% txt3 +_UFlat9-8 280MB/s ± 1% txt4 +_UFlat10-8 753MB/s ± 0% pb +_UFlat11-8 400MB/s ± 0% gaviota + +_ZFlat0-8 409MB/s ± 1% html +_ZFlat1-8 250MB/s ± 1% urls +_ZFlat2-8 12.3GB/s ± 1% jpg +_ZFlat3-8 132MB/s ± 0% jpg_200 +_ZFlat4-8 2.92GB/s ± 0% pdf +_ZFlat5-8 405MB/s ± 1% html4 +_ZFlat6-8 179MB/s ± 1% txt1 +_ZFlat7-8 170MB/s ± 1% txt2 +_ZFlat8-8 189MB/s ± 1% txt3 +_ZFlat9-8 164MB/s ± 1% txt4 +_ZFlat10-8 479MB/s ± 1% pb +_ZFlat11-8 270MB/s ± 1% gaviota + + + +For comparison (Go's encoded output is byte-for-byte identical to C++'s), here +are the numbers from C++ Snappy's + +make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log + +BM_UFlat/0 2.4GB/s html +BM_UFlat/1 1.4GB/s urls +BM_UFlat/2 21.8GB/s jpg +BM_UFlat/3 1.5GB/s jpg_200 +BM_UFlat/4 13.3GB/s pdf +BM_UFlat/5 2.1GB/s html4 +BM_UFlat/6 1.0GB/s txt1 +BM_UFlat/7 959.4MB/s txt2 +BM_UFlat/8 1.0GB/s txt3 +BM_UFlat/9 864.5MB/s txt4 +BM_UFlat/10 2.9GB/s pb +BM_UFlat/11 1.2GB/s 
gaviota + +BM_ZFlat/0 944.3MB/s html (22.31 %) +BM_ZFlat/1 501.6MB/s urls (47.78 %) +BM_ZFlat/2 14.3GB/s jpg (99.95 %) +BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) +BM_ZFlat/4 8.3GB/s pdf (83.30 %) +BM_ZFlat/5 903.5MB/s html4 (22.52 %) +BM_ZFlat/6 336.0MB/s txt1 (57.88 %) +BM_ZFlat/7 312.3MB/s txt2 (61.91 %) +BM_ZFlat/8 353.1MB/s txt3 (54.99 %) +BM_ZFlat/9 289.9MB/s txt4 (66.26 %) +BM_ZFlat/10 1.2GB/s pb (19.68 %) +BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go new file mode 100644 index 000000000..72efb0353 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode.go @@ -0,0 +1,237 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. 
+func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. 
+func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + for { + if r.i < r.j { + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil + } + if !r.readFull(r.buf[:4], true) { + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return 0, r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). 
+ if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.decoded[:n], false) { + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return 0, r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return 0, r.err + } + } +} diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_amd64.go new file mode 100644 index 000000000..fcd192b84 --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.go @@ -0,0 +1,14 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// decode has the same semantics as in decode_other.go. 
+// +//go:noescape +func decode(dst, src []byte) int diff --git a/vendor/github.com/golang/snappy/decode_amd64.s b/vendor/github.com/golang/snappy/decode_amd64.s new file mode 100644 index 000000000..e6179f65e --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_amd64.s @@ -0,0 +1,490 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - AX scratch +// - BX scratch +// - CX length or x +// - DX offset +// - SI &src[s] +// - DI &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. +// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. +TEXT ·decode(SB), NOSPLIT, $48-56 + // Initialize SI, DI and R8-R13. + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, DI + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, SI + MOVQ R11, R13 + ADDQ R12, R13 + +loop: + // for s < len(src) + CMPQ SI, R13 + JEQ end + + // CX = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBLZX (SI), CX + MOVL CX, BX + ANDL $3, BX + CMPL BX, $1 + JAE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. 
+ + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + SHRL $2, CX + CMPL CX, $60 + JAE tagLit60Plus + + // case x < 60: + // s++ + INCQ SI + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that CX == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // CX can hold 64 bits, so the increment cannot overflow. + INCQ CX + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // AX = len(dst) - d + // BX = len(src) - s + MOVQ R10, AX + SUBQ DI, AX + MOVQ R13, BX + SUBQ SI, BX + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMPQ CX, $16 + JGT callMemmove + CMPQ AX, $16 + JLT callMemmove + CMPQ BX, $16 + JLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. 
This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(SI), X0 + MOVOU X0, 0(DI) + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMPQ CX, AX + JGT errCorrupt + CMPQ CX, BX + JGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, R13 + ADDQ R12, R13 + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. 
+ ADDQ CX, SI + SUBQ $58, SI + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // case x == 60: + CMPL CX, $61 + JEQ tagLit61 + JA tagLit62Plus + + // x = uint32(src[s-1]) + MOVBLZX -1(SI), CX + JMP doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVWLZX -2(SI), CX + JMP doLit + +tagLit62Plus: + CMPL CX, $62 + JA tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVWLZX -3(SI), CX + MOVBLZX -1(SI), BX + SHLL $16, BX + ORL BX, CX + JMP doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVL -4(SI), CX + JMP doLit + +// The code above handles literal tags. +// ---------------------------------------- +// The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADDQ $5, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-5])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVLQZX -4(SI), DX + JMP doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADDQ $3, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // length = 1 + int(src[s-3])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVWQZX -2(SI), DX + JMP doCopy + +tagCopy: + // We have a copy tag. 
We assume that: + // - BX == src[s] & 0x03 + // - CX == src[s] + CMPQ BX, $2 + JEQ tagCopy2 + JA tagCopy4 + + // case tagCopy1: + // s += 2 + ADDQ $2, SI + + // if uint(s) > uint(len(src)) { etc } + MOVQ SI, BX + SUBQ R11, BX + CMPQ BX, R12 + JA errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVQ CX, DX + ANDQ $0xe0, DX + SHLQ $3, DX + MOVBQZX -1(SI), BX + ORQ BX, DX + + // length = 4 + int(src[s-2])>>2&0x7 + SHRQ $2, CX + ANDQ $7, CX + ADDQ $4, CX + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - CX == length && CX > 0 + // - DX == offset + + // if offset <= 0 { etc } + CMPQ DX, $0 + JLE errCorrupt + + // if d < offset { etc } + MOVQ DI, BX + SUBQ R8, BX + CMPQ BX, DX + JLT errCorrupt + + // if length > len(dst)-d { etc } + MOVQ R10, BX + SUBQ DI, BX + CMPQ CX, BX + JGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVQ R10, R14 + SUBQ DI, R14 + MOVQ DI, R15 + SUBQ DX, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMPQ CX, $16 + JGT slowForwardCopy + CMPQ DX, $8 + JLT slowForwardCopy + CMPQ R14, $16 + JLT slowForwardCopy + MOVQ 0(R15), AX + MOVQ AX, 0(DI) + MOVQ 8(R15), BX + MOVQ BX, 8(DI) + ADDQ CX, DI + JMP loop + +slowForwardCopy: + // !!! 
If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUBQ $10, R14 + CMPQ CX, R14 + JGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! 
As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMPQ DX, $8 + JGE fixUpSlowForwardCopy + MOVQ (R15), BX + MOVQ BX, (DI) + SUBQ DX, CX + ADDQ DX, DI + ADDQ DX, DX + JMP makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by DI being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save DI to AX so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVQ DI, AX + ADDQ CX, DI + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + CMPQ CX, $0 + JLE loop + MOVQ (R15), BX + MOVQ BX, (AX) + ADDQ $8, R15 + ADDQ $8, AX + SUBQ $8, CX + JMP finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), BX + MOVB BX, (DI) + INCQ R15 + INCQ DI + DECQ CX + JNZ verySlowForwardCopy + JMP loop + +// The code above handles copy tags. +// ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". 
+ // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go new file mode 100644 index 000000000..8c9f2049b --- /dev/null +++ b/vendor/github.com/golang/snappy/decode_other.go @@ -0,0 +1,101 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. 
+ return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike + // the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + for end := d + length; d != end; d++ { + dst[d] = dst[d-offset] + } + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go new file mode 100644 index 000000000..8d393e904 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode.go @@ -0,0 +1,285 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. 
A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. Thus, +// minNonLiteralBlockSize equals 1 + 1 + inputMargin. +// +// The C++ code doesn't use this exact threshold, but it could, as discussed at +// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion +// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an +// optimization. It should not affect the encoded form. This is tested by +// TestSameEncodingAsCppShortCopies. +const minNonLiteralBlockSize = 1 + 1 + inputMargin + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + n := uint64(srcLen) + if n > 0xffffffff { + return -1 + } + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. 
+ // + // This last factor dominates the blowup, so the final estimate is: + n = 32 + n + n/6 + if n > 0xffffffff { + return -1 + } + return int(n) +} + +var errClosed = errors.New("snappy: Writer is closed") + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. 
This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. 
+ compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. + w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_amd64.go new file mode 100644 index 000000000..150d91bc8 --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// emitLiteral has the same semantics as in encode_other.go. +// +//go:noescape +func emitLiteral(dst, lit []byte) int + +// emitCopy has the same semantics as in encode_other.go. +// +//go:noescape +func emitCopy(dst []byte, offset, length int) int + +// extendMatch has the same semantics as in encode_other.go. +// +//go:noescape +func extendMatch(src []byte, i, j int) int + +// encodeBlock has the same semantics as in encode_other.go. +// +//go:noescape +func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/golang/snappy/encode_amd64.s b/vendor/github.com/golang/snappy/encode_amd64.s new file mode 100644 index 000000000..adfd979fe --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_amd64.s @@ -0,0 +1,730 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a +// Go toolchain regression. See https://github.com/golang/go/issues/15426 and +// https://github.com/golang/snappy/issues/29 +// +// As a workaround, the package was built with a known good assembler, and +// those instructions were disassembled by "objdump -d" to yield the +// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 +// style comments, in AT&T asm syntax. Note that rsp here is a physical +// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). +// The instructions were then encoded as "BYTE $0x.." sequences, which assemble +// fine on Go 1.6. + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". 
+ +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - AX len(lit) +// - BX n +// - DX return value +// - DI &dst[i] +// - R10 &lit[0] +// +// The 24 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $24-56 + MOVQ dst_base+0(FP), DI + MOVQ lit_base+24(FP), R10 + MOVQ lit_len+32(FP), AX + MOVQ AX, DX + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT oneByte + CMPL BX, $256 + JLT twoBytes + +threeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + ADDQ $3, DX + JMP memmove + +twoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + ADDQ $2, DX + JMP memmove + +oneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + ADDQ $1, DX + +memmove: + MOVQ DX, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - AX length +// - SI &dst[0] +// - DI &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. 
+TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVQ dst_base+0(FP), DI + MOVQ DI, SI + MOVQ offset+24(FP), R11 + MOVQ length+32(FP), AX + +loop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP loop0 + +step1: + // if length > 64 { etc } + CMPL AX, $64 + JLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMPL AX, $12 + JGE step3 + CMPL R11, $2048 + JGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - DX &src[0] +// - SI &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), R14 + MOVQ i+24(FP), R15 + MOVQ j+32(FP), SI + ADDQ DX, R14 + ADDQ DX, R15 + ADDQ DX, SI + MOVQ R14, R13 + SUBQ $8, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. 
If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA cmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE bsf + ADDQ $8, R15 + ADDQ $8, SI + JMP cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE extendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE extendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - AX . . +// - BX . . +// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). +// - DX 64 &src[0], tableSize +// - SI 72 &src[s] +// - DI 80 &dst[d] +// - R9 88 sLimit +// - R10 . &src[nextEmit] +// - R11 96 prevHash, currHash, nextHash, offset +// - R12 104 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 112 candidate +// +// The second column (56, 64, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. 
An +// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. +TEXT ·encodeBlock(SB), 0, $32888-56 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVQ $24, CX + MOVQ $256, DX + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + CMPQ DX, $16384 + JGE varTable + CMPQ DX, R14 + JGE varTable + SUBQ $1, CX + SHLQ $1, DX + JMP calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU + // writes 16 bytes, so we can do only tableSize/8 writes instead of the + // 2048 writes that would zero-initialize all of table's 32768 bytes. + SHRQ $3, DX + LEAQ table-32768(SP), BX + PXOR X0, X0 + +memclr: + MOVOU X0, 0(BX) + ADDQ $16, BX + SUBQ $1, DX + JNZ memclr + + // !!! DX = &src[0] + MOVQ SI, DX + + // sLimit := len(src) - inputMargin + MOVQ R14, R9 + SUBQ $15, R9 + + // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't + // change for the rest of the function. 
+ MOVQ CX, 56(SP) + MOVQ DX, 64(SP) + MOVQ R9, 88(SP) + + // nextEmit := 0 + MOVQ DX, R10 + + // s := 1 + ADDQ $1, SI + + // nextHash := hash(load32(src, s), shift) + MOVL 0(SI), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + +outer: + // for { etc } + + // skip := 32 + MOVQ $32, R12 + + // nextS := s + MOVQ SI, R13 + + // candidate := 0 + MOVQ $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVQ R13, SI + + // bytesBetweenHashLookups := skip >> 5 + MOVQ R12, R14 + SHRQ $5, R14 + + // nextS = s + bytesBetweenHashLookups + ADDQ R14, R13 + + // skip += bytesBetweenHashLookups + ADDQ R14, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVQ R13, AX + SUBQ DX, AX + CMPQ AX, R9 + JA emitRemainder + + // candidate = int(table[nextHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[nextHash] = uint16(s) + MOVQ SI, AX + SUBQ DX, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // nextHash = hash(load32(src, nextS), shift) + MOVL 0(R13), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVL 0(SI), AX + MOVL (DX)(R15*1), BX + CMPL AX, BX + JNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVQ SI, AX + SUBQ R10, AX + CMPQ AX, $16 + JLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. 
+ // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT inlineEmitLiteralOneByte + CMPL BX, $256 + JLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVQ SI, 72(SP) + MOVQ DI, 80(SP) + MOVQ R15, 112(SP) + CALL runtime·memmove(SB) + MOVQ 56(SP), CX + MOVQ 64(SP), DX + MOVQ 72(SP), SI + MOVQ 80(SP), DI + MOVQ 88(SP), R9 + MOVQ 112(SP), R15 + JMP inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB AX, BX + SUBB $1, BX + SHLB $2, BX + MOVB BX, (DI) + ADDQ $1, DI + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(R10), X0 + MOVOU X0, 0(DI) + ADDQ AX, DI + +inner1: + // for { etc } + + // base := s + MOVQ SI, R12 + + // !!! 
offset := base - candidate + MOVQ R12, R11 + SUBQ R15, R11 + SUBQ DX, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVQ src_len+32(FP), R14 + ADDQ DX, R14 + + // !!! R13 = &src[len(src) - 8] + MOVQ R14, R13 + SUBQ $8, R13 + + // !!! R15 = &src[candidate + 4] + ADDQ $4, R15 + ADDQ DX, R15 + + // !!! s += 4 + ADDQ $4, SI + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA inlineExtendMatchCmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE inlineExtendMatchBSF + ADDQ $8, R15 + ADDQ $8, SI + JMP inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + JMP inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE inlineExtendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE inlineExtendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVQ SI, AX + SUBQ R12, AX + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. 
+ MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + CMPL AX, $64 + JLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + CMPL AX, $12 + JGE inlineEmitCopyStep3 + CMPL R11, $2048 + JGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + JMP inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVQ SI, R10 + + // if s >= sLimit { goto emitRemainder } + MOVQ SI, AX + SUBQ DX, AX + CMPQ AX, R9 + JAE emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. 
+ + // x := load64(src, s-1) + MOVQ -1(SI), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // table[prevHash] = uint16(s-1) + MOVQ SI, AX + SUBQ DX, AX + SUBQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // currHash := hash(uint32(x>>8), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // candidate = int(table[currHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[currHash] = uint16(s) + ADDQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVL (DX)(R15*1), BX + CMPL R14, BX + JEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // s++ + ADDQ $1, SI + + // break out of the inner1 for loop, i.e. continue the outer loop. + JMP outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVQ src_len+32(FP), AX + ADDQ DX, AX + CMPQ R10, AX + JEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVQ DI, 0(SP) + MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ R10, 24(SP) + SUBQ R10, AX + MOVQ AX, 32(SP) + MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVQ DI, 80(SP) + CALL ·emitLiteral(SB) + MOVQ 80(SP), DI + + // Finish the "d +=" part of "d += emitLiteral(etc)". 
+ ADDQ 48(SP), DI + +encodeBlockEnd: + MOVQ dst_base+0(FP), AX + SUBQ AX, DI + MOVQ DI, d+48(FP) + RET diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go new file mode 100644 index 000000000..dbcae905e --- /dev/null +++ b/vendor/github.com/golang/snappy/encode_other.go @@ -0,0 +1,238 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= len(lit) && len(lit) <= 65536 +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + default: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + } + return i + copy(dst[i:], lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= 65535 +// 4 <= length && length <= 65535 +func emitCopy(dst []byte, offset, length int) int { + i := 0 + // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. 
The + // threshold for this loop is a little higher (at 68 = 64 + 4), and the + // length emitted down below is is a little lower (at 60 = 64 - 4), because + // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed + // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as + // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as + // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a + // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an + // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. + for length >= 68 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[i+0] = 63<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 64 + } + if length > 64 { + // Emit a length 60 copy, encoded as 3 bytes. + dst[i+0] = 59<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 60 + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[i+0] = uint8(length-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + return i + 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + return i + 2 +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} + +func hash(u, shift uint32) uint32 { + return (u * 0x1e35a7bd) >> shift +} + +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. 
+// +// It also assumes that: +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxBlockSize and maxBlockSize == 65536. + const ( + maxTableSize = 1 << 14 + // tableMask is redundant, but helps the compiler eliminate bounds + // checks. + tableMask = maxTableSize - 1 + ) + shift := uint32(32 - 8) + for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + shift-- + } + // In Go, all array elements are zero-initialized, so there is no advantage + // to a smaller tableSize per se. However, it matches the C++ algorithm, + // and in the asm versions of this code, we can get away with zeroing only + // the first tableSize elements. + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s), shift) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. 
This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS), shift) + if load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. 
+ // + // This is an inlined version of: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { + } + + d += emitCopy(dst[d:], base-candidate, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/golang/snappy/snappy.go b/vendor/github.com/golang/snappy/snappy.go new file mode 100644 index 000000000..ece692ea4 --- /dev/null +++ b/vendor/github.com/golang/snappy/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. 
+// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snappy // import "github.com/golang/snappy" + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. 
+*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/opentracing/opentracing-go/.gitignore b/vendor/github.com/opentracing/opentracing-go/.gitignore new file mode 100644 index 000000000..c57100a59 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/.gitignore @@ -0,0 +1 @@ +coverage.txt diff --git a/vendor/github.com/opentracing/opentracing-go/.travis.yml b/vendor/github.com/opentracing/opentracing-go/.travis.yml new file mode 100644 index 000000000..8d5b75e41 --- /dev/null +++ 
b/vendor/github.com/opentracing/opentracing-go/.travis.yml @@ -0,0 +1,20 @@ +language: go + +matrix: + include: + - go: "1.11.x" + - go: "1.12.x" + - go: "tip" + env: + - LINT=true + - COVERAGE=true + +install: + - if [ "$LINT" == true ]; then go get -u golang.org/x/lint/golint/... ; else echo 'skipping lint'; fi + - go get -u github.com/stretchr/testify/... + +script: + - make test + - go build ./... + - if [ "$LINT" == true ]; then make lint ; else echo 'skipping lint'; fi + - if [ "$COVERAGE" == true ]; then make cover && bash <(curl -s https://codecov.io/bash) ; else echo 'skipping coverage'; fi diff --git a/vendor/github.com/opentracing/opentracing-go/CHANGELOG.md b/vendor/github.com/opentracing/opentracing-go/CHANGELOG.md new file mode 100644 index 000000000..7c14febe1 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/CHANGELOG.md @@ -0,0 +1,46 @@ +Changes by Version +================== + +1.1.0 (2019-03-23) +------------------- + +Notable changes: +- The library is now released under Apache 2.0 license +- Use Set() instead of Add() in HTTPHeadersCarrier is functionally a breaking change (fixes issue [#159](https://github.com/opentracing/opentracing-go/issues/159)) +- 'golang.org/x/net/context' is replaced with 'context' from the standard library + +List of all changes: + +- Export StartSpanFromContextWithTracer (#214) +- Add IsGlobalTracerRegistered() to indicate if a tracer has been registered (#201) +- Use Set() instead of Add() in HTTPHeadersCarrier (#191) +- Update license to Apache 2.0 (#181) +- Replace 'golang.org/x/net/context' with 'context' (#176) +- Port of Python opentracing/harness/api_check.py to Go (#146) +- Fix race condition in MockSpan.Context() (#170) +- Add PeerHostIPv4.SetString() (#155) +- Add a Noop log field type to log to allow for optional fields (#150) + + +1.0.2 (2017-04-26) +------------------- + +- Add more semantic tags (#139) + + +1.0.1 (2017-02-06) +------------------- + +- Correct spelling in comments +- 
Address race in nextMockID() (#123) +- log: avoid panic marshaling nil error (#131) +- Deprecate InitGlobalTracer in favor of SetGlobalTracer (#128) +- Drop Go 1.5 that fails in Travis (#129) +- Add convenience methods Key() and Value() to log.Field +- Add convenience methods to log.Field (2 years, 6 months ago) + +1.0.0 (2016-09-26) +------------------- + +- This release implements OpenTracing Specification 1.0 (https://opentracing.io/spec) + diff --git a/vendor/github.com/opentracing/opentracing-go/LICENSE b/vendor/github.com/opentracing/opentracing-go/LICENSE new file mode 100644 index 000000000..f0027349e --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2016 The OpenTracing Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/opentracing/opentracing-go/Makefile b/vendor/github.com/opentracing/opentracing-go/Makefile new file mode 100644 index 000000000..62abb63f5 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/Makefile @@ -0,0 +1,20 @@ +.DEFAULT_GOAL := test-and-lint + +.PHONY: test-and-lint +test-and-lint: test lint + +.PHONY: test +test: + go test -v -cover -race ./... + +.PHONY: cover +cover: + go test -v -coverprofile=coverage.txt -covermode=atomic -race ./... + +.PHONY: lint +lint: + go fmt ./... + golint ./... + @# Run again with magic to exit non-zero if golint outputs anything. + @! (golint ./... | read dummy) + go vet ./... 
diff --git a/vendor/github.com/opentracing/opentracing-go/README.md b/vendor/github.com/opentracing/opentracing-go/README.md new file mode 100644 index 000000000..6ef1d7c9d --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/README.md @@ -0,0 +1,171 @@ +[![Gitter chat](http://img.shields.io/badge/gitter-join%20chat%20%E2%86%92-brightgreen.svg)](https://gitter.im/opentracing/public) [![Build Status](https://travis-ci.org/opentracing/opentracing-go.svg?branch=master)](https://travis-ci.org/opentracing/opentracing-go) [![GoDoc](https://godoc.org/github.com/opentracing/opentracing-go?status.svg)](http://godoc.org/github.com/opentracing/opentracing-go) +[![Sourcegraph Badge](https://sourcegraph.com/github.com/opentracing/opentracing-go/-/badge.svg)](https://sourcegraph.com/github.com/opentracing/opentracing-go?badge) + +# OpenTracing API for Go + +This package is a Go platform API for OpenTracing. + +## Required Reading + +In order to understand the Go platform API, one must first be familiar with the +[OpenTracing project](https://opentracing.io) and +[terminology](https://opentracing.io/specification/) more specifically. + +## API overview for those adding instrumentation + +Everyday consumers of this `opentracing` package really only need to worry +about a couple of key abstractions: the `StartSpan` function, the `Span` +interface, and binding a `Tracer` at `main()`-time. Here are code snippets +demonstrating some important use cases. + +#### Singleton initialization + +The simplest starting point is `./default_tracer.go`. As early as possible, call + +```go + import "github.com/opentracing/opentracing-go" + import ".../some_tracing_impl" + + func main() { + opentracing.SetGlobalTracer( + // tracing impl specific: + some_tracing_impl.New(...), + ) + ... + } +``` + +#### Non-Singleton initialization + +If you prefer direct control to singletons, manage ownership of the +`opentracing.Tracer` implementation explicitly. 
+ +#### Creating a Span given an existing Go `context.Context` + +If you use `context.Context` in your application, OpenTracing's Go library will +happily rely on it for `Span` propagation. To start a new (blocking child) +`Span`, you can use `StartSpanFromContext`. + +```go + func xyz(ctx context.Context, ...) { + ... + span, ctx := opentracing.StartSpanFromContext(ctx, "operation_name") + defer span.Finish() + span.LogFields( + log.String("event", "soft error"), + log.String("type", "cache timeout"), + log.Int("waited.millis", 1500)) + ... + } +``` + +#### Starting an empty trace by creating a "root span" + +It's always possible to create a "root" `Span` with no parent or other causal +reference. + +```go + func xyz() { + ... + sp := opentracing.StartSpan("operation_name") + defer sp.Finish() + ... + } +``` + +#### Creating a (child) Span given an existing (parent) Span + +```go + func xyz(parentSpan opentracing.Span, ...) { + ... + sp := opentracing.StartSpan( + "operation_name", + opentracing.ChildOf(parentSpan.Context())) + defer sp.Finish() + ... + } +``` + +#### Serializing to the wire + +```go + func makeSomeRequest(ctx context.Context) ... { + if span := opentracing.SpanFromContext(ctx); span != nil { + httpClient := &http.Client{} + httpReq, _ := http.NewRequest("GET", "http://myservice/", nil) + + // Transmit the span's TraceContext as HTTP headers on our + // outbound request. + opentracing.GlobalTracer().Inject( + span.Context(), + opentracing.HTTPHeaders, + opentracing.HTTPHeadersCarrier(httpReq.Header)) + + resp, err := httpClient.Do(httpReq) + ... + } + ... + } +``` + +#### Deserializing from the wire + +```go + http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) { + var serverSpan opentracing.Span + appSpecificOperationName := ... 
+ wireContext, err := opentracing.GlobalTracer().Extract( + opentracing.HTTPHeaders, + opentracing.HTTPHeadersCarrier(req.Header)) + if err != nil { + // Optionally record something about err here + } + + // Create the span referring to the RPC client if available. + // If wireContext == nil, a root span will be created. + serverSpan = opentracing.StartSpan( + appSpecificOperationName, + ext.RPCServerOption(wireContext)) + + defer serverSpan.Finish() + + ctx := opentracing.ContextWithSpan(context.Background(), serverSpan) + ... + } +``` + +#### Conditionally capture a field using `log.Noop` + +In some situations, you may want to dynamically decide whether or not +to log a field. For example, you may want to capture additional data, +such as a customer ID, in non-production environments: + +```go + func Customer(order *Order) log.Field { + if os.Getenv("ENVIRONMENT") == "dev" { + return log.String("customer", order.Customer.ID) + } + return log.Noop() + } +``` + +#### Goroutine-safety + +The entire public API is goroutine-safe and does not require external +synchronization. + +## API pointers for those implementing a tracing system + +Tracing system implementors may be able to reuse or copy-paste-modify the `basictracer` package, found [here](https://github.com/opentracing/basictracer-go). In particular, see `basictracer.New(...)`. + +## API compatibility + +For the time being, "mild" backwards-incompatible changes may be made without changing the major version number. As OpenTracing and `opentracing-go` mature, backwards compatibility will become more of a priority. + +## Tracer test suite + +A test suite is available in the [harness](https://godoc.org/github.com/opentracing/opentracing-go/harness) package that can assist Tracer implementors to assert that their Tracer is working correctly. + +## Licensing + +[Apache 2.0 License](./LICENSE). 
diff --git a/vendor/github.com/opentracing/opentracing-go/globaltracer.go b/vendor/github.com/opentracing/opentracing-go/globaltracer.go new file mode 100644 index 000000000..4f7066a92 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/globaltracer.go @@ -0,0 +1,42 @@ +package opentracing + +type registeredTracer struct { + tracer Tracer + isRegistered bool +} + +var ( + globalTracer = registeredTracer{NoopTracer{}, false} +) + +// SetGlobalTracer sets the [singleton] opentracing.Tracer returned by +// GlobalTracer(). Those who use GlobalTracer (rather than directly manage an +// opentracing.Tracer instance) should call SetGlobalTracer as early as +// possible in main(), prior to calling the `StartSpan` global func below. +// Prior to calling `SetGlobalTracer`, any Spans started via the `StartSpan` +// (etc) globals are noops. +func SetGlobalTracer(tracer Tracer) { + globalTracer = registeredTracer{tracer, true} +} + +// GlobalTracer returns the global singleton `Tracer` implementation. +// Before `SetGlobalTracer()` is called, the `GlobalTracer()` is a noop +// implementation that drops all data handed to it. +func GlobalTracer() Tracer { + return globalTracer.tracer +} + +// StartSpan defers to `Tracer.StartSpan`. See `GlobalTracer()`. +func StartSpan(operationName string, opts ...StartSpanOption) Span { + return globalTracer.tracer.StartSpan(operationName, opts...) +} + +// InitGlobalTracer is deprecated. Please use SetGlobalTracer. 
+func InitGlobalTracer(tracer Tracer) { + SetGlobalTracer(tracer) +} + +// IsGlobalTracerRegistered returns a `bool` to indicate if a tracer has been globally registered +func IsGlobalTracerRegistered() bool { + return globalTracer.isRegistered +} diff --git a/vendor/github.com/opentracing/opentracing-go/gocontext.go b/vendor/github.com/opentracing/opentracing-go/gocontext.go new file mode 100644 index 000000000..08c00c04e --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/gocontext.go @@ -0,0 +1,60 @@ +package opentracing + +import "context" + +type contextKey struct{} + +var activeSpanKey = contextKey{} + +// ContextWithSpan returns a new `context.Context` that holds a reference to +// `span`'s SpanContext. +func ContextWithSpan(ctx context.Context, span Span) context.Context { + return context.WithValue(ctx, activeSpanKey, span) +} + +// SpanFromContext returns the `Span` previously associated with `ctx`, or +// `nil` if no such `Span` could be found. +// +// NOTE: context.Context != SpanContext: the former is Go's intra-process +// context propagation mechanism, and the latter houses OpenTracing's per-Span +// identity and baggage information. +func SpanFromContext(ctx context.Context) Span { + val := ctx.Value(activeSpanKey) + if sp, ok := val.(Span); ok { + return sp + } + return nil +} + +// StartSpanFromContext starts and returns a Span with `operationName`, using +// any Span found within `ctx` as a ChildOfRef. If no such parent could be +// found, StartSpanFromContext creates a root (parentless) Span. +// +// The second return value is a context.Context object built around the +// returned Span. +// +// Example usage: +// +// SomeFunction(ctx context.Context, ...) { +// sp, ctx := opentracing.StartSpanFromContext(ctx, "SomeFunction") +// defer sp.Finish() +// ... 
+// } +func StartSpanFromContext(ctx context.Context, operationName string, opts ...StartSpanOption) (Span, context.Context) { + return StartSpanFromContextWithTracer(ctx, GlobalTracer(), operationName, opts...) +} + +// StartSpanFromContextWithTracer starts and returns a span with `operationName` +// using a span found within the context as a ChildOfRef. If that doesn't exist +// it creates a root span. It also returns a context.Context object built +// around the returned span. +// +// It's behavior is identical to StartSpanFromContext except that it takes an explicit +// tracer as opposed to using the global tracer. +func StartSpanFromContextWithTracer(ctx context.Context, tracer Tracer, operationName string, opts ...StartSpanOption) (Span, context.Context) { + if parentSpan := SpanFromContext(ctx); parentSpan != nil { + opts = append(opts, ChildOf(parentSpan.Context())) + } + span := tracer.StartSpan(operationName, opts...) + return span, ContextWithSpan(ctx, span) +} diff --git a/vendor/github.com/opentracing/opentracing-go/log/field.go b/vendor/github.com/opentracing/opentracing-go/log/field.go new file mode 100644 index 000000000..50feea341 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/log/field.go @@ -0,0 +1,269 @@ +package log + +import ( + "fmt" + "math" +) + +type fieldType int + +const ( + stringType fieldType = iota + boolType + intType + int32Type + uint32Type + int64Type + uint64Type + float32Type + float64Type + errorType + objectType + lazyLoggerType + noopType +) + +// Field instances are constructed via LogBool, LogString, and so on. +// Tracing implementations may then handle them via the Field.Marshal +// method. 
+// +// "heavily influenced by" (i.e., partially stolen from) +// https://github.com/uber-go/zap +type Field struct { + key string + fieldType fieldType + numericVal int64 + stringVal string + interfaceVal interface{} +} + +// String adds a string-valued key:value pair to a Span.LogFields() record +func String(key, val string) Field { + return Field{ + key: key, + fieldType: stringType, + stringVal: val, + } +} + +// Bool adds a bool-valued key:value pair to a Span.LogFields() record +func Bool(key string, val bool) Field { + var numericVal int64 + if val { + numericVal = 1 + } + return Field{ + key: key, + fieldType: boolType, + numericVal: numericVal, + } +} + +// Int adds an int-valued key:value pair to a Span.LogFields() record +func Int(key string, val int) Field { + return Field{ + key: key, + fieldType: intType, + numericVal: int64(val), + } +} + +// Int32 adds an int32-valued key:value pair to a Span.LogFields() record +func Int32(key string, val int32) Field { + return Field{ + key: key, + fieldType: int32Type, + numericVal: int64(val), + } +} + +// Int64 adds an int64-valued key:value pair to a Span.LogFields() record +func Int64(key string, val int64) Field { + return Field{ + key: key, + fieldType: int64Type, + numericVal: val, + } +} + +// Uint32 adds a uint32-valued key:value pair to a Span.LogFields() record +func Uint32(key string, val uint32) Field { + return Field{ + key: key, + fieldType: uint32Type, + numericVal: int64(val), + } +} + +// Uint64 adds a uint64-valued key:value pair to a Span.LogFields() record +func Uint64(key string, val uint64) Field { + return Field{ + key: key, + fieldType: uint64Type, + numericVal: int64(val), + } +} + +// Float32 adds a float32-valued key:value pair to a Span.LogFields() record +func Float32(key string, val float32) Field { + return Field{ + key: key, + fieldType: float32Type, + numericVal: int64(math.Float32bits(val)), + } +} + +// Float64 adds a float64-valued key:value pair to a Span.LogFields() record 
+func Float64(key string, val float64) Field { + return Field{ + key: key, + fieldType: float64Type, + numericVal: int64(math.Float64bits(val)), + } +} + +// Error adds an error with the key "error" to a Span.LogFields() record +func Error(err error) Field { + return Field{ + key: "error", + fieldType: errorType, + interfaceVal: err, + } +} + +// Object adds an object-valued key:value pair to a Span.LogFields() record +func Object(key string, obj interface{}) Field { + return Field{ + key: key, + fieldType: objectType, + interfaceVal: obj, + } +} + +// LazyLogger allows for user-defined, late-bound logging of arbitrary data +type LazyLogger func(fv Encoder) + +// Lazy adds a LazyLogger to a Span.LogFields() record; the tracing +// implementation will call the LazyLogger function at an indefinite time in +// the future (after Lazy() returns). +func Lazy(ll LazyLogger) Field { + return Field{ + fieldType: lazyLoggerType, + interfaceVal: ll, + } +} + +// Noop creates a no-op log field that should be ignored by the tracer. +// It can be used to capture optional fields, for example those that should +// only be logged in non-production environment: +// +// func customerField(order *Order) log.Field { +// if os.Getenv("ENVIRONMENT") == "dev" { +// return log.String("customer", order.Customer.ID) +// } +// return log.Noop() +// } +// +// span.LogFields(log.String("event", "purchase"), customerField(order)) +// +func Noop() Field { + return Field{ + fieldType: noopType, + } +} + +// Encoder allows access to the contents of a Field (via a call to +// Field.Marshal). +// +// Tracer implementations typically provide an implementation of Encoder; +// OpenTracing callers typically do not need to concern themselves with it. 
+type Encoder interface { + EmitString(key, value string) + EmitBool(key string, value bool) + EmitInt(key string, value int) + EmitInt32(key string, value int32) + EmitInt64(key string, value int64) + EmitUint32(key string, value uint32) + EmitUint64(key string, value uint64) + EmitFloat32(key string, value float32) + EmitFloat64(key string, value float64) + EmitObject(key string, value interface{}) + EmitLazyLogger(value LazyLogger) +} + +// Marshal passes a Field instance through to the appropriate +// field-type-specific method of an Encoder. +func (lf Field) Marshal(visitor Encoder) { + switch lf.fieldType { + case stringType: + visitor.EmitString(lf.key, lf.stringVal) + case boolType: + visitor.EmitBool(lf.key, lf.numericVal != 0) + case intType: + visitor.EmitInt(lf.key, int(lf.numericVal)) + case int32Type: + visitor.EmitInt32(lf.key, int32(lf.numericVal)) + case int64Type: + visitor.EmitInt64(lf.key, int64(lf.numericVal)) + case uint32Type: + visitor.EmitUint32(lf.key, uint32(lf.numericVal)) + case uint64Type: + visitor.EmitUint64(lf.key, uint64(lf.numericVal)) + case float32Type: + visitor.EmitFloat32(lf.key, math.Float32frombits(uint32(lf.numericVal))) + case float64Type: + visitor.EmitFloat64(lf.key, math.Float64frombits(uint64(lf.numericVal))) + case errorType: + if err, ok := lf.interfaceVal.(error); ok { + visitor.EmitString(lf.key, err.Error()) + } else { + visitor.EmitString(lf.key, "") + } + case objectType: + visitor.EmitObject(lf.key, lf.interfaceVal) + case lazyLoggerType: + visitor.EmitLazyLogger(lf.interfaceVal.(LazyLogger)) + case noopType: + // intentionally left blank + } +} + +// Key returns the field's key. +func (lf Field) Key() string { + return lf.key +} + +// Value returns the field's value as interface{}. 
+func (lf Field) Value() interface{} { + switch lf.fieldType { + case stringType: + return lf.stringVal + case boolType: + return lf.numericVal != 0 + case intType: + return int(lf.numericVal) + case int32Type: + return int32(lf.numericVal) + case int64Type: + return int64(lf.numericVal) + case uint32Type: + return uint32(lf.numericVal) + case uint64Type: + return uint64(lf.numericVal) + case float32Type: + return math.Float32frombits(uint32(lf.numericVal)) + case float64Type: + return math.Float64frombits(uint64(lf.numericVal)) + case errorType, objectType, lazyLoggerType: + return lf.interfaceVal + case noopType: + return nil + default: + return nil + } +} + +// String returns a string representation of the key and value. +func (lf Field) String() string { + return fmt.Sprint(lf.key, ":", lf.Value()) +} diff --git a/vendor/github.com/opentracing/opentracing-go/log/util.go b/vendor/github.com/opentracing/opentracing-go/log/util.go new file mode 100644 index 000000000..3832feb5c --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/log/util.go @@ -0,0 +1,54 @@ +package log + +import "fmt" + +// InterleavedKVToFields converts keyValues a la Span.LogKV() to a Field slice +// a la Span.LogFields(). 
+func InterleavedKVToFields(keyValues ...interface{}) ([]Field, error) { + if len(keyValues)%2 != 0 { + return nil, fmt.Errorf("non-even keyValues len: %d", len(keyValues)) + } + fields := make([]Field, len(keyValues)/2) + for i := 0; i*2 < len(keyValues); i++ { + key, ok := keyValues[i*2].(string) + if !ok { + return nil, fmt.Errorf( + "non-string key (pair #%d): %T", + i, keyValues[i*2]) + } + switch typedVal := keyValues[i*2+1].(type) { + case bool: + fields[i] = Bool(key, typedVal) + case string: + fields[i] = String(key, typedVal) + case int: + fields[i] = Int(key, typedVal) + case int8: + fields[i] = Int32(key, int32(typedVal)) + case int16: + fields[i] = Int32(key, int32(typedVal)) + case int32: + fields[i] = Int32(key, typedVal) + case int64: + fields[i] = Int64(key, typedVal) + case uint: + fields[i] = Uint64(key, uint64(typedVal)) + case uint64: + fields[i] = Uint64(key, typedVal) + case uint8: + fields[i] = Uint32(key, uint32(typedVal)) + case uint16: + fields[i] = Uint32(key, uint32(typedVal)) + case uint32: + fields[i] = Uint32(key, typedVal) + case float32: + fields[i] = Float32(key, typedVal) + case float64: + fields[i] = Float64(key, typedVal) + default: + // When in doubt, coerce to a string + fields[i] = String(key, fmt.Sprint(typedVal)) + } + } + return fields, nil +} diff --git a/vendor/github.com/opentracing/opentracing-go/noop.go b/vendor/github.com/opentracing/opentracing-go/noop.go new file mode 100644 index 000000000..0d32f692c --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/noop.go @@ -0,0 +1,64 @@ +package opentracing + +import "github.com/opentracing/opentracing-go/log" + +// A NoopTracer is a trivial, minimum overhead implementation of Tracer +// for which all operations are no-ops. +// +// The primary use of this implementation is in libraries, such as RPC +// frameworks, that make tracing an optional feature controlled by the +// end user. 
A no-op implementation allows said libraries to use it +// as the default Tracer and to write instrumentation that does +// not need to keep checking if the tracer instance is nil. +// +// For the same reason, the NoopTracer is the default "global" tracer +// (see GlobalTracer and SetGlobalTracer functions). +// +// WARNING: NoopTracer does not support baggage propagation. +type NoopTracer struct{} + +type noopSpan struct{} +type noopSpanContext struct{} + +var ( + defaultNoopSpanContext = noopSpanContext{} + defaultNoopSpan = noopSpan{} + defaultNoopTracer = NoopTracer{} +) + +const ( + emptyString = "" +) + +// noopSpanContext: +func (n noopSpanContext) ForeachBaggageItem(handler func(k, v string) bool) {} + +// noopSpan: +func (n noopSpan) Context() SpanContext { return defaultNoopSpanContext } +func (n noopSpan) SetBaggageItem(key, val string) Span { return defaultNoopSpan } +func (n noopSpan) BaggageItem(key string) string { return emptyString } +func (n noopSpan) SetTag(key string, value interface{}) Span { return n } +func (n noopSpan) LogFields(fields ...log.Field) {} +func (n noopSpan) LogKV(keyVals ...interface{}) {} +func (n noopSpan) Finish() {} +func (n noopSpan) FinishWithOptions(opts FinishOptions) {} +func (n noopSpan) SetOperationName(operationName string) Span { return n } +func (n noopSpan) Tracer() Tracer { return defaultNoopTracer } +func (n noopSpan) LogEvent(event string) {} +func (n noopSpan) LogEventWithPayload(event string, payload interface{}) {} +func (n noopSpan) Log(data LogData) {} + +// StartSpan belongs to the Tracer interface. +func (n NoopTracer) StartSpan(operationName string, opts ...StartSpanOption) Span { + return defaultNoopSpan +} + +// Inject belongs to the Tracer interface. +func (n NoopTracer) Inject(sp SpanContext, format interface{}, carrier interface{}) error { + return nil +} + +// Extract belongs to the Tracer interface. 
+func (n NoopTracer) Extract(format interface{}, carrier interface{}) (SpanContext, error) { + return nil, ErrSpanContextNotFound +} diff --git a/vendor/github.com/opentracing/opentracing-go/propagation.go b/vendor/github.com/opentracing/opentracing-go/propagation.go new file mode 100644 index 000000000..b0c275eb0 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/propagation.go @@ -0,0 +1,176 @@ +package opentracing + +import ( + "errors" + "net/http" +) + +/////////////////////////////////////////////////////////////////////////////// +// CORE PROPAGATION INTERFACES: +/////////////////////////////////////////////////////////////////////////////// + +var ( + // ErrUnsupportedFormat occurs when the `format` passed to Tracer.Inject() or + // Tracer.Extract() is not recognized by the Tracer implementation. + ErrUnsupportedFormat = errors.New("opentracing: Unknown or unsupported Inject/Extract format") + + // ErrSpanContextNotFound occurs when the `carrier` passed to + // Tracer.Extract() is valid and uncorrupted but has insufficient + // information to extract a SpanContext. + ErrSpanContextNotFound = errors.New("opentracing: SpanContext not found in Extract carrier") + + // ErrInvalidSpanContext errors occur when Tracer.Inject() is asked to + // operate on a SpanContext which it is not prepared to handle (for + // example, since it was created by a different tracer implementation). + ErrInvalidSpanContext = errors.New("opentracing: SpanContext type incompatible with tracer") + + // ErrInvalidCarrier errors occur when Tracer.Inject() or Tracer.Extract() + // implementations expect a different type of `carrier` than they are + // given. + ErrInvalidCarrier = errors.New("opentracing: Invalid Inject/Extract carrier") + + // ErrSpanContextCorrupted occurs when the `carrier` passed to + // Tracer.Extract() is of the expected type but is corrupted. 
+ ErrSpanContextCorrupted = errors.New("opentracing: SpanContext data corrupted in Extract carrier") +) + +/////////////////////////////////////////////////////////////////////////////// +// BUILTIN PROPAGATION FORMATS: +/////////////////////////////////////////////////////////////////////////////// + +// BuiltinFormat is used to demarcate the values within package `opentracing` +// that are intended for use with the Tracer.Inject() and Tracer.Extract() +// methods. +type BuiltinFormat byte + +const ( + // Binary represents SpanContexts as opaque binary data. + // + // For Tracer.Inject(): the carrier must be an `io.Writer`. + // + // For Tracer.Extract(): the carrier must be an `io.Reader`. + Binary BuiltinFormat = iota + + // TextMap represents SpanContexts as key:value string pairs. + // + // Unlike HTTPHeaders, the TextMap format does not restrict the key or + // value character sets in any way. + // + // For Tracer.Inject(): the carrier must be a `TextMapWriter`. + // + // For Tracer.Extract(): the carrier must be a `TextMapReader`. + TextMap + + // HTTPHeaders represents SpanContexts as HTTP header string pairs. + // + // Unlike TextMap, the HTTPHeaders format requires that the keys and values + // be valid as HTTP headers as-is (i.e., character casing may be unstable + // and special characters are disallowed in keys, values should be + // URL-escaped, etc). + // + // For Tracer.Inject(): the carrier must be a `TextMapWriter`. + // + // For Tracer.Extract(): the carrier must be a `TextMapReader`. + // + // See HTTPHeadersCarrier for an implementation of both TextMapWriter + // and TextMapReader that defers to an http.Header instance for storage. 
+ // For example, Inject(): + // + // carrier := opentracing.HTTPHeadersCarrier(httpReq.Header) + // err := span.Tracer().Inject( + // span.Context(), opentracing.HTTPHeaders, carrier) + // + // Or Extract(): + // + // carrier := opentracing.HTTPHeadersCarrier(httpReq.Header) + // clientContext, err := tracer.Extract( + // opentracing.HTTPHeaders, carrier) + // + HTTPHeaders +) + +// TextMapWriter is the Inject() carrier for the TextMap builtin format. With +// it, the caller can encode a SpanContext for propagation as entries in a map +// of unicode strings. +type TextMapWriter interface { + // Set a key:value pair to the carrier. Multiple calls to Set() for the + // same key leads to undefined behavior. + // + // NOTE: The backing store for the TextMapWriter may contain data unrelated + // to SpanContext. As such, Inject() and Extract() implementations that + // call the TextMapWriter and TextMapReader interfaces must agree on a + // prefix or other convention to distinguish their own key:value pairs. + Set(key, val string) +} + +// TextMapReader is the Extract() carrier for the TextMap builtin format. With it, +// the caller can decode a propagated SpanContext as entries in a map of +// unicode strings. +type TextMapReader interface { + // ForeachKey returns TextMap contents via repeated calls to the `handler` + // function. If any call to `handler` returns a non-nil error, ForeachKey + // terminates and returns that error. + // + // NOTE: The backing store for the TextMapReader may contain data unrelated + // to SpanContext. As such, Inject() and Extract() implementations that + // call the TextMapWriter and TextMapReader interfaces must agree on a + // prefix or other convention to distinguish their own key:value pairs. + // + // The "foreach" callback pattern reduces unnecessary copying in some cases + // and also allows implementations to hold locks while the map is read. 
+ ForeachKey(handler func(key, val string) error) error +} + +// TextMapCarrier allows the use of regular map[string]string +// as both TextMapWriter and TextMapReader. +type TextMapCarrier map[string]string + +// ForeachKey conforms to the TextMapReader interface. +func (c TextMapCarrier) ForeachKey(handler func(key, val string) error) error { + for k, v := range c { + if err := handler(k, v); err != nil { + return err + } + } + return nil +} + +// Set implements Set() of opentracing.TextMapWriter +func (c TextMapCarrier) Set(key, val string) { + c[key] = val +} + +// HTTPHeadersCarrier satisfies both TextMapWriter and TextMapReader. +// +// Example usage for server side: +// +// carrier := opentracing.HTTPHeadersCarrier(httpReq.Header) +// clientContext, err := tracer.Extract(opentracing.HTTPHeaders, carrier) +// +// Example usage for client side: +// +// carrier := opentracing.HTTPHeadersCarrier(httpReq.Header) +// err := tracer.Inject( +// span.Context(), +// opentracing.HTTPHeaders, +// carrier) +// +type HTTPHeadersCarrier http.Header + +// Set conforms to the TextMapWriter interface. +func (c HTTPHeadersCarrier) Set(key, val string) { + h := http.Header(c) + h.Set(key, val) +} + +// ForeachKey conforms to the TextMapReader interface. +func (c HTTPHeadersCarrier) ForeachKey(handler func(key, val string) error) error { + for k, vals := range c { + for _, v := range vals { + if err := handler(k, v); err != nil { + return err + } + } + } + return nil +} diff --git a/vendor/github.com/opentracing/opentracing-go/span.go b/vendor/github.com/opentracing/opentracing-go/span.go new file mode 100644 index 000000000..0d3fb5341 --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/span.go @@ -0,0 +1,189 @@ +package opentracing + +import ( + "time" + + "github.com/opentracing/opentracing-go/log" +) + +// SpanContext represents Span state that must propagate to descendant Spans and across process +// boundaries (e.g., a tuple). 
+type SpanContext interface { + // ForeachBaggageItem grants access to all baggage items stored in the + // SpanContext. + // The handler function will be called for each baggage key/value pair. + // The ordering of items is not guaranteed. + // + // The bool return value indicates if the handler wants to continue iterating + // through the rest of the baggage items; for example if the handler is trying to + // find some baggage item by pattern matching the name, it can return false + // as soon as the item is found to stop further iterations. + ForeachBaggageItem(handler func(k, v string) bool) +} + +// Span represents an active, un-finished span in the OpenTracing system. +// +// Spans are created by the Tracer interface. +type Span interface { + // Sets the end timestamp and finalizes Span state. + // + // With the exception of calls to Context() (which are always allowed), + // Finish() must be the last call made to any span instance, and to do + // otherwise leads to undefined behavior. + Finish() + // FinishWithOptions is like Finish() but with explicit control over + // timestamps and log data. + FinishWithOptions(opts FinishOptions) + + // Context() yields the SpanContext for this Span. Note that the return + // value of Context() is still valid after a call to Span.Finish(), as is + // a call to Span.Context() after a call to Span.Finish(). + Context() SpanContext + + // Sets or changes the operation name. + // + // Returns a reference to this Span for chaining. + SetOperationName(operationName string) Span + + // Adds a tag to the span. + // + // If there is a pre-existing tag set for `key`, it is overwritten. + // + // Tag values can be numeric types, strings, or bools. The behavior of + // other tag value types is undefined at the OpenTracing level. If a + // tracing system does not know how to handle a particular value type, it + // may ignore the tag, but shall not panic. + // + // Returns a reference to this Span for chaining. 
+ SetTag(key string, value interface{}) Span + + // LogFields is an efficient and type-checked way to record key:value + // logging data about a Span, though the programming interface is a little + // more verbose than LogKV(). Here's an example: + // + // span.LogFields( + // log.String("event", "soft error"), + // log.String("type", "cache timeout"), + // log.Int("waited.millis", 1500)) + // + // Also see Span.FinishWithOptions() and FinishOptions.BulkLogData. + LogFields(fields ...log.Field) + + // LogKV is a concise, readable way to record key:value logging data about + // a Span, though unfortunately this also makes it less efficient and less + // type-safe than LogFields(). Here's an example: + // + // span.LogKV( + // "event", "soft error", + // "type", "cache timeout", + // "waited.millis", 1500) + // + // For LogKV (as opposed to LogFields()), the parameters must appear as + // key-value pairs, like + // + // span.LogKV(key1, val1, key2, val2, key3, val3, ...) + // + // The keys must all be strings. The values may be strings, numeric types, + // bools, Go error instances, or arbitrary structs. + // + // (Note to implementors: consider the log.InterleavedKVToFields() helper) + LogKV(alternatingKeyValues ...interface{}) + + // SetBaggageItem sets a key:value pair on this Span and its SpanContext + // that also propagates to descendants of this Span. + // + // SetBaggageItem() enables powerful functionality given a full-stack + // opentracing integration (e.g., arbitrary application data from a mobile + // app can make it, transparently, all the way into the depths of a storage + // system), and with it some powerful costs: use this feature with care. + // + // IMPORTANT NOTE #1: SetBaggageItem() will only propagate baggage items to + // *future* causal descendants of the associated Span. + // + // IMPORTANT NOTE #2: Use this thoughtfully and with care. 
Every key and + // value is copied into every local *and remote* child of the associated + // Span, and that can add up to a lot of network and cpu overhead. + // + // Returns a reference to this Span for chaining. + SetBaggageItem(restrictedKey, value string) Span + + // Gets the value for a baggage item given its key. Returns the empty string + // if the value isn't found in this Span. + BaggageItem(restrictedKey string) string + + // Provides access to the Tracer that created this Span. + Tracer() Tracer + + // Deprecated: use LogFields or LogKV + LogEvent(event string) + // Deprecated: use LogFields or LogKV + LogEventWithPayload(event string, payload interface{}) + // Deprecated: use LogFields or LogKV + Log(data LogData) +} + +// LogRecord is data associated with a single Span log. Every LogRecord +// instance must specify at least one Field. +type LogRecord struct { + Timestamp time.Time + Fields []log.Field +} + +// FinishOptions allows Span.FinishWithOptions callers to override the finish +// timestamp and provide log data via a bulk interface. +type FinishOptions struct { + // FinishTime overrides the Span's finish time, or implicitly becomes + // time.Now() if FinishTime.IsZero(). + // + // FinishTime must resolve to a timestamp that's >= the Span's StartTime + // (per StartSpanOptions). + FinishTime time.Time + + // LogRecords allows the caller to specify the contents of many LogFields() + // calls with a single slice. May be nil. + // + // None of the LogRecord.Timestamp values may be .IsZero() (i.e., they must + // be set explicitly). Also, they must be >= the Span's start timestamp and + // <= the FinishTime (or time.Now() if FinishTime.IsZero()). Otherwise the + // behavior of FinishWithOptions() is undefined. + // + // If specified, the caller hands off ownership of LogRecords at + // FinishWithOptions() invocation time. + // + // If specified, the (deprecated) BulkLogData must be nil or empty. 
+ LogRecords []LogRecord + + // BulkLogData is DEPRECATED. + BulkLogData []LogData +} + +// LogData is DEPRECATED +type LogData struct { + Timestamp time.Time + Event string + Payload interface{} +} + +// ToLogRecord converts a deprecated LogData to a non-deprecated LogRecord +func (ld *LogData) ToLogRecord() LogRecord { + var literalTimestamp time.Time + if ld.Timestamp.IsZero() { + literalTimestamp = time.Now() + } else { + literalTimestamp = ld.Timestamp + } + rval := LogRecord{ + Timestamp: literalTimestamp, + } + if ld.Payload == nil { + rval.Fields = []log.Field{ + log.String("event", ld.Event), + } + } else { + rval.Fields = []log.Field{ + log.String("event", ld.Event), + log.Object("payload", ld.Payload), + } + } + return rval +} diff --git a/vendor/github.com/opentracing/opentracing-go/tracer.go b/vendor/github.com/opentracing/opentracing-go/tracer.go new file mode 100644 index 000000000..715f0cedf --- /dev/null +++ b/vendor/github.com/opentracing/opentracing-go/tracer.go @@ -0,0 +1,304 @@ +package opentracing + +import "time" + +// Tracer is a simple, thin interface for Span creation and SpanContext +// propagation. +type Tracer interface { + + // Create, start, and return a new Span with the given `operationName` and + // incorporate the given StartSpanOption `opts`. (Note that `opts` borrows + // from the "functional options" pattern, per + // http://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis) + // + // A Span with no SpanReference options (e.g., opentracing.ChildOf() or + // opentracing.FollowsFrom()) becomes the root of its own trace. + // + // Examples: + // + // var tracer opentracing.Tracer = ... 
+ // + // // The root-span case: + // sp := tracer.StartSpan("GetFeed") + // + // // The vanilla child span case: + // sp := tracer.StartSpan( + // "GetFeed", + // opentracing.ChildOf(parentSpan.Context())) + // + // // All the bells and whistles: + // sp := tracer.StartSpan( + // "GetFeed", + // opentracing.ChildOf(parentSpan.Context()), + // opentracing.Tag{"user_agent", loggedReq.UserAgent}, + // opentracing.StartTime(loggedReq.Timestamp), + // ) + // + StartSpan(operationName string, opts ...StartSpanOption) Span + + // Inject() takes the `sm` SpanContext instance and injects it for + // propagation within `carrier`. The actual type of `carrier` depends on + // the value of `format`. + // + // OpenTracing defines a common set of `format` values (see BuiltinFormat), + // and each has an expected carrier type. + // + // Other packages may declare their own `format` values, much like the keys + // used by `context.Context` (see https://godoc.org/context#WithValue). + // + // Example usage (sans error handling): + // + // carrier := opentracing.HTTPHeadersCarrier(httpReq.Header) + // err := tracer.Inject( + // span.Context(), + // opentracing.HTTPHeaders, + // carrier) + // + // NOTE: All opentracing.Tracer implementations MUST support all + // BuiltinFormats. + // + // Implementations may return opentracing.ErrUnsupportedFormat if `format` + // is not supported by (or not known by) the implementation. + // + // Implementations may return opentracing.ErrInvalidCarrier or any other + // implementation-specific error if the format is supported but injection + // fails anyway. + // + // See Tracer.Extract(). + Inject(sm SpanContext, format interface{}, carrier interface{}) error + + // Extract() returns a SpanContext instance given `format` and `carrier`. + // + // OpenTracing defines a common set of `format` values (see BuiltinFormat), + // and each has an expected carrier type. 
+ // + // Other packages may declare their own `format` values, much like the keys + // used by `context.Context` (see + // https://godoc.org/golang.org/x/net/context#WithValue). + // + // Example usage (with StartSpan): + // + // + // carrier := opentracing.HTTPHeadersCarrier(httpReq.Header) + // clientContext, err := tracer.Extract(opentracing.HTTPHeaders, carrier) + // + // // ... assuming the ultimate goal here is to resume the trace with a + // // server-side Span: + // var serverSpan opentracing.Span + // if err == nil { + // span = tracer.StartSpan( + // rpcMethodName, ext.RPCServerOption(clientContext)) + // } else { + // span = tracer.StartSpan(rpcMethodName) + // } + // + // + // NOTE: All opentracing.Tracer implementations MUST support all + // BuiltinFormats. + // + // Return values: + // - A successful Extract returns a SpanContext instance and a nil error + // - If there was simply no SpanContext to extract in `carrier`, Extract() + // returns (nil, opentracing.ErrSpanContextNotFound) + // - If `format` is unsupported or unrecognized, Extract() returns (nil, + // opentracing.ErrUnsupportedFormat) + // - If there are more fundamental problems with the `carrier` object, + // Extract() may return opentracing.ErrInvalidCarrier, + // opentracing.ErrSpanContextCorrupted, or implementation-specific + // errors. + // + // See Tracer.Inject(). + Extract(format interface{}, carrier interface{}) (SpanContext, error) +} + +// StartSpanOptions allows Tracer.StartSpan() callers and implementors a +// mechanism to override the start timestamp, specify Span References, and make +// a single Tag or multiple Tags available at Span start time. +// +// StartSpan() callers should look at the StartSpanOption interface and +// implementations available in this package. 
+// +// Tracer implementations can convert a slice of `StartSpanOption` instances +// into a `StartSpanOptions` struct like so: +// +// func StartSpan(opName string, opts ...opentracing.StartSpanOption) { +// sso := opentracing.StartSpanOptions{} +// for _, o := range opts { +// o.Apply(&sso) +// } +// ... +// } +// +type StartSpanOptions struct { + // Zero or more causal references to other Spans (via their SpanContext). + // If empty, start a "root" Span (i.e., start a new trace). + References []SpanReference + + // StartTime overrides the Span's start time, or implicitly becomes + // time.Now() if StartTime.IsZero(). + StartTime time.Time + + // Tags may have zero or more entries; the restrictions on map values are + // identical to those for Span.SetTag(). May be nil. + // + // If specified, the caller hands off ownership of Tags at + // StartSpan() invocation time. + Tags map[string]interface{} +} + +// StartSpanOption instances (zero or more) may be passed to Tracer.StartSpan. +// +// StartSpanOption borrows from the "functional options" pattern, per +// http://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis +type StartSpanOption interface { + Apply(*StartSpanOptions) +} + +// SpanReferenceType is an enum type describing different categories of +// relationships between two Spans. If Span-2 refers to Span-1, the +// SpanReferenceType describes Span-1 from Span-2's perspective. For example, +// ChildOfRef means that Span-1 created Span-2. +// +// NOTE: Span-1 and Span-2 do *not* necessarily depend on each other for +// completion; e.g., Span-2 may be part of a background job enqueued by Span-1, +// or Span-2 may be sitting in a distributed queue behind Span-1. +type SpanReferenceType int + +const ( + // ChildOfRef refers to a parent Span that caused *and* somehow depends + // upon the new child Span. Often (but not always), the parent Span cannot + // finish until the child Span does. 
+ // + // An timing diagram for a ChildOfRef that's blocked on the new Span: + // + // [-Parent Span---------] + // [-Child Span----] + // + // See http://opentracing.io/spec/ + // + // See opentracing.ChildOf() + ChildOfRef SpanReferenceType = iota + + // FollowsFromRef refers to a parent Span that does not depend in any way + // on the result of the new child Span. For instance, one might use + // FollowsFromRefs to describe pipeline stages separated by queues, + // or a fire-and-forget cache insert at the tail end of a web request. + // + // A FollowsFromRef Span is part of the same logical trace as the new Span: + // i.e., the new Span is somehow caused by the work of its FollowsFromRef. + // + // All of the following could be valid timing diagrams for children that + // "FollowFrom" a parent. + // + // [-Parent Span-] [-Child Span-] + // + // + // [-Parent Span--] + // [-Child Span-] + // + // + // [-Parent Span-] + // [-Child Span-] + // + // See http://opentracing.io/spec/ + // + // See opentracing.FollowsFrom() + FollowsFromRef +) + +// SpanReference is a StartSpanOption that pairs a SpanReferenceType and a +// referenced SpanContext. See the SpanReferenceType documentation for +// supported relationships. If SpanReference is created with +// ReferencedContext==nil, it has no effect. Thus it allows for a more concise +// syntax for starting spans: +// +// sc, _ := tracer.Extract(someFormat, someCarrier) +// span := tracer.StartSpan("operation", opentracing.ChildOf(sc)) +// +// The `ChildOf(sc)` option above will not panic if sc == nil, it will just +// not add the parent span reference to the options. +type SpanReference struct { + Type SpanReferenceType + ReferencedContext SpanContext +} + +// Apply satisfies the StartSpanOption interface. +func (r SpanReference) Apply(o *StartSpanOptions) { + if r.ReferencedContext != nil { + o.References = append(o.References, r) + } +} + +// ChildOf returns a StartSpanOption pointing to a dependent parent span. 
+// If sc == nil, the option has no effect. +// +// See ChildOfRef, SpanReference +func ChildOf(sc SpanContext) SpanReference { + return SpanReference{ + Type: ChildOfRef, + ReferencedContext: sc, + } +} + +// FollowsFrom returns a StartSpanOption pointing to a parent Span that caused +// the child Span but does not directly depend on its result in any way. +// If sc == nil, the option has no effect. +// +// See FollowsFromRef, SpanReference +func FollowsFrom(sc SpanContext) SpanReference { + return SpanReference{ + Type: FollowsFromRef, + ReferencedContext: sc, + } +} + +// StartTime is a StartSpanOption that sets an explicit start timestamp for the +// new Span. +type StartTime time.Time + +// Apply satisfies the StartSpanOption interface. +func (t StartTime) Apply(o *StartSpanOptions) { + o.StartTime = time.Time(t) +} + +// Tags are a generic map from an arbitrary string key to an opaque value type. +// The underlying tracing system is responsible for interpreting and +// serializing the values. +type Tags map[string]interface{} + +// Apply satisfies the StartSpanOption interface. +func (t Tags) Apply(o *StartSpanOptions) { + if o.Tags == nil { + o.Tags = make(map[string]interface{}) + } + for k, v := range t { + o.Tags[k] = v + } +} + +// Tag may be passed as a StartSpanOption to add a tag to new spans, +// or its Set method may be used to apply the tag to an existing Span, +// for example: +// +// tracer.StartSpan("opName", Tag{"Key", value}) +// +// or +// +// Tag{"key", value}.Set(span) +type Tag struct { + Key string + Value interface{} +} + +// Apply satisfies the StartSpanOption interface. +func (t Tag) Apply(o *StartSpanOptions) { + if o.Tags == nil { + o.Tags = make(map[string]interface{}) + } + o.Tags[t.Key] = t.Value +} + +// Set applies the tag to an existing Span. 
+func (t Tag) Set(s Span) { + s.SetTag(t.Key, t.Value) +} diff --git a/vendor/github.com/prometheus/client_golang/api/client.go b/vendor/github.com/prometheus/client_golang/api/client.go index db78ce230..2e6a5518e 100644 --- a/vendor/github.com/prometheus/client_golang/api/client.go +++ b/vendor/github.com/prometheus/client_golang/api/client.go @@ -25,6 +25,42 @@ import ( "time" ) +func NewErrorAPI(err error, warnings []string) Error { + if err == nil && warnings == nil { + return nil + } + return &ErrorAPI{err, warnings} +} + +type ErrorAPI struct { + err error + warnings []string +} + +func (w *ErrorAPI) Err() error { + return w.err +} + +func (w *ErrorAPI) Error() string { + if w.err != nil { + return w.err.Error() + } + return "Warnings: " + strings.Join(w.warnings, " , ") +} + +func (w *ErrorAPI) Warnings() []string { + return w.warnings +} + +// Error encapsulates an error + warning +type Error interface { + error + // Err returns the underlying error. + Err() error + // Warnings returns a list of warnings. + Warnings() []string +} + // DefaultRoundTripper is used if no RoundTripper is set in Config. var DefaultRoundTripper http.RoundTripper = &http.Transport{ Proxy: http.ProxyFromEnvironment, @@ -55,14 +91,14 @@ func (cfg *Config) roundTripper() http.RoundTripper { // Client is the interface for an API client. type Client interface { URL(ep string, args map[string]string) *url.URL - Do(context.Context, *http.Request) (*http.Response, []byte, error) + Do(context.Context, *http.Request) (*http.Response, []byte, Error) } // DoGetFallback will attempt to do the request as-is, and on a 405 it will fallback to a GET request. 
-func DoGetFallback(c Client, ctx context.Context, u *url.URL, args url.Values) (*http.Response, []byte, error) { +func DoGetFallback(c Client, ctx context.Context, u *url.URL, args url.Values) (*http.Response, []byte, Error) { req, err := http.NewRequest(http.MethodPost, u.String(), strings.NewReader(args.Encode())) if err != nil { - return nil, nil, err + return nil, nil, NewErrorAPI(err, nil) } req.Header.Set("Content-Type", "application/x-www-form-urlencoded") @@ -71,11 +107,14 @@ func DoGetFallback(c Client, ctx context.Context, u *url.URL, args url.Values) ( u.RawQuery = args.Encode() req, err = http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return nil, nil, err + return nil, nil, NewErrorAPI(err, nil) } } else { - return resp, body, err + if err != nil { + return resp, body, NewErrorAPI(err, nil) + } + return resp, body, nil } return c.Do(ctx, req) } @@ -115,7 +154,7 @@ func (c *httpClient) URL(ep string, args map[string]string) *url.URL { return &u } -func (c *httpClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, error) { +func (c *httpClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, Error) { if ctx != nil { req = req.WithContext(ctx) } @@ -127,7 +166,7 @@ func (c *httpClient) Do(ctx context.Context, req *http.Request) (*http.Response, }() if err != nil { - return nil, nil, err + return nil, nil, NewErrorAPI(err, nil) } var body []byte @@ -147,5 +186,5 @@ func (c *httpClient) Do(ctx context.Context, req *http.Request) (*http.Response, case <-done: } - return resp, body, err + return resp, body, NewErrorAPI(err, nil) } diff --git a/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go b/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go index 8394c97af..28cdaef69 100644 --- a/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go +++ b/vendor/github.com/prometheus/client_golang/api/prometheus/v1/api.go @@ -17,17 +17,105 @@ package v1 import ( 
"context" - "encoding/json" "errors" "fmt" + "math" "net/http" "strconv" + "strings" "time" + "unsafe" + + json "github.com/json-iterator/go" + + "github.com/prometheus/common/model" "github.com/prometheus/client_golang/api" - "github.com/prometheus/common/model" ) +func init() { + json.RegisterTypeEncoderFunc("model.SamplePair", marshalPointJSON, marshalPointJSONIsEmpty) + json.RegisterTypeDecoderFunc("model.SamplePair", unMarshalPointJSON) +} + +func unMarshalPointJSON(ptr unsafe.Pointer, iter *json.Iterator) { + p := (*model.SamplePair)(ptr) + if !iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair must be [timestamp, value]") + return + } + t := iter.ReadNumber() + if err := p.Timestamp.UnmarshalJSON([]byte(t)); err != nil { + iter.ReportError("unmarshal model.SamplePair", err.Error()) + return + } + if !iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair missing value") + return + } + + f, err := strconv.ParseFloat(iter.ReadString(), 64) + if err != nil { + iter.ReportError("unmarshal model.SamplePair", err.Error()) + return + } + p.Value = model.SampleValue(f) + + if iter.ReadArray() { + iter.ReportError("unmarshal model.SamplePair", "SamplePair has too many values, must be [timestamp, value]") + return + } +} + +func marshalPointJSON(ptr unsafe.Pointer, stream *json.Stream) { + p := *((*model.SamplePair)(ptr)) + stream.WriteArrayStart() + // Write out the timestamp as a float divided by 1000. + // This is ~3x faster than converting to a float. 
+ t := int64(p.Timestamp) + if t < 0 { + stream.WriteRaw(`-`) + t = -t + } + stream.WriteInt64(t / 1000) + fraction := t % 1000 + if fraction != 0 { + stream.WriteRaw(`.`) + if fraction < 100 { + stream.WriteRaw(`0`) + } + if fraction < 10 { + stream.WriteRaw(`0`) + } + stream.WriteInt64(fraction) + } + stream.WriteMore() + stream.WriteRaw(`"`) + + // Taken from https://github.com/json-iterator/go/blob/master/stream_float.go#L71 as a workaround + // to https://github.com/json-iterator/go/issues/365 (jsoniter, to follow json standard, doesn't allow inf/nan) + buf := stream.Buffer() + abs := math.Abs(float64(p.Value)) + fmt := byte('f') + // Note: Must use float32 comparisons for underlying float32 value to get precise cutoffs right. + if abs != 0 { + if abs < 1e-6 || abs >= 1e21 { + fmt = 'e' + } + } + buf = strconv.AppendFloat(buf, float64(p.Value), fmt, -1, 64) + stream.SetBuffer(buf) + + stream.WriteRaw(`"`) + stream.WriteArrayEnd() + +} + +func marshalPointJSONIsEmpty(ptr unsafe.Pointer) bool { + return false +} + const ( statusAPIError = 422 @@ -40,6 +128,7 @@ const ( epLabelValues = apiPrefix + "/label/:name/values" epSeries = apiPrefix + "/series" epTargets = apiPrefix + "/targets" + epTargetsMetadata = apiPrefix + "/targets/metadata" epRules = apiPrefix + "/rules" epSnapshot = apiPrefix + "/admin/tsdb/snapshot" epDeleteSeries = apiPrefix + "/admin/tsdb/delete_series" @@ -63,6 +152,9 @@ type RuleType string // RuleHealth models the health status of a rule. type RuleHealth string +// MetricType models the type of a metric. +type MetricType string + const ( // Possible values for AlertState. 
AlertStateFiring AlertState = "firing" @@ -91,17 +183,40 @@ const ( RuleHealthGood = "ok" RuleHealthUnknown = "unknown" RuleHealthBad = "err" + + // Possible values for MetricType + MetricTypeCounter MetricType = "counter" + MetricTypeGauge MetricType = "gauge" + MetricTypeHistogram MetricType = "histogram" + MetricTypeGaugeHistogram MetricType = "gaugehistogram" + MetricTypeSummary MetricType = "summary" + MetricTypeInfo MetricType = "info" + MetricTypeStateset MetricType = "stateset" + MetricTypeUnknown MetricType = "unknown" ) // Error is an error returned by the API. type Error struct { - Type ErrorType - Msg string - Detail string + Type ErrorType + Msg string + Detail string + warnings []string } func (e *Error) Error() string { - return fmt.Sprintf("%s: %s", e.Type, e.Msg) + if e.Type != "" || e.Msg != "" { + return fmt.Sprintf("%s: %s", e.Type, e.Msg) + } + + return "Warnings: " + strings.Join(e.warnings, " , ") +} + +func (w *Error) Err() error { + return w +} + +func (w *Error) Warnings() []string { + return w.warnings } // Range represents a sliced time range. @@ -115,32 +230,34 @@ type Range struct { // API provides bindings for Prometheus's v1 API. type API interface { // Alerts returns a list of all active alerts. - Alerts(ctx context.Context) (AlertsResult, error) + Alerts(ctx context.Context) (AlertsResult, api.Error) // AlertManagers returns an overview of the current state of the Prometheus alert manager discovery. - AlertManagers(ctx context.Context) (AlertManagersResult, error) + AlertManagers(ctx context.Context) (AlertManagersResult, api.Error) // CleanTombstones removes the deleted data from disk and cleans up the existing tombstones. - CleanTombstones(ctx context.Context) error + CleanTombstones(ctx context.Context) api.Error // Config returns the current Prometheus configuration. 
- Config(ctx context.Context) (ConfigResult, error) + Config(ctx context.Context) (ConfigResult, api.Error) // DeleteSeries deletes data for a selection of series in a time range. - DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) error + DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) api.Error // Flags returns the flag values that Prometheus was launched with. - Flags(ctx context.Context) (FlagsResult, error) + Flags(ctx context.Context) (FlagsResult, api.Error) // LabelValues performs a query for the values of the given label. - LabelValues(ctx context.Context, label string) (model.LabelValues, error) + LabelValues(ctx context.Context, label string) (model.LabelValues, api.Error) // Query performs a query for the given time. - Query(ctx context.Context, query string, ts time.Time) (model.Value, error) + Query(ctx context.Context, query string, ts time.Time) (model.Value, api.Error) // QueryRange performs a query for the given range. - QueryRange(ctx context.Context, query string, r Range) (model.Value, error) + QueryRange(ctx context.Context, query string, r Range) (model.Value, api.Error) // Series finds series by label matchers. - Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, error) + Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, api.Error) // Snapshot creates a snapshot of all current data into snapshots/- // under the TSDB's data directory and returns the directory as response. - Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error) + Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, api.Error) // Rules returns a list of alerting and recording rules that are currently loaded. 
- Rules(ctx context.Context) (RulesResult, error) + Rules(ctx context.Context) (RulesResult, api.Error) // Targets returns an overview of the current state of the Prometheus target discovery. - Targets(ctx context.Context) (TargetsResult, error) + Targets(ctx context.Context) (TargetsResult, api.Error) + // TargetsMetadata returns metadata about metrics currently scraped by the target. + TargetsMetadata(ctx context.Context, matchTarget string, metric string, limit string) ([]MetricMetadata, api.Error) } // AlertsResult contains the result from querying the alerts endpoint. @@ -226,7 +343,7 @@ type Alert struct { Annotations model.LabelSet Labels model.LabelSet State AlertState - Value float64 + Value string } // TargetsResult contains the result from querying the targets endpoint. @@ -250,6 +367,15 @@ type DroppedTarget struct { DiscoveredLabels map[string]string `json:"discoveredLabels"` } +// MetricMetadata models the metadata of a metric. +type MetricMetadata struct { + Target map[string]string `json:"target"` + Metric string `json:"metric,omitempty"` + Type MetricType `json:"type"` + Help string `json:"help"` + Unit string `json:"unit"` +} + // queryResult contains result data for a query. 
type queryResult struct { Type model.ValueType `json:"resultType"` @@ -408,73 +534,73 @@ type httpAPI struct { client api.Client } -func (h *httpAPI) Alerts(ctx context.Context) (AlertsResult, error) { +func (h *httpAPI) Alerts(ctx context.Context) (AlertsResult, api.Error) { u := h.client.URL(epAlerts, nil) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return AlertsResult{}, err + return AlertsResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return AlertsResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return AlertsResult{}, apiErr } var res AlertsResult err = json.Unmarshal(body, &res) - return res, err + return res, api.NewErrorAPI(err, nil) } -func (h *httpAPI) AlertManagers(ctx context.Context) (AlertManagersResult, error) { +func (h *httpAPI) AlertManagers(ctx context.Context) (AlertManagersResult, api.Error) { u := h.client.URL(epAlertManagers, nil) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return AlertManagersResult{}, err + return AlertManagersResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return AlertManagersResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return AlertManagersResult{}, apiErr } var res AlertManagersResult err = json.Unmarshal(body, &res) - return res, err + return res, api.NewErrorAPI(err, nil) } -func (h *httpAPI) CleanTombstones(ctx context.Context) error { +func (h *httpAPI) CleanTombstones(ctx context.Context) api.Error { u := h.client.URL(epCleanTombstones, nil) req, err := http.NewRequest(http.MethodPost, u.String(), nil) if err != nil { - return err + return api.NewErrorAPI(err, nil) } - _, _, err = h.client.Do(ctx, req) - return err + _, _, apiErr := h.client.Do(ctx, req) + return apiErr } -func (h *httpAPI) Config(ctx context.Context) (ConfigResult, error) { +func (h *httpAPI) Config(ctx 
context.Context) (ConfigResult, api.Error) { u := h.client.URL(epConfig, nil) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return ConfigResult{}, err + return ConfigResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return ConfigResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return ConfigResult{}, apiErr } var res ConfigResult err = json.Unmarshal(body, &res) - return res, err + return res, api.NewErrorAPI(err, nil) } -func (h *httpAPI) DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) error { +func (h *httpAPI) DeleteSeries(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) api.Error { u := h.client.URL(epDeleteSeries, nil) q := u.Query() @@ -489,47 +615,47 @@ func (h *httpAPI) DeleteSeries(ctx context.Context, matches []string, startTime req, err := http.NewRequest(http.MethodPost, u.String(), nil) if err != nil { - return err + return api.NewErrorAPI(err, nil) } - _, _, err = h.client.Do(ctx, req) - return err + _, _, apiErr := h.client.Do(ctx, req) + return apiErr } -func (h *httpAPI) Flags(ctx context.Context) (FlagsResult, error) { +func (h *httpAPI) Flags(ctx context.Context) (FlagsResult, api.Error) { u := h.client.URL(epFlags, nil) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return FlagsResult{}, err + return FlagsResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return FlagsResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return FlagsResult{}, apiErr } var res FlagsResult err = json.Unmarshal(body, &res) - return res, err + return res, api.NewErrorAPI(err, nil) } -func (h *httpAPI) LabelValues(ctx context.Context, label string) (model.LabelValues, error) { +func (h *httpAPI) LabelValues(ctx context.Context, label string) (model.LabelValues, api.Error) { u := 
h.client.URL(epLabelValues, map[string]string{"name": label}) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return nil, err + return nil, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return nil, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return nil, apiErr } var labelValues model.LabelValues err = json.Unmarshal(body, &labelValues) - return labelValues, err + return labelValues, api.NewErrorAPI(err, nil) } -func (h *httpAPI) Query(ctx context.Context, query string, ts time.Time) (model.Value, error) { +func (h *httpAPI) Query(ctx context.Context, query string, ts time.Time) (model.Value, api.Error) { u := h.client.URL(epQuery, nil) q := u.Query() @@ -538,18 +664,16 @@ func (h *httpAPI) Query(ctx context.Context, query string, ts time.Time) (model. q.Set("time", ts.Format(time.RFC3339Nano)) } - _, body, err := api.DoGetFallback(h.client, ctx, u, q) - if err != nil { - return nil, err + _, body, apiErr := api.DoGetFallback(h.client, ctx, u, q) + if apiErr != nil { + return nil, apiErr } var qres queryResult - err = json.Unmarshal(body, &qres) - - return model.Value(qres.v), err + return model.Value(qres.v), api.NewErrorAPI(json.Unmarshal(body, &qres), nil) } -func (h *httpAPI) QueryRange(ctx context.Context, query string, r Range) (model.Value, error) { +func (h *httpAPI) QueryRange(ctx context.Context, query string, r Range) (model.Value, api.Error) { u := h.client.URL(epQueryRange, nil) q := u.Query() @@ -564,18 +688,17 @@ func (h *httpAPI) QueryRange(ctx context.Context, query string, r Range) (model. 
q.Set("end", end) q.Set("step", step) - _, body, err := api.DoGetFallback(h.client, ctx, u, q) - if err != nil { - return nil, err + _, body, apiErr := api.DoGetFallback(h.client, ctx, u, q) + if apiErr != nil { + return nil, apiErr } var qres queryResult - err = json.Unmarshal(body, &qres) - return model.Value(qres.v), err + return model.Value(qres.v), api.NewErrorAPI(json.Unmarshal(body, &qres), nil) } -func (h *httpAPI) Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, error) { +func (h *httpAPI) Series(ctx context.Context, matches []string, startTime time.Time, endTime time.Time) ([]model.LabelSet, api.Error) { u := h.client.URL(epSeries, nil) q := u.Query() @@ -590,20 +713,20 @@ func (h *httpAPI) Series(ctx context.Context, matches []string, startTime time.T req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return nil, err + return nil, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return nil, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return nil, apiErr } var mset []model.LabelSet err = json.Unmarshal(body, &mset) - return mset, err + return mset, api.NewErrorAPI(err, nil) } -func (h *httpAPI) Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, error) { +func (h *httpAPI) Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, api.Error) { u := h.client.URL(epSnapshot, nil) q := u.Query() @@ -613,53 +736,78 @@ func (h *httpAPI) Snapshot(ctx context.Context, skipHead bool) (SnapshotResult, req, err := http.NewRequest(http.MethodPost, u.String(), nil) if err != nil { - return SnapshotResult{}, err + return SnapshotResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return SnapshotResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return SnapshotResult{}, apiErr } var res SnapshotResult err = json.Unmarshal(body, &res) - 
return res, err + return res, api.NewErrorAPI(err, nil) } -func (h *httpAPI) Rules(ctx context.Context) (RulesResult, error) { +func (h *httpAPI) Rules(ctx context.Context) (RulesResult, api.Error) { u := h.client.URL(epRules, nil) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return RulesResult{}, err + return RulesResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return RulesResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return RulesResult{}, apiErr } var res RulesResult err = json.Unmarshal(body, &res) - return res, err + return res, api.NewErrorAPI(err, nil) } -func (h *httpAPI) Targets(ctx context.Context) (TargetsResult, error) { +func (h *httpAPI) Targets(ctx context.Context) (TargetsResult, api.Error) { u := h.client.URL(epTargets, nil) req, err := http.NewRequest(http.MethodGet, u.String(), nil) if err != nil { - return TargetsResult{}, err + return TargetsResult{}, api.NewErrorAPI(err, nil) } - _, body, err := h.client.Do(ctx, req) - if err != nil { - return TargetsResult{}, err + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return TargetsResult{}, apiErr } var res TargetsResult err = json.Unmarshal(body, &res) - return res, err + return res, api.NewErrorAPI(err, nil) +} + +func (h *httpAPI) TargetsMetadata(ctx context.Context, matchTarget string, metric string, limit string) ([]MetricMetadata, api.Error) { + u := h.client.URL(epTargetsMetadata, nil) + q := u.Query() + + q.Set("match_target", matchTarget) + q.Set("metric", metric) + q.Set("limit", limit) + + u.RawQuery = q.Encode() + + req, err := http.NewRequest(http.MethodGet, u.String(), nil) + if err != nil { + return nil, api.NewErrorAPI(err, nil) + } + + _, body, apiErr := h.client.Do(ctx, req) + if apiErr != nil { + return nil, apiErr + } + + var res []MetricMetadata + err = json.Unmarshal(body, &res) + return res, api.NewErrorAPI(err, nil) } // apiClient wraps a 
regular client and processes successful API responses. @@ -673,6 +821,7 @@ type apiResponse struct { Data json.RawMessage `json:"data"` ErrorType ErrorType `json:"errorType"` Error string `json:"error"` + Warnings []string `json:"warnings,omitempty"` } func apiError(code int) bool { @@ -690,14 +839,16 @@ func errorTypeAndMsgFor(resp *http.Response) (ErrorType, string) { return ErrBadResponse, fmt.Sprintf("bad response code %d", resp.StatusCode) } -func (c apiClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, error) { - resp, body, err := c.Client.Do(ctx, req) - if err != nil { - return resp, body, err +func (c apiClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, api.Error) { + resp, body, apiErr := c.Client.Do(ctx, req) + if apiErr != nil { + return resp, body, apiErr } code := resp.StatusCode + var err api.Error + if code/100 != 2 && !apiError(code) { errorType, errorMsg := errorTypeAndMsgFor(resp) return resp, body, &Error{ @@ -710,27 +861,30 @@ func (c apiClient) Do(ctx context.Context, req *http.Request) (*http.Response, [ var result apiResponse if http.StatusNoContent != code { - if err = json.Unmarshal(body, &result); err != nil { + if jsonErr := json.Unmarshal(body, &result); jsonErr != nil { return resp, body, &Error{ Type: ErrBadResponse, - Msg: err.Error(), + Msg: jsonErr.Error(), } } } if apiError(code) != (result.Status == "error") { err = &Error{ - Type: ErrBadResponse, - Msg: "inconsistent body for response code", + Type: ErrBadResponse, + Msg: "inconsistent body for response code", + warnings: result.Warnings, } } if apiError(code) && result.Status == "error" { err = &Error{ - Type: result.ErrorType, - Msg: result.Error, + Type: result.ErrorType, + Msg: result.Error, + warnings: result.Warnings, } } return resp, []byte(result.Data), err + } diff --git a/vendor/github.com/prometheus/client_golang/prometheus/build_info.go b/vendor/github.com/prometheus/client_golang/prometheus/build_info.go new file 
mode 100644 index 000000000..288f0e854 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/build_info.go @@ -0,0 +1,29 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build go1.12 + +package prometheus + +import "runtime/debug" + +// readBuildInfo is a wrapper around debug.ReadBuildInfo for Go 1.12+. +func readBuildInfo() (path, version, sum string) { + path, version, sum = "unknown", "unknown", "unknown" + if bi, ok := debug.ReadBuildInfo(); ok { + path = bi.Main.Path + version = bi.Main.Version + sum = bi.Main.Sum + } + return +} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/build_info_pre_1.12.go b/vendor/github.com/prometheus/client_golang/prometheus/build_info_pre_1.12.go new file mode 100644 index 000000000..6609e2877 --- /dev/null +++ b/vendor/github.com/prometheus/client_golang/prometheus/build_info_pre_1.12.go @@ -0,0 +1,22 @@ +// Copyright 2019 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// +build !go1.12 + +package prometheus + +// readBuildInfo is a wrapper around debug.ReadBuildInfo for Go versions before +// 1.12. Remove this whole file once the minimum supported Go version is 1.12. +func readBuildInfo() (path, version, sum string) { + return "unknown", "unknown", "unknown" +} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/go_collector.go b/vendor/github.com/prometheus/client_golang/prometheus/go_collector.go index b108ec513..dc9247fed 100644 --- a/vendor/github.com/prometheus/client_golang/prometheus/go_collector.go +++ b/vendor/github.com/prometheus/client_golang/prometheus/go_collector.go @@ -36,7 +36,7 @@ type goCollector struct { msMaxAge time.Duration // Maximum allowed age of old memstats. } -// NewGoCollector returns a collector which exports metrics about the current Go +// NewGoCollector returns a collector that exports metrics about the current Go // process. This includes memory stats. To collect those, runtime.ReadMemStats // is called. This requires to “stop the world”, which usually only happens for // garbage collection (GC). Take the following implications into account when @@ -364,3 +364,33 @@ type memStatsMetrics []struct { eval func(*runtime.MemStats) float64 valType ValueType } + +// NewBuildInfoCollector returns a collector collecting a single metric +// "go_build_info" with the constant value 1 and three labels "path", "version", +// and "checksum". Their label values contain the main module path, version, and +// checksum, respectively. The labels will only have meaningful values if the +// binary is built with Go module support and from source code retrieved from +// the source repository (rather than the local file system). This is usually +// accomplished by building from outside of GOPATH, specifying the full address +// of the main package, e.g. 
"GO111MODULE=on go run +// github.com/prometheus/client_golang/examples/random". If built without Go +// module support, all label values will be "unknown". If built with Go module +// support but using the source code from the local file system, the "path" will +// be set appropriately, but "checksum" will be empty and "version" will be +// "(devel)". +// +// This collector uses only the build information for the main module. See +// https://github.com/povilasv/prommod for an example of a collector for the +// module dependencies. +func NewBuildInfoCollector() Collector { + path, version, sum := readBuildInfo() + c := &selfCollector{MustNewConstMetric( + NewDesc( + "go_build_info", + "Build information about the main Go module.", + nil, Labels{"path": path, "version": version, "checksum": sum}, + ), + GaugeValue, 1)} + c.init(c.self) + return c +} diff --git a/vendor/github.com/prometheus/client_golang/prometheus/process_collector.go b/vendor/github.com/prometheus/client_golang/prometheus/process_collector.go index 55176d58c..37d2026ac 100644 --- a/vendor/github.com/prometheus/client_golang/prometheus/process_collector.go +++ b/vendor/github.com/prometheus/client_golang/prometheus/process_collector.go @@ -126,7 +126,7 @@ func NewProcessCollector(opts ProcessCollectorOpts) Collector { } // Set up process metric collection if supported by the runtime. 
- if _, err := procfs.NewStat(); err == nil { + if _, err := procfs.NewDefaultFS(); err == nil { c.collectFn = c.processCollect } else { c.collectFn = func(ch chan<- Metric) { @@ -166,7 +166,7 @@ func (c *processCollector) processCollect(ch chan<- Metric) { return } - if stat, err := p.NewStat(); err == nil { + if stat, err := p.Stat(); err == nil { ch <- MustNewConstMetric(c.cpuTotal, CounterValue, stat.CPUTime()) ch <- MustNewConstMetric(c.vsize, GaugeValue, float64(stat.VirtualMemory())) ch <- MustNewConstMetric(c.rss, GaugeValue, float64(stat.ResidentMemory())) @@ -185,7 +185,7 @@ func (c *processCollector) processCollect(ch chan<- Metric) { c.reportError(ch, c.openFDs, err) } - if limits, err := p.NewLimits(); err == nil { + if limits, err := p.Limits(); err == nil { ch <- MustNewConstMetric(c.maxFDs, GaugeValue, float64(limits.OpenFiles)) ch <- MustNewConstMetric(c.maxVsize, GaugeValue, float64(limits.AddressSpace)) } else { diff --git a/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go b/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go index b137c8830..cea5a90fd 100644 --- a/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go +++ b/vendor/github.com/prometheus/client_golang/prometheus/promhttp/http.go @@ -84,10 +84,32 @@ func Handler() http.Handler { // instrumentation. Use the InstrumentMetricHandler function to apply the same // kind of instrumentation as it is used by the Handler function. 
func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler { - var inFlightSem chan struct{} + var ( + inFlightSem chan struct{} + errCnt = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "promhttp_metric_handler_errors_total", + Help: "Total number of internal errors encountered by the promhttp metric handler.", + }, + []string{"cause"}, + ) + ) + if opts.MaxRequestsInFlight > 0 { inFlightSem = make(chan struct{}, opts.MaxRequestsInFlight) } + if opts.Registry != nil { + // Initialize all possibilites that can occur below. + errCnt.WithLabelValues("gathering") + errCnt.WithLabelValues("encoding") + if err := opts.Registry.Register(errCnt); err != nil { + if are, ok := err.(prometheus.AlreadyRegisteredError); ok { + errCnt = are.ExistingCollector.(*prometheus.CounterVec) + } else { + panic(err) + } + } + } h := http.HandlerFunc(func(rsp http.ResponseWriter, req *http.Request) { if inFlightSem != nil { @@ -106,6 +128,7 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler { if opts.ErrorLog != nil { opts.ErrorLog.Println("error gathering metrics:", err) } + errCnt.WithLabelValues("gathering").Inc() switch opts.ErrorHandling { case PanicOnError: panic(err) @@ -146,6 +169,7 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler { if opts.ErrorLog != nil { opts.ErrorLog.Println("error encoding and sending metric family:", err) } + errCnt.WithLabelValues("encoding").Inc() switch opts.ErrorHandling { case PanicOnError: panic(err) @@ -236,9 +260,12 @@ const ( // Ignore errors and try to serve as many metrics as possible. However, // if no metrics can be served, serve an HTTP status code 500 and the // last error message in the body. Only use this in deliberate "best - // effort" metrics collection scenarios. It is recommended to at least - // log errors (by providing an ErrorLog in HandlerOpts) to not mask - // errors completely. + // effort" metrics collection scenarios. 
In this case, it is highly + // recommended to provide other means of detecting errors: By setting an + // ErrorLog in HandlerOpts, the errors are logged. By providing a + // Registry in HandlerOpts, the exposed metrics include an error counter + // "promhttp_metric_handler_errors_total", which can be used for + // alerts. ContinueOnError // Panic upon the first error encountered (useful for "crash only" apps). PanicOnError @@ -261,6 +288,18 @@ type HandlerOpts struct { // logged regardless of the configured ErrorHandling provided ErrorLog // is not nil. ErrorHandling HandlerErrorHandling + // If Registry is not nil, it is used to register a metric + // "promhttp_metric_handler_errors_total", partitioned by "cause". A + // failed registration causes a panic. Note that this error counter is + // different from the instrumentation you get from the various + // InstrumentHandler... helpers. It counts errors that don't necessarily + // result in a non-2xx HTTP status code. There are two typical cases: + // (1) Encoding errors that only happen after streaming of the HTTP body + // has already started (and the status code 200 has been sent). This + // should only happen with custom collectors. (2) Collection errors with + // no effect on the HTTP status code because ErrorHandling is set to + // ContinueOnError. + Registry prometheus.Registerer // If DisableCompression is true, the handler will never compress the // response, even if requested by the client. DisableCompression bool diff --git a/vendor/github.com/prometheus/client_golang/prometheus/summary.go b/vendor/github.com/prometheus/client_golang/prometheus/summary.go index 1574b0fe7..ec663ec3d 100644 --- a/vendor/github.com/prometheus/client_golang/prometheus/summary.go +++ b/vendor/github.com/prometheus/client_golang/prometheus/summary.go @@ -39,7 +39,7 @@ const quantileLabel = "quantile" // A typical use-case is the observation of request latencies. 
By default, a // Summary provides the median, the 90th and the 99th percentile of the latency // as rank estimations. However, the default behavior will change in the -// upcoming v0.10 of the library. There will be no rank estimations at all by +// upcoming v1.0.0 of the library. There will be no rank estimations at all by // default. For a sane transition, it is recommended to set the desired rank // estimations explicitly. // @@ -61,7 +61,7 @@ type Summary interface { // DefObjectives are the default Summary quantile values. // // Deprecated: DefObjectives will not be used as the default objectives in -// v0.10 of the library. The default Summary will have no quantiles then. +// v1.0.0 of the library. The default Summary will have no quantiles then. var ( DefObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001} @@ -86,7 +86,7 @@ const ( // mandatory to set Name to a non-empty string. While all other fields are // optional and can safely be left at their zero value, it is recommended to set // a help string and to explicitly set the Objectives field to the desired value -// as the default value will change in the upcoming v0.10 of the library. +// as the default value will change in the upcoming v1.0.0 of the library. type SummaryOpts struct { // Namespace, Subsystem, and Name are components of the fully-qualified // name of the Summary (created by joining these components with @@ -128,7 +128,7 @@ type SummaryOpts struct { // set it to an empty map (i.e. map[float64]float64{}). // // Note that the current value of DefObjectives is deprecated. It will - // be replaced by an empty map in v0.10 of the library. Please + // be replaced by an empty map in v1.0.0 of the library. Please // explicitly set Objectives to the desired value to avoid problems // during the transition. 
Objectives map[float64]float64 diff --git a/vendor/github.com/prometheus/procfs/Makefile b/vendor/github.com/prometheus/procfs/Makefile index 314d1ba56..616a0d25e 100644 --- a/vendor/github.com/prometheus/procfs/Makefile +++ b/vendor/github.com/prometheus/procfs/Makefile @@ -14,6 +14,7 @@ include Makefile.common %/.unpacked: %.ttar + @echo ">> extracting fixtures" ./ttar -C $(dir $*) -x -f $*.ttar touch $@ diff --git a/vendor/github.com/prometheus/procfs/Makefile.common b/vendor/github.com/prometheus/procfs/Makefile.common index 4f18ea587..c7f9ea64f 100644 --- a/vendor/github.com/prometheus/procfs/Makefile.common +++ b/vendor/github.com/prometheus/procfs/Makefile.common @@ -69,7 +69,7 @@ else GO_BUILD_PLATFORM ?= $(GOHOSTOS)-$(GOHOSTARCH) endif -PROMU_VERSION ?= 0.3.0 +PROMU_VERSION ?= 0.4.0 PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_VERSION)/promu-$(PROMU_VERSION).$(GO_BUILD_PLATFORM).tar.gz GOLANGCI_LINT := diff --git a/vendor/github.com/prometheus/procfs/README.md b/vendor/github.com/prometheus/procfs/README.md index 209549471..6f8850feb 100644 --- a/vendor/github.com/prometheus/procfs/README.md +++ b/vendor/github.com/prometheus/procfs/README.md @@ -1,7 +1,7 @@ # procfs This procfs package provides functions to retrieve system, kernel and process -metrics from the pseudo-filesystem proc. +metrics from the pseudo-filesystems /proc and /sys. *WARNING*: This package is a work in progress. Its API may still break in backwards-incompatible ways without warnings. Use it at your own risk. @@ -9,3 +9,45 @@ backwards-incompatible ways without warnings. Use it at your own risk. 
[![GoDoc](https://godoc.org/github.com/prometheus/procfs?status.png)](https://godoc.org/github.com/prometheus/procfs) [![Build Status](https://travis-ci.org/prometheus/procfs.svg?branch=master)](https://travis-ci.org/prometheus/procfs) [![Go Report Card](https://goreportcard.com/badge/github.com/prometheus/procfs)](https://goreportcard.com/report/github.com/prometheus/procfs) + +## Usage + +The procfs library is organized by packages based on whether the gathered data is coming from +/proc, /sys, or both. Each package contains an `FS` type which represents the path to either /proc, /sys, or both. For example, current cpu statistics are gathered from +`/proc/stat` and are available via the root procfs package. First, the proc filesystem mount +point is initialized, and then the stat information is read. + +```go +fs, err := procfs.NewFS("/proc") +stats, err := fs.Stat() +``` + +Some sub-packages such as `blockdevice`, require access to both the proc and sys filesystems. + +```go + fs, err := blockdevice.NewFS("/proc", "/sys") + stats, err := fs.ProcDiskstats() +``` + +## Building and Testing + +The procfs library is normally built as part of another application. However, when making +changes to the library, the `make test` command can be used to run the API test suite. + +### Updating Test Fixtures + +The procfs library includes a set of test fixtures which include many example files from +the `/proc` and `/sys` filesystems. These fixtures are included as a [ttar](https://github.com/ideaship/ttar) file +which is extracted automatically during testing. To add/update the test fixtures, first +ensure the `fixtures` directory is up to date by removing the existing directory and then +extracting the ttar file using `make fixtures/.unpacked` or just `make test`. + +```bash +rm -rf fixtures +make test +``` + +Next, make the required changes to the extracted files in the `fixtures` directory. 
When +the changes are complete, run `make update_fixtures` to create a new `fixtures.ttar` file +based on the updated `fixtures` directory. And finally, verify the changes using +`git diff fixtures.ttar`. diff --git a/vendor/github.com/prometheus/procfs/buddyinfo.go b/vendor/github.com/prometheus/procfs/buddyinfo.go index 5cd22a837..63d4229a4 100644 --- a/vendor/github.com/prometheus/procfs/buddyinfo.go +++ b/vendor/github.com/prometheus/procfs/buddyinfo.go @@ -31,18 +31,8 @@ type BuddyInfo struct { Sizes []float64 } -// NewBuddyInfo reads the buddyinfo statistics. -func NewBuddyInfo() ([]BuddyInfo, error) { - fs, err := NewFS(DefaultMountPoint) - if err != nil { - return nil, err - } - - return fs.NewBuddyInfo() -} - // NewBuddyInfo reads the buddyinfo statistics from the specified `proc` filesystem. -func (fs FS) NewBuddyInfo() ([]BuddyInfo, error) { +func (fs FS) BuddyInfo() ([]BuddyInfo, error) { file, err := os.Open(fs.proc.Path("buddyinfo")) if err != nil { return nil, err diff --git a/vendor/github.com/prometheus/procfs/fixtures.ttar b/vendor/github.com/prometheus/procfs/fixtures.ttar index f7f84ef36..951d909af 100644 --- a/vendor/github.com/prometheus/procfs/fixtures.ttar +++ b/vendor/github.com/prometheus/procfs/fixtures.ttar @@ -75,13 +75,13 @@ Max realtime timeout unlimited unlimited us Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: fixtures/proc/26231/mountstats -Lines: 19 +Lines: 20 device rootfs mounted on / with fstype rootfs device sysfs mounted on /sys with fstype sysfs device proc mounted on /proc with fstype proc device /dev/sda1 mounted on / with fstype ext4 device 192.168.1.1:/srv/test mounted on /mnt/nfs/test with fstype nfs4 statvers=1.1 - opts: rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,clientaddr=192.168.1.5,local_lock=none + opts: 
rw,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,acregmin=3,acregmax=60,acdirmin=30,acdirmax=60,hard,proto=tcp,port=0,timeo=600,retrans=2,sec=sys,mountaddr=192.168.1.1,clientaddr=192.168.1.5,local_lock=none age: 13968 caps: caps=0xfff7,wtmult=512,dtsize=32768,bsize=0,namlen=255 nfsv4: bm0=0xfdffafff,bm1=0xf9be3e,bm2=0x0,acl=0x0,pnfs=not configured @@ -94,6 +94,7 @@ device 192.168.1.1:/srv/test mounted on /mnt/nfs/test with fstype nfs4 statvers= NULL: 0 0 0 0 0 0 0 0 READ: 1298 1298 0 207680 1210292152 6 79386 79407 WRITE: 0 0 0 0 0 0 0 0 + ACCESS: 2927395007 2927394995 0 526931094212 362996810236 18446743919241604546 1667369447 1953587717 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -125,6 +126,63 @@ Lines: 1 26231 (vim) R 5392 7446 5392 34835 7446 4218880 32533 309516 26 82 1677 44 158 99 20 0 1 0 82375 56274944 1981 18446744073709551615 4194304 6294284 140736914091744 140736914087944 139965136429984 0 0 12288 1870679807 0 0 0 17 0 0 0 31 0 0 8391624 8481048 16420864 140736914093252 140736914093279 140736914093279 140736914096107 0 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/proc/26231/status +Lines: 53 + +Name: prometheus +Umask: 0022 +State: S (sleeping) +Tgid: 1 +Ngid: 0 +Pid: 1 +PPid: 0 +TracerPid: 0 +Uid: 0 0 0 0 +Gid: 0 0 0 0 +FDSize: 128 +Groups: +NStgid: 1 +NSpid: 1 +NSpgid: 1 +NSsid: 1 +VmPeak: 58472 kB +VmSize: 58440 kB +VmLck: 0 kB +VmPin: 0 kB +VmHWM: 8028 kB +VmRSS: 6716 kB +RssAnon: 2092 kB +RssFile: 4624 kB +RssShmem: 0 kB +VmData: 2580 kB +VmStk: 136 kB +VmExe: 948 kB +VmLib: 6816 kB +VmPTE: 128 kB +VmPMD: 12 kB +VmSwap: 660 kB +HugetlbPages: 0 kB +Threads: 1 +SigQ: 8/63965 +SigPnd: 0000000000000000 +ShdPnd: 0000000000000000 +SigBlk: 7be3c0fe28014a03 +SigIgn: 0000000000001000 +SigCgt: 00000001800004ec +CapInh: 0000000000000000 +CapPrm: 0000003fffffffff +CapEff: 0000003fffffffff +CapBnd: 0000003fffffffff +CapAmb: 0000000000000000 +Seccomp: 0 
+Cpus_allowed: ff +Cpus_allowed_list: 0-7 +Mems_allowed: 00000000,00000001 +Mems_allowed_list: 0 +voluntary_ctxt_switches: 4742839 +nonvoluntary_ctxt_switches: 1727500 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: fixtures/proc/26232 Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -160,23 +218,23 @@ SymlinkTo: ../../symlinktargets/xyz # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: fixtures/proc/26232/limits Lines: 17 -Limit Soft Limit Hard Limit Units -Max cpu time unlimited unlimited seconds -Max file size unlimited unlimited bytes -Max data size unlimited unlimited bytes -Max stack size 8388608 unlimited bytes -Max core file size 0 unlimited bytes -Max resident set unlimited unlimited bytes -Max processes 29436 29436 processes -Max open files 1024 4096 files -Max locked memory 65536 65536 bytes -Max address space unlimited unlimited bytes -Max file locks unlimited unlimited locks -Max pending signals 29436 29436 signals -Max msgqueue size 819200 819200 bytes -Max nice priority 0 0 -Max realtime priority 0 0 -Max realtime timeout unlimited unlimited us +Limit Soft Limit Hard Limit Units +Max cpu time unlimited unlimited seconds +Max file size unlimited unlimited bytes +Max data size unlimited unlimited bytes +Max stack size 8388608 unlimited bytes +Max core file size 0 unlimited bytes +Max resident set unlimited unlimited bytes +Max processes 29436 29436 processes +Max open files 1024 4096 files +Max locked memory 65536 65536 bytes +Max address space unlimited unlimited bytes +Max file locks unlimited unlimited locks +Max pending signals 29436 29436 signals +Max msgqueue size 819200 819200 bytes +Max nice priority 0 0 +Max realtime priority 0 0 +Max realtime timeout unlimited unlimited us Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: fixtures/proc/26232/root @@ -206,9 +264,9 @@ 
Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: fixtures/proc/buddyinfo Lines: 3 -Node 0, zone DMA 1 0 1 0 2 1 1 0 1 1 3 -Node 0, zone DMA32 759 572 791 475 194 45 12 0 0 0 0 -Node 0, zone Normal 4381 1093 185 1530 567 102 4 0 0 0 0 +Node 0, zone DMA 1 0 1 0 2 1 1 0 1 1 3 +Node 0, zone DMA32 759 572 791 475 194 45 12 0 0 0 0 +Node 0, zone Normal 4381 1093 185 1530 567 102 4 0 0 0 0 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: fixtures/proc/diskstats @@ -302,13 +360,13 @@ Lines: 26 Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10] md3 : active raid6 sda1[8] sdh1[7] sdg1[6] sdf1[5] sde1[11] sdd1[3] sdc1[10] sdb1[9] 5853468288 blocks super 1.2 level 6, 64k chunk, algorithm 2 [8/8] [UUUUUUUU] - + md127 : active raid1 sdi2[0] sdj2[1] 312319552 blocks [2/2] [UU] - + md0 : active raid1 sdk[2](S) sdi1[0] sdj1[1] 248896 blocks [2/2] [UU] - + md4 : inactive raid1 sda3[0] sdb3[1] 4883648 blocks [2/2] [UU] @@ -402,6 +460,26 @@ proc4 2 2 10853 proc4ops 72 0 0 0 1098 2 0 0 0 0 8179 5896 0 0 0 0 5900 0 0 2 0 2 0 9609 0 2 150 1272 0 0 0 1236 0 0 0 0 3 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/proc/net/unix +Lines: 6 +Num RefCount Protocol Flags Type St Inode Path +0000000000000000: 00000002 00000000 00010000 0001 01 3442596 /var/run/postgresql/.s.PGSQL.5432 +0000000000000000: 0000000a 00000000 00010000 0005 01 10061 /run/udev/control +0000000000000000: 00000007 00000000 00000000 0002 01 12392 /dev/log +0000000000000000: 00000003 00000000 00000000 0001 03 4787297 /var/run/postgresql/.s.PGSQL.5432 +0000000000000000: 00000003 00000000 00000000 0001 03 5091797 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/proc/net/unix_without_inode +Lines: 6 +Num RefCount 
Protocol Flags Type St Path +0000000000000000: 00000002 00000000 00010000 0001 01 /var/run/postgresql/.s.PGSQL.5432 +0000000000000000: 0000000a 00000000 00010000 0005 01 /run/udev/control +0000000000000000: 00000007 00000000 00000000 0002 01 /dev/log +0000000000000000: 00000003 00000000 00000000 0001 03 /var/run/postgresql/.s.PGSQL.5432 +0000000000000000: 00000003 00000000 00000000 0001 03 +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Path: fixtures/proc/net/xfrm_stat Lines: 28 XfrmInError 1 @@ -1107,6 +1185,22 @@ Mode: 644 Directory: fixtures/sys/devices/system Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/devices/system/clocksource +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Directory: fixtures/sys/devices/system/clocksource/clocksource0 +Mode: 775 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/devices/system/clocksource/clocksource0/available_clocksource +Lines: 1 +tsc hpet acpi_pm +Mode: 444 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: fixtures/sys/devices/system/clocksource/clocksource0/current_clocksource +Lines: 1 +tsc +Mode: 644 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: fixtures/sys/devices/system/cpu Mode: 775 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/vendor/github.com/prometheus/procfs/fs.go b/vendor/github.com/prometheus/procfs/fs.go index 9c56c8395..0102ab0fd 100644 --- a/vendor/github.com/prometheus/procfs/fs.go +++ b/vendor/github.com/prometheus/procfs/fs.go @@ -26,8 +26,14 @@ type FS struct { // DefaultMountPoint is the common mount point of the proc filesystem. const DefaultMountPoint = fs.DefaultProcMountPoint +// NewDefaultFS returns a new proc FS mounted under the default proc mountPoint. 
+// It will error if the mount point directory can't be read or is a file. +func NewDefaultFS() (FS, error) { + return NewFS(DefaultMountPoint) +} + // NewFS returns a new proc FS mounted under the given proc mountPoint. It will error -// if the mount point dirctory can't be read or is a file. +// if the mount point directory can't be read or is a file. func NewFS(mountPoint string) (FS, error) { fs, err := fs.NewFS(mountPoint) if err != nil { diff --git a/vendor/github.com/prometheus/procfs/ipvs.go b/vendor/github.com/prometheus/procfs/ipvs.go index 41e645d23..2d6cb8d1c 100644 --- a/vendor/github.com/prometheus/procfs/ipvs.go +++ b/vendor/github.com/prometheus/procfs/ipvs.go @@ -62,18 +62,8 @@ type IPVSBackendStatus struct { Weight uint64 } -// NewIPVSStats reads the IPVS statistics. -func NewIPVSStats() (IPVSStats, error) { - fs, err := NewFS(DefaultMountPoint) - if err != nil { - return IPVSStats{}, err - } - - return fs.NewIPVSStats() -} - -// NewIPVSStats reads the IPVS statistics from the specified `proc` filesystem. -func (fs FS) NewIPVSStats() (IPVSStats, error) { +// IPVSStats reads the IPVS statistics from the specified `proc` filesystem. +func (fs FS) IPVSStats() (IPVSStats, error) { file, err := os.Open(fs.proc.Path("net/ip_vs_stats")) if err != nil { return IPVSStats{}, err @@ -131,18 +121,8 @@ func parseIPVSStats(file io.Reader) (IPVSStats, error) { return stats, nil } -// NewIPVSBackendStatus reads and returns the status of all (virtual,real) server pairs. -func NewIPVSBackendStatus() ([]IPVSBackendStatus, error) { - fs, err := NewFS(DefaultMountPoint) - if err != nil { - return []IPVSBackendStatus{}, err - } - - return fs.NewIPVSBackendStatus() -} - -// NewIPVSBackendStatus reads and returns the status of all (virtual,real) server pairs from the specified `proc` filesystem. 
-func (fs FS) NewIPVSBackendStatus() ([]IPVSBackendStatus, error) { +// IPVSBackendStatus reads and returns the status of all (virtual,real) server pairs from the specified `proc` filesystem. +func (fs FS) IPVSBackendStatus() ([]IPVSBackendStatus, error) { file, err := os.Open(fs.proc.Path("net/ip_vs")) if err != nil { return nil, err diff --git a/vendor/github.com/prometheus/procfs/mdstat.go b/vendor/github.com/prometheus/procfs/mdstat.go index 6ac7a12f9..71c106782 100644 --- a/vendor/github.com/prometheus/procfs/mdstat.go +++ b/vendor/github.com/prometheus/procfs/mdstat.go @@ -42,64 +42,64 @@ type MDStat struct { BlocksSynced int64 } -// ParseMDStat parses an mdstat-file and returns a struct with the relevant infos. -func (fs FS) ParseMDStat() (mdstates []MDStat, err error) { - mdStatusFilePath := fs.proc.Path("mdstat") - content, err := ioutil.ReadFile(mdStatusFilePath) +// MDStat parses an mdstat-file (/proc/mdstat) and returns a slice of +// structs containing the relevant info. More information available here: +// https://raid.wiki.kernel.org/index.php/Mdstat +func (fs FS) MDStat() ([]MDStat, error) { + data, err := ioutil.ReadFile(fs.proc.Path("mdstat")) if err != nil { - return []MDStat{}, fmt.Errorf("error parsing %s: %s", mdStatusFilePath, err) + return nil, fmt.Errorf("error parsing mdstat %s: %s", fs.proc.Path("mdstat"), err) } + mdstat, err := parseMDStat(data) + if err != nil { + return nil, fmt.Errorf("error parsing mdstat %s: %s", fs.proc.Path("mdstat"), err) + } + return mdstat, nil +} - mdStates := []MDStat{} - lines := strings.Split(string(content), "\n") +// parseMDStat parses data from mdstat file (/proc/mdstat) and returns a slice of +// structs containing the relevant info. 
+func parseMDStat(mdstatData []byte) ([]MDStat, error) { + mdStats := []MDStat{} + lines := strings.Split(string(mdstatData), "\n") for i, l := range lines { - if l == "" { - continue - } - if l[0] == ' ' { - continue - } - if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") { + if strings.TrimSpace(l) == "" || l[0] == ' ' || + strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") { continue } - mainLine := strings.Split(l, " ") - if len(mainLine) < 3 { - return mdStates, fmt.Errorf("error parsing mdline: %s", l) + deviceFields := strings.Fields(l) + if len(deviceFields) < 3 { + return nil, fmt.Errorf("not enough fields in mdline (expected at least 3): %s", l) } - mdName := mainLine[0] - activityState := mainLine[2] + mdName := deviceFields[0] + activityState := deviceFields[2] if len(lines) <= i+3 { - return mdStates, fmt.Errorf( - "error parsing %s: too few lines for md device %s", - mdStatusFilePath, - mdName, - ) + return mdStats, fmt.Errorf("missing lines for md device %s", mdName) } - active, total, size, err := evalStatusline(lines[i+1]) + active, total, size, err := evalStatusLine(lines[i+1]) if err != nil { - return mdStates, fmt.Errorf("error parsing %s: %s", mdStatusFilePath, err) + return nil, err } - // j is the line number of the syncing-line. - j := i + 2 + syncLineIdx := i + 2 if strings.Contains(lines[i+2], "bitmap") { // skip bitmap line - j = i + 3 + syncLineIdx++ } - // If device is syncing at the moment, get the number of currently + // If device is recovering/syncing at the moment, get the number of currently // synced bytes, otherwise that number equals the size of the device. 
syncedBlocks := size - if strings.Contains(lines[j], "recovery") || strings.Contains(lines[j], "resync") { - syncedBlocks, err = evalBuildline(lines[j]) + if strings.Contains(lines[syncLineIdx], "recovery") || strings.Contains(lines[syncLineIdx], "resync") { + syncedBlocks, err = evalRecoveryLine(lines[syncLineIdx]) if err != nil { - return mdStates, fmt.Errorf("error parsing %s: %s", mdStatusFilePath, err) + return nil, err } } - mdStates = append(mdStates, MDStat{ + mdStats = append(mdStats, MDStat{ Name: mdName, ActivityState: activityState, DisksActive: active, @@ -109,10 +109,10 @@ func (fs FS) ParseMDStat() (mdstates []MDStat, err error) { }) } - return mdStates, nil + return mdStats, nil } -func evalStatusline(statusline string) (active, total, size int64, err error) { +func evalStatusLine(statusline string) (active, total, size int64, err error) { matches := statuslineRE.FindStringSubmatch(statusline) if len(matches) != 4 { return 0, 0, 0, fmt.Errorf("unexpected statusline: %s", statusline) @@ -136,7 +136,7 @@ func evalStatusline(statusline string) (active, total, size int64, err error) { return active, total, size, nil } -func evalBuildline(buildline string) (syncedBlocks int64, err error) { +func evalRecoveryLine(buildline string) (syncedBlocks int64, err error) { matches := buildlineRE.FindStringSubmatch(buildline) if len(matches) != 2 { return 0, fmt.Errorf("unexpected buildline: %s", buildline) diff --git a/vendor/github.com/prometheus/procfs/mountstats.go b/vendor/github.com/prometheus/procfs/mountstats.go index fc385afcf..35b2ef351 100644 --- a/vendor/github.com/prometheus/procfs/mountstats.go +++ b/vendor/github.com/prometheus/procfs/mountstats.go @@ -69,8 +69,8 @@ type MountStats interface { type MountStatsNFS struct { // The version of statistics provided. StatVersion string - // The optional mountaddr of the NFS mount. - MountAddress string + // The mount options of the NFS mount. + Opts map[string]string // The age of the NFS mount. 
Age time.Duration // Statistics related to byte counters for various operations. @@ -181,11 +181,11 @@ type NFSOperationStats struct { // Number of bytes received for this operation, including RPC headers and payload. BytesReceived uint64 // Duration all requests spent queued for transmission before they were sent. - CumulativeQueueTime time.Duration + CumulativeQueueMilliseconds uint64 // Duration it took to get a reply back after the request was transmitted. - CumulativeTotalResponseTime time.Duration + CumulativeTotalResponseMilliseconds uint64 // Duration from when a request was enqueued to when it was completely handled. - CumulativeTotalRequestTime time.Duration + CumulativeTotalRequestMilliseconds uint64 } // A NFSTransportStats contains statistics for the NFS mount RPC requests and @@ -204,7 +204,7 @@ type NFSTransportStats struct { // spent waiting for connections to the server to be established. ConnectIdleTime uint64 // Duration since the NFS mount last saw any RPC traffic. - IdleTime time.Duration + IdleTimeSeconds uint64 // Number of RPC requests for this mount sent to the NFS server. Sends uint64 // Number of RPC responses for this mount received from the NFS server. 
@@ -342,10 +342,15 @@ func parseMountStatsNFS(s *bufio.Scanner, statVersion string) (*MountStatsNFS, e switch ss[0] { case fieldOpts: + if stats.Opts == nil { + stats.Opts = map[string]string{} + } for _, opt := range strings.Split(ss[1], ",") { split := strings.Split(opt, "=") - if len(split) == 2 && split[0] == "mountaddr" { - stats.MountAddress = split[1] + if len(split) == 2 { + stats.Opts[split[0]] = split[1] + } else { + stats.Opts[opt] = "" } } case fieldAge: @@ -519,15 +524,15 @@ func parseNFSOperationStats(s *bufio.Scanner) ([]NFSOperationStats, error) { } ops = append(ops, NFSOperationStats{ - Operation: strings.TrimSuffix(ss[0], ":"), - Requests: ns[0], - Transmissions: ns[1], - MajorTimeouts: ns[2], - BytesSent: ns[3], - BytesReceived: ns[4], - CumulativeQueueTime: time.Duration(ns[5]) * time.Millisecond, - CumulativeTotalResponseTime: time.Duration(ns[6]) * time.Millisecond, - CumulativeTotalRequestTime: time.Duration(ns[7]) * time.Millisecond, + Operation: strings.TrimSuffix(ss[0], ":"), + Requests: ns[0], + Transmissions: ns[1], + MajorTimeouts: ns[2], + BytesSent: ns[3], + BytesReceived: ns[4], + CumulativeQueueMilliseconds: ns[5], + CumulativeTotalResponseMilliseconds: ns[6], + CumulativeTotalRequestMilliseconds: ns[7], }) } @@ -603,7 +608,7 @@ func parseNFSTransportStats(ss []string, statVersion string) (*NFSTransportStats Bind: ns[1], Connect: ns[2], ConnectIdleTime: ns[3], - IdleTime: time.Duration(ns[4]) * time.Second, + IdleTimeSeconds: ns[4], Sends: ns[5], Receives: ns[6], BadTransactionIDs: ns[7], diff --git a/vendor/github.com/prometheus/procfs/net_dev.go b/vendor/github.com/prometheus/procfs/net_dev.go index 0063594e6..a0b7a0119 100644 --- a/vendor/github.com/prometheus/procfs/net_dev.go +++ b/vendor/github.com/prometheus/procfs/net_dev.go @@ -47,23 +47,13 @@ type NetDevLine struct { // are interface names. type NetDev map[string]NetDevLine -// NewNetDev returns kernel/system statistics read from /proc/net/dev. 
-func NewNetDev() (NetDev, error) { - fs, err := NewFS(DefaultMountPoint) - if err != nil { - return nil, err - } - - return fs.NewNetDev() -} - -// NewNetDev returns kernel/system statistics read from /proc/net/dev. -func (fs FS) NewNetDev() (NetDev, error) { +// NetDev returns kernel/system statistics read from /proc/net/dev. +func (fs FS) NetDev() (NetDev, error) { return newNetDev(fs.proc.Path("net/dev")) } -// NewNetDev returns kernel/system statistics read from /proc/[pid]/net/dev. -func (p Proc) NewNetDev() (NetDev, error) { +// NetDev returns kernel/system statistics read from /proc/[pid]/net/dev. +func (p Proc) NetDev() (NetDev, error) { return newNetDev(p.path("net/dev")) } @@ -75,7 +65,7 @@ func newNetDev(file string) (NetDev, error) { } defer f.Close() - nd := NetDev{} + netDev := NetDev{} s := bufio.NewScanner(f) for n := 0; s.Scan(); n++ { // Skip the 2 header lines. @@ -83,20 +73,20 @@ func newNetDev(file string) (NetDev, error) { continue } - line, err := nd.parseLine(s.Text()) + line, err := netDev.parseLine(s.Text()) if err != nil { - return nd, err + return netDev, err } - nd[line.Name] = *line + netDev[line.Name] = *line } - return nd, s.Err() + return netDev, s.Err() } // parseLine parses a single line from the /proc/net/dev file. Header lines // must be filtered prior to calling this method. -func (nd NetDev) parseLine(rawLine string) (*NetDevLine, error) { +func (netDev NetDev) parseLine(rawLine string) (*NetDevLine, error) { parts := strings.SplitN(rawLine, ":", 2) if len(parts) != 2 { return nil, errors.New("invalid net/dev line, missing colon") @@ -185,11 +175,11 @@ func (nd NetDev) parseLine(rawLine string) (*NetDevLine, error) { // Total aggregates the values across interfaces and returns a new NetDevLine. // The Name field will be a sorted comma separated list of interface names. 
-func (nd NetDev) Total() NetDevLine { +func (netDev NetDev) Total() NetDevLine { total := NetDevLine{} - names := make([]string, 0, len(nd)) - for _, ifc := range nd { + names := make([]string, 0, len(netDev)) + for _, ifc := range netDev { names = append(names, ifc.Name) total.RxBytes += ifc.RxBytes total.RxPackets += ifc.RxPackets diff --git a/vendor/github.com/prometheus/procfs/net_unix.go b/vendor/github.com/prometheus/procfs/net_unix.go new file mode 100644 index 000000000..240340a83 --- /dev/null +++ b/vendor/github.com/prometheus/procfs/net_unix.go @@ -0,0 +1,275 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package procfs + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "strconv" + "strings" +) + +// For the proc file format details, +// see https://elixir.bootlin.com/linux/v4.17/source/net/unix/af_unix.c#L2815 +// and https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/net.h#L48. + +const ( + netUnixKernelPtrIdx = iota + netUnixRefCountIdx + _ + netUnixFlagsIdx + netUnixTypeIdx + netUnixStateIdx + netUnixInodeIdx + + // Inode and Path are optional. 
+ netUnixStaticFieldsCnt = 6 +) + +const ( + netUnixTypeStream = 1 + netUnixTypeDgram = 2 + netUnixTypeSeqpacket = 5 + + netUnixFlagListen = 1 << 16 + + netUnixStateUnconnected = 1 + netUnixStateConnecting = 2 + netUnixStateConnected = 3 + netUnixStateDisconnected = 4 +) + +var errInvalidKernelPtrFmt = errors.New("Invalid Num(the kernel table slot number) format") + +// NetUnixType is the type of the type field. +type NetUnixType uint64 + +// NetUnixFlags is the type of the flags field. +type NetUnixFlags uint64 + +// NetUnixState is the type of the state field. +type NetUnixState uint64 + +// NetUnixLine represents a line of /proc/net/unix. +type NetUnixLine struct { + KernelPtr string + RefCount uint64 + Protocol uint64 + Flags NetUnixFlags + Type NetUnixType + State NetUnixState + Inode uint64 + Path string +} + +// NetUnix holds the data read from /proc/net/unix. +type NetUnix struct { + Rows []*NetUnixLine +} + +// NewNetUnix returns data read from /proc/net/unix. +func NewNetUnix() (*NetUnix, error) { + fs, err := NewFS(DefaultMountPoint) + if err != nil { + return nil, err + } + + return fs.NewNetUnix() +} + +// NewNetUnix returns data read from /proc/net/unix. +func (fs FS) NewNetUnix() (*NetUnix, error) { + return NewNetUnixByPath(fs.proc.Path("net/unix")) +} + +// NewNetUnixByPath returns data read from /proc/net/unix by file path. +// It might returns an error with partial parsed data, if an error occur after some data parsed. +func NewNetUnixByPath(path string) (*NetUnix, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + return NewNetUnixByReader(f) +} + +// NewNetUnixByReader returns data read from /proc/net/unix by a reader. +// It might returns an error with partial parsed data, if an error occur after some data parsed. +func NewNetUnixByReader(reader io.Reader) (*NetUnix, error) { + nu := &NetUnix{ + Rows: make([]*NetUnixLine, 0, 32), + } + scanner := bufio.NewScanner(reader) + // Omit the header line. 
+ scanner.Scan() + header := scanner.Text() + // From the man page of proc(5), it does not contain an Inode field, + // but in actually it exists. + // This code works for both cases. + hasInode := strings.Contains(header, "Inode") + + minFieldsCnt := netUnixStaticFieldsCnt + if hasInode { + minFieldsCnt++ + } + for scanner.Scan() { + line := scanner.Text() + item, err := nu.parseLine(line, hasInode, minFieldsCnt) + if err != nil { + return nu, err + } + nu.Rows = append(nu.Rows, item) + } + + return nu, scanner.Err() +} + +func (u *NetUnix) parseLine(line string, hasInode bool, minFieldsCnt int) (*NetUnixLine, error) { + fields := strings.Fields(line) + fieldsLen := len(fields) + if fieldsLen < minFieldsCnt { + return nil, fmt.Errorf( + "Parse Unix domain failed: expect at least %d fields but got %d", + minFieldsCnt, fieldsLen) + } + kernelPtr, err := u.parseKernelPtr(fields[netUnixKernelPtrIdx]) + if err != nil { + return nil, fmt.Errorf("Parse Unix domain num(%s) failed: %s", fields[netUnixKernelPtrIdx], err) + } + users, err := u.parseUsers(fields[netUnixRefCountIdx]) + if err != nil { + return nil, fmt.Errorf("Parse Unix domain ref count(%s) failed: %s", fields[netUnixRefCountIdx], err) + } + flags, err := u.parseFlags(fields[netUnixFlagsIdx]) + if err != nil { + return nil, fmt.Errorf("Parse Unix domain flags(%s) failed: %s", fields[netUnixFlagsIdx], err) + } + typ, err := u.parseType(fields[netUnixTypeIdx]) + if err != nil { + return nil, fmt.Errorf("Parse Unix domain type(%s) failed: %s", fields[netUnixTypeIdx], err) + } + state, err := u.parseState(fields[netUnixStateIdx]) + if err != nil { + return nil, fmt.Errorf("Parse Unix domain state(%s) failed: %s", fields[netUnixStateIdx], err) + } + var inode uint64 + if hasInode { + inodeStr := fields[netUnixInodeIdx] + inode, err = u.parseInode(inodeStr) + if err != nil { + return nil, fmt.Errorf("Parse Unix domain inode(%s) failed: %s", inodeStr, err) + } + } + + nuLine := &NetUnixLine{ + KernelPtr: kernelPtr, 
+ RefCount: users, + Type: typ, + Flags: flags, + State: state, + Inode: inode, + } + + // Path field is optional. + if fieldsLen > minFieldsCnt { + pathIdx := netUnixInodeIdx + 1 + if !hasInode { + pathIdx-- + } + nuLine.Path = fields[pathIdx] + } + + return nuLine, nil +} + +func (u NetUnix) parseKernelPtr(str string) (string, error) { + if !strings.HasSuffix(str, ":") { + return "", errInvalidKernelPtrFmt + } + return str[:len(str)-1], nil +} + +func (u NetUnix) parseUsers(hexStr string) (uint64, error) { + return strconv.ParseUint(hexStr, 16, 32) +} + +func (u NetUnix) parseProtocol(hexStr string) (uint64, error) { + return strconv.ParseUint(hexStr, 16, 32) +} + +func (u NetUnix) parseType(hexStr string) (NetUnixType, error) { + typ, err := strconv.ParseUint(hexStr, 16, 16) + if err != nil { + return 0, err + } + return NetUnixType(typ), nil +} + +func (u NetUnix) parseFlags(hexStr string) (NetUnixFlags, error) { + flags, err := strconv.ParseUint(hexStr, 16, 32) + if err != nil { + return 0, err + } + return NetUnixFlags(flags), nil +} + +func (u NetUnix) parseState(hexStr string) (NetUnixState, error) { + st, err := strconv.ParseInt(hexStr, 16, 8) + if err != nil { + return 0, err + } + return NetUnixState(st), nil +} + +func (u NetUnix) parseInode(inodeStr string) (uint64, error) { + return strconv.ParseUint(inodeStr, 10, 64) +} + +func (t NetUnixType) String() string { + switch t { + case netUnixTypeStream: + return "stream" + case netUnixTypeDgram: + return "dgram" + case netUnixTypeSeqpacket: + return "seqpacket" + } + return "unknown" +} + +func (f NetUnixFlags) String() string { + switch f { + case netUnixFlagListen: + return "listen" + default: + return "default" + } +} + +func (s NetUnixState) String() string { + switch s { + case netUnixStateUnconnected: + return "unconnected" + case netUnixStateConnecting: + return "connecting" + case netUnixStateConnected: + return "connected" + case netUnixStateDisconnected: + return "disconnected" + } + return 
"unknown" +} diff --git a/vendor/github.com/prometheus/procfs/proc.go b/vendor/github.com/prometheus/procfs/proc.go index 8e38493a8..8a8430147 100644 --- a/vendor/github.com/prometheus/procfs/proc.go +++ b/vendor/github.com/prometheus/procfs/proc.go @@ -54,7 +54,7 @@ func NewProc(pid int) (Proc, error) { if err != nil { return Proc{}, err } - return fs.NewProc(pid) + return fs.Proc(pid) } // AllProcs returns a list of all currently available processes under /proc. @@ -76,11 +76,18 @@ func (fs FS) Self() (Proc, error) { if err != nil { return Proc{}, err } - return fs.NewProc(pid) + return fs.Proc(pid) } // NewProc returns a process for the given pid. +// +// Deprecated: use fs.Proc() instead func (fs FS) NewProc(pid int) (Proc, error) { + return fs.Proc(pid) +} + +// Proc returns a process for the given pid. +func (fs FS) Proc(pid int) (Proc, error) { if _, err := os.Stat(fs.proc.Path(strconv.Itoa(pid))); err != nil { return Proc{}, err } diff --git a/vendor/github.com/prometheus/procfs/proc_io.go b/vendor/github.com/prometheus/procfs/proc_io.go index 0251c83bf..0ff89b1ce 100644 --- a/vendor/github.com/prometheus/procfs/proc_io.go +++ b/vendor/github.com/prometheus/procfs/proc_io.go @@ -39,8 +39,8 @@ type ProcIO struct { CancelledWriteBytes int64 } -// NewIO creates a new ProcIO instance from a given Proc instance. -func (p Proc) NewIO() (ProcIO, error) { +// IO creates a new ProcIO instance from a given Proc instance. +func (p Proc) IO() (ProcIO, error) { pio := ProcIO{} f, err := os.Open(p.path("io")) diff --git a/vendor/github.com/prometheus/procfs/proc_limits.go b/vendor/github.com/prometheus/procfs/proc_limits.go index f04ba6fda..91ee24df8 100644 --- a/vendor/github.com/prometheus/procfs/proc_limits.go +++ b/vendor/github.com/prometheus/procfs/proc_limits.go @@ -78,7 +78,14 @@ var ( ) // NewLimits returns the current soft limits of the process. 
+// +// Deprecated: use p.Limits() instead func (p Proc) NewLimits() (ProcLimits, error) { + return p.Limits() +} + +// Limits returns the current soft limits of the process. +func (p Proc) Limits() (ProcLimits, error) { f, err := os.Open(p.path("limits")) if err != nil { return ProcLimits{}, err diff --git a/vendor/github.com/prometheus/procfs/proc_ns.go b/vendor/github.com/prometheus/procfs/proc_ns.go index d06c26eba..c66740ff7 100644 --- a/vendor/github.com/prometheus/procfs/proc_ns.go +++ b/vendor/github.com/prometheus/procfs/proc_ns.go @@ -29,9 +29,9 @@ type Namespace struct { // Namespaces contains all of the namespaces that the process is contained in. type Namespaces map[string]Namespace -// NewNamespaces reads from /proc/[pid/ns/* to get the namespaces of which the +// Namespaces reads from /proc//ns/* to get the namespaces of which the // process is a member. -func (p Proc) NewNamespaces() (Namespaces, error) { +func (p Proc) Namespaces() (Namespaces, error) { d, err := os.Open(p.path("ns")) if err != nil { return nil, err diff --git a/vendor/github.com/prometheus/procfs/proc_psi.go b/vendor/github.com/prometheus/procfs/proc_psi.go index a23d4c0f0..46fe26626 100644 --- a/vendor/github.com/prometheus/procfs/proc_psi.go +++ b/vendor/github.com/prometheus/procfs/proc_psi.go @@ -51,19 +51,10 @@ type PSIStats struct { Full *PSILine } -// NewPSIStatsForResource reads pressure stall information for the specified -// resource. At time of writing this can be either "cpu", "memory" or "io". -func NewPSIStatsForResource(resource string) (PSIStats, error) { - fs, err := NewFS(DefaultMountPoint) - if err != nil { - return PSIStats{}, err - } - - return fs.NewPSIStatsForResource(resource) -} - -// NewPSIStatsForResource reads pressure stall information from /proc/pressure/ -func (fs FS) NewPSIStatsForResource(resource string) (PSIStats, error) { +// PSIStatsForResource reads pressure stall information for the specified +// resource from /proc/pressure/. 
At time of writing this can be +// either "cpu", "memory" or "io". +func (fs FS) PSIStatsForResource(resource string) (PSIStats, error) { file, err := os.Open(fs.proc.Path(fmt.Sprintf("%s/%s", "pressure", resource))) if err != nil { return PSIStats{}, fmt.Errorf("psi_stats: unavailable for %s", resource) diff --git a/vendor/github.com/prometheus/procfs/proc_stat.go b/vendor/github.com/prometheus/procfs/proc_stat.go index 4c8b03ced..6ed98a8ae 100644 --- a/vendor/github.com/prometheus/procfs/proc_stat.go +++ b/vendor/github.com/prometheus/procfs/proc_stat.go @@ -105,7 +105,14 @@ type ProcStat struct { } // NewStat returns the current status information of the process. +// +// Deprecated: use NewStat() instead func (p Proc) NewStat() (ProcStat, error) { + return p.Stat() +} + +// Stat returns the current status information of the process. +func (p Proc) Stat() (ProcStat, error) { f, err := os.Open(p.path("stat")) if err != nil { return ProcStat{}, err @@ -178,7 +185,7 @@ func (s ProcStat) ResidentMemory() int { // StartTime returns the unix timestamp of the process in seconds. func (s ProcStat) StartTime() (float64, error) { fs := FS{proc: s.proc} - stat, err := fs.NewStat() + stat, err := fs.Stat() if err != nil { return 0, err } diff --git a/vendor/github.com/prometheus/procfs/proc_status.go b/vendor/github.com/prometheus/procfs/proc_status.go new file mode 100644 index 000000000..6b4b61f71 --- /dev/null +++ b/vendor/github.com/prometheus/procfs/proc_status.go @@ -0,0 +1,162 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +package procfs + +import ( + "bytes" + "io/ioutil" + "os" + "strconv" + "strings" +) + +// ProcStat provides status information about the process, +// read from /proc/[pid]/stat. +type ProcStatus struct { + // The process ID. + PID int + // The process name. + Name string + + // Peak virtual memory size. + VmPeak uint64 + // Virtual memory size. + VmSize uint64 + // Locked memory size. + VmLck uint64 + // Pinned memory size. + VmPin uint64 + // Peak resident set size. + VmHWM uint64 + // Resident set size (sum of RssAnnon RssFile and RssShmem). + VmRSS uint64 + // Size of resident anonymous memory. + RssAnon uint64 + // Size of resident file mappings. + RssFile uint64 + // Size of resident shared memory. + RssShmem uint64 + // Size of data segments. + VmData uint64 + // Size of stack segments. + VmStk uint64 + // Size of text segments. + VmExe uint64 + // Shared library code size. + VmLib uint64 + // Page table entries size. + VmPTE uint64 + // Size of second-level page tables. + VmPMD uint64 + // Swapped-out virtual memory size by anonymous private. + VmSwap uint64 + // Size of hugetlb memory portions + HugetlbPages uint64 + + // Number of voluntary context switches. + VoluntaryCtxtSwitches uint64 + // Number of involuntary context switches. + NonVoluntaryCtxtSwitches uint64 +} + +// NewStatus returns the current status information of the process. 
+func (p Proc) NewStatus() (ProcStatus, error) { + f, err := os.Open(p.path("status")) + if err != nil { + return ProcStatus{}, err + } + defer f.Close() + + data, err := ioutil.ReadAll(f) + if err != nil { + return ProcStatus{}, err + } + + s := ProcStatus{PID: p.PID} + + lines := strings.Split(string(data), "\n") + for _, line := range lines { + if !bytes.Contains([]byte(line), []byte(":")) { + continue + } + + kv := strings.SplitN(line, ":", 2) + + // removes spaces + k := string(strings.TrimSpace(kv[0])) + v := string(strings.TrimSpace(kv[1])) + // removes "kB" + v = string(bytes.Trim([]byte(v), " kB")) + + // value to int when possible + // we can skip error check here, 'cause vKBytes is not used when value is a string + vKBytes, _ := strconv.ParseUint(v, 10, 64) + // convert kB to B + vBytes := vKBytes * 1024 + + s.fillStatus(k, v, vKBytes, vBytes) + } + + return s, nil +} + +func (s *ProcStatus) fillStatus(k string, vString string, vUint uint64, vUintBytes uint64) { + switch k { + case "Name": + s.Name = vString + case "VmPeak": + s.VmPeak = vUintBytes + case "VmSize": + s.VmSize = vUintBytes + case "VmLck": + s.VmLck = vUintBytes + case "VmPin": + s.VmPin = vUintBytes + case "VmHWM": + s.VmHWM = vUintBytes + case "VmRSS": + s.VmRSS = vUintBytes + case "RssAnon": + s.RssAnon = vUintBytes + case "RssFile": + s.RssFile = vUintBytes + case "RssShmem": + s.RssShmem = vUintBytes + case "VmData": + s.VmData = vUintBytes + case "VmStk": + s.VmStk = vUintBytes + case "VmExe": + s.VmExe = vUintBytes + case "VmLib": + s.VmLib = vUintBytes + case "VmPTE": + s.VmPTE = vUintBytes + case "VmPMD": + s.VmPMD = vUintBytes + case "VmSwap": + s.VmSwap = vUintBytes + case "HugetlbPages": + s.HugetlbPages = vUintBytes + case "voluntary_ctxt_switches": + s.VoluntaryCtxtSwitches = vUint + case "nonvoluntary_ctxt_switches": + s.NonVoluntaryCtxtSwitches = vUint + } +} + +// TotalCtxtSwitches returns the total context switch. 
+func (s ProcStatus) TotalCtxtSwitches() uint64 { + return s.VoluntaryCtxtSwitches + s.NonVoluntaryCtxtSwitches +} diff --git a/vendor/github.com/prometheus/procfs/stat.go b/vendor/github.com/prometheus/procfs/stat.go index 44c9af1b0..6661ee03a 100644 --- a/vendor/github.com/prometheus/procfs/stat.go +++ b/vendor/github.com/prometheus/procfs/stat.go @@ -20,6 +20,8 @@ import ( "os" "strconv" "strings" + + "github.com/prometheus/procfs/internal/fs" ) // CPUStat shows how much time the cpu spend in various stages. @@ -78,16 +80,6 @@ type Stat struct { SoftIRQ SoftIRQStat } -// NewStat returns kernel/system statistics read from /proc/stat. -func NewStat() (Stat, error) { - fs, err := NewFS(DefaultMountPoint) - if err != nil { - return Stat{}, err - } - - return fs.NewStat() -} - // Parse a cpu statistics line and returns the CPUStat struct plus the cpu id (or -1 for the overall sum). func parseCPUStat(line string) (CPUStat, int64, error) { cpuStat := CPUStat{} @@ -149,9 +141,29 @@ func parseSoftIRQStat(line string) (SoftIRQStat, uint64, error) { return softIRQStat, total, nil } -// NewStat returns an information about current kernel/system statistics. +// NewStat returns information about current cpu/process statistics. +// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt +// +// Deprecated: use fs.Stat() instead +func NewStat() (Stat, error) { + fs, err := NewFS(fs.DefaultProcMountPoint) + if err != nil { + return Stat{}, err + } + return fs.Stat() +} + +// NewStat returns information about current cpu/process statistics. +// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt +// +// Deprecated: use fs.Stat() instead func (fs FS) NewStat() (Stat, error) { - // See https://www.kernel.org/doc/Documentation/filesystems/proc.txt + return fs.Stat() +} + +// Stat returns information about current cpu/process statistics. 
+// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt +func (fs FS) Stat() (Stat, error) { f, err := os.Open(fs.proc.Path("stat")) if err != nil { diff --git a/vendor/github.com/prometheus/procfs/ttar b/vendor/github.com/prometheus/procfs/ttar index b0171a12b..19ef02b8d 100644 --- a/vendor/github.com/prometheus/procfs/ttar +++ b/vendor/github.com/prometheus/procfs/ttar @@ -86,8 +86,10 @@ Usage: $bname [-C ] -c -f (create archive) $bname [-C ] -x -f (extract archive) Options: - -C (change directory) - -v (verbose) + -C (change directory) + -v (verbose) + --recursive-unlink (recursively delete existing directory if path + collides with file or directory to extract) Example: Change to sysfs directory, create ttar file from fixtures directory $bname -C sysfs -c -f sysfs/fixtures.ttar fixtures/ @@ -111,8 +113,9 @@ function set_cmd { } unset VERBOSE +unset RECURSIVE_UNLINK -while getopts :cf:htxvC: opt; do +while getopts :cf:-:htxvC: opt; do case $opt in c) set_cmd "create" @@ -136,6 +139,18 @@ while getopts :cf:htxvC: opt; do C) CDIR=$OPTARG ;; + -) + case $OPTARG in + recursive-unlink) + RECURSIVE_UNLINK="yes" + ;; + *) + echo -e "Error: invalid option -$OPTARG" + echo + usage 1 + ;; + esac + ;; *) echo >&2 "ERROR: invalid option -$OPTARG" echo @@ -212,16 +227,16 @@ function extract { local eof_without_newline if [ "$size" -gt 0 ]; then if [[ "$line" =~ [^\\]EOF ]]; then - # An EOF not preceeded by a backslash indicates that the line + # An EOF not preceded by a backslash indicates that the line # does not end with a newline eof_without_newline=1 else eof_without_newline=0 fi # Replace NULLBYTE with null byte if at beginning of line - # Replace NULLBYTE with null byte unless preceeded by backslash + # Replace NULLBYTE with null byte unless preceded by backslash # Remove one backslash in front of NULLBYTE (if any) - # Remove EOF unless preceeded by backslash + # Remove EOF unless preceded by backslash # Remove one backslash in front of EOF if [ 
$USE_PYTHON -eq 1 ]; then echo -n "$line" | python -c "$PYTHON_EXTRACT_FILTER" >> "$path" @@ -245,7 +260,16 @@ function extract { fi if [[ $line =~ ^Path:\ (.*)$ ]]; then path=${BASH_REMATCH[1]} - if [ -e "$path" ] || [ -L "$path" ]; then + if [ -L "$path" ]; then + rm "$path" + elif [ -d "$path" ]; then + if [ "${RECURSIVE_UNLINK:-}" == "yes" ]; then + rm -r "$path" + else + # Safe because symlinks to directories are dealt with above + rmdir "$path" + fi + elif [ -e "$path" ]; then rm "$path" fi elif [[ $line =~ ^Lines:\ (.*)$ ]]; then @@ -338,8 +362,8 @@ function _create { else < "$file" \ sed 's/EOF/\\EOF/g; - s/NULLBYTE/\\NULLBYTE/g; - s/\x0/NULLBYTE/g; + s/NULLBYTE/\\NULLBYTE/g; + s/\x0/NULLBYTE/g; ' fi if [[ "$eof_without_newline" -eq 1 ]]; then diff --git a/vendor/github.com/prometheus/prometheus/LICENSE b/vendor/github.com/prometheus/prometheus/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/prometheus/prometheus/NOTICE b/vendor/github.com/prometheus/prometheus/NOTICE new file mode 100644 index 000000000..47de2415e --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/NOTICE @@ -0,0 +1,87 @@ +The Prometheus systems and service monitoring server +Copyright 2012-2015 The Prometheus Authors + +This product includes software developed at +SoundCloud Ltd. (http://soundcloud.com/). + + +The following components are included in this product: + +Bootstrap +http://getbootstrap.com +Copyright 2011-2014 Twitter, Inc. 
+Licensed under the MIT License + +bootstrap3-typeahead.js +https://github.com/bassjobsen/Bootstrap-3-Typeahead +Original written by @mdo and @fat +Copyright 2014 Bass Jobsen @bassjobsen +Licensed under the Apache License, Version 2.0 + +fuzzy +https://github.com/mattyork/fuzzy +Original written by @mattyork +Copyright 2012 Matt York +Licensed under the MIT License + +bootstrap-datetimepicker.js +https://github.com/Eonasdan/bootstrap-datetimepicker +Copyright 2015 Jonathan Peterson (@Eonasdan) +Licensed under the MIT License + +moment.js +https://github.com/moment/moment/ +Copyright JS Foundation and other contributors +Licensed under the MIT License + +Rickshaw +https://github.com/shutterstock/rickshaw +Copyright 2011-2014 by Shutterstock Images, LLC +See https://github.com/shutterstock/rickshaw/blob/master/LICENSE for license details + +mustache.js +https://github.com/janl/mustache.js +Copyright 2009 Chris Wanstrath (Ruby) +Copyright 2010-2014 Jan Lehnardt (JavaScript) +Copyright 2010-2015 The mustache.js community +Licensed under the MIT License + +jQuery +https://jquery.org +Copyright jQuery Foundation and other contributors +Licensed under the MIT License + +Go support for Protocol Buffers - Google's data interchange format +http://github.com/golang/protobuf/ +Copyright 2010 The Go Authors +See source code for license details. + +Go support for leveled logs, analogous to +https://code.google.com/p/google-glog/ +Copyright 2013 Google Inc. +Licensed under the Apache License, Version 2.0 + +Support for streaming Protocol Buffer messages for the Go language (golang). +https://github.com/matttproud/golang_protobuf_extensions +Copyright 2013 Matt T. Proud +Licensed under the Apache License, Version 2.0 + +DNS library in Go +http://miek.nl/posts/2014/Aug/16/go-dns-package/ +Copyright 2009 The Go Authors, 2011 Miek Gieben +See https://github.com/miekg/dns/blob/master/LICENSE for license details. 
+ +LevelDB key/value database in Go +https://github.com/syndtr/goleveldb +Copyright 2012 Suryandaru Triandana +See https://github.com/syndtr/goleveldb/blob/master/LICENSE for license details. + +gosnappy - a fork of code.google.com/p/snappy-go +https://github.com/syndtr/gosnappy +Copyright 2011 The Snappy-Go Authors +See https://github.com/syndtr/gosnappy/blob/master/LICENSE for license details. + +go-zookeeper - Native ZooKeeper client for Go +https://github.com/samuel/go-zookeeper +Copyright (c) 2013, Samuel Stauffer +See https://github.com/samuel/go-zookeeper/blob/master/LICENSE for license details. diff --git a/vendor/github.com/prometheus/prometheus/promql/ast.go b/vendor/github.com/prometheus/prometheus/promql/ast.go new file mode 100644 index 000000000..b3ccd2570 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/ast.go @@ -0,0 +1,317 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "fmt" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/local" + "github.com/prometheus/prometheus/storage/metric" +) + +// Node is a generic interface for all nodes in an AST. +// +// Whenever numerous nodes are listed such as in a switch-case statement +// or a chain of function definitions (e.g. String(), expr(), etc.) convention is +// to list them as follows: +// +// - Statements +// - statement types (alphabetical) +// - ... 
+// - Expressions +// - expression types (alphabetical) +// - ... +// +type Node interface { + // String representation of the node that returns the given node when parsed + // as part of a valid query. + String() string +} + +// Statement is a generic interface for all statements. +type Statement interface { + Node + + // stmt ensures that no other type accidentally implements the interface + stmt() +} + +// Statements is a list of statement nodes that implements Node. +type Statements []Statement + +// AlertStmt represents an added alert rule. +type AlertStmt struct { + Name string + Expr Expr + Duration time.Duration + Labels model.LabelSet + Annotations model.LabelSet +} + +// EvalStmt holds an expression and information on the range it should +// be evaluated on. +type EvalStmt struct { + Expr Expr // Expression to be evaluated. + + // The time boundaries for the evaluation. If Start equals End an instant + // is evaluated. + Start, End model.Time + // Time between two evaluated instants for the range [Start:End]. + Interval time.Duration +} + +// RecordStmt represents an added recording rule. +type RecordStmt struct { + Name string + Expr Expr + Labels model.LabelSet +} + +func (*AlertStmt) stmt() {} +func (*EvalStmt) stmt() {} +func (*RecordStmt) stmt() {} + +// Expr is a generic interface for all expression types. +type Expr interface { + Node + + // Type returns the type the expression evaluates to. It does not perform + // in-depth checks as this is done at parsing-time. + Type() model.ValueType + // expr ensures that no other types accidentally implement the interface. + expr() +} + +// Expressions is a list of expression nodes that implements Node. +type Expressions []Expr + +// AggregateExpr represents an aggregation operation on a vector. +type AggregateExpr struct { + Op itemType // The used aggregation operation. + Expr Expr // The vector expression over which is aggregated. + Param Expr // Parameter used by some aggregators. 
+ Grouping model.LabelNames // The labels by which to group the vector. + Without bool // Whether to drop the given labels rather than keep them. + KeepCommonLabels bool // Whether to keep common labels among result elements. +} + +// BinaryExpr represents a binary expression between two child expressions. +type BinaryExpr struct { + Op itemType // The operation of the expression. + LHS, RHS Expr // The operands on the respective sides of the operator. + + // The matching behavior for the operation if both operands are vectors. + // If they are not this field is nil. + VectorMatching *VectorMatching + + // If a comparison operator, return 0/1 rather than filtering. + ReturnBool bool +} + +// Call represents a function call. +type Call struct { + Func *Function // The function that was called. + Args Expressions // Arguments used in the call. +} + +// MatrixSelector represents a matrix selection. +type MatrixSelector struct { + Name string + Range time.Duration + Offset time.Duration + LabelMatchers metric.LabelMatchers + + // The series iterators are populated at query preparation time. + iterators []local.SeriesIterator +} + +// NumberLiteral represents a number. +type NumberLiteral struct { + Val model.SampleValue +} + +// ParenExpr wraps an expression so it cannot be disassembled as a consequence +// of operator precedence. +type ParenExpr struct { + Expr Expr +} + +// StringLiteral represents a string. +type StringLiteral struct { + Val string +} + +// UnaryExpr represents a unary operation on another expression. +// Currently unary operations are only supported for scalars. +type UnaryExpr struct { + Op itemType + Expr Expr +} + +// VectorSelector represents a vector selection. +type VectorSelector struct { + Name string + Offset time.Duration + LabelMatchers metric.LabelMatchers + + // The series iterators are populated at query preparation time. 
+ iterators []local.SeriesIterator +} + +func (e *AggregateExpr) Type() model.ValueType { return model.ValVector } +func (e *Call) Type() model.ValueType { return e.Func.ReturnType } +func (e *MatrixSelector) Type() model.ValueType { return model.ValMatrix } +func (e *NumberLiteral) Type() model.ValueType { return model.ValScalar } +func (e *ParenExpr) Type() model.ValueType { return e.Expr.Type() } +func (e *StringLiteral) Type() model.ValueType { return model.ValString } +func (e *UnaryExpr) Type() model.ValueType { return e.Expr.Type() } +func (e *VectorSelector) Type() model.ValueType { return model.ValVector } +func (e *BinaryExpr) Type() model.ValueType { + if e.LHS.Type() == model.ValScalar && e.RHS.Type() == model.ValScalar { + return model.ValScalar + } + return model.ValVector +} + +func (*AggregateExpr) expr() {} +func (*BinaryExpr) expr() {} +func (*Call) expr() {} +func (*MatrixSelector) expr() {} +func (*NumberLiteral) expr() {} +func (*ParenExpr) expr() {} +func (*StringLiteral) expr() {} +func (*UnaryExpr) expr() {} +func (*VectorSelector) expr() {} + +// VectorMatchCardinality describes the cardinality relationship +// of two vectors in a binary operation. +type VectorMatchCardinality int + +const ( + CardOneToOne VectorMatchCardinality = iota + CardManyToOne + CardOneToMany + CardManyToMany +) + +func (vmc VectorMatchCardinality) String() string { + switch vmc { + case CardOneToOne: + return "one-to-one" + case CardManyToOne: + return "many-to-one" + case CardOneToMany: + return "one-to-many" + case CardManyToMany: + return "many-to-many" + } + panic("promql.VectorMatchCardinality.String: unknown match cardinality") +} + +// VectorMatching describes how elements from two vectors in a binary +// operation are supposed to be matched. +type VectorMatching struct { + // The cardinality of the two vectors. + Card VectorMatchCardinality + // MatchingLabels contains the labels which define equality of a pair of + // elements from the vectors. 
+ MatchingLabels model.LabelNames + // On includes the given label names from matching, + // rather than excluding them. + On bool + // Include contains additional labels that should be included in + // the result from the side with the lower cardinality. + Include model.LabelNames +} + +// Visitor allows visiting a Node and its child nodes. The Visit method is +// invoked for each node encountered by Walk. If the result visitor w is not +// nil, Walk visits each of the children of node with the visitor w, followed +// by a call of w.Visit(nil). +type Visitor interface { + Visit(node Node) (w Visitor) +} + +// Walk traverses an AST in depth-first order: It starts by calling +// v.Visit(node); node must not be nil. If the visitor w returned by +// v.Visit(node) is not nil, Walk is invoked recursively with visitor +// w for each of the non-nil children of node, followed by a call of +// w.Visit(nil). +func Walk(v Visitor, node Node) { + if v = v.Visit(node); v == nil { + return + } + + switch n := node.(type) { + case Statements: + for _, s := range n { + Walk(v, s) + } + case *AlertStmt: + Walk(v, n.Expr) + + case *EvalStmt: + Walk(v, n.Expr) + + case *RecordStmt: + Walk(v, n.Expr) + + case Expressions: + for _, e := range n { + Walk(v, e) + } + case *AggregateExpr: + Walk(v, n.Expr) + + case *BinaryExpr: + Walk(v, n.LHS) + Walk(v, n.RHS) + + case *Call: + Walk(v, n.Args) + + case *ParenExpr: + Walk(v, n.Expr) + + case *UnaryExpr: + Walk(v, n.Expr) + + case *MatrixSelector, *NumberLiteral, *StringLiteral, *VectorSelector: + // nothing to do + + default: + panic(fmt.Errorf("promql.Walk: unhandled node type %T", node)) + } + + v.Visit(nil) +} + +type inspector func(Node) bool + +func (f inspector) Visit(node Node) Visitor { + if f(node) { + return f + } + return nil +} + +// Inspect traverses an AST in depth-first order: It starts by calling +// f(node); node must not be nil. If f returns true, Inspect invokes f +// for all the non-nil children of node, recursively. 
+func Inspect(node Node, f func(Node) bool) { + Walk(inspector(f), node) +} diff --git a/vendor/github.com/prometheus/prometheus/promql/engine.go b/vendor/github.com/prometheus/prometheus/promql/engine.go new file mode 100644 index 000000000..63c0b9606 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/engine.go @@ -0,0 +1,1436 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "container/heap" + "fmt" + "math" + "runtime" + "sort" + "time" + + opentracing "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + "golang.org/x/net/context" + + "github.com/prometheus/prometheus/storage/local" + "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/util/stats" +) + +const ( + namespace = "prometheus" + subsystem = "engine" + queryTag = "query" + + // The largest SampleValue that can be converted to an int64 without overflow. + maxInt64 model.SampleValue = 9223372036854774784 + // The smallest SampleValue that can be converted to an int64 without underflow. 
+ minInt64 model.SampleValue = -9223372036854775808 +) + +var ( + currentQueries = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "queries", + Help: "The current number of queries being executed or waiting.", + }) + maxConcurrentQueries = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "queries_concurrent_max", + Help: "The max number of concurrent queries.", + }) + queryPrepareTime = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "query_duration_seconds", + Help: "Query timings", + ConstLabels: prometheus.Labels{"slice": "prepare_time"}, + }, + ) + queryInnerEval = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "query_duration_seconds", + Help: "Query timings", + ConstLabels: prometheus.Labels{"slice": "inner_eval"}, + }, + ) + queryResultAppend = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "query_duration_seconds", + Help: "Query timings", + ConstLabels: prometheus.Labels{"slice": "result_append"}, + }, + ) + queryResultSort = prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "query_duration_seconds", + Help: "Query timings", + ConstLabels: prometheus.Labels{"slice": "result_sort"}, + }, + ) +) + +func init() { + prometheus.MustRegister(currentQueries) + prometheus.MustRegister(maxConcurrentQueries) + prometheus.MustRegister(queryPrepareTime) + prometheus.MustRegister(queryInnerEval) + prometheus.MustRegister(queryResultAppend) + prometheus.MustRegister(queryResultSort) +} + +// convertibleToInt64 returns true if v does not over-/underflow an int64. +func convertibleToInt64(v model.SampleValue) bool { + return v <= maxInt64 && v >= minInt64 +} + +// sampleStream is a stream of Values belonging to an attached COWMetric. 
+type sampleStream struct { + Metric metric.Metric + Values []model.SamplePair +} + +// sample is a single sample belonging to a COWMetric. +type sample struct { + Metric metric.Metric + Value model.SampleValue + Timestamp model.Time +} + +// vector is basically only an alias for model.Samples, but the +// contract is that in a Vector, all Samples have the same timestamp. +type vector []*sample + +func (vector) Type() model.ValueType { return model.ValVector } +func (vec vector) String() string { return vec.value().String() } + +func (vec vector) value() model.Vector { + val := make(model.Vector, len(vec)) + for i, s := range vec { + val[i] = &model.Sample{ + Metric: s.Metric.Copy().Metric, + Value: s.Value, + Timestamp: s.Timestamp, + } + } + return val +} + +// matrix is a slice of SampleStreams that implements sort.Interface and +// has a String method. +type matrix []*sampleStream + +func (matrix) Type() model.ValueType { return model.ValMatrix } +func (mat matrix) String() string { return mat.value().String() } + +func (mat matrix) value() model.Matrix { + val := make(model.Matrix, len(mat)) + for i, ss := range mat { + val[i] = &model.SampleStream{ + Metric: ss.Metric.Copy().Metric, + Values: ss.Values, + } + } + return val +} + +// Result holds the resulting value of an execution or an error +// if any occurred. +type Result struct { + Err error + Value model.Value +} + +// Vector returns a vector if the result value is one. An error is returned if +// the result was an error or the result value is not a vector. +func (r *Result) Vector() (model.Vector, error) { + if r.Err != nil { + return nil, r.Err + } + v, ok := r.Value.(model.Vector) + if !ok { + return nil, fmt.Errorf("query result is not a vector") + } + return v, nil +} + +// Matrix returns a matrix. An error is returned if +// the result was an error or the result value is not a matrix. 
+func (r *Result) Matrix() (model.Matrix, error) { + if r.Err != nil { + return nil, r.Err + } + v, ok := r.Value.(model.Matrix) + if !ok { + return nil, fmt.Errorf("query result is not a range vector") + } + return v, nil +} + +// Scalar returns a scalar value. An error is returned if +// the result was an error or the result value is not a scalar. +func (r *Result) Scalar() (*model.Scalar, error) { + if r.Err != nil { + return nil, r.Err + } + v, ok := r.Value.(*model.Scalar) + if !ok { + return nil, fmt.Errorf("query result is not a scalar") + } + return v, nil +} + +func (r *Result) String() string { + if r.Err != nil { + return r.Err.Error() + } + if r.Value == nil { + return "" + } + return r.Value.String() +} + +type ( + // ErrQueryTimeout is returned if a query timed out during processing. + ErrQueryTimeout string + // ErrQueryCanceled is returned if a query was canceled during processing. + ErrQueryCanceled string + // ErrStorage is returned if an error was encountered in the storage layer + // during query handling. + ErrStorage error +) + +func (e ErrQueryTimeout) Error() string { return fmt.Sprintf("query timed out in %s", string(e)) } +func (e ErrQueryCanceled) Error() string { return fmt.Sprintf("query was canceled in %s", string(e)) } + +// A Query is derived from an a raw query string and can be run against an engine +// it is associated with. +type Query interface { + // Exec processes the query and + Exec(ctx context.Context) *Result + // Statement returns the parsed statement of the query. + Statement() Statement + // Stats returns statistics about the lifetime of the query. + Stats() *stats.TimerGroup + // Cancel signals that a running query execution should be aborted. + Cancel() +} + +// query implements the Query interface. +type query struct { + // The original query string. + q string + // Statement of the parsed query. + stmt Statement + // Timer stats for the query execution. 
+ stats *stats.TimerGroup + // Cancellation function for the query. + cancel func() + + // The engine against which the query is executed. + ng *Engine +} + +// Statement implements the Query interface. +func (q *query) Statement() Statement { + return q.stmt +} + +// Stats implements the Query interface. +func (q *query) Stats() *stats.TimerGroup { + return q.stats +} + +// Cancel implements the Query interface. +func (q *query) Cancel() { + if q.cancel != nil { + q.cancel() + } +} + +// Exec implements the Query interface. +func (q *query) Exec(ctx context.Context) *Result { + if span := opentracing.SpanFromContext(ctx); span != nil { + span.SetTag(queryTag, q.stmt.String()) + } + + res, err := q.ng.exec(ctx, q) + return &Result{Err: err, Value: res} +} + +// contextDone returns an error if the context was canceled or timed out. +func contextDone(ctx context.Context, env string) error { + select { + case <-ctx.Done(): + err := ctx.Err() + switch err { + case context.Canceled: + return ErrQueryCanceled(env) + case context.DeadlineExceeded: + return ErrQueryTimeout(env) + default: + return err + } + default: + return nil + } +} + +// Engine handles the lifetime of queries from beginning to end. +// It is connected to a querier. +type Engine struct { + // A Querier constructor against an underlying storage. + queryable Queryable + // The gate limiting the maximum number of concurrent and waiting queries. + gate *queryGate + options *EngineOptions +} + +// Queryable allows opening a storage querier. +type Queryable interface { + Querier() (local.Querier, error) +} + +// NewEngine returns a new engine. +func NewEngine(queryable Queryable, o *EngineOptions) *Engine { + if o == nil { + o = DefaultEngineOptions + } + maxConcurrentQueries.Set(float64(o.MaxConcurrentQueries)) + return &Engine{ + queryable: queryable, + gate: newQueryGate(o.MaxConcurrentQueries), + options: o, + } +} + +// EngineOptions contains configuration parameters for an Engine. 
+type EngineOptions struct { + MaxConcurrentQueries int + Timeout time.Duration +} + +// DefaultEngineOptions are the default engine options. +var DefaultEngineOptions = &EngineOptions{ + MaxConcurrentQueries: 20, + Timeout: 2 * time.Minute, +} + +// NewInstantQuery returns an evaluation query for the given expression at the given time. +func (ng *Engine) NewInstantQuery(qs string, ts model.Time) (Query, error) { + expr, err := ParseExpr(qs) + if err != nil { + return nil, err + } + qry := ng.newQuery(expr, ts, ts, 0) + qry.q = qs + + return qry, nil +} + +// NewRangeQuery returns an evaluation query for the given time range and with +// the resolution set by the interval. +func (ng *Engine) NewRangeQuery(qs string, start, end model.Time, interval time.Duration) (Query, error) { + expr, err := ParseExpr(qs) + if err != nil { + return nil, err + } + if expr.Type() != model.ValVector && expr.Type() != model.ValScalar { + return nil, fmt.Errorf("invalid expression type %q for range query, must be scalar or instant vector", documentedType(expr.Type())) + } + qry := ng.newQuery(expr, start, end, interval) + qry.q = qs + + return qry, nil +} + +func (ng *Engine) newQuery(expr Expr, start, end model.Time, interval time.Duration) *query { + es := &EvalStmt{ + Expr: expr, + Start: start, + End: end, + Interval: interval, + } + qry := &query{ + stmt: es, + ng: ng, + stats: stats.NewTimerGroup(), + } + return qry +} + +// testStmt is an internal helper statement that allows execution +// of an arbitrary function during handling. It is used to test the Engine. +type testStmt func(context.Context) error + +func (testStmt) String() string { return "test statement" } +func (testStmt) stmt() {} + +func (ng *Engine) newTestQuery(f func(context.Context) error) Query { + qry := &query{ + q: "test statement", + stmt: testStmt(f), + ng: ng, + stats: stats.NewTimerGroup(), + } + return qry +} + +// exec executes the query. +// +// At this point per query only one EvalStmt is evaluated. 
Alert and record +// statements are not handled by the Engine. +func (ng *Engine) exec(ctx context.Context, q *query) (model.Value, error) { + currentQueries.Inc() + defer currentQueries.Dec() + ctx, cancel := context.WithTimeout(ctx, ng.options.Timeout) + q.cancel = cancel + + queueTimer := q.stats.GetTimer(stats.ExecQueueTime).Start() + + if err := ng.gate.Start(ctx); err != nil { + return nil, err + } + defer ng.gate.Done() + + queueTimer.Stop() + + // Cancel when execution is done or an error was raised. + defer q.cancel() + + const env = "query execution" + + evalTimer := q.stats.GetTimer(stats.TotalEvalTime).Start() + defer evalTimer.Stop() + + // The base context might already be canceled on the first iteration (e.g. during shutdown). + if err := contextDone(ctx, env); err != nil { + return nil, err + } + + switch s := q.Statement().(type) { + case *EvalStmt: + return ng.execEvalStmt(ctx, q, s) + case testStmt: + return nil, s(ctx) + } + + panic(fmt.Errorf("promql.Engine.exec: unhandled statement of type %T", q.Statement())) +} + +// execEvalStmt evaluates the expression of an evaluation statement for the given time range. +func (ng *Engine) execEvalStmt(ctx context.Context, query *query, s *EvalStmt) (model.Value, error) { + querier, err := ng.queryable.Querier() + if err != nil { + return nil, err + } + defer querier.Close() + + prepareTimer := query.stats.GetTimer(stats.QueryPreparationTime).Start() + err = ng.populateIterators(ctx, querier, s) + prepareTimer.Stop() + queryPrepareTime.Observe(prepareTimer.ElapsedTime().Seconds()) + + if err != nil { + return nil, err + } + defer ng.closeIterators(s) + + evalTimer := query.stats.GetTimer(stats.InnerEvalTime).Start() + // Instant evaluation. + if s.Start == s.End && s.Interval == 0 { + evaluator := &evaluator{ + Timestamp: s.Start, + ctx: ctx, + } + val, err := evaluator.Eval(s.Expr) + if err != nil { + return nil, err + } + + // Turn matrix and vector types with protected metrics into + // model.* types. 
+ switch v := val.(type) { + case vector: + val = v.value() + case matrix: + val = v.value() + } + + evalTimer.Stop() + queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds()) + + return val, nil + } + numSteps := int(s.End.Sub(s.Start) / s.Interval) + + // Range evaluation. + sampleStreams := map[model.Fingerprint]*sampleStream{} + for ts := s.Start; !ts.After(s.End); ts = ts.Add(s.Interval) { + + if err := contextDone(ctx, "range evaluation"); err != nil { + return nil, err + } + + evaluator := &evaluator{ + Timestamp: ts, + ctx: ctx, + } + val, err := evaluator.Eval(s.Expr) + if err != nil { + return nil, err + } + + switch v := val.(type) { + case *model.Scalar: + // As the expression type does not change we can safely default to 0 + // as the fingerprint for scalar expressions. + ss := sampleStreams[0] + if ss == nil { + ss = &sampleStream{Values: make([]model.SamplePair, 0, numSteps)} + sampleStreams[0] = ss + } + ss.Values = append(ss.Values, model.SamplePair{ + Value: v.Value, + Timestamp: v.Timestamp, + }) + case vector: + for _, sample := range v { + fp := sample.Metric.Metric.Fingerprint() + ss := sampleStreams[fp] + if ss == nil { + ss = &sampleStream{ + Metric: sample.Metric, + Values: make([]model.SamplePair, 0, numSteps), + } + sampleStreams[fp] = ss + } + ss.Values = append(ss.Values, model.SamplePair{ + Value: sample.Value, + Timestamp: sample.Timestamp, + }) + } + default: + panic(fmt.Errorf("promql.Engine.exec: invalid expression type %q", val.Type())) + } + } + evalTimer.Stop() + queryInnerEval.Observe(evalTimer.ElapsedTime().Seconds()) + + if err := contextDone(ctx, "expression evaluation"); err != nil { + return nil, err + } + + appendTimer := query.stats.GetTimer(stats.ResultAppendTime).Start() + mat := matrix{} + for _, ss := range sampleStreams { + mat = append(mat, ss) + } + appendTimer.Stop() + queryResultAppend.Observe(appendTimer.ElapsedTime().Seconds()) + + if err := contextDone(ctx, "expression evaluation"); err != nil { + return 
nil, err + } + + // Turn matrix type with protected metric into model.Matrix. + resMatrix := mat.value() + + sortTimer := query.stats.GetTimer(stats.ResultSortTime).Start() + sort.Sort(resMatrix) + sortTimer.Stop() + queryResultSort.Observe(sortTimer.ElapsedTime().Seconds()) + return resMatrix, nil +} + +func (ng *Engine) populateIterators(ctx context.Context, querier local.Querier, s *EvalStmt) error { + var queryErr error + Inspect(s.Expr, func(node Node) bool { + switch n := node.(type) { + case *VectorSelector: + if s.Start.Equal(s.End) { + n.iterators, queryErr = querier.QueryInstant( + ctx, + s.Start.Add(-n.Offset), + StalenessDelta, + n.LabelMatchers..., + ) + } else { + n.iterators, queryErr = querier.QueryRange( + ctx, + s.Start.Add(-n.Offset-StalenessDelta), + s.End.Add(-n.Offset), + n.LabelMatchers..., + ) + } + if queryErr != nil { + return false + } + case *MatrixSelector: + n.iterators, queryErr = querier.QueryRange( + ctx, + s.Start.Add(-n.Offset-n.Range), + s.End.Add(-n.Offset), + n.LabelMatchers..., + ) + if queryErr != nil { + return false + } + } + return true + }) + return queryErr +} + +func (ng *Engine) closeIterators(s *EvalStmt) { + Inspect(s.Expr, func(node Node) bool { + switch n := node.(type) { + case *VectorSelector: + for _, it := range n.iterators { + it.Close() + } + case *MatrixSelector: + for _, it := range n.iterators { + it.Close() + } + } + return true + }) +} + +// An evaluator evaluates given expressions at a fixed timestamp. It is attached to an +// engine through which it connects to a querier and reports errors. On timeout or +// cancellation of its context it terminates. +type evaluator struct { + ctx context.Context + + Timestamp model.Time +} + +// fatalf causes a panic with the input formatted into an error. +func (ev *evaluator) errorf(format string, args ...interface{}) { + ev.error(fmt.Errorf(format, args...)) +} + +// fatal causes a panic with the given error. 
+func (ev *evaluator) error(err error) { + panic(err) +} + +// recover is the handler that turns panics into returns from the top level of evaluation. +func (ev *evaluator) recover(errp *error) { + e := recover() + if e != nil { + if _, ok := e.(runtime.Error); ok { + // Print the stack trace but do not inhibit the running application. + buf := make([]byte, 64<<10) + buf = buf[:runtime.Stack(buf, false)] + + log.Errorf("parser panic: %v\n%s", e, buf) + *errp = fmt.Errorf("unexpected error") + } else { + *errp = e.(error) + } + } +} + +// evalScalar attempts to evaluate e to a scalar value and errors otherwise. +func (ev *evaluator) evalScalar(e Expr) *model.Scalar { + val := ev.eval(e) + sv, ok := val.(*model.Scalar) + if !ok { + ev.errorf("expected scalar but got %s", documentedType(val.Type())) + } + return sv +} + +// evalVector attempts to evaluate e to a vector value and errors otherwise. +func (ev *evaluator) evalVector(e Expr) vector { + val := ev.eval(e) + vec, ok := val.(vector) + if !ok { + ev.errorf("expected instant vector but got %s", documentedType(val.Type())) + } + return vec +} + +// evalInt attempts to evaluate e into an integer and errors otherwise. +func (ev *evaluator) evalInt(e Expr) int64 { + sc := ev.evalScalar(e) + if !convertibleToInt64(sc.Value) { + ev.errorf("scalar value %v overflows int64", sc.Value) + } + return int64(sc.Value) +} + +// evalFloat attempts to evaluate e into a float and errors otherwise. +func (ev *evaluator) evalFloat(e Expr) float64 { + sc := ev.evalScalar(e) + return float64(sc.Value) +} + +// evalMatrix attempts to evaluate e into a matrix and errors otherwise. +// The error message uses the term "range vector" to match the user facing +// documentation. 
+func (ev *evaluator) evalMatrix(e Expr) matrix { + val := ev.eval(e) + mat, ok := val.(matrix) + if !ok { + ev.errorf("expected range vector but got %s", documentedType(val.Type())) + } + return mat +} + +// evalString attempts to evaluate e to a string value and errors otherwise. +func (ev *evaluator) evalString(e Expr) *model.String { + val := ev.eval(e) + sv, ok := val.(*model.String) + if !ok { + ev.errorf("expected string but got %s", documentedType(val.Type())) + } + return sv +} + +// evalOneOf evaluates e and errors unless the result is of one of the given types. +func (ev *evaluator) evalOneOf(e Expr, t1, t2 model.ValueType) model.Value { + val := ev.eval(e) + if val.Type() != t1 && val.Type() != t2 { + ev.errorf("expected %s or %s but got %s", documentedType(t1), documentedType(t2), documentedType(val.Type())) + } + return val +} + +func (ev *evaluator) Eval(expr Expr) (v model.Value, err error) { + defer ev.recover(&err) + return ev.eval(expr), nil +} + +// eval evaluates the given expression as the given AST expression node requires. +func (ev *evaluator) eval(expr Expr) model.Value { + // This is the top-level evaluation method. + // Thus, we check for timeout/cancellation here. 
+ if err := contextDone(ev.ctx, "expression evaluation"); err != nil { + ev.error(err) + } + + switch e := expr.(type) { + case *AggregateExpr: + vector := ev.evalVector(e.Expr) + return ev.aggregation(e.Op, e.Grouping, e.Without, e.KeepCommonLabels, e.Param, vector) + + case *BinaryExpr: + lhs := ev.evalOneOf(e.LHS, model.ValScalar, model.ValVector) + rhs := ev.evalOneOf(e.RHS, model.ValScalar, model.ValVector) + + switch lt, rt := lhs.Type(), rhs.Type(); { + case lt == model.ValScalar && rt == model.ValScalar: + return &model.Scalar{ + Value: scalarBinop(e.Op, lhs.(*model.Scalar).Value, rhs.(*model.Scalar).Value), + Timestamp: ev.Timestamp, + } + + case lt == model.ValVector && rt == model.ValVector: + switch e.Op { + case itemLAND: + return ev.vectorAnd(lhs.(vector), rhs.(vector), e.VectorMatching) + case itemLOR: + return ev.vectorOr(lhs.(vector), rhs.(vector), e.VectorMatching) + case itemLUnless: + return ev.vectorUnless(lhs.(vector), rhs.(vector), e.VectorMatching) + default: + return ev.vectorBinop(e.Op, lhs.(vector), rhs.(vector), e.VectorMatching, e.ReturnBool) + } + case lt == model.ValVector && rt == model.ValScalar: + return ev.vectorScalarBinop(e.Op, lhs.(vector), rhs.(*model.Scalar), false, e.ReturnBool) + + case lt == model.ValScalar && rt == model.ValVector: + return ev.vectorScalarBinop(e.Op, rhs.(vector), lhs.(*model.Scalar), true, e.ReturnBool) + } + + case *Call: + return e.Func.Call(ev, e.Args) + + case *MatrixSelector: + return ev.matrixSelector(e) + + case *NumberLiteral: + return &model.Scalar{Value: e.Val, Timestamp: ev.Timestamp} + + case *ParenExpr: + return ev.eval(e.Expr) + + case *StringLiteral: + return &model.String{Value: e.Val, Timestamp: ev.Timestamp} + + case *UnaryExpr: + se := ev.evalOneOf(e.Expr, model.ValScalar, model.ValVector) + // Only + and - are possible operators. 
+ if e.Op == itemSUB { + switch v := se.(type) { + case *model.Scalar: + v.Value = -v.Value + case vector: + for i, sv := range v { + v[i].Value = -sv.Value + } + } + } + return se + + case *VectorSelector: + return ev.vectorSelector(e) + } + panic(fmt.Errorf("unhandled expression of type: %T", expr)) +} + +// vectorSelector evaluates a *VectorSelector expression. +func (ev *evaluator) vectorSelector(node *VectorSelector) vector { + vec := vector{} + for _, it := range node.iterators { + refTime := ev.Timestamp.Add(-node.Offset) + samplePair := it.ValueAtOrBeforeTime(refTime) + if samplePair.Timestamp.Before(refTime.Add(-StalenessDelta)) { + continue // Sample outside of staleness policy window. + } + vec = append(vec, &sample{ + Metric: it.Metric(), + Value: samplePair.Value, + Timestamp: ev.Timestamp, + }) + } + return vec +} + +// matrixSelector evaluates a *MatrixSelector expression. +func (ev *evaluator) matrixSelector(node *MatrixSelector) matrix { + interval := metric.Interval{ + OldestInclusive: ev.Timestamp.Add(-node.Range - node.Offset), + NewestInclusive: ev.Timestamp.Add(-node.Offset), + } + + sampleStreams := make([]*sampleStream, 0, len(node.iterators)) + for _, it := range node.iterators { + samplePairs := it.RangeValues(interval) + if len(samplePairs) == 0 { + continue + } + + if node.Offset != 0 { + for _, sp := range samplePairs { + sp.Timestamp = sp.Timestamp.Add(node.Offset) + } + } + + sampleStream := &sampleStream{ + Metric: it.Metric(), + Values: samplePairs, + } + sampleStreams = append(sampleStreams, sampleStream) + } + return matrix(sampleStreams) +} + +func (ev *evaluator) vectorAnd(lhs, rhs vector, matching *VectorMatching) vector { + if matching.Card != CardManyToMany { + panic("set operations must only use many-to-many matching") + } + sigf := signatureFunc(matching.On, matching.MatchingLabels...) + + var result vector + // The set of signatures for the right-hand side vector. 
+ rightSigs := map[uint64]struct{}{} + // Add all rhs samples to a map so we can easily find matches later. + for _, rs := range rhs { + rightSigs[sigf(rs.Metric)] = struct{}{} + } + + for _, ls := range lhs { + // If there's a matching entry in the right-hand side vector, add the sample. + if _, ok := rightSigs[sigf(ls.Metric)]; ok { + result = append(result, ls) + } + } + return result +} + +func (ev *evaluator) vectorOr(lhs, rhs vector, matching *VectorMatching) vector { + if matching.Card != CardManyToMany { + panic("set operations must only use many-to-many matching") + } + sigf := signatureFunc(matching.On, matching.MatchingLabels...) + + var result vector + leftSigs := map[uint64]struct{}{} + // Add everything from the left-hand-side vector. + for _, ls := range lhs { + leftSigs[sigf(ls.Metric)] = struct{}{} + result = append(result, ls) + } + // Add all right-hand side elements which have not been added from the left-hand side. + for _, rs := range rhs { + if _, ok := leftSigs[sigf(rs.Metric)]; !ok { + result = append(result, rs) + } + } + return result +} + +func (ev *evaluator) vectorUnless(lhs, rhs vector, matching *VectorMatching) vector { + if matching.Card != CardManyToMany { + panic("set operations must only use many-to-many matching") + } + sigf := signatureFunc(matching.On, matching.MatchingLabels...) + + rightSigs := map[uint64]struct{}{} + for _, rs := range rhs { + rightSigs[sigf(rs.Metric)] = struct{}{} + } + + var result vector + for _, ls := range lhs { + if _, ok := rightSigs[sigf(ls.Metric)]; !ok { + result = append(result, ls) + } + } + return result +} + +// vectorBinop evaluates a binary operation between two vectors, excluding set operators. 
+func (ev *evaluator) vectorBinop(op itemType, lhs, rhs vector, matching *VectorMatching, returnBool bool) vector { + if matching.Card == CardManyToMany { + panic("many-to-many only allowed for set operators") + } + var ( + result = vector{} + sigf = signatureFunc(matching.On, matching.MatchingLabels...) + ) + + // The control flow below handles one-to-one or many-to-one matching. + // For one-to-many, swap sidedness and account for the swap when calculating + // values. + if matching.Card == CardOneToMany { + lhs, rhs = rhs, lhs + } + + // All samples from the rhs hashed by the matching label/values. + rightSigs := map[uint64]*sample{} + + // Add all rhs samples to a map so we can easily find matches later. + for _, rs := range rhs { + sig := sigf(rs.Metric) + // The rhs is guaranteed to be the 'one' side. Having multiple samples + // with the same signature means that the matching is many-to-many. + if _, found := rightSigs[sig]; found { + // Many-to-many matching not allowed. + ev.errorf("many-to-many matching not allowed: matching labels must be unique on one side") + } + rightSigs[sig] = rs + } + + // Tracks the match-signature. For one-to-one operations the value is nil. For many-to-one + // the value is a set of signatures to detect duplicated result elements. + matchedSigs := map[uint64]map[uint64]struct{}{} + + // For all lhs samples find a respective rhs sample and perform + // the binary operation. + for _, ls := range lhs { + sig := sigf(ls.Metric) + + rs, found := rightSigs[sig] // Look for a match in the rhs vector. + if !found { + continue + } + + // Account for potentially swapped sidedness. 
+ vl, vr := ls.Value, rs.Value + if matching.Card == CardOneToMany { + vl, vr = vr, vl + } + value, keep := vectorElemBinop(op, vl, vr) + if returnBool { + if keep { + value = 1.0 + } else { + value = 0.0 + } + } else if !keep { + continue + } + metric := resultMetric(ls.Metric, rs.Metric, op, matching) + + insertedSigs, exists := matchedSigs[sig] + if matching.Card == CardOneToOne { + if exists { + ev.errorf("multiple matches for labels: many-to-one matching must be explicit (group_left/group_right)") + } + matchedSigs[sig] = nil // Set existence to true. + } else { + // In many-to-one matching the grouping labels have to ensure a unique metric + // for the result vector. Check whether those labels have already been added for + // the same matching labels. + insertSig := uint64(metric.Metric.Fingerprint()) + if !exists { + insertedSigs = map[uint64]struct{}{} + matchedSigs[sig] = insertedSigs + } else if _, duplicate := insertedSigs[insertSig]; duplicate { + ev.errorf("multiple matches for labels: grouping labels must ensure unique matches") + } + insertedSigs[insertSig] = struct{}{} + } + + result = append(result, &sample{ + Metric: metric, + Value: value, + Timestamp: ev.Timestamp, + }) + } + return result +} + +// signatureFunc returns a function that calculates the signature for a metric +// ignoring the provided labels. If on, then the given labels are only used instead. +func signatureFunc(on bool, labels ...model.LabelName) func(m metric.Metric) uint64 { + if !on { + return func(m metric.Metric) uint64 { + tmp := m.Metric.Clone() + for _, l := range labels { + delete(tmp, l) + } + delete(tmp, model.MetricNameLabel) + return uint64(tmp.Fingerprint()) + } + } + return func(m metric.Metric) uint64 { + return model.SignatureForLabels(m.Metric, labels...) + } +} + +// resultMetric returns the metric for the given sample(s) based on the vector +// binary operation and the matching options. 
+func resultMetric(lhs, rhs metric.Metric, op itemType, matching *VectorMatching) metric.Metric { + if shouldDropMetricName(op) { + lhs.Del(model.MetricNameLabel) + } + if !matching.On { + if matching.Card == CardOneToOne { + for _, l := range matching.MatchingLabels { + lhs.Del(l) + } + } + for _, ln := range matching.Include { + // Included labels from the `group_x` modifier are taken from the "one"-side. + value := rhs.Metric[ln] + if value != "" { + lhs.Set(ln, rhs.Metric[ln]) + } else { + lhs.Del(ln) + } + } + return lhs + } + // As we definitely write, creating a new metric is the easiest solution. + m := model.Metric{} + if matching.Card == CardOneToOne { + for _, ln := range matching.MatchingLabels { + if v, ok := lhs.Metric[ln]; ok { + m[ln] = v + } + } + } else { + for k, v := range lhs.Metric { + m[k] = v + } + } + for _, ln := range matching.Include { + // Included labels from the `group_x` modifier are taken from the "one"-side . + if v, ok := rhs.Metric[ln]; ok { + m[ln] = v + } else { + delete(m, ln) + } + } + return metric.Metric{Metric: m, Copied: false} +} + +// vectorScalarBinop evaluates a binary operation between a vector and a scalar. +func (ev *evaluator) vectorScalarBinop(op itemType, lhs vector, rhs *model.Scalar, swap, returnBool bool) vector { + vec := make(vector, 0, len(lhs)) + + for _, lhsSample := range lhs { + lv, rv := lhsSample.Value, rhs.Value + // lhs always contains the vector. If the original position was different + // swap for calculating the value. + if swap { + lv, rv = rv, lv + } + value, keep := vectorElemBinop(op, lv, rv) + if returnBool { + if keep { + value = 1.0 + } else { + value = 0.0 + } + keep = true + } + if keep { + lhsSample.Value = value + if shouldDropMetricName(op) { + lhsSample.Metric.Del(model.MetricNameLabel) + } + vec = append(vec, lhsSample) + } + } + return vec +} + +// scalarBinop evaluates a binary operation between two scalars. 
+func scalarBinop(op itemType, lhs, rhs model.SampleValue) model.SampleValue { + switch op { + case itemADD: + return lhs + rhs + case itemSUB: + return lhs - rhs + case itemMUL: + return lhs * rhs + case itemDIV: + return lhs / rhs + case itemPOW: + return model.SampleValue(math.Pow(float64(lhs), float64(rhs))) + case itemMOD: + return model.SampleValue(math.Mod(float64(lhs), float64(rhs))) + case itemEQL: + return btos(lhs == rhs) + case itemNEQ: + return btos(lhs != rhs) + case itemGTR: + return btos(lhs > rhs) + case itemLSS: + return btos(lhs < rhs) + case itemGTE: + return btos(lhs >= rhs) + case itemLTE: + return btos(lhs <= rhs) + } + panic(fmt.Errorf("operator %q not allowed for scalar operations", op)) +} + +// vectorElemBinop evaluates a binary operation between two vector elements. +func vectorElemBinop(op itemType, lhs, rhs model.SampleValue) (model.SampleValue, bool) { + switch op { + case itemADD: + return lhs + rhs, true + case itemSUB: + return lhs - rhs, true + case itemMUL: + return lhs * rhs, true + case itemDIV: + return lhs / rhs, true + case itemPOW: + return model.SampleValue(math.Pow(float64(lhs), float64(rhs))), true + case itemMOD: + return model.SampleValue(math.Mod(float64(lhs), float64(rhs))), true + case itemEQL: + return lhs, lhs == rhs + case itemNEQ: + return lhs, lhs != rhs + case itemGTR: + return lhs, lhs > rhs + case itemLSS: + return lhs, lhs < rhs + case itemGTE: + return lhs, lhs >= rhs + case itemLTE: + return lhs, lhs <= rhs + } + panic(fmt.Errorf("operator %q not allowed for operations between vectors", op)) +} + +// labelIntersection returns the metric of common label/value pairs of two input metrics. 
+func labelIntersection(metric1, metric2 metric.Metric) metric.Metric { + for label, value := range metric1.Metric { + if metric2.Metric[label] != value { + metric1.Del(label) + } + } + return metric1 +} + +type groupedAggregation struct { + labels metric.Metric + value model.SampleValue + valuesSquaredSum model.SampleValue + groupCount int + heap vectorByValueHeap + reverseHeap vectorByReverseValueHeap +} + +// aggregation evaluates an aggregation operation on a vector. +func (ev *evaluator) aggregation(op itemType, grouping model.LabelNames, without bool, keepCommon bool, param Expr, vec vector) vector { + + result := map[uint64]*groupedAggregation{} + var k int64 + if op == itemTopK || op == itemBottomK { + k = ev.evalInt(param) + if k < 1 { + return vector{} + } + } + var q float64 + if op == itemQuantile { + q = ev.evalFloat(param) + } + var valueLabel model.LabelName + if op == itemCountValues { + valueLabel = model.LabelName(ev.evalString(param).Value) + if !without { + grouping = append(grouping, valueLabel) + } + } + + for _, s := range vec { + withoutMetric := s.Metric + if without { + for _, l := range grouping { + withoutMetric.Del(l) + } + withoutMetric.Del(model.MetricNameLabel) + if op == itemCountValues { + withoutMetric.Set(valueLabel, model.LabelValue(s.Value.String())) + } + } else { + if op == itemCountValues { + s.Metric.Set(valueLabel, model.LabelValue(s.Value.String())) + } + } + + var groupingKey uint64 + if without { + groupingKey = uint64(withoutMetric.Metric.Fingerprint()) + } else { + groupingKey = model.SignatureForLabels(s.Metric.Metric, grouping...) + } + + groupedResult, ok := result[groupingKey] + // Add a new group if it doesn't exist. 
+ if !ok { + var m metric.Metric + if keepCommon { + m = s.Metric + m.Del(model.MetricNameLabel) + } else if without { + m = withoutMetric + } else { + m = metric.Metric{ + Metric: model.Metric{}, + Copied: true, + } + for _, l := range grouping { + if v, ok := s.Metric.Metric[l]; ok { + m.Set(l, v) + } + } + } + result[groupingKey] = &groupedAggregation{ + labels: m, + value: s.Value, + valuesSquaredSum: s.Value * s.Value, + groupCount: 1, + } + if op == itemTopK || op == itemQuantile { + result[groupingKey].heap = make(vectorByValueHeap, 0, k) + heap.Push(&result[groupingKey].heap, &sample{Value: s.Value, Metric: s.Metric}) + } else if op == itemBottomK { + result[groupingKey].reverseHeap = make(vectorByReverseValueHeap, 0, k) + heap.Push(&result[groupingKey].reverseHeap, &sample{Value: s.Value, Metric: s.Metric}) + } + continue + } + // Add the sample to the existing group. + if keepCommon { + groupedResult.labels = labelIntersection(groupedResult.labels, s.Metric) + } + + switch op { + case itemSum: + groupedResult.value += s.Value + case itemAvg: + groupedResult.value += s.Value + groupedResult.groupCount++ + case itemMax: + if groupedResult.value < s.Value || math.IsNaN(float64(groupedResult.value)) { + groupedResult.value = s.Value + } + case itemMin: + if groupedResult.value > s.Value || math.IsNaN(float64(groupedResult.value)) { + groupedResult.value = s.Value + } + case itemCount, itemCountValues: + groupedResult.groupCount++ + case itemStdvar, itemStddev: + groupedResult.value += s.Value + groupedResult.valuesSquaredSum += s.Value * s.Value + groupedResult.groupCount++ + case itemTopK: + if int64(len(groupedResult.heap)) < k || groupedResult.heap[0].Value < s.Value || math.IsNaN(float64(groupedResult.heap[0].Value)) { + if int64(len(groupedResult.heap)) == k { + heap.Pop(&groupedResult.heap) + } + heap.Push(&groupedResult.heap, &sample{Value: s.Value, Metric: s.Metric}) + } + case itemBottomK: + if int64(len(groupedResult.reverseHeap)) < k || 
groupedResult.reverseHeap[0].Value > s.Value || math.IsNaN(float64(groupedResult.reverseHeap[0].Value)) { + if int64(len(groupedResult.reverseHeap)) == k { + heap.Pop(&groupedResult.reverseHeap) + } + heap.Push(&groupedResult.reverseHeap, &sample{Value: s.Value, Metric: s.Metric}) + } + case itemQuantile: + groupedResult.heap = append(groupedResult.heap, s) + default: + panic(fmt.Errorf("expected aggregation operator but got %q", op)) + } + } + + // Construct the result vector from the aggregated groups. + resultVector := make(vector, 0, len(result)) + + for _, aggr := range result { + switch op { + case itemAvg: + aggr.value = aggr.value / model.SampleValue(aggr.groupCount) + case itemCount, itemCountValues: + aggr.value = model.SampleValue(aggr.groupCount) + case itemStdvar: + avg := float64(aggr.value) / float64(aggr.groupCount) + aggr.value = model.SampleValue(float64(aggr.valuesSquaredSum)/float64(aggr.groupCount) - avg*avg) + case itemStddev: + avg := float64(aggr.value) / float64(aggr.groupCount) + aggr.value = model.SampleValue(math.Sqrt(float64(aggr.valuesSquaredSum)/float64(aggr.groupCount) - avg*avg)) + case itemTopK: + // The heap keeps the lowest value on top, so reverse it. + sort.Sort(sort.Reverse(aggr.heap)) + for _, v := range aggr.heap { + resultVector = append(resultVector, &sample{ + Metric: v.Metric, + Value: v.Value, + Timestamp: ev.Timestamp, + }) + } + continue // Bypass default append. + case itemBottomK: + // The heap keeps the lowest value on top, so reverse it. + sort.Sort(sort.Reverse(aggr.reverseHeap)) + for _, v := range aggr.reverseHeap { + resultVector = append(resultVector, &sample{ + Metric: v.Metric, + Value: v.Value, + Timestamp: ev.Timestamp, + }) + } + continue // Bypass default append. + case itemQuantile: + aggr.value = model.SampleValue(quantile(q, aggr.heap)) + default: + // For other aggregations, we already have the right value. 
+ } + sample := &sample{ + Metric: aggr.labels, + Value: aggr.value, + Timestamp: ev.Timestamp, + } + resultVector = append(resultVector, sample) + } + return resultVector +} + +// btos returns 1 if b is true, 0 otherwise. +func btos(b bool) model.SampleValue { + if b { + return 1 + } + return 0 +} + +// shouldDropMetricName returns whether the metric name should be dropped in the +// result of the op operation. +func shouldDropMetricName(op itemType) bool { + switch op { + case itemADD, itemSUB, itemDIV, itemMUL, itemMOD: + return true + default: + return false + } +} + +// StalenessDelta determines the time since the last sample after which a time +// series is considered stale. +var StalenessDelta = 5 * time.Minute + +// A queryGate controls the maximum number of concurrently running and waiting queries. +type queryGate struct { + ch chan struct{} +} + +// newQueryGate returns a query gate that limits the number of queries +// being concurrently executed. +func newQueryGate(length int) *queryGate { + return &queryGate{ + ch: make(chan struct{}, length), + } +} + +// Start blocks until the gate has a free spot or the context is done. +func (g *queryGate) Start(ctx context.Context) error { + select { + case <-ctx.Done(): + return contextDone(ctx, "query queue") + case g.ch <- struct{}{}: + return nil + } +} + +// Done releases a single spot in the gate. +func (g *queryGate) Done() { + select { + case <-g.ch: + default: + panic("engine.queryGate.Done: more operations done than started") + } +} + +// documentedType returns the internal type to the equivalent +// user facing terminology as defined in the documentation. 
+func documentedType(t model.ValueType) string { + switch t.String() { + case "vector": + return "instant vector" + case "matrix": + return "range vector" + default: + return t.String() + } +} diff --git a/vendor/github.com/prometheus/prometheus/promql/functions.go b/vendor/github.com/prometheus/prometheus/promql/functions.go new file mode 100644 index 000000000..d4dd01415 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/functions.go @@ -0,0 +1,1338 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "math" + "regexp" + "sort" + "strconv" + "strings" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// Function represents a function of the expression language and is +// used by function nodes. +type Function struct { + Name string + ArgTypes []model.ValueType + Variadic int + ReturnType model.ValueType + Call func(ev *evaluator, args Expressions) model.Value +} + +// === time() model.SampleValue === +func funcTime(ev *evaluator, args Expressions) model.Value { + return &model.Scalar{ + Value: model.SampleValue(ev.Timestamp.Unix()), + Timestamp: ev.Timestamp, + } +} + +// extrapolatedRate is a utility function for rate/increase/delta. 
+// It calculates the rate (allowing for counter resets if isCounter is true), +// extrapolates if the first/last sample is close to the boundary, and returns +// the result as either per-second (if isRate is true) or overall. +func extrapolatedRate(ev *evaluator, arg Expr, isCounter bool, isRate bool) model.Value { + ms := arg.(*MatrixSelector) + + rangeStart := ev.Timestamp.Add(-ms.Range - ms.Offset) + rangeEnd := ev.Timestamp.Add(-ms.Offset) + + resultVector := vector{} + + matrixValue := ev.evalMatrix(ms) + for _, samples := range matrixValue { + // No sense in trying to compute a rate without at least two points. Drop + // this vector element. + if len(samples.Values) < 2 { + continue + } + var ( + counterCorrection model.SampleValue + lastValue model.SampleValue + ) + for _, sample := range samples.Values { + currentValue := sample.Value + if isCounter && currentValue < lastValue { + counterCorrection += lastValue + } + lastValue = currentValue + } + resultValue := lastValue - samples.Values[0].Value + counterCorrection + + // Duration between first/last samples and boundary of range. + durationToStart := samples.Values[0].Timestamp.Sub(rangeStart).Seconds() + durationToEnd := rangeEnd.Sub(samples.Values[len(samples.Values)-1].Timestamp).Seconds() + + sampledInterval := samples.Values[len(samples.Values)-1].Timestamp.Sub(samples.Values[0].Timestamp).Seconds() + averageDurationBetweenSamples := sampledInterval / float64(len(samples.Values)-1) + + if isCounter && resultValue > 0 && samples.Values[0].Value >= 0 { + // Counters cannot be negative. If we have any slope at + // all (i.e. resultValue went up), we can extrapolate + // the zero point of the counter. If the duration to the + // zero point is shorter than the durationToStart, we + // take the zero point as the start of the series, + // thereby avoiding extrapolation to negative counter + // values. 
+ durationToZero := sampledInterval * float64(samples.Values[0].Value/resultValue) + if durationToZero < durationToStart { + durationToStart = durationToZero + } + } + + // If the first/last samples are close to the boundaries of the range, + // extrapolate the result. This is as we expect that another sample + // will exist given the spacing between samples we've seen thus far, + // with an allowance for noise. + extrapolationThreshold := averageDurationBetweenSamples * 1.1 + extrapolateToInterval := sampledInterval + + if durationToStart < extrapolationThreshold { + extrapolateToInterval += durationToStart + } else { + extrapolateToInterval += averageDurationBetweenSamples / 2 + } + if durationToEnd < extrapolationThreshold { + extrapolateToInterval += durationToEnd + } else { + extrapolateToInterval += averageDurationBetweenSamples / 2 + } + resultValue = resultValue * model.SampleValue(extrapolateToInterval/sampledInterval) + if isRate { + resultValue = resultValue / model.SampleValue(ms.Range.Seconds()) + } + + resultSample := &sample{ + Metric: samples.Metric, + Value: resultValue, + Timestamp: ev.Timestamp, + } + resultSample.Metric.Del(model.MetricNameLabel) + resultVector = append(resultVector, resultSample) + } + return resultVector +} + +// === delta(matrix model.ValMatrix) Vector === +func funcDelta(ev *evaluator, args Expressions) model.Value { + return extrapolatedRate(ev, args[0], false, false) +} + +// === rate(node model.ValMatrix) Vector === +func funcRate(ev *evaluator, args Expressions) model.Value { + return extrapolatedRate(ev, args[0], true, true) +} + +// === increase(node model.ValMatrix) Vector === +func funcIncrease(ev *evaluator, args Expressions) model.Value { + return extrapolatedRate(ev, args[0], true, false) +} + +// === irate(node model.ValMatrix) Vector === +func funcIrate(ev *evaluator, args Expressions) model.Value { + return instantValue(ev, args[0], true) +} + +// === idelta(node model.ValMatrix) Vector === +func funcIdelta(ev 
*evaluator, args Expressions) model.Value { + return instantValue(ev, args[0], false) +} + +func instantValue(ev *evaluator, arg Expr, isRate bool) model.Value { + resultVector := vector{} + for _, samples := range ev.evalMatrix(arg) { + // No sense in trying to compute a rate without at least two points. Drop + // this vector element. + if len(samples.Values) < 2 { + continue + } + + lastSample := samples.Values[len(samples.Values)-1] + previousSample := samples.Values[len(samples.Values)-2] + + var resultValue model.SampleValue + if isRate && lastSample.Value < previousSample.Value { + // Counter reset. + resultValue = lastSample.Value + } else { + resultValue = lastSample.Value - previousSample.Value + } + + sampledInterval := lastSample.Timestamp.Sub(previousSample.Timestamp) + if sampledInterval == 0 { + // Avoid dividing by 0. + continue + } + + if isRate { + // Convert to per-second. + resultValue /= model.SampleValue(sampledInterval.Seconds()) + } + + resultSample := &sample{ + Metric: samples.Metric, + Value: resultValue, + Timestamp: ev.Timestamp, + } + resultSample.Metric.Del(model.MetricNameLabel) + resultVector = append(resultVector, resultSample) + } + return resultVector +} + +// Calculate the trend value at the given index i in raw data d. +// This is somewhat analogous to the slope of the trend at the given index. +// The argument "s" is the set of computed smoothed values. +// The argument "b" is the set of computed trend factors. +// The argument "d" is the set of raw input values. +func calcTrendValue(i int, sf, tf float64, s, b, d []float64) float64 { + if i == 0 { + return b[0] + } + + x := tf * (s[i] - s[i-1]) + y := (1 - tf) * b[i-1] + + // Cache the computed value. + b[i] = x + y + + return b[i] +} + +// Holt-Winters is similar to a weighted moving average, where historical data has exponentially less influence on the current data. +// Holt-Winter also accounts for trends in data. 
The smoothing factor (0 < sf < 1) affects how historical data will affect the current +// data. A lower smoothing factor increases the influence of historical data. The trend factor (0 < tf < 1) affects +// how trends in historical data will affect the current data. A higher trend factor increases the influence. +// of trends. Algorithm taken from https://en.wikipedia.org/wiki/Exponential_smoothing titled: "Double exponential smoothing". +func funcHoltWinters(ev *evaluator, args Expressions) model.Value { + mat := ev.evalMatrix(args[0]) + + // The smoothing factor argument. + sf := ev.evalFloat(args[1]) + + // The trend factor argument. + tf := ev.evalFloat(args[2]) + + // Sanity check the input. + if sf <= 0 || sf >= 1 { + ev.errorf("invalid smoothing factor. Expected: 0 < sf < 1 got: %f", sf) + } + if tf <= 0 || tf >= 1 { + ev.errorf("invalid trend factor. Expected: 0 < tf < 1 got: %f", sf) + } + + // Make an output vector large enough to hold the entire result. + resultVector := make(vector, 0, len(mat)) + + // Create scratch values. + var s, b, d []float64 + + var l int + for _, samples := range mat { + l = len(samples.Values) + + // Can't do the smoothing operation with less than two points. + if l < 2 { + continue + } + + // Resize scratch values. + if l != len(s) { + s = make([]float64, l) + b = make([]float64, l) + d = make([]float64, l) + } + + // Fill in the d values with the raw values from the input. + for i, v := range samples.Values { + d[i] = float64(v.Value) + } + + // Set initial values. + s[0] = d[0] + b[0] = d[1] - d[0] + + // Run the smoothing operation. + var x, y float64 + for i := 1; i < len(d); i++ { + + // Scale the raw value against the smoothing factor. + x = sf * d[i] + + // Scale the last smoothed value with the trend at this point. 
+ y = (1 - sf) * (s[i-1] + calcTrendValue(i-1, sf, tf, s, b, d)) + + s[i] = x + y + } + + samples.Metric.Del(model.MetricNameLabel) + resultVector = append(resultVector, &sample{ + Metric: samples.Metric, + Value: model.SampleValue(s[len(s)-1]), // The last value in the vector is the smoothed result. + Timestamp: ev.Timestamp, + }) + } + + return resultVector +} + +// === sort(node model.ValVector) Vector === +func funcSort(ev *evaluator, args Expressions) model.Value { + // NaN should sort to the bottom, so take descending sort with NaN first and + // reverse it. + byValueSorter := vectorByReverseValueHeap(ev.evalVector(args[0])) + sort.Sort(sort.Reverse(byValueSorter)) + return vector(byValueSorter) +} + +// === sortDesc(node model.ValVector) Vector === +func funcSortDesc(ev *evaluator, args Expressions) model.Value { + // NaN should sort to the bottom, so take ascending sort with NaN first and + // reverse it. + byValueSorter := vectorByValueHeap(ev.evalVector(args[0])) + sort.Sort(sort.Reverse(byValueSorter)) + return vector(byValueSorter) +} + +// === clamp_max(vector model.ValVector, max Scalar) Vector === +func funcClampMax(ev *evaluator, args Expressions) model.Value { + vec := ev.evalVector(args[0]) + max := ev.evalFloat(args[1]) + for _, el := range vec { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Min(max, float64(el.Value))) + } + return vec +} + +// === clamp_min(vector model.ValVector, min Scalar) Vector === +func funcClampMin(ev *evaluator, args Expressions) model.Value { + vec := ev.evalVector(args[0]) + min := ev.evalFloat(args[1]) + for _, el := range vec { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Max(min, float64(el.Value))) + } + return vec +} + +// === drop_common_labels(node model.ValVector) Vector === +func funcDropCommonLabels(ev *evaluator, args Expressions) model.Value { + vec := ev.evalVector(args[0]) + if len(vec) < 1 { + return vector{} + } + common := model.LabelSet{} + 
for k, v := range vec[0].Metric.Metric { + // TODO(julius): Should we also drop common metric names? + if k == model.MetricNameLabel { + continue + } + common[k] = v + } + + for _, el := range vec[1:] { + for k, v := range common { + if el.Metric.Metric[k] != v { + // Deletion of map entries while iterating over them is safe. + // From http://golang.org/ref/spec#For_statements: + // "If map entries that have not yet been reached are deleted during + // iteration, the corresponding iteration values will not be produced." + delete(common, k) + } + } + } + + for _, el := range vec { + for k := range el.Metric.Metric { + if _, ok := common[k]; ok { + el.Metric.Del(k) + } + } + } + return vec +} + +// === round(vector model.ValVector, toNearest=1 Scalar) Vector === +func funcRound(ev *evaluator, args Expressions) model.Value { + // round returns a number rounded to toNearest. + // Ties are solved by rounding up. + toNearest := float64(1) + if len(args) >= 2 { + toNearest = ev.evalFloat(args[1]) + } + // Invert as it seems to cause fewer floating point accuracy issues. 
+ toNearestInverse := 1.0 / toNearest + + vec := ev.evalVector(args[0]) + for _, el := range vec { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Floor(float64(el.Value)*toNearestInverse+0.5) / toNearestInverse) + } + return vec +} + +// === scalar(node model.ValVector) Scalar === +func funcScalar(ev *evaluator, args Expressions) model.Value { + v := ev.evalVector(args[0]) + if len(v) != 1 { + return &model.Scalar{ + Value: model.SampleValue(math.NaN()), + Timestamp: ev.Timestamp, + } + } + return &model.Scalar{ + Value: model.SampleValue(v[0].Value), + Timestamp: ev.Timestamp, + } +} + +// === count_scalar(vector model.ValVector) model.SampleValue === +func funcCountScalar(ev *evaluator, args Expressions) model.Value { + return &model.Scalar{ + Value: model.SampleValue(len(ev.evalVector(args[0]))), + Timestamp: ev.Timestamp, + } +} + +func aggrOverTime(ev *evaluator, args Expressions, aggrFn func([]model.SamplePair) model.SampleValue) model.Value { + mat := ev.evalMatrix(args[0]) + resultVector := vector{} + + for _, el := range mat { + if len(el.Values) == 0 { + continue + } + + el.Metric.Del(model.MetricNameLabel) + resultVector = append(resultVector, &sample{ + Metric: el.Metric, + Value: aggrFn(el.Values), + Timestamp: ev.Timestamp, + }) + } + return resultVector +} + +// === avg_over_time(matrix model.ValMatrix) Vector === +func funcAvgOverTime(ev *evaluator, args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + var sum model.SampleValue + for _, v := range values { + sum += v.Value + } + return sum / model.SampleValue(len(values)) + }) +} + +// === count_over_time(matrix model.ValMatrix) Vector === +func funcCountOverTime(ev *evaluator, args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + return model.SampleValue(len(values)) + }) +} + +// === floor(vector model.ValVector) Vector === +func funcFloor(ev 
*evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Floor(float64(el.Value))) + } + return vector +} + +// === max_over_time(matrix model.ValMatrix) Vector === +func funcMaxOverTime(ev *evaluator, args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + max := math.Inf(-1) + for _, v := range values { + max = math.Max(max, float64(v.Value)) + } + return model.SampleValue(max) + }) +} + +// === min_over_time(matrix model.ValMatrix) Vector === +func funcMinOverTime(ev *evaluator, args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + min := math.Inf(1) + for _, v := range values { + min = math.Min(min, float64(v.Value)) + } + return model.SampleValue(min) + }) +} + +// === sum_over_time(matrix model.ValMatrix) Vector === +func funcSumOverTime(ev *evaluator, args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + var sum model.SampleValue + for _, v := range values { + sum += v.Value + } + return sum + }) +} + +// === quantile_over_time(matrix model.ValMatrix) Vector === +func funcQuantileOverTime(ev *evaluator, args Expressions) model.Value { + q := ev.evalFloat(args[0]) + mat := ev.evalMatrix(args[1]) + resultVector := vector{} + + for _, el := range mat { + if len(el.Values) == 0 { + continue + } + + el.Metric.Del(model.MetricNameLabel) + values := make(vectorByValueHeap, 0, len(el.Values)) + for _, v := range el.Values { + values = append(values, &sample{Value: v.Value}) + } + resultVector = append(resultVector, &sample{ + Metric: el.Metric, + Value: model.SampleValue(quantile(q, values)), + Timestamp: ev.Timestamp, + }) + } + return resultVector +} + +// === stddev_over_time(matrix model.ValMatrix) Vector === +func funcStddevOverTime(ev *evaluator, 
args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + var sum, squaredSum, count model.SampleValue + for _, v := range values { + sum += v.Value + squaredSum += v.Value * v.Value + count++ + } + avg := sum / count + return model.SampleValue(math.Sqrt(float64(squaredSum/count - avg*avg))) + }) +} + +// === stdvar_over_time(matrix model.ValMatrix) Vector === +func funcStdvarOverTime(ev *evaluator, args Expressions) model.Value { + return aggrOverTime(ev, args, func(values []model.SamplePair) model.SampleValue { + var sum, squaredSum, count model.SampleValue + for _, v := range values { + sum += v.Value + squaredSum += v.Value * v.Value + count++ + } + avg := sum / count + return squaredSum/count - avg*avg + }) +} + +// === abs(vector model.ValVector) Vector === +func funcAbs(ev *evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Abs(float64(el.Value))) + } + return vector +} + +// === absent(vector model.ValVector) Vector === +func funcAbsent(ev *evaluator, args Expressions) model.Value { + if len(ev.evalVector(args[0])) > 0 { + return vector{} + } + m := model.Metric{} + if vs, ok := args[0].(*VectorSelector); ok { + for _, matcher := range vs.LabelMatchers { + if matcher.Type == metric.Equal && matcher.Name != model.MetricNameLabel { + m[matcher.Name] = matcher.Value + } + } + } + return vector{ + &sample{ + Metric: metric.Metric{ + Metric: m, + Copied: true, + }, + Value: 1, + Timestamp: ev.Timestamp, + }, + } +} + +// === ceil(vector model.ValVector) Vector === +func funcCeil(ev *evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Ceil(float64(el.Value))) + } + return vector +} + +// === exp(vector model.ValVector) Vector === +func funcExp(ev 
*evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Exp(float64(el.Value))) + } + return vector +} + +// === sqrt(vector VectorNode) Vector === +func funcSqrt(ev *evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Sqrt(float64(el.Value))) + } + return vector +} + +// === ln(vector model.ValVector) Vector === +func funcLn(ev *evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Log(float64(el.Value))) + } + return vector +} + +// === log2(vector model.ValVector) Vector === +func funcLog2(ev *evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Log2(float64(el.Value))) + } + return vector +} + +// === log10(vector model.ValVector) Vector === +func funcLog10(ev *evaluator, args Expressions) model.Value { + vector := ev.evalVector(args[0]) + for _, el := range vector { + el.Metric.Del(model.MetricNameLabel) + el.Value = model.SampleValue(math.Log10(float64(el.Value))) + } + return vector +} + +// linearRegression performs a least-square linear regression analysis on the +// provided SamplePairs. It returns the slope, and the intercept value at the +// provided time. 
+func linearRegression(samples []model.SamplePair, interceptTime model.Time) (slope, intercept model.SampleValue) { + var ( + n model.SampleValue + sumX, sumY model.SampleValue + sumXY, sumX2 model.SampleValue + ) + for _, sample := range samples { + x := model.SampleValue( + model.Time(sample.Timestamp-interceptTime).UnixNano(), + ) / 1e9 + n += 1.0 + sumY += sample.Value + sumX += x + sumXY += x * sample.Value + sumX2 += x * x + } + covXY := sumXY - sumX*sumY/n + varX := sumX2 - sumX*sumX/n + + slope = covXY / varX + intercept = sumY/n - slope*sumX/n + return slope, intercept +} + +// === deriv(node model.ValMatrix) Vector === +func funcDeriv(ev *evaluator, args Expressions) model.Value { + mat := ev.evalMatrix(args[0]) + resultVector := make(vector, 0, len(mat)) + + for _, samples := range mat { + // No sense in trying to compute a derivative without at least two points. + // Drop this vector element. + if len(samples.Values) < 2 { + continue + } + // We pass in an arbitrary timestamp that is near the values in use + // to avoid floating point accuracy issues, see + // https://github.com/prometheus/prometheus/issues/2674 + slope, _ := linearRegression(samples.Values, samples.Values[0].Timestamp) + resultSample := &sample{ + Metric: samples.Metric, + Value: slope, + Timestamp: ev.Timestamp, + } + resultSample.Metric.Del(model.MetricNameLabel) + resultVector = append(resultVector, resultSample) + } + return resultVector +} + +// === predict_linear(node model.ValMatrix, k model.ValScalar) Vector === +func funcPredictLinear(ev *evaluator, args Expressions) model.Value { + mat := ev.evalMatrix(args[0]) + resultVector := make(vector, 0, len(mat)) + duration := model.SampleValue(ev.evalFloat(args[1])) + + for _, samples := range mat { + // No sense in trying to predict anything without at least two points. + // Drop this vector element. 
+ if len(samples.Values) < 2 { + continue + } + slope, intercept := linearRegression(samples.Values, ev.Timestamp) + resultSample := &sample{ + Metric: samples.Metric, + Value: slope*duration + intercept, + Timestamp: ev.Timestamp, + } + resultSample.Metric.Del(model.MetricNameLabel) + resultVector = append(resultVector, resultSample) + } + return resultVector +} + +// === histogram_quantile(k model.ValScalar, vector model.ValVector) Vector === +func funcHistogramQuantile(ev *evaluator, args Expressions) model.Value { + q := model.SampleValue(ev.evalFloat(args[0])) + inVec := ev.evalVector(args[1]) + + outVec := vector{} + signatureToMetricWithBuckets := map[uint64]*metricWithBuckets{} + for _, el := range inVec { + upperBound, err := strconv.ParseFloat( + string(el.Metric.Metric[model.BucketLabel]), 64, + ) + if err != nil { + // Oops, no bucket label or malformed label value. Skip. + // TODO(beorn7): Issue a warning somehow. + continue + } + signature := model.SignatureWithoutLabels(el.Metric.Metric, excludedLabels) + mb, ok := signatureToMetricWithBuckets[signature] + if !ok { + el.Metric.Del(model.BucketLabel) + el.Metric.Del(model.MetricNameLabel) + mb = &metricWithBuckets{el.Metric, nil} + signatureToMetricWithBuckets[signature] = mb + } + mb.buckets = append(mb.buckets, bucket{upperBound, el.Value}) + } + + for _, mb := range signatureToMetricWithBuckets { + outVec = append(outVec, &sample{ + Metric: mb.metric, + Value: model.SampleValue(bucketQuantile(q, mb.buckets)), + Timestamp: ev.Timestamp, + }) + } + + return outVec +} + +// === resets(matrix model.ValMatrix) Vector === +func funcResets(ev *evaluator, args Expressions) model.Value { + in := ev.evalMatrix(args[0]) + out := make(vector, 0, len(in)) + + for _, samples := range in { + resets := 0 + prev := model.SampleValue(samples.Values[0].Value) + for _, sample := range samples.Values[1:] { + current := sample.Value + if current < prev { + resets++ + } + prev = current + } + + rs := &sample{ + Metric: 
samples.Metric, + Value: model.SampleValue(resets), + Timestamp: ev.Timestamp, + } + rs.Metric.Del(model.MetricNameLabel) + out = append(out, rs) + } + return out +} + +// === changes(matrix model.ValMatrix) Vector === +func funcChanges(ev *evaluator, args Expressions) model.Value { + in := ev.evalMatrix(args[0]) + out := make(vector, 0, len(in)) + + for _, samples := range in { + changes := 0 + prev := model.SampleValue(samples.Values[0].Value) + for _, sample := range samples.Values[1:] { + current := sample.Value + if current != prev && !(math.IsNaN(float64(current)) && math.IsNaN(float64(prev))) { + changes++ + } + prev = current + } + + rs := &sample{ + Metric: samples.Metric, + Value: model.SampleValue(changes), + Timestamp: ev.Timestamp, + } + rs.Metric.Del(model.MetricNameLabel) + out = append(out, rs) + } + return out +} + +// === label_replace(vector model.ValVector, dst_label, replacement, src_labelname, regex model.ValString) Vector === +func funcLabelReplace(ev *evaluator, args Expressions) model.Value { + var ( + vector = ev.evalVector(args[0]) + dst = model.LabelName(ev.evalString(args[1]).Value) + repl = ev.evalString(args[2]).Value + src = model.LabelName(ev.evalString(args[3]).Value) + regexStr = ev.evalString(args[4]).Value + ) + + regex, err := regexp.Compile("^(?:" + regexStr + ")$") + if err != nil { + ev.errorf("invalid regular expression in label_replace(): %s", regexStr) + } + if !model.LabelNameRE.MatchString(string(dst)) { + ev.errorf("invalid destination label name in label_replace(): %s", dst) + } + + outSet := make(map[model.Fingerprint]struct{}, len(vector)) + for _, el := range vector { + srcVal := string(el.Metric.Metric[src]) + indexes := regex.FindStringSubmatchIndex(srcVal) + // If there is no match, no replacement should take place. 
+ if indexes == nil { + continue + } + res := regex.ExpandString([]byte{}, repl, srcVal, indexes) + if len(res) == 0 { + el.Metric.Del(dst) + } else { + el.Metric.Set(dst, model.LabelValue(res)) + } + + fp := el.Metric.Metric.Fingerprint() + if _, exists := outSet[fp]; exists { + ev.errorf("duplicated label set in output of label_replace(): %s", el.Metric.Metric) + } else { + outSet[fp] = struct{}{} + } + } + + return vector +} + +// === label_join(vector model.ValVector, dest_labelname, separator, src_labelname...) Vector === +func funcLabelJoin(ev *evaluator, args Expressions) model.Value { + var ( + vector = ev.evalVector(args[0]) + dst = model.LabelName(ev.evalString(args[1]).Value) + sep = ev.evalString(args[2]).Value + srcLabels = make([]model.LabelName, len(args)-3) + ) + for i := 3; i < len(args); i++ { + src := model.LabelName(ev.evalString(args[i]).Value) + if !model.LabelNameRE.MatchString(string(src)) { + ev.errorf("invalid source label name in label_join(): %s", src) + } + srcLabels[i-3] = src + } + + if !model.LabelNameRE.MatchString(string(dst)) { + ev.errorf("invalid destination label name in label_join(): %s", dst) + } + + outSet := make(map[model.Fingerprint]struct{}, len(vector)) + for _, el := range vector { + srcVals := make([]string, len(srcLabels)) + for i, src := range srcLabels { + srcVals[i] = string(el.Metric.Metric[src]) + } + + strval := strings.Join(srcVals, sep) + if strval == "" { + el.Metric.Del(dst) + } else { + el.Metric.Set(dst, model.LabelValue(strval)) + } + + fp := el.Metric.Metric.Fingerprint() + if _, exists := outSet[fp]; exists { + ev.errorf("duplicated label set in output of label_join(): %s", el.Metric.Metric) + } else { + outSet[fp] = struct{}{} + } + } + return vector +} + +// === vector(s scalar) Vector === +func funcVector(ev *evaluator, args Expressions) model.Value { + return vector{ + &sample{ + Metric: metric.Metric{}, + Value: model.SampleValue(ev.evalFloat(args[0])), + Timestamp: ev.Timestamp, + }, + } +} + +// 
Common code for date related functions. +func dateWrapper(ev *evaluator, args Expressions, f func(time.Time) model.SampleValue) model.Value { + var v vector + if len(args) == 0 { + v = vector{ + &sample{ + Metric: metric.Metric{}, + Value: model.SampleValue(ev.Timestamp.Unix()), + Timestamp: ev.Timestamp, + }, + } + } else { + v = ev.evalVector(args[0]) + } + for _, el := range v { + el.Metric.Del(model.MetricNameLabel) + t := time.Unix(int64(el.Value), 0).UTC() + el.Value = f(t) + } + return v +} + +// === days_in_month(v vector) scalar === +func funcDaysInMonth(ev *evaluator, args Expressions) model.Value { + return dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(32 - time.Date(t.Year(), t.Month(), 32, 0, 0, 0, 0, time.UTC).Day()) + }) +} + +// === day_of_month(v vector) scalar === +func funcDayOfMonth(ev *evaluator, args Expressions) model.Value { + return dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(t.Day()) + }) +} + +// === day_of_week(v vector) scalar === +func funcDayOfWeek(ev *evaluator, args Expressions) model.Value { + return dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(t.Weekday()) + }) +} + +// === hour(v vector) scalar === +func funcHour(ev *evaluator, args Expressions) model.Value { + return dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(t.Hour()) + }) +} + +// === minute(v vector) scalar === +func funcMinute(ev *evaluator, args Expressions) model.Value { + return dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(t.Minute()) + }) +} + +// === month(v vector) scalar === +func funcMonth(ev *evaluator, args Expressions) model.Value { + return dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(t.Month()) + }) +} + +// === year(v vector) scalar === +func funcYear(ev *evaluator, args Expressions) model.Value { + return 
dateWrapper(ev, args, func(t time.Time) model.SampleValue { + return model.SampleValue(t.Year()) + }) +} + +var functions = map[string]*Function{ + "abs": { + Name: "abs", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcAbs, + }, + "absent": { + Name: "absent", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcAbsent, + }, + "avg_over_time": { + Name: "avg_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcAvgOverTime, + }, + "ceil": { + Name: "ceil", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcCeil, + }, + "changes": { + Name: "changes", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcChanges, + }, + "clamp_max": { + Name: "clamp_max", + ArgTypes: []model.ValueType{model.ValVector, model.ValScalar}, + ReturnType: model.ValVector, + Call: funcClampMax, + }, + "clamp_min": { + Name: "clamp_min", + ArgTypes: []model.ValueType{model.ValVector, model.ValScalar}, + ReturnType: model.ValVector, + Call: funcClampMin, + }, + "count_over_time": { + Name: "count_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcCountOverTime, + }, + "count_scalar": { + Name: "count_scalar", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValScalar, + Call: funcCountScalar, + }, + "days_in_month": { + Name: "days_in_month", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcDaysInMonth, + }, + "day_of_month": { + Name: "day_of_month", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcDayOfMonth, + }, + "day_of_week": { + Name: "day_of_week", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcDayOfWeek, + }, + "delta": { + Name: "delta", 
+ ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcDelta, + }, + "deriv": { + Name: "deriv", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcDeriv, + }, + "drop_common_labels": { + Name: "drop_common_labels", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcDropCommonLabels, + }, + "exp": { + Name: "exp", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcExp, + }, + "floor": { + Name: "floor", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcFloor, + }, + "histogram_quantile": { + Name: "histogram_quantile", + ArgTypes: []model.ValueType{model.ValScalar, model.ValVector}, + ReturnType: model.ValVector, + Call: funcHistogramQuantile, + }, + "holt_winters": { + Name: "holt_winters", + ArgTypes: []model.ValueType{model.ValMatrix, model.ValScalar, model.ValScalar}, + ReturnType: model.ValVector, + Call: funcHoltWinters, + }, + "hour": { + Name: "hour", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcHour, + }, + "idelta": { + Name: "idelta", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcIdelta, + }, + "increase": { + Name: "increase", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcIncrease, + }, + "irate": { + Name: "irate", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcIrate, + }, + "label_replace": { + Name: "label_replace", + ArgTypes: []model.ValueType{model.ValVector, model.ValString, model.ValString, model.ValString, model.ValString}, + ReturnType: model.ValVector, + Call: funcLabelReplace, + }, + "label_join": { + Name: "label_join", + ArgTypes: []model.ValueType{model.ValVector, model.ValString, model.ValString, model.ValString}, + Variadic: -1, + ReturnType: 
model.ValVector, + Call: funcLabelJoin, + }, + "ln": { + Name: "ln", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcLn, + }, + "log10": { + Name: "log10", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcLog10, + }, + "log2": { + Name: "log2", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcLog2, + }, + "max_over_time": { + Name: "max_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcMaxOverTime, + }, + "min_over_time": { + Name: "min_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcMinOverTime, + }, + "minute": { + Name: "minute", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcMinute, + }, + "month": { + Name: "month", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcMonth, + }, + "predict_linear": { + Name: "predict_linear", + ArgTypes: []model.ValueType{model.ValMatrix, model.ValScalar}, + ReturnType: model.ValVector, + Call: funcPredictLinear, + }, + "quantile_over_time": { + Name: "quantile_over_time", + ArgTypes: []model.ValueType{model.ValScalar, model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcQuantileOverTime, + }, + "rate": { + Name: "rate", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcRate, + }, + "resets": { + Name: "resets", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcResets, + }, + "round": { + Name: "round", + ArgTypes: []model.ValueType{model.ValVector, model.ValScalar}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcRound, + }, + "scalar": { + Name: "scalar", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValScalar, + Call: funcScalar, + }, + "sort": { + Name: "sort", 
+ ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcSort, + }, + "sort_desc": { + Name: "sort_desc", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcSortDesc, + }, + "sqrt": { + Name: "sqrt", + ArgTypes: []model.ValueType{model.ValVector}, + ReturnType: model.ValVector, + Call: funcSqrt, + }, + "stddev_over_time": { + Name: "stddev_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcStddevOverTime, + }, + "stdvar_over_time": { + Name: "stdvar_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcStdvarOverTime, + }, + "sum_over_time": { + Name: "sum_over_time", + ArgTypes: []model.ValueType{model.ValMatrix}, + ReturnType: model.ValVector, + Call: funcSumOverTime, + }, + "time": { + Name: "time", + ArgTypes: []model.ValueType{}, + ReturnType: model.ValScalar, + Call: funcTime, + }, + "vector": { + Name: "vector", + ArgTypes: []model.ValueType{model.ValScalar}, + ReturnType: model.ValVector, + Call: funcVector, + }, + "year": { + Name: "year", + ArgTypes: []model.ValueType{model.ValVector}, + Variadic: 1, + ReturnType: model.ValVector, + Call: funcYear, + }, +} + +// getFunction returns a predefined Function object for the given name. 
+func getFunction(name string) (*Function, bool) { + function, ok := functions[name] + return function, ok +} + +type vectorByValueHeap vector + +func (s vectorByValueHeap) Len() int { + return len(s) +} + +func (s vectorByValueHeap) Less(i, j int) bool { + if math.IsNaN(float64(s[i].Value)) { + return true + } + return s[i].Value < s[j].Value +} + +func (s vectorByValueHeap) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s *vectorByValueHeap) Push(x interface{}) { + *s = append(*s, x.(*sample)) +} + +func (s *vectorByValueHeap) Pop() interface{} { + old := *s + n := len(old) + el := old[n-1] + *s = old[0 : n-1] + return el +} + +type vectorByReverseValueHeap vector + +func (s vectorByReverseValueHeap) Len() int { + return len(s) +} + +func (s vectorByReverseValueHeap) Less(i, j int) bool { + if math.IsNaN(float64(s[i].Value)) { + return true + } + return s[i].Value > s[j].Value +} + +func (s vectorByReverseValueHeap) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s *vectorByReverseValueHeap) Push(x interface{}) { + *s = append(*s, x.(*sample)) +} + +func (s *vectorByReverseValueHeap) Pop() interface{} { + old := *s + n := len(old) + el := old[n-1] + *s = old[0 : n-1] + return el +} diff --git a/vendor/github.com/prometheus/prometheus/promql/fuzz.go b/vendor/github.com/prometheus/prometheus/promql/fuzz.go new file mode 100644 index 000000000..e52ccfb25 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/fuzz.go @@ -0,0 +1,87 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Only build when go-fuzz is in use +// +build gofuzz + +package promql + +// PromQL parser fuzzing instrumentation for use with +// https://github.com/dvyukov/go-fuzz. +// +// Fuzz each parser by building appropriately instrumented parser, ex. +// FuzzParseMetric and execute it with it's +// +// go-fuzz-build -func FuzzParseMetric -o FuzzParseMetric.zip github.com/prometheus/prometheus/promql +// +// And then run the tests with the appropriate inputs +// +// go-fuzz -bin FuzzParseMetric.zip -workdir fuzz-data/ParseMetric +// +// Further input samples should go in the folders fuzz-data/ParseMetric/corpus. +// +// Repeat for ParseMetricSeletion, ParseExpr and ParseStmt. + +// Tuning which value is returned from Fuzz*-functions has a strong influence +// on how quick the fuzzer converges on "interesting" cases. At least try +// switching between fuzzMeh (= included in corpus, but not a priority) and +// fuzzDiscard (=don't use this input for re-building later inputs) when +// experimenting. +const ( + fuzzInteresting = 1 + fuzzMeh = 0 + fuzzDiscard = -1 +) + +// Fuzz the metric parser. +// +// Note that his is not the parser for the text-based exposition-format; that +// lives in github.com/prometheus/client_golang/text. +func FuzzParseMetric(in []byte) int { + _, err := ParseMetric(string(in)) + if err == nil { + return fuzzInteresting + } + + return fuzzMeh +} + +// Fuzz the metric selector parser. +func FuzzParseMetricSelector(in []byte) int { + _, err := ParseMetricSelector(string(in)) + if err == nil { + return fuzzInteresting + } + + return fuzzMeh +} + +// Fuzz the expression parser. +func FuzzParseExpr(in []byte) int { + _, err := ParseExpr(string(in)) + if err == nil { + return fuzzInteresting + } + + return fuzzMeh +} + +// Fuzz the parser. 
+func FuzzParseStmts(in []byte) int { + _, err := ParseStmts(string(in)) + if err == nil { + return fuzzInteresting + } + + return fuzzMeh +} diff --git a/vendor/github.com/prometheus/prometheus/promql/lex.go b/vendor/github.com/prometheus/prometheus/promql/lex.go new file mode 100644 index 000000000..efc0b11e8 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/lex.go @@ -0,0 +1,908 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "fmt" + "strings" + "unicode" + "unicode/utf8" +) + +// item represents a token or text string returned from the scanner. +type item struct { + typ itemType // The type of this item. + pos Pos // The starting position, in bytes, of this item in the input string. + val string // The value of this item. +} + +// String returns a descriptive string for the item. 
+func (i item) String() string { + switch { + case i.typ == itemEOF: + return "EOF" + case i.typ == itemError: + return i.val + case i.typ == itemIdentifier || i.typ == itemMetricIdentifier: + return fmt.Sprintf("%q", i.val) + case i.typ.isKeyword(): + return fmt.Sprintf("<%s>", i.val) + case i.typ.isOperator(): + return fmt.Sprintf("", i.val) + case i.typ.isAggregator(): + return fmt.Sprintf("", i.val) + case len(i.val) > 10: + return fmt.Sprintf("%.10q...", i.val) + } + return fmt.Sprintf("%q", i.val) +} + +// isOperator returns true if the item corresponds to a arithmetic or set operator. +// Returns false otherwise. +func (i itemType) isOperator() bool { return i > operatorsStart && i < operatorsEnd } + +// isAggregator returns true if the item belongs to the aggregator functions. +// Returns false otherwise +func (i itemType) isAggregator() bool { return i > aggregatorsStart && i < aggregatorsEnd } + +// isAggregator returns true if the item is an aggregator that takes a parameter. +// Returns false otherwise +func (i itemType) isAggregatorWithParam() bool { + return i == itemTopK || i == itemBottomK || i == itemCountValues || i == itemQuantile +} + +// isKeyword returns true if the item corresponds to a keyword. +// Returns false otherwise. +func (i itemType) isKeyword() bool { return i > keywordsStart && i < keywordsEnd } + +// isCompairsonOperator returns true if the item corresponds to a comparison operator. +// Returns false otherwise. +func (i itemType) isComparisonOperator() bool { + switch i { + case itemEQL, itemNEQ, itemLTE, itemLSS, itemGTE, itemGTR: + return true + default: + return false + } +} + +// isSetOperator returns whether the item corresponds to a set operator. +func (i itemType) isSetOperator() bool { + switch i { + case itemLAND, itemLOR, itemLUnless: + return true + } + return false +} + +// LowestPrec is a constant for operator precedence in expressions. +const LowestPrec = 0 // Non-operators. 
+ +// Precedence returns the operator precedence of the binary +// operator op. If op is not a binary operator, the result +// is LowestPrec. +func (i itemType) precedence() int { + switch i { + case itemLOR: + return 1 + case itemLAND, itemLUnless: + return 2 + case itemEQL, itemNEQ, itemLTE, itemLSS, itemGTE, itemGTR: + return 3 + case itemADD, itemSUB: + return 4 + case itemMUL, itemDIV, itemMOD: + return 5 + case itemPOW: + return 6 + default: + return LowestPrec + } +} + +func (i itemType) isRightAssociative() bool { + switch i { + case itemPOW: + return true + default: + return false + } + +} + +type itemType int + +const ( + itemError itemType = iota // Error occurred, value is error message + itemEOF + itemComment + itemIdentifier + itemMetricIdentifier + itemLeftParen + itemRightParen + itemLeftBrace + itemRightBrace + itemLeftBracket + itemRightBracket + itemComma + itemAssign + itemSemicolon + itemString + itemNumber + itemDuration + itemBlank + itemTimes + + operatorsStart + // Operators. + itemSUB + itemADD + itemMUL + itemMOD + itemDIV + itemLAND + itemLOR + itemLUnless + itemEQL + itemNEQ + itemLTE + itemLSS + itemGTE + itemGTR + itemEQLRegex + itemNEQRegex + itemPOW + operatorsEnd + + aggregatorsStart + // Aggregators. + itemAvg + itemCount + itemSum + itemMin + itemMax + itemStddev + itemStdvar + itemTopK + itemBottomK + itemCountValues + itemQuantile + aggregatorsEnd + + keywordsStart + // Keywords. + itemAlert + itemIf + itemFor + itemLabels + itemAnnotations + itemKeepCommon + itemOffset + itemBy + itemWithout + itemOn + itemIgnoring + itemGroupLeft + itemGroupRight + itemBool + keywordsEnd +) + +var key = map[string]itemType{ + // Operators. + "and": itemLAND, + "or": itemLOR, + "unless": itemLUnless, + + // Aggregators. 
+ "sum": itemSum, + "avg": itemAvg, + "count": itemCount, + "min": itemMin, + "max": itemMax, + "stddev": itemStddev, + "stdvar": itemStdvar, + "topk": itemTopK, + "bottomk": itemBottomK, + "count_values": itemCountValues, + "quantile": itemQuantile, + + // Keywords. + "alert": itemAlert, + "if": itemIf, + "for": itemFor, + "labels": itemLabels, + "annotations": itemAnnotations, + "offset": itemOffset, + "by": itemBy, + "without": itemWithout, + "keep_common": itemKeepCommon, + "on": itemOn, + "ignoring": itemIgnoring, + "group_left": itemGroupLeft, + "group_right": itemGroupRight, + "bool": itemBool, +} + +// These are the default string representations for common items. It does not +// imply that those are the only character sequences that can be lexed to such an item. +var itemTypeStr = map[itemType]string{ + itemLeftParen: "(", + itemRightParen: ")", + itemLeftBrace: "{", + itemRightBrace: "}", + itemLeftBracket: "[", + itemRightBracket: "]", + itemComma: ",", + itemAssign: "=", + itemSemicolon: ";", + itemBlank: "_", + itemTimes: "x", + + itemSUB: "-", + itemADD: "+", + itemMUL: "*", + itemMOD: "%", + itemDIV: "/", + itemEQL: "==", + itemNEQ: "!=", + itemLTE: "<=", + itemLSS: "<", + itemGTE: ">=", + itemGTR: ">", + itemEQLRegex: "=~", + itemNEQRegex: "!~", + itemPOW: "^", +} + +func init() { + // Add keywords to item type strings. + for s, ty := range key { + itemTypeStr[ty] = s + } + // Special numbers. 
+ key["inf"] = itemNumber + key["nan"] = itemNumber +} + +func (i itemType) String() string { + if s, ok := itemTypeStr[i]; ok { + return s + } + return fmt.Sprintf("", i) +} + +func (i item) desc() string { + if _, ok := itemTypeStr[i.typ]; ok { + return i.String() + } + if i.typ == itemEOF { + return i.typ.desc() + } + return fmt.Sprintf("%s %s", i.typ.desc(), i) +} + +func (i itemType) desc() string { + switch i { + case itemError: + return "error" + case itemEOF: + return "end of input" + case itemComment: + return "comment" + case itemIdentifier: + return "identifier" + case itemMetricIdentifier: + return "metric identifier" + case itemString: + return "string" + case itemNumber: + return "number" + case itemDuration: + return "duration" + } + return fmt.Sprintf("%q", i) +} + +const eof = -1 + +// stateFn represents the state of the scanner as a function that returns the next state. +type stateFn func(*lexer) stateFn + +// Pos is the position in a string. +type Pos int + +// lexer holds the state of the scanner. +type lexer struct { + input string // The string being scanned. + state stateFn // The next lexing function to enter. + pos Pos // Current position in the input. + start Pos // Start position of this item. + width Pos // Width of last rune read from input. + lastPos Pos // Position of most recent item returned by nextItem. + items chan item // Channel of scanned items. + + parenDepth int // Nesting depth of ( ) exprs. + braceOpen bool // Whether a { is opened. + bracketOpen bool // Whether a [ is opened. + stringOpen rune // Quote rune of the string currently being read. + + // seriesDesc is set when a series description for the testing + // language is lexed. + seriesDesc bool +} + +// next returns the next rune in the input. 
+func (l *lexer) next() rune { + if int(l.pos) >= len(l.input) { + l.width = 0 + return eof + } + r, w := utf8.DecodeRuneInString(l.input[l.pos:]) + l.width = Pos(w) + l.pos += l.width + return r +} + +// peek returns but does not consume the next rune in the input. +func (l *lexer) peek() rune { + r := l.next() + l.backup() + return r +} + +// backup steps back one rune. Can only be called once per call of next. +func (l *lexer) backup() { + l.pos -= l.width +} + +// emit passes an item back to the client. +func (l *lexer) emit(t itemType) { + l.items <- item{t, l.start, l.input[l.start:l.pos]} + l.start = l.pos +} + +// ignore skips over the pending input before this point. +func (l *lexer) ignore() { + l.start = l.pos +} + +// accept consumes the next rune if it's from the valid set. +func (l *lexer) accept(valid string) bool { + if strings.ContainsRune(valid, l.next()) { + return true + } + l.backup() + return false +} + +// acceptRun consumes a run of runes from the valid set. +func (l *lexer) acceptRun(valid string) { + for strings.ContainsRune(valid, l.next()) { + // consume + } + l.backup() +} + +// lineNumber reports which line we're on, based on the position of +// the previous item returned by nextItem. Doing it this way +// means we don't have to worry about peek double counting. +func (l *lexer) lineNumber() int { + return 1 + strings.Count(l.input[:l.lastPos], "\n") +} + +// linePosition reports at which character in the current line +// we are on. +func (l *lexer) linePosition() int { + lb := strings.LastIndex(l.input[:l.lastPos], "\n") + if lb == -1 { + return 1 + int(l.lastPos) + } + return 1 + int(l.lastPos) - lb +} + +// errorf returns an error token and terminates the scan by passing +// back a nil pointer that will be the next state, terminating l.nextItem. 
+func (l *lexer) errorf(format string, args ...interface{}) stateFn { + l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)} + return nil +} + +// nextItem returns the next item from the input. +func (l *lexer) nextItem() item { + item := <-l.items + l.lastPos = item.pos + return item +} + +// lex creates a new scanner for the input string. +func lex(input string) *lexer { + l := &lexer{ + input: input, + items: make(chan item), + } + go l.run() + return l +} + +// run runs the state machine for the lexer. +func (l *lexer) run() { + for l.state = lexStatements; l.state != nil; { + l.state = l.state(l) + } + close(l.items) +} + +// lineComment is the character that starts a line comment. +const lineComment = "#" + +// lexStatements is the top-level state for lexing. +func lexStatements(l *lexer) stateFn { + if l.braceOpen { + return lexInsideBraces + } + if strings.HasPrefix(l.input[l.pos:], lineComment) { + return lexLineComment + } + + switch r := l.next(); { + case r == eof: + if l.parenDepth != 0 { + return l.errorf("unclosed left parenthesis") + } else if l.bracketOpen { + return l.errorf("unclosed left bracket") + } + l.emit(itemEOF) + return nil + case r == ',': + l.emit(itemComma) + case isSpace(r): + return lexSpace + case r == '*': + l.emit(itemMUL) + case r == '/': + l.emit(itemDIV) + case r == '%': + l.emit(itemMOD) + case r == '+': + l.emit(itemADD) + case r == '-': + l.emit(itemSUB) + case r == '^': + l.emit(itemPOW) + case r == '=': + if t := l.peek(); t == '=' { + l.next() + l.emit(itemEQL) + } else if t == '~' { + return l.errorf("unexpected character after '=': %q", t) + } else { + l.emit(itemAssign) + } + case r == '!': + if t := l.next(); t == '=' { + l.emit(itemNEQ) + } else { + return l.errorf("unexpected character after '!': %q", t) + } + case r == '<': + if t := l.peek(); t == '=' { + l.next() + l.emit(itemLTE) + } else { + l.emit(itemLSS) + } + case r == '>': + if t := l.peek(); t == '=' { + l.next() + l.emit(itemGTE) + } else { 
+ l.emit(itemGTR) + } + case isDigit(r) || (r == '.' && isDigit(l.peek())): + l.backup() + return lexNumberOrDuration + case r == '"' || r == '\'': + l.stringOpen = r + return lexString + case r == '`': + l.stringOpen = r + return lexRawString + case isAlpha(r) || r == ':': + l.backup() + return lexKeywordOrIdentifier + case r == '(': + l.emit(itemLeftParen) + l.parenDepth++ + return lexStatements + case r == ')': + l.emit(itemRightParen) + l.parenDepth-- + if l.parenDepth < 0 { + return l.errorf("unexpected right parenthesis %q", r) + } + return lexStatements + case r == '{': + l.emit(itemLeftBrace) + l.braceOpen = true + return lexInsideBraces(l) + case r == '[': + if l.bracketOpen { + return l.errorf("unexpected left bracket %q", r) + } + l.emit(itemLeftBracket) + l.bracketOpen = true + return lexDuration + case r == ']': + if !l.bracketOpen { + return l.errorf("unexpected right bracket %q", r) + } + l.emit(itemRightBracket) + l.bracketOpen = false + + default: + return l.errorf("unexpected character: %q", r) + } + return lexStatements +} + +// lexInsideBraces scans the inside of a vector selector. Keywords are ignored and +// scanned as identifiers. +func lexInsideBraces(l *lexer) stateFn { + if strings.HasPrefix(l.input[l.pos:], lineComment) { + return lexLineComment + } + + switch r := l.next(); { + case r == eof: + return l.errorf("unexpected end of input inside braces") + case isSpace(r): + return lexSpace + case isAlpha(r): + l.backup() + return lexIdentifier + case r == ',': + l.emit(itemComma) + case r == '"' || r == '\'': + l.stringOpen = r + return lexString + case r == '`': + l.stringOpen = r + return lexRawString + case r == '=': + if l.next() == '~' { + l.emit(itemEQLRegex) + break + } + l.backup() + l.emit(itemEQL) + case r == '!': + switch nr := l.next(); { + case nr == '~': + l.emit(itemNEQRegex) + case nr == '=': + l.emit(itemNEQ) + default: + return l.errorf("unexpected character after '!' 
inside braces: %q", nr) + } + case r == '{': + return l.errorf("unexpected left brace %q", r) + case r == '}': + l.emit(itemRightBrace) + l.braceOpen = false + + if l.seriesDesc { + return lexValueSequence + } + return lexStatements + default: + return l.errorf("unexpected character inside braces: %q", r) + } + return lexInsideBraces +} + +// lexValueSequence scans a value sequence of a series description. +func lexValueSequence(l *lexer) stateFn { + switch r := l.next(); { + case r == eof: + return lexStatements + case isSpace(r): + lexSpace(l) + case r == '+': + l.emit(itemADD) + case r == '-': + l.emit(itemSUB) + case r == 'x': + l.emit(itemTimes) + case r == '_': + l.emit(itemBlank) + case isDigit(r) || (r == '.' && isDigit(l.peek())): + l.backup() + lexNumber(l) + case isAlpha(r): + l.backup() + // We might lex invalid items here but this will be caught by the parser. + return lexKeywordOrIdentifier + default: + return l.errorf("unexpected character in series sequence: %q", r) + } + return lexValueSequence +} + +// lexEscape scans a string escape sequence. The initial escaping character (\) +// has already been seen. +// +// NOTE: This function as well as the helper function digitVal() and associated +// tests have been adapted from the corresponding functions in the "go/scanner" +// package of the Go standard library to work for Prometheus-style strings. +// None of the actual escaping/quoting logic was changed in this function - it +// was only modified to integrate with our lexer. 
+func lexEscape(l *lexer) { + var n int + var base, max uint32 + + ch := l.next() + switch ch { + case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', l.stringOpen: + return + case '0', '1', '2', '3', '4', '5', '6', '7': + n, base, max = 3, 8, 255 + case 'x': + ch = l.next() + n, base, max = 2, 16, 255 + case 'u': + ch = l.next() + n, base, max = 4, 16, unicode.MaxRune + case 'U': + ch = l.next() + n, base, max = 8, 16, unicode.MaxRune + case eof: + l.errorf("escape sequence not terminated") + default: + l.errorf("unknown escape sequence %#U", ch) + } + + var x uint32 + for n > 0 { + d := uint32(digitVal(ch)) + if d >= base { + if ch == eof { + l.errorf("escape sequence not terminated") + } + l.errorf("illegal character %#U in escape sequence", ch) + } + x = x*base + d + ch = l.next() + n-- + } + + if x > max || 0xD800 <= x && x < 0xE000 { + l.errorf("escape sequence is an invalid Unicode code point") + } +} + +// digitVal returns the digit value of a rune or 16 in case the rune does not +// represent a valid digit. +func digitVal(ch rune) int { + switch { + case '0' <= ch && ch <= '9': + return int(ch - '0') + case 'a' <= ch && ch <= 'f': + return int(ch - 'a' + 10) + case 'A' <= ch && ch <= 'F': + return int(ch - 'A' + 10) + } + return 16 // Larger than any legal digit val. +} + +// lexString scans a quoted string. The initial quote has already been seen. +func lexString(l *lexer) stateFn { +Loop: + for { + switch l.next() { + case '\\': + lexEscape(l) + case utf8.RuneError: + return l.errorf("invalid UTF-8 rune") + case eof, '\n': + return l.errorf("unterminated quoted string") + case l.stringOpen: + break Loop + } + } + l.emit(itemString) + return lexStatements +} + +// lexRawString scans a raw quoted string. The initial quote has already been seen. 
+func lexRawString(l *lexer) stateFn { +Loop: + for { + switch l.next() { + case utf8.RuneError: + return l.errorf("invalid UTF-8 rune") + case eof: + return l.errorf("unterminated raw string") + case l.stringOpen: + break Loop + } + } + l.emit(itemString) + return lexStatements +} + +// lexSpace scans a run of space characters. One space has already been seen. +func lexSpace(l *lexer) stateFn { + for isSpace(l.peek()) { + l.next() + } + l.ignore() + return lexStatements +} + +// lexLineComment scans a line comment. Left comment marker is known to be present. +func lexLineComment(l *lexer) stateFn { + l.pos += Pos(len(lineComment)) + for r := l.next(); !isEndOfLine(r) && r != eof; { + r = l.next() + } + l.backup() + l.emit(itemComment) + return lexStatements +} + +func lexDuration(l *lexer) stateFn { + if l.scanNumber() { + return l.errorf("missing unit character in duration") + } + // Next two chars must be a valid unit and a non-alphanumeric. + if l.accept("smhdwy") { + if isAlphaNumeric(l.next()) { + return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos]) + } + l.backup() + l.emit(itemDuration) + return lexStatements + } + return l.errorf("bad duration syntax: %q", l.input[l.start:l.pos]) +} + +// lexNumber scans a number: decimal, hex, oct or float. +func lexNumber(l *lexer) stateFn { + if !l.scanNumber() { + return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) + } + l.emit(itemNumber) + return lexStatements +} + +// lexNumberOrDuration scans a number or a duration item. +func lexNumberOrDuration(l *lexer) stateFn { + if l.scanNumber() { + l.emit(itemNumber) + return lexStatements + } + // Next two chars must be a valid unit and a non-alphanumeric. 
+ if l.accept("smhdwy") { + if isAlphaNumeric(l.next()) { + return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos]) + } + l.backup() + l.emit(itemDuration) + return lexStatements + } + return l.errorf("bad number or duration syntax: %q", l.input[l.start:l.pos]) +} + +// scanNumber scans numbers of different formats. The scanned item is +// not necessarily a valid number. This case is caught by the parser. +func (l *lexer) scanNumber() bool { + digits := "0123456789" + // Disallow hexadecimal in series descriptions as the syntax is ambiguous. + if !l.seriesDesc && l.accept("0") && l.accept("xX") { + digits = "0123456789abcdefABCDEF" + } + l.acceptRun(digits) + if l.accept(".") { + l.acceptRun(digits) + } + if l.accept("eE") { + l.accept("+-") + l.acceptRun("0123456789") + } + // Next thing must not be alphanumeric unless it's the times token + // for series repetitions. + if r := l.peek(); (l.seriesDesc && r == 'x') || !isAlphaNumeric(r) { + return true + } + return false +} + +// lexIdentifier scans an alphanumeric identifier. The next character +// is known to be a letter. +func lexIdentifier(l *lexer) stateFn { + for isAlphaNumeric(l.next()) { + // absorb + } + l.backup() + l.emit(itemIdentifier) + return lexStatements +} + +// lexKeywordOrIdentifier scans an alphanumeric identifier which may contain +// a colon rune. If the identifier is a keyword the respective keyword item +// is scanned. +func lexKeywordOrIdentifier(l *lexer) stateFn { +Loop: + for { + switch r := l.next(); { + case isAlphaNumeric(r) || r == ':': + // absorb. 
+ default: + l.backup() + word := l.input[l.start:l.pos] + if kw, ok := key[strings.ToLower(word)]; ok { + l.emit(kw) + } else if !strings.Contains(word, ":") { + l.emit(itemIdentifier) + } else { + l.emit(itemMetricIdentifier) + } + break Loop + } + } + if l.seriesDesc && l.peek() != '{' { + return lexValueSequence + } + return lexStatements +} + +func isSpace(r rune) bool { + return r == ' ' || r == '\t' || r == '\n' || r == '\r' +} + +// isEndOfLine reports whether r is an end-of-line character. +func isEndOfLine(r rune) bool { + return r == '\r' || r == '\n' +} + +// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. +func isAlphaNumeric(r rune) bool { + return isAlpha(r) || isDigit(r) +} + +// isDigit reports whether r is a digit. Note: we cannot use unicode.IsDigit() +// instead because that also classifies non-Latin digits as digits. See +// https://github.com/prometheus/prometheus/issues/939. +func isDigit(r rune) bool { + return '0' <= r && r <= '9' +} + +// isAlpha reports whether r is an alphabetic or underscore. +func isAlpha(r rune) bool { + return r == '_' || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') +} + +// isLabel reports whether the string can be used as label. +func isLabel(s string) bool { + if len(s) == 0 || !isAlpha(rune(s[0])) { + return false + } + for _, c := range s[1:] { + if !isAlphaNumeric(c) { + return false + } + } + return true +} diff --git a/vendor/github.com/prometheus/prometheus/promql/parse.go b/vendor/github.com/prometheus/prometheus/promql/parse.go new file mode 100644 index 000000000..6a0ecc8dd --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/parse.go @@ -0,0 +1,1146 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "fmt" + "runtime" + "strconv" + "strings" + "time" + + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/metric" + "github.com/prometheus/prometheus/util/strutil" +) + +type parser struct { + lex *lexer + token [3]item + peekCount int +} + +// ParseErr wraps a parsing error with line and position context. +// If the parsing input was a single line, line will be 0 and omitted +// from the error string. +type ParseErr struct { + Line, Pos int + Err error +} + +func (e *ParseErr) Error() string { + if e.Line == 0 { + return fmt.Sprintf("parse error at char %d: %s", e.Pos, e.Err) + } + return fmt.Sprintf("parse error at line %d, char %d: %s", e.Line, e.Pos, e.Err) +} + +// ParseStmts parses the input and returns the resulting statements or any occurring error. +func ParseStmts(input string) (Statements, error) { + p := newParser(input) + + stmts, err := p.parseStmts() + if err != nil { + return nil, err + } + err = p.typecheck(stmts) + return stmts, err +} + +// ParseExpr returns the expression parsed from the input. 
+func ParseExpr(input string) (Expr, error) { + p := newParser(input) + + expr, err := p.parseExpr() + if err != nil { + return nil, err + } + err = p.typecheck(expr) + return expr, err +} + +// ParseMetric parses the input into a metric +func ParseMetric(input string) (m model.Metric, err error) { + p := newParser(input) + defer p.recover(&err) + + m = p.metric() + if p.peek().typ != itemEOF { + p.errorf("could not parse remaining input %.15q...", p.lex.input[p.lex.lastPos:]) + } + return m, nil +} + +// ParseMetricSelector parses the provided textual metric selector into a list of +// label matchers. +func ParseMetricSelector(input string) (m metric.LabelMatchers, err error) { + p := newParser(input) + defer p.recover(&err) + + name := "" + if t := p.peek().typ; t == itemMetricIdentifier || t == itemIdentifier { + name = p.next().val + } + vs := p.vectorSelector(name) + if p.peek().typ != itemEOF { + p.errorf("could not parse remaining input %.15q...", p.lex.input[p.lex.lastPos:]) + } + return vs.LabelMatchers, nil +} + +// parseSeriesDesc parses the description of a time series. +func parseSeriesDesc(input string) (model.Metric, []sequenceValue, error) { + p := newParser(input) + p.lex.seriesDesc = true + + return p.parseSeriesDesc() +} + +// newParser returns a new parser. +func newParser(input string) *parser { + p := &parser{ + lex: lex(input), + } + return p +} + +// parseStmts parses a sequence of statements from the input. +func (p *parser) parseStmts() (stmts Statements, err error) { + defer p.recover(&err) + stmts = Statements{} + + for p.peek().typ != itemEOF { + if p.peek().typ == itemComment { + continue + } + stmts = append(stmts, p.stmt()) + } + return +} + +// parseExpr parses a single expression from the input. 
+func (p *parser) parseExpr() (expr Expr, err error) { + defer p.recover(&err) + + for p.peek().typ != itemEOF { + if p.peek().typ == itemComment { + continue + } + if expr != nil { + p.errorf("could not parse remaining input %.15q...", p.lex.input[p.lex.lastPos:]) + } + expr = p.expr() + } + + if expr == nil { + p.errorf("no expression found in input") + } + return +} + +// sequenceValue is an omittable value in a sequence of time series values. +type sequenceValue struct { + value model.SampleValue + omitted bool +} + +func (v sequenceValue) String() string { + if v.omitted { + return "_" + } + return v.value.String() +} + +// parseSeriesDesc parses a description of a time series into its metric and value sequence. +func (p *parser) parseSeriesDesc() (m model.Metric, vals []sequenceValue, err error) { + defer p.recover(&err) + + m = p.metric() + + const ctx = "series values" + for { + if p.peek().typ == itemEOF { + break + } + + // Extract blanks. + if p.peek().typ == itemBlank { + p.next() + times := uint64(1) + if p.peek().typ == itemTimes { + p.next() + times, err = strconv.ParseUint(p.expect(itemNumber, ctx).val, 10, 64) + if err != nil { + p.errorf("invalid repetition in %s: %s", ctx, err) + } + } + for i := uint64(0); i < times; i++ { + vals = append(vals, sequenceValue{omitted: true}) + } + continue + } + + // Extract values. + sign := 1.0 + if t := p.peek().typ; t == itemSUB || t == itemADD { + if p.next().typ == itemSUB { + sign = -1 + } + } + k := sign * p.number(p.expect(itemNumber, ctx).val) + vals = append(vals, sequenceValue{ + value: model.SampleValue(k), + }) + + // If there are no offset repetitions specified, proceed with the next value. + if t := p.peek().typ; t == itemNumber || t == itemBlank { + continue + } else if t == itemEOF { + break + } else if t != itemADD && t != itemSUB { + p.errorf("expected next value or relative expansion in %s but got %s", ctx, t.desc()) + } + + // Expand the repeated offsets into values. 
+ sign = 1.0 + if p.next().typ == itemSUB { + sign = -1.0 + } + offset := sign * p.number(p.expect(itemNumber, ctx).val) + p.expect(itemTimes, ctx) + + times, err := strconv.ParseUint(p.expect(itemNumber, ctx).val, 10, 64) + if err != nil { + p.errorf("invalid repetition in %s: %s", ctx, err) + } + + for i := uint64(0); i < times; i++ { + k += offset + vals = append(vals, sequenceValue{ + value: model.SampleValue(k), + }) + } + } + return m, vals, nil +} + +// typecheck checks correct typing of the parsed statements or expression. +func (p *parser) typecheck(node Node) (err error) { + defer p.recover(&err) + + p.checkType(node) + return nil +} + +// next returns the next token. +func (p *parser) next() item { + if p.peekCount > 0 { + p.peekCount-- + } else { + t := p.lex.nextItem() + // Skip comments. + for t.typ == itemComment { + t = p.lex.nextItem() + } + p.token[0] = t + } + if p.token[p.peekCount].typ == itemError { + p.errorf("%s", p.token[p.peekCount].val) + } + return p.token[p.peekCount] +} + +// peek returns but does not consume the next token. +func (p *parser) peek() item { + if p.peekCount > 0 { + return p.token[p.peekCount-1] + } + p.peekCount = 1 + + t := p.lex.nextItem() + // Skip comments. + for t.typ == itemComment { + t = p.lex.nextItem() + } + p.token[0] = t + return p.token[0] +} + +// backup backs the input stream up one token. +func (p *parser) backup() { + p.peekCount++ +} + +// errorf formats the error and terminates processing. +func (p *parser) errorf(format string, args ...interface{}) { + p.error(fmt.Errorf(format, args...)) +} + +// error terminates processing. +func (p *parser) error(err error) { + perr := &ParseErr{ + Line: p.lex.lineNumber(), + Pos: p.lex.linePosition(), + Err: err, + } + if strings.Count(strings.TrimSpace(p.lex.input), "\n") == 0 { + perr.Line = 0 + } + panic(perr) +} + +// expect consumes the next token and guarantees it has the required type. 
+func (p *parser) expect(exp itemType, context string) item { + token := p.next() + if token.typ != exp { + p.errorf("unexpected %s in %s, expected %s", token.desc(), context, exp.desc()) + } + return token +} + +// expectOneOf consumes the next token and guarantees it has one of the required types. +func (p *parser) expectOneOf(exp1, exp2 itemType, context string) item { + token := p.next() + if token.typ != exp1 && token.typ != exp2 { + p.errorf("unexpected %s in %s, expected %s or %s", token.desc(), context, exp1.desc(), exp2.desc()) + } + return token +} + +var errUnexpected = fmt.Errorf("unexpected error") + +// recover is the handler that turns panics into returns from the top level of Parse. +func (p *parser) recover(errp *error) { + e := recover() + if e != nil { + if _, ok := e.(runtime.Error); ok { + // Print the stack trace but do not inhibit the running application. + buf := make([]byte, 64<<10) + buf = buf[:runtime.Stack(buf, false)] + + log.Errorf("parser panic: %v\n%s", e, buf) + *errp = errUnexpected + } else { + *errp = e.(error) + } + } + return +} + +// stmt parses any statement. +// +// alertStatement | recordStatement +// +func (p *parser) stmt() Statement { + switch tok := p.peek(); tok.typ { + case itemAlert: + return p.alertStmt() + case itemIdentifier, itemMetricIdentifier: + return p.recordStmt() + } + p.errorf("no valid statement detected") + return nil +} + +// alertStmt parses an alert rule. +// +// ALERT name IF expr [FOR duration] +// [LABELS label_set] +// [ANNOTATIONS label_set] +// +func (p *parser) alertStmt() *AlertStmt { + const ctx = "alert statement" + + p.expect(itemAlert, ctx) + name := p.expect(itemIdentifier, ctx) + // Alerts require a vector typed expression. + p.expect(itemIf, ctx) + expr := p.expr() + + // Optional for clause. 
+ var ( + duration time.Duration + err error + ) + if p.peek().typ == itemFor { + p.next() + dur := p.expect(itemDuration, ctx) + duration, err = parseDuration(dur.val) + if err != nil { + p.error(err) + } + } + + var ( + labels = model.LabelSet{} + annotations = model.LabelSet{} + ) + if p.peek().typ == itemLabels { + p.expect(itemLabels, ctx) + labels = p.labelSet() + } + if p.peek().typ == itemAnnotations { + p.expect(itemAnnotations, ctx) + annotations = p.labelSet() + } + + return &AlertStmt{ + Name: name.val, + Expr: expr, + Duration: duration, + Labels: labels, + Annotations: annotations, + } +} + +// recordStmt parses a recording rule. +func (p *parser) recordStmt() *RecordStmt { + const ctx = "record statement" + + name := p.expectOneOf(itemIdentifier, itemMetricIdentifier, ctx).val + + var lset model.LabelSet + if p.peek().typ == itemLeftBrace { + lset = p.labelSet() + } + + p.expect(itemAssign, ctx) + expr := p.expr() + + return &RecordStmt{ + Name: name, + Labels: lset, + Expr: expr, + } +} + +// expr parses any expression. +func (p *parser) expr() Expr { + // Parse the starting expression. + expr := p.unaryExpr() + + // Loop through the operations and construct a binary operation tree based + // on the operators' precedence. + for { + // If the next token is not an operator the expression is done. + op := p.peek().typ + if !op.isOperator() { + return expr + } + p.next() // Consume operator. + + // Parse optional operator matching options. Its validity + // is checked in the type-checking stage. + vecMatching := &VectorMatching{ + Card: CardOneToOne, + } + if op.isSetOperator() { + vecMatching.Card = CardManyToMany + } + + returnBool := false + // Parse bool modifier. + if p.peek().typ == itemBool { + if !op.isComparisonOperator() { + p.errorf("bool modifier can only be used on comparison operators") + } + p.next() + returnBool = true + } + + // Parse ON/IGNORING clause. 
+ if p.peek().typ == itemOn || p.peek().typ == itemIgnoring { + if p.peek().typ == itemOn { + vecMatching.On = true + } + p.next() + vecMatching.MatchingLabels = p.labels() + + // Parse grouping. + if t := p.peek().typ; t == itemGroupLeft || t == itemGroupRight { + p.next() + if t == itemGroupLeft { + vecMatching.Card = CardManyToOne + } else { + vecMatching.Card = CardOneToMany + } + if p.peek().typ == itemLeftParen { + vecMatching.Include = p.labels() + } + } + } + + for _, ln := range vecMatching.MatchingLabels { + for _, ln2 := range vecMatching.Include { + if ln == ln2 && vecMatching.On { + p.errorf("label %q must not occur in ON and GROUP clause at once", ln) + } + } + } + + // Parse the next operand. + rhs := p.unaryExpr() + + // Assign the new root based on the precedence of the LHS and RHS operators. + expr = p.balance(expr, op, rhs, vecMatching, returnBool) + } +} + +func (p *parser) balance(lhs Expr, op itemType, rhs Expr, vecMatching *VectorMatching, returnBool bool) *BinaryExpr { + if lhsBE, ok := lhs.(*BinaryExpr); ok { + precd := lhsBE.Op.precedence() - op.precedence() + if (precd < 0) || (precd == 0 && op.isRightAssociative()) { + balanced := p.balance(lhsBE.RHS, op, rhs, vecMatching, returnBool) + if lhsBE.Op.isComparisonOperator() && !lhsBE.ReturnBool && balanced.Type() == model.ValScalar && lhsBE.LHS.Type() == model.ValScalar { + p.errorf("comparisons between scalars must use BOOL modifier") + } + return &BinaryExpr{ + Op: lhsBE.Op, + LHS: lhsBE.LHS, + RHS: balanced, + VectorMatching: lhsBE.VectorMatching, + ReturnBool: lhsBE.ReturnBool, + } + } + } + if op.isComparisonOperator() && !returnBool && rhs.Type() == model.ValScalar && lhs.Type() == model.ValScalar { + p.errorf("comparisons between scalars must use BOOL modifier") + } + return &BinaryExpr{ + Op: op, + LHS: lhs, + RHS: rhs, + VectorMatching: vecMatching, + ReturnBool: returnBool, + } +} + +// unaryExpr parses a unary expression. 
+// +// | | (+|-) | '(' ')' +// +func (p *parser) unaryExpr() Expr { + switch t := p.peek(); t.typ { + case itemADD, itemSUB: + p.next() + e := p.unaryExpr() + + // Simplify unary expressions for number literals. + if nl, ok := e.(*NumberLiteral); ok { + if t.typ == itemSUB { + nl.Val *= -1 + } + return nl + } + return &UnaryExpr{Op: t.typ, Expr: e} + + case itemLeftParen: + p.next() + e := p.expr() + p.expect(itemRightParen, "paren expression") + + return &ParenExpr{Expr: e} + } + e := p.primaryExpr() + + // Expression might be followed by a range selector. + if p.peek().typ == itemLeftBracket { + vs, ok := e.(*VectorSelector) + if !ok { + p.errorf("range specification must be preceded by a metric selector, but follows a %T instead", e) + } + e = p.rangeSelector(vs) + } + + // Parse optional offset. + if p.peek().typ == itemOffset { + offset := p.offset() + + switch s := e.(type) { + case *VectorSelector: + s.Offset = offset + case *MatrixSelector: + s.Offset = offset + default: + p.errorf("offset modifier must be preceded by an instant or range selector, but follows a %T instead", e) + } + } + + return e +} + +// rangeSelector parses a matrix (a.k.a. range) selector based on a given +// vector selector. +// +// '[' ']' +// +func (p *parser) rangeSelector(vs *VectorSelector) *MatrixSelector { + const ctx = "range selector" + p.next() + + var erange time.Duration + var err error + + erangeStr := p.expect(itemDuration, ctx).val + erange, err = parseDuration(erangeStr) + if err != nil { + p.error(err) + } + + p.expect(itemRightBracket, ctx) + + e := &MatrixSelector{ + Name: vs.Name, + LabelMatchers: vs.LabelMatchers, + Range: erange, + } + return e +} + +// number parses a number. 
+func (p *parser) number(val string) float64 { + n, err := strconv.ParseInt(val, 0, 64) + f := float64(n) + if err != nil { + f, err = strconv.ParseFloat(val, 64) + } + if err != nil { + p.errorf("error parsing number: %s", err) + } + return f +} + +// primaryExpr parses a primary expression. +// +// | | | +// +func (p *parser) primaryExpr() Expr { + switch t := p.next(); { + case t.typ == itemNumber: + f := p.number(t.val) + return &NumberLiteral{model.SampleValue(f)} + + case t.typ == itemString: + return &StringLiteral{p.unquoteString(t.val)} + + case t.typ == itemLeftBrace: + // Metric selector without metric name. + p.backup() + return p.vectorSelector("") + + case t.typ == itemIdentifier: + // Check for function call. + if p.peek().typ == itemLeftParen { + return p.call(t.val) + } + fallthrough // Else metric selector. + + case t.typ == itemMetricIdentifier: + return p.vectorSelector(t.val) + + case t.typ.isAggregator(): + p.backup() + return p.aggrExpr() + + default: + p.errorf("no valid expression found") + } + return nil +} + +// labels parses a list of labelnames. +// +// '(' , ... ')' +// +func (p *parser) labels() model.LabelNames { + const ctx = "grouping opts" + + p.expect(itemLeftParen, ctx) + + labels := model.LabelNames{} + if p.peek().typ != itemRightParen { + for { + id := p.next() + if !isLabel(id.val) { + p.errorf("unexpected %s in %s, expected label", id.desc(), ctx) + } + labels = append(labels, model.LabelName(id.val)) + + if p.peek().typ != itemComma { + break + } + p.next() + } + } + p.expect(itemRightParen, ctx) + + return labels +} + +// aggrExpr parses an aggregation expression. 
+// +// () [by ] [keep_common] +// [by ] [keep_common] () +// +func (p *parser) aggrExpr() *AggregateExpr { + const ctx = "aggregation" + + agop := p.next() + if !agop.typ.isAggregator() { + p.errorf("expected aggregation operator but got %s", agop) + } + var grouping model.LabelNames + var keepCommon, without bool + + modifiersFirst := false + + if t := p.peek().typ; t == itemBy || t == itemWithout { + if t == itemWithout { + without = true + } + p.next() + grouping = p.labels() + modifiersFirst = true + } + if p.peek().typ == itemKeepCommon { + p.next() + keepCommon = true + modifiersFirst = true + } + + p.expect(itemLeftParen, ctx) + var param Expr + if agop.typ.isAggregatorWithParam() { + param = p.expr() + p.expect(itemComma, ctx) + } + e := p.expr() + p.expect(itemRightParen, ctx) + + if !modifiersFirst { + if t := p.peek().typ; t == itemBy || t == itemWithout { + if len(grouping) > 0 { + p.errorf("aggregation must only contain one grouping clause") + } + if t == itemWithout { + without = true + } + p.next() + grouping = p.labels() + } + if p.peek().typ == itemKeepCommon { + p.next() + keepCommon = true + } + } + + if keepCommon && without { + p.errorf("cannot use 'keep_common' with 'without'") + } + + return &AggregateExpr{ + Op: agop.typ, + Expr: e, + Param: param, + Grouping: grouping, + Without: without, + KeepCommonLabels: keepCommon, + } +} + +// call parses a function call. +// +// '(' [ , ...] ')' +// +func (p *parser) call(name string) *Call { + const ctx = "function call" + + fn, exist := getFunction(name) + if !exist { + p.errorf("unknown function with name %q", name) + } + + p.expect(itemLeftParen, ctx) + // Might be call without args. + if p.peek().typ == itemRightParen { + p.next() // Consume. + return &Call{fn, nil} + } + + var args []Expr + for { + e := p.expr() + args = append(args, e) + + // Terminate if no more arguments. + if p.peek().typ != itemComma { + break + } + p.next() + } + + // Call must be closed. 
+ p.expect(itemRightParen, ctx) + + return &Call{Func: fn, Args: args} +} + +// labelSet parses a set of label matchers +// +// '{' [ '=' , ... ] '}' +// +func (p *parser) labelSet() model.LabelSet { + set := model.LabelSet{} + for _, lm := range p.labelMatchers(itemEQL) { + set[lm.Name] = lm.Value + } + return set +} + +// labelMatchers parses a set of label matchers. +// +// '{' [ , ... ] '}' +// +func (p *parser) labelMatchers(operators ...itemType) metric.LabelMatchers { + const ctx = "label matching" + + matchers := metric.LabelMatchers{} + + p.expect(itemLeftBrace, ctx) + + // Check if no matchers are provided. + if p.peek().typ == itemRightBrace { + p.next() + return matchers + } + + for { + label := p.expect(itemIdentifier, ctx) + + op := p.next().typ + if !op.isOperator() { + p.errorf("expected label matching operator but got %s", op) + } + var validOp = false + for _, allowedOp := range operators { + if op == allowedOp { + validOp = true + } + } + if !validOp { + p.errorf("operator must be one of %q, is %q", operators, op) + } + + val := p.unquoteString(p.expect(itemString, ctx).val) + + // Map the item to the respective match type. + var matchType metric.MatchType + switch op { + case itemEQL: + matchType = metric.Equal + case itemNEQ: + matchType = metric.NotEqual + case itemEQLRegex: + matchType = metric.RegexMatch + case itemNEQRegex: + matchType = metric.RegexNoMatch + default: + p.errorf("item %q is not a metric match type", op) + } + + m, err := metric.NewLabelMatcher( + matchType, + model.LabelName(label.val), + model.LabelValue(val), + ) + if err != nil { + p.error(err) + } + + matchers = append(matchers, m) + + if p.peek().typ == itemIdentifier { + p.errorf("missing comma before next identifier %q", p.peek().val) + } + + // Terminate list if last matcher. + if p.peek().typ != itemComma { + break + } + p.next() + + // Allow comma after each item in a multi-line listing. 
+ if p.peek().typ == itemRightBrace { + break + } + } + + p.expect(itemRightBrace, ctx) + + return matchers +} + +// metric parses a metric. +// +// +// [] +// +func (p *parser) metric() model.Metric { + name := "" + m := model.Metric{} + + t := p.peek().typ + if t == itemIdentifier || t == itemMetricIdentifier { + name = p.next().val + t = p.peek().typ + } + if t != itemLeftBrace && name == "" { + p.errorf("missing metric name or metric selector") + } + if t == itemLeftBrace { + m = model.Metric(p.labelSet()) + } + if name != "" { + m[model.MetricNameLabel] = model.LabelValue(name) + } + return m +} + +// offset parses an offset modifier. +// +// offset +// +func (p *parser) offset() time.Duration { + const ctx = "offset" + + p.next() + offi := p.expect(itemDuration, ctx) + + offset, err := parseDuration(offi.val) + if err != nil { + p.error(err) + } + + return offset +} + +// vectorSelector parses a new (instant) vector selector. +// +// [] +// [] +// +func (p *parser) vectorSelector(name string) *VectorSelector { + var matchers metric.LabelMatchers + // Parse label matching if any. + if t := p.peek(); t.typ == itemLeftBrace { + matchers = p.labelMatchers(itemEQL, itemNEQ, itemEQLRegex, itemNEQRegex) + } + // Metric name must not be set in the label matchers and before at the same time. + if name != "" { + for _, m := range matchers { + if m.Name == model.MetricNameLabel { + p.errorf("metric name must not be set twice: %q or %q", name, m.Value) + } + } + // Set name label matching. + m, err := metric.NewLabelMatcher(metric.Equal, model.MetricNameLabel, model.LabelValue(name)) + if err != nil { + panic(err) // Must not happen with metric.Equal. + } + matchers = append(matchers, m) + } + + if len(matchers) == 0 { + p.errorf("vector selector must contain label matchers or metric name") + } + // A vector selector must contain at least one non-empty matcher to prevent + // implicit selection of all metrics (e.g. by a typo). 
+ notEmpty := false + for _, lm := range matchers { + if !lm.MatchesEmptyString() { + notEmpty = true + break + } + } + if !notEmpty { + p.errorf("vector selector must contain at least one non-empty matcher") + } + + return &VectorSelector{ + Name: name, + LabelMatchers: matchers, + } +} + +// expectType checks the type of the node and raises an error if it +// is not of the expected type. +func (p *parser) expectType(node Node, want model.ValueType, context string) { + t := p.checkType(node) + if t != want { + p.errorf("expected type %s in %s, got %s", documentedType(want), context, documentedType(t)) + } +} + +// check the types of the children of each node and raise an error +// if they do not form a valid node. +// +// Some of these checks are redundant as the the parsing stage does not allow +// them, but the costs are small and might reveal errors when making changes. +func (p *parser) checkType(node Node) (typ model.ValueType) { + // For expressions the type is determined by their Type function. + // Statements and lists do not have a type but are not invalid either. + switch n := node.(type) { + case Statements, Expressions, Statement: + typ = model.ValNone + case Expr: + typ = n.Type() + default: + p.errorf("unknown node type: %T", node) + } + + // Recursively check correct typing for child nodes and raise + // errors in case of bad typing. 
+ switch n := node.(type) { + case Statements: + for _, s := range n { + p.expectType(s, model.ValNone, "statement list") + } + case *AlertStmt: + p.expectType(n.Expr, model.ValVector, "alert statement") + + case *EvalStmt: + ty := p.checkType(n.Expr) + if ty == model.ValNone { + p.errorf("evaluation statement must have a valid expression type but got %s", documentedType(ty)) + } + + case *RecordStmt: + ty := p.checkType(n.Expr) + if ty != model.ValVector && ty != model.ValScalar { + p.errorf("record statement must have a valid expression of type instant vector or scalar but got %s", documentedType(ty)) + } + + case Expressions: + for _, e := range n { + ty := p.checkType(e) + if ty == model.ValNone { + p.errorf("expression must have a valid expression type but got %s", documentedType(ty)) + } + } + case *AggregateExpr: + if !n.Op.isAggregator() { + p.errorf("aggregation operator expected in aggregation expression but got %q", n.Op) + } + p.expectType(n.Expr, model.ValVector, "aggregation expression") + if n.Op == itemTopK || n.Op == itemBottomK || n.Op == itemQuantile { + p.expectType(n.Param, model.ValScalar, "aggregation parameter") + } + if n.Op == itemCountValues { + p.expectType(n.Param, model.ValString, "aggregation parameter") + } + + case *BinaryExpr: + lt := p.checkType(n.LHS) + rt := p.checkType(n.RHS) + + if !n.Op.isOperator() { + p.errorf("binary expression does not support operator %q", n.Op) + } + if (lt != model.ValScalar && lt != model.ValVector) || (rt != model.ValScalar && rt != model.ValVector) { + p.errorf("binary expression must contain only scalar and instant vector types") + } + + if (lt != model.ValVector || rt != model.ValVector) && n.VectorMatching != nil { + if len(n.VectorMatching.MatchingLabels) > 0 { + p.errorf("vector matching only allowed between instant vectors") + } + n.VectorMatching = nil + } else { + // Both operands are vectors. 
+ if n.Op.isSetOperator() { + if n.VectorMatching.Card == CardOneToMany || n.VectorMatching.Card == CardManyToOne { + p.errorf("no grouping allowed for %q operation", n.Op) + } + if n.VectorMatching.Card != CardManyToMany { + p.errorf("set operations must always be many-to-many") + } + } + } + + if (lt == model.ValScalar || rt == model.ValScalar) && n.Op.isSetOperator() { + p.errorf("set operator %q not allowed in binary scalar expression", n.Op) + } + + case *Call: + nargs := len(n.Func.ArgTypes) + if n.Func.Variadic == 0 { + if nargs != len(n.Args) { + p.errorf("expected %d argument(s) in call to %q, got %d", nargs, n.Func.Name, len(n.Args)) + } + } else { + na := nargs - 1 + if na > len(n.Args) { + p.errorf("expected at least %d argument(s) in call to %q, got %d", na, n.Func.Name, len(n.Args)) + } else if nargsmax := na + n.Func.Variadic; n.Func.Variadic > 0 && nargsmax < len(n.Args) { + p.errorf("expected at most %d argument(s) in call to %q, got %d", nargsmax, n.Func.Name, len(n.Args)) + } + } + + for i, arg := range n.Args { + if i >= len(n.Func.ArgTypes) { + i = len(n.Func.ArgTypes) - 1 + } + p.expectType(arg, n.Func.ArgTypes[i], fmt.Sprintf("call to function %q", n.Func.Name)) + } + + case *ParenExpr: + p.checkType(n.Expr) + + case *UnaryExpr: + if n.Op != itemADD && n.Op != itemSUB { + p.errorf("only + and - operators allowed for unary expressions") + } + if t := p.checkType(n.Expr); t != model.ValScalar && t != model.ValVector { + p.errorf("unary expression only allowed on expressions of type scalar or instant vector, got %q", documentedType(t)) + } + + case *NumberLiteral, *MatrixSelector, *StringLiteral, *VectorSelector: + // Nothing to do for terminals. 
+ + default: + p.errorf("unknown node type: %T", node) + } + return +} + +func (p *parser) unquoteString(s string) string { + unquoted, err := strutil.Unquote(s) + if err != nil { + p.errorf("error unquoting string %q: %s", s, err) + } + return unquoted +} + +func parseDuration(ds string) (time.Duration, error) { + dur, err := model.ParseDuration(ds) + if err != nil { + return 0, err + } + if dur == 0 { + return 0, fmt.Errorf("duration must be greater than 0") + } + return time.Duration(dur), nil +} diff --git a/vendor/github.com/prometheus/prometheus/promql/printer.go b/vendor/github.com/prometheus/prometheus/promql/printer.go new file mode 100644 index 000000000..40ca02e65 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/printer.go @@ -0,0 +1,236 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "fmt" + "sort" + "strings" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// Tree returns a string of the tree structure of the given node. +func Tree(node Node) string { + return tree(node, "") +} + +func tree(node Node, level string) string { + if node == nil { + return fmt.Sprintf("%s |---- %T\n", level, node) + } + typs := strings.Split(fmt.Sprintf("%T", node), ".")[1] + + var t string + // Only print the number of statements for readability. 
+ if stmts, ok := node.(Statements); ok { + t = fmt.Sprintf("%s |---- %s :: %d\n", level, typs, len(stmts)) + } else { + t = fmt.Sprintf("%s |---- %s :: %s\n", level, typs, node) + } + + level += " · · ·" + + switch n := node.(type) { + case Statements: + for _, s := range n { + t += tree(s, level) + } + case *AlertStmt: + t += tree(n.Expr, level) + + case *EvalStmt: + t += tree(n.Expr, level) + + case *RecordStmt: + t += tree(n.Expr, level) + + case Expressions: + for _, e := range n { + t += tree(e, level) + } + case *AggregateExpr: + t += tree(n.Expr, level) + + case *BinaryExpr: + t += tree(n.LHS, level) + t += tree(n.RHS, level) + + case *Call: + t += tree(n.Args, level) + + case *ParenExpr: + t += tree(n.Expr, level) + + case *UnaryExpr: + t += tree(n.Expr, level) + + case *MatrixSelector, *NumberLiteral, *StringLiteral, *VectorSelector: + // nothing to do + + default: + panic("promql.Tree: not all node types covered") + } + return t +} + +func (stmts Statements) String() (s string) { + if len(stmts) == 0 { + return "" + } + for _, stmt := range stmts { + s += stmt.String() + s += "\n\n" + } + return s[:len(s)-2] +} + +func (node *AlertStmt) String() string { + s := fmt.Sprintf("ALERT %s", node.Name) + s += fmt.Sprintf("\n\tIF %s", node.Expr) + if node.Duration > 0 { + s += fmt.Sprintf("\n\tFOR %s", model.Duration(node.Duration)) + } + if len(node.Labels) > 0 { + s += fmt.Sprintf("\n\tLABELS %s", node.Labels) + } + if len(node.Annotations) > 0 { + s += fmt.Sprintf("\n\tANNOTATIONS %s", node.Annotations) + } + return s +} + +func (node *EvalStmt) String() string { + return "EVAL " + node.Expr.String() +} + +func (node *RecordStmt) String() string { + s := fmt.Sprintf("%s%s = %s", node.Name, node.Labels, node.Expr) + return s +} + +func (es Expressions) String() (s string) { + if len(es) == 0 { + return "" + } + for _, e := range es { + s += e.String() + s += ", " + } + return s[:len(s)-2] +} + +func (node *AggregateExpr) String() string { + aggrString := 
fmt.Sprintf("%s(", node.Op) + if node.Op.isAggregatorWithParam() { + aggrString += fmt.Sprintf("%s, ", node.Param) + } + aggrString += fmt.Sprintf("%s)", node.Expr) + if len(node.Grouping) > 0 { + var format string + if node.Without { + format = "%s WITHOUT (%s)" + } else { + format = "%s BY (%s)" + } + aggrString = fmt.Sprintf(format, aggrString, node.Grouping) + } + if node.KeepCommonLabels { + aggrString += " KEEP_COMMON" + } + return aggrString +} + +func (node *BinaryExpr) String() string { + returnBool := "" + if node.ReturnBool { + returnBool = " BOOL" + } + + matching := "" + vm := node.VectorMatching + if vm != nil && (len(vm.MatchingLabels) > 0 || vm.On) { + if vm.On { + matching = fmt.Sprintf(" ON(%s)", vm.MatchingLabels) + } else { + matching = fmt.Sprintf(" IGNORING(%s)", vm.MatchingLabels) + } + if vm.Card == CardManyToOne || vm.Card == CardOneToMany { + matching += " GROUP_" + if vm.Card == CardManyToOne { + matching += "LEFT" + } else { + matching += "RIGHT" + } + matching += fmt.Sprintf("(%s)", vm.Include) + } + } + return fmt.Sprintf("%s %s%s%s %s", node.LHS, node.Op, returnBool, matching, node.RHS) +} + +func (node *Call) String() string { + return fmt.Sprintf("%s(%s)", node.Func.Name, node.Args) +} + +func (node *MatrixSelector) String() string { + vecSelector := &VectorSelector{ + Name: node.Name, + LabelMatchers: node.LabelMatchers, + } + offset := "" + if node.Offset != time.Duration(0) { + offset = fmt.Sprintf(" OFFSET %s", model.Duration(node.Offset)) + } + return fmt.Sprintf("%s[%s]%s", vecSelector.String(), model.Duration(node.Range), offset) +} + +func (node *NumberLiteral) String() string { + return fmt.Sprint(node.Val) +} + +func (node *ParenExpr) String() string { + return fmt.Sprintf("(%s)", node.Expr) +} + +func (node *StringLiteral) String() string { + return fmt.Sprintf("%q", node.Val) +} + +func (node *UnaryExpr) String() string { + return fmt.Sprintf("%s%s", node.Op, node.Expr) +} + +func (node *VectorSelector) String() string { 
+ labelStrings := make([]string, 0, len(node.LabelMatchers)-1) + for _, matcher := range node.LabelMatchers { + // Only include the __name__ label if its no equality matching. + if matcher.Name == model.MetricNameLabel && matcher.Type == metric.Equal { + continue + } + labelStrings = append(labelStrings, matcher.String()) + } + offset := "" + if node.Offset != time.Duration(0) { + offset = fmt.Sprintf(" OFFSET %s", model.Duration(node.Offset)) + } + + if len(labelStrings) == 0 { + return fmt.Sprintf("%s%s", node.Name, offset) + } + sort.Strings(labelStrings) + return fmt.Sprintf("%s{%s}%s", node.Name, strings.Join(labelStrings, ","), offset) +} diff --git a/vendor/github.com/prometheus/prometheus/promql/quantile.go b/vendor/github.com/prometheus/prometheus/promql/quantile.go new file mode 100644 index 000000000..4250ec388 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/quantile.go @@ -0,0 +1,185 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "math" + "sort" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// Helpers to calculate quantiles. + +// excludedLabels are the labels to exclude from signature calculation for +// quantiles. 
+var excludedLabels = map[model.LabelName]struct{}{ + model.MetricNameLabel: {}, + model.BucketLabel: {}, +} + +type bucket struct { + upperBound float64 + count model.SampleValue +} + +// buckets implements sort.Interface. +type buckets []bucket + +func (b buckets) Len() int { return len(b) } +func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound } + +type metricWithBuckets struct { + metric metric.Metric + buckets buckets +} + +// bucketQuantile calculates the quantile 'q' based on the given buckets. The +// buckets will be sorted by upperBound by this function (i.e. no sorting +// needed before calling this function). The quantile value is interpolated +// assuming a linear distribution within a bucket. However, if the quantile +// falls into the highest bucket, the upper bound of the 2nd highest bucket is +// returned. A natural lower bound of 0 is assumed if the upper bound of the +// lowest bucket is greater 0. In that case, interpolation in the lowest bucket +// happens linearly between 0 and the upper bound of the lowest bucket. +// However, if the lowest bucket has an upper bound less or equal 0, this upper +// bound is returned if the quantile falls into the lowest bucket. +// +// There are a number of special cases (once we have a way to report errors +// happening during evaluations of AST functions, we should report those +// explicitly): +// +// If 'buckets' has fewer than 2 elements, NaN is returned. +// +// If the highest bucket is not +Inf, NaN is returned. +// +// If q<0, -Inf is returned. +// +// If q>1, +Inf is returned. 
+func bucketQuantile(q model.SampleValue, buckets buckets) float64 { + if q < 0 { + return math.Inf(-1) + } + if q > 1 { + return math.Inf(+1) + } + if len(buckets) < 2 { + return math.NaN() + } + sort.Sort(buckets) + if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { + return math.NaN() + } + + ensureMonotonic(buckets) + + rank := q * buckets[len(buckets)-1].count + b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) + + if b == len(buckets)-1 { + return buckets[len(buckets)-2].upperBound + } + if b == 0 && buckets[0].upperBound <= 0 { + return buckets[0].upperBound + } + var ( + bucketStart float64 + bucketEnd = buckets[b].upperBound + count = buckets[b].count + ) + if b > 0 { + bucketStart = buckets[b-1].upperBound + count -= buckets[b-1].count + rank -= buckets[b-1].count + } + return bucketStart + (bucketEnd-bucketStart)*float64(rank/count) +} + +// The assumption that bucket counts increase monotonically with increasing +// upperBound may be violated during: +// +// * Recording rule evaluation of histogram_quantile, especially when rate() +// has been applied to the underlying bucket timeseries. +// * Evaluation of histogram_quantile computed over federated bucket +// timeseries, especially when rate() has been applied. +// +// This is because scraped data is not made available to rule evaluation or +// federation atomically, so some buckets are computed with data from the +// most recent scrapes, but the other buckets are missing data from the most +// recent scrape. +// +// Monotonicity is usually guaranteed because if a bucket with upper bound +// u1 has count c1, then any bucket with a higher upper bound u > u1 must +// have counted all c1 observations and perhaps more, so that c >= c1. +// +// Randomly interspersed partial sampling breaks that guarantee, and rate() +// exacerbates it. 
Specifically, suppose bucket le=1000 has a count of 10 from +// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The +// monotonicity is broken. It is exacerbated by rate() because under normal +// operation, cumulative counting of buckets will cause the bucket counts to +// diverge such that small differences from missing samples are not a problem. +// rate() removes this divergence.) +// +// bucketQuantile depends on that monotonicity to do a binary search for the +// bucket with the φ-quantile count, so breaking the monotonicity +// guarantee causes bucketQuantile() to return undefined (nonsense) results. +// +// As a somewhat hacky solution until ingestion is atomic per scrape, we +// calculate the "envelope" of the histogram buckets, essentially removing +// any decreases in the count between successive buckets. + +func ensureMonotonic(buckets buckets) { + max := buckets[0].count + for i := range buckets[1:] { + switch { + case buckets[i].count > max: + max = buckets[i].count + case buckets[i].count < max: + buckets[i].count = max + } + } +} + +// qauntile calculates the given quantile of a vector of samples. +// +// The vector will be sorted. +// If 'values' has zero elements, NaN is returned. +// If q<0, -Inf is returned. +// If q>1, +Inf is returned. +func quantile(q float64, values vectorByValueHeap) float64 { + if len(values) == 0 { + return math.NaN() + } + if q < 0 { + return math.Inf(-1) + } + if q > 1 { + return math.Inf(+1) + } + sort.Sort(values) + + n := float64(len(values)) + // When the quantile lies between two samples, + // we use a weighted average of the two samples. 
+ rank := q * (n - 1) + + lowerIndex := math.Max(0, math.Floor(rank)) + upperIndex := math.Min(n-1, lowerIndex+1) + + weight := rank - math.Floor(rank) + return float64(values[int(lowerIndex)].Value)*(1-weight) + float64(values[int(upperIndex)].Value)*weight +} diff --git a/vendor/github.com/prometheus/prometheus/promql/test.go b/vendor/github.com/prometheus/prometheus/promql/test.go new file mode 100644 index 000000000..e65982713 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/promql/test.go @@ -0,0 +1,525 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package promql + +import ( + "fmt" + "io/ioutil" + "math" + "regexp" + "strconv" + "strings" + "time" + + "github.com/prometheus/common/model" + "golang.org/x/net/context" + + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/storage/local" + "github.com/prometheus/prometheus/util/testutil" +) + +var ( + minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64. + + patSpace = regexp.MustCompile("[\t ]+") + patLoad = regexp.MustCompile(`^load\s+(.+?)$`) + patEvalInstant = regexp.MustCompile(`^eval(?:_(fail|ordered))?\s+instant\s+(?:at\s+(.+?))?\s+(.+)$`) +) + +const ( + testStartTime = model.Time(0) + epsilon = 0.000001 // Relative error allowed for sample values. +) + +// Test is a sequence of read and write commands that are run +// against a test storage. 
+type Test struct { + testutil.T + + cmds []testCommand + + storage local.Storage + closeStorage func() + queryEngine *Engine + context context.Context + cancelCtx context.CancelFunc +} + +// NewTest returns an initialized empty Test. +func NewTest(t testutil.T, input string) (*Test, error) { + test := &Test{ + T: t, + cmds: []testCommand{}, + } + err := test.parse(input) + test.clear() + + return test, err +} + +func newTestFromFile(t testutil.T, filename string) (*Test, error) { + content, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + return NewTest(t, string(content)) +} + +// QueryEngine returns the test's query engine. +func (t *Test) QueryEngine() *Engine { + return t.queryEngine +} + +// Context returns the test's context. +func (t *Test) Context() context.Context { + return t.context +} + +// Storage returns the test's storage. +func (t *Test) Storage() local.Storage { + return t.storage +} + +func raise(line int, format string, v ...interface{}) error { + return &ParseErr{ + Line: line + 1, + Err: fmt.Errorf(format, v...), + } +} + +func (t *Test) parseLoad(lines []string, i int) (int, *loadCmd, error) { + if !patLoad.MatchString(lines[i]) { + return i, nil, raise(i, "invalid load command. (load )") + } + parts := patLoad.FindStringSubmatch(lines[i]) + + gap, err := model.ParseDuration(parts[1]) + if err != nil { + return i, nil, raise(i, "invalid step definition %q: %s", parts[1], err) + } + cmd := newLoadCmd(time.Duration(gap)) + for i+1 < len(lines) { + i++ + defLine := lines[i] + if len(defLine) == 0 { + i-- + break + } + metric, vals, err := parseSeriesDesc(defLine) + if err != nil { + if perr, ok := err.(*ParseErr); ok { + perr.Line = i + 1 + } + return i, nil, err + } + cmd.set(metric, vals...) + } + return i, cmd, nil +} + +func (t *Test) parseEval(lines []string, i int) (int, *evalCmd, error) { + if !patEvalInstant.MatchString(lines[i]) { + return i, nil, raise(i, "invalid evaluation command. 
(eval[_fail|_ordered] instant [at ] ") + } + parts := patEvalInstant.FindStringSubmatch(lines[i]) + var ( + mod = parts[1] + at = parts[2] + qry = parts[3] + ) + expr, err := ParseExpr(qry) + if err != nil { + if perr, ok := err.(*ParseErr); ok { + perr.Line = i + 1 + perr.Pos += strings.Index(lines[i], qry) + } + return i, nil, err + } + + offset, err := model.ParseDuration(at) + if err != nil { + return i, nil, raise(i, "invalid step definition %q: %s", parts[1], err) + } + ts := testStartTime.Add(time.Duration(offset)) + + cmd := newEvalCmd(expr, ts, ts, 0) + switch mod { + case "ordered": + cmd.ordered = true + case "fail": + cmd.fail = true + } + + for j := 1; i+1 < len(lines); j++ { + i++ + defLine := lines[i] + if len(defLine) == 0 { + i-- + break + } + if f, err := parseNumber(defLine); err == nil { + cmd.expect(0, nil, sequenceValue{value: model.SampleValue(f)}) + break + } + metric, vals, err := parseSeriesDesc(defLine) + if err != nil { + if perr, ok := err.(*ParseErr); ok { + perr.Line = i + 1 + } + return i, nil, err + } + + // Currently, we are not expecting any matrices. + if len(vals) > 1 { + return i, nil, raise(i, "expecting multiple values in instant evaluation not allowed") + } + cmd.expect(j, metric, vals...) + } + return i, cmd, nil +} + +// parse the given command sequence and appends it to the test. +func (t *Test) parse(input string) error { + // Trim lines and remove comments. + lines := strings.Split(input, "\n") + for i, l := range lines { + l = strings.TrimSpace(l) + if strings.HasPrefix(l, "#") { + l = "" + } + lines[i] = l + } + var err error + + // Scan for steps line by line. 
+ for i := 0; i < len(lines); i++ { + l := lines[i] + if len(l) == 0 { + continue + } + var cmd testCommand + + switch c := strings.ToLower(patSpace.Split(l, 2)[0]); { + case c == "clear": + cmd = &clearCmd{} + case c == "load": + i, cmd, err = t.parseLoad(lines, i) + case strings.HasPrefix(c, "eval"): + i, cmd, err = t.parseEval(lines, i) + default: + return raise(i, "invalid command %q", l) + } + if err != nil { + return err + } + t.cmds = append(t.cmds, cmd) + } + return nil +} + +// testCommand is an interface that ensures that only the package internal +// types can be a valid command for a test. +type testCommand interface { + testCmd() +} + +func (*clearCmd) testCmd() {} +func (*loadCmd) testCmd() {} +func (*evalCmd) testCmd() {} + +// loadCmd is a command that loads sequences of sample values for specific +// metrics into the storage. +type loadCmd struct { + gap time.Duration + metrics map[model.Fingerprint]model.Metric + defs map[model.Fingerprint][]model.SamplePair +} + +func newLoadCmd(gap time.Duration) *loadCmd { + return &loadCmd{ + gap: gap, + metrics: map[model.Fingerprint]model.Metric{}, + defs: map[model.Fingerprint][]model.SamplePair{}, + } +} + +func (cmd loadCmd) String() string { + return "load" +} + +// set a sequence of sample values for the given metric. +func (cmd *loadCmd) set(m model.Metric, vals ...sequenceValue) { + fp := m.Fingerprint() + + samples := make([]model.SamplePair, 0, len(vals)) + ts := testStartTime + for _, v := range vals { + if !v.omitted { + samples = append(samples, model.SamplePair{ + Timestamp: ts, + Value: v.value, + }) + } + ts = ts.Add(cmd.gap) + } + cmd.defs[fp] = samples + cmd.metrics[fp] = m +} + +// append the defined time series to the storage. 
+func (cmd *loadCmd) append(a storage.SampleAppender) { + for fp, samples := range cmd.defs { + met := cmd.metrics[fp] + for _, smpl := range samples { + s := &model.Sample{ + Metric: met, + Value: smpl.Value, + Timestamp: smpl.Timestamp, + } + a.Append(s) + } + } +} + +// evalCmd is a command that evaluates an expression for the given time (range) +// and expects a specific result. +type evalCmd struct { + expr Expr + start, end model.Time + interval time.Duration + + instant bool + fail, ordered bool + + metrics map[model.Fingerprint]model.Metric + expected map[model.Fingerprint]entry +} + +type entry struct { + pos int + vals []sequenceValue +} + +func (e entry) String() string { + return fmt.Sprintf("%d: %s", e.pos, e.vals) +} + +func newEvalCmd(expr Expr, start, end model.Time, interval time.Duration) *evalCmd { + return &evalCmd{ + expr: expr, + start: start, + end: end, + interval: interval, + instant: start == end && interval == 0, + + metrics: map[model.Fingerprint]model.Metric{}, + expected: map[model.Fingerprint]entry{}, + } +} + +func (ev *evalCmd) String() string { + return "eval" +} + +// expect adds a new metric with a sequence of values to the set of expected +// results for the query. +func (ev *evalCmd) expect(pos int, m model.Metric, vals ...sequenceValue) { + if m == nil { + ev.expected[0] = entry{pos: pos, vals: vals} + return + } + fp := m.Fingerprint() + ev.metrics[fp] = m + ev.expected[fp] = entry{pos: pos, vals: vals} +} + +// compareResult compares the result value with the defined expectation. 
+func (ev *evalCmd) compareResult(result model.Value) error { + switch val := result.(type) { + case model.Matrix: + if ev.instant { + return fmt.Errorf("received range result on instant evaluation") + } + seen := map[model.Fingerprint]bool{} + for pos, v := range val { + fp := v.Metric.Fingerprint() + if _, ok := ev.metrics[fp]; !ok { + return fmt.Errorf("unexpected metric %s in result", v.Metric) + } + exp := ev.expected[fp] + if ev.ordered && exp.pos != pos+1 { + return fmt.Errorf("expected metric %s with %v at position %d but was at %d", v.Metric, exp.vals, exp.pos, pos+1) + } + for i, expVal := range exp.vals { + if !almostEqual(float64(expVal.value), float64(v.Values[i].Value)) { + return fmt.Errorf("expected %v for %s but got %v", expVal, v.Metric, v.Values) + } + } + seen[fp] = true + } + for fp, expVals := range ev.expected { + if !seen[fp] { + return fmt.Errorf("expected metric %s with %v not found", ev.metrics[fp], expVals) + } + } + + case model.Vector: + if !ev.instant { + return fmt.Errorf("received instant result on range evaluation") + } + seen := map[model.Fingerprint]bool{} + for pos, v := range val { + fp := v.Metric.Fingerprint() + if _, ok := ev.metrics[fp]; !ok { + return fmt.Errorf("unexpected metric %s in result", v.Metric) + } + exp := ev.expected[fp] + if ev.ordered && exp.pos != pos+1 { + return fmt.Errorf("expected metric %s with %v at position %d but was at %d", v.Metric, exp.vals, exp.pos, pos+1) + } + if !almostEqual(float64(exp.vals[0].value), float64(v.Value)) { + return fmt.Errorf("expected %v for %s but got %v", exp.vals[0].value, v.Metric, v.Value) + } + + seen[fp] = true + } + for fp, expVals := range ev.expected { + if !seen[fp] { + return fmt.Errorf("expected metric %s with %v not found", ev.metrics[fp], expVals) + } + } + + case *model.Scalar: + if !almostEqual(float64(ev.expected[0].vals[0].value), float64(val.Value)) { + return fmt.Errorf("expected scalar %v but got %v", val.Value, ev.expected[0].vals[0].value) + } + + 
default: + panic(fmt.Errorf("promql.Test.compareResult: unexpected result type %T", result)) + } + return nil +} + +// clearCmd is a command that wipes the test's storage state. +type clearCmd struct{} + +func (cmd clearCmd) String() string { + return "clear" +} + +// Run executes the command sequence of the test. Until the maximum error number +// is reached, evaluation errors do not terminate execution. +func (t *Test) Run() error { + for _, cmd := range t.cmds { + err := t.exec(cmd) + // TODO(fabxc): aggregate command errors, yield diffs for result + // comparison errors. + if err != nil { + return err + } + } + return nil +} + +// exec processes a single step of the test. +func (t *Test) exec(tc testCommand) error { + switch cmd := tc.(type) { + case *clearCmd: + t.clear() + + case *loadCmd: + cmd.append(t.storage) + t.storage.WaitForIndexing() + + case *evalCmd: + q := t.queryEngine.newQuery(cmd.expr, cmd.start, cmd.end, cmd.interval) + res := q.Exec(t.context) + if res.Err != nil { + if cmd.fail { + return nil + } + return fmt.Errorf("error evaluating query: %s", res.Err) + } + if res.Err == nil && cmd.fail { + return fmt.Errorf("expected error evaluating query but got none") + } + + err := cmd.compareResult(res.Value) + if err != nil { + return fmt.Errorf("error in %s %s: %s", cmd, cmd.expr, err) + } + + default: + panic("promql.Test.exec: unknown test command type") + } + return nil +} + +// clear the current test storage of all inserted samples. +func (t *Test) clear() { + if t.closeStorage != nil { + t.closeStorage() + } + if t.cancelCtx != nil { + t.cancelCtx() + } + + var closer testutil.Closer + t.storage, closer = local.NewTestStorage(t, 2) + + t.closeStorage = closer.Close + t.queryEngine = NewEngine(t.storage, nil) + t.context, t.cancelCtx = context.WithCancel(context.Background()) +} + +// Close closes resources associated with the Test. 
+func (t *Test) Close() { + t.cancelCtx() + t.closeStorage() +} + +// samplesAlmostEqual returns true if the two sample lines only differ by a +// small relative error in their sample value. +func almostEqual(a, b float64) bool { + // NaN has no equality but for testing we still want to know whether both values + // are NaN. + if math.IsNaN(a) && math.IsNaN(b) { + return true + } + + // Cf. http://floating-point-gui.de/errors/comparison/ + if a == b { + return true + } + + diff := math.Abs(a - b) + + if a == 0 || b == 0 || diff < minNormal { + return diff < epsilon*minNormal + } + return diff/(math.Abs(a)+math.Abs(b)) < epsilon +} + +func parseNumber(s string) (float64, error) { + n, err := strconv.ParseInt(s, 0, 64) + f := float64(n) + if err != nil { + f, err = strconv.ParseFloat(s, 64) + } + if err != nil { + return 0, fmt.Errorf("error parsing number: %s", err) + } + return f, nil +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/chunk.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/chunk.go new file mode 100644 index 000000000..19c36734b --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/chunk.go @@ -0,0 +1,494 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunk + +import ( + "container/list" + "errors" + "fmt" + "io" + "sort" + "sync" + "sync/atomic" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/metric" +) + +// ChunkLen is the length of a chunk in bytes. +const ChunkLen = 1024 + +// DefaultEncoding can be changed via a flag. +var DefaultEncoding = DoubleDelta + +var ( + errChunkBoundsExceeded = errors.New("attempted access outside of chunk boundaries") + errAddedToEvictedChunk = errors.New("attempted to add sample to evicted chunk") +) + +// EvictRequest is a request to evict a chunk from memory. +type EvictRequest struct { + Desc *Desc + Evict bool +} + +// Encoding defines which encoding we are using, delta, doubledelta, or varbit +type Encoding byte + +// String implements flag.Value. +func (e Encoding) String() string { + return fmt.Sprintf("%d", e) +} + +// Set implements flag.Value. +func (e *Encoding) Set(s string) error { + switch s { + case "0": + *e = Delta + case "1": + *e = DoubleDelta + case "2": + *e = Varbit + default: + return fmt.Errorf("invalid chunk encoding: %s", s) + } + return nil +} + +const ( + // Delta encoding + Delta Encoding = iota + // DoubleDelta encoding + DoubleDelta + // Varbit encoding + Varbit +) + +// Desc contains meta-data for a chunk. Pay special attention to the +// documented requirements for calling its methods concurrently (WRT pinning and +// locking). The doc comments spell out the requirements for each method, but +// here is an overview and general explanation: +// +// Everything that changes the pinning of the underlying chunk or deals with its +// eviction is protected by a mutex. This affects the following methods: Pin, +// Unpin, RefCount, IsEvicted, MaybeEvict. These methods can be called at any +// time without further prerequisites. +// +// Another group of methods acts on (or sets) the underlying chunk. These +// methods involve no locking. 
They may only be called if the caller has pinned +// the chunk (to guarantee the chunk is not evicted concurrently). Also, the +// caller must make sure nobody else will call these methods concurrently, +// either by holding the sole reference to the Desc (usually during loading +// or creation) or by locking the fingerprint of the series the Desc +// belongs to. The affected methods are: Add, MaybePopulateLastTime, SetChunk. +// +// Finally, there are the special cases FirstTime and LastTime. LastTime requires +// to have locked the fingerprint of the series but the chunk does not need to +// be pinned. That's because the ChunkLastTime field in Desc gets populated +// upon completion of the chunk (when it is still pinned, and which happens +// while the series's fingerprint is locked). Once that has happened, calling +// LastTime does not require the chunk to be loaded anymore. Before that has +// happened, the chunk is pinned anyway. The ChunkFirstTime field in Desc +// is populated upon creation of a Desc, so it is alway safe to call +// FirstTime. The FirstTime method is arguably not needed and only there for +// consistency with LastTime. +type Desc struct { + sync.Mutex // Protects pinning. + C Chunk // nil if chunk is evicted. + rCnt int + ChunkFirstTime model.Time // Populated at creation. Immutable. + ChunkLastTime model.Time // Populated on closing of the chunk, model.Earliest if unset. + + // EvictListElement is nil if the chunk is not in the evict list. + // EvictListElement is _not_ protected by the Desc mutex. + // It must only be touched by the evict list handler in MemorySeriesStorage. + EvictListElement *list.Element +} + +// NewDesc creates a new Desc pointing to the provided chunk. The provided chunk +// is assumed to be not persisted yet. Therefore, the refCount of the new +// Desc is 1 (preventing eviction prior to persisting). 
+func NewDesc(c Chunk, firstTime model.Time) *Desc { + Ops.WithLabelValues(CreateAndPin).Inc() + atomic.AddInt64(&NumMemChunks, 1) + NumMemDescs.Inc() + return &Desc{ + C: c, + rCnt: 1, + ChunkFirstTime: firstTime, + ChunkLastTime: model.Earliest, + } +} + +// Add adds a sample pair to the underlying chunk. For safe concurrent access, +// The chunk must be pinned, and the caller must have locked the fingerprint of +// the series. +func (d *Desc) Add(s model.SamplePair) ([]Chunk, error) { + if d.C == nil { + return nil, errAddedToEvictedChunk + } + return d.C.Add(s) +} + +// Pin increments the refCount by one. Upon increment from 0 to 1, this +// Desc is removed from the evict list. To enable the latter, the +// evictRequests channel has to be provided. This method can be called +// concurrently at any time. +func (d *Desc) Pin(evictRequests chan<- EvictRequest) { + d.Lock() + defer d.Unlock() + + if d.rCnt == 0 { + // Remove ourselves from the evict list. + evictRequests <- EvictRequest{d, false} + } + d.rCnt++ +} + +// Unpin decrements the refCount by one. Upon decrement from 1 to 0, this +// Desc is added to the evict list. To enable the latter, the evictRequests +// channel has to be provided. This method can be called concurrently at any +// time. +func (d *Desc) Unpin(evictRequests chan<- EvictRequest) { + d.Lock() + defer d.Unlock() + + if d.rCnt == 0 { + panic("cannot unpin already unpinned chunk") + } + d.rCnt-- + if d.rCnt == 0 { + // Add ourselves to the back of the evict list. + evictRequests <- EvictRequest{d, true} + } +} + +// RefCount returns the number of pins. This method can be called concurrently +// at any time. +func (d *Desc) RefCount() int { + d.Lock() + defer d.Unlock() + + return d.rCnt +} + +// FirstTime returns the timestamp of the first sample in the chunk. This method +// can be called concurrently at any time. It only returns the immutable +// d.ChunkFirstTime without any locking. Arguably, this method is +// useless. 
However, it provides consistency with the LastTime method. +func (d *Desc) FirstTime() model.Time { + return d.ChunkFirstTime +} + +// LastTime returns the timestamp of the last sample in the chunk. For safe +// concurrent access, this method requires the fingerprint of the time series to +// be locked. +func (d *Desc) LastTime() (model.Time, error) { + if d.ChunkLastTime != model.Earliest || d.C == nil { + return d.ChunkLastTime, nil + } + return d.C.NewIterator().LastTimestamp() +} + +// MaybePopulateLastTime populates the ChunkLastTime from the underlying chunk +// if it has not yet happened. Call this method directly after having added the +// last sample to a chunk or after closing a head chunk due to age. For safe +// concurrent access, the chunk must be pinned, and the caller must have locked +// the fingerprint of the series. +func (d *Desc) MaybePopulateLastTime() error { + if d.ChunkLastTime == model.Earliest && d.C != nil { + t, err := d.C.NewIterator().LastTimestamp() + if err != nil { + return err + } + d.ChunkLastTime = t + } + return nil +} + +// IsEvicted returns whether the chunk is evicted. For safe concurrent access, +// the caller must have locked the fingerprint of the series. +func (d *Desc) IsEvicted() bool { + // Locking required here because we do not want the caller to force + // pinning the chunk first, so it could be evicted while this method is + // called. + d.Lock() + defer d.Unlock() + + return d.C == nil +} + +// SetChunk sets the underlying chunk. The caller must have locked the +// fingerprint of the series and must have "pre-pinned" the chunk (i.e. first +// call Pin and then set the chunk). +func (d *Desc) SetChunk(c Chunk) { + if d.C != nil { + panic("chunk already set") + } + d.C = c +} + +// MaybeEvict evicts the chunk if the refCount is 0. It returns whether the chunk +// is now evicted, which includes the case that the chunk was evicted even +// before this method was called. It can be called concurrently at any time. 
+func (d *Desc) MaybeEvict() bool { + d.Lock() + defer d.Unlock() + + if d.C == nil { + return true + } + if d.rCnt != 0 { + return false + } + if d.ChunkLastTime == model.Earliest { + // This must never happen. + panic("ChunkLastTime not populated for evicted chunk") + } + d.C = nil + Ops.WithLabelValues(Evict).Inc() + atomic.AddInt64(&NumMemChunks, -1) + return true +} + +// Chunk is the interface for all chunks. Chunks are generally not +// goroutine-safe. +type Chunk interface { + // Add adds a SamplePair to the chunks, performs any necessary + // re-encoding, and adds any necessary overflow chunks. It returns the + // new version of the original chunk, followed by overflow chunks, if + // any. The first chunk returned might be the same as the original one + // or a newly allocated version. In any case, take the returned chunk as + // the relevant one and discard the original chunk. + Add(sample model.SamplePair) ([]Chunk, error) + Clone() Chunk + FirstTime() model.Time + NewIterator() Iterator + Marshal(io.Writer) error + MarshalToBuf([]byte) error + Unmarshal(io.Reader) error + UnmarshalFromBuf([]byte) error + Encoding() Encoding + Utilization() float64 + + // Len returns the number of samples in the chunk. Implementations may be + // expensive. + Len() int +} + +// Iterator enables efficient access to the content of a chunk. It is +// generally not safe to use an Iterator concurrently with or after chunk +// mutation. +type Iterator interface { + // Gets the last timestamp in the chunk. + LastTimestamp() (model.Time, error) + // Whether a given timestamp is contained between first and last value + // in the chunk. + Contains(model.Time) (bool, error) + // Scans the next value in the chunk. Directly after the iterator has + // been created, the next value is the first value in the + // chunk. Otherwise, it is the value following the last value scanned or + // found (by one of the Find... methods). 
Returns false if either the + // end of the chunk is reached or an error has occurred. + Scan() bool + // Finds the most recent value at or before the provided time. Returns + // false if either the chunk contains no value at or before the provided + // time, or an error has occurred. + FindAtOrBefore(model.Time) bool + // Finds the oldest value at or after the provided time. Returns false + // if either the chunk contains no value at or after the provided time, + // or an error has occurred. + FindAtOrAfter(model.Time) bool + // Returns the last value scanned (by the scan method) or found (by one + // of the find... methods). It returns model.ZeroSamplePair before any of + // those methods were called. + Value() model.SamplePair + // Returns the last error encountered. In general, an error signals data + // corruption in the chunk and requires quarantining. + Err() error +} + +// RangeValues is a utility function that retrieves all values within the given +// range from an Iterator. +func RangeValues(it Iterator, in metric.Interval) ([]model.SamplePair, error) { + result := []model.SamplePair{} + if !it.FindAtOrAfter(in.OldestInclusive) { + return result, it.Err() + } + for !it.Value().Timestamp.After(in.NewestInclusive) { + result = append(result, it.Value()) + if !it.Scan() { + break + } + } + return result, it.Err() +} + +// addToOverflowChunk is a utility function that creates a new chunk as overflow +// chunk, adds the provided sample to it, and returns a chunk slice containing +// the provided old chunk followed by the new overflow chunk. +func addToOverflowChunk(c Chunk, s model.SamplePair) ([]Chunk, error) { + overflowChunks, err := New().Add(s) + if err != nil { + return nil, err + } + return []Chunk{c, overflowChunks[0]}, nil +} + +// transcodeAndAdd is a utility function that transcodes the dst chunk into the +// provided src chunk (plus the necessary overflow chunks) and then adds the +// provided sample. 
It returns the new chunks (transcoded plus overflow) with +// the new sample at the end. +func transcodeAndAdd(dst Chunk, src Chunk, s model.SamplePair) ([]Chunk, error) { + Ops.WithLabelValues(Transcode).Inc() + + var ( + head = dst + body, NewChunks []Chunk + err error + ) + + it := src.NewIterator() + for it.Scan() { + if NewChunks, err = head.Add(it.Value()); err != nil { + return nil, err + } + body = append(body, NewChunks[:len(NewChunks)-1]...) + head = NewChunks[len(NewChunks)-1] + } + if it.Err() != nil { + return nil, it.Err() + } + + if NewChunks, err = head.Add(s); err != nil { + return nil, err + } + return append(body, NewChunks...), nil +} + +// New creates a new chunk according to the encoding set by the +// DefaultEncoding flag. +func New() Chunk { + chunk, err := NewForEncoding(DefaultEncoding) + if err != nil { + panic(err) + } + return chunk +} + +// NewForEncoding allows configuring what chunk type you want +func NewForEncoding(encoding Encoding) (Chunk, error) { + switch encoding { + case Delta: + return newDeltaEncodedChunk(d1, d0, true, ChunkLen), nil + case DoubleDelta: + return newDoubleDeltaEncodedChunk(d1, d0, true, ChunkLen), nil + case Varbit: + return newVarbitChunk(varbitZeroEncoding), nil + default: + return nil, fmt.Errorf("unknown chunk encoding: %v", encoding) + } +} + +// indexAccessor allows accesses to samples by index. +type indexAccessor interface { + timestampAtIndex(int) model.Time + sampleValueAtIndex(int) model.SampleValue + err() error +} + +// indexAccessingChunkIterator is a chunk iterator for chunks for which an +// indexAccessor implementation exists. +type indexAccessingChunkIterator struct { + len int + pos int + lastValue model.SamplePair + acc indexAccessor +} + +func newIndexAccessingChunkIterator(len int, acc indexAccessor) *indexAccessingChunkIterator { + return &indexAccessingChunkIterator{ + len: len, + pos: -1, + lastValue: model.ZeroSamplePair, + acc: acc, + } +} + +// lastTimestamp implements Iterator. 
+func (it *indexAccessingChunkIterator) LastTimestamp() (model.Time, error) { + return it.acc.timestampAtIndex(it.len - 1), it.acc.err() +} + +// contains implements Iterator. +func (it *indexAccessingChunkIterator) Contains(t model.Time) (bool, error) { + return !t.Before(it.acc.timestampAtIndex(0)) && + !t.After(it.acc.timestampAtIndex(it.len-1)), it.acc.err() +} + +// scan implements Iterator. +func (it *indexAccessingChunkIterator) Scan() bool { + it.pos++ + if it.pos >= it.len { + return false + } + it.lastValue = model.SamplePair{ + Timestamp: it.acc.timestampAtIndex(it.pos), + Value: it.acc.sampleValueAtIndex(it.pos), + } + return it.acc.err() == nil +} + +// findAtOrBefore implements Iterator. +func (it *indexAccessingChunkIterator) FindAtOrBefore(t model.Time) bool { + i := sort.Search(it.len, func(i int) bool { + return it.acc.timestampAtIndex(i).After(t) + }) + if i == 0 || it.acc.err() != nil { + return false + } + it.pos = i - 1 + it.lastValue = model.SamplePair{ + Timestamp: it.acc.timestampAtIndex(i - 1), + Value: it.acc.sampleValueAtIndex(i - 1), + } + return true +} + +// findAtOrAfter implements Iterator. +func (it *indexAccessingChunkIterator) FindAtOrAfter(t model.Time) bool { + i := sort.Search(it.len, func(i int) bool { + return !it.acc.timestampAtIndex(i).Before(t) + }) + if i == it.len || it.acc.err() != nil { + return false + } + it.pos = i + it.lastValue = model.SamplePair{ + Timestamp: it.acc.timestampAtIndex(i), + Value: it.acc.sampleValueAtIndex(i), + } + return true +} + +// value implements Iterator. +func (it *indexAccessingChunkIterator) Value() model.SamplePair { + return it.lastValue +} + +// err implements Iterator. 
+func (it *indexAccessingChunkIterator) Err() error { + return it.acc.err() +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/delta.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/delta.go new file mode 100644 index 000000000..4e3fd0645 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/delta.go @@ -0,0 +1,379 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import ( + "encoding/binary" + "fmt" + "io" + "math" + + "github.com/prometheus/common/model" +) + +// The 21-byte header of a delta-encoded chunk looks like: +// +// - time delta bytes: 1 bytes +// - value delta bytes: 1 bytes +// - is integer: 1 byte +// - base time: 8 bytes +// - base value: 8 bytes +// - used buf bytes: 2 bytes +const ( + deltaHeaderBytes = 21 + + deltaHeaderTimeBytesOffset = 0 + deltaHeaderValueBytesOffset = 1 + deltaHeaderIsIntOffset = 2 + deltaHeaderBaseTimeOffset = 3 + deltaHeaderBaseValueOffset = 11 + deltaHeaderBufLenOffset = 19 +) + +// A deltaEncodedChunk adaptively stores sample timestamps and values with a +// delta encoding of various types (int, float) and bit widths. However, once 8 +// bytes would be needed to encode a delta value, a fall-back to the absolute +// numbers happens (so that timestamps are saved directly as int64 and values as +// float64). It implements the chunk interface. 
+type deltaEncodedChunk []byte + +// newDeltaEncodedChunk returns a newly allocated deltaEncodedChunk. +func newDeltaEncodedChunk(tb, vb deltaBytes, isInt bool, length int) *deltaEncodedChunk { + if tb < 1 { + panic("need at least 1 time delta byte") + } + if length < deltaHeaderBytes+16 { + panic(fmt.Errorf( + "chunk length %d bytes is insufficient, need at least %d", + length, deltaHeaderBytes+16, + )) + } + c := make(deltaEncodedChunk, deltaHeaderIsIntOffset+1, length) + + c[deltaHeaderTimeBytesOffset] = byte(tb) + c[deltaHeaderValueBytesOffset] = byte(vb) + if vb < d8 && isInt { // Only use int for fewer than 8 value delta bytes. + c[deltaHeaderIsIntOffset] = 1 + } else { + c[deltaHeaderIsIntOffset] = 0 + } + + return &c +} + +// Add implements chunk. +func (c deltaEncodedChunk) Add(s model.SamplePair) ([]Chunk, error) { + // TODO(beorn7): Since we return &c, this method might cause an unnecessary allocation. + if c.Len() == 0 { + c = c[:deltaHeaderBytes] + binary.LittleEndian.PutUint64(c[deltaHeaderBaseTimeOffset:], uint64(s.Timestamp)) + binary.LittleEndian.PutUint64(c[deltaHeaderBaseValueOffset:], math.Float64bits(float64(s.Value))) + } + + remainingBytes := cap(c) - len(c) + sampleSize := c.sampleSize() + + // Do we generally have space for another sample in this chunk? If not, + // overflow into a new one. + if remainingBytes < sampleSize { + return addToOverflowChunk(&c, s) + } + + baseValue := c.baseValue() + dt := s.Timestamp - c.baseTime() + if dt < 0 { + return nil, fmt.Errorf("time delta is less than zero: %v", dt) + } + + dv := s.Value - baseValue + tb := c.timeBytes() + vb := c.valueBytes() + isInt := c.isInt() + + // If the new sample is incompatible with the current encoding, reencode the + // existing chunk data into new chunk(s). + + ntb, nvb, nInt := tb, vb, isInt + if isInt && !isInt64(dv) { + // int->float. + nvb = d4 + nInt = false + } else if !isInt && vb == d4 && baseValue+model.SampleValue(float32(dv)) != s.Value { + // float32->float64. 
+ nvb = d8 + } else { + if tb < d8 { + // Maybe more bytes for timestamp. + ntb = max(tb, bytesNeededForUnsignedTimestampDelta(dt)) + } + if c.isInt() && vb < d8 { + // Maybe more bytes for sample value. + nvb = max(vb, bytesNeededForIntegerSampleValueDelta(dv)) + } + } + if tb != ntb || vb != nvb || isInt != nInt { + if len(c)*2 < cap(c) { + return transcodeAndAdd(newDeltaEncodedChunk(ntb, nvb, nInt, cap(c)), &c, s) + } + // Chunk is already half full. Better create a new one and save the transcoding efforts. + return addToOverflowChunk(&c, s) + } + + offset := len(c) + c = c[:offset+sampleSize] + + switch tb { + case d1: + c[offset] = byte(dt) + case d2: + binary.LittleEndian.PutUint16(c[offset:], uint16(dt)) + case d4: + binary.LittleEndian.PutUint32(c[offset:], uint32(dt)) + case d8: + // Store the absolute value (no delta) in case of d8. + binary.LittleEndian.PutUint64(c[offset:], uint64(s.Timestamp)) + default: + return nil, fmt.Errorf("invalid number of bytes for time delta: %d", tb) + } + + offset += int(tb) + + if c.isInt() { + switch vb { + case d0: + // No-op. Constant value is stored as base value. + case d1: + c[offset] = byte(int8(dv)) + case d2: + binary.LittleEndian.PutUint16(c[offset:], uint16(int16(dv))) + case d4: + binary.LittleEndian.PutUint32(c[offset:], uint32(int32(dv))) + // d8 must not happen. Those samples are encoded as float64. + default: + return nil, fmt.Errorf("invalid number of bytes for integer delta: %d", vb) + } + } else { + switch vb { + case d4: + binary.LittleEndian.PutUint32(c[offset:], math.Float32bits(float32(dv))) + case d8: + // Store the absolute value (no delta) in case of d8. + binary.LittleEndian.PutUint64(c[offset:], math.Float64bits(float64(s.Value))) + default: + return nil, fmt.Errorf("invalid number of bytes for floating point delta: %d", vb) + } + } + return []Chunk{&c}, nil +} + +// Clone implements chunk. 
+func (c deltaEncodedChunk) Clone() Chunk { + clone := make(deltaEncodedChunk, len(c), cap(c)) + copy(clone, c) + return &clone +} + +// FirstTime implements chunk. +func (c deltaEncodedChunk) FirstTime() model.Time { + return c.baseTime() +} + +// NewIterator implements chunk. +func (c *deltaEncodedChunk) NewIterator() Iterator { + return newIndexAccessingChunkIterator(c.Len(), &deltaEncodedIndexAccessor{ + c: *c, + baseT: c.baseTime(), + baseV: c.baseValue(), + tBytes: c.timeBytes(), + vBytes: c.valueBytes(), + isInt: c.isInt(), + }) +} + +// Marshal implements chunk. +func (c deltaEncodedChunk) Marshal(w io.Writer) error { + if len(c) > math.MaxUint16 { + panic("chunk buffer length would overflow a 16 bit uint.") + } + binary.LittleEndian.PutUint16(c[deltaHeaderBufLenOffset:], uint16(len(c))) + + n, err := w.Write(c[:cap(c)]) + if err != nil { + return err + } + if n != cap(c) { + return fmt.Errorf("wanted to write %d bytes, wrote %d", cap(c), n) + } + return nil +} + +// MarshalToBuf implements chunk. +func (c deltaEncodedChunk) MarshalToBuf(buf []byte) error { + if len(c) > math.MaxUint16 { + panic("chunk buffer length would overflow a 16 bit uint") + } + binary.LittleEndian.PutUint16(c[deltaHeaderBufLenOffset:], uint16(len(c))) + + n := copy(buf, c) + if n != len(c) { + return fmt.Errorf("wanted to copy %d bytes to buffer, copied %d", len(c), n) + } + return nil +} + +// Unmarshal implements chunk. +func (c *deltaEncodedChunk) Unmarshal(r io.Reader) error { + *c = (*c)[:cap(*c)] + if _, err := io.ReadFull(r, *c); err != nil { + return err + } + return c.setLen() +} + +// UnmarshalFromBuf implements chunk. +func (c *deltaEncodedChunk) UnmarshalFromBuf(buf []byte) error { + *c = (*c)[:cap(*c)] + copy(*c, buf) + return c.setLen() +} + +// setLen sets the length of the underlying slice and performs some sanity checks. 
+func (c *deltaEncodedChunk) setLen() error { + l := binary.LittleEndian.Uint16((*c)[deltaHeaderBufLenOffset:]) + if int(l) > cap(*c) { + return fmt.Errorf("delta chunk length exceeded during unmarshaling: %d", l) + } + if int(l) < deltaHeaderBytes { + return fmt.Errorf("delta chunk length less than header size: %d < %d", l, deltaHeaderBytes) + } + switch c.timeBytes() { + case d1, d2, d4, d8: + // Pass. + default: + return fmt.Errorf("invalid number of time bytes in delta chunk: %d", c.timeBytes()) + } + switch c.valueBytes() { + case d0, d1, d2, d4, d8: + // Pass. + default: + return fmt.Errorf("invalid number of value bytes in delta chunk: %d", c.valueBytes()) + } + *c = (*c)[:l] + return nil +} + +// Encoding implements chunk. +func (c deltaEncodedChunk) Encoding() Encoding { return Delta } + +// Utilization implements chunk. +func (c deltaEncodedChunk) Utilization() float64 { + return float64(len(c)) / float64(cap(c)) +} + +func (c deltaEncodedChunk) timeBytes() deltaBytes { + return deltaBytes(c[deltaHeaderTimeBytesOffset]) +} + +func (c deltaEncodedChunk) valueBytes() deltaBytes { + return deltaBytes(c[deltaHeaderValueBytesOffset]) +} + +func (c deltaEncodedChunk) isInt() bool { + return c[deltaHeaderIsIntOffset] == 1 +} + +func (c deltaEncodedChunk) baseTime() model.Time { + return model.Time(binary.LittleEndian.Uint64(c[deltaHeaderBaseTimeOffset:])) +} + +func (c deltaEncodedChunk) baseValue() model.SampleValue { + return model.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(c[deltaHeaderBaseValueOffset:]))) +} + +func (c deltaEncodedChunk) sampleSize() int { + return int(c.timeBytes() + c.valueBytes()) +} + +// Len implements Chunk. Runs in constant time. +func (c deltaEncodedChunk) Len() int { + if len(c) < deltaHeaderBytes { + return 0 + } + return (len(c) - deltaHeaderBytes) / c.sampleSize() +} + +// deltaEncodedIndexAccessor implements indexAccessor. 
+type deltaEncodedIndexAccessor struct { + c deltaEncodedChunk + baseT model.Time + baseV model.SampleValue + tBytes, vBytes deltaBytes + isInt bool + lastErr error +} + +func (acc *deltaEncodedIndexAccessor) err() error { + return acc.lastErr +} + +func (acc *deltaEncodedIndexAccessor) timestampAtIndex(idx int) model.Time { + offset := deltaHeaderBytes + idx*int(acc.tBytes+acc.vBytes) + + switch acc.tBytes { + case d1: + return acc.baseT + model.Time(uint8(acc.c[offset])) + case d2: + return acc.baseT + model.Time(binary.LittleEndian.Uint16(acc.c[offset:])) + case d4: + return acc.baseT + model.Time(binary.LittleEndian.Uint32(acc.c[offset:])) + case d8: + // Take absolute value for d8. + return model.Time(binary.LittleEndian.Uint64(acc.c[offset:])) + default: + acc.lastErr = fmt.Errorf("invalid number of bytes for time delta: %d", acc.tBytes) + return model.Earliest + } +} + +func (acc *deltaEncodedIndexAccessor) sampleValueAtIndex(idx int) model.SampleValue { + offset := deltaHeaderBytes + idx*int(acc.tBytes+acc.vBytes) + int(acc.tBytes) + + if acc.isInt { + switch acc.vBytes { + case d0: + return acc.baseV + case d1: + return acc.baseV + model.SampleValue(int8(acc.c[offset])) + case d2: + return acc.baseV + model.SampleValue(int16(binary.LittleEndian.Uint16(acc.c[offset:]))) + case d4: + return acc.baseV + model.SampleValue(int32(binary.LittleEndian.Uint32(acc.c[offset:]))) + // No d8 for ints. + default: + acc.lastErr = fmt.Errorf("invalid number of bytes for integer delta: %d", acc.vBytes) + return 0 + } + } else { + switch acc.vBytes { + case d4: + return acc.baseV + model.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(acc.c[offset:]))) + case d8: + // Take absolute value for d8. 
+ return model.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(acc.c[offset:]))) + default: + acc.lastErr = fmt.Errorf("invalid number of bytes for floating point delta: %d", acc.vBytes) + return 0 + } + } +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/delta_helpers.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/delta_helpers.go new file mode 100644 index 000000000..81e5d18cb --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/delta_helpers.go @@ -0,0 +1,84 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunk + +import ( + "math" + + "github.com/prometheus/common/model" +) + +type deltaBytes byte + +const ( + d0 deltaBytes = 0 + d1 deltaBytes = 1 + d2 deltaBytes = 2 + d4 deltaBytes = 4 + d8 deltaBytes = 8 +) + +func bytesNeededForUnsignedTimestampDelta(deltaT model.Time) deltaBytes { + switch { + case deltaT > math.MaxUint32: + return d8 + case deltaT > math.MaxUint16: + return d4 + case deltaT > math.MaxUint8: + return d2 + default: + return d1 + } +} + +func bytesNeededForSignedTimestampDelta(deltaT model.Time) deltaBytes { + switch { + case deltaT > math.MaxInt32 || deltaT < math.MinInt32: + return d8 + case deltaT > math.MaxInt16 || deltaT < math.MinInt16: + return d4 + case deltaT > math.MaxInt8 || deltaT < math.MinInt8: + return d2 + default: + return d1 + } +} + +func bytesNeededForIntegerSampleValueDelta(deltaV model.SampleValue) deltaBytes { + switch { + case deltaV < math.MinInt32 || deltaV > math.MaxInt32: + return d8 + case deltaV < math.MinInt16 || deltaV > math.MaxInt16: + return d4 + case deltaV < math.MinInt8 || deltaV > math.MaxInt8: + return d2 + case deltaV != 0: + return d1 + default: + return d0 + } +} + +func max(a, b deltaBytes) deltaBytes { + if a > b { + return a + } + return b +} + +// isInt64 returns true if v can be represented as an int64. +func isInt64(v model.SampleValue) bool { + // Note: Using math.Modf is slower than the conversion approach below. + return model.SampleValue(int64(v)) == v +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/doubledelta.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/doubledelta.go new file mode 100644 index 000000000..249c99d54 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/doubledelta.go @@ -0,0 +1,525 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import ( + "encoding/binary" + "fmt" + "io" + "math" + + "github.com/prometheus/common/model" +) + +// The 37-byte header of a delta-encoded chunk looks like: +// +// - used buf bytes: 2 bytes +// - time double-delta bytes: 1 bytes +// - value double-delta bytes: 1 bytes +// - is integer: 1 byte +// - base time: 8 bytes +// - base value: 8 bytes +// - base time delta: 8 bytes +// - base value delta: 8 bytes +const ( + doubleDeltaHeaderBytes = 37 + doubleDeltaHeaderMinBytes = 21 // header isn't full for chunk w/ one sample + + doubleDeltaHeaderBufLenOffset = 0 + doubleDeltaHeaderTimeBytesOffset = 2 + doubleDeltaHeaderValueBytesOffset = 3 + doubleDeltaHeaderIsIntOffset = 4 + doubleDeltaHeaderBaseTimeOffset = 5 + doubleDeltaHeaderBaseValueOffset = 13 + doubleDeltaHeaderBaseTimeDeltaOffset = 21 + doubleDeltaHeaderBaseValueDeltaOffset = 29 +) + +// A doubleDeltaEncodedChunk adaptively stores sample timestamps and values with +// a double-delta encoding of various types (int, float) and bit widths. A base +// value and timestamp and a base delta for each is saved in the header. The +// payload consists of double-deltas, i.e. deviations from the values and +// timestamps calculated by applying the base value and time and the base deltas. +// However, once 8 bytes would be needed to encode a double-delta value, a +// fall-back to the absolute numbers happens (so that timestamps are saved +// directly as int64 and values as float64). +// doubleDeltaEncodedChunk implements the chunk interface. 
+type doubleDeltaEncodedChunk []byte + +// newDoubleDeltaEncodedChunk returns a newly allocated doubleDeltaEncodedChunk. +func newDoubleDeltaEncodedChunk(tb, vb deltaBytes, isInt bool, length int) *doubleDeltaEncodedChunk { + if tb < 1 { + panic("need at least 1 time delta byte") + } + if length < doubleDeltaHeaderBytes+16 { + panic(fmt.Errorf( + "chunk length %d bytes is insufficient, need at least %d", + length, doubleDeltaHeaderBytes+16, + )) + } + c := make(doubleDeltaEncodedChunk, doubleDeltaHeaderIsIntOffset+1, length) + + c[doubleDeltaHeaderTimeBytesOffset] = byte(tb) + c[doubleDeltaHeaderValueBytesOffset] = byte(vb) + if vb < d8 && isInt { // Only use int for fewer than 8 value double-delta bytes. + c[doubleDeltaHeaderIsIntOffset] = 1 + } else { + c[doubleDeltaHeaderIsIntOffset] = 0 + } + return &c +} + +// Add implements chunk. +func (c doubleDeltaEncodedChunk) Add(s model.SamplePair) ([]Chunk, error) { + // TODO(beorn7): Since we return &c, this method might cause an unnecessary allocation. + if c.Len() == 0 { + return c.addFirstSample(s), nil + } + + tb := c.timeBytes() + vb := c.valueBytes() + + if c.Len() == 1 { + return c.addSecondSample(s, tb, vb) + } + + remainingBytes := cap(c) - len(c) + sampleSize := c.sampleSize() + + // Do we generally have space for another sample in this chunk? If not, + // overflow into a new one. + if remainingBytes < sampleSize { + return addToOverflowChunk(&c, s) + } + + projectedTime := c.baseTime() + model.Time(c.Len())*c.baseTimeDelta() + ddt := s.Timestamp - projectedTime + + projectedValue := c.baseValue() + model.SampleValue(c.Len())*c.baseValueDelta() + ddv := s.Value - projectedValue + + ntb, nvb, nInt := tb, vb, c.isInt() + // If the new sample is incompatible with the current encoding, reencode the + // existing chunk data into new chunk(s). + if c.isInt() && !isInt64(ddv) { + // int->float. 
+ nvb = d4 + nInt = false + } else if !c.isInt() && vb == d4 && projectedValue+model.SampleValue(float32(ddv)) != s.Value { + // float32->float64. + nvb = d8 + } else { + if tb < d8 { + // Maybe more bytes for timestamp. + ntb = max(tb, bytesNeededForSignedTimestampDelta(ddt)) + } + if c.isInt() && vb < d8 { + // Maybe more bytes for sample value. + nvb = max(vb, bytesNeededForIntegerSampleValueDelta(ddv)) + } + } + if tb != ntb || vb != nvb || c.isInt() != nInt { + if len(c)*2 < cap(c) { + return transcodeAndAdd(newDoubleDeltaEncodedChunk(ntb, nvb, nInt, cap(c)), &c, s) + } + // Chunk is already half full. Better create a new one and save the transcoding efforts. + return addToOverflowChunk(&c, s) + } + + offset := len(c) + c = c[:offset+sampleSize] + + switch tb { + case d1: + c[offset] = byte(ddt) + case d2: + binary.LittleEndian.PutUint16(c[offset:], uint16(ddt)) + case d4: + binary.LittleEndian.PutUint32(c[offset:], uint32(ddt)) + case d8: + // Store the absolute value (no delta) in case of d8. + binary.LittleEndian.PutUint64(c[offset:], uint64(s.Timestamp)) + default: + return nil, fmt.Errorf("invalid number of bytes for time delta: %d", tb) + } + + offset += int(tb) + + if c.isInt() { + switch vb { + case d0: + // No-op. Constant delta is stored as base value. + case d1: + c[offset] = byte(int8(ddv)) + case d2: + binary.LittleEndian.PutUint16(c[offset:], uint16(int16(ddv))) + case d4: + binary.LittleEndian.PutUint32(c[offset:], uint32(int32(ddv))) + // d8 must not happen. Those samples are encoded as float64. + default: + return nil, fmt.Errorf("invalid number of bytes for integer delta: %d", vb) + } + } else { + switch vb { + case d4: + binary.LittleEndian.PutUint32(c[offset:], math.Float32bits(float32(ddv))) + case d8: + // Store the absolute value (no delta) in case of d8. 
+ binary.LittleEndian.PutUint64(c[offset:], math.Float64bits(float64(s.Value))) + default: + return nil, fmt.Errorf("invalid number of bytes for floating point delta: %d", vb) + } + } + return []Chunk{&c}, nil +} + +// Clone implements chunk. +func (c doubleDeltaEncodedChunk) Clone() Chunk { + clone := make(doubleDeltaEncodedChunk, len(c), cap(c)) + copy(clone, c) + return &clone +} + +// FirstTime implements chunk. +func (c doubleDeltaEncodedChunk) FirstTime() model.Time { + return c.baseTime() +} + +// NewIterator( implements chunk. +func (c *doubleDeltaEncodedChunk) NewIterator() Iterator { + return newIndexAccessingChunkIterator(c.Len(), &doubleDeltaEncodedIndexAccessor{ + c: *c, + baseT: c.baseTime(), + baseΔT: c.baseTimeDelta(), + baseV: c.baseValue(), + baseΔV: c.baseValueDelta(), + tBytes: c.timeBytes(), + vBytes: c.valueBytes(), + isInt: c.isInt(), + }) +} + +// Marshal implements chunk. +func (c doubleDeltaEncodedChunk) Marshal(w io.Writer) error { + if len(c) > math.MaxUint16 { + panic("chunk buffer length would overflow a 16 bit uint") + } + binary.LittleEndian.PutUint16(c[doubleDeltaHeaderBufLenOffset:], uint16(len(c))) + + n, err := w.Write(c[:cap(c)]) + if err != nil { + return err + } + if n != cap(c) { + return fmt.Errorf("wanted to write %d bytes, wrote %d", cap(c), n) + } + return nil +} + +// MarshalToBuf implements chunk. +func (c doubleDeltaEncodedChunk) MarshalToBuf(buf []byte) error { + if len(c) > math.MaxUint16 { + panic("chunk buffer length would overflow a 16 bit uint") + } + binary.LittleEndian.PutUint16(c[doubleDeltaHeaderBufLenOffset:], uint16(len(c))) + + n := copy(buf, c) + if n != len(c) { + return fmt.Errorf("wanted to copy %d bytes to buffer, copied %d", len(c), n) + } + return nil +} + +// Unmarshal implements chunk. 
+func (c *doubleDeltaEncodedChunk) Unmarshal(r io.Reader) error { + *c = (*c)[:cap(*c)] + if _, err := io.ReadFull(r, *c); err != nil { + return err + } + return c.setLen() +} + +// UnmarshalFromBuf implements chunk. +func (c *doubleDeltaEncodedChunk) UnmarshalFromBuf(buf []byte) error { + *c = (*c)[:cap(*c)] + copy(*c, buf) + return c.setLen() +} + +// setLen sets the length of the underlying slice and performs some sanity checks. +func (c *doubleDeltaEncodedChunk) setLen() error { + l := binary.LittleEndian.Uint16((*c)[doubleDeltaHeaderBufLenOffset:]) + if int(l) > cap(*c) { + return fmt.Errorf("doubledelta chunk length exceeded during unmarshaling: %d", l) + } + if int(l) < doubleDeltaHeaderMinBytes { + return fmt.Errorf("doubledelta chunk length less than header size: %d < %d", l, doubleDeltaHeaderMinBytes) + } + switch c.timeBytes() { + case d1, d2, d4, d8: + // Pass. + default: + return fmt.Errorf("invalid number of time bytes in doubledelta chunk: %d", c.timeBytes()) + } + switch c.valueBytes() { + case d0, d1, d2, d4, d8: + // Pass. + default: + return fmt.Errorf("invalid number of value bytes in doubledelta chunk: %d", c.valueBytes()) + } + *c = (*c)[:l] + return nil +} + +// Encoding implements chunk. +func (c doubleDeltaEncodedChunk) Encoding() Encoding { return DoubleDelta } + +// Utilization implements chunk. 
+func (c doubleDeltaEncodedChunk) Utilization() float64 { + return float64(len(c)-doubleDeltaHeaderIsIntOffset-1) / float64(cap(c)) +} + +func (c doubleDeltaEncodedChunk) baseTime() model.Time { + return model.Time( + binary.LittleEndian.Uint64( + c[doubleDeltaHeaderBaseTimeOffset:], + ), + ) +} + +func (c doubleDeltaEncodedChunk) baseValue() model.SampleValue { + return model.SampleValue( + math.Float64frombits( + binary.LittleEndian.Uint64( + c[doubleDeltaHeaderBaseValueOffset:], + ), + ), + ) +} + +func (c doubleDeltaEncodedChunk) baseTimeDelta() model.Time { + if len(c) < doubleDeltaHeaderBaseTimeDeltaOffset+8 { + return 0 + } + return model.Time( + binary.LittleEndian.Uint64( + c[doubleDeltaHeaderBaseTimeDeltaOffset:], + ), + ) +} + +func (c doubleDeltaEncodedChunk) baseValueDelta() model.SampleValue { + if len(c) < doubleDeltaHeaderBaseValueDeltaOffset+8 { + return 0 + } + return model.SampleValue( + math.Float64frombits( + binary.LittleEndian.Uint64( + c[doubleDeltaHeaderBaseValueDeltaOffset:], + ), + ), + ) +} + +func (c doubleDeltaEncodedChunk) timeBytes() deltaBytes { + return deltaBytes(c[doubleDeltaHeaderTimeBytesOffset]) +} + +func (c doubleDeltaEncodedChunk) valueBytes() deltaBytes { + return deltaBytes(c[doubleDeltaHeaderValueBytesOffset]) +} + +func (c doubleDeltaEncodedChunk) sampleSize() int { + return int(c.timeBytes() + c.valueBytes()) +} + +// Len implements Chunk. Runs in constant time. +func (c doubleDeltaEncodedChunk) Len() int { + if len(c) <= doubleDeltaHeaderIsIntOffset+1 { + return 0 + } + if len(c) <= doubleDeltaHeaderBaseValueOffset+8 { + return 1 + } + return (len(c)-doubleDeltaHeaderBytes)/c.sampleSize() + 2 +} + +func (c doubleDeltaEncodedChunk) isInt() bool { + return c[doubleDeltaHeaderIsIntOffset] == 1 +} + +// addFirstSample is a helper method only used by c.add(). It adds timestamp and +// value as base time and value. 
+func (c doubleDeltaEncodedChunk) addFirstSample(s model.SamplePair) []Chunk { + c = c[:doubleDeltaHeaderBaseValueOffset+8] + binary.LittleEndian.PutUint64( + c[doubleDeltaHeaderBaseTimeOffset:], + uint64(s.Timestamp), + ) + binary.LittleEndian.PutUint64( + c[doubleDeltaHeaderBaseValueOffset:], + math.Float64bits(float64(s.Value)), + ) + return []Chunk{&c} +} + +// addSecondSample is a helper method only used by c.add(). It calculates the +// base delta from the provided sample and adds it to the chunk. +func (c doubleDeltaEncodedChunk) addSecondSample(s model.SamplePair, tb, vb deltaBytes) ([]Chunk, error) { + baseTimeDelta := s.Timestamp - c.baseTime() + if baseTimeDelta < 0 { + return nil, fmt.Errorf("base time delta is less than zero: %v", baseTimeDelta) + } + c = c[:doubleDeltaHeaderBytes] + if tb >= d8 || bytesNeededForUnsignedTimestampDelta(baseTimeDelta) >= d8 { + // If already the base delta needs d8 (or we are at d8 + // already, anyway), we better encode this timestamp + // directly rather than as a delta and switch everything + // to d8. + c[doubleDeltaHeaderTimeBytesOffset] = byte(d8) + binary.LittleEndian.PutUint64( + c[doubleDeltaHeaderBaseTimeDeltaOffset:], + uint64(s.Timestamp), + ) + } else { + binary.LittleEndian.PutUint64( + c[doubleDeltaHeaderBaseTimeDeltaOffset:], + uint64(baseTimeDelta), + ) + } + baseValue := c.baseValue() + baseValueDelta := s.Value - baseValue + if vb >= d8 || baseValue+baseValueDelta != s.Value { + // If we can't reproduce the original sample value (or + // if we are at d8 already, anyway), we better encode + // this value directly rather than as a delta and switch + // everything to d8. 
+ c[doubleDeltaHeaderValueBytesOffset] = byte(d8) + c[doubleDeltaHeaderIsIntOffset] = 0 + binary.LittleEndian.PutUint64( + c[doubleDeltaHeaderBaseValueDeltaOffset:], + math.Float64bits(float64(s.Value)), + ) + } else { + binary.LittleEndian.PutUint64( + c[doubleDeltaHeaderBaseValueDeltaOffset:], + math.Float64bits(float64(baseValueDelta)), + ) + } + return []Chunk{&c}, nil +} + +// doubleDeltaEncodedIndexAccessor implements indexAccessor. +type doubleDeltaEncodedIndexAccessor struct { + c doubleDeltaEncodedChunk + baseT, baseΔT model.Time + baseV, baseΔV model.SampleValue + tBytes, vBytes deltaBytes + isInt bool + lastErr error +} + +func (acc *doubleDeltaEncodedIndexAccessor) err() error { + return acc.lastErr +} + +func (acc *doubleDeltaEncodedIndexAccessor) timestampAtIndex(idx int) model.Time { + if idx == 0 { + return acc.baseT + } + if idx == 1 { + // If time bytes are at d8, the time is saved directly rather + // than as a difference. + if acc.tBytes == d8 { + return acc.baseΔT + } + return acc.baseT + acc.baseΔT + } + + offset := doubleDeltaHeaderBytes + (idx-2)*int(acc.tBytes+acc.vBytes) + + switch acc.tBytes { + case d1: + return acc.baseT + + model.Time(idx)*acc.baseΔT + + model.Time(int8(acc.c[offset])) + case d2: + return acc.baseT + + model.Time(idx)*acc.baseΔT + + model.Time(int16(binary.LittleEndian.Uint16(acc.c[offset:]))) + case d4: + return acc.baseT + + model.Time(idx)*acc.baseΔT + + model.Time(int32(binary.LittleEndian.Uint32(acc.c[offset:]))) + case d8: + // Take absolute value for d8. + return model.Time(binary.LittleEndian.Uint64(acc.c[offset:])) + default: + acc.lastErr = fmt.Errorf("invalid number of bytes for time delta: %d", acc.tBytes) + return model.Earliest + } +} + +func (acc *doubleDeltaEncodedIndexAccessor) sampleValueAtIndex(idx int) model.SampleValue { + if idx == 0 { + return acc.baseV + } + if idx == 1 { + // If value bytes are at d8, the value is saved directly rather + // than as a difference. 
+ if acc.vBytes == d8 { + return acc.baseΔV + } + return acc.baseV + acc.baseΔV + } + + offset := doubleDeltaHeaderBytes + (idx-2)*int(acc.tBytes+acc.vBytes) + int(acc.tBytes) + + if acc.isInt { + switch acc.vBytes { + case d0: + return acc.baseV + + model.SampleValue(idx)*acc.baseΔV + case d1: + return acc.baseV + + model.SampleValue(idx)*acc.baseΔV + + model.SampleValue(int8(acc.c[offset])) + case d2: + return acc.baseV + + model.SampleValue(idx)*acc.baseΔV + + model.SampleValue(int16(binary.LittleEndian.Uint16(acc.c[offset:]))) + case d4: + return acc.baseV + + model.SampleValue(idx)*acc.baseΔV + + model.SampleValue(int32(binary.LittleEndian.Uint32(acc.c[offset:]))) + // No d8 for ints. + default: + acc.lastErr = fmt.Errorf("invalid number of bytes for integer delta: %d", acc.vBytes) + return 0 + } + } else { + switch acc.vBytes { + case d4: + return acc.baseV + + model.SampleValue(idx)*acc.baseΔV + + model.SampleValue(math.Float32frombits(binary.LittleEndian.Uint32(acc.c[offset:]))) + case d8: + // Take absolute value for d8. + return model.SampleValue(math.Float64frombits(binary.LittleEndian.Uint64(acc.c[offset:]))) + default: + acc.lastErr = fmt.Errorf("invalid number of bytes for floating point delta: %d", acc.vBytes) + return 0 + } + } +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/instrumentation.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/instrumentation.go new file mode 100644 index 000000000..4dd3231e4 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/instrumentation.go @@ -0,0 +1,90 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import "github.com/prometheus/client_golang/prometheus" + +// Usually, a separate file for instrumentation is frowned upon. Metrics should +// be close to where they are used. However, the metrics below are set all over +// the place, so we go for a separate instrumentation file in this case. +var ( + Ops = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "chunk_ops_total", + Help: "The total number of chunk operations by their type.", + }, + []string{OpTypeLabel}, + ) + DescOps = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "chunkdesc_ops_total", + Help: "The total number of chunk descriptor operations by their type.", + }, + []string{OpTypeLabel}, + ) + NumMemDescs = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "memory_chunkdescs", + Help: "The current number of chunk descriptors in memory.", + }) +) + +const ( + namespace = "prometheus" + subsystem = "local_storage" + + // OpTypeLabel is the label name for chunk operation types. + OpTypeLabel = "type" + + // Op-types for ChunkOps. + + // CreateAndPin is the label value for create-and-pin chunk ops. + CreateAndPin = "create" // A Desc creation with refCount=1. + // PersistAndUnpin is the label value for persist chunk ops. + PersistAndUnpin = "persist" + // Pin is the label value for pin chunk ops (excludes pin on creation). 
+ Pin = "pin" + // Unpin is the label value for unpin chunk ops (excludes the unpin on persisting). + Unpin = "unpin" + // Clone is the label value for clone chunk ops. + Clone = "clone" + // Transcode is the label value for transcode chunk ops. + Transcode = "transcode" + // Drop is the label value for drop chunk ops. + Drop = "drop" + + // Op-types for ChunkOps and ChunkDescOps. + + // Evict is the label value for evict chunk desc ops. + Evict = "evict" + // Load is the label value for load chunk and chunk desc ops. + Load = "load" +) + +func init() { + prometheus.MustRegister(Ops) + prometheus.MustRegister(DescOps) + prometheus.MustRegister(NumMemDescs) +} + +// NumMemChunks is the total number of chunks in memory. This is a global +// counter, also used internally, so not implemented as metrics. Collected in +// MemorySeriesStorage. +// TODO(beorn7): Having this as an exported global variable is really bad. +var NumMemChunks int64 diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit.go new file mode 100644 index 000000000..3181a9a76 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit.go @@ -0,0 +1,1210 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package chunk + +import ( + "encoding/binary" + "fmt" + "io" + "math" + + "github.com/prometheus/common/model" +) + +// The varbit chunk encoding is broadly similar to the double-delta +// chunks. However, it uses a number of different bit-widths to save the +// double-deltas (rather than 1, 2, or 4 bytes). Also, it doesn't use the delta +// of the first two samples of a chunk as the base delta, but uses a "sliding" +// delta, i.e. the delta of the two previous samples. Both differences make +// random access more expensive. Sample values can be encoded with the same +// double-delta scheme as timestamps, but different value encodings can be +// chosen adaptively, among them XOR encoding and "zero" encoding for constant +// sample values. Overall, the varbit encoding results in a much better +// compression ratio (~1.3 bytes per sample compared to ~3.3 bytes per sample +// with double-delta encoding, for typical data sets). +// +// Major parts of the varbit encoding are inspired by the following paper: +// Gorilla: A Fast, Scalable, In-Memory Time Series Database +// T. Pelkonen et al., Facebook Inc. +// http://www.vldb.org/pvldb/vol8/p1816-teller.pdf +// Note that there are significant differences, some due to the way Prometheus +// chunks work, others to optimize for the Prometheus use-case. +// +// Layout of a 1024 byte varbit chunk (big endian, wherever it matters): +// - first time (int64): 8 bytes bit 0000-0063 +// - first value (float64): 8 bytes bit 0064-0127 +// - last time (int64): 8 bytes bit 0128-0191 +// - last value (float64): 8 bytes bit 0192-0255 +// - first Δt (t1-t0, unsigned): 3 bytes bit 0256-0279 +// - flags (byte) 1 byte bit 0280-0287 +// - bit offset for next sample 2 bytes bit 0288-0303 +// - first Δv for value encoding 1, otherwise payload +// 4 bytes bit 0304-0335 +// - payload 973 bytes bit 0336-8119 +// The following only exists if the chunk is still open. Otherwise, it might be +// used by payload. 
+// - bit offset for current ΔΔt=0 count 2 bytes bit 8120-8135 +// - last Δt 3 bytes bit 8136-8159 +// - special bytes for value encoding 4 bytes bit 8160-8191 +// - for encoding 1: last Δv 4 bytes bit 8160-8191 +// - for encoding 2: count of +// - last leading zeros (1 byte) 1 byte bit 8160-8167 +// - last significant bits (1 byte) 1 byte bit 8168-8175 +// +// FLAGS +// +// The two least significant bits of the flags byte define the value encoding +// for the whole chunk, see below. The most significant byte of the flags byte +// is set if the chunk is closed. No samples can be added anymore to a closed +// chunk. Furthermore, the last value of a closed chunk is only saved in the +// header (last time, last value), while in a chunk that is still open, the last +// sample in the payload is the same sample as saved in the header. +// +// The remaining bits in the flags byte are currently unused. +// +// TIMESTAMP ENCODING +// +// The 1st timestamp is saved directly. +// +// The difference to the 2nd timestamp is saved as first Δt. 3 bytes is enough +// for about 4.5h. Since we close a chunk after sitting idle for 1h, this +// limitation has no practical consequences. Should, for whatever reason, a +// larger delta be required, the chunk would be closed, i.e. the new sample is +// added as the last sample to the chunk, and the next sample will be added to a +// new chunk. +// +// From the 3rd timestamp on, a double-delta (ΔΔt) is saved: +// (t_{n} - t_{n-1}) - (t_{n-1} - t_{n-2}) +// To perform that operation, the last Δt is saved at the end of the chunk for +// as long the chunk is not closed yet (see above). +// +// Most of the times, ΔΔt is zero, even with the ms-precision of +// Prometheus. Therefore, we save a ΔΔt of zero as a leading '0' bit followed by +// 7 bits counting the number of consecutive ΔΔt==0 (the count is offset by -1, +// so the range of 0 to 127 represents 1 to 128 repetitions). 
+// +// If ΔΔt != 0, we essentially apply the Gorilla encoding scheme (cf. section +// 4.1.1 in the paper) but with different bit buckets as Prometheus uses ms +// rather than s, and the default scrape interval is 1m rather than 4m). In +// particular: +// +// - If ΔΔt is between [-32,31], store '10' followed by a 6 bit value. This is +// for minor irregularities in the scrape interval. +// +// - If ΔΔt is between [-65536,65535], store '110' followed by a 17 bit +// value. This will typically happen if a scrape is missed completely. +// +// - If ΔΔt is between [-4194304,4194303], store '111' followed by a 23 bit +// value. This spans more than 1h, which is usually enough as we close a +// chunk anyway if it doesn't receive any sample in 1h. +// +// - Should we nevertheless encounter a larger ΔΔt, we simply close the chunk, +// add the new sample as the last of the chunk, and add subsequent samples to +// a new chunk. +// +// VALUE ENCODING +// +// Value encoding can change and is determined by the two least significant bits +// of the 'flags' byte at bit position 280. The encoding can be changed without +// transcoding upon adding the 3rd sample. After that, an encoding change +// results either in transcoding or in closing the chunk. +// +// The 1st sample value is always saved directly. The 2nd sample value is saved +// in the header as the last value. Upon saving the 3rd value, an encoding is +// chosen, and the chunk is prepared accordingly. +// +// The following value encodings exist (with their value in the flags byte): +// +// 0: "Zero encoding". +// +// In many time series, the value simply stays constant over a long time +// (e.g. the "up" time series). In that case, all sample values are determined +// by the 1st value, and no further value encoding is happening at all. The +// payload consists entirely of timestamps. +// +// 1: Integer double-delta encoding. 
+// +// Many Prometheus metrics are integer counters and change in a quite regular +// fashion, similar to timestamps. Thus, the same double-delta encoding can be +// applied. This encoding works like the timestamp encoding described above, but +// with different bit buckets and without counting of repeated ΔΔv=0. The case +// of ΔΔv=0 is represented by a single '0' bit for each occurrence. The first Δv +// is saved as an int32 at bit position 288. The most recent Δv is saved as an +// int32 at the end of the chunk (see above). If Δv cannot be represented as a +// 32 bit signed integer, no integer double-delta encoding can be applied. +// +// Bit buckets (lead-in bytes followed by (signed) value bits): +// - '0': 0 bit +// - '10': 6 bit +// - '110': 13 bit +// - '1110': 20 bit +// - '1111': 33 bit +// Since Δv is restricted to 32 bit, 33 bit are always enough for ΔΔv. +// +// 2: XOR encoding. +// +// This follows almost precisely the Gorilla value encoding (cf. section 4.1.2 +// of the paper). The last count of leading zeros and the last count of +// meaningful bits in the XOR value is saved at the end of the chunk for as long +// as the chunk is not closed yet (see above). Note, though, that the number of +// significant bits is saved as (count-1), i.e. a saved value of 0 means 1 +// significant bit, a saved value of 1 means 2, and so on. Also, we save the +// numbers of leading zeros and significant bits anew if they drop a +// lot. Otherwise, you can easily be locked in with a high number of significant +// bits. +// +// 3: Direct encoding. +// +// If the sample values are just random, it is most efficient to save sample +// values directly as float64. +// +// ZIPPING TIMESTAMPS AND VALUES TOGETHER +// +// Usually, encoded timestamps and encoded values simply alternate. There are +// two exceptions: +// +// (1) With the "zero encoding" for values, the payload only contains +// timestamps. 
+// +// (2) In a consecutive row of up to 128 ΔΔt=0 repeats, the count of timestamps +// determines how many sample values will follow directly after another. + +const ( + varbitMinLength = 128 + varbitMaxLength = 8191 + + // Useful byte offsets. + varbitFirstTimeOffset = 0 + varbitFirstValueOffset = 8 + varbitLastTimeOffset = 16 + varbitLastValueOffset = 24 + varbitFirstTimeDeltaOffset = 32 + varbitFlagOffset = 35 + varbitNextSampleBitOffsetOffset = 36 + varbitFirstValueDeltaOffset = 38 + // The following are in the "footer" and only usable if the chunk is + // still open. + varbitCountOffsetBitOffset = ChunkLen - 9 + varbitLastTimeDeltaOffset = ChunkLen - 7 + varbitLastValueDeltaOffset = ChunkLen - 4 + varbitLastLeadingZerosCountOffset = ChunkLen - 4 + varbitLastSignificantBitsCountOffset = ChunkLen - 3 + + varbitFirstSampleBitOffset uint16 = 0 // Symbolic, don't really read or write here. + varbitSecondSampleBitOffset uint16 = 1 // Symbolic, don't really read or write here. + // varbitThirdSampleBitOffset is a bit special. Depending on the encoding, there can + // be various things at this offset. It's most of the time symbolic, but in the best + // case (zero encoding for values), it will be the real offset for the 3rd sample. + varbitThirdSampleBitOffset uint16 = varbitFirstValueDeltaOffset * 8 + + // If the bit offset for the next sample is above this threshold, no new + // samples can be added to the chunk's payload (because the payload has + // already reached the footer). However, one more sample can be saved in + // the header as the last sample. + varbitNextSampleBitOffsetThreshold = 8 * varbitCountOffsetBitOffset + + varbitMaxTimeDelta = 1 << 24 // What fits into a 3-byte timestamp. 
+) + +type varbitValueEncoding byte + +const ( + varbitZeroEncoding varbitValueEncoding = iota + varbitIntDoubleDeltaEncoding + varbitXOREncoding + varbitDirectEncoding +) + +// varbitWorstCaseBitsPerSample provides the worst-case number of bits needed +// per sample with the various value encodings. The counts already include the +// up to 27 bits taken by a timestamp. +var varbitWorstCaseBitsPerSample = map[varbitValueEncoding]int{ + varbitZeroEncoding: 27 + 0, + varbitIntDoubleDeltaEncoding: 27 + 38, + varbitXOREncoding: 27 + 13 + 64, + varbitDirectEncoding: 27 + 64, +} + +// varbitChunk implements the chunk interface. +type varbitChunk []byte + +// newVarbitChunk returns a newly allocated varbitChunk. For simplicity, all +// varbit chunks must have the length as determined by the ChunkLen constant. +func newVarbitChunk(enc varbitValueEncoding) *varbitChunk { + if ChunkLen < varbitMinLength || ChunkLen > varbitMaxLength { + panic(fmt.Errorf( + "invalid chunk length of %d bytes, need at least %d bytes and at most %d bytes", + ChunkLen, varbitMinLength, varbitMaxLength, + )) + } + if enc > varbitDirectEncoding { + panic(fmt.Errorf("unknown varbit value encoding: %v", enc)) + } + c := make(varbitChunk, ChunkLen) + c.setValueEncoding(enc) + return &c +} + +// Add implements chunk. +func (c *varbitChunk) Add(s model.SamplePair) ([]Chunk, error) { + offset := c.nextSampleOffset() + switch { + case c.closed(): + return addToOverflowChunk(c, s) + case offset > varbitNextSampleBitOffsetThreshold: + return c.addLastSample(s), nil + case offset == varbitFirstSampleBitOffset: + return c.addFirstSample(s), nil + case offset == varbitSecondSampleBitOffset: + return c.addSecondSample(s) + } + return c.addLaterSample(s, offset) +} + +// Clone implements chunk. +func (c varbitChunk) Clone() Chunk { + clone := make(varbitChunk, len(c)) + copy(clone, c) + return &clone +} + +// NewIterator implements chunk. 
+func (c varbitChunk) NewIterator() Iterator { + return newVarbitChunkIterator(c) +} + +// Marshal implements chunk. +func (c varbitChunk) Marshal(w io.Writer) error { + n, err := w.Write(c) + if err != nil { + return err + } + if n != cap(c) { + return fmt.Errorf("wanted to write %d bytes, wrote %d", cap(c), n) + } + return nil +} + +// MarshalToBuf implements chunk. +func (c varbitChunk) MarshalToBuf(buf []byte) error { + n := copy(buf, c) + if n != len(c) { + return fmt.Errorf("wanted to copy %d bytes to buffer, copied %d", len(c), n) + } + return nil +} + +// Unmarshal implements chunk. +func (c varbitChunk) Unmarshal(r io.Reader) error { + _, err := io.ReadFull(r, c) + return err +} + +// UnmarshalFromBuf implements chunk. +func (c varbitChunk) UnmarshalFromBuf(buf []byte) error { + if copied := copy(c, buf); copied != cap(c) { + return fmt.Errorf("insufficient bytes copied from buffer during unmarshaling, want %d, got %d", cap(c), copied) + } + return nil +} + +// Encoding implements chunk. +func (c varbitChunk) Encoding() Encoding { return Varbit } + +// Utilization implements chunk. +func (c varbitChunk) Utilization() float64 { + // 15 bytes is the length of the chunk footer. + return math.Min(float64(c.nextSampleOffset()/8+15)/float64(cap(c)), 1) +} + +// Len implements chunk. Runs in O(n). +func (c varbitChunk) Len() int { + it := c.NewIterator() + i := 0 + for ; it.Scan(); i++ { + } + return i +} + +// FirstTime implements chunk. 
+func (c varbitChunk) FirstTime() model.Time { + return model.Time( + binary.BigEndian.Uint64( + c[varbitFirstTimeOffset:], + ), + ) +} + +func (c varbitChunk) firstValue() model.SampleValue { + return model.SampleValue( + math.Float64frombits( + binary.BigEndian.Uint64( + c[varbitFirstValueOffset:], + ), + ), + ) +} + +func (c varbitChunk) lastTime() model.Time { + return model.Time( + binary.BigEndian.Uint64( + c[varbitLastTimeOffset:], + ), + ) +} + +func (c varbitChunk) lastValue() model.SampleValue { + return model.SampleValue( + math.Float64frombits( + binary.BigEndian.Uint64( + c[varbitLastValueOffset:], + ), + ), + ) +} + +func (c varbitChunk) firstTimeDelta() model.Time { + // Only the first 3 bytes are actually the timestamp, so get rid of the + // last one by bitshifting. + return model.Time(c[varbitFirstTimeDeltaOffset+2]) | + model.Time(c[varbitFirstTimeDeltaOffset+1])<<8 | + model.Time(c[varbitFirstTimeDeltaOffset])<<16 +} + +// firstValueDelta returns an undefined result if the encoding type is not 1. +func (c varbitChunk) firstValueDelta() int32 { + return int32(binary.BigEndian.Uint32(c[varbitFirstValueDeltaOffset:])) +} + +// lastTimeDelta returns an undefined result if the chunk is closed already. +func (c varbitChunk) lastTimeDelta() model.Time { + return model.Time(c[varbitLastTimeDeltaOffset+2]) | + model.Time(c[varbitLastTimeDeltaOffset+1])<<8 | + model.Time(c[varbitLastTimeDeltaOffset])<<16 +} + +// setLastTimeDelta must not be called if the chunk is closed already. It most +// not be called with a time that doesn't fit into 24bit, either. +func (c varbitChunk) setLastTimeDelta(dT model.Time) { + if dT > varbitMaxTimeDelta { + panic("Δt overflows 24 bit") + } + c[varbitLastTimeDeltaOffset] = byte(dT >> 16) + c[varbitLastTimeDeltaOffset+1] = byte(dT >> 8) + c[varbitLastTimeDeltaOffset+2] = byte(dT) +} + +// lastValueDelta returns an undefined result if the chunk is closed already. 
+func (c varbitChunk) lastValueDelta() int32 { + return int32(binary.BigEndian.Uint32(c[varbitLastValueDeltaOffset:])) +} + +// setLastValueDelta must not be called if the chunk is closed already. +func (c varbitChunk) setLastValueDelta(dV int32) { + binary.BigEndian.PutUint32(c[varbitLastValueDeltaOffset:], uint32(dV)) +} + +func (c varbitChunk) nextSampleOffset() uint16 { + return binary.BigEndian.Uint16(c[varbitNextSampleBitOffsetOffset:]) +} + +func (c varbitChunk) setNextSampleOffset(offset uint16) { + binary.BigEndian.PutUint16(c[varbitNextSampleBitOffsetOffset:], offset) +} + +func (c varbitChunk) valueEncoding() varbitValueEncoding { + return varbitValueEncoding(c[varbitFlagOffset] & 0x03) +} + +func (c varbitChunk) setValueEncoding(enc varbitValueEncoding) { + if enc > varbitDirectEncoding { + panic("invalid varbit value encoding") + } + c[varbitFlagOffset] &^= 0x03 // Clear. + c[varbitFlagOffset] |= byte(enc) // Set. +} + +func (c varbitChunk) closed() bool { + return c[varbitFlagOffset] > 0x7F // Most significant bit set. +} + +func (c varbitChunk) zeroDDTRepeats() (repeats uint64, offset uint16) { + offset = binary.BigEndian.Uint16(c[varbitCountOffsetBitOffset:]) + if offset == 0 { + return 0, 0 + } + return c.readBitPattern(offset, 7) + 1, offset +} + +func (c varbitChunk) setZeroDDTRepeats(repeats uint64, offset uint16) { + switch repeats { + case 0: + // Just clear the offset. + binary.BigEndian.PutUint16(c[varbitCountOffsetBitOffset:], 0) + return + case 1: + // First time we set a repeat here, so set the offset. But only + // if we haven't reached the footer yet. (If that's the case, we + // would overwrite ourselves below, and we don't need the offset + // later anyway because no more samples will be added to this + // chunk.) + if offset+7 <= varbitNextSampleBitOffsetThreshold { + binary.BigEndian.PutUint16(c[varbitCountOffsetBitOffset:], offset) + } + default: + // For a change, we are writing somewhere where we have written + // before. 
We need to clear the bits first. + posIn1stByte := offset % 8 + c[offset/8] &^= bitMask[7][posIn1stByte] + if posIn1stByte > 1 { + c[offset/8+1] &^= bitMask[posIn1stByte-1][0] + } + } + c.addBitPattern(offset, repeats-1, 7) +} + +func (c varbitChunk) setLastSample(s model.SamplePair) { + binary.BigEndian.PutUint64( + c[varbitLastTimeOffset:], + uint64(s.Timestamp), + ) + binary.BigEndian.PutUint64( + c[varbitLastValueOffset:], + math.Float64bits(float64(s.Value)), + ) +} + +// addFirstSample is a helper method only used by c.add(). It adds timestamp and +// value as base time and value. +func (c *varbitChunk) addFirstSample(s model.SamplePair) []Chunk { + binary.BigEndian.PutUint64( + (*c)[varbitFirstTimeOffset:], + uint64(s.Timestamp), + ) + binary.BigEndian.PutUint64( + (*c)[varbitFirstValueOffset:], + math.Float64bits(float64(s.Value)), + ) + c.setLastSample(s) // To simplify handling of single-sample chunks. + c.setNextSampleOffset(varbitSecondSampleBitOffset) + return []Chunk{c} +} + +// addSecondSample is a helper method only used by c.add(). It calculates the +// first time delta from the provided sample and adds it to the chunk together +// with the provided sample as the last sample. +func (c *varbitChunk) addSecondSample(s model.SamplePair) ([]Chunk, error) { + firstTimeDelta := s.Timestamp - c.FirstTime() + if firstTimeDelta < 0 { + return nil, fmt.Errorf("first Δt is less than zero: %v", firstTimeDelta) + } + if firstTimeDelta > varbitMaxTimeDelta { + // A time delta too great. Still, we can add it as a last sample + // before overflowing. + return c.addLastSample(s), nil + } + (*c)[varbitFirstTimeDeltaOffset] = byte(firstTimeDelta >> 16) + (*c)[varbitFirstTimeDeltaOffset+1] = byte(firstTimeDelta >> 8) + (*c)[varbitFirstTimeDeltaOffset+2] = byte(firstTimeDelta) + + // Also set firstTimeDelta as the last time delta to be able to use the + // normal methods for adding later samples. 
+ c.setLastTimeDelta(firstTimeDelta) + + c.setLastSample(s) + c.setNextSampleOffset(varbitThirdSampleBitOffset) + return []Chunk{c}, nil +} + +// addLastSample is a helper method only used by c.add() and in other helper +// methods called by c.add(). It simply sets the given sample as the last sample +// in the heador and declares the chunk closed. In other words, addLastSample +// adds the very last sample added to this chunk ever, while setLastSample sets +// the sample most recently added to the chunk so that it can be used for the +// calculations required to add the next sample. +func (c *varbitChunk) addLastSample(s model.SamplePair) []Chunk { + c.setLastSample(s) + (*c)[varbitFlagOffset] |= 0x80 + return []Chunk{c} +} + +// addLaterSample is a helper method only used by c.add(). It adds a third or +// later sample. +func (c *varbitChunk) addLaterSample(s model.SamplePair, offset uint16) ([]Chunk, error) { + var ( + lastTime = c.lastTime() + lastTimeDelta = c.lastTimeDelta() + newTimeDelta = s.Timestamp - lastTime + lastValue = c.lastValue() + encoding = c.valueEncoding() + ) + + if newTimeDelta < 0 { + return nil, fmt.Errorf("Δt is less than zero: %v", newTimeDelta) + } + if offset == varbitThirdSampleBitOffset { + offset, encoding = c.prepForThirdSample(lastValue, s.Value, encoding) + } + if newTimeDelta > varbitMaxTimeDelta { + // A time delta too great. Still, we can add it as a last sample + // before overflowing. + return c.addLastSample(s), nil + } + + // Analyze worst case, does it fit? If not, set new sample as the last. + if int(offset)+varbitWorstCaseBitsPerSample[encoding] > ChunkLen*8 { + return c.addLastSample(s), nil + } + + // Transcoding/overflow decisions first. + if encoding == varbitZeroEncoding && s.Value != lastValue { + // Cannot go on with zero encoding. + if offset > ChunkLen*4 { + // Chunk already half full. Don't transcode, overflow instead. 
+ return addToOverflowChunk(c, s) + } + if isInt32(s.Value - lastValue) { + // Trying int encoding looks promising. + return transcodeAndAdd(newVarbitChunk(varbitIntDoubleDeltaEncoding), c, s) + } + return transcodeAndAdd(newVarbitChunk(varbitXOREncoding), c, s) + } + if encoding == varbitIntDoubleDeltaEncoding && !isInt32(s.Value-lastValue) { + // Cannot go on with int encoding. + if offset > ChunkLen*4 { + // Chunk already half full. Don't transcode, overflow instead. + return addToOverflowChunk(c, s) + } + return transcodeAndAdd(newVarbitChunk(varbitXOREncoding), c, s) + } + + offset, overflow := c.addDDTime(offset, lastTimeDelta, newTimeDelta) + if overflow { + return c.addLastSample(s), nil + } + switch encoding { + case varbitZeroEncoding: + // Nothing to do. + case varbitIntDoubleDeltaEncoding: + offset = c.addDDValue(offset, lastValue, s.Value) + case varbitXOREncoding: + offset = c.addXORValue(offset, lastValue, s.Value) + case varbitDirectEncoding: + offset = c.addBitPattern(offset, math.Float64bits(float64(s.Value)), 64) + default: + return nil, fmt.Errorf("unknown Varbit value encoding: %v", encoding) + } + + c.setNextSampleOffset(offset) + c.setLastSample(s) + return []Chunk{c}, nil +} + +func (c varbitChunk) prepForThirdSample( + lastValue, newValue model.SampleValue, encoding varbitValueEncoding, +) (uint16, varbitValueEncoding) { + var ( + offset = varbitThirdSampleBitOffset + firstValue = c.firstValue() + firstValueDelta = lastValue - firstValue + firstXOR = math.Float64bits(float64(firstValue)) ^ math.Float64bits(float64(lastValue)) + _, firstSignificantBits = countBits(firstXOR) + secondXOR = math.Float64bits(float64(lastValue)) ^ math.Float64bits(float64(newValue)) + _, secondSignificantBits = countBits(secondXOR) + ) + // Now pick an initial encoding and prepare things accordingly. + // However, never pick an encoding "below" the one initially set. 
+ switch { + case encoding == varbitZeroEncoding && lastValue == firstValue && lastValue == newValue: + // Stay at zero encoding. + // No value to be set. + // No offset change required. + case encoding <= varbitIntDoubleDeltaEncoding && isInt32(firstValueDelta): + encoding = varbitIntDoubleDeltaEncoding + binary.BigEndian.PutUint32( + c[varbitFirstValueDeltaOffset:], + uint32(int32(firstValueDelta)), + ) + c.setLastValueDelta(int32(firstValueDelta)) + offset += 32 + case encoding == varbitDirectEncoding || firstSignificantBits+secondSignificantBits > 100: + // Heuristics based on three samples only is a bit weak, + // but if we need 50+13 = 63 bits per sample already + // now, we might be better off going for direct encoding. + encoding = varbitDirectEncoding + // Put bit pattern directly where otherwise the delta would have gone. + binary.BigEndian.PutUint64( + c[varbitFirstValueDeltaOffset:], + math.Float64bits(float64(lastValue)), + ) + offset += 64 + default: + encoding = varbitXOREncoding + offset = c.addXORValue(offset, firstValue, lastValue) + } + c.setValueEncoding(encoding) + c.setNextSampleOffset(offset) + return offset, encoding +} + +// addDDTime requires that lastTimeDelta and newTimeDelta are positive and don't overflow 24bit. +func (c varbitChunk) addDDTime(offset uint16, lastTimeDelta, newTimeDelta model.Time) (newOffset uint16, overflow bool) { + timeDD := newTimeDelta - lastTimeDelta + + if !isSignedIntN(int64(timeDD), 23) { + return offset, true + } + + c.setLastTimeDelta(newTimeDelta) + repeats, repeatsOffset := c.zeroDDTRepeats() + + if timeDD == 0 { + if repeats == 0 || repeats == 128 { + // First zeroDDT, or counter full, prepare new counter. + offset = c.addZeroBit(offset) + repeatsOffset = offset + offset += 7 + repeats = 0 + } + c.setZeroDDTRepeats(repeats+1, repeatsOffset) + return offset, false + } + + // No zero repeat. If we had any before, clear the DDT offset. 
+ c.setZeroDDTRepeats(0, repeatsOffset) + + switch { + case isSignedIntN(int64(timeDD), 6): + offset = c.addOneBitsWithTrailingZero(offset, 1) + offset = c.addSignedInt(offset, int64(timeDD), 6) + case isSignedIntN(int64(timeDD), 17): + offset = c.addOneBitsWithTrailingZero(offset, 2) + offset = c.addSignedInt(offset, int64(timeDD), 17) + case isSignedIntN(int64(timeDD), 23): + offset = c.addOneBits(offset, 3) + offset = c.addSignedInt(offset, int64(timeDD), 23) + default: + panic("unexpected required bits for ΔΔt") + } + return offset, false +} + +// addDDValue requires that newValue-lastValue can be represented with an int32. +func (c varbitChunk) addDDValue(offset uint16, lastValue, newValue model.SampleValue) uint16 { + newValueDelta := int64(newValue - lastValue) + lastValueDelta := c.lastValueDelta() + valueDD := newValueDelta - int64(lastValueDelta) + c.setLastValueDelta(int32(newValueDelta)) + + switch { + case valueDD == 0: + return c.addZeroBit(offset) + case isSignedIntN(valueDD, 6): + offset = c.addOneBitsWithTrailingZero(offset, 1) + return c.addSignedInt(offset, valueDD, 6) + case isSignedIntN(valueDD, 13): + offset = c.addOneBitsWithTrailingZero(offset, 2) + return c.addSignedInt(offset, valueDD, 13) + case isSignedIntN(valueDD, 20): + offset = c.addOneBitsWithTrailingZero(offset, 3) + return c.addSignedInt(offset, valueDD, 20) + case isSignedIntN(valueDD, 33): + offset = c.addOneBits(offset, 4) + return c.addSignedInt(offset, valueDD, 33) + default: + panic("unexpected required bits for ΔΔv") + } +} + +func (c varbitChunk) addXORValue(offset uint16, lastValue, newValue model.SampleValue) uint16 { + lastPattern := math.Float64bits(float64(lastValue)) + newPattern := math.Float64bits(float64(newValue)) + xor := lastPattern ^ newPattern + if xor == 0 { + return c.addZeroBit(offset) + } + + lastLeadingBits := c[varbitLastLeadingZerosCountOffset] + lastSignificantBits := c[varbitLastSignificantBitsCountOffset] + newLeadingBits, newSignificantBits := 
countBits(xor) + + // Short entry if the new significant bits fit into the same box as the + // last significant bits. However, should the new significant bits be + // shorter by 10 or more, go for a long entry instead, as we will + // probably save more (11 bit one-time overhead, potentially more to + // save later). + if newLeadingBits >= lastLeadingBits && + newLeadingBits+newSignificantBits <= lastLeadingBits+lastSignificantBits && + lastSignificantBits-newSignificantBits < 10 { + offset = c.addOneBitsWithTrailingZero(offset, 1) + return c.addBitPattern( + offset, + xor>>(64-lastLeadingBits-lastSignificantBits), + uint16(lastSignificantBits), + ) + } + + // Long entry. + c[varbitLastLeadingZerosCountOffset] = newLeadingBits + c[varbitLastSignificantBitsCountOffset] = newSignificantBits + offset = c.addOneBits(offset, 2) + offset = c.addBitPattern(offset, uint64(newLeadingBits), 5) + offset = c.addBitPattern(offset, uint64(newSignificantBits-1), 6) // Note -1! + return c.addBitPattern( + offset, + xor>>(64-newLeadingBits-newSignificantBits), + uint16(newSignificantBits), + ) +} + +func (c varbitChunk) addZeroBit(offset uint16) uint16 { + if offset < varbitNextSampleBitOffsetThreshold { + // Writing a zero to a never touched area is a no-op. + // Just increase the offset. + return offset + 1 + } + newByte := c[offset/8] &^ bitMask[1][offset%8] + c[offset/8] = newByte + // TODO(beorn7): The two lines above could be written as + // c[offset/8] &^= bitMask[1][offset%8] + // However, that tickles a compiler bug with GOARCH=386. 
+ // See https://github.com/prometheus/prometheus/issues/1509 + return offset + 1 +} + +func (c varbitChunk) addOneBits(offset uint16, n uint16) uint16 { + if n > 7 { + panic("unexpected number of control bits") + } + b := 8 - offset%8 + if b > n { + b = n + } + c[offset/8] |= bitMask[b][offset%8] + offset += b + b = n - b + if b > 0 { + c[offset/8] |= bitMask[b][0] + offset += b + } + return offset +} +func (c varbitChunk) addOneBitsWithTrailingZero(offset uint16, n uint16) uint16 { + offset = c.addOneBits(offset, n) + return c.addZeroBit(offset) +} + +// addSignedInt adds i as a signed integer with n bits. It requires i to be +// representable as such. (Check with isSignedIntN first.) +func (c varbitChunk) addSignedInt(offset uint16, i int64, n uint16) uint16 { + if i < 0 && n < 64 { + i += 1 << n + } + return c.addBitPattern(offset, uint64(i), n) +} + +// addBitPattern adds the last n bits of the given pattern. Other bits in the +// pattern must be 0. +func (c varbitChunk) addBitPattern(offset uint16, pattern uint64, n uint16) uint16 { + var ( + byteOffset = offset / 8 + bitsToWrite = 8 - offset%8 + newOffset = offset + n + ) + + // Clean up the parts of the footer we will write into. (But not more as + // we are still using the value related part of the footer when we have + // already overwritten timestamp related parts.) 
+ if newOffset > varbitNextSampleBitOffsetThreshold { + pos := offset + if pos < varbitNextSampleBitOffsetThreshold { + pos = varbitNextSampleBitOffsetThreshold + } + for pos < newOffset { + posInByte := pos % 8 + bitsToClear := newOffset - pos + if bitsToClear > 8-posInByte { + bitsToClear = 8 - posInByte + } + c[pos/8] &^= bitMask[bitsToClear][posInByte] + pos += bitsToClear + } + } + + for n > 0 { + if n <= bitsToWrite { + c[byteOffset] |= byte(pattern << (bitsToWrite - n)) + break + } + c[byteOffset] |= byte(pattern >> (n - bitsToWrite)) + n -= bitsToWrite + bitsToWrite = 8 + byteOffset++ + } + return newOffset +} + +// readBitPattern reads n bits at the given offset and returns them as the last +// n bits in a uint64. +func (c varbitChunk) readBitPattern(offset, n uint16) uint64 { + var ( + result uint64 + byteOffset = offset / 8 + bitOffset = offset % 8 + trailingBits, bitsToRead uint16 + ) + + for n > 0 { + trailingBits = 0 + bitsToRead = 8 - bitOffset + if bitsToRead > n { + trailingBits = bitsToRead - n + bitsToRead = n + } + result <<= bitsToRead + result |= uint64( + (c[byteOffset] & bitMask[bitsToRead][bitOffset]) >> trailingBits, + ) + n -= bitsToRead + byteOffset++ + bitOffset = 0 + } + return result +} + +type varbitChunkIterator struct { + c varbitChunk + // pos is the bit position within the chunk for the next sample to be + // decoded when scan() is called (i.e. it is _not_ the bit position of + // the sample currently returned by value()). The symbolic values + // varbitFirstSampleBitOffset and varbitSecondSampleBitOffset are also + // used for pos. len is the offset of the first bit in the chunk that is + // not part of the payload. If pos==len, then the iterator is positioned + // behind the last sample in the payload. However, the next call of + // scan() still has to check if the chunk is closed, in which case there + // is one more sample, saved in the header. 
To mark the iterator as + // having scanned that last sample, too, pos is set to len+1. + pos, len uint16 + t, dT model.Time + repeats byte // Repeats of ΔΔt=0. + v model.SampleValue + dV int64 // Only used for int value encoding. + leading, significant uint16 + enc varbitValueEncoding + lastError error + rewound bool + nextT model.Time // Only for rewound state. + nextV model.SampleValue // Only for rewound state. +} + +func newVarbitChunkIterator(c varbitChunk) *varbitChunkIterator { + return &varbitChunkIterator{ + c: c, + len: c.nextSampleOffset(), + t: model.Earliest, + enc: c.valueEncoding(), + significant: 1, + } +} + +// lastTimestamp implements Iterator. +func (it *varbitChunkIterator) LastTimestamp() (model.Time, error) { + if it.len == varbitFirstSampleBitOffset { + // No samples in the chunk yet. + return model.Earliest, it.lastError + } + return it.c.lastTime(), it.lastError +} + +// contains implements Iterator. +func (it *varbitChunkIterator) Contains(t model.Time) (bool, error) { + last, err := it.LastTimestamp() + if err != nil { + it.lastError = err + return false, err + } + return !t.Before(it.c.FirstTime()) && + !t.After(last), it.lastError +} + +// scan implements Iterator. +func (it *varbitChunkIterator) Scan() bool { + if it.lastError != nil { + return false + } + if it.rewound { + it.t = it.nextT + it.v = it.nextV + it.rewound = false + return true + } + if it.pos > it.len { + return false + } + if it.pos == it.len && it.repeats == 0 { + it.pos = it.len + 1 + if !it.c.closed() { + return false + } + it.t = it.c.lastTime() + it.v = it.c.lastValue() + return it.lastError == nil + } + if it.pos == varbitFirstSampleBitOffset { + it.t = it.c.FirstTime() + it.v = it.c.firstValue() + it.pos = varbitSecondSampleBitOffset + return it.lastError == nil + } + if it.pos == varbitSecondSampleBitOffset { + if it.len == varbitThirdSampleBitOffset && !it.c.closed() { + // Special case: Chunk has only two samples. 
+ it.t = it.c.lastTime() + it.v = it.c.lastValue() + it.pos = it.len + 1 + return it.lastError == nil + } + it.dT = it.c.firstTimeDelta() + it.t += it.dT + // Value depends on encoding. + switch it.enc { + case varbitZeroEncoding: + it.pos = varbitThirdSampleBitOffset + case varbitIntDoubleDeltaEncoding: + it.dV = int64(it.c.firstValueDelta()) + it.v += model.SampleValue(it.dV) + it.pos = varbitThirdSampleBitOffset + 32 + case varbitXOREncoding: + it.pos = varbitThirdSampleBitOffset + it.readXOR() + case varbitDirectEncoding: + it.v = model.SampleValue(math.Float64frombits( + binary.BigEndian.Uint64(it.c[varbitThirdSampleBitOffset/8:]), + )) + it.pos = varbitThirdSampleBitOffset + 64 + default: + it.lastError = fmt.Errorf("unknown varbit value encoding: %v", it.enc) + } + return it.lastError == nil + } + // 3rd sample or later does not have special cases anymore. + it.readDDT() + switch it.enc { + case varbitZeroEncoding: + // Do nothing. + case varbitIntDoubleDeltaEncoding: + it.readDDV() + case varbitXOREncoding: + it.readXOR() + case varbitDirectEncoding: + it.v = model.SampleValue(math.Float64frombits(it.readBitPattern(64))) + return it.lastError == nil + default: + it.lastError = fmt.Errorf("unknown varbit value encoding: %v", it.enc) + return false + } + return it.lastError == nil +} + +// findAtOrBefore implements Iterator. +func (it *varbitChunkIterator) FindAtOrBefore(t model.Time) bool { + if it.len == 0 || t.Before(it.c.FirstTime()) { + return false + } + last := it.c.lastTime() + if !t.Before(last) { + it.t = last + it.v = it.c.lastValue() + it.pos = it.len + 1 + return true + } + if t == it.t { + return it.lastError == nil + } + if t.Before(it.t) || it.rewound { + it.reset() + } + + var ( + prevT = model.Earliest + prevV model.SampleValue + ) + for it.Scan() && !t.Before(it.t) { + prevT = it.t + prevV = it.v + // TODO(beorn7): If we are in a repeat, we could iterate forward + // much faster. 
+ } + if t == it.t { + return it.lastError == nil + } + it.rewind(prevT, prevV) + return it.lastError == nil +} + +// findAtOrAfter implements Iterator. +func (it *varbitChunkIterator) FindAtOrAfter(t model.Time) bool { + if it.len == 0 || t.After(it.c.lastTime()) { + return false + } + first := it.c.FirstTime() + if !t.After(first) { + it.reset() + return it.Scan() + } + if t == it.t { + return it.lastError == nil + } + if t.Before(it.t) { + it.reset() + } + for it.Scan() && t.After(it.t) { + // TODO(beorn7): If we are in a repeat, we could iterate forward + // much faster. + } + return it.lastError == nil +} + +// value implements Iterator. +func (it *varbitChunkIterator) Value() model.SamplePair { + return model.SamplePair{ + Timestamp: it.t, + Value: it.v, + } +} + +// err implements Iterator. +func (it *varbitChunkIterator) Err() error { + return it.lastError +} + +func (it *varbitChunkIterator) readDDT() { + if it.repeats > 0 { + it.repeats-- + } else { + switch it.readControlBits(3) { + case 0: + it.repeats = byte(it.readBitPattern(7)) + case 1: + it.dT += model.Time(it.readSignedInt(6)) + case 2: + it.dT += model.Time(it.readSignedInt(17)) + case 3: + it.dT += model.Time(it.readSignedInt(23)) + default: + panic("unexpected number of control bits") + } + } + it.t += it.dT +} + +func (it *varbitChunkIterator) readDDV() { + switch it.readControlBits(4) { + case 0: + // Do nothing. + case 1: + it.dV += it.readSignedInt(6) + case 2: + it.dV += it.readSignedInt(13) + case 3: + it.dV += it.readSignedInt(20) + case 4: + it.dV += it.readSignedInt(33) + default: + panic("unexpected number of control bits") + } + it.v += model.SampleValue(it.dV) +} + +func (it *varbitChunkIterator) readXOR() { + switch it.readControlBits(2) { + case 0: + return + case 1: + // Do nothing right now. All done below. 
+ case 2: + it.leading = uint16(it.readBitPattern(5)) + it.significant = uint16(it.readBitPattern(6)) + 1 + default: + panic("unexpected number of control bits") + } + pattern := math.Float64bits(float64(it.v)) + pattern ^= it.readBitPattern(it.significant) << (64 - it.significant - it.leading) + it.v = model.SampleValue(math.Float64frombits(pattern)) +} + +// readControlBits reads successive 1-bits and stops after reading the first +// 0-bit. It also stops once it has read max bits. It returns the number of read +// 1-bits. +func (it *varbitChunkIterator) readControlBits(max uint16) uint16 { + var count uint16 + for count < max && int(it.pos/8) < len(it.c) { + b := it.c[it.pos/8] & bitMask[1][it.pos%8] + it.pos++ + if b == 0 { + return count + } + count++ + } + if int(it.pos/8) >= len(it.c) { + it.lastError = errChunkBoundsExceeded + } + return count +} + +func (it *varbitChunkIterator) readBitPattern(n uint16) uint64 { + if len(it.c)*8 < int(it.pos)+int(n) { + it.lastError = errChunkBoundsExceeded + return 0 + } + u := it.c.readBitPattern(it.pos, n) + it.pos += n + return u +} + +func (it *varbitChunkIterator) readSignedInt(n uint16) int64 { + u := it.readBitPattern(n) + if n < 64 && u >= 1<<(n-1) { + u -= 1 << n + } + return int64(u) +} + +// reset puts the chunk iterator into the state it had upon creation. +func (it *varbitChunkIterator) reset() { + it.pos = 0 + it.t = model.Earliest + it.dT = 0 + it.repeats = 0 + it.v = 0 + it.dV = 0 + it.leading = 0 + it.significant = 1 + it.rewound = false +} + +// rewind "rewinds" the chunk iterator by one step. Since one cannot simply +// rewind a Varbit chunk, the old values have to be provided by the +// caller. Rewinding an already rewound chunk panics. After a call of scan or +// reset, a chunk can be rewound again. 
+func (it *varbitChunkIterator) rewind(t model.Time, v model.SampleValue) { + if it.rewound { + panic("cannot rewind varbit chunk twice") + } + it.rewound = true + it.nextT = it.t + it.nextV = it.v + it.t = t + it.v = v +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit_helpers.go b/vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit_helpers.go new file mode 100644 index 000000000..cc637a992 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/chunk/varbit_helpers.go @@ -0,0 +1,75 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package chunk + +import "github.com/prometheus/common/model" + +var ( + // bit masks for consecutive bits in a byte at various offsets. + bitMask = [][]byte{ + {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // 0 bit + {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01}, // 1 bit + {0xC0, 0x60, 0x30, 0x18, 0x0C, 0x06, 0x03, 0x01}, // 2 bit + {0xE0, 0x70, 0x38, 0x1C, 0x0E, 0x07, 0x03, 0x01}, // 3 bit + {0xF0, 0x78, 0x3C, 0x1E, 0x0F, 0x07, 0x03, 0x01}, // 4 bit + {0xF8, 0x7C, 0x3E, 0x1F, 0x0F, 0x07, 0x03, 0x01}, // 5 bit + {0xFC, 0x7E, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01}, // 6 bit + {0xFE, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01}, // 7 bit + {0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01}, // 8 bit + } +) + +// isInt32 returns true if v can be represented as an int32. 
+func isInt32(v model.SampleValue) bool { + return model.SampleValue(int32(v)) == v +} + +// countBits returs the number of leading zero bits and the number of +// significant bits after that in the given bit pattern. The maximum number of +// leading zeros is 31 (so that it can be represented by a 5bit number). Leading +// zeros beyond that are considered part of the significant bits. +func countBits(pattern uint64) (leading, significant byte) { + // TODO(beorn7): This would probably be faster with ugly endless switch + // statements. + if pattern == 0 { + return + } + for pattern < 1<<63 { + leading++ + pattern <<= 1 + } + for pattern > 0 { + significant++ + pattern <<= 1 + } + if leading > 31 { // 5 bit limit. + significant += leading - 31 + leading = 31 + } + return +} + +// isSignedIntN returns if n can be represented as a signed int with the given +// bit length. +func isSignedIntN(i int64, n byte) bool { + upper := int64(1) << (n - 1) + if i >= upper { + return false + } + lower := upper - (1 << n) + if i < lower { + return false + } + return true +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/codable/codable.go b/vendor/github.com/prometheus/prometheus/storage/local/codable/codable.go new file mode 100644 index 000000000..ebabdf456 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/codable/codable.go @@ -0,0 +1,467 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// Package codable provides types that implement encoding.BinaryMarshaler and +// encoding.BinaryUnmarshaler and functions that help to encode and decode +// primitives. The Prometheus storage backend uses them to persist objects to +// files and to save objects in LevelDB. +// +// The encodings used in this package are designed in a way that objects can be +// unmarshaled from a continuous byte stream, i.e. the information when to stop +// reading is determined by the format. No separate termination information is +// needed. +// +// Strings are encoded as the length of their bytes as a varint followed by +// their bytes. +// +// Slices are encoded as their length as a varint followed by their elements. +// +// Maps are encoded as the number of mappings as a varint, followed by the +// mappings, each of which consists of the key followed by the value. +package codable + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "sync" + + "github.com/prometheus/common/model" +) + +// A byteReader is an io.ByteReader that also implements the vanilla io.Reader +// interface. +type byteReader interface { + io.Reader + io.ByteReader +} + +// bufPool is a pool for staging buffers. Using a pool allows concurrency-safe +// reuse of buffers +var bufPool sync.Pool + +// getBuf returns a buffer from the pool. The length of the returned slice is l. +func getBuf(l int) []byte { + x := bufPool.Get() + if x == nil { + return make([]byte, l) + } + buf := x.([]byte) + if cap(buf) < l { + return make([]byte, l) + } + return buf[:l] +} + +// putBuf returns a buffer to the pool. +func putBuf(buf []byte) { + bufPool.Put(buf) +} + +// EncodeVarint encodes an int64 as a varint and writes it to an io.Writer. +// It returns the number of bytes written. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. 
+func EncodeVarint(w io.Writer, i int64) (int, error) { + buf := getBuf(binary.MaxVarintLen64) + defer putBuf(buf) + + bytesWritten := binary.PutVarint(buf, i) + _, err := w.Write(buf[:bytesWritten]) + return bytesWritten, err +} + +// EncodeUvarint encodes an uint64 as a varint and writes it to an io.Writer. +// It returns the number of bytes written. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. +func EncodeUvarint(w io.Writer, i uint64) (int, error) { + buf := getBuf(binary.MaxVarintLen64) + defer putBuf(buf) + + bytesWritten := binary.PutUvarint(buf, i) + _, err := w.Write(buf[:bytesWritten]) + return bytesWritten, err +} + +// EncodeUint64 writes an uint64 to an io.Writer in big-endian byte-order. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. +func EncodeUint64(w io.Writer, u uint64) error { + buf := getBuf(8) + defer putBuf(buf) + + binary.BigEndian.PutUint64(buf, u) + _, err := w.Write(buf) + return err +} + +// DecodeUint64 reads an uint64 from an io.Reader in big-endian byte-order. +// This is a GC-friendly implementation that takes the required staging buffer +// from a buffer pool. +func DecodeUint64(r io.Reader) (uint64, error) { + buf := getBuf(8) + defer putBuf(buf) + + if _, err := io.ReadFull(r, buf); err != nil { + return 0, err + } + return binary.BigEndian.Uint64(buf), nil +} + +// encodeString writes the varint encoded length followed by the bytes of s to +// b. +func encodeString(b *bytes.Buffer, s string) error { + // Note that this should have used EncodeUvarint but a glitch happened + // while designing the checkpoint format. + if _, err := EncodeVarint(b, int64(len(s))); err != nil { + return err + } + if _, err := b.WriteString(s); err != nil { + return err + } + return nil +} + +// decodeString decodes a string encoded by encodeString. 
+func decodeString(b byteReader) (string, error) { + length, err := binary.ReadVarint(b) + if length < 0 { + err = fmt.Errorf("found negative string length during decoding: %d", length) + } + if err != nil { + return "", err + } + + buf := getBuf(int(length)) + defer putBuf(buf) + + if _, err := io.ReadFull(b, buf); err != nil { + return "", err + } + return string(buf), nil +} + +// A Metric is a model.Metric that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type Metric model.Metric + +// MarshalBinary implements encoding.BinaryMarshaler. +func (m Metric) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + // Note that this should have used EncodeUvarint but a glitch happened + // while designing the checkpoint format. + if _, err := EncodeVarint(buf, int64(len(m))); err != nil { + return nil, err + } + for l, v := range m { + if err := encodeString(buf, string(l)); err != nil { + return nil, err + } + if err := encodeString(buf, string(v)); err != nil { + return nil, err + } + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. It can be used with the +// zero value of Metric. +func (m *Metric) UnmarshalBinary(buf []byte) error { + return m.UnmarshalFromReader(bytes.NewReader(buf)) +} + +// UnmarshalFromReader unmarshals a Metric from a reader that implements +// both, io.Reader and io.ByteReader. It can be used with the zero value of +// Metric. 
+func (m *Metric) UnmarshalFromReader(r byteReader) error { + numLabelPairs, err := binary.ReadVarint(r) + if numLabelPairs < 0 { + err = fmt.Errorf("found negative numLabelPairs during unmarshaling: %d", numLabelPairs) + } + if err != nil { + return err + } + *m = make(Metric, numLabelPairs) + + for ; numLabelPairs > 0; numLabelPairs-- { + ln, err := decodeString(r) + if err != nil { + return err + } + lv, err := decodeString(r) + if err != nil { + return err + } + (*m)[model.LabelName(ln)] = model.LabelValue(lv) + } + return nil +} + +// A Fingerprint is a model.Fingerprint that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. The implementation +// depends on model.Fingerprint to be convertible to uint64. It encodes +// the fingerprint as a big-endian uint64. +type Fingerprint model.Fingerprint + +// MarshalBinary implements encoding.BinaryMarshaler. +func (fp Fingerprint) MarshalBinary() ([]byte, error) { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, uint64(fp)) + return b, nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (fp *Fingerprint) UnmarshalBinary(buf []byte) error { + *fp = Fingerprint(binary.BigEndian.Uint64(buf)) + return nil +} + +// FingerprintSet is a map[model.Fingerprint]struct{} that +// implements encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its +// binary form is identical to that of Fingerprints. +type FingerprintSet map[model.Fingerprint]struct{} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (fps FingerprintSet) MarshalBinary() ([]byte, error) { + b := make([]byte, binary.MaxVarintLen64+len(fps)*8) + lenBytes := binary.PutVarint(b, int64(len(fps))) + offset := lenBytes + + for fp := range fps { + binary.BigEndian.PutUint64(b[offset:], uint64(fp)) + offset += 8 + } + return b[:len(fps)*8+lenBytes], nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (fps *FingerprintSet) UnmarshalBinary(buf []byte) error { + numFPs, offset := binary.Varint(buf) + if offset <= 0 { + return fmt.Errorf("could not decode length of Fingerprints, varint decoding returned %d", offset) + } + *fps = make(FingerprintSet, numFPs) + + for i := 0; i < int(numFPs); i++ { + (*fps)[model.Fingerprint(binary.BigEndian.Uint64(buf[offset+i*8:]))] = struct{}{} + } + return nil +} + +// Fingerprints is a model.Fingerprints that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its binary form is +// identical to that of FingerprintSet. +type Fingerprints model.Fingerprints + +// MarshalBinary implements encoding.BinaryMarshaler. +func (fps Fingerprints) MarshalBinary() ([]byte, error) { + b := make([]byte, binary.MaxVarintLen64+len(fps)*8) + lenBytes := binary.PutVarint(b, int64(len(fps))) + + for i, fp := range fps { + binary.BigEndian.PutUint64(b[i*8+lenBytes:], uint64(fp)) + } + return b[:len(fps)*8+lenBytes], nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (fps *Fingerprints) UnmarshalBinary(buf []byte) error { + numFPs, offset := binary.Varint(buf) + if offset <= 0 { + return fmt.Errorf("could not decode length of Fingerprints, varint decoding returned %d", offset) + } + *fps = make(Fingerprints, numFPs) + + for i := range *fps { + (*fps)[i] = model.Fingerprint(binary.BigEndian.Uint64(buf[offset+i*8:])) + } + return nil +} + +// LabelPair is a model.LabelPair that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type LabelPair model.LabelPair + +// MarshalBinary implements encoding.BinaryMarshaler. +func (lp LabelPair) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if err := encodeString(buf, string(lp.Name)); err != nil { + return nil, err + } + if err := encodeString(buf, string(lp.Value)); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (lp *LabelPair) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + n, err := decodeString(r) + if err != nil { + return err + } + v, err := decodeString(r) + if err != nil { + return err + } + lp.Name = model.LabelName(n) + lp.Value = model.LabelValue(v) + return nil +} + +// LabelName is a model.LabelName that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type LabelName model.LabelName + +// MarshalBinary implements encoding.BinaryMarshaler. +func (l LabelName) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if err := encodeString(buf, string(l)); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (l *LabelName) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + n, err := decodeString(r) + if err != nil { + return err + } + *l = LabelName(n) + return nil +} + +// LabelValueSet is a map[model.LabelValue]struct{} that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its binary form is +// identical to that of LabelValues. +type LabelValueSet map[model.LabelValue]struct{} + +// MarshalBinary implements encoding.BinaryMarshaler. +func (vs LabelValueSet) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + // Note that this should have used EncodeUvarint but a glitch happened + // while designing the checkpoint format. + if _, err := EncodeVarint(buf, int64(len(vs))); err != nil { + return nil, err + } + for v := range vs { + if err := encodeString(buf, string(v)); err != nil { + return nil, err + } + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. 
+func (vs *LabelValueSet) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + numValues, err := binary.ReadVarint(r) + if numValues < 0 { + err = fmt.Errorf("found negative number of values during unmarshaling: %d", numValues) + } + if err != nil { + return err + } + *vs = make(LabelValueSet, numValues) + + for i := int64(0); i < numValues; i++ { + v, err := decodeString(r) + if err != nil { + return err + } + (*vs)[model.LabelValue(v)] = struct{}{} + } + return nil +} + +// LabelValues is a model.LabelValues that implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. Its binary form is +// identical to that of LabelValueSet. +type LabelValues model.LabelValues + +// MarshalBinary implements encoding.BinaryMarshaler. +func (vs LabelValues) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + // Note that this should have used EncodeUvarint but a glitch happened + // while designing the checkpoint format. + if _, err := EncodeVarint(buf, int64(len(vs))); err != nil { + return nil, err + } + for _, v := range vs { + if err := encodeString(buf, string(v)); err != nil { + return nil, err + } + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (vs *LabelValues) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + numValues, err := binary.ReadVarint(r) + if numValues < 0 { + err = fmt.Errorf("found negative number of values during unmarshaling: %d", numValues) + } + if err != nil { + return err + } + *vs = make(LabelValues, numValues) + + for i := range *vs { + v, err := decodeString(r) + if err != nil { + return err + } + (*vs)[i] = model.LabelValue(v) + } + return nil +} + +// TimeRange is used to define a time range and implements +// encoding.BinaryMarshaler and encoding.BinaryUnmarshaler. +type TimeRange struct { + First, Last model.Time +} + +// MarshalBinary implements encoding.BinaryMarshaler. 
+func (tr TimeRange) MarshalBinary() ([]byte, error) { + buf := &bytes.Buffer{} + if _, err := EncodeVarint(buf, int64(tr.First)); err != nil { + return nil, err + } + if _, err := EncodeVarint(buf, int64(tr.Last)); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +// UnmarshalBinary implements encoding.BinaryUnmarshaler. +func (tr *TimeRange) UnmarshalBinary(buf []byte) error { + r := bytes.NewReader(buf) + first, err := binary.ReadVarint(r) + if err != nil { + return err + } + last, err := binary.ReadVarint(r) + if err != nil { + return err + } + tr.First = model.Time(first) + tr.Last = model.Time(last) + return nil +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/crashrecovery.go b/vendor/github.com/prometheus/prometheus/storage/local/crashrecovery.go new file mode 100644 index 000000000..822678b4d --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/crashrecovery.go @@ -0,0 +1,559 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package local + +import ( + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync/atomic" + + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/storage/local/codable" + "github.com/prometheus/prometheus/storage/local/index" +) + +// recoverFromCrash is called by loadSeriesMapAndHeads if the persistence +// appears to be dirty after the loading (either because the loading resulted in +// an error or because the persistence was dirty from the start). Not goroutine +// safe. Only call before anything else is running (except index processing +// queue as started by newPersistence). +func (p *persistence) recoverFromCrash(fingerprintToSeries map[model.Fingerprint]*memorySeries) error { + // TODO(beorn): We need proper tests for the crash recovery. + log.Warn("Starting crash recovery. Prometheus is inoperational until complete.") + log.Warn("To avoid crash recovery in the future, shut down Prometheus with SIGTERM or a HTTP POST to /-/quit.") + + fpsSeen := map[model.Fingerprint]struct{}{} + count := 0 + seriesDirNameFmt := fmt.Sprintf("%%0%dx", seriesDirNameLen) + + // Delete the fingerprint mapping file as it might be stale or + // corrupt. We'll rebuild the mappings as we go. + if err := os.RemoveAll(p.mappingsFileName()); err != nil { + return fmt.Errorf("couldn't remove old fingerprint mapping file %s: %s", p.mappingsFileName(), err) + } + // The mappings to rebuild. 
+ fpm := fpMappings{} + + log.Info("Scanning files.") + for i := 0; i < 1<<(seriesDirNameLen*4); i++ { + dirname := filepath.Join(p.basePath, fmt.Sprintf(seriesDirNameFmt, i)) + dir, err := os.Open(dirname) + if os.IsNotExist(err) { + continue + } + if err != nil { + return err + } + for fis := []os.FileInfo{}; err != io.EOF; fis, err = dir.Readdir(1024) { + if err != nil { + dir.Close() + return err + } + for _, fi := range fis { + fp, ok := p.sanitizeSeries(dirname, fi, fingerprintToSeries, fpm) + if ok { + fpsSeen[fp] = struct{}{} + } + count++ + if count%10000 == 0 { + log.Infof("%d files scanned.", count) + } + } + } + dir.Close() + } + log.Infof("File scan complete. %d series found.", len(fpsSeen)) + + log.Info("Checking for series without series file.") + for fp, s := range fingerprintToSeries { + if _, seen := fpsSeen[fp]; !seen { + // fp exists in fingerprintToSeries, but has no representation on disk. + if s.persistWatermark >= len(s.chunkDescs) { + // Oops, everything including the head chunk was + // already persisted, but nothing on disk. Or + // the persistWatermark is plainly wrong. Thus, + // we lost that series completely. Clean up the + // remnants. + delete(fingerprintToSeries, fp) + if err := p.purgeArchivedMetric(fp); err != nil { + // Purging the archived metric didn't work, so try + // to unindex it, just in case it's in the indexes. + p.unindexMetric(fp, s.metric) + } + log.Warnf("Lost series detected: fingerprint %v, metric %v.", fp, s.metric) + continue + } + // If we are here, the only chunks we have are the chunks in the checkpoint. + // Adjust things accordingly. 
+ if s.persistWatermark > 0 || s.chunkDescsOffset != 0 { + minLostChunks := s.persistWatermark + s.chunkDescsOffset + if minLostChunks <= 0 { + log.Warnf( + "Possible loss of chunks for fingerprint %v, metric %v.", + fp, s.metric, + ) + } else { + log.Warnf( + "Lost at least %d chunks for fingerprint %v, metric %v.", + minLostChunks, fp, s.metric, + ) + } + s.chunkDescs = append( + make([]*chunk.Desc, 0, len(s.chunkDescs)-s.persistWatermark), + s.chunkDescs[s.persistWatermark:]..., + ) + chunk.NumMemDescs.Sub(float64(s.persistWatermark)) + s.persistWatermark = 0 + s.chunkDescsOffset = 0 + } + maybeAddMapping(fp, s.metric, fpm) + fpsSeen[fp] = struct{}{} // Add so that fpsSeen is complete. + } + } + log.Info("Check for series without series file complete.") + + if err := p.cleanUpArchiveIndexes(fingerprintToSeries, fpsSeen, fpm); err != nil { + return err + } + if err := p.rebuildLabelIndexes(fingerprintToSeries); err != nil { + return err + } + // Finally rewrite the mappings file if there are any mappings. + if len(fpm) > 0 { + if err := p.checkpointFPMappings(fpm); err != nil { + return err + } + } + + p.dirtyMtx.Lock() + // Only declare storage clean if it didn't become dirty during crash recovery. + if !p.becameDirty { + p.dirty = false + } + p.dirtyMtx.Unlock() + + log.Warn("Crash recovery complete.") + return nil +} + +// sanitizeSeries sanitizes a series based on its series file as defined by the +// provided directory and FileInfo. The method returns the fingerprint as +// derived from the directory and file name, and whether the provided file has +// been sanitized. A file that failed to be sanitized is moved into the +// "orphaned" sub-directory, if possible. +// +// The following steps are performed: +// +// - A file whose name doesn't comply with the naming scheme of a series file is +// simply moved into the orphaned directory. +// +// - If the size of the series file isn't a multiple of the chunk size, +// extraneous bytes are truncated. 
If the truncation fails, the file is +// moved into the orphaned directory. +// +// - A file that is empty (after truncation) is deleted. +// +// - A series that is not archived (i.e. it is in the fingerprintToSeries map) +// is checked for consistency of its various parameters (like persist +// watermark, offset of chunkDescs etc.). In particular, overlap between an +// in-memory head chunk with the most recent persisted chunk is +// checked. Inconsistencies are rectified. +// +// - A series that is archived (i.e. it is not in the fingerprintToSeries map) +// is checked for its presence in the index of archived series. If it cannot +// be found there, it is moved into the orphaned directory. +func (p *persistence) sanitizeSeries( + dirname string, fi os.FileInfo, + fingerprintToSeries map[model.Fingerprint]*memorySeries, + fpm fpMappings, +) (model.Fingerprint, bool) { + var ( + fp model.Fingerprint + err error + filename = filepath.Join(dirname, fi.Name()) + s *memorySeries + ) + + purge := func() { + if fp != 0 { + var metric model.Metric + if s != nil { + metric = s.metric + } + if err = p.quarantineSeriesFile( + fp, errors.New("purge during crash recovery"), metric, + ); err == nil { + return + } + log. + With("file", filename). + With("error", err). + Error("Failed to move lost series file to orphaned directory.") + } + // If we are here, we are either purging an incorrectly named + // file, or quarantining has failed. So simply delete the file. + if err = os.Remove(filename); err != nil { + log. + With("file", filename). + With("error", err). 
+ Error("Failed to delete lost series file.") + } + } + + if len(fi.Name()) != fpLen-seriesDirNameLen+len(seriesFileSuffix) || + !strings.HasSuffix(fi.Name(), seriesFileSuffix) { + log.Warnf("Unexpected series file name %s.", filename) + purge() + return fp, false + } + if fp, err = model.FingerprintFromString(filepath.Base(dirname) + fi.Name()[:fpLen-seriesDirNameLen]); err != nil { + log.Warnf("Error parsing file name %s: %s", filename, err) + purge() + return fp, false + } + + bytesToTrim := fi.Size() % int64(chunkLenWithHeader) + chunksInFile := int(fi.Size()) / chunkLenWithHeader + modTime := fi.ModTime() + if bytesToTrim != 0 { + log.Warnf( + "Truncating file %s to exactly %d chunks, trimming %d extraneous bytes.", + filename, chunksInFile, bytesToTrim, + ) + f, err := os.OpenFile(filename, os.O_WRONLY, 0640) + if err != nil { + log.Errorf("Could not open file %s: %s", filename, err) + purge() + return fp, false + } + if err := f.Truncate(fi.Size() - bytesToTrim); err != nil { + log.Errorf("Failed to truncate file %s: %s", filename, err) + purge() + return fp, false + } + } + if chunksInFile == 0 { + log.Warnf("No chunks left in file %s.", filename) + purge() + return fp, false + } + + s, ok := fingerprintToSeries[fp] + if ok { // This series is supposed to not be archived. + if s == nil { + panic("fingerprint mapped to nil pointer") + } + maybeAddMapping(fp, s.metric, fpm) + if !p.pedanticChecks && + bytesToTrim == 0 && + s.chunkDescsOffset != -1 && + chunksInFile == s.chunkDescsOffset+s.persistWatermark && + modTime.Equal(s.modTime) { + // Everything is consistent. We are good. + return fp, true + } + // If we are here, we cannot be sure the series file is + // consistent with the checkpoint, so we have to take a closer + // look. + if s.headChunkClosed { + // This is the easy case as we have all chunks on + // disk. Treat this series as a freshly unarchived one + // by loading the chunkDescs and setting all parameters + // based on the loaded chunkDescs. 
+ cds, err := p.loadChunkDescs(fp, 0) + if err != nil { + log.Errorf( + "Failed to load chunk descriptors for metric %v, fingerprint %v: %s", + s.metric, fp, err, + ) + purge() + return fp, false + } + log.Warnf( + "Treating recovered metric %v, fingerprint %v, as freshly unarchived, with %d chunks in series file.", + s.metric, fp, len(cds), + ) + s.chunkDescs = cds + s.chunkDescsOffset = 0 + s.savedFirstTime = cds[0].FirstTime() + s.lastTime, err = cds[len(cds)-1].LastTime() + if err != nil { + log.Errorf( + "Failed to determine time of the last sample for metric %v, fingerprint %v: %s", + s.metric, fp, err, + ) + purge() + return fp, false + } + s.persistWatermark = len(cds) + s.modTime = modTime + // Finally, evict again all chunk.Descs except the latest one to save memory. + s.evictChunkDescs(len(cds) - 1) + return fp, true + } + // This is the tricky one: We have chunks from heads.db, but + // some of those chunks might already be in the series + // file. Strategy: Take the last time of the most recent chunk + // in the series file. Then find the oldest chunk among those + // from heads.db that has a first time later or equal to the + // last time from the series file. Throw away the older chunks + // from heads.db and stitch the parts together. + + // First, throw away the chunkDescs without chunks. 
+ s.chunkDescs = s.chunkDescs[s.persistWatermark:] + chunk.NumMemDescs.Sub(float64(s.persistWatermark)) + cds, err := p.loadChunkDescs(fp, 0) + if err != nil { + log.Errorf( + "Failed to load chunk descriptors for metric %v, fingerprint %v: %s", + s.metric, fp, err, + ) + purge() + return fp, false + } + s.persistWatermark = len(cds) + s.chunkDescsOffset = 0 + s.savedFirstTime = cds[0].FirstTime() + s.modTime = modTime + + lastTime, err := cds[len(cds)-1].LastTime() + if err != nil { + log.Errorf( + "Failed to determine time of the last sample for metric %v, fingerprint %v: %s", + s.metric, fp, err, + ) + purge() + return fp, false + } + keepIdx := -1 + for i, cd := range s.chunkDescs { + if cd.FirstTime() >= lastTime { + keepIdx = i + break + } + } + if keepIdx == -1 { + log.Warnf( + "Recovered metric %v, fingerprint %v: all %d chunks recovered from series file.", + s.metric, fp, chunksInFile, + ) + chunk.NumMemDescs.Sub(float64(len(s.chunkDescs))) + atomic.AddInt64(&chunk.NumMemChunks, int64(-len(s.chunkDescs))) + s.chunkDescs = cds + s.headChunkClosed = true + // Finally, evict again all chunk.Descs except the latest one to save memory. + s.evictChunkDescs(len(cds) - 1) + return fp, true + } + log.Warnf( + "Recovered metric %v, fingerprint %v: recovered %d chunks from series file, recovered %d chunks from checkpoint.", + s.metric, fp, chunksInFile, len(s.chunkDescs)-keepIdx, + ) + chunk.NumMemDescs.Sub(float64(keepIdx)) + atomic.AddInt64(&chunk.NumMemChunks, int64(-keepIdx)) + chunkDescsToEvict := len(cds) + if keepIdx == len(s.chunkDescs) { + // No chunks from series file left, head chunk is evicted, so declare it closed. + s.headChunkClosed = true + chunkDescsToEvict-- // Keep one chunk.Desc in this case to avoid a series with zero chunk.Descs. + } + s.chunkDescs = append(cds, s.chunkDescs[keepIdx:]...) + // Finally, evict again chunk.Descs without chunk to save memory. 
+ s.evictChunkDescs(chunkDescsToEvict) + return fp, true + } + // This series is supposed to be archived. + metric, err := p.archivedMetric(fp) + if err != nil { + log.Errorf( + "Fingerprint %v assumed archived but couldn't be looked up in archived index: %s", + fp, err, + ) + purge() + return fp, false + } + if metric == nil { + log.Warnf( + "Fingerprint %v assumed archived but couldn't be found in archived index.", + fp, + ) + purge() + return fp, false + } + // This series looks like a properly archived one. + maybeAddMapping(fp, metric, fpm) + return fp, true +} + +func (p *persistence) cleanUpArchiveIndexes( + fpToSeries map[model.Fingerprint]*memorySeries, + fpsSeen map[model.Fingerprint]struct{}, + fpm fpMappings, +) error { + log.Info("Cleaning up archive indexes.") + var fp codable.Fingerprint + var m codable.Metric + count := 0 + if err := p.archivedFingerprintToMetrics.ForEach(func(kv index.KeyValueAccessor) error { + count++ + if count%10000 == 0 { + log.Infof("%d archived metrics checked.", count) + } + if err := kv.Key(&fp); err != nil { + return err + } + _, fpSeen := fpsSeen[model.Fingerprint(fp)] + inMemory := false + if fpSeen { + _, inMemory = fpToSeries[model.Fingerprint(fp)] + } + if !fpSeen || inMemory { + if inMemory { + log.Warnf("Archive clean-up: Fingerprint %v is not archived. Purging from archive indexes.", model.Fingerprint(fp)) + } + if !fpSeen { + log.Warnf("Archive clean-up: Fingerprint %v is unknown. Purging from archive indexes.", model.Fingerprint(fp)) + } + // It's fine if the fp is not in the archive indexes. + if _, err := p.archivedFingerprintToMetrics.Delete(fp); err != nil { + return err + } + // Delete from timerange index, too. + _, err := p.archivedFingerprintToTimeRange.Delete(fp) + return err + } + // fp is legitimately archived. Now we need the metric to check for a mapped fingerprint. 
+		if err := kv.Value(&m); err != nil {
+			return err
+		}
+		maybeAddMapping(model.Fingerprint(fp), model.Metric(m), fpm)
+		// Make sure it is in timerange index, too.
+		has, err := p.archivedFingerprintToTimeRange.Has(fp)
+		if err != nil {
+			return err
+		}
+		if has {
+			return nil // All good.
+		}
+		// Supply the fingerprint argument for the %v verb; without it the log
+		// line prints "%!v(MISSING)" (go vet printf catches this).
+		log.Warnf("Archive clean-up: Fingerprint %v is not in time-range index. Unarchiving it for recovery.", model.Fingerprint(fp))
+		// Again, it's fine if fp is not in the archive index.
+		if _, err := p.archivedFingerprintToMetrics.Delete(fp); err != nil {
+			return err
+		}
+		cds, err := p.loadChunkDescs(model.Fingerprint(fp), 0)
+		if err != nil {
+			return err
+		}
+		series, err := newMemorySeries(model.Metric(m), cds, p.seriesFileModTime(model.Fingerprint(fp)))
+		if err != nil {
+			return err
+		}
+		fpToSeries[model.Fingerprint(fp)] = series
+		// Evict all but one chunk.Desc to save memory.
+		series.evictChunkDescs(len(cds) - 1)
+		return nil
+	}); err != nil {
+		return err
+	}
+	count = 0
+	if err := p.archivedFingerprintToTimeRange.ForEach(func(kv index.KeyValueAccessor) error {
+		count++
+		if count%10000 == 0 {
+			log.Infof("%d archived time ranges checked.", count)
+		}
+		if err := kv.Key(&fp); err != nil {
+			return err
+		}
+		has, err := p.archivedFingerprintToMetrics.Has(fp)
+		if err != nil {
+			return err
+		}
+		if has {
+			return nil // All good.
+		}
+		log.Warnf("Archive clean-up: Purging unknown fingerprint %v in time-range index.", fp)
+		deleted, err := p.archivedFingerprintToTimeRange.Delete(fp)
+		if err != nil {
+			return err
+		}
+		if !deleted {
+			log.Errorf("Fingerprint %v to be deleted from archivedFingerprintToTimeRange not found. 
This should never happen.", fp) + } + return nil + }); err != nil { + return err + } + log.Info("Clean-up of archive indexes complete.") + return nil +} + +func (p *persistence) rebuildLabelIndexes( + fpToSeries map[model.Fingerprint]*memorySeries, +) error { + count := 0 + log.Info("Rebuilding label indexes.") + log.Info("Indexing metrics in memory.") + for fp, s := range fpToSeries { + p.indexMetric(fp, s.metric) + count++ + if count%10000 == 0 { + log.Infof("%d metrics queued for indexing.", count) + } + } + log.Info("Indexing archived metrics.") + var fp codable.Fingerprint + var m codable.Metric + if err := p.archivedFingerprintToMetrics.ForEach(func(kv index.KeyValueAccessor) error { + if err := kv.Key(&fp); err != nil { + return err + } + if err := kv.Value(&m); err != nil { + return err + } + p.indexMetric(model.Fingerprint(fp), model.Metric(m)) + count++ + if count%10000 == 0 { + log.Infof("%d metrics queued for indexing.", count) + } + return nil + }); err != nil { + return err + } + log.Info("All requests for rebuilding the label indexes queued. (Actual processing may lag behind.)") + return nil +} + +// maybeAddMapping adds a fingerprint mapping to fpm if the FastFingerprint of m is different from fp. 
+func maybeAddMapping(fp model.Fingerprint, m model.Metric, fpm fpMappings) { + if rawFP := m.FastFingerprint(); rawFP != fp { + log.Warnf( + "Metric %v with fingerprint %v is mapped from raw fingerprint %v.", + m, fp, rawFP, + ) + if mappedFPs, ok := fpm[rawFP]; ok { + mappedFPs[metricToUniqueString(m)] = fp + } else { + fpm[rawFP] = map[string]model.Fingerprint{ + metricToUniqueString(m): fp, + } + } + } +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/heads.go b/vendor/github.com/prometheus/prometheus/storage/local/heads.go new file mode 100644 index 000000000..887659170 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/heads.go @@ -0,0 +1,261 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "bufio" + "encoding/binary" + "fmt" + "io" + "os" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/storage/local/codable" +) + +const ( + headsFileName = "heads.db" + headsTempFileName = "heads.db.tmp" + headsFormatVersion = 2 + headsFormatLegacyVersion = 1 // Can read, but will never write. + headsMagicString = "PrometheusHeads" +) + +// headsScanner is a scanner to read time series with their heads from a +// heads.db file. It follows a similar semantics as the bufio.Scanner. +// It is not safe to use a headsScanner concurrently. 
+type headsScanner struct { + f *os.File + r *bufio.Reader + fp model.Fingerprint // Read after each scan() call that has returned true. + series *memorySeries // Read after each scan() call that has returned true. + version int64 // Read after newHeadsScanner has returned. + seriesTotal uint64 // Read after newHeadsScanner has returned. + seriesCurrent uint64 + chunksToPersistTotal int64 // Read after scan() has returned false. + err error // Read after scan() has returned false. +} + +func newHeadsScanner(filename string) *headsScanner { + hs := &headsScanner{} + defer func() { + if hs.f != nil && hs.err != nil { + hs.f.Close() + } + }() + + if hs.f, hs.err = os.Open(filename); hs.err != nil { + return hs + } + hs.r = bufio.NewReaderSize(hs.f, fileBufSize) + + buf := make([]byte, len(headsMagicString)) + if _, hs.err = io.ReadFull(hs.r, buf); hs.err != nil { + return hs + } + magic := string(buf) + if magic != headsMagicString { + hs.err = fmt.Errorf( + "unexpected magic string, want %q, got %q", + headsMagicString, magic, + ) + return hs + } + hs.version, hs.err = binary.ReadVarint(hs.r) + if (hs.version != headsFormatVersion && hs.version != headsFormatLegacyVersion) || hs.err != nil { + hs.err = fmt.Errorf( + "unknown or unreadable heads format version, want %d, got %d, error: %s", + headsFormatVersion, hs.version, hs.err, + ) + return hs + } + if hs.seriesTotal, hs.err = codable.DecodeUint64(hs.r); hs.err != nil { + return hs + } + return hs +} + +// scan works like bufio.Scanner.Scan. 
+func (hs *headsScanner) scan() bool { + if hs.seriesCurrent == hs.seriesTotal || hs.err != nil { + return false + } + + var ( + seriesFlags byte + fpAsInt uint64 + metric codable.Metric + persistWatermark int64 + modTimeNano int64 + modTime time.Time + chunkDescsOffset int64 + savedFirstTime int64 + numChunkDescs int64 + firstTime int64 + lastTime int64 + encoding byte + ch chunk.Chunk + lastTimeHead model.Time + ) + if seriesFlags, hs.err = hs.r.ReadByte(); hs.err != nil { + return false + } + headChunkPersisted := seriesFlags&flagHeadChunkPersisted != 0 + if fpAsInt, hs.err = codable.DecodeUint64(hs.r); hs.err != nil { + return false + } + hs.fp = model.Fingerprint(fpAsInt) + + if hs.err = metric.UnmarshalFromReader(hs.r); hs.err != nil { + return false + } + if hs.version != headsFormatLegacyVersion { + // persistWatermark only present in v2. + persistWatermark, hs.err = binary.ReadVarint(hs.r) + if persistWatermark < 0 { + hs.err = fmt.Errorf("found negative persist watermark in checkpoint: %d", persistWatermark) + } + if hs.err != nil { + return false + } + modTimeNano, hs.err = binary.ReadVarint(hs.r) + if hs.err != nil { + return false + } + if modTimeNano != -1 { + modTime = time.Unix(0, modTimeNano) + } + } + if chunkDescsOffset, hs.err = binary.ReadVarint(hs.r); hs.err != nil { + return false + } + if savedFirstTime, hs.err = binary.ReadVarint(hs.r); hs.err != nil { + return false + } + + if numChunkDescs, hs.err = binary.ReadVarint(hs.r); hs.err != nil { + return false + } + if numChunkDescs < 0 { + hs.err = fmt.Errorf("found negative number of chunk descriptors in checkpoint: %d", numChunkDescs) + return false + } + + chunkDescs := make([]*chunk.Desc, numChunkDescs) + if hs.version == headsFormatLegacyVersion { + if headChunkPersisted { + persistWatermark = numChunkDescs + } else { + persistWatermark = numChunkDescs - 1 + } + } + headChunkClosed := true // Initial assumption. 
+ for i := int64(0); i < numChunkDescs; i++ { + if i < persistWatermark { + if firstTime, hs.err = binary.ReadVarint(hs.r); hs.err != nil { + return false + } + if lastTime, hs.err = binary.ReadVarint(hs.r); hs.err != nil { + return false + } + chunkDescs[i] = &chunk.Desc{ + ChunkFirstTime: model.Time(firstTime), + ChunkLastTime: model.Time(lastTime), + } + chunk.NumMemDescs.Inc() + } else { + // Non-persisted chunk. + // If there are non-persisted chunks at all, we consider + // the head chunk not to be closed yet. + headChunkClosed = false + if encoding, hs.err = hs.r.ReadByte(); hs.err != nil { + return false + } + if ch, hs.err = chunk.NewForEncoding(chunk.Encoding(encoding)); hs.err != nil { + return false + } + if hs.err = ch.Unmarshal(hs.r); hs.err != nil { + return false + } + cd := chunk.NewDesc(ch, ch.FirstTime()) + if i < numChunkDescs-1 { + // This is NOT the head chunk. So it's a chunk + // to be persisted, and we need to populate lastTime. + hs.chunksToPersistTotal++ + if hs.err = cd.MaybePopulateLastTime(); hs.err != nil { + return false + } + } + chunkDescs[i] = cd + } + } + + if lastTimeHead, hs.err = chunkDescs[len(chunkDescs)-1].LastTime(); hs.err != nil { + return false + } + + hs.series = &memorySeries{ + metric: model.Metric(metric), + chunkDescs: chunkDescs, + persistWatermark: int(persistWatermark), + modTime: modTime, + chunkDescsOffset: int(chunkDescsOffset), + savedFirstTime: model.Time(savedFirstTime), + lastTime: lastTimeHead, + headChunkClosed: headChunkClosed, + } + hs.seriesCurrent++ + return true +} + +// close closes the underlying file if required. +func (hs *headsScanner) close() { + if hs.f != nil { + hs.f.Close() + } +} + +// DumpHeads writes the metadata of the provided heads file in a human-readable +// form. 
+func DumpHeads(filename string, out io.Writer) error { + hs := newHeadsScanner(filename) + defer hs.close() + + if hs.err == nil { + fmt.Fprintf( + out, + ">>> Dumping %d series from heads file %q with format version %d. <<<\n", + hs.seriesTotal, filename, hs.version, + ) + } + for hs.scan() { + s := hs.series + fmt.Fprintf( + out, + "FP=%v\tMETRIC=%s\tlen(chunkDescs)=%d\tpersistWatermark=%d\tchunkDescOffset=%d\tsavedFirstTime=%v\tlastTime=%v\theadChunkClosed=%t\n", + hs.fp, s.metric, len(s.chunkDescs), s.persistWatermark, s.chunkDescsOffset, s.savedFirstTime, s.lastTime, s.headChunkClosed, + ) + } + if hs.err == nil { + fmt.Fprintf( + out, + ">>> Dump complete. %d chunks to persist. <<<\n", + hs.chunksToPersistTotal, + ) + } + return hs.err +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/index/index.go b/vendor/github.com/prometheus/prometheus/storage/local/index/index.go new file mode 100644 index 000000000..1f33d5201 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/index/index.go @@ -0,0 +1,303 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package index provides a number of indexes backed by persistent key-value +// stores. The only supported implementation of a key-value store is currently +// goleveldb, but other implementations can easily be added. 
+package index + +import ( + "os" + "path" + "path/filepath" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/local/codable" +) + +// Directory names for LevelDB indices. +const ( + FingerprintToMetricDir = "archived_fingerprint_to_metric" + FingerprintTimeRangeDir = "archived_fingerprint_to_timerange" + LabelNameToLabelValuesDir = "labelname_to_labelvalues" + LabelPairToFingerprintsDir = "labelpair_to_fingerprints" +) + +// LevelDB cache sizes, changeable via flags. +var ( + FingerprintMetricCacheSize = 10 * 1024 * 1024 + FingerprintTimeRangeCacheSize = 5 * 1024 * 1024 + LabelNameLabelValuesCacheSize = 10 * 1024 * 1024 + LabelPairFingerprintsCacheSize = 20 * 1024 * 1024 +) + +// FingerprintMetricMapping is an in-memory map of fingerprints to metrics. +type FingerprintMetricMapping map[model.Fingerprint]model.Metric + +// FingerprintMetricIndex models a database mapping fingerprints to metrics. +type FingerprintMetricIndex struct { + KeyValueStore +} + +// IndexBatch indexes a batch of mappings from fingerprints to metrics. +// +// This method is goroutine-safe, but note that no specific order of execution +// can be guaranteed (especially critical if IndexBatch and UnindexBatch are +// called concurrently for the same fingerprint). +func (i *FingerprintMetricIndex) IndexBatch(mapping FingerprintMetricMapping) error { + b := i.NewBatch() + + for fp, m := range mapping { + if err := b.Put(codable.Fingerprint(fp), codable.Metric(m)); err != nil { + return err + } + } + + return i.Commit(b) +} + +// UnindexBatch unindexes a batch of mappings from fingerprints to metrics. +// +// This method is goroutine-safe, but note that no specific order of execution +// can be guaranteed (especially critical if IndexBatch and UnindexBatch are +// called concurrently for the same fingerprint). 
+func (i *FingerprintMetricIndex) UnindexBatch(mapping FingerprintMetricMapping) error { + b := i.NewBatch() + + for fp := range mapping { + if err := b.Delete(codable.Fingerprint(fp)); err != nil { + return err + } + } + + return i.Commit(b) +} + +// Lookup looks up a metric by fingerprint. Looking up a non-existing +// fingerprint is not an error. In that case, (nil, false, nil) is returned. +// +// This method is goroutine-safe. +func (i *FingerprintMetricIndex) Lookup(fp model.Fingerprint) (metric model.Metric, ok bool, err error) { + ok, err = i.Get(codable.Fingerprint(fp), (*codable.Metric)(&metric)) + return +} + +// NewFingerprintMetricIndex returns a LevelDB-backed FingerprintMetricIndex +// ready to use. +func NewFingerprintMetricIndex(basePath string) (*FingerprintMetricIndex, error) { + fingerprintToMetricDB, err := NewLevelDB(LevelDBOptions{ + Path: filepath.Join(basePath, FingerprintToMetricDir), + CacheSizeBytes: FingerprintMetricCacheSize, + }) + if err != nil { + return nil, err + } + return &FingerprintMetricIndex{ + KeyValueStore: fingerprintToMetricDB, + }, nil +} + +// LabelNameLabelValuesMapping is an in-memory map of label names to +// label values. +type LabelNameLabelValuesMapping map[model.LabelName]codable.LabelValueSet + +// LabelNameLabelValuesIndex is a KeyValueStore that maps existing label names +// to all label values stored for that label name. +type LabelNameLabelValuesIndex struct { + KeyValueStore +} + +// IndexBatch adds a batch of label name to label values mappings to the +// index. A mapping of a label name to an empty slice of label values results in +// a deletion of that mapping from the index. +// +// While this method is fundamentally goroutine-safe, note that the order of +// execution for multiple batches executed concurrently is undefined. 
+func (i *LabelNameLabelValuesIndex) IndexBatch(b LabelNameLabelValuesMapping) error { + batch := i.NewBatch() + + for name, values := range b { + if len(values) == 0 { + if err := batch.Delete(codable.LabelName(name)); err != nil { + return err + } + } else { + if err := batch.Put(codable.LabelName(name), values); err != nil { + return err + } + } + } + + return i.Commit(batch) +} + +// Lookup looks up all label values for a given label name and returns them as +// model.LabelValues (which is a slice). Looking up a non-existing label +// name is not an error. In that case, (nil, false, nil) is returned. +// +// This method is goroutine-safe. +func (i *LabelNameLabelValuesIndex) Lookup(l model.LabelName) (values model.LabelValues, ok bool, err error) { + ok, err = i.Get(codable.LabelName(l), (*codable.LabelValues)(&values)) + return +} + +// LookupSet looks up all label values for a given label name and returns them +// as a set. Looking up a non-existing label name is not an error. In that case, +// (nil, false, nil) is returned. +// +// This method is goroutine-safe. +func (i *LabelNameLabelValuesIndex) LookupSet(l model.LabelName) (values map[model.LabelValue]struct{}, ok bool, err error) { + ok, err = i.Get(codable.LabelName(l), (*codable.LabelValueSet)(&values)) + if values == nil { + values = map[model.LabelValue]struct{}{} + } + return +} + +// NewLabelNameLabelValuesIndex returns a LevelDB-backed +// LabelNameLabelValuesIndex ready to use. +func NewLabelNameLabelValuesIndex(basePath string) (*LabelNameLabelValuesIndex, error) { + labelNameToLabelValuesDB, err := NewLevelDB(LevelDBOptions{ + Path: filepath.Join(basePath, LabelNameToLabelValuesDir), + CacheSizeBytes: LabelNameLabelValuesCacheSize, + }) + if err != nil { + return nil, err + } + return &LabelNameLabelValuesIndex{ + KeyValueStore: labelNameToLabelValuesDB, + }, nil +} + +// DeleteLabelNameLabelValuesIndex deletes the LevelDB-backed +// LabelNameLabelValuesIndex. 
Use only for a not yet opened index. +func DeleteLabelNameLabelValuesIndex(basePath string) error { + return os.RemoveAll(path.Join(basePath, LabelNameToLabelValuesDir)) +} + +// LabelPairFingerprintsMapping is an in-memory map of label pairs to +// fingerprints. +type LabelPairFingerprintsMapping map[model.LabelPair]codable.FingerprintSet + +// LabelPairFingerprintIndex is a KeyValueStore that maps existing label pairs +// to the fingerprints of all metrics containing those label pairs. +type LabelPairFingerprintIndex struct { + KeyValueStore +} + +// IndexBatch indexes a batch of mappings from label pairs to fingerprints. A +// mapping to an empty slice of fingerprints results in deletion of that mapping +// from the index. +// +// While this method is fundamentally goroutine-safe, note that the order of +// execution for multiple batches executed concurrently is undefined. +func (i *LabelPairFingerprintIndex) IndexBatch(m LabelPairFingerprintsMapping) (err error) { + batch := i.NewBatch() + + for pair, fps := range m { + if len(fps) == 0 { + err = batch.Delete(codable.LabelPair(pair)) + } else { + err = batch.Put(codable.LabelPair(pair), fps) + } + + if err != nil { + return err + } + } + + return i.Commit(batch) +} + +// Lookup looks up all fingerprints for a given label pair. Looking up a +// non-existing label pair is not an error. In that case, (nil, false, nil) is +// returned. +// +// This method is goroutine-safe. +func (i *LabelPairFingerprintIndex) Lookup(p model.LabelPair) (fps model.Fingerprints, ok bool, err error) { + ok, err = i.Get((codable.LabelPair)(p), (*codable.Fingerprints)(&fps)) + return +} + +// LookupSet looks up all fingerprints for a given label pair. Looking up a +// non-existing label pair is not an error. In that case, (nil, false, nil) is +// returned. +// +// This method is goroutine-safe. 
+func (i *LabelPairFingerprintIndex) LookupSet(p model.LabelPair) (fps map[model.Fingerprint]struct{}, ok bool, err error) { + ok, err = i.Get((codable.LabelPair)(p), (*codable.FingerprintSet)(&fps)) + if fps == nil { + fps = map[model.Fingerprint]struct{}{} + } + return +} + +// NewLabelPairFingerprintIndex returns a LevelDB-backed +// LabelPairFingerprintIndex ready to use. +func NewLabelPairFingerprintIndex(basePath string) (*LabelPairFingerprintIndex, error) { + labelPairToFingerprintsDB, err := NewLevelDB(LevelDBOptions{ + Path: filepath.Join(basePath, LabelPairToFingerprintsDir), + CacheSizeBytes: LabelPairFingerprintsCacheSize, + }) + if err != nil { + return nil, err + } + return &LabelPairFingerprintIndex{ + KeyValueStore: labelPairToFingerprintsDB, + }, nil +} + +// DeleteLabelPairFingerprintIndex deletes the LevelDB-backed +// LabelPairFingerprintIndex. Use only for a not yet opened index. +func DeleteLabelPairFingerprintIndex(basePath string) error { + return os.RemoveAll(path.Join(basePath, LabelPairToFingerprintsDir)) +} + +// FingerprintTimeRangeIndex models a database tracking the time ranges +// of metrics by their fingerprints. +type FingerprintTimeRangeIndex struct { + KeyValueStore +} + +// Lookup returns the time range for the given fingerprint. Looking up a +// non-existing fingerprint is not an error. In that case, (0, 0, false, nil) is +// returned. +// +// This method is goroutine-safe. +func (i *FingerprintTimeRangeIndex) Lookup(fp model.Fingerprint) (firstTime, lastTime model.Time, ok bool, err error) { + var tr codable.TimeRange + ok, err = i.Get(codable.Fingerprint(fp), &tr) + return tr.First, tr.Last, ok, err +} + +// NewFingerprintTimeRangeIndex returns a LevelDB-backed +// FingerprintTimeRangeIndex ready to use. 
+func NewFingerprintTimeRangeIndex(basePath string) (*FingerprintTimeRangeIndex, error) { + fingerprintTimeRangeDB, err := NewLevelDB(LevelDBOptions{ + Path: filepath.Join(basePath, FingerprintTimeRangeDir), + CacheSizeBytes: FingerprintTimeRangeCacheSize, + }) + if err != nil { + return nil, err + } + return &FingerprintTimeRangeIndex{ + KeyValueStore: fingerprintTimeRangeDB, + }, nil +} + +// DeleteFingerprintTimeRangeIndex deletes the LevelDB-backed +// FingerprintTimeRangeIndex. Use only for a not yet opened index. +func DeleteFingerprintTimeRangeIndex(basePath string) error { + return os.RemoveAll(path.Join(basePath, FingerprintTimeRangeDir)) +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/index/interface.go b/vendor/github.com/prometheus/prometheus/storage/local/index/interface.go new file mode 100644 index 000000000..40080c7f3 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/index/interface.go @@ -0,0 +1,61 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import "encoding" + +// KeyValueStore persists key/value pairs. Implementations must be fundamentally +// goroutine-safe. However, it is the caller's responsibility that keys and +// values can be safely marshaled and unmarshaled (via the MarshalBinary and +// UnmarshalBinary methods of the keys and values). 
For example, if you call the +// Put method of a KeyValueStore implementation, but the key or the value are +// modified concurrently while being marshaled into its binary representation, +// you obviously have a problem. Methods of KeyValueStore return only after +// (un)marshaling is complete. +type KeyValueStore interface { + Put(key, value encoding.BinaryMarshaler) error + // Get unmarshals the result into value. It returns false if no entry + // could be found for key. If value is nil, Get behaves like Has. + Get(key encoding.BinaryMarshaler, value encoding.BinaryUnmarshaler) (bool, error) + Has(key encoding.BinaryMarshaler) (bool, error) + // Delete returns (false, nil) if key does not exist. + Delete(key encoding.BinaryMarshaler) (bool, error) + + NewBatch() Batch + Commit(b Batch) error + + // ForEach iterates through the complete KeyValueStore and calls the + // supplied function for each mapping. + ForEach(func(kv KeyValueAccessor) error) error + + Close() error +} + +// KeyValueAccessor allows access to the key and value of an entry in a +// KeyValueStore. +type KeyValueAccessor interface { + Key(encoding.BinaryUnmarshaler) error + Value(encoding.BinaryUnmarshaler) error +} + +// Batch allows KeyValueStore mutations to be pooled and committed together. An +// implementation does not have to be goroutine-safe. Never modify a Batch +// concurrently or commit the same batch multiple times concurrently. Marshaling +// of keys and values is guaranteed to be complete when the Put or Delete methods +// have returned. 
+type Batch interface { + Put(key, value encoding.BinaryMarshaler) error + Delete(key encoding.BinaryMarshaler) error + Reset() +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/index/leveldb.go b/vendor/github.com/prometheus/prometheus/storage/local/index/leveldb.go new file mode 100644 index 000000000..c4c46421c --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/index/leveldb.go @@ -0,0 +1,210 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package index + +import ( + "encoding" + + "github.com/syndtr/goleveldb/leveldb" + leveldb_filter "github.com/syndtr/goleveldb/leveldb/filter" + leveldb_iterator "github.com/syndtr/goleveldb/leveldb/iterator" + leveldb_opt "github.com/syndtr/goleveldb/leveldb/opt" + leveldb_util "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + keyspace = &leveldb_util.Range{ + Start: nil, + Limit: nil, + } + + iteratorOpts = &leveldb_opt.ReadOptions{ + DontFillCache: true, + } +) + +// LevelDB is a LevelDB-backed sorted KeyValueStore. +type LevelDB struct { + storage *leveldb.DB + readOpts *leveldb_opt.ReadOptions + writeOpts *leveldb_opt.WriteOptions +} + +// LevelDBOptions provides options for a LevelDB. +type LevelDBOptions struct { + Path string // Base path to store files. + CacheSizeBytes int +} + +// NewLevelDB returns a newly allocated LevelDB-backed KeyValueStore ready to +// use. 
+func NewLevelDB(o LevelDBOptions) (KeyValueStore, error) { + options := &leveldb_opt.Options{ + BlockCacheCapacity: o.CacheSizeBytes, + Filter: leveldb_filter.NewBloomFilter(10), + } + + storage, err := leveldb.OpenFile(o.Path, options) + if err != nil { + return nil, err + } + + return &LevelDB{ + storage: storage, + readOpts: &leveldb_opt.ReadOptions{}, + writeOpts: &leveldb_opt.WriteOptions{}, + }, nil +} + +// NewBatch implements KeyValueStore. +func (l *LevelDB) NewBatch() Batch { + return &LevelDBBatch{ + batch: &leveldb.Batch{}, + } +} + +// Close implements KeyValueStore. +func (l *LevelDB) Close() error { + return l.storage.Close() +} + +// Get implements KeyValueStore. +func (l *LevelDB) Get(key encoding.BinaryMarshaler, value encoding.BinaryUnmarshaler) (bool, error) { + k, err := key.MarshalBinary() + if err != nil { + return false, err + } + raw, err := l.storage.Get(k, l.readOpts) + if err == leveldb.ErrNotFound { + return false, nil + } + if err != nil { + return false, err + } + if value == nil { + return true, nil + } + return true, value.UnmarshalBinary(raw) +} + +// Has implements KeyValueStore. +func (l *LevelDB) Has(key encoding.BinaryMarshaler) (has bool, err error) { + return l.Get(key, nil) +} + +// Delete implements KeyValueStore. +func (l *LevelDB) Delete(key encoding.BinaryMarshaler) (bool, error) { + k, err := key.MarshalBinary() + if err != nil { + return false, err + } + // Note that Delete returns nil if k does not exist. So we have to test + // for existence with Has first. + if has, err := l.storage.Has(k, l.readOpts); !has || err != nil { + return false, err + } + if err = l.storage.Delete(k, l.writeOpts); err != nil { + return false, err + } + return true, nil +} + +// Put implements KeyValueStore. 
+func (l *LevelDB) Put(key, value encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + v, err := value.MarshalBinary() + if err != nil { + return err + } + return l.storage.Put(k, v, l.writeOpts) +} + +// Commit implements KeyValueStore. +func (l *LevelDB) Commit(b Batch) error { + return l.storage.Write(b.(*LevelDBBatch).batch, l.writeOpts) +} + +// ForEach implements KeyValueStore. +func (l *LevelDB) ForEach(cb func(kv KeyValueAccessor) error) error { + snap, err := l.storage.GetSnapshot() + if err != nil { + return err + } + defer snap.Release() + + iter := snap.NewIterator(keyspace, iteratorOpts) + + kv := &levelDBKeyValueAccessor{it: iter} + + for valid := iter.First(); valid; valid = iter.Next() { + if err = iter.Error(); err != nil { + return err + } + + if err := cb(kv); err != nil { + return err + } + } + return nil +} + +// LevelDBBatch is a Batch implementation for LevelDB. +type LevelDBBatch struct { + batch *leveldb.Batch +} + +// Put implements Batch. +func (b *LevelDBBatch) Put(key, value encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + v, err := value.MarshalBinary() + if err != nil { + return err + } + b.batch.Put(k, v) + return nil +} + +// Delete implements Batch. +func (b *LevelDBBatch) Delete(key encoding.BinaryMarshaler) error { + k, err := key.MarshalBinary() + if err != nil { + return err + } + b.batch.Delete(k) + return nil +} + +// Reset implements Batch. +func (b *LevelDBBatch) Reset() { + b.batch.Reset() +} + +// levelDBKeyValueAccessor implements KeyValueAccessor. 
+type levelDBKeyValueAccessor struct { + it leveldb_iterator.Iterator +} + +func (i *levelDBKeyValueAccessor) Key(key encoding.BinaryUnmarshaler) error { + return key.UnmarshalBinary(i.it.Key()) +} + +func (i *levelDBKeyValueAccessor) Value(value encoding.BinaryUnmarshaler) error { + return value.UnmarshalBinary(i.it.Value()) +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/instrumentation.go b/vendor/github.com/prometheus/prometheus/storage/local/instrumentation.go new file mode 100644 index 000000000..479e13821 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/instrumentation.go @@ -0,0 +1,46 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +const ( + namespace = "prometheus" + subsystem = "local_storage" + + opTypeLabel = "type" + + // Op-types for seriesOps. + create = "create" + archive = "archive" + unarchive = "unarchive" + memoryPurge = "purge_from_memory" + archivePurge = "purge_from_archive" + requestedPurge = "purge_on_request" + memoryMaintenance = "maintenance_in_memory" + archiveMaintenance = "maintenance_in_archive" + completedQurantine = "quarantine_completed" + droppedQuarantine = "quarantine_dropped" + failedQuarantine = "quarantine_failed" + + seriesLocationLabel = "location" + + // Maintenance types for maintainSeriesDuration. 
+ maintainInMemory = "memory" + maintainArchived = "archived" + + discardReasonLabel = "reason" + + // Reasons to discard samples. + outOfOrderTimestamp = "timestamp_out_of_order" + duplicateSample = "multiple_values_for_timestamp" +) diff --git a/vendor/github.com/prometheus/prometheus/storage/local/interface.go b/vendor/github.com/prometheus/prometheus/storage/local/interface.go new file mode 100644 index 000000000..3f1fda713 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/interface.go @@ -0,0 +1,106 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "time" + + "github.com/prometheus/common/model" + "golang.org/x/net/context" + + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/storage/metric" +) + +// Storage ingests and manages samples, along with various indexes. All methods +// are goroutine-safe. Storage implements storage.SampleAppender. +type Storage interface { + // Querier returns a new Querier on the storage. + Querier() (Querier, error) + + // This SampleAppender needs multiple samples for the same fingerprint to be + // submitted in chronological order, from oldest to newest. When Append has + // returned, the appended sample might not be queryable immediately. (Use + // WaitForIndexing to wait for complete processing.) 
The implementation might + // remove labels with empty value from the provided Sample as those labels + // are considered equivalent to a label not present at all. + // + // Appending is throttled if the Storage has too many chunks in memory + // already or has too many chunks waiting for persistence. + storage.SampleAppender + + // Drop all time series associated with the given label matchers. Returns + // the number series that were dropped. + DropMetricsForLabelMatchers(context.Context, ...*metric.LabelMatcher) (int, error) + // Run the various maintenance loops in goroutines. Returns when the + // storage is ready to use. Keeps everything running in the background + // until Stop is called. + Start() error + // Stop shuts down the Storage gracefully, flushes all pending + // operations, stops all maintenance loops,and frees all resources. + Stop() error + // WaitForIndexing returns once all samples in the storage are + // indexed. Indexing is needed for FingerprintsForLabelMatchers and + // LabelValuesForLabelName and may lag behind. + WaitForIndexing() +} + +// Querier allows querying a time series storage. +type Querier interface { + // Close closes the querier. Behavior for subsequent calls to Querier methods + // is undefined. + Close() error + // QueryRange returns a list of series iterators for the selected + // time range and label matchers. The iterators need to be closed + // after usage. + QueryRange(ctx context.Context, from, through model.Time, matchers ...*metric.LabelMatcher) ([]SeriesIterator, error) + // QueryInstant returns a list of series iterators for the selected + // instant and label matchers. The iterators need to be closed after usage. + QueryInstant(ctx context.Context, ts model.Time, stalenessDelta time.Duration, matchers ...*metric.LabelMatcher) ([]SeriesIterator, error) + // MetricsForLabelMatchers returns the metrics from storage that satisfy + // the given sets of label matchers. 
Each set of matchers must contain at + // least one label matcher that does not match the empty string. Otherwise, + // an empty list is returned. Within one set of matchers, the intersection + // of matching series is computed. The final return value will be the union + // of the per-set results. The times from and through are hints for the + // storage to optimize the search. The storage MAY exclude metrics that + // have no samples in the specified interval from the returned map. In + // doubt, specify model.Earliest for from and model.Latest for through. + MetricsForLabelMatchers(ctx context.Context, from, through model.Time, matcherSets ...metric.LabelMatchers) ([]metric.Metric, error) + // LastSampleForLabelMatchers returns the last samples that have been + // ingested for the time series matching the given set of label matchers. + // The label matching behavior is the same as in MetricsForLabelMatchers. + // All returned samples are between the specified cutoff time and now. + LastSampleForLabelMatchers(ctx context.Context, cutoff model.Time, matcherSets ...metric.LabelMatchers) (model.Vector, error) + // Get all of the label values that are associated with a given label name. + LabelValuesForLabelName(context.Context, model.LabelName) (model.LabelValues, error) +} + +// SeriesIterator enables efficient access of sample values in a series. Its +// methods are not goroutine-safe. A SeriesIterator iterates over a snapshot of +// a series, i.e. it is safe to continue using a SeriesIterator after or during +// modifying the corresponding series, but the iterator will represent the state +// of the series prior to the modification. +type SeriesIterator interface { + // Gets the value that is closest before the given time. In case a value + // exists at precisely the given time, that value is returned. If no + // applicable value exists, model.ZeroSamplePair is returned. 
+ ValueAtOrBeforeTime(model.Time) model.SamplePair + // Gets all values contained within a given interval. + RangeValues(metric.Interval) []model.SamplePair + // Returns the metric of the series that the iterator corresponds to. + Metric() metric.Metric + // Closes the iterator and releases the underlying data. + Close() +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/locker.go b/vendor/github.com/prometheus/prometheus/storage/local/locker.go new file mode 100644 index 000000000..85effcdbe --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/locker.go @@ -0,0 +1,79 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "sync" + "unsafe" + + "github.com/prometheus/common/model" +) + +const ( + cacheLineSize = 64 +) + +// Avoid false sharing when using array of mutexes. +type paddedMutex struct { + sync.Mutex + pad [cacheLineSize - unsafe.Sizeof(sync.Mutex{})]byte +} + +// fingerprintLocker allows locking individual fingerprints. To limit the number +// of mutexes needed for that, only a fixed number of mutexes are +// allocated. Fingerprints to be locked are assigned to those pre-allocated +// mutexes by their value. Collisions are not detected. If two fingerprints get +// assigned to the same mutex, only one of them can be locked at the same +// time. 
As long as the number of pre-allocated mutexes is much larger than the +// number of goroutines requiring a fingerprint lock concurrently, the loss in +// efficiency is small. However, a goroutine must never lock more than one +// fingerprint at the same time. (In that case a collision would try to acquire +// the same mutex twice). +type fingerprintLocker struct { + fpMtxs []paddedMutex + numFpMtxs uint +} + +// newFingerprintLocker returns a new fingerprintLocker ready for use. At least +// 1024 preallocated mutexes are used, even if preallocatedMutexes is lower. +func newFingerprintLocker(preallocatedMutexes int) *fingerprintLocker { + if preallocatedMutexes < 1024 { + preallocatedMutexes = 1024 + } + return &fingerprintLocker{ + make([]paddedMutex, preallocatedMutexes), + uint(preallocatedMutexes), + } +} + +// Lock locks the given fingerprint. +func (l *fingerprintLocker) Lock(fp model.Fingerprint) { + l.fpMtxs[hashFP(fp)%l.numFpMtxs].Lock() +} + +// Unlock unlocks the given fingerprint. +func (l *fingerprintLocker) Unlock(fp model.Fingerprint) { + l.fpMtxs[hashFP(fp)%l.numFpMtxs].Unlock() +} + +// hashFP simply moves entropy from the most significant 48 bits of the +// fingerprint into the least significant 16 bits (by XORing) so that a simple +// MOD on the result can be used to pick a mutex while still making use of +// changes in more significant bits of the fingerprint. (The fast fingerprinting +// function we use is prone to only change a few bits for similar metrics. We +// really want to make use of every change in the fingerprint to vary mutex +// selection.) 
+func hashFP(fp model.Fingerprint) uint { + return uint(fp ^ (fp >> 32) ^ (fp >> 16)) +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/mapper.go b/vendor/github.com/prometheus/prometheus/storage/local/mapper.go new file mode 100644 index 000000000..0f5c71868 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/mapper.go @@ -0,0 +1,218 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "fmt" + "sort" + "strings" + "sync" + "sync/atomic" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + + "github.com/prometheus/common/model" +) + +const maxMappedFP = 1 << 20 // About 1M fingerprints reserved for mapping. + +var separatorString = string([]byte{model.SeparatorByte}) + +// fpMappings maps original fingerprints to a map of string representations of +// metrics to the truly unique fingerprint. +type fpMappings map[model.Fingerprint]map[string]model.Fingerprint + +// fpMapper is used to map fingerprints in order to work around fingerprint +// collisions. +type fpMapper struct { + // highestMappedFP has to be aligned for atomic operations. + highestMappedFP model.Fingerprint + + mtx sync.RWMutex // Protects mappings. 
+ mappings fpMappings + + fpToSeries *seriesMap + p *persistence + + mappingsCounter prometheus.Counter +} + +// newFPMapper loads the collision map from the persistence and +// returns an fpMapper ready to use. +func newFPMapper(fpToSeries *seriesMap, p *persistence) (*fpMapper, error) { + m := &fpMapper{ + fpToSeries: fpToSeries, + p: p, + mappingsCounter: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "fingerprint_mappings_total", + Help: "The total number of fingerprints being mapped to avoid collisions.", + }), + } + mappings, nextFP, err := p.loadFPMappings() + if err != nil { + return nil, err + } + m.mappings = mappings + m.mappingsCounter.Add(float64(len(m.mappings))) + m.highestMappedFP = nextFP + return m, nil +} + +// checkpoint persists the current mappings. The caller has to ensure that the +// provided mappings are not changed concurrently. This method is only called +// upon shutdown, when no samples are ingested anymore. +func (m *fpMapper) checkpoint() error { + return m.p.checkpointFPMappings(m.mappings) +} + +// mapFP takes a raw fingerprint (as returned by Metrics.FastFingerprint) and +// returns a truly unique fingerprint. The caller must have locked the raw +// fingerprint. +// +// If an error is encountered, it is returned together with the unchanged raw +// fingerprint. +func (m *fpMapper) mapFP(fp model.Fingerprint, metric model.Metric) model.Fingerprint { + // First check if we are in the reserved FP space, in which case this is + // automatically a collision that has to be mapped. + if fp <= maxMappedFP { + return m.maybeAddMapping(fp, metric) + } + + // Then check the most likely case: This fp belongs to a series that is + // already in memory. + s, ok := m.fpToSeries.get(fp) + if ok { + // FP exists in memory, but is it for the same metric? + if metric.Equal(s.metric) { + // Yupp. We are done. + return fp + } + // Collision detected! 
+ return m.maybeAddMapping(fp, metric) + } + // Metric is not in memory. Before doing the expensive archive lookup, + // check if we have a mapping for this metric in place already. + m.mtx.RLock() + mappedFPs, fpAlreadyMapped := m.mappings[fp] + m.mtx.RUnlock() + if fpAlreadyMapped { + // We indeed have mapped fp historically. + ms := metricToUniqueString(metric) + // fp is locked by the caller, so no further locking of + // 'collisions' required (it is specific to fp). + mappedFP, ok := mappedFPs[ms] + if ok { + // Historical mapping found, return the mapped FP. + return mappedFP + } + } + // If we are here, FP does not exist in memory and is either not mapped + // at all, or existing mappings for FP are not for m. Check if we have + // something for FP in the archive. + archivedMetric, err := m.p.archivedMetric(fp) + if err != nil || archivedMetric == nil { + // Either the archive lookup has returend an error, or fp does + // not exist in the archive. In the former case, the storage has + // been marked as dirty already. We just carry on for as long as + // it goes, assuming that fp does not exist. In either case, + // since now we know (or assume) now that fp does not exist, + // neither in memory nor in archive, we can safely keep it + // unmapped. + return fp + } + // FP exists in archive, but is it for the same metric? + if metric.Equal(archivedMetric) { + // Yupp. We are done. + return fp + } + // Collision detected! + return m.maybeAddMapping(fp, metric) +} + +// maybeAddMapping is only used internally. It takes a detected collision and +// adds it to the collisions map if not yet there. In any case, it returns the +// truly unique fingerprint for the colliding metric. 
+func (m *fpMapper) maybeAddMapping( + fp model.Fingerprint, + collidingMetric model.Metric, +) model.Fingerprint { + ms := metricToUniqueString(collidingMetric) + m.mtx.RLock() + mappedFPs, ok := m.mappings[fp] + m.mtx.RUnlock() + if ok { + // fp is locked by the caller, so no further locking required. + mappedFP, ok := mappedFPs[ms] + if ok { + return mappedFP // Existing mapping. + } + // A new mapping has to be created. + mappedFP = m.nextMappedFP() + mappedFPs[ms] = mappedFP + log.Infof( + "Collision detected for fingerprint %v, metric %v, mapping to new fingerprint %v.", + fp, collidingMetric, mappedFP, + ) + return mappedFP + } + // This is the first collision for fp. + mappedFP := m.nextMappedFP() + mappedFPs = map[string]model.Fingerprint{ms: mappedFP} + m.mtx.Lock() + m.mappings[fp] = mappedFPs + m.mappingsCounter.Inc() + m.mtx.Unlock() + log.Infof( + "Collision detected for fingerprint %v, metric %v, mapping to new fingerprint %v.", + fp, collidingMetric, mappedFP, + ) + return mappedFP +} + +func (m *fpMapper) nextMappedFP() model.Fingerprint { + mappedFP := model.Fingerprint(atomic.AddUint64((*uint64)(&m.highestMappedFP), 1)) + if mappedFP > maxMappedFP { + panic(fmt.Errorf("more than %v fingerprints mapped in collision detection", maxMappedFP)) + } + return mappedFP +} + +// Describe implements prometheus.Collector. +func (m *fpMapper) Describe(ch chan<- *prometheus.Desc) { + ch <- m.mappingsCounter.Desc() +} + +// Collect implements prometheus.Collector. +func (m *fpMapper) Collect(ch chan<- prometheus.Metric) { + ch <- m.mappingsCounter +} + +// metricToUniqueString turns a metric into a string in a reproducible and +// unique way, i.e. the same metric will always create the same string, and +// different metrics will always create different strings. 
In a way, it is the +// "ideal" fingerprint function, only that it is more expensive than the +// FastFingerprint function, and its result is not suitable as a key for maps +// and indexes as it might become really large, causing a lot of hashing effort +// in maps and a lot of storage overhead in indexes. +func metricToUniqueString(m model.Metric) string { + parts := make([]string, 0, len(m)) + for ln, lv := range m { + parts = append(parts, string(ln)+separatorString+string(lv)) + } + sort.Strings(parts) + return strings.Join(parts, separatorString) +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/noop_storage.go b/vendor/github.com/prometheus/prometheus/storage/local/noop_storage.go new file mode 100644 index 000000000..70b5a32f1 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/noop_storage.go @@ -0,0 +1,100 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "time" + + "github.com/prometheus/common/model" + "golang.org/x/net/context" + + "github.com/prometheus/prometheus/storage/metric" +) + +// NoopStorage is a dummy storage for use when Prometheus's local storage is +// disabled. It throws away any appended samples and returns empty results. +type NoopStorage struct{} + +// Start implements Storage. +func (s *NoopStorage) Start() (err error) { + return nil +} + +// Stop implements Storage. 
+func (s *NoopStorage) Stop() error { + return nil +} + +// WaitForIndexing implements Storage. +func (s *NoopStorage) WaitForIndexing() { +} + +// Querier implements Storage. +func (s *NoopStorage) Querier() (Querier, error) { + return &NoopQuerier{}, nil +} + +// NoopQuerier is a dummy Querier for use when Prometheus's local storage is +// disabled. It is returned by the NoopStorage Querier method and always returns +// empty results. +type NoopQuerier struct{} + +// Close implements Querier. +func (s *NoopQuerier) Close() error { + return nil +} + +// LastSampleForLabelMatchers implements Querier. +func (s *NoopQuerier) LastSampleForLabelMatchers(ctx context.Context, cutoff model.Time, matcherSets ...metric.LabelMatchers) (model.Vector, error) { + return nil, nil +} + +// QueryRange implements Querier +func (s *NoopQuerier) QueryRange(ctx context.Context, from, through model.Time, matchers ...*metric.LabelMatcher) ([]SeriesIterator, error) { + return nil, nil +} + +// QueryInstant implements Querier. +func (s *NoopQuerier) QueryInstant(ctx context.Context, ts model.Time, stalenessDelta time.Duration, matchers ...*metric.LabelMatcher) ([]SeriesIterator, error) { + return nil, nil +} + +// MetricsForLabelMatchers implements Querier. +func (s *NoopQuerier) MetricsForLabelMatchers( + ctx context.Context, + from, through model.Time, + matcherSets ...metric.LabelMatchers, +) ([]metric.Metric, error) { + return nil, nil +} + +// LabelValuesForLabelName implements Querier. +func (s *NoopQuerier) LabelValuesForLabelName(ctx context.Context, labelName model.LabelName) (model.LabelValues, error) { + return nil, nil +} + +// DropMetricsForLabelMatchers implements Storage. +func (s *NoopStorage) DropMetricsForLabelMatchers(ctx context.Context, matchers ...*metric.LabelMatcher) (int, error) { + return 0, nil +} + +// Append implements Storage. +func (s *NoopStorage) Append(sample *model.Sample) error { + return nil +} + +// NeedsThrottling implements Storage. 
+func (s *NoopStorage) NeedsThrottling() bool { + return false +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/persistence.go b/vendor/github.com/prometheus/prometheus/storage/local/persistence.go new file mode 100644 index 000000000..d6edc7b9c --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/persistence.go @@ -0,0 +1,1722 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "bufio" + "context" + "encoding/binary" + "fmt" + "io" + "io/ioutil" + "math" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/storage/local/codable" + "github.com/prometheus/prometheus/storage/local/index" + "github.com/prometheus/prometheus/util/flock" +) + +const ( + // Version of the storage as it can be found in the version file. + // Increment to protect against incompatible changes. + Version = 1 + versionFileName = "VERSION" + + seriesFileSuffix = ".db" + seriesTempFileSuffix = ".db.tmp" + seriesDirNameLen = 2 // How many bytes of the fingerprint in dir name. 
+ hintFileSuffix = ".hint" + + mappingsFileName = "mappings.db" + mappingsTempFileName = "mappings.db.tmp" + mappingsFormatVersion = 1 + mappingsMagicString = "PrometheusMappings" + + dirtyFileName = "DIRTY" + + fileBufSize = 1 << 16 // 64kiB. + + chunkHeaderLen = 17 + chunkHeaderTypeOffset = 0 + chunkHeaderFirstTimeOffset = 1 + chunkHeaderLastTimeOffset = 9 + chunkLenWithHeader = chunk.ChunkLen + chunkHeaderLen + chunkMaxBatchSize = 62 // Max no. of chunks to load or write in + // one batch. Note that 62 is the largest number of chunks that fit + // into 64kiB on disk because chunkHeaderLen is added to each 1k chunk. + + indexingMaxBatchSize = 1024 * 1024 + indexingBatchTimeout = 500 * time.Millisecond // Commit batch when idle for that long. + indexingQueueCapacity = 1024 * 256 +) + +var fpLen = len(model.Fingerprint(0).String()) // Length of a fingerprint as string. + +const ( + flagHeadChunkPersisted byte = 1 << iota + // Add more flags here like: + // flagFoo + // flagBar +) + +type indexingOpType byte + +const ( + add indexingOpType = iota + remove +) + +type indexingOp struct { + fingerprint model.Fingerprint + metric model.Metric + opType indexingOpType +} + +// A Persistence is used by a Storage implementation to store samples +// persistently across restarts. The methods are only goroutine-safe if +// explicitly marked as such below. The chunk-related methods persistChunks, +// dropChunks, loadChunks, and loadChunkDescs can be called concurrently with +// each other if each call refers to a different fingerprint. 
+type persistence struct { + basePath string + + archivedFingerprintToMetrics *index.FingerprintMetricIndex + archivedFingerprintToTimeRange *index.FingerprintTimeRangeIndex + labelPairToFingerprints *index.LabelPairFingerprintIndex + labelNameToLabelValues *index.LabelNameLabelValuesIndex + + indexingQueue chan indexingOp + indexingStopped chan struct{} + indexingFlush chan chan int + + indexingQueueLength prometheus.Gauge + indexingQueueCapacity prometheus.Metric + indexingBatchSizes prometheus.Summary + indexingBatchDuration prometheus.Summary + checkpointDuration prometheus.Summary + checkpointLastDuration prometheus.Gauge + checkpointLastSize prometheus.Gauge + checkpointChunksWritten prometheus.Summary + dirtyCounter prometheus.Counter + startedDirty prometheus.Gauge + checkpointing prometheus.Gauge + seriesChunksPersisted prometheus.Histogram + + dirtyMtx sync.Mutex // Protects dirty and becameDirty. + dirty bool // true if persistence was started in dirty state. + becameDirty bool // true if an inconsistency came up during runtime. + pedanticChecks bool // true if crash recovery should check each series. + dirtyFileName string // The file used for locking and to mark dirty state. + fLock flock.Releaser // The file lock to protect against concurrent usage. + + shouldSync syncStrategy + + minShrinkRatio float64 // How much a series file has to shrink to justify dropping chunks. + + bufPool sync.Pool +} + +// newPersistence returns a newly allocated persistence backed by local disk storage, ready to use. 
+func newPersistence( + basePath string, + dirty, pedanticChecks bool, + shouldSync syncStrategy, + minShrinkRatio float64, +) (*persistence, error) { + dirtyPath := filepath.Join(basePath, dirtyFileName) + versionPath := filepath.Join(basePath, versionFileName) + + if versionData, err := ioutil.ReadFile(versionPath); err == nil { + if persistedVersion, err := strconv.Atoi(strings.TrimSpace(string(versionData))); err != nil { + return nil, fmt.Errorf("cannot parse content of %s: %s", versionPath, versionData) + } else if persistedVersion != Version { + return nil, fmt.Errorf("found storage version %d on disk, need version %d - please wipe storage or run a version of Prometheus compatible with storage version %d", persistedVersion, Version, persistedVersion) + } + } else if os.IsNotExist(err) { + // No version file found. Let's create the directory (in case + // it's not there yet) and then check if it is actually + // empty. If not, we have found an old storage directory without + // version file, so we have to bail out. + if err := os.MkdirAll(basePath, 0700); err != nil { + if abspath, e := filepath.Abs(basePath); e == nil { + return nil, fmt.Errorf("cannot create persistent directory %s: %s", abspath, err) + } + return nil, fmt.Errorf("cannot create persistent directory %s: %s", basePath, err) + } + fis, err := ioutil.ReadDir(basePath) + if err != nil { + return nil, err + } + filesPresent := len(fis) + for i := range fis { + switch { + case fis[i].Name() == "lost+found" && fis[i].IsDir(): + filesPresent-- + case strings.HasPrefix(fis[i].Name(), "."): + filesPresent-- + } + } + if filesPresent > 0 { + return nil, fmt.Errorf("found existing files in storage path that do not look like storage files compatible with this version of Prometheus; please delete the files in the storage path or choose a different storage path") + } + // Finally we can write our own version into a new version file. 
+ file, err := os.Create(versionPath) + if err != nil { + return nil, err + } + defer file.Close() + if _, err := fmt.Fprintf(file, "%d\n", Version); err != nil { + return nil, err + } + } else { + return nil, err + } + + fLock, dirtyfileExisted, err := flock.New(dirtyPath) + if err != nil { + log.Errorf("Could not lock %s, Prometheus already running?", dirtyPath) + return nil, err + } + if dirtyfileExisted { + dirty = true + } + + archivedFingerprintToMetrics, err := index.NewFingerprintMetricIndex(basePath) + if err != nil { + // At this point, we could simply blow away the archived + // fingerprint-to-metric index. However, then we would lose + // _all_ archived metrics. So better give the user an + // opportunity to repair the LevelDB with a 3rd party tool. + log.Errorf("Could not open the fingerprint-to-metric index for archived series. Please try a 3rd party tool to repair LevelDB in directory %q. If unsuccessful or undesired, delete the whole directory and restart Prometheus for crash recovery. You will lose all archived time series.", filepath.Join(basePath, index.FingerprintToMetricDir)) + return nil, err + } + archivedFingerprintToTimeRange, err := index.NewFingerprintTimeRangeIndex(basePath) + if err != nil { + // We can recover the archived fingerprint-to-timerange index, + // so blow it away and set ourselves dirty. Then re-open the now + // empty index. 
+ if err := index.DeleteFingerprintTimeRangeIndex(basePath); err != nil { + return nil, err + } + dirty = true + if archivedFingerprintToTimeRange, err = index.NewFingerprintTimeRangeIndex(basePath); err != nil { + return nil, err + } + } + + p := &persistence{ + basePath: basePath, + + archivedFingerprintToMetrics: archivedFingerprintToMetrics, + archivedFingerprintToTimeRange: archivedFingerprintToTimeRange, + + indexingQueue: make(chan indexingOp, indexingQueueCapacity), + indexingStopped: make(chan struct{}), + indexingFlush: make(chan chan int), + + indexingQueueLength: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "indexing_queue_length", + Help: "The number of metrics waiting to be indexed.", + }), + indexingQueueCapacity: prometheus.MustNewConstMetric( + prometheus.NewDesc( + prometheus.BuildFQName(namespace, subsystem, "indexing_queue_capacity"), + "The capacity of the indexing queue.", + nil, nil, + ), + prometheus.GaugeValue, + float64(indexingQueueCapacity), + ), + indexingBatchSizes: prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "indexing_batch_sizes", + Help: "Quantiles for indexing batch sizes (number of metrics per batch).", + }, + ), + indexingBatchDuration: prometheus.NewSummary( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "indexing_batch_duration_seconds", + Help: "Quantiles for batch indexing duration in seconds.", + }, + ), + checkpointLastDuration: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "checkpoint_last_duration_seconds", + Help: "The duration in seconds it took to last checkpoint open chunks and chunks yet to be persisted.", + }), + checkpointDuration: prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Objectives: map[float64]float64{}, + Name: "checkpoint_duration_seconds", + Help: "The 
duration in seconds taken for checkpointing open chunks and chunks yet to be persisted", + }), + checkpointLastSize: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "checkpoint_last_size_bytes", + Help: "The size of the last checkpoint of open chunks and chunks yet to be persisted", + }), + checkpointChunksWritten: prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Objectives: map[float64]float64{}, + Name: "checkpoint_series_chunks_written", + Help: "The number of chunk written per series while checkpointing open chunks and chunks yet to be persisted.", + }), + dirtyCounter: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "inconsistencies_total", + Help: "A counter incremented each time an inconsistency in the local storage is detected. If this is greater zero, restart the server as soon as possible.", + }), + startedDirty: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "started_dirty", + Help: "Whether the local storage was found to be dirty (and crash recovery occurred) during Prometheus startup.", + }), + checkpointing: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "checkpointing", + Help: "1 if the storage is checkpointing, 0 otherwise.", + }), + seriesChunksPersisted: prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "series_chunks_persisted", + Help: "The number of chunks persisted per series.", + // Even with 4 bytes per sample, you're not going to get more than 85 + // chunks in 6 hours for a time series with 1s resolution. 
+ Buckets: []float64{1, 2, 4, 8, 16, 32, 64, 128}, + }), + dirty: dirty, + pedanticChecks: pedanticChecks, + dirtyFileName: dirtyPath, + fLock: fLock, + shouldSync: shouldSync, + minShrinkRatio: minShrinkRatio, + // Create buffers of length 3*chunkLenWithHeader by default because that is still reasonably small + // and at the same time enough for many uses. The contract is to never return buffer smaller than + // that to the pool so that callers can rely on a minimum buffer size. + bufPool: sync.Pool{New: func() interface{} { return make([]byte, 0, 3*chunkLenWithHeader) }}, + } + + if p.dirty { + // Blow away the label indexes. We'll rebuild them later. + if err := index.DeleteLabelPairFingerprintIndex(basePath); err != nil { + return nil, err + } + if err := index.DeleteLabelNameLabelValuesIndex(basePath); err != nil { + return nil, err + } + } + labelPairToFingerprints, err := index.NewLabelPairFingerprintIndex(basePath) + if err != nil { + return nil, err + } + labelNameToLabelValues, err := index.NewLabelNameLabelValuesIndex(basePath) + if err != nil { + return nil, err + } + p.labelPairToFingerprints = labelPairToFingerprints + p.labelNameToLabelValues = labelNameToLabelValues + + return p, nil +} + +func (p *persistence) run() { + p.processIndexingQueue() +} + +// Describe implements prometheus.Collector. +func (p *persistence) Describe(ch chan<- *prometheus.Desc) { + ch <- p.indexingQueueLength.Desc() + ch <- p.indexingQueueCapacity.Desc() + p.indexingBatchSizes.Describe(ch) + p.indexingBatchDuration.Describe(ch) + ch <- p.checkpointDuration.Desc() + ch <- p.checkpointLastDuration.Desc() + ch <- p.checkpointLastSize.Desc() + ch <- p.checkpointChunksWritten.Desc() + ch <- p.checkpointing.Desc() + ch <- p.dirtyCounter.Desc() + ch <- p.startedDirty.Desc() + ch <- p.seriesChunksPersisted.Desc() +} + +// Collect implements prometheus.Collector. 
+func (p *persistence) Collect(ch chan<- prometheus.Metric) { + p.indexingQueueLength.Set(float64(len(p.indexingQueue))) + + ch <- p.indexingQueueLength + ch <- p.indexingQueueCapacity + p.indexingBatchSizes.Collect(ch) + p.indexingBatchDuration.Collect(ch) + ch <- p.checkpointDuration + ch <- p.checkpointLastDuration + ch <- p.checkpointLastSize + ch <- p.checkpointChunksWritten + ch <- p.checkpointing + ch <- p.dirtyCounter + ch <- p.startedDirty + ch <- p.seriesChunksPersisted +} + +// isDirty returns the dirty flag in a goroutine-safe way. +func (p *persistence) isDirty() bool { + p.dirtyMtx.Lock() + defer p.dirtyMtx.Unlock() + return p.dirty +} + +// setDirty flags the storage as dirty in a goroutine-safe way. The provided +// error will be logged as a reason the first time the storage is flagged as dirty. +func (p *persistence) setDirty(err error) { + p.dirtyCounter.Inc() + p.dirtyMtx.Lock() + defer p.dirtyMtx.Unlock() + if p.becameDirty { + return + } + p.dirty = true + p.becameDirty = true + log.With("error", err).Error("The storage is now inconsistent. Restart Prometheus ASAP to initiate recovery.") +} + +// fingerprintsForLabelPair returns the fingerprints for the given label +// pair. This method is goroutine-safe but take into account that metrics queued +// for indexing with IndexMetric might not have made it into the index +// yet. (Same applies correspondingly to UnindexMetric.) +func (p *persistence) fingerprintsForLabelPair(lp model.LabelPair) model.Fingerprints { + fps, _, err := p.labelPairToFingerprints.Lookup(lp) + if err != nil { + p.setDirty(fmt.Errorf("error in method fingerprintsForLabelPair(%v): %s", lp, err)) + return nil + } + return fps +} + +// labelValuesForLabelName returns the label values for the given label +// name. This method is goroutine-safe but take into account that metrics queued +// for indexing with IndexMetric might not have made it into the index +// yet. (Same applies correspondingly to UnindexMetric.) 
+func (p *persistence) labelValuesForLabelName(ln model.LabelName) (model.LabelValues, error) { + lvs, _, err := p.labelNameToLabelValues.Lookup(ln) + if err != nil { + p.setDirty(fmt.Errorf("error in method labelValuesForLabelName(%v): %s", ln, err)) + return nil, err + } + return lvs, nil +} + +// persistChunks persists a number of consecutive chunks of a series. It is the +// caller's responsibility to not modify the chunks concurrently and to not +// persist or drop anything for the same fingerprint concurrently. It returns +// the (zero-based) index of the first persisted chunk within the series +// file. In case of an error, the returned index is -1 (to avoid the +// misconception that the chunk was written at position 0). +// +// Returning an error signals problems with the series file. In this case, the +// caller should quarantine the series. +func (p *persistence) persistChunks(fp model.Fingerprint, chunks []chunk.Chunk) (index int, err error) { + f, err := p.openChunkFileForWriting(fp) + if err != nil { + return -1, err + } + defer p.closeChunkFile(f) + + if err := p.writeChunks(f, chunks); err != nil { + return -1, err + } + + // Determine index within the file. + offset, err := f.Seek(0, io.SeekCurrent) + if err != nil { + return -1, err + } + index, err = chunkIndexForOffset(offset) + if err != nil { + return -1, err + } + + return index - len(chunks), err +} + +// loadChunks loads a group of chunks of a timeseries by their index. The chunk +// with the earliest time will have index 0, the following ones will have +// incrementally larger indexes. The indexOffset denotes the offset to be added to +// each index in indexes. It is the caller's responsibility to not persist or +// drop anything for the same fingerprint concurrently. 
+func (p *persistence) loadChunks(fp model.Fingerprint, indexes []int, indexOffset int) ([]chunk.Chunk, error) { + f, err := p.openChunkFileForReading(fp) + if err != nil { + return nil, err + } + defer f.Close() + + chunks := make([]chunk.Chunk, 0, len(indexes)) + buf := p.bufPool.Get().([]byte) + defer func() { + // buf may change below. An unwrapped 'defer p.bufPool.Put(buf)' + // would only put back the original buf. + p.bufPool.Put(buf) + }() + + for i := 0; i < len(indexes); i++ { + // This loads chunks in batches. A batch is a streak of + // consecutive chunks, read from disk in one go. + batchSize := 1 + if _, err := f.Seek(offsetForChunkIndex(indexes[i]+indexOffset), io.SeekStart); err != nil { + return nil, err + } + + for ; batchSize < chunkMaxBatchSize && + i+1 < len(indexes) && + indexes[i]+1 == indexes[i+1]; i, batchSize = i+1, batchSize+1 { + } + readSize := batchSize * chunkLenWithHeader + if cap(buf) < readSize { + buf = make([]byte, readSize) + } + buf = buf[:readSize] + + if _, err := io.ReadFull(f, buf); err != nil { + return nil, err + } + for c := 0; c < batchSize; c++ { + chunk, err := chunk.NewForEncoding(chunk.Encoding(buf[c*chunkLenWithHeader+chunkHeaderTypeOffset])) + if err != nil { + return nil, err + } + if err := chunk.UnmarshalFromBuf(buf[c*chunkLenWithHeader+chunkHeaderLen:]); err != nil { + return nil, err + } + chunks = append(chunks, chunk) + } + } + chunk.Ops.WithLabelValues(chunk.Load).Add(float64(len(chunks))) + atomic.AddInt64(&chunk.NumMemChunks, int64(len(chunks))) + return chunks, nil +} + +// loadChunkDescs loads the chunk.Descs for a series from disk. offsetFromEnd is +// the number of chunk.Descs to skip from the end of the series file. It is the +// caller's responsibility to not persist or drop anything for the same +// fingerprint concurrently. 
+func (p *persistence) loadChunkDescs(fp model.Fingerprint, offsetFromEnd int) ([]*chunk.Desc, error) { + f, err := p.openChunkFileForReading(fp) + if os.IsNotExist(err) { + return nil, nil + } + if err != nil { + return nil, err + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + return nil, err + } + if fi.Size()%int64(chunkLenWithHeader) != 0 { + // The returned error will bubble up and lead to quarantining of the whole series. + return nil, fmt.Errorf( + "size of series file for fingerprint %v is %d, which is not a multiple of the chunk length %d", + fp, fi.Size(), chunkLenWithHeader, + ) + } + + numChunks := int(fi.Size())/chunkLenWithHeader - offsetFromEnd + cds := make([]*chunk.Desc, numChunks) + chunkTimesBuf := make([]byte, 16) + for i := 0; i < numChunks; i++ { + _, err := f.Seek(offsetForChunkIndex(i)+chunkHeaderFirstTimeOffset, io.SeekStart) + if err != nil { + return nil, err + } + + _, err = io.ReadAtLeast(f, chunkTimesBuf, 16) + if err != nil { + return nil, err + } + cds[i] = &chunk.Desc{ + ChunkFirstTime: model.Time(binary.LittleEndian.Uint64(chunkTimesBuf)), + ChunkLastTime: model.Time(binary.LittleEndian.Uint64(chunkTimesBuf[8:])), + } + } + chunk.DescOps.WithLabelValues(chunk.Load).Add(float64(len(cds))) + chunk.NumMemDescs.Add(float64(len(cds))) + return cds, nil +} + +// checkpointSeriesMapAndHeads persists the fingerprint to memory-series mapping +// and all non persisted chunks. Do not call concurrently with +// loadSeriesMapAndHeads. This method will only write heads format v2, but +// loadSeriesMapAndHeads can also understand v1. +// +// Description of the file format (for both, v1 and v2): +// +// (1) Magic string (const headsMagicString). +// +// (2) Varint-encoded format version (const headsFormatVersion). +// +// (3) Number of series in checkpoint as big-endian uint64. +// +// (4) Repeated once per series: +// +// (4.1) A flag byte, see flag constants above. (Present but unused in v2.) 
+// +// (4.2) The fingerprint as big-endian uint64. +// +// (4.3) The metric as defined by codable.Metric. +// +// (4.4) The varint-encoded persistWatermark. (Missing in v1.) +// +// (4.5) The modification time of the series file as nanoseconds elapsed since +// January 1, 1970 UTC. -1 if the modification time is unknown or no series file +// exists yet. (Missing in v1.) +// +// (4.6) The varint-encoded chunkDescsOffset. +// +// (4.6) The varint-encoded savedFirstTime. +// +// (4.7) The varint-encoded number of chunk descriptors. +// +// (4.8) Repeated once per chunk descriptor, oldest to most recent, either +// variant 4.8.1 (if index < persistWatermark) or variant 4.8.2 (if index >= +// persistWatermark). In v1, everything is variant 4.8.1 except for a +// non-persisted head-chunk (determined by the flags). +// +// (4.8.1.1) The varint-encoded first time. +// (4.8.1.2) The varint-encoded last time. +// +// (4.8.2.1) A byte defining the chunk type. +// (4.8.2.2) The chunk itself, marshaled with the Marshal() method. +// +// NOTE: Above, varint encoding is used consistently although uvarint would have +// made more sense in many cases. This was simply a glitch while designing the +// format. +func (p *persistence) checkpointSeriesMapAndHeads( + ctx context.Context, fingerprintToSeries *seriesMap, fpLocker *fingerprintLocker, +) (err error) { + log.Info("Checkpointing in-memory metrics and chunks...") + p.checkpointing.Set(1) + defer p.checkpointing.Set(0) + begin := time.Now() + f, err := os.OpenFile(p.headsTempFileName(), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0640) + if err != nil { + return err + } + + defer func() { + defer os.Remove(p.headsTempFileName()) // Just in case it was left behind. + + if err != nil { + // If we already had an error, do not bother to sync, + // just close, ignoring any further error. 
+ f.Close() + return + } + syncErr := f.Sync() + closeErr := f.Close() + err = syncErr + if err != nil { + return + } + err = closeErr + if err != nil { + return + } + err = os.Rename(p.headsTempFileName(), p.headsFileName()) + duration := time.Since(begin) + p.checkpointDuration.Observe(duration.Seconds()) + p.checkpointLastDuration.Set(duration.Seconds()) + log.Infof("Done checkpointing in-memory metrics and chunks in %v.", duration) + }() + + w := bufio.NewWriterSize(f, fileBufSize) + + if _, err = w.WriteString(headsMagicString); err != nil { + return err + } + var numberOfSeriesOffset int + if numberOfSeriesOffset, err = codable.EncodeVarint(w, headsFormatVersion); err != nil { + return err + } + numberOfSeriesOffset += len(headsMagicString) + numberOfSeriesInHeader := uint64(fingerprintToSeries.length()) + // We have to write the number of series as uint64 because we might need + // to overwrite it later, and a varint might change byte width then. + if err = codable.EncodeUint64(w, numberOfSeriesInHeader); err != nil { + return err + } + + iter := fingerprintToSeries.iter() + defer func() { + // Consume the iterator in any case to not leak goroutines. + for range iter { + } + }() + + var realNumberOfSeries uint64 + for m := range iter { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + func() { // Wrapped in function to use defer for unlocking the fp. + fpLocker.Lock(m.fp) + defer fpLocker.Unlock(m.fp) + + chunksToPersist := len(m.series.chunkDescs) - m.series.persistWatermark + if len(m.series.chunkDescs) == 0 { + // This series was completely purged or archived + // in the meantime. Ignore. + return + } + realNumberOfSeries++ + + // Sanity checks. + if m.series.chunkDescsOffset < 0 && m.series.persistWatermark > 0 { + panic("encountered unknown chunk desc offset in combination with positive persist watermark") + } + + // These are the values to save in the normal case. 
+ var ( + // persistWatermark is zero as we only checkpoint non-persisted chunks. + persistWatermark int64 + // chunkDescsOffset is shifted by the original persistWatermark for the same reason. + chunkDescsOffset = int64(m.series.chunkDescsOffset + m.series.persistWatermark) + numChunkDescs = int64(chunksToPersist) + ) + // However, in the special case of a series being fully + // persisted but still in memory (i.e. not archived), we + // need to save a "placeholder", for which we use just + // the chunk desc of the last chunk. Values have to be + // adjusted accordingly. (The reason for doing it in + // this weird way is to keep the checkpoint format + // compatible with older versions.) + if chunksToPersist == 0 { + persistWatermark = 1 + chunkDescsOffset-- // Save one chunk desc after all. + numChunkDescs = 1 + } + + // seriesFlags left empty in v2. + if err = w.WriteByte(0); err != nil { + return + } + if err = codable.EncodeUint64(w, uint64(m.fp)); err != nil { + return + } + var buf []byte + buf, err = codable.Metric(m.series.metric).MarshalBinary() + if err != nil { + return + } + if _, err = w.Write(buf); err != nil { + return + } + if _, err = codable.EncodeVarint(w, persistWatermark); err != nil { + return + } + if m.series.modTime.IsZero() { + if _, err = codable.EncodeVarint(w, -1); err != nil { + return + } + } else { + if _, err = codable.EncodeVarint(w, m.series.modTime.UnixNano()); err != nil { + return + } + } + if _, err = codable.EncodeVarint(w, chunkDescsOffset); err != nil { + return + } + if _, err = codable.EncodeVarint(w, int64(m.series.savedFirstTime)); err != nil { + return + } + if _, err = codable.EncodeVarint(w, numChunkDescs); err != nil { + return + } + if chunksToPersist == 0 { + // Save the one placeholder chunk desc for a fully persisted series. 
+ chunkDesc := m.series.chunkDescs[len(m.series.chunkDescs)-1] + if _, err = codable.EncodeVarint(w, int64(chunkDesc.FirstTime())); err != nil { + return + } + lt, err := chunkDesc.LastTime() + if err != nil { + return + } + if _, err = codable.EncodeVarint(w, int64(lt)); err != nil { + return + } + } else { + // Save (only) the non-persisted chunks. + for _, chunkDesc := range m.series.chunkDescs[m.series.persistWatermark:] { + if err = w.WriteByte(byte(chunkDesc.C.Encoding())); err != nil { + return + } + if err = chunkDesc.C.Marshal(w); err != nil { + return + } + p.checkpointChunksWritten.Observe(float64(chunksToPersist)) + } + } + // Series is checkpointed now, so declare it clean. In case the entire + // checkpoint fails later on, this is fine, as the storage's series + // maintenance will mark these series newly dirty again, continuously + // increasing the total number of dirty series as seen by the storage. + // This has the effect of triggering a new checkpoint attempt even + // earlier than if we hadn't incorrectly set "dirty" to "false" here + // already. + m.series.dirty = false + }() + if err != nil { + return err + } + } + if err = w.Flush(); err != nil { + return err + } + if realNumberOfSeries != numberOfSeriesInHeader { + // The number of series has changed in the meantime. + // Rewrite it in the header. + if _, err = f.Seek(int64(numberOfSeriesOffset), io.SeekStart); err != nil { + return err + } + if err = codable.EncodeUint64(f, realNumberOfSeries); err != nil { + return err + } + } + info, err := f.Stat() + if err != nil { + return err + } + p.checkpointLastSize.Set(float64(info.Size())) + return err +} + +// loadSeriesMapAndHeads loads the fingerprint to memory-series mapping and all +// the chunks contained in the checkpoint (and thus not yet persisted to series +// files). The method is capable of loading the checkpoint format v1 and v2. 
If +// recoverable corruption is detected, or if the dirty flag was set from the +// beginning, crash recovery is run, which might take a while. If an +// unrecoverable error is encountered, it is returned. Call this method during +// start-up while nothing else is running in storage land. This method is +// utterly goroutine-unsafe. +func (p *persistence) loadSeriesMapAndHeads() (sm *seriesMap, chunksToPersist int64, err error) { + fingerprintToSeries := make(map[model.Fingerprint]*memorySeries) + sm = &seriesMap{m: fingerprintToSeries} + + defer func() { + if p.dirty { + log.Warn("Persistence layer appears dirty.") + p.startedDirty.Set(1) + err = p.recoverFromCrash(fingerprintToSeries) + if err != nil { + sm = nil + } + } else { + p.startedDirty.Set(0) + } + }() + + hs := newHeadsScanner(p.headsFileName()) + defer hs.close() + for hs.scan() { + fingerprintToSeries[hs.fp] = hs.series + } + if os.IsNotExist(hs.err) { + return sm, 0, nil + } + if hs.err != nil { + p.dirty = true + log. + With("file", p.headsFileName()). + With("error", hs.err). + Error("Error reading heads file.") + return sm, 0, hs.err + } + return sm, hs.chunksToPersistTotal, nil +} + +// dropAndPersistChunks deletes all chunks from a series file whose last sample +// time is before beforeTime, and then appends the provided chunks, leaving out +// those whose last sample time is before beforeTime. It returns the timestamp +// of the first sample in the oldest chunk _not_ dropped, the chunk offset +// within the series file of the first chunk persisted (out of the provided +// chunks, or - if no chunks were provided - the chunk offset where chunks would +// have been persisted, i.e. the end of the file), the number of deleted chunks, +// and true if all chunks of the series have been deleted (in which case the +// returned timestamp will be 0 and must be ignored). It is the caller's +// responsibility to make sure nothing is persisted or loaded for the same +// fingerprint concurrently. 
+// +// Returning an error signals problems with the series file. In this case, the +// caller should quarantine the series. +func (p *persistence) dropAndPersistChunks( + fp model.Fingerprint, beforeTime model.Time, chunks []chunk.Chunk, +) ( + firstTimeNotDropped model.Time, + offset int, + numDropped int, + allDropped bool, + err error, +) { + // Style note: With the many return values, it was decided to use naked + // returns in this method. They make the method more readable, but + // please handle with care! + if len(chunks) > 0 { + // We have chunks to persist. First check if those are already + // too old. If that's the case, the chunks in the series file + // are all too old, too. + i := 0 + for ; i < len(chunks); i++ { + var lt model.Time + lt, err = chunks[i].NewIterator().LastTimestamp() + if err != nil { + return + } + if !lt.Before(beforeTime) { + break + } + } + if i < len(chunks) { + firstTimeNotDropped = chunks[i].FirstTime() + } + if i > 0 || firstTimeNotDropped.Before(beforeTime) { + // Series file has to go. + if numDropped, err = p.deleteSeriesFile(fp); err != nil { + return + } + numDropped += i + if i == len(chunks) { + allDropped = true + return + } + // Now simply persist what has to be persisted to a new file. + _, err = p.persistChunks(fp, chunks[i:]) + return + } + } + + // If we are here, we have to check the series file itself. + f, err := p.openChunkFileForReading(fp) + if os.IsNotExist(err) { + // No series file. Only need to create new file with chunks to + // persist, if there are any. + if len(chunks) == 0 { + allDropped = true + err = nil // Do not report not-exist err. + return + } + offset, err = p.persistChunks(fp, chunks) + return + } + if err != nil { + return + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + return + } + chunksInFile := int(fi.Size()) / chunkLenWithHeader + totalChunks := chunksInFile + len(chunks) + + // Calculate chunk index from minShrinkRatio, to skip unnecessary chunk header reading. 
+ chunkIndexToStartSeek := 0 + if p.minShrinkRatio < 1 { + chunkIndexToStartSeek = int(math.Floor(float64(totalChunks) * p.minShrinkRatio)) + } + if chunkIndexToStartSeek >= chunksInFile { + chunkIndexToStartSeek = chunksInFile - 1 + } + numDropped = chunkIndexToStartSeek + + headerBuf := make([]byte, chunkHeaderLen) + // Find the first chunk in the file that should be kept. + for ; ; numDropped++ { + _, err = f.Seek(offsetForChunkIndex(numDropped), io.SeekStart) + if err != nil { + return + } + _, err = io.ReadFull(f, headerBuf) + if err == io.EOF { + // Close the file before trying to delete it. This is necessary on Windows + // (this will cause the defer f.Close to fail, but the error is silently ignored) + f.Close() + // We ran into the end of the file without finding any chunks that should + // be kept. Remove the whole file. + if numDropped, err = p.deleteSeriesFile(fp); err != nil { + return + } + if len(chunks) == 0 { + allDropped = true + return + } + offset, err = p.persistChunks(fp, chunks) + return + } + if err != nil { + return + } + lastTime := model.Time( + binary.LittleEndian.Uint64(headerBuf[chunkHeaderLastTimeOffset:]), + ) + if !lastTime.Before(beforeTime) { + break + } + } + + // If numDropped isn't incremented, the minShrinkRatio condition isn't satisfied. + if numDropped == chunkIndexToStartSeek { + // Nothing to drop. Just adjust the return values and append the chunks (if any). + numDropped = 0 + _, err = f.Seek(offsetForChunkIndex(0), io.SeekStart) + if err != nil { + return + } + _, err = io.ReadFull(f, headerBuf) + if err != nil { + return + } + firstTimeNotDropped = model.Time( + binary.LittleEndian.Uint64(headerBuf[chunkHeaderFirstTimeOffset:]), + ) + if len(chunks) > 0 { + offset, err = p.persistChunks(fp, chunks) + } else { + offset = chunksInFile + } + return + } + // If we are here, we have to drop some chunks for real. 
So we need to + // record firstTimeNotDropped from the last read header, seek backwards + // to the beginning of its header, and start copying everything from + // there into a new file. Then append the chunks to the new file. + firstTimeNotDropped = model.Time( + binary.LittleEndian.Uint64(headerBuf[chunkHeaderFirstTimeOffset:]), + ) + chunk.Ops.WithLabelValues(chunk.Drop).Add(float64(numDropped)) + _, err = f.Seek(-chunkHeaderLen, io.SeekCurrent) + if err != nil { + return + } + + temp, err := os.OpenFile(p.tempFileNameForFingerprint(fp), os.O_WRONLY|os.O_CREATE, 0640) + if err != nil { + return + } + defer func() { + // Close the file before trying to rename to it. This is necessary on Windows + // (this will cause the defer f.Close to fail, but the error is silently ignored) + f.Close() + p.closeChunkFile(temp) + if err == nil { + err = os.Rename(p.tempFileNameForFingerprint(fp), p.fileNameForFingerprint(fp)) + } + }() + + written, err := io.Copy(temp, f) + if err != nil { + return + } + offset = int(written / chunkLenWithHeader) + + if len(chunks) > 0 { + if err = p.writeChunks(temp, chunks); err != nil { + return + } + } + return +} + +// deleteSeriesFile deletes a series file belonging to the provided +// fingerprint. It returns the number of chunks that were contained in the +// deleted file. +func (p *persistence) deleteSeriesFile(fp model.Fingerprint) (int, error) { + fname := p.fileNameForFingerprint(fp) + fi, err := os.Stat(fname) + if os.IsNotExist(err) { + // Great. The file is already gone. + return 0, nil + } + if err != nil { + return -1, err + } + numChunks := int(fi.Size() / chunkLenWithHeader) + if err := os.Remove(fname); err != nil { + return -1, err + } + chunk.Ops.WithLabelValues(chunk.Drop).Add(float64(numChunks)) + return numChunks, nil +} + +// quarantineSeriesFile moves a series file to the orphaned directory. 
It also +// writes a hint file with the provided quarantine reason and, if series is +// non-nil, the string representation of the metric. +func (p *persistence) quarantineSeriesFile(fp model.Fingerprint, quarantineReason error, metric model.Metric) error { + var ( + oldName = p.fileNameForFingerprint(fp) + orphanedDir = filepath.Join(p.basePath, "orphaned", filepath.Base(filepath.Dir(oldName))) + newName = filepath.Join(orphanedDir, filepath.Base(oldName)) + hintName = newName[:len(newName)-len(seriesFileSuffix)] + hintFileSuffix + ) + + renameErr := os.MkdirAll(orphanedDir, 0700) + if renameErr != nil { + return renameErr + } + renameErr = os.Rename(oldName, newName) + if os.IsNotExist(renameErr) { + // Source file dosn't exist. That's normal. + renameErr = nil + } + // Write hint file even if the rename ended in an error. At least try... + // And ignore errors writing the hint file. It's best effort. + if f, err := os.Create(hintName); err == nil { + if metric != nil { + f.WriteString(metric.String() + "\n") + } else { + f.WriteString("[UNKNOWN METRIC]\n") + } + if quarantineReason != nil { + f.WriteString(quarantineReason.Error() + "\n") + } else { + f.WriteString("[UNKNOWN REASON]\n") + } + f.Close() + } + return renameErr +} + +// seriesFileModTime returns the modification time of the series file belonging +// to the provided fingerprint. In case of an error, the zero value of time.Time +// is returned. +func (p *persistence) seriesFileModTime(fp model.Fingerprint) time.Time { + var modTime time.Time + if fi, err := os.Stat(p.fileNameForFingerprint(fp)); err == nil { + return fi.ModTime() + } + return modTime +} + +// indexMetric queues the given metric for addition to the indexes needed by +// fingerprintsForLabelPair, labelValuesForLabelName, and +// fingerprintsModifiedBefore. If the queue is full, this method blocks until +// the metric can be queued. This method is goroutine-safe. 
+func (p *persistence) indexMetric(fp model.Fingerprint, m model.Metric) { + p.indexingQueue <- indexingOp{fp, m, add} +} + +// unindexMetric queues references to the given metric for removal from the +// indexes used for fingerprintsForLabelPair, labelValuesForLabelName, and +// fingerprintsModifiedBefore. The index of fingerprints to archived metrics is +// not affected by this removal. (In fact, never call this method for an +// archived metric. To purge an archived metric, call purgeArchivedMetric.) +// If the queue is full, this method blocks until the metric can be queued. This +// method is goroutine-safe. +func (p *persistence) unindexMetric(fp model.Fingerprint, m model.Metric) { + p.indexingQueue <- indexingOp{fp, m, remove} +} + +// waitForIndexing waits until all items in the indexing queue are processed. If +// queue processing is currently on hold (to gather more ops for batching), this +// method will trigger an immediate start of processing. This method is +// goroutine-safe. +func (p *persistence) waitForIndexing() { + wait := make(chan int) + for { + p.indexingFlush <- wait + if <-wait == 0 { + break + } + } +} + +// archiveMetric persists the mapping of the given fingerprint to the given +// metric, together with the first and last timestamp of the series belonging to +// the metric. The caller must have locked the fingerprint. 
+func (p *persistence) archiveMetric( + fp model.Fingerprint, m model.Metric, first, last model.Time, +) { + if err := p.archivedFingerprintToMetrics.Put(codable.Fingerprint(fp), codable.Metric(m)); err != nil { + p.setDirty(fmt.Errorf("error in method archiveMetric inserting fingerprint %v into FingerprintToMetrics: %s", fp, err)) + return + } + if err := p.archivedFingerprintToTimeRange.Put(codable.Fingerprint(fp), codable.TimeRange{First: first, Last: last}); err != nil { + p.setDirty(fmt.Errorf("error in method archiveMetric inserting fingerprint %v into FingerprintToTimeRange: %s", fp, err)) + } +} + +// hasArchivedMetric returns whether the archived metric for the given +// fingerprint exists and if yes, what the first and last timestamp in the +// corresponding series is. This method is goroutine-safe. +func (p *persistence) hasArchivedMetric(fp model.Fingerprint) ( + hasMetric bool, firstTime, lastTime model.Time, +) { + firstTime, lastTime, hasMetric, err := p.archivedFingerprintToTimeRange.Lookup(fp) + if err != nil { + p.setDirty(fmt.Errorf("error in method hasArchivedMetric(%v): %s", fp, err)) + hasMetric = false + } + return hasMetric, firstTime, lastTime +} + +// updateArchivedTimeRange updates an archived time range. The caller must make +// sure that the fingerprint is currently archived (the time range will +// otherwise be added without the corresponding metric in the archive). +func (p *persistence) updateArchivedTimeRange( + fp model.Fingerprint, first, last model.Time, +) error { + return p.archivedFingerprintToTimeRange.Put(codable.Fingerprint(fp), codable.TimeRange{First: first, Last: last}) +} + +// fingerprintsModifiedBefore returns the fingerprints of archived timeseries +// that have live samples before the provided timestamp. This method is +// goroutine-safe. 
+func (p *persistence) fingerprintsModifiedBefore(beforeTime model.Time) ([]model.Fingerprint, error) { + var fp codable.Fingerprint + var tr codable.TimeRange + fps := []model.Fingerprint{} + err := p.archivedFingerprintToTimeRange.ForEach(func(kv index.KeyValueAccessor) error { + if err := kv.Value(&tr); err != nil { + return err + } + if tr.First.Before(beforeTime) { + if err := kv.Key(&fp); err != nil { + return err + } + fps = append(fps, model.Fingerprint(fp)) + } + return nil + }) + return fps, err +} + +// archivedMetric retrieves the archived metric with the given fingerprint. This +// method is goroutine-safe. +func (p *persistence) archivedMetric(fp model.Fingerprint) (model.Metric, error) { + metric, _, err := p.archivedFingerprintToMetrics.Lookup(fp) + if err != nil { + p.setDirty(fmt.Errorf("error in method archivedMetric(%v): %s", fp, err)) + return nil, err + } + return metric, nil +} + +// purgeArchivedMetric deletes an archived fingerprint and its corresponding +// metric entirely. It also queues the metric for un-indexing (no need to call +// unindexMetric for the deleted metric.) It does not touch the series file, +// though. The caller must have locked the fingerprint. +func (p *persistence) purgeArchivedMetric(fp model.Fingerprint) (err error) { + defer func() { + if err != nil { + p.setDirty(fmt.Errorf("error in method purgeArchivedMetric(%v): %s", fp, err)) + } + }() + + metric, err := p.archivedMetric(fp) + if err != nil || metric == nil { + return err + } + deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp)) + if err != nil { + return err + } + if !deleted { + log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToMetrics index. 
This should never happen.", fp) + } + deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp)) + if err != nil { + return err + } + if !deleted { + log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp) + } + p.unindexMetric(fp, metric) + return nil +} + +// unarchiveMetric deletes an archived fingerprint and its metric, but (in +// contrast to purgeArchivedMetric) does not un-index the metric. If a metric +// was actually deleted, the method returns true and the first time and last +// time of the deleted metric. The caller must have locked the fingerprint. +func (p *persistence) unarchiveMetric(fp model.Fingerprint) (deletedAnything bool, err error) { + // An error returned here will bubble up and lead to quarantining of the + // series, so no setDirty required. + deleted, err := p.archivedFingerprintToMetrics.Delete(codable.Fingerprint(fp)) + if err != nil || !deleted { + return false, err + } + deleted, err = p.archivedFingerprintToTimeRange.Delete(codable.Fingerprint(fp)) + if err != nil { + return false, err + } + if !deleted { + log.Errorf("Tried to delete non-archived fingerprint %s from archivedFingerprintToTimeRange index. This should never happen.", fp) + } + return true, nil +} + +// close flushes the indexing queue and other buffered data and releases any +// held resources. It also removes the dirty marker file if successful and if +// the persistence is currently not marked as dirty. 
+func (p *persistence) close() error { + close(p.indexingQueue) + <-p.indexingStopped + + var lastError, dirtyFileRemoveError error + if err := p.archivedFingerprintToMetrics.Close(); err != nil { + lastError = err + log.Error("Error closing archivedFingerprintToMetric index DB: ", err) + } + if err := p.archivedFingerprintToTimeRange.Close(); err != nil { + lastError = err + log.Error("Error closing archivedFingerprintToTimeRange index DB: ", err) + } + if err := p.labelPairToFingerprints.Close(); err != nil { + lastError = err + log.Error("Error closing labelPairToFingerprints index DB: ", err) + } + if err := p.labelNameToLabelValues.Close(); err != nil { + lastError = err + log.Error("Error closing labelNameToLabelValues index DB: ", err) + } + if lastError == nil && !p.isDirty() { + dirtyFileRemoveError = os.Remove(p.dirtyFileName) + } + if err := p.fLock.Release(); err != nil { + lastError = err + log.Error("Error releasing file lock: ", err) + } + if dirtyFileRemoveError != nil { + // On Windows, removing the dirty file before unlocking is not + // possible. So remove it here if it failed above. 
+ lastError = os.Remove(p.dirtyFileName) + } + return lastError +} + +func (p *persistence) dirNameForFingerprint(fp model.Fingerprint) string { + fpStr := fp.String() + return filepath.Join(p.basePath, fpStr[0:seriesDirNameLen]) +} + +func (p *persistence) fileNameForFingerprint(fp model.Fingerprint) string { + fpStr := fp.String() + return filepath.Join(p.basePath, fpStr[0:seriesDirNameLen], fpStr[seriesDirNameLen:]+seriesFileSuffix) +} + +func (p *persistence) tempFileNameForFingerprint(fp model.Fingerprint) string { + fpStr := fp.String() + return filepath.Join(p.basePath, fpStr[0:seriesDirNameLen], fpStr[seriesDirNameLen:]+seriesTempFileSuffix) +} + +func (p *persistence) openChunkFileForWriting(fp model.Fingerprint) (*os.File, error) { + if err := os.MkdirAll(p.dirNameForFingerprint(fp), 0700); err != nil { + return nil, err + } + return os.OpenFile(p.fileNameForFingerprint(fp), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0640) + // NOTE: Although the file was opened for append, + // f.Seek(0, io.SeekCurrent) + // would now return '0, nil', so we cannot check for a consistent file length right now. + // However, the chunkIndexForOffset function is doing that check, so a wrong file length + // would still be detected. +} + +// closeChunkFile first syncs the provided file if mandated so by the sync +// strategy. Then it closes the file. Errors are logged. 
+func (p *persistence) closeChunkFile(f *os.File) { + if p.shouldSync() { + if err := f.Sync(); err != nil { + log.Error("Error syncing file:", err) + } + } + if err := f.Close(); err != nil { + log.Error("Error closing chunk file:", err) + } +} + +func (p *persistence) openChunkFileForReading(fp model.Fingerprint) (*os.File, error) { + return os.Open(p.fileNameForFingerprint(fp)) +} + +func (p *persistence) headsFileName() string { + return filepath.Join(p.basePath, headsFileName) +} + +func (p *persistence) headsTempFileName() string { + return filepath.Join(p.basePath, headsTempFileName) +} + +func (p *persistence) mappingsFileName() string { + return filepath.Join(p.basePath, mappingsFileName) +} + +func (p *persistence) mappingsTempFileName() string { + return filepath.Join(p.basePath, mappingsTempFileName) +} + +func (p *persistence) processIndexingQueue() { + batchSize := 0 + nameToValues := index.LabelNameLabelValuesMapping{} + pairToFPs := index.LabelPairFingerprintsMapping{} + batchTimeout := time.NewTimer(indexingBatchTimeout) + defer batchTimeout.Stop() + + commitBatch := func() { + p.indexingBatchSizes.Observe(float64(batchSize)) + defer func(begin time.Time) { + p.indexingBatchDuration.Observe(time.Since(begin).Seconds()) + }(time.Now()) + + if err := p.labelPairToFingerprints.IndexBatch(pairToFPs); err != nil { + log.Error("Error indexing label pair to fingerprints batch: ", err) + p.setDirty(err) + } + if err := p.labelNameToLabelValues.IndexBatch(nameToValues); err != nil { + log.Error("Error indexing label name to label values batch: ", err) + p.setDirty(err) + } + batchSize = 0 + nameToValues = index.LabelNameLabelValuesMapping{} + pairToFPs = index.LabelPairFingerprintsMapping{} + batchTimeout.Reset(indexingBatchTimeout) + } + + var flush chan chan int +loop: + for { + // Only process flush requests if the queue is currently empty. 
+ if len(p.indexingQueue) == 0 { + flush = p.indexingFlush + } else { + flush = nil + } + select { + case <-batchTimeout.C: + // Only commit if we have something to commit _and_ + // nothing is waiting in the queue to be picked up. That + // prevents a death spiral if the LookupSet calls below + // are slow for some reason. + if batchSize > 0 && len(p.indexingQueue) == 0 { + commitBatch() + } else { + batchTimeout.Reset(indexingBatchTimeout) + } + case r := <-flush: + if batchSize > 0 { + commitBatch() + } + r <- len(p.indexingQueue) + case op, ok := <-p.indexingQueue: + if !ok { + if batchSize > 0 { + commitBatch() + } + break loop + } + + batchSize++ + for ln, lv := range op.metric { + lp := model.LabelPair{Name: ln, Value: lv} + baseFPs, ok := pairToFPs[lp] + if !ok { + var err error + baseFPs, _, err = p.labelPairToFingerprints.LookupSet(lp) + if err != nil { + log.Errorf("Error looking up label pair %v: %s", lp, err) + continue + } + pairToFPs[lp] = baseFPs + } + baseValues, ok := nameToValues[ln] + if !ok { + var err error + baseValues, _, err = p.labelNameToLabelValues.LookupSet(ln) + if err != nil { + log.Errorf("Error looking up label name %v: %s", ln, err) + continue + } + nameToValues[ln] = baseValues + } + switch op.opType { + case add: + baseFPs[op.fingerprint] = struct{}{} + baseValues[lv] = struct{}{} + case remove: + delete(baseFPs, op.fingerprint) + if len(baseFPs) == 0 { + delete(baseValues, lv) + } + default: + panic("unknown op type") + } + } + + if batchSize >= indexingMaxBatchSize { + commitBatch() + } + } + } + close(p.indexingStopped) +} + +// checkpointFPMappings persists the fingerprint mappings. The caller has to +// ensure that the provided mappings are not changed concurrently. This method +// is only called upon shutdown or during crash recovery, when no samples are +// ingested. +// +// Description of the file format, v1: +// +// (1) Magic string (const mappingsMagicString). 
+// +// (2) Uvarint-encoded format version (const mappingsFormatVersion). +// +// (3) Uvarint-encoded number of mappings in fpMappings. +// +// (4) Repeated once per mapping: +// +// (4.1) The raw fingerprint as big-endian uint64. +// +// (4.2) The uvarint-encoded number of sub-mappings for the raw fingerprint. +// +// (4.3) Repeated once per sub-mapping: +// +// (4.3.1) The uvarint-encoded length of the unique metric string. +// (4.3.2) The unique metric string. +// (4.3.3) The mapped fingerprint as big-endian uint64. +func (p *persistence) checkpointFPMappings(fpm fpMappings) (err error) { + log.Info("Checkpointing fingerprint mappings...") + begin := time.Now() + f, err := os.OpenFile(p.mappingsTempFileName(), os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0640) + if err != nil { + return + } + + defer func() { + syncErr := f.Sync() + closeErr := f.Close() + if err != nil { + return + } + err = syncErr + if err != nil { + return + } + err = closeErr + if err != nil { + return + } + err = os.Rename(p.mappingsTempFileName(), p.mappingsFileName()) + duration := time.Since(begin) + log.Infof("Done checkpointing fingerprint mappings in %v.", duration) + }() + + w := bufio.NewWriterSize(f, fileBufSize) + + if _, err = w.WriteString(mappingsMagicString); err != nil { + return + } + if _, err = codable.EncodeUvarint(w, mappingsFormatVersion); err != nil { + return + } + if _, err = codable.EncodeUvarint(w, uint64(len(fpm))); err != nil { + return + } + + for fp, mappings := range fpm { + if err = codable.EncodeUint64(w, uint64(fp)); err != nil { + return + } + if _, err = codable.EncodeUvarint(w, uint64(len(mappings))); err != nil { + return + } + for ms, mappedFP := range mappings { + if _, err = codable.EncodeUvarint(w, uint64(len(ms))); err != nil { + return + } + if _, err = w.WriteString(ms); err != nil { + return + } + if err = codable.EncodeUint64(w, uint64(mappedFP)); err != nil { + return + } + } + } + err = w.Flush() + return +} + +// loadFPMappings loads the fingerprint 
mappings. It also returns the highest +// mapped fingerprint and any error encountered. If p.mappingsFileName is not +// found, the method returns (fpMappings{}, 0, nil). Do not call concurrently +// with checkpointFPMappings. +func (p *persistence) loadFPMappings() (fpMappings, model.Fingerprint, error) { + fpm := fpMappings{} + var highestMappedFP model.Fingerprint + + f, err := os.Open(p.mappingsFileName()) + if os.IsNotExist(err) { + return fpm, 0, nil + } + if err != nil { + return nil, 0, err + } + defer f.Close() + r := bufio.NewReaderSize(f, fileBufSize) + + buf := make([]byte, len(mappingsMagicString)) + if _, err := io.ReadFull(r, buf); err != nil { + return nil, 0, err + } + magic := string(buf) + if magic != mappingsMagicString { + return nil, 0, fmt.Errorf( + "unexpected magic string, want %q, got %q", + mappingsMagicString, magic, + ) + } + version, err := binary.ReadUvarint(r) + if version != mappingsFormatVersion || err != nil { + return nil, 0, fmt.Errorf("unknown fingerprint mappings format version, want %d", mappingsFormatVersion) + } + numRawFPs, err := binary.ReadUvarint(r) + if err != nil { + return nil, 0, err + } + for ; numRawFPs > 0; numRawFPs-- { + rawFP, err := codable.DecodeUint64(r) + if err != nil { + return nil, 0, err + } + numMappings, err := binary.ReadUvarint(r) + if err != nil { + return nil, 0, err + } + mappings := make(map[string]model.Fingerprint, numMappings) + for ; numMappings > 0; numMappings-- { + lenMS, err := binary.ReadUvarint(r) + if err != nil { + return nil, 0, err + } + buf := make([]byte, lenMS) + if _, err := io.ReadFull(r, buf); err != nil { + return nil, 0, err + } + fp, err := codable.DecodeUint64(r) + if err != nil { + return nil, 0, err + } + mappedFP := model.Fingerprint(fp) + if mappedFP > highestMappedFP { + highestMappedFP = mappedFP + } + mappings[string(buf)] = mappedFP + } + fpm[model.Fingerprint(rawFP)] = mappings + } + return fpm, highestMappedFP, nil +} + +func (p *persistence) writeChunks(w 
io.Writer, chunks []chunk.Chunk) error { + b := p.bufPool.Get().([]byte) + defer func() { + // buf may change below. An unwrapped 'defer p.bufPool.Put(buf)' + // would only put back the original buf. + p.bufPool.Put(b) + }() + numChunks := len(chunks) + + for batchSize := chunkMaxBatchSize; len(chunks) > 0; chunks = chunks[batchSize:] { + if batchSize > len(chunks) { + batchSize = len(chunks) + } + writeSize := batchSize * chunkLenWithHeader + if cap(b) < writeSize { + b = make([]byte, writeSize) + } + b = b[:writeSize] + + for i, chunk := range chunks[:batchSize] { + if err := writeChunkHeader(b[i*chunkLenWithHeader:], chunk); err != nil { + return err + } + if err := chunk.MarshalToBuf(b[i*chunkLenWithHeader+chunkHeaderLen:]); err != nil { + return err + } + } + if _, err := w.Write(b); err != nil { + return err + } + } + p.seriesChunksPersisted.Observe(float64(numChunks)) + return nil +} + +func offsetForChunkIndex(i int) int64 { + return int64(i * chunkLenWithHeader) +} + +func chunkIndexForOffset(offset int64) (int, error) { + if int(offset)%chunkLenWithHeader != 0 { + return -1, fmt.Errorf( + "offset %d is not a multiple of on-disk chunk length %d", + offset, chunkLenWithHeader, + ) + } + return int(offset) / chunkLenWithHeader, nil +} + +func writeChunkHeader(header []byte, c chunk.Chunk) error { + header[chunkHeaderTypeOffset] = byte(c.Encoding()) + binary.LittleEndian.PutUint64( + header[chunkHeaderFirstTimeOffset:], + uint64(c.FirstTime()), + ) + lt, err := c.NewIterator().LastTimestamp() + if err != nil { + return err + } + binary.LittleEndian.PutUint64( + header[chunkHeaderLastTimeOffset:], + uint64(lt), + ) + return nil +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/series.go b/vendor/github.com/prometheus/prometheus/storage/local/series.go new file mode 100644 index 000000000..f58371746 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/series.go @@ -0,0 +1,728 @@ +// Copyright 2014 The Prometheus Authors 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package local + +import ( + "fmt" + "sort" + "sync" + "time" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/storage/metric" +) + +// fingerprintSeriesPair pairs a fingerprint with a memorySeries pointer. +type fingerprintSeriesPair struct { + fp model.Fingerprint + series *memorySeries +} + +// seriesMap maps fingerprints to memory series. All its methods are +// goroutine-safe. A SeriesMap is effectively is a goroutine-safe version of +// map[model.Fingerprint]*memorySeries. +type seriesMap struct { + mtx sync.RWMutex + m map[model.Fingerprint]*memorySeries +} + +// newSeriesMap returns a newly allocated empty seriesMap. To create a seriesMap +// based on a prefilled map, use an explicit initializer. +func newSeriesMap() *seriesMap { + return &seriesMap{m: make(map[model.Fingerprint]*memorySeries)} +} + +// length returns the number of mappings in the seriesMap. +func (sm *seriesMap) length() int { + sm.mtx.RLock() + defer sm.mtx.RUnlock() + + return len(sm.m) +} + +// get returns a memorySeries for a fingerprint. Return values have the same +// semantics as the native Go map. +func (sm *seriesMap) get(fp model.Fingerprint) (s *memorySeries, ok bool) { + sm.mtx.RLock() + s, ok = sm.m[fp] + // Note that the RUnlock is not done via defer for performance reasons. 
+ // TODO(beorn7): Once https://github.com/golang/go/issues/14939 is + // fixed, revert to the usual defer idiom. + sm.mtx.RUnlock() + return +} + +// put adds a mapping to the seriesMap. It panics if s == nil. +func (sm *seriesMap) put(fp model.Fingerprint, s *memorySeries) { + sm.mtx.Lock() + defer sm.mtx.Unlock() + + if s == nil { + panic("tried to add nil pointer to seriesMap") + } + sm.m[fp] = s +} + +// del removes a mapping from the series Map. +func (sm *seriesMap) del(fp model.Fingerprint) { + sm.mtx.Lock() + defer sm.mtx.Unlock() + + delete(sm.m, fp) +} + +// iter returns a channel that produces all mappings in the seriesMap. The +// channel will be closed once all fingerprints have been received. Not +// consuming all fingerprints from the channel will leak a goroutine. The +// semantics of concurrent modification of seriesMap is the similar as the one +// for iterating over a map with a 'range' clause. However, if the next element +// in iteration order is removed after the current element has been received +// from the channel, it will still be produced by the channel. +func (sm *seriesMap) iter() <-chan fingerprintSeriesPair { + ch := make(chan fingerprintSeriesPair) + go func() { + sm.mtx.RLock() + for fp, s := range sm.m { + sm.mtx.RUnlock() + ch <- fingerprintSeriesPair{fp, s} + sm.mtx.RLock() + } + sm.mtx.RUnlock() + close(ch) + }() + return ch +} + +// sortedFPs returns a sorted slice of all the fingerprints in the seriesMap. +func (sm *seriesMap) sortedFPs() model.Fingerprints { + sm.mtx.RLock() + fps := make(model.Fingerprints, 0, len(sm.m)) + for fp := range sm.m { + fps = append(fps, fp) + } + sm.mtx.RUnlock() + + // Sorting could take some time, so do it outside of the lock. + sort.Sort(fps) + return fps +} + +type memorySeries struct { + metric model.Metric + // Sorted by start time, overlapping chunk ranges are forbidden. 
+ chunkDescs []*chunk.Desc + // The index (within chunkDescs above) of the first chunk.Desc that + // points to a non-persisted chunk. If all chunks are persisted, then + // persistWatermark == len(chunkDescs). + persistWatermark int + // The modification time of the series file. The zero value of time.Time + // is used to mark an unknown modification time. + modTime time.Time + // The chunkDescs in memory might not have all the chunkDescs for the + // chunks that are persisted to disk. The missing chunkDescs are all + // contiguous and at the tail end. chunkDescsOffset is the index of the + // chunk on disk that corresponds to the first chunk.Desc in memory. If + // it is 0, the chunkDescs are all loaded. A value of -1 denotes a + // special case: There are chunks on disk, but the offset to the + // chunkDescs in memory is unknown. Also, in this special case, there is + // no overlap between chunks on disk and chunks in memory (implying that + // upon first persisting of a chunk in memory, the offset has to be + // set). + chunkDescsOffset int + // The savedFirstTime field is used as a fallback when the + // chunkDescsOffset is not 0. It can be used to save the FirstTime of the + // first chunk before its chunk desc is evicted. In doubt, this field is + // just set to the oldest possible timestamp. + savedFirstTime model.Time + // The timestamp of the last sample in this series. Needed for fast + // access for federation and to ensure timestamp monotonicity during + // ingestion. + lastTime model.Time + // The last ingested sample value. Needed for fast access for + // federation. + lastSampleValue model.SampleValue + // Whether lastSampleValue has been set already. + lastSampleValueSet bool + // Whether the current head chunk has already been finished. If true, + // the current head chunk must not be modified anymore. + headChunkClosed bool + // Whether the current head chunk is used by an iterator. 
In that case, + // a non-closed head chunk has to be cloned before more samples are + // appended. + headChunkUsedByIterator bool + // Whether the series is inconsistent with the last checkpoint in a way + // that would require a disk seek during crash recovery. + dirty bool +} + +// newMemorySeries returns a pointer to a newly allocated memorySeries for the +// given metric. chunkDescs and modTime in the new series are set according to +// the provided parameters. chunkDescs can be nil or empty if this is a +// genuinely new time series (i.e. not one that is being unarchived). In that +// case, headChunkClosed is set to false, and firstTime and lastTime are both +// set to model.Earliest. The zero value for modTime can be used if the +// modification time of the series file is unknown (e.g. if this is a genuinely +// new series). +func newMemorySeries(m model.Metric, chunkDescs []*chunk.Desc, modTime time.Time) (*memorySeries, error) { + var err error + firstTime := model.Earliest + lastTime := model.Earliest + if len(chunkDescs) > 0 { + firstTime = chunkDescs[0].FirstTime() + if lastTime, err = chunkDescs[len(chunkDescs)-1].LastTime(); err != nil { + return nil, err + } + } + return &memorySeries{ + metric: m, + chunkDescs: chunkDescs, + headChunkClosed: len(chunkDescs) > 0, + savedFirstTime: firstTime, + lastTime: lastTime, + persistWatermark: len(chunkDescs), + modTime: modTime, + }, nil +} + +// add adds a sample pair to the series. It returns the number of newly +// completed chunks (which are now eligible for persistence). +// +// The caller must have locked the fingerprint of the series. 
+func (s *memorySeries) add(v model.SamplePair) (int, error) { + if len(s.chunkDescs) == 0 || s.headChunkClosed { + newHead := chunk.NewDesc(chunk.New(), v.Timestamp) + s.chunkDescs = append(s.chunkDescs, newHead) + s.headChunkClosed = false + } else if s.headChunkUsedByIterator && s.head().RefCount() > 1 { + // We only need to clone the head chunk if the current head + // chunk was used in an iterator at all and if the refCount is + // still greater than the 1 we always have because the head + // chunk is not yet persisted. The latter is just an + // approximation. We will still clone unnecessarily if an older + // iterator using a previous version of the head chunk is still + // around and keep the head chunk pinned. We needed to track + // pins by version of the head chunk, which is probably not + // worth the effort. + chunk.Ops.WithLabelValues(chunk.Clone).Inc() + // No locking needed here because a non-persisted head chunk can + // not get evicted concurrently. + s.head().C = s.head().C.Clone() + s.headChunkUsedByIterator = false + } + + chunks, err := s.head().Add(v) + if err != nil { + return 0, err + } + s.head().C = chunks[0] + + for _, c := range chunks[1:] { + s.chunkDescs = append(s.chunkDescs, chunk.NewDesc(c, c.FirstTime())) + } + + // Populate lastTime of now-closed chunks. + for _, cd := range s.chunkDescs[len(s.chunkDescs)-len(chunks) : len(s.chunkDescs)-1] { + if err := cd.MaybePopulateLastTime(); err != nil { + return 0, err + } + } + + s.lastTime = v.Timestamp + s.lastSampleValue = v.Value + s.lastSampleValueSet = true + return len(chunks) - 1, nil +} + +// maybeCloseHeadChunk closes the head chunk if it has not been touched for the +// provided duration. It returns whether the head chunk was closed. If the head +// chunk is already closed, the method is a no-op and returns false. +// +// The caller must have locked the fingerprint of the series. 
+func (s *memorySeries) maybeCloseHeadChunk(timeout time.Duration) (bool, error) { + if s.headChunkClosed { + return false, nil + } + if time.Since(s.lastTime.Time()) > timeout { + s.headChunkClosed = true + // Since we cannot modify the head chunk from now on, we + // don't need to bother with cloning anymore. + s.headChunkUsedByIterator = false + return true, s.head().MaybePopulateLastTime() + } + return false, nil +} + +// evictChunkDescs evicts chunkDescs. lenToEvict is the index within the current +// chunkDescs of the oldest chunk that is not evicted. +func (s *memorySeries) evictChunkDescs(lenToEvict int) { + if lenToEvict < 1 { + return + } + if s.chunkDescsOffset < 0 { + panic("chunk desc eviction requested with unknown chunk desc offset") + } + lenToKeep := len(s.chunkDescs) - lenToEvict + s.savedFirstTime = s.firstTime() + s.chunkDescsOffset += lenToEvict + s.persistWatermark -= lenToEvict + chunk.DescOps.WithLabelValues(chunk.Evict).Add(float64(lenToEvict)) + chunk.NumMemDescs.Sub(float64(lenToEvict)) + s.chunkDescs = append( + make([]*chunk.Desc, 0, lenToKeep), + s.chunkDescs[lenToEvict:]..., + ) + s.dirty = true +} + +// dropChunks removes chunkDescs older than t. The caller must have locked the +// fingerprint of the series. +func (s *memorySeries) dropChunks(t model.Time) error { + keepIdx := len(s.chunkDescs) + for i, cd := range s.chunkDescs { + lt, err := cd.LastTime() + if err != nil { + return err + } + if !lt.Before(t) { + keepIdx = i + break + } + } + if keepIdx == len(s.chunkDescs) && !s.headChunkClosed { + // Never drop an open head chunk. + keepIdx-- + } + if keepIdx <= 0 { + // Nothing to drop. 
+ return nil + } + s.chunkDescs = append( + make([]*chunk.Desc, 0, len(s.chunkDescs)-keepIdx), + s.chunkDescs[keepIdx:]..., + ) + s.persistWatermark -= keepIdx + if s.persistWatermark < 0 { + panic("dropped unpersisted chunks from memory") + } + if s.chunkDescsOffset != -1 { + s.chunkDescsOffset += keepIdx + } + chunk.NumMemDescs.Sub(float64(keepIdx)) + s.dirty = true + return nil +} + +// preloadChunks is an internal helper method. +func (s *memorySeries) preloadChunks( + indexes []int, fp model.Fingerprint, mss *MemorySeriesStorage, +) (SeriesIterator, error) { + loadIndexes := []int{} + pinnedChunkDescs := make([]*chunk.Desc, 0, len(indexes)) + for _, idx := range indexes { + cd := s.chunkDescs[idx] + pinnedChunkDescs = append(pinnedChunkDescs, cd) + cd.Pin(mss.evictRequests) // Have to pin everything first to prevent immediate eviction on chunk loading. + if cd.IsEvicted() { + loadIndexes = append(loadIndexes, idx) + } + } + chunk.Ops.WithLabelValues(chunk.Pin).Add(float64(len(pinnedChunkDescs))) + + if len(loadIndexes) > 0 { + if s.chunkDescsOffset == -1 { + panic("requested loading chunks from persistence in a situation where we must not have persisted data for chunk descriptors in memory") + } + chunks, err := mss.loadChunks(fp, loadIndexes, s.chunkDescsOffset) + if err != nil { + // Unpin the chunks since we won't return them as pinned chunks now. 
+ for _, cd := range pinnedChunkDescs { + cd.Unpin(mss.evictRequests) + } + chunk.Ops.WithLabelValues(chunk.Unpin).Add(float64(len(pinnedChunkDescs))) + return nopIter, err + } + for i, c := range chunks { + s.chunkDescs[loadIndexes[i]].SetChunk(c) + } + } + + if !s.headChunkClosed && indexes[len(indexes)-1] == len(s.chunkDescs)-1 { + s.headChunkUsedByIterator = true + } + + curriedQuarantineSeries := func(err error) { + mss.quarantineSeries(fp, s.metric, err) + } + + iter := &boundedIterator{ + it: s.newIterator(pinnedChunkDescs, curriedQuarantineSeries, mss.evictRequests), + start: model.Now().Add(-mss.dropAfter), + } + + return iter, nil +} + +// newIterator returns a new SeriesIterator for the provided chunkDescs (which +// must be pinned). +// +// The caller must have locked the fingerprint of the memorySeries. +func (s *memorySeries) newIterator( + pinnedChunkDescs []*chunk.Desc, + quarantine func(error), + evictRequests chan<- chunk.EvictRequest, +) SeriesIterator { + chunks := make([]chunk.Chunk, 0, len(pinnedChunkDescs)) + for _, cd := range pinnedChunkDescs { + // It's OK to directly access cd.c here (without locking) as the + // series FP is locked and the chunk is pinned. + chunks = append(chunks, cd.C) + } + return &memorySeriesIterator{ + chunks: chunks, + chunkIts: make([]chunk.Iterator, len(chunks)), + quarantine: quarantine, + metric: s.metric, + pinnedChunkDescs: pinnedChunkDescs, + evictRequests: evictRequests, + } +} + +// preloadChunksForInstant preloads chunks for the latest value in the given +// range. If the last sample saved in the memorySeries itself is the latest +// value in the given range, it will in fact preload zero chunks and just take +// that value. 
+func (s *memorySeries) preloadChunksForInstant( + fp model.Fingerprint, + from model.Time, through model.Time, + mss *MemorySeriesStorage, +) (SeriesIterator, error) { + // If we have a lastSamplePair in the series, and this last samplePair + // is in the interval, just take it in a singleSampleSeriesIterator. No + // need to pin or load anything. + lastSample := s.lastSamplePair() + if !through.Before(lastSample.Timestamp) && + !from.After(lastSample.Timestamp) && + lastSample != model.ZeroSamplePair { + iter := &boundedIterator{ + it: &singleSampleSeriesIterator{ + samplePair: lastSample, + metric: s.metric, + }, + start: model.Now().Add(-mss.dropAfter), + } + return iter, nil + } + // If we are here, we are out of luck and have to delegate to the more + // expensive method. + return s.preloadChunksForRange(fp, from, through, mss) +} + +// preloadChunksForRange loads chunks for the given range from the persistence. +// The caller must have locked the fingerprint of the series. +func (s *memorySeries) preloadChunksForRange( + fp model.Fingerprint, + from model.Time, through model.Time, + mss *MemorySeriesStorage, +) (SeriesIterator, error) { + firstChunkDescTime := model.Latest + if len(s.chunkDescs) > 0 { + firstChunkDescTime = s.chunkDescs[0].FirstTime() + } + if s.chunkDescsOffset != 0 && from.Before(firstChunkDescTime) { + cds, err := mss.loadChunkDescs(fp, s.persistWatermark) + if err != nil { + return nopIter, err + } + if s.chunkDescsOffset != -1 && len(cds) != s.chunkDescsOffset { + return nopIter, fmt.Errorf( + "unexpected number of chunk descs loaded for fingerprint %v: expected %d, got %d", + fp, s.chunkDescsOffset, len(cds), + ) + } + s.persistWatermark += len(cds) + s.chunkDescs = append(cds, s.chunkDescs...) 
+ s.chunkDescsOffset = 0 + if len(s.chunkDescs) > 0 { + firstChunkDescTime = s.chunkDescs[0].FirstTime() + } + } + + if len(s.chunkDescs) == 0 || through.Before(firstChunkDescTime) { + return nopIter, nil + } + + // Find first chunk with start time after "from". + fromIdx := sort.Search(len(s.chunkDescs), func(i int) bool { + return s.chunkDescs[i].FirstTime().After(from) + }) + // Find first chunk with start time after "through". + throughIdx := sort.Search(len(s.chunkDescs), func(i int) bool { + return s.chunkDescs[i].FirstTime().After(through) + }) + if fromIdx == len(s.chunkDescs) { + // Even the last chunk starts before "from". Find out if the + // series ends before "from" and we don't need to do anything. + lt, err := s.chunkDescs[len(s.chunkDescs)-1].LastTime() + if err != nil { + return nopIter, err + } + if lt.Before(from) { + return nopIter, nil + } + } + if fromIdx > 0 { + fromIdx-- + } + if throughIdx == len(s.chunkDescs) { + throughIdx-- + } + if fromIdx > throughIdx { + // Guard against nonsensical result. The caller will quarantine the series with a meaningful log entry. + return nopIter, fmt.Errorf("fromIdx=%d is greater than throughIdx=%d, likely caused by data corruption", fromIdx, throughIdx) + } + + pinIndexes := make([]int, 0, throughIdx-fromIdx+1) + for i := fromIdx; i <= throughIdx; i++ { + pinIndexes = append(pinIndexes, i) + } + return s.preloadChunks(pinIndexes, fp, mss) +} + +// head returns a pointer to the head chunk descriptor. The caller must have +// locked the fingerprint of the memorySeries. This method will panic if this +// series has no chunk descriptors. +func (s *memorySeries) head() *chunk.Desc { + return s.chunkDescs[len(s.chunkDescs)-1] +} + +// firstTime returns the timestamp of the first sample in the series. +// +// The caller must have locked the fingerprint of the memorySeries. 
+func (s *memorySeries) firstTime() model.Time { + if s.chunkDescsOffset == 0 && len(s.chunkDescs) > 0 { + return s.chunkDescs[0].FirstTime() + } + return s.savedFirstTime +} + +// lastSamplePair returns the last ingested SamplePair. It returns +// model.ZeroSamplePair if this memorySeries has never received a sample (via the add +// method), which is the case for freshly unarchived series or newly created +// ones and also for all series after a server restart. However, in that case, +// series will most likely be considered stale anyway. +// +// The caller must have locked the fingerprint of the memorySeries. +func (s *memorySeries) lastSamplePair() model.SamplePair { + if !s.lastSampleValueSet { + return model.ZeroSamplePair + } + return model.SamplePair{ + Timestamp: s.lastTime, + Value: s.lastSampleValue, + } +} + +// chunksToPersist returns a slice of chunkDescs eligible for persistence. It's +// the caller's responsibility to actually persist the returned chunks +// afterwards. The method sets the persistWatermark and the dirty flag +// accordingly. +// +// The caller must have locked the fingerprint of the series. +func (s *memorySeries) chunksToPersist() []*chunk.Desc { + newWatermark := len(s.chunkDescs) + if !s.headChunkClosed { + newWatermark-- + } + if newWatermark == s.persistWatermark { + return nil + } + cds := s.chunkDescs[s.persistWatermark:newWatermark] + s.dirty = true + s.persistWatermark = newWatermark + return cds +} + +// memorySeriesIterator implements SeriesIterator. +type memorySeriesIterator struct { + // Last chunk.Iterator used by ValueAtOrBeforeTime. + chunkIt chunk.Iterator + // Caches chunkIterators. + chunkIts []chunk.Iterator + // The actual sample chunks. + chunks []chunk.Chunk + // Call to quarantine the series this iterator belongs to. + quarantine func(error) + // The metric corresponding to the iterator. + metric model.Metric + // Chunks that were pinned for this iterator. 
+ pinnedChunkDescs []*chunk.Desc + // Where to send evict requests when unpinning pinned chunks. + evictRequests chan<- chunk.EvictRequest +} + +// ValueAtOrBeforeTime implements SeriesIterator. +func (it *memorySeriesIterator) ValueAtOrBeforeTime(t model.Time) model.SamplePair { + // The most common case. We are iterating through a chunk. + if it.chunkIt != nil { + containsT, err := it.chunkIt.Contains(t) + if err != nil { + it.quarantine(err) + return model.ZeroSamplePair + } + if containsT { + if it.chunkIt.FindAtOrBefore(t) { + return it.chunkIt.Value() + } + if it.chunkIt.Err() != nil { + it.quarantine(it.chunkIt.Err()) + } + return model.ZeroSamplePair + } + } + + if len(it.chunks) == 0 { + return model.ZeroSamplePair + } + + // Find the last chunk where FirstTime() is before or equal to t. + l := len(it.chunks) - 1 + i := sort.Search(len(it.chunks), func(i int) bool { + return !it.chunks[l-i].FirstTime().After(t) + }) + if i == len(it.chunks) { + // Even the first chunk starts after t. + return model.ZeroSamplePair + } + it.chunkIt = it.chunkIterator(l - i) + if it.chunkIt.FindAtOrBefore(t) { + return it.chunkIt.Value() + } + if it.chunkIt.Err() != nil { + it.quarantine(it.chunkIt.Err()) + } + return model.ZeroSamplePair +} + +// RangeValues implements SeriesIterator. +func (it *memorySeriesIterator) RangeValues(in metric.Interval) []model.SamplePair { + // Find the first chunk for which the first sample is within the interval. + i := sort.Search(len(it.chunks), func(i int) bool { + return !it.chunks[i].FirstTime().Before(in.OldestInclusive) + }) + // Only now check the last timestamp of the previous chunk (which is + // fairly expensive). 
+ if i > 0 { + lt, err := it.chunkIterator(i - 1).LastTimestamp() + if err != nil { + it.quarantine(err) + return nil + } + if !lt.Before(in.OldestInclusive) { + i-- + } + } + + values := []model.SamplePair{} + for j, c := range it.chunks[i:] { + if c.FirstTime().After(in.NewestInclusive) { + break + } + chValues, err := chunk.RangeValues(it.chunkIterator(i+j), in) + if err != nil { + it.quarantine(err) + return nil + } + values = append(values, chValues...) + } + return values +} + +func (it *memorySeriesIterator) Metric() metric.Metric { + return metric.Metric{Metric: it.metric} +} + +// chunkIterator returns the chunk.Iterator for the chunk at position i (and +// creates it if needed). +func (it *memorySeriesIterator) chunkIterator(i int) chunk.Iterator { + chunkIt := it.chunkIts[i] + if chunkIt == nil { + chunkIt = it.chunks[i].NewIterator() + it.chunkIts[i] = chunkIt + } + return chunkIt +} + +func (it *memorySeriesIterator) Close() { + for _, cd := range it.pinnedChunkDescs { + cd.Unpin(it.evictRequests) + } + chunk.Ops.WithLabelValues(chunk.Unpin).Add(float64(len(it.pinnedChunkDescs))) +} + +// singleSampleSeriesIterator implements Series Iterator. It is a "shortcut +// iterator" that returns a single sample only. The sample is saved in the +// iterator itself, so no chunks need to be pinned. +type singleSampleSeriesIterator struct { + samplePair model.SamplePair + metric model.Metric +} + +// ValueAtTime implements SeriesIterator. +func (it *singleSampleSeriesIterator) ValueAtOrBeforeTime(t model.Time) model.SamplePair { + if it.samplePair.Timestamp.After(t) { + return model.ZeroSamplePair + } + return it.samplePair +} + +// RangeValues implements SeriesIterator. 
+func (it *singleSampleSeriesIterator) RangeValues(in metric.Interval) []model.SamplePair { + if it.samplePair.Timestamp.After(in.NewestInclusive) || + it.samplePair.Timestamp.Before(in.OldestInclusive) { + return []model.SamplePair{} + } + return []model.SamplePair{it.samplePair} +} + +func (it *singleSampleSeriesIterator) Metric() metric.Metric { + return metric.Metric{Metric: it.metric} +} + +// Close implements SeriesIterator. +func (it *singleSampleSeriesIterator) Close() {} + +// nopSeriesIterator implements Series Iterator. It never returns any values. +type nopSeriesIterator struct{} + +// ValueAtTime implements SeriesIterator. +func (i nopSeriesIterator) ValueAtOrBeforeTime(t model.Time) model.SamplePair { + return model.ZeroSamplePair +} + +// RangeValues implements SeriesIterator. +func (i nopSeriesIterator) RangeValues(in metric.Interval) []model.SamplePair { + return []model.SamplePair{} +} + +// Metric implements SeriesIterator. +func (i nopSeriesIterator) Metric() metric.Metric { + return metric.Metric{} +} + +// Close implements SeriesIterator. +func (i nopSeriesIterator) Close() {} + +var nopIter nopSeriesIterator // A nopSeriesIterator for convenience. Can be shared. diff --git a/vendor/github.com/prometheus/prometheus/storage/local/storage.go b/vendor/github.com/prometheus/prometheus/storage/local/storage.go new file mode 100644 index 000000000..c1caef67e --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/storage.go @@ -0,0 +1,2029 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// Package local contains the local time series storage used by Prometheus. +package local + +import ( + "container/list" + "errors" + "fmt" + "math/rand" + "runtime" + "sort" + "sync" + "sync/atomic" + "time" + + opentracing "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + "golang.org/x/net/context" + + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/storage/metric" +) + +const ( + evictRequestsCap = 1024 + quarantineRequestsCap = 1024 + + // See waitForNextFP. + fpMaxSweepTime = 6 * time.Hour + fpMaxWaitDuration = 10 * time.Second + + // See handleEvictList. This should be clearly shorter than the usual CG + // interval. On the other hand, each evict check calls ReadMemStats, + // which involves stopping the world (at least up to Go1.8). Hence, + // don't just set this to a very short interval. + evictInterval = time.Second + + // Constants to control the hysteresis of entering and leaving "rushed + // mode". In rushed mode, the dirty series count is ignored for + // checkpointing, series are maintained as frequently as possible, and + // series files are not synced if the adaptive sync strategy is used. + persintenceUrgencyScoreForEnteringRushedMode = 0.8 + persintenceUrgencyScoreForLeavingRushedMode = 0.7 + + // This factor times -storage.local.memory-chunks is the number of + // memory chunks we tolerate before throttling the storage. It is also a + // basis for calculating the persistenceUrgencyScore. + toleranceFactorMemChunks = 1.1 + // This factor times -storage.local.max-chunks-to-persist is the minimum + // required number of chunks waiting for persistence before the number + // of chunks in memory may influence the persistenceUrgencyScore. 
(In + // other words: if there are no chunks to persist, it doesn't help chunk + // eviction if we speed up persistence.) + factorMinChunksToPersist = 0.2 + + // Threshold for when to stop using LabelMatchers to retrieve and + // intersect fingerprints. The rationale here is that looking up more + // fingerprints has diminishing returns if we already have narrowed down + // the possible fingerprints significantly. It is then easier to simply + // lookup the metrics for all the fingerprints and directly compare them + // to the matchers. Since a fingerprint lookup for an Equal matcher is + // much less expensive, there is a lower threshold for that case. + // TODO(beorn7): These numbers need to be tweaked, probably a bit lower. + // 5x higher numbers have resulted in slightly worse performance in a + // real-life production scenario. + fpEqualMatchThreshold = 1000 + fpOtherMatchThreshold = 10000 + + selectorsTag = "selectors" + fromTag = "from" + throughTag = "through" + tsTag = "ts" + numSeries = "num_series" +) + +type quarantineRequest struct { + fp model.Fingerprint + metric model.Metric + reason error +} + +// SyncStrategy is an enum to select a sync strategy for series files. +type SyncStrategy int + +// String implements flag.Value. +func (ss SyncStrategy) String() string { + switch ss { + case Adaptive: + return "adaptive" + case Always: + return "always" + case Never: + return "never" + } + return "" +} + +// Set implements flag.Value. +func (ss *SyncStrategy) Set(s string) error { + switch s { + case "adaptive": + *ss = Adaptive + case "always": + *ss = Always + case "never": + *ss = Never + default: + return fmt.Errorf("invalid sync strategy: %s", s) + } + return nil +} + +// Possible values for SyncStrategy. +const ( + _ SyncStrategy = iota + Never + Always + Adaptive +) + +// A syncStrategy is a function that returns whether series files should be +// synced or not. It does not need to be goroutine safe. 
+type syncStrategy func() bool + +// A MemorySeriesStorage manages series in memory over time, while also +// interfacing with a persistence layer to make time series data persistent +// across restarts and evictable from memory. +type MemorySeriesStorage struct { + // archiveHighWatermark, chunksToPersist, persistUrgency have to be aligned for atomic operations. + archiveHighWatermark model.Time // No archived series has samples after this time. + numChunksToPersist int64 // The number of chunks waiting for persistence. + persistUrgency int32 // Persistence urgency score * 1000, int32 allows atomic operations. + rushed bool // Whether the storage is in rushed mode. + rushedMtx sync.Mutex // Protects rushed. + lastNumGC uint32 // To detect if a GC cycle has run. + throttled chan struct{} // This chan is sent to whenever NeedsThrottling() returns true (for logging). + + fpLocker *fingerprintLocker + fpToSeries *seriesMap + + options *MemorySeriesStorageOptions + + loopStopping, loopStopped chan struct{} + logThrottlingStopped chan struct{} + targetHeapSize uint64 + dropAfter time.Duration + headChunkTimeout time.Duration + checkpointInterval time.Duration + checkpointDirtySeriesLimit int + + persistence *persistence + mapper *fpMapper + + evictList *list.List + evictRequests chan chunk.EvictRequest + evictStopping, evictStopped chan struct{} + + quarantineRequests chan quarantineRequest + quarantineStopping, quarantineStopped chan struct{} + + persistErrors prometheus.Counter + queuedChunksToPersist prometheus.Counter + chunksToPersist prometheus.GaugeFunc + memorySeries prometheus.Gauge + headChunks prometheus.Gauge + dirtySeries prometheus.Gauge + seriesOps *prometheus.CounterVec + ingestedSamples prometheus.Counter + discardedSamples *prometheus.CounterVec + nonExistentSeriesMatches prometheus.Counter + memChunks prometheus.GaugeFunc + maintainSeriesDuration *prometheus.SummaryVec + persistenceUrgencyScore prometheus.GaugeFunc + rushedMode prometheus.GaugeFunc + 
targetHeapSizeBytes prometheus.GaugeFunc +} + +// MemorySeriesStorageOptions contains options needed by +// NewMemorySeriesStorage. It is not safe to leave any of those at their zero +// values. +type MemorySeriesStorageOptions struct { + TargetHeapSize uint64 // Desired maximum heap size. + PersistenceStoragePath string // Location of persistence files. + PersistenceRetentionPeriod time.Duration // Chunks at least that old are dropped. + HeadChunkTimeout time.Duration // Head chunks idle for at least that long may be closed. + CheckpointInterval time.Duration // How often to checkpoint the series map and head chunks. + CheckpointDirtySeriesLimit int // How many dirty series will trigger an early checkpoint. + Dirty bool // Force the storage to consider itself dirty on startup. + PedanticChecks bool // If dirty, perform crash-recovery checks on each series file. + SyncStrategy SyncStrategy // Which sync strategy to apply to series files. + MinShrinkRatio float64 // Minimum ratio a series file has to shrink during truncation. + NumMutexes int // Number of mutexes used for stochastic fingerprint locking. +} + +// NewMemorySeriesStorage returns a newly allocated Storage. Storage.Serve still +// has to be called to start the storage. 
+func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) *MemorySeriesStorage { + s := &MemorySeriesStorage{ + fpLocker: newFingerprintLocker(o.NumMutexes), + + options: o, + + loopStopping: make(chan struct{}), + loopStopped: make(chan struct{}), + logThrottlingStopped: make(chan struct{}), + throttled: make(chan struct{}, 1), + targetHeapSize: o.TargetHeapSize, + dropAfter: o.PersistenceRetentionPeriod, + headChunkTimeout: o.HeadChunkTimeout, + checkpointInterval: o.CheckpointInterval, + checkpointDirtySeriesLimit: o.CheckpointDirtySeriesLimit, + archiveHighWatermark: model.Now().Add(-o.HeadChunkTimeout), + + evictList: list.New(), + evictRequests: make(chan chunk.EvictRequest, evictRequestsCap), + evictStopping: make(chan struct{}), + evictStopped: make(chan struct{}), + + quarantineRequests: make(chan quarantineRequest, quarantineRequestsCap), + quarantineStopping: make(chan struct{}), + quarantineStopped: make(chan struct{}), + + persistErrors: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "persist_errors_total", + Help: "The total number of errors while writing to the persistence layer.", + }), + queuedChunksToPersist: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "queued_chunks_to_persist_total", + Help: "The total number of chunks queued for persistence.", + }), + memorySeries: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "memory_series", + Help: "The current number of series in memory.", + }), + headChunks: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "open_head_chunks", + Help: "The current number of open head chunks.", + }), + dirtySeries: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "memory_dirty_series", + Help: "The current number of series that would require a disk seek during 
crash recovery.", + }), + seriesOps: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "series_ops_total", + Help: "The total number of series operations by their type.", + }, + []string{opTypeLabel}, + ), + ingestedSamples: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "ingested_samples_total", + Help: "The total number of samples ingested.", + }), + discardedSamples: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "out_of_order_samples_total", + Help: "The total number of samples that were discarded because their timestamps were at or before the last received sample for a series.", + }, + []string{discardReasonLabel}, + ), + nonExistentSeriesMatches: prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "non_existent_series_matches_total", + Help: "How often a non-existent series was referred to during label matching or chunk preloading. This is an indication of outdated label indexes.", + }), + memChunks: prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "memory_chunks", + Help: "The current number of chunks in memory. The number does not include cloned chunks (i.e. 
chunks without a descriptor).", + }, + func() float64 { return float64(atomic.LoadInt64(&chunk.NumMemChunks)) }, + ), + maintainSeriesDuration: prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "maintain_series_duration_seconds", + Help: "The duration in seconds it took to perform maintenance on a series.", + }, + []string{seriesLocationLabel}, + ), + } + + s.chunksToPersist = prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "chunks_to_persist", + Help: "The current number of chunks waiting for persistence.", + }, + func() float64 { + return float64(s.getNumChunksToPersist()) + }, + ) + s.rushedMode = prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "rushed_mode", + Help: "1 if the storage is in rushed mode, 0 otherwise.", + }, + func() float64 { + s.rushedMtx.Lock() + defer s.rushedMtx.Unlock() + if s.rushed { + return 1 + } + return 0 + }, + ) + s.persistenceUrgencyScore = prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "persistence_urgency_score", + Help: "A score of urgency to persist chunks, 0 is least urgent, 1 most.", + }, + func() float64 { + score, _ := s.getPersistenceUrgencyScore() + return score + }, + ) + s.targetHeapSizeBytes = prometheus.NewGaugeFunc( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: "target_heap_size_bytes", + Help: "The configured target heap size in bytes.", + }, + func() float64 { + return float64(s.targetHeapSize) + }, + ) + + // Initialize metric vectors. + // TODO(beorn7): Rework once we have a utility function for it in client_golang. 
+ s.discardedSamples.WithLabelValues(outOfOrderTimestamp) + s.discardedSamples.WithLabelValues(duplicateSample) + s.maintainSeriesDuration.WithLabelValues(maintainInMemory) + s.maintainSeriesDuration.WithLabelValues(maintainArchived) + s.seriesOps.WithLabelValues(create) + s.seriesOps.WithLabelValues(archive) + s.seriesOps.WithLabelValues(unarchive) + s.seriesOps.WithLabelValues(memoryPurge) + s.seriesOps.WithLabelValues(archivePurge) + s.seriesOps.WithLabelValues(requestedPurge) + s.seriesOps.WithLabelValues(memoryMaintenance) + s.seriesOps.WithLabelValues(archiveMaintenance) + s.seriesOps.WithLabelValues(completedQurantine) + s.seriesOps.WithLabelValues(droppedQuarantine) + s.seriesOps.WithLabelValues(failedQuarantine) + + return s +} + +// Start implements Storage. +func (s *MemorySeriesStorage) Start() (err error) { + var syncStrategy syncStrategy + switch s.options.SyncStrategy { + case Never: + syncStrategy = func() bool { return false } + case Always: + syncStrategy = func() bool { return true } + case Adaptive: + syncStrategy = func() bool { + _, rushed := s.getPersistenceUrgencyScore() + return !rushed + } + default: + panic("unknown sync strategy") + } + + var p *persistence + p, err = newPersistence( + s.options.PersistenceStoragePath, + s.options.Dirty, s.options.PedanticChecks, + syncStrategy, + s.options.MinShrinkRatio, + ) + if err != nil { + return err + } + s.persistence = p + // Persistence must start running before loadSeriesMapAndHeads() is called. 
+ go s.persistence.run() + + defer func() { + if err != nil { + if e := p.close(); e != nil { + log.Errorln("Error closing persistence:", e) + } + } + }() + + log.Info("Loading series map and head chunks...") + s.fpToSeries, s.numChunksToPersist, err = p.loadSeriesMapAndHeads() + for _, series := range s.fpToSeries.m { + if !series.headChunkClosed { + s.headChunks.Inc() + } + } + + if err != nil { + return err + } + log.Infof("%d series loaded.", s.fpToSeries.length()) + s.memorySeries.Set(float64(s.fpToSeries.length())) + + s.mapper, err = newFPMapper(s.fpToSeries, p) + if err != nil { + return err + } + + go s.handleEvictList() + go s.handleQuarantine() + go s.logThrottling() + go s.loop() + + return nil +} + +// Stop implements Storage. +func (s *MemorySeriesStorage) Stop() error { + log.Info("Stopping local storage...") + + log.Info("Stopping maintenance loop...") + close(s.loopStopping) + <-s.loopStopped + + log.Info("Stopping series quarantining...") + close(s.quarantineStopping) + <-s.quarantineStopped + + log.Info("Stopping chunk eviction...") + close(s.evictStopping) + <-s.evictStopped + + // One final checkpoint of the series map and the head chunks. + if err := s.persistence.checkpointSeriesMapAndHeads( + context.Background(), s.fpToSeries, s.fpLocker, + ); err != nil { + return err + } + if err := s.mapper.checkpoint(); err != nil { + return err + } + + if err := s.persistence.close(); err != nil { + return err + } + log.Info("Local storage stopped.") + return nil +} + +type memorySeriesStorageQuerier struct { + *MemorySeriesStorage +} + +func (memorySeriesStorageQuerier) Close() error { + return nil +} + +// Querier implements the storage interface. +func (s *MemorySeriesStorage) Querier() (Querier, error) { + return memorySeriesStorageQuerier{s}, nil +} + +// WaitForIndexing implements Storage. +func (s *MemorySeriesStorage) WaitForIndexing() { + s.persistence.waitForIndexing() +} + +// LastSampleForLabelMatchers implements Storage. 
+func (s *MemorySeriesStorage) LastSampleForLabelMatchers(_ context.Context, cutoff model.Time, matcherSets ...metric.LabelMatchers) (model.Vector, error) { + mergedFPs := map[model.Fingerprint]struct{}{} + for _, matchers := range matcherSets { + fps, err := s.fpsForLabelMatchers(cutoff, model.Latest, matchers...) + if err != nil { + return nil, err + } + for fp := range fps { + mergedFPs[fp] = struct{}{} + } + } + + res := make(model.Vector, 0, len(mergedFPs)) + for fp := range mergedFPs { + s.fpLocker.Lock(fp) + + series, ok := s.fpToSeries.get(fp) + if !ok { + // A series could have disappeared between resolving label matchers and here. + s.fpLocker.Unlock(fp) + continue + } + sp := series.lastSamplePair() + res = append(res, &model.Sample{ + Metric: series.metric, + Value: sp.Value, + Timestamp: sp.Timestamp, + }) + s.fpLocker.Unlock(fp) + } + return res, nil +} + +// boundedIterator wraps a SeriesIterator and does not allow fetching +// data from earlier than the configured start time. +type boundedIterator struct { + it SeriesIterator + start model.Time +} + +// ValueAtOrBeforeTime implements the SeriesIterator interface. +func (bit *boundedIterator) ValueAtOrBeforeTime(ts model.Time) model.SamplePair { + if ts < bit.start { + return model.ZeroSamplePair + } + return bit.it.ValueAtOrBeforeTime(ts) +} + +// RangeValues implements the SeriesIterator interface. +func (bit *boundedIterator) RangeValues(interval metric.Interval) []model.SamplePair { + if interval.NewestInclusive < bit.start { + return []model.SamplePair{} + } + if interval.OldestInclusive < bit.start { + interval.OldestInclusive = bit.start + } + return bit.it.RangeValues(interval) +} + +// Metric implements SeriesIterator. +func (bit *boundedIterator) Metric() metric.Metric { + return bit.it.Metric() +} + +// Close implements SeriesIterator. +func (bit *boundedIterator) Close() { + bit.it.Close() +} + +// QueryRange implements Storage. 
+func (s *MemorySeriesStorage) QueryRange(ctx context.Context, from, through model.Time, matchers ...*metric.LabelMatcher) ([]SeriesIterator, error) { + span, _ := opentracing.StartSpanFromContext(ctx, "QueryRange") + span.SetTag(selectorsTag, metric.LabelMatchers(matchers).String()) + span.SetTag(fromTag, int64(from)) + span.SetTag(throughTag, int64(through)) + defer span.Finish() + + if through.Before(from) { + // In that case, nothing will match. + return nil, nil + } + fpSeriesPairs, err := s.seriesForLabelMatchers(from, through, matchers...) + if err != nil { + return nil, err + } + span.SetTag(numSeries, len(fpSeriesPairs)) + iterators := make([]SeriesIterator, 0, len(fpSeriesPairs)) + for _, pair := range fpSeriesPairs { + it := s.preloadChunksForRange(pair, from, through) + iterators = append(iterators, it) + } + return iterators, nil +} + +// QueryInstant implements Storage. +func (s *MemorySeriesStorage) QueryInstant(ctx context.Context, ts model.Time, stalenessDelta time.Duration, matchers ...*metric.LabelMatcher) ([]SeriesIterator, error) { + span, _ := opentracing.StartSpanFromContext(ctx, "QueryInstant") + span.SetTag(selectorsTag, metric.LabelMatchers(matchers).String()) + span.SetTag(tsTag, ts) + defer span.Finish() + + if stalenessDelta < 0 { + panic("negative staleness delta") + } + from := ts.Add(-stalenessDelta) + through := ts + + fpSeriesPairs, err := s.seriesForLabelMatchers(from, through, matchers...) + if err != nil { + return nil, err + } + iterators := make([]SeriesIterator, 0, len(fpSeriesPairs)) + for _, pair := range fpSeriesPairs { + it := s.preloadChunksForInstant(pair, from, through) + iterators = append(iterators, it) + } + return iterators, nil +} + +// fingerprintsForLabelPair returns the fingerprints with the given +// LabelPair. If intersectWith is non-nil, the method will only return +// fingerprints that are also contained in intersectsWith. If mergeWith is +// non-nil, the found fingerprints are added to the given map. 
The returned map +// is the same as the given one. +func (s *MemorySeriesStorage) fingerprintsForLabelPair( + pair model.LabelPair, + mergeWith map[model.Fingerprint]struct{}, + intersectWith map[model.Fingerprint]struct{}, +) map[model.Fingerprint]struct{} { + if mergeWith == nil { + mergeWith = map[model.Fingerprint]struct{}{} + } + for _, fp := range s.persistence.fingerprintsForLabelPair(pair) { + if intersectWith == nil { + mergeWith[fp] = struct{}{} + continue + } + if _, ok := intersectWith[fp]; ok { + mergeWith[fp] = struct{}{} + } + } + return mergeWith +} + +// MetricsForLabelMatchers implements Storage. +func (s *MemorySeriesStorage) MetricsForLabelMatchers( + _ context.Context, + from, through model.Time, + matcherSets ...metric.LabelMatchers, +) ([]metric.Metric, error) { + fpToMetric := map[model.Fingerprint]metric.Metric{} + for _, matchers := range matcherSets { + metrics, err := s.metricsForLabelMatchers(from, through, matchers...) + if err != nil { + return nil, err + } + for fp, m := range metrics { + fpToMetric[fp] = m + } + } + + metrics := make([]metric.Metric, 0, len(fpToMetric)) + for _, m := range fpToMetric { + metrics = append(metrics, m) + } + return metrics, nil +} + +// candidateFPsForLabelMatchers returns candidate FPs for given matchers and remaining matchers to be checked. +func (s *MemorySeriesStorage) candidateFPsForLabelMatchers( + matchers ...*metric.LabelMatcher, +) (map[model.Fingerprint]struct{}, []*metric.LabelMatcher, error) { + sort.Sort(metric.LabelMatchers(matchers)) + + if len(matchers) == 0 || matchers[0].MatchesEmptyString() { + // No matchers at all or even the best matcher matches the empty string. + return nil, nil, nil + } + + var ( + matcherIdx int + candidateFPs map[model.Fingerprint]struct{} + ) + + // Equal matchers. 
+ for ; matcherIdx < len(matchers) && (candidateFPs == nil || len(candidateFPs) > fpEqualMatchThreshold); matcherIdx++ { + m := matchers[matcherIdx] + if m.Type != metric.Equal || m.MatchesEmptyString() { + break + } + candidateFPs = s.fingerprintsForLabelPair( + model.LabelPair{ + Name: m.Name, + Value: m.Value, + }, + nil, + candidateFPs, + ) + if len(candidateFPs) == 0 { + return nil, nil, nil + } + } + + // Other matchers. + for ; matcherIdx < len(matchers) && (candidateFPs == nil || len(candidateFPs) > fpOtherMatchThreshold); matcherIdx++ { + m := matchers[matcherIdx] + if m.MatchesEmptyString() { + break + } + + lvs, err := s.LabelValuesForLabelName(context.TODO(), m.Name) + if err != nil { + return nil, nil, err + } + lvs = m.Filter(lvs) + if len(lvs) == 0 { + return nil, nil, nil + } + fps := map[model.Fingerprint]struct{}{} + for _, lv := range lvs { + s.fingerprintsForLabelPair( + model.LabelPair{ + Name: m.Name, + Value: lv, + }, + fps, + candidateFPs, + ) + } + candidateFPs = fps + if len(candidateFPs) == 0 { + return nil, nil, nil + } + } + return candidateFPs, matchers[matcherIdx:], nil +} + +func (s *MemorySeriesStorage) seriesForLabelMatchers( + from, through model.Time, + matchers ...*metric.LabelMatcher, +) ([]fingerprintSeriesPair, error) { + candidateFPs, matchersToCheck, err := s.candidateFPsForLabelMatchers(matchers...) 
+ if err != nil { + return nil, err + } + + result := []fingerprintSeriesPair{} +FPLoop: + for fp := range candidateFPs { + s.fpLocker.Lock(fp) + series := s.seriesForRange(fp, from, through) + s.fpLocker.Unlock(fp) + + if series == nil { + continue FPLoop + } + + for _, m := range matchersToCheck { + if !m.Match(series.metric[m.Name]) { + continue FPLoop + } + } + result = append(result, fingerprintSeriesPair{fp, series}) + } + return result, nil +} + +func (s *MemorySeriesStorage) fpsForLabelMatchers( + from, through model.Time, + matchers ...*metric.LabelMatcher, +) (map[model.Fingerprint]struct{}, error) { + candidateFPs, matchersToCheck, err := s.candidateFPsForLabelMatchers(matchers...) + if err != nil { + return nil, err + } + +FPLoop: + for fp := range candidateFPs { + s.fpLocker.Lock(fp) + met, _, ok := s.metricForRange(fp, from, through) + s.fpLocker.Unlock(fp) + + if !ok { + delete(candidateFPs, fp) + continue FPLoop + } + + for _, m := range matchersToCheck { + if !m.Match(met[m.Name]) { + delete(candidateFPs, fp) + continue FPLoop + } + } + } + return candidateFPs, nil +} + +func (s *MemorySeriesStorage) metricsForLabelMatchers( + from, through model.Time, + matchers ...*metric.LabelMatcher, +) (map[model.Fingerprint]metric.Metric, error) { + + candidateFPs, matchersToCheck, err := s.candidateFPsForLabelMatchers(matchers...) 
+ if err != nil { + return nil, err + } + + result := map[model.Fingerprint]metric.Metric{} +FPLoop: + for fp := range candidateFPs { + s.fpLocker.Lock(fp) + met, _, ok := s.metricForRange(fp, from, through) + s.fpLocker.Unlock(fp) + + if !ok { + continue FPLoop + } + + for _, m := range matchersToCheck { + if !m.Match(met[m.Name]) { + continue FPLoop + } + } + result[fp] = metric.Metric{Metric: met} + } + return result, nil +} + +// metricForRange returns the metric for the given fingerprint if the +// corresponding time series has samples between 'from' and 'through', together +// with a pointer to the series if it is in memory already. For a series that +// does not have samples between 'from' and 'through', the returned bool is +// false. For an archived series that does contain samples between 'from' and +// 'through', it returns (metric, nil, true). +// +// The caller must have locked the fp. +func (s *MemorySeriesStorage) metricForRange( + fp model.Fingerprint, + from, through model.Time, +) (model.Metric, *memorySeries, bool) { + series, ok := s.fpToSeries.get(fp) + if ok { + if series.lastTime.Before(from) || series.firstTime().After(through) { + return nil, nil, false + } + return series.metric, series, true + } + // From here on, we are only concerned with archived metrics. + // If the high watermark of archived series is before 'from', we are done. + watermark := model.Time(atomic.LoadInt64((*int64)(&s.archiveHighWatermark))) + if watermark < from { + return nil, nil, false + } + if from.After(model.Earliest) || through.Before(model.Latest) { + // The range lookup is relatively cheap, so let's do it first if + // we have a chance the archived metric is not in the range. 
+ has, first, last := s.persistence.hasArchivedMetric(fp) + if !has { + s.nonExistentSeriesMatches.Inc() + return nil, nil, false + } + if first.After(through) || last.Before(from) { + return nil, nil, false + } + } + + metric, err := s.persistence.archivedMetric(fp) + if err != nil { + // archivedMetric has already flagged the storage as dirty in this case. + return nil, nil, false + } + return metric, nil, true +} + +// LabelValuesForLabelName implements Storage. +func (s *MemorySeriesStorage) LabelValuesForLabelName(_ context.Context, labelName model.LabelName) (model.LabelValues, error) { + return s.persistence.labelValuesForLabelName(labelName) +} + +// DropMetricsForLabelMatchers implements Storage. +func (s *MemorySeriesStorage) DropMetricsForLabelMatchers(_ context.Context, matchers ...*metric.LabelMatcher) (int, error) { + fps, err := s.fpsForLabelMatchers(model.Earliest, model.Latest, matchers...) + if err != nil { + return 0, err + } + for fp := range fps { + s.purgeSeries(fp, nil, nil) + } + return len(fps), nil +} + +var ( + // ErrOutOfOrderSample is returned if a sample has a timestamp before the latest + // timestamp in the series it is appended to. + ErrOutOfOrderSample = fmt.Errorf("sample timestamp out of order") + // ErrDuplicateSampleForTimestamp is returned if a sample has the same + // timestamp as the latest sample in the series it is appended to but a + // different value. (Appending an identical sample is a no-op and does + // not cause an error.) + ErrDuplicateSampleForTimestamp = fmt.Errorf("sample with repeated timestamp but different value") +) + +// Append implements Storage. 
+func (s *MemorySeriesStorage) Append(sample *model.Sample) error { + for ln, lv := range sample.Metric { + if len(lv) == 0 { + delete(sample.Metric, ln) + } + } + rawFP := sample.Metric.FastFingerprint() + s.fpLocker.Lock(rawFP) + fp := s.mapper.mapFP(rawFP, sample.Metric) + defer func() { + s.fpLocker.Unlock(fp) + }() // Func wrapper because fp might change below. + if fp != rawFP { + // Switch locks. + s.fpLocker.Unlock(rawFP) + s.fpLocker.Lock(fp) + } + series, err := s.getOrCreateSeries(fp, sample.Metric) + if err != nil { + return err // getOrCreateSeries took care of quarantining already. + } + + if sample.Timestamp == series.lastTime { + // Don't report "no-op appends", i.e. where timestamp and sample + // value are the same as for the last append, as they are a + // common occurrence when using client-side timestamps + // (e.g. Pushgateway or federation). + if sample.Timestamp == series.lastTime && + series.lastSampleValueSet && + sample.Value.Equal(series.lastSampleValue) { + return nil + } + s.discardedSamples.WithLabelValues(duplicateSample).Inc() + return ErrDuplicateSampleForTimestamp // Caused by the caller. + } + if sample.Timestamp < series.lastTime { + s.discardedSamples.WithLabelValues(outOfOrderTimestamp).Inc() + return ErrOutOfOrderSample // Caused by the caller. + } + headChunkWasClosed := series.headChunkClosed + completedChunksCount, err := series.add(model.SamplePair{ + Value: sample.Value, + Timestamp: sample.Timestamp, + }) + if err != nil { + s.quarantineSeries(fp, sample.Metric, err) + return err + } + if headChunkWasClosed { + // Appending to a series with a closed head chunk creates an + // additional open head chunk. + s.headChunks.Inc() + } + s.ingestedSamples.Inc() + s.incNumChunksToPersist(completedChunksCount) + + return nil +} + +// NeedsThrottling implements Storage. 
+func (s *MemorySeriesStorage) NeedsThrottling() bool {
+	if score, _ := s.getPersistenceUrgencyScore(); score >= 1 {
+		select {
+		case s.throttled <- struct{}{}:
+		default: // Do nothing, signal already pending.
+		}
+		return true
+	}
+	return false
+}
+
+// logThrottling handles logging of throttled events and has to be started as a
+// goroutine. It stops once s.loopStopping is closed.
+//
+// Logging strategy: Whenever NeedsThrottling() is called and returns true, a signal
+// is sent to s.throttled. If that happens for the first time, an Error is
+// logged that the storage is now throttled. As long as signals continue to be
+// sent via s.throttled at least once per minute, nothing else is logged. Once
+// no signal has arrived for a minute, an Info is logged that the storage is not
+// throttled anymore. This resets things to the initial state, i.e. once a
+// signal arrives again, the Error will be logged again.
+func (s *MemorySeriesStorage) logThrottling() {
+	timer := time.NewTimer(time.Minute)
+	timer.Stop()
+
+	// Signal exit of the goroutine. Currently only needed by test code.
+	defer close(s.logThrottlingStopped)
+
+	for {
+		select {
+		case <-s.throttled:
+			if !timer.Stop() {
+				select {
+				case <-timer.C:
+				default:
+				}
+				score, _ := s.getPersistenceUrgencyScore()
+				log.
+					With("urgencyScore", score).
+					With("chunksToPersist", s.getNumChunksToPersist()).
+					With("memoryChunks", atomic.LoadInt64(&chunk.NumMemChunks)).
+					Error("Storage needs throttling. Scrapes and rule evaluations will be skipped.")
+			}
+			timer.Reset(time.Minute)
+		case <-timer.C:
+			score, _ := s.getPersistenceUrgencyScore()
+			log.
+				With("urgencyScore", score).
+				With("chunksToPersist", s.getNumChunksToPersist()).
+				With("memoryChunks", atomic.LoadInt64(&chunk.NumMemChunks)).
+ Info("Storage does not need throttling anymore.") + case <-s.loopStopping: + return + } + } +} + +func (s *MemorySeriesStorage) getOrCreateSeries(fp model.Fingerprint, m model.Metric) (*memorySeries, error) { + series, ok := s.fpToSeries.get(fp) + if !ok { + var cds []*chunk.Desc + var modTime time.Time + unarchived, err := s.persistence.unarchiveMetric(fp) + if err != nil { + log.Errorf("Error unarchiving fingerprint %v (metric %v): %v", fp, m, err) + return nil, err + } + if unarchived { + s.seriesOps.WithLabelValues(unarchive).Inc() + // We have to load chunk.Descs anyway to do anything with + // the series, so let's do it right now so that we don't + // end up with a series without any chunk.Descs for a + // while (which is confusing as it makes the series + // appear as archived or purged). + cds, err = s.loadChunkDescs(fp, 0) + if err == nil && len(cds) == 0 { + err = fmt.Errorf("unarchived fingerprint %v (metric %v) has no chunks on disk", fp, m) + } + if err != nil { + s.quarantineSeries(fp, m, err) + return nil, err + } + modTime = s.persistence.seriesFileModTime(fp) + } else { + // This was a genuinely new series, so index the metric. + s.persistence.indexMetric(fp, m) + s.seriesOps.WithLabelValues(create).Inc() + } + series, err = newMemorySeries(m, cds, modTime) + if err != nil { + s.quarantineSeries(fp, m, err) + return nil, err + } + s.fpToSeries.put(fp, series) + s.memorySeries.Inc() + if !series.headChunkClosed { + s.headChunks.Inc() + } + } + return series, nil +} + +// seriesForRange is a helper method for seriesForLabelMatchers. +// +// The caller must have locked the fp. +func (s *MemorySeriesStorage) seriesForRange( + fp model.Fingerprint, + from model.Time, through model.Time, +) *memorySeries { + metric, series, ok := s.metricForRange(fp, from, through) + if !ok { + return nil + } + if series == nil { + series, _ = s.getOrCreateSeries(fp, metric) + // getOrCreateSeries took care of quarantining already, so ignore the error. 
+	}
+	return series
+}
+
+func (s *MemorySeriesStorage) preloadChunksForRange(
+	pair fingerprintSeriesPair,
+	from model.Time, through model.Time,
+) SeriesIterator {
+	fp, series := pair.fp, pair.series
+	if series == nil {
+		return nopIter
+	}
+
+	s.fpLocker.Lock(fp)
+	defer s.fpLocker.Unlock(fp)
+
+	iter, err := series.preloadChunksForRange(fp, from, through, s)
+	if err != nil {
+		s.quarantineSeries(fp, series.metric, err)
+		return nopIter
+	}
+	return iter
+}
+
+func (s *MemorySeriesStorage) preloadChunksForInstant(
+	pair fingerprintSeriesPair,
+	from model.Time, through model.Time,
+) SeriesIterator {
+	fp, series := pair.fp, pair.series
+	if series == nil {
+		return nopIter
+	}
+
+	s.fpLocker.Lock(fp)
+	defer s.fpLocker.Unlock(fp)
+
+	iter, err := series.preloadChunksForInstant(fp, from, through, s)
+	if err != nil {
+		s.quarantineSeries(fp, series.metric, err)
+		return nopIter
+	}
+	return iter
+}
+
+func (s *MemorySeriesStorage) handleEvictList() {
+	// This ticker is supposed to tick at least once per GC cycle. Ideally,
+	// we would handle the evict list after each finished GC cycle, but I
+	// don't know of a way to "subscribe" to that kind of event.
+	ticker := time.NewTicker(evictInterval)
+
+	for {
+		select {
+		case req := <-s.evictRequests:
+			if req.Evict {
+				req.Desc.EvictListElement = s.evictList.PushBack(req.Desc)
+			} else {
+				if req.Desc.EvictListElement != nil {
+					s.evictList.Remove(req.Desc.EvictListElement)
+					req.Desc.EvictListElement = nil
+				}
+			}
+		case <-ticker.C:
+			s.maybeEvict()
+		case <-s.evictStopping:
+			// Drain evictRequests forever in a goroutine to not let
+			// requesters hang.
+			go func() {
+				for {
+					<-s.evictRequests
+				}
+			}()
+			ticker.Stop()
+			log.Info("Chunk eviction stopped.")
+			close(s.evictStopped)
+			return
+		}
+	}
+}
+
+// maybeEvict is a local helper method. Must only be called by handleEvictList.
+func (s *MemorySeriesStorage) maybeEvict() { + ms := runtime.MemStats{} + runtime.ReadMemStats(&ms) + numChunksToEvict := s.calculatePersistUrgency(&ms) + + if numChunksToEvict <= 0 { + return + } + + chunkDescsToEvict := make([]*chunk.Desc, numChunksToEvict) + for i := range chunkDescsToEvict { + e := s.evictList.Front() + if e == nil { + break + } + cd := e.Value.(*chunk.Desc) + cd.EvictListElement = nil + chunkDescsToEvict[i] = cd + s.evictList.Remove(e) + } + // Do the actual eviction in a goroutine as we might otherwise deadlock, + // in the following way: A chunk was Unpinned completely and therefore + // scheduled for eviction. At the time we actually try to evict it, + // another goroutine is pinning the chunk. The pinning goroutine has + // currently locked the chunk and tries to send the evict request (to + // remove the chunk from the evict list) to the evictRequests + // channel. The send blocks because evictRequests is full. However, the + // goroutine that is supposed to empty the channel is waiting for the + // Chunk.Desc lock to try to evict the chunk. + go func() { + for _, cd := range chunkDescsToEvict { + if cd == nil { + break + } + cd.MaybeEvict() + // We don't care if the eviction succeeds. If the chunk + // was pinned in the meantime, it will be added to the + // evict list once it gets Unpinned again. + } + }() +} + +// calculatePersistUrgency calculates and sets s.persistUrgency. Based on the +// calculation, it returns the number of chunks to evict. The runtime.MemStats +// are passed in here for testability. +// +// The persist urgency is calculated by the following formula: +// +// n(toPersist) MAX( h(nextGC), h(current) ) +// p = MIN( 1, --------------------------- * ---------------------------- ) +// n(toPersist) + n(evictable) h(target) +// +// where: +// +// n(toPersist): Number of chunks waiting for persistence. +// n(evictable): Number of evictable chunks. +// h(nextGC): Heap size at which the next GC will kick in (ms.NextGC). 
+// h(current): Current heap size (ms.HeapAlloc). +// h(target): Configured target heap size. +// +// Note that the actual value stored in s.persistUrgency is 1000 times the value +// calculated as above to allow using an int32, which supports atomic +// operations. +// +// If no GC has run after the last call of this method, it will always return 0 +// (no reason to try to evict any more chunks before we have seen the effect of +// the previous eviction). It will also not decrease the persist urgency in this +// case (but it will increase the persist urgency if a higher value was calculated). +// +// If a GC has run after the last call of this method, the following cases apply: +// +// - If MAX( h(nextGC), h(current) ) < h(target), simply return 0. Nothing to +// evict if the heap is still small enough. +// +// - Otherwise, if n(evictable) is 0, also return 0, but set the urgency score +// to 1 to signal that we want to evict chunk but have no evictable chunks +// available. +// +// - Otherwise, calculate the number of chunks to evict and return it: +// +// MAX( h(nextGC), h(current) ) - h(target) +// n(toEvict) = MIN( n(evictable), ---------------------------------------- ) +// c +// +// where c is the size of a chunk. +// +// - In the latter case, the persist urgency might be increased. The final value +// is the following: +// +// n(toEvict) +// MAX( p, ------------ ) +// n(evictable) +// +// Broadly speaking, the persist urgency is based on the ratio of the number of +// chunks we want to evict and the number of chunks that are actually +// evictable. However, in particular for the case where we don't need to evict +// chunks yet, it also takes into account how close the heap has already grown +// to the configured target size, and how big the pool of chunks to persist is +// compared to the number of chunks already evictable. +// +// This is a helper method only to be called by MemorySeriesStorage.maybeEvict. 
+func (s *MemorySeriesStorage) calculatePersistUrgency(ms *runtime.MemStats) int { + var ( + oldUrgency = atomic.LoadInt32(&s.persistUrgency) + newUrgency int32 + numChunksToPersist = s.getNumChunksToPersist() + ) + defer func() { + if newUrgency > 1000 { + newUrgency = 1000 + } + atomic.StoreInt32(&s.persistUrgency, newUrgency) + }() + + // Take the NextGC as the relevant heap size because the heap will grow + // to that size before GC kicks in. However, at times the current heap + // is already larger than NextGC, in which case we take that worse case. + heapSize := ms.NextGC + if ms.HeapAlloc > ms.NextGC { + heapSize = ms.HeapAlloc + } + + if numChunksToPersist > 0 { + newUrgency = int32(1000 * uint64(numChunksToPersist) / uint64(numChunksToPersist+s.evictList.Len()) * heapSize / s.targetHeapSize) + } + + // Only continue if a GC has happened since we were here last time. + if ms.NumGC == s.lastNumGC { + if oldUrgency > newUrgency { + // Never reduce urgency without a GC run. + newUrgency = oldUrgency + } + return 0 + } + s.lastNumGC = ms.NumGC + + if heapSize <= s.targetHeapSize { + return 0 // Heap still small enough, don't evict. + } + if s.evictList.Len() == 0 { + // We want to reduce heap size but there is nothing to evict. + newUrgency = 1000 + return 0 + } + numChunksToEvict := int((heapSize - s.targetHeapSize) / chunk.ChunkLen) + if numChunksToEvict > s.evictList.Len() { + numChunksToEvict = s.evictList.Len() + } + if u := int32(numChunksToEvict * 1000 / s.evictList.Len()); u > newUrgency { + newUrgency = u + } + return numChunksToEvict +} + +// waitForNextFP waits an estimated duration, after which we want to process +// another fingerprint so that we will process all fingerprints in a tenth of +// s.dropAfter assuming that the system is doing nothing else, e.g. if we want +// to drop chunks after 40h, we want to cycle through all fingerprints within +// 4h. The estimation is based on the total number of fingerprints as passed +// in. 
However, the maximum sweep time is capped at fpMaxSweepTime. Also, the +// method will never wait for longer than fpMaxWaitDuration. +// +// The maxWaitDurationFactor can be used to reduce the waiting time if a faster +// processing is required (for example because unpersisted chunks pile up too +// much). +// +// Normally, the method returns true once the wait duration has passed. However, +// if s.loopStopped is closed, it will return false immediately. +func (s *MemorySeriesStorage) waitForNextFP(numberOfFPs int, maxWaitDurationFactor float64) bool { + d := fpMaxWaitDuration + if numberOfFPs != 0 { + sweepTime := s.dropAfter / 10 + if sweepTime > fpMaxSweepTime { + sweepTime = fpMaxSweepTime + } + calculatedWait := time.Duration(float64(sweepTime) / float64(numberOfFPs) * maxWaitDurationFactor) + if calculatedWait < d { + d = calculatedWait + } + } + if d == 0 { + return true + } + t := time.NewTimer(d) + select { + case <-t.C: + return true + case <-s.loopStopping: + return false + } +} + +// cycleThroughMemoryFingerprints returns a channel that emits fingerprints for +// series in memory in a throttled fashion. It continues to cycle through all +// fingerprints in memory until s.loopStopping is closed. +func (s *MemorySeriesStorage) cycleThroughMemoryFingerprints() chan model.Fingerprint { + memoryFingerprints := make(chan model.Fingerprint) + go func() { + defer close(memoryFingerprints) + firstPass := true + + for { + // Initial wait, also important if there are no FPs yet. + if !s.waitForNextFP(s.fpToSeries.length(), 1) { + return + } + begin := time.Now() + fps := s.fpToSeries.sortedFPs() + if firstPass && len(fps) > 0 { + // Start first pass at a random location in the + // key space to cover the whole key space even + // in the case of frequent restarts. 
+ fps = fps[rand.Intn(len(fps)):] + } + count := 0 + for _, fp := range fps { + select { + case memoryFingerprints <- fp: + case <-s.loopStopping: + return + } + // Reduce the wait time according to the urgency score. + score, rushed := s.getPersistenceUrgencyScore() + if rushed { + score = 1 + } + s.waitForNextFP(s.fpToSeries.length(), 1-score) + count++ + } + if count > 0 { + msg := "full" + if firstPass { + msg = "initial partial" + } + log.Infof( + "Completed %s maintenance sweep through %d in-memory fingerprints in %v.", + msg, count, time.Since(begin), + ) + } + firstPass = false + } + }() + + return memoryFingerprints +} + +// cycleThroughArchivedFingerprints returns a channel that emits fingerprints +// for archived series in a throttled fashion. It continues to cycle through all +// archived fingerprints until s.loopStopping is closed. +func (s *MemorySeriesStorage) cycleThroughArchivedFingerprints() chan model.Fingerprint { + archivedFingerprints := make(chan model.Fingerprint) + go func() { + defer close(archivedFingerprints) + + for { + archivedFPs, err := s.persistence.fingerprintsModifiedBefore( + model.Now().Add(-s.dropAfter), + ) + if err != nil { + log.Error("Failed to lookup archived fingerprint ranges: ", err) + s.waitForNextFP(0, 1) + continue + } + // Initial wait, also important if there are no FPs yet. + if !s.waitForNextFP(len(archivedFPs), 1) { + return + } + begin := time.Now() + for _, fp := range archivedFPs { + select { + case archivedFingerprints <- fp: + case <-s.loopStopping: + return + } + // Never speed up maintenance of archived FPs. 
+ s.waitForNextFP(len(archivedFPs), 1) + } + if len(archivedFPs) > 0 { + log.Infof( + "Completed maintenance sweep through %d archived fingerprints in %v.", + len(archivedFPs), time.Since(begin), + ) + } + } + }() + return archivedFingerprints +} + +func (s *MemorySeriesStorage) loop() { + checkpointTimer := time.NewTimer(s.checkpointInterval) + checkpointMinTimer := time.NewTimer(0) + + var dirtySeriesCount int64 + + defer func() { + checkpointTimer.Stop() + checkpointMinTimer.Stop() + log.Info("Maintenance loop stopped.") + close(s.loopStopped) + }() + + memoryFingerprints := s.cycleThroughMemoryFingerprints() + archivedFingerprints := s.cycleThroughArchivedFingerprints() + + checkpointCtx, checkpointCancel := context.WithCancel(context.Background()) + checkpointNow := make(chan struct{}, 1) + + doCheckpoint := func() time.Duration { + start := time.Now() + // We clear this before the checkpoint so that dirtySeriesCount + // is an upper bound. + atomic.StoreInt64(&dirtySeriesCount, 0) + s.dirtySeries.Set(0) + select { + case <-checkpointNow: + // Signal cleared. + default: + // No signal pending. + } + err := s.persistence.checkpointSeriesMapAndHeads( + checkpointCtx, s.fpToSeries, s.fpLocker, + ) + if err == context.Canceled { + log.Info("Checkpoint canceled.") + } else if err != nil { + s.persistErrors.Inc() + log.Errorln("Error while checkpointing:", err) + } + return time.Since(start) + } + + // Checkpoints can happen concurrently with maintenance so even with heavy + // checkpointing there will still be sufficient progress on maintenance. 
+ checkpointLoopStopped := make(chan struct{}) + go func() { + for { + select { + case <-checkpointCtx.Done(): + checkpointLoopStopped <- struct{}{} + return + case <-checkpointMinTimer.C: + var took time.Duration + select { + case <-checkpointCtx.Done(): + checkpointLoopStopped <- struct{}{} + return + case <-checkpointTimer.C: + took = doCheckpoint() + case <-checkpointNow: + if !checkpointTimer.Stop() { + <-checkpointTimer.C + } + took = doCheckpoint() + } + checkpointMinTimer.Reset(took) + checkpointTimer.Reset(s.checkpointInterval) + } + } + }() + +loop: + for { + select { + case <-s.loopStopping: + checkpointCancel() + break loop + case fp := <-memoryFingerprints: + if s.maintainMemorySeries(fp, model.Now().Add(-s.dropAfter)) { + dirty := atomic.AddInt64(&dirtySeriesCount, 1) + s.dirtySeries.Set(float64(dirty)) + // Check if we have enough "dirty" series so that we need an early checkpoint. + // However, if we are already behind persisting chunks, creating a checkpoint + // would be counterproductive, as it would slow down chunk persisting even more, + // while in a situation like that, where we are clearly lacking speed of disk + // maintenance, the best we can do for crash recovery is to persist chunks as + // quickly as possible. So only checkpoint if we are not in rushed mode. + if _, rushed := s.getPersistenceUrgencyScore(); !rushed && + dirty >= int64(s.checkpointDirtySeriesLimit) { + select { + case checkpointNow <- struct{}{}: + // Signal sent. + default: + // Signal already pending. + } + } + } + case fp := <-archivedFingerprints: + s.maintainArchivedSeries(fp, model.Now().Add(-s.dropAfter)) + } + } + // Wait until both channels are closed. + for range memoryFingerprints { + } + for range archivedFingerprints { + } + <-checkpointLoopStopped +} + +// maintainMemorySeries maintains a series that is in memory (i.e. not +// archived). It returns true if the method has changed from clean to dirty +// (i.e. 
it is inconsistent with the latest checkpoint now so that in case of a +// crash a recovery operation that requires a disk seek needed to be applied). +// +// The method first closes the head chunk if it was not touched for the duration +// of headChunkTimeout. +// +// Then it determines the chunks that need to be purged and the chunks that need +// to be persisted. Depending on the result, it does the following: +// +// - If all chunks of a series need to be purged, the whole series is deleted +// for good and the method returns false. (Detecting non-existence of a series +// file does not require a disk seek.) +// +// - If any chunks need to be purged (but not all of them), it purges those +// chunks from memory and rewrites the series file on disk, leaving out the +// purged chunks and appending all chunks not yet persisted (with the exception +// of a still open head chunk). +// +// - If no chunks on disk need to be purged, but chunks need to be persisted, +// those chunks are simply appended to the existing series file (or the file is +// created if it does not exist yet). +// +// - If no chunks need to be purged and no chunks need to be persisted, nothing +// happens in this step. +// +// Next, the method checks if all chunks in the series are evicted. In that +// case, it archives the series and returns true. +// +// Finally, it evicts chunk.Descs if there are too many. +func (s *MemorySeriesStorage) maintainMemorySeries( + fp model.Fingerprint, beforeTime model.Time, +) (becameDirty bool) { + defer func(begin time.Time) { + s.maintainSeriesDuration.WithLabelValues(maintainInMemory).Observe( + time.Since(begin).Seconds(), + ) + }(time.Now()) + + s.fpLocker.Lock(fp) + defer s.fpLocker.Unlock(fp) + + series, ok := s.fpToSeries.get(fp) + if !ok { + // Series is actually not in memory, perhaps archived or dropped in the meantime. 
+ return false + } + + defer s.seriesOps.WithLabelValues(memoryMaintenance).Inc() + + closed, err := series.maybeCloseHeadChunk(s.headChunkTimeout) + if err != nil { + s.quarantineSeries(fp, series.metric, err) + s.persistErrors.Inc() + } + if closed { + s.incNumChunksToPersist(1) + s.headChunks.Dec() + } + + seriesWasDirty := series.dirty + + if s.writeMemorySeries(fp, series, beforeTime) { + // Series is gone now, we are done. + return false + } + + iOldestNotEvicted := -1 + for i, cd := range series.chunkDescs { + if !cd.IsEvicted() { + iOldestNotEvicted = i + break + } + } + + // Archive if all chunks are evicted. Also make sure the last sample has + // an age of at least headChunkTimeout (which is very likely anyway). + if iOldestNotEvicted == -1 && model.Now().Sub(series.lastTime) > s.headChunkTimeout { + s.fpToSeries.del(fp) + s.memorySeries.Dec() + s.persistence.archiveMetric(fp, series.metric, series.firstTime(), series.lastTime) + s.seriesOps.WithLabelValues(archive).Inc() + oldWatermark := atomic.LoadInt64((*int64)(&s.archiveHighWatermark)) + if oldWatermark < int64(series.lastTime) { + if !atomic.CompareAndSwapInt64( + (*int64)(&s.archiveHighWatermark), + oldWatermark, int64(series.lastTime), + ) { + panic("s.archiveHighWatermark modified outside of maintainMemorySeries") + } + } + return + } + // If we are here, the series is not archived, so check for chunk.Desc + // eviction next. + series.evictChunkDescs(iOldestNotEvicted) + + return series.dirty && !seriesWasDirty +} + +// writeMemorySeries (re-)writes a memory series file. While doing so, it drops +// chunks older than beforeTime from both the series file (if it exists) as well +// as from memory. The provided chunksToPersist are appended to the newly +// written series file. If no chunks need to be purged, but chunksToPersist is +// not empty, those chunks are simply appended to the series file. If the series +// contains no chunks after dropping old chunks, it is purged entirely. 
In that +// case, the method returns true. +// +// If a persist error is encountered, the series is queued for quarantine. In +// that case, the method returns true, too, because the series should not be +// processed anymore (even if it will only be gone for real once quarantining +// has been completed). +// +// The caller must have locked the fp. +func (s *MemorySeriesStorage) writeMemorySeries( + fp model.Fingerprint, series *memorySeries, beforeTime model.Time, +) bool { + var ( + persistErr error + cds = series.chunksToPersist() + ) + + defer func() { + if persistErr != nil { + s.quarantineSeries(fp, series.metric, persistErr) + s.persistErrors.Inc() + } + // The following is done even in case of an error to ensure + // correct counter bookkeeping and to not pin chunks in memory + // that belong to a series that is scheduled for quarantine + // anyway. + for _, cd := range cds { + cd.Unpin(s.evictRequests) + } + s.incNumChunksToPersist(-len(cds)) + chunk.Ops.WithLabelValues(chunk.PersistAndUnpin).Add(float64(len(cds))) + series.modTime = s.persistence.seriesFileModTime(fp) + }() + + // Get the actual chunks from underneath the chunk.Descs. + // No lock required as chunks still to persist cannot be evicted. + chunks := make([]chunk.Chunk, len(cds)) + for i, cd := range cds { + chunks[i] = cd.C + } + + if !series.firstTime().Before(beforeTime) { + // Oldest sample not old enough, just append chunks, if any. + if len(cds) == 0 { + return false + } + var offset int + offset, persistErr = s.persistence.persistChunks(fp, chunks) + if persistErr != nil { + return true + } + if series.chunkDescsOffset == -1 { + // This is the first chunk persisted for a newly created + // series that had prior chunks on disk. Finally, we can + // set the chunkDescsOffset. 
+ series.chunkDescsOffset = offset + } + return false + } + + newFirstTime, offset, numDroppedFromPersistence, allDroppedFromPersistence, persistErr := + s.persistence.dropAndPersistChunks(fp, beforeTime, chunks) + if persistErr != nil { + return true + } + if persistErr = series.dropChunks(beforeTime); persistErr != nil { + return true + } + if len(series.chunkDescs) == 0 && allDroppedFromPersistence { + // All chunks dropped from both memory and persistence. Delete the series for good. + s.fpToSeries.del(fp) + s.memorySeries.Dec() + s.seriesOps.WithLabelValues(memoryPurge).Inc() + s.persistence.unindexMetric(fp, series.metric) + return true + } + series.savedFirstTime = newFirstTime + if series.chunkDescsOffset == -1 { + series.chunkDescsOffset = offset + } else { + series.chunkDescsOffset -= numDroppedFromPersistence + if series.chunkDescsOffset < 0 { + persistErr = errors.New("dropped more chunks from persistence than from memory") + series.chunkDescsOffset = 0 + return true + } + } + return false +} + +// maintainArchivedSeries drops chunks older than beforeTime from an archived +// series. If the series contains no chunks after that, it is purged entirely. +func (s *MemorySeriesStorage) maintainArchivedSeries(fp model.Fingerprint, beforeTime model.Time) { + defer func(begin time.Time) { + s.maintainSeriesDuration.WithLabelValues(maintainArchived).Observe( + time.Since(begin).Seconds(), + ) + }(time.Now()) + + s.fpLocker.Lock(fp) + defer s.fpLocker.Unlock(fp) + + has, firstTime, lastTime := s.persistence.hasArchivedMetric(fp) + if !has || !firstTime.Before(beforeTime) { + // Oldest sample not old enough, or metric purged or unarchived in the meantime. + return + } + + defer s.seriesOps.WithLabelValues(archiveMaintenance).Inc() + + newFirstTime, _, _, allDropped, err := s.persistence.dropAndPersistChunks(fp, beforeTime, nil) + if err != nil { + // TODO(beorn7): Should quarantine the series. 
+ s.persistErrors.Inc() + log.Error("Error dropping persisted chunks: ", err) + } + if allDropped { + if err := s.persistence.purgeArchivedMetric(fp); err != nil { + s.persistErrors.Inc() + // purgeArchivedMetric logs the error already. + } + s.seriesOps.WithLabelValues(archivePurge).Inc() + return + } + if err := s.persistence.updateArchivedTimeRange(fp, newFirstTime, lastTime); err != nil { + s.persistErrors.Inc() + log.Errorf("Error updating archived time range for fingerprint %v: %s", fp, err) + } +} + +// See persistence.loadChunks for detailed explanation. +func (s *MemorySeriesStorage) loadChunks(fp model.Fingerprint, indexes []int, indexOffset int) ([]chunk.Chunk, error) { + return s.persistence.loadChunks(fp, indexes, indexOffset) +} + +// See persistence.loadChunkDescs for detailed explanation. +func (s *MemorySeriesStorage) loadChunkDescs(fp model.Fingerprint, offsetFromEnd int) ([]*chunk.Desc, error) { + return s.persistence.loadChunkDescs(fp, offsetFromEnd) +} + +// getNumChunksToPersist returns chunksToPersist in a goroutine-safe way. +func (s *MemorySeriesStorage) getNumChunksToPersist() int { + return int(atomic.LoadInt64(&s.numChunksToPersist)) +} + +// incNumChunksToPersist increments chunksToPersist in a goroutine-safe way. Use a +// negative 'by' to decrement. +func (s *MemorySeriesStorage) incNumChunksToPersist(by int) { + atomic.AddInt64(&s.numChunksToPersist, int64(by)) + if by > 0 { + s.queuedChunksToPersist.Add(float64(by)) + } +} + +// getPersistenceUrgencyScore returns an urgency score for the speed of +// persisting chunks. The score is between 0 and 1, where 0 means no urgency at +// all and 1 means highest urgency. It also returns if the storage is in +// "rushed mode". +// +// The storage enters "rushed mode" if the score exceeds +// persintenceUrgencyScoreForEnteringRushedMode at the time this method is +// called. 
It will leave "rushed mode" if, at a later time this method is +// called, the score is below persintenceUrgencyScoreForLeavingRushedMode. +// "Rushed mode" plays a role for the adaptive series-sync-strategy. It also +// switches off early checkpointing (due to dirty series), and it makes series +// maintenance happen as quickly as possible. +// +// A score of 1 will trigger throttling of sample ingestion. +// +// It is safe to call this method concurrently. +func (s *MemorySeriesStorage) getPersistenceUrgencyScore() (float64, bool) { + s.rushedMtx.Lock() + defer s.rushedMtx.Unlock() + + score := float64(atomic.LoadInt32(&s.persistUrgency)) / 1000 + if score > 1 { + score = 1 + } + + if s.rushed { + // We are already in rushed mode. If the score is still above + // persintenceUrgencyScoreForLeavingRushedMode, return the score + // and leave things as they are. + if score > persintenceUrgencyScoreForLeavingRushedMode { + return score, true + } + // We are out of rushed mode! + s.rushed = false + log. + With("urgencyScore", score). + With("chunksToPersist", s.getNumChunksToPersist()). + With("memoryChunks", atomic.LoadInt64(&chunk.NumMemChunks)). + Info("Storage has left rushed mode.") + return score, false + } + if score > persintenceUrgencyScoreForEnteringRushedMode { + // Enter rushed mode. + s.rushed = true + log. + With("urgencyScore", score). + With("chunksToPersist", s.getNumChunksToPersist()). + With("memoryChunks", atomic.LoadInt64(&chunk.NumMemChunks)). + Warn("Storage has entered rushed mode.") + } + return score, s.rushed +} + +// quarantineSeries registers the provided fingerprint for quarantining. It +// always returns immediately. Quarantine requests are processed +// asynchronously. If there are too many requests queued, they are simply +// dropped. +// +// Quarantining means that the series file is moved to the orphaned directory, +// and all its traces are removed from indices. 
Call this method if an +// unrecoverable error is detected while dealing with a series, and pass in the +// encountered error. It will be saved as a hint in the orphaned directory. +func (s *MemorySeriesStorage) quarantineSeries(fp model.Fingerprint, metric model.Metric, err error) { + req := quarantineRequest{fp: fp, metric: metric, reason: err} + select { + case s.quarantineRequests <- req: + // Request submitted. + default: + log. + With("fingerprint", fp). + With("metric", metric). + With("reason", err). + Warn("Quarantine queue full. Dropped quarantine request.") + s.seriesOps.WithLabelValues(droppedQuarantine).Inc() + } +} + +func (s *MemorySeriesStorage) handleQuarantine() { + for { + select { + case req := <-s.quarantineRequests: + s.purgeSeries(req.fp, req.metric, req.reason) + log. + With("fingerprint", req.fp). + With("metric", req.metric). + With("reason", req.reason). + Warn("Series quarantined.") + case <-s.quarantineStopping: + log.Info("Series quarantining stopped.") + close(s.quarantineStopped) + return + } + } + +} + +// purgeSeries removes all traces of a series. If a non-nil quarantine reason is +// provided, the series file will not be deleted completely, but moved to the +// orphaned directory with the reason and the metric in a hint file. The +// provided metric might be nil if unknown. +func (s *MemorySeriesStorage) purgeSeries(fp model.Fingerprint, m model.Metric, quarantineReason error) { + s.fpLocker.Lock(fp) + + var ( + series *memorySeries + ok bool + ) + + if series, ok = s.fpToSeries.get(fp); ok { + s.fpToSeries.del(fp) + s.memorySeries.Dec() + m = series.metric + + // Adjust s.chunksToPersist and chunk.NumMemChunks down by + // the number of chunks in this series that are not + // persisted yet. Persisted chunks will be deducted from + // chunk.NumMemChunks upon eviction. 
+ numChunksNotYetPersisted := len(series.chunkDescs) - series.persistWatermark + atomic.AddInt64(&chunk.NumMemChunks, int64(-numChunksNotYetPersisted)) + if !series.headChunkClosed { + // Head chunk wasn't counted as waiting for persistence yet. + // (But it was counted as a chunk in memory.) + numChunksNotYetPersisted-- + } + s.incNumChunksToPersist(-numChunksNotYetPersisted) + + } else { + s.persistence.purgeArchivedMetric(fp) // Ignoring error. There is nothing we can do. + } + if m != nil { + // If we know a metric now, unindex it in any case. + // purgeArchivedMetric might have done so already, but we cannot + // be sure. Unindexing in idempotent, though. + s.persistence.unindexMetric(fp, m) + } + // Attempt to delete/quarantine the series file in any case. + if quarantineReason == nil { + // No reason stated, simply delete the file. + if _, err := s.persistence.deleteSeriesFile(fp); err != nil { + log. + With("fingerprint", fp). + With("metric", m). + With("error", err). + Error("Error deleting series file.") + } + s.seriesOps.WithLabelValues(requestedPurge).Inc() + } else { + if err := s.persistence.quarantineSeriesFile(fp, quarantineReason, m); err == nil { + s.seriesOps.WithLabelValues(completedQurantine).Inc() + } else { + s.seriesOps.WithLabelValues(failedQuarantine).Inc() + log. + With("fingerprint", fp). + With("metric", m). + With("reason", quarantineReason). + With("error", err). + Error("Error quarantining series file.") + } + } + + s.fpLocker.Unlock(fp) +} + +// Describe implements prometheus.Collector. 
+func (s *MemorySeriesStorage) Describe(ch chan<- *prometheus.Desc) { + s.persistence.Describe(ch) + s.mapper.Describe(ch) + + ch <- s.persistErrors.Desc() + ch <- s.queuedChunksToPersist.Desc() + ch <- s.chunksToPersist.Desc() + ch <- s.memorySeries.Desc() + ch <- s.headChunks.Desc() + ch <- s.dirtySeries.Desc() + s.seriesOps.Describe(ch) + ch <- s.ingestedSamples.Desc() + s.discardedSamples.Describe(ch) + ch <- s.nonExistentSeriesMatches.Desc() + ch <- s.memChunks.Desc() + s.maintainSeriesDuration.Describe(ch) + ch <- s.persistenceUrgencyScore.Desc() + ch <- s.rushedMode.Desc() + ch <- s.targetHeapSizeBytes.Desc() +} + +// Collect implements prometheus.Collector. +func (s *MemorySeriesStorage) Collect(ch chan<- prometheus.Metric) { + s.persistence.Collect(ch) + s.mapper.Collect(ch) + + ch <- s.persistErrors + ch <- s.queuedChunksToPersist + ch <- s.chunksToPersist + ch <- s.memorySeries + ch <- s.headChunks + ch <- s.dirtySeries + s.seriesOps.Collect(ch) + ch <- s.ingestedSamples + s.discardedSamples.Collect(ch) + ch <- s.nonExistentSeriesMatches + ch <- s.memChunks + s.maintainSeriesDuration.Collect(ch) + ch <- s.persistenceUrgencyScore + ch <- s.rushedMode + ch <- s.targetHeapSizeBytes +} diff --git a/vendor/github.com/prometheus/prometheus/storage/local/test_helpers.go b/vendor/github.com/prometheus/prometheus/storage/local/test_helpers.go new file mode 100644 index 000000000..7b6cc51e6 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/local/test_helpers.go @@ -0,0 +1,72 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// NOTE ON FILENAME: Do not rename this file helpers_test.go (which might appear +// an obvious choice). We need NewTestStorage in tests outside of the local +// package, too. On the other hand, moving NewTestStorage in its own package +// would cause circular dependencies in the tests in packages local. + +package local + +import ( + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/util/testutil" +) + +type testStorageCloser struct { + storage Storage + directory testutil.Closer +} + +func (t *testStorageCloser) Close() { + if err := t.storage.Stop(); err != nil { + panic(err) + } + t.directory.Close() +} + +// NewTestStorage creates a storage instance backed by files in a temporary +// directory. The returned storage is already in serving state. Upon closing the +// returned test.Closer, the temporary directory is cleaned up. +func NewTestStorage(t testutil.T, encoding chunk.Encoding) (*MemorySeriesStorage, testutil.Closer) { + chunk.DefaultEncoding = encoding + directory := testutil.NewTemporaryDirectory("test_storage", t) + o := &MemorySeriesStorageOptions{ + TargetHeapSize: 1000000000, + PersistenceRetentionPeriod: 24 * time.Hour * 365 * 100, // Enough to never trigger purging. 
+ PersistenceStoragePath: directory.Path(), + HeadChunkTimeout: 5 * time.Minute, + CheckpointInterval: time.Hour, + SyncStrategy: Adaptive, + } + storage := NewMemorySeriesStorage(o) + storage.archiveHighWatermark = model.Latest + if err := storage.Start(); err != nil { + directory.Close() + t.Fatalf("Error creating storage: %s", err) + } + + closer := &testStorageCloser{ + storage: storage, + directory: directory, + } + + return storage, closer +} + +func makeFingerprintSeriesPair(s *MemorySeriesStorage, fp model.Fingerprint) fingerprintSeriesPair { + return fingerprintSeriesPair{fp, s.seriesForRange(fp, model.Earliest, model.Latest)} +} diff --git a/vendor/github.com/prometheus/prometheus/storage/metric/matcher.go b/vendor/github.com/prometheus/prometheus/storage/metric/matcher.go new file mode 100644 index 000000000..2f451e27f --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/metric/matcher.go @@ -0,0 +1,209 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metric + +import ( + "fmt" + "regexp" + "strings" + + "github.com/prometheus/common/model" +) + +// MatchType is an enum for label matching types. +type MatchType int + +// Possible MatchTypes. 
+const ( + Equal MatchType = iota + NotEqual + RegexMatch + RegexNoMatch +) + +func (m MatchType) String() string { + typeToStr := map[MatchType]string{ + Equal: "=", + NotEqual: "!=", + RegexMatch: "=~", + RegexNoMatch: "!~", + } + if str, ok := typeToStr[m]; ok { + return str + } + panic("unknown match type") +} + +// LabelMatchers is a slice of LabelMatcher objects. By implementing the +// sort.Interface, it is sortable by cardinality score, i.e. after sorting, the +// LabelMatcher that is expected to yield the fewest matches is first in the +// slice, and LabelMatchers that match the empty string are last. +type LabelMatchers []*LabelMatcher + +func (lms LabelMatchers) Len() int { return len(lms) } +func (lms LabelMatchers) Swap(i, j int) { lms[i], lms[j] = lms[j], lms[i] } +func (lms LabelMatchers) Less(i, j int) bool { return lms[i].score < lms[j].score } + +func (lms LabelMatchers) String() string { + result := make([]string, 0, len(lms)) + for _, lm := range lms { + result = append(result, lm.String()) + } + return strings.Join(result, ",") +} + +// LabelMatcher models the matching of a label. Create with NewLabelMatcher. +type LabelMatcher struct { + Type MatchType + Name model.LabelName + Value model.LabelValue + re *regexp.Regexp + score float64 // Cardinality score, between 0 and 1, 0 is lowest cardinality. +} + +// NewLabelMatcher returns a LabelMatcher object ready to use. +func NewLabelMatcher(matchType MatchType, name model.LabelName, value model.LabelValue) (*LabelMatcher, error) { + m := &LabelMatcher{ + Type: matchType, + Name: name, + Value: value, + } + if matchType == RegexMatch || matchType == RegexNoMatch { + re, err := regexp.Compile("^(?:" + string(value) + ")$") + if err != nil { + return nil, err + } + m.re = re + } + m.calculateScore() + return m, nil +} + +// calculateScore is a helper method only called in the constructor. 
It +// calculates the cardinality score upfront, so that sorting by it is faster and +// doesn't change internal state of the matcher. +// +// The score is based on a pretty bad but still quite helpful heuristics for +// now. Note that this is an interim solution until the work in progress to +// properly intersect matchers is complete. We intend to not invest any further +// effort into tweaking the score calculation, as this could easily devolve into +// a rabbit hole. +// +// The heuristics works along the following lines: +// +// - A matcher that is known to match nothing would have a score of 0. (This +// case doesn't happen in the scope of this method.) +// +// - A matcher that matches the empty string has a score of 1. +// +// - Equal matchers have a score <= 0.5. The order in score for other matchers +// are RegexMatch, RegexNoMatch, NotEqual. +// +// - There are a number of score adjustments for known "magic" parts, like +// instance labels, metric names containing a colon (which are probably +// recording rules) and such. +// +// - On top, there is a tiny adjustment for the length of the matcher, following +// the blunt expectation that a long label name and/or value is more specific +// and will therefore have a lower cardinality. +// +// To reiterate on the above: PLEASE RESIST THE TEMPTATION TO TWEAK THIS +// METHOD. IT IS "MAGIC" ENOUGH ALREADY AND WILL GO AWAY WITH THE UPCOMING MORE +// POWERFUL INDEXING. +func (m *LabelMatcher) calculateScore() { + if m.Match("") { + m.score = 1 + return + } + // lengthCorrection is between 0 (for length 0) and 0.1 (for length +Inf). + lengthCorrection := 0.1 * (1 - 1/float64(len(m.Name)+len(m.Value)+1)) + switch m.Type { + case Equal: + m.score = 0.3 - lengthCorrection + case RegexMatch: + m.score = 0.6 - lengthCorrection + case RegexNoMatch: + m.score = 0.8 + lengthCorrection + case NotEqual: + m.score = 0.9 + lengthCorrection + } + if m.Type != Equal { + // Don't bother anymore in this case. 
+ return + } + switch m.Name { + case model.InstanceLabel: + // Matches only metrics from a single instance, which clearly + // limits the damage. + m.score -= 0.2 + case model.JobLabel: + // The usual case is a relatively low number of jobs with many + // metrics each. + m.score += 0.1 + case model.BucketLabel, model.QuantileLabel: + // Magic labels for buckets and quantiles will match copiously. + m.score += 0.2 + case model.MetricNameLabel: + if strings.Contains(string(m.Value), ":") { + // Probably a recording rule with limited cardinality. + m.score -= 0.1 + return + } + if m.Value == "up" || m.Value == "scrape_duration_seconds" { + // Synthetic metrics which are contained in every scrape + // exactly once. There might be less frequent metric + // names, but the worst case is limited here, so give it + // a bump. + m.score -= 0.05 + return + } + } +} + +// MatchesEmptyString returns true if the LabelMatcher matches the empty string. +func (m *LabelMatcher) MatchesEmptyString() bool { + return m.score >= 1 +} + +func (m *LabelMatcher) String() string { + return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) +} + +// Match returns true if the label matcher matches the supplied label value. +func (m *LabelMatcher) Match(v model.LabelValue) bool { + switch m.Type { + case Equal: + return m.Value == v + case NotEqual: + return m.Value != v + case RegexMatch: + return m.re.MatchString(string(v)) + case RegexNoMatch: + return !m.re.MatchString(string(v)) + default: + panic("invalid match type") + } +} + +// Filter takes a list of label values and returns all label values which match +// the label matcher. 
+func (m *LabelMatcher) Filter(in model.LabelValues) model.LabelValues { + out := model.LabelValues{} + for _, v := range in { + if m.Match(v) { + out = append(out, v) + } + } + return out +} diff --git a/vendor/github.com/prometheus/prometheus/storage/metric/metric.go b/vendor/github.com/prometheus/prometheus/storage/metric/metric.go new file mode 100644 index 000000000..7328ac7a8 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/metric/metric.go @@ -0,0 +1,63 @@ +// Copyright 2014 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metric + +import "github.com/prometheus/common/model" + +// Metric wraps a model.Metric and copies it upon modification if Copied is false. +type Metric struct { + Copied bool + Metric model.Metric +} + +// Set sets a label name in the wrapped Metric to a given value and copies the +// Metric initially, if it is not already a copy. +func (m *Metric) Set(ln model.LabelName, lv model.LabelValue) { + m.Copy() + m.Metric[ln] = lv +} + +// Del deletes a given label name from the wrapped Metric and copies the +// Metric initially, if it is not already a copy. +func (m *Metric) Del(ln model.LabelName) { + m.Copy() + delete(m.Metric, ln) +} + +// Get the value for the given label name. An empty value is returned +// if the label does not exist in the metric. 
+func (m *Metric) Get(ln model.LabelName) model.LabelValue { + return m.Metric[ln] +} + +// Gets behaves as Get but the returned boolean is false iff the label +// does not exist. +func (m *Metric) Gets(ln model.LabelName) (model.LabelValue, bool) { + lv, ok := m.Metric[ln] + return lv, ok +} + +// Copy the underlying Metric if it is not already a copy. +func (m *Metric) Copy() *Metric { + if !m.Copied { + m.Metric = m.Metric.Clone() + m.Copied = true + } + return m +} + +// String implements fmt.Stringer. +func (m Metric) String() string { + return m.Metric.String() +} diff --git a/vendor/github.com/prometheus/prometheus/storage/metric/sample.go b/vendor/github.com/prometheus/prometheus/storage/metric/sample.go new file mode 100644 index 000000000..a30c2b456 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/metric/sample.go @@ -0,0 +1,22 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metric + +import "github.com/prometheus/common/model" + +// Interval describes the inclusive interval between two Timestamps. 
+type Interval struct { + OldestInclusive model.Time + NewestInclusive model.Time +} diff --git a/vendor/github.com/prometheus/prometheus/storage/storage.go b/vendor/github.com/prometheus/prometheus/storage/storage.go new file mode 100644 index 000000000..5acae673e --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/storage/storage.go @@ -0,0 +1,76 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "github.com/prometheus/common/model" +) + +// SampleAppender is the interface to append samples to both, local and remote +// storage. All methods are goroutine-safe. +type SampleAppender interface { + // Append appends a sample to the underlying storage. Depending on the + // storage implementation, there are different guarantees for the fate + // of the sample after Append has returned. Remote storage + // implementation will simply drop samples if they cannot keep up with + // sending samples. Local storage implementations will only drop metrics + // upon unrecoverable errors. + Append(*model.Sample) error + // NeedsThrottling returns true if the underlying storage wishes to not + // receive any more samples. Append will still work but might lead to + // undue resource usage. It is recommended to call NeedsThrottling once + // before an upcoming batch of Append calls (e.g. 
a full scrape of a + // target or the evaluation of a rule group) and only proceed with the + // batch if NeedsThrottling returns false. In that way, the result of a + // scrape or of an evaluation of a rule group will always be appended + // completely or not at all, and the work of scraping or evaluation will + // not be performed in vain. Also, a call of NeedsThrottling is + // potentially expensive, so limiting the number of calls is reasonable. + // + // Only SampleAppenders for which it is considered critical to receive + // each and every sample should ever return true. SampleAppenders that + // tolerate not receiving all samples should always return false and + // instead drop samples as they see fit to avoid overload. + NeedsThrottling() bool +} + +// Fanout is a SampleAppender that appends every sample to each SampleAppender +// in its list. +type Fanout []SampleAppender + +// Append implements SampleAppender. It appends the provided sample to all +// SampleAppenders in the Fanout slice and waits for each append to complete +// before proceeding with the next. +// If any of the SampleAppenders returns an error, the first one is returned +// at the end. +func (f Fanout) Append(s *model.Sample) error { + var err error + for _, a := range f { + if e := a.Append(s); e != nil && err == nil { + err = e + } + } + return err +} + +// NeedsThrottling returns true if at least one of the SampleAppenders in the +// Fanout slice is throttled. 
+func (f Fanout) NeedsThrottling() bool { + for _, a := range f { + if a.NeedsThrottling() { + return true + } + } + return false +} diff --git a/vendor/github.com/prometheus/prometheus/util/flock/flock.go b/vendor/github.com/prometheus/prometheus/util/flock/flock.go new file mode 100644 index 000000000..5dc22a2fa --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/flock/flock.go @@ -0,0 +1,46 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package flock provides portable file locking. It is essentially ripped out +// from the code of github.com/syndtr/goleveldb. Strange enough that the +// standard library does not provide this functionality. Once this package has +// proven to work as expected, we should probably turn it into a separate +// general purpose package for humanity. +package flock + +import ( + "os" + "path/filepath" +) + +// Releaser provides the Release method to release a file lock. +type Releaser interface { + Release() error +} + +// New locks the file with the provided name. If the file does not exist, it is +// created. The returned Releaser is used to release the lock. existed is true +// if the file to lock already existed. A non-nil error is returned if the +// locking has failed. Neither this function nor the returned Releaser is +// goroutine-safe. 
+func New(fileName string) (r Releaser, existed bool, err error) { + if err = os.MkdirAll(filepath.Dir(fileName), 0755); err != nil { + return + } + + _, err = os.Stat(fileName) + existed = err == nil + + r, err = newLock(fileName) + return +} diff --git a/vendor/github.com/prometheus/prometheus/util/flock/flock_plan9.go b/vendor/github.com/prometheus/prometheus/util/flock/flock_plan9.go new file mode 100644 index 000000000..004e85c0f --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/flock/flock_plan9.go @@ -0,0 +1,32 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package flock + +import "os" + +type plan9Lock struct { + f *os.File +} + +func (l *plan9Lock) Release() error { + return l.f.Close() +} + +func newLock(fileName string) (Releaser, error) { + f, err := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE, os.ModeExclusive|0644) + if err != nil { + return nil, err + } + return &plan9Lock{f}, nil +} diff --git a/vendor/github.com/prometheus/prometheus/util/flock/flock_solaris.go b/vendor/github.com/prometheus/prometheus/util/flock/flock_solaris.go new file mode 100644 index 000000000..299fc8744 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/flock/flock_solaris.go @@ -0,0 +1,59 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build solaris + +package flock + +import ( + "os" + "syscall" +) + +type unixLock struct { + f *os.File +} + +func (l *unixLock) Release() error { + if err := l.set(false); err != nil { + return err + } + return l.f.Close() +} + +func (l *unixLock) set(lock bool) error { + flock := syscall.Flock_t{ + Type: syscall.F_UNLCK, + Start: 0, + Len: 0, + Whence: 1, + } + if lock { + flock.Type = syscall.F_WRLCK + } + return syscall.FcntlFlock(l.f.Fd(), syscall.F_SETLK, &flock) +} + +func newLock(fileName string) (Releaser, error) { + f, err := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + l := &unixLock{f} + err = l.set(true) + if err != nil { + f.Close() + return nil, err + } + return l, nil +} diff --git a/vendor/github.com/prometheus/prometheus/util/flock/flock_unix.go b/vendor/github.com/prometheus/prometheus/util/flock/flock_unix.go new file mode 100644 index 000000000..7d71f8fc0 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/flock/flock_unix.go @@ -0,0 +1,54 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +// +build darwin dragonfly freebsd linux netbsd openbsd + +package flock + +import ( + "os" + "syscall" +) + +type unixLock struct { + f *os.File +} + +func (l *unixLock) Release() error { + if err := l.set(false); err != nil { + return err + } + return l.f.Close() +} + +func (l *unixLock) set(lock bool) error { + how := syscall.LOCK_UN + if lock { + how = syscall.LOCK_EX + } + return syscall.Flock(int(l.f.Fd()), how|syscall.LOCK_NB) +} + +func newLock(fileName string) (Releaser, error) { + f, err := os.OpenFile(fileName, os.O_RDWR|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + l := &unixLock{f} + err = l.set(true) + if err != nil { + f.Close() + return nil, err + } + return l, nil +} diff --git a/vendor/github.com/prometheus/prometheus/util/flock/flock_windows.go b/vendor/github.com/prometheus/prometheus/util/flock/flock_windows.go new file mode 100644 index 000000000..bf7266f14 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/flock/flock_windows.go @@ -0,0 +1,36 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package flock + +import "syscall" + +type windowsLock struct { + fd syscall.Handle +} + +func (fl *windowsLock) Release() error { + return syscall.Close(fl.fd) +} + +func newLock(fileName string) (Releaser, error) { + pathp, err := syscall.UTF16PtrFromString(fileName) + if err != nil { + return nil, err + } + fd, err := syscall.CreateFile(pathp, syscall.GENERIC_READ|syscall.GENERIC_WRITE, 0, nil, syscall.CREATE_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0) + if err != nil { + return nil, err + } + return &windowsLock{fd}, nil +} diff --git a/vendor/github.com/prometheus/prometheus/util/stats/query_stats.go b/vendor/github.com/prometheus/prometheus/util/stats/query_stats.go new file mode 100644 index 000000000..3d7ad0e83 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/stats/query_stats.go @@ -0,0 +1,48 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stats + +// QueryTiming identifies the code area or functionality in which time is spent +// during a query. +type QueryTiming int + +// Query timings. +const ( + TotalEvalTime QueryTiming = iota + ResultSortTime + QueryPreparationTime + InnerEvalTime + ResultAppendTime + ExecQueueTime +) + +// Return a string representation of a QueryTiming identifier. 
+func (s QueryTiming) String() string { + switch s { + case TotalEvalTime: + return "Total eval time" + case ResultSortTime: + return "Result sorting time" + case QueryPreparationTime: + return "Query preparation time" + case InnerEvalTime: + return "Inner eval time" + case ResultAppendTime: + return "Result append time" + case ExecQueueTime: + return "Exec queue wait time" + default: + return "Unknown query timing" + } +} diff --git a/vendor/github.com/prometheus/prometheus/util/stats/timer.go b/vendor/github.com/prometheus/prometheus/util/stats/timer.go new file mode 100644 index 000000000..3d3ee7309 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/stats/timer.go @@ -0,0 +1,108 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stats + +import ( + "bytes" + "fmt" + "sort" + "time" +) + +// A Timer that can be started and stopped and accumulates the total time it +// was running (the time between Start() and Stop()). +type Timer struct { + name fmt.Stringer + created time.Time + start time.Time + duration time.Duration +} + +// Start the timer. +func (t *Timer) Start() *Timer { + t.start = time.Now() + return t +} + +// Stop the timer. +func (t *Timer) Stop() { + t.duration += time.Since(t.start) +} + +// ElapsedTime returns the time that passed since starting the timer. 
+func (t *Timer) ElapsedTime() time.Duration { + return time.Since(t.start) +} + +// Return a string representation of the Timer. +func (t *Timer) String() string { + return fmt.Sprintf("%s: %s", t.name, t.duration) +} + +// A TimerGroup represents a group of timers relevant to a single query. +type TimerGroup struct { + timers map[fmt.Stringer]*Timer +} + +// NewTimerGroup constructs a new TimerGroup. +func NewTimerGroup() *TimerGroup { + return &TimerGroup{timers: map[fmt.Stringer]*Timer{}} +} + +// GetTimer gets (and creates, if necessary) the Timer for a given code section. +func (t *TimerGroup) GetTimer(name fmt.Stringer) *Timer { + if timer, exists := t.timers[name]; exists { + return timer + } + timer := &Timer{ + name: name, + created: time.Now(), + } + t.timers[name] = timer + return timer +} + +// Timers is a slice of Timer pointers that implements Len and Swap from +// sort.Interface. +type Timers []*Timer + +type byCreationTimeSorter struct{ Timers } + +// Len implements sort.Interface. +func (t Timers) Len() int { + return len(t) +} + +// Swap implements sort.Interface. +func (t Timers) Swap(i, j int) { + t[i], t[j] = t[j], t[i] +} + +func (s byCreationTimeSorter) Less(i, j int) bool { + return s.Timers[i].created.Before(s.Timers[j].created) +} + +// Return a string representation of a TimerGroup. 
+func (t *TimerGroup) String() string { + timers := byCreationTimeSorter{} + for _, timer := range t.timers { + timers.Timers = append(timers.Timers, timer) + } + sort.Sort(timers) + result := &bytes.Buffer{} + for _, timer := range timers.Timers { + fmt.Fprintf(result, "%s\n", timer) + } + return result.String() +} diff --git a/vendor/github.com/prometheus/prometheus/util/strutil/quote.go b/vendor/github.com/prometheus/prometheus/util/strutil/quote.go new file mode 100644 index 000000000..981ad473d --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/strutil/quote.go @@ -0,0 +1,223 @@ +// Copyright 2015 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package strutil + +import ( + "errors" + "unicode/utf8" +) + +// ErrSyntax indicates that a value does not have the right syntax for the target type. +var ErrSyntax = errors.New("invalid syntax") + +// Unquote interprets s as a single-quoted, double-quoted, or backquoted +// Prometheus query language string literal, returning the string value that s +// quotes. +// +// NOTE: This function as well as the necessary helper functions below +// (unquoteChar, contains, unhex) and associated tests have been adapted from +// the corresponding functions in the "strconv" package of the Go standard +// library to work for Prometheus-style strings. Go's special-casing for single +// quotes was removed and single quoted strings are now treated the same as +// double quoted ones. 
+func Unquote(s string) (t string, err error) { + n := len(s) + if n < 2 { + return "", ErrSyntax + } + quote := s[0] + if quote != s[n-1] { + return "", ErrSyntax + } + s = s[1 : n-1] + + if quote == '`' { + if contains(s, '`') { + return "", ErrSyntax + } + return s, nil + } + if quote != '"' && quote != '\'' { + return "", ErrSyntax + } + if contains(s, '\n') { + return "", ErrSyntax + } + + // Is it trivial? Avoid allocation. + if !contains(s, '\\') && !contains(s, quote) { + return s, nil + } + + var runeTmp [utf8.UTFMax]byte + buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + for len(s) > 0 { + c, multibyte, ss, err := unquoteChar(s, quote) + if err != nil { + return "", err + } + s = ss + if c < utf8.RuneSelf || !multibyte { + buf = append(buf, byte(c)) + } else { + n := utf8.EncodeRune(runeTmp[:], c) + buf = append(buf, runeTmp[:n]...) + } + } + return string(buf), nil +} + +// unquoteChar decodes the first character or byte in the escaped string +// or character literal represented by the string s. +// It returns four values: +// +// 1) value, the decoded Unicode code point or byte value; +// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; +// 3) tail, the remainder of the string after the character; and +// 4) an error that will be nil if the character is syntactically valid. +// +// The second argument, quote, specifies the type of literal being parsed +// and therefore which escaped quote character is permitted. +// If set to a single quote, it permits the sequence \' and disallows unescaped '. +// If set to a double quote, it permits \" and disallows unescaped ". +// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 
+func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { + // easy cases + switch c := s[0]; { + case c == quote && (quote == '\'' || quote == '"'): + err = ErrSyntax + return + case c >= utf8.RuneSelf: + r, size := utf8.DecodeRuneInString(s) + return r, true, s[size:], nil + case c != '\\': + return rune(s[0]), false, s[1:], nil + } + + // Hard case: c is backslash. + if len(s) <= 1 { + err = ErrSyntax + return + } + c := s[1] + s = s[2:] + + switch c { + case 'a': + value = '\a' + case 'b': + value = '\b' + case 'f': + value = '\f' + case 'n': + value = '\n' + case 'r': + value = '\r' + case 't': + value = '\t' + case 'v': + value = '\v' + case 'x', 'u', 'U': + n := 0 + switch c { + case 'x': + n = 2 + case 'u': + n = 4 + case 'U': + n = 8 + } + var v rune + if len(s) < n { + err = ErrSyntax + return + } + for j := 0; j < n; j++ { + x, ok := unhex(s[j]) + if !ok { + err = ErrSyntax + return + } + v = v<<4 | x + } + s = s[n:] + if c == 'x' { + // Single-byte string, possibly not UTF-8. + value = v + break + } + if v > utf8.MaxRune { + err = ErrSyntax + return + } + value = v + multibyte = true + case '0', '1', '2', '3', '4', '5', '6', '7': + v := rune(c) - '0' + if len(s) < 2 { + err = ErrSyntax + return + } + for j := 0; j < 2; j++ { // One digit already; two more. + x := rune(s[j]) - '0' + if x < 0 || x > 7 { + err = ErrSyntax + return + } + v = (v << 3) | x + } + s = s[2:] + if v > 255 { + err = ErrSyntax + return + } + value = v + case '\\': + value = '\\' + case '\'', '"': + if c != quote { + err = ErrSyntax + return + } + value = rune(c) + default: + err = ErrSyntax + return + } + tail = s + return +} + +// contains reports whether the string contains the byte c. 
+func contains(s string, c byte) bool { + for i := 0; i < len(s); i++ { + if s[i] == c { + return true + } + } + return false +} + +func unhex(b byte) (v rune, ok bool) { + c := rune(b) + switch { + case '0' <= c && c <= '9': + return c - '0', true + case 'a' <= c && c <= 'f': + return c - 'a' + 10, true + case 'A' <= c && c <= 'F': + return c - 'A' + 10, true + } + return +} diff --git a/vendor/github.com/prometheus/prometheus/util/strutil/strconv.go b/vendor/github.com/prometheus/prometheus/util/strutil/strconv.go new file mode 100644 index 000000000..3d96e4faf --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/strutil/strconv.go @@ -0,0 +1,44 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package strutil + +import ( + "fmt" + "net/url" + "regexp" +) + +var ( + invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) +) + +// TableLinkForExpression creates an escaped relative link to the table view of +// the provided expression. +func TableLinkForExpression(expr string) string { + escapedExpression := url.QueryEscape(expr) + return fmt.Sprintf("/graph?g0.expr=%s&g0.tab=1", escapedExpression) +} + +// GraphLinkForExpression creates an escaped relative link to the graph view of +// the provided expression. 
+func GraphLinkForExpression(expr string) string { + escapedExpression := url.QueryEscape(expr) + return fmt.Sprintf("/graph?g0.expr=%s&g0.tab=0", escapedExpression) +} + +// SanitizeLabelName replaces anything that doesn't match +// client_label.LabelNameRE with an underscore. +func SanitizeLabelName(name string) string { + return invalidLabelCharRE.ReplaceAllString(name, "_") +} diff --git a/vendor/github.com/prometheus/prometheus/util/testutil/directory.go b/vendor/github.com/prometheus/prometheus/util/testutil/directory.go new file mode 100644 index 000000000..d3c9c926f --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/testutil/directory.go @@ -0,0 +1,129 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "io/ioutil" + "os" +) + +const ( + // The base directory used for test emissions, which instructs the operating + // system to use the default temporary directory as the base or TMPDIR + // environment variable. + defaultDirectory = "" + + // NilCloser is a no-op Closer. + NilCloser = nilCloser(true) + + // The number of times that a TemporaryDirectory will retry its removal + temporaryDirectoryRemoveRetries = 2 +) + +type ( + // Closer is the interface that wraps the Close method. + Closer interface { + // Close reaps the underlying directory and its children. The directory + // could be deleted by its users already. 
+ Close() + } + + nilCloser bool + + // TemporaryDirectory models a closeable path for transient POSIX disk + // activities. + TemporaryDirectory interface { + Closer + + // Path returns the underlying path for access. + Path() string + } + + // temporaryDirectory is kept as a private type due to private fields and + // their interactions. + temporaryDirectory struct { + path string + tester T + } + + callbackCloser struct { + fn func() + } + + // T implements the needed methods of testing.TB so that we do not need + // to actually import testing (which has the side effect of adding all + // the test flags, which we do not want in non-test binaries even if + // they make use of these utilities for some reason). + T interface { + Fatal(args ...interface{}) + Fatalf(format string, args ...interface{}) + } +) + +func (c nilCloser) Close() { +} + +func (c callbackCloser) Close() { + c.fn() +} + +// NewCallbackCloser returns a Closer that calls the provided function upon +// closing. +func NewCallbackCloser(fn func()) Closer { + return &callbackCloser{ + fn: fn, + } +} + +func (t temporaryDirectory) Close() { + retries := temporaryDirectoryRemoveRetries + err := os.RemoveAll(t.path) + for err != nil && retries > 0 { + switch { + case os.IsNotExist(err): + err = nil + default: + retries-- + err = os.RemoveAll(t.path) + } + } + if err != nil { + t.tester.Fatal(err) + } +} + +func (t temporaryDirectory) Path() string { + return t.path +} + +// NewTemporaryDirectory creates a new temporary directory for transient POSIX +// activities. 
+func NewTemporaryDirectory(name string, t T) (handler TemporaryDirectory) { + var ( + directory string + err error + ) + + directory, err = ioutil.TempDir(defaultDirectory, name) + if err != nil { + t.Fatal(err) + } + + handler = temporaryDirectory{ + path: directory, + tester: t, + } + + return +} diff --git a/vendor/github.com/prometheus/prometheus/util/testutil/error.go b/vendor/github.com/prometheus/prometheus/util/testutil/error.go new file mode 100644 index 000000000..a132abf8f --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/testutil/error.go @@ -0,0 +1,31 @@ +// Copyright 2013 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +// ErrorEqual compares Go errors for equality. +func ErrorEqual(left, right error) bool { + if left == right { + return true + } + + if left != nil && right != nil { + if left.Error() == right.Error() { + return true + } + + return false + } + + return false +} diff --git a/vendor/github.com/prometheus/prometheus/util/testutil/roundtrip.go b/vendor/github.com/prometheus/prometheus/util/testutil/roundtrip.go new file mode 100644 index 000000000..996d11f36 --- /dev/null +++ b/vendor/github.com/prometheus/prometheus/util/testutil/roundtrip.go @@ -0,0 +1,47 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package testutil + +import ( + "net/http" +) + +type roundTrip struct { + theResponse *http.Response + theError error +} + +func (rt *roundTrip) RoundTrip(r *http.Request) (*http.Response, error) { + return rt.theResponse, rt.theError +} + +type roundTripCheckRequest struct { + checkRequest func(*http.Request) + roundTrip +} + +func (rt *roundTripCheckRequest) RoundTrip(r *http.Request) (*http.Response, error) { + rt.checkRequest(r) + return rt.theResponse, rt.theError +} + +// NewRoundTripCheckRequest creates a new instance of a type that implements http.RoundTripper, +// which before returning theResponse and theError, executes checkRequest against a http.Request. +func NewRoundTripCheckRequest(checkRequest func(*http.Request), theResponse *http.Response, theError error) http.RoundTripper { + return &roundTripCheckRequest{ + checkRequest: checkRequest, + roundTrip: roundTrip{ + theResponse: theResponse, + theError: theError}} +} diff --git a/vendor/github.com/syndtr/goleveldb/LICENSE b/vendor/github.com/syndtr/goleveldb/LICENSE new file mode 100644 index 000000000..4a772d1ab --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/LICENSE @@ -0,0 +1,24 @@ +Copyright 2012 Suryandaru Triandana +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/batch.go b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go new file mode 100644 index 000000000..225920002 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/batch.go @@ -0,0 +1,349 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "encoding/binary" + "fmt" + "io" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// ErrBatchCorrupted records reason of batch corruption. This error will be +// wrapped with errors.ErrCorrupted. 
+type ErrBatchCorrupted struct { + Reason string +} + +func (e *ErrBatchCorrupted) Error() string { + return fmt.Sprintf("leveldb: batch corrupted: %s", e.Reason) +} + +func newErrBatchCorrupted(reason string) error { + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrBatchCorrupted{reason}) +} + +const ( + batchHeaderLen = 8 + 4 + batchGrowRec = 3000 + batchBufioSize = 16 +) + +// BatchReplay wraps basic batch operations. +type BatchReplay interface { + Put(key, value []byte) + Delete(key []byte) +} + +type batchIndex struct { + keyType keyType + keyPos, keyLen int + valuePos, valueLen int +} + +func (index batchIndex) k(data []byte) []byte { + return data[index.keyPos : index.keyPos+index.keyLen] +} + +func (index batchIndex) v(data []byte) []byte { + if index.valueLen != 0 { + return data[index.valuePos : index.valuePos+index.valueLen] + } + return nil +} + +func (index batchIndex) kv(data []byte) (key, value []byte) { + return index.k(data), index.v(data) +} + +// Batch is a write batch. +type Batch struct { + data []byte + index []batchIndex + + // internalLen is sums of key/value pair length plus 8-bytes internal key. 
+ internalLen int +} + +func (b *Batch) grow(n int) { + o := len(b.data) + if cap(b.data)-o < n { + div := 1 + if len(b.index) > batchGrowRec { + div = len(b.index) / batchGrowRec + } + ndata := make([]byte, o, o+n+o/div) + copy(ndata, b.data) + b.data = ndata + } +} + +func (b *Batch) appendRec(kt keyType, key, value []byte) { + n := 1 + binary.MaxVarintLen32 + len(key) + if kt == keyTypeVal { + n += binary.MaxVarintLen32 + len(value) + } + b.grow(n) + index := batchIndex{keyType: kt} + o := len(b.data) + data := b.data[:o+n] + data[o] = byte(kt) + o++ + o += binary.PutUvarint(data[o:], uint64(len(key))) + index.keyPos = o + index.keyLen = len(key) + o += copy(data[o:], key) + if kt == keyTypeVal { + o += binary.PutUvarint(data[o:], uint64(len(value))) + index.valuePos = o + index.valueLen = len(value) + o += copy(data[o:], value) + } + b.data = data[:o] + b.index = append(b.index, index) + b.internalLen += index.keyLen + index.valueLen + 8 +} + +// Put appends 'put operation' of the given key/value pair to the batch. +// It is safe to modify the contents of the argument after Put returns but not +// before. +func (b *Batch) Put(key, value []byte) { + b.appendRec(keyTypeVal, key, value) +} + +// Delete appends 'delete operation' of the given key to the batch. +// It is safe to modify the contents of the argument after Delete returns but +// not before. +func (b *Batch) Delete(key []byte) { + b.appendRec(keyTypeDel, key, nil) +} + +// Dump dumps batch contents. The returned slice can be loaded into the +// batch using Load method. +// The returned slice is not its own copy, so the contents should not be +// modified. +func (b *Batch) Dump() []byte { + return b.data +} + +// Load loads given slice into the batch. Previous contents of the batch +// will be discarded. +// The given slice will not be copied and will be used as batch buffer, so +// it is not safe to modify the contents of the slice. 
+func (b *Batch) Load(data []byte) error { + return b.decode(data, -1) +} + +// Replay replays batch contents. +func (b *Batch) Replay(r BatchReplay) error { + for _, index := range b.index { + switch index.keyType { + case keyTypeVal: + r.Put(index.k(b.data), index.v(b.data)) + case keyTypeDel: + r.Delete(index.k(b.data)) + } + } + return nil +} + +// Len returns number of records in the batch. +func (b *Batch) Len() int { + return len(b.index) +} + +// Reset resets the batch. +func (b *Batch) Reset() { + b.data = b.data[:0] + b.index = b.index[:0] + b.internalLen = 0 +} + +func (b *Batch) replayInternal(fn func(i int, kt keyType, k, v []byte) error) error { + for i, index := range b.index { + if err := fn(i, index.keyType, index.k(b.data), index.v(b.data)); err != nil { + return err + } + } + return nil +} + +func (b *Batch) append(p *Batch) { + ob := len(b.data) + oi := len(b.index) + b.data = append(b.data, p.data...) + b.index = append(b.index, p.index...) + b.internalLen += p.internalLen + + // Updating index offset. 
+ if ob != 0 { + for ; oi < len(b.index); oi++ { + index := &b.index[oi] + index.keyPos += ob + if index.valueLen != 0 { + index.valuePos += ob + } + } + } +} + +func (b *Batch) decode(data []byte, expectedLen int) error { + b.data = data + b.index = b.index[:0] + b.internalLen = 0 + err := decodeBatch(data, func(i int, index batchIndex) error { + b.index = append(b.index, index) + b.internalLen += index.keyLen + index.valueLen + 8 + return nil + }) + if err != nil { + return err + } + if expectedLen >= 0 && len(b.index) != expectedLen { + return newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", expectedLen, len(b.index))) + } + return nil +} + +func (b *Batch) putMem(seq uint64, mdb *memdb.DB) error { + var ik []byte + for i, index := range b.index { + ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType) + if err := mdb.Put(ik, index.v(b.data)); err != nil { + return err + } + } + return nil +} + +func (b *Batch) revertMem(seq uint64, mdb *memdb.DB) error { + var ik []byte + for i, index := range b.index { + ik = makeInternalKey(ik, index.k(b.data), seq+uint64(i), index.keyType) + if err := mdb.Delete(ik); err != nil { + return err + } + } + return nil +} + +func newBatch() interface{} { + return &Batch{} +} + +func decodeBatch(data []byte, fn func(i int, index batchIndex) error) error { + var index batchIndex + for i, o := 0, 0; o < len(data); i++ { + // Key type. + index.keyType = keyType(data[o]) + if index.keyType > keyTypeVal { + return newErrBatchCorrupted(fmt.Sprintf("bad record: invalid type %#x", uint(index.keyType))) + } + o++ + + // Key. + x, n := binary.Uvarint(data[o:]) + o += n + if n <= 0 || o+int(x) > len(data) { + return newErrBatchCorrupted("bad record: invalid key length") + } + index.keyPos = o + index.keyLen = int(x) + o += index.keyLen + + // Value. 
+ if index.keyType == keyTypeVal { + x, n = binary.Uvarint(data[o:]) + o += n + if n <= 0 || o+int(x) > len(data) { + return newErrBatchCorrupted("bad record: invalid value length") + } + index.valuePos = o + index.valueLen = int(x) + o += index.valueLen + } else { + index.valuePos = 0 + index.valueLen = 0 + } + + if err := fn(i, index); err != nil { + return err + } + } + return nil +} + +func decodeBatchToMem(data []byte, expectSeq uint64, mdb *memdb.DB) (seq uint64, batchLen int, err error) { + seq, batchLen, err = decodeBatchHeader(data) + if err != nil { + return 0, 0, err + } + if seq < expectSeq { + return 0, 0, newErrBatchCorrupted("invalid sequence number") + } + data = data[batchHeaderLen:] + var ik []byte + var decodedLen int + err = decodeBatch(data, func(i int, index batchIndex) error { + if i >= batchLen { + return newErrBatchCorrupted("invalid records length") + } + ik = makeInternalKey(ik, index.k(data), seq+uint64(i), index.keyType) + if err := mdb.Put(ik, index.v(data)); err != nil { + return err + } + decodedLen++ + return nil + }) + if err == nil && decodedLen != batchLen { + err = newErrBatchCorrupted(fmt.Sprintf("invalid records length: %d vs %d", batchLen, decodedLen)) + } + return +} + +func encodeBatchHeader(dst []byte, seq uint64, batchLen int) []byte { + dst = ensureBuffer(dst, batchHeaderLen) + binary.LittleEndian.PutUint64(dst, seq) + binary.LittleEndian.PutUint32(dst[8:], uint32(batchLen)) + return dst +} + +func decodeBatchHeader(data []byte) (seq uint64, batchLen int, err error) { + if len(data) < batchHeaderLen { + return 0, 0, newErrBatchCorrupted("too short") + } + + seq = binary.LittleEndian.Uint64(data) + batchLen = int(binary.LittleEndian.Uint32(data[8:])) + if batchLen < 0 { + return 0, 0, newErrBatchCorrupted("invalid records length") + } + return +} + +func batchesLen(batches []*Batch) int { + batchLen := 0 + for _, batch := range batches { + batchLen += batch.Len() + } + return batchLen +} + +func writeBatchesWithHeader(wr 
io.Writer, batches []*Batch, seq uint64) error { + if _, err := wr.Write(encodeBatchHeader(nil, seq, batchesLen(batches))); err != nil { + return err + } + for _, batch := range batches { + if _, err := wr.Write(batch.data); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go new file mode 100644 index 000000000..c36ad3235 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/cache.go @@ -0,0 +1,704 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package cache provides interface and implementation of a cache algorithms. +package cache + +import ( + "sync" + "sync/atomic" + "unsafe" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Cacher provides interface to implements a caching functionality. +// An implementation must be safe for concurrent use. +type Cacher interface { + // Capacity returns cache capacity. + Capacity() int + + // SetCapacity sets cache capacity. + SetCapacity(capacity int) + + // Promote promotes the 'cache node'. + Promote(n *Node) + + // Ban evicts the 'cache node' and prevent subsequent 'promote'. + Ban(n *Node) + + // Evict evicts the 'cache node'. + Evict(n *Node) + + // EvictNS evicts 'cache node' with the given namespace. + EvictNS(ns uint64) + + // EvictAll evicts all 'cache node'. + EvictAll() + + // Close closes the 'cache tree' + Close() error +} + +// Value is a 'cacheable object'. It may implements util.Releaser, if +// so the the Release method will be called once object is released. +type Value interface{} + +// NamespaceGetter provides convenient wrapper for namespace. +type NamespaceGetter struct { + Cache *Cache + NS uint64 +} + +// Get simply calls Cache.Get() method. 
+func (g *NamespaceGetter) Get(key uint64, setFunc func() (size int, value Value)) *Handle { + return g.Cache.Get(g.NS, key, setFunc) +} + +// The hash tables implementation is based on: +// "Dynamic-Sized Nonblocking Hash Tables", by Yujie Liu, +// Kunlong Zhang, and Michael Spear. +// ACM Symposium on Principles of Distributed Computing, Jul 2014. + +const ( + mInitialSize = 1 << 4 + mOverflowThreshold = 1 << 5 + mOverflowGrowThreshold = 1 << 7 +) + +type mBucket struct { + mu sync.Mutex + node []*Node + frozen bool +} + +func (b *mBucket) freeze() []*Node { + b.mu.Lock() + defer b.mu.Unlock() + if !b.frozen { + b.frozen = true + } + return b.node +} + +func (b *mBucket) get(r *Cache, h *mNode, hash uint32, ns, key uint64, noset bool) (done, added bool, n *Node) { + b.mu.Lock() + + if b.frozen { + b.mu.Unlock() + return + } + + // Scan the node. + for _, n := range b.node { + if n.hash == hash && n.ns == ns && n.key == key { + atomic.AddInt32(&n.ref, 1) + b.mu.Unlock() + return true, false, n + } + } + + // Get only. + if noset { + b.mu.Unlock() + return true, false, nil + } + + // Create node. + n = &Node{ + r: r, + hash: hash, + ns: ns, + key: key, + ref: 1, + } + // Add node to bucket. + b.node = append(b.node, n) + bLen := len(b.node) + b.mu.Unlock() + + // Update counter. + grow := atomic.AddInt32(&r.nodes, 1) >= h.growThreshold + if bLen > mOverflowThreshold { + grow = grow || atomic.AddInt32(&h.overflow, 1) >= mOverflowGrowThreshold + } + + // Grow. 
+ if grow && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) { + nhLen := len(h.buckets) << 1 + nh := &mNode{ + buckets: make([]unsafe.Pointer, nhLen), + mask: uint32(nhLen) - 1, + pred: unsafe.Pointer(h), + growThreshold: int32(nhLen * mOverflowThreshold), + shrinkThreshold: int32(nhLen >> 1), + } + ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh)) + if !ok { + panic("BUG: failed swapping head") + } + go nh.initBuckets() + } + + return true, true, n +} + +func (b *mBucket) delete(r *Cache, h *mNode, hash uint32, ns, key uint64) (done, deleted bool) { + b.mu.Lock() + + if b.frozen { + b.mu.Unlock() + return + } + + // Scan the node. + var ( + n *Node + bLen int + ) + for i := range b.node { + n = b.node[i] + if n.ns == ns && n.key == key { + if atomic.LoadInt32(&n.ref) == 0 { + deleted = true + + // Call releaser. + if n.value != nil { + if r, ok := n.value.(util.Releaser); ok { + r.Release() + } + n.value = nil + } + + // Remove node from bucket. + b.node = append(b.node[:i], b.node[i+1:]...) + bLen = len(b.node) + } + break + } + } + b.mu.Unlock() + + if deleted { + // Call OnDel. + for _, f := range n.onDel { + f() + } + + // Update counter. + atomic.AddInt32(&r.size, int32(n.size)*-1) + shrink := atomic.AddInt32(&r.nodes, -1) < h.shrinkThreshold + if bLen >= mOverflowThreshold { + atomic.AddInt32(&h.overflow, -1) + } + + // Shrink. 
+ if shrink && len(h.buckets) > mInitialSize && atomic.CompareAndSwapInt32(&h.resizeInProgess, 0, 1) { + nhLen := len(h.buckets) >> 1 + nh := &mNode{ + buckets: make([]unsafe.Pointer, nhLen), + mask: uint32(nhLen) - 1, + pred: unsafe.Pointer(h), + growThreshold: int32(nhLen * mOverflowThreshold), + shrinkThreshold: int32(nhLen >> 1), + } + ok := atomic.CompareAndSwapPointer(&r.mHead, unsafe.Pointer(h), unsafe.Pointer(nh)) + if !ok { + panic("BUG: failed swapping head") + } + go nh.initBuckets() + } + } + + return true, deleted +} + +type mNode struct { + buckets []unsafe.Pointer // []*mBucket + mask uint32 + pred unsafe.Pointer // *mNode + resizeInProgess int32 + + overflow int32 + growThreshold int32 + shrinkThreshold int32 +} + +func (n *mNode) initBucket(i uint32) *mBucket { + if b := (*mBucket)(atomic.LoadPointer(&n.buckets[i])); b != nil { + return b + } + + p := (*mNode)(atomic.LoadPointer(&n.pred)) + if p != nil { + var node []*Node + if n.mask > p.mask { + // Grow. + pb := (*mBucket)(atomic.LoadPointer(&p.buckets[i&p.mask])) + if pb == nil { + pb = p.initBucket(i & p.mask) + } + m := pb.freeze() + // Split nodes. + for _, x := range m { + if x.hash&n.mask == i { + node = append(node, x) + } + } + } else { + // Shrink. + pb0 := (*mBucket)(atomic.LoadPointer(&p.buckets[i])) + if pb0 == nil { + pb0 = p.initBucket(i) + } + pb1 := (*mBucket)(atomic.LoadPointer(&p.buckets[i+uint32(len(n.buckets))])) + if pb1 == nil { + pb1 = p.initBucket(i + uint32(len(n.buckets))) + } + m0 := pb0.freeze() + m1 := pb1.freeze() + // Merge nodes. + node = make([]*Node, 0, len(m0)+len(m1)) + node = append(node, m0...) + node = append(node, m1...) 
+ } + b := &mBucket{node: node} + if atomic.CompareAndSwapPointer(&n.buckets[i], nil, unsafe.Pointer(b)) { + if len(node) > mOverflowThreshold { + atomic.AddInt32(&n.overflow, int32(len(node)-mOverflowThreshold)) + } + return b + } + } + + return (*mBucket)(atomic.LoadPointer(&n.buckets[i])) +} + +func (n *mNode) initBuckets() { + for i := range n.buckets { + n.initBucket(uint32(i)) + } + atomic.StorePointer(&n.pred, nil) +} + +// Cache is a 'cache map'. +type Cache struct { + mu sync.RWMutex + mHead unsafe.Pointer // *mNode + nodes int32 + size int32 + cacher Cacher + closed bool +} + +// NewCache creates a new 'cache map'. The cacher is optional and +// may be nil. +func NewCache(cacher Cacher) *Cache { + h := &mNode{ + buckets: make([]unsafe.Pointer, mInitialSize), + mask: mInitialSize - 1, + growThreshold: int32(mInitialSize * mOverflowThreshold), + shrinkThreshold: 0, + } + for i := range h.buckets { + h.buckets[i] = unsafe.Pointer(&mBucket{}) + } + r := &Cache{ + mHead: unsafe.Pointer(h), + cacher: cacher, + } + return r +} + +func (r *Cache) getBucket(hash uint32) (*mNode, *mBucket) { + h := (*mNode)(atomic.LoadPointer(&r.mHead)) + i := hash & h.mask + b := (*mBucket)(atomic.LoadPointer(&h.buckets[i])) + if b == nil { + b = h.initBucket(i) + } + return h, b +} + +func (r *Cache) delete(n *Node) bool { + for { + h, b := r.getBucket(n.hash) + done, deleted := b.delete(r, h, n.hash, n.ns, n.key) + if done { + return deleted + } + } +} + +// Nodes returns number of 'cache node' in the map. +func (r *Cache) Nodes() int { + return int(atomic.LoadInt32(&r.nodes)) +} + +// Size returns sums of 'cache node' size in the map. +func (r *Cache) Size() int { + return int(atomic.LoadInt32(&r.size)) +} + +// Capacity returns cache capacity. +func (r *Cache) Capacity() int { + if r.cacher == nil { + return 0 + } + return r.cacher.Capacity() +} + +// SetCapacity sets cache capacity. 
+func (r *Cache) SetCapacity(capacity int) { + if r.cacher != nil { + r.cacher.SetCapacity(capacity) + } +} + +// Get gets 'cache node' with the given namespace and key. +// If cache node is not found and setFunc is not nil, Get will atomically creates +// the 'cache node' by calling setFunc. Otherwise Get will returns nil. +// +// The returned 'cache handle' should be released after use by calling Release +// method. +func (r *Cache) Get(ns, key uint64, setFunc func() (size int, value Value)) *Handle { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return nil + } + + hash := murmur32(ns, key, 0xf00) + for { + h, b := r.getBucket(hash) + done, _, n := b.get(r, h, hash, ns, key, setFunc == nil) + if done { + if n != nil { + n.mu.Lock() + if n.value == nil { + if setFunc == nil { + n.mu.Unlock() + n.unref() + return nil + } + + n.size, n.value = setFunc() + if n.value == nil { + n.size = 0 + n.mu.Unlock() + n.unref() + return nil + } + atomic.AddInt32(&r.size, int32(n.size)) + } + n.mu.Unlock() + if r.cacher != nil { + r.cacher.Promote(n) + } + return &Handle{unsafe.Pointer(n)} + } + + break + } + } + return nil +} + +// Delete removes and ban 'cache node' with the given namespace and key. +// A banned 'cache node' will never inserted into the 'cache tree'. Ban +// only attributed to the particular 'cache node', so when a 'cache node' +// is recreated it will not be banned. +// +// If onDel is not nil, then it will be executed if such 'cache node' +// doesn't exist or once the 'cache node' is released. +// +// Delete return true is such 'cache node' exist. 
+func (r *Cache) Delete(ns, key uint64, onDel func()) bool { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return false + } + + hash := murmur32(ns, key, 0xf00) + for { + h, b := r.getBucket(hash) + done, _, n := b.get(r, h, hash, ns, key, true) + if done { + if n != nil { + if onDel != nil { + n.mu.Lock() + n.onDel = append(n.onDel, onDel) + n.mu.Unlock() + } + if r.cacher != nil { + r.cacher.Ban(n) + } + n.unref() + return true + } + + break + } + } + + if onDel != nil { + onDel() + } + + return false +} + +// Evict evicts 'cache node' with the given namespace and key. This will +// simply call Cacher.Evict. +// +// Evict return true is such 'cache node' exist. +func (r *Cache) Evict(ns, key uint64) bool { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return false + } + + hash := murmur32(ns, key, 0xf00) + for { + h, b := r.getBucket(hash) + done, _, n := b.get(r, h, hash, ns, key, true) + if done { + if n != nil { + if r.cacher != nil { + r.cacher.Evict(n) + } + n.unref() + return true + } + + break + } + } + + return false +} + +// EvictNS evicts 'cache node' with the given namespace. This will +// simply call Cacher.EvictNS. +func (r *Cache) EvictNS(ns uint64) { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return + } + + if r.cacher != nil { + r.cacher.EvictNS(ns) + } +} + +// EvictAll evicts all 'cache node'. This will simply call Cacher.EvictAll. +func (r *Cache) EvictAll() { + r.mu.RLock() + defer r.mu.RUnlock() + if r.closed { + return + } + + if r.cacher != nil { + r.cacher.EvictAll() + } +} + +// Close closes the 'cache map' and forcefully releases all 'cache node'. +func (r *Cache) Close() error { + r.mu.Lock() + if !r.closed { + r.closed = true + + h := (*mNode)(r.mHead) + h.initBuckets() + + for i := range h.buckets { + b := (*mBucket)(h.buckets[i]) + for _, n := range b.node { + // Call releaser. + if n.value != nil { + if r, ok := n.value.(util.Releaser); ok { + r.Release() + } + n.value = nil + } + + // Call OnDel. 
+ for _, f := range n.onDel { + f() + } + n.onDel = nil + } + } + } + r.mu.Unlock() + + // Avoid deadlock. + if r.cacher != nil { + if err := r.cacher.Close(); err != nil { + return err + } + } + return nil +} + +// CloseWeak closes the 'cache map' and evict all 'cache node' from cacher, but +// unlike Close it doesn't forcefully releases 'cache node'. +func (r *Cache) CloseWeak() error { + r.mu.Lock() + if !r.closed { + r.closed = true + } + r.mu.Unlock() + + // Avoid deadlock. + if r.cacher != nil { + r.cacher.EvictAll() + if err := r.cacher.Close(); err != nil { + return err + } + } + return nil +} + +// Node is a 'cache node'. +type Node struct { + r *Cache + + hash uint32 + ns, key uint64 + + mu sync.Mutex + size int + value Value + + ref int32 + onDel []func() + + CacheData unsafe.Pointer +} + +// NS returns this 'cache node' namespace. +func (n *Node) NS() uint64 { + return n.ns +} + +// Key returns this 'cache node' key. +func (n *Node) Key() uint64 { + return n.key +} + +// Size returns this 'cache node' size. +func (n *Node) Size() int { + return n.size +} + +// Value returns this 'cache node' value. +func (n *Node) Value() Value { + return n.value +} + +// Ref returns this 'cache node' ref counter. +func (n *Node) Ref() int32 { + return atomic.LoadInt32(&n.ref) +} + +// GetHandle returns an handle for this 'cache node'. +func (n *Node) GetHandle() *Handle { + if atomic.AddInt32(&n.ref, 1) <= 1 { + panic("BUG: Node.GetHandle on zero ref") + } + return &Handle{unsafe.Pointer(n)} +} + +func (n *Node) unref() { + if atomic.AddInt32(&n.ref, -1) == 0 { + n.r.delete(n) + } +} + +func (n *Node) unrefLocked() { + if atomic.AddInt32(&n.ref, -1) == 0 { + n.r.mu.RLock() + if !n.r.closed { + n.r.delete(n) + } + n.r.mu.RUnlock() + } +} + +// Handle is a 'cache handle' of a 'cache node'. +type Handle struct { + n unsafe.Pointer // *Node +} + +// Value returns the value of the 'cache node'. 
+func (h *Handle) Value() Value { + n := (*Node)(atomic.LoadPointer(&h.n)) + if n != nil { + return n.value + } + return nil +} + +// Release releases this 'cache handle'. +// It is safe to call release multiple times. +func (h *Handle) Release() { + nPtr := atomic.LoadPointer(&h.n) + if nPtr != nil && atomic.CompareAndSwapPointer(&h.n, nPtr, nil) { + n := (*Node)(nPtr) + n.unrefLocked() + } +} + +func murmur32(ns, key uint64, seed uint32) uint32 { + const ( + m = uint32(0x5bd1e995) + r = 24 + ) + + k1 := uint32(ns >> 32) + k2 := uint32(ns) + k3 := uint32(key >> 32) + k4 := uint32(key) + + k1 *= m + k1 ^= k1 >> r + k1 *= m + + k2 *= m + k2 ^= k2 >> r + k2 *= m + + k3 *= m + k3 ^= k3 >> r + k3 *= m + + k4 *= m + k4 ^= k4 >> r + k4 *= m + + h := seed + + h *= m + h ^= k1 + h *= m + h ^= k2 + h *= m + h ^= k3 + h *= m + h ^= k4 + + h ^= h >> 13 + h *= m + h ^= h >> 15 + + return h +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go new file mode 100644 index 000000000..d9a84cde1 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/cache/lru.go @@ -0,0 +1,195 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package cache + +import ( + "sync" + "unsafe" +) + +type lruNode struct { + n *Node + h *Handle + ban bool + + next, prev *lruNode +} + +func (n *lruNode) insert(at *lruNode) { + x := at.next + at.next = n + n.prev = at + n.next = x + x.prev = n +} + +func (n *lruNode) remove() { + if n.prev != nil { + n.prev.next = n.next + n.next.prev = n.prev + n.prev = nil + n.next = nil + } else { + panic("BUG: removing removed node") + } +} + +type lru struct { + mu sync.Mutex + capacity int + used int + recent lruNode +} + +func (r *lru) reset() { + r.recent.next = &r.recent + r.recent.prev = &r.recent + r.used = 0 +} + +func (r *lru) Capacity() int { + r.mu.Lock() + defer r.mu.Unlock() + return r.capacity +} + +func (r *lru) SetCapacity(capacity int) { + var evicted []*lruNode + + r.mu.Lock() + r.capacity = capacity + for r.used > r.capacity { + rn := r.recent.prev + if rn == nil { + panic("BUG: invalid LRU used or capacity counter") + } + rn.remove() + rn.n.CacheData = nil + r.used -= rn.n.Size() + evicted = append(evicted, rn) + } + r.mu.Unlock() + + for _, rn := range evicted { + rn.h.Release() + } +} + +func (r *lru) Promote(n *Node) { + var evicted []*lruNode + + r.mu.Lock() + if n.CacheData == nil { + if n.Size() <= r.capacity { + rn := &lruNode{n: n, h: n.GetHandle()} + rn.insert(&r.recent) + n.CacheData = unsafe.Pointer(rn) + r.used += n.Size() + + for r.used > r.capacity { + rn := r.recent.prev + if rn == nil { + panic("BUG: invalid LRU used or capacity counter") + } + rn.remove() + rn.n.CacheData = nil + r.used -= rn.n.Size() + evicted = append(evicted, rn) + } + } + } else { + rn := (*lruNode)(n.CacheData) + if !rn.ban { + rn.remove() + rn.insert(&r.recent) + } + } + r.mu.Unlock() + + for _, rn := range evicted { + rn.h.Release() + } +} + +func (r *lru) Ban(n *Node) { + r.mu.Lock() + if n.CacheData == nil { + n.CacheData = unsafe.Pointer(&lruNode{n: n, ban: true}) + } else { + rn := (*lruNode)(n.CacheData) + if !rn.ban { + rn.remove() + rn.ban = true + r.used 
-= rn.n.Size() + r.mu.Unlock() + + rn.h.Release() + rn.h = nil + return + } + } + r.mu.Unlock() +} + +func (r *lru) Evict(n *Node) { + r.mu.Lock() + rn := (*lruNode)(n.CacheData) + if rn == nil || rn.ban { + r.mu.Unlock() + return + } + n.CacheData = nil + r.mu.Unlock() + + rn.h.Release() +} + +func (r *lru) EvictNS(ns uint64) { + var evicted []*lruNode + + r.mu.Lock() + for e := r.recent.prev; e != &r.recent; { + rn := e + e = e.prev + if rn.n.NS() == ns { + rn.remove() + rn.n.CacheData = nil + r.used -= rn.n.Size() + evicted = append(evicted, rn) + } + } + r.mu.Unlock() + + for _, rn := range evicted { + rn.h.Release() + } +} + +func (r *lru) EvictAll() { + r.mu.Lock() + back := r.recent.prev + for rn := back; rn != &r.recent; rn = rn.prev { + rn.n.CacheData = nil + } + r.reset() + r.mu.Unlock() + + for rn := back; rn != &r.recent; rn = rn.prev { + rn.h.Release() + } +} + +func (r *lru) Close() error { + return nil +} + +// NewLRU create a new LRU-cache. +func NewLRU(capacity int) Cacher { + r := &lru{capacity: capacity} + r.reset() + return r +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go new file mode 100644 index 000000000..448402b82 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer.go @@ -0,0 +1,67 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/comparer" +) + +type iComparer struct { + ucmp comparer.Comparer +} + +func (icmp *iComparer) uName() string { + return icmp.ucmp.Name() +} + +func (icmp *iComparer) uCompare(a, b []byte) int { + return icmp.ucmp.Compare(a, b) +} + +func (icmp *iComparer) uSeparator(dst, a, b []byte) []byte { + return icmp.ucmp.Separator(dst, a, b) +} + +func (icmp *iComparer) uSuccessor(dst, b []byte) []byte { + return icmp.ucmp.Successor(dst, b) +} + +func (icmp *iComparer) Name() string { + return icmp.uName() +} + +func (icmp *iComparer) Compare(a, b []byte) int { + x := icmp.uCompare(internalKey(a).ukey(), internalKey(b).ukey()) + if x == 0 { + if m, n := internalKey(a).num(), internalKey(b).num(); m > n { + return -1 + } else if m < n { + return 1 + } + } + return x +} + +func (icmp *iComparer) Separator(dst, a, b []byte) []byte { + ua, ub := internalKey(a).ukey(), internalKey(b).ukey() + dst = icmp.uSeparator(dst, ua, ub) + if dst != nil && len(dst) < len(ua) && icmp.uCompare(ua, dst) < 0 { + // Append earliest possible number. + return append(dst, keyMaxNumBytes...) + } + return nil +} + +func (icmp *iComparer) Successor(dst, b []byte) []byte { + ub := internalKey(b).ukey() + dst = icmp.uSuccessor(dst, ub) + if dst != nil && len(dst) < len(ub) && icmp.uCompare(ub, dst) < 0 { + // Append earliest possible number. + return append(dst, keyMaxNumBytes...) + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go new file mode 100644 index 000000000..abf9fb65c --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/bytes_comparer.go @@ -0,0 +1,51 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package comparer + +import "bytes" + +type bytesComparer struct{} + +func (bytesComparer) Compare(a, b []byte) int { + return bytes.Compare(a, b) +} + +func (bytesComparer) Name() string { + return "leveldb.BytewiseComparator" +} + +func (bytesComparer) Separator(dst, a, b []byte) []byte { + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + for ; i < n && a[i] == b[i]; i++ { + } + if i >= n { + // Do not shorten if one string is a prefix of the other + } else if c := a[i]; c < 0xff && c+1 < b[i] { + dst = append(dst, a[:i+1]...) + dst[len(dst)-1]++ + return dst + } + return nil +} + +func (bytesComparer) Successor(dst, b []byte) []byte { + for i, c := range b { + if c != 0xff { + dst = append(dst, b[:i+1]...) + dst[len(dst)-1]++ + return dst + } + } + return nil +} + +// DefaultComparer are default implementation of the Comparer interface. +// It uses the natural ordering, consistent with bytes.Compare. +var DefaultComparer = bytesComparer{} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go new file mode 100644 index 000000000..2c522db23 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/comparer/comparer.go @@ -0,0 +1,57 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package comparer provides interface and implementation for ordering +// sets of data. +package comparer + +// BasicComparer is the interface that wraps the basic Compare method. +type BasicComparer interface { + // Compare returns -1, 0, or +1 depending on whether a is 'less than', + // 'equal to' or 'greater than' b. The two arguments can only be 'equal' + // if their contents are exactly equal. Furthermore, the empty slice + // must be 'less than' any non-empty slice. 
+ Compare(a, b []byte) int +} + +// Comparer defines a total ordering over the space of []byte keys: a 'less +// than' relationship. +type Comparer interface { + BasicComparer + + // Name returns name of the comparer. + // + // The Level-DB on-disk format stores the comparer name, and opening a + // database with a different comparer from the one it was created with + // will result in an error. + // + // An implementation to a new name whenever the comparer implementation + // changes in a way that will cause the relative ordering of any two keys + // to change. + // + // Names starting with "leveldb." are reserved and should not be used + // by any users of this package. + Name() string + + // Bellow are advanced functions used to reduce the space requirements + // for internal data structures such as index blocks. + + // Separator appends a sequence of bytes x to dst such that a <= x && x < b, + // where 'less than' is consistent with Compare. An implementation should + // return nil if x equal to a. + // + // Either contents of a or b should not by any means modified. Doing so + // may cause corruption on the internal state. + Separator(dst, a, b []byte) []byte + + // Successor appends a sequence of bytes x to dst such that x >= b, where + // 'less than' is consistent with Compare. An implementation should return + // nil if x equal to b. + // + // Contents of b should not by any means modified. Doing so may cause + // corruption on the internal state. + Successor(dst, b []byte) []byte +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db.go b/vendor/github.com/syndtr/goleveldb/leveldb/db.go new file mode 100644 index 000000000..90fedf7bd --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db.go @@ -0,0 +1,1179 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "container/list" + "fmt" + "io" + "os" + "runtime" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/table" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// DB is a LevelDB database. +type DB struct { + // Need 64-bit alignment. + seq uint64 + + // Stats. Need 64-bit alignment. + cWriteDelay int64 // The cumulative duration of write delays + cWriteDelayN int32 // The cumulative number of write delays + inWritePaused int32 // The indicator whether write operation is paused by compaction + aliveSnaps, aliveIters int32 + + // Session. + s *session + + // MemDB. + memMu sync.RWMutex + memPool chan *memdb.DB + mem, frozenMem *memDB + journal *journal.Writer + journalWriter storage.Writer + journalFd storage.FileDesc + frozenJournalFd storage.FileDesc + frozenSeq uint64 + + // Snapshot. + snapsMu sync.Mutex + snapsList *list.List + + // Write. + batchPool sync.Pool + writeMergeC chan writeMerge + writeMergedC chan bool + writeLockC chan struct{} + writeAckC chan error + writeDelay time.Duration + writeDelayN int + tr *Transaction + + // Compaction. + compCommitLk sync.Mutex + tcompCmdC chan cCmd + tcompPauseC chan chan<- struct{} + mcompCmdC chan cCmd + compErrC chan error + compPerErrC chan error + compErrSetC chan error + compWriteLocking bool + compStats cStats + memdbMaxLevel int // For testing. + + // Close. 
+ closeW sync.WaitGroup + closeC chan struct{} + closed uint32 + closer io.Closer +} + +func openDB(s *session) (*DB, error) { + s.log("db@open opening") + start := time.Now() + db := &DB{ + s: s, + // Initial sequence + seq: s.stSeqNum, + // MemDB + memPool: make(chan *memdb.DB, 1), + // Snapshot + snapsList: list.New(), + // Write + batchPool: sync.Pool{New: newBatch}, + writeMergeC: make(chan writeMerge), + writeMergedC: make(chan bool), + writeLockC: make(chan struct{}, 1), + writeAckC: make(chan error), + // Compaction + tcompCmdC: make(chan cCmd), + tcompPauseC: make(chan chan<- struct{}), + mcompCmdC: make(chan cCmd), + compErrC: make(chan error), + compPerErrC: make(chan error), + compErrSetC: make(chan error), + // Close + closeC: make(chan struct{}), + } + + // Read-only mode. + readOnly := s.o.GetReadOnly() + + if readOnly { + // Recover journals (read-only mode). + if err := db.recoverJournalRO(); err != nil { + return nil, err + } + } else { + // Recover journals. + if err := db.recoverJournal(); err != nil { + return nil, err + } + + // Remove any obsolete files. + if err := db.checkAndCleanFiles(); err != nil { + // Close journal. + if db.journal != nil { + db.journal.Close() + db.journalWriter.Close() + } + return nil, err + } + + } + + // Doesn't need to be included in the wait group. + go db.compactionError() + go db.mpoolDrain() + + if readOnly { + db.SetReadOnly() + } else { + db.closeW.Add(2) + go db.tCompaction() + go db.mCompaction() + // go db.jWriter() + } + + s.logf("db@open done T·%v", time.Since(start)) + + runtime.SetFinalizer(db, (*DB).Close) + return db, nil +} + +// Open opens or creates a DB for the given storage. +// The DB will be created if not exist, unless ErrorIfMissing is true. +// Also, if ErrorIfExist is true and the DB exist Open will returns +// os.ErrExist error. +// +// Open will return an error with type of ErrCorrupted if corruption +// detected in the DB. 
Use errors.IsCorrupted to test whether an error is +// due to corruption. Corrupted DB can be recovered with Recover function. +// +// The returned DB instance is safe for concurrent use. +// The DB must be closed after use, by calling Close method. +func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) { + s, err := newSession(stor, o) + if err != nil { + return + } + defer func() { + if err != nil { + s.close() + s.release() + } + }() + + err = s.recover() + if err != nil { + if !os.IsNotExist(err) || s.o.GetErrorIfMissing() || s.o.GetReadOnly() { + return + } + err = s.create() + if err != nil { + return + } + } else if s.o.GetErrorIfExist() { + err = os.ErrExist + return + } + + return openDB(s) +} + +// OpenFile opens or creates a DB for the given path. +// The DB will be created if not exist, unless ErrorIfMissing is true. +// Also, if ErrorIfExist is true and the DB exist OpenFile will returns +// os.ErrExist error. +// +// OpenFile uses standard file-system backed storage implementation as +// described in the leveldb/storage package. +// +// OpenFile will return an error with type of ErrCorrupted if corruption +// detected in the DB. Use errors.IsCorrupted to test whether an error is +// due to corruption. Corrupted DB can be recovered with Recover function. +// +// The returned DB instance is safe for concurrent use. +// The DB must be closed after use, by calling Close method. +func OpenFile(path string, o *opt.Options) (db *DB, err error) { + stor, err := storage.OpenFile(path, o.GetReadOnly()) + if err != nil { + return + } + db, err = Open(stor, o) + if err != nil { + stor.Close() + } else { + db.closer = stor + } + return +} + +// Recover recovers and opens a DB with missing or corrupted manifest files +// for the given storage. It will ignore any manifest files, valid or not. +// The DB must already exist or it will returns an error. +// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options. 
+// +// The returned DB instance is safe for concurrent use. +// The DB must be closed after use, by calling Close method. +func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) { + s, err := newSession(stor, o) + if err != nil { + return + } + defer func() { + if err != nil { + s.close() + s.release() + } + }() + + err = recoverTable(s, o) + if err != nil { + return + } + return openDB(s) +} + +// RecoverFile recovers and opens a DB with missing or corrupted manifest files +// for the given path. It will ignore any manifest files, valid or not. +// The DB must already exist or it will returns an error. +// Also, Recover will ignore ErrorIfMissing and ErrorIfExist options. +// +// RecoverFile uses standard file-system backed storage implementation as described +// in the leveldb/storage package. +// +// The returned DB instance is safe for concurrent use. +// The DB must be closed after use, by calling Close method. +func RecoverFile(path string, o *opt.Options) (db *DB, err error) { + stor, err := storage.OpenFile(path, false) + if err != nil { + return + } + db, err = Recover(stor, o) + if err != nil { + stor.Close() + } else { + db.closer = stor + } + return +} + +func recoverTable(s *session, o *opt.Options) error { + o = dupOptions(o) + // Mask StrictReader, lets StrictRecovery doing its job. + o.Strict &= ^opt.StrictReader + + // Get all tables and sort it by file number. + fds, err := s.stor.List(storage.TypeTable) + if err != nil { + return err + } + sortFds(fds) + + var ( + maxSeq uint64 + recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int + + // We will drop corrupted table. 
+ strict = o.GetStrict(opt.StrictRecovery) + noSync = o.GetNoSync() + + rec = &sessionRecord{} + bpool = util.NewBufferPool(o.GetBlockSize() + 5) + ) + buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) { + tmpFd = s.newTemp() + writer, err := s.stor.Create(tmpFd) + if err != nil { + return + } + defer func() { + writer.Close() + if err != nil { + s.stor.Remove(tmpFd) + tmpFd = storage.FileDesc{} + } + }() + + // Copy entries. + tw := table.NewWriter(writer, o) + for iter.Next() { + key := iter.Key() + if validInternalKey(key) { + err = tw.Append(key, iter.Value()) + if err != nil { + return + } + } + } + err = iter.Error() + if err != nil && !errors.IsCorrupted(err) { + return + } + err = tw.Close() + if err != nil { + return + } + if !noSync { + err = writer.Sync() + if err != nil { + return + } + } + size = int64(tw.BytesLen()) + return + } + recoverTable := func(fd storage.FileDesc) error { + s.logf("table@recovery recovering @%d", fd.Num) + reader, err := s.stor.Open(fd) + if err != nil { + return err + } + var closed bool + defer func() { + if !closed { + reader.Close() + } + }() + + // Get file size. + size, err := reader.Seek(0, 2) + if err != nil { + return err + } + + var ( + tSeq uint64 + tgoodKey, tcorruptedKey, tcorruptedBlock int + imin, imax []byte + ) + tr, err := table.NewReader(reader, size, fd, nil, bpool, o) + if err != nil { + return err + } + iter := tr.NewIterator(nil, nil) + if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok { + itererr.SetErrorCallback(func(err error) { + if errors.IsCorrupted(err) { + s.logf("table@recovery block corruption @%d %q", fd.Num, err) + tcorruptedBlock++ + } + }) + } + + // Scan the table. + for iter.Next() { + key := iter.Key() + _, seq, _, kerr := parseInternalKey(key) + if kerr != nil { + tcorruptedKey++ + continue + } + tgoodKey++ + if seq > tSeq { + tSeq = seq + } + if imin == nil { + imin = append([]byte{}, key...) + } + imax = append(imax[:0], key...) 
+ } + if err := iter.Error(); err != nil && !errors.IsCorrupted(err) { + iter.Release() + return err + } + iter.Release() + + goodKey += tgoodKey + corruptedKey += tcorruptedKey + corruptedBlock += tcorruptedBlock + + if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) { + droppedTable++ + s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) + return nil + } + + if tgoodKey > 0 { + if tcorruptedKey > 0 || tcorruptedBlock > 0 { + // Rebuild the table. + s.logf("table@recovery rebuilding @%d", fd.Num) + iter := tr.NewIterator(nil, nil) + tmpFd, newSize, err := buildTable(iter) + iter.Release() + if err != nil { + return err + } + closed = true + reader.Close() + if err := s.stor.Rename(tmpFd, fd); err != nil { + return err + } + size = newSize + } + if tSeq > maxSeq { + maxSeq = tSeq + } + recoveredKey += tgoodKey + // Add table to level 0. + rec.addTable(0, fd.Num, size, imin, imax) + s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq) + } else { + droppedTable++ + s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size) + } + + return nil + } + + // Recover all tables. + if len(fds) > 0 { + s.logf("table@recovery F·%d", len(fds)) + + // Mark file number as used. + s.markFileNum(fds[len(fds)-1].Num) + + for _, fd := range fds { + if err := recoverTable(fd); err != nil { + return err + } + } + + s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq) + } + + // Set sequence number. + rec.setSeqNum(maxSeq) + + // Create new manifest. + if err := s.create(); err != nil { + return err + } + + // Commit. + return s.commit(rec) +} + +func (db *DB) recoverJournal() error { + // Get all journals and sort it by file number. 
+ rawFds, err := db.s.stor.List(storage.TypeJournal) + if err != nil { + return err + } + sortFds(rawFds) + + // Journals that will be recovered. + var fds []storage.FileDesc + for _, fd := range rawFds { + if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum { + fds = append(fds, fd) + } + } + + var ( + ofd storage.FileDesc // Obsolete file. + rec = &sessionRecord{} + ) + + // Recover journals. + if len(fds) > 0 { + db.logf("journal@recovery F·%d", len(fds)) + + // Mark file number as used. + db.s.markFileNum(fds[len(fds)-1].Num) + + var ( + // Options. + strict = db.s.o.GetStrict(opt.StrictJournal) + checksum = db.s.o.GetStrict(opt.StrictJournalChecksum) + writeBuffer = db.s.o.GetWriteBuffer() + + jr *journal.Reader + mdb = memdb.New(db.s.icmp, writeBuffer) + buf = &util.Buffer{} + batchSeq uint64 + batchLen int + ) + + for _, fd := range fds { + db.logf("journal@recovery recovering @%d", fd.Num) + + fr, err := db.s.stor.Open(fd) + if err != nil { + return err + } + + // Create or reset journal reader instance. + if jr == nil { + jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum) + } else { + jr.Reset(fr, dropper{db.s, fd}, strict, checksum) + } + + // Flush memdb and remove obsolete journal file. + if !ofd.Zero() { + if mdb.Len() > 0 { + if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil { + fr.Close() + return err + } + } + + rec.setJournalNum(fd.Num) + rec.setSeqNum(db.seq) + if err := db.s.commit(rec); err != nil { + fr.Close() + return err + } + rec.resetAddedTables() + + db.s.stor.Remove(ofd) + ofd = storage.FileDesc{} + } + + // Replay journal to memdb. + mdb.Reset() + for { + r, err := jr.Next() + if err != nil { + if err == io.EOF { + break + } + + fr.Close() + return errors.SetFd(err, fd) + } + + buf.Reset() + if _, err := buf.ReadFrom(r); err != nil { + if err == io.ErrUnexpectedEOF { + // This is error returned due to corruption, with strict == false. 
+ continue + } + + fr.Close() + return errors.SetFd(err, fd) + } + batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb) + if err != nil { + if !strict && errors.IsCorrupted(err) { + db.s.logf("journal error: %v (skipped)", err) + // We won't apply sequence number as it might be corrupted. + continue + } + + fr.Close() + return errors.SetFd(err, fd) + } + + // Save sequence number. + db.seq = batchSeq + uint64(batchLen) + + // Flush it if large enough. + if mdb.Size() >= writeBuffer { + if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil { + fr.Close() + return err + } + + mdb.Reset() + } + } + + fr.Close() + ofd = fd + } + + // Flush the last memdb. + if mdb.Len() > 0 { + if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil { + return err + } + } + } + + // Create a new journal. + if _, err := db.newMem(0); err != nil { + return err + } + + // Commit. + rec.setJournalNum(db.journalFd.Num) + rec.setSeqNum(db.seq) + if err := db.s.commit(rec); err != nil { + // Close journal on error. + if db.journal != nil { + db.journal.Close() + db.journalWriter.Close() + } + return err + } + + // Remove the last obsolete journal file. + if !ofd.Zero() { + db.s.stor.Remove(ofd) + } + + return nil +} + +func (db *DB) recoverJournalRO() error { + // Get all journals and sort it by file number. + rawFds, err := db.s.stor.List(storage.TypeJournal) + if err != nil { + return err + } + sortFds(rawFds) + + // Journals that will be recovered. + var fds []storage.FileDesc + for _, fd := range rawFds { + if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum { + fds = append(fds, fd) + } + } + + var ( + // Options. + strict = db.s.o.GetStrict(opt.StrictJournal) + checksum = db.s.o.GetStrict(opt.StrictJournalChecksum) + writeBuffer = db.s.o.GetWriteBuffer() + + mdb = memdb.New(db.s.icmp, writeBuffer) + ) + + // Recover journals. 
+ if len(fds) > 0 { + db.logf("journal@recovery RO·Mode F·%d", len(fds)) + + var ( + jr *journal.Reader + buf = &util.Buffer{} + batchSeq uint64 + batchLen int + ) + + for _, fd := range fds { + db.logf("journal@recovery recovering @%d", fd.Num) + + fr, err := db.s.stor.Open(fd) + if err != nil { + return err + } + + // Create or reset journal reader instance. + if jr == nil { + jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum) + } else { + jr.Reset(fr, dropper{db.s, fd}, strict, checksum) + } + + // Replay journal to memdb. + for { + r, err := jr.Next() + if err != nil { + if err == io.EOF { + break + } + + fr.Close() + return errors.SetFd(err, fd) + } + + buf.Reset() + if _, err := buf.ReadFrom(r); err != nil { + if err == io.ErrUnexpectedEOF { + // This is error returned due to corruption, with strict == false. + continue + } + + fr.Close() + return errors.SetFd(err, fd) + } + batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb) + if err != nil { + if !strict && errors.IsCorrupted(err) { + db.s.logf("journal error: %v (skipped)", err) + // We won't apply sequence number as it might be corrupted. + continue + } + + fr.Close() + return errors.SetFd(err, fd) + } + + // Save sequence number. + db.seq = batchSeq + uint64(batchLen) + } + + fr.Close() + } + } + + // Set memDB. + db.mem = &memDB{db: db, DB: mdb, ref: 1} + + return nil +} + +func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) { + mk, mv, err := mdb.Find(ikey) + if err == nil { + ukey, _, kt, kerr := parseInternalKey(mk) + if kerr != nil { + // Shouldn't have had happen. 
+ panic(kerr) + } + if icmp.uCompare(ukey, ikey.ukey()) == 0 { + if kt == keyTypeDel { + return true, nil, ErrNotFound + } + return true, mv, nil + + } + } else if err != ErrNotFound { + return true, nil, err + } + return +} + +func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) { + ikey := makeInternalKey(nil, key, seq, keyTypeSeek) + + if auxm != nil { + if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok { + return append([]byte{}, mv...), me + } + } + + em, fm := db.getMems() + for _, m := range [...]*memDB{em, fm} { + if m == nil { + continue + } + defer m.decref() + + if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok { + return append([]byte{}, mv...), me + } + } + + v := db.s.version() + value, cSched, err := v.get(auxt, ikey, ro, false) + v.release() + if cSched { + // Trigger table compaction. + db.compTrigger(db.tcompCmdC) + } + return +} + +func nilIfNotFound(err error) error { + if err == ErrNotFound { + return nil + } + return err +} + +func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) { + ikey := makeInternalKey(nil, key, seq, keyTypeSeek) + + if auxm != nil { + if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok { + return me == nil, nilIfNotFound(me) + } + } + + em, fm := db.getMems() + for _, m := range [...]*memDB{em, fm} { + if m == nil { + continue + } + defer m.decref() + + if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok { + return me == nil, nilIfNotFound(me) + } + } + + v := db.s.version() + _, cSched, err := v.get(auxt, ikey, ro, true) + v.release() + if cSched { + // Trigger table compaction. + db.compTrigger(db.tcompCmdC) + } + if err == nil { + ret = true + } else if err == ErrNotFound { + err = nil + } + return +} + +// Get gets the value for the given key. It returns ErrNotFound if the +// DB does not contains the key. 
+// +// The returned slice is its own copy, it is safe to modify the contents +// of the returned slice. +// It is safe to modify the contents of the argument after Get returns. +func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + err = db.ok() + if err != nil { + return + } + + se := db.acquireSnapshot() + defer db.releaseSnapshot(se) + return db.get(nil, nil, key, se.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Has returns. +func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) { + err = db.ok() + if err != nil { + return + } + + se := db.acquireSnapshot() + defer db.releaseSnapshot(se) + return db.has(nil, nil, key, se.seq, ro) +} + +// NewIterator returns an iterator for the latest snapshot of the +// underlying DB. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. The resultant key/value pairs are guaranteed to be +// consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// WARNING: Any slice returned by interator (e.g. slice returned by calling +// Iterator.Key() or Iterator.Key() methods), its content should not be modified +// unless noted otherwise. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. 
+func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + if err := db.ok(); err != nil { + return iterator.NewEmptyIterator(err) + } + + se := db.acquireSnapshot() + defer db.releaseSnapshot(se) + // Iterator holds 'version' lock, 'version' is immutable so snapshot + // can be released after iterator created. + return db.newIterator(nil, nil, se.seq, slice, ro) +} + +// GetSnapshot returns a latest snapshot of the underlying DB. A snapshot +// is a frozen snapshot of a DB state at a particular point in time. The +// content of snapshot are guaranteed to be consistent. +// +// The snapshot must be released after use, by calling Release method. +func (db *DB) GetSnapshot() (*Snapshot, error) { + if err := db.ok(); err != nil { + return nil, err + } + + return db.newSnapshot(), nil +} + +// GetProperty returns value of the given property name. +// +// Property names: +// leveldb.num-files-at-level{n} +// Returns the number of files at level 'n'. +// leveldb.stats +// Returns statistics of the underlying DB. +// leveldb.iostats +// Returns statistics of effective disk read and write. +// leveldb.writedelay +// Returns cumulative write delay caused by compaction. +// leveldb.sstables +// Returns sstables list for each level. +// leveldb.blockpool +// Returns block pool stats. +// leveldb.cachedblock +// Returns size of cached block. +// leveldb.openedtables +// Returns number of opened tables. +// leveldb.alivesnaps +// Returns number of alive snapshots. +// leveldb.aliveiters +// Returns number of alive iterators. +func (db *DB) GetProperty(name string) (value string, err error) { + err = db.ok() + if err != nil { + return + } + + const prefix = "leveldb." 
+ if !strings.HasPrefix(name, prefix) { + return "", ErrNotFound + } + p := name[len(prefix):] + + v := db.s.version() + defer v.release() + + numFilesPrefix := "num-files-at-level" + switch { + case strings.HasPrefix(p, numFilesPrefix): + var level uint + var rest string + n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest) + if n != 1 { + err = ErrNotFound + } else { + value = fmt.Sprint(v.tLen(int(level))) + } + case p == "stats": + value = "Compactions\n" + + " Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" + + "-------+------------+---------------+---------------+---------------+---------------\n" + for level, tables := range v.levels { + duration, read, write := db.compStats.getStat(level) + if len(tables) == 0 && duration == 0 { + continue + } + value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n", + level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(), + float64(read)/1048576.0, float64(write)/1048576.0) + } + case p == "iostats": + value = fmt.Sprintf("Read(MB):%.5f Write(MB):%.5f", + float64(db.s.stor.reads())/1048576.0, + float64(db.s.stor.writes())/1048576.0) + case p == "writedelay": + writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay)) + paused := atomic.LoadInt32(&db.inWritePaused) == 1 + value = fmt.Sprintf("DelayN:%d Delay:%s Paused:%t", writeDelayN, writeDelay, paused) + case p == "sstables": + for level, tables := range v.levels { + value += fmt.Sprintf("--- level %d ---\n", level) + for _, t := range tables { + value += fmt.Sprintf("%d:%d[%q .. 
%q]\n", t.fd.Num, t.size, t.imin, t.imax) + } + } + case p == "blockpool": + value = fmt.Sprintf("%v", db.s.tops.bpool) + case p == "cachedblock": + if db.s.tops.bcache != nil { + value = fmt.Sprintf("%d", db.s.tops.bcache.Size()) + } else { + value = "" + } + case p == "openedtables": + value = fmt.Sprintf("%d", db.s.tops.cache.Size()) + case p == "alivesnaps": + value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps)) + case p == "aliveiters": + value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters)) + default: + err = ErrNotFound + } + + return +} + +// DBStats is database statistics. +type DBStats struct { + WriteDelayCount int32 + WriteDelayDuration time.Duration + WritePaused bool + + AliveSnapshots int32 + AliveIterators int32 + + IOWrite uint64 + IORead uint64 + + BlockCacheSize int + OpenedTablesCount int + + LevelSizes []int64 + LevelTablesCounts []int + LevelRead []int64 + LevelWrite []int64 + LevelDurations []time.Duration +} + +// Stats populates s with database statistics. 
+func (db *DB) Stats(s *DBStats) error { + err := db.ok() + if err != nil { + return err + } + + s.IORead = db.s.stor.reads() + s.IOWrite = db.s.stor.writes() + s.WriteDelayCount = atomic.LoadInt32(&db.cWriteDelayN) + s.WriteDelayDuration = time.Duration(atomic.LoadInt64(&db.cWriteDelay)) + s.WritePaused = atomic.LoadInt32(&db.inWritePaused) == 1 + + s.OpenedTablesCount = db.s.tops.cache.Size() + if db.s.tops.bcache != nil { + s.BlockCacheSize = db.s.tops.bcache.Size() + } else { + s.BlockCacheSize = 0 + } + + s.AliveIterators = atomic.LoadInt32(&db.aliveIters) + s.AliveSnapshots = atomic.LoadInt32(&db.aliveSnaps) + + s.LevelDurations = s.LevelDurations[:0] + s.LevelRead = s.LevelRead[:0] + s.LevelWrite = s.LevelWrite[:0] + s.LevelSizes = s.LevelSizes[:0] + s.LevelTablesCounts = s.LevelTablesCounts[:0] + + v := db.s.version() + defer v.release() + + for level, tables := range v.levels { + duration, read, write := db.compStats.getStat(level) + if len(tables) == 0 && duration == 0 { + continue + } + s.LevelDurations = append(s.LevelDurations, duration) + s.LevelRead = append(s.LevelRead, read) + s.LevelWrite = append(s.LevelWrite, write) + s.LevelSizes = append(s.LevelSizes, tables.size()) + s.LevelTablesCounts = append(s.LevelTablesCounts, len(tables)) + } + + return nil +} + +// SizeOf calculates approximate sizes of the given key ranges. +// The length of the returned sizes are equal with the length of the given +// ranges. The returned sizes measure storage space usage, so if the user +// data compresses by a factor of ten, the returned sizes will be one-tenth +// the size of the corresponding user data size. +// The results may not include the sizes of recently written data. 
+func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) { + if err := db.ok(); err != nil { + return nil, err + } + + v := db.s.version() + defer v.release() + + sizes := make(Sizes, 0, len(ranges)) + for _, r := range ranges { + imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek) + imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek) + start, err := v.offsetOf(imin) + if err != nil { + return nil, err + } + limit, err := v.offsetOf(imax) + if err != nil { + return nil, err + } + var size int64 + if limit >= start { + size = limit - start + } + sizes = append(sizes, size) + } + + return sizes, nil +} + +// Close closes the DB. This will also releases any outstanding snapshot, +// abort any in-flight compaction and discard open transaction. +// +// It is not safe to close a DB until all outstanding iterators are released. +// It is valid to call Close multiple times. Other methods should not be +// called after the DB has been closed. +func (db *DB) Close() error { + if !db.setClosed() { + return ErrClosed + } + + start := time.Now() + db.log("db@close closing") + + // Clear the finalizer. + runtime.SetFinalizer(db, nil) + + // Get compaction error. + var err error + select { + case err = <-db.compErrC: + if err == ErrReadOnly { + err = nil + } + default: + } + + // Signal all goroutines. + close(db.closeC) + + // Discard open transaction. + if db.tr != nil { + db.tr.Discard() + } + + // Acquire writer lock. + db.writeLockC <- struct{}{} + + // Wait for all gorotines to exit. + db.closeW.Wait() + + // Closes journal. + if db.journal != nil { + db.journal.Close() + db.journalWriter.Close() + db.journal = nil + db.journalWriter = nil + } + + if db.writeDelayN > 0 { + db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay) + } + + // Close session. 
+ db.s.close() + db.logf("db@close done T·%v", time.Since(start)) + db.s.release() + + if db.closer != nil { + if err1 := db.closer.Close(); err == nil { + err = err1 + } + db.closer = nil + } + + // Clear memdbs. + db.clearMems() + + return err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go new file mode 100644 index 000000000..0c1b9a53b --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_compaction.go @@ -0,0 +1,854 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +var ( + errCompactionTransactExiting = errors.New("leveldb: compaction transact exiting") +) + +type cStat struct { + duration time.Duration + read int64 + write int64 +} + +func (p *cStat) add(n *cStatStaging) { + p.duration += n.duration + p.read += n.read + p.write += n.write +} + +func (p *cStat) get() (duration time.Duration, read, write int64) { + return p.duration, p.read, p.write +} + +type cStatStaging struct { + start time.Time + duration time.Duration + on bool + read int64 + write int64 +} + +func (p *cStatStaging) startTimer() { + if !p.on { + p.start = time.Now() + p.on = true + } +} + +func (p *cStatStaging) stopTimer() { + if p.on { + p.duration += time.Since(p.start) + p.on = false + } +} + +type cStats struct { + lk sync.Mutex + stats []cStat +} + +func (p *cStats) addStat(level int, n *cStatStaging) { + p.lk.Lock() + if level >= len(p.stats) { + newStats := make([]cStat, level+1) + copy(newStats, p.stats) + p.stats = newStats + } + p.stats[level].add(n) + p.lk.Unlock() +} + +func (p *cStats) getStat(level int) (duration time.Duration, read, write int64) { + 
p.lk.Lock() + defer p.lk.Unlock() + if level < len(p.stats) { + return p.stats[level].get() + } + return +} + +func (db *DB) compactionError() { + var err error +noerr: + // No error. + for { + select { + case err = <-db.compErrSetC: + switch { + case err == nil: + case err == ErrReadOnly, errors.IsCorrupted(err): + goto hasperr + default: + goto haserr + } + case <-db.closeC: + return + } + } +haserr: + // Transient error. + for { + select { + case db.compErrC <- err: + case err = <-db.compErrSetC: + switch { + case err == nil: + goto noerr + case err == ErrReadOnly, errors.IsCorrupted(err): + goto hasperr + default: + } + case <-db.closeC: + return + } + } +hasperr: + // Persistent error. + for { + select { + case db.compErrC <- err: + case db.compPerErrC <- err: + case db.writeLockC <- struct{}{}: + // Hold write lock, so that write won't pass-through. + db.compWriteLocking = true + case <-db.closeC: + if db.compWriteLocking { + // We should release the lock or Close will hang. + <-db.writeLockC + } + return + } + } +} + +type compactionTransactCounter int + +func (cnt *compactionTransactCounter) incr() { + *cnt++ +} + +type compactionTransactInterface interface { + run(cnt *compactionTransactCounter) error + revert() error +} + +func (db *DB) compactionTransact(name string, t compactionTransactInterface) { + defer func() { + if x := recover(); x != nil { + if x == errCompactionTransactExiting { + if err := t.revert(); err != nil { + db.logf("%s revert error %q", name, err) + } + } + panic(x) + } + }() + + const ( + backoffMin = 1 * time.Second + backoffMax = 8 * time.Second + backoffMul = 2 * time.Second + ) + var ( + backoff = backoffMin + backoffT = time.NewTimer(backoff) + lastCnt = compactionTransactCounter(0) + + disableBackoff = db.s.o.GetDisableCompactionBackoff() + ) + for n := 0; ; n++ { + // Check whether the DB is closed. 
+ if db.isClosed() { + db.logf("%s exiting", name) + db.compactionExitTransact() + } else if n > 0 { + db.logf("%s retrying N·%d", name, n) + } + + // Execute. + cnt := compactionTransactCounter(0) + err := t.run(&cnt) + if err != nil { + db.logf("%s error I·%d %q", name, cnt, err) + } + + // Set compaction error status. + select { + case db.compErrSetC <- err: + case perr := <-db.compPerErrC: + if err != nil { + db.logf("%s exiting (persistent error %q)", name, perr) + db.compactionExitTransact() + } + case <-db.closeC: + db.logf("%s exiting", name) + db.compactionExitTransact() + } + if err == nil { + return + } + if errors.IsCorrupted(err) { + db.logf("%s exiting (corruption detected)", name) + db.compactionExitTransact() + } + + if !disableBackoff { + // Reset backoff duration if counter is advancing. + if cnt > lastCnt { + backoff = backoffMin + lastCnt = cnt + } + + // Backoff. + backoffT.Reset(backoff) + if backoff < backoffMax { + backoff *= backoffMul + if backoff > backoffMax { + backoff = backoffMax + } + } + select { + case <-backoffT.C: + case <-db.closeC: + db.logf("%s exiting", name) + db.compactionExitTransact() + } + } + } +} + +type compactionTransactFunc struct { + runFunc func(cnt *compactionTransactCounter) error + revertFunc func() error +} + +func (t *compactionTransactFunc) run(cnt *compactionTransactCounter) error { + return t.runFunc(cnt) +} + +func (t *compactionTransactFunc) revert() error { + if t.revertFunc != nil { + return t.revertFunc() + } + return nil +} + +func (db *DB) compactionTransactFunc(name string, run func(cnt *compactionTransactCounter) error, revert func() error) { + db.compactionTransact(name, &compactionTransactFunc{run, revert}) +} + +func (db *DB) compactionExitTransact() { + panic(errCompactionTransactExiting) +} + +func (db *DB) compactionCommit(name string, rec *sessionRecord) { + db.compCommitLk.Lock() + defer db.compCommitLk.Unlock() // Defer is necessary. 
+ db.compactionTransactFunc(name+"@commit", func(cnt *compactionTransactCounter) error { + return db.s.commit(rec) + }, nil) +} + +func (db *DB) memCompaction() { + mdb := db.getFrozenMem() + if mdb == nil { + return + } + defer mdb.decref() + + db.logf("memdb@flush N·%d S·%s", mdb.Len(), shortenb(mdb.Size())) + + // Don't compact empty memdb. + if mdb.Len() == 0 { + db.logf("memdb@flush skipping") + // drop frozen memdb + db.dropFrozenMem() + return + } + + // Pause table compaction. + resumeC := make(chan struct{}) + select { + case db.tcompPauseC <- (chan<- struct{})(resumeC): + case <-db.compPerErrC: + close(resumeC) + resumeC = nil + case <-db.closeC: + db.compactionExitTransact() + } + + var ( + rec = &sessionRecord{} + stats = &cStatStaging{} + flushLevel int + ) + + // Generate tables. + db.compactionTransactFunc("memdb@flush", func(cnt *compactionTransactCounter) (err error) { + stats.startTimer() + flushLevel, err = db.s.flushMemdb(rec, mdb.DB, db.memdbMaxLevel) + stats.stopTimer() + return + }, func() error { + for _, r := range rec.addedTables { + db.logf("memdb@flush revert @%d", r.num) + if err := db.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: r.num}); err != nil { + return err + } + } + return nil + }) + + rec.setJournalNum(db.journalFd.Num) + rec.setSeqNum(db.frozenSeq) + + // Commit. + stats.startTimer() + db.compactionCommit("memdb", rec) + stats.stopTimer() + + db.logf("memdb@flush committed F·%d T·%v", len(rec.addedTables), stats.duration) + + for _, r := range rec.addedTables { + stats.write += r.size + } + db.compStats.addStat(flushLevel, stats) + + // Drop frozen memdb. + db.dropFrozenMem() + + // Resume table compaction. + if resumeC != nil { + select { + case <-resumeC: + close(resumeC) + case <-db.closeC: + db.compactionExitTransact() + } + } + + // Trigger table compaction. 
+ db.compTrigger(db.tcompCmdC) +} + +type tableCompactionBuilder struct { + db *DB + s *session + c *compaction + rec *sessionRecord + stat0, stat1 *cStatStaging + + snapHasLastUkey bool + snapLastUkey []byte + snapLastSeq uint64 + snapIter int + snapKerrCnt int + snapDropCnt int + + kerrCnt int + dropCnt int + + minSeq uint64 + strict bool + tableSize int + + tw *tWriter +} + +func (b *tableCompactionBuilder) appendKV(key, value []byte) error { + // Create new table if not already. + if b.tw == nil { + // Check for pause event. + if b.db != nil { + select { + case ch := <-b.db.tcompPauseC: + b.db.pauseCompaction(ch) + case <-b.db.closeC: + b.db.compactionExitTransact() + default: + } + } + + // Create new table. + var err error + b.tw, err = b.s.tops.create() + if err != nil { + return err + } + } + + // Write key/value into table. + return b.tw.append(key, value) +} + +func (b *tableCompactionBuilder) needFlush() bool { + return b.tw.tw.BytesLen() >= b.tableSize +} + +func (b *tableCompactionBuilder) flush() error { + t, err := b.tw.finish() + if err != nil { + return err + } + b.rec.addTableFile(b.c.sourceLevel+1, t) + b.stat1.write += t.size + b.s.logf("table@build created L%d@%d N·%d S·%s %q:%q", b.c.sourceLevel+1, t.fd.Num, b.tw.tw.EntriesLen(), shortenb(int(t.size)), t.imin, t.imax) + b.tw = nil + return nil +} + +func (b *tableCompactionBuilder) cleanup() { + if b.tw != nil { + b.tw.drop() + b.tw = nil + } +} + +func (b *tableCompactionBuilder) run(cnt *compactionTransactCounter) error { + snapResumed := b.snapIter > 0 + hasLastUkey := b.snapHasLastUkey // The key might has zero length, so this is necessary. + lastUkey := append([]byte{}, b.snapLastUkey...) + lastSeq := b.snapLastSeq + b.kerrCnt = b.snapKerrCnt + b.dropCnt = b.snapDropCnt + // Restore compaction state. 
+ b.c.restore() + + defer b.cleanup() + + b.stat1.startTimer() + defer b.stat1.stopTimer() + + iter := b.c.newIterator() + defer iter.Release() + for i := 0; iter.Next(); i++ { + // Incr transact counter. + cnt.incr() + + // Skip until last state. + if i < b.snapIter { + continue + } + + resumed := false + if snapResumed { + resumed = true + snapResumed = false + } + + ikey := iter.Key() + ukey, seq, kt, kerr := parseInternalKey(ikey) + + if kerr == nil { + shouldStop := !resumed && b.c.shouldStopBefore(ikey) + + if !hasLastUkey || b.s.icmp.uCompare(lastUkey, ukey) != 0 { + // First occurrence of this user key. + + // Only rotate tables if ukey doesn't hop across. + if b.tw != nil && (shouldStop || b.needFlush()) { + if err := b.flush(); err != nil { + return err + } + + // Creates snapshot of the state. + b.c.save() + b.snapHasLastUkey = hasLastUkey + b.snapLastUkey = append(b.snapLastUkey[:0], lastUkey...) + b.snapLastSeq = lastSeq + b.snapIter = i + b.snapKerrCnt = b.kerrCnt + b.snapDropCnt = b.dropCnt + } + + hasLastUkey = true + lastUkey = append(lastUkey[:0], ukey...) + lastSeq = keyMaxSeq + } + + switch { + case lastSeq <= b.minSeq: + // Dropped because newer entry for same user key exist + fallthrough // (A) + case kt == keyTypeDel && seq <= b.minSeq && b.c.baseLevelForKey(lastUkey): + // For this user key: + // (1) there is no data in higher levels + // (2) data in lower levels will have larger seq numbers + // (3) data in layers that are being compacted here and have + // smaller seq numbers will be dropped in the next + // few iterations of this loop (by rule (A) above). + // Therefore this deletion marker is obsolete and can be dropped. + lastSeq = seq + b.dropCnt++ + continue + default: + lastSeq = seq + } + } else { + if b.strict { + return kerr + } + + // Don't drop corrupted keys. 
+ hasLastUkey = false + lastUkey = lastUkey[:0] + lastSeq = keyMaxSeq + b.kerrCnt++ + } + + if err := b.appendKV(ikey, iter.Value()); err != nil { + return err + } + } + + if err := iter.Error(); err != nil { + return err + } + + // Finish last table. + if b.tw != nil && !b.tw.empty() { + return b.flush() + } + return nil +} + +func (b *tableCompactionBuilder) revert() error { + for _, at := range b.rec.addedTables { + b.s.logf("table@build revert @%d", at.num) + if err := b.s.stor.Remove(storage.FileDesc{Type: storage.TypeTable, Num: at.num}); err != nil { + return err + } + } + return nil +} + +func (db *DB) tableCompaction(c *compaction, noTrivial bool) { + defer c.release() + + rec := &sessionRecord{} + rec.addCompPtr(c.sourceLevel, c.imax) + + if !noTrivial && c.trivial() { + t := c.levels[0][0] + db.logf("table@move L%d@%d -> L%d", c.sourceLevel, t.fd.Num, c.sourceLevel+1) + rec.delTable(c.sourceLevel, t.fd.Num) + rec.addTableFile(c.sourceLevel+1, t) + db.compactionCommit("table-move", rec) + return + } + + var stats [2]cStatStaging + for i, tables := range c.levels { + for _, t := range tables { + stats[i].read += t.size + // Insert deleted tables into record + rec.delTable(c.sourceLevel+i, t.fd.Num) + } + } + sourceSize := int(stats[0].read + stats[1].read) + minSeq := db.minSeq() + db.logf("table@compaction L%d·%d -> L%d·%d S·%s Q·%d", c.sourceLevel, len(c.levels[0]), c.sourceLevel+1, len(c.levels[1]), shortenb(sourceSize), minSeq) + + b := &tableCompactionBuilder{ + db: db, + s: db.s, + c: c, + rec: rec, + stat1: &stats[1], + minSeq: minSeq, + strict: db.s.o.GetStrict(opt.StrictCompaction), + tableSize: db.s.o.GetCompactionTableSize(c.sourceLevel + 1), + } + db.compactionTransact("table@build", b) + + // Commit. 
+ stats[1].startTimer() + db.compactionCommit("table", rec) + stats[1].stopTimer() + + resultSize := int(stats[1].write) + db.logf("table@compaction committed F%s S%s Ke·%d D·%d T·%v", sint(len(rec.addedTables)-len(rec.deletedTables)), sshortenb(resultSize-sourceSize), b.kerrCnt, b.dropCnt, stats[1].duration) + + // Save compaction stats + for i := range stats { + db.compStats.addStat(c.sourceLevel+1, &stats[i]) + } +} + +func (db *DB) tableRangeCompaction(level int, umin, umax []byte) error { + db.logf("table@compaction range L%d %q:%q", level, umin, umax) + if level >= 0 { + if c := db.s.getCompactionRange(level, umin, umax, true); c != nil { + db.tableCompaction(c, true) + } + } else { + // Retry until nothing to compact. + for { + compacted := false + + // Scan for maximum level with overlapped tables. + v := db.s.version() + m := 1 + for i := m; i < len(v.levels); i++ { + tables := v.levels[i] + if tables.overlaps(db.s.icmp, umin, umax, false) { + m = i + } + } + v.release() + + for level := 0; level < m; level++ { + if c := db.s.getCompactionRange(level, umin, umax, false); c != nil { + db.tableCompaction(c, true) + compacted = true + } + } + + if !compacted { + break + } + } + } + + return nil +} + +func (db *DB) tableAutoCompaction() { + if c := db.s.pickCompaction(); c != nil { + db.tableCompaction(c, false) + } +} + +func (db *DB) tableNeedCompaction() bool { + v := db.s.version() + defer v.release() + return v.needCompaction() +} + +// resumeWrite returns an indicator whether we should resume write operation if enough level0 files are compacted. 
+func (db *DB) resumeWrite() bool { + v := db.s.version() + defer v.release() + if v.tLen(0) < db.s.o.GetWriteL0PauseTrigger() { + return true + } + return false +} + +func (db *DB) pauseCompaction(ch chan<- struct{}) { + select { + case ch <- struct{}{}: + case <-db.closeC: + db.compactionExitTransact() + } +} + +type cCmd interface { + ack(err error) +} + +type cAuto struct { + // Note for table compaction, an non-empty ackC represents it's a compaction waiting command. + ackC chan<- error +} + +func (r cAuto) ack(err error) { + if r.ackC != nil { + defer func() { + recover() + }() + r.ackC <- err + } +} + +type cRange struct { + level int + min, max []byte + ackC chan<- error +} + +func (r cRange) ack(err error) { + if r.ackC != nil { + defer func() { + recover() + }() + r.ackC <- err + } +} + +// This will trigger auto compaction but will not wait for it. +func (db *DB) compTrigger(compC chan<- cCmd) { + select { + case compC <- cAuto{}: + default: + } +} + +// This will trigger auto compaction and/or wait for all compaction to be done. +func (db *DB) compTriggerWait(compC chan<- cCmd) (err error) { + ch := make(chan error) + defer close(ch) + // Send cmd. + select { + case compC <- cAuto{ch}: + case err = <-db.compErrC: + return + case <-db.closeC: + return ErrClosed + } + // Wait cmd. + select { + case err = <-ch: + case err = <-db.compErrC: + case <-db.closeC: + return ErrClosed + } + return err +} + +// Send range compaction request. +func (db *DB) compTriggerRange(compC chan<- cCmd, level int, min, max []byte) (err error) { + ch := make(chan error) + defer close(ch) + // Send cmd. + select { + case compC <- cRange{level, min, max, ch}: + case err := <-db.compErrC: + return err + case <-db.closeC: + return ErrClosed + } + // Wait cmd. 
+ select { + case err = <-ch: + case err = <-db.compErrC: + case <-db.closeC: + return ErrClosed + } + return err +} + +func (db *DB) mCompaction() { + var x cCmd + + defer func() { + if x := recover(); x != nil { + if x != errCompactionTransactExiting { + panic(x) + } + } + if x != nil { + x.ack(ErrClosed) + } + db.closeW.Done() + }() + + for { + select { + case x = <-db.mcompCmdC: + switch x.(type) { + case cAuto: + db.memCompaction() + x.ack(nil) + x = nil + default: + panic("leveldb: unknown command") + } + case <-db.closeC: + return + } + } +} + +func (db *DB) tCompaction() { + var ( + x cCmd + waitQ []cCmd + ) + + defer func() { + if x := recover(); x != nil { + if x != errCompactionTransactExiting { + panic(x) + } + } + for i := range waitQ { + waitQ[i].ack(ErrClosed) + waitQ[i] = nil + } + if x != nil { + x.ack(ErrClosed) + } + db.closeW.Done() + }() + + for { + if db.tableNeedCompaction() { + select { + case x = <-db.tcompCmdC: + case ch := <-db.tcompPauseC: + db.pauseCompaction(ch) + continue + case <-db.closeC: + return + default: + } + // Resume write operation as soon as possible. + if len(waitQ) > 0 && db.resumeWrite() { + for i := range waitQ { + waitQ[i].ack(nil) + waitQ[i] = nil + } + waitQ = waitQ[:0] + } + } else { + for i := range waitQ { + waitQ[i].ack(nil) + waitQ[i] = nil + } + waitQ = waitQ[:0] + select { + case x = <-db.tcompCmdC: + case ch := <-db.tcompPauseC: + db.pauseCompaction(ch) + continue + case <-db.closeC: + return + } + } + if x != nil { + switch cmd := x.(type) { + case cAuto: + if cmd.ackC != nil { + // Check the write pause state before caching it. 
+ if db.resumeWrite() { + x.ack(nil) + } else { + waitQ = append(waitQ, x) + } + } + case cRange: + x.ack(db.tableRangeCompaction(cmd.level, cmd.min, cmd.max)) + default: + panic("leveldb: unknown command") + } + x = nil + } + db.tableAutoCompaction() + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go new file mode 100644 index 000000000..03c24cdab --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_iter.go @@ -0,0 +1,360 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "math/rand" + "runtime" + "sync" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + errInvalidInternalKey = errors.New("leveldb: Iterator: invalid internal key") +) + +type memdbReleaser struct { + once sync.Once + m *memDB +} + +func (mr *memdbReleaser) Release() { + mr.once.Do(func() { + mr.m.decref() + }) +} + +func (db *DB) newRawIterator(auxm *memDB, auxt tFiles, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + strict := opt.GetStrict(db.s.o.Options, ro, opt.StrictReader) + em, fm := db.getMems() + v := db.s.version() + + tableIts := v.getIterators(slice, ro) + n := len(tableIts) + len(auxt) + 3 + its := make([]iterator.Iterator, 0, n) + + if auxm != nil { + ami := auxm.NewIterator(slice) + ami.SetReleaser(&memdbReleaser{m: auxm}) + its = append(its, ami) + } + for _, t := range auxt { + its = append(its, v.s.tops.newIterator(t, slice, ro)) + } + + emi := em.NewIterator(slice) + emi.SetReleaser(&memdbReleaser{m: em}) + its = append(its, emi) + if fm != nil { + fmi := fm.NewIterator(slice) + fmi.SetReleaser(&memdbReleaser{m: fm}) + its = append(its, fmi) + } + its = append(its, tableIts...) 
+ mi := iterator.NewMergedIterator(its, db.s.icmp, strict) + mi.SetReleaser(&versionReleaser{v: v}) + return mi +} + +func (db *DB) newIterator(auxm *memDB, auxt tFiles, seq uint64, slice *util.Range, ro *opt.ReadOptions) *dbIter { + var islice *util.Range + if slice != nil { + islice = &util.Range{} + if slice.Start != nil { + islice.Start = makeInternalKey(nil, slice.Start, keyMaxSeq, keyTypeSeek) + } + if slice.Limit != nil { + islice.Limit = makeInternalKey(nil, slice.Limit, keyMaxSeq, keyTypeSeek) + } + } + rawIter := db.newRawIterator(auxm, auxt, islice, ro) + iter := &dbIter{ + db: db, + icmp: db.s.icmp, + iter: rawIter, + seq: seq, + strict: opt.GetStrict(db.s.o.Options, ro, opt.StrictReader), + key: make([]byte, 0), + value: make([]byte, 0), + } + atomic.AddInt32(&db.aliveIters, 1) + runtime.SetFinalizer(iter, (*dbIter).Release) + return iter +} + +func (db *DB) iterSamplingRate() int { + return rand.Intn(2 * db.s.o.GetIteratorSamplingRate()) +} + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +// dbIter represent an interator states over a database session. 
+type dbIter struct { + db *DB + icmp *iComparer + iter iterator.Iterator + seq uint64 + strict bool + + smaplingGap int + dir dir + key []byte + value []byte + err error + releaser util.Releaser +} + +func (i *dbIter) sampleSeek() { + ikey := i.iter.Key() + i.smaplingGap -= len(ikey) + len(i.iter.Value()) + for i.smaplingGap < 0 { + i.smaplingGap += i.db.iterSamplingRate() + i.db.sampleSeek(ikey) + } +} + +func (i *dbIter) setErr(err error) { + i.err = err + i.key = nil + i.value = nil +} + +func (i *dbIter) iterErr() { + if err := i.iter.Error(); err != nil { + i.setErr(err) + } +} + +func (i *dbIter) Valid() bool { + return i.err == nil && i.dir > dirEOI +} + +func (i *dbIter) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.iter.First() { + i.dir = dirSOI + return i.next() + } + i.dir = dirEOI + i.iterErr() + return false +} + +func (i *dbIter) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.iter.Last() { + return i.prev() + } + i.dir = dirSOI + i.iterErr() + return false +} + +func (i *dbIter) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + ikey := makeInternalKey(nil, key, i.seq, keyTypeSeek) + if i.iter.Seek(ikey) { + i.dir = dirSOI + return i.next() + } + i.dir = dirEOI + i.iterErr() + return false +} + +func (i *dbIter) next() bool { + for { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { + i.sampleSeek() + if seq <= i.seq { + switch kt { + case keyTypeDel: + // Skip deleted key. + i.key = append(i.key[:0], ukey...) + i.dir = dirForward + case keyTypeVal: + if i.dir == dirSOI || i.icmp.uCompare(ukey, i.key) > 0 { + i.key = append(i.key[:0], ukey...) + i.value = append(i.value[:0], i.iter.Value()...) 
+ i.dir = dirForward + return true + } + } + } + } else if i.strict { + i.setErr(kerr) + break + } + if !i.iter.Next() { + i.dir = dirEOI + i.iterErr() + break + } + } + return false +} + +func (i *dbIter) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if !i.iter.Next() || (i.dir == dirBackward && !i.iter.Next()) { + i.dir = dirEOI + i.iterErr() + return false + } + return i.next() +} + +func (i *dbIter) prev() bool { + i.dir = dirBackward + del := true + if i.iter.Valid() { + for { + if ukey, seq, kt, kerr := parseInternalKey(i.iter.Key()); kerr == nil { + i.sampleSeek() + if seq <= i.seq { + if !del && i.icmp.uCompare(ukey, i.key) < 0 { + return true + } + del = (kt == keyTypeDel) + if !del { + i.key = append(i.key[:0], ukey...) + i.value = append(i.value[:0], i.iter.Value()...) + } + } + } else if i.strict { + i.setErr(kerr) + return false + } + if !i.iter.Prev() { + break + } + } + } + if del { + i.dir = dirSOI + i.iterErr() + return false + } + return true +} + +func (i *dbIter) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirEOI: + return i.Last() + case dirForward: + for i.iter.Prev() { + if ukey, _, _, kerr := parseInternalKey(i.iter.Key()); kerr == nil { + i.sampleSeek() + if i.icmp.uCompare(ukey, i.key) < 0 { + goto cont + } + } else if i.strict { + i.setErr(kerr) + return false + } + } + i.dir = dirSOI + i.iterErr() + return false + } + +cont: + return i.prev() +} + +func (i *dbIter) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.key +} + +func (i *dbIter) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.value +} + +func (i *dbIter) Release() { + if i.dir != dirReleased { + // Clear the finalizer. 
+ runtime.SetFinalizer(i, nil) + + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + + i.dir = dirReleased + i.key = nil + i.value = nil + i.iter.Release() + i.iter = nil + atomic.AddInt32(&i.db.aliveIters, -1) + i.db = nil + } +} + +func (i *dbIter) SetReleaser(releaser util.Releaser) { + if i.dir == dirReleased { + panic(util.ErrReleased) + } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser +} + +func (i *dbIter) Error() error { + return i.err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go new file mode 100644 index 000000000..c2ad70c84 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_snapshot.go @@ -0,0 +1,187 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "container/list" + "fmt" + "runtime" + "sync" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type snapshotElement struct { + seq uint64 + ref int + e *list.Element +} + +// Acquires a snapshot, based on latest sequence. +func (db *DB) acquireSnapshot() *snapshotElement { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + seq := db.getSeq() + + if e := db.snapsList.Back(); e != nil { + se := e.Value.(*snapshotElement) + if se.seq == seq { + se.ref++ + return se + } else if seq < se.seq { + panic("leveldb: sequence number is not increasing") + } + } + se := &snapshotElement{seq: seq, ref: 1} + se.e = db.snapsList.PushBack(se) + return se +} + +// Releases given snapshot element. 
+func (db *DB) releaseSnapshot(se *snapshotElement) { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + se.ref-- + if se.ref == 0 { + db.snapsList.Remove(se.e) + se.e = nil + } else if se.ref < 0 { + panic("leveldb: Snapshot: negative element reference") + } +} + +// Gets minimum sequence that not being snapshotted. +func (db *DB) minSeq() uint64 { + db.snapsMu.Lock() + defer db.snapsMu.Unlock() + + if e := db.snapsList.Front(); e != nil { + return e.Value.(*snapshotElement).seq + } + + return db.getSeq() +} + +// Snapshot is a DB snapshot. +type Snapshot struct { + db *DB + elem *snapshotElement + mu sync.RWMutex + released bool +} + +// Creates new snapshot object. +func (db *DB) newSnapshot() *Snapshot { + snap := &Snapshot{ + db: db, + elem: db.acquireSnapshot(), + } + atomic.AddInt32(&db.aliveSnaps, 1) + runtime.SetFinalizer(snap, (*Snapshot).Release) + return snap +} + +func (snap *Snapshot) String() string { + return fmt.Sprintf("leveldb.Snapshot{%d}", snap.elem.seq) +} + +// Get gets the value for the given key. It returns ErrNotFound if +// the DB does not contains the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. +func (snap *Snapshot) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + err = snap.db.ok() + if err != nil { + return + } + snap.mu.RLock() + defer snap.mu.RUnlock() + if snap.released { + err = ErrSnapshotReleased + return + } + return snap.db.get(nil, nil, key, snap.elem.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Get returns. 
+func (snap *Snapshot) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) { + err = snap.db.ok() + if err != nil { + return + } + snap.mu.RLock() + defer snap.mu.RUnlock() + if snap.released { + err = ErrSnapshotReleased + return + } + return snap.db.has(nil, nil, key, snap.elem.seq, ro) +} + +// NewIterator returns an iterator for the snapshot of the underlying DB. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. The resultant key/value pairs are guaranteed to be +// consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// WARNING: Any slice returned by interator (e.g. slice returned by calling +// Iterator.Key() or Iterator.Value() methods), its content should not be +// modified unless noted otherwise. +// +// The iterator must be released after use, by calling Release method. +// Releasing the snapshot doesn't mean releasing the iterator too, the +// iterator would be still valid until released. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (snap *Snapshot) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + if err := snap.db.ok(); err != nil { + return iterator.NewEmptyIterator(err) + } + snap.mu.Lock() + defer snap.mu.Unlock() + if snap.released { + return iterator.NewEmptyIterator(ErrSnapshotReleased) + } + // Since iterator already hold version ref, it doesn't need to + // hold snapshot ref. + return snap.db.newIterator(nil, nil, snap.elem.seq, slice, ro) +} + +// Release releases the snapshot. 
This will not release any returned +// iterators, the iterators would still be valid until released or the +// underlying DB is closed. +// +// Other methods should not be called after the snapshot has been released. +func (snap *Snapshot) Release() { + snap.mu.Lock() + defer snap.mu.Unlock() + + if !snap.released { + // Clear the finalizer. + runtime.SetFinalizer(snap, nil) + + snap.released = true + snap.db.releaseSnapshot(snap.elem) + atomic.AddInt32(&snap.db.aliveSnaps, -1) + snap.db = nil + snap.elem = nil + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go new file mode 100644 index 000000000..65e1c54bb --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_state.go @@ -0,0 +1,239 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync/atomic" + "time" + + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +var ( + errHasFrozenMem = errors.New("has frozen mem") +) + +type memDB struct { + db *DB + *memdb.DB + ref int32 +} + +func (m *memDB) getref() int32 { + return atomic.LoadInt32(&m.ref) +} + +func (m *memDB) incref() { + atomic.AddInt32(&m.ref, 1) +} + +func (m *memDB) decref() { + if ref := atomic.AddInt32(&m.ref, -1); ref == 0 { + // Only put back memdb with std capacity. + if m.Capacity() == m.db.s.o.GetWriteBuffer() { + m.Reset() + m.db.mpoolPut(m.DB) + } + m.db = nil + m.DB = nil + } else if ref < 0 { + panic("negative memdb ref") + } +} + +// Get latest sequence number. +func (db *DB) getSeq() uint64 { + return atomic.LoadUint64(&db.seq) +} + +// Atomically adds delta to seq. 
+func (db *DB) addSeq(delta uint64) { + atomic.AddUint64(&db.seq, delta) +} + +func (db *DB) setSeq(seq uint64) { + atomic.StoreUint64(&db.seq, seq) +} + +func (db *DB) sampleSeek(ikey internalKey) { + v := db.s.version() + if v.sampleSeek(ikey) { + // Trigger table compaction. + db.compTrigger(db.tcompCmdC) + } + v.release() +} + +func (db *DB) mpoolPut(mem *memdb.DB) { + if !db.isClosed() { + select { + case db.memPool <- mem: + default: + } + } +} + +func (db *DB) mpoolGet(n int) *memDB { + var mdb *memdb.DB + select { + case mdb = <-db.memPool: + default: + } + if mdb == nil || mdb.Capacity() < n { + mdb = memdb.New(db.s.icmp, maxInt(db.s.o.GetWriteBuffer(), n)) + } + return &memDB{ + db: db, + DB: mdb, + } +} + +func (db *DB) mpoolDrain() { + ticker := time.NewTicker(30 * time.Second) + for { + select { + case <-ticker.C: + select { + case <-db.memPool: + default: + } + case <-db.closeC: + ticker.Stop() + // Make sure the pool is drained. + select { + case <-db.memPool: + case <-time.After(time.Second): + } + close(db.memPool) + return + } + } +} + +// Create new memdb and froze the old one; need external synchronization. +// newMem only called synchronously by the writer. +func (db *DB) newMem(n int) (mem *memDB, err error) { + fd := storage.FileDesc{Type: storage.TypeJournal, Num: db.s.allocFileNum()} + w, err := db.s.stor.Create(fd) + if err != nil { + db.s.reuseFileNum(fd.Num) + return + } + + db.memMu.Lock() + defer db.memMu.Unlock() + + if db.frozenMem != nil { + return nil, errHasFrozenMem + } + + if db.journal == nil { + db.journal = journal.NewWriter(w) + } else { + db.journal.Reset(w) + db.journalWriter.Close() + db.frozenJournalFd = db.journalFd + } + db.journalWriter = w + db.journalFd = fd + db.frozenMem = db.mem + mem = db.mpoolGet(n) + mem.incref() // for self + mem.incref() // for caller + db.mem = mem + // The seq only incremented by the writer. And whoever called newMem + // should hold write lock, so no need additional synchronization here. 
+ db.frozenSeq = db.seq + return +} + +// Get all memdbs. +func (db *DB) getMems() (e, f *memDB) { + db.memMu.RLock() + defer db.memMu.RUnlock() + if db.mem != nil { + db.mem.incref() + } else if !db.isClosed() { + panic("nil effective mem") + } + if db.frozenMem != nil { + db.frozenMem.incref() + } + return db.mem, db.frozenMem +} + +// Get effective memdb. +func (db *DB) getEffectiveMem() *memDB { + db.memMu.RLock() + defer db.memMu.RUnlock() + if db.mem != nil { + db.mem.incref() + } else if !db.isClosed() { + panic("nil effective mem") + } + return db.mem +} + +// Check whether we has frozen memdb. +func (db *DB) hasFrozenMem() bool { + db.memMu.RLock() + defer db.memMu.RUnlock() + return db.frozenMem != nil +} + +// Get frozen memdb. +func (db *DB) getFrozenMem() *memDB { + db.memMu.RLock() + defer db.memMu.RUnlock() + if db.frozenMem != nil { + db.frozenMem.incref() + } + return db.frozenMem +} + +// Drop frozen memdb; assume that frozen memdb isn't nil. +func (db *DB) dropFrozenMem() { + db.memMu.Lock() + if err := db.s.stor.Remove(db.frozenJournalFd); err != nil { + db.logf("journal@remove removing @%d %q", db.frozenJournalFd.Num, err) + } else { + db.logf("journal@remove removed @%d", db.frozenJournalFd.Num) + } + db.frozenJournalFd = storage.FileDesc{} + db.frozenMem.decref() + db.frozenMem = nil + db.memMu.Unlock() +} + +// Clear mems ptr; used by DB.Close(). +func (db *DB) clearMems() { + db.memMu.Lock() + db.mem = nil + db.frozenMem = nil + db.memMu.Unlock() +} + +// Set closed flag; return true if not already closed. +func (db *DB) setClosed() bool { + return atomic.CompareAndSwapUint32(&db.closed, 0, 1) +} + +// Check whether DB was closed. +func (db *DB) isClosed() bool { + return atomic.LoadUint32(&db.closed) != 0 +} + +// Check read ok status. 
+func (db *DB) ok() error { + if db.isClosed() { + return ErrClosed + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go new file mode 100644 index 000000000..1a0000188 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_transaction.go @@ -0,0 +1,329 @@ +// Copyright (c) 2016, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "errors" + "sync" + "time" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +var errTransactionDone = errors.New("leveldb: transaction already closed") + +// Transaction is the transaction handle. +type Transaction struct { + db *DB + lk sync.RWMutex + seq uint64 + mem *memDB + tables tFiles + ikScratch []byte + rec sessionRecord + stats cStatStaging + closed bool +} + +// Get gets the value for the given key. It returns ErrNotFound if the +// DB does not contains the key. +// +// The returned slice is its own copy, it is safe to modify the contents +// of the returned slice. +// It is safe to modify the contents of the argument after Get returns. +func (tr *Transaction) Get(key []byte, ro *opt.ReadOptions) ([]byte, error) { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return nil, errTransactionDone + } + return tr.db.get(tr.mem.DB, tr.tables, key, tr.seq, ro) +} + +// Has returns true if the DB does contains the given key. +// +// It is safe to modify the contents of the argument after Has returns. 
+func (tr *Transaction) Has(key []byte, ro *opt.ReadOptions) (bool, error) { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return false, errTransactionDone + } + return tr.db.has(tr.mem.DB, tr.tables, key, tr.seq, ro) +} + +// NewIterator returns an iterator for the latest snapshot of the transaction. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently while writes to the +// transaction. The resultant key/value pairs are guaranteed to be consistent. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// WARNING: Any slice returned by interator (e.g. slice returned by calling +// Iterator.Key() or Iterator.Key() methods), its content should not be modified +// unless noted otherwise. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. +func (tr *Transaction) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + tr.lk.RLock() + defer tr.lk.RUnlock() + if tr.closed { + return iterator.NewEmptyIterator(errTransactionDone) + } + tr.mem.incref() + return tr.db.newIterator(tr.mem, tr.tables, tr.seq, slice, ro) +} + +func (tr *Transaction) flush() error { + // Flush memdb. 
+ if tr.mem.Len() != 0 { + tr.stats.startTimer() + iter := tr.mem.NewIterator(nil) + t, n, err := tr.db.s.tops.createFrom(iter) + iter.Release() + tr.stats.stopTimer() + if err != nil { + return err + } + if tr.mem.getref() == 1 { + tr.mem.Reset() + } else { + tr.mem.decref() + tr.mem = tr.db.mpoolGet(0) + tr.mem.incref() + } + tr.tables = append(tr.tables, t) + tr.rec.addTableFile(0, t) + tr.stats.write += t.size + tr.db.logf("transaction@flush created L0@%d N·%d S·%s %q:%q", t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) + } + return nil +} + +func (tr *Transaction) put(kt keyType, key, value []byte) error { + tr.ikScratch = makeInternalKey(tr.ikScratch, key, tr.seq+1, kt) + if tr.mem.Free() < len(tr.ikScratch)+len(value) { + if err := tr.flush(); err != nil { + return err + } + } + if err := tr.mem.Put(tr.ikScratch, value); err != nil { + return err + } + tr.seq++ + return nil +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Put returns. +func (tr *Transaction) Put(key, value []byte, wo *opt.WriteOptions) error { + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return tr.put(keyTypeVal, key, value) +} + +// Delete deletes the value for the given key. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (tr *Transaction) Delete(key []byte, wo *opt.WriteOptions) error { + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return tr.put(keyTypeDel, key, nil) +} + +// Write apply the given batch to the transaction. 
The batch will be applied +// sequentially. +// Please note that the transaction is not compacted until committed, so if you +// writes 10 same keys, then those 10 same keys are in the transaction. +// +// It is safe to modify the contents of the arguments after Write returns. +func (tr *Transaction) Write(b *Batch, wo *opt.WriteOptions) error { + if b == nil || b.Len() == 0 { + return nil + } + + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + return b.replayInternal(func(i int, kt keyType, k, v []byte) error { + return tr.put(kt, k, v) + }) +} + +func (tr *Transaction) setDone() { + tr.closed = true + tr.db.tr = nil + tr.mem.decref() + <-tr.db.writeLockC +} + +// Commit commits the transaction. If error is not nil, then the transaction is +// not committed, it can then either be retried or discarded. +// +// Other methods should not be called after transaction has been committed. +func (tr *Transaction) Commit() error { + if err := tr.db.ok(); err != nil { + return err + } + + tr.lk.Lock() + defer tr.lk.Unlock() + if tr.closed { + return errTransactionDone + } + if err := tr.flush(); err != nil { + // Return error, lets user decide either to retry or discard + // transaction. + return err + } + if len(tr.tables) != 0 { + // Committing transaction. + tr.rec.setSeqNum(tr.seq) + tr.db.compCommitLk.Lock() + tr.stats.startTimer() + var cerr error + for retry := 0; retry < 3; retry++ { + cerr = tr.db.s.commit(&tr.rec) + if cerr != nil { + tr.db.logf("transaction@commit error R·%d %q", retry, cerr) + select { + case <-time.After(time.Second): + case <-tr.db.closeC: + tr.db.logf("transaction@commit exiting") + tr.db.compCommitLk.Unlock() + return cerr + } + } else { + // Success. Set db.seq. + tr.db.setSeq(tr.seq) + break + } + } + tr.stats.stopTimer() + if cerr != nil { + // Return error, lets user decide either to retry or discard + // transaction. + return cerr + } + + // Update compaction stats. 
This is safe as long as we hold compCommitLk. + tr.db.compStats.addStat(0, &tr.stats) + + // Trigger table auto-compaction. + tr.db.compTrigger(tr.db.tcompCmdC) + tr.db.compCommitLk.Unlock() + + // Additionally, wait compaction when certain threshold reached. + // Ignore error, returns error only if transaction can't be committed. + tr.db.waitCompaction() + } + // Only mark as done if transaction committed successfully. + tr.setDone() + return nil +} + +func (tr *Transaction) discard() { + // Discard transaction. + for _, t := range tr.tables { + tr.db.logf("transaction@discard @%d", t.fd.Num) + if err1 := tr.db.s.stor.Remove(t.fd); err1 == nil { + tr.db.s.reuseFileNum(t.fd.Num) + } + } +} + +// Discard discards the transaction. +// +// Other methods should not be called after transaction has been discarded. +func (tr *Transaction) Discard() { + tr.lk.Lock() + if !tr.closed { + tr.discard() + tr.setDone() + } + tr.lk.Unlock() +} + +func (db *DB) waitCompaction() error { + if db.s.tLen(0) >= db.s.o.GetWriteL0PauseTrigger() { + return db.compTriggerWait(db.tcompCmdC) + } + return nil +} + +// OpenTransaction opens an atomic DB transaction. Only one transaction can be +// opened at a time. Subsequent call to Write and OpenTransaction will be blocked +// until in-flight transaction is committed or discarded. +// The returned transaction handle is safe for concurrent use. +// +// Transaction is expensive and can overwhelm compaction, especially if +// transaction size is small. Use with caution. +// +// The transaction must be closed once done, either by committing or discarding +// the transaction. +// Closing the DB will discard open transaction. +func (db *DB) OpenTransaction() (*Transaction, error) { + if err := db.ok(); err != nil { + return nil, err + } + + // The write happen synchronously. 
+ select { + case db.writeLockC <- struct{}{}: + case err := <-db.compPerErrC: + return nil, err + case <-db.closeC: + return nil, ErrClosed + } + + if db.tr != nil { + panic("leveldb: has open transaction") + } + + // Flush current memdb. + if db.mem != nil && db.mem.Len() != 0 { + if _, err := db.rotateMem(0, true); err != nil { + return nil, err + } + } + + // Wait compaction when certain threshold reached. + if err := db.waitCompaction(); err != nil { + return nil, err + } + + tr := &Transaction{ + db: db, + seq: db.seq, + mem: db.mpoolGet(0), + } + tr.mem.incref() + db.tr = tr + return tr, nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go new file mode 100644 index 000000000..3f0654894 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_util.go @@ -0,0 +1,102 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Reader is the interface that wraps basic Get and NewIterator methods. +// This interface implemented by both DB and Snapshot. +type Reader interface { + Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) + NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator +} + +// Sizes is list of size. +type Sizes []int64 + +// Sum returns sum of the sizes. +func (sizes Sizes) Sum() int64 { + var sum int64 + for _, size := range sizes { + sum += size + } + return sum +} + +// Logging. +func (db *DB) log(v ...interface{}) { db.s.log(v...) } +func (db *DB) logf(format string, v ...interface{}) { db.s.logf(format, v...) } + +// Check and clean files. 
+func (db *DB) checkAndCleanFiles() error { + v := db.s.version() + defer v.release() + + tmap := make(map[int64]bool) + for _, tables := range v.levels { + for _, t := range tables { + tmap[t.fd.Num] = false + } + } + + fds, err := db.s.stor.List(storage.TypeAll) + if err != nil { + return err + } + + var nt int + var rem []storage.FileDesc + for _, fd := range fds { + keep := true + switch fd.Type { + case storage.TypeManifest: + keep = fd.Num >= db.s.manifestFd.Num + case storage.TypeJournal: + if !db.frozenJournalFd.Zero() { + keep = fd.Num >= db.frozenJournalFd.Num + } else { + keep = fd.Num >= db.journalFd.Num + } + case storage.TypeTable: + _, keep = tmap[fd.Num] + if keep { + tmap[fd.Num] = true + nt++ + } + } + + if !keep { + rem = append(rem, fd) + } + } + + if nt != len(tmap) { + var mfds []storage.FileDesc + for num, present := range tmap { + if !present { + mfds = append(mfds, storage.FileDesc{Type: storage.TypeTable, Num: num}) + db.logf("db@janitor table missing @%d", num) + } + } + return errors.NewErrCorrupted(storage.FileDesc{}, &errors.ErrMissingFiles{Fds: mfds}) + } + + db.logf("db@janitor F·%d G·%d", len(fds), len(rem)) + for _, fd := range rem { + db.logf("db@janitor removing %s-%d", fd.Type, fd.Num) + if err := db.s.stor.Remove(fd); err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go new file mode 100644 index 000000000..db0c1bece --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/db_write.go @@ -0,0 +1,464 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "sync/atomic" + "time" + + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func (db *DB) writeJournal(batches []*Batch, seq uint64, sync bool) error { + wr, err := db.journal.Next() + if err != nil { + return err + } + if err := writeBatchesWithHeader(wr, batches, seq); err != nil { + return err + } + if err := db.journal.Flush(); err != nil { + return err + } + if sync { + return db.journalWriter.Sync() + } + return nil +} + +func (db *DB) rotateMem(n int, wait bool) (mem *memDB, err error) { + retryLimit := 3 +retry: + // Wait for pending memdb compaction. + err = db.compTriggerWait(db.mcompCmdC) + if err != nil { + return + } + retryLimit-- + + // Create new memdb and journal. + mem, err = db.newMem(n) + if err != nil { + if err == errHasFrozenMem { + if retryLimit <= 0 { + panic("BUG: still has frozen memdb") + } + goto retry + } + return + } + + // Schedule memdb compaction. + if wait { + err = db.compTriggerWait(db.mcompCmdC) + } else { + db.compTrigger(db.mcompCmdC) + } + return +} + +func (db *DB) flush(n int) (mdb *memDB, mdbFree int, err error) { + delayed := false + slowdownTrigger := db.s.o.GetWriteL0SlowdownTrigger() + pauseTrigger := db.s.o.GetWriteL0PauseTrigger() + flush := func() (retry bool) { + mdb = db.getEffectiveMem() + if mdb == nil { + err = ErrClosed + return false + } + defer func() { + if retry { + mdb.decref() + mdb = nil + } + }() + tLen := db.s.tLen(0) + mdbFree = mdb.Free() + switch { + case tLen >= slowdownTrigger && !delayed: + delayed = true + time.Sleep(time.Millisecond) + case mdbFree >= n: + return false + case tLen >= pauseTrigger: + delayed = true + // Set the write paused flag explicitly. + atomic.StoreInt32(&db.inWritePaused, 1) + err = db.compTriggerWait(db.tcompCmdC) + // Unset the write paused flag. 
+ atomic.StoreInt32(&db.inWritePaused, 0) + if err != nil { + return false + } + default: + // Allow memdb to grow if it has no entry. + if mdb.Len() == 0 { + mdbFree = n + } else { + mdb.decref() + mdb, err = db.rotateMem(n, false) + if err == nil { + mdbFree = mdb.Free() + } else { + mdbFree = 0 + } + } + return false + } + return true + } + start := time.Now() + for flush() { + } + if delayed { + db.writeDelay += time.Since(start) + db.writeDelayN++ + } else if db.writeDelayN > 0 { + db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay) + atomic.AddInt32(&db.cWriteDelayN, int32(db.writeDelayN)) + atomic.AddInt64(&db.cWriteDelay, int64(db.writeDelay)) + db.writeDelay = 0 + db.writeDelayN = 0 + } + return +} + +type writeMerge struct { + sync bool + batch *Batch + keyType keyType + key, value []byte +} + +func (db *DB) unlockWrite(overflow bool, merged int, err error) { + for i := 0; i < merged; i++ { + db.writeAckC <- err + } + if overflow { + // Pass lock to the next write (that failed to merge). + db.writeMergedC <- false + } else { + // Release lock. + <-db.writeLockC + } +} + +// ourBatch is batch that we can modify. +func (db *DB) writeLocked(batch, ourBatch *Batch, merge, sync bool) error { + // Try to flush memdb. This method would also trying to throttle writes + // if it is too fast and compaction cannot catch-up. + mdb, mdbFree, err := db.flush(batch.internalLen) + if err != nil { + db.unlockWrite(false, 0, err) + return err + } + defer mdb.decref() + + var ( + overflow bool + merged int + batches = []*Batch{batch} + ) + + if merge { + // Merge limit. + var mergeLimit int + if batch.internalLen > 128<<10 { + mergeLimit = (1 << 20) - batch.internalLen + } else { + mergeLimit = 128 << 10 + } + mergeCap := mdbFree - batch.internalLen + if mergeLimit > mergeCap { + mergeLimit = mergeCap + } + + merge: + for mergeLimit > 0 { + select { + case incoming := <-db.writeMergeC: + if incoming.batch != nil { + // Merge batch. 
+ if incoming.batch.internalLen > mergeLimit { + overflow = true + break merge + } + batches = append(batches, incoming.batch) + mergeLimit -= incoming.batch.internalLen + } else { + // Merge put. + internalLen := len(incoming.key) + len(incoming.value) + 8 + if internalLen > mergeLimit { + overflow = true + break merge + } + if ourBatch == nil { + ourBatch = db.batchPool.Get().(*Batch) + ourBatch.Reset() + batches = append(batches, ourBatch) + } + // We can use same batch since concurrent write doesn't + // guarantee write order. + ourBatch.appendRec(incoming.keyType, incoming.key, incoming.value) + mergeLimit -= internalLen + } + sync = sync || incoming.sync + merged++ + db.writeMergedC <- true + + default: + break merge + } + } + } + + // Release ourBatch if any. + if ourBatch != nil { + defer db.batchPool.Put(ourBatch) + } + + // Seq number. + seq := db.seq + 1 + + // Write journal. + if err := db.writeJournal(batches, seq, sync); err != nil { + db.unlockWrite(overflow, merged, err) + return err + } + + // Put batches. + for _, batch := range batches { + if err := batch.putMem(seq, mdb.DB); err != nil { + panic(err) + } + seq += uint64(batch.Len()) + } + + // Incr seq number. + db.addSeq(uint64(batchesLen(batches))) + + // Rotate memdb if it's reach the threshold. + if batch.internalLen >= mdbFree { + db.rotateMem(0, false) + } + + db.unlockWrite(overflow, merged, nil) + return nil +} + +// Write apply the given batch to the DB. The batch records will be applied +// sequentially. Write might be used concurrently, when used concurrently and +// batch is small enough, write will try to merge the batches. Set NoWriteMerge +// option to true to disable write merge. +// +// It is safe to modify the contents of the arguments after Write returns but +// not before. Write will not modify content of the batch. 
+func (db *DB) Write(batch *Batch, wo *opt.WriteOptions) error { + if err := db.ok(); err != nil || batch == nil || batch.Len() == 0 { + return err + } + + // If the batch size is larger than write buffer, it may justified to write + // using transaction instead. Using transaction the batch will be written + // into tables directly, skipping the journaling. + if batch.internalLen > db.s.o.GetWriteBuffer() && !db.s.o.GetDisableLargeBatchTransaction() { + tr, err := db.OpenTransaction() + if err != nil { + return err + } + if err := tr.Write(batch, wo); err != nil { + tr.Discard() + return err + } + return tr.Commit() + } + + merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge() + sync := wo.GetSync() && !db.s.o.GetNoSync() + + // Acquire write lock. + if merge { + select { + case db.writeMergeC <- writeMerge{sync: sync, batch: batch}: + if <-db.writeMergedC { + // Write is merged. + return <-db.writeAckC + } + // Write is not merged, the write lock is handed to us. Continue. + case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } else { + select { + case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } + + return db.writeLocked(batch, nil, merge, sync) +} + +func (db *DB) putRec(kt keyType, key, value []byte, wo *opt.WriteOptions) error { + if err := db.ok(); err != nil { + return err + } + + merge := !wo.GetNoWriteMerge() && !db.s.o.GetNoWriteMerge() + sync := wo.GetSync() && !db.s.o.GetNoSync() + + // Acquire write lock. + if merge { + select { + case db.writeMergeC <- writeMerge{sync: sync, keyType: kt, key: key, value: value}: + if <-db.writeMergedC { + // Write is merged. + return <-db.writeAckC + } + // Write is not merged, the write lock is handed to us. Continue. 
+ case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } else { + select { + case db.writeLockC <- struct{}{}: + // Write lock acquired. + case err := <-db.compPerErrC: + // Compaction error. + return err + case <-db.closeC: + // Closed + return ErrClosed + } + } + + batch := db.batchPool.Get().(*Batch) + batch.Reset() + batch.appendRec(kt, key, value) + return db.writeLocked(batch, batch, merge, sync) +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. Write merge also applies for Put, see +// Write. +// +// It is safe to modify the contents of the arguments after Put returns but not +// before. +func (db *DB) Put(key, value []byte, wo *opt.WriteOptions) error { + return db.putRec(keyTypeVal, key, value, wo) +} + +// Delete deletes the value for the given key. Delete will not returns error if +// key doesn't exist. Write merge also applies for Delete, see Write. +// +// It is safe to modify the contents of the arguments after Delete returns but +// not before. +func (db *DB) Delete(key []byte, wo *opt.WriteOptions) error { + return db.putRec(keyTypeDel, key, nil, wo) +} + +func isMemOverlaps(icmp *iComparer, mem *memdb.DB, min, max []byte) bool { + iter := mem.NewIterator(nil) + defer iter.Release() + return (max == nil || (iter.First() && icmp.uCompare(max, internalKey(iter.Key()).ukey()) >= 0)) && + (min == nil || (iter.Last() && icmp.uCompare(min, internalKey(iter.Key()).ukey()) <= 0)) +} + +// CompactRange compacts the underlying DB for the given key range. +// In particular, deleted and overwritten versions are discarded, +// and the data is rearranged to reduce the cost of operations +// needed to access the data. This operation should typically only +// be invoked by users who understand the underlying implementation. 
+// +// A nil Range.Start is treated as a key before all keys in the DB. +// And a nil Range.Limit is treated as a key after all keys in the DB. +// Therefore if both is nil then it will compact entire DB. +func (db *DB) CompactRange(r util.Range) error { + if err := db.ok(); err != nil { + return err + } + + // Lock writer. + select { + case db.writeLockC <- struct{}{}: + case err := <-db.compPerErrC: + return err + case <-db.closeC: + return ErrClosed + } + + // Check for overlaps in memdb. + mdb := db.getEffectiveMem() + if mdb == nil { + return ErrClosed + } + defer mdb.decref() + if isMemOverlaps(db.s.icmp, mdb.DB, r.Start, r.Limit) { + // Memdb compaction. + if _, err := db.rotateMem(0, false); err != nil { + <-db.writeLockC + return err + } + <-db.writeLockC + if err := db.compTriggerWait(db.mcompCmdC); err != nil { + return err + } + } else { + <-db.writeLockC + } + + // Table compaction. + return db.compTriggerRange(db.tcompCmdC, -1, r.Start, r.Limit) +} + +// SetReadOnly makes DB read-only. It will stay read-only until reopened. +func (db *DB) SetReadOnly() error { + if err := db.ok(); err != nil { + return err + } + + // Lock writer. + select { + case db.writeLockC <- struct{}{}: + db.compWriteLocking = true + case err := <-db.compPerErrC: + return err + case <-db.closeC: + return ErrClosed + } + + // Set compaction read-only. + select { + case db.compErrSetC <- ErrReadOnly: + case perr := <-db.compPerErrC: + return perr + case <-db.closeC: + return ErrClosed + } + + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/doc.go b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go new file mode 100644 index 000000000..be768e573 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/doc.go @@ -0,0 +1,92 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Package leveldb provides implementation of LevelDB key/value database. +// +// Create or open a database: +// +// // The returned DB instance is safe for concurrent use. Which mean that all +// // DB's methods may be called concurrently from multiple goroutine. +// db, err := leveldb.OpenFile("path/to/db", nil) +// ... +// defer db.Close() +// ... +// +// Read or modify the database content: +// +// // Remember that the contents of the returned slice should not be modified. +// data, err := db.Get([]byte("key"), nil) +// ... +// err = db.Put([]byte("key"), []byte("value"), nil) +// ... +// err = db.Delete([]byte("key"), nil) +// ... +// +// Iterate over database content: +// +// iter := db.NewIterator(nil, nil) +// for iter.Next() { +// // Remember that the contents of the returned slice should not be modified, and +// // only valid until the next call to Next. +// key := iter.Key() +// value := iter.Value() +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Iterate over subset of database content with a particular prefix: +// iter := db.NewIterator(util.BytesPrefix([]byte("foo-")), nil) +// for iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Seek-then-Iterate: +// +// iter := db.NewIterator(nil, nil) +// for ok := iter.Seek(key); ok; ok = iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Iterate over subset of database content: +// +// iter := db.NewIterator(&util.Range{Start: []byte("foo"), Limit: []byte("xoo")}, nil) +// for iter.Next() { +// // Use key/value. +// ... +// } +// iter.Release() +// err = iter.Error() +// ... +// +// Batch writes: +// +// batch := new(leveldb.Batch) +// batch.Put([]byte("foo"), []byte("value")) +// batch.Put([]byte("bar"), []byte("another value")) +// batch.Delete([]byte("baz")) +// err = db.Write(batch, nil) +// ... 
+// +// Use bloom filter: +// +// o := &opt.Options{ +// Filter: filter.NewBloomFilter(10), +// } +// db, err := leveldb.OpenFile("path/to/db", o) +// ... +// defer db.Close() +// ... +package leveldb diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go new file mode 100644 index 000000000..de2649812 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors.go @@ -0,0 +1,20 @@ +// Copyright (c) 2014, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/errors" +) + +// Common errors. +var ( + ErrNotFound = errors.ErrNotFound + ErrReadOnly = errors.New("leveldb: read-only mode") + ErrSnapshotReleased = errors.New("leveldb: snapshot released") + ErrIterReleased = errors.New("leveldb: iterator released") + ErrClosed = errors.New("leveldb: closed") +) diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go new file mode 100644 index 000000000..8d6146b6f --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/errors/errors.go @@ -0,0 +1,78 @@ +// Copyright (c) 2014, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package errors provides common error types used throughout leveldb. +package errors + +import ( + "errors" + "fmt" + + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Common errors. +var ( + ErrNotFound = New("leveldb: not found") + ErrReleased = util.ErrReleased + ErrHasReleaser = util.ErrHasReleaser +) + +// New returns an error that formats as the given text. 
+func New(text string) error { + return errors.New(text) +} + +// ErrCorrupted is the type that wraps errors that indicate corruption in +// the database. +type ErrCorrupted struct { + Fd storage.FileDesc + Err error +} + +func (e *ErrCorrupted) Error() string { + if !e.Fd.Zero() { + return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd) + } + return e.Err.Error() +} + +// NewErrCorrupted creates new ErrCorrupted error. +func NewErrCorrupted(fd storage.FileDesc, err error) error { + return &ErrCorrupted{fd, err} +} + +// IsCorrupted returns a boolean indicating whether the error is indicating +// a corruption. +func IsCorrupted(err error) bool { + switch err.(type) { + case *ErrCorrupted: + return true + case *storage.ErrCorrupted: + return true + } + return false +} + +// ErrMissingFiles is the type that indicating a corruption due to missing +// files. ErrMissingFiles always wrapped with ErrCorrupted. +type ErrMissingFiles struct { + Fds []storage.FileDesc +} + +func (e *ErrMissingFiles) Error() string { return "file missing" } + +// SetFd sets 'file info' of the given error with the given file. +// Currently only ErrCorrupted is supported, otherwise will do nothing. +func SetFd(err error, fd storage.FileDesc) error { + switch x := err.(type) { + case *ErrCorrupted: + x.Fd = fd + return x + } + return err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go new file mode 100644 index 000000000..e961e420d --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter.go @@ -0,0 +1,31 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" +) + +type iFilter struct { + filter.Filter +} + +func (f iFilter) Contains(filter, key []byte) bool { + return f.Filter.Contains(filter, internalKey(key).ukey()) +} + +func (f iFilter) NewGenerator() filter.FilterGenerator { + return iFilterGenerator{f.Filter.NewGenerator()} +} + +type iFilterGenerator struct { + filter.FilterGenerator +} + +func (g iFilterGenerator) Add(key []byte) { + g.FilterGenerator.Add(internalKey(key).ukey()) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go new file mode 100644 index 000000000..bab0e9970 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/bloom.go @@ -0,0 +1,116 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package filter + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +func bloomHash(key []byte) uint32 { + return util.Hash(key, 0xbc9f1d34) +} + +type bloomFilter int + +// The bloom filter serializes its parameters and is backward compatible +// with respect to them. Therefor, its parameters are not added to its +// name. +func (bloomFilter) Name() string { + return "leveldb.BuiltinBloomFilter" +} + +func (f bloomFilter) Contains(filter, key []byte) bool { + nBytes := len(filter) - 1 + if nBytes < 1 { + return false + } + nBits := uint32(nBytes * 8) + + // Use the encoded k so that we can read filters generated by + // bloom filters created using different parameters. + k := filter[nBytes] + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. + // Consider it a match. 
+ return true + } + + kh := bloomHash(key) + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < k; j++ { + bitpos := kh % nBits + if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 { + return false + } + kh += delta + } + return true +} + +func (f bloomFilter) NewGenerator() FilterGenerator { + // Round down to reduce probing cost a little bit. + k := uint8(f * 69 / 100) // 0.69 =~ ln(2) + if k < 1 { + k = 1 + } else if k > 30 { + k = 30 + } + return &bloomFilterGenerator{ + n: int(f), + k: k, + } +} + +type bloomFilterGenerator struct { + n int + k uint8 + + keyHashes []uint32 +} + +func (g *bloomFilterGenerator) Add(key []byte) { + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. + g.keyHashes = append(g.keyHashes, bloomHash(key)) +} + +func (g *bloomFilterGenerator) Generate(b Buffer) { + // Compute bloom filter size (in both bits and bytes) + nBits := uint32(len(g.keyHashes) * g.n) + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if nBits < 64 { + nBits = 64 + } + nBytes := (nBits + 7) / 8 + nBits = nBytes * 8 + + dest := b.Alloc(int(nBytes) + 1) + dest[nBytes] = g.k + for _, kh := range g.keyHashes { + delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits + for j := uint8(0); j < g.k; j++ { + bitpos := kh % nBits + dest[bitpos/8] |= (1 << (bitpos % 8)) + kh += delta + } + } + + g.keyHashes = g.keyHashes[:0] +} + +// NewBloomFilter creates a new initialized bloom filter for given +// bitsPerKey. +// +// Since bitsPerKey is persisted individually for each bloom filter +// serialization, bloom filters are backwards compatible with respect to +// changing bitsPerKey. This means that no big performance penalty will +// be experienced when changing the parameter. See documentation for +// opt.Options.Filter for more information. 
+func NewBloomFilter(bitsPerKey int) Filter { + return bloomFilter(bitsPerKey) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go new file mode 100644 index 000000000..7a925c5a8 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/filter/filter.go @@ -0,0 +1,60 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package filter provides interface and implementation of probabilistic +// data structure. +// +// The filter is resposible for creating small filter from a set of keys. +// These filter will then used to test whether a key is a member of the set. +// In many cases, a filter can cut down the number of disk seeks from a +// handful to a single disk seek per DB.Get call. +package filter + +// Buffer is the interface that wraps basic Alloc, Write and WriteByte methods. +type Buffer interface { + // Alloc allocs n bytes of slice from the buffer. This also advancing + // write offset. + Alloc(n int) []byte + + // Write appends the contents of p to the buffer. + Write(p []byte) (n int, err error) + + // WriteByte appends the byte c to the buffer. + WriteByte(c byte) error +} + +// Filter is the filter. +type Filter interface { + // Name returns the name of this policy. + // + // Note that if the filter encoding changes in an incompatible way, + // the name returned by this method must be changed. Otherwise, old + // incompatible filters may be passed to methods of this type. + Name() string + + // NewGenerator creates a new filter generator. + NewGenerator() FilterGenerator + + // Contains returns true if the filter contains the given key. + // + // The filter are filters generated by the filter generator. + Contains(filter, key []byte) bool +} + +// FilterGenerator is the filter generator. 
+type FilterGenerator interface { + // Add adds a key to the filter generator. + // + // The key may become invalid after call to this method end, therefor + // key must be copied if implementation require keeping key for later + // use. The key should not modified directly, doing so may cause + // undefined results. + Add(key []byte) + + // Generate generates filters based on keys passed so far. After call + // to Generate the filter generator maybe resetted, depends on implementation. + Generate(b Buffer) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go new file mode 100644 index 000000000..a23ab05f7 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/array_iter.go @@ -0,0 +1,184 @@ +// Copyright (c) 2014, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/util" +) + +// BasicArray is the interface that wraps basic Len and Search method. +type BasicArray interface { + // Len returns length of the array. + Len() int + + // Search finds smallest index that point to a key that is greater + // than or equal to the given key. + Search(key []byte) int +} + +// Array is the interface that wraps BasicArray and basic Index method. +type Array interface { + BasicArray + + // Index returns key/value pair with index of i. + Index(i int) (key, value []byte) +} + +// Array is the interface that wraps BasicArray and basic Get method. +type ArrayIndexer interface { + BasicArray + + // Get returns a new data iterator with index of i. 
+ Get(i int) Iterator +} + +type basicArrayIterator struct { + util.BasicReleaser + array BasicArray + pos int + err error +} + +func (i *basicArrayIterator) Valid() bool { + return i.pos >= 0 && i.pos < i.array.Len() && !i.Released() +} + +func (i *basicArrayIterator) First() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + if i.array.Len() == 0 { + i.pos = -1 + return false + } + i.pos = 0 + return true +} + +func (i *basicArrayIterator) Last() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + n := i.array.Len() + if n == 0 { + i.pos = 0 + return false + } + i.pos = n - 1 + return true +} + +func (i *basicArrayIterator) Seek(key []byte) bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + n := i.array.Len() + if n == 0 { + i.pos = 0 + return false + } + i.pos = i.array.Search(key) + if i.pos >= n { + return false + } + return true +} + +func (i *basicArrayIterator) Next() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.pos++ + if n := i.array.Len(); i.pos >= n { + i.pos = n + return false + } + return true +} + +func (i *basicArrayIterator) Prev() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.pos-- + if i.pos < 0 { + i.pos = -1 + return false + } + return true +} + +func (i *basicArrayIterator) Error() error { return i.err } + +type arrayIterator struct { + basicArrayIterator + array Array + pos int + key, value []byte +} + +func (i *arrayIterator) updateKV() { + if i.pos == i.basicArrayIterator.pos { + return + } + i.pos = i.basicArrayIterator.pos + if i.Valid() { + i.key, i.value = i.array.Index(i.pos) + } else { + i.key = nil + i.value = nil + } +} + +func (i *arrayIterator) Key() []byte { + i.updateKV() + return i.key +} + +func (i *arrayIterator) Value() []byte { + i.updateKV() + return i.value +} + +type arrayIteratorIndexer struct { + basicArrayIterator + array ArrayIndexer +} + +func (i *arrayIteratorIndexer) Get() 
Iterator { + if i.Valid() { + return i.array.Get(i.basicArrayIterator.pos) + } + return nil +} + +// NewArrayIterator returns an iterator from the given array. +func NewArrayIterator(array Array) Iterator { + return &arrayIterator{ + basicArrayIterator: basicArrayIterator{array: array, pos: -1}, + array: array, + pos: -1, + } +} + +// NewArrayIndexer returns an index iterator from the given array. +func NewArrayIndexer(array ArrayIndexer) IteratorIndexer { + return &arrayIteratorIndexer{ + basicArrayIterator: basicArrayIterator{array: array, pos: -1}, + array: array, + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go new file mode 100644 index 000000000..939adbb93 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/indexed_iter.go @@ -0,0 +1,242 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// IteratorIndexer is the interface that wraps CommonIterator and basic Get +// method. IteratorIndexer provides index for indexed iterator. +type IteratorIndexer interface { + CommonIterator + + // Get returns a new data iterator for the current position, or nil if + // done. 
+ Get() Iterator +} + +type indexedIterator struct { + util.BasicReleaser + index IteratorIndexer + strict bool + + data Iterator + err error + errf func(err error) + closed bool +} + +func (i *indexedIterator) setData() { + if i.data != nil { + i.data.Release() + } + i.data = i.index.Get() +} + +func (i *indexedIterator) clearData() { + if i.data != nil { + i.data.Release() + } + i.data = nil +} + +func (i *indexedIterator) indexErr() { + if err := i.index.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + i.err = err + } +} + +func (i *indexedIterator) dataErr() bool { + if err := i.data.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + if i.strict || !errors.IsCorrupted(err) { + i.err = err + return true + } + } + return false +} + +func (i *indexedIterator) Valid() bool { + return i.data != nil && i.data.Valid() +} + +func (i *indexedIterator) First() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + if !i.index.First() { + i.indexErr() + i.clearData() + return false + } + i.setData() + return i.Next() +} + +func (i *indexedIterator) Last() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + if !i.index.Last() { + i.indexErr() + i.clearData() + return false + } + i.setData() + if !i.data.Last() { + if i.dataErr() { + return false + } + i.clearData() + return i.Prev() + } + return true +} + +func (i *indexedIterator) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + if !i.index.Seek(key) { + i.indexErr() + i.clearData() + return false + } + i.setData() + if !i.data.Seek(key) { + if i.dataErr() { + return false + } + i.clearData() + return i.Next() + } + return true +} + +func (i *indexedIterator) Next() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + 
switch { + case i.data != nil && !i.data.Next(): + if i.dataErr() { + return false + } + i.clearData() + fallthrough + case i.data == nil: + if !i.index.Next() { + i.indexErr() + return false + } + i.setData() + return i.Next() + } + return true +} + +func (i *indexedIterator) Prev() bool { + if i.err != nil { + return false + } else if i.Released() { + i.err = ErrIterReleased + return false + } + + switch { + case i.data != nil && !i.data.Prev(): + if i.dataErr() { + return false + } + i.clearData() + fallthrough + case i.data == nil: + if !i.index.Prev() { + i.indexErr() + return false + } + i.setData() + if !i.data.Last() { + if i.dataErr() { + return false + } + i.clearData() + return i.Prev() + } + } + return true +} + +func (i *indexedIterator) Key() []byte { + if i.data == nil { + return nil + } + return i.data.Key() +} + +func (i *indexedIterator) Value() []byte { + if i.data == nil { + return nil + } + return i.data.Value() +} + +func (i *indexedIterator) Release() { + i.clearData() + i.index.Release() + i.BasicReleaser.Release() +} + +func (i *indexedIterator) Error() error { + if i.err != nil { + return i.err + } + if err := i.index.Error(); err != nil { + return err + } + return nil +} + +func (i *indexedIterator) SetErrorCallback(f func(err error)) { + i.errf = f +} + +// NewIndexedIterator returns an 'indexed iterator'. An index is iterator +// that returns another iterator, a 'data iterator'. A 'data iterator' is the +// iterator that contains actual key/value pairs. +// +// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true) +// won't be ignored and will halt 'indexed iterator', otherwise the iterator will +// continue to the next 'data iterator'. Corruption on 'index iterator' will not be +// ignored and will halt the iterator. 
+func NewIndexedIterator(index IteratorIndexer, strict bool) Iterator { + return &indexedIterator{index: index, strict: strict} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go new file mode 100644 index 000000000..96fb0f685 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/iter.go @@ -0,0 +1,132 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package iterator provides interface and implementation to traverse over +// contents of a database. +package iterator + +import ( + "errors" + + "github.com/syndtr/goleveldb/leveldb/util" +) + +var ( + ErrIterReleased = errors.New("leveldb/iterator: iterator released") +) + +// IteratorSeeker is the interface that wraps the 'seeks method'. +type IteratorSeeker interface { + // First moves the iterator to the first key/value pair. If the iterator + // only contains one key/value pair then First and Last would moves + // to the same key/value pair. + // It returns whether such pair exist. + First() bool + + // Last moves the iterator to the last key/value pair. If the iterator + // only contains one key/value pair then First and Last would moves + // to the same key/value pair. + // It returns whether such pair exist. + Last() bool + + // Seek moves the iterator to the first key/value pair whose key is greater + // than or equal to the given key. + // It returns whether such pair exist. + // + // It is safe to modify the contents of the argument after Seek returns. + Seek(key []byte) bool + + // Next moves the iterator to the next key/value pair. + // It returns false if the iterator is exhausted. + Next() bool + + // Prev moves the iterator to the previous key/value pair. + // It returns false if the iterator is exhausted. 
+ Prev() bool +} + +// CommonIterator is the interface that wraps common iterator methods. +type CommonIterator interface { + IteratorSeeker + + // util.Releaser is the interface that wraps basic Release method. + // When called Release will releases any resources associated with the + // iterator. + util.Releaser + + // util.ReleaseSetter is the interface that wraps the basic SetReleaser + // method. + util.ReleaseSetter + + // TODO: Remove this when ready. + Valid() bool + + // Error returns any accumulated error. Exhausting all the key/value pairs + // is not considered to be an error. + Error() error +} + +// Iterator iterates over a DB's key/value pairs in key order. +// +// When encounter an error any 'seeks method' will return false and will +// yield no key/value pairs. The error can be queried by calling the Error +// method. Calling Release is still necessary. +// +// An iterator must be released after use, but it is not necessary to read +// an iterator until exhaustion. +// Also, an iterator is not necessarily safe for concurrent use, but it is +// safe to use multiple iterators concurrently, with each in a dedicated +// goroutine. +type Iterator interface { + CommonIterator + + // Key returns the key of the current key/value pair, or nil if done. + // The caller should not modify the contents of the returned slice, and + // its contents may change on the next call to any 'seeks method'. + Key() []byte + + // Value returns the value of the current key/value pair, or nil if done. + // The caller should not modify the contents of the returned slice, and + // its contents may change on the next call to any 'seeks method'. + Value() []byte +} + +// ErrorCallbackSetter is the interface that wraps basic SetErrorCallback +// method. +// +// ErrorCallbackSetter implemented by indexed and merged iterator. +type ErrorCallbackSetter interface { + // SetErrorCallback allows set an error callback of the corresponding + // iterator. Use nil to clear the callback. 
+ SetErrorCallback(f func(err error)) +} + +type emptyIterator struct { + util.BasicReleaser + err error +} + +func (i *emptyIterator) rErr() { + if i.err == nil && i.Released() { + i.err = ErrIterReleased + } +} + +func (*emptyIterator) Valid() bool { return false } +func (i *emptyIterator) First() bool { i.rErr(); return false } +func (i *emptyIterator) Last() bool { i.rErr(); return false } +func (i *emptyIterator) Seek(key []byte) bool { i.rErr(); return false } +func (i *emptyIterator) Next() bool { i.rErr(); return false } +func (i *emptyIterator) Prev() bool { i.rErr(); return false } +func (*emptyIterator) Key() []byte { return nil } +func (*emptyIterator) Value() []byte { return nil } +func (i *emptyIterator) Error() error { return i.err } + +// NewEmptyIterator creates an empty iterator. The err parameter can be +// nil, but if not nil the given err will be returned by Error method. +func NewEmptyIterator(err error) Iterator { + return &emptyIterator{err: err} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go new file mode 100644 index 000000000..1a7e29df8 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/iterator/merged_iter.go @@ -0,0 +1,304 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package iterator + +import ( + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +type mergedIterator struct { + cmp comparer.Comparer + iters []Iterator + strict bool + + keys [][]byte + index int + dir dir + err error + errf func(err error) + releaser util.Releaser +} + +func assertKey(key []byte) []byte { + if key == nil { + panic("leveldb/iterator: nil key") + } + return key +} + +func (i *mergedIterator) iterErr(iter Iterator) bool { + if err := iter.Error(); err != nil { + if i.errf != nil { + i.errf(err) + } + if i.strict || !errors.IsCorrupted(err) { + i.err = err + return true + } + } + return false +} + +func (i *mergedIterator) Valid() bool { + return i.err == nil && i.dir > dirEOI +} + +func (i *mergedIterator) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.First(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirSOI + return i.next() +} + +func (i *mergedIterator) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.Last(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + i.dir = dirEOI + return i.prev() +} + +func (i *mergedIterator) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + for x, iter := range i.iters { + switch { + case iter.Seek(key): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + 
} + i.dir = dirSOI + return i.next() +} + +func (i *mergedIterator) next() bool { + var key []byte + if i.dir == dirForward { + key = i.keys[i.index] + } + for x, tkey := range i.keys { + if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) < 0) { + key = tkey + i.index = x + } + } + if key == nil { + i.dir = dirEOI + return false + } + i.dir = dirForward + return true +} + +func (i *mergedIterator) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirSOI: + return i.First() + case dirBackward: + key := append([]byte{}, i.keys[i.index]...) + if !i.Seek(key) { + return false + } + return i.Next() + } + + x := i.index + iter := i.iters[x] + switch { + case iter.Next(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + return i.next() +} + +func (i *mergedIterator) prev() bool { + var key []byte + if i.dir == dirBackward { + key = i.keys[i.index] + } + for x, tkey := range i.keys { + if tkey != nil && (key == nil || i.cmp.Compare(tkey, key) > 0) { + key = tkey + i.index = x + } + } + if key == nil { + i.dir = dirSOI + return false + } + i.dir = dirBackward + return true +} + +func (i *mergedIterator) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + switch i.dir { + case dirEOI: + return i.Last() + case dirForward: + key := append([]byte{}, i.keys[i.index]...) 
+ for x, iter := range i.iters { + if x == i.index { + continue + } + seek := iter.Seek(key) + switch { + case seek && iter.Prev(), !seek && iter.Last(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + } + } + + x := i.index + iter := i.iters[x] + switch { + case iter.Prev(): + i.keys[x] = assertKey(iter.Key()) + case i.iterErr(iter): + return false + default: + i.keys[x] = nil + } + return i.prev() +} + +func (i *mergedIterator) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.keys[i.index] +} + +func (i *mergedIterator) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.iters[i.index].Value() +} + +func (i *mergedIterator) Release() { + if i.dir != dirReleased { + i.dir = dirReleased + for _, iter := range i.iters { + iter.Release() + } + i.iters = nil + i.keys = nil + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + } +} + +func (i *mergedIterator) SetReleaser(releaser util.Releaser) { + if i.dir == dirReleased { + panic(util.ErrReleased) + } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser +} + +func (i *mergedIterator) Error() error { + return i.err +} + +func (i *mergedIterator) SetErrorCallback(f func(err error)) { + i.errf = f +} + +// NewMergedIterator returns an iterator that merges its input. Walking the +// resultant iterator will return all key/value pairs of all input iterators +// in strictly increasing key order, as defined by cmp. +// The input's key ranges may overlap, but there are assumed to be no duplicate +// keys: if iters[i] contains a key k then iters[j] will not contain that key k. +// None of the iters may be nil. +// +// If strict is true the any 'corruption errors' (i.e errors.IsCorrupted(err) == true) +// won't be ignored and will halt 'merged iterator', otherwise the iterator will +// continue to the next 'input iterator'. 
+func NewMergedIterator(iters []Iterator, cmp comparer.Comparer, strict bool) Iterator { + return &mergedIterator{ + iters: iters, + cmp: cmp, + strict: strict, + keys: make([][]byte, len(iters)), + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go new file mode 100644 index 000000000..d094c3d0f --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/journal/journal.go @@ -0,0 +1,524 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Taken from: https://code.google.com/p/leveldb-go/source/browse/leveldb/record/record.go?r=1d5ccbe03246da926391ee12d1c6caae054ff4b0 +// License, authors and contributors informations can be found at bellow URLs respectively: +// https://code.google.com/p/leveldb-go/source/browse/LICENSE +// https://code.google.com/p/leveldb-go/source/browse/AUTHORS +// https://code.google.com/p/leveldb-go/source/browse/CONTRIBUTORS + +// Package journal reads and writes sequences of journals. Each journal is a stream +// of bytes that completes before the next journal starts. +// +// When reading, call Next to obtain an io.Reader for the next journal. Next will +// return io.EOF when there are no more journals. It is valid to call Next +// without reading the current journal to exhaustion. +// +// When writing, call Next to obtain an io.Writer for the next journal. Calling +// Next finishes the current journal. Call Close to finish the final journal. +// +// Optionally, call Flush to finish the current journal and flush the underlying +// writer without starting a new journal. To start a new journal after flushing, +// call Next. +// +// Neither Readers or Writers are safe to use concurrently. 
+// +// Example code: +// func read(r io.Reader) ([]string, error) { +// var ss []string +// journals := journal.NewReader(r, nil, true, true) +// for { +// j, err := journals.Next() +// if err == io.EOF { +// break +// } +// if err != nil { +// return nil, err +// } +// s, err := ioutil.ReadAll(j) +// if err != nil { +// return nil, err +// } +// ss = append(ss, string(s)) +// } +// return ss, nil +// } +// +// func write(w io.Writer, ss []string) error { +// journals := journal.NewWriter(w) +// for _, s := range ss { +// j, err := journals.Next() +// if err != nil { +// return err +// } +// if _, err := j.Write([]byte(s)), err != nil { +// return err +// } +// } +// return journals.Close() +// } +// +// The wire format is that the stream is divided into 32KiB blocks, and each +// block contains a number of tightly packed chunks. Chunks cannot cross block +// boundaries. The last block may be shorter than 32 KiB. Any unused bytes in a +// block must be zero. +// +// A journal maps to one or more chunks. Each chunk has a 7 byte header (a 4 +// byte checksum, a 2 byte little-endian uint16 length, and a 1 byte chunk type) +// followed by a payload. The checksum is over the chunk type and the payload. +// +// There are four chunk types: whether the chunk is the full journal, or the +// first, middle or last chunk of a multi-chunk journal. A multi-chunk journal +// has one first chunk, zero or more middle chunks, and one last chunk. +// +// The wire format allows for limited recovery in the face of data corruption: +// on a format error (such as a checksum mismatch), the reader moves to the +// next block and looks for the next full or first chunk. +package journal + +import ( + "encoding/binary" + "fmt" + "io" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// These constants are part of the wire format and should not be changed. 
+const ( + fullChunkType = 1 + firstChunkType = 2 + middleChunkType = 3 + lastChunkType = 4 +) + +const ( + blockSize = 32 * 1024 + headerSize = 7 +) + +type flusher interface { + Flush() error +} + +// ErrCorrupted is the error type that generated by corrupted block or chunk. +type ErrCorrupted struct { + Size int + Reason string +} + +func (e *ErrCorrupted) Error() string { + return fmt.Sprintf("leveldb/journal: block/chunk corrupted: %s (%d bytes)", e.Reason, e.Size) +} + +// Dropper is the interface that wrap simple Drop method. The Drop +// method will be called when the journal reader dropping a block or chunk. +type Dropper interface { + Drop(err error) +} + +// Reader reads journals from an underlying io.Reader. +type Reader struct { + // r is the underlying reader. + r io.Reader + // the dropper. + dropper Dropper + // strict flag. + strict bool + // checksum flag. + checksum bool + // seq is the sequence number of the current journal. + seq int + // buf[i:j] is the unread portion of the current chunk's payload. + // The low bound, i, excludes the chunk header. + i, j int + // n is the number of bytes of buf that are valid. Once reading has started, + // only the final block can have n < blockSize. + n int + // last is whether the current chunk is the last chunk of the journal. + last bool + // err is any accumulated error. + err error + // buf is the buffer. + buf [blockSize]byte +} + +// NewReader returns a new reader. The dropper may be nil, and if +// strict is true then corrupted or invalid chunk will halt the journal +// reader entirely. 
+func NewReader(r io.Reader, dropper Dropper, strict, checksum bool) *Reader { + return &Reader{ + r: r, + dropper: dropper, + strict: strict, + checksum: checksum, + last: true, + } +} + +var errSkip = errors.New("leveldb/journal: skipped") + +func (r *Reader) corrupt(n int, reason string, skip bool) error { + if r.dropper != nil { + r.dropper.Drop(&ErrCorrupted{n, reason}) + } + if r.strict && !skip { + r.err = errors.NewErrCorrupted(storage.FileDesc{}, &ErrCorrupted{n, reason}) + return r.err + } + return errSkip +} + +// nextChunk sets r.buf[r.i:r.j] to hold the next chunk's payload, reading the +// next block into the buffer if necessary. +func (r *Reader) nextChunk(first bool) error { + for { + if r.j+headerSize <= r.n { + checksum := binary.LittleEndian.Uint32(r.buf[r.j+0 : r.j+4]) + length := binary.LittleEndian.Uint16(r.buf[r.j+4 : r.j+6]) + chunkType := r.buf[r.j+6] + unprocBlock := r.n - r.j + if checksum == 0 && length == 0 && chunkType == 0 { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, "zero header", false) + } + if chunkType < fullChunkType || chunkType > lastChunkType { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, fmt.Sprintf("invalid chunk type %#x", chunkType), false) + } + r.i = r.j + headerSize + r.j = r.j + headerSize + int(length) + if r.j > r.n { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, "chunk length overflows block", false) + } else if r.checksum && checksum != util.NewCRC(r.buf[r.i-1:r.j]).Value() { + // Drop entire block. + r.i = r.n + r.j = r.n + return r.corrupt(unprocBlock, "checksum mismatch", false) + } + if first && chunkType != fullChunkType && chunkType != firstChunkType { + chunkLength := (r.j - r.i) + headerSize + r.i = r.j + // Report the error, but skip it. + return r.corrupt(chunkLength, "orphan chunk", true) + } + r.last = chunkType == fullChunkType || chunkType == lastChunkType + return nil + } + + // The last block. 
+ if r.n < blockSize && r.n > 0 { + if !first { + return r.corrupt(0, "missing chunk part", false) + } + r.err = io.EOF + return r.err + } + + // Read block. + n, err := io.ReadFull(r.r, r.buf[:]) + if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { + return err + } + if n == 0 { + if !first { + return r.corrupt(0, "missing chunk part", false) + } + r.err = io.EOF + return r.err + } + r.i, r.j, r.n = 0, 0, n + } +} + +// Next returns a reader for the next journal. It returns io.EOF if there are no +// more journals. The reader returned becomes stale after the next Next call, +// and should no longer be used. If strict is false, the reader will returns +// io.ErrUnexpectedEOF error when found corrupted journal. +func (r *Reader) Next() (io.Reader, error) { + r.seq++ + if r.err != nil { + return nil, r.err + } + r.i = r.j + for { + if err := r.nextChunk(true); err == nil { + break + } else if err != errSkip { + return nil, err + } + } + return &singleReader{r, r.seq, nil}, nil +} + +// Reset resets the journal reader, allows reuse of the journal reader. Reset returns +// last accumulated error. 
+func (r *Reader) Reset(reader io.Reader, dropper Dropper, strict, checksum bool) error { + r.seq++ + err := r.err + r.r = reader + r.dropper = dropper + r.strict = strict + r.checksum = checksum + r.i = 0 + r.j = 0 + r.n = 0 + r.last = true + r.err = nil + return err +} + +type singleReader struct { + r *Reader + seq int + err error +} + +func (x *singleReader) Read(p []byte) (int, error) { + r := x.r + if r.seq != x.seq { + return 0, errors.New("leveldb/journal: stale reader") + } + if x.err != nil { + return 0, x.err + } + if r.err != nil { + return 0, r.err + } + for r.i == r.j { + if r.last { + return 0, io.EOF + } + x.err = r.nextChunk(false) + if x.err != nil { + if x.err == errSkip { + x.err = io.ErrUnexpectedEOF + } + return 0, x.err + } + } + n := copy(p, r.buf[r.i:r.j]) + r.i += n + return n, nil +} + +func (x *singleReader) ReadByte() (byte, error) { + r := x.r + if r.seq != x.seq { + return 0, errors.New("leveldb/journal: stale reader") + } + if x.err != nil { + return 0, x.err + } + if r.err != nil { + return 0, r.err + } + for r.i == r.j { + if r.last { + return 0, io.EOF + } + x.err = r.nextChunk(false) + if x.err != nil { + if x.err == errSkip { + x.err = io.ErrUnexpectedEOF + } + return 0, x.err + } + } + c := r.buf[r.i] + r.i++ + return c, nil +} + +// Writer writes journals to an underlying io.Writer. +type Writer struct { + // w is the underlying writer. + w io.Writer + // seq is the sequence number of the current journal. + seq int + // f is w as a flusher. + f flusher + // buf[i:j] is the bytes that will become the current chunk. + // The low bound, i, includes the chunk header. + i, j int + // buf[:written] has already been written to w. + // written is zero unless Flush has been called. + written int + // first is whether the current chunk is the first chunk of the journal. + first bool + // pending is whether a chunk is buffered but not yet written. + pending bool + // err is any accumulated error. + err error + // buf is the buffer. 
+ buf [blockSize]byte +} + +// NewWriter returns a new Writer. +func NewWriter(w io.Writer) *Writer { + f, _ := w.(flusher) + return &Writer{ + w: w, + f: f, + } +} + +// fillHeader fills in the header for the pending chunk. +func (w *Writer) fillHeader(last bool) { + if w.i+headerSize > w.j || w.j > blockSize { + panic("leveldb/journal: bad writer state") + } + if last { + if w.first { + w.buf[w.i+6] = fullChunkType + } else { + w.buf[w.i+6] = lastChunkType + } + } else { + if w.first { + w.buf[w.i+6] = firstChunkType + } else { + w.buf[w.i+6] = middleChunkType + } + } + binary.LittleEndian.PutUint32(w.buf[w.i+0:w.i+4], util.NewCRC(w.buf[w.i+6:w.j]).Value()) + binary.LittleEndian.PutUint16(w.buf[w.i+4:w.i+6], uint16(w.j-w.i-headerSize)) +} + +// writeBlock writes the buffered block to the underlying writer, and reserves +// space for the next chunk's header. +func (w *Writer) writeBlock() { + _, w.err = w.w.Write(w.buf[w.written:]) + w.i = 0 + w.j = headerSize + w.written = 0 +} + +// writePending finishes the current journal and writes the buffer to the +// underlying writer. +func (w *Writer) writePending() { + if w.err != nil { + return + } + if w.pending { + w.fillHeader(true) + w.pending = false + } + _, w.err = w.w.Write(w.buf[w.written:w.j]) + w.written = w.j +} + +// Close finishes the current journal and closes the writer. +func (w *Writer) Close() error { + w.seq++ + w.writePending() + if w.err != nil { + return w.err + } + w.err = errors.New("leveldb/journal: closed Writer") + return nil +} + +// Flush finishes the current journal, writes to the underlying writer, and +// flushes it if that writer implements interface{ Flush() error }. +func (w *Writer) Flush() error { + w.seq++ + w.writePending() + if w.err != nil { + return w.err + } + if w.f != nil { + w.err = w.f.Flush() + return w.err + } + return nil +} + +// Reset resets the journal writer, allows reuse of the journal writer. Reset +// will also closes the journal writer if not already. 
+func (w *Writer) Reset(writer io.Writer) (err error) { + w.seq++ + if w.err == nil { + w.writePending() + err = w.err + } + w.w = writer + w.f, _ = writer.(flusher) + w.i = 0 + w.j = 0 + w.written = 0 + w.first = false + w.pending = false + w.err = nil + return +} + +// Next returns a writer for the next journal. The writer returned becomes stale +// after the next Close, Flush or Next call, and should no longer be used. +func (w *Writer) Next() (io.Writer, error) { + w.seq++ + if w.err != nil { + return nil, w.err + } + if w.pending { + w.fillHeader(true) + } + w.i = w.j + w.j = w.j + headerSize + // Check if there is room in the block for the header. + if w.j > blockSize { + // Fill in the rest of the block with zeroes. + for k := w.i; k < blockSize; k++ { + w.buf[k] = 0 + } + w.writeBlock() + if w.err != nil { + return nil, w.err + } + } + w.first = true + w.pending = true + return singleWriter{w, w.seq}, nil +} + +type singleWriter struct { + w *Writer + seq int +} + +func (x singleWriter) Write(p []byte) (int, error) { + w := x.w + if w.seq != x.seq { + return 0, errors.New("leveldb/journal: stale writer") + } + if w.err != nil { + return 0, w.err + } + n0 := len(p) + for len(p) > 0 { + // Write a block, if it is full. + if w.j == blockSize { + w.fillHeader(false) + w.writeBlock() + if w.err != nil { + return 0, w.err + } + w.first = false + } + // Copy bytes into the buffer. + n := copy(w.buf[w.j:], p) + w.j += n + p = p[n:] + } + return n0, nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/key.go b/vendor/github.com/syndtr/goleveldb/leveldb/key.go new file mode 100644 index 000000000..ad8f51ec8 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/key.go @@ -0,0 +1,143 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "encoding/binary" + "fmt" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// ErrInternalKeyCorrupted records internal key corruption. +type ErrInternalKeyCorrupted struct { + Ikey []byte + Reason string +} + +func (e *ErrInternalKeyCorrupted) Error() string { + return fmt.Sprintf("leveldb: internal key %q corrupted: %s", e.Ikey, e.Reason) +} + +func newErrInternalKeyCorrupted(ikey []byte, reason string) error { + return errors.NewErrCorrupted(storage.FileDesc{}, &ErrInternalKeyCorrupted{append([]byte{}, ikey...), reason}) +} + +type keyType uint + +func (kt keyType) String() string { + switch kt { + case keyTypeDel: + return "d" + case keyTypeVal: + return "v" + } + return fmt.Sprintf("", uint(kt)) +} + +// Value types encoded as the last component of internal keys. +// Don't modify; this value are saved to disk. +const ( + keyTypeDel = keyType(0) + keyTypeVal = keyType(1) +) + +// keyTypeSeek defines the keyType that should be passed when constructing an +// internal key for seeking to a particular sequence number (since we +// sort sequence numbers in decreasing order and the value type is +// embedded as the low 8 bits in the sequence number in internal keys, +// we need to use the highest-numbered ValueType, not the lowest). +const keyTypeSeek = keyTypeVal + +const ( + // Maximum value possible for sequence number; the 8-bits are + // used by value type, so its can packed together in single + // 64-bit integer. + keyMaxSeq = (uint64(1) << 56) - 1 + // Maximum value possible for packed sequence number and type. + keyMaxNum = (keyMaxSeq << 8) | uint64(keyTypeSeek) +) + +// Maximum number encoded in bytes. 
+var keyMaxNumBytes = make([]byte, 8) + +func init() { + binary.LittleEndian.PutUint64(keyMaxNumBytes, keyMaxNum) +} + +type internalKey []byte + +func makeInternalKey(dst, ukey []byte, seq uint64, kt keyType) internalKey { + if seq > keyMaxSeq { + panic("leveldb: invalid sequence number") + } else if kt > keyTypeVal { + panic("leveldb: invalid type") + } + + dst = ensureBuffer(dst, len(ukey)+8) + copy(dst, ukey) + binary.LittleEndian.PutUint64(dst[len(ukey):], (seq<<8)|uint64(kt)) + return internalKey(dst) +} + +func parseInternalKey(ik []byte) (ukey []byte, seq uint64, kt keyType, err error) { + if len(ik) < 8 { + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid length") + } + num := binary.LittleEndian.Uint64(ik[len(ik)-8:]) + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + return nil, 0, 0, newErrInternalKeyCorrupted(ik, "invalid type") + } + ukey = ik[:len(ik)-8] + return +} + +func validInternalKey(ik []byte) bool { + _, _, _, err := parseInternalKey(ik) + return err == nil +} + +func (ik internalKey) assert() { + if ik == nil { + panic("leveldb: nil internalKey") + } + if len(ik) < 8 { + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid length", []byte(ik), len(ik))) + } +} + +func (ik internalKey) ukey() []byte { + ik.assert() + return ik[:len(ik)-8] +} + +func (ik internalKey) num() uint64 { + ik.assert() + return binary.LittleEndian.Uint64(ik[len(ik)-8:]) +} + +func (ik internalKey) parseNum() (seq uint64, kt keyType) { + num := ik.num() + seq, kt = uint64(num>>8), keyType(num&0xff) + if kt > keyTypeVal { + panic(fmt.Sprintf("leveldb: internal key %q, len=%d: invalid type %#x", []byte(ik), len(ik), kt)) + } + return +} + +func (ik internalKey) String() string { + if ik == nil { + return "" + } + + if ukey, seq, kt, err := parseInternalKey(ik); err == nil { + return fmt.Sprintf("%s,%s%d", shorten(string(ukey)), kt, seq) + } + return fmt.Sprintf("", []byte(ik)) +} diff --git 
a/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go new file mode 100644 index 000000000..824e47f5f --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/memdb/memdb.go @@ -0,0 +1,479 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package memdb provides in-memory key/value database implementation. +package memdb + +import ( + "math/rand" + "sync" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Common errors. +var ( + ErrNotFound = errors.ErrNotFound + ErrIterReleased = errors.New("leveldb/memdb: iterator released") +) + +const tMaxHeight = 12 + +type dbIter struct { + util.BasicReleaser + p *DB + slice *util.Range + node int + forward bool + key, value []byte + err error +} + +func (i *dbIter) fill(checkStart, checkLimit bool) bool { + if i.node != 0 { + n := i.p.nodeData[i.node] + m := n + i.p.nodeData[i.node+nKey] + i.key = i.p.kvData[n:m] + if i.slice != nil { + switch { + case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0: + fallthrough + case checkStart && i.slice.Start != nil && i.p.cmp.Compare(i.key, i.slice.Start) < 0: + i.node = 0 + goto bail + } + } + i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]] + return true + } +bail: + i.key = nil + i.value = nil + return false +} + +func (i *dbIter) Valid() bool { + return i.node != 0 +} + +func (i *dbIter) First() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Start != nil { + i.node, _ = i.p.findGE(i.slice.Start, false) + } else { + i.node = i.p.nodeData[nNext] + } + return i.fill(false, 
true) +} + +func (i *dbIter) Last() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.forward = false + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Limit != nil { + i.node = i.p.findLT(i.slice.Limit) + } else { + i.node = i.p.findLast() + } + return i.fill(true, false) +} + +func (i *dbIter) Seek(key []byte) bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + if i.slice != nil && i.slice.Start != nil && i.p.cmp.Compare(key, i.slice.Start) < 0 { + key = i.slice.Start + } + i.node, _ = i.p.findGE(key, false) + return i.fill(false, true) +} + +func (i *dbIter) Next() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + if i.node == 0 { + if !i.forward { + return i.First() + } + return false + } + i.forward = true + i.p.mu.RLock() + defer i.p.mu.RUnlock() + i.node = i.p.nodeData[i.node+nNext] + return i.fill(false, true) +} + +func (i *dbIter) Prev() bool { + if i.Released() { + i.err = ErrIterReleased + return false + } + + if i.node == 0 { + if i.forward { + return i.Last() + } + return false + } + i.forward = false + i.p.mu.RLock() + defer i.p.mu.RUnlock() + i.node = i.p.findLT(i.key) + return i.fill(true, false) +} + +func (i *dbIter) Key() []byte { + return i.key +} + +func (i *dbIter) Value() []byte { + return i.value +} + +func (i *dbIter) Error() error { return i.err } + +func (i *dbIter) Release() { + if !i.Released() { + i.p = nil + i.node = 0 + i.key = nil + i.value = nil + i.BasicReleaser.Release() + } +} + +const ( + nKV = iota + nKey + nVal + nHeight + nNext +) + +// DB is an in-memory key/value database. 
+type DB struct { + cmp comparer.BasicComparer + rnd *rand.Rand + + mu sync.RWMutex + kvData []byte + // Node data: + // [0] : KV offset + // [1] : Key length + // [2] : Value length + // [3] : Height + // [3..height] : Next nodes + nodeData []int + prevNode [tMaxHeight]int + maxHeight int + n int + kvSize int +} + +func (p *DB) randHeight() (h int) { + const branching = 4 + h = 1 + for h < tMaxHeight && p.rnd.Int()%branching == 0 { + h++ + } + return +} + +// Must hold RW-lock if prev == true, as it use shared prevNode slice. +func (p *DB) findGE(key []byte, prev bool) (int, bool) { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + cmp := 1 + if next != 0 { + o := p.nodeData[next] + cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) + } + if cmp < 0 { + // Keep searching in this list + node = next + } else { + if prev { + p.prevNode[h] = node + } else if cmp == 0 { + return next, true + } + if h == 0 { + return next, cmp == 0 + } + h-- + } + } +} + +func (p *DB) findLT(key []byte) int { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + o := p.nodeData[next] + if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 { + if h == 0 { + break + } + h-- + } else { + node = next + } + } + return node +} + +func (p *DB) findLast() int { + node := 0 + h := p.maxHeight - 1 + for { + next := p.nodeData[node+nNext+h] + if next == 0 { + if h == 0 { + break + } + h-- + } else { + node = next + } + } + return node +} + +// Put sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. +// +// It is safe to modify the contents of the arguments after Put returns. +func (p *DB) Put(key []byte, value []byte) error { + p.mu.Lock() + defer p.mu.Unlock() + + if node, exact := p.findGE(key, true); exact { + kvOffset := len(p.kvData) + p.kvData = append(p.kvData, key...) + p.kvData = append(p.kvData, value...) 
+ p.nodeData[node] = kvOffset + m := p.nodeData[node+nVal] + p.nodeData[node+nVal] = len(value) + p.kvSize += len(value) - m + return nil + } + + h := p.randHeight() + if h > p.maxHeight { + for i := p.maxHeight; i < h; i++ { + p.prevNode[i] = 0 + } + p.maxHeight = h + } + + kvOffset := len(p.kvData) + p.kvData = append(p.kvData, key...) + p.kvData = append(p.kvData, value...) + // Node + node := len(p.nodeData) + p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h) + for i, n := range p.prevNode[:h] { + m := n + nNext + i + p.nodeData = append(p.nodeData, p.nodeData[m]) + p.nodeData[m] = node + } + + p.kvSize += len(key) + len(value) + p.n++ + return nil +} + +// Delete deletes the value for the given key. It returns ErrNotFound if +// the DB does not contain the key. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (p *DB) Delete(key []byte) error { + p.mu.Lock() + defer p.mu.Unlock() + + node, exact := p.findGE(key, true) + if !exact { + return ErrNotFound + } + + h := p.nodeData[node+nHeight] + for i, n := range p.prevNode[:h] { + m := n + nNext + i + p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i] + } + + p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal] + p.n-- + return nil +} + +// Contains returns true if the given key are in the DB. +// +// It is safe to modify the contents of the arguments after Contains returns. +func (p *DB) Contains(key []byte) bool { + p.mu.RLock() + _, exact := p.findGE(key, false) + p.mu.RUnlock() + return exact +} + +// Get gets the value for the given key. It returns error.ErrNotFound if the +// DB does not contain the key. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Get returns. 
+func (p *DB) Get(key []byte) (value []byte, err error) { + p.mu.RLock() + if node, exact := p.findGE(key, false); exact { + o := p.nodeData[node] + p.nodeData[node+nKey] + value = p.kvData[o : o+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// +// The caller should not modify the contents of the returned slice, but +// it is safe to modify the contents of the argument after Find returns. +func (p *DB) Find(key []byte) (rkey, value []byte, err error) { + p.mu.RLock() + if node, _ := p.findGE(key, false); node != 0 { + n := p.nodeData[node] + m := n + p.nodeData[node+nKey] + rkey = p.kvData[n:m] + value = p.kvData[m : m+p.nodeData[node+nVal]] + } else { + err = ErrNotFound + } + p.mu.RUnlock() + return +} + +// NewIterator returns an iterator of the DB. +// The returned iterator is not safe for concurrent use, but it is safe to use +// multiple iterators concurrently, with each in a dedicated goroutine. +// It is also safe to use an iterator concurrently with modifying its +// underlying DB. However, the resultant key/value pairs are not guaranteed +// to be a consistent snapshot of the DB at a particular point in time. +// +// Slice allows slicing the iterator to only contains keys in the given +// range. A nil Range.Start is treated as a key before all keys in the +// DB. And a nil Range.Limit is treated as a key after all keys in +// the DB. +// +// WARNING: Any slice returned by interator (e.g. slice returned by calling +// Iterator.Key() or Iterator.Key() methods), its content should not be modified +// unless noted otherwise. +// +// The iterator must be released after use, by calling Release method. +// +// Also read Iterator documentation of the leveldb/iterator package. 
+func (p *DB) NewIterator(slice *util.Range) iterator.Iterator { + return &dbIter{p: p, slice: slice} +} + +// Capacity returns keys/values buffer capacity. +func (p *DB) Capacity() int { + p.mu.RLock() + defer p.mu.RUnlock() + return cap(p.kvData) +} + +// Size returns sum of keys and values length. Note that deleted +// key/value will not be accounted for, but it will still consume +// the buffer, since the buffer is append only. +func (p *DB) Size() int { + p.mu.RLock() + defer p.mu.RUnlock() + return p.kvSize +} + +// Free returns keys/values free buffer before need to grow. +func (p *DB) Free() int { + p.mu.RLock() + defer p.mu.RUnlock() + return cap(p.kvData) - len(p.kvData) +} + +// Len returns the number of entries in the DB. +func (p *DB) Len() int { + p.mu.RLock() + defer p.mu.RUnlock() + return p.n +} + +// Reset resets the DB to initial empty state. Allows reuse the buffer. +func (p *DB) Reset() { + p.mu.Lock() + p.rnd = rand.New(rand.NewSource(0xdeadbeef)) + p.maxHeight = 1 + p.n = 0 + p.kvSize = 0 + p.kvData = p.kvData[:0] + p.nodeData = p.nodeData[:nNext+tMaxHeight] + p.nodeData[nKV] = 0 + p.nodeData[nKey] = 0 + p.nodeData[nVal] = 0 + p.nodeData[nHeight] = tMaxHeight + for n := 0; n < tMaxHeight; n++ { + p.nodeData[nNext+n] = 0 + p.prevNode[n] = 0 + } + p.mu.Unlock() +} + +// New creates a new initialized in-memory key/value DB. The capacity +// is the initial key/value buffer capacity. The capacity is advisory, +// not enforced. +// +// This DB is append-only, deleting an entry would remove entry node but not +// reclaim KV buffer. +// +// The returned DB instance is safe for concurrent use. 
+func New(cmp comparer.BasicComparer, capacity int) *DB { + p := &DB{ + cmp: cmp, + rnd: rand.New(rand.NewSource(0xdeadbeef)), + maxHeight: 1, + kvData: make([]byte, 0, capacity), + nodeData: make([]int, 4+tMaxHeight), + } + p.nodeData[nHeight] = tMaxHeight + return p +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go new file mode 100644 index 000000000..528b16423 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/opt/options.go @@ -0,0 +1,697 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package opt provides sets of options used by LevelDB. +package opt + +import ( + "math" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" +) + +const ( + KiB = 1024 + MiB = KiB * 1024 + GiB = MiB * 1024 +) + +var ( + DefaultBlockCacher = LRUCacher + DefaultBlockCacheCapacity = 8 * MiB + DefaultBlockRestartInterval = 16 + DefaultBlockSize = 4 * KiB + DefaultCompactionExpandLimitFactor = 25 + DefaultCompactionGPOverlapsFactor = 10 + DefaultCompactionL0Trigger = 4 + DefaultCompactionSourceLimitFactor = 1 + DefaultCompactionTableSize = 2 * MiB + DefaultCompactionTableSizeMultiplier = 1.0 + DefaultCompactionTotalSize = 10 * MiB + DefaultCompactionTotalSizeMultiplier = 10.0 + DefaultCompressionType = SnappyCompression + DefaultIteratorSamplingRate = 1 * MiB + DefaultOpenFilesCacher = LRUCacher + DefaultOpenFilesCacheCapacity = 500 + DefaultWriteBuffer = 4 * MiB + DefaultWriteL0PauseTrigger = 12 + DefaultWriteL0SlowdownTrigger = 8 +) + +// Cacher is a caching algorithm. 
+type Cacher interface { + New(capacity int) cache.Cacher +} + +type CacherFunc struct { + NewFunc func(capacity int) cache.Cacher +} + +func (f *CacherFunc) New(capacity int) cache.Cacher { + if f.NewFunc != nil { + return f.NewFunc(capacity) + } + return nil +} + +func noCacher(int) cache.Cacher { return nil } + +var ( + // LRUCacher is the LRU-cache algorithm. + LRUCacher = &CacherFunc{cache.NewLRU} + + // NoCacher is the value to disable caching algorithm. + NoCacher = &CacherFunc{} +) + +// Compression is the 'sorted table' block compression algorithm to use. +type Compression uint + +func (c Compression) String() string { + switch c { + case DefaultCompression: + return "default" + case NoCompression: + return "none" + case SnappyCompression: + return "snappy" + } + return "invalid" +} + +const ( + DefaultCompression Compression = iota + NoCompression + SnappyCompression + nCompression +) + +// Strict is the DB 'strict level'. +type Strict uint + +const ( + // If present then a corrupted or invalid chunk or block in manifest + // journal will cause an error instead of being dropped. + // This will prevent database with corrupted manifest to be opened. + StrictManifest Strict = 1 << iota + + // If present then journal chunk checksum will be verified. + StrictJournalChecksum + + // If present then a corrupted or invalid chunk or block in journal + // will cause an error instead of being dropped. + // This will prevent database with corrupted journal to be opened. + StrictJournal + + // If present then 'sorted table' block checksum will be verified. + // This has effect on both 'read operation' and compaction. + StrictBlockChecksum + + // If present then a corrupted 'sorted table' will fails compaction. + // The database will enter read-only mode. + StrictCompaction + + // If present then a corrupted 'sorted table' will halts 'read operation'. + StrictReader + + // If present then leveldb.Recover will drop corrupted 'sorted table'. 
+ StrictRecovery + + // This only applicable for ReadOptions, if present then this ReadOptions + // 'strict level' will override global ones. + StrictOverride + + // StrictAll enables all strict flags. + StrictAll = StrictManifest | StrictJournalChecksum | StrictJournal | StrictBlockChecksum | StrictCompaction | StrictReader | StrictRecovery + + // DefaultStrict is the default strict flags. Specify any strict flags + // will override default strict flags as whole (i.e. not OR'ed). + DefaultStrict = StrictJournalChecksum | StrictBlockChecksum | StrictCompaction | StrictReader + + // NoStrict disables all strict flags. Override default strict flags. + NoStrict = ^StrictAll +) + +// Options holds the optional parameters for the DB at large. +type Options struct { + // AltFilters defines one or more 'alternative filters'. + // 'alternative filters' will be used during reads if a filter block + // does not match with the 'effective filter'. + // + // The default value is nil + AltFilters []filter.Filter + + // BlockCacher provides cache algorithm for LevelDB 'sorted table' block caching. + // Specify NoCacher to disable caching algorithm. + // + // The default value is LRUCacher. + BlockCacher Cacher + + // BlockCacheCapacity defines the capacity of the 'sorted table' block caching. + // Use -1 for zero, this has same effect as specifying NoCacher to BlockCacher. + // + // The default value is 8MiB. + BlockCacheCapacity int + + // BlockCacheEvictRemoved allows enable forced-eviction on cached block belonging + // to removed 'sorted table'. + // + // The default if false. + BlockCacheEvictRemoved bool + + // BlockRestartInterval is the number of keys between restart points for + // delta encoding of keys. + // + // The default value is 16. + BlockRestartInterval int + + // BlockSize is the minimum uncompressed size in bytes of each 'sorted table' + // block. + // + // The default value is 4KiB. 
+ BlockSize int + + // CompactionExpandLimitFactor limits compaction size after expanded. + // This will be multiplied by table size limit at compaction target level. + // + // The default value is 25. + CompactionExpandLimitFactor int + + // CompactionGPOverlapsFactor limits overlaps in grandparent (Level + 2) that a + // single 'sorted table' generates. + // This will be multiplied by table size limit at grandparent level. + // + // The default value is 10. + CompactionGPOverlapsFactor int + + // CompactionL0Trigger defines number of 'sorted table' at level-0 that will + // trigger compaction. + // + // The default value is 4. + CompactionL0Trigger int + + // CompactionSourceLimitFactor limits compaction source size. This doesn't apply to + // level-0. + // This will be multiplied by table size limit at compaction target level. + // + // The default value is 1. + CompactionSourceLimitFactor int + + // CompactionTableSize limits size of 'sorted table' that compaction generates. + // The limits for each level will be calculated as: + // CompactionTableSize * (CompactionTableSizeMultiplier ^ Level) + // The multiplier for each level can also fine-tuned using CompactionTableSizeMultiplierPerLevel. + // + // The default value is 2MiB. + CompactionTableSize int + + // CompactionTableSizeMultiplier defines multiplier for CompactionTableSize. + // + // The default value is 1. + CompactionTableSizeMultiplier float64 + + // CompactionTableSizeMultiplierPerLevel defines per-level multiplier for + // CompactionTableSize. + // Use zero to skip a level. + // + // The default value is nil. + CompactionTableSizeMultiplierPerLevel []float64 + + // CompactionTotalSize limits total size of 'sorted table' for each level. + // The limits for each level will be calculated as: + // CompactionTotalSize * (CompactionTotalSizeMultiplier ^ Level) + // The multiplier for each level can also fine-tuned using + // CompactionTotalSizeMultiplierPerLevel. + // + // The default value is 10MiB. 
+ CompactionTotalSize int + + // CompactionTotalSizeMultiplier defines multiplier for CompactionTotalSize. + // + // The default value is 10. + CompactionTotalSizeMultiplier float64 + + // CompactionTotalSizeMultiplierPerLevel defines per-level multiplier for + // CompactionTotalSize. + // Use zero to skip a level. + // + // The default value is nil. + CompactionTotalSizeMultiplierPerLevel []float64 + + // Comparer defines a total ordering over the space of []byte keys: a 'less + // than' relationship. The same comparison algorithm must be used for reads + // and writes over the lifetime of the DB. + // + // The default value uses the same ordering as bytes.Compare. + Comparer comparer.Comparer + + // Compression defines the 'sorted table' block compression to use. + // + // The default value (DefaultCompression) uses snappy compression. + Compression Compression + + // DisableBufferPool allows disable use of util.BufferPool functionality. + // + // The default value is false. + DisableBufferPool bool + + // DisableBlockCache allows disable use of cache.Cache functionality on + // 'sorted table' block. + // + // The default value is false. + DisableBlockCache bool + + // DisableCompactionBackoff allows disable compaction retry backoff. + // + // The default value is false. + DisableCompactionBackoff bool + + // DisableLargeBatchTransaction allows disabling switch-to-transaction mode + // on large batch write. If enable batch writes large than WriteBuffer will + // use transaction. + // + // The default is false. + DisableLargeBatchTransaction bool + + // ErrorIfExist defines whether an error should returned if the DB already + // exist. + // + // The default value is false. + ErrorIfExist bool + + // ErrorIfMissing defines whether an error should returned if the DB is + // missing. If false then the database will be created if missing, otherwise + // an error will be returned. + // + // The default value is false. 
+ ErrorIfMissing bool + + // Filter defines an 'effective filter' to use. An 'effective filter' + // if defined will be used to generate per-table filter block. + // The filter name will be stored on disk. + // During reads LevelDB will try to find matching filter from + // 'effective filter' and 'alternative filters'. + // + // Filter can be changed after a DB has been created. It is recommended + // to put old filter to the 'alternative filters' to mitigate lack of + // filter during transition period. + // + // A filter is used to reduce disk reads when looking for a specific key. + // + // The default value is nil. + Filter filter.Filter + + // IteratorSamplingRate defines approximate gap (in bytes) between read + // sampling of an iterator. The samples will be used to determine when + // compaction should be triggered. + // + // The default is 1MiB. + IteratorSamplingRate int + + // NoSync allows completely disable fsync. + // + // The default is false. + NoSync bool + + // NoWriteMerge allows disabling write merge. + // + // The default is false. + NoWriteMerge bool + + // OpenFilesCacher provides cache algorithm for open files caching. + // Specify NoCacher to disable caching algorithm. + // + // The default value is LRUCacher. + OpenFilesCacher Cacher + + // OpenFilesCacheCapacity defines the capacity of the open files caching. + // Use -1 for zero, this has same effect as specifying NoCacher to OpenFilesCacher. + // + // The default value is 500. + OpenFilesCacheCapacity int + + // If true then opens DB in read-only mode. + // + // The default value is false. + ReadOnly bool + + // Strict defines the DB strict level. + Strict Strict + + // WriteBuffer defines maximum size of a 'memdb' before flushed to + // 'sorted table'. 'memdb' is an in-memory DB backed by an on-disk + // unsorted journal. + // + // LevelDB may held up to two 'memdb' at the same time. + // + // The default value is 4MiB. 
+ WriteBuffer int + + // WriteL0StopTrigger defines number of 'sorted table' at level-0 that will + // pause write. + // + // The default value is 12. + WriteL0PauseTrigger int + + // WriteL0SlowdownTrigger defines number of 'sorted table' at level-0 that + // will trigger write slowdown. + // + // The default value is 8. + WriteL0SlowdownTrigger int +} + +func (o *Options) GetAltFilters() []filter.Filter { + if o == nil { + return nil + } + return o.AltFilters +} + +func (o *Options) GetBlockCacher() Cacher { + if o == nil || o.BlockCacher == nil { + return DefaultBlockCacher + } else if o.BlockCacher == NoCacher { + return nil + } + return o.BlockCacher +} + +func (o *Options) GetBlockCacheCapacity() int { + if o == nil || o.BlockCacheCapacity == 0 { + return DefaultBlockCacheCapacity + } else if o.BlockCacheCapacity < 0 { + return 0 + } + return o.BlockCacheCapacity +} + +func (o *Options) GetBlockCacheEvictRemoved() bool { + if o == nil { + return false + } + return o.BlockCacheEvictRemoved +} + +func (o *Options) GetBlockRestartInterval() int { + if o == nil || o.BlockRestartInterval <= 0 { + return DefaultBlockRestartInterval + } + return o.BlockRestartInterval +} + +func (o *Options) GetBlockSize() int { + if o == nil || o.BlockSize <= 0 { + return DefaultBlockSize + } + return o.BlockSize +} + +func (o *Options) GetCompactionExpandLimit(level int) int { + factor := DefaultCompactionExpandLimitFactor + if o != nil && o.CompactionExpandLimitFactor > 0 { + factor = o.CompactionExpandLimitFactor + } + return o.GetCompactionTableSize(level+1) * factor +} + +func (o *Options) GetCompactionGPOverlaps(level int) int { + factor := DefaultCompactionGPOverlapsFactor + if o != nil && o.CompactionGPOverlapsFactor > 0 { + factor = o.CompactionGPOverlapsFactor + } + return o.GetCompactionTableSize(level+2) * factor +} + +func (o *Options) GetCompactionL0Trigger() int { + if o == nil || o.CompactionL0Trigger == 0 { + return DefaultCompactionL0Trigger + } + return 
o.CompactionL0Trigger +} + +func (o *Options) GetCompactionSourceLimit(level int) int { + factor := DefaultCompactionSourceLimitFactor + if o != nil && o.CompactionSourceLimitFactor > 0 { + factor = o.CompactionSourceLimitFactor + } + return o.GetCompactionTableSize(level+1) * factor +} + +func (o *Options) GetCompactionTableSize(level int) int { + var ( + base = DefaultCompactionTableSize + mult float64 + ) + if o != nil { + if o.CompactionTableSize > 0 { + base = o.CompactionTableSize + } + if level < len(o.CompactionTableSizeMultiplierPerLevel) && o.CompactionTableSizeMultiplierPerLevel[level] > 0 { + mult = o.CompactionTableSizeMultiplierPerLevel[level] + } else if o.CompactionTableSizeMultiplier > 0 { + mult = math.Pow(o.CompactionTableSizeMultiplier, float64(level)) + } + } + if mult == 0 { + mult = math.Pow(DefaultCompactionTableSizeMultiplier, float64(level)) + } + return int(float64(base) * mult) +} + +func (o *Options) GetCompactionTotalSize(level int) int64 { + var ( + base = DefaultCompactionTotalSize + mult float64 + ) + if o != nil { + if o.CompactionTotalSize > 0 { + base = o.CompactionTotalSize + } + if level < len(o.CompactionTotalSizeMultiplierPerLevel) && o.CompactionTotalSizeMultiplierPerLevel[level] > 0 { + mult = o.CompactionTotalSizeMultiplierPerLevel[level] + } else if o.CompactionTotalSizeMultiplier > 0 { + mult = math.Pow(o.CompactionTotalSizeMultiplier, float64(level)) + } + } + if mult == 0 { + mult = math.Pow(DefaultCompactionTotalSizeMultiplier, float64(level)) + } + return int64(float64(base) * mult) +} + +func (o *Options) GetComparer() comparer.Comparer { + if o == nil || o.Comparer == nil { + return comparer.DefaultComparer + } + return o.Comparer +} + +func (o *Options) GetCompression() Compression { + if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression { + return DefaultCompressionType + } + return o.Compression +} + +func (o *Options) GetDisableBufferPool() bool { + if o == nil { + return false + 
} + return o.DisableBufferPool +} + +func (o *Options) GetDisableBlockCache() bool { + if o == nil { + return false + } + return o.DisableBlockCache +} + +func (o *Options) GetDisableCompactionBackoff() bool { + if o == nil { + return false + } + return o.DisableCompactionBackoff +} + +func (o *Options) GetDisableLargeBatchTransaction() bool { + if o == nil { + return false + } + return o.DisableLargeBatchTransaction +} + +func (o *Options) GetErrorIfExist() bool { + if o == nil { + return false + } + return o.ErrorIfExist +} + +func (o *Options) GetErrorIfMissing() bool { + if o == nil { + return false + } + return o.ErrorIfMissing +} + +func (o *Options) GetFilter() filter.Filter { + if o == nil { + return nil + } + return o.Filter +} + +func (o *Options) GetIteratorSamplingRate() int { + if o == nil || o.IteratorSamplingRate <= 0 { + return DefaultIteratorSamplingRate + } + return o.IteratorSamplingRate +} + +func (o *Options) GetNoSync() bool { + if o == nil { + return false + } + return o.NoSync +} + +func (o *Options) GetNoWriteMerge() bool { + if o == nil { + return false + } + return o.NoWriteMerge +} + +func (o *Options) GetOpenFilesCacher() Cacher { + if o == nil || o.OpenFilesCacher == nil { + return DefaultOpenFilesCacher + } + if o.OpenFilesCacher == NoCacher { + return nil + } + return o.OpenFilesCacher +} + +func (o *Options) GetOpenFilesCacheCapacity() int { + if o == nil || o.OpenFilesCacheCapacity == 0 { + return DefaultOpenFilesCacheCapacity + } else if o.OpenFilesCacheCapacity < 0 { + return 0 + } + return o.OpenFilesCacheCapacity +} + +func (o *Options) GetReadOnly() bool { + if o == nil { + return false + } + return o.ReadOnly +} + +func (o *Options) GetStrict(strict Strict) bool { + if o == nil || o.Strict == 0 { + return DefaultStrict&strict != 0 + } + return o.Strict&strict != 0 +} + +func (o *Options) GetWriteBuffer() int { + if o == nil || o.WriteBuffer <= 0 { + return DefaultWriteBuffer + } + return o.WriteBuffer +} + +func (o *Options) 
GetWriteL0PauseTrigger() int { + if o == nil || o.WriteL0PauseTrigger == 0 { + return DefaultWriteL0PauseTrigger + } + return o.WriteL0PauseTrigger +} + +func (o *Options) GetWriteL0SlowdownTrigger() int { + if o == nil || o.WriteL0SlowdownTrigger == 0 { + return DefaultWriteL0SlowdownTrigger + } + return o.WriteL0SlowdownTrigger +} + +// ReadOptions holds the optional parameters for 'read operation'. The +// 'read operation' includes Get, Find and NewIterator. +type ReadOptions struct { + // DontFillCache defines whether block reads for this 'read operation' + // should be cached. If false then the block will be cached. This does + // not affects already cached block. + // + // The default value is false. + DontFillCache bool + + // Strict will be OR'ed with global DB 'strict level' unless StrictOverride + // is present. Currently only StrictReader that has effect here. + Strict Strict +} + +func (ro *ReadOptions) GetDontFillCache() bool { + if ro == nil { + return false + } + return ro.DontFillCache +} + +func (ro *ReadOptions) GetStrict(strict Strict) bool { + if ro == nil { + return false + } + return ro.Strict&strict != 0 +} + +// WriteOptions holds the optional parameters for 'write operation'. The +// 'write operation' includes Write, Put and Delete. +type WriteOptions struct { + // NoWriteMerge allows disabling write merge. + // + // The default is false. + NoWriteMerge bool + + // Sync is whether to sync underlying writes from the OS buffer cache + // through to actual disk, if applicable. Setting Sync can result in + // slower writes. + // + // If false, and the machine crashes, then some recent writes may be lost. + // Note that if it is just the process that crashes (and the machine does + // not) then no writes will be lost. + // + // In other words, Sync being false has the same semantics as a write + // system call. Sync being true means write followed by fsync. + // + // The default value is false. 
+ Sync bool +} + +func (wo *WriteOptions) GetNoWriteMerge() bool { + if wo == nil { + return false + } + return wo.NoWriteMerge +} + +func (wo *WriteOptions) GetSync() bool { + if wo == nil { + return false + } + return wo.Sync +} + +func GetStrict(o *Options, ro *ReadOptions, strict Strict) bool { + if ro.GetStrict(StrictOverride) { + return ro.GetStrict(strict) + } else { + return o.GetStrict(strict) || ro.GetStrict(strict) + } +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/options.go b/vendor/github.com/syndtr/goleveldb/leveldb/options.go new file mode 100644 index 000000000..b072b1ac4 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/options.go @@ -0,0 +1,107 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func dupOptions(o *opt.Options) *opt.Options { + newo := &opt.Options{} + if o != nil { + *newo = *o + } + if newo.Strict == 0 { + newo.Strict = opt.DefaultStrict + } + return newo +} + +func (s *session) setOptions(o *opt.Options) { + no := dupOptions(o) + // Alternative filters. + if filters := o.GetAltFilters(); len(filters) > 0 { + no.AltFilters = make([]filter.Filter, len(filters)) + for i, filter := range filters { + no.AltFilters[i] = &iFilter{filter} + } + } + // Comparer. + s.icmp = &iComparer{o.GetComparer()} + no.Comparer = s.icmp + // Filter. 
+ if filter := o.GetFilter(); filter != nil { + no.Filter = &iFilter{filter} + } + + s.o = &cachedOptions{Options: no} + s.o.cache() +} + +const optCachedLevel = 7 + +type cachedOptions struct { + *opt.Options + + compactionExpandLimit []int + compactionGPOverlaps []int + compactionSourceLimit []int + compactionTableSize []int + compactionTotalSize []int64 +} + +func (co *cachedOptions) cache() { + co.compactionExpandLimit = make([]int, optCachedLevel) + co.compactionGPOverlaps = make([]int, optCachedLevel) + co.compactionSourceLimit = make([]int, optCachedLevel) + co.compactionTableSize = make([]int, optCachedLevel) + co.compactionTotalSize = make([]int64, optCachedLevel) + + for level := 0; level < optCachedLevel; level++ { + co.compactionExpandLimit[level] = co.Options.GetCompactionExpandLimit(level) + co.compactionGPOverlaps[level] = co.Options.GetCompactionGPOverlaps(level) + co.compactionSourceLimit[level] = co.Options.GetCompactionSourceLimit(level) + co.compactionTableSize[level] = co.Options.GetCompactionTableSize(level) + co.compactionTotalSize[level] = co.Options.GetCompactionTotalSize(level) + } +} + +func (co *cachedOptions) GetCompactionExpandLimit(level int) int { + if level < optCachedLevel { + return co.compactionExpandLimit[level] + } + return co.Options.GetCompactionExpandLimit(level) +} + +func (co *cachedOptions) GetCompactionGPOverlaps(level int) int { + if level < optCachedLevel { + return co.compactionGPOverlaps[level] + } + return co.Options.GetCompactionGPOverlaps(level) +} + +func (co *cachedOptions) GetCompactionSourceLimit(level int) int { + if level < optCachedLevel { + return co.compactionSourceLimit[level] + } + return co.Options.GetCompactionSourceLimit(level) +} + +func (co *cachedOptions) GetCompactionTableSize(level int) int { + if level < optCachedLevel { + return co.compactionTableSize[level] + } + return co.Options.GetCompactionTableSize(level) +} + +func (co *cachedOptions) GetCompactionTotalSize(level int) int64 { + if level 
< optCachedLevel { + return co.compactionTotalSize[level] + } + return co.Options.GetCompactionTotalSize(level) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session.go b/vendor/github.com/syndtr/goleveldb/leveldb/session.go new file mode 100644 index 000000000..3f391f934 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session.go @@ -0,0 +1,210 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "io" + "os" + "sync" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// ErrManifestCorrupted records manifest corruption. This error will be +// wrapped with errors.ErrCorrupted. +type ErrManifestCorrupted struct { + Field string + Reason string +} + +func (e *ErrManifestCorrupted) Error() string { + return fmt.Sprintf("leveldb: manifest corrupted (field '%s'): %s", e.Field, e.Reason) +} + +func newErrManifestCorrupted(fd storage.FileDesc, field, reason string) error { + return errors.NewErrCorrupted(fd, &ErrManifestCorrupted{field, reason}) +} + +// session represent a persistent database session. +type session struct { + // Need 64-bit alignment. 
+ stNextFileNum int64 // current unused file number + stJournalNum int64 // current journal file number; need external synchronization + stPrevJournalNum int64 // prev journal file number; no longer used; for compatibility with older version of leveldb + stTempFileNum int64 + stSeqNum uint64 // last mem compacted seq; need external synchronization + + stor *iStorage + storLock storage.Locker + o *cachedOptions + icmp *iComparer + tops *tOps + fileRef map[int64]int + + manifest *journal.Writer + manifestWriter storage.Writer + manifestFd storage.FileDesc + + stCompPtrs []internalKey // compaction pointers; need external synchronization + stVersion *version // current version + vmu sync.Mutex +} + +// Creates new initialized session instance. +func newSession(stor storage.Storage, o *opt.Options) (s *session, err error) { + if stor == nil { + return nil, os.ErrInvalid + } + storLock, err := stor.Lock() + if err != nil { + return + } + s = &session{ + stor: newIStorage(stor), + storLock: storLock, + fileRef: make(map[int64]int), + } + s.setOptions(o) + s.tops = newTableOps(s) + s.setVersion(newVersion(s)) + s.log("log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed") + return +} + +// Close session. +func (s *session) close() { + s.tops.close() + if s.manifest != nil { + s.manifest.Close() + } + if s.manifestWriter != nil { + s.manifestWriter.Close() + } + s.manifest = nil + s.manifestWriter = nil + s.setVersion(&version{s: s, closing: true}) +} + +// Release session lock. +func (s *session) release() { + s.storLock.Unlock() +} + +// Create a new database session; need external synchronization. +func (s *session) create() error { + // create manifest + return s.newManifest(nil, nil) +} + +// Recover a database session; need external synchronization. 
+func (s *session) recover() (err error) { + defer func() { + if os.IsNotExist(err) { + // Don't return os.ErrNotExist if the underlying storage contains + // other files that belong to LevelDB. So the DB won't get trashed. + if fds, _ := s.stor.List(storage.TypeAll); len(fds) > 0 { + err = &errors.ErrCorrupted{Fd: storage.FileDesc{Type: storage.TypeManifest}, Err: &errors.ErrMissingFiles{}} + } + } + }() + + fd, err := s.stor.GetMeta() + if err != nil { + return + } + + reader, err := s.stor.Open(fd) + if err != nil { + return + } + defer reader.Close() + + var ( + // Options. + strict = s.o.GetStrict(opt.StrictManifest) + + jr = journal.NewReader(reader, dropper{s, fd}, strict, true) + rec = &sessionRecord{} + staging = s.stVersion.newStaging() + ) + for { + var r io.Reader + r, err = jr.Next() + if err != nil { + if err == io.EOF { + err = nil + break + } + return errors.SetFd(err, fd) + } + + err = rec.decode(r) + if err == nil { + // save compact pointers + for _, r := range rec.compPtrs { + s.setCompPtr(r.level, internalKey(r.ikey)) + } + // commit record to version staging + staging.commit(rec) + } else { + err = errors.SetFd(err, fd) + if strict || !errors.IsCorrupted(err) { + return + } + s.logf("manifest error: %v (skipped)", errors.SetFd(err, fd)) + } + rec.resetCompPtrs() + rec.resetAddedTables() + rec.resetDeletedTables() + } + + switch { + case !rec.has(recComparer): + return newErrManifestCorrupted(fd, "comparer", "missing") + case rec.comparer != s.icmp.uName(): + return newErrManifestCorrupted(fd, "comparer", fmt.Sprintf("mismatch: want '%s', got '%s'", s.icmp.uName(), rec.comparer)) + case !rec.has(recNextFileNum): + return newErrManifestCorrupted(fd, "next-file-num", "missing") + case !rec.has(recJournalNum): + return newErrManifestCorrupted(fd, "journal-file-num", "missing") + case !rec.has(recSeqNum): + return newErrManifestCorrupted(fd, "seq-num", "missing") + } + + s.manifestFd = fd + s.setVersion(staging.finish()) + 
s.setNextFileNum(rec.nextFileNum) + s.recordCommited(rec) + return nil +} + +// Commit session; need external synchronization. +func (s *session) commit(r *sessionRecord) (err error) { + v := s.version() + defer v.release() + + // spawn new version based on current version + nv := v.spawn(r) + + if s.manifest == nil { + // manifest journal writer not yet created, create one + err = s.newManifest(r, nv) + } else { + err = s.flushManifest(r) + } + + // finally, apply new version if no error rise + if err == nil { + s.setVersion(nv) + } + + return +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go new file mode 100644 index 000000000..089cd00b2 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_compaction.go @@ -0,0 +1,302 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/memdb" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func (s *session) pickMemdbLevel(umin, umax []byte, maxLevel int) int { + v := s.version() + defer v.release() + return v.pickMemdbLevel(umin, umax, maxLevel) +} + +func (s *session) flushMemdb(rec *sessionRecord, mdb *memdb.DB, maxLevel int) (int, error) { + // Create sorted table. + iter := mdb.NewIterator(nil) + defer iter.Release() + t, n, err := s.tops.createFrom(iter) + if err != nil { + return 0, err + } + + // Pick level other than zero can cause compaction issue with large + // bulk insert and delete on strictly incrementing key-space. The + // problem is that the small deletion markers trapped at lower level, + // while key/value entries keep growing at higher level. 
Since the + // key-space is strictly incrementing it will not overlaps with + // higher level, thus maximum possible level is always picked, while + // overlapping deletion marker pushed into lower level. + // See: https://github.com/syndtr/goleveldb/issues/127. + flushLevel := s.pickMemdbLevel(t.imin.ukey(), t.imax.ukey(), maxLevel) + rec.addTableFile(flushLevel, t) + + s.logf("memdb@flush created L%d@%d N·%d S·%s %q:%q", flushLevel, t.fd.Num, n, shortenb(int(t.size)), t.imin, t.imax) + return flushLevel, nil +} + +// Pick a compaction based on current state; need external synchronization. +func (s *session) pickCompaction() *compaction { + v := s.version() + + var sourceLevel int + var t0 tFiles + if v.cScore >= 1 { + sourceLevel = v.cLevel + cptr := s.getCompPtr(sourceLevel) + tables := v.levels[sourceLevel] + for _, t := range tables { + if cptr == nil || s.icmp.Compare(t.imax, cptr) > 0 { + t0 = append(t0, t) + break + } + } + if len(t0) == 0 { + t0 = append(t0, tables[0]) + } + } else { + if p := atomic.LoadPointer(&v.cSeek); p != nil { + ts := (*tSet)(p) + sourceLevel = ts.level + t0 = append(t0, ts.table) + } else { + v.release() + return nil + } + } + + return newCompaction(s, v, sourceLevel, t0) +} + +// Create compaction from given level and range; need external synchronization. +func (s *session) getCompactionRange(sourceLevel int, umin, umax []byte, noLimit bool) *compaction { + v := s.version() + + if sourceLevel >= len(v.levels) { + v.release() + return nil + } + + t0 := v.levels[sourceLevel].getOverlaps(nil, s.icmp, umin, umax, sourceLevel == 0) + if len(t0) == 0 { + v.release() + return nil + } + + // Avoid compacting too much in one shot in case the range is large. + // But we cannot do this for level-0 since level-0 files can overlap + // and we must not pick one file and drop another older file if the + // two files overlap. 
+ if !noLimit && sourceLevel > 0 { + limit := int64(v.s.o.GetCompactionSourceLimit(sourceLevel)) + total := int64(0) + for i, t := range t0 { + total += t.size + if total >= limit { + s.logf("table@compaction limiting F·%d -> F·%d", len(t0), i+1) + t0 = t0[:i+1] + break + } + } + } + + return newCompaction(s, v, sourceLevel, t0) +} + +func newCompaction(s *session, v *version, sourceLevel int, t0 tFiles) *compaction { + c := &compaction{ + s: s, + v: v, + sourceLevel: sourceLevel, + levels: [2]tFiles{t0, nil}, + maxGPOverlaps: int64(s.o.GetCompactionGPOverlaps(sourceLevel)), + tPtrs: make([]int, len(v.levels)), + } + c.expand() + c.save() + return c +} + +// compaction represent a compaction state. +type compaction struct { + s *session + v *version + + sourceLevel int + levels [2]tFiles + maxGPOverlaps int64 + + gp tFiles + gpi int + seenKey bool + gpOverlappedBytes int64 + imin, imax internalKey + tPtrs []int + released bool + + snapGPI int + snapSeenKey bool + snapGPOverlappedBytes int64 + snapTPtrs []int +} + +func (c *compaction) save() { + c.snapGPI = c.gpi + c.snapSeenKey = c.seenKey + c.snapGPOverlappedBytes = c.gpOverlappedBytes + c.snapTPtrs = append(c.snapTPtrs[:0], c.tPtrs...) +} + +func (c *compaction) restore() { + c.gpi = c.snapGPI + c.seenKey = c.snapSeenKey + c.gpOverlappedBytes = c.snapGPOverlappedBytes + c.tPtrs = append(c.tPtrs[:0], c.snapTPtrs...) +} + +func (c *compaction) release() { + if !c.released { + c.released = true + c.v.release() + } +} + +// Expand compacted tables; need external synchronization. +func (c *compaction) expand() { + limit := int64(c.s.o.GetCompactionExpandLimit(c.sourceLevel)) + vt0 := c.v.levels[c.sourceLevel] + vt1 := tFiles{} + if level := c.sourceLevel + 1; level < len(c.v.levels) { + vt1 = c.v.levels[level] + } + + t0, t1 := c.levels[0], c.levels[1] + imin, imax := t0.getRange(c.s.icmp) + // We expand t0 here just incase ukey hop across tables. 
+ t0 = vt0.getOverlaps(t0, c.s.icmp, imin.ukey(), imax.ukey(), c.sourceLevel == 0) + if len(t0) != len(c.levels[0]) { + imin, imax = t0.getRange(c.s.icmp) + } + t1 = vt1.getOverlaps(t1, c.s.icmp, imin.ukey(), imax.ukey(), false) + // Get entire range covered by compaction. + amin, amax := append(t0, t1...).getRange(c.s.icmp) + + // See if we can grow the number of inputs in "sourceLevel" without + // changing the number of "sourceLevel+1" files we pick up. + if len(t1) > 0 { + exp0 := vt0.getOverlaps(nil, c.s.icmp, amin.ukey(), amax.ukey(), c.sourceLevel == 0) + if len(exp0) > len(t0) && t1.size()+exp0.size() < limit { + xmin, xmax := exp0.getRange(c.s.icmp) + exp1 := vt1.getOverlaps(nil, c.s.icmp, xmin.ukey(), xmax.ukey(), false) + if len(exp1) == len(t1) { + c.s.logf("table@compaction expanding L%d+L%d (F·%d S·%s)+(F·%d S·%s) -> (F·%d S·%s)+(F·%d S·%s)", + c.sourceLevel, c.sourceLevel+1, len(t0), shortenb(int(t0.size())), len(t1), shortenb(int(t1.size())), + len(exp0), shortenb(int(exp0.size())), len(exp1), shortenb(int(exp1.size()))) + imin, imax = xmin, xmax + t0, t1 = exp0, exp1 + amin, amax = append(t0, t1...).getRange(c.s.icmp) + } + } + } + + // Compute the set of grandparent files that overlap this compaction + // (parent == sourceLevel+1; grandparent == sourceLevel+2) + if level := c.sourceLevel + 2; level < len(c.v.levels) { + c.gp = c.v.levels[level].getOverlaps(c.gp, c.s.icmp, amin.ukey(), amax.ukey(), false) + } + + c.levels[0], c.levels[1] = t0, t1 + c.imin, c.imax = imin, imax +} + +// Check whether compaction is trivial. 
+func (c *compaction) trivial() bool { + return len(c.levels[0]) == 1 && len(c.levels[1]) == 0 && c.gp.size() <= c.maxGPOverlaps +} + +func (c *compaction) baseLevelForKey(ukey []byte) bool { + for level := c.sourceLevel + 2; level < len(c.v.levels); level++ { + tables := c.v.levels[level] + for c.tPtrs[level] < len(tables) { + t := tables[c.tPtrs[level]] + if c.s.icmp.uCompare(ukey, t.imax.ukey()) <= 0 { + // We've advanced far enough. + if c.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { + // Key falls in this file's range, so definitely not base level. + return false + } + break + } + c.tPtrs[level]++ + } + } + return true +} + +func (c *compaction) shouldStopBefore(ikey internalKey) bool { + for ; c.gpi < len(c.gp); c.gpi++ { + gp := c.gp[c.gpi] + if c.s.icmp.Compare(ikey, gp.imax) <= 0 { + break + } + if c.seenKey { + c.gpOverlappedBytes += gp.size + } + } + c.seenKey = true + + if c.gpOverlappedBytes > c.maxGPOverlaps { + // Too much overlap for current output; start new output. + c.gpOverlappedBytes = 0 + return true + } + return false +} + +// Creates an iterator. +func (c *compaction) newIterator() iterator.Iterator { + // Creates iterator slice. + icap := len(c.levels) + if c.sourceLevel == 0 { + // Special case for level-0. + icap = len(c.levels[0]) + 1 + } + its := make([]iterator.Iterator, 0, icap) + + // Options. + ro := &opt.ReadOptions{ + DontFillCache: true, + Strict: opt.StrictOverride, + } + strict := c.s.o.GetStrict(opt.StrictCompaction) + if strict { + ro.Strict |= opt.StrictReader + } + + for i, tables := range c.levels { + if len(tables) == 0 { + continue + } + + // Level-0 is not sorted and may overlaps each other. 
+ if c.sourceLevel+i == 0 { + for _, t := range tables { + its = append(its, c.s.tops.newIterator(t, nil, ro)) + } + } else { + it := iterator.NewIndexedIterator(tables.newIndexIterator(c.s.tops, c.s.icmp, nil, ro), strict) + its = append(its, it) + } + } + + return iterator.NewMergedIterator(its, c.s.icmp, strict) +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go new file mode 100644 index 000000000..854e1aa6f --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/session_record.go @@ -0,0 +1,323 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "bufio" + "encoding/binary" + "io" + "strings" + + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +type byteReader interface { + io.Reader + io.ByteReader +} + +// These numbers are written to disk and should not be changed. +const ( + recComparer = 1 + recJournalNum = 2 + recNextFileNum = 3 + recSeqNum = 4 + recCompPtr = 5 + recDelTable = 6 + recAddTable = 7 + // 8 was used for large value refs + recPrevJournalNum = 9 +) + +type cpRecord struct { + level int + ikey internalKey +} + +type atRecord struct { + level int + num int64 + size int64 + imin internalKey + imax internalKey +} + +type dtRecord struct { + level int + num int64 +} + +type sessionRecord struct { + hasRec int + comparer string + journalNum int64 + prevJournalNum int64 + nextFileNum int64 + seqNum uint64 + compPtrs []cpRecord + addedTables []atRecord + deletedTables []dtRecord + + scratch [binary.MaxVarintLen64]byte + err error +} + +func (p *sessionRecord) has(rec int) bool { + return p.hasRec&(1< +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package leveldb + +import ( + "fmt" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/journal" + "github.com/syndtr/goleveldb/leveldb/storage" +) + +// Logging. + +type dropper struct { + s *session + fd storage.FileDesc +} + +func (d dropper) Drop(err error) { + if e, ok := err.(*journal.ErrCorrupted); ok { + d.s.logf("journal@drop %s-%d S·%s %q", d.fd.Type, d.fd.Num, shortenb(e.Size), e.Reason) + } else { + d.s.logf("journal@drop %s-%d %q", d.fd.Type, d.fd.Num, err) + } +} + +func (s *session) log(v ...interface{}) { s.stor.Log(fmt.Sprint(v...)) } +func (s *session) logf(format string, v ...interface{}) { s.stor.Log(fmt.Sprintf(format, v...)) } + +// File utils. + +func (s *session) newTemp() storage.FileDesc { + num := atomic.AddInt64(&s.stTempFileNum, 1) - 1 + return storage.FileDesc{Type: storage.TypeTemp, Num: num} +} + +func (s *session) addFileRef(fd storage.FileDesc, ref int) int { + ref += s.fileRef[fd.Num] + if ref > 0 { + s.fileRef[fd.Num] = ref + } else if ref == 0 { + delete(s.fileRef, fd.Num) + } else { + panic(fmt.Sprintf("negative ref: %v", fd)) + } + return ref +} + +// Session state. + +// Get current version. This will incr version ref, must call +// version.release (exactly once) after use. +func (s *session) version() *version { + s.vmu.Lock() + defer s.vmu.Unlock() + s.stVersion.incref() + return s.stVersion +} + +func (s *session) tLen(level int) int { + s.vmu.Lock() + defer s.vmu.Unlock() + return s.stVersion.tLen(level) +} + +// Set current version to v. +func (s *session) setVersion(v *version) { + s.vmu.Lock() + defer s.vmu.Unlock() + // Hold by session. It is important to call this first before releasing + // current version, otherwise the still used files might get released. + v.incref() + if s.stVersion != nil { + // Release current version. + s.stVersion.releaseNB() + } + s.stVersion = v +} + +// Get current unused file number. 
+func (s *session) nextFileNum() int64 { + return atomic.LoadInt64(&s.stNextFileNum) +} + +// Set current unused file number to num. +func (s *session) setNextFileNum(num int64) { + atomic.StoreInt64(&s.stNextFileNum, num) +} + +// Mark file number as used. +func (s *session) markFileNum(num int64) { + nextFileNum := num + 1 + for { + old, x := s.stNextFileNum, nextFileNum + if old > x { + x = old + } + if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) { + break + } + } +} + +// Allocate a file number. +func (s *session) allocFileNum() int64 { + return atomic.AddInt64(&s.stNextFileNum, 1) - 1 +} + +// Reuse given file number. +func (s *session) reuseFileNum(num int64) { + for { + old, x := s.stNextFileNum, num + if old != x+1 { + x = old + } + if atomic.CompareAndSwapInt64(&s.stNextFileNum, old, x) { + break + } + } +} + +// Set compaction ptr at given level; need external synchronization. +func (s *session) setCompPtr(level int, ik internalKey) { + if level >= len(s.stCompPtrs) { + newCompPtrs := make([]internalKey, level+1) + copy(newCompPtrs, s.stCompPtrs) + s.stCompPtrs = newCompPtrs + } + s.stCompPtrs[level] = append(internalKey{}, ik...) +} + +// Get compaction ptr at given level; need external synchronization. +func (s *session) getCompPtr(level int) internalKey { + if level >= len(s.stCompPtrs) { + return nil + } + return s.stCompPtrs[level] +} + +// Manifest related utils. + +// Fill given session record obj with current states; need external +// synchronization. +func (s *session) fillRecord(r *sessionRecord, snapshot bool) { + r.setNextFileNum(s.nextFileNum()) + + if snapshot { + if !r.has(recJournalNum) { + r.setJournalNum(s.stJournalNum) + } + + if !r.has(recSeqNum) { + r.setSeqNum(s.stSeqNum) + } + + for level, ik := range s.stCompPtrs { + if ik != nil { + r.addCompPtr(level, ik) + } + } + + r.setComparer(s.icmp.uName()) + } +} + +// Mark if record has been committed, this will update session state; +// need external synchronization. 
+func (s *session) recordCommited(rec *sessionRecord) { + if rec.has(recJournalNum) { + s.stJournalNum = rec.journalNum + } + + if rec.has(recPrevJournalNum) { + s.stPrevJournalNum = rec.prevJournalNum + } + + if rec.has(recSeqNum) { + s.stSeqNum = rec.seqNum + } + + for _, r := range rec.compPtrs { + s.setCompPtr(r.level, internalKey(r.ikey)) + } +} + +// Create a new manifest file; need external synchronization. +func (s *session) newManifest(rec *sessionRecord, v *version) (err error) { + fd := storage.FileDesc{Type: storage.TypeManifest, Num: s.allocFileNum()} + writer, err := s.stor.Create(fd) + if err != nil { + return + } + jw := journal.NewWriter(writer) + + if v == nil { + v = s.version() + defer v.release() + } + if rec == nil { + rec = &sessionRecord{} + } + s.fillRecord(rec, true) + v.fillRecord(rec) + + defer func() { + if err == nil { + s.recordCommited(rec) + if s.manifest != nil { + s.manifest.Close() + } + if s.manifestWriter != nil { + s.manifestWriter.Close() + } + if !s.manifestFd.Zero() { + s.stor.Remove(s.manifestFd) + } + s.manifestFd = fd + s.manifestWriter = writer + s.manifest = jw + } else { + writer.Close() + s.stor.Remove(fd) + s.reuseFileNum(fd.Num) + } + }() + + w, err := jw.Next() + if err != nil { + return + } + err = rec.encode(w) + if err != nil { + return + } + err = jw.Flush() + if err != nil { + return + } + err = s.stor.SetMeta(fd) + return +} + +// Flush record to disk. 
+func (s *session) flushManifest(rec *sessionRecord) (err error) { + s.fillRecord(rec, false) + w, err := s.manifest.Next() + if err != nil { + return + } + err = rec.encode(w) + if err != nil { + return + } + err = s.manifest.Flush() + if err != nil { + return + } + if !s.o.GetNoSync() { + err = s.manifestWriter.Sync() + if err != nil { + return + } + } + s.recordCommited(rec) + return +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage.go new file mode 100644 index 000000000..d45fb5dfe --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage.go @@ -0,0 +1,63 @@ +package leveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/storage" + "sync/atomic" +) + +type iStorage struct { + storage.Storage + read uint64 + write uint64 +} + +func (c *iStorage) Open(fd storage.FileDesc) (storage.Reader, error) { + r, err := c.Storage.Open(fd) + return &iStorageReader{r, c}, err +} + +func (c *iStorage) Create(fd storage.FileDesc) (storage.Writer, error) { + w, err := c.Storage.Create(fd) + return &iStorageWriter{w, c}, err +} + +func (c *iStorage) reads() uint64 { + return atomic.LoadUint64(&c.read) +} + +func (c *iStorage) writes() uint64 { + return atomic.LoadUint64(&c.write) +} + +// newIStorage returns the given storage wrapped by iStorage. 
+func newIStorage(s storage.Storage) *iStorage { + return &iStorage{s, 0, 0} +} + +type iStorageReader struct { + storage.Reader + c *iStorage +} + +func (r *iStorageReader) Read(p []byte) (n int, err error) { + n, err = r.Reader.Read(p) + atomic.AddUint64(&r.c.read, uint64(n)) + return n, err +} + +func (r *iStorageReader) ReadAt(p []byte, off int64) (n int, err error) { + n, err = r.Reader.ReadAt(p, off) + atomic.AddUint64(&r.c.read, uint64(n)) + return n, err +} + +type iStorageWriter struct { + storage.Writer + c *iStorage +} + +func (w *iStorageWriter) Write(p []byte) (n int, err error) { + n, err = w.Writer.Write(p) + atomic.AddUint64(&w.c.write, uint64(n)) + return n, err +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go new file mode 100644 index 000000000..9ba71fd6d --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage.go @@ -0,0 +1,671 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reservefs. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package storage + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "time" +) + +var ( + errFileOpen = errors.New("leveldb/storage: file still open") + errReadOnly = errors.New("leveldb/storage: storage is read-only") +) + +type fileLock interface { + release() error +} + +type fileStorageLock struct { + fs *fileStorage +} + +func (lock *fileStorageLock) Unlock() { + if lock.fs != nil { + lock.fs.mu.Lock() + defer lock.fs.mu.Unlock() + if lock.fs.slock == lock { + lock.fs.slock = nil + } + } +} + +type int64Slice []int64 + +func (p int64Slice) Len() int { return len(p) } +func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } +func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +func writeFileSynced(filename string, data []byte, perm os.FileMode) error { + f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm) + if err != nil { + return err + } + n, err := f.Write(data) + if err == nil && n < len(data) { + err = io.ErrShortWrite + } + if err1 := f.Sync(); err == nil { + err = err1 + } + if err1 := f.Close(); err == nil { + err = err1 + } + return err +} + +const logSizeThreshold = 1024 * 1024 // 1 MiB + +// fileStorage is a file-system backed storage. +type fileStorage struct { + path string + readOnly bool + + mu sync.Mutex + flock fileLock + slock *fileStorageLock + logw *os.File + logSize int64 + buf []byte + // Opened file counter; if open < 0 means closed. + open int + day int +} + +// OpenFile returns a new filesystem-backed storage implementation with the given +// path. This also acquire a file lock, so any subsequent attempt to open the +// same path will fail. +// +// The storage must be closed after use, by calling Close method. 
+func OpenFile(path string, readOnly bool) (Storage, error) { + if fi, err := os.Stat(path); err == nil { + if !fi.IsDir() { + return nil, fmt.Errorf("leveldb/storage: open %s: not a directory", path) + } + } else if os.IsNotExist(err) && !readOnly { + if err := os.MkdirAll(path, 0755); err != nil { + return nil, err + } + } else { + return nil, err + } + + flock, err := newFileLock(filepath.Join(path, "LOCK"), readOnly) + if err != nil { + return nil, err + } + + defer func() { + if err != nil { + flock.release() + } + }() + + var ( + logw *os.File + logSize int64 + ) + if !readOnly { + logw, err = os.OpenFile(filepath.Join(path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return nil, err + } + logSize, err = logw.Seek(0, os.SEEK_END) + if err != nil { + logw.Close() + return nil, err + } + } + + fs := &fileStorage{ + path: path, + readOnly: readOnly, + flock: flock, + logw: logw, + logSize: logSize, + } + runtime.SetFinalizer(fs, (*fileStorage).Close) + return fs, nil +} + +func (fs *fileStorage) Lock() (Locker, error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + if fs.readOnly { + return &fileStorageLock{}, nil + } + if fs.slock != nil { + return nil, ErrLocked + } + fs.slock = &fileStorageLock{fs: fs} + return fs.slock, nil +} + +func itoa(buf []byte, i int, wid int) []byte { + u := uint(i) + if u == 0 && wid <= 1 { + return append(buf, '0') + } + + // Assemble decimal in reverse order. + var b [32]byte + bp := len(b) + for ; u > 0 || wid > 0; u /= 10 { + bp-- + wid-- + b[bp] = byte(u%10) + '0' + } + return append(buf, b[bp:]...) +} + +func (fs *fileStorage) printDay(t time.Time) { + if fs.day == t.Day() { + return + } + fs.day = t.Day() + fs.logw.Write([]byte("=============== " + t.Format("Jan 2, 2006 (MST)") + " ===============\n")) +} + +func (fs *fileStorage) doLog(t time.Time, str string) { + if fs.logSize > logSizeThreshold { + // Rotate log file. 
+ fs.logw.Close() + fs.logw = nil + fs.logSize = 0 + rename(filepath.Join(fs.path, "LOG"), filepath.Join(fs.path, "LOG.old")) + } + if fs.logw == nil { + var err error + fs.logw, err = os.OpenFile(filepath.Join(fs.path, "LOG"), os.O_WRONLY|os.O_CREATE, 0644) + if err != nil { + return + } + // Force printDay on new log file. + fs.day = 0 + } + fs.printDay(t) + hour, min, sec := t.Clock() + msec := t.Nanosecond() / 1e3 + // time + fs.buf = itoa(fs.buf[:0], hour, 2) + fs.buf = append(fs.buf, ':') + fs.buf = itoa(fs.buf, min, 2) + fs.buf = append(fs.buf, ':') + fs.buf = itoa(fs.buf, sec, 2) + fs.buf = append(fs.buf, '.') + fs.buf = itoa(fs.buf, msec, 6) + fs.buf = append(fs.buf, ' ') + // write + fs.buf = append(fs.buf, []byte(str)...) + fs.buf = append(fs.buf, '\n') + n, _ := fs.logw.Write(fs.buf) + fs.logSize += int64(n) +} + +func (fs *fileStorage) Log(str string) { + if !fs.readOnly { + t := time.Now() + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return + } + fs.doLog(t, str) + } +} + +func (fs *fileStorage) log(str string) { + if !fs.readOnly { + fs.doLog(time.Now(), str) + } +} + +func (fs *fileStorage) setMeta(fd FileDesc) error { + content := fsGenName(fd) + "\n" + // Check and backup old CURRENT file. + currentPath := filepath.Join(fs.path, "CURRENT") + if _, err := os.Stat(currentPath); err == nil { + b, err := ioutil.ReadFile(currentPath) + if err != nil { + fs.log(fmt.Sprintf("backup CURRENT: %v", err)) + return err + } + if string(b) == content { + // Content not changed, do nothing. + return nil + } + if err := writeFileSynced(currentPath+".bak", b, 0644); err != nil { + fs.log(fmt.Sprintf("backup CURRENT: %v", err)) + return err + } + } else if !os.IsNotExist(err) { + return err + } + path := fmt.Sprintf("%s.%d", filepath.Join(fs.path, "CURRENT"), fd.Num) + if err := writeFileSynced(path, []byte(content), 0644); err != nil { + fs.log(fmt.Sprintf("create CURRENT.%d: %v", fd.Num, err)) + return err + } + // Replace CURRENT file. 
+ if err := rename(path, currentPath); err != nil { + fs.log(fmt.Sprintf("rename CURRENT.%d: %v", fd.Num, err)) + return err + } + // Sync root directory. + if err := syncDir(fs.path); err != nil { + fs.log(fmt.Sprintf("syncDir: %v", err)) + return err + } + return nil +} + +func (fs *fileStorage) SetMeta(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + if fs.readOnly { + return errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + return fs.setMeta(fd) +} + +func (fs *fileStorage) GetMeta() (FileDesc, error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return FileDesc{}, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return FileDesc{}, err + } + names, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. + if ce := dir.Close(); ce != nil { + fs.log(fmt.Sprintf("close dir: %v", ce)) + } + if err != nil { + return FileDesc{}, err + } + // Try this in order: + // - CURRENT.[0-9]+ ('pending rename' file, descending order) + // - CURRENT + // - CURRENT.bak + // + // Skip corrupted file or file that point to a missing target file. 
+ type currentFile struct { + name string + fd FileDesc + } + tryCurrent := func(name string) (*currentFile, error) { + b, err := ioutil.ReadFile(filepath.Join(fs.path, name)) + if err != nil { + if os.IsNotExist(err) { + err = os.ErrNotExist + } + return nil, err + } + var fd FileDesc + if len(b) < 1 || b[len(b)-1] != '\n' || !fsParseNamePtr(string(b[:len(b)-1]), &fd) { + fs.log(fmt.Sprintf("%s: corrupted content: %q", name, b)) + err := &ErrCorrupted{ + Err: errors.New("leveldb/storage: corrupted or incomplete CURRENT file"), + } + return nil, err + } + if _, err := os.Stat(filepath.Join(fs.path, fsGenName(fd))); err != nil { + if os.IsNotExist(err) { + fs.log(fmt.Sprintf("%s: missing target file: %s", name, fd)) + err = os.ErrNotExist + } + return nil, err + } + return ¤tFile{name: name, fd: fd}, nil + } + tryCurrents := func(names []string) (*currentFile, error) { + var ( + cur *currentFile + // Last corruption error. + lastCerr error + ) + for _, name := range names { + var err error + cur, err = tryCurrent(name) + if err == nil { + break + } else if err == os.ErrNotExist { + // Fallback to the next file. + } else if isCorrupted(err) { + lastCerr = err + // Fallback to the next file. + } else { + // In case the error is due to permission, etc. + return nil, err + } + } + if cur == nil { + err := os.ErrNotExist + if lastCerr != nil { + err = lastCerr + } + return nil, err + } + return cur, nil + } + + // Try 'pending rename' files. 
+ var nums []int64 + for _, name := range names { + if strings.HasPrefix(name, "CURRENT.") && name != "CURRENT.bak" { + i, err := strconv.ParseInt(name[8:], 10, 64) + if err == nil { + nums = append(nums, i) + } + } + } + var ( + pendCur *currentFile + pendErr = os.ErrNotExist + pendNames []string + ) + if len(nums) > 0 { + sort.Sort(sort.Reverse(int64Slice(nums))) + pendNames = make([]string, len(nums)) + for i, num := range nums { + pendNames[i] = fmt.Sprintf("CURRENT.%d", num) + } + pendCur, pendErr = tryCurrents(pendNames) + if pendErr != nil && pendErr != os.ErrNotExist && !isCorrupted(pendErr) { + return FileDesc{}, pendErr + } + } + + // Try CURRENT and CURRENT.bak. + curCur, curErr := tryCurrents([]string{"CURRENT", "CURRENT.bak"}) + if curErr != nil && curErr != os.ErrNotExist && !isCorrupted(curErr) { + return FileDesc{}, curErr + } + + // pendCur takes precedence, but guards against obsolete pendCur. + if pendCur != nil && (curCur == nil || pendCur.fd.Num > curCur.fd.Num) { + curCur = pendCur + } + + if curCur != nil { + // Restore CURRENT file to proper state. + if !fs.readOnly && (curCur.name != "CURRENT" || len(pendNames) != 0) { + // Ignore setMeta errors, however don't delete obsolete files if we + // catch error. + if err := fs.setMeta(curCur.fd); err == nil { + // Remove 'pending rename' files. + for _, name := range pendNames { + if err := os.Remove(filepath.Join(fs.path, name)); err != nil { + fs.log(fmt.Sprintf("remove %s: %v", name, err)) + } + } + } + } + return curCur.fd, nil + } + + // Nothing found. + if isCorrupted(pendErr) { + return FileDesc{}, pendErr + } + return FileDesc{}, curErr +} + +func (fs *fileStorage) List(ft FileType) (fds []FileDesc, err error) { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + dir, err := os.Open(fs.path) + if err != nil { + return + } + names, err := dir.Readdirnames(0) + // Close the dir first before checking for Readdirnames error. 
+ if cerr := dir.Close(); cerr != nil { + fs.log(fmt.Sprintf("close dir: %v", cerr)) + } + if err == nil { + for _, name := range names { + if fd, ok := fsParseName(name); ok && fd.Type&ft != 0 { + fds = append(fds, fd) + } + } + } + return +} + +func (fs *fileStorage) Open(fd FileDesc) (Reader, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_RDONLY, 0) + if err != nil { + if fsHasOldName(fd) && os.IsNotExist(err) { + of, err = os.OpenFile(filepath.Join(fs.path, fsGenOldName(fd)), os.O_RDONLY, 0) + if err == nil { + goto ok + } + } + return nil, err + } +ok: + fs.open++ + return &fileWrap{File: of, fs: fs, fd: fd}, nil +} + +func (fs *fileStorage) Create(fd FileDesc) (Writer, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + if fs.readOnly { + return nil, errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return nil, ErrClosed + } + of, err := os.OpenFile(filepath.Join(fs.path, fsGenName(fd)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, err + } + fs.open++ + return &fileWrap{File: of, fs: fs, fd: fd}, nil +} + +func (fs *fileStorage) Remove(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + if fs.readOnly { + return errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + err := os.Remove(filepath.Join(fs.path, fsGenName(fd))) + if err != nil { + if fsHasOldName(fd) && os.IsNotExist(err) { + if e1 := os.Remove(filepath.Join(fs.path, fsGenOldName(fd))); !os.IsNotExist(e1) { + fs.log(fmt.Sprintf("remove %s: %v (old name)", fd, err)) + err = e1 + } + } else { + fs.log(fmt.Sprintf("remove %s: %v", fd, err)) + } + } + return err +} + +func (fs *fileStorage) Rename(oldfd, newfd FileDesc) error { + if !FileDescOk(oldfd) || !FileDescOk(newfd) { + return 
ErrInvalidFile + } + if oldfd == newfd { + return nil + } + if fs.readOnly { + return errReadOnly + } + + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + return rename(filepath.Join(fs.path, fsGenName(oldfd)), filepath.Join(fs.path, fsGenName(newfd))) +} + +func (fs *fileStorage) Close() error { + fs.mu.Lock() + defer fs.mu.Unlock() + if fs.open < 0 { + return ErrClosed + } + // Clear the finalizer. + runtime.SetFinalizer(fs, nil) + + if fs.open > 0 { + fs.log(fmt.Sprintf("close: warning, %d files still open", fs.open)) + } + fs.open = -1 + if fs.logw != nil { + fs.logw.Close() + } + return fs.flock.release() +} + +type fileWrap struct { + *os.File + fs *fileStorage + fd FileDesc + closed bool +} + +func (fw *fileWrap) Sync() error { + if err := fw.File.Sync(); err != nil { + return err + } + if fw.fd.Type == TypeManifest { + // Also sync parent directory if file type is manifest. + // See: https://code.google.com/p/leveldb/issues/detail?id=190. + if err := syncDir(fw.fs.path); err != nil { + fw.fs.log(fmt.Sprintf("syncDir: %v", err)) + return err + } + } + return nil +} + +func (fw *fileWrap) Close() error { + fw.fs.mu.Lock() + defer fw.fs.mu.Unlock() + if fw.closed { + return ErrClosed + } + fw.closed = true + fw.fs.open-- + err := fw.File.Close() + if err != nil { + fw.fs.log(fmt.Sprintf("close %s: %v", fw.fd, err)) + } + return err +} + +func fsGenName(fd FileDesc) string { + switch fd.Type { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", fd.Num) + case TypeJournal: + return fmt.Sprintf("%06d.log", fd.Num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", fd.Num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", fd.Num) + default: + panic("invalid file type") + } +} + +func fsHasOldName(fd FileDesc) bool { + return fd.Type == TypeTable +} + +func fsGenOldName(fd FileDesc) string { + switch fd.Type { + case TypeTable: + return fmt.Sprintf("%06d.sst", fd.Num) + } + return fsGenName(fd) +} + +func fsParseName(name 
string) (fd FileDesc, ok bool) { + var tail string + _, err := fmt.Sscanf(name, "%d.%s", &fd.Num, &tail) + if err == nil { + switch tail { + case "log": + fd.Type = TypeJournal + case "ldb", "sst": + fd.Type = TypeTable + case "tmp": + fd.Type = TypeTemp + default: + return + } + return fd, true + } + n, _ := fmt.Sscanf(name, "MANIFEST-%d%s", &fd.Num, &tail) + if n == 1 { + fd.Type = TypeManifest + return fd, true + } + return +} + +func fsParseNamePtr(name string, fd *FileDesc) bool { + _fd, ok := fsParseName(name) + if fd != nil { + *fd = _fd + } + return ok +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go new file mode 100644 index 000000000..5545aeef2 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_nacl.go @@ -0,0 +1,34 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// +build nacl + +package storage + +import ( + "os" + "syscall" +) + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + return nil, syscall.ENOTSUP +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + return syscall.ENOTSUP +} + +func rename(oldpath, newpath string) error { + return syscall.ENOTSUP +} + +func isErrInvalid(err error) bool { + return false +} + +func syncDir(name string) error { + return syscall.ENOTSUP +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go new file mode 100644 index 000000000..b82979801 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_plan9.go @@ -0,0 +1,63 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. 
+// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "os" +) + +type plan9FileLock struct { + f *os.File +} + +func (fl *plan9FileLock) release() error { + return fl.f.Close() +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var ( + flag int + perm os.FileMode + ) + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + perm = os.ModeExclusive + } + f, err := os.OpenFile(path, flag, perm) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, perm|0644) + } + if err != nil { + return + } + fl = &plan9FileLock{f: f} + return +} + +func rename(oldpath, newpath string) error { + if _, err := os.Stat(newpath); err == nil { + if err := os.Remove(newpath); err != nil { + return err + } + } + + return os.Rename(oldpath, newpath) +} + +func syncDir(name string) error { + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go new file mode 100644 index 000000000..79901ee4a --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_solaris.go @@ -0,0 +1,81 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// +build solaris + +package storage + +import ( + "os" + "syscall" +) + +type unixFileLock struct { + f *os.File +} + +func (fl *unixFileLock) release() error { + if err := setFileLock(fl.f, false, false); err != nil { + return err + } + return fl.f.Close() +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var flag int + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + } + f, err := os.OpenFile(path, flag, 0) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, 0644) + } + if err != nil { + return + } + err = setFileLock(f, readOnly, true) + if err != nil { + f.Close() + return + } + fl = &unixFileLock{f: f} + return +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + flock := syscall.Flock_t{ + Type: syscall.F_UNLCK, + Start: 0, + Len: 0, + Whence: 1, + } + if lock { + if readOnly { + flock.Type = syscall.F_RDLCK + } else { + flock.Type = syscall.F_WRLCK + } + } + return syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &flock) +} + +func rename(oldpath, newpath string) error { + return os.Rename(oldpath, newpath) +} + +func syncDir(name string) error { + f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go new file mode 100644 index 000000000..d75f66a9e --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_unix.go @@ -0,0 +1,98 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// +build darwin dragonfly freebsd linux netbsd openbsd + +package storage + +import ( + "os" + "syscall" +) + +type unixFileLock struct { + f *os.File +} + +func (fl *unixFileLock) release() error { + if err := setFileLock(fl.f, false, false); err != nil { + return err + } + return fl.f.Close() +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + var flag int + if readOnly { + flag = os.O_RDONLY + } else { + flag = os.O_RDWR + } + f, err := os.OpenFile(path, flag, 0) + if os.IsNotExist(err) { + f, err = os.OpenFile(path, flag|os.O_CREATE, 0644) + } + if err != nil { + return + } + err = setFileLock(f, readOnly, true) + if err != nil { + f.Close() + return + } + fl = &unixFileLock{f: f} + return +} + +func setFileLock(f *os.File, readOnly, lock bool) error { + how := syscall.LOCK_UN + if lock { + if readOnly { + how = syscall.LOCK_SH + } else { + how = syscall.LOCK_EX + } + } + return syscall.Flock(int(f.Fd()), how|syscall.LOCK_NB) +} + +func rename(oldpath, newpath string) error { + return os.Rename(oldpath, newpath) +} + +func isErrInvalid(err error) bool { + if err == os.ErrInvalid { + return true + } + // Go < 1.8 + if syserr, ok := err.(*os.SyscallError); ok && syserr.Err == syscall.EINVAL { + return true + } + // Go >= 1.8 returns *os.PathError instead + if patherr, ok := err.(*os.PathError); ok && patherr.Err == syscall.EINVAL { + return true + } + return false +} + +func syncDir(name string) error { + // As per fsync manpage, Linux seems to expect fsync on directory, however + // some system don't support this, so we will ignore syscall.EINVAL. + // + // From fsync(2): + // Calling fsync() does not necessarily ensure that the entry in the + // directory containing the file has also reached disk. For that an + // explicit fsync() on a file descriptor for the directory is also needed. 
+ f, err := os.Open(name) + if err != nil { + return err + } + defer f.Close() + if err := f.Sync(); err != nil && !isErrInvalid(err) { + return err + } + return nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go new file mode 100644 index 000000000..899335fd7 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/file_storage_windows.go @@ -0,0 +1,78 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "syscall" + "unsafe" +) + +var ( + modkernel32 = syscall.NewLazyDLL("kernel32.dll") + + procMoveFileExW = modkernel32.NewProc("MoveFileExW") +) + +const ( + _MOVEFILE_REPLACE_EXISTING = 1 +) + +type windowsFileLock struct { + fd syscall.Handle +} + +func (fl *windowsFileLock) release() error { + return syscall.Close(fl.fd) +} + +func newFileLock(path string, readOnly bool) (fl fileLock, err error) { + pathp, err := syscall.UTF16PtrFromString(path) + if err != nil { + return + } + var access, shareMode uint32 + if readOnly { + access = syscall.GENERIC_READ + shareMode = syscall.FILE_SHARE_READ + } else { + access = syscall.GENERIC_READ | syscall.GENERIC_WRITE + } + fd, err := syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_EXISTING, syscall.FILE_ATTRIBUTE_NORMAL, 0) + if err == syscall.ERROR_FILE_NOT_FOUND { + fd, err = syscall.CreateFile(pathp, access, shareMode, nil, syscall.OPEN_ALWAYS, syscall.FILE_ATTRIBUTE_NORMAL, 0) + } + if err != nil { + return + } + fl = &windowsFileLock{fd: fd} + return +} + +func moveFileEx(from *uint16, to *uint16, flags uint32) error { + r1, _, e1 := syscall.Syscall(procMoveFileExW.Addr(), 3, uintptr(unsafe.Pointer(from)), uintptr(unsafe.Pointer(to)), uintptr(flags)) + if r1 == 0 { + if e1 != 0 { + return error(e1) + } + return 
syscall.EINVAL + } + return nil +} + +func rename(oldpath, newpath string) error { + from, err := syscall.UTF16PtrFromString(oldpath) + if err != nil { + return err + } + to, err := syscall.UTF16PtrFromString(newpath) + if err != nil { + return err + } + return moveFileEx(from, to, _MOVEFILE_REPLACE_EXISTING) +} + +func syncDir(name string) error { return nil } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go new file mode 100644 index 000000000..838f1bee1 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/mem_storage.go @@ -0,0 +1,222 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package storage + +import ( + "bytes" + "os" + "sync" +) + +const typeShift = 4 + +// Verify at compile-time that typeShift is large enough to cover all FileType +// values by confirming that 0 == 0. +var _ [0]struct{} = [TypeAll >> typeShift]struct{}{} + +type memStorageLock struct { + ms *memStorage +} + +func (lock *memStorageLock) Unlock() { + ms := lock.ms + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.slock == lock { + ms.slock = nil + } + return +} + +// memStorage is a memory-backed storage. +type memStorage struct { + mu sync.Mutex + slock *memStorageLock + files map[uint64]*memFile + meta FileDesc +} + +// NewMemStorage returns a new memory-backed storage implementation. 
+func NewMemStorage() Storage { + return &memStorage{ + files: make(map[uint64]*memFile), + } +} + +func (ms *memStorage) Lock() (Locker, error) { + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.slock != nil { + return nil, ErrLocked + } + ms.slock = &memStorageLock{ms: ms} + return ms.slock, nil +} + +func (*memStorage) Log(str string) {} + +func (ms *memStorage) SetMeta(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + + ms.mu.Lock() + ms.meta = fd + ms.mu.Unlock() + return nil +} + +func (ms *memStorage) GetMeta() (FileDesc, error) { + ms.mu.Lock() + defer ms.mu.Unlock() + if ms.meta.Zero() { + return FileDesc{}, os.ErrNotExist + } + return ms.meta, nil +} + +func (ms *memStorage) List(ft FileType) ([]FileDesc, error) { + ms.mu.Lock() + var fds []FileDesc + for x := range ms.files { + fd := unpackFile(x) + if fd.Type&ft != 0 { + fds = append(fds, fd) + } + } + ms.mu.Unlock() + return fds, nil +} + +func (ms *memStorage) Open(fd FileDesc) (Reader, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + ms.mu.Lock() + defer ms.mu.Unlock() + if m, exist := ms.files[packFile(fd)]; exist { + if m.open { + return nil, errFileOpen + } + m.open = true + return &memReader{Reader: bytes.NewReader(m.Bytes()), ms: ms, m: m}, nil + } + return nil, os.ErrNotExist +} + +func (ms *memStorage) Create(fd FileDesc) (Writer, error) { + if !FileDescOk(fd) { + return nil, ErrInvalidFile + } + + x := packFile(fd) + ms.mu.Lock() + defer ms.mu.Unlock() + m, exist := ms.files[x] + if exist { + if m.open { + return nil, errFileOpen + } + m.Reset() + } else { + m = &memFile{} + ms.files[x] = m + } + m.open = true + return &memWriter{memFile: m, ms: ms}, nil +} + +func (ms *memStorage) Remove(fd FileDesc) error { + if !FileDescOk(fd) { + return ErrInvalidFile + } + + x := packFile(fd) + ms.mu.Lock() + defer ms.mu.Unlock() + if _, exist := ms.files[x]; exist { + delete(ms.files, x) + return nil + } + return os.ErrNotExist +} + +func (ms *memStorage) 
Rename(oldfd, newfd FileDesc) error { + if !FileDescOk(oldfd) || !FileDescOk(newfd) { + return ErrInvalidFile + } + if oldfd == newfd { + return nil + } + + oldx := packFile(oldfd) + newx := packFile(newfd) + ms.mu.Lock() + defer ms.mu.Unlock() + oldm, exist := ms.files[oldx] + if !exist { + return os.ErrNotExist + } + newm, exist := ms.files[newx] + if (exist && newm.open) || oldm.open { + return errFileOpen + } + delete(ms.files, oldx) + ms.files[newx] = oldm + return nil +} + +func (*memStorage) Close() error { return nil } + +type memFile struct { + bytes.Buffer + open bool +} + +type memReader struct { + *bytes.Reader + ms *memStorage + m *memFile + closed bool +} + +func (mr *memReader) Close() error { + mr.ms.mu.Lock() + defer mr.ms.mu.Unlock() + if mr.closed { + return ErrClosed + } + mr.m.open = false + return nil +} + +type memWriter struct { + *memFile + ms *memStorage + closed bool +} + +func (*memWriter) Sync() error { return nil } + +func (mw *memWriter) Close() error { + mw.ms.mu.Lock() + defer mw.ms.mu.Unlock() + if mw.closed { + return ErrClosed + } + mw.memFile.open = false + return nil +} + +func packFile(fd FileDesc) uint64 { + return uint64(fd.Num)<> typeShift)} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go new file mode 100644 index 000000000..4e4a72425 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/storage/storage.go @@ -0,0 +1,187 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package storage provides storage abstraction for LevelDB. +package storage + +import ( + "errors" + "fmt" + "io" +) + +// FileType represent a file type. +type FileType int + +// File types. 
+const ( + TypeManifest FileType = 1 << iota + TypeJournal + TypeTable + TypeTemp + + TypeAll = TypeManifest | TypeJournal | TypeTable | TypeTemp +) + +func (t FileType) String() string { + switch t { + case TypeManifest: + return "manifest" + case TypeJournal: + return "journal" + case TypeTable: + return "table" + case TypeTemp: + return "temp" + } + return fmt.Sprintf("", t) +} + +// Common error. +var ( + ErrInvalidFile = errors.New("leveldb/storage: invalid file for argument") + ErrLocked = errors.New("leveldb/storage: already locked") + ErrClosed = errors.New("leveldb/storage: closed") +) + +// ErrCorrupted is the type that wraps errors that indicate corruption of +// a file. Package storage has its own type instead of using +// errors.ErrCorrupted to prevent circular import. +type ErrCorrupted struct { + Fd FileDesc + Err error +} + +func isCorrupted(err error) bool { + switch err.(type) { + case *ErrCorrupted: + return true + } + return false +} + +func (e *ErrCorrupted) Error() string { + if !e.Fd.Zero() { + return fmt.Sprintf("%v [file=%v]", e.Err, e.Fd) + } + return e.Err.Error() +} + +// Syncer is the interface that wraps basic Sync method. +type Syncer interface { + // Sync commits the current contents of the file to stable storage. + Sync() error +} + +// Reader is the interface that groups the basic Read, Seek, ReadAt and Close +// methods. +type Reader interface { + io.ReadSeeker + io.ReaderAt + io.Closer +} + +// Writer is the interface that groups the basic Write, Sync and Close +// methods. +type Writer interface { + io.WriteCloser + Syncer +} + +// Locker is the interface that wraps Unlock method. +type Locker interface { + Unlock() +} + +// FileDesc is a 'file descriptor'. 
+type FileDesc struct { + Type FileType + Num int64 +} + +func (fd FileDesc) String() string { + switch fd.Type { + case TypeManifest: + return fmt.Sprintf("MANIFEST-%06d", fd.Num) + case TypeJournal: + return fmt.Sprintf("%06d.log", fd.Num) + case TypeTable: + return fmt.Sprintf("%06d.ldb", fd.Num) + case TypeTemp: + return fmt.Sprintf("%06d.tmp", fd.Num) + default: + return fmt.Sprintf("%#x-%d", fd.Type, fd.Num) + } +} + +// Zero returns true if fd == (FileDesc{}). +func (fd FileDesc) Zero() bool { + return fd == (FileDesc{}) +} + +// FileDescOk returns true if fd is a valid 'file descriptor'. +func FileDescOk(fd FileDesc) bool { + switch fd.Type { + case TypeManifest: + case TypeJournal: + case TypeTable: + case TypeTemp: + default: + return false + } + return fd.Num >= 0 +} + +// Storage is the storage. A storage instance must be safe for concurrent use. +type Storage interface { + // Lock locks the storage. Any subsequent attempt to call Lock will fail + // until the last lock released. + // Caller should call Unlock method after use. + Lock() (Locker, error) + + // Log logs a string. This is used for logging. + // An implementation may write to a file, stdout or simply do nothing. + Log(str string) + + // SetMeta store 'file descriptor' that can later be acquired using GetMeta + // method. The 'file descriptor' should point to a valid file. + // SetMeta should be implemented in such way that changes should happen + // atomically. + SetMeta(fd FileDesc) error + + // GetMeta returns 'file descriptor' stored in meta. The 'file descriptor' + // can be updated using SetMeta method. + // Returns os.ErrNotExist if meta doesn't store any 'file descriptor', or + // 'file descriptor' point to nonexistent file. + GetMeta() (FileDesc, error) + + // List returns file descriptors that match the given file types. + // The file types may be OR'ed together. + List(ft FileType) ([]FileDesc, error) + + // Open opens file with the given 'file descriptor' read-only. 
+ // Returns os.ErrNotExist error if the file does not exist. + // Returns ErrClosed if the underlying storage is closed. + Open(fd FileDesc) (Reader, error) + + // Create creates file with the given 'file descriptor', truncate if already + // exist and opens write-only. + // Returns ErrClosed if the underlying storage is closed. + Create(fd FileDesc) (Writer, error) + + // Remove removes file with the given 'file descriptor'. + // Returns ErrClosed if the underlying storage is closed. + Remove(fd FileDesc) error + + // Rename renames file from oldfd to newfd. + // Returns ErrClosed if the underlying storage is closed. + Rename(oldfd, newfd FileDesc) error + + // Close closes the storage. + // It is valid to call Close multiple times. Other methods should not be + // called after the storage has been closed. + Close() error +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table.go new file mode 100644 index 000000000..1fac60d05 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table.go @@ -0,0 +1,531 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sort" + "sync/atomic" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/table" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// tFile holds basic information about a table. +type tFile struct { + fd storage.FileDesc + seekLeft int32 + size int64 + imin, imax internalKey +} + +// Returns true if given key is after largest key of this table. 
+func (t *tFile) after(icmp *iComparer, ukey []byte) bool { + return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0 +} + +// Returns true if given key is before smallest key of this table. +func (t *tFile) before(icmp *iComparer, ukey []byte) bool { + return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0 +} + +// Returns true if given key range overlaps with this table key range. +func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool { + return !t.after(icmp, umin) && !t.before(icmp, umax) +} + +// Cosumes one seek and return current seeks left. +func (t *tFile) consumeSeek() int32 { + return atomic.AddInt32(&t.seekLeft, -1) +} + +// Creates new tFile. +func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile { + f := &tFile{ + fd: fd, + size: size, + imin: imin, + imax: imax, + } + + // We arrange to automatically compact this file after + // a certain number of seeks. Let's assume: + // (1) One seek costs 10ms + // (2) Writing or reading 1MB costs 10ms (100MB/s) + // (3) A compaction of 1MB does 25MB of IO: + // 1MB read from this level + // 10-12MB read from next level (boundaries may be misaligned) + // 10-12MB written to next level + // This implies that 25 seeks cost the same as the compaction + // of 1MB of data. I.e., one seek costs approximately the + // same as the compaction of 40KB of data. We are a little + // conservative and allow approximately one seek for every 16KB + // of data before triggering a compaction. + f.seekLeft = int32(size / 16384) + if f.seekLeft < 100 { + f.seekLeft = 100 + } + + return f +} + +func tableFileFromRecord(r atRecord) *tFile { + return newTableFile(storage.FileDesc{Type: storage.TypeTable, Num: r.num}, r.size, r.imin, r.imax) +} + +// tFiles hold multiple tFile. 
+type tFiles []*tFile + +func (tf tFiles) Len() int { return len(tf) } +func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] } + +func (tf tFiles) nums() string { + x := "[ " + for i, f := range tf { + if i != 0 { + x += ", " + } + x += fmt.Sprint(f.fd.Num) + } + x += " ]" + return x +} + +// Returns true if i smallest key is less than j. +// This used for sort by key in ascending order. +func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool { + a, b := tf[i], tf[j] + n := icmp.Compare(a.imin, b.imin) + if n == 0 { + return a.fd.Num < b.fd.Num + } + return n < 0 +} + +// Returns true if i file number is greater than j. +// This used for sort by file number in descending order. +func (tf tFiles) lessByNum(i, j int) bool { + return tf[i].fd.Num > tf[j].fd.Num +} + +// Sorts tables by key in ascending order. +func (tf tFiles) sortByKey(icmp *iComparer) { + sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp}) +} + +// Sorts tables by file number in descending order. +func (tf tFiles) sortByNum() { + sort.Sort(&tFilesSortByNum{tFiles: tf}) +} + +// Returns sum of all tables size. +func (tf tFiles) size() (sum int64) { + for _, t := range tf { + sum += t.size + } + return sum +} + +// Searches smallest index of tables whose its smallest +// key is after or equal with given key. +func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int { + return sort.Search(len(tf), func(i int) bool { + return icmp.Compare(tf[i].imin, ikey) >= 0 + }) +} + +// Searches smallest index of tables whose its largest +// key is after or equal with given key. +func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int { + return sort.Search(len(tf), func(i int) bool { + return icmp.Compare(tf[i].imax, ikey) >= 0 + }) +} + +// Returns true if given key range overlaps with one or more +// tables key range. If unsorted is true then binary search will not be used. 
+func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool { + if unsorted { + // Check against all files. + for _, t := range tf { + if t.overlaps(icmp, umin, umax) { + return true + } + } + return false + } + + i := 0 + if len(umin) > 0 { + // Find the earliest possible internal key for min. + i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek)) + } + if i >= len(tf) { + // Beginning of range is after all files, so no overlap. + return false + } + return !tf[i].before(icmp, umax) +} + +// Returns tables whose its key range overlaps with given key range. +// Range will be expanded if ukey found hop across tables. +// If overlapped is true then the search will be restarted if umax +// expanded. +// The dst content will be overwritten. +func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles { + dst = dst[:0] + for i := 0; i < len(tf); { + t := tf[i] + if t.overlaps(icmp, umin, umax) { + if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 { + umin = t.imin.ukey() + dst = dst[:0] + i = 0 + continue + } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 { + umax = t.imax.ukey() + // Restart search if it is overlapped. + if overlapped { + dst = dst[:0] + i = 0 + continue + } + } + + dst = append(dst, t) + } + i++ + } + + return dst +} + +// Returns tables key range. +func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) { + for i, t := range tf { + if i == 0 { + imin, imax = t.imin, t.imax + continue + } + if icmp.Compare(t.imin, imin) < 0 { + imin = t.imin + } + if icmp.Compare(t.imax, imax) > 0 { + imax = t.imax + } + } + + return +} + +// Creates iterator index from tables. 
+func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer { + if slice != nil { + var start, limit int + if slice.Start != nil { + start = tf.searchMax(icmp, internalKey(slice.Start)) + } + if slice.Limit != nil { + limit = tf.searchMin(icmp, internalKey(slice.Limit)) + } else { + limit = tf.Len() + } + tf = tf[start:limit] + } + return iterator.NewArrayIndexer(&tFilesArrayIndexer{ + tFiles: tf, + tops: tops, + icmp: icmp, + slice: slice, + ro: ro, + }) +} + +// Tables iterator index. +type tFilesArrayIndexer struct { + tFiles + tops *tOps + icmp *iComparer + slice *util.Range + ro *opt.ReadOptions +} + +func (a *tFilesArrayIndexer) Search(key []byte) int { + return a.searchMax(a.icmp, internalKey(key)) +} + +func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator { + if i == 0 || i == a.Len()-1 { + return a.tops.newIterator(a.tFiles[i], a.slice, a.ro) + } + return a.tops.newIterator(a.tFiles[i], nil, a.ro) +} + +// Helper type for sortByKey. +type tFilesSortByKey struct { + tFiles + icmp *iComparer +} + +func (x *tFilesSortByKey) Less(i, j int) bool { + return x.lessByKey(x.icmp, i, j) +} + +// Helper type for sortByNum. +type tFilesSortByNum struct { + tFiles +} + +func (x *tFilesSortByNum) Less(i, j int) bool { + return x.lessByNum(i, j) +} + +// Table operations. +type tOps struct { + s *session + noSync bool + evictRemoved bool + cache *cache.Cache + bcache *cache.Cache + bpool *util.BufferPool +} + +// Creates an empty table and returns table writer. +func (t *tOps) create() (*tWriter, error) { + fd := storage.FileDesc{Type: storage.TypeTable, Num: t.s.allocFileNum()} + fw, err := t.s.stor.Create(fd) + if err != nil { + return nil, err + } + return &tWriter{ + t: t, + fd: fd, + w: fw, + tw: table.NewWriter(fw, t.s.o.Options), + }, nil +} + +// Builds table from src iterator. 
+func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) { + w, err := t.create() + if err != nil { + return + } + + defer func() { + if err != nil { + w.drop() + } + }() + + for src.Next() { + err = w.append(src.Key(), src.Value()) + if err != nil { + return + } + } + err = src.Error() + if err != nil { + return + } + + n = w.tw.EntriesLen() + f, err = w.finish() + return +} + +// Opens table. It returns a cache handle, which should +// be released after use. +func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) { + ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) { + var r storage.Reader + r, err = t.s.stor.Open(f.fd) + if err != nil { + return 0, nil + } + + var bcache *cache.NamespaceGetter + if t.bcache != nil { + bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)} + } + + var tr *table.Reader + tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options) + if err != nil { + r.Close() + return 0, nil + } + return 1, tr + + }) + if ch == nil && err == nil { + err = ErrClosed + } + return +} + +// Finds key/value pair whose key is greater than or equal to the +// given key. +func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) { + ch, err := t.open(f) + if err != nil { + return nil, nil, err + } + defer ch.Release() + return ch.Value().(*table.Reader).Find(key, true, ro) +} + +// Finds key that is greater than or equal to the given key. +func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) { + ch, err := t.open(f) + if err != nil { + return nil, err + } + defer ch.Release() + return ch.Value().(*table.Reader).FindKey(key, true, ro) +} + +// Returns approximate offset of the given key. 
+func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
+	ch, err := t.open(f)
+	if err != nil {
+		return
+	}
+	defer ch.Release()
+	return ch.Value().(*table.Reader).OffsetOf(key)
+}
+
+// Creates an iterator from the given table.
+func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
+	ch, err := t.open(f)
+	if err != nil {
+		return iterator.NewEmptyIterator(err)
+	}
+	iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
+	iter.SetReleaser(ch)
+	return iter
+}
+
+// Removes table from persistent storage. It waits until
+// no one uses the table.
+func (t *tOps) remove(f *tFile) {
+	t.cache.Delete(0, uint64(f.fd.Num), func() {
+		if err := t.s.stor.Remove(f.fd); err != nil {
+			t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
+		} else {
+			t.s.logf("table@remove removed @%d", f.fd.Num)
+		}
+		if t.evictRemoved && t.bcache != nil {
+			t.bcache.EvictNS(uint64(f.fd.Num))
+		}
+	})
+}
+
+// Closes the table ops instance. It will close all tables,
+// regardless of whether they are still used or not.
+func (t *tOps) close() {
+	t.bpool.Close()
+	t.cache.Close()
+	if t.bcache != nil {
+		t.bcache.CloseWeak()
+	}
+}
+
+// Creates new initialized table ops instance.
+func newTableOps(s *session) *tOps {
+	var (
+		cacher cache.Cacher
+		bcache *cache.Cache
+		bpool  *util.BufferPool
+	)
+	if s.o.GetOpenFilesCacheCapacity() > 0 {
+		cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
+	}
+	if !s.o.GetDisableBlockCache() {
+		var bcacher cache.Cacher
+		if s.o.GetBlockCacheCapacity() > 0 {
+			bcacher = s.o.GetBlockCacher().New(s.o.GetBlockCacheCapacity())
+		}
+		bcache = cache.NewCache(bcacher)
+	}
+	if !s.o.GetDisableBufferPool() {
+		bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
+	}
+	return &tOps{
+		s:            s,
+		noSync:       s.o.GetNoSync(),
+		evictRemoved: s.o.GetBlockCacheEvictRemoved(),
+		cache:        cache.NewCache(cacher),
+		bcache:       bcache,
+		bpool:        bpool,
+	}
+}
+
+// tWriter wraps the table writer.
It keep track of file descriptor +// and added key range. +type tWriter struct { + t *tOps + + fd storage.FileDesc + w storage.Writer + tw *table.Writer + + first, last []byte +} + +// Append key/value pair to the table. +func (w *tWriter) append(key, value []byte) error { + if w.first == nil { + w.first = append([]byte{}, key...) + } + w.last = append(w.last[:0], key...) + return w.tw.Append(key, value) +} + +// Returns true if the table is empty. +func (w *tWriter) empty() bool { + return w.first == nil +} + +// Closes the storage.Writer. +func (w *tWriter) close() { + if w.w != nil { + w.w.Close() + w.w = nil + } +} + +// Finalizes the table and returns table file. +func (w *tWriter) finish() (f *tFile, err error) { + defer w.close() + err = w.tw.Close() + if err != nil { + return + } + if !w.t.noSync { + err = w.w.Sync() + if err != nil { + return + } + } + f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last)) + return +} + +// Drops the table. +func (w *tWriter) drop() { + w.close() + w.t.s.stor.Remove(w.fd) + w.t.s.reuseFileNum(w.fd.Num) + w.tw = nil + w.first = nil + w.last = nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go new file mode 100644 index 000000000..496feb6fb --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/reader.go @@ -0,0 +1,1139 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package table + +import ( + "encoding/binary" + "fmt" + "io" + "sort" + "strings" + "sync" + + "github.com/golang/snappy" + + "github.com/syndtr/goleveldb/leveldb/cache" + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/errors" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/storage" + "github.com/syndtr/goleveldb/leveldb/util" +) + +// Reader errors. +var ( + ErrNotFound = errors.ErrNotFound + ErrReaderReleased = errors.New("leveldb/table: reader released") + ErrIterReleased = errors.New("leveldb/table: iterator released") +) + +// ErrCorrupted describes error due to corruption. This error will be wrapped +// with errors.ErrCorrupted. +type ErrCorrupted struct { + Pos int64 + Size int64 + Kind string + Reason string +} + +func (e *ErrCorrupted) Error() string { + return fmt.Sprintf("leveldb/table: corruption on %s (pos=%d): %s", e.Kind, e.Pos, e.Reason) +} + +func max(x, y int) int { + if x > y { + return x + } + return y +} + +type block struct { + bpool *util.BufferPool + bh blockHandle + data []byte + restartsLen int + restartsOffset int +} + +func (b *block) seek(cmp comparer.Comparer, rstart, rlimit int, key []byte) (index, offset int, err error) { + index = sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { + offset := int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) + offset++ // shared always zero, since this is a restart point + v1, n1 := binary.Uvarint(b.data[offset:]) // key length + _, n2 := binary.Uvarint(b.data[offset+n1:]) // value length + m := offset + n1 + n2 + return cmp.Compare(b.data[m:m+int(v1)], key) > 0 + }) + rstart - 1 + if index < rstart { + // The smallest key is greater-than key sought. 
+ index = rstart + } + offset = int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) + return +} + +func (b *block) restartIndex(rstart, rlimit, offset int) int { + return sort.Search(b.restartsLen-rstart-(b.restartsLen-rlimit), func(i int) bool { + return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*(rstart+i):])) > offset + }) + rstart - 1 +} + +func (b *block) restartOffset(index int) int { + return int(binary.LittleEndian.Uint32(b.data[b.restartsOffset+4*index:])) +} + +func (b *block) entry(offset int) (key, value []byte, nShared, n int, err error) { + if offset >= b.restartsOffset { + if offset != b.restartsOffset { + err = &ErrCorrupted{Reason: "entries offset not aligned"} + } + return + } + v0, n0 := binary.Uvarint(b.data[offset:]) // Shared prefix length + v1, n1 := binary.Uvarint(b.data[offset+n0:]) // Key length + v2, n2 := binary.Uvarint(b.data[offset+n0+n1:]) // Value length + m := n0 + n1 + n2 + n = m + int(v1) + int(v2) + if n0 <= 0 || n1 <= 0 || n2 <= 0 || offset+n > b.restartsOffset { + err = &ErrCorrupted{Reason: "entries corrupted"} + return + } + key = b.data[offset+m : offset+m+int(v1)] + value = b.data[offset+m+int(v1) : offset+n] + nShared = int(v0) + return +} + +func (b *block) Release() { + b.bpool.Put(b.data) + b.bpool = nil + b.data = nil +} + +type dir int + +const ( + dirReleased dir = iota - 1 + dirSOI + dirEOI + dirBackward + dirForward +) + +type blockIter struct { + tr *Reader + block *block + blockReleaser util.Releaser + releaser util.Releaser + key, value []byte + offset int + // Previous offset, only filled by Next. + prevOffset int + prevNode []int + prevKeys []byte + restartIndex int + // Iterator direction. + dir dir + // Restart index slice range. + riStart int + riLimit int + // Offset slice range. + offsetStart int + offsetRealStart int + offsetLimit int + // Error. 
+ err error +} + +func (i *blockIter) sErr(err error) { + i.err = err + i.key = nil + i.value = nil + i.prevNode = nil + i.prevKeys = nil +} + +func (i *blockIter) reset() { + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.restartIndex = i.riStart + i.offset = i.offsetStart + i.dir = dirSOI + i.key = i.key[:0] + i.value = nil +} + +func (i *blockIter) isFirst() bool { + switch i.dir { + case dirForward: + return i.prevOffset == i.offsetRealStart + case dirBackward: + return len(i.prevNode) == 1 && i.restartIndex == i.riStart + } + return false +} + +func (i *blockIter) isLast() bool { + switch i.dir { + case dirForward, dirBackward: + return i.offset == i.offsetLimit + } + return false +} + +func (i *blockIter) First() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.dir = dirSOI + return i.Next() +} + +func (i *blockIter) Last() bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + i.dir = dirEOI + return i.Prev() +} + +func (i *blockIter) Seek(key []byte) bool { + if i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + ri, offset, err := i.block.seek(i.tr.cmp, i.riStart, i.riLimit, key) + if err != nil { + i.sErr(err) + return false + } + i.restartIndex = ri + i.offset = max(i.offsetStart, offset) + if i.dir == dirSOI || i.dir == dirEOI { + i.dir = dirForward + } + for i.Next() { + if i.tr.cmp.Compare(i.key, key) >= 0 { + return true + } + } + return false +} + +func (i *blockIter) Next() bool { + if i.dir == dirEOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + 
+ if i.dir == dirSOI { + i.restartIndex = i.riStart + i.offset = i.offsetStart + } else if i.dir == dirBackward { + i.prevNode = i.prevNode[:0] + i.prevKeys = i.prevKeys[:0] + } + for i.offset < i.offsetRealStart { + key, value, nShared, n, err := i.block.entry(i.offset) + if err != nil { + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) + return false + } + if n == 0 { + i.dir = dirEOI + return false + } + i.key = append(i.key[:nShared], key...) + i.value = value + i.offset += n + } + if i.offset >= i.offsetLimit { + i.dir = dirEOI + if i.offset != i.offsetLimit { + i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) + } + return false + } + key, value, nShared, n, err := i.block.entry(i.offset) + if err != nil { + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) + return false + } + if n == 0 { + i.dir = dirEOI + return false + } + i.key = append(i.key[:nShared], key...) + i.value = value + i.prevOffset = i.offset + i.offset += n + i.dir = dirForward + return true +} + +func (i *blockIter) Prev() bool { + if i.dir == dirSOI || i.err != nil { + return false + } else if i.dir == dirReleased { + i.err = ErrIterReleased + return false + } + + var ri int + if i.dir == dirForward { + // Change direction. + i.offset = i.prevOffset + if i.offset == i.offsetRealStart { + i.dir = dirSOI + return false + } + ri = i.block.restartIndex(i.restartIndex, i.riLimit, i.offset) + i.dir = dirBackward + } else if i.dir == dirEOI { + // At the end of iterator. + i.restartIndex = i.riLimit + i.offset = i.offsetLimit + if i.offset == i.offsetRealStart { + i.dir = dirSOI + return false + } + ri = i.riLimit - 1 + i.dir = dirBackward + } else if len(i.prevNode) == 1 { + // This is the end of a restart range. + i.offset = i.prevNode[0] + i.prevNode = i.prevNode[:0] + if i.restartIndex == i.riStart { + i.dir = dirSOI + return false + } + i.restartIndex-- + ri = i.restartIndex + } else { + // In the middle of restart range, get from cache. 
+ n := len(i.prevNode) - 3 + node := i.prevNode[n:] + i.prevNode = i.prevNode[:n] + // Get the key. + ko := node[0] + i.key = append(i.key[:0], i.prevKeys[ko:]...) + i.prevKeys = i.prevKeys[:ko] + // Get the value. + vo := node[1] + vl := vo + node[2] + i.value = i.block.data[vo:vl] + i.offset = vl + return true + } + // Build entries cache. + i.key = i.key[:0] + i.value = nil + offset := i.block.restartOffset(ri) + if offset == i.offset { + ri-- + if ri < 0 { + i.dir = dirSOI + return false + } + offset = i.block.restartOffset(ri) + } + i.prevNode = append(i.prevNode, offset) + for { + key, value, nShared, n, err := i.block.entry(offset) + if err != nil { + i.sErr(i.tr.fixErrCorruptedBH(i.block.bh, err)) + return false + } + if offset >= i.offsetRealStart { + if i.value != nil { + // Appends 3 variables: + // 1. Previous keys offset + // 2. Value offset in the data block + // 3. Value length + i.prevNode = append(i.prevNode, len(i.prevKeys), offset-len(i.value), len(i.value)) + i.prevKeys = append(i.prevKeys, i.key...) + } + i.value = value + } + i.key = append(i.key[:nShared], key...) + offset += n + // Stop if target offset reached. 
+ if offset >= i.offset { + if offset != i.offset { + i.sErr(i.tr.newErrCorruptedBH(i.block.bh, "entries offset not aligned")) + return false + } + + break + } + } + i.restartIndex = ri + i.offset = offset + return true +} + +func (i *blockIter) Key() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.key +} + +func (i *blockIter) Value() []byte { + if i.err != nil || i.dir <= dirEOI { + return nil + } + return i.value +} + +func (i *blockIter) Release() { + if i.dir != dirReleased { + i.tr = nil + i.block = nil + i.prevNode = nil + i.prevKeys = nil + i.key = nil + i.value = nil + i.dir = dirReleased + if i.blockReleaser != nil { + i.blockReleaser.Release() + i.blockReleaser = nil + } + if i.releaser != nil { + i.releaser.Release() + i.releaser = nil + } + } +} + +func (i *blockIter) SetReleaser(releaser util.Releaser) { + if i.dir == dirReleased { + panic(util.ErrReleased) + } + if i.releaser != nil && releaser != nil { + panic(util.ErrHasReleaser) + } + i.releaser = releaser +} + +func (i *blockIter) Valid() bool { + return i.err == nil && (i.dir == dirBackward || i.dir == dirForward) +} + +func (i *blockIter) Error() error { + return i.err +} + +type filterBlock struct { + bpool *util.BufferPool + data []byte + oOffset int + baseLg uint + filtersNum int +} + +func (b *filterBlock) contains(filter filter.Filter, offset uint64, key []byte) bool { + i := int(offset >> b.baseLg) + if i < b.filtersNum { + o := b.data[b.oOffset+i*4:] + n := int(binary.LittleEndian.Uint32(o)) + m := int(binary.LittleEndian.Uint32(o[4:])) + if n < m && m <= b.oOffset { + return filter.Contains(b.data[n:m], key) + } else if n == m { + return false + } + } + return true +} + +func (b *filterBlock) Release() { + b.bpool.Put(b.data) + b.bpool = nil + b.data = nil +} + +type indexIter struct { + *blockIter + tr *Reader + slice *util.Range + // Options + fillCache bool +} + +func (i *indexIter) Get() iterator.Iterator { + value := i.Value() + if value == nil { + 
return nil + } + dataBH, n := decodeBlockHandle(value) + if n == 0 { + return iterator.NewEmptyIterator(i.tr.newErrCorruptedBH(i.tr.indexBH, "bad data block handle")) + } + + var slice *util.Range + if i.slice != nil && (i.blockIter.isFirst() || i.blockIter.isLast()) { + slice = i.slice + } + return i.tr.getDataIterErr(dataBH, slice, i.tr.verifyChecksum, i.fillCache) +} + +// Reader is a table reader. +type Reader struct { + mu sync.RWMutex + fd storage.FileDesc + reader io.ReaderAt + cache *cache.NamespaceGetter + err error + bpool *util.BufferPool + // Options + o *opt.Options + cmp comparer.Comparer + filter filter.Filter + verifyChecksum bool + + dataEnd int64 + metaBH, indexBH, filterBH blockHandle + indexBlock *block + filterBlock *filterBlock +} + +func (r *Reader) blockKind(bh blockHandle) string { + switch bh.offset { + case r.metaBH.offset: + return "meta-block" + case r.indexBH.offset: + return "index-block" + case r.filterBH.offset: + if r.filterBH.length > 0 { + return "filter-block" + } + } + return "data-block" +} + +func (r *Reader) newErrCorrupted(pos, size int64, kind, reason string) error { + return &errors.ErrCorrupted{Fd: r.fd, Err: &ErrCorrupted{Pos: pos, Size: size, Kind: kind, Reason: reason}} +} + +func (r *Reader) newErrCorruptedBH(bh blockHandle, reason string) error { + return r.newErrCorrupted(int64(bh.offset), int64(bh.length), r.blockKind(bh), reason) +} + +func (r *Reader) fixErrCorruptedBH(bh blockHandle, err error) error { + if cerr, ok := err.(*ErrCorrupted); ok { + cerr.Pos = int64(bh.offset) + cerr.Size = int64(bh.length) + cerr.Kind = r.blockKind(bh) + return &errors.ErrCorrupted{Fd: r.fd, Err: cerr} + } + return err +} + +func (r *Reader) readRawBlock(bh blockHandle, verifyChecksum bool) ([]byte, error) { + data := r.bpool.Get(int(bh.length + blockTrailerLen)) + if _, err := r.reader.ReadAt(data, int64(bh.offset)); err != nil && err != io.EOF { + return nil, err + } + + if verifyChecksum { + n := bh.length + 1 + checksum0 := 
binary.LittleEndian.Uint32(data[n:]) + checksum1 := util.NewCRC(data[:n]).Value() + if checksum0 != checksum1 { + r.bpool.Put(data) + return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("checksum mismatch, want=%#x got=%#x", checksum0, checksum1)) + } + } + + switch data[bh.length] { + case blockTypeNoCompression: + data = data[:bh.length] + case blockTypeSnappyCompression: + decLen, err := snappy.DecodedLen(data[:bh.length]) + if err != nil { + r.bpool.Put(data) + return nil, r.newErrCorruptedBH(bh, err.Error()) + } + decData := r.bpool.Get(decLen) + decData, err = snappy.Decode(decData, data[:bh.length]) + r.bpool.Put(data) + if err != nil { + r.bpool.Put(decData) + return nil, r.newErrCorruptedBH(bh, err.Error()) + } + data = decData + default: + r.bpool.Put(data) + return nil, r.newErrCorruptedBH(bh, fmt.Sprintf("unknown compression type %#x", data[bh.length])) + } + return data, nil +} + +func (r *Reader) readBlock(bh blockHandle, verifyChecksum bool) (*block, error) { + data, err := r.readRawBlock(bh, verifyChecksum) + if err != nil { + return nil, err + } + restartsLen := int(binary.LittleEndian.Uint32(data[len(data)-4:])) + b := &block{ + bpool: r.bpool, + bh: bh, + data: data, + restartsLen: restartsLen, + restartsOffset: len(data) - (restartsLen+1)*4, + } + return b, nil +} + +func (r *Reader) readBlockCached(bh blockHandle, verifyChecksum, fillCache bool) (*block, util.Releaser, error) { + if r.cache != nil { + var ( + err error + ch *cache.Handle + ) + if fillCache { + ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) { + var b *block + b, err = r.readBlock(bh, verifyChecksum) + if err != nil { + return 0, nil + } + return cap(b.data), b + }) + } else { + ch = r.cache.Get(bh.offset, nil) + } + if ch != nil { + b, ok := ch.Value().(*block) + if !ok { + ch.Release() + return nil, nil, errors.New("leveldb/table: inconsistent block type") + } + return b, ch, err + } else if err != nil { + return nil, nil, err + } + } + + b, err := r.readBlock(bh, 
verifyChecksum) + return b, b, err +} + +func (r *Reader) readFilterBlock(bh blockHandle) (*filterBlock, error) { + data, err := r.readRawBlock(bh, true) + if err != nil { + return nil, err + } + n := len(data) + if n < 5 { + return nil, r.newErrCorruptedBH(bh, "too short") + } + m := n - 5 + oOffset := int(binary.LittleEndian.Uint32(data[m:])) + if oOffset > m { + return nil, r.newErrCorruptedBH(bh, "invalid data-offsets offset") + } + b := &filterBlock{ + bpool: r.bpool, + data: data, + oOffset: oOffset, + baseLg: uint(data[n-1]), + filtersNum: (m - oOffset) / 4, + } + return b, nil +} + +func (r *Reader) readFilterBlockCached(bh blockHandle, fillCache bool) (*filterBlock, util.Releaser, error) { + if r.cache != nil { + var ( + err error + ch *cache.Handle + ) + if fillCache { + ch = r.cache.Get(bh.offset, func() (size int, value cache.Value) { + var b *filterBlock + b, err = r.readFilterBlock(bh) + if err != nil { + return 0, nil + } + return cap(b.data), b + }) + } else { + ch = r.cache.Get(bh.offset, nil) + } + if ch != nil { + b, ok := ch.Value().(*filterBlock) + if !ok { + ch.Release() + return nil, nil, errors.New("leveldb/table: inconsistent block type") + } + return b, ch, err + } else if err != nil { + return nil, nil, err + } + } + + b, err := r.readFilterBlock(bh) + return b, b, err +} + +func (r *Reader) getIndexBlock(fillCache bool) (b *block, rel util.Releaser, err error) { + if r.indexBlock == nil { + return r.readBlockCached(r.indexBH, true, fillCache) + } + return r.indexBlock, util.NoopReleaser{}, nil +} + +func (r *Reader) getFilterBlock(fillCache bool) (*filterBlock, util.Releaser, error) { + if r.filterBlock == nil { + return r.readFilterBlockCached(r.filterBH, fillCache) + } + return r.filterBlock, util.NoopReleaser{}, nil +} + +func (r *Reader) newBlockIter(b *block, bReleaser util.Releaser, slice *util.Range, inclLimit bool) *blockIter { + bi := &blockIter{ + tr: r, + block: b, + blockReleaser: bReleaser, + // Valid key should never be 
nil.
+		key:             make([]byte, 0),
+		dir:             dirSOI,
+		riStart:         0,
+		riLimit:         b.restartsLen,
+		offsetStart:     0,
+		offsetRealStart: 0,
+		offsetLimit:     b.restartsOffset,
+	}
+	if slice != nil {
+		if slice.Start != nil {
+			if bi.Seek(slice.Start) {
+				bi.riStart = b.restartIndex(bi.restartIndex, b.restartsLen, bi.prevOffset)
+				bi.offsetStart = b.restartOffset(bi.riStart)
+				bi.offsetRealStart = bi.prevOffset
+			} else {
+				bi.riStart = b.restartsLen
+				bi.offsetStart = b.restartsOffset
+				bi.offsetRealStart = b.restartsOffset
+			}
+		}
+		if slice.Limit != nil {
+			if bi.Seek(slice.Limit) && (!inclLimit || bi.Next()) {
+				bi.offsetLimit = bi.prevOffset
+				bi.riLimit = bi.restartIndex + 1
+			}
+		}
+		bi.reset()
+		if bi.offsetStart > bi.offsetLimit {
+			bi.sErr(errors.New("leveldb/table: invalid slice range"))
+		}
+	}
+	return bi
+}
+
+func (r *Reader) getDataIter(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+	b, rel, err := r.readBlockCached(dataBH, verifyChecksum, fillCache)
+	if err != nil {
+		return iterator.NewEmptyIterator(err)
+	}
+	return r.newBlockIter(b, rel, slice, false)
+}
+
+func (r *Reader) getDataIterErr(dataBH blockHandle, slice *util.Range, verifyChecksum, fillCache bool) iterator.Iterator {
+	r.mu.RLock()
+	defer r.mu.RUnlock()
+
+	if r.err != nil {
+		return iterator.NewEmptyIterator(r.err)
+	}
+
+	return r.getDataIter(dataBH, slice, verifyChecksum, fillCache)
+}
+
+// NewIterator creates an iterator from the table.
+//
+// Slice allows slicing the iterator to only contain keys in the given
+// range. A nil Range.Start is treated as a key before all keys in the
+// table. And a nil Range.Limit is treated as a key after all keys in
+// the table.
+//
+// WARNING: Any slice returned by iterator (e.g. slice returned by calling
+// Iterator.Key() or Iterator.Value() methods), its content should not be modified
+// unless noted otherwise.
+//
+// The returned iterator is not safe for concurrent use and should be released
+// after use.
+// +// Also read Iterator documentation of the leveldb/iterator package. +func (r *Reader) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + return iterator.NewEmptyIterator(r.err) + } + + fillCache := !ro.GetDontFillCache() + indexBlock, rel, err := r.getIndexBlock(fillCache) + if err != nil { + return iterator.NewEmptyIterator(err) + } + index := &indexIter{ + blockIter: r.newBlockIter(indexBlock, rel, slice, true), + tr: r, + slice: slice, + fillCache: !ro.GetDontFillCache(), + } + return iterator.NewIndexedIterator(index, opt.GetStrict(r.o, ro, opt.StrictReader)) +} + +func (r *Reader) find(key []byte, filtered bool, ro *opt.ReadOptions, noValue bool) (rkey, value []byte, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + err = r.err + return + } + + indexBlock, rel, err := r.getIndexBlock(true) + if err != nil { + return + } + defer rel.Release() + + index := r.newBlockIter(indexBlock, nil, nil, true) + defer index.Release() + + if !index.Seek(key) { + if err = index.Error(); err == nil { + err = ErrNotFound + } + return + } + + dataBH, n := decodeBlockHandle(index.Value()) + if n == 0 { + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") + return nil, nil, r.err + } + + // The filter should only used for exact match. + if filtered && r.filter != nil { + filterBlock, frel, ferr := r.getFilterBlock(true) + if ferr == nil { + if !filterBlock.contains(r.filter, dataBH.offset, key) { + frel.Release() + return nil, nil, ErrNotFound + } + frel.Release() + } else if !errors.IsCorrupted(ferr) { + return nil, nil, ferr + } + } + + data := r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) + if !data.Seek(key) { + data.Release() + if err = data.Error(); err != nil { + return + } + + // The nearest greater-than key is the first key of the next block. 
+ if !index.Next() { + if err = index.Error(); err == nil { + err = ErrNotFound + } + return + } + + dataBH, n = decodeBlockHandle(index.Value()) + if n == 0 { + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") + return nil, nil, r.err + } + + data = r.getDataIter(dataBH, nil, r.verifyChecksum, !ro.GetDontFillCache()) + if !data.Next() { + data.Release() + if err = data.Error(); err == nil { + err = ErrNotFound + } + return + } + } + + // Key doesn't use block buffer, no need to copy the buffer. + rkey = data.Key() + if !noValue { + if r.bpool == nil { + value = data.Value() + } else { + // Value does use block buffer, and since the buffer will be + // recycled, it need to be copied. + value = append([]byte{}, data.Value()...) + } + } + data.Release() + return +} + +// Find finds key/value pair whose key is greater than or equal to the +// given key. It returns ErrNotFound if the table doesn't contain +// such pair. +// If filtered is true then the nearest 'block' will be checked against +// 'filter data' (if present) and will immediately return ErrNotFound if +// 'filter data' indicates that such pair doesn't exist. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. +func (r *Reader) Find(key []byte, filtered bool, ro *opt.ReadOptions) (rkey, value []byte, err error) { + return r.find(key, filtered, ro, false) +} + +// FindKey finds key that is greater than or equal to the given key. +// It returns ErrNotFound if the table doesn't contain such key. +// If filtered is true then the nearest 'block' will be checked against +// 'filter data' (if present) and will immediately return ErrNotFound if +// 'filter data' indicates that such key doesn't exist. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. 
+func (r *Reader) FindKey(key []byte, filtered bool, ro *opt.ReadOptions) (rkey []byte, err error) { + rkey, _, err = r.find(key, filtered, ro, true) + return +} + +// Get gets the value for the given key. It returns errors.ErrNotFound +// if the table does not contain the key. +// +// The caller may modify the contents of the returned slice as it is its +// own copy. +// It is safe to modify the contents of the argument after Find returns. +func (r *Reader) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + err = r.err + return + } + + rkey, value, err := r.find(key, false, ro, false) + if err == nil && r.cmp.Compare(rkey, key) != 0 { + value = nil + err = ErrNotFound + } + return +} + +// OffsetOf returns approximate offset for the given key. +// +// It is safe to modify the contents of the argument after Get returns. +func (r *Reader) OffsetOf(key []byte) (offset int64, err error) { + r.mu.RLock() + defer r.mu.RUnlock() + + if r.err != nil { + err = r.err + return + } + + indexBlock, rel, err := r.readBlockCached(r.indexBH, true, true) + if err != nil { + return + } + defer rel.Release() + + index := r.newBlockIter(indexBlock, nil, nil, true) + defer index.Release() + if index.Seek(key) { + dataBH, n := decodeBlockHandle(index.Value()) + if n == 0 { + r.err = r.newErrCorruptedBH(r.indexBH, "bad data block handle") + return + } + offset = int64(dataBH.offset) + return + } + err = index.Error() + if err == nil { + offset = r.dataEnd + } + return +} + +// Release implements util.Releaser. +// It also close the file if it is an io.Closer. 
+func (r *Reader) Release() { + r.mu.Lock() + defer r.mu.Unlock() + + if closer, ok := r.reader.(io.Closer); ok { + closer.Close() + } + if r.indexBlock != nil { + r.indexBlock.Release() + r.indexBlock = nil + } + if r.filterBlock != nil { + r.filterBlock.Release() + r.filterBlock = nil + } + r.reader = nil + r.cache = nil + r.bpool = nil + r.err = ErrReaderReleased +} + +// NewReader creates a new initialized table reader for the file. +// The fi, cache and bpool is optional and can be nil. +// +// The returned table reader instance is safe for concurrent use. +func NewReader(f io.ReaderAt, size int64, fd storage.FileDesc, cache *cache.NamespaceGetter, bpool *util.BufferPool, o *opt.Options) (*Reader, error) { + if f == nil { + return nil, errors.New("leveldb/table: nil file") + } + + r := &Reader{ + fd: fd, + reader: f, + cache: cache, + bpool: bpool, + o: o, + cmp: o.GetComparer(), + verifyChecksum: o.GetStrict(opt.StrictBlockChecksum), + } + + if size < footerLen { + r.err = r.newErrCorrupted(0, size, "table", "too small") + return r, nil + } + + footerPos := size - footerLen + var footer [footerLen]byte + if _, err := r.reader.ReadAt(footer[:], footerPos); err != nil && err != io.EOF { + return nil, err + } + if string(footer[footerLen-len(magic):footerLen]) != magic { + r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad magic number") + return r, nil + } + + var n int + // Decode the metaindex block handle. + r.metaBH, n = decodeBlockHandle(footer[:]) + if n == 0 { + r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad metaindex block handle") + return r, nil + } + + // Decode the index block handle. + r.indexBH, n = decodeBlockHandle(footer[n:]) + if n == 0 { + r.err = r.newErrCorrupted(footerPos, footerLen, "table-footer", "bad index block handle") + return r, nil + } + + // Read metaindex block. 
+ metaBlock, err := r.readBlock(r.metaBH, true) + if err != nil { + if errors.IsCorrupted(err) { + r.err = err + return r, nil + } + return nil, err + } + + // Set data end. + r.dataEnd = int64(r.metaBH.offset) + + // Read metaindex. + metaIter := r.newBlockIter(metaBlock, nil, nil, true) + for metaIter.Next() { + key := string(metaIter.Key()) + if !strings.HasPrefix(key, "filter.") { + continue + } + fn := key[7:] + if f0 := o.GetFilter(); f0 != nil && f0.Name() == fn { + r.filter = f0 + } else { + for _, f0 := range o.GetAltFilters() { + if f0.Name() == fn { + r.filter = f0 + break + } + } + } + if r.filter != nil { + filterBH, n := decodeBlockHandle(metaIter.Value()) + if n == 0 { + continue + } + r.filterBH = filterBH + // Update data end. + r.dataEnd = int64(filterBH.offset) + break + } + } + metaIter.Release() + metaBlock.Release() + + // Cache index and filter block locally, since we don't have global cache. + if cache == nil { + r.indexBlock, err = r.readBlock(r.indexBH, true) + if err != nil { + if errors.IsCorrupted(err) { + r.err = err + return r, nil + } + return nil, err + } + if r.filter != nil { + r.filterBlock, err = r.readFilterBlock(r.filterBH) + if err != nil { + if !errors.IsCorrupted(err) { + return nil, err + } + + // Don't use filter then. + r.filter = nil + } + } + } + + return r, nil +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go new file mode 100644 index 000000000..beacdc1f0 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/table.go @@ -0,0 +1,177 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package table allows read and write sorted key/value. 
+package table + +import ( + "encoding/binary" +) + +/* +Table: + +Table is consist of one or more data blocks, an optional filter block +a metaindex block, an index block and a table footer. Metaindex block +is a special block used to keep parameters of the table, such as filter +block name and its block handle. Index block is a special block used to +keep record of data blocks offset and length, index block use one as +restart interval. The key used by index block are the last key of preceding +block, shorter separator of adjacent blocks or shorter successor of the +last key of the last block. Filter block is an optional block contains +sequence of filter data generated by a filter generator. + +Table data structure: + + optional + / + +--------------+--------------+--------------+------+-------+-----------------+-------------+--------+ + | data block 1 | ... | data block n | filter block | metaindex block | index block | footer | + +--------------+--------------+--------------+--------------+-----------------+-------------+--------+ + + Each block followed by a 5-bytes trailer contains compression type and checksum. + +Table block trailer: + + +---------------------------+-------------------+ + | compression type (1-byte) | checksum (4-byte) | + +---------------------------+-------------------+ + + The checksum is a CRC-32 computed using Castagnoli's polynomial. Compression + type also included in the checksum. + +Table footer: + + +------------------- 40-bytes -------------------+ + / \ + +------------------------+--------------------+------+-----------------+ + | metaindex block handle / index block handle / ---- | magic (8-bytes) | + +------------------------+--------------------+------+-----------------+ + + The magic are first 64-bit of SHA-1 sum of "http://code.google.com/p/leveldb/". + +NOTE: All fixed-length integer are little-endian. +*/ + +/* +Block: + +Block is consist of one or more key/value entries and a block trailer. 
+Block entry shares key prefix with its preceding key until a restart +point reached. A block should contains at least one restart point. +First restart point are always zero. + +Block data structure: + + + restart point + restart point (depends on restart interval) + / / + +---------------+---------------+---------------+---------------+---------+ + | block entry 1 | block entry 2 | ... | block entry n | trailer | + +---------------+---------------+---------------+---------------+---------+ + +Key/value entry: + + +---- key len ----+ + / \ + +-------+---------+-----------+---------+--------------------+--------------+----------------+ + | shared (varint) | not shared (varint) | value len (varint) | key (varlen) | value (varlen) | + +-----------------+---------------------+--------------------+--------------+----------------+ + + Block entry shares key prefix with its preceding key: + Conditions: + restart_interval=2 + entry one : key=deck,value=v1 + entry two : key=dock,value=v2 + entry three: key=duck,value=v3 + The entries will be encoded as follow: + + + restart point (offset=0) + restart point (offset=16) + / / + +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+ + | 0 | 4 | 2 | "deck" | "v1" | 1 | 3 | 2 | "ock" | "v2" | 0 | 4 | 2 | "duck" | "v3" | + +-----+-----+-----+----------+--------+-----+-----+-----+---------+--------+-----+-----+-----+----------+--------+ + \ / \ / \ / + +----------- entry one -----------+ +----------- entry two ----------+ +---------- entry three ----------+ + + The block trailer will contains two restart points: + + +------------+-----------+--------+ + | 0 | 16 | 2 | + +------------+-----------+---+----+ + \ / \ + +-- restart points --+ + restart points length + +Block trailer: + + +-- 4-bytes --+ + / \ + +-----------------+-----------------+-----------------+------------------------------+ + | restart point 1 | .... 
| restart point n | restart points len (4-bytes) | + +-----------------+-----------------+-----------------+------------------------------+ + + +NOTE: All fixed-length integer are little-endian. +*/ + +/* +Filter block: + +Filter block consist of one or more filter data and a filter block trailer. +The trailer contains filter data offsets, a trailer offset and a 1-byte base Lg. + +Filter block data structure: + + + offset 1 + offset 2 + offset n + trailer offset + / / / / + +---------------+---------------+---------------+---------+ + | filter data 1 | ... | filter data n | trailer | + +---------------+---------------+---------------+---------+ + +Filter block trailer: + + +- 4-bytes -+ + / \ + +---------------+---------------+---------------+-------------------------------+------------------+ + | data 1 offset | .... | data n offset | data-offsets offset (4-bytes) | base Lg (1-byte) | + +-------------- +---------------+---------------+-------------------------------+------------------+ + + +NOTE: All fixed-length integer are little-endian. +*/ + +const ( + blockTrailerLen = 5 + footerLen = 48 + + magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb" + + // The block type gives the per-block compression format. + // These constants are part of the file format and should not be changed. 
+ blockTypeNoCompression = 0 + blockTypeSnappyCompression = 1 + + // Generate new filter every 2KB of data + filterBaseLg = 11 + filterBase = 1 << filterBaseLg +) + +type blockHandle struct { + offset, length uint64 +} + +func decodeBlockHandle(src []byte) (blockHandle, int) { + offset, n := binary.Uvarint(src) + length, m := binary.Uvarint(src[n:]) + if n == 0 || m == 0 { + return blockHandle{}, 0 + } + return blockHandle{offset, length}, n + m +} + +func encodeBlockHandle(dst []byte, b blockHandle) int { + n := binary.PutUvarint(dst, b.offset) + m := binary.PutUvarint(dst[n:], b.length) + return n + m +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go new file mode 100644 index 000000000..b96b271d8 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/table/writer.go @@ -0,0 +1,375 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package table + +import ( + "encoding/binary" + "errors" + "fmt" + "io" + + "github.com/golang/snappy" + + "github.com/syndtr/goleveldb/leveldb/comparer" + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +func sharedPrefixLen(a, b []byte) int { + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + for i < n && a[i] == b[i] { + i++ + } + return i +} + +type blockWriter struct { + restartInterval int + buf util.Buffer + nEntries int + prevKey []byte + restarts []uint32 + scratch []byte +} + +func (w *blockWriter) append(key, value []byte) { + nShared := 0 + if w.nEntries%w.restartInterval == 0 { + w.restarts = append(w.restarts, uint32(w.buf.Len())) + } else { + nShared = sharedPrefixLen(w.prevKey, key) + } + n := binary.PutUvarint(w.scratch[0:], uint64(nShared)) + n += binary.PutUvarint(w.scratch[n:], uint64(len(key)-nShared)) + n += binary.PutUvarint(w.scratch[n:], uint64(len(value))) + w.buf.Write(w.scratch[:n]) + w.buf.Write(key[nShared:]) + w.buf.Write(value) + w.prevKey = append(w.prevKey[:0], key...) + w.nEntries++ +} + +func (w *blockWriter) finish() { + // Write restarts entry. + if w.nEntries == 0 { + // Must have at least one restart entry. 
+ w.restarts = append(w.restarts, 0) + } + w.restarts = append(w.restarts, uint32(len(w.restarts))) + for _, x := range w.restarts { + buf4 := w.buf.Alloc(4) + binary.LittleEndian.PutUint32(buf4, x) + } +} + +func (w *blockWriter) reset() { + w.buf.Reset() + w.nEntries = 0 + w.restarts = w.restarts[:0] +} + +func (w *blockWriter) bytesLen() int { + restartsLen := len(w.restarts) + if restartsLen == 0 { + restartsLen = 1 + } + return w.buf.Len() + 4*restartsLen + 4 +} + +type filterWriter struct { + generator filter.FilterGenerator + buf util.Buffer + nKeys int + offsets []uint32 +} + +func (w *filterWriter) add(key []byte) { + if w.generator == nil { + return + } + w.generator.Add(key) + w.nKeys++ +} + +func (w *filterWriter) flush(offset uint64) { + if w.generator == nil { + return + } + for x := int(offset / filterBase); x > len(w.offsets); { + w.generate() + } +} + +func (w *filterWriter) finish() { + if w.generator == nil { + return + } + // Generate last keys. + + if w.nKeys > 0 { + w.generate() + } + w.offsets = append(w.offsets, uint32(w.buf.Len())) + for _, x := range w.offsets { + buf4 := w.buf.Alloc(4) + binary.LittleEndian.PutUint32(buf4, x) + } + w.buf.WriteByte(filterBaseLg) +} + +func (w *filterWriter) generate() { + // Record offset. + w.offsets = append(w.offsets, uint32(w.buf.Len())) + // Generate filters. + if w.nKeys > 0 { + w.generator.Generate(&w.buf) + w.nKeys = 0 + } +} + +// Writer is a table writer. +type Writer struct { + writer io.Writer + err error + // Options + cmp comparer.Comparer + filter filter.Filter + compression opt.Compression + blockSize int + + dataBlock blockWriter + indexBlock blockWriter + filterBlock filterWriter + pendingBH blockHandle + offset uint64 + nEntries int + // Scratch allocated enough for 5 uvarint. Block writer should not use + // first 20-bytes since it will be used to encode block handle, which + // then passed to the block writer itself. 
+ scratch [50]byte + comparerScratch []byte + compressionScratch []byte +} + +func (w *Writer) writeBlock(buf *util.Buffer, compression opt.Compression) (bh blockHandle, err error) { + // Compress the buffer if necessary. + var b []byte + if compression == opt.SnappyCompression { + // Allocate scratch enough for compression and block trailer. + if n := snappy.MaxEncodedLen(buf.Len()) + blockTrailerLen; len(w.compressionScratch) < n { + w.compressionScratch = make([]byte, n) + } + compressed := snappy.Encode(w.compressionScratch, buf.Bytes()) + n := len(compressed) + b = compressed[:n+blockTrailerLen] + b[n] = blockTypeSnappyCompression + } else { + tmp := buf.Alloc(blockTrailerLen) + tmp[0] = blockTypeNoCompression + b = buf.Bytes() + } + + // Calculate the checksum. + n := len(b) - 4 + checksum := util.NewCRC(b[:n]).Value() + binary.LittleEndian.PutUint32(b[n:], checksum) + + // Write the buffer to the file. + _, err = w.writer.Write(b) + if err != nil { + return + } + bh = blockHandle{w.offset, uint64(len(b) - blockTrailerLen)} + w.offset += uint64(len(b)) + return +} + +func (w *Writer) flushPendingBH(key []byte) { + if w.pendingBH.length == 0 { + return + } + var separator []byte + if len(key) == 0 { + separator = w.cmp.Successor(w.comparerScratch[:0], w.dataBlock.prevKey) + } else { + separator = w.cmp.Separator(w.comparerScratch[:0], w.dataBlock.prevKey, key) + } + if separator == nil { + separator = w.dataBlock.prevKey + } else { + w.comparerScratch = separator + } + n := encodeBlockHandle(w.scratch[:20], w.pendingBH) + // Append the block handle to the index block. + w.indexBlock.append(separator, w.scratch[:n]) + // Reset prev key of the data block. + w.dataBlock.prevKey = w.dataBlock.prevKey[:0] + // Clear pending block handle. 
+ w.pendingBH = blockHandle{} +} + +func (w *Writer) finishBlock() error { + w.dataBlock.finish() + bh, err := w.writeBlock(&w.dataBlock.buf, w.compression) + if err != nil { + return err + } + w.pendingBH = bh + // Reset the data block. + w.dataBlock.reset() + // Flush the filter block. + w.filterBlock.flush(w.offset) + return nil +} + +// Append appends key/value pair to the table. The keys passed must +// be in increasing order. +// +// It is safe to modify the contents of the arguments after Append returns. +func (w *Writer) Append(key, value []byte) error { + if w.err != nil { + return w.err + } + if w.nEntries > 0 && w.cmp.Compare(w.dataBlock.prevKey, key) >= 0 { + w.err = fmt.Errorf("leveldb/table: Writer: keys are not in increasing order: %q, %q", w.dataBlock.prevKey, key) + return w.err + } + + w.flushPendingBH(key) + // Append key/value pair to the data block. + w.dataBlock.append(key, value) + // Add key to the filter block. + w.filterBlock.add(key) + + // Finish the data block if block size target reached. + if w.dataBlock.bytesLen() >= w.blockSize { + if err := w.finishBlock(); err != nil { + w.err = err + return w.err + } + } + w.nEntries++ + return nil +} + +// BlocksLen returns number of blocks written so far. +func (w *Writer) BlocksLen() int { + n := w.indexBlock.nEntries + if w.pendingBH.length > 0 { + // Includes the pending block. + n++ + } + return n +} + +// EntriesLen returns number of entries added so far. +func (w *Writer) EntriesLen() int { + return w.nEntries +} + +// BytesLen returns number of bytes written so far. +func (w *Writer) BytesLen() int { + return int(w.offset) +} + +// Close will finalize the table. Calling Append is not possible +// after Close, but calling BlocksLen, EntriesLen and BytesLen +// is still possible. +func (w *Writer) Close() error { + if w.err != nil { + return w.err + } + + // Write the last data block. Or empty data block if there + // aren't any data blocks at all. 
+ if w.dataBlock.nEntries > 0 || w.nEntries == 0 { + if err := w.finishBlock(); err != nil { + w.err = err + return w.err + } + } + w.flushPendingBH(nil) + + // Write the filter block. + var filterBH blockHandle + w.filterBlock.finish() + if buf := &w.filterBlock.buf; buf.Len() > 0 { + filterBH, w.err = w.writeBlock(buf, opt.NoCompression) + if w.err != nil { + return w.err + } + } + + // Write the metaindex block. + if filterBH.length > 0 { + key := []byte("filter." + w.filter.Name()) + n := encodeBlockHandle(w.scratch[:20], filterBH) + w.dataBlock.append(key, w.scratch[:n]) + } + w.dataBlock.finish() + metaindexBH, err := w.writeBlock(&w.dataBlock.buf, w.compression) + if err != nil { + w.err = err + return w.err + } + + // Write the index block. + w.indexBlock.finish() + indexBH, err := w.writeBlock(&w.indexBlock.buf, w.compression) + if err != nil { + w.err = err + return w.err + } + + // Write the table footer. + footer := w.scratch[:footerLen] + for i := range footer { + footer[i] = 0 + } + n := encodeBlockHandle(footer, metaindexBH) + encodeBlockHandle(footer[n:], indexBH) + copy(footer[footerLen-len(magic):], magic) + if _, err := w.writer.Write(footer); err != nil { + w.err = err + return w.err + } + w.offset += footerLen + + w.err = errors.New("leveldb/table: writer is closed") + return nil +} + +// NewWriter creates a new initialized table writer for the file. +// +// Table writer is not safe for concurrent use. +func NewWriter(f io.Writer, o *opt.Options) *Writer { + w := &Writer{ + writer: f, + cmp: o.GetComparer(), + filter: o.GetFilter(), + compression: o.GetCompression(), + blockSize: o.GetBlockSize(), + comparerScratch: make([]byte, 0), + } + // data block + w.dataBlock.restartInterval = o.GetBlockRestartInterval() + // The first 20-bytes are used for encoding block handle. 
+ w.dataBlock.scratch = w.scratch[20:] + // index block + w.indexBlock.restartInterval = 1 + w.indexBlock.scratch = w.scratch[20:] + // filter block + if w.filter != nil { + w.filterBlock.generator = w.filter.NewGenerator() + w.filterBlock.flush(0) + } + return w +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util.go new file mode 100644 index 000000000..0e2b519e5 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util.go @@ -0,0 +1,98 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sort" + + "github.com/syndtr/goleveldb/leveldb/storage" +) + +func shorten(str string) string { + if len(str) <= 8 { + return str + } + return str[:3] + ".." + str[len(str)-3:] +} + +var bunits = [...]string{"", "Ki", "Mi", "Gi", "Ti"} + +func shortenb(bytes int) string { + i := 0 + for ; bytes > 1024 && i < 4; i++ { + bytes /= 1024 + } + return fmt.Sprintf("%d%sB", bytes, bunits[i]) +} + +func sshortenb(bytes int) string { + if bytes == 0 { + return "~" + } + sign := "+" + if bytes < 0 { + sign = "-" + bytes *= -1 + } + i := 0 + for ; bytes > 1024 && i < 4; i++ { + bytes /= 1024 + } + return fmt.Sprintf("%s%d%sB", sign, bytes, bunits[i]) +} + +func sint(x int) string { + if x == 0 { + return "~" + } + sign := "+" + if x < 0 { + sign = "-" + x *= -1 + } + return fmt.Sprintf("%s%d", sign, x) +} + +func minInt(a, b int) int { + if a < b { + return a + } + return b +} + +func maxInt(a, b int) int { + if a > b { + return a + } + return b +} + +type fdSorter []storage.FileDesc + +func (p fdSorter) Len() int { + return len(p) +} + +func (p fdSorter) Less(i, j int) bool { + return p[i].Num < p[j].Num +} + +func (p fdSorter) Swap(i, j int) { + p[i], p[j] = p[j], p[i] +} + +func sortFds(fds []storage.FileDesc) { + sort.Sort(fdSorter(fds)) +} + 
+func ensureBuffer(b []byte, n int) []byte { + if cap(b) < n { + return make([]byte, n) + } + return b[:n] +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go new file mode 100644 index 000000000..21de24255 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer.go @@ -0,0 +1,293 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package util + +// This a copy of Go std bytes.Buffer with some modification +// and some features stripped. + +import ( + "bytes" + "io" +) + +// A Buffer is a variable-sized buffer of bytes with Read and Write methods. +// The zero value for Buffer is an empty buffer ready to use. +type Buffer struct { + buf []byte // contents are the bytes buf[off : len(buf)] + off int // read at &buf[off], write at &buf[len(buf)] + bootstrap [64]byte // memory to hold first slice; helps small buffers (Printf) avoid allocation. +} + +// Bytes returns a slice of the contents of the unread portion of the buffer; +// len(b.Bytes()) == b.Len(). If the caller changes the contents of the +// returned slice, the contents of the buffer will change provided there +// are no intervening method calls on the Buffer. +func (b *Buffer) Bytes() []byte { return b.buf[b.off:] } + +// String returns the contents of the unread portion of the buffer +// as a string. If the Buffer is a nil pointer, it returns "". +func (b *Buffer) String() string { + if b == nil { + // Special case, useful in debugging. + return "" + } + return string(b.buf[b.off:]) +} + +// Len returns the number of bytes of the unread portion of the buffer; +// b.Len() == len(b.Bytes()). +func (b *Buffer) Len() int { return len(b.buf) - b.off } + +// Truncate discards all but the first n unread bytes from the buffer. +// It panics if n is negative or greater than the length of the buffer. 
+func (b *Buffer) Truncate(n int) { + switch { + case n < 0 || n > b.Len(): + panic("leveldb/util.Buffer: truncation out of range") + case n == 0: + // Reuse buffer space. + b.off = 0 + } + b.buf = b.buf[0 : b.off+n] +} + +// Reset resets the buffer so it has no content. +// b.Reset() is the same as b.Truncate(0). +func (b *Buffer) Reset() { b.Truncate(0) } + +// grow grows the buffer to guarantee space for n more bytes. +// It returns the index where bytes should be written. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) grow(n int) int { + m := b.Len() + // If buffer is empty, reset to recover space. + if m == 0 && b.off != 0 { + b.Truncate(0) + } + if len(b.buf)+n > cap(b.buf) { + var buf []byte + if b.buf == nil && n <= len(b.bootstrap) { + buf = b.bootstrap[0:] + } else if m+n <= cap(b.buf)/2 { + // We can slide things down instead of allocating a new + // slice. We only need m+n <= cap(b.buf) to slide, but + // we instead let capacity get twice as large so we + // don't spend all our time copying. + copy(b.buf[:], b.buf[b.off:]) + buf = b.buf[:m] + } else { + // not enough space anywhere + buf = makeSlice(2*cap(b.buf) + n) + copy(buf, b.buf[b.off:]) + } + b.buf = buf + b.off = 0 + } + b.buf = b.buf[0 : b.off+m+n] + return b.off + m +} + +// Alloc allocs n bytes of slice from the buffer, growing the buffer as +// needed. If n is negative, Alloc will panic. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. +func (b *Buffer) Alloc(n int) []byte { + if n < 0 { + panic("leveldb/util.Buffer.Alloc: negative count") + } + m := b.grow(n) + return b.buf[m:] +} + +// Grow grows the buffer's capacity, if necessary, to guarantee space for +// another n bytes. After Grow(n), at least n bytes can be written to the +// buffer without another allocation. +// If n is negative, Grow will panic. +// If the buffer can't grow it will panic with bytes.ErrTooLarge. 
+func (b *Buffer) Grow(n int) { + if n < 0 { + panic("leveldb/util.Buffer.Grow: negative count") + } + m := b.grow(n) + b.buf = b.buf[0:m] +} + +// Write appends the contents of p to the buffer, growing the buffer as +// needed. The return value n is the length of p; err is always nil. If the +// buffer becomes too large, Write will panic with bytes.ErrTooLarge. +func (b *Buffer) Write(p []byte) (n int, err error) { + m := b.grow(len(p)) + return copy(b.buf[m:], p), nil +} + +// MinRead is the minimum slice size passed to a Read call by +// Buffer.ReadFrom. As long as the Buffer has at least MinRead bytes beyond +// what is required to hold the contents of r, ReadFrom will not grow the +// underlying buffer. +const MinRead = 512 + +// ReadFrom reads data from r until EOF and appends it to the buffer, growing +// the buffer as needed. The return value n is the number of bytes read. Any +// error except io.EOF encountered during the read is also returned. If the +// buffer becomes too large, ReadFrom will panic with bytes.ErrTooLarge. +func (b *Buffer) ReadFrom(r io.Reader) (n int64, err error) { + // If buffer is empty, reset to recover space. + if b.off >= len(b.buf) { + b.Truncate(0) + } + for { + if free := cap(b.buf) - len(b.buf); free < MinRead { + // not enough space at end + newBuf := b.buf + if b.off+free < MinRead { + // not enough space using beginning of buffer; + // double buffer capacity + newBuf = makeSlice(2*cap(b.buf) + MinRead) + } + copy(newBuf, b.buf[b.off:]) + b.buf = newBuf[:len(b.buf)-b.off] + b.off = 0 + } + m, e := r.Read(b.buf[len(b.buf):cap(b.buf)]) + b.buf = b.buf[0 : len(b.buf)+m] + n += int64(m) + if e == io.EOF { + break + } + if e != nil { + return n, e + } + } + return n, nil // err is EOF, so return nil explicitly +} + +// makeSlice allocates a slice of size n. If the allocation fails, it panics +// with bytes.ErrTooLarge. +func makeSlice(n int) []byte { + // If the make fails, give a known error. 
+ defer func() { + if recover() != nil { + panic(bytes.ErrTooLarge) + } + }() + return make([]byte, n) +} + +// WriteTo writes data to w until the buffer is drained or an error occurs. +// The return value n is the number of bytes written; it always fits into an +// int, but it is int64 to match the io.WriterTo interface. Any error +// encountered during the write is also returned. +func (b *Buffer) WriteTo(w io.Writer) (n int64, err error) { + if b.off < len(b.buf) { + nBytes := b.Len() + m, e := w.Write(b.buf[b.off:]) + if m > nBytes { + panic("leveldb/util.Buffer.WriteTo: invalid Write count") + } + b.off += m + n = int64(m) + if e != nil { + return n, e + } + // all bytes should have been written, by definition of + // Write method in io.Writer + if m != nBytes { + return n, io.ErrShortWrite + } + } + // Buffer is now empty; reset. + b.Truncate(0) + return +} + +// WriteByte appends the byte c to the buffer, growing the buffer as needed. +// The returned error is always nil, but is included to match bufio.Writer's +// WriteByte. If the buffer becomes too large, WriteByte will panic with +// bytes.ErrTooLarge. +func (b *Buffer) WriteByte(c byte) error { + m := b.grow(1) + b.buf[m] = c + return nil +} + +// Read reads the next len(p) bytes from the buffer or until the buffer +// is drained. The return value n is the number of bytes read. If the +// buffer has no data to return, err is io.EOF (unless len(p) is zero); +// otherwise it is nil. +func (b *Buffer) Read(p []byte) (n int, err error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + if len(p) == 0 { + return + } + return 0, io.EOF + } + n = copy(p, b.buf[b.off:]) + b.off += n + return +} + +// Next returns a slice containing the next n bytes from the buffer, +// advancing the buffer as if the bytes had been returned by Read. +// If there are fewer than n bytes in the buffer, Next returns the entire buffer. 
+// The slice is only valid until the next call to a read or write method. +func (b *Buffer) Next(n int) []byte { + m := b.Len() + if n > m { + n = m + } + data := b.buf[b.off : b.off+n] + b.off += n + return data +} + +// ReadByte reads and returns the next byte from the buffer. +// If no byte is available, it returns error io.EOF. +func (b *Buffer) ReadByte() (c byte, err error) { + if b.off >= len(b.buf) { + // Buffer is empty, reset to recover space. + b.Truncate(0) + return 0, io.EOF + } + c = b.buf[b.off] + b.off++ + return c, nil +} + +// ReadBytes reads until the first occurrence of delim in the input, +// returning a slice containing the data up to and including the delimiter. +// If ReadBytes encounters an error before finding a delimiter, +// it returns the data read before the error and the error itself (often io.EOF). +// ReadBytes returns err != nil if and only if the returned data does not end in +// delim. +func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) { + slice, err := b.readSlice(delim) + // return a copy of slice. The buffer's backing array may + // be overwritten by later calls. + line = append(line, slice...) + return +} + +// readSlice is like ReadBytes but returns a reference to internal buffer data. +func (b *Buffer) readSlice(delim byte) (line []byte, err error) { + i := bytes.IndexByte(b.buf[b.off:], delim) + end := b.off + i + 1 + if i < 0 { + end = len(b.buf) + err = io.EOF + } + line = b.buf[b.off:end] + b.off = end + return line, err +} + +// NewBuffer creates and initializes a new Buffer using buf as its initial +// contents. It is intended to prepare a Buffer to read existing data. It +// can also be used to size the internal buffer for writing. To do that, +// buf should have the desired capacity but a length of zero. +// +// In most cases, new(Buffer) (or just declaring a Buffer variable) is +// sufficient to initialize a Buffer. 
+func NewBuffer(buf []byte) *Buffer { return &Buffer{buf: buf} } diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go new file mode 100644 index 000000000..2f3db974a --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/buffer_pool.go @@ -0,0 +1,239 @@ +// Copyright (c) 2014, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +import ( + "fmt" + "sync" + "sync/atomic" + "time" +) + +type buffer struct { + b []byte + miss int +} + +// BufferPool is a 'buffer pool'. +type BufferPool struct { + pool [6]chan []byte + size [5]uint32 + sizeMiss [5]uint32 + sizeHalf [5]uint32 + baseline [4]int + baseline0 int + + mu sync.RWMutex + closed bool + closeC chan struct{} + + get uint32 + put uint32 + half uint32 + less uint32 + equal uint32 + greater uint32 + miss uint32 +} + +func (p *BufferPool) poolNum(n int) int { + if n <= p.baseline0 && n > p.baseline0/2 { + return 0 + } + for i, x := range p.baseline { + if n <= x { + return i + 1 + } + } + return len(p.baseline) + 1 +} + +// Get returns buffer with length of n. +func (p *BufferPool) Get(n int) []byte { + if p == nil { + return make([]byte, n) + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.closed { + return make([]byte, n) + } + + atomic.AddUint32(&p.get, 1) + + poolNum := p.poolNum(n) + pool := p.pool[poolNum] + if poolNum == 0 { + // Fast path. 
+ select { + case b := <-pool: + switch { + case cap(b) > n: + if cap(b)-n >= n { + atomic.AddUint32(&p.half, 1) + select { + case pool <- b: + default: + } + return make([]byte, n) + } else { + atomic.AddUint32(&p.less, 1) + return b[:n] + } + case cap(b) == n: + atomic.AddUint32(&p.equal, 1) + return b[:n] + default: + atomic.AddUint32(&p.greater, 1) + } + default: + atomic.AddUint32(&p.miss, 1) + } + + return make([]byte, n, p.baseline0) + } else { + sizePtr := &p.size[poolNum-1] + + select { + case b := <-pool: + switch { + case cap(b) > n: + if cap(b)-n >= n { + atomic.AddUint32(&p.half, 1) + sizeHalfPtr := &p.sizeHalf[poolNum-1] + if atomic.AddUint32(sizeHalfPtr, 1) == 20 { + atomic.StoreUint32(sizePtr, uint32(cap(b)/2)) + atomic.StoreUint32(sizeHalfPtr, 0) + } else { + select { + case pool <- b: + default: + } + } + return make([]byte, n) + } else { + atomic.AddUint32(&p.less, 1) + return b[:n] + } + case cap(b) == n: + atomic.AddUint32(&p.equal, 1) + return b[:n] + default: + atomic.AddUint32(&p.greater, 1) + if uint32(cap(b)) >= atomic.LoadUint32(sizePtr) { + select { + case pool <- b: + default: + } + } + } + default: + atomic.AddUint32(&p.miss, 1) + } + + if size := atomic.LoadUint32(sizePtr); uint32(n) > size { + if size == 0 { + atomic.CompareAndSwapUint32(sizePtr, 0, uint32(n)) + } else { + sizeMissPtr := &p.sizeMiss[poolNum-1] + if atomic.AddUint32(sizeMissPtr, 1) == 20 { + atomic.StoreUint32(sizePtr, uint32(n)) + atomic.StoreUint32(sizeMissPtr, 0) + } + } + return make([]byte, n) + } else { + return make([]byte, n, size) + } + } +} + +// Put adds given buffer to the pool. 
+func (p *BufferPool) Put(b []byte) { + if p == nil { + return + } + + p.mu.RLock() + defer p.mu.RUnlock() + + if p.closed { + return + } + + atomic.AddUint32(&p.put, 1) + + pool := p.pool[p.poolNum(cap(b))] + select { + case pool <- b: + default: + } + +} + +func (p *BufferPool) Close() { + if p == nil { + return + } + + p.mu.Lock() + if !p.closed { + p.closed = true + p.closeC <- struct{}{} + } + p.mu.Unlock() +} + +func (p *BufferPool) String() string { + if p == nil { + return "" + } + + return fmt.Sprintf("BufferPool{B·%d Z·%v Zm·%v Zh·%v G·%d P·%d H·%d <·%d =·%d >·%d M·%d}", + p.baseline0, p.size, p.sizeMiss, p.sizeHalf, p.get, p.put, p.half, p.less, p.equal, p.greater, p.miss) +} + +func (p *BufferPool) drain() { + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + for { + select { + case <-ticker.C: + for _, ch := range p.pool { + select { + case <-ch: + default: + } + } + case <-p.closeC: + close(p.closeC) + for _, ch := range p.pool { + close(ch) + } + return + } + } +} + +// NewBufferPool creates a new initialized 'buffer pool'. +func NewBufferPool(baseline int) *BufferPool { + if baseline <= 0 { + panic("baseline can't be <= 0") + } + p := &BufferPool{ + baseline0: baseline, + baseline: [...]int{baseline / 4, baseline / 2, baseline * 2, baseline * 4}, + closeC: make(chan struct{}, 1), + } + for i, cap := range []int{2, 2, 4, 4, 2, 1} { + p.pool[i] = make(chan []byte, cap) + } + go p.drain() + return p +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go new file mode 100644 index 000000000..631c9d610 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/crc32.go @@ -0,0 +1,30 @@ +// Copyright 2011 The LevelDB-Go Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +package util + +import ( + "hash/crc32" +) + +var table = crc32.MakeTable(crc32.Castagnoli) + +// CRC is a CRC-32 checksum computed using Castagnoli's polynomial. +type CRC uint32 + +// NewCRC creates a new crc based on the given bytes. +func NewCRC(b []byte) CRC { + return CRC(0).Update(b) +} + +// Update updates the crc with the given bytes. +func (c CRC) Update(b []byte) CRC { + return CRC(crc32.Update(uint32(c), table, b)) +} + +// Value returns a masked crc. +func (c CRC) Value() uint32 { + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go new file mode 100644 index 000000000..7f3fa4e2c --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/hash.go @@ -0,0 +1,48 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +import ( + "encoding/binary" +) + +// Hash return hash of the given data. 
+func Hash(data []byte, seed uint32) uint32 { + // Similar to murmur hash + const ( + m = uint32(0xc6a4a793) + r = uint32(24) + ) + var ( + h = seed ^ (uint32(len(data)) * m) + i int + ) + + for n := len(data) - len(data)%4; i < n; i += 4 { + h += binary.LittleEndian.Uint32(data[i:]) + h *= m + h ^= (h >> 16) + } + + switch len(data) - i { + default: + panic("not reached") + case 3: + h += uint32(data[i+2]) << 16 + fallthrough + case 2: + h += uint32(data[i+1]) << 8 + fallthrough + case 1: + h += uint32(data[i]) + h *= m + h ^= (h >> r) + case 0: + } + + return h +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go new file mode 100644 index 000000000..85159583d --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/range.go @@ -0,0 +1,32 @@ +// Copyright (c) 2014, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package util + +// Range is a key range. +type Range struct { + // Start of the key range, include in the range. + Start []byte + + // Limit of the key range, not include in the range. + Limit []byte +} + +// BytesPrefix returns key range that satisfy the given prefix. +// This only applicable for the standard 'bytes comparer'. +func BytesPrefix(prefix []byte) *Range { + var limit []byte + for i := len(prefix) - 1; i >= 0; i-- { + c := prefix[i] + if c < 0xff { + limit = make([]byte, i+1) + copy(limit, prefix) + limit[i] = c + 1 + break + } + } + return &Range{prefix, limit} +} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go new file mode 100644 index 000000000..80614afc5 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/util/util.go @@ -0,0 +1,73 @@ +// Copyright (c) 2013, Suryandaru Triandana +// All rights reserved. 
+// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package util provides utilities used throughout leveldb. +package util + +import ( + "errors" +) + +var ( + ErrReleased = errors.New("leveldb: resource already relesed") + ErrHasReleaser = errors.New("leveldb: releaser already defined") +) + +// Releaser is the interface that wraps the basic Release method. +type Releaser interface { + // Release releases associated resources. Release should always success + // and can be called multiple times without causing error. + Release() +} + +// ReleaseSetter is the interface that wraps the basic SetReleaser method. +type ReleaseSetter interface { + // SetReleaser associates the given releaser to the resources. The + // releaser will be called once coresponding resources released. + // Calling SetReleaser with nil will clear the releaser. + // + // This will panic if a releaser already present or coresponding + // resource is already released. Releaser should be cleared first + // before assigned a new one. + SetReleaser(releaser Releaser) +} + +// BasicReleaser provides basic implementation of Releaser and ReleaseSetter. +type BasicReleaser struct { + releaser Releaser + released bool +} + +// Released returns whether Release method already called. +func (r *BasicReleaser) Released() bool { + return r.released +} + +// Release implements Releaser.Release. +func (r *BasicReleaser) Release() { + if !r.released { + if r.releaser != nil { + r.releaser.Release() + r.releaser = nil + } + r.released = true + } +} + +// SetReleaser implements ReleaseSetter.SetReleaser. 
+func (r *BasicReleaser) SetReleaser(releaser Releaser) { + if r.released { + panic(ErrReleased) + } + if r.releaser != nil && releaser != nil { + panic(ErrHasReleaser) + } + r.releaser = releaser +} + +type NoopReleaser struct{} + +func (NoopReleaser) Release() {} diff --git a/vendor/github.com/syndtr/goleveldb/leveldb/version.go b/vendor/github.com/syndtr/goleveldb/leveldb/version.go new file mode 100644 index 000000000..73f272af5 --- /dev/null +++ b/vendor/github.com/syndtr/goleveldb/leveldb/version.go @@ -0,0 +1,528 @@ +// Copyright (c) 2012, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +package leveldb + +import ( + "fmt" + "sync/atomic" + "unsafe" + + "github.com/syndtr/goleveldb/leveldb/iterator" + "github.com/syndtr/goleveldb/leveldb/opt" + "github.com/syndtr/goleveldb/leveldb/util" +) + +type tSet struct { + level int + table *tFile +} + +type version struct { + s *session + + levels []tFiles + + // Level that should be compacted next and its compaction score. + // Score < 1 means compaction is not strictly needed. These fields + // are initialized by computeCompaction() + cLevel int + cScore float64 + + cSeek unsafe.Pointer + + closing bool + ref int + released bool +} + +func newVersion(s *session) *version { + return &version{s: s} +} + +func (v *version) incref() { + if v.released { + panic("already released") + } + + v.ref++ + if v.ref == 1 { + // Incr file ref. 
+ for _, tt := range v.levels { + for _, t := range tt { + v.s.addFileRef(t.fd, 1) + } + } + } +} + +func (v *version) releaseNB() { + v.ref-- + if v.ref > 0 { + return + } else if v.ref < 0 { + panic("negative version ref") + } + + for _, tt := range v.levels { + for _, t := range tt { + if v.s.addFileRef(t.fd, -1) == 0 { + v.s.tops.remove(t) + } + } + } + + v.released = true +} + +func (v *version) release() { + v.s.vmu.Lock() + v.releaseNB() + v.s.vmu.Unlock() +} + +func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) { + ukey := ikey.ukey() + + // Aux level. + if aux != nil { + for _, t := range aux { + if t.overlaps(v.s.icmp, ukey, ukey) { + if !f(-1, t) { + return + } + } + } + + if lf != nil && !lf(-1) { + return + } + } + + // Walk tables level-by-level. + for level, tables := range v.levels { + if len(tables) == 0 { + continue + } + + if level == 0 { + // Level-0 files may overlap each other. Find all files that + // overlap ukey. + for _, t := range tables { + if t.overlaps(v.s.icmp, ukey, ukey) { + if !f(level, t) { + return + } + } + } + } else { + if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) { + t := tables[i] + if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 { + if !f(level, t) { + return + } + } + } + } + + if lf != nil && !lf(level) { + return + } + } +} + +func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) { + if v.closing { + return nil, false, ErrClosed + } + + ukey := ikey.ukey() + + var ( + tset *tSet + tseek bool + + // Level-0. + zfound bool + zseq uint64 + zkt keyType + zval []byte + ) + + err = ErrNotFound + + // Since entries never hop across level, finding key/value + // in smaller level make later levels irrelevant. 
+ v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool { + if level >= 0 && !tseek { + if tset == nil { + tset = &tSet{level, t} + } else { + tseek = true + } + } + + var ( + fikey, fval []byte + ferr error + ) + if noValue { + fikey, ferr = v.s.tops.findKey(t, ikey, ro) + } else { + fikey, fval, ferr = v.s.tops.find(t, ikey, ro) + } + + switch ferr { + case nil: + case ErrNotFound: + return true + default: + err = ferr + return false + } + + if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil { + if v.s.icmp.uCompare(ukey, fukey) == 0 { + // Level <= 0 may overlaps each-other. + if level <= 0 { + if fseq >= zseq { + zfound = true + zseq = fseq + zkt = fkt + zval = fval + } + } else { + switch fkt { + case keyTypeVal: + value = fval + err = nil + case keyTypeDel: + default: + panic("leveldb: invalid internalKey type") + } + return false + } + } + } else { + err = fkerr + return false + } + + return true + }, func(level int) bool { + if zfound { + switch zkt { + case keyTypeVal: + value = zval + err = nil + case keyTypeDel: + default: + panic("leveldb: invalid internalKey type") + } + return false + } + + return true + }) + + if tseek && tset.table.consumeSeek() <= 0 { + tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + } + + return +} + +func (v *version) sampleSeek(ikey internalKey) (tcomp bool) { + var tset *tSet + + v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool { + if tset == nil { + tset = &tSet{level, t} + return true + } + if tset.table.consumeSeek() <= 0 { + tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset)) + } + return false + }, nil) + + return +} + +func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) { + strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader) + for level, tables := range v.levels { + if level == 0 { + // Merge all level zero files together since they may overlap. 
+ for _, t := range tables { + its = append(its, v.s.tops.newIterator(t, slice, ro)) + } + } else if len(tables) != 0 { + its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict)) + } + } + return +} + +func (v *version) newStaging() *versionStaging { + return &versionStaging{base: v} +} + +// Spawn a new version based on this version. +func (v *version) spawn(r *sessionRecord) *version { + staging := v.newStaging() + staging.commit(r) + return staging.finish() +} + +func (v *version) fillRecord(r *sessionRecord) { + for level, tables := range v.levels { + for _, t := range tables { + r.addTableFile(level, t) + } + } +} + +func (v *version) tLen(level int) int { + if level < len(v.levels) { + return len(v.levels[level]) + } + return 0 +} + +func (v *version) offsetOf(ikey internalKey) (n int64, err error) { + for level, tables := range v.levels { + for _, t := range tables { + if v.s.icmp.Compare(t.imax, ikey) <= 0 { + // Entire file is before "ikey", so just add the file size + n += t.size + } else if v.s.icmp.Compare(t.imin, ikey) > 0 { + // Entire file is after "ikey", so ignore + if level > 0 { + // Files other than level 0 are sorted by meta->min, so + // no further files in this level will contain data for + // "ikey". + break + } + } else { + // "ikey" falls in the range for this table. Add the + // approximate offset of "ikey" within the table. 
+ if m, err := v.s.tops.offsetOf(t, ikey); err == nil { + n += m + } else { + return 0, err + } + } + } + } + + return +} + +func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) { + if maxLevel > 0 { + if len(v.levels) == 0 { + return maxLevel + } + if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) { + var overlaps tFiles + for ; level < maxLevel; level++ { + if pLevel := level + 1; pLevel >= len(v.levels) { + return maxLevel + } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) { + break + } + if gpLevel := level + 2; gpLevel < len(v.levels) { + overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false) + if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) { + break + } + } + } + } + } + return +} + +func (v *version) computeCompaction() { + // Precomputed best level for next compaction + bestLevel := int(-1) + bestScore := float64(-1) + + statFiles := make([]int, len(v.levels)) + statSizes := make([]string, len(v.levels)) + statScore := make([]string, len(v.levels)) + statTotSize := int64(0) + + for level, tables := range v.levels { + var score float64 + size := tables.size() + if level == 0 { + // We treat level-0 specially by bounding the number of files + // instead of number of bytes for two reasons: + // + // (1) With larger write-buffer sizes, it is nice not to do too + // many level-0 compaction. + // + // (2) The files in level-0 are merged on every read and + // therefore we wish to avoid too many files when the individual + // file size is small (perhaps because of a small write-buffer + // setting, or very high compression ratios, or lots of + // overwrites/deletions). 
+ score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger()) + } else { + score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level)) + } + + if score > bestScore { + bestLevel = level + bestScore = score + } + + statFiles[level] = len(tables) + statSizes[level] = shortenb(int(size)) + statScore[level] = fmt.Sprintf("%.2f", score) + statTotSize += size + } + + v.cLevel = bestLevel + v.cScore = bestScore + + v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore) +} + +func (v *version) needCompaction() bool { + return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil +} + +type tablesScratch struct { + added map[int64]atRecord + deleted map[int64]struct{} +} + +type versionStaging struct { + base *version + levels []tablesScratch +} + +func (p *versionStaging) getScratch(level int) *tablesScratch { + if level >= len(p.levels) { + newLevels := make([]tablesScratch, level+1) + copy(newLevels, p.levels) + p.levels = newLevels + } + return &(p.levels[level]) +} + +func (p *versionStaging) commit(r *sessionRecord) { + // Deleted tables. + for _, r := range r.deletedTables { + scratch := p.getScratch(r.level) + if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 { + if scratch.deleted == nil { + scratch.deleted = make(map[int64]struct{}) + } + scratch.deleted[r.num] = struct{}{} + } + if scratch.added != nil { + delete(scratch.added, r.num) + } + } + + // New tables. + for _, r := range r.addedTables { + scratch := p.getScratch(r.level) + if scratch.added == nil { + scratch.added = make(map[int64]atRecord) + } + scratch.added[r.num] = r + if scratch.deleted != nil { + delete(scratch.deleted, r.num) + } + } +} + +func (p *versionStaging) finish() *version { + // Build new version. 
+ nv := newVersion(p.base.s) + numLevel := len(p.levels) + if len(p.base.levels) > numLevel { + numLevel = len(p.base.levels) + } + nv.levels = make([]tFiles, numLevel) + for level := 0; level < numLevel; level++ { + var baseTabels tFiles + if level < len(p.base.levels) { + baseTabels = p.base.levels[level] + } + + if level < len(p.levels) { + scratch := p.levels[level] + + var nt tFiles + // Prealloc list if possible. + if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 { + nt = make(tFiles, 0, n) + } + + // Base tables. + for _, t := range baseTabels { + if _, ok := scratch.deleted[t.fd.Num]; ok { + continue + } + if _, ok := scratch.added[t.fd.Num]; ok { + continue + } + nt = append(nt, t) + } + + // New tables. + for _, r := range scratch.added { + nt = append(nt, tableFileFromRecord(r)) + } + + if len(nt) != 0 { + // Sort tables. + if level == 0 { + nt.sortByNum() + } else { + nt.sortByKey(p.base.s.icmp) + } + + nv.levels[level] = nt + } + } else { + nv.levels[level] = baseTabels + } + } + + // Trim levels. + n := len(nv.levels) + for ; n > 0 && nv.levels[n-1] == nil; n-- { + } + nv.levels = nv.levels[:n] + + // Compute compaction score for new version. 
+ nv.computeCompaction() + + return nv +} + +type versionReleaser struct { + v *version + once bool +} + +func (vr *versionReleaser) Release() { + v := vr.v + v.s.vmu.Lock() + if !vr.once { + v.releaseNB() + vr.once = true + } + v.s.vmu.Unlock() +} diff --git a/vendor/modules.txt b/vendor/modules.txt index cf05e33e0..89c3d8f33 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -265,6 +265,8 @@ github.com/golang/protobuf/ptypes/empty github.com/golang/protobuf/ptypes/struct github.com/golang/protobuf/ptypes/timestamp github.com/golang/protobuf/ptypes/wrappers +# github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db => github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db +github.com/golang/snappy # github.com/google/go-cmp v0.3.0 => github.com/google/go-cmp v0.3.0 github.com/google/go-cmp/cmp github.com/google/go-cmp/cmp/internal/diff @@ -490,6 +492,9 @@ github.com/opencontainers/image-spec/specs-go/v1 # github.com/openshift/api v0.0.0-20180801171038-322a19404e37 => github.com/openshift/api v0.0.0-20180801171038-322a19404e37 github.com/openshift/api/apps/v1 github.com/openshift/api/project/v1 +# github.com/opentracing/opentracing-go v1.1.0 => github.com/opentracing/opentracing-go v1.1.0 +github.com/opentracing/opentracing-go +github.com/opentracing/opentracing-go/log # github.com/pborman/uuid v1.2.0 => github.com/pborman/uuid v1.2.0 github.com/pborman/uuid # github.com/pelletier/go-buffruneio v0.2.0 => github.com/pelletier/go-buffruneio v0.2.0 @@ -535,7 +540,7 @@ github.com/projectcalico/libcalico-go/lib/selector/tokenizer github.com/projectcalico/libcalico-go/lib/set github.com/projectcalico/libcalico-go/lib/validator/v3 github.com/projectcalico/libcalico-go/lib/watch -# github.com/prometheus/client_golang v0.9.3 => github.com/prometheus/client_golang v0.9.3 +# github.com/prometheus/client_golang v0.9.4 => github.com/prometheus/client_golang v0.9.4 github.com/prometheus/client_golang/api github.com/prometheus/client_golang/api/prometheus/v1 
github.com/prometheus/client_golang/prometheus @@ -543,14 +548,26 @@ github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promhttp # github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 => github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 github.com/prometheus/client_model/go -# github.com/prometheus/common v0.4.0 => github.com/prometheus/common v0.4.0 +# github.com/prometheus/common v0.4.1 => github.com/prometheus/common v0.4.0 github.com/prometheus/common/expfmt github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg github.com/prometheus/common/log github.com/prometheus/common/model -# github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084 => github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084 +# github.com/prometheus/procfs v0.0.2 => github.com/prometheus/procfs v0.0.2 github.com/prometheus/procfs github.com/prometheus/procfs/internal/fs +# github.com/prometheus/prometheus v1.8.2 => github.com/prometheus/prometheus v1.8.2 +github.com/prometheus/prometheus/promql +github.com/prometheus/prometheus/storage +github.com/prometheus/prometheus/storage/local +github.com/prometheus/prometheus/storage/local/chunk +github.com/prometheus/prometheus/storage/local/codable +github.com/prometheus/prometheus/storage/local/index +github.com/prometheus/prometheus/storage/metric +github.com/prometheus/prometheus/util/flock +github.com/prometheus/prometheus/util/stats +github.com/prometheus/prometheus/util/strutil +github.com/prometheus/prometheus/util/testutil # github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a => github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a github.com/rcrowley/go-metrics # github.com/satori/go.uuid v1.2.0 => github.com/satori/go.uuid v1.2.0 @@ -586,6 +603,19 @@ github.com/stretchr/objx # github.com/stretchr/testify v1.4.0 => github.com/stretchr/testify v1.4.0 github.com/stretchr/testify/assert 
github.com/stretchr/testify/mock +# github.com/syndtr/goleveldb v1.0.0 => github.com/syndtr/goleveldb v1.0.0 +github.com/syndtr/goleveldb/leveldb +github.com/syndtr/goleveldb/leveldb/cache +github.com/syndtr/goleveldb/leveldb/comparer +github.com/syndtr/goleveldb/leveldb/errors +github.com/syndtr/goleveldb/leveldb/filter +github.com/syndtr/goleveldb/leveldb/iterator +github.com/syndtr/goleveldb/leveldb/journal +github.com/syndtr/goleveldb/leveldb/memdb +github.com/syndtr/goleveldb/leveldb/opt +github.com/syndtr/goleveldb/leveldb/storage +github.com/syndtr/goleveldb/leveldb/table +github.com/syndtr/goleveldb/leveldb/util # github.com/xanzy/ssh-agent v0.2.1 => github.com/xanzy/ssh-agent v0.2.1 github.com/xanzy/ssh-agent # github.com/yashtewari/glob-intersection v0.0.0-20180916065949-5c77d914dd0b => github.com/yashtewari/glob-intersection v0.0.0-20180916065949-5c77d914dd0b