449
vendor/go.starlark.net/starlark/profile.go
generated
vendored
Normal file
449
vendor/go.starlark.net/starlark/profile.go
generated
vendored
Normal file
@@ -0,0 +1,449 @@
|
||||
// Copyright 2019 The Bazel Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package starlark
|
||||
|
||||
// This file defines a simple execution-time profiler for Starlark.
|
||||
// It measures the wall time spent executing Starlark code, and emits a
|
||||
// gzipped protocol message in pprof format (github.com/google/pprof).
|
||||
//
|
||||
// When profiling is enabled, the interpreter calls the profiler to
|
||||
// indicate the start and end of each "span" or time interval. A leaf
|
||||
// function (whether Go or Starlark) has a single span. A function that
|
||||
// calls another function has spans for each interval in which it is the
|
||||
// top of the stack. (A LOAD instruction also ends a span.)
|
||||
//
|
||||
// At the start of a span, the interpreter records the current time in
|
||||
// the thread's topmost frame. At the end of the span, it obtains the
|
||||
// time again and subtracts the span start time. The difference is added
|
||||
// to an accumulator variable in the thread. If the accumulator exceeds
|
||||
// some fixed quantum (10ms, say), the profiler records the current call
|
||||
// stack and sends it to the profiler goroutine, along with the number
|
||||
// of quanta, which are subtracted. For example, if the accumulator
|
||||
// holds 3ms and then a completed span adds 25ms to it, its value is 28ms,
|
||||
// which exceeeds 10ms. The profiler records a stack with the value 20ms
|
||||
// (2 quanta), and the accumulator is left with 8ms.
|
||||
//
|
||||
// The profiler goroutine converts the stacks into the pprof format and
|
||||
// emits a gzip-compressed protocol message to the designated output
|
||||
// file. We use a hand-written streaming proto encoder to avoid
|
||||
// dependencies on pprof and proto, and to avoid the need to
|
||||
// materialize the profile data structure in memory.
|
||||
//
|
||||
// A limitation of this profiler is that it measures wall time, which
|
||||
// does not necessarily correspond to CPU time. A CPU profiler requires
|
||||
// that only running (not runnable) threads are sampled; this is
|
||||
// commonly achieved by having the kernel deliver a (PROF) signal to an
|
||||
// arbitrary running thread, through setitimer(2). The CPU profiler in the
|
||||
// Go runtime uses this mechanism, but it is not possible for a Go
|
||||
// application to register a SIGPROF handler, nor is it possible for a
|
||||
// Go handler for some other signal to read the stack pointer of
|
||||
// the interrupted thread.
|
||||
//
|
||||
// Two caveats:
|
||||
// (1) it is tempting to send the leaf Frame directly to the profiler
|
||||
// goroutine instead of making a copy of the stack, since a Frame is a
|
||||
// spaghetti stack--a linked list. However, as soon as execution
|
||||
// resumes, the stack's Frame.pc values may be mutated, so Frames are
|
||||
// not safe to share with the asynchronous profiler goroutine.
|
||||
// (2) it is tempting to use Callables as keys in a map when tabulating
|
||||
// the pprof protocols's Function entities. However, we cannot assume
|
||||
// that Callables are valid map keys, and furthermore we must not
|
||||
// pin function values in memory indefinitely as this may cause lambda
|
||||
// values to keep their free variables live much longer than necessary.
|
||||
|
||||
// TODO(adonovan):
|
||||
// - make Start/Stop fully thread-safe.
|
||||
// - fix the pc hack.
|
||||
// - experiment with other values of quantum.
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"reflect"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"go.starlark.net/syntax"
|
||||
)
|
||||
|
||||
// StartProfile enables time profiling of all Starlark threads,
|
||||
// and writes a profile in pprof format to w.
|
||||
// It must be followed by a call to StopProfiler to stop
|
||||
// the profiler and finalize the profile.
|
||||
//
|
||||
// StartProfile returns an error if profiling was already enabled.
|
||||
//
|
||||
// StartProfile must not be called concurrently with Starlark execution.
|
||||
func StartProfile(w io.Writer) error {
|
||||
if !atomic.CompareAndSwapUint32(&profiler.on, 0, 1) {
|
||||
return fmt.Errorf("profiler already running")
|
||||
}
|
||||
|
||||
// TODO(adonovan): make the API fully concurrency-safe.
|
||||
// The main challenge is racy reads/writes of profiler.events,
|
||||
// and of send/close races on the channel it refers to.
|
||||
// It's easy to solve them with a mutex but harder to do
|
||||
// it efficiently.
|
||||
|
||||
profiler.events = make(chan *profEvent, 1)
|
||||
profiler.done = make(chan error)
|
||||
|
||||
go profile(w)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// StopProfiler stops the profiler started by a prior call to
|
||||
// StartProfile and finalizes the profile. It returns an error if the
|
||||
// profile could not be completed.
|
||||
//
|
||||
// StopProfiler must not be called concurrently with Starlark execution.
|
||||
func StopProfile() error {
|
||||
// Terminate the profiler goroutine and get its result.
|
||||
close(profiler.events)
|
||||
err := <-profiler.done
|
||||
|
||||
profiler.done = nil
|
||||
profiler.events = nil
|
||||
atomic.StoreUint32(&profiler.on, 0)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// globals
|
||||
var profiler struct {
|
||||
on uint32 // nonzero => profiler running
|
||||
events chan *profEvent // profile events from interpreter threads
|
||||
done chan error // indicates profiler goroutine is ready
|
||||
}
|
||||
|
||||
func (thread *Thread) beginProfSpan() {
|
||||
if profiler.events == nil {
|
||||
return // profiling not enabled
|
||||
}
|
||||
|
||||
thread.frameAt(0).spanStart = nanotime()
|
||||
}
|
||||
|
||||
// TODO(adonovan): experiment with smaller values,
|
||||
// which trade space and time for greater precision.
|
||||
const quantum = 10 * time.Millisecond
|
||||
|
||||
func (thread *Thread) endProfSpan() {
|
||||
if profiler.events == nil {
|
||||
return // profiling not enabled
|
||||
}
|
||||
|
||||
// Add the span to the thread's accumulator.
|
||||
thread.proftime += time.Duration(nanotime() - thread.frameAt(0).spanStart)
|
||||
if thread.proftime < quantum {
|
||||
return
|
||||
}
|
||||
|
||||
// Only record complete quanta.
|
||||
n := thread.proftime / quantum
|
||||
thread.proftime -= n * quantum
|
||||
|
||||
// Copy the stack.
|
||||
// (We can't save thread.frame because its pc will change.)
|
||||
ev := &profEvent{
|
||||
thread: thread,
|
||||
time: n * quantum,
|
||||
}
|
||||
ev.stack = ev.stackSpace[:0]
|
||||
for i := range thread.stack {
|
||||
fr := thread.frameAt(i)
|
||||
ev.stack = append(ev.stack, profFrame{
|
||||
pos: fr.Position(),
|
||||
fn: fr.Callable(),
|
||||
pc: fr.pc,
|
||||
})
|
||||
}
|
||||
|
||||
profiler.events <- ev
|
||||
}
|
||||
|
||||
type profEvent struct {
|
||||
thread *Thread // currently unused
|
||||
time time.Duration
|
||||
stack []profFrame
|
||||
stackSpace [8]profFrame // initial space for stack
|
||||
}
|
||||
|
||||
type profFrame struct {
|
||||
fn Callable // don't hold this live for too long (prevents GC of lambdas)
|
||||
pc uint32 // program counter (Starlark frames only)
|
||||
pos syntax.Position // position of pc within this frame
|
||||
}
|
||||
|
||||
// profile is the profiler goroutine.
|
||||
// It runs until StopProfiler is called.
|
||||
func profile(w io.Writer) {
|
||||
// Field numbers from pprof protocol.
|
||||
// See https://github.com/google/pprof/blob/master/proto/profile.proto
|
||||
const (
|
||||
Profile_sample_type = 1 // repeated ValueType
|
||||
Profile_sample = 2 // repeated Sample
|
||||
Profile_mapping = 3 // repeated Mapping
|
||||
Profile_location = 4 // repeated Location
|
||||
Profile_function = 5 // repeated Function
|
||||
Profile_string_table = 6 // repeated string
|
||||
Profile_time_nanos = 9 // int64
|
||||
Profile_duration_nanos = 10 // int64
|
||||
Profile_period_type = 11 // ValueType
|
||||
Profile_period = 12 // int64
|
||||
|
||||
ValueType_type = 1 // int64
|
||||
ValueType_unit = 2 // int64
|
||||
|
||||
Sample_location_id = 1 // repeated uint64
|
||||
Sample_value = 2 // repeated int64
|
||||
Sample_label = 3 // repeated Label
|
||||
|
||||
Label_key = 1 // int64
|
||||
Label_str = 2 // int64
|
||||
Label_num = 3 // int64
|
||||
Label_num_unit = 4 // int64
|
||||
|
||||
Location_id = 1 // uint64
|
||||
Location_mapping_id = 2 // uint64
|
||||
Location_address = 3 // uint64
|
||||
Location_line = 4 // repeated Line
|
||||
|
||||
Line_function_id = 1 // uint64
|
||||
Line_line = 2 // int64
|
||||
|
||||
Function_id = 1 // uint64
|
||||
Function_name = 2 // int64
|
||||
Function_system_name = 3 // int64
|
||||
Function_filename = 4 // int64
|
||||
Function_start_line = 5 // int64
|
||||
)
|
||||
|
||||
bufw := bufio.NewWriter(w) // write file in 4KB (not 240B flate-sized) chunks
|
||||
gz := gzip.NewWriter(bufw)
|
||||
enc := protoEncoder{w: gz}
|
||||
|
||||
// strings
|
||||
stringIndex := make(map[string]int64)
|
||||
str := func(s string) int64 {
|
||||
i, ok := stringIndex[s]
|
||||
if !ok {
|
||||
i = int64(len(stringIndex))
|
||||
enc.string(Profile_string_table, s)
|
||||
stringIndex[s] = i
|
||||
}
|
||||
return i
|
||||
}
|
||||
str("") // entry 0
|
||||
|
||||
// functions
|
||||
//
|
||||
// function returns the ID of a Callable for use in Line.FunctionId.
|
||||
// The ID is the same as the function's logical address,
|
||||
// which is supplied by the caller to avoid the need to recompute it.
|
||||
functionId := make(map[uintptr]uint64)
|
||||
function := func(fn Callable, addr uintptr) uint64 {
|
||||
id, ok := functionId[addr]
|
||||
if !ok {
|
||||
id = uint64(addr)
|
||||
|
||||
var pos syntax.Position
|
||||
if fn, ok := fn.(callableWithPosition); ok {
|
||||
pos = fn.Position()
|
||||
}
|
||||
|
||||
name := fn.Name()
|
||||
if name == "<toplevel>" {
|
||||
name = pos.Filename()
|
||||
}
|
||||
|
||||
nameIndex := str(name)
|
||||
|
||||
fun := new(bytes.Buffer)
|
||||
funenc := protoEncoder{w: fun}
|
||||
funenc.uint(Function_id, id)
|
||||
funenc.int(Function_name, nameIndex)
|
||||
funenc.int(Function_system_name, nameIndex)
|
||||
funenc.int(Function_filename, str(pos.Filename()))
|
||||
funenc.int(Function_start_line, int64(pos.Line))
|
||||
enc.bytes(Profile_function, fun.Bytes())
|
||||
|
||||
functionId[addr] = id
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// locations
|
||||
//
|
||||
// location returns the ID of the location denoted by fr.
|
||||
// For Starlark frames, this is the Frame pc.
|
||||
locationId := make(map[uintptr]uint64)
|
||||
location := func(fr profFrame) uint64 {
|
||||
fnAddr := profFuncAddr(fr.fn)
|
||||
|
||||
// For Starlark functions, the frame position
|
||||
// represents the current PC value.
|
||||
// Mix it into the low bits of the address.
|
||||
// This is super hacky and may result in collisions
|
||||
// in large functions or if functions are numerous.
|
||||
// TODO(adonovan): fix: try making this cleaner by treating
|
||||
// each bytecode segment as a Profile.Mapping.
|
||||
pcAddr := fnAddr
|
||||
if _, ok := fr.fn.(*Function); ok {
|
||||
pcAddr = (pcAddr << 16) ^ uintptr(fr.pc)
|
||||
}
|
||||
|
||||
id, ok := locationId[pcAddr]
|
||||
if !ok {
|
||||
id = uint64(pcAddr)
|
||||
|
||||
line := new(bytes.Buffer)
|
||||
lineenc := protoEncoder{w: line}
|
||||
lineenc.uint(Line_function_id, function(fr.fn, fnAddr))
|
||||
lineenc.int(Line_line, int64(fr.pos.Line))
|
||||
loc := new(bytes.Buffer)
|
||||
locenc := protoEncoder{w: loc}
|
||||
locenc.uint(Location_id, id)
|
||||
locenc.uint(Location_address, uint64(pcAddr))
|
||||
locenc.bytes(Location_line, line.Bytes())
|
||||
enc.bytes(Profile_location, loc.Bytes())
|
||||
|
||||
locationId[pcAddr] = id
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
wallNanos := new(bytes.Buffer)
|
||||
wnenc := protoEncoder{w: wallNanos}
|
||||
wnenc.int(ValueType_type, str("wall"))
|
||||
wnenc.int(ValueType_unit, str("nanoseconds"))
|
||||
|
||||
// informational fields of Profile
|
||||
enc.bytes(Profile_sample_type, wallNanos.Bytes())
|
||||
enc.int(Profile_period, quantum.Nanoseconds()) // magnitude of sampling period
|
||||
enc.bytes(Profile_period_type, wallNanos.Bytes()) // dimension and unit of period
|
||||
enc.int(Profile_time_nanos, time.Now().UnixNano()) // start (real) time of profile
|
||||
|
||||
startNano := nanotime()
|
||||
|
||||
// Read profile events from the channel
|
||||
// until it is closed by StopProfiler.
|
||||
for e := range profiler.events {
|
||||
sample := new(bytes.Buffer)
|
||||
sampleenc := protoEncoder{w: sample}
|
||||
sampleenc.int(Sample_value, e.time.Nanoseconds()) // wall nanoseconds
|
||||
for _, fr := range e.stack {
|
||||
sampleenc.uint(Sample_location_id, location(fr))
|
||||
}
|
||||
enc.bytes(Profile_sample, sample.Bytes())
|
||||
}
|
||||
|
||||
endNano := nanotime()
|
||||
enc.int(Profile_duration_nanos, endNano-startNano)
|
||||
|
||||
err := gz.Close() // Close reports any prior write error
|
||||
if flushErr := bufw.Flush(); err == nil {
|
||||
err = flushErr
|
||||
}
|
||||
profiler.done <- err
|
||||
}
|
||||
|
||||
// nanotime returns the time in nanoseconds since epoch.
|
||||
// It is implemented by runtime.nanotime using the linkname hack;
|
||||
// runtime.nanotime is defined for all OSs/ARCHS and uses the
|
||||
// monotonic system clock, which there is no portable way to access.
|
||||
// Should that function ever go away, these alternatives exist:
|
||||
//
|
||||
// // POSIX only. REALTIME not MONOTONIC. 17ns.
|
||||
// var tv syscall.Timeval
|
||||
// syscall.Gettimeofday(&tv) // can't fail
|
||||
// return tv.Nano()
|
||||
//
|
||||
// // Portable. REALTIME not MONOTONIC. 46ns.
|
||||
// return time.Now().Nanoseconds()
|
||||
//
|
||||
// // POSIX only. Adds a dependency.
|
||||
// import "golang.org/x/sys/unix"
|
||||
// var ts unix.Timespec
|
||||
// unix.ClockGettime(CLOCK_MONOTONIC, &ts) // can't fail
|
||||
// return unix.TimespecToNsec(ts)
|
||||
//
|
||||
//go:linkname nanotime runtime.nanotime
|
||||
func nanotime() int64
|
||||
|
||||
// profFuncAddr returns the canonical "address"
|
||||
// of a Callable for use by the profiler.
|
||||
func profFuncAddr(fn Callable) uintptr {
|
||||
switch fn := fn.(type) {
|
||||
case *Builtin:
|
||||
return reflect.ValueOf(fn.fn).Pointer()
|
||||
case *Function:
|
||||
return uintptr(unsafe.Pointer(fn.funcode))
|
||||
}
|
||||
|
||||
// User-defined callable types are typically of
|
||||
// of kind pointer-to-struct. Handle them specially.
|
||||
if v := reflect.ValueOf(fn); v.Type().Kind() == reflect.Ptr {
|
||||
return v.Pointer()
|
||||
}
|
||||
|
||||
// Address zero is reserved by the protocol.
|
||||
// Use 1 for callables we don't recognize.
|
||||
log.Printf("Starlark profiler: no address for Callable %T", fn)
|
||||
return 1
|
||||
}
|
||||
|
||||
// We encode the protocol message by hand to avoid making
|
||||
// the interpreter depend on both github.com/google/pprof
|
||||
// and github.com/golang/protobuf.
|
||||
//
|
||||
// This also avoids the need to materialize a protocol message object
|
||||
// tree of unbounded size and serialize it all at the end.
|
||||
// The pprof format appears to have been designed to
|
||||
// permit streaming implementations such as this one.
|
||||
//
|
||||
// See https://developers.google.com/protocol-buffers/docs/encoding.
|
||||
type protoEncoder struct {
|
||||
w io.Writer // *bytes.Buffer or *gzip.Writer
|
||||
tmp [binary.MaxVarintLen64]byte
|
||||
}
|
||||
|
||||
func (e *protoEncoder) uvarint(x uint64) {
|
||||
n := binary.PutUvarint(e.tmp[:], x)
|
||||
e.w.Write(e.tmp[:n])
|
||||
}
|
||||
|
||||
func (e *protoEncoder) tag(field, wire uint) {
|
||||
e.uvarint(uint64(field<<3 | wire))
|
||||
}
|
||||
|
||||
func (e *protoEncoder) string(field uint, s string) {
|
||||
e.tag(field, 2) // length-delimited
|
||||
e.uvarint(uint64(len(s)))
|
||||
io.WriteString(e.w, s)
|
||||
}
|
||||
|
||||
func (e *protoEncoder) bytes(field uint, b []byte) {
|
||||
e.tag(field, 2) // length-delimited
|
||||
e.uvarint(uint64(len(b)))
|
||||
e.w.Write(b)
|
||||
}
|
||||
|
||||
func (e *protoEncoder) uint(field uint, x uint64) {
|
||||
e.tag(field, 0) // varint
|
||||
e.uvarint(x)
|
||||
}
|
||||
|
||||
func (e *protoEncoder) int(field uint, x int64) {
|
||||
e.tag(field, 0) // varint
|
||||
e.uvarint(uint64(x))
|
||||
}
|
||||
Reference in New Issue
Block a user