
Imported golang.org/x/telemetry@9c0d19e to avoid go version requirement change. For #3815 For golang/go#70056
359 lines
12 KiB
Go
359 lines
12 KiB
Go
// Copyright 2024 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package telemetry
|
|
|
|
import (
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sync"
|
|
"time"
|
|
|
|
"golang.org/x/sync/errgroup"
|
|
"golang.org/x/telemetry/counter"
|
|
"golang.org/x/telemetry/internal/crashmonitor"
|
|
"golang.org/x/telemetry/internal/telemetry"
|
|
"golang.org/x/telemetry/internal/upload"
|
|
)
|
|
|
|
// Config holds the options that control the behavior of [Start].
type Config struct {
	// ReportCrashes turns on crash reporting when true.
	//
	// Crash reporting is built on the [debug.SetCrashOutput] mechanism,
	// which is a process-wide resource, so the application must not make
	// other calls to that function.
	//
	// ReportCrashes is non-functional unless the program is built with
	// go1.23+.
	ReportCrashes bool

	// Upload makes this program periodically upload approved counters
	// from the local telemetry database to telemetry.go.dev.
	//
	// The option has no effect unless the user has consented to data
	// collection, for example by running cmd/gotelemetry or by affirming
	// the gopls dialog.
	//
	// (Only gopls is expected to use this feature. Longer term, the go
	// command may become the sole program responsible for uploading.)
	Upload bool

	// TelemetryDir names an alternate telemetry directory to write data
	// to. When empty, the default directory is used.
	//
	// It is intended for isolating testing environments.
	TelemetryDir string

	// UploadStartTime, if set, overrides the upload start time: the time
	// the upload logic uses to decide whether counter file data should be
	// uploaded. Only counter files that expired before the start time are
	// considered for upload.
	//
	// Setting it can simulate a future upload that collects recently
	// modified counters.
	UploadStartTime time.Time

	// UploadURL, if set, overrides the URL that receives uploaded reports.
	// When unset, the URL defaults to https://telemetry.go.dev/upload.
	UploadURL string
}
|
|
|
|
// Start initializes telemetry using the specified configuration.
|
|
//
|
|
// Start opens the local telemetry database so that counter increment
|
|
// operations are durably recorded in the local file system.
|
|
//
|
|
// If [Config.Upload] is set, and the user has opted in to telemetry
|
|
// uploading, this process may attempt to upload approved counters
|
|
// to telemetry.go.dev.
|
|
//
|
|
// If [Config.ReportCrashes] is set, any fatal crash will be
|
|
// recorded by incrementing a counter named for the stack of the
|
|
// first running goroutine in the traceback.
|
|
//
|
|
// If either of these flags is set, Start re-executes the current
|
|
// executable as a child process, in a special mode in which it
|
|
// acts as a telemetry sidecar for the parent process (the application).
|
|
// In that mode, the call to Start will never return, so Start must
|
|
// be called immediately within main, even before such things as
|
|
// inspecting the command line. The application should avoid expensive
|
|
// steps or external side effects in init functions, as they will
|
|
// be executed twice (parent and child).
|
|
//
|
|
// Start returns a StartResult, which may be awaited via [StartResult.Wait] to
|
|
// wait for all work done by Start to complete.
|
|
func Start(config Config) *StartResult {
|
|
switch v := os.Getenv(telemetryChildVar); v {
|
|
case "":
|
|
// The subprocess started by parent has GO_TELEMETRY_CHILD=1.
|
|
return parent(config)
|
|
case "1":
|
|
child(config) // child will exit the process when it's done.
|
|
case "2":
|
|
// Do nothing: this was executed directly or indirectly by a child.
|
|
default:
|
|
log.Fatalf("unexpected value for %q: %q", telemetryChildVar, v)
|
|
}
|
|
|
|
return &StartResult{}
|
|
}
|
|
|
|
// MaybeChild executes the telemetry child logic if the calling program is
|
|
// the telemetry child process, and does nothing otherwise. It is meant to be
|
|
// called as the first thing in a program that uses telemetry.Start but cannot
|
|
// call telemetry.Start immediately when it starts.
|
|
func MaybeChild(config Config) {
|
|
if v := os.Getenv(telemetryChildVar); v == "1" {
|
|
child(config) // child will exit the process when it's done.
|
|
}
|
|
// other values of the telemetryChildVar environment variable
|
|
// will be handled by telemetry.Start.
|
|
}
|
|
|
|
// A StartResult is a handle to the outcome of a call to [Start]. Use
// [StartResult.Wait] to block until all work done on behalf of Start has
// completed.
type StartResult struct {
	wg sync.WaitGroup // tracks outstanding background work started by Start
}

// Wait blocks until all work initiated by [Start] has completed.
// Calling Wait on a nil *StartResult returns immediately.
func (res *StartResult) Wait() {
	if res != nil {
		res.wg.Wait()
	}
}
|
|
|
|
// daemonize is a hook applied to the child command before it is started.
// The default implementation does nothing.
// NOTE(review): presumably replaced elsewhere on platforms that need to
// detach the child from the parent — confirm.
var daemonize = func(*exec.Cmd) {}

const (
	// telemetryChildVar is the environment variable that identifies the
	// role of the current process:
	//
	//   - "1": this process is the telemetry child.
	//   - "2": this process is a child of the child, and no further
	//     forking should occur.
	telemetryChildVar = "GO_TELEMETRY_CHILD"

	// telemetryUploadVar is the environment variable that, when set to
	// "1", signals that the upload token has been acquired by the parent
	// and that the child should attempt an upload.
	telemetryUploadVar = "GO_TELEMETRY_CHILD_UPLOAD"
)
|
|
|
|
func parent(config Config) *StartResult {
|
|
if config.TelemetryDir != "" {
|
|
telemetry.Default = telemetry.NewDir(config.TelemetryDir)
|
|
}
|
|
result := new(StartResult)
|
|
|
|
mode, _ := telemetry.Default.Mode()
|
|
if mode == "off" {
|
|
// Telemetry is turned off. Crash reporting doesn't work without telemetry
|
|
// at least set to "local". The upload process runs in both "on" and "local" modes.
|
|
// In local mode the upload process builds local reports but does not do the upload.
|
|
return result
|
|
}
|
|
|
|
counter.Open()
|
|
|
|
if _, err := os.Stat(telemetry.Default.LocalDir()); err != nil {
|
|
// There was a problem statting LocalDir, which is needed for both
|
|
// crash monitoring and counter uploading. Most likely, there was an
|
|
// error creating telemetry.LocalDir in the counter.Open call above.
|
|
// Don't start the child.
|
|
return result
|
|
}
|
|
|
|
childShouldUpload := config.Upload && acquireUploadToken()
|
|
reportCrashes := config.ReportCrashes && crashmonitor.Supported()
|
|
|
|
if reportCrashes || childShouldUpload {
|
|
startChild(reportCrashes, childShouldUpload, result)
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func startChild(reportCrashes, upload bool, result *StartResult) {
|
|
// This process is the application (parent).
|
|
// Fork+exec the telemetry child.
|
|
exe, err := os.Executable()
|
|
if err != nil {
|
|
// There was an error getting os.Executable. It's possible
|
|
// for this to happen on AIX if os.Args[0] is not an absolute
|
|
// path and we can't find os.Args[0] in PATH.
|
|
log.Printf("failed to start telemetry sidecar: os.Executable: %v", err)
|
|
return
|
|
}
|
|
cmd := exec.Command(exe, "** telemetry **") // this unused arg is just for ps(1)
|
|
daemonize(cmd)
|
|
cmd.Env = append(os.Environ(), telemetryChildVar+"=1")
|
|
if upload {
|
|
cmd.Env = append(cmd.Env, telemetryUploadVar+"=1")
|
|
}
|
|
cmd.Dir = telemetry.Default.LocalDir()
|
|
|
|
// The child process must write to a log file, not
|
|
// the stderr file it inherited from the parent, as
|
|
// the child may outlive the parent but should not prolong
|
|
// the life of any pipes created (by the grandparent)
|
|
// to gather the output of the parent.
|
|
//
|
|
// By default, we discard the child process's stderr,
|
|
// but in line with the uploader, log to a file in debug
|
|
// only if that directory was created by the user.
|
|
fd, err := os.Stat(telemetry.Default.DebugDir())
|
|
if err != nil {
|
|
if !os.IsNotExist(err) {
|
|
log.Printf("failed to stat debug directory: %v", err)
|
|
return
|
|
}
|
|
} else if fd.IsDir() {
|
|
// local/debug exists and is a directory. Set stderr to a log file path
|
|
// in local/debug.
|
|
childLogPath := filepath.Join(telemetry.Default.DebugDir(), "sidecar.log")
|
|
childLog, err := os.OpenFile(childLogPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0600)
|
|
if err != nil {
|
|
log.Printf("opening sidecar log file for child: %v", err)
|
|
return
|
|
}
|
|
defer childLog.Close()
|
|
cmd.Stderr = childLog
|
|
}
|
|
|
|
var crashOutputFile *os.File
|
|
if reportCrashes {
|
|
pipe, err := cmd.StdinPipe()
|
|
if err != nil {
|
|
log.Printf("StdinPipe: %v", err)
|
|
return
|
|
}
|
|
|
|
crashOutputFile = pipe.(*os.File) // (this conversion is safe)
|
|
}
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
// The child couldn't be started. Log the failure.
|
|
log.Printf("can't start telemetry child process: %v", err)
|
|
return
|
|
}
|
|
if reportCrashes {
|
|
crashmonitor.Parent(crashOutputFile)
|
|
}
|
|
result.wg.Add(1)
|
|
go func() {
|
|
cmd.Wait() // Release resources if cmd happens not to outlive this process.
|
|
result.wg.Done()
|
|
}()
|
|
}
|
|
|
|
func child(config Config) {
|
|
log.SetPrefix(fmt.Sprintf("telemetry-sidecar (pid %v): ", os.Getpid()))
|
|
|
|
if config.TelemetryDir != "" {
|
|
telemetry.Default = telemetry.NewDir(config.TelemetryDir)
|
|
}
|
|
|
|
// golang/go#67211: be sure to set telemetryChildVar before running the
|
|
// child, because the child itself invokes the go command to download the
|
|
// upload config. If the telemetryChildVar variable is still set to "1",
|
|
// that delegated go command may think that it is itself a telemetry
|
|
// child.
|
|
//
|
|
// On the other hand, if telemetryChildVar were simply unset, then the
|
|
// delegated go commands would fork themselves recursively. Short-circuit
|
|
// this recursion.
|
|
os.Setenv(telemetryChildVar, "2")
|
|
upload := os.Getenv(telemetryUploadVar) == "1"
|
|
|
|
reportCrashes := config.ReportCrashes && crashmonitor.Supported()
|
|
uploadStartTime := config.UploadStartTime
|
|
uploadURL := config.UploadURL
|
|
|
|
// The crashmonitor and/or upload process may themselves record counters.
|
|
counter.Open()
|
|
|
|
// Start crashmonitoring and uploading depending on what's requested
|
|
// and wait for the longer running child to complete before exiting:
|
|
// if we collected a crash before the upload finished, wait for the
|
|
// upload to finish before exiting
|
|
var g errgroup.Group
|
|
|
|
if reportCrashes {
|
|
g.Go(func() error {
|
|
crashmonitor.Child()
|
|
return nil
|
|
})
|
|
}
|
|
if upload {
|
|
g.Go(func() error {
|
|
uploaderChild(uploadStartTime, uploadURL)
|
|
return nil
|
|
})
|
|
}
|
|
g.Wait()
|
|
|
|
os.Exit(0)
|
|
}
|
|
|
|
func uploaderChild(asof time.Time, uploadURL string) {
|
|
if err := upload.Run(upload.RunConfig{
|
|
UploadURL: uploadURL,
|
|
LogWriter: os.Stderr,
|
|
StartTime: asof,
|
|
}); err != nil {
|
|
log.Printf("upload failed: %v", err)
|
|
}
|
|
}
|
|
|
|
// acquireUploadToken acquires a token permitting the caller to upload.
|
|
// To limit the frequency of uploads, only one token is issue per
|
|
// machine per time period.
|
|
// The boolean indicates whether the token was acquired.
|
|
func acquireUploadToken() bool {
|
|
if telemetry.Default.LocalDir() == "" {
|
|
// The telemetry dir wasn't initialized properly, probably because
|
|
// os.UserConfigDir did not complete successfully. In that case
|
|
// there are no counters to upload, so we should just do nothing.
|
|
return false
|
|
}
|
|
tokenfile := filepath.Join(telemetry.Default.LocalDir(), "upload.token")
|
|
const period = 24 * time.Hour
|
|
|
|
// A process acquires a token by successfully creating a
|
|
// well-known file. If the file already exists and has an
|
|
// mtime age less then than the period, the process does
|
|
// not acquire the token. If the file is older than the
|
|
// period, the process is allowed to remove the file and
|
|
// try to re-create it.
|
|
fi, err := os.Stat(tokenfile)
|
|
if err == nil {
|
|
if time.Since(fi.ModTime()) < period {
|
|
return false
|
|
}
|
|
// There's a possible race here where two processes check the
|
|
// token file and see that it's older than the period, then the
|
|
// first one removes it and creates another, and then a second one
|
|
// removes the newly created file and creates yet another
|
|
// file. Then both processes would act as though they had the token.
|
|
// This is very rare, but it's also okay because we're only grabbing
|
|
// the token to do rate limiting, not for correctness.
|
|
_ = os.Remove(tokenfile)
|
|
} else if !os.IsNotExist(err) {
|
|
log.Printf("error acquiring upload taken: statting token file: %v", err)
|
|
return false
|
|
}
|
|
|
|
f, err := os.OpenFile(tokenfile, os.O_CREATE|os.O_EXCL, 0666)
|
|
if err != nil {
|
|
if os.IsExist(err) {
|
|
return false
|
|
}
|
|
log.Printf("error acquiring upload token: creating token file: %v", err)
|
|
return false
|
|
}
|
|
_ = f.Close()
|
|
return true
|
|
}
|