proc/gdbserial,debugger: allow clients to stop a recording (#1890)

Allows Delve clients to stop a recording midway by sending a
Command('halt')
request.

This is implemented by changing debugger.New to start recording the
process on a separate goroutine while holding the processMutex locked.
By locking the processMutex we ensure that almost all RPC requests will
block until the recording is done, since we can not respond correctly
to any of them.
API calls that do not require manipulating or examining the target
process, such as "IsMulticlient", "SetApiVersion" and
"GetState(nowait=true)" will work while we are recording the process.

Two other internal changes are made to the API: both GetState and
Restart become asynchronous requests, like Command. Restart because
this way it can be interrupted by a StopRecording request if the
rerecord option is passed.
GetState because clients need a call that will block until the
recording is compelted and can also be interrupted with a
StopRecording.

Clients that are uninterested in allowing the user to stop a recording
can ignore this change, since eventually they will make a request to
Delve that will block until the recording is completed.

Clients that wish to support this feature must:

1. call GetState(nowait=false) after connecting to Delve, before any
   call that would need to manipulate the target process
2. allow the user to send a StopRecording request during the initial
   GetState call
3. allow the user to send a StopRecording request during any subsequent
   Restart(rerecord=true) request (if supported).

Implements #1747
This commit is contained in:
Alessandro Arzilli 2020-03-24 17:09:28 +01:00 committed by GitHub
parent 475551cf3d
commit 8bb93e9ae1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 266 additions and 75 deletions

@ -13,7 +13,7 @@ Command | Description
[call](#call) | Resumes process, injecting a function call (EXPERIMENTAL!!!)
[continue](#continue) | Run until breakpoint or program termination.
[next](#next) | Step over to next source line.
[restart](#restart) | Restart process from a checkpoint or event.
[restart](#restart) | Restart process.
[rev](#rev) | Reverses the execution of the target program for the command specified.
[rewind](#rewind) | Run backwards until breakpoint or program termination.
[step](#step) | Single step through program.
@ -396,9 +396,21 @@ Argument -a shows more registers.
## restart
Restart process from a checkpoint or event.
Restart process.
For recorded targets the command takes the following forms:
restart resets ot the start of the recording
restart [checkpoint] resets the recording to the given checkpoint
restart -r [newargv...] re-records the target process
For live targets the command takes the following forms:
restart [newargv...] restarts the process
If newargv is omitted the process is restarted (or re-recorded) with the same argument vector.
If -noargs is specified instead, the argument vector is cleared.
restart [event number or checkpoint id]
Aliases: r

@ -11,21 +11,24 @@ import (
"path/filepath"
"strconv"
"strings"
"syscall"
"unicode"
"github.com/go-delve/delve/pkg/proc"
)
// Record uses rr to record the execution of the specified program and
// returns the trace directory's path.
func Record(cmd []string, wd string, quiet bool) (tracedir string, err error) {
// RecordAsync configures rr to record the execution of the specified
// program. Returns a run function which will actually record the program, a
// stop function which will prematurely terminate the recording of the
// program.
func RecordAsync(cmd []string, wd string, quiet bool) (run func() (string, error), stop func() error, err error) {
if err := checkRRAvailabe(); err != nil {
return "", err
return nil, nil, err
}
rfd, wfd, err := os.Pipe()
if err != nil {
return "", err
return nil, nil, err
}
args := make([]string, 0, len(cmd)+2)
@ -40,18 +43,36 @@ func Record(cmd []string, wd string, quiet bool) (tracedir string, err error) {
rrcmd.ExtraFiles = []*os.File{wfd}
rrcmd.Dir = wd
done := make(chan struct{})
tracedirChan := make(chan string)
go func() {
bs, _ := ioutil.ReadAll(rfd)
tracedir = strings.TrimSpace(string(bs))
close(done)
tracedirChan <- strings.TrimSpace(string(bs))
}()
err = rrcmd.Run()
run = func() (string, error) {
err := rrcmd.Run()
_ = wfd.Close()
tracedir := <-tracedirChan
return tracedir, err
}
stop = func() error {
return rrcmd.Process.Signal(syscall.SIGTERM)
}
return run, stop, nil
}
// Record uses rr to record the execution of the specified program and
// returns the trace directory's path.
func Record(cmd []string, wd string, quiet bool) (tracedir string, err error) {
run, _, err := RecordAsync(cmd, wd, quiet)
if err != nil {
return "", err
}
// ignore run errors, it could be the program crashing
wfd.Close()
<-done
return
return run()
}
// Replay starts an instance of rr in replay mode, with the specified trace

@ -395,7 +395,17 @@ The '-a' option adds an expression to the list of expression printed every time
If display is called without arguments it will print the value of all expression in the list.`},
}
if client == nil || client.Recorded() {
addrecorded := client == nil
if !addrecorded {
if state, err := client.GetStateNonBlocking(); err == nil {
addrecorded = state.Recording
if !addrecorded {
addrecorded = client.Recorded()
}
}
}
if addrecorded {
c.cmds = append(c.cmds,
command{
aliases: []string{"rewind", "rw"},
@ -431,14 +441,6 @@ The "note" is arbitrary text that can be used to identify the checkpoint, if it
helpMsg: `Reverses the execution of the target program for the command specified.
Currently, only the rev step-instruction command is supported.`,
})
for i := range c.cmds {
v := &c.cmds[i]
if v.match("restart") {
v.helpMsg = `Restart process from a checkpoint or event.
restart [event number or checkpoint id]`
}
}
}
sort.Sort(ByFirstAlias(c.cmds))

@ -122,6 +122,15 @@ func (t *Term) Close() {
func (t *Term) sigintGuard(ch <-chan os.Signal, multiClient bool) {
for range ch {
t.starlarkEnv.Cancel()
state, err := t.client.GetStateNonBlocking()
if err == nil && state.Recording {
fmt.Printf("received SIGINT, stopping recording (will not forward signal)\n")
err := t.client.StopRecording()
if err != nil {
fmt.Fprintf(os.Stderr, "%v\n", err)
}
continue
}
if multiClient {
answer, err := t.line.Prompt("Would you like to [s]top the target or [q]uit this client, leaving the target running [s/q]? ")
if err != nil {
@ -216,6 +225,10 @@ func (t *Term) Run() (int, error) {
var lastCmd string
// Ensure that the target process is neither running nor recording by
// making a blocking call.
_, _ = t.client.GetState()
for {
cmdstr, err := t.promptForInput()
if err != nil {

@ -26,7 +26,7 @@ func getSuitableMethods(pkg *types.Package, typename string) []*types.Func {
continue
}
if fn.Name() == "Command" {
if fn.Name() == "Command" || fn.Name() == "Restart" || fn.Name() == "State" {
r = append(r, fn)
continue
}
@ -120,8 +120,13 @@ func processServerMethods(serverMethods []*types.Func) []binding {
}
retType := sig.Params().At(1).Type().String()
if fn.Name() == "Command" {
switch fn.Name() {
case "Command":
retType = "rpc2.CommandOut"
case "Restart":
retType = "rpc2.RestartOut"
case "State":
retType = "rpc2.StateOut"
}
bindings[i] = binding{

@ -19,6 +19,11 @@ var ErrNotExecutable = proc.ErrNotExecutable
type DebuggerState struct {
// Running is true if the process is running and no other information can be collected.
Running bool
// Recording is true if the process is currently being recorded and no other
// information can be collected. While the debugger is in this state
// sending a StopRecording request will halt the recording, every other
// request will block until the process has been recorded.
Recording bool
// CurrentThread is the currently selected debugger thread.
CurrentThread *Thread `json:"currentThread,omitempty"`
// SelectedGoroutine is the currently selected goroutine

@ -162,6 +162,9 @@ type Client interface {
// This function will return an error if it reads less than `length` bytes.
ExamineMemory(address uintptr, length int) ([]byte, error)
// StopRecording stops a recording if one is in progress.
StopRecording() error
// Disconnect closes the connection to the server without sending a Detach request first.
// If cont is true a continue command will be sent instead.
Disconnect(cont bool) error

@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"go/parser"
"os"
"path/filepath"
"regexp"
"runtime"
@ -43,6 +44,9 @@ type Debugger struct {
running bool
runningMutex sync.Mutex
stopRecording func() error
recordMutex sync.Mutex
}
// Config provides the configuration to start a Debugger.
@ -133,7 +137,10 @@ func New(config *Config, processArgs []string) (*Debugger, error) {
}
return nil, err
}
d.target = p
if p != nil {
// if p == nil and err == nil then we are doing a recording, don't touch d.target
d.target = p
}
if err := d.checkGoVersion(); err != nil {
d.target.Detach(true)
return nil, err
@ -158,6 +165,10 @@ func (d *Debugger) checkGoVersion() error {
if !d.config.CheckGoVersion {
return nil
}
if d.isRecording() {
// do not do anything if we are still recording
return nil
}
producer := d.target.BinInfo().Producer()
if producer == "" {
return nil
@ -173,8 +184,44 @@ func (d *Debugger) Launch(processArgs []string, wd string) (*proc.Target, error)
case "lldb":
return betterGdbserialLaunchError(gdbserial.LLDBLaunch(processArgs, wd, d.config.Foreground, d.config.DebugInfoDirectories))
case "rr":
p, _, err := gdbserial.RecordAndReplay(processArgs, wd, false, d.config.DebugInfoDirectories)
return p, err
if d.target != nil {
// restart should not call us if the backend is 'rr'
panic("internal error: call to Launch with rr backend and target already exists")
}
run, stop, err := gdbserial.RecordAsync(processArgs, wd, false)
if err != nil {
return nil, err
}
// let the initialization proceed but hold the processMutex lock so that
// any other request to debugger will block except State(nowait=true) and
// Command(halt).
d.processMutex.Lock()
d.recordingStart(stop)
go func() {
defer d.processMutex.Unlock()
p, err := d.recordingRun(run)
if err != nil {
d.log.Errorf("could not record target: %v", err)
// this is ugly but we can't respond to any client requests at this
// point so it's better if we die.
os.Exit(1)
}
d.recordingDone()
d.target = p
if err := d.checkGoVersion(); err != nil {
d.log.Error(err)
err := d.target.Detach(true)
if err != nil {
d.log.Errorf("Error detaching from target: %v", err)
}
}
}()
return nil, nil
case "default":
if runtime.GOOS == "darwin" {
return betterGdbserialLaunchError(gdbserial.LLDBLaunch(processArgs, wd, d.config.Foreground, d.config.DebugInfoDirectories))
@ -185,6 +232,33 @@ func (d *Debugger) Launch(processArgs []string, wd string) (*proc.Target, error)
}
}
func (d *Debugger) recordingStart(stop func() error) {
d.recordMutex.Lock()
d.stopRecording = stop
d.recordMutex.Unlock()
}
func (d *Debugger) recordingDone() {
d.recordMutex.Lock()
d.stopRecording = nil
d.recordMutex.Unlock()
}
func (d *Debugger) isRecording() bool {
d.recordMutex.Lock()
defer d.recordMutex.Unlock()
return d.stopRecording != nil
}
func (d *Debugger) recordingRun(run func() (string, error)) (*proc.Target, error) {
tracedir, err := run()
if err != nil && tracedir == "" {
return nil, err
}
return gdbserial.Replay(tracedir, false, true, d.config.DebugInfoDirectories)
}
// ErrNoAttachPath is the error returned when the client tries to attach to
// a process on macOS using the lldb backend without specifying the path to
// the target's executable.
@ -223,12 +297,16 @@ func betterGdbserialLaunchError(p *proc.Target, err error) (*proc.Target, error)
// ProcessPid returns the PID of the process
// the debugger is debugging.
func (d *Debugger) ProcessPid() int {
d.processMutex.Lock()
defer d.processMutex.Unlock()
return d.target.Pid()
}
// LastModified returns the time that the process' executable was last
// modified.
func (d *Debugger) LastModified() time.Time {
d.processMutex.Lock()
defer d.processMutex.Unlock()
return d.target.BinInfo().LastModified()
}
@ -292,6 +370,10 @@ func (d *Debugger) detach(kill bool) error {
var ErrCanNotRestart = errors.New("can not restart this target")
// ErrNotRecording is returned when StopRecording is called while the
// debugger is not recording the target.
var ErrNotRecording = errors.New("debugger is not recording")
// Restart will restart the target process, first killing
// and then exec'ing it again.
// If the target process is a recording it will restart it from the given
@ -316,7 +398,7 @@ func (d *Debugger) Restart(rerecord bool, pos string, resetArgs bool, newArgs []
if valid, _ := d.target.Valid(); valid && !recorded {
// Ensure the process is in a PTRACE_STOP.
if err := stopProcess(d.ProcessPid()); err != nil {
if err := stopProcess(d.target.Pid()); err != nil {
return nil, err
}
}
@ -326,10 +408,24 @@ func (d *Debugger) Restart(rerecord bool, pos string, resetArgs bool, newArgs []
if resetArgs {
d.processArgs = append([]string{d.processArgs[0]}, newArgs...)
}
p, err := d.Launch(d.processArgs, d.config.WorkingDir)
var p *proc.Target
var err error
if recorded {
run, stop, err2 := gdbserial.RecordAsync(d.processArgs, d.config.WorkingDir, false)
if err2 != nil {
return nil, err2
}
d.recordingStart(stop)
p, err = d.recordingRun(run)
d.recordingDone()
} else {
p, err = d.Launch(d.processArgs, d.config.WorkingDir)
}
if err != nil {
return nil, fmt.Errorf("could not launch process: %s", err)
}
discarded := []api.DiscardedBreakpoint{}
for _, oldBp := range api.ConvertBreakpoints(d.breakpoints()) {
if oldBp.ID < 0 {
@ -362,6 +458,10 @@ func (d *Debugger) State(nowait bool) (*api.DebuggerState, error) {
return &api.DebuggerState{Running: true}, nil
}
if d.isRecording() && nowait {
return &api.DebuggerState{Recording: true}, nil
}
d.processMutex.Lock()
defer d.processMutex.Unlock()
return d.state(nil)
@ -533,6 +633,8 @@ func (d *Debugger) AmendBreakpoint(amend *api.Breakpoint) error {
// CancelNext will clear internal breakpoints, thus cancelling the 'next',
// 'step' or 'stepout' operation.
func (d *Debugger) CancelNext() error {
d.processMutex.Lock()
defer d.processMutex.Unlock()
return d.target.ClearInternalBreakpoints()
}
@ -681,7 +783,12 @@ func (d *Debugger) Command(command *api.DebuggerCommand) (*api.DebuggerState, er
// RequestManualStop does not invoke any ptrace syscalls, so it's safe to
// access the process directly.
d.log.Debug("halting")
err = d.target.RequestManualStop()
d.recordMutex.Lock()
if d.stopRecording == nil {
err = d.target.RequestManualStop()
}
d.recordMutex.Unlock()
}
withBreakpointInfo := true
@ -1439,7 +1546,9 @@ func (d *Debugger) GetVersion(out *api.GetVersionOut) error {
}
}
out.TargetGoVersion = d.target.BinInfo().Producer()
if !d.isRecording() && !d.isRunning() {
out.TargetGoVersion = d.target.BinInfo().Producer()
}
out.MinSupportedVersionOfGo = fmt.Sprintf("%d.%d.0", goversion.MinSupportedVersionOfGoMajor, goversion.MinSupportedVersionOfGoMinor)
out.MaxSupportedVersionOfGo = fmt.Sprintf("%d.%d.0", goversion.MaxSupportedVersionOfGoMajor, goversion.MaxSupportedVersionOfGoMinor)
@ -1476,6 +1585,16 @@ func (d *Debugger) ListPackagesBuildInfo(includeFiles bool) []api.PackageBuildIn
return r
}
// StopRecording stops a recording (if one is in progress)
func (d *Debugger) StopRecording() error {
d.recordMutex.Lock()
defer d.recordMutex.Unlock()
if d.stopRecording == nil {
return ErrNotRecording
}
return d.stopRecording()
}
func go11DecodeErrorCheck(err error) error {
if _, isdecodeerr := err.(dwarf.DecodeError); !isdecodeerr {
return err

@ -452,6 +452,10 @@ func (c *RPCClient) ExamineMemory(address uintptr, count int) ([]byte, error) {
return out.Mem, nil
}
func (c *RPCClient) StopRecording() error {
return c.call("StopRecording", StopRecordingIn{}, &StopRecordingOut{})
}
func (c *RPCClient) call(method string, args, reply interface{}) error {
return c.client.Call("RPCServer."+method, args, reply)
}

@ -83,13 +83,15 @@ type RestartOut struct {
}
// Restart restarts program.
func (s *RPCServer) Restart(arg RestartIn, out *RestartOut) error {
func (s *RPCServer) Restart(arg RestartIn, cb service.RPCCallback) {
if s.config.AttachPid != 0 {
return errors.New("cannot restart process Delve did not create")
cb.Return(nil, errors.New("cannot restart process Delve did not create"))
return
}
var out RestartOut
var err error
out.DiscardedBreakpoints, err = s.debugger.Restart(arg.Rerecord, arg.Position, arg.ResetArgs, arg.NewArgs)
return err
cb.Return(out, err)
}
type StateIn struct {
@ -102,13 +104,15 @@ type StateOut struct {
}
// State returns the current debugger state.
func (s *RPCServer) State(arg StateIn, out *StateOut) error {
func (s *RPCServer) State(arg StateIn, cb service.RPCCallback) {
var out StateOut
st, err := s.debugger.State(arg.NonBlocking)
if err != nil {
return err
cb.Return(nil, err)
return
}
out.State = st
return nil
cb.Return(out, nil)
}
type CommandOut struct {
@ -778,3 +782,19 @@ func (s *RPCServer) ExamineMemory(arg ExamineMemoryIn, out *ExaminedMemoryOut) e
out.Mem = Mem
return nil
}
type StopRecordingIn struct {
}
type StopRecordingOut struct {
}
func (s *RPCServer) StopRecording(arg StopRecordingIn, cb service.RPCCallback) {
var out StopRecordingOut
err := s.debugger.StopRecording()
if err != nil {
cb.Return(nil, err)
return
}
cb.Return(out, nil)
}

@ -3,7 +3,6 @@ package rpccommon
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net"
@ -95,14 +94,6 @@ func (s *ServerImpl) Stop() error {
return s.debugger.Detach(kill)
}
// Restart restarts the debugger.
func (s *ServerImpl) Restart() error {
if s.config.AttachPid != 0 {
return errors.New("cannot restart process Delve did not create")
}
return s.s2.Restart(rpc2.RestartIn{}, nil)
}
// Run starts a debugger and exposes it with an HTTP server. The debugger
// itself can be stopped with the `detach` API. Run blocks until the HTTP
// server stops.

@ -154,19 +154,6 @@ func Test1Restart_duringStop(t *testing.T) {
})
}
func Test1Restart_attachPid(t *testing.T) {
// Assert it does not work and returns error.
// We cannot restart a process we did not spawn.
server := rpccommon.NewServer(&service.Config{
Listener: nil,
AttachPid: 999,
Backend: testBackend,
})
if err := server.Restart(); err == nil {
t.Fatal("expected error on restart after attaching to pid but got none")
}
}
func Test1ClientServer_exit(t *testing.T) {
withTestClient1("continuetestprog", t, func(c *rpc1.RPCClient) {
state, err := c.GetState()

@ -190,20 +190,6 @@ func TestRestart_duringStop(t *testing.T) {
})
}
func TestRestart_attachPid(t *testing.T) {
// Assert it does not work and returns error.
// We cannot restart a process we did not spawn.
server := rpccommon.NewServer(&service.Config{
Listener: nil,
AttachPid: 999,
APIVersion: 2,
Backend: testBackend,
})
if err := server.Restart(); err == nil {
t.Fatal("expected error on restart after attaching to pid but got none")
}
}
func TestClientServer_exit(t *testing.T) {
protest.AllowRecording(t)
withTestClient2("continuetestprog", t, func(c service.Client) {
@ -1881,3 +1867,26 @@ func TestDoubleCreateBreakpoint(t *testing.T) {
}
})
}
func TestStopRecording(t *testing.T) {
protest.AllowRecording(t)
if testBackend != "rr" {
t.Skip("only for rr backend")
}
withTestClient2("sleep", t, func(c service.Client) {
time.Sleep(time.Second)
c.StopRecording()
_, err := c.GetState()
assertNoError(err, t, "GetState()")
// try rerecording
go func() {
c.RestartFrom(true, "", false, nil)
}()
time.Sleep(time.Second) // hopefully the re-recording started...
c.StopRecording()
_, err = c.GetState()
assertNoError(err, t, "GetState()")
})
}