
If a breakpoint is hit close to process death on a thread that isn't the group leader, the process could die while we are trying to stop it. This can be easily reproduced by having the goroutine that's executing main.main (which will almost always run on the thread group leader) wait for a second goroutine before exiting, then setting a breakpoint on the second goroutine and stepping through it (see TestIssue1101 in proc_test.go). When stepping over the return instruction of main.f, the deferred wg.Done() call will be executed, which will cause the main goroutine to resume and proceed to exit. Both the temporary breakpoint on wg.Done and the temporary breakpoint on the return address of main.f will be in close proximity to main.main calling os.Exit() and causing the death of the thread group leader. Under these circumstances the call to native.(*Thread).waitFast in native.(*Thread).halt can hang forever due to a bug similar to https://sourceware.org/bugzilla/show_bug.cgi?id=12702 (see comment in native.(*Thread).wait for an explanation). Replacing waitFast with a normal wait works in most circumstances; however, besides the performance hit, it looks like in these circumstances trapWait sometimes receives a spurious SIGTRAP on the dying group leader, which would cause the subsequent call to wait in halt to accidentally reap the process without noting that it did exit. Instead, this patch removes the call to wait from halt and calls trapWait in a loop in setCurrentBreakpoints until all threads are set to running=false. This is also a better fix than the workaround for the ESRCH error while setting current breakpoints implemented in 94b50d. Fixes #1101
464 lines
12 KiB
Go
464 lines
12 KiB
Go
package native
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
sys "golang.org/x/sys/unix"
|
|
|
|
"github.com/derekparker/delve/pkg/proc"
|
|
)
|
|
|
|
// Process statuses: the single-character states that are compared against
// the state field read from /proc/<pid>/stat.
const (
	StatusRunning   = 'R'
	StatusSleeping  = 'S'
	StatusZombie    = 'Z'
	StatusTraceStop = 't'

	// StatusTraceStopT is how kernel 2.6 reports a tracing stop.
	// TODO(derekparker) The meaning of 'T' depends on the kernel version (it
	// is a job control stop on modern 3.x+ kernels), so we may want to
	// differentiate at some point.
	StatusTraceStopT = 'T'
)
|
|
|
|
// OSProcessDetails holds the process details that are specific to Linux.
type OSProcessDetails struct {
	// comm is the task name of the process, taken from /proc/<pid>/comm or
	// parsed out of /proc/<pid>/stat, with every '%' doubled.
	comm string
}
|
|
|
|
// Launch creates and begins debugging a new process. First entry in
|
|
// `cmd` is the program to run, and then rest are the arguments
|
|
// to be supplied to that process. `wd` is working directory of the program.
|
|
func Launch(cmd []string, wd string) (*Process, error) {
|
|
var (
|
|
process *exec.Cmd
|
|
err error
|
|
)
|
|
// check that the argument to Launch is an executable file
|
|
if fi, staterr := os.Stat(cmd[0]); staterr == nil && (fi.Mode()&0111) == 0 {
|
|
return nil, proc.NotExecutableErr
|
|
}
|
|
dbp := New(0)
|
|
dbp.execPtraceFunc(func() {
|
|
process = exec.Command(cmd[0])
|
|
process.Args = cmd
|
|
process.Stdout = os.Stdout
|
|
process.Stderr = os.Stderr
|
|
process.SysProcAttr = &syscall.SysProcAttr{Ptrace: true, Setpgid: true}
|
|
if wd != "" {
|
|
process.Dir = wd
|
|
}
|
|
err = process.Start()
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
dbp.pid = process.Process.Pid
|
|
dbp.childProcess = true
|
|
_, _, err = dbp.wait(process.Process.Pid, 0)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("waiting for target execve failed: %s", err)
|
|
}
|
|
return initializeDebugProcess(dbp, process.Path)
|
|
}
|
|
|
|
// Attach to an existing process with the given PID.
|
|
func Attach(pid int) (*Process, error) {
|
|
dbp := New(pid)
|
|
|
|
var err error
|
|
dbp.execPtraceFunc(func() { err = PtraceAttach(dbp.pid) })
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
_, _, err = dbp.wait(dbp.pid, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
dbp, err = initializeDebugProcess(dbp, "")
|
|
if err != nil {
|
|
dbp.Detach(false)
|
|
return nil, err
|
|
}
|
|
return dbp, nil
|
|
}
|
|
|
|
// Kill kills the target process.
|
|
func (dbp *Process) Kill() (err error) {
|
|
if dbp.exited {
|
|
return nil
|
|
}
|
|
if !dbp.threads[dbp.pid].Stopped() {
|
|
return errors.New("process must be stopped in order to kill it")
|
|
}
|
|
if err = sys.Kill(-dbp.pid, sys.SIGKILL); err != nil {
|
|
return errors.New("could not deliver signal " + err.Error())
|
|
}
|
|
if _, _, err = dbp.wait(dbp.pid, 0); err != nil {
|
|
return
|
|
}
|
|
dbp.postExit()
|
|
return
|
|
}
|
|
|
|
func (dbp *Process) requestManualStop() (err error) {
|
|
return sys.Kill(dbp.pid, sys.SIGTRAP)
|
|
}
|
|
|
|
// Attach to a newly created thread, and store that thread in our list of
|
|
// known threads.
|
|
func (dbp *Process) addThread(tid int, attach bool) (*Thread, error) {
|
|
if thread, ok := dbp.threads[tid]; ok {
|
|
return thread, nil
|
|
}
|
|
|
|
var err error
|
|
if attach {
|
|
dbp.execPtraceFunc(func() { err = sys.PtraceAttach(tid) })
|
|
if err != nil && err != sys.EPERM {
|
|
// Do not return err if err == EPERM,
|
|
// we may already be tracing this thread due to
|
|
// PTRACE_O_TRACECLONE. We will surely blow up later
|
|
// if we truly don't have permissions.
|
|
return nil, fmt.Errorf("could not attach to new thread %d %s", tid, err)
|
|
}
|
|
pid, status, err := dbp.waitFast(tid)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if status.Exited() {
|
|
return nil, fmt.Errorf("thread already exited %d", pid)
|
|
}
|
|
}
|
|
|
|
dbp.execPtraceFunc(func() { err = syscall.PtraceSetOptions(tid, syscall.PTRACE_O_TRACECLONE) })
|
|
if err == syscall.ESRCH {
|
|
if _, _, err = dbp.waitFast(tid); err != nil {
|
|
return nil, fmt.Errorf("error while waiting after adding thread: %d %s", tid, err)
|
|
}
|
|
dbp.execPtraceFunc(func() { err = syscall.PtraceSetOptions(tid, syscall.PTRACE_O_TRACECLONE) })
|
|
if err == syscall.ESRCH {
|
|
return nil, err
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not set options for new traced thread %d %s", tid, err)
|
|
}
|
|
}
|
|
|
|
dbp.threads[tid] = &Thread{
|
|
ID: tid,
|
|
dbp: dbp,
|
|
os: new(OSSpecificDetails),
|
|
}
|
|
if dbp.currentThread == nil {
|
|
dbp.SwitchThread(tid)
|
|
}
|
|
return dbp.threads[tid], nil
|
|
}
|
|
|
|
func (dbp *Process) updateThreadList() error {
|
|
tids, _ := filepath.Glob(fmt.Sprintf("/proc/%d/task/*", dbp.pid))
|
|
for _, tidpath := range tids {
|
|
tidstr := filepath.Base(tidpath)
|
|
tid, err := strconv.Atoi(tidstr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if _, err := dbp.addThread(tid, tid != dbp.pid); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// findExecutable returns path unchanged when it is not empty; otherwise it
// returns the /proc/<pid>/exe path for the given pid.
func findExecutable(path string, pid int) string {
	if path != "" {
		return path
	}
	return fmt.Sprintf("/proc/%d/exe", pid)
}
|
|
|
|
func (dbp *Process) trapWait(pid int) (*Thread, error) {
|
|
for {
|
|
wpid, status, err := dbp.wait(pid, 0)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("wait err %s %d", err, pid)
|
|
}
|
|
if wpid == 0 {
|
|
continue
|
|
}
|
|
th, ok := dbp.threads[wpid]
|
|
if ok {
|
|
th.Status = (*WaitStatus)(status)
|
|
}
|
|
if status.Exited() {
|
|
if wpid == dbp.pid {
|
|
dbp.postExit()
|
|
return nil, proc.ProcessExitedError{Pid: wpid, Status: status.ExitStatus()}
|
|
}
|
|
delete(dbp.threads, wpid)
|
|
continue
|
|
}
|
|
if status.StopSignal() == sys.SIGTRAP && status.TrapCause() == sys.PTRACE_EVENT_CLONE {
|
|
// A traced thread has cloned a new thread, grab the pid and
|
|
// add it to our list of traced threads.
|
|
var cloned uint
|
|
dbp.execPtraceFunc(func() { cloned, err = sys.PtraceGetEventMsg(wpid) })
|
|
if err != nil {
|
|
if err == sys.ESRCH {
|
|
// thread died while we were adding it
|
|
continue
|
|
}
|
|
return nil, fmt.Errorf("could not get event message: %s", err)
|
|
}
|
|
th, err = dbp.addThread(int(cloned), false)
|
|
if err != nil {
|
|
if err == sys.ESRCH {
|
|
// thread died while we were adding it
|
|
delete(dbp.threads, int(cloned))
|
|
continue
|
|
}
|
|
return nil, err
|
|
}
|
|
dbp.haltMu.Lock()
|
|
halt := dbp.halt
|
|
dbp.haltMu.Unlock()
|
|
if halt {
|
|
dbp.halt = false
|
|
th.running = false
|
|
dbp.threads[int(wpid)].running = false
|
|
return nil, nil
|
|
}
|
|
if err = th.Continue(); err != nil {
|
|
if err == sys.ESRCH {
|
|
// thread died while we were adding it
|
|
delete(dbp.threads, th.ID)
|
|
continue
|
|
}
|
|
return nil, fmt.Errorf("could not continue new thread %d %s", cloned, err)
|
|
}
|
|
if err = dbp.threads[int(wpid)].Continue(); err != nil {
|
|
if err != sys.ESRCH {
|
|
return nil, fmt.Errorf("could not continue existing thread %d %s", wpid, err)
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
if th == nil {
|
|
// Sometimes we get an unknown thread, ignore it?
|
|
continue
|
|
}
|
|
dbp.haltMu.Lock()
|
|
halt := dbp.halt
|
|
dbp.haltMu.Unlock()
|
|
if halt && (status.StopSignal() == sys.SIGTRAP || status.StopSignal() == sys.SIGSTOP) {
|
|
th.running = false
|
|
dbp.halt = false
|
|
return th, nil
|
|
}
|
|
if status.StopSignal() == sys.SIGTRAP {
|
|
th.running = false
|
|
return th, nil
|
|
}
|
|
if th != nil {
|
|
// TODO(dp) alert user about unexpected signals here.
|
|
if err := th.resumeWithSig(int(status.StopSignal())); err != nil {
|
|
if err == sys.ESRCH {
|
|
return nil, proc.ProcessExitedError{Pid: dbp.pid}
|
|
}
|
|
return nil, err
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (dbp *Process) loadProcessInformation(wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
|
|
comm, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/comm", dbp.pid))
|
|
if err == nil {
|
|
// removes newline character
|
|
comm = bytes.TrimSuffix(comm, []byte("\n"))
|
|
}
|
|
|
|
if comm == nil || len(comm) <= 0 {
|
|
stat, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/stat", dbp.pid))
|
|
if err != nil {
|
|
fmt.Printf("Could not read proc stat: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
expr := fmt.Sprintf("%d\\s*\\((.*)\\)", dbp.pid)
|
|
rexp, err := regexp.Compile(expr)
|
|
if err != nil {
|
|
fmt.Printf("Regexp compile error: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
match := rexp.FindSubmatch(stat)
|
|
if match == nil {
|
|
fmt.Printf("No match found using regexp '%s' in /proc/%d/stat\n", expr, dbp.pid)
|
|
os.Exit(1)
|
|
}
|
|
comm = match[1]
|
|
}
|
|
dbp.os.comm = strings.Replace(string(comm), "%", "%%", -1)
|
|
}
|
|
|
|
// status returns the state character of the task with the given pid as
// found in /proc/<pid>/stat. comm must be the task name, already escaped for
// use inside a scanf format string. The zero rune is returned when the stat
// file cannot be opened, and also when scanning does not get as far as the
// state field.
func status(pid int, comm string) rune {
	statFile, err := os.Open(fmt.Sprintf("/proc/%d/stat", pid))
	if err != nil {
		return '\000'
	}
	defer statFile.Close()

	var (
		state rune
		p     int
	)

	// The second field of /proc/pid/stat is the name of the task in
	// parentheses. The name of the task is the base name of the executable
	// for this process limited to TASK_COMM_LEN characters. Since both
	// parentheses and spaces can appear inside the name of the task and no
	// escaping happens, the known name is matched literally instead of being
	// scanned as a field.
	// See: include/linux/sched.c:315 and include/linux/sched.c:1510
	format := "%d (" + comm + ") %c"
	fmt.Fscanf(statFile, format, &p, &state)
	return state
}
|
|
|
|
// waitFast is like wait but does not handle process-exit correctly
|
|
func (dbp *Process) waitFast(pid int) (int, *sys.WaitStatus, error) {
|
|
var s sys.WaitStatus
|
|
wpid, err := sys.Wait4(pid, &s, sys.WALL, nil)
|
|
return wpid, &s, err
|
|
}
|
|
|
|
func (dbp *Process) wait(pid, options int) (int, *sys.WaitStatus, error) {
|
|
var s sys.WaitStatus
|
|
if (pid != dbp.pid) || (options != 0) {
|
|
wpid, err := sys.Wait4(pid, &s, sys.WALL|options, nil)
|
|
return wpid, &s, err
|
|
}
|
|
// If we call wait4/waitpid on a thread that is the leader of its group,
|
|
// with options == 0, while ptracing and the thread leader has exited leaving
|
|
// zombies of its own then waitpid hangs forever this is apparently intended
|
|
// behaviour in the linux kernel because it's just so convenient.
|
|
// Therefore we call wait4 in a loop with WNOHANG, sleeping a while between
|
|
// calls and exiting when either wait4 succeeds or we find out that the thread
|
|
// has become a zombie.
|
|
// References:
|
|
// https://sourceware.org/bugzilla/show_bug.cgi?id=12702
|
|
// https://sourceware.org/bugzilla/show_bug.cgi?id=10095
|
|
// https://sourceware.org/bugzilla/attachment.cgi?id=5685
|
|
for {
|
|
wpid, err := sys.Wait4(pid, &s, sys.WNOHANG|sys.WALL|options, nil)
|
|
if err != nil {
|
|
return 0, nil, err
|
|
}
|
|
if wpid != 0 {
|
|
return wpid, &s, err
|
|
}
|
|
if status(pid, dbp.os.comm) == StatusZombie {
|
|
return pid, nil, nil
|
|
}
|
|
time.Sleep(200 * time.Millisecond)
|
|
}
|
|
}
|
|
|
|
func (dbp *Process) setCurrentBreakpoints(trapthread *Thread) error {
|
|
// wait for all threads to stop
|
|
for {
|
|
allstopped := true
|
|
for _, th := range dbp.threads {
|
|
if th.running {
|
|
allstopped = false
|
|
break
|
|
}
|
|
}
|
|
if allstopped {
|
|
break
|
|
}
|
|
dbp.halt = true
|
|
_, err := dbp.trapWait(-1)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
for _, th := range dbp.threads {
|
|
if th.CurrentBreakpoint.Breakpoint == nil {
|
|
if err := th.SetCurrentBreakpoint(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (dbp *Process) exitGuard(err error) error {
|
|
if err != sys.ESRCH {
|
|
return err
|
|
}
|
|
if status(dbp.pid, dbp.os.comm) == StatusZombie {
|
|
_, err := dbp.trapWait(-1)
|
|
return err
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (dbp *Process) resume() error {
|
|
// all threads stopped over a breakpoint are made to step over it
|
|
for _, thread := range dbp.threads {
|
|
if thread.CurrentBreakpoint.Breakpoint != nil {
|
|
if err := thread.StepInstruction(); err != nil {
|
|
return err
|
|
}
|
|
thread.CurrentBreakpoint.Clear()
|
|
}
|
|
}
|
|
// everything is resumed
|
|
for _, thread := range dbp.threads {
|
|
if err := thread.resume(); err != nil && err != sys.ESRCH {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (dbp *Process) detach(kill bool) error {
|
|
for threadID := range dbp.threads {
|
|
err := PtraceDetach(threadID, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if kill {
|
|
return nil
|
|
}
|
|
// For some reason the process will sometimes enter stopped state after a
|
|
// detach, this doesn't happen immediately either.
|
|
// We have to wait a bit here, then check if the main thread is stopped and
|
|
// SIGCONT it if it is.
|
|
time.Sleep(50 * time.Millisecond)
|
|
if s := status(dbp.pid, dbp.os.comm); s == 'T' {
|
|
sys.Kill(dbp.pid, sys.SIGCONT)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func killProcess(pid int) error {
|
|
return sys.Kill(pid, sys.SIGINT)
|
|
}
|