From e69d536e819bbbd3539d5748d61942330863cf11 Mon Sep 17 00:00:00 2001 From: Alessandro Arzilli Date: Tue, 3 Nov 2020 19:28:37 +0100 Subject: [PATCH] proc/native: fix flakyness of TestStepConcurrentDirect on linux/386 (#2179) TestStepConcurrentDirect will occasionally fail (7% of the time on my setup) by either causing the target processs to execute an invalid instruction or (more infrequently) by switching to the wrong thread. Both of those are caused by receiving SIGTRAPs for threads hitting a breakpoint after it has been removed (the thread hits the breakpoint, we stop everything and remove the breakpoint and only after we receive the signal). Change native.(*nativeProcess).stop to handle SIGTRAPs that can't be attributed to a breakpoint, a hardcoded breakpoint in the program's text, or manual stops (and therefore are likely caused by phantom breakpoint hits). Co-authored-by: a --- pkg/proc/arch.go | 20 ++++++--- pkg/proc/i386_arch.go | 1 + pkg/proc/native/nonative_darwin.go | 2 +- pkg/proc/native/proc.go | 40 +++++++++-------- pkg/proc/native/proc_darwin.go | 16 +++---- pkg/proc/native/proc_freebsd.go | 8 ++-- pkg/proc/native/proc_linux.go | 69 ++++++++++++++++++++++++++---- pkg/proc/native/proc_windows.go | 16 +++---- 8 files changed, 119 insertions(+), 53 deletions(-) diff --git a/pkg/proc/arch.go b/pkg/proc/arch.go index c205e6dd..732b8704 100644 --- a/pkg/proc/arch.go +++ b/pkg/proc/arch.go @@ -9,13 +9,14 @@ import ( type Arch struct { Name string // architecture name - ptrSize int - maxInstructionLength int - prologues []opcodeSeq - breakpointInstruction []byte - breakInstrMovesPC bool - derefTLS bool - usesLR bool // architecture uses a link register, also called RA on some architectures + ptrSize int + maxInstructionLength int + prologues []opcodeSeq + breakpointInstruction []byte + altBreakpointInstruction []byte + breakInstrMovesPC bool + derefTLS bool + usesLR bool // architecture uses a link register, also called RA on some architectures // asmDecode decodes the assembly instruction starting at mem[0:] into asmInst. // It assumes that the Loc and AtPC fields of asmInst have already been filled. @@ -66,6 +67,11 @@ func (a *Arch) BreakpointInstruction() []byte { return a.breakpointInstruction } +// AltBreakpointInstruction returns an alternate encoding for the breakpoint instruction. +func (a *Arch) AltBreakpointInstruction() []byte { + return a.altBreakpointInstruction +} + // BreakInstrMovesPC is true if hitting the breakpoint instruction advances the // instruction counter by the size of the breakpoint instruction. func (a *Arch) BreakInstrMovesPC() bool { diff --git a/pkg/proc/i386_arch.go b/pkg/proc/i386_arch.go index 62960f42..f22787d1 100644 --- a/pkg/proc/i386_arch.go +++ b/pkg/proc/i386_arch.go @@ -24,6 +24,7 @@ func I386Arch(goos string) *Arch { ptrSize: 4, maxInstructionLength: 15, breakpointInstruction: i386BreakInstruction, + altBreakpointInstruction: []byte{0xcd, 0x03}, breakInstrMovesPC: true, derefTLS: false, prologues: prologuesI386, diff --git a/pkg/proc/native/nonative_darwin.go b/pkg/proc/native/nonative_darwin.go index 2c00425b..06009a27 100644 --- a/pkg/proc/native/nonative_darwin.go +++ b/pkg/proc/native/nonative_darwin.go @@ -59,7 +59,7 @@ func (dbp *nativeProcess) trapWait(pid int) (*nativeThread, error) { panic(ErrNativeBackendDisabled) } -func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { +func (dbp *nativeProcess) stop(trapthread *nativeThread) (*nativeThread, error) { panic(ErrNativeBackendDisabled) } diff --git a/pkg/proc/native/proc.go b/pkg/proc/native/proc.go index a2a8939d..fdd72061 100644 --- a/pkg/proc/native/proc.go +++ b/pkg/proc/native/proc.go @@ -231,27 +231,33 @@ func (dbp *nativeProcess) ContinueOnce() (proc.Thread, proc.StopReason, error) { return nil, proc.StopExited, &proc.ErrProcessExited{Pid: dbp.Pid()} } - if err := dbp.resume(); err != nil { - return nil, proc.StopUnknown, err - } + for { - for _, th := range dbp.threads { - th.CurrentBreakpoint.Clear() - } + if err := dbp.resume(); err != nil { + return nil, proc.StopUnknown, err + } - if dbp.resumeChan != nil { - close(dbp.resumeChan) - dbp.resumeChan = nil - } + for _, th := range dbp.threads { + th.CurrentBreakpoint.Clear() + } - trapthread, err := dbp.trapWait(-1) - if err != nil { - return nil, proc.StopUnknown, err + if dbp.resumeChan != nil { + close(dbp.resumeChan) + dbp.resumeChan = nil + } + + trapthread, err := dbp.trapWait(-1) + if err != nil { + return nil, proc.StopUnknown, err + } + trapthread, err = dbp.stop(trapthread) + if err != nil { + return nil, proc.StopUnknown, err + } + if trapthread != nil { + return trapthread, proc.StopUnknown, nil + } } - if err := dbp.stop(trapthread); err != nil { - return nil, proc.StopUnknown, err - } - return trapthread, proc.StopUnknown, err } // FindBreakpoint finds the breakpoint for the given pc. diff --git a/pkg/proc/native/proc_darwin.go b/pkg/proc/native/proc_darwin.go index 5d5320b6..3df1522e 100644 --- a/pkg/proc/native/proc_darwin.go +++ b/pkg/proc/native/proc_darwin.go @@ -113,7 +113,7 @@ func Launch(cmd []string, wd string, flags proc.LaunchFlags, _ []string, _ strin if err != nil { return nil, err } - if err := dbp.stop(nil); err != nil { + if _, err := dbp.stop(nil); err != nil { return nil, err } @@ -422,35 +422,35 @@ func (dbp *nativeProcess) resume() error { } // stop stops all running threads and sets breakpoints -func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { +func (dbp *nativeProcess) stop(trapthread *nativeThread) (*nativeThread, error) { if dbp.exited { - return &proc.ErrProcessExited{Pid: dbp.Pid()} + return nil, &proc.ErrProcessExited{Pid: dbp.Pid()} } for _, th := range dbp.threads { if !th.Stopped() { if err := th.stop(); err != nil { - return dbp.exitGuard(err) + return nil, dbp.exitGuard(err) } } } ports, err := dbp.waitForStop() if err != nil { - return err + return nil, err } if !dbp.os.initialized { - return nil + return nil, nil } trapthread.SetCurrentBreakpoint(true) for _, port := range ports { if th, ok := dbp.threads[port]; ok { err := th.SetCurrentBreakpoint(true) if err != nil { - return err + return nil, err } } } - return nil + return trapthread, nil } func (dbp *nativeProcess) detach(kill bool) error { diff --git a/pkg/proc/native/proc_freebsd.go b/pkg/proc/native/proc_freebsd.go index 9457161e..7f5a854f 100644 --- a/pkg/proc/native/proc_freebsd.go +++ b/pkg/proc/native/proc_freebsd.go @@ -347,19 +347,19 @@ func (dbp *nativeProcess) resume() error { // Used by ContinueOnce // stop stops all running threads and sets breakpoints -func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { +func (dbp *nativeProcess) stop(trapthread *nativeThread) (*nativeThread, error) { if dbp.exited { - return &proc.ErrProcessExited{Pid: dbp.Pid()} + return nil, &proc.ErrProcessExited{Pid: dbp.Pid()} } // set breakpoints on all threads for _, th := range dbp.threads { if th.CurrentBreakpoint.Breakpoint == nil { if err := th.SetCurrentBreakpoint(true); err != nil { - return err + return nil, err } } } - return nil + return trapthread, nil } // Used by Detach diff --git a/pkg/proc/native/proc_linux.go b/pkg/proc/native/proc_linux.go index 00a01038..d85a992a 100644 --- a/pkg/proc/native/proc_linux.go +++ b/pkg/proc/native/proc_linux.go @@ -486,9 +486,9 @@ func (dbp *nativeProcess) resume() error { } // stop stops all running threads and sets breakpoints -func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { +func (dbp *nativeProcess) stop(trapthread *nativeThread) (*nativeThread, error) { if dbp.exited { - return &proc.ErrProcessExited{Pid: dbp.Pid()} + return nil, &proc.ErrProcessExited{Pid: dbp.Pid()} } for _, th := range dbp.threads { @@ -500,7 +500,7 @@ func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { for { th, err := dbp.trapWaitInternal(-1, trapWaitNohang) if err != nil { - return dbp.exitGuard(err) + return nil, dbp.exitGuard(err) } if th == nil { break @@ -511,7 +511,7 @@ func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { for _, th := range dbp.threads { if th.os.running { if err := th.stop(); err != nil { - return dbp.exitGuard(err) + return nil, dbp.exitGuard(err) } } } @@ -530,23 +530,76 @@ func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { } _, err := dbp.trapWaitInternal(-1, trapWaitHalt) if err != nil { - return err + return nil, err } } if err := linutil.ElfUpdateSharedObjects(dbp); err != nil { - return err + return nil, err } + switchTrapthread := false + // set breakpoints on SIGTRAP threads for _, th := range dbp.threads { if th.CurrentBreakpoint.Breakpoint == nil && th.os.setbp { if err := th.SetCurrentBreakpoint(true); err != nil { - return err + return nil, err + } + } + if th.CurrentBreakpoint.Breakpoint == nil && th.os.setbp && (th.Status != nil) && ((*sys.WaitStatus)(th.Status).StopSignal() == sys.SIGTRAP) && dbp.BinInfo().Arch.BreakInstrMovesPC() { + manualStop := false + if th.ThreadID() == trapthread.ThreadID() { + dbp.stopMu.Lock() + manualStop = dbp.manualStopRequested + dbp.stopMu.Unlock() + } + if !manualStop { + // Thread received a SIGTRAP but we don't have a breakpoint for it and + // it wasn't sent by a manual stop request. It's either a hardcoded + // breakpoint or a phantom breakpoint hit (a breakpoint that was hit but + // we have removed before we could receive its signal). Check if it is a + // hardcoded breakpoint, otherwise rewind the thread. + isHardcodedBreakpoint := false + pc, _ := th.PC() + for _, bpinstr := range [][]byte{ + dbp.BinInfo().Arch.BreakpointInstruction(), + dbp.BinInfo().Arch.AltBreakpointInstruction()} { + if bpinstr == nil { + continue + } + buf := make([]byte, len(bpinstr)) + _, _ = th.ReadMemory(buf, pc-uint64(len(buf))) + if bytes.Equal(buf, bpinstr) { + isHardcodedBreakpoint = true + break + } + } + if !isHardcodedBreakpoint { + // phantom breakpoint hit + _ = th.SetPC(pc - uint64(len(dbp.BinInfo().Arch.BreakpointInstruction()))) + th.os.setbp = false + if trapthread.ThreadID() == th.ThreadID() { + // Will switch to a different thread for trapthread because we don't + // want pkg/proc to believe that this thread was stopped by a + // hardcoded breakpoint. + switchTrapthread = true + } + } } } } - return nil + + if switchTrapthread { + trapthread = nil + for _, th := range dbp.threads { + if th.os.setbp && th.ThreadID() != trapthread.ThreadID() { + return th, nil + } + } + } + + return trapthread, nil } func (dbp *nativeProcess) detach(kill bool) error { diff --git a/pkg/proc/native/proc_windows.go b/pkg/proc/native/proc_windows.go index c7ce90a5..f3f86420 100644 --- a/pkg/proc/native/proc_windows.go +++ b/pkg/proc/native/proc_windows.go @@ -407,9 +407,9 @@ func (dbp *nativeProcess) resume() error { } // stop stops all running threads threads and sets breakpoints -func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { +func (dbp *nativeProcess) stop(trapthread *nativeThread) (*nativeThread, error) { if dbp.exited { - return &proc.ErrProcessExited{Pid: dbp.Pid()} + return nil, &proc.ErrProcessExited{Pid: dbp.Pid()} } dbp.os.running = false @@ -424,15 +424,15 @@ func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { // call to _ContinueDebugEvent will resume execution of some of the // target threads. - err = trapthread.SetCurrentBreakpoint(true) + err := trapthread.SetCurrentBreakpoint(true) if err != nil { - return err + return nil, err } for _, thread := range dbp.threads { _, err := _SuspendThread(thread.os.hThread) if err != nil { - return err + return nil, err } } @@ -446,18 +446,18 @@ func (dbp *nativeProcess) stop(trapthread *nativeThread) (err error) { } }) if err != nil { - return err + return nil, err } if tid == 0 { break } err = dbp.threads[tid].SetCurrentBreakpoint(true) if err != nil { - return err + return nil, err } } - return nil + return trapthread, nil } func (dbp *nativeProcess) detach(kill bool) error {