From e001bbfff24cd2ddaa85f822bd2843cd2bf450fb Mon Sep 17 00:00:00 2001 From: Derek Parker Date: Sun, 1 Feb 2015 13:45:20 -0500 Subject: [PATCH] Fix race between Delve and tracee runtime This commit fixes a race condition between Delve and the runtime of the traced process. When a new thread is created in the traced process, Delve takes note of it and then continue both the new thread, and the thread that called clone. If Delve attempts to use data in `runtime.allm` before the new `m->procid` is set, errors occur. The errors are due to Delve assuming any m with a procid of 0 is the main thread of the process (due to how theGo runtime allocates M's, only `clone`d threads have procid properly set. This causes certain events (like `next`) to happen twice to the main thread, because 2 m's in `runtime.allm` have a `procid` of 0, and also causes various other issues that prevent proper thread coordination from Delve. Fixes #43 --- proctl/proctl.go | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/proctl/proctl.go b/proctl/proctl.go index da3eddec..f71c103e 100644 --- a/proctl/proctl.go +++ b/proctl/proctl.go @@ -12,6 +12,7 @@ import ( "strconv" "strings" "syscall" + "time" sys "golang.org/x/sys/unix" @@ -298,12 +299,12 @@ func (dbp *DebuggedProcess) Next() error { ok bool ) - allm, err := dbp.CurrentThread.AllM() - if err != nil { - return err - } - fn := func() error { + allm, err := dbp.CurrentThread.AllM() + if err != nil { + return err + } + for _, m := range allm { th, ok = dbp.Threads[m.procid] if !ok { @@ -518,12 +519,26 @@ func addNewThread(dbp *DebuggedProcess, pid int) error { return fmt.Errorf("could not continue new thread %d %s", msg, err) } - err = sys.PtraceCont(pid, 0) - if err != nil { - return fmt.Errorf("could not continue stopped thread %d %s", pid, err) + // Here we loop for a while to ensure that the once we continue + // the newly created thread, we allow enough time for the runtime + // to assign m->procid. This is important because we rely on + // looping through runtime.allm in other parts of the code, so + // we require that this is set before we do anything else. + // TODO(dp): we might be able to eliminate this loop by telling + // the CPU to emit a breakpoint exception on write to this location + // in memory. That way we prevent having to loop, and can be + // notified as soon as m->procid is set. + th := dbp.Threads[pid] + for { + allm, _ := th.AllM() + for _, m := range allm { + if m.procid == int(msg) { + // Continue the thread that cloned + return sys.PtraceCont(pid, 0) + } + } + time.Sleep(time.Millisecond) } - - return nil } func wait(pid, options int) (int, *sys.WaitStatus, error) {