Fix race between Delve and tracee runtime

This commit fixes a race condition between Delve and the runtime of the traced process. When a new thread is created in the traced process, Delve takes note of it and then continue both the new thread, and the thread that called clone. If Delve attempts to use data in `runtime.allm` before the new `m->procid` is set, errors occur. The errors are due to Delve assuming any m with a procid of 0 is the main thread of the process (due to how theGo runtime allocates M's, only `clone`d threads have procid properly set. This causes certain events (like `next`) to happen twice to the main thread, because 2 m's in `runtime.allm` have a `procid` of 0, and also causes various other issues that prevent proper thread coordination from Delve. Fixes #43
2015-02-01 13:45:20 -05:00 · 2015-02-01 13:45:20 -05:00 · e001bbfff2
commit e001bbfff2
parent 4f5bfc5ee8
1 changed files with 25 additions and 10 deletions
--- a/proctl/proctl.go
+++ b/proctl/proctl.go
@ -12,6 +12,7 @@ import (
 	"strconv"
 	"strings"
 	"syscall"
+	"time"

 	sys "golang.org/x/sys/unix"

@ -298,12 +299,12 @@ func (dbp *DebuggedProcess) Next() error {
 		ok bool
 	)

-	allm, err := dbp.CurrentThread.AllM()
-	if err != nil {
-		return err
-	}
-
 	fn := func() error {
+		allm, err := dbp.CurrentThread.AllM()
+		if err != nil {
+			return err
+		}
+
 		for _, m := range allm {
 			th, ok = dbp.Threads[m.procid]
 			if !ok {
@ -518,12 +519,26 @@ func addNewThread(dbp *DebuggedProcess, pid int) error {
 		return fmt.Errorf("could not continue new thread %d %s", msg, err)
 	}

-	err = sys.PtraceCont(pid, 0)
-	if err != nil {
-		return fmt.Errorf("could not continue stopped thread %d %s", pid, err)
+	// Here we loop for a while to ensure that the once we continue
+	// the newly created thread, we allow enough time for the runtime
+	// to assign m->procid. This is important because we rely on
+	// looping through runtime.allm in other parts of the code, so
+	// we require that this is set before we do anything else.
+	// TODO(dp): we might be able to eliminate this loop by telling
+	// the CPU to emit a breakpoint exception on write to this location
+	// in memory. That way we prevent having to loop, and can be
+	// notified as soon as m->procid is set.
+	th := dbp.Threads[pid]
+	for {
+		allm, _ := th.AllM()
+		for _, m := range allm {
+			if m.procid == int(msg) {
+				// Continue the thread that cloned
+				return sys.PtraceCont(pid, 0)
+			}
+		}
+		time.Sleep(time.Millisecond)
 	}
-
-	return nil
 }

 func wait(pid, options int) (int, *sys.WaitStatus, error) {