proc: use .closureptr for stepping through range-over-func statements (#3763)

* proc: use .closureptr for stepping through range-over-func statements Uses special variables .closureptr and #yieldN to correctly identify the parent frame of a range-over-func body closure call. Updates #3733 * fix
2024-07-11 19:26:38 +02:00 · 2024-07-11 19:26:38 +02:00 · abad6bb97e
commit abad6bb97e
parent b791f91c0e
7 changed files with 271 additions and 69 deletions
--- a/_fixtures/rangeoverfunc.go
+++ b/_fixtures/rangeoverfunc.go
@ -228,6 +228,20 @@ B:
 	fmt.Println(result)
 }

+func TestRecur(n int) {
+	result := []int{}
+	if n > 0 {
+		TestRecur(n - 1)
+	}
+	for _, x := range OfSliceIndex([]int{10, 20, 30}) {
+		result = append(result, x)
+		if n == 3 {
+			TestRecur(0)
+		}
+	}
+	fmt.Println(result)
+}
+
 func main() {
 	TestTrickyIterAll()
 	TestTrickyIterAll2()
@ -240,34 +254,23 @@ func main() {
 	TestLongReturnWrapper()
 	TestGotoA1()
 	TestGotoB1()
+	TestRecur(3)
 }

 type Seq[T any] func(yield func(T) bool)
 type Seq2[T1, T2 any] func(yield func(T1, T2) bool)

 type TrickyIterator struct {
-	yield func(int, int) bool
-}
-
-func (ti *TrickyIterator) iterEcho(s []int) Seq2[int, int] {
-	return func(yield func(int, int) bool) {
-		for i, v := range s {
-			if !yield(i, v) {
-				ti.yield = yield
-				return
-			}
-			if ti.yield != nil && !ti.yield(i, v) {
-				return
-			}
-		}
-		ti.yield = yield
-		return
-	}
+	yield func()
 }

 func (ti *TrickyIterator) iterAll(s []int) Seq2[int, int] {
 	return func(yield func(int, int) bool) {
-		ti.yield = yield // Save yield for future abuse
+		// NOTE: storing the yield func in the iterator has been removed because
+		// it make the closure escape to the heap which breaks the .closureptr
+		// heuristic. Eventually we will need to figure out what to do when that
+		// happens.
+		// ti.yield = yield // Save yield for future abuse
 		for i, v := range s {
 			if !yield(i, v) {
 				return
@ -277,14 +280,6 @@ func (ti *TrickyIterator) iterAll(s []int) Seq2[int, int] {
 	}
 }

-func (ti *TrickyIterator) iterZero(s []int) Seq2[int, int] {
-	return func(yield func(int, int) bool) {
-		ti.yield = yield // Save yield for future abuse
-		// Don't call it at all, maybe it won't escape
-		return
-	}
-}
-
 // OfSliceIndex returns a Seq2 over the elements of s. It is equivalent
 // to range s.
 func OfSliceIndex[T any, S ~[]T](s S) Seq2[int, T] {
--- a/pkg/proc/eval.go
+++ b/pkg/proc/eval.go
@ -67,6 +67,10 @@ const (
 	// If localsNoDeclLineCheck the declaration line isn't checked at
 	// all to determine if the variable is in scope.
 	localsNoDeclLineCheck
+
+	// If localsOnlyRangeBodyClosures is set simpleLocals only returns
+	// variables containing the range body closure.
+	localsOnlyRangeBodyClosures
 )

 // ConvertEvalScope returns a new EvalScope in the context of the
@ -330,12 +334,10 @@ func (scope *EvalScope) Locals(flags localsFlags, wantedName string) ([]*Variabl
 	scopes = append(scopes, vars0)

 	if scope.rangeFrames == nil {
-		scope.rangeFrames, err = rangeFuncStackTrace(scope.target, scope.g)
+		err := scope.setupRangeFrames()
 		if err != nil {
 			return nil, err
 		}
-		scope.rangeFrames = scope.rangeFrames[1:]
-		scope.enclosingRangeScopes = make([]*EvalScope, len(scope.rangeFrames))
 	}
 	for i, scope2 := range scope.enclosingRangeScopes {
 		if i == len(scope.enclosingRangeScopes)-1 {
@ -373,6 +375,17 @@ func (scope *EvalScope) Locals(flags localsFlags, wantedName string) ([]*Variabl
 	return vars, nil
 }

+func (scope *EvalScope) setupRangeFrames() error {
+	var err error
+	scope.rangeFrames, err = rangeFuncStackTrace(scope.target, scope.g)
+	if err != nil {
+		return err
+	}
+	scope.rangeFrames = scope.rangeFrames[1:]
+	scope.enclosingRangeScopes = make([]*EvalScope, len(scope.rangeFrames))
+	return nil
+}
+
 // simpleLocals returns all local variables in 'scope'.
 // This function does not try to merge the scopes of range-over-func closure
 // bodies with their enclosing function, for that use (*EvalScope).Locals or
@ -406,23 +419,7 @@ func (scope *EvalScope) simpleLocals(flags localsFlags, wantedName string) ([]*V

 	// look for dictionary entry
 	if scope.dictAddr == 0 {
-		for _, entry := range varEntries {
-			name, _ := entry.Val(dwarf.AttrName).(string)
-			if name == goDictionaryName {
-				dictVar, err := extractVarInfoFromEntry(scope.target, scope.BinInfo, scope.image(), scope.Regs, scope.Mem, entry.Tree, 0)
-				if err != nil {
-					logflags.DebuggerLogger().Errorf("could not load %s variable: %v", name, err)
-				} else if dictVar.Unreadable != nil {
-					logflags.DebuggerLogger().Errorf("could not load %s variable: %v", name, dictVar.Unreadable)
-				} else {
-					scope.dictAddr, err = readUintRaw(dictVar.mem, dictVar.Addr, int64(scope.BinInfo.Arch.PtrSize()))
-					if err != nil {
-						logflags.DebuggerLogger().Errorf("could not load %s variable: %v", name, err)
-					}
-				}
-				break
-			}
-		}
+		scope.dictAddr = readLocalPtrVar(dwarfTree, goDictionaryName, scope.target, scope.BinInfo, scope.image(), scope.Regs, scope.Mem)
 	}

 	vars := make([]*Variable, 0, len(varEntries))
@ -434,19 +431,18 @@ func (scope *EvalScope) simpleLocals(flags localsFlags, wantedName string) ([]*V
 			if name != wantedName && name != "&"+wantedName {
 				continue
 			}
+		case flags&localsOnlyRangeBodyClosures != 0:
+			if !strings.HasPrefix(name, "#yield") && !strings.HasPrefix(name, "&#yield") {
+				continue
+			}
 		default:
-			if name == goDictionaryName || name == goClosurePtr || strings.HasPrefix(name, "#state") || strings.HasPrefix(name, "&#state") || strings.HasPrefix(name, "#next") || strings.HasPrefix(name, "&#next") || strings.HasPrefix(name, "#yield") {
+			if name == goDictionaryName || name == goClosurePtr || strings.HasPrefix(name, "#yield") || strings.HasPrefix(name, "&#yield") {
 				continue
 			}
 		}
-		if scope.Fn.rangeParentName() != "" {
-			// Skip return values and closure variables for range-over-func closure bodies
-			if strings.HasPrefix(name, "~") {
-				continue
-			}
-			if entry.Val(godwarf.AttrGoClosureOffset) != nil {
-				continue
-			}
+		if scope.Fn.rangeParentName() != "" && (strings.HasPrefix(name, "~") || entry.Val(godwarf.AttrGoClosureOffset) != nil) {
+			// Skip unnamed parameters and closure variables for range-over-func closure bodies
+			continue
 		}
 		val, err := extractVarInfoFromEntry(scope.target, scope.BinInfo, scope.image(), scope.Regs, scope.Mem, entry.Tree, scope.dictAddr)
 		if err != nil {
@ -522,6 +518,31 @@ func afterLastArgAddr(vars []*Variable) uint64 {
 	return 0
 }

+// readLocalPtrVar reads the value of the local pointer variable vname. This
+// is a low level helper function, it does not support nested scopes, range
+// resolution across range bodies, type parameters, &c...
+func readLocalPtrVar(dwarfTree *godwarf.Tree, vname string, tgt *Target, bi *BinaryInfo, image *Image, regs op.DwarfRegisters, mem MemoryReadWriter) uint64 {
+	for _, entry := range dwarfTree.Children {
+		name, _ := entry.Val(dwarf.AttrName).(string)
+		if name == vname {
+			v, err := extractVarInfoFromEntry(tgt, bi, image, regs, mem, entry, 0)
+			if err != nil {
+				logflags.DebuggerLogger().Errorf("could not load %s variable: %v", name, err)
+			} else if v.Unreadable != nil {
+				logflags.DebuggerLogger().Errorf("could not load %s variable: %v", name, v.Unreadable)
+			} else {
+				r, err := readUintRaw(v.mem, v.Addr, int64(bi.Arch.PtrSize()))
+				if err != nil {
+					logflags.DebuggerLogger().Errorf("could not load %s variable: %v", name, err)
+				}
+				return r
+			}
+			break
+		}
+	}
+	return 0
+}
+
 // setValue writes the value of srcv to dstv.
 //   - If srcv is a numerical literal constant and srcv is of a compatible type
 //     the necessary type conversion is performed.
@ -887,6 +908,8 @@ func (stack *evalStack) resume(g *G) {
 	scope.Regs.FrameBase = stack.fboff + int64(scope.g.stack.hi)
 	scope.Regs.CFA = scope.frameOffset + int64(scope.g.stack.hi)
 	stack.curthread = g.Thread
+	scope.rangeFrames = nil
+	scope.enclosingRangeScopes = nil

 	finished := funcCallStep(scope, stack, g.Thread)
 	if finished {
@ -1002,6 +1025,16 @@ func (stack *evalStack) executeOp() {
 	case *evalop.PushFrameoff:
 		stack.push(newConstant(constant.MakeInt64(scope.frameOffset), scope.Mem))

+	case *evalop.PushRangeParentOffset:
+		if scope.rangeFrames == nil {
+			stack.err = scope.setupRangeFrames()
+		}
+		if len(scope.rangeFrames) > 0 {
+			stack.push(newConstant(constant.MakeInt64(scope.rangeFrames[len(scope.rangeFrames)-2].FrameOffset()), scope.Mem))
+		} else {
+			stack.push(newConstant(constant.MakeInt64(0), scope.Mem))
+		}
+
 	case *evalop.PushThreadID:
 		stack.push(newConstant(constant.MakeInt64(int64(scope.threadID)), scope.Mem))

--- a/pkg/proc/evalop/evalcompile.go
+++ b/pkg/proc/evalop/evalcompile.go
@ -218,6 +218,9 @@ func (ctx *compileCtx) compileAST(t ast.Expr) error {
 			case x.Name == "runtime" && node.Sel.Name == "threadid":
 				ctx.pushOp(&PushThreadID{})

+			case x.Name == "runtime" && node.Sel.Name == "rangeParentOffset":
+				ctx.pushOp(&PushRangeParentOffset{})
+
 			default:
 				ctx.pushOp(&PushPackageVarOrSelect{Name: x.Name, Sel: node.Sel.Name})
 			}
--- a/pkg/proc/evalop/ops.go
+++ b/pkg/proc/evalop/ops.go
@ -30,6 +30,12 @@ type PushThreadID struct {

 func (*PushThreadID) depthCheck() (npop, npush int) { return 0, 1 }

+// PushRangeParentOffset pushes the frame offset of the range-over-func closure body parent.
+type PushRangeParentOffset struct {
+}
+
+func (*PushRangeParentOffset) depthCheck() (npop, npush int) { return 0, 1 }
+
 // PushConst pushes a constant on the stack.
 type PushConst struct {
 	Value constant.Value
--- a/pkg/proc/proc_test.go
+++ b/pkg/proc/proc_test.go
@ -6267,11 +6267,21 @@ func TestRangeOverFuncNext(t *testing.T) {
 		t.Skip("N/A")
 	}

+	var bp *proc.Breakpoint
+
 	funcBreak := func(t *testing.T, fnname string) seqTest {
 		return seqTest{
 			contNothing,
 			func(p *proc.Target) {
-				setFunctionBreakpoint(p, t, fnname)
+				bp = setFunctionBreakpoint(p, t, fnname)
+			}}
+	}
+
+	clearBreak := func(t *testing.T) seqTest {
+		return seqTest{
+			contNothing,
+			func(p *proc.Target) {
+				assertNoError(p.ClearBreakpoint(bp.Addr), t, "ClearBreakpoint")
 			}}
 	}

@ -6362,6 +6372,19 @@ func TestRangeOverFuncNext(t *testing.T) {
 		}
 	}

+	assertFunc := func(t *testing.T, fname string) seqTest {
+		return seqTest{
+			contNothing,
+			func(p *proc.Target) {
+				pc := currentPC(p, t)
+				fn := p.BinInfo().PCToFunc(pc)
+				if fn.Name != fname {
+					t.Errorf("Wrong function name, expected %s got %s", fname, fn.Name)
+				}
+			},
+		}
+	}
+
 	withTestProcessArgs("rangeoverfunc", t, ".", []string{}, 0, func(p *proc.Target, grp *proc.TargetGroup, fixture protest.Fixture) {
 		t.Run("TestTrickyIterAll1", func(t *testing.T) {
 			testseq2intl(t, fixture, grp, p, nil, []seqTest{
@ -6800,6 +6823,33 @@ func TestRangeOverFuncNext(t *testing.T) {
 				nx(228), // fmt.Println
 			})
 		})
+
+		t.Run("TestRecur", func(t *testing.T) {
+			testseq2intl(t, fixture, grp, p, nil, []seqTest{
+				funcBreak(t, "main.TestRecur"),
+				{contContinue, 231},
+				clearBreak(t),
+				nx(232), // result := []int{}
+				assertEval(t, "n", "3"),
+				nx(233), // if n > 0 {
+				nx(234), // TestRecur
+
+				nx(236), // for _, x := range (x == 10)
+				assertFunc(t, "main.TestRecur"),
+				assertEval(t, "n", "3"),
+				nx(236),
+				assertFunc(t, "main.TestRecur-range1"),
+				assertEval(t, "x", "10", "n", "3"),
+				nx(237), // result = ...
+				nx(238), // if n == 3
+				nx(239), // TestRecur(0)
+				nx(241),
+
+				nx(236), // for _, x := range (x == 20)
+				nx(237), // result = ...
+				assertEval(t, "x", "20", "n", "3"),
+			})
+		})
 	})
 }

@ -6810,7 +6860,7 @@ func TestRangeOverFuncStepOut(t *testing.T) {

 	testseq2(t, "rangeoverfunc", "", []seqTest{
 		{contContinue, 97},
-		{contStepout, 237},
+		{contStepout, 251},
 	})
 }

@ -6819,11 +6869,21 @@ func TestRangeOverFuncNextInlined(t *testing.T) {
 		t.Skip("N/A")
 	}

+	var bp *proc.Breakpoint
+
 	funcBreak := func(t *testing.T, fnname string) seqTest {
 		return seqTest{
 			contNothing,
 			func(p *proc.Target) {
-				setFunctionBreakpoint(p, t, fnname)
+				bp = setFunctionBreakpoint(p, t, fnname)
+			}}
+	}
+
+	clearBreak := func(t *testing.T) seqTest {
+		return seqTest{
+			contNothing,
+			func(p *proc.Target) {
+				assertNoError(p.ClearBreakpoint(bp.Addr), t, "ClearBreakpoint")
 			}}
 	}

@ -6885,6 +6945,19 @@ func TestRangeOverFuncNextInlined(t *testing.T) {
 		}
 	}

+	assertFunc := func(t *testing.T, fname string) seqTest {
+		return seqTest{
+			contNothing,
+			func(p *proc.Target) {
+				pc := currentPC(p, t)
+				fn := p.BinInfo().PCToFunc(pc)
+				if fn.Name != fname {
+					t.Errorf("Wrong function name, expected %s got %s", fname, fn.Name)
+				}
+			},
+		}
+	}
+
 	withTestProcessArgs("rangeoverfunc", t, ".", []string{}, protest.EnableInlining, func(p *proc.Target, grp *proc.TargetGroup, fixture protest.Fixture) {
 		t.Run("TestTrickyIterAll1", func(t *testing.T) {
 			testseq2intl(t, fixture, grp, p, nil, []seqTest{
@ -7336,6 +7409,33 @@ func TestRangeOverFuncNextInlined(t *testing.T) {
 				nx(228), // fmt.Println
 			})
 		})
+
+		t.Run("TestRecur", func(t *testing.T) {
+			testseq2intl(t, fixture, grp, p, nil, []seqTest{
+				funcBreak(t, "main.TestRecur"),
+				{contContinue, 231},
+				clearBreak(t),
+				nx(232), // result := []int{}
+				assertEval(t, "n", "3"),
+				nx(233), // if n > 0 {
+				nx(234), // TestRecur
+
+				nx(236), // for _, x := range (x == 10)
+				assertFunc(t, "main.TestRecur"),
+				assertEval(t, "n", "3"),
+				nx(236),
+				assertFunc(t, "main.TestRecur-range1"),
+				assertEval(t, "x", "10", "n", "3"),
+				nx(237), // result = ...
+				nx(238), // if n == 3
+				nx(239), // TestRecur(0)
+				nx(241),
+
+				nx(236), // for _, x := range (x == 20)
+				nx(237), // result = ...
+				assertEval(t, "x", "20", "n", "3"),
+			})
+		})
 	})
 }

--- a/pkg/proc/stack.go
+++ b/pkg/proc/stack.go
@ -65,6 +65,11 @@ type Stackframe struct {
 	// Use this value to determine active lexical scopes for the stackframe.
 	lastpc uint64

+	// closurePtr is the value of .closureptr, if present. This variable is
+	// used to correlated range-over-func closure bodies with their enclosing
+	// function.
+	closurePtr int64
+
 	// TopmostDefer is the defer that would be at the top of the stack when a
 	// panic unwind would get to this call frame, in other words it's the first
 	// deferred function that will  be called if the runtime unwinds past this
@ -95,6 +100,12 @@ func (frame *Stackframe) FramePointerOffset() int64 {
 	return int64(frame.Regs.BP()) - int64(frame.stackHi)
 }

+// contains returns true if off is between CFA and SP
+func (frame *Stackframe) contains(off int64) bool {
+	p := uint64(off + int64(frame.stackHi))
+	return frame.Regs.SP() < p && p <= uint64(frame.Regs.CFA)
+}
+
 // ThreadStacktrace returns the stack trace for thread.
 // Note the locations in the array are return addresses not call addresses.
 func ThreadStacktrace(tgt *Target, thread Thread, depth int) ([]Stackframe, error) {
@ -350,6 +361,19 @@ func (it *stackIterator) newStackframe(ret, retaddr uint64) Stackframe {
 			r.Call.File, r.Call.Line = r.Current.Fn.cu.lineInfo.PCToLine(r.Current.Fn.Entry, it.pc-1)
 		}
 	}
+	if fn != nil && !fn.cu.image.Stripped() && !r.SystemStack && it.g != nil {
+		dwarfTree, _ := fn.cu.image.getDwarfTree(fn.offset)
+		if dwarfTree != nil {
+			c := readLocalPtrVar(dwarfTree, goClosurePtr, it.target, it.bi, fn.cu.image, r.Regs, it.mem)
+			if c != 0 {
+				if c >= it.g.stack.lo && c < it.g.stack.hi {
+					r.closurePtr = int64(c) - int64(it.g.stack.hi)
+				} else {
+					r.closurePtr = int64(c)
+				}
+			}
+		}
+	}
 	return r
 }

@ -436,6 +460,7 @@ func (it *stackIterator) appendInlineCalls(callback func(Stackframe) bool, frame
 			SystemStack: frame.SystemStack,
 			Inlined:     true,
 			lastpc:      frame.lastpc,
+			closurePtr:  frame.closurePtr,
 		})

 		frame.Call.File = filepath
@ -909,9 +934,40 @@ func rangeFuncStackTrace(tgt *Target, g *G) ([]Stackframe, error) {
 	stage := 0
 	var rangeParent *Function
 	nonMonotonicSP := false
-	it.stacktraceFunc(func(fr Stackframe) bool {
-		//TODO(range-over-func): this is a heuristic, we should use .closureptr instead
+	var closurePtr int64

+	appendFrame := func(fr Stackframe) {
+		frames = append(frames, fr)
+		if fr.closurePtr != 0 {
+			closurePtr = fr.closurePtr
+		}
+	}
+
+	closurePtrOk := func(fr *Stackframe) bool {
+		if fr.SystemStack {
+			return false
+		}
+		if closurePtr < 0 {
+			// closure is stack allocated, check that it is on this frame
+			return fr.contains(closurePtr)
+		}
+		// otherwise closurePtr is a heap allocated variable, so we need to check
+		// all closure body variables in scope in this frame
+		scope := FrameToScope(tgt, it.mem, it.g, 0, *fr)
+		yields, _ := scope.simpleLocals(localsNoDeclLineCheck|localsOnlyRangeBodyClosures, "")
+		for _, yield := range yields {
+			if yield.Kind != reflect.Func {
+				continue
+			}
+			addr := yield.funcvalAddr()
+			if int64(addr) == closurePtr {
+				return true
+			}
+		}
+		return false
+	}
+
+	it.stacktraceFunc(func(fr Stackframe) bool {
 		if len(frames) > 0 {
 			prev := &frames[len(frames)-1]
 			if fr.Regs.SP() < prev.Regs.SP() {
@ -922,20 +978,25 @@ func rangeFuncStackTrace(tgt *Target, g *G) ([]Stackframe, error) {

 		switch stage {
 		case 0:
-			frames = append(frames, fr)
+			appendFrame(fr)
 			rangeParent = fr.Call.Fn.extra(tgt.BinInfo()).rangeParent
 			stage++
-			if rangeParent == nil {
+			if rangeParent == nil || closurePtr == 0 {
 				frames = nil
 				stage = 3
 				return false
 			}
 		case 1:
-			if fr.Call.Fn.offset == rangeParent.offset {
-				frames = append(frames, fr)
+			if fr.Call.Fn.offset == rangeParent.offset && closurePtrOk(&fr) {
+				appendFrame(fr)
 				stage++
-			} else if fr.Call.Fn.extra(tgt.BinInfo()).rangeParent == rangeParent {
-				frames = append(frames, fr)
+			} else if fr.Call.Fn.extra(tgt.BinInfo()).rangeParent == rangeParent && closurePtrOk(&fr) {
+				appendFrame(fr)
+				if closurePtr == 0 {
+					frames = nil
+					stage = 3
+					return false
+				}
 			}
 		case 2:
 			frames = append(frames, fr)
--- a/pkg/proc/target_exec.go
+++ b/pkg/proc/target_exec.go
@ -827,14 +827,18 @@ func next(dbp *Target, stepInto, inlinedStepOut bool) error {
 		if rangeParent == nil {
 			rangeParent = topframe.Call.Fn
 		}
+		rpoff := topframe.FrameOffset()
+		if len(rangeFrames) > 0 {
+			rpoff = rangeFrames[len(rangeFrames)-2].FrameOffset()
+		}
+		rpc := astutil.And(sameGCond, astutil.Eql(astutil.PkgVar("runtime", "rangeParentOffset"), astutil.Int(rpoff)))
 		for _, fn := range rangeParent.extra(bi).rangeBodies {
 			if fn.Entry != 0 {
 				pc, err := FirstPCAfterPrologue(dbp, fn, false)
 				if err != nil {
 					return err
 				}
-				// TODO: this breakpoint must have a condition on .closureptr (https://go-review.googlesource.com/c/go/+/586975)
-				if _, err := allowDuplicateBreakpoint(dbp.SetBreakpoint(0, pc, NextBreakpoint, sameGCond)); err != nil {
+				if _, err := allowDuplicateBreakpoint(dbp.SetBreakpoint(0, pc, NextBreakpoint, rpc)); err != nil {
 					return err
 				}
 			}
@ -842,7 +846,7 @@ func next(dbp *Target, stepInto, inlinedStepOut bool) error {
 	}

 	// Set step-out breakpoints for range-over-func body closures
-	if !stepInto && selg != nil && topframe.Current.Fn.extra(bi).rangeParent != nil {
+	if !stepInto && selg != nil && topframe.Current.Fn.extra(bi).rangeParent != nil && len(rangeFrames) > 0 {
 		for _, fr := range rangeFrames[:len(rangeFrames)-1] {
 			retframecond := astutil.And(sameGCond, frameoffCondition(&fr))
 			if !fr.hasInlines {