proc/gdbserial: support call injection with rr backend (#2740)

Normally calls can't be performed on recorded processes, because the
future instructions executed by the target are predetermined. The rr
debugger however has a mechanism that allows this by taking the current
state of the recording and allowing it to diverge from the recording,
temporarily.
This commit adds support for starting and ending such diversions around
function calls.

Note: this requires rr version 5.5 or later to work, see:
	https://github.com/rr-debugger/rr/pull/2748
This commit is contained in:
Alessandro Arzilli 2021-10-14 20:06:14 +02:00 committed by GitHub
parent c8f6c3a685
commit 9a5d5bc996
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 120 additions and 14 deletions

@ -278,6 +278,9 @@ func (dbp *process) SetUProbe(fnName string, goidOffset int64, args []ebpf.UProb
panic("not implemented")
}
// StartCallInjection notifies the backend that we are about to inject a
// function call. This implementation performs no work: it hands back a
// no-op cleanup function together with a nil error.
func (p *process) StartCallInjection() (func(), error) {
	noop := func() {}
	return noop, nil
}
// ReadMemory will return memory from the core file at the specified location and put the
// read memory into `data`, returning the length read, and returning an error if
// the length read is shorter than the length of the `data` buffer.

@ -132,6 +132,7 @@ type callInjection struct {
continueCompleted chan<- *G
continueRequest <-chan continueRequest
startThreadID int
endCallInjection func()
}
func (callCtx *callContext) doContinue() *G {
@ -188,10 +189,16 @@ func EvalExpressionWithCalls(t *Target, g *G, expr string, retLoadCfg LoadConfig
continueCompleted: continueCompleted,
}
endCallInjection, err := t.proc.StartCallInjection()
if err != nil {
return err
}
t.fncallForG[g.ID] = &callInjection{
continueCompleted: continueCompleted,
continueRequest: continueRequest,
startThreadID: 0,
endCallInjection: endCallInjection,
}
go scope.EvalExpression(expr, retLoadCfg)
@ -230,7 +237,13 @@ func finishEvalExpressionWithCalls(t *Target, g *G, contReq continueRequest, ok
}
close(t.fncallForG[g.ID].continueCompleted)
delete(t.fncallForG, g.ID)
callinj := t.fncallForG[g.ID]
for goid := range t.fncallForG {
if t.fncallForG[goid] == callinj {
delete(t.fncallForG, goid)
}
}
callinj.endCallInjection()
return err
}

@ -131,6 +131,10 @@ var debugserverExecutablePaths = []string{
// while there are still internal breakpoints set.
var ErrDirChange = errors.New("direction change with internal breakpoints")
// ErrStartCallInjectionBackwards is returned when trying to start a call
// injection while the recording is being run backwards, i.e. when
// StartCallInjection is invoked on an rr trace whose connection direction is
// not proc.Forward.
var ErrStartCallInjectionBackwards = errors.New("can not start a call injection while running backwards")
var checkCanUnmaskSignalsOnce sync.Once
var canUnmaskSignalsCached bool
@ -151,7 +155,8 @@ type gdbProcess struct {
breakpoints proc.BreakpointMap
gcmdok bool // true if the stub supports g and G commands
gcmdok bool // true if the stub supports g and (maybe) G commands
_Gcmdok bool // true if the stub supports G command
threadStopInfo bool // true if the stub supports qThreadStopInfo
tracedir string // if attached to rr the path to the trace directory
@ -1207,6 +1212,39 @@ func (p *gdbProcess) ChangeDirection(dir proc.Direction) error {
return nil
}
// StartCallInjection notifies the backend that we are about to inject a
// function call.
//
// On success it returns a function that the caller must invoke once the
// injected call is over. When the process is not an rr replay (no trace
// directory is set) that function does nothing. It returns
// proc.ErrProcessExited if the connection is gone and
// ErrStartCallInjectionBackwards if the recording is not being run forward.
func (p *gdbProcess) StartCallInjection() (func(), error) {
	switch {
	case p.tracedir == "":
		// Not attached to an rr trace: there is nothing to prepare.
		return func() {}, nil
	case p.conn.conn == nil:
		return nil, proc.ErrProcessExited{Pid: p.conn.pid}
	case p.conn.direction != proc.Forward:
		return nil, ErrStartCallInjectionBackwards
	}

	// A recorded target can not normally execute injected function calls,
	// since the instructions it is going to execute are fixed by the
	// recording.
	// rr works around this with a "diversion": it snapshots the current
	// state of the process and lets it run forward like an ordinary process,
	// detached from the recording.
	// The gdb serial protocol has no request to begin a diversion, and gdb
	// (rr's main frontend) does not know how to ask for one. rr therefore
	// begins a diversion when siginfo is read, because that is the first
	// request gdb makes when it starts a function call injection.
	if _, err := p.conn.qXfer("siginfo", "", true); err != nil {
		return nil, err
	}

	endDiversion := func() {
		// rr always returns an error for qXfer:siginfo:write... even though
		// it works, so the error is deliberately discarded.
		_ = p.conn.qXferWrite("siginfo", "")
	}
	return endDiversion, nil
}
// GetDirection returns the current direction of execution.
func (p *gdbProcess) GetDirection() proc.Direction {
return p.conn.direction
@ -1649,8 +1687,14 @@ func (t *gdbThread) writeSomeRegisters(regNames ...string) error {
}
func (t *gdbThread) writeRegisters() error {
if t.p.gcmdok {
return t.p.conn.writeRegisters(t.strID, t.regs.buf)
if t.p.gcmdok && t.p._Gcmdok {
err := t.p.conn.writeRegisters(t.strID, t.regs.buf)
if isProtocolErrorUnsupported(err) {
t.p._Gcmdok = false
} else {
return err
}
}
for _, r := range t.regs.regs {
if r.ignoreOnWrite {

@ -47,6 +47,8 @@ type gdbConn struct {
xcmdok bool // x command can be used to transfer memory
goarch string
useXcmd bool // forces writeMemory to use the 'X' command
log *logrus.Entry
}
@ -405,6 +407,17 @@ func (conn *gdbConn) qXfer(kind, annex string, binary bool) ([]byte, error) {
return out, nil
}
// qXferWrite executes a 'qXfer' write with the specified kind and annex.
// The request always uses offset 0 and carries no payload.
func (conn *gdbConn) qXferWrite(kind, annex string) error {
	// TODO(aarzilli): if we ever actually need to write something with
	// qXfer, this will need to be implemented properly. At the moment it is
	// only used for a fake write to the siginfo kind, to end a diversion in
	// 'rr'.
	conn.outbuf.Reset()
	fmt.Fprintf(&conn.outbuf, "$qXfer:%s:write:%s:0:", kind, annex)
	if _, err := conn.exec(conn.outbuf.Bytes(), "qXfer"); err != nil {
		return err
	}
	return nil
}
type breakpointType uint8
const (
@ -988,8 +1001,16 @@ func writeAsciiBytes(w io.Writer, data []byte) {
}
}
// executes 'M' (write memory) command
// writeMemory writes data at addr, picking the packet type from
// conn.useXcmd: the binary 'X' command when it is set, the hex-encoded 'M'
// command otherwise.
func (conn *gdbConn) writeMemory(addr uint64, data []byte) (written int, err error) {
	if !conn.useXcmd {
		return conn.writeMemoryHex(addr, data)
	}
	return conn.writeMemoryBinary(addr, data)
}
// executes 'M' (write memory) command
func (conn *gdbConn) writeMemoryHex(addr uint64, data []byte) (written int, err error) {
if len(data) == 0 {
// LLDB can't parse requests for 0-length writes and hangs if we emit them
return 0, nil
@ -1007,6 +1028,27 @@ func (conn *gdbConn) writeMemory(addr uint64, data []byte) (written int, err err
return len(data), nil
}
// writeMemoryBinary executes the 'X' (binary write memory) command, writing
// data at addr. It returns len(data) on success and 0 together with the
// error reported by the exchange on failure.
func (conn *gdbConn) writeMemoryBinary(addr uint64, data []byte) (written int, err error) {
	conn.outbuf.Reset()
	fmt.Fprintf(&conn.outbuf, "$X%x,%x:", addr, len(data))
	for i := range data {
		c := data[i]
		// '#', '$' and '}' can not appear verbatim in the payload: each is
		// sent as '}' followed by the byte XORed with escapeXor.
		if c == '#' || c == '$' || c == '}' {
			conn.outbuf.WriteByte('}')
			c ^= escapeXor
		}
		conn.outbuf.WriteByte(c)
	}
	if _, err = conn.exec(conn.outbuf.Bytes(), "memory write"); err != nil {
		return 0, err
	}
	return len(data), nil
}
func (conn *gdbConn) allocMemory(sz uint64) (uint64, error) {
conn.outbuf.Reset()
fmt.Fprintf(&conn.outbuf, "$_M%x,rwx", sz)

@ -154,6 +154,7 @@ func Replay(tracedir string, quiet, deleteOnDetach bool, debugInfoDirs []string)
p := newProcess(rrcmd.Process)
p.tracedir = tracedir
p.conn.useXcmd = true // 'rr' does not support the 'M' command which is what we would usually use to write memory, this is only important during function calls, in any other situation writing memory will fail anyway.
if deleteOnDetach {
p.onDetach = func() {
safeRemoveAll(p.tracedir)

@ -55,6 +55,9 @@ type ProcessInternal interface {
MemoryMap() ([]MemoryMapEntry, error)
GetBufferedTracepoints() []ebpf.RawUProbeParams
// StartCallInjection notifies the backend that we are about to inject a function call.
StartCallInjection() (func(), error)
}
// RecordingManipulation is an interface for manipulating process recordings.

@ -106,6 +106,9 @@ func (dbp *nativeProcess) Checkpoints() ([]proc.Checkpoint, error) { return nil,
// only supported in recorded traces.
func (dbp *nativeProcess) ClearCheckpoint(int) error { return proc.ErrNotRecorded }
// StartCallInjection notifies the backend that we are about to inject a
// function call. This implementation performs no work: it hands back a
// no-op cleanup function together with a nil error.
func (dbp *nativeProcess) StartCallInjection() (func(), error) {
	noop := func() {}
	return noop, nil
}
// Detach from the process being debugged, optionally killing it.
func (dbp *nativeProcess) Detach(kill bool) (err error) {
if dbp.exited {

@ -246,9 +246,6 @@ func (t *Target) Valid() (bool, error) {
// Currently only non-recorded processes running on AMD64 support
// function calls.
func (t *Target) SupportsFunctionCalls() bool {
if ok, _ := t.Process.Recorded(); ok {
return false
}
return t.Process.BinInfo().Arch.Name == "amd64"
}

@ -125,10 +125,6 @@ func (dbp *Target) Continue() error {
switch {
case curbp.Breakpoint == nil:
// runtime.Breakpoint, manual stop or debugCallV1-related stop
recorded, _ := dbp.Recorded()
if recorded {
return conditionErrors(threads)
}
loc, err := curthread.Location()
if err != nil || loc.Fn == nil {
@ -139,6 +135,9 @@ func (dbp *Target) Continue() error {
switch {
case loc.Fn.Name == "runtime.breakpoint":
if recorded, _ := dbp.Recorded(); recorded {
return conditionErrors(threads)
}
// In linux-arm64, PtraceSingleStep seems cannot step over BRK instruction
// (linux-arm64 feature or kernel bug maybe).
if !arch.BreakInstrMovesPC() {

@ -314,7 +314,7 @@ func MustSupportFunctionCalls(t *testing.T, testBackend string) {
t.Skip("this version of Go does not support function calls")
}
if testBackend == "rr" || (runtime.GOOS == "darwin" && testBackend == "native") {
if runtime.GOOS == "darwin" && testBackend == "native" {
t.Skip("this backend does not support function calls")
}

@ -1171,6 +1171,7 @@ type testCaseCallFunction struct {
func TestCallFunction(t *testing.T) {
protest.MustSupportFunctionCalls(t, testBackend)
protest.AllowRecording(t)
var testcases = []testCaseCallFunction{
// Basic function call injection tests
@ -1396,7 +1397,7 @@ func testCallFunction(t *testing.T, p *proc.Target, tc testCaseCallFunction) {
}
if len(retvals) != len(tc.outs) {
t.Fatalf("call %q: wrong number of return parameters", tc.expr)
t.Fatalf("call %q: wrong number of return parameters (%#v)", tc.expr, retvals)
}
for i := range retvals {