proc: fix TestIssue1101 flake (#3585)

If the target process exits and receives a signal at the same time, and
we receive the signal first, we should call waitpid again to read the
target's exit status.

This also fixes a nil pointer dereference when trapWaitInternal returns
an error. This fix is probably incomplete, but I wasn't able to
reproduce its circumstances after 30000 runs of TestIssue1101 to
properly address it.
This commit is contained in:
Alessandro Arzilli 2023-11-27 17:58:27 +01:00 committed by GitHub
parent 4e2d63fa26
commit 4ed41e9060
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 4 deletions

@ -563,9 +563,23 @@ func trapWaitInternal(procgrp *processGroup, pid int, options trapWaitOptions) (
}
// do the same thing we do if a thread quit
if wpid == dbp.pid {
exitStatus := 0
if procgrp.numValid() == 1 {
// try to recover the real exit status using waitpid
for {
wpid2, status2, err := dbp.wait(-1, sys.WNOHANG)
if wpid2 <= 0 || err != nil {
break
}
if status2.Exited() {
exitStatus = status2.ExitStatus()
}
}
}
dbp.postExit()
if procgrp.numValid() == 0 {
return nil, proc.ErrProcessExited{Pid: wpid, Status: status.ExitStatus()}
return nil, proc.ErrProcessExited{Pid: wpid, Status: exitStatus}
}
continue
}
@ -693,8 +707,7 @@ func (procgrp *processGroup) stop(cctx *proc.ContinueOnceContext, trapthread *na
for {
th, err := trapWaitInternal(procgrp, -1, trapWaitNohang)
if err != nil {
p := procgrp.procForThread(th.ID)
return nil, exitGuard(p, procgrp, err)
return nil, exitGuard(procgrp.procs[0], procgrp, err)
}
if th == nil {
break

@ -3690,7 +3690,7 @@ func TestIssue1101(t *testing.T) {
// Also it seems that sometimes on linux/386 we will not receive the
// exit status. This happens if the process exits at the same time as it
// receives a signal.
t.Fatalf("process exited status %d (expected 2)", pexit.Status)
t.Fatalf("process exited status %d (expected 2) (last command = %s) (%#v)", pexit.Status, lastCmd, pexit)
}
} else {
assertNoError(exitErr, t, lastCmd)