pkg/proc: add initial data structures for core support

Core files contain a variety of memory mappings either to files or anonymous regions stored in the core file. These regions can overlap, so figuring out what exactly to read can be tricky. This commit contains a data structure, SplicedMemory, which accumulates mappings and reads from the correct sources.
2017-02-08 15:22:04 -05:00 · 2017-02-08 15:22:04 -05:00 · 423bcaa83a
commit 423bcaa83a
parent e7fa14d21d
3 changed files with 566 additions and 0 deletions
--- a/pkg/proc/core.go
+++ b/pkg/proc/core.go
@ -0,0 +1,146 @@
 package proc
 import (
 	"fmt"
 	"io"
 )
 // MemoryReader is like io.ReaderAt, but the offset is a uintptr so that it
 // can address all of 64-bit memory.
 // Redundant with memoryReadWriter but more easily suited to working with
 // the standard io package.
 type MemoryReader interface {
 	// ReadMemory is just like io.ReaderAt.ReadAt: it reads into buf starting
 	// at address addr and returns the number of bytes read and any error.
 	ReadMemory(buf []byte, addr uintptr) (n int, err error)
 }
 // A SplicedMemory represents a memory space formed from multiple regions,
 // each of which may override previously added regions. For example, in the
 // following core, the program text was loaded at 0x400000:
 // Start               End                 Page Offset
 // 0x0000000000400000  0x000000000044f000  0x0000000000000000
 // but then it's partially overwritten with an RW mapping whose data is stored
 // in the core file:
 // Type           Offset             VirtAddr           PhysAddr
 //                FileSiz            MemSiz              Flags  Align
 // LOAD           0x0000000000004000 0x000000000049a000 0x0000000000000000
 //                0x0000000000002000 0x0000000000002000  RW     1000
 // This can be represented in a SplicedMemory by adding the original region,
 // then putting the RW mapping on top of it.
 type SplicedMemory struct {
 	// readers holds the regions added so far. Add rebuilds this slice in
 	// ascending address order each time a region is added.
 	readers []readerEntry
 }
 // readerEntry is one region of a SplicedMemory: length bytes starting at
 // address offset, served by reader.
 type readerEntry struct {
 	offset uintptr      // first address covered by the region
 	length uintptr      // number of bytes covered by the region
 	reader MemoryReader // source of the region's contents
 }
 // Add adds a new region to the SplicedMemory, which may override existing regions.
 //
 // The region covers length bytes starting at address off and is served by
 // reader. A zero-length region is ignored. Existing entries that overlap the
 // new region are trimmed, split, or dropped so that the new region wins, and
 // the entry list stays in ascending address order.
 func (r *SplicedMemory) Add(reader MemoryReader, off, length uintptr) {
 	if length == 0 {
 		return
 	}
 	// end and entryEnd are inclusive last addresses.
 	end := off + length - 1
 	// Punching a hole turns one entry into three, so leave room for two more.
 	newReaders := make([]readerEntry, 0, len(r.readers)+2)
 	add := func(e readerEntry) {
 		if e.length == 0 {
 			return
 		}
 		newReaders = append(newReaders, e)
 	}
 	inserted := false
 	// Walk through the list of regions, fixing up any that overlap and inserting the new one.
 	for _, entry := range r.readers {
 		entryEnd := entry.offset + entry.length - 1
 		switch {
 		case entryEnd < off:
 			// Entry is completely before the new region.
 			add(entry)
 		case end < entry.offset:
 			// Entry is completely after the new region.
 			if !inserted {
 				add(readerEntry{off, length, reader})
 				inserted = true
 			}
 			add(entry)
 		case off <= entry.offset && entryEnd <= end:
 			// Entry is completely overwritten by the new region. Drop.
 		case entry.offset < off && entryEnd <= end:
 			// New region overwrites the end of the entry. The new region itself
 			// is inserted when a later entry (or the end of the list) is reached.
 			entry.length = off - entry.offset
 			add(entry)
 		case off <= entry.offset && end < entryEnd:
 			// New region overwrites the beginning of the entry.
 			if !inserted {
 				add(readerEntry{off, length, reader})
 				inserted = true
 			}
 			// The overwritten prefix runs from entry.offset through end, so the
 			// surviving part of the entry starts at end+1.
 			overlap := end - entry.offset + 1
 			entry.offset += overlap
 			entry.length -= overlap
 			add(entry)
 		case entry.offset < off && end < entryEnd:
 			// New region punches a hole in the entry. Split it in two and put the new region in the middle.
 			add(readerEntry{entry.offset, off - entry.offset, entry.reader})
 			add(readerEntry{off, length, reader})
 			add(readerEntry{end + 1, entryEnd - end, entry.reader})
 			inserted = true
 		default:
 			panic(fmt.Sprintf("Unhandled case: existing entry is %v len %v, new is %v len %v", entry.offset, entry.length, off, length))
 		}
 	}
 	if !inserted {
 		newReaders = append(newReaders, readerEntry{off, length, reader})
 	}
 	r.readers = newReaders
 }
 // ReadMemory implements MemoryReader.ReadMemory.
 //
 // It fills buf starting at address addr, moving from one region to the next
 // as each is exhausted, and returns the number of bytes read. An error is
 // returned when addr is not covered by any region, when the read runs into
 // an address no region covers before buf is full, or when an underlying
 // reader fails. The walk assumes the regions are in ascending address order.
 func (r *SplicedMemory) ReadMemory(buf []byte, addr uintptr) (n int, err error) {
 	for _, entry := range r.readers {
 		entryEnd := entry.offset + entry.length // exclusive
 		if entryEnd <= addr {
 			// Region lies entirely below the address being read.
 			continue
 		}
 		if addr < entry.offset {
 			// This region starts above addr, so addr falls in a gap. Reading
 			// from this region's reader would return bytes for the wrong address.
 			break
 		}
 		// Don't go past the region. Comparing against the remaining size
 		// avoids overflowing addr+len(buf).
 		pb := buf
 		if avail := entryEnd - addr; uintptr(len(pb)) > avail {
 			pb = pb[:avail]
 		}
 		pn, rerr := entry.reader.ReadMemory(pb, addr)
 		n += pn
 		if rerr != nil || pn != len(pb) {
 			return n, rerr
 		}
 		buf = buf[pn:]
 		addr += uintptr(pn)
 		if len(buf) == 0 {
 			// Done, don't bother scanning the rest.
 			return n, nil
 		}
 	}
 	if n == 0 {
 		return 0, fmt.Errorf("offset %v did not match any regions", addr)
 	}
 	return n, fmt.Errorf("hit unmapped area at %v after %v bytes", addr, n)
 }
 // OffsetReaderAt adapts an io.ReaderAt to the MemoryReader interface by
 // subtracting a fixed base address from every read address. It models a
 // mapping in an address space: with program text mapped at 0x400000, an
 // OffsetReaderAt whose offset is 0x400000, wrapped around the opened program
 // file, answers reads in that part of the address space.
 type OffsetReaderAt struct {
 	reader io.ReaderAt
 	offset uintptr
 }
 // ReadMemory reads into buf from the wrapped reader at position addr minus
 // the configured offset.
 func (r *OffsetReaderAt) ReadMemory(buf []byte, addr uintptr) (n int, err error) {
 	pos := int64(addr - r.offset)
 	n, err = r.reader.ReadAt(buf, pos)
 	return n, err
 }
--- a/pkg/proc/core_linux_amd64.go
+++ b/pkg/proc/core_linux_amd64.go
@ -0,0 +1,269 @@
 package proc
 import (
 	"bytes"
 	"encoding/binary"
 	"fmt"
 	"io"
 	"os"
 	"golang.org/x/debug/elf"
 	"golang.org/x/sys/unix"
 )
 // NT_FILE is the note type of the file-mapping note in a core file. Its
 // value is the ASCII bytes of "FILE".
 const NT_FILE elf.NType = 0x46494c45 // "FILE".
 // readCore reads a core file from corePath corresponding to the executable at
 // exePath. For details on the Linux ELF core format, see:
 // http://www.gabriel.urdhr.fr/2015/05/29/core-file/,
 // http://uhlo.blogspot.fr/2012/05/brief-look-into-core-dumps.html,
 // elf_core_dump in http://lxr.free-electrons.com/source/fs/binfmt_elf.c,
 // and, if absolutely desperate, readelf.c from the binutils source.
 //
 // On success both files are left open, because the returned Core's memory
 // reader reads from them on demand. On failure both are closed.
 func readCore(corePath, exePath string) (*Core, error) {
 	core, err := elf.Open(corePath)
 	if err != nil {
 		return nil, err
 	}
 	exe, err := os.Open(exePath)
 	if err != nil {
 		core.Close()
 		return nil, err
 	}
 	// Release both files on any error return below.
 	ok := false
 	defer func() {
 		if !ok {
 			exe.Close()
 			core.Close()
 		}
 	}()
 	if core.Type != elf.ET_CORE {
 		return nil, fmt.Errorf("%v is not a core file", corePath)
 	}
 	notes, err := readNotes(core)
 	if err != nil {
 		return nil, err
 	}
 	memory := buildMemory(core, exe, notes)
 	// One NT_PRSTATUS note per thread, keyed by its Pid field; the process ID
 	// comes from NT_PRPSINFO.
 	threads := map[int]*LinuxPrStatus{}
 	pid := 0
 	for _, note := range notes {
 		switch note.Type {
 		case elf.NT_PRSTATUS:
 			t := note.Desc.(*LinuxPrStatus)
 			threads[int(t.Pid)] = t
 		case elf.NT_PRPSINFO:
 			pid = int(note.Desc.(*LinuxPrPsInfo).Pid)
 		}
 	}
 	ok = true
 	return &Core{
 		MemoryReader: memory,
 		Threads:      threads,
 		Pid:          pid,
 	}, nil
 }
 // Core is the result of reading a core file with readCore.
 type Core struct {
 	// MemoryReader reads the memory of the dumped process.
 	MemoryReader
 	// Threads maps the Pid field of each NT_PRSTATUS note to that note.
 	Threads map[int]*LinuxPrStatus
 	// Pid is the Pid field of the NT_PRPSINFO note, or 0 if none was found.
 	Pid     int
 }
 // Note is a note from the PT_NOTE prog.
 // Relevant types:
 // - NT_FILE: File mapping information, e.g. program text mappings. Desc is a LinuxNTFile.
 // - NT_PRPSINFO: Information about a process, including PID and signal. Desc is a LinuxPrPsInfo.
 // - NT_PRSTATUS: Information about a thread, including base registers, state, etc. Desc is a LinuxPrStatus.
 // - NT_FPREGSET (Not implemented): x87 floating point registers.
 // - NT_X86_XSTATE (Not implemented): Other registers, including AVX and such.
 type Note struct {
 	Type elf.NType
 	Name string
 	Desc interface{} // Decoded descriptor; a pointer to one of the types listed above, or nil for note types that are not decoded.
 }
 // readNotes reads all the notes from the notes prog in core.
 //
 // Only the first PT_NOTE program header is consulted. An error is returned
 // if core has no PT_NOTE program header or if any note cannot be read.
 func readNotes(core *elf.File) ([]*Note, error) {
 	var notesProg *elf.Prog
 	for _, prog := range core.Progs {
 		if prog.Type == elf.PT_NOTE {
 			notesProg = prog
 			break
 		}
 	}
 	if notesProg == nil {
 		// Without this check notesProg.Open() below dereferences nil.
 		return nil, fmt.Errorf("no PT_NOTE program header found in core file")
 	}
 	r := notesProg.Open()
 	notes := []*Note{}
 	for {
 		note, err := readNote(r)
 		if err == io.EOF {
 			// readNote passes a bare io.EOF through only when the note
 			// header could not be read, i.e. the notes are exhausted.
 			break
 		}
 		if err != nil {
 			return nil, err
 		}
 		notes = append(notes, note)
 	}
 	return notes, nil
 }
 // readNote reads a single note from r, decoding the descriptor if possible.
 //
 // The descriptor is decoded for NT_PRSTATUS, NT_PRPSINFO and NT_FILE notes;
 // for every other type Desc is left nil. All fields are read as little
 // endian. The error from reading the note header is returned unwrapped so
 // that the caller can detect io.EOF at the end of the notes; all other
 // errors are wrapped with a description of the step that failed.
 func readNote(r io.ReadSeeker) (*Note, error) {
 	// Notes are laid out as described in the SysV ABI:
 	// http://www.sco.com/developers/gabi/latest/ch5.pheader.html#note_section
 	note := &Note{}
 	hdr := &ELFNotesHdr{}
 	err := binary.Read(r, binary.LittleEndian, hdr)
 	if err != nil {
 		return nil, err // don't wrap so readNotes sees EOF.
 	}
 	note.Type = elf.NType(hdr.Type)
 	// io.ReadFull rather than a bare Read: one Read call may return fewer
 	// bytes than requested, and some readers report EOF for an empty buffer
 	// at end of input, which would reject a trailing note with an empty field.
 	name := make([]byte, hdr.Namesz)
 	if _, err := io.ReadFull(r, name); err != nil {
 		return nil, fmt.Errorf("reading name: %v", err)
 	}
 	// NOTE(review): name is kept verbatim, so it presumably still carries the
 	// terminating NUL byte — confirm before comparing Name against literals.
 	note.Name = string(name)
 	if err := skipPadding(r, 4); err != nil {
 		return nil, fmt.Errorf("aligning after name: %v", err)
 	}
 	desc := make([]byte, hdr.Descsz)
 	if _, err := io.ReadFull(r, desc); err != nil {
 		return nil, fmt.Errorf("reading desc: %v", err)
 	}
 	descReader := bytes.NewReader(desc)
 	switch note.Type {
 	case elf.NT_PRSTATUS:
 		note.Desc = &LinuxPrStatus{}
 		if err := binary.Read(descReader, binary.LittleEndian, note.Desc); err != nil {
 			return nil, fmt.Errorf("reading NT_PRSTATUS: %v", err)
 		}
 	case elf.NT_PRPSINFO:
 		note.Desc = &LinuxPrPsInfo{}
 		if err := binary.Read(descReader, binary.LittleEndian, note.Desc); err != nil {
 			return nil, fmt.Errorf("reading NT_PRPSINFO: %v", err)
 		}
 	case NT_FILE:
 		// No good documentation reference, but the structure is
 		// simply a header, including entry count, followed by that
 		// many entries, and then the file name of each entry,
 		// null-delimited. Not reading the names here.
 		data := &LinuxNTFile{}
 		if err := binary.Read(descReader, binary.LittleEndian, &data.LinuxNTFileHdr); err != nil {
 			return nil, fmt.Errorf("reading NT_FILE header: %v", err)
 		}
 		for i := 0; i < int(data.Count); i++ {
 			entry := &LinuxNTFileEntry{}
 			if err := binary.Read(descReader, binary.LittleEndian, entry); err != nil {
 				return nil, fmt.Errorf("reading NT_FILE entry %v: %v", i, err)
 			}
 			data.entries = append(data.entries, entry)
 		}
 		note.Desc = data
 	}
 	if err := skipPadding(r, 4); err != nil {
 		return nil, fmt.Errorf("aligning after desc: %v", err)
 	}
 	return note, nil
 }
 // skipPadding advances r so that its position is a multiple of pad. When the
 // position is already aligned, r is left where it is.
 func skipPadding(r io.ReadSeeker, pad int64) error {
 	cur, err := r.Seek(0, io.SeekCurrent)
 	if err != nil {
 		return err
 	}
 	// Bytes past the previous boundary; zero means already aligned.
 	if rem := cur % pad; rem != 0 {
 		_, err = r.Seek(pad-rem, io.SeekCurrent)
 	}
 	return err
 }
 // buildMemory assembles the address space of the dumped process as a
 // SplicedMemory. File mappings listed in NT_FILE notes are added first, all
 // backed by exe; the PT_LOAD segments of core that carry data are added
 // afterwards, so they override any file mapping they overlap.
 func buildMemory(core *elf.File, exe io.ReaderAt, notes []*Note) MemoryReader {
 	spliced := new(SplicedMemory)
 	// For now, assume all file mappings are to the exe.
 	for _, n := range notes {
 		if n.Type != NT_FILE {
 			continue
 		}
 		files := n.Desc.(*LinuxNTFile)
 		for _, m := range files.entries {
 			// FileOfs is multiplied by the page size to get the file
 			// position that corresponds to address Start.
 			base := uintptr(m.Start - (m.FileOfs * files.PageSize))
 			mapping := &OffsetReaderAt{reader: exe, offset: base}
 			spliced.Add(mapping, uintptr(m.Start), uintptr(m.End-m.Start))
 		}
 	}
 	for _, seg := range core.Progs {
 		// Only loadable segments with data stored in the core file contribute.
 		if seg.Type != elf.PT_LOAD || seg.Filesz == 0 {
 			continue
 		}
 		segment := &OffsetReaderAt{
 			reader: seg.ReaderAt,
 			offset: uintptr(seg.Vaddr),
 		}
 		spliced.Add(segment, uintptr(seg.Vaddr), uintptr(seg.Filesz))
 	}
 	return spliced
 }
 // Various structures from the ELF spec and the Linux kernel.
 // AMD64 specific primarily because of unix.PtraceRegs, but also
 // because some of the fields are word sized.
 // See http://lxr.free-electrons.com/source/include/uapi/linux/elfcore.h
 // LinuxPrPsInfo is the decoded descriptor of an NT_PRPSINFO note, which
 // describes the dumped process. It is filled with binary.Read, so field
 // order and sizes define the expected byte layout.
 type LinuxPrPsInfo struct {
 	State                uint8
 	Sname                int8
 	Zomb                 uint8
 	Nice                 int8
 	// Explicit padding: places Flag at byte offset 8. Presumably mirrors the
 	// alignment padding of the kernel's C struct.
 	_                    [4]uint8
 	Flag                 uint64
 	Uid, Gid             uint32
 	Pid, Ppid, Pgrp, Sid int32
 	Fname                [16]uint8
 	Args                 [80]uint8
 }
 // LinuxPrStatus is the decoded descriptor of an NT_PRSTATUS note, which
 // describes one thread. It is filled with binary.Read, so field order and
 // sizes define the expected byte layout.
 type LinuxPrStatus struct {
 	Siginfo                      LinuxSiginfo
 	Cursig                       uint16
 	// Explicit padding: places Sigpend at byte offset 16.
 	_                            [2]uint8
 	Sigpend                      uint64
 	Sighold                      uint64
 	// Pid is used by readCore as the key identifying the thread.
 	Pid, Ppid, Pgrp, Sid         int32
 	Utime, Stime, CUtime, CStime unix.Timeval
 	Reg                          unix.PtraceRegs
 	Fpvalid                      int32
 }
 // LinuxSiginfo is the signal information at the start of a LinuxPrStatus.
 type LinuxSiginfo struct {
 	Signo int32
 	Code  int32
 	Errno int32
 }
 // LinuxNTFile is the decoded descriptor of an NT_FILE note: the header
 // followed by one entry per file mapping. The file names that follow the
 // entries in the note are not decoded.
 type LinuxNTFile struct {
 	LinuxNTFileHdr
 	entries []*LinuxNTFileEntry
 }
 // LinuxNTFileHdr is the fixed-size header at the start of an NT_FILE note.
 type LinuxNTFileHdr struct {
 	Count    uint64 // number of LinuxNTFileEntry records that follow
 	PageSize uint64 // unit in which each entry's FileOfs is expressed
 }
 // LinuxNTFileEntry describes one file mapping in an NT_FILE note.
 type LinuxNTFileEntry struct {
 	Start   uint64 // first address of the mapping
 	End     uint64 // address just past the mapping; End-Start is its size
 	FileOfs uint64 // position in the mapped file, in units of PageSize
 }
 // ELFNotesHdr is the ELF Notes header. Same size on 64 and 32-bit machines.
 type ELFNotesHdr struct {
 	Namesz uint32 // length in bytes of the name that follows the header
 	Descsz uint32 // length in bytes of the descriptor that follows the name
 	Type   uint32 // note type, converted to elf.NType by readNote
 }
--- a/pkg/proc/core_linux_amd64_test.go
+++ b/pkg/proc/core_linux_amd64_test.go
@ -0,0 +1,151 @@
 package proc
 import (
 	"bytes"
 	"fmt"
 	"io/ioutil"
 	"os"
 	"os/exec"
 	"path"
 	"reflect"
 	"testing"
 
 	"github.com/derekparker/delve/pkg/proc/test"
 )
 // TestSplicedReader adds a sequence of regions to a SplicedMemory and checks
 // that a read across them returns bytes from the most recently added region
 // at each address.
 func TestSplicedReader(t *testing.T) {
 	// low[i] == i and high[i] == i+100, so every byte read back identifies
 	// both the backing slice and the address it came from.
 	low := make([]byte, 100)
 	high := make([]byte, 100)
 	for i := range low {
 		low[i] = byte(i)
 		high[i] = byte(i + 100)
 	}
 	type span struct {
 		src  []byte
 		off  uintptr
 		size uintptr
 	}
 	cases := []struct {
 		name   string
 		spans  []span
 		addr   uintptr
 		count  int
 		expect []byte
 	}{
 		{
 			name:   "Insert after",
 			spans:  []span{{low, 0, 1}, {high, 1, 1}},
 			addr:   0,
 			count:  2,
 			expect: []byte{0, 101},
 		},
 		{
 			name:   "Insert before",
 			spans:  []span{{low, 1, 1}, {high, 0, 1}},
 			addr:   0,
 			count:  2,
 			expect: []byte{100, 1},
 		},
 		{
 			name:   "Completely overwrite",
 			spans:  []span{{low, 1, 1}, {high, 0, 3}},
 			addr:   0,
 			count:  3,
 			expect: []byte{100, 101, 102},
 		},
 		{
 			name:   "Overwrite end",
 			spans:  []span{{low, 0, 2}, {high, 1, 2}},
 			addr:   0,
 			count:  3,
 			expect: []byte{0, 101, 102},
 		},
 		{
 			name:   "Overwrite start",
 			spans:  []span{{low, 0, 3}, {high, 0, 2}},
 			addr:   0,
 			count:  3,
 			expect: []byte{100, 101, 2},
 		},
 		{
 			name:   "Punch hole",
 			spans:  []span{{low, 0, 5}, {high, 1, 3}},
 			addr:   0,
 			count:  5,
 			expect: []byte{0, 101, 102, 103, 4},
 		},
 		{
 			name:   "Overlap two",
 			spans:  []span{{low, 10, 4}, {low, 14, 4}, {high, 12, 4}},
 			addr:   10,
 			count:  8,
 			expect: []byte{10, 11, 112, 113, 114, 115, 16, 17},
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			mem := new(SplicedMemory)
 			for _, s := range tc.spans {
 				// Offset 0 makes address N read byte N of the backing slice.
 				backing := &OffsetReaderAt{bytes.NewReader(s.src), 0}
 				mem.Add(backing, s.off, s.size)
 			}
 			got := make([]byte, tc.count)
 			n, err := mem.ReadMemory(got, tc.addr)
 			ok := n == tc.count && err == nil && reflect.DeepEqual(got, tc.expect)
 			if !ok {
 				t.Errorf("ReadAt = %v, %v, %v, want %v, %v, %v", n, err, got, tc.count, nil, tc.expect)
 			}
 		})
 	}
 }
 // TestReadCore crashes the "panic" fixture inside a temporary directory to
 // produce a core file, reads it back with readCore, and checks that at least
 // one thread and a plausible number of memory regions were found.
 func TestReadCore(t *testing.T) {
 	// This is all very fragile and won't work on hosts with non-default core patterns.
 	// Might be better to check in the core?
 	tempDir, err := ioutil.TempDir("", "")
 	if err != nil {
 		t.Fatal(err)
 	}
 	// Don't leave the core file behind. Deferred calls still run when
 	// t.Fatal aborts the test.
 	defer os.RemoveAll(tempDir)
 	fix := test.BuildFixture("panic")
 	bashCmd := fmt.Sprintf("cd %v && ulimit -c unlimited && GOTRACEBACK=crash %v", tempDir, fix.Path)
 	// The fixture is meant to crash, so a failing exit status is expected and
 	// the error is deliberately ignored; a missing core is caught by readCore.
 	_ = exec.Command("bash", "-c", bashCmd).Run()
 	corePath := path.Join(tempDir, "core")
 	core, err := readCore(corePath, fix.Path)
 	if err != nil {
 		t.Fatal(err)
 	}
 	if len(core.Threads) == 0 {
 		t.Error("expected at least one thread")
 	}
 	// Punch through the abstraction to verify that we got some mappings.
 	spliced, ok := core.MemoryReader.(*SplicedMemory)
 	if !ok {
 		t.Fatalf("expected MemoryReader to be a *SplicedMemory, got %T", core.MemoryReader)
 	}
 	// There should be at least an RO section, RW section, RX section, the heap, and a thread stack.
 	if len(spliced.readers) < 5 {
 		t.Errorf("expected at least 5 memory regions, got only %v", len(spliced.readers))
 	}
 	// Would be good to test more stuff but not sure what without reading debug information, etc.
 }