proc: use CPUID to determine ZMM_Hi256 region offset (#3831)

The offset of state component i can be found via
CPUID.(EAX=0DH,ECX=i):EBX. ZMM_Hi256 is state component 6, so we use
CPUID to enumerate its offset instead of hardcoding it.

For core dumps, which may have been generated on a different CPU, we
guess the ZMM_Hi256 offset based on xcr0 and the length of the XSAVE
region. The logic comes from binutils-gdb.

Fixes #3827.
This commit is contained in:
Chen 2024-10-22 00:16:57 +08:00 committed by GitHub
parent 423644e288
commit bef326c6a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 50 additions and 8 deletions

@ -79,7 +79,6 @@ const (
_XSAVE_HEADER_LEN = 64 _XSAVE_HEADER_LEN = 64
_XSAVE_EXTENDED_REGION_START = 576 _XSAVE_EXTENDED_REGION_START = 576
_XSAVE_SSE_REGION_LEN = 416 _XSAVE_SSE_REGION_LEN = 416
_XSAVE_AVX512_ZMM_REGION_START = 1152
) )
// AMD64XstateRead reads a byte array containing an XSAVE area into regset. // AMD64XstateRead reads a byte array containing an XSAVE area into regset.
@ -87,7 +86,8 @@ const (
// contents of the legacy region of the XSAVE area. // contents of the legacy region of the XSAVE area.
// See Section 13.1 (and following) of Intel® 64 and IA-32 Architectures // See Section 13.1 (and following) of Intel® 64 and IA-32 Architectures
// Software Developers Manual, Volume 1: Basic Architecture. // Software Developers Manual, Volume 1: Basic Architecture.
func AMD64XstateRead(xstateargs []byte, readLegacy bool, regset *AMD64Xstate) error { // If xstateZMMHi256Offset is zero, it will be guessed.
func AMD64XstateRead(xstateargs []byte, readLegacy bool, regset *AMD64Xstate, xstateZMMHi256Offset int) error {
if _XSAVE_HEADER_START+_XSAVE_HEADER_LEN >= len(xstateargs) { if _XSAVE_HEADER_START+_XSAVE_HEADER_LEN >= len(xstateargs) {
return nil return nil
} }
@ -120,7 +120,19 @@ func AMD64XstateRead(xstateargs []byte, readLegacy bool, regset *AMD64Xstate) er
return nil return nil
} }
avx512state := xstateargs[_XSAVE_AVX512_ZMM_REGION_START:] if xstateZMMHi256Offset == 0 {
// Guess ZMM_Hi256 component offset
// ref: https://github.com/bminor/binutils-gdb/blob/df89bdf0baf106c3b0a9fae53e4e48607a7f3f87/gdb/i387-tdep.c#L916
if xstate_bv&(1<<9) != 0 && len(xstateargs) == 2440 {
// AMD CPUs supporting PKRU
xstateZMMHi256Offset = 896
} else {
// Intel CPUs supporting AVX512
xstateZMMHi256Offset = 1152
}
}
avx512state := xstateargs[xstateZMMHi256Offset:]
regset.Avx512State = true regset.Avx512State = true
copy(regset.ZmmSpace[:], avx512state[:len(regset.ZmmSpace)]) copy(regset.ZmmSpace[:], avx512state[:len(regset.ZmmSpace)])
@ -180,7 +192,7 @@ func (xstate *AMD64Xstate) SetXmmRegister(n int, value []byte) error {
// Copy bytes [32, 64) to Xsave area // Copy bytes [32, 64) to Xsave area
zmmval := rest zmmval := rest
zmmpos := _XSAVE_AVX512_ZMM_REGION_START + (n * 32) zmmpos := AMD64XstateZMMHi256Offset() + (n * 32)
if zmmpos >= len(xstate.Xsave) { if zmmpos >= len(xstate.Xsave) {
return fmt.Errorf("could not set XMM%d: bytes 32..%d not in XSAVE area", n, 32+len(zmmval)) return fmt.Errorf("could not set XMM%d: bytes 32..%d not in XSAVE area", n, 32+len(zmmval))
} }

@ -28,3 +28,28 @@ func AMD64XstateMaxSize() int {
}) })
return xstateMaxSize return xstateMaxSize
} }
var (
	xstateZMMHi256Offset         int
	loadXstateZMMHi256OffsetOnce sync.Once
)

// AMD64XstateZMMHi256Offset returns the offset of the ZMM_Hi256 state
// component as reported by CPUID on the CPU this process is running on. The
// value is probed on the first call and reused by every later call. It is 0
// when the CPU does not report XSAVE support.
//
// The result describes the current CPU only: a core dump may have been
// generated on a different CPU.
func AMD64XstateZMMHi256Offset() int {
	// See Intel 64 and IA-32 Architecture Software Developer's Manual, Vol. 1
	// chapter 13.2 and Vol. 2A CPUID instruction for a description of all the
	// magic constants.
	const (
		xsaveFeatureBit   = 1 << 26 // CPUID.01H:ECX, Vol. 2A, Table 3-10
		zmmHi256Component = 0x06    // ZMM_Hi256 is state component #6
	)

	loadXstateZMMHi256OffsetOnce.Do(func() {
		offset := 0 // stays 0 when XSAVE is not supported by this processor
		if _, _, features, _ := cpuid(0x01, 0x00); features&xsaveFeatureBit != 0 {
			// CPUID.(EAX=0DH,ECX=i):EBX holds the offset of state component i.
			_, bx, _, _ := cpuid(0x0d, zmmHi256Component)
			offset = int(bx)
		}
		xstateZMMHi256Offset = offset
	})
	return xstateZMMHi256Offset
}

@ -5,3 +5,8 @@ package amd64util
func AMD64XstateMaxSize() int { func AMD64XstateMaxSize() int {
return _XSTATE_MAX_KNOWN_SIZE return _XSTATE_MAX_KNOWN_SIZE
} }
// AMD64XstateZMMHi256Offset always returns 0 in this implementation, which
// treats AVX-512 as not supported and therefore has no ZMM_Hi256 offset to
// report.
func AMD64XstateZMMHi256Offset() int {
	return 0
}

@ -338,7 +338,7 @@ func readNote(r io.ReadSeeker, machineType elf.Machine) (*note, error) {
case _NT_X86_XSTATE: case _NT_X86_XSTATE:
if machineType == _EM_X86_64 { if machineType == _EM_X86_64 {
var fpregs amd64util.AMD64Xstate var fpregs amd64util.AMD64Xstate
if err := amd64util.AMD64XstateRead(desc, true, &fpregs); err != nil { if err := amd64util.AMD64XstateRead(desc, true, &fpregs, 0); err != nil {
return nil, err return nil, err
} }
note.Desc = &fpregs note.Desc = &fpregs

@ -73,7 +73,7 @@ func ptraceGetRegset(id int) (*amd64util.AMD64Xstate, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
err = amd64util.AMD64XstateRead(regset.Xsave, false, &regset) err = amd64util.AMD64XstateRead(regset.Xsave, false, &regset, amd64util.AMD64XstateZMMHi256Offset())
return &regset, err return &regset, err
} }

@ -38,7 +38,7 @@ func ptraceGetRegset(tid int) (regset amd64util.AMD64Xstate, err error) {
} }
regset.Xsave = xstateargs[:iov.Len] regset.Xsave = xstateargs[:iov.Len]
err = amd64util.AMD64XstateRead(regset.Xsave, false, &regset) err = amd64util.AMD64XstateRead(regset.Xsave, false, &regset, amd64util.AMD64XstateZMMHi256Offset())
return return
} }

@ -37,6 +37,6 @@ func ptraceGetRegset(tid int) (regset amd64util.AMD64Xstate, err error) {
} }
regset.Xsave = xstateargs[:iov.Len] regset.Xsave = xstateargs[:iov.Len]
err = amd64util.AMD64XstateRead(regset.Xsave, false, &regset) err = amd64util.AMD64XstateRead(regset.Xsave, false, &regset, amd64util.AMD64XstateZMMHi256Offset())
return return
} }