From c938051dd0b80a5c60572d6807270d06ca685d2e Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 7 Oct 2025 07:58:50 -0700 Subject: [PATCH] Revert "cmd/compile: redo arm64 LR/FP save and restore" This reverts commit 719dfcf8a8478d70360bf3c34c0e920be7b32994. Reason for revert: Causing crashes. Change-Id: I0b8526dd03d82fa074ce4f97f1789eeac702b3eb Reviewed-on: https://go-review.googlesource.com/c/go/+/709755 Reviewed-by: Keith Randall Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI Auto-Submit: Keith Randall Reviewed-by: Cherry Mui --- src/cmd/compile/abi-internal.md | 12 +- src/cmd/compile/internal/arm64/ggen.go | 10 +- src/cmd/compile/internal/arm64/ssa.go | 2 +- src/cmd/compile/internal/ssagen/pgen.go | 6 - src/cmd/compile/internal/ssagen/ssa.go | 3 +- src/cmd/internal/obj/arm64/asm7.go | 12 +- src/cmd/internal/obj/arm64/obj7.go | 316 ++++++++++++------ src/cmd/link/internal/amd64/obj.go | 19 +- src/cmd/link/internal/arm64/obj.go | 23 +- src/cmd/link/internal/ld/dwarf.go | 7 +- src/cmd/link/internal/ld/lib.go | 4 - src/cmd/link/internal/ld/stackcheck.go | 5 + src/cmd/link/internal/x86/obj.go | 15 +- src/runtime/asm_arm64.s | 81 ++--- src/runtime/mkpreempt.go | 20 +- src/runtime/panic.go | 8 +- src/runtime/preempt_arm64.s | 15 +- src/runtime/race_arm64.s | 17 +- src/runtime/signal_arm64.go | 16 +- src/runtime/stack.go | 20 +- src/runtime/testdata/testprog/badtraceback.go | 5 - src/runtime/traceback.go | 30 +- test/nosplit.go | 8 +- 23 files changed, 356 insertions(+), 298 deletions(-) diff --git a/src/cmd/compile/abi-internal.md b/src/cmd/compile/abi-internal.md index 490e1affb7..eae230dc07 100644 --- a/src/cmd/compile/abi-internal.md +++ b/src/cmd/compile/abi-internal.md @@ -576,19 +576,19 @@ A function's stack frame, after the frame is created, is laid out as follows: +------------------------------+ - | return PC | - | frame pointer on entry | ← R29 points to | ... locals ... | | ... outgoing arguments ... | - | unused word | ← RSP points to + | return PC | ← RSP points to + | frame pointer on entry | +------------------------------+ ↓ lower addresses The "return PC" is loaded to the link register, R30, as part of the arm64 `CALL` operation. -On entry, a function pushes R30 (the return address) and R29 -(the caller's frame pointer) onto the bottom of the stack. It then -subtracts a constant from RSP to open its stack frame. +On entry, a function subtracts from RSP to open its stack frame, and +saves the values of R30 and R29 at the bottom of the frame. +Specifically, R30 is saved at 0(RSP) and R29 is saved at -8(RSP), +after RSP is updated. A leaf function that does not require any stack space may omit the saved R30 and R29. diff --git a/src/cmd/compile/internal/arm64/ggen.go b/src/cmd/compile/internal/arm64/ggen.go index 6ba56b992e..1402746700 100644 --- a/src/cmd/compile/internal/arm64/ggen.go +++ b/src/cmd/compile/internal/arm64/ggen.go @@ -11,12 +11,10 @@ import ( ) func padframe(frame int64) int64 { - // arm64 requires frame sizes here that are 8 mod 16. - // With the additional (unused) slot at the bottom of the frame, - // that makes an aligned 16 byte frame. - // Adding a save region for LR+FP does not change the alignment. - if frame != 0 { - frame += (-(frame + 8)) & 15 + // arm64 requires that the frame size (not counting saved FP&LR) + // be 16 bytes aligned. If not, pad it. 
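+	// For example, a 24-byte frame is padded to 32 bytes here. The
+	// 8-byte LR slot added in preprocess then makes it 8 mod 16, and
+	// preprocess reserves 8 more bytes for FP, so the frame ends up
+	// 16-byte aligned again (32+8+8 = 48).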
+ if frame%16 != 0 { + frame += 16 - (frame % 16) } return frame } diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 9f79a740c6..7bc0e536e9 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -221,7 +221,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { for i := 0; i < len(args); i++ { a := args[i] - // Offset by size of the unused slot before start of args. + // Offset by size of the saved LR slot. addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize) // Look for double-register operations if we can. if i < len(args)-1 { diff --git a/src/cmd/compile/internal/ssagen/pgen.go b/src/cmd/compile/internal/ssagen/pgen.go index f0776172b9..0a2010363f 100644 --- a/src/cmd/compile/internal/ssagen/pgen.go +++ b/src/cmd/compile/internal/ssagen/pgen.go @@ -393,16 +393,10 @@ func StackOffset(slot ssa.LocalSlot) int32 { case ir.PAUTO: off = n.FrameOffset() if base.Ctxt.Arch.FixedFrameSize == 0 { - // x86 return address off -= int64(types.PtrSize) } if buildcfg.FramePointerEnabled { - // frame pointer off -= int64(types.PtrSize) - if buildcfg.GOARCH == "arm64" { - // arm64 return address also - off -= int64(types.PtrSize) - } } } return int32(off + slot.Off) diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 107447f04c..1e2159579d 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -7150,7 +7150,6 @@ func defframe(s *State, e *ssafn, f *ssa.Func) { // Insert code to zero ambiguously live variables so that the // garbage collector only sees initialized values when it // looks for pointers. - // Note: lo/hi are offsets from varp and will be negative. var lo, hi int64 // Opaque state for backend to use. Current backends use it to @@ -7158,7 +7157,7 @@ func defframe(s *State, e *ssafn, f *ssa.Func) { var state uint32 // Iterate through declarations. Autos are sorted in decreasing - // frame offset order (least negative to most negative). + // frame offset order. for _, n := range e.curfn.Dcl { if !n.Needzero() { continue diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 281d705a3e..743d09a319 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -51,6 +51,7 @@ type ctxt7 struct { blitrl *obj.Prog elitrl *obj.Prog autosize int32 + extrasize int32 instoffset int64 pc int64 pool struct { @@ -1121,7 +1122,8 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { ctxt.Diag("arm64 ops not initialized, call arm64.buildop first") } - c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset)} + c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)} + p.To.Offset &= 0xffffffff // extrasize is no longer needed // Process literal pool and allocate initial program counter for each Prog, before // generating branch veneers. @@ -2117,8 +2119,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. 
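+			// autosize and extrasize used below were unpacked from
+			// p.To.Offset in span7 (its low and high 32 bits).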
a.Reg = obj.REG_NONE } - // The frame top 16 bytes are for LR/FP - c.instoffset = int64(c.autosize) + a.Offset - extrasize + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) return autoclass(c.instoffset) case obj.NAME_PARAM: @@ -2178,8 +2180,8 @@ func (c *ctxt7) aclass(a *obj.Addr) int { // a.Offset is still relative to pseudo-SP. a.Reg = obj.REG_NONE } - // The frame top 16 bytes are for LR/FP - c.instoffset = int64(c.autosize) + a.Offset - extrasize + // The frame top 8 or 16 bytes are for FP + c.instoffset = int64(c.autosize) + a.Offset - int64(c.extrasize) case obj.NAME_PARAM: if a.Reg == REGSP { diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go index a697426145..2583e46354 100644 --- a/src/cmd/internal/obj/arm64/obj7.go +++ b/src/cmd/internal/obj/arm64/obj7.go @@ -36,6 +36,7 @@ import ( "cmd/internal/src" "cmd/internal/sys" "internal/abi" + "internal/buildcfg" "log" "math" ) @@ -471,8 +472,6 @@ func (c *ctxt7) rewriteToUseGot(p *obj.Prog) { obj.Nopout(p) } -const extrasize = 16 // space needed in the frame for LR+FP - func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { if cursym.Func().Text == nil || cursym.Func().Text.Link == nil { return @@ -522,26 +521,33 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { c.autosize = int32(textstksiz) if p.Mark&LEAF != 0 && c.autosize == 0 { - // A leaf function with no locals needs no frame. + // A leaf function with no locals has no frame. p.From.Sym.Set(obj.AttrNoFrame, true) } if !p.From.Sym.NoFrame() { // If there is a stack frame at all, it includes - // space for the (now unused) word at [SP:SP+8]. + // space to save the LR. c.autosize += 8 } - // Round up to a multiple of 16. - c.autosize += (-c.autosize) & 15 - if c.autosize != 0 { - // Allocate an extra 16 bytes at the top of the frame - // to save LR+FP. + extrasize := int32(0) + if c.autosize%16 == 8 { + // Allocate extra 8 bytes on the frame top to save FP + extrasize = 8 + } else if c.autosize&(16-1) == 0 { + // Allocate extra 16 bytes to save FP for the old frame whose size is 8 mod 16 + extrasize = 16 + } else { + c.ctxt.Diag("%v: unaligned frame size %d - must be 16 aligned", p, c.autosize-8) + } c.autosize += extrasize c.cursym.Func().Locals += extrasize - p.To.Offset = int64(c.autosize) + // low 32 bits for autosize + // high 32 bits for extrasize + p.To.Offset = int64(c.autosize) | int64(extrasize)<<32 } else { // NOFRAME p.To.Offset = 0 @@ -574,72 +580,120 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { var prologueEnd *obj.Prog aoffset := c.autosize - if aoffset < 16 { - log.Fatalf("aoffset too small %d", aoffset) + if aoffset > 0xf0 { + // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned. + // so the maximum aoffset value is 0xf0. + aoffset = 0xf0 } + // Frame is non-empty. Make sure to save link register, even if + // it is a leaf function, so that traceback works. q = p + if c.autosize > aoffset { + // Frame size is too large for a MOVD.W instruction. Store the frame pointer + // register and link register before decrementing SP, so if a signal comes + // during the execution of the function prologue, the traceback code will + // not see a half-updated stack frame. - // Store return address and frame pointer at the top of the stack frame. 
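+		// The full sequence is:
+		//	SUB  $autosize, RSP, R20
+		//	STP  (R29, R30), -8(R20)
+		//	MOVD R20, RSP
+		// with the final MOVD inside an unsafe point, so it cannot be
+		// async preempted with the frame half opened.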
- // STP.W (R29, R30), -16(SP) - q1 = obj.Appendp(q, c.newprog) - q1.Pos = p.Pos - q1.As = ASTP - q1.From.Type = obj.TYPE_REGREG - q1.From.Reg = REGFP - q1.From.Offset = REGLINK - q1.To.Type = obj.TYPE_MEM - q1.To.Reg = REG_RSP - q1.To.Offset = -16 - q1.Scond = C_XPRE + // SUB $autosize, RSP, R20 + q1 = obj.Appendp(q, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = int64(c.autosize) + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REG_R20 - prologueEnd = q1 + prologueEnd = q1 - // Update frame pointer - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = AMOVD - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REGSP - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGFP + // STP (R29, R30), -8(R20) + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REG_R20 + q1.To.Offset = -8 - // Allocate additional frame space. - adj := aoffset - 16 - if adj > 0 { - // SUB $autosize-16, RSP - if adj < 1<<12 { + // This is not async preemptible, as if we open a frame + // at the current SP, it will clobber the saved LR. + q1 = c.ctxt.StartUnsafePoint(q1, c.newprog) + + // MOVD R20, RSP + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REG_R20 + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGSP + q1.Spadj = c.autosize + + q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1) + + if buildcfg.GOOS == "ios" { + // iOS does not support SA_ONSTACK. We will run the signal handler + // on the G stack. If we write below SP, it may be clobbered by + // the signal handler. So we save FP and LR after decrementing SP. + // STP (R29, R30), -8(RSP) q1 = obj.Appendp(q1, c.newprog) q1.Pos = p.Pos - q1.As = ASUB - q1.From.Type = obj.TYPE_CONST - q1.From.Offset = int64(adj) - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGSP - } else { - // Constant too big for atomic subtract. - // Materialize in tmp register first. - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = AMOVD - q1.From.Type = obj.TYPE_CONST - q1.From.Offset = int64(adj) - q1.To.Type = obj.TYPE_REG - q1.To.Reg = REGTMP - - q1 = obj.Appendp(q1, c.newprog) - q1.Pos = p.Pos - q1.As = ASUB - q1.From.Type = obj.TYPE_REG - q1.From.Reg = REGTMP - q1.To.Type = obj.TYPE_REG + q1.As = ASTP + q1.From.Type = obj.TYPE_REGREG + q1.From.Reg = REGFP + q1.From.Offset = REGLINK + q1.To.Type = obj.TYPE_MEM q1.To.Reg = REGSP + q1.To.Offset = -8 } - q1.Spadj = adj + } else { + // small frame, update SP and save LR in a single MOVD.W instruction. + // So if a signal comes during the execution of the function prologue, + // the traceback code will not see a half-updated stack frame. + // Also, on Linux, in a cgo binary we may get a SIGSETXID signal + // early on before the signal stack is set, as glibc doesn't allow + // us to block SIGSETXID. So it is important that we don't write below + // the SP until the signal stack is set. + // Luckily, all the functions from thread entry to setting the signal + // stack have small frames. + q1 = obj.Appendp(q, c.newprog) + q1.As = AMOVD + q1.Pos = p.Pos + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGLINK + q1.To.Type = obj.TYPE_MEM + q1.Scond = C_XPRE + q1.To.Offset = int64(-aoffset) + q1.To.Reg = REGSP + q1.Spadj = aoffset + + prologueEnd = q1 + + // Frame pointer. 
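+			// Save the caller's R29 one word below the updated RSP,
+			// i.e. at -8(RSP), where the Go arm64 ABI keeps the saved
+			// frame pointer.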
+ q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = AMOVD + q1.From.Type = obj.TYPE_REG + q1.From.Reg = REGFP + q1.To.Type = obj.TYPE_MEM + q1.To.Reg = REGSP + q1.To.Offset = -8 } prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd) + q1 = obj.Appendp(q1, c.newprog) + q1.Pos = p.Pos + q1.As = ASUB + q1.From.Type = obj.TYPE_CONST + q1.From.Offset = 8 + q1.Reg = REGSP + q1.To.Type = obj.TYPE_REG + q1.To.Reg = REGFP + case obj.ARET: nocache(p) if p.From.Type == obj.TYPE_CONST { @@ -653,56 +707,105 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { } p.To = obj.Addr{} aoffset := c.autosize - if aoffset > 0 { - if aoffset < 16 { - log.Fatalf("aoffset too small %d", aoffset) - } - adj := aoffset - 16 - if adj > 0 { - if adj < 1<<12 { - // ADD $adj, RSP, RSP - p.As = AADD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(adj) - p.To.Type = obj.TYPE_REG - p.To.Reg = REGSP - } else { - // Put frame size in a separate register and - // add it in with a single instruction, - // so we never have a partial frame during - // the epilog. See issue 73259. + if c.cursym.Func().Text.Mark&LEAF != 0 { + if aoffset != 0 { + // Restore frame pointer. + // ADD $framesize-8, RSP, R29 + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(c.autosize) - 8 + p.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP - // MOVD $adj, REGTMP - p.As = AMOVD - p.From.Type = obj.TYPE_CONST - p.From.Offset = int64(adj) - p.To.Type = obj.TYPE_REG - p.To.Reg = REGTMP - // ADD REGTMP, RSP, RSP - p = obj.Appendp(p, c.newprog) - p.As = AADD - p.From.Type = obj.TYPE_REG - p.From.Reg = REGTMP - p.To.Type = obj.TYPE_REG - p.To.Reg = REGSP - } - p.Spadj = -adj - } - - // Pop LR+FP. - // LDP.P 16(RSP), (R29, R30) - if p.As != obj.ARET { + // Pop stack frame. + // ADD $framesize, RSP, RSP p = obj.Appendp(p, c.newprog) + p.As = AADD + p.From.Type = obj.TYPE_CONST + p.From.Offset = int64(c.autosize) + p.To.Type = obj.TYPE_REG + p.To.Reg = REGSP + p.Spadj = -c.autosize } - p.As = ALDP + } else if aoffset <= 0xF0 { + // small frame, restore LR and update SP in a single MOVD.P instruction. + // There is no correctness issue to use a single LDP for LR and FP, + // but the instructions are not pattern matched with the prologue's + // MOVD.W and MOVD, which may cause performance issue in + // store-forwarding. + + // MOVD -8(RSP), R29 + p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP - p.From.Offset = 16 + p.From.Offset = -8 + p.To.Type = obj.TYPE_REG + p.To.Reg = REGFP + p = obj.Appendp(p, c.newprog) + + // MOVD.P offset(RSP), R30 + p.As = AMOVD + p.From.Type = obj.TYPE_MEM p.Scond = C_XPOST + p.From.Offset = int64(aoffset) + p.From.Reg = REGSP + p.To.Type = obj.TYPE_REG + p.To.Reg = REGLINK + p.Spadj = -aoffset + } else { + // LDP -8(RSP), (R29, R30) + p.As = ALDP + p.From.Type = obj.TYPE_MEM + p.From.Offset = -8 + p.From.Reg = REGSP p.To.Type = obj.TYPE_REGREG p.To.Reg = REGFP p.To.Offset = REGLINK - p.Spadj = -16 + + if aoffset < 1<<12 { + // ADD $aoffset, RSP, RSP + q = newprog() + q.As = AADD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(aoffset) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -aoffset + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } else { + // Put frame size in a separate register and + // add it in with a single instruction, + // so we never have a partial frame during + // the epilog. See issue 73259. 
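+				// LR and FP were already restored by the LDP above, so
+				// the only remaining constraint is that SP move in one
+				// step.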
+ + // MOVD $aoffset, REGTMP + q = newprog() + q.As = AMOVD + q.From.Type = obj.TYPE_CONST + q.From.Offset = int64(aoffset) + q.To.Type = obj.TYPE_REG + q.To.Reg = REGTMP + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + // ADD REGTMP, RSP, RSP + q = newprog() + q.As = AADD + q.From.Type = obj.TYPE_REG + q.From.Reg = REGTMP + q.To.Type = obj.TYPE_REG + q.To.Reg = REGSP + q.Spadj = -aoffset + q.Pos = p.Pos + q.Link = p.Link + p.Link = q + p = q + } } // If enabled, this code emits 'MOV PC, R27' before every 'MOV LR, PC', @@ -765,11 +868,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Type = obj.TYPE_REG p.From.Reg = REGLINK } else { - /* MOVD framesize-8(RSP), Rd */ + /* MOVD (RSP), Rd */ p.As = AMOVD p.From.Type = obj.TYPE_MEM p.From.Reg = REGSP - p.From.Offset = int64(c.autosize - 8) } } if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.Spadj == 0 { @@ -804,12 +906,6 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { p.From.Reg = int16(REG_LSL + r + (shift&7)<<5) p.From.Offset = 0 } - if p.To.Type == obj.TYPE_MEM && p.To.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) { - p.Spadj += int32(-p.To.Offset) - } - if p.From.Type == obj.TYPE_MEM && p.From.Reg == REG_RSP && (p.Scond == C_XPRE || p.Scond == C_XPOST) { - p.Spadj += int32(-p.From.Offset) - } } } diff --git a/src/cmd/link/internal/amd64/obj.go b/src/cmd/link/internal/amd64/obj.go index 761496549f..3a6141b909 100644 --- a/src/cmd/link/internal/amd64/obj.go +++ b/src/cmd/link/internal/amd64/obj.go @@ -51,16 +51,15 @@ func Init() (*sys.Arch, ld.Arch) { Plan9Magic: uint32(4*26*26 + 7), Plan9_64Bit: true, - Adddynrel: adddynrel, - Archinit: archinit, - Archreloc: archreloc, - Archrelocvariant: archrelocvariant, - Gentext: gentext, - Machoreloc1: machoreloc1, - MachorelocSize: 8, - PEreloc1: pereloc1, - TLSIEtoLE: tlsIEtoLE, - ReturnAddressAtTopOfFrame: true, + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, + Archrelocvariant: archrelocvariant, + Gentext: gentext, + Machoreloc1: machoreloc1, + MachorelocSize: 8, + PEreloc1: pereloc1, + TLSIEtoLE: tlsIEtoLE, ELF: ld.ELFArch{ Linuxdynld: "/lib64/ld-linux-x86-64.so.2", diff --git a/src/cmd/link/internal/arm64/obj.go b/src/cmd/link/internal/arm64/obj.go index e1e4ade818..3d358155ba 100644 --- a/src/cmd/link/internal/arm64/obj.go +++ b/src/cmd/link/internal/arm64/obj.go @@ -47,18 +47,17 @@ func Init() (*sys.Arch, ld.Arch) { Dwarfreglr: dwarfRegLR, TrampLimit: 0x7c00000, // 26-bit signed offset * 4, leave room for PLT etc. 
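+	// A B/BL can reach ±0x8000000 bytes, so this leaves 4MB of room.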
- Adddynrel: adddynrel, - Archinit: archinit, - Archreloc: archreloc, - Archrelocvariant: archrelocvariant, - Extreloc: extreloc, - Gentext: gentext, - GenSymsLate: gensymlate, - Machoreloc1: machoreloc1, - MachorelocSize: 8, - PEreloc1: pereloc1, - Trampoline: trampoline, - ReturnAddressAtTopOfFrame: true, + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, + Archrelocvariant: archrelocvariant, + Extreloc: extreloc, + Gentext: gentext, + GenSymsLate: gensymlate, + Machoreloc1: machoreloc1, + MachorelocSize: 8, + PEreloc1: pereloc1, + Trampoline: trampoline, ELF: ld.ELFArch{ Androiddynld: "/system/bin/linker64", diff --git a/src/cmd/link/internal/ld/dwarf.go b/src/cmd/link/internal/ld/dwarf.go index c4d12a5488..0003938ef2 100644 --- a/src/cmd/link/internal/ld/dwarf.go +++ b/src/cmd/link/internal/ld/dwarf.go @@ -1544,14 +1544,9 @@ func (d *dwctxt) writeframes(fs loader.Sym) dwarfSecInfo { if pcsp.Value > 0 { // The return address is preserved at (CFA-frame_size) // after a stack frame has been allocated. - off := -spdelta - if thearch.ReturnAddressAtTopOfFrame { - // Except arm64, which has it at the top of frame. - off = -int64(d.arch.PtrSize) - } deltaBuf = append(deltaBuf, dwarf.DW_CFA_offset_extended_sf) deltaBuf = dwarf.AppendUleb128(deltaBuf, uint64(thearch.Dwarfreglr)) - deltaBuf = dwarf.AppendSleb128(deltaBuf, off/dataAlignmentFactor) + deltaBuf = dwarf.AppendSleb128(deltaBuf, -spdelta/dataAlignmentFactor) } else { // The return address is restored into the link register // when a stack frame has been de-allocated. diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go index 5f5ebfc1d9..2c861129b5 100644 --- a/src/cmd/link/internal/ld/lib.go +++ b/src/cmd/link/internal/ld/lib.go @@ -263,10 +263,6 @@ type Arch struct { // optional override for assignAddress AssignAddress func(ldr *loader.Loader, sect *sym.Section, n int, s loader.Sym, va uint64, isTramp bool) (*sym.Section, int, uint64) - // Reports whether the return address is stored at the top (highest address) - // of the stack frame. - ReturnAddressAtTopOfFrame bool - // ELF specific information. ELF ELFArch } diff --git a/src/cmd/link/internal/ld/stackcheck.go b/src/cmd/link/internal/ld/stackcheck.go index 14cd3a2238..98e7edaeb1 100644 --- a/src/cmd/link/internal/ld/stackcheck.go +++ b/src/cmd/link/internal/ld/stackcheck.go @@ -9,6 +9,7 @@ import ( "cmd/internal/objabi" "cmd/link/internal/loader" "fmt" + "internal/buildcfg" "sort" "strings" ) @@ -61,6 +62,10 @@ func (ctxt *Link) doStackCheck() { // that there are at least StackLimit bytes available below SP // when morestack returns. limit := objabi.StackNosplit(*flagRace) - sc.callSize + if buildcfg.GOARCH == "arm64" { + // Need an extra 8 bytes below SP to save FP. + limit -= 8 + } // Compute stack heights without any back-tracking information. 
// This will almost certainly succeed and we can simply diff --git a/src/cmd/link/internal/x86/obj.go b/src/cmd/link/internal/x86/obj.go index a4885fde8f..4336f01ea3 100644 --- a/src/cmd/link/internal/x86/obj.go +++ b/src/cmd/link/internal/x86/obj.go @@ -50,14 +50,13 @@ func Init() (*sys.Arch, ld.Arch) { Plan9Magic: uint32(4*11*11 + 7), - Adddynrel: adddynrel, - Archinit: archinit, - Archreloc: archreloc, - Archrelocvariant: archrelocvariant, - Gentext: gentext, - Machoreloc1: machoreloc1, - PEreloc1: pereloc1, - ReturnAddressAtTopOfFrame: true, + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, + Archrelocvariant: archrelocvariant, + Gentext: gentext, + Machoreloc1: machoreloc1, + PEreloc1: pereloc1, ELF: ld.ELFArch{ Linuxdynld: "/lib/ld-linux.so.2", diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index aa49a27a75..a0e82ec830 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -50,7 +50,9 @@ TEXT _rt0_arm64_lib(SB),NOSPLIT,$184 CBZ R4, nocgo MOVD $_rt0_arm64_lib_go(SB), R0 MOVD $0, R1 + SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved. BL (R4) + ADD $16, RSP B restore nocgo: @@ -369,6 +371,7 @@ switch: BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R0 MOVD R0, RSP + MOVD (g_sched+gobuf_bp)(g), R29 MOVD $0, (g_sched+gobuf_sp)(g) MOVD $0, (g_sched+gobuf_bp)(g) RET @@ -378,8 +381,8 @@ noswitch: // Using a tail call here cleans up tracebacks since we won't stop // at an intermediate systemstack. MOVD 0(R26), R3 // code pointer - ADD $16, RSP - LDP.P 16(RSP), (R29,R30) // restore FP, LR + MOVD.P 16(RSP), R30 // restore LR + SUB $8, RSP, R29 // restore FP B (R3) // func switchToCrashStack0(fn func()) @@ -1048,7 +1051,7 @@ again: // Smashes R0. TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0 MOVD $runtime·systemstack_switch(SB), R0 - ADD $12, R0 // get past prologue + ADD $8, R0 // get past prologue MOVD R0, (g_sched+gobuf_pc)(g) MOVD RSP, R0 MOVD R0, (g_sched+gobuf_sp)(g) @@ -1066,7 +1069,9 @@ TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0 TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16 MOVD fn+0(FP), R1 MOVD arg+8(FP), R0 + SUB $16, RSP // skip over saved frame pointer below RSP BL (R1) + ADD $16, RSP // skip over saved frame pointer below RSP RET // func asmcgocall(fn, arg unsafe.Pointer) int32 @@ -1231,9 +1236,9 @@ havem: BL runtime·save_g(SB) MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4 MOVD (g_sched+gobuf_pc)(g), R5 - MOVD R5, -8(R4) + MOVD R5, -48(R4) MOVD (g_sched+gobuf_bp)(g), R5 - MOVD R5, -16(R4) + MOVD R5, -56(R4) // Gather our arguments into registers. MOVD fn+0(FP), R1 MOVD frame+8(FP), R2 @@ -1247,7 +1252,7 @@ havem: CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now. // Restore g->sched (== m->curg->sched) from saved values. - MOVD 40(RSP), R5 + MOVD 0(RSP), R5 MOVD R5, (g_sched+gobuf_pc)(g) MOVD RSP, R4 ADD $48, R4, R4 @@ -1485,57 +1490,10 @@ GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below // // This is ABIInternal because Go code injects its PC directly into new // goroutine stacks. -// -// State before debugger starts doing anything: -// | current | -// | stack | -// +-------------+ <- SP = origSP -// stopped executing at PC = origPC -// some values are in LR (origLR) and FP (origFP) -// -// After debugger has done steps 1-6 above: -// | current | -// | stack | -// +-------------+ <- origSP -// | ----- | (used to be a slot to store frame pointer on entry to origPC's frame.) 
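+// The prologue below saves R29/R30 with a single STP at -280(RSP),
+// opens a 272-byte frame, and leaves R29 pointing one word below the
+// new SP, so the saved pair lands at -8(RSP) and 0(RSP) of the frame.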
-// +-------------+ -// | origLR | -// +-------------+ <- SP -// | ----- | -// +-------------+ -// | argsize | -// +-------------+ -// LR = origPC, PC = debugCallV2 -// -// debugCallV2 then modifies the stack up to the "good" label: -// | current | -// | stack | -// +-------------+ <- origSP -// | ----- | (used to be a slot to store frame pointer on entry to origPC's frame.) -// +-------------+ -// | origLR | -// +-------------+ <- where debugger left SP -// | origPC | -// +-------------+ -// | origFP | -// +-------------+ <- FP = SP + 256 -// | saved | -// | registers | -// | (224 bytes) | -// +-------------+ <- SP + 32 -// | space for | -// | outargs | -// +-------------+ <- SP + 8 -// | argsize | -// +-------------+ <- SP - TEXT runtime·debugCallV2(SB),NOSPLIT|NOFRAME,$0-0 - MOVD R30, -8(RSP) // save origPC - MOVD -16(RSP), R30 // save argsize in R30 temporarily - MOVD.W R29, -16(RSP) // push origFP - MOVD RSP, R29 // frame pointer chain now set up - SUB $256, RSP, RSP // allocate frame - MOVD R30, (RSP) // Save argsize on the stack + STP (R29, R30), -280(RSP) + SUB $272, RSP, RSP + SUB $8, RSP, R29 // Save all registers that may contain pointers so they can be // conservatively scanned. // @@ -1557,8 +1515,7 @@ TEXT runtime·debugCallV2(SB),NOSPLIT|NOFRAME,$0-0 STP (R0, R1), (4*8)(RSP) // Perform a safe-point check. - MOVD 264(RSP), R0 // origPC - MOVD R0, 8(RSP) + MOVD R30, 8(RSP) // Caller's PC CALL runtime·debugCallCheck(SB) MOVD 16(RSP), R0 CBZ R0, good @@ -1602,7 +1559,7 @@ good: CALL runtime·debugCallWrap(SB); \ JMP restore - MOVD (RSP), R0 // the argument frame size + MOVD 256(RSP), R0 // the argument frame size DEBUG_CALL_DISPATCH(debugCall32<>, 32) DEBUG_CALL_DISPATCH(debugCall64<>, 64) DEBUG_CALL_DISPATCH(debugCall128<>, 128) @@ -1650,9 +1607,9 @@ restore: LDP (6*8)(RSP), (R2, R3) LDP (4*8)(RSP), (R0, R1) - MOVD 272(RSP), R30 // restore old lr (saved by (*sigctxt).pushCall) - LDP 256(RSP), (R29, R27) // restore old fp, set up resumption address - ADD $288, RSP, RSP // Pop frame, LR+FP, and block pushed by (*sigctxt).pushCall + LDP -8(RSP), (R29, R27) + ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext + MOVD -16(RSP), R30 // restore old lr JMP (R27) // runtime.debugCallCheck assumes that functions defined with the diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go index 9064cae039..769c4ffc5c 100644 --- a/src/runtime/mkpreempt.go +++ b/src/runtime/mkpreempt.go @@ -488,18 +488,26 @@ func genARM64(g *gen) { l.stack += 8 // SP needs 16-byte alignment } - // allocate frame, save PC (in R30), FP (in R29) of interrupted instruction - p("STP.W (R29, R30), -16(RSP)") - p("MOVD RSP, R29") // set up new frame pointer + // allocate frame, save PC of interrupted instruction (in LR) + p("MOVD R30, %d(RSP)", -l.stack) p("SUB $%d, RSP", l.stack) + p("MOVD R29, -8(RSP)") // save frame pointer (only used on Linux) + p("SUB $8, RSP, R29") // set up new frame pointer + // On iOS, save the LR again after decrementing SP. We run the + // signal handler on the G stack (as it doesn't support sigaltstack), + // so any writes below SP may be clobbered. + p("#ifdef GOOS_ios") + p("MOVD R30, (RSP)") + p("#endif") l.save(g) p("CALL ·asyncPreempt2(SB)") l.restore(g) - p("MOVD %d(RSP), R30", l.stack+16) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it - p("LDP %d(RSP), (R29, R27)", l.stack) // Restore frame pointer. Load PC into regtmp. 
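+	// On return, the interrupted PC is at 0(RSP), the FP saved by the
+	// prologue is at -8(RSP), and the LR pushed by sigctxt.pushCall
+	// sits just above the frame, at l.stack(RSP).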
- p("ADD $%d, RSP", l.stack+32) // pop frame (including the space pushed by sigctxt.pushCall) + p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it + p("MOVD -8(RSP), R29") // restore frame pointer + p("MOVD (RSP), R27") // load PC to REGTMP + p("ADD $%d, RSP", l.stack+16) // pop frame (including the space pushed by sigctxt.pushCall) p("RET (R27)") } diff --git a/src/runtime/panic.go b/src/runtime/panic.go index 04b3afe168..8c91c9435a 100644 --- a/src/runtime/panic.go +++ b/src/runtime/panic.go @@ -1379,10 +1379,10 @@ func recovery(gp *g) { // the caller gp.sched.bp = fp - 2*goarch.PtrSize case goarch.IsArm64 != 0: - // on arm64, the first two words of the frame are caller's PC - // (the saved LR register) and the caller's BP. - // Coincidentally, the same as amd64. - gp.sched.bp = fp - 2*goarch.PtrSize + // on arm64, the architectural bp points one word higher + // than the sp. fp is totally useless to us here, because it + // only gets us to the caller's fp. + gp.sched.bp = sp - goarch.PtrSize } gogo(&gp.sched) } diff --git a/src/runtime/preempt_arm64.s b/src/runtime/preempt_arm64.s index f4248cac25..31ec9d940f 100644 --- a/src/runtime/preempt_arm64.s +++ b/src/runtime/preempt_arm64.s @@ -4,9 +4,13 @@ #include "textflag.h" TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 - STP.W (R29, R30), -16(RSP) - MOVD RSP, R29 + MOVD R30, -496(RSP) SUB $496, RSP + MOVD R29, -8(RSP) + SUB $8, RSP, R29 + #ifdef GOOS_ios + MOVD R30, (RSP) + #endif STP (R0, R1), 8(RSP) STP (R2, R3), 24(RSP) STP (R4, R5), 40(RSP) @@ -74,7 +78,8 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 LDP 40(RSP), (R4, R5) LDP 24(RSP), (R2, R3) LDP 8(RSP), (R0, R1) - MOVD 512(RSP), R30 - LDP 496(RSP), (R29, R27) - ADD $528, RSP + MOVD 496(RSP), R30 + MOVD -8(RSP), R29 + MOVD (RSP), R27 + ADD $512, RSP RET (R27) diff --git a/src/runtime/race_arm64.s b/src/runtime/race_arm64.s index feaa328d4c..5df650105b 100644 --- a/src/runtime/race_arm64.s +++ b/src/runtime/race_arm64.s @@ -397,7 +397,7 @@ TEXT racecallatomic<>(SB), NOSPLIT, $0 // R3 = addr of incoming arg list // Trigger SIGSEGV early. - MOVD 72(RSP), R3 // 1st arg is addr. after two small frames (32 bytes each), get it at 72(RSP) + MOVD 40(RSP), R3 // 1st arg is addr. after two times BL, get it at 40(RSP) MOVB (R3), R13 // segv here if addr is bad // Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend). MOVD runtime·racearenastart(SB), R10 @@ -417,11 +417,10 @@ racecallatomic_ok: // Addr is within the good range, call the atomic function. load_g MOVD g_racectx(g), R0 // goroutine context - MOVD 56(RSP), R1 // caller pc + MOVD 16(RSP), R1 // caller pc MOVD R9, R2 // pc - ADD $72, RSP, R3 - BL racecall<>(SB) - RET + ADD $40, RSP, R3 + JMP racecall<>(SB) // does not return racecallatomic_ignore: // Addr is outside the good range. // Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op. @@ -436,9 +435,9 @@ racecallatomic_ignore: // racecall will call LLVM race code which might clobber R28 (g) load_g MOVD g_racectx(g), R0 // goroutine context - MOVD 56(RSP), R1 // caller pc + MOVD 16(RSP), R1 // caller pc MOVD R9, R2 // pc - ADD $72, RSP, R3 // arguments + ADD $40, RSP, R3 // arguments BL racecall<>(SB) // Call __tsan_go_ignore_sync_end. 
MOVD $__tsan_go_ignore_sync_end(SB), R9 @@ -477,6 +476,10 @@ TEXT racecall<>(SB), NOSPLIT|NOFRAME, $0-0 MOVD (g_sched+gobuf_sp)(R11), R12 MOVD R12, RSP call: + // Decrement SP past where the frame pointer is saved in the Go arm64 + // ABI (one word below the stack pointer) so the race detector library + // code doesn't clobber it + SUB $16, RSP BL R9 MOVD R19, RSP JMP (R20) diff --git a/src/runtime/signal_arm64.go b/src/runtime/signal_arm64.go index 61dad50721..af7d29f9de 100644 --- a/src/runtime/signal_arm64.go +++ b/src/runtime/signal_arm64.go @@ -8,6 +8,7 @@ package runtime import ( "internal/abi" + "internal/goarch" "internal/runtime/sys" "unsafe" ) @@ -62,11 +63,18 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) { // We arrange lr, and pc to pretend the panicking // function calls sigpanic directly. // Always save LR to stack so that panics in leaf - // functions are correctly handled. - // This extra space is known to gentraceback. + // functions are correctly handled. This smashes + // the stack frame but we're not going back there + // anyway. sp := c.sp() - sys.StackAlign // needs only sizeof uint64, but must align the stack c.set_sp(sp) *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr() + // Make sure a valid frame pointer is saved on the stack so that the + // frame pointer checks in adjustframe are happy, if they're enabled. + // Frame pointer unwinding won't visit the sigpanic frame, since + // sigpanic will save the same frame pointer before calling into a panic + // function. + *(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29() pc := gp.sigpc @@ -88,6 +96,10 @@ func (c *sigctxt) pushCall(targetPC, resumePC uintptr) { sp := c.sp() - 16 // SP needs 16-byte alignment c.set_sp(sp) *(*uint64)(unsafe.Pointer(uintptr(sp))) = c.lr() + // Make sure a valid frame pointer is saved on the stack so that the + // frame pointer checks in adjustframe are happy, if they're enabled. + // This is not actually used for unwinding. + *(*uint64)(unsafe.Pointer(uintptr(sp - goarch.PtrSize))) = c.r29() // Set up PC and LR to pretend the function being signaled // calls targetPC at resumePC. c.set_lr(uint64(resumePC)) diff --git a/src/runtime/stack.go b/src/runtime/stack.go index 5eaceec6da..55e97e77af 100644 --- a/src/runtime/stack.go +++ b/src/runtime/stack.go @@ -579,27 +579,23 @@ var ptrnames = []string{ // | args to callee | // +------------------+ <- frame->sp // -// (arm64) +// (arm) // +------------------+ // | args from caller | // +------------------+ <- frame->argp -// | | -// +------------------+ <- frame->fp (aka caller's sp) -// | return address | +// | caller's retaddr | // +------------------+ -// | caller's FP | (frame pointer always enabled: TODO) +// | caller's FP (*) | (*) on ARM64, if framepointer_enabled && varp > sp // +------------------+ <- frame->varp // | locals | // +------------------+ // | args to callee | // +------------------+ -// | | +// | return address | // +------------------+ <- frame->sp // // varp > sp means that the function has a frame; // varp == sp means frameless function. -// -// Alignment padding, if needed, will be between "locals" and "args to callee". type adjustinfo struct { old stack @@ -713,8 +709,7 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) { } // Adjust saved frame pointer if there is one. 
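+	// On both architectures this is the word at frame.varp, present
+	// only when argp-varp spans the return address plus the saved FP
+	// (two words).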
- if goarch.ArchFamily == goarch.AMD64 && frame.argp-frame.varp == 2*goarch.PtrSize || - goarch.ArchFamily == goarch.ARM64 && frame.argp-frame.varp == 3*goarch.PtrSize { + if (goarch.ArchFamily == goarch.AMD64 || goarch.ArchFamily == goarch.ARM64) && frame.argp-frame.varp == 2*goarch.PtrSize { if stackDebug >= 3 { print(" saved bp\n") } @@ -728,7 +723,10 @@ func adjustframe(frame *stkframe, adjinfo *adjustinfo) { throw("bad frame pointer") } } - // This is the caller's frame pointer saved in the current frame. + // On AMD64, this is the caller's frame pointer saved in the current + // frame. + // On ARM64, this is the frame pointer of the caller's caller saved + // by the caller in its frame (one word below its SP). adjustpointer(adjinfo, unsafe.Pointer(frame.varp)) } diff --git a/src/runtime/testdata/testprog/badtraceback.go b/src/runtime/testdata/testprog/badtraceback.go index 36575f765d..455118a543 100644 --- a/src/runtime/testdata/testprog/badtraceback.go +++ b/src/runtime/testdata/testprog/badtraceback.go @@ -41,11 +41,6 @@ func badLR2(arg int) { if runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" { lrOff = 32 // FIXED_FRAME or sys.MinFrameSize } - if runtime.GOARCH == "arm64" { - // skip 8 bytes at bottom of parent frame, then point - // to the 8 bytes of the saved PC at the top of the frame. - lrOff = 16 - } lrPtr := (*uintptr)(unsafe.Pointer(uintptr(unsafe.Pointer(&arg)) - lrOff)) *lrPtr = 0xbad diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go index 1c3e679a02..8882c306ed 100644 --- a/src/runtime/traceback.go +++ b/src/runtime/traceback.go @@ -175,11 +175,6 @@ func (u *unwinder) initAt(pc0, sp0, lr0 uintptr, gp *g, flags unwindFlags) { // Start in the caller's frame. if frame.pc == 0 { if usesLR { - // TODO: this isn't right on arm64. But also, this should - // ~never happen. Calling a nil function will panic - // when loading the PC out of the closure, not when - // branching to that PC. (Closures should always have - // valid PCs in their first word.) frame.pc = *(*uintptr)(unsafe.Pointer(frame.sp)) frame.lr = 0 } else { @@ -374,11 +369,7 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) { var lrPtr uintptr if usesLR { if innermost && frame.sp < frame.fp || frame.lr == 0 { - if GOARCH == "arm64" { - lrPtr = frame.fp - goarch.PtrSize - } else { - lrPtr = frame.sp - } + lrPtr = frame.sp frame.lr = *(*uintptr)(unsafe.Pointer(lrPtr)) } } else { @@ -394,17 +385,24 @@ func (u *unwinder) resolveInternal(innermost, isSyscall bool) { // On x86, call instruction pushes return PC before entering new function. frame.varp -= goarch.PtrSize } - if GOARCH == "arm64" && frame.varp > frame.sp { - frame.varp -= goarch.PtrSize // LR have been saved, skip over it. - } // For architectures with frame pointers, if there's // a frame, then there's a saved frame pointer here. // // NOTE: This code is not as general as it looks. - // On x86 and arm64, the ABI is to save the frame pointer word at the + // On x86, the ABI is to save the frame pointer word at the // top of the stack frame, so we have to back down over it. - // No other architectures are framepointer-enabled at the moment. + // On arm64, the frame pointer should be at the bottom of + // the stack (with R29 (aka FP) = RSP), in which case we would + // not want to do the subtraction here. But we started out without + // any frame pointer, and when we wanted to add it, we didn't + // want to break all the assembly doing direct writes to 8(RSP) + // to set the first parameter to a called function. 
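+	// (Storing FP at the bottom of the frame instead would have moved
+	// the argument area up a word and broken those writes.)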
+ // So we decided to write the FP link *below* the stack pointer + // (with R29 = RSP - 8 in Go functions). + // This is technically ABI-compatible but not standard. + // And it happens to end up mimicking the x86 layout. + // Other architectures may make different decisions. if frame.varp > frame.sp && framepointer_enabled { frame.varp -= goarch.PtrSize } @@ -564,7 +562,7 @@ func (u *unwinder) finishInternal() { gp := u.g.ptr() if u.flags&(unwindPrintErrors|unwindSilentErrors) == 0 && u.frame.sp != gp.stktopsp { print("runtime: g", gp.goid, ": frame.sp=", hex(u.frame.sp), " top=", hex(gp.stktopsp), "\n") - print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "]\n") + print("\tstack=[", hex(gp.stack.lo), "-", hex(gp.stack.hi), "\n") throw("traceback did not unwind completely") } } diff --git a/test/nosplit.go b/test/nosplit.go index 1f943fa18c..4b4c93b1d0 100644 --- a/test/nosplit.go +++ b/test/nosplit.go @@ -142,7 +142,7 @@ start 136 # (CallSize is 32 on ppc64, 8 on amd64 for frame pointer.) start 96 nosplit start 100 nosplit; REJECT ppc64 ppc64le -start 104 nosplit; REJECT ppc64 ppc64le +start 104 nosplit; REJECT ppc64 ppc64le arm64 start 108 nosplit; REJECT ppc64 ppc64le start 112 nosplit; REJECT ppc64 ppc64le arm64 start 116 nosplit; REJECT ppc64 ppc64le @@ -160,7 +160,7 @@ start 136 nosplit; REJECT # Because AMD64 uses frame pointer, it has 8 fewer bytes. start 96 nosplit call f; f 0 nosplit start 100 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le -start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le +start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le arm64 start 108 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le start 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64 start 116 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 @@ -176,7 +176,7 @@ start 136 nosplit call f; f 0 nosplit; REJECT # Architectures differ in the same way as before. start 96 nosplit call f; f 0 call f start 100 nosplit call f; f 0 call f; REJECT ppc64 ppc64le -start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 +start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64 start 108 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 start 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64 start 116 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 @@ -189,7 +189,7 @@ start 136 nosplit call f; f 0 call f; REJECT # Indirect calls are assumed to be splitting functions. start 96 nosplit callind start 100 nosplit callind; REJECT ppc64 ppc64le -start 104 nosplit callind; REJECT ppc64 ppc64le amd64 +start 104 nosplit callind; REJECT ppc64 ppc64le amd64 arm64 start 108 nosplit callind; REJECT ppc64 ppc64le amd64 start 112 nosplit callind; REJECT ppc64 ppc64le amd64 arm64 start 116 nosplit callind; REJECT ppc64 ppc64le amd64