From f959fb3872a32d29c93f10deae31e4e6d79f2da9 Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Mon, 21 Nov 2022 22:22:36 -0800
Subject: [PATCH] cmd/compile: add anchored version of SP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SPanchored opcode is identical to SP, except that it takes a memory
argument so that it (and more importantly, anything that uses it)
must be scheduled at or after that memory argument.

This opcode ensures that a LEAQ of a variable gets scheduled after the
corresponding VARDEF for that variable.

This may lead to less CSE of LEAQ operations. The effect is very small.
The go binary is only 80 bytes bigger after this CL. Usually LEAQs get
folded into load/store operations, so the effect is only for pointerful
types, large enough to need a duffzero, and have their address passed
somewhere. Even then, usually the CSEd LEAQs will be un-CSEd because
the two uses are on different sides of a function call and the LEAQ
ends up being rematerialized at the second use anyway.

Change-Id: Ib893562cd05369b91dd563b48fb83f5250950293
Reviewed-on: https://go-review.googlesource.com/c/go/+/452916
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
Reviewed-by: Martin Möhrmann <martin@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
---
 src/cmd/compile/internal/ssa/_gen/386.rules   |  3 +-
 src/cmd/compile/internal/ssa/_gen/AMD64.rules |  3 +-
 src/cmd/compile/internal/ssa/_gen/ARM.rules   |  3 +-
 src/cmd/compile/internal/ssa/_gen/ARM64.rules |  3 +-
 .../compile/internal/ssa/_gen/LOONG64.rules   |  3 +-
 src/cmd/compile/internal/ssa/_gen/MIPS.rules  |  3 +-
 .../compile/internal/ssa/_gen/MIPS64.rules    |  3 +-
 src/cmd/compile/internal/ssa/_gen/PPC64.rules |  3 +-
 .../compile/internal/ssa/_gen/RISCV64.rules   |  3 +-
 src/cmd/compile/internal/ssa/_gen/S390X.rules |  3 +-
 src/cmd/compile/internal/ssa/_gen/Wasm.rules  |  3 +-
 .../compile/internal/ssa/_gen/genericOps.go   |  7 +++--
 src/cmd/compile/internal/ssa/lower.go         |  2 +-
 src/cmd/compile/internal/ssa/opGen.go         |  7 +++++
 src/cmd/compile/internal/ssa/rewrite.go       |  4 +--
 src/cmd/compile/internal/ssa/rewrite386.go    | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteARM.go    | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteARM64.go  | 29 ++++++++++++++++++-
 .../compile/internal/ssa/rewriteLOONG64.go    | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteMIPS.go   | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteMIPS64.go | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewritePPC64.go  | 29 ++++++++++++++++++-
 .../compile/internal/ssa/rewriteRISCV64.go    | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteS390X.go  | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewriteWasm.go   | 29 ++++++++++++++++++-
 src/cmd/compile/internal/ssa/schedule.go      | 29 +++++++++++++++++++
 test/codegen/arithmetic.go                    | 18 +++++++-----
 test/codegen/logic.go                         |  6 +---
 29 files changed, 384 insertions(+), 41 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/386.rules b/src/cmd/compile/internal/ssa/_gen/386.rules
index 5e30ca9fd74..88074e5fd08 100644
--- a/src/cmd/compile/internal/ssa/_gen/386.rules
+++ b/src/cmd/compile/internal/ssa/_gen/386.rules
@@ -333,7 +333,8 @@
 (GetCallerPC ...) => (LoweredGetCallerPC ...)
 (GetCallerSP ...) => (LoweredGetCallerSP ...)
 (Addr {sym} base) => (LEAL {sym} base)
-(LocalAddr {sym} base _) => (LEAL {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (LEAL {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (LEAL {sym} base)
 
 // block rewrites
 (If (SETL  cmp) yes no) => (LT  cmp yes no)
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index ccb52956224..c50710ec90c 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -480,7 +480,8 @@
 
 (HasCPUFeature {s}) => (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s})))
 (Addr {sym} base) => (LEAQ {sym} base)
-(LocalAddr {sym} base _) => (LEAQ {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (LEAQ {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (LEAQ {sym} base)
 
 (MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 => (SETLstore [off] {sym} ptr x mem)
 (MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 => (SETLEstore [off] {sym} ptr x mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM.rules b/src/cmd/compile/internal/ssa/_gen/ARM.rules
index e5898b03691..d8fbf41754e 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM.rules
@@ -248,7 +248,8 @@
 (OffPtr [off] ptr) => (ADDconst [int32(off)] ptr)
 
 (Addr {sym} base) => (MOVWaddr {sym} base)
-(LocalAddr {sym} base _) => (MOVWaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVWaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVWaddr {sym} base)
 
 // loads
 (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index 0c5a2e66a8c..0ae02f5de37 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -350,7 +350,8 @@
 (OffPtr [off] ptr) => (ADDconst [off] ptr)
 
 (Addr {sym} base) => (MOVDaddr {sym} base)
-(LocalAddr {sym} base _) => (MOVDaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVDaddr {sym} base)
 
 // loads
 (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
index 1caaf13600d..7e445e506e5 100644
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -227,7 +227,8 @@
 (OffPtr [off] ptr) => (ADDVconst [off] ptr)
 
 (Addr {sym} base) => (MOVVaddr {sym} base)
-(LocalAddr {sym} base _) => (MOVVaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVVaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVVaddr {sym} base)
 
 // loads
 (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS.rules b/src/cmd/compile/internal/ssa/_gen/MIPS.rules
index 6f696da3cc8..9cd5a1618e0 100644
--- a/src/cmd/compile/internal/ssa/_gen/MIPS.rules
+++ b/src/cmd/compile/internal/ssa/_gen/MIPS.rules
@@ -210,7 +210,8 @@
 (OffPtr [off] ptr) => (ADDconst [int32(off)] ptr)
 
 (Addr {sym} base) => (MOVWaddr {sym} base)
-(LocalAddr {sym} base _) => (MOVWaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVWaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVWaddr {sym} base)
 
 // loads
 (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules
index a594df2b266..b0d0dd8e665 100644
--- a/src/cmd/compile/internal/ssa/_gen/MIPS64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/MIPS64.rules
@@ -219,7 +219,8 @@
 (OffPtr [off] ptr) => (ADDVconst [off] ptr)
 
 (Addr {sym} base) => (MOVVaddr {sym} base)
-(LocalAddr {sym} base _) => (MOVVaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVVaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVVaddr {sym} base)
 
 // loads
 (Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
index 5a68de0ca4d..2eda1af3bfc 100644
--- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
@@ -233,7 +233,8 @@
 (S(RAW|RW|LW) x (MOVDconst [c])) => (S(RAW|RW|LW)const [c&31 | (c>>5&1*31)] x)
 
 (Addr {sym} base) => (MOVDaddr {sym} [0] base)
-(LocalAddr {sym} base _) => (MOVDaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVDaddr {sym} base)
 (OffPtr [off] ptr) => (ADD (MOVDconst <typ.Int64> [off]) ptr)
 
 // TODO: optimize these cases?
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
index 59f71be5baf..802b1dd1fd6 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules
@@ -547,7 +547,8 @@
 (ConstBool [val]) => (MOVDconst [int64(b2i(val))])
 
 (Addr {sym} base) => (MOVaddr {sym} [0] base)
-(LocalAddr {sym} base _) => (MOVaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVaddr {sym} base)
 
 // Calls
 (StaticCall  ...) => (CALLstatic  ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules
index e9becb2e17f..9495010a736 100644
--- a/src/cmd/compile/internal/ssa/_gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules
@@ -446,7 +446,8 @@
 (GetCallerSP ...) => (LoweredGetCallerSP ...)
 (GetCallerPC ...) => (LoweredGetCallerPC ...)
 (Addr {sym} base) => (MOVDaddr {sym} base)
-(LocalAddr {sym} base _) => (MOVDaddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (MOVDaddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (MOVDaddr {sym} base)
 (ITab (Load ptr mem)) => (MOVDload ptr mem)
 
 // block rewrites
diff --git a/src/cmd/compile/internal/ssa/_gen/Wasm.rules b/src/cmd/compile/internal/ssa/_gen/Wasm.rules
index a9ed82e4566..e31808ebe17 100644
--- a/src/cmd/compile/internal/ssa/_gen/Wasm.rules
+++ b/src/cmd/compile/internal/ssa/_gen/Wasm.rules
@@ -304,7 +304,8 @@
 (GetCallerPC ...) => (LoweredGetCallerPC ...)
 (GetCallerSP ...) => (LoweredGetCallerSP ...)
 (Addr {sym} base) => (LoweredAddr {sym} [0] base)
-(LocalAddr {sym} base _) => (LoweredAddr {sym} base)
+(LocalAddr <t> {sym} base mem) && t.Elem().HasPointers() => (LoweredAddr {sym} (SPanchored base mem))
+(LocalAddr <t> {sym} base _)  && !t.Elem().HasPointers() => (LoweredAddr {sym} base)
 
 // Write barrier.
 (WB ...) => (LoweredWB ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index a4c8fc9c692..40c9baf5f1e 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -349,9 +349,10 @@ var genericOps = []opData{
 	{name: "Addr", argLength: 1, aux: "Sym", symEffect: "Addr"},      // Address of a variable.  Arg0=SB.  Aux identifies the variable.
 	{name: "LocalAddr", argLength: 2, aux: "Sym", symEffect: "Addr"}, // Address of a variable.  Arg0=SP. Arg1=mem. Aux identifies the variable.
 
-	{name: "SP", zeroWidth: true},                 // stack pointer
-	{name: "SB", typ: "Uintptr", zeroWidth: true}, // static base pointer (a.k.a. globals pointer)
-	{name: "Invalid"},                             // unused value
+	{name: "SP", zeroWidth: true},                                       // stack pointer
+	{name: "SB", typ: "Uintptr", zeroWidth: true},                       // static base pointer (a.k.a. globals pointer)
+	{name: "Invalid"},                                                   // unused value
+	{name: "SPanchored", typ: "Uintptr", argLength: 2, zeroWidth: true}, // arg0 = SP, arg1 = mem. Result is identical to arg0, but cannot be scheduled before memory state arg1.
 
 	// Memory operations
 	{name: "Load", argLength: 2},                          // Load from arg0.  arg1=memory
diff --git a/src/cmd/compile/internal/ssa/lower.go b/src/cmd/compile/internal/ssa/lower.go
index 0b79d77c504..88eb6748e86 100644
--- a/src/cmd/compile/internal/ssa/lower.go
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -29,7 +29,7 @@ func checkLower(f *Func) {
 				continue // lowered
 			}
 			switch v.Op {
-			case OpSP, OpSB, OpInitMem, OpArg, OpArgIntReg, OpArgFloatReg, OpPhi, OpVarDef, OpVarLive, OpKeepAlive, OpSelect0, OpSelect1, OpSelectN, OpConvert, OpInlMark:
+			case OpSP, OpSPanchored, OpSB, OpInitMem, OpArg, OpArgIntReg, OpArgFloatReg, OpPhi, OpVarDef, OpVarLive, OpKeepAlive, OpSelect0, OpSelect1, OpSelectN, OpConvert, OpInlMark:
 				continue // ok not to lower
 			case OpMakeResult:
 				if b.Controls[0] == v {
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 9ff0ad353e0..c59cfdd9ef1 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -3008,6 +3008,7 @@ const (
 	OpLocalAddr
 	OpSP
 	OpSB
+	OpSPanchored
 	OpLoad
 	OpDereference
 	OpStore
@@ -38818,6 +38819,12 @@ var opcodeTable = [...]opInfo{
 		zeroWidth: true,
 		generic:   true,
 	},
+	{
+		name:      "SPanchored",
+		argLen:    2,
+		zeroWidth: true,
+		generic:   true,
+	},
 	{
 		name:    "Load",
 		argLen:  2,
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index f4ac97c5eb7..84bf2047d42 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -839,9 +839,7 @@ func isSamePtr(p1, p2 *Value) bool {
 	case OpOffPtr:
 		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
 	case OpAddr, OpLocalAddr:
-		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
-		// Checking for value equality only works after [z]cse has run.
-		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
+		return p1.Aux == p2.Aux
 	case OpAddPtr:
 		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
 	}
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go
index a7671e91906..064173a946b 100644
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -9050,17 +9050,44 @@ func rewriteValue386_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValue386_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (LEAL {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(Op386LEAL)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (LEAL {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(Op386LEAL)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValue386_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 395b2b1e6e3..fa00bd4f5fb 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -29388,17 +29388,44 @@ func rewriteValueAMD64_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (LEAQ {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpAMD64LEAQ)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (LEAQ {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpAMD64LEAQ)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueAMD64_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go
index c31d89c1fd2..6ea1a7e4b26 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go
@@ -14047,17 +14047,44 @@ func rewriteValueARM_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueARM_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVWaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpARMMOVWaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVWaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpARMMOVWaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueARM_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 88c690be1f6..6f02b507e82 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -24790,17 +24790,44 @@ func rewriteValueARM64_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueARM64_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVDaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpARM64MOVDaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVDaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpARM64MOVDaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueARM64_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
index 451b8313ebd..3c783a3037b 100644
--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
@@ -4628,17 +4628,44 @@ func rewriteValueLOONG64_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueLOONG64_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVVaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpLOONG64MOVVaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVVaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpLOONG64MOVVaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueLOONG64_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go
index f1e4970af83..a8cda7644a7 100644
--- a/src/cmd/compile/internal/ssa/rewriteMIPS.go
+++ b/src/cmd/compile/internal/ssa/rewriteMIPS.go
@@ -1653,17 +1653,44 @@ func rewriteValueMIPS_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueMIPS_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVWaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpMIPSMOVWaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVWaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpMIPSMOVWaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueMIPS_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go
index 14d39babf2a..82d52f0b3f3 100644
--- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go
+++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go
@@ -1848,17 +1848,44 @@ func rewriteValueMIPS64_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueMIPS64_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVVaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpMIPS64MOVVaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVVaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpMIPS64MOVVaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueMIPS64_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 192ec49d824..aee570df7a6 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -2352,17 +2352,44 @@ func rewriteValuePPC64_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValuePPC64_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVDaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpPPC64MOVDaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVDaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpPPC64MOVDaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValuePPC64_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
index 5107b9a96fd..66a6967db4c 100644
--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go
+++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go
@@ -1607,17 +1607,44 @@ func rewriteValueRISCV64_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueRISCV64_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpRISCV64MOVaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpRISCV64MOVaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueRISCV64_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index 597941c26d4..b766156b437 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -2460,17 +2460,44 @@ func rewriteValueS390X_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueS390X_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (MOVDaddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpS390XMOVDaddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (MOVDaddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpS390XMOVDaddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueS390X_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/rewriteWasm.go b/src/cmd/compile/internal/ssa/rewriteWasm.go
index 818003c4a75..bb35d8e663c 100644
--- a/src/cmd/compile/internal/ssa/rewriteWasm.go
+++ b/src/cmd/compile/internal/ssa/rewriteWasm.go
@@ -1487,17 +1487,44 @@ func rewriteValueWasm_OpLoad(v *Value) bool {
 	return false
 }
 func rewriteValueWasm_OpLocalAddr(v *Value) bool {
+	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (LocalAddr {sym} base _)
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (LocalAddr <t> {sym} base mem)
+	// cond: t.Elem().HasPointers()
+	// result: (LoweredAddr {sym} (SPanchored base mem))
+	for {
+		t := v.Type
+		sym := auxToSym(v.Aux)
+		base := v_0
+		mem := v_1
+		if !(t.Elem().HasPointers()) {
+			break
+		}
+		v.reset(OpWasmLoweredAddr)
+		v.Aux = symToAux(sym)
+		v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+		v0.AddArg2(base, mem)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (LocalAddr <t> {sym} base _)
+	// cond: !t.Elem().HasPointers()
 	// result: (LoweredAddr {sym} base)
 	for {
+		t := v.Type
 		sym := auxToSym(v.Aux)
 		base := v_0
+		if !(!t.Elem().HasPointers()) {
+			break
+		}
 		v.reset(OpWasmLoweredAddr)
 		v.Aux = symToAux(sym)
 		v.AddArg(base)
 		return true
 	}
+	return false
 }
 func rewriteValueWasm_OpLsh16x16(v *Value) bool {
 	v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go
index 4e762f7b3fd..d88c33f3046 100644
--- a/src/cmd/compile/internal/ssa/schedule.go
+++ b/src/cmd/compile/internal/ssa/schedule.go
@@ -5,6 +5,7 @@
 package ssa
 
 import (
+	"cmd/compile/internal/base"
 	"cmd/compile/internal/types"
 	"container/heap"
 	"sort"
@@ -365,6 +366,34 @@ func schedule(f *Func) {
 		}
 	}
 
+	// Remove SPanchored now that we've scheduled.
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			for i, a := range v.Args {
+				if a.Op == OpSPanchored {
+					v.SetArg(i, a.Args[0])
+				}
+			}
+		}
+	}
+	for _, b := range f.Blocks {
+		i := 0
+		for _, v := range b.Values {
+			if v.Op == OpSPanchored {
+				// Free this value
+				if v.Uses != 0 {
+					base.Fatalf("SPAnchored still has %d uses", v.Uses)
+				}
+				v.resetArgs()
+				f.freeValue(v)
+			} else {
+				b.Values[i] = v
+				i++
+			}
+		}
+		b.truncateValues(i)
+	}
+
 	f.scheduled = true
 }
 
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 327be24db81..5139d1340a3 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -316,14 +316,14 @@ func ConstMods(n1 uint, n2 int) (uint, int) {
 }
 
 // Check that divisibility checks x%c==0 are converted to MULs and rotates
-func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
+func DivisibleU(n uint) (bool, bool) {
 	// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
 	// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVQ"
 	// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
 	// arm:"MUL","CMP\t[$]715827882",-".*udiv"
 	// ppc64:"MULLD","ROTL\t[$]63"
 	// ppc64le:"MULLD","ROTL\t[$]63"
-	evenU := n1%6 == 0
+	even := n%6 == 0
 
 	// amd64:"MOVQ\t[$]-8737931403336103397","IMULQ",-"ROLQ",-"DIVQ"
 	// 386:"IMUL3L\t[$]678152731",-"ROLL",-"DIVQ"
@@ -331,17 +331,21 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 	// arm:"MUL","CMP\t[$]226050910",-".*udiv"
 	// ppc64:"MULLD",-"ROTL"
 	// ppc64le:"MULLD",-"ROTL"
-	oddU := n1%19 == 0
+	odd := n%19 == 0
 
+	return even, odd
+}
+
+func Divisible(n int) (bool, bool) {
 	// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
 	// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
-	// arm64:"MUL","ADD\tR","ROR",-"DIV"
+	// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ADD\tR","ROR",-"DIV"
 	// arm:"MUL","ADD\t[$]715827882",-".*udiv"
 	// ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
 	// ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
 	// ppc64/power9:"MADDLD","ROTL\t[$]63"
 	// ppc64le/power9:"MADDLD","ROTL\t[$]63"
-	evenS := n2%6 == 0
+	even := n%6 == 0
 
 	// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
 	// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
@@ -351,9 +355,9 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 	// ppc64/power9:"MADDLD",-"ROTL"
 	// ppc64le/power8:"MULLD","ADD",-"ROTL"
 	// ppc64le/power9:"MADDLD",-"ROTL"
-	oddS := n2%19 == 0
+	odd := n%19 == 0
 
-	return evenU, oddU, evenS, oddS
+	return even, odd
 }
 
 // Check that fix-up code is not generated for divisions where it has been proven that
diff --git a/test/codegen/logic.go b/test/codegen/logic.go
index 50ce5f0cca8..f761e7bb9ae 100644
--- a/test/codegen/logic.go
+++ b/test/codegen/logic.go
@@ -6,16 +6,12 @@
 
 package codegen
 
-var gx, gy int
-
 // Test to make sure that (CMPQ (ANDQ x y) [0]) does not get rewritten to
 // (TESTQ x y) if the ANDQ has other uses. If that rewrite happens, then one
 // of the args of the ANDQ needs to be saved so it can be used as the arg to TESTQ.
 func andWithUse(x, y int) int {
-	// Load x,y into registers, so those MOVQ will not appear at the z := x&y line.
-	gx, gy = x, y
-	// amd64:-"MOVQ"
 	z := x & y
+	// amd64:`TESTQ\s(AX, AX|BX, BX|CX, CX|DX, DX|SI, SI|DI, DI|R8, R8|R9, R9|R10, R10|R11, R11|R12, R12|R13, R13|R15, R15)`
 	if z == 0 {
 		return 77
 	}
-- 
GitLab