diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
index 3392644e7ddd713ffcf2015b078be4a40b9b2439..8b3c0e72f6e39583c66b06b38c1f7ba1f73f5414 100644
--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules
@@ -1312,6 +1312,10 @@
 (ROR x (MOVDconst [c])) => (RORconst x [c&63])
 (RORW x (MOVDconst [c])) => (RORWconst x [c&31])
 
+(ADDSflags x (MOVDconst [c]))  => (ADDSconstflags [c] x)
+
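+// Rewrite a negative ADDconst as a SUBconst with a positive constant so the
+// value can still use an immediate encoding (the assembler no longer flips
+// negative ADD/SUB immediates; see cmd/internal/obj/arm64/obj7.go).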
+(ADDconst [c] y) && c < 0 => (SUBconst [-c] y)
+
 // Canonicalize the order of arguments to comparisons - helps with CSE.
 ((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))
 
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
new file mode 100644
index 0000000000000000000000000000000000000000..d0c2099da9b7c3397134ae817ca3d3b4c125fa65
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
@@ -0,0 +1,21 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file contains rules used by the lateLower pass.
+// These are often the exact inverse of rules in ARM64.rules.
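+//
+// The first round of lowering optimistically uses immediate-operand forms
+// (ADDconst, ANDconst, CMPconst, ...) for any constant. The rules below undo
+// that when the constant cannot be encoded in the instruction's immediate
+// field, materializing it with a separate MOVD so that it can be CSEd with
+// other uses of the same constant.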
+
+(ADDconst [c] x) && !isARM64addcon(c)  => (ADD x (MOVDconst [c]))
+(SUBconst [c] x) && !isARM64addcon(c)  => (SUB x (MOVDconst [c]))
+(ANDconst [c] x) && !isARM64bitcon(uint64(c)) => (AND x (MOVDconst [c]))
+(ORconst  [c] x) && !isARM64bitcon(uint64(c))  => (OR  x (MOVDconst [c]))
+(XORconst [c] x) && !isARM64bitcon(uint64(c))  => (XOR x (MOVDconst [c]))
+(TSTconst [c] x) && !isARM64bitcon(uint64(c))  => (TST x (MOVDconst [c]))
+(TSTWconst [c] x) && !isARM64bitcon(uint64(c)|uint64(c)<<32)  => (TSTW x (MOVDconst [int64(c)]))
+
+(CMPconst [c] x) && !isARM64addcon(c)  => (CMP x (MOVDconst [c]))
+(CMPWconst [c] x) && !isARM64addcon(int64(c))  => (CMPW x (MOVDconst [int64(c)]))
+(CMNconst [c] x) && !isARM64addcon(c)  => (CMN x (MOVDconst [c]))
+(CMNWconst [c] x) && !isARM64addcon(int64(c))  => (CMNW x (MOVDconst [int64(c)]))
+
+(ADDSconstflags [c] x) && !isARM64addcon(c)  => (ADDSflags x (MOVDconst [c]))
diff --git a/src/cmd/compile/internal/ssa/_gen/main.go b/src/cmd/compile/internal/ssa/_gen/main.go
index 2fbd94df45fa4b0b8dc321faba1cb3f156a59a8b..b3fbe96ba7fa9d699e20e64bb9f771eea898b0bb 100644
--- a/src/cmd/compile/internal/ssa/_gen/main.go
+++ b/src/cmd/compile/internal/ssa/_gen/main.go
@@ -159,6 +159,7 @@ func main() {
 		tasks = append(tasks, func() {
 			genRules(a)
 			genSplitLoadRules(a)
+			genLateLowerRules(a)
 		})
 	}
 	var wg sync.WaitGroup
diff --git a/src/cmd/compile/internal/ssa/_gen/rulegen.go b/src/cmd/compile/internal/ssa/_gen/rulegen.go
index 20b35dc448a631040909d4f3f3fa312295e88bcd..aa038255e160399a6a5e9be89aeb721dd4f8878c 100644
--- a/src/cmd/compile/internal/ssa/_gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/_gen/rulegen.go
@@ -89,6 +89,7 @@ func (r Rule) parse() (match, cond, result string) {
 
 func genRules(arch arch)          { genRulesSuffix(arch, "") }
 func genSplitLoadRules(arch arch) { genRulesSuffix(arch, "splitload") }
+func genLateLowerRules(arch arch) { genRulesSuffix(arch, "latelower") }
 
 func genRulesSuffix(arch arch, suff string) {
 	// Open input file.
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 3be2cc7c3738e55abf82312db27f4f095f79c221..2eaef724454236e7b48ee10206d7e8c50e6d52e3 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -486,6 +486,7 @@ var passes = [...]pass{
 	{name: "insert resched checks", fn: insertLoopReschedChecks,
 		disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
 	{name: "lower", fn: lower, required: true},
+	{name: "late lower", fn: lateLower, required: true},
 	{name: "addressing modes", fn: addressingModes, required: false},
 	{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
 	{name: "lowered cse", fn: cse},
@@ -559,9 +560,14 @@ var passOrder = [...]constraint{
 	{"critical", "regalloc"},
 	// regalloc requires all the values in a block to be scheduled
 	{"schedule", "regalloc"},
+	// the rules in late lower run after the general rules.
+	{"lower", "late lower"},
+	// late lower may generate some values that need to be CSEed.
+	{"late lower", "lowered cse"},
 	// checkLower must run after lowering & subsequent dead code elim
 	{"lower", "checkLower"},
 	{"lowered deadcode", "checkLower"},
+	{"late lower", "checkLower"},
 	// late nilcheck needs instructions to be scheduled.
 	{"schedule", "late nilcheck"},
 	// flagalloc needs instructions to be scheduled.
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index d7a413268b6eb007946c8317eb0eac64305099c7..0ad2d94dce85b29abc216a63c478461e61d62e3d 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -22,7 +22,8 @@ type Config struct {
 	RegSize        int64  // 4 or 8; copy of cmd/internal/sys.Arch.RegSize
 	Types          Types
 	lowerBlock     blockRewriter  // lowering function
-	lowerValue     valueRewriter  // lowering function
+	lowerValue     valueRewriter  // lowering function, first round
+	lateLowerValue valueRewriter  // lowering function that runs after the first round of lowering; only used on some architectures
 	splitLoad      valueRewriter  // function for splitting merged load ops; only used on some architectures
 	registers      []Register     // machine registers
 	gpRegMask      regMask        // general purpose integer register mask
@@ -222,6 +223,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
 		c.RegSize = 8
 		c.lowerBlock = rewriteBlockARM64
 		c.lowerValue = rewriteValueARM64
+		c.lateLowerValue = rewriteValueARM64latelower
 		c.registers = registersARM64[:]
 		c.gpRegMask = gpRegMaskARM64
 		c.fpRegMask = fpRegMaskARM64
diff --git a/src/cmd/compile/internal/ssa/lower.go b/src/cmd/compile/internal/ssa/lower.go
index fb4b7484136a8745ec518ee4abd7d85dd20eda3f..b850ac52e3d4075816092e0d304fe640193f7c98 100644
--- a/src/cmd/compile/internal/ssa/lower.go
+++ b/src/cmd/compile/internal/ssa/lower.go
@@ -10,6 +10,14 @@ func lower(f *Func) {
 	applyRewrite(f, f.Config.lowerBlock, f.Config.lowerValue, removeDeadValues)
 }
 
+// lateLower applies those rules that need to be run after the general lower rules.
+func lateLower(f *Func) {
+	// repeat rewrites until we find no more rewrites
+	if f.Config.lateLowerValue != nil {
+		applyRewrite(f, f.Config.lowerBlock, f.Config.lateLowerValue, removeDeadValues)
+	}
+}
+
 // checkLower checks for unlowered opcodes and fails if we find one.
 func checkLower(f *Func) {
 	// Needs to be a separate phase because it must run after both
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index c95d8734564c490c7fb392c990b8bd06cc157c67..77ff4b100e2c6470b20663115613e4626d942728 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1994,3 +1994,52 @@ func canRotate(c *Config, bits int64) bool {
 		return false
 	}
 }
+
+// isARM64bitcon reports whether a constant can be encoded into a logical instruction.
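+// A logical-instruction immediate ("bitcon") is a contiguous run of ones,
+// rotated by any amount and repeated with a period of 2, 4, 8, 16, 32 or 64
+// bits; all-zero and all-one values are not encodable. For example,
+// 0x00ff00ff00ff00ff and 0x5555555555555555 are bitcons, while 0x1234 is not.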
+func isARM64bitcon(x uint64) bool {
+	if x == 1<<64-1 || x == 0 {
+		return false
+	}
+	// determine the period and sign-extend a unit to 64 bits
+	switch {
+	case x != x>>32|x<<32:
+		// period is 64
+		// nothing to do
+	case x != x>>16|x<<48:
+		// period is 32
+		x = uint64(int64(int32(x)))
+	case x != x>>8|x<<56:
+		// period is 16
+		x = uint64(int64(int16(x)))
+	case x != x>>4|x<<60:
+		// period is 8
+		x = uint64(int64(int8(x)))
+	default:
+		// period is 4 or 2, always true
+		// 0001, 0010, 0100, 1000 -- 0001 rotate
+		// 0011, 0110, 1100, 1001 -- 0011 rotate
+		// 0111, 1011, 1101, 1110 -- 0111 rotate
+		// 0101, 1010             -- 01   rotate, repeat
+		return true
+	}
+	return sequenceOfOnes(x) || sequenceOfOnes(^x)
+}
+
+// sequenceOfOnes tests whether a constant is a sequence of ones in binary, with leading and trailing zeros.
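+// For example, 0b00111000 is accepted (0b00111000 + 0b00001000 = 0b01000000,
+// a power of two), while 0b00101000 is rejected.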
+func sequenceOfOnes(x uint64) bool {
+	y := x & -x // lowest set bit of x. x is good iff x+y is a power of 2
+	y += x
+	return (y-1)&y == 0
+}
+
+// isARM64addcon reports whether v can be encoded as the immediate value in an ADD or SUB instruction.
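+// For example, 7 and 0xFFF000 are encodable; 0x1234568 is not.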
+func isARM64addcon(v int64) bool {
+	// uimm12 (0..0xFFF), or uimm12 shifted left by 12 bits?
+	if v < 0 {
+		return false
+	}
+	if (v & 0xFFF) == 0 {
+		v >>= 12
+	}
+	return v <= 0xFFF
+}
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index b026532df3d4d2785221772a39dc11f974eca234..f8b6252d6040d1e44708f80554f5f7d823f9ac7f 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -11,6 +11,8 @@ func rewriteValueARM64(v *Value) bool {
 		return rewriteValueARM64_OpARM64ADCSflags(v)
 	case OpARM64ADD:
 		return rewriteValueARM64_OpARM64ADD(v)
+	case OpARM64ADDSflags:
+		return rewriteValueARM64_OpARM64ADDSflags(v)
 	case OpARM64ADDconst:
 		return rewriteValueARM64_OpARM64ADDconst(v)
 	case OpARM64ADDshiftLL:
@@ -1360,6 +1362,27 @@ func rewriteValueARM64_OpARM64ADD(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64ADDSflags(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (ADDSflags x (MOVDconst [c]))
+	// result: (ADDSconstflags [c] x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpARM64MOVDconst {
+				continue
+			}
+			c := auxIntToInt64(v_1.AuxInt)
+			v.reset(OpARM64ADDSconstflags)
+			v.AuxInt = int64ToAuxInt(c)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64ADDconst(v *Value) bool {
 	v_0 := v.Args[0]
 	// match: (ADDconst [off1] (MOVDaddr [off2] {sym} ptr))
@@ -1382,6 +1405,20 @@ func rewriteValueARM64_OpARM64ADDconst(v *Value) bool {
 		v.AddArg(ptr)
 		return true
 	}
+	// match: (ADDconst [c] y)
+	// cond: c < 0
+	// result: (SUBconst [-c] y)
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		y := v_0
+		if !(c < 0) {
+			break
+		}
+		v.reset(OpARM64SUBconst)
+		v.AuxInt = int64ToAuxInt(-c)
+		v.AddArg(y)
+		return true
+	}
 	// match: (ADDconst [0] x)
 	// result: x
 	for {
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64latelower.go b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go
new file mode 100644
index 0000000000000000000000000000000000000000..d4890fe0806bbbc1497503f2e60522034ef34f50
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rewriteARM64latelower.go
@@ -0,0 +1,289 @@
+// Code generated from gen/ARM64latelower.rules; DO NOT EDIT.
+// generated with: cd gen; go run *.go
+
+package ssa
+
+func rewriteValueARM64latelower(v *Value) bool {
+	switch v.Op {
+	case OpARM64ADDSconstflags:
+		return rewriteValueARM64latelower_OpARM64ADDSconstflags(v)
+	case OpARM64ADDconst:
+		return rewriteValueARM64latelower_OpARM64ADDconst(v)
+	case OpARM64ANDconst:
+		return rewriteValueARM64latelower_OpARM64ANDconst(v)
+	case OpARM64CMNWconst:
+		return rewriteValueARM64latelower_OpARM64CMNWconst(v)
+	case OpARM64CMNconst:
+		return rewriteValueARM64latelower_OpARM64CMNconst(v)
+	case OpARM64CMPWconst:
+		return rewriteValueARM64latelower_OpARM64CMPWconst(v)
+	case OpARM64CMPconst:
+		return rewriteValueARM64latelower_OpARM64CMPconst(v)
+	case OpARM64ORconst:
+		return rewriteValueARM64latelower_OpARM64ORconst(v)
+	case OpARM64SUBconst:
+		return rewriteValueARM64latelower_OpARM64SUBconst(v)
+	case OpARM64TSTWconst:
+		return rewriteValueARM64latelower_OpARM64TSTWconst(v)
+	case OpARM64TSTconst:
+		return rewriteValueARM64latelower_OpARM64TSTconst(v)
+	case OpARM64XORconst:
+		return rewriteValueARM64latelower_OpARM64XORconst(v)
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64ADDSconstflags(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (ADDSconstflags [c] x)
+	// cond: !isARM64addcon(c)
+	// result: (ADDSflags x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(c)) {
+			break
+		}
+		v.reset(OpARM64ADDSflags)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64ADDconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (ADDconst [c] x)
+	// cond: !isARM64addcon(c)
+	// result: (ADD x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(c)) {
+			break
+		}
+		v.reset(OpARM64ADD)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64ANDconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (ANDconst [c] x)
+	// cond: !isARM64bitcon(uint64(c))
+	// result: (AND x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64bitcon(uint64(c))) {
+			break
+		}
+		v.reset(OpARM64AND)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64CMNWconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (CMNWconst [c] x)
+	// cond: !isARM64addcon(int64(c))
+	// result: (CMNW x (MOVDconst [int64(c)]))
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(int64(c))) {
+			break
+		}
+		v.reset(OpARM64CMNW)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(int64(c))
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64CMNconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (CMNconst [c] x)
+	// cond: !isARM64addcon(c)
+	// result: (CMN x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(c)) {
+			break
+		}
+		v.reset(OpARM64CMN)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64CMPWconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (CMPWconst [c] x)
+	// cond: !isARM64addcon(int64(c))
+	// result: (CMPW x (MOVDconst [int64(c)]))
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(int64(c))) {
+			break
+		}
+		v.reset(OpARM64CMPW)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(int64(c))
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64CMPconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (CMPconst [c] x)
+	// cond: !isARM64addcon(c)
+	// result: (CMP x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(c)) {
+			break
+		}
+		v.reset(OpARM64CMP)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64ORconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (ORconst [c] x)
+	// cond: !isARM64bitcon(uint64(c))
+	// result: (OR x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64bitcon(uint64(c))) {
+			break
+		}
+		v.reset(OpARM64OR)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64SUBconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (SUBconst [c] x)
+	// cond: !isARM64addcon(c)
+	// result: (SUB x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64addcon(c)) {
+			break
+		}
+		v.reset(OpARM64SUB)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64TSTWconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (TSTWconst [c] x)
+	// cond: !isARM64bitcon(uint64(c)|uint64(c)<<32)
+	// result: (TSTW x (MOVDconst [int64(c)]))
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		x := v_0
+		if !(!isARM64bitcon(uint64(c) | uint64(c)<<32)) {
+			break
+		}
+		v.reset(OpARM64TSTW)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(int64(c))
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64TSTconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (TSTconst [c] x)
+	// cond: !isARM64bitcon(uint64(c))
+	// result: (TST x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64bitcon(uint64(c))) {
+			break
+		}
+		v.reset(OpARM64TST)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64latelower_OpARM64XORconst(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (XORconst [c] x)
+	// cond: !isARM64bitcon(uint64(c))
+	// result: (XOR x (MOVDconst [c]))
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		x := v_0
+		if !(!isARM64bitcon(uint64(c))) {
+			break
+		}
+		v.reset(OpARM64XOR)
+		v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
+		v0.AuxInt = int64ToAuxInt(c)
+		v.AddArg2(x, v0)
+		return true
+	}
+	return false
+}
+func rewriteBlockARM64latelower(b *Block) bool {
+	return false
+}
diff --git a/src/cmd/dist/buildtool.go b/src/cmd/dist/buildtool.go
index 828e93aa4cf65f4de190561454df33e3a4cac410..eee738b43c8ecf56d89e35b5bd12af282aa2136d 100644
--- a/src/cmd/dist/buildtool.go
+++ b/src/cmd/dist/buildtool.go
@@ -266,6 +266,7 @@ func isUnneededSSARewriteFile(srcFile, goArch string) (archCaps string, unneeded
 	archCaps = fileArch
 	fileArch = strings.ToLower(fileArch)
 	fileArch = strings.TrimSuffix(fileArch, "splitload")
+	fileArch = strings.TrimSuffix(fileArch, "latelower")
 	if fileArch == goArch {
 		return "", false
 	}
diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go
index 7d20beb5d61694022ff8ebb1593b3efa3fedaa82..6c2cb63e9b4467ed41eb4e1b4fdd45ed607e03d9 100644
--- a/src/cmd/internal/obj/arm64/obj7.go
+++ b/src/cmd/internal/obj/arm64/obj7.go
@@ -40,13 +40,6 @@ import (
 	"math"
 )
 
-var complements = []obj.As{
-	AADD:  ASUB,
-	AADDW: ASUBW,
-	ASUB:  AADD,
-	ASUBW: AADDW,
-}
-
 // zrReplace is the set of instructions for which $0 in the From operand
 // should be replaced with REGZERO.
 var zrReplace = map[obj.As]bool{
@@ -375,21 +368,6 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
 		break
 	}
 
-	// Rewrite negative immediates as positive immediates with
-	// complementary instruction.
-	switch p.As {
-	case AADD, ASUB:
-		if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && p.From.Offset != -1<<63 {
-			p.From.Offset = -p.From.Offset
-			p.As = complements[p.As]
-		}
-	case AADDW, ASUBW:
-		if p.From.Type == obj.TYPE_CONST && p.From.Offset < 0 && int32(p.From.Offset) != -1<<31 {
-			p.From.Offset = -p.From.Offset
-			p.As = complements[p.As]
-		}
-	}
-
 	if c.ctxt.Flag_dynlink {
 		c.rewriteToUseGot(p)
 	}
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 3fb9ce646b81c371968ed3062f8f1467a6ef7b17..327be24db8125f7ea9aaa98556c62e40130e10f3 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -319,7 +319,7 @@ func ConstMods(n1 uint, n2 int) (uint, int) {
 func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 	// amd64:"MOVQ\t[$]-6148914691236517205","IMULQ","ROLQ\t[$]63",-"DIVQ"
 	// 386:"IMUL3L\t[$]-1431655765","ROLL\t[$]31",-"DIVQ"
-	// arm64:"MOVD\t[$]-6148914691236517205","MUL","ROR",-"DIV"
+	// arm64:"MOVD\t[$]-6148914691236517205","MOVD\t[$]3074457345618258602","MUL","ROR",-"DIV"
 	// arm:"MUL","CMP\t[$]715827882",-".*udiv"
 	// ppc64:"MULLD","ROTL\t[$]63"
 	// ppc64le:"MULLD","ROTL\t[$]63"
@@ -335,7 +335,7 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 
 	// amd64:"IMULQ","ADD","ROLQ\t[$]63",-"DIVQ"
 	// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
-	// arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
+	// arm64:"MUL","ADD\tR","ROR",-"DIV"
 	// arm:"MUL","ADD\t[$]715827882",-".*udiv"
 	// ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
 	// ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
@@ -345,7 +345,7 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
 
 	// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
 	// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
-	// arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
+	// arm64:"MUL","MOVD\t[$]485440633518672410","ADD",-"ROR",-"DIV"
 	// arm:"MUL","ADD\t[$]113025455",-".*udiv"
 	// ppc64/power8:"MULLD","ADD",-"ROTL"
 	// ppc64/power9:"MADDLD",-"ROTL"
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index e7826b8e658dc89d6ae97516b6f8a6339796f8b1..4f70627c258cedb31a5441d03f3eb8d5341e21f8 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -363,3 +363,14 @@ func issue48467(x, y uint64) uint64 {
 	d, borrow := bits.Sub64(x, y, 0)
 	return x - d&(-borrow)
 }
+
+func foldConst(x, y uint64) uint64 {
+	// arm64: "ADDS\t[$]7",-"MOVD\t[$]7"
+	d, b := bits.Add64(x, 7, 0)
+	return b & d
+}
+
+func foldConstOutOfRange(a uint64) uint64 {
+	// arm64: "MOVD\t[$]19088744",-"ADD\t[$]19088744"
+	return a + 0x1234568
+}
diff --git a/test/codegen/comparisons.go b/test/codegen/comparisons.go
index 4b66044804418e59561ef49c4ee1d47e962aa64e..5a54a960bc629ee32fac464af1030a989be29a66 100644
--- a/test/codegen/comparisons.go
+++ b/test/codegen/comparisons.go
@@ -19,7 +19,7 @@ import "unsafe"
 
 func CompareString1(s string) bool {
 	// amd64:`CMPW\t\(.*\), [$]`
-	// arm64:`MOVHU\t\(.*\), [R]`,`CMPW\t[$]`
+	// arm64:`MOVHU\t\(.*\), [R]`,`MOVD\t[$]`,`CMPW\tR`
 	// ppc64le:`MOVHZ\t\(.*\), [R]`,`CMPW\t.*, [$]`
 	// s390x:`MOVHBR\t\(.*\), [R]`,`CMPW\t.*, [$]`
 	return s == "xx"
@@ -327,12 +327,12 @@ func CmpToZero_ex1(a int64, e int32) int {
 		return 3
 	}
 
-	// arm64:`CMP|CMN`,-`(ADD|SUB)`,`(BMI|BPL)`
+	// arm64:`SUB`,`TBZ`
 	if a-11 >= 0 {
 		return 4
 	}
 
-	// arm64:`CMP|CMN`,-`(ADD|SUB)`,`BEQ`,`(BMI|BPL)`
+	// arm64:`SUB`,`CMP`,`BGT`
 	if a-19 > 0 {
 		return 4
 	}
@@ -355,7 +355,7 @@ func CmpToZero_ex1(a int64, e int32) int {
 		return 7
 	}
 
-	// arm64:`CMPW|CMNW`,`(BMI|BPL)`
+	// arm64:`SUB`,`TBNZ`
 	// arm:`CMP|CMN`, -`(ADD|SUB)`, `(BMI|BPL)`
 	if e-11 >= 0 {
 		return 8
diff --git a/test/codegen/switch.go b/test/codegen/switch.go
index c3c24e2e11249757c3e50d4226db2cce887fd1d9..603e0befbbdc752df4a3ba2aee2c5ab3ef8654ce 100644
--- a/test/codegen/switch.go
+++ b/test/codegen/switch.go
@@ -80,19 +80,19 @@ func mimetype(ext string) string {
 	// arm64: `MOVB\s1\(R.*\), R.*$`, `CMPW\s\$104, R.*$`, -`cmpstring`
 	switch ext {
 	// amd64: `CMPL\s\(.*\), \$1836345390$`
-	// arm64: `CMPW\s\$1836345390, R.*$`
+	// arm64: `MOVD\s\$1836345390`, `CMPW\sR.*, R.*$`
 	case ".htm":
 		return "A"
 	// amd64: `CMPL\s\(.*\), \$1953457454$`
-	// arm64: `CMPW\s\$1953457454, R.*$`
+	// arm64: `MOVD\s\$1953457454`, `CMPW\sR.*, R.*$`
 	case ".eot":
 		return "B"
 	// amd64: `CMPL\s\(.*\), \$1735815982$`
-	// arm64: `CMPW\s\$1735815982, R.*$`
+	// arm64: `MOVD\s\$1735815982`, `CMPW\sR.*, R.*$`
 	case ".svg":
 		return "C"
 	// amd64: `CMPL\s\(.*\), \$1718907950$`
-	// arm64: `CMPW\s\$1718907950, R.*$`
+	// arm64: `MOVD\s\$1718907950`, `CMPW\sR.*, R.*$`
 	case ".ttf":
 		return "D"
 	default: