diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go
index eafc8a361e489905c5010d59c581310c42d2090b..a8d8f5f34f8a0e502b72026ae0a3de27a10965af 100644
--- a/src/cmd/asm/internal/asm/operand_test.go
+++ b/src/cmd/asm/internal/asm/operand_test.go
@@ -17,6 +17,7 @@ import (
 
 func setArch(goarch string) (*arch.Arch, *obj.Link) {
 	os.Setenv("GOOS", "linux") // obj can handle this OS for all architectures.
+	os.Setenv("GOARCH", goarch)
 	architecture := arch.Set(goarch)
 	if architecture == nil {
 		panic("asm: unrecognized architecture " + goarch)
diff --git a/src/cmd/compile/internal/arm64/prog.go b/src/cmd/compile/internal/arm64/prog.go
index 783a371015762274f57ba15e6fdb6122f65d903d..af43163ece66188e7bec1282ab9246e980a239f3 100644
--- a/src/cmd/compile/internal/arm64/prog.go
+++ b/src/cmd/compile/internal/arm64/prog.go
@@ -71,6 +71,7 @@ var progtable = [arm64.ALAST & obj.AMask]obj.ProgInfo{
 	arm64.ACMPW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead},
 	arm64.AADC & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.UseCarry},
 	arm64.AROR & obj.AMask:   {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite},
+	arm64.ARORW & obj.AMask:  {Flags: gc.SizeL | gc.LeftRead | gc.RegRead | gc.RightWrite},
 	arm64.AADDS & obj.AMask:  {Flags: gc.SizeQ | gc.LeftRead | gc.RegRead | gc.RightWrite | gc.SetCarry},
 	arm64.ACSET & obj.AMask:  {Flags: gc.SizeQ | gc.RightWrite},
 	arm64.ACSEL & obj.AMask:  {Flags: gc.SizeQ | gc.RegRead | gc.RightWrite},
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 24281300aef0cf0fac2ea68d0d3c588243884f86..1f96909716b75054972b301197ab0b47c0baf1d2 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -148,6 +148,24 @@ func storeByType(t ssa.Type) obj.As {
 	panic("bad store type")
 }
 
+// makeshift encodes a register shifted by a constant, used as an Offset in Prog
+func makeshift(reg int16, typ int64, s int64) int64 {
+	return int64(reg&31)<<16 | typ | (s&63)<<10
+}
+
+// genshift generates a Prog for r = r0 op (r1 shifted by s)
+func genshift(as obj.As, r0, r1, r int16, typ int64, s int64) *obj.Prog {
+	p := gc.Prog(as)
+	p.From.Type = obj.TYPE_SHIFT
+	p.From.Offset = makeshift(r1, typ, s)
+	p.Reg = r0
+	if r != 0 {
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = r
+	}
+	return p
+}
+
 func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 	s.SetLineno(v.Line)
 	switch v.Op {
@@ -284,6 +302,27 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.Reg = gc.SSARegNum(v.Args[0])
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
+	case ssa.OpARM64ADDshiftLL,
+		ssa.OpARM64SUBshiftLL,
+		ssa.OpARM64ANDshiftLL,
+		ssa.OpARM64ORshiftLL,
+		ssa.OpARM64XORshiftLL,
+		ssa.OpARM64BICshiftLL:
+		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LL, v.AuxInt)
+	case ssa.OpARM64ADDshiftRL,
+		ssa.OpARM64SUBshiftRL,
+		ssa.OpARM64ANDshiftRL,
+		ssa.OpARM64ORshiftRL,
+		ssa.OpARM64XORshiftRL,
+		ssa.OpARM64BICshiftRL:
+		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_LR, v.AuxInt)
+	case ssa.OpARM64ADDshiftRA,
+		ssa.OpARM64SUBshiftRA,
+		ssa.OpARM64ANDshiftRA,
+		ssa.OpARM64ORshiftRA,
+		ssa.OpARM64XORshiftRA,
+		ssa.OpARM64BICshiftRA:
+		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), gc.SSARegNum(v), arm64.SHIFT_AR, v.AuxInt)
 	case ssa.OpARM64MOVDconst:
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_CONST
@@ -315,6 +354,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Type = obj.TYPE_CONST
 		p.From.Offset = v.AuxInt
 		p.Reg = gc.SSARegNum(v.Args[0])
+	case ssa.OpARM64CMPshiftLL:
+		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LL, v.AuxInt)
+	case ssa.OpARM64CMPshiftRL:
+		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_LR, v.AuxInt)
+	case ssa.OpARM64CMPshiftRA:
+		genshift(v.Op.Asm(), gc.SSARegNum(v.Args[0]), gc.SSARegNum(v.Args[1]), 0, arm64.SHIFT_AR, v.AuxInt)
 	case ssa.OpARM64MOVDaddr:
 		p := gc.Prog(arm64.AMOVD)
 		p.From.Type = obj.TYPE_ADDR
@@ -372,6 +417,16 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.To.Type = obj.TYPE_MEM
 		p.To.Reg = gc.SSARegNum(v.Args[0])
 		gc.AddAux(&p.To, v)
+	case ssa.OpARM64MOVBstorezero,
+		ssa.OpARM64MOVHstorezero,
+		ssa.OpARM64MOVWstorezero,
+		ssa.OpARM64MOVDstorezero:
+		p := gc.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = arm64.REGZERO
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = gc.SSARegNum(v.Args[0])
+		gc.AddAux(&p.To, v)
 	case ssa.OpARM64MOVBreg,
 		ssa.OpARM64MOVBUreg,
 		ssa.OpARM64MOVHreg,
@@ -433,12 +488,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Reg = gc.SSARegNum(v.Args[0])
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
-	case ssa.OpARM64CSELULT:
+	case ssa.OpARM64CSELULT,
+		ssa.OpARM64CSELULT0:
+		r1 := int16(arm64.REGZERO)
+		if v.Op == ssa.OpARM64CSELULT {
+			r1 = gc.SSARegNum(v.Args[1])
+		}
 		p := gc.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG // assembler encodes conditional bits in Reg
 		p.From.Reg = arm64.COND_LO
 		p.Reg = gc.SSARegNum(v.Args[0])
-		p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: gc.SSARegNum(v.Args[1])}
+		p.From3 = &obj.Addr{Type: obj.TYPE_REG, Reg: r1}
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = gc.SSARegNum(v)
 	case ssa.OpARM64DUFFZERO:
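makeshift packs a shifted-register operand into a Prog Offset: the register number occupies bits 16-20, the 6-bit shift amount bits 10-15, and the shift-type constant (arm64.SHIFT_LL, SHIFT_LR, SHIFT_AR) carries its own bit position; genshift then emits the TYPE_SHIFT operand that the shifted-op and CMPshift cases above feed with SSA register numbers. A standalone worked example of the packing, assuming SHIFT_LL encodes as zero:

	package main

	import "fmt"

	const shiftLL = 0 // assumed value of arm64.SHIFT_LL (logical shift left)

	func makeshift(reg int16, typ int64, s int64) int64 {
		return int64(reg&31)<<16 | typ | (s&63)<<10
	}

	func main() {
		// R1 shifted left by 3: register 1 in bits 16-20, amount 3 in bits 10-15.
		fmt.Printf("%#x\n", makeshift(1, shiftLL, 3)) // 0x10c00
	}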
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 77c20d474f3965f3f0c24ad9aec4c48e3da2cc67..2e7d45d54382e7aaf8fc8f8a21b66c44aeb8d4da 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -40,7 +40,7 @@ func shouldssa(fn *Node) bool {
 		if os.Getenv("SSATEST") == "" {
 			return false
 		}
-	case "amd64", "amd64p32", "arm", "386":
+	case "amd64", "amd64p32", "arm", "386", "arm64":
 		// Generally available.
 	}
 	if !ssaEnabled {
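Adding "arm64" to the generally-available list turns the SSA back end on by default for GOARCH=arm64; the SSATEST escape hatch for other architectures and the later ssaEnabled check are unchanged. Condensed into a sketch for illustration only (assumes an os import):

	func ssaOnByDefault(goarch string) bool {
		switch goarch {
		case "amd64", "amd64p32", "arm", "386", "arm64":
			return true
		}
		return os.Getenv("SSATEST") != ""
	}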
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index c6aeddb172a6e794a4a04df43e775d62b1ed8aa6..1e7d80d3e931c46a6f9efb185610ee58443ab3ff 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -3336,6 +3336,7 @@ func samecheap(a *Node, b *Node) bool {
 // The result of walkrotate MUST be assigned back to n, e.g.
 // 	n.Left = walkrotate(n.Left)
 func walkrotate(n *Node) *Node {
+	// TODO: enable LROT on ARM64 once the old backend is gone
 	if Thearch.LinkArch.InFamily(sys.MIPS64, sys.ARM64, sys.PPC64) {
 		return n
 	}
@@ -3529,16 +3530,6 @@ func walkdiv(n *Node, init *Nodes) *Node {
 			goto ret
 		}
 
-		// TODO(zhongwei) Test shows that TUINT8, TINT8, TUINT16 and TINT16's "quick division" method
-		// on current arm64 backend is slower than hardware div instruction on ARM64 due to unnecessary
-		// data movement between registers. It could be enabled when generated code is good enough.
-		if Thearch.LinkArch.Family == sys.ARM64 {
-			switch Simtype[nl.Type.Etype] {
-			case TUINT8, TINT8, TUINT16, TINT16:
-				return n
-			}
-		}
-
 		switch Simtype[nl.Type.Etype] {
 		default:
 			return n
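With the arm64 exception deleted, constant divisions on 8- and 16-bit operands go back through walkdiv's magic-number strength reduction instead of emitting a hardware divide, and the TODO above keeps walkrotate disabled on ARM64 until the old back end is retired. The kind of source affected (illustrative only):

	// Division by a constant on a sub-word type: walkdiv now rewrites this to a
	// multiply by a magic constant plus shifts rather than leaving a divide instruction.
	func div7(x uint16) uint16 {
		return x / 7
	}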
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 715bddee388f6ccb8f71a852d3dd35934127eac0..bc215c56b4f98c89e0d3466af1a72927da5c665c 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -523,6 +523,10 @@
 (MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (MOVDstore [off1+off2] {sym} ptr val mem)
 (FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
 (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
+(MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVBstorezero [off1+off2] {sym} ptr mem)
+(MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVHstorezero [off1+off2] {sym} ptr mem)
+(MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVWstorezero [off1+off2] {sym} ptr mem)
+(MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem) -> (MOVDstorezero [off1+off2] {sym} ptr mem)
 
 (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
 	(MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@@ -555,6 +559,20 @@
 	(FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
 (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) ->
 	(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+	(MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+	(MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+	(MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) ->
+	(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+
+// store zero
+(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
+(MOVHstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
+(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
 
 // replace load from same location as preceding store with copy
 (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
@@ -567,6 +585,14 @@
 (FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
 (FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
 
+(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
+
 // don't extend after proper load
 (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
 (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
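The storezero rules above recognize stores of a literal zero and rewrite them to MOV*storezero, which the back end lowers to a store of REGZERO, so no constant has to be materialized; the companion load rules fold a load that immediately follows such a store into MOVDconst [0]. A source shape these rules target (illustrative only):

	// *p = 0 lowers to MOVDstorezero, and the reload of the same location is
	// replaced by a zero constant.
	func clear(p *int64) int64 {
		*p = 0
		return *p
	}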
@@ -645,21 +671,46 @@
 (MUL _ (MOVDconst [0])) -> (MOVDconst [0])
 (MUL x (MOVDconst [1])) -> x
 (MUL x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MUL (MOVDconst [-1]) x) -> (NEG x)
 (MUL (MOVDconst [0]) _) -> (MOVDconst [0])
 (MUL (MOVDconst [1]) x) -> x
 (MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MUL (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MUL (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MUL (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MUL (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MUL (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MUL (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MUL (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MULW x (MOVDconst [c])) && int32(c)==-1 -> (NEG x)
 (MULW _ (MOVDconst [c])) && int32(c)==0 -> (MOVDconst [0])
 (MULW x (MOVDconst [c])) && int32(c)==1 -> x
 (MULW x (MOVDconst [c])) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MULW (MOVDconst [c]) x) && int32(c)==-1 -> (NEG x)
 (MULW (MOVDconst [c]) _) && int32(c)==0 -> (MOVDconst [0])
 (MULW (MOVDconst [c]) x) && int32(c)==1 -> x
 (MULW (MOVDconst [c]) x) && isPowerOfTwo(c) -> (SLLconst [log2(c)] x)
+(MULW (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c) >= 3 -> (ADDshiftLL x x [log2(c-1)])
+(MULW (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c) >= 7 -> (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MULW (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) -> (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MULW (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) -> (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MULW (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) -> (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MULW (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) -> (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 // div by constant
 (UDIV x (MOVDconst [1])) -> x
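The added MUL/MULW rules strength-reduce multiplication by small constants into shifted adds, with a NEG for the 2^n-1 forms. Worked instances, with the rewrite each constant triggers noted alongside (illustrative Go):

	func scale(x int64) (a, b, c int64) {
		a = x * 5  // isPowerOfTwo(5-1):  ADDshiftLL x x [2]        == x + x<<2
		b = x * 7  // isPowerOfTwo(7+1):  ADDshiftLL (NEG x) x [3]  == x<<3 - x
		c = x * 40 // 40%5==0, 40/5==8:   SLLconst [3] (ADDshiftLL x x [2])
		return
	}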
@@ -680,6 +731,7 @@
 (XOR x x) -> (MOVDconst [0])
 (BIC x x) -> (MOVDconst [0])
 (AND x (MVN y)) -> (BIC x y)
+(CSELULT x (MOVDconst [0]) flag) -> (CSELULT0 x flag)
 
 // remove redundant *const ops
 (ADDconst [0]  x) -> x
@@ -903,3 +955,103 @@
 (CSELULT _ y (FlagLT_UGT)) -> y
 (CSELULT x _ (FlagGT_ULT)) -> x
 (CSELULT _ y (FlagGT_UGT)) -> y
+(CSELULT0 _ (FlagEQ)) -> (MOVDconst [0])
+(CSELULT0 x (FlagLT_ULT)) -> x
+(CSELULT0 _ (FlagLT_UGT)) -> (MOVDconst [0])
+(CSELULT0 x (FlagGT_ULT)) -> x
+(CSELULT0 _ (FlagGT_UGT)) -> (MOVDconst [0])
+
+// absorb shifts into ops
+(ADD x (SLLconst [c] y)) -> (ADDshiftLL x y [c])
+(ADD (SLLconst [c] y) x) -> (ADDshiftLL x y [c])
+(ADD x (SRLconst [c] y)) -> (ADDshiftRL x y [c])
+(ADD (SRLconst [c] y) x) -> (ADDshiftRL x y [c])
+(ADD x (SRAconst [c] y)) -> (ADDshiftRA x y [c])
+(ADD (SRAconst [c] y) x) -> (ADDshiftRA x y [c])
+(SUB x (SLLconst [c] y)) -> (SUBshiftLL x y [c])
+(SUB x (SRLconst [c] y)) -> (SUBshiftRL x y [c])
+(SUB x (SRAconst [c] y)) -> (SUBshiftRA x y [c])
+(AND x (SLLconst [c] y)) -> (ANDshiftLL x y [c])
+(AND (SLLconst [c] y) x) -> (ANDshiftLL x y [c])
+(AND x (SRLconst [c] y)) -> (ANDshiftRL x y [c])
+(AND (SRLconst [c] y) x) -> (ANDshiftRL x y [c])
+(AND x (SRAconst [c] y)) -> (ANDshiftRA x y [c])
+(AND (SRAconst [c] y) x) -> (ANDshiftRA x y [c])
+(OR  x (SLLconst [c] y)) -> (ORshiftLL  x y [c])
+(OR  (SLLconst [c] y) x) -> (ORshiftLL  x y [c])
+(OR  x (SRLconst [c] y)) -> (ORshiftRL  x y [c])
+(OR  (SRLconst [c] y) x) -> (ORshiftRL  x y [c])
+(OR  x (SRAconst [c] y)) -> (ORshiftRA  x y [c])
+(OR  (SRAconst [c] y) x) -> (ORshiftRA  x y [c])
+(XOR x (SLLconst [c] y)) -> (XORshiftLL x y [c])
+(XOR (SLLconst [c] y) x) -> (XORshiftLL x y [c])
+(XOR x (SRLconst [c] y)) -> (XORshiftRL x y [c])
+(XOR (SRLconst [c] y) x) -> (XORshiftRL x y [c])
+(XOR x (SRAconst [c] y)) -> (XORshiftRA x y [c])
+(XOR (SRAconst [c] y) x) -> (XORshiftRA x y [c])
+(BIC x (SLLconst [c] y)) -> (BICshiftLL x y [c])
+(BIC x (SRLconst [c] y)) -> (BICshiftRL x y [c])
+(BIC x (SRAconst [c] y)) -> (BICshiftRA x y [c])
+(CMP x (SLLconst [c] y)) -> (CMPshiftLL x y [c])
+(CMP (SLLconst [c] y) x) -> (InvertFlags (CMPshiftLL x y [c]))
+(CMP x (SRLconst [c] y)) -> (CMPshiftRL x y [c])
+(CMP (SRLconst [c] y) x) -> (InvertFlags (CMPshiftRL x y [c]))
+(CMP x (SRAconst [c] y)) -> (CMPshiftRA x y [c])
+(CMP (SRAconst [c] y) x) -> (InvertFlags (CMPshiftRA x y [c]))
+
+// prefer *const ops to *shift ops
+(ADDshiftLL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SLLconst <x.Type> x [d]))
+(ADDshiftRL (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRLconst <x.Type> x [d]))
+(ADDshiftRA (MOVDconst [c]) x [d]) -> (ADDconst [c] (SRAconst <x.Type> x [d]))
+(ANDshiftLL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SLLconst <x.Type> x [d]))
+(ANDshiftRL (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRLconst <x.Type> x [d]))
+(ANDshiftRA (MOVDconst [c]) x [d]) -> (ANDconst [c] (SRAconst <x.Type> x [d]))
+(ORshiftLL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SLLconst <x.Type> x [d]))
+(ORshiftRL  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRLconst <x.Type> x [d]))
+(ORshiftRA  (MOVDconst [c]) x [d]) -> (ORconst  [c] (SRAconst <x.Type> x [d]))
+(XORshiftLL (MOVDconst [c]) x [d]) -> (XORconst [c] (SLLconst <x.Type> x [d]))
+(XORshiftRL (MOVDconst [c]) x [d]) -> (XORconst [c] (SRLconst <x.Type> x [d]))
+(XORshiftRA (MOVDconst [c]) x [d]) -> (XORconst [c] (SRAconst <x.Type> x [d]))
+(CMPshiftLL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
+(CMPshiftRL (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
+(CMPshiftRA (MOVDconst [c]) x [d]) -> (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
+
+// constant folding in *shift ops
+(ADDshiftLL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)<<uint64(d))])
+(ADDshiftRL x (MOVDconst [c]) [d]) -> (ADDconst x [int64(uint64(c)>>uint64(d))])
+(ADDshiftRA x (MOVDconst [c]) [d]) -> (ADDconst x [int64(int64(c)>>uint64(d))])
+(SUBshiftLL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)<<uint64(d))])
+(SUBshiftRL x (MOVDconst [c]) [d]) -> (SUBconst x [int64(uint64(c)>>uint64(d))])
+(SUBshiftRA x (MOVDconst [c]) [d]) -> (SUBconst x [int64(int64(c)>>uint64(d))])
+(ANDshiftLL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)<<uint64(d))])
+(ANDshiftRL x (MOVDconst [c]) [d]) -> (ANDconst x [int64(uint64(c)>>uint64(d))])
+(ANDshiftRA x (MOVDconst [c]) [d]) -> (ANDconst x [int64(int64(c)>>uint64(d))])
+(ORshiftLL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)<<uint64(d))])
+(ORshiftRL  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(uint64(c)>>uint64(d))])
+(ORshiftRA  x (MOVDconst [c]) [d]) -> (ORconst  x [int64(int64(c)>>uint64(d))])
+(XORshiftLL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)<<uint64(d))])
+(XORshiftRL x (MOVDconst [c]) [d]) -> (XORconst x [int64(uint64(c)>>uint64(d))])
+(XORshiftRA x (MOVDconst [c]) [d]) -> (XORconst x [int64(int64(c)>>uint64(d))])
+(BICshiftLL x (MOVDconst [c]) [d]) -> (BICconst x [int64(uint64(c)<<uint64(d))])
+(BICshiftRL x (MOVDconst [c]) [d]) -> (BICconst x [int64(uint64(c)>>uint64(d))])
+(BICshiftRA x (MOVDconst [c]) [d]) -> (BICconst x [int64(int64(c)>>uint64(d))])
+(CMPshiftLL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)<<uint64(d))])
+(CMPshiftRL x (MOVDconst [c]) [d]) -> (CMPconst x [int64(uint64(c)>>uint64(d))])
+(CMPshiftRA x (MOVDconst [c]) [d]) -> (CMPconst x [int64(int64(c)>>uint64(d))])
+
+// simplification with *shift ops
+(SUBshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(SUBshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(SUBshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(ANDshiftLL x y:(SLLconst x [c]) [d]) && c==d -> y
+(ANDshiftRL x y:(SRLconst x [c]) [d]) && c==d -> y
+(ANDshiftRA x y:(SRAconst x [c]) [d]) && c==d -> y
+(ORshiftLL  x y:(SLLconst x [c]) [d]) && c==d -> y
+(ORshiftRL  x y:(SRLconst x [c]) [d]) && c==d -> y
+(ORshiftRA  x y:(SRAconst x [c]) [d]) && c==d -> y
+(XORshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(XORshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(XORshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(BICshiftLL x (SLLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(BICshiftRL x (SRLconst x [c]) [d]) && c==d -> (MOVDconst [0])
+(BICshiftRA x (SRAconst x [c]) [d]) && c==d -> (MOVDconst [0])
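Taken together, the absorb/fold/simplify rules let a constant shift that feeds an ALU op or a compare collapse into one shifted-operand instruction, fold fully constant shift operands into the existing *const ops, and cancel trivial self-shifts. Source shapes they target (illustrative only):

	func fold(a, b uint64) (uint64, bool) {
		s := a + b<<3  // shift absorbed into the add:      ADDshiftLL a b [3]
		ok := a < b>>4 // shift absorbed into the compare:  CMPshiftRL a b [4]
		return s, ok
	}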
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index e30fcd63420a26908dfb7b3768db549159a09c07..b586ec5b57a4c8f685a53450fe0a216c764d3785 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -133,11 +133,11 @@ func init() {
 	)
 	// Common regInfo
 	var (
-		gp01     = regInfo{inputs: nil, outputs: []regMask{gp}}
-		gp11     = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-		gp11sp   = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-		gp1flags = regInfo{inputs: []regMask{gpg}}
-		//gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
+		gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
+		gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
+		gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
+		gp1flags  = regInfo{inputs: []regMask{gpg}}
+		gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
 		gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
 		gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
 		gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
@@ -145,8 +145,9 @@ func init() {
 		//gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
 		//gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
 		//gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-		gpload  = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-		gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
+		gpload   = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
+		gpstore  = regInfo{inputs: []regMask{gpspsbg, gpg}}
+		gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
 		//gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
 		//gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
 		fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
@@ -228,6 +229,29 @@ func init() {
 		{name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"},                  // arg0 compare to arg1, float32
 		{name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
 
+		// shifted ops
+		{name: "ADDshiftLL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1<<auxInt
+		{name: "ADDshiftRL", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, unsigned shift
+		{name: "ADDshiftRA", argLength: 2, reg: gp21, asm: "ADD", aux: "Int64"},                   // arg0 + arg1>>auxInt, signed shift
+		{name: "SUBshiftLL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1<<auxInt
+		{name: "SUBshiftRL", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, unsigned shift
+		{name: "SUBshiftRA", argLength: 2, reg: gp21, asm: "SUB", aux: "Int64"},                   // arg0 - arg1>>auxInt, signed shift
+		{name: "ANDshiftLL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1<<auxInt)
+		{name: "ANDshiftRL", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), unsigned shift
+		{name: "ANDshiftRA", argLength: 2, reg: gp21, asm: "AND", aux: "Int64"},                   // arg0 & (arg1>>auxInt), signed shift
+		{name: "ORshiftLL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1<<auxInt
+		{name: "ORshiftRL", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, unsigned shift
+		{name: "ORshiftRA", argLength: 2, reg: gp21, asm: "ORR", aux: "Int64"},                    // arg0 | arg1>>auxInt, signed shift
+		{name: "XORshiftLL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1<<auxInt
+		{name: "XORshiftRL", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, unsigned shift
+		{name: "XORshiftRA", argLength: 2, reg: gp21, asm: "EOR", aux: "Int64"},                   // arg0 ^ arg1>>auxInt, signed shift
+		{name: "BICshiftLL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1<<auxInt)
+		{name: "BICshiftRL", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), unsigned shift
+		{name: "BICshiftRA", argLength: 2, reg: gp21, asm: "BIC", aux: "Int64"},                   // arg0 &^ (arg1>>auxInt), signed shift
+		{name: "CMPshiftLL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1<<auxInt
+		{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
+		{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
+
 		// moves
 		{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true},      // 32 low bits of auxint
 		{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
@@ -252,6 +276,11 @@ func init() {
 		{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem"}, // store 4 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 		{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem"}, // store 8 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
 
+		{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
+		{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + auxInt + aux.  ar12=mem.
+
 		// conversions
 		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"},   // move from arg0, sign-extended from byte
 		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
@@ -283,7 +312,8 @@ func init() {
 		{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"},     // float64 -> float32
 
 		// conditional instructions
-		{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
+		{name: "CSELULT", argLength: 3, reg: gp2flags1, asm: "CSEL"},  // returns arg0 if flags indicates unsigned LT, arg1 otherwise, arg2=flags
+		{name: "CSELULT0", argLength: 2, reg: gp1flags1, asm: "CSEL"}, // returns arg0 if flags indicates unsigned LT, 0 otherwise, arg1=flags
 
 		// function calls
 		{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true},                                              // call static function aux.(*gc.Sym).  arg0=mem, auxint=argsize, returns mem
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 75381c4246dfc005213c538ccbc2c48c9f64a140..8fa816d0755ff276a3e2627e20d77c315ef62b3d 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -859,6 +859,27 @@ const (
 	OpARM64CMNWconst
 	OpARM64FCMPS
 	OpARM64FCMPD
+	OpARM64ADDshiftLL
+	OpARM64ADDshiftRL
+	OpARM64ADDshiftRA
+	OpARM64SUBshiftLL
+	OpARM64SUBshiftRL
+	OpARM64SUBshiftRA
+	OpARM64ANDshiftLL
+	OpARM64ANDshiftRL
+	OpARM64ANDshiftRA
+	OpARM64ORshiftLL
+	OpARM64ORshiftRL
+	OpARM64ORshiftRA
+	OpARM64XORshiftLL
+	OpARM64XORshiftRL
+	OpARM64XORshiftRA
+	OpARM64BICshiftLL
+	OpARM64BICshiftRL
+	OpARM64BICshiftRA
+	OpARM64CMPshiftLL
+	OpARM64CMPshiftRL
+	OpARM64CMPshiftRA
 	OpARM64MOVDconst
 	OpARM64FMOVSconst
 	OpARM64FMOVDconst
@@ -878,6 +899,10 @@ const (
 	OpARM64MOVDstore
 	OpARM64FMOVSstore
 	OpARM64FMOVDstore
+	OpARM64MOVBstorezero
+	OpARM64MOVHstorezero
+	OpARM64MOVWstorezero
+	OpARM64MOVDstorezero
 	OpARM64MOVBreg
 	OpARM64MOVBUreg
 	OpARM64MOVHreg
@@ -905,6 +930,7 @@ const (
 	OpARM64FCVTSD
 	OpARM64FCVTDS
 	OpARM64CSELULT
+	OpARM64CSELULT0
 	OpARM64CALLstatic
 	OpARM64CALLclosure
 	OpARM64CALLdefer
@@ -10596,6 +10622,312 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:    "ADDshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ADDshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ADDshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AADD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "SUBshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ASUB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "SUBshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ASUB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "SUBshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ASUB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ANDshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AAND,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ANDshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AAND,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ANDshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AAND,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ORshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AORR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ORshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AORR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "ORshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AORR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "XORshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AEOR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "XORshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AEOR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "XORshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.AEOR,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "BICshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ABIC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "BICshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ABIC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "BICshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ABIC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
+	{
+		name:    "CMPshiftLL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ACMP,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+		},
+	},
+	{
+		name:    "CMPshiftRL",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ACMP,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+		},
+	},
+	{
+		name:    "CMPshiftRA",
+		auxType: auxInt64,
+		argLen:  2,
+		asm:     arm64.ACMP,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+				{1, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+		},
+	},
 	{
 		name:              "MOVDconst",
 		auxType:           auxInt64,
@@ -10845,6 +11177,50 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:    "MOVBstorezero",
+		auxType: auxSymOff,
+		argLen:  2,
+		asm:     arm64.AMOVB,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+			},
+		},
+	},
+	{
+		name:    "MOVHstorezero",
+		auxType: auxSymOff,
+		argLen:  2,
+		asm:     arm64.AMOVH,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+			},
+		},
+	},
+	{
+		name:    "MOVWstorezero",
+		auxType: auxSymOff,
+		argLen:  2,
+		asm:     arm64.AMOVW,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+			},
+		},
+	},
+	{
+		name:    "MOVDstorezero",
+		auxType: auxSymOff,
+		argLen:  2,
+		asm:     arm64.AMOVD,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 4611686019232432127}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g SP SB
+			},
+		},
+	},
 	{
 		name:   "MOVBreg",
 		argLen: 1,
@@ -11197,6 +11573,19 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:   "CSELULT0",
+		argLen: 2,
+		asm:    arm64.ACSEL,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 268173311}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g
+			},
+			outputs: []outputInfo{
+				{0, 133955583}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26
+			},
+		},
+	},
 	{
 		name:         "CALLstatic",
 		auxType:      auxSymOff,
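opGen.go is generated from the ARM64Ops.go declarations, so the new entries simply mirror the gp21, gp2flags, gpstore0 and gp1flags1 regInfo values added above; the large integers are register bitmasks in which bit i selects register i of the architecture's register list (the comments spell out each mask). A small decoding sketch, for illustration only:

	// regsOf lists the set bit positions of a register mask such as 268173311.
	func regsOf(mask uint64) []int {
		var rs []int
		for i := 0; i < 64; i++ {
			if mask&(1<<uint(i)) != 0 {
				rs = append(rs, i)
			}
		}
		return rs
	}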
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 6350d1d6b01aa12d8a0c7bcf709ca0d7a39a6db6..318718d652e5fc1e540846bbc88c6c74ca13def0 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -12,14 +12,32 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64ADD(v, config)
 	case OpARM64ADDconst:
 		return rewriteValueARM64_OpARM64ADDconst(v, config)
+	case OpARM64ADDshiftLL:
+		return rewriteValueARM64_OpARM64ADDshiftLL(v, config)
+	case OpARM64ADDshiftRA:
+		return rewriteValueARM64_OpARM64ADDshiftRA(v, config)
+	case OpARM64ADDshiftRL:
+		return rewriteValueARM64_OpARM64ADDshiftRL(v, config)
 	case OpARM64AND:
 		return rewriteValueARM64_OpARM64AND(v, config)
 	case OpARM64ANDconst:
 		return rewriteValueARM64_OpARM64ANDconst(v, config)
+	case OpARM64ANDshiftLL:
+		return rewriteValueARM64_OpARM64ANDshiftLL(v, config)
+	case OpARM64ANDshiftRA:
+		return rewriteValueARM64_OpARM64ANDshiftRA(v, config)
+	case OpARM64ANDshiftRL:
+		return rewriteValueARM64_OpARM64ANDshiftRL(v, config)
 	case OpARM64BIC:
 		return rewriteValueARM64_OpARM64BIC(v, config)
 	case OpARM64BICconst:
 		return rewriteValueARM64_OpARM64BICconst(v, config)
+	case OpARM64BICshiftLL:
+		return rewriteValueARM64_OpARM64BICshiftLL(v, config)
+	case OpARM64BICshiftRA:
+		return rewriteValueARM64_OpARM64BICshiftRA(v, config)
+	case OpARM64BICshiftRL:
+		return rewriteValueARM64_OpARM64BICshiftRL(v, config)
 	case OpARM64CMP:
 		return rewriteValueARM64_OpARM64CMP(v, config)
 	case OpARM64CMPW:
@@ -28,8 +46,16 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64CMPWconst(v, config)
 	case OpARM64CMPconst:
 		return rewriteValueARM64_OpARM64CMPconst(v, config)
+	case OpARM64CMPshiftLL:
+		return rewriteValueARM64_OpARM64CMPshiftLL(v, config)
+	case OpARM64CMPshiftRA:
+		return rewriteValueARM64_OpARM64CMPshiftRA(v, config)
+	case OpARM64CMPshiftRL:
+		return rewriteValueARM64_OpARM64CMPshiftRL(v, config)
 	case OpARM64CSELULT:
 		return rewriteValueARM64_OpARM64CSELULT(v, config)
+	case OpARM64CSELULT0:
+		return rewriteValueARM64_OpARM64CSELULT0(v, config)
 	case OpARM64DIV:
 		return rewriteValueARM64_OpARM64DIV(v, config)
 	case OpARM64DIVW:
@@ -74,12 +100,16 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64MOVBreg(v, config)
 	case OpARM64MOVBstore:
 		return rewriteValueARM64_OpARM64MOVBstore(v, config)
+	case OpARM64MOVBstorezero:
+		return rewriteValueARM64_OpARM64MOVBstorezero(v, config)
 	case OpARM64MOVDload:
 		return rewriteValueARM64_OpARM64MOVDload(v, config)
 	case OpARM64MOVDreg:
 		return rewriteValueARM64_OpARM64MOVDreg(v, config)
 	case OpARM64MOVDstore:
 		return rewriteValueARM64_OpARM64MOVDstore(v, config)
+	case OpARM64MOVDstorezero:
+		return rewriteValueARM64_OpARM64MOVDstorezero(v, config)
 	case OpARM64MOVHUload:
 		return rewriteValueARM64_OpARM64MOVHUload(v, config)
 	case OpARM64MOVHUreg:
@@ -90,6 +120,8 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64MOVHreg(v, config)
 	case OpARM64MOVHstore:
 		return rewriteValueARM64_OpARM64MOVHstore(v, config)
+	case OpARM64MOVHstorezero:
+		return rewriteValueARM64_OpARM64MOVHstorezero(v, config)
 	case OpARM64MOVWUload:
 		return rewriteValueARM64_OpARM64MOVWUload(v, config)
 	case OpARM64MOVWUreg:
@@ -100,6 +132,8 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64MOVWreg(v, config)
 	case OpARM64MOVWstore:
 		return rewriteValueARM64_OpARM64MOVWstore(v, config)
+	case OpARM64MOVWstorezero:
+		return rewriteValueARM64_OpARM64MOVWstorezero(v, config)
 	case OpARM64MUL:
 		return rewriteValueARM64_OpARM64MUL(v, config)
 	case OpARM64MULW:
@@ -114,6 +148,12 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64OR(v, config)
 	case OpARM64ORconst:
 		return rewriteValueARM64_OpARM64ORconst(v, config)
+	case OpARM64ORshiftLL:
+		return rewriteValueARM64_OpARM64ORshiftLL(v, config)
+	case OpARM64ORshiftRA:
+		return rewriteValueARM64_OpARM64ORshiftRA(v, config)
+	case OpARM64ORshiftRL:
+		return rewriteValueARM64_OpARM64ORshiftRL(v, config)
 	case OpARM64SLL:
 		return rewriteValueARM64_OpARM64SLL(v, config)
 	case OpARM64SLLconst:
@@ -130,6 +170,12 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64SUB(v, config)
 	case OpARM64SUBconst:
 		return rewriteValueARM64_OpARM64SUBconst(v, config)
+	case OpARM64SUBshiftLL:
+		return rewriteValueARM64_OpARM64SUBshiftLL(v, config)
+	case OpARM64SUBshiftRA:
+		return rewriteValueARM64_OpARM64SUBshiftRA(v, config)
+	case OpARM64SUBshiftRL:
+		return rewriteValueARM64_OpARM64SUBshiftRL(v, config)
 	case OpARM64UDIV:
 		return rewriteValueARM64_OpARM64UDIV(v, config)
 	case OpARM64UDIVW:
@@ -142,6 +188,12 @@ func rewriteValueARM64(v *Value, config *Config) bool {
 		return rewriteValueARM64_OpARM64XOR(v, config)
 	case OpARM64XORconst:
 		return rewriteValueARM64_OpARM64XORconst(v, config)
+	case OpARM64XORshiftLL:
+		return rewriteValueARM64_OpARM64XORshiftLL(v, config)
+	case OpARM64XORshiftRA:
+		return rewriteValueARM64_OpARM64XORshiftRA(v, config)
+	case OpARM64XORshiftRL:
+		return rewriteValueARM64_OpARM64XORshiftRL(v, config)
 	case OpAdd16:
 		return rewriteValueARM64_OpAdd16(v, config)
 	case OpAdd32:
@@ -684,6 +736,108 @@ func rewriteValueARM64_OpARM64ADD(v *Value, config *Config) bool {
 		v.AddArg(y)
 		return true
 	}
+	// match: (ADD x (SLLconst [c] y))
+	// cond:
+	// result: (ADDshiftLL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (ADD (SLLconst [c] y) x)
+	// cond:
+	// result: (ADDshiftLL x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (ADD x (SRLconst [c] y))
+	// cond:
+	// result: (ADDshiftRL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ADDshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (ADD (SRLconst [c] y) x)
+	// cond:
+	// result: (ADDshiftRL x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ADDshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (ADD x (SRAconst [c] y))
+	// cond:
+	// result: (ADDshiftRA x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ADDshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (ADD (SRAconst [c] y) x)
+	// cond:
+	// result: (ADDshiftRA x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ADDshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64ADDconst(v *Value, config *Config) bool {
@@ -768,6 +922,126 @@ func rewriteValueARM64_OpARM64ADDconst(v *Value, config *Config) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64ADDshiftLL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ADDshiftLL (MOVDconst [c]) x [d])
+	// cond:
+	// result: (ADDconst [c] (SLLconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ADDshiftLL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ADDconst x [int64(uint64(c)<<uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = int64(uint64(c) << uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64ADDshiftRA(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ADDshiftRA (MOVDconst [c]) x [d])
+	// cond:
+	// result: (ADDconst [c] (SRAconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ADDshiftRA x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ADDconst x [int64(int64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64ADDshiftRL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ADDshiftRL (MOVDconst [c]) x [d])
+	// cond:
+	// result: (ADDconst [c] (SRLconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ADDshiftRL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ADDconst x [int64(uint64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = int64(uint64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64AND(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -829,6 +1103,108 @@ func rewriteValueARM64_OpARM64AND(v *Value, config *Config) bool {
 		v.AddArg(y)
 		return true
 	}
+	// match: (AND x (SLLconst [c] y))
+	// cond:
+	// result: (ANDshiftLL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ANDshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND (SLLconst [c] y) x)
+	// cond:
+	// result: (ANDshiftLL x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ANDshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND x (SRLconst [c] y))
+	// cond:
+	// result: (ANDshiftRL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ANDshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND (SRLconst [c] y) x)
+	// cond:
+	// result: (ANDshiftRL x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ANDshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND x (SRAconst [c] y))
+	// cond:
+	// result: (ANDshiftRA x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ANDshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND (SRAconst [c] y) x)
+	// cond:
+	// result: (ANDshiftRA x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ANDshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64ANDconst(v *Value, config *Config) bool {
@@ -890,16 +1266,202 @@ func rewriteValueARM64_OpARM64ANDconst(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64BIC(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64ANDshiftLL(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (BIC x (MOVDconst [c]))
+	// match: (ANDshiftLL (MOVDconst [c]) x [d])
 	// cond:
-	// result: (BICconst [c] x)
+	// result: (ANDconst [c] (SLLconst <x.Type> x [d]))
 	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVDconst {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ANDconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ANDshiftLL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ANDconst x [int64(uint64(c)<<uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ANDconst)
+		v.AuxInt = int64(uint64(c) << uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (ANDshiftLL x y:(SLLconst x [c]) [d])
+	// cond: c==d
+	// result: y
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		y := v.Args[1]
+		if y.Op != OpARM64SLLconst {
+			break
+		}
+		c := y.AuxInt
+		if x != y.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64ANDshiftRA(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ANDshiftRA (MOVDconst [c]) x [d])
+	// cond:
+	// result: (ANDconst [c] (SRAconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ANDconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ANDshiftRA x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ANDconst x [int64(int64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ANDconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (ANDshiftRA x y:(SRAconst x [c]) [d])
+	// cond: c==d
+	// result: y
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		y := v.Args[1]
+		if y.Op != OpARM64SRAconst {
+			break
+		}
+		c := y.AuxInt
+		if x != y.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64ANDshiftRL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ANDshiftRL (MOVDconst [c]) x [d])
+	// cond:
+	// result: (ANDconst [c] (SRLconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ANDconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ANDshiftRL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ANDconst x [int64(uint64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ANDconst)
+		v.AuxInt = int64(uint64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (ANDshiftRL x y:(SRLconst x [c]) [d])
+	// cond: c==d
+	// result: y
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		y := v.Args[1]
+		if y.Op != OpARM64SRLconst {
+			break
+		}
+		c := y.AuxInt
+		if x != y.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64BIC(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (BIC x (MOVDconst [c]))
+	// cond:
+	// result: (BICconst [c] x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
 		c := v_1.AuxInt
@@ -920,6 +1482,57 @@ func rewriteValueARM64_OpARM64BIC(v *Value, config *Config) bool {
 		v.AuxInt = 0
 		return true
 	}
+	// match: (BIC x (SLLconst [c] y))
+	// cond:
+	// result: (BICshiftLL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64BICshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (BIC x (SRLconst [c] y))
+	// cond:
+	// result: (BICshiftRL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64BICshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (BIC x (SRAconst [c] y))
+	// cond:
+	// result: (BICshiftRA x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64BICshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64BICconst(v *Value, config *Config) bool {
@@ -965,102 +1578,336 @@ func rewriteValueARM64_OpARM64BICconst(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64CMP(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64BICshiftLL(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (CMP x (MOVDconst [c]))
+	// match: (BICshiftLL x (MOVDconst [c]) [d])
 	// cond:
-	// result: (CMPconst [c] x)
+	// result: (BICconst x [int64(uint64(c)<<uint64(d))])
 	for {
+		d := v.AuxInt
 		x := v.Args[0]
 		v_1 := v.Args[1]
 		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
 		c := v_1.AuxInt
-		v.reset(OpARM64CMPconst)
-		v.AuxInt = c
+		v.reset(OpARM64BICconst)
+		v.AuxInt = int64(uint64(c) << uint64(d))
 		v.AddArg(x)
 		return true
 	}
-	// match: (CMP (MOVDconst [c]) x)
-	// cond:
-	// result: (InvertFlags (CMPconst [c] x))
+	// match: (BICshiftLL x (SLLconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
 			break
 		}
-		c := v_0.AuxInt
-		x := v.Args[1]
-		v.reset(OpARM64InvertFlags)
-		v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
-		v0.AuxInt = c
-		v0.AddArg(x)
-		v.AddArg(v0)
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
 		return true
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64CMPW(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64BICshiftRA(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (CMPW x (MOVDconst [c]))
+	// match: (BICshiftRA x (MOVDconst [c]) [d])
 	// cond:
-	// result: (CMPWconst [int64(int32(c))] x)
+	// result: (BICconst x [int64(int64(c)>>uint64(d))])
 	for {
+		d := v.AuxInt
 		x := v.Args[0]
 		v_1 := v.Args[1]
 		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
 		c := v_1.AuxInt
-		v.reset(OpARM64CMPWconst)
-		v.AuxInt = int64(int32(c))
+		v.reset(OpARM64BICconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
 		v.AddArg(x)
 		return true
 	}
-	// match: (CMPW (MOVDconst [c]) x)
-	// cond:
-	// result: (InvertFlags (CMPWconst [int64(int32(c))] x))
+	// match: (BICshiftRA x (SRAconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
 			break
 		}
-		c := v_0.AuxInt
-		x := v.Args[1]
-		v.reset(OpARM64InvertFlags)
-		v0 := b.NewValue0(v.Line, OpARM64CMPWconst, TypeFlags)
-		v0.AuxInt = int64(int32(c))
-		v0.AddArg(x)
-		v.AddArg(v0)
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
 		return true
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64CMPWconst(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64BICshiftRL(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (CMPWconst (MOVDconst [x]) [y])
-	// cond: int32(x)==int32(y)
-	// result: (FlagEQ)
+	// match: (BICshiftRL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (BICconst x [int64(uint64(c)>>uint64(d))])
 	for {
-		y := v.AuxInt
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
-			break
-		}
-		x := v_0.AuxInt
-		if !(int32(x) == int32(y)) {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		v.reset(OpARM64FlagEQ)
+		c := v_1.AuxInt
+		v.reset(OpARM64BICconst)
+		v.AuxInt = int64(uint64(c) >> uint64(d))
+		v.AddArg(x)
 		return true
 	}
-	// match: (CMPWconst (MOVDconst [x]) [y])
-	// cond: int32(x)<int32(y) && uint32(x)<uint32(y)
-	// result: (FlagLT_ULT)
+	// match: (BICshiftRL x (SRLconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64CMP(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CMP x (MOVDconst [c]))
+	// cond:
+	// result: (CMPconst [c] x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64CMPconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (CMP (MOVDconst [c]) x)
+	// cond:
+	// result: (InvertFlags (CMPconst [c] x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+		v0.AuxInt = c
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (CMP x (SLLconst [c] y))
+	// cond:
+	// result: (CMPshiftLL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64CMPshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (CMP (SLLconst [c] y) x)
+	// cond:
+	// result: (InvertFlags (CMPshiftLL x y [c]))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPshiftLL, TypeFlags)
+		v0.AuxInt = c
+		v0.AddArg(x)
+		v0.AddArg(y)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (CMP x (SRLconst [c] y))
+	// cond:
+	// result: (CMPshiftRL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64CMPshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (CMP (SRLconst [c] y) x)
+	// cond:
+	// result: (InvertFlags (CMPshiftRL x y [c]))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPshiftRL, TypeFlags)
+		v0.AuxInt = c
+		v0.AddArg(x)
+		v0.AddArg(y)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (CMP x (SRAconst [c] y))
+	// cond:
+	// result: (CMPshiftRA x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64CMPshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (CMP (SRAconst [c] y) x)
+	// cond:
+	// result: (InvertFlags (CMPshiftRA x y [c]))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPshiftRA, TypeFlags)
+		v0.AuxInt = c
+		v0.AddArg(x)
+		v0.AddArg(y)
+		v.AddArg(v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64CMPW(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CMPW x (MOVDconst [c]))
+	// cond:
+	// result: (CMPWconst [int64(int32(c))] x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64CMPWconst)
+		v.AuxInt = int64(int32(c))
+		v.AddArg(x)
+		return true
+	}
+	// match: (CMPW (MOVDconst [c]) x)
+	// cond:
+	// result: (InvertFlags (CMPWconst [int64(int32(c))] x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPWconst, TypeFlags)
+		v0.AuxInt = int64(int32(c))
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64CMPWconst(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CMPWconst (MOVDconst [x]) [y])
+	// cond: int32(x)==int32(y)
+	// result: (FlagEQ)
+	for {
+		y := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		x := v_0.AuxInt
+		if !(int32(x) == int32(y)) {
+			break
+		}
+		v.reset(OpARM64FlagEQ)
+		return true
+	}
+	// match: (CMPWconst (MOVDconst [x]) [y])
+	// cond: int32(x)<int32(y) && uint32(x)<uint32(y)
+	// result: (FlagLT_ULT)
 	for {
 		y := v.AuxInt
 		v_0 := v.Args[0]
@@ -1316,61 +2163,205 @@ func rewriteValueARM64_OpARM64CMPconst(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64CSELULT(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64CMPshiftLL(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (CSELULT _ y (FlagEQ))
+	// match: (CMPshiftLL (MOVDconst [c]) x [d])
 	// cond:
-	// result: y
+	// result: (InvertFlags (CMPconst [c] (SLLconst <x.Type> x [d])))
 	for {
-		y := v.Args[1]
-		v_2 := v.Args[2]
-		if v_2.Op != OpARM64FlagEQ {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = y.Type
-		v.AddArg(y)
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+		v0.AuxInt = c
+		v1 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+		v1.AuxInt = d
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (CSELULT x _ (FlagLT_ULT))
+	// match: (CMPshiftLL x (MOVDconst [c]) [d])
 	// cond:
-	// result: x
+	// result: (CMPconst x [int64(uint64(c)<<uint64(d))])
 	for {
+		d := v.AuxInt
 		x := v.Args[0]
-		v_2 := v.Args[2]
-		if v_2.Op != OpARM64FlagLT_ULT {
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		c := v_1.AuxInt
+		v.reset(OpARM64CMPconst)
+		v.AuxInt = int64(uint64(c) << uint64(d))
 		v.AddArg(x)
 		return true
 	}
-	// match: (CSELULT _ y (FlagLT_UGT))
+	return false
+}
+func rewriteValueARM64_OpARM64CMPshiftRA(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CMPshiftRA (MOVDconst [c]) x [d])
 	// cond:
-	// result: y
+	// result: (InvertFlags (CMPconst [c] (SRAconst <x.Type> x [d])))
 	for {
-		y := v.Args[1]
-		v_2 := v.Args[2]
-		if v_2.Op != OpARM64FlagLT_UGT {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = y.Type
-		v.AddArg(y)
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+		v0.AuxInt = c
+		v1 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+		v1.AuxInt = d
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (CSELULT x _ (FlagGT_ULT))
+	// match: (CMPshiftRA x (MOVDconst [c]) [d])
 	// cond:
-	// result: x
+	// result: (CMPconst x [int64(int64(c)>>uint64(d))])
 	for {
+		d := v.AuxInt
 		x := v.Args[0]
-		v_2 := v.Args[2]
-		if v_2.Op != OpARM64FlagGT_ULT {
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		v.reset(OpCopy)
+		c := v_1.AuxInt
+		v.reset(OpARM64CMPconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64CMPshiftRL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CMPshiftRL (MOVDconst [c]) x [d])
+	// cond:
+	// result: (InvertFlags (CMPconst [c] (SRLconst <x.Type> x [d])))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64InvertFlags)
+		v0 := b.NewValue0(v.Line, OpARM64CMPconst, TypeFlags)
+		v0.AuxInt = c
+		v1 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+		v1.AuxInt = d
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (CMPshiftRL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (CMPconst x [int64(uint64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64CMPconst)
+		v.AuxInt = int64(uint64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64CSELULT(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CSELULT x (MOVDconst [0]) flag)
+	// cond:
+	// result: (CSELULT0 x flag)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		flag := v.Args[2]
+		v.reset(OpARM64CSELULT0)
+		v.AddArg(x)
+		v.AddArg(flag)
+		return true
+	}
+	// match: (CSELULT _ y (FlagEQ))
+	// cond:
+	// result: y
+	for {
+		y := v.Args[1]
+		v_2 := v.Args[2]
+		if v_2.Op != OpARM64FlagEQ {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	// match: (CSELULT x _ (FlagLT_ULT))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		v_2 := v.Args[2]
+		if v_2.Op != OpARM64FlagLT_ULT {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (CSELULT _ y (FlagLT_UGT))
+	// cond:
+	// result: y
+	for {
+		y := v.Args[1]
+		v_2 := v.Args[2]
+		if v_2.Op != OpARM64FlagLT_UGT {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	// match: (CSELULT x _ (FlagGT_ULT))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		v_2 := v.Args[2]
+		if v_2.Op != OpARM64FlagGT_ULT {
+			break
+		}
+		v.reset(OpCopy)
 		v.Type = x.Type
 		v.AddArg(x)
 		return true
@@ -1391,6 +2382,75 @@ func rewriteValueARM64_OpARM64CSELULT(v *Value, config *Config) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64CSELULT0(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (CSELULT0 _ (FlagEQ))
+	// cond:
+	// result: (MOVDconst [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64FlagEQ {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	// match: (CSELULT0 x (FlagLT_ULT))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64FlagLT_ULT {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (CSELULT0 _ (FlagLT_UGT))
+	// cond:
+	// result: (MOVDconst [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64FlagLT_UGT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	// match: (CSELULT0 x (FlagGT_ULT))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64FlagGT_ULT {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (CSELULT0 _ (FlagGT_UGT))
+	// cond:
+	// result: (MOVDconst [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64FlagGT_UGT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64DIV(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -2505,6 +3565,27 @@ func rewriteValueARM64_OpARM64MOVBUload(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVBstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MOVBUreg(v *Value, config *Config) bool {
@@ -2619,6 +3700,27 @@ func rewriteValueARM64_OpARM64MOVBload(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVBstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MOVBreg(v *Value, config *Config) bool {
@@ -2714,6 +3816,28 @@ func rewriteValueARM64_OpARM64MOVBstore(v *Value, config *Config) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVBstore [off] {sym} ptr (MOVDconst [0]) mem)
+	// cond:
+	// result: (MOVBstorezero [off] {sym} ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		mem := v.Args[2]
+		v.reset(OpARM64MOVBstorezero)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVBstore [off] {sym} ptr (MOVBreg x) mem)
 	// cond:
 	// result: (MOVBstore [off] {sym} ptr x mem)
@@ -2842,12 +3966,12 @@ func rewriteValueARM64_OpARM64MOVBstore(v *Value, config *Config) bool {
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64MOVBstorezero(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+	// match: (MOVBstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
 	// cond:
-	// result: (MOVDload [off1+off2] {sym} ptr mem)
+	// result: (MOVBstorezero [off1+off2] {sym} ptr mem)
 	for {
 		off1 := v.AuxInt
 		sym := v.Aux
@@ -2858,16 +3982,16 @@ func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
 		off2 := v_0.AuxInt
 		ptr := v_0.Args[0]
 		mem := v.Args[1]
-		v.reset(OpARM64MOVDload)
+		v.reset(OpARM64MOVBstorezero)
 		v.AuxInt = off1 + off2
 		v.Aux = sym
 		v.AddArg(ptr)
 		v.AddArg(mem)
 		return true
 	}
-	// match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	// match: (MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
 	// cond: canMergeSym(sym1,sym2)
-	// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	// result: (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
 	for {
 		off1 := v.AuxInt
 		sym1 := v.Aux
@@ -2882,43 +4006,113 @@ func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
 		if !(canMergeSym(sym1, sym2)) {
 			break
 		}
-		v.reset(OpARM64MOVDload)
+		v.reset(OpARM64MOVBstorezero)
 		v.AuxInt = off1 + off2
 		v.Aux = mergeSym(sym1, sym2)
 		v.AddArg(ptr)
 		v.AddArg(mem)
 		return true
 	}
-	// match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
-	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
-	// result: x
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVDstore {
-			break
-		}
-		off2 := v_1.AuxInt
-		sym2 := v_1.Aux
-		ptr2 := v_1.Args[0]
-		x := v_1.Args[1]
-		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
-			break
-		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
-		return true
-	}
 	return false
 }
-func rewriteValueARM64_OpARM64MOVDreg(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64MOVDload(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (MOVDreg x)
-	// cond: x.Uses == 1
+	// match: (MOVDload [off1] {sym} (ADDconst [off2] ptr) mem)
+	// cond:
+	// result: (MOVDload [off1+off2] {sym} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVDload)
+		v.AuxInt = off1 + off2
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARM64MOVDload)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: x
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDstore {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		x := v_1.Args[1]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64MOVDreg(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVDreg x)
+	// cond: x.Uses == 1
 	// result: (MOVDnop x)
 	for {
 		x := v.Args[0]
@@ -2995,6 +4189,77 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value, config *Config) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem)
+	// cond:
+	// result: (MOVDstorezero [off] {sym} ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		mem := v.Args[2]
+		v.reset(OpARM64MOVDstorezero)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64MOVDstorezero(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVDstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+	// cond:
+	// result: (MOVDstorezero [off1+off2] {sym} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVDstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARM64MOVDstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MOVHUload(v *Value, config *Config) bool {
@@ -3067,6 +4332,27 @@ func rewriteValueARM64_OpARM64MOVHUload(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVHstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MOVHUreg(v *Value, config *Config) bool {
@@ -3205,6 +4491,27 @@ func rewriteValueARM64_OpARM64MOVHload(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVHstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MOVHreg(v *Value, config *Config) bool {
@@ -3348,6 +4655,28 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value, config *Config) bool {
 		v.AddArg(mem)
 		return true
 	}
+	// match: (MOVHstore [off] {sym} ptr (MOVDconst [0]) mem)
+	// cond:
+	// result: (MOVHstorezero [off] {sym} ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		mem := v.Args[2]
+		v.reset(OpARM64MOVHstorezero)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
 	// match: (MOVHstore [off] {sym} ptr (MOVHreg x) mem)
 	// cond:
 	// result: (MOVHstore [off] {sym} ptr x mem)
@@ -3434,6 +4763,55 @@ func rewriteValueARM64_OpARM64MOVHstore(v *Value, config *Config) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64MOVHstorezero(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVHstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+	// cond:
+	// result: (MOVHstorezero [off1+off2] {sym} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVHstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARM64MOVHstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpARM64MOVWUload(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
@@ -3504,9 +4882,30 @@ func rewriteValueARM64_OpARM64MOVWUload(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
-	return false
-}
-func rewriteValueARM64_OpARM64MOVWUreg(v *Value, config *Config) bool {
+	// match: (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVWstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64MOVWUreg(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
 	// match: (MOVWUreg x:(MOVBUload _ _))
@@ -3666,6 +5065,27 @@ func rewriteValueARM64_OpARM64MOVWload(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _))
+	// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
+	// result: (MOVDconst [0])
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVWstorezero {
+			break
+		}
+		off2 := v_1.AuxInt
+		sym2 := v_1.Aux
+		ptr2 := v_1.Args[0]
+		if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64MOVWreg(v *Value, config *Config) bool {
@@ -3831,81 +5251,585 @@ func rewriteValueARM64_OpARM64MOVWstore(v *Value, config *Config) bool {
 		v.AddArg(mem)
 		return true
 	}
-	// match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
-	// cond: canMergeSym(sym1,sym2)
-	// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+	// match: (MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		val := v.Args[1]
+		mem := v.Args[2]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARM64MOVWstore)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(val)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVWstore [off] {sym} ptr (MOVDconst [0]) mem)
+	// cond:
+	// result: (MOVWstorezero [off] {sym} ptr mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		mem := v.Args[2]
+		v.reset(OpARM64MOVWstorezero)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+	// cond:
+	// result: (MOVWstore [off] {sym} ptr x mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVWreg {
+			break
+		}
+		x := v_1.Args[0]
+		mem := v.Args[2]
+		v.reset(OpARM64MOVWstore)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(x)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
+	// cond:
+	// result: (MOVWstore [off] {sym} ptr x mem)
+	for {
+		off := v.AuxInt
+		sym := v.Aux
+		ptr := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVWUreg {
+			break
+		}
+		x := v_1.Args[0]
+		mem := v.Args[2]
+		v.reset(OpARM64MOVWstore)
+		v.AuxInt = off
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(x)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64MOVWstorezero(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MOVWstorezero [off1] {sym} (ADDconst [off2] ptr) mem)
+	// cond:
+	// result: (MOVWstorezero [off1+off2] {sym} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		off2 := v_0.AuxInt
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		v.reset(OpARM64MOVWstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = sym
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	// match: (MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
+	// cond: canMergeSym(sym1,sym2)
+	// result: (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+	for {
+		off1 := v.AuxInt
+		sym1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDaddr {
+			break
+		}
+		off2 := v_0.AuxInt
+		sym2 := v_0.Aux
+		ptr := v_0.Args[0]
+		mem := v.Args[1]
+		if !(canMergeSym(sym1, sym2)) {
+			break
+		}
+		v.reset(OpARM64MOVWstorezero)
+		v.AuxInt = off1 + off2
+		v.Aux = mergeSym(sym1, sym2)
+		v.AddArg(ptr)
+		v.AddArg(mem)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (MUL x (MOVDconst [-1]))
+	// cond:
+	// result: (NEG x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != -1 {
+			break
+		}
+		v.reset(OpARM64NEG)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL _ (MOVDconst [0]))
+	// cond:
+	// result: (MOVDconst [0])
+	for {
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0 {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	// match: (MUL x (MOVDconst [1]))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 1 {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c)
+	// result: (SLLconst [log2(c)] x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c-1) && c >= 3
+	// result: (ADDshiftLL x x [log2(c-1)])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c-1) && c >= 3) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c - 1)
+		v.AddArg(x)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c+1) && c >= 7
+	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c+1) && c >= 7) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c + 1)
+		v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: c%3 == 0 && isPowerOfTwo(c/3)
+	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 3)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 1
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: c%5 == 0 && isPowerOfTwo(c/5)
+	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 5)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 2
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: c%7 == 0 && isPowerOfTwo(c/7)
+	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 7)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL x (MOVDconst [c]))
+	// cond: c%9 == 0 && isPowerOfTwo(c/9)
+	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 9)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL (MOVDconst [-1]) x)
+	// cond:
+	// result: (NEG x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_0.AuxInt != -1 {
+			break
+		}
+		x := v.Args[1]
+		v.reset(OpARM64NEG)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL (MOVDconst [0]) _)
+	// cond:
+	// result: (MOVDconst [0])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_0.AuxInt != 0 {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	// match: (MUL (MOVDconst [1]) x)
+	// cond:
+	// result: x
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		if v_0.AuxInt != 1 {
+			break
+		}
+		x := v.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c)
+	// result: (SLLconst [log2(c)] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c)
+	// result: (SLLconst [log2(c)] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c-1) && c >= 3
+	// result: (ADDshiftLL x x [log2(c-1)])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c-1) && c >= 3) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c - 1)
+		v.AddArg(x)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c+1) && c >= 7
+	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c+1) && c >= 7) {
+			break
+		}
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c + 1)
+		v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: c%3 == 0 && isPowerOfTwo(c/3)
+	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 3)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 1
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: c%5 == 0 && isPowerOfTwo(c/5)
+	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 5)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 2
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: c%7 == 0 && isPowerOfTwo(c/7)
+	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%7 == 0 && isPowerOfTwo(c/7)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 7)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MUL (MOVDconst [c]) x)
+	// cond: c%9 == 0 && isPowerOfTwo(c/9)
+	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 	for {
-		off1 := v.AuxInt
-		sym1 := v.Aux
 		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDaddr {
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		off2 := v_0.AuxInt
-		sym2 := v_0.Aux
-		ptr := v_0.Args[0]
-		val := v.Args[1]
-		mem := v.Args[2]
-		if !(canMergeSym(sym1, sym2)) {
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(c%9 == 0 && isPowerOfTwo(c/9)) {
 			break
 		}
-		v.reset(OpARM64MOVWstore)
-		v.AuxInt = off1 + off2
-		v.Aux = mergeSym(sym1, sym2)
-		v.AddArg(ptr)
-		v.AddArg(val)
-		v.AddArg(mem)
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 9)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (MOVWstore [off] {sym} ptr (MOVWreg x) mem)
+	// match: (MUL   (MOVDconst [c]) (MOVDconst [d]))
 	// cond:
-	// result: (MOVWstore [off] {sym} ptr x mem)
+	// result: (MOVDconst [c*d])
 	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVWreg {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		x := v_1.Args[0]
-		mem := v.Args[2]
-		v.reset(OpARM64MOVWstore)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg(ptr)
-		v.AddArg(x)
-		v.AddArg(mem)
-		return true
-	}
-	// match: (MOVWstore [off] {sym} ptr (MOVWUreg x) mem)
-	// cond:
-	// result: (MOVWstore [off] {sym} ptr x mem)
-	for {
-		off := v.AuxInt
-		sym := v.Aux
-		ptr := v.Args[0]
+		c := v_0.AuxInt
 		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVWUreg {
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		x := v_1.Args[0]
-		mem := v.Args[2]
-		v.reset(OpARM64MOVWstore)
-		v.AuxInt = off
-		v.Aux = sym
-		v.AddArg(ptr)
-		v.AddArg(x)
-		v.AddArg(mem)
+		d := v_1.AuxInt
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = c * d
 		return true
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (MUL x (MOVDconst [-1]))
-	// cond:
+	// match: (MULW x (MOVDconst [c]))
+	// cond: int32(c)==-1
 	// result: (NEG x)
 	for {
 		x := v.Args[0]
@@ -3913,30 +5837,32 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
 		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		if v_1.AuxInt != -1 {
+		c := v_1.AuxInt
+		if !(int32(c) == -1) {
 			break
 		}
 		v.reset(OpARM64NEG)
 		v.AddArg(x)
 		return true
 	}
-	// match: (MUL _ (MOVDconst [0]))
-	// cond:
+	// match: (MULW _ (MOVDconst [c]))
+	// cond: int32(c)==0
 	// result: (MOVDconst [0])
 	for {
 		v_1 := v.Args[1]
 		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		if v_1.AuxInt != 0 {
+		c := v_1.AuxInt
+		if !(int32(c) == 0) {
 			break
 		}
 		v.reset(OpARM64MOVDconst)
 		v.AuxInt = 0
 		return true
 	}
-	// match: (MUL x (MOVDconst [1]))
-	// cond:
+	// match: (MULW x (MOVDconst [c]))
+	// cond: int32(c)==1
 	// result: x
 	for {
 		x := v.Args[0]
@@ -3944,7 +5870,8 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
 		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		if v_1.AuxInt != 1 {
+		c := v_1.AuxInt
+		if !(int32(c) == 1) {
 			break
 		}
 		v.reset(OpCopy)
@@ -3952,7 +5879,7 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (MUL x (MOVDconst [c]))
+	// match: (MULW x (MOVDconst [c]))
 	// cond: isPowerOfTwo(c)
 	// result: (SLLconst [log2(c)] x)
 	for {
@@ -3970,98 +5897,117 @@ func rewriteValueARM64_OpARM64MUL(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (MUL (MOVDconst [-1]) x)
-	// cond:
-	// result: (NEG x)
+	// match: (MULW x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c-1) && int32(c) >= 3
+	// result: (ADDshiftLL x x [log2(c-1)])
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		if v_0.AuxInt != -1 {
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
 			break
 		}
-		x := v.Args[1]
-		v.reset(OpARM64NEG)
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c - 1)
+		v.AddArg(x)
 		v.AddArg(x)
 		return true
 	}
-	// match: (MUL (MOVDconst [0]) _)
-	// cond:
-	// result: (MOVDconst [0])
+	// match: (MULW x (MOVDconst [c]))
+	// cond: isPowerOfTwo(c+1) && int32(c) >= 7
+	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		if v_0.AuxInt != 0 {
+		c := v_1.AuxInt
+		if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
 			break
 		}
-		v.reset(OpARM64MOVDconst)
-		v.AuxInt = 0
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c + 1)
+		v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		v.AddArg(x)
 		return true
 	}
-	// match: (MUL (MOVDconst [1]) x)
-	// cond:
-	// result: x
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		if v_0.AuxInt != 1 {
+		c := v_1.AuxInt
+		if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
 			break
 		}
-		x := v.Args[1]
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 3)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 1
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (MUL (MOVDconst [c]) x)
-	// cond: isPowerOfTwo(c)
-	// result: (SLLconst [log2(c)] x)
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		c := v_0.AuxInt
-		x := v.Args[1]
-		if !(isPowerOfTwo(c)) {
+		c := v_1.AuxInt
+		if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
 			break
 		}
 		v.reset(OpARM64SLLconst)
-		v.AuxInt = log2(c)
-		v.AddArg(x)
+		v.AuxInt = log2(c / 5)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 2
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (MUL   (MOVDconst [c]) (MOVDconst [d]))
-	// cond:
-	// result: (MOVDconst [c*d])
+	// match: (MULW x (MOVDconst [c]))
+	// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
 	for {
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64MOVDconst {
-			break
-		}
-		c := v_0.AuxInt
+		x := v.Args[0]
 		v_1 := v.Args[1]
 		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		d := v_1.AuxInt
-		v.reset(OpARM64MOVDconst)
-		v.AuxInt = c * d
+		c := v_1.AuxInt
+		if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 7)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	return false
-}
-func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
-	b := v.Block
-	_ = b
 	// match: (MULW x (MOVDconst [c]))
-	// cond: int32(c)==-1
-	// result: (NEG x)
+	// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 	for {
 		x := v.Args[0]
 		v_1 := v.Args[1]
@@ -4069,6 +6015,28 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 			break
 		}
 		c := v_1.AuxInt
+		if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 9)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: int32(c)==-1
+	// result: (NEG x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
 		if !(int32(c) == -1) {
 			break
 		}
@@ -4076,15 +6044,15 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (MULW _ (MOVDconst [c]))
+	// match: (MULW (MOVDconst [c]) _)
 	// cond: int32(c)==0
 	// result: (MOVDconst [0])
 	for {
-		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVDconst {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		c := v_1.AuxInt
+		c := v_0.AuxInt
 		if !(int32(c) == 0) {
 			break
 		}
@@ -4092,45 +6060,85 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 		v.AuxInt = 0
 		return true
 	}
-	// match: (MULW x (MOVDconst [c]))
+	// match: (MULW (MOVDconst [c]) x)
 	// cond: int32(c)==1
 	// result: x
 	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVDconst {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(int32(c) == 1) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c)
+	// result: (SLLconst [log2(c)] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c)) {
+			break
+		}
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c)
+		v.AddArg(x)
+		return true
+	}
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c-1) && int32(c) >= 3
+	// result: (ADDshiftLL x x [log2(c-1)])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		c := v_1.AuxInt
-		if !(int32(c) == 1) {
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c-1) && int32(c) >= 3) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c - 1)
+		v.AddArg(x)
 		v.AddArg(x)
 		return true
 	}
-	// match: (MULW x (MOVDconst [c]))
-	// cond: isPowerOfTwo(c)
-	// result: (SLLconst [log2(c)] x)
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: isPowerOfTwo(c+1) && int32(c) >= 7
+	// result: (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
 	for {
-		x := v.Args[0]
-		v_1 := v.Args[1]
-		if v_1.Op != OpARM64MOVDconst {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		c := v_1.AuxInt
-		if !(isPowerOfTwo(c)) {
+		c := v_0.AuxInt
+		x := v.Args[1]
+		if !(isPowerOfTwo(c+1) && int32(c) >= 7) {
 			break
 		}
-		v.reset(OpARM64SLLconst)
-		v.AuxInt = log2(c)
+		v.reset(OpARM64ADDshiftLL)
+		v.AuxInt = log2(c + 1)
+		v0 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		v.AddArg(x)
 		return true
 	}
 	// match: (MULW (MOVDconst [c]) x)
-	// cond: int32(c)==-1
-	// result: (NEG x)
+	// cond: c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)
+	// result: (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
 	for {
 		v_0 := v.Args[0]
 		if v_0.Op != OpARM64MOVDconst {
@@ -4138,32 +6146,43 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 		}
 		c := v_0.AuxInt
 		x := v.Args[1]
-		if !(int32(c) == -1) {
+		if !(c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c)) {
 			break
 		}
-		v.reset(OpARM64NEG)
-		v.AddArg(x)
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 3)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 1
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (MULW (MOVDconst [c]) _)
-	// cond: int32(c)==0
-	// result: (MOVDconst [0])
+	// match: (MULW (MOVDconst [c]) x)
+	// cond: c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)
+	// result: (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
 	for {
 		v_0 := v.Args[0]
 		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
 		c := v_0.AuxInt
-		if !(int32(c) == 0) {
+		x := v.Args[1]
+		if !(c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c)) {
 			break
 		}
-		v.reset(OpARM64MOVDconst)
-		v.AuxInt = 0
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 5)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 2
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
 	// match: (MULW (MOVDconst [c]) x)
-	// cond: int32(c)==1
-	// result: x
+	// cond: c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)
+	// result: (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
 	for {
 		v_0 := v.Args[0]
 		if v_0.Op != OpARM64MOVDconst {
@@ -4171,17 +6190,23 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 		}
 		c := v_0.AuxInt
 		x := v.Args[1]
-		if !(int32(c) == 1) {
+		if !(c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c)) {
 			break
 		}
-		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
+		v.reset(OpARM64SLLconst)
+		v.AuxInt = log2(c / 7)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v1 := b.NewValue0(v.Line, OpARM64NEG, x.Type)
+		v1.AddArg(x)
+		v0.AddArg(v1)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
 	// match: (MULW (MOVDconst [c]) x)
-	// cond: isPowerOfTwo(c)
-	// result: (SLLconst [log2(c)] x)
+	// cond: c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)
+	// result: (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 	for {
 		v_0 := v.Args[0]
 		if v_0.Op != OpARM64MOVDconst {
@@ -4189,12 +6214,16 @@ func rewriteValueARM64_OpARM64MULW(v *Value, config *Config) bool {
 		}
 		c := v_0.AuxInt
 		x := v.Args[1]
-		if !(isPowerOfTwo(c)) {
+		if !(c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c)) {
 			break
 		}
 		v.reset(OpARM64SLLconst)
-		v.AuxInt = log2(c)
-		v.AddArg(x)
+		v.AuxInt = log2(c / 9)
+		v0 := b.NewValue0(v.Line, OpARM64ADDshiftLL, x.Type)
+		v0.AuxInt = 3
+		v0.AddArg(x)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
 	// match: (MULW  (MOVDconst [c]) (MOVDconst [d]))
@@ -4309,35 +6338,245 @@ func rewriteValueARM64_OpARM64NotEqual(v *Value, config *Config) bool {
 	// result: (MOVDconst [1])
 	for {
 		v_0 := v.Args[0]
-		if v_0.Op != OpARM64FlagGT_UGT {
+		if v_0.Op != OpARM64FlagGT_UGT {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 1
+		return true
+	}
+	// match: (NotEqual (InvertFlags x))
+	// cond:
+	// result: (NotEqual x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64InvertFlags {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpARM64NotEqual)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (OR  (MOVDconst [c]) x)
+	// cond:
+	// result: (ORconst  [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (OR  x (MOVDconst [c]))
+	// cond:
+	// result: (ORconst  [c] x)
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (OR  x x)
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x != v.Args[1] {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (OR  x (SLLconst [c] y))
+	// cond:
+	// result: (ORshiftLL  x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ORshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR  (SLLconst [c] y) x)
+	// cond:
+	// result: (ORshiftLL  x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ORshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR  x (SRLconst [c] y))
+	// cond:
+	// result: (ORshiftRL  x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ORshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR  (SRLconst [c] y) x)
+	// cond:
+	// result: (ORshiftRL  x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ORshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR  x (SRAconst [c] y))
+	// cond:
+	// result: (ORshiftRA  x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64ORshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (OR  (SRAconst [c] y) x)
+	// cond:
+	// result: (ORshiftRA  x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64ORshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64ORconst(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ORconst  [0]  x)
+	// cond:
+	// result: x
+	for {
+		if v.AuxInt != 0 {
+			break
+		}
+		x := v.Args[0]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (ORconst  [-1] _)
+	// cond:
+	// result: (MOVDconst [-1])
+	for {
+		if v.AuxInt != -1 {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = -1
+		return true
+	}
+	// match: (ORconst  [c] (MOVDconst [d]))
+	// cond:
+	// result: (MOVDconst [c|d])
+	for {
+		c := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
+		d := v_0.AuxInt
 		v.reset(OpARM64MOVDconst)
-		v.AuxInt = 1
+		v.AuxInt = c | d
 		return true
 	}
-	// match: (NotEqual (InvertFlags x))
+	// match: (ORconst  [c] (ORconst [d] x))
 	// cond:
-	// result: (NotEqual x)
+	// result: (ORconst [c|d] x)
 	for {
+		c := v.AuxInt
 		v_0 := v.Args[0]
-		if v_0.Op != OpARM64InvertFlags {
+		if v_0.Op != OpARM64ORconst {
 			break
 		}
+		d := v_0.AuxInt
 		x := v_0.Args[0]
-		v.reset(OpARM64NotEqual)
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c | d
 		v.AddArg(x)
 		return true
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64ORshiftLL(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (OR  (MOVDconst [c]) x)
+	// match: (ORshiftLL  (MOVDconst [c]) x [d])
 	// cond:
-	// result: (ORconst  [c] x)
+	// result: (ORconst  [c] (SLLconst <x.Type> x [d]))
 	for {
+		d := v.AuxInt
 		v_0 := v.Args[0]
 		if v_0.Op != OpARM64MOVDconst {
 			break
@@ -4346,13 +6585,17 @@ func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
 		x := v.Args[1]
 		v.reset(OpARM64ORconst)
 		v.AuxInt = c
-		v.AddArg(x)
+		v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (OR  x (MOVDconst [c]))
+	// match: (ORshiftLL  x (MOVDconst [c]) [d])
 	// cond:
-	// result: (ORconst  [c] x)
+	// result: (ORconst  x [int64(uint64(c)<<uint64(d))])
 	for {
+		d := v.AuxInt
 		x := v.Args[0]
 		v_1 := v.Args[1]
 		if v_1.Op != OpARM64MOVDconst {
@@ -4360,82 +6603,156 @@ func rewriteValueARM64_OpARM64OR(v *Value, config *Config) bool {
 		}
 		c := v_1.AuxInt
 		v.reset(OpARM64ORconst)
-		v.AuxInt = c
+		v.AuxInt = int64(uint64(c) << uint64(d))
 		v.AddArg(x)
 		return true
 	}
-	// match: (OR  x x)
-	// cond:
-	// result: x
+	// match: (ORshiftLL  x y:(SLLconst x [c]) [d])
+	// cond: c==d
+	// result: y
 	for {
+		d := v.AuxInt
 		x := v.Args[0]
-		if x != v.Args[1] {
+		y := v.Args[1]
+		if y.Op != OpARM64SLLconst {
+			break
+		}
+		c := y.AuxInt
+		if x != y.Args[0] {
+			break
+		}
+		if !(c == d) {
 			break
 		}
 		v.reset(OpCopy)
-		v.Type = x.Type
-		v.AddArg(x)
+		v.Type = y.Type
+		v.AddArg(y)
 		return true
 	}
 	return false
 }
-func rewriteValueARM64_OpARM64ORconst(v *Value, config *Config) bool {
+func rewriteValueARM64_OpARM64ORshiftRA(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
-	// match: (ORconst  [0]  x)
+	// match: (ORshiftRA  (MOVDconst [c]) x [d])
 	// cond:
-	// result: x
+	// result: (ORconst  [c] (SRAconst <x.Type> x [d]))
 	for {
-		if v.AuxInt != 0 {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (ORshiftRA  x (MOVDconst [c]) [d])
+	// cond:
+	// result: (ORconst  x [int64(int64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
 		x := v.Args[0]
-		v.reset(OpCopy)
-		v.Type = x.Type
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64ORconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
 		v.AddArg(x)
 		return true
 	}
-	// match: (ORconst  [-1] _)
-	// cond:
-	// result: (MOVDconst [-1])
+	// match: (ORshiftRA  x y:(SRAconst x [c]) [d])
+	// cond: c==d
+	// result: y
 	for {
-		if v.AuxInt != -1 {
+		d := v.AuxInt
+		x := v.Args[0]
+		y := v.Args[1]
+		if y.Op != OpARM64SRAconst {
 			break
 		}
-		v.reset(OpARM64MOVDconst)
-		v.AuxInt = -1
+		c := y.AuxInt
+		if x != y.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
 		return true
 	}
-	// match: (ORconst  [c] (MOVDconst [d]))
+	return false
+}
+func rewriteValueARM64_OpARM64ORshiftRL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (ORshiftRL  (MOVDconst [c]) x [d])
 	// cond:
-	// result: (MOVDconst [c|d])
+	// result: (ORconst  [c] (SRLconst <x.Type> x [d]))
 	for {
-		c := v.AuxInt
+		d := v.AuxInt
 		v_0 := v.Args[0]
 		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		d := v_0.AuxInt
-		v.reset(OpARM64MOVDconst)
-		v.AuxInt = c | d
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64ORconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
-	// match: (ORconst  [c] (ORconst [d] x))
+	// match: (ORshiftRL  x (MOVDconst [c]) [d])
 	// cond:
-	// result: (ORconst [c|d] x)
+	// result: (ORconst  x [int64(uint64(c)>>uint64(d))])
 	for {
-		c := v.AuxInt
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64ORconst {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		d := v_0.AuxInt
-		x := v_0.Args[0]
+		c := v_1.AuxInt
 		v.reset(OpARM64ORconst)
-		v.AuxInt = c | d
+		v.AuxInt = int64(uint64(c) >> uint64(d))
 		v.AddArg(x)
 		return true
 	}
+	// match: (ORshiftRL  x y:(SRLconst x [c]) [d])
+	// cond: c==d
+	// result: y
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		y := v.Args[1]
+		if y.Op != OpARM64SRLconst {
+			break
+		}
+		c := y.AuxInt
+		if x != y.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64SLL(v *Value, config *Config) bool {
@@ -4585,6 +6902,57 @@ func rewriteValueARM64_OpARM64SUB(v *Value, config *Config) bool {
 		v.AuxInt = 0
 		return true
 	}
+	// match: (SUB x (SLLconst [c] y))
+	// cond:
+	// result: (SUBshiftLL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64SUBshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (SUB x (SRLconst [c] y))
+	// cond:
+	// result: (SUBshiftRL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64SUBshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (SUB x (SRAconst [c] y))
+	// cond:
+	// result: (SUBshiftRA x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64SUBshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64SUBconst(v *Value, config *Config) bool {
@@ -4612,41 +6980,167 @@ func rewriteValueARM64_OpARM64SUBconst(v *Value, config *Config) bool {
 		if v_0.Op != OpARM64MOVDconst {
 			break
 		}
-		d := v_0.AuxInt
+		d := v_0.AuxInt
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = d - c
+		return true
+	}
+	// match: (SUBconst [c] (SUBconst [d] x))
+	// cond:
+	// result: (ADDconst [-c-d] x)
+	for {
+		c := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SUBconst {
+			break
+		}
+		d := v_0.AuxInt
+		x := v_0.Args[0]
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = -c - d
+		v.AddArg(x)
+		return true
+	}
+	// match: (SUBconst [c] (ADDconst [d] x))
+	// cond:
+	// result: (ADDconst [-c+d] x)
+	for {
+		c := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64ADDconst {
+			break
+		}
+		d := v_0.AuxInt
+		x := v_0.Args[0]
+		v.reset(OpARM64ADDconst)
+		v.AuxInt = -c + d
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64SUBshiftLL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (SUBshiftLL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (SUBconst x [int64(uint64(c)<<uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64SUBconst)
+		v.AuxInt = int64(uint64(c) << uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SUBshiftLL x (SLLconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64SUBshiftRA(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (SUBshiftRA x (MOVDconst [c]) [d])
+	// cond:
+	// result: (SUBconst x [int64(int64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64SUBconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (SUBshiftRA x (SRAconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
 		v.reset(OpARM64MOVDconst)
-		v.AuxInt = d - c
+		v.AuxInt = 0
 		return true
 	}
-	// match: (SUBconst [c] (SUBconst [d] x))
+	return false
+}
+func rewriteValueARM64_OpARM64SUBshiftRL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (SUBshiftRL x (MOVDconst [c]) [d])
 	// cond:
-	// result: (ADDconst [-c-d] x)
+	// result: (SUBconst x [int64(uint64(c)>>uint64(d))])
 	for {
-		c := v.AuxInt
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64SUBconst {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
 			break
 		}
-		d := v_0.AuxInt
-		x := v_0.Args[0]
-		v.reset(OpARM64ADDconst)
-		v.AuxInt = -c - d
+		c := v_1.AuxInt
+		v.reset(OpARM64SUBconst)
+		v.AuxInt = int64(uint64(c) >> uint64(d))
 		v.AddArg(x)
 		return true
 	}
-	// match: (SUBconst [c] (ADDconst [d] x))
-	// cond:
-	// result: (ADDconst [-c+d] x)
+	// match: (SUBshiftRL x (SRLconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
 	for {
-		c := v.AuxInt
-		v_0 := v.Args[0]
-		if v_0.Op != OpARM64ADDconst {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
 			break
 		}
-		d := v_0.AuxInt
-		x := v_0.Args[0]
-		v.reset(OpARM64ADDconst)
-		v.AuxInt = -c + d
-		v.AddArg(x)
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
 		return true
 	}
 	return false
@@ -4926,6 +7420,108 @@ func rewriteValueARM64_OpARM64XOR(v *Value, config *Config) bool {
 		v.AuxInt = 0
 		return true
 	}
+	// match: (XOR x (SLLconst [c] y))
+	// cond:
+	// result: (XORshiftLL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64XORshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (XOR (SLLconst [c] y) x)
+	// cond:
+	// result: (XORshiftLL x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64XORshiftLL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (XOR x (SRLconst [c] y))
+	// cond:
+	// result: (XORshiftRL x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64XORshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (XOR (SRLconst [c] y) x)
+	// cond:
+	// result: (XORshiftRL x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64XORshiftRL)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (XOR x (SRAconst [c] y))
+	// cond:
+	// result: (XORshiftRA x y [c])
+	for {
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		y := v_1.Args[0]
+		v.reset(OpARM64XORshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
+	// match: (XOR (SRAconst [c] y) x)
+	// cond:
+	// result: (XORshiftRA x y [c])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v_0.Args[0]
+		x := v.Args[1]
+		v.reset(OpARM64XORshiftRA)
+		v.AuxInt = c
+		v.AddArg(x)
+		v.AddArg(y)
+		return true
+	}
 	return false
 }
 func rewriteValueARM64_OpARM64XORconst(v *Value, config *Config) bool {
@@ -4988,6 +7584,189 @@ func rewriteValueARM64_OpARM64XORconst(v *Value, config *Config) bool {
 	}
 	return false
 }
+func rewriteValueARM64_OpARM64XORshiftLL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (XORshiftLL (MOVDconst [c]) x [d])
+	// cond:
+	// result: (XORconst [c] (SLLconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64XORconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SLLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (XORshiftLL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (XORconst x [int64(uint64(c)<<uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64XORconst)
+		v.AuxInt = int64(uint64(c) << uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORshiftLL x (SLLconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SLLconst {
+			break
+		}
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64XORshiftRA(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (XORshiftRA (MOVDconst [c]) x [d])
+	// cond:
+	// result: (XORconst [c] (SRAconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64XORconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRAconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (XORshiftRA x (MOVDconst [c]) [d])
+	// cond:
+	// result: (XORconst x [int64(int64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64XORconst)
+		v.AuxInt = int64(int64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORshiftRA x (SRAconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRAconst {
+			break
+		}
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
+func rewriteValueARM64_OpARM64XORshiftRL(v *Value, config *Config) bool {
+	b := v.Block
+	_ = b
+	// match: (XORshiftRL (MOVDconst [c]) x [d])
+	// cond:
+	// result: (XORconst [c] (SRLconst <x.Type> x [d]))
+	for {
+		d := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v.Args[1]
+		v.reset(OpARM64XORconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Line, OpARM64SRLconst, x.Type)
+		v0.AuxInt = d
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (XORshiftRL x (MOVDconst [c]) [d])
+	// cond:
+	// result: (XORconst x [int64(uint64(c)>>uint64(d))])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		v.reset(OpARM64XORconst)
+		v.AuxInt = int64(uint64(c) >> uint64(d))
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORshiftRL x (SRLconst x [c]) [d])
+	// cond: c==d
+	// result: (MOVDconst [0])
+	for {
+		d := v.AuxInt
+		x := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpARM64SRLconst {
+			break
+		}
+		c := v_1.AuxInt
+		if x != v_1.Args[0] {
+			break
+		}
+		if !(c == d) {
+			break
+		}
+		v.reset(OpARM64MOVDconst)
+		v.AuxInt = 0
+		return true
+	}
+	return false
+}
 func rewriteValueARM64_OpAdd16(v *Value, config *Config) bool {
 	b := v.Block
 	_ = b
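Note on the MULW rules in the hunk above: they strength-reduce a 32-bit multiply by constants of the form 3*2^k, 5*2^k, 7*2^k and 9*2^k (and the 2^k+1 / 2^k-1 cases) into an ADDshiftLL, optionally a NEG, followed by an SLLconst. The stand-alone sketch below is not part of the patch; it only spot-checks those identities with ordinary wrap-around 32-bit arithmetic, and the constants chosen (48, 20, 14, 72) are arbitrary examples.

// mulw_identity_check.go (illustrative only, not part of the patch)
// Verifies the shift/add forms used by the new MULW rewrite rules:
//   c == 3*2^k  =>  x*c == (x + x<<1) << k
//   c == 5*2^k  =>  x*c == (x + x<<2) << k
//   c == 7*2^k  =>  x*c == (x<<3 - x) << k
//   c == 9*2^k  =>  x*c == (x + x<<3) << k
package main

import "fmt"

func main() {
	x := int32(123456789)
	cases := []struct {
		c      int32
		reduce func(x int32) int32
	}{
		{3 << 4, func(x int32) int32 { return (x + x<<1) << 4 }}, // 48 = 3 * 2^4
		{5 << 2, func(x int32) int32 { return (x + x<<2) << 2 }}, // 20 = 5 * 2^2
		{7 << 1, func(x int32) int32 { return (x<<3 - x) << 1 }}, // 14 = 7 * 2^1
		{9 << 3, func(x int32) int32 { return (x + x<<3) << 3 }}, // 72 = 9 * 2^3
	}
	for _, tc := range cases {
		got, want := tc.reduce(x), x*tc.c
		fmt.Printf("c=%d: shift form=%d multiply=%d equal=%v\n", tc.c, got, want, got == want)
	}
}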
diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go
index ad52be66b8dfd880201aa3a845efd06abf5cb67d..9a02452158989790c925bf33cc357173c00fc577 100644
--- a/src/cmd/internal/obj/arm64/a.out.go
+++ b/src/cmd/internal/obj/arm64/a.out.go
@@ -714,3 +714,10 @@ const (
 	AB  = obj.AJMP
 	ABL = obj.ACALL
 )
+
+const (
+	// shift types
+	SHIFT_LL = 0 << 22
+	SHIFT_LR = 1 << 22
+	SHIFT_AR = 2 << 22
+)
diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go
index b6861f4c1e572c7b112ea61c2c685928688c1c1d..3c66eecbf0dd803c9d6ea3ddd7e2c2ca0f96318a 100644
--- a/src/cmd/internal/obj/link.go
+++ b/src/cmd/internal/obj/link.go
@@ -112,13 +112,17 @@ import (
 //			val = int32(y)
 //
 //	reg<<shift, reg>>shift, reg->shift, reg@>shift
-//		Shifted register value, for ARM.
+//		Shifted register value, for ARM and ARM64.
 //		In this form, reg must be a register and shift can be a register or an integer constant.
 //		Encoding:
 //			type = TYPE_SHIFT
+//		On ARM:
 //			offset = (reg&15) | shifttype<<5 | count
 //			shifttype = 0, 1, 2, 3 for <<, >>, ->, @>
 //			count = (reg&15)<<8 | 1<<4 for a register shift count, (n&31)<<7 for an integer constant.
+//		On ARM64:
+//			offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10
+//			shifttype = 0, 1, 2 for <<, >>, ->
 //
 //	(reg, reg)
 //		A destination register pair. When used as the last argument of an instruction,
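For reference, a small stand-alone sketch (not part of the patch) that packs and unpacks an ARM64 TYPE_SHIFT offset according to the layout documented in the comment above, offset = (reg&31)<<16 | shifttype<<22 | (count&63)<<10. The local shiftLL/shiftLR/shiftAR constants mirror the new SHIFT_* values, and the register number and shift count are made-up examples.

// shift_offset_demo.go (illustrative only, not part of the patch)
package main

import "fmt"

const (
	shiftLL = 0 << 22 // <<
	shiftLR = 1 << 22 // >>
	shiftAR = 2 << 22 // ->
)

// pack builds a TYPE_SHIFT offset from a register number, shift type and count.
func pack(reg, typ, count int64) int64 {
	return (reg&31)<<16 | typ | (count&63)<<10
}

func main() {
	off := pack(1, shiftLL, 2) // register 1 shifted left by 2, i.e. R1<<2
	reg := (off >> 16) & 31
	typ := (off >> 22) & 3
	count := (off >> 10) & 63
	fmt.Printf("offset=%#x -> reg=%d typ=%d count=%d\n", off, reg, typ, count)
}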
diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go
index 18813c35a8ea3ecb173e33204e5511c3d3d5e805..c8f8760af1a6151fcd9d63c1494269d3ac9739ea 100644
--- a/src/cmd/internal/obj/util.go
+++ b/src/cmd/internal/obj/util.go
@@ -286,14 +286,23 @@ func Dconv(p *Prog, a *Addr) string {
 
 	case TYPE_SHIFT:
 		v := int(a.Offset)
-		op := "<<>>->@>"[((v>>5)&3)<<1:]
-		if v&(1<<4) != 0 {
-			str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
-		} else {
-			str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
-		}
-		if a.Reg != 0 {
-			str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
+		ops := "<<>>->@>"
+		switch goarch := Getgoarch(); goarch {
+		case "arm":
+			op := ops[((v>>5)&3)<<1:]
+			if v&(1<<4) != 0 {
+				str = fmt.Sprintf("R%d%c%cR%d", v&15, op[0], op[1], (v>>8)&15)
+			} else {
+				str = fmt.Sprintf("R%d%c%c%d", v&15, op[0], op[1], (v>>7)&31)
+			}
+			if a.Reg != 0 {
+				str += fmt.Sprintf("(%v)", Rconv(int(a.Reg)))
+			}
+		case "arm64":
+			op := ops[((v>>22)&3)<<1:]
+			str = fmt.Sprintf("R%d%c%c%d", (v>>16)&31, op[0], op[1], (v>>10)&63)
+		default:
+			panic("TYPE_SHIFT is not supported on " + goarch)
 		}
 
 	case TYPE_REGREG:
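To show what the new arm64 branch of Dconv prints, here is a minimal stand-alone sketch (not part of the patch) that mirrors its formatting logic for one sample offset; the sample register and shift count are invented for illustration.

// dconv_arm64_demo.go (illustrative only, not part of the patch)
// Mirrors the arm64 TYPE_SHIFT formatting above for a single sample value.
package main

import "fmt"

func main() {
	v := (1&31)<<16 | 0<<22 | (2&63)<<10 // register 1, shift type <<, count 2
	ops := "<<>>->@>"
	op := ops[((v>>22)&3)<<1:]
	str := fmt.Sprintf("R%d%c%c%d", (v>>16)&31, op[0], op[1], (v>>10)&63)
	fmt.Println(str) // prints: R1<<2
}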
diff --git a/test/live.go b/test/live.go
index 78ba498a362771620c8233f364f46efa4424ad26..8675840d0ec0a2f8a7cfde6fa2cab9509ddc31af 100644
--- a/test/live.go
+++ b/test/live.go
@@ -1,4 +1,4 @@
-// +build !amd64,!arm,!amd64p32,!386
+// +build !amd64,!arm,!amd64p32,!386,!arm64
 // errorcheck -0 -l -live -wb=0
 
 // Copyright 2014 The Go Authors. All rights reserved.
diff --git a/test/live_ssa.go b/test/live_ssa.go
index 4da31c6f4e442d4888172e500f46302cc30053be..881f139a20fe2687400454174e6093f2cd911c0d 100644
--- a/test/live_ssa.go
+++ b/test/live_ssa.go
@@ -1,4 +1,4 @@
-// +build amd64 arm amd64p32 386
+// +build amd64 arm amd64p32 386 arm64
 // errorcheck -0 -l -live -wb=0
 
 // Copyright 2014 The Go Authors. All rights reserved.
diff --git a/test/nilptr3.go b/test/nilptr3.go
index 5b174e02273d2d82eaee032d18ed62c96962871a..a81efb7d8e40f11a12ef2ece7b401adc003ed512 100644
--- a/test/nilptr3.go
+++ b/test/nilptr3.go
@@ -2,7 +2,7 @@
 // Fails on ppc64x because of incomplete optimization.
 // See issues 9058.
 // Same reason for mips64x and s390x.
-// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386
+// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32,!386,!arm64
 
 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
diff --git a/test/nilptr3_ssa.go b/test/nilptr3_ssa.go
index 73f888fff102cc98de65a5f465e235df59d2bbb5..0974a84333714df65fe3c8b5ee9a5d38273089fe 100644
--- a/test/nilptr3_ssa.go
+++ b/test/nilptr3_ssa.go
@@ -1,5 +1,5 @@
 // errorcheck -0 -d=nil
-// +build amd64 arm amd64p32 386
+// +build amd64 arm amd64p32 386 arm64
 
 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
diff --git a/test/sliceopt.go b/test/sliceopt.go
index 115f8166f3832090edd1773c3820acdfe760f924..9dc8a4444c030f031faefc95b0e7f9be846e70fe 100644
--- a/test/sliceopt.go
+++ b/test/sliceopt.go
@@ -1,4 +1,4 @@
-// +build !amd64,!arm,!amd64p32,!386
+// +build !amd64,!arm,!amd64p32,!386,!arm64
 // errorcheck -0 -d=append,slice
 
 // Copyright 2015 The Go Authors. All rights reserved.