Skip to content
Snippets Groups Projects
asm.go 24.7 KiB
Newer Older
  • Learn to ignore specific revisions
    // Copyright 2025 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    package asmgen
    
    import (
    	"bytes"
    	"cmp"
    	"fmt"
    	"math/bits"
    	"slices"
    	"strings"
    )
    
    // Note: Exported fields and methods are expected to be used
    // by function generators (like the ones in add.go and so on).
    // Unexported fields and methods should not be.
    
    // An Asm is an assembly file being written.
    // It accumulates the generated assembly text and tracks which registers
    // are available for allocation and which optional CPU features are
    // currently considered enabled.
    type Asm struct {
    	Arch     *Arch           // architecture being generated for
    	out      bytes.Buffer    // output buffer holding the assembly text emitted so far
    	regavail uint64          // bitmap of available registers; bit i corresponds to Arch.regs[i]
    	enabled  map[Option]bool // enabled optional CPU features (see SetOption and Enabled)
    }
    
    // NewAsm returns a new Asm preparing assembly
    // for the given architecture to be written to file.
    // The standard file header is emitted immediately; when arch.Build is
    // non-empty it is appended, parenthesized, to the header's build constraint.
    func NewAsm(arch *Arch) *Asm {
    	a := &Asm{
    		Arch:    arch,
    		enabled: map[Option]bool{},
    	}
    	var constraint string
    	if build := arch.Build; build != "" {
    		constraint = " && (" + build + ")"
    	}
    	a.Printf(asmHeader, constraint)
    	return a
    }
    
    // Note: Using Copyright 2025, not the current year, to avoid test failures
    // on January 1 and spurious diffs when regenerating assembly.
    // The generator was written in 2025; that's good enough.
    // (As a matter of policy the Go project does not update copyright
    // notices every year, since copyright terms are so long anyway.)
    
    // asmHeader is the format string for the preamble written at the start of
    // the assembly output by NewAsm. Its single %s verb receives the extra
    // build-constraint clause, which is empty when the architecture has none.
    var asmHeader = `// Copyright 2025 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    // Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
    
    //go:build !math_big_pure_go%s
    
    #include "textflag.h"
    `
    
    // Fatalf reports a fatal error by panicking.
    // A panic is the right tool here: reaching Fatalf means the generator
    // itself has a bug, and the panic trace shows the exact source lines
    // that led to it. The panic message contains the architecture name,
    // the formatted error, and the assembly emitted since the most recent
    // TEXT line (or all of the output if there is no such line).
    func (a *Asm) Fatalf(format string, args ...any) {
    	emitted := a.out.String()
    	// LastIndex yields -1 when "\nTEXT" is absent, which makes start 0.
    	start := strings.LastIndex(emitted, "\nTEXT") + 1
    	prefix := "[" + a.Arch.Name + "] asmgen internal error: "
    	msg := fmt.Sprintf(format, args...)
    	panic(prefix + msg + "\n" + emitted[start:])
    }
    
    // hint returns the register name for the given hint,
    // or the empty string if the hint names no particular register.
    // The dedicated carry registers, when the architecture has them, take
    // precedence over the architecture's own hint function.
    func (a *Asm) hint(h Hint) string {
    	arch := a.Arch
    	switch {
    	case h == HintCarry && arch.regCarry != "":
    		return arch.regCarry
    	case h == HintAltCarry && arch.regAltCarry != "":
    		return arch.regAltCarry
    	case h == HintNone, arch.hint == nil:
    		return ""
    	}
    	return arch.hint(a, h)
    }
    
    // ZR returns the zero register, the specific register guaranteed to hold
    // the integer 0. If the architecture has none, ZR returns the zero Reg
    // (Reg{}, which has r.Valid() == false).
    func (a *Asm) ZR() Reg {
    	zero := Reg{name: a.Arch.reg0}
    	return zero
    }
    
    // tmp returns the temporary register, or else the zero Reg.
    //
    // The temporary register is the one set aside for implementing logical
    // instructions that expand into several real instructions on a given
    // system. The assembler sometimes uses it that way, and so do we.
    // That means that while we hold a live value in it, we must not emit an
    // instruction that makes the assembler overwrite it. In general it is the
    // architecture implementation's responsibility not to suggest any such
    // pseudo-instructions in situations where they would cause problems.
    func (a *Asm) tmp() Reg {
    	scratch := Reg{name: a.Arch.regTmp}
    	return scratch
    }
    
    // Carry returns the register dedicated to holding the carry,
    // or else the zero Reg if the architecture has no such register.
    func (a *Asm) Carry() Reg {
    	cr := Reg{name: a.Arch.regCarry}
    	return cr
    }
    
    // AltCarry returns the secondary carry register,
    // or else the zero Reg if the architecture has no such register.
    func (a *Asm) AltCarry() Reg {
    	cr := Reg{name: a.Arch.regAltCarry}
    	return cr
    }
    
    // Imm returns a Reg representing an immediate (constant) value.
    // The constant 0 is represented by the zero register when the
    // architecture has one; every other value is spelled "$" followed
    // by the decimal value.
    func (a *Asm) Imm(x int) Reg {
    	if x != 0 || a.Arch.reg0 == "" {
    		return Reg{name: "$" + fmt.Sprint(x)}
    	}
    	return Reg{name: a.Arch.reg0}
    }
    
    // IsZero reports whether r is a zero immediate or the zero register.
    func (a *Asm) IsZero(r Reg) bool {
    	if r.name == "$0" {
    		return true
    	}
    	// Without a zero register, only the immediate form counts as zero.
    	zr := a.Arch.reg0
    	return zr != "" && r.name == zr
    }
    
    // Reg allocates a new register.
    // It hands out the available register that comes first in Arch.regs
    // and reports a fatal error if none is available.
    func (a *Asm) Reg() Reg {
    	if a.regavail == 0 {
    		a.Fatalf("out of registers")
    	}
    	idx := bits.TrailingZeros64(a.regavail)
    	// The bit at idx is known to be set, so clearing it marks the register in use.
    	a.regavail &^= uint64(1) << idx
    	return Reg{name: a.Arch.regs[idx]}
    }
    
    // RegHint allocates a new register, with a hint as to its purpose.
    // If the hint maps to a specific register, that register is returned:
    // when it is one of the allocatable registers it must currently be
    // available and is marked allocated; otherwise it is returned untracked.
    // If the hint maps to no register, RegHint behaves like Reg.
    func (a *Asm) RegHint(hint Hint) Reg {
    	want := a.hint(hint)
    	if want == "" {
    		return a.Reg()
    	}
    	if idx := slices.Index(a.Arch.regs, want); idx >= 0 {
    		bit := uint64(1) << idx
    		if a.regavail&bit == 0 {
    			a.Fatalf("hint for already allocated register %s", want)
    		}
    		a.regavail &^= bit
    	}
    	return Reg{name: want}
    }
    
    // Free frees a previously allocated register.
    // If r is not an allocatable register (for example an immediate or
    // a memory reference), Free is a no-op.
    // Freeing a register that is already free is a fatal error.
    func (a *Asm) Free(r Reg) {
    	idx := slices.Index(a.Arch.regs, r.name)
    	if idx >= 0 {
    		bit := uint64(1) << idx
    		if a.regavail&bit != 0 {
    			a.Fatalf("register %s already freed", r.name)
    		}
    		a.regavail |= bit
    	}
    }
    
    // Unfree reallocates a previously freed register r.
    // If r is not an allocatable register (for example an immediate or
    // a memory reference), Unfree is a no-op.
    // If r is not free for allocation, Unfree panics.
    // Pairing Free with Unfree releases a register for temporary use and then
    // reclaims it, such as at the end of a loop body when it must be restored.
    func (a *Asm) Unfree(r Reg) {
    	idx := slices.Index(a.Arch.regs, r.name)
    	if idx >= 0 {
    		bit := uint64(1) << idx
    		if a.regavail&bit == 0 {
    			a.Fatalf("register %s not free", r.name)
    		}
    		a.regavail &^= bit
    	}
    }
    
    // A RegsUsed is a snapshot of which registers are allocated.
    // It is produced by [Asm.RegsUsed] and consumed by [Asm.SetRegsUsed].
    type RegsUsed struct {
    	avail uint64 // copy of Asm.regavail at the time of the snapshot
    }
    
    // RegsUsed returns a snapshot of which registers are currently allocated.
    // The snapshot can be handed to a later call to [Asm.SetRegsUsed]
    // to return to this allocation state.
    func (a *Asm) RegsUsed() RegsUsed {
    	snapshot := RegsUsed{avail: a.regavail}
    	return snapshot
    }
    
    // SetRegsUsed sets which registers are currently allocated,
    // replacing the current allocation state wholesale.
    // The argument should have been returned from a previous
    // call to [Asm.RegsUsed].
    func (a *Asm) SetRegsUsed(used RegsUsed) {
    	avail := used.avail
    	a.regavail = avail
    }
    
    // FreeAll frees all known registers,
    // marking every entry of Arch.regs as available.
    func (a *Asm) FreeAll() {
    	n := len(a.Arch.regs)
    	// Set the low n bits of the availability bitmap.
    	a.regavail = uint64(1)<<n - 1
    }
    
    // Printf emits to the assembly output.
    // A formatted result containing "%!" (the marker fmt inserts for a bad
    // verb or argument) is treated as a generator bug and reported with Fatalf.
    func (a *Asm) Printf(format string, args ...any) {
    	rendered := fmt.Sprintf(format, args...)
    	if malformed := strings.Contains(rendered, "%!"); malformed {
    		a.Fatalf("printf error: %s", rendered)
    	}
    	a.out.WriteString(rendered)
    }
    
    // Comment emits a line comment to the assembly output.
    // The formatted text is written on its own tab-indented line after "// ".
    func (a *Asm) Comment(format string, args ...any) {
    	body := fmt.Sprintf(format, args...)
    	a.out.WriteString("\t// " + body + "\n")
    }
    
    // EOL appends an end-of-line comment to the previous line.
    // It does so by dropping the newline that ends the output so far, if any,
    // and then emitting the comment, which supplies a fresh newline.
    func (a *Asm) EOL(format string, args ...any) {
    	if bytes.HasSuffix(a.out.Bytes(), []byte{'\n'}) {
    		a.out.Truncate(a.out.Len() - 1)
    	}
    	a.Comment(format, args...)
    }
    
    // JmpEnable emits a test for the optional CPU feature that jumps to label
    // if the feature is present, and returns true.
    // If JmpEnable returns false, the feature is not available on this
    // architecture and no code was emitted.
    func (a *Asm) JmpEnable(option Option, label string) bool {
    	if emit := a.Arch.options[option]; emit != nil {
    		emit(a, label)
    		return true
    	}
    	return false
    }
    
    // Enabled reports whether the optional CPU feature is considered
    // to be enabled at this point in the assembly output.
    // A feature never passed to SetOption is reported as not enabled.
    func (a *Asm) Enabled(option Option) bool {
    	on := a.enabled[option]
    	return on
    }
    
    // SetOption changes whether the optional CPU feature should be
    // considered to be enabled. It only records the setting, which
    // Enabled reports back; it emits no code.
    func (a *Asm) SetOption(option Option, on bool) {
    	features := a.enabled
    	features[option] = on
    }
    
    // op3 emits a 3-operand instruction op src1, src2, dst,
    // taking care to handle 2-operand machines and also
    // to simplify the printout when src2==dst.
    // An empty op is a fatal error.
    func (a *Asm) op3(op string, src1, src2, dst Reg) {
    	if op == "" {
    		a.Fatalf("missing instruction")
    	}
    	switch {
    	case src2 == dst:
    		// src2 and dst coincide, so the 2-operand spelling suffices.
    		a.Printf("\t%s %s, %s\n", op, src1, dst)
    	case a.Arch.op3 != nil && !a.Arch.op3(op):
    		// No 3-operand form of op on this machine: move src2 into dst
    		// first and then apply the 2-operand form. That move would
    		// destroy src1 if src1 is dst.
    		if src1 == dst {
    			a.Fatalf("implicit mov %s, %s would smash src1", src2, dst)
    		}
    		a.Mov(src2, dst)
    		a.Printf("\t%s %s, %s\n", op, src1, dst)
    	default:
    		// Full 3-operand form.
    		a.Printf("\t%s %s, %s, %s\n", op, src1, src2, dst)
    	}
    }
    
    // Mov emits dst = src.
    // Nothing is emitted when src and dst are the same.
    func (a *Asm) Mov(src, dst Reg) {
    	if src == dst {
    		return
    	}
    	a.Printf("\t%s %s, %s\n", a.Arch.mov, src, dst)
    }
    
    // AddWords emits dst = src1*WordBytes + src2.
    // It does not set or use the carry flag.
    func (a *Asm) AddWords(src1 Reg, src2, dst RegPtr) {
    	if format := a.Arch.addWords; format != "" {
    		// The architecture supplies its own instruction format.
    		a.Printf("\t"+format+"\n", src1, src2, dst)
    		return
    	}
    	// Generic fallback: shift src1 left by log2(WordBytes) into a scratch
    	// register and add.
    	// Note: Assuming that Lsh does not clobber the carry flag.
    	// Architectures where this is not true (x86) need to provide Arch.addWords.
    	scaled := a.Reg()
    	log2 := bits.TrailingZeros(uint(a.Arch.WordBytes))
    	a.Lsh(a.Imm(log2), src1, scaled)
    	a.Add(scaled, Reg(src2), Reg(dst), KeepCarry)
    	a.Free(scaled)
    }
    
    // And emits dst = src1 & src2.
    // It may modify the carry flag.
    func (a *Asm) And(src1, src2, dst Reg) {
    	op := a.Arch.and
    	a.op3(op, src1, src2, dst)
    }
    
    // Or emits dst = src1 | src2.
    // It may modify the carry flag.
    func (a *Asm) Or(src1, src2, dst Reg) {
    	op := a.Arch.or
    	a.op3(op, src1, src2, dst)
    }
    
    // Xor emits dst = src1 ^ src2.
    // It may modify the carry flag.
    func (a *Asm) Xor(src1, src2, dst Reg) {
    	op := a.Arch.xor
    	a.op3(op, src1, src2, dst)
    }
    
    // Neg emits dst = -src.
    // It may modify the carry flag.
    // The architecture's negate instruction is used when there is one;
    // otherwise Neg falls back to a reverse subtract from $0, and then to
    // a subtract from the zero register. Having none of these is a fatal error.
    func (a *Asm) Neg(src, dst Reg) {
    	arch := a.Arch
    	switch {
    	case arch.neg != "" && src == dst:
    		a.Printf("\t%s %s\n", arch.neg, dst)
    	case arch.neg != "":
    		a.Printf("\t%s %s, %s\n", arch.neg, src, dst)
    	case arch.rsb != "":
    		a.Printf("\t%s $0, %s, %s\n", arch.rsb, src, dst)
    	case arch.sub != "" && arch.reg0 != "":
    		a.Printf("\t%s %s, %s, %s\n", arch.sub, src, arch.reg0, dst)
    	default:
    		a.Fatalf("missing neg")
    	}
    }
    
    
    // HasRegShift reports whether the architecture can use shift expressions
    // as operands (see LshReg and RshReg).
    func (a *Asm) HasRegShift() bool {
    	supported := a.Arch.regShift
    	return supported
    }
    
    // LshReg returns a shift-expression operand src<<shift.
    // A leading "$" on the shift operand is dropped in the expression.
    // If a.HasRegShift() == false, LshReg panics.
    func (a *Asm) LshReg(shift, src Reg) Reg {
    	if !a.HasRegShift() {
    		a.Fatalf("no reg shift")
    	}
    	count := strings.TrimPrefix(shift.name, "$")
    	return Reg{name: fmt.Sprintf("%s<<%s", src, count)}
    }
    
    
    // Lsh emits dst = src << shift.
    // It may modify the carry flag.
    // If the architecture requires shift counts in a particular register,
    // shift must be that register or an immediate.
    func (a *Asm) Lsh(shift, src, dst Reg) {
    	need := a.hint(HintShiftCount)
    	if need != "" && need != shift.name && !shift.IsImm() {
    		a.Fatalf("shift count not in %s", need)
    	}
    	if !a.HasRegShift() {
    		a.op3(a.Arch.lsh, shift, src, dst)
    		return
    	}
    	// With shift-expression operands, a shift is a move of src<<shift.
    	a.Mov(a.LshReg(shift, src), dst)
    }
    
    // LshWide emits dst = src << shift with low bits shifted from adj.
    // It may modify the carry flag.
    // It is a fatal error if the architecture has no wide left shift, or if
    // it requires shift counts in a particular register and shift is neither
    // that register nor an immediate.
    func (a *Asm) LshWide(shift, adj, src, dst Reg) {
    	op := a.Arch.lshd
    	if op == "" {
    		a.Fatalf("no lshwide on %s", a.Arch.Name)
    	}
    	need := a.hint(HintShiftCount)
    	if need != "" && need != shift.name && !shift.IsImm() {
    		a.Fatalf("shift count not in %s", need)
    	}
    	// The shift count is folded into the opcode text so that op3 can
    	// handle the remaining three operands as usual.
    	a.op3(fmt.Sprintf("%s %s,", op, shift), adj, src, dst)
    }
    
    
    // RshReg returns a shift-expression operand src>>shift.
    // A leading "$" on the shift operand is dropped in the expression.
    // If a.HasRegShift() == false, RshReg panics.
    func (a *Asm) RshReg(shift, src Reg) Reg {
    	if !a.HasRegShift() {
    		a.Fatalf("no reg shift")
    	}
    	count := strings.TrimPrefix(shift.name, "$")
    	return Reg{name: fmt.Sprintf("%s>>%s", src, count)}
    }
    
    
    // Rsh emits dst = src >> shift.
    // It may modify the carry flag.
    func (a *Asm) Rsh(shift, src, dst Reg) {
    	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
    		a.Fatalf("shift count not in %s", need)
    	}
    
    	if a.HasRegShift() {
    		a.Mov(a.RshReg(shift, src), dst)
    
    387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
    		return
    	}
    	a.op3(a.Arch.rsh, shift, src, dst)
    }
    
    // RshWide emits dst = src >> shift with high bits shifted from adj.
    // It may modify the carry flag.
    // It is a fatal error if the architecture has no wide right shift, or if
    // it requires shift counts in a particular register and shift is neither
    // that register nor an immediate.
    func (a *Asm) RshWide(shift, adj, src, dst Reg) {
    	// Check the instruction that is actually emitted below (rshd, not lshd),
    	// so a missing wide right shift is reported here rather than producing
    	// an instruction with an empty opcode.
    	if a.Arch.rshd == "" {
    		a.Fatalf("no rshwide on %s", a.Arch.Name)
    	}
    	if need := a.hint(HintShiftCount); need != "" && shift.name != need && !shift.IsImm() {
    		a.Fatalf("shift count not in %s", need)
    	}
    	// The shift count is folded into the opcode text so that op3 can
    	// handle the remaining three operands as usual.
    	a.op3(fmt.Sprintf("%s %s,", a.Arch.rshd, shift), adj, src, dst)
    }
    
    // SLTU emits dst = src2 < src1 (0 or 1), using an unsigned comparison.
    // It uses the architecture's sltu instruction if there is one, and
    // otherwise its sgtu instruction with the two sources swapped.
    // Having neither is a fatal error.
    func (a *Asm) SLTU(src1, src2, dst Reg) {
    	if op := a.Arch.sltu; op != "" {
    		a.Printf("\t%s %s, %s, %s\n", op, src1, src2, dst)
    		return
    	}
    	if op := a.Arch.sgtu; op != "" {
    		a.Printf("\t%s %s, %s, %s\n", op, src2, src1, dst)
    		return
    	}
    	a.Fatalf("arch has no sltu/sgtu")
    }
    
    // Add emits dst = src1+src2, with the specified carry behavior.
    //
    // The strategies below are tried in source order and the first one that
    // applies is used: an architecture-specific hook (addF), then the plain,
    // carry-setting, carry-using, and carry-using-and-setting add instructions,
    // then an address computation (lea) when the carry may be left alone,
    // and finally a simulation of the carry flag in a dedicated register
    // using SLTU. If nothing applies, Add reports a fatal error.
    func (a *Asm) Add(src1, src2, dst Reg, carry Carry) {
    	switch {
    	default:
    		a.Fatalf("unsupported carry behavior")
    	case a.Arch.addF != nil && a.Arch.addF(a, src1, src2, dst, carry):
    		// handled by the architecture-specific hook, which returned true
    	case a.Arch.add != "" && (carry == KeepCarry || carry == SmashCarry):
    		a.op3(a.Arch.add, src1, src2, dst)
    	case a.Arch.adds != "" && (carry == SetCarry || carry == SmashCarry):
    		a.op3(a.Arch.adds, src1, src2, dst)
    	case a.Arch.adc != "" && (carry == UseCarry || carry == UseCarry|SmashCarry):
    		a.op3(a.Arch.adc, src1, src2, dst)
    	case a.Arch.adcs != "" && (carry == UseCarry|SetCarry || carry == UseCarry|SmashCarry):
    		a.op3(a.Arch.adcs, src1, src2, dst)
    	case a.Arch.lea != "" && (carry == KeepCarry || carry == SmashCarry):
    		// Emit the add as an address computation, then annotate the line
    		// with the ADD it stands for.
    		if src1.IsImm() {
    			a.Printf("\t%s %s(%s), %s\n", a.Arch.lea, src1.name[1:], src2, dst) // name[1:] removes $
    		} else {
    			a.Printf("\t%s (%s)(%s), %s\n", a.Arch.lea, src1, src2, dst)
    		}
    		if src2 == dst {
    			a.EOL("ADD %s, %s", src1, dst)
    		} else {
    			a.EOL("ADD %s, %s, %s", src1, src2, dst)
    		}
    
    	case a.Arch.add != "" && a.Arch.regCarry != "":
    		// Machine has no carry flag; instead we've dedicated a register
    		// and use SLTU/SGTU (set less-than/greater-than unsigned)
    		// to compute the carry flags as needed.
    		// For ADD x, y, z, SLTU x/y, z, c computes the carry (borrow) bit.
    		// Either of x or y can be used as the second argument, provided
    		// it is not aliased to z.
    		// To make the output less of a wall of instructions,
    		// we comment the "higher-level" operation, with ... marking
    		// continued instructions implementing the operation.
    		cr := a.Carry()
    		if carry&AltCarry != 0 {
    			// The caller asked for the secondary carry register.
    			cr = a.AltCarry()
    			if !cr.Valid() {
    				a.Fatalf("alt carry not supported")
    			}
    			carry &^= AltCarry
    		}
    		tmp := a.tmp()
    		if !tmp.Valid() {
    			// NOTE(review): the message says "sub carry" although this is Add;
    			// Sub's corresponding message omits "sub". Possibly swapped — confirm.
    			a.Fatalf("cannot simulate sub carry without regTmp")
    		}
    		switch carry {
    		default:
    			a.Fatalf("unsupported carry behavior")
    		case UseCarry, UseCarry | SmashCarry:
    			// Easy case, just add the carry afterward.
    			if a.IsZero(src1) {
    				// Only here to use the carry.
    				a.Add(cr, src2, dst, KeepCarry)
    				a.EOL("ADC $0, %s, %s", src2, dst)
    				break
    			}
    			a.Add(src1, src2, dst, KeepCarry)
    			a.EOL("ADC %s, %s, %s (cr=%s)", src1, src2, dst, cr)
    			a.Add(cr, dst, dst, KeepCarry)
    			a.EOL("...")
    
    		case SetCarry:
    			if a.IsZero(src1) && src2 == dst {
    				// Only here to clear the carry flag. (Caller will comment.)
    				a.Xor(cr, cr, cr)
    				break
    			}
    			var old Reg // old is a src distinct from dst
    			switch {
    			case dst != src1:
    				old = src1
    			case dst != src2:
    				old = src2
    			default:
    				// src1 == src2 == dst.
    				// Overflows if and only if the high bit is set, so copy high bit to carry.
    				// The carry is extracted before the add because the add overwrites dst.
    				a.Rsh(a.Imm(a.Arch.WordBits-1), src1, cr)
    				a.EOL("ADDS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
    				a.Add(src1, src2, dst, KeepCarry)
    				a.EOL("...")
    				return
    			}
    			a.Add(src1, src2, dst, KeepCarry)
    			a.EOL("ADDS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
    			a.SLTU(old, dst, cr) // dst < old (one of the src) implies carry
    			a.EOL("...")
    
    		case UseCarry | SetCarry:
    			if a.IsZero(src1) {
    				// Only here to use and then set the carry.
    				// Easy since carry is not aliased to dst.
    				a.Add(cr, src2, dst, KeepCarry)
    				a.EOL("ADCS $0, %s, %s (cr=%s)", src2, dst, cr)
    				a.SLTU(cr, dst, cr) // dst < cr implies carry
    				a.EOL("...")
    				break
    			}
    			// General case. Need to do two different adds (src1 + src2 + cr),
    			// computing carry bits for both, and add'ing them together.
    			// Start with src1+src2.
    			var old Reg // old is a src distinct from dst
    			switch {
    			case dst != src1:
    				old = src1
    			case dst != src2:
    				old = src2
    			}
    			if old.Valid() {
    				a.Add(src1, src2, dst, KeepCarry)
    				a.EOL("ADCS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
    				a.SLTU(old, dst, tmp) // dst < old (one of the src) implies carry
    				a.EOL("...")
    			} else {
    				// src1 == src2 == dst, like above. Sign bit is carry bit,
    				// but we copy it into tmp, not cr.
    				a.Rsh(a.Imm(a.Arch.WordBits-1), src1, tmp)
    				a.EOL("ADCS %s, %s, %s (cr=%s)", src1, src2, dst, cr)
    				a.Add(src1, src2, dst, KeepCarry)
    				a.EOL("...")
    			}
    			// Add cr to dst.
    			a.Add(cr, dst, dst, KeepCarry)
    			a.EOL("...")
    			a.SLTU(cr, dst, cr) // sum < cr implies carry
    			a.EOL("...")
    			// Add the two carry bits (at most one can be set, because (2⁶⁴-1)+(2⁶⁴-1)+1 < 2·2⁶⁴).
    			a.Add(tmp, cr, cr, KeepCarry)
    			a.EOL("...")
    		}
    	}
    }
    
    // Sub emits dst = src2-src1, with the specified carry behavior.
    //
    // The strategies below are tried in source order and the first one that
    // applies is used: an architecture-specific hook (subF), then the plain,
    // carry-setting, carry-using, and carry-using-and-setting subtract
    // instructions, then adding the negation of an immediate when the carry
    // may be left alone, and finally a simulation of the carry flag in a
    // dedicated register using SLTU. If nothing applies, Sub reports a fatal error.
    func (a *Asm) Sub(src1, src2, dst Reg, carry Carry) {
    	switch {
    	default:
    		a.Fatalf("unsupported carry behavior")
    	case a.Arch.subF != nil && a.Arch.subF(a, src1, src2, dst, carry):
    		// handled by the architecture-specific hook, which returned true
    	case a.Arch.sub != "" && (carry == KeepCarry || carry == SmashCarry):
    		a.op3(a.Arch.sub, src1, src2, dst)
    	case a.Arch.subs != "" && (carry == SetCarry || carry == SmashCarry):
    		a.op3(a.Arch.subs, src1, src2, dst)
    	case a.Arch.sbc != "" && (carry == UseCarry || carry == UseCarry|SmashCarry):
    		a.op3(a.Arch.sbc, src1, src2, dst)
    	case a.Arch.sbcs != "" && (carry == UseCarry|SetCarry || carry == UseCarry|SmashCarry):
    		a.op3(a.Arch.sbcs, src1, src2, dst)
    	case strings.HasPrefix(src1.name, "$") && (carry == KeepCarry || carry == SmashCarry):
    		// Running out of options; if this is an immediate
    		// and we don't need to worry about carry semantics,
    		// try adding the negation.
    		// The sign is flipped textually: "$-N" becomes "$N" and "$N" becomes "$-N".
    		// src1 is a by-value parameter, so the caller's Reg is not affected.
    		if strings.HasPrefix(src1.name, "$-") {
    			src1.name = "$" + src1.name[2:]
    		} else {
    			src1.name = "$-" + src1.name[1:]
    		}
    		a.Add(src1, src2, dst, carry)
    
    	case a.Arch.sub != "" && a.Arch.regCarry != "":
    		// Machine has no carry flag; instead we've dedicated a register
    		// and use SLTU/SGTU (set less-than/greater-than unsigned)
    		// to compute the carry bits as needed.
    		// For SUB x, y, z, SLTU x, y, c computes the carry (borrow) bit.
    		// To make the output less of a wall of instructions,
    		// we comment the "higher-level" operation, with ... marking
    		// continued instructions implementing the operation.
    		// Be careful! Subtract and add have different overflow behaviors,
    		// so the details here are NOT the same as in Add above.
    		cr := a.Carry()
    		if carry&AltCarry != 0 {
    			// Unlike Add, the simulated subtract has no secondary-carry variant.
    			a.Fatalf("alt carry not supported")
    		}
    		tmp := a.tmp()
    		if !tmp.Valid() {
    			a.Fatalf("cannot simulate carry without regTmp")
    		}
    		switch carry {
    		default:
    			a.Fatalf("unsupported carry behavior")
    		case UseCarry, UseCarry | SmashCarry:
    			// Easy case, just subtract the carry afterward.
    			if a.IsZero(src1) {
    				// Only here to use the carry.
    				a.Sub(cr, src2, dst, KeepCarry)
    				a.EOL("SBC $0, %s, %s", src2, dst)
    				break
    			}
    			a.Sub(src1, src2, dst, KeepCarry)
    			a.EOL("SBC %s, %s, %s", src1, src2, dst)
    			a.Sub(cr, dst, dst, KeepCarry)
    			a.EOL("...")
    
    		case SetCarry:
    			if a.IsZero(src1) && src2 == dst {
    				// Only here to clear the carry flag.
    				a.Xor(cr, cr, cr)
    				break
    			}
    			// Compute the new carry first, in case dst is src1 or src2.
    			a.SLTU(src1, src2, cr) // src2 < src1 implies borrow
    			a.EOL("SUBS %s, %s, %s", src1, src2, dst)
    			a.Sub(src1, src2, dst, KeepCarry)
    			a.EOL("...")
    
    		case UseCarry | SetCarry:
    			if a.IsZero(src1) {
    				// Only here to use and then set the carry.
    				if src2 == dst {
    					// Unfortunate case. Using src2==dst is common (think x -= y)
    					// and also more efficient on two-operand machines (like x86),
    					// but here subtracting from dst will smash src2, making it
    					// impossible to recover the carry information after the SUB.
    					// But we want to use the carry, so we can't compute it before
    					// the SUB either. Compute into a temporary and MOV.
    					a.SLTU(cr, src2, tmp) // src2 < cr implies borrow
    					a.EOL("SBCS $0, %s, %s", src2, dst)
    					a.Sub(cr, src2, dst, KeepCarry)
    					a.EOL("...")
    					a.Mov(tmp, cr)
    					a.EOL("...")
    					break
    				}
    				a.Sub(cr, src2, dst, KeepCarry) // src2 not dst, so src2 preserved
    				a.SLTU(cr, src2, cr)            // src2 < cr implies borrow; this path emits no EOL annotations
    				break
    			}
    			// General case. Need to do two different subtracts (src2 - cr - src1),
    			// computing carry bits for both, and add'ing them together.
    			// Doing src2 - cr first frees up cr to store the carry from the sub of src1.
    			a.SLTU(cr, src2, tmp) // borrow from src2 - cr, held in tmp
    			a.EOL("SBCS %s, %s, %s", src1, src2, dst)
    			a.Sub(cr, src2, dst, KeepCarry)
    			a.EOL("...")
    			a.SLTU(src1, dst, cr) // borrow from dst - src1, computed before dst is overwritten
    			a.EOL("...")
    			a.Sub(src1, dst, dst, KeepCarry)
    			a.EOL("...")
    			// Combine the two borrow bits into cr.
    			a.Add(tmp, cr, cr, KeepCarry)
    			a.EOL("...")
    		}
    	}
    }
    
    // ClearCarry clears the carry flag.
    // The 'which' parameter must be AddCarry or SubCarry to specify how the flag will be used.
    // (On some systems, the sub carry's actual processor bit is inverted from its usual value.)
    // An AltCarry bit in 'which' is passed through to the emitted operation.
    func (a *Asm) ClearCarry(which Carry) {
    	// The flag is cleared by adding or subtracting zero in place,
    	// so the scratch register is not actually modified.
    	scratch := Reg{name: a.Arch.regs[0]}
    	zero := a.Imm(0)
    	mode := SetCarry | which&AltCarry
    	switch kind := which & (AddCarry | SubCarry); kind {
    	case AddCarry:
    		a.Add(zero, scratch, scratch, mode)
    	case SubCarry:
    		a.Sub(zero, scratch, scratch, mode)
    	default:
    		a.Fatalf("bad carry")
    	}
    	a.EOL("clear carry")
    }
    
    // SaveCarry saves the carry flag into dst.
    // The meaning of the bits in dst is architecture-dependent.
    // The carry flag is left in an undefined state.
    func (a *Asm) SaveCarry(dst Reg) {
    	// Note: As implemented here, the carry flag is actually left unmodified,
    	// but we say it is in an undefined state in case that changes in the future.
    	// (The SmashCarry could be changed to SetCarry if so.)
    	cr := a.Carry()
    	switch {
    	case !cr.Valid():
    		// Real carry flag: materialize it with a subtract-with-carry of dst from itself.
    		a.Sub(dst, dst, dst, UseCarry|SmashCarry)
    	case cr == dst:
    		return // nothing was emitted, so avoid EOL
    	default:
    		a.Mov(cr, dst)
    	}
    	a.EOL("save carry")
    }
    
    // RestoreCarry restores the carry flag from src.
    // src is left in an undefined state.
    func (a *Asm) RestoreCarry(src Reg) {
    	cr := a.Carry()
    	switch {
    	case cr.Valid() && cr == src:
    		return // nothing was emitted, so avoid EOL
    	case cr.Valid():
    		a.Mov(src, cr)
    	case a.Arch.subCarryIsBorrow:
    		a.Add(src, src, src, SetCarry)
    	default:
    		// SaveCarry saved the sub carry flag with an encoding of 0, 1 -> 0, ^0.
    		// Restore it by subtracting from a value less than ^0, which will carry if src != 0.
    		// If there is no zero register, the SP register is guaranteed to be less than ^0.
    		// (This may seem too clever, but on GOARCH=arm we have no other good options.)
    		base := cmp.Or(a.ZR(), Reg{name: "SP"})
    		a.Sub(src, base, src, SetCarry)
    	}
    	a.EOL("restore carry")
    }
    
    // ConvertCarry converts the carry flag in dst from the internal format to a 0 or 1.
    // The carry flag is left in an undefined state.
    // Nothing is emitted when the architecture keeps the carry in a dedicated
    // register, or when which is neither AddCarry nor SubCarry.
    func (a *Asm) ConvertCarry(which Carry, dst Reg) {
    	if a.Carry().Valid() {
    		// A carry register already holds 0 or 1.
    		return
    	}
    	if which == SubCarry {
    		a.Neg(dst, dst)
    		a.EOL("convert sub carry")
    		return
    	}
    	if which != AddCarry {
    		return
    	}
    	if a.Arch.subCarryIsBorrow {
    		a.Neg(dst, dst)
    	} else {
    		a.Add(a.Imm(1), dst, dst, SmashCarry)
    	}
    	a.EOL("convert add carry")
    }
    
    // SaveConvertCarry saves and converts the carry flag into dst: 0 unset, 1 set.
    // The carry flag is left in an undefined state.
    // The 'which' parameter must be AddCarry or SubCarry.
    func (a *Asm) SaveConvertCarry(which Carry, dst Reg) {
    	if which != AddCarry && which != SubCarry {
    		a.Fatalf("bad carry")
    	}
    	// Special case for the add carry: with an add-with-carry instruction and
    	// a zero register, 0 + 0 + carry yields the converted value directly.
    	// The sub carry has no special cases.
    	if which == AddCarry && (a.Arch.adc != "" || a.Arch.adcs != "") {
    		if zr := a.ZR(); zr.Valid() {
    			a.Add(zr, zr, dst, UseCarry|SmashCarry)
    			a.EOL("save & convert add carry")
    			return
    		}
    	}
    	a.SaveCarry(dst)
    	a.ConvertCarry(which, dst)
    }
    
    // MulWide emits dstlo = src1 * src2 and dsthi = (src1 * src2) >> WordBits.
    // The carry flag is left in an undefined state.
    // If dstlo or dsthi is the zero Reg, then those outputs are discarded.
    // When both halves are computed with separate instructions, they are
    // ordered so that the first result does not overwrite a source of the second.
    func (a *Asm) MulWide(src1, src2, dstlo, dsthi Reg) {
    	arch := a.Arch
    	haveBoth := arch.mul != "" && arch.mulhi != ""
    	switch {
    	case arch.mulWideF != nil:
    		arch.mulWideF(a, src1, src2, dstlo, dsthi)
    	case arch.mul != "" && !dsthi.Valid():
    		// Only the low half is wanted.
    		a.op3(arch.mul, src1, src2, dstlo)
    	case arch.mulhi != "" && !dstlo.Valid():
    		// Only the high half is wanted.
    		a.op3(arch.mulhi, src1, src2, dsthi)
    	case haveBoth && dstlo != src1 && dstlo != src2:
    		// Low half first: dstlo is not a source.
    		a.op3(arch.mul, src1, src2, dstlo)
    		a.op3(arch.mulhi, src1, src2, dsthi)
    	case haveBoth && dsthi != src1 && dsthi != src2:
    		// High half first: dsthi is not a source.
    		a.op3(arch.mulhi, src1, src2, dsthi)
    		a.op3(arch.mul, src1, src2, dstlo)
    	default:
    		a.Fatalf("mulwide not available")
    	}
    }
    
    // Jmp emits an unconditional jump to the label.
    func (a *Asm) Jmp(label string) {
    	// Note: Some systems prefer the spelling B or BR, but all accept JMP.
    	const format = "\tJMP %s\n"
    	a.Printf(format, label)
    }
    
    // JmpZero jumps to the label if src is zero.
    // It may modify the carry flag unless a.Arch.CarrySafeLoop is true.
    // The instruction text comes from the architecture's jmpZero format.
    func (a *Asm) JmpZero(src Reg, label string) {
    	format := "\t" + a.Arch.jmpZero + "\n"
    	a.Printf(format, src, label)
    }
    
    // JmpNonZero jumps to the label if src is non-zero.
    // It may modify the carry flag unless a.Arch.CarrySafeLoop is true.
    // The instruction text comes from the architecture's jmpNonZero format.
    func (a *Asm) JmpNonZero(src Reg, label string) {
    	format := "\t" + a.Arch.jmpNonZero + "\n"
    	a.Printf(format, src, label)
    }
    
    // Label emits a label with the given name:
    // the name followed by a colon, on a line of its own.
    func (a *Asm) Label(name string) {
    	const format = "%s:\n"
    	a.Printf(format, name)
    }
    
    // Ret emits a return instruction.
    func (a *Asm) Ret() {
    	const ret = "\tRET\n"
    	a.Printf(ret)
    }