diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 385c569ed88fabdecd1763442fbec72dfb314c2c..8a44cebc760fffebe9211bd787a6bfa3a4f54468 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -413,3 +413,35 @@ func TracebackSystemstack(stk []uintptr, i int) int {
 	})
 	return n
 }
+
+func KeepNArenaHints(n int) {
+	hint := mheap_.arenaHints
+	for i := 1; i < n; i++ {
+		hint = hint.next
+		if hint == nil {
+			return
+		}
+	}
+	hint.next = nil
+}
+
+// MapNextArenaHint reserves a page at the next arena growth hint,
+// preventing the arena from growing there, and returns the range of
+// addresses that are no longer viable.
+func MapNextArenaHint() (start, end uintptr) {
+	hint := mheap_.arenaHints
+	addr := hint.addr
+	if hint.down {
+		start, end = addr-heapArenaBytes, addr
+		addr -= physPageSize
+	} else {
+		start, end = addr, addr+heapArenaBytes
+	}
+	var reserved bool
+	sysReserve(unsafe.Pointer(addr), physPageSize, &reserved)
+	return
+}
+
+func GetNextArenaHint() uintptr {
+	return mheap_.arenaHints.addr
+}
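These hooks are exercised by TestArenaCollision in malloc_test.go below. A minimal sketch of the intended pattern, assuming the usual dot-import of runtime and an unsafe import in package runtime_test (names like sink are illustrative only):

	var sink []*[1 << 20]byte
	KeepNArenaHints(3)               // drop all but the next few growth hints
	start, end := MapNextArenaHint() // reserve a page at the next hint
	first := GetNextArenaHint()
	for GetNextArenaHint() == first { // allocate until the heap is forced past it
		p := new([1 << 20]byte)
		sink = append(sink, p)
		if a := uintptr(unsafe.Pointer(p)); start <= a && a < end {
			panic("allocation landed in the reserved range")
		}
	}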
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index a95a7fffde73789013d96a7911a9a1d4cc199a8e..02c0be6690f96b651c9fb6a8a6ae963c6cdde0a6 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -78,9 +78,32 @@
 //
 //	3. We don't zero pages that never get reused.
 
+// Virtual memory layout
+//
+// The heap consists of a set of arenas, which are 64MB on 64-bit and
+// 4MB on 32-bit (heapArenaBytes). Each arena's start address is also
+// aligned to the arena size.
+//
+// Each arena has an associated heapArena object that stores the
+// metadata for that arena: the heap bitmap for all words in the arena
+// and the span map for all pages in the arena. heapArena objects are
+// themselves allocated off-heap.
+//
+// Since arenas are aligned, the address space can be viewed as a
+// series of arena frames. The arena index (mheap_.arenas) maps from
+// arena frame number to *heapArena, or nil for parts of the address
+// space not backed by the Go heap. Since arenas are large, the arena
+// index is just a single-level mapping.
+//
+// The arena index covers the entire possible address space, allowing
+// the Go heap to use any part of the address space. The allocator
+// attempts to keep arenas contiguous so that large spans (and hence
+// large objects) can cross arenas.
+
 package runtime
 
 import (
+	"runtime/internal/atomic"
 	"runtime/internal/sys"
 	"unsafe"
 )
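The single-level index described above turns every metadata lookup into one divide and one load. A sketch of that mapping as it would appear inside package runtime; arenaOf is a hypothetical helper name, while sysAlloc and spanOf in this patch perform the same computation inline:

	// arenaOf returns the heapArena metadata for the arena frame
	// containing p, or nil if p is not backed by the Go heap.
	// Valid for any p below memLimit.
	func arenaOf(p uintptr) *heapArena {
		ri := p / heapArenaBytes // arena frame number
		return mheap_.arenas[ri]
	}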
@@ -113,9 +136,8 @@ const (
 	_TinySize      = 16
 	_TinySizeClass = int8(2)
 
-	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
-	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
-	_HeapAllocChunk = 1 << 20                // Chunk size for heap growth
+	_FixAllocChunk = 16 << 10               // Chunk size for FixAlloc
+	_MaxMHeapList  = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
 
 	// Per-P, per order stack segment cache size.
 	_StackCacheSize = 32 * 1024
@@ -134,26 +156,6 @@ const (
 	//   plan9            | 4KB        | 3
 	_NumStackOrders = 4 - sys.PtrSize/4*sys.GoosWindows - 1*sys.GoosPlan9
 
-	// Number of bits in page to span calculations (4k pages).
-	// On Windows 64-bit we limit the arena to 32GB or 35 bits.
-	// Windows counts memory used by page table into committed memory
-	// of the process, so we can't reserve too much memory.
-	// See https://golang.org/issue/5402 and https://golang.org/issue/5236.
-	// On other 64-bit platforms, we limit the arena to 512GB, or 39 bits.
-	// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
-	// The only exception is mips32 which only has access to low 2GB of virtual memory.
-	// On Darwin/arm64, we cannot reserve more than ~5GB of virtual memory,
-	// but as most devices have less than 4GB of physical memory anyway, we
-	// try to be conservative here, and only ask for a 2GB heap.
-	_MHeapMap_TotalBits = (_64bit*sys.GoosWindows)*35 + (_64bit*(1-sys.GoosWindows)*(1-sys.GoosDarwin*sys.GoarchArm64))*39 + sys.GoosDarwin*sys.GoarchArm64*31 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
-	_MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift
-
-	// _MaxMem is the maximum heap arena size minus 1.
-	//
-	// On 32-bit, this is also the maximum heap pointer value,
-	// since the arena starts at address 0.
-	_MaxMem = 1<<_MHeapMap_TotalBits - 1
-
 	// memLimitBits is the maximum number of bits in a heap address.
 	//
 	// On 64-bit platforms, we limit this to 48 bits because that
@@ -174,14 +176,14 @@ const (
 	memLimitBits = _64bit*48 + (1-_64bit)*(32-(sys.GoarchMips+sys.GoarchMipsle))
 
 	// memLimit is one past the highest possible heap pointer value.
+	//
+	// _MaxMem, defined below as memLimit-1, is the highest
+	// possible heap pointer value.
 	memLimit = 1 << memLimitBits
+	_MaxMem  = memLimit - 1
 
 	// heapArenaBytes is the size of a heap arena. The heap
 	// consists of mappings of size heapArenaBytes, aligned to
 	// heapArenaBytes. The initial heap mapping is one arena.
-	//
-	// TODO: Right now only the bitmap is divided into separate
-	// arenas, but shortly all of the heap will be.
 	heapArenaBytes = (64<<20)*_64bit + (4<<20)*(1-_64bit)
 
 	// heapArenaBitmapBytes is the size of each heap arena's bitmap.
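For concreteness, a sketch of the sizes these constants imply:

	// 64-bit: memLimit = 1<<48 and heapArenaBytes = 64MB, so the arena
	// index has 1<<48 / 64MB = 4M entries of 8 bytes = 32MB, allocated up
	// front but mostly never written (see mallocinit). Each heapArena
	// carries a 2MB bitmap (64MB / 32, two bits per word) and a 64KB span
	// map (8192 pages * 8 bytes), allocated only for arenas that exist.
	//
	// 32-bit: memLimit = 1<<32 and heapArenaBytes = 4MB, so there are
	// 1024 arena frames; 1024 heapArenas at roughly 258KB each is the
	// ~258MB reservation mentioned in mallocinit below.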
@@ -281,43 +283,53 @@ func mallocinit() {
 		throw("bad system page size")
 	}
 
-	// The auxiliary regions start at p and are laid out in the
-	// following order: spans, bitmap, arena.
-	var p, pSize uintptr
-	var reserved bool
+	// Map the arena index. Most of this will never be written to,
+	// so we don't account it against any memory stat.
+	var untracked uint64
+	mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked))
+	if mheap_.arenas == nil {
+		throw("failed to allocate arena index")
+	}
+
+	// Initialize the heap.
+	mheap_.init()
+	_g_ := getg()
+	_g_.m.mcache = allocmcache()
 
-	// Set up the allocation arena, a contiguous area of memory where
-	// allocated data will be found.
+	// Create initial arena growth hints.
 	if sys.PtrSize == 8 {
-		// On a 64-bit machine, allocate from a single contiguous reservation.
-		// 512 GB (MaxMem) should be big enough for now.
+		// On a 64-bit machine, we pick the following hints
+		// because:
 		//
-		// The code will work with the reservation at any address, but ask
-		// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
-		// Allocating a 512 GB region takes away 39 bits, and the amd64
-		// doesn't let us choose the top 17 bits, so that leaves the 9 bits
-		// in the middle of 0x00c0 for us to choose. Choosing 0x00c0 means
-		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
-		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
+		// 1. Starting from the middle of the address space
+		// makes it easier to grow out a contiguous range
+		// without running into some other mapping.
+		//
+		// 2. This makes Go heap addresses more easily
+		// recognizable when debugging.
+		//
+		// 3. Stack scanning in gccgo is still conservative,
+		// so it's important that addresses be distinguishable
+		// from other data.
+		//
+		// Starting at 0x00c0 means that the valid memory addresses
+		// will begin 0x00c0, 0x00c1, ...
+		// In little-endian, that's c0 00, c1 00, ... None of those are valid
 		// UTF-8 sequences, and they are otherwise as far away from
 		// ff (likely a common byte) as possible. If that fails, we try other 0xXXc0
 		// addresses. An earlier attempt to use 0x11f8 caused out of memory errors
 		// on OS X during thread allocations.  0x00c0 causes conflicts with
 		// AddressSanitizer which reserves all memory up to 0x0100.
-		// These choices are both for debuggability and to reduce the
-		// odds of a conservative garbage collector (as is still used in gccgo)
+		// These choices reduce the odds of a conservative garbage collector
 		// not collecting memory because some non-pointer block of memory
 		// had a bit pattern that matched a memory address.
 		//
-		// If this fails we fall back to the 32 bit memory mechanism
-		//
 		// However, on arm64, we ignore all this advice above and slam the
 		// allocation at 0x40 << 32 because when using 4k pages with 3-level
 		// translation buffers, the user address space is limited to 39 bits.
 		// On darwin/arm64, the address space is even smaller.
-		arenaSize := round(_MaxMem, _PageSize)
-		pSize = arenaSize + _PageSize
-		for i := 0; i <= 0x7f; i++ {
+		for i := 0x7f; i >= 0; i-- {
+			var p uintptr
 			switch {
 			case GOARCH == "arm64" && GOOS == "darwin":
 				p = uintptr(i)<<40 | uintptrMask&(0x0013<<28)
@@ -326,225 +338,240 @@ func mallocinit() {
 			default:
 				p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
 			}
-			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
-			if p != 0 {
-				break
-			}
+			hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+			hint.addr = p
+			hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+		}
+	} else {
+		// On a 32-bit machine, we're much more concerned
+		// about keeping the usable heap contiguous.
+		// Hence:
+		//
+		// 1. We reserve space for all heapArenas up front so
+		// they don't get interleaved with the heap. They're
+		// ~258MB, so this isn't too bad. (We could reserve a
+		// smaller amount of space up front if this is a
+		// problem.)
+		//
+		// 2. We hint the heap to start right above the end of
+		// the binary so we have the best chance of keeping it
+		// contiguous.
+		//
+		// 3. We try to stake out a reasonably large initial
+		// heap reservation.
+
+		const arenaMetaSize = unsafe.Sizeof(heapArena{}) * uintptr(len(*mheap_.arenas))
+		var reserved bool
+		meta := uintptr(sysReserve(nil, arenaMetaSize, &reserved))
+		if meta != 0 {
+			mheap_.heapArenaAlloc.init(meta, arenaMetaSize)
 		}
-	}
-
-	if p == 0 {
-		// On a 32-bit machine, we can't typically get away
-		// with a giant virtual address space reservation.
-		// Instead we map the memory information bitmap
-		// immediately after the data segment, large enough
-		// to handle the entire 4GB address space (256 MB),
-		// along with a reservation for an initial arena.
-		// When that gets used up, we'll start asking the kernel
-		// for any memory anywhere.
 
 		// We want to start the arena low, but if we're linked
 		// against C code, it's possible global constructors
 		// have called malloc and adjusted the process' brk.
 		// Query the brk so we can avoid trying to map the
-		// arena over it (which will cause the kernel to put
-		// the arena somewhere else, likely at a high
+		// region over it (which will cause the kernel to put
+		// the region somewhere else, likely at a high
 		// address).
 		procBrk := sbrk0()
 
-		// If we fail to allocate, try again with a smaller arena.
-		// This is necessary on Android L where we share a process
-		// with ART, which reserves virtual memory aggressively.
-		// In the worst case, fall back to a 0-sized initial arena,
-		// in the hope that subsequent reservations will succeed.
+		// If we ask for the end of the data segment but the
+		// operating system requires a little more space
+		// before we can start allocating, it will give out a
+		// slightly higher pointer. Except QEMU, which is
+		// buggy, as usual: it won't adjust the pointer
+		// upward. So adjust it upward a little bit ourselves:
+		// 1/4 MB to get away from the running binary image.
+		p := firstmoduledata.end
+		if p < procBrk {
+			p = procBrk
+		}
+		if mheap_.heapArenaAlloc.next <= p && p < mheap_.heapArenaAlloc.end {
+			p = mheap_.heapArenaAlloc.end
+		}
+		p = round(p+(256<<10), heapArenaBytes)
+		// Because we're worried about fragmentation on
+		// 32-bit, we try to make a large initial reservation.
 		arenaSizes := []uintptr{
 			512 << 20,
 			256 << 20,
 			128 << 20,
-			0,
 		}
-
 		for _, arenaSize := range arenaSizes {
-			// SysReserve treats the address we ask for, end, as a hint,
-			// not as an absolute requirement. If we ask for the end
-			// of the data segment but the operating system requires
-			// a little more space before we can start allocating, it will
-			// give out a slightly higher pointer. Except QEMU, which
-			// is buggy, as usual: it won't adjust the pointer upward.
-			// So adjust it upward a little bit ourselves: 1/4 MB to get
-			// away from the running binary image and then round up
-			// to a MB boundary.
-			p = round(firstmoduledata.end+(1<<18), 1<<20)
-			pSize = arenaSize + _PageSize
-			if p <= procBrk && procBrk < p+pSize {
-				// Move the start above the brk,
-				// leaving some room for future brk
-				// expansion.
-				p = round(procBrk+(1<<20), 1<<20)
-			}
-			p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
-			if p != 0 {
+			a, size := sysReserveAligned(unsafe.Pointer(p), arenaSize, heapArenaBytes, &reserved)
+			if a != nil {
+				mheap_.arena.init(uintptr(a), size)
+				p = uintptr(a) + size // For hint below
 				break
 			}
 		}
-		if p == 0 {
-			throw("runtime: cannot reserve arena virtual address space")
-		}
-	}
-
-	// PageSize can be larger than OS definition of page size,
-	// so SysReserve can give us a PageSize-unaligned pointer.
-	// To overcome this we ask for PageSize more and round up the pointer.
-	p1 := round(p, _PageSize)
-	pSize -= p1 - p
-
-	if sys.PtrSize == 4 {
-		// Set arena_start such that we can accept memory
-		// reservations located anywhere in the 4GB virtual space.
-		mheap_.arena_start = 0
-	} else {
-		mheap_.arena_start = p1
-	}
-	mheap_.arena_end = p + pSize
-	mheap_.arena_used = p1
-	mheap_.arena_alloc = p1
-	mheap_.arena_reserved = reserved
-
-	if mheap_.arena_start&(_PageSize-1) != 0 {
-		println("bad pagesize", hex(p), hex(p1), hex(_PageSize), "start", hex(mheap_.arena_start))
-		throw("misrounded allocation in mallocinit")
-	}
-
-	// Map the arena index. Most of this will never be touched.
-	var untracked uint64
-	mheap_.arenas = (*[memLimit / heapArenaBytes]*heapArena)(persistentalloc(unsafe.Sizeof(*mheap_.arenas), sys.PtrSize, &untracked))
-	if mheap_.arenas == nil {
-		throw("failed to allocate arena index")
+		hint := (*arenaHint)(mheap_.arenaHintAlloc.alloc())
+		hint.addr = p
+		hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
 	}
-
-	// Initialize the rest of the allocator.
-	mheap_.init()
-	_g_ := getg()
-	_g_.m.mcache = allocmcache()
 }
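On linux/amd64 the 64-bit branch above leaves mheap_.arenaHints ordered from lowest address to highest, because each hint is pushed onto the front of the list while i counts down. A sketch of the resulting chain (arm64 and darwin/arm64 use the other address formulas shown above):

	// i = 0x00: 0x00c000000000  <- list head, tried first
	// i = 0x01: 0x01c000000000
	// ...
	// i = 0x7f: 0x7fc000000000  <- tried last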
 
-// sysAlloc allocates the next n bytes from the heap arena. The
-// returned pointer is always _PageSize aligned and between
-// h.arena_start and h.arena_end. sysAlloc returns nil on failure.
+// sysAlloc allocates heap arena space for at least n bytes. The
+// returned pointer is always heapArenaBytes-aligned and backed by
+// h.arenas metadata. The returned size is always a multiple of
+// heapArenaBytes. sysAlloc returns nil on failure.
 // There is no corresponding free function.
-func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
-	// strandLimit is the maximum number of bytes to strand from
-	// the current arena block. If we would need to strand more
-	// than this, we fall back to sysAlloc'ing just enough for
-	// this allocation.
-	const strandLimit = 16 << 20
-
-	if n > h.arena_end-h.arena_alloc {
-		// If we haven't grown the arena to _MaxMem yet, try
-		// to reserve some more address space.
-		p_size := round(n+_PageSize, 256<<20)
-		new_end := h.arena_end + p_size // Careful: can overflow
-		if h.arena_end <= new_end && new_end-h.arena_start-1 <= _MaxMem {
-			// TODO: It would be bad if part of the arena
-			// is reserved and part is not.
-			var reserved bool
-			p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
-			if p == 0 {
-				// TODO: Try smaller reservation
-				// growths in case we're in a crowded
-				// 32-bit address space.
-				goto reservationFailed
-			}
-			// p can be just about anywhere in the address
-			// space, including before arena_end.
-			if p == h.arena_end {
-				// The new block is contiguous with
-				// the current block. Extend the
-				// current arena block.
-				h.arena_end = new_end
-				h.arena_reserved = reserved
-			} else if h.arena_start <= p && p+p_size-h.arena_start-1 <= _MaxMem && h.arena_end-h.arena_alloc < strandLimit {
-				// We were able to reserve more memory
-				// within the arena space, but it's
-				// not contiguous with our previous
-				// reservation. It could be before or
-				// after our current arena_used.
-				//
-				// Keep everything page-aligned.
-				// Our pages are bigger than hardware pages.
-				h.arena_end = p + p_size
-				p = round(p, _PageSize)
-				h.arena_alloc = p
-				h.arena_reserved = reserved
-			} else {
-				// We got a mapping, but either
-				//
-				// 1) It's not in the arena, so we
-				// can't use it. (This should never
-				// happen on 32-bit.)
-				//
-				// 2) We would need to discard too
-				// much of our current arena block to
-				// use it.
-				//
-				// We haven't added this allocation to
-				// the stats, so subtract it from a
-				// fake stat (but avoid underflow).
-				//
-				// We'll fall back to a small sysAlloc.
-				stat := uint64(p_size)
-				sysFree(unsafe.Pointer(p), p_size, &stat)
+//
+// h must be locked.
+func (h *mheap) sysAlloc(n uintptr) (v unsafe.Pointer, size uintptr) {
+	n = round(n, heapArenaBytes)
+
+	// First, try the arena pre-reservation.
+	v = h.arena.alloc(n, heapArenaBytes, &memstats.heap_sys)
+	if v != nil {
+		size = n
+		goto mapped
+	}
+
+	// Try to grow the heap at a hint address.
+	for h.arenaHints != nil {
+		hint := h.arenaHints
+		p := hint.addr
+		if hint.down {
+			p -= n
+		}
+		if p+n < p || p+n >= memLimit-1 {
+			// We can't use this, so don't ask.
+			v = nil
+		} else {
+			v = sysReserve(unsafe.Pointer(p), n, &h.arena_reserved)
+		}
+		if p == uintptr(v) {
+			// Success. Update the hint.
+			if !hint.down {
+				p += n
 			}
+			hint.addr = p
+			size = n
+			break
+		}
+		// Failed. Discard this hint and try the next.
+		//
+		// TODO: This would be cleaner if sysReserve could be
+		// told to only return the requested address. In
+		// particular, this is already how Windows behaves, so
+		// it would simplify things there.
+		if v != nil {
+			sysFree(v, n, nil)
 		}
+		h.arenaHints = hint.next
+		h.arenaHintAlloc.free(unsafe.Pointer(hint))
 	}
 
-	if n <= h.arena_end-h.arena_alloc {
-		// Keep taking from our reservation.
-		p := h.arena_alloc
-		sysMap(unsafe.Pointer(p), n, h.arena_reserved, &memstats.heap_sys)
-		h.arena_alloc += n
-		if h.arena_alloc > h.arena_used {
-			h.setArenaUsed(h.arena_alloc, true)
+	if size == 0 {
+		// All of the hints failed, so we'll take any
+		// (sufficiently aligned) address the kernel will give
+		// us.
+		v, size = sysReserveAligned(nil, n, heapArenaBytes, &h.arena_reserved)
+		if v == nil {
+			return nil, 0
 		}
 
-		if p&(_PageSize-1) != 0 {
-			throw("misrounded allocation in MHeap_SysAlloc")
-		}
-		return unsafe.Pointer(p)
+		// Create new hints for extending this region.
+		hint := (*arenaHint)(h.arenaHintAlloc.alloc())
+		hint.addr, hint.down = uintptr(v), true
+		hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
+		hint = (*arenaHint)(h.arenaHintAlloc.alloc())
+		hint.addr = uintptr(v) + size
+		hint.next, mheap_.arenaHints = mheap_.arenaHints, hint
 	}
 
-reservationFailed:
-	// If using 64-bit, our reservation is all we have.
-	if sys.PtrSize != 4 {
-		return nil
+	if v := uintptr(v); v+size < v || v+size >= memLimit-1 {
+		// This should be impossible on most architectures,
+		// but it would be really confusing to debug.
+		print("runtime: memory allocated by OS [", hex(v), ", ", hex(v+size), ") exceeds address space limit (", hex(int64(memLimit)), ")\n")
+		throw("memory reservation exceeds address space limit")
 	}
 
-	// On 32-bit, once the reservation is gone we can
-	// try to get memory at a location chosen by the OS.
-	p_size := round(n, _PageSize) + _PageSize
-	p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
-	if p == 0 {
-		return nil
+	if uintptr(v)&(heapArenaBytes-1) != 0 {
+		throw("misrounded allocation in sysAlloc")
 	}
 
-	if p < h.arena_start || p+p_size-h.arena_start > _MaxMem {
-		// This shouldn't be possible because _MaxMem is the
-		// whole address space on 32-bit.
-		top := uint64(h.arena_start) + _MaxMem
-		print("runtime: memory allocated by OS (", hex(p), ") not in usable range [", hex(h.arena_start), ",", hex(top), ")\n")
-		sysFree(unsafe.Pointer(p), p_size, &memstats.heap_sys)
-		return nil
+	// Back the reservation.
+	sysMap(v, size, h.arena_reserved, &memstats.heap_sys)
+
+mapped:
+	// Create arena metadata.
+	for ri := uintptr(v) / heapArenaBytes; ri < (uintptr(v)+size)/heapArenaBytes; ri++ {
+		if h.arenas[ri] != nil {
+			throw("arena already initialized")
+		}
+		var r *heapArena
+		r = (*heapArena)(h.heapArenaAlloc.alloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys))
+		if r == nil {
+			r = (*heapArena)(persistentalloc(unsafe.Sizeof(*r), sys.PtrSize, &memstats.gc_sys))
+			if r == nil {
+				throw("out of memory allocating heap arena metadata")
+			}
+		}
+
+		// Store atomically just in case an object from the
+		// new heap arena becomes visible before the heap lock
+		// is released (which shouldn't happen, but there's
+		// little downside to this).
+		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
 	}
 
-	p += -p & (_PageSize - 1)
-	if p+n > h.arena_used {
-		h.setArenaUsed(p+n, true)
+	// Tell the race detector about the new heap memory.
+	if raceenabled {
+		racemapshadow(v, size)
 	}
 
-	if p&(_PageSize-1) != 0 {
-		throw("misrounded allocation in MHeap_SysAlloc")
+	return
+}
+
+// sysReserveAligned is like sysReserve, but the returned pointer is
+// aligned to align bytes. It may reserve either size or size+align bytes,
+// so it returns the size that was reserved.
+func sysReserveAligned(v unsafe.Pointer, size, align uintptr, reserved *bool) (unsafe.Pointer, uintptr) {
+	// Since the alignment is rather large in uses of this
+	// function, we're not likely to get it by chance, so we ask
+	// for a larger region and remove the parts we don't need.
+	retries := 0
+retry:
+	p := uintptr(sysReserve(v, size+align, reserved))
+	switch {
+	case p == 0:
+		return nil, 0
+	case p&(align-1) == 0:
+		// We got lucky and got an aligned region, so we can
+		// use the whole thing.
+		return unsafe.Pointer(p), size + align
+	case GOOS == "windows":
+		// On Windows we can't release pieces of a
+		// reservation, so we release the whole thing and
+		// re-reserve the aligned sub-region. This may race,
+		// so we may have to try again.
+		sysFree(unsafe.Pointer(p), size+align, nil)
+		p = round(p, align)
+		p2 := sysReserve(unsafe.Pointer(p), size, reserved)
+		if p != uintptr(p2) {
+			// Must have raced. Try again.
+			sysFree(p2, size, nil)
+			if retries++; retries == 100 {
+				throw("failed to allocate aligned heap memory; too many retries")
+			}
+			goto retry
+		}
+		// Success.
+		return p2, size
+	default:
+		// Trim off the unaligned parts.
+		pAligned := round(p, align)
+		sysFree(unsafe.Pointer(p), pAligned-p, nil)
+		end := pAligned + size
+		endLen := (p + size + align) - end
+		if endLen > 0 {
+			sysFree(unsafe.Pointer(end), endLen, nil)
+		}
+		return unsafe.Pointer(pAligned), size
 	}
-	return unsafe.Pointer(p)
 }
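A worked example of the default (non-Windows) trim path, with hypothetical numbers:

	// align = 64MB, size = 256MB. sysReserve(v, size+align) returns
	// p = 0x7f3456000000, which is 32MB past a 64MB boundary. Then:
	//   pAligned = 0x7f3458000000          (p rounded up by 32MB)
	//   head trim [p, pAligned)             frees 32MB
	//   end      = pAligned + size = 0x7f3468000000
	//   tail trim [end, p+size+align)       frees the remaining 32MB
	// leaving exactly [0x7f3458000000, 0x7f3468000000): 256MB, 64MB-aligned.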
 
 // base address for all 0-byte allocations
@@ -1046,6 +1073,34 @@ func persistentalloc1(size, align uintptr, sysStat *uint64) *notInHeap {
 	return p
 }
 
+// linearAlloc is a simple linear allocator that pre-reserves a region
+// of memory and then maps that region as needed. The caller is
+// responsible for locking.
+type linearAlloc struct {
+	next   uintptr // next free byte
+	mapped uintptr // one byte past end of mapped space
+	end    uintptr // end of reserved space
+}
+
+func (l *linearAlloc) init(base, size uintptr) {
+	l.next, l.mapped = base, base
+	l.end = base + size
+}
+
+func (l *linearAlloc) alloc(size, align uintptr, sysStat *uint64) unsafe.Pointer {
+	p := round(l.next, align)
+	if p+size > l.end {
+		return nil
+	}
+	l.next = p + size
+	if pEnd := round(l.next-1, physPageSize); pEnd > l.mapped {
+		// We need to map more of the reserved space.
+		sysMap(unsafe.Pointer(l.mapped), pEnd-l.mapped, true, sysStat)
+		l.mapped = pEnd
+	}
+	return unsafe.Pointer(p)
+}
+
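A minimal usage sketch, mirroring how mallocinit wires up heapArenaAlloc on 32-bit; the 1MB reservation size is illustrative and the code assumes it runs inside package runtime:

	var la linearAlloc
	var reserved bool
	base := uintptr(sysReserve(nil, 1<<20, &reserved)) // reserve address space only
	if base != 0 {
		la.init(base, 1<<20)
		var stat uint64
		p := la.alloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &stat) // maps pages on demand
		_ = p
	}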
 // notInHeap is off-heap memory allocated by a lower-level allocator
 // like sysAlloc or persistentAlloc.
 //
diff --git a/src/runtime/malloc_test.go b/src/runtime/malloc_test.go
index a56d9e69251bde5ed00cd9876d9278f2cecde99b..091fc21199b0ba7e60b3f01505d9eb0307d64d3c 100644
--- a/src/runtime/malloc_test.go
+++ b/src/runtime/malloc_test.go
@@ -7,8 +7,12 @@ package runtime_test
 import (
 	"flag"
 	"fmt"
+	"internal/testenv"
+	"os"
+	"os/exec"
 	"reflect"
 	. "runtime"
+	"strings"
 	"testing"
 	"time"
 	"unsafe"
@@ -152,6 +156,55 @@ func TestTinyAlloc(t *testing.T) {
 	}
 }
 
+type acLink struct {
+	x [1 << 20]byte
+}
+
+var arenaCollisionSink []*acLink
+
+func TestArenaCollision(t *testing.T) {
+	if GOOS == "nacl" {
+		t.Skip("nacl can't self-exec a test")
+	}
+	// Test that mheap.sysAlloc handles collisions with other
+	// memory mappings.
+	if os.Getenv("TEST_ARENA_COLLISION") != "1" {
+		cmd := testenv.CleanCmdEnv(exec.Command(os.Args[0], "-test.run=TestArenaCollision", "-test.v"))
+		cmd.Env = append(cmd.Env, "TEST_ARENA_COLLISION=1")
+		if out, err := cmd.CombinedOutput(); !strings.Contains(string(out), "PASS\n") || err != nil {
+			t.Fatalf("%s\n(exit status %v)", string(out), err)
+		}
+		return
+	}
+	disallowed := [][2]uintptr{}
+	// Drop all but the next 3 hints. 64-bit has a lot of hints,
+	// so it would take a lot of memory to go through all of them.
+	KeepNArenaHints(3)
+	// Consume these 3 hints and force the runtime to find some
+	// fallback hints.
+	for i := 0; i < 5; i++ {
+		// Reserve memory at the next hint so it can't be used
+		// for the heap.
+		start, end := MapNextArenaHint()
+		disallowed = append(disallowed, [2]uintptr{start, end})
+		// Allocate until the runtime tries to use the hint we
+		// just mapped over.
+		hint := GetNextArenaHint()
+		for GetNextArenaHint() == hint {
+			ac := new(acLink)
+			arenaCollisionSink = append(arenaCollisionSink, ac)
+			// The allocation must not have fallen into
+			// one of the reserved regions.
+			p := uintptr(unsafe.Pointer(ac))
+			for _, d := range disallowed {
+				if d[0] <= p && p < d[1] {
+					t.Fatalf("allocation %#x in reserved region [%#x, %#x)", p, d[0], d[1])
+				}
+			}
+		}
+	}
+}
+
 var mallocSink uintptr
 
 func BenchmarkMalloc8(b *testing.B) {
diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go
index c37c82ab67eb8c4836c1b297a379ff69c5398616..c7ee2950ea9b0f84544cfe047bddcf25082e1f46 100644
--- a/src/runtime/mem_windows.go
+++ b/src/runtime/mem_windows.go
@@ -102,6 +102,7 @@ func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
 	*reserved = true
 	// v is just a hint.
 	// First try at v.
+	// This will fail if any of [v, v+n) is already reserved.
 	v = unsafe.Pointer(stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_RESERVE, _PAGE_READWRITE))
 	if v != nil {
 		return v
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
index 9fafcb7ffd4c21a56a9ea2becdb4a6e0b759304e..7c469b104904d7af54089ffe5aa14dbd4498f221 100644
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -96,31 +96,13 @@ type mheap struct {
 	nlargefree  uint64                  // number of frees for large objects (>maxsmallsize)
 	nsmallfree  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
 
-	// range of addresses we might see in the heap
-
-	// The arena_* fields indicate the addresses of the Go heap.
-	//
-	// The maximum range of the Go heap is
-	// [arena_start, arena_start+_MaxMem+1).
-	//
-	// The range of the current Go heap is
-	// [arena_start, arena_used). Parts of this range may not be
-	// mapped, but the metadata structures are always mapped for
-	// the full range.
-	arena_start uintptr
-	arena_used  uintptr // Set with setArenaUsed.
-
-	// The heap is grown using a linear allocator that allocates
-	// from the block [arena_alloc, arena_end). arena_alloc is
-	// often, but *not always* equal to arena_used.
-	arena_alloc uintptr
-	arena_end   uintptr
-
 	// arena_reserved indicates that the memory [arena_alloc,
 	// arena_end) is reserved (e.g., mapped PROT_NONE). If this is
 	// false, we have to be careful not to clobber existing
 	// mappings here. If this is true, then we own the mapping
 	// here and *must* clobber it to use it.
+	//
+	// TODO(austin): Remove.
 	arena_reserved bool
 
 	// arenas is the heap arena index. arenas[va/heapArenaBytes]
@@ -138,7 +120,22 @@ type mheap struct {
 	// to probe any index.
 	arenas *[memLimit / heapArenaBytes]*heapArena
 
-	//_ uint32 // ensure 64-bit alignment of central
+	// heapArenaAlloc is pre-reserved space for allocating heapArena
+	// objects. This is only used on 32-bit, where we pre-reserve
+	// this space to avoid interleaving it with the heap itself.
+	heapArenaAlloc linearAlloc
+
+	// arenaHints is a list of addresses at which to attempt to
+	// add more heap arenas. This is initially populated with a
+	// set of general hint addresses, and grown with the bounds of
+	// actual heap arena ranges.
+	arenaHints *arenaHint
+
+	// arena is a pre-reserved space for allocating heap arenas
+	// (the actual arenas). This is only used on 32-bit.
+	arena linearAlloc
+
+	_ uint32 // ensure 64-bit alignment of central
 
 	// central free lists for small size classes.
 	// the padding makes sure that the MCentrals are
@@ -156,6 +153,7 @@ type mheap struct {
 	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
 	specialprofilealloc   fixalloc // allocator for specialprofile*
 	speciallock           mutex    // lock for special record allocators.
+	arenaHintAlloc        fixalloc // allocator for arenaHints
 
 	unused *specialfinalizer // never set, just here to force the specialfinalizer type into DWARF
 }
@@ -190,6 +188,16 @@ type heapArena struct {
 	spans [pagesPerArena]*mspan
 }
 
+// arenaHint is a hint for where to grow the heap arenas. See
+// mheap_.arenaHints.
+//
+//go:notinheap
+type arenaHint struct {
+	addr uintptr
+	down bool
+	next *arenaHint
+}
+
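A brief illustration of the two hint directions as consumed by mheap.sysAlloc in malloc.go (addresses are examples only):

	// Upward hint:   addr = 0x00c000000000, down = false.
	//   sysAlloc reserves [addr, addr+n) and advances addr to addr+n.
	// Downward hint: addr = 0x00c000000000, down = true.
	//   sysAlloc reserves [addr-n, addr) and lowers addr to addr-n.
	// Downward hints are created when the kernel hands back an arbitrary
	// region, so the heap can later grow off both ends of that region.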
 // An MSpan is a run of pages.
 //
 // When a MSpan is in the heap free list, state == MSpanFree
@@ -458,8 +466,7 @@ func spanOf(p uintptr) *mspan {
 }
 
 // spanOfUnchecked is equivalent to spanOf, but the caller must ensure
-// that p points into the heap (that is, mheap_.arena_start <= p <
-// mheap_.arena_used).
+// that p points into an allocated heap arena.
 //
 // Must be nosplit because it has callers that are nosplit.
 //
@@ -491,6 +498,7 @@ func (h *mheap) init() {
 	h.cachealloc.init(unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
 	h.specialfinalizeralloc.init(unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
 	h.specialprofilealloc.init(unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
+	h.arenaHintAlloc.init(unsafe.Sizeof(arenaHint{}), nil, nil, &memstats.other_sys)
 
 	// Don't zero mspan allocations. Background sweeping can
 	// inspect a span concurrently with allocating it, so it's
@@ -511,46 +519,6 @@ func (h *mheap) init() {
 	for i := range h.central {
 		h.central[i].mcentral.init(spanClass(i))
 	}
-
-	// Map metadata structures. But don't map race detector memory
-	// since we're not actually growing the arena here (and TSAN
-	// gets mad if you map 0 bytes).
-	h.setArenaUsed(h.arena_used, false)
-}
-
-// setArenaUsed extends the usable arena to address arena_used and
-// maps auxiliary VM regions for any newly usable arena space.
-//
-// racemap indicates that this memory should be managed by the race
-// detector. racemap should be true unless this is covering a VM hole.
-func (h *mheap) setArenaUsed(arena_used uintptr, racemap bool) {
-	// Map auxiliary structures *before* h.arena_used is updated.
-	// Waiting to update arena_used until after the memory has been mapped
-	// avoids faults when other threads try access these regions immediately
-	// after observing the change to arena_used.
-
-	// Allocate heap arena metadata.
-	for ri := h.arena_used / heapArenaBytes; ri < (arena_used+heapArenaBytes-1)/heapArenaBytes; ri++ {
-		if h.arenas[ri] != nil {
-			continue
-		}
-		r := (*heapArena)(persistentalloc(unsafe.Sizeof(heapArena{}), sys.PtrSize, &memstats.gc_sys))
-		if r == nil {
-			throw("runtime: out of memory allocating heap arena metadata")
-		}
-		// Store atomically just in case an object from the
-		// new heap arena becomes visible before the heap lock
-		// is released (which shouldn't happen, but there's
-		// little downside to this).
-		atomic.StorepNoWB(unsafe.Pointer(&h.arenas[ri]), unsafe.Pointer(r))
-	}
-
-	// Tell the race detector about the new heap memory.
-	if racemap && raceenabled {
-		racemapshadow(unsafe.Pointer(h.arena_used), arena_used-h.arena_used)
-	}
-
-	h.arena_used = arena_used
 }
 
 // Sweeps spans in list until reclaims at least npages into heap.
@@ -886,32 +854,17 @@ func (h *mheap) allocLarge(npage uintptr) *mspan {
 //
 // h must be locked.
 func (h *mheap) grow(npage uintptr) bool {
-	// Ask for a big chunk, to reduce the number of mappings
-	// the operating system needs to track; also amortizes
-	// the overhead of an operating system mapping.
-	// Allocate a multiple of 64kB.
-	npage = round(npage, (64<<10)/_PageSize)
 	ask := npage << _PageShift
-	if ask < _HeapAllocChunk {
-		ask = _HeapAllocChunk
-	}
-
-	v := h.sysAlloc(ask)
+	v, size := h.sysAlloc(ask)
 	if v == nil {
-		if ask > npage<<_PageShift {
-			ask = npage << _PageShift
-			v = h.sysAlloc(ask)
-		}
-		if v == nil {
-			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
-			return false
-		}
+		print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
+		return false
 	}
 
 	// Create a fake "in use" span and free it, so that the
 	// right coalescing happens.
 	s := (*mspan)(h.spanalloc.alloc())
-	s.init(uintptr(v), ask>>_PageShift)
+	s.init(uintptr(v), size/pageSize)
 	h.setSpans(s.base(), s.npages, s)
 	atomic.Store(&s.sweepgen, h.sweepgen)
 	s.state = _MSpanInUse
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index c75ca747d03d132567cf66306da6cdb10b0c7e8c..f67d05414daf2104f950d581e1d93343cf83dd2d 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -662,6 +662,9 @@ func purgecachedstats(c *mcache) {
 // overflow errors.
 //go:nosplit
 func mSysStatInc(sysStat *uint64, n uintptr) {
+	if sysStat == nil {
+		return
+	}
 	if sys.BigEndian {
 		atomic.Xadd64(sysStat, int64(n))
 		return
@@ -676,6 +679,9 @@ func mSysStatInc(sysStat *uint64, n uintptr) {
 // mSysStatInc apply.
 //go:nosplit
 func mSysStatDec(sysStat *uint64, n uintptr) {
+	if sysStat == nil {
+		return
+	}
 	if sys.BigEndian {
 		atomic.Xadd64(sysStat, -int64(n))
 		return
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 9ed6b1d774d22d4224b1da7cddd74a87511e46cb..029bff5af425a56deba99f70bbe1ce2c53df2742 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -144,7 +144,7 @@ var stackpoolmu mutex
 // Global pool of large stack spans.
 var stackLarge struct {
 	lock mutex
-	free [_MHeapMap_Bits]mSpanList // free lists by log_2(s.npages)
+	free [memLimitBits - pageShift]mSpanList // free lists by log_2(s.npages)
 }
 
 func stackinit() {
diff --git a/test/chancap.go b/test/chancap.go
index b08478a13c2336625796c0afd5566712912ea065..9675e38bdb1a76922fcd83ffa4b15edbe64a7b9a 100644
--- a/test/chancap.go
+++ b/test/chancap.go
@@ -42,11 +42,10 @@ func main() {
 	shouldPanic("makechan: size out of range", func() { _ = make(T, n) })
 	shouldPanic("makechan: size out of range", func() { _ = make(T, int64(n)) })
 	if ptrSize == 8 {
-		n = 1 << 20
-		n <<= 20
-		shouldPanic("makechan: size out of range", func() { _ = make(T, n) })
-		n <<= 20
-		shouldPanic("makechan: size out of range", func() { _ = make(T, n) })
+		var n2 int64 = 1 << 50
+		shouldPanic("makechan: size out of range", func() { _ = make(T, int(n2)) })
+		n2 = 1<<63 - 1
+		shouldPanic("makechan: size out of range", func() { _ = make(T, int(n2)) })
 	} else {
 		n = 1<<31 - 1
 		shouldPanic("makechan: size out of range", func() { _ = make(T, n) })
diff --git a/test/fixedbugs/bug273.go b/test/fixedbugs/bug273.go
index c04f2116c5fecb418f6cec87308090c448e5f28f..7305c6063ccdc8193e9f7a9d525c445f1369019e 100644
--- a/test/fixedbugs/bug273.go
+++ b/test/fixedbugs/bug273.go
@@ -8,13 +8,15 @@
 
 package main
 
+import "unsafe"
+
 var bug = false
 
 var minus1 = -1
 var five = 5
-var big int64 = 10 | 1<<32
+var big int64 = 10 | 1<<40
 
-type block [1<<19]byte
+type block [1 << 19]byte
 
 var g1 []block
 
@@ -48,9 +50,10 @@ func bigcap() {
 	g1 = make([]block, 10, big)
 }
 
-type cblock [1<<16-1]byte
+type cblock [1<<16 - 1]byte
 
 var g4 chan cblock
+
 func badchancap() {
 	g4 = make(chan cblock, minus1)
 }
@@ -60,7 +63,8 @@ func bigchancap() {
 }
 
 func overflowchan() {
-	g4 = make(chan cblock, 1<<30)
+	const ptrSize = unsafe.Sizeof(uintptr(0))
+	g4 = make(chan cblock, 1<<(30*(ptrSize/4)))
 }
 
 func main() {
diff --git a/test/fixedbugs/issue4085b.go b/test/fixedbugs/issue4085b.go
index b91bbd748a3572932b981d6f72ea6163d24953ed..db9a15894ba3fba397998510933c3c9137950d10 100644
--- a/test/fixedbugs/issue4085b.go
+++ b/test/fixedbugs/issue4085b.go
@@ -21,13 +21,12 @@ func main() {
 	shouldPanic("cap out of range", func() { _ = make(T, 0, int64(n)) })
 	var t *byte
 	if unsafe.Sizeof(t) == 8 {
-		n = 1 << 20
-		n <<= 20
-		shouldPanic("len out of range", func() { _ = make(T, n) })
-		shouldPanic("cap out of range", func() { _ = make(T, 0, n) })
-		n <<= 20
-		shouldPanic("len out of range", func() { _ = make(T, n) })
-		shouldPanic("cap out of range", func() { _ = make(T, 0, n) })
+		var n2 int64 = 1 << 50
+		shouldPanic("len out of range", func() { _ = make(T, int(n2)) })
+		shouldPanic("cap out of range", func() { _ = make(T, 0, int(n2)) })
+		n2 = 1<<63 - 1
+		shouldPanic("len out of range", func() { _ = make(T, int(n2)) })
+		shouldPanic("cap out of range", func() { _ = make(T, 0, int(n2)) })
 	} else {
 		n = 1<<31 - 1
 		shouldPanic("len out of range", func() { _ = make(T, n) })