// Copyright 2014 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
package runtime

import "unsafe"

const (
    	debugMalloc = false
    
    
    	flagNoScan = _FlagNoScan
    	flagNoZero = _FlagNoZero
    
    	maxTinySize   = _TinySize
    	tinySizeClass = _TinySizeClass
    	maxSmallSize  = _MaxSmallSize
    
    	pageShift = _PageShift
    	pageSize  = _PageSize
    	pageMask  = _PageMask
    
    	bitsPerPointer  = _BitsPerPointer
    	bitsMask        = _BitsMask
    	pointersPerByte = _PointersPerByte
    	maxGCMask       = _MaxGCMask
    	bitsDead        = _BitsDead
    	bitsPointer     = _BitsPointer
    
    	mSpanInUse = _MSpanInUse
    
	concurrentSweep = _ConcurrentSweep
)

    // Page number (address>>pageShift)
    type pageID uintptr
    
    
    // base address for all 0-byte allocations
    var zerobase uintptr
    
    // Determine whether to initiate a GC.
    // Currently the primitive heuristic we use will start a new
    // concurrent GC when approximately half the available space
    // made available by the last GC cycle has been used.
// If the GC is already working, there is no need to trigger another one.
// This should establish a feedback loop where if the GC does not
// have sufficient time to complete then more memory will be
// requested from the OS, increasing the heap size and thus allowing future
// GCs more time to complete.
// Reads of memstats.heap_alloc and memstats.next_gc have benign races:
// a false negative simply does not start a GC, and a false positive
// starts a GC needlessly. Neither has correctness issues.
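// Worked example (illustrative numbers, not from the original source): since
// heap_alloc+heap_alloc*3/4 equals heap_alloc*7/4, the condition below first
// holds once heap_alloc reaches next_gc*4/7, e.g. at 4MB when next_gc is 7MB.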
    func shouldtriggergc() bool {
    	return memstats.heap_alloc+memstats.heap_alloc*3/4 >= memstats.next_gc && atomicloaduint(&bggc.working) == 0
    }
    
    
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.
func mallocgc(size uintptr, typ *_type, flags uint32) unsafe.Pointer {
    
    	shouldhelpgc := false
    
    	if size == 0 {
    
    		return unsafe.Pointer(&zerobase)
    
    	}
    	size0 := size
    
    
    	if flags&flagNoScan == 0 && typ == nil {
    
    		throw("malloc missing type")
    
    	// This function must be atomic wrt GC, but for performance reasons
    	// we don't acquirem/releasem on fast path. The code below does not have
    	// split stack checks, so it can't be preempted by GC.
    
    	// Functions like roundup/add are inlined. And systemstack/racemalloc are nosplit.
    
    	// If debugMalloc = true, these assumptions are checked below.
    	if debugMalloc {
    		mp := acquirem()
    		if mp.mallocing != 0 {
    
    			throw("malloc deadlock")
    
    		}
    		mp.mallocing = 1
    		if mp.curg != nil {
    
			mp.curg.stackguard0 = ^uintptr(0xfff) | 0xbad
		}
	}

	c := gomcache()
    	var s *mspan
    	var x unsafe.Pointer
    	if size <= maxSmallSize {
    		if flags&flagNoScan != 0 && size < maxTinySize {
    			// Tiny allocator.
    			//
    			// Tiny allocator combines several tiny allocation requests
    			// into a single memory block. The resulting memory block
    			// is freed when all subobjects are unreachable. The subobjects
    			// must be FlagNoScan (don't have pointers), this ensures that
    			// the amount of potentially wasted memory is bounded.
    			//
    			// Size of the memory block used for combining (maxTinySize) is tunable.
			// Current setting is 16 bytes, which relates to 2x worst case memory
			// wastage (when all but one of the subobjects are unreachable).
			// 8 bytes would result in no wastage at all, but provides fewer
			// opportunities for combining.
			// 32 bytes provides more opportunities for combining,
			// but can lead to 4x worst case wastage.
			// The best case win is 8x regardless of block size.
    			//
    			// Objects obtained from tiny allocator must not be freed explicitly.
    			// So when an object will be freed explicitly, we ensure that
    			// its size >= maxTinySize.
    			//
			// SetFinalizer has a special case for objects potentially coming
			// from the tiny allocator; in that case it allows setting finalizers
			// for an inner byte of a memory block.
    			//
			// The main targets of the tiny allocator are small strings and
			// standalone escaping variables. On a json benchmark
			// the allocator reduces the number of allocations by ~12% and
			// reduces the heap size by ~20%.
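			//
			// Illustrative walk-through (not from the original source): with a
			// fresh 16-byte tiny block, a 5-byte noscan request returns offset 0
			// and leaves tinyoffset = 5; a following 8-byte request is rounded up
			// to offset 8 and leaves tinyoffset = 16, so both share one block and
			// the next tiny request allocates a new block.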
    
    			off := c.tinyoffset
    			// Align tiny pointer for required (conservative) alignment.
    			if size&7 == 0 {
    				off = round(off, 8)
    			} else if size&3 == 0 {
    				off = round(off, 4)
    			} else if size&1 == 0 {
    				off = round(off, 2)
    			}
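			// For example (illustrative): a 12-byte request has 12&7 != 0 but
			// 12&3 == 0, so off is rounded up to a multiple of 4, while a
			// 5-byte request is not rounded at all.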
    
    			if off+size <= maxTinySize && c.tiny != nil {
    
    				// The object fits into existing tiny block.
    				x = add(c.tiny, off)
    				c.tinyoffset = off + size
    				c.local_tinyallocs++
    				if debugMalloc {
    					mp := acquirem()
    					if mp.mallocing == 0 {
    						throw("bad malloc")
    					}
    					mp.mallocing = 0
    					if mp.curg != nil {
						mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
					}
    					// Note: one releasem for the acquirem just above.
    					// The other for the acquirem at start of malloc.
    					releasem(mp)
					releasem(mp)
				}
				return x
    			}
    			// Allocate a new maxTinySize block.
    			s = c.alloc[tinySizeClass]
			v := s.freelist
			if v == nil {
				systemstack(func() {
    					mCache_Refill(c, tinySizeClass)
    				})
    
    				shouldhelpgc = true
    
    				s = c.alloc[tinySizeClass]
    				v = s.freelist
			}
			s.freelist = v.next
    			s.ref++
    			//TODO: prefetch v.next
    			x = unsafe.Pointer(v)
    			(*[2]uint64)(x)[0] = 0
    			(*[2]uint64)(x)[1] = 0
    			// See if we need to replace the existing tiny block with the new one
    			// based on amount of remaining free space.
    
    			if size < c.tinyoffset {
    				c.tiny = x
    				c.tinyoffset = size
    
    			}
    			size = maxTinySize
    		} else {
    			var sizeclass int8
    			if size <= 1024-8 {
    				sizeclass = size_to_class8[(size+7)>>3]
    			} else {
    				sizeclass = size_to_class128[(size-1024+127)>>7]
    			}
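			// For example (illustrative): a 200-byte request indexes
			// size_to_class8[(200+7)>>3] = size_to_class8[25]; requests larger
			// than 1024-8 bytes (up to maxSmallSize) use the coarser
			// 128-byte-granularity size_to_class128 table instead.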
    			size = uintptr(class_to_size[sizeclass])
    			s = c.alloc[sizeclass]
			v := s.freelist
			if v == nil {
				systemstack(func() {
    					mCache_Refill(c, int32(sizeclass))
    				})
    
    				shouldhelpgc = true
    
    				s = c.alloc[sizeclass]
    				v = s.freelist
			}
			s.freelist = v.next
    			s.ref++
    			//TODO: prefetch
    			x = unsafe.Pointer(v)
			if flags&flagNoZero == 0 {
				v.next = nil
    				if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 {
    					memclr(unsafe.Pointer(v), size)
    				}
    			}
    		}
    
    		c.local_cachealloc += intptr(size)
    
    	} else {
    
		shouldhelpgc = true
		systemstack(func() {
    			s = largeAlloc(size, uint32(flags))
    		})
    
    		x = unsafe.Pointer(uintptr(s.start << pageShift))
    		size = uintptr(s.elemsize)
    	}
    
    
    	if flags&flagNoScan != 0 {
    		// All objects are pre-marked as noscan.
    		goto marked
    	}
    
    
    	// If allocating a defer+arg block, now that we've picked a malloc size
    	// large enough to hold everything, cut the "asked for" size down to
    	// just the defer header, so that the GC bitmap will record the arg block
    	// as containing nothing at all (as if it were unused space at the end of
    	// a malloc block caused by size rounding).
    	// The defer arg areas are scanned as part of scanstack.
    	if typ == deferType {
    		size0 = unsafe.Sizeof(_defer{})
    	}
    
    
	// From here until the marked label we mark the object as allocated
	// and store type info in the GC bitmap.
    
    	{
    		arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
    		off := (uintptr(x) - arena_start) / ptrSize
    
    		xbits := (*uint8)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
    		shift := (off % wordsPerBitmapByte) * gcBits
    
    		if debugMalloc && ((*xbits>>shift)&(bitMask|bitPtrMask)) != bitBoundary {
    			println("runtime: bits =", (*xbits>>shift)&(bitMask|bitPtrMask))
    
    			throw("bad bits in markallocated")
    
    		var ti, te uintptr
    		var ptrmask *uint8
    
		if size == ptrSize {
			// It's one word and it has pointers, so it must be a pointer.
    			// The bitmap byte is shared with the one-word object
    			// next to it, and concurrent GC might be marking that
    			// object, so we must use an atomic update.
			atomicor8(xbits, (bitsPointer<<2)<<shift)
			goto marked
		}
    		if typ.kind&kindGCProg != 0 {
    			nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
    			masksize := nptr
    			if masksize%2 != 0 {
				masksize *= 2 // repeated
			}
    			masksize = masksize * pointersPerByte / 8 // 4 bits per word
    			masksize++                                // unroll flag in the beginning
    			if masksize > maxGCMask && typ.gc[1] != 0 {
    
    				// write barriers have not been updated to deal with this case yet.
    
    				throw("maxGCMask too small for now")
    
    				// If the mask is too large, unroll the program directly
    				// into the GC bitmap. It's 7 times slower than copying
    				// from the pre-unrolled mask, but saves 1/16 of type size
    				// memory for the mask.
    
    				systemstack(func() {
    					unrollgcproginplace_m(x, typ, size, size0)
				})
				goto marked
			}
    			ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
    
    			// Check whether the program is already unrolled
    			// by checking if the unroll flag byte is set
    			maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
    			if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
    
    				systemstack(func() {
    					unrollgcprog_m(typ)
				})
			}
    			ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
    		} else {
    
    			ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
    
    		}
    		if size == 2*ptrSize {
    
			*xbits = *ptrmask | bitBoundary
			goto marked
		}

    		te = uintptr(typ.size) / ptrSize
		// If the type occupies an odd number of words, its mask is repeated.
    		if te%2 == 0 {
    			te /= 2
    		}
    
    		// Copy pointer bitmask into the bitmap.
    		for i := uintptr(0); i < size0; i += 2 * ptrSize {
    
    			v := *(*uint8)(add(unsafe.Pointer(ptrmask), ti))
    			ti++
    			if ti == te {
				ti = 0
			}
    			if i == 0 {
    				v |= bitBoundary
    			}
    			if i+ptrSize == size0 {
    				v &^= uint8(bitPtrMask << 4)
    			}
    
    
    			*xbits = v
			xbits = (*byte)(add(unsafe.Pointer(xbits), ^uintptr(0)))
		}
    		if size0%(2*ptrSize) == 0 && size0 < size {
    			// Mark the word after last object's word as bitsDead.
    
			*xbits = bitsDead << 2
		}
	}

marked:
    	// GCmarkterminate allocates black
    	// All slots hold nil so no scanning is needed.
    	// This may be racing with GC so do it atomically if there can be
    	// a race marking the bit.
    	if gcphase == _GCmarktermination {
    
    		systemstack(func() {
    			gcmarknewobject_m(uintptr(x))
		})
	}

    	if mheap_.shadow_enabled {
    		clearshadow(uintptr(x), size)
    	}
    
    
    	if raceenabled {
    		racemalloc(x, size)
    	}
    
    
    	if debugMalloc {
    		mp := acquirem()
    		if mp.mallocing == 0 {
    
    			throw("bad malloc")
    
    		}
    		mp.mallocing = 0
    		if mp.curg != nil {
    
			mp.curg.stackguard0 = mp.curg.stack.lo + _StackGuard
		}
    		// Note: one releasem for the acquirem just above.
		// The other for the acquirem at start of malloc.
		releasem(mp)
		releasem(mp)
	}

    	if debug.allocfreetrace != 0 {
    		tracealloc(x, size, typ)
    	}
    
    
    	if rate := MemProfileRate; rate > 0 {
    		if size < uintptr(rate) && int32(size) < c.next_sample {
    			c.next_sample -= int32(size)
    		} else {
    
    			mp := acquirem()
    
			profilealloc(mp, x, size)
			releasem(mp)
		}
	}

    	if shouldtriggergc() {
    
    		gogc(0)
    
    	} else if shouldhelpgc && atomicloaduint(&bggc.working) == 1 {
		// bggc.lock is not taken since the race on bggc.working is benign.
		// At worst we don't call gchelpwork.
		// Delay the gchelpwork until the epilogue so that it doesn't
		// interfere with the inner workings of malloc, such as
		// mcache refills that might happen while doing the gchelpwork.
		systemstack(gchelpwork)
	}

	return x
}

    func loadPtrMask(typ *_type) []uint8 {
    	var ptrmask *uint8
    	nptr := (uintptr(typ.size) + ptrSize - 1) / ptrSize
    	if typ.kind&kindGCProg != 0 {
    		masksize := nptr
    		if masksize%2 != 0 {
    			masksize *= 2 // repeated
    		}
    		masksize = masksize * pointersPerByte / 8 // 4 bits per word
    		masksize++                                // unroll flag in the beginning
    		if masksize > maxGCMask && typ.gc[1] != 0 {
    			// write barriers have not been updated to deal with this case yet.
    
    			throw("maxGCMask too small for now")
    
    		}
    		ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
    		// Check whether the program is already unrolled
    		// by checking if the unroll flag byte is set
    		maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
    		if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
    
    			systemstack(func() {
    				unrollgcprog_m(typ)
    			})
    
    		}
    		ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
    	} else {
    		ptrmask = (*uint8)(unsafe.Pointer(typ.gc[0])) // pointer to unrolled mask
    	}
    	return (*[1 << 30]byte)(unsafe.Pointer(ptrmask))[:(nptr+1)/2]
    }
    
    
    // implementation of new builtin
func newobject(typ *_type) unsafe.Pointer {
	flags := uint32(0)
    	if typ.kind&kindNoPointers != 0 {
    		flags |= flagNoScan
    	}
    
	return mallocgc(uintptr(typ.size), typ, flags)
}

    //go:linkname reflect_unsafe_New reflect.unsafe_New
    func reflect_unsafe_New(typ *_type) unsafe.Pointer {
    	return newobject(typ)
    }
    
    
    // implementation of make builtin for slices
func newarray(typ *_type, n uintptr) unsafe.Pointer {
	flags := uint32(0)
    	if typ.kind&kindNoPointers != 0 {
    		flags |= flagNoScan
    	}
    
    	if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
    
    		panic("runtime: allocation size out of range")
    	}
    
	return mallocgc(uintptr(typ.size)*n, typ, flags)
}

    //go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
    func reflect_unsafe_NewArray(typ *_type, n uintptr) unsafe.Pointer {
    	return newarray(typ, n)
    }
    
    
    // rawmem returns a chunk of pointerless memory.  It is
    // not zeroed.
    func rawmem(size uintptr) unsafe.Pointer {
    
	return mallocgc(size, nil, flagNoScan|flagNoZero)
}

    func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
    	c := mp.mcache
    	rate := MemProfileRate
    	if size < uintptr(rate) {
    		// pick next profile time
    		// If you change this, also change allocmcache.
    		if rate > 0x3fffffff { // make 2*rate not overflow
    			rate = 0x3fffffff
    		}
    
    		next := int32(fastrand1()) % (2 * int32(rate))
    
    		// Subtract the "remainder" of the current allocation.
    		// Otherwise objects that are close in size to sampling rate
    		// will be under-sampled, because we consistently discard this remainder.
    		next -= (int32(size) - c.next_sample)
    		if next < 0 {
    			next = 0
    		}
    		c.next_sample = next
    	}
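	// Worked example (illustrative numbers): suppose rate = 512*1024 and a
	// 1000-byte allocation trips the sampler while next_sample = 300. The next
	// trigger is drawn from [0, 2*rate), and the 700 bytes of this allocation
	// beyond the trigger point are subtracted from it, so objects close in
	// size to the sampling rate are not systematically under-sampled.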
    
    
	mProf_Malloc(x, size)
}

// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
// all goroutines see the new barrier.
    func gcinstallmarkwb() {
    	gcphase = _GCmark
    }
    
    // force = 0 - start concurrent GC
    // force = 1 - do STW GC regardless of current heap usage
// force = 2 - do STW GC and eager sweep
    
    func gogc(force int32) {
    
    	// The gc is turned off (via enablegc) until the bootstrap has completed.
    	// Also, malloc gets called in the guts of a number of libraries that might be
    	// holding locks. To avoid deadlocks during stoptheworld, don't bother
    	// trying to run gc while holding a lock. The next mallocgc without a lock
    	// will do the gc instead.
    
    	mp := acquirem()
    
    	if gp := getg(); gp == mp.g0 || mp.locks > 1 || !memstats.enablegc || panicking != 0 || gcpercent < 0 {
    
    		releasem(mp)
    		return
    	}
    	releasem(mp)
    
    	if force == 0 {
    		lock(&bggc.lock)
    		if !bggc.started {
    			bggc.working = 1
    			bggc.started = true
    			go backgroundgc()
    		} else if bggc.working == 0 {
    			bggc.working = 1
    			ready(bggc.g)
    		}
    		unlock(&bggc.lock)
    	} else {
		gcwork(force)
	}
    }
    
    func gcwork(force int32) {
    
    	semacquire(&worldsema, false)
    
    	// Pick up the remaining unswept/not being swept spans concurrently
    	for gosweepone() != ^uintptr(0) {
    		sweep.nbgsweep++
    	}
    
    
    	// Ok, we're doing it!  Stop everybody else
    
    	mp := acquirem()
    
    	mp.gcing = 1
    
    	releasem(mp)
    
    	gctimer.count++
    	if force == 0 {
    		gctimer.cycle.sweepterm = nanotime()
    	}
    
    	// Pick up the remaining unswept/not being swept spans before we STW
    	for gosweepone() != ^uintptr(0) {
    		sweep.nbgsweep++
    	}
    
    	systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
    
    	if force == 0 {            // Do as much work concurrently as possible
    
    		gcphase = _GCscan
    
    		systemstack(starttheworld)
    
    		gctimer.cycle.scan = nanotime()
    
    		// Do a concurrent heap scan before we stop the world.
    
    		systemstack(gcscan_m)
    
    		gctimer.cycle.installmarkwb = nanotime()
    
    		systemstack(stoptheworld)
    
    		systemstack(gcinstallmarkwb)
    
    		systemstack(starttheworld)
    
    		gctimer.cycle.mark = nanotime()
    
    		systemstack(gcmark_m)
    
    		gctimer.cycle.markterm = nanotime()
    
    		systemstack(stoptheworld)
		systemstack(gcinstalloffwb_m)
	}

    	startTime := nanotime()
    
    	if mp != acquirem() {
    
    		throw("gogc: rescheduled")
    
    
    	clearpools()
    
    	// Run gc on the g0 stack.  We do this so that the g stack
    	// we're currently running on will no longer change.  Cuts
    	// the root set down a bit (g0 stacks are not scanned, and
    	// we don't need to scan gc's internal state).  We also
    	// need to switch to g0 so we can shrink the stack.
    	n := 1
    	if debug.gctrace > 1 {
    		n = 2
    	}
    
    	eagersweep := force >= 2
    
    	for i := 0; i < n; i++ {
    		if i > 0 {
    
			// refresh start time if doing a second GC
			startTime = nanotime()
    		}
    		// switch to g0, call gc, then switch back
    
    		systemstack(func() {
    			gc_m(startTime, eagersweep)
		})
	}

    	systemstack(func() {
    		gccheckmark_m(startTime, eagersweep)
    	})
    
    	// all done
    	mp.gcing = 0
    
    
    	if force == 0 {
    		gctimer.cycle.sweep = nanotime()
    	}
    
    
    	semrelease(&worldsema)
    
    
    	if force == 0 {
    		if gctimer.verbose > 1 {
    			GCprinttimes()
    		} else if gctimer.verbose > 0 {
    			calctimes() // ignore result
    		}
    	}
    
    
    	releasem(mp)
    
    
    	// now that gc is done, kick off finalizer thread if needed
    	if !concurrentSweep {
		// give the queued finalizers, if any, a chance to run
		Gosched()
	}
}

    // gctimes records the time in nanoseconds of each phase of the concurrent GC.
    type gctimes struct {
    	sweepterm     int64 // stw
    
    	scan          int64
    	installmarkwb int64 // stw
    
    	mark          int64
    	markterm      int64 // stw
    	sweep         int64
    }
    
    // gcchronograph holds timer information related to GC phases
    // max records the maximum time spent in each GC phase since GCstarttimes.
    // total records the total time spent in each GC phase since GCstarttimes.
// cycle records the absolute time (as returned by nanotime()) that each GC phase last started at.
    type gcchronograph struct {
    	count    int64
    	verbose  int64
    	maxpause int64
    	max      gctimes
    	total    gctimes
    	cycle    gctimes
    }
    
    var gctimer gcchronograph
    
    
    // GCstarttimes initializes the gc times. All previous times are lost.
    
    func GCstarttimes(verbose int64) {
    	gctimer = gcchronograph{verbose: verbose}
    }
    
    // GCendtimes stops the gc timers.
    func GCendtimes() {
    	gctimer.verbose = 0
    }
    
    // calctimes converts gctimer.cycle into the elapsed times, updates gctimer.total
    // and updates gctimer.max with the max pause time.
    func calctimes() gctimes {
    	var times gctimes
    
    	var max = func(a, b int64) int64 {
    		if a > b {
    			return a
    		}
    		return b
    	}
    
    	times.sweepterm = gctimer.cycle.scan - gctimer.cycle.sweepterm
    	gctimer.total.sweepterm += times.sweepterm
    	gctimer.max.sweepterm = max(gctimer.max.sweepterm, times.sweepterm)
    	gctimer.maxpause = max(gctimer.maxpause, gctimer.max.sweepterm)
    
    	times.scan = gctimer.cycle.installmarkwb - gctimer.cycle.scan
    	gctimer.total.scan += times.scan
    	gctimer.max.scan = max(gctimer.max.scan, times.scan)
    
    	times.installmarkwb = gctimer.cycle.mark - gctimer.cycle.installmarkwb
    	gctimer.total.installmarkwb += times.installmarkwb
    	gctimer.max.installmarkwb = max(gctimer.max.installmarkwb, times.installmarkwb)
    	gctimer.maxpause = max(gctimer.maxpause, gctimer.max.installmarkwb)
    
    	times.mark = gctimer.cycle.markterm - gctimer.cycle.mark
    	gctimer.total.mark += times.mark
    	gctimer.max.mark = max(gctimer.max.mark, times.mark)
    
    	times.markterm = gctimer.cycle.sweep - gctimer.cycle.markterm
    	gctimer.total.markterm += times.markterm
    	gctimer.max.markterm = max(gctimer.max.markterm, times.markterm)
    	gctimer.maxpause = max(gctimer.maxpause, gctimer.max.markterm)
    
    	return times
    }
    
    // GCprinttimes prints latency information in nanoseconds about various
    // phases in the GC. The information for each phase includes the maximum pause
    // and total time since the most recent call to GCstarttimes as well as
// the information from the most recent Concurrent GC cycle. Calls from the
    // application to runtime.GC() are ignored.
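//
// Illustrative output shape (values are made up):
//
//	GC: #3 1200000ns @123456789 pause=90000 maxpause=120000 goroutines=12 gomaxprocs=4
//	GC:     sweep term: 30000ns	max=45000	total=210000	procs=4
//
// followed by one such line per phase (scan, install wb, mark, mark term).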
    func GCprinttimes() {
    
    	if gctimer.verbose == 0 {
    		println("GC timers not enabled")
    		return
    	}
    
    
    	// Explicitly put times on the heap so printPhase can use it.
    	times := new(gctimes)
    	*times = calctimes()
    
    	cycletime := gctimer.cycle.sweep - gctimer.cycle.sweepterm
    
    	pause := times.sweepterm + times.installmarkwb + times.markterm
    	gomaxprocs := GOMAXPROCS(-1)
    
    	printlock()
    	print("GC: #", gctimer.count, " ", cycletime, "ns @", gctimer.cycle.sweepterm, " pause=", pause, " maxpause=", gctimer.maxpause, " goroutines=", allglen, " gomaxprocs=", gomaxprocs, "\n")
    	printPhase := func(label string, get func(*gctimes) int64, procs int) {
    		print("GC:     ", label, " ", get(times), "ns\tmax=", get(&gctimer.max), "\ttotal=", get(&gctimer.total), "\tprocs=", procs, "\n")
    	}
    	printPhase("sweep term:", func(t *gctimes) int64 { return t.sweepterm }, gomaxprocs)
    	printPhase("scan:      ", func(t *gctimes) int64 { return t.scan }, 1)
    	printPhase("install wb:", func(t *gctimes) int64 { return t.installmarkwb }, gomaxprocs)
    	printPhase("mark:      ", func(t *gctimes) int64 { return t.mark }, 1)
    	printPhase("mark term: ", func(t *gctimes) int64 { return t.markterm }, gomaxprocs)
	printunlock()
}

    // GC runs a garbage collection.
    func GC() {
    	gogc(2)
    }
    
    
    // linker-provided
    var noptrdata struct{}
    
    var enoptrdata struct{}
    var noptrbss struct{}
    
    var enoptrbss struct{}
    
    
    // SetFinalizer sets the finalizer associated with x to f.
    // When the garbage collector finds an unreachable block
    // with an associated finalizer, it clears the association and runs
    // f(x) in a separate goroutine.  This makes x reachable again, but
    // now without an associated finalizer.  Assuming that SetFinalizer
    // is not called again, the next time the garbage collector sees
    // that x is unreachable, it will free x.
    //
    // SetFinalizer(x, nil) clears any finalizer associated with x.
    //
    // The argument x must be a pointer to an object allocated by
    // calling new or by taking the address of a composite literal.
    // The argument f must be a function that takes a single argument
    // to which x's type can be assigned, and can have arbitrary ignored return
    // values. If either of these is not true, SetFinalizer aborts the
    // program.
    //
    // Finalizers are run in dependency order: if A points at B, both have
    // finalizers, and they are otherwise unreachable, only the finalizer
    // for A runs; once A is freed, the finalizer for B can run.
    // If a cyclic structure includes a block with a finalizer, that
    // cycle is not guaranteed to be garbage collected and the finalizer
    // is not guaranteed to run, because there is no ordering that
    // respects the dependencies.
    //
    // The finalizer for x is scheduled to run at some arbitrary time after
    // x becomes unreachable.
    // There is no guarantee that finalizers will run before a program exits,
    // so typically they are useful only for releasing non-memory resources
    // associated with an object during a long-running program.
    // For example, an os.File object could use a finalizer to close the
    // associated operating system file descriptor when a program discards
    // an os.File without calling Close, but it would be a mistake
    // to depend on a finalizer to flush an in-memory I/O buffer such as a
    // bufio.Writer, because the buffer would not be flushed at program exit.
    //
    // It is not guaranteed that a finalizer will run if the size of *x is
    // zero bytes.
//
// It is not guaranteed that a finalizer will run for objects allocated
// in initializers for package-level variables. Such objects may be
// linker-allocated, not heap-allocated.
//
    // A single goroutine runs all finalizers for a program, sequentially.
    // If a finalizer must run for a long time, it should do so by starting
    // a new goroutine.
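//
// Illustrative sketch (not part of the original comment; fd and closeFD are
// hypothetical):
//
//	type handle struct{ fd int }
//	h := &handle{fd: fd}
//	runtime.SetFinalizer(h, func(h *handle) { closeFD(h.fd) })
//
// The finalizer eventually releases the descriptor if the caller never does so
// explicitly, but it must not be relied on for prompt cleanup.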
    func SetFinalizer(obj interface{}, finalizer interface{}) {
    	e := (*eface)(unsafe.Pointer(&obj))
    
    	etyp := e._type
    	if etyp == nil {
    
    		throw("runtime.SetFinalizer: first argument is nil")
    
    	if etyp.kind&kindMask != kindPtr {
    
    		throw("runtime.SetFinalizer: first argument is " + *etyp._string + ", not pointer")
    
    	}
    	ot := (*ptrtype)(unsafe.Pointer(etyp))
    	if ot.elem == nil {
    
    		throw("nil elem type!")
    
    	}
    
    	// find the containing object
    	_, base, _ := findObject(e.data)
    
    	if base == nil {
    
    		// 0-length objects are okay.
    		if e.data == unsafe.Pointer(&zerobase) {
    			return
    		}
    
    		// Global initializers might be linker-allocated.
    		//	var Foo = &Object{}
    		//	func main() {
    		//		runtime.SetFinalizer(Foo, nil)
    		//	}
    
    		// The relevant segments are: noptrdata, data, bss, noptrbss.
    		// We cannot assume they are in any order or even contiguous,
    		// due to external linking.
    		if uintptr(unsafe.Pointer(&noptrdata)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&enoptrdata)) ||
    			uintptr(unsafe.Pointer(&data)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&edata)) ||
    			uintptr(unsafe.Pointer(&bss)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&ebss)) ||
			uintptr(unsafe.Pointer(&noptrbss)) <= uintptr(e.data) && uintptr(e.data) < uintptr(unsafe.Pointer(&enoptrbss)) {
			return
		}
		throw("runtime.SetFinalizer: pointer not in allocated block")
    
    	}
    
    	if e.data != base {
		// As an implementation detail, we allow setting finalizers for an inner byte
    		// of an object if it could come from tiny alloc (see mallocgc for details).
    		if ot.elem == nil || ot.elem.kind&kindNoPointers == 0 || ot.elem.size >= maxTinySize {
    
    			throw("runtime.SetFinalizer: pointer not at beginning of allocated block")
    
    	}
    
    	f := (*eface)(unsafe.Pointer(&finalizer))
    	ftyp := f._type
    
    	if ftyp == nil {
    
    		// switch to system stack and remove finalizer
		systemstack(func() {
			removefinalizer(e.data)
		})
    		return
    	}
    
    	if ftyp.kind&kindMask != kindFunc {
    
    		throw("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function")
    
    	ft := (*functype)(unsafe.Pointer(ftyp))
    	ins := *(*[]*_type)(unsafe.Pointer(&ft.in))
    	if ft.dotdotdot || len(ins) != 1 {
    
    		throw("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
    
    	}
    	fint := ins[0]
    	switch {
    	case fint == etyp:
    		// ok - same type
    		goto okarg
    	case fint.kind&kindMask == kindPtr:
    		if (fint.x == nil || fint.x.name == nil || etyp.x == nil || etyp.x.name == nil) && (*ptrtype)(unsafe.Pointer(fint)).elem == ot.elem {
    			// ok - not same type, but both pointers,
    			// one or the other is unnamed, and same element type, so assignable.
    			goto okarg
    		}
    	case fint.kind&kindMask == kindInterface:
    		ityp := (*interfacetype)(unsafe.Pointer(fint))
    		if len(ityp.mhdr) == 0 {
    			// ok - satisfies empty interface
    			goto okarg
    		}
    
		if assertE2I2(ityp, obj, nil) {
			goto okarg
		}
	}
    	throw("runtime.SetFinalizer: cannot pass " + *etyp._string + " to finalizer " + *ftyp._string)
    
    okarg:
    	// compute size needed for return parameters
    	nret := uintptr(0)
    	for _, t := range *(*[]*_type)(unsafe.Pointer(&ft.out)) {
    		nret = round(nret, uintptr(t.align)) + uintptr(t.size)
    	}
    	nret = round(nret, ptrSize)
    
    	// make sure we have a finalizer goroutine
	createfing()

	systemstack(func() {
    		if !addfinalizer(e.data, (*funcval)(f.data), nret, fint, ot) {
    
    			throw("runtime.SetFinalizer: finalizer already set")
    
    
    // round n up to a multiple of a.  a must be a power of 2.
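// For example, round(13, 8) == 16 and round(16, 8) == 16.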
    func round(n, a uintptr) uintptr {
    	return (n + a - 1) &^ (a - 1)
    }
    
    // Look up pointer v in heap.  Return the span containing the object,
    // the start of the object, and the size of the object.  If the object
    // does not exist, return nil, nil, 0.
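// For interior pointers into a span with a nonzero sizeclass, x is rounded
// down to the start of the element containing v.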
    func findObject(v unsafe.Pointer) (s *mspan, x unsafe.Pointer, n uintptr) {
    	c := gomcache()
    	c.local_nlookup++
    	if ptrSize == 4 && c.local_nlookup >= 1<<30 {
    		// purge cache stats to prevent overflow
    		lock(&mheap_.lock)
    		purgecachedstats(c)
    		unlock(&mheap_.lock)
    	}
    
    	// find span
    	arena_start := uintptr(unsafe.Pointer(mheap_.arena_start))
    	arena_used := uintptr(unsafe.Pointer(mheap_.arena_used))
    	if uintptr(v) < arena_start || uintptr(v) >= arena_used {
    		return
    	}
    	p := uintptr(v) >> pageShift
    	q := p - arena_start>>pageShift
    	s = *(**mspan)(add(unsafe.Pointer(mheap_.spans), q*ptrSize))
    	if s == nil {
    		return
    	}
    	x = unsafe.Pointer(uintptr(s.start) << pageShift)
    
    	if uintptr(v) < uintptr(x) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != mSpanInUse {
    		s = nil
    		x = nil
    		return
    	}
    
    	n = uintptr(s.elemsize)
    	if s.sizeclass != 0 {
    		x = add(x, (uintptr(v)-uintptr(x))/n*n)
    	}
    	return
    }
    
    var fingCreate uint32
    
    func createfing() {
    	// start the finalizer goroutine exactly once
    	if fingCreate == 0 && cas(&fingCreate, 0, 1) {
    		go runfinq()
    	}
    }
    
    // This is the goroutine that runs all of the finalizers
    func runfinq() {
    	var (
    		frame    unsafe.Pointer
    		framecap uintptr
    	)
    
    	for {
    		lock(&finlock)
    		fb := finq
    		finq = nil
    		if fb == nil {
    			gp := getg()
    			fing = gp
    			fingwait = true
    			gp.issystem = true
    			goparkunlock(&finlock, "finalizer wait")
    			gp.issystem = false
    			continue
    		}
    		unlock(&finlock)
    		if raceenabled {
    			racefingo()
    		}
    		for fb != nil {
    			for i := int32(0); i < fb.cnt; i++ {
    				f := (*finalizer)(add(unsafe.Pointer(&fb.fin), uintptr(i)*unsafe.Sizeof(finalizer{})))
    
    				framesz := unsafe.Sizeof((interface{})(nil)) + uintptr(f.nret)
    				if framecap < framesz {
    					// The frame does not contain pointers interesting for GC,
    					// all not yet finalized objects are stored in finq.
    					// If we do not mark it as FlagNoScan,
    					// the last finalized object is not collected.
    
    					frame = mallocgc(framesz, nil, flagNoScan)
    
    					framecap = framesz
    				}
    
    				if f.fint == nil {
    
    					throw("missing type in runfinq")
    
    				}
    				switch f.fint.kind & kindMask {
    				case kindPtr:
    					// direct use of pointer
    					*(*unsafe.Pointer)(frame) = f.arg
    				case kindInterface:
    					ityp := (*interfacetype)(unsafe.Pointer(f.fint))
    					// set up with empty interface
    					(*eface)(frame)._type = &f.ot.typ
    					(*eface)(frame).data = f.arg
    					if len(ityp.mhdr) != 0 {
    						// convert to interface with methods
    						// this conversion is guaranteed to succeed - we checked in SetFinalizer
    
						assertE2I(ityp, *(*interface{})(frame), (*fInterface)(frame))
					}
				default:
					throw("bad kind in runfinq")
				}
    				reflectcall(nil, unsafe.Pointer(f.fn), frame, uint32(framesz), uint32(framesz))
    
    
    				// drop finalizer queue references to finalized object
    				f.fn = nil
    				f.arg = nil
    				f.ot = nil
    			}
    			fb.cnt = 0
    			next := fb.next
    			lock(&finlock)