// underflows.
size += computeRZlog(size)
}
if debug.malloc {
if debug.sbrk != 0 {
align := uintptr(16)
if typ != nil {
// TODO(austin): This should be just
// align = uintptr(typ.align)
// but that's only 4 on 32-bit platforms,
// even if there's a uint64 field in typ (see #599).
// This causes 64-bit atomic accesses to panic.
// Hence, we use stricter alignment that matches
// the normal allocator better.
if size&7 == 0 {
align = 8
} else if size&3 == 0 {
align = 4
} else if size&1 == 0 {
align = 2
} else {
align = 1
}
}
return persistentalloc(size, align, &memstats.other_sys)
}
if inittrace.active && inittrace.id == getg().goid {
// Init functions are executed sequentially in a single goroutine.
inittrace.allocs += 1
}
}
// assistG is the G to charge for this allocation, or nil if
// GC is not currently active.
assistG := deductAssistCredit(size)
// Set mp.mallocing to keep from being preempted by GC.
mp := acquirem()
if mp.mallocing != 0 {
throw("malloc deadlock")
if mp.gsignal == getg() {
throw("malloc during signal")
}
mp.mallocing = 1
shouldhelpgc := false
dataSize := userSize
c := getMCache(mp)
if c == nil {
throw("mallocgc called without a P or outside bootstrapping")
}
var span *mspan
var header **_type
var x unsafe.Pointer
noscan := typ == nil || !typ.Pointers()
// In some cases block zeroing can profitably be delayed until preemption is
// possible (to reduce latency); delayedZeroing tracks that state.
delayedZeroing := false
// Determine if it's a 'small' object that goes into a size-classed span.
//
// Note: This comparison looks a little strange, but it exists to smooth out
// the crossover between the largest size class and large objects that have
// their own spans. The small window of object sizes between maxSmallSize-mallocHeaderSize
// and maxSmallSize will be considered large, even though they might fit in
// a size class. In practice this is completely fine, since the largest small
// size class has a single object in it already, precisely to make the transition
// to large objects smooth.
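// For a concrete sense of the window (assuming maxSmallSize = 32768 and
// mallocHeaderSize = 8, their values at the time of writing): a noscan request
// of 32764 bytes fails the check below and is handled as a large object with
// its own span, even though the 32768-byte size class could have held it
// without a header. Since that largest class holds a single object per span
// anyway, the difference is negligible.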
if size <= maxSmallSize-mallocHeaderSize {
if noscan && size < maxTinySize {
// Tiny allocator.
//
// Tiny allocator combines several tiny allocation requests
// into a single memory block. The resulting memory block
// is freed when all subobjects are unreachable. The subobjects
// must be noscan (don't have pointers); this ensures that
// the amount of potentially wasted memory is bounded.
//
// The size of the memory block used for combining (maxTinySize) is tunable.
// The current setting is 16 bytes, which gives at most 2x worst-case memory
// wastage (when all but one of the subobjects are unreachable).
// 8 bytes would result in no wastage at all, but would provide fewer
// opportunities for combining.
// 32 bytes provides more opportunities for combining,
// but can lead to 4x worst-case wastage.
// The best-case gain is 8x regardless of block size.
//
// Objects obtained from tiny allocator must not be freed explicitly.
// So when an object will be freed explicitly, we ensure that
// its size >= maxTinySize.
//
// SetFinalizer has a special case for objects potentially coming
// from the tiny allocator; in that case it allows setting finalizers
// for an inner byte of a memory block.
//
// The main targets of the tiny allocator are small strings and
// standalone escaping variables. On a JSON benchmark
// the allocator reduces the number of allocations by ~12% and
// reduces the heap size by ~20%.
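// As an illustration of the numbers above: sixteen 1-byte allocations can
// share one 16-byte block instead of occupying sixteen 8-byte slots in the
// smallest regular size class (128 bytes total), the 8x best case. Conversely,
// a single reachable 8-byte subobject pins the whole 16-byte block, giving the
// 2x worst case (or 4x with a 32-byte block).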
off := c.tinyoffset
// Align tiny pointer for required (conservative) alignment.
if size&7 == 0 {
off = alignUp(off, 8)
} else if goarch.PtrSize == 4 && size == 12 {
// Conservatively align 12-byte objects to 8 bytes on 32-bit
// systems so that objects whose first field is a 64-bit
// value is aligned to 8 bytes and does not cause a fault on
// atomic access. See issue 37262.
// TODO(mknyszek): Remove this workaround if/when issue 36606
// is resolved.
off = alignUp(off, 8)
} else if size&3 == 0 {
off = alignUp(off, 4)
} else if size&1 == 0 {
off = alignUp(off, 2)
}
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
x = unsafe.Pointer(c.tiny + off)
c.tinyoffset = off + size
c.tinyAllocs++
mp.mallocing = 0
releasem(mp)
return x
}
// Allocate a new maxTinySize block.
span = c.alloc[tinySpanClass]
v := nextFreeFast(span)
if v == 0 {
v, span, shouldhelpgc = c.nextFree(tinySpanClass)
}
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// See if we need to replace the existing tiny block with the new one
// based on amount of remaining free space.
if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
// Note: disabled when race detector is on, see comment near end of this function.
c.tiny = uintptr(x)
c.tinyoffset = size
}
size = maxTinySize
} else {
hasHeader := !noscan && !heapBitsInSpan(size)
if hasHeader {
size += mallocHeaderSize
}
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[divRoundUp(size, smallSizeDiv)]
} else {
sizeclass = size_to_class128[divRoundUp(size-smallSizeMax, largeSizeDiv)]
}
size = uintptr(class_to_size[sizeclass])
spc := makeSpanClass(sizeclass, noscan)
span = c.alloc[spc]
v := nextFreeFast(span)
if v == 0 {
v, span, shouldhelpgc = c.nextFree(spc)
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(x, size)
}
if hasHeader {
header = (**_type)(x)
x = add(x, mallocHeaderSize)
size -= mallocHeaderSize
}
}
} else {
shouldhelpgc = true
// For large allocations, keep track of zeroed state so that
// bulk zeroing can happen later in a preemptible context.
span = c.allocLarge(size, noscan)
span.freeindex = 1
span.allocCount = 1
size = span.elemsize
x = unsafe.Pointer(span.base())
if needzero && span.needzero != 0 {
Michael Anthony Knyszek
committed
delayedZeroing = true
Michael Anthony Knyszek
committed
// Tell the GC not to look at this yet.
span.largeType = nil
Michael Anthony Knyszek
committed
header = &span.largeType
}
}
if !noscan && !delayedZeroing {
c.scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
}
// Ensure that the stores above that initialize x to
// type-safe memory and set the heap bits occur before
// the caller can make x observable to the garbage
// collector. Otherwise, on weakly ordered machines,
// the garbage collector could follow a pointer to x,
// but see uninitialized memory or stale heap bits.
publicationBarrier()
// As x and the heap bits are initialized, update
// freeIndexForScan now so x is seen by the GC
// (including conservative scan) as an allocated object.
// While this pointer can't escape into user code as a
// _live_ pointer until we return, conservative scanning
// may find a dead pointer that happens to point into this
// object. Delaying this update until now ensures that
// conservative scanning considers this pointer dead until
// this point.
span.freeIndexForScan = span.freeindex
// All slots hold nil so no scanning is needed.
// This may be racing with GC so do it atomically if there can be
// a race marking the bit.
gcmarknewobject(span, uintptr(x))
if raceenabled {
racemalloc(x, size)
}
if msanenabled {
msanmalloc(x, size)
}
if asanenabled {
// We should only read/write the memory with the size asked by the user.
// The rest of the allocated memory should be poisoned, so that we can report
// errors when accessing poisoned memory.
// The allocated memory is larger than the requested userSize; it also includes
// the redzone and some other padding bytes.
rzBeg := unsafe.Add(x, userSize)
asanpoison(rzBeg, size-userSize)
asanunpoison(x, userSize)
}
// TODO(mknyszek): We should really count the header as part
// of gc_sys or something. The code below just pretends it is
// internal fragmentation and matches the GC's accounting by
// using the whole allocation slot.
fullSize := span.elemsize
if rate := MemProfileRate; rate > 0 {
// Note cache c only valid while m acquired; see #47302
//
// N.B. Use the full size because that matches how the GC
// will update the mem profile on the "free" side.
if rate != 1 && fullSize < c.nextSample {
c.nextSample -= fullSize
} else {
profilealloc(mp, x, fullSize)
}
}
mp.mallocing = 0
releasem(mp)
// Objects can be zeroed late in a context where preemption can occur.
// If the object contains pointers, its pointer data must be cleared
// or otherwise indicate that the GC shouldn't scan it.
// x will keep the memory alive.
if delayedZeroing {
// N.B. size == fullSize always in this case.
memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
// Finish storing the type information for this case.
if !noscan {
mp := acquirem()
getMCache(mp).scanAlloc += heapSetType(uintptr(x), dataSize, typ, header, span)
// Publish the type information with the zeroed memory.
publicationBarrier()
releasem(mp)
}
}
if debug.malloc {
if inittrace.active && inittrace.id == getg().goid {
// Init functions are executed sequentially in a single goroutine.
inittrace.bytes += uint64(fullSize)
}
if traceAllocFreeEnabled() {
trace := traceAcquire()
if trace.ok() {
trace.HeapObjectAlloc(uintptr(x), typ)
traceRelease(trace)
}
}
}
if assistG != nil {
// Account for internal fragmentation in the assist
// debt now that we know it.
//
// N.B. Use the full size because that's how the rest
// of the GC accounts for bytes marked.
assistG.gcAssistBytes -= int64(fullSize - dataSize)
}
if shouldhelpgc {
if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
gcStart(t)
}
}
if raceenabled && noscan && dataSize < maxTinySize {
// Pad tinysize allocations so they are aligned with the end
// of the tinyalloc region. This ensures that any arithmetic
// that goes off the top end of the object will be detectable
// by checkptr (issue 38872).
// Note that we disable tinyalloc when raceenabled for this to work.
// TODO: This padding is only performed when the race detector
// is enabled. It would be nice to enable it if any package
// was compiled with checkptr, but there's no easy way to
// detect that (especially at compile time).
// TODO: enable this padding for all allocations, not just
// tinyalloc ones. It's tricky because of pointer maps.
// Maybe just all noscan objects?
x = add(x, size-dataSize)
}
return x
}
// deductAssistCredit reduces the current G's assist credit
// by size bytes, and assists the GC if necessary.
//
// Caller must be preemptible.
//
// Returns the G for which the assist credit was accounted.
func deductAssistCredit(size uintptr) *g {
var assistG *g
if gcBlackenEnabled != 0 {
// Charge the current user G for this allocation.
assistG = getg()
if assistG.m.curg != nil {
assistG = assistG.m.curg
}
// Charge the allocation against the G. We'll account
// for internal fragmentation at the end of mallocgc.
assistG.gcAssistBytes -= int64(size)
if assistG.gcAssistBytes < 0 {
// This G is in debt. Assist the GC to correct
// this before allocating. This must happen
// before disabling preemption.
gcAssistAlloc(assistG)
}
}
return assistG
}
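// Illustration of the two-step accounting (derived from mallocgc above): the
// requested size is debited here, before the final slot size is known, and the
// remaining fullSize-dataSize is debited once the size class is chosen, so the
// total charged per allocation equals the full allocation slot (fullSize) that
// the GC accounts for when marking.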
// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
// on chunks of the buffer to be zeroed, with opportunities for preemption
// along the way. memclrNoHeapPointers contains no safepoints and also
// cannot be preemptively scheduled, so this provides a still-efficient
// block clear that can also be preempted on a reasonable granularity.
//
// Use this with care; if the data being cleared is tagged to contain
// pointers, this allows the GC to run before it is all cleared.
func memclrNoHeapPointersChunked(size uintptr, x unsafe.Pointer) {
v := uintptr(x)
// The chunk size was chosen by benchmarking: 128k is too small, 512k is too large.
const chunkBytes = 256 * 1024
vsize := v + size
for voff := v; voff < vsize; voff = voff + chunkBytes {
if getg().preempt {
// may hold locks, e.g., profiling
goschedguarded()
}
// Clear min(remaining, chunkBytes) bytes.
n := vsize - voff
if n > chunkBytes {
n = chunkBytes
}
memclrNoHeapPointers(unsafe.Pointer(voff), n)
}
}
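// As an illustration: clearing a 1 MiB delayed-zeroing allocation runs as four
// 256 KiB memclrNoHeapPointers calls, with a preemption check before each one,
// so the longest non-preemptible stretch is a single chunk rather than the
// whole buffer.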
// compiler (both frontend and SSA backend) knows the signature
// of this function.
func newobject(typ *_type) unsafe.Pointer {
return mallocgc(typ.Size_, typ, true)
}
// reflect_unsafe_New is meant for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/quant1x/gox
// - github.com/goccy/json
// - github.com/modern-go/reflect2
// - github.com/v2pro/plz
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_unsafe_New reflect.unsafe_New
func reflect_unsafe_New(typ *_type) unsafe.Pointer {
return mallocgc(typ.Size_, typ, true)
}
//go:linkname reflectlite_unsafe_New internal/reflectlite.unsafe_New
func reflectlite_unsafe_New(typ *_type) unsafe.Pointer {
return mallocgc(typ.Size_, typ, true)
}
// newarray allocates an array of n elements of type typ.
//
// newarray should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - github.com/RomiChan/protobuf
// - github.com/segmentio/encoding
// - github.com/ugorji/go/codec
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname newarray
func newarray(typ *_type, n int) unsafe.Pointer {
if n == 1 {
return mallocgc(typ.Size_, typ, true)
}
mem, overflow := math.MulUintptr(typ.Size_, uintptr(n))
if overflow || mem > maxAlloc || n < 0 {
panic(plainError("runtime: allocation size out of range"))
}
return mallocgc(mem, typ, true)
}
// reflect_unsafe_NewArray is meant for package reflect,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
// - gitee.com/quant1x/gox
// - github.com/bytedance/sonic
// - github.com/goccy/json
// - github.com/modern-go/reflect2
// - github.com/segmentio/encoding
// - github.com/segmentio/kafka-go
// - github.com/v2pro/plz
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname reflect_unsafe_NewArray reflect.unsafe_NewArray
func reflect_unsafe_NewArray(typ *_type, n int) unsafe.Pointer {
return newarray(typ, n)
}
func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
c := getMCache(mp)
if c == nil {
throw("profilealloc called without a P or outside bootstrapping")
}
c.nextSample = nextSample()
mProf_Malloc(mp, x, size)
}
// nextSample returns the next sampling point for heap profiling. The goal is
// to sample allocations on average every MemProfileRate bytes, but with a
// completely random distribution over the allocation timeline; this
// corresponds to a Poisson process with parameter MemProfileRate. In Poisson
// processes, the distance between two samples follows the exponential
// distribution (exp(MemProfileRate)), so the best return value is a random
// number taken from an exponential distribution whose mean is MemProfileRate.
func nextSample() uintptr {
if MemProfileRate == 1 {
// Callers assign our return value to
// mcache.next_sample, but next_sample is not used
// when the rate is 1. So avoid the math below and
// just return something.
return 0
}
if GOOS == "plan9" {
// Plan 9 doesn't support floating point in note handler.
if gp := getg(); gp == gp.m.gsignal {
return nextSampleNoFP()
}
}
return uintptr(fastexprand(MemProfileRate))
}
// fastexprand returns a random number from an exponential distribution with
// the specified mean.
func fastexprand(mean int) int32 {
// Avoid overflow. Maximum possible step is
// -ln(1/(1<<randomBitCount)) * mean, approximately 20 * mean.
case mean > 0x7000000:
mean = 0x7000000
case mean == 0:
// Take a random sample x of an exponential distribution with the given mean.
// The CDF is p = 1 - exp(-x/mean), so with q = 1 - p drawn uniformly from (0, 1]:
// q = exp(-x/mean)
// log_e(q) = -x/mean
// x = -log_e(q) * mean
// x = log_2(q) * (-log_e(2)) * mean ; using log_2 for efficiency
const randomBitCount = 26
q := cheaprandn(1<<randomBitCount) + 1
qlog := fastlog2(float64(q)) - randomBitCount
if qlog > 0 {
qlog = 0
}
const minusLog2 = -0.6931471805599453 // -ln(2)
return int32(qlog*(minusLog2*float64(mean))) + 1
}
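// Rough numeric check (with the default MemProfileRate of 512 KiB): drawing
// q = 1<<25 gives qlog = -1, so the returned distance is about
// 0.693 * 512 KiB ~= 355 KiB, the median of an exponential distribution with
// that mean, as expected.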
// nextSampleNoFP is similar to nextSample, but uses older,
// simpler code to avoid floating point.
func nextSampleNoFP() uintptr {
// Set first allocation sample size.
rate := MemProfileRate
if rate > 0x3fffffff { // make 2*rate not overflow
rate = 0x3fffffff
}
if rate != 0 {
return uintptr(cheaprandn(uint32(2 * rate)))
}
return 0
}
type persistentAlloc struct {
base *notInHeap
off uintptr
}
var globalAlloc struct {
mutex
persistentAlloc
}
// persistentChunkSize is the number of bytes we allocate when we grow
// a persistentAlloc.
const persistentChunkSize = 256 << 10
// persistentChunks is a list of all the persistent chunks we have
// allocated. The list is maintained through the first word in the
// persistent chunk. This is updated atomically.
var persistentChunks *notInHeap
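// In other words, each chunk's first pointer-sized word links to the previously
// published chunk; this is also why persistentalloc1 below starts handing out
// space at alignUp(goarch.PtrSize, align) rather than at offset 0.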
// Wrapper around sysAlloc that can allocate small chunks.
// There is no associated free operation.
// Intended for things like function/type/debug-related persistent data.
// If align is 0, uses default align (currently 8).
// The returned memory will be zeroed.
// sysStat must be non-nil.
// Consider marking persistentalloc'd types not in heap by embedding
// runtime/internal/sys.NotInHeap.
func persistentalloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
var p *notInHeap
systemstack(func() {
p = persistentalloc1(size, align, sysStat)
})
return unsafe.Pointer(p)
}
// Must run on system stack because stack growth can (re)invoke it.
// See issue 9174.
func persistentalloc1(size, align uintptr, sysStat *sysMemStat) *notInHeap {
const (
maxBlock = 64 << 10 // VM reservation granularity is 64K on windows
)
if size == 0 {
throw("persistentalloc: size == 0")
}
if align != 0 {
if align&(align-1) != 0 {
throw("persistentalloc: align is not a power of 2")
}
if align > _PageSize {
throw("persistentalloc: align is too large")
}
} else {
align = 8
}
if size >= maxBlock {
return (*notInHeap)(sysAlloc(size, sysStat))
}
mp := acquirem()
var persistent *persistentAlloc
if mp != nil && mp.p != 0 {
persistent = &mp.p.ptr().palloc
} else {
lock(&globalAlloc.mutex)
persistent = &globalAlloc.persistentAlloc
}
persistent.off = alignUp(persistent.off, align)
if persistent.off+size > persistentChunkSize || persistent.base == nil {
persistent.base = (*notInHeap)(sysAlloc(persistentChunkSize, &memstats.other_sys))
if persistent.base == nil {
if persistent == &globalAlloc.persistentAlloc {
unlock(&globalAlloc.mutex)
}
throw("runtime: cannot allocate memory")
// Add the new chunk to the persistentChunks list.
for {
chunks := uintptr(unsafe.Pointer(persistentChunks))
*(*uintptr)(unsafe.Pointer(persistent.base)) = chunks
if atomic.Casuintptr((*uintptr)(unsafe.Pointer(&persistentChunks)), chunks, uintptr(unsafe.Pointer(persistent.base))) {
break
}
}
persistent.off = alignUp(goarch.PtrSize, align)
}
p := persistent.base.add(persistent.off)
persistent.off += size
releasem(mp)
if persistent == &globalAlloc.persistentAlloc {
unlock(&globalAlloc.mutex)
}
if sysStat != &memstats.other_sys {
sysStat.add(int64(size))
memstats.other_sys.add(-int64(size))
}
return p
}
// inPersistentAlloc reports whether p points to memory allocated by
// persistentalloc. This must be nosplit because it is called by the
// cgo checker code, which is called by the write barrier code.
//go:nosplit
func inPersistentAlloc(p uintptr) bool {
chunk := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&persistentChunks)))
for chunk != 0 {
if p >= chunk && p < chunk+persistentChunkSize {
return true
}
chunk = *(*uintptr)(unsafe.Pointer(chunk))
}
return false
}
// linearAlloc is a simple linear allocator that pre-reserves a region
// of memory and then optionally maps that region into the Ready state
// as needed.
//
// The caller is responsible for locking.
type linearAlloc struct {
next uintptr // next free byte
mapped uintptr // one byte past end of mapped space
end uintptr // end of reserved space
mapMemory bool // transition memory from Reserved to Ready if true
}
func (l *linearAlloc) init(base, size uintptr, mapMemory bool) {
if base+size < base {
// Chop off the last byte. The runtime isn't prepared
// to deal with situations where the bounds could overflow.
// Leave that memory reserved, though, so we don't map it
// later.
size -= 1
}
l.next, l.mapped = base, base
l.end = base + size
l.mapMemory = mapMemory
}
func (l *linearAlloc) alloc(size, align uintptr, sysStat *sysMemStat) unsafe.Pointer {
p := alignUp(l.next, align)
if p+size > l.end {
return nil
}
l.next = p + size
if pEnd := alignUp(l.next-1, physPageSize); pEnd > l.mapped {
if l.mapMemory {
// Transition from Reserved to Prepared to Ready.
n := pEnd - l.mapped
sysMap(unsafe.Pointer(l.mapped), n, sysStat)
sysUsed(unsafe.Pointer(l.mapped), n, n)
}
l.mapped = pEnd
}
return unsafe.Pointer(p)
}
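// A minimal usage sketch (hypothetical caller; not part of the runtime API):
// given size bytes of address space reserved at base, pointer-aligned pieces
// can be carved out of the region like this:
//
//	var l linearAlloc
//	l.init(base, size, true) // true: map pages as they are first used
//	p := l.alloc(4096, goarch.PtrSize, &memstats.other_sys)
//	if p == nil {
//		// The reserved region is exhausted.
//	}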
// notInHeap is off-heap memory allocated by a lower-level allocator
// like sysAlloc or persistentAlloc.
//
// In general, it's better to use real types which embed
// runtime/internal/sys.NotInHeap, but this serves as a generic type
// for situations where that isn't possible (like in the allocators).
//
// TODO: Use this as the return type of sysAlloc, persistentAlloc, etc?
type notInHeap struct{ _ sys.NotInHeap }
func (p *notInHeap) add(bytes uintptr) *notInHeap {
return (*notInHeap)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + bytes))
}
// computeRZlog computes the size of the redzone.
// Refer to the implementation of the compiler-rt.
func computeRZlog(userSize uintptr) uintptr {
switch {
case userSize <= (64 - 16):
return 16 << 0
case userSize <= (128 - 32):
return 16 << 1
case userSize <= (512 - 64):
return 16 << 2
case userSize <= (4096 - 128):
return 16 << 3
case userSize <= (1<<14)-256:
return 16 << 4
case userSize <= (1<<15)-512:
return 16 << 5
case userSize <= (1<<16)-1024:
return 16 << 6
default:
return 16 << 7
}
}
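// For example, a 100-byte user allocation falls into the "userSize <= (512 - 64)"
// bucket above, so it receives a 64-byte redzone: mallocgc's asan path asks for
// 164 bytes and, after size-class rounding, poisons everything past the first
// 100 bytes via asanpoison.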