// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || plan9 || solaris || windows) && goexperiment.spinbitmutex
package runtime
import (
"internal/goarch"
"internal/runtime/atomic"
"unsafe"
)
// This implementation depends on OS-specific implementations of
//
// func semacreate(mp *m)
// Create a semaphore for mp, if it does not already have one.
//
// func semasleep(ns int64) int32
// If ns < 0, acquire m's semaphore and return 0.
// If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
// Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
// func semawakeup(mp *m)
// Wake up mp, which is or will soon be sleeping on its semaphore.
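//
// For example (an illustrative sketch, not part of the original comment), a
// waiting M blocks and is later woken roughly as follows:
//
//	semacreate(gp.m) // ensure this M has a semaphore
//	semasleep(-1)    // block until another M calls semawakeup on this M
//
// while the M releasing the mutex calls semawakeup(mp) on the waiter it pops
// off the list; lock2 and unlock2Wake below follow this pattern.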
// The mutex state consists of four flags and a pointer. The flag at bit 0,
// mutexLocked, represents the lock itself. Bit 1, mutexSleeping, is a hint that
// the pointer is non-nil. The fast paths for locking and unlocking the mutex
// are based on atomic 8-bit swap operations on the low byte; bits 2 through 7
// are unused.
//
// Bit 8, mutexSpinning, is a try-lock that grants a waiting M permission to
// spin on the state word. Most other Ms must attempt to spend their time
// sleeping to reduce traffic on the cache line. This is the "spin bit" for
// which the implementation is named. (The anti-starvation mechanism also grants
// temporary permission for an M to spin.)
//
// Bit 9, mutexStackLocked, is a try-lock that grants an unlocking M permission
// to inspect the list of waiting Ms and to pop an M off of that stack.
//
// The upper bits hold a (partial) pointer to the M that most recently went to
// sleep. The sleeping Ms form a stack linked by their mWaitList.next fields.
// Because the fast paths use an 8-bit swap on the low byte of the state word,
// we'll need to reconstruct the full M pointer from the bits we have. Most Ms
// are allocated on the heap, and have a known alignment and base offset. (The
// offset is due to mallocgc's allocation headers.) The main program thread uses
// a static M value, m0. We check for m0 specifically and add a known offset
// otherwise.
const (
active_spin = 4 // referenced in proc.go for sync.Mutex implementation
active_spin_cnt = 30 // referenced in proc.go for sync.Mutex implementation
)
const (
mutexLocked = 0x001
mutexSleeping = 0x002
mutexSpinning = 0x100
mutexStackLocked = 0x200
mutexMMask = 0x3FF
mutexMOffset = mallocHeaderSize // alignment of heap-allocated Ms (those other than m0)
mutexActiveSpinCount = 4
mutexActiveSpinSize = 30
mutexPassiveSpinCount = 1
mutexTailWakePeriod = 16
)
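// mutexStateBits is an illustrative sketch, not part of the original file: it
// splits a state word into the flag bits and the partial waiter pointer laid
// out by the constants above, which can be handy when decoding a state word by
// hand.
func mutexStateBits(v uintptr) (locked, sleeping, spinning, stackLocked bool, waiterBits uintptr) {
	return v&mutexLocked != 0,
		v&mutexSleeping != 0,
		v&mutexSpinning != 0,
		v&mutexStackLocked != 0,
		v &^ mutexMMask
}
// key8 returns a pointer to the byte holding the low-order bits of the word
// that p points to; this is the byte the 8-bit fast paths (Xchg8, Or8) below
// operate on.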
//go:nosplit
func key8(p *uintptr) *uint8 {
if goarch.BigEndian {
return &(*[8]uint8)(unsafe.Pointer(p))[goarch.PtrSize/1-1]
}
return &(*[8]uint8)(unsafe.Pointer(p))[0]
}
// mWaitList is part of the M struct, and holds the list of Ms that are waiting
// for a particular runtime.mutex.
//
// When an M is unable to immediately obtain a lock, it adds itself to the list
// of Ms waiting for the lock. It does that via this struct's next field,
// forming a singly-linked list with the mutex's key field pointing to the head
// of the list.
type mWaitList struct {
next muintptr // next m waiting for lock
}
// lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
func lockVerifyMSize() {
size := roundupsize(unsafe.Sizeof(mPadded{}), false) + mallocHeaderSize
if size&mutexMMask != 0 {
print("M structure uses sizeclass ", size, "/", hex(size), " bytes; ",
"incompatible with mutex flag mask ", hex(mutexMMask), "\n")
throw("runtime.m memory alignment too small for spinbit mutex")
}
}
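// As a worked example (not part of the original file): if the padded M plus
// its malloc header lands in the 2048-byte size class, size is 2048 and
// 2048&mutexMMask == 0, so the check passes. In a smaller class such as 1792
// bytes the check would fail (1792&0x3FF == 768), because heap-allocated Ms
// would no longer be aligned well enough to keep the flag bits separate from
// the pointer bits.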
// mutexWaitListHead recovers a full muintptr that was missing its low bits.
// With the exception of the static m0 value, it requires allocating runtime.m
// values in a size class with a particular minimum alignment. The 2048-byte
// size class allows recovering the full muintptr value even after overwriting
// the low 11 bits with flags. We can use those 11 bits as 3 flags and an
// atomically-swapped byte.
//
//go:nosplit
func mutexWaitListHead(v uintptr) muintptr {
if highBits := v &^ mutexMMask; highBits == 0 {
return 0
} else if m0bits := muintptr(unsafe.Pointer(&m0)); highBits == uintptr(m0bits)&^mutexMMask {
return m0bits
} else {
return muintptr(highBits + mutexMOffset)
}
}
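// mutexPackWaiter is an illustrative sketch, not part of the original file: it
// is the inverse of mutexWaitListHead, packing a (partial) pointer to mp into
// the state word v while preserving v's flag bits. lock2 performs exactly this
// packing (and additionally sets mutexSleeping) just before pushing itself
// onto the list of sleeping Ms.
func mutexPackWaiter(mp *m, v uintptr) uintptr {
	return (uintptr(unsafe.Pointer(mp)) &^ mutexMMask) | v&mutexMMask
}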
// mutexPreferLowLatency reports if this mutex prefers low latency at the risk
// of performance collapse. If so, we can allow all waiting threads to spin on
// the state word rather than go to sleep.
//
// TODO: We could have the waiting Ms each spin on their own private cache line,
// especially if we can put a bound on the on-CPU time that would consume.
//
// TODO: If there's a small set of mutex values with special requirements, they
// could make use of a more specialized lock2/unlock2 implementation. Otherwise,
// we're constrained to what we can fit within a single uintptr with no
// additional storage on the M for each lock held.
//
//go:nosplit
func mutexPreferLowLatency(l *mutex) bool {
switch l {
default:
return false
case &sched.lock:
// We often expect sched.lock to pass quickly between Ms in a way that
// each M has unique work to do: for instance when we stop-the-world
// (bringing each P to idle) or add new netpoller-triggered work to the
// global run queue.
return true
}
}
func mutexContended(l *mutex) bool {
return atomic.Loaduintptr(&l.key)&^mutexMMask != 0
}
func lock(l *mutex) {
lockWithRank(l, getLockRank(l))
}
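// lock2 is the implementation underneath lock/lockWithRank. In outline: it
// first makes a speculative Xchg8 grab of the low byte; if the mutex is held,
// it spins for a bounded number of rounds (gated by the mutexSpinning
// try-lock); and finally it pushes itself onto the stack of sleeping Ms and
// blocks on its semaphore until unlock2's wake path gives it another chance.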
func lock2(l *mutex) {
gp := getg()
if gp.m.locks < 0 {
throw("runtime·lock: lock count")
}
gp.m.locks++
k8 := key8(&l.key)
// Speculative grab for lock.
v8 := atomic.Xchg8(k8, mutexLocked)
if v8&mutexLocked == 0 {
if v8&mutexSleeping != 0 {
atomic.Or8(k8, mutexSleeping)
}
return
}
semacreate(gp.m)
timer := &lockTimer{lock: l}
timer.begin()
// On uniprocessors, no point spinning.
// On multiprocessors, spin for mutexActiveSpinCount attempts.
spin := 0
if ncpu > 1 {
spin = mutexActiveSpinCount
}
var weSpin, atTail bool
v := atomic.Loaduintptr(&l.key)
tryAcquire:
for i := 0; ; i++ {
for v&mutexLocked == 0 {
// The lock appears to be available; try to grab it.
if weSpin {
next := (v &^ mutexSpinning) | mutexSleeping | mutexLocked
if next&^mutexMMask == 0 {
// The fast-path Xchg8 may have cleared mutexSleeping. Fix
// the hint so unlock2 knows when to use its slow path.
next = next &^ mutexSleeping
}
if atomic.Casuintptr(&l.key, v, next) {
timer.end()
return
}
} else {
prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
if prev8&mutexLocked == 0 {
timer.end()
return
}
}
v = atomic.Loaduintptr(&l.key)
}
if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) {
v |= mutexSpinning
weSpin = true
}
if weSpin || atTail || mutexPreferLowLatency(l) {
if i < spin {
procyield(mutexActiveSpinSize)
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
} else if i < spin+mutexPassiveSpinCount {
osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268.
v = atomic.Loaduintptr(&l.key)
continue tryAcquire
}
}
// Go to sleep
if v&mutexLocked == 0 {
throw("runtime·lock: sleeping while lock is available")
}
// Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
gp.m.mWaitList.next = mutexWaitListHead(v)
// Pack a (partial) pointer to this M with the current lock state bits
next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
if weSpin { // If we were spinning, prepare to retire
next = next &^ mutexSpinning
}
if atomic.Casuintptr(&l.key, v, next) {
weSpin = false
// We've pushed ourselves onto the stack of waiters. Wait.
semasleep(-1)
atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
i = 0
}
gp.m.mWaitList.next = 0
v = atomic.Loaduintptr(&l.key)
}
}
func unlock(l *mutex) {
unlockWithRank(l)
}
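// As a usage sketch (illustrative only): runtime code brackets a critical
// section with the lock and unlock wrappers above, which route through
// lockWithRank/unlockWithRank before reaching lock2/unlock2. For example:
//
//	lock(&sched.lock)
//	// ... read or modify the global run queue ...
//	unlock(&sched.lock)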
// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
gp := getg()
prev8 := atomic.Xchg8(key8(&l.key), 0)
if prev8&mutexLocked == 0 {
throw("unlock of unlocked lock")
}
if prev8&mutexSleeping != 0 {
unlock2Wake(l)
}
gp.m.mLockProfile.recordUnlock(l)
gp.m.locks--
if gp.m.locks < 0 {
throw("runtime·unlock: lock count")
}
if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
gp.stackguard0 = stackPreempt
}
}
// unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
//
//go:nowritebarrier
func unlock2Wake(l *mutex) {
v := atomic.Loaduintptr(&l.key)
// On occasion, seek out and wake the M at the bottom of the stack so it
// doesn't starve.
antiStarve := cheaprandn(mutexTailWakePeriod) == 0
if !(antiStarve || // avoiding starvation may require a wake
v&mutexSpinning == 0 || // no spinners means we must wake
mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
return
}
for {
if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
// No waiting Ms means nothing to do.
//
// If the stack lock is unavailable, its owner would make the same
// wake decisions that we would, so there's nothing for us to do.
//
// Although: This thread may have a different call stack, which
// would result in a different entry in the mutex contention profile
// (upon completion of go.dev/issue/66999). That could lead to weird
// results if a slow critical section ends but another thread
// quickly takes the lock, finishes its own critical section,
// releases the lock, and then grabs the stack lock. That quick
// thread would then take credit (blame) for the delay that this
// slow thread caused. The alternative is to have more expensive
// atomic operations (a CAS) on the critical path of unlock2.
return
}
// Other M's are waiting for the lock.
// Obtain the stack lock, and pop off an M.
next := v | mutexStackLocked
if atomic.Casuintptr(&l.key, v, next) {
break
}
v = atomic.Loaduintptr(&l.key)
}
// We own the mutexStackLocked flag. New Ms may push themselves onto the
// stack concurrently, but we're now the only thread that can remove or
// modify the Ms that are sleeping in the list.
var committed *m // If we choose an M within the stack, we've made a promise to wake it
for {
headM := v &^ mutexMMask
flags := v & (mutexMMask &^ mutexStackLocked) // preserve low bits, but release stack lock
mp := mutexWaitListHead(v).ptr()
wakem := committed
if committed == nil {
if v&mutexSpinning == 0 || mutexPreferLowLatency(l) {
wakem = mp
}
if antiStarve {
// Wake the M at the bottom of the stack of waiters. (This is
// O(N) with the number of waiters.)
wakem = mp
prev := mp
for {
next := wakem.mWaitList.next.ptr()
if next == nil {
break
}
prev, wakem = wakem, next
}
if wakem != mp {
prev.mWaitList.next = wakem.mWaitList.next
committed = wakem
}
}
}
if wakem == mp {
headM = uintptr(mp.mWaitList.next) &^ mutexMMask
}
next := headM | flags
if atomic.Casuintptr(&l.key, v, next) {
if wakem != nil {
// Claimed an M. Wake it.
semawakeup(wakem)
}
break
}
v = atomic.Loaduintptr(&l.key)
}
}
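// A worked example of the anti-starvation walk above (illustrative only): if
// Ms A, B, and C went to sleep in that order, the stack is C -> B -> A, with
// the state word's pointer bits naming C and A at the tail. When antiStarve is
// set, the inner loop walks C, B, A until it finds A's nil next pointer,
// splices A out by pointing B's next at A's (nil) next, and wakes A rather
// than C, so the oldest waiter cannot be starved indefinitely.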