diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 980066df700d107d218d1d0bf30729a60c40cf9f..520b060599ae9b8ba819ad80d018105e5b856384 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -40,6 +40,8 @@ var Atoi32 = atoi32
 var ParseByteCount = parseByteCount
 
 var Nanotime = nanotime
+var Cputicks = cputicks
+var CyclesPerSecond = pprof_cyclesPerSecond
 var NetpollBreak = netpollBreak
 var Usleep = usleep
 
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index a036f37b975e0e249808671e8b2c99f2c3d6b2a0..178dd968f72b56a50c6283ca967829060b13739d 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -1020,9 +1020,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 		return metricGrowth, profileGrowth, p
 	}
 
-	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
-		return func(t *testing.T) (metricGrowth, profileGrowth float64, n, value int64, explain func()) {
-			metricGrowth, profileGrowth, p := measureDelta(t, func() {
+	testcase := func(strictTiming bool, acceptStacks [][]string, workers int, fn func() bool) func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+		return func(t *testing.T) (metricGrowth float64, profileGrowth []int64, n, value int64, explain func()) {
+			metricGrowth, totalProfileGrowth, p := measureDelta(t, func() {
 				var started, stopped sync.WaitGroup
 				started.Add(workers)
 				stopped.Add(workers)
@@ -1042,7 +1042,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 				stopped.Wait()
 			})
 
-			if profileGrowth == 0 {
+			if totalProfileGrowth == 0 {
 				t.Errorf("no increase in mutex profile")
 			}
 			if metricGrowth == 0 && strictTiming {
@@ -1062,7 +1062,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			// together. It doesn't work as well for user-space contention, where the
 			// involved goroutines are not _Grunnable the whole time and so need to pass
 			// through the scheduler.
-			t.Logf("lock contention growth in runtime/pprof's view (%fs)", profileGrowth)
+			t.Logf("lock contention growth in runtime/pprof's view (%fs)", totalProfileGrowth)
 			t.Logf("lock contention growth in runtime/metrics' view (%fs)", metricGrowth)
 
 			acceptStacks = append([][]string(nil), acceptStacks...)
@@ -1082,7 +1082,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			}
 
 			var stks [][]string
-			values := make([][2]int64, len(acceptStacks))
+			values := make([][2]int64, len(acceptStacks)+1)
 			for _, s := range p.Sample {
 				var have []string
 				for _, loc := range s.Location {
@@ -1091,16 +1091,26 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 					}
 				}
 				stks = append(stks, have)
+				found := false
 				for i, stk := range acceptStacks {
 					if slices.Equal(have, stk) {
 						values[i][0] += s.Value[0]
 						values[i][1] += s.Value[1]
+						found = true
+						break
 					}
 				}
+				if !found {
+					values[len(values)-1][0] += s.Value[0]
+					values[len(values)-1][1] += s.Value[1]
+				}
 			}
+			profileGrowth = make([]int64, len(acceptStacks)+1)
+			profileGrowth[len(profileGrowth)-1] = values[len(values)-1][1]
 			for i, stk := range acceptStacks {
 				n += values[i][0]
 				value += values[i][1]
+				profileGrowth[i] = values[i][1]
 				t.Logf("stack %v has samples totaling n=%d value=%d", stk, values[i][0], values[i][1])
 			}
 			if n == 0 && value == 0 {
@@ -1122,11 +1132,6 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 	name := t.Name()
 
 	t.Run("runtime.lock", func(t *testing.T) {
-		mus := make([]runtime.Mutex, 200)
-		var needContention atomic.Int64
-		delay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
-		delayMicros := delay.Microseconds()
-
 		// The goroutine that acquires the lock will only proceed when it
 		// detects that its partner is contended for the lock. That will lead to
 		// live-lock if anything (such as a STW) prevents the partner goroutine
@@ -1136,11 +1141,29 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 		// will end up contended twice. Instead, disable the GC.
 		defer debug.SetGCPercent(debug.SetGCPercent(-1))
 
-		const workers = 2
-		if runtime.GOMAXPROCS(0) < workers {
-			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workers)
+		mus := make([]runtime.Mutex, 200)
+		var needContention atomic.Int64
+
+		baseDelay := 100 * time.Microsecond // large relative to system noise, for comparison between clocks
+		fastDelayMicros := baseDelay.Microseconds()
+		slowDelayMicros := baseDelay.Microseconds() * 4
+
+		const (
+			fastRole = iota
+			slowRole
+			workerCount
+		)
+		if runtime.GOMAXPROCS(0) < workerCount {
+			t.Skipf("contention on runtime-internal locks requires GOMAXPROCS >= %d", workerCount)
 		}
 
+		minTicks := make([][]int64, workerCount) // lower bound, known-contended time, measured by cputicks
+		maxTicks := make([][]int64, workerCount) // upper bound, total lock() duration, measured by cputicks
+		for i := range minTicks {
+			minTicks[i] = make([]int64, len(mus))
+			maxTicks[i] = make([]int64, len(mus))
+		}
+		var id atomic.Int32
 		fn := func() bool {
 			n := int(needContention.Load())
 			if n < 0 {
@@ -1148,34 +1171,77 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			}
 			mu := &mus[n]
 
-			runtime.Lock(mu)
-			for int(needContention.Load()) == n {
-				if runtime.MutexContended(mu) {
-					// make them wait a little while
-					for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
-						runtime.Usleep(uint32(delayMicros))
+			// Each worker has a role: to have a fast or slow critical section.
+			// Rotate the role assignments as we step through the mutex slice so
+			// we don't end up with one M always claiming the same kind of work.
+			id := int(id.Add(1))
+			role := (id + n) % workerCount
+
+			marker, delayMicros := fastMarkerFrame, fastDelayMicros
+			if role == slowRole {
+				marker, delayMicros = slowMarkerFrame, slowDelayMicros
+			}
+
+			// Each lock is used by two different critical sections, one fast
+			// and one slow, identified in profiles by their different "marker"
+			// functions. We expect the profile to blame each for the amount of
+			// delay it inflicts on other users of the lock. We run one worker
+			// of each kind, so any contention in one would be due to the other.
+			//
+			// We measure how long our runtime.lock call takes, which sets an
+			// upper bound on how much blame to expect for the other worker type
+			// in the profile. And if we acquire the lock first, we wait for the
+			// other worker to announce its contention. We measure the
+			// known-contended time, to use as a lower bound on how much blame
+			// we expect of ourselves in the profile. Then we stall for a little
+			// while (different amounts for "fast" versus "slow") before
+			// unlocking the mutex.
+
+			marker(func() {
+				t0 := runtime.Cputicks()
+				runtime.Lock(mu)
+				maxTicks[role][n] = runtime.Cputicks() - t0
+				minTicks[role][n] = 0
+				for int(needContention.Load()) == n {
+					if runtime.MutexContended(mu) {
+						t1 := runtime.Cputicks()
+						// make them wait a little while
+						for start := runtime.Nanotime(); (runtime.Nanotime()-start)/1000 < delayMicros; {
+							runtime.Usleep(uint32(1 + delayMicros/8))
+						}
+						minTicks[role][n] = runtime.Cputicks() - t1
+						break
 					}
-					break
 				}
-			}
-			runtime.Unlock(mu)
-			needContention.Store(int64(n - 1))
+				runtime.Unlock(mu)
+				needContention.Store(int64(n - 1))
+			})
 
 			return true
 		}
 
-		stks := [][]string{{
-			"runtime.unlock",
-			"runtime_test." + name + ".func5.1",
-			"runtime_test.(*contentionWorker).run",
-		}}
+		stks := make([][]string, 2)
+		for i := range stks {
+			marker := "runtime_test.fastMarkerFrame"
+			if i == slowRole {
+				marker = "runtime_test.slowMarkerFrame"
+			}
+
+			stks[i] = []string{
+				"runtime.unlock",
+				"runtime_test." + name + ".func5.1.1",
+				marker,
+				"runtime_test." + name + ".func5.1",
+				"runtime_test.(*contentionWorker).run",
+			}
+		}
 
 		t.Run("sample-1", func(t *testing.T) {
 			old := runtime.SetMutexProfileFraction(1)
 			defer runtime.SetMutexProfileFraction(old)
 
 			needContention.Store(int64(len(mus) - 1))
-			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
 			defer func() {
 				if t.Failed() {
 					explain()
@@ -1187,7 +1253,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			// gTrackingPeriod, so we don't have a hard lower bound here.
 			testenv.SkipFlaky(t, 64253)
 
-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); have < want {
+			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); have < want {
 				// The test imposes a delay with usleep, verified with calls to
 				// nanotime. Compare against the runtime/metrics package's view
 				// (based on nanotime) rather than runtime/pprof's view (based
@@ -1199,10 +1265,68 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 				t.Errorf("mutex profile reported contention count different from the known true count (%d != %d)", have, want)
 			}
 
+			var slowMinTicks, fastMinTicks int64
+			for role, ticks := range minTicks {
+				for _, delta := range ticks {
+					if role == slowRole {
+						slowMinTicks += delta
+					} else {
+						fastMinTicks += delta
+					}
+				}
+			}
+			var slowMaxTicks, fastMaxTicks int64
+			for role, ticks := range maxTicks {
+				for _, delta := range ticks {
+					if role == slowRole {
+						slowMaxTicks += delta
+					} else {
+						fastMaxTicks += delta
+					}
+				}
+			}
+
+			cpuGHz := float64(runtime.CyclesPerSecond()) / 1e9
+			for _, set := range []struct {
+				name     string
+				profTime int64
+				minTime  int64
+				maxTime  int64
+			}{
+				{
+					name:     "slow",
+					profTime: profileGrowth[slowRole],
+					minTime:  int64(float64(slowMinTicks) / cpuGHz),
+					maxTime:  int64(float64(fastMaxTicks) / cpuGHz),
+				},
+				{
+					name:     "fast",
+					profTime: profileGrowth[fastRole],
+					minTime:  int64(float64(fastMinTicks) / cpuGHz),
+					maxTime:  int64(float64(slowMaxTicks) / cpuGHz),
+				},
+			} {
+				t.Logf("profile's view of delays due to %q critical section: %dns", set.name, set.profTime)
+				t.Logf("test's view of known-contended time within %q critical section: %dns", set.name, set.minTime)
+				t.Logf("test's view of lock duration before critical sections other than %q: %dns", set.name, set.maxTime)
+
+				if set.profTime < set.minTime {
+					t.Errorf("profile undercounted %q critical section", set.name)
+				}
+				if set.profTime > set.maxTime {
+					t.Errorf("profile overcounted %q critical section", set.name)
+				}
+			}
+
+			var totalProfileGrowth float64
+			for _, growth := range profileGrowth {
+				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+			}
+
 			const slop = 1.5 // account for nanotime vs cputicks
 			t.Run("compare timers", func(t *testing.T) {
 				testenv.SkipFlaky(t, 64253)
-				if profileGrowth > slop*metricGrowth || metricGrowth > slop*profileGrowth {
+				if totalProfileGrowth > slop*metricGrowth || metricGrowth > slop*totalProfileGrowth {
 					t.Errorf("views differ by more than %fx", slop)
 				}
 			})
@@ -1215,7 +1339,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			defer runtime.SetMutexProfileFraction(old)
 
 			needContention.Store(int64(len(mus) - 1))
-			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workers, fn)(t)
+			metricGrowth, profileGrowth, n, _, explain := testcase(true, stks, workerCount, fn)(t)
 			defer func() {
 				if t.Failed() {
 					explain()
@@ -1228,7 +1352,7 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 			// 1e-9 chance of that, which is an acceptably low flakiness rate.
 			const samplingSlop = 2.5
 
-			if have, want := metricGrowth, delay.Seconds()*float64(len(mus)); samplingSlop*have < want {
+			if have, want := metricGrowth, baseDelay.Seconds()*float64(len(mus)); samplingSlop*have < want {
 				// The test imposes a delay with usleep, verified with calls to
 				// nanotime. Compare against the runtime/metrics package's view
 				// (based on nanotime) rather than runtime/pprof's view (based
@@ -1239,8 +1363,13 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 				t.Errorf("mutex profile reported contention count too different from the expected count (%d far from %d)", have, want)
 			}
 
+			var totalProfileGrowth float64
+			for _, growth := range profileGrowth {
+				totalProfileGrowth += float64(growth) * time.Nanosecond.Seconds()
+			}
+
 			const timerSlop = 1.5 * samplingSlop // account for nanotime vs cputicks, plus the two views' independent sampling
-			if profileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*profileGrowth {
+			if totalProfileGrowth > timerSlop*metricGrowth || metricGrowth > timerSlop*totalProfileGrowth {
 				t.Errorf("views differ by more than %fx", timerSlop)
 			}
 		})
@@ -1307,6 +1436,9 @@ func TestRuntimeLockMetricsAndProfile(t *testing.T) {
 	})
 }
 
+func slowMarkerFrame(fn func()) { fn() }
+func fastMarkerFrame(fn func()) { fn() }
+
 // contentionWorker provides cleaner call stacks for lock contention profile tests
 type contentionWorker struct {
 	before func()
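
For readers who want to see the marker-frame idea outside the runtime's test hooks: below is a minimal, self-contained sketch using only the public sync.Mutex, runtime, and runtime/pprof APIs. The patch itself cannot use sync.Mutex because it targets runtime-internal locks (hence the Cputicks/CyclesPerSecond/Lock/MutexContended hooks exported in export_test.go); this sketch only mirrors the marker-function technique. The function names, delays, and iteration counts here are illustrative assumptions, not part of the patch.

// markersketch.go: two workers contend on one mutex, each wrapped in a no-op
// "marker" function. Because each marker appears as a distinct frame in the
// mutex profile's call stacks, the profile can blame the fast and slow
// critical sections separately, which is what the test above verifies.
package main

import (
	"os"
	"runtime"
	"runtime/pprof"
	"sync"
	"time"
)

var mu sync.Mutex

// slowMarker and fastMarker do nothing except call fn, but each one leaves a
// distinguishable frame on the stack recorded by the mutex profiler.
func slowMarker(fn func()) { fn() }
func fastMarker(fn func()) { fn() }

// worker repeatedly takes the shared lock inside its marker frame and stalls
// while holding it, delaying the other worker. (Hypothetical helper, for
// illustration only.)
func worker(marker func(func()), hold time.Duration, rounds int, wg *sync.WaitGroup) {
	defer wg.Done()
	for i := 0; i < rounds; i++ {
		marker(func() {
			mu.Lock()
			time.Sleep(hold) // critical-section length differs by role
			mu.Unlock()
		})
	}
}

func main() {
	// Record every contention event (fraction 1), as in the test's
	// "sample-1" case.
	runtime.SetMutexProfileFraction(1)

	var wg sync.WaitGroup
	wg.Add(2)
	go worker(fastMarker, 100*time.Microsecond, 50, &wg) // "fast" critical section
	go worker(slowMarker, 400*time.Microsecond, 50, &wg) // "slow" one, 4x longer
	wg.Wait()

	// Contention is attributed to the stack that releases the lock, so the
	// stack containing slowMarker should carry roughly 4x the contention
	// time of the one containing fastMarker.
	pprof.Lookup("mutex").WriteTo(os.Stdout, 1)
}

This is the same bounding logic the patch checks with cputicks: the delay a worker inflicts on others (blamed to its unlock stack) should be at least its known-contended time and at most the other worker's total lock-acquisition time.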