diff --git a/src/cmd/compile/internal/inline/interleaved/interleaved.go b/src/cmd/compile/internal/inline/interleaved/interleaved.go
index a35121517ac001b7ecb16e2b0d4974b4107ec2b2..954cc306fc81d3596f09533e19ac3c51e01463c6 100644
--- a/src/cmd/compile/internal/inline/interleaved/interleaved.go
+++ b/src/cmd/compile/internal/inline/interleaved/interleaved.go
@@ -253,7 +253,7 @@ func (s *inlClosureState) mark(n ir.Node) ir.Node {
 
 	if isTestingBLoop(n) {
 		// No inlining or devirtualization is performed on the b.Loop body
-		if base.Flag.LowerM > 1 {
+		if base.Flag.LowerM > 0 {
 			fmt.Printf("%v: skip inlining within testing.B.loop for %v\n", ir.Line(n), n)
 		}
 		// We still want to explore inlining opportunities in other parts of ForStmt.
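Note: base.Flag.LowerM is the compiler's -m flag, so the "skip inlining within testing.B.loop" diagnostic above now prints at plain -m instead of requiring -m=2. A minimal sketch of when it fires (the package and function names below are hypothetical, not part of this change); building it with -gcflags=-m should surface the note at the for statement:

package p

import "testing"

func work(x int) int { return x + 1 }

func BenchmarkWork(b *testing.B) {
	s := 0
	// The compiler deliberately skips inlining and devirtualization inside
	// a "for b.Loop()" body; with this change, -m is enough to see the
	// "skip inlining within testing.B.loop for ..." note for this loop.
	for b.Loop() {
		s = work(s)
	}
	_ = s
}
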
diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go
index 9a1a8bb1059ca6a8697b48d8da4e013cea2f2e16..c30be67731e6e40e423ce01ae94f68e447dbb9fa 100644
--- a/src/cmd/compile/internal/test/inl_test.go
+++ b/src/cmd/compile/internal/test/inl_test.go
@@ -230,6 +230,9 @@ func TestIntendedInlining(t *testing.T) {
 			"(*Pointer[go.shape.int]).Store",
 			"(*Pointer[go.shape.int]).Swap",
 		},
+		"testing": {
+			"(*B).Loop",
+		},
 	}
 
 	if !goexperiment.SwissMap {
diff --git a/src/testing/benchmark.go b/src/testing/benchmark.go
index c2b38d814c8993165ba3d7d78c563ed6e053318f..5716af171c5b1f14002095c9ddb9b87e6242c8a8 100644
--- a/src/testing/benchmark.go
+++ b/src/testing/benchmark.go
@@ -120,10 +120,13 @@ type B struct {
 		// n is the target number of iterations. It gets bumped up as we go.
 		// When the benchmark loop is done, we commit this to b.N so users can
 		// do reporting based on it, but we avoid exposing it until then.
-		n int
+		n uint64
 		// i is the current Loop iteration. It's strictly monotonically
 		// increasing toward n.
-		i int
+		//
+		// The high bit is used to poison the Loop fast path, forcing it to
+		// fall back to the slow path.
+		i uint64
 
 		done bool // set when B.Loop returns false
 	}
@@ -139,6 +142,7 @@ func (b *B) StartTimer() {
 		b.startBytes = memStats.TotalAlloc
 		b.start = highPrecisionTimeNow()
 		b.timerOn = true
+		b.loop.i &^= loopPoisonTimer
 	}
 }
 
@@ -151,6 +155,8 @@ func (b *B) StopTimer() {
 		b.netAllocs += memStats.Mallocs - b.startAllocs
 		b.netBytes += memStats.TotalAlloc - b.startBytes
 		b.timerOn = false
+		// If we hit B.Loop with the timer stopped, fail.
+		b.loop.i |= loopPoisonTimer
 	}
 }
 
@@ -388,19 +394,32 @@ func (b *B) stopOrScaleBLoop() bool {
 		// Stop the timer so we don't count cleanup time
 		b.StopTimer()
 		// Commit iteration count
-		b.N = b.loop.n
+		b.N = int(b.loop.n)
 		b.loop.done = true
 		return false
 	}
 	// Loop scaling
 	goalns := b.benchTime.d.Nanoseconds()
 	prevIters := int64(b.loop.n)
-	b.loop.n = predictN(goalns, prevIters, t.Nanoseconds(), prevIters)
+	b.loop.n = uint64(predictN(goalns, prevIters, t.Nanoseconds(), prevIters))
+	if b.loop.n&loopPoisonMask != 0 {
+		// The iteration count should never get this high, but if it did, it
+		// would collide with the poison bits and corrupt the fast-path check.
+		panic("loop iteration target overflow")
+	}
 	b.loop.i++
 	return true
 }
 
 func (b *B) loopSlowPath() bool {
+	// Consistency checks
+	if !b.timerOn {
+		b.Fatal("B.Loop called with timer stopped")
+	}
+	if b.loop.i&loopPoisonMask != 0 {
+		panic(fmt.Sprintf("unknown loop stop condition: %#x", b.loop.i))
+	}
+
 	if b.loop.n == 0 {
 		// This is the first call to b.Loop() in the benchmark function;
 		// handling it here allows more precise measurement of loop cost.
@@ -414,14 +433,14 @@ func (b *B) loopSlowPath() bool {
 	}
 	// Handles fixed iterations case
 	if b.benchTime.n > 0 {
-		if b.loop.n < b.benchTime.n {
-			b.loop.n = b.benchTime.n
+		if b.loop.n < uint64(b.benchTime.n) {
+			b.loop.n = uint64(b.benchTime.n)
 			b.loop.i++
 			return true
 		}
 		b.StopTimer()
 		// Commit iteration count
-		b.N = b.loop.n
+		b.N = int(b.loop.n)
 		b.loop.done = true
 		return false
 	}
@@ -463,8 +482,18 @@ func (b *B) loopSlowPath() bool {
 // whereas b.N-based benchmarks must run the benchmark function (and any
 // associated setup and cleanup) several times.
 func (b *B) Loop() bool {
-	// On the first call, both i and n are 0, so we'll fall through to the slow
-	// path in that case, too.
+	// This is written such that the fast path is as fast as possible and can be
+	// inlined.
+	//
+	// There are three cases where we'll fall out of the fast path:
+	//
+	// - On the first call, both i and n are 0.
+	//
+	// - If the loop reaches the n'th iteration, then i == n and we need
+	//   to compute a new target iteration count, or determine that we're done.
+	//
+	// - If the timer is stopped, StopTimer poisons the top bit of i, so the
+	//   slow path can do consistency checks and fail.
 	if b.loop.i < b.loop.n {
 		b.loop.i++
 		return true
@@ -472,6 +501,19 @@ func (b *B) Loop() bool {
 	return b.loopSlowPath()
 }
 
+// The loopPoison* constants can be OR'd into B.loop.i to cause it to fall
+// back to the slow path.
+const (
+	loopPoisonTimer = uint64(1 << (63 - iota))
+	// If necessary, add more poison bits here.
+
+	// loopPoisonMask is the set of all loop poison bits. (iota-1) is the index
+	// of the bit we just set, from which we recreate that bit mask. We subtract
+	// 1 to set all of the bits below that bit, then complement the result to
+	// get the mask. Sorry, not sorry.
+	loopPoisonMask = ^uint64((1 << (63 - (iota - 1))) - 1)
+)
+
 // BenchmarkResult contains the results of a benchmark run.
 type BenchmarkResult struct {
 	N         int           // The number of iterations.
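To make the poison-bit scheme concrete: the inlinable fast path in Loop is just "i < n; i++", so StopTimer sets the high bit of b.loop.i, forcing i >= n and routing the next Loop call into loopSlowPath, where the bit is diagnosed. Below is a standalone sketch of the mask arithmetic; poisonTimer and poisonMask are hypothetical copies of loopPoisonTimer and loopPoisonMask:

package main

import "fmt"

const (
	// Poison bits are allocated from the top of the word down:
	// iota == 0 here, so this is bit 63.
	poisonTimer = uint64(1 << (63 - iota))
	// A second poison bit declared here would get bit 62, and so on.

	// iota == 1 here, so (iota - 1) == 0 is the index of the last poison
	// bit declared above. (1 << 63) - 1 sets bits 0..62; complementing it
	// leaves exactly the poison bits: bit 63.
	poisonMask = ^uint64((1 << (63 - (iota - 1))) - 1)
)

func main() {
	fmt.Printf("poisonTimer = %#x\n", poisonTimer) // 0x8000000000000000
	fmt.Printf("poisonMask  = %#x\n", poisonMask)  // 0x8000000000000000

	// A poisoned i compares >= any realistic n, so the fast path's
	// "i < n" test fails and the slow path takes over.
	i := uint64(3) | poisonTimer
	fmt.Println(i < 1000) // false
}

The overflow panic in stopOrScaleBLoop guards the other direction: predictN must never produce a target that reaches into the poison bits, or a legitimate iteration count would be indistinguishable from a poisoned one.
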
diff --git a/src/testing/loop_test.go b/src/testing/loop_test.go
index 423094fbbd1b3c774059af61bfcb0e6aaead6c20..743cbe64f0613b03d544edf08d2face62945ad7c 100644
--- a/src/testing/loop_test.go
+++ b/src/testing/loop_test.go
@@ -129,3 +129,26 @@ func TestBenchmarkBLoopError(t *T) {
 		t.Errorf("want N == 0, got %d", bRet.N)
 	}
 }
+
+func TestBenchmarkBLoopStop(t *T) {
+	var bState *B
+	var bLog bytes.Buffer
+	bRet := Benchmark(func(b *B) {
+		bState = b
+		b.common.w = &bLog
+
+		for i := 0; b.Loop(); i++ {
+			b.StopTimer()
+		}
+	})
+	if !bState.failed {
+		t.Errorf("benchmark should have failed")
+	}
+	const wantLog = "B.Loop called with timer stopped"
+	if log := bLog.String(); !strings.Contains(log, wantLog) {
+		t.Errorf("missing error %q in output:\n%s", wantLog, log)
+	}
+	if bRet.N != 0 {
+		t.Errorf("want N == 0, got %d", bRet.N)
+	}
+}
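
For callers, the practical consequence is that the timer must stay running for the duration of a b.Loop loop; per-iteration StopTimer/StartTimer pairs now fail fast instead of silently skewing results. A hedged usage sketch (the benchmark and workload are hypothetical): setup belongs before the loop, which the first-call branch of loopSlowPath appears to exclude from measurement anyway:

package demo_test

import (
	"sort"
	"testing"
)

func BenchmarkSortInts(b *testing.B) {
	// Setup before the loop: the first b.Loop call begins measurement,
	// so this should not be counted (an assumption based on the
	// first-call branch of loopSlowPath, which this hunk truncates).
	data := make([]int, 1000)
	for i := range data {
		data[i] = len(data) - i
	}

	for b.Loop() {
		// The timer must stay running here: calling b.StopTimer() inside
		// the loop poisons b.loop.i, and the next Loop call fails with
		// "B.Loop called with timer stopped".
		buf := append([]int(nil), data...)
		sort.Ints(buf)
	}
}
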
diff --git a/test/inline_testingbloop.go b/test/inline_testingbloop.go
index cbdf905993c799c1b62aa18fc638ab136e880541..702a652f562b5716a11418f4fb3be3fc5bfacbbd 100644
--- a/test/inline_testingbloop.go
+++ b/test/inline_testingbloop.go
@@ -1,4 +1,4 @@
-// errorcheck -0 -m=2
+// errorcheck -0 -m
 
 // Copyright 2024 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
@@ -15,7 +15,7 @@ func caninline(x int) int { // ERROR "can inline caninline"
 	return x
 }
 
-func cannotinline(b *testing.B) { // ERROR "b does not escape" "cannot inline cannotinline.*"
+func test(b *testing.B) { // ERROR "leaking param: b"
 	for i := 0; i < b.N; i++ {
 		caninline(1) // ERROR "inlining call to caninline"
 	}
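
Since plain -m reports only successful inlining and escape decisions, while "cannot inline" reasons need -m=2, the function here also loses its cannotinline name and assertion. For readers unfamiliar with the harness: errorcheck files assert compiler diagnostics through ERROR comments that must match the output produced under the flags on the first line. A minimal standalone sketch of the convention (a hypothetical file, not part of this change):

// errorcheck -0 -m

package example

func double(x int) int { // ERROR "can inline double"
	return 2 * x
}

func use() int { // ERROR "can inline use"
	return double(21) // ERROR "inlining call to double"
}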