From 7df29b50b2d7fd9abcd2a070df407db57f3f79a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20M=C3=B6hrmann?= <moehrmann@google.com>
Date: Sat, 12 Aug 2017 10:20:30 +0200
Subject: [PATCH] bytes: speed up Fields and FieldsFunc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Applies the optimizations from golang.org/cl/42810 and golang.org/cl/37959
done to the strings package to the bytes package.

name                      old time/op    new time/op     delta
Fields/ASCII/16              417ns ± 4%      118ns ± 3%    -71.65%  (p=0.000 n=10+10)
Fields/ASCII/256            5.95µs ± 3%     0.88µs ± 0%    -85.23%  (p=0.000 n=10+7)
Fields/ASCII/4096           92.3µs ± 1%     12.8µs ± 2%    -86.13%  (p=0.000 n=10+10)
Fields/ASCII/65536          1.49ms ± 1%     0.25ms ± 1%    -83.14%  (p=0.000 n=10+10)
Fields/ASCII/1048576        25.0ms ± 1%      6.5ms ± 2%    -74.04%  (p=0.000 n=10+10)
Fields/Mixed/16              406ns ± 1%      222ns ± 1%    -45.24%  (p=0.000 n=10+9)
Fields/Mixed/256            5.78µs ± 1%     2.27µs ± 1%    -60.73%  (p=0.000 n=9+10)
Fields/Mixed/4096           97.9µs ± 1%     40.5µs ± 3%    -58.66%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.58ms ± 1%     0.69ms ± 1%    -56.58%  (p=0.000 n=10+10)
Fields/Mixed/1048576        26.6ms ± 1%     12.6ms ± 2%    -52.44%  (p=0.000 n=9+10)
FieldsFunc/ASCII/16          395ns ± 1%      188ns ± 1%    -52.34%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256        5.90µs ± 1%     2.00µs ± 1%    -66.06%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096       92.5µs ± 1%     33.0µs ± 1%    -64.34%  (p=0.000 n=10+9)
FieldsFunc/ASCII/65536      1.48ms ± 1%     0.54ms ± 1%    -63.38%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576    25.1ms ± 1%     10.5ms ± 3%    -58.24%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          401ns ± 1%      205ns ± 2%    -48.87%  (p=0.000 n=10+10)
FieldsFunc/Mixed/256        5.70µs ± 1%     1.98µs ± 1%    -65.28%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096       97.5µs ± 1%     35.4µs ± 1%    -63.65%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536      1.57ms ± 1%     0.61ms ± 1%    -61.20%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    26.5ms ± 1%     11.4ms ± 2%    -56.84%  (p=0.000 n=10+10)

name                      old speed      new speed       delta
Fields/ASCII/16           38.4MB/s ± 4%  134.9MB/s ± 3%   +251.55%  (p=0.000 n=10+10)
Fields/ASCII/256          43.0MB/s ± 3%  290.6MB/s ± 1%   +575.97%  (p=0.000 n=10+8)
Fields/ASCII/4096         44.4MB/s ± 1%  320.0MB/s ± 2%   +620.90%  (p=0.000 n=10+10)
Fields/ASCII/65536        44.0MB/s ± 1%  260.7MB/s ± 1%   +493.15%  (p=0.000 n=10+10)
Fields/ASCII/1048576      42.0MB/s ± 1%  161.6MB/s ± 2%   +285.21%  (p=0.000 n=10+10)
Fields/Mixed/16           39.4MB/s ± 1%   71.7MB/s ± 1%    +82.20%  (p=0.000 n=10+10)
Fields/Mixed/256          44.3MB/s ± 1%  112.8MB/s ± 1%   +154.64%  (p=0.000 n=9+10)
Fields/Mixed/4096         41.9MB/s ± 1%  101.2MB/s ± 3%   +141.92%  (p=0.000 n=10+10)
Fields/Mixed/65536        41.5MB/s ± 1%   95.5MB/s ± 1%   +130.29%  (p=0.000 n=10+10)
Fields/Mixed/1048576      39.4MB/s ± 1%   82.9MB/s ± 2%   +110.28%  (p=0.000 n=9+10)
FieldsFunc/ASCII/16       40.5MB/s ± 1%   84.9MB/s ± 2%   +109.80%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256      43.4MB/s ± 1%  127.9MB/s ± 1%   +194.58%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096     44.3MB/s ± 1%  124.2MB/s ± 1%   +180.44%  (p=0.000 n=10+9)
FieldsFunc/ASCII/65536    44.2MB/s ± 1%  120.6MB/s ± 1%   +173.06%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576  41.8MB/s ± 1%  100.2MB/s ± 3%   +139.53%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16       39.8MB/s ± 1%   77.8MB/s ± 2%    +95.46%  (p=0.000 n=10+10)
FieldsFunc/Mixed/256      44.9MB/s ± 1%  129.4MB/s ± 1%   +187.97%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096     42.0MB/s ± 1%  115.6MB/s ± 1%   +175.08%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536    41.6MB/s ± 1%  107.3MB/s ± 1%   +157.75%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576  39.6MB/s ± 1%   91.8MB/s ± 2%   +131.72%  (p=0.000 n=10+10)

name                      old alloc/op   new alloc/op    delta
Fields/ASCII/16              80.0B ± 0%      80.0B ± 0%       ~     (all equal)
Fields/ASCII/256              768B ± 0%       768B ± 0%       ~     (all equal)
Fields/ASCII/4096           9.47kB ± 0%     9.47kB ± 0%       ~     (all equal)
Fields/ASCII/65536           147kB ± 0%      147kB ± 0%       ~     (all equal)
Fields/ASCII/1048576        2.27MB ± 0%     2.27MB ± 0%       ~     (all equal)
Fields/Mixed/16              96.0B ± 0%      96.0B ± 0%       ~     (all equal)
Fields/Mixed/256              768B ± 0%       768B ± 0%       ~     (all equal)
Fields/Mixed/4096           9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
Fields/Mixed/65536           147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
Fields/Mixed/1048576        2.26MB ± 0%     9.61MB ± 0%   +324.89%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16          80.0B ± 0%      80.0B ± 0%       ~     (all equal)
FieldsFunc/ASCII/256          768B ± 0%       768B ± 0%       ~     (all equal)
FieldsFunc/ASCII/4096       9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536       147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
FieldsFunc/ASCII/1048576    2.27MB ± 0%     9.61MB ± 0%   +323.72%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          96.0B ± 0%      96.0B ± 0%       ~     (all equal)
FieldsFunc/Mixed/256          768B ± 0%       768B ± 0%       ~     (all equal)
FieldsFunc/Mixed/4096       9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536       147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    2.26MB ± 0%     9.61MB ± 0%   +324.89%  (p=0.000 n=10+10)

name                      old allocs/op  new allocs/op   delta
Fields/ASCII/16               1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/256              1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/4096             1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/65536            1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/1048576          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/16               1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/256              1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/4096             1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
Fields/Mixed/65536            1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
Fields/Mixed/1048576          1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16           1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/ASCII/256          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/ASCII/4096         1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536        1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/1048576      1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16           1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/Mixed/256          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/Mixed/4096         1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536        1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576      1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)

Change-Id: If1926782decc2f60d3b4b8c41c2ce7d8bdedfd8f
Reviewed-on: https://go-review.googlesource.com/55131
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
---
 src/bytes/bytes.go      | 114 ++++++++++++++++++++++++++++++----------
 src/bytes/bytes_test.go |  53 ++++++++++++++++---
 2 files changed, 132 insertions(+), 35 deletions(-)

diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index 7c878af688c..457e1494104 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -265,9 +265,57 @@ func SplitAfter(s, sep []byte) [][]byte {
 	return genSplit(s, sep, len(sep), -1)
 }
 
+var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
+
 // Fields splits the slice s around each instance of one or more consecutive white space
-// characters, returning a slice of subslices of s or an empty list if s contains only white space.
+// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
+// empty slice if s contains only white space.
 func Fields(s []byte) [][]byte {
+	// First count the fields.
+	// This is an exact count if s is ASCII, otherwise it is an approximation.
+	n := 0
+	wasSpace := 1
+	// setBits is used to track which bits are set in the bytes of s.
+	setBits := uint8(0)
+	for i := 0; i < len(s); i++ {
+		r := s[i]
+		setBits |= r
+		isSpace := int(asciiSpace[r])
+		n += wasSpace & ^isSpace
+		wasSpace = isSpace
+	}
+
+	if setBits < utf8.RuneSelf { // ASCII fast path
+		a := make([][]byte, n)
+		na := 0
+		fieldStart := 0
+		i := 0
+		// Skip spaces in the front of the input.
+		for i < len(s) && asciiSpace[s[i]] != 0 {
+			i++
+		}
+		fieldStart = i
+		for i < len(s) {
+			if asciiSpace[s[i]] == 0 {
+				i++
+				continue
+			}
+			a[na] = s[fieldStart:i]
+			na++
+			i++
+			// Skip spaces in between fields.
+			for i < len(s) && asciiSpace[s[i]] != 0 {
+				i++
+			}
+			fieldStart = i
+		}
+		if fieldStart < len(s) { // Last field might end at EOF.
+			a[na] = s[fieldStart:]
+		}
+		return a
+	}
+
+	// Some runes in the input slice are not ASCII.
 	return FieldsFunc(s, unicode.IsSpace)
 }
 
@@ -278,39 +326,49 @@ func Fields(s []byte) [][]byte {
 // FieldsFunc makes no guarantees about the order in which it calls f(c).
 // If f does not return consistent results for a given c, FieldsFunc may crash.
 func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
-	n := 0
-	inField := false
-	for i := 0; i < len(s); {
-		r, size := utf8.DecodeRune(s[i:])
-		wasInField := inField
-		inField = !f(r)
-		if inField && !wasInField {
-			n++
-		}
-		i += size
+	// A span is used to record a slice of s of the form s[start:end].
+	// The start index is inclusive and the end index is exclusive.
+	type span struct {
+		start int
+		end   int
 	}
+	spans := make([]span, 0, 32)
 
-	a := make([][]byte, n)
-	na := 0
-	fieldStart := -1
-	for i := 0; i <= len(s) && na < n; {
-		r, size := utf8.DecodeRune(s[i:])
-		if fieldStart < 0 && size > 0 && !f(r) {
-			fieldStart = i
-			i += size
-			continue
-		}
-		if fieldStart >= 0 && (size == 0 || f(r)) {
-			a[na] = s[fieldStart:i]
-			na++
-			fieldStart = -1
+	// Find the field start and end indices.
+	wasField := false
+	fromIndex := 0
+	for i := 0; i < len(s); {
+		size := 1
+		r := rune(s[i])
+		if r >= utf8.RuneSelf {
+			r, size = utf8.DecodeRune(s[i:])
 		}
-		if size == 0 {
-			break
+		if f(r) {
+			if wasField {
+				spans = append(spans, span{start: fromIndex, end: i})
+				wasField = false
+			}
+		} else {
+			if !wasField {
+				fromIndex = i
+				wasField = true
+			}
 		}
 		i += size
 	}
-	return a[0:na]
+
+	// Last field might end at EOF.
+	if wasField {
+		spans = append(spans, span{fromIndex, len(s)})
+	}
+
+	// Create subslices from recorded field indices.
+	a := make([][]byte, len(spans))
+	for i, span := range spans {
+		a[i] = s[span.start:span.end]
+	}
+
+	return a
 }
 
 // Join concatenates the elements of s to create a new byte slice. The separator
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index ca0cdbb7c9f..db28497e39e 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -1502,19 +1502,58 @@ var makeFieldsInput = func() []byte {
 	return x
 }
 
-var fieldsInput = makeFieldsInput()
+var makeFieldsInputASCII = func() []byte {
+	x := make([]byte, 1<<20)
+	// Input is ~10% space, rest ASCII non-space.
+	for i := range x {
+		if rand.Intn(10) == 0 {
+			x[i] = ' '
+		} else {
+			x[i] = 'x'
+		}
+	}
+	return x
+}
+
+var bytesdata = []struct {
+	name string
+	data []byte
+}{
+	{"ASCII", makeFieldsInputASCII()},
+	{"Mixed", makeFieldsInput()},
+}
 
 func BenchmarkFields(b *testing.B) {
-	b.SetBytes(int64(len(fieldsInput)))
-	for i := 0; i < b.N; i++ {
-		Fields(fieldsInput)
+	for _, sd := range bytesdata {
+		b.Run(sd.name, func(b *testing.B) {
+			for j := 1 << 4; j <= 1<<20; j <<= 4 {
+				b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+					b.ReportAllocs()
+					b.SetBytes(int64(j))
+					data := sd.data[:j]
+					for i := 0; i < b.N; i++ {
+						Fields(data)
+					}
+				})
+			}
+		})
 	}
 }
 
 func BenchmarkFieldsFunc(b *testing.B) {
-	b.SetBytes(int64(len(fieldsInput)))
-	for i := 0; i < b.N; i++ {
-		FieldsFunc(fieldsInput, unicode.IsSpace)
+	for _, sd := range bytesdata {
+		b.Run(sd.name, func(b *testing.B) {
+			for j := 1 << 4; j <= 1<<20; j <<= 4 {
+				b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+					b.ReportAllocs()
+					b.SetBytes(int64(j))
+					data := sd.data[:j]
+					for i := 0; i < b.N; i++ {
+						FieldsFunc(data, unicode.IsSpace)
+					}
+				})
+			}
+		})
 	}
 }
 
-- 
GitLab