Skip to content
Snippets Groups Projects
Commit 691f5c34 authored by erifan01's avatar erifan01 Committed by Ian Lance Taylor
Browse files

strings, bytes: optimize function Index

This change compares the first two characters instead of the first one,
and if they match, the entire string is compared. Comparing the first two
characters helps to filter out the case where the first character matches
but the subsequent characters do not match, thereby improving the substring
search speed in this case.

Benchmarks with no effect or minimal impact (less than 5%) is not listed,
the following are improved benchmarks:
On arm64:
strings:
IndexPeriodic/IndexPeriodic16-8   172890.00ns +- 2%   124156.20ns +- 0%  -28.19%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-8    78092.80ns +- 0%    65138.60ns +- 0%  -16.59%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-8    42322.20ns +- 0%    34661.60ns +- 0%  -18.10%  (p=0.008 n=5+5)
bytes:
IndexPeriodic/IndexPeriodic16-8     183468.20ns +- 6%     123759.00ns +- 0%  -32.54%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-8      84776.40ns +- 0%      63907.80ns +- 0%  -24.62%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-8      45835.60ns +- 0%      34194.20ns +- 0%  -25.40%  (p=0.008 n=5+5)

On amd64:
strings:
IndexPeriodic/IndexPeriodic8-16    219499.00ns +- 0%   178123.40ns +- 0%  -18.85%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic16-16   109760.20ns +- 0%    88957.80ns +- 0%  -18.95%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-16    54943.00ns +- 0%    44573.80ns +- 0%  -18.87%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-16    29804.80ns +- 0%    24417.80ns +- 0%  -18.07%  (p=0.008 n=5+5)
bytes:
IndexPeriodic/IndexPeriodic8-16     226592.60ns +- 0%    181183.20ns +- 0%  -20.04%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic16-16    111432.60ns +- 0%     90634.60ns +- 0%  -18.66%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-16     55640.60ns +- 0%     45433.00ns +- 0%  -18.35%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-16     30833.00ns +- 0%     24784.20ns +- 0%  -19.62%  (p=0.008 n=5+5)

Change-Id: I2d9e7e138d29e960d20a203eb74dc2ec976a9d71
Reviewed-on: https://go-review.googlesource.com/131177


Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent 77e503a3
Branches
Tags
No related merge requests found
......@@ -849,21 +849,22 @@ func Index(s, sep []byte) int {
if len(s) <= bytealg.MaxBruteForce {
return bytealg.Index(s, sep)
}
c := sep[0]
c0 := sep[0]
c1 := sep[1]
i := 0
t := s[:len(s)-n+1]
t := len(s) - n + 1
fails := 0
for i < len(t) {
if t[i] != c {
for i < t {
if s[i] != c0 {
// IndexByte is faster than bytealg.Index, so use it as long as
// we're not getting lots of false positives.
o := IndexByte(t[i:], c)
o := IndexByte(s[i:t], c0)
if o < 0 {
return -1
}
i += o
}
if Equal(s[i:i+n], sep) {
if s[i+1] == c1 && Equal(s[i:i+n], sep) {
return i
}
fails++
......@@ -879,24 +880,25 @@ func Index(s, sep []byte) int {
}
return -1
}
c := sep[0]
c0 := sep[0]
c1 := sep[1]
i := 0
fails := 0
t := s[:len(s)-n+1]
for i < len(t) {
if t[i] != c {
o := IndexByte(t[i:], c)
t := len(s) - n + 1
for i < t {
if s[i] != c0 {
o := IndexByte(s[i:t], c0)
if o < 0 {
break
}
i += o
}
if Equal(s[i:i+n], sep) {
if s[i+1] == c1 && Equal(s[i:i+n], sep) {
return i
}
i++
fails++
if fails >= 4+i>>4 && i < len(t) {
if fails >= 4+i>>4 && i < t {
// Give up on IndexByte, it isn't skipping ahead
// far enough to be better than Rabin-Karp.
// Experiments (using IndexPeriodic) suggest
......
......@@ -947,21 +947,22 @@ func Index(s, substr string) int {
if len(s) <= bytealg.MaxBruteForce {
return bytealg.IndexString(s, substr)
}
c := substr[0]
c0 := substr[0]
c1 := substr[1]
i := 0
t := s[:len(s)-n+1]
t := len(s) - n + 1
fails := 0
for i < len(t) {
if t[i] != c {
for i < t {
if s[i] != c0 {
// IndexByte is faster than bytealg.IndexString, so use it as long as
// we're not getting lots of false positives.
o := IndexByte(t[i:], c)
o := IndexByte(s[i:t], c0)
if o < 0 {
return -1
}
i += o
}
if s[i:i+n] == substr {
if s[i+1] == c1 && s[i:i+n] == substr {
return i
}
fails++
......@@ -977,24 +978,25 @@ func Index(s, substr string) int {
}
return -1
}
c := substr[0]
c0 := substr[0]
c1 := substr[1]
i := 0
t := s[:len(s)-n+1]
t := len(s) - n + 1
fails := 0
for i < len(t) {
if t[i] != c {
o := IndexByte(t[i:], c)
for i < t {
if s[i] != c0 {
o := IndexByte(s[i:t], c0)
if o < 0 {
return -1
}
i += o
}
if s[i:i+n] == substr {
if s[i+1] == c1 && s[i:i+n] == substr {
return i
}
i++
fails++
if fails >= 4+i>>4 && i < len(t) {
if fails >= 4+i>>4 && i < t {
// See comment in ../bytes/bytes_generic.go.
j := indexRabinKarp(s[i:], substr)
if j < 0 {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment