Skip to content
Snippets Groups Projects
Commit e74c6cd3 authored by Martin Möhrmann, committed by Brad Fitzpatrick
Browse files

regexp: add ASCII fast path for context methods

The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256


Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 8a16d7d4
Branches
No related tags found
No related merge requests found
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 
 func (i *inputString) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRune(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRune(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRune(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRune(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment