Newer
Older
func trimLeftUnicode(s []byte, cutset string) []byte {
for len(s) > 0 {
r, n := rune(s[0]), 1
if r >= utf8.RuneSelf {
r, n = utf8.DecodeRune(s)
}
if !containsRune(cutset, r) {
break
}
s = s[n:]
}
if len(s) == 0 {
// This is what we've historically done.
return nil
}
return s
}
// TrimRight returns a subslice of s by slicing off all trailing
// UTF-8-encoded code points that are contained in cutset.
func TrimRight(s []byte, cutset string) []byte {
if len(s) == 0 || cutset == "" {
return s
}
if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
return trimRightByte(s, cutset[0])
}
if as, ok := makeASCIISet(cutset); ok {
return trimRightASCII(s, &as)
}
return trimRightUnicode(s, cutset)
func trimRightByte(s []byte, c byte) []byte {
for len(s) > 0 && s[len(s)-1] == c {
s = s[:len(s)-1]
}
return s
}
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
func trimRightASCII(s []byte, as *asciiSet) []byte {
for len(s) > 0 {
if !as.contains(s[len(s)-1]) {
break
}
s = s[:len(s)-1]
}
return s
}
func trimRightUnicode(s []byte, cutset string) []byte {
for len(s) > 0 {
r, n := rune(s[len(s)-1]), 1
if r >= utf8.RuneSelf {
r, n = utf8.DecodeLastRune(s)
}
if !containsRune(cutset, r) {
break
}
s = s[:len(s)-n]
}
return s
}
// TrimSpace returns a subslice of s by slicing off all leading and
// trailing white space, as defined by Unicode.
func TrimSpace(s []byte) []byte {
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
// Fast path for ASCII: look for the first ASCII non-space byte
start := 0
for ; start < len(s); start++ {
c := s[start]
if c >= utf8.RuneSelf {
// If we run into a non-ASCII byte, fall back to the
// slower unicode-aware method on the remaining bytes
return TrimFunc(s[start:], unicode.IsSpace)
}
if asciiSpace[c] == 0 {
break
}
}
// Now look for the first ASCII non-space byte from the end
stop := len(s)
for ; stop > start; stop-- {
c := s[stop-1]
if c >= utf8.RuneSelf {
return TrimFunc(s[start:stop], unicode.IsSpace)
}
if asciiSpace[c] == 0 {
break
}
}
// At this point s[start:stop] starts and ends with an ASCII
// non-space bytes, so we're done. Non-ASCII cases have already
// been handled above.
if start == stop {
// Special case to preserve previous TrimLeftFunc behavior,
// returning nil instead of empty slice if all spaces.
return nil
}
return s[start:stop]
// Runes interprets s as a sequence of UTF-8-encoded code points.
// It returns a slice of runes (Unicode code points) equivalent to s.
func Runes(s []byte) []rune {
t := make([]rune, utf8.RuneCount(s))
r, l := utf8.DecodeRune(s)
t[i] = r
i++
s = s[l:]
// Replace returns a copy of the slice s with the first n
// non-overlapping instances of old replaced by new.
// If old is empty, it matches at the beginning of the slice
// and after each UTF-8 sequence, yielding up to k+1 replacements
// for a k-rune slice.
// If n < 0, there is no limit on the number of replacements.
func Replace(s, old, new []byte, n int) []byte {
m := 0
if n != 0 {
// Compute number of replacements.
m = Count(s, old)
}
if m == 0 {
// Just return a copy.
return append([]byte(nil), s...)
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
n = m
}
// Apply replacements to buffer.
t := make([]byte, len(s)+n*(len(new)-len(old)))
w := 0
start := 0
for i := 0; i < n; i++ {
j := start
if len(old) == 0 {
if i > 0 {
_, wid := utf8.DecodeRune(s[start:])
j += wid
}
} else {
j += Index(s[start:], old)
}
w += copy(t[w:], s[start:j])
w += copy(t[w:], new)
start = j + len(old)
}
w += copy(t[w:], s[start:])
return t[0:w]
}
// ReplaceAll returns a copy of the slice s with all
// non-overlapping instances of old replaced by new.
// If old is empty, it matches at the beginning of the slice
// and after each UTF-8 sequence, yielding up to k+1 replacements
// for a k-rune slice.
func ReplaceAll(s, old, new []byte) []byte {
return Replace(s, old, new, -1)
}
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under simple Unicode case-folding, which is a more general
// form of case-insensitivity.
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
// ASCII fast path
i := 0
for ; i < len(s) && i < len(t); i++ {
sr := s[i]
tr := t[i]
if sr|tr >= utf8.RuneSelf {
goto hasUnicode
}
// Easy case.
if tr == sr {
continue
}
// Make sr < tr to simplify what follows.
if tr < sr {
tr, sr = sr, tr
}
// ASCII only, sr/tr must be upper/lower case
if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
continue
}
return false
}
// Check if we've exhausted both strings.
return len(s) == len(t)
hasUnicode:
s = s[i:]
t = t[i:]
for len(s) != 0 && len(t) != 0 {
// Extract first rune from each.
} else {
r, size := utf8.DecodeRune(s)
sr, s = r, s[size:]
}
if t[0] < utf8.RuneSelf {
} else {
r, size := utf8.DecodeRune(t)
tr, t = r, t[size:]
}
// If they match, keep going; if not, return false.
// Easy case.
if tr == sr {
continue
}
// Make sr < tr to simplify what follows.
if tr < sr {
tr, sr = sr, tr
}
// Fast check for ASCII.
if tr < utf8.RuneSelf {
// ASCII only, sr/tr must be upper/lower case
if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
// General case. SimpleFold(x) returns the next equivalent rune > x
// or wraps around to smaller values.
r := unicode.SimpleFold(sr)
for r != sr && r < tr {
r = unicode.SimpleFold(r)
}
if r == tr {
continue
}
return false
}
// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
func Index(s, sep []byte) int {
n := len(sep)
switch {
case n == 0:
return 0
case n == 1:
return IndexByte(s, sep[0])
case n == len(s):
if Equal(sep, s) {
return 0
}
return -1
case n > len(s):
return -1
case n <= bytealg.MaxLen:
// Use brute force when s and sep both are small
if len(s) <= bytealg.MaxBruteForce {
return bytealg.Index(s, sep)
}
i := 0
fails := 0
// IndexByte is faster than bytealg.Index, so use it as long as
// we're not getting lots of false positives.
if o < 0 {
return -1
}
}
if s[i+1] == c1 && Equal(s[i:i+n], sep) {
return i
}
fails++
i++
// Switch to bytealg.Index when IndexByte produces too many false positives.
if fails > bytealg.Cutover(i) {
r := bytealg.Index(s[i:], sep)
if r >= 0 {
return r + i
}
return -1
}
}
return -1
}
i := 0
fails := 0
t := len(s) - n + 1
for i < t {
if s[i] != c0 {
if o < 0 {
break
}
}
if s[i+1] == c1 && Equal(s[i:i+n], sep) {
return i
}
i++
fails++
// Give up on IndexByte, it isn't skipping ahead
// far enough to be better than Rabin-Karp.
// Experiments (using IndexPeriodic) suggest
// the cutover is about 16 byte skips.
// TODO: if large prefixes of sep are matching
// we should cutover at even larger average skips,
// because Equal becomes that much more expensive.
// This code does not take that effect into account.
j := bytealg.IndexRabinKarpBytes(s[i:], sep)
if j < 0 {
return -1
}
return i + j
}
}
return -1
}
// Cut slices s around the first instance of sep,
// returning the text before and after sep.
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, nil, false.
//
// Cut returns slices of the original slice s, not copies.
func Cut(s, sep []byte) (before, after []byte, found bool) {
if i := Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}
return s, nil, false
}
// Clone returns a copy of b[:len(b)].
// The result may have additional unused capacity.
// Clone(nil) returns nil.
func Clone(b []byte) []byte {
if b == nil {
return nil
}
return append([]byte{}, b...)
}
// CutPrefix returns s without the provided leading prefix byte slice
// and reports whether it found the prefix.
// If s doesn't start with prefix, CutPrefix returns s, false.
// If prefix is the empty byte slice, CutPrefix returns s, true.
//
// CutPrefix returns slices of the original slice s, not copies.
func CutPrefix(s, prefix []byte) (after []byte, found bool) {
if !HasPrefix(s, prefix) {
return s, false
}
return s[len(prefix):], true
}
// CutSuffix returns s without the provided ending suffix byte slice
// and reports whether it found the suffix.
// If s doesn't end with suffix, CutSuffix returns s, false.
// If suffix is the empty byte slice, CutSuffix returns s, true.
//
// CutSuffix returns slices of the original slice s, not copies.
func CutSuffix(s, suffix []byte) (before []byte, found bool) {
if !HasSuffix(s, suffix) {
return s, false
}
return s[:len(s)-len(suffix)], true
}