Skip to content
Snippets Groups Projects
bytes.go 34.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • // Copyright 2009 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    
    // Package bytes implements functions for the manipulation of byte slices.
    // It is analogous to the facilities of the [strings] package.
    
    import (
    	"internal/bytealg"
    	"unicode"
    	"unicode/utf8"
    )
    
    // Equal reports whether a and b
    // are the same length and contain the same bytes.
    // A nil argument is equivalent to an empty slice.
    func Equal(a, b []byte) bool {
    	// Neither cmd/compile nor gccgo allocates for these string conversions.
    	return string(a) == string(b)
    }
    
    // Compare returns an integer comparing two byte slices lexicographically.
    // The result will be 0 if a == b, -1 if a < b, and +1 if a > b.
    // A nil argument is equivalent to an empty slice.
    func Compare(a, b []byte) int {
    	return bytealg.Compare(a, b)
    }
    
    
    // explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
    // up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
    // A non-positive n, or an n larger than len(s), means "no limit".
    func explode(s []byte, n int) [][]byte {
    	if n <= 0 || n > len(s) {
    		n = len(s)
    	}
    	a := make([][]byte, n)
    	var size int
    	na := 0
    	for len(s) > 0 {
    		if na+1 >= n {
    			// Last permitted element: it receives the unsplit remainder.
    			a[na] = s
    			na++
    			break
    		}
    		_, size = utf8.DecodeRune(s)
    		// Cap the capacity so that appending to one element cannot
    		// overwrite the bytes of the next one.
    		a[na] = s[0:size:size]
    		s = s[size:]
    		na++
    	}
    	return a[0:na]
    }
    
    // Count counts the number of non-overlapping instances of sep in s.
    // If sep is an empty slice, Count returns 1 + the number of UTF-8-encoded code points in s.
    func Count(s, sep []byte) int {
    
    Ilya Tocar's avatar
    Ilya Tocar committed
    	// special case
    	if len(sep) == 0 {
    
    	if len(sep) == 1 {
    		return bytealg.Count(s, sep[0])
    	}
    
    Ilya Tocar's avatar
    Ilya Tocar committed
    	for {
    		i := Index(s, sep)
    		if i == -1 {
    			return n
    
    Ilya Tocar's avatar
    Ilya Tocar committed
    		n++
    		s = s[i+len(sep):]
    
    // Contains reports whether subslice is within b.
    func Contains(b, subslice []byte) bool {
    	pos := Index(b, subslice)
    	return pos != -1
    }
    
    
    // ContainsAny reports whether any of the UTF-8-encoded code points in chars are within b.
    
    Rob Pike's avatar
    Rob Pike committed
    func ContainsAny(b []byte, chars string) bool {
    	return IndexAny(b, chars) >= 0
    }
    
    
    // ContainsRune reports whether the rune is contained in the UTF-8-encoded byte slice b.
    
    Emmanuel Odeke's avatar
    Emmanuel Odeke committed
    func ContainsRune(b []byte, r rune) bool {
    	return IndexRune(b, r) >= 0
    }
    
    
    // ContainsFunc reports whether any of the UTF-8-encoded code points r within b satisfy f(r).
    func ContainsFunc(b []byte, f func(rune) bool) bool {
    	pos := IndexFunc(b, f)
    	return pos >= 0
    }
    
    
    // IndexByte returns the index of the first instance of c in b, or -1 if c is not present in b.
    // The search itself is delegated to bytealg.IndexByte.
    func IndexByte(b []byte, c byte) int {
    	return bytealg.IndexByte(b, c)
    }
    
    
    // indexBytePortable returns the index of the first instance of c in s,
    // or -1 if c is not present in s, using a plain byte-by-byte scan.
    func indexBytePortable(s []byte, c byte) int {
    	for i, b := range s {
    		if b == c {
    			return i
    		}
    	}
    	return -1
    }
    
    // LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
    func LastIndex(s, sep []byte) int {
    
    		return bytealg.LastIndexByte(s, sep[0])
    
    	case n == len(s):
    		if Equal(s, sep) {
    			return 0
    		}
    		return -1
    	case n > len(s):
    		return -1
    
    	// Rabin-Karp search from the end of the string
    
    	hashss, pow := bytealg.HashStrRev(sep)
    
    	last := len(s) - n
    	var h uint32
    	for i := len(s) - 1; i >= last; i-- {
    
    		h = h*bytealg.PrimeRK + uint32(s[i])
    
    	}
    	if h == hashss && Equal(s[last:], sep) {
    		return last
    	}
    	for i := last - 1; i >= 0; i-- {
    
    		h += uint32(s[i])
    		h -= pow * uint32(s[i+n])
    		if h == hashss && Equal(s[i:i+n], sep) {
    
    // LastIndexByte returns the index of the last instance of c in s, or -1 if c is not present in s.
    func LastIndexByte(s []byte, c byte) int {
    
    	return bytealg.LastIndexByte(s, c)
    
    // IndexRune interprets s as a sequence of UTF-8-encoded code points.
    
    // It returns the byte index of the first occurrence in s of the given rune.
    // It returns -1 if rune is not present in s.
    
    // If r is utf8.RuneError, it returns the first instance of any
    // invalid UTF-8 byte sequence.
    
    Russ Cox's avatar
    Russ Cox committed
    func IndexRune(s []byte, r rune) int {
    
    	switch {
    	case 0 <= r && r < utf8.RuneSelf:
    
    		return IndexByte(s, byte(r))
    
    	case r == utf8.RuneError:
    		for i := 0; i < len(s); {
    			r1, n := utf8.DecodeRune(s[i:])
    			if r1 == utf8.RuneError {
    				return i
    			}
    			i += n
    		}
    		return -1
    	case !utf8.ValidRune(r):
    		return -1
    	default:
    		var b [utf8.UTFMax]byte
    		n := utf8.EncodeRune(b[:], r)
    		return Index(s, b[:n])
    
    // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
    
    // It returns the byte index of the first occurrence in s of any of the Unicode
    
    // code points in chars. It returns -1 if chars is empty or if there is no code
    
    // point in common.
    func IndexAny(s []byte, chars string) int {
    
    	if chars == "" {
    		// Avoid scanning all of s.
    		return -1
    	}
    
    	if len(s) == 1 {
    		r := rune(s[0])
    		if r >= utf8.RuneSelf {
    			// search utf8.RuneError.
    			for _, r = range chars {
    				if r == utf8.RuneError {
    					return 0
    				}
    			}
    			return -1
    		}
    		if bytealg.IndexByteString(chars, s[0]) >= 0 {
    			return 0
    		}
    		return -1
    	}
    	if len(chars) == 1 {
    		r := rune(chars[0])
    		if r >= utf8.RuneSelf {
    			r = utf8.RuneError
    		}
    		return IndexRune(s, r)
    	}
    
    	if len(s) > 8 {
    		if as, isASCII := makeASCIISet(chars); isASCII {
    			for i, c := range s {
    				if as.contains(c) {
    					return i
    
    	}
    	var width int
    	for i := 0; i < len(s); i += width {
    		r := rune(s[i])
    		if r < utf8.RuneSelf {
    
    			if bytealg.IndexByteString(chars, s[i]) >= 0 {
    				return i
    			}
    
    		r, width = utf8.DecodeRune(s[i:])
    
    		if r != utf8.RuneError {
    			// r is 2 to 4 bytes
    			if len(chars) == width {
    				if chars == string(r) {
    
    				continue
    			}
    			// Use bytealg.IndexString for performance if available.
    			if bytealg.MaxLen >= width {
    				if bytealg.IndexString(chars, string(r)) >= 0 {
    					return i
    				}
    				continue
    
    		for _, ch := range chars {
    			if r == ch {
    				return i
    			}
    
    Benny Siegert's avatar
    Benny Siegert committed
    // LastIndexAny interprets s as a sequence of UTF-8-encoded Unicode code
    
    // points. It returns the byte index of the last occurrence in s of any of
    // the Unicode code points in chars. It returns -1 if chars is empty or if
    
    Benny Siegert's avatar
    Benny Siegert committed
    // there is no code point in common.
    func LastIndexAny(s []byte, chars string) int {
    
    	if chars == "" {
    		// Avoid scanning all of s.
    		return -1
    	}
    
    	if len(s) > 8 {
    		if as, isASCII := makeASCIISet(chars); isASCII {
    			for i := len(s) - 1; i >= 0; i-- {
    				if as.contains(s[i]) {
    					return i
    
    	if len(s) == 1 {
    		r := rune(s[0])
    		if r >= utf8.RuneSelf {
    			for _, r = range chars {
    				if r == utf8.RuneError {
    					return 0
    				}
    			}
    			return -1
    		}
    		if bytealg.IndexByteString(chars, s[0]) >= 0 {
    			return 0
    		}
    		return -1
    	}
    	if len(chars) == 1 {
    		cr := rune(chars[0])
    		if cr >= utf8.RuneSelf {
    			cr = utf8.RuneError
    		}
    		for i := len(s); i > 0; {
    			r, size := utf8.DecodeLastRune(s[:i])
    			i -= size
    			if r == cr {
    				return i
    			}
    		}
    		return -1
    	}
    
    		r := rune(s[i-1])
    		if r < utf8.RuneSelf {
    			if bytealg.IndexByteString(chars, s[i-1]) >= 0 {
    				return i - 1
    			}
    			i--
    			continue
    		}
    
    		r, size := utf8.DecodeLastRune(s[:i])
    		i -= size
    
    		if r != utf8.RuneError {
    			// r is 2 to 4 bytes
    			if len(chars) == size {
    				if chars == string(r) {
    
    				continue
    			}
    			// Use bytealg.IndexString for performance if available.
    			if bytealg.MaxLen >= size {
    				if bytealg.IndexString(chars, string(r)) >= 0 {
    					return i
    				}
    				continue
    
    		for _, ch := range chars {
    			if r == ch {
    				return i
    			}
    
    // Generic split: splits after each instance of sep,
    
    // including sepSave bytes of sep in the subslices.
    
    func genSplit(s, sep []byte, sepSave, n int) [][]byte {
    
    	if len(sep) == 0 {
    
    	if n > len(s)+1 {
    		n = len(s) + 1
    	}
    
    	a := make([][]byte, n)
    
    	n--
    	i := 0
    	for i < n {
    		m := Index(s, sep)
    		if m < 0 {
    			break
    
    		a[i] = s[: m+sepSave : m+sepSave]
    
    		s = s[m+len(sep):]
    		i++
    
    	a[i] = s
    	return a[:i+1]
    
    // SplitN slices s into subslices separated by sep and returns a slice of
    
    // the subslices between those separators.
    
    // If sep is empty, SplitN splits after each UTF-8 sequence.
    
    // The count determines the number of subslices to return:
    
    Russ Cox's avatar
    Russ Cox committed
    //
    //	n > 0: at most n subslices; the last subslice will be the unsplit remainder.
    //	n == 0: the result is nil (zero subslices)
    //	n < 0: all subslices
    
    //
    // To split around the first instance of a separator, see Cut.
    
    func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
    
    // SplitAfterN slices s into subslices after each instance of sep and
    
    // returns a slice of those subslices.
    
    // If sep is empty, SplitAfterN splits after each UTF-8 sequence.
    
    // The count determines the number of subslices to return:
    
    Russ Cox's avatar
    Russ Cox committed
    //
    //	n > 0: at most n subslices; the last subslice will be the unsplit remainder.
    //	n == 0: the result is nil (zero subslices)
    //	n < 0: all subslices
    
    func SplitAfterN(s, sep []byte, n int) [][]byte {
    
    	return genSplit(s, sep, len(sep), n)
    
    // Split slices s into all subslices separated by sep and returns a slice of
    // the subslices between those separators.
    // If sep is empty, Split splits after each UTF-8 sequence.
    // It is equivalent to SplitN with a count of -1.
    //
    // To split around the first instance of a separator, see Cut.
    func Split(s, sep []byte) [][]byte {
    	// Keep none of the separator bytes and place no limit on the piece count.
    	const keepSep, noLimit = 0, -1
    	return genSplit(s, sep, keepSep, noLimit)
    }
    
    // SplitAfter slices s into all subslices after each instance of sep and
    // returns a slice of those subslices.
    // If sep is empty, SplitAfter splits after each UTF-8 sequence.
    // It is equivalent to SplitAfterN with a count of -1.
    func SplitAfter(s, sep []byte) [][]byte {
    	// Keep the whole separator at the end of each piece; no limit on pieces.
    	keep := len(sep)
    	return genSplit(s, sep, keep, -1)
    }
    
    
    // asciiSpace is a lookup table indexed by byte value: the entry is 1 for the
    // ASCII white space bytes '\t', '\n', '\v', '\f', '\r' and ' ', and 0 for
    // every other byte.
    var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
    
    
    // Fields interprets s as a sequence of UTF-8-encoded code points.
    // It splits the slice s around each instance of one or more consecutive white space
    
    // characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
    // empty slice if s contains only white space.
    
    func Fields(s []byte) [][]byte {
    
    	// First count the fields.
    	// This is an exact count if s is ASCII, otherwise it is an approximation.
    	n := 0
    	wasSpace := 1
    	// setBits is used to track which bits are set in the bytes of s.
    	setBits := uint8(0)
    	for i := 0; i < len(s); i++ {
    		r := s[i]
    		setBits |= r
    		isSpace := int(asciiSpace[r])
    		n += wasSpace & ^isSpace
    		wasSpace = isSpace
    	}
    
    
    	if setBits >= utf8.RuneSelf {
    		// Some runes in the input slice are not ASCII.
    		return FieldsFunc(s, unicode.IsSpace)
    	}
    
    	// ASCII fast path
    	a := make([][]byte, n)
    	na := 0
    	fieldStart := 0
    	i := 0
    	// Skip spaces in the front of the input.
    	for i < len(s) && asciiSpace[s[i]] != 0 {
    		i++
    	}
    	fieldStart = i
    	for i < len(s) {
    		if asciiSpace[s[i]] == 0 {
    
    		na++
    		i++
    		// Skip spaces in between fields.
    		for i < len(s) && asciiSpace[s[i]] != 0 {
    
    		fieldStart = i
    
    	if fieldStart < len(s) { // Last field might end at EOF.
    
    		a[na] = s[fieldStart:len(s):len(s)]
    
    // FieldsFunc interprets s as a sequence of UTF-8-encoded code points.
    // It splits the slice s at each run of code points c satisfying f(c) and
    // returns a slice of subslices of s. If all code points in s satisfy f(c), or
    // len(s) == 0, an empty slice is returned.
    //
    // FieldsFunc makes no guarantees about the order in which it calls f(c)
    // and assumes that f always returns the same value for a given c.
    func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
    	// A span is used to record a slice of s of the form s[start:end].
    	// The start index is inclusive and the end index is exclusive.
    	type span struct {
    		start int
    		end   int
    	}
    	spans := make([]span, 0, 32)

    	// Find the field start and end indices.
    	// Doing this in a separate pass (rather than slicing the string s
    	// and collecting the result substrings right away) is significantly
    	// more efficient, possibly due to cache effects.
    	start := -1 // valid span start if >= 0
    	for i := 0; i < len(s); {
    		size := 1
    		r := rune(s[i])
    		if r >= utf8.RuneSelf {
    			r, size = utf8.DecodeRune(s[i:])
    		}
    		if f(r) {
    			if start >= 0 {
    				spans = append(spans, span{start, i})
    				start = -1
    			}
    		} else {
    			if start < 0 {
    				start = i
    			}
    		}
    		i += size
    	}

    	// Last field might end at EOF.
    	if start >= 0 {
    		spans = append(spans, span{start, len(s)})
    	}

    	// Create subslices from recorded field indices.
    	a := make([][]byte, len(spans))
    	for i, span := range spans {
    		a[i] = s[span.start:span.end:span.end]
    	}

    	return a
    }
    
    // Join concatenates the elements of s to create a new byte slice. The separator
    // sep is placed between elements in the resulting slice.
    func Join(s [][]byte, sep []byte) []byte {
    	if len(s) == 0 {
    
    	if len(s) == 1 {
    
    		return append([]byte(nil), s[0]...)
    
    
    	var n int
    	if len(sep) > 0 {
    		if len(sep) >= maxInt/(len(s)-1) {
    			panic("bytes: Join output length overflow")
    		}
    		n += len(sep) * (len(s) - 1)
    	}
    
    	for _, v := range s {
    
    		if len(v) > maxInt-n {
    			panic("bytes: Join output length overflow")
    		}
    
    		n += len(v)
    
    	b := bytealg.MakeNoZero(n)
    
    	bp := copy(b, s[0])
    	for _, v := range s[1:] {
    
    Evan Shaw's avatar
    Evan Shaw committed
    		bp += copy(b[bp:], sep)
    
    		bp += copy(b[bp:], v)
    
    // HasPrefix reports whether the byte slice s begins with prefix.
    
    func HasPrefix(s, prefix []byte) bool {
    
    	return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
    
    // HasSuffix reports whether the byte slice s ends with suffix.
    
    func HasSuffix(s, suffix []byte) bool {
    
    	return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
    
    // Map returns a copy of the byte slice s with all its characters modified
    // according to the mapping function. If mapping returns a negative value, the character is
    // dropped from the byte slice with no replacement. The characters in s and the
    // output are interpreted as UTF-8-encoded code points.
    func Map(mapping func(r rune) rune, s []byte) []byte {
    	// In the worst case, the slice can grow when mapped, making
    	// things unpleasant. But it's so rare we barge in assuming it's
    	// fine. It could also shrink but that falls out naturally.
    	b := make([]byte, 0, len(s))
    	for i := 0; i < len(s); {
    		wid := 1
    		r := rune(s[i])
    		if r >= utf8.RuneSelf {
    			r, wid = utf8.DecodeRune(s[i:])
    		}
    		r = mapping(r)
    		if r >= 0 {
    			b = utf8.AppendRune(b, r)
    		}
    		i += wid
    	}
    	return b
    }
    
    // Repeat returns a new byte slice consisting of count copies of b.
    
    // It panics if count is negative or if the result of (len(b) * count)
    // overflows.
    
    func Repeat(b []byte, count int) []byte {
    
    	if count == 0 {
    		return []byte{}
    	}
    
    	// Since we cannot return an error on overflow,
    
    	// we should panic if the repeat will generate an overflow.
    
    	// See golang.org/issue/16237.
    
    	if count < 0 {
    		panic("bytes: negative Repeat count")
    	}
    
    	if len(b) >= maxInt/count {
    		panic("bytes: Repeat output length overflow")
    	}
    	n := len(b) * count
    
    	if len(b) == 0 {
    		return []byte{}
    	}
    
    	// Past a certain chunk size it is counterproductive to use
    	// larger chunks as the source of the write, as when the source
    	// is too large we are basically just thrashing the CPU D-cache.
    	// So if the result length is larger than an empirically-found
    	// limit (8KB), we stop growing the source string once the limit
    	// is reached and keep reusing the same source string - that
    	// should therefore be always resident in the L1 cache - until we
    	// have completed the construction of the result.
    	// This yields significant speedups (up to +100%) in cases where
    	// the result length is large (roughly, over L2 cache size).
    	const chunkLimit = 8 * 1024
    	chunkMax := n
    	if chunkMax > chunkLimit {
    		chunkMax = chunkLimit / len(b) * len(b)
    		if chunkMax == 0 {
    			chunkMax = len(b)
    		}
    	}
    
    	nb := bytealg.MakeNoZero(n)
    
    	bp := copy(nb, b)
    
    		chunk := bp
    		if chunk > chunkMax {
    			chunk = chunkMax
    		}
    		bp += copy(nb[bp:], nb[:chunk])
    
    // ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
    // their upper case.
    func ToUpper(s []byte) []byte {
    	isASCII, hasLower := true, false
    	for i := 0; i < len(s); i++ {
    		c := s[i]
    		if c >= utf8.RuneSelf {
    			isASCII = false
    			break
    		}
    		hasLower = hasLower || ('a' <= c && c <= 'z')
    	}
    
    	if isASCII { // optimize for ASCII-only byte slices.
    		if !hasLower {
    			// Just return a copy.
    			return append([]byte(""), s...)
    		}
    
    		b := bytealg.MakeNoZero(len(s))
    
    		for i := 0; i < len(s); i++ {
    			c := s[i]
    			if 'a' <= c && c <= 'z' {
    				c -= 'a' - 'A'
    			}
    			b[i] = c
    		}
    		return b
    	}
    	return Map(unicode.ToUpper, s)
    }
    
    // ToLower returns a copy of the byte slice s with all Unicode letters mapped to
    // their lower case.
    func ToLower(s []byte) []byte {
    	isASCII, hasUpper := true, false
    	for i := 0; i < len(s); i++ {
    		c := s[i]
    		if c >= utf8.RuneSelf {
    			isASCII = false
    			break
    		}
    		hasUpper = hasUpper || ('A' <= c && c <= 'Z')
    	}
    
    	if isASCII { // optimize for ASCII-only byte slices.
    		if !hasUpper {
    			return append([]byte(""), s...)
    		}
    
    		b := bytealg.MakeNoZero(len(s))
    
    		for i := 0; i < len(s); i++ {
    			c := s[i]
    			if 'A' <= c && c <= 'Z' {
    				c += 'a' - 'A'
    			}
    			b[i] = c
    		}
    		return b
    	}
    	return Map(unicode.ToLower, s)
    }
    
    // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
    func ToTitle(s []byte) []byte {
    	return Map(unicode.ToTitle, s)
    }
    
    // ToUpperSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
    
    // upper case, giving priority to the special casing rules.
    
    func ToUpperSpecial(c unicode.SpecialCase, s []byte) []byte {
    
    	return Map(c.ToUpper, s)
    
    // ToLowerSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
    
    // lower case, giving priority to the special casing rules.
    
    func ToLowerSpecial(c unicode.SpecialCase, s []byte) []byte {
    
    	return Map(c.ToLower, s)
    
    // ToTitleSpecial treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their
    
    // title case, giving priority to the special casing rules.
    
    func ToTitleSpecial(c unicode.SpecialCase, s []byte) []byte {
    
    	return Map(c.ToTitle, s)
    
    // ToValidUTF8 treats s as UTF-8-encoded bytes and returns a copy with each run of bytes
    // representing invalid UTF-8 replaced with the bytes in replacement, which may be empty.
    func ToValidUTF8(s, replacement []byte) []byte {
    	b := make([]byte, 0, len(s)+len(replacement))
    	invalid := false // previous byte was from an invalid UTF-8 sequence
    	for i := 0; i < len(s); {
    		c := s[i]
    		if c < utf8.RuneSelf {
    			// ASCII bytes are always valid: copy them through unchanged.
    			i++
    			invalid = false
    			b = append(b, c)
    			continue
    		}
    		_, wid := utf8.DecodeRune(s[i:])
    		if wid == 1 {
    			// A non-ASCII byte that decodes with width 1 is invalid.
    			// Emit the replacement only once per run of invalid bytes.
    			i++
    			if !invalid {
    				invalid = true
    				b = append(b, replacement...)
    			}
    			continue
    		}
    		invalid = false
    		b = append(b, s[i:i+wid]...)
    		i += wid
    	}
    	return b
    }
    
    
    // isSeparator reports whether the rune could mark a word boundary.
    // TODO: update when package unicode captures more of the properties.
    func isSeparator(r rune) bool {
    	// ASCII alphanumerics and underscore are not separators
    	if r <= 0x7F {
    		switch {
    		case '0' <= r && r <= '9':
    			return false
    		case 'a' <= r && r <= 'z':
    			return false
    		case 'A' <= r && r <= 'Z':
    			return false
    		case r == '_':
    			return false
    		}
    		return true
    	}
    	// Letters and digits are not separators
    	if unicode.IsLetter(r) || unicode.IsDigit(r) {
    		return false
    	}
    	// Otherwise, all we can do for now is treat spaces as separators.
    	return unicode.IsSpace(r)
    }
    
    
    // Title treats s as UTF-8-encoded bytes and returns a copy with all Unicode letters that begin
    // words mapped to their title case.
    
    // Deprecated: The rule Title uses for word boundaries does not handle Unicode
    // punctuation properly. Use golang.org/x/text/cases instead.
    
    Rob Pike's avatar
    Rob Pike committed
    func Title(s []byte) []byte {
    	// Use a closure here to remember state.
    	// Hackish but effective. Depends on Map scanning in order and calling
    	// the closure once per rune.
    
    	prev := ' '
    
    Rob Pike's avatar
    Rob Pike committed
    	return Map(
    
    Russ Cox's avatar
    Russ Cox committed
    		func(r rune) rune {
    
    Rob Pike's avatar
    Rob Pike committed
    			if isSeparator(prev) {
    				prev = r
    				return unicode.ToTitle(r)
    			}
    			prev = r
    			return r
    		},
    		s)
    }
    
    
    // TrimLeftFunc treats s as UTF-8-encoded bytes and returns a subslice of s by slicing off
    // all leading UTF-8-encoded code points c that satisfy f(c).
    
    Russ Cox's avatar
    Russ Cox committed
    func TrimLeftFunc(s []byte, f func(r rune) bool) []byte {
    
    	i := indexFunc(s, f, false)
    	if i == -1 {
    		return nil
    	}
    	return s[i:]
    }
    
    
    // TrimRightFunc returns a subslice of s by slicing off all trailing
    // UTF-8-encoded code points c that satisfy f(c).
    
    Russ Cox's avatar
    Russ Cox committed
    func TrimRightFunc(s []byte, f func(r rune) bool) []byte {
    
    	i := lastIndexFunc(s, f, false)
    	if i >= 0 && s[i] >= utf8.RuneSelf {
    		_, wid := utf8.DecodeRune(s[i:])
    		i += wid
    	} else {
    		i++
    	}
    	return s[0:i]
    }
    
    // TrimFunc returns a subslice of s by slicing off all leading and trailing
    
    // UTF-8-encoded code points c that satisfy f(c).
    
    Russ Cox's avatar
    Russ Cox committed
    func TrimFunc(s []byte, f func(r rune) bool) []byte {
    
    	return TrimRightFunc(TrimLeftFunc(s, f), f)
    }
    
    
    // TrimPrefix returns s without the provided leading prefix string.
    // If s doesn't start with prefix, s is returned unchanged.
    func TrimPrefix(s, prefix []byte) []byte {
    	if !HasPrefix(s, prefix) {
    		return s
    	}
    	return s[len(prefix):]
    }
    
    // TrimSuffix returns s without the provided trailing suffix string.
    // If s doesn't end with suffix, s is returned unchanged.
    func TrimSuffix(s, suffix []byte) []byte {
    	if !HasSuffix(s, suffix) {
    		return s
    	}
    	end := len(s) - len(suffix)
    	return s[:end]
    }
    
    
    // IndexFunc interprets s as a sequence of UTF-8-encoded code points.
    
    // It returns the byte index in s of the first Unicode
    // code point satisfying f(c), or -1 if none do.
    
    Russ Cox's avatar
    Russ Cox committed
    func IndexFunc(s []byte, f func(r rune) bool) int {
    
    // LastIndexFunc interprets s as a sequence of UTF-8-encoded code points.
    
    // It returns the byte index in s of the last Unicode
    // code point satisfying f(c), or -1 if none do.
    
    Russ Cox's avatar
    Russ Cox committed
    func LastIndexFunc(s []byte, f func(r rune) bool) int {
    
    	return lastIndexFunc(s, f, true)
    }
    
    // indexFunc is the same as IndexFunc except that if
    // truth==false, the sense of the predicate function is
    // inverted.
    func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
    	start := 0
    	for start < len(s) {
    		wid := 1
    		r := rune(s[start])
    		if r >= utf8.RuneSelf {
    			r, wid = utf8.DecodeRune(s[start:])
    		}
    		if f(r) == truth {
    			return start
    		}
    		start += wid
    	}
    	return -1
    }
    
    // lastIndexFunc is the same as LastIndexFunc except that if
    // truth==false, the sense of the predicate function is
    // inverted.
    func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
    	for i := len(s); i > 0; {
    		r, size := rune(s[i-1]), 1
    		if r >= utf8.RuneSelf {
    			r, size = utf8.DecodeLastRune(s[0:i])
    		}
    		// Step back to the start of the rune before testing it, so the
    		// index returned is the rune's first byte.
    		i -= size
    		if f(r) == truth {
    			return i
    		}
    	}
    	return -1
    }
    
    // asciiSet is a 32-byte value, where each bit represents the presence of a
    // given ASCII character in the set. The 128-bits of the lower 16 bytes,
    // starting with the least-significant bit of the lowest word to the
    // most-significant bit of the highest word, map to the full range of all
    // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
    // ensuring that any non-ASCII character will be reported as not in the set.
    // This allocates a total of 32 bytes even though the upper half
    // is unused to avoid bounds checks in asciiSet.contains.
    type asciiSet [8]uint32

    // makeASCIISet creates a set of ASCII characters and reports whether all
    // characters in chars are ASCII.
    func makeASCIISet(chars string) (as asciiSet, ok bool) {
    	for i := 0; i < len(chars); i++ {
    		c := chars[i]
    		if c >= utf8.RuneSelf {
    			return as, false
    		}
    		// Record c: word c/32, bit c%32 (the layout contains reads back).
    		as[c/32] |= 1 << (c % 32)
    	}
    	return as, true
    }

    // contains reports whether c is inside the set.
    func (as *asciiSet) contains(c byte) bool {
    	return (as[c/32] & (1 << (c % 32))) != 0
    }
    
    // containsRune is a simplified version of strings.ContainsRune
    // to avoid importing the strings package.
    // We avoid bytes.ContainsRune to avoid allocating a temporary copy of s.
    func containsRune(s string, r rune) bool {
    	for _, c := range s {
    		if c == r {
    			return true
    		}
    	}
    	return false
    }
    
    // Trim returns a subslice of s by slicing off all leading and
    
    // trailing UTF-8-encoded code points contained in cutset.
    
    func Trim(s []byte, cutset string) []byte {
    
    	if len(s) == 0 {
    		// This is what we've historically done.
    		return nil
    	}
    	if cutset == "" {
    
    	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
    		return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0])
    	}
    
    	if as, ok := makeASCIISet(cutset); ok {
    		return trimLeftASCII(trimRightASCII(s, &as), &as)
    	}
    	return trimLeftUnicode(trimRightUnicode(s, cutset), cutset)
    
    }
    
    // TrimLeft returns a subslice of s by slicing off all leading
    
    // UTF-8-encoded code points contained in cutset.
    
    func TrimLeft(s []byte, cutset string) []byte {
    
    	if len(s) == 0 {
    		// This is what we've historically done.
    		return nil
    	}
    	if cutset == "" {
    
    	if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
    		return trimLeftByte(s, cutset[0])
    	}
    
    	if as, ok := makeASCIISet(cutset); ok {
    		return trimLeftASCII(s, &as)
    	}
    	return trimLeftUnicode(s, cutset)
    
    // trimLeftByte returns s with every leading byte equal to c sliced off.
    // It returns nil when nothing remains.
    func trimLeftByte(s []byte, c byte) []byte {
    	for len(s) > 0 && s[0] == c {
    		s = s[1:]
    	}
    	if len(s) == 0 {
    		// This is what we've historically done.
    		return nil
    	}
    	return s
    }
    
    func trimLeftASCII(s []byte, as *asciiSet) []byte {
    	for len(s) > 0 {
    		if !as.contains(s[0]) {
    			break
    		}
    		s = s[1:]
    	}
    
    	if len(s) == 0 {
    		// This is what we've historically done.
    		return nil
    	}