Skip to content
Snippets Groups Projects
bytes.go 27.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • 				// IndexByte is faster than bytealg.Index, so use it as long as
    				// we're not getting lots of false positives.
    
    				o := IndexByte(s[i:t], c0)
    
    			if s[i+1] == c1 && Equal(s[i:i+n], sep) {
    
    				return i
    			}
    			fails++
    			i++
    			// Switch to bytealg.Index when IndexByte produces too many false positives.
    			if fails > bytealg.Cutover(i) {
    				r := bytealg.Index(s[i:], sep)
    				if r >= 0 {
    					return r + i
    				}
    				return -1
    			}
    		}
    		return -1
    	}
    
    	c0 := sep[0]
    	c1 := sep[1]
    
    	t := len(s) - n + 1
    	for i < t {
    		if s[i] != c0 {
    			o := IndexByte(s[i:t], c0)
    
    		if s[i+1] == c1 && Equal(s[i:i+n], sep) {
    
    		if fails >= 4+i>>4 && i < t {
    
    			// Give up on IndexByte, it isn't skipping ahead
    			// far enough to be better than Rabin-Karp.
    			// Experiments (using IndexPeriodic) suggest
    			// the cutover is about 16 byte skips.
    			// TODO: if large prefixes of sep are matching
    			// we should cutover at even larger average skips,
    			// because Equal becomes that much more expensive.
    			// This code does not take that effect into account.
    			j := indexRabinKarp(s[i:], sep)
    			if j < 0 {
    				return -1
    			}
    			return i + j
    		}
    	}
    	return -1
    }
    
    
    func indexRabinKarp(s, sep []byte) int {
    	// Rabin-Karp search
    	hashsep, pow := hashStr(sep)
    	n := len(sep)
    	var h uint32
    	for i := 0; i < n; i++ {
    		h = h*primeRK + uint32(s[i])
    	}
    	if h == hashsep && Equal(s[:n], sep) {
    		return 0
    	}
    	for i := n; i < len(s); {
    		h *= primeRK
    		h += uint32(s[i])
    		h -= pow * uint32(s[i-n])
    		i++
    		if h == hashsep && Equal(s[i-n:i], sep) {
    			return i - n
    		}
    	}
    	return -1
    }
    
    // primeRK is the prime base used in Rabin-Karp algorithm.
    const primeRK = 16777619
    
    // hashStr returns the hash and the appropriate multiplicative
    // factor for use in Rabin-Karp algorithm.
    func hashStr(sep []byte) (uint32, uint32) {
    	hash := uint32(0)
    	for i := 0; i < len(sep); i++ {
    		hash = hash*primeRK + uint32(sep[i])
    	}
    	var pow, sq uint32 = 1, primeRK
    	for i := len(sep); i > 0; i >>= 1 {
    		if i&1 != 0 {
    			pow *= sq
    		}
    		sq *= sq
    	}
    	return hash, pow
    }
    
    
    // hashStrRev returns the hash of the reverse of sep and the
    // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    func hashStrRev(sep []byte) (uint32, uint32) {
    	hash := uint32(0)
    	for i := len(sep) - 1; i >= 0; i-- {
    		hash = hash*primeRK + uint32(sep[i])
    	}
    	var pow, sq uint32 = 1, primeRK
    	for i := len(sep); i > 0; i >>= 1 {
    		if i&1 != 0 {
    			pow *= sq
    		}
    		sq *= sq
    	}
    	return hash, pow
    }