Skip to content
Snippets Groups Projects
parse.go 54.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • const (
    	// minimum and maximum runes involved in folding.
    	// checked during test.
    	minFold = 0x0041
    
    	maxFold = 0x1e943
    
    )
    
    // appendFoldedRange returns the result of appending the range lo-hi
    // and its case folding-equivalent runes to the class r.
    
    Russ Cox's avatar
    Russ Cox committed
    func appendFoldedRange(r []rune, lo, hi rune) []rune {
    
    	// Optimizations.
    	if lo <= minFold && hi >= maxFold {
    		// Range is full: folding can't add more.
    		return appendRange(r, lo, hi)
    	}
    	if hi < minFold || lo > maxFold {
    		// Range is outside folding possibilities.
    		return appendRange(r, lo, hi)
    	}
    	if lo < minFold {
    		// [lo, minFold-1] needs no folding.
    		r = appendRange(r, lo, minFold-1)
    		lo = minFold
    	}
    	if hi > maxFold {
    		// [maxFold+1, hi] needs no folding.
    		r = appendRange(r, maxFold+1, hi)
    		hi = maxFold
    	}
    
    
    	// Brute force. Depend on appendRange to coalesce ranges on the fly.
    
    	for c := lo; c <= hi; c++ {
    		r = appendRange(r, c, c)
    		f := unicode.SimpleFold(c)
    		for f != c {
    			r = appendRange(r, f, f)
    			f = unicode.SimpleFold(f)
    		}
    	}
    	return r
    }
    
    
    // appendClass returns the result of appending the class x to the class r.
    // It assume x is clean.
    
    Russ Cox's avatar
    Russ Cox committed
    func appendClass(r []rune, x []rune) []rune {
    
    	for i := 0; i < len(x); i += 2 {
    		r = appendRange(r, x[i], x[i+1])
    	}
    	return r
    }
    
    
    // appendFoldedClass returns the result of appending the case folding of the class x to the class r.
    
    Russ Cox's avatar
    Russ Cox committed
    func appendFoldedClass(r []rune, x []rune) []rune {
    
    	for i := 0; i < len(x); i += 2 {
    		r = appendFoldedRange(r, x[i], x[i+1])
    	}
    	return r
    }
    
    
    // appendNegatedClass returns the result of appending the negation of the class x to the class r.
    // It assumes x is clean.
    
    Russ Cox's avatar
    Russ Cox committed
    func appendNegatedClass(r []rune, x []rune) []rune {
    
    	nextLo := '\u0000'
    
    	for i := 0; i < len(x); i += 2 {
    		lo, hi := x[i], x[i+1]
    		if nextLo <= lo-1 {
    			r = appendRange(r, nextLo, lo-1)
    		}
    		nextLo = hi + 1
    	}
    	if nextLo <= unicode.MaxRune {
    		r = appendRange(r, nextLo, unicode.MaxRune)
    	}
    	return r
    }
    
    // appendTable returns the result of appending x to the class r.
    
    Russ Cox's avatar
    Russ Cox committed
    func appendTable(r []rune, x *unicode.RangeTable) []rune {
    
    	for _, xr := range x.R16 {
    
    Russ Cox's avatar
    Russ Cox committed
    		lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
    
    		if stride == 1 {
    			r = appendRange(r, lo, hi)
    			continue
    		}
    		for c := lo; c <= hi; c += stride {
    			r = appendRange(r, c, c)
    		}
    	}
    	for _, xr := range x.R32 {
    
    Russ Cox's avatar
    Russ Cox committed
    		lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
    
    		if stride == 1 {
    			r = appendRange(r, lo, hi)
    			continue
    		}
    		for c := lo; c <= hi; c += stride {
    			r = appendRange(r, c, c)
    		}
    	}
    	return r
    }
    
    // appendNegatedTable returns the result of appending the negation of x to the class r.
    
    Russ Cox's avatar
    Russ Cox committed
    func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
    
    	nextLo := '\u0000' // lo end of next class to add
    
    	for _, xr := range x.R16 {
    
    Russ Cox's avatar
    Russ Cox committed
    		lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
    
    		if stride == 1 {
    			if nextLo <= lo-1 {
    				r = appendRange(r, nextLo, lo-1)
    			}
    			nextLo = hi + 1
    			continue
    		}
    		for c := lo; c <= hi; c += stride {
    			if nextLo <= c-1 {
    				r = appendRange(r, nextLo, c-1)
    			}
    			nextLo = c + 1
    		}
    	}
    	for _, xr := range x.R32 {
    
    Russ Cox's avatar
    Russ Cox committed
    		lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
    
    		if stride == 1 {
    			if nextLo <= lo-1 {
    				r = appendRange(r, nextLo, lo-1)
    			}
    			nextLo = hi + 1
    			continue
    		}
    		for c := lo; c <= hi; c += stride {
    			if nextLo <= c-1 {
    				r = appendRange(r, nextLo, c-1)
    			}
    			nextLo = c + 1
    		}
    	}
    	if nextLo <= unicode.MaxRune {
    		r = appendRange(r, nextLo, unicode.MaxRune)
    	}
    	return r
    }
    
    
    // negateClass overwrites r and returns r's negation.
    // It assumes the class r is already clean.
    //
    // r is read as consecutive (lo, hi) pairs. The gaps between pairs are
    // written back into the front of r, so the returned slice shares r's
    // backing array unless the final append has to grow it.
    func negateClass(r []rune) []rune {
    	nextLo := '\u0000' // lo end of next class to add
    	w := 0             // write index
    	for i := 0; i < len(r); i += 2 {
    		lo, hi := r[i], r[i+1]
    		if nextLo <= lo-1 {
    			r[w] = nextLo
    			r[w+1] = lo - 1
    			w += 2
    		}
    		nextLo = hi + 1
    	}
    	// Drop everything past the write index: when fewer gaps were written
    	// than pairs were read, the tail still holds stale original pairs.
    	r = r[:w]
    	if nextLo <= unicode.MaxRune {
    		// It's possible for the negation to have one more
    		// range - this one - than the original class, so use append.
    		r = append(r, nextLo, unicode.MaxRune)
    	}
    	return r
    }
    
    // ranges implements sort.Interface on a []rune.
    // The choice of receiver type definition is strange
    // but avoids an allocation since we already have
    // a *[]rune.
    //
    // The slice is treated as a flat list of (lo, hi) pairs; the indices
    // passed to Less and Swap identify pairs, not individual runes.
    type ranges struct {
    	p *[]rune
    }

    // Less orders pairs by ascending lo; for equal lo, the pair with the
    // larger hi sorts first.
    func (ra ranges) Less(i, j int) bool {
    	p := *ra.p
    	i *= 2
    	j *= 2
    	return p[i] < p[j] || p[i] == p[j] && p[i+1] > p[j+1]
    }

    // Len returns the number of (lo, hi) pairs in the slice.
    func (ra ranges) Len() int {
    	return len(*ra.p) / 2
    }

    // Swap exchanges pairs i and j in place.
    func (ra ranges) Swap(i, j int) {
    	p := *ra.p
    	i *= 2
    	j *= 2
    	p[i], p[i+1], p[j], p[j+1] = p[j], p[j+1], p[i], p[i+1]
    }
    
    
    // checkUTF8 scans s one rune at a time and returns an *Error with code
    // ErrInvalidUTF8 at the first invalid encoding, or nil if none is found.
    // The error's Expr holds the part of s starting at the offending byte.
    func checkUTF8(s string) error {
    	for rest := s; len(rest) > 0; {
    		r, n := utf8.DecodeRuneInString(rest)
    		// A RuneError of width 1 marks an invalid encoding, as opposed
    		// to a well-formed U+FFFD in the input.
    		if n == 1 && r == utf8.RuneError {
    			return &Error{Code: ErrInvalidUTF8, Expr: rest}
    		}
    		rest = rest[n:]
    	}
    	return nil
    }
    
    
    // nextRune decodes the first rune of s and returns it together with the
    // rest of the string. If s begins with an invalid UTF-8 encoding it
    // returns 0, "", and an *Error with code ErrInvalidUTF8 whose Expr is s.
    func nextRune(s string) (c rune, t string, err error) {
    	r, n := utf8.DecodeRuneInString(s)
    	invalid := n == 1 && r == utf8.RuneError
    	if invalid {
    		return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
    	}
    	return r, s[n:], nil
    }
    
    Russ Cox's avatar
    Russ Cox committed
    // isalnum reports whether c is an ASCII digit or an ASCII letter.
    func isalnum(c rune) bool {
    	switch {
    	case c >= '0' && c <= '9':
    		return true
    	case c >= 'A' && c <= 'Z':
    		return true
    	case c >= 'a' && c <= 'z':
    		return true
    	}
    	return false
    }
    
    
    Russ Cox's avatar
    Russ Cox committed
    // unhex returns the numeric value of the hexadecimal digit c
    // (upper or lower case), or -1 if c is not a hexadecimal digit.
    func unhex(c rune) rune {
    	switch {
    	case c >= '0' && c <= '9':
    		return c - '0'
    	case c >= 'a' && c <= 'f':
    		return 10 + (c - 'a')
    	case c >= 'A' && c <= 'F':
    		return 10 + (c - 'A')
    	default:
    		return -1
    	}
    }