Skip to content
Snippets Groups Projects
read.go 15.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • // Copyright 2013 The Go Authors. All rights reserved.
    
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    // Package goobj implements reading of Go object files and archives.
    //
    // TODO(rsc): Decide where this package should live. (golang.org/issue/6932)
    // TODO(rsc): Decide the appropriate integer types for various fields.
    // TODO(rsc): Write tests. (File format still up in the air a little.)
    package goobj
    
    import (
    	"bufio"
    	"bytes"
    
    )
    
    // A Sym is a named symbol in an object file.
    // SymID is embedded, so a symbol's Name and Version can be read
    // directly from the Sym.
    type Sym struct {

    	SymID                // symbol identifier (name and version)
    	Kind  objabi.SymKind // kind of symbol
    	DupOK bool           // are duplicate definitions okay?
    	Size  int            // size of corresponding data
    	Type  SymID          // symbol for Go type information
    	Data  Data           // memory image of symbol
    	Reloc []Reloc        // relocations to apply to Data
    	Func  *Func          // additional data for functions; nil when none was read

    }
    
    // A SymID - the combination of Name and Version - uniquely identifies
    // a symbol within a package.
    type SymID struct {
    	// Name is the name of a symbol.
    	Name string

    	// Version is zero for symbols with global visibility.
    	// Symbols with only file visibility (such as file-level static
    	// declarations in C) have a non-zero version distinguishing
    	// a symbol in one file from a symbol of the same name
    	// in another file.
    	Version int
    }
    
    
    // String formats the symbol identifier for display: the bare name for
    // version 0, and "name<version>" for any other version.
    func (s SymID) String() string {
    	if s.Version != 0 {
    		return fmt.Sprintf("%s<%d>", s.Name, s.Version)
    	}
    	return s.Name
    }
    
    
    // A Data is a reference to data stored in an object file.
    // It records the offset and size of the data, so that a client can
    // read the data only if necessary.
    type Data struct {
    	Offset int64 // position at which the referenced data starts
    	Size   int64 // length of the referenced data
    }
    
    // A Reloc describes a relocation applied to a memory image to refer
    // to an address within a particular symbol.
    type Reloc struct {

    	// The bytes at [Offset, Offset+Size) within the containing Sym

    	// should be updated to refer to the address Add bytes after the start
    	// of the symbol Sym.
    	Offset int
    	Size   int
    	Sym    SymID
    	Add    int

    	// The Type records the form of address expected in the bytes
    	// described by the previous fields: absolute, PC-relative, and so on.
    	// TODO(rsc): The interpretation of Type is not exposed by this package.
    	//
    	// NOTE(review): no Type field is declared here, although other code
    	// in this file assigns rel.Type and formats r.Type. The declaration
    	// appears to have been lost - confirm and restore it.

    }
    
    // A Var describes a variable in a function stack frame: a declared
    // local variable, an input argument, or an output result.
    // parseObject fills in one Var per entry of Func.Var.
    type Var struct {
    	// The combination of Name, Kind, and Offset uniquely
    	// identifies a variable in a function stack frame.
    	// Using fewer of these - in particular, using only Name - does not.
    	Name   string // Name of variable.
    	Kind   int    // TODO(rsc): Define meaning.
    	Offset int    // Frame offset. TODO(rsc): Define meaning.

    	Type SymID // Go type for variable.
    }
    
    // Func contains additional per-symbol information specific to functions.
    type Func struct {
    
    Rui Ueyama's avatar
    Rui Ueyama committed
    	Args     int        // size in bytes of argument frame: inputs and outputs
    
    	Frame    int        // size in bytes of local variable frame
    
    	Leaf     bool       // function omits save of link register (ARM)
    
    	NoSplit  bool       // function omits stack split prologue
    
    	Var      []Var      // detail about local variables
    	PCSP     Data       // PC → SP offset map
    	PCFile   Data       // PC → file number map (index into File)
    	PCLine   Data       // PC → line number map
    
    	PCInline Data       // PC → inline tree index map
    
    	PCData   []Data     // PC → runtime support data map
    	FuncData []FuncData // non-PC-specific runtime support data
    	File     []string   // paths indexed by PCFile
    
    	InlTree  []InlinedCall
    
    }
    
    // TODO: Add PCData []byte and PCDataIter (similar to liblink).
    
    // A FuncData is a single function-specific data value.
    // parseObject reads the Sym of every entry first and the Offset of
    // every entry afterwards.
    type FuncData struct {
    	Sym    SymID // symbol holding data
    	Offset int64 // offset into symbol for funcdata pointer
    }
    
    
    // An InlinedCall is a node in an InlTree.
    // See cmd/internal/obj.InlTree for details.
    type InlinedCall struct {
    	Parent int    // presumably the index of the parent node in the InlTree - TODO confirm
    	File   string // name taken from the file symbol reference read by parseObject
    	Line   int    // line number as stored in the object file
    	Func   SymID  // symbol reference stored for this node; presumably the inlined function - TODO confirm
    }
    
    
    // A Package is a parsed Go object file or archive defining a Go package.
    type Package struct {
    	ImportPath string   // import path denoting this package
    	Imports    []string // packages imported by this package

    	SymRefs    []SymID  // list of symbol names and versions referred to by this package

    	Syms       []*Sym   // symbols defined by this package
    	MaxVersion int      // maximum Version in any SymID in Syms

    	Arch       string   // architecture

    }
    
    // Magic byte sequences recognized by the reader, and the sentinel errors
    // it reports.
    var (
    	archiveHeader = []byte("!<arch>\n")
    	archiveMagic  = []byte("`\n")
    	goobjHeader   = []byte("go objec") // truncated to size of archiveHeader

    	errCorruptArchive   = errors.New("corrupt archive")
    	errTruncatedArchive = errors.New("truncated archive")
    	errCorruptObject    = errors.New("corrupt object file")
    	errNotObject        = errors.New("unrecognized object file format")
    )
    
    // An objReader is an object file reader.
    type objReader struct {
    
    	p          *Package
    	b          *bufio.Reader
    	f          io.ReadSeeker
    	err        error
    	offset     int64
    	dataOffset int64
    	limit      int64
    	tmp        [256]byte
    	pkgprefix  string
    
    // init prepares r for reading package p out of f.
    // Reading begins at f's current position and may extend to the end of f;
    // the file position is put back where it was before init returns.
    func (r *objReader) init(f io.ReadSeeker, p *Package) {
    	r.f, r.p = f, p

    	// Probe the current position and the end of the input, then rewind.
    	// Seek errors are ignored here.
    	start, _ := f.Seek(0, io.SeekCurrent)
    	end, _ := f.Seek(0, io.SeekEnd)
    	f.Seek(start, io.SeekStart)
    	r.offset, r.limit = start, end

    	r.b = bufio.NewReader(f)
    	r.pkgprefix = objabi.PathToPrefix(p.ImportPath) + "."
    }
    
    // error notes that err occurred and reports the error the reader is
    // stuck on. Only the first error is kept, so a failure that is merely a
    // consequence of an earlier one never hides the original cause.
    // A bare io.EOF is recorded as io.ErrUnexpectedEOF.
    func (r *objReader) error(err error) error {
    	// panic("corrupt") // useful for debugging
    	if r.err != nil {
    		return r.err
    	}
    	if err == io.EOF {
    		err = io.ErrUnexpectedEOF
    	}
    	r.err = err
    	return r.err
    }
    
    // readByte returns the next byte of the input.
    // If the reader has already failed, if the limit has been reached, or if
    // the underlying read fails, the error is recorded and 0 is returned.
    // Returning zeros is safe: a run of 0 bytes eventually terminates every
    // parsing state in the object file, and in particular ends a varint.
    func (r *objReader) readByte() byte {
    	switch {
    	case r.err != nil:
    		return 0
    	case r.offset >= r.limit:
    		r.error(io.ErrUnexpectedEOF)
    		return 0
    	}

    	c, err := r.b.ReadByte()
    	if err == nil {
    		r.offset++
    		return c
    	}
    	if err == io.EOF {
    		err = io.ErrUnexpectedEOF
    	}
    	r.error(err)
    	return 0
    }
    
    // readFull fills b completely from the input file.
    // A failure is both returned and recorded in r, so callers may ignore
    // the result when it is acceptable to notice the error later.
    // Asking for more bytes than remain before r.limit fails with
    // io.ErrUnexpectedEOF without reading anything.
    func (r *objReader) readFull(b []byte) error {
    	if r.err != nil {
    		return r.err
    	}
    	if end := r.offset + int64(len(b)); end > r.limit {
    		return r.error(io.ErrUnexpectedEOF)
    	}

    	n, err := io.ReadFull(r.b, b)
    	r.offset += int64(n)
    	if err == nil {
    		return nil
    	}
    	return r.error(err)
    }
    
    // readInt decodes one zigzag-encoded varint from the input file.
    // It records errCorruptObject and returns 0 when the encoding runs past
    // 64 bits or the decoded value does not fit in an int.
    func (r *objReader) readInt() int {
    	var (
    		u     uint64
    		shift uint
    	)
    	for {
    		if shift >= 64 {
    			r.error(errCorruptObject)
    			return 0
    		}
    		c := r.readByte()
    		u |= uint64(c&0x7F) << shift
    		if c&0x80 == 0 {
    			break
    		}
    		shift += 7
    	}

    	// Undo the zigzag mapping: the low bit carries the sign.
    	v := int64(u>>1) ^ -int64(u&1)
    	if int64(int(v)) != v {
    		r.error(errCorruptObject) // TODO
    		return 0
    	}
    	return int(v)
    }
    
    // readString reads a length-delimited string from the input file.
    // The length is validated before any memory is allocated: a negative
    // length records errCorruptObject, and a length larger than the bytes
    // remaining before r.limit records io.ErrUnexpectedEOF.
    // On any error the empty string is returned; the error itself is
    // available from r.err.
    func (r *objReader) readString() string {
    	n := r.readInt()
    	if n < 0 {
    		// make would panic on a negative length.
    		r.error(errCorruptObject)
    		return ""
    	}
    	if int64(n) > r.limit-r.offset {
    		// Corrupt input must not be able to force a huge allocation.
    		r.error(io.ErrUnexpectedEOF)
    		return ""
    	}
    	buf := make([]byte, n)
    	if r.readFull(buf) != nil {
    		return ""
    	}
    	return string(buf)
    }
    
    // readSymID reads a SymID from the input file.
    // The file stores an index into the package's SymRefs table. An index
    // outside that table records errCorruptObject and yields the zero SymID
    // instead of panicking.
    func (r *objReader) readSymID() SymID {
    	i := r.readInt()
    	if i < 0 || i >= len(r.p.SymRefs) {
    		r.error(errCorruptObject)
    		return SymID{}
    	}
    	return r.p.SymRefs[i]
    }
    
    // readRef reads one symbol reference (a name followed by a version) and
    // appends the resulting SymID to the package's SymRefs table.
    func (r *objReader) readRef() {
    	// Inside an object file, "". stands for the prefix of the package
    	// the file belongs to; substitute the real prefix.
    	id := SymID{Name: strings.Replace(r.readString(), `"".`, r.pkgprefix, -1)}

    	// Object files only ever store version 0 (extern) or 1 (static).
    	// Static symbols are made unique across all the files being read by
    	// giving them the number of the current file instead, which is
    	// incremented on each call to parseObject.
    	if r.readInt() != 0 {
    		id.Version = r.p.MaxVersion
    	}

    	r.p.SymRefs = append(r.p.SymRefs, id)
    }
    
    // readData reads the size of a data item from the input file and
    // returns a Data that starts at the reader's running data offset.
    // The running offset is then advanced past the item.
    func (r *objReader) readData() Data {
    	size := int64(r.readInt())
    	start := r.dataOffset
    	r.dataOffset = start + size
    	return Data{Offset: start, Size: size}
    }
    
    // skip skips n bytes in the input.
    // A negative n is a caller bug: it is recorded as an error and nothing
    // is skipped. Any I/O failure is recorded in r as well.
    func (r *objReader) skip(n int64) {
    	if n < 0 {
    		r.error(fmt.Errorf("debug/goobj: internal error: misuse of skip"))
    		// Do not fall through: slicing r.tmp with a negative length
    		// would panic.
    		return
    	}
    	if n < int64(len(r.tmp)) {
    		// Since the data is so small, just reading from the buffered
    		// reader is better than flushing the buffer and seeking.
    		r.readFull(r.tmp[:n])
    	} else if n <= int64(r.b.Buffered()) {
    		// Even though the data is not small, it has already been read.
    		// Advance the buffer instead of seeking.
    		for n > int64(len(r.tmp)) {
    			r.readFull(r.tmp[:])
    			n -= int64(len(r.tmp))
    		}
    		r.readFull(r.tmp[:n])
    	} else {
    		// Seek, giving up buffered data.
    		_, err := r.f.Seek(r.offset+n, io.SeekStart)
    		if err != nil {
    			r.error(err)
    		}
    		r.offset += n
    		r.b.Reset(r.f)
    	}
    }
    
    // Parse reads an object file or archive from r and returns the package
    // it describes, taking pkgpath as the package's import path.
    // An empty pkgpath is replaced by the placeholder `""`.
    // Input that begins with neither the archive header nor the Go object
    // header is rejected with errNotObject.
    func Parse(r io.ReadSeeker, pkgpath string) (*Package, error) {
    	if pkgpath == "" {
    		pkgpath = `""`
    	}
    	pkg := &Package{ImportPath: pkgpath}

    	var rd objReader
    	rd.init(r, pkg)

    	// The first eight bytes tell archives and bare object files apart.
    	magic := rd.tmp[:8]
    	if err := rd.readFull(magic); err != nil {
    		if err == io.EOF {
    			err = io.ErrUnexpectedEOF
    		}
    		return nil, err
    	}

    	var err error
    	switch {
    	case bytes.Equal(magic, archiveHeader):
    		err = rd.parseArchive()
    	case bytes.Equal(magic, goobjHeader):
    		err = rd.parseObject(goobjHeader)
    	default:
    		err = errNotObject
    	}
    	if err != nil {
    		return nil, err
    	}
    	return pkg, nil
    }
    
    // trimSpace returns b as a string with every trailing space character
    // dropped. Archive header fields are space-padded on the right, so this
    // recovers the field's value.
    func trimSpace(b []byte) string {
    	isPad := func(c rune) bool { return c == ' ' }
    	return string(bytes.TrimRightFunc(b, isPad))
    }
    
    // parseArchive parses a Unix archive of Go object files.
    // It reads 60-byte member headers one after another until r.offset
    // reaches r.limit, and returns the first error encountered, or nil.
    // TODO(rsc): Need to skip non-Go object files.
    // TODO(rsc): Maybe record table of contents in r.p so that
    // linker can avoid having code to parse archives too.
    func (r *objReader) parseArchive() error {
    	for r.offset < r.limit {
    		if err := r.readFull(r.tmp[:60]); err != nil {
    			return err
    		}
    		data := r.tmp[:60]

    		// Each file is preceded by this text header (slice indices in first column):
    		//	 0:16	name
    		//	16:28 date
    		//	28:34 uid
    		//	34:40 gid
    		//	40:48 mode
    		//	48:58 size
    		//	58:60 magic - `\n
    		// We only care about name, size, and magic.
    		// The fields are space-padded on the right.
    		// The size is in decimal.
    		// The file data - size bytes - follows the header.
    		// Headers are 2-byte aligned, so if size is odd, an extra padding
    		// byte sits between the file data and the next header.
    		if len(data) < 60 {
    			return errTruncatedArchive
    		}
    		if !bytes.Equal(data[58:60], archiveMagic) {
    			return errCorruptArchive
    		}
    		name := trimSpace(data[0:16])
    		size, err := strconv.ParseInt(trimSpace(data[48:58]), 10, 64)
    		if err != nil {
    			return errCorruptArchive
    		}
    		data = data[60:]
    		// fsize is size rounded up to an even number; it is used only
    		// to reject sizes that are negative or overflow when rounded.
    		fsize := size + size&1
    		if fsize < 0 || fsize < size {
    			return errCorruptArchive
    		}
    		switch name {
    		// NOTE(review): no case label is visible above the next statement.
    		// A `case` clause naming the members to skip appears to have been
    		// lost - restore it before relying on this switch.
    			r.skip(size)
    		default:
    			// Confine reads to this member while parsing it, then
    			// skip whatever parseObject left unread and restore the
    			// archive-wide limit.
    			oldLimit := r.limit
    			r.limit = r.offset + size
    			if err := r.parseObject(nil); err != nil {
    				return fmt.Errorf("parsing archive member %q: %v", name, err)
    			}
    			r.skip(r.limit - r.offset)
    			r.limit = oldLimit
    		}
    		// Step over the padding byte that follows an odd-sized member.
    		if size&1 != 0 {
    			r.skip(1)
    		}
    	}
    	return nil
    }
    
    // parseObject parses a single Go object file.
    // The prefix is the bytes already read from the file,
    // typically in order to detect that this is an object file.
    // The object file consists of a textual header ending in "\n!\n"
    // and then the part we want to parse begins.
    // The format of that part is defined in a comment at the top
    // of src/liblink/objfile.c.
    //
    // Each call increments r.p.MaxVersion, which readRef uses as the
    // version of file-local symbols. Parsed results are appended to r.p.
    //
    // NOTE(review): several statements and closing braces of this function
    // appear to be missing from this copy; see the notes in the body.
    func (r *objReader) parseObject(prefix []byte) error {
    	r.p.MaxVersion++

    	h := make([]byte, 0, 256)
    	h = append(h, prefix...)

    	// Collect the textual header, stopping once its last three bytes
    	// are "\n!\n".
    	var c1, c2, c3 byte
    	for {
    		c1, c2, c3 = c2, c3, r.readByte()

    		h = append(h, c3)

    		// The new export format can contain 0 bytes.
    		// Don't consider them errors, only look for r.err != nil.
    		if r.err != nil {

    			return errCorruptObject
    		}
    		if c1 == '\n' && c2 == '!' && c3 == '\n' {
    			break
    		}
    	}


    	// The fourth whitespace-separated field of the header is recorded
    	// as the architecture.
    	hs := strings.Fields(string(h))
    	if len(hs) >= 4 {
    		r.p.Arch = hs[3]
    	}
    	// TODO: extract OS + build ID if/when we need it


    	// Magic that opens the binary part of the object file.
    	r.readFull(r.tmp[:8])

    	if !bytes.Equal(r.tmp[:8], []byte("\x00\x00go19ld")) {

    		return r.error(errCorruptObject)
    	}


    	// The next byte must be 1; presumably a format version - TODO confirm.
    	b := r.readByte()
    	if b != 1 {
    		return r.error(errCorruptObject)
    	}


    	// Direct package dependencies.
    	// The list is terminated by an empty string.
    	for {
    		s := r.readString()
    		if s == "" {
    			break
    		}
    		r.p.Imports = append(r.p.Imports, s)
    	}


    	// Symbol references. Entry 0 is reserved for the empty SymID.
    	// Each reference is introduced by 0xfe; 0xff ends the list.
    	r.p.SymRefs = []SymID{{"", 0}}
    	for {
    		if b := r.readByte(); b != 0xfe {
    			if b != 0xff {
    				return r.error(errCorruptObject)
    			}
    			break
    		}

    		r.readRef()
    	}


    	r.readInt() // n relocations - ignore
    	r.readInt() // n pcdata - ignore
    	r.readInt() // n autom - ignore
    	r.readInt() // n funcdata - ignore
    	r.readInt() // n files - ignore


    	// Symbols.
    	// Each symbol is introduced by 0xfe; 0xff ends the list.
    	for {
    		if b := r.readByte(); b != 0xfe {
    			if b != 0xff {
    				return r.error(errCorruptObject)
    			}
    			break
    		}


    		s := &Sym{SymID: r.readSymID()}
    		r.p.Syms = append(r.p.Syms, s)

    		// NOTE(review): typ is not declared anywhere in this view; the
    		// statement that reads it appears to be missing - confirm.
    		s.Kind = objabi.SymKind(typ)

    		// NOTE(review): the next two lines are not Go code; they look
    		// like residue from a web view of the file and should be removed.
    Russ Cox's avatar
    Russ Cox committed
    		flags := r.readInt()
    		s.DupOK = flags&1 != 0

    		s.Size = r.readInt()
    		s.Type = r.readSymID()
    		s.Data = r.readData()
    		s.Reloc = make([]Reloc, r.readInt())
    		for i := range s.Reloc {
    			rel := &s.Reloc[i]
    			rel.Offset = r.readInt()
    			rel.Size = r.readInt()

    			rel.Type = objabi.RelocType(r.readInt())

    			rel.Add = r.readInt()
    			rel.Sym = r.readSymID()
    		}


    		// NOTE(review): the block below is indented one level deeper and
    		// redeclares flags, so it presumably sat inside a conditional
    		// (function symbols only?) whose opening line is missing - confirm.
    			f := new(Func)
    			s.Func = f
    			f.Args = r.readInt()
    			f.Frame = r.readInt()

    		// NOTE(review): the next two lines are not Go code; they look
    		// like residue from a web view of the file and should be removed.
    Russ Cox's avatar
    Russ Cox committed
    			flags := r.readInt()

    			f.Leaf = flags&(1<<0) != 0

    			f.NoSplit = r.readInt() != 0

    			f.Var = make([]Var, r.readInt())
    			for i := range f.Var {
    				v := &f.Var[i]
    				v.Name = r.readSymID().Name
    				v.Offset = r.readInt()
    				v.Kind = r.readInt()
    				v.Type = r.readSymID()
    			}

    			f.PCSP = r.readData()
    			f.PCFile = r.readData()
    			f.PCLine = r.readData()

    			f.PCInline = r.readData()

    			f.PCData = make([]Data, r.readInt())
    			for i := range f.PCData {
    				f.PCData[i] = r.readData()
    			}
    			// Funcdata symbols are all stored first, followed by all
    			// of the offsets.
    			f.FuncData = make([]FuncData, r.readInt())
    			for i := range f.FuncData {
    				f.FuncData[i].Sym = r.readSymID()
    			}
    			for i := range f.FuncData {
    				f.FuncData[i].Offset = int64(r.readInt()) // TODO
    			}
    			f.File = make([]string, r.readInt())
    			for i := range f.File {
    				f.File[i] = r.readSymID().Name
    			}

    			f.InlTree = make([]InlinedCall, r.readInt())
    			for i := range f.InlTree {
    				f.InlTree[i].Parent = r.readInt()
    				f.InlTree[i].File = r.readSymID().Name
    				f.InlTree[i].Line = r.readInt()
    				f.InlTree[i].Func = r.readSymID()
    			}

    	// NOTE(review): the closing braces of the enclosing blocks and the
    	// read that should fill r.tmp[:7] with the trailer appear to be
    	// missing before this check - confirm.
    	if !bytes.Equal(r.tmp[:7], []byte("\xffgo19ld")) {

    		return r.error(errCorruptObject)
    	}

    	return nil
    }
    
    
    // String describes the relocation as "[start:end]type", where start and
    // end are the relocated byte range measured from insnOffset. The target
    // symbol name and a non-zero addend are appended after a colon when
    // present.
    func (r *Reloc) String(insnOffset uint64) string {
    	start := r.Offset - int(insnOffset)
    	base := fmt.Sprintf("[%d:%d]%s", start, start+r.Size, r.Type)

    	name, add := r.Sym.Name, r.Add
    	switch {
    	case name != "" && add != 0:
    		return fmt.Sprintf("%s:%s+%d", base, name, add)
    	case name != "":
    		return fmt.Sprintf("%s:%s", base, name)
    	case add != 0:
    		return fmt.Sprintf("%s:%d", base, add)
    	}
    	return base
    }