    // Copyright 2009 The Go Authors. All rights reserved.
    // Use of this source code is governed by a BSD-style
    // license that can be found in the LICENSE file.
    
    // Fork, exec, wait, etc.
    
    package syscall
    
    import (
    	"sync"
    	"unsafe"
    )
    
    // Lock synchronizing creation of new file descriptors with fork.
    //
    // We want the child in a fork/exec sequence to inherit only the
    // file descriptors we intend.  To do that, we mark all file
    // descriptors close-on-exec and then, in the child, explicitly
    // unmark the ones we want the exec'ed program to keep.
    // Unix doesn't make this easy: there is, in general, no way to
    // allocate a new file descriptor close-on-exec.  Instead you
    // have to allocate the descriptor and then mark it close-on-exec.
    // If a fork happens between those two events, the child's exec
    // will inherit an unwanted file descriptor.
    //
    // This lock solves that race: the create new fd/mark close-on-exec
    // operation is done holding ForkLock for reading, and the fork itself
    // is done holding ForkLock for writing.  At least, that's the idea.
    // There are some complications.
    //
    // Some system calls that create new file descriptors can block
    // for arbitrarily long times: open on a hung NFS server or named
    // pipe, accept on a socket, and so on.  We can't reasonably grab
    // the lock across those operations.
    //
    // It is worse to inherit some file descriptors than others.
    // If a non-malicious child accidentally inherits an open ordinary file,
    // that's not a big deal.  On the other hand, if a long-lived child
    // accidentally inherits the write end of a pipe, then the reader
    // of that pipe will not see EOF until that child exits, potentially
    // causing the parent program to hang.  This is a common problem
    // in threaded C programs that use popen.
    //
    // Luckily, the file descriptors that are most important not to
    // inherit are not the ones that can take an arbitrarily long time
    // to create: pipe returns instantly, and the net package uses
    // non-blocking I/O to accept on a listening socket.
    // The rules for which file descriptor-creating operations use the
    // ForkLock are as follows:
    //
    // 1) Pipe.    Does not block.  Use the ForkLock.
    // 2) Socket.  Does not block.  Use the ForkLock.
    // 3) Accept.  If using non-blocking mode, use the ForkLock.
    //             Otherwise, live with the race.
    // 4) Open.    Can block.  Use O_CLOEXEC if available (Linux).
    //             Otherwise, live with the race.
    // 5) Dup.     Does not block.  Use the ForkLock.
    //             On Linux, could use fcntl F_DUPFD_CLOEXEC
    //             instead of the ForkLock, but only for dup(fd, -1).
    
    var ForkLock sync.RWMutex
    
    // Convert array of string to array
    // of NUL-terminated byte pointer.
    
    func StringSlicePtr(ss []string) []*byte {
    
    	bb := make([]*byte, len(ss)+1)
    
    	for i := 0; i < len(ss); i++ {
    
    	bb[len(ss)] = nil
    	return bb
    
    // CloseOnExec sets the FD_CLOEXEC descriptor flag on fd via fcntl.
    // The result of the fcntl call is discarded; callers get no error report.
    func CloseOnExec(fd int) {
    	fcntl(fd, F_SETFD, FD_CLOEXEC)
    }
    
    
    func SetNonblock(fd int, nonblocking bool) (errno int) {
    
    	flag, err := fcntl(fd, F_GETFL, 0)
    
    	_, err = fcntl(fd, F_SETFL, flag)
    	return err
    
    // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child.
    
    // If a dup or exec fails, write the errno int to pipe.
    
    // (Pipe is close-on-exec so if exec succeeds, it will be closed.)
    // In the child, this function must not acquire any locks, because
    // they might have been locked at the time of the fork.  This means
    // no rescheduling, no malloc calls, and no new stack segments.
    // The calls to RawSyscall are okay because they are assembly
    // functions that do not grow the stack.
    
    func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, pipe int) (pid int, err int) {
    
    	// Declare all variables at top in case any
    	// declarations require heap allocation (e.g., err1).
    
    	var r1, r2, err1 uintptr
    	var nextfd int
    	var i int
    
    	// guard against side effects of shuffling fds below.
    	fd := append([]int(nil), attr.Files...)
    
    
    	darwin := OS == "darwin"
    
    
    	// About to call fork.
    	// No more allocation or calls of non-assembly functions.
    
    	r1, r2, err1 = RawSyscall(SYS_FORK, 0, 0, 0)
    
    	}
    
    	// On Darwin:
    	//	r1 = child pid in both parent and child.
    	//	r2 = 0 in parent, 1 in child.
    	// Convert to normal Unix r1 = 0 in child.
    	if darwin && r2 == 1 {
    
    	}
    
    	if r1 != 0 {
    		// parent; return PID
    
    	}
    
    	// Fork succeeded, now in child.
    
    
    	// Enable tracing if requested.
    
    		_, _, err1 = RawSyscall(SYS_PTRACE, uintptr(PTRACE_TRACEME), 0, 0)
    
    		if err1 != 0 {
    
    	// Session ID
    	if attr.Setsid {
    		_, _, err1 = RawSyscall(SYS_SETSID, 0, 0, 0)
    		if err1 != 0 {
    			goto childerror
    		}
    	}
    
    
    	// Chroot
    	if chroot != nil {
    		_, _, err1 = RawSyscall(SYS_CHROOT, uintptr(unsafe.Pointer(chroot)), 0, 0)
    		if err1 != 0 {
    			goto childerror
    		}
    	}
    
    	// User and groups
    	if attr.Credential != nil {
    		ngroups := uintptr(len(attr.Credential.Groups))
    		groups := uintptr(0)
    		if ngroups > 0 {
    			groups = uintptr(unsafe.Pointer(&attr.Credential.Groups[0]))
    		}
    		_, _, err1 = RawSyscall(SYS_SETGROUPS, ngroups, groups, 0)
    		if err1 != 0 {
    			goto childerror
    		}
    		_, _, err1 = RawSyscall(SYS_SETGID, uintptr(attr.Credential.Gid), 0, 0)
    		if err1 != 0 {
    			goto childerror
    		}
    		_, _, err1 = RawSyscall(SYS_SETUID, uintptr(attr.Credential.Uid), 0, 0)
    		if err1 != 0 {
    			goto childerror
    		}
    	}
    
    
    	// Chdir
    	if dir != nil {
    
    		_, _, err1 = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
    
    	// Pass 1: look for fd[i] < i and move those up above len(fd)
    	// so that pass 2 won't stomp on an fd it needs later.
    
    	nextfd = int(len(fd))
    
    	if pipe < nextfd {
    
    		_, _, err1 = RawSyscall(SYS_DUP2, uintptr(pipe), uintptr(nextfd), 0)
    
    		RawSyscall(SYS_FCNTL, uintptr(nextfd), F_SETFD, FD_CLOEXEC)
    		pipe = nextfd
    		nextfd++
    
    	}
    	for i = 0; i < len(fd); i++ {
    
    		if fd[i] >= 0 && fd[i] < int(i) {
    
    			_, _, err1 = RawSyscall(SYS_DUP2, uintptr(fd[i]), uintptr(nextfd), 0)
    
    			RawSyscall(SYS_FCNTL, uintptr(nextfd), F_SETFD, FD_CLOEXEC)
    			fd[i] = nextfd
    			nextfd++
    			if nextfd == pipe { // don't stomp on pipe
    
    			}
    		}
    	}
    
    	// Pass 2: dup fd[i] down onto i.
    	for i = 0; i < len(fd); i++ {
    		if fd[i] == -1 {
    
    			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
    			continue
    
    			// dup2(i, i) won't clear close-on-exec flag on Linux,
    			// probably not elsewhere either.
    
    			_, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), F_SETFD, 0)
    
    		}
    		// The new fd is created NOT close-on-exec,
    		// which is exactly what we want.
    
    		_, _, err1 = RawSyscall(SYS_DUP2, uintptr(fd[i]), uintptr(i), 0)
    
    		}
    	}
    
    	// By convention, we don't close-on-exec the fds we are
    	// started with, so if len(fd) < 3, close 0, 1, 2 as needed.
    	// Programs that know they inherit fds >= 3 will need
    	// to set them close-on-exec.
    	for i = len(fd); i < 3; i++ {
    
    		RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
    
    Russ Cox's avatar
    Russ Cox committed
    	_, _, err1 = RawSyscall(SYS_EXECVE,
    
    		uintptr(unsafe.Pointer(argv0)),
    		uintptr(unsafe.Pointer(&argv[0])),
    
    		uintptr(unsafe.Pointer(&envv[0])))
    
    
    childerror:
    	// send error code on pipe
    
    	RawSyscall(SYS_WRITE, uintptr(pipe), uintptr(unsafe.Pointer(&err1)), uintptr(unsafe.Sizeof(err1)))
    
    		RawSyscall(SYS_EXIT, 253, 0, 0)
    
    	}
    
    	// Calling panic is not actually safe,
    	// but the for loop above won't break
    	// and this shuts up the compiler.
    
    	panic("unreached")
    
    // Credential holds the user and group identities to assume in a child
    // process before it execs.
    type Credential struct {
    	Uid    uint32   // User ID.
    	Gid    uint32   // Group ID.
    	Groups []uint32 // Supplementary group IDs.
    }

    // ProcAttr holds the attributes applied to a new process started by
    // ForkExec or StartProcess. The zero value requests no changes and
    // passes no environment or file descriptors.
    type ProcAttr struct {
    	Setsid     bool        // Create session.
    	Ptrace     bool        // Enable tracing.
    	Dir        string      // Current working directory.
    	Env        []string    // Environment.
    	Files      []int       // File descriptors.
    	Chroot     string      // Chroot.
    	Credential *Credential // Credential.
    }
    
    var zeroAttributes ProcAttr
    
    func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err int) {
    
    	var p [2]int
    	var n int
    	var err1 uintptr
    	var wstatus WaitStatus
    
    
    	// Convert args to C form.
    
    	argv0p := StringBytePtr(argv0)
    
    	argvp := StringSlicePtr(argv)
    	envvp := StringSlicePtr(attr.Env)
    
    	if OS == "freebsd" && len(argv[0]) > len(argv0) {
    		argvp[0] = argv0p
    	}
    
    
    	var chroot *byte
    	if attr.Chroot != "" {
    		chroot = StringBytePtr(attr.Chroot)
    	}
    
    	var dir *byte
    	if attr.Dir != "" {
    		dir = StringBytePtr(attr.Dir)
    	}
    
    
    	// Acquire the fork lock so that no other threads
    	// create new fds that are not yet close-on-exec
    	// before we fork.
    
    
    	// Allocate child status pipe close on exec.
    
    	if err = Pipe(p[0:]); err != 0 {
    
    	if _, err = fcntl(p[0], F_SETFD, FD_CLOEXEC); err != 0 {
    
    	if _, err = fcntl(p[1], F_SETFD, FD_CLOEXEC); err != 0 {
    
    	pid, err = forkAndExecInChild(argv0p, argvp, envvp, chroot, dir, attr, p[1])
    
    	if err != 0 {
    	error:
    		if p[0] >= 0 {
    
    			Close(p[0])
    			Close(p[1])
    
    		ForkLock.Unlock()
    		return 0, err
    
    
    	// Read child error status from pipe.
    
    	Close(p[1])
    	n, err = read(p[0], (*byte)(unsafe.Pointer(&err1)), unsafe.Sizeof(err1))
    	Close(p[0])
    
    	if err != 0 || n != 0 {
    
    		if n == unsafe.Sizeof(err1) {
    
    		}
    
    		// Child failed; wait for it to exit, to make sure
    		// the zombies don't accumulate.
    
    		_, err1 := Wait4(pid, &wstatus, 0, nil)
    
    		for err1 == EINTR {
    
    			_, err1 = Wait4(pid, &wstatus, 0, nil)
    
    	}
    
    	// Read got EOF, so pipe closed on exec, so exec succeeded.
    
    // Combination of fork and exec, careful to be thread safe.
    
    func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err int) {
    	return forkExec(argv0, argv, attr)
    
    // StartProcess wraps ForkExec for package os.
    func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid, handle int, err int) {
    	pid, err = forkExec(argv0, argv, attr)
    	return pid, 0, err
    
    // Ordinary exec.
    
    func Exec(argv0 string, argv []string, envv []string) (err int) {
    
    Russ Cox's avatar
    Russ Cox committed
    	_, _, err1 := RawSyscall(SYS_EXECVE,
    
    		uintptr(unsafe.Pointer(StringBytePtr(argv0))),
    
    		uintptr(unsafe.Pointer(&StringSlicePtr(argv)[0])),
    		uintptr(unsafe.Pointer(&StringSlicePtr(envv)[0])))