From 9e7bc80b31088dc62faf4776ffdb1a2e27afa94e Mon Sep 17 00:00:00 2001
From: Tom Thorogood <me+google@tomthorogood.co.uk>
Date: Tue, 16 Mar 2021 12:35:35 +1030
Subject: [PATCH] os: reuse readdir buffers on unix with a sync.Pool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

(*File).readdir allocates a fixed-size 8KiB buffer on unix systems that
cannot be reused. While it accounts for just a single allocation, it's
more than large enough to show up in profiles and make things quite a
bit slower.

Instead of allocating so often, use a sync.Pool to allow these buffers to
be reused. This has a large impact on readdir heavy workloads.

Package os benchmarks:

name            old time/op    new time/op    delta
Readdirname-12    35.6µs ± 5%    18.1µs ± 4%  -49.00%  (p=0.000 n=10+10)
Readdir-12         142µs ± 1%     121µs ± 0%  -14.87%  (p=0.000 n=10+9)
ReadDir-12        44.0µs ± 6%    28.4µs ± 8%  -35.58%  (p=0.000 n=9+10)

name            old alloc/op   new alloc/op   delta
Readdirname-12    14.4kB ± 0%     6.2kB ± 0%  -57.08%  (p=0.000 n=10+10)
Readdir-12        41.6kB ± 0%    33.4kB ± 0%  -19.77%  (p=0.000 n=10+9)
ReadDir-12        21.9kB ± 0%    13.7kB ± 0%  -37.39%  (p=0.000 n=10+10)

name            old allocs/op  new allocs/op  delta
Readdirname-12       131 ± 0%       130 ± 0%   -0.76%  (p=0.000 n=10+10)
Readdir-12           367 ± 0%       366 ± 0%   -0.27%  (p=0.000 n=10+10)
ReadDir-12           249 ± 0%       248 ± 0%   -0.40%  (p=0.000 n=10+10)

A clunky benchmark I threw together that calls filepath.WalkDir on $GOMODCACHE:

name        old time/op    new time/op    delta
WalkDir-12    91.2ms ±19%    48.7ms ± 0%  -46.54%  (p=0.000 n=10+10)

name        old alloc/op   new alloc/op   delta
WalkDir-12    54.0MB ± 0%     7.6MB ± 0%  -85.92%  (p=0.000 n=8+9)

name        old allocs/op  new allocs/op  delta
WalkDir-12      136k ± 0%      130k ± 0%   -4.15%  (p=0.000 n=8+8)

Change-Id: I00e4d48726da0e46c528ab205409afd03127b844
Reviewed-on: https://go-review.googlesource.com/c/go/+/302169
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com>
Run-TryBot: Emmanuel Odeke <emmanuel@orijtech.com>
TryBot-Result: Go Bot <gobot@golang.org>
---
 src/os/dir_unix.go  | 29 +++++++++++++++++++++--------
 src/os/file_unix.go |  1 +
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/os/dir_unix.go b/src/os/dir_unix.go
index ef5c00aee08..5589c9c6825 100644
--- a/src/os/dir_unix.go
+++ b/src/os/dir_unix.go
@@ -10,15 +10,16 @@ package os
 import (
 	"io"
 	"runtime"
+	"sync"
 	"syscall"
 	"unsafe"
 )
 
 // Auxiliary information if the File describes a directory
 type dirInfo struct {
-	buf  []byte // buffer for directory I/O
-	nbuf int    // length of buf; return value from Getdirentries
-	bufp int    // location of next record in buf.
+	buf  *[]byte // buffer for directory I/O
+	nbuf int     // length of buf; return value from Getdirentries
+	bufp int     // location of next record in buf.
 }
 
 const (
@@ -26,14 +27,26 @@ const (
 	blockSize = 8192
 )
 
-func (d *dirInfo) close() {}
+var dirBufPool = sync.Pool{
+	New: func() interface{} {
+		// The buffer must be at least a block long.
+		buf := make([]byte, blockSize)
+		return &buf
+	},
+}
+
+func (d *dirInfo) close() {
+	if d.buf != nil {
+		dirBufPool.Put(d.buf)
+		d.buf = nil
+	}
+}
 
 func (f *File) readdir(n int, mode readdirMode) (names []string, dirents []DirEntry, infos []FileInfo, err error) {
 	// If this file has no dirinfo, create one.
 	if f.dirinfo == nil {
 		f.dirinfo = new(dirInfo)
-		// The buffer must be at least a block long.
-		f.dirinfo.buf = make([]byte, blockSize)
+		f.dirinfo.buf = dirBufPool.Get().(*[]byte)
 	}
 	d := f.dirinfo
 
@@ -55,7 +68,7 @@ func (f *File) readdir(n int, mode readdirMode) (names []string, dirents []DirEn
 		if d.bufp >= d.nbuf {
 			d.bufp = 0
 			var errno error
-			d.nbuf, errno = f.pfd.ReadDirent(d.buf)
+			d.nbuf, errno = f.pfd.ReadDirent(*d.buf)
 			runtime.KeepAlive(f)
 			if errno != nil {
 				return names, dirents, infos, &PathError{Op: "readdirent", Path: f.name, Err: errno}
@@ -66,7 +79,7 @@ func (f *File) readdir(n int, mode readdirMode) (names []string, dirents []DirEn
 		}
 
 		// Drain the buffer
-		buf := d.buf[d.bufp:d.nbuf]
+		buf := (*d.buf)[d.bufp:d.nbuf]
 		reclen, ok := direntReclen(buf)
 		if !ok || reclen > uint64(len(buf)) {
 			break
diff --git a/src/os/file_unix.go b/src/os/file_unix.go
index e8b286c9ee8..b5d87fcb731 100644
--- a/src/os/file_unix.go
+++ b/src/os/file_unix.go
@@ -247,6 +247,7 @@ func (file *file) close() error {
 	}
 	if file.dirinfo != nil {
 		file.dirinfo.close()
+		file.dirinfo = nil
 	}
 	var err error
 	if e := file.pfd.Close(); e != nil {
-- 
GitLab