From 21e49cbb2dc0d9557b78c8c3c7d46ffc1c6705ca Mon Sep 17 00:00:00 2001
From: Ziad Hatahet <hatahet@gmail.com>
Date: Wed, 7 Sep 2011 13:54:33 -0400
Subject: [PATCH] sort: use heapsort to bail out quicksort

See http://research.swtch.com/2008/01/killing-quicksort.html for more
info.
Fixes #467.

R=r, rsc
CC=golang-dev
https://golang.org/cl/4591051
---
 src/pkg/sort/export_test.go |  9 +++++
 src/pkg/sort/sort.go        | 61 ++++++++++++++++++++++++++++++---
 src/pkg/sort/sort_test.go   | 68 +++++++++++++++++++++++++++++++++----
 3 files changed, 127 insertions(+), 11 deletions(-)
 create mode 100644 src/pkg/sort/export_test.go

diff --git a/src/pkg/sort/export_test.go b/src/pkg/sort/export_test.go
new file mode 100644
index 00000000000..b6e30ceb570
--- /dev/null
+++ b/src/pkg/sort/export_test.go
@@ -0,0 +1,9 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sort
+
+func Heapsort(data Interface) {
+	heapSort(data, 0, data.Len())
+}
diff --git a/src/pkg/sort/sort.go b/src/pkg/sort/sort.go
index 0a4a4375f05..83ee170cbab 100644
--- a/src/pkg/sort/sort.go
+++ b/src/pkg/sort/sort.go
@@ -37,10 +37,47 @@ func insertionSort(data Interface, a, b int) {
 	}
 }
 
+// siftDown implements the heap property on data[lo, hi).
+// first is an offset into the array where the root of the heap lies.
+func siftDown(data Interface, lo, hi, first int) {
+	root := lo
+	for {
+		child := 2*root + 1
+		if child >= hi {
+			break
+		}
+		if child+1 < hi && data.Less(first+child, first+child+1) {
+			child++
+		}
+		if !data.Less(first+root, first+child) {
+			return
+		}
+		data.Swap(first+root, first+child)
+		root = child
+	}
+}
+
+func heapSort(data Interface, a, b int) {
+	first := a
+	lo := 0
+	hi := b - a
+
+	// Build heap with greatest element at top.
+	for i := (hi - 1) / 2; i >= 0; i-- {
+		siftDown(data, i, hi, first)
+	}
+
+	// Pop elements, largest first, into end of data.
+	for i := hi - 1; i >= 0; i-- {
+		data.Swap(first, first+i)
+		siftDown(data, lo, i, first)
+	}
+}
+
 // Quicksort, following Bentley and McIlroy,
 // ``Engineering a Sort Function,'' SP&E November 1993.
 
-// Move the median of the three values data[a], data[b], data[c] into data[a].
+// medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a].
 func medianOfThree(data Interface, a, b, c int) {
 	m0 := b
 	m1 := a
@@ -123,16 +160,21 @@ func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
 	return lo + b - a, hi - (d - c)
 }
 
-func quickSort(data Interface, a, b int) {
+func quickSort(data Interface, a, b, maxDepth int) {
 	for b-a > 7 {
+		if maxDepth == 0 {
+			heapSort(data, a, b)
+			return
+		}
+		maxDepth--
 		mlo, mhi := doPivot(data, a, b)
 		// Avoiding recursion on the larger subproblem guarantees
 		// a stack depth of at most lg(b-a).
 		if mlo-a < b-mhi {
-			quickSort(data, a, mlo)
+			quickSort(data, a, mlo, maxDepth)
 			a = mhi // i.e., quickSort(data, mhi, b)
 		} else {
-			quickSort(data, mhi, b)
+			quickSort(data, mhi, b, maxDepth)
 			b = mlo // i.e., quickSort(data, a, mlo)
 		}
 	}
@@ -141,7 +183,16 @@ func quickSort(data Interface, a, b int) {
 	}
 }
 
-func Sort(data Interface) { quickSort(data, 0, data.Len()) }
+func Sort(data Interface) {
+	// Switch to heapsort if depth of 2*ceil(lg(n)) is reached.
+	n := data.Len()
+	maxDepth := 0
+	for 1<<uint(maxDepth) < n {
+		maxDepth++
+	}
+	maxDepth *= 2
+	quickSort(data, 0, data.Len(), maxDepth)
+}
 
 func IsSorted(data Interface) bool {
 	n := data.Len()
diff --git a/src/pkg/sort/sort_test.go b/src/pkg/sort/sort_test.go
index 5007a92a562..a5640151cb2 100644
--- a/src/pkg/sort/sort_test.go
+++ b/src/pkg/sort/sort_test.go
@@ -169,6 +169,13 @@ func (d *testingData) Swap(i, j int) {
 	d.data[i], d.data[j] = d.data[j], d.data[i]
 }
 
+func min(a, b int) int {
+	if a < b {
+		return a
+	}
+	return b
+}
+
 func lg(n int) int {
 	i := 0
 	for 1<<uint(i) < n {
@@ -177,7 +184,7 @@ func lg(n int) int {
 	return i
 }
 
-func TestBentleyMcIlroy(t *testing.T) {
+func testBentleyMcIlroy(t *testing.T, sort func(Interface)) {
 	sizes := []int{100, 1023, 1024, 1025}
 	if testing.Short() {
 		sizes = []int{100, 127, 128, 129}
@@ -253,7 +260,7 @@ func TestBentleyMcIlroy(t *testing.T) {
 
 					desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode])
 					d := &testingData{desc, t, mdata[0:n], n * lg(n) * 12 / 10, 0}
-					Sort(d)
+					sort(d)
 
 					// If we were testing C qsort, we'd have to make a copy
 					// of the slice and sort it ourselves and then compare
@@ -274,9 +281,58 @@ func TestBentleyMcIlroy(t *testing.T) {
 	}
 }
 
-func min(a, b int) int {
-	if a < b {
-		return a
+func TestSortBM(t *testing.T) {
+	testBentleyMcIlroy(t, Sort)
+}
+
+func TestHeapsortBM(t *testing.T) {
+	testBentleyMcIlroy(t, Heapsort)
+}
+
+// This is based on the "antiquicksort" implementation by M. Douglas McIlroy.
+// See http://www.cs.dartmouth.edu/~doug/mdmspe.pdf for more info.
+type adversaryTestingData struct {
+	data      []int
+	keys      map[int]int
+	candidate int
+}
+
+func (d *adversaryTestingData) Len() int { return len(d.data) }
+
+func (d *adversaryTestingData) Less(i, j int) bool {
+	if _, present := d.keys[i]; !present {
+		if _, present := d.keys[j]; !present {
+			if i == d.candidate {
+				d.keys[i] = len(d.keys)
+			} else {
+				d.keys[j] = len(d.keys)
+			}
+		}
 	}
-	return b
+
+	if _, present := d.keys[i]; !present {
+		d.candidate = i
+		return false
+	}
+	if _, present := d.keys[j]; !present {
+		d.candidate = j
+		return true
+	}
+
+	return d.keys[i] >= d.keys[j]
+}
+
+func (d *adversaryTestingData) Swap(i, j int) {
+	d.data[i], d.data[j] = d.data[j], d.data[i]
+}
+
+func TestAdversary(t *testing.T) {
+	const size = 100
+	data := make([]int, size)
+	for i := 0; i < size; i++ {
+		data[i] = i
+	}
+
+	d := &adversaryTestingData{data, make(map[int]int), 0}
+	Sort(d) // This should degenerate to heapsort.
 }
-- 
GitLab