diff --git a/src/pkg/sort/sort.go b/src/pkg/sort/sort.go index d3092e8019171d11d76df5d198b87b8739766f99..edef06ff363c9b53bd4cc45850dbb055f48edc0f 100644 --- a/src/pkg/sort/sort.go +++ b/src/pkg/sort/sort.go @@ -283,3 +283,192 @@ func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } // StringsAreSorted tests whether a slice of strings is sorted in increasing order. func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } + +// Notes on stable sorting: +// The used algorithms are simple and provable correct on all input and use +// only logarithmic additional stack space. They perform well if compared +// experimentaly to other stable in-place sorting algorithms. +// +// Remarks on other algoritms evaluated: +// - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: +// Not faster. +// - GCC's __rotate for block rotations: Not faster. +// - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen +// and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: +// The given algorithms are in-place, number of Swap and Assignments +// grow as n log n but the algorithm is not stable. +// - "Fast Stable In-Plcae Sorting with O(n) Data Moves" J.I. Munro and +// V. Raman in Algorithmica (1996) 16, 115-160: +// This algorithm either needs additional 2n bits or works only if there +// are enough different elements available to encode some permutations +// which have to be undone later (so not stable an any input). +// - All the optimal in-place sorting/merging algorithms I found are either +// unstable or rely on enough different elements in each step to encode the +// performed block rearrangements. See also "In-Place Merging Algorithms", +// Denham Coates-Evely, Department of Computer Science, Kings College, +// January 2004 and the reverences in there. +// - Often "optimal" algorithms are optimal in the number of assignments +// but Interface has only Swap as operation. + +// Stable sorts data while keeping the original order of equal elements. +// +// It makes one call to data.Len to determine n, O(n*log(n)) calls to +// data.Less and O(n*log(n)*log(n)) calls to data.Swap. +func Stable(data Interface) { + n := data.Len() + blockSize := 20 + a, b := 0, blockSize + for b <= n { + insertionSort(data, a, b) + a = b + b += blockSize + } + insertionSort(data, a, n) + + for blockSize < n { + a, b = 0, 2*blockSize + for b <= n { + symMerge(data, a, a+blockSize, b) + a = b + b += 2 * blockSize + } + symMerge(data, a, a+blockSize, n) + blockSize *= 2 + } +} + +// SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using +// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum +// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz +// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in +// Computer Science, pages 714-723. Springer, 2004. +// +// Let M = m-a and N = b-n. Wolog M < N. +// The recursion depth is bound by ceil(log(N+M)). +// The algorithm needs O(M*log(N/M + 1)) calls to data.Less. +// The algorithm needs O((M+N)*log(M)) calls to data.Swap. +// +// The paper gives O((M+N)*log(M)) as the number of assignments assuming a +// rotation algorithm wich uses O(M+N+gcd(M+N)) assignments. The argumentation +// in the paper carries through for Swap operations, especially as the block +// swapping rotate uses only O(M+N) Swaps. +func symMerge(data Interface, a, m, b int) { + if a >= m || m >= b { + return + } + + mid := a + (b-a)/2 + n := mid + m + start := 0 + if m > mid { + start = n - b + r, p := mid, n-1 + for start < r { + c := start + (r-start)/2 + if !data.Less(p-c, c) { + start = c + 1 + } else { + r = c + } + } + } else { + start = a + r, p := m, n-1 + for start < r { + c := start + (r-start)/2 + if !data.Less(p-c, c) { + start = c + 1 + } else { + r = c + } + } + } + end := n - start + rotate(data, start, m, end) + symMerge(data, a, start, mid) + symMerge(data, mid, end, b) +} + +// Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: +// Data of the form 'x u v y' is changed to 'x v u y'. +// Rotate performs at most b-a many calls to data.Swap. +func rotate(data Interface, a, m, b int) { + i := m - a + if i == 0 { + return + } + j := b - m + if j == 0 { + return + } + + if i == j { + swapRange(data, a, m, i) + return + } + + p := a + i + for i != j { + if i > j { + swapRange(data, p-i, p, j) + i -= j + } else { + swapRange(data, p-i, p+j-i, i) + j -= i + } + } + swapRange(data, p-i, p, i) +} + +/* +Complexity of Stable Sorting + + +Complexity of block swapping rotation + +Each Swap puts one new element into its correct, final position. +Elements which reach their final position are no longer moved. +Thus block swapping rotation needs |u|+|v| calls to Swaps. +This is best possible as each element might need a move. + +Pay attention when comparing to other optimal algorithms which +typically count the number of assignments instead of swaps: +E.g. the optimal algorithm of Dudzinski and Dydek for in-place +rotations uses O(u + v + gcd(u,v)) assignments which is +better than our O(3 * (u+v)) as gcd(u,v) <= u. + + +Stable sorting by SymMerge and BlockSwap rotations + +SymMerg complexity for same size input M = N: +Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) +Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) + +(The following argument does not fuzz over a missing -1 or +other stuff which does not impact the final result). + +Let n = data.Len(). Assume n = 2^k. + +Plain merge sort performs log(n) = k iterations. +On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. + +Thus iteration i of merge sort performs: +Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) +Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) + +In total k = log(n) iterations are performed; so in total: +Calls to Less O(log(n) * n) +Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) + = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) + + +Above results should generalize to arbitrary n = 2^k + p +and should not be influenced by the initial insertion sort phase: +Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of +size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. +Merge sort iterations start at i = log(bs). With t = log(bs) constant: +Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) + = O(n * log(n)) +Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) + +*/ diff --git a/src/pkg/sort/sort_test.go b/src/pkg/sort/sort_test.go index 5daf8482b9c271581f62948f7a1d2ff6b41f4bb3..2dd65c44366a40b11cd6b041de8cc6b9be2a61ee 100644 --- a/src/pkg/sort/sort_test.go +++ b/src/pkg/sort/sort_test.go @@ -122,6 +122,19 @@ func BenchmarkSortString1K(b *testing.B) { } } +func BenchmarkStableString1K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]string, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = strconv.Itoa(i ^ 0x2cc) + } + b.StartTimer() + Stable(StringSlice(data)) + b.StopTimer() + } +} + func BenchmarkSortInt1K(b *testing.B) { b.StopTimer() for i := 0; i < b.N; i++ { @@ -135,6 +148,19 @@ func BenchmarkSortInt1K(b *testing.B) { } } +func BenchmarkStableInt1K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<10) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0x2cc + } + b.StartTimer() + Stable(IntSlice(data)) + b.StopTimer() + } +} + func BenchmarkSortInt64K(b *testing.B) { b.StopTimer() for i := 0; i < b.N; i++ { @@ -148,6 +174,19 @@ func BenchmarkSortInt64K(b *testing.B) { } } +func BenchmarkStableInt64K(b *testing.B) { + b.StopTimer() + for i := 0; i < b.N; i++ { + data := make([]int, 1<<16) + for i := 0; i < len(data); i++ { + data[i] = i ^ 0xcccc + } + b.StartTimer() + Stable(IntSlice(data)) + b.StopTimer() + } +} + const ( _Sawtooth = iota _Rand @@ -204,7 +243,7 @@ func lg(n int) int { return i } -func testBentleyMcIlroy(t *testing.T, sort func(Interface)) { +func testBentleyMcIlroy(t *testing.T, sort func(Interface), maxswap func(int) int) { sizes := []int{100, 1023, 1024, 1025} if testing.Short() { sizes = []int{100, 127, 128, 129} @@ -278,7 +317,7 @@ func testBentleyMcIlroy(t *testing.T, sort func(Interface)) { } desc := fmt.Sprintf("n=%d m=%d dist=%s mode=%s", n, m, dists[dist], modes[mode]) - d := &testingData{desc: desc, t: t, data: mdata[0:n], maxswap: n * lg(n) * 12 / 10} + d := &testingData{desc: desc, t: t, data: mdata[0:n], maxswap: maxswap(n)} sort(d) // Uncomment if you are trying to improve the number of compares/swaps. //t.Logf("%s: ncmp=%d, nswp=%d", desc, d.ncmp, d.nswap) @@ -303,11 +342,15 @@ func testBentleyMcIlroy(t *testing.T, sort func(Interface)) { } func TestSortBM(t *testing.T) { - testBentleyMcIlroy(t, Sort) + testBentleyMcIlroy(t, Sort, func(n int) int { return n * lg(n) * 12 / 10 }) } func TestHeapsortBM(t *testing.T) { - testBentleyMcIlroy(t, Heapsort) + testBentleyMcIlroy(t, Heapsort, func(n int) int { return n * lg(n) * 12 / 10 }) +} + +func TestStableBM(t *testing.T) { + testBentleyMcIlroy(t, Stable, func(n int) int { return n * lg(n) * lg(n) }) } // This is based on the "antiquicksort" implementation by M. Douglas McIlroy. @@ -357,3 +400,148 @@ func TestAdversary(t *testing.T) { d := &adversaryTestingData{data, make(map[int]int), 0} Sort(d) // This should degenerate to heapsort. } + +func TestStableInts(t *testing.T) { + data := ints + Stable(IntSlice(data[0:])) + if !IntsAreSorted(data[0:]) { + t.Errorf("nsorted %v\n got %v", ints, data) + } +} + +type intPairs []struct { + a, b int +} + +// IntPairs compare on a only. +func (d intPairs) Len() int { return len(d) } +func (d intPairs) Less(i, j int) bool { return d[i].a < d[j].a } +func (d intPairs) Swap(i, j int) { d[i], d[j] = d[j], d[i] } + +// Record initial order in B. +func (d intPairs) initB() { + for i := range d { + d[i].b = i + } +} + +// InOrder checks if a-equal elements were not reordered. +func (d intPairs) inOrder() bool { + lastA, lastB := -1, 0 + for i := 0; i < len(d); i++ { + if lastA != d[i].a { + lastA = d[i].a + lastB = d[i].b + continue + } + if d[i].b <= lastB { + return false + } + lastB = d[i].b + } + return true +} + +func TestStability(t *testing.T) { + n, m := 100000, 1000 + if testing.Short() { + n, m = 1000, 100 + } + data := make(intPairs, n) + + // random distribution + for i := 0; i < len(data); i++ { + data[i].a = rand.Intn(m) + } + if IsSorted(data) { + t.Fatalf("terrible rand.rand") + } + data.initB() + Stable(data) + if !IsSorted(data) { + t.Errorf("Stable didn't sort %d ints", n) + } + if !data.inOrder() { + t.Errorf("Stable wasn't stable on %d ints", n) + } + + // already sorted + data.initB() + Stable(data) + if !IsSorted(data) { + t.Errorf("Stable shuffeled sorted %d ints (order)", n) + } + if !data.inOrder() { + t.Errorf("Stable shuffeled sorted %d ints (stability)", n) + } + + // sorted reversed + for i := 0; i < len(data); i++ { + data[i].a = len(data) - i + } + data.initB() + Stable(data) + if !IsSorted(data) { + t.Errorf("Stable didn't sort %d ints", n) + } + if !data.inOrder() { + t.Errorf("Stable wasn't stable on %d ints", n) + } +} + +var countOpsSizes = []int{1e2, 3e2, 1e3, 3e3, 1e4, 3e4, 1e5, 3e5, 1e6} + +func countOps(t *testing.T, algo func(Interface), name string) { + sizes := countOpsSizes + if testing.Short() { + sizes = sizes[:5] + } + if !testing.Verbose() { + t.Skip("Counting skipped as non-verbose mode.") + } + for _, n := range sizes { + td := testingData{ + desc: name, + t: t, + data: make([]int, n), + maxswap: 1 << 31, + } + for i := 0; i < n; i++ { + td.data[i] = rand.Intn(n / 5) + } + algo(&td) + t.Logf("%s %8d elements: %11d Swap, %10d Less", name, n, td.nswap, td.ncmp) + } +} + +func TestCountStableOps(t *testing.T) { countOps(t, Stable, "Stable") } +func TestCountSortOps(t *testing.T) { countOps(t, Sort, "Sort ") } + +func bench(b *testing.B, size int, algo func(Interface), name string) { + b.StopTimer() + data := make(intPairs, size) + for i := 0; i < b.N; i++ { + for n := size - 3; n <= size+3; n++ { + for i := 0; i < len(data); i++ { + data[i].a = rand.Intn(n / 5) + } + data.initB() + b.StartTimer() + algo(data) + b.StopTimer() + if !IsSorted(data) { + b.Errorf("%s did not sort %d ints", name, n) + } + if name == "Stable" && !data.inOrder() { + b.Errorf("%s unstable on %d ints", name, n) + } + } + } +} + +func BenchmarkSort1e2(b *testing.B) { bench(b, 1e2, Sort, "Sort") } +func BenchmarkStable1e2(b *testing.B) { bench(b, 1e2, Stable, "Stable") } +func BenchmarkSort1e4(b *testing.B) { bench(b, 1e4, Sort, "Sort") } +func BenchmarkStable1e4(b *testing.B) { bench(b, 1e4, Stable, "Stable") } +func BenchmarkSort1e6(b *testing.B) { bench(b, 1e6, Sort, "Sort") } +func BenchmarkStable1e6(b *testing.B) { bench(b, 1e6, Stable, "Stable") }