Sorting Algorithms

Preface

关于排序有很多人整理了,但是基本是东抄一点西抄一点,并没有统一的逻辑,而且往往写的非常复杂。本文希望能够痛殴统一的代码风格、统一的命名规范、统一的实现风格,以体现出不同算法之间的改进和区别,同时尽可能使用简洁的代码(因此可能会存在一些不常规的技巧)(毕竟平时都是直接sort,哪关心怎么实现)。本文代码经过本地测试,且均为从小到大排序。 下文中使用array举例,在文章末尾会放出测试代码,在测试代码中使用vector简化实现。

更新:2021.11.29 测试代码完成。 更优雅的实现;更完整的测试;只考虑int排序;只实现主要算法;使用vector

note:如果你在我的Github Blog上查看此post,在github 仓库源代码中有同名文件,其中包含了以下算法的cpp文件。


Sort Algorithms

1. Bubble Sort

1.1 Principle

Compare the adjacent two elements and put the larger one the the right then move to the next pair.

1.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
void BubbleSort(int arr[], int length)
{
if(arr==NULL || length<=1) return;

for(int i=0; i<length-1; i++) // note: it should be length-1
{
for(int j=0; j<length-1-i; j++)
{
if(arr[j]>arr[j+1]) swap(arr[j],arr[j+1]); // note: swap is also suitable for array
}
}
}

1.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(n2) O(1) stable
(no change in relative position for same elements)


2. Selection Sort (Improvement of Bubble)

2.1 Principle

Always try to find the smallest element from thoes have not be sorted.

2.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
void SelectionSort(int arr[], int length)
{
if(arr==NULL || length<=1) return;

for(int i=0; i<length-1;i++) // note: you do not to sort the last element
{
int min=i; // the index of the smallest element
for(int j=i+1; j<length; j++)
{
if(arr[j]<arr[min]) min=j;
}
swap(arr[i], arr[min]); // put the min element to the head of array
}
}

2.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(n2) O(1)
(one more element for swapping)
unstable
(the swap will let the same element change their relative position)


3. Insertion Sort

3.1 Principle

[sorted, unsorted]. For each element in unsorted array, insert it to sorted array.

3.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
void InsertionSort(int arr[], int length)
{
if(arr==NULL || length<=1) return;

for(int i=1; i<length; i++) // note: you have to sort the first element
{
int key=arr[i];
int j=i-1; // compare this one with the elements before
while(j>=0 && key<arr[j])
{
arr[j+1]=arr[j];
j--;
}
arr[j+1]=key; // note: at this time: j=-1 or arr[j]<key, so it should be j+1
}
}

3.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(n)/O(n2)/O(n2) O(1)
(one more element for swapping)
stable
(no change in relative position for same elements)


4. Shell's Sort (Improvement of Insertion)

4.1 Principle

Divid the sequence to several subsequence. Sort this sequence locally, then sort the whole sequence with insertion sort. eg. for (0,1,2,3,4,5,6,7): (0,2,4,6) will be subsequence1 and (1,3,5,7) will be subsequence2.

4.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
const int increment = 3;
void ShellSort(int arr[], int length)
{
int step=length/increment+1; // you can choose increment=3 or anything else(<length)

while(step>=1)
{
// this part is same with insertion sort
for(int i=step; i<length; i++)
{
int key=arr[i];
int j=i-step;
while(j>=0 && key<arr[j])
{
arr[j+step]=arr[j];
j-=step;
}
arr[j+step]=key;
}
step=step/increment;
}
}

4.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
The worst case is better than O(n2).
With certian increment, the worst case can be O(n1.3)
O(1)
(one more element for swapping)
unstable
(the insertion sort will be performed with different gaps)


5. Merge Sort

5.1 Principle

Based on Divide and Conquer.

Divid the sequence to two sub-sequence(new space). Sort them seperatly, then merge two sub-sequence.

Note: There are two different ways of implementation: recursive and unrecursive way. We only consider recursive way in C++.

5.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// recursive way
void Merge(int arr[], int startIndex, int endIndex)
{
// first, we need to know where the two arrays are located.
int leftMid=(startIndex+endIndex)/2;
int rightMid=leftMid+1;

// we need to allocate new space for sorting
int arrayLeft[leftMid-startIndex+1];
int arrayRight[endIndex-rightMid+1];
for(int i=startIndex; i<=leftMid; i++) arrayLeft[i-startIndex]=arr[i];
for(int i=rightMid; i<=endIndex; i++) arrayRight[i-rightMid]=arr[i];

// two pointer to merge
int index_L=0, index_R=0;

// merge to the original arr--to save space
for(int i=startIndex; i<=endIndex; i++)
{
if(index_L != (leftMid-startIndex+1) && index_R != (endIndex-rightMid+1))
{
if(arrayLeft[index_L]<arrayRight[index_R]) arr[i]=arrayLeft[index_L++];
else arr[i]=arrayRight[index_R++];
}
// if left array is done
else if(index_L == (leftMid-startIndex+1)) arr[i]=arrayRight[index_R++];
// if right array is done
else arr[i]=arrayLeft[index_L++];
}
}
void MergeSort(int arr[], int startIndex, int endIndex) // since it is a recursive way, we need to save both indices
{
if(endIndex>startIndex)
{
MergeSort(arr, startIndex, (startIndex+endIndex)/2); // sort left part
MergeSort(arr, (startIndex+endIndex)/2+1, endIndex);
Merge(arr, startIndex, endIndex); // note: we need to sort in [startIndex, endIndex]
}
}


5.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(nlogn)
with improvement, the best can be O(n)
O(n)
stable
the stability is ensured in Merge


6. QuickSort (Important)

6.1 Principle

Based on Divide and conquer. As an improvement of BubbleSort.

It is the fastest sorting algorithm in average.

Select a pivot, put smaller (than pivot) elements in pivot's left and larger elements in pivot's right. Do it recursively.

6.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
void QuickSort(int arr[], int startIndex, int endIndex)
{
if(endIndex>startIndex)
{
int i=startIndex, j=endIndex;
// select pivot
int pivot=arr[startIndex];
// seperate the sequence
while(i<j)
{
while(i<j && arr[j]>=pivot) j--; // if you select the most left element as pivot, you should go from right-->so that when i==j, the element will be smaller than pivot.
if(i<j) arr[i++]=arr[j]; // put the smaller one to left
while(i<j && arr[i]<=pivot) i++;
if(i<j) arr[j--]=arr[i]; // put the larger one to right
}
// put the pivot, i==j
arr[i]=pivot; // the original arr[i] is in arr[i+1/i-1]

// recursive
QuickSort(arr, startIndex, i-1); // left side
QuickSort(arr, i+1, endIndex); // right side
}
}

6.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(nlogn)/O(nlogn)/O(n2) O(logn)
(on average, the space complexity it the depth of recursion; for the worst case, it can be O(n))
unstable
(the swap of pivot makes it unstable)


7. Radix/Bucket Sort

7.1 Principle

Sort input array using counting sort according to the i’th (from the least significant digit to the most significant) digit.

7.2 Implement

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// helper function, return the max number of digit
int maxBit(int arr[], int length)
{
int d=1;
int p=10;
for(int i=0; i<length; i++)
{
while(arr[i]>=pow(p,d)) d++;
}
return d;
}
void RadixSort(int arr[], int length)
{
int d=maxBit(arr, length); // the max number of digit
int temp[length];
int buckets[10]; // there are 10 buckets (0-9)
int base=1;
int remainder=0;
for(int i=1; i<=d; i++) // you need to sort b times
{
for(int j=0; j<10; j++) buckets[j] = 0; // clear buckets
// put arr[] into buckets
for(int j=0; j<length; j++)
{
remainder=(arr[j]/base)%10;
buckets[remainder]++;
}
// count the total number of elements that are stored in buckets
for(int j=1; j<length; j++) buckets[j]+=buckets[j-1];
// you should go from back to front--make sure this algorithm is stable
for(int j=length-1; j>=0; j--)
{
remainder=(arr[j]/base)%10;
temp[buckets[remainder]-1]=arr[j];
buckets[remainder]--;
}
// put temp back to arr
for(int j=0; j<length; j++) arr[j]=temp[j];

base=base*10;
}
}

7.2 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(nd)
(d is the number of digits)
O(n+d) stable


8. Heap Sort

8.1 Principle

Heap Sort is designed for heap. Heap is a complete binary tree and its childen nodes should be always larger/smaller than parent nodes.

For a Min-Heap, it satisfiies:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#### 8.2 Implement
```c++
// create maxheap
void MaxHeapify(int arr[], int startIndex, int endIndex)
{
int parentNode=startIndex;
int childNode=2*startIndex+1;
while(childNode<=endIndex)
{
// compare the two child nodes, and choose the larger one
if (childNode+1<=endIndex && arr[childNode]<arr[childNode+1]) childNode++;
if (arr[parentNode]<arr[childNode])
{
swap(arr[parentNode], arr[childNode]);
parentNode=childNode;
childNode=parentNode*2+1;
}
else
return;
}
}
void HeapSort(int arr[], int length)
{
// create the heap--start from the last parent node
for(int i=length/2-1; i>=0; i--) MaxHeapify(arr, i, length-1);

for(int i=length-1; i>=0; i--)
{
swap(arr[0], arr[i]); // put the largest element to the end of array
MaxHeapify(arr, 0, i-1); // rebalance the tree
}
}

8.3 Performance

Time Complexity (Best/Worst/Average) Space Complexity Stability
O(n+nlogn)
(n for creating the heap; rebalance the tree(logn) for n times)
O(1)
(one more element for swapping)
unstable
(eg. 64, 27, 16, 27. After 64, the second 27 will be ouput.)

Summary on Performance

Algorithm Time Complexity (Best/Worst/Average) Space Complexity Stability
Bubble O(n2) O(1) stable
(no change in relative position for same elements)
Selection O(n2) O(1)
(one more element for swapping)
unstable
(the swap will let the same element change their relative position)
Insertion O(n)/O(n2)/O(n2) O(1)
(one more element for swapping)
stable
(no change in relative position for same elements)
Shell The worst case is better than O(n2).
With certian increment, the worst case can be O(n1.3)
O(1)
(one more element for swapping)
unstable
(the insertion sort will be performed with different gaps)
Merge O(nlogn)
with improvement, the best can be O(n)
O(n)
stable
the stability is ensured in Merge
Quick O(nlogn)/O(nlogn)/O(n2) O(logn)
(on average, the space complexity it the depth of recursion; for the worst case, it can be O(n))
unstable
(the swap of pivot makes it unstable)
Bucket O(nd)
(d is the number of digits)
O(n+d) stable
Heap O(n+nlogn)
(n for creating the heap; rebalance the tree(logn) for n times)
O(1)
(one more element for swapping)
unstable
(eg. 64, 27, 16, 27. After 64, the second 27 will be ouput.)

Performance in test

Bucket Sort is the fastest sorting algorithm 👑 (it is also stable)!

Algorithm Time consumed
Bubble Sort 400.445ms
Insertion Sort 222.066ms
Selection Sort 129.083ms
Merge Sort 4.5228ms
Heap Sort 2.1415ms
Quick Sort 1.0323ms
Bucket Sort 0.1504ms

Testing Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
#include<iostream>
#include<time.h>
#include<vector>
#include<stdlib.h>
using namespace std;

// sorting algorithms
void Bubble_Sort(vector<int>& vec);
void Selection_Sort(vector<int>& vec);
void Insertion_Sort(vector<int>& vec);
void Merge(vector<int>& vec, int start_index, int end_index);
void Merge_Sort(vector<int>& vec, int start_index, int end_index);
void Quick_Sort(vector<int>& vec, int start_index, int end_index);
void Heap_Sort(vector<int>& vec);
void Bucket_Sort(vector<int>& vec);

// utils
void shuffle_vector(vector<int>& vec)
{
// init random seed
srand((unsigned)time(NULL));
for(int i=vec.size()-1; i>=0; i--)
{
int seed = rand() % (i+1);
swap(vec[seed], vec[i]);
}
}
void show_vector(vector<int>& vec)
{
for(auto i:vec)
{
cout<<i<<" ";
}
cout<<endl;
}
bool check_vector(vector<int>& vec1, vector<int>& vec2)
{
if(vec1.size()!=vec2.size())
{
cout<<"vector size does not match"<<endl;
return false;
}
int n=vec1.size();
for(int i=0; i<n; i++)
{
if(vec1[i]==vec2[i]) continue;
else return false;
}
return true;
}
double average_vector(vector<double>& vec)
{
double sum=0;
for(auto i:vec) sum+=i;
return sum/(double)vec.size();
}

int main()
{
clock_t start;
clock_t end;

// generate data
vector<int> input;
for(int i=0; i<10000; i++) input.push_back(rand()%1000);
vector<int> sorted;
vector<double> time_consumed(10, 0);

// show_vector(input);

// Note: it seems the cache will affact the performance, so load the cache first.
for(int i=0; i<10; i++)
{
shuffle_vector(input);
Bucket_Sort(input);
sorted=input; // get sorted list
}

//////////////////////////////////////////////////////////////////////////////////////////

// Bubble Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
Bubble_Sort(input);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Bubble Sort failed!"<<endl;
return 0;
}
}
cout<<left<<"Bubble Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

// Insertion Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
Insertion_Sort(input);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Insertion Sort failed!"<<endl;
return 0;
}
}
cout<<left<<"Insertion Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

// Selection Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
Selection_Sort(input);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Selection Sort failed!"<<endl;
return 0;
}
}
cout<<left<<"Selection Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

// Merge Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
// input.size()-1
Merge_Sort(input, 0, input.size()-1);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Merge Sort failed!"<<endl;
return 0;
}
}
cout<<left<<"Merge Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

// Heap Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
Heap_Sort(input);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Heap failed!"<<endl;
return 0;
}
}
cout<<left<<"Heap Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

// Quick Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
// input.size()-1
Quick_Sort(input, 0, input.size()-1);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Quick Sort failed!"<<endl;
return 0;
}
}
cout<<left<<"Quick Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

// Bucket Sort
for(int i=0; i<10; i++)
{
shuffle_vector(input);
start=clock();
Bucket_Sort(input);
end=clock();
if(check_vector(sorted, input)) time_consumed[i]=(double)(end-start)/CLOCKS_PER_SEC*1000;
else
{
cout<<"Bucket failed!"<<endl;
return 0;
}
}
cout<<left<<"Bucket Sort: "<<average_vector(time_consumed)<<"ms"<<endl;

return 0;
}

void Bubble_Sort(vector<int>& vec)
{
int length=vec.size();
if(length==0) return;
for(int i=0; i<length-1; i++)
{
for(int j=0; j<length-i-1; j++)
{
if(vec[j]>vec[j+1]) swap(vec[j], vec[j+1]);
}
}
}
void Selection_Sort(vector<int>& vec)
{
int length=vec.size();
if(length==0) return;
for(int i=0; i<length-1; i++)
{
int smallest_index=i;
for(int j=i+1; j<length; j++)
{
if(vec[j]<vec[smallest_index]) smallest_index=j;
}
swap(vec[smallest_index], vec[i]);
}
}
void Insertion_Sort(vector<int>& vec)
{
int length=vec.size();
if(length==0) return;
for(int i=1; i<vec.size(); i++)
{
int insertion_index=i;
for(int j=i-1; j>=0; j--)
{
if(vec[j]>vec[insertion_index])
{
swap(vec[j], vec[insertion_index]);
insertion_index=j;
}
else break;
}
}
}
void Merge(vector<int>& vec, int start_index, int end_index)
{
int length=end_index-start_index+1;
int left_start_index=start_index;
int left_end_index=(start_index+end_index)/2;
int right_start_index=(start_index+end_index)/2+1;
int right_end_index=end_index;

vector<int> tmp_vec(length);
for(int i=0; i<length; i++)
{
if(left_start_index<=left_end_index && right_start_index<=right_end_index)
{
if(vec[left_start_index]<vec[right_start_index])
tmp_vec[i]=vec[left_start_index++];
else
tmp_vec[i]=vec[right_start_index++];
}
else
{
if(right_start_index>right_end_index) tmp_vec[i]=vec[left_start_index++];
else tmp_vec[i]=vec[right_start_index++];
}
}

for(int i=0; i<length; i++) vec[start_index+i]=tmp_vec[i];

}
void Merge_Sort(vector<int>& vec, int start_index, int end_index)
{
int length=end_index-start_index+1;
if(length==0) return;
if(end_index>start_index)
{
Merge_Sort(vec, start_index, (start_index+end_index)/2);
Merge_Sort(vec, (start_index+end_index)/2+1, end_index);
Merge(vec, start_index, end_index);
}
}
void Quick_Sort(vector<int>& vec, int start_index, int end_index)
{
if(start_index>=end_index) return; // >=
int i = start_index, j = end_index+1, pivot = vec[start_index];;
while(true)
{
// the key point is '++i' and '--j'
while(vec[++i]<pivot && i!=end_index); // find the first greater element
while(vec[--j]>pivot && j!=start_index); // find the first smaller element
if(i>=j) break;
swap(vec[i], vec[j]);
}
swap(vec[start_index], vec[j]);
Quick_Sort(vec, start_index, j-1);
Quick_Sort(vec, j+1, end_index);
}
void Max_Heapify(vector<int>& vec, int start_index, int end_index)
{
int parent_index = start_index, child_index = start_index*2+1;
while(child_index <= end_index)
{
if(child_index+1<=end_index && vec[child_index]<vec[child_index+1]) child_index++; // find the max child element
if(vec[parent_index]<vec[child_index])
{
swap(vec[parent_index], vec[child_index]);
parent_index = child_index;
child_index = parent_index*2+1;
}
else return; // since we go from bottom, there is no need to check the rest
}
}
void Heap_Sort(vector<int>& vec)
{
int length=vec.size();
if(length==0) return;
// construct max_heap
for(int i=length/2-1; i>=0; i--) Max_Heapify(vec, i, length-1);

for(int i=0; i<length-1; i++)
{
swap(vec[0], vec[length-1-i]);
Max_Heapify(vec, 0, length-2-i);
}
}
void Bucket_Sort(vector<int>& vec)
{
int length=vec.size();
if(length==0) return;
int min_element = INT_MAX, max_element = INT_MIN; // find max/min element
for(auto i:vec)
{
if(min_element>i) min_element = i;
if(max_element<i) max_element = i;
}
vector<int> buckets(max_element-min_element+1, 0); // init buckets with 0
for(auto i:vec)
buckets[i-min_element]++;

int i=0;
for(int j=0; j<max_element-min_element+1; j++)
{
if(buckets[j]==0) continue;
for(int k=0; k<buckets[j]; k++)
{
vec[i++] = min_element+j;
}
}
}