1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
/* SPDX-License-Identifier: GPL-3.0-or-later
* Copyright © 2023-2025 The TokTok team.
*/
#include "sort.h"
#include <assert.h>
#include "attributes.h"
#include "ccompat.h"
#include "util.h"
/**
* @brief Threshold for when to switch to insertion sort.
*
* This is a trade-off between the complexity of insertion sort and the
* overhead of merge sort. The threshold is chosen to be the smallest value
* that gives a measurable speedup for insertion sort over merge sort. This is
* based on measurements done in sort_bench.cc. Starting from 32 elements,
* merge sort is faster than insertion sort in all our tests (both unsorted
* and mostly-sorted).
*
* Toxcore has a lot of small arrays it wants to sort, so this optimisation
* makes sense.
*/
#define SMALL_ARRAY_THRESHOLD 16
non_null()
static void merge_sort_merge_back(
void *arr,
const void *l_arr, uint32_t l_arr_size,
const void *r_arr, uint32_t r_arr_size,
uint32_t left_start,
const void *object, const Sort_Funcs *funcs)
{
uint32_t li = 0;
uint32_t ri = 0;
uint32_t k = left_start;
while (li < l_arr_size && ri < r_arr_size) {
const void *l = funcs->get_callback(l_arr, li);
const void *r = funcs->get_callback(r_arr, ri);
// !(r < l) <=> (r >= l) <=> (l <= r)
if (!funcs->less_callback(object, r, l)) {
funcs->set_callback(arr, k, l);
++li;
} else {
funcs->set_callback(arr, k, r);
++ri;
}
++k;
}
/* Copy the remaining elements of `l_arr[]`, if there are any. */
while (li < l_arr_size) {
funcs->set_callback(arr, k, funcs->get_callback(l_arr, li));
++li;
++k;
}
/* Copy the remaining elements of `r_arr[]`, if there are any. */
while (ri < r_arr_size) {
funcs->set_callback(arr, k, funcs->get_callback(r_arr, ri));
++ri;
++k;
}
}
/** Function to merge the two haves `arr[left_start..mid]` and `arr[mid+1..right_end]` of array `arr[]`. */
non_null()
static void merge_sort_merge(
void *arr, uint32_t left_start, uint32_t mid, uint32_t right_end, void *tmp,
const void *object, const Sort_Funcs *funcs)
{
const uint32_t l_arr_size = mid - left_start + 1;
const uint32_t r_arr_size = right_end - mid;
/* Temporary arrays, using the tmp buffer created in `merge_sort` below. */
void *l_arr = funcs->subarr_callback(tmp, 0, l_arr_size);
void *r_arr = funcs->subarr_callback(tmp, l_arr_size, r_arr_size);
/* Copy data to temp arrays `l_arr[]` and `r_arr[]`.
*
* This is iterating and repeatedly calling `get` and `set`, which sounds
* slow, but is only marginally slower than having a `copy` callback. With
* a `copy` callback, we'd save 3-4% in time.
*/
for (uint32_t i = 0; i < l_arr_size; ++i) {
funcs->set_callback(l_arr, i, funcs->get_callback(arr, left_start + i));
}
for (uint32_t i = 0; i < r_arr_size; ++i) {
funcs->set_callback(r_arr, i, funcs->get_callback(arr, mid + 1 + i));
}
/* Merge the temp arrays back into `arr[left_start..right_end]`. */
merge_sort_merge_back(arr, l_arr, l_arr_size, r_arr, r_arr_size, left_start, object, funcs);
}
non_null()
static void insertion_sort_step(void *arr, void *tmp, uint32_t i, const void *object, const Sort_Funcs *funcs)
{
funcs->set_callback(tmp, 0, funcs->get_callback(arr, i));
uint32_t j = i;
while (j > 0) {
if (!funcs->less_callback(object, tmp, funcs->get_callback(arr, j - 1))) {
break;
}
funcs->set_callback(arr, j, funcs->get_callback(arr, j - 1));
--j;
}
funcs->set_callback(arr, j, tmp);
}
non_null()
static void insertion_sort_with_buf(void *arr, uint32_t arr_size, void *tmp, uint32_t tmp_size, const void *object, const Sort_Funcs *funcs)
{
for (uint32_t i = 1; i < arr_size; ++i) {
insertion_sort_step(arr, tmp, i, object, funcs);
}
}
non_null()
static bool insertion_sort(void *arr, uint32_t arr_size, const void *object, const Sort_Funcs *funcs)
{
void *tmp = funcs->alloc_callback(object, 1);
if (tmp == nullptr) {
return false;
}
insertion_sort_with_buf(arr, arr_size, tmp, 1, object, funcs);
funcs->delete_callback(object, tmp, 1);
return true;
}
void merge_sort_with_buf(void *arr, uint32_t arr_size, void *tmp, uint32_t tmp_size, const void *object, const Sort_Funcs *funcs)
{
assert(tmp_size >= arr_size);
if (arr_size <= SMALL_ARRAY_THRESHOLD) {
assert(tmp_size >= 1);
insertion_sort_with_buf(arr, arr_size, tmp, tmp_size, object, funcs);
return;
}
// Merge subarrays in bottom up manner. First merge subarrays of
// size 1 to create sorted subarrays of size 2, then merge subarrays
// of size 2 to create sorted subarrays of size 4, and so on.
for (uint32_t curr_size = 1; curr_size <= arr_size - 1; curr_size = 2 * curr_size) {
// Pick starting point of different subarrays of current size
for (uint32_t left_start = 0; left_start < arr_size - 1; left_start += 2 * curr_size) {
// Find ending point of left subarray. mid+1 is starting
// point of right
const uint32_t mid = min_u32(left_start + curr_size - 1, arr_size - 1);
const uint32_t right_end = min_u32(left_start + 2 * curr_size - 1, arr_size - 1);
// Merge Subarrays arr[left_start...mid] & arr[mid+1...right_end]
merge_sort_merge(arr, left_start, mid, right_end, tmp, object, funcs);
}
}
}
bool merge_sort(void *arr, uint32_t arr_size, const void *object, const Sort_Funcs *funcs)
{
if (arr_size <= SMALL_ARRAY_THRESHOLD) {
return insertion_sort(arr, arr_size, object, funcs);
}
void *tmp = funcs->alloc_callback(object, arr_size);
if (tmp == nullptr) {
return false;
}
merge_sort_with_buf(arr, arr_size, tmp, arr_size, object, funcs);
funcs->delete_callback(object, tmp, arr_size);
return true;
}
|