L
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
volatile size_t initial_seed = 123;
size_t total_hash = 23;
// A structure for the future rating system.
typedef struct score
{
size_t index;
size_t index_2;
double seconds;
double total_seconds;
size_t total_points;
char *name;
void (*func)(size_t, size_t *);
} SCORE;
size_t rand1(size_t seed, size_t max)
{
size_t a = 16807;
return (a * seed) % max;
}
size_t my_rand(size_t max)
{
size_t r = rand1(initial_seed, max);
initial_seed = rand1(initial_seed, -1);
return r;
}
void rfill(size_t n, size_t m, size_t a[m][n])
{
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
a[i][j] = my_rand(51);
}
}
}
double average(size_t n, size_t a[])
{
// The average should be around 25.
double average = 0;
for (size_t i = 0; i < n; i++)
{
average += (double)a[i] / (double)n;
}
return average;
}
// Fast, combining hash function with few collisions and h(a,b) often not equal to h(b,a)...
size_t my_hash(size_t a, size_t b)
{
return a * 31 + b;
}
void add_to_total_hash(size_t n, size_t m, size_t a[m][n])
{
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
total_hash = my_hash(total_hash, a[i][j]);
}
}
}
void print(size_t n, size_t m, size_t a[m][n])
{
add_to_total_hash(n, m, a);
for (size_t i = 0; i < n; i++)
{
printf("%zu ", a[0][i]);
}
printf("\na: %f\n...\n", average(n, a[0]));
// ...
for (size_t i = 0; i < n; i++)
{
printf("%zu ", a[m - 1][i]);
}
printf("\na: %f\nh: %zu\n\n", average(n, a[m - 1]), total_hash);
}
// Comparison function for sort_a
int compare(const void *a, const void *b)
{
return (*(size_t *)a - *(size_t *)b);
}
// Comparison function for score seconds
int compare_score_seconds(const void *a, const void *b)
{
double a_dbl = ((SCORE *)a)->seconds;
double b_dbl = ((SCORE *)b)->seconds;
return a_dbl < b_dbl ? -1 : a_dbl > b_dbl ? +1
: 0;
}
// Comparison function for score total points
int compare_score_total_points(const void *a, const void *b)
{
int a_int = ((SCORE *)a)->total_points;
int b_int = ((SCORE *)b)->total_points;
return b_int - a_int;
}
// Quicksort
void sort_a_quick(size_t n, size_t arr[])
{
qsort(arr, n, sizeof(size_t), compare);
}
// Bubble sort
void sort_b_bubble(size_t n, size_t arr[])
{
size_t i = 0, j = 1, a, b;
while (j < n)
{
a = arr[i];
b = arr[j];
if (a > b)
{
arr[i] = b;
arr[j] = a;
i = 0;
j = 1;
continue;
}
i++;
j++;
}
}
// Selection sort
void sort_c_selection(size_t n, size_t arr[])
{
for (size_t i = 0; i < n - 1; i++)
{
size_t a = i;
size_t b = arr[i];
for (size_t j = i + 1; j < n; j++)
{
if (arr[j] < b)
{
a = j;
b = arr[j];
}
}
arr[a] = arr[i];
arr[i] = b;
}
}
// Insertion sort
void sort_d_insertion(size_t n, size_t arr[])
{
size_t i = 0, j = 1, k, l, a, b;
while (j < n)
{
a = arr[i];
b = arr[j];
if (a > b)
{
k = i;
l = j;
do
{
arr[k] = b;
arr[l] = a;
if (k == 0)
{
break;
}
k--;
l--;
a = arr[k];
b = arr[l];
} while (a > b);
}
i++;
j++;
}
}
#pragma GCC push_options
#pragma GCC optimize("O0")
void sort_e_insertion_optimized(size_t n, size_t arr[])
{
asm volatile(
" movq %%rdi, -56(%%rbp) \n"
" movq %%rsi, -64(%%rbp) \n"
" movq $0, -8(%%rbp) \n"
" subq $1, -8(%%rbp) \n"
" movq $0, -16(%%rbp) \n"
"1: \n"
" addq $1, -16(%%rbp) \n"
" movq -16(%%rbp), %%rax \n"
" cmpq -56(%%rbp), %%rax \n"
" jnb 3f \n"
" addq $1, -8(%%rbp) \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -64(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -40(%%rbp) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -64(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -48(%%rbp) \n"
" movq -40(%%rbp), %%rax \n"
" cmpq %%rax, -48(%%rbp) \n"
" jnb 1b \n"
" movq -8(%%rbp), %%rax \n"
" movq %%rax, -24(%%rbp) \n"
" movq -16(%%rbp), %%rax \n"
" movq %%rax, -32(%%rbp) \n"
"2: \n"
" movq -24(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -64(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -48(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq -32(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -64(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" cmpq $0, -24(%%rbp) \n"
" je 1b \n"
" subq $1, -24(%%rbp) \n"
" subq $1, -32(%%rbp) \n"
" movq -24(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -64(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -40(%%rbp) \n"
" movq -32(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -64(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -48(%%rbp) \n"
" movq -40(%%rbp), %%rax \n"
" cmpq %%rax, -48(%%rbp) \n"
" jnb 1b \n"
" jmp 2b \n"
"3: \n"
: /* No outputs. */
: /* No inputs. */
: "memory");
}
#pragma GCC pop_options
double profile_sort_func(void (*f)(size_t, size_t *), size_t n, size_t m, size_t arr[m][n])
{
rfill(n, m, arr);
print(n, m, arr);
clock_t c1 = clock();
for (size_t i = 0; i < m; i++)
{
(*f)(n, arr[i]);
}
clock_t c2 = clock();
print(n, m, arr);
return (double)(c2 - c1) / CLOCKS_PER_SEC;
}
size_t scores_len = 5;
SCORE scores[] = {
{0, 0, 0, 0, 0, "q-sort ", sort_a_quick},
{1, 1, 0, 0, 0, "bubble ", sort_b_bubble},
{2, 2, 0, 0, 0, "select ", sort_c_selection},
{3, 3, 0, 0, 0, "insert ", sort_d_insertion},
{4, 4, 0, 0, 0, "insert_asm", sort_e_insertion_optimized},
};
size_t get_sort_winner(size_t n, size_t m)
{
// Prepare the array for testing
size_t arr[m][n];
for (size_t i = 0; i < scores_len; i++)
{
scores[i].seconds = profile_sort_func(scores[i].func, n, m, arr);
scores[i].total_seconds += scores[i].seconds;
}
// Copy the results so that you can sort them
SCORE scores_cpy[scores_len];
for (size_t i = 0; i < scores_len; i++)
{
scores_cpy[i] = scores[i];
}
// Sort by current duration
qsort(scores_cpy, scores_len, sizeof(SCORE), compare_score_seconds);
double factor = 100 / scores_cpy[scores_len - 1].seconds;
size_t winner = scores_cpy[0].index;
for (size_t i = 0; i < scores_len; i++)
{
scores[scores_cpy[i].index].index_2 = i;
}
for (size_t i = 0; i < scores_len; i++)
{
scores[i].total_points += scores_len - scores[i].index_2;
}
// Sort by total points
qsort(scores_cpy, scores_len, sizeof(SCORE), compare_score_total_points);
for (size_t i = 0; i < scores_len; i++)
{
scores[scores_cpy[i].index].index_2 = i;
}
for (size_t i = 0; i < scores_len; i++)
{
printf("%s: (%zu %zu.) %fs %f%% %fts %zu total_points\n", scores[i].name, scores[i].index, scores[i].index_2 + 1, scores[i].seconds, scores[i].seconds * factor, scores[i].total_seconds, scores[i].total_points);
}
return winner;
}
// Compile with "gcc -Wall -O0 main.c -o main" or with "gcc -Wall -O3 main.c -o main"
int main(int argc, char const *argv[])
{
size_t n = 10;
size_t m = 10000;
// Play until the current winner is equal to qsort
while (get_sort_winner(n++, m))
;
printf("n_max=%zu\n", n - 1);
}