L
Meine Zeitrechnung für den Benchmark war ja komplett falsch... (Wall time vs. CPU time...) Wieso sagt keiner was?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
size_t initial_seed = 123;
size_t n = 100;
size_t rand1(size_t seed, size_t max)
{
size_t a = 16807;
return (a * seed) % max;
}
size_t my_rand(size_t max)
{
size_t r = rand1(initial_seed, max);
initial_seed = rand1(initial_seed, -1);
return r;
}
void fill(size_t m, size_t a[m][n])
{
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
a[i][j] = my_rand(51);
}
}
}
double average(size_t a[])
{
// The average should be around 25.
double average = 0;
for (size_t i = 0; i < n; i++)
{
average += (double)a[i] / (double)n;
}
return average;
}
void print(size_t m, size_t a[m][n])
{
for (size_t i = 0; i < n; i++)
{
printf("%zu ", a[0][i]);
}
printf("\na: %f\n...\n", average(a[0]));
// ...
for (size_t i = 0; i < n; i++)
{
printf("%zu ", a[m - 1][i]);
}
printf("\na: %f\n\n---\n\n", average(a[m - 1]));
}
// Comparison function for sort_a
int compare(const void *a, const void *b)
{
return (*(size_t *)a - *(size_t *)b);
}
void sort_a(size_t arr[])
{
qsort(arr, n, sizeof(size_t), compare);
}
#pragma GCC push_options
#pragma GCC optimize("O0")
void sort_b_optimized(size_t arr[])
{
asm volatile(
" movq %%rdi, -40(%%rbp) \n"
" movq $0, -8(%%rbp) \n"
" movq $1, -16(%%rbp) \n"
"1: \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -24(%%rbp) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -32(%%rbp) \n"
" movq -24(%%rbp), %%rax \n"
" cmpq %%rax, -32(%%rbp) \n"
" jnb 2f \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -32(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -40(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -24(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq $0, -8(%%rbp) \n"
" movq $1, -16(%%rbp) \n"
" jmp 3f \n"
"2: \n"
" addq $1, -8(%%rbp) \n"
" addq $1, -16(%%rbp) \n"
"3: \n"
" movq n(%%rip), %%rax \n"
" cmpq %%rax, -16(%%rbp) \n"
" jb 1b \n"
: /* No outputs. */
: /* No inputs. */
: "memory");
}
#pragma GCC pop_options
void sort_c(size_t arr[])
{
for (size_t i = 0; i < n - 1; i++)
{
size_t a = i;
size_t b = arr[i];
for (size_t j = i + 1; j < n; j++)
{
if (arr[j] < b)
{
a = j;
b = arr[j];
}
}
arr[a] = arr[i];
arr[i] = b;
}
}
void set_time(clock_t *t_old, clock_t *t, clock_t *max_dif)
{
*t = clock();
clock_t dif = *t - *t_old;
if (dif > *max_dif)
{
*max_dif = dif;
}
}
int main(int argc, char const *argv[])
{
clock_t t1, t2, t3, t4, t5, t6, max_dif = 0;
size_t iterations = 10000;
size_t arr[iterations][n];
fill(iterations, arr);
print(iterations, arr);
set_time(&t1, &t1, &max_dif);
for (size_t i = 0; i < iterations; i++)
{
sort_a(arr[i]);
}
set_time(&t1, &t2, &max_dif);
print(iterations, arr);
fill(iterations, arr);
print(iterations, arr);
set_time(&t3, &t3, &max_dif);
for (size_t i = 0; i < iterations; i++)
{
sort_b_optimized(arr[i]);
}
set_time(&t3, &t4, &max_dif);
print(iterations, arr);
fill(iterations, arr);
print(iterations, arr);
set_time(&t5, &t5, &max_dif);
for (size_t i = 0; i < iterations; i++)
{
sort_c(arr[i]);
}
set_time(&t5, &t6, &max_dif);
print(iterations, arr);
double t1_spent, t2_spent, t3_spent, max_spent;
t1_spent = (double)(t2 - t1) / CLOCKS_PER_SEC;
t2_spent = (double)(t4 - t3) / CLOCKS_PER_SEC;
t3_spent = (double)(t6 - t5) / CLOCKS_PER_SEC;
max_spent = (double)(max_dif) / CLOCKS_PER_SEC;
printf("%f s\n%f s\n%f s\n", t1_spent, t2_spent, t3_spent);
printf("%f %%\n%f %%\n%f %%\n", t1_spent * 100 / max_spent, t2_spent * 100 / max_spent, t3_spent * 100 / max_spent);
return 0;
}
Leider bin ich jetzt am langsamsten. Kein Bock mehr.
Edit: Ok, es liegt daran, dass alle bis auf zwei sort_c-Schleifen wegoptimiert werden. Der Compiler erkennt, dass zum Schluss nur noch das erste und letzte Array ausgegeben werden muss - und schneidet die Äste dann ab. So macht ein Vergleich keinen Sinn mehr.