L
@wob
So besser?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
volatile size_t initial_seed = 123;
size_t total_hash = 23;
size_t rand1(size_t seed, size_t max)
{
size_t a = 16807;
return (a * seed) % max;
}
size_t my_rand(size_t max)
{
size_t r = rand1(initial_seed, max);
initial_seed = rand1(initial_seed, -1);
return r;
}
void rfill(size_t n, size_t m, size_t a[m][n])
{
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
a[i][j] = my_rand(51);
}
}
}
double average(size_t n, size_t a[])
{
// The average should be around 25.
double average = 0;
for (size_t i = 0; i < n; i++)
{
average += (double)a[i] / (double)n;
}
return average;
}
// Fast, combining hash function with few collisions and h(a,b) often not equal to h(b,a)...
size_t my_hash(size_t a, size_t b)
{
return a * 31 + b;
}
void add_to_total_hash(size_t n, size_t m, size_t a[m][n])
{
for (size_t i = 0; i < m; i++)
{
for (size_t j = 0; j < n; j++)
{
total_hash = my_hash(total_hash, a[i][j]);
}
}
}
void print(size_t n, size_t m, size_t a[m][n])
{
add_to_total_hash(n, m, a);
for (size_t i = 0; i < n; i++)
{
printf("%zu ", a[0][i]);
}
printf("\na: %f\n...\n", average(n, a[0]));
// ...
for (size_t i = 0; i < n; i++)
{
printf("%zu ", a[m - 1][i]);
}
printf("\na: %f\nh: %zu\n\n", average(n, a[m - 1]), total_hash);
}
// Comparison function for sort_a
int compare(const void *a, const void *b)
{
return (*(size_t *)a - *(size_t *)b);
}
// Quicksort?
void sort_a(size_t n, size_t arr[])
{
qsort(arr, n, sizeof(size_t), compare);
}
// Bubble sort
void sort_b(size_t n, size_t arr[])
{
size_t i = 0, j = 1, a, b;
while (j < n)
{
a = arr[i];
b = arr[j];
if (a > b)
{
arr[i] = b;
arr[j] = a;
i = 0;
j = 1;
continue;
}
i++;
j++;
}
}
// Selection sort
void sort_c(size_t n, size_t arr[])
{
for (size_t i = 0; i < n - 1; i++)
{
size_t a = i;
size_t b = arr[i];
for (size_t j = i + 1; j < n; j++)
{
if (arr[j] < b)
{
a = j;
b = arr[j];
}
}
arr[a] = arr[i];
arr[i] = b;
}
}
// Insertion sort
void sort_d(size_t n, size_t arr[])
{
size_t i = 0, j = 1, k, l, a, b;
while (j < n)
{
a = arr[i];
b = arr[j];
if (a > b)
{
k = i;
l = j;
do
{
arr[k] = b;
arr[l] = a;
if (k == 0)
{
break;
}
k--;
l--;
a = arr[k];
b = arr[l];
} while (a > b);
}
i++;
j++;
}
}
#pragma GCC push_options
#pragma GCC optimize("O0")
void sort_optimized(size_t n, size_t arr[])
{
asm volatile(
" movq %%rdi, -40(%%rbp) \n"
" movq %%rsi, -48(%%rbp) \n"
" movq $0, -8(%%rbp) \n"
" movq $1, -16(%%rbp) \n"
" jmp 3f \n"
"1: \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -48(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -24(%%rbp) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -48(%%rbp), %%rax \n"
" addq %%rdx, %%rax \n"
" movq (%%rax), %%rax \n"
" movq %%rax, -32(%%rbp) \n"
" movq -24(%%rbp), %%rax \n"
" cmpq %%rax, -32(%%rbp) \n"
" jnb 2f \n"
" movq -8(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -48(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -32(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq -16(%%rbp), %%rax \n"
" leaq 0(,%%rax,8), %%rdx \n"
" movq -48(%%rbp), %%rax \n"
" addq %%rax, %%rdx \n"
" movq -24(%%rbp), %%rax \n"
" movq %%rax, (%%rdx) \n"
" movq $0, -8(%%rbp) \n"
" movq $1, -16(%%rbp) \n"
" jmp 3f \n"
"2: \n"
" addq $1, -8(%%rbp) \n"
" addq $1, -16(%%rbp) \n"
"3: \n"
" movq -16(%%rbp), %%rax \n"
" cmpq -40(%%rbp), %%rax \n"
" jb 1b \n"
: /* No outputs. */
: /* No inputs. */
: "memory");
}
#pragma GCC pop_options
double profile_sort_func(void (*f)(size_t, size_t *), size_t n, size_t m, size_t arr[m][n])
{
rfill(n, m, arr);
print(n, m, arr);
clock_t c1 = clock();
for (size_t i = 0; i < m; i++)
{
(*f)(n, arr[i]);
}
clock_t c2 = clock();
print(n, m, arr);
return (double)(c2 - c1) / CLOCKS_PER_SEC;
}
int main(int argc, char const *argv[])
{
size_t n = 10;
size_t m = 100000;
size_t arr[m][n];
double secs[5] = {
profile_sort_func(sort_a, n, m, arr),
profile_sort_func(sort_b, n, m, arr),
profile_sort_func(sort_c, n, m, arr),
profile_sort_func(sort_d, n, m, arr),
profile_sort_func(sort_optimized, n, m, arr),
};
double max = 0;
for (size_t i = 0; i < 5; i++)
{
if (secs[i] > max)
{
max = secs[i];
}
}
printf("%fs %f%%\n%fs %f%%\n%fs %f%%\n%fs %f%%\n%fs %f%%\n",
secs[0], secs[0] * 100 / max,
secs[1], secs[1] * 100 / max,
secs[2], secs[2] * 100 / max,
secs[3], secs[3] * 100 / max,
secs[4], secs[4] * 100 / max);
return 0;
}
gcc -Wall main.c -o main
./main
27 42 3 33 40 14 19 12 36 43
a: 26.900000
...
24 36 36 26 49 36 47 1 48 40
a: 34.300000
h: 5939366454565110528
3 12 14 19 27 33 36 40 42 43
a: 26.900000
...
1 24 26 36 36 36 40 47 48 49
a: 34.300000
h: 4429705970288659611
33 44 43 0 24 15 46 36 2 11
a: 25.400000
...
28 20 39 12 2 25 18 43 16 50
a: 25.300000
h: 6352222085557659981
0 2 11 15 24 33 36 43 44 46
a: 25.400000
...
2 12 16 18 20 25 28 39 43 50
a: 25.300000
h: 4505934733348363633
9 39 48 42 46 11 50 24 15 37
a: 32.100000
...
1 23 45 46 29 44 24 3 7 40
a: 26.200000
h: 4225079418598471750
9 11 15 24 37 39 42 46 48 50
a: 32.100000
...
1 3 7 23 24 29 40 44 45 46
a: 26.200000
h: 1630903755648978609
3 26 21 15 26 27 35 13 32 43
a: 24.100000
...
45 8 6 21 47 31 28 6 45 24
a: 26.100000
h: 4076123578238199228
3 13 15 21 26 26 27 32 35 43
a: 24.100000
...
6 6 8 21 24 28 31 45 45 47
a: 26.100000
h: 10126057256555243605
13 30 14 4 11 21 10 24 4 19
a: 15.000000
...
32 12 45 33 31 9 41 32 21 46
a: 30.200000
h: 14143251785556203417
4 4 10 11 13 14 19 21 24 30
a: 15.000000
...
9 12 21 31 32 32 33 41 45 46
a: 30.200000
h: 10117127439067026547
0.019759s 66.492798%
0.029716s 100.000000%
0.019451s 65.456320%
0.014100s 47.449186%
0.028555s 96.093014%
... Ich habe noch nicht angefangen, das Assembly zu optimieren.