E
Die bisher genutzte Klasse BigUnsigned ist zwar sehr schön einsehbar und nachvollziehbar, aber sie ist einfach zu langsam für Mersennezahlen.
Daher verwende ich ab sofort das auf GMP basierende MPIR 2.7.0 (mpir.dll wurde von mir mit MS VS 2015 für x64 geschwindigkeits-optimiert erstellt):
Aktueller Code:
#define NOMINMAX // due to conflict with max()
#include <windows.h> // warning: no C++ standard!
#include <mpirxx.h>
#include <iostream>
#include <cstdint>
#include <vector>
#include <cassert>
#include <thread>
#include <mutex>
///#define DETAILS
///#define OUTPUT_OF_PRECHECKS
///#define NEGATIVE_LL
using namespace std;
const uint32_t numberOfThreads = 11;
const uint32_t DIVISOR_PRECHECK_K_MAX = 600;
uint64_t const primeLimit = 200000000;
uint64_t frequency;
mutex mutex0;
mutex mutex1;
mutex mutex2;
mutex mutex3;
void textcolor(unsigned short color = 15)
{
SetConsoleTextAttribute(::GetStdHandle(STD_OUTPUT_HANDLE), color);
}
mpz_class calculateMersenneNumber(uint64_t p)
{
mpz_class x = 2;
for (uint64_t i = 1; i < p; ++i)
{
x <<= 1;
}
return (x - 1);
}
bool LucasLehmerTest(mpz_class m, uint64_t p, uint64_t startCount, uint64_t lastCount)
{
mpz_class s = 4;
for (uint64_t i = 2; i < p; ++i)
{
s = (s*s - 2) % m;
#ifdef DETAILS
QueryPerformanceCounter((LARGE_INTEGER*)&lastCount);
uint64_t delta = lastCount - startCount;
mutex1.lock();
cout << "p = " << p << " i = " << i << " s(len) = " << s.getLength();
cout << "\ttime: " << 1000 * delta / frequency << " ms" << endl;
mutex1.unlock();
#endif
}
return (s == 0);
}
// division test with 2*k*p+1
uint64_t divisionPrecheck(mpz_class m, uint64_t p, uint64_t startCount, uint64_t lastCount)
{
for (uint64_t k = 1; k <= DIVISOR_PRECHECK_K_MAX; ++k)
{
mpz_class Divisor = k * p;
Divisor <<= 1; // 2 * k * p
Divisor += 1; // 2 * k * p +1
#ifdef DETAILS
QueryPerformanceCounter((LARGE_INTEGER*)&lastCount);
uint64_t delta = lastCount - startCount;
mutex2.lock();
cout << "check: p = " << p << " k = " << k;
cout << "\ttime: " << 1000 * delta / frequency << " ms" << endl;
mutex2.unlock();
#endif
if (m % Divisor == 0)
{
return k;
}
}
return 0; // No divisor found
}
bool precheck_mod4_is_3_and_2p_plus_1_isPrime (vector<bool>& primes, uint64_t p)
{
return ((p % 4 == 3) && (primes[2 * p + 1]));
}
void mersennePrimeCheck(vector<bool>& primes, uint64_t startCount, uint64_t lastCount, uint64_t startP, uint64_t limitP)
{
for (uint64_t p = startP; p < limitP; ++p)
{
// primes lookup for p // In order for M(p) to be prime, p must itself be prime.
if (!primes[p])
{
#ifdef DETAILS
mutex0.lock();
textcolor(3);
cout << ">>> p = " << p << " no prime! <<<" << endl;
mutex0.unlock();
#endif
continue;
}
// precheck: P = 3 (mod 4) && isPrime (2*p+1) ?
if (precheck_mod4_is_3_and_2p_plus_1_isPrime (primes, p))
{
#ifdef OUTPUT_OF_PRECHECKS
mutex3.lock();
textcolor(5);
cout << ">>> p = " << p << " p_mod4_is_3_and_2p_plus_1_isPrime! <<<" << endl;
mutex3.unlock();
#endif
continue;
}
// if p is prime, then we calculate the mersenne number
mpz_class m = calculateMersenneNumber(p);
// mersenne numbers with (2*k*p+1) factors are no mersenne primes
if (p > 19)
{
uint64_t divisor = divisionPrecheck(m, p, startCount, lastCount);
if (divisor != 0)
{
#ifdef OUTPUT_OF_PRECHECKS
mutex0.lock();
textcolor(3);
cout << ">>> divisionPrecheck (2*k*p+1) finds k = " << divisor << " for p = " << p << " <<<" << endl;
mutex0.unlock();
#endif
continue;
}
}
if (LucasLehmerTest(m, p, startCount, lastCount))
{
QueryPerformanceCounter((LARGE_INTEGER*)&lastCount);
uint64_t delta = lastCount - startCount;
mutex0.lock();
textcolor(15);
cout << "p: " << p;
textcolor(2);
cout << "\ttime: " << 1000 * delta / frequency << " ms" << endl;
mutex0.unlock();
}
#ifdef NEGATIVE_LL
else
{
cout << ">>> p = " << p << " LucasLehmerTest negative! <<<" << endl;
}
#endif
}
mutex0.lock();
textcolor(11);
cout << "work is done for " << startP << " to " << limitP - 1 << endl;
mutex0.unlock();
}
int main()
{
uint64_t startCount = 0, lastCount = 0, delta = 0;
QueryPerformanceFrequency((LARGE_INTEGER*)&frequency);
cout << "cpu frequency: " << frequency << " Hz" << endl;
cout << "DIVISOR_PRECHECK_K_MAX: " << DIVISOR_PRECHECK_K_MAX << endl;
cout << "number of threads: " << numberOfThreads << endl;
////////////////////////// Generate primes lookup table /////////////////////////////////////////////////////////////
vector<bool> primes(primeLimit + 1); // calculated primes (low values)
primes[0] = primes[1] = false;
for (uint64_t i = 2; i < primeLimit + 1; ++i) { primes[i] = true; }
// Erastothenes sieving loop
uint64_t iMax = (uint64_t)sqrt((double)primeLimit) + 1;
for (uint64_t i = 2; i < iMax; ++i)
{
if (primes[i])
{
for (uint64_t j = 2 * i; j < primeLimit + 1; j += i)
{
primes[j] = false;
}
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
cout << "Lucas Lehmer prime test for mersenne numbers" << endl; // https://de.wikipedia.org/wiki/Lucas-Lehmer-Test
QueryPerformanceCounter((LARGE_INTEGER*)&startCount);
vector<thread> t;
uint64_t deltaP = 2400;
uint64_t startP = 0; // 57885160;
uint64_t limitP = startP + deltaP;
for (int x = 0; x < numberOfThreads; ++x)
{
t.emplace_back(mersennePrimeCheck, primes, startCount, lastCount, startP + x * deltaP, limitP + x * deltaP);
}
for (int x = 0; x < numberOfThreads; ++x)
{
t[x].join(); // main has to wait for the results of the threads.
}
}
Das Ergebnis ist gegenüber der nun umgehend in den Ruhestand verabschiedeten Klasse BigUnsigned mehr als überzeugend:
cpu frequency: 3127021 Hz
DIVISOR_PRECHECK_K_MAX: 600
number of threads: 11
Lucas Lehmer prime test for mersenne numbers
p: 5 time: 7 ms
p: 7 time: 8 ms
p: 13 time: 8 ms
p: 17 time: 8 ms
p: 19 time: 9 ms
p: 31 time: 9 ms
p: 61 time: 10 ms
p: 89 time: 11 ms
p: 107 time: 12 ms
p: 127 time: 13 ms
p: 521 time: 29 ms
p: 607 time: 35 ms
p: 1279 time: 203 ms
p: 2203 time: 1395 ms
p: 2281 time: 1687 ms
p: 3217 time: 3600 ms
p: 4253 time: 12141 ms
p: 4423 time: 14403 ms
p: 9689 time: 15894 ms
p: 9941 time: 53003 ms
p: 21701 time: 79777 ms
p: 11213 time: 185587 ms
p: 19937 time: 368181 ms
p: 23209 time: 1153250 ms
Man kann MPIR für geschwindigkeitskritische Aufgaben wirklich empfehlen. BigUnsigned macht nur Sinn, wenn man selbst in der Klasse herum werkeln will. das kann man bei GMP vergessen.
Es fehlt noch die rasante verbesserte FT-Multiplikation (für LL Test), oder ist diese in GMP schon eingebaut? Könnte ich mir bei diesen rasanten Zeiten vorstellen. EDIT: Ist bereits eingebaut bei GMP.
Nun können wir uns mit dem PC wieder an die wirklich großen Zahlen heran wagen.
Für den Bereich ab p = 100000 ist das Programm ideal. Im Bereich p = 750000 wird es allerdings ebenfalls zäh (ca. 21 h pro Zahl pro Thread mit dem Lucas-Lehmer-Test). Es wird Zeit, dass der Lucas-Lehmer-Test durch etwas genialeres ersetzt wird, aber GIMPS verwendet ihn ebenfalls.