@@ -1,3 +1,4 @@
+#include <stdbool.h>
 #include <stdint.h>
 #include <limits.h>
 #include <strings.h>
@@ -12,9 +13,12 @@ uint32_t lookup[256];
 uint32_t offset[256];
 
 static const uint64_t topmask = ~(ULLONG_MAX >> halfstrip_size);
-static const uint64_t topbigmask = ~(ULLONG_MAX >> strip_size);
+//static const uint64_t topbigmask = ~(ULLONG_MAX >> strip_size);
 static const uint64_t bottommask = (0x01 << halfstrip_size) -1;
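+/* Distinguished-point machinery: failuremask selects the top FAILURE bits of
+ * a limb, distinguished_limb is a limb with only its most significant bit
+ * set, and clz(x) counts the leading zero bits of a 64-bit word. */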
+static const uint64_t failuremask = ~(ULLONG_MAX >> FAILURE);
+#define distinguished_limb 0x8000000000000000
+#define clz(x) __builtin_clzll(x)
 
 static inline
 void add_1(uint64_t *b, const size_t start, const size_t len, uint64_t a)
@@ -30,6 +34,9 @@ void add_1(uint64_t *b, const size_t start, const size_t len, uint64_t a)
   */
 }
 
+
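+/* Variant of the old distinguished() macro; with topbigmask commented out
+ * above it can no longer compile if expanded, so it appears to be kept only
+ * for reference. */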
+#define zdistinguished(x) (((x)[head] & topbigmask)) == 0
+
 uint32_t __attribute__((optimize("unroll-loops"))) convert(uint64_t * nn)
 {
   uint32_t steps;
@@ -37,40 +44,38 @@ uint32_t __attribute__((optimize("unroll-loops"))) convert(uint64_t * nn)
 #define next_head ((head + 23) % 24)
 #define tail ((head + 1) % 24)
 #define next_tail ((head + 2) % 24)
-
-#define distinguished(x) (((x)[head] & topbigmask)) == 0
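+/* x and y alias the two most significant limbs of the 24-limb ring buffer,
+ * so each pass reads them through the moving head instead of copying. */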
+#define x (nn[head])
+#define y (nn[next_head])
 
   /** Check the most significant block */
-  const uint64_t x = nn[head];
-  for (uint32_t w2 = halfstrip_size; w2 < 64-halfstrip_size; w2 += halfstrip_size) {
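+  /* clz(x) locates the leading one bit; start the scan at the first
+   * halfstrip-aligned window past the one containing that bit, since any
+   * window containing it cannot be all zero. */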
+  for (uint32_t w2 = clz(x) + halfstrip_size - (clz(x) % halfstrip_size);
+       w2 < 64 - halfstrip_size;
+       w2 += halfstrip_size) {
     if (!(x & (topmask >> w2))) {
       const size_t previous = (x >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
-      const uint32_t next = (x >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
-      if (next <= lookup[previous]) return w2 - offset[previous];
+      const uint32_t next = (x >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
+      if (next <= lookup[previous]) return w2 - offset[previous] - 1;
     }
   }
 
-  for (steps = 64; !distinguished(nn); steps += 64) {
-    const uint64_t x = nn[head];
-    const uint64_t y = nn[next_head];
-
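+  /* The explicit distinguished() test is gone: the loop free-runs and exits
+   * only through the strip checks below. */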
+  for (steps = 64; true; steps += 64) {
     if (!(x & bottommask)) {
       const size_t previous = (x >> halfstrip_size) & bottommask;
-      const uint32_t next = y >> (64 - halfstrip_size);
-      if (next <= lookup[previous]) return steps - halfstrip_size - offset[previous];
+      const uint32_t next = y >> (64 - halfstrip_size);
+      if (next <= lookup[previous]) return steps - halfstrip_size - offset[previous] - 1;
     }
 
     if (!(y & topmask)) {
-      const size_t previous = x & bottommask;
-      const uint32_t next = (y >> (64 - 2*halfstrip_size)) & bottommask;
-      if (next <= lookup[previous]) return steps - offset[previous];
+      const size_t previous = x & bottommask;
+      const uint32_t next = (y >> (64 - 2*halfstrip_size)) & bottommask;
+      if (next <= lookup[previous]) return steps - offset[previous] - 1;
     }
 
     for (uint32_t w2 = halfstrip_size; w2 < 64-halfstrip_size; w2 += halfstrip_size) {
       if (!(y & (topmask >> w2))) {
         const size_t previous = (y >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
-        const uint32_t next = (y >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
-        if (next <= lookup[previous]) return steps + w2 - offset[previous];
+        const uint32_t next = (y >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
+        if (next <= lookup[previous]) return steps + w2 - offset[previous] - 1;
       }
     }
@@ -83,27 +88,23 @@ uint32_t __attribute__((optimize("unroll-loops"))) convert(uint64_t * nn)
     head = next_head;
     nn[tail] = al;
     add_1(nn, next_tail, 24, ah);
-
   }
-  return steps;
 }
 
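+/* A residue is distinguished when the top limb of the 1536-bit number begins
+ * with a one followed by FAILURE-1 zero bits; this reads GMP's limb array
+ * (_mp_d) directly, assuming a full 24-limb representation. */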
+bool distinguished(mpz_t n)
+{
+  return (n->_mp_d[23] & failuremask) == distinguished_limb;
+}
+
 uint32_t naif_convert(mpz_t n)
 {
-  uint32_t i;
-  mpz_t t;
-  mpz_init_set_ui(t, 1);
-  mpz_mul_2exp(t, t, 1536-strip_size);
-
+  uint32_t steps;
-  for (i = 0; mpz_cmp(n, t) > -1; i++) {
-    mpz_mul_ui(n, n, 2);
-    mpz_mod(n, n, p);
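+  /* Double modulo p until a distinguished point appears; mpz_mul_ui_modp is
+   * presumably a project helper fusing the old mpz_mul_ui/mpz_mod pair. */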
+  for (steps = 0; !distinguished(n); steps++) {
+    mpz_mul_ui_modp(n, n, 2);
   }
 
-  mpz_clear(t);
-  return i;
+  return steps;
 }
@@ -111,7 +112,7 @@ void dlog_precompute()
 {
   for (size_t i = 0; i <= bottommask; i++) {
     uint32_t j = ffs(i) ? ffs(i) - 1 : halfstrip_size;
-    lookup[i] = bottommask >> (halfstrip_size-j);
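+    /* j counts i's trailing zeros; lookup[i] = 2^j - 1, so next <= lookup[i]
+     * leaves enough leading zeros in the next halfstrip for a full strip. */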
+    lookup[i] = bottommask >> (halfstrip_size - j);
     offset[i] = j;
   }
 }