|
@@ -11,6 +11,11 @@
|
|
uint32_t lookup[256];
|
|
uint32_t lookup[256];
|
|
uint32_t offset[256];
|
|
uint32_t offset[256];
|
|
|
|
|
|
|
|
+static const uint64_t topmask = ~(ULLONG_MAX >> halfstrip_size);
|
|
|
|
+static const uint64_t topbigmask = ~(ULLONG_MAX >> strip_size);
|
|
|
|
+static const uint64_t bottommask = (0x01 << halfstrip_size) -1;
|
|
|
|
+
|
|
|
|
+
|
|
static inline
|
|
static inline
|
|
void add_1(uint64_t *b, const size_t start, const size_t len, uint64_t a)
|
|
void add_1(uint64_t *b, const size_t start, const size_t len, uint64_t a)
|
|
{
|
|
{
|
|
@@ -27,9 +32,6 @@ void add_1(uint64_t *b, const size_t start, const size_t len, uint64_t a)
|
|
|
|
|
|
uint32_t __attribute__((optimize("unroll-loops"))) convert(uint64_t * nn)
|
|
uint32_t __attribute__((optimize("unroll-loops"))) convert(uint64_t * nn)
|
|
{
|
|
{
|
|
- static const uint64_t topmask = ~(ULLONG_MAX >> halfstrip_size);
|
|
|
|
- static const uint64_t topbigmask = ~(ULLONG_MAX >> strip_size);
|
|
|
|
- static const uint64_t bottommask = (0x01 << halfstrip_size) -1;
|
|
|
|
uint32_t steps;
|
|
uint32_t steps;
|
|
size_t head = 23;
|
|
size_t head = 23;
|
|
#define next_head ((head + 23) % 24)
|
|
#define next_head ((head + 23) % 24)
|
|
@@ -67,7 +69,7 @@ uint32_t __attribute__((optimize("unroll-loops"))) convert(uint64_t * nn)
|
|
for (uint32_t w2 = halfstrip_size; w2 < 64-halfstrip_size; w2 += halfstrip_size) {
|
|
for (uint32_t w2 = halfstrip_size; w2 < 64-halfstrip_size; w2 += halfstrip_size) {
|
|
if (!(y & (topmask >> w2))) {
|
|
if (!(y & (topmask >> w2))) {
|
|
const size_t previous = (y >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
|
|
const size_t previous = (y >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
|
|
- const uint32_t next = (y >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
|
|
|
|
|
|
+ const uint32_t next = (y >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
|
|
if (next <= lookup[previous]) return steps + w2 - offset[previous];
|
|
if (next <= lookup[previous]) return steps + w2 - offset[previous];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -107,9 +109,9 @@ uint32_t naif_convert(mpz_t n)
|
|
|
|
|
|
void dlog_precompute()
|
|
void dlog_precompute()
|
|
{
|
|
{
|
|
- for (size_t i = 0; i <= 0xFF; i++) {
|
|
|
|
- uint32_t j = ffs(i) ? ffs(i) - 1 : 8;
|
|
|
|
- lookup[i] = 0xFF >> (8-j);
|
|
|
|
|
|
+ for (size_t i = 0; i <= bottommask; i++) {
|
|
|
|
+ uint32_t j = ffs(i) ? ffs(i) - 1 : halfstrip_size;
|
|
|
|
+ lookup[i] = bottommask >> (halfstrip_size-j);
|
|
offset[i] = j;
|
|
offset[i] = j;
|
|
}
|
|
}
|
|
}
|
|
}
|