/* ddlog.c — distinguished-point search ("DDLOG" conversion) over a 24-limb
 * number held in a circular buffer. */
  1. #include <stdbool.h>
  2. #include <stdint.h>
  3. #include <limits.h>
  4. #include <strings.h>
  5. #include <gmp.h>
  6. #include "ddlog.h"
  7. #include "group.h"
  8. #include "hss.h"
  9. uint64_t lookup[0x01 << halfstrip_size];
  10. uint64_t offset[0x01 << halfstrip_size];
  11. static const uint64_t topmask = ~(ULLONG_MAX >> halfstrip_size);
  12. static const uint64_t bottommask = (0x01 << halfstrip_size) - 1;
  13. static const uint64_t topbigmask = ~(ULLONG_MAX >> strip_size);
  14. static const uint64_t failuremask = ~(ULLONG_MAX >> FAILURE);
  15. #define distinguished_limb 0x8000000000000000
  16. #define clz(x) __builtin_clzll(x)
  17. static inline
  18. void add_1(uint64_t *b, const size_t start, const size_t len, uint64_t a)
  19. {
  20. for (size_t i = start; a != 0; i = (i+1)%len) {
  21. const uint64_t r = b[i] + a;
  22. a = (r < a);
  23. b[i] = r;
  24. }
  25. /*
  26. we don't check for overflows in "i".
  27. If it happens, buy a lottery ticket and retry.
  28. */
  29. }
/*
 * Window-extraction helpers.  Limbs are scanned in halfstrips of
 * halfstrip_size bits; since 64 is in general not a multiple of
 * halfstrip_size, a halfstrip may straddle two adjacent limbs.
 * offset_xy_l = bits of the straddling x/y halfstrip that live at the
 * bottom of x; offset_xy_r = bits that live at the top of y.
 * NOTE(review): mask_xy_l / mask_yz_l shift the int literal 0x01 — fine
 * for small offsets, but UB if the offset ever reaches 31+; confirm the
 * header bounds halfstrip_size accordingly.
 */
#define offset_xy_l (64 % halfstrip_size)
#define offset_xy_r (halfstrip_size - offset_xy_l)
#define mask_xy_l ((0x01 << offset_xy_l) - 1)
#define mask_xy_r (~(ULLONG_MAX >> offset_xy_r))
/* The halfstrip straddling limbs x and y, assembled from both parts. */
#define strip_xy (((x & mask_xy_l) << offset_xy_r) | (y >> (64 - offset_xy_r)))
/* Same construction one limb further down, for the y/z boundary. */
#define offset_yz_l ((64 - offset_xy_r) % halfstrip_size)
#define offset_yz_r (halfstrip_size - offset_yz_l)
#define mask_yz_l ((0x01 << offset_yz_l) - 1)
#define strip_yz (((y & mask_yz_l) << offset_yz_r) | (z >> (64 - offset_yz_r)))
/* Circular-buffer index arithmetic over the 24-limb buffer nn[]:
 * next_head is head-1 (mod 24), i.e. the next less significant limb;
 * tail/next_tail are the slots just above head where the new low limb
 * and its carry are written after a doubling step. */
#define next_head ((head + 23) % 24)
#define next_next_head ((head + 23*2) % 24)
#define tail ((head + 1) % 24)
#define next_tail ((head + 2) % 24)
/**
 * Count the doubling steps until the 24-limb number in the circular
 * buffer nn[] (most significant limb at nn[23]) reaches a distinguished
 * point, using the precomputed lookup[]/offset[] tables to locate a
 * zero halfstrip followed by a sufficiently small next halfstrip.
 *
 * Each 64-step round inspects three consecutive limbs x (current head),
 * y (next) and z (next-next) so that halfstrips straddling a limb
 * boundary can be tested via the strip_xy / strip_yz macros.  If no
 * distinguished point is found in this window, the number is multiplied
 * by gg (presumably 2^64 mod p — TODO confirm against hss.h) by writing
 * the low product limb at the tail and rippling the high limb upward,
 * which rotates the buffer one limb.
 *
 * @param nn  circular buffer of 24 limbs; modified in place.
 * @return    number of doubling steps to the distinguished point.
 */
uint32_t __attribute__((optimize("unroll-loops"))) convert_lookup(uint64_t * nn)
{
	uint32_t steps;
	size_t head = 23;
	uint32_t w2;
	/** Check the most significant block */
	const uint64_t x = nn[head];
	/* Start at the first halfstrip boundary at or below the leading
	 * one bit; scan whole halfstrips fully contained in x. */
	for (w2 = clz(x) + halfstrip_size - (clz(x) % halfstrip_size);
	     w2 < 64 - offset_xy_l - halfstrip_size;
	     w2 += halfstrip_size) {
		if (!(x & (topmask >> w2))) {
			/* Zero halfstrip at bit offset w2: test the halfstrip
			 * before it against the table. */
			const size_t previous = (x >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
			const uint32_t next = (x >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
			if (next <= lookup[previous]) {
				return w2 - offset[previous] - 1;
			}
		}
	}
	for (steps = 64; true; steps += 64) {
		const uint64_t x = nn[head];
		const uint64_t y = nn[next_head];
		const uint64_t z = nn[next_next_head];
		/* Case 1: zero halfstrip ends exactly at the bottom of x
		 * (above the straddling bits). */
		if (!((x >> offset_xy_l) & bottommask)) {
			const size_t previous = (x >> (halfstrip_size + offset_xy_l)) & bottommask;
			const uint64_t next = strip_xy;
			if (next <= lookup[previous]) {
				return steps - offset_xy_l - halfstrip_size - offset[previous] - 1;
			}
		}
		/* Case 2: the straddling x/y halfstrip itself is zero. */
		if (!strip_xy) {
			const size_t previous = (x >> offset_xy_l) & bottommask;
			const uint64_t next = ((y << offset_xy_r) >> (64 - halfstrip_size));
			if (next <= lookup[previous]) {
				return steps - offset_xy_l - offset[previous] - 1;
			}
		}
		/* Case 3: zero halfstrip at the very top of y, preceded by
		 * the straddling x/y halfstrip. */
		if (!((y << offset_xy_r) & topmask)) {
			const size_t previous = strip_xy;
			const uint64_t next = ((y << (offset_xy_r + halfstrip_size)) >> (64 - halfstrip_size));
			if (next <= lookup[previous]) {
				return steps + offset_xy_r - offset[previous] - 1;
			}
		}
		/* Scan the halfstrips fully contained in y. */
		for (w2 = halfstrip_size + offset_xy_r;
		     w2 < 64 - halfstrip_size - offset_yz_l;
		     w2 += halfstrip_size) {
			if (!(y & (topmask >> w2))) {
				const size_t previous = (y >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
				const uint64_t next = (y >> (64 - halfstrip_size - w2 - halfstrip_size)) & bottommask;
				if (next <= lookup[previous]) {
					return steps + w2 - offset[previous] - 1;
				}
			}
		}
		/* Last window of y: its "next" halfstrip straddles y/z. */
		if (!(y & (topmask >> w2))) {
			const size_t previous = (y >> (64 - halfstrip_size - w2 + halfstrip_size)) & bottommask;
			const uint64_t next = strip_yz;
			if (next <= lookup[previous]) {
				return steps + w2 - offset[previous] - 1;
			}
		}
		/**
		 * We found no distinguished point.
		 */
		/* Multiply the dropped head limb by gg and fold the 128-bit
		 * product back into the circular buffer. */
		const uint128_t a = (uint128_t) x * gg;
		const uint64_t al = (uint64_t) a;
		const uint64_t ah = (a >> 64);
		head = next_head;
		nn[tail] = al;
		add_1(nn, next_tail, 24, ah);
	}
}
/**
 * Return true when n is a distinguished point: the top FAILURE bits of
 * the most significant limb match distinguished_limb (only bit 63 set).
 * NOTE(review): reads the GMP internal n->_mp_d[23] directly, assuming n
 * always occupies (at least) 24 limbs; if n can be shorter this reads
 * past the significant limbs — consider mpz_getlimbn(n, 23).  TODO confirm.
 */
bool distinguished(mpz_t n)
{
	return (n->_mp_d[23] & failuremask) == distinguished_limb;
}
  119. void dlog_precompute()
  120. {
  121. for (size_t i = 0; i <= bottommask; i++) {
  122. uint32_t j = ffs(i) ? ffs(i) - 1 : halfstrip_size;
  123. lookup[i] = bottommask >> (halfstrip_size - j);
  124. offset[i] = j;
  125. }
  126. }
  127. /** Alternative implementations of the conversion method.
  128. * Used for testing and/or comparing past results.
  129. */
  130. uint32_t convert_naif(mpz_t n)
  131. {
  132. uint32_t steps;
  133. for (steps = 0; !distinguished(n); steps++) {
  134. mpz_mul_ui_modp(n, n, 2);
  135. }
  136. return steps;
  137. }
/**
 * Variant of convert_lookup() that locates the distinguished point by
 * searching bits directly (inner loops over w) instead of consulting the
 * lookup[]/offset[] tables.  Kept for testing / comparison with past
 * results (see the EC17-style naming).  Same contract as convert_lookup:
 * nn[] is a 24-limb circular buffer, modified in place; returns the
 * number of doubling steps to the distinguished point.
 */
uint32_t __attribute__((optimize("unroll-loops"))) convert_ec17(uint64_t * nn)
{
	uint32_t steps;
	size_t head = 23;
	uint32_t w, w2;
	/** Check the most significant block */
	const uint64_t x = nn[head];
	for (w2 = clz(x) + halfstrip_size - (clz(x) % halfstrip_size);
	     w2 < 64 - offset_xy_l - halfstrip_size;
	     w2 += halfstrip_size) {
		if (!(x & (topmask >> w2))) {
			/* Walk back to the last one bit before the zero
			 * halfstrip, then check a full strip of zeros. */
			for (w = w2-1; !(x & (topmask >> w)); w--);
			++w;
			if (!(x & (topbigmask >> w))) return w - 1;
		}
	}
	for (steps = 64; true; steps += 64) {
		const uint64_t x = nn[head];
		const uint64_t y = nn[next_head];
		const uint64_t z = nn[next_next_head];
		/* Case 1: zero halfstrip at the bottom of x (above the
		 * straddling bits); find the preceding one bit and verify. */
		if (!((x >> offset_xy_l) & bottommask)) {
			for (w=0; !(x >> (halfstrip_size + offset_xy_l + w) & 1); w++);
			if (!(strip_xy >> w)) {
				return steps - offset_xy_l - halfstrip_size - w - 1;
			}
		}
		/* Case 2: the straddling x/y halfstrip is zero. */
		if (!strip_xy) {
			for (w = 0; !((x >> (offset_xy_l + w)) & 1); w++);
			if (!(y << offset_xy_r & (topmask << w)))
				return steps - offset_xy_l - w - 1;
		}
		/* Case 3: zero halfstrip at the top of y, preceded by the
		 * straddling x/y halfstrip. */
		if (!((y << offset_xy_r) & topmask)) {
			for (w = 0; !((strip_xy >> w) & 1); w++);
			if (!((y << (offset_xy_r + halfstrip_size)) & (topmask << w)))
				return steps + offset_xy_r - w - 1;
		}
		/* Halfstrips fully contained in y. */
		for (w2 = halfstrip_size + offset_xy_r;
		     w2 < 64 - halfstrip_size - offset_yz_l;
		     w2 += halfstrip_size) {
			if (!(y & (topmask >> w2))) {
				for (w = w2-1; !(y & (topmask >> w)); w--);
				++w;
				if (!(y & (topbigmask >> w))) return steps + w - 1;
			}
		}
		/* Last window of y: its continuation straddles y/z. */
		if (!(y & (topmask >> w2))) {
			for (w = w2-1; !(y & (topmask >> w)); w--);
			w++;
			if (!(strip_yz >> (w2 - w))) {
				return steps + w - 1;
			}
		}
		/**
		 * We found no distinguished point.
		 */
		/* Rotate the buffer: fold x * gg back in at the tail. */
		const uint128_t a = (uint128_t) x * gg;
		const uint64_t al = (uint64_t) a;
		const uint64_t ah = (a >> 64);
		head = next_head;
		nn[tail] = al;
		add_1(nn, next_tail, 24, ah);
	}
}