ver1.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #define _GNU_SOURCE
  2. #include <assert.h>
  3. #include <limits.h>
  4. #include <stdbool.h>
  5. #include <stdio.h>
  6. #include <stdint.h>
  7. #include <string.h>
  8. #include <unistd.h>
  9. #include <linux/random.h>
  10. #include <sys/syscall.h>
  11. #include <sys/time.h>
  12. #include <gmp.h>
  13. #define INIT_TIMEIT() \
  14. struct timeval __start, __end; \
  15. double __sdiff = 0, __udiff = 0
  16. #define START_TIMEIT() \
  17. gettimeofday(&__start, NULL)
  18. #define END_TIMEIT() \
  19. gettimeofday(&__end, NULL); \
  20. __sdiff += (__end.tv_sec - __start.tv_sec); \
  21. __udiff += (__end.tv_usec - __start.tv_usec)
  22. #define GET_TIMEIT() \
  23. __sdiff + __udiff * 1e-6
  24. #define TIMEIT_FORMAT "%lf"
  25. /**
  26. * p is our prime modulus, and is 2^n - g
  27. * where g is referred to as "gamma" (built-in function in C, so transliterated)
  28. */
  29. const static char* p_str =
  30. "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  31. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  32. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  33. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  34. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  35. "505CAF";
  36. const static char* g_str =
  37. "11510609";
  38. mpz_t p, g;
  39. uint32_t gg = 11510609;
  40. static inline ssize_t
  41. getrandom(void *buffer, size_t length, unsigned int flags)
  42. {
  43. return syscall(SYS_getrandom, buffer, length, flags);
  44. }
  45. INIT_TIMEIT();
  46. static const uint32_t strip_size = 8;
  47. uint32_t convert(uint64_t * nn)
  48. {
  49. uint32_t steps = 0;
  50. /**
  51. * Here we start making a bunch of assumptions.
  52. * First, we assume the "w" here is 64 bits, which should be the size
  53. * (in bits) of a mp_limb_t.
  54. * Secondly, the amount of zeros to check, "d" here is 8.
  55. */
  56. static const uint32_t window = 32;
  57. static const uint32_t strip_size = 16;
  58. static const uint32_t halfstrip_size = strip_size/2;
  59. static const uint64_t topmask = ~(ULLONG_MAX >> halfstrip_size);
  60. static const uint64_t topbigmask = ~(ULLONG_MAX >> strip_size);
  61. //static const uint64_t tailmask = (1 << halfstrip_size) - 1;
  62. /** Check if x has a halfstrip starting from start */
  63. #define halfstrip(x, start) (((x)) << (start)) & topmask)
  64. #define distinguished(x) (((x)[23] & (~(ULLONG_MAX >> strip_size)))) == 0
  65. #define lbindex(x) __builtin_clzll(x);
  66. #define tbindex(x) __builtin_ctzll(x);
  67. while (!distinguished(nn)) {
  68. /**
  69. * Here we try to find a strip of zeros for "w2" bits.
  70. * When we find one (up to w2 = 64), then we jump of w = w/2.
  71. * I tried to optimize this code:
  72. * - by integrating the if statement above with the for loop invariant;
  73. * - by making the loop algebraic (i.e. no if-s), given that in the
  74. * generated assembly I read a lot of jumps.
  75. * Unfortunately, both approaches actually lead to a slow down in the code.
  76. */
  77. uint64_t x = nn[23];
  78. uint32_t w;
  79. START_TIMEIT();
  80. for (uint32_t w2 = halfstrip_size; w2 < window*2-halfstrip_size; w2 += halfstrip_size) {
  81. if (!(x & (topmask >> w2))) {
  82. for (w = w2-1; !(x & (topmask >> w)); w--);
  83. ++w;
  84. if (!(x & (topbigmask >> w))) return steps + w;
  85. }
  86. }
  87. END_TIMEIT();
  88. /**
  89. * We found no distinguished point.
  90. */
  91. const uint64_t a = mpn_lshift(nn, nn, 24, window) * gg;
  92. mpn_add_1(nn, nn, 24, a);
  93. steps += window;
  94. }
  95. return steps;
  96. }
  97. uint32_t naif_convert(mpz_t n)
  98. {
  99. uint32_t i;
  100. mpz_t t;
  101. mpz_init_set_ui(t, 1);
  102. mpz_mul_2exp(t, t, 1536-strip_size);
  103. for (i = 0; mpz_cmp(n, t) > -1; i++) {
  104. mpz_mul_2exp(n, n, 1);
  105. mpz_mod(n, n, p);
  106. }
  107. mpz_clear(t);
  108. return i;
  109. }
  110. int main()
  111. {
  112. mpz_init_set_str(p, p_str, 0);
  113. mpz_init_set_str(g, g_str, 10);
  114. gmp_randstate_t _rstate;
  115. unsigned long int _rseed;
  116. gmp_randinit_default(_rstate);
  117. getrandom(&_rseed, sizeof(unsigned long int), GRND_NONBLOCK);
  118. gmp_randseed_ui(_rstate, _rseed);
  119. mpz_t n, n0;
  120. mpz_inits(n, n0, NULL);
  121. uint32_t converted;
  122. for (int i=0; i < 1e4; i++) {
  123. mpz_urandomm(n0, _rstate, p);
  124. mpz_set(n, n0);
  125. converted = convert(n->_mp_d);
  126. mpz_set(n, n0);
  127. #ifndef NDEBUG
  128. uint32_t expected = naif_convert(n);
  129. if (converted != expected) printf("%d %d\n", converted, expected);
  130. assert(converted == expected);
  131. #endif
  132. }
  133. printf(TIMEIT_FORMAT "\n", GET_TIMEIT());
  134. /* memset(n->_mp_d, 0, 24*8); */
  135. /* memset(n0->_mp_d, 0, 24*8); */
  136. /* n0->_mp_d[0] = 13423523; */
  137. /* n0->_mp_d[1] = 1; */
  138. /* uint64_t v[64] = {0}; */
  139. /* unpack(v, n->_mp_d); */
  140. /* pack(n0, v); */
  141. mpz_clears(n, n0, p, g, NULL);
  142. return 0;
  143. }