ver1.c 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. #define _GNU_SOURCE
  2. #include <assert.h>
  3. #include <limits.h>
  4. #include <stdbool.h>
  5. #include <stdio.h>
  6. #include <stdint.h>
  7. #include <string.h>
  8. #include <unistd.h>
  9. #include <linux/random.h>
  10. #include <sys/syscall.h>
  11. #include <sys/time.h>
  12. #include <immintrin.h>
  13. #include <gmp.h>
  14. #define INIT_TIMEIT() \
  15. struct timeval __start, __end; \
  16. double __sdiff = 0, __udiff = 0
  17. #define START_TIMEIT() \
  18. gettimeofday(&__start, NULL)
  19. #define END_TIMEIT() \
  20. gettimeofday(&__end, NULL); \
  21. __sdiff += (__end.tv_sec - __start.tv_sec); \
  22. __udiff += (__end.tv_usec - __start.tv_usec)
  23. #define GET_TIMEIT() \
  24. __sdiff + __udiff * 1e-6
  25. #define TIMEIT_FORMAT "%lf"
  26. /**
  27. * p is our prime modulus, and is 2^n - g
  28. * where g is referred to as "gamma" (built-in function in C, so transliterated)
  29. */
  30. const static char* p_str =
  31. "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  32. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  33. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  34. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  35. "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
  36. "505CAF";
  37. const static char* g_str =
  38. "11510609";
  39. mpz_t p, g;
  40. uint32_t gg = 11510609;
  41. static inline ssize_t
  42. getrandom(void *buffer, size_t length, unsigned int flags)
  43. {
  44. return syscall(SYS_getrandom, buffer, length, flags);
  45. }
  46. INIT_TIMEIT();
  47. static const uint32_t strip_size = 16;
  48. #define halfstrip_size (strip_size/2)
  49. static uint8_t lookup[256];
  50. static uint8_t offset[256];
  51. uint32_t convert(uint64_t *nn)
  52. {
  53. assert(strip_size == 16);
  54. uint32_t steps;
  55. static const uint32_t window = 7;
  56. #define distinguished(x) ((x)[23] & ~(ULLONG_MAX >> strip_size)) == 0
  57. const __m128i rotmask = _mm_set_epi8(0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
  58. for (steps = 0; !distinguished(nn); steps += window*8) {
  59. START_TIMEIT();
  60. __m128i x = _mm_lddqu_si128((__m128i *) (nn + 22));
  61. __m128i mask = _mm_set_epi8(0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
  62. for (int32_t i = 14; i > 0; --i) {
  63. mask = _mm_shuffle_epi8(mask, rotmask);
  64. const bool zero = _mm_testz_si128(mask, x);
  65. if (zero) {
  66. const uint8_t previous = _mm_extract_epi8(x, i-1);
  67. const uint8_t next = _mm_extract_epi8(x, i+1);
  68. if (previous <= lookup[next]) {
  69. END_TIMEIT();
  70. return steps + (15-i)*8 - offset[next];
  71. }
  72. }
  73. }
  74. END_TIMEIT();
  75. /**
  76. * We found no distinguished point.
  77. */
  78. const uint64_t a = mpn_lshift(nn, nn, 24, window) * gg;
  79. mpn_add_1(nn, nn, 24, a);
  80. }
  81. return steps;
  82. }
  83. uint32_t naif_convert(mpz_t n)
  84. {
  85. uint32_t i;
  86. mpz_t t;
  87. mpz_init_set_ui(t, 1);
  88. mpz_mul_2exp(t, t, 1536-strip_size);
  89. for (i = 0; mpz_cmp(n, t) > -1; i++) {
  90. mpz_mul_2exp(n, n, 1);
  91. mpz_mod(n, n, p);
  92. }
  93. mpz_clear(t);
  94. return i;
  95. }
  96. int main()
  97. {
  98. mpz_init_set_str(p, p_str, 0);
  99. mpz_init_set_str(g, g_str, 10);
  100. gmp_randstate_t _rstate;
  101. unsigned long int _rseed;
  102. gmp_randinit_default(_rstate);
  103. getrandom(&_rseed, sizeof(unsigned long int), GRND_NONBLOCK);
  104. gmp_randseed_ui(_rstate, _rseed);
  105. for (uint32_t i = 0; i <= 0xFF; i++) {
  106. uint32_t j = ffs(i) ? ffs(i) - 1 : 8;
  107. lookup[i] = 0xFF >> (8-j);
  108. offset[i] = j;
  109. }
  110. mpz_t n, n0;
  111. mpz_inits(n, n0, NULL);
  112. uint32_t converted;
  113. for (uint64_t i=0; i < 1e3; i++) {
  114. mpz_urandomm(n0, _rstate, p);
  115. mpz_set(n, n0);
  116. converted = convert(n->_mp_d);
  117. mpz_set(n, n0);
  118. #ifndef NDEBUG
  119. uint32_t expected = naif_convert(n);
  120. if (converted != expected) printf("%d %d\n", converted, expected);
  121. assert(converted == expected);
  122. #endif
  123. }
  124. printf(TIMEIT_FORMAT "\n", GET_TIMEIT());
  125. mpz_clears(n, n0, NULL);
  126. mpz_clears(p, g, NULL);
  127. return 0;
  128. }