aesni_rand.h 4.32 KB
Newer Older
1
2
#if !defined(__AES__) || !defined(__SSE2__) || (!defined(__AVX512VL__) && !defined(__SSE4_1__))
#error AES-NI and SSE2, as well as AVX512 or SSE4.1 need to be enabled
Michael Kuron's avatar
Michael Kuron committed
3
4
#endif

Michael Kuron's avatar
Michael Kuron committed
5
6
7
8
#include <emmintrin.h> // SSE2
#include <wmmintrin.h> // AES
#ifdef __AVX512VL__
#include <immintrin.h> // AVX*
9
10
#else
#include <smmintrin.h>  // SSE4
Michael Kuron's avatar
Michael Kuron committed
11
12
13
#ifdef __FMA__
#include <immintrin.h> // FMA
#endif
Michael Kuron's avatar
Michael Kuron committed
14
#endif
Michael Kuron's avatar
Michael Kuron committed
15
16
17
18
19
20
21
22
23
24
25
#include <cstdint>

// Qualifier placed in front of every function defined in this header.
#define QUALIFIERS inline
// 2^-53: scale factor applied to the 53-bit integer built in aesni_double2.
#define TWOPOW53_INV_DOUBLE (1.1102230246251565e-16)
// 2^-32: scale factor applied to the 32-bit integers in aesni_float4.
#define TWOPOW32_INV_FLOAT (2.3283064e-10f)

// Short aliases for the fixed-width unsigned integer types from <cstdint>.
typedef std::uint32_t uint32;
typedef std::uint64_t uint64;

// Runs one 128-bit block through ten AES rounds using the AES-NI instructions.
//
// in : the 128-bit input block
// k  : the 128-bit key; the very same value is used for the initial XOR and as
//      the round key of every round (no key schedule is expanded here)
//
// Returns the transformed 128-bit block.
QUALIFIERS __m128i aesni1xm128i(const __m128i & in, const __m128i & k) {
    // Initial key addition.
    __m128i state = _mm_xor_si128(k, in);

    // Rounds 1 through 9: full AES encryption rounds.
    for (int round = 1; round <= 9; ++round) {
        state = _mm_aesenc_si128(state, k);
    }

    // Round 10: final AES round.
    return _mm_aesenclast_si128(state, k);
}

Michael Kuron's avatar
Michael Kuron committed
39
// Converts four packed unsigned 32-bit integers to four packed floats.
//
// With AVX512VL the native unsigned conversion is used. Otherwise only the
// signed conversion is available, so each value is split into its upper 31
// bits (which always fit a non-negative signed int) and its lowest bit; the
// result is rebuilt as 2 * float(v >> 1) + float(v & 1).
QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v)
{
#ifdef __AVX512VL__
    return _mm_cvtepu32_ps(v);
#else
    const __m128i upperBits = _mm_srli_epi32(v, 1);
    const __m128i lowestBit = _mm_and_si128(v, _mm_set1_epi32(1));

    const __m128 upperAsFloat = _mm_cvtepi32_ps(upperBits);
    const __m128 lowestAsFloat = _mm_cvtepi32_ps(lowestBit);

    // Undo the shift by doubling, then add the bit that was shifted out.
    const __m128 doubled = _mm_add_ps(upperAsFloat, upperAsFloat);
    return _mm_add_ps(doubled, lowestAsFloat);
#endif
}

52
53
54
#if !defined(__AVX512VL__) && defined(__FAST_MATH__) && defined(__GNUC__) && __GNUC__ >= 5
__attribute__((optimize("no-fast-math")))
#endif
Michael Kuron's avatar
Michael Kuron committed
55
QUALIFIERS __m128d _my_cvtepu64_pd(const __m128i x)
Michael Kuron's avatar
Michael Kuron committed
56
{
Michael Kuron's avatar
Michael Kuron committed
57
58
#ifdef __AVX512VL__
    return _mm_cvtepu64_pd(x);
Michael Kuron's avatar
Michael Kuron committed
59
#else
60
61
62
63
64
    __m128i xH = _mm_srli_epi64(x, 32);
    xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.)));          //  2^84
    __m128i xL = _mm_blend_epi16(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0xcc);   //  2^52
    __m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(19342813118337666422669312.));     //  2^84 + 2^52
    return _mm_add_pd(f, _mm_castsi128_pd(xL));
Michael Kuron's avatar
Michael Kuron committed
65
#endif
Michael Kuron's avatar
Michael Kuron committed
66
67
68
69
70
71
72
}


// Generates two doubles from a 128-bit counter and a 128-bit key.
//
// ctr0..ctr3 : the four 32-bit words of the counter (ctr0 is the lowest word)
// key0..key3 : the four 32-bit words of the key (key0 is the lowest word)
// rnd1, rnd2 : receive the two results
//
// Each result is r * 2^-53 + 2^-54, where r is a 53-bit integer assembled from
// two 32-bit words of the AES output. The results are strictly positive.
// NOTE(review): for the largest r the final addition rounds up, so a result of
// exactly 1.0 appears possible - confirm whether callers depend on that.
QUALIFIERS void aesni_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
                              uint32 key0, uint32 key1, uint32 key2, uint32 key3,
                              double & rnd1, double & rnd2)
{
    // Pack counter and key into 128-bit registers and run the AES rounds.
    const __m128i counter = _mm_set_epi32(ctr3, ctr2, ctr1, ctr0);
    const __m128i key = _mm_set_epi32(key3, key2, key1, key0);
    const __m128i bits = aesni1xm128i(counter, key);

    // Widen 32 -> 64 bit: words 0 and 2 stay in the low half of their lane,
    // words 1 and 3 are moved down into the low half of their lane.
    const __m128i evenWords = _mm_and_si128(bits, _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff));
    const __m128i oddWords = _mm_srli_epi64(bits, 32);

    // Combine into a 53-bit value: even ^ (odd << (53 - 32)).
    const __m128i combined = _mm_xor_si128(evenWords, _mm_slli_epi64(oddWords, 53 - 32));

    // Convert to double and compute value * 2^-53 + 2^-54.
    const __m128d scale = _mm_set1_pd(TWOPOW53_INV_DOUBLE);
    const __m128d offset = _mm_set1_pd(TWOPOW53_INV_DOUBLE/2.0);
    __m128d result = _my_cvtepu64_pd(combined);
#ifdef __FMA__
    result = _mm_fmadd_pd(result, scale, offset);
#else
    result = _mm_add_pd(_mm_mul_pd(result, scale), offset);
#endif

    // Hand back the low lane first, then the high lane.
    rnd1 = _mm_cvtsd_f64(result);
    rnd2 = _mm_cvtsd_f64(_mm_unpackhi_pd(result, result));
}


// Generates four floats from a 128-bit counter and a 128-bit key.
//
// ctr0..ctr3 : the four 32-bit words of the counter (ctr0 is the lowest word)
// key0..key3 : the four 32-bit words of the key (key0 is the lowest word)
// rnd1..rnd4 : receive the four results, one per 32-bit word of the AES output
//
// Each result is float(word) * 2^-32 + 2^-33 and is therefore strictly positive.
// NOTE(review): words close to 2^32 round up during the float conversion, so a
// result of exactly 1.0f appears possible - confirm whether callers depend on
// that.
QUALIFIERS void aesni_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
                             uint32 key0, uint32 key1, uint32 key2, uint32 key3,
                             float & rnd1, float & rnd2, float & rnd3, float & rnd4)
{
    // Pack counter and key into 128-bit registers and run the AES rounds.
    const __m128i counter = _mm_set_epi32(ctr3, ctr2, ctr1, ctr0);
    const __m128i key = _mm_set_epi32(key3, key2, key1, key0);
    const __m128i bits = aesni1xm128i(counter, key);

    // Convert the four unsigned words to float and compute value * 2^-32 + 2^-33.
    const __m128 scale = _mm_set1_ps(TWOPOW32_INV_FLOAT);
    const __m128 offset = _mm_set1_ps(TWOPOW32_INV_FLOAT/2.0f);
    __m128 result = _my_cvtepu32_ps(bits);
#ifdef __FMA__
    result = _mm_fmadd_ps(result, scale, offset);
#else
    result = _mm_add_ps(_mm_mul_ps(result, scale), offset);
#endif

    // Spill the four lanes and hand them back in lane order.
    float lanes[4];
    _mm_storeu_ps(lanes, result);
    rnd1 = lanes[0];
    rnd2 = lanes[1];
    rnd3 = lanes[2];
    rnd4 = lanes[3];
}