Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Stephan Seitz
pystencils
Commits
2d65fcb8
Commit
2d65fcb8
authored
Sep 03, 2019
by
Michael Kuron
Browse files
AES-NI: cleanup
parent
43074231
Changes
1
Hide whitespace changes
Inline
Side-by-side
pystencils/include/aesni_rand.h
View file @
2d65fcb8
...
...
@@ -4,7 +4,7 @@
#include
<emmintrin.h>
// SSE2
#include
<wmmintrin.h>
// AES
#ifdef
__AVX512VL__
#if defined(__AVX512VL__) || defined(__AVX512F__)
#include
<immintrin.h>
// AVX*
#else
#include
<smmintrin.h>
// SSE4
...
...
@@ -38,7 +38,7 @@ QUALIFIERS __m128i aesni1xm128i(const __m128i & in, const __m128i & k) {
QUALIFIERS
__m128
_my_cvtepu32_ps
(
const
__m128i
v
)
{
#ifdef
__AVX512VL__
#if defined(__AVX512VL__) || defined(__AVX512F__)
return
_mm_cvtepu32_ps
(
v
);
#else
__m128i
v2
=
_mm_srli_epi32
(
v
,
1
);
...
...
@@ -49,12 +49,12 @@ QUALIFIERS __m128 _my_cvtepu32_ps(const __m128i v)
#endif
}
#if !defined(__AVX512VL__) && defined(__GNUC__) && __GNUC__ >= 5
#if !defined(__AVX512VL__) && !defined(__AVX512F__) && defined(__GNUC__) && __GNUC__ >= 5
__attribute__
((
optimize
(
"no-associative-math"
)))
#endif
QUALIFIERS
__m128d
_my_cvtepu64_pd
(
const
__m128i
x
)
{
#ifdef
__AVX512VL__
#if defined(__AVX512VL__) || defined(__AVX512F__)
return
_mm_cvtepu64_pd
(
x
);
#else
__m128i
xH
=
_mm_srli_epi64
(
x
,
32
);
...
...
@@ -81,17 +81,17 @@ QUALIFIERS void aesni_double2(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3
y
=
_mm_srli_si128
(
y
,
4
);
// calculate z = x ^ (y << (53 - 32))
__m128i
z
=
_mm_sll_epi64
(
y
,
_mm_set_epi64x
(
53
-
32
,
53
-
32
));
__m128i
z
=
_mm_sll_epi64
(
y
,
_mm_set
1
_epi64x
(
53
-
32
));
z
=
_mm_xor_si128
(
x
,
z
);
// convert uint64 to double
__m128d
rs
=
_my_cvtepu64_pd
(
z
);
// calculate rs * TWOPOW53_INV_DOUBLE + (TWOPOW53_INV_DOUBLE/2.0)
#ifdef __FMA__
rs
=
_mm_fmadd_pd
(
rs
,
_mm_set_pd
1
(
TWOPOW53_INV_DOUBLE
),
_mm_set_pd
1
(
TWOPOW53_INV_DOUBLE
/
2
.
0
));
rs
=
_mm_fmadd_pd
(
rs
,
_mm_set
1
_pd
(
TWOPOW53_INV_DOUBLE
),
_mm_set
1
_pd
(
TWOPOW53_INV_DOUBLE
/
2
.
0
));
#else
rs
=
_mm_mul_pd
(
rs
,
_mm_set_pd
1
(
TWOPOW53_INV_DOUBLE
));
rs
=
_mm_add_pd
(
rs
,
_mm_set_pd
1
(
TWOPOW53_INV_DOUBLE
/
2
.
0
));
rs
=
_mm_mul_pd
(
rs
,
_mm_set
1
_pd
(
TWOPOW53_INV_DOUBLE
));
rs
=
_mm_add_pd
(
rs
,
_mm_set
1
_pd
(
TWOPOW53_INV_DOUBLE
/
2
.
0
));
#endif
// store result
...
...
@@ -115,10 +115,10 @@ QUALIFIERS void aesni_float4(uint32 ctr0, uint32 ctr1, uint32 ctr2, uint32 ctr3,
__m128
rs
=
_my_cvtepu32_ps
(
c128
);
// calculate rs * TWOPOW32_INV_FLOAT + (TWOPOW32_INV_FLOAT/2.0f)
#ifdef __FMA__
rs
=
_mm_fmadd_ps
(
rs
,
_mm_set_ps
1
(
TWOPOW32_INV_FLOAT
),
_mm_set_ps
1
(
TWOPOW32_INV_FLOAT
/
2
.
0
f
));
rs
=
_mm_fmadd_ps
(
rs
,
_mm_set
1
_ps
(
TWOPOW32_INV_FLOAT
),
_mm_set
1
_ps
(
TWOPOW32_INV_FLOAT
/
2
.
0
f
));
#else
rs
=
_mm_mul_ps
(
rs
,
_mm_set_ps
1
(
TWOPOW32_INV_FLOAT
));
rs
=
_mm_add_ps
(
rs
,
_mm_set_ps
1
(
TWOPOW32_INV_FLOAT
/
2
.
0
f
));
rs
=
_mm_mul_ps
(
rs
,
_mm_set
1
_ps
(
TWOPOW32_INV_FLOAT
));
rs
=
_mm_add_ps
(
rs
,
_mm_set
1
_ps
(
TWOPOW32_INV_FLOAT
/
2
.
0
f
));
#endif
// store result
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment