From dae01d056d433ab9b0aa6dd95bd73655345c4b8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ch=C3=A9ron?= Date: Fri, 6 Mar 2020 06:43:32 +0100 Subject: [PATCH 1/4] AES-NI with per-file target compiler options --- cbits/aes/x86ni.c | 7 +++++++ cryptonite.cabal | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/cbits/aes/x86ni.c b/cbits/aes/x86ni.c index 590a897..f51b32d 100644 --- a/cbits/aes/x86ni.c +++ b/cbits/aes/x86ni.c @@ -30,6 +30,10 @@ #ifdef WITH_AESNI +#pragma GCC push_options +#pragma GCC target("ssse3", "aes") +#pragma clang attribute push (__attribute__((target("ssse3,aes"))), apply_to=function) + #include #include #include @@ -400,4 +404,7 @@ static inline __m128i ghash_add(__m128i tag, const table_4bit htable, __m128i m) #endif +#pragma clang attribute pop +#pragma GCC pop_options + #endif diff --git a/cryptonite.cabal b/cryptonite.cabal index 68cb78f..d0c51db 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -336,7 +336,7 @@ Library c-sources: cbits/cryptonite_rdrand.c if flag(support_aesni) && (os(linux) || os(freebsd) || os(osx)) && (arch(i386) || arch(x86_64)) - CC-options: -mssse3 -maes -DWITH_AESNI + CC-options: -DWITH_AESNI if flag(support_pclmuldq) CC-options: -msse4.1 -mpclmul -DWITH_PCLMUL C-sources: cbits/aes/x86ni.c From f5706959a473a684f2eeab3648e6476e6bc03f0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ch=C3=A9ron?= Date: Thu, 12 Mar 2020 07:04:55 +0100 Subject: [PATCH 2/4] AES-NI and PCLMUL with per-function target compiler options --- cbits/aes/x86ni.c | 15 ++++++++------- cbits/aes/x86ni.h | 4 ++++ cbits/aes/x86ni_impl.c | 10 ++++++++++ cryptonite.cabal | 2 +- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/cbits/aes/x86ni.c b/cbits/aes/x86ni.c index f51b32d..75aae55 100644 --- a/cbits/aes/x86ni.c +++ b/cbits/aes/x86ni.c @@ -30,10 +30,6 @@ #ifdef WITH_AESNI -#pragma GCC push_options -#pragma GCC target("ssse3", "aes") -#pragma clang attribute push (__attribute__((target("ssse3,aes"))), apply_to=function) - #include #include #include @@ -50,6 +46,7 @@ /* old GCC version doesn't cope with the shuffle parameters, that can take 2 values (0xff and 0xaa) * in our case, passed as argument despite being a immediate 8 bits constant anyway. * un-factorise aes_128_key_expansion into 2 version that have the shuffle parameter explicitly set */ +TARGET_AESNI static __m128i aes_128_key_expansion_ff(__m128i key, __m128i keygened) { keygened = _mm_shuffle_epi32(keygened, 0xff); @@ -59,6 +56,7 @@ static __m128i aes_128_key_expansion_ff(__m128i key, __m128i keygened) return _mm_xor_si128(key, keygened); } +TARGET_AESNI static __m128i aes_128_key_expansion_aa(__m128i key, __m128i keygened) { keygened = _mm_shuffle_epi32(keygened, 0xaa); @@ -68,6 +66,7 @@ static __m128i aes_128_key_expansion_aa(__m128i key, __m128i keygened) return _mm_xor_si128(key, keygened); } +TARGET_AESNI void cryptonite_aesni_init(aes_key *key, uint8_t *ikey, uint8_t size) { __m128i k[28]; @@ -149,6 +148,7 @@ void cryptonite_aesni_init(aes_key *key, uint8_t *ikey, uint8_t size) /* TO OPTIMISE: use pcmulqdq... or some faster code. * this is the lamest way of doing it, but i'm out of time. * this is basically a copy of gf_mulx in gf.c */ +TARGET_AESNI static __m128i gfmulx(__m128i v) { uint64_t v_[2] ALIGNMENT(16); @@ -162,6 +162,7 @@ static __m128i gfmulx(__m128i v) return v; } +TARGET_AESNI static __m128i gfmul_generic(__m128i tag, const table_4bit htable) { aes_block _t; @@ -181,6 +182,7 @@ __m128i (*gfmul_branch_ptr)(__m128i a, const table_4bit t) = gfmul_generic; * Adapted from figure 5, with additional byte swapping so that interface * is simimar to cryptonite_aes_generic_gf_mul. */ +TARGET_AESNI_PCLMUL static __m128i gfmul_pclmuldq(__m128i a, const table_4bit htable) { __m128i b, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; @@ -244,6 +246,7 @@ void cryptonite_aesni_hinit_pclmul(table_4bit htable, const block128 *h) htable->q[1] = bitfn_swap64(h->q[0]); } +TARGET_AESNI_PCLMUL void cryptonite_aesni_gf_mul_pclmul(block128 *a, const table_4bit htable) { __m128i _a, _b; @@ -261,6 +264,7 @@ void cryptonite_aesni_init_pclmul(void) #define gfmul(a,t) (gfmul_generic(a,t)) #endif +TARGET_AESNI static inline __m128i ghash_add(__m128i tag, const table_4bit htable, __m128i m) { tag = _mm_xor_si128(tag, m); @@ -404,7 +408,4 @@ static inline __m128i ghash_add(__m128i tag, const table_4bit htable, __m128i m) #endif -#pragma clang attribute pop -#pragma GCC pop_options - #endif diff --git a/cbits/aes/x86ni.h b/cbits/aes/x86ni.h index 6ffe74c..c0ffd44 100644 --- a/cbits/aes/x86ni.h +++ b/cbits/aes/x86ni.h @@ -40,7 +40,11 @@ #include #include +#define TARGET_AESNI __attribute__((target("ssse3,aes"))) +#define TARGET_AESNI_PCLMUL __attribute__((target("sse4.1,aes,pclmul"))) + #ifdef IMPL_DEBUG +TARGET_AESNI static void block128_sse_print(__m128i m) { block128 b; diff --git a/cbits/aes/x86ni_impl.c b/cbits/aes/x86ni_impl.c index ba8d762..39b8f31 100644 --- a/cbits/aes/x86ni_impl.c +++ b/cbits/aes/x86ni_impl.c @@ -28,6 +28,7 @@ * SUCH DAMAGE. */ +TARGET_AESNI void SIZED(cryptonite_aesni_encrypt_block)(aes_block *out, aes_key *key, aes_block *in) { __m128i *k = (__m128i *) key->data; @@ -37,6 +38,7 @@ void SIZED(cryptonite_aesni_encrypt_block)(aes_block *out, aes_key *key, aes_blo _mm_storeu_si128((__m128i *) out, m); } +TARGET_AESNI void SIZED(cryptonite_aesni_decrypt_block)(aes_block *out, aes_key *key, aes_block *in) { __m128i *k = (__m128i *) key->data; @@ -46,6 +48,7 @@ void SIZED(cryptonite_aesni_decrypt_block)(aes_block *out, aes_key *key, aes_blo _mm_storeu_si128((__m128i *) out, m); } +TARGET_AESNI void SIZED(cryptonite_aesni_encrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks) { __m128i *k = (__m128i *) key->data; @@ -58,6 +61,7 @@ void SIZED(cryptonite_aesni_encrypt_ecb)(aes_block *out, aes_key *key, aes_block } } +TARGET_AESNI void SIZED(cryptonite_aesni_decrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks) { __m128i *k = (__m128i *) key->data; @@ -71,6 +75,7 @@ void SIZED(cryptonite_aesni_decrypt_ecb)(aes_block *out, aes_key *key, aes_block } } +TARGET_AESNI void SIZED(cryptonite_aesni_encrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks) { __m128i *k = (__m128i *) key->data; @@ -87,6 +92,7 @@ void SIZED(cryptonite_aesni_encrypt_cbc)(aes_block *out, aes_key *key, aes_block } } +TARGET_AESNI void SIZED(cryptonite_aesni_decrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks) { __m128i *k = (__m128i *) key->data; @@ -106,6 +112,7 @@ void SIZED(cryptonite_aesni_decrypt_cbc)(aes_block *out, aes_key *key, aes_block } } +TARGET_AESNI void SIZED(cryptonite_aesni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len) { __m128i *k = (__m128i *) key->data; @@ -151,6 +158,7 @@ void SIZED(cryptonite_aesni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_bloc return ; } +TARGET_AESNI void SIZED(cryptonite_aesni_encrypt_c32_)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len) { __m128i *k = (__m128i *) key->data; @@ -192,6 +200,7 @@ void SIZED(cryptonite_aesni_encrypt_c32_)(uint8_t *output, aes_key *key, aes_blo return ; } +TARGET_AESNI void SIZED(cryptonite_aesni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key *key2, aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks) { @@ -222,6 +231,7 @@ void SIZED(cryptonite_aesni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key } while (0); } +TARGET_AESNI void SIZED(cryptonite_aesni_gcm_encrypt)(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length) { __m128i *k = (__m128i *) key->data; diff --git a/cryptonite.cabal b/cryptonite.cabal index d0c51db..d983581 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -338,7 +338,7 @@ Library if flag(support_aesni) && (os(linux) || os(freebsd) || os(osx)) && (arch(i386) || arch(x86_64)) CC-options: -DWITH_AESNI if flag(support_pclmuldq) - CC-options: -msse4.1 -mpclmul -DWITH_PCLMUL + CC-options: -DWITH_PCLMUL C-sources: cbits/aes/x86ni.c , cbits/aes/generic.c , cbits/aes/gf.c From 0cf0d076abc995c410cbd66ea81da2740b1a6700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ch=C3=A9ron?= Date: Sat, 14 Mar 2020 07:50:30 +0100 Subject: [PATCH 3/4] Add flag use_target_attributes --- cbits/aes/x86ni.h | 5 +++++ cryptonite.cabal | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/cbits/aes/x86ni.h b/cbits/aes/x86ni.h index c0ffd44..cd26ce4 100644 --- a/cbits/aes/x86ni.h +++ b/cbits/aes/x86ni.h @@ -40,8 +40,13 @@ #include #include +#ifdef WITH_TARGET_ATTRIBUTES #define TARGET_AESNI __attribute__((target("ssse3,aes"))) #define TARGET_AESNI_PCLMUL __attribute__((target("sse4.1,aes,pclmul"))) +#else +#define TARGET_AESNI +#define TARGET_AESNI_PCLMUL +#endif #ifdef IMPL_DEBUG TARGET_AESNI diff --git a/cryptonite.cabal b/cryptonite.cabal index d983581..2d386b0 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -103,6 +103,11 @@ Flag check_alignment Default: False Manual: True +Flag use_target_attributes + Description: use GCC / clang function attributes instead of global target options. + Default: False + Manual: True + Library Exposed-modules: Crypto.Cipher.AES Crypto.Cipher.AESGCMSIV @@ -337,8 +342,12 @@ Library if flag(support_aesni) && (os(linux) || os(freebsd) || os(osx)) && (arch(i386) || arch(x86_64)) CC-options: -DWITH_AESNI + if !flag(use_target_attributes) + CC-options: -mssse3 -maes if flag(support_pclmuldq) CC-options: -DWITH_PCLMUL + if !flag(use_target_attributes) + CC-options: -msse4.1 -mpclmul C-sources: cbits/aes/x86ni.c , cbits/aes/generic.c , cbits/aes/gf.c @@ -385,6 +394,8 @@ Library Build-depends: deepseq if flag(check_alignment) cc-options: -DWITH_ASSERT_ALIGNMENT + if flag(use_target_attributes) + cc-options: -DWITH_TARGET_ATTRIBUTES Test-Suite test-cryptonite type: exitcode-stdio-1.0 From f84f7e300934ea1d6cf150c50ed9305a8706e55a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Olivier=20Ch=C3=A9ron?= Date: Sun, 15 Mar 2020 15:39:56 +0100 Subject: [PATCH 4/4] Enable flag by default --- cryptonite.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cryptonite.cabal b/cryptonite.cabal index 2d386b0..652e595 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -105,7 +105,7 @@ Flag check_alignment Flag use_target_attributes Description: use GCC / clang function attributes instead of global target options. - Default: False + Default: True Manual: True Library