diff --git a/Crypto/Cipher/AES/Primitive.hs b/Crypto/Cipher/AES/Primitive.hs
index d8a8490..5c03a93 100644
--- a/Crypto/Cipher/AES/Primitive.hs
+++ b/Crypto/Cipher/AES/Primitive.hs
@@ -37,6 +37,9 @@ module Crypto.Cipher.AES.Primitive
     , decryptCTR
     , decryptXTS
 
+    -- * CTR with 32-bit wrapping
+    , combineC32
+
     -- * Incremental GCM
     , gcmMode
     , gcmInit
@@ -317,6 +320,25 @@ decryptXTS :: ByteArray ba
            -> ba        -- ^ output decrypted
 decryptXTS = doXTS c_aes_decrypt_xts
 
+-- | encrypt/decrypt using Counter mode (32-bit wrapping used in AES-GCM-SIV)
+--
+-- Only the first 32-bit word of the counter block is incremented (little
+-- endian, wrapping); the remaining 96 bits are left untouched.  An empty
+-- input yields an empty output before the IV length is checked.
+{-# NOINLINE combineC32 #-}
+combineC32 :: ByteArray ba
+           => AES        -- ^ AES Context
+           -> IV AES     -- ^ initial vector of AES block size (usually representing a 128 bit integer)
+           -> ba         -- ^ plaintext input
+           -> ba         -- ^ ciphertext output
+combineC32 ctx iv input
+    | len <= 0          = B.empty
+    | B.length iv /= 16 = error $ "AES error: IV length must be block size (16). Its length is: " ++ show (B.length iv)
+    | otherwise = B.allocAndFreeze len doEncrypt
+  where doEncrypt o = withKeyAndIV ctx iv $ \k v -> withByteArray input $ \i ->
+                      c_aes_encrypt_c32 (castPtr o) k v i (fromIntegral len)
+        len = B.length input
+
 {-# INLINE doECB #-}
 doECB :: ByteArray ba
       => (Ptr b -> Ptr AES -> CString -> CUInt -> IO ())
@@ -578,6 +600,9 @@ foreign import ccall unsafe "cryptonite_aes.h cryptonite_aes_gen_ctr_cont"
 foreign import ccall "cryptonite_aes.h cryptonite_aes_encrypt_ctr"
     c_aes_encrypt_ctr :: CString -> Ptr AES -> Ptr Word8 -> CString -> CUInt -> IO ()
 
+foreign import ccall "cryptonite_aes.h cryptonite_aes_encrypt_c32"
+    c_aes_encrypt_c32 :: CString -> Ptr AES -> Ptr Word8 -> CString -> CUInt -> IO ()
+
 foreign import ccall "cryptonite_aes.h cryptonite_aes_gcm_init"
     c_aes_gcm_init :: Ptr AESGCM -> Ptr AES -> Ptr Word8 -> CUInt -> IO ()
 
diff --git a/cbits/aes/block128.h b/cbits/aes/block128.h
index 8513b94..12d842f 100644
--- a/cbits/aes/block128.h
+++ b/cbits/aes/block128.h
@@ -123,6 +123,12 @@ static inline void block128_inc32_be(block128 *b)
 	b->d[3] = cpu_to_be32(be32_to_cpu(b->d[3]) + 1);
 }
 
+/* increment the first 32-bit word, read as little endian; the other words are never touched */
+static inline void block128_inc32_le(block128 *b)
+{
+	b->d[0] = cpu_to_le32(le32_to_cpu(b->d[0]) + 1);
+}
+
 #ifdef IMPL_DEBUG
 #include <stdio.h>
 static inline void block128_print(block128 *b)
diff --git a/cbits/aes/x86ni.h b/cbits/aes/x86ni.h
index 2ef2615..6ffe74c 100644
--- a/cbits/aes/x86ni.h
+++ b/cbits/aes/x86ni.h
@@ -64,6 +64,8 @@ void cryptonite_aesni_decrypt_cbc128(aes_block *out, aes_key *key, aes_block *_i
 void cryptonite_aesni_decrypt_cbc256(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks);
 void cryptonite_aesni_encrypt_ctr128(uint8_t *out, aes_key *key, aes_block *_iv, uint8_t *in, uint32_t length);
 void cryptonite_aesni_encrypt_ctr256(uint8_t *out, aes_key *key, aes_block *_iv, uint8_t *in, uint32_t length);
+void cryptonite_aesni_encrypt_c32_128(uint8_t *out, aes_key *key, aes_block *_iv, uint8_t *in, uint32_t length);
+void cryptonite_aesni_encrypt_c32_256(uint8_t *out, aes_key *key, aes_block *_iv, uint8_t *in, uint32_t length);
 void cryptonite_aesni_encrypt_xts128(aes_block *out, aes_key *key1, aes_key *key2,
                                      aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks);
 void cryptonite_aesni_encrypt_xts256(aes_block *out, aes_key *key1, aes_key *key2,
diff --git a/cbits/aes/x86ni_impl.c b/cbits/aes/x86ni_impl.c
index 219e8e6..ba8d762 100644
--- a/cbits/aes/x86ni_impl.c
+++ b/cbits/aes/x86ni_impl.c
@@ -151,6 +151,53 @@ void SIZED(cryptonite_aesni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_bloc
 	return ;
 }
 
+/*
+ * CTR mode where only the lowest 32-bit lane of the counter block is
+ * incremented (wrapping, no carry into the upper 96 bits), as used by
+ * AES-GCM-SIV. A trailing partial block goes through a zero padded temporary
+ * so that exactly len bytes are read from input and written to output.
+ */
+void SIZED(cryptonite_aesni_encrypt_c32_)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
+{
+	__m128i *k = (__m128i *) key->data;
+	__m128i one = _mm_set_epi32(0,0,0,1);
+	uint32_t nb_blocks = len / 16;
+	uint32_t part_block_len = len % 16;
+
+	/* get the IV (kept in a register, the caller's copy is not written back) */
+	__m128i iv = _mm_loadu_si128((__m128i *) _iv);
+
+	PRELOAD_ENC(k);
+
+	for (; nb_blocks-- > 0; output += 16, input += 16) {
+		/* encrypt the iv and xor it with the input block */
+		__m128i tmp = iv;
+		DO_ENC_BLOCK(tmp);
+		__m128i m = _mm_loadu_si128((__m128i *) input);
+		m = _mm_xor_si128(m, tmp);
+
+		_mm_storeu_si128((__m128i *) output, m);
+		/* iv += 1 (per-lane 32-bit add: wraps without carrying) */
+		iv = _mm_add_epi32(iv, one);
+	}
+
+	if (part_block_len != 0) {
+		aes_block block;
+		memset(&block.b, 0, 16);
+		memcpy(&block.b, input, part_block_len);
+
+		__m128i m = _mm_loadu_si128((__m128i *) &block);
+		__m128i tmp = iv;
+
+		DO_ENC_BLOCK(tmp);
+		m = _mm_xor_si128(m, tmp);
+		_mm_storeu_si128((__m128i *) &block.b, m);
+		memcpy(output, &block.b, part_block_len);
+	}
+
+	return ;
+}
+
 void SIZED(cryptonite_aesni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key *key2,
                                          aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks)
 {
diff --git a/cbits/cryptonite_aes.c b/cbits/cryptonite_aes.c
index e35ced9..e70aa07 100644
--- a/cbits/cryptonite_aes.c
+++ b/cbits/cryptonite_aes.c
@@ -44,6 +44,7 @@ void cryptonite_aes_generic_decrypt_ecb(aes_block *output, aes_key *key, aes_blo
 void cryptonite_aes_generic_encrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks);
 void cryptonite_aes_generic_decrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks);
 void cryptonite_aes_generic_encrypt_ctr(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t length);
+void cryptonite_aes_generic_encrypt_c32(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t length);
 void cryptonite_aes_generic_encrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit,
                                         uint32_t spoint, aes_block *input, uint32_t nb_blocks);
 void cryptonite_aes_generic_decrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit,
@@ -69,6 +70,8 @@ enum {
 	DECRYPT_CBC_128, DECRYPT_CBC_192, DECRYPT_CBC_256,
 	/* ctr */
 	ENCRYPT_CTR_128, ENCRYPT_CTR_192, ENCRYPT_CTR_256,
+	/* ctr with 32-bit wrapping */
+	ENCRYPT_C32_128, ENCRYPT_C32_192, ENCRYPT_C32_256,
 	/* xts */
 	ENCRYPT_XTS_128, ENCRYPT_XTS_192, ENCRYPT_XTS_256,
 	DECRYPT_XTS_128, DECRYPT_XTS_192, DECRYPT_XTS_256,
@@ -115,6 +118,10 @@ void *cryptonite_aes_branch_table[] = {
 	[ENCRYPT_CTR_128] = cryptonite_aes_generic_encrypt_ctr,
 	[ENCRYPT_CTR_192] = cryptonite_aes_generic_encrypt_ctr,
 	[ENCRYPT_CTR_256] = cryptonite_aes_generic_encrypt_ctr,
+	/* CTR with 32-bit wrapping */
+	[ENCRYPT_C32_128] = cryptonite_aes_generic_encrypt_c32,
+	[ENCRYPT_C32_192] = cryptonite_aes_generic_encrypt_c32,
+	[ENCRYPT_C32_256] = cryptonite_aes_generic_encrypt_c32,
 	/* XTS */
 	[ENCRYPT_XTS_128] = cryptonite_aes_generic_encrypt_xts,
 	[ENCRYPT_XTS_192] = cryptonite_aes_generic_encrypt_xts,
@@ -173,6 +180,8 @@ typedef void (*gf_mul_f)(block128 *a, const table_4bit htable);
 	((cbc_f) (cryptonite_aes_branch_table[DECRYPT_CBC_128 + strength]))
 #define GET_CTR_ENCRYPT(strength) \
 	((ctr_f) (cryptonite_aes_branch_table[ENCRYPT_CTR_128 + strength]))
+#define GET_C32_ENCRYPT(strength) \
+	((ctr_f) (cryptonite_aes_branch_table[ENCRYPT_C32_128 + strength]))
 #define GET_XTS_ENCRYPT(strength) \
 	((xts_f) (cryptonite_aes_branch_table[ENCRYPT_XTS_128 + strength]))
 #define GET_XTS_DECRYPT(strength) \
@@ -204,6 +213,7 @@ typedef void (*gf_mul_f)(block128 *a, const table_4bit htable);
 #define GET_CBC_ENCRYPT(strength) cryptonite_aes_generic_encrypt_cbc
 #define GET_CBC_DECRYPT(strength) cryptonite_aes_generic_decrypt_cbc
 #define GET_CTR_ENCRYPT(strength) cryptonite_aes_generic_encrypt_ctr
+#define GET_C32_ENCRYPT(strength) cryptonite_aes_generic_encrypt_c32
 #define GET_XTS_ENCRYPT(strength) cryptonite_aes_generic_encrypt_xts
 #define GET_XTS_DECRYPT(strength) cryptonite_aes_generic_decrypt_xts
 #define GET_GCM_ENCRYPT(strength) cryptonite_aes_generic_gcm_encrypt
@@ -251,6 +261,9 @@ static void initialize_table_ni(int aesni, int pclmul)
 	/* CTR */
 	cryptonite_aes_branch_table[ENCRYPT_CTR_128] = cryptonite_aesni_encrypt_ctr128;
 	cryptonite_aes_branch_table[ENCRYPT_CTR_256] = cryptonite_aesni_encrypt_ctr256;
+	/* CTR with 32-bit wrapping */
+	cryptonite_aes_branch_table[ENCRYPT_C32_128] = cryptonite_aesni_encrypt_c32_128;
+	cryptonite_aes_branch_table[ENCRYPT_C32_256] = cryptonite_aesni_encrypt_c32_256;
 	/* XTS */
 	cryptonite_aes_branch_table[ENCRYPT_XTS_128] = cryptonite_aesni_encrypt_xts128;
 	cryptonite_aes_branch_table[ENCRYPT_XTS_256] = cryptonite_aesni_encrypt_xts256;
@@ -352,6 +365,12 @@ void cryptonite_aes_encrypt_ctr(uint8_t *output, aes_key *key, aes_block *iv, ui
 	e(output, key, iv, input, len);
 }
 
+void cryptonite_aes_encrypt_c32(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t len)
+{
+	ctr_f e = GET_C32_ENCRYPT(key->strength);
+	e(output, key, iv, input, len);
+}
+
 void cryptonite_aes_encrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit,
                                 uint32_t spoint, aes_block *input, uint32_t nb_blocks)
 {
@@ -789,6 +808,35 @@ void cryptonite_aes_generic_encrypt_ctr(uint8_t *output, aes_key *key, aes_block
 	}
 }
 
+/*
+ * Portable CTR mode with 32-bit wrapping: the counter block starts as a copy
+ * of iv and only its first 32-bit word is incremented (little endian) after
+ * each full block; the caller's iv is not modified.
+ */
+void cryptonite_aes_generic_encrypt_c32(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t len)
+{
+	aes_block block, o;
+	uint32_t nb_blocks = len / 16;
+	uint32_t i;
+
+	/* preload IV in block (work on a local copy of the counter) */
+	block128_copy(&block, iv);
+
+	for ( ; nb_blocks-- > 0; block128_inc32_le(&block), output += 16, input += 16) {
+		cryptonite_aes_encrypt_block(&o, key, &block);
+		block128_vxor((block128 *) output, &o, (block128 *) input);
+	}
+
+	if ((len % 16) != 0) {
+		cryptonite_aes_encrypt_block(&o, key, &block);
+		for (i = 0; i < (len % 16); i++) {
+			*output = ((uint8_t *) &o)[i] ^ *input;
+			output++;
+			input++;
+		}
+	}
+}
+
 void cryptonite_aes_generic_encrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit,
                                         uint32_t spoint, aes_block *input, uint32_t nb_blocks)
 {