diff --git a/cbits/aes/aes_x86ni_impl.c b/cbits/aes/aes_x86ni_impl.c new file mode 100644 index 0000000..f28745a --- /dev/null +++ b/cbits/aes/aes_x86ni_impl.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2012-2013 Vincent Hanquez + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of his contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +void SIZED(aes_ni_encrypt_block)(aes_block *out, aes_key *key, aes_block *in) +{ + __m128i *k = (__m128i *) key->data; + PRELOAD_ENC(k); + __m128i m = _mm_loadu_si128((__m128i *) in); + DO_ENC_BLOCK(m); + _mm_storeu_si128((__m128i *) out, m); +} + +void SIZED(aes_ni_decrypt_block)(aes_block *out, aes_key *key, aes_block *in) +{ + __m128i *k = (__m128i *) key->data; + PRELOAD_DEC(k); + __m128i m = _mm_loadu_si128((__m128i *) in); + DO_DEC_BLOCK(m); + _mm_storeu_si128((__m128i *) out, m); +} + +void SIZED(aes_ni_encrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks) +{ + __m128i *k = (__m128i *) key->data; + + PRELOAD_ENC(k); + for (; blocks-- > 0; in += 1, out += 1) { + __m128i m = _mm_loadu_si128((__m128i *) in); + DO_ENC_BLOCK(m); + _mm_storeu_si128((__m128i *) out, m); + } +} + +void SIZED(aes_ni_decrypt_ecb)(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks) +{ + __m128i *k = (__m128i *) key->data; + + PRELOAD_DEC(k); + + for (; blocks-- > 0; in += 1, out += 1) { + __m128i m = _mm_loadu_si128((__m128i *) in); + DO_DEC_BLOCK(m); + _mm_storeu_si128((__m128i *) out, m); + } +} + +void SIZED(aes_ni_encrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks) +{ + __m128i *k = (__m128i *) key->data; + __m128i iv = _mm_loadu_si128((__m128i *) _iv); + + PRELOAD_ENC(k); + + for (; blocks-- > 0; in += 1, out += 1) { + __m128i m = _mm_loadu_si128((__m128i *) in); + m = _mm_xor_si128(m, iv); + DO_ENC_BLOCK(m); + iv = m; + _mm_storeu_si128((__m128i *) out, m); + } +} + +void SIZED(aes_ni_decrypt_cbc)(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks) +{ + __m128i *k = (__m128i *) key->data; + __m128i iv = _mm_loadu_si128((__m128i *) _iv); + + PRELOAD_DEC(k); + + for (; blocks-- > 0; in += 
1, out += 1) {
+        __m128i m = _mm_loadu_si128((__m128i *) in);
+        __m128i ivnext = m;
+
+        DO_DEC_BLOCK(m);
+        m = _mm_xor_si128(m, iv);
+
+        _mm_storeu_si128((__m128i *) out, m);
+        iv = ivnext;
+    }
+}
+
+void SIZED(aes_ni_encrypt_ctr)(uint8_t *output, aes_key *key, aes_block *_iv, uint8_t *input, uint32_t len)
+{
+    __m128i *k = (__m128i *) key->data;
+    __m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
+    __m128i one = _mm_set_epi32(0,1,0,0);
+    uint32_t nb_blocks = len / 16;
+    uint32_t part_block_len = len % 16;
+
+    /* get the IV in little endian format */
+    __m128i iv = _mm_loadu_si128((__m128i *) _iv);
+    iv = _mm_shuffle_epi8(iv, bswap_mask);
+
+    PRELOAD_ENC(k);
+
+    for (; nb_blocks-- > 0; output += 16, input += 16) {
+        /* put the IV back in big endian mode,
+         * encrypt it and xor it with the input block
+         */
+        __m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
+        DO_ENC_BLOCK(tmp);
+        __m128i m = _mm_loadu_si128((__m128i *) input);
+        m = _mm_xor_si128(m, tmp);
+
+        _mm_storeu_si128((__m128i *) output, m);
+        /* iv += 1 */
+        iv = _mm_add_epi64(iv, one);
+    }
+
+    if (part_block_len != 0) {
+        aes_block block;
+        memset(&block.b, 0, 16);
+        memcpy(&block.b, input, part_block_len);
+
+        __m128i m = _mm_loadu_si128((__m128i *) &block);
+        __m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
+
+        DO_ENC_BLOCK(tmp);
+        m = _mm_xor_si128(m, tmp);
+        _mm_storeu_si128((__m128i *) &block.b, m);
+        memcpy(output, &block.b, part_block_len);
+    }
+
+    return;
+}
+
+void SIZED(aes_ni_encrypt_xts)(aes_block *out, aes_key *key1, aes_key *key2,
+                               aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks)
+{
+    __m128i tweak = _mm_loadu_si128((__m128i *) _tweak);
+
+    do {
+        __m128i *k2 = (__m128i *) key2->data;
+        PRELOAD_ENC(k2);
+        DO_ENC_BLOCK(tweak);
+
+        while (spoint-- > 0)
+            tweak = gfmulx(tweak);
+    } while (0);
+
+    do {
+        __m128i *k1 = (__m128i *) key1->data;
+        PRELOAD_ENC(k1);
+
+        /* XEX construction: xor the tweak in, encrypt, xor the tweak out;
+         * the tweak advances by one GF(2^128) doubling per block */
+        for ( ; blocks-- > 0; in += 1, out += 1, tweak = gfmulx(tweak)) {
+            __m128i m = _mm_loadu_si128((__m128i *) in);
+
+            m = _mm_xor_si128(m, tweak);
+            DO_ENC_BLOCK(m);
+            m = _mm_xor_si128(m, tweak);
+
+            _mm_storeu_si128((__m128i *) out, m);
+        }
+    } while (0);
+}
+
+void SIZED(aes_ni_gcm_encrypt)(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length)
+{
+    __m128i *k = (__m128i *) key->data;
+    __m128i bswap_mask = _mm_setr_epi8(7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8);
+    __m128i one = _mm_set_epi32(0,1,0,0);
+    uint32_t nb_blocks = length / 16;
+    uint32_t part_block_len = length % 16;
+
+    gcm->length_input += length;
+
+    __m128i h = _mm_loadu_si128((__m128i *) &gcm->h);
+    __m128i tag = _mm_loadu_si128((__m128i *) &gcm->tag);
+    __m128i iv = _mm_loadu_si128((__m128i *) &gcm->civ);
+    iv = _mm_shuffle_epi8(iv, bswap_mask);
+
+    PRELOAD_ENC(k);
+
+    for (; nb_blocks-- > 0; output += 16, input += 16) {
+        /* iv += 1 */
+        iv = _mm_add_epi64(iv, one);
+
+        /* put the iv back in big endian, encrypt it,
+         * and xor it with the input */
+        __m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
+        DO_ENC_BLOCK(tmp);
+        __m128i m = _mm_loadu_si128((__m128i *) input);
+        m = _mm_xor_si128(m, tmp);
+
+        tag = ghash_add(tag, h, m);
+
+        /* store it out */
+        _mm_storeu_si128((__m128i *) output, m);
+    }
+    if (part_block_len > 0) {
+        __m128i mask;
+        aes_block block;
+        /* FIXME could do something a bit more clever (slli & sub & and maybe) ... */
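+        /* Each mask below keeps the first part_block_len bytes of the block
+         * and zeroes the rest: in _mm_shuffle_epi8, a control byte with its
+         * high bit set (0x80) produces a zero byte, so the partial block is
+         * zero-padded before it enters the GHASH update, as GCM requires. */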
+        switch (part_block_len) {
+        case 1: mask = _mm_setr_epi8(0,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 2: mask = _mm_setr_epi8(0,1,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 3: mask = _mm_setr_epi8(0,1,2,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 4: mask = _mm_setr_epi8(0,1,2,3,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 5: mask = _mm_setr_epi8(0,1,2,3,4,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 6: mask = _mm_setr_epi8(0,1,2,3,4,5,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 7: mask = _mm_setr_epi8(0,1,2,3,4,5,6,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 8: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 9: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,0x80,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 10: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,0x80,0x80,0x80,0x80,0x80,0x80); break;
+        case 11: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,0x80,0x80,0x80,0x80,0x80); break;
+        case 12: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,0x80,0x80,0x80,0x80); break;
+        case 13: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,0x80,0x80,0x80); break;
+        case 14: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,0x80,0x80); break;
+        case 15: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0x80); break;
+        default: mask = _mm_setr_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); break;
+        }
+        block128_zero(&block);
+        block128_copy_bytes(&block, input, part_block_len);
+
+        /* iv += 1 */
+        iv = _mm_add_epi64(iv, one);
+
+        /* put the iv back in big endian mode, encrypt it and xor it with the input */
+        __m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
+        DO_ENC_BLOCK(tmp);
+
+        __m128i m = _mm_loadu_si128((__m128i *) &block);
+        m = _mm_xor_si128(m, tmp);
+        m = _mm_shuffle_epi8(m, mask);
+
+        tag = ghash_add(tag, h, m);
+
+        /* make output */
+        _mm_storeu_si128((__m128i *) &block.b, m);
+        memcpy(output, &block.b, part_block_len);
+    }
+    /* store back IV & tag */
+    __m128i tmp = _mm_shuffle_epi8(iv, bswap_mask);
+    _mm_storeu_si128((__m128i *) &gcm->civ, tmp);
+    _mm_storeu_si128((__m128i *) &gcm->tag, tag);
+}
diff --git a/cbits/aes/block128.h b/cbits/aes/block128.h
new file mode 100644
index 0000000..2556078
--- /dev/null
+++ b/cbits/aes/block128.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2012 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef BLOCK128_H
+#define BLOCK128_H
+
+#include <cryptonite_bitfn.h> /* integer types and the be64_to_cpu/cpu_to_be64 helpers */
+
+typedef union {
+    uint64_t q[2];
+    uint32_t d[4];
+    uint16_t w[8];
+    uint8_t  b[16];
+} block128;
+
+static inline void block128_copy_bytes(block128 *block, uint8_t *src, uint32_t len)
+{
+    int i;
+    for (i = 0; i < len; i++) block->b[i] = src[i];
+}
+
+static inline void block128_copy(block128 *d, const block128 *s)
+{
+    d->q[0] = s->q[0]; d->q[1] = s->q[1];
+}
+
+static inline void block128_zero(block128 *d)
+{
+    d->q[0] = 0; d->q[1] = 0;
+}
+
+static inline void block128_xor(block128 *d, const block128 *s)
+{
+    d->q[0] ^= s->q[0];
+    d->q[1] ^= s->q[1];
+}
+
+static inline void block128_vxor(block128 *d, const block128 *s1, const block128 *s2)
+{
+    d->q[0] = s1->q[0] ^ s2->q[0];
+    d->q[1] = s1->q[1] ^ s2->q[1];
+}
+
+static inline void block128_xor_bytes(block128 *block, uint8_t *src, uint32_t len)
+{
+    int i;
+    for (i = 0; i < len; i++) block->b[i] ^= src[i];
+}
+
+static inline void block128_inc_be(block128 *b)
+{
+    uint64_t v = be64_to_cpu(b->q[1]);
+    if (++v == 0) {
+        b->q[0] = cpu_to_be64(be64_to_cpu(b->q[0]) + 1);
+        b->q[1] = 0;
+    } else
+        b->q[1] = cpu_to_be64(v);
+}
+
+#ifdef IMPL_DEBUG
+#include <stdio.h>
+static inline void block128_print(block128 *b)
+{
+    int i;
+    for (i = 0; i < 16; i++) {
+        printf("%02x ", b->b[i]);
+    }
+    printf("\n");
+}
+#endif
+
+#endif
diff --git a/cbits/aes/generic.c b/cbits/aes/generic.c
new file mode 100644
index 0000000..32963ad
--- /dev/null
+++ b/cbits/aes/generic.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (C) 2008 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * AES implementation
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <cryptonite_aes.h>   /* aes_key / aes_block */
+#include <cryptonite_bitfn.h> /* rol32_be / ror32_be */
+
+static uint8_t sbox[256] = {
+    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe,
+    0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4,
+    0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7,
+    0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3,
+    0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09,
+    0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3,
+    0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe,
+    0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92,
+    0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c,
+    0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19,
+    0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
+    0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2,
+    0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5,
+    0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25,
+    0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86,
+    0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e,
+    0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42,
+    0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+};
+
+static uint8_t rsbox[256] = {
+    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81,
+    0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e,
+    0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23,
+    0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66,
+    0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72,
+    0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65,
+    0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46,
+    0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca,
+    0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91,
+    0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6,
+    0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8,
+    0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f,
+    0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2,
+    0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8,
+    0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+    0x60, 0x51, 0x7f,
0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, + 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, + 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, + 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +}; + +static uint8_t Rcon[] = { + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, + 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, + 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, + 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, +}; + +#define G(a,b,c,d,e,f) { a,b,c,d,e,f } +uint8_t gmtab[256][6] = +{ + G(0x00, 0x00, 0x00, 0x00, 0x00, 0x00), G(0x02, 0x03, 0x09, 0x0b, 0x0d, 0x0e), + G(0x04, 0x06, 0x12, 0x16, 0x1a, 0x1c), G(0x06, 0x05, 0x1b, 0x1d, 0x17, 0x12), + G(0x08, 0x0c, 0x24, 0x2c, 0x34, 0x38), G(0x0a, 0x0f, 0x2d, 0x27, 0x39, 0x36), + G(0x0c, 0x0a, 0x36, 0x3a, 0x2e, 0x24), G(0x0e, 0x09, 0x3f, 0x31, 0x23, 0x2a), + G(0x10, 0x18, 0x48, 0x58, 0x68, 0x70), G(0x12, 0x1b, 0x41, 0x53, 0x65, 0x7e), + G(0x14, 0x1e, 0x5a, 0x4e, 0x72, 0x6c), G(0x16, 0x1d, 0x53, 0x45, 0x7f, 0x62), + G(0x18, 0x14, 0x6c, 0x74, 0x5c, 0x48), G(0x1a, 0x17, 0x65, 0x7f, 0x51, 0x46), + G(0x1c, 0x12, 0x7e, 0x62, 0x46, 0x54), G(0x1e, 0x11, 0x77, 0x69, 0x4b, 0x5a), + G(0x20, 0x30, 0x90, 0xb0, 0xd0, 0xe0), G(0x22, 0x33, 0x99, 0xbb, 0xdd, 0xee), + G(0x24, 0x36, 0x82, 0xa6, 0xca, 0xfc), G(0x26, 0x35, 0x8b, 0xad, 0xc7, 0xf2), + G(0x28, 0x3c, 0xb4, 0x9c, 0xe4, 0xd8), G(0x2a, 0x3f, 0xbd, 0x97, 0xe9, 0xd6), + G(0x2c, 0x3a, 0xa6, 0x8a, 0xfe, 0xc4), G(0x2e, 0x39, 0xaf, 0x81, 0xf3, 0xca), + G(0x30, 0x28, 0xd8, 0xe8, 0xb8, 0x90), G(0x32, 0x2b, 0xd1, 0xe3, 0xb5, 0x9e), + G(0x34, 0x2e, 0xca, 0xfe, 0xa2, 0x8c), G(0x36, 0x2d, 0xc3, 0xf5, 0xaf, 0x82), + G(0x38, 0x24, 0xfc, 0xc4, 0x8c, 0xa8), G(0x3a, 0x27, 0xf5, 0xcf, 0x81, 0xa6), + G(0x3c, 0x22, 0xee, 0xd2, 0x96, 0xb4), G(0x3e, 0x21, 0xe7, 0xd9, 0x9b, 0xba), + G(0x40, 0x60, 0x3b, 0x7b, 0xbb, 0xdb), G(0x42, 0x63, 0x32, 0x70, 0xb6, 0xd5), + G(0x44, 0x66, 0x29, 0x6d, 0xa1, 0xc7), G(0x46, 0x65, 0x20, 0x66, 0xac, 0xc9), + G(0x48, 0x6c, 0x1f, 0x57, 0x8f, 0xe3), G(0x4a, 0x6f, 0x16, 0x5c, 0x82, 0xed), + G(0x4c, 0x6a, 0x0d, 0x41, 0x95, 0xff), G(0x4e, 0x69, 0x04, 0x4a, 0x98, 0xf1), + G(0x50, 0x78, 0x73, 0x23, 0xd3, 0xab), G(0x52, 0x7b, 0x7a, 0x28, 0xde, 0xa5), + G(0x54, 0x7e, 0x61, 0x35, 0xc9, 0xb7), G(0x56, 0x7d, 0x68, 0x3e, 0xc4, 0xb9), + G(0x58, 0x74, 0x57, 0x0f, 0xe7, 0x93), G(0x5a, 0x77, 0x5e, 0x04, 0xea, 0x9d), + G(0x5c, 0x72, 0x45, 0x19, 0xfd, 0x8f), G(0x5e, 0x71, 0x4c, 0x12, 0xf0, 0x81), + G(0x60, 0x50, 0xab, 0xcb, 0x6b, 0x3b), G(0x62, 0x53, 0xa2, 0xc0, 0x66, 0x35), + G(0x64, 0x56, 0xb9, 0xdd, 0x71, 0x27), G(0x66, 0x55, 0xb0, 0xd6, 0x7c, 0x29), + G(0x68, 0x5c, 0x8f, 0xe7, 0x5f, 0x03), G(0x6a, 0x5f, 0x86, 0xec, 0x52, 0x0d), + G(0x6c, 0x5a, 0x9d, 0xf1, 0x45, 0x1f), G(0x6e, 0x59, 0x94, 0xfa, 0x48, 0x11), + G(0x70, 0x48, 0xe3, 0x93, 0x03, 0x4b), G(0x72, 0x4b, 0xea, 0x98, 0x0e, 0x45), + G(0x74, 0x4e, 0xf1, 0x85, 0x19, 0x57), G(0x76, 0x4d, 0xf8, 0x8e, 0x14, 0x59), + G(0x78, 0x44, 0xc7, 0xbf, 0x37, 0x73), G(0x7a, 0x47, 0xce, 0xb4, 0x3a, 0x7d), + G(0x7c, 0x42, 0xd5, 0xa9, 0x2d, 0x6f), G(0x7e, 0x41, 0xdc, 0xa2, 0x20, 0x61), + G(0x80, 0xc0, 0x76, 0xf6, 0x6d, 0xad), G(0x82, 0xc3, 0x7f, 0xfd, 0x60, 0xa3), + G(0x84, 0xc6, 0x64, 0xe0, 0x77, 0xb1), G(0x86, 0xc5, 0x6d, 0xeb, 0x7a, 0xbf), + G(0x88, 0xcc, 0x52, 0xda, 0x59, 0x95), G(0x8a, 0xcf, 0x5b, 0xd1, 0x54, 0x9b), + G(0x8c, 0xca, 0x40, 0xcc, 0x43, 0x89), G(0x8e, 0xc9, 
0x49, 0xc7, 0x4e, 0x87), + G(0x90, 0xd8, 0x3e, 0xae, 0x05, 0xdd), G(0x92, 0xdb, 0x37, 0xa5, 0x08, 0xd3), + G(0x94, 0xde, 0x2c, 0xb8, 0x1f, 0xc1), G(0x96, 0xdd, 0x25, 0xb3, 0x12, 0xcf), + G(0x98, 0xd4, 0x1a, 0x82, 0x31, 0xe5), G(0x9a, 0xd7, 0x13, 0x89, 0x3c, 0xeb), + G(0x9c, 0xd2, 0x08, 0x94, 0x2b, 0xf9), G(0x9e, 0xd1, 0x01, 0x9f, 0x26, 0xf7), + G(0xa0, 0xf0, 0xe6, 0x46, 0xbd, 0x4d), G(0xa2, 0xf3, 0xef, 0x4d, 0xb0, 0x43), + G(0xa4, 0xf6, 0xf4, 0x50, 0xa7, 0x51), G(0xa6, 0xf5, 0xfd, 0x5b, 0xaa, 0x5f), + G(0xa8, 0xfc, 0xc2, 0x6a, 0x89, 0x75), G(0xaa, 0xff, 0xcb, 0x61, 0x84, 0x7b), + G(0xac, 0xfa, 0xd0, 0x7c, 0x93, 0x69), G(0xae, 0xf9, 0xd9, 0x77, 0x9e, 0x67), + G(0xb0, 0xe8, 0xae, 0x1e, 0xd5, 0x3d), G(0xb2, 0xeb, 0xa7, 0x15, 0xd8, 0x33), + G(0xb4, 0xee, 0xbc, 0x08, 0xcf, 0x21), G(0xb6, 0xed, 0xb5, 0x03, 0xc2, 0x2f), + G(0xb8, 0xe4, 0x8a, 0x32, 0xe1, 0x05), G(0xba, 0xe7, 0x83, 0x39, 0xec, 0x0b), + G(0xbc, 0xe2, 0x98, 0x24, 0xfb, 0x19), G(0xbe, 0xe1, 0x91, 0x2f, 0xf6, 0x17), + G(0xc0, 0xa0, 0x4d, 0x8d, 0xd6, 0x76), G(0xc2, 0xa3, 0x44, 0x86, 0xdb, 0x78), + G(0xc4, 0xa6, 0x5f, 0x9b, 0xcc, 0x6a), G(0xc6, 0xa5, 0x56, 0x90, 0xc1, 0x64), + G(0xc8, 0xac, 0x69, 0xa1, 0xe2, 0x4e), G(0xca, 0xaf, 0x60, 0xaa, 0xef, 0x40), + G(0xcc, 0xaa, 0x7b, 0xb7, 0xf8, 0x52), G(0xce, 0xa9, 0x72, 0xbc, 0xf5, 0x5c), + G(0xd0, 0xb8, 0x05, 0xd5, 0xbe, 0x06), G(0xd2, 0xbb, 0x0c, 0xde, 0xb3, 0x08), + G(0xd4, 0xbe, 0x17, 0xc3, 0xa4, 0x1a), G(0xd6, 0xbd, 0x1e, 0xc8, 0xa9, 0x14), + G(0xd8, 0xb4, 0x21, 0xf9, 0x8a, 0x3e), G(0xda, 0xb7, 0x28, 0xf2, 0x87, 0x30), + G(0xdc, 0xb2, 0x33, 0xef, 0x90, 0x22), G(0xde, 0xb1, 0x3a, 0xe4, 0x9d, 0x2c), + G(0xe0, 0x90, 0xdd, 0x3d, 0x06, 0x96), G(0xe2, 0x93, 0xd4, 0x36, 0x0b, 0x98), + G(0xe4, 0x96, 0xcf, 0x2b, 0x1c, 0x8a), G(0xe6, 0x95, 0xc6, 0x20, 0x11, 0x84), + G(0xe8, 0x9c, 0xf9, 0x11, 0x32, 0xae), G(0xea, 0x9f, 0xf0, 0x1a, 0x3f, 0xa0), + G(0xec, 0x9a, 0xeb, 0x07, 0x28, 0xb2), G(0xee, 0x99, 0xe2, 0x0c, 0x25, 0xbc), + G(0xf0, 0x88, 0x95, 0x65, 0x6e, 0xe6), G(0xf2, 0x8b, 0x9c, 0x6e, 0x63, 0xe8), + G(0xf4, 0x8e, 0x87, 0x73, 0x74, 0xfa), G(0xf6, 0x8d, 0x8e, 0x78, 0x79, 0xf4), + G(0xf8, 0x84, 0xb1, 0x49, 0x5a, 0xde), G(0xfa, 0x87, 0xb8, 0x42, 0x57, 0xd0), + G(0xfc, 0x82, 0xa3, 0x5f, 0x40, 0xc2), G(0xfe, 0x81, 0xaa, 0x54, 0x4d, 0xcc), + G(0x1b, 0x9b, 0xec, 0xf7, 0xda, 0x41), G(0x19, 0x98, 0xe5, 0xfc, 0xd7, 0x4f), + G(0x1f, 0x9d, 0xfe, 0xe1, 0xc0, 0x5d), G(0x1d, 0x9e, 0xf7, 0xea, 0xcd, 0x53), + G(0x13, 0x97, 0xc8, 0xdb, 0xee, 0x79), G(0x11, 0x94, 0xc1, 0xd0, 0xe3, 0x77), + G(0x17, 0x91, 0xda, 0xcd, 0xf4, 0x65), G(0x15, 0x92, 0xd3, 0xc6, 0xf9, 0x6b), + G(0x0b, 0x83, 0xa4, 0xaf, 0xb2, 0x31), G(0x09, 0x80, 0xad, 0xa4, 0xbf, 0x3f), + G(0x0f, 0x85, 0xb6, 0xb9, 0xa8, 0x2d), G(0x0d, 0x86, 0xbf, 0xb2, 0xa5, 0x23), + G(0x03, 0x8f, 0x80, 0x83, 0x86, 0x09), G(0x01, 0x8c, 0x89, 0x88, 0x8b, 0x07), + G(0x07, 0x89, 0x92, 0x95, 0x9c, 0x15), G(0x05, 0x8a, 0x9b, 0x9e, 0x91, 0x1b), + G(0x3b, 0xab, 0x7c, 0x47, 0x0a, 0xa1), G(0x39, 0xa8, 0x75, 0x4c, 0x07, 0xaf), + G(0x3f, 0xad, 0x6e, 0x51, 0x10, 0xbd), G(0x3d, 0xae, 0x67, 0x5a, 0x1d, 0xb3), + G(0x33, 0xa7, 0x58, 0x6b, 0x3e, 0x99), G(0x31, 0xa4, 0x51, 0x60, 0x33, 0x97), + G(0x37, 0xa1, 0x4a, 0x7d, 0x24, 0x85), G(0x35, 0xa2, 0x43, 0x76, 0x29, 0x8b), + G(0x2b, 0xb3, 0x34, 0x1f, 0x62, 0xd1), G(0x29, 0xb0, 0x3d, 0x14, 0x6f, 0xdf), + G(0x2f, 0xb5, 0x26, 0x09, 0x78, 0xcd), G(0x2d, 0xb6, 0x2f, 0x02, 0x75, 0xc3), + G(0x23, 0xbf, 0x10, 0x33, 0x56, 0xe9), G(0x21, 0xbc, 0x19, 0x38, 0x5b, 0xe7), + G(0x27, 0xb9, 0x02, 0x25, 0x4c, 0xf5), G(0x25, 0xba, 0x0b, 0x2e, 0x41, 0xfb), + G(0x5b, 
0xfb, 0xd7, 0x8c, 0x61, 0x9a), G(0x59, 0xf8, 0xde, 0x87, 0x6c, 0x94), + G(0x5f, 0xfd, 0xc5, 0x9a, 0x7b, 0x86), G(0x5d, 0xfe, 0xcc, 0x91, 0x76, 0x88), + G(0x53, 0xf7, 0xf3, 0xa0, 0x55, 0xa2), G(0x51, 0xf4, 0xfa, 0xab, 0x58, 0xac), + G(0x57, 0xf1, 0xe1, 0xb6, 0x4f, 0xbe), G(0x55, 0xf2, 0xe8, 0xbd, 0x42, 0xb0), + G(0x4b, 0xe3, 0x9f, 0xd4, 0x09, 0xea), G(0x49, 0xe0, 0x96, 0xdf, 0x04, 0xe4), + G(0x4f, 0xe5, 0x8d, 0xc2, 0x13, 0xf6), G(0x4d, 0xe6, 0x84, 0xc9, 0x1e, 0xf8), + G(0x43, 0xef, 0xbb, 0xf8, 0x3d, 0xd2), G(0x41, 0xec, 0xb2, 0xf3, 0x30, 0xdc), + G(0x47, 0xe9, 0xa9, 0xee, 0x27, 0xce), G(0x45, 0xea, 0xa0, 0xe5, 0x2a, 0xc0), + G(0x7b, 0xcb, 0x47, 0x3c, 0xb1, 0x7a), G(0x79, 0xc8, 0x4e, 0x37, 0xbc, 0x74), + G(0x7f, 0xcd, 0x55, 0x2a, 0xab, 0x66), G(0x7d, 0xce, 0x5c, 0x21, 0xa6, 0x68), + G(0x73, 0xc7, 0x63, 0x10, 0x85, 0x42), G(0x71, 0xc4, 0x6a, 0x1b, 0x88, 0x4c), + G(0x77, 0xc1, 0x71, 0x06, 0x9f, 0x5e), G(0x75, 0xc2, 0x78, 0x0d, 0x92, 0x50), + G(0x6b, 0xd3, 0x0f, 0x64, 0xd9, 0x0a), G(0x69, 0xd0, 0x06, 0x6f, 0xd4, 0x04), + G(0x6f, 0xd5, 0x1d, 0x72, 0xc3, 0x16), G(0x6d, 0xd6, 0x14, 0x79, 0xce, 0x18), + G(0x63, 0xdf, 0x2b, 0x48, 0xed, 0x32), G(0x61, 0xdc, 0x22, 0x43, 0xe0, 0x3c), + G(0x67, 0xd9, 0x39, 0x5e, 0xf7, 0x2e), G(0x65, 0xda, 0x30, 0x55, 0xfa, 0x20), + G(0x9b, 0x5b, 0x9a, 0x01, 0xb7, 0xec), G(0x99, 0x58, 0x93, 0x0a, 0xba, 0xe2), + G(0x9f, 0x5d, 0x88, 0x17, 0xad, 0xf0), G(0x9d, 0x5e, 0x81, 0x1c, 0xa0, 0xfe), + G(0x93, 0x57, 0xbe, 0x2d, 0x83, 0xd4), G(0x91, 0x54, 0xb7, 0x26, 0x8e, 0xda), + G(0x97, 0x51, 0xac, 0x3b, 0x99, 0xc8), G(0x95, 0x52, 0xa5, 0x30, 0x94, 0xc6), + G(0x8b, 0x43, 0xd2, 0x59, 0xdf, 0x9c), G(0x89, 0x40, 0xdb, 0x52, 0xd2, 0x92), + G(0x8f, 0x45, 0xc0, 0x4f, 0xc5, 0x80), G(0x8d, 0x46, 0xc9, 0x44, 0xc8, 0x8e), + G(0x83, 0x4f, 0xf6, 0x75, 0xeb, 0xa4), G(0x81, 0x4c, 0xff, 0x7e, 0xe6, 0xaa), + G(0x87, 0x49, 0xe4, 0x63, 0xf1, 0xb8), G(0x85, 0x4a, 0xed, 0x68, 0xfc, 0xb6), + G(0xbb, 0x6b, 0x0a, 0xb1, 0x67, 0x0c), G(0xb9, 0x68, 0x03, 0xba, 0x6a, 0x02), + G(0xbf, 0x6d, 0x18, 0xa7, 0x7d, 0x10), G(0xbd, 0x6e, 0x11, 0xac, 0x70, 0x1e), + G(0xb3, 0x67, 0x2e, 0x9d, 0x53, 0x34), G(0xb1, 0x64, 0x27, 0x96, 0x5e, 0x3a), + G(0xb7, 0x61, 0x3c, 0x8b, 0x49, 0x28), G(0xb5, 0x62, 0x35, 0x80, 0x44, 0x26), + G(0xab, 0x73, 0x42, 0xe9, 0x0f, 0x7c), G(0xa9, 0x70, 0x4b, 0xe2, 0x02, 0x72), + G(0xaf, 0x75, 0x50, 0xff, 0x15, 0x60), G(0xad, 0x76, 0x59, 0xf4, 0x18, 0x6e), + G(0xa3, 0x7f, 0x66, 0xc5, 0x3b, 0x44), G(0xa1, 0x7c, 0x6f, 0xce, 0x36, 0x4a), + G(0xa7, 0x79, 0x74, 0xd3, 0x21, 0x58), G(0xa5, 0x7a, 0x7d, 0xd8, 0x2c, 0x56), + G(0xdb, 0x3b, 0xa1, 0x7a, 0x0c, 0x37), G(0xd9, 0x38, 0xa8, 0x71, 0x01, 0x39), + G(0xdf, 0x3d, 0xb3, 0x6c, 0x16, 0x2b), G(0xdd, 0x3e, 0xba, 0x67, 0x1b, 0x25), + G(0xd3, 0x37, 0x85, 0x56, 0x38, 0x0f), G(0xd1, 0x34, 0x8c, 0x5d, 0x35, 0x01), + G(0xd7, 0x31, 0x97, 0x40, 0x22, 0x13), G(0xd5, 0x32, 0x9e, 0x4b, 0x2f, 0x1d), + G(0xcb, 0x23, 0xe9, 0x22, 0x64, 0x47), G(0xc9, 0x20, 0xe0, 0x29, 0x69, 0x49), + G(0xcf, 0x25, 0xfb, 0x34, 0x7e, 0x5b), G(0xcd, 0x26, 0xf2, 0x3f, 0x73, 0x55), + G(0xc3, 0x2f, 0xcd, 0x0e, 0x50, 0x7f), G(0xc1, 0x2c, 0xc4, 0x05, 0x5d, 0x71), + G(0xc7, 0x29, 0xdf, 0x18, 0x4a, 0x63), G(0xc5, 0x2a, 0xd6, 0x13, 0x47, 0x6d), + G(0xfb, 0x0b, 0x31, 0xca, 0xdc, 0xd7), G(0xf9, 0x08, 0x38, 0xc1, 0xd1, 0xd9), + G(0xff, 0x0d, 0x23, 0xdc, 0xc6, 0xcb), G(0xfd, 0x0e, 0x2a, 0xd7, 0xcb, 0xc5), + G(0xf3, 0x07, 0x15, 0xe6, 0xe8, 0xef), G(0xf1, 0x04, 0x1c, 0xed, 0xe5, 0xe1), + G(0xf7, 0x01, 0x07, 0xf0, 0xf2, 0xf3), G(0xf5, 0x02, 0x0e, 0xfb, 0xff, 0xfd), + G(0xeb, 0x13, 0x79, 0x92, 0xb4, 0xa7), 
G(0xe9, 0x10, 0x70, 0x99, 0xb9, 0xa9), + G(0xef, 0x15, 0x6b, 0x84, 0xae, 0xbb), G(0xed, 0x16, 0x62, 0x8f, 0xa3, 0xb5), + G(0xe3, 0x1f, 0x5d, 0xbe, 0x80, 0x9f), G(0xe1, 0x1c, 0x54, 0xb5, 0x8d, 0x91), + G(0xe7, 0x19, 0x4f, 0xa8, 0x9a, 0x83), G(0xe5, 0x1a, 0x46, 0xa3, 0x97, 0x8d), +}; +#undef G + +static void expand_key(uint8_t *expandedKey, uint8_t *key, int size, size_t expandedKeySize) +{ + int csz; + int i; + uint8_t t[4] = { 0 }; + + for (i = 0; i < size; i++) + expandedKey[i] = key[i]; + csz = size; + + i = 1; + while (csz < expandedKeySize) { + t[0] = expandedKey[(csz - 4) + 0]; + t[1] = expandedKey[(csz - 4) + 1]; + t[2] = expandedKey[(csz - 4) + 2]; + t[3] = expandedKey[(csz - 4) + 3]; + + if (csz % size == 0) { + uint8_t tmp; + + tmp = t[0]; + t[0] = sbox[t[1]] ^ Rcon[i++ % sizeof(Rcon)]; + t[1] = sbox[t[2]]; + t[2] = sbox[t[3]]; + t[3] = sbox[tmp]; + } + + if (size == 32 && ((csz % size) == 16)) { + t[0] = sbox[t[0]]; + t[1] = sbox[t[1]]; + t[2] = sbox[t[2]]; + t[3] = sbox[t[3]]; + } + + expandedKey[csz] = expandedKey[csz - size] ^ t[0]; csz++; + expandedKey[csz] = expandedKey[csz - size] ^ t[1]; csz++; + expandedKey[csz] = expandedKey[csz - size] ^ t[2]; csz++; + expandedKey[csz] = expandedKey[csz - size] ^ t[3]; csz++; + } +} + +static void shift_rows(uint8_t *state) +{ + uint32_t *s32; + int i; + + for (i = 0; i < 16; i++) + state[i] = sbox[state[i]]; + s32 = (uint32_t *) state; + s32[1] = rol32_be(s32[1], 8); + s32[2] = rol32_be(s32[2], 16); + s32[3] = rol32_be(s32[3], 24); +} + +static void add_round_key(uint8_t *state, uint8_t *rk) +{ + uint32_t *s32, *r32; + + s32 = (uint32_t *) state; + r32 = (uint32_t *) rk; + s32[0] ^= r32[0]; + s32[1] ^= r32[1]; + s32[2] ^= r32[2]; + s32[3] ^= r32[3]; +} + +#define gm1(a) (a) +#define gm2(a) gmtab[a][0] +#define gm3(a) gmtab[a][1] +#define gm9(a) gmtab[a][2] +#define gm11(a) gmtab[a][3] +#define gm13(a) gmtab[a][4] +#define gm14(a) gmtab[a][5] + +static void mix_columns(uint8_t *state) +{ + int i; + uint8_t cpy[4]; + + for (i = 0; i < 4; i++) { + cpy[0] = state[0 * 4 + i]; + cpy[1] = state[1 * 4 + i]; + cpy[2] = state[2 * 4 + i]; + cpy[3] = state[3 * 4 + i]; + state[i] = gm2(cpy[0]) ^ gm1(cpy[3]) ^ gm1(cpy[2]) ^ gm3(cpy[1]); + state[4+i] = gm2(cpy[1]) ^ gm1(cpy[0]) ^ gm1(cpy[3]) ^ gm3(cpy[2]); + state[8+i] = gm2(cpy[2]) ^ gm1(cpy[1]) ^ gm1(cpy[0]) ^ gm3(cpy[3]); + state[12+i] = gm2(cpy[3]) ^ gm1(cpy[2]) ^ gm1(cpy[1]) ^ gm3(cpy[0]); + } +} + +static void create_round_key(uint8_t *expandedKey, uint8_t *rk) +{ + int i,j; + for (i = 0; i < 4; i++) + for (j = 0; j < 4; j++) + rk[i + j * 4] = expandedKey[i * 4 + j]; +} + +static void aes_main(aes_key *key, uint8_t *state) +{ + int i = 0; + uint8_t rk[16]; + + create_round_key(key->data, rk); + add_round_key(state, rk); + + for (i = 1; i < key->nbr; i++) { + create_round_key(key->data + 16 * i, rk); + shift_rows(state); + mix_columns(state); + add_round_key(state, rk); + } + + create_round_key(key->data + 16 * key->nbr, rk); + shift_rows(state); + add_round_key(state, rk); +} + +static void shift_rows_inv(uint8_t *state) +{ + uint32_t *s32; + int i; + + s32 = (uint32_t *) state; + s32[1] = ror32_be(s32[1], 8); + s32[2] = ror32_be(s32[2], 16); + s32[3] = ror32_be(s32[3], 24); + for (i = 0; i < 16; i++) + state[i] = rsbox[state[i]]; +} + +static void mix_columns_inv(uint8_t *state) +{ + int i; + uint8_t cpy[4]; + + for (i = 0; i < 4; i++) { + cpy[0] = state[0 * 4 + i]; + cpy[1] = state[1 * 4 + i]; + cpy[2] = state[2 * 4 + i]; + cpy[3] = state[3 * 4 + i]; + state[i] = gm14(cpy[0]) ^ gm9(cpy[3]) ^ 
gm13(cpy[2]) ^ gm11(cpy[1]);
+        state[4+i]  = gm14(cpy[1]) ^ gm9(cpy[0]) ^ gm13(cpy[3]) ^ gm11(cpy[2]);
+        state[8+i]  = gm14(cpy[2]) ^ gm9(cpy[1]) ^ gm13(cpy[0]) ^ gm11(cpy[3]);
+        state[12+i] = gm14(cpy[3]) ^ gm9(cpy[2]) ^ gm13(cpy[1]) ^ gm11(cpy[0]);
+    }
+}
+
+static void aes_main_inv(aes_key *key, uint8_t *state)
+{
+    int i = 0;
+    uint8_t rk[16];
+
+    create_round_key(key->data + 16 * key->nbr, rk);
+    add_round_key(state, rk);
+
+    for (i = key->nbr - 1; i > 0; i--) {
+        create_round_key(key->data + 16 * i, rk);
+        shift_rows_inv(state);
+        add_round_key(state, rk);
+        mix_columns_inv(state);
+    }
+
+    create_round_key(key->data, rk);
+    shift_rows_inv(state);
+    add_round_key(state, rk);
+}
+
+/* Set the block values, for the block:
+ * a0,0 a0,1 a0,2 a0,3
+ * a1,0 a1,1 a1,2 a1,3 -> a0,0 a1,0 a2,0 a3,0 a0,1 a1,1 ... a2,3 a3,3
+ * a2,0 a2,1 a2,2 a2,3
+ * a3,0 a3,1 a3,2 a3,3
+ */
+#define swap_block(t, f) \
+    t[0] = f[0]; t[4] = f[1]; t[8] = f[2]; t[12] = f[3]; \
+    t[1] = f[4]; t[5] = f[5]; t[9] = f[6]; t[13] = f[7]; \
+    t[2] = f[8]; t[6] = f[9]; t[10] = f[10]; t[14] = f[11]; \
+    t[3] = f[12]; t[7] = f[13]; t[11] = f[14]; t[15] = f[15]
+
+void aes_generic_encrypt_block(aes_block *output, aes_key *key, aes_block *input)
+{
+    uint8_t block[16];
+    uint8_t *iptr, *optr;
+
+    iptr = (uint8_t *) input;
+    optr = (uint8_t *) output;
+    swap_block(block, iptr);
+    aes_main(key, block);
+    swap_block(optr, block);
+}
+
+void aes_generic_decrypt_block(aes_block *output, aes_key *key, aes_block *input)
+{
+    uint8_t block[16];
+    uint8_t *iptr, *optr;
+
+    iptr = (uint8_t *) input;
+    optr = (uint8_t *) output;
+    swap_block(block, iptr);
+    aes_main_inv(key, block);
+    swap_block(optr, block);
+}
+
+void aes_generic_init(aes_key *key, uint8_t *origkey, uint8_t size)
+{
+    int esz;
+
+    switch (size) {
+    case 16: key->nbr = 10; esz = 176; break;
+    case 24: key->nbr = 12; esz = 208; break;
+    case 32: key->nbr = 14; esz = 240; break;
+    default: return;
+    }
+    expand_key(key->data, origkey, size, esz);
+    return;
+}
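+
+/* Known-answer check (FIPS-197 appendix C.1): with key
+ * 000102030405060708090a0b0c0d0e0f, plaintext
+ * 00112233445566778899aabbccddeeff must encrypt to
+ * 69c4e0d86a7b0430d8cdb78070b4c55a, and decrypting the ciphertext
+ * must give back the plaintext. */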
diff --git a/cbits/aes/generic.h b/cbits/aes/generic.h
new file mode 100644
index 0000000..e54eb6b
--- /dev/null
+++ b/cbits/aes/generic.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2012 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#include "cryptonite_aes.h"
+
+void aes_generic_encrypt_block(aes_block *output, aes_key *key, aes_block *input);
+void aes_generic_decrypt_block(aes_block *output, aes_key *key, aes_block *input);
+void aes_generic_init(aes_key *key, uint8_t *origkey, uint8_t size);
diff --git a/cbits/aes/gf.c b/cbits/aes/gf.c
new file mode 100644
index 0000000..49e8106
--- /dev/null
+++ b/cbits/aes/gf.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2012 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <cryptonite_aes.h>
+#include <cryptonite_bitfn.h> /* cpu_to_be64 / le64 helpers */
+#include <aes/block128.h>
+#include <aes/gf.h>
+
+/* this is a really inefficient way to GF multiply.
+ * the alternative without hw accel is building small tables
+ * to speed up the multiplication.
+ * TODO: optimise with tables
+ */
+void gf_mul(block128 *a, block128 *b)
+{
+    uint64_t a0, a1, v0, v1;
+    int i, j;
+
+    a0 = a1 = 0;
+    v0 = cpu_to_be64(a->q[0]);
+    v1 = cpu_to_be64(a->q[1]);
+
+    for (i = 0; i < 16; i++)
+        for (j = 0x80; j != 0; j >>= 1) {
+            uint8_t x = b->b[i] & j;
+            a0 ^= x ? v0 : 0;
+            a1 ^= x ? v1 : 0;
+            x = (uint8_t) v1 & 1;
+            v1 = (v1 >> 1) | (v0 << 63);
+            v0 = (v0 >> 1) ^ (x ? (0xe1ULL << 56) : 0);
+        }
+    a->q[0] = cpu_to_be64(a0);
+    a->q[1] = cpu_to_be64(a1);
+}
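+
+/* gf_mulx below multiplies by x in GF(2^128) under the XTS convention:
+ * shift the 128-bit value (q[0] low limb, q[1] high limb, little endian)
+ * left by one bit, and if a bit fell off the top, reduce by xoring the
+ * constant 0x87 (from x^128 + x^7 + x^2 + x + 1) into the low limb. */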
+/* inplace GFMUL for xts mode */
+void gf_mulx(block128 *a)
+{
+    const uint64_t gf_mask = cpu_to_le64(0x8000000000000000ULL);
+    uint64_t r = ((a->q[1] & gf_mask) ? cpu_to_le64(0x87) : 0);
+    a->q[1] = cpu_to_le64((le64_to_cpu(a->q[1]) << 1) | (a->q[0] & gf_mask ? 1 : 0));
+    a->q[0] = cpu_to_le64(le64_to_cpu(a->q[0]) << 1) ^ r;
+}
diff --git a/cbits/aes/gf.h b/cbits/aes/gf.h
new file mode 100644
index 0000000..c69c2d6
--- /dev/null
+++ b/cbits/aes/gf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2012 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef CRYPTONITE_AES_GF128MUL_H
+#define CRYPTONITE_AES_GF128MUL_H
+
+#include "aes/block128.h"
+
+void gf_mul(block128 *a, block128 *b);
+void gf_mulx(block128 *a);
+
+#endif
diff --git a/cbits/aes/x86ni.c b/cbits/aes/x86ni.c
new file mode 100644
index 0000000..217045b
--- /dev/null
+++ b/cbits/aes/x86ni.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2012-2013 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef WITH_AESNI
+
+#include <wmmintrin.h>        /* AES-NI intrinsics */
+#include <tmmintrin.h>        /* _mm_shuffle_epi8 */
+#include <string.h>
+#include <cryptonite_aes.h>
+#include <cryptonite_bitfn.h>
+#include <aes/block128.h>
+#include <aes/x86ni.h>
+
+#ifdef ARCH_X86
+#define ALIGN_UP(addr, size) (((addr) + ((size) - 1)) & (~((size) - 1)))
+#define ALIGNMENT(n) __attribute__((aligned(n)))
+
+/* Old GCC versions cannot cope with the shuffle parameter being passed as an
+ * argument, even though it is an immediate 8-bit constant (it only takes the
+ * two values 0xff and 0xaa here). Un-factor aes_128_key_expansion into two
+ * versions that have the shuffle parameter explicitly set. */
+static __m128i aes_128_key_expansion_ff(__m128i key, __m128i keygened)
+{
+    keygened = _mm_shuffle_epi32(keygened, 0xff);
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    return _mm_xor_si128(key, keygened);
+}
+
+static __m128i aes_128_key_expansion_aa(__m128i key, __m128i keygened)
+{
+    keygened = _mm_shuffle_epi32(keygened, 0xaa);
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
+    return _mm_xor_si128(key, keygened);
+}
+
+void aes_ni_init(aes_key *key, uint8_t *ikey, uint8_t size)
+{
+    __m128i k[28];
+    uint64_t *out = (uint64_t *) key->data;
+    int i;
+
+    switch (size) {
+    case 16:
+        k[0] = _mm_loadu_si128((const __m128i*) ikey);
+
+        #define AES_128_key_exp(K, RCON) aes_128_key_expansion_ff(K, _mm_aeskeygenassist_si128(K, RCON))
+        k[1]  = AES_128_key_exp(k[0], 0x01);
+        k[2]  = AES_128_key_exp(k[1], 0x02);
+        k[3]  = AES_128_key_exp(k[2], 0x04);
+        k[4]  = AES_128_key_exp(k[3], 0x08);
+        k[5]  = AES_128_key_exp(k[4], 0x10);
+        k[6]  = AES_128_key_exp(k[5], 0x20);
+        k[7]  = AES_128_key_exp(k[6], 0x40);
+        k[8]  = AES_128_key_exp(k[7], 0x80);
+        k[9]  = AES_128_key_exp(k[8], 0x1B);
+        k[10] = AES_128_key_exp(k[9], 0x36);
+
+        /* generate decryption keys in reverse order.
+         * k[10] is shared by the last encryption and first decryption rounds;
+         * k[0] is shared by the first encryption and last decryption rounds
+         * (and is the original user key) */
+        k[11] = _mm_aesimc_si128(k[9]);
+        k[12] = _mm_aesimc_si128(k[8]);
+        k[13] = _mm_aesimc_si128(k[7]);
+        k[14] = _mm_aesimc_si128(k[6]);
+        k[15] = _mm_aesimc_si128(k[5]);
+        k[16] = _mm_aesimc_si128(k[4]);
+        k[17] = _mm_aesimc_si128(k[3]);
+        k[18] = _mm_aesimc_si128(k[2]);
+        k[19] = _mm_aesimc_si128(k[1]);
+
+        for (i = 0; i < 20; i++)
+            _mm_storeu_si128(((__m128i *) out) + i, k[i]);
+        break;
+    case 32:
+#define AES_256_key_exp_1(K1, K2, RCON) aes_128_key_expansion_ff(K1, _mm_aeskeygenassist_si128(K2, RCON))
+#define AES_256_key_exp_2(K1, K2) aes_128_key_expansion_aa(K1, _mm_aeskeygenassist_si128(K2, 0x00))
+        k[0]  = _mm_loadu_si128((const __m128i*) ikey);
+        k[1]  = _mm_loadu_si128((const __m128i*) (ikey+16));
+        k[2]  = AES_256_key_exp_1(k[0], k[1], 0x01);
+        k[3]  = AES_256_key_exp_2(k[1], k[2]);
+        k[4]  = AES_256_key_exp_1(k[2], k[3], 0x02);
+        k[5]  = AES_256_key_exp_2(k[3], k[4]);
+        k[6]  = AES_256_key_exp_1(k[4], k[5], 0x04);
+        k[7]  = AES_256_key_exp_2(k[5], k[6]);
+        k[8]  = AES_256_key_exp_1(k[6], k[7], 0x08);
+        k[9]  = AES_256_key_exp_2(k[7], k[8]);
+        k[10] = AES_256_key_exp_1(k[8], k[9], 0x10);
+        k[11] = AES_256_key_exp_2(k[9], k[10]);
+        k[12] = AES_256_key_exp_1(k[10], k[11], 0x20);
+        k[13] = AES_256_key_exp_2(k[11], k[12]);
+        k[14] = AES_256_key_exp_1(k[12], k[13], 0x40);
+
+        /* k[15..27] are the InvMixColumns forms of k[13..1];
+         * k[0] again doubles as the last decryption round key */
+        k[15] = _mm_aesimc_si128(k[13]);
+        k[16] = _mm_aesimc_si128(k[12]);
+        k[17] = _mm_aesimc_si128(k[11]);
+        k[18] = _mm_aesimc_si128(k[10]);
+        k[19] = _mm_aesimc_si128(k[9]);
+        k[20] = _mm_aesimc_si128(k[8]);
+        k[21] = _mm_aesimc_si128(k[7]);
+        k[22] = _mm_aesimc_si128(k[6]);
+        k[23] = _mm_aesimc_si128(k[5]);
+        k[24] = _mm_aesimc_si128(k[4]);
+        k[25] = _mm_aesimc_si128(k[3]);
+        k[26] = _mm_aesimc_si128(k[2]);
+        k[27] = _mm_aesimc_si128(k[1]);
+        for (i = 0; i < 28; i++)
+            _mm_storeu_si128(((__m128i *) out) + i, k[i]);
+        break;
+    default:
+        break;
+    }
+}
+
+/* TO OPTIMISE: use pclmulqdq... or some faster code.
+ * this is the lamest way of doing it, but i'm out of time.
+ * this is basically a copy of gf_mulx in gf.c */
+static __m128i gfmulx(__m128i v)
+{
+    uint64_t v_[2] ALIGNMENT(16);
+    const uint64_t gf_mask = 0x8000000000000000;
+
+    _mm_store_si128((__m128i *) v_, v);
+    uint64_t r = ((v_[1] & gf_mask) ? 0x87 : 0);
+    v_[1] = (v_[1] << 1) | (v_[0] & gf_mask ? 1 : 0);
+    v_[0] = (v_[0] << 1) ^ r;
+    v = _mm_load_si128((__m128i *) v_);
+    return v;
+}
+
+static void unopt_gf_mul(block128 *a, block128 *b)
+{
+    uint64_t a0, a1, v0, v1;
+    int i, j;
+
+    a0 = a1 = 0;
+    v0 = cpu_to_be64(a->q[0]);
+    v1 = cpu_to_be64(a->q[1]);
+
+    for (i = 0; i < 16; i++)
+        for (j = 0x80; j != 0; j >>= 1) {
+            uint8_t x = b->b[i] & j;
+            a0 ^= x ? v0 : 0;
+            a1 ^= x ? v1 : 0;
+            x = (uint8_t) v1 & 1;
+            v1 = (v1 >> 1) | (v0 << 63);
+            v0 = (v0 >> 1) ^ (x ? (0xe1ULL << 56) : 0);
+        }
+    a->q[0] = cpu_to_be64(a0);
+    a->q[1] = cpu_to_be64(a1);
+}
+
+static __m128i ghash_add(__m128i tag, __m128i h, __m128i m)
+{
+    aes_block _t, _h;
+    tag = _mm_xor_si128(tag, m);
+
+    _mm_store_si128((__m128i *) &_t, tag);
+    _mm_store_si128((__m128i *) &_h, h);
+    unopt_gf_mul(&_t, &_h);
+    tag = _mm_load_si128((__m128i *) &_t);
+    return tag;
+}
+
+#define PRELOAD_ENC_KEYS128(k) \
+    __m128i K0  = _mm_loadu_si128(((__m128i *) k)+0); \
+    __m128i K1  = _mm_loadu_si128(((__m128i *) k)+1); \
+    __m128i K2  = _mm_loadu_si128(((__m128i *) k)+2); \
+    __m128i K3  = _mm_loadu_si128(((__m128i *) k)+3); \
+    __m128i K4  = _mm_loadu_si128(((__m128i *) k)+4); \
+    __m128i K5  = _mm_loadu_si128(((__m128i *) k)+5); \
+    __m128i K6  = _mm_loadu_si128(((__m128i *) k)+6); \
+    __m128i K7  = _mm_loadu_si128(((__m128i *) k)+7); \
+    __m128i K8  = _mm_loadu_si128(((__m128i *) k)+8); \
+    __m128i K9  = _mm_loadu_si128(((__m128i *) k)+9); \
+    __m128i K10 = _mm_loadu_si128(((__m128i *) k)+10);
+
+#define PRELOAD_ENC_KEYS256(k) \
+    PRELOAD_ENC_KEYS128(k) \
+    __m128i K11 = _mm_loadu_si128(((__m128i *) k)+11); \
+    __m128i K12 = _mm_loadu_si128(((__m128i *) k)+12); \
+    __m128i K13 = _mm_loadu_si128(((__m128i *) k)+13); \
+    __m128i K14 = _mm_loadu_si128(((__m128i *) k)+14);
+
+#define DO_ENC_BLOCK128(m) \
+    m = _mm_xor_si128(m, K0); \
+    m = _mm_aesenc_si128(m, K1); \
+    m = _mm_aesenc_si128(m, K2); \
+    m = _mm_aesenc_si128(m, K3); \
+    m = _mm_aesenc_si128(m, K4); \
+    m = _mm_aesenc_si128(m, K5); \
+    m = _mm_aesenc_si128(m, K6); \
+    m = _mm_aesenc_si128(m, K7); \
+    m = _mm_aesenc_si128(m, K8); \
+    m = _mm_aesenc_si128(m, K9); \
+    m = _mm_aesenclast_si128(m, K10);
+
+#define DO_ENC_BLOCK256(m) \
+    m = _mm_xor_si128(m, K0); \
+    m = _mm_aesenc_si128(m, K1); \
+    m = _mm_aesenc_si128(m, K2); \
+    m = _mm_aesenc_si128(m, K3); \
+    m = _mm_aesenc_si128(m, K4); \
+    m = _mm_aesenc_si128(m, K5); \
+    m = _mm_aesenc_si128(m, K6); \
+    m = _mm_aesenc_si128(m, K7); \
+    m = _mm_aesenc_si128(m, K8); \
+    m = _mm_aesenc_si128(m, K9); \
+    m = _mm_aesenc_si128(m, K10); \
+    m = _mm_aesenc_si128(m, K11); \
+    m = _mm_aesenc_si128(m, K12); \
+    m = _mm_aesenc_si128(m, K13); \
+    m = _mm_aesenclast_si128(m, K14);
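+
+/* Layout reminder for the decryption path: aes_ni_init stores the
+ * InvMixColumns round keys right after the encryption schedule (at block
+ * offset 10 for AES-128, 14 for AES-256), and the last decryption round key
+ * is the original user key at offset 0; that is why the PRELOAD_DEC macros
+ * below load K0..K9 from an offset and fetch the final key from index 0. */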
+/* load K0 to K9 from index 'at' */
+#define PRELOAD_DEC_KEYS_AT(k, at) \
+    __m128i K0 = _mm_loadu_si128(((__m128i *) k)+at+0); \
+    __m128i K1 = _mm_loadu_si128(((__m128i *) k)+at+1); \
+    __m128i K2 = _mm_loadu_si128(((__m128i *) k)+at+2); \
+    __m128i K3 = _mm_loadu_si128(((__m128i *) k)+at+3); \
+    __m128i K4 = _mm_loadu_si128(((__m128i *) k)+at+4); \
+    __m128i K5 = _mm_loadu_si128(((__m128i *) k)+at+5); \
+    __m128i K6 = _mm_loadu_si128(((__m128i *) k)+at+6); \
+    __m128i K7 = _mm_loadu_si128(((__m128i *) k)+at+7); \
+    __m128i K8 = _mm_loadu_si128(((__m128i *) k)+at+8); \
+    __m128i K9 = _mm_loadu_si128(((__m128i *) k)+at+9);
+
+#define PRELOAD_DEC_KEYS128(k) \
+    PRELOAD_DEC_KEYS_AT(k, 10) \
+    __m128i K10 = _mm_loadu_si128(((__m128i *) k)+0);
+
+#define PRELOAD_DEC_KEYS256(k) \
+    PRELOAD_DEC_KEYS_AT(k, 14) \
+    __m128i K10 = _mm_loadu_si128(((__m128i *) k)+14+10); \
+    __m128i K11 = _mm_loadu_si128(((__m128i *) k)+14+11); \
+    __m128i K12 = _mm_loadu_si128(((__m128i *) k)+14+12); \
+    __m128i K13 = _mm_loadu_si128(((__m128i *) k)+14+13); \
+    __m128i K14 = _mm_loadu_si128(((__m128i *) k)+0);
+
+#define DO_DEC_BLOCK128(m) \
+    m = _mm_xor_si128(m, K0); \
+    m = _mm_aesdec_si128(m, K1); \
+    m = _mm_aesdec_si128(m, K2); \
+    m = _mm_aesdec_si128(m, K3); \
+    m = _mm_aesdec_si128(m, K4); \
+    m = _mm_aesdec_si128(m, K5); \
+    m = _mm_aesdec_si128(m, K6); \
+    m = _mm_aesdec_si128(m, K7); \
+    m = _mm_aesdec_si128(m, K8); \
+    m = _mm_aesdec_si128(m, K9); \
+    m = _mm_aesdeclast_si128(m, K10);
+
+#define DO_DEC_BLOCK256(m) \
+    m = _mm_xor_si128(m, K0); \
+    m = _mm_aesdec_si128(m, K1); \
+    m = _mm_aesdec_si128(m, K2); \
+    m = _mm_aesdec_si128(m, K3); \
+    m = _mm_aesdec_si128(m, K4); \
+    m = _mm_aesdec_si128(m, K5); \
+    m = _mm_aesdec_si128(m, K6); \
+    m = _mm_aesdec_si128(m, K7); \
+    m = _mm_aesdec_si128(m, K8); \
+    m = _mm_aesdec_si128(m, K9); \
+    m = _mm_aesdec_si128(m, K10); \
+    m = _mm_aesdec_si128(m, K11); \
+    m = _mm_aesdec_si128(m, K12); \
+    m = _mm_aesdec_si128(m, K13); \
+    m = _mm_aesdeclast_si128(m, K14);
+
+/* Instantiate each mode twice by textual inclusion: SIZED(name) expands to
+ * name##128 / name##256, and the PRELOAD and DO_*_BLOCK macros select the
+ * 10-round or 14-round key schedule. */
+#define SIZE 128
+#define SIZED(m) m##128
+#define PRELOAD_ENC PRELOAD_ENC_KEYS128
+#define DO_ENC_BLOCK DO_ENC_BLOCK128
+#define PRELOAD_DEC PRELOAD_DEC_KEYS128
+#define DO_DEC_BLOCK DO_DEC_BLOCK128
+#include "aes_x86ni_impl.c"
+
+#undef SIZE
+#undef SIZED
+#undef PRELOAD_ENC
+#undef PRELOAD_DEC
+#undef DO_ENC_BLOCK
+#undef DO_DEC_BLOCK
+
+#define SIZED(m) m##256
+#define SIZE 256
+#define PRELOAD_ENC PRELOAD_ENC_KEYS256
+#define DO_ENC_BLOCK DO_ENC_BLOCK256
+#define PRELOAD_DEC PRELOAD_DEC_KEYS256
+#define DO_DEC_BLOCK DO_DEC_BLOCK256
+#include "aes_x86ni_impl.c"
+
+#undef SIZE
+#undef SIZED
+#undef PRELOAD_ENC
+#undef PRELOAD_DEC
+#undef DO_ENC_BLOCK
+#undef DO_DEC_BLOCK
+
+#endif
+
+#endif
diff --git a/cbits/aes/x86ni.h b/cbits/aes/x86ni.h
new file mode 100644
index 0000000..1232c97
--- /dev/null
+++ b/cbits/aes/x86ni.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2012 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef AES_X86NI_H
+#define AES_X86NI_H
+
+#ifdef WITH_AESNI
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdint.h>
+#include <wmmintrin.h>       /* __m128i */
+#include <cryptonite_aes.h>  /* aes_key, aes_block, aes_gcm */
+#include <aes/block128.h>
+
+#ifdef IMPL_DEBUG
+static void block128_sse_print(__m128i m)
+{
+    block128 b;
+    _mm_storeu_si128((__m128i *) &b.b, m);
+    block128_print(&b);
+}
+#endif
+
+void aes_ni_init(aes_key *key, uint8_t *origkey, uint8_t size);
+void aes_ni_encrypt_block128(aes_block *out, aes_key *key, aes_block *in);
+void aes_ni_encrypt_block256(aes_block *out, aes_key *key, aes_block *in);
+void aes_ni_decrypt_block128(aes_block *out, aes_key *key, aes_block *in);
+void aes_ni_decrypt_block256(aes_block *out, aes_key *key, aes_block *in);
+void aes_ni_encrypt_ecb128(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks);
+void aes_ni_encrypt_ecb256(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks);
+void aes_ni_decrypt_ecb128(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks);
+void aes_ni_decrypt_ecb256(aes_block *out, aes_key *key, aes_block *in, uint32_t blocks);
+void aes_ni_encrypt_cbc128(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks);
+void aes_ni_encrypt_cbc256(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks);
+void aes_ni_decrypt_cbc128(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks);
+void aes_ni_decrypt_cbc256(aes_block *out, aes_key *key, aes_block *_iv, aes_block *in, uint32_t blocks);
+void aes_ni_encrypt_ctr128(uint8_t *out, aes_key *key, aes_block *_iv, uint8_t *in, uint32_t length);
+void aes_ni_encrypt_ctr256(uint8_t *out, aes_key *key, aes_block *_iv, uint8_t *in, uint32_t length);
+void aes_ni_encrypt_xts128(aes_block *out, aes_key *key1, aes_key *key2,
+                           aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks);
+void aes_ni_encrypt_xts256(aes_block *out, aes_key *key1, aes_key *key2,
+                           aes_block *_tweak, uint32_t spoint, aes_block *in, uint32_t blocks);
+
+void aes_ni_gcm_encrypt128(uint8_t *out, aes_gcm *gcm, aes_key *key, uint8_t *in, uint32_t length);
+void aes_ni_gcm_encrypt256(uint8_t *out, aes_gcm *gcm, aes_key *key, uint8_t *in, uint32_t length);
+
+void gf_mul_x86ni(block128 *res, block128 *a_, block128 *b_);
+
+#endif
+
+#endif
+
+#endif
diff --git a/cbits/cryptonite_aes.c b/cbits/cryptonite_aes.c
new file mode 100644
index 0000000..595585a
--- /dev/null
+++ b/cbits/cryptonite_aes.c
@@ -0,0 +1,750 @@
+/*
+ * Copyright (c) 2012 Vincent Hanquez
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of his contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <cryptonite_aes.h>
+#include <cryptonite_cpu.h>   /* initialize_hw */
+#include <cryptonite_bitfn.h>
+
+#include <aes/generic.h>
+#include <aes/gf.h>
+#include <aes/x86ni.h>
+
+void aes_generic_encrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks);
+void aes_generic_decrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks);
+void aes_generic_encrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks);
+void aes_generic_decrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks);
+void aes_generic_encrypt_ctr(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t length);
+void aes_generic_encrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit,
+                             uint32_t spoint, aes_block *input, uint32_t nb_blocks);
+void aes_generic_decrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit,
+                             uint32_t spoint, aes_block *input, uint32_t nb_blocks);
+void aes_generic_gcm_encrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length);
+void aes_generic_gcm_decrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length);
+void aes_generic_ocb_encrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length);
+void aes_generic_ocb_decrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length);
+
+enum {
+    /* init */
+    INIT_128, INIT_192, INIT_256,
+    /* single block */
+    ENCRYPT_BLOCK_128, ENCRYPT_BLOCK_192, ENCRYPT_BLOCK_256,
+    DECRYPT_BLOCK_128, DECRYPT_BLOCK_192, DECRYPT_BLOCK_256,
+    /* ecb */
+    ENCRYPT_ECB_128, ENCRYPT_ECB_192, ENCRYPT_ECB_256,
+    DECRYPT_ECB_128, DECRYPT_ECB_192, DECRYPT_ECB_256,
+    /* cbc */
+    ENCRYPT_CBC_128, ENCRYPT_CBC_192, ENCRYPT_CBC_256,
+    DECRYPT_CBC_128, DECRYPT_CBC_192, DECRYPT_CBC_256,
+    /* ctr */
+    ENCRYPT_CTR_128, ENCRYPT_CTR_192, ENCRYPT_CTR_256,
+    /* xts */
+    ENCRYPT_XTS_128, ENCRYPT_XTS_192, ENCRYPT_XTS_256,
+    DECRYPT_XTS_128, DECRYPT_XTS_192, DECRYPT_XTS_256,
+    /* gcm */
+    ENCRYPT_GCM_128, ENCRYPT_GCM_192, ENCRYPT_GCM_256,
+    DECRYPT_GCM_128, DECRYPT_GCM_192, DECRYPT_GCM_256,
+    /* ocb */
+    ENCRYPT_OCB_128, ENCRYPT_OCB_192, ENCRYPT_OCB_256,
+    DECRYPT_OCB_128, DECRYPT_OCB_192, DECRYPT_OCB_256,
+};
+
+void *branch_table[] = {
+    /* INIT */
+    [INIT_128] = aes_generic_init,
+    [INIT_192] = aes_generic_init,
+    [INIT_256] = aes_generic_init,
+    /* BLOCK */
+    [ENCRYPT_BLOCK_128] = aes_generic_encrypt_block,
+    [ENCRYPT_BLOCK_192] = aes_generic_encrypt_block,
+    [ENCRYPT_BLOCK_256] = aes_generic_encrypt_block,
+    [DECRYPT_BLOCK_128] = aes_generic_decrypt_block,
+    [DECRYPT_BLOCK_192] = aes_generic_decrypt_block,
+    [DECRYPT_BLOCK_256] = aes_generic_decrypt_block,
+    /* ECB */
+    [ENCRYPT_ECB_128] = aes_generic_encrypt_ecb,
+    [ENCRYPT_ECB_192] = aes_generic_encrypt_ecb,
+    [ENCRYPT_ECB_256] = aes_generic_encrypt_ecb,
+    [DECRYPT_ECB_128] = aes_generic_decrypt_ecb,
+    [DECRYPT_ECB_192] = aes_generic_decrypt_ecb,
+    [DECRYPT_ECB_256] = aes_generic_decrypt_ecb,
+    /* CBC */
+    [ENCRYPT_CBC_128] = aes_generic_encrypt_cbc,
+    [ENCRYPT_CBC_192] = aes_generic_encrypt_cbc,
+    [ENCRYPT_CBC_256] = aes_generic_encrypt_cbc,
+    [DECRYPT_CBC_128] = aes_generic_decrypt_cbc,
+    [DECRYPT_CBC_192] = aes_generic_decrypt_cbc,
+    [DECRYPT_CBC_256] = aes_generic_decrypt_cbc,
+    /* CTR */
+    [ENCRYPT_CTR_128] = aes_generic_encrypt_ctr,
+    [ENCRYPT_CTR_192] = aes_generic_encrypt_ctr,
+    [ENCRYPT_CTR_256] = aes_generic_encrypt_ctr,
+    /* XTS */
+    [ENCRYPT_XTS_128] = aes_generic_encrypt_xts,
+    [ENCRYPT_XTS_192] = aes_generic_encrypt_xts,
+    [ENCRYPT_XTS_256] = aes_generic_encrypt_xts,
+    [DECRYPT_XTS_128] = aes_generic_decrypt_xts,
+    [DECRYPT_XTS_192] = aes_generic_decrypt_xts,
+    [DECRYPT_XTS_256] = aes_generic_decrypt_xts,
+    /* GCM */
+    [ENCRYPT_GCM_128] = aes_generic_gcm_encrypt,
+    [ENCRYPT_GCM_192] = aes_generic_gcm_encrypt,
+    [ENCRYPT_GCM_256] = aes_generic_gcm_encrypt,
+    [DECRYPT_GCM_128] = aes_generic_gcm_decrypt,
+    [DECRYPT_GCM_192] = aes_generic_gcm_decrypt,
+    [DECRYPT_GCM_256] = aes_generic_gcm_decrypt,
+    /* OCB */
+    [ENCRYPT_OCB_128] = aes_generic_ocb_encrypt,
+    [ENCRYPT_OCB_192] = aes_generic_ocb_encrypt,
+    [ENCRYPT_OCB_256] = aes_generic_ocb_encrypt,
+    [DECRYPT_OCB_128] = aes_generic_ocb_decrypt,
+    [DECRYPT_OCB_192] = aes_generic_ocb_decrypt,
+    [DECRYPT_OCB_256] = aes_generic_ocb_decrypt,
+};
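+
+/* branch_table is the whole dispatch mechanism: every slot starts out
+ * pointing at the generic C implementation, and initialize_table_ni below
+ * patches in the AES-NI variants at key-setup time when the CPU supports
+ * them. Illustrative use of the resulting API, with key_bytes standing for
+ * any caller-provided 16-byte buffer:
+ *
+ *     aes_key key;
+ *     aes_block in, out;
+ *     aes_initkey(&key, key_bytes, 16);     // 16-byte key, AES-128
+ *     aes_encrypt_ecb(&out, &key, &in, 1);  // dispatches through the table
+ */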
[ENCRYPT_CBC_128] = aes_generic_encrypt_cbc, + [ENCRYPT_CBC_192] = aes_generic_encrypt_cbc, + [ENCRYPT_CBC_256] = aes_generic_encrypt_cbc, + [DECRYPT_CBC_128] = aes_generic_decrypt_cbc, + [DECRYPT_CBC_192] = aes_generic_decrypt_cbc, + [DECRYPT_CBC_256] = aes_generic_decrypt_cbc, + /* CTR */ + [ENCRYPT_CTR_128] = aes_generic_encrypt_ctr, + [ENCRYPT_CTR_192] = aes_generic_encrypt_ctr, + [ENCRYPT_CTR_256] = aes_generic_encrypt_ctr, + /* XTS */ + [ENCRYPT_XTS_128] = aes_generic_encrypt_xts, + [ENCRYPT_XTS_192] = aes_generic_encrypt_xts, + [ENCRYPT_XTS_256] = aes_generic_encrypt_xts, + [DECRYPT_XTS_128] = aes_generic_decrypt_xts, + [DECRYPT_XTS_192] = aes_generic_decrypt_xts, + [DECRYPT_XTS_256] = aes_generic_decrypt_xts, + /* GCM */ + [ENCRYPT_GCM_128] = aes_generic_gcm_encrypt, + [ENCRYPT_GCM_192] = aes_generic_gcm_encrypt, + [ENCRYPT_GCM_256] = aes_generic_gcm_encrypt, + [DECRYPT_GCM_128] = aes_generic_gcm_decrypt, + [DECRYPT_GCM_192] = aes_generic_gcm_decrypt, + [DECRYPT_GCM_256] = aes_generic_gcm_decrypt, + /* OCB */ + [ENCRYPT_OCB_128] = aes_generic_ocb_encrypt, + [ENCRYPT_OCB_192] = aes_generic_ocb_encrypt, + [ENCRYPT_OCB_256] = aes_generic_ocb_encrypt, + [DECRYPT_OCB_128] = aes_generic_ocb_decrypt, + [DECRYPT_OCB_192] = aes_generic_ocb_decrypt, + [DECRYPT_OCB_256] = aes_generic_ocb_decrypt, +}; + +typedef void (*init_f)(aes_key *, uint8_t *, uint8_t); +typedef void (*ecb_f)(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks); +typedef void (*cbc_f)(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks); +typedef void (*ctr_f)(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t length); +typedef void (*xts_f)(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit, uint32_t spoint, aes_block *input, uint32_t nb_blocks); +typedef void (*gcm_crypt_f)(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length); +typedef void (*ocb_crypt_f)(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length); +typedef void (*block_f)(aes_block *output, aes_key *key, aes_block *input); + +#ifdef WITH_AESNI +#define GET_INIT(strength) \ + ((init_f) (branch_table[INIT_128 + strength])) +#define GET_ECB_ENCRYPT(strength) \ + ((ecb_f) (branch_table[ENCRYPT_ECB_128 + strength])) +#define GET_ECB_DECRYPT(strength) \ + ((ecb_f) (branch_table[DECRYPT_ECB_128 + strength])) +#define GET_CBC_ENCRYPT(strength) \ + ((cbc_f) (branch_table[ENCRYPT_CBC_128 + strength])) +#define GET_CBC_DECRYPT(strength) \ + ((cbc_f) (branch_table[DECRYPT_CBC_128 + strength])) +#define GET_CTR_ENCRYPT(strength) \ + ((ctr_f) (branch_table[ENCRYPT_CTR_128 + strength])) +#define GET_XTS_ENCRYPT(strength) \ + ((xts_f) (branch_table[ENCRYPT_XTS_128 + strength])) +#define GET_XTS_DECRYPT(strength) \ + ((xts_f) (branch_table[DECRYPT_XTS_128 + strength])) +#define GET_GCM_ENCRYPT(strength) \ + ((gcm_crypt_f) (branch_table[ENCRYPT_GCM_128 + strength])) +#define GET_GCM_DECRYPT(strength) \ + ((gcm_crypt_f) (branch_table[DECRYPT_GCM_128 + strength])) +#define GET_OCB_ENCRYPT(strength) \ + ((ocb_crypt_f) (branch_table[ENCRYPT_OCB_128 + strength])) +#define GET_OCB_DECRYPT(strength) \ + ((ocb_crypt_f) (branch_table[DECRYPT_OCB_128 + strength])) +#define aes_encrypt_block(o,k,i) \ + (((block_f) (branch_table[ENCRYPT_BLOCK_128 + k->strength]))(o,k,i)) +#define aes_decrypt_block(o,k,i) \ + (((block_f) (branch_table[DECRYPT_BLOCK_128 + k->strength]))(o,k,i)) +#else +#define GET_INIT(strength) aes_generic_init +#define
GET_ECB_ENCRYPT(strength) aes_generic_encrypt_ecb +#define GET_ECB_DECRYPT(strength) aes_generic_decrypt_ecb +#define GET_CBC_ENCRYPT(strength) aes_generic_encrypt_cbc +#define GET_CBC_DECRYPT(strength) aes_generic_decrypt_cbc +#define GET_CTR_ENCRYPT(strength) aes_generic_encrypt_ctr +#define GET_XTS_ENCRYPT(strength) aes_generic_encrypt_xts +#define GET_XTS_DECRYPT(strength) aes_generic_decrypt_xts +#define GET_GCM_ENCRYPT(strength) aes_generic_gcm_encrypt +#define GET_GCM_DECRYPT(strength) aes_generic_gcm_decrypt +#define GET_OCB_ENCRYPT(strength) aes_generic_ocb_encrypt +#define GET_OCB_DECRYPT(strength) aes_generic_ocb_decrypt +#define aes_encrypt_block(o,k,i) aes_generic_encrypt_block(o,k,i) +#define aes_decrypt_block(o,k,i) aes_generic_decrypt_block(o,k,i) +#endif + +#if defined(ARCH_X86) && defined(WITH_AESNI) +static void initialize_table_ni(int aesni, int pclmul) +{ + if (!aesni) + return; + branch_table[INIT_128] = aes_ni_init; + branch_table[INIT_256] = aes_ni_init; + + branch_table[ENCRYPT_BLOCK_128] = aes_ni_encrypt_block128; + branch_table[DECRYPT_BLOCK_128] = aes_ni_decrypt_block128; + branch_table[ENCRYPT_BLOCK_256] = aes_ni_encrypt_block256; + branch_table[DECRYPT_BLOCK_256] = aes_ni_decrypt_block256; + /* ECB */ + branch_table[ENCRYPT_ECB_128] = aes_ni_encrypt_ecb128; + branch_table[DECRYPT_ECB_128] = aes_ni_decrypt_ecb128; + branch_table[ENCRYPT_ECB_256] = aes_ni_encrypt_ecb256; + branch_table[DECRYPT_ECB_256] = aes_ni_decrypt_ecb256; + /* CBC */ + branch_table[ENCRYPT_CBC_128] = aes_ni_encrypt_cbc128; + branch_table[DECRYPT_CBC_128] = aes_ni_decrypt_cbc128; + branch_table[ENCRYPT_CBC_256] = aes_ni_encrypt_cbc256; + branch_table[DECRYPT_CBC_256] = aes_ni_decrypt_cbc256; + /* CTR */ + branch_table[ENCRYPT_CTR_128] = aes_ni_encrypt_ctr128; + branch_table[ENCRYPT_CTR_256] = aes_ni_encrypt_ctr256; + /* XTS */ + branch_table[ENCRYPT_XTS_128] = aes_ni_encrypt_xts128; + branch_table[ENCRYPT_XTS_256] = aes_ni_encrypt_xts256; + /* GCM */ + branch_table[ENCRYPT_GCM_128] = aes_ni_gcm_encrypt128; + branch_table[ENCRYPT_GCM_256] = aes_ni_gcm_encrypt256; + /* OCB */ + /* + branch_table[ENCRYPT_OCB_128] = aes_ni_ocb_encrypt128; + branch_table[ENCRYPT_OCB_256] = aes_ni_ocb_encrypt256; + */ +} +#endif + +void aes_initkey(aes_key *key, uint8_t *origkey, uint8_t size) +{ + switch (size) { + case 16: key->nbr = 10; key->strength = 0; break; + case 24: key->nbr = 12; key->strength = 1; break; + case 32: key->nbr = 14; key->strength = 2; break; + } +#if defined(ARCH_X86) && defined(WITH_AESNI) + initialize_hw(initialize_table_ni); +#endif + init_f _init = GET_INIT(key->strength); + _init(key, origkey, size); +} + +void aes_encrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks) +{ + ecb_f e = GET_ECB_ENCRYPT(key->strength); + e(output, key, input, nb_blocks); +} + +void aes_decrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks) +{ + ecb_f d = GET_ECB_DECRYPT(key->strength); + d(output, key, input, nb_blocks); +} + +void aes_encrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks) +{ + cbc_f e = GET_CBC_ENCRYPT(key->strength); + e(output, key, iv, input, nb_blocks); +} + +void aes_decrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks) +{ + cbc_f d = GET_CBC_DECRYPT(key->strength); + d(output, key, iv, input, nb_blocks); +} + +void aes_gen_ctr(aes_block *output, aes_key *key, const aes_block *iv, uint32_t nb_blocks) +{ + aes_block block; + + /* preload IV 
in block */ + block128_copy(&block, iv); + + for ( ; nb_blocks-- > 0; output++, block128_inc_be(&block)) { + aes_encrypt_block(output, key, &block); + } +} + +void aes_gen_ctr_cont(aes_block *output, aes_key *key, aes_block *iv, uint32_t nb_blocks) +{ + aes_block block; + + /* preload IV in block */ + block128_copy(&block, iv); + + for ( ; nb_blocks-- > 0; output++, block128_inc_be(&block)) { + aes_encrypt_block(output, key, &block); + } + + /* copy back the IV */ + block128_copy(iv, &block); +} + +void aes_encrypt_ctr(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t len) +{ + ctr_f e = GET_CTR_ENCRYPT(key->strength); + e(output, key, iv, input, len); +} + +void aes_encrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit, + uint32_t spoint, aes_block *input, uint32_t nb_blocks) +{ + xts_f e = GET_XTS_ENCRYPT(k1->strength); + e(output, k1, k2, dataunit, spoint, input, nb_blocks); +} + +void aes_decrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit, + uint32_t spoint, aes_block *input, uint32_t nb_blocks) +{ + aes_generic_decrypt_xts(output, k1, k2, dataunit, spoint, input, nb_blocks); +} + +void aes_gcm_encrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length) +{ + gcm_crypt_f e = GET_GCM_ENCRYPT(key->strength); + e(output, gcm, key, input, length); +} + +void aes_gcm_decrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length) +{ + gcm_crypt_f d = GET_GCM_DECRYPT(key->strength); + d(output, gcm, key, input, length); +} + +void aes_ocb_encrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length) +{ + ocb_crypt_f e = GET_OCB_ENCRYPT(key->strength); + e(output, ocb, key, input, length); +} + +void aes_ocb_decrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length) +{ + ocb_crypt_f d = GET_OCB_DECRYPT(key->strength); + d(output, ocb, key, input, length); +} + +static void gcm_ghash_add(aes_gcm *gcm, block128 *b) +{ + block128_xor(&gcm->tag, b); + gf_mul(&gcm->tag, &gcm->h); +} + +void aes_gcm_init(aes_gcm *gcm, aes_key *key, uint8_t *iv, uint32_t len) +{ + gcm->length_aad = 0; + gcm->length_input = 0; + + block128_zero(&gcm->h); + block128_zero(&gcm->tag); + block128_zero(&gcm->iv); + + /* prepare H : encrypt_K(0^128) */ + aes_encrypt_block(&gcm->h, key, &gcm->h); + + if (len == 12) { + block128_copy_bytes(&gcm->iv, iv, 12); + gcm->iv.b[15] = 0x01; + } else { + uint32_t origlen = len << 3; + int i; + for (; len >= 16; len -= 16, iv += 16) { + block128_xor(&gcm->iv, (block128 *) iv); + gf_mul(&gcm->iv, &gcm->h); + } + if (len > 0) { + block128_xor_bytes(&gcm->iv, iv, len); + gf_mul(&gcm->iv, &gcm->h); + } + for (i = 15; origlen; --i, origlen >>= 8) + gcm->iv.b[i] ^= (uint8_t) origlen; + gf_mul(&gcm->iv, &gcm->h); + } + + block128_copy(&gcm->civ, &gcm->iv); +} + +void aes_gcm_aad(aes_gcm *gcm, uint8_t *input, uint32_t length) +{ + gcm->length_aad += length; + for (; length >= 16; input += 16, length -= 16) { + gcm_ghash_add(gcm, (block128 *) input); + } + if (length > 0) { + aes_block tmp; + block128_zero(&tmp); + block128_copy_bytes(&tmp, input, length); + gcm_ghash_add(gcm, &tmp); + } + +} + +void aes_gcm_finish(uint8_t *tag, aes_gcm *gcm, aes_key *key) +{ + aes_block lblock; + int i; + + /* tag = (tag-1 xor (lenbits(a) | lenbits(c)) ) . 
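H */ + /* lengths are in bits (hence the << 3), packed as two big-endian + * 64-bit words, as required by the GCM specification */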
+ lblock.q[0] = cpu_to_be64(gcm->length_aad << 3); + lblock.q[1] = cpu_to_be64(gcm->length_input << 3); + gcm_ghash_add(gcm, &lblock); + + aes_encrypt_block(&lblock, key, &gcm->iv); + block128_xor(&gcm->tag, &lblock); + + for (i = 0; i < 16; i++) { + tag[i] = gcm->tag.b[i]; + } +} + +static inline void ocb_block_double(block128 *d, block128 *s) +{ + unsigned int i; + uint8_t tmp = s->b[0]; + + for (i=0; i<15; i++) + d->b[i] = (s->b[i] << 1) | (s->b[i+1] >> 7); + d->b[15] = (s->b[15] << 1) ^ ((tmp >> 7) * 0x87); +} + +static void ocb_get_L_i(block128 *l, block128 *lis, unsigned int i) +{ +#define L_CACHED 4 + i = bitfn_ntz(i); + if (i < L_CACHED) { + block128_copy(l, &lis[i]); + } else { + i -= (L_CACHED - 1); + block128_copy(l, &lis[L_CACHED - 1]); + while (i--) { + ocb_block_double(l, l); + } + } +#undef L_CACHED +} + +void aes_ocb_init(aes_ocb *ocb, aes_key *key, uint8_t *iv, uint32_t len) +{ + block128 tmp, nonce, ktop; + unsigned char stretch[24]; + unsigned bottom, byteshift, bitshift, i; + + /* we don't accept more than 15 bytes; any extra bytes are ignored. */ + if (len > 15) { + len = 15; + } + + /* create L*, and L$,L0,L1,L2,L3 */ + block128_zero(&tmp); + aes_encrypt_block(&ocb->lstar, key, &tmp); + + ocb_block_double(&ocb->ldollar, &ocb->lstar); + ocb_block_double(&ocb->li[0], &ocb->ldollar); + ocb_block_double(&ocb->li[1], &ocb->li[0]); + ocb_block_double(&ocb->li[2], &ocb->li[1]); + ocb_block_double(&ocb->li[3], &ocb->li[2]); + + /* create stretch from the nonce */ + block128_zero(&nonce); + memcpy(nonce.b + 4, iv, 12); + nonce.b[0] = (unsigned char)(((16 * 8) % 128) << 1); + nonce.b[16-12-1] |= 0x01; + bottom = nonce.b[15] & 0x3F; + nonce.b[15] &= 0xC0; + aes_encrypt_block(&ktop, key, &nonce); + memcpy(stretch, ktop.b, 16); + + memcpy(tmp.b, ktop.b + 1, 8); + block128_xor(&tmp, &ktop); + memcpy(stretch + 16, tmp.b, 8); + + /* initialize the encryption offset from stretch */ + byteshift = bottom / 8; + bitshift = bottom % 8; + if (bitshift != 0) + for (i = 0; i < 16; i++) + ocb->offset_enc.b[i] = (stretch[i+byteshift] << bitshift) + | (stretch[i+byteshift+1] >> (8-bitshift)); + else + for (i = 0; i < 16; i++) + ocb->offset_enc.b[i] = stretch[i+byteshift]; + /* initialize checksum for aad and encryption, and the aad offset */ + block128_zero(&ocb->sum_aad); + block128_zero(&ocb->sum_enc); + block128_zero(&ocb->offset_aad); +} + +void aes_ocb_aad(aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length) +{ + block128 tmp; + unsigned int i; + + for (i=1; i<= length/16; i++, input=input+16) { + ocb_get_L_i(&tmp, ocb->li, i); + block128_xor(&ocb->offset_aad, &tmp); + + block128_vxor(&tmp, &ocb->offset_aad, (block128 *) input); + aes_encrypt_block(&tmp, key, &tmp); + block128_xor(&ocb->sum_aad, &tmp); + } + + length = length % 16; /* Bytes in final block */ + if (length > 0) { + block128_xor(&ocb->offset_aad, &ocb->lstar); + block128_zero(&tmp); + block128_copy_bytes(&tmp, input, length); + tmp.b[length] = 0x80; + block128_xor(&tmp, &ocb->offset_aad); + aes_encrypt_block(&tmp, key, &tmp); + block128_xor(&ocb->sum_aad, &tmp); + } +} + +void aes_ocb_finish(uint8_t *tag, aes_ocb *ocb, aes_key *key) +{ + block128 tmp; + + block128_vxor(&tmp, &ocb->sum_enc, &ocb->offset_enc); + block128_xor(&tmp, &ocb->ldollar); + aes_encrypt_block((block128 *) tag, key, &tmp); + block128_xor((block128 *) tag, &ocb->sum_aad); +} + +void aes_generic_encrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks) +{ + for ( ; nb_blocks-- > 0; input++, output++) { +
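/* ECB mode: each block is enciphered independently; there is no chaining */ +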
aes_generic_encrypt_block(output, key, input); + } +} + +void aes_generic_decrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks) +{ + for ( ; nb_blocks-- > 0; input++, output++) { + aes_generic_decrypt_block(output, key, input); + } +} + +void aes_generic_encrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks) +{ + aes_block block; + + /* preload IV in block */ + block128_copy(&block, iv); + for ( ; nb_blocks-- > 0; input++, output++) { + block128_xor(&block, (block128 *) input); + aes_generic_encrypt_block(&block, key, &block); + block128_copy((block128 *) output, &block); + } +} + +void aes_generic_decrypt_cbc(aes_block *output, aes_key *key, aes_block *ivini, aes_block *input, uint32_t nb_blocks) +{ + aes_block block, blocko; + aes_block iv; + + /* preload IV in block */ + block128_copy(&iv, ivini); + for ( ; nb_blocks-- > 0; input++, output++) { + block128_copy(&block, (block128 *) input); + aes_generic_decrypt_block(&blocko, key, &block); + block128_vxor((block128 *) output, &blocko, &iv); + block128_copy(&iv, &block); + } +} + +void aes_generic_encrypt_ctr(uint8_t *output, aes_key *key, aes_block *iv, uint8_t *input, uint32_t len) +{ + aes_block block, o; + uint32_t nb_blocks = len / 16; + int i; + + /* preload IV in block */ + block128_copy(&block, iv); + + for ( ; nb_blocks-- > 0; block128_inc_be(&block), output += 16, input += 16) { + aes_encrypt_block(&o, key, &block); + block128_vxor((block128 *) output, &o, (block128 *) input); + } + + if ((len % 16) != 0) { + aes_encrypt_block(&o, key, &block); + for (i = 0; i < (len % 16); i++) { + *output = ((uint8_t *) &o)[i] ^ *input; + output++; + input++; + } + } +} + +void aes_generic_encrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit, + uint32_t spoint, aes_block *input, uint32_t nb_blocks) +{ + aes_block block, tweak; + + /* load the data unit and encrypt it with k2 to create the initial tweak */ + block128_copy(&tweak, dataunit); + aes_encrypt_block(&tweak, k2, &tweak); + + /* TO OPTIMISE: this is a really inefficient way to reach the sector point */ + while (spoint-- > 0) + gf_mulx(&tweak); + + for ( ; nb_blocks-- > 0; input++, output++, gf_mulx(&tweak)) { + block128_vxor(&block, input, &tweak); + aes_encrypt_block(&block, k1, &block); + block128_vxor(output, &block, &tweak); + } +} + +void aes_generic_decrypt_xts(aes_block *output, aes_key *k1, aes_key *k2, aes_block *dataunit, + uint32_t spoint, aes_block *input, uint32_t nb_blocks) +{ + aes_block block, tweak; + + /* load the data unit and encrypt it with k2 to create the initial tweak */ + block128_copy(&tweak, dataunit); + aes_encrypt_block(&tweak, k2, &tweak); + + /* TO OPTIMISE: this is a really inefficient way to reach the sector point */ + while (spoint-- > 0) + gf_mulx(&tweak); + + for ( ; nb_blocks-- > 0; input++, output++, gf_mulx(&tweak)) { + block128_vxor(&block, input, &tweak); + aes_decrypt_block(&block, k1, &block); + block128_vxor(output, &block, &tweak); + } +} + +void aes_generic_gcm_encrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length) +{ + aes_block out; + + gcm->length_input += length; + for (; length >= 16; input += 16, output += 16, length -= 16) { + block128_inc_be(&gcm->civ); + + aes_encrypt_block(&out, key, &gcm->civ); + block128_xor(&out, (block128 *) input); + gcm_ghash_add(gcm, &out); + block128_copy((block128 *) output, &out); + } + if (length > 0) { + aes_block tmp; + int i; + + block128_inc_be(&gcm->civ); + /* create e(civ) in out */ + aes_encrypt_block(&out, key, &gcm->civ); + /* initialize a tmp as input
and xor it to e(civ) */ + block128_zero(&tmp); + block128_copy_bytes(&tmp, input, length); + block128_xor_bytes(&tmp, out.b, length); + + gcm_ghash_add(gcm, &tmp); + + for (i = 0; i < length; i++) { + output[i] = tmp.b[i]; + } + } +} + +void aes_generic_gcm_decrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length) +{ + aes_block out; + + gcm->length_input += length; + for (; length >= 16; input += 16, output += 16, length -= 16) { + block128_inc_be(&gcm->civ); + + aes_encrypt_block(&out, key, &gcm->civ); + gcm_ghash_add(gcm, (block128 *) input); + block128_xor(&out, (block128 *) input); + block128_copy((block128 *) output, &out); + } + if (length > 0) { + aes_block tmp; + int i; + + block128_inc_be(&gcm->civ); + + block128_zero(&tmp); + block128_copy_bytes(&tmp, input, length); + gcm_ghash_add(gcm, &tmp); + + aes_encrypt_block(&out, key, &gcm->civ); + block128_xor_bytes(&tmp, out.b, length); + + for (i = 0; i < length; i++) { + output[i] = tmp.b[i]; + } + } +} + +static void ocb_generic_crypt(uint8_t *output, aes_ocb *ocb, aes_key *key, + uint8_t *input, uint32_t length, int encrypt) +{ + block128 tmp, pad; + unsigned int i; + + for (i = 1; i <= length/16; i++, input += 16, output += 16) { + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + ocb_get_L_i(&tmp, ocb->li, i); + block128_xor(&ocb->offset_enc, &tmp); + + block128_vxor(&tmp, &ocb->offset_enc, (block128 *) input); + if (encrypt) { + aes_encrypt_block(&tmp, key, &tmp); + block128_vxor((block128 *) output, &ocb->offset_enc, &tmp); + block128_xor(&ocb->sum_enc, (block128 *) input); + } else { + aes_decrypt_block(&tmp, key, &tmp); + block128_vxor((block128 *) output, &ocb->offset_enc, &tmp); + block128_xor(&ocb->sum_enc, (block128 *) output); + } + } + + /* process the last partial block if any */ + length = length % 16; + if (length > 0) { + block128_xor(&ocb->offset_enc, &ocb->lstar); + aes_encrypt_block(&pad, key, &ocb->offset_enc); + + if (encrypt) { + block128_zero(&tmp); + block128_copy_bytes(&tmp, input, length); + tmp.b[length] = 0x80; + block128_xor(&ocb->sum_enc, &tmp); + block128_xor(&pad, &tmp); + memcpy(output, pad.b, length); + output += length; + } else { + block128_copy(&tmp, &pad); + block128_copy_bytes(&tmp, input, length); + block128_xor(&tmp, &pad); + tmp.b[length] = 0x80; + memcpy(output, tmp.b, length); + block128_xor(&ocb->sum_enc, &tmp); + input += length; + } + } +} + +void aes_generic_ocb_encrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length) +{ + ocb_generic_crypt(output, ocb, key, input, length, 1); +} + +void aes_generic_ocb_decrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length) +{ + ocb_generic_crypt(output, ocb, key, input, length, 0); +} diff --git a/cbits/cryptonite_aes.h b/cbits/cryptonite_aes.h new file mode 100644 index 0000000..733e0a3 --- /dev/null +++ b/cbits/cryptonite_aes.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2008 Vincent Hanquez + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the author nor the names of his contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * AES implementation + */ +#ifndef CRYPTONITE_AES_H +#define CRYPTONITE_AES_H + +#include <stdint.h> +#include "aes/block128.h" + +typedef block128 aes_block; + +/* size = 456 */ +typedef struct { + uint8_t nbr; /* number of rounds: 10 (128), 12 (192), 14 (256) */ + uint8_t strength; /* 128 = 0, 192 = 1, 256 = 2 */ + uint8_t _padding[6]; + uint8_t data[16*14*2]; +} aes_key; + +/* size = 4*16 + 2*8 = 80 */ +typedef struct { + aes_block tag; + aes_block h; + aes_block iv; + aes_block civ; + uint64_t length_aad; + uint64_t length_input; +} aes_gcm; + +typedef struct { + block128 offset_aad; + block128 offset_enc; + block128 sum_aad; + block128 sum_enc; + block128 lstar; + block128 ldollar; + block128 li[4]; +} aes_ocb; + +/* key size in bytes: either 16, 24 or 32 */ +void cryptonite_aes_initkey(aes_key *ctx, uint8_t *key, uint8_t size); + +void cryptonite_aes_encrypt(aes_block *output, aes_key *key, aes_block *input); +void cryptonite_aes_decrypt(aes_block *output, aes_key *key, aes_block *input); + +void cryptonite_aes_encrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks); +void cryptonite_aes_decrypt_ecb(aes_block *output, aes_key *key, aes_block *input, uint32_t nb_blocks); + +void cryptonite_aes_encrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks); +void cryptonite_aes_decrypt_cbc(aes_block *output, aes_key *key, aes_block *iv, aes_block *input, uint32_t nb_blocks); + +void cryptonite_aes_gen_ctr(aes_block *output, aes_key *key, const aes_block *iv, uint32_t nb_blocks); +void cryptonite_aes_gen_ctr_cont(aes_block *output, aes_key *key, aes_block *iv, uint32_t nb_blocks); + +void cryptonite_aes_encrypt_xts(aes_block *output, aes_key *key, aes_key *key2, aes_block *sector, + uint32_t spoint, aes_block *input, uint32_t nb_blocks); +void cryptonite_aes_decrypt_xts(aes_block *output, aes_key *key, aes_key *key2, aes_block *sector, + uint32_t spoint, aes_block *input, uint32_t nb_blocks); + +void cryptonite_aes_gcm_init(aes_gcm *gcm, aes_key *key, uint8_t *iv, uint32_t len); +void cryptonite_aes_gcm_aad(aes_gcm *gcm, uint8_t *input, uint32_t length); +void cryptonite_aes_gcm_encrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length); +void cryptonite_aes_gcm_decrypt(uint8_t *output, aes_gcm *gcm, aes_key *key, uint8_t *input, uint32_t length); +void cryptonite_aes_gcm_finish(uint8_t *tag, aes_gcm *gcm, aes_key *key); + +void cryptonite_aes_ocb_init(aes_ocb *ocb, aes_key *key, uint8_t *iv, uint32_t len); +void cryptonite_aes_ocb_aad(aes_ocb *ocb, aes_key *key,
uint8_t *input, uint32_t length); +void cryptonite_aes_ocb_encrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length); +void cryptonite_aes_ocb_decrypt(uint8_t *output, aes_ocb *ocb, aes_key *key, uint8_t *input, uint32_t length); +void cryptonite_aes_ocb_finish(uint8_t *tag, aes_ocb *ocb, aes_key *key); + +#endif diff --git a/cbits/cryptonite_bitfn.h b/cbits/cryptonite_bitfn.h index defad22..385998e 100644 --- a/cbits/cryptonite_bitfn.h +++ b/cbits/cryptonite_bitfn.h @@ -165,6 +165,12 @@ static inline void array_copy64(uint64_t *d, uint64_t *s, uint32_t nb) } #endif +#ifdef __GNUC__ +#define bitfn_ntz(n) __builtin_ctz(n) +#else +#error "define ntz for your platform" +#endif + #ifdef __MINGW32__ # define LITTLE_ENDIAN 1234 # define BYTE_ORDER LITTLE_ENDIAN diff --git a/cbits/cryptonite_cpu.c b/cbits/cryptonite_cpu.c new file mode 100644 index 0000000..011ae8f --- /dev/null +++ b/cbits/cryptonite_cpu.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2012 Vincent Hanquez + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of his contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
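+ * + */ + +/* cpuid leaf 1 feature bits used below: ECX bit 25 (0x02000000) signals + * AESNI, ECX bit 1 (0x00000002) signals PCLMULQDQ. */ +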
+#include "cryptonite_cpu.h" +#include <stdint.h> + +#ifdef ARCH_X86 +static void cpuid(uint32_t info, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + *eax = info; + asm volatile + ( +#ifdef __x86_64__ + "mov %%rbx, %%rdi;" +#else + "mov %%ebx, %%edi;" +#endif + "cpuid;" + "mov %%ebx, %%esi;" +#ifdef __x86_64__ + "mov %%rdi, %%rbx;" +#else + "mov %%edi, %%ebx;" +#endif + :"+a" (*eax), "=S" (*ebx), "=c" (*ecx), "=d" (*edx) + : :"edi"); +} + +#ifdef USE_AESNI +void initialize_hw(void (*init_table)(int, int)) +{ + static int inited = 0; + if (inited == 0) { + uint32_t eax, ebx, ecx, edx; + int aesni, pclmul; + + inited = 1; + cpuid(1, &eax, &ebx, &ecx, &edx); + aesni = (ecx & 0x02000000); + pclmul = (ecx & 0x00000002); + init_table(aesni, pclmul); + } +} +#else +#define initialize_hw(init_table) (0) +#endif + +#endif diff --git a/cbits/cryptonite_cpu.h b/cbits/cryptonite_cpu.h new file mode 100644 index 0000000..3e9dfeb --- /dev/null +++ b/cbits/cryptonite_cpu.h @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2012 Vincent Hanquez + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of his contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE.
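+ * + */ + +/* ARCH_X86 and USE_AESNI are derived purely from compiler-defined macros, + * so no configure-time probing is needed; the runtime cpuid check still + * happens in initialize_hw. */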
+#ifndef CPU_H +#define CPU_H + +#if defined(__i386__) || defined(__x86_64__) +#define ARCH_X86 +#define USE_AESNI +#endif + +#ifdef USE_AESNI +void initialize_hw(void (*init_table)(int, int)); +#else +#define initialize_hw(init_table) (0) +#endif + +#endif diff --git a/cryptonite.cabal b/cryptonite.cabal index fa66c36..2af574b 100644 --- a/cryptonite.cabal +++ b/cryptonite.cabal @@ -20,6 +20,10 @@ source-repository head type: git location: https://github.com/vincenthz/cryptonite +Flag support_aesni + Description: allow compilation with AESNI on systems and architectures that support it + Default: True + Library Exposed-modules: Crypto.Cipher.ChaCha Crypto.Cipher.Salsa @@ -64,6 +68,10 @@ Library C-sources: cbits/cryptonite_chacha.c , cbits/cryptonite_salsa.c , cbits/cryptonite_rc4.c + , cbits/cryptonite_cpu.c + , cbits/aes/generic.c + , cbits/aes/gf.c + , cbits/cryptonite_aes.c , cbits/cryptonite_poly1305.c , cbits/cryptonite_sha1.c , cbits/cryptonite_sha256.c @@ -79,14 +87,19 @@ Library , cbits/cryptonite_tiger.c , cbits/cryptonite_whirlpool.c , cbits/cryptonite_scrypt.c + include-dirs: cbits if (arch(i386) || arch(x86_64)) CPP-options: -DARCH_IS_LITTLE_ENDIAN if arch(x86_64) - cpp-options: -DSUPPORT_RDRAND + CPP-options: -DSUPPORT_RDRAND Other-modules: Crypto.Random.Entropy.RDRand c-sources: cbits/cryptonite_rdrand.c + if flag(support_aesni) && (os(linux) || os(freebsd) || os(osx)) && (arch(i386) || arch(x86_64)) + CC-options: -mssse3 -maes -mpclmul -DWITH_AESNI + C-sources: cbits/aes/x86ni.c + if os(windows) cpp-options: -DWINDOWS Build-Depends: Win32
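
For context (not part of the patch): a minimal sketch of how the public GCM entry points declared in cbits/cryptonite_aes.h chain together. The function name and the key, IV and buffer contents below are placeholders; the 12-byte IV deliberately takes the fast path in the GCM init code above.

#include <stdint.h>
#include "cryptonite_aes.h"

static void gcm_seal_sketch(void)
{
    aes_key key;
    aes_gcm gcm;
    uint8_t k[16]  = {0};                 /* placeholder AES-128 key */
    uint8_t iv[12] = {0};                 /* 12-byte IV: counter block = IV || 1 */
    uint8_t aad[6] = {'h','e','a','d','e','r'};
    uint8_t pt[32] = {0}, ct[32], tag[16];

    cryptonite_aes_initkey(&key, k, 16);           /* key size in bytes: 16, 24 or 32 */
    cryptonite_aes_gcm_init(&gcm, &key, iv, 12);
    cryptonite_aes_gcm_aad(&gcm, aad, sizeof(aad));
    cryptonite_aes_gcm_encrypt(ct, &gcm, &key, pt, sizeof(pt));
    cryptonite_aes_gcm_finish(tag, &gcm, &key);    /* 16-byte authentication tag */
}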