From ff8a1c524dbcc5da5e32adfe9e9a01f371898788 Mon Sep 17 00:00:00 2001 From: Lars Petersen Date: Tue, 8 May 2018 22:08:20 +0200 Subject: [PATCH] Extend the internal interface of the Blowfish module. In preparation of an implementation of the bcrypt_pbkdf (a variant of PBKDF2 used by OpenSSH) algorithm, certain low-level operations of the Blowfish algorithm need to be generalized and exposed. The Blowfish.Primitive module has already been extended to account for the requirements imposed by the BCrypt algorithm, but the salt length was limited to 16 bytes and the BCrypt specific key schedule setup has been hard-coded into the Blowfish module. This commit makes a clear distintion between the expandKey and expandKeyWithSalt operation. Both take arbitrary sized salts and keys now. The specialized operation for 16 byte salts as used by BCrypt has been preserved and is selected automatically. Also, the BCrypt specific parts have been move to the BCrypt module with regard to separation of concern. A benchmark for generating BCrypt hashes with cost 10 shows a performance improvement from 158 to 141ms on average (Intel i5-6500) after this refactoring. Further experiments suggest that the specialized expandKeyWithSalt128 does not have any advantage over the generalized version and might be removed in favour of less branches and exceptional behaviour. --- Crypto/Cipher/Blowfish/Box.hs | 12 +- Crypto/Cipher/Blowfish/Primitive.hs | 351 ++++++++++++++++------------ Crypto/KDF/BCrypt.hs | 33 ++- 3 files changed, 239 insertions(+), 157 deletions(-) diff --git a/Crypto/Cipher/Blowfish/Box.hs b/Crypto/Cipher/Blowfish/Box.hs index 2a2f42c..34414a7 100644 --- a/Crypto/Cipher/Blowfish/Box.hs +++ b/Crypto/Cipher/Blowfish/Box.hs @@ -5,15 +5,19 @@ -- Portability : Good {-# LANGUAGE MagicHash #-} module Crypto.Cipher.Blowfish.Box - ( createKeySchedule + ( KeySchedule(..) + , createKeySchedule ) where -import Crypto.Internal.WordArray (mutableArray32FromAddrBE, MutableArray32) +import Crypto.Internal.WordArray (MutableArray32, + mutableArray32FromAddrBE) + +newtype KeySchedule = KeySchedule MutableArray32 -- | Create a key schedule mutable array of the pbox followed by -- all the sboxes. -createKeySchedule :: IO MutableArray32 -createKeySchedule = mutableArray32FromAddrBE 1042 "\ +createKeySchedule :: IO KeySchedule +createKeySchedule = KeySchedule `fmap` mutableArray32FromAddrBE 1042 "\ \\x24\x3f\x6a\x88\x85\xa3\x08\xd3\x13\x19\x8a\x2e\x03\x70\x73\x44\ \\xa4\x09\x38\x22\x29\x9f\x31\xd0\x08\x2e\xfa\x98\xec\x4e\x6c\x89\ \\x45\x28\x21\xe6\x38\xd0\x13\x77\xbe\x54\x66\xcf\x34\xe9\x0c\x6c\ diff --git a/Crypto/Cipher/Blowfish/Primitive.hs b/Crypto/Cipher/Blowfish/Primitive.hs index 6fcd388..572a5ec 100644 --- a/Crypto/Cipher/Blowfish/Primitive.hs +++ b/Crypto/Cipher/Blowfish/Primitive.hs @@ -5,6 +5,7 @@ -- Portability : Good -- Rewritten by Vincent Hanquez (c) 2015 +-- Lars Petersen (c) 2018 -- -- Original code: -- Crypto.Cipher.Blowfish.Primitive, copyright (c) 2012 Stijn van Drongelen @@ -16,186 +17,242 @@ module Crypto.Cipher.Blowfish.Primitive , initBlowfish , encrypt , decrypt - , eksBlowfish + , KeySchedule + , createKeySchedule + , freezeKeySchedule + , expandKey + , expandKeyWithSalt ) where -import Control.Monad (when) +import Control.Monad (when) import Data.Bits import Data.Memory.Endian import Data.Word +import Crypto.Cipher.Blowfish.Box import Crypto.Error +import Crypto.Internal.ByteArray (ByteArray, ByteArrayAccess, Bytes) +import qualified Crypto.Internal.ByteArray as B import Crypto.Internal.Compat import Crypto.Internal.Imports -import Crypto.Internal.ByteArray (ByteArrayAccess, ByteArray, Bytes) -import qualified Crypto.Internal.ByteArray as B -import Crypto.Internal.Words import Crypto.Internal.WordArray -import Crypto.Cipher.Blowfish.Box +import Crypto.Internal.Words --- | variable keyed blowfish state -data Context = BF (Int -> Word32) -- p - (Int -> Word32) -- sbox0 - (Int -> Word32) -- sbox1 - (Int -> Word32) -- sbox2 - (Int -> Word32) -- sbox2 +newtype Context = Context Array32 instance NFData Context where - rnf (BF p a b c d) = p `seq` a `seq` b `seq` c `seq` d `seq` () - --- | Encrypt blocks --- --- Input need to be a multiple of 8 bytes -encrypt :: ByteArray ba => Context -> ba -> ba -encrypt = cipher - --- | Decrypt blocks --- --- Input need to be a multiple of 8 bytes -decrypt :: ByteArray ba => Context -> ba -> ba -decrypt = cipher . decryptContext - -decryptContext :: Context -> Context -decryptContext (BF p s0 s1 s2 s3) = BF (\i -> p (17-i)) s0 s1 s2 s3 - -cipher :: ByteArray ba => Context -> ba -> ba -cipher ctx b - | B.length b == 0 = B.empty - | B.length b `mod` 8 /= 0 = error "invalid data length" - | otherwise = B.mapAsWord64 (coreCrypto ctx) b + rnf a = a `seq` () -- | Initialize a new Blowfish context from a key. -- -- key needs to be between 0 and 448 bits. initBlowfish :: ByteArrayAccess key => key -> CryptoFailable Context initBlowfish key - | len > (448 `div` 8) = CryptoFailed CryptoError_KeySizeInvalid - | otherwise = CryptoPassed $ makeKeySchedule key (Nothing :: Maybe (Bytes, Int)) - where len = B.length key + | B.length key > (448 `div` 8) = CryptoFailed CryptoError_KeySizeInvalid + | otherwise = CryptoPassed $ unsafeDoIO $ do + ks <- createKeySchedule + expandKey ks key + freezeKeySchedule ks --- | The BCrypt "expensive key schedule" version of blowfish. +-- | Get an immutable Blowfish context by freezing a mutable key schedule. +freezeKeySchedule :: KeySchedule -> IO Context +freezeKeySchedule (KeySchedule ma) = Context `fmap` mutableArray32Freeze ma + +expandKey :: (ByteArrayAccess key) => KeySchedule -> key -> IO () +expandKey ks@(KeySchedule ma) key = do + when (B.length key > 0) $ iterKeyStream key 0 0 $ \i l r a0 a1 cont-> do + mutableArrayWriteXor32 ma i l + mutableArrayWriteXor32 ma (i + 1) r + when (i + 2 < 18) (cont a0 a1) + loop 0 0 0 + where + loop i l r = do + n <- cipherBlockMutable ks (fromIntegral l `shiftL` 32 .|. fromIntegral r) + let nl = fromIntegral (n `shiftR` 32) + nr = fromIntegral (n .&. 0xffffffff) + mutableArrayWrite32 ma i nl + mutableArrayWrite32 ma (i + 1) nr + when (i < 18 + 1024) (loop (i + 2) nl nr) + +expandKeyWithSalt :: (ByteArrayAccess key, ByteArrayAccess salt) + => KeySchedule + -> key + -> salt + -> IO () +expandKeyWithSalt ks key salt + | B.length salt == 16 = expandKeyWithSalt128 ks key (fromBE $ B.toW64BE salt 0) (fromBE $ B.toW64BE salt 8) + | otherwise = expandKeyWithSaltAny ks key salt + +expandKeyWithSaltAny :: (ByteArrayAccess key, ByteArrayAccess salt) + => KeySchedule -- ^ The key schedule + -> key -- ^ The key + -> salt -- ^ The salt + -> IO () +expandKeyWithSaltAny ks@(KeySchedule ma) key salt = do + when (B.length key > 0) $ iterKeyStream key 0 0 $ \i l r a0 a1 cont-> do + mutableArrayWriteXor32 ma i l + mutableArrayWriteXor32 ma (i + 1) r + when (i + 2 < 18) (cont a0 a1) + -- Go through the entire key schedule overwriting the P-Array and S-Boxes + when (B.length salt > 0) $ iterKeyStream salt 0 0 $ \i l r a0 a1 cont-> do + let l' = xor l a0 + let r' = xor r a1 + n <- cipherBlockMutable ks (fromIntegral l' `shiftL` 32 .|. fromIntegral r') + let nl = fromIntegral (n `shiftR` 32) + nr = fromIntegral (n .&. 0xffffffff) + mutableArrayWrite32 ma i nl + mutableArrayWrite32 ma (i + 1) nr + when (i + 2 < 18 + 1024) (cont nl nr) + +expandKeyWithSalt128 :: ByteArrayAccess ba + => KeySchedule -- ^ The key schedule + -> ba -- ^ The key + -> Word64 -- ^ First word of the salt + -> Word64 -- ^ Second word of the salt + -> IO () +expandKeyWithSalt128 ks@(KeySchedule ma) key salt1 salt2 = do + when (B.length key > 0) $ iterKeyStream key 0 0 $ \i l r a0 a1 cont-> do + mutableArrayWriteXor32 ma i l + mutableArrayWriteXor32 ma (i + 1) r + when (i + 2 < 18) (cont a0 a1) + -- Go through the entire key schedule overwriting the P-Array and S-Boxes + loop 0 salt1 salt1 salt2 + where + loop i input slt1 slt2 + | i == 1042 = return () + | otherwise = do + n <- cipherBlockMutable ks input + let nl = fromIntegral (n `shiftR` 32) + nr = fromIntegral (n .&. 0xffffffff) + mutableArrayWrite32 ma i nl + mutableArrayWrite32 ma (i+1) nr + loop (i+2) (n `xor` slt2) slt2 slt1 + +-- | Encrypt blocks -- --- Salt must be 128 bits --- Cost must be between 4 and 31 inclusive --- See -eksBlowfish :: (ByteArrayAccess salt, ByteArrayAccess password) => Int -> salt -> password -> Context -eksBlowfish cost salt key - | B.length salt /= 16 = error "bcrypt salt must be 16 bytes" - | otherwise = makeKeySchedule key (Just (salt, cost)) +-- Input need to be a multiple of 8 bytes +encrypt :: ByteArray ba => Context -> ba -> ba +encrypt ctx ba + | B.length ba == 0 = B.empty + | B.length ba `mod` 8 /= 0 = error "invalid data length" + | otherwise = B.mapAsWord64 (cipherBlock ctx False) ba -coreCrypto :: Context -> Word64 -> Word64 -coreCrypto (BF p s0 s1 s2 s3) input = doRound input 0 - where - -- transform the input over 16 rounds +-- | Decrypt blocks +-- +-- Input need to be a multiple of 8 bytes +decrypt :: ByteArray ba => Context -> ba -> ba +decrypt ctx ba + | B.length ba == 0 = B.empty + | B.length ba `mod` 8 /= 0 = error "invalid data length" + | otherwise = B.mapAsWord64 (cipherBlock ctx True) ba + +-- | Encrypt or decrypt a single block of 64 bits. +-- +-- The inverse argument decides whether to encrypt or decrypt. +cipherBlock :: Context -> Bool -> Word64 -> Word64 +cipherBlock (Context ar) inverse input = doRound input 0 + where + -- | Transform the input over 16 rounds doRound :: Word64 -> Int -> Word64 doRound i roundIndex | roundIndex == 16 = let final = (fromIntegral (p 16) `shiftL` 32) .|. fromIntegral (p 17) in rotateL (i `xor` final) 32 | otherwise = - let newr = fromIntegral (i `shiftR` 32) `xor` (p roundIndex) - newi = ((i `shiftL` 32) `xor` (f newr)) .|. (fromIntegral newr) + let newr = fromIntegral (i `shiftR` 32) `xor` p roundIndex + newi = ((i `shiftL` 32) `xor` f newr) .|. fromIntegral newr in doRound newi (roundIndex+1) + + -- | The Blowfish Feistel function F f :: Word32 -> Word64 - f t = let a = s0 (fromIntegral $ (t `shiftR` 24) .&. 0xff) - b = s1 (fromIntegral $ (t `shiftR` 16) .&. 0xff) - c = s2 (fromIntegral $ (t `shiftR` 8) .&. 0xff) - d = s3 (fromIntegral $ t .&. 0xff) + f t = let a = s0 (0xff .&. (t `shiftR` 24)) + b = s1 (0xff .&. (t `shiftR` 16)) + c = s2 (0xff .&. (t `shiftR` 8)) + d = s3 (0xff .&. t) in fromIntegral (((a + b) `xor` c) + d) `shiftL` 32 + -- | S-Box arrays, each containing 256 32-bit words + -- The first 18 words contain the P-Array of subkeys + s0, s1, s2, s3 :: Word32 -> Word32 + s0 i = arrayRead32 ar (fromIntegral i + 18) + s1 i = arrayRead32 ar (fromIntegral i + 274) + s2 i = arrayRead32 ar (fromIntegral i + 530) + s3 i = arrayRead32 ar (fromIntegral i + 786) + p :: Int -> Word32 + p i | inverse = arrayRead32 ar (17 - fromIntegral i) + | otherwise = arrayRead32 ar (fromIntegral i) --- | Create a key schedule for either plain Blowfish or the BCrypt "EKS" version --- For the expensive version, the salt and cost factor are supplied. Salt must be --- a 128-bit byte array. --- --- The standard case is just a single key expansion with the salt set to zero. -makeKeySchedule :: (ByteArrayAccess key, ByteArrayAccess salt) => key-> Maybe (salt, Int) -> Context -makeKeySchedule keyBytes saltCost = - let v = unsafeDoIO $ do - mv <- createKeySchedule - case saltCost of - -- Standard blowfish - Nothing -> expandKey mv 0 0 keyBytes - -- The expensive case - Just (s, cost) -> do - let (salt1, salt2) = splitSalt s - expandKey mv salt1 salt2 keyBytes - forM_ [1..2^cost :: Int] $ \_ -> do - expandKey mv 0 0 keyBytes - expandKey mv 0 0 s - mutableArray32Freeze mv - in BF (\i -> arrayRead32 v i) - (\i -> arrayRead32 v (s0+i)) - (\i -> arrayRead32 v (s1+i)) - (\i -> arrayRead32 v (s2+i)) - (\i -> arrayRead32 v (s3+i)) - where - splitSalt s = (fromBE (B.toW64BE s 0), fromBE (B.toW64BE s 8)) +-- | Blowfish encrypt a Word using the current state of the key schedule +cipherBlockMutable :: KeySchedule -> Word64 -> IO Word64 +cipherBlockMutable (KeySchedule ma) input = doRound input 0 + where + -- | Transform the input over 16 rounds + doRound i roundIndex + | roundIndex == 16 = do + pVal1 <- mutableArrayRead32 ma 16 + pVal2 <- mutableArrayRead32 ma 17 + let final = (fromIntegral pVal1 `shiftL` 32) .|. fromIntegral pVal2 + return $ rotateL (i `xor` final) 32 + | otherwise = do + pVal <- mutableArrayRead32 ma roundIndex + let newr = fromIntegral (i `shiftR` 32) `xor` pVal + newr' <- f newr + let newi = ((i `shiftL` 32) `xor` newr') .|. fromIntegral newr + doRound newi (roundIndex+1) - -- Indices of the S-Box arrays, each containing 256 32-bit words - -- The first 18 words contain the P-Array of subkeys - s0 = 18 - s1 = 274 - s2 = 530 - s3 = 786 + -- | The Blowfish Feistel function F + f :: Word32 -> IO Word64 + f t = do + a <- s0 (0xff .&. (t `shiftR` 24)) + b <- s1 (0xff .&. (t `shiftR` 16)) + c <- s2 (0xff .&. (t `shiftR` 8)) + d <- s3 (0xff .&. t) + return (fromIntegral (((a + b) `xor` c) + d) `shiftL` 32) -expandKey :: ByteArrayAccess ba - => MutableArray32 -- ^ The key schedule - -> Word64 -- ^ First word of the salt - -> Word64 -- ^ Second word of the salt - -> ba -- ^ The key - -> IO () -expandKey mv salt1 salt2 key = do - when (len > 0) $ forM_ [0..17] $ \i -> do - let a = B.index key ((i * 4 + 0) `mod` len) - b = B.index key ((i * 4 + 1) `mod` len) - c = B.index key ((i * 4 + 2) `mod` len) - d = B.index key ((i * 4 + 3) `mod` len) - k = (fromIntegral a `shiftL` 24) .|. - (fromIntegral b `shiftL` 16) .|. - (fromIntegral c `shiftL` 8) .|. - (fromIntegral d) - mutableArrayWriteXor32 mv i k - prepare mv - return () - where - len = B.length key + -- | S-Box arrays, each containing 256 32-bit words + -- The first 18 words contain the P-Array of subkeys + s0, s1, s2, s3 :: Word32 -> IO Word32 + s0 i = mutableArrayRead32 ma (fromIntegral i + 18) + s1 i = mutableArrayRead32 ma (fromIntegral i + 274) + s2 i = mutableArrayRead32 ma (fromIntegral i + 530) + s3 i = mutableArrayRead32 ma (fromIntegral i + 786) - -- | Go through the entire key schedule overwriting the P-Array and S-Boxes - prepare mctx = loop 0 salt1 salt1 salt2 - where loop i input slt1 slt2 - | i == 1042 = return () - | otherwise = do - ninput <- coreCryptoMutable input - let (nl, nr) = w64to32 ninput - mutableArrayWrite32 mctx i nl - mutableArrayWrite32 mctx (i+1) nr - loop (i+2) (ninput `xor` slt2) slt2 slt1 - - -- | Blowfish encrypt a Word using the current state of the key schedule - coreCryptoMutable :: Word64 -> IO Word64 - coreCryptoMutable input = doRound input 0 - where doRound i roundIndex - | roundIndex == 16 = do - pVal1 <- mutableArrayRead32 mctx 16 - pVal2 <- mutableArrayRead32 mctx 17 - let final = (fromIntegral pVal1 `shiftL` 32) .|. fromIntegral pVal2 - return $ rotateL (i `xor` final) 32 - | otherwise = do - pVal <- mutableArrayRead32 mctx roundIndex - let newr = fromIntegral (i `shiftR` 32) `xor` pVal - newr' <- f newr - let newi = ((i `shiftL` 32) `xor` newr') .|. (fromIntegral newr) - doRound newi (roundIndex+1) - - -- The Blowfish Feistel function F - f :: Word32 -> IO Word64 - f t = do a <- mutableArrayRead32 mctx (s0 + fromIntegral ((t `shiftR` 24) .&. 0xff)) - b <- mutableArrayRead32 mctx (s1 + fromIntegral ((t `shiftR` 16) .&. 0xff)) - c <- mutableArrayRead32 mctx (s2 + fromIntegral ((t `shiftR` 8) .&. 0xff)) - d <- mutableArrayRead32 mctx (s3 + fromIntegral (t .&. 0xff)) - return (fromIntegral (((a + b) `xor` c) + d) `shiftL` 32) - where s0 = 18 - s1 = 274 - s2 = 530 - s3 = 786 +iterKeyStream :: (ByteArrayAccess x) + => x + -> Word32 + -> Word32 + -> (Int -> Word32 -> Word32 -> Word32 -> Word32 -> (Word32 -> Word32 -> IO ()) -> IO ()) + -> IO () +iterKeyStream x a0 a1 g = f 0 0 a0 a1 + where + len = B.length x + -- Avoiding the modulo operation when interating over the ring + -- buffer is assumed to be more efficient here. All other + -- implementations do this, too. The branch prediction shall prefer + -- the branch with the increment. + n j = if j + 1 >= len then 0 else j + 1 + f i j0 b0 b1 = g i l r b0 b1 (f (i + 2) j8) + where + j1 = n j0 + j2 = n j1 + j3 = n j2 + j4 = n j3 + j5 = n j4 + j6 = n j5 + j7 = n j6 + j8 = n j7 + x0 = fromIntegral (B.index x j0) + x1 = fromIntegral (B.index x j1) + x2 = fromIntegral (B.index x j2) + x3 = fromIntegral (B.index x j3) + x4 = fromIntegral (B.index x j4) + x5 = fromIntegral (B.index x j5) + x6 = fromIntegral (B.index x j6) + x7 = fromIntegral (B.index x j7) + l = shiftL x0 24 .|. shiftL x1 16 .|. shiftL x2 8 .|. x3 + r = shiftL x4 24 .|. shiftL x5 16 .|. shiftL x6 8 .|. x7 +{-# INLINE iterKeyStream #-} +-- Benchmarking shows that GHC considers this function too big to inline +-- although forcing inlining causes an actual improvement. +-- It is assumed that all function calls (especially the continuation) +-- collapse into a tight loop after inlining. diff --git a/Crypto/KDF/BCrypt.hs b/Crypto/KDF/BCrypt.hs index b374b4f..0a706bc 100644 --- a/Crypto/KDF/BCrypt.hs +++ b/Crypto/KDF/BCrypt.hs @@ -52,11 +52,16 @@ module Crypto.KDF.BCrypt ) where -import Control.Monad (unless, when) -import Crypto.Cipher.Blowfish.Primitive (eksBlowfish, encrypt) -import Crypto.Random (MonadRandom, getRandomBytes) -import Data.ByteArray (ByteArrayAccess, ByteArray, Bytes) -import qualified Data.ByteArray as B +import Control.Monad (forM_, unless, when) +import Crypto.Cipher.Blowfish.Primitive (Context, createKeySchedule, + encrypt, expandKey, + expandKeyWithSalt, + freezeKeySchedule) +import Crypto.Internal.Compat +import Crypto.Random (MonadRandom, getRandomBytes) +import Data.ByteArray (ByteArray, ByteArrayAccess, + Bytes) +import qualified Data.ByteArray as B import Data.ByteArray.Encoding import Data.Char @@ -136,7 +141,7 @@ rawHash _ cost salt password = B.take 23 hash -- Another compatibility bug. Igno -- Truncate the password if necessary and append a null byte for C compatibility key = B.snoc (B.take 72 password) 0 - ctx = eksBlowfish cost salt key + ctx = expensiveBlowfishContext key salt cost -- The BCrypt plaintext: "OrpheanBeholderScryDoubt" orpheanBeholder = B.pack [79,114,112,104,101,97,110,66,101,104,111,108,100,101,114,83,99,114,121,68,111,117,98,116] @@ -166,3 +171,19 @@ parseBCryptHash bc = do salt <- convertFromBase Base64OpenBSD s hash <- convertFromBase Base64OpenBSD h return (salt, hash) + +-- | Create a key schedule for the BCrypt "EKS" version. +-- +-- Salt must be a 128-bit byte array. +-- Cost must be between 4 and 31 inclusive +-- See +expensiveBlowfishContext :: (ByteArrayAccess key, ByteArrayAccess salt) => key-> salt -> Int -> Context +expensiveBlowfishContext keyBytes saltBytes cost + | B.length saltBytes /= 16 = error "bcrypt salt must be 16 bytes" + | otherwise = unsafeDoIO $ do + ks <- createKeySchedule + expandKeyWithSalt ks keyBytes saltBytes + forM_ [1..2^cost :: Int] $ \_ -> do + expandKey ks keyBytes + expandKey ks saltBytes + freezeKeySchedule ks