Extend the internal interface of the Blowfish module.

In preparation of an implementation of the bcrypt_pbkdf (a
variant of PBKDF2 used by OpenSSH) algorithm,
certain low-level operations of the Blowfish algorithm need to
be generalized and exposed.

The Blowfish.Primitive module has already been extended to
account for the requirements imposed by the BCrypt algorithm,
but the salt length was limited to 16 bytes and the BCrypt
specific key schedule setup has been hard-coded into the Blowfish
module.

This commit makes a clear distintion between the expandKey and
expandKeyWithSalt operation. Both take arbitrary sized salts
and keys now. The specialized operation for 16 byte salts as used
by BCrypt has been preserved and is selected automatically.
Also, the BCrypt specific parts have been move to the BCrypt
module with regard to separation of concern.

A benchmark for generating BCrypt hashes with cost 10 shows a
performance improvement from 158 to 141ms on average (Intel i5-6500)
after this refactoring.
Further experiments suggest that the specialized expandKeyWithSalt128
does not have any advantage over the generalized version
and might be removed in favour of less branches and exceptional
behaviour.
This commit is contained in:
Lars Petersen 2018-05-08 22:08:20 +02:00
parent 4622e5fc8e
commit ff8a1c524d
3 changed files with 239 additions and 157 deletions

View File

@ -5,15 +5,19 @@
-- Portability : Good -- Portability : Good
{-# LANGUAGE MagicHash #-} {-# LANGUAGE MagicHash #-}
module Crypto.Cipher.Blowfish.Box module Crypto.Cipher.Blowfish.Box
( createKeySchedule ( KeySchedule(..)
, createKeySchedule
) where ) where
import Crypto.Internal.WordArray (mutableArray32FromAddrBE, MutableArray32) import Crypto.Internal.WordArray (MutableArray32,
mutableArray32FromAddrBE)
newtype KeySchedule = KeySchedule MutableArray32
-- | Create a key schedule mutable array of the pbox followed by -- | Create a key schedule mutable array of the pbox followed by
-- all the sboxes. -- all the sboxes.
createKeySchedule :: IO MutableArray32 createKeySchedule :: IO KeySchedule
createKeySchedule = mutableArray32FromAddrBE 1042 "\ createKeySchedule = KeySchedule `fmap` mutableArray32FromAddrBE 1042 "\
\\x24\x3f\x6a\x88\x85\xa3\x08\xd3\x13\x19\x8a\x2e\x03\x70\x73\x44\ \\x24\x3f\x6a\x88\x85\xa3\x08\xd3\x13\x19\x8a\x2e\x03\x70\x73\x44\
\\xa4\x09\x38\x22\x29\x9f\x31\xd0\x08\x2e\xfa\x98\xec\x4e\x6c\x89\ \\xa4\x09\x38\x22\x29\x9f\x31\xd0\x08\x2e\xfa\x98\xec\x4e\x6c\x89\
\\x45\x28\x21\xe6\x38\xd0\x13\x77\xbe\x54\x66\xcf\x34\xe9\x0c\x6c\ \\x45\x28\x21\xe6\x38\xd0\x13\x77\xbe\x54\x66\xcf\x34\xe9\x0c\x6c\

View File

@ -5,6 +5,7 @@
-- Portability : Good -- Portability : Good
-- Rewritten by Vincent Hanquez (c) 2015 -- Rewritten by Vincent Hanquez (c) 2015
-- Lars Petersen (c) 2018
-- --
-- Original code: -- Original code:
-- Crypto.Cipher.Blowfish.Primitive, copyright (c) 2012 Stijn van Drongelen -- Crypto.Cipher.Blowfish.Primitive, copyright (c) 2012 Stijn van Drongelen
@ -16,7 +17,11 @@ module Crypto.Cipher.Blowfish.Primitive
, initBlowfish , initBlowfish
, encrypt , encrypt
, decrypt , decrypt
, eksBlowfish , KeySchedule
, createKeySchedule
, freezeKeySchedule
, expandKey
, expandKeyWithSalt
) where ) where
import Control.Monad (when) import Control.Monad (when)
@ -24,178 +29,230 @@ import Data.Bits
import Data.Memory.Endian import Data.Memory.Endian
import Data.Word import Data.Word
import Crypto.Cipher.Blowfish.Box
import Crypto.Error import Crypto.Error
import Crypto.Internal.ByteArray (ByteArray, ByteArrayAccess, Bytes)
import qualified Crypto.Internal.ByteArray as B
import Crypto.Internal.Compat import Crypto.Internal.Compat
import Crypto.Internal.Imports import Crypto.Internal.Imports
import Crypto.Internal.ByteArray (ByteArrayAccess, ByteArray, Bytes)
import qualified Crypto.Internal.ByteArray as B
import Crypto.Internal.Words
import Crypto.Internal.WordArray import Crypto.Internal.WordArray
import Crypto.Cipher.Blowfish.Box import Crypto.Internal.Words
-- | variable keyed blowfish state newtype Context = Context Array32
data Context = BF (Int -> Word32) -- p
(Int -> Word32) -- sbox0
(Int -> Word32) -- sbox1
(Int -> Word32) -- sbox2
(Int -> Word32) -- sbox2
instance NFData Context where instance NFData Context where
rnf (BF p a b c d) = p `seq` a `seq` b `seq` c `seq` d `seq` () rnf a = a `seq` ()
-- | Encrypt blocks
--
-- Input need to be a multiple of 8 bytes
encrypt :: ByteArray ba => Context -> ba -> ba
encrypt = cipher
-- | Decrypt blocks
--
-- Input need to be a multiple of 8 bytes
decrypt :: ByteArray ba => Context -> ba -> ba
decrypt = cipher . decryptContext
decryptContext :: Context -> Context
decryptContext (BF p s0 s1 s2 s3) = BF (\i -> p (17-i)) s0 s1 s2 s3
cipher :: ByteArray ba => Context -> ba -> ba
cipher ctx b
| B.length b == 0 = B.empty
| B.length b `mod` 8 /= 0 = error "invalid data length"
| otherwise = B.mapAsWord64 (coreCrypto ctx) b
-- | Initialize a new Blowfish context from a key. -- | Initialize a new Blowfish context from a key.
-- --
-- key needs to be between 0 and 448 bits. -- key needs to be between 0 and 448 bits.
initBlowfish :: ByteArrayAccess key => key -> CryptoFailable Context initBlowfish :: ByteArrayAccess key => key -> CryptoFailable Context
initBlowfish key initBlowfish key
| len > (448 `div` 8) = CryptoFailed CryptoError_KeySizeInvalid | B.length key > (448 `div` 8) = CryptoFailed CryptoError_KeySizeInvalid
| otherwise = CryptoPassed $ makeKeySchedule key (Nothing :: Maybe (Bytes, Int)) | otherwise = CryptoPassed $ unsafeDoIO $ do
where len = B.length key ks <- createKeySchedule
expandKey ks key
freezeKeySchedule ks
-- | The BCrypt "expensive key schedule" version of blowfish. -- | Get an immutable Blowfish context by freezing a mutable key schedule.
-- freezeKeySchedule :: KeySchedule -> IO Context
-- Salt must be 128 bits freezeKeySchedule (KeySchedule ma) = Context `fmap` mutableArray32Freeze ma
-- Cost must be between 4 and 31 inclusive
-- See <https://www.usenix.org/conference/1999-usenix-annual-technical-conference/future-adaptable-password-scheme>
eksBlowfish :: (ByteArrayAccess salt, ByteArrayAccess password) => Int -> salt -> password -> Context
eksBlowfish cost salt key
| B.length salt /= 16 = error "bcrypt salt must be 16 bytes"
| otherwise = makeKeySchedule key (Just (salt, cost))
coreCrypto :: Context -> Word64 -> Word64 expandKey :: (ByteArrayAccess key) => KeySchedule -> key -> IO ()
coreCrypto (BF p s0 s1 s2 s3) input = doRound input 0 expandKey ks@(KeySchedule ma) key = do
when (B.length key > 0) $ iterKeyStream key 0 0 $ \i l r a0 a1 cont-> do
mutableArrayWriteXor32 ma i l
mutableArrayWriteXor32 ma (i + 1) r
when (i + 2 < 18) (cont a0 a1)
loop 0 0 0
where where
-- transform the input over 16 rounds loop i l r = do
n <- cipherBlockMutable ks (fromIntegral l `shiftL` 32 .|. fromIntegral r)
let nl = fromIntegral (n `shiftR` 32)
nr = fromIntegral (n .&. 0xffffffff)
mutableArrayWrite32 ma i nl
mutableArrayWrite32 ma (i + 1) nr
when (i < 18 + 1024) (loop (i + 2) nl nr)
expandKeyWithSalt :: (ByteArrayAccess key, ByteArrayAccess salt)
=> KeySchedule
-> key
-> salt
-> IO ()
expandKeyWithSalt ks key salt
| B.length salt == 16 = expandKeyWithSalt128 ks key (fromBE $ B.toW64BE salt 0) (fromBE $ B.toW64BE salt 8)
| otherwise = expandKeyWithSaltAny ks key salt
expandKeyWithSaltAny :: (ByteArrayAccess key, ByteArrayAccess salt)
=> KeySchedule -- ^ The key schedule
-> key -- ^ The key
-> salt -- ^ The salt
-> IO ()
expandKeyWithSaltAny ks@(KeySchedule ma) key salt = do
when (B.length key > 0) $ iterKeyStream key 0 0 $ \i l r a0 a1 cont-> do
mutableArrayWriteXor32 ma i l
mutableArrayWriteXor32 ma (i + 1) r
when (i + 2 < 18) (cont a0 a1)
-- Go through the entire key schedule overwriting the P-Array and S-Boxes
when (B.length salt > 0) $ iterKeyStream salt 0 0 $ \i l r a0 a1 cont-> do
let l' = xor l a0
let r' = xor r a1
n <- cipherBlockMutable ks (fromIntegral l' `shiftL` 32 .|. fromIntegral r')
let nl = fromIntegral (n `shiftR` 32)
nr = fromIntegral (n .&. 0xffffffff)
mutableArrayWrite32 ma i nl
mutableArrayWrite32 ma (i + 1) nr
when (i + 2 < 18 + 1024) (cont nl nr)
expandKeyWithSalt128 :: ByteArrayAccess ba
=> KeySchedule -- ^ The key schedule
-> ba -- ^ The key
-> Word64 -- ^ First word of the salt
-> Word64 -- ^ Second word of the salt
-> IO ()
expandKeyWithSalt128 ks@(KeySchedule ma) key salt1 salt2 = do
when (B.length key > 0) $ iterKeyStream key 0 0 $ \i l r a0 a1 cont-> do
mutableArrayWriteXor32 ma i l
mutableArrayWriteXor32 ma (i + 1) r
when (i + 2 < 18) (cont a0 a1)
-- Go through the entire key schedule overwriting the P-Array and S-Boxes
loop 0 salt1 salt1 salt2
where
loop i input slt1 slt2
| i == 1042 = return ()
| otherwise = do
n <- cipherBlockMutable ks input
let nl = fromIntegral (n `shiftR` 32)
nr = fromIntegral (n .&. 0xffffffff)
mutableArrayWrite32 ma i nl
mutableArrayWrite32 ma (i+1) nr
loop (i+2) (n `xor` slt2) slt2 slt1
-- | Encrypt blocks
--
-- Input need to be a multiple of 8 bytes
encrypt :: ByteArray ba => Context -> ba -> ba
encrypt ctx ba
| B.length ba == 0 = B.empty
| B.length ba `mod` 8 /= 0 = error "invalid data length"
| otherwise = B.mapAsWord64 (cipherBlock ctx False) ba
-- | Decrypt blocks
--
-- Input need to be a multiple of 8 bytes
decrypt :: ByteArray ba => Context -> ba -> ba
decrypt ctx ba
| B.length ba == 0 = B.empty
| B.length ba `mod` 8 /= 0 = error "invalid data length"
| otherwise = B.mapAsWord64 (cipherBlock ctx True) ba
-- | Encrypt or decrypt a single block of 64 bits.
--
-- The inverse argument decides whether to encrypt or decrypt.
cipherBlock :: Context -> Bool -> Word64 -> Word64
cipherBlock (Context ar) inverse input = doRound input 0
where
-- | Transform the input over 16 rounds
doRound :: Word64 -> Int -> Word64 doRound :: Word64 -> Int -> Word64
doRound i roundIndex doRound i roundIndex
| roundIndex == 16 = | roundIndex == 16 =
let final = (fromIntegral (p 16) `shiftL` 32) .|. fromIntegral (p 17) let final = (fromIntegral (p 16) `shiftL` 32) .|. fromIntegral (p 17)
in rotateL (i `xor` final) 32 in rotateL (i `xor` final) 32
| otherwise = | otherwise =
let newr = fromIntegral (i `shiftR` 32) `xor` (p roundIndex) let newr = fromIntegral (i `shiftR` 32) `xor` p roundIndex
newi = ((i `shiftL` 32) `xor` (f newr)) .|. (fromIntegral newr) newi = ((i `shiftL` 32) `xor` f newr) .|. fromIntegral newr
in doRound newi (roundIndex+1) in doRound newi (roundIndex+1)
-- | The Blowfish Feistel function F
f :: Word32 -> Word64 f :: Word32 -> Word64
f t = let a = s0 (fromIntegral $ (t `shiftR` 24) .&. 0xff) f t = let a = s0 (0xff .&. (t `shiftR` 24))
b = s1 (fromIntegral $ (t `shiftR` 16) .&. 0xff) b = s1 (0xff .&. (t `shiftR` 16))
c = s2 (fromIntegral $ (t `shiftR` 8) .&. 0xff) c = s2 (0xff .&. (t `shiftR` 8))
d = s3 (fromIntegral $ t .&. 0xff) d = s3 (0xff .&. t)
in fromIntegral (((a + b) `xor` c) + d) `shiftL` 32 in fromIntegral (((a + b) `xor` c) + d) `shiftL` 32
-- | S-Box arrays, each containing 256 32-bit words
-- | Create a key schedule for either plain Blowfish or the BCrypt "EKS" version
-- For the expensive version, the salt and cost factor are supplied. Salt must be
-- a 128-bit byte array.
--
-- The standard case is just a single key expansion with the salt set to zero.
makeKeySchedule :: (ByteArrayAccess key, ByteArrayAccess salt) => key-> Maybe (salt, Int) -> Context
makeKeySchedule keyBytes saltCost =
let v = unsafeDoIO $ do
mv <- createKeySchedule
case saltCost of
-- Standard blowfish
Nothing -> expandKey mv 0 0 keyBytes
-- The expensive case
Just (s, cost) -> do
let (salt1, salt2) = splitSalt s
expandKey mv salt1 salt2 keyBytes
forM_ [1..2^cost :: Int] $ \_ -> do
expandKey mv 0 0 keyBytes
expandKey mv 0 0 s
mutableArray32Freeze mv
in BF (\i -> arrayRead32 v i)
(\i -> arrayRead32 v (s0+i))
(\i -> arrayRead32 v (s1+i))
(\i -> arrayRead32 v (s2+i))
(\i -> arrayRead32 v (s3+i))
where
splitSalt s = (fromBE (B.toW64BE s 0), fromBE (B.toW64BE s 8))
-- Indices of the S-Box arrays, each containing 256 32-bit words
-- The first 18 words contain the P-Array of subkeys -- The first 18 words contain the P-Array of subkeys
s0 = 18 s0, s1, s2, s3 :: Word32 -> Word32
s1 = 274 s0 i = arrayRead32 ar (fromIntegral i + 18)
s2 = 530 s1 i = arrayRead32 ar (fromIntegral i + 274)
s3 = 786 s2 i = arrayRead32 ar (fromIntegral i + 530)
s3 i = arrayRead32 ar (fromIntegral i + 786)
expandKey :: ByteArrayAccess ba p :: Int -> Word32
=> MutableArray32 -- ^ The key schedule p i | inverse = arrayRead32 ar (17 - fromIntegral i)
-> Word64 -- ^ First word of the salt | otherwise = arrayRead32 ar (fromIntegral i)
-> Word64 -- ^ Second word of the salt
-> ba -- ^ The key
-> IO ()
expandKey mv salt1 salt2 key = do
when (len > 0) $ forM_ [0..17] $ \i -> do
let a = B.index key ((i * 4 + 0) `mod` len)
b = B.index key ((i * 4 + 1) `mod` len)
c = B.index key ((i * 4 + 2) `mod` len)
d = B.index key ((i * 4 + 3) `mod` len)
k = (fromIntegral a `shiftL` 24) .|.
(fromIntegral b `shiftL` 16) .|.
(fromIntegral c `shiftL` 8) .|.
(fromIntegral d)
mutableArrayWriteXor32 mv i k
prepare mv
return ()
where
len = B.length key
-- | Go through the entire key schedule overwriting the P-Array and S-Boxes
prepare mctx = loop 0 salt1 salt1 salt2
where loop i input slt1 slt2
| i == 1042 = return ()
| otherwise = do
ninput <- coreCryptoMutable input
let (nl, nr) = w64to32 ninput
mutableArrayWrite32 mctx i nl
mutableArrayWrite32 mctx (i+1) nr
loop (i+2) (ninput `xor` slt2) slt2 slt1
-- | Blowfish encrypt a Word using the current state of the key schedule -- | Blowfish encrypt a Word using the current state of the key schedule
coreCryptoMutable :: Word64 -> IO Word64 cipherBlockMutable :: KeySchedule -> Word64 -> IO Word64
coreCryptoMutable input = doRound input 0 cipherBlockMutable (KeySchedule ma) input = doRound input 0
where doRound i roundIndex where
-- | Transform the input over 16 rounds
doRound i roundIndex
| roundIndex == 16 = do | roundIndex == 16 = do
pVal1 <- mutableArrayRead32 mctx 16 pVal1 <- mutableArrayRead32 ma 16
pVal2 <- mutableArrayRead32 mctx 17 pVal2 <- mutableArrayRead32 ma 17
let final = (fromIntegral pVal1 `shiftL` 32) .|. fromIntegral pVal2 let final = (fromIntegral pVal1 `shiftL` 32) .|. fromIntegral pVal2
return $ rotateL (i `xor` final) 32 return $ rotateL (i `xor` final) 32
| otherwise = do | otherwise = do
pVal <- mutableArrayRead32 mctx roundIndex pVal <- mutableArrayRead32 ma roundIndex
let newr = fromIntegral (i `shiftR` 32) `xor` pVal let newr = fromIntegral (i `shiftR` 32) `xor` pVal
newr' <- f newr newr' <- f newr
let newi = ((i `shiftL` 32) `xor` newr') .|. (fromIntegral newr) let newi = ((i `shiftL` 32) `xor` newr') .|. fromIntegral newr
doRound newi (roundIndex+1) doRound newi (roundIndex+1)
-- The Blowfish Feistel function F -- | The Blowfish Feistel function F
f :: Word32 -> IO Word64 f :: Word32 -> IO Word64
f t = do a <- mutableArrayRead32 mctx (s0 + fromIntegral ((t `shiftR` 24) .&. 0xff)) f t = do
b <- mutableArrayRead32 mctx (s1 + fromIntegral ((t `shiftR` 16) .&. 0xff)) a <- s0 (0xff .&. (t `shiftR` 24))
c <- mutableArrayRead32 mctx (s2 + fromIntegral ((t `shiftR` 8) .&. 0xff)) b <- s1 (0xff .&. (t `shiftR` 16))
d <- mutableArrayRead32 mctx (s3 + fromIntegral (t .&. 0xff)) c <- s2 (0xff .&. (t `shiftR` 8))
d <- s3 (0xff .&. t)
return (fromIntegral (((a + b) `xor` c) + d) `shiftL` 32) return (fromIntegral (((a + b) `xor` c) + d) `shiftL` 32)
where s0 = 18
s1 = 274 -- | S-Box arrays, each containing 256 32-bit words
s2 = 530 -- The first 18 words contain the P-Array of subkeys
s3 = 786 s0, s1, s2, s3 :: Word32 -> IO Word32
s0 i = mutableArrayRead32 ma (fromIntegral i + 18)
s1 i = mutableArrayRead32 ma (fromIntegral i + 274)
s2 i = mutableArrayRead32 ma (fromIntegral i + 530)
s3 i = mutableArrayRead32 ma (fromIntegral i + 786)
iterKeyStream :: (ByteArrayAccess x)
=> x
-> Word32
-> Word32
-> (Int -> Word32 -> Word32 -> Word32 -> Word32 -> (Word32 -> Word32 -> IO ()) -> IO ())
-> IO ()
iterKeyStream x a0 a1 g = f 0 0 a0 a1
where
len = B.length x
-- Avoiding the modulo operation when interating over the ring
-- buffer is assumed to be more efficient here. All other
-- implementations do this, too. The branch prediction shall prefer
-- the branch with the increment.
n j = if j + 1 >= len then 0 else j + 1
f i j0 b0 b1 = g i l r b0 b1 (f (i + 2) j8)
where
j1 = n j0
j2 = n j1
j3 = n j2
j4 = n j3
j5 = n j4
j6 = n j5
j7 = n j6
j8 = n j7
x0 = fromIntegral (B.index x j0)
x1 = fromIntegral (B.index x j1)
x2 = fromIntegral (B.index x j2)
x3 = fromIntegral (B.index x j3)
x4 = fromIntegral (B.index x j4)
x5 = fromIntegral (B.index x j5)
x6 = fromIntegral (B.index x j6)
x7 = fromIntegral (B.index x j7)
l = shiftL x0 24 .|. shiftL x1 16 .|. shiftL x2 8 .|. x3
r = shiftL x4 24 .|. shiftL x5 16 .|. shiftL x6 8 .|. x7
{-# INLINE iterKeyStream #-}
-- Benchmarking shows that GHC considers this function too big to inline
-- although forcing inlining causes an actual improvement.
-- It is assumed that all function calls (especially the continuation)
-- collapse into a tight loop after inlining.

View File

@ -52,10 +52,15 @@ module Crypto.KDF.BCrypt
) )
where where
import Control.Monad (unless, when) import Control.Monad (forM_, unless, when)
import Crypto.Cipher.Blowfish.Primitive (eksBlowfish, encrypt) import Crypto.Cipher.Blowfish.Primitive (Context, createKeySchedule,
encrypt, expandKey,
expandKeyWithSalt,
freezeKeySchedule)
import Crypto.Internal.Compat
import Crypto.Random (MonadRandom, getRandomBytes) import Crypto.Random (MonadRandom, getRandomBytes)
import Data.ByteArray (ByteArrayAccess, ByteArray, Bytes) import Data.ByteArray (ByteArray, ByteArrayAccess,
Bytes)
import qualified Data.ByteArray as B import qualified Data.ByteArray as B
import Data.ByteArray.Encoding import Data.ByteArray.Encoding
import Data.Char import Data.Char
@ -136,7 +141,7 @@ rawHash _ cost salt password = B.take 23 hash -- Another compatibility bug. Igno
-- Truncate the password if necessary and append a null byte for C compatibility -- Truncate the password if necessary and append a null byte for C compatibility
key = B.snoc (B.take 72 password) 0 key = B.snoc (B.take 72 password) 0
ctx = eksBlowfish cost salt key ctx = expensiveBlowfishContext key salt cost
-- The BCrypt plaintext: "OrpheanBeholderScryDoubt" -- The BCrypt plaintext: "OrpheanBeholderScryDoubt"
orpheanBeholder = B.pack [79,114,112,104,101,97,110,66,101,104,111,108,100,101,114,83,99,114,121,68,111,117,98,116] orpheanBeholder = B.pack [79,114,112,104,101,97,110,66,101,104,111,108,100,101,114,83,99,114,121,68,111,117,98,116]
@ -166,3 +171,19 @@ parseBCryptHash bc = do
salt <- convertFromBase Base64OpenBSD s salt <- convertFromBase Base64OpenBSD s
hash <- convertFromBase Base64OpenBSD h hash <- convertFromBase Base64OpenBSD h
return (salt, hash) return (salt, hash)
-- | Create a key schedule for the BCrypt "EKS" version.
--
-- Salt must be a 128-bit byte array.
-- Cost must be between 4 and 31 inclusive
-- See <https://www.usenix.org/conference/1999-usenix-annual-technical-conference/future-adaptable-password-scheme>
expensiveBlowfishContext :: (ByteArrayAccess key, ByteArrayAccess salt) => key-> salt -> Int -> Context
expensiveBlowfishContext keyBytes saltBytes cost
| B.length saltBytes /= 16 = error "bcrypt salt must be 16 bytes"
| otherwise = unsafeDoIO $ do
ks <- createKeySchedule
expandKeyWithSalt ks keyBytes saltBytes
forM_ [1..2^cost :: Int] $ \_ -> do
expandKey ks keyBytes
expandKey ks saltBytes
freezeKeySchedule ks