Make parallel upload also resume an existing upload
This commit is contained in:
parent
d17d6f216d
commit
688f326b6e
@ -43,6 +43,7 @@ library
|
|||||||
, cryptonite-conduit
|
, cryptonite-conduit
|
||||||
, data-default
|
, data-default
|
||||||
, exceptions
|
, exceptions
|
||||||
|
, extra
|
||||||
, filepath
|
, filepath
|
||||||
, http-client
|
, http-client
|
||||||
, http-conduit
|
, http-conduit
|
||||||
@ -87,6 +88,7 @@ test-suite minio-hs-test
|
|||||||
, cryptonite-conduit
|
, cryptonite-conduit
|
||||||
, data-default
|
, data-default
|
||||||
, exceptions
|
, exceptions
|
||||||
|
, extra
|
||||||
, filepath
|
, filepath
|
||||||
, http-client
|
, http-client
|
||||||
, http-conduit
|
, http-conduit
|
||||||
|
|||||||
@ -4,6 +4,7 @@ module Network.Minio.Data.Crypto
|
|||||||
, hashSHA256FromSource
|
, hashSHA256FromSource
|
||||||
|
|
||||||
, hashMD5
|
, hashMD5
|
||||||
|
, hashMD5FromSource
|
||||||
|
|
||||||
, hmacSHA256
|
, hmacSHA256
|
||||||
, hmacSHA256RawBS
|
, hmacSHA256RawBS
|
||||||
@ -32,6 +33,18 @@ hashSHA256FromSource src = do
|
|||||||
sinkSHA256Hash :: Monad m => C.Consumer ByteString m (Digest SHA256)
|
sinkSHA256Hash :: Monad m => C.Consumer ByteString m (Digest SHA256)
|
||||||
sinkSHA256Hash = sinkHash
|
sinkSHA256Hash = sinkHash
|
||||||
|
|
||||||
|
hashMD5 :: ByteString -> ByteString
|
||||||
|
hashMD5 = digestToBase16 . hashWith MD5
|
||||||
|
|
||||||
|
hashMD5FromSource :: Monad m => C.Producer m ByteString -> m ByteString
|
||||||
|
hashMD5FromSource src = do
|
||||||
|
digest <- src C.$$ sinkMD5Hash
|
||||||
|
return $ digestToBase16 digest
|
||||||
|
where
|
||||||
|
-- To help with type inference
|
||||||
|
sinkMD5Hash :: Monad m => C.Consumer ByteString m (Digest MD5)
|
||||||
|
sinkMD5Hash = sinkHash
|
||||||
|
|
||||||
hmacSHA256 :: ByteString -> ByteString -> HMAC SHA256
|
hmacSHA256 :: ByteString -> ByteString -> HMAC SHA256
|
||||||
hmacSHA256 message key = hmac key message
|
hmacSHA256 message key = hmac key message
|
||||||
|
|
||||||
@ -43,6 +56,3 @@ digestToBS = convert
|
|||||||
|
|
||||||
digestToBase16 :: ByteArrayAccess a => a -> ByteString
|
digestToBase16 :: ByteArrayAccess a => a -> ByteString
|
||||||
digestToBase16 = convertToBase Base16
|
digestToBase16 = convertToBase Base16
|
||||||
|
|
||||||
hashMD5 :: ByteString -> ByteString
|
|
||||||
hashMD5 = digestToBase16 . hashWith MD5
|
|
||||||
|
|||||||
@ -5,11 +5,14 @@ module Network.Minio.PutObject
|
|||||||
) where
|
) where
|
||||||
|
|
||||||
|
|
||||||
import qualified Data.Conduit as C
|
import Control.Monad.Extra (loopM)
|
||||||
import qualified Data.Conduit.Combinators as CC
|
import qualified Data.ByteString as B
|
||||||
import qualified Data.Conduit.Binary as CB
|
|
||||||
import qualified Data.List as List
|
|
||||||
import qualified Data.ByteString.Lazy as LB
|
import qualified Data.ByteString.Lazy as LB
|
||||||
|
import qualified Data.Conduit as C
|
||||||
|
import Data.Conduit.Binary (sourceHandleRange)
|
||||||
|
import qualified Data.Conduit.Binary as CB
|
||||||
|
import qualified Data.Conduit.Combinators as CC
|
||||||
|
import qualified Data.List as List
|
||||||
import qualified Data.Map.Strict as Map
|
import qualified Data.Map.Strict as Map
|
||||||
|
|
||||||
import Lib.Prelude
|
import Lib.Prelude
|
||||||
@ -47,6 +50,7 @@ data ObjectData m = ODFile FilePath (Maybe Int64) -- ^ Takes filepath and option
|
|||||||
-- | Put an object from ObjectData. This high-level API handles
|
-- | Put an object from ObjectData. This high-level API handles
|
||||||
-- objects of all sizes, and even if the object size is unknown.
|
-- objects of all sizes, and even if the object size is unknown.
|
||||||
putObject :: Bucket -> Object -> ObjectData Minio -> Minio ETag
|
putObject :: Bucket -> Object -> ObjectData Minio -> Minio ETag
|
||||||
|
putObject b o (ODStream src sizeMay) = sequentialMultipartUpload b o sizeMay src
|
||||||
putObject b o (ODFile fp sizeMay) = do
|
putObject b o (ODFile fp sizeMay) = do
|
||||||
hResE <- withNewHandle fp $ \h ->
|
hResE <- withNewHandle fp $ \h ->
|
||||||
liftM2 (,) (isHandleSeekable h) (getFileSize h)
|
liftM2 (,) (isHandleSeekable h) (getFileSize h)
|
||||||
@ -72,7 +76,6 @@ putObject b o (ODFile fp sizeMay) = do
|
|||||||
| isSeekable -> parallelMultipartUpload b o fp size
|
| isSeekable -> parallelMultipartUpload b o fp size
|
||||||
| otherwise -> sequentialMultipartUpload b o (Just size) $
|
| otherwise -> sequentialMultipartUpload b o (Just size) $
|
||||||
CB.sourceFile fp
|
CB.sourceFile fp
|
||||||
putObject b o (ODStream src sizeMay) = sequentialMultipartUpload b o sizeMay src
|
|
||||||
|
|
||||||
-- | Select part sizes - the logic is that the minimum part-size will
|
-- | Select part sizes - the logic is that the minimum part-size will
|
||||||
-- be 64MiB. TODO: write quickcheck tests.
|
-- be 64MiB. TODO: write quickcheck tests.
|
||||||
@ -85,16 +88,35 @@ selectPartSizes size = List.zip3 [1..] partOffsets partSizes
|
|||||||
partSizes = replicate (fromIntegral numParts) partSize ++ lastPart
|
partSizes = replicate (fromIntegral numParts) partSize ++ lastPart
|
||||||
partOffsets = List.scanl' (+) 0 partSizes
|
partOffsets = List.scanl' (+) 0 partSizes
|
||||||
|
|
||||||
|
-- returns partinfo if part is already uploaded.
|
||||||
|
checkUploadNeeded :: Payload -> PartNumber
|
||||||
|
-> Map.Map PartNumber ListPartInfo
|
||||||
|
-> Minio (Maybe PartInfo)
|
||||||
|
checkUploadNeeded payload n pmap = do
|
||||||
|
(md5hash, pSize) <- case payload of
|
||||||
|
PayloadBS bs -> return (hashMD5 bs, fromIntegral $ B.length bs)
|
||||||
|
PayloadH h off size -> liftM (, size) $
|
||||||
|
hashMD5FromSource $ sourceHandleRange h (Just $ fromIntegral off)
|
||||||
|
(Just $ fromIntegral size)
|
||||||
|
case Map.lookup n pmap of
|
||||||
|
Nothing -> return Nothing
|
||||||
|
Just (ListPartInfo _ etag size _) -> return $
|
||||||
|
bool Nothing (Just (PartInfo n etag)) $
|
||||||
|
md5hash == encodeUtf8 etag && size == pSize
|
||||||
|
|
||||||
parallelMultipartUpload :: Bucket -> Object -> FilePath -> Int64
|
parallelMultipartUpload :: Bucket -> Object -> FilePath -> Int64
|
||||||
-> Minio ETag
|
-> Minio ETag
|
||||||
parallelMultipartUpload b o filePath size = do
|
parallelMultipartUpload b o filePath size = do
|
||||||
|
(uidMay, pmap) <- getExistingUpload b o
|
||||||
|
|
||||||
|
-- get a new upload id if needed.
|
||||||
|
uploadId <- maybe (newMultipartUpload b o []) return uidMay
|
||||||
|
|
||||||
let partSizeInfo = selectPartSizes size
|
let partSizeInfo = selectPartSizes size
|
||||||
|
|
||||||
-- get new upload id.
|
|
||||||
uploadId <- newMultipartUpload b o []
|
|
||||||
|
|
||||||
-- perform upload with 10 threads
|
-- perform upload with 10 threads
|
||||||
uploadedPartsE <- limitedMapConcurrently 10 (uploadPart uploadId) partSizeInfo
|
uploadedPartsE <- limitedMapConcurrently 10
|
||||||
|
(uploadPart pmap uploadId) partSizeInfo
|
||||||
|
|
||||||
-- if there were any errors, rethrow exception.
|
-- if there were any errors, rethrow exception.
|
||||||
mapM_ throwM $ lefts uploadedPartsE
|
mapM_ throwM $ lefts uploadedPartsE
|
||||||
@ -102,20 +124,29 @@ parallelMultipartUpload b o filePath size = do
|
|||||||
-- if we get here, all parts were successfully uploaded.
|
-- if we get here, all parts were successfully uploaded.
|
||||||
completeMultipartUpload b o uploadId $ rights uploadedPartsE
|
completeMultipartUpload b o uploadId $ rights uploadedPartsE
|
||||||
where
|
where
|
||||||
uploadPart uploadId (partNum, offset, sz) = withNewHandle filePath $
|
uploadPart pmap uploadId (partNum, offset, sz) =
|
||||||
\h -> putObjectPart b o uploadId partNum [] $ PayloadH h offset sz
|
withNewHandle filePath $ \h -> do
|
||||||
|
let payload = PayloadH h offset sz
|
||||||
|
pInfoMay <- checkUploadNeeded payload partNum pmap
|
||||||
|
maybe
|
||||||
|
(putObjectPart b o uploadId partNum [] payload)
|
||||||
|
return pInfoMay
|
||||||
|
|
||||||
-- | Upload multipart object from conduit source sequentially
|
-- | Upload multipart object from conduit source sequentially
|
||||||
sequentialMultipartUpload :: Bucket -> Object -> Maybe Int64
|
sequentialMultipartUpload :: Bucket -> Object -> Maybe Int64
|
||||||
-> C.Producer Minio ByteString -> Minio ETag
|
-> C.Producer Minio ByteString -> Minio ETag
|
||||||
sequentialMultipartUpload b o sizeMay src = do
|
sequentialMultipartUpload b o sizeMay src = do
|
||||||
(uidMay, pinfos) <- getExistingUpload b o
|
(uidMay, pmap) <- getExistingUpload b o
|
||||||
|
|
||||||
-- get a new upload id if needed.
|
-- get a new upload id if needed.
|
||||||
uploadId <- maybe (newMultipartUpload b o []) return uidMay
|
uploadId <- maybe (newMultipartUpload b o []) return uidMay
|
||||||
|
|
||||||
-- upload parts in loop
|
-- upload parts in loop
|
||||||
uploadedParts <- loop pinfos uploadId rSrc partSizeInfo []
|
let
|
||||||
|
rSrc = C.newResumableSource src
|
||||||
|
partSizeInfo = selectPartSizes $ maybe maxObjectSize identity sizeMay
|
||||||
|
|
||||||
|
uploadedParts <- loopM (loopFunc pmap uploadId rSrc) (partSizeInfo, [])
|
||||||
|
|
||||||
-- complete multipart upload
|
-- complete multipart upload
|
||||||
completeMultipartUpload b o uploadId uploadedParts
|
completeMultipartUpload b o uploadId uploadedParts
|
||||||
@ -123,44 +154,32 @@ sequentialMultipartUpload b o sizeMay src = do
|
|||||||
rSrc = C.newResumableSource src
|
rSrc = C.newResumableSource src
|
||||||
partSizeInfo = selectPartSizes $ maybe maxObjectSize identity sizeMay
|
partSizeInfo = selectPartSizes $ maybe maxObjectSize identity sizeMay
|
||||||
|
|
||||||
-- returns partinfo if part is already uploaded.
|
|
||||||
checkUploadNeeded :: LByteString -> PartNumber
|
|
||||||
-> Map.Map PartNumber ListPartInfo
|
|
||||||
-> Maybe PartInfo
|
|
||||||
checkUploadNeeded lbs n pmap = do
|
|
||||||
pinfo@(ListPartInfo _ etag size _) <- Map.lookup n pmap
|
|
||||||
bool Nothing (return (PartInfo n etag)) $
|
|
||||||
LB.length lbs == size &&
|
|
||||||
hashMD5 (LB.toStrict lbs) == encodeUtf8 etag
|
|
||||||
|
|
||||||
-- make a sink that consumes only `s` bytes
|
-- make a sink that consumes only `s` bytes
|
||||||
limitedSink s = CB.isolate (fromIntegral s) C.=$= CB.sinkLbs
|
limitedSink s = CB.isolate (fromIntegral s) C.=$= CB.sinkLbs
|
||||||
|
|
||||||
-- FIXME: test, confirm and remove traceShowM statements
|
-- FIXME: test, confirm and remove traceShowM statements
|
||||||
loop _ _ _ [] uparts = return $ reverse uparts
|
loopFunc pmap uid rSource ([], uparts) = return $ Right $ reverse uparts
|
||||||
loop pinfos uid rSource ((partNum, _, size):ps) u = do
|
loopFunc pmap uid rSource (((partNum, _, size):ps), uparts) = do
|
||||||
|
|
||||||
-- load data from resume-able source into bytestring.
|
|
||||||
(newSource, buf) <- rSource C.$$++ (limitedSink size)
|
(newSource, buf) <- rSource C.$$++ (limitedSink size)
|
||||||
traceShowM "psize: "
|
traceShowM "psize: "
|
||||||
traceShowM (LB.length buf)
|
traceShowM (LB.length buf)
|
||||||
|
|
||||||
case checkUploadNeeded buf partNum pinfos of
|
let payload = PayloadBS $ LB.toStrict buf
|
||||||
Just pinfo -> loop pinfos uid newSource ps (pinfo:u)
|
partMay <- checkUploadNeeded payload partNum pmap
|
||||||
|
case partMay of
|
||||||
|
Just pinfo -> return $ Left (ps, pinfo:uparts)
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
pInfo <- putObjectPart b o uid partNum [] $
|
-- upload the part
|
||||||
PayloadBS $ LB.toStrict buf
|
pInfo <- putObjectPart b o uid partNum [] payload
|
||||||
|
|
||||||
if LB.length buf == size
|
if LB.length buf == size
|
||||||
-- upload the full size part.
|
then return $ Left (ps, pInfo:uparts)
|
||||||
then loop pinfos uid newSource ps (pInfo:u)
|
|
||||||
|
|
||||||
-- got a smaller part, so its the last one.
|
-- got a smaller part, so its the last one.
|
||||||
else do traceShowM (("Found a piece with length < than "::[Char]) ++ show size ++ " - uploading as last and quitting.")
|
else do traceShowM (("Found a piece with length < than "::[Char]) ++ show size ++ " - uploading as last and quitting.")
|
||||||
finalData <- newSource C.$$+- (limitedSink size)
|
finalData <- newSource C.$$+- (limitedSink size)
|
||||||
traceShowM "finalData size:"
|
traceShowM "finalData size:"
|
||||||
traceShowM (LB.length finalData)
|
traceShowM (LB.length finalData)
|
||||||
return $ reverse (pInfo:u)
|
return $ Right $ reverse (pInfo:uparts)
|
||||||
|
|
||||||
-- | Looks for incomplete uploads for an object. Returns the first one
|
-- | Looks for incomplete uploads for an object. Returns the first one
|
||||||
-- if there are many.
|
-- if there are many.
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user