minio-hs/src/Network/Minio/PutObject.hs

module Network.Minio.PutObject
(
putObject
, ObjectData(..)
) where
import Control.Monad.Extra (loopM)
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LB
import qualified Data.Conduit as C
import Data.Conduit.Binary (sourceHandleRange)
import qualified Data.Conduit.Binary as CB
import qualified Data.Conduit.Combinators as CC
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import Lib.Prelude
import Network.Minio.Data
import Network.Minio.Data.Crypto
import Network.Minio.ListOps
import Network.Minio.S3API
import Network.Minio.Utils
-- | Maximum object size is 5TiB.
maxObjectSize :: Int64
maxObjectSize = 5 * 1024 * 1024 * oneMiB
oneMiB :: Int64
oneMiB = 1024 * 1024
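-- | Maximum number of parts in a multipart upload (S3 API limit).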
maxMultipartParts :: Int64
maxMultipartParts = 10000
-- | A data-type to represent the source data for an object. A
-- file-path or a producer-conduit may be provided.
--
-- For files, a size may be provided - this is useful in cases when
-- the file size cannot be automatically determined or if only some
-- prefix of the file is desired.
--
-- A size may also be provided for streams. This is useful to limit the
-- input - if it is not provided, the upload will continue until the
-- stream ends or the object reaches `maxObjectSize` bytes.
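--
-- An illustrative sketch (the file paths and byte limit below are made-up
-- values, not taken from this module):
--
-- > fileData   = ODFile "/tmp/payload.bin" Nothing    -- size queried from disk
-- > streamData = ODStream (CB.sourceFile "/tmp/big.log") (Just (16 * oneMiB))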
data ObjectData m = ODFile FilePath (Maybe Int64) -- ^ Takes filepath and optional size.
| ODStream (C.Producer m ByteString) (Maybe Int64) -- ^ Pass size in bytes as maybe if known.
-- | Put an object from ObjectData. This high-level API handles objects
-- of all sizes, even when the object size is not known in advance.
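--
-- A minimal usage sketch (assuming a ConnectInfo value `connInfo` prepared
-- elsewhere and the `runMinio` runner; the bucket and object names are
-- illustrative):
--
-- > res <- runMinio connInfo $
-- >          putObject "my-bucket" "my-object" (ODFile "/tmp/payload.bin" Nothing)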
putObject :: Bucket -> Object -> ObjectData Minio -> Minio ETag
putObject b o (ODStream src sizeMay) = sequentialMultipartUpload b o sizeMay src
putObject b o (ODFile fp sizeMay) = do
hResE <- withNewHandle fp $ \h ->
liftM2 (,) (isHandleSeekable h) (getFileSize h)
(isSeekable, handleSizeMay) <- either (const $ return (False, Nothing)) return
hResE
-- prefer given size to queried size.
let finalSizeMay = listToMaybe $ catMaybes [sizeMay, handleSizeMay]
case finalSizeMay of
-- unable to get size, so assume non-seekable file and max-object size
Nothing -> sequentialMultipartUpload b o (Just maxObjectSize) $
CB.sourceFile fp
-- got file size, so check for single/multipart upload
Just size ->
if | size <= 64 * oneMiB -> do
resE <- withNewHandle fp (\h -> putObjectSingle b o [] h 0 size)
either throwM return resE
| size > maxObjectSize -> throwM $ ValidationError $
MErrVPutSizeExceeded size
| isSeekable -> parallelMultipartUpload b o fp size
| otherwise -> sequentialMultipartUpload b o (Just size) $
CB.sourceFile fp
-- | Select part sizes for a multipart upload - the minimum part size is
-- 64MiB. TODO: write quickcheck tests.
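--
-- A worked example (illustrative, not part of the original comment): a
-- 200MiB object falls back to the 64MiB minimum part size, so
--
-- > selectPartSizes (200 * oneMiB)
-- >   == [ (1, 0,            64 * oneMiB)
-- >      , (2, 64 * oneMiB,  64 * oneMiB)
-- >      , (3, 128 * oneMiB, 64 * oneMiB)
-- >      , (4, 192 * oneMiB,  8 * oneMiB) ]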
selectPartSizes :: Int64 -> [(PartNumber, Int64, Int64)]
selectPartSizes size = List.zip3 [1..] partOffsets partSizes
where
partSize = max (64 * oneMiB) (size `div` maxMultipartParts)
(numParts, lastPartSize) = size `divMod` partSize
lastPart = filter (> 0) [lastPartSize]
partSizes = replicate (fromIntegral numParts) partSize ++ lastPart
partOffsets = List.scanl' (+) 0 partSizes
-- | Returns the PartInfo if the part was already uploaded with a matching
-- MD5 hash and size (so re-uploading it can be skipped); otherwise returns
-- Nothing.
checkUploadNeeded :: Payload -> PartNumber
-> Map.Map PartNumber ListPartInfo
-> Minio (Maybe PartInfo)
checkUploadNeeded payload n pmap = do
(md5hash, pSize) <- case payload of
PayloadBS bs -> return (hashMD5 bs, fromIntegral $ B.length bs)
PayloadH h off size -> liftM (, size) $
hashMD5FromSource $ sourceHandleRange h (Just $ fromIntegral off)
(Just $ fromIntegral size)
case Map.lookup n pmap of
Nothing -> return Nothing
Just (ListPartInfo _ etag size _) -> return $
bool Nothing (Just (PartInfo n etag)) $
md5hash == encodeUtf8 etag && size == pSize
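-- | Upload a file as a multipart object, uploading parts concurrently with
-- 10 worker threads. Parts left over from a previous incomplete upload are
-- reused when their MD5 hash and size still match.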
parallelMultipartUpload :: Bucket -> Object -> FilePath -> Int64
-> Minio ETag
parallelMultipartUpload b o filePath size = do
(uidMay, pmap) <- getExistingUpload b o
-- get a new upload id if needed.
uploadId <- maybe (newMultipartUpload b o []) return uidMay
let partSizeInfo = selectPartSizes size
-- perform upload with 10 threads
uploadedPartsE <- limitedMapConcurrently 10
(uploadPart pmap uploadId) partSizeInfo
-- if there were any errors, rethrow exception.
mapM_ throwM $ lefts uploadedPartsE
-- if we get here, all parts were successfully uploaded.
completeMultipartUpload b o uploadId $ rights uploadedPartsE
where
uploadPart pmap uploadId (partNum, offset, sz) =
withNewHandle filePath $ \h -> do
let payload = PayloadH h offset sz
pInfoMay <- checkUploadNeeded payload partNum pmap
maybe
(putObjectPart b o uploadId partNum [] payload)
return pInfoMay
-- | Upload a multipart object sequentially from a conduit source. If the
-- size is not known, parts are sized as if the object were `maxObjectSize`
-- bytes and the upload stops once the source is exhausted.
sequentialMultipartUpload :: Bucket -> Object -> Maybe Int64
-> C.Producer Minio ByteString -> Minio ETag
sequentialMultipartUpload b o sizeMay src = do
(uidMay, pmap) <- getExistingUpload b o
-- get a new upload id if needed.
uploadId <- maybe (newMultipartUpload b o []) return uidMay
-- upload parts in loop
let
rSrc = C.newResumableSource src
partSizeInfo = selectPartSizes $ maybe maxObjectSize identity sizeMay
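  -- loop state: (part specs still to upload, parts uploaded so far in
  -- reverse order)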
uploadedParts <- loopM (loopFunc pmap uploadId rSrc) (partSizeInfo, [])
-- complete multipart upload
completeMultipartUpload b o uploadId uploadedParts
where
-- make a sink that consumes only `s` bytes
limitedSink s = CB.isolate (fromIntegral s) C.=$= CB.sinkLbs
-- FIXME: test, confirm and remove traceShowM statements
loopFunc pmap uid rSource ([], uparts) = return $ Right $ reverse uparts
loopFunc pmap uid rSource (((partNum, _, size):ps), uparts) = do
(newSource, buf) <- rSource C.$$++ (limitedSink size)
traceShowM "psize: "
traceShowM (LB.length buf)
let payload = PayloadBS $ LB.toStrict buf
partMay <- checkUploadNeeded payload partNum pmap
case partMay of
Just pinfo -> return $ Left (ps, pinfo:uparts)
Nothing -> do
-- upload the part
pInfo <- putObjectPart b o uid partNum [] payload
if LB.length buf == size
then return $ Left (ps, pInfo:uparts)
            -- got a smaller part, so it's the last one.
else do traceShowM (("Found a piece with length < than "::[Char]) ++ show size ++ " - uploading as last and quitting.")
finalData <- newSource C.$$+- (limitedSink size)
traceShowM "finalData size:"
traceShowM (LB.length finalData)
return $ Right $ reverse (pInfo:uparts)
-- | Looks for incomplete uploads for an object. Returns the first one
-- if there are many.
getExistingUpload :: Bucket -> Object
-> Minio (Maybe UploadId, Map.Map PartNumber ListPartInfo)
getExistingUpload b o = do
uidMay <- (fmap . fmap) uiUploadId $
listIncompleteUploads b (Just o) False C.$$ CC.head
parts <- maybe (return [])
(\uid -> listIncompleteParts b o uid C.$$ CC.sinkList) uidMay
return (uidMay, Map.fromList $ map (\p -> (piNumber p, p)) parts)