309 lines
16 KiB
Haskell
309 lines
16 KiB
Haskell
-- SPDX-FileCopyrightText: 2022 Gregor Kleen <gregor.kleen@ifi.lmu.de>,Sarah Vaupel <sarah.vaupel@ifi.lmu.de>
|
|
--
|
|
-- SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
|
|
|
module Handler.Utils.Files
|
|
( sourceFile, sourceFile'
|
|
, sourceFiles, sourceFiles'
|
|
, SourceFilesException(..), _SourceFilesMismatchedHashes, _SourceFilesContentUnavailable
|
|
, sourceFileDB, sourceFileChunks, sourceFileMinio
|
|
, acceptFile
|
|
, respondFileConditional
|
|
) where
|
|
|
|
import Import.NoFoundation hiding (First(..))
|
|
import Foundation.Type
|
|
import Foundation.DB
|
|
import Utils.Metrics
|
|
|
|
import Data.Monoid (First(..))
|
|
|
|
import qualified Data.Conduit.Combinators as C
|
|
import qualified Data.Conduit.List as C (unfoldM)
|
|
|
|
import Handler.Utils.Minio
|
|
import qualified Network.Minio as Minio
|
|
|
|
import qualified Database.Esqueleto.Legacy as E
|
|
import qualified Database.Esqueleto.Utils as E
|
|
|
|
import System.FilePath (normalise, makeValid)
|
|
import Data.List (dropWhileEnd)
|
|
|
|
import qualified Data.ByteString as ByteString
|
|
|
|
|
|
data SourceFilesException
|
|
= SourceFilesMismatchedHashes
|
|
| SourceFilesContentUnavailable
|
|
deriving (Eq, Ord, Read, Show, Generic)
|
|
deriving anyclass (Exception)
|
|
|
|
makePrisms ''SourceFilesException
|
|
|
|
|
|
fileChunkARC :: ( MonadHandler m
|
|
, HandlerSite m ~ UniWorX
|
|
)
|
|
=> Maybe Int
|
|
-> (FileContentChunkReference, (Int, Int))
|
|
-> m (Maybe (ByteString, Maybe FileChunkStorage))
|
|
-> m (Maybe ByteString)
|
|
fileChunkARC altSize k@(ref, (s, l)) getChunkDB' = do
|
|
prewarm <- getsYesod appFileSourcePrewarm
|
|
let getChunkDB = case prewarm of
|
|
Nothing -> do
|
|
chunk' <- getChunkDB'
|
|
for chunk' $ \(chunk, mStorage) -> chunk <$ do
|
|
$logDebugS "fileChunkARC" "No prewarm"
|
|
for_ mStorage $ \storage ->
|
|
let w = length chunk
|
|
in liftIO $ observeSourcedChunk storage w
|
|
Just lh -> do
|
|
chunkRes <- lookupLRUHandle lh k
|
|
case chunkRes of
|
|
Just (chunk, w) -> Just chunk <$ do
|
|
$logDebugS "fileChunkARC" "Prewarm hit"
|
|
liftIO $ observeSourcedChunk StoragePrewarm w
|
|
Nothing -> do
|
|
chunk' <- getChunkDB'
|
|
for chunk' $ \(chunk, mStorage) -> chunk <$ do
|
|
$logDebugS "fileChunkARC" "Prewarm miss"
|
|
for_ mStorage $ \storage ->
|
|
let w = length chunk
|
|
in liftIO $ observeSourcedChunk storage w
|
|
|
|
arc <- getsYesod appFileSourceARC
|
|
case arc of
|
|
Nothing -> getChunkDB
|
|
Just ah -> do
|
|
cachedARC' ah k $ \case
|
|
Nothing -> do
|
|
chunk' <- case assertM (> l) altSize of
|
|
-- This optimization works for the somewhat common case that cdc chunks are smaller than db chunks and start of the requested range is aligned with a db chunk boundary
|
|
Just altSize'
|
|
-> fmap getFirst . execWriterT . cachedARC' ah (ref, (s, altSize')) $ \x -> x <$ case x of
|
|
Nothing -> tellM $ First <$> getChunkDB
|
|
Just (v, _) -> tell . First . Just $ ByteString.take l v
|
|
Nothing -> getChunkDB
|
|
for chunk' $ \chunk -> do
|
|
let w = length chunk
|
|
$logDebugS "fileChunkARC" "ARC miss"
|
|
return (chunk, w)
|
|
Just x@(_, w) -> do
|
|
$logDebugS "fileChunkARC" "ARC hit"
|
|
liftIO $ Just x <$ observeSourcedChunk StorageARC w
|
|
|
|
|
|
|
|
sourceFileDB :: forall m.
|
|
(MonadCatch m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m)
|
|
=> FileContentReference -> ConduitT () ByteString (SqlPersistT m) ()
|
|
sourceFileDB fileReference = chunkHashes
|
|
.| awaitForever (sourceFileChunks (const $ over (mapped . mapped . _2) Just) . E.unValue)
|
|
.| C.map (view _1)
|
|
where
|
|
chunkHashes :: ConduitT () (E.Value FileContentChunkReference) (SqlPersistT m) ()
|
|
chunkHashes = E.selectSource . E.from $ \fileContentEntry -> do
|
|
E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileReference
|
|
E.orderBy [ E.asc $ fileContentEntry E.^. FileContentEntryIx ]
|
|
return $ fileContentEntry E.^. FileContentEntryChunkHash
|
|
|
|
sourceFileChunks :: (MonadCatch m, MonadHandler m, HandlerSite m ~ UniWorX, BackendCompatible SqlReadBackend backend, MonadUnliftIO m)
|
|
=> ((Int, Int) -> ReaderT SqlReadBackend m (Maybe (ByteString, FileChunkStorage)) -> ReaderT SqlReadBackend m (Maybe (ByteString, Maybe FileChunkStorage))) -> FileContentChunkReference -> ConduitT i (ByteString, (Int, Int)) (ReaderT backend m) ()
|
|
sourceFileChunks cont chunkHash = transPipe (withReaderT $ projectBackend @SqlReadBackend) $ do
|
|
dbChunksize <- getsYesod $ view _appFileUploadDBChunksize
|
|
let dbRetrieveChunk = \case
|
|
Nothing -> return Nothing
|
|
Just start -> do
|
|
let getChunkDB = cont (start, dbChunksize) . runMaybeT $
|
|
let getChunkDB' = MaybeT . fmap (fmap $ (, StorageDB) . E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do
|
|
E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash
|
|
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val dbChunksize)
|
|
getChunkMinio = fmap (, StorageMinio) . catchIfMaybeT (is _SourceFilesContentUnavailable) . runConduit $ sourceMinio (Left chunkHash) (Just $ ByteRangeFromTo (fromIntegral $ pred start) (fromIntegral . pred $ pred start + dbChunksize)) .| C.fold
|
|
in getChunkDB' <|> getChunkMinio
|
|
chunk <- fileChunkARC Nothing (chunkHash, (start, dbChunksize)) getChunkDB
|
|
case chunk of
|
|
Just c | olength c <= 0 -> return Nothing
|
|
Just c -> do
|
|
return . Just . ((c, (start, dbChunksize)), ) $ if
|
|
| olength c >= dbChunksize -> Just $ start + dbChunksize
|
|
| otherwise -> Nothing
|
|
Nothing -> throwM SourceFilesContentUnavailable
|
|
|
|
C.unfoldM dbRetrieveChunk $ Just (1 :: Int)
|
|
|
|
sourceMinio :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m)
|
|
=> Either FileContentChunkReference FileContentReference
|
|
-> Maybe ByteRange
|
|
-> ConduitT i ByteString m ()
|
|
sourceMinio fileReference mRange = do
|
|
chunkVar <- newEmptyTMVarIO
|
|
minioAsync <- lift . allocateLinkedAsync $
|
|
maybeT (throwM SourceFilesContentUnavailable) $ do
|
|
let uploadName = either (review minioFileChunkReference) (review minioFileReference) fileReference
|
|
uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket
|
|
hoistMaybe <=< runAppMinio . runMaybeT $ do
|
|
objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket uploadName Minio.defaultGetObjectOptions{ Minio.gooRange = mRange }
|
|
lift . runConduit $ Minio.gorObjectStream objRes .| C.mapM_ (atomically . putTMVar chunkVar)
|
|
let go = do
|
|
mChunk <- atomically $ Right <$> takeTMVar chunkVar
|
|
<|> Left <$> waitCatchSTM minioAsync
|
|
case mChunk of
|
|
Right chunk -> do
|
|
observeSourcedChunk StorageMinio $ olength chunk
|
|
yield chunk
|
|
go
|
|
Left (Right ()) -> return ()
|
|
Left (Left exc) -> throwM exc
|
|
in go
|
|
|
|
sourceFileMinio :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m)
|
|
=> FileContentReference -> ConduitT () ByteString m ()
|
|
sourceFileMinio fileContent = sourceMinio (Right fileContent) Nothing
|
|
|
|
sourceFiles :: (Monad m, YesodPersistBackend UniWorX ~ SqlBackend) => ConduitT FileReference DBFile m ()
|
|
sourceFiles = C.map sourceFile
|
|
|
|
sourceFile :: YesodPersistBackend UniWorX ~ SqlBackend
|
|
=> FileReference -> DBFile
|
|
sourceFile FileReference{..} = File
|
|
{ fileTitle = fileReferenceTitle
|
|
, fileModified = fileReferenceModified
|
|
, fileContent = toFileContent <$> fileReferenceContent
|
|
}
|
|
where
|
|
toFileContent fileReference
|
|
| fileReference == $$(liftTyped $ FileContentReference $$(emptyHash))
|
|
= return ()
|
|
toFileContent fileReference = do
|
|
inDB <- lift . E.selectExists . E.from $ \fileContentEntry -> E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileReference
|
|
bool sourceFileMinio sourceFileDB inDB fileReference
|
|
|
|
sourceFiles' :: forall file m. (HasFileReference file, Monad m, YesodPersistBackend UniWorX ~ SqlBackend) => ConduitT file DBFile m ()
|
|
sourceFiles' = C.map sourceFile'
|
|
|
|
sourceFile' :: forall file. (HasFileReference file, YesodPersistBackend UniWorX ~ SqlBackend) => file -> DBFile
|
|
sourceFile' = sourceFile . view (_FileReference . _1)
|
|
|
|
|
|
instance (YesodMail UniWorX, YesodPersistBackend UniWorX ~ SqlBackend) => ToMailPart UniWorX FileReference where
|
|
toMailPart = toMailPart <=< liftHandler . runDBRead . withReaderT projectBackend . toPureFile . sourceFile'
|
|
|
|
|
|
respondFileConditional :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, YesodPersistBackend UniWorX ~ SqlBackend, YesodPersistRunner UniWorX)
|
|
=> Maybe UTCTime -> MimeType
|
|
-> FileReference
|
|
-> SqlPersistT m (HandlerFor UniWorX a)
|
|
respondFileConditional representationLastModified cType FileReference{..} = do
|
|
if
|
|
| Just fileContent <- fileReferenceContent
|
|
, fileContent == $$(liftTyped $ FileContentReference $$(emptyHash))
|
|
-> return . respondSourceConditional @ByteRangesSpecifier condInfo cType . Left $ (return () :: ConduitT () ByteString _ ())
|
|
| Just fileContent <- fileReferenceContent -> do
|
|
dbManifest <- fmap fromNullable . E.select . E.from $ \(fileContentEntry `E.LeftOuterJoin` fileContentChunk) -> do
|
|
E.on $ E.just (fileContentEntry E.^. FileContentEntryChunkHash) E.==. fileContentChunk E.?. FileContentChunkHash
|
|
E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileContent
|
|
E.orderBy [E.asc $ fileContentEntry E.^. FileContentEntryIx ]
|
|
return ( fileContentEntry E.^. FileContentEntryChunkHash
|
|
, E.maybe E.nothing (E.just . E.length_) $ fileContentChunk E.?. FileContentChunkContent
|
|
)
|
|
case dbManifest of
|
|
Nothing -> do
|
|
uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket
|
|
let uploadName = minioFileReference # fileContent
|
|
statRes <- maybeT (throwM SourceFilesContentUnavailable) . (hoistMaybe <=< runAppMinio) . runMaybeT $ do
|
|
catchIfMaybeT minioIsDoesNotExist $ Minio.statObject uploadBucket uploadName Minio.defaultGetObjectOptions
|
|
let iLength = fromIntegral $ Minio.oiSize statRes
|
|
respondSourceConditional condInfo cType . Right $ \byteRange ->
|
|
let (byteRange', respRange) = byteRangeSpecificationToMinio iLength byteRange
|
|
in ( sourceMinio (Right fileContent) $ Just byteRange'
|
|
, ByteContentRangeSpecification (Just respRange) (Just iLength)
|
|
)
|
|
Just (toNullable -> dbManifest') -> do
|
|
dbManifest'' <- forM dbManifest' $ \(E.Value chunkHash, E.Value mChunkLength) -> case mChunkLength of
|
|
Just chunkLength -> return (chunkHash, chunkLength)
|
|
Nothing -> throwM SourceFilesContentUnavailable
|
|
let iLength = sumOf (folded . _2) dbManifest''
|
|
respondSourceDBConditional condInfo cType . Right $ \byteRange ->
|
|
let (byteFrom, byteTo) = case byteRange of
|
|
ByteRangeSpecification f Nothing -> (min (pred iLength) f, pred iLength)
|
|
ByteRangeSpecification f (Just t) -> (min (pred iLength) f, min (pred iLength) t)
|
|
ByteRangeSuffixSpecification s -> (iLength - min (pred iLength) s, pred iLength)
|
|
relevantChunks = view _2 $ foldl' go (0, []) dbManifest''
|
|
where go :: (Word64, [(FileContentChunkReference, Word64, Word64)])
|
|
-> (FileContentChunkReference, Word64)
|
|
-> (Word64, [(FileContentChunkReference, Word64, Word64)])
|
|
go (lengthBefore, acc) (cChunk, cLength)
|
|
= ( lengthBefore + cLength
|
|
, if
|
|
| byteFrom < lengthBefore + cLength, byteTo >= lengthBefore
|
|
-> let cChunk' = ( cChunk
|
|
, bool 0 (byteFrom - lengthBefore) $ byteFrom >= lengthBefore
|
|
, bool cLength (cLength - pred (lengthBefore + cLength - byteTo)) $ byteTo < lengthBefore + cLength
|
|
)
|
|
in acc ++ pure cChunk'
|
|
| otherwise
|
|
-> acc
|
|
)
|
|
in ( do
|
|
dbChunksize <- getsYesod $ views _appFileUploadDBChunksize fromIntegral
|
|
forM_ relevantChunks $ \(chunkHash, offset, cLength)
|
|
-> let retrieveChunk = \case
|
|
Just (start, cLength') | cLength' > 0 -> do
|
|
let getChunkDB = fmap (fmap $ (, Just StorageDB) . E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do
|
|
E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash
|
|
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val $ min cLength' dbChunksize)
|
|
chunk <- fileChunkARC (Just $ fromIntegral dbChunksize) (chunkHash, (fromIntegral start, fromIntegral $ min cLength' dbChunksize)) getChunkDB
|
|
case chunk of
|
|
Nothing -> throwM SourceFilesContentUnavailable
|
|
Just c -> do
|
|
return . Just . (c, ) $ if
|
|
| fromIntegral (olength c) >= min cLength' dbChunksize
|
|
-> Just (start + dbChunksize, cLength' - fromIntegral (olength c))
|
|
| otherwise
|
|
-> Nothing
|
|
_other -> return Nothing
|
|
in C.unfoldM retrieveChunk . Just $ (succ offset, cLength)
|
|
, ByteContentRangeSpecification (Just $ ByteRangeResponseSpecification byteFrom byteTo) (Just iLength)
|
|
)
|
|
| otherwise -> throwM SourceFilesContentUnavailable
|
|
|
|
| otherwise
|
|
-> return $ sendResponseStatus noContent204 ()
|
|
where
|
|
condInfo = RepresentationConditionalInformation
|
|
{ representationETag = review etagFileReference <$> fileReferenceContent
|
|
, representationLastModified
|
|
, representationExists = True
|
|
, requestedActionAlreadySucceeded = Nothing
|
|
}
|
|
|
|
byteRangeSpecificationToMinio :: Word64 -> ByteRangeSpecification -> (ByteRange, ByteRangeResponseSpecification)
|
|
byteRangeSpecificationToMinio iLength byteRange = (byteRange', respRange)
|
|
where
|
|
byteRange' = case byteRange of
|
|
ByteRangeSpecification f Nothing -> ByteRangeFrom (fromIntegral $ min (pred iLength) f)
|
|
ByteRangeSpecification f (Just t) -> ByteRangeFromTo (fromIntegral $ min iLength f) (fromIntegral $ min (pred iLength) t)
|
|
ByteRangeSuffixSpecification s -> ByteRangeSuffix (fromIntegral $ min iLength s)
|
|
respRange = case byteRange of
|
|
ByteRangeSpecification f Nothing -> ByteRangeResponseSpecification (min (pred iLength) f) (pred iLength)
|
|
ByteRangeSpecification f (Just t) -> ByteRangeResponseSpecification (min (pred iLength) f) (min (pred iLength) t)
|
|
ByteRangeSuffixSpecification s -> ByteRangeResponseSpecification (iLength - min (pred iLength) s) (pred iLength)
|
|
|
|
|
|
acceptFile :: (MonadResource m, MonadResource m') => FileInfo -> m (File m')
|
|
acceptFile fInfo = do
|
|
let fileTitle = "." <//> unpack (fileName fInfo)
|
|
& normalise
|
|
& makeValid
|
|
& dropWhile isPathSeparator
|
|
& dropWhileEnd isPathSeparator
|
|
& normalise
|
|
& makeValid
|
|
fileContent = Just $ fileSource fInfo
|
|
fileModified <- liftIO getCurrentTime
|
|
return File{..}
|