fradrive/src/Handler/Utils/Files.hs

309 lines
16 KiB
Haskell

-- SPDX-FileCopyrightText: 2022 Gregor Kleen <gregor.kleen@ifi.lmu.de>,Sarah Vaupel <sarah.vaupel@ifi.lmu.de>
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Handler.Utils.Files
( sourceFile, sourceFile'
, sourceFiles, sourceFiles'
, SourceFilesException(..), _SourceFilesMismatchedHashes, _SourceFilesContentUnavailable
, sourceFileDB, sourceFileChunks, sourceFileMinio
, acceptFile
, respondFileConditional
) where
import Import.NoFoundation hiding (First(..))
import Foundation.Type
import Foundation.DB
import Utils.Metrics
import Data.Monoid (First(..))
import qualified Data.Conduit.Combinators as C
import qualified Data.Conduit.List as C (unfoldM)
import Handler.Utils.Minio
import qualified Network.Minio as Minio
import qualified Database.Esqueleto.Legacy as E
import qualified Database.Esqueleto.Utils as E
import System.FilePath (normalise, makeValid)
import Data.List (dropWhileEnd)
import qualified Data.ByteString as ByteString
data SourceFilesException
= SourceFilesMismatchedHashes
| SourceFilesContentUnavailable
deriving (Eq, Ord, Read, Show, Generic)
deriving anyclass (Exception)
makePrisms ''SourceFilesException
fileChunkARC :: ( MonadHandler m
, HandlerSite m ~ UniWorX
)
=> Maybe Int
-> (FileContentChunkReference, (Int, Int))
-> m (Maybe (ByteString, Maybe FileChunkStorage))
-> m (Maybe ByteString)
fileChunkARC altSize k@(ref, (s, l)) getChunkDB' = do
prewarm <- getsYesod appFileSourcePrewarm
let getChunkDB = case prewarm of
Nothing -> do
chunk' <- getChunkDB'
for chunk' $ \(chunk, mStorage) -> chunk <$ do
$logDebugS "fileChunkARC" "No prewarm"
for_ mStorage $ \storage ->
let w = length chunk
in liftIO $ observeSourcedChunk storage w
Just lh -> do
chunkRes <- lookupLRUHandle lh k
case chunkRes of
Just (chunk, w) -> Just chunk <$ do
$logDebugS "fileChunkARC" "Prewarm hit"
liftIO $ observeSourcedChunk StoragePrewarm w
Nothing -> do
chunk' <- getChunkDB'
for chunk' $ \(chunk, mStorage) -> chunk <$ do
$logDebugS "fileChunkARC" "Prewarm miss"
for_ mStorage $ \storage ->
let w = length chunk
in liftIO $ observeSourcedChunk storage w
arc <- getsYesod appFileSourceARC
case arc of
Nothing -> getChunkDB
Just ah -> do
cachedARC' ah k $ \case
Nothing -> do
chunk' <- case assertM (> l) altSize of
-- This optimization works for the somewhat common case that cdc chunks are smaller than db chunks and start of the requested range is aligned with a db chunk boundary
Just altSize'
-> fmap getFirst . execWriterT . cachedARC' ah (ref, (s, altSize')) $ \x -> x <$ case x of
Nothing -> tellM $ First <$> getChunkDB
Just (v, _) -> tell . First . Just $ ByteString.take l v
Nothing -> getChunkDB
for chunk' $ \chunk -> do
let w = length chunk
$logDebugS "fileChunkARC" "ARC miss"
return (chunk, w)
Just x@(_, w) -> do
$logDebugS "fileChunkARC" "ARC hit"
liftIO $ Just x <$ observeSourcedChunk StorageARC w
sourceFileDB :: forall m.
(MonadCatch m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m)
=> FileContentReference -> ConduitT () ByteString (SqlPersistT m) ()
sourceFileDB fileReference = chunkHashes
.| awaitForever (sourceFileChunks (const $ over (mapped . mapped . _2) Just) . E.unValue)
.| C.map (view _1)
where
chunkHashes :: ConduitT () (E.Value FileContentChunkReference) (SqlPersistT m) ()
chunkHashes = E.selectSource . E.from $ \fileContentEntry -> do
E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileReference
E.orderBy [ E.asc $ fileContentEntry E.^. FileContentEntryIx ]
return $ fileContentEntry E.^. FileContentEntryChunkHash
sourceFileChunks :: (MonadCatch m, MonadHandler m, HandlerSite m ~ UniWorX, BackendCompatible SqlReadBackend backend, MonadUnliftIO m)
=> ((Int, Int) -> ReaderT SqlReadBackend m (Maybe (ByteString, FileChunkStorage)) -> ReaderT SqlReadBackend m (Maybe (ByteString, Maybe FileChunkStorage))) -> FileContentChunkReference -> ConduitT i (ByteString, (Int, Int)) (ReaderT backend m) ()
sourceFileChunks cont chunkHash = transPipe (withReaderT $ projectBackend @SqlReadBackend) $ do
dbChunksize <- getsYesod $ view _appFileUploadDBChunksize
let dbRetrieveChunk = \case
Nothing -> return Nothing
Just start -> do
let getChunkDB = cont (start, dbChunksize) . runMaybeT $
let getChunkDB' = MaybeT . fmap (fmap $ (, StorageDB) . E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do
E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val dbChunksize)
getChunkMinio = fmap (, StorageMinio) . catchIfMaybeT (is _SourceFilesContentUnavailable) . runConduit $ sourceMinio (Left chunkHash) (Just $ ByteRangeFromTo (fromIntegral $ pred start) (fromIntegral . pred $ pred start + dbChunksize)) .| C.fold
in getChunkDB' <|> getChunkMinio
chunk <- fileChunkARC Nothing (chunkHash, (start, dbChunksize)) getChunkDB
case chunk of
Just c | olength c <= 0 -> return Nothing
Just c -> do
return . Just . ((c, (start, dbChunksize)), ) $ if
| olength c >= dbChunksize -> Just $ start + dbChunksize
| otherwise -> Nothing
Nothing -> throwM SourceFilesContentUnavailable
C.unfoldM dbRetrieveChunk $ Just (1 :: Int)
sourceMinio :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m)
=> Either FileContentChunkReference FileContentReference
-> Maybe ByteRange
-> ConduitT i ByteString m ()
sourceMinio fileReference mRange = do
chunkVar <- newEmptyTMVarIO
minioAsync <- lift . allocateLinkedAsync $
maybeT (throwM SourceFilesContentUnavailable) $ do
let uploadName = either (review minioFileChunkReference) (review minioFileReference) fileReference
uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket
hoistMaybe <=< runAppMinio . runMaybeT $ do
objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket uploadName Minio.defaultGetObjectOptions{ Minio.gooRange = mRange }
lift . runConduit $ Minio.gorObjectStream objRes .| C.mapM_ (atomically . putTMVar chunkVar)
let go = do
mChunk <- atomically $ Right <$> takeTMVar chunkVar
<|> Left <$> waitCatchSTM minioAsync
case mChunk of
Right chunk -> do
observeSourcedChunk StorageMinio $ olength chunk
yield chunk
go
Left (Right ()) -> return ()
Left (Left exc) -> throwM exc
in go
sourceFileMinio :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m)
=> FileContentReference -> ConduitT () ByteString m ()
sourceFileMinio fileContent = sourceMinio (Right fileContent) Nothing
sourceFiles :: (Monad m, YesodPersistBackend UniWorX ~ SqlBackend) => ConduitT FileReference DBFile m ()
sourceFiles = C.map sourceFile
sourceFile :: YesodPersistBackend UniWorX ~ SqlBackend
=> FileReference -> DBFile
sourceFile FileReference{..} = File
{ fileTitle = fileReferenceTitle
, fileModified = fileReferenceModified
, fileContent = toFileContent <$> fileReferenceContent
}
where
toFileContent fileReference
| fileReference == $$(liftTyped $ FileContentReference $$(emptyHash))
= return ()
toFileContent fileReference = do
inDB <- lift . E.selectExists . E.from $ \fileContentEntry -> E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileReference
bool sourceFileMinio sourceFileDB inDB fileReference
sourceFiles' :: forall file m. (HasFileReference file, Monad m, YesodPersistBackend UniWorX ~ SqlBackend) => ConduitT file DBFile m ()
sourceFiles' = C.map sourceFile'
sourceFile' :: forall file. (HasFileReference file, YesodPersistBackend UniWorX ~ SqlBackend) => file -> DBFile
sourceFile' = sourceFile . view (_FileReference . _1)
instance (YesodMail UniWorX, YesodPersistBackend UniWorX ~ SqlBackend) => ToMailPart UniWorX FileReference where
toMailPart = toMailPart <=< liftHandler . runDBRead . withReaderT projectBackend . toPureFile . sourceFile'
respondFileConditional :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, YesodPersistBackend UniWorX ~ SqlBackend, YesodPersistRunner UniWorX)
=> Maybe UTCTime -> MimeType
-> FileReference
-> SqlPersistT m (HandlerFor UniWorX a)
respondFileConditional representationLastModified cType FileReference{..} = do
if
| Just fileContent <- fileReferenceContent
, fileContent == $$(liftTyped $ FileContentReference $$(emptyHash))
-> return . respondSourceConditional @ByteRangesSpecifier condInfo cType . Left $ (return () :: ConduitT () ByteString _ ())
| Just fileContent <- fileReferenceContent -> do
dbManifest <- fmap fromNullable . E.select . E.from $ \(fileContentEntry `E.LeftOuterJoin` fileContentChunk) -> do
E.on $ E.just (fileContentEntry E.^. FileContentEntryChunkHash) E.==. fileContentChunk E.?. FileContentChunkHash
E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileContent
E.orderBy [E.asc $ fileContentEntry E.^. FileContentEntryIx ]
return ( fileContentEntry E.^. FileContentEntryChunkHash
, E.maybe E.nothing (E.just . E.length_) $ fileContentChunk E.?. FileContentChunkContent
)
case dbManifest of
Nothing -> do
uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket
let uploadName = minioFileReference # fileContent
statRes <- maybeT (throwM SourceFilesContentUnavailable) . (hoistMaybe <=< runAppMinio) . runMaybeT $ do
catchIfMaybeT minioIsDoesNotExist $ Minio.statObject uploadBucket uploadName Minio.defaultGetObjectOptions
let iLength = fromIntegral $ Minio.oiSize statRes
respondSourceConditional condInfo cType . Right $ \byteRange ->
let (byteRange', respRange) = byteRangeSpecificationToMinio iLength byteRange
in ( sourceMinio (Right fileContent) $ Just byteRange'
, ByteContentRangeSpecification (Just respRange) (Just iLength)
)
Just (toNullable -> dbManifest') -> do
dbManifest'' <- forM dbManifest' $ \(E.Value chunkHash, E.Value mChunkLength) -> case mChunkLength of
Just chunkLength -> return (chunkHash, chunkLength)
Nothing -> throwM SourceFilesContentUnavailable
let iLength = sumOf (folded . _2) dbManifest''
respondSourceDBConditional condInfo cType . Right $ \byteRange ->
let (byteFrom, byteTo) = case byteRange of
ByteRangeSpecification f Nothing -> (min (pred iLength) f, pred iLength)
ByteRangeSpecification f (Just t) -> (min (pred iLength) f, min (pred iLength) t)
ByteRangeSuffixSpecification s -> (iLength - min (pred iLength) s, pred iLength)
relevantChunks = view _2 $ foldl' go (0, []) dbManifest''
where go :: (Word64, [(FileContentChunkReference, Word64, Word64)])
-> (FileContentChunkReference, Word64)
-> (Word64, [(FileContentChunkReference, Word64, Word64)])
go (lengthBefore, acc) (cChunk, cLength)
= ( lengthBefore + cLength
, if
| byteFrom < lengthBefore + cLength, byteTo >= lengthBefore
-> let cChunk' = ( cChunk
, bool 0 (byteFrom - lengthBefore) $ byteFrom >= lengthBefore
, bool cLength (cLength - pred (lengthBefore + cLength - byteTo)) $ byteTo < lengthBefore + cLength
)
in acc ++ pure cChunk'
| otherwise
-> acc
)
in ( do
dbChunksize <- getsYesod $ views _appFileUploadDBChunksize fromIntegral
forM_ relevantChunks $ \(chunkHash, offset, cLength)
-> let retrieveChunk = \case
Just (start, cLength') | cLength' > 0 -> do
let getChunkDB = fmap (fmap $ (, Just StorageDB) . E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do
E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val $ min cLength' dbChunksize)
chunk <- fileChunkARC (Just $ fromIntegral dbChunksize) (chunkHash, (fromIntegral start, fromIntegral $ min cLength' dbChunksize)) getChunkDB
case chunk of
Nothing -> throwM SourceFilesContentUnavailable
Just c -> do
return . Just . (c, ) $ if
| fromIntegral (olength c) >= min cLength' dbChunksize
-> Just (start + dbChunksize, cLength' - fromIntegral (olength c))
| otherwise
-> Nothing
_other -> return Nothing
in C.unfoldM retrieveChunk . Just $ (succ offset, cLength)
, ByteContentRangeSpecification (Just $ ByteRangeResponseSpecification byteFrom byteTo) (Just iLength)
)
| otherwise -> throwM SourceFilesContentUnavailable
| otherwise
-> return $ sendResponseStatus noContent204 ()
where
condInfo = RepresentationConditionalInformation
{ representationETag = review etagFileReference <$> fileReferenceContent
, representationLastModified
, representationExists = True
, requestedActionAlreadySucceeded = Nothing
}
byteRangeSpecificationToMinio :: Word64 -> ByteRangeSpecification -> (ByteRange, ByteRangeResponseSpecification)
byteRangeSpecificationToMinio iLength byteRange = (byteRange', respRange)
where
byteRange' = case byteRange of
ByteRangeSpecification f Nothing -> ByteRangeFrom (fromIntegral $ min (pred iLength) f)
ByteRangeSpecification f (Just t) -> ByteRangeFromTo (fromIntegral $ min iLength f) (fromIntegral $ min (pred iLength) t)
ByteRangeSuffixSpecification s -> ByteRangeSuffix (fromIntegral $ min iLength s)
respRange = case byteRange of
ByteRangeSpecification f Nothing -> ByteRangeResponseSpecification (min (pred iLength) f) (pred iLength)
ByteRangeSpecification f (Just t) -> ByteRangeResponseSpecification (min (pred iLength) f) (min (pred iLength) t)
ByteRangeSuffixSpecification s -> ByteRangeResponseSpecification (iLength - min (pred iLength) s) (pred iLength)
acceptFile :: (MonadResource m, MonadResource m') => FileInfo -> m (File m')
acceptFile fInfo = do
let fileTitle = "." <//> unpack (fileName fInfo)
& normalise
& makeValid
& dropWhile isPathSeparator
& dropWhileEnd isPathSeparator
& normalise
& makeValid
fileContent = Just $ fileSource fInfo
fileModified <- liftIO getCurrentTime
return File{..}