-- SPDX-FileCopyrightText: 2022 Gregor Kleen ,Sarah Vaupel -- -- SPDX-License-Identifier: AGPL-3.0-or-later {-# OPTIONS_GHC -fno-warn-orphans #-} module Handler.Utils.Files ( sourceFile, sourceFile' , sourceFiles, sourceFiles' , SourceFilesException(..), _SourceFilesMismatchedHashes, _SourceFilesContentUnavailable , sourceFileDB, sourceFileChunks, sourceFileMinio , acceptFile , respondFileConditional ) where import Import.NoFoundation hiding (First(..)) import Foundation.Type import Foundation.DB import Utils.Metrics import Data.Monoid (First(..)) import qualified Data.Conduit.Combinators as C import qualified Data.Conduit.List as C (unfoldM) import Handler.Utils.Minio import qualified Network.Minio as Minio import qualified Database.Esqueleto.Legacy as E import qualified Database.Esqueleto.Utils as E import System.FilePath (normalise, makeValid) import Data.List (dropWhileEnd) import qualified Data.ByteString as ByteString data SourceFilesException = SourceFilesMismatchedHashes | SourceFilesContentUnavailable deriving (Eq, Ord, Read, Show, Generic) deriving anyclass (Exception) makePrisms ''SourceFilesException fileChunkARC :: ( MonadHandler m , HandlerSite m ~ UniWorX ) => Maybe Int -> (FileContentChunkReference, (Int, Int)) -> m (Maybe (ByteString, Maybe FileChunkStorage)) -> m (Maybe ByteString) fileChunkARC altSize k@(ref, (s, l)) getChunkDB' = do prewarm <- getsYesod appFileSourcePrewarm let getChunkDB = case prewarm of Nothing -> do chunk' <- getChunkDB' for chunk' $ \(chunk, mStorage) -> chunk <$ do $logDebugS "fileChunkARC" "No prewarm" for_ mStorage $ \storage -> let w = length chunk in liftIO $ observeSourcedChunk storage w Just lh -> do chunkRes <- lookupLRUHandle lh k case chunkRes of Just (chunk, w) -> Just chunk <$ do $logDebugS "fileChunkARC" "Prewarm hit" liftIO $ observeSourcedChunk StoragePrewarm w Nothing -> do chunk' <- getChunkDB' for chunk' $ \(chunk, mStorage) -> chunk <$ do $logDebugS "fileChunkARC" "Prewarm miss" for_ mStorage $ \storage -> let w = length chunk in liftIO $ observeSourcedChunk storage w arc <- getsYesod appFileSourceARC case arc of Nothing -> getChunkDB Just ah -> do cachedARC' ah k $ \case Nothing -> do chunk' <- case assertM (> l) altSize of -- This optimization works for the somewhat common case that cdc chunks are smaller than db chunks and start of the requested range is aligned with a db chunk boundary Just altSize' -> fmap getFirst . execWriterT . cachedARC' ah (ref, (s, altSize')) $ \x -> x <$ case x of Nothing -> tellM $ First <$> getChunkDB Just (v, _) -> tell . First . Just $ ByteString.take l v Nothing -> getChunkDB for chunk' $ \chunk -> do let w = length chunk $logDebugS "fileChunkARC" "ARC miss" return (chunk, w) Just x@(_, w) -> do $logDebugS "fileChunkARC" "ARC hit" liftIO $ Just x <$ observeSourcedChunk StorageARC w sourceFileDB :: forall m. (MonadCatch m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m) => FileContentReference -> ConduitT () ByteString (SqlPersistT m) () sourceFileDB fileReference = chunkHashes .| awaitForever (sourceFileChunks (const $ over (mapped . mapped . _2) Just) . E.unValue) .| C.map (view _1) where chunkHashes :: ConduitT () (E.Value FileContentChunkReference) (SqlPersistT m) () chunkHashes = E.selectSource . E.from $ \fileContentEntry -> do E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileReference E.orderBy [ E.asc $ fileContentEntry E.^. FileContentEntryIx ] return $ fileContentEntry E.^. FileContentEntryChunkHash sourceFileChunks :: (MonadCatch m, MonadHandler m, HandlerSite m ~ UniWorX, BackendCompatible SqlReadBackend backend, MonadUnliftIO m) => ((Int, Int) -> ReaderT SqlReadBackend m (Maybe (ByteString, FileChunkStorage)) -> ReaderT SqlReadBackend m (Maybe (ByteString, Maybe FileChunkStorage))) -> FileContentChunkReference -> ConduitT i (ByteString, (Int, Int)) (ReaderT backend m) () sourceFileChunks cont chunkHash = transPipe (withReaderT $ projectBackend @SqlReadBackend) $ do dbChunksize <- getsYesod $ view _appFileUploadDBChunksize let dbRetrieveChunk = \case Nothing -> return Nothing Just start -> do let getChunkDB = cont (start, dbChunksize) . runMaybeT $ let getChunkDB' = MaybeT . fmap (fmap $ (, StorageDB) . E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val dbChunksize) getChunkMinio = fmap (, StorageMinio) . catchIfMaybeT (is _SourceFilesContentUnavailable) . runConduit $ sourceMinio (Left chunkHash) (Just $ ByteRangeFromTo (fromIntegral $ pred start) (fromIntegral . pred $ pred start + dbChunksize)) .| C.fold in getChunkDB' <|> getChunkMinio chunk <- fileChunkARC Nothing (chunkHash, (start, dbChunksize)) getChunkDB case chunk of Just c | olength c <= 0 -> return Nothing Just c -> do return . Just . ((c, (start, dbChunksize)), ) $ if | olength c >= dbChunksize -> Just $ start + dbChunksize | otherwise -> Nothing Nothing -> throwM SourceFilesContentUnavailable C.unfoldM dbRetrieveChunk $ Just (1 :: Int) sourceMinio :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m) => Either FileContentChunkReference FileContentReference -> Maybe ByteRange -> ConduitT i ByteString m () sourceMinio fileReference mRange = do chunkVar <- newEmptyTMVarIO minioAsync <- lift . allocateLinkedAsync $ maybeT (throwM SourceFilesContentUnavailable) $ do let uploadName = either (review minioFileChunkReference) (review minioFileReference) fileReference uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket hoistMaybe <=< runAppMinio . runMaybeT $ do objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket uploadName Minio.defaultGetObjectOptions{ Minio.gooRange = mRange } lift . runConduit $ Minio.gorObjectStream objRes .| C.mapM_ (atomically . putTMVar chunkVar) let go = do mChunk <- atomically $ Right <$> takeTMVar chunkVar <|> Left <$> waitCatchSTM minioAsync case mChunk of Right chunk -> do observeSourcedChunk StorageMinio $ olength chunk yield chunk go Left (Right ()) -> return () Left (Left exc) -> throwM exc in go sourceFileMinio :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, MonadUnliftIO m) => FileContentReference -> ConduitT () ByteString m () sourceFileMinio fileContent = sourceMinio (Right fileContent) Nothing sourceFiles :: (Monad m, YesodPersistBackend UniWorX ~ SqlBackend) => ConduitT FileReference DBFile m () sourceFiles = C.map sourceFile sourceFile :: YesodPersistBackend UniWorX ~ SqlBackend => FileReference -> DBFile sourceFile FileReference{..} = File { fileTitle = fileReferenceTitle , fileModified = fileReferenceModified , fileContent = toFileContent <$> fileReferenceContent } where toFileContent fileReference | fileReference == $$(liftTyped $ FileContentReference $$(emptyHash)) = return () toFileContent fileReference = do inDB <- lift . E.selectExists . E.from $ \fileContentEntry -> E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileReference bool sourceFileMinio sourceFileDB inDB fileReference sourceFiles' :: forall file m. (HasFileReference file, Monad m, YesodPersistBackend UniWorX ~ SqlBackend) => ConduitT file DBFile m () sourceFiles' = C.map sourceFile' sourceFile' :: forall file. (HasFileReference file, YesodPersistBackend UniWorX ~ SqlBackend) => file -> DBFile sourceFile' = sourceFile . view (_FileReference . _1) instance (YesodMail UniWorX, YesodPersistBackend UniWorX ~ SqlBackend) => ToMailPart UniWorX FileReference where toMailPart = toMailPart <=< liftHandler . runDBRead . withReaderT projectBackend . toPureFile . sourceFile' respondFileConditional :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX, YesodPersistBackend UniWorX ~ SqlBackend, YesodPersistRunner UniWorX) => Maybe UTCTime -> MimeType -> FileReference -> SqlPersistT m (HandlerFor UniWorX a) respondFileConditional representationLastModified cType FileReference{..} = do if | Just fileContent <- fileReferenceContent , fileContent == $$(liftTyped $ FileContentReference $$(emptyHash)) -> return . respondSourceConditional @ByteRangesSpecifier condInfo cType . Left $ (return () :: ConduitT () ByteString _ ()) | Just fileContent <- fileReferenceContent -> do dbManifest <- fmap fromNullable . E.select . E.from $ \(fileContentEntry `E.LeftOuterJoin` fileContentChunk) -> do E.on $ E.just (fileContentEntry E.^. FileContentEntryChunkHash) E.==. fileContentChunk E.?. FileContentChunkHash E.where_ $ fileContentEntry E.^. FileContentEntryHash E.==. E.val fileContent E.orderBy [E.asc $ fileContentEntry E.^. FileContentEntryIx ] return ( fileContentEntry E.^. FileContentEntryChunkHash , E.maybe E.nothing (E.just . E.length_) $ fileContentChunk E.?. FileContentChunkContent ) case dbManifest of Nothing -> do uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket let uploadName = minioFileReference # fileContent statRes <- maybeT (throwM SourceFilesContentUnavailable) . (hoistMaybe <=< runAppMinio) . runMaybeT $ do catchIfMaybeT minioIsDoesNotExist $ Minio.statObject uploadBucket uploadName Minio.defaultGetObjectOptions let iLength = fromIntegral $ Minio.oiSize statRes respondSourceConditional condInfo cType . Right $ \byteRange -> let (byteRange', respRange) = byteRangeSpecificationToMinio iLength byteRange in ( sourceMinio (Right fileContent) $ Just byteRange' , ByteContentRangeSpecification (Just respRange) (Just iLength) ) Just (toNullable -> dbManifest') -> do dbManifest'' <- forM dbManifest' $ \(E.Value chunkHash, E.Value mChunkLength) -> case mChunkLength of Just chunkLength -> return (chunkHash, chunkLength) Nothing -> throwM SourceFilesContentUnavailable let iLength = sumOf (folded . _2) dbManifest'' respondSourceDBConditional condInfo cType . Right $ \byteRange -> let (byteFrom, byteTo) = case byteRange of ByteRangeSpecification f Nothing -> (min (pred iLength) f, pred iLength) ByteRangeSpecification f (Just t) -> (min (pred iLength) f, min (pred iLength) t) ByteRangeSuffixSpecification s -> (iLength - min (pred iLength) s, pred iLength) relevantChunks = view _2 $ foldl' go (0, []) dbManifest'' where go :: (Word64, [(FileContentChunkReference, Word64, Word64)]) -> (FileContentChunkReference, Word64) -> (Word64, [(FileContentChunkReference, Word64, Word64)]) go (lengthBefore, acc) (cChunk, cLength) = ( lengthBefore + cLength , if | byteFrom < lengthBefore + cLength, byteTo >= lengthBefore -> let cChunk' = ( cChunk , bool 0 (byteFrom - lengthBefore) $ byteFrom >= lengthBefore , bool cLength (cLength - pred (lengthBefore + cLength - byteTo)) $ byteTo < lengthBefore + cLength ) in acc ++ pure cChunk' | otherwise -> acc ) in ( do dbChunksize <- getsYesod $ views _appFileUploadDBChunksize fromIntegral forM_ relevantChunks $ \(chunkHash, offset, cLength) -> let retrieveChunk = \case Just (start, cLength') | cLength' > 0 -> do let getChunkDB = fmap (fmap $ (, Just StorageDB) . E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val $ min cLength' dbChunksize) chunk <- fileChunkARC (Just $ fromIntegral dbChunksize) (chunkHash, (fromIntegral start, fromIntegral $ min cLength' dbChunksize)) getChunkDB case chunk of Nothing -> throwM SourceFilesContentUnavailable Just c -> do return . Just . (c, ) $ if | fromIntegral (olength c) >= min cLength' dbChunksize -> Just (start + dbChunksize, cLength' - fromIntegral (olength c)) | otherwise -> Nothing _other -> return Nothing in C.unfoldM retrieveChunk . Just $ (succ offset, cLength) , ByteContentRangeSpecification (Just $ ByteRangeResponseSpecification byteFrom byteTo) (Just iLength) ) | otherwise -> throwM SourceFilesContentUnavailable | otherwise -> return $ sendResponseStatus noContent204 () where condInfo = RepresentationConditionalInformation { representationETag = review etagFileReference <$> fileReferenceContent , representationLastModified , representationExists = True , requestedActionAlreadySucceeded = Nothing } byteRangeSpecificationToMinio :: Word64 -> ByteRangeSpecification -> (ByteRange, ByteRangeResponseSpecification) byteRangeSpecificationToMinio iLength byteRange = (byteRange', respRange) where byteRange' = case byteRange of ByteRangeSpecification f Nothing -> ByteRangeFrom (fromIntegral $ min (pred iLength) f) ByteRangeSpecification f (Just t) -> ByteRangeFromTo (fromIntegral $ min iLength f) (fromIntegral $ min (pred iLength) t) ByteRangeSuffixSpecification s -> ByteRangeSuffix (fromIntegral $ min iLength s) respRange = case byteRange of ByteRangeSpecification f Nothing -> ByteRangeResponseSpecification (min (pred iLength) f) (pred iLength) ByteRangeSpecification f (Just t) -> ByteRangeResponseSpecification (min (pred iLength) f) (min (pred iLength) t) ByteRangeSuffixSpecification s -> ByteRangeResponseSpecification (iLength - min (pred iLength) s) (pred iLength) acceptFile :: (MonadResource m, MonadResource m') => FileInfo -> m (File m') acceptFile fInfo = do let fileTitle = "." unpack (fileName fInfo) & normalise & makeValid & dropWhile isPathSeparator & dropWhileEnd isPathSeparator & normalise & makeValid fileContent = Just $ fileSource fInfo fileModified <- liftIO getCurrentTime return File{..}