feat(files): safer file deletion

This commit is contained in:
Gregor Kleen 2020-08-01 14:43:33 +02:00
parent a0392dd329
commit 88a92390d5
12 changed files with 190 additions and 63 deletions

View File

@ -36,6 +36,7 @@ bearer-encoding: HS256
maximum-content-length: "_env:MAX_UPLOAD_SIZE:134217728"
session-files-expire: 3600
prune-unreferenced-files: 600
keep-unreferenced-files: 86400
health-check-interval:
matching-cluster-config: "_env:HEALTHCHECK_INTERVAL_MATCHING_CLUSTER_CONFIG:600"
http-reachable: "_env:HEALTHCHECK_INTERVAL_HTTP_REACHABLE:600"
@ -61,6 +62,7 @@ log-settings:
all: "_env:LOG_ALL:false"
minimum-level: "_env:LOGLEVEL:warn"
destination: "_env:LOGDEST:stderr"
serializable-transaction-retry-limit: 2
ip-retention-time: 1209600

View File

@ -1,7 +1,8 @@
FileContent
hash FileContentReference
content ByteString
Primary hash
hash FileContentReference
content ByteString
unreferencedSince UTCTime Maybe
Primary hash
SessionFile
content FileContentReference Maybe
@ -10,4 +11,4 @@ SessionFile
FileLock
content FileContentReference
instance InstanceId
time UTCTime
time UTCTime

View File

@ -0,0 +1,50 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Control.Monad.Trans.Memo.StateCache.Instances
( hoistStateCache
) where
import ClassyPrelude hiding (handle)
import Yesod.Core
import Control.Monad.Logger (MonadLoggerIO)
import Control.Monad.Trans.Memo.StateCache
import Control.Monad.Catch
instance MonadResource m => MonadResource (StateCache c m) where
liftResourceT = lift . liftResourceT
instance MonadLogger m => MonadLogger (StateCache c m)
instance MonadLoggerIO m => MonadLoggerIO (StateCache c m)
instance MonadHandler m => MonadHandler (StateCache c m) where
type HandlerSite (StateCache c m) = HandlerSite m
type SubHandlerSite (StateCache c m) = SubHandlerSite m
liftHandler = lift . liftHandler
liftSubHandler = lift . liftSubHandler
instance MonadWidget m => MonadWidget (StateCache c m) where
liftWidget = lift . liftWidget
instance MonadThrow m => MonadThrow (StateCache c m) where
throwM = lift . throwM
-- | Rolls back modifications to state in failing section
instance MonadCatch m => MonadCatch (StateCache c m) where
catch m h = do
s <- container
(x, s') <- lift . handle (flip runStateCache s . h) $ runStateCache m s
x <$ setContainer s'
hoistStateCache :: forall m n c b.
Monad n
=> (forall a. m a -> n a)
-> (StateCache c m b -> StateCache c n b)
-- ^ Morally identical to `Control.Monad.Morph.hoist`
--
-- Due to limited exports from `Control.Monad.Trans.Memo.StateCache` we incur a @Monad n@ constraint which `Control.Monad.Morph.hoist` does not account for
hoistStateCache nat m = do
s <- container
(x, s') <- lift . nat $ runStateCache m s
x <$ setContainer s'

View File

@ -18,6 +18,8 @@ module Database.Esqueleto.Utils
, SqlHashable
, sha256
, maybe, unsafeCoalesce
, bool
, max, min
, SqlProject(..)
, (->.)
, fromSqlKey
@ -27,7 +29,7 @@ module Database.Esqueleto.Utils
) where
import ClassyPrelude.Yesod hiding (isInfixOf, any, all, or, and, isJust, maybe)
import ClassyPrelude.Yesod hiding (isInfixOf, any, all, or, and, isJust, maybe, bool, max, min)
import Data.Universe
import qualified Data.Set as Set
import qualified Data.List as List
@ -241,6 +243,26 @@ maybe onNothing onJust val = E.case_
]
(E.else_ onNothing)
bool :: PersistField a
=> E.SqlExpr (E.Value a)
-> E.SqlExpr (E.Value a)
-> E.SqlExpr (E.Value Bool)
-> E.SqlExpr (E.Value a)
bool onFalse onTrue val = E.case_
[ E.when_
val
E.then_
onTrue
]
(E.else_ onFalse)
max, min :: PersistField a
=> E.SqlExpr (E.Value a)
-> E.SqlExpr (E.Value a)
-> E.SqlExpr (E.Value a)
max a b = bool a b $ b E.>. a
min a b = bool a b $ b E.<. a
unsafeCoalesce :: E.PersistField a => [E.SqlExpr (E.Value (Maybe a))] -> E.SqlExpr (E.Value a)
unsafeCoalesce = E.veryUnsafeCoerceSqlExprValue . E.coalesce

View File

@ -1,3 +1,6 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE UndecidableInstances #-}
module Foundation.Type
( UniWorX(..)
, SomeSessionStorage(..)
@ -68,3 +71,6 @@ instance HasAppSettings UniWorX where
appSettings = _appSettings'
instance HasCookieSettings RegisteredCookie UniWorX where
getCookieSettings = appCookieSettings . appSettings'
instance (MonadHandler m, HandlerSite m ~ UniWorX) => ReadLogSettings m where
readLogSettings = liftIO . readTVarIO =<< getsYesod (view _appLogSettings)

View File

@ -165,6 +165,7 @@ import Network.HTTP.Types.Method.Instances as Import ()
import Crypto.Random.Instances as Import ()
import Network.Minio.Instances as Import ()
import System.Clock.Instances as Import ()
import Control.Monad.Trans.Memo.StateCache.Instances as Import (hoistStateCache)
import Crypto.Hash as Import (Digest, SHA3_256, SHA3_512)
import Crypto.Random as Import (ChaChaDRG, Seed)

View File

@ -20,6 +20,8 @@ import qualified Network.Minio as Minio
import qualified Crypto.Hash as Crypto
import qualified Data.ByteString.Base64.URL as Base64
import Control.Monad.Memo (startEvalMemoT, memo)
dispatchJobPruneSessionFiles :: JobHandler UniWorX
dispatchJobPruneSessionFiles = JobHandlerAtomic . hoist lift $ do
@ -47,26 +49,36 @@ fileReferences (E.just -> fHash)
dispatchJobPruneUnreferencedFiles :: JobHandler UniWorX
dispatchJobPruneUnreferencedFiles = JobHandlerAtomic . hoist lift $ do
interval <- getsYesod $ view _appPruneUnreferencedFiles
Sum n <- runConduit $ getCandidates
.| maybe (C.map id) (takeWhileTime . (/ 2)) interval
.| persistentTokenBucketTakeC' TokenBucketPruneFiles (view $ _2 . _Value :: _ -> Word64)
.| C.map (view $ _1 . _Value)
.| C.mapM (\fRef -> Sum <$> deleteWhereCount [FileContentHash ==. fRef])
.| C.fold
$logInfoS "PruneUnreferencedFiles" [st|Deleted #{n} unreferenced files|]
where
now <- liftIO getCurrentTime
interval <- fmap (fmap $ max 0) . getsYesod $ view _appPruneUnreferencedFiles
keep <- fmap (max 0) . getsYesod $ view _appKeepUnreferencedFiles
E.update $ \fileContent -> do
let isReferenced = E.any E.exists . fileReferences $ fileContent E.^. FileContentHash
E.set fileContent [ FileContentUnreferencedSince E.=. E.bool (E.just . E.maybe (E.val now) (E.min $ E.val now) $ fileContent E.^. FileContentUnreferencedSince) E.nothing isReferenced ]
let
getCandidates = E.selectSource . E.from $ \fileContent -> do
E.where_ . E.not_ . E.any E.exists $ fileReferences (fileContent E.^. FileContentHash)
E.where_ . E.maybe E.false (E.<. E.val (addUTCTime (-keep) now)) $ fileContent E.^. FileContentUnreferencedSince
return ( fileContent E.^. FileContentHash
, E.length_ $ fileContent E.^. FileContentContent
)
Sum deleted <- runConduit $
getCandidates
.| maybe (C.map id) (takeWhileTime . (/ 2)) interval
.| persistentTokenBucketTakeC' TokenBucketPruneFiles (view $ _2 . _Value :: _ -> Word64)
.| C.map (view $ _1 . _Value)
.| C.mapM (\fRef -> Sum <$> deleteWhereCount [FileContentHash ==. fRef])
.| C.fold
$logInfoS "PruneUnreferencedFiles" [st|Deleted #{deleted} long-unreferenced files|]
dispatchJobInjectFiles :: JobHandler UniWorX
dispatchJobInjectFiles = JobHandlerException . maybeT (return ()) $ do
uploadBucket <- getsYesod $ view _appUploadCacheBucket
interval <- getsYesod $ view _appInjectFiles
now <- liftIO getCurrentTime
let
extractReference (Minio.ListItemObject oi)
@ -78,14 +90,17 @@ dispatchJobInjectFiles = JobHandlerException . maybeT (return ()) $ do
injectOrDelete :: (Minio.Object, FileContentReference)
-> Handler (Sum Int64, Sum Int64) -- ^ Injected, Already existed
injectOrDelete (obj, fRef) = maybeT (return mempty) $ do
res <- hoist (runDB . setSerializable) $ do
alreadyInjected <- lift $ exists [ FileContentHash ==. fRef ]
res <- hoist (startEvalMemoT . hoistStateCache (runDB . setSerializable)) $ do
alreadyInjected <- lift . lift $ exists [ FileContentHash ==. fRef ]
if | alreadyInjected -> return (mempty, Sum 1)
| otherwise -> do
content <- (hoistMaybe =<<) . runAppMinio . runMaybeT $ do
objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket obj Minio.defaultGetObjectOptions
content <- flip memo obj $ \obj' -> hoistMaybeM . runAppMinio . runMaybeT $ do
objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket obj' Minio.defaultGetObjectOptions
lift . runConduit $ Minio.gorObjectStream objRes .| C.fold
lift $ (Sum 1, mempty) <$ insertUnique (FileContent fRef content)
fmap ((, mempty) . Sum) . lift. lift . E.insertSelectCount $
let isReferenced = E.any E.exists $ fileReferences (E.val fRef)
in return $ FileContent E.<# E.val fRef E.<&> E.val content E.<&> E.bool (E.just $ E.val now) E.nothing isReferenced
runAppMinio . maybeT (return ()) . catchIfMaybeT minioIsDoesNotExist $ Minio.removeObject uploadBucket obj
return res
@ -99,6 +114,6 @@ dispatchJobInjectFiles = JobHandlerException . maybeT (return ()) $ do
.| C.fold
when (exc > 0) $
$logInfoS "InjectFiles" [st|Deleted #{exc} files from upload cache because they were already referenced|]
$logInfoS "InjectFiles" [st|Deleted #{exc} files from upload cache because they were already injected|]
when (inj > 0) $
$logInfoS "InjectFiles" [st|Injected #{inj} files from upload cache into database|]

View File

@ -11,6 +11,7 @@ module Settings
, module Settings.Cluster
, module Settings.Mime
, module Settings.Cookies
, module Settings.Log
) where
import Import.NoModel
@ -53,6 +54,7 @@ import Model
import Settings.Cluster
import Settings.Mime
import Settings.Cookies
import Settings.Log
import qualified System.FilePath as FilePath
@ -139,6 +141,7 @@ data AppSettings = AppSettings
, appSessionFilesExpire :: NominalDiffTime
, appPruneUnreferencedFiles :: Maybe NominalDiffTime
, appKeepUnreferencedFiles :: NominalDiffTime
, appInitialLogSettings :: LogSettings
@ -188,23 +191,6 @@ newtype ServerSessionSettings
instance Show ServerSessionSettings where
showsPrec d _ = showParen (d > 10) $ showString "ServerSessionSettings _"
data LogSettings = LogSettings
{ logAll, logDetailed :: Bool
, logMinimumLevel :: LogLevel
, logDestination :: LogDestination
} deriving (Show, Read, Generic, Eq, Ord)
data LogDestination = LogDestStderr | LogDestStdout | LogDestFile { logDestFile :: !FilePath }
deriving (Show, Read, Generic, Eq, Ord)
deriving instance Generic LogLevel
instance Hashable LogLevel
instance NFData LogLevel
instance Hashable LogSettings
instance NFData LogSettings
instance Hashable LogDestination
instance NFData LogDestination
data UserDefaultConf = UserDefaultConf
{ userDefaultTheme :: Theme
, userDefaultMaxFavourites, userDefaultMaxFavouriteTerms :: Int
@ -306,17 +292,6 @@ deriveJSON defaultOptions
{ fieldLabelModifier = camelToPathPiece' 2
} ''TokenBucketConf
deriveJSON defaultOptions
{ constructorTagModifier = camelToPathPiece' 2
, fieldLabelModifier = camelToPathPiece' 2
, sumEncoding = UntaggedValue
, unwrapUnaryRecords = True
} ''LogDestination
deriveJSON defaultOptions
{ fieldLabelModifier = camelToPathPiece' 1
} ''LogSettings
deriveFromJSON defaultOptions ''Ldap.Scope
deriveFromJSON defaultOptions
{ fieldLabelModifier = camelToPathPiece' 2
@ -349,13 +324,6 @@ deriveFromJSON
}
''ResourcePoolConf
deriveJSON
defaultOptions
{ constructorTagModifier = camelToPathPiece' 1
, sumEncoding = UntaggedValue
}
''LogLevel
instance FromJSON HaskellNet.PortNumber where
parseJSON = withScientific "PortNumber" $ \sciNum -> case toBoundedInteger sciNum of
Just int -> return $ fromIntegral (int :: Word16)
@ -502,6 +470,7 @@ instance FromJSON AppSettings where
appSessionFilesExpire <- o .: "session-files-expire"
appPruneUnreferencedFiles <- o .:? "prune-unreferenced-files"
appKeepUnreferencedFiles <- o .:? "keep-unreferenced-files" .!= 0
appInjectFiles <- o .:? "inject-files"
appMaximumContentLength <- o .: "maximum-content-length"

52
src/Settings/Log.hs Normal file
View File

@ -0,0 +1,52 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Settings.Log
( LogSettings(..)
, LogDestination(..)
, LogLevel(..)
, ReadLogSettings(..)
) where
import ClassyPrelude.Yesod
import Numeric.Natural
import Data.Aeson.TH
import Utils.PathPiece
data LogSettings = LogSettings
{ logAll, logDetailed :: Bool
, logMinimumLevel :: LogLevel
, logDestination :: LogDestination
, logSerializableTransactionRetryLimit :: Maybe Natural
} deriving (Show, Read, Generic, Eq, Ord)
data LogDestination = LogDestStderr | LogDestStdout | LogDestFile { logDestFile :: !FilePath }
deriving (Show, Read, Generic, Eq, Ord)
deriving instance Generic LogLevel
instance Hashable LogLevel
instance NFData LogLevel
instance Hashable LogSettings
instance NFData LogSettings
instance Hashable LogDestination
instance NFData LogDestination
deriveJSON defaultOptions
{ constructorTagModifier = camelToPathPiece' 1
, sumEncoding = UntaggedValue
} ''LogLevel
deriveJSON defaultOptions
{ constructorTagModifier = camelToPathPiece' 2
, fieldLabelModifier = camelToPathPiece' 2
, sumEncoding = UntaggedValue
, unwrapUnaryRecords = True
} ''LogDestination
deriveJSON defaultOptions
{ fieldLabelModifier = camelToPathPiece' 1
} ''LogSettings
class ReadLogSettings m where
readLogSettings :: m LogSettings

View File

@ -569,6 +569,9 @@ hoistMaybe :: MonadPlus m => Maybe a -> m a
-- ^ `hoist` regarding `Maybe` as if identical to @MaybeT Identity@
hoistMaybe = maybe mzero return
hoistMaybeM :: MonadPlus m => m (Maybe a) -> m a
hoistMaybeM = (=<<) hoistMaybe
catchIfMaybeT :: (MonadCatch m, Exception e) => (e -> Bool) -> m a -> MaybeT m a
catchIfMaybeT p act = catchIf p (lift act) (const mzero)

View File

@ -47,7 +47,7 @@ sinkFile File{ fileContent = Just fileContentContent, .. } = do
inDB <- exists [ FileContentHash ==. fileContentHash ]
let sinkFileDB = unless inDB $ repsert (FileContentKey fileContentHash) FileContent{..}
let sinkFileDB = unless inDB $ repsert (FileContentKey fileContentHash) FileContent{ fileContentUnreferencedSince = Nothing, .. }
maybeT sinkFileDB $ do
let uploadName = decodeUtf8 . Base64.encodeUnpadded $ ByteArray.convert fileContentHash
uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket

View File

@ -3,6 +3,8 @@ module Utils.Sql
) where
import ClassyPrelude.Yesod
import Numeric.Natural
import Settings.Log
import Database.PostgreSQL.Simple (SqlError)
import Database.PostgreSQL.Simple.Errors (isSerializationError)
@ -16,23 +18,27 @@ import Control.Retry
import Control.Lens ((&))
setSerializable :: forall m a. (MonadLogger m, MonadMask m, MonadIO m) => ReaderT SqlBackend m a -> ReaderT SqlBackend m a
setSerializable :: forall m a. (MonadLogger m, MonadMask m, MonadIO m, ReadLogSettings (SqlPersistT m)) => SqlPersistT m a -> SqlPersistT m a
setSerializable = setSerializable' $ fullJitterBackoff 1e3 & limitRetriesByCumulativeDelay 10e6
setSerializable' :: forall m a. (MonadLogger m, MonadMask m, MonadIO m) => RetryPolicyM (ReaderT SqlBackend m) -> ReaderT SqlBackend m a -> ReaderT SqlBackend m a
setSerializable' :: forall m a. (MonadLogger m, MonadMask m, MonadIO m, ReadLogSettings (SqlPersistT m)) => RetryPolicyM (SqlPersistT m) -> SqlPersistT m a -> ReaderT SqlBackend m a
setSerializable' policy act = do
LogSettings{logSerializableTransactionRetryLimit} <- readLogSettings
didCommit <- newTVarIO False
recovering policy (skipAsyncExceptions `snoc` logRetries suggestRetry logRetry) $ act' didCommit
recovering policy (skipAsyncExceptions `snoc` logRetries suggestRetry (logRetry logSerializableTransactionRetryLimit)) $ act' didCommit
where
suggestRetry :: SqlError -> ReaderT SqlBackend m Bool
suggestRetry = return . isSerializationError
logRetry :: Bool -- ^ Will retry
logRetry :: Maybe Natural
-> Bool -- ^ Will retry
-> SqlError
-> RetryStatus
-> ReaderT SqlBackend m ()
logRetry shouldRetry@False err status = $logErrorS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status
logRetry shouldRetry@True err status = $logDebugS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status
logRetry _ shouldRetry@False err status = $logErrorS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status
logRetry (Just limit) shouldRetry err status
| fromIntegral limit >= rsIterNumber status = $logInfoS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status
logRetry _ shouldRetry err status = $logDebugS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status
act' :: TVar Bool -> RetryStatus -> ReaderT SqlBackend m a
act' didCommit RetryStatus{..} = do