diff --git a/config/settings.yml b/config/settings.yml index 8f21d7277..aea998f4b 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -36,6 +36,7 @@ bearer-encoding: HS256 maximum-content-length: "_env:MAX_UPLOAD_SIZE:134217728" session-files-expire: 3600 prune-unreferenced-files: 600 +keep-unreferenced-files: 86400 health-check-interval: matching-cluster-config: "_env:HEALTHCHECK_INTERVAL_MATCHING_CLUSTER_CONFIG:600" http-reachable: "_env:HEALTHCHECK_INTERVAL_HTTP_REACHABLE:600" @@ -61,6 +62,7 @@ log-settings: all: "_env:LOG_ALL:false" minimum-level: "_env:LOGLEVEL:warn" destination: "_env:LOGDEST:stderr" + serializable-transaction-retry-limit: 2 ip-retention-time: 1209600 diff --git a/models/files.model b/models/files.model index fcf0b3809..428331b36 100644 --- a/models/files.model +++ b/models/files.model @@ -1,7 +1,8 @@ FileContent - hash FileContentReference - content ByteString - Primary hash + hash FileContentReference + content ByteString + unreferencedSince UTCTime Maybe + Primary hash SessionFile content FileContentReference Maybe @@ -10,4 +11,4 @@ SessionFile FileLock content FileContentReference instance InstanceId - time UTCTime \ No newline at end of file + time UTCTime diff --git a/src/Control/Monad/Trans/Memo/StateCache/Instances.hs b/src/Control/Monad/Trans/Memo/StateCache/Instances.hs new file mode 100644 index 000000000..e885eb655 --- /dev/null +++ b/src/Control/Monad/Trans/Memo/StateCache/Instances.hs @@ -0,0 +1,50 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Control.Monad.Trans.Memo.StateCache.Instances + ( hoistStateCache + ) where + +import ClassyPrelude hiding (handle) +import Yesod.Core +import Control.Monad.Logger (MonadLoggerIO) +import Control.Monad.Trans.Memo.StateCache +import Control.Monad.Catch + + +instance MonadResource m => MonadResource (StateCache c m) where + liftResourceT = lift . liftResourceT + +instance MonadLogger m => MonadLogger (StateCache c m) +instance MonadLoggerIO m => MonadLoggerIO (StateCache c m) + +instance MonadHandler m => MonadHandler (StateCache c m) where + type HandlerSite (StateCache c m) = HandlerSite m + type SubHandlerSite (StateCache c m) = SubHandlerSite m + + liftHandler = lift . liftHandler + liftSubHandler = lift . liftSubHandler + +instance MonadWidget m => MonadWidget (StateCache c m) where + liftWidget = lift . liftWidget + +instance MonadThrow m => MonadThrow (StateCache c m) where + throwM = lift . throwM + +-- | Rolls back modifications to state in failing section +instance MonadCatch m => MonadCatch (StateCache c m) where + catch m h = do + s <- container + (x, s') <- lift . handle (flip runStateCache s . h) $ runStateCache m s + x <$ setContainer s' + +hoistStateCache :: forall m n c b. + Monad n + => (forall a. m a -> n a) + -> (StateCache c m b -> StateCache c n b) +-- ^ Morally identical to `Control.Monad.Morph.hoist` +-- +-- Due to limited exports from `Control.Monad.Trans.Memo.StateCache` we incur a @Monad n@ constraint which `Control.Monad.Morph.hoist` does not account for +hoistStateCache nat m = do + s <- container + (x, s') <- lift . nat $ runStateCache m s + x <$ setContainer s' diff --git a/src/Database/Esqueleto/Utils.hs b/src/Database/Esqueleto/Utils.hs index 026f3e79e..8828d9d4f 100644 --- a/src/Database/Esqueleto/Utils.hs +++ b/src/Database/Esqueleto/Utils.hs @@ -18,6 +18,8 @@ module Database.Esqueleto.Utils , SqlHashable , sha256 , maybe, unsafeCoalesce + , bool + , max, min , SqlProject(..) , (->.) , fromSqlKey @@ -27,7 +29,7 @@ module Database.Esqueleto.Utils ) where -import ClassyPrelude.Yesod hiding (isInfixOf, any, all, or, and, isJust, maybe) +import ClassyPrelude.Yesod hiding (isInfixOf, any, all, or, and, isJust, maybe, bool, max, min) import Data.Universe import qualified Data.Set as Set import qualified Data.List as List @@ -241,6 +243,26 @@ maybe onNothing onJust val = E.case_ ] (E.else_ onNothing) +bool :: PersistField a + => E.SqlExpr (E.Value a) + -> E.SqlExpr (E.Value a) + -> E.SqlExpr (E.Value Bool) + -> E.SqlExpr (E.Value a) +bool onFalse onTrue val = E.case_ + [ E.when_ + val + E.then_ + onTrue + ] + (E.else_ onFalse) + +max, min :: PersistField a + => E.SqlExpr (E.Value a) + -> E.SqlExpr (E.Value a) + -> E.SqlExpr (E.Value a) +max a b = bool a b $ b E.>. a +min a b = bool a b $ b E.<. a + unsafeCoalesce :: E.PersistField a => [E.SqlExpr (E.Value (Maybe a))] -> E.SqlExpr (E.Value a) unsafeCoalesce = E.veryUnsafeCoerceSqlExprValue . E.coalesce diff --git a/src/Foundation/Type.hs b/src/Foundation/Type.hs index 44e3cdd77..ee96ec211 100644 --- a/src/Foundation/Type.hs +++ b/src/Foundation/Type.hs @@ -1,3 +1,6 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE UndecidableInstances #-} + module Foundation.Type ( UniWorX(..) , SomeSessionStorage(..) @@ -68,3 +71,6 @@ instance HasAppSettings UniWorX where appSettings = _appSettings' instance HasCookieSettings RegisteredCookie UniWorX where getCookieSettings = appCookieSettings . appSettings' + +instance (MonadHandler m, HandlerSite m ~ UniWorX) => ReadLogSettings m where + readLogSettings = liftIO . readTVarIO =<< getsYesod (view _appLogSettings) diff --git a/src/Import/NoModel.hs b/src/Import/NoModel.hs index 25c1330b5..d7a71dce2 100644 --- a/src/Import/NoModel.hs +++ b/src/Import/NoModel.hs @@ -165,6 +165,7 @@ import Network.HTTP.Types.Method.Instances as Import () import Crypto.Random.Instances as Import () import Network.Minio.Instances as Import () import System.Clock.Instances as Import () +import Control.Monad.Trans.Memo.StateCache.Instances as Import (hoistStateCache) import Crypto.Hash as Import (Digest, SHA3_256, SHA3_512) import Crypto.Random as Import (ChaChaDRG, Seed) diff --git a/src/Jobs/Handler/Files.hs b/src/Jobs/Handler/Files.hs index 7b144ae05..0d4774ea9 100644 --- a/src/Jobs/Handler/Files.hs +++ b/src/Jobs/Handler/Files.hs @@ -20,6 +20,8 @@ import qualified Network.Minio as Minio import qualified Crypto.Hash as Crypto import qualified Data.ByteString.Base64.URL as Base64 +import Control.Monad.Memo (startEvalMemoT, memo) + dispatchJobPruneSessionFiles :: JobHandler UniWorX dispatchJobPruneSessionFiles = JobHandlerAtomic . hoist lift $ do @@ -47,26 +49,36 @@ fileReferences (E.just -> fHash) dispatchJobPruneUnreferencedFiles :: JobHandler UniWorX dispatchJobPruneUnreferencedFiles = JobHandlerAtomic . hoist lift $ do - interval <- getsYesod $ view _appPruneUnreferencedFiles - Sum n <- runConduit $ getCandidates - .| maybe (C.map id) (takeWhileTime . (/ 2)) interval - .| persistentTokenBucketTakeC' TokenBucketPruneFiles (view $ _2 . _Value :: _ -> Word64) - .| C.map (view $ _1 . _Value) - .| C.mapM (\fRef -> Sum <$> deleteWhereCount [FileContentHash ==. fRef]) - .| C.fold - $logInfoS "PruneUnreferencedFiles" [st|Deleted #{n} unreferenced files|] - where + now <- liftIO getCurrentTime + interval <- fmap (fmap $ max 0) . getsYesod $ view _appPruneUnreferencedFiles + keep <- fmap (max 0) . getsYesod $ view _appKeepUnreferencedFiles + + E.update $ \fileContent -> do + let isReferenced = E.any E.exists . fileReferences $ fileContent E.^. FileContentHash + E.set fileContent [ FileContentUnreferencedSince E.=. E.bool (E.just . E.maybe (E.val now) (E.min $ E.val now) $ fileContent E.^. FileContentUnreferencedSince) E.nothing isReferenced ] + + let getCandidates = E.selectSource . E.from $ \fileContent -> do - E.where_ . E.not_ . E.any E.exists $ fileReferences (fileContent E.^. FileContentHash) + E.where_ . E.maybe E.false (E.<. E.val (addUTCTime (-keep) now)) $ fileContent E.^. FileContentUnreferencedSince return ( fileContent E.^. FileContentHash , E.length_ $ fileContent E.^. FileContentContent ) + + Sum deleted <- runConduit $ + getCandidates + .| maybe (C.map id) (takeWhileTime . (/ 2)) interval + .| persistentTokenBucketTakeC' TokenBucketPruneFiles (view $ _2 . _Value :: _ -> Word64) + .| C.map (view $ _1 . _Value) + .| C.mapM (\fRef -> Sum <$> deleteWhereCount [FileContentHash ==. fRef]) + .| C.fold + $logInfoS "PruneUnreferencedFiles" [st|Deleted #{deleted} long-unreferenced files|] dispatchJobInjectFiles :: JobHandler UniWorX dispatchJobInjectFiles = JobHandlerException . maybeT (return ()) $ do uploadBucket <- getsYesod $ view _appUploadCacheBucket interval <- getsYesod $ view _appInjectFiles + now <- liftIO getCurrentTime let extractReference (Minio.ListItemObject oi) @@ -78,14 +90,17 @@ dispatchJobInjectFiles = JobHandlerException . maybeT (return ()) $ do injectOrDelete :: (Minio.Object, FileContentReference) -> Handler (Sum Int64, Sum Int64) -- ^ Injected, Already existed injectOrDelete (obj, fRef) = maybeT (return mempty) $ do - res <- hoist (runDB . setSerializable) $ do - alreadyInjected <- lift $ exists [ FileContentHash ==. fRef ] + res <- hoist (startEvalMemoT . hoistStateCache (runDB . setSerializable)) $ do + alreadyInjected <- lift . lift $ exists [ FileContentHash ==. fRef ] if | alreadyInjected -> return (mempty, Sum 1) | otherwise -> do - content <- (hoistMaybe =<<) . runAppMinio . runMaybeT $ do - objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket obj Minio.defaultGetObjectOptions + content <- flip memo obj $ \obj' -> hoistMaybeM . runAppMinio . runMaybeT $ do + objRes <- catchIfMaybeT minioIsDoesNotExist $ Minio.getObject uploadBucket obj' Minio.defaultGetObjectOptions lift . runConduit $ Minio.gorObjectStream objRes .| C.fold - lift $ (Sum 1, mempty) <$ insertUnique (FileContent fRef content) + + fmap ((, mempty) . Sum) . lift. lift . E.insertSelectCount $ + let isReferenced = E.any E.exists $ fileReferences (E.val fRef) + in return $ FileContent E.<# E.val fRef E.<&> E.val content E.<&> E.bool (E.just $ E.val now) E.nothing isReferenced runAppMinio . maybeT (return ()) . catchIfMaybeT minioIsDoesNotExist $ Minio.removeObject uploadBucket obj return res @@ -99,6 +114,6 @@ dispatchJobInjectFiles = JobHandlerException . maybeT (return ()) $ do .| C.fold when (exc > 0) $ - $logInfoS "InjectFiles" [st|Deleted #{exc} files from upload cache because they were already referenced|] + $logInfoS "InjectFiles" [st|Deleted #{exc} files from upload cache because they were already injected|] when (inj > 0) $ $logInfoS "InjectFiles" [st|Injected #{inj} files from upload cache into database|] diff --git a/src/Settings.hs b/src/Settings.hs index 8a3995342..490d8076c 100644 --- a/src/Settings.hs +++ b/src/Settings.hs @@ -11,6 +11,7 @@ module Settings , module Settings.Cluster , module Settings.Mime , module Settings.Cookies + , module Settings.Log ) where import Import.NoModel @@ -53,6 +54,7 @@ import Model import Settings.Cluster import Settings.Mime import Settings.Cookies +import Settings.Log import qualified System.FilePath as FilePath @@ -139,6 +141,7 @@ data AppSettings = AppSettings , appSessionFilesExpire :: NominalDiffTime , appPruneUnreferencedFiles :: Maybe NominalDiffTime + , appKeepUnreferencedFiles :: NominalDiffTime , appInitialLogSettings :: LogSettings @@ -188,23 +191,6 @@ newtype ServerSessionSettings instance Show ServerSessionSettings where showsPrec d _ = showParen (d > 10) $ showString "ServerSessionSettings _" -data LogSettings = LogSettings - { logAll, logDetailed :: Bool - , logMinimumLevel :: LogLevel - , logDestination :: LogDestination - } deriving (Show, Read, Generic, Eq, Ord) - -data LogDestination = LogDestStderr | LogDestStdout | LogDestFile { logDestFile :: !FilePath } - deriving (Show, Read, Generic, Eq, Ord) - -deriving instance Generic LogLevel -instance Hashable LogLevel -instance NFData LogLevel -instance Hashable LogSettings -instance NFData LogSettings -instance Hashable LogDestination -instance NFData LogDestination - data UserDefaultConf = UserDefaultConf { userDefaultTheme :: Theme , userDefaultMaxFavourites, userDefaultMaxFavouriteTerms :: Int @@ -306,17 +292,6 @@ deriveJSON defaultOptions { fieldLabelModifier = camelToPathPiece' 2 } ''TokenBucketConf -deriveJSON defaultOptions - { constructorTagModifier = camelToPathPiece' 2 - , fieldLabelModifier = camelToPathPiece' 2 - , sumEncoding = UntaggedValue - , unwrapUnaryRecords = True - } ''LogDestination - -deriveJSON defaultOptions - { fieldLabelModifier = camelToPathPiece' 1 - } ''LogSettings - deriveFromJSON defaultOptions ''Ldap.Scope deriveFromJSON defaultOptions { fieldLabelModifier = camelToPathPiece' 2 @@ -349,13 +324,6 @@ deriveFromJSON } ''ResourcePoolConf -deriveJSON - defaultOptions - { constructorTagModifier = camelToPathPiece' 1 - , sumEncoding = UntaggedValue - } - ''LogLevel - instance FromJSON HaskellNet.PortNumber where parseJSON = withScientific "PortNumber" $ \sciNum -> case toBoundedInteger sciNum of Just int -> return $ fromIntegral (int :: Word16) @@ -502,6 +470,7 @@ instance FromJSON AppSettings where appSessionFilesExpire <- o .: "session-files-expire" appPruneUnreferencedFiles <- o .:? "prune-unreferenced-files" + appKeepUnreferencedFiles <- o .:? "keep-unreferenced-files" .!= 0 appInjectFiles <- o .:? "inject-files" appMaximumContentLength <- o .: "maximum-content-length" diff --git a/src/Settings/Log.hs b/src/Settings/Log.hs new file mode 100644 index 000000000..112519e41 --- /dev/null +++ b/src/Settings/Log.hs @@ -0,0 +1,52 @@ +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Settings.Log + ( LogSettings(..) + , LogDestination(..) + , LogLevel(..) + , ReadLogSettings(..) + ) where + +import ClassyPrelude.Yesod +import Numeric.Natural + +import Data.Aeson.TH +import Utils.PathPiece + + +data LogSettings = LogSettings + { logAll, logDetailed :: Bool + , logMinimumLevel :: LogLevel + , logDestination :: LogDestination + , logSerializableTransactionRetryLimit :: Maybe Natural + } deriving (Show, Read, Generic, Eq, Ord) + +data LogDestination = LogDestStderr | LogDestStdout | LogDestFile { logDestFile :: !FilePath } + deriving (Show, Read, Generic, Eq, Ord) + +deriving instance Generic LogLevel +instance Hashable LogLevel +instance NFData LogLevel +instance Hashable LogSettings +instance NFData LogSettings +instance Hashable LogDestination +instance NFData LogDestination + +deriveJSON defaultOptions + { constructorTagModifier = camelToPathPiece' 1 + , sumEncoding = UntaggedValue + } ''LogLevel + +deriveJSON defaultOptions + { constructorTagModifier = camelToPathPiece' 2 + , fieldLabelModifier = camelToPathPiece' 2 + , sumEncoding = UntaggedValue + , unwrapUnaryRecords = True + } ''LogDestination + +deriveJSON defaultOptions + { fieldLabelModifier = camelToPathPiece' 1 + } ''LogSettings + +class ReadLogSettings m where + readLogSettings :: m LogSettings diff --git a/src/Utils.hs b/src/Utils.hs index 3181e52d5..0baeee670 100644 --- a/src/Utils.hs +++ b/src/Utils.hs @@ -569,6 +569,9 @@ hoistMaybe :: MonadPlus m => Maybe a -> m a -- ^ `hoist` regarding `Maybe` as if identical to @MaybeT Identity@ hoistMaybe = maybe mzero return +hoistMaybeM :: MonadPlus m => m (Maybe a) -> m a +hoistMaybeM = (=<<) hoistMaybe + catchIfMaybeT :: (MonadCatch m, Exception e) => (e -> Bool) -> m a -> MaybeT m a catchIfMaybeT p act = catchIf p (lift act) (const mzero) diff --git a/src/Utils/Files.hs b/src/Utils/Files.hs index 517c36034..8ccf64b13 100644 --- a/src/Utils/Files.hs +++ b/src/Utils/Files.hs @@ -47,7 +47,7 @@ sinkFile File{ fileContent = Just fileContentContent, .. } = do inDB <- exists [ FileContentHash ==. fileContentHash ] - let sinkFileDB = unless inDB $ repsert (FileContentKey fileContentHash) FileContent{..} + let sinkFileDB = unless inDB $ repsert (FileContentKey fileContentHash) FileContent{ fileContentUnreferencedSince = Nothing, .. } maybeT sinkFileDB $ do let uploadName = decodeUtf8 . Base64.encodeUnpadded $ ByteArray.convert fileContentHash uploadBucket <- getsYesod $ views appSettings appUploadCacheBucket diff --git a/src/Utils/Sql.hs b/src/Utils/Sql.hs index 47f90a449..eeb11c537 100644 --- a/src/Utils/Sql.hs +++ b/src/Utils/Sql.hs @@ -3,6 +3,8 @@ module Utils.Sql ) where import ClassyPrelude.Yesod +import Numeric.Natural +import Settings.Log import Database.PostgreSQL.Simple (SqlError) import Database.PostgreSQL.Simple.Errors (isSerializationError) @@ -16,23 +18,27 @@ import Control.Retry import Control.Lens ((&)) -setSerializable :: forall m a. (MonadLogger m, MonadMask m, MonadIO m) => ReaderT SqlBackend m a -> ReaderT SqlBackend m a +setSerializable :: forall m a. (MonadLogger m, MonadMask m, MonadIO m, ReadLogSettings (SqlPersistT m)) => SqlPersistT m a -> SqlPersistT m a setSerializable = setSerializable' $ fullJitterBackoff 1e3 & limitRetriesByCumulativeDelay 10e6 -setSerializable' :: forall m a. (MonadLogger m, MonadMask m, MonadIO m) => RetryPolicyM (ReaderT SqlBackend m) -> ReaderT SqlBackend m a -> ReaderT SqlBackend m a +setSerializable' :: forall m a. (MonadLogger m, MonadMask m, MonadIO m, ReadLogSettings (SqlPersistT m)) => RetryPolicyM (SqlPersistT m) -> SqlPersistT m a -> ReaderT SqlBackend m a setSerializable' policy act = do + LogSettings{logSerializableTransactionRetryLimit} <- readLogSettings didCommit <- newTVarIO False - recovering policy (skipAsyncExceptions `snoc` logRetries suggestRetry logRetry) $ act' didCommit + recovering policy (skipAsyncExceptions `snoc` logRetries suggestRetry (logRetry logSerializableTransactionRetryLimit)) $ act' didCommit where suggestRetry :: SqlError -> ReaderT SqlBackend m Bool suggestRetry = return . isSerializationError - logRetry :: Bool -- ^ Will retry + logRetry :: Maybe Natural + -> Bool -- ^ Will retry -> SqlError -> RetryStatus -> ReaderT SqlBackend m () - logRetry shouldRetry@False err status = $logErrorS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status - logRetry shouldRetry@True err status = $logDebugS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status + logRetry _ shouldRetry@False err status = $logErrorS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status + logRetry (Just limit) shouldRetry err status + | fromIntegral limit >= rsIterNumber status = $logInfoS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status + logRetry _ shouldRetry err status = $logDebugS "SQL.setSerializable" . pack $ defaultLogMsg shouldRetry err status act' :: TVar Bool -> RetryStatus -> ReaderT SqlBackend m a act' didCommit RetryStatus{..} = do