From e7a5162ec9560a20eaffdc24f143ab765f3f0238 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 16 Jul 2020 15:02:10 +0200 Subject: [PATCH] feat(db): automatic retry of database transactions upon system error BREAKING CHANGE: transactions need to be retryable, now --- src/Foundation.hs | 36 ++++++++++++++++++++++++++++++++++-- src/Jobs.hs | 5 ++++- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/Foundation.hs b/src/Foundation.hs index 12ba55520..f1041367a 100644 --- a/src/Foundation.hs +++ b/src/Foundation.hs @@ -107,6 +107,9 @@ import Web.Cookie import Yesod.Core.Types (GHState(..), HandlerData(..), HandlerContents, RunHandlerEnv(rheSite, rheChild)) import Database.Persist.Sql (transactionUndo, SqlReadBackend(..)) +import qualified Control.Retry as Retry +import GHC.IO.Exception (IOErrorType(OtherError)) + -- | Convenient Type Synonyms: type DB = YesodDB UniWorX type Form x = Html -> MForm (HandlerFor UniWorX) (FormResult x, Widget) @@ -4564,10 +4567,37 @@ routeNormalizers = return newRoute +runSqlPoolRetry :: forall m a backend. + ( MonadUnliftIO m, BackendCompatible SqlBackend backend + , MonadLogger m, MonadMask m + ) + => ReaderT backend m a + -> Pool backend + -> m a +runSqlPoolRetry action pool = do + let policy = Retry.fullJitterBackoff 1e3 & Retry.limitRetriesByCumulativeDelay 10e6 + handlers = Retry.skipAsyncExceptions `snoc` Retry.logRetries suggestRetry logRetry + where suggestRetry :: IOException -> m Bool + suggestRetry ioExc = return $ + ioeGetErrorType ioExc == OtherError + && ioeGetLocation ioExc == "libpq" + logRetry :: forall e. + Exception e + => Bool -- ^ Will retry + -> e + -> Retry.RetryStatus + -> m () + logRetry shouldRetry@False err status = $logErrorS "runSqlPoolRetry" . pack $ Retry.defaultLogMsg shouldRetry err status + logRetry shouldRetry@True err status = $logWarnS "runSqlPoolRetry" . pack $ Retry.defaultLogMsg shouldRetry err status + + Retry.recovering policy handlers $ \Retry.RetryStatus{..} -> do + $logDebugS "runSqlPoolRetry" $ "rsIterNumber = " <> tshow rsIterNumber + runSqlPool action pool + runDBRead :: ReaderT SqlReadBackend Handler a -> Handler a runDBRead action = do $logDebugS "YesodPersist" "runDBRead" - runSqlPool (withReaderT SqlReadBackend action) =<< appConnPool <$> getYesod + runSqlPoolRetry (withReaderT SqlReadBackend action) =<< appConnPool <$> getYesod -- How to run database actions. instance YesodPersist UniWorX where @@ -4580,7 +4610,9 @@ instance YesodPersist UniWorX where let action' | dryRun = action <* transactionUndo | otherwise = action - runSqlPool action' =<< appConnPool <$> getYesod + + runSqlPoolRetry action' =<< appConnPool <$> getYesod + instance YesodPersistRunner UniWorX where getDBRunner = do (DBRunner{..}, cleanup) <- defaultGetDBRunner appConnPool diff --git a/src/Jobs.hs b/src/Jobs.hs index c97438c32..2833f6091 100644 --- a/src/Jobs.hs +++ b/src/Jobs.hs @@ -66,6 +66,8 @@ import Jobs.HealthReport import Control.Exception.Base (AsyncException) +import Type.Reflection (typeOf) + data JobQueueException = JInvalid QueuedJobId QueuedJob | JLocked QueuedJobId InstanceId UTCTime @@ -364,7 +366,8 @@ handleJobs' wNum = C.mapM_ $ \jctl -> withJobWorkerState wNum JobWorkerBusy $ do lift $ foldrM (\resVar -> bool (tryPutTMVar resVar res) $ return True) False (maybe [] NonEmpty.toList resVars) case res of Just err - | not sentRes -> $logErrorS logIdent $ tshow err + | not sentRes -> case err of + SomeException err' -> $logErrorS logIdent $ tshow (typeOf err') <> ": " <> pack (displayException err') _other -> return () where logIdent = mkLogIdent wNum