From 33338cdfe94754759e4aa8cbf5ccd9f9fc939fa6 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Tue, 10 Sep 2019 09:41:52 +0200 Subject: [PATCH] feat(health): timeout all health checks --- config/settings.yml | 6 ++++++ src/Jobs/HealthReport.hs | 17 +++++++---------- src/Settings.hs | 12 ++++++++++++ src/Utils.hs | 21 +++++++++++++++++++++ 4 files changed, 46 insertions(+), 10 deletions(-) diff --git a/config/settings.yml b/config/settings.yml index e4568d03f..8eef1cb7b 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -39,7 +39,13 @@ health-check-interval: active-job-executors: "_env:HEALTHCHECK_INTERVAL_ACTIVE_JOB_EXECUTORS:60" health-check-delay-notify: "_env:HEALTHCHECK_DELAY_NOTIFY:true" health-check-http: "_env:HEALTHCHECK_HTTP:true" # Can we assume, that we can reach ourselves under APPROOT via HTTP (reverse proxies or firewalls might prevent this)? + health-check-active-job-executors-timeout: "_env:HEALTHCHECK_ACTIVE_JOB_EXECUTORS_TIMEOUT:5" +health-check-active-widget-memcached-timeout: "_env:HEALTHCHECK_ACTIVE_WIDGET_MEMCACHED_TIMEOUT:2" +health-check-smtp-connect-timeout: "_env:HEALTHCHECK_SMTP_CONNECT_TIMEOUT:5" +health-check-ldap-admins-timeout: "_env:HEALTHCHECK_LDAP_ADMINS_TIMEOUT:60" +health-check-http-reachable-timeout: "_env:HEALTHCHECK_HTTP_REACHABLE_TIMEOUT:2" +health-check-matching-cluster-config-timeout: "_env:HEALTHCHECK_MATCHING_CLUSTER_CONFIG_TIMEOUT:2" synchronise-ldap-users-within: "_env:SYNCHRONISE_LDAP_WITHIN:1209600" synchronise-ldap-users-interval: "_env:SYNCHRONISE_LDAP_INTERVAL:3600" diff --git a/src/Jobs/HealthReport.hs b/src/Jobs/HealthReport.hs index 157934cf1..a6da601c4 100644 --- a/src/Jobs/HealthReport.hs +++ b/src/Jobs/HealthReport.hs @@ -26,8 +26,6 @@ import qualified Data.CaseInsensitive as CI import qualified Network.HaskellNet.SMTP as SMTP import Data.Pool (withResource) -import System.Timeout - import Jobs.Queue import Control.Concurrent.Async.Lifted.Safe (forConcurrently) @@ -35,11 +33,12 @@ import Control.Concurrent.Async.Lifted.Safe (forConcurrently) generateHealthReport :: HealthCheck -> Handler HealthReport generateHealthReport = $(dispatchTH ''HealthCheck) + dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport -- ^ Can the cluster configuration be read from the database and does it match our configuration? dispatchHealthCheckMatchingClusterConfig - = fmap HealthMatchingClusterConfig . runDB $ and <$> forM universeF clusterSettingMatches + = fmap HealthMatchingClusterConfig . yesodTimeout (^. _appHealthCheckMatchingClusterConfigTimeout) False . runDB $ and <$> forM universeF clusterSettingMatches where clusterSettingMatches ClusterCryptoIDKey = do ourSetting <- getsYesod appCryptoIDKey @@ -75,7 +74,7 @@ dispatchHealthCheckMatchingClusterConfig dispatchHealthCheckHTTPReachable :: Handler HealthReport -dispatchHealthCheckHTTPReachable = HealthHTTPReachable <$> do +dispatchHealthCheckHTTPReachable = fmap HealthHTTPReachable . yesodTimeout (^. _appHealthCheckHTTPReachableTimeout) (Just False) $ do staticAppRoot <- getsYesod $ view _appRoot doHTTP <- getsYesod $ view _appHealthCheckHTTP for (staticAppRoot <* guard doHTTP) $ \_ -> do @@ -89,7 +88,7 @@ dispatchHealthCheckHTTPReachable = HealthHTTPReachable <$> do dispatchHealthCheckLDAPAdmins :: Handler HealthReport -dispatchHealthCheckLDAPAdmins = HealthLDAPAdmins <$> do +dispatchHealthCheckLDAPAdmins = fmap HealthLDAPAdmins . yesodTimeout (^. _appHealthCheckLDAPAdminsTimeout) (Just 0) $ do ldapPool' <- getsYesod appLdapPool ldapConf' <- getsYesod $ view _appLdapConf ldapAdminUsers <- fmap (map E.unValue) . runDB . E.select . E.from $ \(user `E.InnerJoin` lecturer) -> E.distinctOnOrderBy [E.asc $ user E.^. UserId] $ do @@ -110,7 +109,7 @@ dispatchHealthCheckLDAPAdmins = HealthLDAPAdmins <$> do dispatchHealthCheckSMTPConnect :: Handler HealthReport -dispatchHealthCheckSMTPConnect = HealthSMTPConnect <$> do +dispatchHealthCheckSMTPConnect = fmap HealthSMTPConnect . yesodTimeout (^. _appHealthCheckSMTPConnectTimeout) (Just False) $ do smtpPool <- getsYesod appSmtpPool for smtpPool . flip withResource $ \smtpConn -> do response@(rCode, _) <- liftIO $ SMTP.sendCommand smtpConn SMTP.NOOP @@ -122,7 +121,7 @@ dispatchHealthCheckSMTPConnect = HealthSMTPConnect <$> do dispatchHealthCheckWidgetMemcached :: Handler HealthReport -dispatchHealthCheckWidgetMemcached = HealthWidgetMemcached <$> do +dispatchHealthCheckWidgetMemcached = fmap HealthWidgetMemcached . yesodTimeout (^. _appHealthCheckActiveWidgetMemcachedTimeout) (Just False) $ do memcachedConn <- getsYesod appWidgetMemcached for memcachedConn $ \_memcachedConn' -> do let ext = "bin" @@ -155,11 +154,9 @@ dispatchHealthCheckActiveJobExecutors = HealthActiveJobExecutors <$> do tid <- liftIO myThreadId let workers' = Map.fromSet jobWorkerName (Map.keysSet jobWorkers) workers = Map.filterWithKey (\a _ -> asyncThreadId a /= tid) workers' - timeoutMicro = let (MkFixed micro :: Micro) = realToFrac timeoutLength - in fromInteger micro $logDebugS "HealthCheckActiveJobExecutors" . tshow . map showWorkerId $ Map.elems workers' responders <- fmap (getSum . fold) . liftIO . forConcurrently (Map.toList workers) $ \(_, wName) - -> fromMaybe (Sum 0) <$> timeout timeoutMicro (runReaderT ?? app $ Sum 1 <$ writeJobCtlBlock' (writeJobCtl' wName) JobCtlTest) + -> diffTimeout timeoutLength (Sum 0) (runReaderT ?? app $ Sum 1 <$ writeJobCtlBlock' (writeJobCtl' wName) JobCtlTest) if | Map.null workers -> return Nothing | otherwise -> return . Just $ responders % fromIntegral (Map.size workers) diff --git a/src/Settings.hs b/src/Settings.hs index 0874d2b50..922749148 100644 --- a/src/Settings.hs +++ b/src/Settings.hs @@ -116,7 +116,13 @@ data AppSettings = AppSettings , appHealthCheckInterval :: HealthCheck -> Maybe NominalDiffTime , appHealthCheckDelayNotify :: Bool , appHealthCheckHTTP :: Bool + , appHealthCheckActiveJobExecutorsTimeout :: NominalDiffTime + , appHealthCheckActiveWidgetMemcachedTimeout :: NominalDiffTime + , appHealthCheckSMTPConnectTimeout :: NominalDiffTime + , appHealthCheckLDAPAdminsTimeout :: NominalDiffTime + , appHealthCheckHTTPReachableTimeout :: NominalDiffTime + , appHealthCheckMatchingClusterConfigTimeout :: NominalDiffTime , appSynchroniseLdapUsersWithin :: Maybe NominalDiffTime , appSynchroniseLdapUsersInterval :: NominalDiffTime @@ -398,7 +404,13 @@ instance FromJSON AppSettings where appHealthCheckInterval <- (assertM' (> 0) . ) <$> o .: "health-check-interval" appHealthCheckDelayNotify <- o .: "health-check-delay-notify" appHealthCheckHTTP <- o .: "health-check-http" + appHealthCheckActiveJobExecutorsTimeout <- o .: "health-check-active-job-executors-timeout" + appHealthCheckActiveWidgetMemcachedTimeout <- o .: "health-check-active-widget-memcached-timeout" + appHealthCheckSMTPConnectTimeout <- o .: "health-check-smtp-connect-timeout" + appHealthCheckLDAPAdminsTimeout <- o .: "health-check-ldap-admins-timeout" + appHealthCheckHTTPReachableTimeout <- o .: "health-check-http-reachable-timeout" + appHealthCheckMatchingClusterConfigTimeout <- o .: "health-check-matching-cluster-config-timeout" appSessionTimeout <- o .: "session-timeout" diff --git a/src/Utils.hs b/src/Utils.hs index fa6611f35..8e7cd2de8 100644 --- a/src/Utils.hs +++ b/src/Utils.hs @@ -52,6 +52,7 @@ import Control.Monad.Trans.Except (ExceptT(..), throwE, runExceptT) import Control.Monad.Except (MonadError(..)) import Control.Monad.Trans.Maybe as Utils (MaybeT(..)) import Control.Monad.Catch (catchIf) +import System.Timeout.Lifted (timeout) import Language.Haskell.TH import Language.Haskell.TH.Instances () @@ -682,6 +683,26 @@ mconcatForM = flip mconcatMapM findM :: (Monad m, Foldable f) => (a -> MaybeT m b) -> f a -> m (Maybe b) findM f = runMaybeT . Fold.foldr (\x as -> f x <|> as) mzero + +yesodTimeout :: ( MonadHandler m + , MonadBaseControl IO m + ) + => (HandlerSite m -> NominalDiffTime) -- ^ Calculate timeout + -> a -- ^ Default value + -> m a -- ^ Computation + -> m a -- ^ Result of computation or default value, if timeout is reached +yesodTimeout getTimeout timeoutRes act = do + timeoutLength <- getsYesod getTimeout + diffTimeout timeoutLength timeoutRes act + +diffTimeout :: MonadBaseControl IO m + => NominalDiffTime -> a -> m a -> m a +diffTimeout timeoutLength timeoutRes act = fromMaybe timeoutRes <$> timeout timeoutMicro act + where + timeoutMicro + = let (MkFixed micro :: Micro) = realToFrac timeoutLength + in fromInteger micro + ------------- -- Conduit -- -------------