feat(health): timeout all health checks

This commit is contained in:
Gregor Kleen 2019-09-10 09:41:52 +02:00
parent 5ee6d33325
commit 33338cdfe9
4 changed files with 46 additions and 10 deletions

View File

@ -39,7 +39,13 @@ health-check-interval:
active-job-executors: "_env:HEALTHCHECK_INTERVAL_ACTIVE_JOB_EXECUTORS:60" active-job-executors: "_env:HEALTHCHECK_INTERVAL_ACTIVE_JOB_EXECUTORS:60"
health-check-delay-notify: "_env:HEALTHCHECK_DELAY_NOTIFY:true" health-check-delay-notify: "_env:HEALTHCHECK_DELAY_NOTIFY:true"
health-check-http: "_env:HEALTHCHECK_HTTP:true" # Can we assume, that we can reach ourselves under APPROOT via HTTP (reverse proxies or firewalls might prevent this)? health-check-http: "_env:HEALTHCHECK_HTTP:true" # Can we assume, that we can reach ourselves under APPROOT via HTTP (reverse proxies or firewalls might prevent this)?
health-check-active-job-executors-timeout: "_env:HEALTHCHECK_ACTIVE_JOB_EXECUTORS_TIMEOUT:5" health-check-active-job-executors-timeout: "_env:HEALTHCHECK_ACTIVE_JOB_EXECUTORS_TIMEOUT:5"
health-check-active-widget-memcached-timeout: "_env:HEALTHCHECK_ACTIVE_WIDGET_MEMCACHED_TIMEOUT:2"
health-check-smtp-connect-timeout: "_env:HEALTHCHECK_SMTP_CONNECT_TIMEOUT:5"
health-check-ldap-admins-timeout: "_env:HEALTHCHECK_LDAP_ADMINS_TIMEOUT:60"
health-check-http-reachable-timeout: "_env:HEALTHCHECK_HTTP_REACHABLE_TIMEOUT:2"
health-check-matching-cluster-config-timeout: "_env:HEALTHCHECK_MATCHING_CLUSTER_CONFIG_TIMEOUT:2"
synchronise-ldap-users-within: "_env:SYNCHRONISE_LDAP_WITHIN:1209600" synchronise-ldap-users-within: "_env:SYNCHRONISE_LDAP_WITHIN:1209600"
synchronise-ldap-users-interval: "_env:SYNCHRONISE_LDAP_INTERVAL:3600" synchronise-ldap-users-interval: "_env:SYNCHRONISE_LDAP_INTERVAL:3600"

View File

@ -26,8 +26,6 @@ import qualified Data.CaseInsensitive as CI
import qualified Network.HaskellNet.SMTP as SMTP import qualified Network.HaskellNet.SMTP as SMTP
import Data.Pool (withResource) import Data.Pool (withResource)
import System.Timeout
import Jobs.Queue import Jobs.Queue
import Control.Concurrent.Async.Lifted.Safe (forConcurrently) import Control.Concurrent.Async.Lifted.Safe (forConcurrently)
@ -35,11 +33,12 @@ import Control.Concurrent.Async.Lifted.Safe (forConcurrently)
generateHealthReport :: HealthCheck -> Handler HealthReport generateHealthReport :: HealthCheck -> Handler HealthReport
generateHealthReport = $(dispatchTH ''HealthCheck) generateHealthReport = $(dispatchTH ''HealthCheck)
dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport
-- ^ Can the cluster configuration be read from the database and does it match our configuration? -- ^ Can the cluster configuration be read from the database and does it match our configuration?
dispatchHealthCheckMatchingClusterConfig dispatchHealthCheckMatchingClusterConfig
= fmap HealthMatchingClusterConfig . runDB $ and <$> forM universeF clusterSettingMatches = fmap HealthMatchingClusterConfig . yesodTimeout (^. _appHealthCheckMatchingClusterConfigTimeout) False . runDB $ and <$> forM universeF clusterSettingMatches
where where
clusterSettingMatches ClusterCryptoIDKey = do clusterSettingMatches ClusterCryptoIDKey = do
ourSetting <- getsYesod appCryptoIDKey ourSetting <- getsYesod appCryptoIDKey
@ -75,7 +74,7 @@ dispatchHealthCheckMatchingClusterConfig
dispatchHealthCheckHTTPReachable :: Handler HealthReport dispatchHealthCheckHTTPReachable :: Handler HealthReport
dispatchHealthCheckHTTPReachable = HealthHTTPReachable <$> do dispatchHealthCheckHTTPReachable = fmap HealthHTTPReachable . yesodTimeout (^. _appHealthCheckHTTPReachableTimeout) (Just False) $ do
staticAppRoot <- getsYesod $ view _appRoot staticAppRoot <- getsYesod $ view _appRoot
doHTTP <- getsYesod $ view _appHealthCheckHTTP doHTTP <- getsYesod $ view _appHealthCheckHTTP
for (staticAppRoot <* guard doHTTP) $ \_ -> do for (staticAppRoot <* guard doHTTP) $ \_ -> do
@ -89,7 +88,7 @@ dispatchHealthCheckHTTPReachable = HealthHTTPReachable <$> do
dispatchHealthCheckLDAPAdmins :: Handler HealthReport dispatchHealthCheckLDAPAdmins :: Handler HealthReport
dispatchHealthCheckLDAPAdmins = HealthLDAPAdmins <$> do dispatchHealthCheckLDAPAdmins = fmap HealthLDAPAdmins . yesodTimeout (^. _appHealthCheckLDAPAdminsTimeout) (Just 0) $ do
ldapPool' <- getsYesod appLdapPool ldapPool' <- getsYesod appLdapPool
ldapConf' <- getsYesod $ view _appLdapConf ldapConf' <- getsYesod $ view _appLdapConf
ldapAdminUsers <- fmap (map E.unValue) . runDB . E.select . E.from $ \(user `E.InnerJoin` lecturer) -> E.distinctOnOrderBy [E.asc $ user E.^. UserId] $ do ldapAdminUsers <- fmap (map E.unValue) . runDB . E.select . E.from $ \(user `E.InnerJoin` lecturer) -> E.distinctOnOrderBy [E.asc $ user E.^. UserId] $ do
@ -110,7 +109,7 @@ dispatchHealthCheckLDAPAdmins = HealthLDAPAdmins <$> do
dispatchHealthCheckSMTPConnect :: Handler HealthReport dispatchHealthCheckSMTPConnect :: Handler HealthReport
dispatchHealthCheckSMTPConnect = HealthSMTPConnect <$> do dispatchHealthCheckSMTPConnect = fmap HealthSMTPConnect . yesodTimeout (^. _appHealthCheckSMTPConnectTimeout) (Just False) $ do
smtpPool <- getsYesod appSmtpPool smtpPool <- getsYesod appSmtpPool
for smtpPool . flip withResource $ \smtpConn -> do for smtpPool . flip withResource $ \smtpConn -> do
response@(rCode, _) <- liftIO $ SMTP.sendCommand smtpConn SMTP.NOOP response@(rCode, _) <- liftIO $ SMTP.sendCommand smtpConn SMTP.NOOP
@ -122,7 +121,7 @@ dispatchHealthCheckSMTPConnect = HealthSMTPConnect <$> do
dispatchHealthCheckWidgetMemcached :: Handler HealthReport dispatchHealthCheckWidgetMemcached :: Handler HealthReport
dispatchHealthCheckWidgetMemcached = HealthWidgetMemcached <$> do dispatchHealthCheckWidgetMemcached = fmap HealthWidgetMemcached . yesodTimeout (^. _appHealthCheckActiveWidgetMemcachedTimeout) (Just False) $ do
memcachedConn <- getsYesod appWidgetMemcached memcachedConn <- getsYesod appWidgetMemcached
for memcachedConn $ \_memcachedConn' -> do for memcachedConn $ \_memcachedConn' -> do
let ext = "bin" let ext = "bin"
@ -155,11 +154,9 @@ dispatchHealthCheckActiveJobExecutors = HealthActiveJobExecutors <$> do
tid <- liftIO myThreadId tid <- liftIO myThreadId
let workers' = Map.fromSet jobWorkerName (Map.keysSet jobWorkers) let workers' = Map.fromSet jobWorkerName (Map.keysSet jobWorkers)
workers = Map.filterWithKey (\a _ -> asyncThreadId a /= tid) workers' workers = Map.filterWithKey (\a _ -> asyncThreadId a /= tid) workers'
timeoutMicro = let (MkFixed micro :: Micro) = realToFrac timeoutLength
in fromInteger micro
$logDebugS "HealthCheckActiveJobExecutors" . tshow . map showWorkerId $ Map.elems workers' $logDebugS "HealthCheckActiveJobExecutors" . tshow . map showWorkerId $ Map.elems workers'
responders <- fmap (getSum . fold) . liftIO . forConcurrently (Map.toList workers) $ \(_, wName) responders <- fmap (getSum . fold) . liftIO . forConcurrently (Map.toList workers) $ \(_, wName)
-> fromMaybe (Sum 0) <$> timeout timeoutMicro (runReaderT ?? app $ Sum 1 <$ writeJobCtlBlock' (writeJobCtl' wName) JobCtlTest) -> diffTimeout timeoutLength (Sum 0) (runReaderT ?? app $ Sum 1 <$ writeJobCtlBlock' (writeJobCtl' wName) JobCtlTest)
if if
| Map.null workers -> return Nothing | Map.null workers -> return Nothing
| otherwise -> return . Just $ responders % fromIntegral (Map.size workers) | otherwise -> return . Just $ responders % fromIntegral (Map.size workers)

View File

@ -116,7 +116,13 @@ data AppSettings = AppSettings
, appHealthCheckInterval :: HealthCheck -> Maybe NominalDiffTime , appHealthCheckInterval :: HealthCheck -> Maybe NominalDiffTime
, appHealthCheckDelayNotify :: Bool , appHealthCheckDelayNotify :: Bool
, appHealthCheckHTTP :: Bool , appHealthCheckHTTP :: Bool
, appHealthCheckActiveJobExecutorsTimeout :: NominalDiffTime , appHealthCheckActiveJobExecutorsTimeout :: NominalDiffTime
, appHealthCheckActiveWidgetMemcachedTimeout :: NominalDiffTime
, appHealthCheckSMTPConnectTimeout :: NominalDiffTime
, appHealthCheckLDAPAdminsTimeout :: NominalDiffTime
, appHealthCheckHTTPReachableTimeout :: NominalDiffTime
, appHealthCheckMatchingClusterConfigTimeout :: NominalDiffTime
, appSynchroniseLdapUsersWithin :: Maybe NominalDiffTime , appSynchroniseLdapUsersWithin :: Maybe NominalDiffTime
, appSynchroniseLdapUsersInterval :: NominalDiffTime , appSynchroniseLdapUsersInterval :: NominalDiffTime
@ -398,7 +404,13 @@ instance FromJSON AppSettings where
appHealthCheckInterval <- (assertM' (> 0) . ) <$> o .: "health-check-interval" appHealthCheckInterval <- (assertM' (> 0) . ) <$> o .: "health-check-interval"
appHealthCheckDelayNotify <- o .: "health-check-delay-notify" appHealthCheckDelayNotify <- o .: "health-check-delay-notify"
appHealthCheckHTTP <- o .: "health-check-http" appHealthCheckHTTP <- o .: "health-check-http"
appHealthCheckActiveJobExecutorsTimeout <- o .: "health-check-active-job-executors-timeout" appHealthCheckActiveJobExecutorsTimeout <- o .: "health-check-active-job-executors-timeout"
appHealthCheckActiveWidgetMemcachedTimeout <- o .: "health-check-active-widget-memcached-timeout"
appHealthCheckSMTPConnectTimeout <- o .: "health-check-smtp-connect-timeout"
appHealthCheckLDAPAdminsTimeout <- o .: "health-check-ldap-admins-timeout"
appHealthCheckHTTPReachableTimeout <- o .: "health-check-http-reachable-timeout"
appHealthCheckMatchingClusterConfigTimeout <- o .: "health-check-matching-cluster-config-timeout"
appSessionTimeout <- o .: "session-timeout" appSessionTimeout <- o .: "session-timeout"

View File

@ -52,6 +52,7 @@ import Control.Monad.Trans.Except (ExceptT(..), throwE, runExceptT)
import Control.Monad.Except (MonadError(..)) import Control.Monad.Except (MonadError(..))
import Control.Monad.Trans.Maybe as Utils (MaybeT(..)) import Control.Monad.Trans.Maybe as Utils (MaybeT(..))
import Control.Monad.Catch (catchIf) import Control.Monad.Catch (catchIf)
import System.Timeout.Lifted (timeout)
import Language.Haskell.TH import Language.Haskell.TH
import Language.Haskell.TH.Instances () import Language.Haskell.TH.Instances ()
@ -682,6 +683,26 @@ mconcatForM = flip mconcatMapM
findM :: (Monad m, Foldable f) => (a -> MaybeT m b) -> f a -> m (Maybe b) findM :: (Monad m, Foldable f) => (a -> MaybeT m b) -> f a -> m (Maybe b)
findM f = runMaybeT . Fold.foldr (\x as -> f x <|> as) mzero findM f = runMaybeT . Fold.foldr (\x as -> f x <|> as) mzero
yesodTimeout :: ( MonadHandler m
, MonadBaseControl IO m
)
=> (HandlerSite m -> NominalDiffTime) -- ^ Calculate timeout
-> a -- ^ Default value
-> m a -- ^ Computation
-> m a -- ^ Result of computation or default value, if timeout is reached
yesodTimeout getTimeout timeoutRes act = do
timeoutLength <- getsYesod getTimeout
diffTimeout timeoutLength timeoutRes act
diffTimeout :: MonadBaseControl IO m
=> NominalDiffTime -> a -> m a -> m a
diffTimeout timeoutLength timeoutRes act = fromMaybe timeoutRes <$> timeout timeoutMicro act
where
timeoutMicro
= let (MkFixed micro :: Micro) = realToFrac timeoutLength
in fromInteger micro
------------- -------------
-- Conduit -- -- Conduit --
------------- -------------