feat(health): timeout all health checks
This commit is contained in:
parent
5ee6d33325
commit
33338cdfe9
@ -39,7 +39,13 @@ health-check-interval:
|
|||||||
active-job-executors: "_env:HEALTHCHECK_INTERVAL_ACTIVE_JOB_EXECUTORS:60"
|
active-job-executors: "_env:HEALTHCHECK_INTERVAL_ACTIVE_JOB_EXECUTORS:60"
|
||||||
health-check-delay-notify: "_env:HEALTHCHECK_DELAY_NOTIFY:true"
|
health-check-delay-notify: "_env:HEALTHCHECK_DELAY_NOTIFY:true"
|
||||||
health-check-http: "_env:HEALTHCHECK_HTTP:true" # Can we assume, that we can reach ourselves under APPROOT via HTTP (reverse proxies or firewalls might prevent this)?
|
health-check-http: "_env:HEALTHCHECK_HTTP:true" # Can we assume, that we can reach ourselves under APPROOT via HTTP (reverse proxies or firewalls might prevent this)?
|
||||||
|
|
||||||
health-check-active-job-executors-timeout: "_env:HEALTHCHECK_ACTIVE_JOB_EXECUTORS_TIMEOUT:5"
|
health-check-active-job-executors-timeout: "_env:HEALTHCHECK_ACTIVE_JOB_EXECUTORS_TIMEOUT:5"
|
||||||
|
health-check-active-widget-memcached-timeout: "_env:HEALTHCHECK_ACTIVE_WIDGET_MEMCACHED_TIMEOUT:2"
|
||||||
|
health-check-smtp-connect-timeout: "_env:HEALTHCHECK_SMTP_CONNECT_TIMEOUT:5"
|
||||||
|
health-check-ldap-admins-timeout: "_env:HEALTHCHECK_LDAP_ADMINS_TIMEOUT:60"
|
||||||
|
health-check-http-reachable-timeout: "_env:HEALTHCHECK_HTTP_REACHABLE_TIMEOUT:2"
|
||||||
|
health-check-matching-cluster-config-timeout: "_env:HEALTHCHECK_MATCHING_CLUSTER_CONFIG_TIMEOUT:2"
|
||||||
|
|
||||||
synchronise-ldap-users-within: "_env:SYNCHRONISE_LDAP_WITHIN:1209600"
|
synchronise-ldap-users-within: "_env:SYNCHRONISE_LDAP_WITHIN:1209600"
|
||||||
synchronise-ldap-users-interval: "_env:SYNCHRONISE_LDAP_INTERVAL:3600"
|
synchronise-ldap-users-interval: "_env:SYNCHRONISE_LDAP_INTERVAL:3600"
|
||||||
|
|||||||
@ -26,8 +26,6 @@ import qualified Data.CaseInsensitive as CI
|
|||||||
import qualified Network.HaskellNet.SMTP as SMTP
|
import qualified Network.HaskellNet.SMTP as SMTP
|
||||||
import Data.Pool (withResource)
|
import Data.Pool (withResource)
|
||||||
|
|
||||||
import System.Timeout
|
|
||||||
|
|
||||||
import Jobs.Queue
|
import Jobs.Queue
|
||||||
|
|
||||||
import Control.Concurrent.Async.Lifted.Safe (forConcurrently)
|
import Control.Concurrent.Async.Lifted.Safe (forConcurrently)
|
||||||
@ -36,10 +34,11 @@ import Control.Concurrent.Async.Lifted.Safe (forConcurrently)
|
|||||||
generateHealthReport :: HealthCheck -> Handler HealthReport
|
generateHealthReport :: HealthCheck -> Handler HealthReport
|
||||||
generateHealthReport = $(dispatchTH ''HealthCheck)
|
generateHealthReport = $(dispatchTH ''HealthCheck)
|
||||||
|
|
||||||
|
|
||||||
dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport
|
dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport
|
||||||
-- ^ Can the cluster configuration be read from the database and does it match our configuration?
|
-- ^ Can the cluster configuration be read from the database and does it match our configuration?
|
||||||
dispatchHealthCheckMatchingClusterConfig
|
dispatchHealthCheckMatchingClusterConfig
|
||||||
= fmap HealthMatchingClusterConfig . runDB $ and <$> forM universeF clusterSettingMatches
|
= fmap HealthMatchingClusterConfig . yesodTimeout (^. _appHealthCheckMatchingClusterConfigTimeout) False . runDB $ and <$> forM universeF clusterSettingMatches
|
||||||
where
|
where
|
||||||
clusterSettingMatches ClusterCryptoIDKey = do
|
clusterSettingMatches ClusterCryptoIDKey = do
|
||||||
ourSetting <- getsYesod appCryptoIDKey
|
ourSetting <- getsYesod appCryptoIDKey
|
||||||
@ -75,7 +74,7 @@ dispatchHealthCheckMatchingClusterConfig
|
|||||||
|
|
||||||
|
|
||||||
dispatchHealthCheckHTTPReachable :: Handler HealthReport
|
dispatchHealthCheckHTTPReachable :: Handler HealthReport
|
||||||
dispatchHealthCheckHTTPReachable = HealthHTTPReachable <$> do
|
dispatchHealthCheckHTTPReachable = fmap HealthHTTPReachable . yesodTimeout (^. _appHealthCheckHTTPReachableTimeout) (Just False) $ do
|
||||||
staticAppRoot <- getsYesod $ view _appRoot
|
staticAppRoot <- getsYesod $ view _appRoot
|
||||||
doHTTP <- getsYesod $ view _appHealthCheckHTTP
|
doHTTP <- getsYesod $ view _appHealthCheckHTTP
|
||||||
for (staticAppRoot <* guard doHTTP) $ \_ -> do
|
for (staticAppRoot <* guard doHTTP) $ \_ -> do
|
||||||
@ -89,7 +88,7 @@ dispatchHealthCheckHTTPReachable = HealthHTTPReachable <$> do
|
|||||||
|
|
||||||
|
|
||||||
dispatchHealthCheckLDAPAdmins :: Handler HealthReport
|
dispatchHealthCheckLDAPAdmins :: Handler HealthReport
|
||||||
dispatchHealthCheckLDAPAdmins = HealthLDAPAdmins <$> do
|
dispatchHealthCheckLDAPAdmins = fmap HealthLDAPAdmins . yesodTimeout (^. _appHealthCheckLDAPAdminsTimeout) (Just 0) $ do
|
||||||
ldapPool' <- getsYesod appLdapPool
|
ldapPool' <- getsYesod appLdapPool
|
||||||
ldapConf' <- getsYesod $ view _appLdapConf
|
ldapConf' <- getsYesod $ view _appLdapConf
|
||||||
ldapAdminUsers <- fmap (map E.unValue) . runDB . E.select . E.from $ \(user `E.InnerJoin` lecturer) -> E.distinctOnOrderBy [E.asc $ user E.^. UserId] $ do
|
ldapAdminUsers <- fmap (map E.unValue) . runDB . E.select . E.from $ \(user `E.InnerJoin` lecturer) -> E.distinctOnOrderBy [E.asc $ user E.^. UserId] $ do
|
||||||
@ -110,7 +109,7 @@ dispatchHealthCheckLDAPAdmins = HealthLDAPAdmins <$> do
|
|||||||
|
|
||||||
|
|
||||||
dispatchHealthCheckSMTPConnect :: Handler HealthReport
|
dispatchHealthCheckSMTPConnect :: Handler HealthReport
|
||||||
dispatchHealthCheckSMTPConnect = HealthSMTPConnect <$> do
|
dispatchHealthCheckSMTPConnect = fmap HealthSMTPConnect . yesodTimeout (^. _appHealthCheckSMTPConnectTimeout) (Just False) $ do
|
||||||
smtpPool <- getsYesod appSmtpPool
|
smtpPool <- getsYesod appSmtpPool
|
||||||
for smtpPool . flip withResource $ \smtpConn -> do
|
for smtpPool . flip withResource $ \smtpConn -> do
|
||||||
response@(rCode, _) <- liftIO $ SMTP.sendCommand smtpConn SMTP.NOOP
|
response@(rCode, _) <- liftIO $ SMTP.sendCommand smtpConn SMTP.NOOP
|
||||||
@ -122,7 +121,7 @@ dispatchHealthCheckSMTPConnect = HealthSMTPConnect <$> do
|
|||||||
|
|
||||||
|
|
||||||
dispatchHealthCheckWidgetMemcached :: Handler HealthReport
|
dispatchHealthCheckWidgetMemcached :: Handler HealthReport
|
||||||
dispatchHealthCheckWidgetMemcached = HealthWidgetMemcached <$> do
|
dispatchHealthCheckWidgetMemcached = fmap HealthWidgetMemcached . yesodTimeout (^. _appHealthCheckActiveWidgetMemcachedTimeout) (Just False) $ do
|
||||||
memcachedConn <- getsYesod appWidgetMemcached
|
memcachedConn <- getsYesod appWidgetMemcached
|
||||||
for memcachedConn $ \_memcachedConn' -> do
|
for memcachedConn $ \_memcachedConn' -> do
|
||||||
let ext = "bin"
|
let ext = "bin"
|
||||||
@ -155,11 +154,9 @@ dispatchHealthCheckActiveJobExecutors = HealthActiveJobExecutors <$> do
|
|||||||
tid <- liftIO myThreadId
|
tid <- liftIO myThreadId
|
||||||
let workers' = Map.fromSet jobWorkerName (Map.keysSet jobWorkers)
|
let workers' = Map.fromSet jobWorkerName (Map.keysSet jobWorkers)
|
||||||
workers = Map.filterWithKey (\a _ -> asyncThreadId a /= tid) workers'
|
workers = Map.filterWithKey (\a _ -> asyncThreadId a /= tid) workers'
|
||||||
timeoutMicro = let (MkFixed micro :: Micro) = realToFrac timeoutLength
|
|
||||||
in fromInteger micro
|
|
||||||
$logDebugS "HealthCheckActiveJobExecutors" . tshow . map showWorkerId $ Map.elems workers'
|
$logDebugS "HealthCheckActiveJobExecutors" . tshow . map showWorkerId $ Map.elems workers'
|
||||||
responders <- fmap (getSum . fold) . liftIO . forConcurrently (Map.toList workers) $ \(_, wName)
|
responders <- fmap (getSum . fold) . liftIO . forConcurrently (Map.toList workers) $ \(_, wName)
|
||||||
-> fromMaybe (Sum 0) <$> timeout timeoutMicro (runReaderT ?? app $ Sum 1 <$ writeJobCtlBlock' (writeJobCtl' wName) JobCtlTest)
|
-> diffTimeout timeoutLength (Sum 0) (runReaderT ?? app $ Sum 1 <$ writeJobCtlBlock' (writeJobCtl' wName) JobCtlTest)
|
||||||
if
|
if
|
||||||
| Map.null workers -> return Nothing
|
| Map.null workers -> return Nothing
|
||||||
| otherwise -> return . Just $ responders % fromIntegral (Map.size workers)
|
| otherwise -> return . Just $ responders % fromIntegral (Map.size workers)
|
||||||
|
|||||||
@ -116,7 +116,13 @@ data AppSettings = AppSettings
|
|||||||
, appHealthCheckInterval :: HealthCheck -> Maybe NominalDiffTime
|
, appHealthCheckInterval :: HealthCheck -> Maybe NominalDiffTime
|
||||||
, appHealthCheckDelayNotify :: Bool
|
, appHealthCheckDelayNotify :: Bool
|
||||||
, appHealthCheckHTTP :: Bool
|
, appHealthCheckHTTP :: Bool
|
||||||
|
|
||||||
, appHealthCheckActiveJobExecutorsTimeout :: NominalDiffTime
|
, appHealthCheckActiveJobExecutorsTimeout :: NominalDiffTime
|
||||||
|
, appHealthCheckActiveWidgetMemcachedTimeout :: NominalDiffTime
|
||||||
|
, appHealthCheckSMTPConnectTimeout :: NominalDiffTime
|
||||||
|
, appHealthCheckLDAPAdminsTimeout :: NominalDiffTime
|
||||||
|
, appHealthCheckHTTPReachableTimeout :: NominalDiffTime
|
||||||
|
, appHealthCheckMatchingClusterConfigTimeout :: NominalDiffTime
|
||||||
|
|
||||||
, appSynchroniseLdapUsersWithin :: Maybe NominalDiffTime
|
, appSynchroniseLdapUsersWithin :: Maybe NominalDiffTime
|
||||||
, appSynchroniseLdapUsersInterval :: NominalDiffTime
|
, appSynchroniseLdapUsersInterval :: NominalDiffTime
|
||||||
@ -398,7 +404,13 @@ instance FromJSON AppSettings where
|
|||||||
appHealthCheckInterval <- (assertM' (> 0) . ) <$> o .: "health-check-interval"
|
appHealthCheckInterval <- (assertM' (> 0) . ) <$> o .: "health-check-interval"
|
||||||
appHealthCheckDelayNotify <- o .: "health-check-delay-notify"
|
appHealthCheckDelayNotify <- o .: "health-check-delay-notify"
|
||||||
appHealthCheckHTTP <- o .: "health-check-http"
|
appHealthCheckHTTP <- o .: "health-check-http"
|
||||||
|
|
||||||
appHealthCheckActiveJobExecutorsTimeout <- o .: "health-check-active-job-executors-timeout"
|
appHealthCheckActiveJobExecutorsTimeout <- o .: "health-check-active-job-executors-timeout"
|
||||||
|
appHealthCheckActiveWidgetMemcachedTimeout <- o .: "health-check-active-widget-memcached-timeout"
|
||||||
|
appHealthCheckSMTPConnectTimeout <- o .: "health-check-smtp-connect-timeout"
|
||||||
|
appHealthCheckLDAPAdminsTimeout <- o .: "health-check-ldap-admins-timeout"
|
||||||
|
appHealthCheckHTTPReachableTimeout <- o .: "health-check-http-reachable-timeout"
|
||||||
|
appHealthCheckMatchingClusterConfigTimeout <- o .: "health-check-matching-cluster-config-timeout"
|
||||||
|
|
||||||
appSessionTimeout <- o .: "session-timeout"
|
appSessionTimeout <- o .: "session-timeout"
|
||||||
|
|
||||||
|
|||||||
21
src/Utils.hs
21
src/Utils.hs
@ -52,6 +52,7 @@ import Control.Monad.Trans.Except (ExceptT(..), throwE, runExceptT)
|
|||||||
import Control.Monad.Except (MonadError(..))
|
import Control.Monad.Except (MonadError(..))
|
||||||
import Control.Monad.Trans.Maybe as Utils (MaybeT(..))
|
import Control.Monad.Trans.Maybe as Utils (MaybeT(..))
|
||||||
import Control.Monad.Catch (catchIf)
|
import Control.Monad.Catch (catchIf)
|
||||||
|
import System.Timeout.Lifted (timeout)
|
||||||
|
|
||||||
import Language.Haskell.TH
|
import Language.Haskell.TH
|
||||||
import Language.Haskell.TH.Instances ()
|
import Language.Haskell.TH.Instances ()
|
||||||
@ -682,6 +683,26 @@ mconcatForM = flip mconcatMapM
|
|||||||
findM :: (Monad m, Foldable f) => (a -> MaybeT m b) -> f a -> m (Maybe b)
|
findM :: (Monad m, Foldable f) => (a -> MaybeT m b) -> f a -> m (Maybe b)
|
||||||
findM f = runMaybeT . Fold.foldr (\x as -> f x <|> as) mzero
|
findM f = runMaybeT . Fold.foldr (\x as -> f x <|> as) mzero
|
||||||
|
|
||||||
|
|
||||||
|
yesodTimeout :: ( MonadHandler m
|
||||||
|
, MonadBaseControl IO m
|
||||||
|
)
|
||||||
|
=> (HandlerSite m -> NominalDiffTime) -- ^ Calculate timeout
|
||||||
|
-> a -- ^ Default value
|
||||||
|
-> m a -- ^ Computation
|
||||||
|
-> m a -- ^ Result of computation or default value, if timeout is reached
|
||||||
|
yesodTimeout getTimeout timeoutRes act = do
|
||||||
|
timeoutLength <- getsYesod getTimeout
|
||||||
|
diffTimeout timeoutLength timeoutRes act
|
||||||
|
|
||||||
|
diffTimeout :: MonadBaseControl IO m
|
||||||
|
=> NominalDiffTime -> a -> m a -> m a
|
||||||
|
diffTimeout timeoutLength timeoutRes act = fromMaybe timeoutRes <$> timeout timeoutMicro act
|
||||||
|
where
|
||||||
|
timeoutMicro
|
||||||
|
= let (MkFixed micro :: Micro) = realToFrac timeoutLength
|
||||||
|
in fromInteger micro
|
||||||
|
|
||||||
-------------
|
-------------
|
||||||
-- Conduit --
|
-- Conduit --
|
||||||
-------------
|
-------------
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user