refactor(health): show all health checks that may lead to failure (WIP)

This commit is contained in:
Steffen Jost 2023-07-10 15:28:20 +00:00
parent ee4e67fbda
commit 9b93c00301
5 changed files with 52 additions and 34 deletions

View File

@ -3,12 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
HealthReport: Instanz-Zustand
HealthMatchingClusterConfig: Cluster-geteilte Konfiguration ist aktuell
HealthHTTPReachable: Cluster kann an der erwarteten URL über HTTP erreicht werden
HealthLDAPAdmins: Anteil der Administrator:innen mit LDAP Authentifizierung, welche tatsächlich im LDAP-Verzeichnis gefunden werden können
HealthSMTPConnect: SMTP-Server kann erreicht werden
HealthWidgetMemcached: Memcached-Server liefert Widgets korrekt aus
HealthActiveJobExecutors: Anteil der job-workers, die neue Befehle annehmen
HealthCheckMatchingClusterConfig: Cluster-geteilte Konfiguration ist aktuell
HealthCheckHTTPReachable: Cluster kann an der erwarteten URL über HTTP erreicht werden
HealthCheckLDAPAdmins: Anteil der Administrator:innen mit LDAP Authentifizierung, welche tatsächlich im LDAP-Verzeichnis gefunden werden können
HealthCheckSMTPConnect: SMTP-Server kann erreicht werden
HealthCheckWidgetMemcached: Memcached-Server liefert Widgets korrekt aus
HealthCheckActiveJobExecutors: Anteil der job-workers, die neue Befehle annehmen
HealthCheckDoesFlush: Zustandsprüfung läuft durch
InstanceIdentification: Instanz-Identifikation
InstanceId: Instanz-Nummer
ClusterId: Cluster-Nummer

View File

@ -3,12 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
HealthReport: Health report
HealthMatchingClusterConfig: Cluster config matches
HealthHTTPReachable: Cluster can be reached under the expected URL via HTTP
HealthLDAPAdmins: Proportion of administrators with LDAP authentication that were actually found in the LDAP directory
HealthSMTPConnect: SMTP server is reachable
HealthWidgetMemcached: Memcached server is serving widgets correctly
HealthActiveJobExecutors: Proportion of job workers accepting new jobs
HealthCheckMatchingClusterConfig: Cluster config matches
HealthCheckHTTPReachable: Cluster can be reached under the expected URL via HTTP
HealthCheckLDAPAdmins: Proportion of administrators with LDAP authentication that were actually found in the LDAP directory
HealthCheckSMTPConnect: SMTP server is reachable
HealthCheckWidgetMemcached: Memcached server is serving widgets correctly
HealthCheckActiveJobExecutors: Proportion of job workers accepting new jobs
HealthCheckDoesFlush: Health reports flush
InstanceIdentification: Instance identification
InstanceId: Instance id
ClusterId: Cluster id

View File

@ -247,6 +247,7 @@ mkMessageVariant ''UniWorX ''PWHashMessage "messages/auth/pw-hash" "de"
mkMessageVariant ''UniWorX ''ButtonMessage "messages/button" "de"
mkMessageVariant ''UniWorX ''FrontendMessage "messages/frontend" "de-de-formal"
embedRenderMessage ''UniWorX ''HealthCheck id -- not possible here
embedRenderMessage ''UniWorX ''AvsLicence id -- required by UniWorXAvsMessages
mkMessageAddition ''UniWorX "Qualification" "messages/uniworx/categories/qualification" "de-de-formal"
mkMessageAddition ''UniWorX "Avs" "messages/uniworx/categories/avs" "de-de-formal"

View File

@ -34,7 +34,7 @@ getHealthR = do
waitResult <- atomically $ maybe (pure $ Left False) (fmap (const $ Left True) . waitDelay) delay <|> (fmap Right . assertM (not. Set.null) $ readTVar reportStore)
case waitResult of
Left False -> sendResponseStatus noContent204 ()
Left True -> sendResponseStatus internalServerError500 ("System is not generating HealthReports" :: Text)
Left True -> sendResponseStatus internalServerError500 ("System is not generating HealthReports" :: Text) -- can this ever happen after it was non-null?
Right _ -> redirect HealthR
Just healthReports -> do
let (Max lastUpdated, Min status) = ofoldMap1 (Max *** Min . healthReportStatus) healthReports
@ -57,28 +57,41 @@ getHealthR = do
setTitleI MsgHealthReport
[whamlet|
$newline never
<h2>
$case status
$of HealthSuccess
_{MsgMessageSuccess}
$of _
_{MsgMessageError}
<hr>
<dl .deflist>
$forall (_, report) <- healthReports'
$case report
$of HealthMatchingClusterConfig passed
<dt .deflist__dt>_{MsgHealthMatchingClusterConfig}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthHTTPReachable (Just passed)
<dt .deflist__dt>_{MsgHealthHTTPReachable}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthLDAPAdmins (Just found)
<dt .deflist__dt>_{MsgHealthLDAPAdmins}
<dd .deflist__dd>#{textPercent found 1}
$of HealthSMTPConnect (Just passed)
<dt .deflist__dt>_{MsgHealthSMTPConnect}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthWidgetMemcached (Just passed)
<dt .deflist__dt>_{MsgHealthWidgetMemcached}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthActiveJobExecutors (Just active)
<dt .deflist__dt>_{MsgHealthActiveJobExecutors}
<dd .deflist__dd>#{textPercent active 1}
$of _
$with hcclass = classifyHealthReport report
$with hcstatus = HealthSuccess == healthReportStatus report
$case report
$of HealthMatchingClusterConfig passed
<dt .deflist__dt>_{MsgHealthCheckMatchingClusterConfig}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthHTTPReachable (Just passed)
<dt .deflist__dt>_{MsgHealthCheckHTTPReachable}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthLDAPAdmins (Just found)
<dt .deflist__dt>_{MsgHealthCheckLDAPAdmins}
<dd .deflist__dd>#{textPercent found 1}
$of HealthSMTPConnect (Just passed)
<dt .deflist__dt>_{MsgHealthCheckSMTPConnect}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthWidgetMemcached (Just passed)
<dt .deflist__dt>_{MsgHealthCheckWidgetMemcached}
<dd .deflist__dd>#{boolSymbol passed}
$of HealthActiveJobExecutors (Just active)
<dt .deflist__dt>_{MsgHealthCheckActiveJobExecutors}
<dd .deflist__dd>#{textPercent active 1}
$of HealthDoesFlush mProp
<dt .deflist__dt>_{hcclass}
<dd .deflist__dd>#{boolSymbol hcstatus}
$of _
|]
provideJson healthReports
provideRep . return . Builder.toLazyText $ Aeson.encodePrettyToTextBuilder healthReports
@ -105,7 +118,7 @@ getInstanceR = do
provideRep . return $ tshow instanceInfo
-- Most simple page for simple liveness checks
-- Simplest page for basic liveness checks; note that it always delivers 200
getStatusR :: Handler Html
getStatusR = do
starttime <- getsYesod appStartTime

View File

@ -27,6 +27,8 @@ instance Finite HealthCheck
instance Hashable HealthCheck
instance NFData HealthCheck
-- embedRenderMessage ''UniWorX ''HealthCheck id -- not possible here
deriveJSON defaultOptions
{ constructorTagModifier = camelToPathPiece' 2
} ''HealthCheck