feat(metrics): report on health checks
This commit is contained in:
parent
5ffee38979
commit
bec40236db
@ -463,6 +463,7 @@ ul.list--inline
|
|||||||
.deflist
|
.deflist
|
||||||
display: grid
|
display: grid
|
||||||
grid-template-columns: minmax(0,100%)
|
grid-template-columns: minmax(0,100%)
|
||||||
|
grid-row-gap: 7px
|
||||||
|
|
||||||
.deflist__dt,
|
.deflist__dt,
|
||||||
.deflist__dd
|
.deflist__dd
|
||||||
|
|||||||
@ -22,8 +22,6 @@ import Database.Persist.Postgresql (createPostgresqlPool, pgConnStr,
|
|||||||
import Import hiding (cancel)
|
import Import hiding (cancel)
|
||||||
import Language.Haskell.TH.Syntax (qLocation)
|
import Language.Haskell.TH.Syntax (qLocation)
|
||||||
import Network.Wai (Middleware)
|
import Network.Wai (Middleware)
|
||||||
import qualified Network.Wai as Wai
|
|
||||||
import qualified Network.HTTP.Types as HTTP
|
|
||||||
import Network.Wai.Handler.Warp (Settings, defaultSettings,
|
import Network.Wai.Handler.Warp (Settings, defaultSettings,
|
||||||
defaultShouldDisplayException,
|
defaultShouldDisplayException,
|
||||||
runSettings, runSettingsSocket, setHost,
|
runSettings, runSettingsSocket, setHost,
|
||||||
@ -90,15 +88,8 @@ import qualified Data.Set as Set
|
|||||||
|
|
||||||
import Data.Semigroup (Min(..))
|
import Data.Semigroup (Min(..))
|
||||||
|
|
||||||
import qualified Prometheus.Metric.GHC as Prometheus
|
|
||||||
import qualified Prometheus
|
|
||||||
|
|
||||||
import Data.Time.Clock.POSIX
|
|
||||||
|
|
||||||
import Handler.Utils.Routes (classifyHandler)
|
import Handler.Utils.Routes (classifyHandler)
|
||||||
|
|
||||||
import Data.List (cycle)
|
|
||||||
|
|
||||||
-- Import all relevant handler modules here.
|
-- Import all relevant handler modules here.
|
||||||
-- (HPack takes care to add new modules to our cabal file nowadays.)
|
-- (HPack takes care to add new modules to our cabal file nowadays.)
|
||||||
import Handler.News
|
import Handler.News
|
||||||
@ -138,7 +129,7 @@ mkYesodDispatch "UniWorX" resourcesUniWorX
|
|||||||
-- migrations handled by Yesod.
|
-- migrations handled by Yesod.
|
||||||
makeFoundation :: (MonadResource m, MonadUnliftIO m) => AppSettings -> m UniWorX
|
makeFoundation :: (MonadResource m, MonadUnliftIO m) => AppSettings -> m UniWorX
|
||||||
makeFoundation appSettings'@AppSettings{..} = do
|
makeFoundation appSettings'@AppSettings{..} = do
|
||||||
void $ Prometheus.register Prometheus.ghcMetrics
|
registerGHCMetrics
|
||||||
|
|
||||||
-- Some basic initializations: HTTP connection manager, logger, and static
|
-- Some basic initializations: HTTP connection manager, logger, and static
|
||||||
-- subsite.
|
-- subsite.
|
||||||
@ -306,38 +297,8 @@ makeApplication foundation = liftIO $ do
|
|||||||
logWare <- makeLogWare foundation
|
logWare <- makeLogWare foundation
|
||||||
-- Create the WAI application and apply middlewares
|
-- Create the WAI application and apply middlewares
|
||||||
appPlain <- toWaiAppPlain foundation
|
appPlain <- toWaiAppPlain foundation
|
||||||
return . prometheusMiddleware . logWare $ defaultMiddlewaresNoLogging appPlain
|
return . observeHTTPRequestLatency classifyHandler . logWare $ defaultMiddlewaresNoLogging appPlain
|
||||||
where
|
where
|
||||||
prometheusMiddleware :: Middleware
|
|
||||||
prometheusMiddleware app req respond' = do
|
|
||||||
start <- getPOSIXTime
|
|
||||||
app req $ \res -> do
|
|
||||||
end <- getPOSIXTime
|
|
||||||
let method = decodeUtf8 $ Wai.requestMethod req
|
|
||||||
status = tshow . HTTP.statusCode $ Wai.responseStatus res
|
|
||||||
route :: Maybe (Route UniWorX)
|
|
||||||
route = parseRoute ( Wai.pathInfo req
|
|
||||||
, over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req
|
|
||||||
)
|
|
||||||
handler' = pack . classifyHandler <$> route
|
|
||||||
|
|
||||||
labels :: Prometheus.Label3
|
|
||||||
labels = (fromMaybe "n/a" handler', method, status)
|
|
||||||
Prometheus.withLabel requestLatency labels . flip Prometheus.observe . realToFrac $ end - start
|
|
||||||
|
|
||||||
respond' res
|
|
||||||
|
|
||||||
{-# NOINLINE requestLatency #-}
|
|
||||||
requestLatency :: Prometheus.Vector Prometheus.Label3 Prometheus.Histogram
|
|
||||||
requestLatency = Prometheus.unsafeRegister
|
|
||||||
$ Prometheus.vector ("handler", "method", "status")
|
|
||||||
$ Prometheus.histogram info buckets
|
|
||||||
where info = Prometheus.Info "http_request_duration_seconds"
|
|
||||||
"HTTP request latency"
|
|
||||||
buckets = map fromRational . takeWhile (<= 500) . go 50e-6 $ cycle [2, 2, 2.5]
|
|
||||||
where
|
|
||||||
go n [] = [n]
|
|
||||||
go n (f:fs) = n : go (f * n) fs
|
|
||||||
|
|
||||||
|
|
||||||
makeLogWare :: MonadIO m => UniWorX -> m Middleware
|
makeLogWare :: MonadIO m => UniWorX -> m Middleware
|
||||||
@ -368,8 +329,6 @@ makeLogWare app = do
|
|||||||
logWare <- either mkLogWare return lookupRes
|
logWare <- either mkLogWare return lookupRes
|
||||||
logWare wai req fin
|
logWare wai req fin
|
||||||
|
|
||||||
data ReadySince = MkReadySince
|
|
||||||
|
|
||||||
-- | Warp settings for the given foundation value.
|
-- | Warp settings for the given foundation value.
|
||||||
warpSettings :: UniWorX -> Settings
|
warpSettings :: UniWorX -> Settings
|
||||||
warpSettings foundation = defaultSettings
|
warpSettings foundation = defaultSettings
|
||||||
@ -377,13 +336,8 @@ warpSettings foundation = defaultSettings
|
|||||||
let notifyReady = do
|
let notifyReady = do
|
||||||
$logInfoS "setup" "Ready"
|
$logInfoS "setup" "Ready"
|
||||||
void . liftIO $ do
|
void . liftIO $ do
|
||||||
void . Prometheus.register . readyMetric =<< getCurrentTime
|
registerReadyMetric
|
||||||
Systemd.notifyReady
|
Systemd.notifyReady
|
||||||
readyMetric ts = Prometheus.Metric $ return (MkReadySince, collectReadySince)
|
|
||||||
where
|
|
||||||
collectReadySince = return [Prometheus.SampleGroup info Prometheus.GaugeType [Prometheus.Sample "ready_time" [] sample]]
|
|
||||||
info = Prometheus.Info "ready_time" "POSIXTime this Uni2work-instance became ready"
|
|
||||||
sample = encodeUtf8 . tshow . (realToFrac :: POSIXTime -> Nano) $ utcTimeToPOSIXSeconds ts
|
|
||||||
if
|
if
|
||||||
| foundation ^. _appHealthCheckDelayNotify
|
| foundation ^. _appHealthCheckDelayNotify
|
||||||
-> void . forkIO $ do
|
-> void . forkIO $ do
|
||||||
|
|||||||
@ -10,6 +10,8 @@ import qualified Network.Wai.Middleware.Prometheus as Prometheus
|
|||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
import qualified Data.HashSet as HashSet
|
import qualified Data.HashSet as HashSet
|
||||||
|
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
|
||||||
|
|
||||||
getMetricsR :: Handler TypedContent
|
getMetricsR :: Handler TypedContent
|
||||||
getMetricsR = selectRep $ do
|
getMetricsR = selectRep $ do
|
||||||
@ -43,3 +45,9 @@ getMetricsR = selectRep $ do
|
|||||||
singleSample base [Sample sName lPairs sValue]
|
singleSample base [Sample sName lPairs sValue]
|
||||||
| sName == base = Just (lPairs, sValue)
|
| sName == base = Just (lPairs, sValue)
|
||||||
singleSample _ _ = Nothing
|
singleSample _ _ = Nothing
|
||||||
|
|
||||||
|
showMetricName metricName mSamples = Set.size (Set.fromList sampleBasenames) > 1
|
||||||
|
where
|
||||||
|
sampleBasenames = do
|
||||||
|
Sample sName _ _ <- mSamples
|
||||||
|
return $ metricBasename metricName sName
|
||||||
|
|||||||
@ -6,3 +6,4 @@ import Foundation as Import
|
|||||||
import Import.NoFoundation as Import
|
import Import.NoFoundation as Import
|
||||||
|
|
||||||
import Utils.SystemMessage as Import
|
import Utils.SystemMessage as Import
|
||||||
|
import Utils.Metrics as Import
|
||||||
|
|||||||
@ -32,7 +32,7 @@ import UnliftIO.Concurrent (myThreadId)
|
|||||||
|
|
||||||
|
|
||||||
generateHealthReport :: HealthCheck -> Handler HealthReport
|
generateHealthReport :: HealthCheck -> Handler HealthReport
|
||||||
generateHealthReport = $(dispatchTH ''HealthCheck)
|
generateHealthReport = withHealthReportMetrics . $(dispatchTH ''HealthCheck)
|
||||||
|
|
||||||
|
|
||||||
dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport
|
dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport
|
||||||
|
|||||||
113
src/Utils/Metrics.hs
Normal file
113
src/Utils/Metrics.hs
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
module Utils.Metrics
|
||||||
|
( withHealthReportMetrics
|
||||||
|
, registerGHCMetrics
|
||||||
|
, observeHTTPRequestLatency
|
||||||
|
, registerReadyMetric
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Import.NoFoundation hiding (Vector, Info)
|
||||||
|
import Prometheus
|
||||||
|
import Prometheus.Metric.GHC
|
||||||
|
|
||||||
|
import qualified Data.List as List
|
||||||
|
|
||||||
|
import Data.Time.Clock.POSIX
|
||||||
|
|
||||||
|
import Network.Wai (Middleware)
|
||||||
|
import qualified Network.Wai as Wai
|
||||||
|
import qualified Network.HTTP.Types as HTTP
|
||||||
|
|
||||||
|
|
||||||
|
histogramBuckets :: Rational -- ^ min
|
||||||
|
-> Rational -- ^ max
|
||||||
|
-> [Double]
|
||||||
|
histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $ List.cycle factors
|
||||||
|
where
|
||||||
|
go n [] = [n]
|
||||||
|
go n (f:fs) = n : go (f * n) fs
|
||||||
|
|
||||||
|
factors
|
||||||
|
| bMin' `mod` 2 == 0 = [2.5, 2, 2]
|
||||||
|
| bMin' `mod` 5 == 0 = [2, 2, 2.5]
|
||||||
|
| otherwise = [2, 2.5, 2]
|
||||||
|
where
|
||||||
|
bMin' :: Integer
|
||||||
|
bMin' = floor . List.head . dropWhile (< 1) $ List.iterate (* 10) bMin
|
||||||
|
|
||||||
|
|
||||||
|
{-# NOINLINE healthReportTime #-}
|
||||||
|
healthReportTime :: Vector Label2 Gauge
|
||||||
|
healthReportTime = unsafeRegister . vector ("check", "status") $ gauge Info
|
||||||
|
{ metricName = "uni2work_health_check_time"
|
||||||
|
, metricHelp = "POSIXTime of last health check performed by this Uni2work-instance"
|
||||||
|
}
|
||||||
|
|
||||||
|
{-# NOINLINE healthReportDuration #-}
|
||||||
|
healthReportDuration :: Vector Label2 Histogram
|
||||||
|
healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets
|
||||||
|
where
|
||||||
|
info = Info
|
||||||
|
{ metricName = "uni2work_health_check_duration_seconds"
|
||||||
|
, metricHelp = "Duration of last health check performed by this Uni2work-instance"
|
||||||
|
}
|
||||||
|
buckets = histogramBuckets 5e-6 100e-3
|
||||||
|
|
||||||
|
{-# NOINLINE httpRequestLatency #-}
|
||||||
|
httpRequestLatency :: Vector Label3 Histogram
|
||||||
|
httpRequestLatency = unsafeRegister
|
||||||
|
$ vector ("handler", "method", "status")
|
||||||
|
$ histogram info buckets
|
||||||
|
where info = Info "http_request_duration_seconds"
|
||||||
|
"HTTP request latency"
|
||||||
|
buckets = histogramBuckets 50e-6 500
|
||||||
|
|
||||||
|
data ReadySince = MkReadySince
|
||||||
|
|
||||||
|
readyMetric :: POSIXTime -> Metric ReadySince
|
||||||
|
readyMetric ts = Metric $ return (MkReadySince, collectReadySince)
|
||||||
|
where
|
||||||
|
collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]]
|
||||||
|
info = Info "ready_time" "POSIXTime this Uni2work-instance became ready"
|
||||||
|
sample = encodeUtf8 $ tshow (realToFrac ts :: Nano)
|
||||||
|
|
||||||
|
|
||||||
|
withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport
|
||||||
|
withHealthReportMetrics act = do
|
||||||
|
before <- liftIO getPOSIXTime
|
||||||
|
report <- act
|
||||||
|
after <- liftIO getPOSIXTime
|
||||||
|
|
||||||
|
let checkVal = toPathPiece $ classifyHealthReport report
|
||||||
|
statusVal = toPathPiece $ healthReportStatus report
|
||||||
|
timeSample, durationSample :: Double
|
||||||
|
timeSample = realToFrac (realToFrac after :: Nano)
|
||||||
|
durationSample = realToFrac (realToFrac $ after - before :: Nano)
|
||||||
|
liftIO $ withLabel healthReportTime (checkVal, statusVal) $ flip setGauge timeSample
|
||||||
|
liftIO $ withLabel healthReportDuration (checkVal, statusVal) $ flip observe durationSample
|
||||||
|
|
||||||
|
return report
|
||||||
|
|
||||||
|
registerGHCMetrics :: MonadIO m => m ()
|
||||||
|
registerGHCMetrics = void $ register ghcMetrics
|
||||||
|
|
||||||
|
observeHTTPRequestLatency :: forall site. ParseRoute site => (Route site -> String) -> Middleware
|
||||||
|
observeHTTPRequestLatency classifyHandler app req respond' = do
|
||||||
|
start <- getPOSIXTime
|
||||||
|
app req $ \res -> do
|
||||||
|
end <- getPOSIXTime
|
||||||
|
let method = decodeUtf8 $ Wai.requestMethod req
|
||||||
|
status = tshow . HTTP.statusCode $ Wai.responseStatus res
|
||||||
|
route :: Maybe (Route site)
|
||||||
|
route = parseRoute ( Wai.pathInfo req
|
||||||
|
, over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req
|
||||||
|
)
|
||||||
|
handler' = pack . classifyHandler <$> route
|
||||||
|
|
||||||
|
labels :: Label3
|
||||||
|
labels = (fromMaybe "n/a" handler', method, status)
|
||||||
|
withLabel httpRequestLatency labels . flip observe . realToFrac $ end - start
|
||||||
|
|
||||||
|
respond' res
|
||||||
|
|
||||||
|
registerReadyMetric :: MonadIO m => m ()
|
||||||
|
registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime
|
||||||
@ -29,15 +29,17 @@ $maybe t <- metricsToken
|
|||||||
<table .table .table--striped .table--hover>
|
<table .table .table--striped .table--hover>
|
||||||
<thead>
|
<thead>
|
||||||
<tr .table__row .table__row--head>
|
<tr .table__row .table__row--head>
|
||||||
<th .table__th>_{MsgMetricName}
|
$if showMetricName metricName mSamples
|
||||||
|
<th .table__th>_{MsgMetricName}
|
||||||
$forall l <- allLabels
|
$forall l <- allLabels
|
||||||
<th .table__th style="font-family: monospace">#{l}
|
<th .table__th style="font-family: monospace">#{l}
|
||||||
<th .table__th>_{MsgMetricValue}
|
<th .table__th>_{MsgMetricValue}
|
||||||
<tbody>
|
<tbody>
|
||||||
$forall Sample sName lPairs sValue <- mSamples
|
$forall Sample sName lPairs sValue <- mSamples
|
||||||
<tr .table__row>
|
<tr .table__row>
|
||||||
<td .table__td title=#{sName}>
|
$if showMetricName metricName mSamples
|
||||||
#{metricBasename metricName sName}
|
<td .table__td title=#{sName}>
|
||||||
|
#{metricBasename metricName sName}
|
||||||
$forall l <- allLabels
|
$forall l <- allLabels
|
||||||
<td .table__td>
|
<td .table__td>
|
||||||
$maybe lValue <- lookup l lPairs
|
$maybe lValue <- lookup l lPairs
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user