feat(metrics): report on health checks
This commit is contained in:
parent
5ffee38979
commit
bec40236db
@ -463,6 +463,7 @@ ul.list--inline
|
||||
.deflist
|
||||
display: grid
|
||||
grid-template-columns: minmax(0,100%)
|
||||
grid-row-gap: 7px
|
||||
|
||||
.deflist__dt,
|
||||
.deflist__dd
|
||||
|
||||
@ -22,8 +22,6 @@ import Database.Persist.Postgresql (createPostgresqlPool, pgConnStr,
|
||||
import Import hiding (cancel)
|
||||
import Language.Haskell.TH.Syntax (qLocation)
|
||||
import Network.Wai (Middleware)
|
||||
import qualified Network.Wai as Wai
|
||||
import qualified Network.HTTP.Types as HTTP
|
||||
import Network.Wai.Handler.Warp (Settings, defaultSettings,
|
||||
defaultShouldDisplayException,
|
||||
runSettings, runSettingsSocket, setHost,
|
||||
@ -90,15 +88,8 @@ import qualified Data.Set as Set
|
||||
|
||||
import Data.Semigroup (Min(..))
|
||||
|
||||
import qualified Prometheus.Metric.GHC as Prometheus
|
||||
import qualified Prometheus
|
||||
|
||||
import Data.Time.Clock.POSIX
|
||||
|
||||
import Handler.Utils.Routes (classifyHandler)
|
||||
|
||||
import Data.List (cycle)
|
||||
|
||||
-- Import all relevant handler modules here.
|
||||
-- (HPack takes care to add new modules to our cabal file nowadays.)
|
||||
import Handler.News
|
||||
@ -138,7 +129,7 @@ mkYesodDispatch "UniWorX" resourcesUniWorX
|
||||
-- migrations handled by Yesod.
|
||||
makeFoundation :: (MonadResource m, MonadUnliftIO m) => AppSettings -> m UniWorX
|
||||
makeFoundation appSettings'@AppSettings{..} = do
|
||||
void $ Prometheus.register Prometheus.ghcMetrics
|
||||
registerGHCMetrics
|
||||
|
||||
-- Some basic initializations: HTTP connection manager, logger, and static
|
||||
-- subsite.
|
||||
@ -306,38 +297,8 @@ makeApplication foundation = liftIO $ do
|
||||
logWare <- makeLogWare foundation
|
||||
-- Create the WAI application and apply middlewares
|
||||
appPlain <- toWaiAppPlain foundation
|
||||
return . prometheusMiddleware . logWare $ defaultMiddlewaresNoLogging appPlain
|
||||
return . observeHTTPRequestLatency classifyHandler . logWare $ defaultMiddlewaresNoLogging appPlain
|
||||
where
|
||||
prometheusMiddleware :: Middleware
|
||||
prometheusMiddleware app req respond' = do
|
||||
start <- getPOSIXTime
|
||||
app req $ \res -> do
|
||||
end <- getPOSIXTime
|
||||
let method = decodeUtf8 $ Wai.requestMethod req
|
||||
status = tshow . HTTP.statusCode $ Wai.responseStatus res
|
||||
route :: Maybe (Route UniWorX)
|
||||
route = parseRoute ( Wai.pathInfo req
|
||||
, over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req
|
||||
)
|
||||
handler' = pack . classifyHandler <$> route
|
||||
|
||||
labels :: Prometheus.Label3
|
||||
labels = (fromMaybe "n/a" handler', method, status)
|
||||
Prometheus.withLabel requestLatency labels . flip Prometheus.observe . realToFrac $ end - start
|
||||
|
||||
respond' res
|
||||
|
||||
{-# NOINLINE requestLatency #-}
|
||||
requestLatency :: Prometheus.Vector Prometheus.Label3 Prometheus.Histogram
|
||||
requestLatency = Prometheus.unsafeRegister
|
||||
$ Prometheus.vector ("handler", "method", "status")
|
||||
$ Prometheus.histogram info buckets
|
||||
where info = Prometheus.Info "http_request_duration_seconds"
|
||||
"HTTP request latency"
|
||||
buckets = map fromRational . takeWhile (<= 500) . go 50e-6 $ cycle [2, 2, 2.5]
|
||||
where
|
||||
go n [] = [n]
|
||||
go n (f:fs) = n : go (f * n) fs
|
||||
|
||||
|
||||
makeLogWare :: MonadIO m => UniWorX -> m Middleware
|
||||
@ -368,8 +329,6 @@ makeLogWare app = do
|
||||
logWare <- either mkLogWare return lookupRes
|
||||
logWare wai req fin
|
||||
|
||||
data ReadySince = MkReadySince
|
||||
|
||||
-- | Warp settings for the given foundation value.
|
||||
warpSettings :: UniWorX -> Settings
|
||||
warpSettings foundation = defaultSettings
|
||||
@ -377,13 +336,8 @@ warpSettings foundation = defaultSettings
|
||||
let notifyReady = do
|
||||
$logInfoS "setup" "Ready"
|
||||
void . liftIO $ do
|
||||
void . Prometheus.register . readyMetric =<< getCurrentTime
|
||||
registerReadyMetric
|
||||
Systemd.notifyReady
|
||||
readyMetric ts = Prometheus.Metric $ return (MkReadySince, collectReadySince)
|
||||
where
|
||||
collectReadySince = return [Prometheus.SampleGroup info Prometheus.GaugeType [Prometheus.Sample "ready_time" [] sample]]
|
||||
info = Prometheus.Info "ready_time" "POSIXTime this Uni2work-instance became ready"
|
||||
sample = encodeUtf8 . tshow . (realToFrac :: POSIXTime -> Nano) $ utcTimeToPOSIXSeconds ts
|
||||
if
|
||||
| foundation ^. _appHealthCheckDelayNotify
|
||||
-> void . forkIO $ do
|
||||
|
||||
@ -10,6 +10,8 @@ import qualified Network.Wai.Middleware.Prometheus as Prometheus
|
||||
import qualified Data.Text as Text
|
||||
import qualified Data.HashSet as HashSet
|
||||
|
||||
import qualified Data.Set as Set
|
||||
|
||||
|
||||
getMetricsR :: Handler TypedContent
|
||||
getMetricsR = selectRep $ do
|
||||
@ -43,3 +45,9 @@ getMetricsR = selectRep $ do
|
||||
singleSample base [Sample sName lPairs sValue]
|
||||
| sName == base = Just (lPairs, sValue)
|
||||
singleSample _ _ = Nothing
|
||||
|
||||
showMetricName metricName mSamples = Set.size (Set.fromList sampleBasenames) > 1
|
||||
where
|
||||
sampleBasenames = do
|
||||
Sample sName _ _ <- mSamples
|
||||
return $ metricBasename metricName sName
|
||||
|
||||
@ -6,3 +6,4 @@ import Foundation as Import
|
||||
import Import.NoFoundation as Import
|
||||
|
||||
import Utils.SystemMessage as Import
|
||||
import Utils.Metrics as Import
|
||||
|
||||
@ -32,7 +32,7 @@ import UnliftIO.Concurrent (myThreadId)
|
||||
|
||||
|
||||
generateHealthReport :: HealthCheck -> Handler HealthReport
|
||||
generateHealthReport = $(dispatchTH ''HealthCheck)
|
||||
generateHealthReport = withHealthReportMetrics . $(dispatchTH ''HealthCheck)
|
||||
|
||||
|
||||
dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport
|
||||
|
||||
113
src/Utils/Metrics.hs
Normal file
113
src/Utils/Metrics.hs
Normal file
@ -0,0 +1,113 @@
|
||||
module Utils.Metrics
|
||||
( withHealthReportMetrics
|
||||
, registerGHCMetrics
|
||||
, observeHTTPRequestLatency
|
||||
, registerReadyMetric
|
||||
) where
|
||||
|
||||
import Import.NoFoundation hiding (Vector, Info)
|
||||
import Prometheus
|
||||
import Prometheus.Metric.GHC
|
||||
|
||||
import qualified Data.List as List
|
||||
|
||||
import Data.Time.Clock.POSIX
|
||||
|
||||
import Network.Wai (Middleware)
|
||||
import qualified Network.Wai as Wai
|
||||
import qualified Network.HTTP.Types as HTTP
|
||||
|
||||
|
||||
histogramBuckets :: Rational -- ^ min
|
||||
-> Rational -- ^ max
|
||||
-> [Double]
|
||||
histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $ List.cycle factors
|
||||
where
|
||||
go n [] = [n]
|
||||
go n (f:fs) = n : go (f * n) fs
|
||||
|
||||
factors
|
||||
| bMin' `mod` 2 == 0 = [2.5, 2, 2]
|
||||
| bMin' `mod` 5 == 0 = [2, 2, 2.5]
|
||||
| otherwise = [2, 2.5, 2]
|
||||
where
|
||||
bMin' :: Integer
|
||||
bMin' = floor . List.head . dropWhile (< 1) $ List.iterate (* 10) bMin
|
||||
|
||||
|
||||
{-# NOINLINE healthReportTime #-}
|
||||
healthReportTime :: Vector Label2 Gauge
|
||||
healthReportTime = unsafeRegister . vector ("check", "status") $ gauge Info
|
||||
{ metricName = "uni2work_health_check_time"
|
||||
, metricHelp = "POSIXTime of last health check performed by this Uni2work-instance"
|
||||
}
|
||||
|
||||
{-# NOINLINE healthReportDuration #-}
|
||||
healthReportDuration :: Vector Label2 Histogram
|
||||
healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets
|
||||
where
|
||||
info = Info
|
||||
{ metricName = "uni2work_health_check_duration_seconds"
|
||||
, metricHelp = "Duration of last health check performed by this Uni2work-instance"
|
||||
}
|
||||
buckets = histogramBuckets 5e-6 100e-3
|
||||
|
||||
{-# NOINLINE httpRequestLatency #-}
|
||||
httpRequestLatency :: Vector Label3 Histogram
|
||||
httpRequestLatency = unsafeRegister
|
||||
$ vector ("handler", "method", "status")
|
||||
$ histogram info buckets
|
||||
where info = Info "http_request_duration_seconds"
|
||||
"HTTP request latency"
|
||||
buckets = histogramBuckets 50e-6 500
|
||||
|
||||
data ReadySince = MkReadySince
|
||||
|
||||
readyMetric :: POSIXTime -> Metric ReadySince
|
||||
readyMetric ts = Metric $ return (MkReadySince, collectReadySince)
|
||||
where
|
||||
collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]]
|
||||
info = Info "ready_time" "POSIXTime this Uni2work-instance became ready"
|
||||
sample = encodeUtf8 $ tshow (realToFrac ts :: Nano)
|
||||
|
||||
|
||||
withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport
|
||||
withHealthReportMetrics act = do
|
||||
before <- liftIO getPOSIXTime
|
||||
report <- act
|
||||
after <- liftIO getPOSIXTime
|
||||
|
||||
let checkVal = toPathPiece $ classifyHealthReport report
|
||||
statusVal = toPathPiece $ healthReportStatus report
|
||||
timeSample, durationSample :: Double
|
||||
timeSample = realToFrac (realToFrac after :: Nano)
|
||||
durationSample = realToFrac (realToFrac $ after - before :: Nano)
|
||||
liftIO $ withLabel healthReportTime (checkVal, statusVal) $ flip setGauge timeSample
|
||||
liftIO $ withLabel healthReportDuration (checkVal, statusVal) $ flip observe durationSample
|
||||
|
||||
return report
|
||||
|
||||
registerGHCMetrics :: MonadIO m => m ()
|
||||
registerGHCMetrics = void $ register ghcMetrics
|
||||
|
||||
observeHTTPRequestLatency :: forall site. ParseRoute site => (Route site -> String) -> Middleware
|
||||
observeHTTPRequestLatency classifyHandler app req respond' = do
|
||||
start <- getPOSIXTime
|
||||
app req $ \res -> do
|
||||
end <- getPOSIXTime
|
||||
let method = decodeUtf8 $ Wai.requestMethod req
|
||||
status = tshow . HTTP.statusCode $ Wai.responseStatus res
|
||||
route :: Maybe (Route site)
|
||||
route = parseRoute ( Wai.pathInfo req
|
||||
, over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req
|
||||
)
|
||||
handler' = pack . classifyHandler <$> route
|
||||
|
||||
labels :: Label3
|
||||
labels = (fromMaybe "n/a" handler', method, status)
|
||||
withLabel httpRequestLatency labels . flip observe . realToFrac $ end - start
|
||||
|
||||
respond' res
|
||||
|
||||
registerReadyMetric :: MonadIO m => m ()
|
||||
registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime
|
||||
@ -29,15 +29,17 @@ $maybe t <- metricsToken
|
||||
<table .table .table--striped .table--hover>
|
||||
<thead>
|
||||
<tr .table__row .table__row--head>
|
||||
<th .table__th>_{MsgMetricName}
|
||||
$if showMetricName metricName mSamples
|
||||
<th .table__th>_{MsgMetricName}
|
||||
$forall l <- allLabels
|
||||
<th .table__th style="font-family: monospace">#{l}
|
||||
<th .table__th>_{MsgMetricValue}
|
||||
<tbody>
|
||||
$forall Sample sName lPairs sValue <- mSamples
|
||||
<tr .table__row>
|
||||
<td .table__td title=#{sName}>
|
||||
#{metricBasename metricName sName}
|
||||
$if showMetricName metricName mSamples
|
||||
<td .table__td title=#{sName}>
|
||||
#{metricBasename metricName sName}
|
||||
$forall l <- allLabels
|
||||
<td .table__td>
|
||||
$maybe lValue <- lookup l lPairs
|
||||
|
||||
Loading…
Reference in New Issue
Block a user