From bec40236dbdedd022b968e83f563df75ab35c959 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Wed, 19 Feb 2020 13:51:21 +0100 Subject: [PATCH] feat(metrics): report on health checks --- frontend/src/app.sass | 1 + src/Application.hs | 52 ++---------------- src/Handler/Metrics.hs | 8 +++ src/Import.hs | 1 + src/Jobs/HealthReport.hs | 2 +- src/Utils/Metrics.hs | 113 +++++++++++++++++++++++++++++++++++++++ templates/metrics.hamlet | 8 +-- 7 files changed, 132 insertions(+), 53 deletions(-) create mode 100644 src/Utils/Metrics.hs diff --git a/frontend/src/app.sass b/frontend/src/app.sass index 6d14815e3..0b755dad5 100644 --- a/frontend/src/app.sass +++ b/frontend/src/app.sass @@ -463,6 +463,7 @@ ul.list--inline .deflist display: grid grid-template-columns: minmax(0,100%) + grid-row-gap: 7px .deflist__dt, .deflist__dd diff --git a/src/Application.hs b/src/Application.hs index c2274a508..0da706a22 100644 --- a/src/Application.hs +++ b/src/Application.hs @@ -22,8 +22,6 @@ import Database.Persist.Postgresql (createPostgresqlPool, pgConnStr, import Import hiding (cancel) import Language.Haskell.TH.Syntax (qLocation) import Network.Wai (Middleware) -import qualified Network.Wai as Wai -import qualified Network.HTTP.Types as HTTP import Network.Wai.Handler.Warp (Settings, defaultSettings, defaultShouldDisplayException, runSettings, runSettingsSocket, setHost, @@ -90,15 +88,8 @@ import qualified Data.Set as Set import Data.Semigroup (Min(..)) -import qualified Prometheus.Metric.GHC as Prometheus -import qualified Prometheus - -import Data.Time.Clock.POSIX - import Handler.Utils.Routes (classifyHandler) -import Data.List (cycle) - -- Import all relevant handler modules here. -- (HPack takes care to add new modules to our cabal file nowadays.) import Handler.News @@ -138,7 +129,7 @@ mkYesodDispatch "UniWorX" resourcesUniWorX -- migrations handled by Yesod. makeFoundation :: (MonadResource m, MonadUnliftIO m) => AppSettings -> m UniWorX makeFoundation appSettings'@AppSettings{..} = do - void $ Prometheus.register Prometheus.ghcMetrics + registerGHCMetrics -- Some basic initializations: HTTP connection manager, logger, and static -- subsite. @@ -306,38 +297,8 @@ makeApplication foundation = liftIO $ do logWare <- makeLogWare foundation -- Create the WAI application and apply middlewares appPlain <- toWaiAppPlain foundation - return . prometheusMiddleware . logWare $ defaultMiddlewaresNoLogging appPlain + return . observeHTTPRequestLatency classifyHandler . logWare $ defaultMiddlewaresNoLogging appPlain where - prometheusMiddleware :: Middleware - prometheusMiddleware app req respond' = do - start <- getPOSIXTime - app req $ \res -> do - end <- getPOSIXTime - let method = decodeUtf8 $ Wai.requestMethod req - status = tshow . HTTP.statusCode $ Wai.responseStatus res - route :: Maybe (Route UniWorX) - route = parseRoute ( Wai.pathInfo req - , over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req - ) - handler' = pack . classifyHandler <$> route - - labels :: Prometheus.Label3 - labels = (fromMaybe "n/a" handler', method, status) - Prometheus.withLabel requestLatency labels . flip Prometheus.observe . realToFrac $ end - start - - respond' res - -{-# NOINLINE requestLatency #-} -requestLatency :: Prometheus.Vector Prometheus.Label3 Prometheus.Histogram -requestLatency = Prometheus.unsafeRegister - $ Prometheus.vector ("handler", "method", "status") - $ Prometheus.histogram info buckets - where info = Prometheus.Info "http_request_duration_seconds" - "HTTP request latency" - buckets = map fromRational . takeWhile (<= 500) . go 50e-6 $ cycle [2, 2, 2.5] - where - go n [] = [n] - go n (f:fs) = n : go (f * n) fs makeLogWare :: MonadIO m => UniWorX -> m Middleware @@ -368,8 +329,6 @@ makeLogWare app = do logWare <- either mkLogWare return lookupRes logWare wai req fin -data ReadySince = MkReadySince - -- | Warp settings for the given foundation value. warpSettings :: UniWorX -> Settings warpSettings foundation = defaultSettings @@ -377,13 +336,8 @@ warpSettings foundation = defaultSettings let notifyReady = do $logInfoS "setup" "Ready" void . liftIO $ do - void . Prometheus.register . readyMetric =<< getCurrentTime + registerReadyMetric Systemd.notifyReady - readyMetric ts = Prometheus.Metric $ return (MkReadySince, collectReadySince) - where - collectReadySince = return [Prometheus.SampleGroup info Prometheus.GaugeType [Prometheus.Sample "ready_time" [] sample]] - info = Prometheus.Info "ready_time" "POSIXTime this Uni2work-instance became ready" - sample = encodeUtf8 . tshow . (realToFrac :: POSIXTime -> Nano) $ utcTimeToPOSIXSeconds ts if | foundation ^. _appHealthCheckDelayNotify -> void . forkIO $ do diff --git a/src/Handler/Metrics.hs b/src/Handler/Metrics.hs index b51d8ebe9..b9a7ceb7f 100644 --- a/src/Handler/Metrics.hs +++ b/src/Handler/Metrics.hs @@ -10,6 +10,8 @@ import qualified Network.Wai.Middleware.Prometheus as Prometheus import qualified Data.Text as Text import qualified Data.HashSet as HashSet +import qualified Data.Set as Set + getMetricsR :: Handler TypedContent getMetricsR = selectRep $ do @@ -43,3 +45,9 @@ getMetricsR = selectRep $ do singleSample base [Sample sName lPairs sValue] | sName == base = Just (lPairs, sValue) singleSample _ _ = Nothing + + showMetricName metricName mSamples = Set.size (Set.fromList sampleBasenames) > 1 + where + sampleBasenames = do + Sample sName _ _ <- mSamples + return $ metricBasename metricName sName diff --git a/src/Import.hs b/src/Import.hs index 27dc6e5df..cf2787a10 100644 --- a/src/Import.hs +++ b/src/Import.hs @@ -6,3 +6,4 @@ import Foundation as Import import Import.NoFoundation as Import import Utils.SystemMessage as Import +import Utils.Metrics as Import diff --git a/src/Jobs/HealthReport.hs b/src/Jobs/HealthReport.hs index 7fb45b36a..60eb2aa7f 100644 --- a/src/Jobs/HealthReport.hs +++ b/src/Jobs/HealthReport.hs @@ -32,7 +32,7 @@ import UnliftIO.Concurrent (myThreadId) generateHealthReport :: HealthCheck -> Handler HealthReport -generateHealthReport = $(dispatchTH ''HealthCheck) +generateHealthReport = withHealthReportMetrics . $(dispatchTH ''HealthCheck) dispatchHealthCheckMatchingClusterConfig :: Handler HealthReport diff --git a/src/Utils/Metrics.hs b/src/Utils/Metrics.hs new file mode 100644 index 000000000..5f4e310f5 --- /dev/null +++ b/src/Utils/Metrics.hs @@ -0,0 +1,113 @@ +module Utils.Metrics + ( withHealthReportMetrics + , registerGHCMetrics + , observeHTTPRequestLatency + , registerReadyMetric + ) where + +import Import.NoFoundation hiding (Vector, Info) +import Prometheus +import Prometheus.Metric.GHC + +import qualified Data.List as List + +import Data.Time.Clock.POSIX + +import Network.Wai (Middleware) +import qualified Network.Wai as Wai +import qualified Network.HTTP.Types as HTTP + + +histogramBuckets :: Rational -- ^ min + -> Rational -- ^ max + -> [Double] +histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $ List.cycle factors + where + go n [] = [n] + go n (f:fs) = n : go (f * n) fs + + factors + | bMin' `mod` 2 == 0 = [2.5, 2, 2] + | bMin' `mod` 5 == 0 = [2, 2, 2.5] + | otherwise = [2, 2.5, 2] + where + bMin' :: Integer + bMin' = floor . List.head . dropWhile (< 1) $ List.iterate (* 10) bMin + + +{-# NOINLINE healthReportTime #-} +healthReportTime :: Vector Label2 Gauge +healthReportTime = unsafeRegister . vector ("check", "status") $ gauge Info + { metricName = "uni2work_health_check_time" + , metricHelp = "POSIXTime of last health check performed by this Uni2work-instance" + } + +{-# NOINLINE healthReportDuration #-} +healthReportDuration :: Vector Label2 Histogram +healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets + where + info = Info + { metricName = "uni2work_health_check_duration_seconds" + , metricHelp = "Duration of last health check performed by this Uni2work-instance" + } + buckets = histogramBuckets 5e-6 100e-3 + +{-# NOINLINE httpRequestLatency #-} +httpRequestLatency :: Vector Label3 Histogram +httpRequestLatency = unsafeRegister + $ vector ("handler", "method", "status") + $ histogram info buckets + where info = Info "http_request_duration_seconds" + "HTTP request latency" + buckets = histogramBuckets 50e-6 500 + +data ReadySince = MkReadySince + +readyMetric :: POSIXTime -> Metric ReadySince +readyMetric ts = Metric $ return (MkReadySince, collectReadySince) + where + collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]] + info = Info "ready_time" "POSIXTime this Uni2work-instance became ready" + sample = encodeUtf8 $ tshow (realToFrac ts :: Nano) + + +withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport +withHealthReportMetrics act = do + before <- liftIO getPOSIXTime + report <- act + after <- liftIO getPOSIXTime + + let checkVal = toPathPiece $ classifyHealthReport report + statusVal = toPathPiece $ healthReportStatus report + timeSample, durationSample :: Double + timeSample = realToFrac (realToFrac after :: Nano) + durationSample = realToFrac (realToFrac $ after - before :: Nano) + liftIO $ withLabel healthReportTime (checkVal, statusVal) $ flip setGauge timeSample + liftIO $ withLabel healthReportDuration (checkVal, statusVal) $ flip observe durationSample + + return report + +registerGHCMetrics :: MonadIO m => m () +registerGHCMetrics = void $ register ghcMetrics + +observeHTTPRequestLatency :: forall site. ParseRoute site => (Route site -> String) -> Middleware +observeHTTPRequestLatency classifyHandler app req respond' = do + start <- getPOSIXTime + app req $ \res -> do + end <- getPOSIXTime + let method = decodeUtf8 $ Wai.requestMethod req + status = tshow . HTTP.statusCode $ Wai.responseStatus res + route :: Maybe (Route site) + route = parseRoute ( Wai.pathInfo req + , over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req + ) + handler' = pack . classifyHandler <$> route + + labels :: Label3 + labels = (fromMaybe "n/a" handler', method, status) + withLabel httpRequestLatency labels . flip observe . realToFrac $ end - start + + respond' res + +registerReadyMetric :: MonadIO m => m () +registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime diff --git a/templates/metrics.hamlet b/templates/metrics.hamlet index 1881382bc..885a79591 100644 --- a/templates/metrics.hamlet +++ b/templates/metrics.hamlet @@ -29,15 +29,17 @@ $maybe t <- metricsToken - $forall Sample sName lPairs sValue <- mSamples -
_{MsgMetricName} + $if showMetricName metricName mSamples + _{MsgMetricName} $forall l <- allLabels #{l} _{MsgMetricValue}
- #{metricBasename metricName sName} + $if showMetricName metricName mSamples + + #{metricBasename metricName sName} $forall l <- allLabels $maybe lValue <- lookup l lPairs