feat(metrics): monitor job durations
This commit is contained in:
parent
697c3e11fc
commit
0da6c49392
@ -402,7 +402,7 @@ handleJobs' wNum = C.mapM_ $ \jctl -> do
|
||||
instanceID' <- getsYesod $ view instanceID
|
||||
now <- liftIO getCurrentTime
|
||||
|
||||
performJob content
|
||||
withJobDuration (classifyJob content) $ performJob content
|
||||
|
||||
-- `performJob` is expected to throw an exception if it detects that the job was not done
|
||||
runDB . setSerializable $ do
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
module Jobs.Types
|
||||
( Job(..), Notification(..)
|
||||
, classifyJob
|
||||
, JobCtl(..)
|
||||
, JobContext(..)
|
||||
, JobState(..)
|
||||
@ -12,11 +13,14 @@ module Jobs.Types
|
||||
, JobPriority(..), prioritiseJob
|
||||
) where
|
||||
|
||||
import Import.NoFoundation hiding (Unique)
|
||||
import Import.NoFoundation hiding (Unique, state)
|
||||
|
||||
import qualified Data.Aeson as Aeson
|
||||
import Data.Aeson (defaultOptions, Options(..), SumEncoding(..))
|
||||
import Data.Aeson.TH (deriveJSON)
|
||||
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
|
||||
import Data.List.NonEmpty (NonEmpty)
|
||||
|
||||
import Data.Unique
|
||||
@ -27,6 +31,8 @@ import qualified Data.Set as Set
|
||||
import Data.PQueue.Prio.Max (MaxPQueue)
|
||||
import qualified Data.PQueue.Prio.Max as PQ
|
||||
|
||||
import Utils.Metrics (observeJobQueueDepth)
|
||||
|
||||
|
||||
data Job = JobSendNotification { jRecipient :: UserId, jNotification :: Notification }
|
||||
| JobSendTestEmail { jEmail :: Email, jMailContext :: MailContext }
|
||||
@ -116,6 +122,12 @@ deriveJSON defaultOptions
|
||||
, sumEncoding = TaggedObject "notification" "data"
|
||||
} ''Notification
|
||||
|
||||
classifyJob :: Job -> String
|
||||
classifyJob job = unpack tag
|
||||
where
|
||||
Aeson.Object obj = Aeson.toJSON job
|
||||
Aeson.String tag = obj HashMap.! "job"
|
||||
|
||||
|
||||
data JobCtl = JobCtlFlush
|
||||
| JobCtlPerform QueuedJobId
|
||||
@ -151,6 +163,8 @@ instance Universe JobPriority
|
||||
instance Finite JobPriority
|
||||
instance NFData JobPriority
|
||||
|
||||
nullaryPathPiece ''JobPriority $ camelToPathPiece' 2
|
||||
|
||||
prioritiseJob :: JobCtl -> JobPriority
|
||||
prioritiseJob JobCtlTest = JobPrioRealtime
|
||||
prioritiseJob (JobCtlGenerateHealthReport _) = JobPrioRealtime
|
||||
|
||||
@ -3,6 +3,7 @@ module Utils.Metrics
|
||||
, registerGHCMetrics
|
||||
, observeHTTPRequestLatency
|
||||
, registerReadyMetric
|
||||
, withJobDuration
|
||||
) where
|
||||
|
||||
import Import.NoFoundation hiding (Vector, Info)
|
||||
@ -37,28 +38,24 @@ histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $
|
||||
|
||||
{-# NOINLINE healthReportTime #-}
|
||||
healthReportTime :: Vector Label2 Gauge
|
||||
healthReportTime = unsafeRegister . vector ("check", "status") $ gauge Info
|
||||
{ metricName = "uni2work_health_check_time"
|
||||
, metricHelp = "POSIXTime of last health check performed by this Uni2work-instance"
|
||||
}
|
||||
healthReportTime = unsafeRegister . vector ("check", "status") $ gauge info
|
||||
where
|
||||
info = Info "uni2work_health_check_time"
|
||||
"POSIXTime of last health check performed by this Uni2work-instance"
|
||||
|
||||
{-# NOINLINE healthReportDuration #-}
|
||||
healthReportDuration :: Vector Label2 Histogram
|
||||
healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets
|
||||
where
|
||||
info = Info
|
||||
{ metricName = "uni2work_health_check_duration_seconds"
|
||||
, metricHelp = "Duration of last health check performed by this Uni2work-instance"
|
||||
}
|
||||
info = Info "uni2work_health_check_duration_seconds"
|
||||
"Duration of last health check performed by this Uni2work-instance"
|
||||
buckets = histogramBuckets 5e-6 100e-3
|
||||
|
||||
{-# NOINLINE httpRequestLatency #-}
|
||||
httpRequestLatency :: Vector Label3 Histogram
|
||||
httpRequestLatency = unsafeRegister
|
||||
$ vector ("handler", "method", "status")
|
||||
$ histogram info buckets
|
||||
httpRequestLatency = unsafeRegister . vector ("handler", "method", "status") $ histogram info buckets
|
||||
where info = Info "http_request_duration_seconds"
|
||||
"HTTP request latency"
|
||||
"HTTP request latency"
|
||||
buckets = histogramBuckets 50e-6 500
|
||||
|
||||
data ReadySince = MkReadySince
|
||||
@ -67,9 +64,23 @@ readyMetric :: POSIXTime -> Metric ReadySince
|
||||
readyMetric ts = Metric $ return (MkReadySince, collectReadySince)
|
||||
where
|
||||
collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]]
|
||||
info = Info "ready_time" "POSIXTime this Uni2work-instance became ready"
|
||||
info = Info "ready_time"
|
||||
"POSIXTime this Uni2work-instance became ready"
|
||||
sample = encodeUtf8 $ tshow (realToFrac ts :: Nano)
|
||||
|
||||
{-# NOINLINE jobDuration #-}
|
||||
jobDuration :: Vector Label2 Histogram
|
||||
jobDuration = unsafeRegister . vector ("kind", "status") $ histogram info buckets
|
||||
where info = Info "uni2work_job_duration_seconds"
|
||||
"Duration of time taken to execute a background job"
|
||||
buckets = histogramBuckets 5e-6 500
|
||||
|
||||
{-# NOINLINE jobQueueDepth #-}
|
||||
jobQueueDepth :: Vector Label2 Gauge
|
||||
jobQueueDepth = unsafeRegister . vector ("worker", "priority") $ gauge info
|
||||
where info = Info "uni2work_job_queue_size_count"
|
||||
"Current depth of worker queue"
|
||||
|
||||
|
||||
withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport
|
||||
withHealthReportMetrics act = do
|
||||
@ -111,3 +122,15 @@ observeHTTPRequestLatency classifyHandler app req respond' = do
|
||||
|
||||
registerReadyMetric :: MonadIO m => m ()
|
||||
registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime
|
||||
|
||||
withJobDuration :: (MonadIO m, MonadCatch m) => String -> m a -> m a
|
||||
withJobDuration job doJob = do
|
||||
start <- liftIO getPOSIXTime
|
||||
res <- handleAll (return . Left) $ Right <$> doJob
|
||||
end <- liftIO getPOSIXTime
|
||||
|
||||
liftIO . withLabel jobDuration (pack job, bool "failure" "success" $ is _Right res) . flip observe . realToFrac $ end - start
|
||||
|
||||
case res of
|
||||
Left exc -> throwM exc
|
||||
Right res' -> return res'
|
||||
|
||||
Loading…
Reference in New Issue
Block a user