diff --git a/src/Jobs.hs b/src/Jobs.hs index 88e66b146..923d43626 100644 --- a/src/Jobs.hs +++ b/src/Jobs.hs @@ -402,7 +402,7 @@ handleJobs' wNum = C.mapM_ $ \jctl -> do instanceID' <- getsYesod $ view instanceID now <- liftIO getCurrentTime - performJob content + withJobDuration (classifyJob content) $ performJob content -- `performJob` is expected to throw an exception if it detects that the job was not done runDB . setSerializable $ do diff --git a/src/Jobs/Types.hs b/src/Jobs/Types.hs index 21bea5be8..9d33b1258 100644 --- a/src/Jobs/Types.hs +++ b/src/Jobs/Types.hs @@ -2,6 +2,7 @@ module Jobs.Types ( Job(..), Notification(..) + , classifyJob , JobCtl(..) , JobContext(..) , JobState(..) @@ -12,11 +13,14 @@ module Jobs.Types , JobPriority(..), prioritiseJob ) where -import Import.NoFoundation hiding (Unique) +import Import.NoFoundation hiding (Unique, state) +import qualified Data.Aeson as Aeson import Data.Aeson (defaultOptions, Options(..), SumEncoding(..)) import Data.Aeson.TH (deriveJSON) +import qualified Data.HashMap.Strict as HashMap + import Data.List.NonEmpty (NonEmpty) import Data.Unique @@ -27,6 +31,8 @@ import qualified Data.Set as Set import Data.PQueue.Prio.Max (MaxPQueue) import qualified Data.PQueue.Prio.Max as PQ +import Utils.Metrics (observeJobQueueDepth) + data Job = JobSendNotification { jRecipient :: UserId, jNotification :: Notification } | JobSendTestEmail { jEmail :: Email, jMailContext :: MailContext } @@ -116,6 +122,12 @@ deriveJSON defaultOptions , sumEncoding = TaggedObject "notification" "data" } ''Notification +classifyJob :: Job -> String +classifyJob job = unpack tag + where + Aeson.Object obj = Aeson.toJSON job + Aeson.String tag = obj HashMap.! "job" + data JobCtl = JobCtlFlush | JobCtlPerform QueuedJobId @@ -151,6 +163,8 @@ instance Universe JobPriority instance Finite JobPriority instance NFData JobPriority +nullaryPathPiece ''JobPriority $ camelToPathPiece' 2 + prioritiseJob :: JobCtl -> JobPriority prioritiseJob JobCtlTest = JobPrioRealtime prioritiseJob (JobCtlGenerateHealthReport _) = JobPrioRealtime diff --git a/src/Utils/Metrics.hs b/src/Utils/Metrics.hs index 5f4e310f5..6a2368a86 100644 --- a/src/Utils/Metrics.hs +++ b/src/Utils/Metrics.hs @@ -3,6 +3,7 @@ module Utils.Metrics , registerGHCMetrics , observeHTTPRequestLatency , registerReadyMetric + , withJobDuration ) where import Import.NoFoundation hiding (Vector, Info) @@ -37,28 +38,24 @@ histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $ {-# NOINLINE healthReportTime #-} healthReportTime :: Vector Label2 Gauge -healthReportTime = unsafeRegister . vector ("check", "status") $ gauge Info - { metricName = "uni2work_health_check_time" - , metricHelp = "POSIXTime of last health check performed by this Uni2work-instance" - } +healthReportTime = unsafeRegister . vector ("check", "status") $ gauge info + where + info = Info "uni2work_health_check_time" + "POSIXTime of last health check performed by this Uni2work-instance" {-# NOINLINE healthReportDuration #-} healthReportDuration :: Vector Label2 Histogram healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets where - info = Info - { metricName = "uni2work_health_check_duration_seconds" - , metricHelp = "Duration of last health check performed by this Uni2work-instance" - } + info = Info "uni2work_health_check_duration_seconds" + "Duration of last health check performed by this Uni2work-instance" buckets = histogramBuckets 5e-6 100e-3 {-# NOINLINE httpRequestLatency #-} httpRequestLatency :: Vector Label3 Histogram -httpRequestLatency = unsafeRegister - $ vector ("handler", "method", "status") - $ histogram info buckets +httpRequestLatency = unsafeRegister . vector ("handler", "method", "status") $ histogram info buckets where info = Info "http_request_duration_seconds" - "HTTP request latency" + "HTTP request latency" buckets = histogramBuckets 50e-6 500 data ReadySince = MkReadySince @@ -67,9 +64,23 @@ readyMetric :: POSIXTime -> Metric ReadySince readyMetric ts = Metric $ return (MkReadySince, collectReadySince) where collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]] - info = Info "ready_time" "POSIXTime this Uni2work-instance became ready" + info = Info "ready_time" + "POSIXTime this Uni2work-instance became ready" sample = encodeUtf8 $ tshow (realToFrac ts :: Nano) +{-# NOINLINE jobDuration #-} +jobDuration :: Vector Label2 Histogram +jobDuration = unsafeRegister . vector ("kind", "status") $ histogram info buckets + where info = Info "uni2work_job_duration_seconds" + "Duration of time taken to execute a background job" + buckets = histogramBuckets 5e-6 500 + +{-# NOINLINE jobQueueDepth #-} +jobQueueDepth :: Vector Label2 Gauge +jobQueueDepth = unsafeRegister . vector ("worker", "priority") $ gauge info + where info = Info "uni2work_job_queue_size_count" + "Current depth of worker queue" + withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport withHealthReportMetrics act = do @@ -111,3 +122,15 @@ observeHTTPRequestLatency classifyHandler app req respond' = do registerReadyMetric :: MonadIO m => m () registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime + +withJobDuration :: (MonadIO m, MonadCatch m) => String -> m a -> m a +withJobDuration job doJob = do + start <- liftIO getPOSIXTime + res <- handleAll (return . Left) $ Right <$> doJob + end <- liftIO getPOSIXTime + + liftIO . withLabel jobDuration (pack job, bool "failure" "success" $ is _Right res) . flip observe . realToFrac $ end - start + + case res of + Left exc -> throwM exc + Right res' -> return res'