{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-} module Utils.Metrics ( withHealthReportMetrics , registerGHCMetrics , observeHTTPRequestLatency , registerReadyMetric , withJobWorkerState , observeYesodCacheSize , observeFavouritesQuickActionsDuration , LoginOutcome(..), observeLoginOutcome , registerJobHeldLocksCount , FileChunkStorage(..), observeSourcedChunk, observeSunkChunk , observeDeletedUnreferencedFiles, observeDeletedUnreferencedChunks, observeInjectedFiles, observeRechunkedFiles , registerJobWorkerQueueDepth , observeMissingFiles ) where import Import.NoModel hiding (Vector, Info) import Model import Prometheus import Prometheus.Metric.GHC import qualified Data.List as List import System.Clock import Data.Time.Clock.POSIX import Network.Wai (Middleware) import qualified Network.Wai as Wai import qualified Network.HTTP.Types as HTTP import Yesod.Core.Types (HandlerData(..), GHState(..)) import qualified Data.Set as Set import Jobs.Types import qualified Data.Aeson as Aeson import qualified Data.HashMap.Strict as HashMap {-# ANN module ("HLint: ignore Use even" :: String) #-} histogramBuckets :: Rational -- ^ min -> Rational -- ^ max -> [Double] histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $ List.cycle factors where go n [] = [n] go n (f:fs) = n : go (f * n) fs factors | bMin' `mod` 2 == 0 = [2.5, 2, 2] | bMin' `mod` 5 == 0 = [2, 2, 2.5] | otherwise = [2, 2.5, 2] where bMin' :: Integer bMin' = floor . List.head . dropWhile (< 1) $ List.iterate (* 10) bMin {-# NOINLINE healthReportTime #-} healthReportTime :: Vector Label2 Gauge healthReportTime = unsafeRegister . vector ("check", "status") $ gauge info where info = Info "uni2work_health_check_time" "POSIXTime of last health check performed by this Uni2work-instance" {-# NOINLINE healthReportDuration #-} healthReportDuration :: Vector Label2 Histogram healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets where info = Info "uni2work_health_check_duration_seconds" "Duration of last health check performed by this Uni2work-instance" buckets = histogramBuckets 5e-6 100e-3 {-# NOINLINE httpRequestLatency #-} httpRequestLatency :: Vector Label3 Histogram httpRequestLatency = unsafeRegister . vector ("handler", "method", "status") $ histogram info buckets where info = Info "http_request_duration_seconds" "HTTP request latency" buckets = histogramBuckets 50e-6 500 data ReadySince = MkReadySince readyMetric :: POSIXTime -> Metric ReadySince readyMetric ts = Metric $ return (MkReadySince, collectReadySince) where collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]] info = Info "ready_time" "POSIXTime this Uni2work-instance became ready" sample = encodeUtf8 $ tshow (realToFrac ts :: Nano) {-# NOINLINE jobWorkerStateDuration #-} jobWorkerStateDuration :: Vector Label4 Histogram jobWorkerStateDuration = unsafeRegister . vector ("worker", "state", "jobctl", "task") $ histogram info buckets where info = Info "uni2work_job_worker_state_duration_seconds" "Duration of time a Uni2work job executor spent in a certain state" buckets = histogramBuckets 1e-6 5000 {-# NOINLINE jobWorkerStateTransitions #-} jobWorkerStateTransitions :: Vector Label4 Counter jobWorkerStateTransitions = unsafeRegister . vector ("worker", "state", "jobctl", "task") $ counter info where info = Info "uni2work_job_worker_state_transitions_total" "Number of times a Uni2work job executor entered a certain state" {-# NOINLINE yesodCacheSize #-} yesodCacheSize :: Histogram yesodCacheSize = unsafeRegister $ histogram info buckets where info = Info "yesod_ghs_cache_items" "Number of items in Yesod's ghsCache and ghsCacheBy" buckets = 0 : histogramBuckets 1 1e6 {-# NOINLINE favouritesQuickActionsDuration #-} favouritesQuickActionsDuration :: Histogram favouritesQuickActionsDuration = unsafeRegister $ histogram info buckets where info = Info "uni2work_favourites_quick_actions_seconds" "Duration of time needed to calculate a set of favourite quick actions" buckets = histogramBuckets 500e-6 50 {-# NOINLINE loginOutcomes #-} loginOutcomes :: Vector Label2 Counter loginOutcomes = unsafeRegister . vector ("plugin", "outcome") $ counter info where info = Info "uni2work_login_attempts_total" "Number of login attempts" data JobHeldLocksCount = MkJobHeldLocksCount jobHeldLocksCount :: TVar (Set QueuedJobId) -> Metric JobHeldLocksCount jobHeldLocksCount heldLocks = Metric $ return (MkJobHeldLocksCount, collectJobHeldLocksCount) where collectJobHeldLocksCount = do nLocks <- Set.size <$> readTVarIO heldLocks let sample = encodeUtf8 $ tshow nLocks return [SampleGroup info GaugeType [Sample "uni2work_jobs_held_locks_count" [] sample]] info = Info "uni2work_jobs_held_locks_count" "Number of job locks currently held by this Uni2work-instance" {-# NOINLINE sourcedFileChunkSizes #-} sourcedFileChunkSizes :: Vector Label1 Histogram sourcedFileChunkSizes = unsafeRegister . vector "storage" $ histogram info buckets where info = Info "uni2work_sourced_file_chunks_bytes" "Sizes of file chunks sourced" buckets = 0 : histogramBuckets 1 1000000000 {-# NOINLINE sunkFileChunkSizes #-} sunkFileChunkSizes :: Vector Label1 Histogram sunkFileChunkSizes = unsafeRegister . vector "storage" $ histogram info buckets where info = Info "uni2work_sunk_file_chunks_bytes" "Sizes of file chunks sunk" buckets = 0 : histogramBuckets 1 1000000000 {-# NOINLINE deletedUnreferencedFiles #-} deletedUnreferencedFiles :: Counter deletedUnreferencedFiles = unsafeRegister $ counter info where info = Info "uni2work_deleted_unreferenced_files_count" "Number of unreferenced files deleted" {-# NOINLINE deletedUnreferencedChunks #-} deletedUnreferencedChunks :: Counter deletedUnreferencedChunks = unsafeRegister $ counter info where info = Info "uni2work_deleted_unreferenced_chunks_count" "Number of unreferenced chunks deleted" {-# NOINLINE deletedUnreferencedChunksBytes #-} deletedUnreferencedChunksBytes :: Counter deletedUnreferencedChunksBytes = unsafeRegister $ counter info where info = Info "uni2work_deleted_unreferenced_chunks_bytes" "Size of unreferenced chunks deleted" {-# NOINLINE injectedFiles #-} injectedFiles :: Counter injectedFiles = unsafeRegister $ counter info where info = Info "uni2work_injected_files_count" "Number of files injected from upload cache into database" {-# NOINLINE injectedFilesBytes #-} injectedFilesBytes :: Counter injectedFilesBytes = unsafeRegister $ counter info where info = Info "uni2work_injected_files_bytes" "Size of files injected from upload cache into database" {-# NOINLINE rechunkedFiles #-} rechunkedFiles :: Counter rechunkedFiles = unsafeRegister $ counter info where info = Info "uni2work_rechunked_files_count" "Number of files rechunked within database" {-# NOINLINE rechunkedFilesBytes #-} rechunkedFilesBytes :: Counter rechunkedFilesBytes = unsafeRegister $ counter info where info = Info "uni2work_rechunked_files_bytes" "Size of files rechunked within database" data JobWorkerQueueDepth = MkJobWorkerQueueDepth jobWorkerQueueDepth :: TMVar JobState -> Metric JobWorkerQueueDepth jobWorkerQueueDepth jSt = Metric $ return (MkJobWorkerQueueDepth, collectJobWorkerQueueDepth) where collectJobWorkerQueueDepth = maybeT (return []) $ do wQueues <- hoist atomically $ do JobState{..} <- MaybeT $ tryReadTMVar jSt flip ifoldMapM jobWorkers $ \wAsync wQueue -> lift $ pure . (jobWorkerName wAsync, ) . jqDepth <$> readTVar wQueue return [ SampleGroup info GaugeType [ Sample "uni2work_queued_jobs_count" [("worker", showWorkerId wName)] . encodeUtf8 $ tshow wDepth | (wName, wDepth) <- wQueues ] ] info = Info "uni2work_queued_jobs_count" "Number of JobQueue entries in this Uni2work-instance" {-# NOINLINE missingFiles #-} missingFiles :: Vector Label1 Gauge missingFiles = unsafeRegister . vector "ref" $ gauge info where info = Info "uni2work_missing_files_count" "Number of files referenced from within database that are missing" withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport withHealthReportMetrics act = do before <- liftIO $ getTime Monotonic report <- act after <- liftIO $ getTime Monotonic let checkVal = toPathPiece $ classifyHealthReport report statusVal = toPathPiece $ healthReportStatus report timeSample, durationSample :: Double timeSample = realToFrac (realToFrac after :: Nano) durationSample = realToFrac (realToFrac $ after - before :: Nano) liftIO $ withLabel healthReportTime (checkVal, statusVal) $ flip setGauge timeSample liftIO $ withLabel healthReportDuration (checkVal, statusVal) $ flip observe durationSample return report registerGHCMetrics :: MonadIO m => m () registerGHCMetrics = void $ register ghcMetrics observeHTTPRequestLatency :: forall site. ParseRoute site => (Route site -> String) -> Middleware observeHTTPRequestLatency classifyHandler app req respond' = do start <- getTime Monotonic app req $ \res -> do end <- getTime Monotonic let method = decodeUtf8 $ Wai.requestMethod req status = tshow . HTTP.statusCode $ Wai.responseStatus res route :: Maybe (Route site) route = parseRoute ( Wai.pathInfo req , over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req ) handler' = pack . classifyHandler <$> route labels :: Label3 labels = (fromMaybe "n/a" handler', method, status) withLabel httpRequestLatency labels . flip observe . realToFrac $ end - start respond' res registerReadyMetric :: MonadIO m => m () registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime classifyJobWorkerState :: JobWorkerId -> JobWorkerState -> Prometheus.Label4 classifyJobWorkerState wId jws = (showWorkerId wId, tag, maybe "n/a" pack mJobCtl, maybe "n/a" pack mJob) where Aeson.Object obj = Aeson.toJSON jws Aeson.String tag = obj HashMap.! "state" mJobCtl = asum [ classifyJobCtl <$> jws ^? _jobWorkerJobCtl , "perform" <$ jws ^? _jobWorkerJob ] mJob = classifyJob <$> jws ^? _jobWorkerJob withJobWorkerState :: (MonadIO m, MonadMask m) => JobWorkerId -> JobWorkerState -> m a -> m a withJobWorkerState wId newSt = withJobWorkerStateLbls $ classifyJobWorkerState wId newSt withJobWorkerStateLbls :: (MonadIO m, MonadMask m) => Label4 -> m a -> m a withJobWorkerStateLbls newLbls act = do liftIO $ withLabel jobWorkerStateTransitions newLbls incCounter start <- liftIO $ getTime Monotonic res <- handleAll (return . Left) $ Right <$> act end <- liftIO $ getTime Monotonic liftIO . withLabel jobWorkerStateDuration newLbls . flip observe . realToFrac $ end - start either throwM return res observeYesodCacheSize :: MonadHandler m => m () observeYesodCacheSize = do HandlerData{handlerState} <- liftHandler ask liftIO $ do GHState{..} <- readIORef handlerState let size = fromIntegral $ length ghsCache + length ghsCacheBy observe yesodCacheSize size observeFavouritesQuickActionsDuration :: (MonadIO m, MonadMask m) => m a -> m a observeFavouritesQuickActionsDuration act = do start <- liftIO $ getTime Monotonic res <- handleAll (return . Left) $ Right <$> act end <- liftIO $ getTime Monotonic liftIO . observe favouritesQuickActionsDuration . realToFrac $ end - start either throwM return res data LoginOutcome = LoginSuccessful | LoginInvalidCredentials | LoginError deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Typeable) deriving anyclass (Universe, Finite) nullaryPathPiece ''LoginOutcome $ camelToPathPiece' 1 observeLoginOutcome :: MonadHandler m => Text -- ^ Plugin -> LoginOutcome -> m () observeLoginOutcome plugin outcome = liftIO $ withLabel loginOutcomes (plugin, toPathPiece outcome) incCounter registerJobHeldLocksCount :: MonadIO m => TVar (Set QueuedJobId) -> m () registerJobHeldLocksCount = liftIO . void . register . jobHeldLocksCount data FileChunkStorage = StorageMinio | StorageDB deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Typeable) deriving anyclass (Universe, Finite) nullaryPathPiece ''FileChunkStorage $ camelToPathPiece' 1 observeSunkChunk, observeSourcedChunk :: (Integral n, MonadIO m) => FileChunkStorage -> n -> m () observeSunkChunk store = liftIO . observeChunkSize sunkFileChunkSizes store . fromIntegral observeSourcedChunk store = liftIO . observeChunkSize sourcedFileChunkSizes store . fromIntegral observeChunkSize :: Vector Label1 Histogram -> FileChunkStorage -> Integer -> IO () observeChunkSize metric (toPathPiece -> storageLabel) = withLabel metric storageLabel . flip observe . fromInteger observeDeletedUnreferencedFiles :: MonadIO m => Natural -> m () observeDeletedUnreferencedFiles = liftIO . void . addCounter deletedUnreferencedFiles . fromIntegral observeDeletedUnreferencedChunks :: MonadIO m => Natural -> Word64 -> m () observeDeletedUnreferencedChunks num size = liftIO $ do void . addCounter deletedUnreferencedChunks $ fromIntegral num void . addCounter deletedUnreferencedChunksBytes $ fromIntegral size observeInjectedFiles :: MonadIO m => Natural -> Word64 -> m () observeInjectedFiles num size = liftIO $ do void . addCounter injectedFiles $ fromIntegral num void . addCounter injectedFilesBytes $ fromIntegral size observeRechunkedFiles :: MonadIO m => Natural -> Word64 -> m () observeRechunkedFiles num size = liftIO $ do void . addCounter rechunkedFiles $ fromIntegral num void . addCounter rechunkedFilesBytes $ fromIntegral size registerJobWorkerQueueDepth :: MonadIO m => TMVar JobState -> m () registerJobWorkerQueueDepth = liftIO . void . register . jobWorkerQueueDepth observeMissingFiles :: MonadIO m => Text -> Int -> m () observeMissingFiles refIdent = liftIO . withLabel missingFiles refIdent . flip setGauge . fromIntegral