359 lines
14 KiB
Haskell
359 lines
14 KiB
Haskell
{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-}
|
|
|
|
module Utils.Metrics
|
|
( withHealthReportMetrics
|
|
, registerGHCMetrics
|
|
, observeHTTPRequestLatency
|
|
, registerReadyMetric
|
|
, withJobWorkerState
|
|
, observeYesodCacheSize
|
|
, observeFavouritesQuickActionsDuration
|
|
, LoginOutcome(..), observeLoginOutcome
|
|
, registerJobHeldLocksCount
|
|
, FileChunkStorage(..), observeSourcedChunk, observeSunkChunk
|
|
, observeDeletedUnreferencedFiles, observeDeletedUnreferencedChunks, observeInjectedFiles, observeRechunkedFiles
|
|
, registerJobWorkerQueueDepth
|
|
, observeMissingFiles
|
|
) where
|
|
|
|
import Import.NoModel hiding (Vector, Info)
|
|
import Model
|
|
import Prometheus
|
|
import Prometheus.Metric.GHC
|
|
|
|
import qualified Data.List as List
|
|
|
|
import System.Clock
|
|
import Data.Time.Clock.POSIX
|
|
|
|
import Network.Wai (Middleware)
|
|
import qualified Network.Wai as Wai
|
|
import qualified Network.HTTP.Types as HTTP
|
|
|
|
import Yesod.Core.Types (HandlerData(..), GHState(..))
|
|
|
|
import qualified Data.Set as Set
|
|
|
|
import Jobs.Types
|
|
|
|
import qualified Data.Aeson as Aeson
|
|
import qualified Data.HashMap.Strict as HashMap
|
|
|
|
{-# ANN module ("HLint: ignore Use even" :: String) #-}
|
|
|
|
|
|
histogramBuckets :: Rational -- ^ min
|
|
-> Rational -- ^ max
|
|
-> [Double]
|
|
histogramBuckets bMin bMax = map fromRational . takeWhile (<= bMax) . go bMin $ List.cycle factors
|
|
where
|
|
go n [] = [n]
|
|
go n (f:fs) = n : go (f * n) fs
|
|
|
|
factors
|
|
| bMin' `mod` 2 == 0 = [2.5, 2, 2]
|
|
| bMin' `mod` 5 == 0 = [2, 2, 2.5]
|
|
| otherwise = [2, 2.5, 2]
|
|
where
|
|
bMin' :: Integer
|
|
bMin' = floor . List.head . dropWhile (< 1) $ List.iterate (* 10) bMin
|
|
|
|
|
|
{-# NOINLINE healthReportTime #-}
|
|
healthReportTime :: Vector Label2 Gauge
|
|
healthReportTime = unsafeRegister . vector ("check", "status") $ gauge info
|
|
where
|
|
info = Info "uni2work_health_check_time"
|
|
"POSIXTime of last health check performed by this Uni2work-instance"
|
|
|
|
{-# NOINLINE healthReportDuration #-}
|
|
healthReportDuration :: Vector Label2 Histogram
|
|
healthReportDuration = unsafeRegister . vector ("check", "status") $ histogram info buckets
|
|
where
|
|
info = Info "uni2work_health_check_duration_seconds"
|
|
"Duration of last health check performed by this Uni2work-instance"
|
|
buckets = histogramBuckets 5e-6 100e-3
|
|
|
|
{-# NOINLINE httpRequestLatency #-}
|
|
httpRequestLatency :: Vector Label3 Histogram
|
|
httpRequestLatency = unsafeRegister . vector ("handler", "method", "status") $ histogram info buckets
|
|
where info = Info "http_request_duration_seconds"
|
|
"HTTP request latency"
|
|
buckets = histogramBuckets 50e-6 500
|
|
|
|
data ReadySince = MkReadySince
|
|
|
|
readyMetric :: POSIXTime -> Metric ReadySince
|
|
readyMetric ts = Metric $ return (MkReadySince, collectReadySince)
|
|
where
|
|
collectReadySince = return [SampleGroup info GaugeType [Sample "ready_time" [] sample]]
|
|
info = Info "ready_time"
|
|
"POSIXTime this Uni2work-instance became ready"
|
|
sample = encodeUtf8 $ tshow (realToFrac ts :: Nano)
|
|
|
|
{-# NOINLINE jobWorkerStateDuration #-}
|
|
jobWorkerStateDuration :: Vector Label4 Histogram
|
|
jobWorkerStateDuration = unsafeRegister . vector ("worker", "state", "jobctl", "task") $ histogram info buckets
|
|
where info = Info "uni2work_job_worker_state_duration_seconds"
|
|
"Duration of time a Uni2work job executor spent in a certain state"
|
|
buckets = histogramBuckets 1e-6 5000
|
|
|
|
{-# NOINLINE jobWorkerStateTransitions #-}
|
|
jobWorkerStateTransitions :: Vector Label4 Counter
|
|
jobWorkerStateTransitions = unsafeRegister . vector ("worker", "state", "jobctl", "task") $ counter info
|
|
where info = Info "uni2work_job_worker_state_transitions_total"
|
|
"Number of times a Uni2work job executor entered a certain state"
|
|
|
|
{-# NOINLINE yesodCacheSize #-}
|
|
yesodCacheSize :: Histogram
|
|
yesodCacheSize = unsafeRegister $ histogram info buckets
|
|
where info = Info "yesod_ghs_cache_items"
|
|
"Number of items in Yesod's ghsCache and ghsCacheBy"
|
|
buckets = 0 : histogramBuckets 1 1e6
|
|
|
|
{-# NOINLINE favouritesQuickActionsDuration #-}
|
|
favouritesQuickActionsDuration :: Histogram
|
|
favouritesQuickActionsDuration = unsafeRegister $ histogram info buckets
|
|
where info = Info "uni2work_favourites_quick_actions_seconds"
|
|
"Duration of time needed to calculate a set of favourite quick actions"
|
|
buckets = histogramBuckets 500e-6 50
|
|
|
|
{-# NOINLINE loginOutcomes #-}
|
|
loginOutcomes :: Vector Label2 Counter
|
|
loginOutcomes = unsafeRegister . vector ("plugin", "outcome") $ counter info
|
|
where info = Info "uni2work_login_attempts_total"
|
|
"Number of login attempts"
|
|
|
|
data JobHeldLocksCount = MkJobHeldLocksCount
|
|
|
|
jobHeldLocksCount :: TVar (Set QueuedJobId) -> Metric JobHeldLocksCount
|
|
jobHeldLocksCount heldLocks = Metric $ return (MkJobHeldLocksCount, collectJobHeldLocksCount)
|
|
where
|
|
collectJobHeldLocksCount = do
|
|
nLocks <- Set.size <$> readTVarIO heldLocks
|
|
let sample = encodeUtf8 $ tshow nLocks
|
|
return [SampleGroup info GaugeType [Sample "uni2work_jobs_held_locks_count" [] sample]]
|
|
info = Info "uni2work_jobs_held_locks_count"
|
|
"Number of job locks currently held by this Uni2work-instance"
|
|
|
|
{-# NOINLINE sourcedFileChunkSizes #-}
|
|
sourcedFileChunkSizes :: Vector Label1 Histogram
|
|
sourcedFileChunkSizes = unsafeRegister . vector "storage" $ histogram info buckets
|
|
where info = Info "uni2work_sourced_file_chunks_bytes"
|
|
"Sizes of file chunks sourced"
|
|
buckets = 0 : histogramBuckets 1 1000000000
|
|
|
|
{-# NOINLINE sunkFileChunkSizes #-}
|
|
sunkFileChunkSizes :: Vector Label1 Histogram
|
|
sunkFileChunkSizes = unsafeRegister . vector "storage" $ histogram info buckets
|
|
where info = Info "uni2work_sunk_file_chunks_bytes"
|
|
"Sizes of file chunks sunk"
|
|
buckets = 0 : histogramBuckets 1 1000000000
|
|
|
|
{-# NOINLINE deletedUnreferencedFiles #-}
|
|
deletedUnreferencedFiles :: Counter
|
|
deletedUnreferencedFiles = unsafeRegister $ counter info
|
|
where info = Info "uni2work_deleted_unreferenced_files_count"
|
|
"Number of unreferenced files deleted"
|
|
|
|
{-# NOINLINE deletedUnreferencedChunks #-}
|
|
deletedUnreferencedChunks :: Counter
|
|
deletedUnreferencedChunks = unsafeRegister $ counter info
|
|
where info = Info "uni2work_deleted_unreferenced_chunks_count"
|
|
"Number of unreferenced chunks deleted"
|
|
|
|
{-# NOINLINE deletedUnreferencedChunksBytes #-}
|
|
deletedUnreferencedChunksBytes :: Counter
|
|
deletedUnreferencedChunksBytes = unsafeRegister $ counter info
|
|
where info = Info "uni2work_deleted_unreferenced_chunks_bytes"
|
|
"Size of unreferenced chunks deleted"
|
|
|
|
{-# NOINLINE injectedFiles #-}
|
|
injectedFiles :: Counter
|
|
injectedFiles = unsafeRegister $ counter info
|
|
where info = Info "uni2work_injected_files_count"
|
|
"Number of files injected from upload cache into database"
|
|
|
|
{-# NOINLINE injectedFilesBytes #-}
|
|
injectedFilesBytes :: Counter
|
|
injectedFilesBytes = unsafeRegister $ counter info
|
|
where info = Info "uni2work_injected_files_bytes"
|
|
"Size of files injected from upload cache into database"
|
|
|
|
{-# NOINLINE rechunkedFiles #-}
|
|
rechunkedFiles :: Counter
|
|
rechunkedFiles = unsafeRegister $ counter info
|
|
where info = Info "uni2work_rechunked_files_count"
|
|
"Number of files rechunked within database"
|
|
|
|
{-# NOINLINE rechunkedFilesBytes #-}
|
|
rechunkedFilesBytes :: Counter
|
|
rechunkedFilesBytes = unsafeRegister $ counter info
|
|
where info = Info "uni2work_rechunked_files_bytes"
|
|
"Size of files rechunked within database"
|
|
|
|
data JobWorkerQueueDepth = MkJobWorkerQueueDepth
|
|
|
|
jobWorkerQueueDepth :: TMVar JobState -> Metric JobWorkerQueueDepth
|
|
jobWorkerQueueDepth jSt = Metric $ return (MkJobWorkerQueueDepth, collectJobWorkerQueueDepth)
|
|
where
|
|
collectJobWorkerQueueDepth = maybeT (return []) $ do
|
|
wQueues <- hoist atomically $ do
|
|
JobState{..} <- MaybeT $ tryReadTMVar jSt
|
|
flip ifoldMapM jobWorkers $ \wAsync wQueue
|
|
-> lift $ pure . (jobWorkerName wAsync, ) . jqDepth <$> readTVar wQueue
|
|
return [ SampleGroup info GaugeType
|
|
[ Sample "uni2work_queued_jobs_count" [("worker", showWorkerId wName)] . encodeUtf8 $ tshow wDepth
|
|
| (wName, wDepth) <- wQueues
|
|
]
|
|
]
|
|
info = Info "uni2work_queued_jobs_count"
|
|
"Number of JobQueue entries in this Uni2work-instance"
|
|
|
|
{-# NOINLINE missingFiles #-}
|
|
missingFiles :: Vector Label1 Gauge
|
|
missingFiles = unsafeRegister . vector "ref" $ gauge info
|
|
where info = Info "uni2work_missing_files_count"
|
|
"Number of files referenced from within database that are missing"
|
|
|
|
|
|
withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport
|
|
withHealthReportMetrics act = do
|
|
before <- liftIO $ getTime Monotonic
|
|
report <- act
|
|
after <- liftIO $ getTime Monotonic
|
|
|
|
let checkVal = toPathPiece $ classifyHealthReport report
|
|
statusVal = toPathPiece $ healthReportStatus report
|
|
timeSample, durationSample :: Double
|
|
timeSample = realToFrac (realToFrac after :: Nano)
|
|
durationSample = realToFrac (realToFrac $ after - before :: Nano)
|
|
liftIO $ withLabel healthReportTime (checkVal, statusVal) $ flip setGauge timeSample
|
|
liftIO $ withLabel healthReportDuration (checkVal, statusVal) $ flip observe durationSample
|
|
|
|
return report
|
|
|
|
registerGHCMetrics :: MonadIO m => m ()
|
|
registerGHCMetrics = void $ register ghcMetrics
|
|
|
|
observeHTTPRequestLatency :: forall site. ParseRoute site => (Route site -> String) -> Middleware
|
|
observeHTTPRequestLatency classifyHandler app req respond' = do
|
|
start <- getTime Monotonic
|
|
app req $ \res -> do
|
|
end <- getTime Monotonic
|
|
let method = decodeUtf8 $ Wai.requestMethod req
|
|
status = tshow . HTTP.statusCode $ Wai.responseStatus res
|
|
route :: Maybe (Route site)
|
|
route = parseRoute ( Wai.pathInfo req
|
|
, over (mapped . _2) (fromMaybe "") . HTTP.queryToQueryText $ Wai.queryString req
|
|
)
|
|
handler' = pack . classifyHandler <$> route
|
|
|
|
labels :: Label3
|
|
labels = (fromMaybe "n/a" handler', method, status)
|
|
withLabel httpRequestLatency labels . flip observe . realToFrac $ end - start
|
|
|
|
respond' res
|
|
|
|
registerReadyMetric :: MonadIO m => m ()
|
|
registerReadyMetric = liftIO $ void . register . readyMetric =<< getPOSIXTime
|
|
|
|
classifyJobWorkerState :: JobWorkerId -> JobWorkerState -> Prometheus.Label4
|
|
classifyJobWorkerState wId jws = (showWorkerId wId, tag, maybe "n/a" pack mJobCtl, maybe "n/a" pack mJob)
|
|
where
|
|
Aeson.Object obj = Aeson.toJSON jws
|
|
Aeson.String tag = obj HashMap.! "state"
|
|
mJobCtl = asum
|
|
[ classifyJobCtl <$> jws ^? _jobWorkerJobCtl
|
|
, "perform" <$ jws ^? _jobWorkerJob
|
|
]
|
|
mJob = classifyJob <$> jws ^? _jobWorkerJob
|
|
|
|
withJobWorkerState :: (MonadIO m, MonadMask m) => JobWorkerId -> JobWorkerState -> m a -> m a
|
|
withJobWorkerState wId newSt = withJobWorkerStateLbls $ classifyJobWorkerState wId newSt
|
|
|
|
withJobWorkerStateLbls :: (MonadIO m, MonadMask m) => Label4 -> m a -> m a
|
|
withJobWorkerStateLbls newLbls act = do
|
|
liftIO $ withLabel jobWorkerStateTransitions newLbls incCounter
|
|
|
|
start <- liftIO $ getTime Monotonic
|
|
res <- handleAll (return . Left) $ Right <$> act
|
|
end <- liftIO $ getTime Monotonic
|
|
|
|
liftIO . withLabel jobWorkerStateDuration newLbls . flip observe . realToFrac $ end - start
|
|
|
|
either throwM return res
|
|
|
|
observeYesodCacheSize :: MonadHandler m => m ()
|
|
observeYesodCacheSize = do
|
|
HandlerData{handlerState} <- liftHandler ask
|
|
liftIO $ do
|
|
GHState{..} <- readIORef handlerState
|
|
let size = fromIntegral $ length ghsCache + length ghsCacheBy
|
|
observe yesodCacheSize size
|
|
|
|
observeFavouritesQuickActionsDuration :: (MonadIO m, MonadMask m) => m a -> m a
|
|
observeFavouritesQuickActionsDuration act = do
|
|
start <- liftIO $ getTime Monotonic
|
|
res <- handleAll (return . Left) $ Right <$> act
|
|
end <- liftIO $ getTime Monotonic
|
|
|
|
liftIO . observe favouritesQuickActionsDuration . realToFrac $ end - start
|
|
|
|
either throwM return res
|
|
|
|
data LoginOutcome
|
|
= LoginSuccessful
|
|
| LoginInvalidCredentials
|
|
| LoginADInvalidCredentials
|
|
| LoginError
|
|
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Typeable)
|
|
deriving anyclass (Universe, Finite)
|
|
nullaryPathPiece ''LoginOutcome $ camelToPathPiece' 1
|
|
|
|
observeLoginOutcome :: MonadHandler m
|
|
=> Text -- ^ Plugin
|
|
-> LoginOutcome
|
|
-> m ()
|
|
observeLoginOutcome plugin outcome
|
|
= liftIO $ withLabel loginOutcomes (plugin, toPathPiece outcome) incCounter
|
|
|
|
registerJobHeldLocksCount :: MonadIO m => TVar (Set QueuedJobId) -> m ()
|
|
registerJobHeldLocksCount = liftIO . void . register . jobHeldLocksCount
|
|
|
|
data FileChunkStorage = StorageMinio | StorageDB
|
|
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Typeable)
|
|
deriving anyclass (Universe, Finite)
|
|
nullaryPathPiece ''FileChunkStorage $ camelToPathPiece' 1
|
|
|
|
observeSunkChunk, observeSourcedChunk :: (Integral n, MonadIO m) => FileChunkStorage -> n -> m ()
|
|
observeSunkChunk store = liftIO . observeChunkSize sunkFileChunkSizes store . fromIntegral
|
|
observeSourcedChunk store = liftIO . observeChunkSize sourcedFileChunkSizes store . fromIntegral
|
|
|
|
observeChunkSize :: Vector Label1 Histogram -> FileChunkStorage -> Integer -> IO ()
|
|
observeChunkSize metric (toPathPiece -> storageLabel) = withLabel metric storageLabel . flip observe . fromInteger
|
|
|
|
observeDeletedUnreferencedFiles :: MonadIO m => Natural -> m ()
|
|
observeDeletedUnreferencedFiles = liftIO . void . addCounter deletedUnreferencedFiles . fromIntegral
|
|
|
|
observeDeletedUnreferencedChunks :: MonadIO m => Natural -> Word64 -> m ()
|
|
observeDeletedUnreferencedChunks num size = liftIO $ do
|
|
void . addCounter deletedUnreferencedChunks $ fromIntegral num
|
|
void . addCounter deletedUnreferencedChunksBytes $ fromIntegral size
|
|
|
|
observeInjectedFiles :: MonadIO m => Natural -> Word64 -> m ()
|
|
observeInjectedFiles num size = liftIO $ do
|
|
void . addCounter injectedFiles $ fromIntegral num
|
|
void . addCounter injectedFilesBytes $ fromIntegral size
|
|
|
|
observeRechunkedFiles :: MonadIO m => Natural -> Word64 -> m ()
|
|
observeRechunkedFiles num size = liftIO $ do
|
|
void . addCounter rechunkedFiles $ fromIntegral num
|
|
void . addCounter rechunkedFilesBytes $ fromIntegral size
|
|
|
|
registerJobWorkerQueueDepth :: MonadIO m => TMVar JobState -> m ()
|
|
registerJobWorkerQueueDepth = liftIO . void . register . jobWorkerQueueDepth
|
|
|
|
observeMissingFiles :: MonadIO m => Text -> Int -> m ()
|
|
observeMissingFiles refIdent = liftIO . withLabel missingFiles refIdent . flip setGauge . fromIntegral
|