feat: implement in-memory cache for file download

This commit is contained in:
Gregor Kleen 2021-02-10 20:06:59 +01:00
parent e61b5611b1
commit 36debd865f
9 changed files with 392 additions and 11 deletions

View File

@ -269,3 +269,7 @@ fallback-personalised-sheet-files-keys-expire: 2419200
download-token-expire: 604801
memcache-auth: true
file-source-arc:
maximum-ghost: 512
maximum-weight: 1073741824 # 1GiB

View File

@ -158,6 +158,7 @@ dependencies:
- insert-ordered-containers
- topograph
- network-uri
- psqueues
other-extensions:
- GeneralizedNewtypeDeriving
- IncoherentInstances

View File

@ -104,6 +104,8 @@ import Web.ServerSession.Core (StorageException(..))
import GHC.RTS.Flags (getRTSFlags)
import qualified Prometheus as Prometheus
-- Import all relevant handler modules here.
-- (HPack takes care to add new modules to our cabal file nowadays.)
import Handler.News
@ -181,6 +183,11 @@ makeFoundation appSettings''@AppSettings{..} = do
appJobState <- liftIO newEmptyTMVarIO
appHealthReport <- liftIO $ newTVarIO Set.empty
appFileSourceARC <- for appFileSourceARCConf $ \c@ARCConf{..} -> do
ah <- initARCHandle arccMaximumGhost arccMaximumWeight
m <- Prometheus.register $ arcMetrics ARCFileSource c ah
return (ah, m)
-- We need a log function to create a connection pool. We need a connection
-- pool to create our foundation. And we need our foundation to get a
-- logging function. To get out of this loop, we initially create a

View File

@ -25,6 +25,8 @@ import qualified Jose.Jwk as Jose
import qualified Database.Memcached.Binary.IO as Memcached
import Network.Minio (MinioConn)
import Utils.Metrics (ARCMetrics)
type SMTPPool = Pool SMTPConnection
@ -60,6 +62,7 @@ data UniWorX = UniWorX
, appUploadCache :: Maybe MinioConn
, appVerpSecret :: VerpSecret
, appAuthKey :: Auth.Key
, appFileSourceARC :: Maybe (ARCHandle (FileContentChunkReference, (Int, Int)) Int ByteString, ARCMetrics)
}
makeLenses_ ''UniWorX

View File

@ -31,6 +31,32 @@ data SourceFilesException
deriving anyclass (Exception)
fileChunkARC :: ( MonadHandler m
, HandlerSite m ~ UniWorX
)
=> (FileContentChunkReference, (Int, Int))
-> m (Maybe ByteString)
-> m (Maybe ByteString)
fileChunkARC k getChunkDB = do
arc <- getsYesod appFileSourceARC
case arc of
Nothing -> getChunkDB
Just (ah, m) -> do
cachedARC' ah k $ \case
Nothing -> do
chunk' <- getChunkDB
for chunk' $ \chunk -> do
let w = length chunk
liftIO $ do
observeARCOutcome m False w
observeSourcedChunk StorageDB w
return (chunk, w)
Just x@(_, w) -> liftIO $ do
observeARCOutcome m True w
observeSourcedChunk StorageARC w
return $ Just x
sourceFileDB :: (MonadThrow m, MonadHandler m, HandlerSite m ~ UniWorX)
=> FileContentReference -> ConduitT () ByteString (SqlPersistT m) ()
sourceFileDB fileReference = do
@ -38,13 +64,13 @@ sourceFileDB fileReference = do
let retrieveChunk chunkHash = \case
Nothing -> return Nothing
Just start -> do
chunk <- E.selectMaybe . E.from $ \fileContentChunk -> do
E.where_ $ fileContentChunk E.^. FileContentChunkId E.==. E.val chunkHash
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val dbChunksize)
let getChunkDB = fmap (fmap E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do
E.where_ $ fileContentChunk E.^. FileContentChunkId E.==. E.val chunkHash
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val dbChunksize)
chunk <- fileChunkARC (unFileContentChunkKey chunkHash, (start, dbChunksize)) getChunkDB
case chunk of
Nothing -> throwM SourceFilesContentUnavailable
Just (E.Value c) -> do
observeSourcedChunk StorageDB $ olength c
Just c -> do
return . Just . (c, ) $ if
| olength c >= dbChunksize -> Just $ start + dbChunksize
| otherwise -> Nothing
@ -185,13 +211,13 @@ respondFileConditional representationLastModified cType FileReference{..} = do
forM_ relevantChunks $ \(chunkHash, offset, cLength)
-> let retrieveChunk = \case
Just (start, cLength') | cLength' > 0 -> do
chunk <- E.selectMaybe . E.from $ \fileContentChunk -> do
E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val $ min cLength' dbChunksize)
let getChunkDB = fmap (fmap E.unValue) . E.selectMaybe . E.from $ \fileContentChunk -> do
E.where_ $ fileContentChunk E.^. FileContentChunkHash E.==. E.val chunkHash
return $ E.substring (fileContentChunk E.^. FileContentChunkContent) (E.val start) (E.val $ min cLength' dbChunksize)
chunk <- fileChunkARC (chunkHash, (fromIntegral start, fromIntegral $ min cLength' dbChunksize)) getChunkDB
case chunk of
Nothing -> throwM SourceFilesContentUnavailable
Just (E.Value c) -> do
observeSourcedChunk StorageDB $ olength c
Just c -> do
return . Just . (c, ) $ if
| fromIntegral (olength c) >= min cLength' dbChunksize
-> Just (start + dbChunksize, cLength' - fromIntegral (olength c))

View File

@ -212,6 +212,8 @@ data AppSettings = AppSettings
, appStudyFeaturesRecacheRelevanceInterval :: NominalDiffTime
, appMemcacheAuth :: Bool
, appFileSourceARCConf :: Maybe (ARCConf Int)
} deriving Show
data JobMode = JobsLocal { jobsAcceptOffload :: Bool }
@ -335,6 +337,11 @@ data VerpMode = VerpNone
| Verp { verpPrefix :: Text, verpSeparator :: Char }
deriving (Eq, Show, Read, Generic)
data ARCConf w = ARCConf
{ arccMaximumGhost :: Int
, arccMaximumWeight :: w
} deriving (Eq, Ord, Read, Show, Generic, Typeable)
nullaryPathPiece ''ApprootScope $ camelToPathPiece' 1
pathPieceJSON ''ApprootScope
pathPieceJSONKey ''ApprootScope
@ -361,6 +368,10 @@ deriveJSON defaultOptions
, constructorTagModifier = camelToPathPiece' 1
} ''JobMode
deriveJSON defaultOptions
{ fieldLabelModifier = camelToPathPiece' 1
} ''ARCConf
instance FromJSON LdapConf where
parseJSON = withObject "LdapConf" $ \o -> do
ldapTls <- o .:? "tls"
@ -620,6 +631,8 @@ instance FromJSON AppSettings where
appStudyFeaturesRecacheRelevanceWithin <- o .:? "study-features-recache-relevance-within"
appStudyFeaturesRecacheRelevanceInterval <- o .: "study-features-recache-relevance-interval"
appFileSourceARCConf <- assertM ((||) <$> ((> 0) . arccMaximumGhost) <*> ((> 0) . arccMaximumWeight)) <$> o .:? "file-source-arc"
return AppSettings{..}
makeClassy_ ''AppSettings

View File

@ -38,6 +38,7 @@ import Utils.I18n as Utils
import Utils.NTop as Utils
import Utils.HttpConditional as Utils
import Utils.Persist as Utils
import Utils.ARC as Utils
import Text.Blaze (Markup, ToMarkup(..))

263
src/Utils/ARC.hs Normal file
View File

@ -0,0 +1,263 @@
module Utils.ARC
( ARCTick
, ARC, initARC
, arcAlterF, lookupARC, insertARC
, ARCHandle, initARCHandle, cachedARC, cachedARC'
, readARCHandle
, arcRecentSize, arcFrequentSize, arcGhostRecentSize, arcGhostFrequentSize
, getARCRecentWeight, getARCFrequentWeight
, describeARC
) where
import ClassyPrelude
import Data.OrdPSQ (OrdPSQ)
import qualified Data.OrdPSQ as OrdPSQ
import Control.Lens
-- https://web.archive.org/web/20210115184012/https://dbs.uni-leipzig.de/file/ARC.pdf
newtype ARCTick = ARCTick { _getARCTick :: Word64 }
deriving (Eq, Ord, Show, Typeable)
deriving newtype (NFData)
makeLenses ''ARCTick
data ARC k w v = ARC
{ arcRecent, arcFrequent :: !(OrdPSQ k ARCTick (v, w))
, arcGhostRecent, arcGhostFrequent :: !(OrdPSQ k ARCTick ())
, arcRecentWeight, arcFrequentWeight :: !w
, arcTargetRecent, arcMaximumWeight :: !w
, arcMaximumGhost :: !Int
}
instance (NFData k, NFData w, NFData v) => NFData (ARC k w v) where
rnf ARC{..} = rnf arcRecent
`seq` rnf arcFrequent
`seq` rnf arcGhostRecent
`seq` rnf arcGhostFrequent
`seq` rnf arcRecentWeight
`seq` rnf arcFrequentWeight
`seq` rnf arcTargetRecent
`seq` rnf arcMaximumWeight
`seq` rnf arcMaximumGhost
describeARC :: Show w
=> ARC k w v
-> String
describeARC ARC{..} = intercalate ", "
[ "arcRecent: " <> show (OrdPSQ.size arcRecent)
, "arcFrequent: " <> show (OrdPSQ.size arcFrequent)
, "arcGhostRecent: " <> show (OrdPSQ.size arcGhostRecent)
, "arcGhostFrequent: " <> show (OrdPSQ.size arcGhostFrequent)
, "arcRecentWeight: " <> show arcRecentWeight
, "arcFrequentWeight: " <> show arcFrequentWeight
, "arcTargetRecent: " <> show arcTargetRecent
, "arcMaximumWeight: " <> show arcMaximumWeight
, "arcMaximumGhost: " <> show arcMaximumGhost
]
arcRecentSize, arcFrequentSize, arcGhostRecentSize, arcGhostFrequentSize :: ARC k w v -> Int
arcRecentSize = OrdPSQ.size . arcRecent
arcFrequentSize = OrdPSQ.size . arcFrequent
arcGhostRecentSize = OrdPSQ.size . arcGhostRecent
arcGhostFrequentSize = OrdPSQ.size . arcGhostFrequent
getARCRecentWeight, getARCFrequentWeight :: ARC k w v -> w
getARCRecentWeight = arcRecentWeight
getARCFrequentWeight = arcFrequentWeight
initialARCTick :: ARCTick
initialARCTick = ARCTick 0
initARC :: forall k w v.
Integral w
=> Int -- ^ @arcMaximumGhost@
-> w -- ^ @arcMaximumWeight@
-> (ARC k w v, ARCTick)
initARC arcMaximumGhost arcMaximumWeight
| arcMaximumWeight < 0 = error "initARC given negative maximum weight"
| arcMaximumGhost < 0 = error "initARC given negative maximum ghost size"
| otherwise = (, initialARCTick) ARC
{ arcRecent = OrdPSQ.empty
, arcFrequent = OrdPSQ.empty
, arcGhostRecent = OrdPSQ.empty
, arcGhostFrequent = OrdPSQ.empty
, arcRecentWeight = 0
, arcFrequentWeight = 0
, arcMaximumWeight
, arcTargetRecent = 0
, arcMaximumGhost
}
infixl 6 |-
(|-) :: (Num a, Ord a) => a -> a -> a
(|-) m s
| s >= m = 0
| otherwise = m - s
arcAlterF :: forall f k w v.
( Ord k
, Functor f
, Integral w
)
=> k
-> (Maybe (v, w) -> f (Maybe (v, w)))
-> ARC k w v
-> ARCTick -> f (ARC k w v, ARCTick)
-- | Unchecked precondition: item weights are always less than `arcMaximumWeight`
arcAlterF k f oldARC@ARC{..} now
| later <= initialARCTick = uncurry (arcAlterF k f) $ initARC arcMaximumGhost arcMaximumWeight
| otherwise = (, later) <$> if
| Just (_p, x@(_, w), arcFrequent') <- OrdPSQ.deleteView k arcFrequent
-> f (Just x) <&> \(fromMaybe x -> x'@(_, w'))
-> let (arcFrequent'', arcFrequentWeight'', arcGhostFrequent') = evictToSize (arcMaximumWeight |- arcTargetRecent |- w') arcFrequent' (arcFrequentWeight - w) arcGhostFrequent
in oldARC
{ arcFrequent = OrdPSQ.insert k now x' arcFrequent''
, arcFrequentWeight = arcFrequentWeight'' + w'
, arcGhostFrequent = arcGhostFrequent'
}
| Just (_p, x@(_, w), arcRecent') <- OrdPSQ.deleteView k arcRecent
-> f (Just x) <&> \(fromMaybe x -> x'@(_, w'))
-> let (arcFrequent', arcFrequentWeight', arcGhostFrequent') = evictToSize (arcMaximumWeight |- arcTargetRecent |- w') arcFrequent arcFrequentWeight arcGhostFrequent
in oldARC
{ arcRecent = arcRecent'
, arcRecentWeight = arcRecentWeight - w
, arcFrequent = OrdPSQ.insert k now x' arcFrequent'
, arcFrequentWeight = arcFrequentWeight' + w'
, arcGhostFrequent = arcGhostFrequent'
}
| Just (_p, (), arcGhostRecent') <- OrdPSQ.deleteView k arcGhostRecent
-> f Nothing <&> \case
Nothing -> oldARC
{ arcGhostRecent = OrdPSQ.insert k now () arcGhostRecent'
}
Just x@(_, w)
-> let arcTargetRecent' = min arcMaximumWeight $ arcTargetRecent + max avgWeight (round $ toRational (OrdPSQ.size arcGhostFrequent) / toRational (OrdPSQ.size arcGhostRecent) * toRational avgWeight)
(arcFrequent', arcFrequentWeight', arcGhostFrequent') = evictToSize (arcMaximumWeight |- arcTargetRecent' |- w) arcFrequent arcFrequentWeight arcGhostFrequent
(arcRecent', arcRecentWeight', arcGhostRecent'') = evictToSize (max arcTargetRecent' $ arcMaximumWeight |- arcFrequentWeight' |- w) arcRecent arcRecentWeight arcGhostRecent'
in oldARC
{ arcRecent = arcRecent'
, arcFrequent = OrdPSQ.insert k now x arcFrequent'
, arcGhostRecent = arcGhostRecent''
, arcGhostFrequent = arcGhostFrequent'
, arcRecentWeight = arcRecentWeight'
, arcFrequentWeight = arcFrequentWeight' + w
, arcTargetRecent = arcTargetRecent'
}
| Just (_p, (), arcGhostFrequent') <- OrdPSQ.deleteView k arcGhostFrequent
-> f Nothing <&> \case
Nothing -> oldARC
{ arcGhostFrequent = OrdPSQ.insert k now () arcGhostFrequent'
}
Just x@(_, w)
-> let arcTargetRecent' = arcTargetRecent |- max avgWeight (round $ toRational (OrdPSQ.size arcGhostRecent) / toRational (OrdPSQ.size arcGhostFrequent) * toRational avgWeight)
(arcFrequent', arcFrequentWeight', arcGhostFrequent'') = evictToSize (arcMaximumWeight |- arcTargetRecent' |- w) arcFrequent arcFrequentWeight arcGhostFrequent'
(arcRecent', arcRecentWeight', arcGhostRecent') = evictToSize (max arcTargetRecent' $ arcMaximumWeight |- arcFrequentWeight' |- w) arcRecent arcRecentWeight arcGhostRecent
in oldARC
{ arcRecent = arcRecent'
, arcFrequent = OrdPSQ.insert k now x arcFrequent'
, arcGhostRecent = arcGhostRecent'
, arcGhostFrequent = arcGhostFrequent''
, arcRecentWeight = arcRecentWeight'
, arcFrequentWeight = arcFrequentWeight' + w
, arcTargetRecent = arcTargetRecent'
}
| otherwise -> f Nothing <&> \case
Nothing -> oldARC
{ arcGhostRecent = OrdPSQ.insert k now () $ evictGhostToCount arcMaximumGhost arcGhostRecent
}
Just x@(_, w)
-> let (arcRecent', arcRecentWeight', arcGhostRecent') = evictToSize (max arcTargetRecent (arcMaximumWeight |- arcFrequentWeight) |- w) arcRecent arcRecentWeight arcGhostRecent'
in oldARC
{ arcRecent = OrdPSQ.insert k now x arcRecent'
, arcRecentWeight = arcRecentWeight' + w
, arcGhostRecent = arcGhostRecent'
}
where
avgWeight = round $ toRational (arcRecentWeight + arcFrequentWeight) / toRational (OrdPSQ.size arcFrequent + OrdPSQ.size arcRecent)
later :: ARCTick
later = over getARCTick succ now
evictToSize :: w -> OrdPSQ k ARCTick (v, w) -> w -> OrdPSQ k ARCTick () -> (OrdPSQ k ARCTick (v, w), w, OrdPSQ k ARCTick ())
evictToSize tSize c cSize ghostC
| cSize <= tSize = (c, cSize, ghostC)
| OrdPSQ.size ghostC >= arcMaximumGhost = evictToSize tSize c cSize $ OrdPSQ.deleteMin ghostC
| Just (k', p', (_, w'), c') <- OrdPSQ.minView c = evictToSize tSize c' (cSize - w') $ OrdPSQ.insert k' p' () ghostC
| otherwise = error "evictToSize: cannot reach required size through eviction"
evictGhostToCount :: Int -> OrdPSQ k ARCTick () -> OrdPSQ k ARCTick ()
evictGhostToCount tCount c
| OrdPSQ.size c <= tCount = c
| Just (_, _, _, c') <- OrdPSQ.minView c = evictGhostToCount tCount c'
| otherwise = error "evictGhostToCount: cannot reach required count through eviction"
lookupARC :: forall k w v.
( Ord k
, Integral w
)
=> k
-> (ARC k w v, ARCTick)
-> Maybe (v, w)
lookupARC k = getConst . uncurry (arcAlterF k Const)
insertARC :: forall k w v.
( Ord k
, Integral w
)
=> k
-> Maybe (v, w)
-> ARC k w v
-> ARCTick -> (ARC k w v, ARCTick)
insertARC k newVal = (runIdentity .) . arcAlterF k (const $ pure newVal)
newtype ARCHandle k w v = ARCHandle { _getARCHandle :: TVar (ARC k w v, ARCTick) }
deriving (Eq, Typeable)
initARCHandle :: forall k w v m.
( MonadIO m
, Integral w
)
=> Int -- ^ @arcMaximumGhost@
-> w -- ^ @arcMaximumWeight@
-> m (ARCHandle k w v)
initARCHandle maxGhost maxWeight = fmap ARCHandle . liftIO . newTVarIO $ initARC maxGhost maxWeight
cachedARC' :: forall k w v m.
( MonadIO m
, Ord k
, Integral w
, NFData k, NFData w, NFData v
)
=> ARCHandle k w v
-> k
-> (Maybe (v, w) -> m (Maybe (v, w)))
-> m (Maybe v)
cachedARC' (ARCHandle arcVar) k f = do
oldVal <- liftIO $ lookupARC k <$> readTVarIO arcVar
newVal <- f oldVal
atomically . modifyTVar' arcVar $ \(arc, tick) -> force $ insertARC k newVal arc tick
return $ view _1 <$> newVal
cachedARC :: forall k w v m.
( MonadIO m
, Ord k
, Integral w
, NFData k, NFData w, NFData v
)
=> ARCHandle k w v
-> k
-> (Maybe (v, w) -> m (v, w))
-> m v
cachedARC h k f = fromMaybe (error "cachedARC: cachedARC' returned Nothing") <$> cachedARC' h k (fmap Just . f)
readARCHandle :: MonadIO m
=> ARCHandle k w v
-> m (ARC k w v, ARCTick)
readARCHandle (ARCHandle arcVar) = readTVarIO arcVar

View File

@ -15,6 +15,8 @@ module Utils.Metrics
, observeDeletedUnreferencedFiles, observeDeletedUnreferencedChunks, observeInjectedFiles, observeRechunkedFiles
, registerJobWorkerQueueDepth
, observeMissingFiles
, ARCMetrics, ARCLabel(..)
, arcMetrics, observeARCOutcome
) where
import Import.NoModel hiding (Vector, Info)
@ -22,6 +24,8 @@ import Model
import Prometheus
import Prometheus.Metric.GHC
import Settings (ARCConf(..))
import qualified Data.List as List
import System.Clock
@ -228,6 +232,65 @@ missingFiles = unsafeRegister . vector "ref" $ gauge info
where info = Info "uni2work_missing_files_count"
"Number of files referenced from within database that are missing"
data ARCMetrics = ARCMetrics
{ arcmMiss, arcmHit :: Histogram
}
data ARCLabel = ARCFileSource
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Typeable)
deriving anyclass (Universe, Finite)
nullaryPathPiece ''ARCLabel $ camelToPathPiece' 1
arcMetrics :: Integral w
=> ARCLabel
-> ARCConf w
-> ARCHandle k w v
-> Metric ARCMetrics
arcMetrics lbl ARCConf{..} ah = Metric $ do
(arcmMiss, collectMiss) <- constructMissHit
(arcmHit, collectHit) <- constructMissHit
let
relabel l s (SampleGroup i t ss) = SampleGroup i t . flip map ss $ \(Sample k lbls v) -> Sample k ((l, s) : filter (views _1 $ (/=) l) lbls) v
labelOutcome = relabel "outcome"
labelArc = relabel "arc"
collectARCMetrics = map (labelArc $ toPathPiece lbl) <$> do
misses <- concatMap ((\(SampleGroup _ _ misses) -> misses) . labelOutcome "miss") <$> collectMiss
hits <- concatMap ((\(SampleGroup _ _ hits) -> hits) . labelOutcome "hit") <$> collectHit
(arc, _) <- readARCHandle ah
return
[ SampleGroup missHitInfo HistogramType $ hits ++ misses
, SampleGroup sizeInfo GaugeType
[ Sample "arc_size" [("lru", "ghost-recent")] . encodeUtf8 . tshow $ arcGhostRecentSize arc
, Sample "arc_size" [("lru", "recent")] . encodeUtf8 . tshow $ arcRecentSize arc
, Sample "arc_size" [("lru", "frequent")] . encodeUtf8 . tshow $ arcFrequentSize arc
, Sample "arc_size" [("lru", "ghost-frequent")] . encodeUtf8 . tshow $ arcGhostFrequentSize arc
]
, SampleGroup weightInfo GaugeType
[ Sample "arc_weight" [("lru", "recent")] . encodeUtf8 . tshow . toInteger $ getARCRecentWeight arc
, Sample "arc_weight" [("lru", "frequent")] . encodeUtf8 . tshow . toInteger $ getARCFrequentWeight arc
]
]
return (ARCMetrics{..}, collectARCMetrics)
where
Metric constructMissHit = histogram missHitInfo . histogramBuckets 1000 $ fromIntegral arccMaximumWeight
missHitInfo = Info "arc_query_outcome_count"
"Number of hits/misses in the ARC"
sizeInfo = Info "arc_size"
"Number of entries in the ARC LRUs"
weightInfo = Info "arc_weight"
"Sum of weights of entries in the ARC LRUs"
observeARCOutcome :: ( MonadMonitor m
, Integral w
)
=> ARCMetrics
-> Bool -- ^ Hit?
-> w -- ^ Weight
-> m ()
observeARCOutcome ARCMetrics{..} isHit = observe (bool arcmMiss arcmHit isHit) . fromIntegral
withHealthReportMetrics :: MonadIO m => m HealthReport -> m HealthReport
withHealthReportMetrics act = do
@ -336,7 +399,7 @@ observeLoginOutcome plugin outcome
registerJobHeldLocksCount :: MonadIO m => TVar (Set QueuedJobId) -> m ()
registerJobHeldLocksCount = liftIO . void . register . jobHeldLocksCount
data FileChunkStorage = StorageMinio | StorageDB
data FileChunkStorage = StorageMinio | StorageDB | StorageARC
deriving (Eq, Ord, Read, Show, Enum, Bounded, Generic, Typeable)
deriving anyclass (Universe, Finite)
nullaryPathPiece ''FileChunkStorage $ camelToPathPiece' 1