Made status reporting and cabal file caching optional for the cron job

This commit is contained in:
Alexey Kuleshevich 2019-05-25 20:00:28 +03:00
parent f5e147ab97
commit 385620e185
No known key found for this signature in database
GPG Key ID: E59B216127119E3E
3 changed files with 61 additions and 44 deletions

View File

@ -62,7 +62,13 @@ optsParser =
value (GithubRepo repoAccount repoName) <>
help
("Github repository with snapshot files. Default level is '" ++
repoAccount ++ "/" ++ repoName ++ "'."))
repoAccount ++ "/" ++ repoName ++ "'.")) <*>
switch (long "report-progress" <> help "Report how many packages has been loaded.") <*>
switch
(long "cache-cabal-files" <>
help
("Improve performance by cached parsed cabal files" ++
" at expense of higher memory consumption"))
where
repoAccount = "commercialhaskell"
repoName = "stackage-snapshots"

View File

@ -204,6 +204,8 @@ stackageServerCron StackageCronOptions {..} = do
, scDownloadBucketName = scoDownloadBucketName
, scUploadBucketName = scoUploadBucketName
, scSnapshotsRepo = scoSnapshotsRepo
, scReportProgress = scoReportProgress
, scCacheCabalFiles = scoCacheCabalFiles
}
in runRIO stackage (runStackageUpdate scoDoNotUpload)
@ -265,7 +267,8 @@ makeCorePackageGetter _compiler pname ver =
readIORef pkgInfoRef >>= \case
Just pkgInfo -> return pkgInfo
Nothing -> do
logSticky $ "Loading core package: " <> display pid
whenM (scReportProgress <$> ask) $
logSticky $ "Loading core package: " <> display pid
htr <- getHackageTarball pir Nothing
case htrFreshPackageInfo htr of
Just (gpd, treeId) -> do
@ -295,20 +298,24 @@ makeCorePackageGetter _compiler pname ver =
addPantryPackage ::
SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool
addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do
gpdCachedRef <- scCachedGPD <$> ask
env <- ask
let gpdCachedRef = scCachedGPD env
cache = scCacheCabalFiles env
let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey
let updateCacheGPD blobId gpd =
atomicModifyIORef' gpdCachedRef (\cacheMap -> (IntMap.insert blobId gpd cacheMap, gpd))
let getCachedGPD treeCabal =
\case
Just gpd -> updateCacheGPD (blobKeyToInt treeCabal) gpd
Nothing -> do
Just gpd | cache -> updateCacheGPD (blobKeyToInt treeCabal) gpd
Just gpd -> pure gpd
Nothing | cache -> do
cacheMap <- readIORef gpdCachedRef
case IntMap.lookup (blobKeyToInt treeCabal) cacheMap of
Just gpd -> pure gpd
Nothing ->
loadBlobById treeCabal >>=
updateCacheGPD (blobKeyToInt treeCabal) . parseCabalBlob
Nothing -> parseCabalBlob <$> loadBlobById treeCabal
let storeHackageSnapshotPackage hcid mtid mgpd =
getTreeForKey treeKey >>= \case
Just (Entity treeId _)
@ -521,7 +528,9 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo {
sfiSnapName
sfiUpdatedOn
snapshotFile
unlessM (readIORef finishedDocs) $
report <- scReportProgress <$> ask
when report $
unlessM (readIORef finishedDocs) $
logSticky "Still loading the docs for previous snapshot ..."
pure loadDocs
@ -556,10 +565,29 @@ updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {..
pure curSucc
-- Leave some cores and db connections for the doc loader
n <- max 1 . (`div` 2) <$> getNumCapabilities
before <- getCurrentTime
report <- scReportProgress <$> ask
pantryUpdatesSucceeded <-
runConcurrently
(Concurrently (runProgressReporter loadedPackageCountRef totalPackages snapName) *>
(Concurrently
(when report (runProgressReporter loadedPackageCountRef totalPackages snapName)) *>
Concurrently (pooledMapConcurrentlyN n addPantryPackageWithReport sfPackages))
after <- getCurrentTime
let timeTotal = round (diffUTCTime after before)
(mins, secs) = timeTotal `quotRem` (60 :: Int)
packagePerSecond = fromIntegral ((totalPackages * 100) `div` timeTotal) / 100 :: Float
logInfo $
mconcat
[ "Loading snapshot '"
, display snapName
, "' was done (in "
, displayShow mins
, "min "
, displayShow secs
, "sec). With average "
, displayShow packagePerSecond
, " packages/sec. There are still docs."
]
return $ do
checkForDocsSucceeded <-
tryAny (checkForDocs snapshotId snapName) >>= \case
@ -574,44 +602,24 @@ updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {..
else logError $ "There were errors while adding snapshot '" <> display snapName <> "'"
-- | Report how many packages has been loaded so far and provide statistics at the end.
-- | Report how many packages has been loaded so far.
runProgressReporter :: IORef Int -> Int -> SnapName -> RIO StackageCron ()
runProgressReporter loadedPackageCountRef totalPackages snapName = do
before <- getCurrentTime
let reportProgress = do
loadedPackageCount <- readIORef loadedPackageCountRef
if loadedPackageCount < totalPackages
then do
logSticky $
mconcat
[ "Loading snapshot '"
, display snapName
, "' ("
, displayShow loadedPackageCount
, "/"
, displayShow totalPackages
, ")"
]
threadDelay 1000000
reportProgress
else do
after <- getCurrentTime
let timeTotal = round (diffUTCTime after before)
(mins, secs) = timeTotal `quotRem` (60 :: Int)
packagePerSecond =
fromIntegral ((loadedPackageCount * 100) `div` timeTotal) / 100 :: Float
logInfo $
mconcat
[ "Loading snapshot '"
, display snapName
, "' was done (in "
, displayShow mins
, "min "
, displayShow secs
, "sec). With average "
, displayShow packagePerSecond
, " packages/sec. There are still docs."
]
when (loadedPackageCount < totalPackages) $ do
logSticky $
mconcat
[ "Loading snapshot '"
, display snapName
, "' ("
, displayShow loadedPackageCount
, "/"
, displayShow totalPackages
, ")"
]
threadDelay 1000000
reportProgress
reportProgress
-- | Uploads a json file to S3 with all latest snapshots per major lts version and one nightly.

View File

@ -50,9 +50,8 @@ import Data.Aeson
import qualified Data.Text as T
import Data.Text.Read (decimal)
import Network.AWS (Env, HasEnv(..))
import Pantry as Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..),
HasPantryConfig(..), PackageIdentifierRevision(..),
TreeKey(..))
import Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..),
HasPantryConfig(..), PackageIdentifierRevision(..), TreeKey(..))
import Pantry.Internal.Stackage as Pantry (PackageNameP(..), PantryConfig,
VersionP(..))
import Pantry.SHA256 (fromHexText)
@ -74,6 +73,8 @@ data StackageCronOptions = StackageCronOptions
, scoDoNotUpload :: !Bool
, scoLogLevel :: !LogLevel
, scoSnapshotsRepo :: !GithubRepo
, scoReportProgress :: !Bool
, scoCacheCabalFiles :: !Bool
}
data StackageCron = StackageCron
@ -87,6 +88,8 @@ data StackageCron = StackageCron
, scDownloadBucketName :: !Text
, scUploadBucketName :: !Text
, scSnapshotsRepo :: !GithubRepo
, scReportProgress :: !Bool
, scCacheCabalFiles :: !Bool
}
instance HasEnv StackageCron where