Document more stackage-server-cron steps

This commit is contained in:
Bryan Richter 2025-02-07 12:53:18 +02:00
parent 608cf0f4f6
commit 672099d68e
No known key found for this signature in database
GPG Key ID: B202264020068BFB
3 changed files with 46 additions and 15 deletions

View File

@ -58,7 +58,7 @@ optsParser =
T.unpack defHaddockBucketName)) <*>
switch
(long "do-not-upload" <>
help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*>
help "Disable upload of Hoogle database and snapshots.json") <*>
option
readLogLevel
(long "log-level" <> metavar "LOG_LEVEL" <> short 'l' <> value LevelInfo <>

View File

@ -107,9 +107,12 @@ withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man
-- | Returns an action that, under the SingleRun wrapper that ensures only one
-- thing at a time is writing the file in question, ensure that a Hoogle
-- database exists on the filesystem for the given SnapName. But only going so
-- far as downloading it from the haddock bucket. See 'createHoogleDB' for the
-- far as downloading it from the haddock bucket. See 'buildAndUploadHoogleDBs' for the
-- function that puts it there in the first place. If no db exists in the
-- bucket, the action will return 'Nothing'.
--
-- The location searched is $PWD/hoogle/<snapshot>/<hoogle-version>.hoo
-- E.g. in production, ~stackage-update/hoogle/lts-22.20/5.0.18.4.hoo (for stackage-update).
newHoogleLocker ::
(HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
@ -238,7 +241,7 @@ runStackageUpdate doNotUpload = do
-- for the -1'th package.
runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ())
unless doNotUpload uploadSnapshotsJSON
buildAndUploadHoogleDB doNotUpload
buildAndUploadHoogleDBs doNotUpload
logInfo "Finished building and uploading Hoogle DBs"
@ -762,6 +765,9 @@ uploadHoogleDB fp key =
body <- toBody <$> readFileBinary fpgz
uploadBucket <- scUploadBucketName <$> ask
uploadFromRIO key $
-- FIXME: I should also set content encoding explicitly here. But
-- then I would break stackage-server, which applies an 'ungzip' in
-- 'newHoogleLocker'. :(
set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body
@ -775,15 +781,28 @@ uploadFromRIO key po = do
logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e
Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3"
buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDB doNotUpload = do
-- | As the name says, build and upload Hoogle DBs.
--
-- Which DBs? The last 5 LTS and the last 5 Nightlies that are missing their
-- Hoogle DBs.
--
-- How? It downloads the Hoogle inputs that were previously generated alongside
-- the Haddocks, runs @hoogle@ on them, and uploads the result back to the same
-- bucket. Those inputs were generated by snapshot curation.
--
-- Why? I feel like this should be a short Bash script using curl and hoogle, and
-- maybe one day it will be.
--
-- This action is only run by stackage-server-cron.
buildAndUploadHoogleDBs :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDBs doNotUpload = do
snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
-- currentHoogleVersionId <- scHoogleVersionId <$> ask
env <- ask
awsEnv <- asks scEnvAWS
bucketUrl <- asks scDownloadBucketUrl
-- locker is an action that returns the path to a hoogle db, if one exists
-- in the haddock bucket already.
-- in the haddock bucket already. It takes the SnapName as an argument.
-- I think it might be overkill.
locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
let -- These bindings undo a questionable conflation of operations
insertH = checkInsertSnapshotHoogleDb True
@ -796,12 +815,18 @@ buildAndUploadHoogleDB doNotUpload = do
-- Perhaps the check can be removed.
unlessM (checkH snapshotId) $ do
logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
-- Check if the database already exists (by downloading it).
-- FIXME: Why not just send a HEAD?
-- Perhaps the idea was to put the hoogle database somewhere the
-- main Stackage server process can find it? But nowadays
-- stackage-server downloads its own version separately.
mfp <- singleRun locker snapName
case mfp of
Just _ -> do
-- Something bad must have happened: we created the Hoogle db
-- previously, but didn't get to record it in our database.
logInfo $ "Current hoogle database exists for: " <> display snapName
-- Something bad must have happened: we created the hoogle db
-- previously, but didn't get to record it as available.
logWarn $ "Unregistered hoogle database found for: " <> display snapName
<> ". Registering now."
void $ insertH snapshotId
Nothing -> do
logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
@ -820,8 +845,9 @@ buildAndUploadHoogleDB doNotUpload = do
-- Haddocks are downloaded from the documentation bucket, where they were
-- uploaded as a tar file.
--
-- Returns the path to the .hoo database.
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
-- Returns the path to the .hoo database, which will be found in the first
-- argument. It will look like @<rootDir>/hoogle-gen/output.hoo@.
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron FilePath
createHoogleDB snapshotId snapName = do
logInfo $ "Creating Hoogle DB for " <> display snapName
downloadBucketUrl <- scDownloadBucketUrl <$> ask
@ -848,7 +874,9 @@ createHoogleDB snapshotId snapName = do
sourceFile tarFP .|
untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .|
foldMapC Any
-- We just check if we have any Hoogle .txt file at all.
unless hasRestored $ error "No Hoogle .txt files found"
-- Generate the hoogle database
let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir]
logInfo $
mconcat
@ -856,6 +884,8 @@ createHoogleDB snapshotId snapName = do
, foldMap fromString $ L.intersperse " " ("hoogle" : args)
, ")"
]
-- 'Hoogle.hoogle' expects to run as an app, and crashes if something
-- goes wrong. That's good.
liftIO $ Hoogle.hoogle args
logInfo "Merge done"
return $ Just outname

View File

@ -1224,9 +1224,10 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
lift $
logInfo $
"Marking hoogle database for version " <> display hver <> " as available."
-- whether or not the version exists, we still put it into snapshot_hoogle_db
-- So literally the only use of the above query is to log the
-- action we're taking.
-- whether or not the version exists, we still put it into
-- snapshot_hoogle_db. So literally the only use of the above
-- query is to log the action we're taking. Whether or not it
-- exists is immaterial to the following action.
isJust <$> P.insertUniqueEntity sh
-- if we're not inserting, we're just checking if it already exists
-- in snapshot_hoogle_db.