diff --git a/app/stackage-server-cron.hs b/app/stackage-server-cron.hs index b0fc6bd..a722457 100644 --- a/app/stackage-server-cron.hs +++ b/app/stackage-server-cron.hs @@ -58,7 +58,7 @@ optsParser = T.unpack defHaddockBucketName)) <*> switch (long "do-not-upload" <> - help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*> + help "Disable upload of Hoogle database and snapshots.json") <*> option readLogLevel (long "log-level" <> metavar "LOG_LEVEL" <> short 'l' <> value LevelInfo <> diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 233575e..840aab1 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -107,9 +107,12 @@ withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man -- | Returns an action that, under the SingleRun wrapper that ensures only one -- thing at a time is writing the file in question, ensure that a Hoogle -- database exists on the filesystem for the given SnapName. But only going so --- far as downloading it from the haddock bucket. See 'createHoogleDB' for the +-- far as downloading it from the haddock bucket. See 'buildAndUploadHoogleDBs' for the -- function that puts it there in the first place. If no db exists in the -- bucket, the action will return 'Nothing'. +-- +-- The location searched is $PWD/hoogle//.hoo +-- E.g. in production, ~stackage-update/hoogle/lts-22.20/5.0.18.4.hoo (for stackage-update). newHoogleLocker :: (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath)) newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker @@ -238,7 +241,7 @@ runStackageUpdate doNotUpload = do -- for the -1'th package. runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ()) unless doNotUpload uploadSnapshotsJSON - buildAndUploadHoogleDB doNotUpload + buildAndUploadHoogleDBs doNotUpload logInfo "Finished building and uploading Hoogle DBs" @@ -762,6 +765,9 @@ uploadHoogleDB fp key = body <- toBody <$> readFileBinary fpgz uploadBucket <- scUploadBucketName <$> ask uploadFromRIO key $ + -- FIXME: I should also set content encoding explicitly here. But + -- then I would break stackage-server, which applies an 'ungzip' in + -- 'newHoogleLocker'. :( set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body @@ -775,15 +781,28 @@ uploadFromRIO key po = do logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3" -buildAndUploadHoogleDB :: Bool -> RIO StackageCron () -buildAndUploadHoogleDB doNotUpload = do +-- | As the name says, build and upload Hoogle DBs. +-- +-- Which DBs? The last 5 LTS and the last 5 Nightlies that are missing their +-- Hoogle DBs. +-- +-- How? It downloads the Hoogle inputs that were previously generated alongside +-- the Haddocks, runs @hoogle@ on them, and uploads the result back to the same +-- bucket. Those inputs were generated by snapshot curation. +-- +-- Why? I feel like this should be a short Bash script using curl and hoogle, and +-- maybe one day it will be. +-- +-- This action is only run by stackage-server-cron. +buildAndUploadHoogleDBs :: Bool -> RIO StackageCron () +buildAndUploadHoogleDBs doNotUpload = do snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 - -- currentHoogleVersionId <- scHoogleVersionId <$> ask env <- ask awsEnv <- asks scEnvAWS bucketUrl <- asks scDownloadBucketUrl -- locker is an action that returns the path to a hoogle db, if one exists - -- in the haddock bucket already. + -- in the haddock bucket already. It takes the SnapName as an argument. + -- I think it might be overkill. locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl let -- These bindings undo a questionable conflation of operations insertH = checkInsertSnapshotHoogleDb True @@ -796,12 +815,18 @@ buildAndUploadHoogleDB doNotUpload = do -- Perhaps the check can be removed. unlessM (checkH snapshotId) $ do logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) + -- Check if the database already exists (by downloading it). + -- FIXME: Why not just send a HEAD? + -- Perhaps the idea was to put the hoogle database somewhere the + -- main Stackage server process can find it? But nowadays + -- stackage-server downloads its own version separately. mfp <- singleRun locker snapName case mfp of Just _ -> do - -- Something bad must have happened: we created the Hoogle db - -- previously, but didn't get to record it in our database. - logInfo $ "Current hoogle database exists for: " <> display snapName + -- Something bad must have happened: we created the hoogle db + -- previously, but didn't get to record it as available. + logWarn $ "Unregistered hoogle database found for: " <> display snapName + <> ". Registering now." void $ insertH snapshotId Nothing -> do logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName @@ -820,8 +845,9 @@ buildAndUploadHoogleDB doNotUpload = do -- Haddocks are downloaded from the documentation bucket, where they were -- uploaded as a tar file. -- --- Returns the path to the .hoo database. -createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) +-- Returns the path to the .hoo database, which will be found in the first +-- argument. It will look like @/hoogle-gen/output.hoo@. +createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron FilePath createHoogleDB snapshotId snapName = do logInfo $ "Creating Hoogle DB for " <> display snapName downloadBucketUrl <- scDownloadBucketUrl <$> ask @@ -848,7 +874,9 @@ createHoogleDB snapshotId snapName = do sourceFile tarFP .| untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .| foldMapC Any + -- We just check if we have any Hoogle .txt file at all. unless hasRestored $ error "No Hoogle .txt files found" + -- Generate the hoogle database let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir] logInfo $ mconcat @@ -856,6 +884,8 @@ createHoogleDB snapshotId snapName = do , foldMap fromString $ L.intersperse " " ("hoogle" : args) , ")" ] + -- 'Hoogle.hoogle' expects to run as an app, and crashes if something + -- goes wrong. That's good. liftIO $ Hoogle.hoogle args logInfo "Merge done" return $ Just outname diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs index 5610112..9eebfa6 100644 --- a/src/Stackage/Database/Query.hs +++ b/src/Stackage/Database/Query.hs @@ -1224,9 +1224,10 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do lift $ logInfo $ "Marking hoogle database for version " <> display hver <> " as available." - -- whether or not the version exists, we still put it into snapshot_hoogle_db - -- So literally the only use of the above query is to log the - -- action we're taking. + -- whether or not the version exists, we still put it into + -- snapshot_hoogle_db. So literally the only use of the above + -- query is to log the action we're taking. Whether or not it + -- exists is immaterial to the following action. isJust <$> P.insertUniqueEntity sh -- if we're not inserting, we're just checking if it already exists -- in snapshot_hoogle_db.