Add a bunch of docs around Hoogle DBs

so I remember how it all works.
2026-04-12 15:59:04 +02:00 · 2024-02-16 13:12:48 +02:00 · 2024-02-16 13:12:48 +02:00 · eebde8b817
commit eebde8b817
parent 22ef976f05
2 changed files with 65 additions and 8 deletions
--- a/src/Stackage/Database/Cron.hs
+++ b/src/Stackage/Database/Cron.hs
@ -103,6 +103,11 @@ getStackageSnapshotsDir = do
 withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
 withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
 -- | Under the SingleRun wrapper that ensures only one thing at a time is
 -- writing the file in question, ensure that a Hoogle database exists on the
 -- filesystem for the given SnapName. But only going so far as downloading it
 -- from the haddock bucket. See 'createHoogleDB' for the function that puts it
 -- there in the first place.
 newHoogleLocker ::
       (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
 newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
@ -704,20 +709,30 @@ uploadFromRIO key po = do
 buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
 buildAndUploadHoogleDB doNotUpload = do
    snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
    -- currentHoogleVersionId <- scHoogleVersionId <$> ask
    env <- ask
    awsEnv <- asks scEnvAWS
    bucketUrl <- asks scDownloadBucketUrl
    -- locker is an action that returns the path to a hoogle db, if one exists
    -- in the haddock bucket already.
    locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
    let insertH = checkInsertSnapshotHoogleDb True
        checkH = checkInsertSnapshotHoogleDb False
    for_ snapshots $ \(snapshotId, snapName) ->
-        unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do
+        -- Even though we just got a list of snapshots that don't have hoogle
        -- databases, we check again. For some reason. I don't see how this can
        -- actually be useful. both lastLtsNightlyWithoutHoogleDb and
        -- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb.
        -- Perhaps the check can be removed.
        unlessM (checkH snapshotId) $ do
            logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
            mfp <- singleRun locker snapName
            case mfp of
                Just _ -> do
                    logInfo $ "Current hoogle database exists for: " <> display snapName
-                    void $ checkInsertSnapshotHoogleDb True snapshotId
+                    void $ insertH snapshotId
                Nothing -> do
-                    logInfo $ "Current hoogle database does not yet exist for: " <> display snapName
+                    logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
                    mfp' <- createHoogleDB snapshotId snapName
                    forM_ mfp' $ \fp -> do
                        let key = hoogleKey snapName
@ -726,8 +741,10 @@ buildAndUploadHoogleDB doNotUpload = do
                        renamePath fp dest
                        unless doNotUpload $ do
                            uploadHoogleDB dest (ObjectKey key)
-                            void $ checkInsertSnapshotHoogleDb True snapshotId
+                            void $ insertH snapshotId
 -- | Create a hoogle db from haddocks for the given snapshot, and upload it to
 -- the haddock bucket.
 createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
 createHoogleDB snapshotId snapName =
    handleAny logException $ do
--- a/src/Stackage/Database/Query.hs
+++ b/src/Stackage/Database/Query.hs
@ -167,25 +167,48 @@ ltsBefore x y = do
    go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts))
 -- | Queries the database for the latest LTS and nightly snapshots that do not
 -- have corresponding entries in the SnapshotHoogleDb table with the current
 -- Hoogle version.
 lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)]
 lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
    currentHoogleVersionId <- scHoogleVersionId <$> ask
    let getSnapshotsWithoutHoogeDb snapId snapCount =
            map (unValue *** unValue) <$>
            select
                -- "snap" is either Lts or Nightly, while "snapshot" is indeed
                -- "snapshot"
                (from $ \(snap `InnerJoin` snapshot) -> do
                     on $ snap ^. snapId ==. snapshot ^. SnapshotId
                     where_ $
                         notExists $
                         from $ \snapshotHoogleDb ->
                             where_ $
-                             (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^.
+                             (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot
-                              SnapshotId) &&.
+                                ==. snapshot ^. SnapshotId)
-                             (snapshotHoogleDb ^. SnapshotHoogleDbVersion ==.
+                             &&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion
-                              val currentHoogleVersionId)
+                                    ==. val currentHoogleVersionId)
                     orderBy [desc (snapshot ^. SnapshotCreated)]
                     limit $ fromIntegral snapCount
                     pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName))
                -- In sql, this query would be
                --
                --     select snapshot.id, snapshot.name
                --     from snapshot
                --     join $foo as snap -- either Lts or Nightly
                --     on snap.snap = snapshot.id
                --     where not exists (
                --       select 1
                --       from snapshot_hoogle_db
                --       where snapshot_hoogle_db.snapshot = snapshot.id
                --       and snapshot_hoogle_db.version = $currentHoogleVersionId
                --     )
                --     order by snapshot.created desc
                --     limit $snapCount
                --
                -- So it returns a list of snapshots where there is no
                -- corresponding entry in the snapshot_hoogle_db table for the
                -- current hoogle version.
    run $ do
        lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount
        nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount
@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
                        (from
                             (\v -> do
                                  where_ $ v ^. VersionId ==. val hoogleVersionId
                                  -- This is reaching into the *pantry*
                                  -- database!
                                  pure (v ^. VersionVersion)))
                    -- in sql, this query would be
                    --
                    --     select version.version
                    --     from version
                    --     where version.id = $hoogleVersionId
                    --
                    -- So it returns the "version"s that corresponds to the
                    -- current hoogle version id.
                -- mhver is now Maybe Version, and corresponds to the current
                -- hoogle version, assuming it exists in the Version table
                forM_ mhver $ \hver ->
                    lift $
                    logInfo $
                    "Marking hoogle database for version " <> display hver <> " as available."
                -- whether or not the version exists, we still put it into snapshot_hoogle_db
                -- So literally the only use of the above query is to log the
                -- action we're taking.
                isJust <$> P.insertUniqueEntity sh
            -- if we're not inserting, we're just checking if it already exists
            -- in snapshot_hoogle_db.
            else isJust <$> P.checkUnique sh