From eebde8b817b6340174a43ba4864470377861b6ad Mon Sep 17 00:00:00 2001 From: Bryan Richter Date: Fri, 16 Feb 2024 13:12:48 +0200 Subject: [PATCH] Add a bunch of docs around Hoogle DBs so I remember how it all works. --- src/Stackage/Database/Cron.hs | 25 +++++++++++++++--- src/Stackage/Database/Query.hs | 48 +++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index 9fa8485..b53e0d5 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -103,6 +103,11 @@ getStackageSnapshotsDir = do withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f) +-- | Under the SingleRun wrapper that ensures only one thing at a time is +-- writing the file in question, ensure that a Hoogle database exists on the +-- filesystem for the given SnapName. But only going so far as downloading it +-- from the haddock bucket. See 'createHoogleDB' for the function that puts it +-- there in the first place. newHoogleLocker :: (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath)) newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker @@ -704,20 +709,30 @@ uploadFromRIO key po = do buildAndUploadHoogleDB :: Bool -> RIO StackageCron () buildAndUploadHoogleDB doNotUpload = do snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 + -- currentHoogleVersionId <- scHoogleVersionId <$> ask env <- ask awsEnv <- asks scEnvAWS bucketUrl <- asks scDownloadBucketUrl + -- locker is an action that returns the path to a hoogle db, if one exists + -- in the haddock bucket already. locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl + let insertH = checkInsertSnapshotHoogleDb True + checkH = checkInsertSnapshotHoogleDb False for_ snapshots $ \(snapshotId, snapName) -> - unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do + -- Even though we just got a list of snapshots that don't have hoogle + -- databases, we check again. For some reason. I don't see how this can + -- actually be useful. both lastLtsNightlyWithoutHoogleDb and + -- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb. + -- Perhaps the check can be removed. + unlessM (checkH snapshotId) $ do logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) mfp <- singleRun locker snapName case mfp of Just _ -> do logInfo $ "Current hoogle database exists for: " <> display snapName - void $ checkInsertSnapshotHoogleDb True snapshotId + void $ insertH snapshotId Nothing -> do - logInfo $ "Current hoogle database does not yet exist for: " <> display snapName + logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName mfp' <- createHoogleDB snapshotId snapName forM_ mfp' $ \fp -> do let key = hoogleKey snapName @@ -726,8 +741,10 @@ buildAndUploadHoogleDB doNotUpload = do renamePath fp dest unless doNotUpload $ do uploadHoogleDB dest (ObjectKey key) - void $ checkInsertSnapshotHoogleDb True snapshotId + void $ insertH snapshotId +-- | Create a hoogle db from haddocks for the given snapshot, and upload it to +-- the haddock bucket. createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) createHoogleDB snapshotId snapName = handleAny logException $ do diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs index 4f94c6e..41041a2 100644 --- a/src/Stackage/Database/Query.hs +++ b/src/Stackage/Database/Query.hs @@ -167,25 +167,48 @@ ltsBefore x y = do go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts)) +-- | Queries the database for the latest LTS and nightly snapshots that do not +-- have corresponding entries in the SnapshotHoogleDb table with the current +-- Hoogle version. lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)] lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do currentHoogleVersionId <- scHoogleVersionId <$> ask let getSnapshotsWithoutHoogeDb snapId snapCount = map (unValue *** unValue) <$> select + -- "snap" is either Lts or Nightly, while "snapshot" is indeed + -- "snapshot" (from $ \(snap `InnerJoin` snapshot) -> do on $ snap ^. snapId ==. snapshot ^. SnapshotId where_ $ notExists $ from $ \snapshotHoogleDb -> where_ $ - (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^. - SnapshotId) &&. - (snapshotHoogleDb ^. SnapshotHoogleDbVersion ==. - val currentHoogleVersionId) + (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot + ==. snapshot ^. SnapshotId) + &&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion + ==. val currentHoogleVersionId) orderBy [desc (snapshot ^. SnapshotCreated)] limit $ fromIntegral snapCount pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName)) + -- In sql, this query would be + -- + -- select snapshot.id, snapshot.name + -- from snapshot + -- join $foo as snap -- either Lts or Nightly + -- on snap.snap = snapshot.id + -- where not exists ( + -- select 1 + -- from snapshot_hoogle_db + -- where snapshot_hoogle_db.snapshot = snapshot.id + -- and snapshot_hoogle_db.version = $currentHoogleVersionId + -- ) + -- order by snapshot.created desc + -- limit $snapCount + -- + -- So it returns a list of snapshots where there is no + -- corresponding entry in the snapshot_hoogle_db table for the + -- current hoogle version. run $ do lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount @@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do (from (\v -> do where_ $ v ^. VersionId ==. val hoogleVersionId + -- This is reaching into the *pantry* + -- database! pure (v ^. VersionVersion))) + -- in sql, this query would be + -- + -- select version.version + -- from version + -- where version.id = $hoogleVersionId + -- + -- So it returns the "version"s that corresponds to the + -- current hoogle version id. + -- mhver is now Maybe Version, and corresponds to the current + -- hoogle version, assuming it exists in the Version table forM_ mhver $ \hver -> lift $ logInfo $ "Marking hoogle database for version " <> display hver <> " as available." + -- whether or not the version exists, we still put it into snapshot_hoogle_db + -- So literally the only use of the above query is to log the + -- action we're taking. isJust <$> P.insertUniqueEntity sh + -- if we're not inserting, we're just checking if it already exists + -- in snapshot_hoogle_db. else isJust <$> P.checkUnique sh