From fe25b2fa2fde8003c769ed94e22b30a67c6ebd2b Mon Sep 17 00:00:00 2001 From: Alexey Kuleshevich Date: Fri, 14 Feb 2020 03:54:51 +0300 Subject: [PATCH] Record available hoogle db files per snapshot + hoogle version combination: * Make sure hoogle db is marked as available, when there is a copy on S3 * Create db even with `--do-no-upload` flag (useful for testing) * Make sure home page uses latest lts with hoogle db available --- src/Handler/Home.hs | 1 + src/Settings.hs | 2 +- src/Stackage/Database/Cron.hs | 92 +++++++++++++++++++-------------- src/Stackage/Database/Query.hs | 81 ++++++++++++++++++++++------- src/Stackage/Database/Schema.hs | 34 +++++++++--- src/Stackage/Database/Types.hs | 1 + templates/home.hamlet | 2 +- 7 files changed, 146 insertions(+), 67 deletions(-) diff --git a/src/Handler/Home.hs b/src/Handler/Home.hs index a02bb75..087cff2 100644 --- a/src/Handler/Home.hs +++ b/src/Handler/Home.hs @@ -36,6 +36,7 @@ getHomeR = track "Handler.Snapshots.getAllSnapshotsR" $ do getSnapshots Nothing snapshotsPerPage ((fromIntegral currentPage - 1) * snapshotsPerPage) let groups = groupUp now' snapshots + latestLtsNameWithHoogle <- getLatestLtsNameWithHoogle latestLtsByGhc <- getLatestLtsByGhc defaultLayout $ do setTitle "Stackage Server" diff --git a/src/Settings.hs b/src/Settings.hs index 08580dd..889d724 100644 --- a/src/Settings.hs +++ b/src/Settings.hs @@ -17,7 +17,7 @@ import Data.Yaml.Config import Language.Haskell.TH.Syntax (Exp, Name, Q) import Network.Wai.Handler.Warp (HostPreference) import Text.Hamlet -import Yesod.Default.Config2 (applyEnvValue, configSettingsYml) +import Yesod.Default.Config2 (configSettingsYml) import Yesod.Default.Util (WidgetFileSettings, wfsHamletSettings, widgetFileNoReload, widgetFileReload) diff --git a/src/Stackage/Database/Cron.hs b/src/Stackage/Database/Cron.hs index bbcf0fc..6cd2e70 100644 --- a/src/Stackage/Database/Cron.hs +++ b/src/Stackage/Database/Cron.hs @@ -38,11 +38,11 @@ import Network.HTTP.Simple (getResponseBody, httpJSONEither) import Network.HTTP.Types (status200, status404) import Pantry (CabalFileInfo(..), DidUpdateOccur(..), HpackExecutable(HpackBundled), PackageIdentifierRevision(..), - defaultHackageSecurityConfig, defaultCasaRepoPrefix, defaultCasaMaxPerRequest) -import Pantry.Internal.Stackage (HackageTarballResult(..), - PantryConfig(..), Storage(..), - forceUpdateHackageIndex, getHackageTarball, - packageTreeKey) + defaultCasaMaxPerRequest, defaultCasaRepoPrefix, + defaultHackageSecurityConfig) +import Pantry.Internal.Stackage (HackageTarballResult(..), PantryConfig(..), + Storage(..), forceUpdateHackageIndex, + getHackageTarball, packageTreeKey) import Path (parseAbsDir, toFilePath) import RIO import RIO.Directory @@ -171,7 +171,7 @@ stackageServerCron StackageCronOptions {..} = do gpdCache <- newIORef IntMap.empty defaultProcessContext <- mkDefaultProcessContext aws <- newEnv Discover - withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> + withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do let pantryConfig = PantryConfig { pcHackageSecurity = defaultHackageSecurityConfig @@ -185,7 +185,9 @@ stackageServerCron StackageCronOptions {..} = do , pcCasaRepoPrefix = defaultCasaRepoPrefix , pcCasaMaxPerRequest = defaultCasaMaxPerRequest } - stackage = + currentHoogleVersionId <- + runRIO logFunc $ getCurrentHoogleVersionIdWithPantryConfig pantryConfig + let stackage = StackageCron { scPantryConfig = pantryConfig , scStackageRoot = stackageRootDir @@ -199,8 +201,9 @@ stackageServerCron StackageCronOptions {..} = do , scSnapshotsRepo = scoSnapshotsRepo , scReportProgress = scoReportProgress , scCacheCabalFiles = scoCacheCabalFiles + , scHoogleVersionId = currentHoogleVersionId } - in runRIO stackage (runStackageUpdate scoDoNotUpload) + runRIO stackage (runStackageUpdate scoDoNotUpload) runStackageUpdate :: Bool -> RIO StackageCron () @@ -210,7 +213,7 @@ runStackageUpdate doNotUpload = do runStackageMigrations didUpdate <- forceUpdateHackageIndex (Just "stackage-server cron job") case didUpdate of - UpdateOccurred -> logInfo "Updated hackage index" + UpdateOccurred -> logInfo "Updated hackage index" NoUpdateOccurred -> logInfo "No new packages in hackage index" logInfo "Getting deprecated info now" getHackageDeprecations >>= setDeprecations @@ -218,10 +221,9 @@ runStackageUpdate doNotUpload = do runResourceT $ join $ runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ()) + unless doNotUpload uploadSnapshotsJSON + buildAndUploadHoogleDB doNotUpload run $ mapM_ (`rawExecute` []) ["COMMIT", "VACUUM", "BEGIN"] - unless doNotUpload $ do - uploadSnapshotsJSON - buildAndUploadHoogleDB -- | This will look at 'global-hints.yaml' and will create core package getters that are reused @@ -677,7 +679,6 @@ uploadHoogleDB fp key = withTempFile (takeDirectory fp) (takeFileName fp <.> "gz") $ \fpgz h -> do runConduitRes $ sourceFile fp .| compress 9 (WindowBits 31) .| CB.sinkHandle h hClose h - -- FIXME body <- chunkedFile defaultChunkSize fpgz body <- toBody <$> readFileBinary fpgz uploadBucket <- scUploadBucketName <$> ask uploadFromRIO key $ @@ -694,26 +695,30 @@ uploadFromRIO key po = do logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3" -buildAndUploadHoogleDB :: RIO StackageCron () -buildAndUploadHoogleDB = do - snapshots <- lastLtsNightly 80 5 - let snapshots' = sortBy (\x y -> compare (snd (snd y)) (snd (snd x))) $ Map.toList snapshots +buildAndUploadHoogleDB :: Bool -> RIO StackageCron () +buildAndUploadHoogleDB doNotUpload = do + snapshots <- lastLtsNightlyWithoutHoogleDb 5 5 env <- ask locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) - for_ snapshots' $ \(snapshotId, (snapName, _created)) -> do - logInfo $ "Starting Hoogle DB download: " <> display (hoogleKey snapName) - mfp <- singleRun locker snapName - case mfp of - Just _ -> logInfo $ "Hoogle database exists for: " <> display snapName - Nothing -> do - logInfo $ "Hoogle database does not exist for: " <> display snapName - mfp' <- createHoogleDB snapshotId snapName - forM_ mfp' $ \fp -> do - let key = hoogleKey snapName - uploadHoogleDB fp (ObjectKey key) - let dest = T.unpack key - createDirectoryIfMissing True $ takeDirectory dest - renamePath fp dest + for_ snapshots $ \(snapshotId, snapName) -> + unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do + logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName) + mfp <- singleRun locker snapName + case mfp of + Just _ -> do + logInfo $ "Current hoogle database exists for: " <> display snapName + void $ checkInsertSnapshotHoogleDb True snapshotId + Nothing -> do + logInfo $ "Current hoogle database does not yet exist for: " <> display snapName + mfp' <- createHoogleDB snapshotId snapName + forM_ mfp' $ \fp -> do + let key = hoogleKey snapName + dest = T.unpack key + createDirectoryIfMissing True $ takeDirectory dest + renamePath fp dest + unless doNotUpload $ do + uploadHoogleDB dest (ObjectKey key) + void $ checkInsertSnapshotHoogleDb True snapshotId createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath) createHoogleDB snapshotId snapName = @@ -726,9 +731,11 @@ createHoogleDB snapshotId snapName = tarKey = toPathPiece snapName <> "/hoogle/orig.tar" tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey tarFP = root T.unpack tarKey - req <- parseRequest $ T.unpack tarUrl - man <- view envManager - unlessM (doesFileExist tarFP) $ + -- When tarball is downloaded it is saved with durability and atomicity, so if it + -- is present it is not in a corrupted state + unlessM (doesFileExist tarFP) $ do + req <- parseRequest $ T.unpack tarUrl + man <- view envManager withResponseUnliftIO req {decompress = const True} man $ \res -> do throwErrorStatusCodes req res createDirectoryIfMissing True $ takeDirectory tarFP @@ -740,8 +747,9 @@ createHoogleDB snapshotId snapName = withSystemTempDirectory ("hoogle-" ++ T.unpack (textDisplay snapName)) $ \tmpdir -> do Any hasRestored <- runConduitRes $ - sourceFile tarFP .| untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .| - foldC + sourceFile tarFP .| + untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .| + foldMapC Any unless hasRestored $ error "No Hoogle .txt files found" let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir] logInfo $ @@ -758,12 +766,16 @@ createHoogleDB snapshotId snapName = logError ("Problem creating hoogle db for " <> display snapName <> ": " <> displayShow exc) $> Nothing + +-- | Grabs hoogle txt file from the tarball and a matching cabal file from pantry. Writes +-- them into supplied temp directory and yields the result of operation as a boolean for +-- every tar entry. restoreHoogleTxtFileWithCabal :: FilePath -> SnapshotId -> SnapName -> FileInfo - -> ConduitM ByteString Any (ResourceT (RIO StackageCron)) () + -> ConduitM ByteString Bool (ResourceT (RIO StackageCron)) () restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName fileInfo = case fileType fileInfo of FTNormal -> do @@ -776,12 +788,12 @@ restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName fileInfo = "Unexpected hoogle filename: " <> display txtFileName <> " in orig.tar for snapshot: " <> display snapName - yield $ Any False + yield False Just cabal -> do writeFileBinary (tmpdir T.unpack txtPackageName <.> "cabal") cabal sinkFile (tmpdir T.unpack txtFileName) - yield $ Any True - _ -> yield $ Any False + yield True + _ -> yield False pathToPackageModule :: Text -> Maybe (PackageIdentifierP, ModuleNameP) diff --git a/src/Stackage/Database/Query.hs b/src/Stackage/Database/Query.hs index 77c28fe..5fbccb9 100644 --- a/src/Stackage/Database/Query.hs +++ b/src/Stackage/Database/Query.hs @@ -16,9 +16,9 @@ module Stackage.Database.Query , snapshotBefore , lookupSnapshot , snapshotTitle - , lastXLts5Nightly , snapshotsJSON , getLatestLtsByGhc + , getLatestLtsNameWithHoogle , getSnapshotModules , getSnapshotPackageModules @@ -52,6 +52,7 @@ module Stackage.Database.Query , loadBlobById , getTreeForKey , treeCabal + , getVersionId -- ** Stackage server , CabalFileIds , addCabalFile @@ -64,8 +65,9 @@ module Stackage.Database.Query , markModuleHasDocs , insertDeps -- ** For Hoogle db creation - , lastLtsNightly + , lastLtsNightlyWithoutHoogleDb , getSnapshotPackageCabalBlob + , checkInsertSnapshotHoogleDb ) where import qualified Data.Aeson as A @@ -159,23 +161,29 @@ ltsBefore x y = do go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts)) - -lastXLts5Nightly :: GetStackageDatabase env m => Int -> m [SnapName] -lastXLts5Nightly ltsCount = run $ do - ls <- P.selectList [] [P.Desc LtsMajor, P.Desc LtsMinor, P.LimitTo ltsCount] - ns <- P.selectList [] [P.Desc NightlyDay, P.LimitTo 5] - return $ map l ls <> map n ns - where - l (Entity _ x) = SNLts (ltsMajor x) (ltsMinor x) - n (Entity _ x) = SNNightly (nightlyDay x) - -lastLtsNightly :: GetStackageDatabase env m => Int -> Int -> m (Map SnapshotId (SnapName, Day)) -lastLtsNightly ltsCount nightlyCount = +lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)] +lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do + currentHoogleVersionId <- scHoogleVersionId <$> ask + let getSnapshotsWithoutHoogeDb snapId snapCount = + map (unValue *** unValue) <$> + select + (from $ \(snap `InnerJoin` snapshot) -> do + on $ snap ^. snapId ==. snapshot ^. SnapshotId + where_ $ + notExists $ + from $ \snapshotHoogleDb -> + where_ $ + (snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^. + SnapshotId) &&. + (snapshotHoogleDb ^. SnapshotHoogleDbVersion ==. + val currentHoogleVersionId) + orderBy [desc (snapshot ^. SnapshotCreated)] + limit $ fromIntegral snapCount + pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName)) run $ do - ls <- P.selectList [] [P.Desc LtsMajor, P.Desc LtsMinor, P.LimitTo ltsCount] - ns <- P.selectList [] [P.Desc NightlyDay, P.LimitTo nightlyCount] - Map.map (snapshotName &&& snapshotCreated) <$> - P.getMany (map (ltsSnap . P.entityVal) ls <> map (nightlySnap . P.entityVal) ns) + lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount + nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount + pure $ lts ++ nightly snapshotsJSON :: GetStackageDatabase env m => m A.Value @@ -221,6 +229,20 @@ getLatestLtsByGhc = dedupe (x:xs) = x : dedupe (dropWhile (\y -> thd x == thd y) xs) thd (_, _, x, _) = x +getLatestLtsNameWithHoogle :: GetStackageDatabase env m => m Text +getLatestLtsNameWithHoogle = + run $ do + currentHoogleVersionId <- getCurrentHoogleVersionId + maybe "lts" (textDisplay . unValue) . listToMaybe <$> + select + (from $ \(lts `InnerJoin` snapshot `InnerJoin` snapshotHoogleDb) -> do + on $ snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^. SnapshotId + on $ lts ^. LtsSnap ==. snapshot ^. SnapshotId + where_ $ + snapshotHoogleDb ^. SnapshotHoogleDbVersion ==. val currentHoogleVersionId + orderBy [desc (lts ^. LtsMajor), desc (lts ^. LtsMinor)] + limit 1 + return (snapshot ^. SnapshotName)) -- | Count snapshots that belong to a specific SnapshotBranch countSnapshots :: (GetStackageDatabase env m) => Maybe SnapshotBranch -> m Int @@ -1089,3 +1111,26 @@ markModuleHasDocs snapshotId pid mSnapshotPackageId modName = return $ Just snapshotPackageId Nothing -> return Nothing + +-- | We can either check or insert hoogle db for current hoogle version for current +-- snapshot. Returns True if current hoogle version was not in the database. +checkInsertSnapshotHoogleDb :: Bool -> SnapshotId -> RIO StackageCron Bool +checkInsertSnapshotHoogleDb shouldInsert snapshotId = do + hoogleVersionId <- scHoogleVersionId <$> ask + let sh = SnapshotHoogleDb snapshotId hoogleVersionId + run $ + if shouldInsert + then do + mhver <- + (fmap unValue . listToMaybe) <$> + select + (from + (\v -> do + where_ $ v ^. VersionId ==. val hoogleVersionId + pure (v ^. VersionVersion))) + forM_ mhver $ \hver -> + lift $ + logInfo $ + "Marking hoogle database for version " <> display hver <> " as available." + isJust <$> P.insertUniqueEntity sh + else isJust <$> P.checkUnique sh diff --git a/src/Stackage/Database/Schema.hs b/src/Stackage/Database/Schema.hs index bff3b22..9a35b79 100644 --- a/src/Stackage/Database/Schema.hs +++ b/src/Stackage/Database/Schema.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE CPP #-} {-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} @@ -21,12 +22,15 @@ module Stackage.Database.Schema , GetStackageDatabase(..) , withStackageDatabase , runStackageMigrations + , getCurrentHoogleVersionId + , getCurrentHoogleVersionIdWithPantryConfig -- * Tables , Unique(..) , EntityField(..) -- ** Snapshot , Snapshot(..) , SnapshotId + , SnapshotHoogleDb(..) , Lts(..) , Nightly(..) -- ** Package @@ -48,12 +52,12 @@ import Data.Pool (destroyAllResources) import Database.Persist import Database.Persist.Postgresql import Database.Persist.TH -import Pantry (HasPantryConfig(..), Revision) +import Pantry (HasPantryConfig(..), Revision, parseVersionThrowing) import Pantry.Internal.Stackage as PS (BlobId, HackageCabalId, ModuleNameId, PackageNameId, Tree(..), TreeEntryId, TreeId, Unique(..), VersionId, unBlobKey) -import Pantry.Internal.Stackage (PantryConfig(..), Storage(..)) +import Pantry.Internal.Stackage (PantryConfig(..), Storage(..), getVersionId) import qualified Pantry.Internal.Stackage as Pantry (migrateAll) import RIO import RIO.Time @@ -82,6 +86,10 @@ Nightly snap SnapshotId day Day UniqueNightly day +SnapshotHoogleDb + snapshot SnapshotId + version VersionId + UniqueSnapshotHoogleVersion snapshot version SnapshotPackage snapshot SnapshotId packageName PackageNameId @@ -113,7 +121,7 @@ Deprecated UniqueDeprecated package |] -_hideUnusedWarnings :: (SchemaId, LtsId, NightlyId) -> () +_hideUnusedWarnings :: (SchemaId, LtsId, NightlyId, SnapshotHoogleDbId) -> () _hideUnusedWarnings _ = () @@ -146,12 +154,24 @@ class (MonadThrow m, MonadIO m) => GetStackageDatabase env m | m -> env where instance (HasLogFunc env, HasPantryConfig env) => GetStackageDatabase env (RIO env) where - getStackageDatabase = do - env <- view pantryConfigL - let Storage runStorage _ = pcStorage env - pure $ StackageDatabase runStorage + getStackageDatabase = view pantryConfigL >>= getStackageDatabaseFromPantry getLogFunc = view logFuncL +getStackageDatabaseFromPantry :: PantryConfig -> RIO env StackageDatabase +getStackageDatabaseFromPantry pc = do + let Storage runStorage _ = pcStorage pc + pure $ StackageDatabase runStorage + + +getCurrentHoogleVersionId :: HasLogFunc env => ReaderT SqlBackend (RIO env) VersionId +getCurrentHoogleVersionId = do + currentHoogleVersion <- parseVersionThrowing VERSION_hoogle + getVersionId currentHoogleVersion + +getCurrentHoogleVersionIdWithPantryConfig :: HasLogFunc env => PantryConfig -> RIO env VersionId +getCurrentHoogleVersionIdWithPantryConfig pantryConfig = do + stackageDb <- getStackageDatabaseFromPantry pantryConfig + runDatabase stackageDb getCurrentHoogleVersionId run :: GetStackageDatabase env m => SqlPersistT (RIO RIO.LogFunc) a -> m a diff --git a/src/Stackage/Database/Types.hs b/src/Stackage/Database/Types.hs index 8a47361..02dabdb 100644 --- a/src/Stackage/Database/Types.hs +++ b/src/Stackage/Database/Types.hs @@ -88,6 +88,7 @@ data StackageCron = StackageCron , scSnapshotsRepo :: !GithubRepo , scReportProgress :: !Bool , scCacheCabalFiles :: !Bool + , scHoogleVersionId :: !VersionId } instance HasEnv StackageCron where diff --git a/templates/home.hamlet b/templates/home.hamlet index 16e6aa6..4dbea85 100644 --- a/templates/home.hamlet +++ b/templates/home.hamlet @@ -3,7 +3,7 @@
-
+