mirror of
https://github.com/commercialhaskell/stackage-server.git
synced 2026-01-27 03:11:57 +01:00
Merge pull request #343 from commercialhaskell/b/disk-cleanup
Post Hoogle generation disk cleanup
This commit is contained in: commit a384248d68
@@ -58,7 +58,7 @@ optsParser =
                  T.unpack defHaddockBucketName)) <*>
     switch
         (long "do-not-upload" <>
-         help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*>
+         help "Disable upload of Hoogle database and snapshots.json") <*>
     option
         readLogLevel
         (long "log-level" <> metavar "LOG_LEVEL" <> short 'l' <> value LevelInfo <>
@@ -104,11 +104,15 @@ getStackageSnapshotsDir = do
 withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
 withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
 
--- | Under the SingleRun wrapper that ensures only one thing at a time is
--- writing the file in question, ensure that a Hoogle database exists on the
--- filesystem for the given SnapName. But only going so far as downloading it
--- from the haddock bucket. See 'createHoogleDB' for the function that puts it
--- there in the first place.
+-- | Returns an action that, under the SingleRun wrapper that ensures only one
+-- thing at a time is writing the file in question, ensure that a Hoogle
+-- database exists on the filesystem for the given SnapName. But only going so
+-- far as downloading it from the haddock bucket. See 'buildAndUploadHoogleDBs' for the
+-- function that puts it there in the first place. If no db exists in the
+-- bucket, the action will return 'Nothing'.
+--
+-- The location searched is $PWD/hoogle/<snapshot>/<hoogle-version>.hoo
+-- E.g. in production, ~stackage-update/hoogle/lts-22.20/5.0.18.4.hoo (for stackage-update).
 newHoogleLocker ::
        (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
 newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
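The SingleRun machinery referenced in the comment above can be pictured with a short, self-contained sketch. This is an assumed, simplified shape, not the repo's actual implementation (which, for one thing, presumably evicts finished runs): concurrent callers asking for the same key share one in-flight action instead of racing to write the same file.

    import Control.Concurrent.Async (Async, async, wait)
    import Control.Concurrent.MVar (MVar, modifyMVar, newMVar)
    import qualified Data.Map.Strict as Map

    -- One guarded action per key; latecomers wait on the run already in flight.
    data SingleRun k v = SingleRun
        { srInFlight :: MVar (Map.Map k (Async v))
        , srAction   :: k -> IO v
        }

    mkSingleRun :: (k -> IO v) -> IO (SingleRun k v)
    mkSingleRun act = do
        ref <- newMVar Map.empty
        pure (SingleRun ref act)

    singleRun :: Ord k => SingleRun k v -> k -> IO v
    singleRun (SingleRun ref act) k = do
        a <- modifyMVar ref $ \inFlight ->
            case Map.lookup k inFlight of
                Just a  -> pure (inFlight, a)       -- join the run already in progress
                Nothing -> do
                    a <- async (act k)              -- start a fresh run for this key
                    pure (Map.insert k a inFlight, a)
        wait a
        -- NOTE: this toy never evicts finished runs, so it also memoizes results;
        -- a production version would remove the entry once the run completes.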
@@ -232,38 +236,48 @@ runStackageUpdate doNotUpload = do
     corePackageGetters <- makeCorePackageGetters
     runResourceT $
         join $
+        -- @createOrUpdateSnapshot@ processes package N while processing docs for
+        -- package N-1. This @pure ()@ is the "documentation processing action"
+        -- for the -1'th package.
         runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ())
     unless doNotUpload uploadSnapshotsJSON
-    buildAndUploadHoogleDB doNotUpload
+    buildAndUploadHoogleDBs doNotUpload
     logInfo "Finished building and uploading Hoogle DBs"
 
 
 -- | This will look at 'global-hints.yaml' and will create core package getters that are reused
--- later for adding those package to individual snapshot.
+-- later for adding those package to individual snapshots.
 makeCorePackageGetters ::
        RIO StackageCron (Map CompilerP [CorePackageGetter])
 makeCorePackageGetters = do
     rootDir <- scStackageRoot <$> ask
     contentDir <- getStackageContentDir rootDir
-    coreCabalFiles <- getCoreCabalFiles rootDir
+    backupCoreCabalFiles <- getBackupCoreCabalFiles rootDir
     liftIO (decodeFileEither (contentDir </> "stack" </> "global-hints.yaml")) >>= \case
         Right (hints :: Map CompilerP (Map PackageNameP VersionP)) ->
             Map.traverseWithKey
                 (\compiler ->
                      fmap Map.elems .
-                     Map.traverseMaybeWithKey (makeCorePackageGetter compiler coreCabalFiles))
+                     Map.traverseMaybeWithKey (makeCorePackageGetter compiler backupCoreCabalFiles))
                 hints
         Left exc -> do
             logError $
                 "Error parsing 'global-hints.yaml' file: " <> fromString (displayException exc)
             return mempty
 
-getCoreCabalFiles ::
+-- | Packages distributed with GHC aren't taken from Hackage like normal
+-- packages. Release managers do upload them, however, so that their docs are
+-- available.
+--
+-- Or at least, they should. The release process was fragile, and some packages
+-- weren't uploaded. This mechanism gives us a chance to fill in missing
+-- packages.
+getBackupCoreCabalFiles ::
        FilePath
     -> RIO StackageCron (Map PackageIdentifierP (GenericPackageDescription, CabalFileIds))
-getCoreCabalFiles rootDir = do
-    coreCabalFilesDir <- getCoreCabalFilesDir rootDir
-    cabalFileNames <- getDirectoryContents coreCabalFilesDir
+getBackupCoreCabalFiles rootDir = do
+    backupCoreCabalFilesDir <- getBackupCoreCabalFilesDir rootDir
+    cabalFileNames <- getDirectoryContents backupCoreCabalFilesDir
     cabalFiles <-
         forM (filter (isExtensionOf ".cabal") cabalFileNames) $ \cabalFileName ->
             let pidTxt = T.pack (dropExtension (takeFileName cabalFileName))
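The traversal in makeCorePackageGetters leans on Map.traverseMaybeWithKey from containers, which runs an effectful function over every entry and drops the entries that come back Nothing. A tiny standalone demonstration (names here are illustrative):

    import qualified Data.Map.Strict as Map

    keepEvens :: IO (Map.Map String Int)
    keepEvens =
        Map.traverseMaybeWithKey
            (\k v -> do
                 putStrLn ("inspecting " <> k)                     -- the per-entry effect
                 pure (if even v then Just (v * 10) else Nothing)) -- Nothing drops the key
            (Map.fromList [("a", 1), ("b", 2)])
    -- returns fromList [("b",20)] after printing both keys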
@@ -272,15 +286,17 @@ getCoreCabalFiles rootDir = do
                 logError $ "Invalid package identifier: " <> fromString cabalFileName
                 pure Nothing
             Just pid -> do
-                cabalBlob <- readFileBinary (coreCabalFilesDir </> cabalFileName)
+                cabalBlob <- readFileBinary (backupCoreCabalFilesDir </> cabalFileName)
                 mCabalInfo <- run $ addCabalFile pid cabalBlob
                 pure ((,) pid <$> mCabalInfo)
     pure $ Map.fromList $ catMaybes cabalFiles
 
 -- | Core package info rarely changes between the snapshots, therefore it would be wasteful to
--- load, parse and update all packages from gloabl-hints for each snapshot, instead we produce
--- a memoized version that will do it once initiall and then return information aboat a
+-- load, parse and update all packages from gloabl-hints for each snapshot. Instead we produce
+-- a memoized version that will do it once initially and then return information about a
 -- package on subsequent invocations.
+--
+-- FIXME: The compiler argument is unused (and never has been). Should it be used?
 makeCorePackageGetter ::
        CompilerP
     -> Map PackageIdentifierP (GenericPackageDescription, CabalFileIds)
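As a rough sketch of the scan getBackupCoreCabalFiles performs: list the backup directory, keep the .cabal files, and recover a package identifier from each file name ("ace-0.6.cabal" becomes "ace-0.6"). cabalFileIdentifiers is a hypothetical helper; the real code parses the identifiers into typed values and loads the blobs into the database.

    import System.Directory (getDirectoryContents)
    import System.FilePath (dropExtension, isExtensionOf, takeFileName)

    cabalFileIdentifiers :: FilePath -> IO [String]
    cabalFileIdentifiers dir = do
        names <- getDirectoryContents dir
        pure [ dropExtension (takeFileName name)   -- "ace-0.6.cabal" -> "ace-0.6"
             | name <- names
             , ".cabal" `isExtensionOf` name ]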
@@ -309,6 +325,9 @@ makeCorePackageGetter _compiler fallbackCabalFileMap pname ver =
         Nothing -> do
             whenM (scReportProgress <$> ask) $
                 logSticky $ "Loading core package: " <> display pid
+            -- I have no idea what's happening here. I guess I
+            -- don't know what it means to "load" a package.
+            -- What is actually going on?
             htr <- getHackageTarball pir Nothing
             case htrFreshPackageInfo htr of
                 Just (gpd, treeId) -> do
@@ -336,64 +355,70 @@ makeCorePackageGetter _compiler fallbackCabalFileMap pname ver =
         PackageIdentifierRevision (unPackageNameP pname) (unVersionP ver) (CFIRevision (Revision 0))
 
 
--- TODO: for now it is only from hackage, PantryPackage needs an update to use other origins
--- | A pantry package is being added to a particular snapshot. Extra information like compiler and
--- flags are passed on in order to properly figure out dependencies and modules
+-- | Populates the database with information about a package?
+--
+-- Specifically, a pantry package is being added to a particular snapshot.
+--
+-- Extra information like compiler and flags are passed on in order to properly
+-- figure out dependencies and modules.
+--
+-- TODO: for now it is only from hackage. PantryPackage needs an update to use other origins
 addPantryPackage ::
        SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool
-addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do
+addPantryPackage snapId compiler isHidden flags (PantryPackage pcabal pTreeKey) = do
     env <- ask
-    let gpdCachedRef = scCachedGPD env
-        cache = scCacheCabalFiles env
+    let pkgDescCache = scCachedGPD env
+        cacheP = scCacheCabalFiles env
     let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey
-    let updateCacheGPD blobId gpd =
-            gpd `deepseq`
-            atomicModifyIORef' gpdCachedRef (\cacheMap -> (IntMap.insert blobId gpd cacheMap, gpd))
-    let getCachedGPD treeCabal =
+    let cachedPkgDesc cabalBlobId pkgDesc =
+            pkgDesc `deepseq`
+            atomicModifyIORef' pkgDescCache (\cacheMap -> (IntMap.insert cabalBlobId pkgDesc cacheMap, pkgDesc))
+    let getPkgDesc cabalBlobId =
             \case
-                Just gpd | cache -> updateCacheGPD (blobKeyToInt treeCabal) gpd
-                Just gpd -> pure gpd
-                Nothing | cache -> do
-                    cacheMap <- readIORef gpdCachedRef
-                    case IntMap.lookup (blobKeyToInt treeCabal) cacheMap of
-                        Just gpd -> pure gpd
+                Just pkgDesc | cacheP -> cachedPkgDesc (blobKeyToInt cabalBlobId) pkgDesc
+                Just pkgDesc -> pure pkgDesc
+                Nothing | cacheP -> do
+                    cacheMap <- readIORef pkgDescCache
+                    case IntMap.lookup (blobKeyToInt cabalBlobId) cacheMap of
+                        Just pkgDesc -> pure pkgDesc
                         Nothing ->
-                            loadBlobById treeCabal >>=
-                            updateCacheGPD (blobKeyToInt treeCabal) . parseCabalBlob
-                Nothing -> parseCabalBlob <$> loadBlobById treeCabal
-    let storeHackageSnapshotPackage hcid mtid mgpd =
-            getTreeForKey treeKey >>= \case
-                Just (Entity treeId _)
-                    | Just tid <- mtid
-                    , tid /= treeId -> do
-                        lift $ logError $ "Pantry Tree Key mismatch for: " <> display pc
+                            loadBlobById cabalBlobId >>=
+                            cachedPkgDesc (blobKeyToInt cabalBlobId) . parseCabalBlob
+                Nothing -> parseCabalBlob <$> loadBlobById cabalBlobId
+    let storeHackageSnapshotPackage hackageCabalId mTreeId mpkgDesc =
+            getTreeForKey pTreeKey >>= \case
+                -- error case #1
+                Just (Entity treeId' _)
+                    | Just treeId <- mTreeId
+                    , treeId /= treeId' -> do
+                        lift $ logError $ "Pantry Tree Key mismatch for: " <> display pcabal
                         pure False
-                Just tree@(Entity _ Tree {treeCabal})
-                    | Just treeCabal' <- treeCabal -> do
-                        gpd <- getCachedGPD treeCabal' mgpd
-                        let mhcid = Just hcid
-                            eTree = Right tree
-                        addSnapshotPackage sid compiler Hackage eTree mhcid isHidden flags pid gpd
+                -- happy case
+                Just pkgTree@(Entity _ Tree {treeCabal})
+                    | Just cabalBlobId <- treeCabal -> do
+                        pkgDesc <- getPkgDesc cabalBlobId mpkgDesc
+                        addSnapshotPackage snapId compiler Hackage (Right pkgTree) (Just hackageCabalId) isHidden flags packageId pkgDesc
                        pure True
+                -- error case #2
                 _ -> do
-                    lift $ logError $ "Pantry is missing the source tree for " <> display pc
+                    lift $ logError $ "Pantry is missing the source tree for " <> display pcabal
                     pure False
-    mHackageCabalInfo <- run $ getHackageCabalByKey pid (pcCabalKey pc)
+    mHackageCabalInfo <- run $ getHackageCabalByKey packageId (pcCabalKey pcabal)
     case mHackageCabalInfo of
         Nothing -> do
-            logError $ "Could not find the cabal file for: " <> display pc
+            logError $ "Could not find the cabal file for: " <> display pcabal
             pure False
-        Just (hcid, Nothing) -> do
+        Just (hackageCabalId, Nothing) -> do
             mHPI <-
                 htrFreshPackageInfo <$>
-                getHackageTarball (toPackageIdentifierRevision pc) (Just treeKey)
+                getHackageTarball (toPackageIdentifierRevision pcabal) (Just pTreeKey)
             run $
                 case mHPI of
-                    Just (gpd, treeId) -> storeHackageSnapshotPackage hcid (Just treeId) (Just gpd)
-                    Nothing -> storeHackageSnapshotPackage hcid Nothing Nothing
-        Just (hcid, mtid) -> run $ storeHackageSnapshotPackage hcid mtid Nothing
+                    Just (pkgDesc, treeId) -> storeHackageSnapshotPackage hackageCabalId (Just treeId) (Just pkgDesc)
+                    Nothing -> storeHackageSnapshotPackage hackageCabalId Nothing Nothing
+        Just (hackageCabalId, mTreeId) -> run $ storeHackageSnapshotPackage hackageCabalId mTreeId Nothing
   where
-    pid = PackageIdentifierP (pcPackageName pc) (pcVersion pc)
+    packageId = PackageIdentifierP (pcPackageName pcabal) (pcVersion pcabal)
 
 
 
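The getPkgDesc cache above follows a common pattern: an IORef holding an IntMap, updated with atomicModifyIORef' after forcing the value with deepseq so no unevaluated thunk is stored. A minimal sketch under assumed names (memoizedLoad and its loader are illustrative, not the real Pantry calls); like the original, two concurrent callers may both load the same key, and the last write wins:

    import Control.DeepSeq (NFData, deepseq)
    import Data.IORef (IORef, atomicModifyIORef', readIORef)
    import qualified Data.IntMap.Strict as IntMap

    memoizedLoad :: NFData v => IORef (IntMap.IntMap v) -> (Int -> IO v) -> Int -> IO v
    memoizedLoad ref load key = do
        cache <- readIORef ref
        case IntMap.lookup key cache of
            Just v  -> pure v                 -- cache hit
            Nothing -> do
                v <- load key                 -- cache miss: load it
                v `deepseq`                   -- force fully before caching
                    atomicModifyIORef' ref (\m -> (IntMap.insert key v m, v))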
@@ -404,33 +429,33 @@ checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) ()
 checkForDocs snapshotId snapName = do
     bucketName <- lift (scDownloadBucketName <$> ask)
     env <- asks scEnvAWS
-    mods <-
+    -- it is faster to download all modules in this snapshot separately, rather
+    -- than process them with a conduit all the way to the database.
+    packageModules <-
         runConduit $
-        paginate env (req bucketName) .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents)) .|
-        mapC (\obj -> toText (obj ^. object_key)) .|
-        concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule) .|
-        sinkList
-    -- it is faster to download all modules in this snapshot, than process them with a conduit all
-    -- the way to the database.
+        paginate env (listSnapshotObjects bucketName)
+        .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents))
+        .| mapC (\obj -> toText (obj ^. object_key))
+        .| concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule)
+        .| sinkList
+    -- Cache SnapshotPackageId rather than look it up many times for each module in the package.
     sidsCacheRef <- newIORef Map.empty
-    -- Cache is for SnapshotPackageId, there will be many modules per peckage, no need to look into
-    -- the database for each one of them.
+    -- The other half of the cores are used in 'updateSnapshot'
     n <- max 1 . (`div` 2) <$> getNumCapabilities
-    unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
+    unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModule sidsCacheRef) packageModules
     forM_ (Set.fromList $ catMaybes unexpectedPackages) $ \pid ->
-        lift $
-        logWarn $
+        lift $ logWarn $
             "Documentation found for package '" <> display pid <>
             "', which does not exist in this snapshot: " <>
             display snapName
   where
     prefix = textDisplay snapName <> "/"
-    req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix
+    listSnapshotObjects bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix
     -- | This function records all package modules that have documentation available, the ones
     -- that are not found in the snapshot reported back as an error. Besides being run
     -- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can
     -- be shared amongst many modules of one package.
-    markModules sidsCacheRef (pid, modName) = do
+    markModule sidsCacheRef (pid, modName) = do
         sidsCache <- readIORef sidsCacheRef
         let mSnapshotPackageId = Map.lookup pid sidsCache
         mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName
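To make the key-filtering step concrete: an S3 key such as "lts-22.20/ace-0.6/ACE-Combinators.html" is stripped of its ".html" suffix and snapshot prefix, leaving "ace-0.6/ACE-Combinators" for pathToPackageModule to split. The Kleisli chain (>=>) short-circuits on the first Nothing, so non-matching keys simply fall out of concatMapC. A hedged sketch with plain Text results, assuming the module part swaps dashes for dots (keyToPackageModule is a hypothetical name; the real function returns typed PackageIdentifierP/ModuleNameP values):

    {-# LANGUAGE OverloadedStrings #-}
    import Control.Monad ((>=>))
    import Data.Text (Text)
    import qualified Data.Text as T

    keyToPackageModule :: Text -> Text -> Maybe (Text, Text)
    keyToPackageModule prefix =
        T.stripSuffix ".html" >=> T.stripPrefix prefix >=> \rest ->
            case T.split (== '/') rest of
                [pkg, modDashes] -> Just (pkg, T.replace "-" "." modDashes)
                _                -> Nothing

    -- >>> keyToPackageModule "lts-22.20/" "lts-22.20/ace-0.6/ACE-Combinators.html"
    -- Just ("ace-0.6","ACE.Combinators")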
@@ -456,8 +481,7 @@ sourceSnapshots :: ConduitT a SnapshotFileInfo (ResourceT (RIO StackageCron)) ()
 sourceSnapshots = do
     snapshotsDir <- lift $ lift getStackageSnapshotsDir
     sourceDirectoryDeep False (snapshotsDir </> "lts") .| concatMapMC (getLtsParser snapshotsDir)
-    sourceDirectoryDeep False (snapshotsDir </> "nightly") .|
-        concatMapMC (getNightlyParser snapshotsDir)
+    sourceDirectoryDeep False (snapshotsDir </> "nightly") .| concatMapMC (getNightlyParser snapshotsDir)
   where
     makeSnapshotFileInfo gitDir fp mFileNameDate snapName = do
         let parseSnapshot updatedOn = do
@@ -500,39 +524,52 @@ sourceSnapshots = do
         return Nothing
 
 
--- | Creates a new `Snapshot` if it is not yet present in the database and decides if update
+data DecisionResult a e = NothingToDo | NoSnapshotFile | NeedsUpdate a e | DoesntExist e
+
+-- | Creates a new `Snapshot` if it is not yet present in the database, and decides if update
 -- is necessary when it already exists.
+--
+-- TODO: Silently ignoring snapshots where the getter returns Nothing seems like
+-- a potential problem. Anyway I'd rather run it beforehand!
 decideOnSnapshotUpdate :: SnapshotFileInfo -> RIO StackageCron (Maybe (SnapshotId, SnapshotFile))
 decideOnSnapshotUpdate SnapshotFileInfo {sfiSnapName, sfiUpdatedOn, sfiSnapshotFileGetter} = do
     forceUpdate <- scForceFullUpdate <$> ask
     let mkLogMsg rest = "Snapshot with name: " <> display sfiSnapName <> " " <> rest
     mKeySnapFile <-
         run (getBy (UniqueSnapshot sfiSnapName)) >>= \case
+            -- exists, up to date, no force-updated requested; nothing to do
             Just (Entity _key snap)
-                | snapshotUpdatedOn snap == Just sfiUpdatedOn && not forceUpdate -> do
-                    logInfo $ mkLogMsg "already exists and is up to date."
-                    return Nothing
+                | snapshotUpdatedOn snap == Just sfiUpdatedOn && not forceUpdate ->
+                    return NothingToDo
+            -- exists but updatedOn was not previously set.
             Just entity@(Entity _key snap)
                 | Nothing <- snapshotUpdatedOn snap -> do
                     logWarn $ mkLogMsg "did not finish updating last time."
-                    fmap (Just entity, ) <$> sfiSnapshotFileGetter
+                    maybe NoSnapshotFile (NeedsUpdate entity) <$> sfiSnapshotFileGetter
+            -- exists, but updatedOn does not match or force-update was requested.
             Just entity -> do
                 unless forceUpdate $ logWarn $ mkLogMsg "was updated, applying new patch."
-                fmap (Just entity, ) <$> sfiSnapshotFileGetter
-            Nothing -> fmap (Nothing, ) <$> sfiSnapshotFileGetter
+                maybe NoSnapshotFile (NeedsUpdate entity) <$> sfiSnapshotFileGetter
+            -- does not exist
+            Nothing -> maybe NoSnapshotFile DoesntExist <$> sfiSnapshotFileGetter
     -- Add new snapshot to the database, when necessary
     case mKeySnapFile of
-        Just (Just (Entity snapKey snap), sf@SnapshotFile {sfCompiler, sfPublishDate})
+        NothingToDo -> Nothing <$ logInfo (mkLogMsg "already exists and is up to date.")
+        NoSnapshotFile -> Nothing <$ logWarn (mkLogMsg "has no (readable?) snapshot file.")
+        NeedsUpdate (Entity oldSnapKey oldSnap) sf@SnapshotFile {sfCompiler, sfPublishDate}
             | Just publishDate <- sfPublishDate -> do
                 let updatedSnap =
-                        Snapshot sfiSnapName sfCompiler publishDate (snapshotUpdatedOn snap)
-                run $ replace snapKey updatedSnap
-                pure $ Just (snapKey, sf)
-        Just (Nothing, sf@SnapshotFile {sfCompiler, sfPublishDate})
-            | Just publishDate <- sfPublishDate ->
+                        Snapshot sfiSnapName sfCompiler publishDate (snapshotUpdatedOn oldSnap)
+                run $ replace oldSnapKey updatedSnap
+                pure $ Just (oldSnapKey, sf)
+            | otherwise -> return Nothing
+        DoesntExist sf@SnapshotFile {sfCompiler, sfPublishDate}
+            | Just publishDate <- sfPublishDate -> do
+                logInfo $ mkLogMsg "is new, adding to the database."
                 fmap (, sf) <$>
                     run (insertUnique (Snapshot sfiSnapName sfCompiler publishDate Nothing))
-        _ -> return Nothing
+            | otherwise -> Nothing <$ logWarn (mkLogMsg "has no publish date, skipping.")
 
 type CorePackageGetter
     = RIO StackageCron ( Either CabalFileIds (Entity Tree)
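To see what the new DecisionResult type buys over the old Maybe (Maybe entity, file) encoding, here is a toy rendering: each outcome gets a named constructor, and the follow-up case match is explicit and exhaustive instead of collapsing distinct failures into the same Nothing. String stands in for the real Entity Snapshot and SnapshotFile types.

    -- Mirrors the type added in this commit, with toy payloads.
    data DecisionResult a e
        = NothingToDo
        | NoSnapshotFile
        | NeedsUpdate a e
        | DoesntExist e

    describe :: DecisionResult String String -> String
    describe NothingToDo       = "skip: already up to date"
    describe NoSnapshotFile    = "warn: no (readable) snapshot file"
    describe (NeedsUpdate k _) = "update existing snapshot row " <> k
    describe (DoesntExist _)   = "insert a brand new snapshot row"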
@@ -545,14 +582,38 @@ type CorePackageGetter
 -- current snapshot as well as an action that was passed as an argument. At the end it will return
 -- an action that should be invoked in order to mark modules that have documentation available,
 -- which in turn can be passed as an argument to the next snapshot loader.
+-- Something something ouroboros.
+--
+-- Question: When do the docs for the last snapshot get loaded?
+--
+-- Well, this binding is called as @join $ runConduit $ foldMC (createOrUpdateSnapshot corePackageInfoGetters) (pure ())@
+--
+-- So the answer: the doc-loading action for the last snapshot gets returned by @runConduit $ foldMC ...@,
+-- which means it gets executed by @join $ runConduit $ foldMC ...@.
+--
+-- Evidence:
+--
+-- Since @foldMC :: (a -> b -> m a) -> a -> ConduitT b o m a@, we see
+--
+-- @@
+-- a ~ ResourceT (RIO Stackage Cron) () -- this is the doc-loading action
+-- b ~ SnapshotFileInfo
+-- m ~ ResourceT (RIO StackageCron)
+-- @@
+
+-- and the foldMC creates a @ConduitT SnapshotFileInfo o (ResourceT (RIO StackageCron)) (ResourceT (RIO StackageCron) ())@
+--
+-- TODO: It might be more efficient to just put all the actions (snapshot
+-- creation and documentation writing both) on a queue and let a bunch of
+-- workers chew on it. The current impl creates arbitrary synchronization points
+-- with 'runConcurrently'. Granted, I don't know what a good chunk size would
+-- actually be.
 createOrUpdateSnapshot ::
        Map CompilerP [CorePackageGetter]
     -> ResourceT (RIO StackageCron) ()
     -> SnapshotFileInfo
     -> ResourceT (RIO StackageCron) (ResourceT (RIO StackageCron) ())
-createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName
-                                                                              , sfiUpdatedOn
-                                                                              } = do
+createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName , sfiUpdatedOn } = do
     finishedDocs <- newIORef False
     runConcurrently
         (Concurrently (prevAction >> writeIORef finishedDocs True) *>
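The ouroboros comment can be made concrete with a runnable toy: each fold step receives the previous step's follow-up action, runs it, does its own main work, and returns a new follow-up; the fold's final result is the last follow-up, which the outer join executes. (The real code overlaps prevAction with the current snapshot's work via runConcurrently; this sketch runs them sequentially.)

    import Conduit (foldMC, runConduit, yieldMany, (.|))
    import Control.Monad (join)

    pipelined :: IO ()
    pipelined =
        join $ runConduit $
            yieldMany ["lts-22.19", "lts-22.20"] .| foldMC step (pure ())
      where
        -- step :: IO () -> String -> IO (IO ())
        step prevAction name = do
            prevAction                                -- finish the previous item's follow-up
            putStrLn ("loading snapshot " <> name)    -- this item's main work
            pure (putStrLn ("docs for " <> name))     -- this item's follow-up, run later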
@@ -561,6 +622,7 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo {
     loadCurrentSnapshot finishedDocs = do
         loadDocs <-
             decideOnSnapshotUpdate sfi >>= \case
+                -- Nothing to do, and thus no docs to process
                 Nothing -> return $ pure ()
                 Just (snapshotId, snapshotFile) ->
                     updateSnapshot
@@ -575,22 +637,30 @@ createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo {
         logSticky "Still loading the docs for previous snapshot ..."
     pure loadDocs
 
--- | Updates all packages in the snapshot. If any missing they will be created. Returns an action
--- that will check for available documentation for modules that are known to exist and mark as
--- documented when haddock is present on AWS S3. Only after documentation has been checked this
--- snapshot will be marked as completely updated. This is required in case something goes wrong and
--- process is interrupted
+-- | Creates Lts or Nightly entity [Question(bryan): Why not do this when
+-- creating the snapshot? Why is this a separate table anyway?] and updates all
+-- packages in the snapshot. If any packages are missing they will be created.
+-- Returns an action that will (a) check for available documentation for the
+-- packages' modules and (b) mark the packages as documented when haddock is
+-- present on AWS S3.
+--
+-- (Only after documentation has been checked will this snapshot be marked as
+-- completely updated. This is required in case something goes wrong and process
+-- is interrupted.)
 updateSnapshot ::
        Map CompilerP [CorePackageGetter]
     -> SnapshotId
     -> SnapName
     -> UTCTime
     -> SnapshotFile
-    -> RIO StackageCron (ResourceT (RIO StackageCron) ())
+    -> RIO StackageCron (ResourceT (RIO StackageCron) ()) -- ^ Returns the action for processing docs
 updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {..} = do
     insertSnapshotName snapshotId snapName
     loadedPackageCountRef <- newIORef (0 :: Int)
     let totalPackages = length sfPackages
+        -- A wrapper for 'addPantryPackage' that extracts the package info from
+        -- snapshot info, increments the count of loaded packages, and reports success
+        -- as a Bool.
         addPantryPackageWithReport pp = do
             let PantryCabal {pcPackageName} = ppPantryCabal pp
                 isHidden = fromMaybe False (Map.lookup pcPackageName sfHidden)
@@ -695,6 +765,9 @@ uploadHoogleDB fp key =
         body <- toBody <$> readFileBinary fpgz
         uploadBucket <- scUploadBucketName <$> ask
         uploadFromRIO key $
+            -- FIXME: I should also set content encoding explicitly here. But
+            -- then I would break stackage-server, which applies an 'ungzip' in
+            -- 'newHoogleLocker'. :(
             set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body
 
 
@@ -708,17 +781,31 @@ uploadFromRIO key po = do
             logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e
         Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3"
 
-buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
-buildAndUploadHoogleDB doNotUpload = do
+-- | As the name says, build and upload Hoogle DBs.
+--
+-- Which DBs? The last 5 LTS and the last 5 Nightlies that are missing their
+-- Hoogle DBs.
+--
+-- How? It downloads the Hoogle inputs that were previously generated alongside
+-- the Haddocks, runs @hoogle@ on them, and uploads the result back to the same
+-- bucket. Those inputs were generated by snapshot curation.
+--
+-- Why? I feel like this should be a short Bash script using curl and hoogle, and
+-- maybe one day it will be.
+--
+-- This action is only run by stackage-server-cron.
+buildAndUploadHoogleDBs :: Bool -> RIO StackageCron ()
+buildAndUploadHoogleDBs doNotUpload = do
     snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
-    -- currentHoogleVersionId <- scHoogleVersionId <$> ask
     env <- ask
     awsEnv <- asks scEnvAWS
     bucketUrl <- asks scDownloadBucketUrl
     -- locker is an action that returns the path to a hoogle db, if one exists
-    -- in the haddock bucket already.
+    -- in the haddock bucket already. It takes the SnapName as an argument.
+    -- I think it might be overkill.
     locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
-    let insertH = checkInsertSnapshotHoogleDb True
+    let -- These bindings undo a questionable conflation of operations
+        insertH = checkInsertSnapshotHoogleDb True
         checkH = checkInsertSnapshotHoogleDb False
     for_ snapshots $ \(snapshotId, snapName) ->
         -- Even though we just got a list of snapshots that don't have hoogle
@@ -727,72 +814,82 @@ buildAndUploadHoogleDB doNotUpload = do
         -- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb.
         -- Perhaps the check can be removed.
         unlessM (checkH snapshotId) $ do
+            withSystemTempDirectory ("hoogle-" ++ T.unpack (textDisplay snapName)) $ \tmpdir -> do
                 logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
+                -- Check if the database already exists (by downloading it).
+                -- FIXME: Why not just send a HEAD?
+                -- Perhaps the idea was to put the hoogle database somewhere the
+                -- main Stackage server process can find it? But nowadays
+                -- stackage-server downloads its own version separately.
                 mfp <- singleRun locker snapName
                 case mfp of
-                    Just _ -> do
-                        logInfo $ "Current hoogle database exists for: " <> display snapName
+                    Just fp -> do
+                        -- Something bad must have happened: we created the hoogle db
+                        -- previously, but didn't get to record it as available.
+                        logWarn $ "Unregistered hoogle database found for: " <> display snapName
+                            <> ". Registering now."
                         void $ insertH snapshotId
+                        -- FIXME: For now we need to delete this file we just
+                        -- downloaded. We probably shouldn't download it in the
+                        -- first place, though (just use HEAD).
+                        liftIO $ removeFile fp
                     Nothing -> do
                         logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
-                        mfp' <- createHoogleDB snapshotId snapName
-                        forM_ mfp' $ \fp -> do
+                        -- NB: createHoogleDB will fail if something goes wrong.
+                        fp <- createHoogleDB tmpdir snapshotId snapName
                         let key = hoogleKey snapName
-                            dest = T.unpack key
-                        createDirectoryIfMissing True $ takeDirectory dest
-                        renamePath fp dest
                         unless doNotUpload $ do
-                            uploadHoogleDB dest (ObjectKey key)
+                            uploadHoogleDB fp (ObjectKey key)
                             void $ insertH snapshotId
 
--- | Create a hoogle db from haddocks for the given snapshot, and upload it to
--- the haddock bucket.
-createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
-createHoogleDB snapshotId snapName =
-    handleAny logException $ do
-        logInfo $ "Creating Hoogle DB for " <> display snapName
-        downloadBucketUrl <- scDownloadBucketUrl <$> ask
-        let root = "hoogle-gen"
-            bindir = root </> "bindir"
-            outname = root </> "output.hoo"
-            tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
-            tarUrl = downloadBucketUrl <> "/" <> tarKey
-            tarFP = root </> T.unpack tarKey
-        -- When tarball is downloaded it is saved with durability and atomicity, so if it
-        -- is present it is not in a corrupted state
-        unlessM (doesFileExist tarFP) $ do
-            req <- parseRequest $ T.unpack tarUrl
-            env <- asks scEnvAWS
-            let man = env ^. env_manager
-            withResponseUnliftIO req {decompress = const True} man $ \res -> do
-                throwErrorStatusCodes req res
-                createDirectoryIfMissing True $ takeDirectory tarFP
-                withBinaryFileDurableAtomic tarFP WriteMode $ \tarHandle ->
-                    runConduitRes $ bodyReaderSource (responseBody res) .| sinkHandle tarHandle
-        void $ tryIO $ removeDirectoryRecursive bindir
-        void $ tryIO $ removeFile outname
-        createDirectoryIfMissing True bindir
+-- | Create a hoogle db from haddocks for the given snapshot.
+--
+-- Haddocks are downloaded from the documentation bucket, where they were
+-- uploaded as a tar file.
+--
+-- Returns the path to the .hoo database, which will be found in the first
+-- argument. It will look like @<rootDir>/hoogle-gen/output.hoo@.
+createHoogleDB :: FilePath -> SnapshotId -> SnapName -> RIO StackageCron FilePath
+createHoogleDB rootDir snapshotId snapName = do
+    logInfo $ "Creating Hoogle DB for " <> display snapName
+    downloadBucketUrl <- scDownloadBucketUrl <$> ask
+    let root = rootDir </> "hoogle-gen"
+        outname = root </> "output.hoo"
+        inputTarKey = toPathPiece snapName <> "/hoogle/orig.tar"
+        inputTarUrl = downloadBucketUrl <> "/" <> inputTarKey
+        outputTarFP = root </> T.unpack inputTarKey
+    -- Fetch the tarball with Hoogle inputs
+    req <- parseRequest $ T.unpack inputTarUrl
+    env <- asks scEnvAWS
+    let man = env ^. env_manager
+    withResponseUnliftIO req {decompress = const True} man $ \res -> do
+        throwErrorStatusCodes req res
+        createDirectoryIfMissing True $ takeDirectory outputTarFP
+        withBinaryFileDurableAtomic outputTarFP WriteMode $ \tarHandle ->
+            runConduitRes $ bodyReaderSource (responseBody res) .| sinkHandle tarHandle
+    -- Extract the Hoogle inputs from the tarball into a separate temp dir, then
+    -- generate the hoogle database.
     withSystemTempDirectory ("hoogle-" ++ T.unpack (textDisplay snapName)) $ \tmpdir -> do
         Any hasRestored <-
             runConduitRes $
-            sourceFile tarFP .|
+            sourceFile outputTarFP .|
             untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .|
             foldMapC Any
+        -- We just check if we have any Hoogle .txt file at all.
         unless hasRestored $ error "No Hoogle .txt files found"
+        -- Generate the hoogle database
         let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir]
         logInfo $
             mconcat
                 [ "Merging databases... ("
                 , foldMap fromString $ L.intersperse " " ("hoogle" : args)
                 , ")"
                 ]
+        -- 'Hoogle.hoogle' expects to run as an app, and crashes if something
+        -- goes wrong. That's good.
         liftIO $ Hoogle.hoogle args
         logInfo "Merge done"
-        return $ Just outname
-  where
-    logException exc =
-        logError ("Problem creating hoogle db for " <> display snapName <> ": " <> displayShow exc) $>
-        Nothing
+        pure outname
 
 
 -- | Grabs hoogle txt file from the tarball and a matching cabal file from pantry. Writes
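On the FIXME above ("Why not just send a HEAD?"): the existence check could avoid downloading the database at all with a bare HEAD request through http-client. A sketch under assumed names (objectExists is hypothetical; the real check would reuse the cron job's Manager and bucket URL):

    {-# LANGUAGE OverloadedStrings #-}
    import Network.HTTP.Client (Manager, httpNoBody, method, parseRequest, responseStatus)
    import Network.HTTP.Types.Status (statusIsSuccessful)

    objectExists :: Manager -> String -> IO Bool
    objectExists man url = do
        req <- parseRequest url
        -- HEAD returns only headers, so nothing is written to disk
        res <- httpNoBody req { method = "HEAD" } man
        pure (statusIsSuccessful (responseStatus res))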
@@ -824,7 +921,10 @@ restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName fileInfo =
         _ -> yield False
 
 
-pathToPackageModule :: Text -> Maybe (PackageIdentifierP, ModuleNameP)
+pathToPackageModule
+    :: Text
+    -- ^ Input is like @ace-0.6/ACE-Combinators@
+    -> Maybe (PackageIdentifierP, ModuleNameP)
 pathToPackageModule txt =
     case T.split (== '/') txt of
         [pkgIdentifier, moduleNameDashes] -> do
@@ -4,7 +4,7 @@ module Stackage.Database.Github
     ( cloneOrUpdate
     , lastGitFileUpdate
     , getStackageContentDir
-    , getCoreCabalFilesDir
+    , getBackupCoreCabalFilesDir
     , GithubRepo(..)
     ) where
 
@@ -81,9 +81,9 @@ getStackageContentDir rootDir =
     cloneOrUpdate rootDir (GithubRepo "commercialhaskell" "stackage-content")
 
 -- | Use backup location with cabal files, hackage doesn't have all of them.
-getCoreCabalFilesDir ::
+getBackupCoreCabalFilesDir ::
        (MonadReader env m, HasLogFunc env, HasProcessContext env, MonadIO m)
     => FilePath
     -> m FilePath
-getCoreCabalFilesDir rootDir =
+getBackupCoreCabalFilesDir rootDir =
     cloneOrUpdate rootDir (GithubRepo "commercialhaskell" "core-cabal-files")
@@ -173,7 +173,7 @@ ltsBefore x y = do
 lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)]
 lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
     currentHoogleVersionId <- scHoogleVersionId <$> ask
-    let getSnapshotsWithoutHoogeDb snapId snapCount =
+    let getSnapshotsWithoutHoogleDb snapId snapCount =
             map (unValue *** unValue) <$>
             select
             -- "snap" is either Lts or Nightly, while "snapshot" is indeed
@@ -206,12 +206,12 @@ lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
             -- order by snapshot.created desc
             -- limit $snapCount
             --
-            -- So it returns a list of snapshots where there is no
+            -- So it returns a limited list of snapshots where there is no
             -- corresponding entry in the snapshot_hoogle_db table for the
             -- current hoogle version.
     run $ do
-        lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount
-        nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount
+        lts <- getSnapshotsWithoutHoogleDb LtsSnap ltsCount
+        nightly <- getSnapshotsWithoutHoogleDb NightlySnap nightlyCount
         pure $ lts ++ nightly
 
 
@@ -1100,6 +1100,8 @@ getHackageCabalByKey (PackageIdentifierP pname ver) (BlobKey sha size) =
         return (hc ^. HackageCabalId, hc ^. HackageCabalTree)
 
 
+-- | Gets the id for the SnapshotPackage that corresponds to the given Snapshot
+-- and PackageIdentifier.
 getSnapshotPackageId ::
        SnapshotId
     -> PackageIdentifierP
@@ -1114,6 +1116,18 @@ getSnapshotPackageId snapshotId (PackageIdentifierP pname ver) =
             (pn ^. PackageNameName ==. val pname) &&.
             (v ^. VersionVersion ==. val ver))
         return (sp ^. SnapshotPackageId)
+--
+-- i.e.
+--
+-- select sp.id
+-- from snapshot_package sp
+-- join version
+-- on version.id = sp.version
+-- join package_name pn
+-- on pn.id = sp.package_name
+-- where sp.snapshot = $snapshot_id
+-- and pn.name = $name
+-- and v.version = $version
 
 
 getSnapshotPackageCabalBlob ::
@@ -1127,6 +1141,16 @@ getSnapshotPackageCabalBlob snapshotId pname =
         ((sp ^. SnapshotPackageSnapshot ==. val snapshotId) &&.
          (pn ^. PackageNameName ==. val pname))
         return (blob ^. BlobContents)
+-- i.e.
+--
+-- select blob.content
+-- from snapshot_package sp
+-- join package_name pn
+-- on pn.id = sp.package_name
+-- join blob
+-- on blob.id = sp.cabal
+-- where sp.snapshot = $snapshotId
+-- and pn.name = $name
 
 -- | Idempotent and thread safe way of adding a new module.
 insertModuleSafe :: ModuleNameP -> ReaderT SqlBackend (RIO env) ModuleNameId
@@ -1164,6 +1188,7 @@ markModuleHasDocs snapshotId pid mSnapshotPackageId modName =
                 \AND snapshot_package_module.snapshot_package = ?"
                 [toPersistValue modName, toPersistValue snapshotPackageId]
             return $ Just snapshotPackageId
+        -- FIXME: The Nothing case seems like it should not happen.
         Nothing -> return Nothing
 
 
@@ -1199,9 +1224,10 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
                     lift $
                         logInfo $
                         "Marking hoogle database for version " <> display hver <> " as available."
-            -- whether or not the version exists, we still put it into snapshot_hoogle_db
-            -- So literally the only use of the above query is to log the
-            -- action we're taking.
+            -- whether or not the version exists, we still put it into
+            -- snapshot_hoogle_db. So literally the only use of the above
+            -- query is to log the action we're taking. Whether or not it
+            -- exists is immaterial to the following action.
             isJust <$> P.insertUniqueEntity sh
         -- if we're not inserting, we're just checking if it already exists
         -- in snapshot_hoogle_db.
@@ -117,6 +117,7 @@ data SnapshotFile = SnapshotFile
     } deriving (Show)
 
 
+-- Is this a reference to a cabal file stored in Pantry?
 data PantryCabal = PantryCabal
     { pcPackageName :: !PackageNameP
     , pcVersion :: !VersionP
@@ -131,6 +132,7 @@ instance Display PantryCabal where
 instance ToMarkup PantryCabal where
     toMarkup = toMarkup . textDisplay
 
+-- A Cabal file (package name, version, blob) and source tree
 data PantryPackage = PantryPackage
     { ppPantryCabal :: !PantryCabal
     , ppPantryKey :: !TreeKey
@@ -34,5 +34,4 @@ nix:
     - zlib
     - postgresql
     - pkg-config
-    - haskell-language-server
     - cacert