-- stackage-server/src/Stackage/Database/Cron.hs

{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
module Stackage.Database.Cron
( stackageServerCron
, newHoogleLocker
, singleRun
, StackageCronOptions(..)
, haddockBucketName
) where
import Conduit
import Control.DeepSeq
import Control.Lens ((.~))
import qualified Control.Monad.Trans.AWS as AWS (paginate)
import Control.SingleRun
import qualified Data.ByteString.Char8 as BS8
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Tar (FileInfo(..), FileType(..), untar)
import Data.Conduit.Zlib (WindowBits(WindowBits), compress, ungzip)
import qualified Data.IntMap.Strict as IntMap
import Data.Monoid (Any(..))
import Data.Streaming.Network (bindPortTCP)
import Data.Yaml (decodeFileEither)
import Database.Persist
import Database.Persist.Postgresql
import Distribution.PackageDescription (GenericPackageDescription)
import qualified Hoogle
import Network.AWS hiding (Request, Response)
import Network.AWS.Data.Body (toBody)
import Network.AWS.Data.Text (toText)
import Network.AWS.S3
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Network.HTTP.Simple (getResponseBody, httpJSONEither, parseRequest)
import Network.HTTP.Types (status200, status404)
import Pantry (CabalFileInfo(..), DidUpdateOccur(..),
HpackExecutable(HpackBundled), PackageIdentifierRevision(..),
defaultHackageSecurityConfig)
import Pantry.Internal.Stackage (HackageCabalId, HackageTarballResult(..),
PantryConfig(..), Storage(..),
forceUpdateHackageIndex, getHackageTarball,
getTreeForKey, loadBlobById, packageTreeKey,
treeCabal)
import Path (parseAbsDir, toFilePath)
import RIO
import RIO.Directory
import RIO.FilePath
import RIO.List as L
import qualified RIO.Map as Map
import RIO.Process (mkDefaultProcessContext)
import qualified RIO.Set as Set
import qualified RIO.Text as T
import RIO.Time
import Settings
import Stackage.Database.Github
import Stackage.Database.PackageInfo
import Stackage.Database.Query
import Stackage.Database.Schema
import Stackage.Database.Types
import System.Environment (lookupEnv)
import UnliftIO.Concurrent (getNumCapabilities)
import Web.PathPieces (fromPathPiece, toPathPiece)

hoogleKey :: SnapName -> Text
hoogleKey name = T.concat
[ "hoogle/"
, toPathPiece name
, "/"
, VERSION_hoogle
, ".hoo"
]
hoogleUrl :: SnapName -> Text
hoogleUrl n = T.concat
[ "https://s3.amazonaws.com/"
, haddockBucketName
, "/"
, hoogleKey n
]
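
-- An illustration of the values these helpers produce. The version component
-- comes from the VERSION_hoogle CPP macro, so its exact value depends on the
-- build plan; "5.0.17.9" below is only an assumed example:
--
-- > hoogleKey (SNLts 14 1) == "hoogle/lts-14.1/5.0.17.9.hoo"
-- > hoogleUrl (SNLts 14 1) ==
-- >     "https://s3.amazonaws.com/" <> haddockBucketName <> "/" <> hoogleKey (SNLts 14 1)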
hackageDeprecatedUrl :: Request
hackageDeprecatedUrl = "https://hackage.haskell.org/packages/deprecated.json"
withStorage :: Int -> (Storage -> IO a) -> IO a
withStorage poolSize inner = do
connstr <-
lookupEnv "PGSTRING" >>= \case
Just connstr -> pure (T.pack connstr)
Nothing -> appPostgresString <$> getAppSettings
withStackageDatabase
False
PostgresConf {pgPoolSize = poolSize, pgConnStr = encodeUtf8 connstr}
(\ db -> inner (Storage (runDatabase db) id))
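
-- When set, the PGSTRING environment variable takes precedence over the
-- connection string from the app settings. A hypothetical example value:
--
-- > PGSTRING="host=localhost port=5432 user=stackage dbname=stackage"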
getStackageSnapshotsDir :: RIO StackageCron FilePath
getStackageSnapshotsDir = do
cron <- ask
cloneOrUpdate (scStackageRoot cron) (scSnapshotsRepo cron)
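
-- | 'withResponse' from http-client is specialized to 'IO'; this wrapper uses
-- 'withRunInIO' to lift it into any 'MonadUnliftIO', so the response body can
-- be consumed directly inside 'RIO' (see 'newHoogleLocker' and
-- 'createHoogleDB' below).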
withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
newHoogleLocker ::
(HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man = mkSingleRun hoogleLocker
where
hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath)
hoogleLocker name =
runRIO env $ do
let fp = T.unpack $ hoogleKey name
fptmp = fp <.> "tmp"
exists <- doesFileExist fp
if exists
then return $ Just fp
else do
req' <- parseRequest $ T.unpack $ hoogleUrl name
let req = req' {decompress = const False}
withResponseUnliftIO req man $ \res ->
case responseStatus res of
status
| status == status200 -> do
createDirectoryIfMissing True $ takeDirectory fptmp
-- TODO: https://github.com/commercialhaskell/rio/issues/160
-- withBinaryFileDurableAtomic fp WriteMode $ \h ->
-- runConduitRes $
-- bodyReaderSource (responseBody res) .| ungzip .|
-- sinkHandle h
runConduitRes $
bodyReaderSource (responseBody res) .| ungzip .|
sinkFile fptmp
renamePath fptmp fp
return $ Just fp
| status == status404 -> do
logDebug $ "NotFound: " <> display (hoogleUrl name)
return Nothing
| otherwise -> do
body <- liftIO $ brConsume $ responseBody res
                                    -- TODO: ideally consume the body only when the log level is set to
                                    -- LevelDebug; this will require a way to get the LogLevel from LogFunc
mapM_ (logDebug . displayBytesUtf8) body
return Nothing
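
-- A hedged usage sketch for the locker ('buildAndUploadHoogleDB' below is the
-- real call site): 'singleRun' guarantees that concurrent requests for the
-- same snapshot's hoogle database result in a single download.
--
-- > locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
-- > mfp <- singleRun locker (SNLts 14 1) -- Just <path> once cached locally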
getHackageDeprecations ::
(HasLogFunc env, MonadReader env m, MonadIO m) => m [Deprecation]
getHackageDeprecations = do
jsonResponseDeprecated <- httpJSONEither hackageDeprecatedUrl
case getResponseBody jsonResponseDeprecated of
Left err -> do
logError $
"There was an error parsing deprecated.json file: " <>
fromString (displayException err)
return []
Right deprecated -> return deprecated
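
-- For reference, the deprecated.json feed is a JSON array of objects; an
-- illustrative (not verbatim) entry:
--
-- > [{"deprecated-package": "old-pkg", "in-favour-of": ["new-pkg"]}]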
stackageServerCron :: StackageCronOptions -> IO ()
stackageServerCron StackageCronOptions {..} = do
void $
-- Hacky approach instead of PID files
catchIO (bindPortTCP 17834 "127.0.0.1") $
const $ throwString "Stackage Cron loader process already running, exiting."
connectionCount <- getNumCapabilities
withStorage connectionCount $ \storage -> do
lo <- logOptionsHandle stdout True
stackageRootDir <- getAppUserDataDirectory "stackage"
pantryRootDir <- parseAbsDir (stackageRootDir </> "pantry")
createDirectoryIfMissing True (toFilePath pantryRootDir)
updateRef <- newMVar True
cabalImmutable <- newIORef Map.empty
cabalMutable <- newIORef Map.empty
gpdCache <- newIORef IntMap.empty
defaultProcessContext <- mkDefaultProcessContext
aws <- newEnv Discover
withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc ->
let pantryConfig =
PantryConfig
{ pcHackageSecurity = defaultHackageSecurityConfig
, pcHpackExecutable = HpackBundled
, pcRootDir = pantryRootDir
, pcStorage = storage
, pcUpdateRef = updateRef
, pcParsedCabalFilesRawImmutable = cabalImmutable
, pcParsedCabalFilesMutable = cabalMutable
, pcConnectionCount = connectionCount
}
stackage =
StackageCron
{ scPantryConfig = pantryConfig
, scStackageRoot = stackageRootDir
, scProcessContext = defaultProcessContext
, scLogFunc = logFunc
, scForceFullUpdate = scoForceUpdate
, scCachedGPD = gpdCache
, scEnvAWS = aws
, scDownloadBucketName = scoDownloadBucketName
, scUploadBucketName = scoUploadBucketName
, scSnapshotsRepo = scoSnapshotsRepo
, scReportProgress = scoReportProgress
, scCacheCabalFiles = scoCacheCabalFiles
}
in runRIO stackage (runStackageUpdate scoDoNotUpload)
runStackageUpdate :: Bool -> RIO StackageCron ()
runStackageUpdate doNotUpload = do
forceFullUpdate <- scForceFullUpdate <$> ask
logInfo $ "Starting stackage-cron update" <> bool "" " with --force-update" forceFullUpdate
runStackageMigrations
didUpdate <- forceUpdateHackageIndex (Just "stackage-server cron job")
case didUpdate of
UpdateOccurred -> logInfo "Updated hackage index"
NoUpdateOccurred -> logInfo "No new packages in hackage index"
logInfo "Getting deprecated info now"
getHackageDeprecations >>= setDeprecations
corePackageGetters <- makeCorePackageGetters
runResourceT $
join $
runConduit $ sourceSnapshots .| foldMC (createOrUpdateSnapshot corePackageGetters) (pure ())
run $ mapM_ (`rawExecute` []) ["COMMIT", "VACUUM", "BEGIN"]
unless doNotUpload $ do
uploadSnapshotsJSON
buildAndUploadHoogleDB

-- | This will look at 'global-hints.yaml' and create core package getters
-- that are reused later for adding those packages to individual snapshots.
makeCorePackageGetters ::
RIO StackageCron (Map CompilerP [CorePackageGetter])
makeCorePackageGetters = do
rootDir <- scStackageRoot <$> ask
contentDir <- getStackageContentDir rootDir
liftIO (decodeFileEither (contentDir </> "stack" </> "global-hints.yaml")) >>= \case
Right (hints :: Map CompilerP (Map PackageNameP VersionP)) ->
Map.traverseWithKey
(\compiler ->
fmap Map.elems . Map.traverseMaybeWithKey (makeCorePackageGetter compiler))
hints
Left exc -> do
logError $
"Error parsing 'global-hints.yaml' file: " <> fromString (displayException exc)
return mempty

-- | Core package info rarely changes between snapshots, therefore it would be
-- wasteful to load, parse and update all packages from global-hints for each
-- snapshot. Instead we produce a memoized version that does the work once
-- initially and returns the cached information about a package on subsequent
-- invocations.
makeCorePackageGetter ::
CompilerP -> PackageNameP -> VersionP -> RIO StackageCron (Maybe CorePackageGetter)
makeCorePackageGetter _compiler pname ver =
run (getHackageCabalByRev0 pid) >>= \case
Nothing -> do
logWarn $
"Core package from global-hints: '" <> display pid <> "' was not found in pantry."
pure Nothing
Just (hackageCabalId, blobId, _) -> do
pkgInfoRef <- newIORef Nothing -- use for caching of pkgInfo
let getMemoPackageInfo =
readIORef pkgInfoRef >>= \case
Just pkgInfo -> return pkgInfo
Nothing -> do
whenM (scReportProgress <$> ask) $
logSticky $ "Loading core package: " <> display pid
htr <- getHackageTarball pir Nothing
case htrFreshPackageInfo htr of
Just (gpd, treeId) -> do
mTree <- run $ getEntity treeId
let pkgInfo = (mTree, Just hackageCabalId, pid, gpd)
gpd `deepseq` writeIORef pkgInfoRef $ Just pkgInfo
pure pkgInfo
Nothing -> do
(cabalBlob, mTree) <-
run
((,) <$> loadBlobById blobId <*>
getTreeForKey (packageTreeKey (htrPackage htr)))
let gpd = parseCabalBlob cabalBlob
pkgInfo = (mTree, Just hackageCabalId, pid, gpd)
gpd `deepseq` writeIORef pkgInfoRef $ Just pkgInfo
pure pkgInfo
pure $ Just getMemoPackageInfo
where
pid = PackageIdentifierP pname ver
pir =
PackageIdentifierRevision (unPackageNameP pname) (unVersionP ver) (CFIRevision (Revision 0))
-- TODO: for now packages come only from Hackage; PantryPackage needs an
-- update before other origins can be used.

-- | Add a pantry package to a particular snapshot. Extra information such as
-- the compiler and flags is passed along in order to properly figure out the
-- dependencies and modules.
addPantryPackage ::
SnapshotId -> CompilerP -> Bool -> Map FlagNameP Bool -> PantryPackage -> RIO StackageCron Bool
addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do
env <- ask
let gpdCachedRef = scCachedGPD env
cache = scCacheCabalFiles env
let blobKeyToInt = fromIntegral . unSqlBackendKey . unBlobKey
let updateCacheGPD blobId gpd =
gpd `deepseq`
atomicModifyIORef' gpdCachedRef (\cacheMap -> (IntMap.insert blobId gpd cacheMap, gpd))
let getCachedGPD treeCabal =
\case
Just gpd | cache -> updateCacheGPD (blobKeyToInt treeCabal) gpd
Just gpd -> pure gpd
Nothing | cache -> do
cacheMap <- readIORef gpdCachedRef
case IntMap.lookup (blobKeyToInt treeCabal) cacheMap of
Just gpd -> pure gpd
Nothing ->
loadBlobById treeCabal >>=
updateCacheGPD (blobKeyToInt treeCabal) . parseCabalBlob
Nothing -> parseCabalBlob <$> loadBlobById treeCabal
let storeHackageSnapshotPackage hcid mtid mgpd =
getTreeForKey treeKey >>= \case
Just (Entity treeId _)
| Just tid <- mtid
, tid /= treeId -> do
lift $ logError $ "Pantry Tree Key mismatch for: " <> display pc
pure False
mTree@(Just (Entity _ Tree {treeCabal}))
| Just treeCabal' <- treeCabal -> do
gpd <- getCachedGPD treeCabal' mgpd
let mhcid = Just hcid
addSnapshotPackage sid compiler Hackage mTree mhcid isHidden flags pid gpd
pure True
_ -> do
lift $ logError $ "Pantry is missing the source tree for " <> display pc
pure False
mHackageCabalInfo <- run $ getHackageCabalByKey pid (pcCabalKey pc)
case mHackageCabalInfo of
Nothing -> do
logError $ "Could not find the cabal file for: " <> display pc
pure False
Just (hcid, Nothing) -> do
mHPI <-
htrFreshPackageInfo <$>
getHackageTarball (toPackageIdentifierRevision pc) (Just treeKey)
run $
case mHPI of
Just (gpd, treeId) -> storeHackageSnapshotPackage hcid (Just treeId) (Just gpd)
Nothing -> storeHackageSnapshotPackage hcid Nothing Nothing
Just (hcid, mtid) -> run $ storeHackageSnapshotPackage hcid mtid Nothing
where
pid = PackageIdentifierP (pcPackageName pc) (pcVersion pc)

-- | Download a list of available .html files from the S3 bucket for a
-- particular resolver and record in the database which modules have
-- documentation available.
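--
-- The object keys are expected to look like this (illustrative):
--
-- > lts-14.1/aeson-1.4.2.0/Data-Aeson.html
--
-- which, after the prefix and suffix are stripped, parses into the package
-- @aeson-1.4.2.0@ and the module @Data.Aeson@.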
checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) ()
checkForDocs snapshotId snapName = do
bucketName <- lift (scDownloadBucketName <$> ask)
mods <-
runConduit $
AWS.paginate (req bucketName) .| concatMapC (^. lovrsContents) .|
mapC (\obj -> toText (obj ^. oKey)) .|
concatMapC (T.stripSuffix ".html") .|
concatMapC (T.stripPrefix prefix) .|
concatMapC pathToPackageModule .|
sinkList
    -- It is faster to download the list of all modules in this snapshot than
    -- to process them with a conduit all the way to the database.
sidsCacheRef <- newIORef Map.empty
    -- The cache is for SnapshotPackageId; there will be many modules per
    -- package, so there is no need to query the database for each one of them.
n <- max 1 . (`div` 2) <$> getNumCapabilities
notFoundList <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
forM_ (Set.fromList $ catMaybes notFoundList) $ \pid ->
lift $
logWarn $
"Documentation available for package '" <> display pid <>
"' but was not found in this snapshot: " <>
display snapName
where
prefix = textDisplay snapName <> "/"
req bucketName = listObjectsV2 (BucketName bucketName) & lovPrefix .~ Just prefix
    -- | Records all package modules that have documentation available; the
    -- ones that are not found in the snapshot are reported back as errors.
    -- Besides being run concurrently, this function also optimizes the
    -- SnapshotPackageId lookup, since it can be shared amongst the many
    -- modules of one package.
markModules sidsCacheRef (pid, modName) = do
sidsCache <- readIORef sidsCacheRef
let mSnapshotPackageId = Map.lookup pid sidsCache
mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName
case mFound of
Nothing -> pure $ Just pid
Just snapshotPackageId
| Nothing <- mSnapshotPackageId -> do
atomicModifyIORef'
sidsCacheRef
(\cacheMap -> (Map.insert pid snapshotPackageId cacheMap, ()))
pure Nothing
_ -> pure Nothing
data SnapshotFileInfo = SnapshotFileInfo
{ sfiSnapName :: !SnapName
, sfiUpdatedOn :: !UTCTime
, sfiSnapshotFileGetter :: !(RIO StackageCron (Maybe SnapshotFile))
}
-- | Use 'github.com/commercialhaskell/stackage-snapshots' repository to source all of the packages
-- one snapshot at a time.
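--
-- The repository lays snapshot files out as, for example:
--
-- > lts/14/1.yaml          -- parsed into SNLts 14 1
-- > nightly/2019/7/30.yaml -- parsed into SNNightly (2019-07-30)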
sourceSnapshots :: ConduitT a SnapshotFileInfo (ResourceT (RIO StackageCron)) ()
sourceSnapshots = do
snapshotsDir <- lift $ lift getStackageSnapshotsDir
sourceDirectoryDeep False (snapshotsDir </> "lts") .| concatMapMC (getLtsParser snapshotsDir)
sourceDirectoryDeep False (snapshotsDir </> "nightly") .|
concatMapMC (getNightlyParser snapshotsDir)
where
makeSnapshotFileInfo gitDir fp mFileNameDate snapName = do
let parseSnapshot updatedOn = do
esnap <- liftIO $ decodeFileEither fp
case esnap of
Right snap ->
let publishDate =
sfPublishDate snap <|> mFileNameDate <|> Just (utctDay updatedOn)
in return $ Just snap {sfPublishDate = publishDate}
Left exc -> do
logError $
"Error parsing snapshot file: " <> fromString fp <> "\n" <>
fromString (displayException exc)
return Nothing
lastGitFileUpdate gitDir fp >>= \case
Left err -> do
logError $ "Error parsing git commit date: " <> fromString err
return Nothing
Right updatedOn -> do
env <- lift ask
return $
Just
SnapshotFileInfo
{ sfiSnapName = snapName
, sfiUpdatedOn = updatedOn
, sfiSnapshotFileGetter = runRIO env (parseSnapshot updatedOn)
}
getLtsParser gitDir fp =
case mapM (BS8.readInt . BS8.pack) $ take 2 $ reverse (splitPath fp) of
Just [(minor, ".yaml"), (major, "/")] ->
makeSnapshotFileInfo gitDir fp Nothing $ SNLts major minor
_ -> do
logError
("Couldn't parse the filepath into an LTS version: " <> display (T.pack fp))
return Nothing
getNightlyParser gitDir fp =
case mapM (BS8.readInt . BS8.pack) $ take 3 $ reverse (splitPath fp) of
Just [(day, ".yaml"), (month, "/"), (year, "/")]
| Just date <- fromGregorianValid (fromIntegral year) month day ->
makeSnapshotFileInfo gitDir fp (Just date) $ SNNightly date
_ -> do
logError
("Couldn't parse the filepath into a Nightly date: " <> display (T.pack fp))
return Nothing

-- | Creates a new `Snapshot` if it is not yet present in the database, or
-- decides whether an update is necessary when it already exists.
decideOnSnapshotUpdate :: SnapshotFileInfo -> RIO StackageCron (Maybe (SnapshotId, SnapshotFile))
decideOnSnapshotUpdate SnapshotFileInfo {sfiSnapName, sfiUpdatedOn, sfiSnapshotFileGetter} = do
forceUpdate <- scForceFullUpdate <$> ask
let mkLogMsg rest = "Snapshot with name: " <> display sfiSnapName <> " " <> rest
mKeySnapFile <-
run (getBy (UniqueSnapshot sfiSnapName)) >>= \case
Just (Entity _key snap)
| snapshotUpdatedOn snap == Just sfiUpdatedOn && not forceUpdate -> do
logInfo $ mkLogMsg "already exists and is up to date."
return Nothing
Just entity@(Entity _key snap)
| Nothing <- snapshotUpdatedOn snap -> do
logWarn $ mkLogMsg "did not finish updating last time."
fmap (Just entity, ) <$> sfiSnapshotFileGetter
Just entity -> do
unless forceUpdate $ logWarn $ mkLogMsg "was updated, applying new patch."
fmap (Just entity, ) <$> sfiSnapshotFileGetter
Nothing -> fmap (Nothing, ) <$> sfiSnapshotFileGetter
-- Add new snapshot to the database, when necessary
case mKeySnapFile of
Just (Just (Entity snapKey snap), sf@SnapshotFile {sfCompiler, sfPublishDate})
| Just publishDate <- sfPublishDate -> do
let updatedSnap =
Snapshot sfiSnapName sfCompiler publishDate (snapshotUpdatedOn snap)
run $ replace snapKey updatedSnap
pure $ Just (snapKey, sf)
Just (Nothing, sf@SnapshotFile {sfCompiler, sfPublishDate})
| Just publishDate <- sfPublishDate ->
fmap (, sf) <$>
run (insertUnique (Snapshot sfiSnapName sfCompiler publishDate Nothing))
_ -> return Nothing
type CorePackageGetter
= RIO StackageCron ( Maybe (Entity Tree)
, Maybe HackageCabalId
, PackageIdentifierP
, GenericPackageDescription)
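
-- The tuple produced above is, in order: the package's source tree entity
-- (when pantry has one), its HackageCabalId (when the package comes from
-- Hackage), the package identifier, and the parsed cabal file description;
-- see 'makeCorePackageGetter' for how it gets filled in.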

-- | This is an optimized version of snapshot loading which can load a
-- snapshot and the documentation info for the previous snapshot at the same
-- time: the loading of the current snapshot is executed concurrently with the
-- action passed in as an argument. At the end it returns an action that
-- should be invoked in order to mark modules that have documentation
-- available, which in turn can be passed as an argument to the next snapshot
-- loader.
createOrUpdateSnapshot ::
Map CompilerP [CorePackageGetter]
-> ResourceT (RIO StackageCron) ()
-> SnapshotFileInfo
-> ResourceT (RIO StackageCron) (ResourceT (RIO StackageCron) ())
createOrUpdateSnapshot corePackageInfoGetters prevAction sfi@SnapshotFileInfo { sfiSnapName
, sfiUpdatedOn
} = do
finishedDocs <- newIORef False
runConcurrently
(Concurrently (prevAction >> writeIORef finishedDocs True) *>
Concurrently (lift (loadCurrentSnapshot finishedDocs)))
where
loadCurrentSnapshot finishedDocs = do
loadDocs <-
decideOnSnapshotUpdate sfi >>= \case
Nothing -> return $ pure ()
Just (snapshotId, snapshotFile) ->
updateSnapshot
corePackageInfoGetters
snapshotId
sfiSnapName
sfiUpdatedOn
snapshotFile
report <- scReportProgress <$> ask
when report $
unlessM (readIORef finishedDocs) $
logSticky "Still loading the docs for previous snapshot ..."
pure loadDocs

-- | Updates all packages in the snapshot; any that are missing will be
-- created. Returns an action that checks for available documentation for
-- modules that are known to exist and marks them as documented when the
-- haddock is present on AWS S3. Only after documentation has been checked is
-- the snapshot marked as completely updated; this is required in case
-- something goes wrong and the process is interrupted.
updateSnapshot ::
Map CompilerP [CorePackageGetter]
-> SnapshotId
-> SnapName
-> UTCTime
-> SnapshotFile
-> RIO StackageCron (ResourceT (RIO StackageCron) ())
updateSnapshot corePackageGetters snapshotId snapName updatedOn SnapshotFile {..} = do
insertSnapshotName snapshotId snapName
loadedPackageCountRef <- newIORef (0 :: Int)
let totalPackages = length sfPackages
addPantryPackageWithReport pp = do
let PantryCabal {pcPackageName} = ppPantryCabal pp
isHidden = fromMaybe False (Map.lookup pcPackageName sfHidden)
flags = fromMaybe Map.empty $ Map.lookup pcPackageName sfFlags
curSucc <- addPantryPackage snapshotId sfCompiler isHidden flags pp
atomicModifyIORef' loadedPackageCountRef (\c -> (c + 1, ()))
pure curSucc
-- Leave some cores and db connections for the doc loader
n <- max 1 . (`div` 2) <$> getNumCapabilities
before <- getCurrentTime
report <- scReportProgress <$> ask
pantryUpdatesSucceeded <-
runConcurrently
(Concurrently
(when report (runProgressReporter loadedPackageCountRef totalPackages snapName)) *>
Concurrently (pooledMapConcurrentlyN n addPantryPackageWithReport sfPackages))
after <- getCurrentTime
let timeTotal = round (diffUTCTime after before)
(mins, secs) = timeTotal `quotRem` (60 :: Int)
packagePerSecond = fromIntegral ((totalPackages * 100) `div` timeTotal) / 100 :: Float
allPantryUpdatesSucceeded = and pantryUpdatesSucceeded
logInfo $
mconcat
[ "Loading snapshot '"
, display snapName
, "' was done (in "
, displayShow mins
, "min "
, displayShow secs
, "sec). With average "
, displayShow packagePerSecond
, " packages/sec. There are still docs."
]
case Map.lookup sfCompiler corePackageGetters of
Nothing -> logError $ "Hints are not found for the compiler: " <> display sfCompiler
Just _
| not allPantryUpdatesSucceeded ->
logWarn $
mconcat
[ "There was an issue loading a snapshot '"
, display snapName
, "', deferring addition of packages "
, "from global-hints until next time."
]
Just compilerCorePackages ->
forM_ compilerCorePackages $ \getCorePackageInfo -> do
(mTree, mhcid, pid, gpd) <- getCorePackageInfo
run $ addSnapshotPackage snapshotId sfCompiler Core mTree mhcid False mempty pid gpd
return $ do
checkForDocsSucceeded <-
tryAny (checkForDocs snapshotId snapName) >>= \case
Left exc -> do
logError $ "Received exception while getting the docs: " <> displayShow exc
return False
Right () -> return True
if allPantryUpdatesSucceeded &&
checkForDocsSucceeded && Map.member sfCompiler corePackageGetters
then do
lift $ snapshotMarkUpdated snapshotId updatedOn
logInfo $ "Created or updated snapshot '" <> display snapName <> "' successfully"
else logError $ "There were errors while adding snapshot '" <> display snapName <> "'"

-- | Reports how many packages have been loaded so far.
runProgressReporter :: IORef Int -> Int -> SnapName -> RIO StackageCron ()
runProgressReporter loadedPackageCountRef totalPackages snapName = do
let reportProgress = do
loadedPackageCount <- readIORef loadedPackageCountRef
when (loadedPackageCount < totalPackages) $ do
logSticky $
mconcat
[ "Loading snapshot '"
, display snapName
, "' ("
, displayShow loadedPackageCount
, "/"
, displayShow totalPackages
, ")"
]
threadDelay 1000000
reportProgress
reportProgress

-- | Uploads a JSON file to S3 with the latest snapshot for each major LTS
-- version and the latest nightly.
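--
-- Roughly the shape of the uploaded @snapshots.json@ (illustrative values):
--
-- > {"lts-14": "lts-14.1", "lts-13": "lts-13.30", "nightly": "nightly-2019-07-30"}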
uploadSnapshotsJSON :: RIO StackageCron ()
uploadSnapshotsJSON = do
snapshots <- snapshotsJSON
uploadBucket <- scUploadBucketName <$> ask
let key = ObjectKey "snapshots.json"
uploadFromRIO key $
set poACL (Just OPublicRead) $
set poContentType (Just "application/json") $
putObject (BucketName uploadBucket) key (toBody snapshots)

-- | Writes a gzipped version of the hoogle db to a temporary file on the file
-- system and then uploads it to S3. The temporary file is removed upon
-- completion.
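--
-- Note: @WindowBits 31@ below is zlib's encoding for a 15-bit window plus the
-- gzip container (15 + 16), so the uploaded stream is a regular .gz file.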
uploadHoogleDB :: FilePath -> ObjectKey -> RIO StackageCron ()
uploadHoogleDB fp key =
withTempFile (takeDirectory fp) (takeFileName fp <.> "gz") $ \fpgz h -> do
runConduitRes $ sourceFile fp .| compress 9 (WindowBits 31) .| CB.sinkHandle h
hClose h
body <- chunkedFile defaultChunkSize fpgz
uploadBucket <- scUploadBucketName <$> ask
uploadFromRIO key $
set poACL (Just OPublicRead) $ putObject (BucketName uploadBucket) key body
uploadFromRIO :: AWSRequest a => ObjectKey -> a -> RIO StackageCron ()
uploadFromRIO key po = do
logInfo $ "Uploading " <> displayShow key <> " to S3 bucket."
env <- ask
eres <- runResourceT $ runAWS env $ trying _Error $ send po
case eres of
Left e ->
logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e
Right _ -> logInfo $ "Successfully uploaded " <> displayShow key <> " to S3"
buildAndUploadHoogleDB :: RIO StackageCron ()
buildAndUploadHoogleDB = do
snapshots <- lastLtsNightly 50 5
env <- ask
locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
void $ flip Map.traverseWithKey snapshots $ \snapshotId snapName -> do
logDebug $ "Starting Hoogle DB download: " <> display (hoogleKey snapName)
mfp <- singleRun locker snapName
case mfp of
Just _ -> logDebug $ "Hoogle database exists for: " <> display snapName
Nothing -> do
mfp' <- createHoogleDB snapshotId snapName
forM_ mfp' $ \fp -> do
let key = hoogleKey snapName
uploadHoogleDB fp (ObjectKey key)
let dest = T.unpack key
createDirectoryIfMissing True $ takeDirectory dest
renamePath fp dest
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
createHoogleDB snapshotId snapName =
handleAny logException $ do
logInfo $ "Creating Hoogle DB for " <> display snapName
downloadBucket <- scDownloadBucketName <$> ask
let root = "hoogle-gen"
bindir = root </> "bindir"
outname = root </> "output.hoo"
tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey
tarFP = root </> T.unpack tarKey
req <- parseRequest $ T.unpack tarUrl
man <- view envManager
unlessM (doesFileExist tarFP) $
withResponseUnliftIO req {decompress = const True} man $ \res -> do
throwErrorStatusCodes req res
createDirectoryIfMissing True $ takeDirectory tarFP
--withBinaryFileDurableAtomic tarFP WriteMode $ \tarHandle ->
--FIXME: https://github.com/commercialhaskell/rio/issues/160
let tmpTarFP = tarFP <.> "tmp"
withBinaryFile tmpTarFP WriteMode $ \tarHandle ->
runConduitRes $ bodyReaderSource (responseBody res) .| sinkHandle tarHandle
renameFile tmpTarFP tarFP
void $ tryIO $ removeDirectoryRecursive bindir
void $ tryIO $ removeFile outname
createDirectoryIfMissing True bindir
withSystemTempDirectory ("hoogle-" ++ T.unpack (textDisplay snapName)) $ \tmpdir -> do
Any hasRestored <-
runConduitRes $
sourceFile tarFP .| untar (restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName) .|
foldC
unless hasRestored $ error "No Hoogle .txt files found"
let args = ["generate", "--database=" ++ outname, "--local=" ++ tmpdir]
logInfo $
mconcat
[ "Merging databases... ("
, foldMap fromString $ L.intersperse " " ("hoogle" : args)
, ")"
]
liftIO $ Hoogle.hoogle args
logInfo "Merge done"
return $ Just outname
where
logException exc =
logError ("Problem creating hoogle db for " <> display snapName <> ": " <> displayShow exc) $>
Nothing
restoreHoogleTxtFileWithCabal ::
FilePath
-> SnapshotId
-> SnapName
-> FileInfo
-> ConduitM ByteString Any (ResourceT (RIO StackageCron)) ()
restoreHoogleTxtFileWithCabal tmpdir snapshotId snapName fileInfo =
case fileType fileInfo of
FTNormal -> do
let txtFileName = T.decodeUtf8With T.lenientDecode $ filePath fileInfo
txtPackageName = T.takeWhile (/= '.') txtFileName
mpkg = fromPathPiece txtPackageName
maybe (pure Nothing) (lift . lift . getSnapshotPackageCabalBlob snapshotId) mpkg >>= \case
Nothing -> do
logWarn $
"Unexpected hoogle filename: " <> display txtFileName <>
" in orig.tar for snapshot: " <>
display snapName
yield $ Any False
Just cabal -> do
writeFileBinary (tmpdir </> T.unpack txtPackageName <.> "cabal") cabal
sinkFile (tmpdir </> T.unpack txtFileName)
yield $ Any True
_ -> yield $ Any False
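
-- | Splits an S3 object path, with the snapshot prefix and the ".html" suffix
-- already stripped off, into a package identifier and a module name. An
-- illustrative (assumed) input and result, where dashes in the module part
-- stand in for dots:
--
-- > pathToPackageModule "aeson-1.4.2.0/Data-Aeson"
-- >   -- Just (aeson-1.4.2.0, Data.Aeson)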
pathToPackageModule :: Text -> Maybe (PackageIdentifierP, ModuleNameP)
pathToPackageModule txt =
case T.split (== '/') txt of
[pkgIdentifier, moduleNameDashes] -> do
modName :: ModuleNameP <- fromPathPiece moduleNameDashes
pkgId :: PackageIdentifierP <- fromPathPiece pkgIdentifier
Just (pkgId, modName)
_ -> Nothing