Add download-bucket-url option

Bryan Richter 2024-01-08 14:26:38 +02:00
parent 2939d98b9f
commit 6b4232b1c6
No known key found for this signature in database
GPG Key ID: B202264020068BFB
8 changed files with 62 additions and 38 deletions

View File

@@ -38,17 +38,24 @@ optsParser =
              \their yaml files from stackage-snapshots repo have been updated or not.") <*>
     option
         readText
-        (long "download-bucket" <> value haddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
+        (long "download-bucket" <> value defHaddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
          help
              ("S3 Bucket name where things like haddock and current hoogle files should \
-              \be downloaded from. Default is: " <>
-              T.unpack haddockBucketName)) <*>
+              \be downloaded from. Used in S3 API read operations. Default is: " <>
+              T.unpack defHaddockBucketName)) <*>
     option
         readText
-        (long "upload-bucket" <> value haddockBucketName <> metavar "UPLOAD_BUCKET" <>
+        (long "download-bucket-url" <> value defHaddockBucketUrl <> metavar "DOWNLOAD_BUCKET_URL" <>
+         help
+             ("Publicly accessible URL where the download bucket can be accessed. Used for \
+              \serving the Haddocks on the website. Default is: " <>
+              T.unpack defHaddockBucketUrl)) <*>
+    option
+        readText
+        (long "upload-bucket" <> value defHaddockBucketName <> metavar "UPLOAD_BUCKET" <>
          help
              ("S3 Bucket where hoogle db and snapshots.json file will be uploaded to. Default is: " <>
-              T.unpack haddockBucketName)) <*>
+              T.unpack defHaddockBucketName)) <*>
     switch
         (long "do-not-upload" <>
          help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*>

View File

@@ -24,3 +24,6 @@ force-ssl: false
 postgres-string: "_env:PGSTRING:host=localhost port=5432 user=stackage dbname=stackage password=stackage"
 postgres-poolsize: "_env:PGPOOLSIZE:8"
+# Publicly-accessible URL for the bucket holding Haddock contents.
+download-bucket-url: "_env:DOWNLOAD_BUCKET_URL:https://s3.amazonaws.com/haddock.stackage.org"

View File

@@ -157,7 +157,7 @@ withFoundation appLogFunc appSettings inner = do
             runRIO appLogFunc $ RIO.logError $ "Couldn't get Stack matcher: " <> displayShow e
             pure oldMatcher
     appMirrorStatus <- mkUpdateMirrorStatus
-    hoogleLocker <- newHoogleLocker appLogFunc appHttpManager
+    hoogleLocker <- newHoogleLocker appLogFunc appHttpManager (appDownloadBucketUrl appSettings)
     let appGetHoogleDB = singleRun hoogleLocker
     let appGitRev = $$tGitRev
     runConcurrently $ runContentUpdates *> Concurrently (inner App {..})

View File

@@ -8,13 +8,14 @@ import Import
 import qualified Data.Text as T (takeEnd)
 import Stackage.Database
 
-makeURL :: SnapName -> [Text] -> Text
-makeURL snapName rest = concat
-    $ "https://s3.amazonaws.com/"
-    : haddockBucketName
-    : "/"
-    : toPathPiece snapName
-    : map (cons '/') rest
+makeURL :: SnapName -> [Text] -> Handler Text
+makeURL snapName rest = do
+    bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
+    pure . concat
+        $ bucketUrl
+        : "/"
+        : toPathPiece snapName
+        : map (cons '/') rest
 
 shouldRedirect :: Bool
 shouldRedirect = False
@@ -27,7 +28,7 @@ getHaddockR snapName rest
         result <- redirectWithVersion snapName rest
         case result of
             Just route -> redirect route
-            Nothing -> redirect $ makeURL snapName rest
+            Nothing -> redirect =<< makeURL snapName rest
     | Just docType <- mdocType = do
         cacheSeconds $ 60 * 60 * 24 * 7
         result <- redirectWithVersion snapName rest
@@ -41,7 +42,7 @@ getHaddockR snapName rest
                         return ("text/html; charset=utf-8", mstyle /= Just "stackage")
                     DocJson ->
                         return ("application/jsontml; charset=utf-8", True)
-        req <- parseRequest $ unpack $ makeURL snapName rest
+        req <- parseRequest =<< unpack <$> makeURL snapName rest
        man <- getHttpManager <$> getYesod
        (_, res) <- runReaderT (acquireResponse req >>= allocateAcquire) man
        if plain
@@ -54,7 +55,7 @@ getHaddockR snapName rest
                      peekC >>= maybe (return ()) (const $ yield $ encodeUtf8 extra)
                      mapC id) .|
                 mapC (Chunk . toBuilder)
-    | otherwise = redirect $ makeURL snapName rest
+    | otherwise = redirect =<< makeURL snapName rest
   where
     mdocType =
         case T.takeEnd 5 <$> headMay (reverse rest) of
@@ -141,6 +142,9 @@ getHaddockBackupR (snap':rest)
     | Just branch <- fromPathPiece snap' = track "Handler.Haddock.getHaddockBackupR" $ do
         snapName <- newestSnapshot branch >>= maybe notFound pure
         redirect $ HaddockR snapName rest
-getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ redirect $ concat
-    $ "https://s3.amazonaws.com/haddock.stackage.org"
-    : map (cons '/') rest
+getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ do
+    bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
+    redirect
+        $ concat
+        $ bucketUrl
+        : map (cons '/') rest
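
With the default bucket URL, the links makeURL produces are unchanged by this refactoring. As a rough illustration of the construction (the Handler context is dropped, and the snapshot path piece and path segments are made-up example values, not taken from the patch):

    {-# LANGUAGE OverloadedStrings #-}
    import Data.Text (Text, cons)
    import qualified Data.Text as T

    -- bucketUrl stands in for appDownloadBucketUrl and "lts-21.25" for
    -- toPathPiece snapName; rest is an arbitrary example path.
    exampleHaddockUrl :: Text
    exampleHaddockUrl = T.concat $ bucketUrl : "/" : "lts-21.25" : map (cons '/') rest
      where
        bucketUrl = "https://s3.amazonaws.com/haddock.stackage.org"
        rest = ["mypackage-1.0.0", "index.html"]

    -- exampleHaddockUrl ==
    --   "https://s3.amazonaws.com/haddock.stackage.org/lts-21.25/mypackage-1.0.0/index.html"
    main :: IO ()
    main = putStrLn (T.unpack exampleHaddockUrl)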

View File

@@ -2,13 +2,12 @@
 module Handler.StackageIndex where
 
 import Import
-import Stackage.Database.Types (haddockBucketName)
 
 getStackageIndexR :: SnapName -> Handler TypedContent
-getStackageIndexR slug =
+getStackageIndexR slug = do
+    bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
     redirect $ concat
-        [ "https://s3.amazonaws.com/"
-        , haddockBucketName
+        [ bucketUrl
         , "/package-index/"
         , toPathPiece slug
         , ".tar.gz"

View File

@@ -56,6 +56,8 @@ data AppSettings = AppSettings
     -- ^ Force redirect to SSL
     , appDevDownload :: Bool
     -- ^ Controls how Git and database resources are downloaded (True means less downloading)
+    , appDownloadBucketUrl :: Text
+    -- ^ Publicly-accessible URL for the bucket holding Haddock contents.
     }
 
 data DatabaseSettings
@@ -109,6 +111,7 @@ instance FromJSON AppSettings where
         appSkipCombining <- o .:? "skip-combining" .!= dev
         appForceSsl <- o .:? "force-ssl" .!= not dev
         appDevDownload <- o .:? "dev-download" .!= dev
+        appDownloadBucketUrl <- o .:? "download-bucket-url" .!= "https://s3.amazonaws.com/haddock.stackage.org"
         return AppSettings {..}

View File

@@ -10,7 +10,8 @@ module Stackage.Database.Cron
     , newHoogleLocker
     , singleRun
     , StackageCronOptions(..)
-    , haddockBucketName
+    , defHaddockBucketName
+    , defHaddockBucketUrl
     ) where
 
 import Conduit
@@ -74,10 +75,9 @@ hoogleKey name = T.concat
     , ".hoo"
     ]
 
-hoogleUrl :: SnapName -> Text
-hoogleUrl n = T.concat
-    [ "https://s3.amazonaws.com/"
-    , haddockBucketName
+hoogleUrl :: SnapName -> Text -> Text
+hoogleUrl n haddockBucketUrl = T.concat
+    [ haddockBucketUrl
     , "/"
     , hoogleKey n
     ]
@@ -101,8 +101,8 @@ withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyR
 withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
 
 newHoogleLocker ::
-       (HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath))
-newHoogleLocker env man = mkSingleRun hoogleLocker
+       (HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
+newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
   where
     hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath)
     hoogleLocker name =
@@ -112,7 +112,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
                 if exists
                     then return $ Just fp
                     else do
-                        req' <- parseRequest $ T.unpack $ hoogleUrl name
+                        req' <- parseRequest $ T.unpack $ hoogleUrl name bucketUrl
                         let req = req' {decompress = const False}
                         withResponseUnliftIO req man $ \res ->
                             case responseStatus res of
@@ -125,7 +125,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
                                         sinkHandle h
                                     return $ Just fp
                                 | status == status404 -> do
-                                    logDebug $ "NotFound: " <> display (hoogleUrl name)
+                                    logDebug $ "NotFound: " <> display (hoogleUrl name bucketUrl)
                                     return Nothing
                                 | otherwise -> do
                                     body <- liftIO $ brConsume $ responseBody res
@@ -198,6 +198,7 @@ stackageServerCron StackageCronOptions {..} = do
                 , scCachedGPD = gpdCache
                 , scEnvAWS = aws
                 , scDownloadBucketName = scoDownloadBucketName
+                , scDownloadBucketUrl = scoDownloadBucketUrl
                 , scUploadBucketName = scoUploadBucketName
                 , scSnapshotsRepo = scoSnapshotsRepo
                 , scReportProgress = scoReportProgress
@@ -700,7 +701,8 @@ buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
 buildAndUploadHoogleDB doNotUpload = do
     snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
     env <- ask
-    locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
+    bucketUrl <- asks scDownloadBucketUrl
+    locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager) bucketUrl
     for_ snapshots $ \(snapshotId, snapName) ->
         unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do
             logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
@@ -725,12 +727,12 @@ createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
 createHoogleDB snapshotId snapName =
     handleAny logException $ do
         logInfo $ "Creating Hoogle DB for " <> display snapName
-        downloadBucket <- scDownloadBucketName <$> ask
+        downloadBucketUrl <- scDownloadBucketUrl <$> ask
         let root = "hoogle-gen"
            bindir = root </> "bindir"
            outname = root </> "output.hoo"
            tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
-           tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey
+           tarUrl = downloadBucketUrl <> "/" <> tarKey
            tarFP = root </> T.unpack tarKey
        -- When tarball is downloaded it is saved with durability and atomicity, so if it
        -- is present it is not in a corrupted state

View File

@@ -40,7 +40,8 @@ module Stackage.Database.Types
     , Origin(..)
     , LatestInfo(..)
     , Deprecation(..)
-    , haddockBucketName
+    , defHaddockBucketName
+    , defHaddockBucketUrl
     , Changelog(..)
     , Readme(..)
     , StackageCronOptions(..)
@@ -61,12 +62,16 @@ import Stackage.Database.Schema
 import Text.Blaze (ToMarkup(..))
 import Types
 
-haddockBucketName :: Text
-haddockBucketName = "haddock.stackage.org"
+defHaddockBucketName :: Text
+defHaddockBucketName = "haddock.stackage.org"
+
+defHaddockBucketUrl :: Text
+defHaddockBucketUrl = "https://s3.amazonaws.com/" <> defHaddockBucketName
 
 data StackageCronOptions = StackageCronOptions
     { scoForceUpdate :: !Bool
     , scoDownloadBucketName :: !Text
+    , scoDownloadBucketUrl :: !Text
     , scoUploadBucketName :: !Text
     , scoDoNotUpload :: !Bool
     , scoLogLevel :: !LogLevel
@@ -84,6 +89,7 @@ data StackageCron = StackageCron
     , scCachedGPD :: !(IORef (IntMap GenericPackageDescription))
     , scEnvAWS :: !Env
     , scDownloadBucketName :: !Text
+    , scDownloadBucketUrl :: !Text
     , scUploadBucketName :: !Text
     , scSnapshotsRepo :: !GithubRepo
     , scReportProgress :: !Bool