Merge pull request #324 from chreekat/b/handover-patches

Handover patches
This commit is contained in:
Michael Snoyman 2024-04-03 18:10:58 +03:00 committed by GitHub
commit 6ff1ee7d15
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 269 additions and 116 deletions

View File

@ -1,6 +1,7 @@
The MIT License (MIT)
Copyright (c) 2014-2017 FP Complete
Copyright (c) 2024 Haskell Foundation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -38,17 +38,24 @@ optsParser =
\their yaml files from stackage-snapshots repo have been updated or not.") <*>
option
readText
(long "download-bucket" <> value haddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
(long "download-bucket" <> value defHaddockBucketName <> metavar "DOWNLOAD_BUCKET" <>
help
("S3 Bucket name where things like haddock and current hoogle files should \
\be downloaded from. Default is: " <>
T.unpack haddockBucketName)) <*>
\be downloaded from. Used in S3 API read operations. Default is: " <>
T.unpack defHaddockBucketName)) <*>
option
readText
(long "upload-bucket" <> value haddockBucketName <> metavar "UPLOAD_BUCKET" <>
(long "download-bucket-url" <> value defHaddockBucketUrl <> metavar "DOWNLOAD_BUCKET_URL" <>
help
("Publicly accessible URL where the download bucket can be accessed. Used for \
\serving the Haddocks on the website. Default is: " <>
T.unpack defHaddockBucketUrl)) <*>
option
readText
(long "upload-bucket" <> value defHaddockBucketName <> metavar "UPLOAD_BUCKET" <>
help
("S3 Bucket where hoogle db and snapshots.json file will be uploaded to. Default is: " <>
T.unpack haddockBucketName)) <*>
T.unpack defHaddockBucketName)) <*>
switch
(long "do-not-upload" <>
help "Stop from hoogle db and snapshots.json from being generated and uploaded") <*>

View File

@ -24,3 +24,6 @@ force-ssl: false
postgres-string: "_env:PGSTRING:host=localhost port=5432 user=stackage dbname=stackage password=stackage"
postgres-poolsize: "_env:PGPOOLSIZE:8"
# Publicly-accessible URL for the bucket holding Haddock contents.
download-bucket-url: "_env:DOWNLOAD_BUCKET_URL:https://s3.amazonaws.com/haddock.stackage.org"

View File

@ -157,7 +157,7 @@ withFoundation appLogFunc appSettings inner = do
runRIO appLogFunc $ RIO.logError $ "Couldn't get Stack matcher: " <> displayShow e
pure oldMatcher
appMirrorStatus <- mkUpdateMirrorStatus
hoogleLocker <- newHoogleLocker appLogFunc appHttpManager
hoogleLocker <- newHoogleLocker appLogFunc appHttpManager (appDownloadBucketUrl appSettings)
let appGetHoogleDB = singleRun hoogleLocker
let appGitRev = $$tGitRev
runConcurrently $ runContentUpdates *> Concurrently (inner App {..})

View File

@ -84,6 +84,12 @@ singleRun sr@(SingleRun var f) k =
-- OK, we're done running, so let other
-- threads run this again.
-- NB: as soon as we've modified the MVar, the next
-- call to singleRun will think no thread is working and
-- start over. Anything waiting on us will get our
-- result, but nobody else will. That's ok: singleRun
-- just provides a little caching on top of a mutex.
modifyMVar_ var $ return . filter (\(k', _) -> k /= k')
case eres of

View File

@ -2,6 +2,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
module Data.WebsiteContent
( WebsiteContent (..)
, StackRelease (..)

View File

@ -11,6 +11,7 @@ import Data.Conduit.Attoparsec (sinkParser)
import Data.WebsiteContent
import Import
import Yesod.GitRepo
import qualified Data.Aeson.KeyMap as Aeson
getDownloadStackListR :: Handler Html
getDownloadStackListR = track "Handler.DownloadStack.getDownloadStackListR" $ do
@ -35,14 +36,14 @@ getLatestMatcher man = do
return $ \pattern' -> do
let pattern'' = pattern' ++ "."
Object top <- return val
Array assets <- lookup "assets" top
Array assets <- Aeson.lookup "assets" top
headMay $ preferZip $ catMaybes $ map (findMatch pattern'') assets
where
findMatch pattern' (Object o) = do
String name <- lookup "name" o
String name <- Aeson.lookup "name" o
guard $ not $ ".asc" `isSuffixOf` name
guard $ pattern' `isInfixOf` name
String url <- lookup "browser_download_url" o
String url <- Aeson.lookup "browser_download_url" o
Just url
findMatch _ _ = Nothing

View File

@ -8,13 +8,14 @@ import Import
import qualified Data.Text as T (takeEnd)
import Stackage.Database
makeURL :: SnapName -> [Text] -> Text
makeURL snapName rest = concat
$ "https://s3.amazonaws.com/"
: haddockBucketName
: "/"
: toPathPiece snapName
: map (cons '/') rest
makeURL :: SnapName -> [Text] -> Handler Text
makeURL snapName rest = do
bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
pure . concat
$ bucketUrl
: "/"
: toPathPiece snapName
: map (cons '/') rest
shouldRedirect :: Bool
shouldRedirect = False
@ -27,7 +28,7 @@ getHaddockR snapName rest
result <- redirectWithVersion snapName rest
case result of
Just route -> redirect route
Nothing -> redirect $ makeURL snapName rest
Nothing -> redirect =<< makeURL snapName rest
| Just docType <- mdocType = do
cacheSeconds $ 60 * 60 * 24 * 7
result <- redirectWithVersion snapName rest
@ -41,7 +42,7 @@ getHaddockR snapName rest
return ("text/html; charset=utf-8", mstyle /= Just "stackage")
DocJson ->
return ("application/jsontml; charset=utf-8", True)
req <- parseRequest $ unpack $ makeURL snapName rest
req <- parseRequest =<< unpack <$> makeURL snapName rest
man <- getHttpManager <$> getYesod
(_, res) <- runReaderT (acquireResponse req >>= allocateAcquire) man
if plain
@ -54,7 +55,7 @@ getHaddockR snapName rest
peekC >>= maybe (return ()) (const $ yield $ encodeUtf8 extra)
mapC id) .|
mapC (Chunk . toBuilder)
| otherwise = redirect $ makeURL snapName rest
| otherwise = redirect =<< makeURL snapName rest
where
mdocType =
case T.takeEnd 5 <$> headMay (reverse rest) of
@ -141,6 +142,9 @@ getHaddockBackupR (snap':rest)
| Just branch <- fromPathPiece snap' = track "Handler.Haddock.getHaddockBackupR" $ do
snapName <- newestSnapshot branch >>= maybe notFound pure
redirect $ HaddockR snapName rest
getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ redirect $ concat
$ "https://s3.amazonaws.com/haddock.stackage.org"
: map (cons '/') rest
getHaddockBackupR rest = track "Handler.Haddock.getHaddockBackupR" $ do
bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
redirect
$ concat
$ bucketUrl
: map (cons '/') rest

View File

@ -12,6 +12,8 @@ import RIO.Time (diffUTCTime, addUTCTime, getCurrentTime)
import Text.XML.Stream.Parse
import Data.XML.Types (Event (EventContent), Content (ContentText))
import qualified Prelude
import qualified Data.Aeson.Key as Aeson
import qualified Data.Aeson.KeyMap as Aeson
getMirrorStatusR :: Handler Html
getMirrorStatusR = do
@ -148,7 +150,7 @@ getLastModifiedGit org repo ref = do
lookupJ :: MonadThrow m => Text -> Value -> m Value
lookupJ key (Object o) =
case lookup key o of
case Aeson.lookup (Aeson.fromText key) o of
Nothing -> error $ "Key not found: " ++ show key
Just x -> return x
lookupJ key val = error $ concat

View File

@ -2,13 +2,12 @@
module Handler.StackageIndex where
import Import
import Stackage.Database.Types (haddockBucketName)
getStackageIndexR :: SnapName -> Handler TypedContent
getStackageIndexR slug =
getStackageIndexR slug = do
bucketUrl <- getsYesod (appDownloadBucketUrl . appSettings)
redirect $ concat
[ "https://s3.amazonaws.com/"
, haddockBucketName
[ bucketUrl
, "/package-index/"
, toPathPiece slug
, ".tar.gz"

View File

@ -56,6 +56,8 @@ data AppSettings = AppSettings
-- ^ Force redirect to SSL
, appDevDownload :: Bool
-- ^ Controls how Git and database resources are downloaded (True means less downloading)
, appDownloadBucketUrl :: Text
-- ^ Publicly-accessible URL for the bucket holding Haddock contents.
}
data DatabaseSettings
@ -109,6 +111,7 @@ instance FromJSON AppSettings where
appSkipCombining <- o .:? "skip-combining" .!= dev
appForceSsl <- o .:? "force-ssl" .!= not dev
appDevDownload <- o .:? "dev-download" .!= dev
appDownloadBucketUrl <- o .:? "download-bucket-url" .!= "https://s3.amazonaws.com/haddock.stackage.org"
return AppSettings {..}

View File

@ -5,18 +5,20 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE FlexibleContexts #-}
module Stackage.Database.Cron
( stackageServerCron
, newHoogleLocker
, singleRun
, StackageCronOptions(..)
, haddockBucketName
, defHaddockBucketName
, defHaddockBucketUrl
) where
import Conduit
import Control.DeepSeq
import qualified Control.Monad.Trans.AWS as AWS (paginate)
import Control.SingleRun
import Control.Lens ((?~))
import qualified Data.ByteString.Char8 as BS8
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Tar (FileInfo(..), FileType(..), untar)
@ -28,9 +30,12 @@ import Data.Yaml (decodeFileEither)
import Database.Persist hiding (exists)
import Database.Persist.Postgresql hiding (exists)
import qualified Hoogle
import Network.AWS hiding (Request, Response)
import Network.AWS.Data.Text (toText)
import Network.AWS.S3
import Amazonka hiding (Request, length, error)
import Amazonka.Data.Text (toText)
import Amazonka.S3
import Amazonka.S3.ListObjectsV2
import Amazonka.S3.Lens
import Amazonka.Lens
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Network.HTTP.Simple (getResponseBody, httpJSONEither)
@ -59,6 +64,7 @@ import Stackage.Database.PackageInfo
import Stackage.Database.Query
import Stackage.Database.Schema
import Stackage.Database.Types
import System.Environment (getEnvironment)
import UnliftIO.Concurrent (getNumCapabilities)
import Web.PathPieces (fromPathPiece, toPathPiece)
import qualified Control.Retry as Retry
@ -73,10 +79,9 @@ hoogleKey name = T.concat
, ".hoo"
]
hoogleUrl :: SnapName -> Text
hoogleUrl n = T.concat
[ "https://s3.amazonaws.com/"
, haddockBucketName
hoogleUrl :: SnapName -> Text -> Text
hoogleUrl n haddockBucketUrl = T.concat
[ haddockBucketUrl
, "/"
, hoogleKey n
]
@ -99,9 +104,14 @@ getStackageSnapshotsDir = do
withResponseUnliftIO :: MonadUnliftIO m => Request -> Manager -> (Response BodyReader -> m b) -> m b
withResponseUnliftIO req man f = withRunInIO $ \ runInIO -> withResponse req man (runInIO . f)
-- | Under the SingleRun wrapper that ensures only one thing at a time is
-- writing the file in question, ensure that a Hoogle database exists on the
-- filesystem for the given SnapName. But only going so far as downloading it
-- from the haddock bucket. See 'createHoogleDB' for the function that puts it
-- there in the first place.
newHoogleLocker ::
(HasLogFunc env, MonadIO m) => env -> Manager -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man = mkSingleRun hoogleLocker
(HasLogFunc env, MonadIO m) => env -> Manager -> Text -> m (SingleRun SnapName (Maybe FilePath))
newHoogleLocker env man bucketUrl = mkSingleRun hoogleLocker
where
hoogleLocker :: MonadIO n => SnapName -> n (Maybe FilePath)
hoogleLocker name =
@ -111,7 +121,7 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
if exists
then return $ Just fp
else do
req' <- parseRequest $ T.unpack $ hoogleUrl name
req' <- parseRequest $ T.unpack $ hoogleUrl name bucketUrl
let req = req' {decompress = const False}
withResponseUnliftIO req man $ \res ->
case responseStatus res of
@ -124,11 +134,12 @@ newHoogleLocker env man = mkSingleRun hoogleLocker
sinkHandle h
return $ Just fp
| status == status404 -> do
logDebug $ "NotFound: " <> display (hoogleUrl name)
logWarn $ "NotFound: " <> display (hoogleUrl name bucketUrl)
return Nothing
| otherwise -> do
body <- liftIO $ brConsume $ responseBody res
mapM_ (logDebug . displayBytesUtf8) body
logWarn $ "Unexpected status: " <> displayShow status
mapM_ (logWarn . displayBytesUtf8) body
return Nothing
getHackageDeprecations ::
@ -163,7 +174,12 @@ stackageServerCron StackageCronOptions {..} = do
cabalMutable <- newIORef Map.empty
gpdCache <- newIORef IntMap.empty
defaultProcessContext <- mkDefaultProcessContext
aws <- newEnv Discover
aws <- do
aws' <- newEnv discover
endpoint <- lookup "AWS_S3_ENDPOINT" <$> getEnvironment
pure $ case endpoint of
Nothing -> aws'
Just ep -> configureService (setEndpoint True (BS8.pack ep) 443 Amazonka.S3.defaultService) aws'
withLogFunc (setLogMinLevel scoLogLevel lo) $ \logFunc -> do
let pantryConfig =
PantryConfig
@ -179,8 +195,9 @@ stackageServerCron StackageCronOptions {..} = do
, pcCasaMaxPerRequest = defaultCasaMaxPerRequest
, pcSnapshotLocation = defaultSnapshotLocation
}
currentHoogleVersionId <-
runRIO logFunc $ getCurrentHoogleVersionIdWithPantryConfig pantryConfig
currentHoogleVersionId <- runRIO logFunc $ do
runStackageMigrations' pantryConfig
getCurrentHoogleVersionIdWithPantryConfig pantryConfig
let stackage =
StackageCron
{ scPantryConfig = pantryConfig
@ -191,6 +208,7 @@ stackageServerCron StackageCronOptions {..} = do
, scCachedGPD = gpdCache
, scEnvAWS = aws
, scDownloadBucketName = scoDownloadBucketName
, scDownloadBucketUrl = scoDownloadBucketUrl
, scUploadBucketName = scoUploadBucketName
, scSnapshotsRepo = scoSnapshotsRepo
, scReportProgress = scoReportProgress
@ -218,7 +236,6 @@ runStackageUpdate doNotUpload = do
unless doNotUpload uploadSnapshotsJSON
buildAndUploadHoogleDB doNotUpload
logInfo "Finished building and uploading Hoogle DBs"
run $ rawExecute "TRUNCATE TABLE latest_version" []
-- | This will look at 'global-hints.yaml' and will create core package getters that are reused
@ -386,10 +403,11 @@ addPantryPackage sid compiler isHidden flags (PantryPackage pc treeKey) = do
checkForDocs :: SnapshotId -> SnapName -> ResourceT (RIO StackageCron) ()
checkForDocs snapshotId snapName = do
bucketName <- lift (scDownloadBucketName <$> ask)
env <- asks scEnvAWS
mods <-
runConduit $
AWS.paginate (req bucketName) .| concatMapC (^. lovrsContents) .|
mapC (\obj -> toText (obj ^. oKey)) .|
paginate env (req bucketName) .| concatMapC (fromMaybe [] . (^. listObjectsV2Response_contents)) .|
mapC (\obj -> toText (obj ^. object_key)) .|
concatMapC (T.stripSuffix ".html" >=> T.stripPrefix prefix >=> pathToPackageModule) .|
sinkList
-- it is faster to download all modules in this snapshot, than process them with a conduit all
@ -398,16 +416,16 @@ checkForDocs snapshotId snapName = do
-- Cache is for SnapshotPackageId, there will be many modules per peckage, no need to look into
-- the database for each one of them.
n <- max 1 . (`div` 2) <$> getNumCapabilities
notFoundList <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
forM_ (Set.fromList $ catMaybes notFoundList) $ \pid ->
unexpectedPackages <- lift $ pooledMapConcurrentlyN n (markModules sidsCacheRef) mods
forM_ (Set.fromList $ catMaybes unexpectedPackages) $ \pid ->
lift $
logWarn $
"Documentation available for package '" <> display pid <>
"' but was not found in this snapshot: " <>
"Documentation found for package '" <> display pid <>
"', which does not exist in this snapshot: " <>
display snapName
where
prefix = textDisplay snapName <> "/"
req bucketName = listObjectsV2 (BucketName bucketName) & lovPrefix .~ Just prefix
req bucketName = newListObjectsV2 (BucketName bucketName) & listObjectsV2_prefix ?~ prefix
-- | This function records all package modules that have documentation available, the ones
-- that are not found in the snapshot reported back as an error. Besides being run
-- concurrently this function optimizes the SnapshotPackageId lookup as well, since that can
@ -417,7 +435,7 @@ checkForDocs snapshotId snapName = do
let mSnapshotPackageId = Map.lookup pid sidsCache
mFound <- run $ markModuleHasDocs snapshotId pid mSnapshotPackageId modName
case mFound of
Nothing -> pure $ Just pid
Nothing -> pure $ Just pid -- This package doesn't exist in the snapshot!
Just snapshotPackageId
| Nothing <- mSnapshotPackageId -> do
atomicModifyIORef'
@ -663,9 +681,9 @@ uploadSnapshotsJSON = do
uploadBucket <- scUploadBucketName <$> ask
let key = ObjectKey "snapshots.json"
uploadFromRIO key $
set poACL (Just OPublicRead) $
set poContentType (Just "application/json") $
putObject (BucketName uploadBucket) key (toBody snapshots)
set putObject_acl (Just ObjectCannedACL_Public_read) $
set putObject_contentType (Just "application/json") $
newPutObject (BucketName uploadBucket) key (toBody snapshots)
-- | Writes a gzipped version of hoogle db into temporary file onto the file system and then uploads
-- it to S3. Temporary file is removed upon completion
@ -677,14 +695,14 @@ uploadHoogleDB fp key =
body <- toBody <$> readFileBinary fpgz
uploadBucket <- scUploadBucketName <$> ask
uploadFromRIO key $
set poACL (Just OPublicRead) $ putObject (BucketName uploadBucket) key body
set putObject_acl (Just ObjectCannedACL_Public_read) $ newPutObject (BucketName uploadBucket) key body
uploadFromRIO :: AWSRequest a => ObjectKey -> a -> RIO StackageCron ()
uploadFromRIO :: (AWSRequest a, Typeable a, Typeable (AWSResponse a)) => ObjectKey -> a -> RIO StackageCron ()
uploadFromRIO key po = do
logInfo $ "Uploading " <> displayShow key <> " to S3 bucket."
env <- ask
eres <- runResourceT $ runAWS env $ trying _Error $ send po
env <- asks scEnvAWS
eres <- runResourceT $ trying _Error $ send env po
case eres of
Left e ->
logError $ "Couldn't upload " <> displayShow key <> " to S3 becuase " <> displayShow e
@ -693,18 +711,30 @@ uploadFromRIO key po = do
buildAndUploadHoogleDB :: Bool -> RIO StackageCron ()
buildAndUploadHoogleDB doNotUpload = do
snapshots <- lastLtsNightlyWithoutHoogleDb 5 5
-- currentHoogleVersionId <- scHoogleVersionId <$> ask
env <- ask
locker <- newHoogleLocker (env ^. logFuncL) (env ^. envManager)
awsEnv <- asks scEnvAWS
bucketUrl <- asks scDownloadBucketUrl
-- locker is an action that returns the path to a hoogle db, if one exists
-- in the haddock bucket already.
locker <- newHoogleLocker (env ^. logFuncL) (awsEnv ^. env_manager) bucketUrl
let insertH = checkInsertSnapshotHoogleDb True
checkH = checkInsertSnapshotHoogleDb False
for_ snapshots $ \(snapshotId, snapName) ->
unlessM (checkInsertSnapshotHoogleDb False snapshotId) $ do
-- Even though we just got a list of snapshots that don't have hoogle
-- databases, we check again. For some reason. I don't see how this can
-- actually be useful. both lastLtsNightlyWithoutHoogleDb and
-- checkInsertSnapshotHoogleDb just check against SnapshotHoogleDb.
-- Perhaps the check can be removed.
unlessM (checkH snapshotId) $ do
logInfo $ "Starting Hoogle database download: " <> display (hoogleKey snapName)
mfp <- singleRun locker snapName
case mfp of
Just _ -> do
logInfo $ "Current hoogle database exists for: " <> display snapName
void $ checkInsertSnapshotHoogleDb True snapshotId
void $ insertH snapshotId
Nothing -> do
logInfo $ "Current hoogle database does not yet exist for: " <> display snapName
logInfo $ "Current hoogle database does not yet exist in the bucket for: " <> display snapName
mfp' <- createHoogleDB snapshotId snapName
forM_ mfp' $ \fp -> do
let key = hoogleKey snapName
@ -713,24 +743,27 @@ buildAndUploadHoogleDB doNotUpload = do
renamePath fp dest
unless doNotUpload $ do
uploadHoogleDB dest (ObjectKey key)
void $ checkInsertSnapshotHoogleDb True snapshotId
void $ insertH snapshotId
-- | Create a hoogle db from haddocks for the given snapshot, and upload it to
-- the haddock bucket.
createHoogleDB :: SnapshotId -> SnapName -> RIO StackageCron (Maybe FilePath)
createHoogleDB snapshotId snapName =
handleAny logException $ do
logInfo $ "Creating Hoogle DB for " <> display snapName
downloadBucket <- scDownloadBucketName <$> ask
downloadBucketUrl <- scDownloadBucketUrl <$> ask
let root = "hoogle-gen"
bindir = root </> "bindir"
outname = root </> "output.hoo"
tarKey = toPathPiece snapName <> "/hoogle/orig.tar"
tarUrl = "https://s3.amazonaws.com/" <> downloadBucket <> "/" <> tarKey
tarUrl = downloadBucketUrl <> "/" <> tarKey
tarFP = root </> T.unpack tarKey
-- When tarball is downloaded it is saved with durability and atomicity, so if it
-- is present it is not in a corrupted state
unlessM (doesFileExist tarFP) $ do
req <- parseRequest $ T.unpack tarUrl
man <- view envManager
env <- asks scEnvAWS
let man = env ^. env_manager
withResponseUnliftIO req {decompress = const True} man $ \res -> do
throwErrorStatusCodes req res
createDirectoryIfMissing True $ takeDirectory tarFP

View File

@ -23,7 +23,7 @@ import Distribution.Compiler (CompilerFlavor(GHC))
import Distribution.Package (Dependency(..))
import Distribution.PackageDescription (CondTree(..), Condition(..),
ConfVar(..),
Flag(flagDefault, flagName), FlagName,
PackageFlag(..), flagDefault, flagName, FlagName,
GenericPackageDescription, author,
condExecutables, condLibrary,
description, genPackageFlags, homepage,
@ -152,7 +152,7 @@ getCheckCond compiler overrideFlags gpd = go
where
go (Var (OS os)) = os == Linux -- arbitrary
go (Var (Arch arch)) = arch == X86_64 -- arbitrary
go (Var (Flag flag)) = fromMaybe False $ Map.lookup flag flags
go (Var (PackageFlag flag)) = fromMaybe False $ Map.lookup flag flags
go (Var (Impl flavor range)) = flavor == compilerFlavor && compilerVersion `withinRange` range
go (Lit b) = b
go (CNot c) = not $ go c

View File

@ -167,25 +167,48 @@ ltsBefore x y = do
go (Entity _ lts) = (ltsSnap lts, SNLts (ltsMajor lts) (ltsMinor lts))
-- | Queries the database for the latest LTS and nightly snapshots that do not
-- have corresponding entries in the SnapshotHoogleDb table with the current
-- Hoogle version.
lastLtsNightlyWithoutHoogleDb :: Int -> Int -> RIO StackageCron [(SnapshotId, SnapName)]
lastLtsNightlyWithoutHoogleDb ltsCount nightlyCount = do
currentHoogleVersionId <- scHoogleVersionId <$> ask
let getSnapshotsWithoutHoogeDb snapId snapCount =
map (unValue *** unValue) <$>
select
-- "snap" is either Lts or Nightly, while "snapshot" is indeed
-- "snapshot"
(from $ \(snap `InnerJoin` snapshot) -> do
on $ snap ^. snapId ==. snapshot ^. SnapshotId
where_ $
notExists $
from $ \snapshotHoogleDb ->
where_ $
(snapshotHoogleDb ^. SnapshotHoogleDbSnapshot ==. snapshot ^.
SnapshotId) &&.
(snapshotHoogleDb ^. SnapshotHoogleDbVersion ==.
val currentHoogleVersionId)
(snapshotHoogleDb ^. SnapshotHoogleDbSnapshot
==. snapshot ^. SnapshotId)
&&. (snapshotHoogleDb ^. SnapshotHoogleDbVersion
==. val currentHoogleVersionId)
orderBy [desc (snapshot ^. SnapshotCreated)]
limit $ fromIntegral snapCount
pure (snapshot ^. SnapshotId, snapshot ^. SnapshotName))
-- In sql, this query would be
--
-- select snapshot.id, snapshot.name
-- from snapshot
-- join $foo as snap -- either Lts or Nightly
-- on snap.snap = snapshot.id
-- where not exists (
-- select 1
-- from snapshot_hoogle_db
-- where snapshot_hoogle_db.snapshot = snapshot.id
-- and snapshot_hoogle_db.version = $currentHoogleVersionId
-- )
-- order by snapshot.created desc
-- limit $snapCount
--
-- So it returns a list of snapshots where there is no
-- corresponding entry in the snapshot_hoogle_db table for the
-- current hoogle version.
run $ do
lts <- getSnapshotsWithoutHoogeDb LtsSnap ltsCount
nightly <- getSnapshotsWithoutHoogeDb NightlySnap nightlyCount
@ -206,7 +229,7 @@ snapshotsJSON = do
Just n -> (("nightly" A..= printNightly n) :)
return $ A.object $ nightly lts
where
toObj lts@(major, _) = T.pack ("lts-" <> show major) A..= printLts lts
toObj lts@(major, _) = fromString ("lts-" <> show major) A..= printLts lts
printLts (major, minor) = "lts-" <> show major <> "." <> show minor
printNightly day = "nightly-" <> T.pack (show day)
@ -1159,10 +1182,27 @@ checkInsertSnapshotHoogleDb shouldInsert snapshotId = do
(from
(\v -> do
where_ $ v ^. VersionId ==. val hoogleVersionId
-- This is reaching into the *pantry*
-- database!
pure (v ^. VersionVersion)))
-- in sql, this query would be
--
-- select version.version
-- from version
-- where version.id = $hoogleVersionId
--
-- So it returns the "version"s that corresponds to the
-- current hoogle version id.
-- mhver is now Maybe Version, and corresponds to the current
-- hoogle version, assuming it exists in the Version table
forM_ mhver $ \hver ->
lift $
logInfo $
"Marking hoogle database for version " <> display hver <> " as available."
-- whether or not the version exists, we still put it into snapshot_hoogle_db
-- So literally the only use of the above query is to log the
-- action we're taking.
isJust <$> P.insertUniqueEntity sh
-- if we're not inserting, we're just checking if it already exists
-- in snapshot_hoogle_db.
else isJust <$> P.checkUnique sh

View File

@ -15,6 +15,7 @@
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE TypeOperators #-}
module Stackage.Database.Schema
( -- * Database
run
@ -23,6 +24,7 @@ module Stackage.Database.Schema
, GetStackageDatabase(..)
, withStackageDatabase
, runStackageMigrations
, runStackageMigrations'
, getCurrentHoogleVersionId
, getCurrentHoogleVersionIdWithPantryConfig
-- * Tables
@ -217,25 +219,33 @@ withStackageDatabase shouldLog dbs inner = do
bracket (liftIO getPoolIO) (liftIO . destroyAllResources) $ \pool -> do
inner (StackageDatabase (`runSqlPool` pool))
getSchema :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env (Maybe Int)
getSchema :: ReaderT SqlBackend (RIO RIO.LogFunc) (Maybe Int)
getSchema =
run $ do
do
eres <- tryAny (selectList [] [])
lift $ logInfo $ "getSchema result: " <> displayShow eres
case eres of
Right [Entity _ (Schema v)] -> return $ Just v
_ -> return Nothing
runStackageMigrations' :: PantryConfig -> RIO RIO.LogFunc () -- HasLogFunc env => PantryConfig -> RIO env ()
runStackageMigrations' pantryConfig = do
stackageDb <- getStackageDatabaseFromPantry pantryConfig
runDatabase stackageDb stackageMigrations
runStackageMigrations :: (HasLogFunc env, GetStackageDatabase env (RIO env)) => RIO env ()
runStackageMigrations = do
runStackageMigrations = run stackageMigrations
stackageMigrations :: ReaderT SqlBackend (RIO RIO.LogFunc) () -- ReaderT SqlBackend (RIO RIO.LogFunc) ()
stackageMigrations = do
runMigration Pantry.migrateAll
runMigration migrateAll
actualSchema <- getSchema
run $ do
runMigration Pantry.migrateAll
runMigration migrateAll
unless (actualSchema == Just currentSchema) $ do
lift $
logWarn $
"Current schema does not match actual schema: " <>
displayShow (actualSchema, currentSchema)
deleteWhere ([] :: [Filter Schema])
insert_ $ Schema currentSchema
unless (actualSchema == Just currentSchema) $ do
lift $
logWarn $
"Current schema does not match actual schema: " <>
displayShow (actualSchema, currentSchema)
deleteWhere ([] :: [Filter Schema])
insert_ $ Schema currentSchema

View File

@ -40,7 +40,8 @@ module Stackage.Database.Types
, Origin(..)
, LatestInfo(..)
, Deprecation(..)
, haddockBucketName
, defHaddockBucketName
, defHaddockBucketUrl
, Changelog(..)
, Readme(..)
, StackageCronOptions(..)
@ -49,7 +50,7 @@ module Stackage.Database.Types
import Data.Aeson
import qualified Data.Text as T
import Data.Text.Read (decimal)
import Network.AWS (Env, HasEnv(..))
import Amazonka (Env)
import Pantry (BlobKey(..), CabalFileInfo(..), FileSize(..),
HasPantryConfig(..), PantryConfig, PackageIdentifierRevision(..), TreeKey(..))
import Pantry.SHA256 (fromHexText)
@ -61,12 +62,16 @@ import Stackage.Database.Schema
import Text.Blaze (ToMarkup(..))
import Types
haddockBucketName :: Text
haddockBucketName = "haddock.stackage.org"
defHaddockBucketName :: Text
defHaddockBucketName = "haddock.stackage.org"
defHaddockBucketUrl :: Text
defHaddockBucketUrl = "https://s3.amazonaws.com/" <> defHaddockBucketName
data StackageCronOptions = StackageCronOptions
{ scoForceUpdate :: !Bool
, scoDownloadBucketName :: !Text
, scoDownloadBucketUrl :: !Text
, scoUploadBucketName :: !Text
, scoDoNotUpload :: !Bool
, scoLogLevel :: !LogLevel
@ -84,6 +89,7 @@ data StackageCron = StackageCron
, scCachedGPD :: !(IORef (IntMap GenericPackageDescription))
, scEnvAWS :: !Env
, scDownloadBucketName :: !Text
, scDownloadBucketUrl :: !Text
, scUploadBucketName :: !Text
, scSnapshotsRepo :: !GithubRepo
, scReportProgress :: !Bool
@ -91,9 +97,6 @@ data StackageCron = StackageCron
, scHoogleVersionId :: !VersionId
}
instance HasEnv StackageCron where
environment = lens scEnvAWS (\c f -> c {scEnvAWS = f})
instance HasLogFunc StackageCron where
logFuncL = lens scLogFunc (\c f -> c {scLogFunc = f})

View File

@ -15,6 +15,7 @@ module Stackage.Snapshot.Diff
import ClassyPrelude (sortOn, toCaseFold)
import Data.Aeson
import Data.Aeson.Key
import qualified Data.Text as T (commonPrefixes)
import Data.These
import RIO
@ -61,7 +62,7 @@ newtype VersionChange = VersionChange { unVersionChange :: These VersionP Versio
deriving (Show, Eq, Generic, Typeable)
instance ToJSON (WithSnapshotNames VersionChange) where
toJSON (WithSnapshotNames (toPathPiece -> aKey) (toPathPiece -> bKey) change) =
toJSON (WithSnapshotNames (fromText . toPathPiece -> aKey) (fromText . toPathPiece -> bKey) change) =
case change of
VersionChange (This a) -> object [ aKey .= a ]
VersionChange (That b) -> object [ bKey .= b ]

View File

@ -407,7 +407,25 @@ instance ToMarkup VersionRangeP where
instance PersistField VersionRangeP where
toPersistValue = PersistText . textDisplay
fromPersistValue v =
fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse
fromPersistValue v >>= bimap (T.pack . displayException) VersionRangeP . dtParse . hackwardCompat_3_4
where
-- We use parseSimple under the hood, which always parses using
-- the latest version of the Cabal spec. In practice, this hasn't
-- been a problem. Until now.
--
-- Cabal spec 3.4 dropped support for "-any" as a version range, and the
-- database is full of such values. Luckily, ">=0" is a
-- backward-compatible synonym for "-any". New versions of this app will
-- write ">=0" instead of "-any", which old versions of this app will
-- understand just fine. We just need to substitute on read.
--
-- FIXME: strictly speaking, VersionRange cannot be parsed without
-- knowing the Cabal spec version of the package that used it. There's
-- nothing *wrong* with "-any". That means we probably need to decode it
-- no further than Text and do further processing outside of the
-- PersistField instance.
hackwardCompat_3_4 "-any" = ">=0"
hackwardCompat_3_4 t = t
instance PersistFieldSql VersionRangeP where
sqlType _ = SqlString

View File

@ -1,16 +1,32 @@
resolver: lts-18.28
resolver: lts-22.6
extra-deps:
- amazonka-1.6.1
- barrier-0.1.1
- classy-prelude-yesod-1.5.0
- unliftio-core-0.1.2.0
- yesod-gitrepo-0.3.0
- static-bytes-0.1.0
- companion-0.1.0
- aeson-warning-parser-0.1.0
- hpack-0.35.0
# WARNING: Changing the hoogle version causes stackage-server-cron to regenerate
# Hoogle databases FOR EVERY SNAPSHOT, EVER. Usually, that's ok! But don't
# forget! The consequences are: (1) More disk usage. Hoogle databases are not
# cleaned up on the stackage-server-cron side, nor on the stackage-server side.
# (Yet. This will change.) (2) More bucket usage. While it's easy to say it's a
# drop in the literal bucket, such excessive misuse of storage makes
# administration, backups, disaster recovery, and many other DevOps concerns
# harder and harder. All but the latest LTS's database are literally never used
# anyway. (3) The Hoogle database schema is defined by the first three
# version components. Any more frequent regeneration is pure unadulterated
# waste. (4) Stackage's Hoogle search will be unavailable until the new
# databases have been generated.
- hoogle-5.0.18.4@sha256:1372458e97dff541fcda099236af7936bf93ee6b8c5d15695ee6d9426dff5eed,3171
- safe-0.3.20@sha256:7813ad56161f57d5162a924de5597d454162a2faed06be6e268b37bb5c19d48d,2312
- Cabal-3.8.1.0@sha256:77121d8e1aff14a0fd95684b751599db78a7dd26d55862d9fcef27c88b193e9d,12609
- Cabal-syntax-3.8.1.0@sha256:ed2d937ba6c6a20b75850349eedd41374885fc42369ef152d69e2ba70f44f593,7620
- git: https://github.com/commercialhaskell/pantry.git
commit: 5df643cc1deb561d9c52a9cb6f593aba2bc4c08e
# This amazonka patched to support Cloudflare, which kinda has a bug. See
# https://github.com/brendanhay/amazonka/issues/975 for details.
- github: chreekat/amazonka
commit: b/r2-compat
subdirs: [lib/amazonka-core]
drop-packages:
- Cabal
nix:
packages:
- zlib
- postgresql
- pkg-config
- haskell-language-server

View File

@ -3,7 +3,6 @@
<LongName>Hoogle Stackage.org</LongName>
<Description>Search modules on Stackage.org using hoogle</Description>
<Developer>FP Complete CORP.</Developer>
<Attribution>Copyright FP Complete CORP.</Attribution>
<AdultContent>false</AdultContent>
<Language>en-us</Language>
<InputEncoding>UTF-8</InputEncoding>

View File

@ -1,15 +1,14 @@
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/" xmlns:moz="http://www.mozilla.org/2006/browser/search/">
<ShortName>Stackage Packages</ShortName>
<LongName>Stackage.org package page</LongName>
<Description>Just to a Stackage.org package page</Description>
<Description>Jump to a Stackage.org package page</Description>
<Developer>FP Complete CORP.</Developer>
<Attribution>Copyright FP Complete CORP.</Attribution>
<AdultContent>false</AdultContent>
<Language>en-us</Language>
<InputEncoding>UTF-8</InputEncoding>
<OutputEncoding>UTF-8</OutputEncoding>
<Image width="222" height="222" type="image/x-icon">https://www.stackage.org/static/img/stackage.png</Image>
<Url type="text/html" method="GET" template="https://www.stackage.org/package/{searchTerms}"/>
<Query role="example" searchTerms="E.g. bytestring"/>
<Query role="example" searchTerms="bytestring"/>
<moz:SearchForm>https://www.stackage.org</moz:SearchForm>
</OpenSearchDescription>

View File

@ -30,6 +30,10 @@ $else
<div .container>
<div .row>
<div .span12>
A service provided by
<a href="http://www.fpcomplete.com/">
A service created by
<a href="https://www.fpcomplete.com/">
FP Complete
in 2014 | Donated to the
<a href="https://haskell.foundation">
Haskell Foundation
in 2024.

View File

@ -60,4 +60,7 @@
<a href="https://github.com/fpco/stackage#frequently-asked-questions">FAQ section on Github</a>.
<p>
Stackage's infrastructure, build machines, initial creation and ongoing maintenance, are proudly sponsored by <a href="https://www.fpcomplete.com">FP Complete</a>.
Stackage's infrastructure, build machines, initial creation and ongoing maintenance were proudly sponsored by <a href="https://www.fpcomplete.com">FP Complete</a> from 2014 to 2024.
<p>
Today it is a service provided by the <a href="https://haskell.foundation">Haskell Foundation</a>.