Do a better job of downloading databases

This commit is contained in:
Michael Snoyman 2015-05-15 12:40:45 +03:00
parent 5dc16a55d5
commit 7490787bbe
3 changed files with 51 additions and 20 deletions

View File

@ -134,13 +134,13 @@ makeFoundation useEcho conf = do
"master" "master"
loadWebsiteContent loadWebsiteContent
(stackageDatabase', refreshDB) <- loadFromS3 (stackageDatabase', refreshDB) <- loadFromS3 manager
-- Temporary workaround to force content updates regularly, until -- Temporary workaround to force content updates regularly, until
-- distribution of webhooks is handled via consul -- distribution of webhooks is handled via consul
void $ forkIO $ forever $ void $ do void $ forkIO $ forever $ void $ do
handleAny print $ refreshDB manager
threadDelay $ 1000 * 1000 * 60 * 5 threadDelay $ 1000 * 1000 * 60 * 5
handleAny print refreshDB
handleAny print $ grRefresh websiteContent' handleAny print $ grRefresh websiteContent'
env <- getEnvironment env <- getEnvironment

View File

@ -35,7 +35,7 @@ data App = App
, appLogger :: Logger , appLogger :: Logger
, genIO :: MWC.GenIO , genIO :: MWC.GenIO
, websiteContent :: GitRepo WebsiteContent , websiteContent :: GitRepo WebsiteContent
, stackageDatabase :: StackageDatabase , stackageDatabase :: IO StackageDatabase
} }
instance HasGenIO App where instance HasGenIO App where
@ -271,6 +271,6 @@ getExtra = fmap (appExtra . settings) getYesod
-- https://github.com/yesodweb/yesod/wiki/Sending-email -- https://github.com/yesodweb/yesod/wiki/Sending-email
instance GetStackageDatabase Handler where instance GetStackageDatabase Handler where
getStackageDatabase = fmap stackageDatabase getYesod getStackageDatabase = getYesod >>= liftIO . stackageDatabase
instance GetStackageDatabase (WidgetT App IO) where instance GetStackageDatabase (WidgetT App IO) where
getStackageDatabase = fmap stackageDatabase getYesod getStackageDatabase = getYesod >>= liftIO . stackageDatabase

View File

@ -5,6 +5,7 @@ module Stackage.Database.Cron
) where ) where
import ClassyPrelude.Conduit import ClassyPrelude.Conduit
import Control.Concurrent (threadDelay)
import Stackage.PackageIndex.Conduit import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity)) import Database.Persist (Entity (Entity))
import Data.Char (isAlpha) import Data.Char (isAlpha)
@ -28,7 +29,7 @@ import Network.AWS.S3 (ObjectCannedACL (PublicRead),
import Control.Lens (set, view) import Control.Lens (set, view)
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Binary as CB
import Data.Conduit.Zlib (WindowBits (WindowBits), import Data.Conduit.Zlib (WindowBits (WindowBits),
compress) compress, ungzip)
import qualified Hoogle import qualified Hoogle
filename' :: Text filename' :: Text
@ -45,23 +46,53 @@ url :: Text
url = concat url = concat
[ "https://s3.amazonaws.com/haddock.stackage.org/" [ "https://s3.amazonaws.com/haddock.stackage.org/"
, keyName , keyName
, ".gz"
] ]
-- | Provides an action to be used to refresh the file from S3. -- | Provides an action to be used to refresh the file from S3.
loadFromS3 :: IO (StackageDatabase, Manager -> IO ()) loadFromS3 :: Manager -> IO (IO StackageDatabase, IO ())
loadFromS3 = do loadFromS3 man = do
let fp = fpFromText keyName killPrevVar <- newTVarIO $ return ()
fptmp = fp <.> "tmp" currSuffixVar <- newTVarIO (1 :: Int)
let root = "stackage-database"
handleIO print $ removeTree root
createTree root
req <- parseUrl $ unpack url req <- parseUrl $ unpack url
let download man = withResponse req man $ \res -> do let download = do
createTree $ parent fptmp suffix <- atomically $ do
runResourceT x <- readTVar currSuffixVar
$ bodyReaderSource (responseBody res) writeTVar currSuffixVar $! x + 1
$$ sinkFile fptmp return x
rename fptmp fp
db <- openStackageDatabase fp let fp = root </> fpFromText ("database-download-" ++ tshow suffix)
return (db, download) putStrLn $ "Downloading database to " ++ fpToText fp
withResponse req man $ \res ->
runResourceT
$ bodyReaderSource (responseBody res)
$= ungzip
$$ sinkFile fp
putStrLn "Finished downloading database"
return fp
dbvar <- newTVarIO $ error "database not yet loaded"
let update = do
fp <- download
db <- openStackageDatabase fp
void $ tryIO $ join $ atomically $ do
writeTVar dbvar db
oldKill <- readTVar killPrevVar
writeTVar killPrevVar $ do
-- give existing users a chance to clean up
threadDelay $ 1000000 * 30
void $ tryIO $ removeFile fp
return oldKill
update
return (readTVarIO dbvar, update)
hoogleKey :: SnapName -> Text hoogleKey :: SnapName -> Text
hoogleKey name = concat hoogleKey name = concat
@ -128,7 +159,7 @@ stackageServerCron = do
createStackageDatabase dbfp createStackageDatabase dbfp
upload dbfp keyName upload dbfp keyName
(db, _) <- loadFromS3 db <- openStackageDatabase dbfp
names <- runReaderT last5Lts5Nightly db names <- runReaderT last5Lts5Nightly db
let manager = view envManager env let manager = view envManager env
forM_ names $ \name -> do forM_ names $ \name -> do