Do Hoogle database generation as a cron job #70

This commit is contained in:
Michael Snoyman 2015-01-05 09:00:45 +02:00
parent 3b8e3f596b
commit cb2ef331e6
5 changed files with 207 additions and 115 deletions

View File

@ -13,7 +13,7 @@ import Control.Monad.Logger (runLoggingT, LoggingT, defaultLogStr)
import Data.BlobStore (fileStore, storeWrite, cachedS3Store)
import Data.Hackage
import Data.Hackage.Views
import Data.Unpacking (newDocUnpacker)
import Data.Unpacking (newDocUnpacker, createHoogleDatabases)
import Data.WebsiteContent
import Data.Slug (SnapSlug (..), safeMakeSlug, HasGenIO)
import Data.Time (diffUTCTime)
@ -200,8 +200,12 @@ makeFoundation useEcho conf = do
env <- getEnvironment
let runDB' :: (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a
runDB' = flip (Database.Persist.runPool dbconf) p
docUnpacker <- newDocUnpacker haddockRootDir' blobStore' runDB'
let logger = Yesod.Core.Types.Logger loggerSet' getter
mkFoundation du = App
foundation = App
{ settings = conf
, getStatic = s
, connPool = p
@ -213,19 +217,12 @@ makeFoundation useEcho conf = do
, progressMap = progressMap'
, nextProgressKey = nextProgressKey'
, haddockRootDir = haddockRootDir'
, appDocUnpacker = du
, appDocUnpacker = docUnpacker
, widgetCache = widgetCache'
, websiteContent = websiteContent'
}
let urlRender' = yesodRender (mkFoundation (error "docUnpacker forced")) (appRoot conf)
docUnpacker <- newDocUnpacker
haddockRootDir'
(lookup "STACKAGE_HOOGLE_LOADER" env /= Just "0")
blobStore'
(flip (Database.Persist.runPool dbconf) p)
urlRender'
let foundation = mkFoundation docUnpacker
let urlRender' = yesodRender foundation (appRoot conf)
-- Perform database migration using our application's logging settings.
when (lookup "STACKAGE_SKIP_MIGRATION" env /= Just "1") $
@ -251,6 +248,8 @@ makeFoundation useEcho conf = do
loadCabalFiles'
liftIO $ createHoogleDatabases blobStore' runDB' putStrLn urlRender'
liftIO $ threadDelay $ 30 * 60 * 1000000
return foundation
where ifRunCabalLoader m =
@ -291,6 +290,28 @@ cabalLoaderMain = do
}
dbconf
pool
let foundation = App
{ settings = conf
, getStatic = error "getStatic"
, connPool = pool
, httpManager = manager
, persistConfig = dbconf
, appLogger = error "appLogger"
, genIO = error "genIO"
, blobStore = bs
, progressMap = error "progressMap"
, nextProgressKey = error "nextProgressKey"
, haddockRootDir = error "haddockRootDir"
, appDocUnpacker = error "appDocUnpacker"
, widgetCache = error "widgetCache"
, websiteContent = error "websiteContent"
}
createHoogleDatabases
bs
(flip (Database.Persist.runPool dbconf) pool)
putStrLn
(yesodRender foundation (appRoot conf))
where
logFunc loc src level str
| level > LevelDebug = S.hPutStr stdout $ fromLogStr $ defaultLogStr loc src level str

View File

@ -2,13 +2,15 @@
-- and compressing/deduping contents.
module Data.Unpacking
( newDocUnpacker
, defaultHooDest
, getHoogleDB
, makeHoogle
, createHoogleDatabases
) where
import Import hiding (runDB)
import Data.BlobStore
import Handler.Haddock
import Filesystem (createTree, isFile, removeTree, isDirectory, listDirectory, copyFile, removeDirectory, removeFile, rename)
import Filesystem (createTree, isFile, removeTree, isDirectory, listDirectory, removeDirectory, removeFile, rename)
import System.Posix.Files (createLink)
import Crypto.Hash.Conduit (sinkHash)
import Control.Concurrent (forkIO)
@ -29,12 +31,10 @@ import Crypto.Hash (Digest, SHA1)
newDocUnpacker
:: FilePath -- ^ haddock root
-> Bool -- ^ loadHoogleDBs
-> BlobStore StoreKey
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> (Route App -> [(Text, Text)] -> Text)
-> IO DocUnpacker
newDocUnpacker root loadHoogleDBs store runDB urlRender = do
newDocUnpacker root store runDB = do
createDirs dirs
statusMapVar <- newTVarIO $ asMap mempty
@ -47,14 +47,14 @@ newDocUnpacker root loadHoogleDBs store runDB urlRender = do
$ insertMap (stackageSlug $ entityVal ent) var
writeTChan workChan (forceUnpack, ent, var)
forkForever $ unpackWorker dirs loadHoogleDBs runDB store messageVar urlRender workChan
forkForever $ unpackWorker dirs runDB store messageVar workChan
return DocUnpacker
{ duRequestDocs = \ent -> do
m <- readTVarIO statusMapVar
case lookup (stackageSlug $ entityVal ent) m of
Nothing -> do
b <- isUnpacked dirs ent
b <- isUnpacked dirs (entityVal ent)
if b
then return USReady
else do
@ -79,8 +79,8 @@ createDirs dirs = do
-- | Check for the presence of file system artifacts indicating that the docs
-- have been unpacked.
isUnpacked :: Dirs -> Entity Stackage -> IO Bool
isUnpacked dirs (Entity _ stackage) = isFile $ defaultHooDest dirs stackage
isUnpacked :: Dirs -> Stackage -> IO Bool
isUnpacked dirs stackage = isFile $ completeUnpackFile dirs stackage
defaultHooDest :: Dirs -> Stackage -> FilePath
defaultHooDest dirs stackage = dirHoogleFp dirs (stackageIdent stackage)
@ -92,20 +92,18 @@ forkForever inner = mask $ \restore ->
unpackWorker
:: Dirs
-> Bool -- ^ load Hoogle DBs?
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> BlobStore StoreKey
-> TVar Text
-> (Route App -> [(Text, Text)] -> Text)
-> TChan (Bool, Entity Stackage, TVar UnpackStatus)
-> IO ()
unpackWorker dirs loadHoogleDBs runDB store messageVar urlRender workChan = do
unpackWorker dirs runDB store messageVar workChan = do
atomically $ writeTVar messageVar "Waiting for new work item"
(forceUnpack, ent, resVar) <- atomically $ readTChan workChan
shouldUnpack <-
if forceUnpack
then return True
else not <$> isUnpacked dirs ent
else not <$> isUnpacked dirs (entityVal ent)
when shouldUnpack $ do
let say msg = atomically $ writeTVar messageVar $ concat
[ toPathPiece (stackageSlug $ entityVal ent)
@ -113,7 +111,7 @@ unpackWorker dirs loadHoogleDBs runDB store messageVar urlRender workChan = do
, msg
]
say "Beginning of processing"
eres <- tryAny $ unpacker dirs loadHoogleDBs runDB store say urlRender ent
eres <- tryAny $ unpacker dirs runDB store say ent
atomically $ writeTVar resVar $ case eres of
Left e -> USFailed $ tshow e
Right () -> USReady
@ -121,30 +119,21 @@ unpackWorker dirs loadHoogleDBs runDB store messageVar urlRender workChan = do
removeTreeIfExists :: FilePath -> IO ()
removeTreeIfExists fp = whenM (isDirectory fp) (removeTree fp)
unpacker
:: Dirs
-> Bool -- ^ load Hoogle DBs?
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> BlobStore StoreKey
unpackRawDocsTo
:: BlobStore StoreKey
-> PackageSetIdent
-> (Text -> IO ())
-> (Route App -> [(Text, Text)] -> Text)
-> Entity Stackage
-> FilePath
-> IO ()
unpacker dirs loadHoogleDBs runDB store say urlRender stackageEnt@(Entity _ stackage@Stackage {..}) = do
say "Removing old directories, if they exist"
removeTreeIfExists $ dirRawIdent dirs stackageIdent
removeTreeIfExists $ dirGzIdent dirs stackageIdent
removeTreeIfExists $ dirHoogleIdent dirs stackageIdent
unpackRawDocsTo store ident say destdir =
withSystemTempFile "haddock-bundle.tar.xz" $ \tempfp temph -> do
say "Downloading raw tarball"
withAcquire (storeRead' store (HaddockBundle stackageIdent)) $ \msrc ->
say "Downloading raw doc tarball"
withAcquire (storeRead' store (HaddockBundle ident)) $ \msrc ->
case msrc of
Nothing -> error "No haddocks exist for that snapshot"
Just src -> src $$ sinkHandle temph
hClose temph
let destdir = dirRawIdent dirs stackageIdent
createTree destdir
say "Unpacking tarball"
(Nothing, Nothing, Nothing, ph) <- createProcess
@ -154,54 +143,135 @@ unpacker dirs loadHoogleDBs runDB store say urlRender stackageEnt@(Entity _ stac
ec <- waitForProcess ph
if ec == ExitSuccess then return () else throwM ec
createTree $ dirHoogleIdent dirs stackageIdent
-- Determine which packages have documentation and update the
-- database appropriately
say "Updating database for available documentation"
runResourceT $ runDB $ do
let sid = entityKey stackageEnt
updateWhere
[PackageStackage ==. sid]
[PackageHasHaddocks =. False]
sourceDirectory destdir $$ mapM_C (\fp -> do
let mnv = nameAndVersionFromPath fp
forM_ mnv $ \(name, version) -> updateWhere
[ PackageStackage ==. sid
, PackageName' ==. PackageName name
, PackageVersion ==. Version version
]
[PackageHasHaddocks =. True]
)
unpacker
:: Dirs
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> BlobStore StoreKey
-> (Text -> IO ())
-> Entity Stackage
-> IO ()
unpacker dirs runDB store say (Entity sid stackage@Stackage {..}) = do
say "Removing old directories, if they exist"
removeTreeIfExists $ dirRawIdent dirs stackageIdent
removeTreeIfExists $ dirGzIdent dirs stackageIdent
removeTreeIfExists $ dirHoogleIdent dirs stackageIdent
let srcDefaultHoo = destdir </> "default.hoo"
dstDefaultHoo = defaultHooDest dirs stackage
hoogleKey = HoogleDB stackageIdent $ HoogleVersion VERSION_hoogle
defaultHooExists <- isFile srcDefaultHoo
if defaultHooExists
then copyFile srcDefaultHoo dstDefaultHoo
else withAcquire (storeRead' store hoogleKey) $ \msrc ->
case msrc of
Just src -> do
say "Downloading compiled Hoogle database"
withBinaryFile (fpToString dstDefaultHoo) WriteMode
$ \h -> src $$ ungzip =$ sinkHandle h
Nothing -> when loadHoogleDBs
$ handleAny print
$ withSystemTempDirectory "hoogle-database-gen"
$ \hoogletemp' -> do
let hoogletemp = fpFromString hoogletemp'
logFp = fpToString (dirHoogleFp dirs stackageIdent ["error-log"])
withBinaryFile logFp WriteMode $ \errorLog -> do
say "Copying Hoogle text files to temp directory"
runResourceT $ copyHoogleTextFiles errorLog destdir hoogletemp
say "Creating Hoogle database"
createHoogleDb say dirs stackageEnt errorLog hoogletemp urlRender
say "Uploading database to persistent storage"
withAcquire (storeWrite' store hoogleKey) $ \sink ->
runResourceT $ sourceFile dstDefaultHoo $$ gzip =$ sink
let destdir = dirRawIdent dirs stackageIdent
unpackRawDocsTo store stackageIdent say destdir
runCompressor say dirs
createTree $ dirHoogleIdent dirs stackageIdent
-- Determine which packages have documentation and update the
-- database appropriately
say "Updating database for available documentation"
runResourceT $ runDB $ do
updateWhere
[PackageStackage ==. sid]
[PackageHasHaddocks =. False]
sourceDirectory destdir $$ mapM_C (\fp -> do
let mnv = nameAndVersionFromPath fp
forM_ mnv $ \(name, version) -> updateWhere
[ PackageStackage ==. sid
, PackageName' ==. PackageName name
, PackageVersion ==. Version version
]
[PackageHasHaddocks =. True]
)
say "Running the compressor"
runCompressor say dirs
say "Unpack complete"
writeFile "completeUnpackFile dirs ent" ("Complete" :: ByteString)
completeUnpackFile :: Dirs -> Stackage -> FilePath
completeUnpackFile dirs stackage =
dirGzIdent dirs (stackageIdent stackage) </> "unpack-complete"
-- | Get the path to the Hoogle database, downloading from persistent storage
-- if necessary. This function will /not/ generate a new database, and
-- therefore is safe to run on a live web server.
getHoogleDB :: Dirs
-> Stackage
-> Handler (Maybe FilePath)
getHoogleDB dirs stackage = do
exists <- liftIO $ isFile fp
if exists
then return $ Just fp
else do
msrc <- storeRead key
case msrc of
Nothing -> return Nothing
Just src -> do
liftIO $ createTree $ F.parent fp
let tmpfp = fp <.> "tmp" -- FIXME add something random
src $$ ungzip =$ sinkFile tmpfp
liftIO $ rename tmpfp fp
return $ Just fp
where
fp = defaultHooDest dirs stackage
key = HoogleDB (stackageIdent stackage) $ HoogleVersion VERSION_hoogle
-- | Make sure that the last 5 LTS and last 5 Nightly releases all have Hoogle
-- databases available.
createHoogleDatabases
:: BlobStore StoreKey
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> (Text -> IO ())
-> (Route App -> [(Text, Text)] -> Text)
-> IO ()
createHoogleDatabases store runDB say urlRender = do
stackages <- runDB $ do
sids <- (++)
<$> fmap (map $ ltsStackage . entityVal)
(selectList [] [Desc LtsMajor, Desc LtsMinor, LimitTo 5])
<*> fmap (map $ nightlyStackage . entityVal)
(selectList [] [Desc NightlyDay, LimitTo 5])
catMaybes <$> mapM get sids
forM_ stackages $ \stackage -> do
let say' x = say $ concat
[ toPathPiece $ stackageSlug stackage
, ": "
, x
]
handleAny (say' . tshow) $ makeHoogle store say' urlRender stackage
-- | Either download the Hoogle database from persistent storage, or create it.
makeHoogle
:: BlobStore StoreKey
-> (Text -> IO ())
-> (Route App -> [(Text, Text)] -> Text)
-> Stackage
-> IO ()
makeHoogle store say urlRender stackage = do
say "Making hoogle database"
exists <- storeExists' store hoogleKey
if exists
then say "Hoogle database already exists, skipping"
else do
say "Generating Hoogle database"
generate
where
ident = stackageIdent stackage
hoogleKey = HoogleDB ident $ HoogleVersion VERSION_hoogle
generate = withSystemTempDirectory "hoogle-database-gen" $ \hoogletemp' -> do
let hoogletemp = fpFromString hoogletemp'
rawdocs = hoogletemp </> "rawdocs"
unpackRawDocsTo store ident say rawdocs
say "Copying Hoogle text files to temp directory"
runResourceT $ copyHoogleTextFiles say rawdocs hoogletemp
say "Creating Hoogle database"
withSystemTempFile "default.hoo" $ \dstFP' dstH -> do
let dstFP = fpFromString dstFP'
hClose dstH
createHoogleDb say dstFP stackage hoogletemp urlRender
say "Uploading database to persistent storage"
withAcquire (storeWrite' store hoogleKey) $ \sink ->
runResourceT $ sourceFile dstFP $$ gzip =$ sink
runCompressor :: (Text -> IO ()) -> Dirs -> IO ()
runCompressor say dirs =
@ -259,11 +329,11 @@ dirCacheFp dirs digest =
name = decodeUtf8 $ B16.encode $ toBytes digest
(x, y) = splitAt 2 name
copyHoogleTextFiles :: Handle -- ^ error log handle
copyHoogleTextFiles :: (Text -> IO ()) -- ^ log
-> FilePath -- ^ raw unpacked Haddock files
-> FilePath -- ^ temporary work directory
-> ResourceT IO ()
copyHoogleTextFiles errorLog raw tmp = do
copyHoogleTextFiles say raw tmp = do
let tmptext = tmp </> "text"
liftIO $ createTree tmptext
sourceDirectory raw $$ mapM_C (\fp ->
@ -273,7 +343,7 @@ copyHoogleTextFiles errorLog raw tmp = do
exists <- liftIO $ isFile src
if exists
then sourceFile src $$ (sinkFile dst :: Sink ByteString (ResourceT IO) ())
else liftIO $ appendHoogleErrors errorLog $ HoogleErrors
else liftIO $ appendHoogleErrors say $ HoogleErrors
{ packageName = name
, packageVersion = version
, errors = ["No textual Hoogle DB (use \"cabal haddock --hoogle\")"]
@ -281,13 +351,12 @@ copyHoogleTextFiles errorLog raw tmp = do
)
createHoogleDb :: (Text -> IO ())
-> Dirs
-> Entity Stackage
-> Handle -- ^ error log handle
-> FilePath -- ^ default.hoo output location
-> Stackage
-> FilePath -- ^ temp directory
-> (Route App -> [(Text, Text)] -> Text)
-> IO ()
createHoogleDb say dirs (Entity _ stackage) errorLog tmpdir urlRender = do
createHoogleDb say dstDefaultHoo stackage tmpdir urlRender = do
let tmpbin = tmpdir </> "binary"
createTree tmpbin
eres <- tryAny $ runResourceT $ do
@ -320,7 +389,7 @@ createHoogleDb say dirs (Entity _ stackage) errorLog tmpdir urlRender = do
, " Hoogle errors: "
, tshow errs
]
appendHoogleErrors errorLog $ HoogleErrors
appendHoogleErrors say $ HoogleErrors
{ packageName = name
, packageVersion = version
, errors = map show errs
@ -333,10 +402,10 @@ createHoogleDb say dirs (Entity _ stackage) errorLog tmpdir urlRender = do
dbs <- listDirectory tmpbin
Hoogle.mergeDatabase
(map fpToString dbs)
(fpToString $ defaultHooDest dirs stackage)
(fpToString dstDefaultHoo)
case eres of
Right () -> return ()
Left err -> liftIO $ appendHoogleErrors errorLog $ HoogleErrors
Left err -> liftIO $ appendHoogleErrors say $ HoogleErrors
{ packageName = "Exception thrown while building hoogle DB"
, packageVersion = ""
, errors = [show err]
@ -353,8 +422,8 @@ instance FromJSON HoogleErrors where
-- Appends hoogle errors to a log file. By encoding within a single
-- list, the resulting file can be decoded as [HoogleErrors].
appendHoogleErrors :: Handle -> HoogleErrors -> IO ()
appendHoogleErrors h errs = hPut h (Y.encode [errs])
appendHoogleErrors :: (Text -> IO ()) -> HoogleErrors -> IO ()
appendHoogleErrors say errs = say $ decodeUtf8 $ Y.encode [errs]
nameAndVersionFromPath :: FilePath -> Maybe (Text, Text)
nameAndVersionFromPath fp =

View File

@ -34,17 +34,17 @@ data App = App
, httpManager :: Manager
, persistConfig :: Settings.PersistConf
, appLogger :: Logger
, genIO :: !MWC.GenIO
, blobStore :: !(BlobStore StoreKey)
, progressMap :: !(IORef (IntMap Progress))
, nextProgressKey :: !(IORef Int)
, haddockRootDir :: !FilePath
, genIO :: MWC.GenIO
, blobStore :: BlobStore StoreKey
, progressMap :: IORef (IntMap Progress)
, nextProgressKey :: IORef Int
, haddockRootDir :: FilePath
, appDocUnpacker :: DocUnpacker
-- ^ We have a dedicated thread so that (1) we don't try to unpack too many
-- things at once, (2) we never unpack the same thing twice at the same
-- time, and (3) so that even if the client connection dies, we finish the
-- unpack job.
, widgetCache :: !(IORef (HashMap Text (UTCTime, GWData (Route App))))
, widgetCache :: IORef (HashMap Text (UTCTime, GWData (Route App)))
, websiteContent :: GitRepo WebsiteContent
}

View File

@ -6,8 +6,7 @@ import Control.Spoon (spoon)
import Data.Data (Data (..))
import Data.Slug (SnapSlug)
import Data.Text.Read (decimal)
import Data.Unpacking (defaultHooDest)
import Filesystem (isFile)
import Data.Unpacking (getHoogleDB)
import Handler.Haddock (getDirs)
import qualified Hoogle
import Import
@ -29,14 +28,18 @@ getHoogleR slug = do
Just (Right (i, "")) -> i
_ -> 1
offset = (page - 1) * perPage
stackageEnt@(Entity _ stackage) <- runDB $ getBy404 $ UniqueSnapshot slug
-- Unpack haddocks and generate hoogle DB, if necessary.
requireDocs stackageEnt
let databasePath = defaultHooDest dirs stackage
heDatabase = liftIO $ Hoogle.loadDatabase (fpToString databasePath)
-- If the hoogle DB isn't yet generated, yield 404.
dbExists <- liftIO $ isFile databasePath
when (not dbExists) notFound
Entity _ stackage <- runDB $ getBy404 $ UniqueSnapshot slug
mdatabasePath <- getHoogleDB dirs stackage
heDatabase <- case mdatabasePath of
Just x -> return $ liftIO $ Hoogle.loadDatabase $ fpToString x
Nothing -> (>>= sendResponse) $ defaultLayout $ do
setTitle "Hoogle database not available"
[whamlet|
<p>The given Hoogle database is not available.
<p>
<a href=@{SnapshotR slug StackageHomeR}>Return to snapshot homepage
|]
mresults <- case mquery of
Just query -> runHoogleQuery heDatabase HoogleQueryInput
{ hqiQueryInput = query

View File

@ -5,6 +5,5 @@ stanzas:
- production
env:
STACKAGE_CABAL_LOADER: "0"
STACKAGE_HOOGLE_LOADER: "0"
host: www.stackage.org
copy-to: fpuser@www.stackage.org:/var/opt/keter/incoming