Persist Hoogle DBs

This commit is contained in:
Michael Snoyman 2015-01-04 21:28:30 +02:00
parent 08ab874ae9
commit 7c94b008aa
3 changed files with 65 additions and 26 deletions

View File

@ -2,6 +2,7 @@
-- and compressing/deduping contents. -- and compressing/deduping contents.
module Data.Unpacking module Data.Unpacking
( newDocUnpacker ( newDocUnpacker
, defaultHooDest
) where ) where
import Import hiding (runDB) import Import hiding (runDB)
@ -11,18 +12,17 @@ import Filesystem (createTree, isFile, removeTree, isDirectory, listDirectory, c
import System.Posix.Files (createLink) import System.Posix.Files (createLink)
import Crypto.Hash.Conduit (sinkHash) import Crypto.Hash.Conduit (sinkHash)
import Control.Concurrent (forkIO) import Control.Concurrent (forkIO)
import Control.Monad.Trans.Resource (allocate, resourceForkIO, release) import Control.Monad.Trans.Resource (allocate, release)
import Data.Char (isAlpha) import Data.Char (isAlpha)
import qualified Hoogle import qualified Hoogle
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Yaml as Y import qualified Data.Yaml as Y
import System.IO (IOMode (ReadMode, WriteMode), withBinaryFile, openBinaryFile) import System.IO (IOMode (ReadMode, WriteMode), withBinaryFile, openBinaryFile)
import System.IO.Temp (withSystemTempFile, withTempFile, createTempDirectory, withSystemTempDirectory) import System.IO.Temp (withSystemTempFile, withTempFile, withSystemTempDirectory)
import System.Directory (getTemporaryDirectory)
import System.Exit (ExitCode (ExitSuccess)) import System.Exit (ExitCode (ExitSuccess))
import System.Process (createProcess, proc, cwd, waitForProcess) import System.Process (createProcess, proc, cwd, waitForProcess)
import qualified Filesystem.Path.CurrentOS as F import qualified Filesystem.Path.CurrentOS as F
import Data.Conduit.Zlib (gzip) import Data.Conduit.Zlib (gzip, ungzip)
import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString.Base16 as B16
import Data.Byteable (toBytes) import Data.Byteable (toBytes)
import Crypto.Hash (Digest, SHA1) import Crypto.Hash (Digest, SHA1)
@ -46,7 +46,7 @@ newDocUnpacker root store runDB urlRender = do
$ insertMap (stackageSlug $ entityVal ent) var $ insertMap (stackageSlug $ entityVal ent) var
writeTChan workChan (forceUnpack, ent, var) writeTChan workChan (forceUnpack, ent, var)
forkForever $ unpackWorker dirs runDB store statusMapVar messageVar urlRender workChan forkForever $ unpackWorker dirs runDB store messageVar urlRender workChan
return DocUnpacker return DocUnpacker
{ duRequestDocs = \ent -> do { duRequestDocs = \ent -> do
@ -82,13 +82,22 @@ isUnpacked :: Dirs -> Entity Stackage -> IO Bool
isUnpacked dirs (Entity _ stackage) = isFile $ defaultHooDest dirs stackage isUnpacked dirs (Entity _ stackage) = isFile $ defaultHooDest dirs stackage
defaultHooDest :: Dirs -> Stackage -> FilePath defaultHooDest :: Dirs -> Stackage -> FilePath
defaultHooDest dirs stackage = dirHoogleFp dirs (stackageIdent stackage) ["default.hoo"] defaultHooDest dirs stackage = dirHoogleFp dirs (stackageIdent stackage)
["default-" ++ VERSION_hoogle ++ ".hoo"]
forkForever :: IO () -> IO () forkForever :: IO () -> IO ()
forkForever inner = mask $ \restore -> forkForever inner = mask $ \restore ->
void $ forkIO $ forever $ handleAny print $ restore $ forever inner void $ forkIO $ forever $ handleAny print $ restore $ forever inner
unpackWorker dirs runDB store statusMapVar messageVar urlRender workChan = do unpackWorker
:: Dirs
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> BlobStore StoreKey
-> TVar Text
-> (Route App -> [(Text, Text)] -> Text)
-> TChan (Bool, Entity Stackage, TVar UnpackStatus)
-> IO ()
unpackWorker dirs runDB store messageVar urlRender workChan = do
atomically $ writeTVar messageVar "Waiting for new work item" atomically $ writeTVar messageVar "Waiting for new work item"
(forceUnpack, ent, resVar) <- atomically $ readTChan workChan (forceUnpack, ent, resVar) <- atomically $ readTChan workChan
shouldUnpack <- shouldUnpack <-
@ -110,6 +119,14 @@ unpackWorker dirs runDB store statusMapVar messageVar urlRender workChan = do
removeTreeIfExists :: FilePath -> IO () removeTreeIfExists :: FilePath -> IO ()
removeTreeIfExists fp = whenM (isDirectory fp) (removeTree fp) removeTreeIfExists fp = whenM (isDirectory fp) (removeTree fp)
unpacker
:: Dirs
-> (forall a m. (MonadIO m, MonadBaseControl IO m) => SqlPersistT m a -> m a)
-> BlobStore StoreKey
-> (Text -> IO ())
-> (Route App -> [(Text, Text)] -> Text)
-> Entity Stackage
-> IO ()
unpacker dirs runDB store say urlRender stackageEnt@(Entity _ stackage@Stackage {..}) = do unpacker dirs runDB store say urlRender stackageEnt@(Entity _ stackage@Stackage {..}) = do
say "Removing old directories, if they exist" say "Removing old directories, if they exist"
removeTreeIfExists $ dirRawIdent dirs stackageIdent removeTreeIfExists $ dirRawIdent dirs stackageIdent
@ -135,7 +152,6 @@ unpacker dirs runDB store say urlRender stackageEnt@(Entity _ stackage@Stackage
if ec == ExitSuccess then return () else throwM ec if ec == ExitSuccess then return () else throwM ec
createTree $ dirHoogleIdent dirs stackageIdent createTree $ dirHoogleIdent dirs stackageIdent
tmp <- getTemporaryDirectory
-- Determine which packages have documentation and update the -- Determine which packages have documentation and update the
-- database appropriately -- database appropriately
@ -155,24 +171,35 @@ unpacker dirs runDB store say urlRender stackageEnt@(Entity _ stackage@Stackage
[PackageHasHaddocks =. True] [PackageHasHaddocks =. True]
) )
let defaultHoo = destdir </> "default.hoo" let srcDefaultHoo = destdir </> "default.hoo"
defaultHooExists <- isFile defaultHoo dstDefaultHoo = defaultHooDest dirs stackage
hoogleKey = HoogleDB stackageIdent $ HoogleVersion VERSION_hoogle
defaultHooExists <- isFile srcDefaultHoo
if defaultHooExists if defaultHooExists
then copyFile defaultHoo $ defaultHooDest dirs stackage then copyFile srcDefaultHoo dstDefaultHoo
else when isHoogleActive $ handleAny print $ withSystemTempDirectory "hoogle-database-gen" $ \hoogletemp' -> do else withAcquire (storeRead' store hoogleKey) $ \msrc ->
let hoogletemp = fpFromString hoogletemp' case msrc of
logFp = fpToString (dirHoogleFp dirs stackageIdent ["error-log"]) Just src -> do
withBinaryFile logFp WriteMode $ \errorLog -> do say "Downloading compiled Hoogle database"
say "Copying Hoogle text files to temp directory" withBinaryFile (fpToString dstDefaultHoo) WriteMode
runResourceT $ copyHoogleTextFiles errorLog destdir hoogletemp $ \h -> src $$ ungzip =$ sinkHandle h
say "Creating Hoogle database" Nothing ->
createHoogleDb say dirs stackageEnt errorLog hoogletemp urlRender handleAny print
$ withSystemTempDirectory "hoogle-database-gen"
$ \hoogletemp' -> do
let hoogletemp = fpFromString hoogletemp'
logFp = fpToString (dirHoogleFp dirs stackageIdent ["error-log"])
withBinaryFile logFp WriteMode $ \errorLog -> do
say "Copying Hoogle text files to temp directory"
runResourceT $ copyHoogleTextFiles errorLog destdir hoogletemp
say "Creating Hoogle database"
createHoogleDb say dirs stackageEnt errorLog hoogletemp urlRender
say "Uploading database to persistent storage"
withAcquire (storeWrite' store hoogleKey) $ \sink ->
runResourceT $ sourceFile dstDefaultHoo $$ gzip =$ sink
runCompressor say dirs runCompressor say dirs
isHoogleActive :: Bool
isHoogleActive = False
runCompressor :: (Text -> IO ()) -> Dirs -> IO () runCompressor :: (Text -> IO ()) -> Dirs -> IO ()
runCompressor say dirs = runCompressor say dirs =
runResourceT $ goDir $ dirRawRoot dirs runResourceT $ goDir $ dirRawRoot dirs
@ -258,8 +285,7 @@ createHoogleDb :: (Text -> IO ())
-> (Route App -> [(Text, Text)] -> Text) -> (Route App -> [(Text, Text)] -> Text)
-> IO () -> IO ()
createHoogleDb say dirs (Entity _ stackage) errorLog tmpdir urlRender = do createHoogleDb say dirs (Entity _ stackage) errorLog tmpdir urlRender = do
let ident = stackageIdent stackage let tmpbin = tmpdir </> "binary"
tmpbin = tmpdir </> "binary"
createTree tmpbin createTree tmpbin
eres <- tryAny $ runResourceT $ do eres <- tryAny $ runResourceT $ do
-- Create hoogle binary databases for each package. -- Create hoogle binary databases for each package.

View File

@ -6,8 +6,9 @@ import Control.Spoon (spoon)
import Data.Data (Data (..)) import Data.Data (Data (..))
import Data.Slug (SnapSlug) import Data.Slug (SnapSlug)
import Data.Text.Read (decimal) import Data.Text.Read (decimal)
import Data.Unpacking (defaultHooDest)
import Filesystem (isFile) import Filesystem (isFile)
import Handler.Haddock (dirHoogleFp, getDirs) import Handler.Haddock (getDirs)
import qualified Hoogle import qualified Hoogle
import Import import Import
import Text.Blaze.Html (preEscapedToHtml) import Text.Blaze.Html (preEscapedToHtml)
@ -31,7 +32,7 @@ getHoogleR slug = do
stackageEnt@(Entity _ stackage) <- runDB $ getBy404 $ UniqueSnapshot slug stackageEnt@(Entity _ stackage) <- runDB $ getBy404 $ UniqueSnapshot slug
-- Unpack haddocks and generate hoogle DB, if necessary. -- Unpack haddocks and generate hoogle DB, if necessary.
requireDocs stackageEnt requireDocs stackageEnt
let databasePath = dirHoogleFp dirs (stackageIdent stackage) ["default.hoo"] let databasePath = defaultHooDest dirs stackage
heDatabase = liftIO $ Hoogle.loadDatabase (fpToString databasePath) heDatabase = liftIO $ Hoogle.loadDatabase (fpToString databasePath)
-- If the hoogle DB isn't yet generated, yield 404. -- If the hoogle DB isn't yet generated, yield 404.
dbExists <- liftIO $ isFile databasePath dbExists <- liftIO $ isFile databasePath

View File

@ -58,8 +58,14 @@ data StoreKey = HackageCabal !PackageName !Version
| HackageViewIndex !HackageView | HackageViewIndex !HackageView
| SnapshotBundle !PackageSetIdent | SnapshotBundle !PackageSetIdent
| HaddockBundle !PackageSetIdent | HaddockBundle !PackageSetIdent
| HoogleDB !PackageSetIdent !HoogleVersion
deriving (Show, Eq, Ord, Typeable) deriving (Show, Eq, Ord, Typeable)
newtype HoogleVersion = HoogleVersion Text
deriving (Show, Eq, Ord, Typeable, PathPiece)
currentHoogleVersion :: HoogleVersion
currentHoogleVersion = HoogleVersion VERSION_hoogle
instance ToPath StoreKey where instance ToPath StoreKey where
toPath (HackageCabal name version) = ["hackage", toPathPiece name, toPathPiece version ++ ".cabal"] toPath (HackageCabal name version) = ["hackage", toPathPiece name, toPathPiece version ++ ".cabal"]
toPath (HackageSdist name version) = ["hackage", toPathPiece name, toPathPiece version ++ ".tar.gz"] toPath (HackageSdist name version) = ["hackage", toPathPiece name, toPathPiece version ++ ".tar.gz"]
@ -95,6 +101,11 @@ instance ToPath StoreKey where
[ "haddock" [ "haddock"
, toPathPiece ident ++ ".tar.xz" , toPathPiece ident ++ ".tar.xz"
] ]
toPath (HoogleDB ident ver) =
[ "hoogle"
, toPathPiece ver
, toPathPiece ident ++ ".hoo.gz"
]
instance BackupToS3 StoreKey where instance BackupToS3 StoreKey where
shouldBackup HackageCabal{} = False shouldBackup HackageCabal{} = False
shouldBackup HackageSdist{} = False shouldBackup HackageSdist{} = False
@ -105,6 +116,7 @@ instance BackupToS3 StoreKey where
shouldBackup HackageViewIndex{} = False shouldBackup HackageViewIndex{} = False
shouldBackup SnapshotBundle{} = True shouldBackup SnapshotBundle{} = True
shouldBackup HaddockBundle{} = True shouldBackup HaddockBundle{} = True
shouldBackup HoogleDB{} = True
newtype HackageRoot = HackageRoot { unHackageRoot :: Text } newtype HackageRoot = HackageRoot { unHackageRoot :: Text }
deriving (Show, Read, Typeable, Eq, Ord, Hashable, PathPiece, ToMarkup) deriving (Show, Read, Typeable, Eq, Ord, Hashable, PathPiece, ToMarkup)