Generate Hoogle DB when haddocks are unpacked #47

This commit is contained in:
Michael Sloan 2014-12-22 19:28:41 -08:00
parent 125e7ea130
commit c0fed800cc
4 changed files with 133 additions and 40 deletions

View File

@ -164,8 +164,12 @@ makeFoundation useEcho conf = do
blobStore' <- loadBlobStore manager conf blobStore' <- loadBlobStore manager conf
let haddockRootDir' = "/tmp/stackage-server-haddocks2" let haddockRootDir' = "/tmp/stackage-server-haddocks2"
(statusRef, unpacker) <- createHaddockUnpacker haddockRootDir' blobStore' urlRenderRef' <- newIORef (error "urlRenderRef not initialized")
(statusRef, unpacker) <- createHaddockUnpacker
haddockRootDir'
blobStore'
(flip (Database.Persist.runPool dbconf) p) (flip (Database.Persist.runPool dbconf) p)
urlRenderRef'
widgetCache' <- newIORef mempty widgetCache' <- newIORef mempty
#if MIN_VERSION_yesod_gitrepo(0,1,1) #if MIN_VERSION_yesod_gitrepo(0,1,1)
@ -217,6 +221,8 @@ makeFoundation useEcho conf = do
, websiteContent = websiteContent' , websiteContent = websiteContent'
} }
writeIORef urlRenderRef' (yesodRender foundation (appRoot conf))
env <- getEnvironment env <- getEnvironment
-- Perform database migration using our application's logging settings. -- Perform database migration using our application's logging settings.

View File

@ -39,7 +39,7 @@ data App = App
, progressMap :: !(IORef (IntMap Progress)) , progressMap :: !(IORef (IntMap Progress))
, nextProgressKey :: !(IORef Int) , nextProgressKey :: !(IORef Int)
, haddockRootDir :: !FilePath , haddockRootDir :: !FilePath
, haddockUnpacker :: !(ForceUnpack -> PackageSetIdent -> IO ()) , haddockUnpacker :: !(ForceUnpack -> Entity Stackage -> IO ())
-- ^ We have a dedicated thread so that (1) we don't try to unpack too many -- ^ We have a dedicated thread so that (1) we don't try to unpack too many
-- things at once, (2) we never unpack the same thing twice at the same -- things at once, (2) we never unpack the same thing twice at the same
-- time, and (3) so that even if the client connection dies, we finish the -- time, and (3) so that even if the client connection dies, we finish the

View File

@ -1,8 +1,15 @@
module Handler.Haddock where module Handler.Haddock
( getUploadHaddockR
, putUploadHaddockR
, getHaddockR
, getUploadDocMapR
, putUploadDocMapR
, createHaddockUnpacker
) where
import Import import Import
import Data.BlobStore import Data.BlobStore
import Filesystem (removeTree, isDirectory, createTree, isFile, rename, removeFile, removeDirectory) import Filesystem (removeTree, isDirectory, createTree, isFile, rename, removeFile, removeDirectory, listDirectory)
import Control.Concurrent (forkIO) import Control.Concurrent (forkIO)
import System.IO.Temp (withSystemTempFile, withTempFile) import System.IO.Temp (withSystemTempFile, withTempFile)
import System.Process (createProcess, proc, cwd, waitForProcess) import System.Process (createProcess, proc, cwd, waitForProcess)
@ -16,11 +23,12 @@ import qualified Data.ByteString.Base16 as B16
import Data.Byteable (toBytes) import Data.Byteable (toBytes)
import Crypto.Hash (Digest, SHA1) import Crypto.Hash (Digest, SHA1)
import qualified Filesystem.Path.CurrentOS as F import qualified Filesystem.Path.CurrentOS as F
import Data.Slug (SnapSlug) import Data.Slug (SnapSlug, unSlug)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Slug (unSlug)
import qualified Data.Yaml as Y import qualified Data.Yaml as Y
import Data.Aeson (withObject) import Data.Aeson (withObject)
import qualified Hoogle
import Data.Char (isAlpha)
form :: Form FileInfo form :: Form FileInfo
form = renderDivs $ areq fileField "tarball containing docs" form = renderDivs $ areq fileField "tarball containing docs"
@ -30,7 +38,7 @@ form = renderDivs $ areq fileField "tarball containing docs"
getUploadHaddockR, putUploadHaddockR :: Text -> Handler Html getUploadHaddockR, putUploadHaddockR :: Text -> Handler Html
getUploadHaddockR slug0 = do getUploadHaddockR slug0 = do
uid <- requireAuthIdOrToken uid <- requireAuthIdOrToken
Entity sid Stackage {..} <- runDB $ do stackageEnt@(Entity sid Stackage {..}) <- runDB $ do
-- Provide fallback for old URLs -- Provide fallback for old URLs
ment <- getBy $ UniqueStackage $ PackageSetIdent slug0 ment <- getBy $ UniqueStackage $ PackageSetIdent slug0
case ment of case ment of
@ -47,7 +55,7 @@ getUploadHaddockR slug0 = do
fileSource fileInfo $$ storeWrite (HaddockBundle ident) fileSource fileInfo $$ storeWrite (HaddockBundle ident)
runDB $ update sid [StackageHasHaddocks =. True] runDB $ update sid [StackageHasHaddocks =. True]
master <- getYesod master <- getYesod
void $ liftIO $ forkIO $ haddockUnpacker master True ident void $ liftIO $ forkIO $ haddockUnpacker master True stackageEnt
setMessage "Haddocks uploaded" setMessage "Haddocks uploaded"
redirect $ SnapshotR slug StackageHomeR redirect $ SnapshotR slug StackageHomeR
_ -> defaultLayout $ do _ -> defaultLayout $ do
@ -58,7 +66,7 @@ putUploadHaddockR = getUploadHaddockR
getHaddockR :: SnapSlug -> [Text] -> Handler () getHaddockR :: SnapSlug -> [Text] -> Handler ()
getHaddockR slug rest = do getHaddockR slug rest = do
ident <- runDB $ do stackageEnt <- runDB $ do
ment <- getBy $ UniqueSnapshot slug ment <- getBy $ UniqueSnapshot slug
case ment of case ment of
Just ent -> do Just ent -> do
@ -66,7 +74,7 @@ getHaddockR slug rest = do
[pkgver] -> tryContentsRedirect ent pkgver [pkgver] -> tryContentsRedirect ent pkgver
[pkgver, "index.html"] -> tryContentsRedirect ent pkgver [pkgver, "index.html"] -> tryContentsRedirect ent pkgver
_ -> return () _ -> return ()
return $ stackageIdent $ entityVal ent return ent
Nothing -> do Nothing -> do
Entity _ stackage <- getBy404 Entity _ stackage <- getBy404
$ UniqueStackage $ UniqueStackage
@ -74,11 +82,12 @@ getHaddockR slug rest = do
$ toPathPiece slug $ toPathPiece slug
redirectWith status301 $ HaddockR (stackageSlug stackage) rest redirectWith status301 $ HaddockR (stackageSlug stackage) rest
mapM_ sanitize rest mapM_ sanitize rest
dirs <- getDirs -- (gzdir, rawdir) <- getHaddockDir ident dirs <- getDirs
master <- getYesod master <- getYesod
liftIO $ haddockUnpacker master False ident liftIO $ haddockUnpacker master False stackageEnt
let rawfp = dirRawFp dirs ident rest let ident = stackageIdent (entityVal stackageEnt)
rawfp = dirRawFp dirs ident rest
gzfp = dirGzFp dirs ident rest gzfp = dirGzFp dirs ident rest
mime = defaultMimeLookup $ fpToText $ filename rawfp mime = defaultMimeLookup $ fpToText $ filename rawfp
@ -124,19 +133,6 @@ tryContentsRedirect (Entity sid Stackage {..}) pkgver = do
dropDash :: Text -> Text dropDash :: Text -> Text
dropDash t = fromMaybe t $ stripSuffix "-" t dropDash t = fromMaybe t $ stripSuffix "-" t
getHaddockDir :: PackageSetIdent -> Handler (FilePath, FilePath)
getHaddockDir ident = do
master <- getYesod
return $ mkDirPair (haddockRootDir master) ident
mkDirPair :: FilePath -- ^ root
-> PackageSetIdent
-> (FilePath, FilePath) -- ^ compressed, uncompressed
mkDirPair root ident =
( root </> "idents-raw" </> fpFromText (toPathPiece ident)
, root </> "idents-gz" </> fpFromText (toPathPiece ident)
)
createCompressor createCompressor
:: Dirs :: Dirs
-> IO (IORef Text, IO ()) -- ^ action to kick off compressor again -> IO (IORef Text, IO ()) -- ^ action to kick off compressor again
@ -206,6 +202,7 @@ data Dirs = Dirs
{ dirRawRoot :: !FilePath { dirRawRoot :: !FilePath
, dirGzRoot :: !FilePath , dirGzRoot :: !FilePath
, dirCacheRoot :: !FilePath , dirCacheRoot :: !FilePath
, dirHoogleRoot :: !FilePath
} }
getDirs :: Handler Dirs getDirs :: Handler Dirs
@ -216,15 +213,18 @@ mkDirs dir = Dirs
{ dirRawRoot = dir </> "idents-raw" { dirRawRoot = dir </> "idents-raw"
, dirGzRoot = dir </> "idents-gz" , dirGzRoot = dir </> "idents-gz"
, dirCacheRoot = dir </> "cachedir" , dirCacheRoot = dir </> "cachedir"
, dirHoogleRoot = dir </> "hoogle"
} }
dirGzIdent, dirRawIdent :: Dirs -> PackageSetIdent -> FilePath dirGzIdent, dirRawIdent, dirHoogleIdent :: Dirs -> PackageSetIdent -> FilePath
dirGzIdent dirs ident = dirGzRoot dirs </> fpFromText (toPathPiece ident) dirGzIdent dirs ident = dirGzRoot dirs </> fpFromText (toPathPiece ident)
dirRawIdent dirs ident = dirRawRoot dirs </> fpFromText (toPathPiece ident) dirRawIdent dirs ident = dirRawRoot dirs </> fpFromText (toPathPiece ident)
dirHoogleIdent dirs ident = dirHoogleRoot dirs </> fpFromText (toPathPiece ident)
dirGzFp, dirRawFp :: Dirs -> PackageSetIdent -> [Text] -> FilePath dirGzFp, dirRawFp, dirHoogleFp :: Dirs -> PackageSetIdent -> [Text] -> FilePath
dirGzFp dirs ident rest = dirGzIdent dirs ident </> mconcat (map fpFromText rest) dirGzFp dirs ident rest = dirGzIdent dirs ident </> mconcat (map fpFromText rest)
dirRawFp dirs ident rest = dirRawIdent dirs ident </> mconcat (map fpFromText rest) dirRawFp dirs ident rest = dirRawIdent dirs ident </> mconcat (map fpFromText rest)
dirHoogleFp dirs ident rest = dirHoogleIdent dirs ident </> mconcat (map fpFromText rest)
dirCacheFp :: Dirs -> Digest SHA1 -> FilePath dirCacheFp :: Dirs -> Digest SHA1 -> FilePath
dirCacheFp dirs digest = dirCacheFp dirs digest =
@ -240,11 +240,13 @@ createHaddockUnpacker :: FilePath -- ^ haddock root
-> BlobStore StoreKey -> BlobStore StoreKey
-> (forall a m. (MonadIO m, MonadBaseControl IO m) -> (forall a m. (MonadIO m, MonadBaseControl IO m)
=> SqlPersistT m a -> m a) => SqlPersistT m a -> m a)
-> IO (IORef Text, ForceUnpack -> PackageSetIdent -> IO ()) -> IORef (Route App -> [(Text, Text)] -> Text)
createHaddockUnpacker root store runDB' = do -> IO (IORef Text, ForceUnpack -> Entity Stackage -> IO ())
createHaddockUnpacker root store runDB' urlRenderRef = do
createTree $ dirCacheRoot dirs createTree $ dirCacheRoot dirs
createTree $ dirRawRoot dirs createTree $ dirRawRoot dirs
createTree $ dirGzRoot dirs createTree $ dirGzRoot dirs
createTree $ dirHoogleRoot dirs
chan <- newChan chan <- newChan
(statusRef, compressor) <- createCompressor dirs (statusRef, compressor) <- createCompressor dirs
@ -253,7 +255,8 @@ createHaddockUnpacker root store runDB' = do
(forceUnpack, ident, res) <- readChan chan (forceUnpack, ident, res) <- readChan chan
try (restore $ go forceUnpack ident) >>= putMVar res try (restore $ go forceUnpack ident) >>= putMVar res
compressor compressor
return (statusRef, \forceUnpack ident -> do return (statusRef, \forceUnpack stackageEnt -> do
let ident = stackageIdent (entityVal stackageEnt)
shouldAct <- shouldAct <-
if forceUnpack if forceUnpack
then return True then return True
@ -261,7 +264,7 @@ createHaddockUnpacker root store runDB' = do
if shouldAct if shouldAct
then do then do
res <- newEmptyMVar res <- newEmptyMVar
writeChan chan (forceUnpack, ident, res) writeChan chan (forceUnpack, stackageEnt, res)
takeMVar res >>= either (throwM . asSomeException) return takeMVar res >>= either (throwM . asSomeException) return
else return ()) else return ())
where where
@ -274,12 +277,14 @@ createHaddockUnpacker root store runDB' = do
if e1 if e1
then return True then return True
else isDirectory $ dirRawIdent dirs ident else isDirectory $ dirRawIdent dirs ident
go forceUnpack ident = do go forceUnpack stackageEnt = do
let ident = stackageIdent (entityVal stackageEnt)
toRun <- toRun <-
if forceUnpack if forceUnpack
then do then do
removeTreeIfExists $ dirRawIdent dirs ident removeTreeIfExists $ dirRawIdent dirs ident
removeTreeIfExists $ dirGzIdent dirs ident removeTreeIfExists $ dirGzIdent dirs ident
removeTreeIfExists $ dirHoogleIdent dirs ident
return True return True
else not <$> doDirsExist ident else not <$> doDirsExist ident
when toRun $ do when toRun $ do
@ -289,8 +294,8 @@ createHaddockUnpacker root store runDB' = do
Nothing -> error "No haddocks exist for that snapshot" Nothing -> error "No haddocks exist for that snapshot"
Just src -> src $$ sinkHandle temph Just src -> src $$ sinkHandle temph
hClose temph hClose temph
createTree $ dirRawIdent dirs ident
let destdir = dirRawIdent dirs ident let destdir = dirRawIdent dirs ident
createTree destdir
(Nothing, Nothing, Nothing, ph) <- createProcess (Nothing, Nothing, Nothing, ph) <- createProcess
(proc "tar" ["xf", tempfp]) (proc "tar" ["xf", tempfp])
{ cwd = Just $ fpToString destdir { cwd = Just $ fpToString destdir
@ -298,6 +303,12 @@ createHaddockUnpacker root store runDB' = do
ec <- waitForProcess ph ec <- waitForProcess ph
if ec == ExitSuccess then return () else throwM ec if ec == ExitSuccess then return () else throwM ec
-- TODO: run hoogle and the database update in
-- concurrent threads.
urlRender <- readIORef urlRenderRef
createHoogleDb dirs stackageEnt destdir urlRender
-- Determine which packages have documentation and update the -- Determine which packages have documentation and update the
-- database appropriately -- database appropriately
runResourceT $ runDB' $ do runResourceT $ runDB' $ do
@ -307,12 +318,8 @@ createHaddockUnpacker root store runDB' = do
[PackageStackage ==. sid] [PackageStackage ==. sid]
[PackageHasHaddocks =. False] [PackageHasHaddocks =. False]
sourceDirectory destdir $$ mapM_C (\fp -> do sourceDirectory destdir $$ mapM_C (\fp -> do
let (name', version) = let mnv = nameAndVersionFromPath fp
T.breakOnEnd "-" forM_ mnv $ \(name, version) -> updateWhere
$ fpToText
$ filename fp
mname = stripSuffix "-" name'
forM_ mname $ \name -> updateWhere
[ PackageStackage ==. sid [ PackageStackage ==. sid
, PackageName' ==. PackageName name , PackageName' ==. PackageName name
, PackageVersion ==. Version version , PackageVersion ==. Version version
@ -379,3 +386,82 @@ getUploadDocMapR = do
putUploadDocMapR :: Handler Html putUploadDocMapR :: Handler Html
putUploadDocMapR = getUploadDocMapR putUploadDocMapR = getUploadDocMapR
-- | Build per-package Hoogle databases for a freshly unpacked haddock
-- tree and merge them into a single @default.hoo@ for the snapshot.
--
-- For every entry of @packagedir@ whose directory name parses as
-- @name-version@ (see 'nameAndVersionFromPath'), the haddock-generated
-- @name.txt@ manifest is read, preprocessed with 'haddockHacks' — which
-- also injects an @url line pointing back at this snapshot's own
-- haddock pages — and converted to a binary @.hoo@ database under
-- 'dirHoogleIdent'.  Finally all per-package databases are merged into
-- @default.hoo@ in the same directory.
createHoogleDb :: Dirs
               -> Entity Stackage                        -- ^ snapshot whose docs were unpacked
               -> FilePath                               -- ^ root of the unpacked haddock tree
               -> (Route App -> [(Text, Text)] -> Text)  -- ^ URL renderer (captured at app startup)
               -> IO ()
createHoogleDb dirs (Entity _ stackage) packagedir urlRender = do
    let ident = stackageIdent stackage
        hoogleDir = dirHoogleIdent dirs ident
    createTree hoogleDir
    -- Create hoogle binary databases for each package
    runResourceT $ sourceDirectory packagedir $$ mapM_C (\fp ->
        -- Entries that do not look like @name-version@ are skipped.
        lift $ forM_ (nameAndVersionFromPath fp) $ \(name, version) -> do
            -- assumes each package dir contains a haddock-generated
            -- hoogle manifest named "<name>.txt" — TODO confirm
            src <- readFile (fp </> fpFromText name <.> "txt")
            let -- Preprocess the haddock-generated manifest file.
                src' = unlines $ haddockHacks (Just (unpack docsUrl)) $ lines src
                docsUrl = urlRender (HaddockR (stackageSlug stackage) urlPieces) []
                urlPieces = [name <> "-" <> version, "index.html"]
                -- Compute the filepath of the resulting hoogle
                -- database.
                out = fpToString $ dirHoogleFp dirs ident [dirname]
                dirname = fpToText $ filename fp <.> "hoo"
            -- NOTE(review): the "foo" argument looks like a placeholder
            -- database name — verify against the hoogle-4 API.
            errs <- Hoogle.createDatabase "foo" Hoogle.Haskell [] src' out
            -- TODO: handle these more gracefully?
            putStrLn $ "Hoogle errors: " <> tshow errs
        )
    -- Merge the individual binary databases into one big database.
    -- default.hoo is excluded so a rerun does not merge a stale merged
    -- database into itself.
    dbs <- listDirectory hoogleDir
    let merged = hoogleDir </> "default.hoo"
    Hoogle.mergeDatabase
        (map fpToString (filter (/= merged) dbs))
        (fpToString merged)
-- | Split a directory name of the form @name-version@ into its package
-- name and version, e.g. @\"foo-bar-1.2.3\"@ yields
-- @Just (\"foo-bar\", \"1.2.3\")@.  Everything after the final hyphen is
-- taken as the version; 'Nothing' when the name contains no hyphen.
nameAndVersionFromPath :: FilePath -> Maybe (Text, Text)
nameAndVersionFromPath fp = do
    -- breakOnEnd leaves the matched "-" on the prefix; stripping it
    -- both removes the separator and fails when no hyphen was found.
    name <- stripSuffix "-" nameWithDash
    Just (name, version)
  where
    (nameWithDash, version) = T.breakOnEnd "-" $ fpToText $ filename fp
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
-- | Rewrite a haddock-generated Hoogle manifest so hoogle's parser
-- accepts it (see the modification list above), and, when a location is
-- given, prefix each @package line with an @url directive via
-- 'haddockPackageUrl'.
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
    where
        translate :: [String] -> [String]
        translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")

        f "::" = "::"
        f (':':xs) = "(:" ++ xs ++ ")"
        -- BUG FIX: this clause previously returned 'xs', discarding the
        -- bound alphabetic character 'x' and turning "!!Int" into "nt".
        -- Strip both bangs but keep the identifier: "!!Int" -> "Int"
        -- (matching the single-bang clause below, which keeps 'x').
        f ('!':'!':x:xs) | isAlpha x = x:xs
        f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
        f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
        f x | x `elem` ["{-#","UNPACK","#-}"] = ""
        f x = x

        -- Drop everything after "where" (Haddock bug).
        g ("where":_) = []
        g (x:xs) = x : g xs
        g [] = []
-- | Insert an @url directive immediately before every @package line,
-- pointing hoogle search results at the given documentation URL.  All
-- other lines pass through unchanged.
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl url = concatMap insertUrl
    where
        insertUrl line
            | "@package " `isPrefixOf` line = ["@url " ++ url, line]
            | otherwise = [line]

View File

@ -150,6 +150,7 @@ library
, blaze-html , blaze-html
, haddock-library , haddock-library
, yesod-gitrepo , yesod-gitrepo
, hoogle
executable stackage-server executable stackage-server
if flag(library-only) if flag(library-only)