Hoogle database generation

This commit is contained in:
Michael Snoyman 2015-05-14 21:21:42 +03:00
parent d627f63521
commit 54b69cb491
3 changed files with 163 additions and 73 deletions

View File

@ -14,7 +14,7 @@ import qualified Stackage.Database.Cron as Cron
getHoogleDB :: SnapName -> Handler (Maybe FilePath)
getHoogleDB name = do
app <- getYesod
liftIO $ Cron.getHoogleDB (httpManager app) name
liftIO $ Cron.getHoogleDB True (httpManager app) name
getHoogleR :: SnapName -> Handler Html
getHoogleR name = do

View File

@ -29,6 +29,7 @@ module Stackage.Database
, getSnapshotsForPackage
, getSnapshots
, currentSchema
, last5Lts5Nightly
) where
import Database.Sqlite (SqliteException)
@ -132,6 +133,8 @@ newtype StackageDatabase = StackageDatabase ConnectionPool
class MonadIO m => GetStackageDatabase m where
getStackageDatabase :: m StackageDatabase
instance MonadIO m => GetStackageDatabase (ReaderT StackageDatabase m) where
getStackageDatabase = ask
sourcePackages :: MonadResource m => FilePath -> Producer m Tar.Entry
sourcePackages root = do
@ -601,3 +604,12 @@ getSnapshots l o = run $ (,)
<*> fmap (map entityVal) (selectList
[]
[LimitTo l, OffsetBy o, Desc SnapshotCreated])
last5Lts5Nightly :: GetStackageDatabase m => m [SnapName]
last5Lts5Nightly = run $ do
ls <- selectList [] [Desc LtsMajor, Desc LtsMinor, LimitTo 5]
ns <- selectList [] [Desc NightlyDay, LimitTo 5]
return $ map l ls ++ map n ns
where
l (Entity _ x) = SNLts (ltsMajor x) (ltsMinor x)
n (Entity _ x) = SNNightly (nightlyDay x)

View File

@ -5,23 +5,31 @@ module Stackage.Database.Cron
) where
import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity))
import Data.Char (isAlpha)
import qualified Codec.Archive.Tar as Tar
import Stackage.Database
import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename)
import Filesystem (rename, removeTree, removeFile)
import Web.PathPieces (toPathPiece)
import Filesystem (isFile)
import Filesystem (isFile, createTree)
import Filesystem.Path (parent)
import Control.Monad.State.Strict (StateT, get, put)
import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover),
Region (NorthVirginia), getEnv,
send, sourceFileIO)
send, sourceFileIO, envManager )
import Network.AWS.S3 (ObjectCannedACL (PublicRead),
poACL,
putObject)
import Control.Lens (set)
import Control.Lens (set, view)
import qualified Data.Conduit.Binary as CB
import Data.Conduit.Zlib (WindowBits (WindowBits),
compress)
import qualified Hoogle
filename' :: Text
filename' = concat
@ -47,6 +55,7 @@ loadFromS3 = do
fptmp = fp <.> "tmp"
req <- parseUrl $ unpack url
let download man = withResponse req man $ \res -> do
createTree $ parent fptmp
runResourceT
$ bodyReaderSource (responseBody res)
$$ sinkFile fptmp
@ -70,8 +79,9 @@ hoogleUrl n = concat
, ".gz"
]
getHoogleDB :: Manager -> SnapName -> IO (Maybe FilePath)
getHoogleDB man name = do
getHoogleDB :: Bool -- ^ print exceptions?
-> Manager -> SnapName -> IO (Maybe FilePath)
getHoogleDB toPrint man name = do
let fp = fpFromText $ hoogleKey name
fptmp = fp <.> "tmp"
exists <- isFile fp
@ -82,18 +92,24 @@ getHoogleDB man name = do
let req = req' { checkStatus = \_ _ _ -> Nothing }
withResponse req man $ \res -> if responseStatus res == status200
then do
createTree $ parent fptmp
runResourceT $ bodyReaderSource (responseBody res)
$$ sinkFile fptmp
rename fptmp fp
return $ Just fp
else do
mapM brRead res >>= print
when toPrint $ mapM brRead res >>= print
return Nothing
stackageServerCron :: IO ()
stackageServerCron = do
-- Hacky approach instead of PID files
void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ ->
error $ "cabal loader process already running, exiting"
env <- getEnv NorthVirginia Discover
let upload fp key = do
let upload :: FilePath -> Text -> IO ()
upload fp key = do
let fpgz = fpToString $ fp <.> "gz"
runResourceT $ sourceFile fp
$$ compress 9 (WindowBits 31)
@ -102,81 +118,143 @@ stackageServerCron = do
let po =
set poACL (Just PublicRead)
$ putObject body "haddock.stackage.org" key
putStrLn $ "Uploading: " ++ key
eres <- runResourceT $ send env po
case eres of
Left e -> error $ show (fp, key, e)
Right _ -> return ()
Right _ -> putStrLn "Success"
let dbfp = fpFromText keyName
createStackageDatabase dbfp
upload dbfp keyName
_ <- return (upload, dbfp)
--createStackageDatabase dbfp
--upload dbfp keyName
{-
createStackageDatabase dbfile
(db, _) <- loadFromS3
names <- runReaderT last5Lts5Nightly db
let manager = view envManager env
forM_ names $ \name -> do
mfp <- getHoogleDB False manager name
case mfp of
Just _ -> putStrLn $ "Hoogle database exists for: " ++ toPathPiece name
Nothing -> do
mfp' <- createHoogleDB db manager name
forM_ mfp' $ \fp -> do
let key = hoogleKey name
upload fp key
let dest = fpFromText key
createTree $ parent dest
rename fp dest
import Data.Streaming.Network (bindPortTCP)
createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
req <- parseUrl $ unpack tarUrl
unlessM (isFile tarFP) $ withResponse req man $ \res -> do
let tmp = tarFP <.> "tmp"
createTree $ parent tmp
runResourceT $ bodyReaderSource (responseBody res)
$$ sinkFile tmp
rename tmp tarFP
data CabalLoaderEnv = CabalLoaderEnv
{ cleSettings :: !(AppConfig DefaultEnv Extra)
, cleManager :: !Manager
}
void $ tryIO $ removeTree bindir
void $ tryIO $ removeFile outname
createTree bindir
instance HasHackageRoot CabalLoaderEnv where
getHackageRoot = hackageRoot . appExtra . cleSettings
instance HasHttpManager CabalLoaderEnv where
getHttpManager = cleManager
dbs <- runResourceT
$ sourceTarFile True (fpToString tarFP)
$$ evalStateC 1 (mapMC (singleDB db name bindir))
=$ sinkList
cabalLoaderMain :: IO ()
cabalLoaderMain = do
-- Hacky approach instead of PID files
void $ catchIO (bindPortTCP 17834 "127.0.0.1") $ \_ ->
error $ "cabal loader process already running, exiting"
putStrLn "Merging databases..."
Hoogle.mergeDatabase (map fpToString $ catMaybes dbs) (fpToString outname)
putStrLn "Merge done"
error "cabalLoaderMain"
{- FIXME
conf <- fromArgs parseExtra
dbconf <- getDbConf conf
pool <- Database.Persist.createPoolConfig dbconf
manager <- newManager
bs <- loadBlobStore manager conf
hSetBuffering stdout LineBuffering
env <- getEnvironment
let forceUpdate = lookup "STACKAGE_FORCE_UPDATE" env == Just "1"
flip runLoggingT logFunc $ appLoadCabalFiles
True -- update database?
forceUpdate
CabalLoaderEnv
{ cleSettings = conf
, cleBlobStore = bs
, cleManager = manager
}
dbconf
pool
let foundation = App
{ settings = conf
, getStatic = error "getStatic"
, connPool = pool
, httpManager = manager
, persistConfig = dbconf
, appLogger = error "appLogger"
, genIO = error "genIO"
, blobStore = bs
, haddockRootDir = error "haddockRootDir"
, appDocUnpacker = error "appDocUnpacker"
, widgetCache = error "widgetCache"
, websiteContent = error "websiteContent"
}
createHoogleDatabases
bs
(flip (Database.Persist.runPool dbconf) pool)
putStrLn
(yesodRender foundation (appRoot conf))
return $ Just outname
where
logFunc loc src level str
| level > LevelDebug = S.hPutStr stdout $ fromLogStr $ defaultLogStr loc src level str
| otherwise = return ()
-}
root = "hoogle-gen"
bindir = root </> "bindir"
outname = root </> "output.hoo"
tarKey = toPathPiece name ++ "/hoogle/orig.tar"
tarUrl = "https://s3.amazonaws.com/haddock.stackage.org/" ++ tarKey
tarFP = root </> fpFromText tarKey
-}
singleDB :: StackageDatabase
-> SnapName
-> FilePath -- ^ bindir to write to
-> Tar.Entry
-> StateT Int (ResourceT IO) (Maybe FilePath)
singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
idx <- get
put $! idx + 1
putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
msp <- flip runReaderT db $ do
Just (Entity sid _) <- lookupSnapshot sname
lookupSnapshotPackage sid pkg
case msp of
Nothing -> do
putStrLn $ "Unknown: " ++ pkg
return Nothing
Just (Entity _ sp) -> do
let ver = snapshotPackageVersion sp
pkgver = concat [pkg, "-", ver]
out = bindir </> fpFromString (show idx) <.> "hoo"
src' = unlines
$ haddockHacks (Just $ unpack docsUrl)
$ lines
$ unpack
$ decodeUtf8 lbs
docsUrl = concat
[ "https://www.stackage.org/haddock/"
, toPathPiece sname
, "/"
, pkgver
, "/index.html"
]
_errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' $ fpToString out
return $ Just out
singleDB _ _ _ _ = return Nothing
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
where
translate :: [String] -> [String]
translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")
f "::" = "::"
f (':':xs) = "(:" ++ xs ++ ")"
f ('!':'!':x:xs) | isAlpha x = xs
f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
f x | x `elem` ["{-#","UNPACK","#-}"] = ""
f x = x
g ("where":_) = []
g (x:xs) = x : g xs
g [] = []
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl x = concatMap f
where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y]
| otherwise = [y]