Get Hoogle 5 working

This commit is contained in:
Michael Snoyman 2016-06-02 20:26:38 +03:00 committed by Chris Done
parent e54b3f80a6
commit c1e16d8e1a
4 changed files with 113 additions and 178 deletions

View File

@ -1,15 +1,16 @@
{-# LANGUAGE QuasiQuotes #-}
module Handler.Hoogle where module Handler.Hoogle where
import Control.DeepSeq (NFData(..)) import Control.DeepSeq (NFData(..))
import Control.DeepSeq.Generics (genericRnf) import Control.DeepSeq.Generics (genericRnf)
import Control.Spoon (spoon) import Data.Data (Data)
import Data.Data (Data (..))
import Data.Text.Read (decimal) import Data.Text.Read (decimal)
import qualified Hoogle import qualified Hoogle
import Import import Import
import Text.Blaze.Html (preEscapedToHtml) import Text.Blaze.Html (preEscapedToHtml)
import Stackage.Database import Stackage.Database
import qualified Stackage.Database.Cron as Cron import qualified Stackage.Database.Cron as Cron
import qualified Data.Text as T
getHoogleDB :: SnapName -> Handler (Maybe FilePath) getHoogleDB :: SnapName -> Handler (Maybe FilePath)
getHoogleDB name = do getHoogleDB name = do
@ -21,7 +22,7 @@ getHoogleR name = do
Entity _ snapshot <- lookupSnapshot name >>= maybe notFound return Entity _ snapshot <- lookupSnapshot name >>= maybe notFound return
mquery <- lookupGetParam "q" mquery <- lookupGetParam "q"
mpage <- lookupGetParam "page" mpage <- lookupGetParam "page"
exact <- maybe False (const True) <$> lookupGetParam "exact" exact <- isJust <$> lookupGetParam "exact" -- FIXME remove, Hoogle no longer supports
mresults' <- lookupGetParam "results" mresults' <- lookupGetParam "results"
let count' = let count' =
case decimal <$> mresults' of case decimal <$> mresults' of
@ -33,25 +34,30 @@ getHoogleR name = do
_ -> 1 _ -> 1
offset = (page - 1) * perPage offset = (page - 1) * perPage
mdatabasePath <- getHoogleDB name mdatabasePath <- getHoogleDB name
heDatabase <- case mdatabasePath of dbPath <- maybe (hoogleDatabaseNotAvailableFor name) return mdatabasePath
Just x -> return $ liftIO $ Hoogle.loadDatabase x
Nothing -> hoogleDatabaseNotAvailableFor name
-- Avoid concurrent Hoogle queries, see -- Avoid concurrent Hoogle queries, see
-- https://github.com/fpco/stackage-server/issues/172 -- https://github.com/fpco/stackage-server/issues/172
lock <- appHoogleLock <$> getYesod lock <- appHoogleLock <$> getYesod
mresults <- case mquery of HoogleQueryOutput results mtotalCount <-
Just query -> withMVar lock $ const $ runHoogleQuery heDatabase HoogleQueryInput case mquery of
Just query -> do
let input = HoogleQueryInput
{ hqiQueryInput = query { hqiQueryInput = query
, hqiExactSearch = if exact then Just query else Nothing
, hqiLimitTo = count' , hqiLimitTo = count'
, hqiOffsetBy = offset , hqiOffsetBy = offset
} }
Nothing -> return $ HoogleQueryOutput "" [] Nothing
liftIO $ withMVar lock
$ const
$ Hoogle.withDatabase dbPath
-- NB! I got a segfault when I didn't force with $!
$ \db -> return $! runHoogleQuery db input
Nothing -> return $ HoogleQueryOutput [] Nothing
let queryText = fromMaybe "" mquery let queryText = fromMaybe "" mquery
pageLink p = (SnapshotR name HoogleR pageLink p = (SnapshotR name HoogleR
, (if exact then (("exact", "true"):) else id) , (if exact then (("exact", "true"):) else id)
$ (maybe id (\q' -> (("q", q'):)) mquery) $ maybe id (\q' -> (("q", q'):)) mquery
[("page", tshow p)]) [("page", tshow p)])
snapshotLink = SnapshotR name StackageHomeR snapshotLink = SnapshotR name StackageHomeR
hoogleForm = $(widgetFile "hoogle-form") hoogleForm = $(widgetFile "hoogle-form")
@ -84,15 +90,14 @@ perPage = 10
data HoogleQueryInput = HoogleQueryInput data HoogleQueryInput = HoogleQueryInput
{ hqiQueryInput :: Text { hqiQueryInput :: Text
, hqiExactSearch :: Maybe Text
, hqiLimitTo :: Int , hqiLimitTo :: Int
, hqiOffsetBy :: Int , hqiOffsetBy :: Int
} }
deriving (Eq, Read, Show, Data, Typeable, Ord, Generic) deriving (Eq, Read, Show, Data, Typeable, Ord, Generic)
data HoogleQueryOutput = HoogleQueryBad Text data HoogleQueryOutput = HoogleQueryOutput [HoogleResult] (Maybe Int) -- ^ Int == total count
| HoogleQueryOutput Text [HoogleResult] (Maybe Int) -- ^ Text == HTML version of query, Int == total count deriving (Read, Typeable, Data, Show, Eq, Generic)
deriving (Read, Typeable, Data, Show, Eq) instance NFData HoogleQueryOutput
data HoogleResult = HoogleResult data HoogleResult = HoogleResult
{ hrURL :: String { hrURL :: String
@ -118,56 +123,33 @@ instance NFData HoogleResult where rnf = genericRnf
instance NFData PackageLink where rnf = genericRnf instance NFData PackageLink where rnf = genericRnf
instance NFData ModuleLink where rnf = genericRnf instance NFData ModuleLink where rnf = genericRnf
runHoogleQuery :: Monad m runHoogleQuery :: Hoogle.Database -> HoogleQueryInput -> HoogleQueryOutput
=> m Hoogle.Database runHoogleQuery hoogledb HoogleQueryInput {..} =
-> HoogleQueryInput HoogleQueryOutput targets mcount
-> m HoogleQueryOutput
runHoogleQuery heDatabase HoogleQueryInput {..} =
runQuery $ Hoogle.parseQuery Hoogle.Haskell query
where where
allTargets = Hoogle.searchDatabase hoogledb query
targets = take (min 100 hqiLimitTo)
$ drop hqiOffsetBy
$ map fixResult allTargets
query = unpack hqiQueryInput query = unpack hqiQueryInput
runQuery (Left err) = return $ HoogleQueryBad (tshow err) mcount = limitedLength 0 allTargets
runQuery (Right query') = do
hoogledb <- heDatabase
let query'' = Hoogle.queryExact classifier query'
rawRes = concatMap fixResult
$ Hoogle.search hoogledb query''
mres = spoon
$ take (min 100 hqiLimitTo)
$ drop hqiOffsetBy rawRes
mcount = spoon $ limitedLength 0 rawRes
limitedLength x [] = Just x limitedLength x [] = Just x
limitedLength x (_:rest) limitedLength x (_:rest)
| x >= 20 = Nothing | x >= 20 = Nothing
| otherwise = limitedLength (x + 1) rest | otherwise = limitedLength (x + 1) rest
rendered = pack $ Hoogle.showTagHTML $ Hoogle.renderQuery query''
return $ case (,) <$> mres <*> mcount of
Nothing ->
HoogleQueryOutput rendered [] (Just 0)
Just (results, mcount') ->
HoogleQueryOutput rendered (take hqiLimitTo results) mcount'
classifier = maybe Nothing fixResult Hoogle.Target {..} = HoogleResult
(const (Just Hoogle.UnclassifiedItem)) { hrURL = targetURL
hqiExactSearch , hrSources = toList $ do
(pname, purl) <- targetPackage
fixResult (_, Hoogle.Result locs self docs) = do (mname, murl) <- targetModule
(loc, _) <- take 1 locs let p = PackageLink pname purl
let sources' = unionsWith (++) $ m = ModuleLink mname murl
mapMaybe (getPkgModPair . snd) locs Just (p, [m])
return HoogleResult , hrTitle = -- FIXME find out why these replaces are necessary
{ hrURL = loc unpack $ T.replace "<0>" "" $ T.replace "</0>" "" $ pack
, hrSources = mapToList sources' targetItem
, hrTitle = Hoogle.showTagHTML self , hrBody = targetDocs
, hrBody = fromMaybe "Problem loading documentation" $
spoon $ Hoogle.showTagText docs
} }
getPkgModPair :: [(String, String)]
-> Maybe (Map PackageLink [ModuleLink])
getPkgModPair [(pkg, pkgname), (modu, moduname)] = do
let pkg' = PackageLink pkgname pkg
modu' = ModuleLink moduname modu
return $ asMap $ singletonMap pkg' [modu']
getPkgModPair _ = Nothing

View File

@ -7,16 +7,13 @@ module Stackage.Database.Cron
import ClassyPrelude.Conduit import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity)) import Database.Persist (Entity (Entity))
import Data.Char (isAlpha)
import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar as Tar
import Stackage.Database import Stackage.Database
import Network.HTTP.Client import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource) import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename, removeTree, removeFile) import Filesystem (rename, removeTree, removeFile, isFile, createTree)
import Web.PathPieces (toPathPiece) import Web.PathPieces (toPathPiece)
import Filesystem (isFile, createTree)
import Filesystem.Path.CurrentOS (parent, fromText, encodeString) import Filesystem.Path.CurrentOS (parent, fromText, encodeString)
import Control.Monad.State.Strict (StateT, get, put)
import Network.HTTP.Types (status200) import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP) import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover), import Network.AWS (Credentials (Discover),
@ -35,6 +32,7 @@ import Data.Conduit.Zlib (WindowBits (WindowBits),
compress, ungzip) compress, ungzip)
import qualified Hoogle import qualified Hoogle
import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import System.IO.Temp (withSystemTempDirectory)
filename' :: Text filename' :: Text
filename' = concat filename' = concat
@ -208,6 +206,7 @@ stackageServerCron = do
createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath) createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
putStrLn $ "Creating Hoogle DB for " ++ toPathPiece name
req' <- parseUrl $ unpack tarUrl req' <- parseUrl $ unpack tarUrl
let req = req' { decompress = const True } let req = req' { decompress = const True }
@ -222,13 +221,24 @@ createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
void $ tryIO $ removeFile (fromString outname) void $ tryIO $ removeFile (fromString outname)
createTree (fromString bindir) createTree (fromString bindir)
dbs <- runResourceT withSystemTempDirectory ("hoogle-" ++ unpack (toPathPiece name)) $ \_tmpdir -> do
let tmpdir = "/Users/michael/Desktop/hoo"
runResourceT
$ sourceTarFile False tarFP $ sourceTarFile False tarFP
$$ evalStateC 1 (mapMC (singleDB db name bindir)) $$ mapM_C (liftIO . singleDB db name tmpdir)
=$ sinkList
let args =
[ "generate"
, "--database=" ++ outname
, "--local=" ++ tmpdir
]
putStrLn $ concat
[ "Merging databases... ("
, tshow args
, ")"
]
Hoogle.hoogle args
putStrLn "Merging databases..."
Hoogle.mergeDatabase (catMaybes dbs) outname
putStrLn "Merge done" putStrLn "Merge done"
return $ Just outname return $ Just outname
@ -243,81 +253,29 @@ createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
singleDB :: StackageDatabase singleDB :: StackageDatabase
-> SnapName -> SnapName
-> FilePath -- ^ bindir to write to -> FilePath -- ^ temp directory to write .txt files to
-> Tar.Entry -> Tar.Entry
-> StateT Int (ResourceT IO) (Maybe FilePath) -> IO ()
singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do singleDB db sname tmpdir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
idx <- get --putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
put $! idx + 1
putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
msp <- flip runReaderT db $ do msp <- flip runReaderT db $ do
Just (Entity sid _) <- lookupSnapshot sname Just (Entity sid _) <- lookupSnapshot sname
lookupSnapshotPackage sid pkg lookupSnapshotPackage sid pkg
case msp of case msp of
Nothing -> do Nothing -> putStrLn $ "Unknown: " ++ pkg
putStrLn $ "Unknown: " ++ pkg Just _ -> do
return Nothing let out = tmpdir </> unpack pkg <.> "txt"
Just (Entity _ sp) -> do -- FIXME add @url directive
let ver = snapshotPackageVersion sp writeFile out lbs
pkgver = concat [pkg, "-", ver] {-
out = bindir </> show idx <.> "hoo"
src' = unlines
$ haddockHacks (Just $ unpack docsUrl)
$ lines
$ unpack
$ decodeUtf8 lbs
docsUrl = concat docsUrl = concat
[ "https://www.stackage.org/haddock/" [ "https://www.stackage.org/haddock/"
, toPathPiece sname , toPathPiece sname
, "/" , "/"
, pkgver , pkgver
, "/index.html" , "/index.html"
] ] -}
_errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' out singleDB _ _ _ _ = return ()
return $ Just out
singleDB _ _ _ _ = return Nothing
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
where
translate :: [String] -> [String]
translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")
f "::" = "::"
f (':':xs) = "(:" ++ xs ++ ")"
f ('!':'!':x:xs) | isAlpha x = xs
f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
f x | x `elem` ["{-#","UNPACK","#-}"] = ""
f x = x
g ("where":_) = []
g (x:xs) = x : g xs
g [] = []
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl x = concatMap f
where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y]
| otherwise = [y]

View File

@ -3,7 +3,7 @@ packages:
- . - .
- location: - location:
git: https://github.com/ndmitchell/hoogle.git git: https://github.com/ndmitchell/hoogle.git
commit: 779e04ed20a556bbb92789815ea60068fe188891 commit: ca42c4ce3af1c1ae7d413de242063ca1f682d3ff
extra-dep: true extra-dep: true
image: image:
container: container:

View File

@ -3,11 +3,6 @@
<h1>Hoogle Search <h1>Hoogle Search
<p>Within <a href=@{snapshotLink}>#{snapshotTitle snapshot}</a> <p>Within <a href=@{snapshotLink}>#{snapshotTitle snapshot}</a>
^{hoogleForm} ^{hoogleForm}
$case mresults
$of HoogleQueryBad err
<p>#{err}
<p>For information on what queries should look like, see the <a href="http://www.haskell.org/haskellwiki/Hoogle">hoogle user manual</a>.
$of HoogleQueryOutput _query results mtotalCount
$if null results $if null results
<p>Your search produced no results. <p>Your search produced no results.
$else $else