Get Hoogle 5 working

This commit is contained in:
Michael Snoyman 2016-06-02 20:26:38 +03:00 committed by Chris Done
parent e54b3f80a6
commit c1e16d8e1a
4 changed files with 113 additions and 178 deletions

View File

@ -1,15 +1,16 @@
{-# LANGUAGE QuasiQuotes #-}
module Handler.Hoogle where module Handler.Hoogle where
import Control.DeepSeq (NFData(..)) import Control.DeepSeq (NFData(..))
import Control.DeepSeq.Generics (genericRnf) import Control.DeepSeq.Generics (genericRnf)
import Control.Spoon (spoon) import Data.Data (Data)
import Data.Data (Data (..))
import Data.Text.Read (decimal) import Data.Text.Read (decimal)
import qualified Hoogle import qualified Hoogle
import Import import Import
import Text.Blaze.Html (preEscapedToHtml) import Text.Blaze.Html (preEscapedToHtml)
import Stackage.Database import Stackage.Database
import qualified Stackage.Database.Cron as Cron import qualified Stackage.Database.Cron as Cron
import qualified Data.Text as T
getHoogleDB :: SnapName -> Handler (Maybe FilePath) getHoogleDB :: SnapName -> Handler (Maybe FilePath)
getHoogleDB name = do getHoogleDB name = do
@ -21,7 +22,7 @@ getHoogleR name = do
Entity _ snapshot <- lookupSnapshot name >>= maybe notFound return Entity _ snapshot <- lookupSnapshot name >>= maybe notFound return
mquery <- lookupGetParam "q" mquery <- lookupGetParam "q"
mpage <- lookupGetParam "page" mpage <- lookupGetParam "page"
exact <- maybe False (const True) <$> lookupGetParam "exact" exact <- isJust <$> lookupGetParam "exact" -- FIXME remove, Hoogle no longer supports
mresults' <- lookupGetParam "results" mresults' <- lookupGetParam "results"
let count' = let count' =
case decimal <$> mresults' of case decimal <$> mresults' of
@ -33,25 +34,30 @@ getHoogleR name = do
_ -> 1 _ -> 1
offset = (page - 1) * perPage offset = (page - 1) * perPage
mdatabasePath <- getHoogleDB name mdatabasePath <- getHoogleDB name
heDatabase <- case mdatabasePath of dbPath <- maybe (hoogleDatabaseNotAvailableFor name) return mdatabasePath
Just x -> return $ liftIO $ Hoogle.loadDatabase x
Nothing -> hoogleDatabaseNotAvailableFor name
-- Avoid concurrent Hoogle queries, see -- Avoid concurrent Hoogle queries, see
-- https://github.com/fpco/stackage-server/issues/172 -- https://github.com/fpco/stackage-server/issues/172
lock <- appHoogleLock <$> getYesod lock <- appHoogleLock <$> getYesod
mresults <- case mquery of HoogleQueryOutput results mtotalCount <-
Just query -> withMVar lock $ const $ runHoogleQuery heDatabase HoogleQueryInput case mquery of
{ hqiQueryInput = query Just query -> do
, hqiExactSearch = if exact then Just query else Nothing let input = HoogleQueryInput
, hqiLimitTo = count' { hqiQueryInput = query
, hqiOffsetBy = offset , hqiLimitTo = count'
} , hqiOffsetBy = offset
Nothing -> return $ HoogleQueryOutput "" [] Nothing }
liftIO $ withMVar lock
$ const
$ Hoogle.withDatabase dbPath
-- NB! I got a segfault when I didn't force with $!
$ \db -> return $! runHoogleQuery db input
Nothing -> return $ HoogleQueryOutput [] Nothing
let queryText = fromMaybe "" mquery let queryText = fromMaybe "" mquery
pageLink p = (SnapshotR name HoogleR pageLink p = (SnapshotR name HoogleR
, (if exact then (("exact", "true"):) else id) , (if exact then (("exact", "true"):) else id)
$ (maybe id (\q' -> (("q", q'):)) mquery) $ maybe id (\q' -> (("q", q'):)) mquery
[("page", tshow p)]) [("page", tshow p)])
snapshotLink = SnapshotR name StackageHomeR snapshotLink = SnapshotR name StackageHomeR
hoogleForm = $(widgetFile "hoogle-form") hoogleForm = $(widgetFile "hoogle-form")
@ -84,15 +90,14 @@ perPage = 10
data HoogleQueryInput = HoogleQueryInput data HoogleQueryInput = HoogleQueryInput
{ hqiQueryInput :: Text { hqiQueryInput :: Text
, hqiExactSearch :: Maybe Text
, hqiLimitTo :: Int , hqiLimitTo :: Int
, hqiOffsetBy :: Int , hqiOffsetBy :: Int
} }
deriving (Eq, Read, Show, Data, Typeable, Ord, Generic) deriving (Eq, Read, Show, Data, Typeable, Ord, Generic)
data HoogleQueryOutput = HoogleQueryBad Text data HoogleQueryOutput = HoogleQueryOutput [HoogleResult] (Maybe Int) -- ^ Int == total count
| HoogleQueryOutput Text [HoogleResult] (Maybe Int) -- ^ Text == HTML version of query, Int == total count deriving (Read, Typeable, Data, Show, Eq, Generic)
deriving (Read, Typeable, Data, Show, Eq) instance NFData HoogleQueryOutput
data HoogleResult = HoogleResult data HoogleResult = HoogleResult
{ hrURL :: String { hrURL :: String
@ -118,56 +123,33 @@ instance NFData HoogleResult where rnf = genericRnf
instance NFData PackageLink where rnf = genericRnf instance NFData PackageLink where rnf = genericRnf
instance NFData ModuleLink where rnf = genericRnf instance NFData ModuleLink where rnf = genericRnf
runHoogleQuery :: Monad m runHoogleQuery :: Hoogle.Database -> HoogleQueryInput -> HoogleQueryOutput
=> m Hoogle.Database runHoogleQuery hoogledb HoogleQueryInput {..} =
-> HoogleQueryInput HoogleQueryOutput targets mcount
-> m HoogleQueryOutput
runHoogleQuery heDatabase HoogleQueryInput {..} =
runQuery $ Hoogle.parseQuery Hoogle.Haskell query
where where
allTargets = Hoogle.searchDatabase hoogledb query
targets = take (min 100 hqiLimitTo)
$ drop hqiOffsetBy
$ map fixResult allTargets
query = unpack hqiQueryInput query = unpack hqiQueryInput
runQuery (Left err) = return $ HoogleQueryBad (tshow err) mcount = limitedLength 0 allTargets
runQuery (Right query') = do
hoogledb <- heDatabase
let query'' = Hoogle.queryExact classifier query'
rawRes = concatMap fixResult
$ Hoogle.search hoogledb query''
mres = spoon
$ take (min 100 hqiLimitTo)
$ drop hqiOffsetBy rawRes
mcount = spoon $ limitedLength 0 rawRes
limitedLength x [] = Just x
limitedLength x (_:rest)
| x >= 20 = Nothing
| otherwise = limitedLength (x + 1) rest
rendered = pack $ Hoogle.showTagHTML $ Hoogle.renderQuery query''
return $ case (,) <$> mres <*> mcount of
Nothing ->
HoogleQueryOutput rendered [] (Just 0)
Just (results, mcount') ->
HoogleQueryOutput rendered (take hqiLimitTo results) mcount'
classifier = maybe Nothing limitedLength x [] = Just x
(const (Just Hoogle.UnclassifiedItem)) limitedLength x (_:rest)
hqiExactSearch | x >= 20 = Nothing
| otherwise = limitedLength (x + 1) rest
fixResult (_, Hoogle.Result locs self docs) = do fixResult Hoogle.Target {..} = HoogleResult
(loc, _) <- take 1 locs { hrURL = targetURL
let sources' = unionsWith (++) $ , hrSources = toList $ do
mapMaybe (getPkgModPair . snd) locs (pname, purl) <- targetPackage
return HoogleResult (mname, murl) <- targetModule
{ hrURL = loc let p = PackageLink pname purl
, hrSources = mapToList sources' m = ModuleLink mname murl
, hrTitle = Hoogle.showTagHTML self Just (p, [m])
, hrBody = fromMaybe "Problem loading documentation" $ , hrTitle = -- FIXME find out why these replaces are necessary
spoon $ Hoogle.showTagText docs unpack $ T.replace "<0>" "" $ T.replace "</0>" "" $ pack
} targetItem
, hrBody = targetDocs
getPkgModPair :: [(String, String)] }
-> Maybe (Map PackageLink [ModuleLink])
getPkgModPair [(pkg, pkgname), (modu, moduname)] = do
let pkg' = PackageLink pkgname pkg
modu' = ModuleLink moduname modu
return $ asMap $ singletonMap pkg' [modu']
getPkgModPair _ = Nothing

View File

@ -7,16 +7,13 @@ module Stackage.Database.Cron
import ClassyPrelude.Conduit import ClassyPrelude.Conduit
import Stackage.PackageIndex.Conduit import Stackage.PackageIndex.Conduit
import Database.Persist (Entity (Entity)) import Database.Persist (Entity (Entity))
import Data.Char (isAlpha)
import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar as Tar
import Stackage.Database import Stackage.Database
import Network.HTTP.Client import Network.HTTP.Client
import Network.HTTP.Client.Conduit (bodyReaderSource) import Network.HTTP.Client.Conduit (bodyReaderSource)
import Filesystem (rename, removeTree, removeFile) import Filesystem (rename, removeTree, removeFile, isFile, createTree)
import Web.PathPieces (toPathPiece) import Web.PathPieces (toPathPiece)
import Filesystem (isFile, createTree)
import Filesystem.Path.CurrentOS (parent, fromText, encodeString) import Filesystem.Path.CurrentOS (parent, fromText, encodeString)
import Control.Monad.State.Strict (StateT, get, put)
import Network.HTTP.Types (status200) import Network.HTTP.Types (status200)
import Data.Streaming.Network (bindPortTCP) import Data.Streaming.Network (bindPortTCP)
import Network.AWS (Credentials (Discover), import Network.AWS (Credentials (Discover),
@ -35,6 +32,7 @@ import Data.Conduit.Zlib (WindowBits (WindowBits),
compress, ungzip) compress, ungzip)
import qualified Hoogle import qualified Hoogle
import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import System.IO.Temp (withSystemTempDirectory)
filename' :: Text filename' :: Text
filename' = concat filename' = concat
@ -208,6 +206,7 @@ stackageServerCron = do
createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath) createHoogleDB :: StackageDatabase -> Manager -> SnapName -> IO (Maybe FilePath)
createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
putStrLn $ "Creating Hoogle DB for " ++ toPathPiece name
req' <- parseUrl $ unpack tarUrl req' <- parseUrl $ unpack tarUrl
let req = req' { decompress = const True } let req = req' { decompress = const True }
@ -222,16 +221,27 @@ createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
void $ tryIO $ removeFile (fromString outname) void $ tryIO $ removeFile (fromString outname)
createTree (fromString bindir) createTree (fromString bindir)
dbs <- runResourceT withSystemTempDirectory ("hoogle-" ++ unpack (toPathPiece name)) $ \_tmpdir -> do
$ sourceTarFile False tarFP let tmpdir = "/Users/michael/Desktop/hoo"
$$ evalStateC 1 (mapMC (singleDB db name bindir)) runResourceT
=$ sinkList $ sourceTarFile False tarFP
$$ mapM_C (liftIO . singleDB db name tmpdir)
putStrLn "Merging databases..." let args =
Hoogle.mergeDatabase (catMaybes dbs) outname [ "generate"
putStrLn "Merge done" , "--database=" ++ outname
, "--local=" ++ tmpdir
]
putStrLn $ concat
[ "Merging databases... ("
, tshow args
, ")"
]
Hoogle.hoogle args
return $ Just outname putStrLn "Merge done"
return $ Just outname
where where
root = "hoogle-gen" root = "hoogle-gen"
bindir = root </> "bindir" bindir = root </> "bindir"
@ -243,81 +253,29 @@ createHoogleDB db man name = handleAny (\e -> print e $> Nothing) $ do
singleDB :: StackageDatabase singleDB :: StackageDatabase
-> SnapName -> SnapName
-> FilePath -- ^ bindir to write to -> FilePath -- ^ temp directory to write .txt files to
-> Tar.Entry -> Tar.Entry
-> StateT Int (ResourceT IO) (Maybe FilePath) -> IO ()
singleDB db sname bindir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do singleDB db sname tmpdir e@(Tar.entryContent -> Tar.NormalFile lbs _) = do
idx <- get --putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
put $! idx + 1
putStrLn $ "Loading file for Hoogle: " ++ pack (Tar.entryPath e)
let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e let pkg = pack $ takeWhile (/= '.') $ Tar.entryPath e
msp <- flip runReaderT db $ do msp <- flip runReaderT db $ do
Just (Entity sid _) <- lookupSnapshot sname Just (Entity sid _) <- lookupSnapshot sname
lookupSnapshotPackage sid pkg lookupSnapshotPackage sid pkg
case msp of case msp of
Nothing -> do Nothing -> putStrLn $ "Unknown: " ++ pkg
putStrLn $ "Unknown: " ++ pkg Just _ -> do
return Nothing let out = tmpdir </> unpack pkg <.> "txt"
Just (Entity _ sp) -> do -- FIXME add @url directive
let ver = snapshotPackageVersion sp writeFile out lbs
pkgver = concat [pkg, "-", ver] {-
out = bindir </> show idx <.> "hoo"
src' = unlines
$ haddockHacks (Just $ unpack docsUrl)
$ lines
$ unpack
$ decodeUtf8 lbs
docsUrl = concat docsUrl = concat
[ "https://www.stackage.org/haddock/" [ "https://www.stackage.org/haddock/"
, toPathPiece sname , toPathPiece sname
, "/" , "/"
, pkgver , pkgver
, "/index.html" , "/index.html"
] ] -}
_errs <- liftIO $ Hoogle.createDatabase "" Hoogle.Haskell [] src' out singleDB _ _ _ _ = return ()
return $ Just out
singleDB _ _ _ _ = return Nothing
---------------------------------------------------------------------
-- HADDOCK HACKS
-- (Copied from hoogle-4.2.36/src/Recipe/Haddock.hs)
-- Modifications:
-- 1) Some name qualification
-- 2) Explicit type sig due to polymorphic elem
-- 3) Fixed an unused binding warning
-- Eliminate @version
-- Change :*: to (:*:), Haddock bug
-- Change !!Int to !Int, Haddock bug
-- Change instance [overlap ok] to instance, Haddock bug
-- Change instance [incoherent] to instance, Haddock bug
-- Change instance [safe] to instance, Haddock bug
-- Change !Int to Int, HSE bug
-- Drop {-# UNPACK #-}, Haddock bug
-- Drop everything after where, Haddock bug
haddockHacks :: Maybe Hoogle.URL -> [String] -> [String]
haddockHacks loc src = maybe id haddockPackageUrl loc (translate src)
where
translate :: [String] -> [String]
translate = map (unwords . g . map f . words) . filter (not . isPrefixOf "@version ")
f "::" = "::"
f (':':xs) = "(:" ++ xs ++ ")"
f ('!':'!':x:xs) | isAlpha x = xs
f ('!':x:xs) | isAlpha x || x `elem` ("[(" :: String) = x:xs
f x | x `elem` ["[overlap","ok]","[incoherent]","[safe]"] = ""
f x | x `elem` ["{-#","UNPACK","#-}"] = ""
f x = x
g ("where":_) = []
g (x:xs) = x : g xs
g [] = []
haddockPackageUrl :: Hoogle.URL -> [String] -> [String]
haddockPackageUrl x = concatMap f
where f y | "@package " `isPrefixOf` y = ["@url " ++ x, y]
| otherwise = [y]

View File

@ -3,7 +3,7 @@ packages:
- . - .
- location: - location:
git: https://github.com/ndmitchell/hoogle.git git: https://github.com/ndmitchell/hoogle.git
commit: 779e04ed20a556bbb92789815ea60068fe188891 commit: ca42c4ce3af1c1ae7d413de242063ca1f682d3ff
extra-dep: true extra-dep: true
image: image:
container: container:

View File

@ -3,37 +3,32 @@
<h1>Hoogle Search <h1>Hoogle Search
<p>Within <a href=@{snapshotLink}>#{snapshotTitle snapshot}</a> <p>Within <a href=@{snapshotLink}>#{snapshotTitle snapshot}</a>
^{hoogleForm} ^{hoogleForm}
$case mresults $if null results
$of HoogleQueryBad err <p>Your search produced no results.
<p>#{err} $else
<p>For information on what queries should look like, see the <a href="http://www.haskell.org/haskellwiki/Hoogle">hoogle user manual</a>. <ol .search-results>
$of HoogleQueryOutput _query results mtotalCount $forall HoogleResult url sources self docs <- results
$if null results <li>
<p>Your search produced no results. <p .self>
$else <a href=#{url}>#{preEscapedToHtml self}
<ol .search-results> <table .sources>
$forall HoogleResult url sources self docs <- results $forall (pkg, modus) <- sources
<li> <tr>
<p .self> <th>
<a href=#{url}>#{preEscapedToHtml self} <a href=#{plURL pkg}>#{plName pkg}
<table .sources> <td>
$forall (pkg, modus) <- sources $forall ModuleLink name url' <- modus
<tr> <a href=#{url'}>#{name}
<th> $if null docs
<a href=#{plURL pkg}>#{plName pkg} <p .nodocs>No documentation available.
<td> $else
$forall ModuleLink name url' <- modus <p .docs>#{docs}
<a href=#{url'}>#{name} <p .pagination>
$if null docs $with mpageCount <- fmap getPageCount mtotalCount
<p .nodocs>No documentation available. Page #{page} of #{maybe "many" show mpageCount} #
$else $if page > 1
<p .docs>#{docs} |
<p .pagination> <a href=@?{pageLink $ page - 1}>Previous
$with mpageCount <- fmap getPageCount mtotalCount $if maybe True ((<) page) mpageCount
Page #{page} of #{maybe "many" show mpageCount} # |
$if page > 1 <a href=@?{pageLink $ page + 1}>Next
|
<a href=@?{pageLink $ page - 1}>Previous
$if maybe True ((<) page) mpageCount
|
<a href=@?{pageLink $ page + 1}>Next