WIP: Planned fixes for the Haddock unpacking

This commit is contained in:
Michael Snoyman 2014-10-23 20:55:15 +03:00
parent 9c5c2aef2d
commit 8dc6c65b9d
4 changed files with 82 additions and 25 deletions

View File

@ -136,7 +136,7 @@ makeFoundation useEcho conf = do
<*> pure Nothing <*> pure Nothing
return $ cachedS3Store root creds bucket prefix manager return $ cachedS3Store root creds bucket prefix manager
let haddockRootDir' = "/tmp/stackage-server-haddocks" let haddockRootDir' = "/tmp/stackage-server-haddocks2"
unpacker <- createHaddockUnpacker haddockRootDir' blobStore' unpacker <- createHaddockUnpacker haddockRootDir' blobStore'
widgetCache' <- newIORef mempty widgetCache' <- newIORef mempty

View File

@ -36,7 +36,7 @@ data App = App
, progressMap :: !(IORef (IntMap Progress)) , progressMap :: !(IORef (IntMap Progress))
, nextProgressKey :: !(IORef Int) , nextProgressKey :: !(IORef Int)
, haddockRootDir :: !FilePath , haddockRootDir :: !FilePath
, haddockUnpacker :: !(PackageSetIdent -> IO ()) , haddockUnpacker :: !(ForceUnpack -> PackageSetIdent -> IO ())
-- ^ We have a dedicated thread so that (1) we don't try to unpack too many -- ^ We have a dedicated thread so that (1) we don't try to unpack too many
-- things at once, (2) we never unpack the same thing twice at the same -- things at once, (2) we never unpack the same thing twice at the same
-- time, and (3) so that even if the client connection dies, we finish the -- time, and (3) so that even if the client connection dies, we finish the
@ -44,6 +44,8 @@ data App = App
, widgetCache :: !(IORef (HashMap Text (UTCTime, GWData (Route App)))) , widgetCache :: !(IORef (HashMap Text (UTCTime, GWData (Route App))))
} }
type ForceUnpack = Bool
data Progress = ProgressWorking !Text data Progress = ProgressWorking !Text
| ProgressDone !Text !(Route App) | ProgressDone !Text !(Route App)

View File

@ -17,6 +17,7 @@ import System.Posix.Files (createLink)
import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString.Base16 as B16
import Data.Byteable (toBytes) import Data.Byteable (toBytes)
import Crypto.Hash (Digest, SHA1) import Crypto.Hash (Digest, SHA1)
import qualified Filesystem.Path.CurrentOS as F
form :: Form FileInfo form :: Form FileInfo
form = renderDivs $ areq fileField "tarball containing docs" form = renderDivs $ areq fileField "tarball containing docs"
@ -34,8 +35,7 @@ getUploadHaddockR ident = do
fileSource fileInfo $$ storeWrite (HaddockBundle ident) fileSource fileInfo $$ storeWrite (HaddockBundle ident)
runDB $ update sid [StackageHasHaddocks =. True] runDB $ update sid [StackageHasHaddocks =. True]
master <- getYesod master <- getYesod
getHaddockDir ident >>= liftIO . void . tryIO . removeTree void $ liftIO $ forkIO $ haddockUnpacker master True ident
void $ liftIO $ forkIO $ haddockUnpacker master ident
setMessage "Haddocks uploaded" setMessage "Haddocks uploaded"
redirect $ StackageHomeR ident redirect $ StackageHomeR ident
_ -> defaultLayout $ do _ -> defaultLayout $ do
@ -46,23 +46,28 @@ putUploadHaddockR = getUploadHaddockR
getHaddockR :: PackageSetIdent -> [Text] -> Handler () getHaddockR :: PackageSetIdent -> [Text] -> Handler ()
getHaddockR ident rest = do getHaddockR ident rest = do
error "getHaddockR"
{-
sanitize $ toPathPiece ident sanitize $ toPathPiece ident
mapM_ sanitize rest mapM_ sanitize rest
dir <- getHaddockDir ident (gzdir, rawdir) <- getHaddockDir ident
master <- getYesod master <- getYesod
liftIO $ unlessM (isDirectory dir) $ haddockUnpacker master ident liftIO $ unlessM (isDirectory dir) $ haddockUnpacker master ident
let fp = mconcat $ dir : map fpFromText rest
whenM (liftIO $ isDirectory fp) let rawfp = mconcat $ rawdir : map fpFromText rest
gzfp = mconcat $ gzdir : map fpFromText rest
mime = defaultMimeLookup $ fpToText $ filename rawfp
whenM (liftIO $ isDirectory rawfp)
$ redirect $ HaddockR ident $ rest ++ ["index.html"]
whenM (liftIO $ isDirectory gzfp)
$ redirect $ HaddockR ident $ rest ++ ["index.html"] $ redirect $ HaddockR ident $ rest ++ ["index.html"]
let fpgz = fp <.> "gz" whenM (liftIO $ isFile gzfp) $ do
mime = defaultMimeLookup $ fpToText $ filename fp
whenM (liftIO $ isFile fpgz) $ do
addHeader "Content-Encoding" "gzip" addHeader "Content-Encoding" "gzip"
sendFile mime $ fpToString fpgz sendFile mime $ fpToString gzfp
whenM (liftIO $ isFile fp) $ sendFile mime $ fpToString fp whenM (liftIO $ isFile rawfp) $ sendFile mime $ fpToString rawfp
notFound notFound
where where
@ -70,24 +75,76 @@ getHaddockR ident rest = do
| ("/" `isInfixOf` p) || p `member` (asHashSet $ setFromList ["", ".", ".."]) = | ("/" `isInfixOf` p) || p `member` (asHashSet $ setFromList ["", ".", ".."]) =
permissionDenied "Invalid request" permissionDenied "Invalid request"
| otherwise = return () | otherwise = return ()
-}
getHaddockDir :: PackageSetIdent -> Handler (FilePath, FilePath)
getHaddockDir ident = do
master <- getYesod
return $ mkDirPair (haddockRootDir master) ident
mkDirPair :: FilePath -- ^ root
-> PackageSetIdent
-> (FilePath, FilePath) -- ^ compressed, uncompressed
mkDirPair root ident =
( root </> "idents-raw" </> fpFromText (toPathPiece ident)
, root </> "idents-gz" </> fpFromText (toPathPiece ident)
)
createCompressor
:: FilePath -- ^ uncompressed dir
-> FilePath -- ^ compressed dir
-> FilePath -- ^ cache dir
-> IO (IO ()) -- ^ action to kick off compressor again
createCompressor rawdir gzdir cachedir = do
baton <- newMVar ()
mask $ \restore -> void $ forkIO $ forever $ do
takeMVar baton
runResourceT $ sourceDirectoryDeep False rawdir $$ mapM_C go
return $ void $ tryPutMVar baton ()
where
go fp = liftIO $ handle (print . asSomeException) $ do
gzipHash cachedir fp (gzdir </> fp)
where
Just suffix = F.stripPrefix (rawdir </> "") fp
gzipHash :: FilePath -- ^ cache directory
-> FilePath -- ^ source
-> FilePath -- ^ destination
-> IO ()
gzipHash cachedir src dst = do
putStrLn $ tshow ("gzipHash", cachedir, src, dst) ++ "\n"
-- Should have two threads: one to unpack, one to convert. Never serve the
-- uncompressed files, only the compressed files. When serving, convert on
-- demand.
createHaddockUnpacker :: FilePath -- ^ haddock root createHaddockUnpacker :: FilePath -- ^ haddock root
-> BlobStore StoreKey -> BlobStore StoreKey
-> IO (PackageSetIdent -> IO ()) -> IO (ForceUnpack -> PackageSetIdent -> IO ())
createHaddockUnpacker root store = do createHaddockUnpacker root store = do
chan <- newChan chan <- newChan
compressor <- createCompressor
(root </> "idents-raw")
(root </> "idents-gz")
cacehdir
mask $ \restore -> void $ forkIO $ forever $ do mask $ \restore -> void $ forkIO $ forever $ do
(ident, res) <- readChan chan (forceUnpack, ident, res) <- readChan chan
try (restore $ go ident) >>= putMVar res try (restore $ go forceUnpack ident) >>= putMVar res
restore (gzipHash ident) `catch` (print . asSomeException) compressor
return $ \ident -> do return $ \forceUnpack ident -> do
res <- newEmptyMVar res <- newEmptyMVar
writeChan chan (ident, res) writeChan chan (forceUnpack, ident, res)
takeMVar res >>= either (throwM . asSomeException) return takeMVar res >>= either (throwM . asSomeException) return
where where
go ident = unlessM (isDirectory dir) $ cacehdir = root </> "cachedir"
withSystemTempFile "haddock-bundle.tar.xz" $ \tempfp temph -> do gzipHash = error "gzipHash"
go forceUnpack ident = do
error "go"
{- FIXME
unlessM (isDirectory dir) $
withSystemTempFile "haddock-bundle.tar.xz" $ \tempfp temph -> do
when forceUnpack
$ liftIO $ mapM_ (void . tryIO . removeTree) [dir1, dir2]
withAcquire (storeRead' store (HaddockBundle ident)) $ \msrc -> withAcquire (storeRead' store (HaddockBundle ident)) $ \msrc ->
case msrc of case msrc of
Nothing -> error "No haddocks exist for that snapshot" Nothing -> error "No haddocks exist for that snapshot"
@ -104,6 +161,7 @@ createHaddockUnpacker root store = do
dir = mkDir ident dir = mkDir ident
mkDir ident = root </> "idents" </> fpFromText (toPathPiece ident) mkDir ident = root </> "idents" </> fpFromText (toPathPiece ident)
-}
-- Procedure is to: -- Procedure is to:
-- --
@ -112,6 +170,7 @@ createHaddockUnpacker root store = do
-- * If that hash doesn't exist in the cache, move the new file to the cache -- * If that hash doesn't exist in the cache, move the new file to the cache
-- * Create a hard link from /orig/file.gz to the file in the cache -- * Create a hard link from /orig/file.gz to the file in the cache
-- * Delete /orig/file -- * Delete /orig/file
{-
gzipHash ident = do gzipHash ident = do
createTree cachedir createTree cachedir
runResourceT $ sourceDirectoryDeep False dir runResourceT $ sourceDirectoryDeep False dir
@ -134,3 +193,4 @@ createHaddockUnpacker root store = do
unlessM (isFile fpcache) $ rename (fpFromString tempfp) fpcache unlessM (isFile fpcache) $ rename (fpFromString tempfp) fpcache
createLink (fpToString fpcache) (fpToString $ fp <.> "gz") createLink (fpToString fpcache) (fpToString $ fp <.> "gz")
removeFile fp removeFile fp
-}

View File

@ -12,11 +12,6 @@ import Types as Import
import Yesod.Auth as Import import Yesod.Auth as Import
import Data.Slug (mkSlug) import Data.Slug (mkSlug)
getHaddockDir :: PackageSetIdent -> Handler FilePath
getHaddockDir ident = do
master <- getYesod
return $ haddockRootDir master </> "idents" </> fpFromText (toPathPiece ident)
requireAuthIdOrToken :: Handler UserId requireAuthIdOrToken :: Handler UserId
requireAuthIdOrToken = do requireAuthIdOrToken = do
mtoken <- lookupHeader "authorization" mtoken <- lookupHeader "authorization"