Store cached, gzipped files

Pinging @manny-fp. This is similar to what we discussed. In a very rough test, this brought the incremental disk usage for an extra set of docs from 500MB to 20MB.

commit 9d6975aa55
parent 765ed91767
--- a/Handler/Haddock.hs
+++ b/Handler/Haddock.hs
@@ -2,14 +2,21 @@ module Handler.Haddock where
 import Import
 import Data.BlobStore
-import Filesystem (removeTree, isDirectory, createTree, isFile)
+import Filesystem (removeTree, isDirectory, createTree, isFile, rename, removeFile)
 import Control.Concurrent (forkIO)
 import Control.Concurrent.Chan
-import System.IO.Temp (withSystemTempFile)
+import System.IO.Temp (withSystemTempFile, withTempFile)
 import Control.Exception (mask)
 import System.Process (createProcess, proc, cwd, waitForProcess)
 import System.Exit (ExitCode (ExitSuccess))
 import Network.Mime (defaultMimeLookup)
+import Crypto.Hash.Conduit (sinkHash)
+import System.IO (IOMode (ReadMode), withBinaryFile)
+import Data.Conduit.Zlib (gzip)
+import System.Posix.Files (createLink)
+import qualified Data.ByteString.Base16 as B16
+import Data.Byteable (toBytes)
+import Crypto.Hash (Digest, SHA1)
 
 form :: Form FileInfo
 form = renderDivs $ areq fileField "tarball containing docs"
@@ -48,10 +55,16 @@ getHaddockR ident rest = do
     whenM (liftIO $ isDirectory fp)
         $ redirect $ HaddockR ident $ rest ++ ["index.html"]
-    unlessM (liftIO $ isFile fp) notFound
-    let mime = defaultMimeLookup $ fpToText $ filename fp
-    sendFile mime $ fpToString fp
+    let fpgz = fp <.> "gz"
+        mime = defaultMimeLookup $ fpToText $ filename fp
+    whenM (liftIO $ isFile fpgz) $ do
+        addHeader "Content-Encoding" "gzip"
+        sendFile mime $ fpToString fpgz
+
+    whenM (liftIO $ isFile fp) $ sendFile mime $ fpToString fp
+
+    notFound
   where
     sanitize p
         | ("/" `isInfixOf` p) || p `member` (asHashSet $ setFromList ["", ".", ".."]) =
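The handler change above prefers a pre-gzipped copy of the requested file: if "<file>.gz" exists it is sent with a "Content-Encoding: gzip" header, otherwise the plain file is sent, and only then does the request fall through to notFound. Below is a minimal standalone WAI sketch of the same idea; it is not the Yesod handler from this patch, and the "docroot" constant and the naive URL-to-path mapping (no sanitization, unlike the patch's sanitize) are assumptions for illustration only.

    {-# LANGUAGE OverloadedStrings #-}
    import Network.Wai (Application, pathInfo, responseFile, responseLBS)
    import Network.Wai.Handler.Warp (run)
    import Network.HTTP.Types (status200, status404)
    import Network.Mime (defaultMimeLookup)
    import System.Directory (doesFileExist)
    import qualified Data.Text as T

    app :: Application
    app req respond = do
        -- Hypothetical document root; no path sanitization is done here.
        let path   = "docroot/" ++ T.unpack (T.intercalate "/" (pathInfo req))
            mime   = defaultMimeLookup (T.pack path)
            gzPath = path ++ ".gz"
        hasGz    <- doesFileExist gzPath
        hasPlain <- doesFileExist path
        respond $
            if hasGz
                -- The client decompresses transparently because of the header.
                then responseFile status200
                         [("Content-Type", mime), ("Content-Encoding", "gzip")]
                         gzPath Nothing
                else if hasPlain
                    then responseFile status200 [("Content-Type", mime)] path Nothing
                    else responseLBS status404 [("Content-Type", "text/plain")] "Not found"

    main :: IO ()
    main = run 3000 app

Because the bytes on disk are already compressed, nothing is gzipped per request; the server only chooses which file to stream and which header to attach.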
@@ -67,6 +80,7 @@ createHaddockUnpacker root store = do
     mask $ \restore -> void $ forkIO $ forever $ do
         (ident, res) <- readChan chan
         try (restore $ go ident) >>= putMVar res
+        restore (gzipHash ident) `catch` (print . asSomeException)
     return $ \ident -> do
         res <- newEmptyMVar
         writeChan chan (ident, res)
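The unpacker funnels all work through one background thread: requests arrive on a Chan as (ident, reply MVar) pairs, the unpack step runs under mask/restore with its outcome posted back through the MVar, and the new gzipHash pass runs afterwards with its exceptions merely printed. Here is a self-contained sketch of that channel-plus-MVar worker pattern; the names mkRunner and Job are illustrative and not from the patch.

    import Control.Concurrent (forkIO)
    import Control.Concurrent.Chan (newChan, readChan, writeChan)
    import Control.Concurrent.MVar (newEmptyMVar, putMVar, takeMVar)
    import Control.Exception (SomeException, mask, try)
    import Control.Monad (forever, void)

    type Job = Int

    mkRunner :: (Job -> IO ()) -> IO (Job -> IO (Either SomeException ()))
    mkRunner go = do
        chan <- newChan
        -- Worker loop: all jobs are serialized onto this single thread.
        -- mask keeps the loop's bookkeeping safe from asynchronous exceptions;
        -- restore re-enables them while the job itself runs.
        mask $ \restore -> void $ forkIO $ forever $ do
            (job, res) <- readChan chan
            try (restore (go job)) >>= putMVar res
        -- The returned action enqueues a job and blocks until it finishes,
        -- handing back either the job's exception or its result.
        return $ \job -> do
            res <- newEmptyMVar
            writeChan chan (job, res)
            takeMVar res

    main :: IO ()
    main = do
        runJob <- mkRunner (\j -> putStrLn ("processing job " ++ show j))
        runJob 1 >>= print
        runJob 2 >>= print

Serializing the work this way means at most one tarball is being unpacked or compressed at a time, while callers still get a synchronous success-or-exception answer.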
@@ -87,4 +101,35 @@ createHaddockUnpacker root store = do
         ec <- waitForProcess ph
         if ec == ExitSuccess then return () else throwM ec
       where
-        dir = root </> fpFromText (toPathPiece ident)
+        dir = mkDir ident
+
+    mkDir ident = root </> fpFromText (toPathPiece ident)
+
+    -- Procedure is to:
+    --
+    -- * Traverse the entire directory
+    -- * Gzip each file to a temp file, and get a hash of the contents
+    -- * If that hash doesn't exist in the cache, move the new file to the cache
+    -- * Create a hard link from /orig/file.gz to the file in the cache
+    -- * Delete /orig/file
+    gzipHash ident = do
+        createTree cachedir
+        runResourceT $ sourceDirectoryDeep False dir
+            $$ mapM_C (liftIO . handle (print . asIOException) . oneFile)
+      where
+        dir = mkDir ident
+        cachedir = root </> "cache-dir"
+
+        oneFile fp = withTempFile (fpToString cachedir) "haddock-file.gz" $ \tempfp temph -> do
+            digest <- withBinaryFile (fpToString fp) ReadMode $ \inh ->
+                sourceHandle inh
+                $= gzip
+                $$ (getZipSink $
+                        ZipSink (sinkHandle temph) *>
+                        ZipSink sinkHash)
+            hClose temph
+            let name = decodeUtf8 $ B16.encode $ toBytes (digest :: Digest SHA1)
+            let fpcache = cachedir </> fpFromText name <.> "gz"
+            unlessM (isFile fpcache) $ rename (fpFromString tempfp) fpcache
+            createLink (fpToString fpcache) (fpToString $ fp <.> "gz")
+            removeFile fp
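gzipHash implements the content-addressed cache described in the comment: each file is gzipped, the compressed bytes are hashed with SHA1, one copy per hash lives under cache-dir, the original's ".gz" name becomes a hard link to that copy, and the uncompressed original is removed, so identical files across documentation sets occupy disk space only once. The following standalone sketch shows the same scheme under simplifying assumptions: unlike the patch's conduit pipeline it reads whole files into memory, it skips the temp-file-then-rename step, and the paths in main are hypothetical.

    import qualified Codec.Compression.GZip as GZip
    import qualified Crypto.Hash.SHA1 as SHA1
    import qualified Data.ByteString.Base16 as B16
    import qualified Data.ByteString.Char8 as S8
    import qualified Data.ByteString.Lazy as L
    import Control.Monad (unless)
    import System.Directory (createDirectoryIfMissing, doesFileExist, removeFile)
    import System.FilePath ((</>), (<.>))
    import System.Posix.Files (createLink)

    cacheCompressed :: FilePath   -- ^ cache directory shared across doc sets
                    -> FilePath   -- ^ file to compress, link, and remove
                    -> IO ()
    cacheCompressed cacheDir fp = do
        createDirectoryIfMissing True cacheDir
        gz <- GZip.compress <$> L.readFile fp
        -- Name the cache entry after the hash of the *compressed* contents.
        let name   = S8.unpack (B16.encode (SHA1.hashlazy gz))
            cached = cacheDir </> name <.> "gz"
        exists <- doesFileExist cached
        unless exists (L.writeFile cached gz)
        -- Identical files in different doc sets now share one inode on disk.
        createLink cached (fp <.> "gz")
        removeFile fp

    main :: IO ()
    main = cacheCompressed "cache-dir" "docs/index.html"  -- example paths only

Hard links let every doc set keep its own <file>.gz path while the kernel stores a single copy, which is what makes the incremental cost of an additional, largely duplicated set of docs so small.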
--- a/stackage-server.cabal
+++ b/stackage-server.cabal
@@ -132,6 +132,7 @@ library
                  , old-locale
                  , th-lift
                  , mime-types
+                 , unix
 
 executable stackage-server
     if flag(library-only)