Minor reorganization in prep for zipping
This commit is contained in:
parent
2c87f62e2b
commit
49f413a492
25
Codec/Archive/Zip/Conduit/Internal.hs
Normal file
25
Codec/Archive/Zip/Conduit/Internal.hs
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
module Codec.Archive.Zip.Conduit.Internal
|
||||||
|
( zipError
|
||||||
|
, sizeCRC
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Control.Monad.Catch (MonadThrow, throwM)
|
||||||
|
import qualified Data.ByteString as BS
|
||||||
|
import qualified Data.Conduit as C
|
||||||
|
import Data.Digest.CRC32 (crc32Update)
|
||||||
|
import Data.Word (Word32)
|
||||||
|
|
||||||
|
import Codec.Archive.Zip.Conduit.Types
|
||||||
|
|
||||||
|
zipError :: MonadThrow m => String -> m a
|
||||||
|
zipError = throwM . ZipError
|
||||||
|
|
||||||
|
passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
|
||||||
|
passthroughFold f z = C.await >>= maybe
|
||||||
|
(return z)
|
||||||
|
(\x -> do
|
||||||
|
C.yield x
|
||||||
|
passthroughFold f (f z x))
|
||||||
|
|
||||||
|
sizeCRC :: (Monad m, Integral n) => C.ConduitM BS.ByteString BS.ByteString m (n, Word32)
|
||||||
|
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
|
||||||
31
Codec/Archive/Zip/Conduit/Types.hs
Normal file
31
Codec/Archive/Zip/Conduit/Types.hs
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
module Codec.Archive.Zip.Conduit.Types where
|
||||||
|
|
||||||
|
import Control.Exception (Exception(..))
|
||||||
|
import Data.ByteString (ByteString)
|
||||||
|
import Data.String (IsString(..))
|
||||||
|
import Data.Time.LocalTime (LocalTime)
|
||||||
|
import Data.Typeable (Typeable)
|
||||||
|
import Data.Word (Word64)
|
||||||
|
|
||||||
|
-- |Errors thrown during zip file processing
|
||||||
|
newtype ZipError = ZipError String
|
||||||
|
deriving (Show, Typeable)
|
||||||
|
|
||||||
|
instance IsString ZipError where
|
||||||
|
fromString = ZipError
|
||||||
|
|
||||||
|
instance Exception ZipError where
|
||||||
|
displayException (ZipError e) = "ZipError: " ++ e
|
||||||
|
|
||||||
|
-- |(The beginning of) a single entry in a zip stream, which may be any file or directory.
|
||||||
|
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
|
||||||
|
data ZipEntry = ZipEntry
|
||||||
|
{ zipEntryName :: ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories
|
||||||
|
, zipEntryTime :: LocalTime -- ^Modification time
|
||||||
|
, zipEntrySize :: Maybe Word64 -- ^Size of file data (if known)
|
||||||
|
}
|
||||||
|
|
||||||
|
-- |Summary information at the end of a zip stream.
|
||||||
|
data ZipInfo = ZipInfo
|
||||||
|
{ zipComment :: ByteString
|
||||||
|
}
|
||||||
@ -9,10 +9,9 @@ module Codec.Archive.Zip.Conduit.UnZip
|
|||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Applicative ((<|>), empty)
|
import Control.Applicative ((<|>), empty)
|
||||||
import Control.Exception (Exception(..))
|
|
||||||
import Control.Monad (when, unless, guard)
|
import Control.Monad (when, unless, guard)
|
||||||
import Control.Monad.Base (MonadBase)
|
import Control.Monad.Base (MonadBase)
|
||||||
import Control.Monad.Catch (MonadThrow, throwM)
|
import Control.Monad.Catch (MonadThrow)
|
||||||
import Control.Monad.Primitive (PrimMonad)
|
import Control.Monad.Primitive (PrimMonad)
|
||||||
import qualified Data.Binary.Get as G
|
import qualified Data.Binary.Get as G
|
||||||
import Data.Bits ((.&.), complement, testBit, shiftL, shiftR)
|
import Data.Bits ((.&.), complement, testBit, shiftL, shiftR)
|
||||||
@ -22,30 +21,11 @@ import qualified Data.Conduit as C
|
|||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
import Data.Conduit.Serialization.Binary (sinkGet)
|
import Data.Conduit.Serialization.Binary (sinkGet)
|
||||||
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
||||||
import Data.Digest.CRC32 (crc32Update)
|
|
||||||
import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
|
import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
|
||||||
import Data.Typeable (Typeable)
|
|
||||||
import Data.Word (Word32, Word64)
|
import Data.Word (Word32, Word64)
|
||||||
|
|
||||||
-- |(The beginning of) a single entry in a zip stream, which may be any file or directory.
|
import Codec.Archive.Zip.Conduit.Types
|
||||||
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
|
import Codec.Archive.Zip.Conduit.Internal
|
||||||
data ZipEntry = ZipEntry
|
|
||||||
{ zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories
|
|
||||||
, zipEntryTime :: LocalTime -- ^Modification time
|
|
||||||
, zipEntrySize :: !Word64 -- ^Size of file data
|
|
||||||
}
|
|
||||||
|
|
||||||
-- |Summary information at the end of a zip stream.
|
|
||||||
data ZipInfo = ZipInfo
|
|
||||||
{ zipComment :: BS.ByteString
|
|
||||||
}
|
|
||||||
|
|
||||||
-- |Errors thrown during zip file processing
|
|
||||||
newtype ZipError = ZipError String
|
|
||||||
deriving (Show, Typeable)
|
|
||||||
|
|
||||||
instance Exception ZipError where
|
|
||||||
displayException (ZipError e) = "ZipError: " ++ e
|
|
||||||
|
|
||||||
data Header m
|
data Header m
|
||||||
= FileHeader
|
= FileHeader
|
||||||
@ -54,7 +34,6 @@ data Header m
|
|||||||
, fileCRC :: !Word32
|
, fileCRC :: !Word32
|
||||||
, fileCSize :: !Word64
|
, fileCSize :: !Word64
|
||||||
, fileZip64 :: !Bool
|
, fileZip64 :: !Bool
|
||||||
, fileStream :: !Bool
|
|
||||||
}
|
}
|
||||||
| EndOfCentralDirectory
|
| EndOfCentralDirectory
|
||||||
{ endInfo :: ZipInfo
|
{ endInfo :: ZipInfo
|
||||||
@ -75,9 +54,6 @@ data ExtField = ExtField
|
|||||||
}
|
}
|
||||||
-}
|
-}
|
||||||
|
|
||||||
zipError :: MonadThrow m => String -> m a
|
|
||||||
zipError = throwM . ZipError
|
|
||||||
|
|
||||||
pass :: (MonadThrow m, Integral n) => n -> C.Conduit BS.ByteString m BS.ByteString
|
pass :: (MonadThrow m, Integral n) => n -> C.Conduit BS.ByteString m BS.ByteString
|
||||||
pass 0 = return ()
|
pass 0 = return ()
|
||||||
pass n = C.await >>= maybe
|
pass n = C.await >>= maybe
|
||||||
@ -94,16 +70,6 @@ pass n = C.await >>= maybe
|
|||||||
pass n')
|
pass n')
|
||||||
where ni = toInteger n
|
where ni = toInteger n
|
||||||
|
|
||||||
passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
|
|
||||||
passthroughFold f z = C.await >>= maybe
|
|
||||||
(return z)
|
|
||||||
(\x -> do
|
|
||||||
C.yield x
|
|
||||||
passthroughFold f (f z x))
|
|
||||||
|
|
||||||
sizeCRC :: (Monad m, Integral n) => C.ConduitM BS.ByteString BS.ByteString m (n, Word32)
|
|
||||||
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
|
|
||||||
|
|
||||||
foldGet :: (a -> G.Get a) -> a -> G.Get a
|
foldGet :: (a -> G.Get a) -> a -> G.Get a
|
||||||
foldGet g z = do
|
foldGet g z = do
|
||||||
e <- G.isEmpty
|
e <- G.isEmpty
|
||||||
@ -126,8 +92,8 @@ unZip = next where
|
|||||||
FileHeader{..} -> do
|
FileHeader{..} -> do
|
||||||
C.yield $ Left fileEntry
|
C.yield $ Left fileEntry
|
||||||
r <- C.mapOutput Right $
|
r <- C.mapOutput Right $
|
||||||
if fileStream
|
case zipEntrySize fileEntry of
|
||||||
then do -- unknown size
|
Nothing -> do -- unknown size
|
||||||
((csize, _), (size, crc)) <- C.fuseBoth sizeCRC
|
((csize, _), (size, crc)) <- C.fuseBoth sizeCRC
|
||||||
$ fileDecompress
|
$ fileDecompress
|
||||||
C..| sizeCRC
|
C..| sizeCRC
|
||||||
@ -136,17 +102,17 @@ unZip = next where
|
|||||||
{ fileCSize = csize
|
{ fileCSize = csize
|
||||||
, fileCRC = crc
|
, fileCRC = crc
|
||||||
, fileEntry = fileEntry
|
, fileEntry = fileEntry
|
||||||
{ zipEntrySize = size
|
{ zipEntrySize = Just size
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else do -- known size
|
Just usize -> do -- known size
|
||||||
(size, crc) <- pass fileCSize
|
(size, crc) <- pass fileCSize
|
||||||
C..| (fileDecompress >> CL.sinkNull)
|
C..| (fileDecompress >> CL.sinkNull)
|
||||||
C..| sizeCRC
|
C..| sizeCRC
|
||||||
-- traceM $ "size=" ++ show size ++ "," ++ show (zipEntrySize fileEntry) ++ " crc=" ++ show crc ++ "," ++ show fileCRC
|
-- traceM $ "size=" ++ show size ++ "," ++ show (zipEntrySize fileEntry) ++ " crc=" ++ show crc ++ "," ++ show fileCRC
|
||||||
-- optional data description (possibly ambiguous!)
|
-- optional data description (possibly ambiguous!)
|
||||||
sinkGet $ (guard =<< dataDesc h) <|> return ()
|
sinkGet $ (guard =<< dataDesc h) <|> return ()
|
||||||
return (size == zipEntrySize fileEntry && crc == fileCRC)
|
return (size == usize && crc == fileCRC)
|
||||||
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
|
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
|
||||||
next
|
next
|
||||||
EndOfCentralDirectory{..} -> do
|
EndOfCentralDirectory{..} -> do
|
||||||
@ -168,7 +134,7 @@ unZip = next where
|
|||||||
csiz <- getSize
|
csiz <- getSize
|
||||||
usiz <- getSize
|
usiz <- getSize
|
||||||
-- traceM $ "crc=" ++ show crc ++ "," ++ show fileCRC ++ " csiz=" ++ show csiz ++ "," ++ show fileCSize ++ " usiz=" ++ show usiz ++ "," ++ show (zipEntrySize fileEntry)
|
-- traceM $ "crc=" ++ show crc ++ "," ++ show fileCRC ++ " csiz=" ++ show csiz ++ "," ++ show fileCSize ++ " usiz=" ++ show usiz ++ "," ++ show (zipEntrySize fileEntry)
|
||||||
return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry
|
return $ crc == fileCRC && csiz == fileCSize && (usiz ==) `all` zipEntrySize fileEntry
|
||||||
dataDescBody _ = empty
|
dataDescBody _ = empty
|
||||||
central = G.getWord32le >>= centralBody
|
central = G.getWord32le >>= centralBody
|
||||||
centralBody 0x02014b50 = centralHeader >> central
|
centralBody 0x02014b50 = centralHeader >> central
|
||||||
@ -242,13 +208,12 @@ unZip = next where
|
|||||||
{ fileEntry = ZipEntry
|
{ fileEntry = ZipEntry
|
||||||
{ zipEntryName = name
|
{ zipEntryName = name
|
||||||
, zipEntryTime = mtime
|
, zipEntryTime = mtime
|
||||||
, zipEntrySize = extZip64USize
|
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
|
||||||
}
|
}
|
||||||
, fileDecompress = dcomp
|
, fileDecompress = dcomp
|
||||||
, fileCSize = extZip64CSize
|
, fileCSize = extZip64CSize
|
||||||
, fileCRC = crc
|
, fileCRC = crc
|
||||||
, fileZip64 = extZip64
|
, fileZip64 = extZip64
|
||||||
, fileStream = testBit gpf 3
|
|
||||||
}
|
}
|
||||||
centralHeader = do
|
centralHeader = do
|
||||||
-- ignore everything
|
-- ignore everything
|
||||||
|
|||||||
@ -20,10 +20,10 @@ extract = C.awaitForever start where
|
|||||||
liftIO $ BSC.putStrLn zipEntryName
|
liftIO $ BSC.putStrLn zipEntryName
|
||||||
liftIO $ createDirectoryIfMissing True (takeDirectory name)
|
liftIO $ createDirectoryIfMissing True (takeDirectory name)
|
||||||
if BSC.last zipEntryName == '/'
|
if BSC.last zipEntryName == '/'
|
||||||
then when (zipEntrySize /= 0) $ fail $ name ++ ": non-empty directory"
|
then when ((0 /=) `any` zipEntrySize) $ fail $ name ++ ": non-empty directory"
|
||||||
else do -- C.bracketP
|
else do -- C.bracketP
|
||||||
h <- liftIO $ openFile name WriteMode
|
h <- liftIO $ openFile name WriteMode
|
||||||
liftIO $ hSetFileSize h $ toInteger zipEntrySize
|
mapM_ (liftIO . hSetFileSize h . toInteger) zipEntrySize
|
||||||
write C..| CB.sinkHandle h
|
write C..| CB.sinkHandle h
|
||||||
liftIO $ hClose h
|
liftIO $ hClose h
|
||||||
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
|
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
name: zip-stream
|
name: zip-stream
|
||||||
version: 0
|
version: 0
|
||||||
synopsis: ZIP file stream processing using conduits
|
synopsis: ZIP archive streaming using conduits
|
||||||
description: Process (extract and create) zip files as streams, accessing individual files without having to write a zip file to disk, unlike zip-conduit. Unfortunately, processing zip files in this way introduces some limitations on what ZIP features can be supported, but the goal is to support most cases.
|
description: Process (extract and create) zip files as streams (e.g., over the network), accessing individual files without having to write a zip file to disk (unlike zip-conduit).
|
||||||
license: BSD3
|
license: BSD3
|
||||||
license-file: LICENSE
|
license-file: LICENSE
|
||||||
author: Dylan Simon
|
author: Dylan Simon
|
||||||
@ -17,7 +17,10 @@ source-repository head
|
|||||||
|
|
||||||
library
|
library
|
||||||
exposed-modules:
|
exposed-modules:
|
||||||
|
Codec.Archive.Zip.Conduit.Types
|
||||||
Codec.Archive.Zip.Conduit.UnZip
|
Codec.Archive.Zip.Conduit.UnZip
|
||||||
|
other-modules:
|
||||||
|
Codec.Archive.Zip.Conduit.Internal
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
ghc-options: -Wall
|
ghc-options: -Wall
|
||||||
build-depends:
|
build-depends:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user