diff --git a/Codec/Archive/Zip/Conduit/Types.hs b/Codec/Archive/Zip/Conduit/Types.hs index 705de3f..320c82e 100644 --- a/Codec/Archive/Zip/Conduit/Types.hs +++ b/Codec/Archive/Zip/Conduit/Types.hs @@ -2,6 +2,9 @@ module Codec.Archive.Zip.Conduit.Types where import Control.Exception (Exception(..)) import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as BSL +import qualified Data.Conduit as C +import Data.Conduit.Binary (sourceLbs) import Data.String (IsString(..)) import Data.Time.LocalTime (LocalTime) import Data.Typeable (Typeable) @@ -17,15 +20,35 @@ instance IsString ZipError where instance Exception ZipError where displayException (ZipError e) = "ZipError: " ++ e +-- |Summary information at the end of a zip stream. +data ZipInfo = ZipInfo + { zipComment :: ByteString + } deriving (Eq, Show) + -- |(The beginning of) a single entry in a zip stream, which may be any file or directory. -- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that. data ZipEntry = ZipEntry { zipEntryName :: ByteString -- ^File name (in posix format, no leading slashes), usually utf-8 encoded, with a trailing slash for directories , zipEntryTime :: LocalTime -- ^Modification time , zipEntrySize :: Maybe Word64 -- ^Size of file data (if known); checked on zipping and also used as hint to enable zip64 - } + } deriving (Eq, Show) --- |Summary information at the end of a zip stream. -data ZipInfo = ZipInfo - { zipComment :: ByteString - } +-- |The data contents for a 'ZipEntry'. For empty entries (e.g., directories), use 'mempty'. +data ZipData m + = ZipDataByteString BSL.ByteString -- ^A known ByteString, which will be fully evaluated (not streamed) + | ZipDataSource (C.Source m ByteString) -- ^A byte stream producer, streamed (and compressed) directly into the zip + +instance Monad m => Monoid (ZipData m) where + mempty = ZipDataByteString BSL.empty + mappend (ZipDataByteString a) (ZipDataByteString b) = ZipDataByteString $ mappend a b + mappend a b = ZipDataSource $ mappend (sourceZipData a) (sourceZipData b) + +-- |Normalize any 'ZipData' to a simple source +sourceZipData :: Monad m => ZipData m -> C.Source m ByteString +sourceZipData (ZipDataByteString b) = sourceLbs b +sourceZipData (ZipDataSource s) = s + +-- |Convert between unpacked (as 'Codec.Archive.Zip.Conduit.UnZip.unZipStream' produces) and packed (as 'Codec.Archive.Zip.Conduit.Zip.zipStream' consumes) representations. +-- This is mainly for testing purposes, or if you really want to re-zip a stream on the fly for some reason. +-- Note that each 'ZipData' must be consumed completely before the next entry can be produced. +-- packZipEntries :: C.Conduit (Either ZipEntry BS.ByteString) m (ZipEntry, ZipData m) diff --git a/Codec/Archive/Zip/Conduit/UnZip.hs b/Codec/Archive/Zip/Conduit/UnZip.hs index 484eba5..62a5a0f 100644 --- a/Codec/Archive/Zip/Conduit/UnZip.hs +++ b/Codec/Archive/Zip/Conduit/UnZip.hs @@ -2,10 +2,9 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RankNTypes #-} module Codec.Archive.Zip.Conduit.UnZip - ( ZipEntry(..) + ( unZipStream + , ZipEntry(..) , ZipInfo(..) - , ZipError - , unZipStream ) where import Control.Applicative ((<|>), empty) @@ -94,7 +93,7 @@ fromDOSTime time date = LocalTime -- This only supports a limited number of zip file features, including deflate compression and zip64. -- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand. -- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case. --- Any errors are thrown in the underlying monad. +-- Any errors are thrown in the underlying monad (as 'ZipError's or 'Data.Conduit.Serialization.Binary.ParseError'). unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo unZipStream = next where next = do -- local header, or start central directory diff --git a/Codec/Archive/Zip/Conduit/Zip.hs b/Codec/Archive/Zip/Conduit/Zip.hs index 79f79ce..4e47682 100644 --- a/Codec/Archive/Zip/Conduit/Zip.hs +++ b/Codec/Archive/Zip/Conduit/Zip.hs @@ -2,13 +2,13 @@ {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE ViewPatterns #-} module Codec.Archive.Zip.Conduit.Zip - ( ZipOptions(..) + ( zipStream + , ZipOptions(..) , ZipInfo(..) , defaultZipOptions , ZipEntry(..) , ZipData(..) , zipFileData - , zipStream ) where import qualified Codec.Compression.Zlib.Raw as Z @@ -60,24 +60,10 @@ infixr 7 ?* True ?* x = x False ?* _ = 0 --- |The data contents for a 'ZipEntry'. For empty entries (e.g., directories), use 'mempty'. -data ZipData m - = ZipDataByteString BSL.ByteString -- ^A known ByteString, which will be fully evaluated (not streamed) - | ZipDataSource (C.Source m BS.ByteString) -- ^A byte stream producer, streamed (and compressed) directly into the zip - -instance Monad m => Monoid (ZipData m) where - mempty = ZipDataByteString BSL.empty - mappend (ZipDataByteString a) (ZipDataByteString b) = ZipDataByteString $ mappend a b - mappend a b = ZipDataSource $ mappend (zipDataSource a) (zipDataSource b) - -- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CB.sourceFile'@). zipFileData :: MonadResource m => FilePath -> ZipData m zipFileData = ZipDataSource . CB.sourceFile -zipDataSource :: Monad m => ZipData m -> C.Source m BS.ByteString -zipDataSource (ZipDataByteString b) = CB.sourceLbs b -zipDataSource (ZipDataSource s) = s - zipData :: Monad m => ZipData m -> Either (C.Source m BS.ByteString) BSL.ByteString zipData (ZipDataByteString b) = Right b zipData (ZipDataSource s) = Left s @@ -106,6 +92,7 @@ maxBound16 = fromIntegral (maxBound :: Word16) -- The final result is the total size of the zip file. -- -- Depending on options, the resulting zip file should be compatible with most unzipping applications. +-- Any errors are thrown in the underlying monad (as 'ZipError's). zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64 zipStream ZipOptions{..} = execStateC 0 $ do (cnt, cdir) <- next 0 (mempty :: P.Put) diff --git a/zip-stream.cabal b/zip-stream.cabal index d4df5b8..5a2cda2 100644 --- a/zip-stream.cabal +++ b/zip-stream.cabal @@ -1,7 +1,7 @@ name: zip-stream -version: 0 +version: 0.1 synopsis: ZIP archive streaming using conduits -description: Process (extract and create) zip files as streams (e.g., over the network), accessing individual files without having to write a zip file to disk (unlike zip-conduit). +description: Process (extract and create) zip files as streams (e.g., over the network), accessing contained files without having to write the zip file to disk (unlike zip-conduit). license: BSD3 license-file: LICENSE author: Dylan Simon