Start zipStream implementation
Only local headers and data so far; no central directory yet
This commit is contained in:
parent
49f413a492
commit
c460089d1a
@ -1,19 +1,26 @@
|
|||||||
module Codec.Archive.Zip.Conduit.Internal
|
module Codec.Archive.Zip.Conduit.Internal
|
||||||
( zipError
|
( zipError
|
||||||
|
, idConduit
|
||||||
, sizeCRC
|
, sizeCRC
|
||||||
|
, zip64Size
|
||||||
|
, deflateWindowBits
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Codec.Compression.Zlib.Raw (WindowBits(..))
|
||||||
import Control.Monad.Catch (MonadThrow, throwM)
|
import Control.Monad.Catch (MonadThrow, throwM)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import qualified Data.Conduit as C
|
import qualified Data.Conduit as C
|
||||||
import Data.Digest.CRC32 (crc32Update)
|
import Data.Digest.CRC32 (crc32Update)
|
||||||
import Data.Word (Word32)
|
import Data.Word (Word32, Word64)
|
||||||
|
|
||||||
import Codec.Archive.Zip.Conduit.Types
|
import Codec.Archive.Zip.Conduit.Types
|
||||||
|
|
||||||
zipError :: MonadThrow m => String -> m a
|
zipError :: MonadThrow m => String -> m a
|
||||||
zipError = throwM . ZipError
|
zipError = throwM . ZipError
|
||||||
|
|
||||||
|
idConduit :: Monad m => C.Conduit a m a
|
||||||
|
idConduit = C.awaitForever C.yield
|
||||||
|
|
||||||
passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
|
passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
|
||||||
passthroughFold f z = C.await >>= maybe
|
passthroughFold f z = C.await >>= maybe
|
||||||
(return z)
|
(return z)
|
||||||
@ -21,5 +28,11 @@ passthroughFold f z = C.await >>= maybe
|
|||||||
C.yield x
|
C.yield x
|
||||||
passthroughFold f (f z x))
|
passthroughFold f (f z x))
|
||||||
|
|
||||||
sizeCRC :: (Monad m, Integral n) => C.ConduitM BS.ByteString BS.ByteString m (n, Word32)
|
sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32)
|
||||||
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
|
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
|
||||||
|
|
||||||
|
zip64Size :: Integral n => n
|
||||||
|
zip64Size = 0xffffffff
|
||||||
|
|
||||||
|
deflateWindowBits :: WindowBits
|
||||||
|
deflateWindowBits = WindowBits (-15)
|
||||||
|
|||||||
@ -5,7 +5,7 @@ module Codec.Archive.Zip.Conduit.UnZip
|
|||||||
( ZipEntry(..)
|
( ZipEntry(..)
|
||||||
, ZipInfo(..)
|
, ZipInfo(..)
|
||||||
, ZipError
|
, ZipError
|
||||||
, unZip
|
, unZipStream
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Applicative ((<|>), empty)
|
import Control.Applicative ((<|>), empty)
|
||||||
@ -20,9 +20,9 @@ import qualified Data.ByteString.Char8 as BSC
|
|||||||
import qualified Data.Conduit as C
|
import qualified Data.Conduit as C
|
||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
import Data.Conduit.Serialization.Binary (sinkGet)
|
import Data.Conduit.Serialization.Binary (sinkGet)
|
||||||
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
import Data.Conduit.Zlib (decompress)
|
||||||
import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
|
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
|
||||||
import Data.Word (Word32, Word64)
|
import Data.Word (Word16, Word32, Word64)
|
||||||
|
|
||||||
import Codec.Archive.Zip.Conduit.Types
|
import Codec.Archive.Zip.Conduit.Types
|
||||||
import Codec.Archive.Zip.Conduit.Internal
|
import Codec.Archive.Zip.Conduit.Internal
|
||||||
@ -75,6 +75,17 @@ foldGet g z = do
|
|||||||
e <- G.isEmpty
|
e <- G.isEmpty
|
||||||
if e then return z else g z >>= foldGet g
|
if e then return z else g z >>= foldGet g
|
||||||
|
|
||||||
|
fromDOSTime :: Word16 -> Word16 -> LocalTime
|
||||||
|
fromDOSTime time date = LocalTime
|
||||||
|
(fromGregorian
|
||||||
|
(fromIntegral $ date `shiftR` 9 + 1980)
|
||||||
|
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
|
||||||
|
(fromIntegral $ date .&. 0x1f))
|
||||||
|
(TimeOfDay
|
||||||
|
(fromIntegral $ time `shiftR` 11)
|
||||||
|
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
|
||||||
|
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
|
||||||
|
|
||||||
-- |Stream a zip file, producing a sequence of entry headers and data blocks.
|
-- |Stream a zip file, producing a sequence of entry headers and data blocks.
|
||||||
-- For example, this might produce: @Left (ZipEntry "directory\/" ...), Left (ZipEntry "directory\/file.txt" ...), Right "hello w", Right "orld!\\n", Left ...@
|
-- For example, this might produce: @Left (ZipEntry "directory\/" ...), Left (ZipEntry "directory\/file.txt" ...), Right "hello w", Right "orld!\\n", Left ...@
|
||||||
-- The final result is summary information taken from the end of the zip file.
|
-- The final result is summary information taken from the end of the zip file.
|
||||||
@ -84,8 +95,8 @@ foldGet g z = do
|
|||||||
-- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand.
|
-- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand.
|
||||||
-- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case.
|
-- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case.
|
||||||
-- Any errors are thrown in the underlying monad.
|
-- Any errors are thrown in the underlying monad.
|
||||||
unZip :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
|
unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
|
||||||
unZip = next where
|
unZipStream = next where
|
||||||
next = do
|
next = do
|
||||||
h <- sinkGet header
|
h <- sinkGet header
|
||||||
case h of
|
case h of
|
||||||
@ -150,20 +161,10 @@ unZip = next where
|
|||||||
comp <- G.getWord16le
|
comp <- G.getWord16le
|
||||||
dcomp <- case comp of
|
dcomp <- case comp of
|
||||||
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
|
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
|
||||||
| otherwise -> return $ C.awaitForever C.yield -- idConduit
|
| otherwise -> return idConduit
|
||||||
8 -> return $ decompress (WindowBits (-15))
|
8 -> return $ decompress deflateWindowBits
|
||||||
_ -> fail $ "Unsupported compression method: " ++ show comp
|
_ -> fail $ "Unsupported compression method: " ++ show comp
|
||||||
time <- G.getWord16le
|
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
|
||||||
date <- G.getWord16le
|
|
||||||
let mtime = LocalTime
|
|
||||||
(fromGregorian
|
|
||||||
(fromIntegral $ date `shiftR` 9 + 1980)
|
|
||||||
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
|
|
||||||
(fromIntegral $ date .&. 0x1f))
|
|
||||||
(TimeOfDay
|
|
||||||
(fromIntegral $ time `shiftR` 11)
|
|
||||||
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
|
|
||||||
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
|
|
||||||
crc <- G.getWord32le
|
crc <- G.getWord32le
|
||||||
csiz <- G.getWord32le
|
csiz <- G.getWord32le
|
||||||
usiz <- G.getWord32le
|
usiz <- G.getWord32le
|
||||||
@ -176,8 +177,8 @@ unZip = next where
|
|||||||
G.isolate z $ case t of
|
G.isolate z $ case t of
|
||||||
0x0001 -> do
|
0x0001 -> do
|
||||||
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
|
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
|
||||||
usiz' <- if usiz == maxBound then G.getWord64le else return $ extZip64USize ext
|
usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext
|
||||||
csiz' <- if csiz == maxBound then G.getWord64le else return $ extZip64CSize ext
|
csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext
|
||||||
return ext
|
return ext
|
||||||
{ extZip64 = True
|
{ extZip64 = True
|
||||||
, extZip64USize = usiz'
|
, extZip64USize = usiz'
|
||||||
@ -207,7 +208,7 @@ unZip = next where
|
|||||||
return FileHeader
|
return FileHeader
|
||||||
{ fileEntry = ZipEntry
|
{ fileEntry = ZipEntry
|
||||||
{ zipEntryName = name
|
{ zipEntryName = name
|
||||||
, zipEntryTime = mtime
|
, zipEntryTime = time
|
||||||
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
|
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
|
||||||
}
|
}
|
||||||
, fileDecompress = dcomp
|
, fileDecompress = dcomp
|
||||||
|
|||||||
132
Codec/Archive/Zip/Conduit/Zip.hs
Normal file
132
Codec/Archive/Zip/Conduit/Zip.hs
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
{-# LANGUAGE ViewPatterns #-}
|
||||||
|
module Codec.Archive.Zip.Conduit.Zip
|
||||||
|
( ZipOptions(..)
|
||||||
|
, defaultZipOptions
|
||||||
|
, ZipEntry(..)
|
||||||
|
, ZipData(..)
|
||||||
|
, zipFileData
|
||||||
|
, zipStream
|
||||||
|
) where
|
||||||
|
|
||||||
|
import qualified Codec.Compression.Zlib.Raw as Z
|
||||||
|
import Control.Arrow ((&&&), (+++), left)
|
||||||
|
import Control.Monad (when)
|
||||||
|
import Control.Monad.Base (MonadBase)
|
||||||
|
import Control.Monad.Catch (MonadThrow)
|
||||||
|
import Control.Monad.Primitive (PrimMonad)
|
||||||
|
import Control.Monad.Trans.Resource (MonadResource)
|
||||||
|
import qualified Data.Binary.Put as P
|
||||||
|
import Data.Bits (bit, shiftL, shiftR, (.|.))
|
||||||
|
import qualified Data.ByteString as BS
|
||||||
|
import qualified Data.ByteString.Char8 as BSC
|
||||||
|
import qualified Data.ByteString.Lazy as BSL
|
||||||
|
import qualified Data.Conduit as C
|
||||||
|
import qualified Data.Conduit.Binary as CB
|
||||||
|
import Data.Conduit.Serialization.Binary (sourcePut)
|
||||||
|
import Data.Conduit.Zlib (compress)
|
||||||
|
import Data.Digest.CRC32 (crc32)
|
||||||
|
import Data.Either (isLeft)
|
||||||
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
|
||||||
|
import Data.Word (Word16, Word64)
|
||||||
|
|
||||||
|
import Codec.Archive.Zip.Conduit.Types
|
||||||
|
import Codec.Archive.Zip.Conduit.Internal
|
||||||
|
|
||||||
|
data ZipOptions = ZipOptions
|
||||||
|
{ zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes)
|
||||||
|
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
|
||||||
|
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
|
||||||
|
}
|
||||||
|
|
||||||
|
defaultZipOptions :: ZipOptions
|
||||||
|
defaultZipOptions = ZipOptions
|
||||||
|
{ zipOpt64 = False
|
||||||
|
, zipOptCompressLevel = -1
|
||||||
|
, zipOptInfo = ZipInfo
|
||||||
|
{ zipComment = BS.empty
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data ZipData m
|
||||||
|
= ZipDataByteString BSL.ByteString
|
||||||
|
| ZipDataSource (C.Source m BS.ByteString)
|
||||||
|
|
||||||
|
instance Monad m => Monoid (ZipData m) where
|
||||||
|
mempty = ZipDataByteString BSL.empty
|
||||||
|
mappend (ZipDataByteString a) (ZipDataByteString b) = ZipDataByteString $ mappend a b
|
||||||
|
mappend a b = ZipDataSource $ mappend (zipDataSource a) (zipDataSource b)
|
||||||
|
|
||||||
|
zipFileData :: MonadResource m => FilePath -> ZipData m
|
||||||
|
zipFileData = ZipDataSource . CB.sourceFile
|
||||||
|
|
||||||
|
zipDataSource :: Monad m => ZipData m -> C.Source m BS.ByteString
|
||||||
|
zipDataSource (ZipDataByteString b) = CB.sourceLbs b
|
||||||
|
zipDataSource (ZipDataSource s) = s
|
||||||
|
|
||||||
|
zipData :: Monad m => ZipData m -> Either (C.Source m BS.ByteString) BSL.ByteString
|
||||||
|
zipData (ZipDataByteString b) = Right b
|
||||||
|
zipData (ZipDataSource s) = Left s
|
||||||
|
|
||||||
|
dataSize :: Either a BSL.ByteString -> Maybe Word64
|
||||||
|
dataSize (Left _) = Nothing
|
||||||
|
dataSize (Right b) = Just $ fromIntegral $ BSL.length b
|
||||||
|
|
||||||
|
toDOSTime :: LocalTime -> (Word16, Word16)
|
||||||
|
toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins secs)) =
|
||||||
|
( fromIntegral hour `shiftL` 11 .|. fromIntegral mins `shiftL` 5 .|. truncate secs `shiftR` 1
|
||||||
|
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
|
||||||
|
)
|
||||||
|
|
||||||
|
zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
|
||||||
|
zipStream ZipOptions{..} = do
|
||||||
|
C.awaitForever $ C.toProducer . entry
|
||||||
|
return 0 -- TODO: size
|
||||||
|
where
|
||||||
|
entry (ZipEntry{..}, zipData -> dat) = do
|
||||||
|
let usiz = dataSize dat
|
||||||
|
sdat = left (C..| sizeCRC) dat
|
||||||
|
comp = zipOptCompressLevel /= 0 && all (0 /=) usiz
|
||||||
|
(cdat, csiz)
|
||||||
|
| comp =
|
||||||
|
( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC)))
|
||||||
|
+++ Z.compress) sdat -- level for Z.compress?
|
||||||
|
, dataSize cdat)
|
||||||
|
| otherwise = (left (fmap (id &&& fst)) sdat, usiz)
|
||||||
|
z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz)
|
||||||
|
namelen = BS.length zipEntryName
|
||||||
|
when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
|
||||||
|
sourcePut $ do
|
||||||
|
P.putWord32le 0x04034b50
|
||||||
|
P.putWord16le $ if z64 then 45 else 20
|
||||||
|
P.putWord16le $ if isLeft dat then bit 3 else 0
|
||||||
|
P.putWord16le $ if comp then 8 else 0
|
||||||
|
let (time, date) = toDOSTime zipEntryTime
|
||||||
|
P.putWord16le $ time
|
||||||
|
P.putWord16le $ date
|
||||||
|
P.putWord32le $ either (const 0) crc32 cdat
|
||||||
|
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz
|
||||||
|
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz
|
||||||
|
P.putWord16le $ fromIntegral namelen
|
||||||
|
P.putWord16le $ if z64 then 20 else 0
|
||||||
|
P.putByteString zipEntryName
|
||||||
|
when z64 $ do
|
||||||
|
P.putWord16le 0x0001
|
||||||
|
P.putWord16le 16
|
||||||
|
P.putWord64le $ fromMaybe 0 usiz
|
||||||
|
P.putWord64le $ fromMaybe 0 csiz
|
||||||
|
either
|
||||||
|
(\cd -> do
|
||||||
|
((usz, crc), csz) <- cd -- write compressed data
|
||||||
|
when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
|
||||||
|
sourcePut $ do
|
||||||
|
P.putWord32le 0x08074b50
|
||||||
|
P.putWord32le crc
|
||||||
|
let putsz
|
||||||
|
| z64 = P.putWord64le
|
||||||
|
| otherwise = P.putWord32le . fromIntegral
|
||||||
|
putsz csz
|
||||||
|
putsz usz)
|
||||||
|
CB.sourceLbs
|
||||||
|
cdat
|
||||||
@ -43,5 +43,5 @@ main = do
|
|||||||
exitFailure
|
exitFailure
|
||||||
ZipInfo{..} <- C.runConduit
|
ZipInfo{..} <- C.runConduit
|
||||||
$ CB.sourceHandle stdin
|
$ CB.sourceHandle stdin
|
||||||
C..| C.fuseUpstream unZip extract
|
C..| C.fuseUpstream unZipStream extract
|
||||||
BSC.putStrLn zipComment
|
BSC.putStrLn zipComment
|
||||||
|
|||||||
@ -19,6 +19,7 @@ library
|
|||||||
exposed-modules:
|
exposed-modules:
|
||||||
Codec.Archive.Zip.Conduit.Types
|
Codec.Archive.Zip.Conduit.Types
|
||||||
Codec.Archive.Zip.Conduit.UnZip
|
Codec.Archive.Zip.Conduit.UnZip
|
||||||
|
Codec.Archive.Zip.Conduit.Zip
|
||||||
other-modules:
|
other-modules:
|
||||||
Codec.Archive.Zip.Conduit.Internal
|
Codec.Archive.Zip.Conduit.Internal
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
@ -33,8 +34,10 @@ library
|
|||||||
digest,
|
digest,
|
||||||
exceptions,
|
exceptions,
|
||||||
primitive,
|
primitive,
|
||||||
|
resourcet,
|
||||||
time,
|
time,
|
||||||
transformers-base
|
transformers-base,
|
||||||
|
zlib
|
||||||
|
|
||||||
executable unzip-stream
|
executable unzip-stream
|
||||||
main-is: unzip.hs
|
main-is: unzip.hs
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user