Complete zipStream, untested

This commit is contained in:
Dylan Simon 2017-05-13 00:41:41 -04:00
parent c460089d1a
commit ad6413d9b7
4 changed files with 136 additions and 42 deletions

View File

@ -2,7 +2,8 @@ module Codec.Archive.Zip.Conduit.Internal
( zipError ( zipError
, idConduit , idConduit
, sizeCRC , sizeCRC
, zip64Size , sizeC
, maxBound32
, deflateWindowBits , deflateWindowBits
) where ) where
@ -31,8 +32,11 @@ passthroughFold f z = C.await >>= maybe
sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32) sizeCRC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m (Word64, Word32)
sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0) sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
zip64Size :: Integral n => n sizeC :: Monad m => C.ConduitM BS.ByteString BS.ByteString m Word64
zip64Size = 0xffffffff sizeC = passthroughFold (\l b -> l + fromIntegral (BS.length b)) 0 -- fst <$> sizeCRC
maxBound32 :: Integral n => n
maxBound32 = fromIntegral (maxBound :: Word32)
deflateWindowBits :: WindowBits deflateWindowBits :: WindowBits
deflateWindowBits = WindowBits (-15) deflateWindowBits = WindowBits (-15)

View File

@ -20,7 +20,7 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet) import Data.Conduit.Serialization.Binary (sinkGet)
import Data.Conduit.Zlib (decompress) import qualified Data.Conduit.Zlib as CZ
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
import Data.Word (Word16, Word32, Word64) import Data.Word (Word16, Word32, Word64)
@ -97,15 +97,19 @@ fromDOSTime time date = LocalTime
-- Any errors are thrown in the underlying monad. -- Any errors are thrown in the underlying monad.
unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo unZipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
unZipStream = next where unZipStream = next where
next = do next = do -- local header, or start central directory
h <- sinkGet header h <- sinkGet $ do
sig <- G.getWord32le
case sig of
0x04034b50 -> fileHeader
_ -> centralBody sig
case h of case h of
FileHeader{..} -> do FileHeader{..} -> do
C.yield $ Left fileEntry C.yield $ Left fileEntry
r <- C.mapOutput Right $ r <- C.mapOutput Right $
case zipEntrySize fileEntry of case zipEntrySize fileEntry of
Nothing -> do -- unknown size Nothing -> do -- unknown size
((csize, _), (size, crc)) <- C.fuseBoth sizeCRC (csize, (size, crc)) <- C.fuseBoth sizeC
$ fileDecompress $ fileDecompress
C..| sizeCRC C..| sizeCRC
-- required data description -- required data description
@ -128,11 +132,6 @@ unZipStream = next where
next next
EndOfCentralDirectory{..} -> do EndOfCentralDirectory{..} -> do
return endInfo return endInfo
header = do
sig <- G.getWord32le
case sig of
0x04034b50 -> fileHeader
_ -> centralBody sig
dataDesc h = -- this takes a bit of flexibility to account for the various cases dataDesc h = -- this takes a bit of flexibility to account for the various cases
(do -- with signature (do -- with signature
sig <- G.getWord32le sig <- G.getWord32le
@ -162,7 +161,7 @@ unZipStream = next where
dcomp <- case comp of dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
| otherwise -> return idConduit | otherwise -> return idConduit
8 -> return $ decompress deflateWindowBits 8 -> return $ CZ.decompress deflateWindowBits
_ -> fail $ "Unsupported compression method: " ++ show comp _ -> fail $ "Unsupported compression method: " ++ show comp
time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le
crc <- G.getWord32le crc <- G.getWord32le
@ -177,8 +176,8 @@ unZipStream = next where
G.isolate z $ case t of G.isolate z $ case t of
0x0001 -> do 0x0001 -> do
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF" -- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
usiz' <- if usiz == zip64Size then G.getWord64le else return $ extZip64USize ext usiz' <- if usiz == maxBound32 then G.getWord64le else return $ extZip64USize ext
csiz' <- if csiz == zip64Size then G.getWord64le else return $ extZip64CSize ext csiz' <- if csiz == maxBound32 then G.getWord64le else return $ extZip64CSize ext
return ext return ext
{ extZip64 = True { extZip64 = True
, extZip64USize = usiz' , extZip64USize = usiz'

View File

@ -15,6 +15,7 @@ import Control.Monad (when)
import Control.Monad.Base (MonadBase) import Control.Monad.Base (MonadBase)
import Control.Monad.Catch (MonadThrow) import Control.Monad.Catch (MonadThrow)
import Control.Monad.Primitive (PrimMonad) import Control.Monad.Primitive (PrimMonad)
import Control.Monad.State.Strict (StateT, get)
import Control.Monad.Trans.Resource (MonadResource) import Control.Monad.Trans.Resource (MonadResource)
import qualified Data.Binary.Put as P import qualified Data.Binary.Put as P
import Data.Bits (bit, shiftL, shiftR, (.|.)) import Data.Bits (bit, shiftL, shiftR, (.|.))
@ -23,11 +24,13 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL import qualified Data.ByteString.Lazy as BSL
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Binary as CB
import Data.Conduit.Lift (stateC, execStateC)
import Data.Conduit.Serialization.Binary (sourcePut) import Data.Conduit.Serialization.Binary (sourcePut)
import Data.Conduit.Zlib (compress) import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32) import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft) import Data.Either (isLeft)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe, fromJust)
import Data.Monoid ((<>))
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian) import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
import Data.Word (Word16, Word64) import Data.Word (Word16, Word64)
@ -35,7 +38,7 @@ import Codec.Archive.Zip.Conduit.Types
import Codec.Archive.Zip.Conduit.Internal import Codec.Archive.Zip.Conduit.Internal
data ZipOptions = ZipOptions data ZipOptions = ZipOptions
{ zipOpt64 :: Bool -- ^Allow zip file sizes over 4GB (reduces compatibility, but is otherwise safe for any file sizes) { zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases)
, zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package) , zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package)
, zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file , zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file
} }
@ -49,6 +52,11 @@ defaultZipOptions = ZipOptions
} }
} }
infixr 7 ?*
(?*) :: Num a => Bool -> a -> a
True ?* x = x
False ?* _ = 0
data ZipData m data ZipData m
= ZipDataByteString BSL.ByteString = ZipDataByteString BSL.ByteString
| ZipDataSource (C.Source m BS.ByteString) | ZipDataSource (C.Source m BS.ByteString)
@ -79,54 +87,136 @@ toDOSTime (LocalTime (toGregorian -> (year, month, day)) (TimeOfDay hour mins se
, fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day , fromIntegral (year - 1980) `shiftL` 9 .|. fromIntegral month `shiftL` 5 .|. fromIntegral day
) )
countBytes :: Monad m => C.ConduitM i BS.ByteString m a -> C.ConduitM i BS.ByteString (StateT Word64 m) a
countBytes c = stateC $ \s -> c `C.fuseBoth` ((s +) <$> sizeC)
output :: MonadThrow m => P.Put -> C.ConduitM i BS.ByteString (StateT Word64 m) ()
output = countBytes . sourcePut
-- |The version of this zip program, really just rough indicator of compatibility
zipVersion :: Word16
zipVersion = 48
maxBound16 :: Integral n => n
maxBound16 = fromIntegral (maxBound :: Word16)
zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64 zipStream :: (MonadBase b m, PrimMonad b, MonadThrow m) => ZipOptions -> C.ConduitM (ZipEntry, ZipData m) BS.ByteString m Word64
zipStream ZipOptions{..} = do zipStream ZipOptions{..} = execStateC 0 $ do
C.awaitForever $ C.toProducer . entry (cnt, cdir) <- next 0 (mempty :: P.Put)
return 0 -- TODO: size cdoff <- get
output cdir
eoff <- get
endDirectory cdoff (eoff - cdoff) cnt
where where
next cnt dir = C.await >>= maybe
(return (cnt, dir))
(\e -> do
d <- entry e
next (succ cnt) $ dir <> d)
entry (ZipEntry{..}, zipData -> dat) = do entry (ZipEntry{..}, zipData -> dat) = do
let usiz = dataSize dat let usiz = dataSize dat
sdat = left (C..| sizeCRC) dat sdat = left ((C..| sizeCRC) . C.toProducer) dat
comp = zipOptCompressLevel /= 0 && all (0 /=) usiz comp = zipOptCompressLevel /= 0 && all (0 /=) usiz
(cdat, csiz) (cdat, csiz)
| comp = | comp =
( ((`C.fuseBoth` (compress zipOptCompressLevel deflateWindowBits C..| (fst <$> sizeCRC))) ( ((`C.fuseBoth` (CZ.compress zipOptCompressLevel deflateWindowBits C..| sizeC))
+++ Z.compress) sdat -- level for Z.compress? +++ Z.compress) sdat -- level for Z.compress?
, dataSize cdat) , dataSize cdat)
| otherwise = (left (fmap (id &&& fst)) sdat, usiz) | otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe zipOpt64 (zip64Size <) (max <$> usiz <*> csiz) z64 = maybe zipOpt64 (maxBound32 <) (max <$> usiz <*> csiz)
namelen = BS.length zipEntryName namelen = BS.length zipEntryName
when (namelen > fromIntegral (maxBound :: Word16)) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long" (time, date) = toDOSTime zipEntryTime
sourcePut $ do mcrc = either (const Nothing) (Just . crc32) cdat
when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long"
let common = do
P.putWord16le $ if z64 then 45 else 20
P.putWord16le $ isLeft dat ?* bit 3
P.putWord16le $ comp ?* 8
P.putWord16le $ time
P.putWord16le $ date
off <- get
output $ do
P.putWord32le 0x04034b50 P.putWord32le 0x04034b50
P.putWord16le $ if z64 then 45 else 20 common
P.putWord16le $ if isLeft dat then bit 3 else 0 P.putWord32le $ fromMaybe 0 mcrc
P.putWord16le $ if comp then 8 else 0 P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral csiz
let (time, date) = toDOSTime zipEntryTime P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz
P.putWord16le $ time
P.putWord16le $ date
P.putWord32le $ either (const 0) crc32 cdat
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral csiz
P.putWord32le $ if z64 then zip64Size else maybe 0 fromIntegral usiz
P.putWord16le $ fromIntegral namelen P.putWord16le $ fromIntegral namelen
P.putWord16le $ if z64 then 20 else 0 P.putWord16le $ z64 ?* 20
P.putByteString zipEntryName P.putByteString zipEntryName
when z64 $ do when z64 $ do
P.putWord16le 0x0001 P.putWord16le 0x0001
P.putWord16le 16 P.putWord16le 16
P.putWord64le $ fromMaybe 0 usiz P.putWord64le $ fromMaybe 0 usiz
P.putWord64le $ fromMaybe 0 csiz P.putWord64le $ fromMaybe 0 csiz
either let outsz c = stateC $ \o -> (id &&& (o +) . snd) <$> c
((usz, crc), csz) <- either
(\cd -> do (\cd -> do
((usz, crc), csz) <- cd -- write compressed data r@((usz, crc), csz) <- outsz cd -- write compressed data
when (not z64 && (usz > zip64Size || csz > zip64Size)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
sourcePut $ do output $ do
P.putWord32le 0x08074b50 P.putWord32le 0x08074b50
P.putWord32le crc P.putWord32le crc
let putsz let putsz
| z64 = P.putWord64le | z64 = P.putWord64le
| otherwise = P.putWord32le . fromIntegral | otherwise = P.putWord32le . fromIntegral
putsz csz putsz csz
putsz usz) putsz usz
CB.sourceLbs return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
cdat cdat
return $ do
let o64 = off >= maxBound32
l64 = z64 ?* 16 + o64 ?* 8
P.putWord32le 0x02014b50
P.putWord16le zipVersion
common
P.putWord32le crc
P.putWord32le $ if z64 then maxBound32 else fromIntegral csz
P.putWord32le $ if z64 then maxBound32 else fromIntegral usz
P.putWord16le $ fromIntegral namelen
P.putWord16le $ 4 + l64
P.putWord16le 0 -- comment length
P.putWord16le 0 -- disk number
P.putWord16le 0 -- internal file attributes
P.putWord32le 0 -- external file attributes
P.putWord32le $ if o64 then maxBound32 else fromIntegral off
P.putByteString zipEntryName
when (z64 || o64) $ do
P.putWord16le 0x0001
P.putWord16le l64
when z64 $ do
P.putWord64le usz
P.putWord64le csz
when o64 $
P.putWord64le off
endDirectory cdoff cdlen cnt = do
let z64 = zipOpt64 || cdoff > maxBound32 || cnt > maxBound16
when z64 $ output $ do
P.putWord32le 0x06064b50 -- zip64 end
P.putWord64le 44 -- length of this record
P.putWord16le zipVersion
P.putWord16le 45
P.putWord32le 0 -- disk
P.putWord32le 0 -- central disk
P.putWord64le cnt
P.putWord64le cnt
P.putWord64le cdlen
P.putWord64le cdoff
P.putWord32le 0x07064b50 -- locator:
P.putWord32le 0 -- central disk
P.putWord64le $ cdoff + cdlen
P.putWord32le 1 -- total disks
let comment = zipComment zipOptInfo
commlen = BS.length comment
when (commlen > maxBound16) $ zipError "comment too long"
output $ do
P.putWord32le 0x06054b50 -- end
P.putWord16le 0 -- disk
P.putWord16le 0 -- central disk
P.putWord16le $ fromIntegral $ min maxBound16 cnt
P.putWord16le $ fromIntegral $ min maxBound16 cnt
P.putWord32le $ fromIntegral $ min maxBound32 cdlen
P.putWord32le $ fromIntegral $ max maxBound32 cdoff
P.putWord16le $ fromIntegral commlen
P.putByteString comment

View File

@ -33,6 +33,7 @@ library
conduit-extra, conduit-extra,
digest, digest,
exceptions, exceptions,
mtl,
primitive, primitive,
resourcet, resourcet,
time, time,