Add support for compressed stream data

This commit is contained in:
Dylan Simon 2017-05-11 19:58:08 -04:00
parent bbc25f9766
commit 9a6b39ce11

View File

@ -17,6 +17,7 @@ import Control.Monad.Primitive (PrimMonad)
import qualified Data.Binary.Get as G import qualified Data.Binary.Get as G
import Data.Bits ((.&.), complement, testBit, shiftL, shiftR) import Data.Bits ((.&.), complement, testBit, shiftL, shiftR)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet) import Data.Conduit.Serialization.Binary (sinkGet)
@ -53,6 +54,7 @@ data Header m
, fileCRC :: !Word32 , fileCRC :: !Word32
, fileCSize :: !Word64 , fileCSize :: !Word64
, fileZip64 :: !Bool , fileZip64 :: !Bool
, fileStream :: !Bool
} }
| EndOfCentralDirectory | EndOfCentralDirectory
{ endInfo :: ZipInfo { endInfo :: ZipInfo
@ -92,11 +94,15 @@ pass n = C.await >>= maybe
pass n') pass n')
where ni = toInteger n where ni = toInteger n
crc32 :: Monad m => C.Consumer BS.ByteString m Word32 passthroughFold :: Monad m => (a -> b -> a) -> a -> C.ConduitM b b m a
crc32 = CL.fold crc32Update 0 passthroughFold f z = C.await >>= maybe
(return z)
(\x -> do
C.yield x
passthroughFold f (f z x))
checkCRC :: MonadThrow m => Word32 -> C.Conduit BS.ByteString m BS.ByteString sizeCRC :: (Monad m, Integral n) => C.ConduitM BS.ByteString BS.ByteString m (n, Word32)
checkCRC t = C.passthroughSink crc32 $ \r -> unless (r == t) $ zipError "CRC32 mismatch" sizeCRC = passthroughFold (\(l, c) b -> (l + fromIntegral (BS.length b), crc32Update c b)) (0, 0)
foldGet :: (a -> G.Get a) -> a -> G.Get a foldGet :: (a -> G.Get a) -> a -> G.Get a
foldGet g z = do foldGet g z = do
@ -109,7 +115,7 @@ foldGet g z = do
-- No state is maintained during processing, and, in particular, any information in the central directory is discarded. -- No state is maintained during processing, and, in particular, any information in the central directory is discarded.
-- --
-- This only supports a limited number of zip file features, including deflate compression and zip64. -- This only supports a limited number of zip file features, including deflate compression and zip64.
-- It does not (ironically) support zip files that have been created as streams, where file sizes are not known beforehand (though this could potentially be fixed for some cases). -- It does not (ironically) support uncompressed zip files that have been created as streams, where file sizes are not known beforehand.
-- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case. -- Since it does not use the offset information at the end of the file, it assumes all entries are packed sequentially, which is usually the case.
-- Any errors are thrown in the underlying monad. -- Any errors are thrown in the underlying monad.
unZip :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo unZip :: (MonadBase b m, PrimMonad b, MonadThrow m) => C.ConduitM BS.ByteString (Either ZipEntry BS.ByteString) m ZipInfo
@ -119,10 +125,28 @@ unZip = next where
case h of case h of
FileHeader{..} -> do FileHeader{..} -> do
C.yield $ Left fileEntry C.yield $ Left fileEntry
C.mapOutput Right $ pass fileCSize r <- C.mapOutput Right $
C..| (fileDecompress >> CL.sinkNull) if fileStream
C..| checkCRC fileCRC then do -- unknown size
sinkGet $ dataDesc h ((csize, _), (size, crc)) <- C.fuseBoth sizeCRC
$ fileDecompress
C..| sizeCRC
-- required data description
sinkGet $ dataDesc h
{ fileCSize = csize
, fileCRC = crc
, fileEntry = fileEntry
{ zipEntrySize = size
}
}
else do -- known size
(size, crc) <- pass fileCSize
C..| (fileDecompress >> CL.sinkNull)
C..| sizeCRC
-- optional data description
(&&) (size == zipEntrySize fileEntry && crc == fileCRC)
<$> sinkGet (dataDesc h <|> return True)
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
next next
EndOfCentralDirectory{..} -> do EndOfCentralDirectory{..} -> do
return endInfo return endInfo
@ -137,13 +161,12 @@ unZip = next where
guard (sig == 0x06054b50) guard (sig == 0x06054b50)
dataDescBody h) dataDescBody h)
<|> dataDescBody h -- without signature <|> dataDescBody h -- without signature
<|> return () -- none
dataDescBody FileHeader{..} = do dataDescBody FileHeader{..} = do
crc <- G.getWord32le crc <- G.getWord32le
let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le
csiz <- getSize csiz <- getSize
usiz <- getSize usiz <- getSize
guard $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry
dataDescBody _ = empty dataDescBody _ = empty
central = G.getWord32le >>= centralBody central = G.getWord32le >>= centralBody
centralBody 0x02014b50 = centralHeader >> central centralBody 0x02014b50 = centralHeader >> central
@ -224,6 +247,7 @@ unZip = next where
, fileCSize = extZip64CSize , fileCSize = extZip64CSize
, fileCRC = crc , fileCRC = crc
, fileZip64 = extZip64 , fileZip64 = extZip64
, fileStream = testBit gpf 3
} }
centralHeader = do centralHeader = do
-- ignore everything -- ignore everything