Add zip64 support to unzip
Untested, but hey, the last untested stuff worked perfectly
This commit is contained in:
parent
081c3b786a
commit
66d0ad3720
@ -6,9 +6,10 @@ module Codec.Archive.Zip.Conduit.UnZip
|
|||||||
, unZip
|
, unZip
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad (when, unless)
|
import Control.Applicative ((<|>), empty)
|
||||||
|
import Control.Monad (when, unless, guard)
|
||||||
import qualified Data.Binary.Get as G
|
import qualified Data.Binary.Get as G
|
||||||
import Data.Bits ((.&.), complement, shiftL, shiftR)
|
import Data.Bits ((.&.), complement, testBit, shiftL, shiftR)
|
||||||
import qualified Data.ByteString as BS
|
import qualified Data.ByteString as BS
|
||||||
import qualified Data.Conduit as C
|
import qualified Data.Conduit as C
|
||||||
import qualified Data.Conduit.List as CL
|
import qualified Data.Conduit.List as CL
|
||||||
@ -16,12 +17,12 @@ import Data.Conduit.Serialization.Binary (sinkGet)
|
|||||||
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
||||||
import Data.Digest.CRC32 (crc32Update)
|
import Data.Digest.CRC32 (crc32Update)
|
||||||
import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..))
|
import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..))
|
||||||
import Data.Word (Word, Word32)
|
import Data.Word (Word32, Word64)
|
||||||
|
|
||||||
data ZipEntry = ZipEntry
|
data ZipEntry = ZipEntry
|
||||||
{ zipEntryName :: BS.ByteString
|
{ zipEntryName :: BS.ByteString
|
||||||
, zipEntryTime :: UTCTime
|
, zipEntryTime :: UTCTime
|
||||||
, zipEntrySize :: Word
|
, zipEntrySize :: !Word64
|
||||||
}
|
}
|
||||||
|
|
||||||
data ZipInfo = ZipInfo
|
data ZipInfo = ZipInfo
|
||||||
@ -33,12 +34,44 @@ data Header m
|
|||||||
{ fileDecompress :: C.Conduit BS.ByteString m BS.ByteString
|
{ fileDecompress :: C.Conduit BS.ByteString m BS.ByteString
|
||||||
, fileEntry :: !ZipEntry
|
, fileEntry :: !ZipEntry
|
||||||
, fileCRC :: !Word32
|
, fileCRC :: !Word32
|
||||||
, fileCSize :: !Word32
|
, fileCSize :: !Word64
|
||||||
|
, fileZip64 :: !Bool
|
||||||
}
|
}
|
||||||
| EndOfCentralDirectory
|
| EndOfCentralDirectory
|
||||||
{ endInfo :: ZipInfo
|
{ endInfo :: ZipInfo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data ExtField = ExtField
|
||||||
|
{ extZip64 :: Bool
|
||||||
|
, extZip64USize
|
||||||
|
, extZip64CSize :: Word64
|
||||||
|
}
|
||||||
|
|
||||||
|
{- ExtUnix
|
||||||
|
{ extUnixATime
|
||||||
|
, extUnixMTime :: UTCTime
|
||||||
|
, extUnixUID
|
||||||
|
, extUnixGID :: Word16
|
||||||
|
, extUnixData :: BS.ByteString
|
||||||
|
}
|
||||||
|
-}
|
||||||
|
|
||||||
|
pass :: (Monad m, Integral n) => n -> C.Conduit BS.ByteString m BS.ByteString
|
||||||
|
pass 0 = return ()
|
||||||
|
pass n = C.await >>= maybe
|
||||||
|
(fail $ "EOF in file data, expecting " ++ show ni ++ " more bytes")
|
||||||
|
(\b ->
|
||||||
|
let n' = ni - toInteger (BS.length b) in
|
||||||
|
if n' < 0
|
||||||
|
then do
|
||||||
|
let (b', r) = BS.splitAt (fromIntegral n) b
|
||||||
|
C.yield b'
|
||||||
|
C.leftover r
|
||||||
|
else do
|
||||||
|
C.yield b
|
||||||
|
pass n')
|
||||||
|
where ni = toInteger n
|
||||||
|
|
||||||
crc32 :: Monad m => C.Consumer BS.ByteString m Word32
|
crc32 :: Monad m => C.Consumer BS.ByteString m Word32
|
||||||
crc32 = CL.fold crc32Update 0
|
crc32 = CL.fold crc32Update 0
|
||||||
|
|
||||||
@ -52,9 +85,10 @@ unZip = next where
|
|||||||
case h of
|
case h of
|
||||||
FileHeader{..} -> do
|
FileHeader{..} -> do
|
||||||
C.yield $ Left fileEntry
|
C.yield $ Left fileEntry
|
||||||
C.mapOutput Right $ pass (fromIntegral fileCSize)
|
C.mapOutput Right $ pass fileCSize
|
||||||
C..| (fileDecompress >> CL.sinkNull)
|
C..| (fileDecompress >> CL.sinkNull)
|
||||||
C..| checkCRC fileCRC
|
C..| checkCRC fileCRC
|
||||||
|
sinkGet $ dataDesc h
|
||||||
next
|
next
|
||||||
EndOfCentralDirectory{..} -> do
|
EndOfCentralDirectory{..} -> do
|
||||||
return endInfo
|
return endInfo
|
||||||
@ -62,20 +96,36 @@ unZip = next where
|
|||||||
sig <- G.getWord32le
|
sig <- G.getWord32le
|
||||||
case sig of
|
case sig of
|
||||||
0x04034b50 -> fileHeader
|
0x04034b50 -> fileHeader
|
||||||
0x08074b50 -> -- data descriptor
|
_ -> centralBody sig
|
||||||
G.skip 12 >> header
|
dataDesc h = -- this takes a bit of flexibility to account for the various cases
|
||||||
_ -> centralDirectory sig
|
(do -- with signature
|
||||||
centralDirectory 0x02014b50 = centralHeader >> G.getWord32le >>= centralDirectory
|
sig <- G.getWord32le
|
||||||
centralDirectory 0x06054b50 = EndOfCentralDirectory <$> endDirectory
|
guard (sig == 0x06054b50)
|
||||||
centralDirectory sig = fail $ "Unknown header signature: " ++ show sig
|
dataDescBody h)
|
||||||
|
<|> dataDescBody h -- without signature
|
||||||
|
<|> return () -- none
|
||||||
|
dataDescBody FileHeader{..} = do
|
||||||
|
crc <- G.getWord32le
|
||||||
|
let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le
|
||||||
|
csiz <- getSize
|
||||||
|
usiz <- getSize
|
||||||
|
guard $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry
|
||||||
|
dataDescBody _ = empty
|
||||||
|
central = G.getWord32le >>= centralBody
|
||||||
|
centralBody 0x02014b50 = centralHeader >> central
|
||||||
|
centralBody 0x06064b50 = zip64EndDirectory >> central
|
||||||
|
centralBody 0x07064b50 = G.skip 16 >> central
|
||||||
|
centralBody 0x06054b50 = EndOfCentralDirectory <$> endDirectory
|
||||||
|
centralBody sig = fail $ "Unknown header signature: " ++ show sig
|
||||||
fileHeader = do
|
fileHeader = do
|
||||||
ver <- G.getWord16le
|
ver <- G.getWord16le
|
||||||
when (ver > 20) $ fail $ "Unsupported version: " ++ show ver
|
when (ver > 45) $ fail $ "Unsupported version: " ++ show ver
|
||||||
gpf <- G.getWord16le
|
gpf <- G.getWord16le
|
||||||
when (gpf .&. complement 6 /= 0) $ fail $ "Unsupported flags: " ++ show gpf
|
when (gpf .&. complement 0o06 /= 0) $ fail $ "Unsupported flags: " ++ show gpf
|
||||||
comp <- G.getWord16le
|
comp <- G.getWord16le
|
||||||
dcomp <- case comp of
|
dcomp <- case comp of
|
||||||
0 -> return $ C.awaitForever C.yield
|
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
|
||||||
|
| otherwise -> return $ C.awaitForever C.yield -- idConduit
|
||||||
8 -> return $ decompress (WindowBits (-15))
|
8 -> return $ decompress (WindowBits (-15))
|
||||||
_ -> fail $ "Unsupported compression method: " ++ show comp
|
_ -> fail $ "Unsupported compression method: " ++ show comp
|
||||||
time <- G.getWord16le
|
time <- G.getWord16le
|
||||||
@ -93,40 +143,69 @@ unZip = next where
|
|||||||
crc <- G.getWord32le
|
crc <- G.getWord32le
|
||||||
csiz <- G.getWord32le
|
csiz <- G.getWord32le
|
||||||
usiz <- G.getWord32le
|
usiz <- G.getWord32le
|
||||||
nlen <- G.getWord16le
|
nlen <- fromIntegral <$> G.getWord16le
|
||||||
elen <- G.getWord16le
|
elen <- fromIntegral <$> G.getWord16le
|
||||||
name <- G.getByteString $ fromIntegral nlen
|
name <- G.getByteString nlen
|
||||||
G.skip $ fromIntegral elen
|
let getExt ext = do
|
||||||
|
t <- G.getWord16le
|
||||||
|
z <- fromIntegral <$> G.getWord16le
|
||||||
|
ext' <- G.isolate z $ case t of
|
||||||
|
0x0001 -> do
|
||||||
|
-- the zip specs claim "the Local header MUST include BOTH" but "only if the corresponding field is set to 0xFFFFFFFF"
|
||||||
|
usiz' <- if usiz == maxBound then G.getWord64le else return $ extZip64USize ext
|
||||||
|
csiz' <- if csiz == maxBound then G.getWord64le else return $ extZip64CSize ext
|
||||||
|
return ext
|
||||||
|
{ extZip64 = True
|
||||||
|
, extZip64USize = usiz'
|
||||||
|
, extZip64CSize = csiz'
|
||||||
|
}
|
||||||
|
{-
|
||||||
|
0x000d -> do
|
||||||
|
atim <- G.getWord32le
|
||||||
|
mtim <- G.getWord32le
|
||||||
|
uid <- G.getWord16le
|
||||||
|
gid <- G.getWord16le
|
||||||
|
dat <- G.getByteString $ z - 12
|
||||||
|
return ExtUnix
|
||||||
|
{ extUnixATime = posixSecondsToUTCTime atim
|
||||||
|
, extUnixMTime = posixSecondsToUTCTime mtim
|
||||||
|
, extUnixUID = uid
|
||||||
|
, extUnixGID = gid
|
||||||
|
, extUnixData = dat
|
||||||
|
}
|
||||||
|
-}
|
||||||
|
_ -> ext <$ G.skip z
|
||||||
|
getExt ext'
|
||||||
|
ExtField{..} <- G.isolate elen $ getExt ExtField
|
||||||
|
{ extZip64 = False
|
||||||
|
, extZip64USize = fromIntegral usiz
|
||||||
|
, extZip64CSize = fromIntegral csiz
|
||||||
|
}
|
||||||
return FileHeader
|
return FileHeader
|
||||||
{ fileEntry = ZipEntry
|
{ fileEntry = ZipEntry
|
||||||
{ zipEntryName = name
|
{ zipEntryName = name
|
||||||
, zipEntryTime = mtime
|
, zipEntryTime = mtime
|
||||||
, zipEntrySize = fromIntegral usiz
|
, zipEntrySize = extZip64USize
|
||||||
}
|
}
|
||||||
, fileDecompress = dcomp
|
, fileDecompress = dcomp
|
||||||
, fileCSize = csiz
|
, fileCSize = extZip64CSize
|
||||||
, fileCRC = crc
|
, fileCRC = crc
|
||||||
|
, fileZip64 = extZip64
|
||||||
}
|
}
|
||||||
centralHeader = do
|
centralHeader = do
|
||||||
-- ignore everything
|
-- ignore everything
|
||||||
G.skip 24
|
G.skip 24
|
||||||
nlen <- G.getWord16le
|
nlen <- fromIntegral <$> G.getWord16le
|
||||||
elen <- G.getWord16le
|
elen <- fromIntegral <$> G.getWord16le
|
||||||
clen <- G.getWord16le
|
clen <- fromIntegral <$> G.getWord16le
|
||||||
G.skip $ 12 + fromIntegral nlen + fromIntegral elen + fromIntegral clen
|
G.skip $ 12 + nlen + elen + clen
|
||||||
|
zip64EndDirectory = do
|
||||||
|
len <- G.getWord64le
|
||||||
|
G.skip $ fromIntegral len -- would not expect to overflow...
|
||||||
endDirectory = do
|
endDirectory = do
|
||||||
G.skip 16
|
G.skip 16
|
||||||
clen <- G.getWord16le
|
clen <- fromIntegral <$> G.getWord16le
|
||||||
comm <- G.getByteString $ fromIntegral clen
|
comm <- G.getByteString clen
|
||||||
return ZipInfo
|
return ZipInfo
|
||||||
{ zipComment = comm
|
{ zipComment = comm
|
||||||
}
|
}
|
||||||
pass 0 = return ()
|
|
||||||
pass n = C.await >>= maybe
|
|
||||||
(fail $ "EOF in file data, expecting " ++ show n ++ " more bytes")
|
|
||||||
(\b -> do
|
|
||||||
let (b', r) = BS.splitAt n b
|
|
||||||
C.yield b'
|
|
||||||
if BS.null r
|
|
||||||
then pass $ n - BS.length b'
|
|
||||||
else C.leftover r)
|
|
||||||
|
|||||||
@ -22,7 +22,7 @@ library
|
|||||||
ghc-options: -Wall
|
ghc-options: -Wall
|
||||||
build-depends:
|
build-depends:
|
||||||
base >= 4.7 && < 5,
|
base >= 4.7 && < 5,
|
||||||
binary,
|
binary >= 0.7.2,
|
||||||
binary-conduit,
|
binary-conduit,
|
||||||
bytestring,
|
bytestring,
|
||||||
conduit,
|
conduit,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user