Add command-line unzipper (mainly to test with)

This commit is contained in:
Dylan Simon 2017-05-11 21:20:15 -04:00
parent 9a6b39ce11
commit 2c87f62e2b
3 changed files with 76 additions and 13 deletions

View File

@ -23,7 +23,7 @@ import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet) import Data.Conduit.Serialization.Binary (sinkGet)
import Data.Conduit.Zlib (WindowBits(..), decompress) import Data.Conduit.Zlib (WindowBits(..), decompress)
import Data.Digest.CRC32 (crc32Update) import Data.Digest.CRC32 (crc32Update)
import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..)) import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
import Data.Typeable (Typeable) import Data.Typeable (Typeable)
import Data.Word (Word32, Word64) import Data.Word (Word32, Word64)
@ -31,8 +31,8 @@ import Data.Word (Word32, Word64)
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that. -- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
data ZipEntry = ZipEntry data ZipEntry = ZipEntry
{ zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories { zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories
, zipEntryTime :: UTCTime -- ^Modification time , zipEntryTime :: LocalTime -- ^Modification time
, zipEntrySize :: !Word64 -- ^Size of file data (not checked) , zipEntrySize :: !Word64 -- ^Size of file data
} }
-- |Summary information at the end of a zip stream. -- |Summary information at the end of a zip stream.
@ -143,9 +143,10 @@ unZip = next where
(size, crc) <- pass fileCSize (size, crc) <- pass fileCSize
C..| (fileDecompress >> CL.sinkNull) C..| (fileDecompress >> CL.sinkNull)
C..| sizeCRC C..| sizeCRC
-- optional data description -- traceM $ "size=" ++ show size ++ "," ++ show (zipEntrySize fileEntry) ++ " crc=" ++ show crc ++ "," ++ show fileCRC
(&&) (size == zipEntrySize fileEntry && crc == fileCRC) -- optional data description (possibly ambiguous!)
<$> sinkGet (dataDesc h <|> return True) sinkGet $ (guard =<< dataDesc h) <|> return ()
return (size == zipEntrySize fileEntry && crc == fileCRC)
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed" unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
next next
EndOfCentralDirectory{..} -> do EndOfCentralDirectory{..} -> do
@ -158,7 +159,7 @@ unZip = next where
dataDesc h = -- this takes a bit of flexibility to account for the various cases dataDesc h = -- this takes a bit of flexibility to account for the various cases
(do -- with signature (do -- with signature
sig <- G.getWord32le sig <- G.getWord32le
guard (sig == 0x06054b50) guard (sig == 0x08074b50)
dataDescBody h) dataDescBody h)
<|> dataDescBody h -- without signature <|> dataDescBody h -- without signature
dataDescBody FileHeader{..} = do dataDescBody FileHeader{..} = do
@ -166,6 +167,7 @@ unZip = next where
let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le
csiz <- getSize csiz <- getSize
usiz <- getSize usiz <- getSize
-- traceM $ "crc=" ++ show crc ++ "," ++ show fileCRC ++ " csiz=" ++ show csiz ++ "," ++ show fileCSize ++ " usiz=" ++ show usiz ++ "," ++ show (zipEntrySize fileEntry)
return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry
dataDescBody _ = empty dataDescBody _ = empty
central = G.getWord32le >>= centralBody central = G.getWord32le >>= centralBody
@ -187,16 +189,15 @@ unZip = next where
_ -> fail $ "Unsupported compression method: " ++ show comp _ -> fail $ "Unsupported compression method: " ++ show comp
time <- G.getWord16le time <- G.getWord16le
date <- G.getWord16le date <- G.getWord16le
let mtime = UTCTime (fromGregorian let mtime = LocalTime
(fromGregorian
(fromIntegral $ date `shiftR` 9 + 1980) (fromIntegral $ date `shiftR` 9 + 1980)
(fromIntegral $ date `shiftR` 5 .&. 0x0f) (fromIntegral $ date `shiftR` 5 .&. 0x0f)
(fromIntegral $ date .&. 0x1f) (fromIntegral $ date .&. 0x1f))
) (TimeOfDay
(timeOfDayToTime $ TimeOfDay
(fromIntegral $ time `shiftR` 11) (fromIntegral $ time `shiftR` 11)
(fromIntegral $ time `shiftR` 5 .&. 0x3f) (fromIntegral $ time `shiftR` 5 .&. 0x3f)
(fromIntegral $ time `shiftL` 1 .&. 0x3f) (fromIntegral $ time `shiftL` 1 .&. 0x3f))
)
crc <- G.getWord32le crc <- G.getWord32le
csiz <- G.getWord32le csiz <- G.getWord32le
usiz <- G.getWord32le usiz <- G.getWord32le

47
cmd/unzip.hs Normal file
View File

@ -0,0 +1,47 @@
{-# LANGUAGE RecordWildCards #-}
import Control.Monad (when, unless)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB
import Data.Time.LocalTime (localTimeToUTC, utc)
import System.Directory (createDirectoryIfMissing, setModificationTime)
import System.Environment (getArgs)
import System.Exit (exitFailure)
import System.FilePath.Posix (takeDirectory)
import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr)
import Codec.Archive.Zip.Conduit.UnZip
extract :: C.Sink (Either ZipEntry BS.ByteString) IO ()
extract = C.awaitForever start where
start (Left ZipEntry{..}) = do
liftIO $ BSC.putStrLn zipEntryName
liftIO $ createDirectoryIfMissing True (takeDirectory name)
if BSC.last zipEntryName == '/'
then when (zipEntrySize /= 0) $ fail $ name ++ ": non-empty directory"
else do -- C.bracketP
h <- liftIO $ openFile name WriteMode
liftIO $ hSetFileSize h $ toInteger zipEntrySize
write C..| CB.sinkHandle h
liftIO $ hClose h
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
where name = BSC.unpack $ BSC.dropWhile ('/' ==) zipEntryName -- should we utf8 decode?
start (Right _) = fail "Unexpected leading or directory data contents"
write = C.await >>= maybe
(return ())
block
block (Right b) = C.yield b >> write
block a = C.leftover a
main :: IO ()
main = do
args <- getArgs
unless (null args) $ do
hPutStrLn stderr "Usage: unzip\nRead a zip file from stdin and extract it in the current directory."
exitFailure
ZipInfo{..} <- C.runConduit
$ CB.sourceHandle stdin
C..| C.fuseUpstream unZip extract
BSC.putStrLn zipComment

View File

@ -32,3 +32,18 @@ library
primitive, primitive,
time, time,
transformers-base transformers-base
executable unzip-stream
main-is: unzip.hs
hs-source-dirs: cmd
default-language: Haskell2010
ghc-options: -Wall
build-depends:
base >=4.7 && <5,
bytestring,
conduit,
conduit-extra,
directory,
filepath,
time,
zip-stream