Add command-line unzipper (mainly to test with)
This commit is contained in:
parent
9a6b39ce11
commit
2c87f62e2b
@ -23,7 +23,7 @@ import qualified Data.Conduit.List as CL
|
|||||||
import Data.Conduit.Serialization.Binary (sinkGet)
|
import Data.Conduit.Serialization.Binary (sinkGet)
|
||||||
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
import Data.Conduit.Zlib (WindowBits(..), decompress)
|
||||||
import Data.Digest.CRC32 (crc32Update)
|
import Data.Digest.CRC32 (crc32Update)
|
||||||
import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..))
|
import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..))
|
||||||
import Data.Typeable (Typeable)
|
import Data.Typeable (Typeable)
|
||||||
import Data.Word (Word32, Word64)
|
import Data.Word (Word32, Word64)
|
||||||
|
|
||||||
@ -31,8 +31,8 @@ import Data.Word (Word32, Word64)
|
|||||||
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
|
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
|
||||||
data ZipEntry = ZipEntry
|
data ZipEntry = ZipEntry
|
||||||
{ zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories
|
{ zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories
|
||||||
, zipEntryTime :: UTCTime -- ^Modification time
|
, zipEntryTime :: LocalTime -- ^Modification time
|
||||||
, zipEntrySize :: !Word64 -- ^Size of file data (not checked)
|
, zipEntrySize :: !Word64 -- ^Size of file data
|
||||||
}
|
}
|
||||||
|
|
||||||
-- |Summary information at the end of a zip stream.
|
-- |Summary information at the end of a zip stream.
|
||||||
@ -143,9 +143,10 @@ unZip = next where
|
|||||||
(size, crc) <- pass fileCSize
|
(size, crc) <- pass fileCSize
|
||||||
C..| (fileDecompress >> CL.sinkNull)
|
C..| (fileDecompress >> CL.sinkNull)
|
||||||
C..| sizeCRC
|
C..| sizeCRC
|
||||||
-- optional data description
|
-- traceM $ "size=" ++ show size ++ "," ++ show (zipEntrySize fileEntry) ++ " crc=" ++ show crc ++ "," ++ show fileCRC
|
||||||
(&&) (size == zipEntrySize fileEntry && crc == fileCRC)
|
-- optional data description (possibly ambiguous!)
|
||||||
<$> sinkGet (dataDesc h <|> return True)
|
sinkGet $ (guard =<< dataDesc h) <|> return ()
|
||||||
|
return (size == zipEntrySize fileEntry && crc == fileCRC)
|
||||||
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
|
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
|
||||||
next
|
next
|
||||||
EndOfCentralDirectory{..} -> do
|
EndOfCentralDirectory{..} -> do
|
||||||
@ -158,7 +159,7 @@ unZip = next where
|
|||||||
dataDesc h = -- this takes a bit of flexibility to account for the various cases
|
dataDesc h = -- this takes a bit of flexibility to account for the various cases
|
||||||
(do -- with signature
|
(do -- with signature
|
||||||
sig <- G.getWord32le
|
sig <- G.getWord32le
|
||||||
guard (sig == 0x06054b50)
|
guard (sig == 0x08074b50)
|
||||||
dataDescBody h)
|
dataDescBody h)
|
||||||
<|> dataDescBody h -- without signature
|
<|> dataDescBody h -- without signature
|
||||||
dataDescBody FileHeader{..} = do
|
dataDescBody FileHeader{..} = do
|
||||||
@ -166,6 +167,7 @@ unZip = next where
|
|||||||
let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le
|
let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le
|
||||||
csiz <- getSize
|
csiz <- getSize
|
||||||
usiz <- getSize
|
usiz <- getSize
|
||||||
|
-- traceM $ "crc=" ++ show crc ++ "," ++ show fileCRC ++ " csiz=" ++ show csiz ++ "," ++ show fileCSize ++ " usiz=" ++ show usiz ++ "," ++ show (zipEntrySize fileEntry)
|
||||||
return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry
|
return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry
|
||||||
dataDescBody _ = empty
|
dataDescBody _ = empty
|
||||||
central = G.getWord32le >>= centralBody
|
central = G.getWord32le >>= centralBody
|
||||||
@ -187,16 +189,15 @@ unZip = next where
|
|||||||
_ -> fail $ "Unsupported compression method: " ++ show comp
|
_ -> fail $ "Unsupported compression method: " ++ show comp
|
||||||
time <- G.getWord16le
|
time <- G.getWord16le
|
||||||
date <- G.getWord16le
|
date <- G.getWord16le
|
||||||
let mtime = UTCTime (fromGregorian
|
let mtime = LocalTime
|
||||||
|
(fromGregorian
|
||||||
(fromIntegral $ date `shiftR` 9 + 1980)
|
(fromIntegral $ date `shiftR` 9 + 1980)
|
||||||
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
|
(fromIntegral $ date `shiftR` 5 .&. 0x0f)
|
||||||
(fromIntegral $ date .&. 0x1f)
|
(fromIntegral $ date .&. 0x1f))
|
||||||
)
|
(TimeOfDay
|
||||||
(timeOfDayToTime $ TimeOfDay
|
|
||||||
(fromIntegral $ time `shiftR` 11)
|
(fromIntegral $ time `shiftR` 11)
|
||||||
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
|
(fromIntegral $ time `shiftR` 5 .&. 0x3f)
|
||||||
(fromIntegral $ time `shiftL` 1 .&. 0x3f)
|
(fromIntegral $ time `shiftL` 1 .&. 0x3f))
|
||||||
)
|
|
||||||
crc <- G.getWord32le
|
crc <- G.getWord32le
|
||||||
csiz <- G.getWord32le
|
csiz <- G.getWord32le
|
||||||
usiz <- G.getWord32le
|
usiz <- G.getWord32le
|
||||||
|
|||||||
47
cmd/unzip.hs
Normal file
47
cmd/unzip.hs
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
{-# LANGUAGE RecordWildCards #-}
|
||||||
|
import Control.Monad (when, unless)
|
||||||
|
import Control.Monad.IO.Class (liftIO)
|
||||||
|
import qualified Data.ByteString as BS
|
||||||
|
import qualified Data.ByteString.Char8 as BSC
|
||||||
|
import qualified Data.Conduit as C
|
||||||
|
import qualified Data.Conduit.Binary as CB
|
||||||
|
import Data.Time.LocalTime (localTimeToUTC, utc)
|
||||||
|
import System.Directory (createDirectoryIfMissing, setModificationTime)
|
||||||
|
import System.Environment (getArgs)
|
||||||
|
import System.Exit (exitFailure)
|
||||||
|
import System.FilePath.Posix (takeDirectory)
|
||||||
|
import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr)
|
||||||
|
|
||||||
|
import Codec.Archive.Zip.Conduit.UnZip
|
||||||
|
|
||||||
|
extract :: C.Sink (Either ZipEntry BS.ByteString) IO ()
|
||||||
|
extract = C.awaitForever start where
|
||||||
|
start (Left ZipEntry{..}) = do
|
||||||
|
liftIO $ BSC.putStrLn zipEntryName
|
||||||
|
liftIO $ createDirectoryIfMissing True (takeDirectory name)
|
||||||
|
if BSC.last zipEntryName == '/'
|
||||||
|
then when (zipEntrySize /= 0) $ fail $ name ++ ": non-empty directory"
|
||||||
|
else do -- C.bracketP
|
||||||
|
h <- liftIO $ openFile name WriteMode
|
||||||
|
liftIO $ hSetFileSize h $ toInteger zipEntrySize
|
||||||
|
write C..| CB.sinkHandle h
|
||||||
|
liftIO $ hClose h
|
||||||
|
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
|
||||||
|
where name = BSC.unpack $ BSC.dropWhile ('/' ==) zipEntryName -- should we utf8 decode?
|
||||||
|
start (Right _) = fail "Unexpected leading or directory data contents"
|
||||||
|
write = C.await >>= maybe
|
||||||
|
(return ())
|
||||||
|
block
|
||||||
|
block (Right b) = C.yield b >> write
|
||||||
|
block a = C.leftover a
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = do
|
||||||
|
args <- getArgs
|
||||||
|
unless (null args) $ do
|
||||||
|
hPutStrLn stderr "Usage: unzip\nRead a zip file from stdin and extract it in the current directory."
|
||||||
|
exitFailure
|
||||||
|
ZipInfo{..} <- C.runConduit
|
||||||
|
$ CB.sourceHandle stdin
|
||||||
|
C..| C.fuseUpstream unZip extract
|
||||||
|
BSC.putStrLn zipComment
|
||||||
@ -32,3 +32,18 @@ library
|
|||||||
primitive,
|
primitive,
|
||||||
time,
|
time,
|
||||||
transformers-base
|
transformers-base
|
||||||
|
|
||||||
|
executable unzip-stream
|
||||||
|
main-is: unzip.hs
|
||||||
|
hs-source-dirs: cmd
|
||||||
|
default-language: Haskell2010
|
||||||
|
ghc-options: -Wall
|
||||||
|
build-depends:
|
||||||
|
base >=4.7 && <5,
|
||||||
|
bytestring,
|
||||||
|
conduit,
|
||||||
|
conduit-extra,
|
||||||
|
directory,
|
||||||
|
filepath,
|
||||||
|
time,
|
||||||
|
zip-stream
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user