diff --git a/Codec/Archive/Zip/Conduit/UnZip.hs b/Codec/Archive/Zip/Conduit/UnZip.hs index b64fbb3..ec0d504 100644 --- a/Codec/Archive/Zip/Conduit/UnZip.hs +++ b/Codec/Archive/Zip/Conduit/UnZip.hs @@ -23,7 +23,7 @@ import qualified Data.Conduit.List as CL import Data.Conduit.Serialization.Binary (sinkGet) import Data.Conduit.Zlib (WindowBits(..), decompress) import Data.Digest.CRC32 (crc32Update) -import Data.Time (UTCTime(..), fromGregorian, timeOfDayToTime, TimeOfDay(..)) +import Data.Time (LocalTime(..), fromGregorian, TimeOfDay(..)) import Data.Typeable (Typeable) import Data.Word (Word32, Word64) @@ -31,8 +31,8 @@ import Data.Word (Word32, Word64) -- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that. data ZipEntry = ZipEntry { zipEntryName :: BS.ByteString -- ^File name, usually utf-8 encoded, with a trailing slash for directories - , zipEntryTime :: UTCTime -- ^Modification time - , zipEntrySize :: !Word64 -- ^Size of file data (not checked) + , zipEntryTime :: LocalTime -- ^Modification time + , zipEntrySize :: !Word64 -- ^Size of file data } -- |Summary information at the end of a zip stream. @@ -143,9 +143,10 @@ unZip = next where (size, crc) <- pass fileCSize C..| (fileDecompress >> CL.sinkNull) C..| sizeCRC - -- optional data description - (&&) (size == zipEntrySize fileEntry && crc == fileCRC) - <$> sinkGet (dataDesc h <|> return True) + -- traceM $ "size=" ++ show size ++ "," ++ show (zipEntrySize fileEntry) ++ " crc=" ++ show crc ++ "," ++ show fileCRC + -- optional data description (possibly ambiguous!) + sinkGet $ (guard =<< dataDesc h) <|> return () + return (size == zipEntrySize fileEntry && crc == fileCRC) unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed" next EndOfCentralDirectory{..} -> do @@ -158,7 +159,7 @@ unZip = next where dataDesc h = -- this takes a bit of flexibility to account for the various cases (do -- with signature sig <- G.getWord32le - guard (sig == 0x06054b50) + guard (sig == 0x08074b50) dataDescBody h) <|> dataDescBody h -- without signature dataDescBody FileHeader{..} = do @@ -166,6 +167,7 @@ unZip = next where let getSize = if fileZip64 then G.getWord64le else fromIntegral <$> G.getWord32le csiz <- getSize usiz <- getSize + -- traceM $ "crc=" ++ show crc ++ "," ++ show fileCRC ++ " csiz=" ++ show csiz ++ "," ++ show fileCSize ++ " usiz=" ++ show usiz ++ "," ++ show (zipEntrySize fileEntry) return $ crc == fileCRC && csiz == fileCSize && usiz == zipEntrySize fileEntry dataDescBody _ = empty central = G.getWord32le >>= centralBody @@ -187,16 +189,15 @@ unZip = next where _ -> fail $ "Unsupported compression method: " ++ show comp time <- G.getWord16le date <- G.getWord16le - let mtime = UTCTime (fromGregorian + let mtime = LocalTime + (fromGregorian (fromIntegral $ date `shiftR` 9 + 1980) (fromIntegral $ date `shiftR` 5 .&. 0x0f) - (fromIntegral $ date .&. 0x1f) - ) - (timeOfDayToTime $ TimeOfDay + (fromIntegral $ date .&. 0x1f)) + (TimeOfDay (fromIntegral $ time `shiftR` 11) (fromIntegral $ time `shiftR` 5 .&. 0x3f) - (fromIntegral $ time `shiftL` 1 .&. 0x3f) - ) + (fromIntegral $ time `shiftL` 1 .&. 0x3f)) crc <- G.getWord32le csiz <- G.getWord32le usiz <- G.getWord32le diff --git a/cmd/unzip.hs b/cmd/unzip.hs new file mode 100644 index 0000000..9e1f501 --- /dev/null +++ b/cmd/unzip.hs @@ -0,0 +1,47 @@ +{-# LANGUAGE RecordWildCards #-} +import Control.Monad (when, unless) +import Control.Monad.IO.Class (liftIO) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.Conduit as C +import qualified Data.Conduit.Binary as CB +import Data.Time.LocalTime (localTimeToUTC, utc) +import System.Directory (createDirectoryIfMissing, setModificationTime) +import System.Environment (getArgs) +import System.Exit (exitFailure) +import System.FilePath.Posix (takeDirectory) +import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr) + +import Codec.Archive.Zip.Conduit.UnZip + +extract :: C.Sink (Either ZipEntry BS.ByteString) IO () +extract = C.awaitForever start where + start (Left ZipEntry{..}) = do + liftIO $ BSC.putStrLn zipEntryName + liftIO $ createDirectoryIfMissing True (takeDirectory name) + if BSC.last zipEntryName == '/' + then when (zipEntrySize /= 0) $ fail $ name ++ ": non-empty directory" + else do -- C.bracketP + h <- liftIO $ openFile name WriteMode + liftIO $ hSetFileSize h $ toInteger zipEntrySize + write C..| CB.sinkHandle h + liftIO $ hClose h + liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone + where name = BSC.unpack $ BSC.dropWhile ('/' ==) zipEntryName -- should we utf8 decode? + start (Right _) = fail "Unexpected leading or directory data contents" + write = C.await >>= maybe + (return ()) + block + block (Right b) = C.yield b >> write + block a = C.leftover a + +main :: IO () +main = do + args <- getArgs + unless (null args) $ do + hPutStrLn stderr "Usage: unzip\nRead a zip file from stdin and extract it in the current directory." + exitFailure + ZipInfo{..} <- C.runConduit + $ CB.sourceHandle stdin + C..| C.fuseUpstream unZip extract + BSC.putStrLn zipComment diff --git a/zip-stream.cabal b/zip-stream.cabal index f4cf7e0..4f0c563 100644 --- a/zip-stream.cabal +++ b/zip-stream.cabal @@ -32,3 +32,18 @@ library primitive, time, transformers-base + +executable unzip-stream + main-is: unzip.hs + hs-source-dirs: cmd + default-language: Haskell2010 + ghc-options: -Wall + build-depends: + base >=4.7 && <5, + bytestring, + conduit, + conduit-extra, + directory, + filepath, + time, + zip-stream