diff --git a/Codec/Archive/Zip/Conduit/Types.hs b/Codec/Archive/Zip/Conduit/Types.hs index dfcbe4a..b9d6e35 100644 --- a/Codec/Archive/Zip/Conduit/Types.hs +++ b/Codec/Archive/Zip/Conduit/Types.hs @@ -7,6 +7,7 @@ import qualified Data.Conduit as C import Data.Conduit.Binary (sourceLbs) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) +import qualified Data.Text as T import Data.Time.LocalTime (LocalTime) import Data.Typeable (Typeable) import Data.Word (Word64) @@ -29,7 +30,7 @@ data ZipInfo = ZipInfo -- |(The beginning of) a single entry in a zip stream, which may be any file or directory. -- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that. data ZipEntry = ZipEntry - { zipEntryName :: ByteString -- ^File name (in posix format, no leading slashes), usually utf-8 encoded, with a trailing slash for directories + { zipEntryName :: Either T.Text ByteString -- ^File name (in posix format, no leading slashes), either UTF-8 encoded text or raw bytes (CP437), with a trailing slash for directories , zipEntryTime :: LocalTime -- ^Modification time , zipEntrySize :: Maybe Word64 -- ^Size of file data (if known); checked on zipping and also used as hint to enable zip64 } deriving (Eq, Show) diff --git a/Codec/Archive/Zip/Conduit/UnZip.hs b/Codec/Archive/Zip/Conduit/UnZip.hs index 95af05a..de9efed 100644 --- a/Codec/Archive/Zip/Conduit/UnZip.hs +++ b/Codec/Archive/Zip/Conduit/UnZip.hs @@ -23,6 +23,8 @@ import qualified Data.Conduit as C import qualified Data.Conduit.List as CL import Data.Conduit.Serialization.Binary (sinkGet) import qualified Data.Conduit.Zlib as CZ +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) import Data.Word (Word16, Word32, Word64) @@ -136,7 +138,7 @@ unZipStream = next where -- optional data description (possibly ambiguous!) sinkGet $ (guard =<< dataDesc h) <|> return () return (size == usize && crc == fileCRC) - unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed" + unless r $ zipError $ either T.unpack BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed" next EndOfCentralDirectory{..} -> do return endInfo @@ -166,7 +168,7 @@ unZipStream = next where when (ver > zipVersion) $ fail $ "Unsupported version: " ++ show ver gpf <- G.getWord16le -- when (gpf .&. complement (bit 1 .|. bit 2 .|. bit 3) /= 0) $ fail $ "Unsupported flags: " ++ show gpf - when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 /= 0) $ fail $ "Unsupported flags: " ++ show gpf + when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 `clearBit` 11 /= 0) $ fail $ "Unsupported flags: " ++ show gpf comp <- G.getWord16le dcomp <- case comp of 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" @@ -216,7 +218,7 @@ unZipStream = next where } return FileHeader { fileEntry = ZipEntry - { zipEntryName = name + { zipEntryName = if testBit gpf 11 then Left (TE.decodeUtf8 name) else Right name , zipEntryTime = time , zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize } diff --git a/Codec/Archive/Zip/Conduit/Zip.hs b/Codec/Archive/Zip/Conduit/Zip.hs index 5422f05..8d7272e 100644 --- a/Codec/Archive/Zip/Conduit/Zip.hs +++ b/Codec/Archive/Zip/Conduit/Zip.hs @@ -35,6 +35,8 @@ import qualified Data.Conduit.Zlib as CZ import Data.Digest.CRC32 (crc32) import Data.Either (isLeft) import Data.Maybe (fromMaybe, fromJust) +import qualified Data.Text as T +import qualified Data.Text.Encoding as TE import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian) import Data.Word (Word16, Word64) @@ -127,12 +129,13 @@ zipStream ZipOptions{..} = execStateC 0 $ do | otherwise = (left (fmap (id &&& fst)) sdat, usiz) z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize) (maxBound32 <) (max <$> usiz <*> csiz) - namelen = BS.length zipEntryName + name = either TE.encodeUtf8 id zipEntryName + namelen = BS.length name (time, date) = toDOSTime zipEntryTime mcrc = either (const Nothing) (Just . crc32) dat - when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long" + when (namelen > maxBound16) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": entry name too long" let common = do - P.putWord16le $ isLeft dat ?* bit 3 + P.putWord16le $ isLeft dat ?* bit 3 .|. isLeft zipEntryName ?* bit 11 P.putWord16le $ comp ?* 8 P.putWord16le $ time P.putWord16le $ date @@ -147,7 +150,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz P.putWord16le $ fromIntegral namelen P.putWord16le $ z64 ?* 20 - P.putByteString zipEntryName + P.putByteString name when z64 $ do P.putWord16le 0x0001 P.putWord16le 16 @@ -157,7 +160,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do ((usz, crc), csz) <- either (\cd -> do r@((usz, crc), csz) <- outsz cd -- write compressed data - when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" + when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" output $ do P.putWord32le 0x08074b50 P.putWord32le crc @@ -169,7 +172,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do return r) (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) cdat - when (any (usz /=) zipEntrySize) $ zipError $ BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" + when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" return $ do -- central directory let o64 = off >= maxBound32 @@ -191,7 +194,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do P.putWord16le 0 -- internal file attributes P.putWord32le 0 -- external file attributes P.putWord32le $ if o64 then maxBound32 else fromIntegral off - P.putByteString zipEntryName + P.putByteString name when a64 $ do P.putWord16le 0x0001 P.putWord16le l64 diff --git a/cmd/unzip.hs b/cmd/unzip.hs index 738568d..0e92da0 100644 --- a/cmd/unzip.hs +++ b/cmd/unzip.hs @@ -6,6 +6,8 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB +import qualified Data.Text as T +import qualified Data.Text.IO as TIO import Data.Time.LocalTime (localTimeToUTC, utc) import Data.Void (Void) import System.Directory (createDirectoryIfMissing @@ -23,9 +25,9 @@ import Codec.Archive.Zip.Conduit.UnZip extract :: C.ConduitM (Either ZipEntry BS.ByteString) Void IO () extract = C.awaitForever start where start (Left ZipEntry{..}) = do - liftIO $ BSC.putStrLn zipEntryName + liftIO $ either TIO.putStrLn BSC.putStrLn zipEntryName liftIO $ createDirectoryIfMissing True (takeDirectory name) - if BSC.last zipEntryName == '/' + if either T.last BSC.last zipEntryName == '/' then when ((0 /=) `any` zipEntrySize) $ fail $ name ++ ": non-empty directory" else do -- C.bracketP h <- liftIO $ openFile name WriteMode @@ -35,7 +37,7 @@ extract = C.awaitForever start where #if MIN_VERSION_directory(1,2,3) liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone #endif - where name = BSC.unpack $ BSC.dropWhile ('/' ==) zipEntryName -- should we utf8 decode? + where name = either (T.unpack . T.dropWhile ('/' ==)) (BSC.unpack . BSC.dropWhile ('/' ==)) zipEntryName start (Right _) = fail "Unexpected leading or directory data contents" write = C.await >>= maybe (return ()) diff --git a/cmd/zip.hs b/cmd/zip.hs index d29f909..d46853f 100644 --- a/cmd/zip.hs +++ b/cmd/zip.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +import Control.Arrow ((+++)) import Control.Monad (filterM, void) import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.Trans.Resource (MonadResource, runResourceT) @@ -6,6 +7,7 @@ import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.Binary as CB import Data.List (foldl') +import qualified Data.Text as T import Data.Time.LocalTime (utcToLocalTime, utc) import qualified System.Console.GetOpt as Opt import System.Directory (doesDirectoryExist, getModificationTime @@ -43,7 +45,7 @@ generate :: (MonadIO m, MonadResource m) => [FilePath] -> C.ConduitM () (ZipEntr generate (p:paths) = do t <- liftIO $ getModificationTime p let e = ZipEntry - { zipEntryName = BSC.pack $ dropWhile ('/' ==) p + { zipEntryName = Right $ BSC.pack $ dropWhile ('/' ==) p , zipEntryTime = utcToLocalTime utc t -- FIXME: timezone , zipEntrySize = Nothing } @@ -62,7 +64,7 @@ generate (p:paths) = do #else filter (`notElem` [".",".."]) . map (p ) <$> getDirectoryContents p #endif - C.yield (e{ zipEntryName = zipEntryName e `BSC.snoc` '/', zipEntrySize = Just 0 }, mempty) + C.yield (e{ zipEntryName = (`T.snoc` '/') +++ (`BSC.snoc` '/') $ zipEntryName e, zipEntrySize = Just 0 }, mempty) generate $ dl ++ paths else do C.yield (e, zipFileData p) diff --git a/zip-stream.cabal b/zip-stream.cabal index 67ed9fd..5bf9658 100644 --- a/zip-stream.cabal +++ b/zip-stream.cabal @@ -36,6 +36,7 @@ library mtl, primitive, resourcet, + text, time, transformers-base, zlib @@ -52,6 +53,7 @@ executable unzip-stream conduit-extra, directory, filepath, + text, time, transformers, zip-stream @@ -69,6 +71,7 @@ executable zip-stream directory, filepath, resourcet, + text, time, transformers, zip-stream