Allow filenames to be stored as utf8 text

Fixes #3
This commit is contained in:
Dylan Simon 2018-04-24 13:48:32 -04:00
parent d7e9b3a74f
commit 33d3a06b35
6 changed files with 29 additions and 16 deletions

View File

@ -7,6 +7,7 @@ import qualified Data.Conduit as C
import Data.Conduit.Binary (sourceLbs) import Data.Conduit.Binary (sourceLbs)
import Data.Semigroup (Semigroup(..)) import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..)) import Data.String (IsString(..))
import qualified Data.Text as T
import Data.Time.LocalTime (LocalTime) import Data.Time.LocalTime (LocalTime)
import Data.Typeable (Typeable) import Data.Typeable (Typeable)
import Data.Word (Word64) import Data.Word (Word64)
@ -29,7 +30,7 @@ data ZipInfo = ZipInfo
-- |(The beginning of) a single entry in a zip stream, which may be any file or directory. -- |(The beginning of) a single entry in a zip stream, which may be any file or directory.
-- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that. -- As per zip file conventions, directory names should end with a slash and have no data, but this library does not ensure that.
data ZipEntry = ZipEntry data ZipEntry = ZipEntry
{ zipEntryName :: ByteString -- ^File name (in posix format, no leading slashes), usually utf-8 encoded, with a trailing slash for directories { zipEntryName :: Either T.Text ByteString -- ^File name (in posix format, no leading slashes), either UTF-8 encoded text or raw bytes (CP437), with a trailing slash for directories
, zipEntryTime :: LocalTime -- ^Modification time , zipEntryTime :: LocalTime -- ^Modification time
, zipEntrySize :: Maybe Word64 -- ^Size of file data (if known); checked on zipping and also used as hint to enable zip64 , zipEntrySize :: Maybe Word64 -- ^Size of file data (if known); checked on zipping and also used as hint to enable zip64
} deriving (Eq, Show) } deriving (Eq, Show)

View File

@ -23,6 +23,8 @@ import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import Data.Conduit.Serialization.Binary (sinkGet) import Data.Conduit.Serialization.Binary (sinkGet)
import qualified Data.Conduit.Zlib as CZ import qualified Data.Conduit.Zlib as CZ
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian)
import Data.Word (Word16, Word32, Word64) import Data.Word (Word16, Word32, Word64)
@ -136,7 +138,7 @@ unZipStream = next where
-- optional data description (possibly ambiguous!) -- optional data description (possibly ambiguous!)
sinkGet $ (guard =<< dataDesc h) <|> return () sinkGet $ (guard =<< dataDesc h) <|> return ()
return (size == usize && crc == fileCRC) return (size == usize && crc == fileCRC)
unless r $ zipError $ BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed" unless r $ zipError $ either T.unpack BSC.unpack (zipEntryName fileEntry) ++ ": data integrity check failed"
next next
EndOfCentralDirectory{..} -> do EndOfCentralDirectory{..} -> do
return endInfo return endInfo
@ -166,7 +168,7 @@ unZipStream = next where
when (ver > zipVersion) $ fail $ "Unsupported version: " ++ show ver when (ver > zipVersion) $ fail $ "Unsupported version: " ++ show ver
gpf <- G.getWord16le gpf <- G.getWord16le
-- when (gpf .&. complement (bit 1 .|. bit 2 .|. bit 3) /= 0) $ fail $ "Unsupported flags: " ++ show gpf -- when (gpf .&. complement (bit 1 .|. bit 2 .|. bit 3) /= 0) $ fail $ "Unsupported flags: " ++ show gpf
when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 /= 0) $ fail $ "Unsupported flags: " ++ show gpf when (gpf `clearBit` 1 `clearBit` 2 `clearBit` 3 `clearBit` 11 /= 0) $ fail $ "Unsupported flags: " ++ show gpf
comp <- G.getWord16le comp <- G.getWord16le
dcomp <- case comp of dcomp <- case comp of
0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data"
@ -216,7 +218,7 @@ unZipStream = next where
} }
return FileHeader return FileHeader
{ fileEntry = ZipEntry { fileEntry = ZipEntry
{ zipEntryName = name { zipEntryName = if testBit gpf 11 then Left (TE.decodeUtf8 name) else Right name
, zipEntryTime = time , zipEntryTime = time
, zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize , zipEntrySize = if testBit gpf 3 then Nothing else Just extZip64USize
} }

View File

@ -35,6 +35,8 @@ import qualified Data.Conduit.Zlib as CZ
import Data.Digest.CRC32 (crc32) import Data.Digest.CRC32 (crc32)
import Data.Either (isLeft) import Data.Either (isLeft)
import Data.Maybe (fromMaybe, fromJust) import Data.Maybe (fromMaybe, fromJust)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian) import Data.Time (LocalTime(..), TimeOfDay(..), toGregorian)
import Data.Word (Word16, Word64) import Data.Word (Word16, Word64)
@ -127,12 +129,13 @@ zipStream ZipOptions{..} = execStateC 0 $ do
| otherwise = (left (fmap (id &&& fst)) sdat, usiz) | otherwise = (left (fmap (id &&& fst)) sdat, usiz)
z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize) z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize)
(maxBound32 <) (max <$> usiz <*> csiz) (maxBound32 <) (max <$> usiz <*> csiz)
namelen = BS.length zipEntryName name = either TE.encodeUtf8 id zipEntryName
namelen = BS.length name
(time, date) = toDOSTime zipEntryTime (time, date) = toDOSTime zipEntryTime
mcrc = either (const Nothing) (Just . crc32) dat mcrc = either (const Nothing) (Just . crc32) dat
when (namelen > maxBound16) $ zipError $ BSC.unpack zipEntryName ++ ": entry name too long" when (namelen > maxBound16) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": entry name too long"
let common = do let common = do
P.putWord16le $ isLeft dat ?* bit 3 P.putWord16le $ isLeft dat ?* bit 3 .|. isLeft zipEntryName ?* bit 11
P.putWord16le $ comp ?* 8 P.putWord16le $ comp ?* 8
P.putWord16le $ time P.putWord16le $ time
P.putWord16le $ date P.putWord16le $ date
@ -147,7 +150,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz P.putWord32le $ if z64 then maxBound32 else maybe 0 fromIntegral usiz
P.putWord16le $ fromIntegral namelen P.putWord16le $ fromIntegral namelen
P.putWord16le $ z64 ?* 20 P.putWord16le $ z64 ?* 20
P.putByteString zipEntryName P.putByteString name
when z64 $ do when z64 $ do
P.putWord16le 0x0001 P.putWord16le 0x0001
P.putWord16le 16 P.putWord16le 16
@ -157,7 +160,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
((usz, crc), csz) <- either ((usz, crc), csz) <- either
(\cd -> do (\cd -> do
r@((usz, crc), csz) <- outsz cd -- write compressed data r@((usz, crc), csz) <- outsz cd -- write compressed data
when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled" when (not z64 && (usz > maxBound32 || csz > maxBound32)) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": file too large and zipOpt64 disabled"
output $ do output $ do
P.putWord32le 0x08074b50 P.putWord32le 0x08074b50
P.putWord32le crc P.putWord32le crc
@ -169,7 +172,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
return r) return r)
(\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b)
cdat cdat
when (any (usz /=) zipEntrySize) $ zipError $ BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize"
return $ do return $ do
-- central directory -- central directory
let o64 = off >= maxBound32 let o64 = off >= maxBound32
@ -191,7 +194,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do
P.putWord16le 0 -- internal file attributes P.putWord16le 0 -- internal file attributes
P.putWord32le 0 -- external file attributes P.putWord32le 0 -- external file attributes
P.putWord32le $ if o64 then maxBound32 else fromIntegral off P.putWord32le $ if o64 then maxBound32 else fromIntegral off
P.putByteString zipEntryName P.putByteString name
when a64 $ do when a64 $ do
P.putWord16le 0x0001 P.putWord16le 0x0001
P.putWord16le l64 P.putWord16le l64

View File

@ -6,6 +6,8 @@ import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Binary as CB
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Time.LocalTime (localTimeToUTC, utc) import Data.Time.LocalTime (localTimeToUTC, utc)
import Data.Void (Void) import Data.Void (Void)
import System.Directory (createDirectoryIfMissing import System.Directory (createDirectoryIfMissing
@ -23,9 +25,9 @@ import Codec.Archive.Zip.Conduit.UnZip
extract :: C.ConduitM (Either ZipEntry BS.ByteString) Void IO () extract :: C.ConduitM (Either ZipEntry BS.ByteString) Void IO ()
extract = C.awaitForever start where extract = C.awaitForever start where
start (Left ZipEntry{..}) = do start (Left ZipEntry{..}) = do
liftIO $ BSC.putStrLn zipEntryName liftIO $ either TIO.putStrLn BSC.putStrLn zipEntryName
liftIO $ createDirectoryIfMissing True (takeDirectory name) liftIO $ createDirectoryIfMissing True (takeDirectory name)
if BSC.last zipEntryName == '/' if either T.last BSC.last zipEntryName == '/'
then when ((0 /=) `any` zipEntrySize) $ fail $ name ++ ": non-empty directory" then when ((0 /=) `any` zipEntrySize) $ fail $ name ++ ": non-empty directory"
else do -- C.bracketP else do -- C.bracketP
h <- liftIO $ openFile name WriteMode h <- liftIO $ openFile name WriteMode
@ -35,7 +37,7 @@ extract = C.awaitForever start where
#if MIN_VERSION_directory(1,2,3) #if MIN_VERSION_directory(1,2,3)
liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone
#endif #endif
where name = BSC.unpack $ BSC.dropWhile ('/' ==) zipEntryName -- should we utf8 decode? where name = either (T.unpack . T.dropWhile ('/' ==)) (BSC.unpack . BSC.dropWhile ('/' ==)) zipEntryName
start (Right _) = fail "Unexpected leading or directory data contents" start (Right _) = fail "Unexpected leading or directory data contents"
write = C.await >>= maybe write = C.await >>= maybe
(return ()) (return ())

View File

@ -1,4 +1,5 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
import Control.Arrow ((+++))
import Control.Monad (filterM, void) import Control.Monad (filterM, void)
import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.IO.Class (MonadIO, liftIO)
import Control.Monad.Trans.Resource (MonadResource, runResourceT) import Control.Monad.Trans.Resource (MonadResource, runResourceT)
@ -6,6 +7,7 @@ import qualified Data.ByteString.Char8 as BSC
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.Binary as CB import qualified Data.Conduit.Binary as CB
import Data.List (foldl') import Data.List (foldl')
import qualified Data.Text as T
import Data.Time.LocalTime (utcToLocalTime, utc) import Data.Time.LocalTime (utcToLocalTime, utc)
import qualified System.Console.GetOpt as Opt import qualified System.Console.GetOpt as Opt
import System.Directory (doesDirectoryExist, getModificationTime import System.Directory (doesDirectoryExist, getModificationTime
@ -43,7 +45,7 @@ generate :: (MonadIO m, MonadResource m) => [FilePath] -> C.ConduitM () (ZipEntr
generate (p:paths) = do generate (p:paths) = do
t <- liftIO $ getModificationTime p t <- liftIO $ getModificationTime p
let e = ZipEntry let e = ZipEntry
{ zipEntryName = BSC.pack $ dropWhile ('/' ==) p { zipEntryName = Right $ BSC.pack $ dropWhile ('/' ==) p
, zipEntryTime = utcToLocalTime utc t -- FIXME: timezone , zipEntryTime = utcToLocalTime utc t -- FIXME: timezone
, zipEntrySize = Nothing , zipEntrySize = Nothing
} }
@ -62,7 +64,7 @@ generate (p:paths) = do
#else #else
filter (`notElem` [".",".."]) . map (p </>) <$> getDirectoryContents p filter (`notElem` [".",".."]) . map (p </>) <$> getDirectoryContents p
#endif #endif
C.yield (e{ zipEntryName = zipEntryName e `BSC.snoc` '/', zipEntrySize = Just 0 }, mempty) C.yield (e{ zipEntryName = (`T.snoc` '/') +++ (`BSC.snoc` '/') $ zipEntryName e, zipEntrySize = Just 0 }, mempty)
generate $ dl ++ paths generate $ dl ++ paths
else do else do
C.yield (e, zipFileData p) C.yield (e, zipFileData p)

View File

@ -36,6 +36,7 @@ library
mtl, mtl,
primitive, primitive,
resourcet, resourcet,
text,
time, time,
transformers-base, transformers-base,
zlib zlib
@ -52,6 +53,7 @@ executable unzip-stream
conduit-extra, conduit-extra,
directory, directory,
filepath, filepath,
text,
time, time,
transformers, transformers,
zip-stream zip-stream
@ -69,6 +71,7 @@ executable zip-stream
directory, directory,
filepath, filepath,
resourcet, resourcet,
text,
time, time,
transformers, transformers,
zip-stream zip-stream