diff --git a/Codec/Archive/Zip/Conduit/Internal.hs b/Codec/Archive/Zip/Conduit/Internal.hs index 98fddb7..9751899 100644 --- a/Codec/Archive/Zip/Conduit/Internal.hs +++ b/Codec/Archive/Zip/Conduit/Internal.hs @@ -1,5 +1,6 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ScopedTypeVariables #-} module Codec.Archive.Zip.Conduit.Internal ( osVersion, zipVersion , zipError @@ -8,16 +9,21 @@ module Codec.Archive.Zip.Conduit.Internal , outputSize , inputSize , maxBound32 - , deflateWindowBits + , compressStream, decompressStream ) where -import Codec.Compression.Zlib.Raw (WindowBits(..)) import Control.Monad.Catch (MonadThrow, throwM) import qualified Data.ByteString as BS import qualified Data.Conduit as C import qualified Data.Conduit.Internal as CI import Data.Digest.CRC32 (crc32Update) import Data.Word (Word8, Word32, Word64) +import qualified Codec.Compression.Zlib.Raw as Z +import qualified Codec.Compression.Zlib.Internal as Z +import Control.Monad.Primitive +import Data.Maybe (fromMaybe) +import Control.Monad.Trans.Class (MonadTrans(lift)) +import qualified Control.Monad.ST.Lazy as STL import Codec.Archive.Zip.Conduit.Types @@ -71,5 +77,33 @@ inputSize (CI.ConduitM src) = CI.ConduitM $ \rest -> let maxBound32 :: Integral n => n maxBound32 = fromIntegral (maxBound :: Word32) -deflateWindowBits :: WindowBits -deflateWindowBits = WindowBits (-15) + +awaitNonNull :: forall m o. Monad m => C.ConduitT BS.ByteString o m (Maybe BS.ByteString) +awaitNonNull = do + next <- C.await + case next of + Nothing -> return Nothing + Just bs + | BS.null bs -> awaitNonNull + | otherwise -> return $ Just bs + +compressStream :: forall m. + PrimMonad m + => Z.CompressParams + -> C.ConduitT BS.ByteString BS.ByteString m () +compressStream params = C.transPipe primToPrim . go $ Z.compressST Z.rawFormat params + where + go Z.CompressStreamEnd = return () + go (Z.CompressOutputAvailable outBS cont) = C.yield outBS >> lift cont >>= go + go (Z.CompressInputRequired cont) = awaitNonNull >>= lift . cont . fromMaybe BS.empty >>= go + +decompressStream :: forall m. + ( MonadThrow m, PrimMonad m ) + => C.ConduitT BS.ByteString BS.ByteString m () +decompressStream = go $ Z.decompressST Z.rawFormat Z.defaultDecompressParams + where + go :: Z.DecompressStream (STL.ST (PrimState m)) -> C.ConduitT BS.ByteString BS.ByteString m () + go (Z.DecompressStreamEnd unconsumed) = C.leftover unconsumed + go (Z.DecompressOutputAvailable outBS cont) = C.yield outBS >> lift (primToPrim cont) >>= go + go (Z.DecompressInputRequired cont) = awaitNonNull >>= lift . primToPrim . cont . fromMaybe BS.empty >>= go + go (Z.DecompressStreamError err) = throwM err diff --git a/Codec/Archive/Zip/Conduit/Types.hs b/Codec/Archive/Zip/Conduit/Types.hs index ff2ef44..921a7ed 100644 --- a/Codec/Archive/Zip/Conduit/Types.hs +++ b/Codec/Archive/Zip/Conduit/Types.hs @@ -4,8 +4,7 @@ import Control.Exception (Exception(..)) import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C -import Data.Conduit.Binary (sourceLbs) -import Data.Semigroup (Semigroup(..)) +import qualified Data.Conduit.Combinators as C import Data.String (IsString(..)) import qualified Data.Text as T import Data.Time.LocalTime (LocalTime) @@ -51,7 +50,7 @@ instance Monad m => Monoid (ZipData m) where -- |Normalize any 'ZipData' to a simple source sourceZipData :: Monad m => ZipData m -> C.ConduitM () ByteString m () -sourceZipData (ZipDataByteString b) = sourceLbs b +sourceZipData (ZipDataByteString b) = C.sourceLazy b sourceZipData (ZipDataSource s) = s -- |Convert between unpacked (as 'Codec.Archive.Zip.Conduit.UnZip.unZipStream' produces) and packed (as 'Codec.Archive.Zip.Conduit.Zip.zipStream' consumes) representations. diff --git a/Codec/Archive/Zip/Conduit/UnZip.hs b/Codec/Archive/Zip/Conduit/UnZip.hs index 51b47ea..c8ef675 100644 --- a/Codec/Archive/Zip/Conduit/UnZip.hs +++ b/Codec/Archive/Zip/Conduit/UnZip.hs @@ -22,7 +22,6 @@ import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C import qualified Data.Conduit.List as CL import Data.Conduit.Serialization.Binary (sinkGet) -import qualified Data.Conduit.Zlib as CZ import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Time (LocalTime(..), TimeOfDay(..), fromGregorian) @@ -173,7 +172,7 @@ unZipStream = next where dcomp <- case comp of 0 | testBit gpf 3 -> fail "Unsupported uncompressed streaming file data" | otherwise -> return idConduit - 8 -> return $ CZ.decompress deflateWindowBits + 8 -> return decompressStream _ -> fail $ "Unsupported compression method: " ++ show comp time <- fromDOSTime <$> G.getWord16le <*> G.getWord16le crc <- G.getWord32le diff --git a/Codec/Archive/Zip/Conduit/Zip.hs b/Codec/Archive/Zip/Conduit/Zip.hs index 85c9f74..823671e 100644 --- a/Codec/Archive/Zip/Conduit/Zip.hs +++ b/Codec/Archive/Zip/Conduit/Zip.hs @@ -5,6 +5,8 @@ module Codec.Archive.Zip.Conduit.Zip ( zipStream , ZipOptions(..) + , Z.CompressionLevel + , Z.defaultCompression, Z.noCompression, Z.bestSpeed, Z.bestCompression, Z.compressionLevel , ZipInfo(..) , defaultZipOptions , ZipEntry(..) @@ -28,10 +30,9 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.ByteString.Lazy as BSL import qualified Data.Conduit as C -import qualified Data.Conduit.Binary as CB +import qualified Data.Conduit.Combinators as C import Data.Conduit.Lift (stateC, execStateC) import Data.Conduit.Serialization.Binary (sourcePut) -import qualified Data.Conduit.Zlib as CZ import Data.Digest.CRC32 (crc32) import Data.Either (isLeft) import Data.Maybe (fromMaybe, fromJust) @@ -46,14 +47,14 @@ import Codec.Archive.Zip.Conduit.Internal -- |Options controlling zip file parameters and features data ZipOptions = ZipOptions { zipOpt64 :: Bool -- ^Allow 'ZipDataSource's over 4GB (reduces compatibility in some cases); this is automatically enabled for any files of known size (e.g., 'zipEntrySize') - , zipOptCompressLevel :: Int -- ^Compress (0 = store only, 9 = best) zipped files (improves compatibility, since some unzip programs don't supported stored, streamed files, including the one in this package) + , zipOptCompressLevel :: Z.CompressionLevel , zipOptInfo :: ZipInfo -- ^Other parameters to store in the zip file } defaultZipOptions :: ZipOptions defaultZipOptions = ZipOptions { zipOpt64 = False - , zipOptCompressLevel = -1 + , zipOptCompressLevel = Z.defaultCompression , zipOptInfo = ZipInfo { zipComment = BS.empty } @@ -64,9 +65,9 @@ infixr 7 ?* True ?* x = x False ?* _ = 0 --- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'CB.sourceFile'@). +-- |Use a file on disk as 'ZipData' (@'ZipDataSource' . 'C.sourceFile'@). zipFileData :: MonadResource m => FilePath -> ZipData m -zipFileData = ZipDataSource . CB.sourceFile +zipFileData = ZipDataSource . C.sourceFile zipData :: Monad m => ZipData m -> Either (C.ConduitM () BS.ByteString m ()) BSL.ByteString zipData (ZipDataByteString b) = Right b @@ -120,13 +121,14 @@ zipStream ZipOptions{..} = execStateC 0 $ do entry (ZipEntry{..}, zipData -> dat) = do let usiz = dataSize dat sdat = left ((C..| sizeCRC) . C.toProducer) dat - comp = zipOptCompressLevel /= 0 + comp = zipOptCompressLevel /= Z.noCompression && all (0 /=) usiz && all (0 /=) zipEntrySize + compressParams = Z.defaultCompressParams { Z.compressLevel = zipOptCompressLevel } (cdat, csiz) | comp = - ( ((`C.fuseBoth` (outputSize $ CZ.compress zipOptCompressLevel deflateWindowBits)) - +++ Z.compress) sdat -- level for Z.compress? + ( ((`C.fuseBoth` (outputSize $ compressStream compressParams)) + +++ Z.compressWith compressParams) sdat , dataSize cdat) | otherwise = (left (fmap (id &&& fst)) sdat, usiz) z64 = maybe (zipOpt64 || any (maxBound32 <) zipEntrySize) @@ -172,7 +174,7 @@ zipStream ZipOptions{..} = execStateC 0 $ do putsz csz putsz usz return r) - (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ CB.sourceLbs b) + (\b -> outsz $ ((fromJust usiz, fromJust mcrc), fromJust csiz) <$ C.sourceLazy b) cdat when (any (usz /=) zipEntrySize) $ zipError $ either T.unpack BSC.unpack zipEntryName ++ ": incorrect zipEntrySize" return $ do diff --git a/cmd/unzip.hs b/cmd/unzip.hs index c8819bf..19f3351 100644 --- a/cmd/unzip.hs +++ b/cmd/unzip.hs @@ -5,7 +5,7 @@ import Control.Monad.IO.Class (liftIO) import qualified Data.ByteString as BS import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C -import qualified Data.Conduit.Binary as CB +import qualified Data.Conduit.Combinators as C import qualified Data.Text as T import qualified Data.Text.IO as TIO import Data.Time.LocalTime (localTimeToUTC, utc) @@ -18,7 +18,7 @@ import System.Directory (createDirectoryIfMissing import System.Environment (getProgName, getArgs) import System.Exit (exitFailure) import System.FilePath.Posix (takeDirectory) -- zip files only use forward slashes -import System.IO (stdin, openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr) +import System.IO (openFile, IOMode(WriteMode), hClose, hSetFileSize, hPutStrLn, stderr) import Codec.Archive.Zip.Conduit.UnZip @@ -32,7 +32,7 @@ extract = C.awaitForever start where else do -- C.bracketP h <- liftIO $ openFile name WriteMode mapM_ (liftIO . hSetFileSize h . toInteger) zipEntrySize - write C..| CB.sinkHandle h + write C..| C.sinkHandle h liftIO $ hClose h #if MIN_VERSION_directory(1,2,3) liftIO $ setModificationTime name $ localTimeToUTC utc zipEntryTime -- FIXME: timezone @@ -53,6 +53,6 @@ main = do hPutStrLn stderr $ "Usage: " ++ prog ++ "\nRead a zip file from stdin and extract it in the current directory." exitFailure ZipInfo{..} <- C.runConduit - $ CB.sourceHandle stdin + $ C.stdin C..| C.fuseUpstream unZipStream extract BSC.putStrLn zipComment diff --git a/cmd/zip.hs b/cmd/zip.hs index 9ea2789..48e527b 100644 --- a/cmd/zip.hs +++ b/cmd/zip.hs @@ -5,7 +5,7 @@ import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.Trans.Resource (MonadResource, runResourceT) import qualified Data.ByteString.Char8 as BSC import qualified Data.Conduit as C -import qualified Data.Conduit.Binary as CB +import qualified Data.Conduit.Combinators as C import Data.List (foldl') import qualified Data.Text as T import Data.Time.LocalTime (utcToLocalTime, utc) @@ -25,15 +25,15 @@ import System.Directory (doesDirectoryExist, getModificationTime import System.Environment (getProgName, getArgs) import System.Exit (exitFailure) import System.FilePath.Posix (()) -- zip files only want forward slashes -import System.IO (stdout, hPutStrLn, stderr) - +import System.IO (hPutStrLn, stderr) + import Codec.Archive.Zip.Conduit.Zip opts :: [Opt.OptDescr (ZipOptions -> ZipOptions)] opts = - [ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = read l }) "LEVEL") + [ Opt.Option "z" ["compress"] (Opt.ReqArg (\l o -> o{ zipOptCompressLevel = compressionLevel $ read l }) "LEVEL") "set compression level for files (0-9)" - , Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = 0 })) + , Opt.Option "0" ["store"] (Opt.NoArg (\o -> o{ zipOptCompressLevel = noCompression })) "don't compress files (-z0)" , Opt.Option "e" ["zip64"] (Opt.NoArg (\o -> o{ zipOpt64 = True })) "enable zip64 support for files over 4GB" @@ -85,4 +85,4 @@ main = do runResourceT $ C.runConduit $ generate paths C..| void (zipStream opt) - C..| CB.sinkHandle stdout + C..| C.stdout diff --git a/nixpkgs.nix b/nixpkgs.nix new file mode 100644 index 0000000..375c841 --- /dev/null +++ b/nixpkgs.nix @@ -0,0 +1,10 @@ +{ nixpkgs ? import +}: + +import ((nixpkgs {}).fetchFromGitHub { + owner = "NixOS"; + repo = "nixpkgs"; + rev = "bc00ecedfa709f4fa91d445dd76ecd792cb2c728"; + sha256 = "0plhwb04srr4b0h7w8qlqi207a19szz2wqz6r4gmic856jlkchaa"; + fetchSubmodules = true; +}) diff --git a/stack.nix b/stack.nix new file mode 100644 index 0000000..6d46cb0 --- /dev/null +++ b/stack.nix @@ -0,0 +1,13 @@ +{ ghc, nixpkgs ? import ./nixpkgs.nix {} }: + +let + haskellPackages = pkgs.haskellPackages; + inherit (nixpkgs {}) pkgs; +in pkgs.haskell.lib.buildStackProject { + inherit ghc; + inherit (haskellPackages) stack; + name = "stackenv"; + buildInputs = (with pkgs; + [ zlib + ]); +} diff --git a/stack.yaml b/stack.yaml index d5027a7..ad593e3 100644 --- a/stack.yaml +++ b/stack.yaml @@ -1,4 +1,9 @@ -resolver: lts-12.14 +resolver: lts-16.13 packages: -- '.' -extra-deps: [] + - '.' +extra-deps: + - primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604 +nix: + packages: [] + shell-file: ./stack.nix + add-gc-roots: true diff --git a/stack.yaml.lock b/stack.yaml.lock new file mode 100644 index 0000000..5b1b18f --- /dev/null +++ b/stack.yaml.lock @@ -0,0 +1,19 @@ +# This file was autogenerated by Stack. +# You should not edit this file by hand. +# For more information, please see the documentation at: +# https://docs.haskellstack.org/en/stable/lock_files + +packages: +- completed: + hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604 + pantry-tree: + size: 1376 + sha256: 924e88629b493abb6b2f3c3029cef076554a2b627091e3bb6887ec03487a707d + original: + hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604 +snapshots: +- completed: + size: 532381 + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/16/13.yaml + sha256: 6ee17f7996e5bc75ae4406250841f1362ad4196418a4d90a0615ff4f26ac98df + original: lts-16.13 diff --git a/zip-stream.cabal b/zip-stream.cabal index e79efe9..257915f 100644 --- a/zip-stream.cabal +++ b/zip-stream.cabal @@ -30,14 +30,14 @@ library binary-conduit, bytestring, conduit, - conduit-extra, digest, exceptions, mtl, - primitive, + primitive >= 0.7.1.0, resourcet, text, time, + transformers, transformers-base, zlib @@ -50,7 +50,6 @@ executable unzip-stream base >=4.8 && <5, bytestring, conduit, - conduit-extra, directory, filepath, text, @@ -67,7 +66,6 @@ executable zip-stream base >=4.8 && <5, bytestring, conduit, - conduit-extra, directory, filepath, resourcet,