From 104b3ad3973d48de68de7a66420584c593855718 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 5 Oct 2017 13:37:54 +0200 Subject: [PATCH 01/11] produceZip --- package.yaml | 2 ++ src/Handler/Zip.hs | 69 ++++++++++++++++++++++++++++++++++++++++++++++ stack.yaml | 5 ++-- 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 src/Handler/Zip.hs diff --git a/package.yaml b/package.yaml index e2b9b88f3..46ec0daa1 100644 --- a/package.yaml +++ b/package.yaml @@ -50,6 +50,8 @@ dependencies: - base64-bytestring - memory - http-api-data +- zip-stream +- filepath # The library contains all of our application code. The executable # defined below is just a thin wrapper. diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs new file mode 100644 index 000000000..5462ac82f --- /dev/null +++ b/src/Handler/Zip.hs @@ -0,0 +1,69 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -fno-warn-missing-fields #-} -- This concerns Zip.zipEntrySize in produceZip + +module Handler.Zip + ( Zip.ZipError(..) + , Zip.ZipInfo(..) + , produceZip + , consumeZip + ) where + +import Import + +import qualified Codec.Archive.Zip.Conduit.Types as Zip +import qualified Codec.Archive.Zip.Conduit.UnZip as Zip +import qualified Codec.Archive.Zip.Conduit.Zip as Zip + +import qualified Data.ByteString.Lazy as Lazy (ByteString) +import qualified Data.ByteString.Lazy as Lazy.ByteString + +import Data.ByteString (ByteString) + +import qualified Data.Text as Text +import qualified Data.Text.Encoding as Text + +import System.FilePath +import Data.Time + + +data ZipEntry = ZipEntry + { zipEntryName :: FilePath + , zipEntryTime :: UTCTime + , zipEntryContents :: Maybe Lazy.ByteString -- ^ 'Nothing' means this is a directory + } + + +consumeZip :: (ZipEntry -> YesodDB UniWorX a) -- ^ Insert entries into database + -> Sink ByteString (YesodDB UniWorX) (Zip.ZipInfo, [a]) +consumeZip = error "consumeZip not implemented yet" + + +produceZip :: ( MonadBase b m + , PrimMonad b + , MonadThrow m + ) => Zip.ZipInfo + -> Conduit ZipEntry m ByteString +produceZip info = toZipData =$= void (Zip.zipStream zipOptions) + where + zipOptions = Zip.ZipOptions + { zipOpt64 = True + , zipOptCompressLevel = -1 -- This is passed through all the way to the C zlib, where it means "default level" + , zipOptInfo = info + } + toZipData :: Monad m => Conduit ZipEntry m (Zip.ZipEntry, Zip.ZipData m) + toZipData = do + entry <- await + case entry of + Nothing + -> return () + Just (e@ZipEntry{ zipEntryContents = Nothing }) + -> yield ((toZipEntry True e){ Zip.zipEntrySize = Nothing }, mempty) + Just (e@ZipEntry{ zipEntryContents = Just b}) + -> yield ((toZipEntry False e){ Zip.zipEntrySize = Just . fromIntegral $ Lazy.ByteString.length b }, Zip.ZipDataByteString b) + toZipEntry :: Bool -- ^ Is directory? + -> ZipEntry -> Zip.ZipEntry + toZipEntry isDir ZipEntry{..} = Zip.ZipEntry + { zipEntryName = Text.encodeUtf8 . Text.pack . bool dropTrailingPathSeparator addTrailingPathSeparator isDir . normalise $ makeValid zipEntryName + , zipEntryTime = utcToLocalTime utc zipEntryTime + } diff --git a/stack.yaml b/stack.yaml index 8a276d119..0234457d9 100644 --- a/stack.yaml +++ b/stack.yaml @@ -36,10 +36,11 @@ resolver: lts-9.3 # non-dependency (i.e. a user package), and its test suites and benchmarks # will not be run. This is useful for tweaking upstream packages. packages: -- . + - . # Dependency packages to be pulled from upstream that are not in the resolver # (e.g., acme-missiles-0.3) -extra-deps: [] +extra-deps: + - zip-stream-0.1.0.1 # Override default flag values for local packages and extra-deps flags: {} From 0f0f77344f9f4c9946fc83aae641ee716546a95c Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 5 Oct 2017 14:10:32 +0200 Subject: [PATCH 02/11] consumeZip --- src/Handler/Zip.hs | 59 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index 5462ac82f..fe1432c2d 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -1,6 +1,8 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE DeriveGeneric, DeriveDataTypeable #-} {-# OPTIONS_GHC -fno-warn-missing-fields #-} -- This concerns Zip.zipEntrySize in produceZip +{-# OPTIONS_GHC -fno-warn-orphans #-} module Handler.Zip ( Zip.ZipError(..) @@ -11,6 +13,8 @@ module Handler.Zip import Import +import qualified Data.Conduit.List as Conduit (map) + import qualified Codec.Archive.Zip.Conduit.Types as Zip import qualified Codec.Archive.Zip.Conduit.UnZip as Zip import qualified Codec.Archive.Zip.Conduit.Zip as Zip @@ -26,17 +30,49 @@ import qualified Data.Text.Encoding as Text import System.FilePath import Data.Time +import GHC.Generics (Generic) +import Data.Typeable (Typeable) + data ZipEntry = ZipEntry { zipEntryName :: FilePath , zipEntryTime :: UTCTime , zipEntryContents :: Maybe Lazy.ByteString -- ^ 'Nothing' means this is a directory - } + } deriving (Read, Show, Generic, Typeable) + + +instance Default Zip.ZipInfo where + def = Zip.ZipInfo + { zipComment = mempty + } consumeZip :: (ZipEntry -> YesodDB UniWorX a) -- ^ Insert entries into database -> Sink ByteString (YesodDB UniWorX) (Zip.ZipInfo, [a]) -consumeZip = error "consumeZip not implemented yet" +consumeZip insertEntry = Zip.unZipStream `fuseBoth` consumeZip' + where + -- consumeZip' :: Sink (Either Zip.ZipEntry ByteString) (YesodDB UniWorX) [a] + consumeZip' = do + input <- await + case input of + Nothing -> return [] + Just (Right _) -> throw $ userError "Data chunk in unexpected place when parsing ZIP" + Just (Left e) -> do + zipEntryName' <- either throw return . Text.decodeUtf8' $ Zip.zipEntryName e + contentChunks <- accContents + let + zipEntryName = normalise . makeValid $ Text.unpack zipEntryName' + zipEntryTime = localTimeToUTC utc $ Zip.zipEntryTime e + zipEntryContents + | hasTrailingPathSeparator zipEntryName = Nothing + | otherwise = Just $ Lazy.ByteString.fromChunks contentChunks + (:) <$> (lift $ insertEntry ZipEntry{..}) <*> consumeZip' + accContents :: Monad m => Sink (Either a b) m [b] + accContents = do + input <- await + case input of + Just (Right x) -> (x :) <$> accContents + _ -> return [] produceZip :: ( MonadBase b m @@ -44,23 +80,20 @@ produceZip :: ( MonadBase b m , MonadThrow m ) => Zip.ZipInfo -> Conduit ZipEntry m ByteString -produceZip info = toZipData =$= void (Zip.zipStream zipOptions) +produceZip info = Conduit.map toZipData =$= void (Zip.zipStream zipOptions) where zipOptions = Zip.ZipOptions { zipOpt64 = True , zipOptCompressLevel = -1 -- This is passed through all the way to the C zlib, where it means "default level" , zipOptInfo = info } - toZipData :: Monad m => Conduit ZipEntry m (Zip.ZipEntry, Zip.ZipData m) - toZipData = do - entry <- await - case entry of - Nothing - -> return () - Just (e@ZipEntry{ zipEntryContents = Nothing }) - -> yield ((toZipEntry True e){ Zip.zipEntrySize = Nothing }, mempty) - Just (e@ZipEntry{ zipEntryContents = Just b}) - -> yield ((toZipEntry False e){ Zip.zipEntrySize = Just . fromIntegral $ Lazy.ByteString.length b }, Zip.ZipDataByteString b) + + toZipData :: Monad m => ZipEntry -> (Zip.ZipEntry, Zip.ZipData m) + toZipData (e@ZipEntry{ zipEntryContents = Nothing }) + = ((toZipEntry True e){ Zip.zipEntrySize = Nothing }, mempty) + toZipData (e@ZipEntry{ zipEntryContents = Just b}) + = ((toZipEntry False e){ Zip.zipEntrySize = Just . fromIntegral $ Lazy.ByteString.length b }, Zip.ZipDataByteString b) + toZipEntry :: Bool -- ^ Is directory? -> ZipEntry -> Zip.ZipEntry toZipEntry isDir ZipEntry{..} = Zip.ZipEntry From 4191d65fc51b763730a1adfade1f327996ce6115 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 5 Oct 2017 14:13:51 +0200 Subject: [PATCH 03/11] More polymorphism. --- src/Handler/Zip.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index fe1432c2d..a75c84c9b 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -47,11 +47,14 @@ instance Default Zip.ZipInfo where } -consumeZip :: (ZipEntry -> YesodDB UniWorX a) -- ^ Insert entries into database - -> Sink ByteString (YesodDB UniWorX) (Zip.ZipInfo, [a]) -consumeZip insertEntry = Zip.unZipStream `fuseBoth` consumeZip' +consumeZip :: ( MonadBase b m + , PrimMonad b + , MonadThrow m + ) => (ZipEntry -> m a) -- ^ Handle entries (insert into database) + -> Sink ByteString m (Zip.ZipInfo, [a]) +consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' where - -- consumeZip' :: Sink (Either Zip.ZipEntry ByteString) (YesodDB UniWorX) [a] + -- consumeZip' :: Sink (Either Zip.ZipEntry ByteString) m [a] consumeZip' = do input <- await case input of @@ -66,7 +69,7 @@ consumeZip insertEntry = Zip.unZipStream `fuseBoth` consumeZip' zipEntryContents | hasTrailingPathSeparator zipEntryName = Nothing | otherwise = Just $ Lazy.ByteString.fromChunks contentChunks - (:) <$> (lift $ insertEntry ZipEntry{..}) <*> consumeZip' + (:) <$> (lift $ handleEntry ZipEntry{..}) <*> consumeZip' accContents :: Monad m => Sink (Either a b) m [b] accContents = do input <- await From 15bd70f10a25cd24d67a3d7b0c38c84d40b0a153 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 5 Oct 2017 19:11:05 +0200 Subject: [PATCH 04/11] Test suite for zip handling --- package.yaml | 2 ++ src/Handler/Zip.hs | 13 +++++++------ test/Handler/ZipSpec.hs | 30 ++++++++++++++++++++++++++++++ test/TestImport.hs | 2 ++ 4 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 test/Handler/ZipSpec.hs diff --git a/package.yaml b/package.yaml index 46ec0daa1..f7f17cd5c 100644 --- a/package.yaml +++ b/package.yaml @@ -98,6 +98,8 @@ tests: - hspec >=2.0.0 - QuickCheck - yesod-test + - conduit-extra + - quickcheck-instances # Define flags used by "yesod devel" to make compilation faster flags: diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index a75c84c9b..67a8f62b3 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -7,6 +7,7 @@ module Handler.Zip ( Zip.ZipError(..) , Zip.ZipInfo(..) + , ZipEntry(..) , produceZip , consumeZip ) where @@ -38,7 +39,7 @@ data ZipEntry = ZipEntry { zipEntryName :: FilePath , zipEntryTime :: UTCTime , zipEntryContents :: Maybe Lazy.ByteString -- ^ 'Nothing' means this is a directory - } deriving (Read, Show, Generic, Typeable) + } deriving (Read, Show, Generic, Typeable, Eq, Ord) instance Default Zip.ZipInfo where @@ -59,15 +60,15 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' input <- await case input of Nothing -> return [] - Just (Right _) -> throw $ userError "Data chunk in unexpected place when parsing ZIP" + Just (Right _) -> consumeZip' -- throw $ userError "Data chunk in unexpected place when parsing ZIP" Just (Left e) -> do - zipEntryName' <- either throw return . Text.decodeUtf8' $ Zip.zipEntryName e + zipEntryName' <- fmap Text.unpack . either throw return . Text.decodeUtf8' $ Zip.zipEntryName e contentChunks <- accContents let - zipEntryName = normalise . makeValid $ Text.unpack zipEntryName' + zipEntryName = normalise $ dropTrailingPathSeparator zipEntryName' zipEntryTime = localTimeToUTC utc $ Zip.zipEntryTime e zipEntryContents - | hasTrailingPathSeparator zipEntryName = Nothing + | hasTrailingPathSeparator zipEntryName' = Nothing | otherwise = Just $ Lazy.ByteString.fromChunks contentChunks (:) <$> (lift $ handleEntry ZipEntry{..}) <*> consumeZip' accContents :: Monad m => Sink (Either a b) m [b] @@ -100,6 +101,6 @@ produceZip info = Conduit.map toZipData =$= void (Zip.zipStream zipOptions) toZipEntry :: Bool -- ^ Is directory? -> ZipEntry -> Zip.ZipEntry toZipEntry isDir ZipEntry{..} = Zip.ZipEntry - { zipEntryName = Text.encodeUtf8 . Text.pack . bool dropTrailingPathSeparator addTrailingPathSeparator isDir . normalise $ makeValid zipEntryName + { zipEntryName = Text.encodeUtf8 . Text.pack . bool dropTrailingPathSeparator addTrailingPathSeparator isDir . normalise $ zipEntryName , zipEntryTime = utcToLocalTime utc zipEntryTime } diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs new file mode 100644 index 000000000..681041b3a --- /dev/null +++ b/test/Handler/ZipSpec.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE RecordWildCards #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} + +module Handler.ZipSpec where + +import TestImport + +import Handler.Zip + +import System.FilePath + +import Data.Conduit +import qualified Data.Conduit.List as Conduit +import Data.Conduit.Binary (sourceLbs, sinkLbs) + +instance Arbitrary ZipEntry where + arbitrary = do + zipEntryName <- normalise . dropTrailingPathSeparator . joinPath <$> arbitrary + zipEntryTime <- arbitrary + zipEntryContents <- arbitrary + return ZipEntry{..} + +spec :: Spec +spec = describe "Zip file handling" $ do + it "has compatible encoding/decoding to/from zip files" . property $ + \zipFiles -> do + bs <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= sinkLbs + (_, zipFiles') <- runConduit $ sourceLbs bs =$= consumeZip return + zipFiles' `shouldBe` zipFiles diff --git a/test/TestImport.hs b/test/TestImport.hs index 031453f19..768cafad7 100644 --- a/test/TestImport.hs +++ b/test/TestImport.hs @@ -22,6 +22,8 @@ import Yesod.Test as X import Yesod.Core.Unsafe (fakeHandlerGetLogger) import Test.QuickCheck as X import Test.QuickCheck.Gen as X +import Data.Default as X +import Test.QuickCheck.Instances as X runDB :: SqlPersistM a -> YesodExample UniWorX a runDB query = do From 6c1afb6919e0fc2aa6cf263305dc310065a671c8 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 5 Oct 2017 20:17:37 +0200 Subject: [PATCH 05/11] more detailed tests --- src/Handler/Zip.hs | 4 ++-- test/Handler/ZipSpec.hs | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index 67a8f62b3..a4b49f461 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -65,7 +65,7 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' zipEntryName' <- fmap Text.unpack . either throw return . Text.decodeUtf8' $ Zip.zipEntryName e contentChunks <- accContents let - zipEntryName = normalise $ dropTrailingPathSeparator zipEntryName' + zipEntryName = normalise . makeValid $ dropTrailingPathSeparator zipEntryName' zipEntryTime = localTimeToUTC utc $ Zip.zipEntryTime e zipEntryContents | hasTrailingPathSeparator zipEntryName' = Nothing @@ -101,6 +101,6 @@ produceZip info = Conduit.map toZipData =$= void (Zip.zipStream zipOptions) toZipEntry :: Bool -- ^ Is directory? -> ZipEntry -> Zip.ZipEntry toZipEntry isDir ZipEntry{..} = Zip.ZipEntry - { zipEntryName = Text.encodeUtf8 . Text.pack . bool dropTrailingPathSeparator addTrailingPathSeparator isDir . normalise $ zipEntryName + { zipEntryName = Text.encodeUtf8 . Text.pack . normalise . makeValid . bool dropTrailingPathSeparator addTrailingPathSeparator isDir $ zipEntryName , zipEntryTime = utcToLocalTime utc zipEntryTime } diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs index 681041b3a..6cd6b43a2 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/ZipSpec.hs @@ -16,7 +16,7 @@ import Data.Conduit.Binary (sourceLbs, sinkLbs) instance Arbitrary ZipEntry where arbitrary = do - zipEntryName <- normalise . dropTrailingPathSeparator . joinPath <$> arbitrary + zipEntryName <- joinPath <$> arbitrary zipEntryTime <- arbitrary zipEntryContents <- arbitrary return ZipEntry{..} @@ -27,4 +27,7 @@ spec = describe "Zip file handling" $ do \zipFiles -> do bs <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= sinkLbs (_, zipFiles') <- runConduit $ sourceLbs bs =$= consumeZip return - zipFiles' `shouldBe` zipFiles + forM_ (zip zipFiles zipFiles') $ \(file, file') -> do + (shouldBe `on` normalise . makeValid . dropTrailingPathSeparator) (zipEntryName file') (zipEntryName file) + -- (zipEntryTime file') `shouldBe` (zipEntryTime file) + (zipEntryContents file') `shouldBe` (zipEntryContents file) From 88493d34a0363ae7fab1984e7003ed93d3741014 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Fri, 6 Oct 2017 04:09:45 +0200 Subject: [PATCH 06/11] Bugfixes --- src/Handler/Zip.hs | 17 +++++++++++++---- test/Handler/ZipSpec.hs | 16 ++++++++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index a4b49f461..51fdd5179 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -34,6 +34,8 @@ import Data.Time import GHC.Generics (Generic) import Data.Typeable (Typeable) +import Data.List (dropWhileEnd) + data ZipEntry = ZipEntry { zipEntryName :: FilePath @@ -60,13 +62,13 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' input <- await case input of Nothing -> return [] - Just (Right _) -> consumeZip' -- throw $ userError "Data chunk in unexpected place when parsing ZIP" + Just (Right _) -> throw $ userError "Data chunk in unexpected place when parsing ZIP" Just (Left e) -> do zipEntryName' <- fmap Text.unpack . either throw return . Text.decodeUtf8' $ Zip.zipEntryName e contentChunks <- accContents let - zipEntryName = normalise . makeValid $ dropTrailingPathSeparator zipEntryName' - zipEntryTime = localTimeToUTC utc $ Zip.zipEntryTime e + zipEntryName = normalise $ makeValid zipEntryName' + zipEntryTime = fixZipEpoch . localTimeToUTC utc $ Zip.zipEntryTime e zipEntryContents | hasTrailingPathSeparator zipEntryName' = Nothing | otherwise = Just $ Lazy.ByteString.fromChunks contentChunks @@ -76,8 +78,15 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' input <- await case input of Just (Right x) -> (x :) <$> accContents + Just (Left x) -> [] <$ leftover (Left x) _ -> return [] +fixZipEpoch :: UTCTime -> UTCTime +-- ^ Testing showed that the zip library used introduces a weird offset into +-- dates when packing/unpacking zip files. +-- This is fixed here, for now. +fixZipEpoch u@(UTCTime{..}) = u{ utctDay = addDays (-46751) utctDay } + produceZip :: ( MonadBase b m , PrimMonad b @@ -101,6 +110,6 @@ produceZip info = Conduit.map toZipData =$= void (Zip.zipStream zipOptions) toZipEntry :: Bool -- ^ Is directory? -> ZipEntry -> Zip.ZipEntry toZipEntry isDir ZipEntry{..} = Zip.ZipEntry - { zipEntryName = Text.encodeUtf8 . Text.pack . normalise . makeValid . bool dropTrailingPathSeparator addTrailingPathSeparator isDir $ zipEntryName + { zipEntryName = Text.encodeUtf8 . Text.pack . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator isDir . normalise . makeValid $ zipEntryName , zipEntryTime = utcToLocalTime utc zipEntryTime } diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs index 6cd6b43a2..61712ec39 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/ZipSpec.hs @@ -12,7 +12,9 @@ import System.FilePath import Data.Conduit import qualified Data.Conduit.List as Conduit -import Data.Conduit.Binary (sourceLbs, sinkLbs) + +import Data.List (dropWhileEnd) +import Data.Time instance Arbitrary ZipEntry where arbitrary = do @@ -25,9 +27,11 @@ spec :: Spec spec = describe "Zip file handling" $ do it "has compatible encoding/decoding to/from zip files" . property $ \zipFiles -> do - bs <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= sinkLbs - (_, zipFiles') <- runConduit $ sourceLbs bs =$= consumeZip return - forM_ (zip zipFiles zipFiles') $ \(file, file') -> do - (shouldBe `on` normalise . makeValid . dropTrailingPathSeparator) (zipEntryName file') (zipEntryName file) - -- (zipEntryTime file') `shouldBe` (zipEntryTime file) + (_, zipFiles') <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= consumeZip return + forM_ (zipFiles `zip` zipFiles') $ \(file, file') -> do + let acceptableFilenameChanges + = bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ zipEntryContents file) . normalise . makeValid + acceptableTimeDifference t1 t2 = abs (diffUTCTime t1 t2) <= 2 + (shouldBe `on` acceptableFilenameChanges) (zipEntryName file') (zipEntryName file) + (zipEntryTime file', zipEntryTime file) `shouldSatisfy` uncurry acceptableTimeDifference (zipEntryContents file') `shouldBe` (zipEntryContents file) From 5742d21406c051914f96905b4aa665e130db9945 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 9 Oct 2017 14:07:27 +0200 Subject: [PATCH 07/11] Address date confusion --- src/Handler/Zip.hs | 9 +-------- test/Handler/ZipSpec.hs | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index 51fdd5179..fab24fc85 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -68,7 +68,7 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' contentChunks <- accContents let zipEntryName = normalise $ makeValid zipEntryName' - zipEntryTime = fixZipEpoch . localTimeToUTC utc $ Zip.zipEntryTime e + zipEntryTime = localTimeToUTC utc $ Zip.zipEntryTime e zipEntryContents | hasTrailingPathSeparator zipEntryName' = Nothing | otherwise = Just $ Lazy.ByteString.fromChunks contentChunks @@ -81,13 +81,6 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' Just (Left x) -> [] <$ leftover (Left x) _ -> return [] -fixZipEpoch :: UTCTime -> UTCTime --- ^ Testing showed that the zip library used introduces a weird offset into --- dates when packing/unpacking zip files. --- This is fixed here, for now. -fixZipEpoch u@(UTCTime{..}) = u{ utctDay = addDays (-46751) utctDay } - - produceZip :: ( MonadBase b m , PrimMonad b , MonadThrow m diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs index 61712ec39..eebcb72d3 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/ZipSpec.hs @@ -19,7 +19,8 @@ import Data.Time instance Arbitrary ZipEntry where arbitrary = do zipEntryName <- joinPath <$> arbitrary - zipEntryTime <- arbitrary + let date = addDays <$> arbitrary <*> pure (fromGregorian 2043 7 2) + zipEntryTime <- UTCTime <$> date <*> arbitrary zipEntryContents <- arbitrary return ZipEntry{..} @@ -30,8 +31,17 @@ spec = describe "Zip file handling" $ do (_, zipFiles') <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= consumeZip return forM_ (zipFiles `zip` zipFiles') $ \(file, file') -> do let acceptableFilenameChanges - = bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ zipEntryContents file) . normalise . makeValid + = makeValid . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ zipEntryContents file) . normalise . makeValid acceptableTimeDifference t1 t2 = abs (diffUTCTime t1 t2) <= 2 (shouldBe `on` acceptableFilenameChanges) (zipEntryName file') (zipEntryName file) - (zipEntryTime file', zipEntryTime file) `shouldSatisfy` uncurry acceptableTimeDifference + when (inZipRange $ zipEntryTime file) $ + (zipEntryTime file', zipEntryTime file) `shouldSatisfy` uncurry acceptableTimeDifference (zipEntryContents file') `shouldBe` (zipEntryContents file) + +inZipRange :: UTCTime -> Bool +inZipRange time + | time > UTCTime (fromGregorian 1980 1 1) 0 + , time < UTCTime (fromGregorian 2107 1 1) 0 + = True + | otherwise + = False From 332be4d9ceee6436745c3e0e98ccee6cfeab2888 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 9 Oct 2017 16:08:02 +0200 Subject: [PATCH 08/11] Switch Zip to work on 'File's --- models | 23 ++++++------ src/Handler/Zip.hs | 81 ++++++++++++++++++----------------------- src/Model/Types.hs | 4 ++ test/Handler/ZipSpec.hs | 22 +++++------ 4 files changed, 61 insertions(+), 69 deletions(-) diff --git a/models b/models index e115ddd5e..45a202e9b 100644 --- a/models +++ b/models @@ -51,10 +51,6 @@ Sheet sheetType SheetType maxPoints Double Maybe requiredPoints Double Maybe - exerciseId FileId Maybe - hintId FileId Maybe - solutionId FileId Maybe - markingId FileId Maybe markingText Text activeFrom UTCTime activeTo UTCTime @@ -64,16 +60,18 @@ Sheet changed UTCTime createdBy UserId changedBy UserId +SheetFile + sheetId SheetId + fileId FileId + type SheetFileType + UniqueSheetFile fileId sheetId type File - title Text - content ByteString - created UTCTime - changed UTCTime - createdBy UserId - changedBy UserId + title FilePath + content ByteString Maybe -- Nothing iff this is a directory + modified UTCTime + deriving Show Eq Ord Submission sheetId SheetId - updateId FileId Maybe ratingBy UserId Maybe ratingPoints Double Maybe ratingComment Text Maybe @@ -85,7 +83,8 @@ Submission SubmissionFile submissionId SubmissionId fileId FileId - UniqueSubmissionFile fileId submissionId + isUpdate Bool + UniqueSubmissionFile fileId submissionId isUpdate SubmissionUser userId UserId submissionId SubmissionId diff --git a/src/Handler/Zip.hs b/src/Handler/Zip.hs index fab24fc85..7d589e87b 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Zip.hs @@ -1,13 +1,12 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE DeriveGeneric, DeriveDataTypeable #-} -{-# OPTIONS_GHC -fno-warn-missing-fields #-} -- This concerns Zip.zipEntrySize in produceZip +{-# OPTIONS_GHC -fno-warn-missing-fields #-} -- This concerns zipEntrySize in produceZip {-# OPTIONS_GHC -fno-warn-orphans #-} module Handler.Zip - ( Zip.ZipError(..) - , Zip.ZipInfo(..) - , ZipEntry(..) + ( ZipError(..) + , ZipInfo(..) , produceZip , consumeZip ) where @@ -16,14 +15,15 @@ import Import import qualified Data.Conduit.List as Conduit (map) -import qualified Codec.Archive.Zip.Conduit.Types as Zip -import qualified Codec.Archive.Zip.Conduit.UnZip as Zip -import qualified Codec.Archive.Zip.Conduit.Zip as Zip +import Codec.Archive.Zip.Conduit.Types +import Codec.Archive.Zip.Conduit.UnZip +import Codec.Archive.Zip.Conduit.Zip -import qualified Data.ByteString.Lazy as Lazy (ByteString) +-- import qualified Data.ByteString.Lazy as Lazy (ByteString) import qualified Data.ByteString.Lazy as Lazy.ByteString import Data.ByteString (ByteString) +import qualified Data.ByteString as ByteString import qualified Data.Text as Text import qualified Data.Text.Encoding as Text @@ -31,21 +31,11 @@ import qualified Data.Text.Encoding as Text import System.FilePath import Data.Time -import GHC.Generics (Generic) -import Data.Typeable (Typeable) - import Data.List (dropWhileEnd) -data ZipEntry = ZipEntry - { zipEntryName :: FilePath - , zipEntryTime :: UTCTime - , zipEntryContents :: Maybe Lazy.ByteString -- ^ 'Nothing' means this is a directory - } deriving (Read, Show, Generic, Typeable, Eq, Ord) - - -instance Default Zip.ZipInfo where - def = Zip.ZipInfo +instance Default ZipInfo where + def = ZipInfo { zipComment = mempty } @@ -53,26 +43,27 @@ instance Default Zip.ZipInfo where consumeZip :: ( MonadBase b m , PrimMonad b , MonadThrow m - ) => (ZipEntry -> m a) -- ^ Handle entries (insert into database) - -> Sink ByteString m (Zip.ZipInfo, [a]) -consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' + ) => ConduitM ByteString File m ZipInfo +consumeZip = unZipStream `fuseUpstream` consumeZip' where - -- consumeZip' :: Sink (Either Zip.ZipEntry ByteString) m [a] + consumeZip' :: ( MonadThrow m + ) => Conduit (Either ZipEntry ByteString) m File consumeZip' = do input <- await case input of - Nothing -> return [] + Nothing -> return () Just (Right _) -> throw $ userError "Data chunk in unexpected place when parsing ZIP" Just (Left e) -> do - zipEntryName' <- fmap Text.unpack . either throw return . Text.decodeUtf8' $ Zip.zipEntryName e - contentChunks <- accContents + zipEntryName' <- fmap Text.unpack . either throw return . Text.decodeUtf8' $ zipEntryName e + contentChunks <- toConsumer accContents let - zipEntryName = normalise $ makeValid zipEntryName' - zipEntryTime = localTimeToUTC utc $ Zip.zipEntryTime e - zipEntryContents + fileTitle = normalise $ makeValid zipEntryName' + fileModified = localTimeToUTC utc $ zipEntryTime e + fileContent | hasTrailingPathSeparator zipEntryName' = Nothing - | otherwise = Just $ Lazy.ByteString.fromChunks contentChunks - (:) <$> (lift $ handleEntry ZipEntry{..}) <*> consumeZip' + | otherwise = Just $ mconcat contentChunks + yield $ File{..} + consumeZip' accContents :: Monad m => Sink (Either a b) m [b] accContents = do input <- await @@ -84,25 +75,23 @@ consumeZip handleEntry = Zip.unZipStream `fuseBoth` consumeZip' produceZip :: ( MonadBase b m , PrimMonad b , MonadThrow m - ) => Zip.ZipInfo - -> Conduit ZipEntry m ByteString -produceZip info = Conduit.map toZipData =$= void (Zip.zipStream zipOptions) + ) => ZipInfo + -> Conduit File m ByteString +produceZip info = Conduit.map toZipData =$= void (zipStream zipOptions) where - zipOptions = Zip.ZipOptions + zipOptions = ZipOptions { zipOpt64 = True , zipOptCompressLevel = -1 -- This is passed through all the way to the C zlib, where it means "default level" , zipOptInfo = info } - toZipData :: Monad m => ZipEntry -> (Zip.ZipEntry, Zip.ZipData m) - toZipData (e@ZipEntry{ zipEntryContents = Nothing }) - = ((toZipEntry True e){ Zip.zipEntrySize = Nothing }, mempty) - toZipData (e@ZipEntry{ zipEntryContents = Just b}) - = ((toZipEntry False e){ Zip.zipEntrySize = Just . fromIntegral $ Lazy.ByteString.length b }, Zip.ZipDataByteString b) + toZipData :: Monad m => File -> (ZipEntry, ZipData m) + toZipData f@(File{..}) = ((toZipEntry f){ zipEntrySize = fromIntegral . ByteString.length <$> fileContent }, maybe mempty (ZipDataByteString . Lazy.ByteString.fromStrict) fileContent) - toZipEntry :: Bool -- ^ Is directory? - -> ZipEntry -> Zip.ZipEntry - toZipEntry isDir ZipEntry{..} = Zip.ZipEntry - { zipEntryName = Text.encodeUtf8 . Text.pack . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator isDir . normalise . makeValid $ zipEntryName - , zipEntryTime = utcToLocalTime utc zipEntryTime + toZipEntry :: File -> ZipEntry + toZipEntry File{..} = ZipEntry + { zipEntryName = Text.encodeUtf8 . Text.pack . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator isDir . normalise . makeValid $ fileTitle + , zipEntryTime = utcToLocalTime utc fileModified } + where + isDir = isNothing fileContent diff --git a/src/Model/Types.hs b/src/Model/Types.hs index 5d7f3c3d7..e6966a773 100644 --- a/src/Model/Types.hs +++ b/src/Model/Types.hs @@ -37,6 +37,10 @@ data ExamStatus = Attended | NoShow | Voided deriving (Show, Read, Eq, Ord, Enum, Bounded) derivePersistField "ExamStatus" +data SheetFileType = SheetExercise | SheetHint | SheetSolution | SheetMarking + deriving (Show, Read, Eq, Ord, Enum, Bounded) +derivePersistField "SheetFileType" + data Season = Summer | Winter deriving (Show, Read, Eq, Ord, Enum, Bounded, Generic, Typeable) diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs index eebcb72d3..10a5ac901 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/ZipSpec.hs @@ -16,27 +16,27 @@ import qualified Data.Conduit.List as Conduit import Data.List (dropWhileEnd) import Data.Time -instance Arbitrary ZipEntry where +instance Arbitrary File where arbitrary = do - zipEntryName <- joinPath <$> arbitrary + fileTitle <- joinPath <$> arbitrary let date = addDays <$> arbitrary <*> pure (fromGregorian 2043 7 2) - zipEntryTime <- UTCTime <$> date <*> arbitrary - zipEntryContents <- arbitrary - return ZipEntry{..} + fileModified <- UTCTime <$> date <*> arbitrary + fileContent <- arbitrary + return File{..} spec :: Spec spec = describe "Zip file handling" $ do it "has compatible encoding/decoding to/from zip files" . property $ \zipFiles -> do - (_, zipFiles') <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= consumeZip return + (_, zipFiles') <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= consumeZip `fuseBoth` Conduit.consume forM_ (zipFiles `zip` zipFiles') $ \(file, file') -> do let acceptableFilenameChanges - = makeValid . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ zipEntryContents file) . normalise . makeValid + = makeValid . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ fileContent file) . normalise . makeValid acceptableTimeDifference t1 t2 = abs (diffUTCTime t1 t2) <= 2 - (shouldBe `on` acceptableFilenameChanges) (zipEntryName file') (zipEntryName file) - when (inZipRange $ zipEntryTime file) $ - (zipEntryTime file', zipEntryTime file) `shouldSatisfy` uncurry acceptableTimeDifference - (zipEntryContents file') `shouldBe` (zipEntryContents file) + (shouldBe `on` acceptableFilenameChanges) (fileTitle file') (fileTitle file) + when (inZipRange $ fileModified file) $ + (fileModified file', fileModified file) `shouldSatisfy` uncurry acceptableTimeDifference + (fileContent file') `shouldBe` (fileContent file) inZipRange :: UTCTime -> Bool inZipRange time From 2e65cf175c711977405e5b1e214e0ffd3685d589 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 9 Oct 2017 17:16:08 +0200 Subject: [PATCH 09/11] minor cleanup --- models | 2 +- test/Handler/ZipSpec.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/models b/models index 45a202e9b..4f8b99f31 100644 --- a/models +++ b/models @@ -29,7 +29,7 @@ Course json shorthand Text owner UserId schoolId SchoolId - termId TermId -- TermId ist jetzt Text als Typ + termId TermId -- @data TermId = TermId TermIdentifier@ stored as text capacity Int Maybe created UTCTime changed UTCTime diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs index 10a5ac901..bdaec75da 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/ZipSpec.hs @@ -28,7 +28,7 @@ spec :: Spec spec = describe "Zip file handling" $ do it "has compatible encoding/decoding to/from zip files" . property $ \zipFiles -> do - (_, zipFiles') <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= consumeZip `fuseBoth` Conduit.consume + zipFiles' <- runConduit $ Conduit.sourceList zipFiles =$= produceZip def =$= void consumeZip =$= Conduit.consume forM_ (zipFiles `zip` zipFiles') $ \(file, file') -> do let acceptableFilenameChanges = makeValid . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ fileContent file) . normalise . makeValid From 05d95fd1cbb89a84fc85e3efffdd03afee2ddc72 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 9 Oct 2017 17:18:42 +0200 Subject: [PATCH 10/11] Fix generation of UTCTimes --- test/Handler/ZipSpec.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Handler/ZipSpec.hs b/test/Handler/ZipSpec.hs index bdaec75da..6d9eb663d 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/ZipSpec.hs @@ -19,8 +19,8 @@ import Data.Time instance Arbitrary File where arbitrary = do fileTitle <- joinPath <$> arbitrary - let date = addDays <$> arbitrary <*> pure (fromGregorian 2043 7 2) - fileModified <- UTCTime <$> date <*> arbitrary + date <- addDays <$> arbitrary <*> pure (fromGregorian 2043 7 2) + fileModified <- addUTCTime <$> arbitrary <*> pure (UTCTime date 0) fileContent <- arbitrary return File{..} From 93b2c72a7b591bac77401c94613f67108a39f673 Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Mon, 9 Oct 2017 19:00:39 +0200 Subject: [PATCH 11/11] Move Zip to Utils --- src/Handler/{ => Utils}/Zip.hs | 2 +- test/Handler/{ => Utils}/ZipSpec.hs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename src/Handler/{ => Utils}/Zip.hs (99%) rename test/Handler/{ => Utils}/ZipSpec.hs (96%) diff --git a/src/Handler/Zip.hs b/src/Handler/Utils/Zip.hs similarity index 99% rename from src/Handler/Zip.hs rename to src/Handler/Utils/Zip.hs index 7d589e87b..4afcb8386 100644 --- a/src/Handler/Zip.hs +++ b/src/Handler/Utils/Zip.hs @@ -4,7 +4,7 @@ {-# OPTIONS_GHC -fno-warn-missing-fields #-} -- This concerns zipEntrySize in produceZip {-# OPTIONS_GHC -fno-warn-orphans #-} -module Handler.Zip +module Handler.Utils.Zip ( ZipError(..) , ZipInfo(..) , produceZip diff --git a/test/Handler/ZipSpec.hs b/test/Handler/Utils/ZipSpec.hs similarity index 96% rename from test/Handler/ZipSpec.hs rename to test/Handler/Utils/ZipSpec.hs index 6d9eb663d..b384143fd 100644 --- a/test/Handler/ZipSpec.hs +++ b/test/Handler/Utils/ZipSpec.hs @@ -2,11 +2,11 @@ {-# LANGUAGE RecordWildCards #-} {-# OPTIONS_GHC -fno-warn-orphans #-} -module Handler.ZipSpec where +module Handler.Utils.ZipSpec where import TestImport -import Handler.Zip +import Handler.Utils.Zip import System.FilePath