From 350ee79af3c8fcc480970166a559596873beab2a Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Wed, 9 Sep 2020 13:44:01 +0200 Subject: [PATCH] fix: zip handling & tests --- config/test-settings.yml | 1 + src/Handler/Sheet/PersonalisedFiles.hs | 2 +- src/Handler/Utils/DateTime.hs | 5 +- src/Handler/Utils/Files.hs | 11 ++ src/Handler/Utils/Zip.hs | 156 ++++++++++++++------ src/Jobs.hs | 7 +- src/Jobs/Crontab.hs | 28 ++-- src/Jobs/Handler/Files.hs | 8 +- src/Model/Migration.hs | 2 +- src/Model/Types/File.hs | 84 ++++++++++- src/Settings.hs | 4 +- src/Utils/Files.hs | 2 +- stack.yaml | 4 + stack.yaml.lock | 18 +++ test/Handler/Sheet/PersonalisedFilesSpec.hs | 28 ++-- test/Handler/Utils/RatingSpec.hs | 4 +- test/Handler/Utils/ZipSpec.hs | 72 +++++++-- test/ModelSpec.hs | 21 ++- test/TestImport.hs | 3 + 19 files changed, 359 insertions(+), 101 deletions(-) diff --git a/config/test-settings.yml b/config/test-settings.yml index 7ba4552eb..ab9bd7f5a 100644 --- a/config/test-settings.yml +++ b/config/test-settings.yml @@ -10,4 +10,5 @@ log-settings: auth-dummy-login: true server-session-acid-fallback: true +job-cron-interval: null job-workers: 1 diff --git a/src/Handler/Sheet/PersonalisedFiles.hs b/src/Handler/Sheet/PersonalisedFiles.hs index dc29fb61f..91da0bcc2 100644 --- a/src/Handler/Sheet/PersonalisedFiles.hs +++ b/src/Handler/Sheet/PersonalisedFiles.hs @@ -55,7 +55,7 @@ data PersonalisedSheetFileUnresolved a = PSFUnresolvedDirectory a | PSFUnresolvedCollatable Text a | PSFUnresolved a - deriving (Eq, Ord, Read, Show, Generic, Typeable) + deriving (Eq, Ord, Read, Show, Functor, Foldable, Traversable, Generic, Typeable) makePrisms ''PersonalisedSheetFileUnresolved diff --git a/src/Handler/Utils/DateTime.hs b/src/Handler/Utils/DateTime.hs index 7fd3c4c54..a321bebff 100644 --- a/src/Handler/Utils/DateTime.hs +++ b/src/Handler/Utils/DateTime.hs @@ -2,7 +2,7 @@ module Handler.Utils.DateTime ( utcToLocalTime, utcToZonedTime - , localTimeToUTC, TZ.LocalToUTCResult(..) + , localTimeToUTC, TZ.LocalToUTCResult(..), localTimeToUTCSimple , toTimeOfDay , toMidnight, beforeMidnight, toMidday, toMorning , formatDiffDays @@ -47,6 +47,9 @@ utcToZonedTime = ZonedTime <$> TZ.utcToLocalTimeTZ appTZ <*> TZ.timeZoneForUTCTi localTimeToUTC :: LocalTime -> LocalToUTCResult localTimeToUTC = TZ.localTimeToUTCFull appTZ +localTimeToUTCSimple :: LocalTime -> UTCTime +localTimeToUTCSimple = TZ.localTimeToUTCTZ appTZ + -- | Local midnight of given day toMidnight :: Day -> UTCTime toMidnight = toTimeOfDay 0 0 0 diff --git a/src/Handler/Utils/Files.hs b/src/Handler/Utils/Files.hs index fafad4903..d53cc24ec 100644 --- a/src/Handler/Utils/Files.hs +++ b/src/Handler/Utils/Files.hs @@ -3,6 +3,7 @@ module Handler.Utils.Files , sourceFiles, sourceFiles' , SourceFilesException(..) , sourceFileDB + , acceptFile ) where import Import @@ -16,6 +17,8 @@ import qualified Network.Minio as Minio import qualified Database.Esqueleto as E import qualified Database.Esqueleto.Utils as E +import System.FilePath (normalise) + data SourceFilesException = SourceFilesMismatchedHashes @@ -85,3 +88,11 @@ sourceFiles' = C.map sourceFile' sourceFile' :: forall file. HasFileReference file => file -> DBFile sourceFile' = sourceFile . view (_FileReference . _1) + + +acceptFile :: (MonadResource m, MonadResource m') => FileInfo -> m (File m') +acceptFile fInfo = do + let fileTitle = dropWhile isPathSeparator . dropTrailingPathSeparator . normalise . unpack $ fileName fInfo + fileContent = Just $ fileSource fInfo + fileModified <- liftIO getCurrentTime + return File{..} diff --git a/src/Handler/Utils/Zip.hs b/src/Handler/Utils/Zip.hs index 0b469bcdb..70147cbd2 100644 --- a/src/Handler/Utils/Zip.hs +++ b/src/Handler/Utils/Zip.hs @@ -13,12 +13,14 @@ module Handler.Utils.Zip import Import +import Handler.Utils.Files (acceptFile) +import Handler.Utils.DateTime (localTimeToUTCSimple, utcToLocalTime) + import Codec.Archive.Zip.Conduit.Types import Codec.Archive.Zip.Conduit.UnZip import Codec.Archive.Zip.Conduit.Zip import System.FilePath -import Data.Time.LocalTime (localTimeToUTC, utcToLocalTime) import Data.List (dropWhileEnd) @@ -34,6 +36,8 @@ import Data.Encoding.CP437 import qualified Data.Char as Char import Control.Monad.Trans.Cont +import Control.Monad.Trans.State.Strict (evalStateT) +import qualified Control.Monad.State.Class as State typeZip :: ContentType @@ -50,47 +54,88 @@ instance Default ZipInfo where } -consumeZip :: forall b m m'. - ( MonadThrow b - , MonadThrow m - , MonadBase b m - , PrimMonad b +data ConsumeZipException + = ConsumeZipUnZipException SomeException + | ConsumeZipUnexpectedContent + deriving (Show, Generic, Typeable) + deriving anyclass (Exception) + + +consumeZip :: forall m m'. + ( MonadThrow m + , PrimMonad m , MonadUnliftIO m , MonadResource m , MonadIO m' + , MonadThrow m' ) => ConduitT () ByteString m () -> ConduitT () (File m') m ZipInfo consumeZip inpBS = do - inps <- liftIO newBroadcastTMChanIO - let feedSingle inp = atomically $ do - guardM $ isEmptyTMChan inps - writeTMChan inps inp - zipAsync <- lift . allocateLinkedAsync . runConduit $ do - zipInfo <- (inpBS .| transPipe liftBase unZipStream) `fuseUpstream` C.mapM_ feedSingle - atomically $ closeTMChan inps - return zipInfo + inpChunk <- liftIO newEmptyTMVarIO + zipAsync <- lift . allocateLinkedAsync $ + runConduit $ (inpBS .| unZipStream) `fuseUpstream` C.mapM_ (atomically . putTMVar inpChunk) + + flip evalStateT Nothing . evalContT . callCC $ \finishConsume -> forever $ do + inpChunk' <- atomically $ + Right <$> takeTMVar inpChunk + <|> Left <$> waitCatchSTM zipAsync + + fileSink <- State.get + case (fileSink, inpChunk') of + (mFSink , Left (Left unzipExc) ) -> do + for_ mFSink $ \fSink' -> atomically $ do + writeTMChan fSink' $ Left unzipExc + closeTMChan fSink' + throwM unzipExc + + (mFSink , Left (Right zInfo) ) -> do + for_ mFSink $ atomically . closeTMChan + finishConsume zInfo + + (Just fSink, Right (Right bs) ) -> + atomically . writeTMChan fSink $ Right bs + + (Nothing , Right (Right _) ) -> + throwM ConsumeZipUnexpectedContent + + (mFSink , Right (Left ZipEntry{..})) -> do + for_ mFSink $ atomically . closeTMChan + State.put Nothing - evalContT . callCC $ \finish -> forever $ do - (fileChan, fileDef) <- atomically $ do - fileChan <- dupTMChan inps - fileDef <- readTMChan fileChan - return (fileChan, fileDef) - case fileDef of - Nothing -> finish =<< waitAsync zipAsync - Just (Right _) -> return () - Just (Left ZipEntry{..}) -> do zipEntryName' <- decodeZipEntryName zipEntryName let - fileTitle = dropWhile isPathSeparator . dropTrailingPathSeparator . normalise $ makeValid zipEntryName' - fileModified = localTimeToUTC utc zipEntryTime + fileTitle = "." zipEntryName' + & normalise + & makeValid + & dropWhile isPathSeparator + & dropWhileEnd isPathSeparator + & normalise + & makeValid + fileModified = localTimeToUTCSimple zipEntryTime isDirectory = hasTrailingPathSeparator zipEntryName' - fileContent - | isDirectory = Nothing - | otherwise = Just . evalContT . callCC $ \finishContent -> forever $ do - nextVal <- atomically $ (preview _Right =<<) <$> readTMChan fileChan - maybe (finishContent ()) (lift . yield) nextVal - lift $ yield File{..} - + fileContent <- if + | isDirectory -> return Nothing + | otherwise -> do + fileChan <- liftIO newTMChanIO + State.put $ Just fileChan + return . Just . evalContT . callCC $ \finishFileContent -> forever $ do + nextVal <- atomically $ asum + [ readTMChan fileChan + , do + inpChunk'' <- Right <$> takeTMVar inpChunk + <|> Left <$> waitCatchSTM zipAsync + case inpChunk'' of + Left (Left unzipExc) -> return . Just $ Left unzipExc + Left (Right _ ) -> return Nothing + Right (Left zInfo ) -> Nothing <$ putTMVar inpChunk (Left zInfo) + Right (Right bs ) -> return . Just $ Right bs + ] + case nextVal of + Nothing -> finishFileContent () + Just (Right bs) -> lift $ yield bs + Just (Left exc) -> throwM $ ConsumeZipUnZipException exc + lift . lift $ yield File{..} + produceZip :: forall m. ( MonadThrow m , PrimMonad m @@ -100,22 +145,48 @@ produceZip :: forall m. produceZip info = C.map toZipData .| void (zipStream zipOptions) where zipOptions = ZipOptions - { zipOpt64 = True - , zipOptCompressLevel = -1 -- This is passed through all the way to the C zlib, where it means "default level" + { zipOpt64 = False + , zipOptCompressLevel = defaultCompression , zipOptInfo = info } + -- toZipData :: forall v. File m -> ConduitT v (ZipEntry, ZipData m) m () + -- toZipData f + -- | Just fc <- fileContent f = do + -- outpChunk <- newEmptyTMVarIO + -- outpAsync <- lift . allocateLinkedAsync $ + -- runConduit $ fc .| C.mapM_ (atomically . putTMVar outpChunk) + -- yield ( toZipEntry f + -- , ZipDataSource . evalContT . callCC $ \finishContent -> forever $ do + -- nextVal <- atomically $ + -- Right <$> takeTMVar outpChunk + -- <|> Left <$> waitCatchSTM outpAsync + -- case nextVal of + -- Right chunk -> lift $ yield chunk + -- Left (Right () ) -> finishContent () + -- Left (Left exc) -> throwM exc + -- ) + -- | otherwise = yield (toZipEntry f, mempty) + toZipData :: File m -> (ZipEntry, ZipData m) toZipData f@File{..} - = (toZipEntry f, ) $ maybe mempty ZipDataSource fileContent + = (toZipEntry f, maybe mempty ZipDataSource fileContent) toZipEntry :: File m -> ZipEntry toZipEntry File{..} = ZipEntry{..} where - isDir = isNothing fileContent + isDir = is _Nothing fileContent - zipEntryName = encodeZipEntryName . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator isDir . normalise $ makeValid fileTitle - zipEntryTime = utcToLocalTime utc fileModified + zipEntryName = "." fileTitle + & normalise + & makeValid + & dropWhile isPathSeparator + & dropWhileEnd isPathSeparator + & bool id addTrailingPathSeparator isDir + & normalise + & makeValid + & encodeZipEntryName + zipEntryTime = utcToLocalTime fileModified zipEntrySize = Nothing zipEntryExternalAttributes = Nothing @@ -123,7 +194,7 @@ modifyFileTitle :: Monad m => (FilePath -> FilePath) -> ConduitT (File m') (File modifyFileTitle f = mapC $ \x@File{..} -> x{ fileTitle = f fileTitle } -- Takes FileInfo and if it is a ZIP-Archive, extract files, otherwiese yield fileinfo -receiveFiles :: (MonadLogger m, MonadResource m, MonadThrow m, MonadBase IO m, MonadUnliftIO m, MonadResource m') => FileInfo -> ConduitT () (File m') m () +receiveFiles :: (MonadLogger m, MonadResource m, MonadThrow m, PrimMonad m, MonadUnliftIO m, MonadResource m', MonadThrow m') => FileInfo -> ConduitT () (File m') m () receiveFiles fInfo | ((==) `on` simpleContentType) mimeType typeZip = do $logInfoS "sourceFiles" "Unpacking ZIP" @@ -134,13 +205,6 @@ receiveFiles fInfo where mimeType = mimeLookup $ fileName fInfo -acceptFile :: (MonadResource m, MonadResource m') => FileInfo -> m (File m') -acceptFile fInfo = do - let fileTitle = dropWhile isPathSeparator . dropTrailingPathSeparator . normalise . unpack $ fileName fInfo - fileContent = Just $ fileSource fInfo - fileModified <- liftIO getCurrentTime - return File{..} - decodeZipEntryName :: MonadThrow m => Either Text ByteString -> m FilePath -- ^ Extract the filename from a 'ZipEntry' doing decoding along the way. diff --git a/src/Jobs.hs b/src/Jobs.hs index 21925bebc..7e849400f 100644 --- a/src/Jobs.hs +++ b/src/Jobs.hs @@ -116,8 +116,9 @@ manageCrontab foundation@UniWorX{..} unmask = do jState <- atomically $ readTMVar appJobState liftIO . unsafeHandler foundation . void $ do atomically . assertM_ (not . Map.null . jobWorkers) $ readTMVar appJobState - runReaderT ?? foundation $ - writeJobCtlBlock JobCtlDetermineCrontab + when (has (_appJobCronInterval . _Just) foundation) $ + runReaderT ?? foundation $ + writeJobCtlBlock JobCtlDetermineCrontab void $ evalRWST (forever execCrontab) jState HashMap.empty let awaitTermination = guardM $ @@ -479,7 +480,7 @@ jLocked jId act = flip evalStateT False $ do threshold <- getsYesod $ view _appJobStaleThreshold now <- liftIO getCurrentTime heldLocks <- asks jobHeldLocks - isHeld <- (jId `Set.member`) <$> atomically (readTVar heldLocks) + isHeld <- (jId `Set.member`) <$> readTVarIO heldLocks hadStale <- maybeT (return False) $ do lockTime <- MaybeT $ return queuedJobLockTime lockInstance <- MaybeT $ return queuedJobLockInstance diff --git a/src/Jobs/Crontab.hs b/src/Jobs/Crontab.hs index 84a73489e..8127e16e8 100644 --- a/src/Jobs/Crontab.hs +++ b/src/Jobs/Crontab.hs @@ -40,14 +40,15 @@ determineCrontab = execWriterT $ do } Nothing -> return () - tell $ HashMap.singleton - JobCtlDetermineCrontab - Cron - { cronInitial = CronAsap - , cronRepeat = CronRepeatScheduled CronAsap - , cronRateLimit = appJobCronInterval - , cronNotAfter = Right CronNotScheduled - } + whenIsJust appJobCronInterval $ \interval -> + tell $ HashMap.singleton + JobCtlDetermineCrontab + Cron + { cronInitial = CronAsap + , cronRepeat = CronRepeatScheduled CronAsap + , cronRateLimit = interval + , cronNotAfter = Right CronNotScheduled + } oldestInvitationMUTC <- lift $ preview (_head . _entityVal . _invitationExpiresAt . _Just) <$> selectList [InvitationExpiresAt !=. Nothing] [Asc InvitationExpiresAt, LimitTo 1] whenIsJust oldestInvitationMUTC $ \oldestInvUTC -> tell $ HashMap.singleton @@ -139,7 +140,7 @@ determineCrontab = execWriterT $ do } let - getNextIntervals within interval = do + getNextIntervals within interval cInterval = do now <- liftIO getPOSIXTime return $ do let @@ -147,7 +148,7 @@ determineCrontab = execWriterT $ do (currEpoch, epochNow) = now `divMod'` epochInterval currInterval = epochNow `div'` interval numIntervals = floor $ epochInterval / interval - n = ceiling $ 4 * appJobCronInterval / interval + n = ceiling $ 4 * cInterval / interval i <- [ negate (ceiling $ n % 2) .. ceiling $ n % 2 ] let ((+ currEpoch) -> nextEpoch, nextInterval) = (currInterval + i) `divMod` numIntervals @@ -158,8 +159,9 @@ determineCrontab = execWriterT $ do if | is _Just appLdapConf , Just syncWithin <- appSynchroniseLdapUsersWithin + , Just cInterval <- appJobCronInterval -> do - nextIntervals <- getNextIntervals syncWithin appSynchroniseLdapUsersInterval + nextIntervals <- getNextIntervals syncWithin appSynchroniseLdapUsersInterval cInterval forM_ nextIntervals $ \(nextEpoch, nextInterval, nextIntervalTime, numIntervals) -> do tell $ HashMap.singleton @@ -177,8 +179,8 @@ determineCrontab = execWriterT $ do | otherwise -> return () - whenIsJust appPruneUnreferencedFilesWithin $ \within -> do - nextIntervals <- getNextIntervals within appPruneUnreferencedFilesInterval + whenIsJust ((,) <$> appPruneUnreferencedFilesWithin <*> appJobCronInterval) $ \(within, cInterval) -> do + nextIntervals <- getNextIntervals within appPruneUnreferencedFilesInterval cInterval forM_ nextIntervals $ \(nextEpoch, nextInterval, nextIntervalTime, numIntervals) -> do tell $ HashMap.singleton (JobCtlQueue JobPruneUnreferencedFiles diff --git a/src/Jobs/Handler/Files.hs b/src/Jobs/Handler/Files.hs index 80cac055c..55218e170 100644 --- a/src/Jobs/Handler/Files.hs +++ b/src/Jobs/Handler/Files.hs @@ -97,7 +97,7 @@ dispatchJobPruneUnreferencedFiles numIterations epoch iteration = JobHandlerAtom | i <- [1 .. toInteger numIterations] ] - intervalsDgsts' = map (over both $ toDigest <=< assertM' (> 0)) $ zip (0 : init intervals) intervals + intervalsDgsts' = zipWith (curry . over both $ toDigest <=< assertM' (> 0)) (0 : init intervals) intervals toDigest :: Integer -> Maybe FileContentChunkReference toDigest = fmap (review _Wrapped) . digestFromByteString . pad . ByteString.pack . reverse . unfoldr step @@ -270,9 +270,9 @@ dispatchJobRechunkFiles = JobHandlerAtomic . hoist lift $ do E.where_ $ fileContentEntry' E.^. FileContentEntryHash E.==. fileContentEntry E.^. FileContentEntryHash return $ E.sum_ (E.length_ $ fileContentChunk E.^. FileContentChunkContent:: E.SqlExpr (E.Value Word64)) - return $ ( fileContentEntry E.^. FileContentEntryHash - , size - ) + return ( fileContentEntry E.^. FileContentEntryHash + , size + ) rechunkFile :: FileContentReference -> Word64 -> DB (Sum Natural, Sum Word64) rechunkFile fRef sz = do diff --git a/src/Model/Migration.hs b/src/Model/Migration.hs index 115cdddc3..0d77f6da3 100644 --- a/src/Model/Migration.hs +++ b/src/Model/Migration.hs @@ -923,7 +923,7 @@ customMigrations = Map.fromListWith (>>) ) , ( AppliedMigrationKey [migrationVersion|39.0.0|] [version|40.0.0|] - , whenM (tableExists "study_features") $ + , whenM (tableExists "study_features") [executeQQ| ALTER TABLE study_features RENAME updated TO last_observed; ALTER TABLE study_features ADD COLUMN first_observed timestamp with time zone; diff --git a/src/Model/Types/File.hs b/src/Model/Types/File.hs index 27855ce2b..5da04921b 100644 --- a/src/Model/Types/File.hs +++ b/src/Model/Types/File.hs @@ -1,10 +1,11 @@ module Model.Types.File ( FileContentChunkReference(..), FileContentReference(..) , File(..), _fileTitle, _fileContent, _fileModified + , PureFile, toPureFile, fromPureFile, pureFileToFileReference, _pureFileContent , transFile , minioFileReference , FileReference(..), _fileReferenceTitle, _fileReferenceContent, _fileReferenceModified - , HasFileReference(..), IsFileReference(..), FileReferenceResidual(..) + , HasFileReference(..), IsFileReference(..), FileReferenceResidual(FileReferenceResidual, FileReferenceResidualEither, unFileReferenceResidualEither, FileReferenceResidualEntity, fileReferenceResidualEntityKey, fileReferenceResidualEntityResidual, unPureFileResidual) ) where import Import.NoModel @@ -17,9 +18,14 @@ import qualified Data.ByteString.Base64.URL as Base64 import qualified Data.ByteArray as ByteArray import qualified Network.Minio as Minio (Object) import qualified Crypto.Hash as Crypto (digestFromByteString) +import qualified Crypto.Hash.Conduit as Crypto (sinkHash) import Utils.Lens.TH +import qualified Data.Conduit.Combinators as C + +import Text.Show + newtype FileContentChunkReference = FileContentChunkReference (Digest SHA3_512) @@ -57,6 +63,64 @@ data File m = File makeLenses_ ''File +type PureFile = File Identity + +_pureFileContent :: forall bs. + ( IsSequence bs + , Element bs ~ Word8 + ) + => Lens' PureFile (Maybe bs) +_pureFileContent = lens getPureFileContent setPureFileContent + where + getPureFileContent = fmap (repack . runIdentity . runConduit . (.| C.fold)) . fileContent + setPureFileContent f bs = f { fileContent = yield . repack <$> bs } + +toPureFile :: Monad m => File m -> m PureFile +toPureFile File{..} = do + c <- for fileContent $ runConduit . (.| C.fold) + return File + { fileContent = fmap yield c + , .. + } + +fromPureFile :: Monad m => PureFile -> File m +fromPureFile = transFile generalize + +pureFileToFileReference :: PureFile -> FileReference +pureFileToFileReference File{..} = FileReference + { fileReferenceTitle = fileTitle + , fileReferenceContent = review _Wrapped . runIdentity . runConduit . (.| Crypto.sinkHash) <$> fileContent + , fileReferenceModified = fileModified + } + +instance Eq PureFile where + a == b = all (\f -> f a b) + [ (==) `on` fileTitle + , (==) `on` fileModified + , (==) `on` (view _pureFileContent :: PureFile -> Maybe ByteString) + ] +instance Ord PureFile where + compare = mconcat + [ comparing fileTitle + , comparing (view _pureFileContent :: PureFile -> Maybe ByteString) + , comparing fileModified + ] +instance Show PureFile where + showsPrec _ f@File{..} + = showString "File{" + . showString "fileTitle = " + . shows fileTitle + . showString ", " + . showString "fileContent = " + . (case f ^. _pureFileContent of + Nothing -> showString "Nothing" + Just c -> showString "Just $ yield " . showsPrec 11 (c :: ByteString) + ) + . showString ", " + . showString "fileModified = " + . shows fileModified + . showString "}" + transFile :: Monad m => (forall a. m a -> n a) -> (File m -> File n) transFile l File{..} = File{ fileContent = transPipe l <$> fileContent, .. } @@ -78,6 +142,24 @@ instance HasFileReference FileReference where data FileReferenceResidual FileReference = FileReferenceResidual _FileReference = iso (, FileReferenceResidual) $ view _1 +instance HasFileReference PureFile where + newtype FileReferenceResidual PureFile = PureFileResidual { unPureFileResidual :: Maybe ByteString } + deriving (Eq, Ord, Read, Show, Generic, Typeable) + + _FileReference = iso toFileReference fromFileReference + where + toFileReference File{..} = (FileReference{..}, PureFileResidual{..}) + where + fileReferenceTitle = fileTitle + (fileReferenceContent, unPureFileResidual) = ((,) <$> preview (_Just . _1) <*> preview (_Just . _2)) $ + over _1 (review _Wrapped) . runIdentity . runConduit . (.| getZipConduit ((,) <$> ZipConduit Crypto.sinkHash <*> ZipConduit C.fold)) <$> fileContent + fileReferenceModified = fileModified + fromFileReference (FileReference{..}, PureFileResidual{..}) = File + { fileTitle = fileReferenceTitle + , fileContent = yield <$> unPureFileResidual + , fileModified = fileReferenceModified + } + instance (HasFileReference a, HasFileReference b) => HasFileReference (Either a b) where newtype FileReferenceResidual (Either a b) = FileReferenceResidualEither { unFileReferenceResidualEither :: Either (FileReferenceResidual a) (FileReferenceResidual b) } _FileReference = iso doSplit doJoin diff --git a/src/Settings.hs b/src/Settings.hs index a97ece5b4..acedff5c4 100644 --- a/src/Settings.hs +++ b/src/Settings.hs @@ -115,7 +115,7 @@ data AppSettings = AppSettings , appMailSupport :: Address , appJobWorkers :: Natural , appJobFlushInterval :: Maybe NominalDiffTime - , appJobCronInterval :: NominalDiffTime + , appJobCronInterval :: Maybe NominalDiffTime , appJobStaleThreshold :: NominalDiffTime , appNotificationRateLimit :: NominalDiffTime , appNotificationCollateDelay :: NominalDiffTime @@ -452,7 +452,7 @@ instance FromJSON AppSettings where appJobWorkers <- o .: "job-workers" appJobFlushInterval <- o .:? "job-flush-interval" - appJobCronInterval <- o .: "job-cron-interval" + appJobCronInterval <- o .:? "job-cron-interval" appJobStaleThreshold <- o .: "job-stale-threshold" appNotificationRateLimit <- o .: "notification-rate-limit" appNotificationCollateDelay <- o .: "notification-collate-delay" diff --git a/src/Utils/Files.hs b/src/Utils/Files.hs index 10228a94b..4749f46c5 100644 --- a/src/Utils/Files.hs +++ b/src/Utils/Files.hs @@ -50,7 +50,7 @@ sinkFileDB doReplace fileContentContent = do existsChunk <- lift $ exists [FileContentChunkHash ==. fileContentChunkHash] let setContentBased = updateWhere [FileContentChunkHash ==. fileContentChunkHash] [FileContentChunkContentBased =. fileContentChunkContentBased] if | existsChunk -> lift setContentBased - | otherwise -> lift . handleIfSql isUniqueConstraintViolation (const $ setContentBased) $ + | otherwise -> lift . handleIfSql isUniqueConstraintViolation (const setContentBased) $ insert_ FileContentChunk{..} return $ FileContentChunkKey fileContentChunkHash where fileContentChunkHash = _Wrapped # Crypto.hash fileContentChunkContent diff --git a/stack.yaml b/stack.yaml index 39a517f26..995468114 100644 --- a/stack.yaml +++ b/stack.yaml @@ -53,6 +53,9 @@ extra-deps: - gearhash - fastcdc + - git: git@gitlab2.rz.ifi.lmu.de:uni2work/zip-stream.git + commit: 094c70935f1d92016144cd4dfc0a99b831c56f25 + - generic-lens-1.2.0.0@sha256:b19e7970c93743a46bc3702331512a96d163de4356472f2d51a2945887aefe8c,6524 # manual downgrade; won't compile with >=2.0.0.0 - acid-state-0.16.0.1@sha256:d43f6ee0b23338758156c500290c4405d769abefeb98e9bc112780dae09ece6f,6207 @@ -71,6 +74,7 @@ extra-deps: - tz-0.1.3.4@sha256:bd311e202b8bdd15bcd6a4ca182e69794949d3b3b9f4aa835e9ccff011284979,5086 - unidecode-0.1.0.4@sha256:99581ee1ea334a4596a09ae3642e007808457c66893b587e965b31f15cbf8c4d,1144 - wai-middleware-prometheus-1.0.0@sha256:1625792914fb2139f005685be8ce519111451cfb854816e430fbf54af46238b4,1314 + - primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604 resolver: nightly-2020-08-08 compiler: ghc-8.10.2 diff --git a/stack.yaml.lock b/stack.yaml.lock index f6232b055..ba4ec0b2c 100644 --- a/stack.yaml.lock +++ b/stack.yaml.lock @@ -222,6 +222,17 @@ packages: subdir: fastcdc git: https://github.com/gkleen/FastCDC.git commit: 7326e2931454282df9081105dad812845db5c530 +- completed: + name: zip-stream + version: 0.2.0.1 + git: git@gitlab2.rz.ifi.lmu.de:uni2work/zip-stream.git + pantry-tree: + size: 812 + sha256: e191cb4e12ae1f056acdcf0393ff4dcb566b3ab82305aa14d6d4e35f49a64c96 + commit: 094c70935f1d92016144cd4dfc0a99b831c56f25 + original: + git: git@gitlab2.rz.ifi.lmu.de:uni2work/zip-stream.git + commit: 094c70935f1d92016144cd4dfc0a99b831c56f25 - completed: hackage: generic-lens-1.2.0.0@sha256:b19e7970c93743a46bc3702331512a96d163de4356472f2d51a2945887aefe8c,6524 pantry-tree: @@ -341,6 +352,13 @@ packages: sha256: 6d64803c639ed4c7204ea6fab0536b97d3ee16cdecb9b4a883cd8e56d3c61402 original: hackage: wai-middleware-prometheus-1.0.0@sha256:1625792914fb2139f005685be8ce519111451cfb854816e430fbf54af46238b4,1314 +- completed: + hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604 + pantry-tree: + size: 1376 + sha256: 924e88629b493abb6b2f3c3029cef076554a2b627091e3bb6887ec03487a707d + original: + hackage: primitive-0.7.1.0@sha256:6a237bb338bcc43193077ff8e8c0f0ce2de14c652231496a15672e8b563a07e2,2604 snapshots: - completed: size: 524392 diff --git a/test/Handler/Sheet/PersonalisedFilesSpec.hs b/test/Handler/Sheet/PersonalisedFilesSpec.hs index 8dacb3eca..bd41ca2e0 100644 --- a/test/Handler/Sheet/PersonalisedFilesSpec.hs +++ b/test/Handler/Sheet/PersonalisedFilesSpec.hs @@ -16,8 +16,6 @@ import qualified Data.Conduit.Combinators as C import Control.Lens.Extras import Control.Monad.Trans.Maybe -import qualified Crypto.Hash as Crypto (hash) - import System.FilePath (dropDrive) import Data.Time.Clock (diffUTCTime) @@ -25,6 +23,8 @@ import Data.Char (chr) import Database.Persist.Sql (transactionUndo) +import Data.Bitraversable + instance Arbitrary (FileReferenceResidual PersonalisedSheetFile) where arbitrary = PersonalisedSheetFileResidual @@ -59,7 +59,7 @@ spec = withApp . describe "Personalised sheet file zip encoding" $ do lift (insertUnique user) >>= maybe userLoop return in userLoop let res = res' { personalisedSheetFileResidualSheet = shid, personalisedSheetFileResidualUser = uid } - fRef <- lift (sinkFile f :: DB FileReference) + fRef <- lift (sinkFile (transFile generalize f) :: DB FileReference) now <- liftIO getCurrentTime void . lift . insert $ CourseParticipant cid (personalisedSheetFileResidualUser res) now Nothing CourseParticipantActive void . lift . insert $ _FileReference # (fRef, res) @@ -68,30 +68,38 @@ spec = withApp . describe "Personalised sheet file zip encoding" $ do anonMode <- liftIO $ generate arbitrary let - fpL :: Lens' (Either PersonalisedSheetFile File) FilePath + fpL :: forall m. Lens' (Either PersonalisedSheetFile (File m)) FilePath fpL = lens (either personalisedSheetFileTitle fileTitle) $ \f' path -> case f' of Left pf -> Left pf { personalisedSheetFileTitle = path } Right f -> Right f { fileTitle = path } isDirectory = either (is _Nothing . personalisedSheetFileContent) (is _Nothing . fileContent) + loadFile :: Either (PersonalisedSheetFileUnresolved (Either PersonalisedSheetFile DBFile)) (Either PersonalisedSheetFile DBFile, FileReferenceResidual PersonalisedSheetFile) + -> DB (Either (PersonalisedSheetFileUnresolved (Either PersonalisedSheetFile PureFile)) (Either PersonalisedSheetFile PureFile, FileReferenceResidual PersonalisedSheetFile)) + loadFile = bitraverse loadUnresolved loadResolved + where + loadUnresolved = traverse $ traverse toPureFile + loadResolved (f, fRes) = (, fRes) <$> traverse toPureFile f + recoveredFiles <- runConduit $ sourcePersonalisedSheetFiles cid (Just shid) Nothing anonMode .| resolvePersonalisedSheetFiles fpL isDirectory cid shid + .| C.mapM loadFile .| C.foldMap pure let - checkFile :: Either (PersonalisedSheetFileUnresolved (Either PersonalisedSheetFile File)) (Either PersonalisedSheetFile File, FileReferenceResidual PersonalisedSheetFile) - -> (File, FileReferenceResidual PersonalisedSheetFile) + checkFile :: Either (PersonalisedSheetFileUnresolved (Either PersonalisedSheetFile PureFile)) (Either PersonalisedSheetFile PureFile, FileReferenceResidual PersonalisedSheetFile) + -> (PureFile, FileReferenceResidual PersonalisedSheetFile) -> Bool checkFile (Left _) _ = False checkFile (Right (recFile, recResidual)) (file, residual) = recResidual == residual && case recFile of - Right f -> file == f - Left pf -> dropDrive (fileTitle file) == dropDrive (personalisedSheetFileTitle pf) - && abs (fileModified file `diffUTCTime` personalisedSheetFileModified pf) < 1e-6 -- Precision is a PostgreSQL limitation - && fmap Crypto.hash (fileContent file) == personalisedSheetFileContent pf + Right f -> f == file + Left pf -> dropDrive (fileTitle file) == dropDrive (personalisedSheetFileTitle pf) + && abs (fileModified file `diffUTCTime` personalisedSheetFileModified pf) < 1e-6 -- Precision is a PostgreSQL limitation + && fileReferenceContent (pureFileToFileReference file) == personalisedSheetFileContent pf errors = go [] sheetFiles recoveredFiles where go acc xs [] = reverse acc ++ map Left xs diff --git a/test/Handler/Utils/RatingSpec.hs b/test/Handler/Utils/RatingSpec.hs index 23b674f57..64b3c6f8f 100644 --- a/test/Handler/Utils/RatingSpec.hs +++ b/test/Handler/Utils/RatingSpec.hs @@ -15,6 +15,8 @@ import Text.Shakespeare.I18N (renderMessage) import Utils.Lens (_ratingValues, _ratingPoints) +import qualified Data.Conduit.Combinators as C + spec :: Spec spec = describe "Rating file parsing/pretty-printing" $ do @@ -33,7 +35,7 @@ spec = describe "Rating file parsing/pretty-printing" $ do mr' = MsgRenderer $ renderMessage (error "foundation inspected" :: site) [] parseRating' :: LBS.ByteString -> Maybe Rating' - parseRating' = either (\(_ :: SomeException) -> Nothing) (Just . fst) . parseRating . flip (File "bewertung.txt") time . Just . LBS.toStrict + parseRating' = either (\(_ :: SomeException) -> Nothing) (Just . fst) . parseRating . flip (File "bewertung.txt") time . Just . C.sourceLazy time = UTCTime systemEpochDay 0 mRating rating = rating { ratingValues = mRating' rating $ ratingValues rating } diff --git a/test/Handler/Utils/ZipSpec.hs b/test/Handler/Utils/ZipSpec.hs index 667edad18..798cd049d 100644 --- a/test/Handler/Utils/ZipSpec.hs +++ b/test/Handler/Utils/ZipSpec.hs @@ -1,11 +1,14 @@ +{-# OPTIONS_GHC -Wno-error=deprecations #-} + module Handler.Utils.ZipSpec where import TestImport +import Utils (()) import Handler.Utils.Zip import Data.Conduit -import qualified Data.Conduit.List as Conduit +import qualified Data.Conduit.Combinators as C import Data.List (dropWhileEnd) @@ -14,16 +17,63 @@ import ModelSpec () import System.FilePath import Data.Time +import Data.Universe + + +data ZipConsumptionStrategy + = ZipConsumeInterleaved + | ZipConsumeBuffered + deriving (Eq, Ord, Enum, Bounded, Read, Show, Generic, Typeable) + deriving anyclass (Universe, Finite) + +data ZipProductionStrategy + = ZipProduceInterleaved + | ZipProduceBuffered + deriving (Eq, Ord, Enum, Bounded, Read, Show, Generic, Typeable) + deriving anyclass (Universe, Finite) + + spec :: Spec spec = describe "Zip file handling" $ do - it "has compatible encoding/decoding to/from zip files" . property $ do - zipFiles <- listOf $ scale (`div` 2) arbitrary - return . property $ do - zipFiles' <- runConduit $ Conduit.sourceList zipFiles .| produceZip def .| void consumeZip .| Conduit.consume - forM_ (zipFiles `zip` zipFiles') $ \(file, file') -> do - let acceptableFilenameChanges - = makeValid . dropWhile isPathSeparator . bool (dropWhileEnd isPathSeparator) addTrailingPathSeparator (isNothing $ fileContent file) . normalise . makeValid + describe "has compatible encoding to, and decoding from zip files" . forM_ universeF $ \strategy -> + modifyMaxSuccess (bool id (37 *) $ strategy == (ZipProduceInterleaved, ZipConsumeInterleaved)) . it (show strategy) . property $ do + zipFiles <- listOf arbitrary :: Gen [PureFile] + return . property $ do + + let zipProduceBuffered + = evaluate . force <=< runConduitRes $ zipProduceInterleaved .| C.sinkLazy + zipProduceInterleaved + = C.yieldMany zipFiles .| C.map fromPureFile .| produceZip def + zipConsumeBuffered zipProd + = mapM toPureFile <=< runConduitRes $ void (consumeZip zipProd) .| C.foldMap pure + zipConsumeInterleaved zipProd + = void (consumeZip zipProd) .| C.mapM toPureFile .| C.foldMap pure + zipFiles' <- case strategy of + (ZipProduceBuffered, ZipConsumeInterleaved) -> + runConduitRes . zipConsumeInterleaved . C.sourceLazy =<< zipProduceBuffered + (ZipProduceBuffered, ZipConsumeBuffered) -> + zipConsumeBuffered . C.sourceLazy =<< zipProduceBuffered + (ZipProduceInterleaved, ZipConsumeInterleaved) -> + runConduitRes $ zipConsumeInterleaved zipProduceInterleaved + (ZipProduceInterleaved, ZipConsumeBuffered) -> + zipConsumeBuffered zipProduceInterleaved + + let acceptableFilenameChanges file + = "." fileTitle file + & normalise + & makeValid + & dropWhile isPathSeparator + & dropWhileEnd isPathSeparator + & bool id addTrailingPathSeparator (isNothing $ fileContent file) + & normalise + & makeValid acceptableTimeDifference t1 t2 = abs (diffUTCTime t1 t2) <= 2 - (shouldBe `on` acceptableFilenameChanges) (fileTitle file') (fileTitle file) - (fileModified file', fileModified file) `shouldSatisfy` uncurry acceptableTimeDifference - (fileContent file') `shouldBe` (fileContent file) + + forM_ (zipFiles `zip` zipFiles') $ \(file, file') -> do + when (((/=) `on` acceptableFilenameChanges) file' file) $ do + traceM $ show (fileTitle file, fileTitle file') + traceM $ show (acceptableFilenameChanges file, acceptableFilenameChanges file') + traceM $ show (isNothing $ fileContent file, isNothing $ fileContent file') + (shouldBe `on` acceptableFilenameChanges) file' file + (fileModified file', fileModified file) `shouldSatisfy` uncurry acceptableTimeDifference + (view _pureFileContent file' :: Maybe ByteString) `shouldBe` (view _pureFileContent file) diff --git a/test/ModelSpec.hs b/test/ModelSpec.hs index 588c4a532..1e7dd6a8a 100644 --- a/test/ModelSpec.hs +++ b/test/ModelSpec.hs @@ -34,6 +34,10 @@ import Control.Monad.Catch.Pure (Catch, runCatch) import System.IO.Unsafe (unsafePerformIO) +import qualified Data.Conduit.Combinators as C + +import Data.Ratio ((%)) + instance Arbitrary EmailAddress where arbitrary = do @@ -137,12 +141,18 @@ instance Arbitrary User where return User{..} shrink = genericShrink -instance Arbitrary File where +scaleRatio :: Rational -> Int -> Int +scaleRatio r = ceiling . (* r) . fromIntegral + +instance Monad m => Arbitrary (File m) where arbitrary = do - fileTitle <- scale (`div` 2) $ (joinPath <$> arbitrary) `suchThat` (any $ not . isPathSeparator) + fileTitle <- scale (scaleRatio $ 1 % 8) $ (joinPath <$> arbitrary) `suchThat` (any $ not . isPathSeparator) date <- addDays <$> arbitrary <*> pure (fromGregorian 2043 7 2) fileModified <- (addUTCTime <$> arbitrary <*> pure (UTCTime date 0)) `suchThat` inZipRange - fileContent <- arbitrary + fileContent <- oneof + [ pure Nothing + , Just . C.sourceLazy <$> scale (scaleRatio $ 7 % 8) arbitrary + ] return File{..} where inZipRange :: UTCTime -> Bool @@ -152,7 +162,6 @@ instance Arbitrary File where = True | otherwise = False - shrink = genericShrink instance Arbitrary School where arbitrary = do @@ -177,8 +186,8 @@ spec = do parallel $ do lawsCheckHspec (Proxy @User) [ eqLaws, jsonLaws ] - lawsCheckHspec (Proxy @File) - [ eqLaws ] + lawsCheckHspec (Proxy @PureFile) + [ eqLaws, ordLaws ] lawsCheckHspec (Proxy @School) [ eqLaws ] lawsCheckHspec (Proxy @Term) diff --git a/test/TestImport.hs b/test/TestImport.hs index d4cc61787..510b1e9c0 100644 --- a/test/TestImport.hs +++ b/test/TestImport.hs @@ -22,6 +22,7 @@ import Yesod.Auth as X import Yesod.Test as X import Yesod.Core.Unsafe (fakeHandlerGetLogger) import Test.QuickCheck as X +import Test.Hspec.QuickCheck as X hiding (prop) import Test.QuickCheck.Gen as X import Data.Default as X import Test.QuickCheck.Instances as X () @@ -65,6 +66,8 @@ import Handler.Utils (runAppLoggingT) import Web.PathPieces (toPathPiece) import Utils.Parameters (GlobalPostParam(PostLoginDummy)) +import Control.Monad.Morph as X (generalize) + runDB :: SqlPersistM a -> YesodExample UniWorX a runDB query = do