From 211ff5eacc83bb47e564dd88e11bc18ae7e0a6af Mon Sep 17 00:00:00 2001 From: Gregor Kleen Date: Thu, 26 Nov 2020 14:59:50 +0100 Subject: [PATCH] fix(csv): ignore empty lines --- src/Handler/Utils/Csv.hs | 47 ++++++++++++++++++++++++++++++---------- src/Model/Types/Exam.hs | 10 ++++----- 2 files changed, 40 insertions(+), 17 deletions(-) diff --git a/src/Handler/Utils/Csv.hs b/src/Handler/Utils/Csv.hs index 2a38b6b7f..cf090e171 100644 --- a/src/Handler/Utils/Csv.hs +++ b/src/Handler/Utils/Csv.hs @@ -43,14 +43,43 @@ import Data.Time.Format (iso8601DateFormat) import qualified Data.Char as Char +import Control.Monad.Error.Class (MonadError(..)) + + +_haltingCsvParseError :: Prism' CsvParseError CsvStreamHaltParseError +_haltingCsvParseError = prism' (\(HaltingCsvParseError bs t) -> CsvParseError bs t) $ \case + CsvParseError bs t -> Just $ HaltingCsvParseError bs t + _other -> Nothing + +_csvStreamRecordParseError :: Prism' CsvParseError CsvStreamRecordParseError +_csvStreamRecordParseError = prism' (\(CsvStreamRecordParseError t) -> IncrementalError t) $ \case + IncrementalError t -> Just $ CsvStreamRecordParseError t + _other -> Nothing + +throwIncrementalErrors :: MonadError CsvParseError m => ConduitT (Either CsvStreamRecordParseError a) a m () +throwIncrementalErrors = C.mapM $ either (throwError . review _csvStreamRecordParseError) return + + +newtype MaybeEmptyRecord csv = MaybeEmptyRecord { unMaybeEmptyRecord :: Maybe csv } + +instance FromNamedRecord csv => FromNamedRecord (MaybeEmptyRecord csv) where + parseNamedRecord r + | all null r = pure $ MaybeEmptyRecord Nothing + | otherwise = MaybeEmptyRecord . Just <$> parseNamedRecord r +instance FromRecord csv => FromRecord (MaybeEmptyRecord csv) where + parseRecord r + | all null r = pure $ MaybeEmptyRecord Nothing + | otherwise = MaybeEmptyRecord . Just <$> parseRecord r + decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m () -decodeCsv = decodeCsv' fromNamedCsv +decodeCsv = decodeCsv' $ \opts -> fromNamedCsvStreamError opts (review _haltingCsvParseError) .| throwIncrementalErrors decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m () -decodeCsvPositional hdr = decodeCsv' (`fromCsv` hdr) +decodeCsvPositional hdr = decodeCsv' $ \opts -> fromCsvStreamError opts hdr (review _haltingCsvParseError) .| throwIncrementalErrors -decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString csv (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m () + +decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString (MaybeEmptyRecord csv) (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m () decodeCsv' fromCsv' = do encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth @@ -65,7 +94,7 @@ decodeCsv' fromCsv' = do sourceLazy inp' .| act where enc = encOpts ^. _csvFormat . _csvEncoding - recode' decodeCsv'' + recode' decodeCsv'' .| C.mapMaybe unMaybeEmptyRecord where decodeCsv'' = transPipe throwExceptT $ do testBuffer <- accumTestBuffer LBS.empty @@ -73,7 +102,6 @@ decodeCsv' fromCsv' = do let decodeOptions = defaultDecodeOptions & guessDelimiter testBuffer - & noAlphaNumDelimiters $logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|] fromCsv' decodeOptions @@ -101,7 +129,7 @@ decodeCsv' fromCsv' = do | otherwise -> id -- Parsing of something, which should be a quoted field, failed; bail now | A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer - , (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1)) ls + , (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1) . filter (not . isAlphaNum')) ls , Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h , let maxH = maximum equals , [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals @@ -109,12 +137,7 @@ decodeCsv' fromCsv' = do | otherwise = id - noAlphaNumDelimiters opts - | Char.isAlphaNum . Char.chr . fromIntegral $ decDelimiter opts - = opts { decDelimiter = decDelimiter defaultDecodeOptions } - | otherwise - = opts - + isAlphaNum' = Char.isAlphaNum . Char.chr . fromIntegral quotedField :: A.Parser () -- We don't care about the return value quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter diff --git a/src/Model/Types/Exam.hs b/src/Model/Types/Exam.hs index 44cd909dc..14c4de981 100644 --- a/src/Model/Types/Exam.hs +++ b/src/Model/Types/Exam.hs @@ -282,12 +282,12 @@ pathPieceJSONKey ''ExamGrade instance Csv.ToField ExamGrade where toField = Csv.toField . toPathPiece instance Csv.FromField ExamGrade where - parseField x = asum - [ parse =<< Csv.parseField x - , parse . Text.replace "," "." =<< Csv.parseField x -- Ugh. - ] + parseField x + = (parse =<< Csv.parseField x) + <<|> (parse . Text.replace "," "." =<< Csv.parseField x) -- Ugh. where parse :: Text -> Csv.Parser ExamGrade - parse = maybe (fail "Could not decode PathPiece") return . fromPathPiece + parse = maybe (fail "Could not decode ExamGrade from Text") return . fromPathPiece + a <<|> b = a <|> b <|> a instance PersistField ExamGrade where toPersistValue = PersistRational . review numberGrade