fix(csv): ignore empty lines

This commit is contained in:
Gregor Kleen 2020-11-26 14:59:50 +01:00
parent 946a42b7f0
commit 211ff5eacc
2 changed files with 40 additions and 17 deletions

View File

@ -43,14 +43,43 @@ import Data.Time.Format (iso8601DateFormat)
import qualified Data.Char as Char
import Control.Monad.Error.Class (MonadError(..))
_haltingCsvParseError :: Prism' CsvParseError CsvStreamHaltParseError
_haltingCsvParseError = prism' (\(HaltingCsvParseError bs t) -> CsvParseError bs t) $ \case
CsvParseError bs t -> Just $ HaltingCsvParseError bs t
_other -> Nothing
_csvStreamRecordParseError :: Prism' CsvParseError CsvStreamRecordParseError
_csvStreamRecordParseError = prism' (\(CsvStreamRecordParseError t) -> IncrementalError t) $ \case
IncrementalError t -> Just $ CsvStreamRecordParseError t
_other -> Nothing
throwIncrementalErrors :: MonadError CsvParseError m => ConduitT (Either CsvStreamRecordParseError a) a m ()
throwIncrementalErrors = C.mapM $ either (throwError . review _csvStreamRecordParseError) return
newtype MaybeEmptyRecord csv = MaybeEmptyRecord { unMaybeEmptyRecord :: Maybe csv }
instance FromNamedRecord csv => FromNamedRecord (MaybeEmptyRecord csv) where
parseNamedRecord r
| all null r = pure $ MaybeEmptyRecord Nothing
| otherwise = MaybeEmptyRecord . Just <$> parseNamedRecord r
instance FromRecord csv => FromRecord (MaybeEmptyRecord csv) where
parseRecord r
| all null r = pure $ MaybeEmptyRecord Nothing
| otherwise = MaybeEmptyRecord . Just <$> parseRecord r
decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m ()
decodeCsv = decodeCsv' fromNamedCsv
decodeCsv = decodeCsv' $ \opts -> fromNamedCsvStreamError opts (review _haltingCsvParseError) .| throwIncrementalErrors
decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m ()
decodeCsvPositional hdr = decodeCsv' (`fromCsv` hdr)
decodeCsvPositional hdr = decodeCsv' $ \opts -> fromCsvStreamError opts hdr (review _haltingCsvParseError) .| throwIncrementalErrors
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString csv (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString (MaybeEmptyRecord csv) (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
decodeCsv' fromCsv' = do
encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
@ -65,7 +94,7 @@ decodeCsv' fromCsv' = do
sourceLazy inp' .| act
where enc = encOpts ^. _csvFormat . _csvEncoding
recode' decodeCsv''
recode' decodeCsv'' .| C.mapMaybe unMaybeEmptyRecord
where
decodeCsv'' = transPipe throwExceptT $ do
testBuffer <- accumTestBuffer LBS.empty
@ -73,7 +102,6 @@ decodeCsv' fromCsv' = do
let decodeOptions = defaultDecodeOptions
& guessDelimiter testBuffer
& noAlphaNumDelimiters
$logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
fromCsv' decodeOptions
@ -101,7 +129,7 @@ decodeCsv' fromCsv' = do
| otherwise
-> id -- Parsing of something, which should be a quoted field, failed; bail now
| A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1)) ls
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1) . filter (not . isAlphaNum')) ls
, Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
, let maxH = maximum equals
, [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
@ -109,12 +137,7 @@ decodeCsv' fromCsv' = do
| otherwise
= id
noAlphaNumDelimiters opts
| Char.isAlphaNum . Char.chr . fromIntegral $ decDelimiter opts
= opts { decDelimiter = decDelimiter defaultDecodeOptions }
| otherwise
= opts
isAlphaNum' = Char.isAlphaNum . Char.chr . fromIntegral
quotedField :: A.Parser () -- We don't care about the return value
quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter

View File

@ -282,12 +282,12 @@ pathPieceJSONKey ''ExamGrade
instance Csv.ToField ExamGrade where
toField = Csv.toField . toPathPiece
instance Csv.FromField ExamGrade where
parseField x = asum
[ parse =<< Csv.parseField x
, parse . Text.replace "," "." =<< Csv.parseField x -- Ugh.
]
parseField x
= (parse =<< Csv.parseField x)
<<|> (parse . Text.replace "," "." =<< Csv.parseField x) -- Ugh.
where parse :: Text -> Csv.Parser ExamGrade
parse = maybe (fail "Could not decode PathPiece") return . fromPathPiece
parse = maybe (fail "Could not decode ExamGrade from Text") return . fromPathPiece
a <<|> b = a <|> b <|> a
instance PersistField ExamGrade where
toPersistValue = PersistRational . review numberGrade