fix(csv): ignore empty lines
This commit is contained in:
parent
946a42b7f0
commit
211ff5eacc
@ -43,14 +43,43 @@ import Data.Time.Format (iso8601DateFormat)
|
|||||||
|
|
||||||
import qualified Data.Char as Char
|
import qualified Data.Char as Char
|
||||||
|
|
||||||
|
import Control.Monad.Error.Class (MonadError(..))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
_haltingCsvParseError :: Prism' CsvParseError CsvStreamHaltParseError
|
||||||
|
_haltingCsvParseError = prism' (\(HaltingCsvParseError bs t) -> CsvParseError bs t) $ \case
|
||||||
|
CsvParseError bs t -> Just $ HaltingCsvParseError bs t
|
||||||
|
_other -> Nothing
|
||||||
|
|
||||||
|
_csvStreamRecordParseError :: Prism' CsvParseError CsvStreamRecordParseError
|
||||||
|
_csvStreamRecordParseError = prism' (\(CsvStreamRecordParseError t) -> IncrementalError t) $ \case
|
||||||
|
IncrementalError t -> Just $ CsvStreamRecordParseError t
|
||||||
|
_other -> Nothing
|
||||||
|
|
||||||
|
throwIncrementalErrors :: MonadError CsvParseError m => ConduitT (Either CsvStreamRecordParseError a) a m ()
|
||||||
|
throwIncrementalErrors = C.mapM $ either (throwError . review _csvStreamRecordParseError) return
|
||||||
|
|
||||||
|
|
||||||
|
newtype MaybeEmptyRecord csv = MaybeEmptyRecord { unMaybeEmptyRecord :: Maybe csv }
|
||||||
|
|
||||||
|
instance FromNamedRecord csv => FromNamedRecord (MaybeEmptyRecord csv) where
|
||||||
|
parseNamedRecord r
|
||||||
|
| all null r = pure $ MaybeEmptyRecord Nothing
|
||||||
|
| otherwise = MaybeEmptyRecord . Just <$> parseNamedRecord r
|
||||||
|
instance FromRecord csv => FromRecord (MaybeEmptyRecord csv) where
|
||||||
|
parseRecord r
|
||||||
|
| all null r = pure $ MaybeEmptyRecord Nothing
|
||||||
|
| otherwise = MaybeEmptyRecord . Just <$> parseRecord r
|
||||||
|
|
||||||
decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m ()
|
decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m ()
|
||||||
decodeCsv = decodeCsv' fromNamedCsv
|
decodeCsv = decodeCsv' $ \opts -> fromNamedCsvStreamError opts (review _haltingCsvParseError) .| throwIncrementalErrors
|
||||||
|
|
||||||
decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m ()
|
decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m ()
|
||||||
decodeCsvPositional hdr = decodeCsv' (`fromCsv` hdr)
|
decodeCsvPositional hdr = decodeCsv' $ \opts -> fromCsvStreamError opts hdr (review _haltingCsvParseError) .| throwIncrementalErrors
|
||||||
|
|
||||||
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString csv (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
|
|
||||||
|
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString (MaybeEmptyRecord csv) (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
|
||||||
decodeCsv' fromCsv' = do
|
decodeCsv' fromCsv' = do
|
||||||
encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
|
encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
|
||||||
|
|
||||||
@ -65,7 +94,7 @@ decodeCsv' fromCsv' = do
|
|||||||
sourceLazy inp' .| act
|
sourceLazy inp' .| act
|
||||||
where enc = encOpts ^. _csvFormat . _csvEncoding
|
where enc = encOpts ^. _csvFormat . _csvEncoding
|
||||||
|
|
||||||
recode' decodeCsv''
|
recode' decodeCsv'' .| C.mapMaybe unMaybeEmptyRecord
|
||||||
where
|
where
|
||||||
decodeCsv'' = transPipe throwExceptT $ do
|
decodeCsv'' = transPipe throwExceptT $ do
|
||||||
testBuffer <- accumTestBuffer LBS.empty
|
testBuffer <- accumTestBuffer LBS.empty
|
||||||
@ -73,7 +102,6 @@ decodeCsv' fromCsv' = do
|
|||||||
|
|
||||||
let decodeOptions = defaultDecodeOptions
|
let decodeOptions = defaultDecodeOptions
|
||||||
& guessDelimiter testBuffer
|
& guessDelimiter testBuffer
|
||||||
& noAlphaNumDelimiters
|
|
||||||
$logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
|
$logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
|
||||||
|
|
||||||
fromCsv' decodeOptions
|
fromCsv' decodeOptions
|
||||||
@ -101,7 +129,7 @@ decodeCsv' fromCsv' = do
|
|||||||
| otherwise
|
| otherwise
|
||||||
-> id -- Parsing of something, which should be a quoted field, failed; bail now
|
-> id -- Parsing of something, which should be a quoted field, failed; bail now
|
||||||
| A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
|
| A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
|
||||||
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1)) ls
|
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1) . filter (not . isAlphaNum')) ls
|
||||||
, Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
|
, Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
|
||||||
, let maxH = maximum equals
|
, let maxH = maximum equals
|
||||||
, [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
|
, [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
|
||||||
@ -109,12 +137,7 @@ decodeCsv' fromCsv' = do
|
|||||||
| otherwise
|
| otherwise
|
||||||
= id
|
= id
|
||||||
|
|
||||||
noAlphaNumDelimiters opts
|
isAlphaNum' = Char.isAlphaNum . Char.chr . fromIntegral
|
||||||
| Char.isAlphaNum . Char.chr . fromIntegral $ decDelimiter opts
|
|
||||||
= opts { decDelimiter = decDelimiter defaultDecodeOptions }
|
|
||||||
| otherwise
|
|
||||||
= opts
|
|
||||||
|
|
||||||
|
|
||||||
quotedField :: A.Parser () -- We don't care about the return value
|
quotedField :: A.Parser () -- We don't care about the return value
|
||||||
quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter
|
quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter
|
||||||
|
|||||||
@ -282,12 +282,12 @@ pathPieceJSONKey ''ExamGrade
|
|||||||
instance Csv.ToField ExamGrade where
|
instance Csv.ToField ExamGrade where
|
||||||
toField = Csv.toField . toPathPiece
|
toField = Csv.toField . toPathPiece
|
||||||
instance Csv.FromField ExamGrade where
|
instance Csv.FromField ExamGrade where
|
||||||
parseField x = asum
|
parseField x
|
||||||
[ parse =<< Csv.parseField x
|
= (parse =<< Csv.parseField x)
|
||||||
, parse . Text.replace "," "." =<< Csv.parseField x -- Ugh.
|
<<|> (parse . Text.replace "," "." =<< Csv.parseField x) -- Ugh.
|
||||||
]
|
|
||||||
where parse :: Text -> Csv.Parser ExamGrade
|
where parse :: Text -> Csv.Parser ExamGrade
|
||||||
parse = maybe (fail "Could not decode PathPiece") return . fromPathPiece
|
parse = maybe (fail "Could not decode ExamGrade from Text") return . fromPathPiece
|
||||||
|
a <<|> b = a <|> b <|> a
|
||||||
|
|
||||||
instance PersistField ExamGrade where
|
instance PersistField ExamGrade where
|
||||||
toPersistValue = PersistRational . review numberGrade
|
toPersistValue = PersistRational . review numberGrade
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user