fix(csv): ignore empty lines
This commit is contained in:
parent
946a42b7f0
commit
211ff5eacc
@ -43,14 +43,43 @@ import Data.Time.Format (iso8601DateFormat)
|
||||
|
||||
import qualified Data.Char as Char
|
||||
|
||||
import Control.Monad.Error.Class (MonadError(..))
|
||||
|
||||
|
||||
|
||||
_haltingCsvParseError :: Prism' CsvParseError CsvStreamHaltParseError
|
||||
_haltingCsvParseError = prism' (\(HaltingCsvParseError bs t) -> CsvParseError bs t) $ \case
|
||||
CsvParseError bs t -> Just $ HaltingCsvParseError bs t
|
||||
_other -> Nothing
|
||||
|
||||
_csvStreamRecordParseError :: Prism' CsvParseError CsvStreamRecordParseError
|
||||
_csvStreamRecordParseError = prism' (\(CsvStreamRecordParseError t) -> IncrementalError t) $ \case
|
||||
IncrementalError t -> Just $ CsvStreamRecordParseError t
|
||||
_other -> Nothing
|
||||
|
||||
throwIncrementalErrors :: MonadError CsvParseError m => ConduitT (Either CsvStreamRecordParseError a) a m ()
|
||||
throwIncrementalErrors = C.mapM $ either (throwError . review _csvStreamRecordParseError) return
|
||||
|
||||
|
||||
newtype MaybeEmptyRecord csv = MaybeEmptyRecord { unMaybeEmptyRecord :: Maybe csv }
|
||||
|
||||
instance FromNamedRecord csv => FromNamedRecord (MaybeEmptyRecord csv) where
|
||||
parseNamedRecord r
|
||||
| all null r = pure $ MaybeEmptyRecord Nothing
|
||||
| otherwise = MaybeEmptyRecord . Just <$> parseNamedRecord r
|
||||
instance FromRecord csv => FromRecord (MaybeEmptyRecord csv) where
|
||||
parseRecord r
|
||||
| all null r = pure $ MaybeEmptyRecord Nothing
|
||||
| otherwise = MaybeEmptyRecord . Just <$> parseRecord r
|
||||
|
||||
decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m ()
|
||||
decodeCsv = decodeCsv' fromNamedCsv
|
||||
decodeCsv = decodeCsv' $ \opts -> fromNamedCsvStreamError opts (review _haltingCsvParseError) .| throwIncrementalErrors
|
||||
|
||||
decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m ()
|
||||
decodeCsvPositional hdr = decodeCsv' (`fromCsv` hdr)
|
||||
decodeCsvPositional hdr = decodeCsv' $ \opts -> fromCsvStreamError opts hdr (review _haltingCsvParseError) .| throwIncrementalErrors
|
||||
|
||||
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString csv (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
|
||||
|
||||
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString (MaybeEmptyRecord csv) (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
|
||||
decodeCsv' fromCsv' = do
|
||||
encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
|
||||
|
||||
@ -65,7 +94,7 @@ decodeCsv' fromCsv' = do
|
||||
sourceLazy inp' .| act
|
||||
where enc = encOpts ^. _csvFormat . _csvEncoding
|
||||
|
||||
recode' decodeCsv''
|
||||
recode' decodeCsv'' .| C.mapMaybe unMaybeEmptyRecord
|
||||
where
|
||||
decodeCsv'' = transPipe throwExceptT $ do
|
||||
testBuffer <- accumTestBuffer LBS.empty
|
||||
@ -73,7 +102,6 @@ decodeCsv' fromCsv' = do
|
||||
|
||||
let decodeOptions = defaultDecodeOptions
|
||||
& guessDelimiter testBuffer
|
||||
& noAlphaNumDelimiters
|
||||
$logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
|
||||
|
||||
fromCsv' decodeOptions
|
||||
@ -101,7 +129,7 @@ decodeCsv' fromCsv' = do
|
||||
| otherwise
|
||||
-> id -- Parsing of something, which should be a quoted field, failed; bail now
|
||||
| A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
|
||||
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1)) ls
|
||||
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1) . filter (not . isAlphaNum')) ls
|
||||
, Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
|
||||
, let maxH = maximum equals
|
||||
, [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
|
||||
@ -109,12 +137,7 @@ decodeCsv' fromCsv' = do
|
||||
| otherwise
|
||||
= id
|
||||
|
||||
noAlphaNumDelimiters opts
|
||||
| Char.isAlphaNum . Char.chr . fromIntegral $ decDelimiter opts
|
||||
= opts { decDelimiter = decDelimiter defaultDecodeOptions }
|
||||
| otherwise
|
||||
= opts
|
||||
|
||||
isAlphaNum' = Char.isAlphaNum . Char.chr . fromIntegral
|
||||
|
||||
quotedField :: A.Parser () -- We don't care about the return value
|
||||
quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter
|
||||
|
||||
@ -282,12 +282,12 @@ pathPieceJSONKey ''ExamGrade
|
||||
instance Csv.ToField ExamGrade where
|
||||
toField = Csv.toField . toPathPiece
|
||||
instance Csv.FromField ExamGrade where
|
||||
parseField x = asum
|
||||
[ parse =<< Csv.parseField x
|
||||
, parse . Text.replace "," "." =<< Csv.parseField x -- Ugh.
|
||||
]
|
||||
parseField x
|
||||
= (parse =<< Csv.parseField x)
|
||||
<<|> (parse . Text.replace "," "." =<< Csv.parseField x) -- Ugh.
|
||||
where parse :: Text -> Csv.Parser ExamGrade
|
||||
parse = maybe (fail "Could not decode PathPiece") return . fromPathPiece
|
||||
parse = maybe (fail "Could not decode ExamGrade from Text") return . fromPathPiece
|
||||
a <<|> b = a <|> b <|> a
|
||||
|
||||
instance PersistField ExamGrade where
|
||||
toPersistValue = PersistRational . review numberGrade
|
||||
|
||||
Loading…
Reference in New Issue
Block a user