fix problem where empty cells at end of row were not recognized

This commit is contained in:
Andrew Martin 2017-06-11 23:02:08 -04:00
parent fb6064b79f
commit 83e069d1b6

View File

@ -57,7 +57,7 @@ import Control.Monad.ST
-- | Wrapper around a value of type @c@; unwrap it with 'getEscaped'.
--
-- NOTE(review): presumably marks content that has already been escaped
-- for output -- confirm against the encoding code.
newtype Escaped c = Escaped
  { getEscaped :: c
  }
-- | Whether the parser had run out of input at the moment a row
-- terminator was reported.
data Ended
  = EndedYes -- ^ no input remained after the row
  | EndedNo -- ^ more input remained after the row
  deriving (Show)
-- | Outcome of parsing a single cell.
data CellResult c
  = CellResultData !c
    -- ^ a cell that is not the last one in its row
  | CellResultNewline !c !Ended
    -- ^ the final cell of a row, together with whether the input ended
    -- right after it
  deriving (Show)
decodeHeadedUtf8Csv :: Monad m decodeHeadedUtf8Csv :: Monad m
@ -257,12 +257,15 @@ field !delim = do
_ <- eatNewlines _ <- eatNewlines
isEnd <- A.atEnd isEnd <- A.atEnd
if isEnd if isEnd
then return (CellResultNewline EndedYes) then return (CellResultNewline B.empty EndedYes)
else return (CellResultNewline EndedNo) else return (CellResultNewline B.empty EndedNo)
| otherwise -> do | otherwise -> do
bs <- unescapedField delim (bs,tc) <- unescapedField delim
return (CellResultData bs) case tc of
Nothing -> return (CellResultNewline EndedYes) TrailCharComma -> return (CellResultData bs)
TrailCharNewline -> return (CellResultNewline bs EndedNo)
TrailCharEnd -> return (CellResultNewline bs EndedYes)
Nothing -> return (CellResultNewline B.empty EndedYes)
{-# INLINE field #-} {-# INLINE field #-}
eatNewlines :: AL.Parser S.ByteString eatNewlines :: AL.Parser S.ByteString
@ -284,16 +287,24 @@ escapedField !delim = do
Left err -> fail err Left err -> fail err
else return s else return s
-- | What stopped an unescaped field, as reported by 'unescapedField'.
data TrailChar
  = TrailCharNewline -- ^ a newline or carriage return, which was consumed
  | TrailCharComma -- ^ a separator byte, which was consumed
  | TrailCharEnd -- ^ end of input
-- | Consume an unescaped field and report what terminated it.
--
-- The field content is every byte up to (but excluding) the first double
-- quote, newline, carriage return or @delim@ byte. The terminator is then
-- handled as follows:
--
-- * @delim@: consumed; result is tagged 'TrailCharComma'.
-- * newline, carriage return, or a carriage return immediately followed
--   by a newline: consumed as a single terminator; result is tagged
--   'TrailCharNewline'.
-- * end of input: result is tagged 'TrailCharEnd'.
-- * double quote: the parser fails, since quotes may not appear inside
--   an unescaped field.
unescapedField :: Word8 -> AL.Parser (S.ByteString,TrailChar)
unescapedField !delim = do
  bs <- A.takeWhile $ \c ->
    c /= doubleQuote &&
    c /= newline &&
    c /= delim &&
    c /= cr
  mb <- A.peekWord8
  case mb of
    Just b
      -- Compare against the caller's delimiter, not a hard-coded comma:
      -- 'A.takeWhile' above stops on @delim@, so that is the byte that
      -- must be recognised as the separator here.
      | b == delim -> A.anyWord8 >> return (bs,TrailCharComma)
      | b == newline -> A.anyWord8 >> return (bs,TrailCharNewline)
      | b == cr -> do
          _ <- A.anyWord8
          -- Treat CRLF as one terminator so the leftover newline is not
          -- seen as an additional row by the next cell parse.
          A.option () (A.skip (== newline))
          return (bs,TrailCharNewline)
      | otherwise -> fail "encounter double quote in unescaped field"
    Nothing -> return (bs,TrailCharEnd)
dquote :: AL.Parser Char dquote :: AL.Parser Char
dquote = char '"' dquote = char '"'
@ -477,8 +488,8 @@ consumeHeaderRow toStr parseCell isNull emptyStr isGood s0 = go 0 StrictListNil
ATYP.Fail _ _ _ -> return $ Left $ SiphonError 0 RowErrorParse ATYP.Fail _ _ _ -> return $ Left $ SiphonError 0 RowErrorParse
ATYP.Done !c1 !res -> case res of ATYP.Done !c1 !res -> case res of
-- it might be wrong to ignore whether or not the stream has ended -- it might be wrong to ignore whether or not the stream has ended
CellResultNewline _ -> do CellResultNewline cd _ -> do
let v = reverseVectorStrictList cellsLen cells let v = reverseVectorStrictList (cellsLen + 1) (StrictListCons cd cells)
return (Right (v :> (SMP.yield c1 >> s1))) return (Right (v :> (SMP.yield c1 >> s1)))
CellResultData !cd -> if isNull c1 CellResultData !cd -> if isNull c1
then go (cellsLen + 1) (StrictListCons cd cells) s1 then go (cellsLen + 1) (StrictListCons cd cells) s1
@ -518,8 +529,8 @@ consumeBody toStr parseCell isNull emptyStr isGood row0 reqLen siphon s0 =
handleResult !row !cellsLen !cells !result s1 = case result of handleResult !row !cellsLen !cells !result s1 = case result of
ATYP.Fail _ _ _ -> return $ Just $ SiphonError row RowErrorParse ATYP.Fail _ _ _ -> return $ Just $ SiphonError row RowErrorParse
ATYP.Done !c1 !res -> case res of ATYP.Done !c1 !res -> case res of
CellResultNewline !ended -> do CellResultNewline !cd !ended -> do
case decodeRow row (reverseVectorStrictList cellsLen cells) of case decodeRow row (reverseVectorStrictList (cellsLen + 1) (StrictListCons cd cells)) of
Left err -> return (Just err) Left err -> return (Just err)
Right a -> do Right a -> do
SMP.yield a SMP.yield a