fix problem in siphon

This commit is contained in:
Andrew Martin 2017-07-20 22:15:17 -04:00
parent 83e069d1b6
commit 45c961fdd1
2 changed files with 37 additions and 14 deletions

View File

@ -251,8 +251,11 @@ field !delim = do
case mb of
Just b
| b == doubleQuote -> do
bs <- escapedField delim
return (CellResultData bs)
(bs,tc) <- escapedField delim
case tc of
TrailCharComma -> return (CellResultData bs)
TrailCharNewline -> return (CellResultNewline bs EndedNo)
TrailCharEnd -> return (CellResultNewline bs EndedYes)
| b == 10 || b == 13 -> do
_ <- eatNewlines
isEnd <- A.atEnd
@ -271,21 +274,31 @@ field !delim = do
eatNewlines :: AL.Parser S.ByteString
eatNewlines = A.takeWhile (\x -> x == 10 || x == 13)
escapedField :: Word8 -> AL.Parser S.ByteString
escapedField :: Word8 -> AL.Parser (S.ByteString,TrailChar)
escapedField !delim = do
_ <- dquote
-- The scan state is 'True' if the previous character was a double
-- quote. We need to drop a trailing double quote left by scan.
s <- S.init <$> (A.scan False $ \s c -> if c == doubleQuote
then Just (not s)
else if s then Nothing
else Just False)
A.option () (A.skip (== delim))
s <- S.init <$>
( A.scan False $ \s c ->
if c == doubleQuote
then Just (not s)
else if s
then Nothing
else Just False
)
mb <- A.peekWord8
trailChar <- case mb of
Just b
| b == comma -> A.anyWord8 >> return TrailCharComma
| b == newline || b == cr -> A.anyWord8 >> return TrailCharNewline
| otherwise -> fail "encountered double quote after escaped field"
Nothing -> return TrailCharEnd
if doubleQuote `S.elem` s
then case Z.parse unescape s of
Right r -> return r
Left err -> fail err
else return s
then case Z.parse unescape s of
Right r -> return (r,trailChar)
Left err -> fail err
else return (s,trailChar)
data TrailChar = TrailCharNewline | TrailCharComma | TrailCharEnd
@ -303,7 +316,7 @@ unescapedField !delim = do
Just b
| b == comma -> A.anyWord8 >> return (bs,TrailCharComma)
| b == newline || b == cr -> A.anyWord8 >> return (bs,TrailCharNewline)
| otherwise -> fail "encounter double quote in unescaped field"
| otherwise -> fail "encountered double quote in unescaped field"
Nothing -> return (bs,TrailCharEnd)
dquote :: AL.Parser Char

View File

@ -76,7 +76,17 @@ tests =
]
)
) @?= ([(244,'z',True)] :> Nothing)
, testCase "Headed Decoding (escaped characters)"
, testCase "Headed Decoding (escaped characters, one big chunk)"
$ ( runIdentity . SMP.toList )
( S.decodeHeadedUtf8Csv decodingF
( SMP.yield $ BC8.pack $ concat
[ "name\n"
, "drew\n"
, "\"martin, drew\"\n"
]
)
) @?= (["drew","martin, drew"] :> Nothing)
, testCase "Headed Decoding (escaped characters, character per chunk)"
$ ( runIdentity . SMP.toList )
( S.decodeHeadedUtf8Csv decodingF
( mapM_ (SMP.yield . BC8.singleton) $ concat