make tests pass again

This commit is contained in:
Andrew Martin 2017-06-11 20:01:34 -04:00
parent 03e9e3734b
commit fb6064b79f
5 changed files with 293 additions and 226 deletions

View File

@ -18,11 +18,10 @@ library
Siphon Siphon
Siphon.Types Siphon.Types
build-depends: build-depends:
base >= 4.7 && < 5 base >= 4.9 && < 5
, colonnade >= 1.1 && < 1.2 , colonnade >= 1.1 && < 1.2
, text , text
, bytestring , bytestring
, contravariant
, vector , vector
, streaming , streaming
, attoparsec , attoparsec
@ -30,9 +29,9 @@ library
default-language: Haskell2010 default-language: Haskell2010
test-suite siphon-test test-suite siphon-test
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
hs-source-dirs: test hs-source-dirs: test
main-is: Test.hs main-is: Test.hs
build-depends: build-depends:
base base
, either , either
@ -48,9 +47,9 @@ test-suite siphon-test
, HUnit , HUnit
, test-framework-hunit , test-framework-hunit
, profunctors , profunctors
ghc-options: -threaded -rtsopts -with-rtsopts=-N , streaming
default-language: Haskell2010 default-language: Haskell2010
source-repository head source-repository head
type: git type: git
location: https://github.com/andrewthad/colonnade location: https://github.com/andrewthad/colonnade

View File

@ -9,8 +9,12 @@ module Siphon
( Siphon ( Siphon
, SiphonError , SiphonError
, Indexed(..) , Indexed(..)
, decodeHeadedChar8Csv , decodeHeadedUtf8Csv
, encodeHeadedUtf8Csv
, humanizeSiphonError , humanizeSiphonError
, headed
, headless
, indexed
) where ) where
import Siphon.Types import Siphon.Types
@ -34,6 +38,8 @@ import qualified Streaming as SM
import qualified Streaming.Prelude as SMP import qualified Streaming.Prelude as SMP
import qualified Data.Attoparsec.Types as ATYP import qualified Data.Attoparsec.Types as ATYP
import qualified Colonnade.Encode as CE import qualified Colonnade.Encode as CE
import qualified Data.Vector.Mutable as MV
import Control.Monad.Trans.Class import Control.Monad.Trans.Class
import Data.ByteString.Builder (toLazyByteString,byteString) import Data.ByteString.Builder (toLazyByteString,byteString)
@ -43,73 +49,90 @@ import Data.Vector (Vector)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Coerce (coerce) import Data.Coerce (coerce)
import Data.Char (chr) import Data.Char (chr)
import Data.Text.Encoding (decodeUtf8')
import Streaming (Stream,Of(..)) import Streaming (Stream,Of(..))
import Data.Vector.Mutable (MVector)
import Control.Monad.ST
newtype Escaped c = Escaped { getEscaped :: c } newtype Escaped c = Escaped { getEscaped :: c }
data Ended = EndedYes | EndedNo data Ended = EndedYes | EndedNo
deriving (Show)
data CellResult c = CellResultData !c | CellResultNewline !Ended data CellResult c = CellResultData !c | CellResultNewline !Ended
deriving (Show)
decodeHeadedChar8Csv :: Monad m decodeHeadedUtf8Csv :: Monad m
=> Siphon CE.Headed ByteString a => Siphon CE.Headed ByteString a
-> Stream (Of ByteString) m () -- ^ encoded csv -> Stream (Of ByteString) m () -- ^ encoded csv
-> Stream (Of a) m (Maybe (SiphonError ByteString)) -> Stream (Of a) m (Maybe SiphonError)
decodeHeadedChar8Csv headedSiphon s1 = do decodeHeadedUtf8Csv headedSiphon s1 = do
e <- lift (consumeHeaderRowChar8 s1) e <- lift (consumeHeaderRowUtf8 s1)
case e of case e of
Left err -> return (Just err) Left err -> return (Just err)
Right (v :> s2) -> case headedToIndexed v headedSiphon of Right (v :> s2) -> case headedToIndexed utf8ToStr v headedSiphon of
Left err -> return (Just err) Left err -> return (Just err)
Right ixedSiphon -> do Right ixedSiphon -> do
let requiredLength = V.length v let requiredLength = V.length v
consumeBodyChar8 1 requiredLength ixedSiphon s2 consumeBodyUtf8 1 requiredLength ixedSiphon s2
encodeHeadedChar8Csv :: Monad m encodeHeadedUtf8Csv :: Monad m
=> Colonnade CE.Headed ByteString a => CE.Colonnade CE.Headed a ByteString
-> Stream (Of a) m r -> Stream (Of a) m r
-> Stream (Of ByteString) m r -> Stream (Of ByteString) m r
encodeHeadedChar8Csv headedSiphon s1 = do encodeHeadedUtf8Csv =
yield (header siphon encoding) encodeHeadedCsv escapeChar8 (B.singleton comma) (B.singleton newline)
pipe siphon encoding
encodeGeneralCsv :: Monad m encodeHeadedCsv :: Monad m
=> (c -> Escaped c) => (c -> Escaped c)
-> c -- ^ separator -> c -- ^ separator
-> Colonnade f a c -> c -- ^ newline
-> CE.Colonnade CE.Headed a c
-> Stream (Of a) m r -> Stream (Of a) m r
-> Stream (Of c) m r -> Stream (Of c) m r
encodeGeneralCsv escapeFunc separatorStr colonnade = do encodeHeadedCsv escapeFunc separatorStr newlineStr colonnade s = do
Pipes.map (row siphon encoding) encodeHeader escapeFunc separatorStr newlineStr colonnade
encodeRows escapeFunc separatorStr newlineStr colonnade s
encodeHeader :: Siphon c -> Colonnade Headed a c -> c encodeHeader :: Monad m
=> (c -> Escaped c) => (c -> Escaped c)
-> c -- ^ separator -> c -- ^ separator
-> Colonnade f a c -> c -- ^ newline
-> Stream (Of c) m r -> CE.Colonnade CE.Headed a c
encodeHeader escapeFunc separatorStr colonnade = SMP.mapM_ $ \a -> do -> Stream (Of c) m ()
encodeHeader escapeFunc separatorStr newlineStr colonnade = do
let (vs,ws) = V.splitAt 1 (CE.getColonnade colonnade) let (vs,ws) = V.splitAt 1 (CE.getColonnade colonnade)
-- we only need to do this split because the first cell -- we only need to do this split because the first cell
-- gets treated differently than the others. It does not -- gets treated differently than the others. It does not
-- get a separator added before it. -- get a separator added before it.
V.forM_ vs $ \(CE.OneColonnade _ encode) -> yield (getEscaped (escapeFunc (encode a))) V.forM_ vs $ \(CE.OneColonnade (CE.Headed h) _) -> do
V.forM_ ws $ \(CE.OneColonnade _ encode) -> do SMP.yield (getEscaped (escapeFunc h))
yield separator V.forM_ ws $ \(CE.OneColonnade (CE.Headed h) _) -> do
yeied (getEscaped (escapeFunc (encode a))) SMP.yield separatorStr
SMP.yield (getEscaped (escapeFunc h))
SMP.yield newlineStr
encodeRow :: mapStreamM :: Monad m
=> (a -> Stream (Of b) m x)
-> Stream (Of a) m r
-> Stream (Of b) m r
mapStreamM f = SM.concats . SM.mapsM (\(a :> s) -> return (f a >> return s))
encodeRows :: Monad m
=> (c -> Escaped c) => (c -> Escaped c)
-> c -- ^ separator -> c -- ^ separator
-> Colonnade f a c -> c -- ^ newline
-> CE.Colonnade f a c
-> Stream (Of a) m r -> Stream (Of a) m r
-> Stream (Of c) m r -> Stream (Of c) m r
encodeRow escapeFunc separatorStr colonnade = SMP.mapM_ $ \a -> do encodeRows escapeFunc separatorStr newlineStr colonnade = mapStreamM $ \a -> do
let (vs,ws) = V.splitAt 1 (CE.getColonnade colonnade) let (vs,ws) = V.splitAt 1 (CE.getColonnade colonnade)
-- we only need to do this split because the first cell -- we only need to do this split because the first cell
-- gets treated differently than the others. It does not -- gets treated differently than the others. It does not
-- get a separator added before it. -- get a separator added before it.
V.forM_ vs $ \(CE.OneColonnade _ encode) -> yield (getEscaped (escapeFunc (encode a))) V.forM_ vs $ \(CE.OneColonnade _ encode) -> SMP.yield (getEscaped (escapeFunc (encode a)))
V.forM_ ws $ \(CE.OneColonnade _ encode) -> do V.forM_ ws $ \(CE.OneColonnade _ encode) -> do
yield separator SMP.yield separatorStr
yeied (getEscaped (escapeFunc (encode a))) SMP.yield (getEscaped (escapeFunc (encode a)))
SMP.yield newlineStr
data IndexedHeader a = IndexedHeader data IndexedHeader a = IndexedHeader
{ indexedHeaderIndexed :: {-# UNPACK #-} !Int { indexedHeaderIndexed :: {-# UNPACK #-} !Int
@ -120,35 +143,36 @@ data IndexedHeader a = IndexedHeader
-- expected headers into the indices of the columns that they -- expected headers into the indices of the columns that they
-- correspond to. -- correspond to.
headedToIndexed :: forall c a. Eq c headedToIndexed :: forall c a. Eq c
=> Vector c -- ^ Headers in the source document => (c -> T.Text)
-> Vector c -- ^ Headers in the source document
-> Siphon CE.Headed c a -- ^ Decolonnade that contains expected headers -> Siphon CE.Headed c a -- ^ Decolonnade that contains expected headers
-> Either (SiphonError c) (Siphon IndexedHeader c a) -> Either SiphonError (Siphon IndexedHeader c a)
headedToIndexed v = headedToIndexed toStr v =
mapLeft (\(HeaderErrors a b c) -> SiphonError 0 (RowErrorHeaders a b c)) mapLeft (\(HeaderErrors a b c) -> SiphonError 0 (RowErrorHeaders a b c))
. getEitherWrap . getEitherWrap
. go . go
where where
go :: forall b. go :: forall b.
Siphon CE.Headed c b Siphon CE.Headed c b
-> EitherWrap (HeaderErrors c) (Siphon IndexedHeader c b) -> EitherWrap HeaderErrors (Siphon IndexedHeader c b)
go (SiphonPure b) = EitherWrap (Right (SiphonPure b)) go (SiphonPure b) = EitherWrap (Right (SiphonPure b))
go (SiphonAp (CE.Headed h) decode apNext) = go (SiphonAp (CE.Headed h) decode apNext) =
let rnext = go apNext let rnext = go apNext
ixs = V.elemIndices h v ixs = V.elemIndices h v
ixsLen = V.length ixs ixsLen = V.length ixs
rcurrent rcurrent
| ixsLen == 1 = Right (V.unsafeIndex ixs 0) | ixsLen == 1 = Right (ixs V.! 0) -- (V.unsafeIndex ixs 0)
| ixsLen == 0 = Left (HeaderErrors V.empty (V.singleton h) V.empty) | ixsLen == 0 = Left (HeaderErrors V.empty (V.singleton (toStr h)) V.empty)
| otherwise = | otherwise =
let dups = V.singleton (V.map (\ix -> CellError ix (V.unsafeIndex v ix)) ixs) let dups = V.singleton (V.map (\ix -> CellError ix (toStr (v V.! ix) {- (V.unsafeIndex v ix) -} )) ixs)
in Left (HeaderErrors dups V.empty V.empty) in Left (HeaderErrors dups V.empty V.empty)
in (\ix nextSiphon -> SiphonAp (IndexedHeader ix h) decode nextSiphon) in (\ix nextSiphon -> SiphonAp (IndexedHeader ix h) decode nextSiphon)
<$> EitherWrap rcurrent <$> EitherWrap rcurrent
<*> rnext <*> rnext
data HeaderErrors c = HeaderErrors !(Vector (Vector (CellError c))) !(Vector c) !(Vector Int) data HeaderErrors = HeaderErrors !(Vector (Vector CellError)) !(Vector T.Text) !(Vector Int)
instance Monoid (HeaderErrors c) where instance Monoid HeaderErrors where
mempty = HeaderErrors mempty mempty mempty mempty = HeaderErrors mempty mempty mempty
mappend (HeaderErrors a1 b1 c1) (HeaderErrors a2 b2 c2) = HeaderErrors mappend (HeaderErrors a1 b1 c1) (HeaderErrors a2 b2 c2) = HeaderErrors
(mappend a1 a2) (mappend b1 b2) (mappend c1 c2) (mappend a1 a2) (mappend b1 b2) (mappend c1 c2)
@ -160,15 +184,8 @@ instance Monoid (HeaderErrors c) where
-- (A.parse (row comma)) -- (A.parse (row comma))
-- B.null -- B.null
encodeRow :: Vector (Escaped ByteString) -> ByteString escapeChar8 :: ByteString -> Escaped ByteString
encodeRow = id escapeChar8 t = case B.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
. flip B.append (B.singleton newline)
. B.intercalate (B.singleton comma)
. V.toList
. coerce
escape :: ByteString -> Escaped ByteString
escape t = case B.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
Nothing -> Escaped t Nothing -> Escaped t
Just _ -> escapeAlways t Just _ -> escapeAlways t
@ -233,7 +250,7 @@ field !delim = do
-- choice if we see a double quote. -- choice if we see a double quote.
case mb of case mb of
Just b Just b
| b == delim -> do | b == doubleQuote -> do
bs <- escapedField delim bs <- escapedField delim
return (CellResultData bs) return (CellResultData bs)
| b == 10 || b == 13 -> do | b == 10 || b == 13 -> do
@ -260,7 +277,7 @@ escapedField !delim = do
then Just (not s) then Just (not s)
else if s then Nothing else if s then Nothing
else Just False) else Just False)
A.skip (== delim) A.option () (A.skip (== delim))
if doubleQuote `S.elem` s if doubleQuote `S.elem` s
then case Z.parse unescape s of then case Z.parse unescape s of
Right r -> return r Right r -> return r
@ -276,7 +293,7 @@ unescapedField !delim =
c /= newline && c /= newline &&
c /= delim && c /= delim &&
c /= cr c /= cr
) <* A.skip (== delim) ) <* A.option () (A.skip (== delim))
dquote :: AL.Parser Char dquote :: AL.Parser Char
dquote = char '"' dquote = char '"'
@ -327,15 +344,15 @@ comma = 44
-- | This adds one to the index because text editors consider -- | This adds one to the index because text editors consider
-- line number to be one-based, not zero-based. -- line number to be one-based, not zero-based.
humanizeSiphonError :: Eq c => (c -> String) -> SiphonError c -> String humanizeSiphonError :: SiphonError -> String
humanizeSiphonError toStr (SiphonError ix e) = unlines humanizeSiphonError (SiphonError ix e) = unlines
$ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.") $ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.")
: ("Error Category: " ++ descr) : ("Error Category: " ++ descr)
: map (" " ++) errDescrs : map (" " ++) errDescrs
where (descr,errDescrs) = prettyRowError toStr e where (descr,errDescrs) = prettyRowError e
prettyRowError :: Eq c => (c -> String) -> RowError c -> (String, [String]) prettyRowError :: RowError -> (String, [String])
prettyRowError toStr x = case x of prettyRowError x = case x of
RowErrorParse -> (,) "CSV Parsing" RowErrorParse -> (,) "CSV Parsing"
[ "The cells were malformed." [ "The cells were malformed."
] ]
@ -352,16 +369,16 @@ prettyRowError toStr x = case x of
, "There is a mistake in the encoding of the text." , "There is a mistake in the encoding of the text."
] ]
RowErrorHeaders dupErrs namedErrs unnamedErrs -> (,) "Missing Headers" $ concat RowErrorHeaders dupErrs namedErrs unnamedErrs -> (,) "Missing Headers" $ concat
[ if V.length namedErrs > 0 then prettyNamedMissingHeaders toStr namedErrs else [] [ if V.length namedErrs > 0 then prettyNamedMissingHeaders namedErrs else []
, if V.length unnamedErrs > 0 then ["Missing unnamed headers"] else [] , if V.length unnamedErrs > 0 then ["Missing unnamed headers"] else []
, if V.length dupErrs > 0 then prettyHeadingErrors toStr dupErrs else [] , if V.length dupErrs > 0 then prettyHeadingErrors dupErrs else []
] ]
RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors toStr errs) RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors errs)
prettyCellErrors :: (c -> String) -> Vector (CellError c) -> [String] prettyCellErrors :: Vector CellError -> [String]
prettyCellErrors toStr errs = drop 1 $ prettyCellErrors errs = drop 1 $
flip concatMap errs $ \(CellError ix content) -> flip concatMap errs $ \(CellError ix content) ->
let str = toStr content in let str = T.unpack content in
[ "-----------" [ "-----------"
, "Column " ++ columnNumToLetters ix , "Column " ++ columnNumToLetters ix
, "Cell Content Length: " ++ show (Prelude.length str) , "Cell Content Length: " ++ show (Prelude.length str)
@ -370,27 +387,26 @@ prettyCellErrors toStr errs = drop 1 $
else str else str
] ]
prettyNamedMissingHeaders :: (c -> String) -> Vector c -> [String] prettyNamedMissingHeaders :: Vector T.Text -> [String]
prettyNamedMissingHeaders conv missing = concat prettyNamedMissingHeaders missing = concat
[ concatMap (\h -> ["The header " ++ conv h ++ " was missing."]) missing [ concatMap (\h -> ["The header " ++ T.unpack h ++ " was missing."]) missing
] ]
prettyHeadingErrors :: forall c. Eq c prettyHeadingErrors :: Vector (Vector CellError) -> [String]
=> (c -> String) -> Vector (Vector (CellError c)) -> [String] prettyHeadingErrors missing = join (V.toList (fmap f missing))
prettyHeadingErrors conv missing = join (V.toList (fmap f missing))
where where
f :: Vector (CellError c) -> [String] f :: Vector CellError -> [String]
f v f v
| not (V.null w) && V.all (== V.head w) (V.tail w) = | not (V.null w) && V.all (== V.head w) (V.tail w) =
[ "The header [" [ "The header ["
, conv (V.head w) , T.unpack (V.head w)
, "] appears in columns " , "] appears in columns "
, L.intercalate ", " (V.toList (V.map (\(CellError ix _) -> columnNumToLetters ix) v)) , L.intercalate ", " (V.toList (V.map (\(CellError ix _) -> columnNumToLetters ix) v))
] ]
| otherwise = multiMsg : V.toList | otherwise = multiMsg : V.toList
(V.map (\(CellError ix content) -> " Column " ++ columnNumToLetters ix ++ ": " ++ conv content) v) (V.map (\(CellError ix content) -> " Column " ++ columnNumToLetters ix ++ ": " ++ T.unpack content) v)
where where
w :: Vector c w :: Vector T.Text
w = V.map cellErrorContent v w = V.map cellErrorContent v
multiMsg :: String multiMsg :: String
multiMsg = "Multiple headers matched the same predicate:" multiMsg = "Multiple headers matched the same predicate:"
@ -415,32 +431,37 @@ mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft _ (Right a) = Right a mapLeft _ (Right a) = Right a
mapLeft f (Left a) = Left (f a) mapLeft f (Left a) = Left (f a)
consumeHeaderRowChar8 :: Monad m consumeHeaderRowUtf8 :: Monad m
=> Stream (Of ByteString) m () => Stream (Of ByteString) m ()
-> m (Either (SiphonError ByteString) (Of (Vector ByteString) (Stream (Of ByteString) m ()))) -> m (Either SiphonError (Of (Vector ByteString) (Stream (Of ByteString) m ())))
consumeHeaderRowChar8 = consumeHeaderRow (A.parse (field comma)) B.null B.empty (\() -> True) consumeHeaderRowUtf8 = consumeHeaderRow utf8ToStr (A.parse (field comma)) B.null B.empty (\() -> True)
consumeBodyChar8 :: forall m a. Monad m consumeBodyUtf8 :: forall m a. Monad m
=> Int -- ^ index of first row, usually zero or one => Int -- ^ index of first row, usually zero or one
-> Int -- ^ Required row length -> Int -- ^ Required row length
-> Siphon IndexedHeader ByteString a -> Siphon IndexedHeader ByteString a
-> Stream (Of ByteString) m () -> Stream (Of ByteString) m ()
-> Stream (Of a) m (Maybe (SiphonError ByteString)) -> Stream (Of a) m (Maybe SiphonError)
consumeBodyChar8 = consumeBody (A.parse (field comma)) B.null B.empty (\() -> True) consumeBodyUtf8 = consumeBody utf8ToStr
(A.parse (field comma)) B.null B.empty (\() -> True)
utf8ToStr :: ByteString -> T.Text
utf8ToStr = either (\_ -> T.empty) id . decodeUtf8'
consumeHeaderRow :: forall m r c. Monad m consumeHeaderRow :: forall m r c. Monad m
=> (c -> ATYP.IResult c (CellResult c)) => (c -> T.Text)
-> (c -> ATYP.IResult c (CellResult c))
-> (c -> Bool) -- ^ true if null string -> (c -> Bool) -- ^ true if null string
-> c -> c
-> (r -> Bool) -- ^ true if termination is acceptable -> (r -> Bool) -- ^ true if termination is acceptable
-> Stream (Of c) m r -> Stream (Of c) m r
-> m (Either (SiphonError c) (Of (Vector c) (Stream (Of c) m r))) -> m (Either SiphonError (Of (Vector c) (Stream (Of c) m r)))
consumeHeaderRow parseCell isNull emptyStr isGood s0 = go 0 StrictListNil s0 consumeHeaderRow toStr parseCell isNull emptyStr isGood s0 = go 0 StrictListNil s0
where where
go :: Int go :: Int
-> StrictList c -> StrictList c
-> Stream (Of c) m r -> Stream (Of c) m r
-> m (Either (SiphonError c) (Of (Vector c) (Stream (Of c) m r))) -> m (Either SiphonError (Of (Vector c) (Stream (Of c) m r)))
go !cellsLen !cells !s1 = do go !cellsLen !cells !s1 = do
e <- skipWhile isNull s1 e <- skipWhile isNull s1
case e of case e of
@ -451,7 +472,7 @@ consumeHeaderRow parseCell isNull emptyStr isGood s0 = go 0 StrictListNil s0
handleResult :: Int -> StrictList c handleResult :: Int -> StrictList c
-> ATYP.IResult c (CellResult c) -> ATYP.IResult c (CellResult c)
-> Stream (Of c) m r -> Stream (Of c) m r
-> m (Either (SiphonError c) (Of (Vector c) (Stream (Of c) m r))) -> m (Either SiphonError (Of (Vector c) (Stream (Of c) m r)))
handleResult !cellsLen !cells !result s1 = case result of handleResult !cellsLen !cells !result s1 = case result of
ATYP.Fail _ _ _ -> return $ Left $ SiphonError 0 RowErrorParse ATYP.Fail _ _ _ -> return $ Left $ SiphonError 0 RowErrorParse
ATYP.Done !c1 !res -> case res of ATYP.Done !c1 !res -> case res of
@ -469,7 +490,8 @@ consumeHeaderRow parseCell isNull emptyStr isGood s0 = go 0 StrictListNil s0
Right (c1 :> s2) -> handleResult cellsLen cells (k c1) s2 Right (c1 :> s2) -> handleResult cellsLen cells (k c1) s2
consumeBody :: forall m r c a. Monad m consumeBody :: forall m r c a. Monad m
=> (c -> ATYP.IResult c (CellResult c)) => (c -> T.Text)
-> (c -> ATYP.IResult c (CellResult c))
-> (c -> Bool) -> (c -> Bool)
-> c -> c
-> (r -> Bool) -- ^ True if termination is acceptable. False if it is because of a decoding error. -> (r -> Bool) -- ^ True if termination is acceptable. False if it is because of a decoding error.
@ -477,10 +499,11 @@ consumeBody :: forall m r c a. Monad m
-> Int -- ^ Required row length -> Int -- ^ Required row length
-> Siphon IndexedHeader c a -> Siphon IndexedHeader c a
-> Stream (Of c) m r -> Stream (Of c) m r
-> Stream (Of a) m (Maybe (SiphonError c)) -> Stream (Of a) m (Maybe SiphonError)
consumeBody parseCell isNull emptyStr isGood row0 reqLen siphon s0 = go row0 0 StrictListNil s0 consumeBody toStr parseCell isNull emptyStr isGood row0 reqLen siphon s0 =
go row0 0 StrictListNil s0
where where
go :: Int -> Int -> StrictList c -> Stream (Of c) m r -> Stream (Of a) m (Maybe (SiphonError c)) go :: Int -> Int -> StrictList c -> Stream (Of c) m r -> Stream (Of a) m (Maybe SiphonError)
go !row !cellsLen !cells !s1 = do go !row !cellsLen !cells !s1 = do
e <- lift (skipWhile isNull s1) e <- lift (skipWhile isNull s1)
case e of case e of
@ -491,11 +514,11 @@ consumeBody parseCell isNull emptyStr isGood row0 reqLen siphon s0 = go row0 0 S
handleResult :: Int -> Int -> StrictList c handleResult :: Int -> Int -> StrictList c
-> ATYP.IResult c (CellResult c) -> ATYP.IResult c (CellResult c)
-> Stream (Of c) m r -> Stream (Of c) m r
-> Stream (Of a) m (Maybe (SiphonError c)) -> Stream (Of a) m (Maybe SiphonError)
handleResult !row !cellsLen !cells !result s1 = case result of handleResult !row !cellsLen !cells !result s1 = case result of
ATYP.Fail _ _ _ -> return $ Just $ SiphonError row RowErrorParse ATYP.Fail _ _ _ -> return $ Just $ SiphonError row RowErrorParse
ATYP.Done !c1 !res -> case res of ATYP.Done !c1 !res -> case res of
CellResultNewline ended -> do CellResultNewline !ended -> do
case decodeRow row (reverseVectorStrictList cellsLen cells) of case decodeRow row (reverseVectorStrictList cellsLen cells) of
Left err -> return (Just err) Left err -> return (Just err)
Right a -> do Right a -> do
@ -519,16 +542,30 @@ consumeBody parseCell isNull emptyStr isGood row0 reqLen siphon s0 = go row0 0 S
case e of case e of
Left r -> handleResult row cellsLen cells (k emptyStr) (return r) Left r -> handleResult row cellsLen cells (k emptyStr) (return r)
Right (c1 :> s2) -> handleResult row cellsLen cells (k c1) s2 Right (c1 :> s2) -> handleResult row cellsLen cells (k c1) s2
decodeRow :: Int -> Vector c -> Either (SiphonError c) a decodeRow :: Int -> Vector c -> Either SiphonError a
decodeRow rowIx v = decodeRow rowIx v =
let vlen = V.length v in let vlen = V.length v in
if vlen /= reqLen if vlen /= reqLen
then Left $ SiphonError rowIx $ RowErrorSize reqLen vlen then Left $ SiphonError rowIx $ RowErrorSize reqLen vlen
else uncheckedRunWithRow rowIx siphon v else uncheckedRunWithRow toStr rowIx siphon v
-- | You must pass the length of the list and as the first argument. -- | You must pass the length of the list and as the first argument.
reverseVectorStrictList :: Int -> StrictList c -> Vector c -- Passing the wrong length will lead to an error.
reverseVectorStrictList _ _ = error "write me" reverseVectorStrictList :: forall c. Int -> StrictList c -> Vector c
reverseVectorStrictList len sl0 = V.create $ do
mv <- MV.new len
go1 mv
return mv
where
go1 :: forall s. MVector s c -> ST s ()
go1 !mv = go2 0 sl0
where
go2 :: Int -> StrictList c -> ST s ()
go2 _ StrictListNil = return ()
go2 !ix (StrictListCons c slNext) = do
MV.write mv ix c
go2 (ix + 1) slNext
skipWhile :: forall m a r. Monad m skipWhile :: forall m a r. Monad m
=> (a -> Bool) => (a -> Bool)
@ -551,31 +588,34 @@ data StrictList a = StrictListNil | StrictListCons !a !(StrictList a)
-- | This function uses 'unsafeIndex' to access -- | This function uses 'unsafeIndex' to access
-- elements of the 'Vector'. -- elements of the 'Vector'.
uncheckedRunWithRow :: uncheckedRunWithRow ::
Int (c -> T.Text)
-> Int
-> Siphon IndexedHeader c a -> Siphon IndexedHeader c a
-> Vector c -> Vector c
-> Either (SiphonError c) a -> Either SiphonError a
uncheckedRunWithRow i d v = mapLeft (SiphonError i . RowErrorDecode) (uncheckedRun d v) uncheckedRunWithRow toStr i d v =
mapLeft (SiphonError i . RowErrorDecode) (uncheckedRun toStr d v)
-- | This function does not check to make sure that the indicies in -- | This function does not check to make sure that the indicies in
-- the 'Decolonnade' are in the 'Vector'. Only use this if you have -- the 'Decolonnade' are in the 'Vector'. Only use this if you have
-- already verified that none of the indices in the siphon are -- already verified that none of the indices in the siphon are
-- out of the bounds. -- out of the bounds.
uncheckedRun :: forall c a. uncheckedRun :: forall c a.
Siphon IndexedHeader c a (c -> T.Text)
-> Siphon IndexedHeader c a
-> Vector c -> Vector c
-> Either (Vector (CellError c)) a -> Either (Vector CellError) a
uncheckedRun dc v = getEitherWrap (go dc) uncheckedRun toStr dc v = getEitherWrap (go dc)
where where
go :: forall b. go :: forall b.
Siphon IndexedHeader c b Siphon IndexedHeader c b
-> EitherWrap (Vector (CellError c)) b -> EitherWrap (Vector CellError) b
go (SiphonPure b) = EitherWrap (Right b) go (SiphonPure b) = EitherWrap (Right b)
go (SiphonAp (IndexedHeader ix _) decode apNext) = go (SiphonAp (IndexedHeader ix _) decode apNext) =
let rnext = go apNext let rnext = go apNext
content = V.unsafeIndex v ix content = v V.! ix -- V.unsafeIndex v ix
rcurrent = maybe rcurrent = maybe
(Left (V.singleton (CellError ix content))) (Left (V.singleton (CellError ix (toStr content))))
Right Right
(decode content) (decode content)
in rnext <*> (EitherWrap rcurrent) in rnext <*> (EitherWrap rcurrent)
@ -593,3 +633,12 @@ maxIndex = go 0 where
go !ix1 (SiphonAp (IndexedHeader ix2 _) _ apNext) = go !ix1 (SiphonAp (IndexedHeader ix2 _) _ apNext) =
go (max ix1 ix2) apNext go (max ix1 ix2) apNext
headless :: (c -> Maybe a) -> Siphon CE.Headless c a
headless f = SiphonAp CE.Headless f (SiphonPure id)
headed :: c -> (c -> Maybe a) -> Siphon CE.Headed c a
headed h f = SiphonAp (CE.Headed h) f (SiphonPure id)
indexed :: Int -> (c -> Maybe a) -> Siphon Indexed c a
indexed ix f = SiphonAp (Indexed ix) f (SiphonPure id)

View File

@ -11,9 +11,6 @@ module Siphon.Decoding
, consumeGeneral , consumeGeneral
, pipeGeneral , pipeGeneral
, convertDecodeError , convertDecodeError
, headed
, headless
, indexed
) where ) where
import Siphon.Types import Siphon.Types

View File

@ -14,32 +14,32 @@ module Siphon.Types
import Data.Vector (Vector) import Data.Vector (Vector)
import Control.Exception (Exception) import Control.Exception (Exception)
import Data.Typeable (Typeable) import Data.Text (Text)
data CellError c = CellError data CellError = CellError
{ cellErrorColumn :: !Int { cellErrorColumn :: !Int
, cellErrorContent :: !c , cellErrorContent :: !Text
} deriving (Show,Read,Eq) } deriving (Show,Read,Eq)
newtype Indexed a = Indexed newtype Indexed a = Indexed
{ indexedIndex :: Int { indexedIndex :: Int
} deriving (Eq,Ord,Functor,Show,Read) } deriving (Eq,Ord,Functor,Show,Read)
data SiphonError c = SiphonError data SiphonError = SiphonError
{ siphonErrorRow :: !Int { siphonErrorRow :: !Int
, siphonErrorCause :: !(RowError c) , siphonErrorCause :: !RowError
} deriving (Show,Read,Eq) } deriving (Show,Read,Eq)
instance (Show c, Typeable c) => Exception (SiphonError c) instance Exception SiphonError
data RowError c data RowError
= RowErrorParse = RowErrorParse
-- ^ Error occurred parsing the document into cells -- ^ Error occurred parsing the document into cells
| RowErrorDecode !(Vector (CellError c)) | RowErrorDecode !(Vector CellError)
-- ^ Error decoding the content -- ^ Error decoding the content
| RowErrorSize !Int !Int | RowErrorSize !Int !Int
-- ^ Wrong number of cells in the row -- ^ Wrong number of cells in the row
| RowErrorHeaders !(Vector (Vector (CellError c))) !(Vector c) !(Vector Int) | RowErrorHeaders !(Vector (Vector CellError)) !(Vector Text) !(Vector Int)
-- ^ Three parts: -- ^ Three parts:
-- (a) Multiple header cells matched the same expected cell, -- (a) Multiple header cells matched the same expected cell,
-- (b) Headers that were missing, -- (b) Headers that were missing,

View File

@ -1,16 +1,18 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
module Main (main) where module Main (main) where
import Test.QuickCheck (Gen, Arbitrary(..), choose, elements) import Test.QuickCheck (Gen, Arbitrary(..), choose, elements, Property)
import Test.HUnit (Assertion,(@?=)) import Test.QuickCheck.Property (Result, succeeded, exception)
import Test.Framework (defaultMain, testGroup, Test) import Test.HUnit (Assertion,(@?=))
import Test.Framework (defaultMain, testGroup, Test)
import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.Framework.Providers.HUnit (testCase) import Test.Framework.Providers.HUnit (testCase)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Data.Either.Combinators import Data.Either.Combinators
import Siphon.Types import Siphon.Types
import Data.Functor.Identity import Data.Functor.Identity
@ -18,20 +20,20 @@ import Data.Functor.Contravariant (contramap)
import Data.Functor.Contravariant.Divisible (divided,conquered) import Data.Functor.Contravariant.Divisible (divided,conquered)
import Colonnade (headed,headless,Colonnade,Headed,Headless) import Colonnade (headed,headless,Colonnade,Headed,Headless)
import Data.Profunctor (lmap) import Data.Profunctor (lmap)
import Streaming (Stream,Of(..))
import Control.Exception
import Debug.Trace
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.ByteString.Builder as Builder import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Lazy as LByteString import qualified Data.ByteString.Lazy as LByteString
import qualified Data.ByteString as ByteString import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Char8 as BC8 import qualified Data.ByteString.Char8 as BC8
import qualified Colonnade as Colonnade import qualified Colonnade as Colonnade
import qualified Siphon.Encoding as SE import qualified Siphon as S
import qualified Siphon.Decoding as SD import qualified Streaming.Prelude as SMP
import qualified Siphon.Content as SC
import qualified Pipes.Prelude as Pipes
import qualified Data.Text.Lazy as LText import qualified Data.Text.Lazy as LText
import qualified Data.Text.Lazy.Builder as TBuilder import qualified Data.Text.Lazy.Builder as TBuilder
import qualified Data.Text.Lazy.Builder.Int as TBuilder import qualified Data.Text.Lazy.Builder.Int as TBuilder
import Pipes
main :: IO () main :: IO ()
main = defaultMain tests main = defaultMain tests
@ -39,60 +41,55 @@ main = defaultMain tests
tests :: [Test] tests :: [Test]
tests = tests =
[ testGroup "ByteString encode/decode" [ testGroup "ByteString encode/decode"
[ testCase "Headless Encoding (int,char,bool)" [ testCase "Headed Encoding (int,char,bool)"
$ runTestScenario $ runTestScenario [(4,'c',False)]
SC.byteStringChar8 S.encodeHeadedUtf8Csv
SE.pipe
encodingA
"4,c,false\n"
, testProperty "Headless Isomorphism (int,char,bool)"
$ propIsoPipe $
(SE.pipe SC.byteStringChar8 encodingA)
>->
(void $ SD.headlessPipe SC.byteStringChar8 decodingA)
, testCase "Headed Encoding (int,char,bool)"
$ runTestScenario
SC.byteStringChar8
SE.headedPipe
encodingB encodingB
$ ByteString.concat $ ByteString.concat
[ "number,letter,boolean\n" [ "number,letter,boolean\n"
, "4,c,false\n" , "4,c,false\n"
] ]
, testCase "Headed Encoding (int,char,bool) monoidal building" , testCase "Headed Encoding (int,char,bool) monoidal building"
$ runTestScenario $ runTestScenario [(4,'c',False)]
SC.byteStringChar8 S.encodeHeadedUtf8Csv
SE.headedPipe
encodingC encodingC
$ ByteString.concat $ ByteString.concat
[ "boolean,letter\n" [ "boolean,letter\n"
, "false,c\n" , "false,c\n"
] ]
, testCase "Headed Encoding (escaped characters)"
$ runTestScenario ["bob","there,be,commas","the \" quote"]
S.encodeHeadedUtf8Csv
encodingF
$ ByteString.concat
[ "name\n"
, "bob\n"
, "\"there,be,commas\"\n"
, "\"the \"\" quote\"\n"
]
, testCase "Headed Decoding (int,char,bool)"
$ ( runIdentity . SMP.toList )
( S.decodeHeadedUtf8Csv decodingB
( mapM_ (SMP.yield . BC8.singleton) $ concat
[ "number,letter,boolean\n"
, "244,z,true\n"
]
)
) @?= ([(244,'z',True)] :> Nothing)
, testCase "Headed Decoding (escaped characters)"
$ ( runIdentity . SMP.toList )
( S.decodeHeadedUtf8Csv decodingF
( mapM_ (SMP.yield . BC8.singleton) $ concat
[ "name\n"
, "drew\n"
, "\"martin, drew\"\n"
]
)
) @?= (["drew","martin, drew"] :> Nothing)
, testProperty "Headed Isomorphism (int,char,bool)" , testProperty "Headed Isomorphism (int,char,bool)"
$ propIsoPipe $ $ propIsoStream BC8.unpack
(SE.headedPipe SC.byteStringChar8 encodingB) (S.decodeHeadedUtf8Csv decodingB)
>-> (S.encodeHeadedUtf8Csv encodingB)
(void $ SD.headedPipe SC.byteStringChar8 decodingB)
]
, testGroup "Text encode/decode"
[ testCase "Headless Encoding (int,char,bool)"
$ runTestScenario
SC.text
SE.pipe
encodingW
"4,c,false\n"
, testCase "Headless Encoding (Foo,Foo,Foo)"
$ runCustomTestScenario
SC.text
SE.pipe
encodingY
(FooA,FooA,FooC)
"Simple,Simple,\"More\"\"Escaped,\"\"\"\"Chars\"\n"
, testProperty "Headless Isomorphism (Foo,Foo,Foo)"
$ propIsoPipe $
(SE.pipe SC.text encodingY)
>->
(void $ SD.headlessPipe SC.text decodingY)
] ]
] ]
@ -111,27 +108,31 @@ fooToString x = case x of
encodeFoo :: (String -> c) -> Foo -> c encodeFoo :: (String -> c) -> Foo -> c
encodeFoo f = f . fooToString encodeFoo f = f . fooToString
fooFromString :: String -> Either String Foo fooFromString :: String -> Maybe Foo
fooFromString x = case x of fooFromString x = case x of
"Simple" -> Right FooA "Simple" -> Just FooA
"With,Escaped\nChars" -> Right FooB "With,Escaped\nChars" -> Just FooB
"More\"Escaped,\"\"Chars" -> Right FooC "More\"Escaped,\"\"Chars" -> Just FooC
_ -> Left "failed to decode Foo" _ -> Nothing
decodeFoo :: (c -> String) -> c -> Either String Foo decodeFoo :: (c -> String) -> c -> Maybe Foo
decodeFoo f = fooFromString . f decodeFoo f = fooFromString . f
decodingA :: Decolonnade Headless ByteString (Int,Char,Bool) decodingA :: Siphon Headless ByteString (Int,Char,Bool)
decodingA = (,,) decodingA = (,,)
<$> SD.headless dbInt <$> S.headless dbInt
<*> SD.headless dbChar <*> S.headless dbChar
<*> SD.headless dbBool <*> S.headless dbBool
decodingB :: Decolonnade Headed ByteString (Int,Char,Bool) decodingB :: Siphon Headed ByteString (Int,Char,Bool)
decodingB = (,,) decodingB = (,,)
<$> SD.headed "number" dbInt <$> S.headed "number" dbInt
<*> SD.headed "letter" dbChar <*> S.headed "letter" dbChar
<*> SD.headed "boolean" dbBool <*> S.headed "boolean" dbBool
decodingF :: Siphon Headed ByteString ByteString
decodingF = S.headed "name" Just
encodingA :: Colonnade Headless (Int,Char,Bool) ByteString encodingA :: Colonnade Headless (Int,Char,Bool) ByteString
encodingA = mconcat encodingA = mconcat
@ -154,11 +155,14 @@ encodingY = mconcat
, lmap thd3 (headless $ encodeFoo Text.pack) , lmap thd3 (headless $ encodeFoo Text.pack)
] ]
decodingY :: Decolonnade Headless Text (Foo,Foo,Foo) decodingY :: Siphon Headless Text (Foo,Foo,Foo)
decodingY = (,,) decodingY = (,,)
<$> SD.headless (decodeFoo Text.unpack) <$> S.headless (decodeFoo Text.unpack)
<*> SD.headless (decodeFoo Text.unpack) <*> S.headless (decodeFoo Text.unpack)
<*> SD.headless (decodeFoo Text.unpack) <*> S.headless (decodeFoo Text.unpack)
encodingF :: Colonnade Headed ByteString ByteString
encodingF = headed "name" id
encodingB :: Colonnade Headed (Int,Char,Bool) ByteString encodingB :: Colonnade Headed (Int,Char,Bool) ByteString
encodingB = mconcat encodingB = mconcat
@ -176,32 +180,51 @@ encodingC = mconcat
tripleToPairs :: (a,b,c) -> (a,(b,(c,()))) tripleToPairs :: (a,b,c) -> (a,(b,(c,())))
tripleToPairs (a,b,c) = (a,(b,(c,()))) tripleToPairs (a,b,c) = (a,(b,(c,())))
propIsoPipe :: Eq a => Pipe a a Identity () -> [a] -> Bool propIsoStream :: (Eq a, Show a, Monoid c)
propIsoPipe p as = (Pipes.toList $ each as >-> p) == as => (c -> String)
-> (Stream (Of c) Identity () -> Stream (Of a) Identity (Maybe SiphonError))
-> (Stream (Of a) Identity () -> Stream (Of c) Identity ())
-> [a]
-> Result
propIsoStream toStr decode encode as =
let asNew :> m = runIdentity $ SMP.toList $ decode $ encode $ SMP.each as
in case m of
Nothing -> if as == asNew
then succeeded
else exception ("expected " ++ show as ++ " but got " ++ show asNew) myException
Just err ->
let csv = toStr $ mconcat $ runIdentity $ SMP.toList_ $ encode $ SMP.each as
in exception (S.humanizeSiphonError err ++ "\nGenerated CSV\n" ++ csv) myException
runTestScenario :: (Monoid c, Eq c, Show c) data MyException = MyException
=> Siphon c deriving (Show,Read,Eq)
-> (Siphon c -> Colonnade f (Int,Char,Bool) c -> Pipe (Int,Char,Bool) c Identity ()) instance Exception MyException
-> Colonnade f (Int,Char,Bool) c
-> c
-> Assertion
runTestScenario s p e c =
( mconcat $ Pipes.toList $
Pipes.yield (4,'c',False) >-> p s e
) @?= c
runCustomTestScenario :: (Monoid c, Eq c, Show c) myException :: SomeException
=> Siphon c myException = SomeException MyException
-> (Siphon c -> Colonnade f a c -> Pipe a c Identity ())
runTestScenario :: (Monoid c, Eq c, Show c, Eq a, Show a)
=> [a]
-> (Colonnade f a c -> Stream (Of a) Identity () -> Stream (Of c) Identity ())
-> Colonnade f a c -> Colonnade f a c
-> a
-> c -> c
-> Assertion -> Assertion
runCustomTestScenario s p e a c = runTestScenario as p e c =
( mconcat $ Pipes.toList $ ( mconcat (runIdentity (SMP.toList_ (p e (mapM_ SMP.yield as))))
Pipes.yield a >-> p s e
) @?= c ) @?= c
-- runCustomTestScenario :: (Monoid c, Eq c, Show c)
-- => Siphon c
-- -> (Siphon c -> Colonnade f a c -> Pipe a c Identity ())
-- -> Colonnade f a c
-- -> a
-- -> c
-- -> Assertion
-- runCustomTestScenario s p e a c =
-- ( mconcat $ Pipes.toList $
-- Pipes.yield a >-> p s e
-- ) @?= c
-- testEncodingA :: Assertion -- testEncodingA :: Assertion
-- testEncodingA = runTestScenario encodingA "4,c,false\n" -- testEncodingA = runTestScenario encodingA "4,c,false\n"
@ -225,24 +248,23 @@ thd3 :: (a,b,c) -> c
thd3 (a,b,c) = c thd3 (a,b,c) = c
dbChar :: ByteString -> Either String Char dbChar :: ByteString -> Maybe Char
dbChar b = case BC8.length b of dbChar b = case BC8.length b of
1 -> Right (BC8.head b) 1 -> Just (BC8.head b)
0 -> Left "cannot decode Char from empty bytestring" _ -> Nothing
_ -> Left "cannot decode Char from multi-character bytestring"
dbInt :: ByteString -> Either String Int dbInt :: ByteString -> Maybe Int
dbInt b = do dbInt b = do
(a,bsRem) <- maybe (Left "could not parse int") Right (BC8.readInt b) (a,bsRem) <- BC8.readInt b
if ByteString.null bsRem if ByteString.null bsRem
then Right a then Just a
else Left "found extra characters after int" else Nothing
dbBool :: ByteString -> Either String Bool dbBool :: ByteString -> Maybe Bool
dbBool b dbBool b
| b == BC8.pack "true" = Right True | b == BC8.pack "true" = Just True
| b == BC8.pack "false" = Right False | b == BC8.pack "false" = Just False
| otherwise = Left "must be true or false" | otherwise = Nothing
ebChar :: Char -> ByteString ebChar :: Char -> ByteString
ebChar = BC8.singleton ebChar = BC8.singleton