refactored siphon type and improved testing

This commit is contained in:
Andrew Martin 2016-07-04 21:42:19 -04:00
parent e3c254a82e
commit 3bfd8265bc
8 changed files with 124 additions and 66 deletions

View File

@ -19,6 +19,7 @@ library
Siphon.ByteString.Char8 Siphon.ByteString.Char8
Siphon Siphon
Siphon.Types Siphon.Types
Siphon.Content
Siphon.Encoding Siphon.Encoding
Siphon.Decoding Siphon.Decoding
Siphon.Internal Siphon.Internal
@ -48,6 +49,9 @@ test-suite siphon-test
, QuickCheck , QuickCheck
, text , text
, bytestring , bytestring
, pipes
, HUnit
, test-framework-hunit
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010 default-language: Haskell2010

View File

@ -0,0 +1,5 @@
module Siphon.Content
( byteStringChar8
) where
import Siphon.Internal

View File

@ -16,11 +16,6 @@ import qualified Data.Attoparsec.ByteString as AttoByteString
import qualified Data.ByteString.Char8 as ByteString import qualified Data.ByteString.Char8 as ByteString
import qualified Data.Attoparsec.Types as Atto import qualified Data.Attoparsec.Types as Atto
byteStringChar8 :: SiphonDecoding ByteString ByteString
byteStringChar8 = SiphonDecoding
(AttoByteString.parse (row comma))
ByteString.null
-- unrow :: c1 -> (Vector c2,c1) -- unrow :: c1 -> (Vector c2,c1)
-- --
-- row :: _ -- row :: _
@ -47,18 +42,18 @@ mkParseError i ctxs msg = id
-- | This is seldom useful but is included for completeness. -- | This is seldom useful but is included for completeness.
headlessPipe :: Monad m headlessPipe :: Monad m
=> SiphonDecoding c1 c2 => Siphon c
-> Decoding Headless c2 a -> Decoding Headless c a
-> Pipe c1 a m (DecodingRowError Headless c2) -> Pipe c a m (DecodingRowError Headless c)
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
where where
indexedDecoding = Decoding.headlessToIndexed decoding indexedDecoding = Decoding.headlessToIndexed decoding
requiredLength = Decoding.length indexedDecoding requiredLength = Decoding.length indexedDecoding
indexedPipe :: Monad m indexedPipe :: Monad m
=> SiphonDecoding c1 c2 => Siphon c
-> Decoding (Indexed Headless) c2 a -> Decoding (Indexed Headless) c a
-> Pipe c1 a m (DecodingRowError Headless c2) -> Pipe c a m (DecodingRowError Headless c)
indexedPipe sd decoding = do indexedPipe sd decoding = do
(firstRow, mleftovers) <- consumeGeneral sd mkParseError (firstRow, mleftovers) <- consumeGeneral sd mkParseError
let req = Decoding.maxIndex decoding let req = Decoding.maxIndex decoding
@ -72,10 +67,10 @@ indexedPipe sd decoding = do
uncheckedPipe vlen 1 sd decoding mleftovers uncheckedPipe vlen 1 sd decoding mleftovers
headedPipe :: (Monad m, Eq c2) headedPipe :: (Monad m, Eq c)
=> SiphonDecoding c1 c2 => Siphon c
-> Decoding Headed c2 a -> Decoding Headed c a
-> Pipe c1 a m (DecodingRowError Headed c2) -> Pipe c a m (DecodingRowError Headed c)
headedPipe sd decoding = do headedPipe sd decoding = do
(headers, mleftovers) <- consumeGeneral sd mkParseError (headers, mleftovers) <- consumeGeneral sd mkParseError
case Decoding.headedToIndexed headers decoding of case Decoding.headedToIndexed headers decoding of
@ -88,10 +83,10 @@ headedPipe sd decoding = do
uncheckedPipe :: Monad m uncheckedPipe :: Monad m
=> Int -- ^ expected length of each row => Int -- ^ expected length of each row
-> Int -- ^ index of first row, usually zero or one -> Int -- ^ index of first row, usually zero or one
-> SiphonDecoding c1 c2 -> Siphon c
-> Decoding (Indexed f) c2 a -> Decoding (Indexed f) c a
-> Maybe c1 -> Maybe c
-> Pipe c1 a m (DecodingRowError f c2) -> Pipe c a m (DecodingRowError f c)
uncheckedPipe requiredLength ix sd d mleftovers = uncheckedPipe requiredLength ix sd d mleftovers =
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
where where
@ -103,19 +98,19 @@ uncheckedPipe requiredLength ix sd d mleftovers =
else Decoding.uncheckedRunWithRow rowIx d v else Decoding.uncheckedRunWithRow rowIx d v
consumeGeneral :: Monad m consumeGeneral :: Monad m
=> SiphonDecoding c1 c2 => Siphon c
-> (Int -> [String] -> String -> e) -> (Int -> [String] -> String -> e)
-> Consumer' c1 m (Vector c2, Maybe c1) -> Consumer' c m (Vector c, Maybe c)
consumeGeneral = error "ahh" consumeGeneral = error "ahh"
pipeGeneral :: Monad m pipeGeneral :: Monad m
=> Int -- ^ index of first row, usually zero or one => Int -- ^ index of first row, usually zero or one
-> SiphonDecoding c1 c2 -> Siphon c
-> (Int -> [String] -> String -> e) -> (Int -> [String] -> String -> e)
-> (Int -> Vector c2 -> Either e a) -> (Int -> Vector c -> Either e a)
-> Maybe c1 -- ^ leftovers that should be handled first -> Maybe c -- ^ leftovers that should be handled first
-> Pipe c1 a m e -> Pipe c a m e
pipeGeneral initIx (SiphonDecoding parse isNull) wrapParseError decodeRow mleftovers = pipeGeneral initIx (Siphon _ _ parse isNull) wrapParseError decodeRow mleftovers =
case mleftovers of case mleftovers of
Nothing -> go1 initIx Nothing -> go1 initIx
Just leftovers -> handleResult initIx (parse leftovers) Just leftovers -> handleResult initIx (parse leftovers)

View File

@ -6,24 +6,30 @@ import Pipes (Pipe,yield)
import qualified Pipes.Prelude as Pipes import qualified Pipes.Prelude as Pipes
import qualified Colonnade.Encoding as Encoding import qualified Colonnade.Encoding as Encoding
row :: Siphon c1 c2 row :: Siphon c
-> Encoding f c1 a -> Encoding f c a
-> a -> a
-> c2 -> c
row (Siphon escape intercalate) e = row (Siphon escape intercalate _ _) e =
intercalate . Encoding.runRow escape e intercalate . Encoding.runRow escape e
header :: Siphon c1 c2 header :: Siphon c
-> Encoding Headed c1 a -> Encoding Headed c a
-> c2 -> c
header (Siphon escape intercalate) e = header (Siphon escape intercalate _ _) e =
intercalate (Encoding.runHeader escape e) intercalate (Encoding.runHeader escape e)
pipe :: Monad m => Siphon c1 c2 -> Encoding f c1 a -> Pipe a c2 m x pipe :: Monad m
=> Siphon c
-> Encoding f c a
-> Pipe a c m x
pipe siphon encoding = Pipes.map (row siphon encoding) pipe siphon encoding = Pipes.map (row siphon encoding)
pipeWithHeader :: Monad m => Siphon c1 c2 -> Encoding Headed c1 a -> Pipe a c2 m x headedPipe :: Monad m
pipeWithHeader siphon encoding = do => Siphon c
-> Encoding Headed c a
-> Pipe a c m x
headedPipe siphon encoding = do
yield (header siphon encoding) yield (header siphon encoding)
pipe siphon encoding pipe siphon encoding

View File

@ -29,31 +29,50 @@ import qualified Data.ByteString.Unsafe as S
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LByteString import qualified Data.ByteString.Lazy as LByteString
import qualified Data.ByteString.Builder as Builder
import Data.Word (Word8) import Data.Word (Word8)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Coerce (coerce)
import Siphon.Types
import Control.Applicative import Control.Applicative
import Data.Monoid import Data.Monoid
-- parse :: Monad m byteStringChar8 :: Siphon ByteString
-- => SiphonDecoding c1 c2 byteStringChar8 = Siphon
-- -> Atto.Parser a b -- ^ Attoparsec parser escape
-- -> Pipes.Parser a m (Maybe (Either ParsingError b)) -- ^ Pipes parser encodeRow
-- parse parser = S.StateT $ \p0 -> do (A.parse (row comma))
-- x <- nextSkipEmpty p0 B.null
-- case x of
-- Left r -> return (Nothing, return r) encodeRow :: Vector (Escaped ByteString) -> ByteString
-- Right (a,p1) -> step (yield a >>) (_parse parser a) p1 encodeRow = id
-- where . flip B.append (B.singleton newline)
-- step diffP res p0 = case res of . B.intercalate (B.singleton comma)
-- Fail _ c m -> return (Just (Left (ParsingError c m)), diffP p0) . V.toList
-- Done a b -> return (Just (Right b), yield a >> p0) . coerce
-- Partial k -> do
-- x <- nextSkipEmpty p0 escape :: ByteString -> Escaped ByteString
-- case x of escape t = case B.find (\c -> c == newline || c == comma || c == doubleQuote) t of
-- Left e -> step diffP (k mempty) (return e) Nothing -> Escaped t
-- Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1 Just _ -> escapeAlways t
-- | This implementation is definitely suboptimal.
-- A better option (which would waste a little space
-- but would be much faster) would be to build the
-- new bytestring by writing to a buffer directly.
escapeAlways :: ByteString -> Escaped ByteString
escapeAlways t = Escaped $ LByteString.toStrict $ Builder.toLazyByteString $
Builder.word8 doubleQuote
<> B.foldl
(\ acc b -> acc <> if b == doubleQuote
then Builder.byteString
(B.pack [doubleQuote,doubleQuote])
else Builder.word8 b)
mempty
t
<> Builder.word8 doubleQuote
-- | Specialized version of 'sepBy1'' which is faster due to not -- | Specialized version of 'sepBy1'' which is faster due to not
-- accepting an arbitrary separator. -- accepting an arbitrary separator.

View File

@ -7,12 +7,13 @@ import Data.Coerce (coerce)
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
siphon :: Siphon Text Text siphon :: Siphon Text
siphon = Siphon escape encodeRow siphon = Siphon escape encodeRow
(error "siphon: uhoent") (error "siphon: uheokj")
encodeRow :: Vector (Escaped Text) -> Text encodeRow :: Vector (Escaped Text) -> Text
encodeRow = id encodeRow = id
. Text.intercalate (Text.pack ",") . Text.intercalate (Text.singleton ',')
. Vector.toList . Vector.toList
. coerce . coerce

View File

@ -5,12 +5,19 @@ import qualified Data.Attoparsec.Types as Atto
newtype Escaped c = Escaped { getEscaped :: c } newtype Escaped c = Escaped { getEscaped :: c }
data Siphon c = Siphon
{ siphonEscape :: !(c -> Escaped c)
, siphonIntercalate :: !(Vector (Escaped c) -> c)
, siphonParseRow :: c -> Atto.IResult c (Vector c)
, siphonNull :: c -> Bool
}
-- | Consider changing out the use of 'Vector' here -- | Consider changing out the use of 'Vector' here
-- with the humble list instead. It might fuse away -- with the humble list instead. It might fuse away
-- better. Not sure though. -- better. Not sure though.
data Siphon c1 c2 = Siphon data SiphonX c1 c2 = SiphonX
{ siphonEscape :: !(c1 -> Escaped c2) { siphonXEscape :: !(c1 -> Escaped c2)
, siphonIntercalate :: !(Vector (Escaped c2) -> c2) , siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
} }
data SiphonDecoding c1 c2 = SiphonDecoding data SiphonDecoding c1 c2 = SiphonDecoding

View File

@ -1,11 +1,16 @@
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where module Main (main) where
import Test.QuickCheck (Gen, Arbitrary(..), choose) import Test.QuickCheck (Gen, Arbitrary(..), choose)
import Test.HUnit (Assertion,(@?=))
import Test.Framework (defaultMain, testGroup, Test) import Test.Framework (defaultMain, testGroup, Test)
import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.Framework.Providers.HUnit (testCase)
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import Data.Either.Combinators import Data.Either.Combinators
import Colonnade.Types import Colonnade.Types
import Data.Functor.Identity
import Data.Functor.Contravariant (contramap) import Data.Functor.Contravariant (contramap)
import Data.Functor.Contravariant.Divisible (divided,conquered) import Data.Functor.Contravariant.Divisible (divided,conquered)
import qualified Data.ByteString.Builder as Builder import qualified Data.ByteString.Builder as Builder
@ -14,21 +19,27 @@ import qualified Data.ByteString as ByteString
import qualified Data.ByteString.Char8 as BC8 import qualified Data.ByteString.Char8 as BC8
import qualified Colonnade.Decoding as Decoding import qualified Colonnade.Decoding as Decoding
import qualified Colonnade.Encoding as Encoding import qualified Colonnade.Encoding as Encoding
import qualified Siphon.Encoding as SE
import qualified Siphon.Decoding as SD
import qualified Siphon.Content as SC
import qualified Pipes.Prelude as Pipes
import Pipes
main :: IO () main :: IO ()
main = defaultMain tests main = defaultMain tests
tests :: [Test] tests :: [Test]
tests = [] tests =
[ testGroup "ByteString encode/decode" [ testGroup "ByteString encode/decode"
[ testProperty "Headless Isomorphism (int,char,bool)" [ testCase "Headless Encoding (int,char,bool)" testEncodingA
$ propEncodeDecodeIso , testProperty "Headless Isomorphism (int,char,bool)"
(ipv4ToTextNaive) $ propIsoPipe $
(ipv4FromTextNaive) (SE.pipe SC.byteStringChar8 encodingA)
>->
(void $ SD.headlessPipe SC.byteStringChar8 decodingA)
] ]
] ]
byteStringDecodeInt :: ByteString -> Either String Int byteStringDecodeInt :: ByteString -> Either String Int
byteStringDecodeInt b = do byteStringDecodeInt b = do
(a,bsRem) <- maybe (Left "could not parse int") Right (BC8.readInt b) (a,bsRem) <- maybe (Left "could not parse int") Right (BC8.readInt b)
@ -78,6 +89,16 @@ encodingA = contramap tripleToPairs
tripleToPairs :: (a,b,c) -> (a,(b,(c,()))) tripleToPairs :: (a,b,c) -> (a,(b,(c,())))
tripleToPairs (a,b,c) = (a,(b,(c,()))) tripleToPairs (a,b,c) = (a,(b,(c,())))
propIsoPipe :: Eq a => Pipe a a Identity () -> [a] -> Bool
propIsoPipe p as = (Pipes.toList $ each as >-> p) == as
testEncodingA :: Assertion
testEncodingA =
( ByteString.concat $ Pipes.toList $
Pipes.yield (4,'c',False) >-> SE.pipe SC.byteStringChar8 encodingA
) @?= "4,c,false\n"
propEncodeDecodeIso :: Eq a => (a -> b) -> (b -> Maybe a) -> a -> Bool propEncodeDecodeIso :: Eq a => (a -> b) -> (b -> Maybe a) -> a -> Bool
propEncodeDecodeIso f g a = g (f a) == Just a propEncodeDecodeIso f g a = g (f a) == Just a