diff --git a/siphon/siphon.cabal b/siphon/siphon.cabal index 31e8933..089bba6 100644 --- a/siphon/siphon.cabal +++ b/siphon/siphon.cabal @@ -19,6 +19,7 @@ library Siphon.ByteString.Char8 Siphon Siphon.Types + Siphon.Content Siphon.Encoding Siphon.Decoding Siphon.Internal @@ -48,6 +49,9 @@ test-suite siphon-test , QuickCheck , text , bytestring + , pipes + , HUnit + , test-framework-hunit ghc-options: -threaded -rtsopts -with-rtsopts=-N default-language: Haskell2010 diff --git a/siphon/src/Siphon/Content.hs b/siphon/src/Siphon/Content.hs new file mode 100644 index 0000000..03f21bf --- /dev/null +++ b/siphon/src/Siphon/Content.hs @@ -0,0 +1,5 @@ +module Siphon.Content + ( byteStringChar8 + ) where + +import Siphon.Internal diff --git a/siphon/src/Siphon/Decoding.hs b/siphon/src/Siphon/Decoding.hs index 29aed89..0f06f2e 100644 --- a/siphon/src/Siphon/Decoding.hs +++ b/siphon/src/Siphon/Decoding.hs @@ -16,11 +16,6 @@ import qualified Data.Attoparsec.ByteString as AttoByteString import qualified Data.ByteString.Char8 as ByteString import qualified Data.Attoparsec.Types as Atto -byteStringChar8 :: SiphonDecoding ByteString ByteString -byteStringChar8 = SiphonDecoding - (AttoByteString.parse (row comma)) - ByteString.null - -- unrow :: c1 -> (Vector c2,c1) -- -- row :: _ @@ -47,18 +42,18 @@ mkParseError i ctxs msg = id -- | This is seldom useful but is included for completeness. headlessPipe :: Monad m - => SiphonDecoding c1 c2 - -> Decoding Headless c2 a - -> Pipe c1 a m (DecodingRowError Headless c2) + => Siphon c + -> Decoding Headless c a + -> Pipe c a m (DecodingRowError Headless c) headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing where indexedDecoding = Decoding.headlessToIndexed decoding requiredLength = Decoding.length indexedDecoding indexedPipe :: Monad m - => SiphonDecoding c1 c2 - -> Decoding (Indexed Headless) c2 a - -> Pipe c1 a m (DecodingRowError Headless c2) + => Siphon c + -> Decoding (Indexed Headless) c a + -> Pipe c a m (DecodingRowError Headless c) indexedPipe sd decoding = do (firstRow, mleftovers) <- consumeGeneral sd mkParseError let req = Decoding.maxIndex decoding @@ -72,10 +67,10 @@ indexedPipe sd decoding = do uncheckedPipe vlen 1 sd decoding mleftovers -headedPipe :: (Monad m, Eq c2) - => SiphonDecoding c1 c2 - -> Decoding Headed c2 a - -> Pipe c1 a m (DecodingRowError Headed c2) +headedPipe :: (Monad m, Eq c) + => Siphon c + -> Decoding Headed c a + -> Pipe c a m (DecodingRowError Headed c) headedPipe sd decoding = do (headers, mleftovers) <- consumeGeneral sd mkParseError case Decoding.headedToIndexed headers decoding of @@ -88,10 +83,10 @@ headedPipe sd decoding = do uncheckedPipe :: Monad m => Int -- ^ expected length of each row -> Int -- ^ index of first row, usually zero or one - -> SiphonDecoding c1 c2 - -> Decoding (Indexed f) c2 a - -> Maybe c1 - -> Pipe c1 a m (DecodingRowError f c2) + -> Siphon c + -> Decoding (Indexed f) c a + -> Maybe c + -> Pipe c a m (DecodingRowError f c) uncheckedPipe requiredLength ix sd d mleftovers = pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers where @@ -103,19 +98,19 @@ uncheckedPipe requiredLength ix sd d mleftovers = else Decoding.uncheckedRunWithRow rowIx d v consumeGeneral :: Monad m - => SiphonDecoding c1 c2 + => Siphon c -> (Int -> [String] -> String -> e) - -> Consumer' c1 m (Vector c2, Maybe c1) + -> Consumer' c m (Vector c, Maybe c) consumeGeneral = error "ahh" pipeGeneral :: Monad m => Int -- ^ index of first row, usually zero or one - -> SiphonDecoding c1 c2 + -> Siphon c -> (Int -> [String] -> String -> e) - -> (Int -> Vector c2 -> Either e a) - -> Maybe c1 -- ^ leftovers that should be handled first - -> Pipe c1 a m e -pipeGeneral initIx (SiphonDecoding parse isNull) wrapParseError decodeRow mleftovers = + -> (Int -> Vector c -> Either e a) + -> Maybe c -- ^ leftovers that should be handled first + -> Pipe c a m e +pipeGeneral initIx (Siphon _ _ parse isNull) wrapParseError decodeRow mleftovers = case mleftovers of Nothing -> go1 initIx Just leftovers -> handleResult initIx (parse leftovers) diff --git a/siphon/src/Siphon/Encoding.hs b/siphon/src/Siphon/Encoding.hs index ef57cb0..d02d6c6 100644 --- a/siphon/src/Siphon/Encoding.hs +++ b/siphon/src/Siphon/Encoding.hs @@ -6,24 +6,30 @@ import Pipes (Pipe,yield) import qualified Pipes.Prelude as Pipes import qualified Colonnade.Encoding as Encoding -row :: Siphon c1 c2 - -> Encoding f c1 a +row :: Siphon c + -> Encoding f c a -> a - -> c2 -row (Siphon escape intercalate) e = + -> c +row (Siphon escape intercalate _ _) e = intercalate . Encoding.runRow escape e -header :: Siphon c1 c2 - -> Encoding Headed c1 a - -> c2 -header (Siphon escape intercalate) e = +header :: Siphon c + -> Encoding Headed c a + -> c +header (Siphon escape intercalate _ _) e = intercalate (Encoding.runHeader escape e) -pipe :: Monad m => Siphon c1 c2 -> Encoding f c1 a -> Pipe a c2 m x +pipe :: Monad m + => Siphon c + -> Encoding f c a + -> Pipe a c m x pipe siphon encoding = Pipes.map (row siphon encoding) -pipeWithHeader :: Monad m => Siphon c1 c2 -> Encoding Headed c1 a -> Pipe a c2 m x -pipeWithHeader siphon encoding = do +headedPipe :: Monad m + => Siphon c + -> Encoding Headed c a + -> Pipe a c m x +headedPipe siphon encoding = do yield (header siphon encoding) pipe siphon encoding diff --git a/siphon/src/Siphon/Internal.hs b/siphon/src/Siphon/Internal.hs index 8443f25..8a203b4 100644 --- a/siphon/src/Siphon/Internal.hs +++ b/siphon/src/Siphon/Internal.hs @@ -29,31 +29,50 @@ import qualified Data.ByteString.Unsafe as S import qualified Data.Vector as V import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as LByteString +import qualified Data.ByteString.Builder as Builder import Data.Word (Word8) import Data.Vector (Vector) import Data.ByteString (ByteString) +import Data.Coerce (coerce) +import Siphon.Types import Control.Applicative import Data.Monoid --- parse :: Monad m --- => SiphonDecoding c1 c2 --- -> Atto.Parser a b -- ^ Attoparsec parser --- -> Pipes.Parser a m (Maybe (Either ParsingError b)) -- ^ Pipes parser --- parse parser = S.StateT $ \p0 -> do --- x <- nextSkipEmpty p0 --- case x of --- Left r -> return (Nothing, return r) --- Right (a,p1) -> step (yield a >>) (_parse parser a) p1 --- where --- step diffP res p0 = case res of --- Fail _ c m -> return (Just (Left (ParsingError c m)), diffP p0) --- Done a b -> return (Just (Right b), yield a >> p0) --- Partial k -> do --- x <- nextSkipEmpty p0 --- case x of --- Left e -> step diffP (k mempty) (return e) --- Right (a,p1) -> step (diffP . (yield a >>)) (k a) p1 +byteStringChar8 :: Siphon ByteString +byteStringChar8 = Siphon + escape + encodeRow + (A.parse (row comma)) + B.null + +encodeRow :: Vector (Escaped ByteString) -> ByteString +encodeRow = id + . flip B.append (B.singleton newline) + . B.intercalate (B.singleton comma) + . V.toList + . coerce + +escape :: ByteString -> Escaped ByteString +escape t = case B.find (\c -> c == newline || c == comma || c == doubleQuote) t of + Nothing -> Escaped t + Just _ -> escapeAlways t + +-- | This implementation is definitely suboptimal. +-- A better option (which would waste a little space +-- but would be much faster) would be to build the +-- new bytestring by writing to a buffer directly. +escapeAlways :: ByteString -> Escaped ByteString +escapeAlways t = Escaped $ LByteString.toStrict $ Builder.toLazyByteString $ + Builder.word8 doubleQuote + <> B.foldl + (\ acc b -> acc <> if b == doubleQuote + then Builder.byteString + (B.pack [doubleQuote,doubleQuote]) + else Builder.word8 b) + mempty + t + <> Builder.word8 doubleQuote -- | Specialized version of 'sepBy1'' which is faster due to not -- accepting an arbitrary separator. diff --git a/siphon/src/Siphon/Text.hs b/siphon/src/Siphon/Text.hs index e2f9354..21bcb3e 100644 --- a/siphon/src/Siphon/Text.hs +++ b/siphon/src/Siphon/Text.hs @@ -7,12 +7,13 @@ import Data.Coerce (coerce) import qualified Data.Text as Text import qualified Data.Vector as Vector -siphon :: Siphon Text Text +siphon :: Siphon Text siphon = Siphon escape encodeRow + (error "siphon: uhoent") (error "siphon: uheokj") encodeRow :: Vector (Escaped Text) -> Text encodeRow = id - . Text.intercalate (Text.pack ",") + . Text.intercalate (Text.singleton ',') . Vector.toList . coerce diff --git a/siphon/src/Siphon/Types.hs b/siphon/src/Siphon/Types.hs index 7addd80..110beeb 100644 --- a/siphon/src/Siphon/Types.hs +++ b/siphon/src/Siphon/Types.hs @@ -5,12 +5,19 @@ import qualified Data.Attoparsec.Types as Atto newtype Escaped c = Escaped { getEscaped :: c } +data Siphon c = Siphon + { siphonEscape :: !(c -> Escaped c) + , siphonIntercalate :: !(Vector (Escaped c) -> c) + , siphonParseRow :: c -> Atto.IResult c (Vector c) + , siphonNull :: c -> Bool + } + -- | Consider changing out the use of 'Vector' here -- with the humble list instead. It might fuse away -- better. Not sure though. -data Siphon c1 c2 = Siphon - { siphonEscape :: !(c1 -> Escaped c2) - , siphonIntercalate :: !(Vector (Escaped c2) -> c2) +data SiphonX c1 c2 = SiphonX + { siphonXEscape :: !(c1 -> Escaped c2) + , siphonXIntercalate :: !(Vector (Escaped c2) -> c2) } data SiphonDecoding c1 c2 = SiphonDecoding diff --git a/siphon/test/Test.hs b/siphon/test/Test.hs index 62591b1..da4b96d 100644 --- a/siphon/test/Test.hs +++ b/siphon/test/Test.hs @@ -1,11 +1,16 @@ +{-# LANGUAGE OverloadedStrings #-} + module Main (main) where import Test.QuickCheck (Gen, Arbitrary(..), choose) +import Test.HUnit (Assertion,(@?=)) import Test.Framework (defaultMain, testGroup, Test) import Test.Framework.Providers.QuickCheck2 (testProperty) +import Test.Framework.Providers.HUnit (testCase) import Data.ByteString (ByteString) import Data.Either.Combinators import Colonnade.Types +import Data.Functor.Identity import Data.Functor.Contravariant (contramap) import Data.Functor.Contravariant.Divisible (divided,conquered) import qualified Data.ByteString.Builder as Builder @@ -14,21 +19,27 @@ import qualified Data.ByteString as ByteString import qualified Data.ByteString.Char8 as BC8 import qualified Colonnade.Decoding as Decoding import qualified Colonnade.Encoding as Encoding +import qualified Siphon.Encoding as SE +import qualified Siphon.Decoding as SD +import qualified Siphon.Content as SC +import qualified Pipes.Prelude as Pipes +import Pipes main :: IO () main = defaultMain tests tests :: [Test] -tests = [] +tests = [ testGroup "ByteString encode/decode" - [ testProperty "Headless Isomorphism (int,char,bool)" - $ propEncodeDecodeIso - (ipv4ToTextNaive) - (ipv4FromTextNaive) + [ testCase "Headless Encoding (int,char,bool)" testEncodingA + , testProperty "Headless Isomorphism (int,char,bool)" + $ propIsoPipe $ + (SE.pipe SC.byteStringChar8 encodingA) + >-> + (void $ SD.headlessPipe SC.byteStringChar8 decodingA) ] ] - byteStringDecodeInt :: ByteString -> Either String Int byteStringDecodeInt b = do (a,bsRem) <- maybe (Left "could not parse int") Right (BC8.readInt b) @@ -78,6 +89,16 @@ encodingA = contramap tripleToPairs tripleToPairs :: (a,b,c) -> (a,(b,(c,()))) tripleToPairs (a,b,c) = (a,(b,(c,()))) +propIsoPipe :: Eq a => Pipe a a Identity () -> [a] -> Bool +propIsoPipe p as = (Pipes.toList $ each as >-> p) == as + +testEncodingA :: Assertion +testEncodingA = + ( ByteString.concat $ Pipes.toList $ + Pipes.yield (4,'c',False) >-> SE.pipe SC.byteStringChar8 encodingA + ) @?= "4,c,false\n" + + propEncodeDecodeIso :: Eq a => (a -> b) -> (b -> Maybe a) -> a -> Bool propEncodeDecodeIso f g a = g (f a) == Just a