mirror of
https://github.com/byteverse/colonnade.git
synced 2026-02-27 21:48:01 +01:00
make siphon build again and pass tests
This commit is contained in:
parent
6b007f8a7e
commit
7aa60cf7d1
@ -1,6 +1,6 @@
|
|||||||
name: siphon
|
name: siphon
|
||||||
version: 0.2
|
version: 0.6
|
||||||
synopsis: Generic types and functions for columnar encoding and decoding
|
synopsis: Encode and decode CSV files
|
||||||
description: Please see README.md
|
description: Please see README.md
|
||||||
homepage: https://github.com/andrewthad/colonnade#readme
|
homepage: https://github.com/andrewthad/colonnade#readme
|
||||||
license: BSD3
|
license: BSD3
|
||||||
@ -26,7 +26,7 @@ library
|
|||||||
Siphon.Internal.Text
|
Siphon.Internal.Text
|
||||||
build-depends:
|
build-depends:
|
||||||
base >= 4.7 && < 5
|
base >= 4.7 && < 5
|
||||||
, colonnade >= 0.4 && < 0.5
|
, colonnade >= 1.1 && < 1.2
|
||||||
, text
|
, text
|
||||||
, bytestring
|
, bytestring
|
||||||
, contravariant
|
, contravariant
|
||||||
@ -53,6 +53,7 @@ test-suite siphon-test
|
|||||||
, pipes
|
, pipes
|
||||||
, HUnit
|
, HUnit
|
||||||
, test-framework-hunit
|
, test-framework-hunit
|
||||||
|
, profunctors
|
||||||
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
ghc-options: -threaded -rtsopts -with-rtsopts=-N
|
||||||
default-language: Haskell2010
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
|||||||
@ -1,24 +1,38 @@
|
|||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
{-# LANGUAGE BangPatterns #-}
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
|
||||||
module Siphon.Decoding where
|
module Siphon.Decoding
|
||||||
|
( mkParseError
|
||||||
|
, headlessPipe
|
||||||
|
, indexedPipe
|
||||||
|
, headedPipe
|
||||||
|
, consumeGeneral
|
||||||
|
, pipeGeneral
|
||||||
|
, convertDecodeError
|
||||||
|
, headed
|
||||||
|
, headless
|
||||||
|
, indexed
|
||||||
|
) where
|
||||||
|
|
||||||
import Siphon.Types
|
import Siphon.Types
|
||||||
import Colonnade.Types
|
import Colonnade (Headed(..),Headless(..))
|
||||||
import Siphon.Internal (row,comma)
|
import Siphon.Internal (row,comma)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import Pipes (yield,Pipe,Consumer',Producer,await)
|
import Pipes (yield,Pipe,Consumer',Producer,await)
|
||||||
import Data.Vector (Vector)
|
import Data.Vector (Vector)
|
||||||
|
import Data.Functor.Contravariant (Contravariant(..))
|
||||||
|
import Data.Char (chr)
|
||||||
import qualified Data.Vector as Vector
|
import qualified Data.Vector as Vector
|
||||||
import qualified Colonnade.Decoding as Decoding
|
|
||||||
import qualified Data.Attoparsec.ByteString as AttoByteString
|
import qualified Data.Attoparsec.ByteString as AttoByteString
|
||||||
import qualified Data.ByteString.Char8 as ByteString
|
import qualified Data.ByteString.Char8 as ByteString
|
||||||
import qualified Data.Attoparsec.Types as Atto
|
import qualified Data.Attoparsec.Types as Atto
|
||||||
|
|
||||||
mkParseError :: Int -> [String] -> String -> DecodingRowError f content
|
mkParseError :: Int -> [String] -> String -> DecolonnadeRowError f content
|
||||||
mkParseError i ctxs msg = id
|
mkParseError i ctxs msg = id
|
||||||
$ DecodingRowError i
|
$ DecolonnadeRowError i
|
||||||
$ RowErrorParse $ concat
|
$ RowErrorParse $ concat
|
||||||
[ "Contexts: ["
|
[ "Contexts: ["
|
||||||
, concat ctxs
|
, concat ctxs
|
||||||
@ -28,37 +42,37 @@ mkParseError i ctxs msg = id
|
|||||||
]
|
]
|
||||||
|
|
||||||
-- | This is a convenience function for working with @pipes-text@.
|
-- | This is a convenience function for working with @pipes-text@.
|
||||||
-- It will convert a UTF-8 decoding error into a `DecodingRowError`,
|
-- It will convert a UTF-8 decoding error into a `DecolonnadeRowError`,
|
||||||
-- so the pipes can be properly chained together.
|
-- so the pipes can be properly chained together.
|
||||||
convertDecodeError :: String -> Either (Producer ByteString m ()) () -> Maybe (DecodingRowError f c)
|
convertDecodeError :: String -> Either (Producer ByteString m ()) () -> Maybe (DecolonnadeRowError f c)
|
||||||
convertDecodeError encodingName (Left _) = Just (DecodingRowError 0 (RowErrorMalformed encodingName))
|
convertDecodeError encodingName (Left _) = Just (DecolonnadeRowError 0 (RowErrorMalformed encodingName))
|
||||||
convertDecodeError _ (Right ()) = Nothing
|
convertDecodeError _ (Right ()) = Nothing
|
||||||
|
|
||||||
-- | This is seldom useful but is included for completeness.
|
-- | This is seldom useful but is included for completeness.
|
||||||
headlessPipe :: Monad m
|
headlessPipe :: Monad m
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> Decoding Headless c a
|
-> Decolonnade Headless c a
|
||||||
-> Pipe c a m (DecodingRowError Headless c)
|
-> Pipe c a m (DecolonnadeRowError Headless c)
|
||||||
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
|
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
|
||||||
where
|
where
|
||||||
indexedDecoding = Decoding.headlessToIndexed decoding
|
indexedDecoding = headlessToIndexed decoding
|
||||||
requiredLength = Decoding.length indexedDecoding
|
requiredLength = decLength indexedDecoding
|
||||||
|
|
||||||
indexedPipe :: Monad m
|
indexedPipe :: Monad m
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> Decoding (Indexed Headless) c a
|
-> Decolonnade (Indexed Headless) c a
|
||||||
-> Pipe c a m (DecodingRowError Headless c)
|
-> Pipe c a m (DecolonnadeRowError Headless c)
|
||||||
indexedPipe sd decoding = do
|
indexedPipe sd decoding = do
|
||||||
e <- consumeGeneral 0 sd mkParseError
|
e <- consumeGeneral 0 sd mkParseError
|
||||||
case e of
|
case e of
|
||||||
Left err -> return err
|
Left err -> return err
|
||||||
Right (firstRow, mleftovers) ->
|
Right (firstRow, mleftovers) ->
|
||||||
let req = Decoding.maxIndex decoding
|
let req = maxIndex decoding
|
||||||
vlen = Vector.length firstRow
|
vlen = Vector.length firstRow
|
||||||
in if vlen < req
|
in if vlen < req
|
||||||
then return (DecodingRowError 0 (RowErrorMinSize req vlen))
|
then return (DecolonnadeRowError 0 (RowErrorMinSize req vlen))
|
||||||
else case Decoding.uncheckedRun decoding firstRow of
|
else case uncheckedRun decoding firstRow of
|
||||||
Left cellErr -> return $ DecodingRowError 0 $ RowErrorDecode cellErr
|
Left cellErr -> return $ DecolonnadeRowError 0 $ RowErrorDecode cellErr
|
||||||
Right a -> do
|
Right a -> do
|
||||||
yield a
|
yield a
|
||||||
uncheckedPipe vlen 1 sd decoding mleftovers
|
uncheckedPipe vlen 1 sd decoding mleftovers
|
||||||
@ -66,15 +80,15 @@ indexedPipe sd decoding = do
|
|||||||
|
|
||||||
headedPipe :: (Monad m, Eq c)
|
headedPipe :: (Monad m, Eq c)
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> Decoding Headed c a
|
-> Decolonnade Headed c a
|
||||||
-> Pipe c a m (DecodingRowError Headed c)
|
-> Pipe c a m (DecolonnadeRowError Headed c)
|
||||||
headedPipe sd decoding = do
|
headedPipe sd decoding = do
|
||||||
e <- consumeGeneral 0 sd mkParseError
|
e <- consumeGeneral 0 sd mkParseError
|
||||||
case e of
|
case e of
|
||||||
Left err -> return err
|
Left err -> return err
|
||||||
Right (headers, mleftovers) ->
|
Right (headers, mleftovers) ->
|
||||||
case Decoding.headedToIndexed headers decoding of
|
case headedToIndexed headers decoding of
|
||||||
Left headingErrs -> return (DecodingRowError 0 (RowErrorHeading headingErrs))
|
Left headingErrs -> return (DecolonnadeRowError 0 (RowErrorHeading headingErrs))
|
||||||
Right indexedDecoding ->
|
Right indexedDecoding ->
|
||||||
let requiredLength = Vector.length headers
|
let requiredLength = Vector.length headers
|
||||||
in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers
|
in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers
|
||||||
@ -84,18 +98,18 @@ uncheckedPipe :: Monad m
|
|||||||
=> Int -- ^ expected length of each row
|
=> Int -- ^ expected length of each row
|
||||||
-> Int -- ^ index of first row, usually zero or one
|
-> Int -- ^ index of first row, usually zero or one
|
||||||
-> Siphon c
|
-> Siphon c
|
||||||
-> Decoding (Indexed f) c a
|
-> Decolonnade (Indexed f) c a
|
||||||
-> Maybe c
|
-> Maybe c
|
||||||
-> Pipe c a m (DecodingRowError f c)
|
-> Pipe c a m (DecolonnadeRowError f c)
|
||||||
uncheckedPipe requiredLength ix sd d mleftovers =
|
uncheckedPipe requiredLength ix sd d mleftovers =
|
||||||
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
|
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
|
||||||
where
|
where
|
||||||
checkedRunWithRow rowIx v =
|
checkedRunWithRow rowIx v =
|
||||||
let vlen = Vector.length v in
|
let vlen = Vector.length v in
|
||||||
if vlen /= requiredLength
|
if vlen /= requiredLength
|
||||||
then Left $ DecodingRowError rowIx
|
then Left $ DecolonnadeRowError rowIx
|
||||||
$ RowErrorSize requiredLength vlen
|
$ RowErrorSize requiredLength vlen
|
||||||
else Decoding.uncheckedRunWithRow rowIx d v
|
else uncheckedRunWithRow rowIx d v
|
||||||
|
|
||||||
consumeGeneral :: Monad m
|
consumeGeneral :: Monad m
|
||||||
=> Int
|
=> Int
|
||||||
@ -157,4 +171,169 @@ awaitSkip f = go where
|
|||||||
a <- await
|
a <- await
|
||||||
if f a then go else return a
|
if f a then go else return a
|
||||||
|
|
||||||
|
-- | Converts the content type of a 'Decolonnade'. The @'Contravariant' f@
|
||||||
|
-- constraint means that @f@ can be 'Headless' but not 'Headed'.
|
||||||
|
contramapContent :: forall c1 c2 f a. Contravariant f => (c2 -> c1) -> Decolonnade f c1 a -> Decolonnade f c2 a
|
||||||
|
contramapContent f = go
|
||||||
|
where
|
||||||
|
go :: forall b. Decolonnade f c1 b -> Decolonnade f c2 b
|
||||||
|
go (DecolonnadePure x) = DecolonnadePure x
|
||||||
|
go (DecolonnadeAp h decode apNext) =
|
||||||
|
DecolonnadeAp (contramap f h) (decode . f) (go apNext)
|
||||||
|
|
||||||
|
headless :: (content -> Either String a) -> Decolonnade Headless content a
|
||||||
|
headless f = DecolonnadeAp Headless f (DecolonnadePure id)
|
||||||
|
|
||||||
|
headed :: content -> (content -> Either String a) -> Decolonnade Headed content a
|
||||||
|
headed h f = DecolonnadeAp (Headed h) f (DecolonnadePure id)
|
||||||
|
|
||||||
|
indexed :: Int -> (content -> Either String a) -> Decolonnade (Indexed Headless) content a
|
||||||
|
indexed ix f = DecolonnadeAp (Indexed ix Headless) f (DecolonnadePure id)
|
||||||
|
|
||||||
|
maxIndex :: forall f c a. Decolonnade (Indexed f) c a -> Int
|
||||||
|
maxIndex = go 0 where
|
||||||
|
go :: forall b. Int -> Decolonnade (Indexed f) c b -> Int
|
||||||
|
go !ix (DecolonnadePure _) = ix
|
||||||
|
go !ix1 (DecolonnadeAp (Indexed ix2 _) decode apNext) =
|
||||||
|
go (max ix1 ix2) apNext
|
||||||
|
|
||||||
|
-- | This function uses 'unsafeIndex' to access
|
||||||
|
-- elements of the 'Vector'.
|
||||||
|
uncheckedRunWithRow ::
|
||||||
|
Int
|
||||||
|
-> Decolonnade (Indexed f) content a
|
||||||
|
-> Vector content
|
||||||
|
-> Either (DecolonnadeRowError f content) a
|
||||||
|
uncheckedRunWithRow i d v = mapLeft (DecolonnadeRowError i . RowErrorDecode) (uncheckedRun d v)
|
||||||
|
|
||||||
|
-- | This function does not check to make sure that the indicies in
|
||||||
|
-- the 'Decolonnade' are in the 'Vector'.
|
||||||
|
uncheckedRun :: forall content a f.
|
||||||
|
Decolonnade (Indexed f) content a
|
||||||
|
-> Vector content
|
||||||
|
-> Either (DecolonnadeCellErrors f content) a
|
||||||
|
uncheckedRun dc v = getEitherWrap (go dc)
|
||||||
|
where
|
||||||
|
go :: forall b.
|
||||||
|
Decolonnade (Indexed f) content b
|
||||||
|
-> EitherWrap (DecolonnadeCellErrors f content) b
|
||||||
|
go (DecolonnadePure b) = EitherWrap (Right b)
|
||||||
|
go (DecolonnadeAp ixed@(Indexed ix h) decode apNext) =
|
||||||
|
let rnext = go apNext
|
||||||
|
content = Vector.unsafeIndex v ix
|
||||||
|
rcurrent = mapLeft (DecolonnadeCellErrors . Vector.singleton . DecolonnadeCellError content ixed) (decode content)
|
||||||
|
in rnext <*> (EitherWrap rcurrent)
|
||||||
|
|
||||||
|
headlessToIndexed :: forall c a.
|
||||||
|
Decolonnade Headless c a -> Decolonnade (Indexed Headless) c a
|
||||||
|
headlessToIndexed = go 0 where
|
||||||
|
go :: forall b. Int -> Decolonnade Headless c b -> Decolonnade (Indexed Headless) c b
|
||||||
|
go !ix (DecolonnadePure a) = DecolonnadePure a
|
||||||
|
go !ix (DecolonnadeAp Headless decode apNext) =
|
||||||
|
DecolonnadeAp (Indexed ix Headless) decode (go (ix + 1) apNext)
|
||||||
|
|
||||||
|
decLength :: forall f c a. Decolonnade f c a -> Int
|
||||||
|
decLength = go 0 where
|
||||||
|
go :: forall b. Int -> Decolonnade f c b -> Int
|
||||||
|
go !a (DecolonnadePure _) = a
|
||||||
|
go !a (DecolonnadeAp _ _ apNext) = go (a + 1) apNext
|
||||||
|
|
||||||
|
-- | Maps over a 'Decolonnade' that expects headers, converting these
|
||||||
|
-- expected headers into the indices of the columns that they
|
||||||
|
-- correspond to.
|
||||||
|
headedToIndexed :: forall content a. Eq content
|
||||||
|
=> Vector content -- ^ Headers in the source document
|
||||||
|
-> Decolonnade Headed content a -- ^ Decolonnade that contains expected headers
|
||||||
|
-> Either (HeadingErrors content) (Decolonnade (Indexed Headed) content a)
|
||||||
|
headedToIndexed v = getEitherWrap . go
|
||||||
|
where
|
||||||
|
go :: forall b. Eq content
|
||||||
|
=> Decolonnade Headed content b
|
||||||
|
-> EitherWrap (HeadingErrors content) (Decolonnade (Indexed Headed) content b)
|
||||||
|
go (DecolonnadePure b) = EitherWrap (Right (DecolonnadePure b))
|
||||||
|
go (DecolonnadeAp hd@(Headed h) decode apNext) =
|
||||||
|
let rnext = go apNext
|
||||||
|
ixs = Vector.elemIndices h v
|
||||||
|
ixsLen = Vector.length ixs
|
||||||
|
rcurrent
|
||||||
|
| ixsLen == 1 = Right (Vector.unsafeIndex ixs 0)
|
||||||
|
| ixsLen == 0 = Left (HeadingErrors (Vector.singleton h) Vector.empty)
|
||||||
|
| otherwise = Left (HeadingErrors Vector.empty (Vector.singleton (h,ixsLen)))
|
||||||
|
in (\ix ap -> DecolonnadeAp (Indexed ix hd) decode ap)
|
||||||
|
<$> EitherWrap rcurrent
|
||||||
|
<*> rnext
|
||||||
|
|
||||||
|
-- | This adds one to the index because text editors consider
|
||||||
|
-- line number to be one-based, not zero-based.
|
||||||
|
prettyError :: (c -> String) -> DecolonnadeRowError f c -> String
|
||||||
|
prettyError toStr (DecolonnadeRowError ix e) = unlines
|
||||||
|
$ ("Decolonnade error on line " ++ show (ix + 1) ++ " of file.")
|
||||||
|
: ("Error Category: " ++ descr)
|
||||||
|
: map (" " ++) errDescrs
|
||||||
|
where (descr,errDescrs) = prettyRowError toStr e
|
||||||
|
|
||||||
|
prettyRowError :: (content -> String) -> RowError f content -> (String, [String])
|
||||||
|
prettyRowError toStr x = case x of
|
||||||
|
RowErrorParse err -> (,) "CSV Parsing"
|
||||||
|
[ "The line could not be parsed into cells correctly."
|
||||||
|
, "Original parser error: " ++ err
|
||||||
|
]
|
||||||
|
RowErrorSize reqLen actualLen -> (,) "Row Length"
|
||||||
|
[ "Expected the row to have exactly " ++ show reqLen ++ " cells."
|
||||||
|
, "The row only has " ++ show actualLen ++ " cells."
|
||||||
|
]
|
||||||
|
RowErrorMinSize reqLen actualLen -> (,) "Row Min Length"
|
||||||
|
[ "Expected the row to have at least " ++ show reqLen ++ " cells."
|
||||||
|
, "The row only has " ++ show actualLen ++ " cells."
|
||||||
|
]
|
||||||
|
RowErrorMalformed enc -> (,) "Text Decolonnade"
|
||||||
|
[ "Tried to decode the input as " ++ enc ++ " text"
|
||||||
|
, "There is a mistake in the encoding of the text."
|
||||||
|
]
|
||||||
|
RowErrorHeading errs -> (,) "Header" (prettyHeadingErrors toStr errs)
|
||||||
|
RowErrorDecode errs -> (,) "Cell Decolonnade" (prettyCellErrors toStr errs)
|
||||||
|
|
||||||
|
prettyCellErrors :: (c -> String) -> DecolonnadeCellErrors f c -> [String]
|
||||||
|
prettyCellErrors toStr (DecolonnadeCellErrors errs) = drop 1 $
|
||||||
|
flip concatMap errs $ \(DecolonnadeCellError content (Indexed ix _) msg) ->
|
||||||
|
let str = toStr content in
|
||||||
|
[ "-----------"
|
||||||
|
, "Column " ++ columnNumToLetters ix
|
||||||
|
, "Original parse error: " ++ msg
|
||||||
|
, "Cell Content Length: " ++ show (Prelude.length str)
|
||||||
|
, "Cell Content: " ++ if null str
|
||||||
|
then "[empty cell]"
|
||||||
|
else str
|
||||||
|
]
|
||||||
|
|
||||||
|
prettyHeadingErrors :: (c -> String) -> HeadingErrors c -> [String]
|
||||||
|
prettyHeadingErrors conv (HeadingErrors missing duplicates) = concat
|
||||||
|
[ concatMap (\h -> ["The header " ++ conv h ++ " was missing."]) missing
|
||||||
|
, concatMap (\(h,n) -> ["The header " ++ conv h ++ " occurred " ++ show n ++ " times."]) duplicates
|
||||||
|
]
|
||||||
|
|
||||||
|
columnNumToLetters :: Int -> String
|
||||||
|
columnNumToLetters i
|
||||||
|
| i >= 0 && i < 25 = [chr (i + 65)]
|
||||||
|
| otherwise = "Beyond Z. Fix this."
|
||||||
|
|
||||||
|
|
||||||
|
newtype EitherWrap a b = EitherWrap
|
||||||
|
{ getEitherWrap :: Either a b
|
||||||
|
} deriving (Functor)
|
||||||
|
|
||||||
|
instance Monoid a => Applicative (EitherWrap a) where
|
||||||
|
pure = EitherWrap . Right
|
||||||
|
EitherWrap (Left a1) <*> EitherWrap (Left a2) = EitherWrap (Left (mappend a1 a2))
|
||||||
|
EitherWrap (Left a1) <*> EitherWrap (Right _) = EitherWrap (Left a1)
|
||||||
|
EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2)
|
||||||
|
EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b))
|
||||||
|
|
||||||
|
mapLeft :: (a -> b) -> Either a c -> Either b c
|
||||||
|
mapLeft _ (Right a) = Right a
|
||||||
|
mapLeft f (Left a) = Left (f a)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,33 +1,28 @@
|
|||||||
module Siphon.Encoding where
|
module Siphon.Encoding where
|
||||||
|
|
||||||
import Siphon.Types
|
import Siphon.Types
|
||||||
import Colonnade.Types
|
import Colonnade (Colonnade,Headed)
|
||||||
import Pipes (Pipe,yield)
|
import Pipes (Pipe,yield)
|
||||||
import qualified Pipes.Prelude as Pipes
|
import qualified Pipes.Prelude as Pipes
|
||||||
import qualified Colonnade.Encoding as Encoding
|
import qualified Colonnade.Encode as E
|
||||||
|
|
||||||
row :: Siphon c
|
row :: Siphon c -> Colonnade f a c -> a -> c
|
||||||
-> Encoding f c a
|
|
||||||
-> a
|
|
||||||
-> c
|
|
||||||
row (Siphon escape intercalate _ _) e =
|
row (Siphon escape intercalate _ _) e =
|
||||||
intercalate . Encoding.runRow escape e
|
intercalate . E.row escape e
|
||||||
|
|
||||||
header :: Siphon c
|
header :: Siphon c -> Colonnade Headed a c -> c
|
||||||
-> Encoding Headed c a
|
|
||||||
-> c
|
|
||||||
header (Siphon escape intercalate _ _) e =
|
header (Siphon escape intercalate _ _) e =
|
||||||
intercalate (Encoding.runHeader escape e)
|
intercalate (E.header escape e)
|
||||||
|
|
||||||
pipe :: Monad m
|
pipe :: Monad m
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> Encoding f c a
|
-> Colonnade f a c
|
||||||
-> Pipe a c m x
|
-> Pipe a c m x
|
||||||
pipe siphon encoding = Pipes.map (row siphon encoding)
|
pipe siphon encoding = Pipes.map (row siphon encoding)
|
||||||
|
|
||||||
headedPipe :: Monad m
|
headedPipe :: Monad m
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> Encoding Headed c a
|
-> Colonnade Headed a c
|
||||||
-> Pipe a c m x
|
-> Pipe a c m x
|
||||||
headedPipe siphon encoding = do
|
headedPipe siphon encoding = do
|
||||||
yield (header siphon encoding)
|
yield (header siphon encoding)
|
||||||
|
|||||||
@ -1,7 +1,13 @@
|
|||||||
|
{-# LANGUAGE GADTs #-}
|
||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
|
||||||
module Siphon.Types where
|
module Siphon.Types where
|
||||||
|
|
||||||
import Data.Vector (Vector)
|
import Data.Vector (Vector)
|
||||||
import Colonnade.Types (DecodingRowError)
|
import Control.Exception (Exception)
|
||||||
|
import Data.Typeable (Typeable)
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
import qualified Data.Attoparsec.Types as Atto
|
import qualified Data.Attoparsec.Types as Atto
|
||||||
|
|
||||||
newtype Escaped c = Escaped { getEscaped :: c }
|
newtype Escaped c = Escaped { getEscaped :: c }
|
||||||
@ -13,6 +19,77 @@ data Siphon c = Siphon
|
|||||||
, siphonNull :: c -> Bool
|
, siphonNull :: c -> Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data DecolonnadeCellError f content = DecolonnadeCellError
|
||||||
|
{ decodingCellErrorContent :: !content
|
||||||
|
, decodingCellErrorHeader :: !(Indexed f content)
|
||||||
|
, decodingCellErrorMessage :: !String
|
||||||
|
} deriving (Show,Read,Eq)
|
||||||
|
|
||||||
|
-- instance (Show (f content), Typeable content) => Exception (DecolonnadeError f content)
|
||||||
|
|
||||||
|
data Indexed f a = Indexed
|
||||||
|
{ indexedIndex :: !Int
|
||||||
|
, indexedHeading :: !(f a)
|
||||||
|
} deriving (Eq,Ord,Functor,Show,Read)
|
||||||
|
|
||||||
|
newtype DecolonnadeCellErrors f content = DecolonnadeCellErrors
|
||||||
|
{ getDecolonnadeCellErrors :: Vector (DecolonnadeCellError f content)
|
||||||
|
} deriving (Monoid,Show,Read,Eq)
|
||||||
|
|
||||||
|
-- newtype ParseRowError = ParseRowError String
|
||||||
|
|
||||||
|
-- TODO: rewrite the instances for this by hand. They
|
||||||
|
-- currently use FlexibleContexts.
|
||||||
|
data DecolonnadeRowError f content = DecolonnadeRowError
|
||||||
|
{ decodingRowErrorRow :: !Int
|
||||||
|
, decodingRowErrorError :: !(RowError f content)
|
||||||
|
} deriving (Show,Read,Eq)
|
||||||
|
|
||||||
|
-- TODO: rewrite the instances for this by hand. They
|
||||||
|
-- currently use FlexibleContexts.
|
||||||
|
data RowError f content
|
||||||
|
= RowErrorParse !String -- ^ Error occurred parsing the document into cells
|
||||||
|
| RowErrorDecode !(DecolonnadeCellErrors f content) -- ^ Error decoding the content
|
||||||
|
| RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
|
||||||
|
| RowErrorHeading !(HeadingErrors content)
|
||||||
|
| RowErrorMinSize !Int !Int
|
||||||
|
| RowErrorMalformed !String -- ^ Error decoding unicode content
|
||||||
|
deriving (Show,Read,Eq)
|
||||||
|
|
||||||
|
data HeadingErrors content = HeadingErrors
|
||||||
|
{ headingErrorsMissing :: Vector content -- ^ headers that were missing
|
||||||
|
, headingErrorsDuplicate :: Vector (content,Int) -- ^ headers that occurred more than once
|
||||||
|
} deriving (Show,Read,Eq)
|
||||||
|
|
||||||
|
instance (Show content, Typeable content) => Exception (HeadingErrors content)
|
||||||
|
|
||||||
|
instance Monoid (HeadingErrors content) where
|
||||||
|
mempty = HeadingErrors Vector.empty Vector.empty
|
||||||
|
mappend (HeadingErrors a1 b1) (HeadingErrors a2 b2) = HeadingErrors
|
||||||
|
(a1 Vector.++ a2) (b1 Vector.++ b2)
|
||||||
|
|
||||||
|
-- | This just actually a specialization of the free applicative.
|
||||||
|
-- Check out @Control.Applicative.Free@ in the @free@ library to
|
||||||
|
-- learn more about this. The meanings of the fields are documented
|
||||||
|
-- slightly more in the source code. Unfortunately, haddock does not
|
||||||
|
-- play nicely with GADTs.
|
||||||
|
data Decolonnade f content a where
|
||||||
|
DecolonnadePure :: !a -- function
|
||||||
|
-> Decolonnade f content a
|
||||||
|
DecolonnadeAp :: !(f content) -- header
|
||||||
|
-> !(content -> Either String a) -- decoding function
|
||||||
|
-> !(Decolonnade f content (a -> b)) -- next decoding
|
||||||
|
-> Decolonnade f content b
|
||||||
|
|
||||||
|
instance Functor (Decolonnade f content) where
|
||||||
|
fmap f (DecolonnadePure a) = DecolonnadePure (f a)
|
||||||
|
fmap f (DecolonnadeAp h c apNext) = DecolonnadeAp h c ((f .) <$> apNext)
|
||||||
|
|
||||||
|
instance Applicative (Decolonnade f content) where
|
||||||
|
pure = DecolonnadePure
|
||||||
|
DecolonnadePure f <*> y = fmap f y
|
||||||
|
DecolonnadeAp h c y <*> z = DecolonnadeAp h c (flip <$> y <*> z)
|
||||||
|
|
||||||
-- -- | This type is provided for convenience with @pipes-text@
|
-- -- | This type is provided for convenience with @pipes-text@
|
||||||
-- data CsvResult f c
|
-- data CsvResult f c
|
||||||
-- = CsvResultSuccess
|
-- = CsvResultSuccess
|
||||||
|
|||||||
@ -12,26 +12,25 @@ import Data.ByteString (ByteString)
|
|||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import GHC.Generics (Generic)
|
import GHC.Generics (Generic)
|
||||||
import Data.Either.Combinators
|
import Data.Either.Combinators
|
||||||
import Colonnade.Types
|
|
||||||
import Siphon.Types
|
import Siphon.Types
|
||||||
import Data.Functor.Identity
|
import Data.Functor.Identity
|
||||||
import Data.Functor.Contravariant (contramap)
|
import Data.Functor.Contravariant (contramap)
|
||||||
import Data.Functor.Contravariant.Divisible (divided,conquered)
|
import Data.Functor.Contravariant.Divisible (divided,conquered)
|
||||||
|
import Colonnade (headed,headless,Colonnade,Headed,Headless)
|
||||||
|
import Data.Profunctor (lmap)
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
import qualified Data.ByteString.Builder as Builder
|
import qualified Data.ByteString.Builder as Builder
|
||||||
import qualified Data.ByteString.Lazy as LByteString
|
import qualified Data.ByteString.Lazy as LByteString
|
||||||
import qualified Data.ByteString as ByteString
|
import qualified Data.ByteString as ByteString
|
||||||
import qualified Data.ByteString.Char8 as BC8
|
import qualified Data.ByteString.Char8 as BC8
|
||||||
import qualified Colonnade.Decoding as Decoding
|
import qualified Colonnade as Colonnade
|
||||||
import qualified Colonnade.Encoding as Encoding
|
|
||||||
import qualified Colonnade.Decoding.ByteString.Char8 as CDB
|
|
||||||
import qualified Colonnade.Encoding.ByteString.Char8 as CEB
|
|
||||||
import qualified Colonnade.Decoding.Text as CDT
|
|
||||||
import qualified Colonnade.Encoding.Text as CET
|
|
||||||
import qualified Siphon.Encoding as SE
|
import qualified Siphon.Encoding as SE
|
||||||
import qualified Siphon.Decoding as SD
|
import qualified Siphon.Decoding as SD
|
||||||
import qualified Siphon.Content as SC
|
import qualified Siphon.Content as SC
|
||||||
import qualified Pipes.Prelude as Pipes
|
import qualified Pipes.Prelude as Pipes
|
||||||
|
import qualified Data.Text.Lazy as LText
|
||||||
|
import qualified Data.Text.Lazy.Builder as TBuilder
|
||||||
|
import qualified Data.Text.Lazy.Builder.Int as TBuilder
|
||||||
import Pipes
|
import Pipes
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
@ -122,56 +121,56 @@ fooFromString x = case x of
|
|||||||
decodeFoo :: (c -> String) -> c -> Either String Foo
|
decodeFoo :: (c -> String) -> c -> Either String Foo
|
||||||
decodeFoo f = fooFromString . f
|
decodeFoo f = fooFromString . f
|
||||||
|
|
||||||
decodingA :: Decoding Headless ByteString (Int,Char,Bool)
|
decodingA :: Decolonnade Headless ByteString (Int,Char,Bool)
|
||||||
decodingA = (,,)
|
decodingA = (,,)
|
||||||
<$> Decoding.headless CDB.int
|
<$> SD.headless dbInt
|
||||||
<*> Decoding.headless CDB.char
|
<*> SD.headless dbChar
|
||||||
<*> Decoding.headless CDB.bool
|
<*> SD.headless dbBool
|
||||||
|
|
||||||
decodingB :: Decoding Headed ByteString (Int,Char,Bool)
|
decodingB :: Decolonnade Headed ByteString (Int,Char,Bool)
|
||||||
decodingB = (,,)
|
decodingB = (,,)
|
||||||
<$> Decoding.headed "number" CDB.int
|
<$> SD.headed "number" dbInt
|
||||||
<*> Decoding.headed "letter" CDB.char
|
<*> SD.headed "letter" dbChar
|
||||||
<*> Decoding.headed "boolean" CDB.bool
|
<*> SD.headed "boolean" dbBool
|
||||||
|
|
||||||
encodingA :: Encoding Headless ByteString (Int,Char,Bool)
|
encodingA :: Colonnade Headless (Int,Char,Bool) ByteString
|
||||||
encodingA = contramap tripleToPairs
|
encodingA = mconcat
|
||||||
$ divided (Encoding.headless CEB.int)
|
[ lmap fst3 (headless ebInt)
|
||||||
$ divided (Encoding.headless CEB.char)
|
, lmap snd3 (headless ebChar)
|
||||||
$ divided (Encoding.headless CEB.bool)
|
, lmap thd3 (headless ebBool)
|
||||||
$ conquered
|
]
|
||||||
|
|
||||||
encodingW :: Encoding Headless Text (Int,Char,Bool)
|
encodingW :: Colonnade Headless (Int,Char,Bool) Text
|
||||||
encodingW = contramap tripleToPairs
|
encodingW = mconcat
|
||||||
$ divided (Encoding.headless CET.int)
|
[ lmap fst3 (headless etInt)
|
||||||
$ divided (Encoding.headless CET.char)
|
, lmap snd3 (headless etChar)
|
||||||
$ divided (Encoding.headless CET.bool)
|
, lmap thd3 (headless etBool)
|
||||||
$ conquered
|
]
|
||||||
|
|
||||||
encodingY :: Encoding Headless Text (Foo,Foo,Foo)
|
encodingY :: Colonnade Headless (Foo,Foo,Foo) Text
|
||||||
encodingY = contramap tripleToPairs
|
encodingY = mconcat
|
||||||
$ divided (Encoding.headless $ encodeFoo Text.pack)
|
[ lmap fst3 (headless $ encodeFoo Text.pack)
|
||||||
$ divided (Encoding.headless $ encodeFoo Text.pack)
|
, lmap snd3 (headless $ encodeFoo Text.pack)
|
||||||
$ divided (Encoding.headless $ encodeFoo Text.pack)
|
, lmap thd3 (headless $ encodeFoo Text.pack)
|
||||||
$ conquered
|
]
|
||||||
|
|
||||||
decodingY :: Decoding Headless Text (Foo,Foo,Foo)
|
decodingY :: Decolonnade Headless Text (Foo,Foo,Foo)
|
||||||
decodingY = (,,)
|
decodingY = (,,)
|
||||||
<$> Decoding.headless (decodeFoo Text.unpack)
|
<$> SD.headless (decodeFoo Text.unpack)
|
||||||
<*> Decoding.headless (decodeFoo Text.unpack)
|
<*> SD.headless (decodeFoo Text.unpack)
|
||||||
<*> Decoding.headless (decodeFoo Text.unpack)
|
<*> SD.headless (decodeFoo Text.unpack)
|
||||||
|
|
||||||
encodingB :: Encoding Headed ByteString (Int,Char,Bool)
|
encodingB :: Colonnade Headed (Int,Char,Bool) ByteString
|
||||||
encodingB = contramap tripleToPairs
|
encodingB = mconcat
|
||||||
$ divided (Encoding.headed "number" CEB.int)
|
[ lmap fst3 (headed "number" ebInt)
|
||||||
$ divided (Encoding.headed "letter" CEB.char)
|
, lmap snd3 (headed "letter" ebChar)
|
||||||
$ divided (Encoding.headed "boolean" CEB.bool)
|
, lmap thd3 (headed "boolean" ebBool)
|
||||||
$ conquered
|
]
|
||||||
|
|
||||||
encodingC :: Encoding Headed ByteString (Int,Char,Bool)
|
encodingC :: Colonnade Headed (Int,Char,Bool) ByteString
|
||||||
encodingC = mconcat
|
encodingC = mconcat
|
||||||
[ contramap thd3 $ Encoding.headed "boolean" CEB.bool
|
[ lmap thd3 $ headed "boolean" ebBool
|
||||||
, contramap snd3 $ Encoding.headed "letter" CEB.char
|
, lmap snd3 $ headed "letter" ebChar
|
||||||
]
|
]
|
||||||
|
|
||||||
tripleToPairs :: (a,b,c) -> (a,(b,(c,())))
|
tripleToPairs :: (a,b,c) -> (a,(b,(c,())))
|
||||||
@ -182,8 +181,8 @@ propIsoPipe p as = (Pipes.toList $ each as >-> p) == as
|
|||||||
|
|
||||||
runTestScenario :: (Monoid c, Eq c, Show c)
|
runTestScenario :: (Monoid c, Eq c, Show c)
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> (Siphon c -> Encoding f c (Int,Char,Bool) -> Pipe (Int,Char,Bool) c Identity ())
|
-> (Siphon c -> Colonnade f (Int,Char,Bool) c -> Pipe (Int,Char,Bool) c Identity ())
|
||||||
-> Encoding f c (Int,Char,Bool)
|
-> Colonnade f (Int,Char,Bool) c
|
||||||
-> c
|
-> c
|
||||||
-> Assertion
|
-> Assertion
|
||||||
runTestScenario s p e c =
|
runTestScenario s p e c =
|
||||||
@ -193,8 +192,8 @@ runTestScenario s p e c =
|
|||||||
|
|
||||||
runCustomTestScenario :: (Monoid c, Eq c, Show c)
|
runCustomTestScenario :: (Monoid c, Eq c, Show c)
|
||||||
=> Siphon c
|
=> Siphon c
|
||||||
-> (Siphon c -> Encoding f c a -> Pipe a c Identity ())
|
-> (Siphon c -> Colonnade f a c -> Pipe a c Identity ())
|
||||||
-> Encoding f c a
|
-> Colonnade f a c
|
||||||
-> a
|
-> a
|
||||||
-> c
|
-> c
|
||||||
-> Assertion
|
-> Assertion
|
||||||
@ -225,3 +224,56 @@ snd3 (a,b,c) = b
|
|||||||
thd3 :: (a,b,c) -> c
|
thd3 :: (a,b,c) -> c
|
||||||
thd3 (a,b,c) = c
|
thd3 (a,b,c) = c
|
||||||
|
|
||||||
|
|
||||||
|
dbChar :: ByteString -> Either String Char
|
||||||
|
dbChar b = case BC8.length b of
|
||||||
|
1 -> Right (BC8.head b)
|
||||||
|
0 -> Left "cannot decode Char from empty bytestring"
|
||||||
|
_ -> Left "cannot decode Char from multi-character bytestring"
|
||||||
|
|
||||||
|
dbInt :: ByteString -> Either String Int
|
||||||
|
dbInt b = do
|
||||||
|
(a,bsRem) <- maybe (Left "could not parse int") Right (BC8.readInt b)
|
||||||
|
if ByteString.null bsRem
|
||||||
|
then Right a
|
||||||
|
else Left "found extra characters after int"
|
||||||
|
|
||||||
|
dbBool :: ByteString -> Either String Bool
|
||||||
|
dbBool b
|
||||||
|
| b == BC8.pack "true" = Right True
|
||||||
|
| b == BC8.pack "false" = Right False
|
||||||
|
| otherwise = Left "must be true or false"
|
||||||
|
|
||||||
|
ebChar :: Char -> ByteString
|
||||||
|
ebChar = BC8.singleton
|
||||||
|
|
||||||
|
ebInt :: Int -> ByteString
|
||||||
|
ebInt = LByteString.toStrict
|
||||||
|
. Builder.toLazyByteString
|
||||||
|
. Builder.intDec
|
||||||
|
|
||||||
|
ebBool :: Bool -> ByteString
|
||||||
|
ebBool x = case x of
|
||||||
|
True -> BC8.pack "true"
|
||||||
|
False -> BC8.pack "false"
|
||||||
|
|
||||||
|
ebByteString :: ByteString -> ByteString
|
||||||
|
ebByteString = id
|
||||||
|
|
||||||
|
|
||||||
|
etChar :: Char -> Text
|
||||||
|
etChar = Text.singleton
|
||||||
|
|
||||||
|
etInt :: Int -> Text
|
||||||
|
etInt = LText.toStrict
|
||||||
|
. TBuilder.toLazyText
|
||||||
|
. TBuilder.decimal
|
||||||
|
|
||||||
|
etText :: Text -> Text
|
||||||
|
etText = id
|
||||||
|
|
||||||
|
etBool :: Bool -> Text
|
||||||
|
etBool x = case x of
|
||||||
|
True -> Text.pack "true"
|
||||||
|
False -> Text.pack "false"
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user