improve csv decoding
This commit is contained in:
parent
b8da6c0fab
commit
45de414367
@ -1,5 +1,6 @@
|
|||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
{-# LANGUAGE ScopedTypeVariables #-}
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
module Colonnade.Decoding where
|
module Colonnade.Decoding where
|
||||||
|
|
||||||
import Colonnade.Internal (EitherWrap(..))
|
import Colonnade.Internal (EitherWrap(..))
|
||||||
@ -24,24 +25,47 @@ headless f = DecodingAp Headless f (DecodingPure id)
|
|||||||
headed :: content -> (content -> Either String a) -> Decoding Headed content a
|
headed :: content -> (content -> Either String a) -> Decoding Headed content a
|
||||||
headed h f = DecodingAp (Headed h) f (DecodingPure id)
|
headed h f = DecodingAp (Headed h) f (DecodingPure id)
|
||||||
|
|
||||||
|
-- | This function uses 'unsafeIndex' to access
|
||||||
|
-- elements of the 'Vector'.
|
||||||
|
uncheckedRunWithRow ::
|
||||||
|
Int
|
||||||
|
-> Decoding (Indexed f) content a
|
||||||
|
-> Vector content
|
||||||
|
-> Either (DecodingRowError f content) a
|
||||||
|
uncheckedRunWithRow i d v = mapLeft (DecodingRowError i . RowErrorDecode) (uncheckedRun d v)
|
||||||
|
|
||||||
-- | This function does not check to make sure that the indicies in
|
-- | This function does not check to make sure that the indicies in
|
||||||
-- the 'Decoding' are in the 'Vector'.
|
-- the 'Decoding' are in the 'Vector'.
|
||||||
uncheckedRun :: forall content a f.
|
uncheckedRun :: forall content a f.
|
||||||
Vector content
|
Decoding (Indexed f) content a
|
||||||
-> Decoding (Indexed f) content a
|
-> Vector content
|
||||||
-> Either (DecodingErrors f content) a
|
-> Either (DecodingCellErrors f content) a
|
||||||
uncheckedRun v = getEitherWrap . go
|
uncheckedRun dc v = getEitherWrap (go dc)
|
||||||
where
|
where
|
||||||
go :: forall b.
|
go :: forall b.
|
||||||
Decoding (Indexed f) content b
|
Decoding (Indexed f) content b
|
||||||
-> EitherWrap (DecodingErrors f content) b
|
-> EitherWrap (DecodingCellErrors f content) b
|
||||||
go (DecodingPure b) = EitherWrap (Right b)
|
go (DecodingPure b) = EitherWrap (Right b)
|
||||||
go (DecodingAp ixed@(Indexed ix h) decode apNext) =
|
go (DecodingAp ixed@(Indexed ix h) decode apNext) =
|
||||||
let rnext = go apNext
|
let rnext = go apNext
|
||||||
content = Vector.unsafeIndex v ix
|
content = Vector.unsafeIndex v ix
|
||||||
rcurrent = mapLeft (DecodingErrors . Vector.singleton . DecodingError content ixed) (decode content)
|
rcurrent = mapLeft (DecodingCellErrors . Vector.singleton . DecodingCellError content ixed) (decode content)
|
||||||
in rnext <*> (EitherWrap rcurrent)
|
in rnext <*> (EitherWrap rcurrent)
|
||||||
|
|
||||||
|
headlessToIndexed :: forall c a.
|
||||||
|
Decoding Headless c a -> Decoding (Indexed Headless) c a
|
||||||
|
headlessToIndexed = go 0 where
|
||||||
|
go :: forall b. Int -> Decoding Headless c b -> Decoding (Indexed Headless) c b
|
||||||
|
go !ix (DecodingPure a) = DecodingPure a
|
||||||
|
go !ix (DecodingAp Headless decode apNext) =
|
||||||
|
DecodingAp (Indexed ix Headless) decode (go (ix + 1) apNext)
|
||||||
|
|
||||||
|
length :: forall f c a. Decoding f c a -> Int
|
||||||
|
length = go 0 where
|
||||||
|
go :: forall b. Int -> Decoding f c b -> Int
|
||||||
|
go !a (DecodingPure _) = a
|
||||||
|
go !a (DecodingAp _ _ apNext) = go (a + 1) apNext
|
||||||
|
|
||||||
-- | Maps over a 'Decoding' that expects headers, converting these
|
-- | Maps over a 'Decoding' that expects headers, converting these
|
||||||
-- expected headers into the indices of the columns that they
|
-- expected headers into the indices of the columns that they
|
||||||
-- correspond to.
|
-- correspond to.
|
||||||
|
|||||||
@ -9,8 +9,10 @@ module Colonnade.Types
|
|||||||
, Headless(..)
|
, Headless(..)
|
||||||
, Indexed(..)
|
, Indexed(..)
|
||||||
, HeadingErrors(..)
|
, HeadingErrors(..)
|
||||||
, DecodingError(..)
|
, DecodingCellError(..)
|
||||||
, DecodingErrors(..)
|
, DecodingRowError(..)
|
||||||
|
, DecodingCellErrors(..)
|
||||||
|
, RowError(..)
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Vector (Vector)
|
import Data.Vector (Vector)
|
||||||
@ -29,8 +31,8 @@ data Headless a = Headless
|
|||||||
deriving (Eq,Ord,Functor,Show,Read)
|
deriving (Eq,Ord,Functor,Show,Read)
|
||||||
|
|
||||||
data Indexed f a = Indexed
|
data Indexed f a = Indexed
|
||||||
{ indexedIndex :: Int
|
{ indexedIndex :: !Int
|
||||||
, indexedHeading :: f a
|
, indexedHeading :: !(f a)
|
||||||
} deriving (Eq,Ord,Functor,Show,Read)
|
} deriving (Eq,Ord,Functor,Show,Read)
|
||||||
|
|
||||||
data HeadingErrors content = HeadingErrors
|
data HeadingErrors content = HeadingErrors
|
||||||
@ -45,18 +47,31 @@ instance Monoid (HeadingErrors content) where
|
|||||||
mappend (HeadingErrors a1 b1) (HeadingErrors a2 b2) = HeadingErrors
|
mappend (HeadingErrors a1 b1) (HeadingErrors a2 b2) = HeadingErrors
|
||||||
(a1 Vector.++ a2) (b1 Vector.++ b2)
|
(a1 Vector.++ a2) (b1 Vector.++ b2)
|
||||||
|
|
||||||
data DecodingError f content = DecodingError
|
data DecodingCellError f content = DecodingCellError
|
||||||
{ decodingErrorContent :: content
|
{ decodingCellErrorContent :: !content
|
||||||
, decodingErrorHeader :: Indexed f content
|
, decodingCellErrorHeader :: !(Indexed f content)
|
||||||
, decodingErrorMessage :: String
|
, decodingCellErrorMessage :: !String
|
||||||
} deriving (Show,Read)
|
} deriving (Show,Read)
|
||||||
|
|
||||||
-- instance (Show (f content), Typeable content) => Exception (DecodingError f content)
|
-- instance (Show (f content), Typeable content) => Exception (DecodingError f content)
|
||||||
|
|
||||||
newtype DecodingErrors f content = DecodingErrors
|
newtype DecodingCellErrors f content = DecodingCellErrors
|
||||||
{ getDecodingErrors :: Vector (DecodingError f content)
|
{ getDecodingCellErrors :: Vector (DecodingCellError f content)
|
||||||
} deriving (Monoid,Show,Read)
|
} deriving (Monoid,Show,Read)
|
||||||
|
|
||||||
|
-- newtype ParseRowError = ParseRowError String
|
||||||
|
|
||||||
|
data DecodingRowError f content = DecodingRowError
|
||||||
|
{ decodingRowErrorRow :: !Int
|
||||||
|
, decodingRowErrorError :: !(RowError f content)
|
||||||
|
}
|
||||||
|
|
||||||
|
data RowError f content
|
||||||
|
= RowErrorParse !String -- ^ Error occurred parsing the document into cells
|
||||||
|
| RowErrorDecode !(DecodingCellErrors f content) -- ^ Error decoding the content
|
||||||
|
| RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
|
||||||
|
| RowErrorHeading !(HeadingErrors content)
|
||||||
|
|
||||||
-- instance (Show (f content), Typeable content) => Exception (DecodingErrors f content)
|
-- instance (Show (f content), Typeable content) => Exception (DecodingErrors f content)
|
||||||
|
|
||||||
instance Contravariant Headless where
|
instance Contravariant Headless where
|
||||||
|
|||||||
@ -1,13 +1,17 @@
|
|||||||
{-# LANGUAGE RankNTypes #-}
|
{-# LANGUAGE RankNTypes #-}
|
||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
|
||||||
module Siphon.Decoding where
|
module Siphon.Decoding where
|
||||||
|
|
||||||
import Siphon.Types
|
import Siphon.Types
|
||||||
|
import Colonnade.Types
|
||||||
import Siphon.Internal (row,comma)
|
import Siphon.Internal (row,comma)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import Pipes (yield,Pipe,Consumer',Producer,await)
|
import Pipes (yield,Pipe,Consumer',Producer,await)
|
||||||
import Data.Vector (Vector)
|
import Data.Vector (Vector)
|
||||||
|
import qualified Data.Vector as Vector
|
||||||
|
import qualified Colonnade.Decoding as Decoding
|
||||||
import qualified Data.Attoparsec.ByteString as AttoByteString
|
import qualified Data.Attoparsec.ByteString as AttoByteString
|
||||||
import qualified Data.ByteString.Char8 as ByteString
|
import qualified Data.ByteString.Char8 as ByteString
|
||||||
import qualified Data.Attoparsec.Types as Atto
|
import qualified Data.Attoparsec.Types as Atto
|
||||||
@ -24,24 +28,96 @@ byteStringChar8 = SiphonDecoding
|
|||||||
-- -> Vector c
|
-- -> Vector c
|
||||||
-- -> Either DecodingErrors a
|
-- -> Either DecodingErrors a
|
||||||
|
|
||||||
pipe :: Monad m
|
-- decodeVectorPipe ::
|
||||||
=> SiphonDecoding c1 c2
|
-- Monad m
|
||||||
-> Atto.Parser c1 (WithEnd c2)
|
-- => Decoding (Indexed f) c a
|
||||||
-> Pipe c1 (Vector c2) m String
|
-- -> Pipe (Vector c) a m ()
|
||||||
pipe (SiphonDecoding parse isNull) p = go1 where
|
-- decodeVectorPipe
|
||||||
go1 = do
|
|
||||||
|
mkParseError :: Int -> [String] -> String -> DecodingRowError f content
|
||||||
|
mkParseError i ctxs msg = id
|
||||||
|
$ DecodingRowError i
|
||||||
|
$ RowErrorParse $ concat
|
||||||
|
[ "Contexts: ["
|
||||||
|
, concat ctxs
|
||||||
|
, "], Error Message: ["
|
||||||
|
, msg
|
||||||
|
, "]"
|
||||||
|
]
|
||||||
|
|
||||||
|
headlessPipe :: Monad m
|
||||||
|
=> SiphonDecoding c1 c2
|
||||||
|
-> Decoding Headless c2 a
|
||||||
|
-> Pipe c1 a m (DecodingRowError Headless c2)
|
||||||
|
headlessPipe sd decoding = uncheckedPipe requiredLength 0 sd indexedDecoding Nothing
|
||||||
|
where
|
||||||
|
indexedDecoding = Decoding.headlessToIndexed decoding
|
||||||
|
requiredLength = Decoding.length indexedDecoding
|
||||||
|
|
||||||
|
headedPipe :: (Monad m, Eq c2)
|
||||||
|
=> SiphonDecoding c1 c2
|
||||||
|
-> Decoding Headed c2 a
|
||||||
|
-> Pipe c1 a m (DecodingRowError Headed c2)
|
||||||
|
headedPipe sd decoding = do
|
||||||
|
(headers, mleftovers) <- consumeGeneral sd mkParseError
|
||||||
|
case Decoding.headedToIndexed headers decoding of
|
||||||
|
Left headingErrs -> return (DecodingRowError 0 (RowErrorHeading headingErrs))
|
||||||
|
Right indexedDecoding ->
|
||||||
|
let requiredLength = Decoding.length indexedDecoding
|
||||||
|
in uncheckedPipe requiredLength 1 sd indexedDecoding mleftovers
|
||||||
|
|
||||||
|
|
||||||
|
uncheckedPipe :: Monad m
|
||||||
|
=> Int -- ^ expected length of each row
|
||||||
|
-> Int -- ^ index of first row, usually zero or one
|
||||||
|
-> SiphonDecoding c1 c2
|
||||||
|
-> Decoding (Indexed f) c2 a
|
||||||
|
-> Maybe c1
|
||||||
|
-> Pipe c1 a m (DecodingRowError f c2)
|
||||||
|
uncheckedPipe requiredLength ix sd d mleftovers =
|
||||||
|
pipeGeneral ix sd mkParseError checkedRunWithRow mleftovers
|
||||||
|
where
|
||||||
|
checkedRunWithRow rowIx v =
|
||||||
|
let vlen = Vector.length v in
|
||||||
|
if vlen /= requiredLength
|
||||||
|
then Left $ DecodingRowError rowIx
|
||||||
|
$ RowErrorSize requiredLength vlen
|
||||||
|
else Decoding.uncheckedRunWithRow rowIx d v
|
||||||
|
|
||||||
|
consumeGeneral :: Monad m
|
||||||
|
=> SiphonDecoding c1 c2
|
||||||
|
-> (Int -> [String] -> String -> e)
|
||||||
|
-> Consumer' c1 m (Vector c2, Maybe c1)
|
||||||
|
consumeGeneral = error "ahh"
|
||||||
|
|
||||||
|
pipeGeneral :: Monad m
|
||||||
|
=> Int -- ^ index of first row, usually zero or one
|
||||||
|
-> SiphonDecoding c1 c2
|
||||||
|
-> (Int -> [String] -> String -> e)
|
||||||
|
-> (Int -> Vector c2 -> Either e a)
|
||||||
|
-> Maybe c1 -- ^ leftovers that should be handled first
|
||||||
|
-> Pipe c1 a m e
|
||||||
|
pipeGeneral initIx (SiphonDecoding parse isNull) wrapParseError decodeRow mleftovers =
|
||||||
|
case mleftovers of
|
||||||
|
Nothing -> go1 initIx
|
||||||
|
Just leftovers -> handleResult initIx (parse leftovers)
|
||||||
|
where
|
||||||
|
go1 !ix = do
|
||||||
c1 <- awaitSkip isNull
|
c1 <- awaitSkip isNull
|
||||||
handleResult (parse c1)
|
handleResult ix (parse c1)
|
||||||
go2 c1 = handleResult (parse c1)
|
go2 !ix c1 = handleResult ix (parse c1)
|
||||||
go3 k = do
|
go3 !ix k = do
|
||||||
c1 <- awaitSkip isNull
|
c1 <- awaitSkip isNull
|
||||||
handleResult (k c1)
|
handleResult ix (k c1)
|
||||||
handleResult r = case r of
|
handleResult !ix r = case r of
|
||||||
Atto.Fail _ _ _ -> error "ahh"
|
Atto.Fail _ ctxs msg -> return $ wrapParseError ix ctxs msg
|
||||||
Atto.Done c1 v -> do
|
Atto.Done c1 v -> do
|
||||||
yield v
|
case decodeRow ix v of
|
||||||
if isNull c1 then go1 else go2 c1
|
Left err -> return err
|
||||||
Atto.Partial k -> go3 k
|
Right r -> do
|
||||||
|
yield r
|
||||||
|
if isNull c1 then go1 ix else go2 ix c1
|
||||||
|
Atto.Partial k -> go3 ix k
|
||||||
|
|
||||||
awaitSkip :: Monad m
|
awaitSkip :: Monad m
|
||||||
=> (a -> Bool)
|
=> (a -> Bool)
|
||||||
|
|||||||
@ -18,10 +18,10 @@ data SiphonDecoding c1 c2 = SiphonDecoding
|
|||||||
, siphonDecodingNull :: c1 -> Bool
|
, siphonDecodingNull :: c1 -> Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
data WithEnd c = WithEnd
|
-- data WithEnd c = WithEnd
|
||||||
{ withEndEnded :: Bool
|
-- { withEndEnded :: !Bool
|
||||||
, withEndContent :: c
|
-- , withEndContent :: !c
|
||||||
}
|
-- }
|
||||||
|
|
||||||
-- data SiphonDecodingError
|
-- data SiphonDecodingError
|
||||||
-- { clarify
|
-- { clarify
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user