add geolite csv parsing, improve tests, add decoding combinators

This commit is contained in:
Andrew Martin 2016-07-12 17:47:15 -04:00
parent 4d591380a5
commit 4bcf860fbc
25 changed files with 681 additions and 53 deletions

View File

@ -20,6 +20,7 @@ library
Colonnade.Encoding.Text
Colonnade.Encoding.ByteString.Char8
Colonnade.Decoding
Colonnade.Decoding.Text
Colonnade.Decoding.ByteString.Char8
Colonnade.Internal
Colonnade.Internal.Ap

View File

@ -3,7 +3,7 @@
{-# LANGUAGE BangPatterns #-}
module Colonnade.Decoding where
import Colonnade.Internal (EitherWrap(..))
import Colonnade.Internal (EitherWrap(..),mapLeft)
import Colonnade.Types
import Data.Functor.Contravariant
import Data.Vector (Vector)
@ -101,14 +101,3 @@ headedToIndexed v = getEitherWrap . go
<$> EitherWrap rcurrent
<*> rnext
eitherMonoidAp :: Monoid a => Either a (b -> c) -> Either a b -> Either a c
eitherMonoidAp = go where
go (Left a1) (Left a2) = Left (mappend a1 a2)
go (Left a1) (Right _) = Left a1
go (Right _) (Left a2) = Left a2
go (Right f) (Right b) = Right (f b)
mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft _ (Right a) = Right a
mapLeft f (Left a) = Left (f a)

View File

@ -0,0 +1,47 @@
module Colonnade.Decoding.Text where
import Prelude hiding (map)
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Read as TextRead
char :: Text -> Either String Char
char t = case Text.length t of
1 -> Right (Text.head t)
0 -> Left "cannot decode Char from empty text"
_ -> Left "cannot decode Char from multi-character text"
text :: Text -> Either String Text
text = Right
int :: Text -> Either String Int
int t = do
(a,tRem) <- TextRead.decimal t
if Text.null tRem
then Right a
else Left "found extra characters after int"
trueFalse :: Text -> Text -> Text -> Either String Bool
trueFalse t f txt
| txt == t = Right True
| txt == f = Right False
| otherwise = Left $ concat
["must be [", Text.unpack t, "] or [", Text.unpack f, "]"]
-- | This refers to the 'TextRead.Reader' from @Data.Text.Read@, not
-- to the @Reader@ monad.
fromReader :: TextRead.Reader a -> Text -> Either String a
fromReader f t = do
(a,tRem) <- f t
if Text.null tRem
then Right a
else Left "found extra characters at end of text"
optional :: (Text -> Either String a) -> Text -> Either String (Maybe a)
optional f t = if Text.null t
then Right Nothing
else fmap Just (f t)
map :: (a -> b) -> (Text -> Either String a) -> Text -> Either String b
map f g t = fmap f (g t)

View File

@ -17,3 +17,8 @@ int = LText.toStrict
text :: Text -> Text
text = id
bool :: Bool -> Text
bool x = case x of
True -> Text.pack "true"
False -> Text.pack "false"

View File

@ -12,3 +12,6 @@ instance Monoid a => Applicative (EitherWrap a) where
EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2)
EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b))
mapLeft :: (a -> b) -> Either a c -> Either b c
mapLeft _ (Right a) = Right a
mapLeft f (Left a) = Left (f a)

View File

@ -38,7 +38,7 @@ data Indexed f a = Indexed
data HeadingErrors content = HeadingErrors
{ headingErrorsMissing :: Vector content -- ^ headers that were missing
, headingErrorsDuplicate :: Vector (content,Int) -- ^ headers that occurred more than once
} deriving (Show,Read)
} deriving (Show,Read,Eq)
instance (Show content, Typeable content) => Exception (HeadingErrors content)
@ -51,27 +51,32 @@ data DecodingCellError f content = DecodingCellError
{ decodingCellErrorContent :: !content
, decodingCellErrorHeader :: !(Indexed f content)
, decodingCellErrorMessage :: !String
} deriving (Show,Read)
} deriving (Show,Read,Eq)
-- instance (Show (f content), Typeable content) => Exception (DecodingError f content)
newtype DecodingCellErrors f content = DecodingCellErrors
{ getDecodingCellErrors :: Vector (DecodingCellError f content)
} deriving (Monoid,Show,Read)
} deriving (Monoid,Show,Read,Eq)
-- newtype ParseRowError = ParseRowError String
-- TODO: rewrite the instances for this by hand. They
-- currently use FlexibleContexts.
data DecodingRowError f content = DecodingRowError
{ decodingRowErrorRow :: !Int
, decodingRowErrorError :: !(RowError f content)
}
} deriving (Show,Read,Eq)
-- TODO: rewrite the instances for this by hand. They
-- currently use FlexibleContexts.
data RowError f content
= RowErrorParse !String -- ^ Error occurred parsing the document into cells
| RowErrorDecode !(DecodingCellErrors f content) -- ^ Error decoding the content
| RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
| RowErrorHeading !(HeadingErrors content)
| RowErrorMinSize !Int !Int
deriving (Show,Read,Eq)
-- instance (Show (f content), Typeable content) => Exception (DecodingErrors f content)

30
geolite-csv/LICENSE Normal file
View File

@ -0,0 +1,30 @@
Copyright Andrew Martin (c) 2016
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Andrew Martin nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

2
geolite-csv/Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,11 @@
network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider,postal_code,latitude,longitude,accuracy_radius
24.165.56.0/22,5848280,6252001,,0,0,96746,22.0837,-159.3553,10
78.146.173.128/25,2655583,2635167,,0,0,DL14,54.6500,-1.6667,20
121.211.108.0/23,2160386,2077456,,0,0,2040,-33.8833,151.1500,5
69.74.43.16/30,6252001,6252001,,0,0,,37.7510,-97.8220,1000
77.128.35.136/30,3034803,3017382,,0,0,57450,49.0667,6.8333,20
90.54.234.0/24,2977062,3017382,,0,0,49320,47.3944,-0.4357,50
77.193.41.175/32,3018587,3017382,,0,0,78810,48.8700,1.9740,1
58.188.32.0/24,1861060,1861060,,0,0,,35.6900,139.6900,500
87.81.232.0/24,2635167,2635167,,0,0,,51.4964,-0.1224,200
88.191.56.0/22,2988507,3017382,,0,0,75001,48.8667,2.3333,500
1 network geoname_id registered_country_geoname_id represented_country_geoname_id is_anonymous_proxy is_satellite_provider postal_code latitude longitude accuracy_radius
2 24.165.56.0/22 5848280 6252001 0 0 96746 22.0837 -159.3553 10
3 78.146.173.128/25 2655583 2635167 0 0 DL14 54.6500 -1.6667 20
4 121.211.108.0/23 2160386 2077456 0 0 2040 -33.8833 151.1500 5
5 69.74.43.16/30 6252001 6252001 0 0 37.7510 -97.8220 1000
6 77.128.35.136/30 3034803 3017382 0 0 57450 49.0667 6.8333 20
7 90.54.234.0/24 2977062 3017382 0 0 49320 47.3944 -0.4357 50
8 77.193.41.175/32 3018587 3017382 0 0 78810 48.8700 1.9740 1
9 58.188.32.0/24 1861060 1861060 0 0 35.6900 139.6900 500
10 87.81.232.0/24 2635167 2635167 0 0 51.4964 -0.1224 200
11 88.191.56.0/22 2988507 3017382 0 0 75001 48.8667 2.3333 500

View File

@ -0,0 +1,21 @@
geoname_id,locale_code,continent_code,continent_name,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,subdivision_2_iso_code,subdivision_2_name,city_name,metro_code,time_zone
2653810,en,EU,Europe,GB,"United Kingdom",SCT,Scotland,GLG,"Glasgow City",Cardonald,,Europe/London
2832529,en,EU,Europe,DE,Germany,RP,Rheinland-Pfalz,,,Siefersheim,,Europe/Berlin
2885499,en,EU,Europe,DE,Germany,MV,Mecklenburg-Vorpommern,,,Koerchow,,Europe/Berlin
550870,en,EU,Europe,RU,Russia,NIZ,"Nizhegorodskaya Oblast'",,,Khabarskoye,,Europe/Moscow
766583,en,EU,Europe,PL,Poland,LU,"Lublin Voivodeship",,,Leczna,,Europe/Warsaw
2608246,en,EU,Europe,AT,Austria,1,Burgenland,,,"Neuhaus am Klausenbach",,Europe/Vienna
5121765,en,NA,"North America",US,"United States",NY,"New York",,,Ilion,526,America/New_York
2935825,en,EU,Europe,DE,Germany,NW,"North Rhine-Westphalia",,,Dormagen,,Europe/Berlin
3165189,en,EU,Europe,IT,Italy,36,"Friuli Venezia Giulia",UD,"Provincia di Udine",Tricesimo,,Europe/Rome
4564070,en,NA,"North America",PR,"Puerto Rico",,,,,Culebra,,America/Puerto_Rico
2993759,en,EU,Europe,FR,France,U,"Provence-Alpes-Côte d'Azur",13,Bouches-du-Rhône,Miramas-le-Vieux,,Europe/Paris
5861117,en,NA,"North America",US,"United States",AK,Alaska,,,"Dutch Harbor",743,America/Adak
4375229,en,NA,"North America",US,"United States",MO,Missouri,,,Ashland,604,America/Chicago
2946980,en,EU,Europe,DE,Germany,SN,Saxony,,,Boehlen,,Europe/Berlin
3156470,en,EU,Europe,NO,Norway,02,Akershus,,,Frogner,,Europe/Oslo
3166193,en,EU,Europe,IT,Italy,36,"Friuli Venezia Giulia",GO,"Provincia di Gorizia",Staranzano,,Europe/Rome
4913742,en,NA,"North America",US,"United States",IL,Illinois,,,Tiskilwa,675,America/Chicago
4853511,en,NA,"North America",US,"United States",IA,Iowa,,,Dayton,679,America/Chicago
480876,en,EU,Europe,RU,Russia,ROS,Rostov,,,Tsimlyansk,,Europe/Moscow
3000119,en,EU,Europe,FR,France,89,Yonne,,,"Les Ormes",,Europe/Paris
1 geoname_id locale_code continent_code continent_name country_iso_code country_name subdivision_1_iso_code subdivision_1_name subdivision_2_iso_code subdivision_2_name city_name metro_code time_zone
2 2653810 en EU Europe GB United Kingdom SCT Scotland GLG Glasgow City Cardonald Europe/London
3 2832529 en EU Europe DE Germany RP Rheinland-Pfalz Siefersheim Europe/Berlin
4 2885499 en EU Europe DE Germany MV Mecklenburg-Vorpommern Koerchow Europe/Berlin
5 550870 en EU Europe RU Russia NIZ Nizhegorodskaya Oblast' Khabarskoye Europe/Moscow
6 766583 en EU Europe PL Poland LU Lublin Voivodeship Leczna Europe/Warsaw
7 2608246 en EU Europe AT Austria 1 Burgenland Neuhaus am Klausenbach Europe/Vienna
8 5121765 en NA North America US United States NY New York Ilion 526 America/New_York
9 2935825 en EU Europe DE Germany NW North Rhine-Westphalia Dormagen Europe/Berlin
10 3165189 en EU Europe IT Italy 36 Friuli Venezia Giulia UD Provincia di Udine Tricesimo Europe/Rome
11 4564070 en NA North America PR Puerto Rico Culebra America/Puerto_Rico
12 2993759 en EU Europe FR France U Provence-Alpes-Côte d'Azur 13 Bouches-du-Rhône Miramas-le-Vieux Europe/Paris
13 5861117 en NA North America US United States AK Alaska Dutch Harbor 743 America/Adak
14 4375229 en NA North America US United States MO Missouri Ashland 604 America/Chicago
15 2946980 en EU Europe DE Germany SN Saxony Boehlen Europe/Berlin
16 3156470 en EU Europe NO Norway 02 Akershus Frogner Europe/Oslo
17 3166193 en EU Europe IT Italy 36 Friuli Venezia Giulia GO Provincia di Gorizia Staranzano Europe/Rome
18 4913742 en NA North America US United States IL Illinois Tiskilwa 675 America/Chicago
19 4853511 en NA North America US United States IA Iowa Dayton 679 America/Chicago
20 480876 en EU Europe RU Russia ROS Rostov Tsimlyansk Europe/Moscow
21 3000119 en EU Europe FR France 89 Yonne Les Ormes Europe/Paris

View File

@ -0,0 +1,21 @@
geoname_id,locale_code,continent_code,continent_name,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,subdivision_2_iso_code,subdivision_2_name,city_name,metro_code,time_zone
1260633,ja,AS,"アジア",IN,"インド",AP,"アーンドラ・プラデーシュ州",,,,,Asia/Kolkata
4765167,ja,NA,"北アメリカ",US,"アメリカ合衆国",VA,"バージニア州",,,,573,America/New_York
2703330,ja,EU,"ヨーロッパ",SE,"スウェーデン王国",Z,,,,,,Europe/Stockholm
535886,ja,EU,"ヨーロッパ",RU,"ロシア",STA,,,,,,Europe/Moscow
2989001,ja,EU,"ヨーロッパ",FR,"フランス共和国",F,,28,,,,Europe/Paris
3183178,ja,EU,"ヨーロッパ",IT,"イタリア共和国",75,"プッリャ州",BA,,"アルタムーラ",,Europe/Rome
3012956,ja,EU,"ヨーロッパ",FR,"フランス共和国",67,,,,,,Europe/Paris
4189157,ja,NA,"北アメリカ",US,"アメリカ合衆国",GA,"ジョージア州",,,,524,America/New_York
2758965,ja,EU,"ヨーロッパ",NL,"オランダ王国",ZE,,,,,,Europe/Amsterdam
3570412,ja,NA,"北アメリカ",MQ,"マルティニーク島",,,,,,,America/Martinique
3095604,ja,EU,"ヨーロッパ",PL,"ポーランド共和国",MZ,"マゾフシェ県",,,,,Europe/Warsaw
3070865,ja,EU,"ヨーロッパ",CZ,"チェコ共和国",ST,"中央ボヘミア州",,,,,Europe/Prague
2636062,ja,EU,"ヨーロッパ",GB,"イギリス",ENG,"イングランド",SRY,,,,Europe/London
3019338,ja,EU,"ヨーロッパ",FR,"フランス共和国",57,,,,,,Europe/Paris
2865603,ja,EU,"ヨーロッパ",DE,"ドイツ連邦共和国",BY,"バイエルン州",,,"ノイエンマルクト",,Europe/Berlin
2930628,ja,EU,"ヨーロッパ",DE,"ドイツ連邦共和国",HE,,,,,,Europe/Berlin
2976283,ja,EU,"ヨーロッパ",FR,"フランス共和国",01,,,,,,Europe/Paris
4062424,ja,NA,"北アメリカ",US,"アメリカ合衆国",AL,"アラバマ州",,,,575,America/Chicago
4461574,ja,NA,"北アメリカ",US,"アメリカ合衆国",NC,"ノースカロライナ州",,,"コンコード",517,America/New_York
1279945,ja,AS,"アジア",CN,"中国",62,,,,"酒泉市",,Asia/Shanghai
1 geoname_id locale_code continent_code continent_name country_iso_code country_name subdivision_1_iso_code subdivision_1_name subdivision_2_iso_code subdivision_2_name city_name metro_code time_zone
2 1260633 ja AS アジア IN インド AP アーンドラ・プラデーシュ州 Asia/Kolkata
3 4765167 ja NA 北アメリカ US アメリカ合衆国 VA バージニア州 573 America/New_York
4 2703330 ja EU ヨーロッパ SE スウェーデン王国 Z Europe/Stockholm
5 535886 ja EU ヨーロッパ RU ロシア STA Europe/Moscow
6 2989001 ja EU ヨーロッパ FR フランス共和国 F 28 Europe/Paris
7 3183178 ja EU ヨーロッパ IT イタリア共和国 75 プッリャ州 BA アルタムーラ Europe/Rome
8 3012956 ja EU ヨーロッパ FR フランス共和国 67 Europe/Paris
9 4189157 ja NA 北アメリカ US アメリカ合衆国 GA ジョージア州 524 America/New_York
10 2758965 ja EU ヨーロッパ NL オランダ王国 ZE Europe/Amsterdam
11 3570412 ja NA 北アメリカ MQ マルティニーク島 America/Martinique
12 3095604 ja EU ヨーロッパ PL ポーランド共和国 MZ マゾフシェ県 Europe/Warsaw
13 3070865 ja EU ヨーロッパ CZ チェコ共和国 ST 中央ボヘミア州 Europe/Prague
14 2636062 ja EU ヨーロッパ GB イギリス ENG イングランド SRY Europe/London
15 3019338 ja EU ヨーロッパ FR フランス共和国 57 Europe/Paris
16 2865603 ja EU ヨーロッパ DE ドイツ連邦共和国 BY バイエルン州 ノイエンマルクト Europe/Berlin
17 2930628 ja EU ヨーロッパ DE ドイツ連邦共和国 HE Europe/Berlin
18 2976283 ja EU ヨーロッパ FR フランス共和国 01 Europe/Paris
19 4062424 ja NA 北アメリカ US アメリカ合衆国 AL アラバマ州 575 America/Chicago
20 4461574 ja NA 北アメリカ US アメリカ合衆国 NC ノースカロライナ州 コンコード 517 America/New_York
21 1279945 ja AS アジア CN 中国 62 酒泉市 Asia/Shanghai

View File

@ -0,0 +1,51 @@
name: geolite-csv
version: 0.1.0
synopsis: Initial project template from stack
description: Please see README.md
homepage: https://github.com/andrewthad/colonnade
license: BSD3
license-file: LICENSE
author: Andrew Martin
maintainer: andrew.thaddeus@gmail.com
copyright: 2016 Andrew Martin
category: web
build-type: Simple
-- extra-source-files:
cabal-version: >=1.10
library
hs-source-dirs: src
exposed-modules:
Geolite.Types
Geolite.Csv
build-depends:
base >= 4.7 && < 5
, colonnade
, siphon
, ip >= 0.8.4
, text
, pipes
default-language: Haskell2010
test-suite geolite-csv-test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Spec.hs
build-depends:
base
, geolite-csv
, siphon
, colonnade
, test-framework
, text
, pipes
, HUnit
, test-framework-hunit
, pipes-bytestring
, pipes-text
ghc-options: -threaded -rtsopts -with-rtsopts=-N
default-language: Haskell2010
source-repository head
type: git
location: https://github.com/andrewthad/colonnade

View File

@ -0,0 +1,54 @@
{-# LANGUAGE OverloadedStrings #-}
module Geolite.Csv where
import Data.Text (Text)
import Pipes (Pipe)
import Colonnade.Types
import Geolite.Types
import qualified Data.Text as Text
import qualified Net.IPv4.Range.Text as IPv4RangeText
import qualified Data.Text.Read as TextRead
import qualified Siphon.Decoding as SD
import qualified Siphon.Content as SC
import qualified Colonnade.Decoding.Text as CDT
import qualified Colonnade.Decoding as CD
cities :: Monad m => Pipe Text City m (DecodingRowError Headed Text)
cities = SD.headedPipe SC.text decodingCity
blocks :: Monad m => Pipe Text Block m (DecodingRowError Headed Text)
blocks = SD.headedPipe SC.text decodingBlock
decodingCity :: Decoding Headed Text City
decodingCity = City
<$> fmap GeonameId (CD.headed "geoname_id" CDT.int)
<*> CD.headed "locale_code" CDT.text
<*> CD.headed "continent_code" CDT.text
<*> CD.headed "continent_name" CDT.text
<*> CD.headed "country_iso_code" CDT.text
<*> CD.headed "country_name" CDT.text
<*> CD.headed "subdivision_1_iso_code" CDT.text
<*> CD.headed "subdivision_1_name" CDT.text
<*> CD.headed "subdivision_2_iso_code" CDT.text
<*> CD.headed "subdivision_2_name" CDT.text
<*> CD.headed "metro_code" (CDT.optional CDT.int)
<*> CD.headed "time_zone" CDT.text
decodingBlock :: Decoding Headed Text Block
decodingBlock = Block
<$> CD.headed "network" IPv4RangeText.decodeEither
<*> CD.headed "geoname_id" (CDT.map GeonameId CDT.int)
<*> CD.headed "registered_country_geoname_id"
(CDT.map GeonameId CDT.int)
<*> CD.headed "represented_country_geoname_id"
(CDT.optional $ CDT.map GeonameId CDT.int)
<*> CD.headed "is_anonymous_proxy" (CDT.trueFalse "1" "0")
<*> CD.headed "is_satellite_provider" (CDT.trueFalse "1" "0")
<*> CD.headed "postal_code" CDT.text
<*> CD.headed "latitude" (CDT.fromReader TextRead.rational)
<*> CD.headed "longitude" (CDT.fromReader TextRead.rational)
<*> CD.headed "accuracy_radius" CDT.int

View File

@ -0,0 +1,42 @@
module Geolite.Types where
import Net.Types (IPv4Range)
import Data.Text (Text)
import Data.Fixed
data E4
instance HasResolution E4 where
resolution _ = 4
newtype GeonameId = GeonameId { getGeonameId :: Int }
deriving (Show,Read,Eq,Ord)
data City = City
{ cityGeonameId :: GeonameId
, cityLocaleCode :: Text
, cityContinentCode :: Text
, cityContinentName :: Text
, cityCountryIsoCode :: Text
, cityCountryName :: Text
, citySubdivision1IsoCode :: Text
, citySubdivision1Name :: Text
, citySubdivision2IsoCode :: Text
, citySubdivision2Name :: Text
, cityMetroCode :: Maybe Int
, cityTimeZone :: Text
} deriving (Show,Read,Eq,Ord)
data Block = Block
{ blockNetwork :: IPv4Range
, blockGeonameId :: GeonameId
, blockRegisteredCountryGeonameId :: GeonameId
, blockRepresentedCountryGeonameId :: Maybe GeonameId
, blockIsAnonymousProxy :: Bool
, blockIsSatelliteProvider :: Bool
, blockPostalCode :: Text
, blockLatitude :: Fixed E4
, blockLongitude :: Fixed E4
, blockAccuracyRadius :: Int
} deriving (Show,Read,Eq,Ord)

46
geolite-csv/test/Spec.hs Normal file
View File

@ -0,0 +1,46 @@
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import Test.HUnit (Assertion,(@?=))
import Test.Framework (defaultMain, testGroup, Test)
import Test.Framework.Providers.HUnit (testCase)
import Geolite.Csv (cities,blocks)
import Data.Text (Text)
import Colonnade.Types
import Siphon.Types
import Data.Functor.Identity
import System.IO (withFile,IOMode(ReadMode))
import qualified Data.Text as Text
import qualified Pipes.Prelude as Pipes
import qualified Pipes.ByteString as PB
import qualified Pipes.Text.Encoding as PT
import qualified Siphon.Decoding as SD
import Pipes
main :: IO ()
main = defaultMain tests
tests :: [Test]
tests =
[ testGroup "Geolite CSV Decoding"
[ testCase "Network Blocks" $ streamFileWith
"data/GeoLite2-City-Blocks-IPv4.small.csv"
blocks
, testCase "English City Locations" $ streamFileWith
"data/GeoLite2-City-Locations-en.small.csv"
cities
]
]
streamFileWith ::
String
-> Pipe Text a IO (DecodingRowError Headed Text)
-> Assertion
streamFileWith filename decodingPipe = do
r <- withFile filename ReadMode $ \h -> runEffect $
fmap SD.csvResultFromEither (PT.decode (PT.utf8 . PT.eof) $ PB.fromHandle h)
>-> fmap SD.csvResultFromDecodingRowError decodingPipe
>-> Pipes.drain
r @?= CsvResultSuccess

View File

@ -1,5 +1,5 @@
name: reflex-dom-colonnade
version: 0.3
version: 0.4
synopsis: Use colonnade with reflex-dom
description: Please see README.md
homepage: https://github.com/andrewthad/colonnade#readme

View File

@ -1,3 +1,5 @@
{-# LANGUAGE DeriveFunctor #-}
module Reflex.Dom.Colonnade where
import Colonnade.Types
@ -18,7 +20,10 @@ cell = Cell Map.empty
data Cell m b = Cell
{ cellAttrs :: !(Map String String)
, cellContents :: !(m b)
}
} deriving (Functor)
-- instance Functor (Cell m) where
-- fmap f (a
basic :: (MonadWidget t m, Foldable f)
=> Map String String -- ^ Table element attributes
@ -29,7 +34,7 @@ basic tableAttrs as encoding = do
elAttr "table" tableAttrs $ do
theadBuild encoding
el "tbody" $ forM_ as $ \a -> do
el "tr" $ mapM_ (Encoding.runRowMonadic encoding (elFromCell "td")) as
el "tr" $ Encoding.runRowMonadic encoding (elFromCell "td") a
elFromCell :: MonadWidget t m => String -> Cell m b -> m b
elFromCell name (Cell attrs contents) = elAttr name attrs contents

View File

@ -23,6 +23,7 @@ library
Siphon.Encoding
Siphon.Decoding
Siphon.Internal
Siphon.Internal.Text
build-depends:
base >= 4.7 && < 5
, colonnade
@ -38,7 +39,7 @@ test-suite siphon-test
type: exitcode-stdio-1.0
hs-source-dirs: test
main-is: Test.hs
build-depends:
build-depends:
base
, either
, siphon

View File

@ -1,5 +1,8 @@
module Siphon.Content
( byteStringChar8
, text
) where
import Siphon.Internal
import Siphon.Internal (byteStringChar8)
import Siphon.Internal.Text (text)

View File

@ -27,6 +27,13 @@ mkParseError i ctxs msg = id
, "]"
]
csvResultFromEither :: Either (Producer ByteString m ()) () -> CsvResult f c
csvResultFromEither (Left _) = CsvResultTextDecodeError
csvResultFromEither (Right ()) = CsvResultSuccess
csvResultFromDecodingRowError :: DecodingRowError f c -> CsvResult f c
csvResultFromDecodingRowError = CsvResultDecodeError
-- | This is seldom useful but is included for completeness.
headlessPipe :: Monad m
=> Siphon c
@ -45,7 +52,7 @@ indexedPipe sd decoding = do
e <- consumeGeneral 0 sd mkParseError
case e of
Left err -> return err
Right (firstRow, mleftovers) ->
Right (firstRow, mleftovers) ->
let req = Decoding.maxIndex decoding
vlen = Vector.length firstRow
in if vlen < req
@ -65,7 +72,7 @@ headedPipe sd decoding = do
e <- consumeGeneral 0 sd mkParseError
case e of
Left err -> return err
Right (headers, mleftovers) ->
Right (headers, mleftovers) ->
case Decoding.headedToIndexed headers decoding of
Left headingErrs -> return (DecodingRowError 0 (RowErrorHeading headingErrs))
Right indexedDecoding ->
@ -103,7 +110,7 @@ consumeGeneral ix (Siphon _ _ parse isNull) wrapParseError = do
c <- awaitSkip isNull
handleResult (k c)
handleResult r = case r of
Atto.Fail _ ctxs msg -> return $ Left
Atto.Fail _ ctxs msg -> return $ Left
$ wrapParseError ix ctxs msg
Atto.Done c v ->
let mcontent = if isNull c

View File

@ -30,6 +30,7 @@ import qualified Data.Vector as V
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as LByteString
import qualified Data.ByteString.Builder as Builder
import qualified Data.Text as T
import Data.Word (Word8)
import Data.Vector (Vector)
import Data.ByteString (ByteString)

View File

@ -0,0 +1,189 @@
{-# LANGUAGE BangPatterns #-}
module Siphon.Internal.Text where
import Siphon.Types
import Control.Applicative (optional)
import Data.Attoparsec.Text (char, endOfInput, string)
import qualified Data.Attoparsec.Text as A
import qualified Data.Attoparsec.Text.Lazy as AL
import qualified Data.Attoparsec.Zepto as Z
import qualified Data.Text as T
import qualified Data.Text as Text
import qualified Data.Vector as V
import qualified Data.Text.Lazy as LText
import qualified Data.Text.Lazy.Builder as Builder
import Data.Text.Lazy.Builder (Builder)
import Data.Word (Word8)
import Data.Vector (Vector)
import Data.Text (Text)
import Data.Coerce (coerce)
import Siphon.Types
import Control.Applicative
import Data.Monoid
text :: Siphon Text
text = Siphon
escape
encodeRow
(A.parse (row comma))
Text.null
encodeRow :: Vector (Escaped Text) -> Text
encodeRow = id
. flip Text.append (Text.singleton newline)
. Text.intercalate (Text.singleton comma)
. V.toList
. coerce
escape :: Text -> Escaped Text
escape t = case Text.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
Nothing -> Escaped t
Just _ -> escapeAlways t
-- | This implementation is definitely suboptimal.
-- A better option (which would waste a little space
-- but would be much faster) would be to build the
-- new text by writing to a buffer directly.
escapeAlways :: Text -> Escaped Text
escapeAlways t = Escaped $ Text.concat
[ textDoubleQuote
, Text.replace textDoubleQuote (Text.pack [doubleQuote,doubleQuote]) t
, textDoubleQuote
]
-- | Specialized version of 'sepBy1'' which is faster due to not
-- accepting an arbitrary separator.
sepByDelim1' :: A.Parser a
-> Char -- ^ Field delimiter
-> A.Parser [a]
sepByDelim1' p !delim = liftM2' (:) p loop
where
loop = do
mb <- A.peekChar
case mb of
Just b | b == delim -> liftM2' (:) (A.anyChar *> p) loop
_ -> pure []
{-# INLINE sepByDelim1' #-}
-- | Specialized version of 'sepBy1'' which is faster due to not
-- accepting an arbitrary separator.
sepByEndOfLine1' :: A.Parser a
-> A.Parser [a]
sepByEndOfLine1' p = liftM2' (:) p loop
where
loop = do
mb <- A.peekChar
case mb of
Just b | b == cr ->
liftM2' (:) (A.anyChar *> A.char newline *> p) loop
| b == newline ->
liftM2' (:) (A.anyChar *> p) loop
_ -> pure []
{-# INLINE sepByEndOfLine1' #-}
-- | Parse a record, not including the terminating line separator. The
-- terminating line separate is not included as the last record in a
-- CSV file is allowed to not have a terminating line separator. You
-- most likely want to use the 'endOfLine' parser in combination with
-- this parser.
row :: Char -- ^ Field delimiter
-> A.Parser (Vector Text)
row !delim = rowNoNewline delim <* endOfLine
{-# INLINE row #-}
rowNoNewline :: Char -- ^ Field delimiter
-> A.Parser (Vector Text)
rowNoNewline !delim = V.fromList <$!> field delim `sepByDelim1'` delim
{-# INLINE rowNoNewline #-}
-- | Parse a field. The field may be in either the escaped or
-- non-escaped format. The return value is unescaped.
field :: Char -> A.Parser Text
field !delim = do
mb <- A.peekChar
-- We purposely don't use <|> as we want to commit to the first
-- choice if we see a double quote.
case mb of
Just b | b == doubleQuote -> escapedField
_ -> unescapedField delim
{-# INLINE field #-}
escapedField :: A.Parser Text
escapedField = do
_ <- dquote -- This can probably be replaced with anyChar
b <- escapedFieldInner mempty
return (LText.toStrict (Builder.toLazyText b))
escapedFieldInner :: Builder -> A.Parser Builder
escapedFieldInner b = do
t <- A.takeTill (== doubleQuote)
_ <- A.anyChar -- this will always be a double quote
c <- A.peekChar'
if c == doubleQuote
then do
_ <- A.anyChar -- this will always be a double quote
escapedFieldInner (b `mappend` Builder.fromText t `mappend` Builder.fromText textDoubleQuote)
else return (b `mappend` Builder.fromText t)
unescapedField :: Char -> A.Parser Text
unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
c /= newline &&
c /= delim &&
c /= cr)
dquote :: A.Parser Char
dquote = char doubleQuote
unescape :: A.Parser Text
unescape = (LText.toStrict . Builder.toLazyText) <$!> go mempty where
go acc = do
h <- A.takeWhile (/= doubleQuote)
let rest = do
c0 <- A.anyChar
c1 <- A.anyChar
if (c0 == doubleQuote && c1 == doubleQuote)
then go (acc `mappend` Builder.fromText h `mappend` Builder.fromText textDoubleQuote)
else fail "invalid CSV escape sequence"
done <- A.atEnd
if done
then return (acc `mappend` Builder.fromText h)
else rest
-- | A strict version of 'Data.Functor.<$>' for monads.
(<$!>) :: Monad m => (a -> b) -> m a -> m b
f <$!> m = do
a <- m
return $! f a
{-# INLINE (<$!>) #-}
infixl 4 <$!>
-- | A version of 'liftM2' that is strict in the result of its first
-- action.
liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c
liftM2' f a b = do
!x <- a
y <- b
return (f x y)
{-# INLINE liftM2' #-}
-- | Match either a single newline character @\'\\n\'@, or a carriage
-- return followed by a newline character @\"\\r\\n\"@, or a single
-- carriage return @\'\\r\'@.
endOfLine :: A.Parser ()
endOfLine = (A.char newline *> return ()) <|> (string (Text.pack "\r\n") *> return ()) <|> (A.char cr *> return ())
{-# INLINE endOfLine #-}
textDoubleQuote :: Text
textDoubleQuote = Text.singleton doubleQuote
doubleQuote, newline, cr, comma :: Char
doubleQuote = '\"'
newline = '\n'
cr = '\r'
comma = ','

View File

@ -1,6 +1,7 @@
module Siphon.Types where
import Data.Vector (Vector)
import Colonnade.Types (DecodingRowError)
import qualified Data.Attoparsec.Types as Atto
newtype Escaped c = Escaped { getEscaped :: c }
@ -12,18 +13,26 @@ data Siphon c = Siphon
, siphonNull :: c -> Bool
}
-- | This type is provided for convenience with @pipes-text@
data CsvResult f c
= CsvResultSuccess
| CsvResultTextDecodeError
| CsvResultDecodeError (DecodingRowError f c)
deriving (Show,Read,Eq)
-- | Consider changing out the use of 'Vector' here
-- with the humble list instead. It might fuse away
-- better. Not sure though.
data SiphonX c1 c2 = SiphonX
{ siphonXEscape :: !(c1 -> Escaped c2)
, siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
}
data SiphonDecoding c1 c2 = SiphonDecoding
{ siphonDecodingParse :: c1 -> Atto.IResult c1 (Vector c2)
, siphonDecodingNull :: c1 -> Bool
}
-- data SiphonX c1 c2 = SiphonX
-- { siphonXEscape :: !(c1 -> Escaped c2)
-- , siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
-- }
--
-- data SiphonDecoding c1 c2 = SiphonDecoding
-- { siphonDecodingParse :: c1 -> Atto.IResult c1 (Vector c2)
-- , siphonDecodingNull :: c1 -> Bool
-- }
-- data WithEnd c = WithEnd
-- { withEndEnded :: !Bool

View File

@ -1,19 +1,23 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
module Main (main) where
import Test.QuickCheck (Gen, Arbitrary(..), choose)
import Test.QuickCheck (Gen, Arbitrary(..), choose, elements)
import Test.HUnit (Assertion,(@?=))
import Test.Framework (defaultMain, testGroup, Test)
import Test.Framework.Providers.QuickCheck2 (testProperty)
import Test.Framework.Providers.HUnit (testCase)
import Data.ByteString (ByteString)
import Data.Text (Text)
import GHC.Generics (Generic)
import Data.Either.Combinators
import Colonnade.Types
import Siphon.Types
import Data.Functor.Identity
import Data.Functor.Contravariant (contramap)
import Data.Functor.Contravariant.Divisible (divided,conquered)
import qualified Data.Text as Text
import qualified Data.ByteString.Builder as Builder
import qualified Data.ByteString.Lazy as LByteString
import qualified Data.ByteString as ByteString
@ -22,6 +26,8 @@ import qualified Colonnade.Decoding as Decoding
import qualified Colonnade.Encoding as Encoding
import qualified Colonnade.Decoding.ByteString.Char8 as CDB
import qualified Colonnade.Encoding.ByteString.Char8 as CEB
import qualified Colonnade.Decoding.Text as CDT
import qualified Colonnade.Encoding.Text as CET
import qualified Siphon.Encoding as SE
import qualified Siphon.Decoding as SD
import qualified Siphon.Content as SC
@ -35,10 +41,10 @@ tests :: [Test]
tests =
[ testGroup "ByteString encode/decode"
[ testCase "Headless Encoding (int,char,bool)"
$ runTestScenario
SC.byteStringChar8
$ runTestScenario
SC.byteStringChar8
SE.pipe
encodingA
encodingA
"4,c,false\n"
, testProperty "Headless Isomorphism (int,char,bool)"
$ propIsoPipe $
@ -46,8 +52,8 @@ tests =
>->
(void $ SD.headlessPipe SC.byteStringChar8 decodingA)
, testCase "Headed Encoding (int,char,bool)"
$ runTestScenario
SC.byteStringChar8
$ runTestScenario
SC.byteStringChar8
SE.headedPipe
encodingB
$ ByteString.concat
@ -55,8 +61,8 @@ tests =
, "4,c,false\n"
]
, testCase "Headed Encoding (int,char,bool) monoidal building"
$ runTestScenario
SC.byteStringChar8
$ runTestScenario
SC.byteStringChar8
SE.headedPipe
encodingC
$ ByteString.concat
@ -69,8 +75,53 @@ tests =
>->
(void $ SD.headedPipe SC.byteStringChar8 decodingB)
]
, testGroup "Text encode/decode"
[ testCase "Headless Encoding (int,char,bool)"
$ runTestScenario
SC.text
SE.pipe
encodingW
"4,c,false\n"
, testCase "Headless Encoding (Foo,Foo,Foo)"
$ runCustomTestScenario
SC.text
SE.pipe
encodingY
(FooA,FooA,FooC)
"Simple,Simple,\"More\"\"Escaped,\"\"\"\"Chars\"\n"
, testProperty "Headless Isomorphism (Foo,Foo,Foo)"
$ propIsoPipe $
(SE.pipe SC.text encodingY)
>->
(void $ SD.headlessPipe SC.text decodingY)
]
]
data Foo = FooA | FooB | FooC
deriving (Generic,Eq,Ord,Show,Read,Bounded,Enum)
instance Arbitrary Foo where
arbitrary = elements [minBound..maxBound]
fooToString :: Foo -> String
fooToString x = case x of
FooA -> "Simple"
FooB -> "With,Escaped\nChars"
FooC -> "More\"Escaped,\"\"Chars"
encodeFoo :: (String -> c) -> Foo -> c
encodeFoo f = f . fooToString
fooFromString :: String -> Either String Foo
fooFromString x = case x of
"Simple" -> Right FooA
"With,Escaped\nChars" -> Right FooB
"More\"Escaped,\"\"Chars" -> Right FooC
_ -> Left "failed to decode Foo"
decodeFoo :: (c -> String) -> c -> Either String Foo
decodeFoo f = fooFromString . f
decodingA :: Decoding Headless ByteString (Int,Char,Bool)
decodingA = (,,)
<$> Decoding.headless CDB.int
@ -88,7 +139,27 @@ encodingA = contramap tripleToPairs
$ divided (Encoding.headless CEB.int)
$ divided (Encoding.headless CEB.char)
$ divided (Encoding.headless CEB.bool)
$ conquered
$ conquered
encodingW :: Encoding Headless Text (Int,Char,Bool)
encodingW = contramap tripleToPairs
$ divided (Encoding.headless CET.int)
$ divided (Encoding.headless CET.char)
$ divided (Encoding.headless CET.bool)
$ conquered
encodingY :: Encoding Headless Text (Foo,Foo,Foo)
encodingY = contramap tripleToPairs
$ divided (Encoding.headless $ encodeFoo Text.pack)
$ divided (Encoding.headless $ encodeFoo Text.pack)
$ divided (Encoding.headless $ encodeFoo Text.pack)
$ conquered
decodingY :: Decoding Headless Text (Foo,Foo,Foo)
decodingY = (,,)
<$> Decoding.headless (decodeFoo Text.unpack)
<*> Decoding.headless (decodeFoo Text.unpack)
<*> Decoding.headless (decodeFoo Text.unpack)
encodingB :: Encoding Headed ByteString (Int,Char,Bool)
encodingB = contramap tripleToPairs
@ -112,14 +183,26 @@ propIsoPipe p as = (Pipes.toList $ each as >-> p) == as
runTestScenario :: (Monoid c, Eq c, Show c)
=> Siphon c
-> (Siphon c -> Encoding f c (Int,Char,Bool) -> Pipe (Int,Char,Bool) c Identity ())
-> Encoding f c (Int,Char,Bool)
-> c
-> Encoding f c (Int,Char,Bool)
-> c
-> Assertion
runTestScenario s p e c =
runTestScenario s p e c =
( mconcat $ Pipes.toList $
Pipes.yield (4,'c',False) >-> p s e
) @?= c
runCustomTestScenario :: (Monoid c, Eq c, Show c)
=> Siphon c
-> (Siphon c -> Encoding f c a -> Pipe a c Identity ())
-> Encoding f c a
-> a
-> c
-> Assertion
runCustomTestScenario s p e a c =
( mconcat $ Pipes.toList $
Pipes.yield a >-> p s e
) @?= c
-- testEncodingA :: Assertion
-- testEncodingA = runTestScenario encodingA "4,c,false\n"

View File

@ -1,5 +1,5 @@
# This file was automatically generated by 'stack init'
#
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/
@ -7,7 +7,7 @@
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
@ -19,7 +19,7 @@ resolver: lts-6.4
# User packages to be built.
# Various formats can be used as shown in the example below.
#
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
@ -31,7 +31,7 @@ resolver: lts-6.4
# subdirs:
# - auto-update
# - wai
#
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
@ -39,6 +39,7 @@ packages:
- 'colonnade'
- 'reflex-dom-colonnade'
- 'siphon'
- 'geolite-csv'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps:
@ -48,6 +49,7 @@ extra-deps:
- 'aeson-0.9.0.1'
- 'haskell-src-exts-1.16.0.1'
- 'syb-0.5.1'
- 'ip-0.8.4'
@ -59,18 +61,18 @@ extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.1"
#
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor