diff --git a/colonnade/colonnade.cabal b/colonnade/colonnade.cabal
index d155e06..a83c88e 100644
--- a/colonnade/colonnade.cabal
+++ b/colonnade/colonnade.cabal
@@ -20,6 +20,7 @@ library
     Colonnade.Encoding.Text
     Colonnade.Encoding.ByteString.Char8
     Colonnade.Decoding
+    Colonnade.Decoding.Text
     Colonnade.Decoding.ByteString.Char8
     Colonnade.Internal
     Colonnade.Internal.Ap
diff --git a/colonnade/src/Colonnade/Decoding.hs b/colonnade/src/Colonnade/Decoding.hs
index 973b516..1cc2ef7 100644
--- a/colonnade/src/Colonnade/Decoding.hs
+++ b/colonnade/src/Colonnade/Decoding.hs
@@ -3,7 +3,7 @@
 {-# LANGUAGE BangPatterns #-}
 module Colonnade.Decoding where
 
-import Colonnade.Internal (EitherWrap(..))
+import Colonnade.Internal (EitherWrap(..),mapLeft)
 import Colonnade.Types
 import Data.Functor.Contravariant
 import Data.Vector (Vector)
@@ -101,14 +101,3 @@ headedToIndexed v = getEitherWrap . go
     <$> EitherWrap rcurrent
     <*> rnext
 
-eitherMonoidAp :: Monoid a => Either a (b -> c) -> Either a b -> Either a c
-eitherMonoidAp = go where
-  go (Left a1) (Left a2) = Left (mappend a1 a2)
-  go (Left a1) (Right _) = Left a1
-  go (Right _) (Left a2) = Left a2
-  go (Right f) (Right b) = Right (f b)
-
-mapLeft :: (a -> b) -> Either a c -> Either b c
-mapLeft _ (Right a) = Right a
-mapLeft f (Left a) = Left (f a)
-
diff --git a/colonnade/src/Colonnade/Decoding/Text.hs b/colonnade/src/Colonnade/Decoding/Text.hs
new file mode 100644
index 0000000..595deba
--- /dev/null
+++ b/colonnade/src/Colonnade/Decoding/Text.hs
@@ -0,0 +1,47 @@
+module Colonnade.Decoding.Text where
+
+import Prelude hiding (map)
+import Data.Text (Text)
+import qualified Data.Text as Text
+import qualified Data.Text.Read as TextRead
+
+char :: Text -> Either String Char
+char t = case Text.length t of
+  1 -> Right (Text.head t)
+  0 -> Left "cannot decode Char from empty text"
+  _ -> Left "cannot decode Char from multi-character text"
+
+text :: Text -> Either String Text
+text = Right
+
+int :: Text -> Either String Int
+int t = do
+  (a,tRem) <- TextRead.decimal t
+  if Text.null tRem
+    then Right a
+    else Left "found extra characters after int"
+
+trueFalse :: Text -> Text -> Text -> Either String Bool
+trueFalse t f txt
+  | txt == t = Right True
+  | txt == f = Right False
+  | otherwise = Left $ concat
+      ["must be [", Text.unpack t, "] or [", Text.unpack f, "]"]
+
+-- | This refers to the 'TextRead.Reader' from @Data.Text.Read@, not
+-- to the @Reader@ monad.
+fromReader :: TextRead.Reader a -> Text -> Either String a
+fromReader f t = do
+  (a,tRem) <- f t
+  if Text.null tRem
+    then Right a
+    else Left "found extra characters at end of text"
+
+optional :: (Text -> Either String a) -> Text -> Either String (Maybe a)
+optional f t = if Text.null t
+  then Right Nothing
+  else fmap Just (f t)
+
+map :: (a -> b) -> (Text -> Either String a) -> Text -> Either String b
+map f g t = fmap f (g t)
+
diff --git a/colonnade/src/Colonnade/Encoding/Text.hs b/colonnade/src/Colonnade/Encoding/Text.hs
index 604e4c1..ad383a3 100644
--- a/colonnade/src/Colonnade/Encoding/Text.hs
+++ b/colonnade/src/Colonnade/Encoding/Text.hs
@@ -17,3 +17,8 @@ int = LText.toStrict
 
 text :: Text -> Text
 text = id
+bool :: Bool -> Text
+bool x = case x of
+  True -> Text.pack "true"
+  False -> Text.pack "false"
+
diff --git a/colonnade/src/Colonnade/Internal.hs b/colonnade/src/Colonnade/Internal.hs
index 727ebb5..dd3f36a 100644
--- a/colonnade/src/Colonnade/Internal.hs
+++ b/colonnade/src/Colonnade/Internal.hs
@@ -12,3 +12,6 @@ instance Monoid a => Applicative (EitherWrap a) where
   EitherWrap (Right _) <*> EitherWrap (Left a2) = EitherWrap (Left a2)
   EitherWrap (Right f) <*> EitherWrap (Right b) = EitherWrap (Right (f b))
 
+mapLeft :: (a -> b) -> Either a c -> Either b c
+mapLeft _ (Right a) = Right a
+mapLeft f (Left a) = Left (f a)
diff --git a/colonnade/src/Colonnade/Types.hs b/colonnade/src/Colonnade/Types.hs
index 99443e4..f9edcd8 100644
--- a/colonnade/src/Colonnade/Types.hs
+++ b/colonnade/src/Colonnade/Types.hs
@@ -38,7 +38,7 @@ data Indexed f a = Indexed
 data HeadingErrors content = HeadingErrors
   { headingErrorsMissing :: Vector content -- ^ headers that were missing
   , headingErrorsDuplicate :: Vector (content,Int) -- ^ headers that occurred more than once
-  } deriving (Show,Read)
+  } deriving (Show,Read,Eq)
 
 instance (Show content, Typeable content) => Exception (HeadingErrors content)
 
@@ -51,27 +51,32 @@ data DecodingCellError f content = DecodingCellError
   { decodingCellErrorContent :: !content
   , decodingCellErrorHeader :: !(Indexed f content)
   , decodingCellErrorMessage :: !String
-  } deriving (Show,Read)
+  } deriving (Show,Read,Eq)
 
 -- instance (Show (f content), Typeable content) => Exception (DecodingError f content)
 
 newtype DecodingCellErrors f content = DecodingCellErrors
   { getDecodingCellErrors :: Vector (DecodingCellError f content)
-  } deriving (Monoid,Show,Read)
+  } deriving (Monoid,Show,Read,Eq)
 
 -- newtype ParseRowError = ParseRowError String
 
+-- TODO: rewrite the instances for this by hand. They
+-- currently use FlexibleContexts.
 data DecodingRowError f content = DecodingRowError
   { decodingRowErrorRow :: !Int
   , decodingRowErrorError :: !(RowError f content)
-  }
+  } deriving (Show,Read,Eq)
 
+-- TODO: rewrite the instances for this by hand. They
+-- currently use FlexibleContexts.
 data RowError f content
   = RowErrorParse !String -- ^ Error occurred parsing the document into cells
   | RowErrorDecode !(DecodingCellErrors f content) -- ^ Error decoding the content
   | RowErrorSize !Int !Int -- ^ Wrong number of cells in the row
   | RowErrorHeading !(HeadingErrors content)
   | RowErrorMinSize !Int !Int
+  deriving (Show,Read,Eq)
 
 -- instance (Show (f content), Typeable content) => Exception (DecodingErrors f content)
diff --git a/geolite-csv/LICENSE b/geolite-csv/LICENSE
new file mode 100644
index 0000000..9beb3f9
--- /dev/null
+++ b/geolite-csv/LICENSE
@@ -0,0 +1,30 @@
+Copyright Andrew Martin (c) 2016
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * Neither the name of Andrew Martin nor the names of other
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/geolite-csv/Setup.hs b/geolite-csv/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/geolite-csv/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/geolite-csv/data/GeoLite2-City-Blocks-IPv4.small.csv b/geolite-csv/data/GeoLite2-City-Blocks-IPv4.small.csv
new file mode 100644
index 0000000..c5c1f84
--- /dev/null
+++ b/geolite-csv/data/GeoLite2-City-Blocks-IPv4.small.csv
@@ -0,0 +1,11 @@
+network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider,postal_code,latitude,longitude,accuracy_radius
+24.165.56.0/22,5848280,6252001,,0,0,96746,22.0837,-159.3553,10
+78.146.173.128/25,2655583,2635167,,0,0,DL14,54.6500,-1.6667,20
+121.211.108.0/23,2160386,2077456,,0,0,2040,-33.8833,151.1500,5
+69.74.43.16/30,6252001,6252001,,0,0,,37.7510,-97.8220,1000
+77.128.35.136/30,3034803,3017382,,0,0,57450,49.0667,6.8333,20
+90.54.234.0/24,2977062,3017382,,0,0,49320,47.3944,-0.4357,50
+77.193.41.175/32,3018587,3017382,,0,0,78810,48.8700,1.9740,1
+58.188.32.0/24,1861060,1861060,,0,0,,35.6900,139.6900,500
+87.81.232.0/24,2635167,2635167,,0,0,,51.4964,-0.1224,200
+88.191.56.0/22,2988507,3017382,,0,0,75001,48.8667,2.3333,500
diff --git a/geolite-csv/data/GeoLite2-City-Locations-en.small.csv b/geolite-csv/data/GeoLite2-City-Locations-en.small.csv
new file mode 100644
index 0000000..0fbf70a
--- /dev/null
+++ b/geolite-csv/data/GeoLite2-City-Locations-en.small.csv
@@ -0,0 +1,21 @@
+geoname_id,locale_code,continent_code,continent_name,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,subdivision_2_iso_code,subdivision_2_name,city_name,metro_code,time_zone
+2653810,en,EU,Europe,GB,"United Kingdom",SCT,Scotland,GLG,"Glasgow City",Cardonald,,Europe/London
+2832529,en,EU,Europe,DE,Germany,RP,Rheinland-Pfalz,,,Siefersheim,,Europe/Berlin
+2885499,en,EU,Europe,DE,Germany,MV,Mecklenburg-Vorpommern,,,Koerchow,,Europe/Berlin
+550870,en,EU,Europe,RU,Russia,NIZ,"Nizhegorodskaya Oblast'",,,Khabarskoye,,Europe/Moscow
+766583,en,EU,Europe,PL,Poland,LU,"Lublin Voivodeship",,,Leczna,,Europe/Warsaw
+2608246,en,EU,Europe,AT,Austria,1,Burgenland,,,"Neuhaus am Klausenbach",,Europe/Vienna
+5121765,en,NA,"North America",US,"United States",NY,"New York",,,Ilion,526,America/New_York
+2935825,en,EU,Europe,DE,Germany,NW,"North Rhine-Westphalia",,,Dormagen,,Europe/Berlin
+3165189,en,EU,Europe,IT,Italy,36,"Friuli Venezia Giulia",UD,"Provincia di Udine",Tricesimo,,Europe/Rome
+4564070,en,NA,"North America",PR,"Puerto Rico",,,,,Culebra,,America/Puerto_Rico
+2993759,en,EU,Europe,FR,France,U,"Provence-Alpes-Côte d'Azur",13,Bouches-du-Rhône,Miramas-le-Vieux,,Europe/Paris
+5861117,en,NA,"North America",US,"United States",AK,Alaska,,,"Dutch Harbor",743,America/Adak
+4375229,en,NA,"North America",US,"United States",MO,Missouri,,,Ashland,604,America/Chicago
+2946980,en,EU,Europe,DE,Germany,SN,Saxony,,,Boehlen,,Europe/Berlin
+3156470,en,EU,Europe,NO,Norway,02,Akershus,,,Frogner,,Europe/Oslo
+3166193,en,EU,Europe,IT,Italy,36,"Friuli Venezia Giulia",GO,"Provincia di Gorizia",Staranzano,,Europe/Rome
+4913742,en,NA,"North America",US,"United States",IL,Illinois,,,Tiskilwa,675,America/Chicago
+4853511,en,NA,"North America",US,"United States",IA,Iowa,,,Dayton,679,America/Chicago
+480876,en,EU,Europe,RU,Russia,ROS,Rostov,,,Tsimlyansk,,Europe/Moscow
+3000119,en,EU,Europe,FR,France,89,Yonne,,,"Les Ormes",,Europe/Paris
diff --git a/geolite-csv/data/GeoLite2-City-Locations-ja.small.csv b/geolite-csv/data/GeoLite2-City-Locations-ja.small.csv
new file mode 100644
index 0000000..33d5a64
--- /dev/null
+++ b/geolite-csv/data/GeoLite2-City-Locations-ja.small.csv
@@ -0,0 +1,21 @@
+geoname_id,locale_code,continent_code,continent_name,country_iso_code,country_name,subdivision_1_iso_code,subdivision_1_name,subdivision_2_iso_code,subdivision_2_name,city_name,metro_code,time_zone
+1260633,ja,AS,"アジア",IN,"インド",AP,"アーンドラ・プラデーシュ州",,,,,Asia/Kolkata
+4765167,ja,NA,"北アメリカ",US,"アメリカ合衆国",VA,"バージニア州",,,,573,America/New_York
+2703330,ja,EU,"ヨーロッパ",SE,"スウェーデン王国",Z,,,,,,Europe/Stockholm
+535886,ja,EU,"ヨーロッパ",RU,"ロシア",STA,,,,,,Europe/Moscow
+2989001,ja,EU,"ヨーロッパ",FR,"フランス共和国",F,,28,,,,Europe/Paris
+3183178,ja,EU,"ヨーロッパ",IT,"イタリア共和国",75,"プッリャ州",BA,,"アルタムーラ",,Europe/Rome
+3012956,ja,EU,"ヨーロッパ",FR,"フランス共和国",67,,,,,,Europe/Paris
+4189157,ja,NA,"北アメリカ",US,"アメリカ合衆国",GA,"ジョージア州",,,,524,America/New_York
+2758965,ja,EU,"ヨーロッパ",NL,"オランダ王国",ZE,,,,,,Europe/Amsterdam
+3570412,ja,NA,"北アメリカ",MQ,"マルティニーク島",,,,,,,America/Martinique
+3095604,ja,EU,"ヨーロッパ",PL,"ポーランド共和国",MZ,"マゾフシェ県",,,,,Europe/Warsaw
+3070865,ja,EU,"ヨーロッパ",CZ,"チェコ共和国",ST,"中央ボヘミア州",,,,,Europe/Prague
+2636062,ja,EU,"ヨーロッパ",GB,"イギリス",ENG,"イングランド",SRY,,,,Europe/London
+3019338,ja,EU,"ヨーロッパ",FR,"フランス共和国",57,,,,,,Europe/Paris
+2865603,ja,EU,"ヨーロッパ",DE,"ドイツ連邦共和国",BY,"バイエルン州",,,"ノイエンマルクト",,Europe/Berlin
+2930628,ja,EU,"ヨーロッパ",DE,"ドイツ連邦共和国",HE,,,,,,Europe/Berlin
+2976283,ja,EU,"ヨーロッパ",FR,"フランス共和国",01,,,,,,Europe/Paris
+4062424,ja,NA,"北アメリカ",US,"アメリカ合衆国",AL,"アラバマ州",,,,575,America/Chicago
+4461574,ja,NA,"北アメリカ",US,"アメリカ合衆国",NC,"ノースカロライナ州",,,"コンコード",517,America/New_York
+1279945,ja,AS,"アジア",CN,"中国",62,,,,"酒泉市",,Asia/Shanghai
diff --git a/geolite-csv/geolite-csv.cabal b/geolite-csv/geolite-csv.cabal
new file mode 100644
index 0000000..7561a30
--- /dev/null
+++ b/geolite-csv/geolite-csv.cabal
@@ -0,0 +1,51 @@
+name:                geolite-csv
+version:             0.1.0
+synopsis:            Initial project template from stack
+description:         Please see README.md
+homepage:            https://github.com/andrewthad/colonnade
+license:             BSD3
+license-file:        LICENSE
+author:              Andrew Martin
+maintainer:          andrew.thaddeus@gmail.com
+copyright:           2016 Andrew Martin
+category:            web
+build-type:          Simple
+-- extra-source-files:
+cabal-version:       >=1.10
+
+library
+  hs-source-dirs:      src
+  exposed-modules:
+    Geolite.Types
+    Geolite.Csv
+  build-depends:
+      base >= 4.7 && < 5
+    , colonnade
+    , siphon
+    , ip >= 0.8.4
+    , text
+    , pipes
+  default-language:    Haskell2010
+
+test-suite geolite-csv-test
+  type:                exitcode-stdio-1.0
+  hs-source-dirs:      test
+  main-is:             Spec.hs
+  build-depends:
+      base
+    , geolite-csv
+    , siphon
+    , colonnade
+    , test-framework
+    , text
+    , pipes
+    , HUnit
+    , test-framework-hunit
+    , pipes-bytestring
+    , pipes-text
+  ghc-options:         -threaded -rtsopts -with-rtsopts=-N
+  default-language:    Haskell2010
+
+source-repository head
+  type:     git
+  location: https://github.com/andrewthad/colonnade
diff --git a/geolite-csv/src/Geolite/Csv.hs b/geolite-csv/src/Geolite/Csv.hs
new file mode 100644
index 0000000..2e49282
--- /dev/null
+++ b/geolite-csv/src/Geolite/Csv.hs
@@ -0,0 +1,54 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Geolite.Csv where
+
+import Data.Text (Text)
+import Pipes (Pipe)
+import Colonnade.Types
+import Geolite.Types
+
+import qualified Data.Text as Text
+import qualified Net.IPv4.Range.Text as IPv4RangeText
+import qualified Data.Text.Read as TextRead
+import qualified Siphon.Decoding as SD
+import qualified Siphon.Content as SC
+import qualified Colonnade.Decoding.Text as CDT
+import qualified Colonnade.Decoding as CD
+
+cities :: Monad m => Pipe Text City m (DecodingRowError Headed Text)
+cities = SD.headedPipe SC.text decodingCity
+
+blocks :: Monad m => Pipe Text Block m (DecodingRowError Headed Text)
+blocks = SD.headedPipe SC.text decodingBlock
+
+decodingCity :: Decoding Headed Text City
+decodingCity = City
+  <$> fmap GeonameId (CD.headed "geoname_id" CDT.int)
+  <*> CD.headed "locale_code" CDT.text
+  <*> CD.headed "continent_code" CDT.text
+  <*> CD.headed "continent_name" CDT.text
+  <*> CD.headed "country_iso_code" CDT.text
+  <*> CD.headed "country_name" CDT.text
+  <*> CD.headed "subdivision_1_iso_code" CDT.text
+  <*> CD.headed "subdivision_1_name" CDT.text
+  <*> CD.headed "subdivision_2_iso_code" CDT.text
+  <*> CD.headed "subdivision_2_name" CDT.text
+  <*> CD.headed "metro_code" (CDT.optional CDT.int)
+  <*> CD.headed "time_zone" CDT.text
+
+decodingBlock :: Decoding Headed Text Block
+decodingBlock = Block
+  <$> CD.headed "network" IPv4RangeText.decodeEither
+  <*> CD.headed "geoname_id" (CDT.map GeonameId CDT.int)
+  <*> CD.headed "registered_country_geoname_id"
+        (CDT.map GeonameId CDT.int)
+  <*> CD.headed "represented_country_geoname_id"
+        (CDT.optional $ CDT.map GeonameId CDT.int)
+  <*> CD.headed "is_anonymous_proxy" (CDT.trueFalse "1" "0")
+  <*> CD.headed "is_satellite_provider" (CDT.trueFalse "1" "0")
+  <*> CD.headed "postal_code" CDT.text
+  <*> CD.headed "latitude" (CDT.fromReader TextRead.rational)
+  <*> CD.headed "longitude" (CDT.fromReader TextRead.rational)
+  <*> CD.headed "accuracy_radius" CDT.int
+
+
diff --git a/geolite-csv/src/Geolite/Types.hs b/geolite-csv/src/Geolite/Types.hs
new file mode 100644
index 0000000..94a0fab
--- /dev/null
+++ b/geolite-csv/src/Geolite/Types.hs
@@ -0,0 +1,42 @@
+module Geolite.Types where
+
+import Net.Types (IPv4Range)
+import Data.Text (Text)
+import Data.Fixed
+
+data E4
+
+instance HasResolution E4 where
+  resolution _ = 10000
+
+newtype GeonameId = GeonameId { getGeonameId :: Int }
+  deriving (Show,Read,Eq,Ord)
+
+data City = City
+  { cityGeonameId :: GeonameId
+  , cityLocaleCode :: Text
+  , cityContinentCode :: Text
+  , cityContinentName :: Text
+  , cityCountryIsoCode :: Text
+  , cityCountryName :: Text
+  , citySubdivision1IsoCode :: Text
+  , citySubdivision1Name :: Text
+  , citySubdivision2IsoCode :: Text
+  , citySubdivision2Name :: Text
+  , cityMetroCode :: Maybe Int
+  , cityTimeZone :: Text
+  } deriving (Show,Read,Eq,Ord)
+
+data Block = Block
+  { blockNetwork :: IPv4Range
+  , blockGeonameId :: GeonameId
+  , blockRegisteredCountryGeonameId :: GeonameId
+  , blockRepresentedCountryGeonameId :: Maybe GeonameId
+  , blockIsAnonymousProxy :: Bool
+  , blockIsSatelliteProvider :: Bool
+  , blockPostalCode :: Text
+  , blockLatitude :: Fixed E4
+  , blockLongitude :: Fixed E4
+  , blockAccuracyRadius :: Int
+  } deriving (Show,Read,Eq,Ord)
+
diff --git a/geolite-csv/test/Spec.hs b/geolite-csv/test/Spec.hs
new file mode 100644
index 0000000..ef86751
--- /dev/null
+++ b/geolite-csv/test/Spec.hs
@@ -0,0 +1,46 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Main (main) where
+
+import Test.HUnit (Assertion,(@?=))
+import Test.Framework (defaultMain, testGroup, Test)
+import Test.Framework.Providers.HUnit (testCase)
+import Geolite.Csv (cities,blocks)
+import Data.Text (Text)
+import Colonnade.Types
+import Siphon.Types
+import Data.Functor.Identity
+import System.IO (withFile,IOMode(ReadMode))
+import qualified Data.Text as Text
+import qualified Pipes.Prelude as Pipes
+import qualified Pipes.ByteString as PB
+import qualified Pipes.Text.Encoding as PT
+import qualified Siphon.Decoding as SD
+import Pipes
+
+main :: IO ()
+main = defaultMain tests
+
+tests :: [Test]
+tests =
+  [ testGroup "Geolite CSV Decoding"
+    [ testCase "Network Blocks" $ streamFileWith
+        "data/GeoLite2-City-Blocks-IPv4.small.csv"
+        blocks
+    , testCase "English City Locations" $ streamFileWith
+        "data/GeoLite2-City-Locations-en.small.csv"
+        cities
+    ]
+  ]
+
+streamFileWith ::
+     String
+  -> Pipe Text a IO (DecodingRowError Headed Text)
+  -> Assertion
+streamFileWith filename decodingPipe = do
+  r <- withFile filename ReadMode $ \h -> runEffect $
+        fmap SD.csvResultFromEither (PT.decode (PT.utf8 . PT.eof) $ PB.fromHandle h)
+    >-> fmap SD.csvResultFromDecodingRowError decodingPipe
+    >-> Pipes.drain
+  r @?= CsvResultSuccess
+
diff --git a/reflex-dom-colonnade/reflex-dom-colonnade.cabal b/reflex-dom-colonnade/reflex-dom-colonnade.cabal
index a5d83f1..5511b7a 100644
--- a/reflex-dom-colonnade/reflex-dom-colonnade.cabal
+++ b/reflex-dom-colonnade/reflex-dom-colonnade.cabal
@@ -1,5 +1,5 @@
 name:                reflex-dom-colonnade
-version:             0.3
+version:             0.4
 synopsis:            Use colonnade with reflex-dom
 description:         Please see README.md
 homepage:            https://github.com/andrewthad/colonnade#readme
diff --git a/reflex-dom-colonnade/src/Reflex/Dom/Colonnade.hs b/reflex-dom-colonnade/src/Reflex/Dom/Colonnade.hs
index 86c2634..ebce21d 100644
--- a/reflex-dom-colonnade/src/Reflex/Dom/Colonnade.hs
+++ b/reflex-dom-colonnade/src/Reflex/Dom/Colonnade.hs
@@ -1,3 +1,5 @@
+{-# LANGUAGE DeriveFunctor #-}
+
 module Reflex.Dom.Colonnade where
 
 import Colonnade.Types
@@ -18,7 +20,10 @@ cell = Cell Map.empty
 data Cell m b = Cell
   { cellAttrs :: !(Map String String)
   , cellContents :: !(m b)
-  }
+  } deriving (Functor)
+
+-- instance Functor (Cell m) where
+--   fmap f (a
 
 basic :: (MonadWidget t m, Foldable f)
   => Map String String -- ^ Table element attributes
@@ -29,7 +34,7 @@ basic tableAttrs as encoding = do
   elAttr "table" tableAttrs $ do
     theadBuild encoding
     el "tbody" $ forM_ as $ \a -> do
-      el "tr" $ mapM_ (Encoding.runRowMonadic encoding (elFromCell "td")) as
+      el "tr" $ Encoding.runRowMonadic encoding (elFromCell "td") a
 
 elFromCell :: MonadWidget t m => String -> Cell m b -> m b
 elFromCell name (Cell attrs contents) = elAttr name attrs contents
diff --git a/siphon/siphon.cabal b/siphon/siphon.cabal
index 089bba6..23e9442 100644
--- a/siphon/siphon.cabal
+++ b/siphon/siphon.cabal
@@ -23,6 +23,7 @@ library
     Siphon.Encoding
     Siphon.Decoding
     Siphon.Internal
+    Siphon.Internal.Text
   build-depends:
     base >= 4.7 && < 5
     , colonnade
@@ -38,7 +39,7 @@ test-suite siphon-test
   type: exitcode-stdio-1.0
   hs-source-dirs: test
  main-is: Test.hs
-  build-depends: 
+  build-depends:
     base
     , either
     , siphon
diff --git a/siphon/src/Siphon/Content.hs b/siphon/src/Siphon/Content.hs
index 26e1f79..899f38a 100644
--- a/siphon/src/Siphon/Content.hs
+++ b/siphon/src/Siphon/Content.hs
@@ -1,5 +1,8 @@
 module Siphon.Content
   ( byteStringChar8
+  , text
   ) where
 
-import Siphon.Internal
+import Siphon.Internal (byteStringChar8)
+import Siphon.Internal.Text (text)
+
diff --git a/siphon/src/Siphon/Decoding.hs b/siphon/src/Siphon/Decoding.hs
index 0369cd4..188d8ce 100644
--- a/siphon/src/Siphon/Decoding.hs
+++ b/siphon/src/Siphon/Decoding.hs
@@ -27,6 +27,13 @@ mkParseError i ctxs msg = id
     , "]"
     ]
 
+csvResultFromEither :: Either (Producer ByteString m ()) () -> CsvResult f c
+csvResultFromEither (Left _) = CsvResultTextDecodeError
+csvResultFromEither (Right ()) = CsvResultSuccess
+
+csvResultFromDecodingRowError :: DecodingRowError f c -> CsvResult f c
+csvResultFromDecodingRowError = CsvResultDecodeError
+
 -- | This is seldom useful but is included for completeness.
 headlessPipe :: Monad m
   => Siphon c
   -> Decoding Headless c a
   -> Pipe c a m (DecodingRowError Headless c)
@@ -45,7 +52,7 @@ indexedPipe sd decoding = do
   e <- consumeGeneral 0 sd mkParseError
   case e of
     Left err -> return err
-    Right (firstRow, mleftovers) -> 
+    Right (firstRow, mleftovers) ->
       let req = Decoding.maxIndex decoding
           vlen = Vector.length firstRow
        in if vlen < req
@@ -65,7 +72,7 @@ headedPipe sd decoding = do
   e <- consumeGeneral 0 sd mkParseError
   case e of
     Left err -> return err
-    Right (headers, mleftovers) -> 
+    Right (headers, mleftovers) ->
       case Decoding.headedToIndexed headers decoding of
         Left headingErrs -> return (DecodingRowError 0 (RowErrorHeading headingErrs))
         Right indexedDecoding ->
@@ -103,7 +110,7 @@ consumeGeneral ix (Siphon _ _ parse isNull) wrapParseError = do
         c <- awaitSkip isNull
         handleResult (k c)
       handleResult r = case r of
-        Atto.Fail _ ctxs msg -> return $ Left 
+        Atto.Fail _ ctxs msg -> return $ Left
           $ wrapParseError ix ctxs msg
         Atto.Done c v ->
           let mcontent = if isNull c
diff --git a/siphon/src/Siphon/Internal.hs b/siphon/src/Siphon/Internal.hs
index 3f3832a..3be524d 100644
--- a/siphon/src/Siphon/Internal.hs
+++ b/siphon/src/Siphon/Internal.hs
@@ -30,6 +30,7 @@ import qualified Data.Vector as V
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as LByteString
 import qualified Data.ByteString.Builder as Builder
+import qualified Data.Text as T
 import Data.Word (Word8)
 import Data.Vector (Vector)
 import Data.ByteString (ByteString)
diff --git a/siphon/src/Siphon/Internal/Text.hs b/siphon/src/Siphon/Internal/Text.hs
new file mode 100644
index 0000000..4d63431
--- /dev/null
+++ b/siphon/src/Siphon/Internal/Text.hs
@@ -0,0 +1,189 @@
+{-# LANGUAGE BangPatterns #-}
+
+module Siphon.Internal.Text where
+
+import Siphon.Types
+
+import Control.Applicative (optional)
+import Data.Attoparsec.Text (char, endOfInput, string)
+import qualified Data.Attoparsec.Text as A
+import qualified Data.Attoparsec.Text.Lazy as AL
+import qualified Data.Attoparsec.Zepto as Z
+import qualified Data.Text as T
+import qualified Data.Text as Text
+import qualified Data.Vector as V
+import qualified Data.Text.Lazy as LText
+import qualified Data.Text.Lazy.Builder as Builder
+import Data.Text.Lazy.Builder (Builder)
+import Data.Word (Word8)
+import Data.Vector (Vector)
+import Data.Text (Text)
+import Data.Coerce (coerce)
+import Siphon.Types
+
+import Control.Applicative
+import Data.Monoid
+
+text :: Siphon Text
+text = Siphon
+  escape
+  encodeRow
+  (A.parse (row comma))
+  Text.null
+
+encodeRow :: Vector (Escaped Text) -> Text
+encodeRow = id
+  . flip Text.append (Text.singleton newline)
+  . Text.intercalate (Text.singleton comma)
+  . V.toList
+  . coerce
+
+escape :: Text -> Escaped Text
+escape t = case Text.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
+  Nothing -> Escaped t
+  Just _ -> escapeAlways t
+
+-- | This implementation is definitely suboptimal.
+-- A better option (which would waste a little space
+-- but would be much faster) would be to build the
+-- new text by writing to a buffer directly.
+escapeAlways :: Text -> Escaped Text
+escapeAlways t = Escaped $ Text.concat
+  [ textDoubleQuote
+  , Text.replace textDoubleQuote (Text.pack [doubleQuote,doubleQuote]) t
+  , textDoubleQuote
+  ]
+
+-- | Specialized version of 'sepBy1'' which is faster due to not
+-- accepting an arbitrary separator.
+sepByDelim1' :: A.Parser a
+             -> Char  -- ^ Field delimiter
+             -> A.Parser [a]
+sepByDelim1' p !delim = liftM2' (:) p loop
+  where
+    loop = do
+      mb <- A.peekChar
+      case mb of
+        Just b | b == delim -> liftM2' (:) (A.anyChar *> p) loop
+        _ -> pure []
+{-# INLINE sepByDelim1' #-}
+
+-- | Specialized version of 'sepBy1'' which is faster due to not
+-- accepting an arbitrary separator.
+sepByEndOfLine1' :: A.Parser a
+                 -> A.Parser [a]
+sepByEndOfLine1' p = liftM2' (:) p loop
+  where
+    loop = do
+      mb <- A.peekChar
+      case mb of
+        Just b | b == cr ->
+                   liftM2' (:) (A.anyChar *> A.char newline *> p) loop
+               | b == newline ->
+                   liftM2' (:) (A.anyChar *> p) loop
+        _ -> pure []
+{-# INLINE sepByEndOfLine1' #-}
+
+-- | Parse a record, not including the terminating line separator. The
+-- terminating line separator is not included as the last record in a
+-- CSV file is allowed to not have a terminating line separator. You
+-- most likely want to use the 'endOfLine' parser in combination with
+-- this parser.
+row :: Char  -- ^ Field delimiter
+    -> A.Parser (Vector Text)
+row !delim = rowNoNewline delim <* endOfLine
+{-# INLINE row #-}
+
+rowNoNewline :: Char  -- ^ Field delimiter
+             -> A.Parser (Vector Text)
+rowNoNewline !delim = V.fromList <$!> field delim `sepByDelim1'` delim
+{-# INLINE rowNoNewline #-}
+
+-- | Parse a field. The field may be in either the escaped or
+-- non-escaped format. The return value is unescaped.
+field :: Char -> A.Parser Text
+field !delim = do
+  mb <- A.peekChar
+  -- We purposely don't use <|> as we want to commit to the first
+  -- choice if we see a double quote.
+  case mb of
+    Just b | b == doubleQuote -> escapedField
+    _ -> unescapedField delim
+{-# INLINE field #-}
+
+escapedField :: A.Parser Text
+escapedField = do
+  _ <- dquote -- This can probably be replaced with anyChar
+  b <- escapedFieldInner mempty
+  return (LText.toStrict (Builder.toLazyText b))
+
+escapedFieldInner :: Builder -> A.Parser Builder
+escapedFieldInner b = do
+  t <- A.takeTill (== doubleQuote)
+  _ <- A.anyChar -- this will always be a double quote
+  c <- A.peekChar'
+  if c == doubleQuote
+    then do
+      _ <- A.anyChar -- this will always be a double quote
+      escapedFieldInner (b `mappend` Builder.fromText t `mappend` Builder.fromText textDoubleQuote)
+    else return (b `mappend` Builder.fromText t)
+
+unescapedField :: Char -> A.Parser Text
+unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
+                                            c /= newline &&
+                                            c /= delim &&
+                                            c /= cr)
+
+dquote :: A.Parser Char
+dquote = char doubleQuote
+
+unescape :: A.Parser Text
+unescape = (LText.toStrict . Builder.toLazyText) <$!> go mempty where
+  go acc = do
+    h <- A.takeWhile (/= doubleQuote)
+    let rest = do
+          c0 <- A.anyChar
+          c1 <- A.anyChar
+          if (c0 == doubleQuote && c1 == doubleQuote)
+            then go (acc `mappend` Builder.fromText h `mappend` Builder.fromText textDoubleQuote)
+            else fail "invalid CSV escape sequence"
+    done <- A.atEnd
+    if done
+      then return (acc `mappend` Builder.fromText h)
+      else rest
+
+-- | A strict version of 'Data.Functor.<$>' for monads.
+(<$!>) :: Monad m => (a -> b) -> m a -> m b
+f <$!> m = do
+  a <- m
+  return $! f a
+{-# INLINE (<$!>) #-}
+
+infixl 4 <$!>
+
+-- | A version of 'liftM2' that is strict in the result of its first
+-- action.
+liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c
+liftM2' f a b = do
+  !x <- a
+  y <- b
+  return (f x y)
+{-# INLINE liftM2' #-}
+
+
+-- | Match either a single newline character @\'\\n\'@, or a carriage
+-- return followed by a newline character @\"\\r\\n\"@, or a single
+-- carriage return @\'\\r\'@.
+endOfLine :: A.Parser ()
+endOfLine = (A.char newline *> return ()) <|> (string (Text.pack "\r\n") *> return ()) <|> (A.char cr *> return ())
+{-# INLINE endOfLine #-}
+
+textDoubleQuote :: Text
+textDoubleQuote = Text.singleton doubleQuote
+
+doubleQuote, newline, cr, comma :: Char
+doubleQuote = '\"'
+newline = '\n'
+cr = '\r'
+comma = ','
+
diff --git a/siphon/src/Siphon/Types.hs b/siphon/src/Siphon/Types.hs
index 6a6b0e3..8e5cfe8 100644
--- a/siphon/src/Siphon/Types.hs
+++ b/siphon/src/Siphon/Types.hs
@@ -1,6 +1,7 @@
 module Siphon.Types where
 
 import Data.Vector (Vector)
+import Colonnade.Types (DecodingRowError)
 import qualified Data.Attoparsec.Types as Atto
 
 newtype Escaped c = Escaped { getEscaped :: c }
@@ -12,18 +13,26 @@ data Siphon c = Siphon
   , siphonNull :: c -> Bool
   }
 
+-- | This type is provided for convenience with @pipes-text@
+data CsvResult f c
+  = CsvResultSuccess
+  | CsvResultTextDecodeError
+  | CsvResultDecodeError (DecodingRowError f c)
+  deriving (Show,Read,Eq)
+
+
 -- | Consider changing out the use of 'Vector' here
 -- with the humble list instead. It might fuse away
 -- better. Not sure though.
-data SiphonX c1 c2 = SiphonX
-  { siphonXEscape :: !(c1 -> Escaped c2)
-  , siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
-  }
-
-data SiphonDecoding c1 c2 = SiphonDecoding
-  { siphonDecodingParse :: c1 -> Atto.IResult c1 (Vector c2)
-  , siphonDecodingNull :: c1 -> Bool
-  }
+-- data SiphonX c1 c2 = SiphonX
+--   { siphonXEscape :: !(c1 -> Escaped c2)
+--   , siphonXIntercalate :: !(Vector (Escaped c2) -> c2)
+--   }
+--
+-- data SiphonDecoding c1 c2 = SiphonDecoding
+--   { siphonDecodingParse :: c1 -> Atto.IResult c1 (Vector c2)
+--   , siphonDecodingNull :: c1 -> Bool
+--   }
 
 -- data WithEnd c = WithEnd
 -- { withEndEnded :: !Bool
diff --git a/siphon/test/Test.hs b/siphon/test/Test.hs
index a4b1150..77f7b8b 100644
--- a/siphon/test/Test.hs
+++ b/siphon/test/Test.hs
@@ -1,19 +1,23 @@
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DeriveGeneric #-}
 
 module Main (main) where
 
-import Test.QuickCheck (Gen, Arbitrary(..), choose)
+import Test.QuickCheck (Gen, Arbitrary(..), choose, elements)
 import Test.HUnit (Assertion,(@?=))
 import Test.Framework (defaultMain, testGroup, Test)
 import Test.Framework.Providers.QuickCheck2 (testProperty)
 import Test.Framework.Providers.HUnit (testCase)
 import Data.ByteString (ByteString)
+import Data.Text (Text)
+import GHC.Generics (Generic)
 import Data.Either.Combinators
 import Colonnade.Types
 import Siphon.Types
 import Data.Functor.Identity
 import Data.Functor.Contravariant (contramap)
 import Data.Functor.Contravariant.Divisible (divided,conquered)
+import qualified Data.Text as Text
 import qualified Data.ByteString.Builder as Builder
 import qualified Data.ByteString.Lazy as LByteString
 import qualified Data.ByteString as ByteString
@@ -22,6 +26,8 @@ import qualified Colonnade.Decoding as Decoding
 import qualified Colonnade.Encoding as Encoding
 import qualified Colonnade.Decoding.ByteString.Char8 as CDB
 import qualified Colonnade.Encoding.ByteString.Char8 as CEB
+import qualified Colonnade.Decoding.Text as CDT
+import qualified Colonnade.Encoding.Text as CET
 import qualified Siphon.Encoding as SE
 import qualified Siphon.Decoding as SD
 import qualified Siphon.Content as SC
@@ -35,10 +41,10 @@ tests :: [Test]
 tests =
   [ testGroup "ByteString encode/decode"
     [ testCase "Headless Encoding (int,char,bool)"
-        $ runTestScenario 
-        SC.byteStringChar8 
+        $ runTestScenario
+        SC.byteStringChar8
         SE.pipe
-        encodingA 
+        encodingA
        "4,c,false\n"
     , testProperty "Headless Isomorphism (int,char,bool)"
         $ propIsoPipe $
@@ -46,8 +52,8 @@
         (SE.pipe SC.byteStringChar8 encodingA)
        >-> (void $ SD.headlessPipe SC.byteStringChar8 decodingA)
     , testCase "Headed Encoding (int,char,bool)"
-        $ runTestScenario 
-        SC.byteStringChar8 
+        $ runTestScenario
+        SC.byteStringChar8
         SE.headedPipe
         encodingB
         $ ByteString.concat
@@ -55,8 +61,8 @@
         , "4,c,false\n"
         ]
     , testCase "Headed Encoding (int,char,bool) monoidal building"
-        $ runTestScenario 
-        SC.byteStringChar8 
+        $ runTestScenario
+        SC.byteStringChar8
         SE.headedPipe
         encodingC
         $ ByteString.concat
@@ -69,8 +75,53 @@
        >-> (void $ SD.headedPipe SC.byteStringChar8 decodingB)
     ]
+  , testGroup "Text encode/decode"
+    [ testCase "Headless Encoding (int,char,bool)"
+        $ runTestScenario
+        SC.text
+        SE.pipe
+        encodingW
+        "4,c,false\n"
+    , testCase "Headless Encoding (Foo,Foo,Foo)"
+        $ runCustomTestScenario
+        SC.text
+        SE.pipe
+        encodingY
+        (FooA,FooA,FooC)
+        "Simple,Simple,\"More\"\"Escaped,\"\"\"\"Chars\"\n"
+    , testProperty "Headless Isomorphism (Foo,Foo,Foo)"
+        $ propIsoPipe $
+        (SE.pipe SC.text encodingY)
+        >->
+        (void $ SD.headlessPipe SC.text decodingY)
+    ]
   ]
+
+data Foo = FooA | FooB | FooC
+  deriving (Generic,Eq,Ord,Show,Read,Bounded,Enum)
+
+instance Arbitrary Foo where
+  arbitrary = elements [minBound..maxBound]
+
+fooToString :: Foo -> String
+fooToString x = case x of
+  FooA -> "Simple"
+  FooB -> "With,Escaped\nChars"
+  FooC -> "More\"Escaped,\"\"Chars"
+
+encodeFoo :: (String -> c) -> Foo -> c
+encodeFoo f = f . fooToString
+
+fooFromString :: String -> Either String Foo
+fooFromString x = case x of
+  "Simple" -> Right FooA
+  "With,Escaped\nChars" -> Right FooB
+  "More\"Escaped,\"\"Chars" -> Right FooC
+  _ -> Left "failed to decode Foo"
+
+decodeFoo :: (c -> String) -> c -> Either String Foo
+decodeFoo f = fooFromString . f
+
 decodingA :: Decoding Headless ByteString (Int,Char,Bool)
 decodingA = (,,)
   <$> Decoding.headless CDB.int
   <*> Decoding.headless CDB.char
   <*> Decoding.headless CDB.bool
@@ -88,7 +139,27 @@
 encodingA :: Encoding Headless ByteString (Int,Char,Bool)
 encodingA = contramap tripleToPairs
   $ divided (Encoding.headless CEB.int)
   $ divided (Encoding.headless CEB.char)
   $ divided (Encoding.headless CEB.bool)
-  $ conquered 
+  $ conquered
+
+encodingW :: Encoding Headless Text (Int,Char,Bool)
+encodingW = contramap tripleToPairs
+  $ divided (Encoding.headless CET.int)
+  $ divided (Encoding.headless CET.char)
+  $ divided (Encoding.headless CET.bool)
+  $ conquered
+
+encodingY :: Encoding Headless Text (Foo,Foo,Foo)
+encodingY = contramap tripleToPairs
+  $ divided (Encoding.headless $ encodeFoo Text.pack)
+  $ divided (Encoding.headless $ encodeFoo Text.pack)
+  $ divided (Encoding.headless $ encodeFoo Text.pack)
+  $ conquered
+
+decodingY :: Decoding Headless Text (Foo,Foo,Foo)
+decodingY = (,,)
+  <$> Decoding.headless (decodeFoo Text.unpack)
+  <*> Decoding.headless (decodeFoo Text.unpack)
+  <*> Decoding.headless (decodeFoo Text.unpack)
 
 encodingB :: Encoding Headed ByteString (Int,Char,Bool)
 encodingB = contramap tripleToPairs
@@ -112,14 +183,26 @@ propIsoPipe p as = (Pipes.toList $ each as >-> p) == as
 runTestScenario :: (Monoid c, Eq c, Show c)
   => Siphon c
   -> (Siphon c -> Encoding f c (Int,Char,Bool) -> Pipe (Int,Char,Bool) c Identity ())
-  -> Encoding f c (Int,Char,Bool) 
-  -> c 
+  -> Encoding f c (Int,Char,Bool)
+  -> c
   -> Assertion
-runTestScenario s p e c = 
+runTestScenario s p e c =
   ( mconcat $ Pipes.toList $
     Pipes.yield (4,'c',False) >-> p s e
   ) @?= c
 
+runCustomTestScenario :: (Monoid c, Eq c, Show c)
+  => Siphon c
+  -> (Siphon c -> Encoding f c a -> Pipe a c Identity ())
+  -> Encoding f c a
+  -> a
+  -> c
+  -> Assertion
+runCustomTestScenario s p e a c =
+  ( mconcat $ Pipes.toList $
+    Pipes.yield a >-> p s e
+  ) @?= c
+
 -- testEncodingA :: Assertion
 -- testEncodingA = runTestScenario encodingA "4,c,false\n"
diff --git a/stack.yaml b/stack.yaml
index 3e0fba0..ad9afff 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -1,5 +1,5 @@
 # This file was automatically generated by 'stack init'
-# 
+#
 # Some commonly used options have been documented as comments in this file.
 # For advanced use and comprehensive documentation of the format, please see:
 # http://docs.haskellstack.org/en/stable/yaml_configuration/
@@ -7,7 +7,7 @@
 # Resolver to choose a 'specific' stackage snapshot or a compiler version.
 # A snapshot resolver dictates the compiler version and the set of packages
 # to be used for project dependencies. For example:
-# 
+#
 # resolver: lts-3.5
 # resolver: nightly-2015-09-21
 # resolver: ghc-7.10.2
@@ -19,7 +19,7 @@ resolver: lts-6.4
 
 # User packages to be built.
 # Various formats can be used as shown in the example below.
-# 
+#
 # packages:
 # - some-directory
 # - https://example.com/foo/bar/baz-0.0.2.tar.gz
@@ -31,7 +31,7 @@ resolver: lts-6.4
 # subdirs:
 #   - auto-update
 #   - wai
-# 
+#
 # A package marked 'extra-dep: true' will only be built if demanded by a
 # non-dependency (i.e. a user package), and its test suites and benchmarks
 # will not be run. This is useful for tweaking upstream packages.
@@ -39,6 +39,7 @@ packages:
 - 'colonnade'
 - 'reflex-dom-colonnade'
 - 'siphon'
+- 'geolite-csv'
 # Dependency packages to be pulled from upstream that are not in the resolver
 # (e.g., acme-missiles-0.3)
 extra-deps:
@@ -48,6 +49,7 @@
 - 'aeson-0.9.0.1'
 - 'haskell-src-exts-1.16.0.1'
 - 'syb-0.5.1'
+- 'ip-0.8.4'
@@ -59,18 +61,18 @@ extra-package-dbs: []
 
 # Control whether we use the GHC we find on the path
 # system-ghc: true
-# 
+#
 # Require a specific version of stack, using version ranges
 # require-stack-version: -any # Default
 # require-stack-version: ">=1.1"
-# 
+#
 # Override the architecture used by stack, especially useful on Windows
 # arch: i386
 # arch: x86_64
-# 
+#
 # Extra directories used by stack for building
 # extra-include-dirs: [/path/to/dir]
 # extra-lib-dirs: [/path/to/dir]
-# 
+#
 # Allow a newer minor version of GHC than the snapshot specifies
 # compiler-check: newer-minor
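
Usage note (not part of the patch): the sketch below illustrates how the text-based decoders introduced above are meant to be wired together. Cell decoders from Colonnade.Decoding.Text are combined with CD.headed into a Decoding, and SD.headedPipe turns that Decoding into a streaming Pipe using the SC.text content type, the same shape as the cities and blocks pipes in Geolite.Csv. The Person record, its column names, and the Example module name are hypothetical, not code from the repository.

    {-# LANGUAGE OverloadedStrings #-}
    module Example where

    import Data.Text (Text)
    import Pipes (Pipe)
    import Colonnade.Types
    import qualified Colonnade.Decoding as CD
    import qualified Colonnade.Decoding.Text as CDT
    import qualified Siphon.Content as SC
    import qualified Siphon.Decoding as SD

    -- Hypothetical record for a CSV with "name" and "age" headers.
    data Person = Person
      { personName :: Text
      , personAge  :: Maybe Int
      } deriving (Show)

    -- Decode each row by header name; an empty "age" cell becomes Nothing.
    decodingPerson :: Decoding Headed Text Person
    decodingPerson = Person
      <$> CD.headed "name" CDT.text
      <*> CD.headed "age" (CDT.optional CDT.int)

    -- A streaming decoder: Text chunks in, Person values out; the pipe
    -- returns a DecodingRowError if a row fails to parse or decode.
    persons :: Monad m => Pipe Text Person m (DecodingRowError Headed Text)
    persons = SD.headedPipe SC.text decodingPerson

Driving such a pipe with runEffect over a Text producer follows the same pattern as the streamFileWith helper in geolite-csv/test/Spec.hs.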