{-# OPTIONS_GHC -fno-warn-orphans #-}

module Handler.Utils.Csv
  ( typeCsv, extensionCsv
  , decodeCsv
  , encodeCsv
  , respondCsv, respondCsvDB
  , fileSourceCsv
  , CsvParseError(..)
  , ToNamedRecord(..), FromNamedRecord(..)
  , DefaultOrdered(..)
  , ToField(..), FromField(..)
  , CsvRendered(..)
  , toCsvRendered
  ) where

import Import hiding (Header, mapM_)

import Data.Csv
import Data.Csv.Conduit
import Data.Function ((&))
import Control.Monad (mapM_)

-- import qualified Data.Csv.Util as Csv
import qualified Data.Csv.Parser as Csv

import qualified Data.Conduit.List as C
import qualified Data.Conduit.Combinators as C (sourceLazy)

import qualified Data.Map as Map
import qualified Data.Vector as Vector
import qualified Data.HashMap.Strict as HashMap
import qualified Data.ByteString.Lazy as LBS

import qualified Data.Attoparsec.ByteString.Lazy as A


deriving instance Typeable CsvParseError
instance Exception CsvParseError


typeCsv, typeCsv' :: ContentType
typeCsv  = simpleContentType typeCsv'
typeCsv' = "text/csv; charset=UTF-8; header=present"

extensionCsv :: Extension
extensionCsv = fromMaybe "csv" $ listToMaybe
  [ ext | (ext, mime) <- Map.toList mimeMap, mime == typeCsv ]


decodeCsv :: (MonadThrow m, FromNamedRecord csv, MonadLogger m) => Conduit ByteString m csv
-- ^ Decode a stream of named CSV records, guessing the 'Csv.DecodeOptions'
-- (notably the delimiter) from an initial buffer of input
decodeCsv = transPipe throwExceptT $ do
  testBuffer <- accumTestBuffer LBS.empty
  mapM_ leftover $ LBS.toChunks testBuffer
  let decodeOptions = defaultDecodeOptions & guessDelimiter testBuffer
  $logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
  fromNamedCsv decodeOptions
  where
    testBufferSize = 4096

    -- Accumulate up to `testBufferSize` bytes of input to guess the delimiter from
    accumTestBuffer acc
      | LBS.length acc >= testBufferSize = return acc
      | otherwise = do
          frag <- await
          case frag of
            Nothing    -> return acc
            Just frag' -> accumTestBuffer (acc <> LBS.fromStrict frag')

    guessDelimiter testBuffer
        -- The buffer contains a double quote: assume the first quoted field is
        -- directly adjacent to a delimiter and take the byte next to it
      | Just firstDQuote <- doubleQuote `LBS.elemIndex` testBuffer
      = if | firstDQuote /= 0
             -> \x -> x { Csv.decDelimiter = testBuffer `LBS.index` pred firstDQuote }
           | A.Done unused _ <- A.parse quotedField testBuffer
             -> case A.parse endOfLine unused of
                  A.Fail{}
                    | Just (nextChar, _) <- LBS.uncons unused
                      -> \x -> x { Csv.decDelimiter = nextChar }
                  _other -> guessDelimiter $ LBS.take firstDQuote testBuffer <> unused
           | otherwise -> id -- Parsing of something, which should be a quoted field, failed; bail now
        -- No quotes: count per-line byte frequencies and pick the single most
        -- frequent byte that occurs equally often on every line
      | A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
      , (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1)) ls
      , Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
      , let maxH = maximum equals
      , [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
      = \x -> x { Csv.decDelimiter = d }
      | otherwise = id

    quotedField :: A.Parser () -- We don't care about the return value
    quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions
      -- We can use comma as a separator, because we know that the field we're
      -- trying to parse is quoted and so does not rely on the delimiter

    endOfLine :: A.Parser ()
    endOfLine = asum
      [ void $ A.word8 newline
      , mapM_ A.word8 [cr, newline]
      , void $ A.word8 cr
      ]

    doubleQuote, newline, cr :: Word8
    doubleQuote = 34
    newline     = 10
    cr          = 13


encodeCsv :: ( ToNamedRecord csv
             , DefaultOrdered csv
             , Monad m
             )
          => Conduit csv m ByteString
-- ^ Encode a stream of records
--
-- Currently not streaming
encodeCsv = fmap encodeDefaultOrderedByName (C.foldMap pure) >>= C.sourceLazy


respondCsv :: ( ToNamedRecord csv
              , DefaultOrdered csv
              )
           => Source (HandlerT site IO) csv -> HandlerT site IO TypedContent
respondCsv src = respondSource typeCsv' $ src .| encodeCsv .| awaitForever sendChunk

respondCsvDB :: ( ToNamedRecord csv
                , DefaultOrdered csv
                , YesodPersistRunner site
                )
             => Source (YesodDB site) csv -> HandlerT site IO TypedContent
respondCsvDB src = respondSourceDB typeCsv' $ src .| encodeCsv .| awaitForever sendChunk

fileSourceCsv :: ( FromNamedRecord csv
                 , MonadResource m
                 , MonadLogger m
                 )
              => FileInfo -> Source m csv
fileSourceCsv = (.| decodeCsv) . fileSource


data CsvRendered = CsvRendered
  { csvRenderedHeader :: Header
  , csvRenderedData   :: [NamedRecord]
  } deriving (Eq, Read, Show, Generic, Typeable)

instance ToWidget UniWorX CsvRendered where
  toWidget CsvRendered{..} = liftWidgetT $(widgetFile "widgets/csvRendered")
    where
      csvData = [ [ decodeUtf8 <$> HashMap.lookup columnKey row
                  | columnKey <- Vector.toList csvRenderedHeader
                  ]
                | row <- csvRenderedData
                ]
      headers = decodeUtf8 <$> Vector.toList csvRenderedHeader

toCsvRendered :: forall mono.
                 ( ToNamedRecord (Element mono)
                 , DefaultOrdered (Element mono)
                 , MonoFoldable mono
                 )
              => mono -> CsvRendered
toCsvRendered (otoList -> csvs) = CsvRendered{..}
  where
    csvRenderedHeader = headerOrder (error "not forced" :: Element mono)
    csvRenderedData   = map toNamedRecord csvs
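
-- Usage sketch (hypothetical, not part of the original module): any record
-- type with 'ToNamedRecord' and 'DefaultOrdered' instances can be served as a
-- CSV download via 'respondCsv'.  The names 'ExportRow', 'rows', and
-- 'getExportR' below are illustrative only, and 'Handler' is assumed to be
-- the usual @HandlerT UniWorX IO@ alias.
--
-- > data ExportRow = ExportRow { exportName :: Text, exportScore :: Int }
-- >
-- > instance ToNamedRecord ExportRow where
-- >   toNamedRecord ExportRow{..} = namedRecord ["name" .= exportName, "score" .= exportScore]
-- > instance DefaultOrdered ExportRow where
-- >   headerOrder _ = header ["name", "score"]
-- >
-- > getExportR :: Handler TypedContent
-- > getExportR = respondCsv $ C.sourceList rows
-- >   where rows = [ExportRow "Alice" 3, ExportRow "Bob" 5]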