This repository has been archived on 2024-10-24. You can view files and clone it, but cannot push or open issues or pull requests.
fradrive-old/src/Handler/Utils/Csv.hs
Gregor Kleen cfaea9c08b chore: bump to lts-15.0
BREAKING CHANGE: major version bumps
2020-02-23 11:12:45 +01:00

224 lines
7.9 KiB
Haskell

{-# OPTIONS_GHC -fno-warn-orphans #-}
module Handler.Utils.Csv
( decodeCsv, decodeCsvPositional
, timestampCsv
, encodeCsv
, encodeDefaultOrderedCsv
, respondCsv, respondCsvDB
, respondDefaultOrderedCsv, respondDefaultOrderedCsvDB
, fileSourceCsv
, partIsAttachmentCsv
, CsvParseError(..)
, ToNamedRecord(..), FromNamedRecord(..)
, DefaultOrdered(..)
, ToField(..), FromField(..)
) where
import Import hiding (Header, mapM_)
import Data.Csv
import Data.Csv.Conduit
import Control.Monad (mapM_)
-- import qualified Data.Csv.Util as Csv
import qualified Data.Csv.Parser as Csv
import qualified Data.Conduit.List as C
import qualified Data.Conduit.Combinators as C (sourceLazy)
import qualified Data.Map as Map
import qualified Data.Vector as Vector
import qualified Data.HashMap.Strict as HashMap
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Attoparsec.ByteString.Lazy as A
import Control.Monad.Except (ExceptT)
import Handler.Utils.DateTime
import Data.Time.Format (iso8601DateFormat)
decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m ()
decodeCsv = decodeCsv' fromNamedCsv
decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m ()
decodeCsvPositional hdr = decodeCsv' (`fromCsv` hdr)
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString csv (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
decodeCsv' fromCsv' = do
encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
let
recode'
| enc == "UTF8"
= id
| otherwise
= \act -> do
inp <- sinkLazy
let inp' = encodeLazyByteString UTF8 $ decodeLazyByteString enc inp
sourceLazy inp' .| act
where enc = encOpts ^. _csvFormat . _csvEncoding
recode' decodeCsv''
where
decodeCsv'' = transPipe throwExceptT $ do
testBuffer <- accumTestBuffer LBS.empty
mapM_ leftover $ LBS.toChunks testBuffer
let decodeOptions = defaultDecodeOptions
& guessDelimiter testBuffer
$logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
fromCsv' decodeOptions
testBufferSize :: Num a => a
testBufferSize = 4096
accumTestBuffer acc
| LBS.length acc >= testBufferSize = return acc
| otherwise = do
frag <- await
case frag of
Nothing -> return acc
Just frag' -> accumTestBuffer (acc <> LBS.fromStrict frag')
guessDelimiter testBuffer
| Just firstDQuote <- doubleQuote `LBS.elemIndex` testBuffer
= if
| firstDQuote /= 0
-> \x -> x { Csv.decDelimiter = testBuffer `LBS.index` pred firstDQuote }
| A.Done unused _ <- A.parse quotedField testBuffer
-> case A.parse endOfLine unused of
A.Fail{}
| Just (nextChar, _) <- LBS.uncons unused
-> \x -> x { Csv.decDelimiter = nextChar }
_other -> guessDelimiter $ LBS.take firstDQuote testBuffer <> unused
| otherwise
-> id -- Parsing of something, which should be a quoted field, failed; bail now
| A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1)) ls
, Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
, let maxH = maximum equals
, [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
= \x -> x { Csv.decDelimiter = d }
| otherwise
= id
quotedField :: A.Parser () -- We don't care about the return value
quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter
endOfLine :: A.Parser ()
endOfLine = asum
[ void $ A.word8 newline
, mapM_ A.word8 [cr, newline]
, void $ A.word8 cr
]
doubleQuote, newline, cr :: Word8
doubleQuote = 34
newline = 10
cr = 13
encodeCsv :: ( ToNamedRecord csv
, MonadHandler m
, HandlerSite m ~ UniWorX
)
=> Header
-> ConduitT csv ByteString m ()
-- ^ Encode a stream of records
--
-- Currently not streaming
encodeCsv hdr = do
csvOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
let recode'
| enc == "UTF8"
= id
| otherwise
= encodeLazyByteString enc . decodeLazyByteString UTF8
where enc = csvOpts ^. _csvFormat . _csvEncoding
fmap (encodeByNameWith (csvOpts ^. _csvFormat . _CsvEncodeOptions) hdr) (C.foldMap pure) >>= C.sourceLazy . recode'
timestampCsv :: ( MonadHandler m
, HandlerSite m ~ UniWorX
)
=> m (FilePath -> FilePath)
timestampCsv = do
csvOpts <- maybe def (userCsvOptions . entityVal) <$> maybeAuth
if
| csvOpts ^. _csvTimestamp -> do
ts <- formatTime' (iso8601DateFormat $ Just "%H%M") =<< liftIO getCurrentTime
return $ (<>) (unpack ts <> "-")
| otherwise -> return id
partIsAttachmentCsv :: (Textual t, MonadMail m, HandlerSite m ~ UniWorX)
=> t
-> StateT Part m ()
partIsAttachmentCsv (repack -> fName) = do
ts <- timestampCsv
partIsAttachment . ts $ fName `addExtension` unpack extensionCsv
encodeDefaultOrderedCsv :: forall csv m.
( ToNamedRecord csv
, DefaultOrdered csv
, MonadHandler m
, HandlerSite m ~ UniWorX
)
=> ConduitT csv ByteString m ()
encodeDefaultOrderedCsv = encodeCsv $ headerOrder (error "headerOrder" :: csv)
respondCsv :: ToNamedRecord csv
=> Header
-> ConduitT () csv Handler ()
-> Handler TypedContent
respondCsv hdr src = respondSource typeCsv' $ src .| encodeCsv hdr .| awaitForever sendChunk
respondDefaultOrderedCsv :: forall csv.
( ToNamedRecord csv
, DefaultOrdered csv
)
=> ConduitT () csv Handler ()
-> Handler TypedContent
respondDefaultOrderedCsv = respondCsv $ headerOrder (error "headerOrder" :: csv)
respondCsvDB :: ToNamedRecord csv
=> Header
-> ConduitT () csv DB ()
-> Handler TypedContent
respondCsvDB hdr src = respondSourceDB typeCsv' $ src .| encodeCsv hdr .| awaitForever sendChunk
respondDefaultOrderedCsvDB :: forall csv.
( ToNamedRecord csv
, DefaultOrdered csv
)
=> ConduitT () csv DB ()
-> Handler TypedContent
respondDefaultOrderedCsvDB = respondCsvDB $ headerOrder (error "headerOrder" :: csv)
fileSourceCsv :: ( FromNamedRecord csv
, MonadThrow m
, MonadHandler m
, HandlerSite m ~ UniWorX
)
=> FileInfo
-> ConduitT () csv m ()
fileSourceCsv = (.| decodeCsv) . fileSource
instance ToWidget UniWorX CsvRendered where
toWidget CsvRendered{..} = liftWidget $(widgetFile "widgets/csvRendered")
where
csvData = [ [ decodeUtf8 <$> HashMap.lookup columnKey row
| columnKey <- Vector.toList csvRenderedHeader
]
| row <- csvRenderedData
]
headers = decodeUtf8 <$> Vector.toList csvRenderedHeader