264 lines
9.8 KiB
Haskell
264 lines
9.8 KiB
Haskell
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
|
|
|
module Handler.Utils.Csv
|
|
( decodeCsv, decodeCsvPositional
|
|
, timestampCsv
|
|
, encodeCsv
|
|
, encodeDefaultOrderedCsv
|
|
, respondCsv, respondCsvDB
|
|
, respondDefaultOrderedCsv, respondDefaultOrderedCsvDB
|
|
, fileSourceCsv, fileSourceCsvPositional
|
|
, partIsAttachmentCsv
|
|
, CsvParseError(..)
|
|
, ToNamedRecord(..), FromNamedRecord(..)
|
|
, DefaultOrdered(..)
|
|
, ToField(..), FromField(..)
|
|
) where
|
|
|
|
import Import hiding (Header, mapM_)
|
|
|
|
import Data.Csv
|
|
import Data.Csv.Conduit
|
|
|
|
import Handler.Utils.Form (uploadContents)
|
|
|
|
import Control.Monad (mapM_)
|
|
|
|
-- import qualified Data.Csv.Util as Csv
|
|
import qualified Data.Csv.Parser as Csv
|
|
|
|
import qualified Data.Conduit.List as C
|
|
import qualified Data.Conduit.Combinators as C (sourceLazy)
|
|
|
|
import qualified Data.Map as Map
|
|
import qualified Data.Vector as Vector
|
|
import qualified Data.HashMap.Strict as HashMap
|
|
|
|
import qualified Data.ByteString.Lazy as LBS
|
|
|
|
import qualified Data.Attoparsec.ByteString.Lazy as A
|
|
|
|
import Handler.Utils.DateTime
|
|
import Data.Time.Format (iso8601DateFormat)
|
|
|
|
import qualified Data.Char as Char
|
|
|
|
import Control.Monad.Error.Class (MonadError(..))
|
|
|
|
|
|
|
|
_haltingCsvParseError :: Prism' CsvParseError CsvStreamHaltParseError
|
|
_haltingCsvParseError = prism' (\(HaltingCsvParseError bs t) -> CsvParseError bs t) $ \case
|
|
CsvParseError bs t -> Just $ HaltingCsvParseError bs t
|
|
_other -> Nothing
|
|
|
|
_csvStreamRecordParseError :: Prism' CsvParseError CsvStreamRecordParseError
|
|
_csvStreamRecordParseError = prism' (\(CsvStreamRecordParseError t) -> IncrementalError t) $ \case
|
|
IncrementalError t -> Just $ CsvStreamRecordParseError t
|
|
_other -> Nothing
|
|
|
|
throwIncrementalErrors :: MonadError CsvParseError m => ConduitT (Either CsvStreamRecordParseError a) a m ()
|
|
throwIncrementalErrors = C.mapM $ either (throwError . review _csvStreamRecordParseError) return
|
|
|
|
|
|
newtype MaybeEmptyRecord csv = MaybeEmptyRecord { unMaybeEmptyRecord :: Maybe csv }
|
|
|
|
instance FromNamedRecord csv => FromNamedRecord (MaybeEmptyRecord csv) where
|
|
parseNamedRecord r
|
|
| all null r = pure $ MaybeEmptyRecord Nothing
|
|
| otherwise = MaybeEmptyRecord . Just <$> parseNamedRecord r
|
|
instance FromRecord csv => FromRecord (MaybeEmptyRecord csv) where
|
|
parseRecord r
|
|
| all null r = pure $ MaybeEmptyRecord Nothing
|
|
| otherwise = MaybeEmptyRecord . Just <$> parseRecord r
|
|
|
|
decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m ()
|
|
decodeCsv = decodeCsv' $ \opts -> fromNamedCsvStreamError opts (review _haltingCsvParseError) .| throwIncrementalErrors
|
|
|
|
decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m ()
|
|
decodeCsvPositional hdr = decodeCsv' $ \opts -> fromCsvStreamError opts hdr (review _haltingCsvParseError) .| throwIncrementalErrors
|
|
|
|
|
|
decodeCsv' :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString (MaybeEmptyRecord csv) (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m ()
|
|
decodeCsv' fromCsv' = do
|
|
encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
|
|
|
|
let
|
|
recode'
|
|
| enc == "UTF8"
|
|
= id
|
|
| otherwise
|
|
= \act -> do
|
|
inp <- sinkLazy
|
|
let inp' = encodeLazyByteString UTF8 $ decodeLazyByteString enc inp
|
|
sourceLazy inp' .| act
|
|
where enc = encOpts ^. _csvFormat . _csvEncoding
|
|
|
|
recode' decodeCsv'' .| C.mapMaybe unMaybeEmptyRecord
|
|
where
|
|
decodeCsv'' = transPipe throwExceptT $ do
|
|
testBuffer <- accumTestBuffer LBS.empty
|
|
mapM_ leftover $ LBS.toChunks testBuffer
|
|
|
|
let decodeOptions = defaultDecodeOptions
|
|
& guessDelimiter testBuffer
|
|
$logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|]
|
|
|
|
fromCsv' decodeOptions
|
|
testBufferSize :: Num a => a
|
|
testBufferSize = 4096
|
|
accumTestBuffer acc
|
|
| LBS.length acc >= testBufferSize = return acc
|
|
| otherwise = do
|
|
frag <- await
|
|
case frag of
|
|
Nothing -> return acc
|
|
Just frag' -> accumTestBuffer (acc <> LBS.fromStrict frag')
|
|
|
|
guessDelimiter testBuffer
|
|
| Just firstDQuote <- doubleQuote `LBS.elemIndex` testBuffer
|
|
= if
|
|
| firstDQuote /= 0
|
|
-> \x -> x { Csv.decDelimiter = testBuffer `LBS.index` pred firstDQuote }
|
|
| A.Done unused _ <- A.parse quotedField testBuffer
|
|
-> case A.parse endOfLine unused of
|
|
A.Fail{}
|
|
| Just (nextChar, _) <- LBS.uncons unused
|
|
-> \x -> x { Csv.decDelimiter = nextChar }
|
|
_other -> guessDelimiter $ LBS.take firstDQuote testBuffer <> unused
|
|
| otherwise
|
|
-> id -- Parsing of something, which should be a quoted field, failed; bail now
|
|
| A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer
|
|
, (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1) . filter (not . isAlphaNum')) ls
|
|
, Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h
|
|
, let maxH = maximum equals
|
|
, [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals
|
|
= \x -> x { Csv.decDelimiter = d }
|
|
| otherwise
|
|
= id
|
|
|
|
isAlphaNum' = Char.isAlphaNum . Char.chr . fromIntegral
|
|
|
|
quotedField :: A.Parser () -- We don't care about the return value
|
|
quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter
|
|
|
|
|
|
endOfLine :: A.Parser ()
|
|
endOfLine = asum
|
|
[ void $ A.word8 newline
|
|
, mapM_ A.word8 [cr, newline]
|
|
, void $ A.word8 cr
|
|
]
|
|
|
|
doubleQuote, newline, cr :: Word8
|
|
doubleQuote = 34
|
|
newline = 10
|
|
cr = 13
|
|
|
|
|
|
|
|
encodeCsv :: ( ToNamedRecord csv
|
|
, MonadHandler m
|
|
, HandlerSite m ~ UniWorX
|
|
)
|
|
=> Header
|
|
-> ConduitT csv ByteString m ()
|
|
-- ^ Encode a stream of records
|
|
--
|
|
-- Currently not streaming
|
|
encodeCsv hdr = do
|
|
csvOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth
|
|
let recode'
|
|
| enc == "UTF8"
|
|
= id
|
|
| otherwise
|
|
= encodeLazyByteString enc . decodeLazyByteString UTF8
|
|
where enc = csvOpts ^. _csvFormat . _csvEncoding
|
|
C.foldMap pure >>= (C.sourceLazy . recode') . encodeByNameWith (csvOpts ^. _csvFormat . _CsvEncodeOptions) hdr
|
|
|
|
timestampCsv :: ( MonadHandler m
|
|
, HandlerSite m ~ UniWorX
|
|
)
|
|
=> m (FilePath -> FilePath)
|
|
timestampCsv = do
|
|
csvOpts <- maybe def (userCsvOptions . entityVal) <$> maybeAuth
|
|
if
|
|
| csvOpts ^. _csvTimestamp -> do
|
|
ts <- formatTime' (iso8601DateFormat $ Just "%H%M") =<< liftIO getCurrentTime
|
|
return $ (<>) (unpack ts <> "-")
|
|
| otherwise -> return id
|
|
|
|
partIsAttachmentCsv :: (Textual t, MonadMail m, HandlerSite m ~ UniWorX)
|
|
=> t
|
|
-> StateT Part m ()
|
|
partIsAttachmentCsv (repack -> fName) = do
|
|
ts <- timestampCsv
|
|
partIsAttachment . ts $ fName `addExtension` unpack extensionCsv
|
|
|
|
encodeDefaultOrderedCsv :: forall csv m.
|
|
( ToNamedRecord csv
|
|
, DefaultOrdered csv
|
|
, MonadHandler m
|
|
, HandlerSite m ~ UniWorX
|
|
)
|
|
=> ConduitT csv ByteString m ()
|
|
encodeDefaultOrderedCsv = encodeCsv $ headerOrder (error "headerOrder" :: csv)
|
|
|
|
|
|
respondCsv :: ToNamedRecord csv
|
|
=> Header
|
|
-> ConduitT () csv Handler ()
|
|
-> Handler TypedContent
|
|
respondCsv hdr src = respondSource typeCsv' $ src .| encodeCsv hdr .| awaitForever sendChunk
|
|
|
|
respondDefaultOrderedCsv :: forall csv.
|
|
( ToNamedRecord csv
|
|
, DefaultOrdered csv
|
|
)
|
|
=> ConduitT () csv Handler ()
|
|
-> Handler TypedContent
|
|
respondDefaultOrderedCsv = respondCsv $ headerOrder (error "headerOrder" :: csv)
|
|
|
|
respondCsvDB :: ToNamedRecord csv
|
|
=> Header
|
|
-> ConduitT () csv DB ()
|
|
-> Handler TypedContent
|
|
respondCsvDB hdr src = respondSourceDB typeCsv' $ src .| encodeCsv hdr .| awaitForever sendChunk
|
|
|
|
respondDefaultOrderedCsvDB :: forall csv.
|
|
( ToNamedRecord csv
|
|
, DefaultOrdered csv
|
|
)
|
|
=> ConduitT () csv DB ()
|
|
-> Handler TypedContent
|
|
respondDefaultOrderedCsvDB = respondCsvDB $ headerOrder (error "headerOrder" :: csv)
|
|
|
|
fileSourceCsv :: ( FromNamedRecord csv
|
|
, MonadThrow m
|
|
, MonadHandler m
|
|
, HandlerSite m ~ UniWorX
|
|
)
|
|
=> ConduitT FileReference csv m ()
|
|
fileSourceCsv = uploadContents .| decodeCsv
|
|
|
|
fileSourceCsvPositional :: ( MonadHandler m
|
|
, HandlerSite m ~ UniWorX
|
|
, MonadThrow m
|
|
, FromRecord csv
|
|
)
|
|
=> HasHeader
|
|
-> ConduitT FileReference csv m ()
|
|
fileSourceCsvPositional hdr = uploadContents .| decodeCsvPositional hdr
|
|
|
|
|
|
instance ToWidget UniWorX CsvRendered where
|
|
toWidget CsvRendered{..} = liftWidget $(widgetFile "widgets/csvRendered")
|
|
where
|
|
csvData = [ [ decodeUtf8 <$> HashMap.lookup columnKey row
|
|
| columnKey <- Vector.toList csvRenderedHeader
|
|
]
|
|
| row <- csvRenderedData
|
|
]
|
|
|
|
headers = decodeUtf8 <$> Vector.toList csvRenderedHeader
|