{-# OPTIONS_GHC -fno-warn-orphans #-} module Handler.Utils.Csv ( decodeCsv, decodeCsvPositional , encodeCsv, encodeCsvWith, encodeCsvRendered, encodeCsvRenderedWith , csvRenderedToTypedContent, csvRenderedToTypedContentWith , expectedCsvFormat, expectedCsvContentType , encodeDefaultOrderedCsv , respondCsv, respondCsvDB , respondDefaultOrderedCsv, respondDefaultOrderedCsvDB , fileSourceCsv, fileSourceCsvPositional , partIsAttachmentCsv, setContentDispositionCsv , csvOptionsForFormat , CsvParseError(..) , ToNamedRecord(..), FromNamedRecord(..) , DefaultOrdered(..) , ToField(..), FromField(..) , recodeCsv ) where import Import hiding (Header, mapM_) import Data.Csv import Data.Csv.Conduit import Handler.Utils.Form (uploadContents) import Handler.Utils.ContentDisposition (setContentDisposition') import Control.Monad (mapM_) -- import qualified Data.Csv.Util as Csv import qualified Data.Csv.Parser as Csv import qualified Data.Conduit.List as C (mapMaybe) import qualified Data.Conduit.Combinators as C import qualified Data.Map as Map import qualified Data.Vector as Vector import qualified Data.HashMap.Strict as HashMap import qualified Data.ByteString.Lazy as LBS import qualified Data.Attoparsec.ByteString.Lazy as A import Data.Time.Format (iso8601DateFormat) import qualified Data.Char as Char import Control.Monad.Error.Class (MonadError(..)) import Data.Time.Clock.POSIX (getPOSIXTime) import qualified Data.Time.Format as Time -- import qualified Codec.Archive.Zip as Zip _haltingCsvParseError :: Prism' CsvParseError CsvStreamHaltParseError _haltingCsvParseError = prism' (\(HaltingCsvParseError bs t) -> CsvParseError bs t) $ \case CsvParseError bs t -> Just $ HaltingCsvParseError bs t _other -> Nothing _csvStreamRecordParseError :: Prism' CsvParseError CsvStreamRecordParseError _csvStreamRecordParseError = prism' (\(CsvStreamRecordParseError t) -> IncrementalError t) $ \case IncrementalError t -> Just $ CsvStreamRecordParseError t _other -> Nothing throwIncrementalErrors :: MonadError CsvParseError m => ConduitT (Either CsvStreamRecordParseError a) a m () throwIncrementalErrors = C.mapM $ either (throwError . review _csvStreamRecordParseError) return newtype MaybeEmptyRecord csv = MaybeEmptyRecord { unMaybeEmptyRecord :: Maybe csv } instance FromNamedRecord csv => FromNamedRecord (MaybeEmptyRecord csv) where parseNamedRecord r | all null r = pure $ MaybeEmptyRecord Nothing | otherwise = MaybeEmptyRecord . Just <$> parseNamedRecord r instance FromRecord csv => FromRecord (MaybeEmptyRecord csv) where parseRecord r | all null r = pure $ MaybeEmptyRecord Nothing | otherwise = MaybeEmptyRecord . Just <$> parseRecord r decodeCsv :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromNamedRecord csv) => ConduitT ByteString csv m () decodeCsv = decodeCsv' $ \opts -> fromNamedCsvStreamError opts (review _haltingCsvParseError) .| throwIncrementalErrors decodeCsvPositional :: (MonadHandler m, HandlerSite m ~ UniWorX, MonadThrow m, FromRecord csv) => HasHeader -> ConduitT ByteString csv m () decodeCsvPositional hdr = decodeCsv' $ \opts -> fromCsvStreamError opts hdr (review _haltingCsvParseError) .| throwIncrementalErrors decodeCsv' :: forall csv m. ( MonadHandler m, HandlerSite m ~ UniWorX , MonadThrow m ) => (forall m'. Monad m' => DecodeOptions -> ConduitT ByteString (MaybeEmptyRecord csv) (ExceptT CsvParseError m') ()) -> ConduitT ByteString csv m () decodeCsv' fromCsv' = do encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth recodeCsv encOpts False decodeCsv'' .| C.mapMaybe unMaybeEmptyRecord where decodeCsv'' = transPipe throwExceptT $ do testBuffer <- accumTestBuffer LBS.empty mapM_ leftover $ LBS.toChunks testBuffer let decodeOptions = defaultDecodeOptions & guessDelimiter testBuffer $logInfoS "decodeCsv" [st|Guessed Csv.DecodeOptions from buffer of size #{tshow (LBS.length testBuffer)}/#{tshow testBufferSize}: #{tshow decodeOptions}|] fromCsv' decodeOptions testBufferSize :: Num a => a testBufferSize = 4096 accumTestBuffer acc | LBS.length acc >= testBufferSize = return acc | otherwise = do frag <- await case frag of Nothing -> return acc Just frag' -> accumTestBuffer (acc <> LBS.fromStrict frag') guessDelimiter testBuffer | Just firstDQuote <- doubleQuote `LBS.elemIndex` testBuffer = if | firstDQuote /= 0 -> \x -> x { Csv.decDelimiter = testBuffer `LBS.index` pred firstDQuote } | A.Done unused _ <- A.parse quotedField testBuffer -> case A.parse endOfLine unused of A.Fail{} | Just (nextChar, _) <- LBS.uncons unused -> \x -> x { Csv.decDelimiter = nextChar } _other -> guessDelimiter $ LBS.take firstDQuote testBuffer <> unused | otherwise -> id -- Parsing of something, which should be a quoted field, failed; bail now | A.Done _ ls <- A.parse (A.many1 $ A.manyTill A.anyWord8 endOfLine) testBuffer , (h:hs) <- filter (not . Map.null) $ map (fmap getSum . Map.unionsWith mappend . map (flip Map.singleton $ Sum 1) . filter (not . isAlphaNum')) ls , Just equals <- fromNullable $ Map.filterWithKey (\c n -> all ((== Just n) . Map.lookup c) hs) h , let maxH = maximum equals , [d] <- filter ((== Just maxH) . flip Map.lookup (toNullable equals)) . Map.keys $ toNullable equals = \x -> x { Csv.decDelimiter = d } | otherwise = id isAlphaNum' = Char.isAlphaNum . Char.chr . fromIntegral quotedField :: A.Parser () -- We don't care about the return value quotedField = void . Csv.field $ Csv.decDelimiter defaultDecodeOptions -- We can use comma as a separator, because we know that the field we're trying to parse is quoted and so does not rely on the delimiter endOfLine :: A.Parser () endOfLine = asum [ void $ A.word8 newline , mapM_ A.word8 [cr, newline] , void $ A.word8 cr ] doubleQuote, newline, cr :: Word8 doubleQuote = 34 newline = 10 cr = 13 encodeCsv :: ( ToNamedRecord csv , MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> Header -> ConduitT csv ByteString m CsvFormat -- ^ Encode a stream of records -- -- Currently not streaming encodeCsv sheetName hdr = do encOpts <- maybe def (userCsvOptions . entityVal) <$> lift maybeAuth encodeCsvWith encOpts sheetName hdr encodeCsvWith :: ( ToNamedRecord csv , MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => CsvOptions -> msg -- ^ Sheet name for .xlsx -> Header -> ConduitT csv ByteString m CsvFormat -- ^ Encode a stream of records -- -- Currently not streaming encodeCsvWith encOpts sheetName hdr = transPipe liftHandler $ case encOpts ^. _csvFormat of CsvFormatOptions{} | Just csvOpts <- encOpts ^? _csvFormat . _CsvEncodeOptions, has (_csvFormat . _CsvFormat . _FormatCsv) encOpts -> do (C.sourceLazy . encodeByNameWith csvOpts hdr =<< C.foldMap pure) .| recode' return FormatCsv | otherwise -> error "encOpts is CsvFormatOptions but cannot be converted via _CsvEncodeOptions or hasn't _FormatCsv" CsvXlsxFormatOptions{} | has (_csvFormat . _CsvFormat . _FormatXlsx) encOpts -> do rendered <- toCsvRendered hdr <$> C.foldMap (pure @Seq) sheetName' <- ($ sheetName) <$> getMessageRender pNow <- liftIO getPOSIXTime C.sourceLazy (fromXlsx pNow $ csvRenderedToXlsx sheetName' rendered) .| recode' return FormatXlsx | otherwise -> error "encOpts hasn't _FormatXlsx" where recode' = recodeCsv encOpts True $ C.map id encodeCsvRendered :: ( MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> CsvRendered -> m (CsvFormat, LBS.ByteString) encodeCsvRendered sheetName CsvRendered{..} = runConduit $ yieldMany csvRenderedData .| (encodeCsv sheetName csvRenderedHeader `fuseBoth` C.sinkLazy) encodeCsvRenderedWith :: ( MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => CsvOptions -> msg -- ^ Sheet name for .xlsx -> CsvRendered -> m (CsvFormat, LBS.ByteString) encodeCsvRenderedWith encOpts sheetName CsvRendered{..} = runConduit $ yieldMany csvRenderedData .| (encodeCsvWith encOpts sheetName csvRenderedHeader `fuseBoth` C.sinkLazy) csvRenderedToTypedContent :: ( MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> CsvRendered -> m TypedContent csvRenderedToTypedContent sheetName csvRendered = do encOpts <- maybe def (userCsvOptions . entityVal) <$> maybeAuth csvRenderedToTypedContentWith encOpts sheetName csvRendered csvRenderedToTypedContentWith :: ( MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => CsvOptions -> msg -- ^ Sheet name for .xlsx -> CsvRendered -> m TypedContent csvRenderedToTypedContentWith encOpts sheetName csvRendered = do (csvFormat, resp) <- encodeCsvRenderedWith encOpts sheetName csvRendered let cType = case csvFormat of FormatCsv -> typeCsv' FormatXlsx -> typeXlsx return . TypedContent cType $ toContent resp timestampCsv :: ( MonadHandler m , HandlerSite m ~ UniWorX ) => m (FilePath -> FilePath) timestampCsv = do csvOpts <- fmap (maybe def $ userCsvOptions . entityVal) maybeAuth timestampCsv' csvOpts . review _Wrapped =<< languages timestampCsv' :: MonadIO m => CsvOptions -> Languages -> m (FilePath -> FilePath) timestampCsv' csvOpts (Languages langs) = liftIO $ if | csvOpts ^. _csvTimestamp -> do ts <- getCurrentTime <&> Time.formatTime (getTimeLocale' langs) (iso8601DateFormat $ Just "%H%M") return $ (<>) (ts <> "-") | otherwise -> return id expectedCsvFormat :: ( MonadHandler m , HandlerSite m ~ UniWorX ) => m CsvFormat expectedCsvFormat = view (_csvFormat . _CsvFormat) . maybe def (userCsvOptions . entityVal) <$> maybeAuth expectedCsvContentType :: ( MonadHandler m , HandlerSite m ~ UniWorX ) => m ContentType expectedCsvContentType = expectedCsvFormat <&> \case FormatCsv -> typeCsv' FormatXlsx -> typeXlsx partIsAttachmentCsv :: (RenderMessage UniWorX msg, MonadMail m, HandlerSite m ~ UniWorX) => msg -> StateT Part m () partIsAttachmentCsv fName' = do csvOpts <- lift askMailCsvOptions langs <- lift askMailLanguages fName <- ($ fName') <$> lift getMailMessageRender ts <- timestampCsv' csvOpts langs let ext = case csvOpts ^. _csvFormat . _CsvFormat of FormatCsv -> extensionCsv FormatXlsx -> extensionXlsx partIsAttachment . ts $ unpack fName `addExtension` unpack ext setContentDispositionCsv :: (RenderMessage UniWorX msg, MonadHandler m, HandlerSite m ~ UniWorX) => msg -> m () setContentDispositionCsv fName' = do fName <- unpack . ($ fName') <$> getMessageRender ts <- timestampCsv fmt <- expectedCsvFormat let ext = case fmt of FormatCsv -> extensionCsv FormatXlsx -> extensionXlsx setContentDisposition' . Just $ ensureExtension (unpack ext) (ts fName) encodeDefaultOrderedCsv :: forall csv m msg. ( ToNamedRecord csv , DefaultOrdered csv , MonadHandler m , HandlerSite m ~ UniWorX , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> ConduitT csv ByteString m CsvFormat encodeDefaultOrderedCsv sheetName = encodeCsv sheetName $ headerOrder (error "headerOrder" :: csv) respondCsv :: ( ToNamedRecord csv , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> Header -> ConduitT () csv Handler () -> Handler TypedContent respondCsv sheetName hdr src = do cType <- expectedCsvContentType respondSource cType $ src .| void (encodeCsv sheetName hdr) .| awaitForever sendChunk respondDefaultOrderedCsv :: forall csv msg. ( ToNamedRecord csv , DefaultOrdered csv , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> ConduitT () csv Handler () -> Handler TypedContent respondDefaultOrderedCsv sheetName = respondCsv sheetName $ headerOrder (error "headerOrder" :: csv) respondCsvDB :: ( ToNamedRecord csv , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> Header -> ConduitT () csv DB () -> Handler TypedContent respondCsvDB sheetName hdr src = do cType <- expectedCsvContentType respondSourceDB cType $ src .| void (encodeCsv sheetName hdr) .| awaitForever sendChunk respondDefaultOrderedCsvDB :: forall csv msg. ( ToNamedRecord csv , DefaultOrdered csv , RenderMessage UniWorX msg ) => msg -- ^ Sheet name for .xlsx -> ConduitT () csv DB () -> Handler TypedContent respondDefaultOrderedCsvDB sheetName = respondCsvDB sheetName $ headerOrder (error "headerOrder" :: csv) fileSourceCsv :: ( FromNamedRecord csv , MonadThrow m , MonadHandler m , HandlerSite m ~ UniWorX ) => ConduitT FileReference csv m () fileSourceCsv = uploadContents .| decodeCsv fileSourceCsvPositional :: ( MonadHandler m , HandlerSite m ~ UniWorX , MonadThrow m , FromRecord csv ) => HasHeader -> ConduitT FileReference csv m () fileSourceCsvPositional hdr = uploadContents .| decodeCsvPositional hdr instance ToWidget UniWorX CsvRendered where toWidget CsvRendered{..} = liftWidget $(widgetFile "widgets/csvRendered") where csvData = [ [ decodeUtf8 <$> HashMap.lookup columnKey row | columnKey <- Vector.toList csvRenderedHeader ] | row <- csvRenderedData ] headers = decodeUtf8 <$> Vector.toList csvRenderedHeader csvOptionsForFormat :: ( MonadHandler m, HandlerSite m ~ UniWorX ) => CsvFormat -> m CsvOptions csvOptionsForFormat fmt = do csvOpts <- fmap (maybe def $ userCsvOptions . entityVal) maybeAuth return $ if | fmt == csvOpts ^. _csvFormat . _CsvFormat -> csvOpts | otherwise -> csvOpts & _csvFormat .~ (csvPreset . _CsvFormatPreset # fmt)