fradrive/src/Utils/Csv.hs

158 lines
5.0 KiB
Haskell

-- SPDX-FileCopyrightText: 2022 Gregor Kleen <gregor.kleen@ifi.lmu.de>
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
{-# OPTIONS -fno-warn-orphans #-}
module Utils.Csv
( typeCsv, typeCsv', extensionCsv
, typeXlsx, extensionXlsx
, pathPieceCsv
, (.:??)
, lsfHeaderTranslate
, CsvRendered(..)
, toCsvRendered
, toDefaultOrderedCsvRendered
, csvRenderedToXlsx, Xlsx, Xlsx.fromXlsx
, CsvSemicolonList(..)
) where
import ClassyPrelude hiding (lookup)
import Settings.Mime
import Data.Csv hiding (Name)
import Data.Csv.Conduit (CsvParseError)
import Language.Haskell.TH (Name)
import Language.Haskell.TH.Lib
import Yesod.Core.Content
import qualified Data.Map as Map
import qualified Data.HashMap.Strict as HashMap
import Codec.Xlsx (Xlsx)
import qualified Codec.Xlsx as Xlsx
import Data.Monoid (Endo(..))
import Control.Lens
import Data.Default
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import qualified Data.CaseInsensitive as CI
import qualified Data.Binary.Builder as Builder
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Attoparsec.ByteString as Attoparsec
import qualified Data.Csv.Parser as Csv
import qualified Data.Csv.Builder as Csv
import qualified Data.Vector as Vector
import Data.Char (ord)
import Control.Monad.Fail
instance Exception CsvParseError
typeCsv, typeCsv' :: ContentType
typeCsv = simpleContentType typeCsv'
typeCsv' = "text/csv; header=present"
typeXlsx :: ContentType
typeXlsx = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
extensionCsv, extensionXlsx :: Extension
extensionCsv = fromMaybe "csv" $ listToMaybe [ ext | (ext, mime) <- Map.toList mimeMap, mime == typeCsv ]
extensionXlsx = fromMaybe "xlsx" $ listToMaybe [ ext | (ext, mime) <- Map.toList mimeMap, mime == typeXlsx ]
pathPieceCsv :: Name -> DecsQ
pathPieceCsv (conT -> t) =
[d|
instance ToField $(t) where
toField = toField . toPathPiece
instance FromField $(t) where
parseField = maybe (fail "Could not unmarshal from PathPiece") return . fromPathPiece <=< parseField
|]
(.:??) :: FromField (Maybe a) => NamedRecord -> ByteString -> Parser (Maybe a)
m .:?? name = lookup m name <|> return Nothing
lsfHeaderTranslate :: NamedRecord -> NamedRecord
lsfHeaderTranslate = HashMap.fromList . over (traverse . _1) lsfHeaderTranslate' . HashMap.toList
where
lsfHeaderTranslate' k = case CI.mk . Text.strip <$> Text.decodeUtf8' k of
Right k'
| k' == "mtknr" -> "matriculation"
| k' == "nachname" -> "surname"
| k' == "vorname" -> "first-name"
| k' == "bewertung" -> "exam-result"
_other -> k
data CsvRendered = CsvRendered
{ csvRenderedHeader :: Header
, csvRenderedData :: [NamedRecord]
} deriving (Eq, Read, Show, Generic)
toCsvRendered :: forall mono.
( ToNamedRecord (Element mono)
, MonoFoldable mono
)
=> Header
-> mono -> CsvRendered
toCsvRendered csvRenderedHeader (otoList -> csvs) = CsvRendered{..}
where
csvRenderedData = map toNamedRecord csvs
toDefaultOrderedCsvRendered :: forall mono.
( ToNamedRecord (Element mono)
, DefaultOrdered (Element mono)
, MonoFoldable mono
)
=> mono -> CsvRendered
toDefaultOrderedCsvRendered = toCsvRendered $ headerOrder (error "headerOrder" :: Element mono)
csvRenderedToXlsx :: Text -- ^ Name of worksheet
-> CsvRendered -> Xlsx
csvRenderedToXlsx sheetName CsvRendered{..} = def & Xlsx.atSheet sheetName ?~ (def & appEndo (addHeader <> addValues))
where
addHeader = flip foldMap (zip [1..] $ toList csvRenderedHeader) $ \(c, bs) -> Endo $ Xlsx.cellValueAtRC (1, c) ?~ Xlsx.CellText (decodeUtf8 bs)
addValues = flip foldMap (zip [2..] csvRenderedData) $ \(r, nr) -> flip foldMap (zip [1..] $ toList csvRenderedHeader) $ \(c, hBS) -> case HashMap.lookup hBS nr of
Nothing -> mempty
Just vBS -> Endo $ Xlsx.cellValueAtRC (r, c) ?~ Xlsx.CellText (decodeUtf8 vBS)
newtype CsvSemicolonList a = CsvSemicolonList { unCsvSemicolonList :: [a] }
deriving stock (Read, Show, Generic)
deriving newtype (Eq, Ord)
instance ToField a => ToField (CsvSemicolonList a) where
toField (CsvSemicolonList xs) = dropEnd 2 . LBS.toStrict . Builder.toLazyByteString $ Csv.encodeRecordWith encOpts fs
where
fs = map toField xs
encOpts = defaultEncodeOptions
{ encDelimiter = fromIntegral $ ord ';'
, encQuoting = case fs of
[fStr] | null fStr -> QuoteAll
_other -> QuoteMinimal
, encUseCrLf = True
}
instance FromField a => FromField (CsvSemicolonList a) where
parseField f
| null f = pure $ CsvSemicolonList []
| otherwise = fmap CsvSemicolonList . mapM parseField . Vector.toList <=< either fail return $ Attoparsec.parseOnly (Csv.record sep) f
where
sep = fromIntegral $ ord ';'