fradrive/src/Foundation/I18n/TH.hs

395 lines
20 KiB
Haskell

-- SPDX-FileCopyrightText: 2022 Gregor Kleen <gregor.kleen@ifi.lmu.de>
--
-- SPDX-License-Identifier: AGPL-3.0-or-later
{-# LANGUAGE UndecidableInstances #-}
module Foundation.I18n.TH
( mkMessage, mkMessageFor, mkMessageVariant, mkMessageAddition
) where
import Import.NoFoundation
import qualified Language.Haskell.TH.Lib as TH
import qualified Language.Haskell.TH.Syntax as TH
import qualified System.Directory.Tree as DirTree
import Data.HashMap.Strict.InsOrd (InsOrdHashMap)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import Data.HashSet.InsOrd (InsOrdHashSet)
import qualified Data.HashSet.InsOrd as InsOrdHashSet
import qualified Data.Foldable as F
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Text.Parsec as P
import qualified Text.Parsec.Text.Lazy as P
import qualified Language.Haskell.Meta.Parse as Meta
import qualified Data.List.NonEmpty as NonEmpty
import qualified Data.Text as Text
import qualified Data.Char as Char
import Utils.TH.AlphaConversion (alphaConvE)
newtype MsgFile f g = MsgFile
{ msgFileContent :: InsOrdHashMap String (f (MsgDef f g))
} deriving (Generic)
deriving stock instance Eq (f (MsgDef f g)) => Eq (MsgFile f g)
deriving stock instance Show (f (MsgDef f g)) => Show (MsgFile f g)
instance Semigroup (f (MsgDef f g)) => Monoid (MsgFile f g) where
mempty = MsgFile InsOrdHashMap.empty
instance Semigroup (f (MsgDef f g)) => Semigroup (MsgFile f g) where
MsgFile a <> MsgFile b = MsgFile $ InsOrdHashMap.unionWith (<>) a b
data MsgDef f g = MsgDef
{ msgDefVars :: InsOrdHashMap String (f (g TH.Type))
, msgDefContent :: [MsgDefContent]
, msgDefAnnotations :: Set MsgDefAnnotation
} deriving (Generic)
deriving stock instance Eq (f (g TH.Type)) => Eq (MsgDef f g)
deriving stock instance Show (f (g TH.Type)) => Show (MsgDef f g)
data MsgDefContent = MsgDefContentLiteral String
| MsgDefContentSplice Bool {- Recurse? -} TH.Exp
deriving (Eq, Ord, Show, Generic)
data MsgDefAnnotation = MsgDefIdenticalOk
| MsgDefEmptyOk
deriving (Eq, Ord, Read, Show, Generic)
makePrisms ''MsgDefContent
makePrisms ''MsgDefAnnotation
makeLenses_ ''MsgDef
_msgDefAnnotation :: MsgDefAnnotation -> Lens' (MsgDef f g) Bool
_msgDefAnnotation ann = _msgDefAnnotations . at ann . _Maybe
_msgDefIdenticalOk :: Lens' (MsgDef f g) Bool
_msgDefIdenticalOk = _msgDefAnnotation MsgDefIdenticalOk
_msgDefEmptyOk :: Lens' (MsgDef f g) Bool
_msgDefEmptyOk = _msgDefAnnotation MsgDefEmptyOk
disambiguateMsgFile :: MsgFile NonEmpty Maybe -> Either (InsOrdHashSet String, InsOrdHashMap String (InsOrdHashSet String)) (MsgFile Identity Maybe)
disambiguateMsgFile MsgFile{..}
| not (InsOrdHashSet.null duplicateDefs) || not (InsOrdHashMap.null duplicateVars)
= Left (duplicateDefs, duplicateVars)
| otherwise
= Right $ MsgFile{ msgFileContent = fmap msgDefToSingletons <$> toSingletons msgFileContent, .. }
where
toDuplicates :: forall k v. (Eq k, Hashable k) => InsOrdHashMap k (NonEmpty v) -> InsOrdHashSet k
toDuplicates = InsOrdHashSet.fromList . InsOrdHashMap.keys . InsOrdHashMap.filter (minLength 2)
duplicateDefs = toDuplicates msgFileContent
duplicateVars = InsOrdHashMap.mapMaybe (assertM' (not . InsOrdHashSet.null) . toDuplicates . msgDefVars . NonEmpty.head) msgFileContent
toSingletons :: forall k v. InsOrdHashMap k (NonEmpty v) -> InsOrdHashMap k (Identity v)
toSingletons = InsOrdHashMap.map $ \case
x NonEmpty.:| [] -> Identity x
xs -> error $ "toSingletons: Expected length 1, but got: " <> show (NonEmpty.length xs)
msgDefToSingletons :: MsgDef NonEmpty Maybe -> MsgDef Identity Maybe
msgDefToSingletons MsgDef{..} = MsgDef
{ msgDefVars = toSingletons msgDefVars
, ..
}
ensureTypesMsgFile :: MsgFile Identity Maybe -> Either (InsOrdHashMap String (InsOrdHashSet String)) (MsgFile Identity Identity)
ensureTypesMsgFile MsgFile{..}
| not $ InsOrdHashMap.null untyped
= Left untyped
| otherwise
= Right $ MsgFile{ msgFileContent = over _Wrapped msgDefToTyped <$> msgFileContent, .. }
where
untyped = InsOrdHashMap.mapMaybe (assertM' (not . InsOrdHashSet.null) . InsOrdHashSet.fromList . InsOrdHashMap.keys . InsOrdHashMap.filter (is _Nothing . runIdentity) . msgDefVars . runIdentity) msgFileContent
msgDefToTyped MsgDef{..} = MsgDef
{ msgDefVars = flip InsOrdHashMap.map msgDefVars $ \case
Identity (Just x) -> Identity $ Identity x
_other -> error "msgDefToTyped got Nothing"
, ..
}
accumInsOrdHashMap :: (Foldable f, Eq k, Hashable k)
=> f (k, v)
-> InsOrdHashMap k (NonEmpty v)
accumInsOrdHashMap = F.foldl' (\acc (k, v) -> InsOrdHashMap.insertWith (<>) k (pure v) acc) InsOrdHashMap.empty
unionsInsOrdHashMap :: (Foldable f, Eq k, Hashable k)
=> f (InsOrdHashMap k (NonEmpty v))
-> InsOrdHashMap k (NonEmpty v)
unionsInsOrdHashMap = F.foldl' (InsOrdHashMap.unionWith (<>)) InsOrdHashMap.empty
insOrdHashMapKeysSet :: (Eq k, Hashable k) => InsOrdHashMap k v -> InsOrdHashSet k
insOrdHashMapKeysSet = InsOrdHashSet.fromList . map (view _1) . InsOrdHashMap.toList
mkMessage :: TH.Name -- ^ Foundation type
-> FilePath -- ^ Base directory of translation files
-> Lang -- ^ Default translation language
-> TH.DecsQ
mkMessage dt = mkMessageCommon True "Msg" dt . TH.mkName $ TH.nameBase dt <> "Message"
mkMessageFor :: TH.Name -- ^ Foundation type
-> TH.Name -- ^ Existing type to add translations for
-> FilePath -- ^ Base directory of translation files
-> Lang -- ^ Default translation language
-> TH.DecsQ
mkMessageFor = mkMessageCommon False ""
mkMessageVariant :: TH.Name -- ^ Foundation type
-> TH.Name -- ^ Existing type to add translations for
-> FilePath -- ^ Base directory of translation files
-> Lang -- ^ Default translation language
-> TH.DecsQ
mkMessageVariant = mkMessageCommon False "Msg"
mkMessageAddition :: TH.Name -- ^ Foundation type
-> String -- ^ Qualifier to insert into name of message type
-> FilePath -- ^ Base directory of translation files
-> Lang -- ^ Default translation language
-> TH.DecsQ
mkMessageAddition master qual = mkMessageCommon True "Msg" master . TH.mkName $ TH.nameBase master <> qual <> "Message"
mkMessageCommon :: Bool -- ^ Generate new datatype
-> String -- ^ String to prepend to constructor names
-> TH.Name -- ^ Name of master datatype
-> TH.Name -- ^ Name of translation datatype
-> FilePath -- ^ Base directory of translation files
-> Lang -- ^ Default translation language
-> TH.DecsQ
mkMessageCommon genType prefix master datName folder defLang = do
files <- fmap DirTree.zipPaths . liftIO $ DirTree.readDirectoryWith (runExceptT . parseMsgFile) folder
forMOf_ (folded . _1) files TH.addDependentFile
let (errors, successes) = flip (foldMapOf $ folded . _2) files $ \case
Left err -> ([err], mempty)
Right (lang, x) -> (mempty, MergeHashMap $ HashMap.singleton lang x)
unless (null errors) . fail $ "Errors occurred while parsing message files:\n" <> indent 2 (unlines $ map show errors)
let (ambiguous, disambiguated) = flip ifoldMap successes $ \lang x -> case disambiguateMsgFile x of
Left errs -> ([(lang, errs)], mempty)
Right x' -> (mempty, HashMap.singleton lang x')
ambiguousError (lang, (ambigDefs, ambigVars)) = "Language " <> unpack lang <> ":\n" <> unlines errs
where
errs = bool (pure $ "Duplicate message definitions:\n" <> indent 1 (unlines ambigDefsErrs)) [] (InsOrdHashSet.null ambigDefs)
++ bool (pure $ "Duplicate variable names:\n" <> indent 1 (unlines ambigVarsErrs)) [] (InsOrdHashMap.null ambigVars)
ambigDefsErrs = InsOrdHashSet.toList ambigDefs
ambigVarsErrs = map (\(defn, InsOrdHashSet.toList -> vars) -> defn <> ": " <> intercalate ", " vars) $ InsOrdHashMap.toList ambigVars
unless (null ambiguous) . fail . indent' 2 . unlines $ map ambiguousError ambiguous
defMsgFile <- case HashMap.lookup defLang disambiguated of
Nothing -> fail $ "Default language (" <> unpack defLang <> ") not found; found instead: " <> intercalate ", " (unpack <$> HashMap.keys disambiguated)
Just x -> return x
let allDefns = insOrdHashMapKeysSet $ msgFileContent defMsgFile
extraDefns = flip HashMap.mapMaybe disambiguated $ \MsgFile{..} -> assertM' (not . InsOrdHashSet.null) $ insOrdHashMapKeysSet msgFileContent `InsOrdHashSet.difference` allDefns
extraDefnsErrs = flip map (HashMap.toList extraDefns) $ \(lang, extra) -> "Language " <> unpack lang <> ":\n" <> indent 1 (intercalate ", " $ InsOrdHashSet.toList extra)
unless (null extraDefns) . fail $ "Extraneous message definitions:\n" <> indent 2 (unlines extraDefnsErrs)
let identical = filter isNonOkIdentical $ InsOrdHashSet.toList allDefns
where
isNonOkIdentical :: String -> Bool
isNonOkIdentical defnName = not identicalOk && superSingular defns && allEqual defns
where defns :: [[MsgDefContent]]
defns = mapMaybe (fmap (msgDefContent . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) $ HashMap.elems disambiguated
superSingular :: forall a. [a] -> Bool
superSingular = \case
_ : _ : _ -> True
_other -> False
allEqual :: [[MsgDefContent]] -> Bool
allEqual [] = True
allEqual (x:xs) = all (roughEq x) xs
roughEq :: [MsgDefContent] -> [MsgDefContent] -> Bool
roughEq xs ys = not (null xs && null ys) && xs == ys
-- roughEq xs ys = fromMaybe False $ do
-- let xs' = mapMaybe (^? _MsgDefContentLiteral) xs
-- ys' = mapMaybe (^? _MsgDefContentLiteral) ys
-- guard . not $ null xs' && null ys'
-- return $ xs' == ys'
identicalOk = any (maybe False (view _msgDefIdenticalOk . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) $ HashMap.elems disambiguated
unless (null identical) . TH.reportWarning . indent' 2 $ "Message definitions identical for all languages:\n" <> indent 1 (intercalate ", " identical)
let empties = InsOrdHashMap.fromList . mapMaybe (\defn -> fmap (defn, ) . assertM' (not . HashSet.null) $ nonOkEmpties defn) $ InsOrdHashSet.toList allDefns
where
nonOkEmpties :: String -> HashSet Lang
nonOkEmpties defnName | emptyOk = HashSet.empty
| otherwise = HashMap.keysSet $ HashMap.filter (maybe False (isEmpty . msgDefContent . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) disambiguated
where isEmpty :: [MsgDefContent] -> Bool
isEmpty = (&&) <$> all (is _MsgDefContentLiteral) <*> allOf (folded . _MsgDefContentLiteral . folded) Char.isSpace
emptyOk = any (maybe False (view _msgDefEmptyOk . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) $ HashMap.elems disambiguated
unless (InsOrdHashMap.null empties) . TH.reportWarning . indent' 2 $ "Message definitions empty for any language:\n" <> indent 1 (unlines . map (\(defn, langs) -> defn <> " (" <> intercalate ", " (map unpack $ HashSet.toList langs) <> ")") $ InsOrdHashMap.toList empties)
let defnName defn = TH.mkName $ prefix <> defn
execWriterT @_ @[TH.Dec] $ do
when genType $ do
typedDefMsgFile <- case ensureTypesMsgFile defMsgFile of
Left untypedVars -> fail $ "Default language (" <> unpack defLang <> ") contains untyped variables:\n" <> indent 2 (unlines . flip map (InsOrdHashMap.toList $ InsOrdHashSet.toList <$> untypedVars) $ \(defn, vs) -> defn <> ": " <> intercalate ", " vs)
Right x -> return x
let
datCons :: [TH.ConQ]
datCons = flip foldMap (InsOrdHashMap.toList $ msgFileContent typedDefMsgFile) $ \(defn, Identity MsgDef{ msgDefVars }) ->
pure . TH.normalC (defnName defn) . flip foldMap (InsOrdHashMap.toList msgDefVars) $ \(_, Identity (Identity varT)) ->
pure . TH.bangType (TH.bang TH.noSourceUnpackedness TH.sourceStrict) $ return varT
tellMPoint $ TH.dataD (TH.cxt []) datName [] Nothing datCons []
renderLangs <- iforM disambiguated $ \lang MsgFile{..} -> do
let missing = allDefns `InsOrdHashSet.difference` insOrdHashMapKeysSet msgFileContent
complete = InsOrdHashSet.null missing
unless complete $
lift . TH.reportWarning $ "Language " <> unpack lang <> " is not complete, missing:\n" <> indent 2 (unlines $ InsOrdHashSet.toList missing)
funName <- lift $ newUniqueName "renderLang"
tellMPoint $ TH.sigD funName [t| $(TH.conT master) -> [Lang] -> $(TH.conT datName) -> $(bool [t|Maybe Text|] [t|Text|] complete) |]
masterN <- lift $ TH.newName "_master"
langsN <- lift $ TH.newName "_langs"
let
lamExp :: TH.ExpQ
lamExp = TH.lamCaseE $ bool (++ [TH.match TH.wildP (TH.normalB [e|Nothing|]) []]) id complete matches
where matches :: [TH.MatchQ]
matches = flip map (InsOrdHashMap.toList msgFileContent) $ \(defn, Identity MsgDef{..}) -> do
varns <- flip foldMapM (InsOrdHashMap.toList msgDefVars) $ \(varn, Identity mType) -> InsOrdHashMap.singleton varn . (, mType) <$> TH.newName ("_" <> varn)
let transE :: TH.ExpQ
transE
| Just (x NonEmpty.:| xs) <- NonEmpty.nonEmpty msgDefContent = go x xs
| otherwise = [e|Text.empty|]
where
go' (MsgDefContentLiteral (pack -> t)) = TH.lift (t :: Text)
go' (MsgDefContentSplice isRec spliceE)
| isRec = [e|renderMessage $(TH.varE masterN) $(TH.varE langsN) $(return $ alphaConv spliceE)|]
| otherwise = [e|toMessage $(return $ alphaConv spliceE)|]
go c [] = go' c
go (MsgDefContentLiteral t1) (MsgDefContentLiteral t2 : cs) = go (MsgDefContentLiteral $ t1 <> t2) cs
go c1 (c2:cs) = [e|$(go' c1) `Text.append` $(go c2 cs)|]
alphaConv = alphaConvE . Map.fromList . map ((,) <$> views _1 TH.mkName <*> view (_2 . _1)) $ InsOrdHashMap.toList varns
defnP :: TH.PatQ
defnP = TH.conP (defnName defn) . map varP $ F.toList varns
where varP (varn, Nothing) = TH.varP varn
varP (varn, Just typ) = TH.sigP (varP (varn, Nothing)) $ return typ
TH.match defnP (TH.normalB $ bool [e|Just $(transE)|] transE complete) []
tellMPoint . TH.funD funName . pure $ TH.clause [TH.varP masterN, TH.varP langsN] (TH.normalB lamExp) []
tellMPoint $ TH.pragInlD funName TH.Inlinable TH.FunLike TH.AllPhases
return (complete, funName)
allRenderers <- lift $ newUniqueName "langRendereres"
tellMPoint $ TH.sigD allRenderers [t|HashMap Lang (Either ($(TH.conT master) -> [Lang] -> $(TH.conT datName) -> Maybe Text) ($(TH.conT master) -> [Lang] -> $(TH.conT datName) -> Text))|]
let allRenderers' = TH.listE . flip map (HashMap.toList renderLangs) $ \(lang, (complete, funName)) -> [e|($(TH.lift lang), $(bool [e|Left|] [e|Right|] complete) $(TH.varE funName))|]
in tellMPoint . TH.funD allRenderers . pure $ TH.clause [] (TH.normalB [e|HashMap.fromList $(allRenderers')|]) []
tellMPoint $ TH.pragInlD allRenderers TH.NoInline TH.FunLike TH.AllPhases
let defRender = views _2 TH.varE $ HashMap.findWithDefault (error "could not find default language in renderLangs") defLang renderLangs
in tellMPoint . TH.instanceD (TH.cxt []) [t|RenderMessage $(TH.conT master) $(TH.conT datName)|] . pure $
TH.funD 'renderMessage . pure $
TH.clause [] (TH.normalB [e|renderMessageDispatch $(TH.lift defLang) $(defRender) $(TH.varE allRenderers)|]) []
where
indent, indent' :: Int -> String -> String
indent n = unlines . map (replicate (4 * n) ' ' <>) . lines
indent' n = unlines . over (_tail . traverse) (replicate (4 * n) ' ' <>) . lines
parseMsgFile :: FilePath -> ExceptT P.ParseError IO (Lang, MsgFile NonEmpty Maybe)
parseMsgFile fPath = do
let msgFileLang = pack $ takeBaseName fPath
msgFileContent <- either throwE return <=< liftIO $ P.parseFromFile (pMsgFile <* P.eof) fPath
return (msgFileLang, MsgFile{..})
pMsgFile, pMsgLine :: P.Parser (InsOrdHashMap String (NonEmpty (MsgDef NonEmpty Maybe)))
pMsgFile = flip P.label ".msg file" . fmap unionsInsOrdHashMap $ pMsgLine `P.sepEndBy` P.try (P.many1 P.endOfLine)
pMsgLine = flip P.label ".msg line" $ do
spaces
P.choice
[ flip P.label "comment line" $ InsOrdHashMap.empty <$ P.char '#' <* P.manyTill P.anyChar (P.lookAhead . P.try $ void P.endOfLine <|> P.eof)
, flip P.label "empty line" $ InsOrdHashMap.empty <$ P.try P.endOfLine
, flip P.label "message line" $ do
constrBase <- (:) <$> P.upper <*> P.many (P.upper <|> P.lower <|> P.digit <|> P.char '\'')
msgDefVars <- P.option InsOrdHashMap.empty $ do
P.skipMany1 P.space
accumInsOrdHashMap <$> P.sepEndBy pMsgDefVar (P.many1 P.space)
spaces
let pAnnotations = Set.fromList <$> P.sepBy1 (P.char '!' *> pMsgDefAnnotation) spaces
msgDefAnnotations <- P.option Set.empty pAnnotations
void $ P.char ':'
spaces
msgDefContent <- P.manyTill pMsgDefContent . P.lookAhead . P.try $ void P.endOfLine <|> P.eof
return . InsOrdHashMap.singleton constrBase $ (NonEmpty.:| []) MsgDef{..}
]
pMsgDefAnnotation :: P.Parser MsgDefAnnotation
pMsgDefAnnotation = (MsgDefIdenticalOk <$ P.string "ident-ok")
<|> (MsgDefEmptyOk <$ P.string "empty-ok")
pMsgDefVar :: P.Parser (String, Maybe TH.Type)
pMsgDefVar = do
varBase <- (:) <$> P.lower <*> P.many (P.upper <|> P.lower <|> P.digit <|> P.char '\'')
varTyp <- P.optionMaybe $
P.char '@'
*> parseToSeparator Meta.parseType (fmap pure $ P.char ':' <|> P.space)
return (varBase, varTyp)
pMsgDefContent :: P.Parser MsgDefContent
pMsgDefContent = flip P.label "message definition content" $ choice
[ MsgDefContentLiteral . pure <$ P.char '\\' <*> (P.char '_' <|> P.char '#' <|> P.char '\\')
, do
isRecurse <- P.try $
(True <$ P.string "_{")
<|> (False <$ P.string "#{")
spliceExp <- parseToSeparator Meta.parseExp (pure <$> P.char '}')
void $ P.char '}'
return $ MsgDefContentSplice isRecurse spliceExp
, MsgDefContentLiteral <$> many1Till P.anyChar (P.lookAhead . P.try $ void (P.char '_') <|> void (P.char '#') <|> void P.endOfLine <|> P.eof)
]
parseToSeparator :: (String -> Either String a) -> P.Parser String -> P.Parser a
parseToSeparator innerP sepP = do
w <- many1Till P.anyChar $ P.lookAhead endOfWord
let cont mErrStr = do
endStr <- endOfWord
case endStr of
Nothing -> maybe mzero fail mErrStr
Just sepStr -> parseToSeparator (innerP . ((w <> sepStr) <>)) sepP
case innerP w of
Right res -> return res
<|> cont Nothing
Left errStr -> cont $ Just errStr
where
endOfWord = (Just <$> P.try sepP )
<|> (Nothing <$ P.try P.endOfLine)
many1Till :: P.Stream s m t => P.ParsecT s u m a -> P.ParsecT s u m end -> P.ParsecT s u m [a]
many1Till p end = (:) <$> p <*> P.manyTill p end
notLookAhead :: P.Stream s m t => P.ParsecT s u m a -> P.ParsecT s u m not -> P.ParsecT s u m a
notLookAhead p n = do
followingNot <- (True <$ P.lookAhead n) <|> pure False
bool p mzero followingNot
spaces :: P.Parser ()
spaces = P.skipMany $ notLookAhead P.space P.endOfLine
-- | Stolen from Richard Eisenberg: <https://gitlab.haskell.org/ghc/ghc/-/issues/13054>
newUniqueName :: TH.Quasi q => String -> q TH.Name
newUniqueName str = do
n <- TH.qNewName str
TH.qNewName $ show n