-- SPDX-FileCopyrightText: 2022 Gregor Kleen -- -- SPDX-License-Identifier: AGPL-3.0-or-later {-# LANGUAGE UndecidableInstances #-} module Foundation.I18n.TH ( mkMessage, mkMessageFor, mkMessageVariant, mkMessageAddition ) where import Import.NoFoundation import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH import qualified System.Directory.Tree as DirTree import Data.HashMap.Strict.InsOrd (InsOrdHashMap) import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap import Data.HashSet.InsOrd (InsOrdHashSet) import qualified Data.HashSet.InsOrd as InsOrdHashSet import qualified Data.Foldable as F import qualified Data.HashMap.Strict as HashMap import qualified Data.HashSet as HashSet import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Text.Parsec as P import qualified Text.Parsec.Text.Lazy as P import qualified Language.Haskell.Meta.Parse as Meta import qualified Data.List.NonEmpty as NonEmpty import qualified Data.Text as Text import qualified Data.Char as Char import Utils.TH.AlphaConversion (alphaConvE) newtype MsgFile f g = MsgFile { msgFileContent :: InsOrdHashMap String (f (MsgDef f g)) } deriving (Generic) deriving stock instance Eq (f (MsgDef f g)) => Eq (MsgFile f g) deriving stock instance Show (f (MsgDef f g)) => Show (MsgFile f g) instance Semigroup (f (MsgDef f g)) => Monoid (MsgFile f g) where mempty = MsgFile InsOrdHashMap.empty instance Semigroup (f (MsgDef f g)) => Semigroup (MsgFile f g) where MsgFile a <> MsgFile b = MsgFile $ InsOrdHashMap.unionWith (<>) a b data MsgDef f g = MsgDef { msgDefVars :: InsOrdHashMap String (f (g TH.Type)) , msgDefContent :: [MsgDefContent] , msgDefAnnotations :: Set MsgDefAnnotation } deriving (Generic) deriving stock instance Eq (f (g TH.Type)) => Eq (MsgDef f g) deriving stock instance Show (f (g TH.Type)) => Show (MsgDef f g) data MsgDefContent = MsgDefContentLiteral String | MsgDefContentSplice Bool {- Recurse? -} TH.Exp deriving (Eq, Ord, Show, Generic) data MsgDefAnnotation = MsgDefIdenticalOk | MsgDefEmptyOk deriving (Eq, Ord, Read, Show, Generic) makePrisms ''MsgDefContent makePrisms ''MsgDefAnnotation makeLenses_ ''MsgDef _msgDefAnnotation :: MsgDefAnnotation -> Lens' (MsgDef f g) Bool _msgDefAnnotation ann = _msgDefAnnotations . at ann . _Maybe _msgDefIdenticalOk :: Lens' (MsgDef f g) Bool _msgDefIdenticalOk = _msgDefAnnotation MsgDefIdenticalOk _msgDefEmptyOk :: Lens' (MsgDef f g) Bool _msgDefEmptyOk = _msgDefAnnotation MsgDefEmptyOk disambiguateMsgFile :: MsgFile NonEmpty Maybe -> Either (InsOrdHashSet String, InsOrdHashMap String (InsOrdHashSet String)) (MsgFile Identity Maybe) disambiguateMsgFile MsgFile{..} | not (InsOrdHashSet.null duplicateDefs) || not (InsOrdHashMap.null duplicateVars) = Left (duplicateDefs, duplicateVars) | otherwise = Right $ MsgFile{ msgFileContent = fmap msgDefToSingletons <$> toSingletons msgFileContent, .. } where toDuplicates :: forall k v. (Eq k, Hashable k) => InsOrdHashMap k (NonEmpty v) -> InsOrdHashSet k toDuplicates = InsOrdHashSet.fromList . InsOrdHashMap.keys . InsOrdHashMap.filter (minLength 2) duplicateDefs = toDuplicates msgFileContent duplicateVars = InsOrdHashMap.mapMaybe (assertM' (not . InsOrdHashSet.null) . toDuplicates . msgDefVars . NonEmpty.head) msgFileContent toSingletons :: forall k v. InsOrdHashMap k (NonEmpty v) -> InsOrdHashMap k (Identity v) toSingletons = InsOrdHashMap.map $ \case x NonEmpty.:| [] -> Identity x xs -> error $ "toSingletons: Expected length 1, but got: " <> show (NonEmpty.length xs) msgDefToSingletons :: MsgDef NonEmpty Maybe -> MsgDef Identity Maybe msgDefToSingletons MsgDef{..} = MsgDef { msgDefVars = toSingletons msgDefVars , .. } ensureTypesMsgFile :: MsgFile Identity Maybe -> Either (InsOrdHashMap String (InsOrdHashSet String)) (MsgFile Identity Identity) ensureTypesMsgFile MsgFile{..} | not $ InsOrdHashMap.null untyped = Left untyped | otherwise = Right $ MsgFile{ msgFileContent = over _Wrapped msgDefToTyped <$> msgFileContent, .. } where untyped = InsOrdHashMap.mapMaybe (assertM' (not . InsOrdHashSet.null) . InsOrdHashSet.fromList . InsOrdHashMap.keys . InsOrdHashMap.filter (is _Nothing . runIdentity) . msgDefVars . runIdentity) msgFileContent msgDefToTyped MsgDef{..} = MsgDef { msgDefVars = flip InsOrdHashMap.map msgDefVars $ \case Identity (Just x) -> Identity $ Identity x _other -> error "msgDefToTyped got Nothing" , .. } accumInsOrdHashMap :: (Foldable f, Eq k, Hashable k) => f (k, v) -> InsOrdHashMap k (NonEmpty v) accumInsOrdHashMap = F.foldl' (\acc (k, v) -> InsOrdHashMap.insertWith (<>) k (pure v) acc) InsOrdHashMap.empty unionsInsOrdHashMap :: (Foldable f, Eq k, Hashable k) => f (InsOrdHashMap k (NonEmpty v)) -> InsOrdHashMap k (NonEmpty v) unionsInsOrdHashMap = F.foldl' (InsOrdHashMap.unionWith (<>)) InsOrdHashMap.empty insOrdHashMapKeysSet :: (Eq k, Hashable k) => InsOrdHashMap k v -> InsOrdHashSet k insOrdHashMapKeysSet = InsOrdHashSet.fromList . map (view _1) . InsOrdHashMap.toList mkMessage :: TH.Name -- ^ Foundation type -> FilePath -- ^ Base directory of translation files -> Lang -- ^ Default translation language -> TH.DecsQ mkMessage dt = mkMessageCommon True "Msg" dt . TH.mkName $ TH.nameBase dt <> "Message" mkMessageFor :: TH.Name -- ^ Foundation type -> TH.Name -- ^ Existing type to add translations for -> FilePath -- ^ Base directory of translation files -> Lang -- ^ Default translation language -> TH.DecsQ mkMessageFor = mkMessageCommon False "" mkMessageVariant :: TH.Name -- ^ Foundation type -> TH.Name -- ^ Existing type to add translations for -> FilePath -- ^ Base directory of translation files -> Lang -- ^ Default translation language -> TH.DecsQ mkMessageVariant = mkMessageCommon False "Msg" mkMessageAddition :: TH.Name -- ^ Foundation type -> String -- ^ Qualifier to insert into name of message type -> FilePath -- ^ Base directory of translation files -> Lang -- ^ Default translation language -> TH.DecsQ mkMessageAddition master qual = mkMessageCommon True "Msg" master . TH.mkName $ TH.nameBase master <> qual <> "Message" mkMessageCommon :: Bool -- ^ Generate new datatype -> String -- ^ String to prepend to constructor names -> TH.Name -- ^ Name of master datatype -> TH.Name -- ^ Name of translation datatype -> FilePath -- ^ Base directory of translation files -> Lang -- ^ Default translation language -> TH.DecsQ mkMessageCommon genType prefix master datName folder defLang = do files <- fmap DirTree.zipPaths . liftIO $ DirTree.readDirectoryWith (runExceptT . parseMsgFile) folder forMOf_ (folded . _1) files TH.addDependentFile let (errors, successes) = flip (foldMapOf $ folded . _2) files $ \case Left err -> ([err], mempty) Right (lang, x) -> (mempty, MergeHashMap $ HashMap.singleton lang x) unless (null errors) . fail $ "Errors occurred while parsing message files:\n" <> indent 2 (unlines $ map show errors) let (ambiguous, disambiguated) = flip ifoldMap successes $ \lang x -> case disambiguateMsgFile x of Left errs -> ([(lang, errs)], mempty) Right x' -> (mempty, HashMap.singleton lang x') ambiguousError (lang, (ambigDefs, ambigVars)) = "Language " <> unpack lang <> ":\n" <> unlines errs where errs = bool (pure $ "Duplicate message definitions:\n" <> indent 1 (unlines ambigDefsErrs)) [] (InsOrdHashSet.null ambigDefs) ++ bool (pure $ "Duplicate variable names:\n" <> indent 1 (unlines ambigVarsErrs)) [] (InsOrdHashMap.null ambigVars) ambigDefsErrs = InsOrdHashSet.toList ambigDefs ambigVarsErrs = map (\(defn, InsOrdHashSet.toList -> vars) -> defn <> ": " <> intercalate ", " vars) $ InsOrdHashMap.toList ambigVars unless (null ambiguous) . fail . indent' 2 . unlines $ map ambiguousError ambiguous defMsgFile <- case HashMap.lookup defLang disambiguated of Nothing -> fail $ "Default language (" <> unpack defLang <> ") not found; found instead: " <> intercalate ", " (unpack <$> HashMap.keys disambiguated) Just x -> return x let allDefns = insOrdHashMapKeysSet $ msgFileContent defMsgFile extraDefns = flip HashMap.mapMaybe disambiguated $ \MsgFile{..} -> assertM' (not . InsOrdHashSet.null) $ insOrdHashMapKeysSet msgFileContent `InsOrdHashSet.difference` allDefns extraDefnsErrs = flip map (HashMap.toList extraDefns) $ \(lang, extra) -> "Language " <> unpack lang <> ":\n" <> indent 1 (intercalate ", " $ InsOrdHashSet.toList extra) unless (null extraDefns) . fail $ "Extraneous message definitions:\n" <> indent 2 (unlines extraDefnsErrs) let identical = filter isNonOkIdentical $ InsOrdHashSet.toList allDefns where isNonOkIdentical :: String -> Bool isNonOkIdentical defnName = not identicalOk && superSingular defns && allEqual defns where defns :: [[MsgDefContent]] defns = mapMaybe (fmap (msgDefContent . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) $ HashMap.elems disambiguated superSingular :: forall a. [a] -> Bool superSingular = \case _ : _ : _ -> True _other -> False allEqual :: [[MsgDefContent]] -> Bool allEqual [] = True allEqual (x:xs) = all (roughEq x) xs roughEq :: [MsgDefContent] -> [MsgDefContent] -> Bool roughEq xs ys = not (null xs && null ys) && xs == ys -- roughEq xs ys = fromMaybe False $ do -- let xs' = mapMaybe (^? _MsgDefContentLiteral) xs -- ys' = mapMaybe (^? _MsgDefContentLiteral) ys -- guard . not $ null xs' && null ys' -- return $ xs' == ys' identicalOk = any (maybe False (view _msgDefIdenticalOk . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) $ HashMap.elems disambiguated unless (null identical) . TH.reportWarning . indent' 2 $ "Message definitions identical for all languages:\n" <> indent 1 (intercalate ", " identical) let empties = InsOrdHashMap.fromList . mapMaybe (\defn -> fmap (defn, ) . assertM' (not . HashSet.null) $ nonOkEmpties defn) $ InsOrdHashSet.toList allDefns where nonOkEmpties :: String -> HashSet Lang nonOkEmpties defnName | emptyOk = HashSet.empty | otherwise = HashMap.keysSet $ HashMap.filter (maybe False (isEmpty . msgDefContent . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) disambiguated where isEmpty :: [MsgDefContent] -> Bool isEmpty = (&&) <$> all (is _MsgDefContentLiteral) <*> allOf (folded . _MsgDefContentLiteral . folded) Char.isSpace emptyOk = any (maybe False (view _msgDefEmptyOk . runIdentity) . InsOrdHashMap.lookup defnName . msgFileContent) $ HashMap.elems disambiguated unless (InsOrdHashMap.null empties) . TH.reportWarning . indent' 2 $ "Message definitions empty for any language:\n" <> indent 1 (unlines . map (\(defn, langs) -> defn <> " (" <> intercalate ", " (map unpack $ HashSet.toList langs) <> ")") $ InsOrdHashMap.toList empties) let defnName defn = TH.mkName $ prefix <> defn execWriterT @_ @[TH.Dec] $ do when genType $ do typedDefMsgFile <- case ensureTypesMsgFile defMsgFile of Left untypedVars -> fail $ "Default language (" <> unpack defLang <> ") contains untyped variables:\n" <> indent 2 (unlines . flip map (InsOrdHashMap.toList $ InsOrdHashSet.toList <$> untypedVars) $ \(defn, vs) -> defn <> ": " <> intercalate ", " vs) Right x -> return x let datCons :: [TH.ConQ] datCons = flip foldMap (InsOrdHashMap.toList $ msgFileContent typedDefMsgFile) $ \(defn, Identity MsgDef{ msgDefVars }) -> pure . TH.normalC (defnName defn) . flip foldMap (InsOrdHashMap.toList msgDefVars) $ \(_, Identity (Identity varT)) -> pure . TH.bangType (TH.bang TH.noSourceUnpackedness TH.sourceStrict) $ return varT tellMPoint $ TH.dataD (TH.cxt []) datName [] Nothing datCons [] renderLangs <- iforM disambiguated $ \lang MsgFile{..} -> do let missing = allDefns `InsOrdHashSet.difference` insOrdHashMapKeysSet msgFileContent complete = InsOrdHashSet.null missing unless complete $ lift . TH.reportWarning $ "Language " <> unpack lang <> " is not complete, missing:\n" <> indent 2 (unlines $ InsOrdHashSet.toList missing) funName <- lift $ newUniqueName "renderLang" tellMPoint $ TH.sigD funName [t| $(TH.conT master) -> [Lang] -> $(TH.conT datName) -> $(bool [t|Maybe Text|] [t|Text|] complete) |] masterN <- lift $ TH.newName "_master" langsN <- lift $ TH.newName "_langs" let lamExp :: TH.ExpQ lamExp = TH.lamCaseE $ bool (++ [TH.match TH.wildP (TH.normalB [e|Nothing|]) []]) id complete matches where matches :: [TH.MatchQ] matches = flip map (InsOrdHashMap.toList msgFileContent) $ \(defn, Identity MsgDef{..}) -> do varns <- flip foldMapM (InsOrdHashMap.toList msgDefVars) $ \(varn, Identity mType) -> InsOrdHashMap.singleton varn . (, mType) <$> TH.newName ("_" <> varn) let transE :: TH.ExpQ transE | Just (x NonEmpty.:| xs) <- NonEmpty.nonEmpty msgDefContent = go x xs | otherwise = [e|Text.empty|] where go' (MsgDefContentLiteral (pack -> t)) = TH.lift (t :: Text) go' (MsgDefContentSplice isRec spliceE) | isRec = [e|renderMessage $(TH.varE masterN) $(TH.varE langsN) $(return $ alphaConv spliceE)|] | otherwise = [e|toMessage $(return $ alphaConv spliceE)|] go c [] = go' c go (MsgDefContentLiteral t1) (MsgDefContentLiteral t2 : cs) = go (MsgDefContentLiteral $ t1 <> t2) cs go c1 (c2:cs) = [e|$(go' c1) `Text.append` $(go c2 cs)|] alphaConv = alphaConvE . Map.fromList . map ((,) <$> views _1 TH.mkName <*> view (_2 . _1)) $ InsOrdHashMap.toList varns defnP :: TH.PatQ defnP = TH.conP (defnName defn) . map varP $ F.toList varns where varP (varn, Nothing) = TH.varP varn varP (varn, Just typ) = TH.sigP (varP (varn, Nothing)) $ return typ TH.match defnP (TH.normalB $ bool [e|Just $(transE)|] transE complete) [] tellMPoint . TH.funD funName . pure $ TH.clause [TH.varP masterN, TH.varP langsN] (TH.normalB lamExp) [] tellMPoint $ TH.pragInlD funName TH.Inlinable TH.FunLike TH.AllPhases return (complete, funName) allRenderers <- lift $ newUniqueName "langRendereres" tellMPoint $ TH.sigD allRenderers [t|HashMap Lang (Either ($(TH.conT master) -> [Lang] -> $(TH.conT datName) -> Maybe Text) ($(TH.conT master) -> [Lang] -> $(TH.conT datName) -> Text))|] let allRenderers' = TH.listE . flip map (HashMap.toList renderLangs) $ \(lang, (complete, funName)) -> [e|($(TH.lift lang), $(bool [e|Left|] [e|Right|] complete) $(TH.varE funName))|] in tellMPoint . TH.funD allRenderers . pure $ TH.clause [] (TH.normalB [e|HashMap.fromList $(allRenderers')|]) [] tellMPoint $ TH.pragInlD allRenderers TH.NoInline TH.FunLike TH.AllPhases let defRender = views _2 TH.varE $ HashMap.findWithDefault (error "could not find default language in renderLangs") defLang renderLangs in tellMPoint . TH.instanceD (TH.cxt []) [t|RenderMessage $(TH.conT master) $(TH.conT datName)|] . pure $ TH.funD 'renderMessage . pure $ TH.clause [] (TH.normalB [e|renderMessageDispatch $(TH.lift defLang) $(defRender) $(TH.varE allRenderers)|]) [] where indent, indent' :: Int -> String -> String indent n = unlines . map (replicate (4 * n) ' ' <>) . lines indent' n = unlines . over (_tail . traverse) (replicate (4 * n) ' ' <>) . lines parseMsgFile :: FilePath -> ExceptT P.ParseError IO (Lang, MsgFile NonEmpty Maybe) parseMsgFile fPath = do let msgFileLang = pack $ takeBaseName fPath msgFileContent <- either throwE return <=< liftIO $ P.parseFromFile (pMsgFile <* P.eof) fPath return (msgFileLang, MsgFile{..}) pMsgFile, pMsgLine :: P.Parser (InsOrdHashMap String (NonEmpty (MsgDef NonEmpty Maybe))) pMsgFile = flip P.label ".msg file" . fmap unionsInsOrdHashMap $ pMsgLine `P.sepEndBy` P.try (P.many1 P.endOfLine) pMsgLine = flip P.label ".msg line" $ do spaces P.choice [ flip P.label "comment line" $ InsOrdHashMap.empty <$ P.char '#' <* P.manyTill P.anyChar (P.lookAhead . P.try $ void P.endOfLine <|> P.eof) , flip P.label "empty line" $ InsOrdHashMap.empty <$ P.try P.endOfLine , flip P.label "message line" $ do constrBase <- (:) <$> P.upper <*> P.many (P.upper <|> P.lower <|> P.digit <|> P.char '\'') msgDefVars <- P.option InsOrdHashMap.empty $ do P.skipMany1 P.space accumInsOrdHashMap <$> P.sepEndBy pMsgDefVar (P.many1 P.space) spaces let pAnnotations = Set.fromList <$> P.sepBy1 (P.char '!' *> pMsgDefAnnotation) spaces msgDefAnnotations <- P.option Set.empty pAnnotations void $ P.char ':' spaces msgDefContent <- P.manyTill pMsgDefContent . P.lookAhead . P.try $ void P.endOfLine <|> P.eof return . InsOrdHashMap.singleton constrBase $ (NonEmpty.:| []) MsgDef{..} ] pMsgDefAnnotation :: P.Parser MsgDefAnnotation pMsgDefAnnotation = (MsgDefIdenticalOk <$ P.string "ident-ok") <|> (MsgDefEmptyOk <$ P.string "empty-ok") pMsgDefVar :: P.Parser (String, Maybe TH.Type) pMsgDefVar = do varBase <- (:) <$> P.lower <*> P.many (P.upper <|> P.lower <|> P.digit <|> P.char '\'') varTyp <- P.optionMaybe $ P.char '@' *> parseToSeparator Meta.parseType (fmap pure $ P.char ':' <|> P.space) return (varBase, varTyp) pMsgDefContent :: P.Parser MsgDefContent pMsgDefContent = flip P.label "message definition content" $ choice [ MsgDefContentLiteral . pure <$ P.char '\\' <*> (P.char '_' <|> P.char '#' <|> P.char '\\') , do isRecurse <- P.try $ (True <$ P.string "_{") <|> (False <$ P.string "#{") spliceExp <- parseToSeparator Meta.parseExp (pure <$> P.char '}') void $ P.char '}' return $ MsgDefContentSplice isRecurse spliceExp , MsgDefContentLiteral <$> many1Till P.anyChar (P.lookAhead . P.try $ void (P.char '_') <|> void (P.char '#') <|> void P.endOfLine <|> P.eof) ] parseToSeparator :: (String -> Either String a) -> P.Parser String -> P.Parser a parseToSeparator innerP sepP = do w <- many1Till P.anyChar $ P.lookAhead endOfWord let cont mErrStr = do endStr <- endOfWord case endStr of Nothing -> maybe mzero fail mErrStr Just sepStr -> parseToSeparator (innerP . ((w <> sepStr) <>)) sepP case innerP w of Right res -> return res <|> cont Nothing Left errStr -> cont $ Just errStr where endOfWord = (Just <$> P.try sepP ) <|> (Nothing <$ P.try P.endOfLine) many1Till :: P.Stream s m t => P.ParsecT s u m a -> P.ParsecT s u m end -> P.ParsecT s u m [a] many1Till p end = (:) <$> p <*> P.manyTill p end notLookAhead :: P.Stream s m t => P.ParsecT s u m a -> P.ParsecT s u m not -> P.ParsecT s u m a notLookAhead p n = do followingNot <- (True <$ P.lookAhead n) <|> pure False bool p mzero followingNot spaces :: P.Parser () spaces = P.skipMany $ notLookAhead P.space P.endOfLine -- | Stolen from Richard Eisenberg: newUniqueName :: TH.Quasi q => String -> q TH.Name newUniqueName str = do n <- TH.qNewName str TH.qNewName $ show n