-- SPDX-FileCopyrightText: 2023 David Mosbach -- -- SPDX-License-Identifier: AGPL-3.0-or-later {-# Language DuplicateRecordFields, NoFieldSelectors, OverloadedRecordDot #-} module YamlParser where import Prelude hiding (lookup) import qualified Prelude as P import Control.Monad.State.Lazy import Data.Map.Lazy (Map, insert, lookup, empty, fromList, toList, (!)) import Data.Text.Encoding (encodeUtf8, decodeUtf8) import Data.Text.Lazy (toStrict) import Debug.Trace (trace) import Data.Maybe (fromJust, isNothing, isJust, fromMaybe) import Data.Text (pack, unpack, Text) import Data.YAML (decode1Strict, Node, Pos, Parser, parseEither) import Data.YAML.Event hiding (Scalar) import qualified Data.YAML.Event as Y import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy as BS.L activateTrace = False showTrace :: EvPos -> a -> a showTrace event action = if activateTrace then trace (show (eEvent event) ++ " @" ++ show (posLine $ ePos event)) action else action data ParseState = PState { rootNodes :: [YAMLNode], anchors :: Map Text YAMLNode, comments :: [Comment] -- YAML comment queue for the next node. } data AnchorData = NoAnchor | AnchorDef Text | AnchorAlias Text deriving (Show, Eq, Ord) data MergeData = MergeData {keys :: [Text], anchor :: AnchorData} deriving (Show, Eq, Ord) data YAMLNode = Scalar { bytes :: BS.ByteString, {-tag :: Tag,-} {-style :: Style,-} comment :: [Comment], -- TODO every node preceded by a scalar preceded by a comment stores said comment anchorData :: AnchorData, pos :: Pos } | Mapping { content :: [(YAMLNode, YAMLNode)], comment :: [Comment], anchorData :: AnchorData, mergeData :: [MergeData], -- keys of the maps merged into this mapping by "<<" pos :: Pos } | Sequence { children :: [YAMLNode], comment :: [Comment], anchorData :: AnchorData, pos :: Pos } deriving (Show, Eq) instance Ord YAMLNode where (Scalar b1 _ _ _) <= (Scalar b2 _ _ _) = b1 <= b2 _ <= _ = undefined type Comment = Text parse :: EvStream -> State ParseState YAMLNode parse ((Right (EvPos (DocumentEnd _) pos)):_) = get >>= \pState -> return $ if length pState.rootNodes == 1 then head pState.rootNodes else Sequence pState.rootNodes [] NoAnchor pos parse [] = get >>= \pState -> return $ if length pState.rootNodes == 1 then head pState.rootNodes else Sequence pState.rootNodes [] NoAnchor undefined parse ((Right (EvPos StreamStart _)):es) = parseComment es >>= parse parse ((Right (EvPos (DocumentStart _) _)):es) = parse es parse es = do (root, es') <- parseNode es pState <- get when (isJust root) . put $ pState {rootNodes = fromJust root : pState.rootNodes} parse es' parseComment :: EvStream -> State ParseState EvStream parseComment ((Right (EvPos (Y.Comment comment) _)):es) = do pState <- get put $ pState {comments = comment : pState.comments} parseComment es parseComment es = return es parseNode :: EvStream -> State ParseState (Maybe YAMLNode, EvStream) parseNode [] = trace "Unexpected eof" $ return (Nothing, []) parseNode ((Left (p,s)):es) = trace ("Failed to parse: " ++ show s ++ " @ line " ++ show p.posLine) $ parseNode es parseNode es@((Right (EvPos event pos)):es') = do pState <- get showTrace (EvPos event pos) $ case event of Y.Comment _ -> parseComment es >>= parseNode Y.SequenceStart anchor _ _ -> parseSequence es' anchor [] >>= \(seq, es'') -> return (Just seq, es'') Y.MappingStart anchor _ _ -> parseMapping es' anchor [] [] >>= \(map, es'') -> return (Just map, es'') Y.Scalar anchor _ _ text -> parseScalar anchor text pos >>= \scal -> return (Just scal, es') Y.Alias anchor -> parseAlias anchor >>= \a -> return (Just a, es') _ -> return (Nothing, es) -- error $ "Unexpected event: " ++ show event ++ " @" ++ show (posLine pos) parseSequence :: EvStream -> Maybe Anchor -> [YAMLNode] -> State ParseState (YAMLNode, EvStream) parseSequence ((Right (EvPos SequenceEnd pos)):es) anchor children = showTrace (EvPos SequenceEnd pos) $ do pState <- get let anchorData = maybe NoAnchor AnchorDef anchor let seq = Sequence (reverse children) [] anchorData pos let anchors = if isNothing anchor then pState.anchors else insert (fromJust anchor) seq pState.anchors put $ pState {anchors = anchors} return (seq, es) parseSequence es anchor children = do (child, es') <- parseNode es case child of Nothing -> parseSequence es' anchor children Just c -> parseSequence es' anchor (c : children) parseMapping :: EvStream -> Maybe Anchor -> [(YAMLNode, YAMLNode)] -> [MergeData] -> State ParseState (YAMLNode, EvStream) parseMapping ((Right (EvPos MappingEnd pos)):es) anchor content mergeData = showTrace (EvPos MappingEnd pos) $ do pState <- get let anchorData = maybe NoAnchor AnchorDef anchor let map = Mapping (reverse content) [] anchorData mergeData pos let anchors = if isNothing anchor then pState.anchors else insert (fromJust anchor) map pState.anchors put $ pState {anchors = anchors} return (map, es) parseMapping es anchor content mergeData = do (maybeKey, es') <- parseNode es case maybeKey of Nothing -> parseMapping es' anchor content mergeData Just key -> do unless (isScalar key) . error $ "Key not a scalar: " ++ show key (maybeVal, es'') <- parseNode es' let val = fromJust maybeVal if isMerge key then do let (content', mergeKeys) = mergeMappings [] content val let mergeData' = (MergeData mergeKeys key.anchorData) : mergeData parseMapping es'' anchor content' mergeData' else do let content' = (key {comment = []}, val {comment = key.comment}) : content -- migrate comment to val to preserve it for the workflow data structure. alternative. don't use Data.Map for e.g. nodes and stages but a custom type and transfer it later. parseMapping es'' anchor content' mergeData where isScalar :: YAMLNode -> Bool isScalar (Scalar {}) = True isScalar _ = False isMapping :: YAMLNode -> Bool isMapping (Mapping {}) = True isMapping _ = False isSequence :: YAMLNode -> Bool isSequence (Sequence {}) = True isSequence _ = False isMerge :: YAMLNode -> Bool isMerge (Scalar b _ _ _) = unpack (decodeUtf8 b) == "<<" mergeMappings :: [Text] -> [(YAMLNode, YAMLNode)] -> YAMLNode -> ([(YAMLNode, YAMLNode)], [Text]) mergeMappings mergeKeys content (Mapping [] _ _ _ _) = (content, mergeKeys) mergeMappings mergeKeys content m@(Mapping (x@(key, _):xs) _ _ _ _) | isJust $ P.lookup key content = mergeMappings mergeKeys content m {content = xs} | otherwise = mergeMappings ((decodeUtf8 key.bytes) : mergeKeys) (x : content) m {content = xs} mergeMappings mergeKeys content (Sequence [] _ _ _) = (content, mergeKeys) mergeMappings mergeKeys content s@(Sequence (m@(Mapping {}):xs) _ _ _) = mergeMappings mergeKeys' content' s {children = xs} where (content', mergeKeys') = mergeMappings mergeKeys content m parseScalar :: Maybe Anchor -> Text -> Pos -> State ParseState YAMLNode parseScalar anchor text pos = do pState <- get let comments = pState.comments let anchorData = maybe NoAnchor AnchorDef anchor let scal = Scalar (encodeUtf8 text) comments anchorData pos let anchors = if isNothing anchor then pState.anchors else insert (fromJust anchor) scal pState.anchors put $ pState {anchors = anchors, comments = []} return scal parseAlias :: Anchor -> State ParseState YAMLNode parseAlias anchor = do pState <- get case lookup anchor pState.anchors of Nothing -> error $ "Anchor '" ++ show anchor ++ "' not defined" Just node -> return node {anchorData = AnchorAlias anchor} class FromYAML' a where fromYAML :: YAMLNode -> Parser a instance FromYAML' a => FromYAML' (Maybe a) where fromYAML y@(Scalar bs _ _ _) | decodeUtf8 bs == pack "null" = pure Nothing | otherwise = Just <$> fromYAML y fromYAML y = Just <$> fromYAML y instance (Ord k, FromYAML' k, FromYAML' v) => FromYAML' (Map k v) where fromYAML (Mapping c _ _ _ _) = fromList <$> mapM (\(a,b) -> (,) <$> fromYAML a <*> fromYAML b) c instance FromYAML' Text where fromYAML (Scalar bs _ _ _) = pure $ decodeUtf8 bs instance FromYAML' YAMLNode where fromYAML = pure instance FromYAML' v => FromYAML' [v] where fromYAML (Sequence c _ _ _) = mapM fromYAML c instance (FromYAML' a, FromYAML' b) => FromYAML' (a,b) where fromYAML (Sequence [n1, n2] _ _ _) = (,) <$> fromYAML n1 <*> fromYAML n2 decodeWithComments1 :: FromYAML' v => BS.L.ByteString -> Either (Pos, String) v decodeWithComments1 input = do let events = parseEvents input -- let mainEvents = validHead events -- unless (isJust mainEvents) . error $ "Missing DocumentStart event" let initState = PState [] empty [] let content = evalState (parse events) initState parseEither . fromYAML $ content where validHead :: EvStream -> Maybe EvStream validHead ((Right (EvPos StreamStart _)):(Right (EvPos (DocumentStart _) _)):es) = Just es validHead _ = Nothing decodeWithComments1Strict :: FromYAML' v => BS.ByteString -> Either (Pos, String) v decodeWithComments1Strict = decodeWithComments1 . BS.L.fromChunks . (:[]) (<|) :: FromYAML' a => [(YAMLNode, YAMLNode)] -> Text -> Parser a mapping <| key = maybe (fail $ "key " ++ show key ++ " not found") fromYAML (P.lookup key $ prep mapping) where prep :: [(YAMLNode, YAMLNode)] -> [(Text, YAMLNode)] prep mapping = reverse [(decodeUtf8 scalar.bytes, val) | (scalar, val) <- mapping] (<|?) :: FromYAML' a => [(YAMLNode, YAMLNode)] -> Text -> Parser (Maybe a) mapping <|? key = maybe (pure Nothing) fromYAML (P.lookup key $ prep mapping) where prep :: [(YAMLNode, YAMLNode)] -> [(Text, YAMLNode)] prep mapping = reverse [(decodeUtf8 scalar.bytes, val) | (scalar, val) <- mapping]