yesod-test: Improve CSS selector parser.

* Tighten up what the parser will accept (especially wrt identifiers).
* Write the parser in a more idiomatic (for *parsec) style.
This commit is contained in:
Erik de Castro Lopo 2014-09-13 18:52:17 +10:00
parent 50f57a3586
commit 581a688cf5

View File

@ -9,7 +9,10 @@ module Yesod.Test.CssQuery
import Prelude hiding (takeWhile) import Prelude hiding (takeWhile)
import Data.Text (Text) import Data.Text (Text)
import Data.Attoparsec.Text import Data.Attoparsec.Text
import Control.Applicative (many, (<|>), optional) import Control.Applicative
import Data.Char
import qualified Data.Text as T
data SelectorGroup data SelectorGroup
= DirectChildren [Selector] = DirectChildren [Selector]
@ -27,6 +30,13 @@ data Selector
| ByAttrEnds Text Text | ByAttrEnds Text Text
deriving (Show, Eq) deriving (Show, Eq)
-- The official syntax specification for CSS2 can be found here:
-- http://www.w3.org/TR/CSS2/syndata.html
-- but that spec is tricky to fully support. Instead we do the minimal and we
-- can extend it as needed.
-- | Parses a query into an intermediate format which is easy to feed to HXT -- | Parses a query into an intermediate format which is easy to feed to HXT
-- --
-- * The top-level lists represent the top level comma separated queries. -- * The top-level lists represent the top level comma separated queries.
@ -41,66 +51,54 @@ parseQuery = parseOnly cssQuery
-- Below this line is the Parsec parser for css queries. -- Below this line is the Parsec parser for css queries.
cssQuery :: Parser [[SelectorGroup]] cssQuery :: Parser [[SelectorGroup]]
cssQuery = sepBy rules (char ',' >> (optional (char ' '))) cssQuery = sepBy rules (char ',' >> optional (char ' '))
rules :: Parser [SelectorGroup] rules :: Parser [SelectorGroup]
rules = many $ directChildren <|> deepChildren rules = many $ directChildren <|> deepChildren
directChildren :: Parser SelectorGroup directChildren :: Parser SelectorGroup
directChildren = do directChildren = string "> " >> DirectChildren <$> parseSelectors
_ <- char '>'
_ <- char ' '
sels <- selectors
_ <- optional $ char ' '
return $ DirectChildren sels
deepChildren :: Parser SelectorGroup deepChildren :: Parser SelectorGroup
deepChildren = do deepChildren = DeepChildren <$> parseSelectors
sels <- selectors
_ <- optional $ char ' '
return $ DeepChildren sels
selectors :: Parser [Selector] parseSelectors :: Parser [Selector]
selectors = many1 $ parseId parseSelectors = pOptionalTrailingSpace . many1 $
<|> parseClass parseId <|> parseClass <|> parseTag <|> parseAttr
<|> parseTag
<|> parseAttr
parseId :: Parser Selector parseId :: Parser Selector
parseId = do parseId = char '#' >> ById <$> pIdent
_ <- char '#'
x <- takeWhile $ flip notElem ",#.[ >"
return $ ById x
parseClass :: Parser Selector parseClass :: Parser Selector
parseClass = do parseClass = char '.' >> ByClass <$> pIdent
_ <- char '.'
x <- takeWhile $ flip notElem ",#.[ >"
return $ ByClass x
parseTag :: Parser Selector parseTag :: Parser Selector
parseTag = do parseTag = ByTagName <$> pIdent
x <- takeWhile1 $ flip notElem ",#.[ >"
return $ ByTagName x
parseAttr :: Parser Selector parseAttr :: Parser Selector
parseAttr = do parseAttr = pSquare $ choice
_ <- char '[' [ ByAttrEquals <$> pIdent <*> (string "=" *> pAttrValue)
name <- takeWhile $ flip notElem ",#.=$^*]" , ByAttrContains <$> pIdent <*> (string "*=" *> pAttrValue)
(parseAttrExists name) , ByAttrStarts <$> pIdent <*> (string "^=" *> pAttrValue)
<|> (parseAttrWith "=" ByAttrEquals name) , ByAttrEnds <$> pIdent <*> (string "$=" *> pAttrValue)
<|> (parseAttrWith "*=" ByAttrContains name) , ByAttrExists <$> pIdent
<|> (parseAttrWith "^=" ByAttrStarts name) ]
<|> (parseAttrWith "$=" ByAttrEnds name)
parseAttrExists :: Text -> Parser Selector -- | pIdent : Parse an identifier (not yet supporting escapes and unicode as
parseAttrExists attrname = do -- part of the identifier). Basically the regex: [-]?[_a-zA-Z][_a-zA-Z0-9]*
_ <- char ']' pIdent :: Parser Text
return $ ByAttrExists attrname pIdent = pOptionalTrailingSpace $ do
leadingMinus <- string "-" <|> pure ""
nmstart <- T.singleton <$> satisfy (\c -> isAlpha c || c == '_')
nmchar <- takeWhile (\c -> isAlphaNum c || c == '_')
return $ T.concat [ leadingMinus, nmstart, nmchar ]
parseAttrWith :: Text -> (Text -> Text -> Selector) -> Text -> Parser Selector
parseAttrWith sign constructor name = do pAttrValue :: Parser Text
_ <- string sign pAttrValue = takeWhile (/= ']')
value <- takeWhile $ flip notElem ",#.]"
_ <- char ']' pSquare :: Parser a -> Parser a
return $ constructor name value pSquare p = char '[' *> p <* char ']'
pOptionalTrailingSpace :: Parser a -> Parser a
pOptionalTrailingSpace p = p <* optional (char ' ')