yesod-test: Improve CSS selector parser.
* Tighten up what the parser will accept (especially wrt identifiers). * Write the parser in a more idiomatic (for *parsec) style.
This commit is contained in:
parent
50f57a3586
commit
581a688cf5
@ -9,7 +9,10 @@ module Yesod.Test.CssQuery
|
||||
import Prelude hiding (takeWhile)
|
||||
import Data.Text (Text)
|
||||
import Data.Attoparsec.Text
|
||||
import Control.Applicative (many, (<|>), optional)
|
||||
import Control.Applicative
|
||||
import Data.Char
|
||||
|
||||
import qualified Data.Text as T
|
||||
|
||||
data SelectorGroup
|
||||
= DirectChildren [Selector]
|
||||
@ -27,6 +30,13 @@ data Selector
|
||||
| ByAttrEnds Text Text
|
||||
deriving (Show, Eq)
|
||||
|
||||
|
||||
-- The official syntax specification for CSS2 can be found here:
|
||||
-- http://www.w3.org/TR/CSS2/syndata.html
|
||||
-- but that spec is tricky to fully support. Instead we do the minimal and we
|
||||
-- can extend it as needed.
|
||||
|
||||
|
||||
-- | Parses a query into an intermediate format which is easy to feed to HXT
|
||||
--
|
||||
-- * The top-level lists represent the top level comma separated queries.
|
||||
@ -41,66 +51,54 @@ parseQuery = parseOnly cssQuery
|
||||
|
||||
-- Below this line is the Parsec parser for css queries.
|
||||
cssQuery :: Parser [[SelectorGroup]]
|
||||
cssQuery = sepBy rules (char ',' >> (optional (char ' ')))
|
||||
cssQuery = sepBy rules (char ',' >> optional (char ' '))
|
||||
|
||||
rules :: Parser [SelectorGroup]
|
||||
rules = many $ directChildren <|> deepChildren
|
||||
|
||||
directChildren :: Parser SelectorGroup
|
||||
directChildren = do
|
||||
_ <- char '>'
|
||||
_ <- char ' '
|
||||
sels <- selectors
|
||||
_ <- optional $ char ' '
|
||||
return $ DirectChildren sels
|
||||
directChildren = string "> " >> DirectChildren <$> parseSelectors
|
||||
|
||||
deepChildren :: Parser SelectorGroup
|
||||
deepChildren = do
|
||||
sels <- selectors
|
||||
_ <- optional $ char ' '
|
||||
return $ DeepChildren sels
|
||||
|
||||
selectors :: Parser [Selector]
|
||||
selectors = many1 $ parseId
|
||||
<|> parseClass
|
||||
<|> parseTag
|
||||
<|> parseAttr
|
||||
deepChildren = DeepChildren <$> parseSelectors
|
||||
|
||||
parseSelectors :: Parser [Selector]
|
||||
parseSelectors = pOptionalTrailingSpace . many1 $
|
||||
parseId <|> parseClass <|> parseTag <|> parseAttr
|
||||
|
||||
parseId :: Parser Selector
|
||||
parseId = do
|
||||
_ <- char '#'
|
||||
x <- takeWhile $ flip notElem ",#.[ >"
|
||||
return $ ById x
|
||||
parseId = char '#' >> ById <$> pIdent
|
||||
|
||||
parseClass :: Parser Selector
|
||||
parseClass = do
|
||||
_ <- char '.'
|
||||
x <- takeWhile $ flip notElem ",#.[ >"
|
||||
return $ ByClass x
|
||||
parseClass = char '.' >> ByClass <$> pIdent
|
||||
|
||||
parseTag :: Parser Selector
|
||||
parseTag = do
|
||||
x <- takeWhile1 $ flip notElem ",#.[ >"
|
||||
return $ ByTagName x
|
||||
parseTag = ByTagName <$> pIdent
|
||||
|
||||
parseAttr :: Parser Selector
|
||||
parseAttr = do
|
||||
_ <- char '['
|
||||
name <- takeWhile $ flip notElem ",#.=$^*]"
|
||||
(parseAttrExists name)
|
||||
<|> (parseAttrWith "=" ByAttrEquals name)
|
||||
<|> (parseAttrWith "*=" ByAttrContains name)
|
||||
<|> (parseAttrWith "^=" ByAttrStarts name)
|
||||
<|> (parseAttrWith "$=" ByAttrEnds name)
|
||||
parseAttr = pSquare $ choice
|
||||
[ ByAttrEquals <$> pIdent <*> (string "=" *> pAttrValue)
|
||||
, ByAttrContains <$> pIdent <*> (string "*=" *> pAttrValue)
|
||||
, ByAttrStarts <$> pIdent <*> (string "^=" *> pAttrValue)
|
||||
, ByAttrEnds <$> pIdent <*> (string "$=" *> pAttrValue)
|
||||
, ByAttrExists <$> pIdent
|
||||
]
|
||||
|
||||
parseAttrExists :: Text -> Parser Selector
|
||||
parseAttrExists attrname = do
|
||||
_ <- char ']'
|
||||
return $ ByAttrExists attrname
|
||||
-- | pIdent : Parse an identifier (not yet supporting escapes and unicode as
|
||||
-- part of the identifier). Basically the regex: [-]?[_a-zA-Z][_a-zA-Z0-9]*
|
||||
pIdent :: Parser Text
|
||||
pIdent = pOptionalTrailingSpace $ do
|
||||
leadingMinus <- string "-" <|> pure ""
|
||||
nmstart <- T.singleton <$> satisfy (\c -> isAlpha c || c == '_')
|
||||
nmchar <- takeWhile (\c -> isAlphaNum c || c == '_')
|
||||
return $ T.concat [ leadingMinus, nmstart, nmchar ]
|
||||
|
||||
parseAttrWith :: Text -> (Text -> Text -> Selector) -> Text -> Parser Selector
|
||||
parseAttrWith sign constructor name = do
|
||||
_ <- string sign
|
||||
value <- takeWhile $ flip notElem ",#.]"
|
||||
_ <- char ']'
|
||||
return $ constructor name value
|
||||
|
||||
pAttrValue :: Parser Text
|
||||
pAttrValue = takeWhile (/= ']')
|
||||
|
||||
pSquare :: Parser a -> Parser a
|
||||
pSquare p = char '[' *> p <* char ']'
|
||||
|
||||
pOptionalTrailingSpace :: Parser a -> Parser a
|
||||
pOptionalTrailingSpace p = p <* optional (char ' ')
|
||||
|
||||
Loading…
Reference in New Issue
Block a user