yesod-test: Improve CSS selector parser.
* Tighten up what the parser will accept (especially wrt identifiers). * Write the parser in a more idiomatic (for *parsec) style.
This commit is contained in:
parent
50f57a3586
commit
581a688cf5
@ -9,7 +9,10 @@ module Yesod.Test.CssQuery
|
|||||||
import Prelude hiding (takeWhile)
|
import Prelude hiding (takeWhile)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Attoparsec.Text
|
import Data.Attoparsec.Text
|
||||||
import Control.Applicative (many, (<|>), optional)
|
import Control.Applicative
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
import qualified Data.Text as T
|
||||||
|
|
||||||
data SelectorGroup
|
data SelectorGroup
|
||||||
= DirectChildren [Selector]
|
= DirectChildren [Selector]
|
||||||
@ -27,6 +30,13 @@ data Selector
|
|||||||
| ByAttrEnds Text Text
|
| ByAttrEnds Text Text
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
|
||||||
|
-- The official syntax specification for CSS2 can be found here:
|
||||||
|
-- http://www.w3.org/TR/CSS2/syndata.html
|
||||||
|
-- but that spec is tricky to fully support. Instead we do the minimal and we
|
||||||
|
-- can extend it as needed.
|
||||||
|
|
||||||
|
|
||||||
-- | Parses a query into an intermediate format which is easy to feed to HXT
|
-- | Parses a query into an intermediate format which is easy to feed to HXT
|
||||||
--
|
--
|
||||||
-- * The top-level lists represent the top level comma separated queries.
|
-- * The top-level lists represent the top level comma separated queries.
|
||||||
@ -41,66 +51,54 @@ parseQuery = parseOnly cssQuery
|
|||||||
|
|
||||||
-- Below this line is the Parsec parser for css queries.
|
-- Below this line is the Parsec parser for css queries.
|
||||||
cssQuery :: Parser [[SelectorGroup]]
|
cssQuery :: Parser [[SelectorGroup]]
|
||||||
cssQuery = sepBy rules (char ',' >> (optional (char ' ')))
|
cssQuery = sepBy rules (char ',' >> optional (char ' '))
|
||||||
|
|
||||||
rules :: Parser [SelectorGroup]
|
rules :: Parser [SelectorGroup]
|
||||||
rules = many $ directChildren <|> deepChildren
|
rules = many $ directChildren <|> deepChildren
|
||||||
|
|
||||||
directChildren :: Parser SelectorGroup
|
directChildren :: Parser SelectorGroup
|
||||||
directChildren = do
|
directChildren = string "> " >> DirectChildren <$> parseSelectors
|
||||||
_ <- char '>'
|
|
||||||
_ <- char ' '
|
|
||||||
sels <- selectors
|
|
||||||
_ <- optional $ char ' '
|
|
||||||
return $ DirectChildren sels
|
|
||||||
|
|
||||||
deepChildren :: Parser SelectorGroup
|
deepChildren :: Parser SelectorGroup
|
||||||
deepChildren = do
|
deepChildren = DeepChildren <$> parseSelectors
|
||||||
sels <- selectors
|
|
||||||
_ <- optional $ char ' '
|
parseSelectors :: Parser [Selector]
|
||||||
return $ DeepChildren sels
|
parseSelectors = pOptionalTrailingSpace . many1 $
|
||||||
|
parseId <|> parseClass <|> parseTag <|> parseAttr
|
||||||
selectors :: Parser [Selector]
|
|
||||||
selectors = many1 $ parseId
|
|
||||||
<|> parseClass
|
|
||||||
<|> parseTag
|
|
||||||
<|> parseAttr
|
|
||||||
|
|
||||||
parseId :: Parser Selector
|
parseId :: Parser Selector
|
||||||
parseId = do
|
parseId = char '#' >> ById <$> pIdent
|
||||||
_ <- char '#'
|
|
||||||
x <- takeWhile $ flip notElem ",#.[ >"
|
|
||||||
return $ ById x
|
|
||||||
|
|
||||||
parseClass :: Parser Selector
|
parseClass :: Parser Selector
|
||||||
parseClass = do
|
parseClass = char '.' >> ByClass <$> pIdent
|
||||||
_ <- char '.'
|
|
||||||
x <- takeWhile $ flip notElem ",#.[ >"
|
|
||||||
return $ ByClass x
|
|
||||||
|
|
||||||
parseTag :: Parser Selector
|
parseTag :: Parser Selector
|
||||||
parseTag = do
|
parseTag = ByTagName <$> pIdent
|
||||||
x <- takeWhile1 $ flip notElem ",#.[ >"
|
|
||||||
return $ ByTagName x
|
|
||||||
|
|
||||||
parseAttr :: Parser Selector
|
parseAttr :: Parser Selector
|
||||||
parseAttr = do
|
parseAttr = pSquare $ choice
|
||||||
_ <- char '['
|
[ ByAttrEquals <$> pIdent <*> (string "=" *> pAttrValue)
|
||||||
name <- takeWhile $ flip notElem ",#.=$^*]"
|
, ByAttrContains <$> pIdent <*> (string "*=" *> pAttrValue)
|
||||||
(parseAttrExists name)
|
, ByAttrStarts <$> pIdent <*> (string "^=" *> pAttrValue)
|
||||||
<|> (parseAttrWith "=" ByAttrEquals name)
|
, ByAttrEnds <$> pIdent <*> (string "$=" *> pAttrValue)
|
||||||
<|> (parseAttrWith "*=" ByAttrContains name)
|
, ByAttrExists <$> pIdent
|
||||||
<|> (parseAttrWith "^=" ByAttrStarts name)
|
]
|
||||||
<|> (parseAttrWith "$=" ByAttrEnds name)
|
|
||||||
|
|
||||||
parseAttrExists :: Text -> Parser Selector
|
-- | pIdent : Parse an identifier (not yet supporting escapes and unicode as
|
||||||
parseAttrExists attrname = do
|
-- part of the identifier). Basically the regex: [-]?[_a-zA-Z][_a-zA-Z0-9]*
|
||||||
_ <- char ']'
|
pIdent :: Parser Text
|
||||||
return $ ByAttrExists attrname
|
pIdent = pOptionalTrailingSpace $ do
|
||||||
|
leadingMinus <- string "-" <|> pure ""
|
||||||
|
nmstart <- T.singleton <$> satisfy (\c -> isAlpha c || c == '_')
|
||||||
|
nmchar <- takeWhile (\c -> isAlphaNum c || c == '_')
|
||||||
|
return $ T.concat [ leadingMinus, nmstart, nmchar ]
|
||||||
|
|
||||||
parseAttrWith :: Text -> (Text -> Text -> Selector) -> Text -> Parser Selector
|
|
||||||
parseAttrWith sign constructor name = do
|
pAttrValue :: Parser Text
|
||||||
_ <- string sign
|
pAttrValue = takeWhile (/= ']')
|
||||||
value <- takeWhile $ flip notElem ",#.]"
|
|
||||||
_ <- char ']'
|
pSquare :: Parser a -> Parser a
|
||||||
return $ constructor name value
|
pSquare p = char '[' *> p <* char ']'
|
||||||
|
|
||||||
|
pOptionalTrailingSpace :: Parser a -> Parser a
|
||||||
|
pOptionalTrailingSpace p = p <* optional (char ' ')
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user