85 lines
2.7 KiB
Haskell
85 lines
2.7 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{- |
|
|
This module uses HXT to transverse an HTML document using CSS selectors.
|
|
|
|
The most important function here is 'findBySelector', it takes a CSS query and
|
|
a string containing the HTML to look into,
|
|
and it returns a list of the HTML fragments that matched the given query.
|
|
|
|
Only a subset of the CSS spec is currently supported:
|
|
|
|
* By tag name: /table td a/
|
|
|
|
* By class names: /.container .content/
|
|
|
|
* By Id: /#oneId/
|
|
|
|
* By attribute: /[hasIt]/, /[exact=match]/, /[contains*=text]/, /[starts^=with]/, /[ends$=with]/
|
|
|
|
* Union: /a, span, p/
|
|
|
|
* Immediate children: /div > p/
|
|
|
|
* Get jiggy with it: /div[data-attr=yeah] > .mon, .foo.bar div, #oneThing/
|
|
|
|
-}
|
|
|
|
module Yesod.Test.TransversingCSS (
|
|
findBySelector,
|
|
Html,
|
|
Query,
|
|
-- * For HXT hackers
|
|
-- | These functions expose some low level details that you can blissfully ignore.
|
|
parseQuery,
|
|
runQuery,
|
|
queryToArrow,
|
|
Selector(..),
|
|
SelectorGroup(..)
|
|
|
|
)
|
|
where
|
|
|
|
import Text.XML.HXT.Core
|
|
import qualified Data.List as DL
|
|
import Yesod.Test.CssQuery
|
|
import Data.Text (unpack)
|
|
import qualified Data.Text as T
|
|
|
|
type Html = String
|
|
type Query = T.Text
|
|
|
|
-- | Perform a css 'Query' on 'Html'. Returns Either
|
|
--
|
|
-- * Left: Query parse error.
|
|
--
|
|
-- * Right: List of matching Html fragments.
|
|
findBySelector :: Html-> Query -> Either String [Html]
|
|
findBySelector html query = fmap (runQuery html) (parseQuery query)
|
|
|
|
-- Run a compiled query on Html, returning a list of matching Html fragments.
|
|
runQuery :: Html -> [[SelectorGroup]] -> [Html]
|
|
runQuery html query =
|
|
runLA (hread >>> (queryToArrow query) >>> xshow this) html
|
|
|
|
-- | Transform a compiled query into the HXT arrow that finally transverses the Html
|
|
queryToArrow :: ArrowXml a => [[SelectorGroup]] -> a XmlTree XmlTree
|
|
queryToArrow commaSeparated =
|
|
DL.foldl uniteCommaSeparated none commaSeparated
|
|
where
|
|
uniteCommaSeparated accum selectorGroups =
|
|
accum <+> (DL.foldl sequenceSelectorGroups this selectorGroups)
|
|
sequenceSelectorGroups accum (DirectChildren sels) =
|
|
accum >>> getChildren >>> (DL.foldl applySelectors this $ sels)
|
|
sequenceSelectorGroups accum (DeepChildren sels) =
|
|
accum >>> getChildren >>> multi (DL.foldl applySelectors this $ sels)
|
|
applySelectors accum selector = accum >>> (toArrow selector)
|
|
toArrow selector = case selector of
|
|
ById v -> hasAttrValue "id" (== unpack v)
|
|
ByClass v -> hasAttrValue "class" ((DL.elem $ unpack v) . words)
|
|
ByTagName v -> hasName $ unpack v
|
|
ByAttrExists n -> hasAttr $ unpack n
|
|
ByAttrEquals n v -> hasAttrValue (unpack n) (== unpack v)
|
|
ByAttrContains n v -> hasAttrValue (unpack n) (DL.isInfixOf $ unpack v)
|
|
ByAttrStarts n v -> hasAttrValue (unpack n) (DL.isPrefixOf $ unpack v)
|
|
ByAttrEnds n v -> hasAttrValue (unpack n) (DL.isSuffixOf $ unpack v)
|