fix up css parsing
This commit is contained in:
parent
ae3b146712
commit
66545752c6
@ -59,7 +59,7 @@ If anyone knows of better sources or thinks a particular tag/attribute/value may
|
||||
|
||||
style attribute
|
||||
----------------
|
||||
style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved.
|
||||
style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved. This code was again translated from sanitizer.py, but uses attopoarsec-text instead of regexes.
|
||||
|
||||
data attributes
|
||||
-------------------------
|
||||
|
||||
@ -21,6 +21,7 @@ import Network.URI ( parseURIReference, URI (..),
|
||||
import Codec.Binary.UTF8.String ( encodeString )
|
||||
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe (catMaybes)
|
||||
|
||||
|
||||
|
||||
@ -74,7 +75,7 @@ safeTags (t@(TagClose name):tags)
|
||||
| otherwise = safeTags tags
|
||||
safeTags (TagOpen name attributes:tags)
|
||||
| safeTagName name = TagOpen name
|
||||
(map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags
|
||||
(catMaybes $ map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags
|
||||
| otherwise = safeTags tags
|
||||
safeTags (t:tags) = t:safeTags tags
|
||||
|
||||
@ -85,9 +86,10 @@ safeAttribute :: (Text, Text) -> Bool
|
||||
safeAttribute (name, value) = name `member` sanitaryAttributes &&
|
||||
(name `notMember` uri_attributes || sanitaryURI value)
|
||||
|
||||
sanitizeAttribute :: (Text, Text) -> (Text, Text)
|
||||
sanitizeAttribute ("style", value) = ("style", sanitizeCSS value)
|
||||
sanitizeAttribute attrs = attrs
|
||||
sanitizeAttribute :: (Text, Text) -> Maybe (Text, Text)
|
||||
sanitizeAttribute ("style", value) =
|
||||
let css = sanitizeCSS value in if T.null css then Nothing else Just ("style", css)
|
||||
sanitizeAttribute attr = Just attr
|
||||
|
||||
|
||||
-- | Returns @True@ if the specified URI is not a potential security risk.
|
||||
@ -174,7 +176,7 @@ acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
|
||||
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
|
||||
"rules", "scope", "selected", "shape", "size", "span", "start",
|
||||
"step",
|
||||
-- "style", TODO: allow this with further filtering
|
||||
"style", -- gets further filtering
|
||||
"summary", "suppress", "tabindex", "target",
|
||||
"template", "title", "toppadding", "type", "unselectable", "usemap",
|
||||
"urn", "valign", "value", "variable", "volume", "vspace", "vrml",
|
||||
|
||||
@ -1,5 +1,10 @@
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Text.HTML.SanitizeXSS.Css (sanitizeCSS) where
|
||||
{-# LANGUAGE OverloadedStrings, CPP #-}
|
||||
module Text.HTML.SanitizeXSS.Css (
|
||||
sanitizeCSS
|
||||
#ifdef TEST
|
||||
, allowedCssAttributeValue
|
||||
#endif
|
||||
) where
|
||||
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
@ -8,23 +13,48 @@ import Data.Text.Lazy.Builder (toLazyText)
|
||||
import Data.Text.Lazy (toStrict)
|
||||
import Data.Set (member, fromList, Set)
|
||||
import Data.Char (isDigit)
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Applicative ((<|>), pure)
|
||||
import Text.CSS.Render (renderAttrs)
|
||||
import Text.CSS.Parse (parseAttrs)
|
||||
import Prelude hiding (takeWhile)
|
||||
|
||||
-- import FileLocation (debug, debugM)
|
||||
|
||||
|
||||
-- this is a direct translation from sanitizer.py, except
|
||||
-- sanitizer.py filters out url(), but this is redundant
|
||||
sanitizeCSS :: Text -> Text
|
||||
sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $ parseAttributes
|
||||
sanitizeCSS css = toStrict . toLazyText .
|
||||
renderAttrs . filter isSanitaryAttr . filterUrl $ parseAttributes
|
||||
where
|
||||
filterUrl :: [(Text,Text)] -> [(Text,Text)]
|
||||
filterUrl = map filterUrlAttribute
|
||||
where
|
||||
filterUrlAttribute :: (Text, Text) -> (Text, Text)
|
||||
filterUrlAttribute (prop,value) =
|
||||
case parseOnly rejectUrl value of
|
||||
Left _ -> (prop,value)
|
||||
Right noUrl -> filterUrlAttribute (prop, noUrl)
|
||||
|
||||
rejectUrl = do
|
||||
pre <- manyTill anyChar (string "url")
|
||||
skipMany space
|
||||
_<-char '('
|
||||
skipWhile (/= ')')
|
||||
_<-char ')'
|
||||
rest <- takeText
|
||||
return $ T.append (T.pack pre) rest
|
||||
|
||||
|
||||
parseAttributes = case parseAttrs css of
|
||||
Left _ -> []
|
||||
Right as -> as
|
||||
|
||||
isSanitaryAttr (_, "") = False
|
||||
isSanitaryAttr ("",_) = False
|
||||
isSanitaryAttr (prop, value)
|
||||
| prop `member` allowed_css_properties = True
|
||||
| (T.takeWhile (/= '-') value) `member` allowed_css_unit_properties &&
|
||||
| (T.takeWhile (/= '-') prop) `member` allowed_css_unit_properties &&
|
||||
all allowedCssAttributeValue (T.words value) = True
|
||||
| prop `member` allowed_svg_properties = True
|
||||
| otherwise = False
|
||||
@ -32,40 +62,45 @@ sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $
|
||||
allowed_css_unit_properties :: Set Text
|
||||
allowed_css_unit_properties = fromList ["background","border","margin","padding"]
|
||||
|
||||
allowedCssAttributeValue :: Text -> Bool
|
||||
allowedCssAttributeValue val =
|
||||
val `member` allowed_css_keywords ||
|
||||
case parseOnly allowedCssAttributeParser val of
|
||||
Left _ -> False
|
||||
Right b -> b
|
||||
where
|
||||
allowedCssAttributeParser = do
|
||||
hex <|> rgb <|> cssUnit
|
||||
allowedCssAttributeValue :: Text -> Bool
|
||||
allowedCssAttributeValue val =
|
||||
val `member` allowed_css_keywords ||
|
||||
case parseOnly allowedCssAttributeParser val of
|
||||
Left _ -> False
|
||||
Right b -> b
|
||||
where
|
||||
allowedCssAttributeParser = do
|
||||
rgb <|> hex <|> rgb <|> cssUnit
|
||||
|
||||
aToF = fromList "abcdef"
|
||||
aToF = fromList "abcdef"
|
||||
|
||||
hex = do
|
||||
_ <- char '#'
|
||||
hx <- takeText
|
||||
return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
|
||||
hex = do
|
||||
_ <- char '#'
|
||||
hx <- takeText
|
||||
return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
|
||||
|
||||
rgb = do
|
||||
_<- string "rgb("
|
||||
skip isDigit >> try (skipWhile isDigit) >> try (skip (== '%'))
|
||||
skip (== ',')
|
||||
try (skipWhile isDigit) >> try (skip (== '%'))
|
||||
skip (== ',')
|
||||
try (skipWhile isDigit) >> try (skip (== '%'))
|
||||
skip (== ',')
|
||||
skip (== ')')
|
||||
return True
|
||||
-- should have used sepBy (symbol ",")
|
||||
rgb = do
|
||||
_<- string "rgb("
|
||||
skipMany1 digit >> skipOk (== '%')
|
||||
skip (== ',')
|
||||
skipMany digit >> skipOk (== '%')
|
||||
skip (== ',')
|
||||
skipMany digit >> skipOk (== '%')
|
||||
skip (== ')')
|
||||
return True
|
||||
|
||||
cssUnit = do
|
||||
try $ skip isDigit >> skip isDigit
|
||||
try $ skip (== '.')
|
||||
try $ skip isDigit >> skip isDigit
|
||||
unit <- takeText
|
||||
return $ unit `member` allowed_css_attribute_value_units
|
||||
cssUnit = do
|
||||
skip isDigit
|
||||
skipOk isDigit
|
||||
skipOk (== '.')
|
||||
skipOk isDigit >> skipOk isDigit
|
||||
skipSpace
|
||||
unit <- takeText
|
||||
return $ T.null unit || unit `member` allowed_css_attribute_value_units
|
||||
|
||||
skipOk :: (Char -> Bool) -> Parser ()
|
||||
skipOk p = skip p <|> pure ()
|
||||
|
||||
allowed_css_attribute_value_units :: Set Text
|
||||
allowed_css_attribute_value_units = fromList
|
||||
|
||||
@ -21,7 +21,6 @@ library
|
||||
, css-text >= 0.1 && < 0.2
|
||||
, text >= 0.11 && < 0.12
|
||||
, attoparsec-text >= 0.8.5.1 && < 0.9
|
||||
, file-location
|
||||
|
||||
|
||||
exposed-modules: Text.HTML.SanitizeXSS
|
||||
@ -31,15 +30,16 @@ library
|
||||
test-suite test
|
||||
type: exitcode-stdio-1.0
|
||||
main-is: test/main.hs
|
||||
cpp-options: -DTEST
|
||||
build-depends: base >= 4 && < 5, containers
|
||||
, xss-sanitize >= 0.3
|
||||
, tagsoup >= 0.11
|
||||
, utf8-string >= 0.3
|
||||
, network >= 2
|
||||
, css-text >= 0.1 && < 0.2
|
||||
, text >= 0.11 && < 0.12
|
||||
, attoparsec-text >= 0.8.5.1 && < 0.9
|
||||
, file-location
|
||||
, hspec
|
||||
, HUnit >= 1.2
|
||||
|
||||
|
||||
source-repository head
|
||||
|
||||
Loading…
Reference in New Issue
Block a user