css support

This commit is contained in:
Greg Weber 2011-08-08 21:12:04 -07:00
parent 193800f075
commit 844a580533
5 changed files with 184 additions and 96 deletions

View File

@ -24,7 +24,7 @@ It is recommended to integrate this so that it is automatically used whenever an
Credit
===========
Original code was taken from John MacFarlane's Pandoc (with permission), but modified to be faster and with parsing redone using TagSoup. html5lib is now being used as a reference (BSD style license).
Michael Snoyman added the balanced tags functionality.
Michael Snoyman added the balanced tags functionality and released css-text specifically to help with css parsing.
Limitations
@ -57,12 +57,16 @@ The [source code of html5lib](http://code.google.com/p/html5lib/source/browse/py
If anyone knows of better sources or thinks a particular tag/attribute/value may be vulnerable, please let me know.
[HTML Purifier](http://htmlpurifier.org/live/smoketests/printDefinition.php) does have a more permissive and configurable (yet safe) white list if you are looking to add anything.
attributes data and style
style attribute
----------------
style attributes are now *parsed* with the css-text and autoparsec-text dependencies. They are then ran through a white list for properties and keywords. Whitespace is not preserved.
data attributes
-------------------------
These attributes are not on the white list.
The href attribute is white listed, but its value must pass through a white list also. This is how the data and style attributes could work also. However, this was never implemented in Pandoc, and the html5lib code is complicated and relies on regular expressions that I don't understand.
data attributes are not on the white list.
The href attribute is white listed, but its value must pass through a white list also. This is how the data attributes could work also.
svg and mathml
--------------
A mathml white list is fully implemented.
There is a white list for svg elements and attributes. However, some elements are not included because they need further filtering (just like the data and style html attributes)
A mathml white list is fully implemented. There is some support for svg styling.
There is a full white list for svg elements and attributes. However, some elements are not included because they need further filtering (just like the data attributes) and this has not been done yet.

View File

@ -1,3 +1,4 @@
{-# LANGUAGE OverloadedStrings #-}
module Text.HTML.SanitizeXSS
( sanitize
, sanitizeBalance
@ -6,10 +7,14 @@ module Text.HTML.SanitizeXSS
, safeTags
) where
import Text.HTML.SanitizeXSS.Css
import Text.HTML.TagSoup
import Data.Set (Set(), member, notMember, (\\), fromList)
import Data.Char ( toLower )
import Data.Text (Text)
import qualified Data.Text as T
import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, uriScheme )
@ -17,32 +22,27 @@ import Codec.Binary.UTF8.String ( encodeString )
import qualified Data.Map as Map
{-
import Debug.Trace
debug :: (Show a) => a -> a
debug a = trace ("DEBUG: " ++ show a) a
-}
-- | santize the html to prevent XSS attacks. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitize :: String -> String
sanitize :: Text -> Text
sanitize = sanitizeXSS
-- | alias of sanitize function
sanitizeXSS :: String -> String
sanitizeXSS :: Text -> Text
sanitizeXSS = filterTags safeTags
-- | same as sanitize but makes sure there are no lone closing tags. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitizeBalance :: String -> String
sanitizeBalance :: Text -> Text
sanitizeBalance = filterTags (balance Map.empty . safeTags)
-- | insert custom tag filtering. Don't forget to compose your filter with safeTags!
filterTags :: ([Tag String] -> [Tag String]) -> String -> String
filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags f = renderTagsOptions renderOptions {
optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
} . f . canonicalizeTags . parseTags
balance :: Map.Map String Int -> [Tag String] -> [Tag String]
balance :: Map.Map Text Int -> [Tag Text] -> [Tag Text]
balance m [] =
concatMap go $ Map.toList m
where
@ -67,28 +67,33 @@ balance m (TagOpen name as : tags) =
balance m (t:ts) = t : balance m ts
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
safeTags :: [Tag String] -> [Tag String]
safeTags :: [Tag Text] -> [Tag Text]
safeTags [] = []
safeTags (t@(TagClose name):tags)
| safeTagName name = t : safeTags tags
| otherwise = safeTags tags
safeTags (TagOpen name attributes:tags)
| safeTagName name = TagOpen name (filter safeAttribute attributes) : safeTags tags
| safeTagName name = TagOpen name
(map sanitizeAttribute $ filter safeAttribute attributes) : safeTags tags
| otherwise = safeTags tags
safeTags (t:tags) = t:safeTags tags
safeTagName :: String -> Bool
safeTagName :: Text -> Bool
safeTagName tagname = tagname `member` sanitaryTags
safeAttribute :: (String, String) -> Bool
safeAttribute :: (Text, Text) -> Bool
safeAttribute (name, value) = name `member` sanitaryAttributes &&
(name `notMember` attrValIsUri || sanitaryURI value)
(name `notMember` uri_attributes || sanitaryURI value)
sanitizeAttribute :: (Text, Text) -> (Text, Text)
sanitizeAttribute ("style", value) = ("style", sanitizeCSS value)
sanitizeAttribute attrs = attrs
-- | Returns @True@ if the specified URI is not a potential security risk.
sanitaryURI :: String -> Bool
sanitaryURI :: Text -> Bool
sanitaryURI u =
case parseURIReference (escapeURI u) of
case parseURIReference (escapeURI $ T.unpack u) of
Just p -> (null (uriScheme p)) ||
((map toLower $ init $ uriScheme p) `member` safeURISchemes)
Nothing -> False
@ -102,19 +107,21 @@ escapeURI = escapeURIString isAllowedInURI . encodeString
safeURISchemes :: Set String
safeURISchemes = fromList acceptable_protocols
sanitaryTags :: Set String
sanitaryTags :: Set Text
sanitaryTags = fromList (acceptable_elements ++ mathml_elements ++ svg_elements)
\\ (fromList svg_allow_local_href) -- extra filtering not implemented
sanitaryAttributes :: Set String
sanitaryAttributes = fromList (acceptable_attributes ++ mathml_attributes ++ svg_attributes)
sanitaryAttributes :: Set Text
sanitaryAttributes = fromList (allowed_html_uri_attributes ++ acceptable_attributes ++ mathml_attributes ++ svg_attributes)
\\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented
attrValIsUri :: Set String
attrValIsUri = fromList ["href", "src", "cite", "action", "longdesc",
"xlink:href", "xml:base"]
allowed_html_uri_attributes :: [Text]
allowed_html_uri_attributes = ["href", "src", "cite", "action", "longdesc"]
acceptable_elements :: [String]
uri_attributes :: Set Text
uri_attributes = fromList $ allowed_html_uri_attributes ++ ["xlink:href", "xml:base"]
acceptable_elements :: [Text]
acceptable_elements = ["a", "abbr", "acronym", "address", "area",
"article", "aside", "audio", "b", "big", "blockquote", "br", "button",
"canvas", "caption", "center", "cite", "code", "col", "colgroup",
@ -129,7 +136,7 @@ acceptable_elements = ["a", "abbr", "acronym", "address", "area",
"tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt",
"u", "ul", "var", "video"]
mathml_elements :: [String]
mathml_elements :: [Text]
mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
"mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom",
"mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub",
@ -137,7 +144,7 @@ mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
"munderover", "none"]
-- this should include altGlyph I think
svg_elements :: [String]
svg_elements :: [Text]
svg_elements = ["a", "animate", "animateColor", "animateMotion",
"animateTransform", "clipPath", "circle", "defs", "desc", "ellipse",
"font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern",
@ -145,27 +152,27 @@ svg_elements = ["a", "animate", "animateColor", "animateMotion",
"mpath", "path", "polygon", "polyline", "radialGradient", "rect",
"set", "stop", "svg", "switch", "text", "title", "tspan", "use"]
acceptable_attributes :: [String]
acceptable_attributes :: [Text]
acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
"action", "align", "alt", "autocomplete", "autofocus", "axis",
"align", "alt", "autocomplete", "autofocus", "axis",
"background", "balance", "bgcolor", "bgproperties", "border",
"bordercolor", "bordercolordark", "bordercolorlight", "bottompadding",
"cellpadding", "cellspacing", "ch", "challenge", "char", "charoff",
"choff", "charset", "checked", "cite", "class", "clear", "color",
"choff", "charset", "checked", "class", "clear", "color",
"cols", "colspan", "compact", "contenteditable", "controls", "coords",
-- "data", TODO: allow this with further filtering
"datafld", "datapagesize", "datasrc", "datetime", "default",
"delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end",
"face", "for", "form", "frame", "galleryimg", "gutter", "headers",
"height", "hidefocus", "hidden", "high", "href", "hreflang", "hspace",
"height", "hidefocus", "hidden", "high", "hreflang", "hspace",
"icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing",
"lang", "list", "longdesc", "loop", "loopcount", "loopend",
"lang", "list", "loop", "loopcount", "loopend",
"loopstart", "low", "lowsrc", "max", "maxlength", "media", "method",
"min", "multiple", "name", "nohref", "noshade", "nowrap", "open",
"optimum", "pattern", "ping", "point-size", "prompt", "pqg",
"radiogroup", "readonly", "rel", "repeat-max", "repeat-min",
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
"rules", "scope", "selected", "shape", "size", "span", "src", "start",
"rules", "scope", "selected", "shape", "size", "span", "start",
"step",
-- "style", TODO: allow this with further filtering
"summary", "suppress", "tabindex", "target",
@ -179,7 +186,7 @@ acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc",
"xmpp", "callto", "feed", "urn", "aim", "rsync", "tag",
"ssh", "sftp", "rtsp", "afs" ]
mathml_attributes :: [String]
mathml_attributes :: [Text]
mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
"columnalign", "columnlines", "columnspacing", "columnspan", "depth",
"display", "displaystyle", "equalcolumns", "equalrows", "fence",
@ -190,7 +197,7 @@ mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
"separator", "stretchy", "width", "width", "xlink:href", "xlink:show",
"xlink:type", "xmlns", "xmlns:xlink"]
svg_attributes :: [String]
svg_attributes :: [Text]
svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
"arabic-form", "ascent", "attributeName", "attributeType",
"baseProfile", "bbox", "begin", "by", "calcMode", "cap-height",
@ -221,43 +228,14 @@ svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
"y1", "y2", "zoomAndPan"]
-- the values for these need to be escaped
svg_attr_val_allows_ref :: [String]
svg_attr_val_allows_ref :: [Text]
svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill",
"filter", "marker", "marker-start", "marker-mid", "marker-end",
"mask", "stroke"]
svg_allow_local_href :: [String]
svg_allow_local_href :: [Text]
svg_allow_local_href = ["altGlyph", "animate", "animateColor",
"animateMotion", "animateTransform", "cursor", "feImage", "filter",
"linearGradient", "pattern", "radialGradient", "textpath", "tref",
"set", "use"]
{- style value (css) filtering not implemented
-
- this is used for css filtering
allowed_svg_properties = fromList acceptable_svg_properties
acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule",
"stroke", "stroke-width", "stroke-linecap", "stroke-linejoin",
"stroke-opacity"]
allowed_css_properties = fromList acceptable_css_properties
allowed_css_keywords = fromList acceptable_css_keywords
acceptable_css_properties = ["azimuth", "background-color",
"border-bottom-color", "border-collapse", "border-color",
"border-left-color", "border-right-color", "border-top-color", "clear",
"color", "cursor", "direction", "display", "elevation", "float", "font",
"font-family", "font-size", "font-style", "font-variant", "font-weight",
"height", "letter-spacing", "line-height", "overflow", "pause",
"pause-after", "pause-before", "pitch", "pitch-range", "richness",
"speak", "speak-header", "speak-numeral", "speak-punctuation",
"speech-rate", "stress", "text-align", "text-decoration", "text-indent",
"unicode-bidi", "vertical-align", "voice-family", "volume",
"white-space", "width"]
acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue",
"bold", "both", "bottom", "brown", "center", "collapse", "dashed",
"dotted", "fuchsia", "gray", "green", "!important", "italic", "left",
"lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive",
"pointer", "purple", "red", "right", "solid", "silver", "teal", "top",
"transparent", "underline", "white", "yellow"]
-}

View File

@ -0,0 +1,105 @@
{-# LANGUAGE OverloadedStrings #-}
module Text.HTML.SanitizeXSS.Css (sanitizeCSS) where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Attoparsec.Text
import Data.Text.Lazy.Builder (toLazyText)
import Data.Text.Lazy (toStrict)
import Data.Set (member, fromList, Set)
import Data.Char (isDigit)
import Control.Applicative ((<|>))
import Text.CSS.Render (renderAttrs)
import Text.CSS.Parse (parseAttrs)
-- this is a direct translation from sanitizer.py, except
-- sanitizer.py filters out url(), but this is redundant
sanitizeCSS :: Text -> Text
sanitizeCSS css = toStrict . toLazyText . renderAttrs . filter isSanitaryAttr $ parseAttributes
where
parseAttributes = case parseOnly parseAttrs css of
Left _ -> []
Right as -> as
isSanitaryAttr (prop, value)
| prop `member` allowed_css_properties = True
| (T.takeWhile (/= '-') value) `member` allowed_css_unit_properties &&
all allowedCssAttributeValue (T.words value) = True
| prop `member` allowed_svg_properties = True
| otherwise = False
allowed_css_unit_properties :: Set Text
allowed_css_unit_properties = fromList ["background","border","margin","padding"]
allowedCssAttributeValue :: Text -> Bool
allowedCssAttributeValue val =
val `member` allowed_css_keywords ||
case parseOnly allowedCssAttributeParser val of
Left _ -> False
Right b -> b
where
allowedCssAttributeParser = do
hex <|> rgb <|> cssUnit
aToF = fromList "abcdef"
hex = do
_ <- char '#'
hx <- takeText
return $ T.all (\c -> isDigit c || (c `member` aToF)) hx
rgb = do
_<- string "rgb("
skip isDigit >> try (skipWhile isDigit) >> try (skip (== '%'))
skip (== ',')
try (skipWhile isDigit) >> try (skip (== '%'))
skip (== ',')
try (skipWhile isDigit) >> try (skip (== '%'))
skip (== ',')
skip (== ')')
return True
cssUnit = do
try $ skip isDigit >> skip isDigit
try $ skip (== '.')
try $ skip isDigit >> skip isDigit
unit <- takeText
return $ unit `member` allowed_css_attribute_value_units
allowed_css_attribute_value_units :: Set Text
allowed_css_attribute_value_units = fromList
[ "cm", "em", "ex", "in", "mm", "pc", "pt", "px", "%", ",", "\\"]
allowed_css_properties :: Set Text
allowed_css_properties = fromList acceptable_css_properties
where
acceptable_css_properties = ["azimuth", "background-color",
"border-bottom-color", "border-collapse", "border-color",
"border-left-color", "border-right-color", "border-top-color", "clear",
"color", "cursor", "direction", "display", "elevation", "float", "font",
"font-family", "font-size", "font-style", "font-variant", "font-weight",
"height", "letter-spacing", "line-height", "overflow", "pause",
"pause-after", "pause-before", "pitch", "pitch-range", "richness",
"speak", "speak-header", "speak-numeral", "speak-punctuation",
"speech-rate", "stress", "text-align", "text-decoration", "text-indent",
"unicode-bidi", "vertical-align", "voice-family", "volume",
"white-space", "width"]
allowed_css_keywords :: Set Text
allowed_css_keywords = fromList acceptable_css_keywords
where
acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue",
"bold", "both", "bottom", "brown", "center", "collapse", "dashed",
"dotted", "fuchsia", "gray", "green", "!important", "italic", "left",
"lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive",
"pointer", "purple", "red", "right", "solid", "silver", "teal", "top",
"transparent", "underline", "white", "yellow"]
-- used in css filtering
allowed_svg_properties :: Set Text
allowed_svg_properties = fromList acceptable_svg_properties
where
acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule",
"stroke", "stroke-width", "stroke-linecap", "stroke-linejoin",
"stroke-opacity"]

21
test.hs
View File

@ -1,21 +0,0 @@
import Text.HTML.SanitizeXSS
testHTML = " <a href='http://safe.com'>safe</a><a href='unsafe://hack.com'>anchor</a> <img src='evil://evil.com' /> <unsafe></foo> <bar /> <br></br> <b>Unbalanced</div><img src='http://safe.com'>"
test f actual expected = do
putStrLn $ "testing: " ++ actual
putStrLn $ if f actual == expected then "pass" else "failure\n" ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show actual)
main = do
test sanitizeBalance testHTML " <a href=\"http://safe.com\">safe</a><a>anchor</a> <img /> <br /> <b>Unbalanced<div></div><img src=\"http://safe.com\"></b>"
test sanitize testHTML " <a href=\"http://safe.com\">safe</a><a>anchor</a> <img /> <br /> <b>Unbalanced</div><img src=\"http://safe.com\">"
let testRelativeURI = "<a href=\"foo\">bar</a>"
test sanitize testRelativeURI testRelativeURI
let protocol_hack = "<script src=//ha.ckers.org/.j></script>"
test sanitize protocol_hack ""
let object_hack = "<object classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></object>"
test sanitize object_hack ""
let embed_hack = "<embed src=\" A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></embed>"
test sanitize embed_hack ""
let ucase_image_hack = "<IMG src=javascript:alert('XSS') />"
test sanitize ucase_image_hack "<img />"

View File

@ -1,5 +1,5 @@
name: xss-sanitize
version: 0.2.6
version: 0.3.0
license: BSD3
license-file: LICENSE
author: Greg Weber <greg@gregweber.info>
@ -9,17 +9,39 @@ description: run untrusted HTML through Text.HTML.SanitizeXSS.sanitizeXSS to
category: Web
stability: Stable
cabal-version: >= 1.6
cabal-version: >= 1.8
build-type: Simple
homepage: http://github.com/gregwebs/haskell-xss-sanitize
library
build-depends: base == 4.*, containers,
tagsoup >= 0.11, utf8-string >= 0.3, network >= 2
build-depends: base == 4.*, containers
, tagsoup >= 0.11
, utf8-string >= 0.3
, network >= 2
, css-text >= 0.1 && < 0.2
, text >= 0.11 && < 0.12
, attoparsec-text >= 0.8.5.1 && < 0.9
, file-location
exposed-modules: Text.HTML.SanitizeXSS
other-modules: Text.HTML.SanitizeXSS.Css
ghc-options: -Wall
test-suite test
type: exitcode-stdio-1.0
main-is: test/main.hs
build-depends: base >= 4 && < 5, containers
, xss-sanitize >= 0.3
, tagsoup >= 0.11
, utf8-string >= 0.3
, network >= 2
, css-text >= 0.1 && < 0.2
, text >= 0.11 && < 0.12
, attoparsec-text >= 0.8.5.1 && < 0.9
, file-location
source-repository head
type: git
location: http://github.com/gregwebs/haskell-xss-sanitize.git