xss-sanitize/Text/HTML/SanitizeXSS.hs
2010-10-31 23:06:11 +02:00

248 lines
12 KiB
Haskell

module Text.HTML.SanitizeXSS (sanitizeXSS) where
import Text.HTML.TagSoup
import Data.Set (Set(), member, notMember, (\\), fromList)
import Data.Char ( toLower )
import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString )
import qualified Data.Map as Map
-- | santize the html to prevent XSS attacks. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitizeXSS :: String -> String
sanitizeXSS = renderTagsOptions renderOptions {
optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
} . safeTags Map.empty . parseTags
where
safeTags :: Map.Map String Int -> [Tag String] -> [Tag String]
safeTags m [] =
concatMap go $ Map.toList m
where
go (name, i)
| noClosing name = []
| otherwise = replicate i $ TagClose name
noClosing = flip elem ["br", "img"]
safeTags m (t@(TagClose name):tags)
| safeTagName name =
case Map.lookup name m of
Nothing -> safeTags m tags
Just i ->
let m' = if i == 1
then Map.delete name m
else Map.insert name (i - 1) m
in t : safeTags m' tags
| otherwise = safeTags m tags
safeTags m (TagOpen name attributes:tags)
| safeTagName name =
let m' =
case Map.lookup name m of
Nothing -> Map.insert name 1 m
Just i -> Map.insert name (i + 1) m
in TagOpen name (filter safeAttribute attributes) : safeTags m' tags
| otherwise = safeTags m tags
safeTags m (t:tags) = t:safeTags m tags
safeTagName :: String -> Bool
safeTagName tagname = tagname `member` sanitaryTags
safeAttribute :: (String, String) -> Bool
safeAttribute (name, value) = name `member` sanitaryAttributes &&
(name `notMember` attrValIsUri || sanitaryURI value)
-- | Returns @True@ if the specified URI is not a potential security risk.
sanitaryURI :: String -> Bool
sanitaryURI u =
case parseURIReference (escapeURI u) of
Just p -> (init (map toLower $ uriScheme p)) `member` safeURISchemes
Nothing -> False
-- | Escape unicode characters in a URI. Characters that are
-- already valid in a URI, including % and ?, are left alone.
escapeURI :: String -> String
escapeURI = escapeURIString isAllowedInURI . encodeString
safeURISchemes :: Set String
safeURISchemes = fromList acceptable_protocols
sanitaryTags :: Set String
sanitaryTags = fromList (acceptable_elements ++ mathml_elements ++ svg_elements)
\\ (fromList svg_allow_local_href) -- extra filtering not implemented
sanitaryAttributes :: Set String
sanitaryAttributes = fromList (acceptable_attributes ++ mathml_attributes ++ svg_attributes)
\\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented
attrValIsUri :: Set String
attrValIsUri = fromList ["href", "src", "cite", "action", "longdesc",
"xlink:href", "xml:base"]
acceptable_elements :: [String]
acceptable_elements = ["a", "abbr", "acronym", "address", "area",
"article", "aside", "audio", "b", "big", "blockquote", "br", "button",
"canvas", "caption", "center", "cite", "code", "col", "colgroup",
"command", "datagrid", "datalist", "dd", "del", "details", "dfn",
"dialog", "dir", "div", "dl", "dt", "em", "event-source", "fieldset",
"figure", "footer", "font", "form", "header", "h1", "h2", "h3", "h4",
"h5", "h6", "hr", "i", "img", "input", "ins", "keygen", "kbd",
"label", "legend", "li", "m", "map", "menu", "meter", "multicol",
"nav", "nextid", "ol", "output", "optgroup", "option", "p", "pre",
"progress", "q", "s", "samp", "section", "select", "small", "sound",
"source", "spacer", "span", "strike", "strong", "sub", "sup", "table",
"tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt",
"u", "ul", "var", "video"]
mathml_elements :: [String]
mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
"mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom",
"mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub",
"msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder",
"munderover", "none"]
-- this should include altGlyph I think
svg_elements :: [String]
svg_elements = ["a", "animate", "animateColor", "animateMotion",
"animateTransform", "clipPath", "circle", "defs", "desc", "ellipse",
"font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern",
"linearGradient", "line", "marker", "metadata", "missing-glyph",
"mpath", "path", "polygon", "polyline", "radialGradient", "rect",
"set", "stop", "svg", "switch", "text", "title", "tspan", "use"]
acceptable_attributes :: [String]
acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
"action", "align", "alt", "autocomplete", "autofocus", "axis",
"background", "balance", "bgcolor", "bgproperties", "border",
"bordercolor", "bordercolordark", "bordercolorlight", "bottompadding",
"cellpadding", "cellspacing", "ch", "challenge", "char", "charoff",
"choff", "charset", "checked", "cite", "class", "clear", "color",
"cols", "colspan", "compact", "contenteditable", "controls", "coords",
-- "data", TODO: allow this with further filtering
"datafld", "datapagesize", "datasrc", "datetime", "default",
"delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end",
"face", "for", "form", "frame", "galleryimg", "gutter", "headers",
"height", "hidefocus", "hidden", "high", "href", "hreflang", "hspace",
"icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing",
"lang", "list", "longdesc", "loop", "loopcount", "loopend",
"loopstart", "low", "lowsrc", "max", "maxlength", "media", "method",
"min", "multiple", "name", "nohref", "noshade", "nowrap", "open",
"optimum", "pattern", "ping", "point-size", "prompt", "pqg",
"radiogroup", "readonly", "rel", "repeat-max", "repeat-min",
"replace", "required", "rev", "rightspacing", "rows", "rowspan",
"rules", "scope", "selected", "shape", "size", "span", "src", "start",
"step",
-- "style", TODO: allow this with further filtering
"summary", "suppress", "tabindex", "target",
"template", "title", "toppadding", "type", "unselectable", "usemap",
"urn", "valign", "value", "variable", "volume", "vspace", "vrml",
"width", "wrap", "xml:lang"]
acceptable_protocols :: [String]
acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc",
"mailto", "news", "gopher", "nntp", "telnet", "webcal",
"xmpp", "callto", "feed", "urn", "aim", "rsync", "tag",
"ssh", "sftp", "rtsp", "afs" ]
mathml_attributes :: [String]
mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
"columnalign", "columnlines", "columnspacing", "columnspan", "depth",
"display", "displaystyle", "equalcolumns", "equalrows", "fence",
"fontstyle", "fontweight", "frame", "height", "linethickness", "lspace",
"mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize",
"minsize", "other", "rowalign", "rowalign", "rowalign", "rowlines",
"rowspacing", "rowspan", "rspace", "scriptlevel", "selection",
"separator", "stretchy", "width", "width", "xlink:href", "xlink:show",
"xlink:type", "xmlns", "xmlns:xlink"]
svg_attributes :: [String]
svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
"arabic-form", "ascent", "attributeName", "attributeType",
"baseProfile", "bbox", "begin", "by", "calcMode", "cap-height",
"class", "clip-path", "color", "color-rendering", "content", "cx",
"cy", "d", "dx", "dy", "descent", "display", "dur", "end", "fill",
"fill-opacity", "fill-rule", "font-family", "font-size",
"font-stretch", "font-style", "font-variant", "font-weight", "from",
"fx", "fy", "g1", "g2", "glyph-name", "gradientUnits", "hanging",
"height", "horiz-adv-x", "horiz-origin-x", "id", "ideographic", "k",
"keyPoints", "keySplines", "keyTimes", "lang", "marker-end",
"marker-mid", "marker-start", "markerHeight", "markerUnits",
"markerWidth", "mathematical", "max", "min", "name", "offset",
"opacity", "orient", "origin", "overline-position",
"overline-thickness", "panose-1", "path", "pathLength", "points",
"preserveAspectRatio", "r", "refX", "refY", "repeatCount",
"repeatDur", "requiredExtensions", "requiredFeatures", "restart",
"rotate", "rx", "ry", "slope", "stemh", "stemv", "stop-color",
"stop-opacity", "strikethrough-position", "strikethrough-thickness",
"stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap",
"stroke-linejoin", "stroke-miterlimit", "stroke-opacity",
"stroke-width", "systemLanguage", "target", "text-anchor", "to",
"transform", "type", "u1", "u2", "underline-position",
"underline-thickness", "unicode", "unicode-range", "units-per-em",
"values", "version", "viewBox", "visibility", "width", "widths", "x",
"x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole",
"xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type",
"xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y",
"y1", "y2", "zoomAndPan"]
-- the values for these need to be escaped
svg_attr_val_allows_ref :: [String]
svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill",
"filter", "marker", "marker-start", "marker-mid", "marker-end",
"mask", "stroke"]
svg_allow_local_href :: [String]
svg_allow_local_href = ["altGlyph", "animate", "animateColor",
"animateMotion", "animateTransform", "cursor", "feImage", "filter",
"linearGradient", "pattern", "radialGradient", "textpath", "tref",
"set", "use"]
{- style value (css) filtering not implemented
-
- this is used for css filtering
allowed_svg_properties = fromList acceptable_svg_properties
acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule",
"stroke", "stroke-width", "stroke-linecap", "stroke-linejoin",
"stroke-opacity"]
allowed_css_properties = fromList acceptable_css_properties
allowed_css_keywords = fromList acceptable_css_keywords
acceptable_css_properties = ["azimuth", "background-color",
"border-bottom-color", "border-collapse", "border-color",
"border-left-color", "border-right-color", "border-top-color", "clear",
"color", "cursor", "direction", "display", "elevation", "float", "font",
"font-family", "font-size", "font-style", "font-variant", "font-weight",
"height", "letter-spacing", "line-height", "overflow", "pause",
"pause-after", "pause-before", "pitch", "pitch-range", "richness",
"speak", "speak-header", "speak-numeral", "speak-punctuation",
"speech-rate", "stress", "text-align", "text-decoration", "text-indent",
"unicode-bidi", "vertical-align", "voice-family", "volume",
"white-space", "width"]
acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue",
"bold", "both", "bottom", "brown", "center", "collapse", "dashed",
"dotted", "fuchsia", "gray", "green", "!important", "italic", "left",
"lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive",
"pointer", "purple", "red", "right", "solid", "silver", "teal", "top",
"transparent", "underline", "white", "yellow"]
-}
-- I don't know where this is from!
-- The rest of pandoc's lists were smaller than the ones in html5lib
-- This one is bigger.
{-
pandoc_acceptable_protocols = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
"telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
"crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
"imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
"pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
"xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
"ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
"ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
"secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
"snews:", "webcal:", "ymsgr:"]
-}