module Text.HTML.SanitizeXSS (sanitizeXSS) where import Text.HTML.TagSoup import Data.Set (Set(), member, notMember, (\\), fromList) import Data.Char ( toLower ) import Network.URI ( parseURIReference, URI (..), isAllowedInURI, escapeURIString, uriScheme ) import Codec.Binary.UTF8.String ( encodeString ) import qualified Data.Map as Map -- | santize the html to prevent XSS attacks. See README.md for more details sanitizeXSS :: String -> String sanitizeXSS = renderTagsOptions renderOptions { optMinimize = \x -> x `elem` ["br","img"] -- converts to , converts to } . safeTags Map.empty . parseTags where safeTags :: Map.Map String Int -> [Tag String] -> [Tag String] safeTags m [] = concatMap go $ Map.toList m where go (name, i) | noClosing name = [] | otherwise = replicate i $ TagClose name noClosing = flip elem ["br", "img"] safeTags m (t@(TagClose name):tags) | safeTagName name = case Map.lookup name m of Nothing -> safeTags m tags Just i -> let m' = if i == 1 then Map.delete name m else Map.insert name (i - 1) m in t : safeTags m' tags | otherwise = safeTags m tags safeTags m (TagOpen name attributes:tags) | safeTagName name = let m' = case Map.lookup name m of Nothing -> Map.insert name 1 m Just i -> Map.insert name (i + 1) m in TagOpen name (filter safeAttribute attributes) : safeTags m' tags | otherwise = safeTags m tags safeTags m (t:tags) = t:safeTags m tags safeTagName :: String -> Bool safeTagName tagname = tagname `member` sanitaryTags safeAttribute :: (String, String) -> Bool safeAttribute (name, value) = name `member` sanitaryAttributes && (name `notMember` attrValIsUri || sanitaryURI value) -- | Returns @True@ if the specified URI is not a potential security risk. sanitaryURI :: String -> Bool sanitaryURI u = case parseURIReference (escapeURI u) of Just p -> (init (map toLower $ uriScheme p)) `member` safeURISchemes Nothing -> False -- | Escape unicode characters in a URI. Characters that are -- already valid in a URI, including % and ?, are left alone. escapeURI :: String -> String escapeURI = escapeURIString isAllowedInURI . encodeString safeURISchemes :: Set String safeURISchemes = fromList acceptable_protocols sanitaryTags :: Set String sanitaryTags = fromList (acceptable_elements ++ mathml_elements ++ svg_elements) \\ (fromList svg_allow_local_href) -- extra filtering not implemented sanitaryAttributes :: Set String sanitaryAttributes = fromList (acceptable_attributes ++ mathml_attributes ++ svg_attributes) \\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented attrValIsUri :: Set String attrValIsUri = fromList ["href", "src", "cite", "action", "longdesc", "xlink:href", "xml:base"] acceptable_elements :: [String] acceptable_elements = ["a", "abbr", "acronym", "address", "area", "article", "aside", "audio", "b", "big", "blockquote", "br", "button", "canvas", "caption", "center", "cite", "code", "col", "colgroup", "command", "datagrid", "datalist", "dd", "del", "details", "dfn", "dialog", "dir", "div", "dl", "dt", "em", "event-source", "fieldset", "figure", "footer", "font", "form", "header", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "input", "ins", "keygen", "kbd", "label", "legend", "li", "m", "map", "menu", "meter", "multicol", "nav", "nextid", "ol", "output", "optgroup", "option", "p", "pre", "progress", "q", "s", "samp", "section", "select", "small", "sound", "source", "spacer", "span", "strike", "strong", "sub", "sup", "table", "tbody", "td", "textarea", "time", "tfoot", "th", "thead", "tr", "tt", "u", "ul", "var", "video"] mathml_elements :: [String] mathml_elements = ["maction", "math", "merror", "mfrac", "mi", "mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom", "mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder", "munderover", "none"] -- this should include altGlyph I think svg_elements :: [String] svg_elements = ["a", "animate", "animateColor", "animateMotion", "animateTransform", "clipPath", "circle", "defs", "desc", "ellipse", "font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern", "linearGradient", "line", "marker", "metadata", "missing-glyph", "mpath", "path", "polygon", "polyline", "radialGradient", "rect", "set", "stop", "svg", "switch", "text", "title", "tspan", "use"] acceptable_attributes :: [String] acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", "autocomplete", "autofocus", "axis", "background", "balance", "bgcolor", "bgproperties", "border", "bordercolor", "bordercolordark", "bordercolorlight", "bottompadding", "cellpadding", "cellspacing", "ch", "challenge", "char", "charoff", "choff", "charset", "checked", "cite", "class", "clear", "color", "cols", "colspan", "compact", "contenteditable", "controls", "coords", -- "data", TODO: allow this with further filtering "datafld", "datapagesize", "datasrc", "datetime", "default", "delay", "dir", "disabled", "draggable", "dynsrc", "enctype", "end", "face", "for", "form", "frame", "galleryimg", "gutter", "headers", "height", "hidefocus", "hidden", "high", "href", "hreflang", "hspace", "icon", "id", "inputmode", "ismap", "keytype", "label", "leftspacing", "lang", "list", "longdesc", "loop", "loopcount", "loopend", "loopstart", "low", "lowsrc", "max", "maxlength", "media", "method", "min", "multiple", "name", "nohref", "noshade", "nowrap", "open", "optimum", "pattern", "ping", "point-size", "prompt", "pqg", "radiogroup", "readonly", "rel", "repeat-max", "repeat-min", "replace", "required", "rev", "rightspacing", "rows", "rowspan", "rules", "scope", "selected", "shape", "size", "span", "src", "start", "step", -- "style", TODO: allow this with further filtering "summary", "suppress", "tabindex", "target", "template", "title", "toppadding", "type", "unselectable", "usemap", "urn", "valign", "value", "variable", "volume", "vspace", "vrml", "width", "wrap", "xml:lang"] acceptable_protocols :: [String] acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc", "mailto", "news", "gopher", "nntp", "telnet", "webcal", "xmpp", "callto", "feed", "urn", "aim", "rsync", "tag", "ssh", "sftp", "rtsp", "afs" ] mathml_attributes :: [String] mathml_attributes = ["actiontype", "align", "columnalign", "columnalign", "columnalign", "columnlines", "columnspacing", "columnspan", "depth", "display", "displaystyle", "equalcolumns", "equalrows", "fence", "fontstyle", "fontweight", "frame", "height", "linethickness", "lspace", "mathbackground", "mathcolor", "mathvariant", "mathvariant", "maxsize", "minsize", "other", "rowalign", "rowalign", "rowalign", "rowlines", "rowspacing", "rowspan", "rspace", "scriptlevel", "selection", "separator", "stretchy", "width", "width", "xlink:href", "xlink:show", "xlink:type", "xmlns", "xmlns:xlink"] svg_attributes :: [String] svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic", "arabic-form", "ascent", "attributeName", "attributeType", "baseProfile", "bbox", "begin", "by", "calcMode", "cap-height", "class", "clip-path", "color", "color-rendering", "content", "cx", "cy", "d", "dx", "dy", "descent", "display", "dur", "end", "fill", "fill-opacity", "fill-rule", "font-family", "font-size", "font-stretch", "font-style", "font-variant", "font-weight", "from", "fx", "fy", "g1", "g2", "glyph-name", "gradientUnits", "hanging", "height", "horiz-adv-x", "horiz-origin-x", "id", "ideographic", "k", "keyPoints", "keySplines", "keyTimes", "lang", "marker-end", "marker-mid", "marker-start", "markerHeight", "markerUnits", "markerWidth", "mathematical", "max", "min", "name", "offset", "opacity", "orient", "origin", "overline-position", "overline-thickness", "panose-1", "path", "pathLength", "points", "preserveAspectRatio", "r", "refX", "refY", "repeatCount", "repeatDur", "requiredExtensions", "requiredFeatures", "restart", "rotate", "rx", "ry", "slope", "stemh", "stemv", "stop-color", "stop-opacity", "strikethrough-position", "strikethrough-thickness", "stroke", "stroke-dasharray", "stroke-dashoffset", "stroke-linecap", "stroke-linejoin", "stroke-miterlimit", "stroke-opacity", "stroke-width", "systemLanguage", "target", "text-anchor", "to", "transform", "type", "u1", "u2", "underline-position", "underline-thickness", "unicode", "unicode-range", "units-per-em", "values", "version", "viewBox", "visibility", "width", "widths", "x", "x-height", "x1", "x2", "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show", "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns", "xmlns:xlink", "y", "y1", "y2", "zoomAndPan"] -- the values for these need to be escaped svg_attr_val_allows_ref :: [String] svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill", "filter", "marker", "marker-start", "marker-mid", "marker-end", "mask", "stroke"] svg_allow_local_href :: [String] svg_allow_local_href = ["altGlyph", "animate", "animateColor", "animateMotion", "animateTransform", "cursor", "feImage", "filter", "linearGradient", "pattern", "radialGradient", "textpath", "tref", "set", "use"] {- style value (css) filtering not implemented - - this is used for css filtering allowed_svg_properties = fromList acceptable_svg_properties acceptable_svg_properties = [ "fill", "fill-opacity", "fill-rule", "stroke", "stroke-width", "stroke-linecap", "stroke-linejoin", "stroke-opacity"] allowed_css_properties = fromList acceptable_css_properties allowed_css_keywords = fromList acceptable_css_keywords acceptable_css_properties = ["azimuth", "background-color", "border-bottom-color", "border-collapse", "border-color", "border-left-color", "border-right-color", "border-top-color", "clear", "color", "cursor", "direction", "display", "elevation", "float", "font", "font-family", "font-size", "font-style", "font-variant", "font-weight", "height", "letter-spacing", "line-height", "overflow", "pause", "pause-after", "pause-before", "pitch", "pitch-range", "richness", "speak", "speak-header", "speak-numeral", "speak-punctuation", "speech-rate", "stress", "text-align", "text-decoration", "text-indent", "unicode-bidi", "vertical-align", "voice-family", "volume", "white-space", "width"] acceptable_css_keywords = ["auto", "aqua", "black", "block", "blue", "bold", "both", "bottom", "brown", "center", "collapse", "dashed", "dotted", "fuchsia", "gray", "green", "!important", "italic", "left", "lime", "maroon", "medium", "none", "navy", "normal", "nowrap", "olive", "pointer", "purple", "red", "right", "solid", "silver", "teal", "top", "transparent", "underline", "white", "yellow"] -} -- I don't know where this is from! -- The rest of pandoc's lists were smaller than the ones in html5lib -- This one is bigger. {- pandoc_acceptable_protocols = [ "", "http:", "https:", "ftp:", "mailto:", "file:", "telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:", "crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:", "imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:", "pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:", "xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:", "ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:", "ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:", "secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:", "snews:", "webcal:", "ymsgr:"] -}