make tag & attribute matching case insensitive

This commit is contained in:
Gregor Kleen 2020-06-10 09:21:23 +02:00
parent 7909bac24b
commit 074ed7c881

View File

@ -66,18 +66,18 @@ balanceTags = balance []
-- 'safeTags' or 'safeTagsCustom'.
filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags f = renderTagsOptions renderOptions {
optMinimize = \x -> x `member` voidElems -- <img><img> converts to <img />, <a/> converts to <a></a>
optMinimize = \x -> T.toLower x `member` voidElems -- <img><img> converts to <img />, <a/> converts to <a></a>
} . f . canonicalizeTags . parseTags
voidElems :: Set T.Text
voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
voidElems = fromAscList . map T.toLower . T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
balance :: [Text] -- ^ unclosed tags
-> [Tag Text] -> [Tag Text]
balance unclosed [] = map TagClose $ filter (`notMember` voidElems) unclosed
balance unclosed [] = map TagClose $ filter (flip notMember voidElems . T.toLower) unclosed
balance (x:xs) tags'@(TagClose name:tags)
| x == name = TagClose name : balance xs tags
| x `member` voidElems = balance xs tags'
| T.toLower x `member` voidElems = balance xs tags'
| otherwise = TagOpen name [] : TagClose name : balance (x:xs) tags
balance unclosed (TagOpen name as : tags) =
TagOpen name as : balance (name : unclosed) tags
@ -115,11 +115,11 @@ safeTagsCustom safeName sanitizeAttr (TagOpen name attributes:tags)
safeTagsCustom n a (t:tags) = t : safeTagsCustom n a tags
safeTagName :: Text -> Bool
safeTagName tagname = tagname `member` sanitaryTags
safeTagName tagname = T.toLower tagname `member` sanitaryTags
safeAttribute :: (Text, Text) -> Bool
safeAttribute (name, value) = name `member` sanitaryAttributes &&
(name `notMember` uri_attributes || sanitaryURI value)
safeAttribute (name, value) = T.toLower name `member` sanitaryAttributes &&
(T.toLower name `notMember` uri_attributes || sanitaryURI value)
-- | low-level API if you have your own HTML parser. Used by safeTags.
sanitizeAttribute :: (Text, Text) -> Maybe (Text, Text)
@ -129,17 +129,17 @@ sanitizeAttribute ("style", value) =
sanitizeAttribute attr
| safeAttribute attr = Just attr
sanitizeAttribute attr@(name, value)
| name `member` fromList svg_attr_val_allows_ref
| T.toLower name `member` fromList svg_attr_val_allows_ref
, Right () <- parseOnly (unsafeSVGRef <* endOfInput) value
= Nothing
| name `member` fromList svg_attr_val_allows_ref
| T.toLower name `member` fromList svg_attr_val_allows_ref
= Just attr
sanitizeAttribute _ = Nothing
unsafeSVGRef :: Parser ()
unsafeSVGRef = do
skipMany space
string "url"
asciiCI "url"
skipMany space
char '('
skipMany space
@ -175,13 +175,13 @@ sanitaryAttributes = fromList (allowed_html_uri_attributes ++ acceptable_attribu
\\ (fromList svg_attr_val_allows_ref) -- extra unescaping not implemented
allowed_html_uri_attributes :: [Text]
allowed_html_uri_attributes = ["href", "src", "cite", "action", "longdesc"]
allowed_html_uri_attributes = map T.toLower ["href", "src", "cite", "action", "longdesc"]
uri_attributes :: Set Text
uri_attributes = fromList $ allowed_html_uri_attributes ++ ["xlink:href", "xml:base"]
uri_attributes = fromList $ allowed_html_uri_attributes ++ map T.toLower ["xlink:href", "xml:base"]
acceptable_elements :: [Text]
acceptable_elements = ["a", "abbr", "acronym", "address", "area",
acceptable_elements = map T.toLower ["a", "abbr", "acronym", "address", "area",
"article", "aside", "audio", "b", "big", "blockquote", "br", "button",
"canvas", "caption", "center", "cite", "code", "col", "colgroup",
"command", "datagrid", "datalist", "dd", "del", "details", "dfn",
@ -196,7 +196,7 @@ acceptable_elements = ["a", "abbr", "acronym", "address", "area",
"u", "ul", "var", "video"]
mathml_elements :: [Text]
mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
mathml_elements = map T.toLower ["maction", "math", "merror", "mfrac", "mi",
"mmultiscripts", "mn", "mo", "mover", "mpadded", "mphantom",
"mprescripts", "mroot", "mrow", "mspace", "msqrt", "mstyle", "msub",
"msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder",
@ -204,7 +204,7 @@ mathml_elements = ["maction", "math", "merror", "mfrac", "mi",
-- this should include altGlyph I think
svg_elements :: [Text]
svg_elements = ["a", "animate", "animateColor", "animateMotion",
svg_elements = map T.toLower ["a", "animate", "animateColor", "animateMotion",
"animateTransform", "clipPath", "circle", "defs", "desc", "ellipse",
"font-face", "font-face-name", "font-face-src", "g", "glyph", "hkern",
"linearGradient", "line", "marker", "metadata", "missing-glyph",
@ -212,7 +212,7 @@ svg_elements = ["a", "animate", "animateColor", "animateMotion",
"set", "stop", "svg", "switch", "text", "title", "tspan", "use"]
acceptable_attributes :: [Text]
acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
acceptable_attributes = map T.toLower ["abbr", "accept", "accept-charset", "accesskey",
"align", "alt", "autocomplete", "autofocus", "axis",
"background", "balance", "bgcolor", "bgproperties", "border",
"bordercolor", "bordercolordark", "bordercolorlight", "bottompadding",
@ -240,13 +240,13 @@ acceptable_attributes = ["abbr", "accept", "accept-charset", "accesskey",
"width", "wrap", "xml:lang"]
acceptable_protocols :: [String]
acceptable_protocols = [ "ed2k", "ftp", "http", "https", "irc",
acceptable_protocols = map (map toLower) [ "ed2k", "ftp", "http", "https", "irc",
"mailto", "news", "gopher", "nntp", "telnet", "webcal",
"xmpp", "callto", "feed", "urn", "aim", "rsync", "tag",
"ssh", "sftp", "rtsp", "afs" ]
mathml_attributes :: [Text]
mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
mathml_attributes = map T.toLower ["actiontype", "align", "columnalign", "columnalign",
"columnalign", "columnlines", "columnspacing", "columnspan", "depth",
"display", "displaystyle", "equalcolumns", "equalrows", "fence",
"fontstyle", "fontweight", "frame", "height", "linethickness", "lspace",
@ -257,7 +257,7 @@ mathml_attributes = ["actiontype", "align", "columnalign", "columnalign",
"xlink:type", "xmlns", "xmlns:xlink"]
svg_attributes :: [Text]
svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
svg_attributes = map T.toLower ["accent-height", "accumulate", "additive", "alphabetic",
"arabic-form", "ascent", "attributeName", "attributeType",
"baseProfile", "bbox", "begin", "by", "calcMode", "cap-height",
"class", "clip-path", "color", "color-rendering", "content", "cx",
@ -288,12 +288,12 @@ svg_attributes = ["accent-height", "accumulate", "additive", "alphabetic",
-- the values for these need to be escaped
svg_attr_val_allows_ref :: [Text]
svg_attr_val_allows_ref = ["clip-path", "color-profile", "cursor", "fill",
svg_attr_val_allows_ref = map T.toLower ["clip-path", "color-profile", "cursor", "fill",
"filter", "marker", "marker-start", "marker-mid", "marker-end",
"mask", "stroke"]
svg_allow_local_href :: [Text]
svg_allow_local_href = ["altGlyph", "animate", "animateColor",
svg_allow_local_href = map T.toLower ["altGlyph", "animate", "animateColor",
"animateMotion", "animateTransform", "cursor", "feImage", "filter",
"linearGradient", "pattern", "radialGradient", "textpath", "tref",
"set", "use"]