sanitizing implemented!
This commit is contained in:
parent
604085955e
commit
548aa9a8aa
@ -1,39 +1,42 @@
|
|||||||
module SanitizeXSS where
|
module Text.HTML.SanitizeXSS where
|
||||||
|
|
||||||
import Data.Set
|
import Data.Set (Set(..), member, notMember, fromList)
|
||||||
import Network.URI ( parseURIReference, URI (..) )
|
import Network.URI ( parseURIReference, URI (..) )
|
||||||
|
|
||||||
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
|
import Data.Char ( toLower, isLower, isUpper, isAlpha, isAscii,
|
||||||
isLetter, isDigit )
|
isLetter, isDigit )
|
||||||
|
|
||||||
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString )
|
import Network.URI ( isAllowedInURI, escapeURIString, unEscapeString, uriScheme )
|
||||||
import Codec.Binary.UTF8.String ( encodeString, decodeString )
|
import Codec.Binary.UTF8.String ( encodeString, decodeString )
|
||||||
|
|
||||||
|
import Text.HTML.TagSoup
|
||||||
|
|
||||||
sanitizeXSS :: String -> String
|
sanitizeXSS :: String -> String
|
||||||
sanitizeXSS unsafeHtml = error
|
sanitizeXSS = renderTags . safeTags . parseTagsOptions parseOptions { optTagPosition = True }
|
||||||
|
where
|
||||||
|
safeTags :: [Tag String] -> [Tag String]
|
||||||
|
safeTags [] = []
|
||||||
|
safeTags (t@(TagClose name):tags) | safeTagName name = t:(safeTags tags)
|
||||||
|
| otherwise = safeTags tags
|
||||||
|
safeTags (TagOpen name attributes:tags)
|
||||||
|
| safeTagName name = TagOpen name (filter safeAttribute attributes) : safeTags tags
|
||||||
|
| otherwise = safeTags tags
|
||||||
|
safeTags (t:tags) = t:safeTags tags
|
||||||
|
|
||||||
unSafeTag tag = tag `notElem` sanitaryTags
|
safeTagName :: String -> Bool
|
||||||
unSafeAttributes tag = (attr `notElem` sanitaryAttributes ||
|
safeTagName tagname = tagname `member` sanitaryTags
|
||||||
(attr `elem` ["href","src"] && unsanitaryURI val))
|
|
||||||
|
|
||||||
-- | Returns @True@ if the specified URI is potentially a security risk.
|
safeAttribute :: (String, String) -> Bool
|
||||||
unsanitaryURI :: String -> Bool
|
safeAttribute (name, value) = name `member` sanitaryAttributes &&
|
||||||
unsanitaryURI u =
|
(name `notElem` ["href","src"] || sanitaryURI value)
|
||||||
let safeURISchemes = [ "", "http:", "https:", "ftp:", "mailto:", "file:",
|
|
||||||
"telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
|
|
||||||
"crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
|
-- | Returns @True@ if the specified URI is not a potential security risk.
|
||||||
"imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
|
sanitaryURI :: String -> Bool
|
||||||
"pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
|
sanitaryURI u =
|
||||||
"xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
|
case parseURIReference (escapeURI u) of
|
||||||
"ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
|
Just p -> (map toLower $ uriScheme p) `member` safeURISchemes
|
||||||
"ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
|
Nothing -> False
|
||||||
"secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
|
|
||||||
"snews:", "webcal:", "ymsgr:"]
|
|
||||||
in case parseURIReference (escapeURI u) of
|
|
||||||
-- uriScheme member of URI record data structure
|
|
||||||
Just p -> (map toLower $ uriScheme p) `notElem` safeURISchemes
|
|
||||||
Nothing -> True
|
|
||||||
|
|
||||||
|
|
||||||
-- | Escape unicode characters in a URI. Characters that are
|
-- | Escape unicode characters in a URI. Characters that are
|
||||||
@ -49,9 +52,20 @@ unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
safeURISchemes :: Set String
|
||||||
|
safeURISchemes = fromList [ "", "http:", "https:", "ftp:", "mailto:", "file:",
|
||||||
|
"telnet:", "gopher:", "aaa:", "aaas:", "acap:", "cap:", "cid:",
|
||||||
|
"crid:", "dav:", "dict:", "dns:", "fax:", "go:", "h323:", "im:",
|
||||||
|
"imap:", "ldap:", "mid:", "news:", "nfs:", "nntp:", "pop:",
|
||||||
|
"pres:", "sip:", "sips:", "snmp:", "tel:", "urn:", "wais:",
|
||||||
|
"xmpp:", "z39.50r:", "z39.50s:", "aim:", "callto:", "cvs:",
|
||||||
|
"ed2k:", "feed:", "fish:", "gg:", "irc:", "ircs:", "lastfm:",
|
||||||
|
"ldaps:", "magnet:", "mms:", "msnim:", "notes:", "rsync:",
|
||||||
|
"secondlife:", "skype:", "ssh:", "sftp:", "smb:", "sms:",
|
||||||
|
"snews:", "webcal:", "ymsgr:"]
|
||||||
|
|
||||||
sanitaryTags :: [[Char]]
|
sanitaryTags :: Set String
|
||||||
sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
|
sanitaryTags = fromList ["a", "abbr", "acronym", "address", "area", "b", "big",
|
||||||
"blockquote", "br", "button", "caption", "center",
|
"blockquote", "br", "button", "caption", "center",
|
||||||
"cite", "code", "col", "colgroup", "dd", "del", "dfn",
|
"cite", "code", "col", "colgroup", "dd", "del", "dfn",
|
||||||
"dir", "div", "dl", "dt", "em", "fieldset", "font",
|
"dir", "div", "dl", "dt", "em", "fieldset", "font",
|
||||||
@ -63,8 +77,8 @@ sanitaryTags = ["a", "abbr", "acronym", "address", "area", "b", "big",
|
|||||||
"td", "textarea", "tfoot", "th", "thead", "tr", "tt",
|
"td", "textarea", "tfoot", "th", "thead", "tr", "tt",
|
||||||
"u", "ul", "var"]
|
"u", "ul", "var"]
|
||||||
|
|
||||||
sanitaryAttributes :: [[Char]]
|
sanitaryAttributes :: Set String
|
||||||
sanitaryAttributes = ["abbr", "accept", "accept-charset",
|
sanitaryAttributes = fromList ["abbr", "accept", "accept-charset",
|
||||||
"accesskey", "action", "align", "alt", "axis",
|
"accesskey", "action", "align", "alt", "axis",
|
||||||
"border", "cellpadding", "cellspacing", "char",
|
"border", "cellpadding", "cellspacing", "char",
|
||||||
"charoff", "charset", "checked", "cite", "class",
|
"charoff", "charset", "checked", "cite", "class",
|
||||||
7
test.hs
Normal file
7
test.hs
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
import Text.HTML.SanitizeXSS
|
||||||
|
|
||||||
|
main = do
|
||||||
|
let test = " <a href='unsafe://hack.com'>anchor</a> <img src='evil://evil.com' /> </foo> "
|
||||||
|
let result = (sanitizeXSS test)
|
||||||
|
let expected = " <a>anchor</a> <img /> "
|
||||||
|
putStrLn $ if result == expected then "pass" else "failure parsing:" ++ (show test) ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show result)
|
||||||
Loading…
Reference in New Issue
Block a user