upper case tags and attributes were rejected

lower case them all. Goes against my philosophy of trying to not modify.
But it is easier, and uppercase is completely deprecated.
This commit is contained in:
Greg Weber 2011-03-04 16:31:53 -08:00
parent 6c9dcd775f
commit 193800f075
4 changed files with 38 additions and 14 deletions

View File

@ -30,6 +30,10 @@ Michael Snoyman added the balanced tags functionality.
Limitations
===========
Lowercase
---------
All tag names and attribute names are converted to lower case as a matter of convenience. If you have a use case where this is undesirable let me know.
Balancing - sanitizeBalance
---------------------------------
The goal of this function is to prevent your html from breaking when (unknown) html with unbalanced tags are placed inside it. I would expect it to work very well in practice and don't see a downside to using it unless you have an alternative approach. However, this function does not at all guarantee valid html. In fact, it is likely that the result of balancing will still be invalid HTML. There is no guarantee for how a browser will display invalid HTML, so there is no guarantee that this function will protect your HTML from being broken by a user's html. Other possible approaches would be to run the HTML through a library like libxml2 which understands HTML or to first render the HTML in a hidden iframe or hidden div at the bottom of the page so that it is isolated, and then use JavaScript to insert it into the page where you want it.

View File

@ -2,6 +2,8 @@ module Text.HTML.SanitizeXSS
( sanitize
, sanitizeBalance
, sanitizeXSS
, filterTags
, safeTags
) where
import Text.HTML.TagSoup
@ -15,21 +17,30 @@ import Codec.Binary.UTF8.String ( encodeString )
import qualified Data.Map as Map
{-
import Debug.Trace
debug :: (Show a) => a -> a
debug a = trace ("DEBUG: " ++ show a) a
-}
-- | santize the html to prevent XSS attacks. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitize :: String -> String
sanitize = sanitizeXSS
-- alias of sanitize function
-- | alias of sanitize function
sanitizeXSS :: String -> String
sanitizeXSS = renderTagsOptions renderOptions {
optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
} . safeTags . parseTags
sanitizeXSS = filterTags safeTags
-- same as sanitizeXSS but makes sure there are no lone closing tags. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
-- | same as sanitize but makes sure there are no lone closing tags. See README.md <http://github.com/gregwebs/haskell-xss-sanitize> for more details
sanitizeBalance :: String -> String
sanitizeBalance = renderTagsOptions renderOptions {
sanitizeBalance = filterTags (balance Map.empty . safeTags)
-- | insert custom tag filtering. Don't forget to compose your filter with safeTags!
filterTags :: ([Tag String] -> [Tag String]) -> String -> String
filterTags f = renderTagsOptions renderOptions {
optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
} . balance Map.empty . safeTags . parseTags
} . f . canonicalizeTags . parseTags
balance :: Map.Map String Int -> [Tag String] -> [Tag String]
balance m [] =
@ -55,6 +66,7 @@ balance m (TagOpen name as : tags) =
Just i -> Map.insert name (i + 1) m
balance m (t:ts) = t : balance m ts
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
safeTags :: [Tag String] -> [Tag String]
safeTags [] = []
safeTags (t@(TagClose name):tags)

20
test.hs
View File

@ -2,12 +2,20 @@ import Text.HTML.SanitizeXSS
testHTML = " <a href='http://safe.com'>safe</a><a href='unsafe://hack.com'>anchor</a> <img src='evil://evil.com' /> <unsafe></foo> <bar /> <br></br> <b>Unbalanced</div><img src='http://safe.com'>"
test actual expected = do
putStrLn $ "testing: " ++ testHTML
putStrLn $ if actual == expected then "pass" else "failure\n" ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show actual)
test f actual expected = do
putStrLn $ "testing: " ++ actual
putStrLn $ if f actual == expected then "pass" else "failure\n" ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show actual)
main = do
test (sanitizeBalance testHTML) " <a href=\"http://safe.com\">safe</a><a>anchor</a> <img /> <br /> <b>Unbalanced<div></div><img src=\"http://safe.com\"></b>"
test (sanitize testHTML) " <a href=\"http://safe.com\">safe</a><a>anchor</a> <img /> <br /> <b>Unbalanced</div><img src=\"http://safe.com\">"
test sanitizeBalance testHTML " <a href=\"http://safe.com\">safe</a><a>anchor</a> <img /> <br /> <b>Unbalanced<div></div><img src=\"http://safe.com\"></b>"
test sanitize testHTML " <a href=\"http://safe.com\">safe</a><a>anchor</a> <img /> <br /> <b>Unbalanced</div><img src=\"http://safe.com\">"
let testRelativeURI = "<a href=\"foo\">bar</a>"
test (sanitize testRelativeURI) testRelativeURI
test sanitize testRelativeURI testRelativeURI
let protocol_hack = "<script src=//ha.ckers.org/.j></script>"
test sanitize protocol_hack ""
let object_hack = "<object classid=clsid:ae24fdae-03c6-11d1-8b76-0080c744f389><param name=url value=javascript:alert('XSS')></object>"
test sanitize object_hack ""
let embed_hack = "<embed src=\"data:image/svg+xml;base64,PHN2ZyB4bWxuczpzdmc9Imh0dH A6Ly93d3cudzMub3JnLzIwMDAvc3ZnIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcv MjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hs aW5rIiB2ZXJzaW9uPSIxLjAiIHg9IjAiIHk9IjAiIHdpZHRoPSIxOTQiIGhlaWdodD0iMjAw IiBpZD0ieHNzIj48c2NyaXB0IHR5cGU9InRleHQvZWNtYXNjcmlwdCI+YWxlcnQoIlh TUyIpOzwvc2NyaXB0Pjwvc3ZnPg==\" type=\"image/svg+xml\" AllowScriptAccess=\"always\"></embed>"
test sanitize embed_hack ""
let ucase_image_hack = "<IMG src=javascript:alert('XSS') />"
test sanitize ucase_image_hack "<img />"

View File

@ -1,5 +1,5 @@
name: xss-sanitize
version: 0.2.5
version: 0.2.6
license: BSD3
license-file: LICENSE
author: Greg Weber <greg@gregweber.info>