diff --git a/README.md b/README.md index 2532157..8d55c86 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,10 @@ Michael Snoyman added the balanced tags functionality. Limitations =========== +Lowercase +--------- +All tag names and attribute names are converted to lower case as a matter of convenience. If you have a use case where this is undesirable let me know. + Balancing - sanitizeBalance --------------------------------- The goal of this function is to prevent your html from breaking when (unknown) html with unbalanced tags are placed inside it. I would expect it to work very well in practice and don't see a downside to using it unless you have an alternative approach. However, this function does not at all guarantee valid html. In fact, it is likely that the result of balancing will still be invalid HTML. There is no guarantee for how a browser will display invalid HTML, so there is no guarantee that this function will protect your HTML from being broken by a user's html. Other possible approaches would be to run the HTML through a library like libxml2 which understands HTML or to first render the HTML in a hidden iframe or hidden div at the bottom of the page so that it is isolated, and then use JavaScript to insert it into the page where you want it. diff --git a/Text/HTML/SanitizeXSS.hs b/Text/HTML/SanitizeXSS.hs index 18e4068..137ee14 100644 --- a/Text/HTML/SanitizeXSS.hs +++ b/Text/HTML/SanitizeXSS.hs @@ -2,6 +2,8 @@ module Text.HTML.SanitizeXSS ( sanitize , sanitizeBalance , sanitizeXSS + , filterTags + , safeTags ) where import Text.HTML.TagSoup @@ -15,21 +17,30 @@ import Codec.Binary.UTF8.String ( encodeString ) import qualified Data.Map as Map +{- +import Debug.Trace +debug :: (Show a) => a -> a +debug a = trace ("DEBUG: " ++ show a) a + -} + + -- | santize the html to prevent XSS attacks. 
See README.md for more details sanitize :: String -> String sanitize = sanitizeXSS --- alias of sanitize function +-- | alias of sanitize function sanitizeXSS :: String -> String -sanitizeXSS = renderTagsOptions renderOptions { - optMinimize = \x -> x `elem` ["br","img"] -- converts to , converts to - } . safeTags . parseTags +sanitizeXSS = filterTags safeTags --- same as sanitizeXSS but makes sure there are no lone closing tags. See README.md for more details +-- | same as sanitize but makes sure there are no lone closing tags. See README.md for more details sanitizeBalance :: String -> String -sanitizeBalance = renderTagsOptions renderOptions { +sanitizeBalance = filterTags (balance Map.empty . safeTags) + +-- | insert custom tag filtering. Don't forget to compose your filter with safeTags! +filterTags :: ([Tag String] -> [Tag String]) -> String -> String +filterTags f = renderTagsOptions renderOptions { optMinimize = \x -> x `elem` ["br","img"] -- converts to , converts to - } . balance Map.empty . safeTags . parseTags + } . f . canonicalizeTags . parseTags balance :: Map.Map String Int -> [Tag String] -> [Tag String] balance m [] = @@ -55,6 +66,7 @@ balance m (TagOpen name as : tags) = Just i -> Map.insert name (i + 1) m balance m (t:ts) = t : balance m ts +-- | Filters out any unsafe tags and attributes. Use with filterTags to create a custom filter. safeTags :: [Tag String] -> [Tag String] safeTags [] = [] safeTags (t@(TagClose name):tags) diff --git a/test.hs b/test.hs index d94f3bc..6ff26c3 100644 --- a/test.hs +++ b/test.hs @@ -2,12 +2,20 @@ import Text.HTML.SanitizeXSS testHTML = " safeanchor

Unbalanced" -test actual expected = do - putStrLn $ "testing: " ++ testHTML - putStrLn $ if actual == expected then "pass" else "failure\n" ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show actual) +test f actual expected = do + putStrLn $ "testing: " ++ actual + putStrLn $ if f actual == expected then "pass" else "failure\n" ++ "\nexpected:" ++ (show expected) ++ "\nactual: " ++ (show actual) main = do - test (sanitizeBalance testHTML) " safeanchor
Unbalanced
" - test (sanitize testHTML) " safeanchor
Unbalanced" + test sanitizeBalance testHTML " safeanchor
Unbalanced
" + test sanitize testHTML " safeanchor
Unbalanced" let testRelativeURI = "bar" - test (sanitize testRelativeURI) testRelativeURI + test sanitize testRelativeURI testRelativeURI + let protocol_hack = "" + test sanitize protocol_hack "" + let object_hack = "" + test sanitize object_hack "" + let embed_hack = "" + test sanitize embed_hack "" + let ucase_image_hack = "" + test sanitize ucase_image_hack "" diff --git a/xss-sanitize.cabal b/xss-sanitize.cabal index 47b2350..6da1237 100644 --- a/xss-sanitize.cabal +++ b/xss-sanitize.cabal @@ -1,5 +1,5 @@ name: xss-sanitize -version: 0.2.5 +version: 0.2.6 license: BSD3 license-file: LICENSE author: Greg Weber