export just sanitizeXSS

This commit is contained in:
Greg Weber 2010-09-26 08:09:49 -07:00
parent 5dbc1428a8
commit eb5b78d429
2 changed files with 6 additions and 13 deletions

View File

@ -21,7 +21,7 @@ TagSoup is used to parse the HTML, and it does a good job. However TagSoup does
<a href>, <a href>
<a></a>, <a/>
img and br tags will be output as a single self-closing tag. Other self-closing tags will be output as an open and closing pair. There are future updates to TagSoup planned to fix these cases.
img and br tags will be output as a single self-closing tag. Other self-closing tags will be output as an open and closing pair. So <img /> or <img><img> converts to <img />, and <a></a> or <a/> converts to <a></a>. There are future updates to TagSoup planned to fix these cases.
Integration
===========

View File

@ -1,17 +1,17 @@
module Text.HTML.SanitizeXSS where
module Text.HTML.SanitizeXSS (sanitizeXSS) where
import Text.HTML.TagSoup
import Data.Set (Set(), member, fromList)
import Data.Char ( toLower, isAscii )
import Data.Char ( toLower )
import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, unEscapeString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString, decodeString )
isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString )
sanitizeXSS :: String -> String
sanitizeXSS = renderTagsOptions renderOptions {
optMinimize = \x -> x `elem` ["br","img"]
optMinimize = \x -> x `elem` ["br","img"] -- <img><img> converts to <img />, <a/> converts to <a></a>
} . safeTags . parseTags
where
safeTags :: [Tag String] -> [Tag String]
@ -44,13 +44,6 @@ sanitaryURI u =
-- | Escape a string for use in a URI. The input is first passed through
-- 'encodeString' (from "Codec.Binary.UTF8.String"), and then
-- 'escapeURIString' escapes every character of the result that does not
-- satisfy 'isAllowedInURI'.
escapeURI :: String -> String
escapeURI = escapeURIString isAllowedInURI . encodeString
-- | Unescape unicode and some special characters in a URI, but
-- without introducing spaces.
--
-- The pipeline runs right to left: 'unEscapeString' is applied to the
-- input, the result is passed through 'decodeString', and finally
-- 'escapeURIString' re-escapes every character that is ASCII and does not
-- satisfy 'isAllowedInURI'. Non-ASCII characters pass the predicate and are
-- therefore left as they are.
-- NOTE(review): presumably the final re-escaping step is what keeps literal
-- spaces out of the result -- confirm against the Network.URI documentation.
unescapeURI :: String -> String
unescapeURI = escapeURIString (\c -> isAllowedInURI c || not (isAscii c)) .
decodeString . unEscapeString
safeURISchemes :: Set String
safeURISchemes = fromList [ "", "http:", "https:", "ftp:", "mailto:", "file:",