Fix balancing algorithm.

I tried to keep the behavior as close to what was there previously as
possible, but I'm not convinced it's doing the best thing in all cases.
Ideally, we'd just follow the HTML5 parsing spec here.
This commit is contained in:
Michael Snoyman 2014-08-18 10:20:59 +03:00
parent 6719157355
commit 068bebc58a
3 changed files with 13 additions and 27 deletions

View File

@ -32,7 +32,6 @@ import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, uriScheme ) isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString ) import Codec.Binary.UTF8.String ( encodeString )
import qualified Data.Map as Map
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
@ -51,7 +50,7 @@ sanitizeBalance = filterTags (balanceTags . safeTags)
-- | Filter which makes sure the tags are balanced. Use with 'filterTags' and 'safeTags' to create a custom filter. -- | Filter which makes sure the tags are balanced. Use with 'filterTags' and 'safeTags' to create a custom filter.
balanceTags :: [Tag Text] -> [Tag Text] balanceTags :: [Tag Text] -> [Tag Text]
balanceTags = balance Map.empty balanceTags = balance []
-- | Parse the given text to a list of tags, apply the given filtering function, and render back to HTML. -- | Parse the given text to a list of tags, apply the given filtering function, and render back to HTML.
-- You can insert your own custom filtering but make sure you compose your filtering function with 'safeTags'! -- You can insert your own custom filtering but make sure you compose your filtering function with 'safeTags'!
@ -63,29 +62,16 @@ filterTags f = renderTagsOptions renderOptions {
voidElems :: Set T.Text voidElems :: Set T.Text
voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr" voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
balance :: Map.Map Text Int -> [Tag Text] -> [Tag Text] balance :: [Text] -- ^ unclosed tags
balance m [] = -> [Tag Text] -> [Tag Text]
concatMap go $ Map.toList m balance unclosed [] = map TagClose $ filter (`notMember` voidElems) unclosed
where balance (x:xs) tags'@(TagClose name:tags)
go (name, i) | x == name = TagClose name : balance xs tags
| noClosing name = [] | x `member` voidElems = balance xs tags'
| otherwise = replicate i $ TagClose name | otherwise = TagOpen name [] : TagClose name : balance (x:xs) tags
noClosing = flip member voidElems balance unclosed (TagOpen name as : tags) =
balance m (t@(TagClose name):tags) = TagOpen name as : balance (name : unclosed) tags
case Map.lookup name m of balance unclosed (t:ts) = t : balance unclosed ts
Nothing -> TagOpen name [] : TagClose name : balance m tags
Just i ->
let m' = if i == 1
then Map.delete name m
else Map.insert name (i - 1) m
in t : balance m' tags
balance m (TagOpen name as : tags) =
TagOpen name as : balance m' tags
where
m' = case Map.lookup name m of
Nothing -> Map.insert name 1 m
Just i -> Map.insert name (i + 1) m
balance m (t:ts) = t : balance m ts
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter. -- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
safeTags :: [Tag Text] -> [Tag Text] safeTags :: [Tag Text] -> [Tag Text]

View File

@ -86,4 +86,4 @@ main = hspec $ do
it "removes closing voids" $ do it "removes closing voids" $ do
sanitizedB "<img></img>" "<img />" sanitizedB "<img></img>" "<img />"
it "interleaved" $ it "interleaved" $
sanitizedB "<i>hello<b>world</i>" "<i>hello<b>world</b></i>" sanitizedB "<i>hello<b>world</i>" "<i>hello<b>world<i></i></b></i>"

View File

@ -1,5 +1,5 @@
name: xss-sanitize name: xss-sanitize
version: 0.3.5.3 version: 0.3.5.4
license: BSD3 license: BSD3
license-file: LICENSE license-file: LICENSE
author: Greg Weber <greg@gregweber.info> author: Greg Weber <greg@gregweber.info>