Fix balancing algorithm.
I tried to keep the behavior as close to what was there previously as possible, but I'm not convinced it's doing the best thing in all cases. Ideally, we'd just follow the HTML5 parsing spec here.
This commit is contained in:
parent
6719157355
commit
068bebc58a
@ -32,7 +32,6 @@ import Network.URI ( parseURIReference, URI (..),
|
|||||||
isAllowedInURI, escapeURIString, uriScheme )
|
isAllowedInURI, escapeURIString, uriScheme )
|
||||||
import Codec.Binary.UTF8.String ( encodeString )
|
import Codec.Binary.UTF8.String ( encodeString )
|
||||||
|
|
||||||
import qualified Data.Map as Map
|
|
||||||
import Data.Maybe (catMaybes)
|
import Data.Maybe (catMaybes)
|
||||||
|
|
||||||
|
|
||||||
@ -51,7 +50,7 @@ sanitizeBalance = filterTags (balanceTags . safeTags)
|
|||||||
|
|
||||||
-- | Filter which makes sure the tags are balanced. Use with 'filterTags' and 'safeTags' to create a custom filter.
|
-- | Filter which makes sure the tags are balanced. Use with 'filterTags' and 'safeTags' to create a custom filter.
|
||||||
balanceTags :: [Tag Text] -> [Tag Text]
|
balanceTags :: [Tag Text] -> [Tag Text]
|
||||||
balanceTags = balance Map.empty
|
balanceTags = balance []
|
||||||
|
|
||||||
-- | Parse the given text to a list of tags, apply the given filtering function, and render back to HTML.
|
-- | Parse the given text to a list of tags, apply the given filtering function, and render back to HTML.
|
||||||
-- You can insert your own custom filtering but make sure you compose your filtering function with 'safeTags'!
|
-- You can insert your own custom filtering but make sure you compose your filtering function with 'safeTags'!
|
||||||
@ -63,29 +62,16 @@ filterTags f = renderTagsOptions renderOptions {
|
|||||||
voidElems :: Set T.Text
|
voidElems :: Set T.Text
|
||||||
voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
|
voidElems = fromAscList $ T.words $ T.pack "area base br col command embed hr img input keygen link meta param source track wbr"
|
||||||
|
|
||||||
balance :: Map.Map Text Int -> [Tag Text] -> [Tag Text]
|
balance :: [Text] -- ^ unclosed tags
|
||||||
balance m [] =
|
-> [Tag Text] -> [Tag Text]
|
||||||
concatMap go $ Map.toList m
|
balance unclosed [] = map TagClose $ filter (`notMember` voidElems) unclosed
|
||||||
where
|
balance (x:xs) tags'@(TagClose name:tags)
|
||||||
go (name, i)
|
| x == name = TagClose name : balance xs tags
|
||||||
| noClosing name = []
|
| x `member` voidElems = balance xs tags'
|
||||||
| otherwise = replicate i $ TagClose name
|
| otherwise = TagOpen name [] : TagClose name : balance (x:xs) tags
|
||||||
noClosing = flip member voidElems
|
balance unclosed (TagOpen name as : tags) =
|
||||||
balance m (t@(TagClose name):tags) =
|
TagOpen name as : balance (name : unclosed) tags
|
||||||
case Map.lookup name m of
|
balance unclosed (t:ts) = t : balance unclosed ts
|
||||||
Nothing -> TagOpen name [] : TagClose name : balance m tags
|
|
||||||
Just i ->
|
|
||||||
let m' = if i == 1
|
|
||||||
then Map.delete name m
|
|
||||||
else Map.insert name (i - 1) m
|
|
||||||
in t : balance m' tags
|
|
||||||
balance m (TagOpen name as : tags) =
|
|
||||||
TagOpen name as : balance m' tags
|
|
||||||
where
|
|
||||||
m' = case Map.lookup name m of
|
|
||||||
Nothing -> Map.insert name 1 m
|
|
||||||
Just i -> Map.insert name (i + 1) m
|
|
||||||
balance m (t:ts) = t : balance m ts
|
|
||||||
|
|
||||||
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
|
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
|
||||||
safeTags :: [Tag Text] -> [Tag Text]
|
safeTags :: [Tag Text] -> [Tag Text]
|
||||||
|
|||||||
@ -86,4 +86,4 @@ main = hspec $ do
|
|||||||
it "removes closing voids" $ do
|
it "removes closing voids" $ do
|
||||||
sanitizedB "<img></img>" "<img />"
|
sanitizedB "<img></img>" "<img />"
|
||||||
it "interleaved" $
|
it "interleaved" $
|
||||||
sanitizedB "<i>hello<b>world</i>" "<i>hello<b>world</b></i>"
|
sanitizedB "<i>hello<b>world</i>" "<i>hello<b>world<i></i></b></i>"
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
name: xss-sanitize
|
name: xss-sanitize
|
||||||
version: 0.3.5.3
|
version: 0.3.5.4
|
||||||
license: BSD3
|
license: BSD3
|
||||||
license-file: LICENSE
|
license-file: LICENSE
|
||||||
author: Greg Weber <greg@gregweber.info>
|
author: Greg Weber <greg@gregweber.info>
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user