Compare commits

...

18 Commits

Author SHA1 Message Date
Michael Snoyman
be213a84a4
Merge pull request #18 from zoominsoftware/customize
Allow customized whitelists.
2018-07-02 08:51:35 +03:00
Yitzchak Gale
86d83508f2 Version bump to 0.3.6. 2018-07-01 19:51:02 +03:00
Yitzchak Gale
787b7f060f Improve function layout. 2018-07-01 14:54:41 +03:00
Yitzchak Gale
ea3047902a Improve haddocks. 2018-07-01 14:40:50 +03:00
Yitzchak Gale
c77292bf51 Merge. 2018-07-01 13:07:07 +03:00
Yitzchak Gale
8f453e3dd5 Change name to safeTagsCustom, minor bump version to 0.3.5.8. 2018-07-01 12:59:58 +03:00
Michael Snoyman
2df057fb65
Badges 2018-06-28 19:04:03 +03:00
Michael Snoyman
75cf17fd50
Add CI 2018-06-28 12:36:50 +03:00
Michael Snoyman
e1581e4006
hpack-ify 2018-06-28 12:36:06 +03:00
Michael Snoyman
5ae1267578
Drop older network support 2018-06-28 12:34:08 +03:00
Michael Snoyman
113ebdd321
src subdir 2018-06-28 12:32:40 +03:00
Michael Snoyman
69c8c83ab0
Stack 2018-06-28 12:30:07 +03:00
Yitzchak Gale
101690ff7b Tests for customized white list. 2018-06-26 22:27:51 +03:00
Yitzchak Gale
b1c8a1ceeb Use custom safe tags also for continuation. 2018-06-26 21:59:25 +03:00
Yitzchak Gale
cb252f660f Use custom safe tags also for close tags. 2018-06-26 20:37:56 +03:00
Yitzchak Gale
bbd7af410d Allow customized whitelists. 2018-06-26 19:12:02 +03:00
Greg Weber
937f41344a fix sanitizer.py pointer 2017-04-09 20:07:39 -07:00
Greg Weber
9a9101f658 fix LICENSE 2016-04-22 05:36:19 -07:00
10 changed files with 380 additions and 74 deletions

2
.gitignore vendored
View File

@ -3,3 +3,5 @@
dist
.cabal-sandbox/
cabal.sandbox.config
.stack-work/
xss-sanitize.cabal

237
.travis.yml Normal file
View File

@ -0,0 +1,237 @@
# This is the complex Travis configuration, which is intended for use
# on open source libraries which need compatibility across multiple GHC
# versions, must work with cabal-install, and should be
# cross-platform. For more information and other options, see:
#
# https://docs.haskellstack.org/en/stable/travis_ci/
#
# Copy these contents into the root directory of your Github project in a file
# named .travis.yml
# Use new container infrastructure to enable caching
sudo: false
# Do not choose a language; we provide our own build tools.
language: generic
# Caching so the next build will be fast too.
cache:
directories:
- $HOME/.ghc
- $HOME/.cabal
- $HOME/.stack
- $TRAVIS_BUILD_DIR/.stack-work
# The different configurations we want to test. We have BUILD=cabal which uses
# cabal-install, and BUILD=stack which uses Stack. More documentation on each
# of those below.
#
# We set the compiler values here to tell Travis to use a different
# cache file per set of arguments.
#
# If you need to have different apt packages for each combination in the
# matrix, you can use a line such as:
# addons: {apt: {packages: [libfcgi-dev,libgmp-dev]}}
matrix:
include:
# We grab the appropriate GHC and cabal-install versions from hvr's PPA. See:
# https://github.com/hvr/multi-ghc-travis
#- env: BUILD=cabal GHCVER=7.0.4 CABALVER=1.16 HAPPYVER=1.19.5 ALEXVER=3.1.7
# compiler: ": #GHC 7.0.4"
# addons: {apt: {packages: [cabal-install-1.16,ghc-7.0.4,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
#- env: BUILD=cabal GHCVER=7.2.2 CABALVER=1.16 HAPPYVER=1.19.5 ALEXVER=3.1.7
# compiler: ": #GHC 7.2.2"
# addons: {apt: {packages: [cabal-install-1.16,ghc-7.2.2,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
#- env: BUILD=cabal GHCVER=7.4.2 CABALVER=1.16 HAPPYVER=1.19.5 ALEXVER=3.1.7
# compiler: ": #GHC 7.4.2"
# addons: {apt: {packages: [cabal-install-1.16,ghc-7.4.2,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
#- env: BUILD=cabal GHCVER=7.6.3 CABALVER=1.16 HAPPYVER=1.19.5 ALEXVER=3.1.7
# compiler: ": #GHC 7.6.3"
# addons: {apt: {packages: [cabal-install-1.16,ghc-7.6.3,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
#- env: BUILD=cabal GHCVER=7.8.4 CABALVER=1.18 HAPPYVER=1.19.5 ALEXVER=3.1.7
# compiler: ": #GHC 7.8.4"
# addons: {apt: {packages: [cabal-install-1.18,ghc-7.8.4,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
#- env: BUILD=cabal GHCVER=7.10.3 CABALVER=1.22 HAPPYVER=1.19.5 ALEXVER=3.1.7
# compiler: ": #GHC 7.10.3"
# addons: {apt: {packages: [cabal-install-1.22,ghc-7.10.3,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
- env: BUILD=cabal GHCVER=8.0.2 CABALVER=1.24 HAPPYVER=1.19.5 ALEXVER=3.1.7
compiler: ": #GHC 8.0.2"
addons: {apt: {packages: [cabal-install-1.24,ghc-8.0.2,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
- env: BUILD=cabal GHCVER=8.2.2 CABALVER=2.0 HAPPYVER=1.19.5 ALEXVER=3.1.7
compiler: ": #GHC 8.2.2"
addons: {apt: {packages: [cabal-install-2.0,ghc-8.2.2,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
- env: BUILD=cabal GHCVER=8.4.3 CABALVER=2.2 HAPPYVER=1.19.5 ALEXVER=3.1.7
compiler: ": #GHC 8.4.3"
addons: {apt: {packages: [cabal-install-2.2,ghc-8.4.3,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
# Build with the newest GHC and cabal-install. This is an accepted failure,
# see below.
- env: BUILD=cabal GHCVER=head CABALVER=head HAPPYVER=1.19.5 ALEXVER=3.1.7
compiler: ": #GHC HEAD"
addons: {apt: {packages: [cabal-install-head,ghc-head,happy-1.19.5,alex-3.1.7], sources: [hvr-ghc]}}
# The Stack builds. We can pass in arbitrary Stack arguments via the ARGS
# variable, such as using --stack-yaml to point to a different file.
- env: BUILD=stack ARGS=""
compiler: ": #stack default"
addons: {apt: {packages: [libgmp-dev]}}
#- env: BUILD=stack ARGS="--resolver lts-2"
# compiler: ": #stack 7.8.4"
# addons: {apt: {packages: [libgmp-dev]}}
#- env: BUILD=stack ARGS="--resolver lts-3"
# compiler: ": #stack 7.10.2"
# addons: {apt: {packages: [libgmp-dev]}}
#- env: BUILD=stack ARGS="--resolver lts-6"
# compiler: ": #stack 7.10.3"
# addons: {apt: {packages: [libgmp-dev]}}
#- env: BUILD=stack ARGS="--resolver lts-7"
# compiler: ": #stack 8.0.1"
# addons: {apt: {packages: [libgmp-dev]}}
- env: BUILD=stack ARGS="--resolver lts-9"
compiler: ": #stack 8.0.2"
addons: {apt: {packages: [libgmp-dev]}}
- env: BUILD=stack ARGS="--resolver lts-11"
compiler: ": #stack 8.2.2"
addons: {apt: {packages: [libgmp-dev]}}
# Nightly builds are allowed to fail
- env: BUILD=stack ARGS="--resolver nightly"
compiler: ": #stack nightly"
addons: {apt: {packages: [libgmp-dev]}}
# Build on macOS in addition to Linux
- env: BUILD=stack ARGS=""
compiler: ": #stack default osx"
os: osx
# The macOS version provided by Travis is incompatible with GHC 7.8.4
#- env: BUILD=stack ARGS="--resolver lts-2"
# compiler: ": #stack 7.8.4 osx"
# os: osx
#- env: BUILD=stack ARGS="--resolver lts-3"
# compiler: ": #stack 7.10.2 osx"
# os: osx
#- env: BUILD=stack ARGS="--resolver lts-6"
# compiler: ": #stack 7.10.3 osx"
# os: osx
#- env: BUILD=stack ARGS="--resolver lts-7"
# compiler: ": #stack 8.0.1 osx"
# os: osx
- env: BUILD=stack ARGS="--resolver lts-9"
compiler: ": #stack 8.0.2 osx"
os: osx
- env: BUILD=stack ARGS="--resolver lts-11"
compiler: ": #stack 8.2.2 osx"
os: osx
- env: BUILD=stack ARGS="--resolver nightly"
compiler: ": #stack nightly osx"
os: osx
allow_failures:
- env: BUILD=cabal GHCVER=head CABALVER=head HAPPYVER=1.19.5 ALEXVER=3.1.7
- env: BUILD=stack ARGS="--resolver nightly"
before_install:
# Using compiler above sets CC to an invalid value, so unset it
- unset CC
# We want to always allow newer versions of packages when building on GHC HEAD
- CABALARGS=""
- if [ "x$GHCVER" = "xhead" ]; then CABALARGS=--allow-newer; fi
# Download and unpack the stack executable
- export PATH=/opt/ghc/$GHCVER/bin:/opt/cabal/$CABALVER/bin:$HOME/.local/bin:/opt/alex/$ALEXVER/bin:/opt/happy/$HAPPYVER/bin:$HOME/.cabal/bin:$PATH
- mkdir -p ~/.local/bin
- |
  if [ "$(uname)" = "Darwin" ]
  then
    # NOTE(review): --insecure disables TLS certificate verification for this
    # download of an executable; confirm it is still required on the macOS
    # workers and drop it if not.
    travis_retry curl --insecure -L https://get.haskellstack.org/stable/osx-x86_64.tar.gz | tar xz --strip-components=1 --include '*/stack' -C ~/.local/bin
  else
    travis_retry curl -L https://get.haskellstack.org/stable/linux-x86_64.tar.gz | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack'
  fi
  # Use the more reliable S3 mirror of Hackage
  mkdir -p "$HOME/.cabal"
  echo 'remote-repo: hackage.haskell.org:http://hackage.fpcomplete.com/' > "$HOME/.cabal/config"
  # Double quotes on purpose: the shell must expand $HOME here so that the
  # config file contains an absolute path rather than the literal text "$HOME".
  echo "remote-repo-cache: $HOME/.cabal/packages" >> "$HOME/.cabal/config"
install:
# Install phase: report the GHC version in use, regenerate ./configure when a
# configure.ac is present, then build only the dependencies for the selected
# BUILD mode. The packages themselves are built and tested in the script phase.
- echo "$(ghc --version) [$(ghc --print-project-git-commit-id 2> /dev/null || echo '?')]"
- if [ -f configure.ac ]; then autoreconf -i; fi
- |
# Abort on the first failing command (-e) and echo each command as it runs (-x)
set -ex
case "$BUILD" in
stack)
# Add in extra-deps for older snapshots, as necessary
#
# This is disabled by default, as relying on the solver like this can
# make builds unreliable. Instead, if you have this situation, it's
# recommended that you maintain multiple stack-lts-X.yaml files.
#stack --no-terminal --install-ghc $ARGS test --bench --dry-run || ( \
# stack --no-terminal $ARGS build cabal-install && \
# stack --no-terminal $ARGS solver --update-config)
# Build the dependencies
stack --no-terminal --install-ghc $ARGS test --bench --only-dependencies
;;
cabal)
cabal --version
travis_retry cabal update
# Get the list of packages from the stack.yaml file. Note that
# this will also implicitly run hpack as necessary to generate
# the .cabal files needed by cabal-install.
# PACKAGES is reused by the cabal branch of the script phase.
PACKAGES=$(stack --install-ghc query locals | grep '^ *path' | sed 's@^ *path:@@')
cabal install --only-dependencies --enable-tests --enable-benchmarks --force-reinstalls --ghc-options=-O0 --reorder-goals --max-backjumps=-1 $CABALARGS $PACKAGES
;;
esac
# Switch errexit and command tracing back off
set +ex
script:
# Build and test phase. With BUILD=stack the package is built, tested and
# haddocked through Stack. With BUILD=cabal every package listed in $PACKAGES
# is installed, then an sdist tarball is created, unpacked and built/tested
# from the unpacked sources.
- |
  set -ex
  case "$BUILD" in
    stack)
      stack --no-terminal $ARGS test --bench --no-run-benchmarks --haddock --no-haddock-deps
      ;;
    cabal)
      cabal install --enable-tests --enable-benchmarks --force-reinstalls --ghc-options=-O0 --reorder-goals --max-backjumps=-1 $CABALARGS $PACKAGES
      ORIGDIR=$(pwd)
      for dir in $PACKAGES
      do
        cd "$dir"
        # A failing `cabal check` is tolerated only for cabal-install 1.16.
        # Uses the POSIX `=` operator, consistent with the other string
        # comparisons in this file.
        cabal check || [ "$CABALVER" = "1.16" ]
        cabal sdist
        PKGVER=$(cabal info . | awk '{print $2;exit}')
        SRC_TGZ=$PKGVER.tar.gz
        cd dist
        tar zxfv "$SRC_TGZ"
        cd "$PKGVER"
        cabal configure --enable-tests --ghc-options -O0
        cabal build
        if [ "$CABALVER" = "1.16" ] || [ "$CABALVER" = "1.18" ]; then
          cabal test
        else
          cabal test --show-details=streaming --log=/dev/stdout
        fi
        # Quoted so a checkout path containing whitespace still works
        cd "$ORIGDIR"
      done
      ;;
  esac
  set +ex

View File

@ -1,5 +1,8 @@
# Summary
[![Build Status](https://travis-ci.org/yesodweb/haskell-xss-sanitize.svg?branch=master)](https://travis-ci.org/yesodweb/haskell-xss-sanitize)
[![Build status](https://ci.appveyor.com/api/projects/status/1i4xx9qi53r58tsh/branch/master?svg=true)](https://ci.appveyor.com/project/snoyberg/haskell-xss-sanitize/branch/master)
xss-sanitize allows you to accept html from untrusted sources by first filtering it through a white list.
The white list filtering is fairly comprehensive, including support for css in style attributes, but there are limitations enumerated below.
@ -55,7 +58,7 @@ In the third case, img and br tags will be output as a single self-closing tags.
Ultimately this is where your security comes from. I would expect that a faulty white list would act as a strong deterrent, but this library strives for correctness.
The [source code of html5lib](https://github.com/html5lib/html5lib-python/blob/master/html5lib/sanitizer.py) is the source of the white list and my implementation reference. If you feel a tag is missing from the white list, check to see if it has been added there.
The [source code of html5lib](https://github.com/html5lib/html5lib-python/blob/master/html5lib/filters/sanitizer.py) is the source of the white list and my implementation reference. If you feel a tag is missing from the white list, check to see if it has been added there.
If anyone knows of better sources or thinks a particular tag/attribute/value may be vulnerable, please let me know.
[HTML Purifier](http://htmlpurifier.org/live/smoketests/printDefinition.php) does have a more permissive and configurable (yet safe) white list if you are looking to add anything.

36
appveyor.yml Normal file
View File

@ -0,0 +1,36 @@
build: off
before_test:
# http://help.appveyor.com/discussions/problems/6312-curl-command-not-found
- set PATH=C:\Program Files\Git\mingw64\bin;%PATH%
- curl -sS -ostack.zip -L --insecure https://get.haskellstack.org/stable/windows-x86_64.zip
- 7z x stack.zip stack.exe
clone_folder: "c:\\stack"
environment:
global:
STACK_ROOT: "c:\\sr"
# Override the temp directory to avoid sed escaping issues
# See https://github.com/haskell/cabal/issues/5386
TMP: "c:\\tmp"
matrix:
- ARGS: ""
#- ARGS: "--resolver lts-2"
#- ARGS: "--resolver lts-3"
#- ARGS: "--resolver lts-6"
#- ARGS: "--resolver lts-7"
- ARGS: "--resolver lts-9"
- ARGS: "--resolver lts-11"
#- ARGS: "--resolver nightly"
test_script:
# Install toolchain, but do it silently due to lots of output
- stack %ARGS% setup > nul
# The ugly echo "" hack is to avoid complaints about 0 being an invalid file
# descriptor
- echo "" | stack %ARGS% --no-terminal test

42
package.yaml Normal file
View File

@ -0,0 +1,42 @@
name: xss-sanitize
version: 0.3.6
synopsis: sanitize untrusted HTML to prevent XSS attacks
description: run untrusted HTML through Text.HTML.SanitizeXSS.sanitizeXSS to prevent
XSS attacks. see README.md <http://github.com/yesodweb/haskell-xss-sanitize> for
more details
category: Web
author: Greg Weber <greg@gregweber.info>
maintainer: Michael Snoyman <michael@snoyman.com>
license: BSD2
github: yesodweb/haskell-xss-sanitize
stability: Stable
extra-source-files:
- README.md
- ChangeLog.md
dependencies:
- base >= 4.9.1 && < 5
- containers
- tagsoup >=0.12.2 && <1
- utf8-string >=0.3 && <1.1
- css-text >=0.1.1 && <0.2
- text >=0.11 && <2
- attoparsec >=0.10.0.3 && <1
- network-uri >=2.6
library:
source-dirs: src
exposed-modules:
- Text.HTML.SanitizeXSS
tests:
test:
main: main.hs
source-dirs:
- test
- src
cpp-options: -DTEST
dependencies:
- hspec >=1.3
- HUnit >=1.2

View File

@ -12,6 +12,7 @@ module Text.HTML.SanitizeXSS
-- * Custom filtering
, filterTags
, safeTags
, safeTagsCustom
, balanceTags
-- * Utilities
@ -33,7 +34,7 @@ import Network.URI ( parseURIReference, URI (..),
isAllowedInURI, escapeURIString, uriScheme )
import Codec.Binary.UTF8.String ( encodeString )
import Data.Maybe (catMaybes)
import Data.Maybe (mapMaybe)
-- | Sanitize HTML to prevent XSS attacks. This is equivalent to @filterTags safeTags@.
@ -53,8 +54,10 @@ sanitizeBalance = filterTags (balanceTags . safeTags)
balanceTags :: [Tag Text] -> [Tag Text]
balanceTags = balance []
-- | Parse the given text to a list of tags, apply the given filtering function, and render back to HTML.
-- You can insert your own custom filtering but make sure you compose your filtering function with 'safeTags'!
-- | Parse the given text to a list of tags, apply the given filtering
-- function, and render back to HTML. You can insert your own custom
-- filtering, but make sure you compose your filtering function with
-- 'safeTags' or 'safeTagsCustom'.
filterTags :: ([Tag Text] -> [Tag Text]) -> Text -> Text
filterTags f = renderTagsOptions renderOptions {
optMinimize = \x -> x `member` voidElems -- <img><img> converts to <img />, <a/> converts to <a></a>
@ -74,17 +77,36 @@ balance unclosed (TagOpen name as : tags) =
TagOpen name as : balance (name : unclosed) tags
balance unclosed (t:ts) = t : balance unclosed ts
-- | Filters out any usafe tags and attributes. Use with filterTags to create a custom filter.
-- | Filters out unsafe tags and sanitizes attributes. Use with
-- filterTags to create a custom filter.
safeTags :: [Tag Text] -> [Tag Text]
safeTags [] = []
safeTags (t@(TagClose name):tags)
| safeTagName name = t : safeTags tags
| otherwise = safeTags tags
safeTags (TagOpen name attributes:tags)
| safeTagName name = TagOpen name
(catMaybes $ map sanitizeAttribute attributes) : safeTags tags
| otherwise = safeTags tags
safeTags (t:tags) = t:safeTags tags
safeTags = safeTagsCustom safeTagName sanitizeAttribute
-- | A configurable variant of 'safeTags': drops the tags that are not
-- considered safe and sanitizes the attributes of the tags that are
-- kept. The caller supplies both the predicate deciding which tag
-- names are safe and the function that sanitizes (or rejects) each
-- attribute, so the white list can be extended, reduced or replaced
-- entirely.
--
-- @safeTagsCustom safeTagName sanitizeAttribute@ is equivalent to
-- 'safeTags'.
--
-- @since 0.3.6
safeTagsCustom ::
     (Text -> Bool)
     -- ^ Select safe tags, like 'safeTagName'
  -> ((Text, Text) -> Maybe (Text, Text))
     -- ^ Sanitize attributes, like 'sanitizeAttribute'
  -> [Tag Text]
  -> [Tag Text]
safeTagsCustom safeName sanitizeAttr = mapMaybe keep
  where
    -- Decide the fate of a single tag; 'Nothing' removes it from the output.
    keep tag = case tag of
      TagClose name
        | safeName name -> Just tag
        | otherwise     -> Nothing
      TagOpen name attributes
        | safeName name -> Just (TagOpen name (mapMaybe sanitizeAttr attributes))
        | otherwise     -> Nothing
      -- Every other kind of tag is passed through untouched.
      _ -> Just tag
-- | 'True' when the given tag name is a member of 'sanitaryTags'.
safeTagName :: Text -> Bool
safeTagName = (`member` sanitaryTags)

1
stack.yaml Normal file
View File

@ -0,0 +1 @@
resolver: lts-11.10

View File

@ -11,9 +11,19 @@ test f actual expected = do
let result = f actual
result @?= expected
sanitized :: Text -> Text -> Expectation
-- Expectation helpers built on 'test': each one runs a sanitizer on the
-- first argument and asserts that the result equals the second argument.
sanitized, sanitizedB, sanitizedC :: Text -> Text -> Expectation
-- Checks 'sanitize'.
sanitized = test sanitize
-- Checks 'sanitizeBalance'.
sanitizedB = test sanitizeBalance
-- Checks 'sanitizeCustom', the customized white list defined in this file.
sanitizedC = test sanitizeCustom
-- | Sanitizer used by the customized white list tests. It accepts one
-- extra tag name and one extra attribute name, and defers to
-- 'safeTagName' and 'sanitizeAttribute' for everything else.
sanitizeCustom :: Text -> Text
sanitizeCustom = filterTags (safeTagsCustom tagAllowed attrAllowed)
  where
    extraTags = ["custtag"]
    extraAttrs = ["custattr"]
    -- A tag is allowed if it is one of the extra tags or passes 'safeTagName'.
    tagAllowed name = name `elem` extraTags || safeTagName name
    -- Extra attributes are kept unchanged; all others go through
    -- 'sanitizeAttribute'.
    attrAllowed attr@(key, _)
      | key `elem` extraAttrs = Just attr
      | otherwise = sanitizeAttribute attr
main :: IO ()
main = hspec $ do
@ -87,3 +97,15 @@ main = hspec $ do
sanitizedB "<img></img>" "<img />"
it "interleaved" $
sanitizedB "<i>hello<b>world</i>" "<i>hello<b>world<i></i></b></i>"
describe "customized white list" $ do
it "does not filter custom tags" $ do
let custtag = "<p><custtag></custtag></p>"
sanitizedC custtag custtag
it "filters non-custom tags" $ do
sanitizedC "<p><weird></weird></p>" "<p></p>"
it "does not filter custom attributes" $ do
let custattr = "<p custattr=\"foo\"></p>"
sanitizedC custattr custattr
it "filters non-custom attributes" $ do
sanitizedC "<p weird=\"bar\"></p>" "<p></p>"

View File

@ -1,59 +0,0 @@
name: xss-sanitize
version: 0.3.5.6
license: BSD3
license-file: LICENSE
author: Greg Weber <greg@gregweber.info>
maintainer: Greg Weber <greg@gregweber.info>
synopsis: sanitize untrusted HTML to prevent XSS attacks
description: run untrusted HTML through Text.HTML.SanitizeXSS.sanitizeXSS to prevent XSS attacks. see README.md <http://github.com/yesodweb/haskell-xss-sanitize> for more details
category: Web
stability: Stable
cabal-version: >= 1.8
build-type: Simple
homepage: http://github.com/yesodweb/haskell-xss-sanitize
extra-source-files: README.md
flag network-uri
description: Get Network.URI from the network-uri package
default: True
library
build-depends: base == 4.*, containers
, tagsoup >= 0.12.2 && < 1
, utf8-string >= 0.3 && < 1.1
, css-text >= 0.1.1 && < 0.2
, text >= 0.11 && < 2
, attoparsec >= 0.10.0.3 && < 1
if flag(network-uri)
build-depends: network-uri >= 2.6
else
build-depends: network < 2.6
exposed-modules: Text.HTML.SanitizeXSS
other-modules: Text.HTML.SanitizeXSS.Css
ghc-options: -Wall
test-suite test
type: exitcode-stdio-1.0
main-is: test/main.hs
cpp-options: -DTEST
build-depends: base == 4.* , containers
, tagsoup >= 0.12.2 && < 1
, utf8-string >= 0.3 && < 1.1
, css-text >= 0.1.1 && < 0.2
, text >= 0.11 && < 2
, attoparsec >= 0.10.0.3 && < 1
, hspec >= 1.3
, HUnit >= 1.2
if flag(network-uri)
build-depends: network-uri >= 2.6
else
build-depends: network < 2.6
source-repository head
type: git
location: http://github.com/yesodweb/haskell-xss-sanitize.git