No OneTemporary
Actions

Size

31 KB

Referenced Files

None

Subscribers

None

View Options

	diff --git a/lib/fast_sanitize.ex b/lib/fast_sanitize.ex
	index bdbb51c..ce6e290 100644
	--- a/lib/fast_sanitize.ex
	+++ b/lib/fast_sanitize.ex
	@@ -1,27 +1,27 @@
	defmodule FastSanitize do
	alias FastSanitize.Sanitizer

	@moduledoc """
	Fast HTML sanitization module.
	"""

	@doc """
	Strip all tags from a given document fragment.

	## Example

	iex> FastSanitize.strip_tags("<h1>hello world</h1>")
	{:ok, "hello world"}
	"""
	def strip_tags(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.StripTags)

	@doc """
	Strip tags from a given document fragment that are not basic HTML.

	## Example

	iex> FastSanitize.basic_html("<h1>hello world</h1><script>alert('xss')</script>")
	- {:ok, "<h1>hello world</h1>"}
	+ {:ok, "<h1>hello world</h1>alert('xss')"}
	"""
	def basic_html(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.BasicHTML)
	end
	diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
	index 25a7a39..6eba6eb 100644
	--- a/lib/fast_sanitize/fragment.ex
	+++ b/lib/fast_sanitize/fragment.ex
	@@ -1,65 +1,64 @@
	defmodule FastSanitize.Fragment do
	- require Logger
	+ import Plug.HTML, only: [html_escape: 1]

	def to_tree(bin) do
	with {:html, _, [{:head, _, _}, {:body, _, fragment}]} <-
	Myhtmlex.decode(bin, format: [:html_atoms, :nil_self_closing, :comment_tuple3]) do
	{:ok, fragment}
	else
	e -> {:error, e}
	end
	end

	- defp build_start_tag(tag, attrs) when length(attrs) == 0, do: "<#{tag}>"
	-
	- defp build_start_tag(tag, attrs) do
	- attr_chunks =
	- Enum.map(attrs, fn {k, v} ->
	- "#{k}=\"#{v}\""
	- end)
	- |> Enum.join(" ")
	-
	- "<#{tag} #{attr_chunks}>"
	+ defp build_attr_chunks(attrs) do
	+ Enum.map(attrs, fn {k, v} ->
	+ "#{html_escape(k)}=\"#{html_escape(v)}\""
	+ end)
	+ |> Enum.join(" ")
	end

	+ defp build_start_tag(tag, attrs, nil), do: "<#{tag} #{build_attr_chunks(attrs)}/>"
	+ defp build_start_tag(tag, attrs, _children) when length(attrs) == 0, do: "<#{tag}>"
	+ defp build_start_tag(tag, attrs, _children), do: "<#{tag} #{build_attr_chunks(attrs)}>"
	+
	# empty tuple - fragment was clobbered, return nothing
	defp fragment_to_html({}), do: ""

	# text node
	- defp fragment_to_html(text) when is_binary(text), do: text
	+ defp fragment_to_html(text) when is_binary(text), do: html_escape(text)

	# comment node
	defp fragment_to_html({:comment, _, text}), do: "<!-- #{text} -->"

	# bare subtree
	defp fragment_to_html(subtree) when is_list(subtree) do
	{:ok, result} = subtree_to_html(subtree)
	result
	end

	# a node which can never accept children will have nil instead of a subtree
	- defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs)
	+ defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs, nil)

	# every other case, assume a subtree
	defp fragment_to_html({tag, attrs, subtree}) do
	- with start_tag <- build_start_tag(tag, attrs),
	+ with start_tag <- build_start_tag(tag, attrs, subtree),
	end_tag <- "</#{tag}>",
	{:ok, subtree} <- subtree_to_html(subtree) do
	[start_tag, subtree, end_tag]
	|> Enum.join("")
	end
	end

	defp subtree_to_html([]), do: {:ok, ""}

	defp subtree_to_html(tree) do
	rendered =
	Enum.reject(tree, &is_nil/1)
	|> Enum.map(&fragment_to_html/1)
	|> Enum.join("")

	{:ok, rendered}
	end

	def to_html(tree), do: subtree_to_html(tree)
	end
	diff --git a/lib/fast_sanitize/sanitizer/basic_html.ex b/lib/fast_sanitize/sanitizer/basic_html.ex
	index 155885d..546cf02 100644
	--- a/lib/fast_sanitize/sanitizer/basic_html.ex
	+++ b/lib/fast_sanitize/sanitizer/basic_html.ex
	@@ -1,53 +1,51 @@
	defmodule FastSanitize.Sanitizer.BasicHTML do
	require FastSanitize.Sanitizer.Meta
	alias FastSanitize.Sanitizer.Meta

	@valid_schemes ["http", "https", "mailto"]

	Meta.strip_comments()

	Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
	Meta.allow_tag_with_these_attributes(:a, ["name", "title"])

	Meta.allow_tag_with_these_attributes(:b, [])
	Meta.allow_tag_with_these_attributes(:blockquote, [])
	Meta.allow_tag_with_these_attributes(:br, [])
	Meta.allow_tag_with_these_attributes(:code, [])
	Meta.allow_tag_with_these_attributes(:del, [])
	Meta.allow_tag_with_these_attributes(:em, [])
	Meta.allow_tag_with_these_attributes(:h1, [])
	Meta.allow_tag_with_these_attributes(:h2, [])
	Meta.allow_tag_with_these_attributes(:h3, [])
	Meta.allow_tag_with_these_attributes(:h4, [])
	Meta.allow_tag_with_these_attributes(:h5, [])
	Meta.allow_tag_with_these_attributes(:hr, [])
	Meta.allow_tag_with_these_attributes(:i, [])

	Meta.allow_tag_with_uri_attributes(:img, ["src"], @valid_schemes)

	Meta.allow_tag_with_these_attributes(:img, [
	"width",
	"height",
	"title",
	"alt"
	])

	Meta.allow_tag_with_these_attributes(:li, [])
	Meta.allow_tag_with_these_attributes(:ol, [])
	Meta.allow_tag_with_these_attributes(:p, [])
	Meta.allow_tag_with_these_attributes(:pre, [])
	Meta.allow_tag_with_these_attributes(:span, [])
	Meta.allow_tag_with_these_attributes(:strong, [])
	Meta.allow_tag_with_these_attributes(:table, [])
	Meta.allow_tag_with_these_attributes(:tbody, [])
	Meta.allow_tag_with_these_attributes(:td, [])
	Meta.allow_tag_with_these_attributes(:th, [])
	Meta.allow_tag_with_these_attributes(:thead, [])
	Meta.allow_tag_with_these_attributes(:tr, [])
	Meta.allow_tag_with_these_attributes(:u, [])
	Meta.allow_tag_with_these_attributes(:ul, [])

	- Meta.strip_children_of(:script)
	-
	Meta.strip_everything_not_covered()
	end
	diff --git a/lib/fast_sanitize/sanitizer/meta.ex b/lib/fast_sanitize/sanitizer/meta.ex
	index 09699b3..200c1cd 100644
	--- a/lib/fast_sanitize/sanitizer/meta.ex
	+++ b/lib/fast_sanitize/sanitizer/meta.ex
	@@ -1,213 +1,213 @@
	# Based on HtmlSanitizeEx.Scrubber.Meta
	# Copyright (c) 2015-2019 René Föhring (@rrrene)

	defmodule FastSanitize.Sanitizer.Meta do
	@moduledoc """
	This module contains some meta-programming magic to define your own rules
	for scrubbers.

	The StripTags scrubber is a good starting point:

	defmodule FastSanitize.Sanitizer.StripTags do
	require FastSanitize.Sanitizer.Meta
	alias FastSanitize.Sanitizer.Meta

	Meta.strip_comments

	Meta.strip_everything_not_covered
	end

	You can use the `allow_tag_with_uri_attributes/3` and
	`allow_tag_with_these_attributes/2` macros to define what is allowed:

	defmodule FastSanitize.Sanitizer.StripTags do
	require FastSanitize.Sanitizer.Meta
	alias FastSanitize.Sanitizer.Meta

	Meta.strip_comments

	Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
	Meta.allow_tag_with_these_attributes "img", ["width", "height"]

	Meta.strip_everything_not_covered
	end

	You can stack these if convenient:

	Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
	Meta.allow_tag_with_these_attributes "img", ["width", "height"]
	Meta.allow_tag_with_these_attributes "img", ["title", "alt"]

	"""

	@doc """
	Allow these tags and use the regular `scrub_attribute/2` function to scrub
	the attributes.
	"""
	defmacro allow_tags_and_scrub_their_attributes(list) do
	Enum.map(list, fn tag_name ->
	allow_this_tag_and_scrub_its_attributes(tag_name)
	end)
	end

	@doc """
	Allow the given +list+ of attributes for the specified +tag+.

	Meta.allow_tag_with_these_attributes "a", ["name", "title"]

	Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
	"""
	defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
	list
	|> Enum.map(fn attr_name ->
	allow_this_tag_with_this_attribute(tag_name, attr_name)
	end)
	|> Enum.concat([allow_this_tag_and_scrub_its_attributes(tag_name)])
	end

	@doc """
	Allow the given list of +values+ for the given +attribute+ on the
	specified +tag+.

	Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
	"""
	defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
	quote do
	def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
	when value in unquote(values) do
	{unquote(attribute), value}
	end
	end
	end

	@doc """
	Allow the given +list+ of attributes to contain URI information for the
	specified +tag+.

	# Only allow SSL-enabled and mailto links
	Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]

	# Only allow none-SSL images
	Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
	"""
	defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
	list
	|> Enum.map(fn name ->
	allow_tag_with_uri_attribute(tag, name, valid_schemes)
	end)
	end

	@doc """

	"""
	defmacro allow_tags_with_style_attributes(list) do
	list
	|> Enum.map(fn tag_name -> allow_this_tag_with_style_attribute(tag_name) end)
	end

	@doc """
	Strips all comments.
	"""
	defmacro strip_comments do
	quote do
	- def scrub({:comment, _, _}), do: ""
	+ def scrub({:comment, _, _}), do: nil
	end
	end

	@doc """
	Ensures any tags/attributes not explicitly whitelisted until this
	statement are stripped.
	"""
	defmacro strip_everything_not_covered do
	quote do
	# If we haven't covered the attribute until here, we just scrap it.
	def scrub_attribute(_tag, _attribute), do: nil

	# If we haven't covered the attribute until here, we just scrap it.
	def scrub({_tag, _attributes, children}), do: children

	# Text is left alone
	def scrub("" <> text), do: text
	end
	end

	@doc """
	Ensures any tags/attributes that are explicitly disallowed have
	their children dropped.
	"""
	defmacro strip_children_of(tag_name) do
	quote do
	def scrub({unquote(tag_name), _attributes, _children}), do: nil
	end
	end

	defp allow_this_tag_and_scrub_its_attributes(tag_name) do
	quote do
	def scrub({unquote(tag_name), attributes, children}) do
	{unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
	end

	defp scrub_attributes(unquote(tag_name), attributes) do
	Enum.map(attributes, fn attr ->
	scrub_attribute(unquote(tag_name), attr)
	end)
	|> Enum.reject(&is_nil(&1))
	end
	end
	end

	defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
	quote do
	def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
	{unquote(attr_name), value}
	end
	end
	end

	defp allow_this_tag_with_style_attribute(tag_name) do
	quote do
	def scrub_attribute(unquote(tag_name), {"style", value}) do
	{"style", scrub_css(value)}
	end
	end
	end

	defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
	quote do
	def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
	nil
	end

	@protocol_separator ":|(&#058)|(&#x70)|(&#x03a)|(%|%)3A"
	@protocol_separator_regex Regex.compile!(@protocol_separator, "mi")

	@http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
	@other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"

	@scheme_capture Regex.compile!(
	"(#{@http_like_scheme})|(#{@other_schemes})",
	"mi"
	)

	def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
	valid_schema =
	if uri =~ @protocol_separator_regex do
	case Regex.named_captures(@scheme_capture, uri) do
	%{"scheme" => scheme, "other_schemes" => ""} ->
	scheme in unquote(valid_schemes)

	%{"other_schemes" => scheme, "scheme" => ""} ->
	scheme in unquote(valid_schemes)

	_ ->
	false
	end
	else
	true
	end

	if valid_schema, do: {unquote(attr_name), uri}
	end
	end
	end
	end
	diff --git a/mix.exs b/mix.exs
	index 19c101d..aa3c690 100644
	--- a/mix.exs
	+++ b/mix.exs
	@@ -1,30 +1,31 @@
	defmodule FastSanitize.MixProject do
	use Mix.Project

	def project do
	[
	app: :fast_sanitize,
	version: "0.1.0",
	elixir: "~> 1.7",
	start_permanent: Mix.env() == :prod,
	deps: deps()
	]
	end

	# Run "mix help compile.app" to learn about applications.
	def application do
	[
	extra_applications: [:logger]
	]
	end

	# Run "mix help deps" to learn about dependencies.
	defp deps do
	[
	+ {:plug, "~> 1.8"},
	{:myhtmlex, "~> 0.2"},
	{:credo, "~> 1.0.0", only: [:dev, :test], runtime: false},
	{:ex_doc, "~> 0.19", only: :dev, runtime: false},
	{:dialyxir, "~> 1.0.0-rc.5", only: [:dev], runtime: false}
	]
	end
	end
	diff --git a/mix.lock b/mix.lock
	index 01ca85e..b2cd240 100644
	--- a/mix.lock
	+++ b/mix.lock
	@@ -1,4 +1,17 @@
	%{
	+ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"},
	+ "credo": {:hex, :credo, "1.0.5", "fdea745579f8845315fe6a3b43e2f9f8866839cfbc8562bb72778e9fdaa94214", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
	+ "dialyxir": {:hex, :dialyxir, "1.0.0-rc.6", "78e97d9c0ff1b5521dd68041193891aebebce52fc3b93463c0a6806874557d7d", [:mix], [{:erlex, "~> 0.2.1", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm"},
	+ "earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm"},
	+ "erlex": {:hex, :erlex, "0.2.1", "cee02918660807cbba9a7229cae9b42d1c6143b768c781fa6cee1eaf03ad860b", [:mix], [], "hexpm"},
	+ "ex_doc": {:hex, :ex_doc, "0.20.2", "1bd0dfb0304bade58beb77f20f21ee3558cc3c753743ae0ddbb0fd7ba2912331", [:mix], [{:earmark, "~> 1.3", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
	+ "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"},
	+ "makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
	+ "makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
	+ "mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
	"myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm"},
	+ "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"},
	"nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm"},
	+ "plug": {:hex, :plug, "1.8.0", "9d2685cb007fe5e28ed9ac27af2815bc262b7817a00929ac10f56f169f43b977", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
	+ "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"},
	}
	diff --git a/test/basic_html_test.exs b/test/basic_html_test.exs
	new file mode 100644
	index 0000000..c1460f0
	--- /dev/null
	+++ b/test/basic_html_test.exs
	@@ -0,0 +1,412 @@
	+defmodule FastSanitize.Sanitizer.BasicHTMLTest do
	+ use ExUnit.Case
	+
	+ defp basic_html_sanitize(text) do
	+ {:ok, text} = FastSanitize.basic_html(text)
	+ text
	+ end
	+
	+ test "strips nothing" do
	+ input = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
	+ expected = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "does strip language class from code tag" do
	+ input = "<code class=\"ruby\">Something.new</code>"
	+ expected = "<code>Something.new</code>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips everything except the allowed tags" do
	+ input = "<h1>hello <script>code!</script></h1>"
	+ expected = "<h1>hello code!</h1>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips everything except the allowed tags (for multiple tags)" do
	+ input =
	+ "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
	+
	+ expected = "code!<p>hello code!</p>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips everything for faulty allowed_tags: key" do
	+ input = "<h1>hello<h1>"
	+ expected = "hello"
	+ assert expected != basic_html_sanitize(input)
	+ end
	+
	+ test "strips invalid html" do
	+ input = "<<<bad html"
	+ expected = "<<"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips tags with quote" do
	+ input = "<\" <img src=\"trollface.gif\" onload=\"alert(1)\"> hi"
	+
	+ assert "<" <img src=\"trollface.gif\"/> hi" ==
	+ basic_html_sanitize(input)
	+ end
	+
	+ test "strips nested tags" do
	+ input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
	+ expected = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips certain tags in multi line strings" do
	+ input =
	+ "<title>This is <b>a <a href=\"\" target=\"_blank\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"
	+
	+ expected =
	+ "This is <b>a <a href="" target="_blank">test</a></b>.\n\n\n\n<p>It no <b>longer <strong>contains <em>any HTML</em>.</strong></b></p>\n"
	+
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips blank string" do
	+ assert "" == basic_html_sanitize("")
	+ assert "" == basic_html_sanitize(nil)
	+ end
	+
	+ test "strips nothing from plain text" do
	+ input = "Dont touch me"
	+ expected = "Dont touch me"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips nothing from a sentence" do
	+ input = "This is a test."
	+ expected = "This is a test."
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips tags with comment" do
	+ input = "This has a <!-- comment --> here."
	+ expected = "This has a here."
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strip_tags escapes special characters" do
	+ assert "&", basic_html_sanitize("&")
	+ end
	+
	+ # link sanitizer
	+
	+ test "test_strip_links_with_tags_in_tags" do
	+ input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
	+ expected = "<<a>a href='hello'>all <b>day</b> long<</a>/a>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "test_strip_links_with_unclosed_tags" do
	+ assert "" == basic_html_sanitize("<a<a")
	+ end
	+
	+ test "test_strip_links_with_plaintext" do
	+ assert "Dont touch me" == basic_html_sanitize("Dont touch me")
	+ end
	+
	+ @tag href_scrubbing: true
	+ test "test_strip_links_with_line_feed_and_uppercase_tag" do
	+ input = "<a href='almost'>on my mind</a> <A href='almost'>all day long</A>"
	+
	+ assert "<a href=\"almost\">on my mind</a> <a href=\"almost\">all day long</a>" ==
	+ basic_html_sanitize(input)
	+ end
	+
	+ @tag href_scrubbing: true
	+ test "test_strip_links_leaves_nonlink_tags" do
	+ assert "<a href=\"almost\">My mind</a>\n<a href=\"almost\">all <b>day</b> long</a>" ==
	+ basic_html_sanitize(
	+ "<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>"
	+ )
	+ end
	+
	+ @tag href_scrubbing: true
	+ test "strips tags with basic_html_sanitize/1" do
	+ input =
	+ "<p>This <u>is</u> a <a href='test.html'><strong>test</strong></a>.</p>"
	+
	+ assert "<p>This <u>is</u> a <a href=\"test.html\"><strong>test</strong></a>.</p>" ==
	+ basic_html_sanitize(input)
	+ end
	+
	+ @a_href_hacks [
	+ "<a href=\"javascript:alert('XSS');\">text here</a>",
	+ "<a href=javascript:alert('XSS')>text here</a>",
	+ "<a href=JaVaScRiPt:alert('XSS')>text here</a>",
	+ "<a href=javascript:alert("XSS")>text here</a>",
	+ "<a href=javascript:alert(String.fromCharCode(88,83,83))>text here</a>",
	+ "<a href=javascript:alert('XSS')>text here</a>",
	+ "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>text here</a>",
	+ "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>text here</a>",
	+ "<a href=\"jav\tascript:alert('XSS');\">text here</a>",
	+ "<a href=\"jav ascript:alert('XSS');\">text here</a>",
	+ "<a href=\"jav ascript:alert('XSS');\">text here</a>",
	+ "<a href=\"jav ascript:alert('XSS');\">text here</a>",
	+ "<a href=\" javascript:alert('XSS');\">text here</a>",
	+ "<a href=\"javascript:alert('XSS');\">text here</a>",
	+ "<a href=`javascript:alert(\"RSnake says, 'XSS'\")`>text here</a>",
	+ "<a href=\"javascript:alert('XSS');\">text here</a>",
	+ "<a href=\"javascript:alert('XSS');\">text here</a>",
	+ "<a href=\"javascript:alert('XSS');\">text here</a>",
	+ "<a href=\"javascript:alert('XSS');\">text here</a>",
	+ "<a href=\"javascript:alert('XSS')\">text here</a>",
	+ "<a href=\"JAVASCRIPT:alert(\'foo\')\">text here</a>",
	+ "<a href=\"java<!-- -->script:alert(\'foo\')\">text here</a>",
	+ "<a href=\"awesome.html#this:stuff\">text here</a>",
	+ "<a href=\"java\0\t\r\n script:alert(\'foo\')\">text here</a>",
	+ "<a href=\"java&#0000001script:alert(\'foo\')\">text here</a>",
	+ "<a href=\"java&#0000000script:alert(\'foo\')\">text here</a>"
	+ ]
	+
	+ @tag href_scrubbing: true
	+ test "strips malicious protocol hacks from a href attribute" do
	+ expected = "<a>text here</a>"
	+
	+ Enum.each(@a_href_hacks, fn x ->
	+ assert expected == basic_html_sanitize(x)
	+ end)
	+ end
	+
	+ @tag href_scrubbing: true
	+ test "does not strip x03a legitimate" do
	+ assert "<a href=\"http://legit\"></a>" ==
	+ basic_html_sanitize("<a href=\"http://legit\">")
	+
	+ assert "<a href=\"http://legit\"></a>" ==
	+ basic_html_sanitize("<a href=\"http://legit\">")
	+ end
	+
	+ test "test_strip links with links" do
	+ input =
	+ "<a href='http://www.elixirstatus.com/'><a href='http://www.elixirstatus.com/' onlclick='steal()'>0wn3d</a></a>"
	+
	+ assert "<a href=\"http://www.elixirstatus.com/\"></a><a href=\"http://www.elixirstatus.com/\">0wn3d</a>" ==
	+ basic_html_sanitize(input)
	+ end
	+
	+ test "test_strip_links_with_linkception" do
	+ assert "<a href=\"http://www.elixirstatus.com/\">Mag</a><a href=\"http://www.elixir-lang.org/\">ic</a>" ==
	+ basic_html_sanitize(
	+ "<a href='http://www.elixirstatus.com/'>Mag<a href='http://www.elixir-lang.org/'>ic"
	+ )
	+ end
	+
	+ test "test_strip_links_with_a_tag_in_href" do
	+ assert "FrrFox" ==
	+ basic_html_sanitize("<href onlclick='steal()'>FrrFox</a></href>")
	+ end
	+
	+ test "normal scrubbing does only allow certain tags and attributes" do
	+ input = "<span data-foo=\"bar\">foo</span>"
	+ expected = "<span>foo</span>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips not allowed attributes" do
	+ input =
	+ "start <a title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</a> end"
	+
	+ expected = "start <a title=\"1\">foo bar baz</a> end"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "sanitize_script" do
	+ assert "a b cblah blah blahd e f" ==
	+ basic_html_sanitize(
	+ "a b c<script language=\"Javascript\">blah blah blah</script>d e f"
	+ )
	+ end
	+
	+ @tag href_scrubbing: true
	+ test "sanitize_js_handlers" do
	+ input =
	+ ~s(onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>)
	+
	+ assert "onthis="do that" <a href=\"#\" name=\"foo\">hello</a>" ==
	+ basic_html_sanitize(input)
	+ end
	+
	+ test "sanitize_javascript_href" do
	+ raw =
	+ ~s(href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>)
	+
	+ assert ~s(href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>) ==
	+ basic_html_sanitize(raw)
	+ end
	+
	+ test "sanitize_image_src" do
	+ raw =
	+ ~s(src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>)
	+
	+ assert "src="javascript:bang" <img width=\"5\"/>foo, <span>bar</span>" ==
	+ basic_html_sanitize(raw)
	+ end
	+
	+ @tag href_scrubbing: true
	+ test "should only allow http/https protocols" do
	+ assert "<a href=\"foo\">baz</a>" ==
	+ basic_html_sanitize(
	+ ~s(<a href="foo" onclick="bar"><script>baz</script></a>)
	+ )
	+
	+ assert "<a href=\"http://example.com\">baz</a>" ==
	+ basic_html_sanitize(
	+ ~s(<a href="http://example.com" onclick="bar"><script>baz</script></a>)
	+ )
	+
	+ assert "<a href=\"https://example.com\">baz</a>" ==
	+ basic_html_sanitize(
	+ ~s(<a href="https://example.com" onclick="bar"><script>baz</script></a>)
	+ )
	+ end
	+
	+ # test "video_poster_sanitization" do
	+ # assert ~s(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>) == ~s(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
	+ # assert ~s(<video src="videofile.ogg"></video>) == basic_html_sanitize("<video src=\"videofile.ogg\" poster=javascript:alert(1)></video>")
	+ # end
	+
	+ test "strips not allowed tags " do
	+ input = "<form><u></u></form>"
	+ expected = "<u></u>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips not allowed attributes " do
	+ input = "<a foo=\"hello\" bar=\"world\"></a>"
	+ expected = "<a></a>"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ @image_src_hacks [
	+ "<IMG SRC=\"javascript:alert('XSS');\">",
	+ "<IMG SRC=javascript:alert('XSS')>",
	+ "<IMG SRC=JaVaScRiPt:alert('XSS')>",
	+ "<IMG SRC=javascript:alert("XSS")>",
	+ "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
	+ "<IMG SRC=javascript:alert('XSS')>",
	+ "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
	+ "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
	+ "<IMG SRC=\"jav\tascript:alert('XSS');\">",
	+ "<IMG SRC=\"jav ascript:alert('XSS');\">",
	+ "<IMG SRC=\"jav ascript:alert('XSS');\">",
	+ "<IMG SRC=\"jav ascript:alert('XSS');\">",
	+ "<IMG SRC=\" javascript:alert('XSS');\">",
	+ "<IMG SRC=\"javascript:alert('XSS');\">",
	+ "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>"
	+ ]
	+
	+ test "strips malicious protocol hacks from img src attribute" do
	+ expected = "<img />"
	+
	+ Enum.each(@image_src_hacks, fn x ->
	+ assert expected == basic_html_sanitize(x)
	+ end)
	+ end
	+
	+ test "strips script tag" do
	+ input = "<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>"
	+ expected = ""
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "strips xss image hack with uppercase tags" do
	+ input = "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">"
	+ expected = "<img />alert("XSS")">"
	+ assert expected == basic_html_sanitize(input)
	+ end
	+
	+ test "should_sanitize_tag_broken_up_by_null" do
	+ assert "alert("XSS")" ==
	+ basic_html_sanitize("<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>")
	+ end
	+
	+ test "should_sanitize_invalid_script_tag" do
	+ input = "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"
	+ assert "" == basic_html_sanitize(input)
	+ end
	+
	+ test "sanitize half open scripts" do
	+ input = "<IMG SRC=\"javascript:alert('XSS')\""
	+ assert "<img />" == basic_html_sanitize(input)
	+ end
	+
	+ test "should_not_fall_for_ridiculous_hack" do
	+ img_hack = """
	+ <IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
	+ """
	+
	+ assert "<img />)\n" == basic_html_sanitize(img_hack)
	+ end
	+
	+ test "should_sanitize_within attributes" do
	+ input =
	+ "<span title=\"'><script>alert()</script>\">blah</span>"
	+
	+ assert "<span>blah</span>" == basic_html_sanitize(input)
	+ end
	+
	+ test "should_sanitize_invalid_tag_names" do
	+ end
	+
	+ test "should_sanitize_non_alpha_and_non_digit_characters_in_tags" do
	+ assert "<a>foo</a>" ==
	+ basic_html_sanitize(
	+ "<a onclick!@#$%^&*='alert(\"XSS\")'>foo</a>"
	+ )
	+ end
	+
	+ test "should_sanitize_invalid_tag_names_in_single_tags" do
	+ assert "<img />" ==
	+ basic_html_sanitize("<img/src=\"javascript:alert('XSS')\"/>")
	+ end
	+
	+ test "should_sanitize_img_dynsrc_lowsrc" do
	+ assert "<img />" ==
	+ basic_html_sanitize("<img lowsrc=\"javascript:alert('XSS')\" />")
	+ end
	+
	+ test "should_sanitize_img_vbscript" do
	+ assert "<img />" ==
	+ basic_html_sanitize("<img src='vbscript:msgbox(\"XSS\")' />")
	+ end
	+
	+ test "should_not_mangle_urls_with_ampersand" do
	+ input = "<a href=\"http://www.domain.com?var1=1&var2=2\">my link</a>"
	+ assert input == basic_html_sanitize(input)
	+ end
	+
	+ test "should_not_crash_on_invalid_schema_formatting" do
	+ input =
	+ "<a href=\"http//www.domain.com/?encoded_param=param1%3Aparam2\">text here</a>"
	+
	+ assert "<a>text here</a>" == basic_html_sanitize(input)
	+ end
	+
	+ test "should_not_crash_on_invalid_schema_formatting_2" do
	+ input = "<a href=\"ftp://www.domain.com/http%3A//\">text here</a>"
	+ assert "<a>text here</a>" == basic_html_sanitize(input)
	+ end
	+
	+ test "should_sanitize_neverending_attribute" do
	+ assert "" == basic_html_sanitize("<span class=\"\\")
	+ end
	+
	+ # test "this affects only NS4, but we're on a roll, right?" do
	+ # input = "<div size=\"&{alert('XSS')}\">foo</div>"
	+ # expected = "<div>foo</div>"
	+ # assert expected == basic_html_sanitize(input)
	+ # end
	+
	+ test "does not strip the mailto URI scheme" do
	+ input = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
	+ expected = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
	+ assert expected == basic_html_sanitize(input)
	+ end
	+end
	+

File Metadata

Mime Type: text/x-diff
Expires: Sat, Nov 30, 4:44 PM (1 d, 20 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 41499
Default Alt Text: (31 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions