Page MenuHomePhorge

No OneTemporary

Size
31 KB
Referenced Files
None
Subscribers
None
diff --git a/lib/fast_sanitize.ex b/lib/fast_sanitize.ex
index bdbb51c..ce6e290 100644
--- a/lib/fast_sanitize.ex
+++ b/lib/fast_sanitize.ex
@@ -1,27 +1,27 @@
defmodule FastSanitize do
alias FastSanitize.Sanitizer
@moduledoc """
Fast HTML sanitization module.
"""
@doc """
Strip all tags from a given document fragment.
## Example
iex> FastSanitize.strip_tags("<h1>hello world</h1>")
{:ok, "hello world"}
"""
def strip_tags(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.StripTags)
@doc """
Strip tags from a given document fragment that are not basic HTML.
## Example
iex> FastSanitize.basic_html("<h1>hello world</h1><script>alert('xss')</script>")
- {:ok, "<h1>hello world</h1>"}
+ {:ok, "<h1>hello world</h1>alert(&#39;xss&#39;)"}
"""
def basic_html(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.BasicHTML)
end
diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
index 25a7a39..6eba6eb 100644
--- a/lib/fast_sanitize/fragment.ex
+++ b/lib/fast_sanitize/fragment.ex
@@ -1,65 +1,64 @@
defmodule FastSanitize.Fragment do
- require Logger
+ import Plug.HTML, only: [html_escape: 1]
def to_tree(bin) do
with {:html, _, [{:head, _, _}, {:body, _, fragment}]} <-
Myhtmlex.decode(bin, format: [:html_atoms, :nil_self_closing, :comment_tuple3]) do
{:ok, fragment}
else
e -> {:error, e}
end
end
- defp build_start_tag(tag, attrs) when length(attrs) == 0, do: "<#{tag}>"
-
- defp build_start_tag(tag, attrs) do
- attr_chunks =
- Enum.map(attrs, fn {k, v} ->
- "#{k}=\"#{v}\""
- end)
- |> Enum.join(" ")
-
- "<#{tag} #{attr_chunks}>"
+  # Serialises a list of `{name, value}` attribute pairs into the
+  # space-separated `name="value"` form used inside a start tag. Both the name
+  # and the value are passed through `html_escape/1` before interpolation.
+  defp build_attr_chunks(attrs) do
+    Enum.map_join(attrs, " ", fn {k, v} ->
+      "#{html_escape(k)}=\"#{html_escape(v)}\""
+    end)
end
+  # Renders the opening tag for `tag`.
+  #
+  # A `nil` children slot marks a node that can never accept children, so it
+  # is emitted self-closed. Otherwise an ordinary start tag is produced; the
+  # empty attribute list is matched directly in the head (rather than via
+  # `length/1`) so no list traversal is needed and no stray space is emitted.
+  defp build_start_tag(tag, attrs, nil), do: "<#{tag} #{build_attr_chunks(attrs)}/>"
+  defp build_start_tag(tag, [], _children), do: "<#{tag}>"
+  defp build_start_tag(tag, attrs, _children), do: "<#{tag} #{build_attr_chunks(attrs)}>"
+
# empty tuple - fragment was clobbered, return nothing
defp fragment_to_html({}), do: ""
# text node
- defp fragment_to_html(text) when is_binary(text), do: text
+ defp fragment_to_html(text) when is_binary(text), do: html_escape(text)
# comment node
defp fragment_to_html({:comment, _, text}), do: "<!-- #{text} -->"
# bare subtree
defp fragment_to_html(subtree) when is_list(subtree) do
{:ok, result} = subtree_to_html(subtree)
result
end
# a node which can never accept children will have nil instead of a subtree
- defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs)
+ defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs, nil)
# every other case, assume a subtree
defp fragment_to_html({tag, attrs, subtree}) do
- with start_tag <- build_start_tag(tag, attrs),
+ with start_tag <- build_start_tag(tag, attrs, subtree),
end_tag <- "</#{tag}>",
{:ok, subtree} <- subtree_to_html(subtree) do
[start_tag, subtree, end_tag]
|> Enum.join("")
end
end
defp subtree_to_html([]), do: {:ok, ""}
defp subtree_to_html(tree) do
rendered =
Enum.reject(tree, &is_nil/1)
|> Enum.map(&fragment_to_html/1)
|> Enum.join("")
{:ok, rendered}
end
def to_html(tree), do: subtree_to_html(tree)
end
diff --git a/lib/fast_sanitize/sanitizer/basic_html.ex b/lib/fast_sanitize/sanitizer/basic_html.ex
index 155885d..546cf02 100644
--- a/lib/fast_sanitize/sanitizer/basic_html.ex
+++ b/lib/fast_sanitize/sanitizer/basic_html.ex
@@ -1,53 +1,51 @@
defmodule FastSanitize.Sanitizer.BasicHTML do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
@valid_schemes ["http", "https", "mailto"]
Meta.strip_comments()
Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
Meta.allow_tag_with_these_attributes(:b, [])
Meta.allow_tag_with_these_attributes(:blockquote, [])
Meta.allow_tag_with_these_attributes(:br, [])
Meta.allow_tag_with_these_attributes(:code, [])
Meta.allow_tag_with_these_attributes(:del, [])
Meta.allow_tag_with_these_attributes(:em, [])
Meta.allow_tag_with_these_attributes(:h1, [])
Meta.allow_tag_with_these_attributes(:h2, [])
Meta.allow_tag_with_these_attributes(:h3, [])
Meta.allow_tag_with_these_attributes(:h4, [])
Meta.allow_tag_with_these_attributes(:h5, [])
Meta.allow_tag_with_these_attributes(:hr, [])
Meta.allow_tag_with_these_attributes(:i, [])
Meta.allow_tag_with_uri_attributes(:img, ["src"], @valid_schemes)
Meta.allow_tag_with_these_attributes(:img, [
"width",
"height",
"title",
"alt"
])
Meta.allow_tag_with_these_attributes(:li, [])
Meta.allow_tag_with_these_attributes(:ol, [])
Meta.allow_tag_with_these_attributes(:p, [])
Meta.allow_tag_with_these_attributes(:pre, [])
Meta.allow_tag_with_these_attributes(:span, [])
Meta.allow_tag_with_these_attributes(:strong, [])
Meta.allow_tag_with_these_attributes(:table, [])
Meta.allow_tag_with_these_attributes(:tbody, [])
Meta.allow_tag_with_these_attributes(:td, [])
Meta.allow_tag_with_these_attributes(:th, [])
Meta.allow_tag_with_these_attributes(:thead, [])
Meta.allow_tag_with_these_attributes(:tr, [])
Meta.allow_tag_with_these_attributes(:u, [])
Meta.allow_tag_with_these_attributes(:ul, [])
- Meta.strip_children_of(:script)
-
Meta.strip_everything_not_covered()
end
diff --git a/lib/fast_sanitize/sanitizer/meta.ex b/lib/fast_sanitize/sanitizer/meta.ex
index 09699b3..200c1cd 100644
--- a/lib/fast_sanitize/sanitizer/meta.ex
+++ b/lib/fast_sanitize/sanitizer/meta.ex
@@ -1,213 +1,213 @@
# Based on HtmlSanitizeEx.Scrubber.Meta
# Copyright (c) 2015-2019 René Föhring (@rrrene)
defmodule FastSanitize.Sanitizer.Meta do
@moduledoc """
This module contains some meta-programming magic to define your own rules
for scrubbers.
The StripTags scrubber is a good starting point:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.strip_everything_not_covered
end
You can use the `allow_tag_with_uri_attributes/3` and
`allow_tag_with_these_attributes/2` macros to define what is allowed:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.strip_everything_not_covered
end
You can stack these if convenient:
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
@doc """
Allow these tags and use the regular `scrub_attribute/2` function to scrub
the attributes.
"""
defmacro allow_tags_and_scrub_their_attributes(list) do
Enum.map(list, fn tag_name ->
allow_this_tag_and_scrub_its_attributes(tag_name)
end)
end
@doc """
Allow the given +list+ of attributes for the specified +tag+.
Meta.allow_tag_with_these_attributes "a", ["name", "title"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
list
|> Enum.map(fn attr_name ->
allow_this_tag_with_this_attribute(tag_name, attr_name)
end)
|> Enum.concat([allow_this_tag_and_scrub_its_attributes(tag_name)])
end
@doc """
Allow the given list of +values+ for the given +attribute+ on the
specified +tag+.
Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
"""
defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
when value in unquote(values) do
{unquote(attribute), value}
end
end
end
@doc """
Allow the given +list+ of attributes to contain URI information for the
specified +tag+.
# Only allow SSL-enabled and mailto links
Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]
# Only allow none-SSL images
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
"""
defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
list
|> Enum.map(fn name ->
allow_tag_with_uri_attribute(tag, name, valid_schemes)
end)
end
@doc """
"""
defmacro allow_tags_with_style_attributes(list) do
list
|> Enum.map(fn tag_name -> allow_this_tag_with_style_attribute(tag_name) end)
end
@doc """
Strips all comments.
"""
defmacro strip_comments do
quote do
- def scrub({:comment, _, _}), do: ""
+ def scrub({:comment, _, _}), do: nil
end
end
@doc """
Ensures any tags/attributes not explicitly whitelisted until this
statement are stripped.
"""
defmacro strip_everything_not_covered do
quote do
# If we haven't covered the attribute until here, we just scrap it.
def scrub_attribute(_tag, _attribute), do: nil
# If we haven't covered the attribute until here, we just scrap it.
def scrub({_tag, _attributes, children}), do: children
# Text is left alone
def scrub("" <> text), do: text
end
end
@doc """
Ensures any tags/attributes that are explicitly disallowed have
their children dropped.
"""
defmacro strip_children_of(tag_name) do
quote do
def scrub({unquote(tag_name), _attributes, _children}), do: nil
end
end
defp allow_this_tag_and_scrub_its_attributes(tag_name) do
quote do
def scrub({unquote(tag_name), attributes, children}) do
{unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
end
defp scrub_attributes(unquote(tag_name), attributes) do
Enum.map(attributes, fn attr ->
scrub_attribute(unquote(tag_name), attr)
end)
|> Enum.reject(&is_nil(&1))
end
end
end
defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
{unquote(attr_name), value}
end
end
end
defp allow_this_tag_with_style_attribute(tag_name) do
quote do
def scrub_attribute(unquote(tag_name), {"style", value}) do
{"style", scrub_css(value)}
end
end
end
defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
nil
end
@protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
@protocol_separator_regex Regex.compile!(@protocol_separator, "mi")
@http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
@other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"
@scheme_capture Regex.compile!(
"(#{@http_like_scheme})|(#{@other_schemes})",
"mi"
)
def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
valid_schema =
if uri =~ @protocol_separator_regex do
case Regex.named_captures(@scheme_capture, uri) do
%{"scheme" => scheme, "other_schemes" => ""} ->
scheme in unquote(valid_schemes)
%{"other_schemes" => scheme, "scheme" => ""} ->
scheme in unquote(valid_schemes)
_ ->
false
end
else
true
end
if valid_schema, do: {unquote(attr_name), uri}
end
end
end
end
diff --git a/mix.exs b/mix.exs
index 19c101d..aa3c690 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,30 +1,31 @@
defmodule FastSanitize.MixProject do
use Mix.Project
def project do
[
app: :fast_sanitize,
version: "0.1.0",
elixir: "~> 1.7",
start_permanent: Mix.env() == :prod,
deps: deps()
]
end
# Run "mix help compile.app" to learn about applications.
def application do
[
extra_applications: [:logger]
]
end
# Run "mix help deps" to learn about dependencies.
defp deps do
[
+ {:plug, "~> 1.8"},
{:myhtmlex, "~> 0.2"},
{:credo, "~> 1.0.0", only: [:dev, :test], runtime: false},
{:ex_doc, "~> 0.19", only: :dev, runtime: false},
{:dialyxir, "~> 1.0.0-rc.5", only: [:dev], runtime: false}
]
end
end
diff --git a/mix.lock b/mix.lock
index 01ca85e..b2cd240 100644
--- a/mix.lock
+++ b/mix.lock
@@ -1,4 +1,17 @@
%{
+ "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"},
+ "credo": {:hex, :credo, "1.0.5", "fdea745579f8845315fe6a3b43e2f9f8866839cfbc8562bb72778e9fdaa94214", [:mix], [{:bunt, "~> 0.2.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
+ "dialyxir": {:hex, :dialyxir, "1.0.0-rc.6", "78e97d9c0ff1b5521dd68041193891aebebce52fc3b93463c0a6806874557d7d", [:mix], [{:erlex, "~> 0.2.1", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm"},
+ "earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm"},
+ "erlex": {:hex, :erlex, "0.2.1", "cee02918660807cbba9a7229cae9b42d1c6143b768c781fa6cee1eaf03ad860b", [:mix], [], "hexpm"},
+ "ex_doc": {:hex, :ex_doc, "0.20.2", "1bd0dfb0304bade58beb77f20f21ee3558cc3c753743ae0ddbb0fd7ba2912331", [:mix], [{:earmark, "~> 1.3", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
+ "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"},
+ "makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
+ "makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
+ "mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm"},
"myhtmlex": {:hex, :myhtmlex, "0.2.1", "d6f3eb1826f7cdaa0225a996569da0930d1a334405510845c905ae59295ab226", [:make, :mix], [{:nodex, "~> 0.1.1", [hex: :nodex, repo: "hexpm", optional: false]}], "hexpm"},
+ "nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"},
"nodex": {:hex, :nodex, "0.1.1", "ed2f7bbe19ea62a43ad4b7ad332eb3f9ca12c64a35a5802a0eb545b93ebe32af", [:mix], [], "hexpm"},
+ "plug": {:hex, :plug, "1.8.0", "9d2685cb007fe5e28ed9ac27af2815bc262b7817a00929ac10f56f169f43b977", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm"},
+ "plug_crypto": {:hex, :plug_crypto, "1.0.0", "18e49317d3fa343f24620ed22795ec29d4a5e602d52d1513ccea0b07d8ea7d4d", [:mix], [], "hexpm"},
}
diff --git a/test/basic_html_test.exs b/test/basic_html_test.exs
new file mode 100644
index 0000000..c1460f0
--- /dev/null
+++ b/test/basic_html_test.exs
@@ -0,0 +1,412 @@
+defmodule FastSanitize.Sanitizer.BasicHTMLTest do
+ use ExUnit.Case
+
+ defp basic_html_sanitize(text) do
+ {:ok, text} = FastSanitize.basic_html(text)
+ text
+ end
+
+ test "strips nothing" do
+ input = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
+ expected = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "does strip language class from code tag" do
+ input = "<code class=\"ruby\">Something.new</code>"
+ expected = "<code>Something.new</code>"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips everything except the allowed tags" do
+ input = "<h1>hello <script>code!</script></h1>"
+ expected = "<h1>hello code!</h1>"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips everything except the allowed tags (for multiple tags)" do
+ input =
+ "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
+
+ expected = "code!<p>hello code!</p>"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips everything for faulty allowed_tags: key" do
+ input = "<h1>hello<h1>"
+ expected = "hello"
+ assert expected != basic_html_sanitize(input)
+ end
+
+ test "strips invalid html" do
+ input = "<<<bad html"
+ expected = "&lt;&lt;"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips tags with quote" do
+ input = "<\" <img src=\"trollface.gif\" onload=\"alert(1)\"> hi"
+
+ assert "&lt;&quot; <img src=\"trollface.gif\"/> hi" ==
+ basic_html_sanitize(input)
+ end
+
+ test "strips nested tags" do
+ input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
+ expected = "Wei&lt;<a>a onclick=&#39;alert(document.cookie);&#39;</a>/&gt;rdos"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips certain tags in multi line strings" do
+ input =
+ "<title>This is <b>a <a href=\"\" target=\"_blank\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"
+
+ expected =
+ "This is &lt;b&gt;a &lt;a href=&quot;&quot; target=&quot;_blank&quot;&gt;test&lt;/a&gt;&lt;/b&gt;.\n\n\n\n<p>It no <b>longer <strong>contains <em>any HTML</em>.</strong></b></p>\n"
+
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips blank string" do
+ assert "" == basic_html_sanitize("")
+ assert "" == basic_html_sanitize(nil)
+ end
+
+ test "strips nothing from plain text" do
+ input = "Dont touch me"
+ expected = "Dont touch me"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips nothing from a sentence" do
+ input = "This is a test."
+ expected = "This is a test."
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips tags with comment" do
+ input = "This has a <!-- comment --> here."
+ expected = "This has a here."
+ assert expected == basic_html_sanitize(input)
+ end
+
+  # Compare with `==`: `assert value, message` would treat the sanitizer output
+  # as the failure message and pass unconditionally on the truthy literal.
+  test "strip_tags escapes special characters" do
+    assert "&amp;" == basic_html_sanitize("&")
+  end
+
+ # link sanitizer
+
+ test "test_strip_links_with_tags_in_tags" do
+ input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
+ expected = "&lt;<a>a href=&#39;hello&#39;&gt;all <b>day</b> long&lt;</a>/a&gt;"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "test_strip_links_with_unclosed_tags" do
+ assert "" == basic_html_sanitize("<a<a")
+ end
+
+ test "test_strip_links_with_plaintext" do
+ assert "Dont touch me" == basic_html_sanitize("Dont touch me")
+ end
+
+ @tag href_scrubbing: true
+ test "test_strip_links_with_line_feed_and_uppercase_tag" do
+ input = "<a href='almost'>on my mind</a> <A href='almost'>all day long</A>"
+
+ assert "<a href=\"almost\">on my mind</a> <a href=\"almost\">all day long</a>" ==
+ basic_html_sanitize(input)
+ end
+
+ @tag href_scrubbing: true
+ test "test_strip_links_leaves_nonlink_tags" do
+ assert "<a href=\"almost\">My mind</a>\n<a href=\"almost\">all <b>day</b> long</a>" ==
+ basic_html_sanitize(
+ "<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>"
+ )
+ end
+
+ @tag href_scrubbing: true
+ test "strips tags with basic_html_sanitize/1" do
+ input =
+ "<p>This <u>is</u> a <a href='test.html'><strong>test</strong></a>.</p>"
+
+ assert "<p>This <u>is</u> a <a href=\"test.html\"><strong>test</strong></a>.</p>" ==
+ basic_html_sanitize(input)
+ end
+
+ @a_href_hacks [
+ "<a href=\"javascript:alert('XSS');\">text here</a>",
+ "<a href=javascript:alert('XSS')>text here</a>",
+ "<a href=JaVaScRiPt:alert('XSS')>text here</a>",
+ "<a href=javascript:alert(&quot;XSS&quot;)>text here</a>",
+ "<a href=javascript:alert(String.fromCharCode(88,83,83))>text here</a>",
+ "<a href=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>text here</a>",
+ "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>text here</a>",
+ "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>text here</a>",
+ "<a href=\"jav\tascript:alert('XSS');\">text here</a>",
+ "<a href=\"jav&#x09;ascript:alert('XSS');\">text here</a>",
+ "<a href=\"jav&#x0A;ascript:alert('XSS');\">text here</a>",
+ "<a href=\"jav&#x0D;ascript:alert('XSS');\">text here</a>",
+ "<a href=\" &#14; javascript:alert('XSS');\">text here</a>",
+ "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
+ "<a href=`javascript:alert(\"RSnake says, 'XSS'\")`>text here</a>",
+ "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
+ "<a href=\"javascript&#x003a;alert('XSS');\">text here</a>",
+ "<a href=\"javascript&#x3A;alert('XSS');\">text here</a>",
+ "<a href=\"javascript&#x003A;alert('XSS');\">text here</a>",
+ "<a href=\"&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;\">text here</a>",
+ "<a href=\"JAVASCRIPT:alert(\'foo\')\">text here</a>",
+ "<a href=\"java<!-- -->script:alert(\'foo\')\">text here</a>",
+ "<a href=\"awesome.html#this:stuff\">text here</a>",
+ "<a href=\"java\0&#14;\t\r\n script:alert(\'foo\')\">text here</a>",
+ "<a href=\"java&#0000001script:alert(\'foo\')\">text here</a>",
+ "<a href=\"java&#0000000script:alert(\'foo\')\">text here</a>"
+ ]
+
+ @tag href_scrubbing: true
+ test "strips malicious protocol hacks from a href attribute" do
+ expected = "<a>text here</a>"
+
+ Enum.each(@a_href_hacks, fn x ->
+ assert expected == basic_html_sanitize(x)
+ end)
+ end
+
+ @tag href_scrubbing: true
+ test "does not strip x03a legitimate" do
+ assert "<a href=\"http://legit\"></a>" ==
+ basic_html_sanitize("<a href=\"http&#x3a;//legit\">")
+
+ assert "<a href=\"http://legit\"></a>" ==
+ basic_html_sanitize("<a href=\"http&#x3A;//legit\">")
+ end
+
+ test "test_strip links with links" do
+ input =
+ "<a href='http://www.elixirstatus.com/'><a href='http://www.elixirstatus.com/' onlclick='steal()'>0wn3d</a></a>"
+
+ assert "<a href=\"http://www.elixirstatus.com/\"></a><a href=\"http://www.elixirstatus.com/\">0wn3d</a>" ==
+ basic_html_sanitize(input)
+ end
+
+ test "test_strip_links_with_linkception" do
+ assert "<a href=\"http://www.elixirstatus.com/\">Mag</a><a href=\"http://www.elixir-lang.org/\">ic</a>" ==
+ basic_html_sanitize(
+ "<a href='http://www.elixirstatus.com/'>Mag<a href='http://www.elixir-lang.org/'>ic"
+ )
+ end
+
+ test "test_strip_links_with_a_tag_in_href" do
+ assert "FrrFox" ==
+ basic_html_sanitize("<href onlclick='steal()'>FrrFox</a></href>")
+ end
+
+ test "normal scrubbing does only allow certain tags and attributes" do
+ input = "<span data-foo=\"bar\">foo</span>"
+ expected = "<span>foo</span>"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips not allowed attributes" do
+ input =
+ "start <a title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</a> end"
+
+ expected = "start <a title=\"1\">foo bar baz</a> end"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "sanitize_script" do
+ assert "a b cblah blah blahd e f" ==
+ basic_html_sanitize(
+ "a b c<script language=\"Javascript\">blah blah blah</script>d e f"
+ )
+ end
+
+ @tag href_scrubbing: true
+ test "sanitize_js_handlers" do
+ input =
+ ~s(onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>)
+
+ assert "onthis=&quot;do that&quot; <a href=\"#\" name=\"foo\">hello</a>" ==
+ basic_html_sanitize(input)
+ end
+
+ test "sanitize_javascript_href" do
+ raw =
+ ~s(href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>)
+
+ assert ~s(href=&quot;javascript:bang&quot; <a name="hello">foo</a>, <span>bar</span>) ==
+ basic_html_sanitize(raw)
+ end
+
+ test "sanitize_image_src" do
+ raw =
+ ~s(src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>)
+
+ assert "src=&quot;javascript:bang&quot; <img width=\"5\"/>foo, <span>bar</span>" ==
+ basic_html_sanitize(raw)
+ end
+
+ @tag href_scrubbing: true
+ test "should only allow http/https protocols" do
+ assert "<a href=\"foo\">baz</a>" ==
+ basic_html_sanitize(
+ ~s(<a href="foo" onclick="bar"><script>baz</script></a>)
+ )
+
+ assert "<a href=\"http://example.com\">baz</a>" ==
+ basic_html_sanitize(
+ ~s(<a href="http://example.com" onclick="bar"><script>baz</script></a>)
+ )
+
+ assert "<a href=\"https://example.com\">baz</a>" ==
+ basic_html_sanitize(
+ ~s(<a href="https://example.com" onclick="bar"><script>baz</script></a>)
+ )
+ end
+
+ # test "video_poster_sanitization" do
+ # assert ~s(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>) == ~s(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
+ # assert ~s(<video src="videofile.ogg"></video>) == basic_html_sanitize("<video src=\"videofile.ogg\" poster=javascript:alert(1)></video>")
+ # end
+
+ test "strips not allowed tags " do
+ input = "<form><u></u></form>"
+ expected = "<u></u>"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips not allowed attributes " do
+ input = "<a foo=\"hello\" bar=\"world\"></a>"
+ expected = "<a></a>"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ @image_src_hacks [
+ "<IMG SRC=\"javascript:alert('XSS');\">",
+ "<IMG SRC=javascript:alert('XSS')>",
+ "<IMG SRC=JaVaScRiPt:alert('XSS')>",
+ "<IMG SRC=javascript:alert(&quot;XSS&quot;)>",
+ "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
+ "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
+ "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
+ "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
+ "<IMG SRC=\"jav\tascript:alert('XSS');\">",
+ "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",
+ "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",
+ "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">",
+ "<IMG SRC=\" &#14; javascript:alert('XSS');\">",
+ "<IMG SRC=\"javascript&#x3a;alert('XSS');\">",
+ "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>"
+ ]
+
+ test "strips malicious protocol hacks from img src attribute" do
+ expected = "<img />"
+
+ Enum.each(@image_src_hacks, fn x ->
+ assert expected == basic_html_sanitize(x)
+ end)
+ end
+
+ test "strips script tag" do
+ input = "<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>"
+ expected = ""
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "strips xss image hack with uppercase tags" do
+ input = "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">"
+ expected = "<img />alert(&quot;XSS&quot;)&quot;&gt;"
+ assert expected == basic_html_sanitize(input)
+ end
+
+ test "should_sanitize_tag_broken_up_by_null" do
+ assert "alert(&quot;XSS&quot;)" ==
+ basic_html_sanitize("<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>")
+ end
+
+ test "should_sanitize_invalid_script_tag" do
+ input = "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"
+ assert "" == basic_html_sanitize(input)
+ end
+
+ test "sanitize half open scripts" do
+ input = "<IMG SRC=\"javascript:alert('XSS')\""
+ assert "<img />" == basic_html_sanitize(input)
+ end
+
+ test "should_not_fall_for_ridiculous_hack" do
+ img_hack = """
+ <IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
+ """
+
+ assert "<img />)\n" == basic_html_sanitize(img_hack)
+ end
+
+ test "should_sanitize_within attributes" do
+ input =
+ "<span title=\"&#39;&gt;&lt;script&gt;alert()&lt;/script&gt;\">blah</span>"
+
+ assert "<span>blah</span>" == basic_html_sanitize(input)
+ end
+
+ # NOTE(review): this test has an empty body, so it passes without checking
+ # anything. Presumably a placeholder for an invalid-tag-name case — TODO add
+ # the intended input and expected output, or remove the test.
+ test "should_sanitize_invalid_tag_names" do
+ end
+
+ test "should_sanitize_non_alpha_and_non_digit_characters_in_tags" do
+ assert "<a>foo</a>" ==
+ basic_html_sanitize(
+ "<a onclick!@#$%^&*='alert(\"XSS\")'>foo</a>"
+ )
+ end
+
+ test "should_sanitize_invalid_tag_names_in_single_tags" do
+ assert "<img />" ==
+ basic_html_sanitize("<img/src=\"javascript:alert('XSS')\"/>")
+ end
+
+ test "should_sanitize_img_dynsrc_lowsrc" do
+ assert "<img />" ==
+ basic_html_sanitize("<img lowsrc=\"javascript:alert('XSS')\" />")
+ end
+
+ test "should_sanitize_img_vbscript" do
+ assert "<img />" ==
+ basic_html_sanitize("<img src='vbscript:msgbox(\"XSS\")' />")
+ end
+
+ test "should_not_mangle_urls_with_ampersand" do
+ input = "<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>"
+ assert input == basic_html_sanitize(input)
+ end
+
+ test "should_not_crash_on_invalid_schema_formatting" do
+ input =
+ "<a href=\"http//www.domain.com/?encoded_param=param1%3Aparam2\">text here</a>"
+
+ assert "<a>text here</a>" == basic_html_sanitize(input)
+ end
+
+ test "should_not_crash_on_invalid_schema_formatting_2" do
+ input = "<a href=\"ftp://www.domain.com/http%3A//\">text here</a>"
+ assert "<a>text here</a>" == basic_html_sanitize(input)
+ end
+
+ test "should_sanitize_neverending_attribute" do
+ assert "" == basic_html_sanitize("<span class=\"\\")
+ end
+
+ # test "this affects only NS4, but we're on a roll, right?" do
+ # input = "<div size=\"&{alert('XSS')}\">foo</div>"
+ # expected = "<div>foo</div>"
+ # assert expected == basic_html_sanitize(input)
+ # end
+
+ test "does not strip the mailto URI scheme" do
+ input = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
+ expected = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
+ assert expected == basic_html_sanitize(input)
+ end
+end
+

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 30, 4:44 PM (1 d, 20 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41499
Default Alt Text
(31 KB)

Event Timeline