Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F116259
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
11 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/fast_sanitize.ex b/lib/fast_sanitize.ex
index c89e71e..bdbb51c 100644
--- a/lib/fast_sanitize.ex
+++ b/lib/fast_sanitize.ex
@@ -1,17 +1,27 @@
defmodule FastSanitize do
alias FastSanitize.Sanitizer
@moduledoc """
Fast HTML sanitization module.
"""
@doc """
Strip all tags from a given document fragment.
## Example
iex> FastSanitize.strip_tags("<h1>hello world</h1>")
{:ok, "hello world"}
"""
def strip_tags(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.StripTags)
+
+ @doc """
+ Strip all tags that are not basic HTML from a given document fragment.
+
+ ## Example
+
+ iex> FastSanitize.basic_html("<h1>hello world</h1><script>alert('xss')</script>")
+ {:ok, "<h1>hello world</h1>"}
+ """
+ def basic_html(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.BasicHTML)
end
diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
index ba14f7b..25a7a39 100644
--- a/lib/fast_sanitize/fragment.ex
+++ b/lib/fast_sanitize/fragment.ex
@@ -1,64 +1,65 @@
defmodule FastSanitize.Fragment do
require Logger
def to_tree(bin) do
with {:html, _, [{:head, _, _}, {:body, _, fragment}]} <-
Myhtmlex.decode(bin, format: [:html_atoms, :nil_self_closing, :comment_tuple3]) do
{:ok, fragment}
else
e -> {:error, e}
end
end
defp build_start_tag(tag, attrs) when length(attrs) == 0, do: "<#{tag}>"
defp build_start_tag(tag, attrs) do
attr_chunks =
Enum.map(attrs, fn {k, v} ->
"#{k}=\"#{v}\""
end)
|> Enum.join(" ")
"<#{tag} #{attr_chunks}>"
end
# empty tuple - fragment was clobbered, return nothing
defp fragment_to_html({}), do: ""
# text node
defp fragment_to_html(text) when is_binary(text), do: text
# comment node
defp fragment_to_html({:comment, _, text}), do: "<!-- #{text} -->"
# bare subtree
defp fragment_to_html(subtree) when is_list(subtree) do
{:ok, result} = subtree_to_html(subtree)
result
end
# a node which can never accept children will have nil instead of a subtree
defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs)
# every other case, assume a subtree
defp fragment_to_html({tag, attrs, subtree}) do
with start_tag <- build_start_tag(tag, attrs),
end_tag <- "</#{tag}>",
{:ok, subtree} <- subtree_to_html(subtree) do
[start_tag, subtree, end_tag]
|> Enum.join("")
end
end
defp subtree_to_html([]), do: {:ok, ""}
defp subtree_to_html(tree) do
rendered =
- Enum.map(tree, &fragment_to_html/1)
+ Enum.reject(tree, &is_nil/1)
+ |> Enum.map(&fragment_to_html/1)
|> Enum.join("")
{:ok, rendered}
end
def to_html(tree), do: subtree_to_html(tree)
end
diff --git a/lib/fast_sanitize/sanitizer/basic_html.ex b/lib/fast_sanitize/sanitizer/basic_html.ex
new file mode 100644
index 0000000..155885d
--- /dev/null
+++ b/lib/fast_sanitize/sanitizer/basic_html.ex
@@ -0,0 +1,53 @@
+defmodule FastSanitize.Sanitizer.BasicHTML do
+ require FastSanitize.Sanitizer.Meta
+ alias FastSanitize.Sanitizer.Meta
+
+ @valid_schemes ["http", "https", "mailto"]
+
+ Meta.strip_comments()
+
+ Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
+ Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
+
+ Meta.allow_tag_with_these_attributes(:b, [])
+ Meta.allow_tag_with_these_attributes(:blockquote, [])
+ Meta.allow_tag_with_these_attributes(:br, [])
+ Meta.allow_tag_with_these_attributes(:code, [])
+ Meta.allow_tag_with_these_attributes(:del, [])
+ Meta.allow_tag_with_these_attributes(:em, [])
+ Meta.allow_tag_with_these_attributes(:h1, [])
+ Meta.allow_tag_with_these_attributes(:h2, [])
+ Meta.allow_tag_with_these_attributes(:h3, [])
+ Meta.allow_tag_with_these_attributes(:h4, [])
+ Meta.allow_tag_with_these_attributes(:h5, [])
+ Meta.allow_tag_with_these_attributes(:hr, [])
+ Meta.allow_tag_with_these_attributes(:i, [])
+
+ Meta.allow_tag_with_uri_attributes(:img, ["src"], @valid_schemes)
+
+ Meta.allow_tag_with_these_attributes(:img, [
+ "width",
+ "height",
+ "title",
+ "alt"
+ ])
+
+ Meta.allow_tag_with_these_attributes(:li, [])
+ Meta.allow_tag_with_these_attributes(:ol, [])
+ Meta.allow_tag_with_these_attributes(:p, [])
+ Meta.allow_tag_with_these_attributes(:pre, [])
+ Meta.allow_tag_with_these_attributes(:span, [])
+ Meta.allow_tag_with_these_attributes(:strong, [])
+ Meta.allow_tag_with_these_attributes(:table, [])
+ Meta.allow_tag_with_these_attributes(:tbody, [])
+ Meta.allow_tag_with_these_attributes(:td, [])
+ Meta.allow_tag_with_these_attributes(:th, [])
+ Meta.allow_tag_with_these_attributes(:thead, [])
+ Meta.allow_tag_with_these_attributes(:tr, [])
+ Meta.allow_tag_with_these_attributes(:u, [])
+ Meta.allow_tag_with_these_attributes(:ul, [])
+
+ Meta.strip_children_of(:script)
+
+ Meta.strip_everything_not_covered()
+end
diff --git a/lib/fast_sanitize/sanitizer/meta.ex b/lib/fast_sanitize/sanitizer/meta.ex
index 6740b53..09699b3 100644
--- a/lib/fast_sanitize/sanitizer/meta.ex
+++ b/lib/fast_sanitize/sanitizer/meta.ex
@@ -1,203 +1,213 @@
# Based on HtmlSanitizeEx.Scrubber.Meta
# Copyright (c) 2015-2019 René Föhring (@rrrene)
defmodule FastSanitize.Sanitizer.Meta do
@moduledoc """
This module contains some meta-programming magic to define your own rules
for scrubbers.
The StripTags scrubber is a good starting point:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.strip_everything_not_covered
end
You can use the `allow_tag_with_uri_attributes/3` and
`allow_tag_with_these_attributes/2` macros to define what is allowed:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.strip_everything_not_covered
end
You can stack these if convenient:
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
@doc """
Allow these tags and use the regular `scrub_attribute/2` function to scrub
the attributes.
"""
defmacro allow_tags_and_scrub_their_attributes(list) do
Enum.map(list, fn tag_name ->
allow_this_tag_and_scrub_its_attributes(tag_name)
end)
end
@doc """
Allow the given +list+ of attributes for the specified +tag+.
Meta.allow_tag_with_these_attributes "a", ["name", "title"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
list
|> Enum.map(fn attr_name ->
allow_this_tag_with_this_attribute(tag_name, attr_name)
end)
|> Enum.concat([allow_this_tag_and_scrub_its_attributes(tag_name)])
end
@doc """
Allow the given list of +values+ for the given +attribute+ on the
specified +tag+.
Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
"""
defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
when value in unquote(values) do
{unquote(attribute), value}
end
end
end
@doc """
Allow the given +list+ of attributes to contain URI information for the
specified +tag+.
# Only allow SSL-enabled and mailto links
Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]
# Only allow none-SSL images
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
"""
defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
list
|> Enum.map(fn name ->
allow_tag_with_uri_attribute(tag, name, valid_schemes)
end)
end
@doc """
"""
defmacro allow_tags_with_style_attributes(list) do
list
|> Enum.map(fn tag_name -> allow_this_tag_with_style_attribute(tag_name) end)
end
@doc """
Strips all comments.
"""
defmacro strip_comments do
quote do
def scrub({:comment, _, _}), do: ""
end
end
@doc """
Ensures any tags/attributes not explicitly whitelisted until this
statement are stripped.
"""
defmacro strip_everything_not_covered do
quote do
# If we haven't covered the attribute until here, we just scrap it.
def scrub_attribute(_tag, _attribute), do: nil
# If we haven't covered the attribute until here, we just scrap it.
def scrub({_tag, _attributes, children}), do: children
# Text is left alone
def scrub("" <> text), do: text
end
end
+ @doc """
+ Drops the given tag together with all of its children: the generated
+ `scrub/1` clause returns `nil` for it.
+ """
+ defmacro strip_children_of(tag_name) do
+ quote do
+ def scrub({unquote(tag_name), _attributes, _children}), do: nil
+ end
+ end
+
defp allow_this_tag_and_scrub_its_attributes(tag_name) do
quote do
def scrub({unquote(tag_name), attributes, children}) do
{unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
end
defp scrub_attributes(unquote(tag_name), attributes) do
Enum.map(attributes, fn attr ->
scrub_attribute(unquote(tag_name), attr)
end)
|> Enum.reject(&is_nil(&1))
end
end
end
defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
{unquote(attr_name), value}
end
end
end
defp allow_this_tag_with_style_attribute(tag_name) do
quote do
def scrub_attribute(unquote(tag_name), {"style", value}) do
{"style", scrub_css(value)}
end
end
end
defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
nil
end
@protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
@protocol_separator_regex Regex.compile!(@protocol_separator, "mi")
@http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
@other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"
@scheme_capture Regex.compile!(
"(#{@http_like_scheme})|(#{@other_schemes})",
"mi"
)
def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
valid_schema =
if uri =~ @protocol_separator_regex do
case Regex.named_captures(@scheme_capture, uri) do
%{"scheme" => scheme, "other_schemes" => ""} ->
scheme in unquote(valid_schemes)
%{"other_schemes" => scheme, "scheme" => ""} ->
scheme in unquote(valid_schemes)
_ ->
false
end
else
true
end
if valid_schema, do: {unquote(attr_name), uri}
end
end
end
end
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Nov 30, 10:50 PM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41558
Default Alt Text
(11 KB)
Attached To
Mode
R15 fast_sanitize
Attached
Detach File
Event Timeline
Log In to Comment