Page MenuHomePhorge

No OneTemporary

Size
11 KB
Referenced Files
None
Subscribers
None
diff --git a/lib/fast_sanitize.ex b/lib/fast_sanitize.ex
index c89e71e..bdbb51c 100644
--- a/lib/fast_sanitize.ex
+++ b/lib/fast_sanitize.ex
@@ -1,17 +1,27 @@
defmodule FastSanitize do
alias FastSanitize.Sanitizer
@moduledoc """
Fast HTML sanitization module.
"""
@doc """
Strips every tag from the given document fragment by scrubbing it with the
`FastSanitize.Sanitizer.StripTags` scrubber.

## Example

    iex> FastSanitize.strip_tags("<h1>hello world</h1>")
    {:ok, "hello world"}
"""
def strip_tags(doc) do
  # `Sanitizer` is the module-level alias for `FastSanitize.Sanitizer`.
  Sanitizer.scrub(doc, Sanitizer.StripTags)
end
+
+ @doc """
+ Strip tags from a given document fragment that are not basic HTML.
+
+ ## Example
+
+ iex> FastSanitize.basic_html("<h1>hello world</h1><script>alert('xss')</script>")
+ {:ok, "<h1>hello world</h1>"}
+ """
+ def basic_html(doc), do: Sanitizer.scrub(doc, FastSanitize.Sanitizer.BasicHTML)
end
diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
index ba14f7b..25a7a39 100644
--- a/lib/fast_sanitize/fragment.ex
+++ b/lib/fast_sanitize/fragment.ex
@@ -1,64 +1,65 @@
defmodule FastSanitize.Fragment do
require Logger
@doc """
Parses `bin` with `Myhtmlex.decode/2` and returns the children of the
resulting `<body>` node.

Returns `{:ok, fragment}` when the decoder yields an `:html` node holding a
`:head` and a `:body`; any other decoder result is returned as `{:error, result}`.
"""
def to_tree(bin) do
  decoded = Myhtmlex.decode(bin, format: [:html_atoms, :nil_self_closing, :comment_tuple3])

  case decoded do
    {:html, _, [{:head, _, _}, {:body, _, fragment}]} -> {:ok, fragment}
    unexpected -> {:error, unexpected}
  end
end
# Renders the opening tag for `tag`, e.g. `<a href="/x" title="y">`.
#
# Attribute values are HTML-escaped before being placed between the double
# quotes. They come from untrusted markup, and an unescaped `"` would end the
# quoted value early and let the rest be read as additional attributes.
defp build_start_tag(tag, []), do: "<#{tag}>"

defp build_start_tag(tag, attrs) do
  attr_chunks =
    Enum.map_join(attrs, " ", fn {k, v} ->
      "#{k}=\"#{escape_attr_value(v)}\""
    end)

  "<#{tag} #{attr_chunks}>"
end

# Escapes the characters that are significant inside a double-quoted attribute
# value. `&` is replaced first so the entities produced by the later
# replacements are not escaped a second time.
defp escape_attr_value(value) do
  value
  |> to_string()
  |> String.replace("&", "&amp;")
  |> String.replace("\"", "&quot;")
  |> String.replace("<", "&lt;")
  |> String.replace(">", "&gt;")
end
# Serialises one parsed node (or a bare list of nodes) back into an HTML string.

# empty tuple - fragment was clobbered, return nothing
defp fragment_to_html({}), do: ""

# text node - escaped on the way out so that text such as a decoded
# "&lt;script&gt;" is not re-emitted as live markup.
# NOTE(review): assumes the parser hands back text with character references
# already decoded - confirm against Myhtmlex.
defp fragment_to_html(text) when is_binary(text), do: escape_text(text)

# comment node
defp fragment_to_html({:comment, _, text}), do: "<!-- #{text} -->"

# bare subtree
defp fragment_to_html(subtree) when is_list(subtree) do
  {:ok, result} = subtree_to_html(subtree)
  result
end

# a node which can never accept children will have nil instead of a subtree,
# so only the start tag is rendered
defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs)

# every other case, assume a subtree
defp fragment_to_html({tag, attrs, subtree}) do
  # subtree_to_html/1 only ever returns {:ok, html}, so match assertively.
  {:ok, inner} = subtree_to_html(subtree)
  build_start_tag(tag, attrs) <> inner <> "</#{tag}>"
end

# Escapes the characters that would otherwise be parsed as markup inside
# element content. `&` goes first to avoid double-escaping the entities
# introduced by the other replacements.
defp escape_text(text) do
  text
  |> String.replace("&", "&amp;")
  |> String.replace("<", "&lt;")
  |> String.replace(">", "&gt;")
end
defp subtree_to_html([]), do: {:ok, ""}
defp subtree_to_html(tree) do
rendered =
- Enum.map(tree, &fragment_to_html/1)
+ Enum.reject(tree, &is_nil/1)
+ |> Enum.map(&fragment_to_html/1)
|> Enum.join("")
{:ok, rendered}
end
@doc """
Renders a list of parsed nodes back into an HTML string.

Returns `{:ok, html}`.
"""
def to_html(tree) do
  subtree_to_html(tree)
end
end
diff --git a/lib/fast_sanitize/sanitizer/basic_html.ex b/lib/fast_sanitize/sanitizer/basic_html.ex
new file mode 100644
index 0000000..155885d
--- /dev/null
+++ b/lib/fast_sanitize/sanitizer/basic_html.ex
@@ -0,0 +1,53 @@
+defmodule FastSanitize.Sanitizer.BasicHTML do
+ require FastSanitize.Sanitizer.Meta
+ alias FastSanitize.Sanitizer.Meta
+
+ @valid_schemes ["http", "https", "mailto"]
+
+ Meta.strip_comments()
+
+ Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
+ Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
+
+ Meta.allow_tag_with_these_attributes(:b, [])
+ Meta.allow_tag_with_these_attributes(:blockquote, [])
+ Meta.allow_tag_with_these_attributes(:br, [])
+ Meta.allow_tag_with_these_attributes(:code, [])
+ Meta.allow_tag_with_these_attributes(:del, [])
+ Meta.allow_tag_with_these_attributes(:em, [])
+ Meta.allow_tag_with_these_attributes(:h1, [])
+ Meta.allow_tag_with_these_attributes(:h2, [])
+ Meta.allow_tag_with_these_attributes(:h3, [])
+ Meta.allow_tag_with_these_attributes(:h4, [])
+ Meta.allow_tag_with_these_attributes(:h5, [])
+ Meta.allow_tag_with_these_attributes(:hr, [])
+ Meta.allow_tag_with_these_attributes(:i, [])
+
+ Meta.allow_tag_with_uri_attributes(:img, ["src"], @valid_schemes)
+
+ Meta.allow_tag_with_these_attributes(:img, [
+ "width",
+ "height",
+ "title",
+ "alt"
+ ])
+
+ Meta.allow_tag_with_these_attributes(:li, [])
+ Meta.allow_tag_with_these_attributes(:ol, [])
+ Meta.allow_tag_with_these_attributes(:p, [])
+ Meta.allow_tag_with_these_attributes(:pre, [])
+ Meta.allow_tag_with_these_attributes(:span, [])
+ Meta.allow_tag_with_these_attributes(:strong, [])
+ Meta.allow_tag_with_these_attributes(:table, [])
+ Meta.allow_tag_with_these_attributes(:tbody, [])
+ Meta.allow_tag_with_these_attributes(:td, [])
+ Meta.allow_tag_with_these_attributes(:th, [])
+ Meta.allow_tag_with_these_attributes(:thead, [])
+ Meta.allow_tag_with_these_attributes(:tr, [])
+ Meta.allow_tag_with_these_attributes(:u, [])
+ Meta.allow_tag_with_these_attributes(:ul, [])
+
+ Meta.strip_children_of(:script)
+
+ Meta.strip_everything_not_covered()
+end
diff --git a/lib/fast_sanitize/sanitizer/meta.ex b/lib/fast_sanitize/sanitizer/meta.ex
index 6740b53..09699b3 100644
--- a/lib/fast_sanitize/sanitizer/meta.ex
+++ b/lib/fast_sanitize/sanitizer/meta.ex
@@ -1,203 +1,213 @@
# Based on HtmlSanitizeEx.Scrubber.Meta
# Copyright (c) 2015-2019 René Föhring (@rrrene)
defmodule FastSanitize.Sanitizer.Meta do
@moduledoc """
This module contains some meta-programming magic to define your own rules
for scrubbers.
The StripTags scrubber is a good starting point:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.strip_everything_not_covered
end
You can use the `allow_tag_with_uri_attributes/3` and
`allow_tag_with_these_attributes/2` macros to define what is allowed:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.strip_everything_not_covered
end
You can stack these if convenient:
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
@doc """
Allows every tag in `list`. The attributes of those tags are filtered through
the scrubber's `scrub_attribute/2` clauses.
"""
defmacro allow_tags_and_scrub_their_attributes(list) do
  for tag_name <- list do
    allow_this_tag_and_scrub_its_attributes(tag_name)
  end
end
@doc """
Allows the attributes named in `list` on the tag `tag_name`, and allows the
tag itself.

    Meta.allow_tag_with_these_attributes "a", ["name", "title"]
    Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
  # One `scrub_attribute/2` clause per allowed attribute ...
  attribute_clauses =
    for attr_name <- list do
      allow_this_tag_with_this_attribute(tag_name, attr_name)
    end

  # ... followed by the `scrub/1` clause that keeps the tag.
  attribute_clauses ++ [allow_this_tag_and_scrub_its_attributes(tag_name)]
end
@doc """
Allows `attribute` on the tag `tag_name`, but only when its value is one of
`values`.

    Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
"""
defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
  quote do
    # The matched attribute tuple is returned unchanged.
    def scrub_attribute(unquote(tag_name), {unquote(attribute), value} = allowed)
        when value in unquote(values),
        do: allowed
  end
end
@doc """
Allows the attributes named in `list` on `tag` to carry URIs, limited to the
schemes listed in `valid_schemes`.

    # Only allow SSL-enabled and mailto links
    Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]

    # Only allow non-SSL images
    Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
"""
defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
  for name <- list do
    allow_tag_with_uri_attribute(tag, name, valid_schemes)
  end
end
@doc """
Allows a `style` attribute on every tag in `list`.

The generated clauses pass the attribute value through `scrub_css/1`.
NOTE(review): `scrub_css/1` is not defined in this module; presumably the
scrubber using this macro has to provide it - confirm.
"""
defmacro allow_tags_with_style_attributes(list) do
  Enum.map(list, &allow_this_tag_with_style_attribute/1)
end
@doc """
Defines a `scrub/1` clause that replaces every comment node with an empty
string.
"""
defmacro strip_comments do
  quote do
    def scrub({:comment, _, _text}) do
      ""
    end
  end
end
@doc """
Defines the catch-all clauses of a scrubber: any tag or attribute that was not
explicitly whitelisted before this statement is stripped.

Call this last, since the clauses it defines match everything.
"""
defmacro strip_everything_not_covered do
  quote do
    # An attribute that no earlier clause accepted is dropped.
    def scrub_attribute(_tag, _attribute) do
      nil
    end

    # A tag that no earlier clause accepted is removed; its children are kept.
    def scrub({_tag, _attributes, children}) do
      children
    end

    # Text is left alone.
    def scrub(text) when is_binary(text) do
      text
    end
  end
end
+ @doc """
+ Ensures any tags/attributes that are explicitly disallowed have
+ their children dropped.
+ """
+ defmacro strip_children_of(tag_name) do
+ quote do
+ def scrub({unquote(tag_name), _attributes, _children}), do: nil
+ end
+ end
+
# Generates the clauses that keep `tag_name` in the output:
#
#   * a `scrub/1` clause returning the node with its attributes filtered, and
#   * a private `scrub_attributes/2` that runs each attribute through
#     `scrub_attribute/2` and discards those that come back as `nil`.
defp allow_this_tag_and_scrub_its_attributes(tag_name) do
  quote do
    def scrub({unquote(tag_name), attributes, children}) do
      kept = scrub_attributes(unquote(tag_name), attributes)
      {unquote(tag_name), kept, children}
    end

    defp scrub_attributes(unquote(tag_name), attributes) do
      attributes
      |> Enum.map(&scrub_attribute(unquote(tag_name), &1))
      |> Enum.reject(&is_nil/1)
    end
  end
end
# Generates a `scrub_attribute/2` clause that accepts `attr_name` on `tag_name`
# with any value and returns the attribute tuple unchanged.
defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
  quote do
    def scrub_attribute(unquote(tag_name), {unquote(attr_name), _value} = attribute),
      do: attribute
  end
end
# Generates a `scrub_attribute/2` clause that accepts a "style" attribute on
# `tag_name`, replacing its value with the result of `scrub_css/1`.
# NOTE(review): `scrub_css/1` is not defined in this module - presumably it is
# supplied by the scrubber module the clause is generated into; confirm.
defp allow_this_tag_with_style_attribute(tag_name) do
  quote do
    def scrub_attribute(unquote(tag_name), {"style", css}) do
      cleaned = scrub_css(css)
      {"style", cleaned}
    end
  end
end
# Generates the `scrub_attribute/2` clauses that validate the URI held in
# `attr_name` on `tag_name` against the `valid_schemes` allow-list.
#
# The generated code returns `{attr_name, uri}` when the value is accepted and
# `nil` otherwise.
defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
quote do
# Any value that starts with "&" is rejected outright.
def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
nil
end
# Spellings treated as a scheme separator: a literal ":" plus several
# character-reference / percent-encoded forms.
# NOTE(review): `&#x70` is the character reference for "p", not ":" - it looks
# inherited from the code this module is based on; confirm before changing.
@protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
@protocol_separator_regex Regex.compile!(@protocol_separator, "mi")
# "<scheme><separator>//" is captured as "scheme";
# "mailto<separator>" is captured as "other_schemes".
@http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
@other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"
@scheme_capture Regex.compile!(
"(#{@http_like_scheme})|(#{@other_schemes})",
"mi"
)
def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
# A value with no separator at all carries no scheme and is accepted;
# otherwise the captured scheme must be a member of `valid_schemes`.
valid_schema =
if uri =~ @protocol_separator_regex do
case Regex.named_captures(@scheme_capture, uri) do
%{"scheme" => scheme, "other_schemes" => ""} ->
scheme in unquote(valid_schemes)
%{"other_schemes" => scheme, "scheme" => ""} ->
scheme in unquote(valid_schemes)
# Separator present but no recognisable scheme: reject.
_ ->
false
end
else
true
end
# `if` without `else` evaluates to nil when the scheme is not valid.
if valid_schema, do: {unquote(attr_name), uri}
end
end
end
end

File Metadata

Mime Type
text/x-diff
Expires
Sat, Nov 30, 10:50 PM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41558
Default Alt Text
(11 KB)

Event Timeline