Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F115986
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
12 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
index abb9b33..da9eec2 100644
--- a/lib/fast_sanitize/fragment.ex
+++ b/lib/fast_sanitize/fragment.ex
@@ -1,71 +1,74 @@
defmodule FastSanitize.Fragment do
- import Plug.HTML, only: [html_escape: 1, html_escape_to_iodata: 1]
+ import Plug.HTML, only: [html_escape_to_iodata: 1]
def to_tree(bin) do
with {:html, _, [{:head, _, _}, {:body, _, fragment}]} <-
Myhtmlex.decode(bin, format: [:nil_self_closing, :comment_tuple3, :html_atoms]) do
{:ok, fragment}
else
e ->
{:error, e}
end
end
defp build_attr_chunks([]), do: ""
defp build_attr_chunks(attrs) do
List.foldr(attrs, [], fn {k, v}, iodata ->
[[" ", html_escape_to_iodata(k), "=\"", html_escape_to_iodata(v), "\""] | iodata]
end)
end
defp build_start_tag(tag, attrs, nil), do: ["<", to_string(tag), build_attr_chunks(attrs), "/>"]
defp build_start_tag(tag, attrs, _children) when length(attrs) == 0,
do: ["<", to_string(tag), ">"]
defp build_start_tag(tag, attrs, _children),
do: ["<", to_string(tag), build_attr_chunks(attrs), ">"]
# empty tuple - fragment was clobbered, return nothing
- defp fragment_to_html(nil), do: ""
+ defp fragment_to_html(nil, _), do: ""
- defp fragment_to_html({}), do: ""
+ defp fragment_to_html({}, _), do: ""
# text node
- defp fragment_to_html(text) when is_binary(text), do: html_escape_to_iodata(text)
+ defp fragment_to_html(text, _) when is_binary(text), do: html_escape_to_iodata(text)
# comment node
- defp fragment_to_html({:comment, _, text}), do: ["<!-- ", text, " -->"]
+ defp fragment_to_html({:comment, _, text}, _), do: ["<!-- ", text, " -->"]
# bare subtree
- defp fragment_to_html(subtree) when is_list(subtree) do
- {:ok, result} = subtree_to_html(subtree)
- result
+ defp fragment_to_html(subtree, scrubber) when is_list(subtree) do
+ subtree_to_iodata(subtree, scrubber)
end
# a node which can never accept children will have nil instead of a subtree
- defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs, nil)
+ defp fragment_to_html({tag, attrs, nil}, _), do: build_start_tag(tag, attrs, nil)
# every other case, assume a subtree
- defp fragment_to_html({tag, attrs, subtree}) do
+ defp fragment_to_html({tag, attrs, subtree}, scrubber) do
with start_tag <- build_start_tag(tag, attrs, subtree),
end_tag <- ["</", to_string(tag), ">"],
- subtree <- subtree_to_iodata(subtree) do
+ subtree <- subtree_to_iodata(subtree, scrubber) do
[start_tag, subtree, end_tag]
end
end
- defp subtree_to_html([]), do: {:ok, ""}
+ defp subtree_to_html([], _), do: {:ok, ""}
- defp subtree_to_html(tree) do
- iodata = subtree_to_iodata(tree)
+ defp subtree_to_html(tree, scrubber) do
+ iodata = subtree_to_iodata(tree, scrubber)
rendered = :erlang.iolist_to_binary(iodata)
{:ok, rendered}
end
- defp subtree_to_iodata(tree),
- do: List.foldr(tree, [], fn node, iodata -> [fragment_to_html(node) | iodata] end)
+ defp subtree_to_iodata(tree, scrubber) do
+ List.foldr(tree, [], fn node, iodata ->
+ [fragment_to_html(scrubber.scrub(node), scrubber) | iodata]
+ end)
+ end
- def to_html(tree), do: subtree_to_html(tree)
+ def to_html(tree, scrubber \\ FastSanitize.Sanitizer.Dummy),
+ do: subtree_to_html(tree, scrubber)
end
diff --git a/lib/fast_sanitize/sanitizer.ex b/lib/fast_sanitize/sanitizer.ex
index d69946b..c75d72a 100644
--- a/lib/fast_sanitize/sanitizer.ex
+++ b/lib/fast_sanitize/sanitizer.ex
@@ -1,63 +1,38 @@
defmodule FastSanitize.Sanitizer do
require Logger
alias FastSanitize.Fragment
@moduledoc """
Defines the contract that Sanitizer modules must follow.
"""
@doc """
Scrubs a document node.
"""
@callback scrub({atom(), list(), list()}) :: tuple()
@doc """
Scrubs an unknown node.
"""
@callback scrub({binary(), list(), list()}) :: tuple()
@doc """
Scrubs a text node.
"""
@callback scrub(binary()) :: binary()
# fallbacks
def scrub("", _), do: {:ok, ""}
def scrub(nil, _), do: {:ok, ""}
def scrub(doc, scrubber) when is_binary(doc) do
with wrapped_doc <- "<body>" <> doc <> "</body>",
{:ok, subtree} <- Fragment.to_tree(wrapped_doc) do
- scrub(subtree, scrubber)
- |> Fragment.to_html()
+ Fragment.to_html(subtree, scrubber)
else
e ->
{:error, e}
end
end
-
- def scrub(subtree, scrubber) when is_list(subtree) do
- Logger.debug("Pre-process: #{inspect(subtree)}")
-
- Enum.map(subtree, fn fragment ->
- case scrubber.scrub(fragment) do
- {_tag, _attrs, nil} = fragment ->
- Logger.debug("Post-process closure: #{inspect(fragment)}")
- fragment
-
- {tag, attrs, children} ->
- Logger.debug("Post-process tag: #{inspect({tag, attrs, children})}")
- {tag, attrs, scrub(children, scrubber)}
-
- subtree when is_list(subtree) ->
- Logger.debug("Post-process subtree: #{inspect(subtree)}")
- scrub(subtree, scrubber)
-
- other ->
- Logger.debug("Post-process other: #{inspect(other)}")
- other
- end
- end)
- end
end
diff --git a/lib/fast_sanitize/sanitizer/dummy.ex b/lib/fast_sanitize/sanitizer/dummy.ex
new file mode 100644
index 0000000..3baaa31
--- /dev/null
+++ b/lib/fast_sanitize/sanitizer/dummy.ex
@@ -0,0 +1,3 @@
+defmodule FastSanitize.Sanitizer.Dummy do
+ def scrub(x), do: x
+end
diff --git a/lib/fast_sanitize/sanitizer/meta.ex b/lib/fast_sanitize/sanitizer/meta.ex
index 200c1cd..b41555e 100644
--- a/lib/fast_sanitize/sanitizer/meta.ex
+++ b/lib/fast_sanitize/sanitizer/meta.ex
@@ -1,213 +1,187 @@
# Based on HtmlSanitizeEx.Scrubber.Meta
# Copyright (c) 2015-2019 René Föhring (@rrrene)
defmodule FastSanitize.Sanitizer.Meta do
@moduledoc """
This module contains some meta-programming magic to define your own rules
for scrubbers.
The StripTags scrubber is a good starting point:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.strip_everything_not_covered
end
You can use the `allow_tag_with_uri_attributes/3` and
`allow_tag_with_these_attributes/2` macros to define what is allowed:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.strip_everything_not_covered
end
You can stack these if convenient:
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
@doc """
Allow these tags and use the regular `scrub_attribute/2` function to scrub
the attributes.
"""
defmacro allow_tags_and_scrub_their_attributes(list) do
Enum.map(list, fn tag_name ->
allow_this_tag_and_scrub_its_attributes(tag_name)
end)
end
@doc """
Allow the given +list+ of attributes for the specified +tag+.
Meta.allow_tag_with_these_attributes "a", ["name", "title"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
list
|> Enum.map(fn attr_name ->
allow_this_tag_with_this_attribute(tag_name, attr_name)
end)
|> Enum.concat([allow_this_tag_and_scrub_its_attributes(tag_name)])
end
@doc """
Allow the given list of +values+ for the given +attribute+ on the
specified +tag+.
Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
"""
defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
when value in unquote(values) do
{unquote(attribute), value}
end
end
end
@doc """
Allow the given +list+ of attributes to contain URI information for the
specified +tag+.
# Only allow SSL-enabled and mailto links
Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]
# Only allow none-SSL images
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
"""
defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
list
|> Enum.map(fn name ->
allow_tag_with_uri_attribute(tag, name, valid_schemes)
end)
end
@doc """
"""
defmacro allow_tags_with_style_attributes(list) do
list
|> Enum.map(fn tag_name -> allow_this_tag_with_style_attribute(tag_name) end)
end
@doc """
Strips all comments.
"""
defmacro strip_comments do
quote do
def scrub({:comment, _, _}), do: nil
end
end
@doc """
Ensures any tags/attributes not explicitly whitelisted until this
statement are stripped.
"""
defmacro strip_everything_not_covered do
quote do
# If we haven't covered the attribute until here, we just scrap it.
def scrub_attribute(_tag, _attribute), do: nil
# If we haven't covered the attribute until here, we just scrap it.
def scrub({_tag, _attributes, children}), do: children
# Text is left alone
def scrub("" <> text), do: text
end
end
@doc """
Ensures any tags/attributes that are explicitly disallowed have
their children dropped.
"""
defmacro strip_children_of(tag_name) do
quote do
def scrub({unquote(tag_name), _attributes, _children}), do: nil
end
end
defp allow_this_tag_and_scrub_its_attributes(tag_name) do
quote do
def scrub({unquote(tag_name), attributes, children}) do
{unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
end
defp scrub_attributes(unquote(tag_name), attributes) do
Enum.map(attributes, fn attr ->
scrub_attribute(unquote(tag_name), attr)
end)
|> Enum.reject(&is_nil(&1))
end
end
end
defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
{unquote(attr_name), value}
end
end
end
defp allow_this_tag_with_style_attribute(tag_name) do
quote do
def scrub_attribute(unquote(tag_name), {"style", value}) do
{"style", scrub_css(value)}
end
end
end
defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
nil
end
- @protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
- @protocol_separator_regex Regex.compile!(@protocol_separator, "mi")
-
- @http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
- @other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"
-
- @scheme_capture Regex.compile!(
- "(#{@http_like_scheme})|(#{@other_schemes})",
- "mi"
- )
-
- def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
- valid_schema =
- if uri =~ @protocol_separator_regex do
- case Regex.named_captures(@scheme_capture, uri) do
- %{"scheme" => scheme, "other_schemes" => ""} ->
- scheme in unquote(valid_schemes)
-
- %{"other_schemes" => scheme, "scheme" => ""} ->
- scheme in unquote(valid_schemes)
-
- _ ->
- false
- end
- else
- true
- end
-
- if valid_schema, do: {unquote(attr_name), uri}
+ def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri} = attr) do
+ uri = URI.parse(uri)
+ if uri.scheme == nil or uri.scheme in unquote(valid_schemes), do: attr
end
end
end
end
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Nov 29, 9:45 PM (1 d, 19 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41312
Default Alt Text
(12 KB)
Attached To
Mode
R15 fast_sanitize
Attached
Detach File
Event Timeline
Log In to Comment