Page MenuHomePhorge

No OneTemporary

Size
12 KB
Referenced Files
None
Subscribers
None
diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
index abb9b33..da9eec2 100644
--- a/lib/fast_sanitize/fragment.ex
+++ b/lib/fast_sanitize/fragment.ex
@@ -1,71 +1,74 @@
defmodule FastSanitize.Fragment do
- import Plug.HTML, only: [html_escape: 1, html_escape_to_iodata: 1]
+ import Plug.HTML, only: [html_escape_to_iodata: 1]
def to_tree(bin) do
with {:html, _, [{:head, _, _}, {:body, _, fragment}]} <-
Myhtmlex.decode(bin, format: [:nil_self_closing, :comment_tuple3, :html_atoms]) do
{:ok, fragment}
else
e ->
{:error, e}
end
end
defp build_attr_chunks([]), do: ""
defp build_attr_chunks(attrs) do
List.foldr(attrs, [], fn {k, v}, iodata ->
[[" ", html_escape_to_iodata(k), "=\"", html_escape_to_iodata(v), "\""] | iodata]
end)
end
defp build_start_tag(tag, attrs, nil), do: ["<", to_string(tag), build_attr_chunks(attrs), "/>"]
defp build_start_tag(tag, attrs, _children) when length(attrs) == 0,
do: ["<", to_string(tag), ">"]
defp build_start_tag(tag, attrs, _children),
do: ["<", to_string(tag), build_attr_chunks(attrs), ">"]
# empty tuple - fragment was clobbered, return nothing
- defp fragment_to_html(nil), do: ""
+ defp fragment_to_html(nil, _), do: ""
- defp fragment_to_html({}), do: ""
+ defp fragment_to_html({}, _), do: ""
# text node
- defp fragment_to_html(text) when is_binary(text), do: html_escape_to_iodata(text)
+ defp fragment_to_html(text, _) when is_binary(text), do: html_escape_to_iodata(text)
# comment node
- defp fragment_to_html({:comment, _, text}), do: ["<!-- ", text, " -->"]
+ defp fragment_to_html({:comment, _, text}, _), do: ["<!-- ", text, " -->"]
# bare subtree
- defp fragment_to_html(subtree) when is_list(subtree) do
- {:ok, result} = subtree_to_html(subtree)
- result
+ defp fragment_to_html(subtree, scrubber) when is_list(subtree) do
+ subtree_to_iodata(subtree, scrubber)
end
# a node which can never accept children will have nil instead of a subtree
- defp fragment_to_html({tag, attrs, nil}), do: build_start_tag(tag, attrs, nil)
+ defp fragment_to_html({tag, attrs, nil}, _), do: build_start_tag(tag, attrs, nil)
# every other case, assume a subtree
- defp fragment_to_html({tag, attrs, subtree}) do
+ defp fragment_to_html({tag, attrs, subtree}, scrubber) do
with start_tag <- build_start_tag(tag, attrs, subtree),
end_tag <- ["</", to_string(tag), ">"],
- subtree <- subtree_to_iodata(subtree) do
+ subtree <- subtree_to_iodata(subtree, scrubber) do
[start_tag, subtree, end_tag]
end
end
- defp subtree_to_html([]), do: {:ok, ""}
+ defp subtree_to_html([], _), do: {:ok, ""}
- defp subtree_to_html(tree) do
- iodata = subtree_to_iodata(tree)
+ defp subtree_to_html(tree, scrubber) do
+ iodata = subtree_to_iodata(tree, scrubber)
rendered = :erlang.iolist_to_binary(iodata)
{:ok, rendered}
end
- defp subtree_to_iodata(tree),
- do: List.foldr(tree, [], fn node, iodata -> [fragment_to_html(node) | iodata] end)
+ defp subtree_to_iodata(tree, scrubber) do
+ List.foldr(tree, [], fn node, iodata ->
+ [fragment_to_html(scrubber.scrub(node), scrubber) | iodata]
+ end)
+ end
- def to_html(tree), do: subtree_to_html(tree)
+ def to_html(tree, scrubber \\ FastSanitize.Sanitizer.Dummy),
+ do: subtree_to_html(tree, scrubber)
end
diff --git a/lib/fast_sanitize/sanitizer.ex b/lib/fast_sanitize/sanitizer.ex
index d69946b..c75d72a 100644
--- a/lib/fast_sanitize/sanitizer.ex
+++ b/lib/fast_sanitize/sanitizer.ex
@@ -1,63 +1,38 @@
defmodule FastSanitize.Sanitizer do
require Logger
alias FastSanitize.Fragment
@moduledoc """
Defines the contract that Sanitizer modules must follow.
"""
@doc """
Scrubs a document node.
"""
@callback scrub({atom(), list(), list()}) :: tuple()
@doc """
Scrubs an unknown node.
"""
@callback scrub({binary(), list(), list()}) :: tuple()
@doc """
Scrubs a text node.
"""
@callback scrub(binary()) :: binary()
# fallbacks
def scrub("", _), do: {:ok, ""}
def scrub(nil, _), do: {:ok, ""}
def scrub(doc, scrubber) when is_binary(doc) do
with wrapped_doc <- "<body>" <> doc <> "</body>",
{:ok, subtree} <- Fragment.to_tree(wrapped_doc) do
- scrub(subtree, scrubber)
- |> Fragment.to_html()
+ Fragment.to_html(subtree, scrubber)
else
e ->
{:error, e}
end
end
-
- def scrub(subtree, scrubber) when is_list(subtree) do
- Logger.debug("Pre-process: #{inspect(subtree)}")
-
- Enum.map(subtree, fn fragment ->
- case scrubber.scrub(fragment) do
- {_tag, _attrs, nil} = fragment ->
- Logger.debug("Post-process closure: #{inspect(fragment)}")
- fragment
-
- {tag, attrs, children} ->
- Logger.debug("Post-process tag: #{inspect({tag, attrs, children})}")
- {tag, attrs, scrub(children, scrubber)}
-
- subtree when is_list(subtree) ->
- Logger.debug("Post-process subtree: #{inspect(subtree)}")
- scrub(subtree, scrubber)
-
- other ->
- Logger.debug("Post-process other: #{inspect(other)}")
- other
- end
- end)
- end
end
diff --git a/lib/fast_sanitize/sanitizer/dummy.ex b/lib/fast_sanitize/sanitizer/dummy.ex
new file mode 100644
index 0000000..3baaa31
--- /dev/null
+++ b/lib/fast_sanitize/sanitizer/dummy.ex
@@ -0,0 +1,3 @@
+defmodule FastSanitize.Sanitizer.Dummy do
+ def scrub(x), do: x
+end
diff --git a/lib/fast_sanitize/sanitizer/meta.ex b/lib/fast_sanitize/sanitizer/meta.ex
index 200c1cd..b41555e 100644
--- a/lib/fast_sanitize/sanitizer/meta.ex
+++ b/lib/fast_sanitize/sanitizer/meta.ex
@@ -1,213 +1,187 @@
# Based on HtmlSanitizeEx.Scrubber.Meta
# Copyright (c) 2015-2019 René Föhring (@rrrene)
defmodule FastSanitize.Sanitizer.Meta do
@moduledoc """
This module contains some meta-programming magic to define your own rules
for scrubbers.
The StripTags scrubber is a good starting point:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.strip_everything_not_covered
end
You can use the `allow_tag_with_uri_attributes/3` and
`allow_tag_with_these_attributes/2` macros to define what is allowed:
defmodule FastSanitize.Sanitizer.StripTags do
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.strip_everything_not_covered
end
You can stack these if convenient:
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http", "https"]
Meta.allow_tag_with_these_attributes "img", ["width", "height"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
@doc """
Allow these tags and use the regular `scrub_attribute/2` function to scrub
the attributes.
"""
defmacro allow_tags_and_scrub_their_attributes(list) do
Enum.map(list, fn tag_name ->
allow_this_tag_and_scrub_its_attributes(tag_name)
end)
end
@doc """
Allow the given `list` of attributes for the specified `tag_name`.
Meta.allow_tag_with_these_attributes "a", ["name", "title"]
Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
"""
defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
list
|> Enum.map(fn attr_name ->
allow_this_tag_with_this_attribute(tag_name, attr_name)
end)
|> Enum.concat([allow_this_tag_and_scrub_its_attributes(tag_name)])
end
@doc """
Allow the given list of `values` for the given `attribute` on the
specified `tag_name`.
Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
"""
defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attribute), value})
when value in unquote(values) do
{unquote(attribute), value}
end
end
end
@doc """
Allow the given `list` of attributes to contain URI information for the
specified `tag`.
# Only allow SSL-enabled and mailto links
Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]
# Only allow non-SSL images
Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
"""
defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
list
|> Enum.map(fn name ->
allow_tag_with_uri_attribute(tag, name, valid_schemes)
end)
end
@doc """
"""
defmacro allow_tags_with_style_attributes(list) do
list
|> Enum.map(fn tag_name -> allow_this_tag_with_style_attribute(tag_name) end)
end
@doc """
Strips all comments.
"""
defmacro strip_comments do
quote do
def scrub({:comment, _, _}), do: nil
end
end
@doc """
Ensures any tags/attributes not explicitly whitelisted until this
statement are stripped.
"""
defmacro strip_everything_not_covered do
quote do
# If we haven't covered the attribute until here, we just scrap it.
def scrub_attribute(_tag, _attribute), do: nil
# If we haven't covered the tag until here, we strip it but keep its children.
def scrub({_tag, _attributes, children}), do: children
# Text is left alone
def scrub("" <> text), do: text
end
end
@doc """
Drops the given tag entirely, together with all of its children,
instead of keeping the children as `strip_everything_not_covered` does.
"""
defmacro strip_children_of(tag_name) do
quote do
def scrub({unquote(tag_name), _attributes, _children}), do: nil
end
end
defp allow_this_tag_and_scrub_its_attributes(tag_name) do
quote do
def scrub({unquote(tag_name), attributes, children}) do
{unquote(tag_name), scrub_attributes(unquote(tag_name), attributes), children}
end
defp scrub_attributes(unquote(tag_name), attributes) do
Enum.map(attributes, fn attr ->
scrub_attribute(unquote(tag_name), attr)
end)
|> Enum.reject(&is_nil(&1))
end
end
end
defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), value}) do
{unquote(attr_name), value}
end
end
end
defp allow_this_tag_with_style_attribute(tag_name) do
quote do
def scrub_attribute(unquote(tag_name), {"style", value}) do
{"style", scrub_css(value)}
end
end
end
defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
quote do
def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
nil
end
- @protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
- @protocol_separator_regex Regex.compile!(@protocol_separator, "mi")
-
- @http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
- @other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"
-
- @scheme_capture Regex.compile!(
- "(#{@http_like_scheme})|(#{@other_schemes})",
- "mi"
- )
-
- def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
- valid_schema =
- if uri =~ @protocol_separator_regex do
- case Regex.named_captures(@scheme_capture, uri) do
- %{"scheme" => scheme, "other_schemes" => ""} ->
- scheme in unquote(valid_schemes)
-
- %{"other_schemes" => scheme, "scheme" => ""} ->
- scheme in unquote(valid_schemes)
-
- _ ->
- false
- end
- else
- true
- end
-
- if valid_schema, do: {unquote(attr_name), uri}
+ def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri} = attr) do
+ uri = URI.parse(uri)
+ if uri.scheme == nil or uri.scheme in unquote(valid_schemes), do: attr
end
end
end
end

File Metadata

Mime Type
text/x-diff
Expires
Fri, Nov 29, 9:45 PM (1 d, 19 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41312
Default Alt Text
(12 KB)

Event Timeline