Page MenuHomePhorge

No OneTemporary

Size
6 KB
Referenced Files
None
Subscribers
None
diff --git a/lib/fast_sanitize/fragment.ex b/lib/fast_sanitize/fragment.ex
index da9eec2..a67e7f5 100644
--- a/lib/fast_sanitize/fragment.ex
+++ b/lib/fast_sanitize/fragment.ex
@@ -1,74 +1,76 @@
defmodule FastSanitize.Fragment do
+ @moduledoc "Processing of HTML fragment trees."
+
import Plug.HTML, only: [html_escape_to_iodata: 1]
def to_tree(bin) do
with {:html, _, [{:head, _, _}, {:body, _, fragment}]} <-
Myhtmlex.decode(bin, format: [:nil_self_closing, :comment_tuple3, :html_atoms]) do
{:ok, fragment}
else
e ->
{:error, e}
end
end
defp build_attr_chunks([]), do: ""
defp build_attr_chunks(attrs) do
List.foldr(attrs, [], fn {k, v}, iodata ->
[[" ", html_escape_to_iodata(k), "=\"", html_escape_to_iodata(v), "\""] | iodata]
end)
end
defp build_start_tag(tag, attrs, nil), do: ["<", to_string(tag), build_attr_chunks(attrs), "/>"]
defp build_start_tag(tag, attrs, _children) when length(attrs) == 0,
do: ["<", to_string(tag), ">"]
defp build_start_tag(tag, attrs, _children),
do: ["<", to_string(tag), build_attr_chunks(attrs), ">"]
# empty tuple - fragment was clobbered, return nothing
defp fragment_to_html(nil, _), do: ""
defp fragment_to_html({}, _), do: ""
# text node
defp fragment_to_html(text, _) when is_binary(text), do: html_escape_to_iodata(text)
# comment node
defp fragment_to_html({:comment, _, text}, _), do: ["<!-- ", text, " -->"]
# bare subtree
defp fragment_to_html(subtree, scrubber) when is_list(subtree) do
subtree_to_iodata(subtree, scrubber)
end
# a node which can never accept children will have nil instead of a subtree
defp fragment_to_html({tag, attrs, nil}, _), do: build_start_tag(tag, attrs, nil)
# every other case, assume a subtree
defp fragment_to_html({tag, attrs, subtree}, scrubber) do
with start_tag <- build_start_tag(tag, attrs, subtree),
end_tag <- ["</", to_string(tag), ">"],
subtree <- subtree_to_iodata(subtree, scrubber) do
[start_tag, subtree, end_tag]
end
end
defp subtree_to_html([], _), do: {:ok, ""}
defp subtree_to_html(tree, scrubber) do
iodata = subtree_to_iodata(tree, scrubber)
rendered = :erlang.iolist_to_binary(iodata)
{:ok, rendered}
end
defp subtree_to_iodata(tree, scrubber) do
List.foldr(tree, [], fn node, iodata ->
[fragment_to_html(scrubber.scrub(node), scrubber) | iodata]
end)
end
def to_html(tree, scrubber \\ FastSanitize.Sanitizer.Dummy),
do: subtree_to_html(tree, scrubber)
end
diff --git a/lib/fast_sanitize/sanitizer/basic_html.ex b/lib/fast_sanitize/sanitizer/basic_html.ex
index 546cf02..2e79a8b 100644
--- a/lib/fast_sanitize/sanitizer/basic_html.ex
+++ b/lib/fast_sanitize/sanitizer/basic_html.ex
@@ -1,51 +1,53 @@
defmodule FastSanitize.Sanitizer.BasicHTML do
+ @moduledoc "The default sanitizer policy."
+
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
@valid_schemes ["http", "https", "mailto"]
Meta.strip_comments()
Meta.allow_tag_with_uri_attributes(:a, ["href"], @valid_schemes)
Meta.allow_tag_with_these_attributes(:a, ["name", "title"])
Meta.allow_tag_with_these_attributes(:b, [])
Meta.allow_tag_with_these_attributes(:blockquote, [])
Meta.allow_tag_with_these_attributes(:br, [])
Meta.allow_tag_with_these_attributes(:code, [])
Meta.allow_tag_with_these_attributes(:del, [])
Meta.allow_tag_with_these_attributes(:em, [])
Meta.allow_tag_with_these_attributes(:h1, [])
Meta.allow_tag_with_these_attributes(:h2, [])
Meta.allow_tag_with_these_attributes(:h3, [])
Meta.allow_tag_with_these_attributes(:h4, [])
Meta.allow_tag_with_these_attributes(:h5, [])
Meta.allow_tag_with_these_attributes(:hr, [])
Meta.allow_tag_with_these_attributes(:i, [])
Meta.allow_tag_with_uri_attributes(:img, ["src"], @valid_schemes)
Meta.allow_tag_with_these_attributes(:img, [
"width",
"height",
"title",
"alt"
])
Meta.allow_tag_with_these_attributes(:li, [])
Meta.allow_tag_with_these_attributes(:ol, [])
Meta.allow_tag_with_these_attributes(:p, [])
Meta.allow_tag_with_these_attributes(:pre, [])
Meta.allow_tag_with_these_attributes(:span, [])
Meta.allow_tag_with_these_attributes(:strong, [])
Meta.allow_tag_with_these_attributes(:table, [])
Meta.allow_tag_with_these_attributes(:tbody, [])
Meta.allow_tag_with_these_attributes(:td, [])
Meta.allow_tag_with_these_attributes(:th, [])
Meta.allow_tag_with_these_attributes(:thead, [])
Meta.allow_tag_with_these_attributes(:tr, [])
Meta.allow_tag_with_these_attributes(:u, [])
Meta.allow_tag_with_these_attributes(:ul, [])
Meta.strip_everything_not_covered()
end
diff --git a/lib/fast_sanitize/sanitizer/dummy.ex b/lib/fast_sanitize/sanitizer/dummy.ex
index 3baaa31..d471403 100644
--- a/lib/fast_sanitize/sanitizer/dummy.ex
+++ b/lib/fast_sanitize/sanitizer/dummy.ex
@@ -1,3 +1,5 @@
defmodule FastSanitize.Sanitizer.Dummy do
+ @moduledoc "A sanitizer policy which does nothing."
+
def scrub(x), do: x
end
diff --git a/lib/fast_sanitize/sanitizer/strip_tags.ex b/lib/fast_sanitize/sanitizer/strip_tags.ex
index 2815af1..d20b243 100644
--- a/lib/fast_sanitize/sanitizer/strip_tags.ex
+++ b/lib/fast_sanitize/sanitizer/strip_tags.ex
@@ -1,7 +1,9 @@
defmodule FastSanitize.Sanitizer.StripTags do
+ @moduledoc "A sanitizer policy which strips all tags."
+
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
Meta.strip_comments()
Meta.strip_everything_not_covered()
end
diff --git a/lib/mix/tasks/fast_sanitize/bench.ex b/lib/mix/tasks/fast_sanitize/bench.ex
index f0f0358..3527a7b 100644
--- a/lib/mix/tasks/fast_sanitize/bench.ex
+++ b/lib/mix/tasks/fast_sanitize/bench.ex
@@ -1,24 +1,26 @@
defmodule Mix.Tasks.FastSanitize.Bench do
+ @moduledoc "Benchmarking task."
+
use Mix.Task
@input_dir "lib/mix/tasks/fast_sanitize/html"
def run(_) do
inputs =
Enum.reduce(File.ls!(@input_dir), %{}, fn input_name, acc ->
IO.inspect(input_name)
input = File.read!(Path.join(@input_dir, input_name))
Map.put(acc, input_name, input)
end)
Benchee.run(
%{
"FastSanitize strip tags" => fn input -> FastSanitize.strip_tags(input) end,
"HtmlSanitizeex strip tags" => fn input -> HtmlSanitizeEx.strip_tags(input) end,
"FastSanitize basic html" => fn input -> FastSanitize.basic_html(input) end,
"HtmlSanitizeex basic html" => fn input -> HtmlSanitizeEx.basic_html(input) end
},
inputs: inputs
)
end
end

File Metadata

Mime Type
text/x-diff
Expires
Thu, Nov 28, 6:35 PM (1 d, 17 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41084
Default Alt Text
(6 KB)

Event Timeline