Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F116460
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
21 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/lib/auto_linker/parser.ex b/lib/auto_linker/parser.ex
index c3ecc3c..739173f 100644
--- a/lib/auto_linker/parser.ex
+++ b/lib/auto_linker/parser.ex
@@ -1,453 +1,435 @@
defmodule AutoLinker.Parser do
@moduledoc """
Module to handle parsing the the input string.
"""
alias AutoLinker.Builder
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
@match_url ~r{^[\w\.-]+(?:\.[\w\.-]+)+[\w\-\._~%:/?#[\]@!\$&'\(\)\*\+,;=.]+$}
@match_scheme ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
@match_phone ~r"((?:x\d{2,7})|(?:(?:\+?1\s?(?:[.-]\s?)?)?(?:\(\s?(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s?\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s?(?:[.-]\s?)?)(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s?(?:[.-]\s?)?(?:[0-9]{4}))"
@match_hostname ~r{^(?:\W*https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
@match_ip ~r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
# @user
# @user@example.com
@match_mention ~r"^@[a-zA-Z\d_-]+@[a-zA-Z0-9_-](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*|@[a-zA-Z\d_-]+"u
# https://www.w3.org/TR/html5/forms.html#valid-e-mail-address
@match_email ~r"^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"u
@match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
@prefix_extra [
"magnet:?",
"dweb://",
"dat://",
"gopher://",
"ipfs://",
"ipns://",
"irc://",
"ircs://",
"irc6://",
"mumble://",
"ssb://"
]
@tlds "./priv/tlds.txt" |> File.read!() |> String.split("\n", trim: true) |> MapSet.new()
@default_opts ~w(url)a
@doc """
Parse the given string, identifying items to link.
Parses the string, replacing the matching urls and phone numbers with an html link.
## Examples
iex> AutoLinker.Parser.parse("Check out google.com")
~s{Check out <a href="http://google.com" class="auto-linker" target="_blank" rel="noopener noreferrer">google.com</a>}
iex> AutoLinker.Parser.parse("call me at x9999", phone: true)
~s{call me at <a href="#" class="phone-number" data-phone="9999">x9999</a>}
iex> AutoLinker.Parser.parse("or at home on 555.555.5555", phone: true)
~s{or at home on <a href="#" class="phone-number" data-phone="5555555555">555.555.5555</a>}
iex> AutoLinker.Parser.parse(", work (555) 555-5555", phone: true)
~s{, work <a href="#" class="phone-number" data-phone="5555555555">(555) 555-5555</a>}
"""
def parse(input, opts \\ %{})
def parse(input, opts) when is_binary(input), do: {input, nil} |> parse(opts) |> elem(0)
def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))
def parse(input, opts) do
config =
:auto_linker
|> Application.get_env(:opts, [])
|> Enum.into(%{})
|> Map.put(
:attributes,
Application.get_env(:auto_linker, :attributes, [])
)
opts =
Enum.reduce(@default_opts, opts, fn opt, acc ->
if is_nil(opts[opt]) and is_nil(config[opt]) do
Map.put(acc, opt, true)
else
acc
end
end)
do_parse(input, Map.merge(config, opts))
end
defp do_parse(input, %{phone: false} = opts), do: do_parse(input, Map.delete(opts, :phone))
defp do_parse(input, %{url: false} = opts), do: do_parse(input, Map.delete(opts, :url))
defp do_parse(input, %{phone: _} = opts) do
input
|> do_parse(opts, {"", "", :parsing}, &check_and_link_phone/3)
|> do_parse(Map.delete(opts, :phone))
end
defp do_parse(input, %{hashtag: true} = opts) do
input
|> do_parse(opts, {"", "", :parsing}, &check_and_link_hashtag/3)
|> do_parse(Map.delete(opts, :hashtag))
end
defp do_parse(input, %{extra: true} = opts) do
input
|> do_parse(opts, {"", "", :parsing}, &check_and_link_extra/3)
|> do_parse(Map.delete(opts, :extra))
end
defp do_parse({text, user_acc}, %{markdown: true} = opts) do
text
|> Builder.create_markdown_links(opts)
|> (&{&1, user_acc}).()
|> do_parse(Map.delete(opts, :markdown))
end
defp do_parse(input, %{email: true} = opts) do
input
|> do_parse(opts, {"", "", :parsing}, &check_and_link_email/3)
|> do_parse(Map.delete(opts, :email))
end
defp do_parse({text, user_acc}, %{url: _} = opts) do
input =
with exclude <- Map.get(opts, :exclude_patterns),
true <- is_list(exclude),
true <- String.starts_with?(text, exclude) do
{text, user_acc}
else
_ ->
do_parse(
{text, user_acc},
opts,
{"", "", :parsing},
&check_and_link/3
)
end
do_parse(input, Map.delete(opts, :url))
end
defp do_parse(input, %{mention: true} = opts) do
input
|> do_parse(opts, {"", "", :parsing}, &check_and_link_mention/3)
|> do_parse(Map.delete(opts, :mention))
end
defp do_parse(input, _), do: input
defp do_parse({"", user_acc}, _opts, {"", acc, _}, _handler),
do: {acc, user_acc}
defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<a", :skip}, handler)
defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<pre", :skip}, handler)
defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "<code", :skip}, handler)
defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</a>", :parsing}, handler)
defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</pre>", :parsing}, handler)
defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> "</code>", :parsing}, handler)
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}, handler),
do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}, handler)
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, {:html, level}}, handler) do
do_parse({text, user_acc}, opts, {"<", acc, {:open, level + 1}}, handler)
end
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, level}}, handler),
do:
do_parse(
{text, user_acc},
opts,
{"", acc <> buffer <> ">", {:html, level}},
handler
)
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}, handler) do
do_parse({text, user_acc}, opts, {"", acc <> <<ch::8>>, {:attrs, level}}, handler)
end
defp do_parse({"</" <> text, user_acc}, opts, {buffer, acc, {:html, level}}, handler) do
{buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
do_parse(
{text, user_acc},
opts,
{"", acc <> buffer <> "</", {:close, level}},
handler
)
end
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, 1}}, handler),
do: do_parse({text, user_acc}, opts, {"", acc <> buffer <> ">", :parsing}, handler)
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:close, level}}, handler),
do:
do_parse(
{text, user_acc},
opts,
{"", acc <> buffer <> ">", {:html, level - 1}},
handler
)
defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}, handler) do
do_parse({text, user_acc}, opts, {"", acc <> buffer, {:attrs, level}}, handler)
end
# default cases where state is not important
defp do_parse(
{" " <> text, user_acc},
%{phone: _} = opts,
{buffer, acc, state},
handler
),
do: do_parse({text, user_acc}, opts, {buffer <> " ", acc, state}, handler)
defp do_parse(
{<<char::bytes-size(1), text::binary>>, user_acc},
opts,
{buffer, acc, state},
handler
)
when char in [" ", "\r", "\n"] do
{buffer, user_acc} = run_handler(handler, buffer, opts, user_acc)
do_parse(
{text, user_acc},
opts,
{"", acc <> buffer <> char, state},
handler
)
end
defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}, handler) do
{buffer, user_acc} = run_handler(handler, buffer <> <<ch::8>>, opts, user_acc)
do_parse(
{"", user_acc},
opts,
{"", acc <> buffer, state},
handler
)
end
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}, handler),
do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}, handler)
- def check_and_link(buffer, %{scheme: true} = opts, _user_acc) do
- if is_url?(buffer, opts[:scheme]) do
- case Regex.run(@match_scheme, buffer, capture: [:url]) do
- [^buffer] -> link_url(true, buffer, opts)
- [url] -> String.replace(buffer, url, link_url(true, url, opts))
+ def check_and_link(buffer, opts, _user_acc) do
+ str = strip_parens(buffer)
+
+ if url?(str, opts[:scheme]) do
+ case parse_link(str, opts) do
+ ^buffer -> link_url(buffer, opts)
+ url -> String.replace(buffer, url, link_url(url, opts))
end
else
buffer
end
end
- def check_and_link(buffer, opts, _user_acc) do
- buffer
- |> is_url?(opts[:scheme])
- |> link_url(buffer, opts)
+ defp parse_link(str, %{scheme: true}) do
+ @match_scheme |> Regex.run(str, capture: [:url]) |> hd()
end
+ defp parse_link(str, _), do: str
+
+ defp strip_parens("(" <> buffer) do
+ ~r/[^\)]*/ |> Regex.run(buffer) |> hd()
+ end
+
+ defp strip_parens(buffer), do: buffer
+
def check_and_link_email(buffer, opts, _user_acc) do
- buffer
- |> is_email?
- |> link_email(buffer, opts)
+ if email?(buffer), do: link_email(buffer, opts), else: buffer
end
def check_and_link_phone(buffer, opts, _user_acc) do
buffer
|> match_phone
|> link_phone(buffer, opts)
end
def check_and_link_mention(buffer, opts, user_acc) do
buffer
|> match_mention
|> link_mention(buffer, opts, user_acc)
end
def check_and_link_hashtag(buffer, opts, user_acc) do
buffer
|> match_hashtag
|> link_hashtag(buffer, opts, user_acc)
end
def check_and_link_extra("xmpp:" <> handle, opts, _user_acc) do
- handle
- |> is_email?
- |> link_extra("xmpp:" <> handle, opts)
+ if email?(handle), do: link_extra("xmpp:" <> handle, opts), else: handle
end
def check_and_link_extra(buffer, opts, _user_acc) do
- buffer
- |> String.starts_with?(@prefix_extra)
- |> link_extra(buffer, opts)
+ if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: buffer
end
# @doc false
- def is_url?(buffer, true) do
- if Regex.match?(@invalid_url, buffer) do
- false
- else
- @match_scheme |> Regex.match?(buffer) |> is_valid_tld?(buffer)
- end
+ def url?(buffer, true) do
+ valid_url?(buffer) && Regex.match?(@match_scheme, buffer) && valid_tld?(buffer)
end
- def is_url?(buffer, _) do
- if Regex.match?(@invalid_url, buffer) do
- false
- else
- @match_url |> Regex.match?(buffer) |> is_valid_tld?(buffer)
- end
+ def url?(buffer, _) do
+ valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer)
end
- def is_email?(buffer) do
- if Regex.match?(@invalid_url, buffer) do
- false
- else
- @match_email |> Regex.match?(buffer) |> is_valid_tld?(buffer)
- end
+ def email?(buffer) do
+ valid_url?(buffer) && Regex.match?(@match_email, buffer) && valid_tld?(buffer)
end
- def is_valid_tld?(true, buffer) do
+ defp valid_url?(url), do: !Regex.match?(@invalid_url, url)
+
+ def valid_tld?(buffer) do
with [host] <- Regex.run(@match_hostname, buffer, capture: [:host]) do
- if is_ip?(host) do
+ if ip?(host) do
true
else
tld = host |> String.split(".") |> List.last()
MapSet.member?(@tlds, tld)
end
else
_ -> false
end
end
- def is_valid_tld?(false, _), do: false
-
- def is_ip?(buffer) do
- Regex.match?(@match_ip, buffer)
- end
+ def ip?(buffer), do: Regex.match?(@match_ip, buffer)
@doc false
def match_phone(buffer) do
case Regex.scan(@match_phone, buffer) do
[] -> nil
other -> other
end
end
def match_mention(buffer) do
case Regex.run(@match_mention, buffer) do
[mention] -> mention
_ -> nil
end
end
def match_hashtag(buffer) do
case Regex.run(@match_hashtag, buffer, capture: [:tag]) do
[hashtag] -> hashtag
_ -> nil
end
end
def link_hashtag(nil, buffer, _, _user_acc), do: buffer
def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do
hashtag
|> hashtag_handler.(buffer, opts, user_acc)
|> maybe_update_buffer(hashtag, buffer)
end
def link_hashtag(hashtag, buffer, opts, _user_acc) do
hashtag
|> Builder.create_hashtag_link(buffer, opts)
|> maybe_update_buffer(hashtag, buffer)
end
def link_mention(nil, buffer, _, user_acc), do: {buffer, user_acc}
def link_mention(mention, buffer, %{mention_handler: mention_handler} = opts, user_acc) do
mention
|> mention_handler.(buffer, opts, user_acc)
|> maybe_update_buffer(mention, buffer)
end
def link_mention(mention, buffer, opts, _user_acc) do
mention
|> Builder.create_mention_link(buffer, opts)
|> maybe_update_buffer(mention, buffer)
end
defp maybe_update_buffer(out, match, buffer) when is_binary(out) do
maybe_update_buffer({out, nil}, match, buffer)
end
defp maybe_update_buffer({out, user_acc}, match, buffer)
when match != buffer and out != buffer do
out = String.replace(buffer, match, out)
{out, user_acc}
end
defp maybe_update_buffer(out, _match, _buffer), do: out
def link_phone(nil, buffer, _), do: buffer
def link_phone(list, buffer, opts) do
Builder.create_phone_link(list, buffer, opts)
end
@doc false
- def link_url(true, buffer, opts) do
+ def link_url(buffer, opts) do
Builder.create_link(buffer, opts)
end
- def link_url(_, buffer, _opts), do: buffer
-
@doc false
- def link_email(true, buffer, opts) do
+ def link_email(buffer, opts) do
Builder.create_email_link(buffer, opts)
end
- def link_email(_, buffer, _opts), do: buffer
-
- def link_extra(true, buffer, opts) do
+ def link_extra(buffer, opts) do
Builder.create_extra_link(buffer, opts)
end
- def link_extra(_, buffer, _opts), do: buffer
-
defp run_handler(handler, buffer, opts, user_acc) do
case handler.(buffer, opts, user_acc) do
{buffer, user_acc} -> {buffer, user_acc}
buffer -> {buffer, user_acc}
end
end
end
diff --git a/test/parser_test.exs b/test/parser_test.exs
index 90cf197..851fe3e 100644
--- a/test/parser_test.exs
+++ b/test/parser_test.exs
@@ -1,203 +1,219 @@
defmodule AutoLinker.ParserTest do
use ExUnit.Case, async: true
doctest AutoLinker.Parser
import AutoLinker.Parser
- describe "is_url" do
+ describe "url?/2" do
test "valid scheme true" do
valid_scheme_urls()
|> Enum.each(fn url ->
- assert is_url?(url, true)
+ assert url?(url, true)
end)
end
test "invalid scheme true" do
invalid_scheme_urls()
|> Enum.each(fn url ->
- refute is_url?(url, true)
+ refute url?(url, true)
end)
end
test "valid scheme false" do
valid_non_scheme_urls()
|> Enum.each(fn url ->
- assert is_url?(url, false)
+ assert url?(url, false)
end)
end
test "invalid scheme false" do
invalid_non_scheme_urls()
|> Enum.each(fn url ->
- refute is_url?(url, false)
+ refute url?(url, false)
end)
end
end
describe "match_phone" do
test "valid" do
valid_phone_nunbers()
|> Enum.each(fn number ->
assert number |> match_phone() |> valid_number?(number)
end)
end
test "invalid" do
invalid_phone_numbers()
|> Enum.each(fn number ->
assert number |> match_phone() |> is_nil
end)
end
end
describe "parse" do
test "handle line breakes" do
text = "google.com\r\nssss"
expected =
"<a href=\"http://google.com\" class=\"auto-linker\" target=\"_blank\" rel=\"noopener noreferrer\">google.com</a>\r\nssss"
assert parse(text) == expected
end
test "does not link attributes" do
text = "Check out <a href='google.com'>google</a>"
assert parse(text) == text
text = "Check out <img src='google.com' alt='google.com'/>"
assert parse(text) == text
text = "Check out <span><img src='google.com' alt='google.com'/></span>"
assert parse(text) == text
end
test "does not link inside `<pre>` and `<code>`" do
text = "<pre>google.com</pre>"
assert parse(text) == text
text = "<code>google.com</code>"
assert parse(text) == text
text = "<pre><code>google.com</code></pre>"
assert parse(text) == text
end
test "links url inside html" do
text = "<div>google.com</div>"
expected = "<div><a href=\"http://google.com\">google.com</a></div>"
assert parse(text, class: false, rel: false, new_window: false, phone: false) == expected
text = "Check out <div class='section'>google.com</div>"
expected =
"Check out <div class='section'><a href=\"http://google.com\">google.com</a></div>"
assert parse(text, class: false, rel: false, new_window: false) == expected
end
test "links url inside nested html" do
text = "<p><strong>google.com</strong></p>"
expected = "<p><strong><a href=\"http://google.com\">google.com</a></strong></p>"
assert parse(text, class: false, rel: false, new_window: false) == expected
end
test "excludes html with specified class" do
text = "```Check out <div class='section'>google.com</div>```"
assert parse(text, exclude_patterns: ["```"]) == text
end
+ test "do not link parens" do
+ text = " foo (https://example.com/path/folder/), bar"
+
+ expected =
+ " foo (<a href=\"https://example.com/path/folder/\">example.com/path/folder/</a>), bar"
+
+ assert parse(text, class: false, rel: false, new_window: false, scheme: true) == expected
+
+ text = " foo (example.com/path/folder/), bar"
+
+ expected =
+ " foo (<a href=\"http://example.com/path/folder/\">example.com/path/folder/</a>), bar"
+
+ assert parse(text, class: false, rel: false, new_window: false) == expected
+ end
+
test "do not link urls" do
text = "google.com"
assert parse(text, url: false, phone: true) == text
end
test "do not link `:test.test`" do
text = ":test.test"
assert parse(text, %{
scheme: true,
extra: true,
class: false,
strip_prefix: false,
new_window: false,
rel: false
}) == text
end
end
def valid_number?([list], number) do
assert List.last(list) == number
end
def valid_number?(_, _), do: false
def valid_scheme_urls,
do: [
"https://www.example.com",
"http://www2.example.com",
"http://home.example-site.com",
"http://blog.example.com",
"http://www.example.com/product",
"http://www.example.com/products?id=1&page=2",
"http://www.example.com#up",
"http://255.255.255.255",
"http://www.site.com:8008"
]
def invalid_scheme_urls,
do: [
"http://invalid.com/perl.cgi?key= | http://web-site.com/cgi-bin/perl.cgi?key1=value1&key2"
]
def valid_non_scheme_urls,
do: [
"www.example.com",
"www2.example.com",
"www.example.com:2000",
"www.example.com?abc=1",
"example.example-site.com",
"example.com",
"example.ca",
"example.tv",
"example.com:999?one=one",
"255.255.255.255",
"255.255.255.255:3000?one=1&two=2"
]
def invalid_non_scheme_urls,
do: [
"invalid.com/perl.cgi?key= | web-site.com/cgi-bin/perl.cgi?key1=value1&key2",
"invalid.",
"hi..there",
"555.555.5555"
]
def valid_phone_nunbers,
do: [
"x55",
"x555",
"x5555",
"x12345",
"+1 555 555-5555",
"555 555-5555",
"555.555.5555",
"613-555-5555",
"1 (555) 555-5555",
"(555) 555-5555",
"1.555.555.5555",
"800 555-5555",
"1.800.555.5555",
"1 (800) 555-5555",
"888 555-5555",
"887 555-5555",
"1-877-555-5555",
"1 800 710-5515"
]
def invalid_phone_numbers,
do: [
"5555",
"x5",
"(555) 555-55"
]
end
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Dec 1, 3:38 PM (1 d, 12 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
41735
Default Alt Text
(21 KB)
Attached To
Mode
R19 linkify
Attached
Detach File
Event Timeline
Log In to Comment