Page MenuHomePhorge

No OneTemporary

Size
57 KB
Referenced Files
None
Subscribers
None
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b51fa23..ff4e770 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,72 +1,82 @@
# Changelog
<!--
-Copyright © 2019-2022 Pleroma Authors
+Copyright © 2019-2024 Pleroma Authors
SPDX-License-Identifier: MIT
-->
+## 0.6.0 - 2024-??-??
+
+### Changed
+
+- No longer strips periods from the end of a link as it may break the link
+
+### Fixed
+
+- Incorrectly linked URLs where the domain part has a trailing character such as ,;:>?!
+
## 0.5.2 - 2022-01-09
### Fixed
- Fixed hashtags getting stripped at the end of lines.
## 0.5.1 - 2021-07-07
### Fixed
- Parsing crash with URLs ending in unbalanced closed paren, no path separator, and no query parameters
## 0.5.0 - 2021-03-02
### Added
- More robust detection of URLs inside a parenthetical
- Only link ip addresses with a scheme
- Fix mentions in markdown
- Fix mentions with apostrophe endings
## 0.4.1 - 2020-12-21
### Fixed
- Incorrect detection of IPv4 addresses causing random numbers (e.g., $123.45) to get linked
- Inability to link mentions with a trailing apostrophe. e.g., @user@example's
## 0.4.0 - 2020-11-24
### Added
- Support for linking URLs with FQDNs (e.g., "google.com.")
## 0.3.0 - 2020-11-17
### Added
- Support returning result as iodata and as safe iodata
### Fixed
- Hashtags followed by HTML tags "a", "code" and "pre" were not detected
- Incorrect parsing of HTML links inside HTML tags
- Punctuation marks at the end of URLs were included in the HTML links
- Incorrect parsing of mentions with symbols before them
## 0.2.0 - 2020-07-21
### Added
- Added a `do_parse/4` clause to skip mentions when we're already skipping something else (eg, when inside a link)
### Fixed
- Fixed a typo in the readme
### Changed
- Refactored `Linkify.Parser.parse/2` to enumerate over the types instead of the opts
- Update dependencies
## 0.1.0 - 2019-07-11
- Initial release
diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex
index cba70f6..ed8b8c2 100644
--- a/lib/linkify/parser.ex
+++ b/lib/linkify/parser.ex
@@ -1,481 +1,487 @@
# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT
defmodule Linkify.Parser do
@moduledoc """
Module to handle parsing the input string.
"""
alias Linkify.Builder
@invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/
@match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u
@get_scheme_host ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u
@match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·\x{200c}][[:word:]_·\p{M}\x{200c}]*)/u
@match_skipped_tag ~r/^(?<tag>(a|code|pre)).*>*/
- @delimiters ~r/[,.;:>?!]*$/
+ @delimiters ~r/[,;:>?!]*$/
@en_apostrophes [
"'",
"'s",
"'ll",
"'d"
]
@prefix_extra [
"magnet:?",
"dweb://",
"dat://",
"gopher://",
"ipfs://",
"ipns://",
"irc://",
"ircs://",
"irc6://",
"mumble://",
"ssb://"
]
@tlds "./priv/tlds.txt"
|> File.read!()
|> String.split("\n", trim: true)
|> Enum.concat(["onion"])
|> MapSet.new()
@default_opts %{
url: true,
validate_tld: true
}
@doc """
Parse the given string, identifying items to link.
Parses the string, replacing the matching urls with an html link.
## Examples
iex> Linkify.Parser.parse("Check out google.com")
~s{Check out <a href="http://google.com">google.com</a>}
"""
@types [:url, :hashtag, :extra, :mention, :email]
def parse(input, opts \\ %{})

# Plain binary input: run the tuple-based parser with an empty user
# accumulator and hand back only the linked output.
def parse(input, opts) when is_binary(input) do
  {linked, _user_acc} = parse({input, %{}}, opts)
  linked
end

# Options given as a list are converted to a map before parsing.
def parse(input, list) when is_list(list) do
  parse(input, Enum.into(list, %{}))
end

# Main entry: merges the caller's options over the defaults, runs the
# parser, and converts the result to a binary unless `:iodata` is set.
def parse(input, opts) do
  merged = Map.merge(@default_opts, opts)
  {output, user_acc} = do_parse(input, merged, {"", [], :parsing})

  output = if merged[:iodata], do: output, else: IO.iodata_to_binary(output)
  {output, user_acc}
end
# Pushes `buffer` onto the front of the output accumulator.
defp accumulate(acc, buffer) do
  [buffer | acc]
end

# Pushes `buffer` and then `trailing`, so `trailing` ends up in front of
# `buffer` in the accumulator.
defp accumulate(acc, buffer, trailing) do
  with_buffer = accumulate(acc, buffer)
  [trailing | with_buffer]
end
# do_parse/3 walks the input one byte (or one recognised tag prefix) at a time.
#
# Arguments:
#   * `{text, user_acc}` - the remaining input and the caller's accumulator
#   * `opts`             - the merged options map
#   * `{buffer, acc, state}` - the token collected so far, the output pieces
#     pushed so far (front-first, reversed at the end), and the parser state,
#     which is one of `:parsing`, `:skip`, `{:open, level}` or `{:attrs, level}`.
#
# Clause order matters: earlier clauses take precedence over later ones.

# Finished: no input left and nothing buffered. Reverse the collected pieces.
defp do_parse({"", user_acc}, _opts, {"", acc, _}),
do: {Enum.reverse(acc), user_acc}
# A "<" arrives while a buffer starting with "#" is pending and hashtags are
# enabled. The buffer is linked first; a three-element list result is joined
# into a single binary. `@match_skipped_tag` is then run against that buffer:
# on a match the captured tag is trimmed from the front of `text` and the
# parser switches to `:skip`; otherwise "<" becomes the new buffer and the
# state becomes `{:open, 1}`.
# NOTE(review): `String.trim_leading/2` removes every repeated leading
# occurrence of `tag`, not just one - confirm that is intended.
defp do_parse(
{"<" <> text, user_acc},
%{hashtag: true} = opts,
{"#" <> _ = buffer, acc, :parsing}
) do
{buffer, user_acc} = link(buffer, opts, user_acc)
buffer =
case buffer do
[_, _, _] -> Enum.join(buffer)
_ -> buffer
end
case Regex.run(@match_skipped_tag, buffer, capture: [:tag]) do
[tag] ->
text = String.trim_leading(text, tag)
do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<#{tag}"), :skip})
nil ->
do_parse({text, user_acc}, opts, {"<", accumulate(acc, buffer, ""), {:open, 1}})
end
end
# "<br" while parsing: link the pending buffer, emit "<br", enter `{:open, 1}`.
defp do_parse({"<br" <> text, user_acc}, opts, {buffer, acc, :parsing}) do
{buffer, user_acc} = link(buffer, opts, user_acc)
do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<br"), {:open, 1}})
end
# "<a", "<pre" and "<code" while parsing: emit the pending buffer as-is (it is
# not passed through `link/3`), emit the tag opener, and switch to `:skip`.
defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<a"), :skip})
defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<pre"), :skip})
defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<code"), :skip})
# Matching closing tags while in `:skip`: emit the buffer and the closing tag,
# then resume `:parsing`.
defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "</a>"), :parsing})
defp do_parse({"</pre>" <> text, user_acc}, opts, {buffer, acc, :skip}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "</pre>"), :parsing})
defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "</code>"), :parsing})
# Any other "<" while parsing with an empty buffer: "<" becomes the buffer and
# the state becomes `{:open, 1}`.
defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}),
do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}})
# Any other "<" while parsing with a non-empty buffer: link the buffer, emit
# "<", and enter `{:open, 1}`.
defp do_parse({"<" <> text, user_acc}, opts, {buffer, acc, :parsing}) do
{buffer, user_acc} = link(buffer, opts, user_acc)
do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<"), {:open, 1}})
end
# ">" while in `{:attrs, _}`: emit the buffer and ">", then resume `:parsing`.
defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}),
do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing})
# Any other byte while in `{:attrs, level}` with an empty buffer is pushed to
# the output unchanged, one byte at a time.
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do
do_parse({text, user_acc}, opts, {"", accumulate(acc, <<ch::8>>), {:attrs, level}})
end
# `{:open, level}`: flush the buffer to the output and move to
# `{:attrs, level}` without consuming any input.
defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}) do
do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer), {:attrs, level}})
end
# Space, carriage return or line feed (in any state that reaches this clause):
# the buffered token is passed through `link/3` and emitted followed by the
# whitespace character; the state is kept.
defp do_parse(
{<<char::bytes-size(1), text::binary>>, user_acc},
opts,
{buffer, acc, state}
)
when char in [" ", "\r", "\n"] do
{buffer, user_acc} = link(buffer, opts, user_acc)
do_parse(
{text, user_acc},
opts,
{"", accumulate(acc, buffer, char), state}
)
end
# Last byte of the input: append it to the buffer, link the completed token,
# push it, and recurse with empty input so the terminal clause can finish.
defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}) do
{buffer, user_acc} = link(buffer <> <<ch::8>>, opts, user_acc)
do_parse(
{"", user_acc},
opts,
{"", accumulate(acc, buffer), state}
)
end
# Default: append the next byte to the buffer and keep going.
defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}),
do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state})
# Attempts to link `buffer` as the given type (`:url`, `:email`, `:mention`,
# `:hashtag` or `:extra`). Returns `:nomatch` when `buffer` does not qualify,
# otherwise whatever the corresponding `link_*` helper produces.

# URLs: the part captured by `@match_url` is linked; when the capture is
# shorter than `buffer`, the surrounding characters are put back around it.
def check_and_link(:url, buffer, opts, _user_acc) do
  if url?(buffer, opts) do
    [url] = Regex.run(@match_url, buffer, capture: [:url])
    link = link_url(url, opts)

    if url == buffer do
      link
    else
      restore_stripped_symbols(buffer, url, link)
    end
  else
    :nomatch
  end
end

def check_and_link(:email, buffer, opts, _user_acc) do
  if email?(buffer, opts) do
    link_email(buffer, opts)
  else
    :nomatch
  end
end

def check_and_link(:mention, buffer, opts, user_acc) do
  mention = match_mention(buffer)
  link_mention(mention, buffer, opts, user_acc)
end

def check_and_link(:hashtag, buffer, opts, user_acc) do
  hashtag = match_hashtag(buffer)
  link_hashtag(hashtag, buffer, opts, user_acc)
end

# "xmpp:" links are only created when the part after the prefix passes the
# same check as an email address.
def check_and_link(:extra, "xmpp:" <> handle = buffer, opts, _user_acc) do
  if email?(handle, opts) do
    link_extra(buffer, opts)
  else
    :nomatch
  end
end

# Remaining extras are recognised by one of the `@prefix_extra` prefixes.
def check_and_link(:extra, buffer, opts, _user_acc) do
  if String.starts_with?(buffer, @prefix_extra) do
    link_extra(buffer, opts)
  else
    :nomatch
  end
end
+ defp maybe_strip_trailing_period(buffer, type) when type in [:mention, :hashtag, :email],
+ do: String.trim_trailing(buffer, ".")
+
+ defp maybe_strip_trailing_period(buffer, _), do: buffer
+
# Decides which surrounding parentheses (if any) to remove from `buffer`.
#
# The `parens_*` checks run in order; each one either answers `:next`
# (keep checking) or gives a verdict:
#   * `:both`         - drop the leading and the trailing paren
#   * `:leading_only` - drop only the leading paren
#   * anything else   - leave `buffer` untouched
# When every check answers `:next`, both parens are dropped.
defp maybe_strip_parens(buffer) do
  without_leading = trim_leading_paren(buffer)

  verdict =
    with :next <- parens_check_trailing(buffer),
         :next <- parens_found_email(without_leading),
         :next <- parens_found_url(without_leading),
         %{path: path, query: query} = URI.parse(without_leading),
         :next <- parens_in_query(query),
         :next <- parens_found_path_separator(path),
         :next <- parens_path_has_open_paren(path),
         :next <- parens_check_balanced(without_leading) do
      :both
    end

  case verdict do
    :both -> trim_trailing_paren(without_leading)
    :leading_only -> without_leading
    _ -> buffer
  end
end
# Individual checks used by `maybe_strip_parens/1`. Each returns `:next` to
# continue with the following check, or a verdict (`:noop` / `:both`).

# No trailing ")" means there is nothing to strip.
defp parens_check_trailing(buffer) do
  if String.ends_with?(buffer, ")"), do: :next, else: :noop
end

# An email address inside the parens: strip both.
defp parens_found_email(trimmed) do
  candidate = trim_trailing_paren(trimmed)
  if email?(candidate, nil), do: :both, else: :next
end

# Without the trailing paren the text must be a URL, otherwise do nothing.
defp parens_found_url(trimmed) do
  candidate = trim_trailing_paren(trimmed)
  if url?(candidate, nil), do: :next, else: :noop
end

# Any query string at all: strip both.
defp parens_in_query(nil), do: :next
defp parens_in_query(_query), do: :both

# A path without "/" : strip both.
defp parens_found_path_separator(nil), do: :next

defp parens_found_path_separator(path) do
  if String.contains?(path, "/"), do: :next, else: :both
end

# A path without "(" : strip both.
defp parens_path_has_open_paren(nil), do: :next

defp parens_path_has_open_paren(path) do
  if String.contains?(path, "("), do: :next, else: :both
end
# Counts "(" and ")" in `trimmed`. Equal counts give `:leading_only`;
# unequal counts give `:next`.
defp parens_check_balanced(trimmed) do
  {opens, closes} =
    trimmed
    |> String.graphemes()
    |> Enum.reduce({0, 0}, fn
      "(", {open_count, close_count} -> {open_count + 1, close_count}
      ")", {open_count, close_count} -> {open_count, close_count + 1}
      _other, counts -> counts
    end)

  case opens == closes do
    true -> :leading_only
    false -> :next
  end
end
# Drops a single leading "(" when present; any other input is returned as-is.
defp trim_leading_paren("(" <> rest), do: rest
defp trim_leading_paren(buffer), do: buffer
# Drops a single trailing ")" when present; any other input is returned as-is.
#
# `String.replace_suffix/3` compares and removes the suffix at the byte level,
# so the string is not walked grapheme by grapheme (as `String.length/1` plus
# `String.slice/3` would do) just to remove one character.
defp trim_trailing_paren(buffer), do: String.replace_suffix(buffer, ")", "")
# Removes the trailing run of characters matched by `@delimiters`.
defp strip_punctuation(buffer) do
  Regex.replace(@delimiters, buffer, "")
end
# Tries each suffix in `@en_apostrophes` in order and removes it from the end
# of the buffer when present; the result of one removal feeds the next.
defp strip_en_apostrophes(buffer) do
  for suffix <- @en_apostrophes, reduce: buffer do
    stripped -> String.replace_suffix(stripped, suffix, "")
  end
end
# True when `buffer` passes `valid_url?/1`, matches `@match_url`, and passes
# `valid_tld?/2`. The checks run in that order and stop at the first failure.
def url?(buffer, opts) do
  with true <- valid_url?(buffer),
       true <- Regex.match?(@match_url, buffer) do
    valid_tld?(buffer, opts)
  else
    _ -> false
  end
end
# True when `buffer` has the shape `user@host` and the host part passes both
# `valid_hostname?/1` and `valid_tld?/2`.
def email?(buffer, opts) do
  # The local part is not inspected: only the receiving server can really
  # tell whether it is valid.
  with %{"host" => hostname} <-
         Regex.named_captures(~r/^(?<user>.*)@(?<host>[^@]+)$/, buffer),
       true <- valid_hostname?(hostname) do
    valid_tld?(hostname, opts)
  else
    _ -> false
  end
end
# A URL without a scheme is rejected when it matches `@invalid_url`.
# URLs with a scheme (or input `@get_scheme_host` cannot match) are accepted.
defp valid_url?(url) do
  case Regex.run(@get_scheme_host, url, capture: [:scheme]) do
    [""] -> not Regex.match?(@invalid_url, url)
    _ -> true
  end
end
@doc """
Validates a URL's TLD. Returns a boolean.
Will return `true` if `:validate_tld` option set to `false`.
Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme.
"""
def valid_tld?(url, opts) do
[scheme, host] = Regex.run(@get_scheme_host, url, capture: [:scheme, :host])
cond do
opts[:validate_tld] == false ->
true
scheme != "" && ip?(host) ->
true
# don't validate if scheme is present
opts[:validate_tld] == :no_scheme and scheme != "" ->
true
true ->
- tld = host |> strip_punctuation() |> String.split(".") |> List.last()
+ tld = host |> String.trim_trailing(".") |> String.split(".") |> List.last()
MapSet.member?(@tlds, tld)
end
end
# Converts `string` to an integer in the given `base` (10 by default).
# Returns `nil` instead of raising when the conversion fails.
def safe_to_integer(string, base \\ 10) do
  try do
    String.to_integer(string, base)
  rescue
    _ -> nil
  end
end
# True when `:inet.parse_strict_address/1` accepts `buffer` as an IP address.
def ip?(buffer) do
  parsed = :inet.parse_strict_address(to_charlist(buffer))
  match?({:ok, _}, parsed)
end
# IDN-compatible, ported from musl-libc's is_valid_hostname()
#
# A hostname is accepted when every code point is either non-ASCII (>= 0x80),
# an ASCII digit, an ASCII letter, "." or "-". An empty hostname is accepted,
# because there is no offending character in it.
def valid_hostname?(hostname) do
  hostname
  |> String.to_charlist()
  |> Enum.all?(fn char ->
    char >= 0x80 or char in ?0..?9 or char in ?A..?Z or char in ?a..?z or char in ~c".-"
  end)
end
# Recognises `@user` and `@user@host` mentions.
#
# Returns the mention text, or `nil` when `buffer` is not a mention or when
# the host part fails `valid_hostname?/1` or `valid_tld?/2`.
def match_mention(buffer) do
  pattern = ~r/^@(?<user>[a-zA-Z\d_-]+)(@(?<host>[^@]+))?$/

  with %{"user" => user, "host" => host} <- Regex.named_captures(pattern, buffer) do
    cond do
      # Local mention: no host part was present.
      host == "" -> "@" <> user
      valid_hostname?(host) && valid_tld?(host, []) -> "@" <> user <> "@" <> host
      true -> nil
    end
  end
end
# Returns the hashtag at the start of `buffer` as captured by
# `@match_hashtag`, or `nil` when there is none.
def match_hashtag(buffer) do
  case Regex.named_captures(@match_hashtag, buffer) do
    %{"tag" => hashtag} -> hashtag
    _ -> nil
  end
end
# Builds the link for a matched hashtag.
#
# * no hashtag (`nil`)                  -> `:nomatch`
# * `:hashtag_handler` present in opts  -> the handler builds the link
# * otherwise                           -> `Builder.create_hashtag_link/3`
#
# The handler/builder result is then passed through `maybe_update_buffer/3`.
def link_hashtag(nil, _buffer, _, _user_acc), do: :nomatch

def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do
  handled = hashtag_handler.(hashtag, buffer, opts, user_acc)
  maybe_update_buffer(handled, hashtag, buffer)
end

def link_hashtag(hashtag, buffer, opts, _user_acc) do
  built = Builder.create_hashtag_link(hashtag, buffer, opts)
  maybe_update_buffer(built, hashtag, buffer)
end
# Builds the link for a matched mention.
#
# * no mention (`nil`)                  -> `:nomatch`
# * `:mention_handler` present in opts  -> the handler builds the link
# * otherwise                           -> `Builder.create_mention_link/3`
#
# The handler/builder result is then passed through `maybe_update_buffer/3`.
def link_mention(nil, _buffer, _, _user_acc), do: :nomatch

def link_mention(mention, buffer, %{mention_handler: mention_handler} = opts, user_acc) do
  handled = mention_handler.(mention, buffer, opts, user_acc)
  maybe_update_buffer(handled, mention, buffer)
end

def link_mention(mention, buffer, opts, _user_acc) do
  built = Builder.create_mention_link(mention, buffer, opts)
  maybe_update_buffer(built, mention, buffer)
end
# Re-inserts a generated link into the original buffer when the matched text
# was only part of that buffer.
#
#   * `out`    - what the handler/builder returned: a binary, a
#                `{out, user_acc}` tuple, or any other term
#   * `match`  - the mention/hashtag text that was recognised
#   * `buffer` - the full token that was examined
#
# Clause order matters.

# A bare binary is normalised to `{out, nil}` and dispatched again, so a
# binary result always leaves this function as a `{binary, nil}` tuple.
# NOTE(review): that replaces the caller's user accumulator with `nil` for
# binary results - confirm callers relying on `user_acc` expect this.
defp maybe_update_buffer(out, match, buffer) when is_binary(out) do
maybe_update_buffer({out, nil}, match, buffer)
end
# The match is shorter than the buffer and the output is not simply the
# untouched buffer: substitute `out` for every occurrence of `match` inside
# `buffer`, keeping the surrounding characters.
defp maybe_update_buffer({out, user_acc}, match, buffer)
when match != buffer and out != buffer do
out = String.replace(buffer, match, out)
{out, user_acc}
end
# Everything else is returned unchanged.
defp maybe_update_buffer(out, _match, _buffer), do: out
# Thin wrappers that delegate link construction to `Linkify.Builder`.

@doc false
def link_url(buffer, opts), do: Builder.create_link(buffer, opts)

@doc false
def link_email(buffer, opts), do: Builder.create_email_link(buffer, opts)

def link_extra(buffer, opts), do: Builder.create_extra_link(buffer, opts)
# Tries each link type from `@types`, in order, that is switched on in `opts`
# (value exactly `true`). The first type that produces a link wins; when none
# does, `{buffer, user_acc}` is returned unchanged.
defp link(buffer, opts, user_acc) do
  unlinked = {buffer, user_acc}

  @types
  |> Enum.filter(fn type -> opts[type] == true end)
  |> Enum.reduce_while(unlinked, fn type, _unlinked ->
    check_and_link_reducer(type, buffer, opts, user_acc)
  end)
end
defp check_and_link_reducer(type, buffer, opts, user_acc) do
str =
buffer
|> String.split("<")
|> List.first()
|> strip_en_apostrophes()
|> strip_punctuation()
+ |> maybe_strip_trailing_period(type)
|> maybe_strip_parens()
case check_and_link(type, str, opts, user_acc) do
:nomatch ->
{:cont, {buffer, user_acc}}
{link, user_acc} ->
{:halt, {restore_stripped_symbols(buffer, str, link), user_acc}}
link ->
{:halt, {restore_stripped_symbols(buffer, str, link), user_acc}}
end
end
# Puts back the characters that were stripped from `buffer` before linking.
#
# When nothing was stripped the link is returned as-is. Otherwise `buffer` is
# split around `stripped_buffer` and `link` is placed between the pieces,
# giving a list such as `[prefix, link, suffix]`.
defp restore_stripped_symbols(buffer, stripped_buffer, link) do
  if buffer === stripped_buffer do
    link
  else
    pieces = String.split(buffer, stripped_buffer)
    Enum.intersperse(pieces, link)
  end
end
end
diff --git a/test/linkify_test.exs b/test/linkify_test.exs
index 8a9ace4..b31ecd1 100644
--- a/test/linkify_test.exs
+++ b/test/linkify_test.exs
@@ -1,870 +1,896 @@
# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT
defmodule LinkifyTest do
use ExUnit.Case, async: true
doctest Linkify
test "default link" do
assert Linkify.link("google.com") ==
"<a href=\"http://google.com\">google.com</a>"
end
test "default link iodata" do
assert Linkify.link_to_iodata("google.com") ==
[["<a ", "href=\"http://google.com\"", ">", "google.com", "</a>"]]
end
test "default link safe iodata" do
assert Linkify.link_safe("google.com") ==
[
[
{:safe, ["<a ", "href=\"http://google.com\"", ">"]},
"google.com",
{:safe, "</a>"}
]
]
end
test "does on link existing links" do
text = ~s(<a href="http://google.com">google.com</a>)
assert Linkify.link(text) == text
end
test "all kinds of links" do
text = "hello google.com https://ddg.com user@email.com irc:///mIRC"
expected =
"hello <a href=\"http://google.com\">google.com</a> <a href=\"https://ddg.com\">https://ddg.com</a> <a href=\"mailto:user@email.com\">user@email.com</a> <a href=\"irc:///mIRC\">irc:///mIRC</a>"
assert Linkify.link(text,
email: true,
extra: true
) == expected
end
test "all kinds of links iodata" do
text = "hello google.com https://ddg.com user@email.com irc:///mIRC"
expected = [
"hello",
" ",
["<a ", "href=\"http://google.com\"", ">", "google.com", "</a>"],
" ",
["<a ", "href=\"https://ddg.com\"", ">", "https://ddg.com", "</a>"],
" ",
["<a ", "href=\"mailto:user@email.com\"", ">", "user@email.com", "</a>"],
" ",
["<a ", "href=\"irc:///mIRC\"", ">", "irc:///mIRC", "</a>"]
]
assert Linkify.link_to_iodata(text,
email: true,
extra: true
) == expected
end
test "class attribute" do
assert Linkify.link("google.com", class: "linkified") ==
"<a href=\"http://google.com\" class=\"linkified\">google.com</a>"
end
test "class attribute iodata" do
assert Linkify.link_to_iodata("google.com", class: "linkified") ==
[
[
"<a ",
"href=\"http://google.com\" class=\"linkified\"",
">",
"google.com",
"</a>"
]
]
end
test "rel attribute" do
assert Linkify.link("google.com", rel: "noopener noreferrer") ==
"<a href=\"http://google.com\" rel=\"noopener noreferrer\">google.com</a>"
end
test "rel attribute iodata" do
assert Linkify.link_to_iodata("google.com", rel: "noopener noreferrer") ==
[
[
"<a ",
"href=\"http://google.com\" rel=\"noopener noreferrer\"",
">",
"google.com",
"</a>"
]
]
end
test "rel as function" do
text = "google.com"
expected = "<a href=\"http://google.com\" rel=\"com\">google.com</a>"
custom_rel = fn url ->
url |> String.split(".") |> List.last()
end
assert Linkify.link(text, rel: custom_rel) == expected
text = "google.com"
expected = "<a href=\"http://google.com\">google.com</a>"
custom_rel = fn _ -> nil end
assert Linkify.link(text, rel: custom_rel) == expected
end
test "strip parens" do
assert Linkify.link("(google.com)") ==
"(<a href=\"http://google.com\">google.com</a>)"
end
test "strip parens iodata" do
assert Linkify.link_to_iodata("(google.com)") ==
[["(", ["<a ", "href=\"http://google.com\"", ">", "google.com", "</a>"], ")"]]
end
test "link_map/2" do
assert Linkify.link_map("google.com", []) ==
{"<a href=\"http://google.com\">google.com</a>", []}
end
describe "custom handlers" do
test "mentions handler" do
text = "hello @user, @valid_user and @invalid_user"
valid_users = ["user", "valid_user"]
handler = fn "@" <> user = mention, buffer, _opts, acc ->
if Enum.member?(valid_users, user) do
link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
{link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
else
{buffer, acc}
end
end
{result_text, %{mentions: mentions}} =
Linkify.link_map(text, %{mentions: MapSet.new()},
mention: true,
mention_handler: handler
)
assert result_text ==
"hello <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, <a href=\"https://example.com/user/valid_user\" data-user=\"valid_user\">@valid_user</a> and @invalid_user"
assert mentions |> MapSet.to_list() |> Enum.map(&elem(&1, 1)) == valid_users
end
test "hashtags handler" do
text = "#hello #world"
handler = fn hashtag, buffer, opts, acc ->
link = Linkify.Builder.create_hashtag_link(hashtag, buffer, opts)
{link, %{acc | tags: MapSet.put(acc.tags, hashtag)}}
end
{result_text, %{tags: tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/user/",
rel: false
)
assert result_text ==
"<a href=\"https://example.com/user/hello\">#hello</a> <a href=\"https://example.com/user/world\">#world</a>"
assert MapSet.to_list(tags) == ["#hello", "#world"]
text = "#justOne"
{result_text, %{tags: _tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/user/",
rel: false
)
assert result_text ==
"<a href=\"https://example.com/user/justOne\">#justOne</a>"
text = "#justOne."
{result_text, %{tags: _tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/user/",
rel: false
)
assert result_text ==
"<a href=\"https://example.com/user/justOne\">#justOne</a>."
text = "#justOne "
{result_text, %{tags: _tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/user/",
rel: false
)
assert result_text ==
"<a href=\"https://example.com/user/justOne\">#justOne</a> "
text = "#cofe <br><a href=\"https://pleroma.social/\">Source</a>"
{_result_text, %{tags: tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/tag/"
)
assert MapSet.to_list(tags) == ["#cofe"]
text = "#cofe<br><a href=\"https://pleroma.social/\">Source</a>"
{_result_text, %{tags: tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/tag/"
)
assert MapSet.to_list(tags) == ["#cofe"]
text = "#cofe<a href=\"https://pleroma.social/\">Source</a>"
{_result_text, %{tags: tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/tag/"
)
assert MapSet.to_list(tags) == ["#cofe"]
text = "#cofe<code>fetch()</code>"
{_result_text, %{tags: tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/tag/"
)
assert MapSet.to_list(tags) == ["#cofe"]
text = "#cofe<pre>fetch()</pre>"
{_result_text, %{tags: tags}} =
Linkify.link_map(text, %{tags: MapSet.new()},
hashtag: true,
hashtag_handler: handler,
hashtag_prefix: "https://example.com/tag/"
)
assert MapSet.to_list(tags) == ["#cofe"]
end
test "mention handler and hashtag prefix" do
text =
"Hello again, @user.&lt;script&gt;&lt;/script&gt;\nThis is on another :moominmamma: line. #2hu #epic #phantasmagoric"
handler = fn "@" <> user = mention, _, _, _ ->
~s(<span class="h-card"><a href="#/user/#{user}">@<span>#{mention}</span></a></span>)
end
expected =
~s(Hello again, @user.&lt;script&gt;&lt;/script&gt;\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>)
assert Linkify.link(text,
mention: true,
mention_handler: handler,
hashtag: true,
hashtag_prefix: "/tag/",
new_window: true
) == expected
end
test "mentions handler with hostname/@user links" do
text =
"hi @user, take a look at this post: https://example.com/@valid_user/posts/9w5AkQp956XIh74apc"
valid_users = ["user", "valid_user"]
handler = fn "@" <> user = mention, buffer, _opts, acc ->
if Enum.member?(valid_users, user) do
link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
{link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
else
{buffer, acc}
end
end
{result_text, %{mentions: mentions}} =
Linkify.link_map(text, %{mentions: MapSet.new()},
mention: true,
mention_handler: handler,
new_window: true
)
assert result_text ==
"hi <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, take a look at this post: <a href=\"https://example.com/@valid_user/posts/9w5AkQp956XIh74apc\" target=\"_blank\">https://example.com/@valid_user/posts/9w5AkQp956XIh74apc</a>"
assert mentions |> MapSet.to_list() |> Enum.map(&elem(&1, 1)) == ["user"]
end
test "mentions handler and extra links" do
text =
- "hi @user, text me asap xmpp:me@cofe.ai, (or contact me at me@cofe.ai), please.<br>cofe.ai."
+ "hi @user, text me asap xmpp:me@cofe.ai, (or contact me at me@cofe.ai), please.<br>cofe.ai"
valid_users = ["user", "cofe"]
handler = fn "@" <> user = mention, buffer, _opts, acc ->
if Enum.member?(valid_users, user) do
link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
{link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
else
{buffer, acc}
end
end
{result_text, %{mentions: mentions}} =
Linkify.link_map(text, %{mentions: MapSet.new()},
mention: true,
mention_handler: handler,
extra: true,
email: true
)
assert result_text ==
- "hi <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, text me asap <a href=\"xmpp:me@cofe.ai\">xmpp:me@cofe.ai</a>, (or contact me at <a href=\"mailto:me@cofe.ai\">me@cofe.ai</a>), please.<br><a href=\"http://cofe.ai\">cofe.ai</a>."
+ "hi <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, text me asap <a href=\"xmpp:me@cofe.ai\">xmpp:me@cofe.ai</a>, (or contact me at <a href=\"mailto:me@cofe.ai\">me@cofe.ai</a>), please.<br><a href=\"http://cofe.ai\">cofe.ai</a>"
assert MapSet.to_list(mentions) == [{"@user", "user"}]
end
test "mentions handler and emails" do
text = "hi @friend, here is my email<br><br>user@user.me"
valid_users = ["user", "friend"]
handler = fn "@" <> user = mention, buffer, _opts, acc ->
if Enum.member?(valid_users, user) do
link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
{link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
else
{buffer, acc}
end
end
{result_text, %{mentions: mentions}} =
Linkify.link_map(text, %{mentions: MapSet.new()},
mention: true,
mention_handler: handler,
extra: true,
email: true
)
assert result_text ==
"hi <a href=\"https://example.com/user/friend\" data-user=\"friend\">@friend</a>, here is my email<br><br><a href=\"mailto:user@user.me\">user@user.me</a>"
assert MapSet.to_list(mentions) == [{"@friend", "friend"}]
end
+ test "mentions with apostrophes" do
+ text = "I'm going to @friend's house this weekend"
+
+ valid_users = ["friend"]
+
+ handler = fn "@" <> user = mention, buffer, _opts, acc ->
+ if Enum.member?(valid_users, user) do
+ link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>)
+ {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}}
+ else
+ {buffer, acc}
+ end
+ end
+
+ {result_text, %{mentions: mentions}} =
+ Linkify.link_map(text, %{mentions: MapSet.new()},
+ mention: true,
+ mention_handler: handler,
+ extra: true,
+ email: true
+ )
+
+ assert result_text ==
+ "I'm going to <a href=\"https://example.com/user/friend\" data-user=\"friend\">@friend</a>'s house this weekend"
+
+ assert MapSet.to_list(mentions) == [{"@friend", "friend"}]
+ end
+
test "href handler" do
text = ~s(google.com)
result_text = Linkify.link(text, href_handler: &"/redirect?#{URI.encode_query(to: &1)}")
assert result_text == ~s(<a href="/redirect?to=http%3A%2F%2Fgoogle.com">google.com</a>)
end
end
describe "mentions" do
test "simple mentions" do
expected =
~s{hello <a href="https://example.com/user/user" target="_blank">@user</a> and <a href="https://example.com/user/anotherUser" target="_blank">@anotherUser</a>.}
assert Linkify.link("hello @user and @anotherUser.",
mention: true,
mention_prefix: "https://example.com/user/",
new_window: true
) == expected
end
test "mentions inside html tags" do
text =
"<p><strong>hello world</strong></p>\n<p><`em>another @user__test and @user__test google.com paragraph</em></p>\n"
expected =
"<p><strong>hello world</strong></p>\n<p><`em>another <a href=\"u/user__test\">@user__test</a> and <a href=\"u/user__test\">@user__test</a> <a href=\"http://google.com\">google.com</a> paragraph</em></p>\n"
assert Linkify.link(text, mention: true, mention_prefix: "u/") == expected
text = "<p>hi</p><p>@user @anotherUser</p>"
expected =
"<p>hi</p><p><a href=\"u/user\">@user</a> <a href=\"u/anotherUser\">@anotherUser</a></p>"
assert Linkify.link(text, mention: true, mention_prefix: "u/") == expected
end
test "mention @user@example.com" do
text = "hey @user@example.com"
expected =
"hey <a href=\"https://example.com/user/user@example.com\" target=\"_blank\">@user@example.com</a>"
assert Linkify.link(text,
mention: true,
mention_prefix: "https://example.com/user/",
new_window: true
) == expected
expected =
"That's <a href=\"https://example.com/user/user@example.com\" target=\"_blank\">@user@example.com</a>'s server"
text = "That's @user@example.com's server"
assert Linkify.link(text,
mention: true,
mention_prefix: "https://example.com/user/",
new_window: true
) ==
expected
end
test "mentions with no word-separation before them" do
text = "@@example hey! >@@test@example.com idolm@ster"
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text
end
test "invalid mentions" do
text = "hey user@example"
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text
end
test "IDN domain" do
text = "hello @lain@我爱你.com"
expected = "hello <a href=\"/users/lain@我爱你.com\">@lain@我爱你.com</a>"
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected
text = "hello @lain@xn--6qq986b3xl.com"
expected = "hello <a href=\"/users/lain@xn--6qq986b3xl.com\">@lain@xn--6qq986b3xl.com</a>"
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected
end
test ".onion domain" do
text = "Hey @admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion"
expected =
"Hey <a href=\"/users/admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion\">@admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion</a>"
assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected
end
end
describe "hashtag links" do
test "hashtag" do
expected =
" one <a href=\"https://example.com/tag/2two\" target=\"_blank\">#2two</a> three <a href=\"https://example.com/tag/four\" target=\"_blank\">#four</a>."
assert Linkify.link(" one #2two three #four.",
hashtag: true,
hashtag_prefix: "https://example.com/tag/",
new_window: true
) == expected
end
test "must have non-numbers" do
expected = "<a href=\"/t/1ok\">#1ok</a> #42 #7"
assert Linkify.link("#1ok #42 #7",
hashtag: true,
hashtag_prefix: "/t/",
rel: false
) == expected
end
test "support French" do
text = "#administrateur·rice·s #ingénieur·e·s"
expected =
"<a href=\"/t/administrateur·rice·s\">#administrateur·rice·s</a> <a href=\"/t/ingénieur·e·s\">#ingénieur·e·s</a>"
assert Linkify.link(text,
hashtag: true,
hashtag_prefix: "/t/",
rel: false
) == expected
end
test "support Telugu" do
text = "#చక్రం #కకకకక్ #కకకకాక #కకకక్రకకకక"
expected =
"<a href=\"/t/చక్రం\">#చక్రం</a> <a href=\"/t/కకకకక్\">#కకకకక్</a> <a href=\"/t/కకకకాక\">#కకకకాక</a> <a href=\"/t/కకకక్రకకకక\">#కకకక్రకకకక</a>"
assert Linkify.link(text,
hashtag: true,
hashtag_prefix: "/t/",
rel: false
) == expected
end
test "do not turn urls with hashes into hashtags" do
text = "google.com#test #test google.com/#test #tag"
expected =
"<a href=\"http://google.com#test\">google.com#test</a> <a href=\"https://example.com/tag/test\">#test</a> <a href=\"http://google.com/#test\">google.com/#test</a> <a href=\"https://example.com/tag/tag\">#tag</a>"
assert Linkify.link(text,
hashtag: true,
rel: false,
hashtag_prefix: "https://example.com/tag/"
) == expected
end
test "works with non-latin characters" do
text = "#漢字 #は #тест #ทดสอบ"
expected =
"<a href=\"https://example.com/tag/漢字\">#漢字</a> <a href=\"https://example.com/tag/は\">#は</a> <a href=\"https://example.com/tag/тест\">#тест</a> <a href=\"https://example.com/tag/ทดสอบ\">#ทดสอบ</a>"
assert Linkify.link(text,
rel: false,
hashtag: true,
hashtag_prefix: "https://example.com/tag/"
) == expected
end
test "ZWNJ does not break up hashtags" do
text = "#ساٴين‌س"
expected = "<a href=\"https://example.com/tag/ساٴين‌س\">#ساٴين‌س</a>"
assert Linkify.link(text,
rel: false,
hashtag: true,
hashtag_prefix: "https://example.com/tag/"
) == expected
end
end
# End-to-end checks of plain URL linking through `Linkify.link/2`.
describe "links" do
  # The same URL is linked with and without a scheme; without one the href is
  # given an "http://" prefix while the visible text stays as typed. The
  # detached trailing " ." must remain outside the anchor.
  test "turning urls into links" do
    rest = "www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla"

    text = "Hey, check out http://#{rest} ."

    expected =
      ~s(Hey, check out <a href="http://#{rest}" target="_blank">http://#{rest}</a> .)

    assert Linkify.link(text, new_window: true) == expected

    # no scheme
    text = "Hey, check out #{rest} ."

    expected =
      ~s(Hey, check out <a href="http://#{rest}" target="_blank">#{rest}</a> .)

    assert Linkify.link(text, new_window: true) == expected
  end

  # URLs that already carry a scheme are linked verbatim (href == visible text).
  test "turn urls with schema into urls" do
    # A URL directly preceded by a non-ASCII symbol is still picked up.
    assert Linkify.link("📌https://google.com", rel: false) ==
             ~s(📌<a href="https://google.com">https://google.com</a>)

    urls = [
      "http://www.cs.vu.nl/~ast/intel/",
      "https://forum.zdoom.org/viewtopic.php?f=44&t=57087",
      "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul",
      "https://en.wikipedia.org/wiki/Duff's_device",
      "https://1.1.1.1/",
      "https://1.1.1.1:8080/"
    ]

    for url <- urls do
      assert Linkify.link(url) == "<a href=\"#{url}\">#{url}</a>"
    end
  end

  # `strip_prefix: true` shortens only the visible text; the href is untouched.
  test "strip prefix" do
    assert Linkify.link("http://google.com", strip_prefix: true) ==
             "<a href=\"http://google.com\">google.com</a>"

    assert Linkify.link("http://www.google.com", strip_prefix: true) ==
             "<a href=\"http://www.google.com\">google.com</a>"
  end

  # A "/@user" path segment is part of the URL (with or without a port), while
  # a bare "@username" is left alone when mention linking is not enabled.
  # The previous version asserted the ":4000" case twice; the verbatim
  # duplicate has been dropped.
  test "hostname/@user" do
    for url <- ["https://example.com/@user", "https://example.com:4000/@user"] do
      assert Linkify.link(url, new_window: true) ==
               "<a href=\"#{url}\" target=\"_blank\">#{url}</a>"
    end

    text = "@username"
    assert Linkify.link(text, new_window: true) == text
  end
end
# Linking of non-HTTP URIs, enabled through the `extra:` / `email:` options.
describe "non http links" do
  test "xmpp" do
    uri = "xmpp:user@example.com"

    assert Linkify.link(uri, extra: true) ==
             ~s(<a href="xmpp:user@example.com">xmpp:user@example.com</a>)
  end

  # An "xmpp:" token without a user@host part must come back unchanged.
  test "wrong xmpp" do
    not_a_jid = "xmpp:user.example.com"
    assert Linkify.link(not_a_jid, extra: true) == not_a_jid
  end

  # E-mail addresses get a "mailto:" href but keep their plain visible text.
  test "email" do
    address = "user@example.com"

    assert Linkify.link(address, email: true) ==
             ~s(<a href="mailto:user@example.com">user@example.com</a>)
  end

  test "magnet" do
    uri =
      "magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce"

    # href and visible text are both the untouched URI.
    assert Linkify.link(uri, extra: true) == ~s(<a href="#{uri}">#{uri}</a>)
  end

  test "dweb" do
    uri =
      "dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt"

    assert Linkify.link(uri, extra: true) == ~s(<a href="#{uri}">#{uri}</a>)
  end
end
describe "TLDs" do
# Baseline: a scheme-qualified URL on a well-known TLD is linked verbatim.
test "parse with scheme" do
  url = "https://google.com"
  assert Linkify.link(url) == ~s(<a href="https://google.com">https://google.com</a>)
end
# With default options a scheme does not rescue a URL whose TLD is unknown.
test "only existing TLDs with scheme" do
  unknown_tld = "this url https://google.foobar.blah11blah/ has invalid TLD"
  assert Linkify.link(unknown_tld) == unknown_tld

  known_tld = "this url https://google.foobar.com/ has valid TLD"

  assert Linkify.link(known_tld) ==
           ~s(this url <a href="https://google.foobar.com/">https://google.foobar.com/</a> has valid TLD)
end
# Scheme-less URLs are linked only when their TLD is known; the href gets an
# "http://" prefix while the visible text stays as written.
test "only existing TLDs without scheme" do
  unknown_tld = "this url google.foobar.blah11blah/ has invalid TLD"
  assert Linkify.link(unknown_tld) == unknown_tld

  known_tld = "this url google.foobar.com/ has valid TLD"

  assert Linkify.link(known_tld) ==
           ~s(this url <a href="http://google.foobar.com/">google.foobar.com/</a> has valid TLD)
end
# The same valid-TLD host is linked both with and without an explicit scheme;
# both variants share the same href.
test "only existing TLDs with and without scheme" do
  href = "http://google.foobar.com/"

  for shown <- ["http://google.foobar.com/", "google.foobar.com/"] do
    input = "this url " <> shown <> " has valid TLD"
    linked = "this url <a href=\"" <> href <> "\">" <> shown <> "</a> has valid TLD"

    assert Linkify.link(input) == linked
  end
end
test "FQDN (with trailing period)" do
text =
"Check out this article: https://www.wired.com./story/marissa-mayer-startup-sunshine-contacts/"
expected =
"Check out this article: <a href=\"https://www.wired.com./story/marissa-mayer-startup-sunshine-contacts/\">https://www.wired.com./story/marissa-mayer-startup-sunshine-contacts/</a>"
assert Linkify.link(text) == expected
- end
- test "Do not link trailing punctuation" do
- text = "You can find more info at https://pleroma.social."
+ text = "https://www.wired.com;/story/marissa-mayer-startup-sunshine-contacts/"
- expected =
- "You can find more info at <a href=\"https://pleroma.social\">https://pleroma.social</a>."
-
- assert Linkify.link(text) == expected
+ refute Linkify.link(text) ==
+ "<a href=\"https://www.wired.com;/story/marissa-mayer-startup-sunshine-contacts/\">https://www.wired.com;/story/marissa-mayer-startup-sunshine-contacts/</a>"
+ end
+ test "Do not link most trailing punctuation (excluding periods, which are allowed in URLs)" do
text = "Of course it was google.com!!"
expected = "Of course it was <a href=\"http://google.com\">google.com</a>!!"
assert Linkify.link(text) == expected
text =
"First I had to login to hotmail.com, then I had to delete emails because my 15MB quota was full."
expected =
"First I had to login to <a href=\"http://hotmail.com\">hotmail.com</a>, then I had to delete emails because my 15MB quota was full."
assert Linkify.link(text) == expected
text = "I looked at theonion.com; it was no longer funny."
expected =
"I looked at <a href=\"http://theonion.com\">theonion.com</a>; it was no longer funny."
assert Linkify.link(text) == expected
end
# Both the Unicode spelling of a domain and its punycode ("xn--") form are linked.
test "IDN and punycode domain" do
  for host <- ["FrauBücher.com", "xn--fraubcher-u9a.com"] do
    input = host <> " says Neiiighhh!"
    linked = "<a href=\"http://" <> host <> "\">" <> host <> "</a> says Neiiighhh!"

    assert Linkify.link(input) == linked
  end
end
# A long ".onion" host is linked just like the ordinary domain next to it.
test ".onion domain" do
  onion = "vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion"

  input = "The riseup.net hidden service is at " <> onion

  linked =
    ~s(The <a href="http://riseup.net">riseup.net</a> hidden service is at ) <>
      ~s(<a href="http://#{onion}">#{onion}</a>)

  assert Linkify.link(input) == linked
end
# A bare dotted quad is left as text; the same address behind a scheme is linked.
test "IPv4 is linked only with scheme" do
  bare = "1.1.1.1"
  assert Linkify.link(bare) == bare

  with_scheme = "http://1.1.1.1"
  assert Linkify.link(with_scheme) == ~s(<a href="http://1.1.1.1">http://1.1.1.1</a>)
end
# A two-part number such as a price must not be mistaken for an address.
test "shortened IPv4 are not linked" do
  number = "109.99"
  assert Linkify.link(number) == number
end
# A balanced closing paren is part of the URL; the "?" that follows it is not.
test "URLs with last character is closing paren" do
  url = "https://en.wikipedia.org/wiki/Frame_(networking)"

  input = "Have you read " <> url <> "?"
  linked = "Have you read <a href=\"" <> url <> "\">" <> url <> "</a>?"

  assert Linkify.link(input) == linked
end
# An unbalanced ")" right after the host stays outside the anchor.
test "works with URLs ending in unbalanced closed paren, no path separator, and no query params" do
  url = "http://example.com"

  assert Linkify.link(url <> ")") == "<a href=\"" <> url <> "\">" <> url <> "</a>)"
end
end
end
diff --git a/test/parser_test.exs b/test/parser_test.exs
index aafecb5..fdfa4fa 100644
--- a/test/parser_test.exs
+++ b/test/parser_test.exs
@@ -1,319 +1,324 @@
# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT
defmodule Linkify.ParserTest do
use ExUnit.Case, async: true
doctest Linkify.Parser
import Linkify.Parser
# Exercises `url?/2` over the fixture lists defined at the bottom of this
# module, for each combination of the `scheme:` and `validate_tld:` options.
describe "url?/2" do
  test "valid scheme true" do
    for url <- valid_scheme_urls() do
      assert url?(url, scheme: true, validate_tld: true)
    end
  end

  test "invalid scheme true" do
    for url <- invalid_scheme_urls() do
      refute url?(url, scheme: true, validate_tld: true)
    end
  end

  test "valid scheme false" do
    for url <- valid_non_scheme_urls() do
      assert url?(url, scheme: false, validate_tld: true)
    end
  end

  test "invalid scheme false" do
    for url <- invalid_non_scheme_urls() do
      refute url?(url, scheme: false, validate_tld: true)
    end
  end

  test "checks the tld for url with a scheme when validate_tld: true" do
    for url <- custom_tld_scheme_urls() do
      refute url?(url, scheme: true, validate_tld: true)
    end
  end

  test "does not check the tld for url with a scheme when validate_tld: false" do
    for url <- custom_tld_scheme_urls() do
      assert url?(url, scheme: true, validate_tld: false)
    end
  end

  test "does not check the tld for url with a scheme when validate_tld: :no_scheme" do
    for url <- custom_tld_scheme_urls() do
      assert url?(url, scheme: true, validate_tld: :no_scheme)
    end
  end

  test "checks the tld for url without a scheme when validate_tld: true" do
    for url <- custom_tld_non_scheme_urls() do
      refute url?(url, scheme: false, validate_tld: true)
    end
  end

  test "checks the tld for url without a scheme when validate_tld: :no_scheme" do
    for url <- custom_tld_non_scheme_urls() do
      refute url?(url, scheme: false, validate_tld: :no_scheme)
    end
  end

  test "does not check the tld for url without a scheme when validate_tld: false" do
    for url <- custom_tld_non_scheme_urls() do
      assert url?(url, scheme: false, validate_tld: false)
    end
  end
end
# Exercises `email?/2` over the e-mail fixture lists defined at the bottom of
# this module.
describe "email?" do
  test "identifies valid emails" do
    for email <- valid_emails() do
      assert email?(email, [])
    end
  end

  test "identifies invalid emails" do
    for email <- invalid_emails() do
      refute email?(email, [])
    end
  end

  test "does not validate tlds when validate_tld: false" do
    for email <- valid_custom_tld_emails() do
      assert email?(email, validate_tld: false)
    end
  end

  test "validates tlds when validate_tld: true" do
    for email <- valid_custom_tld_emails() do
      refute email?(email, validate_tld: true)
    end
  end
end
describe "parse" do
# A CRLF directly after a domain ends the link; the line break and the text
# after it are passed through untouched. (Test name previously misspelled
# as "breakes".)
test "handle line breaks" do
  text = "google.com\r\nssss"
  expected = "<a href=\"http://google.com\">google.com</a>\r\nssss"

  assert parse(text) == expected
end
# Angle brackets (whole tags or stray "<" / ">") adjacent to a domain must
# delimit the link without being swallowed by it.
test "handle angle bracket in the end" do
  anchor = ~s(<a href="http://google.com">google.com</a>)

  cases = [
    {"google.com <br>", anchor <> " <br>"},
    {"google.com<br>hey", anchor <> "<br>hey"},
    {"hey<br>google.com", "hey<br>" <> anchor},
    {"<br />google.com", "<br />" <> anchor},
    {"google.com<", anchor <> "<"},
    {"google.com>", anchor <> ">"}
  ]

  for {input, output} <- cases do
    assert parse(input) == output
  end
end
# Domains that appear inside HTML attribute values (or inside an existing
# anchor) must be returned unchanged.
test "does not link attributes" do
  untouched = [
    "Check out <a href='google.com'>google</a>",
    "Check out <img src='google.com' alt='google.com'/>",
    "Check out <span><img src='google.com' alt='google.com'/></span>"
  ]

  for html <- untouched do
    assert parse(html) == html
  end
end
# Content of <pre> and <code> elements, alone or nested, is never linked.
test "does not link inside `<pre>` and `<code>`" do
  untouched = [
    "<pre>google.com</pre>",
    "<code>google.com</code>",
    "<pre><code>google.com</code></pre>"
  ]

  for html <- untouched do
    assert parse(html) == html
  end
end
# Text content of an ordinary element is linked; the surrounding tag and its
# attributes are preserved as written.
test "links url inside html" do
  opts = [class: false, rel: false]
  anchor = ~s(<a href="http://google.com">google.com</a>)

  assert parse("<div>google.com</div>", opts) == "<div>" <> anchor <> "</div>"

  assert parse("Check out <div class='section'>google.com</div>", opts) ==
           "Check out <div class='section'>" <> anchor <> "</div>"
end
# Linking also reaches text wrapped in more than one element.
test "links url inside nested html" do
  html = "<p><strong>google.com</strong></p>"

  assert parse(html, class: false, rel: false) ==
           ~s(<p><strong><a href="http://google.com">google.com</a></strong></p>)
end
# An existing anchor wrapped in another element must come back unchanged
# (no double linking of its href or text).
test "html links inside html" do
  for tag <- ["p", "span", "h1", "li"] do
    html =
      "<" <> tag <> ~s(><a href="http://google.com">google.com</a></) <> tag <> ">"

    assert parse(html) == html
  end
end
# Parentheses wrapping a URL, and the comma after them, stay outside the anchor.
test "do not link parens" do
  with_scheme = " foo (https://example.com/path/folder/), bar"

  assert parse(with_scheme, class: false, rel: false, scheme: true) ==
           ~s| foo (<a href="https://example.com/path/folder/">https://example.com/path/folder/</a>), bar|

  without_scheme = " foo (example.com/path/folder/), bar"

  assert parse(without_scheme, class: false, rel: false) ==
           ~s| foo (<a href="http://example.com/path/folder/">example.com/path/folder/</a>), bar|
end
- test "do not link punctuation marks in the end" do
- text = "google.com."
- assert parse(text) == "<a href=\"http://google.com\">google.com</a>."
-
+ test "do not link reserved chars (punctuation marks) in the end" do
text = "google.com;"
assert parse(text) == "<a href=\"http://google.com\">google.com</a>;"
text = "google.com:"
assert parse(text) == "<a href=\"http://google.com\">google.com</a>:"
text = "hack google.com, please"
assert parse(text) == "hack <a href=\"http://google.com\">google.com</a>, please"
text = "(check out google.com)"
assert parse(text) == "(check out <a href=\"http://google.com\">google.com</a>)"
end
+ test "links include periods at the end" do
+ text =
+ "The article is at https://en.wikipedia.org/wiki/Revlon,_Inc._v._MacAndrews_%26_Forbes_Holdings,_Inc."
+
+ assert parse(text) ==
+ "The article is at <a href=\"https://en.wikipedia.org/wiki/Revlon,_Inc._v._MacAndrews_%26_Forbes_Holdings,_Inc.\">https://en.wikipedia.org/wiki/Revlon,_Inc._v._MacAndrews_%26_Forbes_Holdings,_Inc.</a>"
+ end
+
# Two consecutive dots in the path must not prevent or truncate the link.
test "double dot in link is allowed" do
  url = "https://example.to/something..mp3"
  assert parse(url) == ~s(<a href="#{url}">#{url}</a>)
end
# With `url: false` a plain domain is passed through untouched.
test "do not link urls" do
  input = "google.com"
  assert parse(input, url: false) == input
end
# A token that starts with ":" must not be linked, even with `scheme` and
# `extra` matching switched on. Options are passed as a map here, not a
# keyword list.
test "do not link `:test.test`" do
  opts = %{
    scheme: true,
    extra: true,
    class: false,
    strip_prefix: false,
    new_window: false,
    rel: false
  }

  input = ":test.test"
  assert parse(input, opts) == input
end
end
# Test helper. Given a one-element list whose only element is itself a list,
# asserts that the inner list ends with `number`; any other first argument
# yields `false`.
# NOTE(review): no caller is visible in this part of the file — confirm it is still used.
def valid_number?(wrapped, number) do
  case wrapped do
    [inner] -> assert List.last(inner) == number
    _other -> false
  end
end
# Fixture: scheme-qualified URLs that `url?/2` is expected to accept.
def valid_scheme_urls do
  ~w(
    https://www.example.com
    http://www2.example.com
    http://home.example-site.com
    http://blog.example.com
    http://www.example.com/product
    http://www.example.com/products?id=1&page=2
    http://www.example.com#up
    http://255.255.255.255
    http://www.site.com:8008
  )
end
# Fixture: scheme-qualified input that `url?/2` is expected to reject
# (the string contains whitespace and a pipe).
def invalid_scheme_urls do
  rejected =
    "http://invalid.com/perl.cgi?key= | http://web-site.com/cgi-bin/perl.cgi?key1=value1&key2"

  [rejected]
end
# Fixture: scheme-less URLs that `url?/2` is expected to accept.
def valid_non_scheme_urls do
  ~w(
    www.example.com
    www2.example.com
    www.example.com:2000
    www.example.com?abc=1
    example.example-site.com
    example.com
    example.ca
    example.tv
    example.com:999?one=one
  )
end
# Fixture: scheme-less input that `url?/2` is expected to reject — text with
# whitespace, a dangling dot, a double dot, a phone-like number and bare IPv4
# addresses.
def invalid_non_scheme_urls do
  with_whitespace =
    "invalid.com/perl.cgi?key= | web-site.com/cgi-bin/perl.cgi?key1=value1&key2"

  [
    with_whitespace
    | ~w(invalid. hi..there 555.555.5555 255.255.255.255 255.255.255.255:3000?one=1&two=2)
  ]
end
# Fixture: scheme-qualified URLs on TLDs that the tests above expect to fail
# `validate_tld: true` and to pass when TLD validation is off or `:no_scheme`.
def custom_tld_scheme_urls do
  ~w(
    http://whatever.null/
    https://example.o/index.html
    http://pleroma.i2p/test
    http://misskey.loki
  )
end
# Fixture: the scheme-less counterparts of the custom-TLD URLs; the tests
# above expect them to pass only with `validate_tld: false`.
def custom_tld_non_scheme_urls do
  ~w(
    whatever.null/
    example.o/index.html
    pleroma.i2p/test
    misskey.loki
  )
end
# Fixture: addresses `email?/2` is expected to accept with default options.
def valid_emails do
  ~w(rms@ai.mit.edu vc@cock.li guardian@33y6fjyhs3phzfjj.onion)
end

# Fixture: strings `email?/2` is expected to reject (no "@", or no TLD).
def invalid_emails do
  ~w(rms[at]ai.mit.edu vc@cock)
end

# Fixture: well-formed addresses on a TLD that only passes when
# `validate_tld: false`.
def valid_custom_tld_emails do
  ~w(hi@company.null)
end
end

File Metadata

Mime Type
text/x-diff
Expires
Mon, Nov 25, 6:40 AM (1 d, 7 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
39695
Default Alt Text
(57 KB)

Event Timeline