diff --git a/lib/linkify/parser.ex b/lib/linkify/parser.ex
index 233c681..af5afef 100644
--- a/lib/linkify/parser.ex
+++ b/lib/linkify/parser.ex
@@ -1,473 +1,473 @@
# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT

defmodule Linkify.Parser do
  @moduledoc """
  Module to handle parsing the input string.
  """

  alias Linkify.Builder

  @invalid_url ~r/(\.\.+)|(^(\d+\.){1,2}\d+$)/

  @match_url ~r{^(?:\W*)?(?<url>(?:https?:\/\/)?[\w.-]+(?:\.[\w\.-]+)+[\w\-\._~%:\/?#[\]@!\$&'\(\)\*\+,;=.]+$)}u

  @get_scheme_host ~r{^\W*(?<scheme>https?:\/\/)?(?:[^@\n]+\\w@)?(?<host>[^:#~\/\n?]+)}u

-  @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·][[:word:]_·\p{M}]*)/u
+  @match_hashtag ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·\x{200c}][[:word:]_·\p{M}\x{200c}]*)/u

  @match_skipped_tag ~r/^(?<tag>(a|code|pre)).*>*/

  @delimiters ~r/[,.;:>?!]*$/

  @en_apostrophes [
    "'",
    "'s",
    "'ll",
    "'d"
  ]

  @prefix_extra [
    "magnet:?",
    "dweb://",
    "dat://",
    "gopher://",
    "ipfs://",
    "ipns://",
    "irc://",
    "ircs://",
    "irc6://",
    "mumble://",
    "ssb://"
  ]

  @tlds "./priv/tlds.txt"
        |> File.read!()
        |> String.split("\n", trim: true)
        |> Enum.concat(["onion"])
        |> MapSet.new()

  @default_opts %{
    url: true,
    validate_tld: true
  }

  @doc """
  Parse the given string, identifying items to link.

  Parses the string, replacing the matching urls with an html link.

  ## Examples

      iex> Linkify.Parser.parse("Check out google.com")
      ~s{Check out <a href="http://google.com">google.com</a>}
  """

  @types [:url, :hashtag, :extra, :mention, :email]

  def parse(input, opts \\ %{})
  def parse(input, opts) when is_binary(input), do: {input, %{}} |> parse(opts) |> elem(0)
  def parse(input, list) when is_list(list), do: parse(input, Enum.into(list, %{}))

  def parse(input, opts) do
    opts = Map.merge(@default_opts, opts)

    {buffer, user_acc} = do_parse(input, opts, {"", [], :parsing})

    if opts[:iodata] do
      {buffer, user_acc}
    else
      {IO.iodata_to_binary(buffer), user_acc}
    end
  end

  defp accumulate(acc, buffer), do: [buffer | acc]
  defp accumulate(acc, buffer, trailing), do: [trailing, buffer | acc]

  defp do_parse({"", user_acc}, _opts, {"", acc, _}), do: {Enum.reverse(acc), user_acc}

  defp do_parse(
         {"<" <> text, user_acc},
         %{hashtag: true} = opts,
         {"#" <> _ = buffer, acc, :parsing}
       ) do
    {buffer, user_acc} = link(buffer, opts, user_acc)

    buffer =
      case buffer do
        [_, _, _] -> Enum.join(buffer)
        _ -> buffer
      end

    case Regex.run(@match_skipped_tag, buffer, capture: [:tag]) do
      [tag] ->
        text = String.trim_leading(text, tag)
        do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<#{tag}"), :skip})

      nil ->
        do_parse({text, user_acc}, opts, {"<", accumulate(acc, buffer, ""), {:open, 1}})
    end
  end

  defp do_parse({"<br" <> text, user_acc}, opts, {buffer, acc, :parsing}) do
    {buffer, user_acc} = link(buffer, opts, user_acc)
    do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<br"), {:open, 1}})
  end

  defp do_parse({"<a" <> text, user_acc}, opts, {buffer, acc, :parsing}),
    do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<a"), :skip})

  defp do_parse({"<pre" <> text, user_acc}, opts, {buffer, acc, :parsing}),
    do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<pre"), :skip})

  defp do_parse({"<code" <> text, user_acc}, opts, {buffer, acc, :parsing}),
    do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<code"), :skip})

  defp do_parse({"</a>" <> text, user_acc}, opts, {buffer, acc, :skip}),
    do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "</a>"), :parsing})

  defp do_parse({"</pre>" <> text, user_acc}, opts,
{buffer, acc, :skip}), do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "</pre>"), :parsing}) defp do_parse({"</code>" <> text, user_acc}, opts, {buffer, acc, :skip}), do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "</code>"), :parsing}) defp do_parse({"<" <> text, user_acc}, opts, {"", acc, :parsing}), do: do_parse({text, user_acc}, opts, {"<", acc, {:open, 1}}) defp do_parse({"<" <> text, user_acc}, opts, {buffer, acc, :parsing}) do {buffer, user_acc} = link(buffer, opts, user_acc) do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, "<"), {:open, 1}}) end defp do_parse({">" <> text, user_acc}, opts, {buffer, acc, {:attrs, _level}}), do: do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer, ">"), :parsing}) defp do_parse({<<ch::8>> <> text, user_acc}, opts, {"", acc, {:attrs, level}}) do do_parse({text, user_acc}, opts, {"", accumulate(acc, <<ch::8>>), {:attrs, level}}) end defp do_parse({text, user_acc}, opts, {buffer, acc, {:open, level}}) do do_parse({text, user_acc}, opts, {"", accumulate(acc, buffer), {:attrs, level}}) end defp do_parse( {<<char::bytes-size(1), text::binary>>, user_acc}, opts, {buffer, acc, state} ) when char in [" ", "\r", "\n"] do {buffer, user_acc} = link(buffer, opts, user_acc) do_parse( {text, user_acc}, opts, {"", accumulate(acc, buffer, char), state} ) end defp do_parse({<<ch::8>>, user_acc}, opts, {buffer, acc, state}) do {buffer, user_acc} = link(buffer <> <<ch::8>>, opts, user_acc) do_parse( {"", user_acc}, opts, {"", accumulate(acc, buffer), state} ) end defp do_parse({<<ch::8>> <> text, user_acc}, opts, {buffer, acc, state}), do: do_parse({text, user_acc}, opts, {buffer <> <<ch::8>>, acc, state}) def check_and_link(:url, buffer, opts, _user_acc) do if url?(buffer, opts) do case @match_url |> Regex.run(buffer, capture: [:url]) |> hd() do ^buffer -> link_url(buffer, opts) url -> link = link_url(url, opts) restore_stripped_symbols(buffer, url, link) end else :nomatch end end def check_and_link(:email, buffer, opts, _user_acc) do if email?(buffer, opts), do: link_email(buffer, opts), else: :nomatch end def check_and_link(:mention, buffer, opts, user_acc) do buffer |> match_mention |> link_mention(buffer, opts, user_acc) end def check_and_link(:hashtag, buffer, opts, user_acc) do buffer |> match_hashtag |> link_hashtag(buffer, opts, user_acc) end def check_and_link(:extra, "xmpp:" <> handle = buffer, opts, _user_acc) do if email?(handle, opts), do: link_extra(buffer, opts), else: :nomatch end def check_and_link(:extra, buffer, opts, _user_acc) do if String.starts_with?(buffer, @prefix_extra), do: link_extra(buffer, opts), else: :nomatch end defp maybe_strip_parens(buffer) do trimmed = trim_leading_paren(buffer) with :next <- parens_check_trailing(buffer), :next <- parens_found_email(trimmed), :next <- parens_found_url(trimmed), %{path: path, query: query} = URI.parse(trimmed), :next <- parens_in_query(query), :next <- parens_found_path_separator(path), :next <- parens_path_has_open_paren(path), :next <- parens_check_balanced(trimmed) do buffer |> trim_leading_paren |> trim_trailing_paren else :both -> buffer |> trim_leading_paren |> trim_trailing_paren :leading_only -> buffer |> trim_leading_paren :noop -> buffer _ -> buffer end end defp parens_check_trailing(buffer), do: (String.ends_with?(buffer, ")") && :next) || :noop defp parens_found_email(trimmed), do: (trim_trailing_paren(trimmed) |> email?(nil) && :both) || :next defp parens_found_url(trimmed), do: (trim_trailing_paren(trimmed) |> url?(nil) && 
:next) || :noop defp parens_in_query(query), do: (is_nil(query) && :next) || :both defp parens_found_path_separator(path) when is_nil(path), do: :next defp parens_found_path_separator(path), do: (String.contains?(path, "/") && :next) || :both defp parens_path_has_open_paren(path) when is_nil(path), do: :next defp parens_path_has_open_paren(path), do: (String.contains?(path, "(") && :next) || :both defp parens_check_balanced(trimmed) do graphemes = String.graphemes(trimmed) opencnt = graphemes |> Enum.count(fn x -> x == "(" end) closecnt = graphemes |> Enum.count(fn x -> x == ")" end) if opencnt == closecnt do :leading_only else :next end end defp trim_leading_paren(buffer) do case buffer do "(" <> buffer -> buffer buffer -> buffer end end defp trim_trailing_paren(buffer), do: (String.ends_with?(buffer, ")") && String.slice(buffer, 0, String.length(buffer) - 1)) || buffer defp strip_punctuation(buffer), do: String.replace(buffer, @delimiters, "") defp strip_en_apostrophes(buffer) do Enum.reduce(@en_apostrophes, buffer, fn abbrev, buf -> String.replace_suffix(buf, abbrev, "") end) end def url?(buffer, opts) do valid_url?(buffer) && Regex.match?(@match_url, buffer) && valid_tld?(buffer, opts) end def email?(buffer, opts) do # Note: In reality the local part can only be checked by the remote server case Regex.run(~r/^(?<user>.*)@(?<host>[^@]+)$/, buffer, capture: [:user, :host]) do [_user, hostname] -> valid_hostname?(hostname) && valid_tld?(hostname, opts) _ -> false end end defp valid_url?(url), do: !Regex.match?(@invalid_url, url) @doc """ Validates a URL's TLD. Returns a boolean. Will return `true` if `:validate_tld` option set to `false`. Will skip validation and return `true` if `:validate_tld` set to `:no_scheme` and the url has a scheme. """ def valid_tld?(url, opts) do [scheme, host] = Regex.run(@get_scheme_host, url, capture: [:scheme, :host]) cond do opts[:validate_tld] == false -> true scheme != "" && ip?(host) -> true # don't validate if scheme is present opts[:validate_tld] == :no_scheme and scheme != "" -> true true -> tld = host |> strip_punctuation() |> String.split(".") |> List.last() MapSet.member?(@tlds, tld) end end def safe_to_integer(string, base \\ 10) do String.to_integer(string, base) rescue _ -> nil end def ip?(buffer) do case :inet.parse_strict_address(to_charlist(buffer)) do {:error, _} -> false {:ok, _} -> true end end # IDN-compatible, ported from musl-libc's is_valid_hostname() def valid_hostname?(hostname) do hostname |> String.to_charlist() |> Enum.any?(fn s -> !(s >= 0x80 || s in 0x30..0x39 || s in 0x41..0x5A || s in 0x61..0x7A || s in '.-') end) |> Kernel.!() end def match_mention(buffer) do case Regex.run(~r/^@(?<user>[a-zA-Z\d_-]+)(@(?<host>[^@]+))?$/, buffer, capture: [:user, :host] ) do [user, ""] -> "@" <> user [user, hostname] -> if valid_hostname?(hostname) && valid_tld?(hostname, []), do: "@" <> user <> "@" <> hostname, else: nil _ -> nil end end def match_hashtag(buffer) do case Regex.run(@match_hashtag, buffer, capture: [:tag]) do [hashtag] -> hashtag _ -> nil end end def link_hashtag(nil, _buffer, _, _user_acc), do: :nomatch def link_hashtag(hashtag, buffer, %{hashtag_handler: hashtag_handler} = opts, user_acc) do hashtag |> hashtag_handler.(buffer, opts, user_acc) |> maybe_update_buffer(hashtag, buffer) end def link_hashtag(hashtag, buffer, opts, _user_acc) do hashtag |> Builder.create_hashtag_link(buffer, opts) |> maybe_update_buffer(hashtag, buffer) end def link_mention(nil, _buffer, _, _user_acc), do: :nomatch def link_mention(mention, buffer, 
        %{mention_handler: mention_handler} = opts, user_acc) do
    mention
    |> mention_handler.(buffer, opts, user_acc)
    |> maybe_update_buffer(mention, buffer)
  end

  def link_mention(mention, buffer, opts, _user_acc) do
    mention
    |> Builder.create_mention_link(buffer, opts)
    |> maybe_update_buffer(mention, buffer)
  end

  defp maybe_update_buffer(out, match, buffer) when is_binary(out) do
    maybe_update_buffer({out, nil}, match, buffer)
  end

  defp maybe_update_buffer({out, user_acc}, match, buffer)
       when match != buffer and out != buffer do
    out = String.replace(buffer, match, out)
    {out, user_acc}
  end

  defp maybe_update_buffer(out, _match, _buffer), do: out

  @doc false
  def link_url(buffer, opts) do
    Builder.create_link(buffer, opts)
  end

  @doc false
  def link_email(buffer, opts) do
    Builder.create_email_link(buffer, opts)
  end

  def link_extra(buffer, opts) do
    Builder.create_extra_link(buffer, opts)
  end

  defp link(buffer, opts, user_acc) do
    Enum.reduce_while(@types, {buffer, user_acc}, fn type, _ ->
      if opts[type] == true do
        check_and_link_reducer(type, buffer, opts, user_acc)
      else
        {:cont, {buffer, user_acc}}
      end
    end)
  end

  defp check_and_link_reducer(type, buffer, opts, user_acc) do
    str =
      buffer
      |> String.split("<")
      |> List.first()
      |> strip_en_apostrophes()
      |> strip_punctuation()
      |> maybe_strip_parens()

    case check_and_link(type, str, opts, user_acc) do
      :nomatch ->
        {:cont, {buffer, user_acc}}

      {link, user_acc} ->
        {:halt, {restore_stripped_symbols(buffer, str, link), user_acc}}

      link ->
        {:halt, {restore_stripped_symbols(buffer, str, link), user_acc}}
    end
  end

  defp restore_stripped_symbols(buffer, buffer, link), do: link

  defp restore_stripped_symbols(buffer, stripped_buffer, link) do
    buffer
    |> String.split(stripped_buffer)
    |> Enum.intersperse(link)
  end
end
diff --git a/test/linkify_test.exs b/test/linkify_test.exs
index 1abca5f..8a9ace4 100644
--- a/test/linkify_test.exs
+++ b/test/linkify_test.exs
@@ -1,858 +1,870 @@
# Copyright © 2017-2018 E-MetroTel
# Copyright © 2019-2022 Pleroma Authors
# SPDX-License-Identifier: MIT

defmodule LinkifyTest do
  use ExUnit.Case, async: true
  doctest Linkify

  test "default link" do
    assert Linkify.link("google.com") == "<a href=\"http://google.com\">google.com</a>"
  end

  test "default link iodata" do
    assert Linkify.link_to_iodata("google.com") ==
             [["<a ", "href=\"http://google.com\"", ">", "google.com", "</a>"]]
  end

  test "default link safe iodata" do
    assert Linkify.link_safe("google.com") == [
             [
               {:safe, ["<a ", "href=\"http://google.com\"", ">"]},
               "google.com",
               {:safe, "</a>"}
             ]
           ]
  end

  test "does not link existing links" do
    text = ~s(<a href="http://google.com">google.com</a>)
    assert Linkify.link(text) == text
  end

  test "all kinds of links" do
    text = "hello google.com https://ddg.com user@email.com irc:///mIRC"

    expected =
      "hello <a href=\"http://google.com\">google.com</a> <a href=\"https://ddg.com\">https://ddg.com</a> <a href=\"mailto:user@email.com\">user@email.com</a> <a href=\"irc:///mIRC\">irc:///mIRC</a>"

    assert Linkify.link(text,
             email: true,
             extra: true
           ) == expected
  end

  test "all kinds of links iodata" do
    text = "hello google.com https://ddg.com user@email.com irc:///mIRC"

    expected = [
      "hello",
      " ",
      ["<a ", "href=\"http://google.com\"", ">", "google.com", "</a>"],
      " ",
      ["<a ", "href=\"https://ddg.com\"", ">", "https://ddg.com", "</a>"],
      " ",
      ["<a ", "href=\"mailto:user@email.com\"", ">", "user@email.com", "</a>"],
      " ",
      ["<a ", "href=\"irc:///mIRC\"", ">", "irc:///mIRC", "</a>"]
    ]

    assert Linkify.link_to_iodata(text,
             email: true,
             extra: true
           ) == expected
  end

  test "class attribute" do
    assert
Linkify.link("google.com", class: "linkified") == "<a href=\"http://google.com\" class=\"linkified\">google.com</a>" end test "class attribute iodata" do assert Linkify.link_to_iodata("google.com", class: "linkified") == [ [ "<a ", "href=\"http://google.com\" class=\"linkified\"", ">", "google.com", "</a>" ] ] end test "rel attribute" do assert Linkify.link("google.com", rel: "noopener noreferrer") == "<a href=\"http://google.com\" rel=\"noopener noreferrer\">google.com</a>" end test "rel attribute iodata" do assert Linkify.link_to_iodata("google.com", rel: "noopener noreferrer") == [ [ "<a ", "href=\"http://google.com\" rel=\"noopener noreferrer\"", ">", "google.com", "</a>" ] ] end test "rel as function" do text = "google.com" expected = "<a href=\"http://google.com\" rel=\"com\">google.com</a>" custom_rel = fn url -> url |> String.split(".") |> List.last() end assert Linkify.link(text, rel: custom_rel) == expected text = "google.com" expected = "<a href=\"http://google.com\">google.com</a>" custom_rel = fn _ -> nil end assert Linkify.link(text, rel: custom_rel) == expected end test "strip parens" do assert Linkify.link("(google.com)") == "(<a href=\"http://google.com\">google.com</a>)" end test "strip parens iodata" do assert Linkify.link_to_iodata("(google.com)") == [["(", ["<a ", "href=\"http://google.com\"", ">", "google.com", "</a>"], ")"]] end test "link_map/2" do assert Linkify.link_map("google.com", []) == {"<a href=\"http://google.com\">google.com</a>", []} end describe "custom handlers" do test "mentions handler" do text = "hello @user, @valid_user and @invalid_user" valid_users = ["user", "valid_user"] handler = fn "@" <> user = mention, buffer, _opts, acc -> if Enum.member?(valid_users, user) do link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>) {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}} else {buffer, acc} end end {result_text, %{mentions: mentions}} = Linkify.link_map(text, %{mentions: MapSet.new()}, mention: true, mention_handler: handler ) assert result_text == "hello <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, <a href=\"https://example.com/user/valid_user\" data-user=\"valid_user\">@valid_user</a> and @invalid_user" assert mentions |> MapSet.to_list() |> Enum.map(&elem(&1, 1)) == valid_users end test "hashtags handler" do text = "#hello #world" handler = fn hashtag, buffer, opts, acc -> link = Linkify.Builder.create_hashtag_link(hashtag, buffer, opts) {link, %{acc | tags: MapSet.put(acc.tags, hashtag)}} end {result_text, %{tags: tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/user/", rel: false ) assert result_text == "<a href=\"https://example.com/user/hello\">#hello</a> <a href=\"https://example.com/user/world\">#world</a>" assert MapSet.to_list(tags) == ["#hello", "#world"] text = "#justOne" {result_text, %{tags: _tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/user/", rel: false ) assert result_text == "<a href=\"https://example.com/user/justOne\">#justOne</a>" text = "#justOne." {result_text, %{tags: _tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/user/", rel: false ) assert result_text == "<a href=\"https://example.com/user/justOne\">#justOne</a>." 
text = "#justOne " {result_text, %{tags: _tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/user/", rel: false ) assert result_text == "<a href=\"https://example.com/user/justOne\">#justOne</a> " text = "#cofe <br><a href=\"https://pleroma.social/\">Source</a>" {_result_text, %{tags: tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/tag/" ) assert MapSet.to_list(tags) == ["#cofe"] text = "#cofe<br><a href=\"https://pleroma.social/\">Source</a>" {_result_text, %{tags: tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/tag/" ) assert MapSet.to_list(tags) == ["#cofe"] text = "#cofe<a href=\"https://pleroma.social/\">Source</a>" {_result_text, %{tags: tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/tag/" ) assert MapSet.to_list(tags) == ["#cofe"] text = "#cofe<code>fetch()</code>" {_result_text, %{tags: tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/tag/" ) assert MapSet.to_list(tags) == ["#cofe"] text = "#cofe<pre>fetch()</pre>" {_result_text, %{tags: tags}} = Linkify.link_map(text, %{tags: MapSet.new()}, hashtag: true, hashtag_handler: handler, hashtag_prefix: "https://example.com/tag/" ) assert MapSet.to_list(tags) == ["#cofe"] end test "mention handler and hashtag prefix" do text = "Hello again, @user.<script></script>\nThis is on another :moominmamma: line. #2hu #epic #phantasmagoric" handler = fn "@" <> user = mention, _, _, _ -> ~s(<span class="h-card"><a href="#/user/#{user}">@<span>#{mention}</span></a></span>) end expected = ~s(Hello again, @user.<script></script>\nThis is on another :moominmamma: line. <a href="/tag/2hu" target="_blank">#2hu</a> <a href="/tag/epic" target="_blank">#epic</a> <a href="/tag/phantasmagoric" target="_blank">#phantasmagoric</a>) assert Linkify.link(text, mention: true, mention_handler: handler, hashtag: true, hashtag_prefix: "/tag/", new_window: true ) == expected end test "mentions handler with hostname/@user links" do text = "hi @user, take a look at this post: https://example.com/@valid_user/posts/9w5AkQp956XIh74apc" valid_users = ["user", "valid_user"] handler = fn "@" <> user = mention, buffer, _opts, acc -> if Enum.member?(valid_users, user) do link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>) {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}} else {buffer, acc} end end {result_text, %{mentions: mentions}} = Linkify.link_map(text, %{mentions: MapSet.new()}, mention: true, mention_handler: handler, new_window: true ) assert result_text == "hi <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, take a look at this post: <a href=\"https://example.com/@valid_user/posts/9w5AkQp956XIh74apc\" target=\"_blank\">https://example.com/@valid_user/posts/9w5AkQp956XIh74apc</a>" assert mentions |> MapSet.to_list() |> Enum.map(&elem(&1, 1)) == ["user"] end test "mentions handler and extra links" do text = "hi @user, text me asap xmpp:me@cofe.ai, (or contact me at me@cofe.ai), please.<br>cofe.ai." 
valid_users = ["user", "cofe"] handler = fn "@" <> user = mention, buffer, _opts, acc -> if Enum.member?(valid_users, user) do link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>) {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}} else {buffer, acc} end end {result_text, %{mentions: mentions}} = Linkify.link_map(text, %{mentions: MapSet.new()}, mention: true, mention_handler: handler, extra: true, email: true ) assert result_text == "hi <a href=\"https://example.com/user/user\" data-user=\"user\">@user</a>, text me asap <a href=\"xmpp:me@cofe.ai\">xmpp:me@cofe.ai</a>, (or contact me at <a href=\"mailto:me@cofe.ai\">me@cofe.ai</a>), please.<br><a href=\"http://cofe.ai\">cofe.ai</a>." assert MapSet.to_list(mentions) == [{"@user", "user"}] end test "mentions handler and emails" do text = "hi @friend, here is my email<br><br>user@user.me" valid_users = ["user", "friend"] handler = fn "@" <> user = mention, buffer, _opts, acc -> if Enum.member?(valid_users, user) do link = ~s(<a href="https://example.com/user/#{user}" data-user="#{user}">#{mention}</a>) {link, %{acc | mentions: MapSet.put(acc.mentions, {mention, user})}} else {buffer, acc} end end {result_text, %{mentions: mentions}} = Linkify.link_map(text, %{mentions: MapSet.new()}, mention: true, mention_handler: handler, extra: true, email: true ) assert result_text == "hi <a href=\"https://example.com/user/friend\" data-user=\"friend\">@friend</a>, here is my email<br><br><a href=\"mailto:user@user.me\">user@user.me</a>" assert MapSet.to_list(mentions) == [{"@friend", "friend"}] end test "href handler" do text = ~s(google.com) result_text = Linkify.link(text, href_handler: &"/redirect?#{URI.encode_query(to: &1)}") assert result_text == ~s(<a href="/redirect?to=http%3A%2F%2Fgoogle.com">google.com</a>) end end describe "mentions" do test "simple mentions" do expected = ~s{hello <a href="https://example.com/user/user" target="_blank">@user</a> and <a href="https://example.com/user/anotherUser" target="_blank">@anotherUser</a>.} assert Linkify.link("hello @user and @anotherUser.", mention: true, mention_prefix: "https://example.com/user/", new_window: true ) == expected end test "mentions inside html tags" do text = "<p><strong>hello world</strong></p>\n<p><`em>another @user__test and @user__test google.com paragraph</em></p>\n" expected = "<p><strong>hello world</strong></p>\n<p><`em>another <a href=\"u/user__test\">@user__test</a> and <a href=\"u/user__test\">@user__test</a> <a href=\"http://google.com\">google.com</a> paragraph</em></p>\n" assert Linkify.link(text, mention: true, mention_prefix: "u/") == expected text = "<p>hi</p><p>@user @anotherUser</p>" expected = "<p>hi</p><p><a href=\"u/user\">@user</a> <a href=\"u/anotherUser\">@anotherUser</a></p>" assert Linkify.link(text, mention: true, mention_prefix: "u/") == expected end test "mention @user@example.com" do text = "hey @user@example.com" expected = "hey <a href=\"https://example.com/user/user@example.com\" target=\"_blank\">@user@example.com</a>" assert Linkify.link(text, mention: true, mention_prefix: "https://example.com/user/", new_window: true ) == expected expected = "That's <a href=\"https://example.com/user/user@example.com\" target=\"_blank\">@user@example.com</a>'s server" text = "That's @user@example.com's server" assert Linkify.link(text, mention: true, mention_prefix: "https://example.com/user/", new_window: true ) == expected end test "mentions with no word-separation before them" do text = "@@example hey! 
>@@test@example.com idolm@ster" assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text end test "invalid mentions" do text = "hey user@example" assert Linkify.link(text, mention: true, mention_prefix: "/users/") == text end test "IDN domain" do text = "hello @lain@我爱你.com" expected = "hello <a href=\"/users/lain@我爱你.com\">@lain@我爱你.com</a>" assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected text = "hello @lain@xn--6qq986b3xl.com" expected = "hello <a href=\"/users/lain@xn--6qq986b3xl.com\">@lain@xn--6qq986b3xl.com</a>" assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected end test ".onion domain" do text = "Hey @admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion" expected = "Hey <a href=\"/users/admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion\">@admin@vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion</a>" assert Linkify.link(text, mention: true, mention_prefix: "/users/") == expected end end describe "hashtag links" do test "hashtag" do expected = " one <a href=\"https://example.com/tag/2two\" target=\"_blank\">#2two</a> three <a href=\"https://example.com/tag/four\" target=\"_blank\">#four</a>." assert Linkify.link(" one #2two three #four.", hashtag: true, hashtag_prefix: "https://example.com/tag/", new_window: true ) == expected end test "must have non-numbers" do expected = "<a href=\"/t/1ok\">#1ok</a> #42 #7" assert Linkify.link("#1ok #42 #7", hashtag: true, hashtag_prefix: "/t/", rel: false ) == expected end test "support French" do text = "#administrateur·rice·s #ingénieur·e·s" expected = "<a href=\"/t/administrateur·rice·s\">#administrateur·rice·s</a> <a href=\"/t/ingénieur·e·s\">#ingénieur·e·s</a>" assert Linkify.link(text, hashtag: true, hashtag_prefix: "/t/", rel: false ) == expected end test "support Telugu" do text = "#చక్రం #కకకకక్ #కకకకాక #కకకక్రకకకక" expected = "<a href=\"/t/చక్రం\">#చక్రం</a> <a href=\"/t/కకకకక్\">#కకకకక్</a> <a href=\"/t/కకకకాక\">#కకకకాక</a> <a href=\"/t/కకకక్రకకకక\">#కకకక్రకకకక</a>" assert Linkify.link(text, hashtag: true, hashtag_prefix: "/t/", rel: false ) == expected end test "do not turn urls with hashes into hashtags" do text = "google.com#test #test google.com/#test #tag" expected = "<a href=\"http://google.com#test\">google.com#test</a> <a href=\"https://example.com/tag/test\">#test</a> <a href=\"http://google.com/#test\">google.com/#test</a> <a href=\"https://example.com/tag/tag\">#tag</a>" assert Linkify.link(text, hashtag: true, rel: false, hashtag_prefix: "https://example.com/tag/" ) == expected end test "works with non-latin characters" do text = "#漢字 #は #тест #ทดสอบ" expected = "<a href=\"https://example.com/tag/漢字\">#漢字</a> <a href=\"https://example.com/tag/は\">#は</a> <a href=\"https://example.com/tag/тест\">#тест</a> <a href=\"https://example.com/tag/ทดสอบ\">#ทดสอบ</a>" assert Linkify.link(text, rel: false, hashtag: true, hashtag_prefix: "https://example.com/tag/" ) == expected end + + test "ZWNJ does not break up hashtags" do + text = "#ساٴينس" + + expected = "<a href=\"https://example.com/tag/ساٴينس\">#ساٴينس</a>" + + assert Linkify.link(text, + rel: false, + hashtag: true, + hashtag_prefix: "https://example.com/tag/" + ) == expected + end end describe "links" do test "turning urls into links" do text = "Hey, check out http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ." 
expected = "Hey, check out <a href=\"http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla\" target=\"_blank\">http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla</a> ." assert Linkify.link(text, new_window: true) == expected # no scheme text = "Hey, check out www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla ." expected = "Hey, check out <a href=\"http://www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla\" target=\"_blank\">www.youtube.com/watch?v=8Zg1-TufF%20zY?x=1&y=2#blabla</a> ." assert Linkify.link(text, new_window: true) == expected end test "turn urls with schema into urls" do text = "📌https://google.com" expected = "📌<a href=\"https://google.com\">https://google.com</a>" assert Linkify.link(text, rel: false) == expected text = "http://www.cs.vu.nl/~ast/intel/" expected = "<a href=\"http://www.cs.vu.nl/~ast/intel/\">http://www.cs.vu.nl/~ast/intel/</a>" assert Linkify.link(text) == expected text = "https://forum.zdoom.org/viewtopic.php?f=44&t=57087" expected = "<a href=\"https://forum.zdoom.org/viewtopic.php?f=44&t=57087\">https://forum.zdoom.org/viewtopic.php?f=44&t=57087</a>" assert Linkify.link(text) == expected text = "https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul" expected = "<a href=\"https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul\">https://en.wikipedia.org/wiki/Sophia_(Gnosticism)#Mythos_of_the_soul</a>" assert Linkify.link(text) == expected text = "https://en.wikipedia.org/wiki/Duff's_device" expected = "<a href=\"https://en.wikipedia.org/wiki/Duff's_device\">https://en.wikipedia.org/wiki/Duff's_device</a>" assert Linkify.link(text) == expected text = "https://1.1.1.1/" expected = "<a href=\"https://1.1.1.1/\">https://1.1.1.1/</a>" assert Linkify.link(text) == expected text = "https://1.1.1.1:8080/" expected = "<a href=\"https://1.1.1.1:8080/\">https://1.1.1.1:8080/</a>" assert Linkify.link(text) == expected end test "strip prefix" do assert Linkify.link("http://google.com", strip_prefix: true) == "<a href=\"http://google.com\">google.com</a>" assert Linkify.link("http://www.google.com", strip_prefix: true) == "<a href=\"http://www.google.com\">google.com</a>" end test "hostname/@user" do text = "https://example.com/@user" expected = "<a href=\"https://example.com/@user\" target=\"_blank\">https://example.com/@user</a>" assert Linkify.link(text, new_window: true) == expected text = "https://example.com:4000/@user" expected = "<a href=\"https://example.com:4000/@user\" target=\"_blank\">https://example.com:4000/@user</a>" assert Linkify.link(text, new_window: true) == expected text = "https://example.com:4000/@user" expected = "<a href=\"https://example.com:4000/@user\" target=\"_blank\">https://example.com:4000/@user</a>" assert Linkify.link(text, new_window: true) == expected text = "@username" expected = "@username" assert Linkify.link(text, new_window: true) == expected end end describe "non http links" do test "xmpp" do text = "xmpp:user@example.com" expected = "<a href=\"xmpp:user@example.com\">xmpp:user@example.com</a>" assert Linkify.link(text, extra: true) == expected end test "wrong xmpp" do text = "xmpp:user.example.com" assert Linkify.link(text, extra: true) == text end test "email" do text = "user@example.com" expected = "<a href=\"mailto:user@example.com\">user@example.com</a>" assert Linkify.link(text, email: true) == expected end test "magnet" do text = 
"magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce" expected = "<a href=\"magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce\">magnet:?xt=urn:btih:a4104a9d2f5615601c429fe8bab8177c47c05c84&dn=ubuntu-18.04.1.0-live-server-amd64.iso&tr=http%3A%2F%2Ftorrent.ubuntu.com%3A6969%2Fannounce&tr=http%3A%2F%2Fipv6.torrent.ubuntu.com%3A6969%2Fannounce</a>" assert Linkify.link(text, extra: true) == expected end test "dweb" do text = "dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt" expected = "<a href=\"dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt\">dweb://584faa05d394190ab1a3f0240607f9bf2b7e2bd9968830a11cf77db0cea36a21+v1.0.0/path/to/file.txt</a>" assert Linkify.link(text, extra: true) == expected end end describe "TLDs" do test "parse with scheme" do text = "https://google.com" expected = "<a href=\"https://google.com\">https://google.com</a>" assert Linkify.link(text) == expected end test "only existing TLDs with scheme" do text = "this url https://google.foobar.blah11blah/ has invalid TLD" expected = "this url https://google.foobar.blah11blah/ has invalid TLD" assert Linkify.link(text) == expected text = "this url https://google.foobar.com/ has valid TLD" expected = "this url <a href=\"https://google.foobar.com/\">https://google.foobar.com/</a> has valid TLD" assert Linkify.link(text) == expected end test "only existing TLDs without scheme" do text = "this url google.foobar.blah11blah/ has invalid TLD" assert Linkify.link(text) == text text = "this url google.foobar.com/ has valid TLD" expected = "this url <a href=\"http://google.foobar.com/\">google.foobar.com/</a> has valid TLD" assert Linkify.link(text) == expected end test "only existing TLDs with and without scheme" do text = "this url http://google.foobar.com/ has valid TLD" expected = "this url <a href=\"http://google.foobar.com/\">http://google.foobar.com/</a> has valid TLD" assert Linkify.link(text) == expected text = "this url google.foobar.com/ has valid TLD" expected = "this url <a href=\"http://google.foobar.com/\">google.foobar.com/</a> has valid TLD" assert Linkify.link(text) == expected end test "FQDN (with trailing period)" do text = "Check out this article: https://www.wired.com./story/marissa-mayer-startup-sunshine-contacts/" expected = "Check out this article: <a href=\"https://www.wired.com./story/marissa-mayer-startup-sunshine-contacts/\">https://www.wired.com./story/marissa-mayer-startup-sunshine-contacts/</a>" assert Linkify.link(text) == expected end test "Do not link trailing punctuation" do text = "You can find more info at https://pleroma.social." expected = "You can find more info at <a href=\"https://pleroma.social\">https://pleroma.social</a>." assert Linkify.link(text) == expected text = "Of course it was google.com!!" expected = "Of course it was <a href=\"http://google.com\">google.com</a>!!" assert Linkify.link(text) == expected text = "First I had to login to hotmail.com, then I had to delete emails because my 15MB quota was full." expected = "First I had to login to <a href=\"http://hotmail.com\">hotmail.com</a>, then I had to delete emails because my 15MB quota was full." 
assert Linkify.link(text) == expected text = "I looked at theonion.com; it was no longer funny." expected = "I looked at <a href=\"http://theonion.com\">theonion.com</a>; it was no longer funny." assert Linkify.link(text) == expected end test "IDN and punycode domain" do text = "FrauBücher.com says Neiiighhh!" expected = "<a href=\"http://FrauBücher.com\">FrauBücher.com</a> says Neiiighhh!" assert Linkify.link(text) == expected text = "xn--fraubcher-u9a.com says Neiiighhh!" expected = "<a href=\"http://xn--fraubcher-u9a.com\">xn--fraubcher-u9a.com</a> says Neiiighhh!" assert Linkify.link(text) == expected end test ".onion domain" do text = "The riseup.net hidden service is at vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion" expected = "The <a href=\"http://riseup.net\">riseup.net</a> hidden service is at <a href=\"http://vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion\">vww6ybal4bd7szmgncyruucpgfkqahzddi37ktceo3ah7ngmcopnpyyd.onion</a>" assert Linkify.link(text) == expected end test "IPv4 is linked only with scheme" do text = "1.1.1.1" assert Linkify.link(text) == text text = "http://1.1.1.1" expected = "<a href=\"http://1.1.1.1\">http://1.1.1.1</a>" assert Linkify.link(text) == expected end test "shortened IPv4 are not linked" do text = "109.99" expected = "109.99" assert Linkify.link(text) == expected end test "URLs with last character is closing paren" do text = "Have you read https://en.wikipedia.org/wiki/Frame_(networking)?" expected = "Have you read <a href=\"https://en.wikipedia.org/wiki/Frame_(networking)\">https://en.wikipedia.org/wiki/Frame_(networking)</a>?" assert Linkify.link(text) == expected end test "works with URLs ending in unbalanced closed paren, no path separator, and no query params" do text = "http://example.com)" expected = "<a href=\"http://example.com\">http://example.com</a>)" assert Linkify.link(text) == expected end end end
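
The parser.ex hunk above changes only @match_hashtag: \x{200c} (ZERO WIDTH NON-JOINER, U+200C) is added to both character classes so a ZWNJ inside a word no longer cuts a hashtag short. The snippet below is an illustrative iex-style sketch rather than part of the patch, assuming this branch is compiled; the pattern is copied from the new @match_hashtag and the sample tag comes from the new "ZWNJ does not break up hashtags" test.

# Illustrative sketch only (not in the diff): bind the new pattern to a local
# variable so it can be exercised in iex.
match_hashtag =
  ~r/^(?<tag>\#[[:word:]_]*[[:alpha:]_·\x{200c}][[:word:]_·\p{M}\x{200c}]*)/u

# Sample tag from the new test; the word contains a zero-width non-joiner.
tag = "#ساٴينس"

# With \x{200c} in both classes the whole tag should be captured; the previous
# pattern stops at (or fails on) the first U+200C, so the full tag is not captured.
Regex.run(match_hashtag, tag, capture: [:tag])
# expected: ["#ساٴينس"]

# End to end, this is what the new test asserts:
Linkify.link(tag, rel: false, hashtag: true, hashtag_prefix: "https://example.com/tag/")
# expected: <a href="https://example.com/tag/ساٴينس">#ساٴينس</a>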