Page MenuHomePhorge

fast_html.ex
No OneTemporary

Size
4 KB
Referenced Files
None
Subscribers
None

fast_html.ex

defmodule :fast_html do
@moduledoc """
A module to decode html into a tree structure.
"""
@type tag() :: String.t() | atom()
@type attr() :: {String.t(), String.t()}
@type attr_list() :: [] | [attr()]
@type comment_node() :: {:comment, String.t()}
@type comment_node3() :: {:comment, [], String.t()}
@type tree() ::
{tag(), attr_list(), tree()}
| {tag(), attr_list(), nil}
| comment_node()
| comment_node3()
@type format_flag() :: :html_atoms | :nil_self_closing | :comment_tuple3
@doc """
Returns a tree representation from the given html string.
`opts` is a keyword list of options, the options available:
* `timeout` - Call timeout
* `format` - Format flags for the tree
The following format flags are available:
* `:html_atoms` uses atoms for known html tags (faster), binaries for everything else.
* `:nil_self_closing` uses `nil` to designate self-closing tags and void elements.
For example `<br>` is then being represented like `{"br", [], nil}`.
See http://w3c.github.io/html-reference/syntax.html#void-elements for a full list of void elements.
* `:comment_tuple3` uses 3-tuple elements for comments, instead of the default 2-tuple element.
## Examples
iex> :fast_html.decode("<h1>Hello world</h1>")
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}]}
iex> :fast_html.decode("Hello world", timeout: 0)
{:error, :timeout}
iex> :fast_html.decode("<span class='hello'>Hi there</span>")
{:ok, [{"html", [],
[{"head", [], []},
{"body", [], [{"span", [{"class", "hello"}], ["Hi there"]}]}]}]}
iex> :fast_html.decode("<body><!-- a comment --!></body>")
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [comment: " a comment "]}]}]}
iex> :fast_html.decode("<br>")
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"br", [], []}]}]}]}
iex> :fast_html.decode("<h1>Hello world</h1>", format: [:html_atoms])
{:ok, [{:html, [], [{:head, [], []}, {:body, [], [{:h1, [], ["Hello world"]}]}]}]}
iex> :fast_html.decode("<br>", format: [:nil_self_closing])
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"br", [], nil}]}]}]}
iex> :fast_html.decode("<body><!-- a comment --!></body>", format: [:comment_tuple3])
{:ok, [{"html", [], [{"head", [], []}, {"body", [], [{:comment, [], " a comment "}]}]}]}
iex> html = "<body><!-- a comment --!><unknown /></body>"
iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
{:ok, [{:html, [],
[{:head, [], []},
{:body, [], [{:comment, [], " a comment "}, {"unknown", [], nil}]}]}]}
"""
@spec decode(String.t(), format: [format_flag()]) ::
{:ok, tree()} | {:error, String.t() | atom()}
def decode(bin, opts \\ []) do
flags = Keyword.get(opts, :format, [])
timeout = Keyword.get(opts, :timeout, 10000)
FastHtml.Cnode.call({:decode, bin, flags}, timeout)
end
@doc """
Like `decode/2`, but for parsing [HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments).
`opts` is a keyword list of options, the options available are the same as in `decode/2` with addition of:
* `context` - Name of the context element, defaults to `div`
* `format` - Format flags for the tree
Example:
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl")
{:ok, [{"html", [], ["rin is the ", {"i", [], ["best"]}, " girl"]}]}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "title")
{:ok, [{"html", [], ["rin is the <i>best</i> girl"]}]}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "objective_truth")
{:error, :unknown_context_tag}
iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", format: [:html_atoms])
{:ok, [{:html, [], ["rin is the ", {:i, [], ["best"]}, " girl"]}]}
"""
def decode_fragment(bin, opts \\ []) do
flags = Keyword.get(opts, :format, [])
timeout = Keyword.get(opts, :timeout, 10000)
context = Keyword.get(opts, :context, "div")
FastHtml.Cnode.call({:decode_fragment, bin, flags, context}, timeout)
end
end

File Metadata

Mime Type
text/x-ruby
Expires
Fri, Nov 14, 11:20 PM (1 d, 18 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
632906
Default Alt Text
fast_html.ex (4 KB)

Event Timeline