Page MenuHomePhorge

No OneTemporary

Size
206 KB
Referenced Files
None
Subscribers
None
diff --git a/.formatter.exs b/.formatter.exs
index d2cda26..2acd350 100644
--- a/.formatter.exs
+++ b/.formatter.exs
@@ -1,4 +1,7 @@
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
# Used by "mix format"
[
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]
diff --git a/.gitignore b/.gitignore
index cbb20b6..850381f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,24 +1,27 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-License-Identifier: LGPL-2.1-only
+
# The directory Mix will write compiled artifacts to.
/_build/
# If you run "mix test --cover", coverage assets end up here.
/cover/
# The directory Mix downloads your dependencies sources to.
/deps/
# Where 3rd-party dependencies like ExDoc output generated docs.
/doc/
# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch
# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump
# Also ignore archive artifacts (built via "mix archive.build").
*.ez
priv/*
!priv/.gitignore
bench/snapshots
package-test
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index ee1665a..efe5acd 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,38 +1,41 @@
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
image: elixir:1.12-alpine
variables:
MIX_ENV: test
GIT_SUBMODULE_STRATEGY: recursive
cache:
key: ${CI_COMMIT_REF_SLUG}
paths:
- deps
- _build
stages:
- test
- publish
before_script:
- apk add build-base cmake
- mix local.hex --force
- mix local.rebar --force
- mix deps.get --only test
- mix compile --force
lint:
stage: test
script:
- mix format --check-formatted
unit-testing:
stage: test
coverage: '/(\d+\.\d+\%) \| Total/'
script:
- mix test --trace --preload-modules --cover
dialyzer:
stage: test
script:
- mix dialyzer
diff --git a/.gitmodules b/.gitmodules
index f78be16..2dea083 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,4 +1,8 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
[submodule "c_src/lexbor"]
path = c_src/lexbor
url = https://github.com/lanodan/lexbor.git
branch = bugfix/gcc-10
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e634b71..8a651eb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,58 +1,63 @@
+<!--
+SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+SPDX-License-Identifier: LGPL-2.1-only
+-->
+
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [2.0.4] - 2020-09-01
### Fixed
- Regression of the gcc 10 fix in the hex package, due to an outdated submodule on the publishing machine
## [2.0.3] - 2020-08-30
### Changed
- Improved error message on make errors
## [2.0.2] - 2020-08-26
### Fixed
- lexbor compilation errors with gcc 10
- Inability to use the library with distillery releases due to priv dir being in the source code
## [2.0.1] - 2020-08-04
### Fixed
- Build failures using the hex package due to CMake cache files accidentally included there
### Added
- Changelog is now available at hexdocs
## [2.0.0] - 2020-08-01
### Changed
- **Breaking:** CMake is now required at compile-time due to it being lexbor's build system
- **Breaking:** namespaces are no longer automatically appended, i.e. `<svg> </svg>` will be `{"svg", [], []}` instead of `{"svg:svg", [], []}`
- **Breaking:** when using `:nil_self_closing` flag, only valid [void elements](https://html.spec.whatwg.org/#void-elements) will have `nil` in children
- The now-deprecated myhtml was replaced with [lexbor](https://github.com/lexbor/lexbor)
- The worker process now communicates with the node via stdio, instead of TCP, which was known to cause issues
on BSD systems
### Added
- `FastHtml.Pool` for fast_html workers. There is a default pool of `System.schedulers_online/0` workers, but a custom pool can be started if desired, or it can be disabled altogether. See `FastHtml.Pool` module documentation for more info
## [1.0.3] - 2020-02-10
### Fixed
- C-Node not respawning after being killed.
## [1.0.2] - 2020-02-10
### Fixed
- Incorrect behavior when parsing empty attribute values. Instead of an empty string the attribute name was returned.
## [1.0.1] - 2019-12-11
### Added
- `:fast_html.decode_fragment`
### Fixed
- Errors from C-Node not being reported, timing out instead
## [1.0.0] - 2019-12-02
### Changed
- **BREAKING:** `:fast_html.decode` now returns an array of nodes at the top level, instead of a single node. This was done because it's possible to have more than one root node (for example, in `<!-- a comment --> <html> </html>` both the comment and the `html` tag are root nodes).
### Fixed
- Worker going into infinite loop when decoding a document with more than one root node.
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 6a67a0d..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,458 +0,0 @@
- GNU LESSER GENERAL PUBLIC LICENSE
- Version 2.1, February 1999
-
- Copyright (C) 1991, 1999 Free Software Foundation, Inc.
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-(This is the first released version of the Lesser GPL. It also counts
- as the successor of the GNU Library Public License, version 2, hence
- the version number 2.1.)
-
- Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-Licenses are intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-
- This license, the Lesser General Public License, applies to some
-specially designated software packages--typically libraries--of the
-Free Software Foundation and other authors who decide to use it. You
-can use it too, but we suggest you first think carefully about whether
-this license or the ordinary General Public License is the better
-strategy to use in any particular case, based on the explanations below.
-
- When we speak of free software, we are referring to freedom of use,
-not price. Our General Public Licenses are designed to make sure that
-you have the freedom to distribute copies of free software (and charge
-for this service if you wish); that you receive source code or can get
-it if you want it; that you can change the software and use pieces of
-it in new free programs; and that you are informed that you can do
-these things.
-
- To protect your rights, we need to make restrictions that forbid
-distributors to deny you these rights or to ask you to surrender these
-rights. These restrictions translate to certain responsibilities for
-you if you distribute copies of the library or if you modify it.
-
- For example, if you distribute copies of the library, whether gratis
-or for a fee, you must give the recipients all the rights that we gave
-you. You must make sure that they, too, receive or can get the source
-code. If you link other code with the library, you must provide
-complete object files to the recipients, so that they can relink them
-with the library after making changes to the library and recompiling
-it. And you must show them these terms so they know their rights.
-
- We protect your rights with a two-step method: (1) we copyright the
-library, and (2) we offer you this license, which gives you legal
-permission to copy, distribute and/or modify the library.
-
- To protect each distributor, we want to make it very clear that
-there is no warranty for the free library. Also, if the library is
-modified by someone else and passed on, the recipients should know
-that what they have is not the original version, so that the original
-author's reputation will not be affected by problems that might be
-introduced by others.
-
- Finally, software patents pose a constant threat to the existence of
-any free program. We wish to make sure that a company cannot
-effectively restrict the users of a free program by obtaining a
-restrictive license from a patent holder. Therefore, we insist that
-any patent license obtained for a version of the library must be
-consistent with the full freedom of use specified in this license.
-
- Most GNU software, including some libraries, is covered by the
-ordinary GNU General Public License. This license, the GNU Lesser
-General Public License, applies to certain designated libraries, and
-is quite different from the ordinary General Public License. We use
-this license for certain libraries in order to permit linking those
-libraries into non-free programs.
-
- When a program is linked with a library, whether statically or using
-a shared library, the combination of the two is legally speaking a
-combined work, a derivative of the original library. The ordinary
-General Public License therefore permits such linking only if the
-entire combination fits its criteria of freedom. The Lesser General
-Public License permits more lax criteria for linking other code with
-the library.
-
- We call this license the "Lesser" General Public License because it
-does Less to protect the user's freedom than the ordinary General
-Public License. It also provides other free software developers Less
-of an advantage over competing non-free programs. These disadvantages
-are the reason we use the ordinary General Public License for many
-libraries. However, the Lesser license provides advantages in certain
-special circumstances.
-
- For example, on rare occasions, there may be a special need to
-encourage the widest possible use of a certain library, so that it becomes
-a de-facto standard. To achieve this, non-free programs must be
-allowed to use the library. A more frequent case is that a free
-library does the same job as widely used non-free libraries. In this
-case, there is little to gain by limiting the free library to free
-software only, so we use the Lesser General Public License.
-
- In other cases, permission to use a particular library in non-free
-programs enables a greater number of people to use a large body of
-free software. For example, permission to use the GNU C Library in
-non-free programs enables many more people to use the whole GNU
-operating system, as well as its variant, the GNU/Linux operating
-system.
-
- Although the Lesser General Public License is Less protective of the
-users' freedom, it does ensure that the user of a program that is
-linked with the Library has the freedom and the wherewithal to run
-that program using a modified version of the Library.
-
- The precise terms and conditions for copying, distribution and
-modification follow. Pay close attention to the difference between a
-"work based on the library" and a "work that uses the library". The
-former contains code derived from the library, whereas the latter must
-be combined with the library in order to run.
-
- GNU LESSER GENERAL PUBLIC LICENSE
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 0. This License Agreement applies to any software library or other
-program which contains a notice placed by the copyright holder or
-other authorized party saying it may be distributed under the terms of
-this Lesser General Public License (also called "this License").
-Each licensee is addressed as "you".
-
- A "library" means a collection of software functions and/or data
-prepared so as to be conveniently linked with application programs
-(which use some of those functions and data) to form executables.
-
- The "Library", below, refers to any such software library or work
-which has been distributed under these terms. A "work based on the
-Library" means either the Library or any derivative work under
-copyright law: that is to say, a work containing the Library or a
-portion of it, either verbatim or with modifications and/or translated
-straightforwardly into another language. (Hereinafter, translation is
-included without limitation in the term "modification".)
-
- "Source code" for a work means the preferred form of the work for
-making modifications to it. For a library, complete source code means
-all the source code for all modules it contains, plus any associated
-interface definition files, plus the scripts used to control compilation
-and installation of the library.
-
- Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running a program using the Library is not restricted, and output from
-such a program is covered only if its contents constitute a work based
-on the Library (independent of the use of the Library in a tool for
-writing it). Whether that is true depends on what the Library does
-and what the program that uses the Library does.
-
- 1. You may copy and distribute verbatim copies of the Library's
-complete source code as you receive it, in any medium, provided that
-you conspicuously and appropriately publish on each copy an
-appropriate copyright notice and disclaimer of warranty; keep intact
-all the notices that refer to this License and to the absence of any
-warranty; and distribute a copy of this License along with the
-Library.
-
- You may charge a fee for the physical act of transferring a copy,
-and you may at your option offer warranty protection in exchange for a
-fee.
-
- 2. You may modify your copy or copies of the Library or any portion
-of it, thus forming a work based on the Library, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
- a) The modified work must itself be a software library.
-
- b) You must cause the files modified to carry prominent notices
- stating that you changed the files and the date of any change.
-
- c) You must cause the whole of the work to be licensed at no
- charge to all third parties under the terms of this License.
-
- d) If a facility in the modified Library refers to a function or a
- table of data to be supplied by an application program that uses
- the facility, other than as an argument passed when the facility
- is invoked, then you must make a good faith effort to ensure that,
- in the event an application does not supply such function or
- table, the facility still operates, and performs whatever part of
- its purpose remains meaningful.
-
- (For example, a function in a library to compute square roots has
- a purpose that is entirely well-defined independent of the
- application. Therefore, Subsection 2d requires that any
- application-supplied function or table used by this function must
- be optional: if the application does not supply it, the square
- root function must still compute square roots.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Library,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Library, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Library.
-
-In addition, mere aggregation of another work not based on the Library
-with the Library (or with a work based on the Library) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
- 3. You may opt to apply the terms of the ordinary GNU General Public
-License instead of this License to a given copy of the Library. To do
-this, you must alter all the notices that refer to this License, so
-that they refer to the ordinary GNU General Public License, version 2,
-instead of to this License. (If a newer version than version 2 of the
-ordinary GNU General Public License has appeared, then you can specify
-that version instead if you wish.) Do not make any other change in
-these notices.
-
- Once this change is made in a given copy, it is irreversible for
-that copy, so the ordinary GNU General Public License applies to all
-subsequent copies and derivative works made from that copy.
-
- This option is useful when you wish to copy part of the code of
-the Library into a program that is not a library.
-
- 4. You may copy and distribute the Library (or a portion or
-derivative of it, under Section 2) in object code or executable form
-under the terms of Sections 1 and 2 above provided that you accompany
-it with the complete corresponding machine-readable source code, which
-must be distributed under the terms of Sections 1 and 2 above on a
-medium customarily used for software interchange.
-
- If distribution of object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the
-source code from the same place satisfies the requirement to
-distribute the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
- 5. A program that contains no derivative of any portion of the
-Library, but is designed to work with the Library by being compiled or
-linked with it, is called a "work that uses the Library". Such a
-work, in isolation, is not a derivative work of the Library, and
-therefore falls outside the scope of this License.
-
- However, linking a "work that uses the Library" with the Library
-creates an executable that is a derivative of the Library (because it
-contains portions of the Library), rather than a "work that uses the
-library". The executable is therefore covered by this License.
-Section 6 states terms for distribution of such executables.
-
- When a "work that uses the Library" uses material from a header file
-that is part of the Library, the object code for the work may be a
-derivative work of the Library even though the source code is not.
-Whether this is true is especially significant if the work can be
-linked without the Library, or if the work is itself a library. The
-threshold for this to be true is not precisely defined by law.
-
- If such an object file uses only numerical parameters, data
-structure layouts and accessors, and small macros and small inline
-functions (ten lines or less in length), then the use of the object
-file is unrestricted, regardless of whether it is legally a derivative
-work. (Executables containing this object code plus portions of the
-Library will still fall under Section 6.)
-
- Otherwise, if the work is a derivative of the Library, you may
-distribute the object code for the work under the terms of Section 6.
-Any executables containing that work also fall under Section 6,
-whether or not they are linked directly with the Library itself.
-
- 6. As an exception to the Sections above, you may also combine or
-link a "work that uses the Library" with the Library to produce a
-work containing portions of the Library, and distribute that work
-under terms of your choice, provided that the terms permit
-modification of the work for the customer's own use and reverse
-engineering for debugging such modifications.
-
- You must give prominent notice with each copy of the work that the
-Library is used in it and that the Library and its use are covered by
-this License. You must supply a copy of this License. If the work
-during execution displays copyright notices, you must include the
-copyright notice for the Library among them, as well as a reference
-directing the user to the copy of this License. Also, you must do one
-of these things:
-
- a) Accompany the work with the complete corresponding
- machine-readable source code for the Library including whatever
- changes were used in the work (which must be distributed under
- Sections 1 and 2 above); and, if the work is an executable linked
- with the Library, with the complete machine-readable "work that
- uses the Library", as object code and/or source code, so that the
- user can modify the Library and then relink to produce a modified
- executable containing the modified Library. (It is understood
- that the user who changes the contents of definitions files in the
- Library will not necessarily be able to recompile the application
- to use the modified definitions.)
-
- b) Use a suitable shared library mechanism for linking with the
- Library. A suitable mechanism is one that (1) uses at run time a
- copy of the library already present on the user's computer system,
- rather than copying library functions into the executable, and (2)
- will operate properly with a modified version of the library, if
- the user installs one, as long as the modified version is
- interface-compatible with the version that the work was made with.
-
- c) Accompany the work with a written offer, valid for at
- least three years, to give the same user the materials
- specified in Subsection 6a, above, for a charge no more
- than the cost of performing this distribution.
-
- d) If distribution of the work is made by offering access to copy
- from a designated place, offer equivalent access to copy the above
- specified materials from the same place.
-
- e) Verify that the user has already received a copy of these
- materials or that you have already sent this user a copy.
-
- For an executable, the required form of the "work that uses the
-Library" must include any data and utility programs needed for
-reproducing the executable from it. However, as a special exception,
-the materials to be distributed need not include anything that is
-normally distributed (in either source or binary form) with the major
-components (compiler, kernel, and so on) of the operating system on
-which the executable runs, unless that component itself accompanies
-the executable.
-
- It may happen that this requirement contradicts the license
-restrictions of other proprietary libraries that do not normally
-accompany the operating system. Such a contradiction means you cannot
-use both them and the Library together in an executable that you
-distribute.
-
- 7. You may place library facilities that are a work based on the
-Library side-by-side in a single library together with other library
-facilities not covered by this License, and distribute such a combined
-library, provided that the separate distribution of the work based on
-the Library and of the other library facilities is otherwise
-permitted, and provided that you do these two things:
-
- a) Accompany the combined library with a copy of the same work
- based on the Library, uncombined with any other library
- facilities. This must be distributed under the terms of the
- Sections above.
-
- b) Give prominent notice with the combined library of the fact
- that part of it is a work based on the Library, and explaining
- where to find the accompanying uncombined form of the same work.
-
- 8. You may not copy, modify, sublicense, link with, or distribute
-the Library except as expressly provided under this License. Any
-attempt otherwise to copy, modify, sublicense, link with, or
-distribute the Library is void, and will automatically terminate your
-rights under this License. However, parties who have received copies,
-or rights, from you under this License will not have their licenses
-terminated so long as such parties remain in full compliance.
-
- 9. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Library or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Library (or any work based on the
-Library), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Library or works based on it.
-
- 10. Each time you redistribute the Library (or any work based on the
-Library), the recipient automatically receives a license from the
-original licensor to copy, distribute, link with or modify the Library
-subject to these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties with
-this License.
-
- 11. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Library at all. For example, if a patent
-license would not permit royalty-free redistribution of the Library by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Library.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply,
-and the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
- 12. If the distribution and/or use of the Library is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Library under this License may add
-an explicit geographical distribution limitation excluding those countries,
-so that distribution is permitted only in or among countries not thus
-excluded. In such case, this License incorporates the limitation as if
-written in the body of this License.
-
- 13. The Free Software Foundation may publish revised and/or new
-versions of the Lesser General Public License from time to time.
-Such new versions will be similar in spirit to the present version,
-but may differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Library
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation. If the Library does not specify a
-license version number, you may choose any version ever published by
-the Free Software Foundation.
-
- 14. If you wish to incorporate parts of the Library into other free
-programs whose distribution conditions are incompatible with these,
-write to the author to ask for permission. For software which is
-copyrighted by the Free Software Foundation, write to the Free
-Software Foundation; we sometimes make exceptions for this. Our
-decision will be guided by the two goals of preserving the free status
-of all derivatives of our free software and of promoting the sharing
-and reuse of software generally.
-
- NO WARRANTY
-
- 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
- END OF TERMS AND CONDITIONS
\ No newline at end of file
diff --git a/LICENSES/BSD-3-Clause.txt b/LICENSES/BSD-3-Clause.txt
new file mode 100644
index 0000000..ea890af
--- /dev/null
+++ b/LICENSES/BSD-3-Clause.txt
@@ -0,0 +1,11 @@
+Copyright (c) <year> <owner>.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/LICENSES/CC-BY-SA-4.0.txt b/LICENSES/CC-BY-SA-4.0.txt
new file mode 100644
index 0000000..835a683
--- /dev/null
+++ b/LICENSES/CC-BY-SA-4.0.txt
@@ -0,0 +1,170 @@
+Creative Commons Attribution-ShareAlike 4.0 International
+
+ Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.
+
+Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
+
+Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. More considerations for licensors.
+
+Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described.
+
+Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public.
+
+Creative Commons Attribution-ShareAlike 4.0 International Public License
+
+By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
+
+Section 1 – Definitions.
+
+ a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
+
+ b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
+
+ c. BY-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License.
+
+ d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
+
+ e. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
+
+ f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
+
+ g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike.
+
+ h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
+
+ i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
+
+ j. Licensor means the individual(s) or entity(ies) granting rights under this Public License.
+
+ k. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
+
+ l. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
+
+ m. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.
+
+Section 2 – Scope.
+
+ a. License grant.
+
+ 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
+
+ A. reproduce and Share the Licensed Material, in whole or in part; and
+
+ B. produce, reproduce, and Share Adapted Material.
+
+ 2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
+
+ 3. Term. The term of this Public License is specified in Section 6(a).
+
+ 4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
+
+ 5. Downstream recipients.
+
+ A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
+
+ B. Additional offer from the Licensor – Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply.
+
+ C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
+
+ 6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
+
+ b. Other rights.
+
+ 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
+
+ 2. Patent and trademark rights are not licensed under this Public License.
+
+ 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.
+
+Section 3 – License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the following conditions.
+
+ a. Attribution.
+
+ 1. If You Share the Licensed Material (including in modified form), You must:
+
+ A. retain the following if it is supplied by the Licensor with the Licensed Material:
+
+ i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
+
+ ii. a copyright notice;
+
+ iii. a notice that refers to this Public License;
+
+ iv. a notice that refers to the disclaimer of warranties;
+
+ v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
+
+ B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
+
+ C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
+
+ 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
+
+ 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
+
+ b. ShareAlike. In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply.
+
+ 1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License.
+
+ 2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material.
+
+ 3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply.
+
+Section 4 – Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
+
+ a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;
+
+ b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and
+
+ c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
+For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
+
+Section 5 – Disclaimer of Warranties and Limitation of Liability.
+
+ a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.
+
+ b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.
+
+ c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
+
+Section 6 – Term and Termination.
+
+ a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
+
+ b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
+
+ 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
+
+ 2. upon express reinstatement by the Licensor.
+
+ c. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
+
+ d. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
+
+ e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
+
+Section 7 – Other Terms and Conditions.
+
+ a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
+
+ b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
+
+Section 8 – Interpretation.
+
+ a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
+
+ b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
+
+ c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
+
+ d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
+
+Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
+
+Creative Commons may be contacted at creativecommons.org.
diff --git a/LICENSES/LGPL-2.1-only.txt b/LICENSES/LGPL-2.1-only.txt
new file mode 100644
index 0000000..c9aa530
--- /dev/null
+++ b/LICENSES/LGPL-2.1-only.txt
@@ -0,0 +1,175 @@
+GNU LESSER GENERAL PUBLIC LICENSE
+
+Version 2.1, February 1999
+
+Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.]
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users.
+
+This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below.
+
+When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things.
+
+To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it.
+
+For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights.
+
+We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library.
+
+To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others.
+
+Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license.
+
+Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs.
+
+When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library.
+
+We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances.
+
+For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License.
+
+In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system.
+
+Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library.
+
+The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you".
+
+A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables.
+
+The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".)
+
+"Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library.
+
+Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does.
+
+1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library.
+
+You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful.
+
+(For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License.
+
+3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices.
+
+Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy.
+
+This option is useful when you wish to copy part of the code of the Library into a program that is not a library.
+
+4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange.
+
+If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code.
+
+5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License.
+
+However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables.
+
+When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law.
+
+If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.)
+
+Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself.
+
+6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications.
+
+You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things:
+
+ a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy.
+
+For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable.
+
+It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute.
+
+7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above.
+
+ b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work.
+
+8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance.
+
+9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it.
+
+10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License.
+
+11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice.
+
+This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License.
+
+12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License.
+
+13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation.
+
+14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally.
+
+NO WARRANTY
+
+15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Libraries
+
+If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License).
+
+To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.
+
+ one line to give the library's name and an idea of what it does.
+ Copyright (C) year name of author
+
+ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in
+the library `Frob' (a library for tweaking knobs) written
+by James Random Hacker.
+
+signature of Ty Coon, 1 April 1990
+Ty Coon, President of Vice
+That's all there is to it!
diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt
new file mode 100644
index 0000000..2071b23
--- /dev/null
+++ b/LICENSES/MIT.txt
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) <year> <copyright holders>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
index 67abc30..5851262 100644
--- a/Makefile
+++ b/Makefile
@@ -1,56 +1,60 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
# Names of the external tools.
MIX = mix
CMAKE = cmake
# Flags used to compile the C-Node worker (priv/fasthtml_worker).
CNODE_CFLAGS = -g -O2 -std=c99 -pedantic -Wcomment -Wextra -Wno-old-style-declaration -Wall
# ignore unused parameter warnings
CNODE_CFLAGS += -Wno-unused-parameter
# set erlang include path
# (asks the local `erl` for "<root_dir>/erts-<version>")
ERLANG_PATH = $(shell erl -eval 'io:format("~s", [lists:concat([code:root_dir(), "/erts-", erlang:system_info(version)])])' -s init stop -noshell)
CNODE_CFLAGS += -I$(ERLANG_PATH)/include
# expecting lexbor as a submodule in c_src/
# that way we can pin a version and package the whole thing in hex
# hex does not allow for non-app related dependencies.
LXB_PATH = c_src/lexbor
LXB_STATIC = $(LXB_PATH)/liblexbor_static.a
CNODE_CFLAGS += -I$(LXB_PATH)/source
# avoid undefined reference errors to pthread_mutex_trylock
CNODE_CFLAGS += -lpthread
# C-Node
# erl_interface is looked up next to the erts directory; the wildcard picks up
# whatever erl_interface-<version> directory is installed there.
ERL_INTERFACE = $(wildcard $(ERLANG_PATH)/../lib/erl_interface-*)
CNODE_CFLAGS += -L$(ERL_INTERFACE)/lib
CNODE_CFLAGS += -I$(ERL_INTERFACE)/include
CNODE_LDFLAGS =
# OTP22_DEF is not assigned anywhere in the visible Makefile; presumably it is
# passed in by the caller (environment or command line) -- confirm.
# When it is YES the C code is compiled with OTP_22_OR_NEWER defined and
# -lerl_interface is not linked; otherwise -lerl_interface is added.
ifeq ($(OTP22_DEF),YES)
CNODE_CFLAGS += -DOTP_22_OR_NEWER
else
CNODE_LDFLAGS += -lerl_interface
endif
CNODE_LDFLAGS += -lei -pthread
.PHONY: all
# Default target: build the C-Node worker executable.
all: priv/fasthtml_worker

# Configure (in-tree) and build the static lexbor library.
# - `&&` instead of `;` so cmake never runs in the wrong directory if `cd` fails.
# - $(CMAKE) instead of a literal `cmake` so the tool can be overridden.
# - explicit `.` source path: cmake warns when no source/build directory is given.
$(LXB_STATIC): $(LXB_PATH)
	# Sadly, building components separately seems to sporadically fail
	cd $(LXB_PATH) && $(CMAKE) -DLEXBOR_BUILD_SEPARATELY=OFF -DLEXBOR_BUILD_SHARED=OFF .
	$(MAKE) -C $(LXB_PATH)

# Compile the worker from its single C source and link it against the static
# lexbor library; $@ is the output binary, $< is c_src/fasthtml_worker.c.
priv/fasthtml_worker: c_src/fasthtml_worker.c $(LXB_STATIC)
	mkdir -p priv
	$(CC) -o $@ $< $(LXB_STATIC) $(CNODE_CFLAGS) $(CNODE_LDFLAGS)
# Neither target produces a file of its own name, so mark both as phony.
.PHONY: clean clean-myhtml

# Remove the worker binary, artifacts carrying the old myhtmlex name, and the
# package-test directory, after cleaning the bundled parser library.
clean: clean-myhtml
	$(RM) -r priv/myhtmlex*
	$(RM) priv/fasthtml_worker
	$(RM) myhtmlex-*.tar
	$(RM) -r package-test

# Target name kept for backward compatibility; the bundled library is lexbor.
# The previous recipe used $(MYHTML_PATH), which is never defined, so it
# expanded to `make -C clean` and failed. The guard skips the sub-make when
# lexbor has not been configured yet (no generated Makefile to run).
clean-myhtml:
	if [ -f $(LXB_PATH)/Makefile ]; then $(MAKE) -C $(LXB_PATH) clean; fi
diff --git a/README.md b/README.md
index 16a31ea..391e168 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,34 @@
+<!--
+SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+SPDX-License-Identifier: LGPL-2.1-only
+-->
+
# FastHTML
A C Node wrapping lexborisov's [lexbor](https://github.com/lexbor/lexbor).
Primarily used with [FastSanitize](https://git.pleroma.social/pleroma/fast_sanitize).
* Available as a hex package: `{:fast_html, "~> 2.0"}`
* [Documentation](https://hexdocs.pm/fast_html/fast_html.html)
## Benchmarks
The following table provides the median time it takes to decode a string to a tree for HTML parsers that can be used from Elixir. Benchmarks were conducted on a machine with an `AMD Ryzen 9 3950X (32) @ 3.500GHz` CPU and 32GB of RAM. You can run the benchmark yourself with the `mix fast_html.bench` task.
| File/Parser | fast_html (Port) | mochiweb_html (erlang) | html5ever (Rust NIF) | Myhtmlex (NIF)¹ |
|----------------------|--------------------|------------------------|----------------------|----------------|
| document-large.html (6.9M) | 125.12 ms | 1778.34 ms | 395.21 ms | 327.17 ms |
-| document-medium.html (85K) | 1.93 ms | 12.10 ms | 4.74 ms | 3.82 ms |
| document-small.html (25K)| 0.50 ms | 2.76 ms | 1.72 ms | 1.19 ms |
| fragment-large.html (33K)| 0.93 ms | 4.78 ms | 2.34 ms | 2.15 ms |
| fragment-small.html² (757B)| 44.60 μs | 42.13 μs | 43.58 μs | 289.71 μs |
Full benchmark output can be seen in [this snippet](https://git.pleroma.social/pleroma/elixir-libraries/fast_html/snippets/3128)
1. Myhtmlex has a C-Node mode, but it wasn't benchmarked here because it segfaults on `document-large.html`
2. The slowdown on `fragment-small.html` is due to Port overhead. Unlike html5ever and Myhtmlex in NIF mode, `fast_html` has the parser process isolated and communicates with it over stdio, so even if a fatal crash in the parser happens, it won't bring down the entire VM.
## Contribution / Bug Reports
* Please make sure you do `git submodule update` after a checkout/pull
* The project aims to be fully tested
diff --git a/bench_fixtures/document-large.html.license b/bench_fixtures/document-large.html.license
new file mode 100644
index 0000000..f27d5e6
--- /dev/null
+++ b/bench_fixtures/document-large.html.license
@@ -0,0 +1,3 @@
+# https://github.com/whatwg/html/blob/main/LICENSE
+SPDX-FileCopyrightText: Copyright © WHATWG (Apple, Google, Mozilla, Microsoft).
+SPDX-License-Identifier: CC-BY-SA-4.0 AND BSD-3-Clause
diff --git a/bench_fixtures/document-medium.html b/bench_fixtures/document-medium.html
deleted file mode 100644
index 809b5b6..0000000
--- a/bench_fixtures/document-medium.html
+++ /dev/null
@@ -1,1512 +0,0 @@
-<!DOCTYPE html>
-<html class=" ">
- <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb# object: http://ogp.me/ns/object# article: http://ogp.me/ns/article# profile: http://ogp.me/ns/profile#">
- <meta charset='utf-8'>
- <meta http-equiv="X-UA-Compatible" content="IE=edge">
-
-
- <title>rgrove/sanitize at dev-3.0.0 · GitHub</title>
- <link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="GitHub" />
- <link rel="fluid-icon" href="https://github.com/fluidicon.png" title="GitHub" />
- <link rel="apple-touch-icon" sizes="57x57" href="/apple-touch-icon-114.png" />
- <link rel="apple-touch-icon" sizes="114x114" href="/apple-touch-icon-114.png" />
- <link rel="apple-touch-icon" sizes="72x72" href="/apple-touch-icon-144.png" />
- <link rel="apple-touch-icon" sizes="144x144" href="/apple-touch-icon-144.png" />
- <meta property="fb:app_id" content="1401488693436528"/>
-
- <meta content="@github" name="twitter:site" /><meta content="summary" name="twitter:card" /><meta content="rgrove/sanitize" name="twitter:title" /><meta content="Whitelist-based Ruby HTML sanitizer." name="twitter:description" /><meta content="https://avatars0.githubusercontent.com/u/1465?s=400" name="twitter:image:src" />
-<meta content="GitHub" property="og:site_name" /><meta content="object" property="og:type" /><meta content="https://avatars0.githubusercontent.com/u/1465?s=400" property="og:image" /><meta content="rgrove/sanitize" property="og:title" /><meta content="https://github.com/rgrove/sanitize" property="og:url" /><meta content="Whitelist-based Ruby HTML sanitizer." property="og:description" />
-
- <link rel="assets" href="https://assets-cdn.github.com/">
- <link rel="conduit-xhr" href="https://ghconduit.com:25035/">
- <link rel="xhr-socket" href="/_sockets" />
-
- <meta name="msapplication-TileImage" content="/windows-tile.png" />
- <meta name="msapplication-TileColor" content="#ffffff" />
- <meta name="selected-link" value="repo_source" data-pjax-transient />
- <meta name="google-analytics" content="UA-3769691-2">
-
- <meta content="collector.githubapp.com" name="octolytics-host" /><meta content="collector-cdn.github.com" name="octolytics-script-host" /><meta content="github" name="octolytics-app-id" /><meta content="4C69DD2E:5170:6C7636:537BD990" name="octolytics-dimension-request_id" />
-
-
-
-
- <link rel="icon" type="image/x-icon" href="https://assets-cdn.github.com/favicon.ico" />
-
- <meta content="authenticity_token" name="csrf-param" />
-<meta content="vNsFBLnpso1tK/VKf2J+RA/S+CS/gAV56qG3exeB2dczzElTQhMtnOYq+nHZD6HlsH3FIXsvb9TUz7C8hRC+Aw==" name="csrf-token" />
-
- <link href="https://assets-cdn.github.com/assets/github-dc3b5ef1bc6b1a7195c5411444124d626d072527.css" media="all" rel="stylesheet" type="text/css" />
- <link href="https://assets-cdn.github.com/assets/github2-aaf82d4c2cd800a7e0df9bc5616889f46dc919b3.css" media="all" rel="stylesheet" type="text/css" />
-
-
-
- <meta http-equiv="x-pjax-version" content="28535d584f42419aa9cc2690ca69da48">
-
-
- <meta name="description" content="Whitelist-based Ruby HTML sanitizer." />
-
- <meta content="1465" name="octolytics-dimension-user_id" /><meta content="rgrove" name="octolytics-dimension-user_login" /><meta content="96577" name="octolytics-dimension-repository_id" /><meta content="rgrove/sanitize" name="octolytics-dimension-repository_nwo" /><meta content="true" name="octolytics-dimension-repository_public" /><meta content="false" name="octolytics-dimension-repository_is_fork" /><meta content="96577" name="octolytics-dimension-repository_network_root_id" /><meta content="rgrove/sanitize" name="octolytics-dimension-repository_network_root_nwo" />
- <link href="https://github.com/rgrove/sanitize/commits/dev-3.0.0.atom" rel="alternate" title="Recent Commits to sanitize:dev-3.0.0" type="application/atom+xml" />
-
- </head>
-
-
- <body class="logged_out env-production macintosh vis-public">
- <a href="#start-of-content" tabindex="1" class="accessibility-aid js-skip-to-content">Skip to content</a>
- <div class="wrapper">
-
-
-
-
-
-
-
- <div class="header header-logged-out">
- <div class="container clearfix">
-
- <a class="header-logo-wordmark" href="https://github.com/">
- <span class="mega-octicon octicon-logo-github"></span>
- </a>
-
- <div class="header-actions">
- <a class="button primary" href="/join">Sign up</a>
- <a class="button signin" href="/login?return_to=%2Frgrove%2Fsanitize%2Ftree%2Fdev-3.0.0">Sign in</a>
- </div>
-
- <div class="command-bar js-command-bar in-repository">
-
- <ul class="top-nav">
- <li class="explore"><a href="/explore">Explore</a></li>
- <li class="features"><a href="/features">Features</a></li>
- <li class="enterprise"><a href="https://enterprise.github.com/">Enterprise</a></li>
- <li class="blog"><a href="/blog">Blog</a></li>
- </ul>
- <form accept-charset="UTF-8" action="/search" class="command-bar-form" id="top_search_form" method="get">
-
-<div class="commandbar">
- <span class="message"></span>
- <input type="text" data-hotkey="s, /" name="q" id="js-command-bar-field" placeholder="Search or type a command" tabindex="1" autocapitalize="off"
-
-
- data-repo="rgrove/sanitize"
- data-branch="dev-3.0.0"
- data-sha="027259501299baa4767034acc355c600bd9ef720"
- >
- <div class="display hidden"></div>
-</div>
-
- <input type="hidden" name="nwo" value="rgrove/sanitize" />
-
- <div class="select-menu js-menu-container js-select-menu search-context-select-menu">
- <span class="minibutton select-menu-button js-menu-target" role="button" aria-haspopup="true">
- <span class="js-select-button">This repository</span>
- </span>
-
- <div class="select-menu-modal-holder js-menu-content js-navigation-container" aria-hidden="true">
- <div class="select-menu-modal">
-
- <div class="select-menu-item js-navigation-item js-this-repository-navigation-item selected">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <input type="radio" class="js-search-this-repository" name="search_target" value="repository" checked="checked" />
- <div class="select-menu-item-text js-select-button-text">This repository</div>
- </div> <!-- /.select-menu-item -->
-
- <div class="select-menu-item js-navigation-item js-all-repositories-navigation-item">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <input type="radio" name="search_target" value="global" />
- <div class="select-menu-item-text js-select-button-text">All repositories</div>
- </div> <!-- /.select-menu-item -->
-
- </div>
- </div>
- </div>
-
- <span class="help tooltipped tooltipped-s" aria-label="Show command bar help">
- <span class="octicon octicon-question"></span>
- </span>
-
-
- <input type="hidden" name="ref" value="cmdform">
-
-</form>
- </div>
-
- </div>
-</div>
-
-
-
- <div id="start-of-content" class="accessibility-aid"></div>
- <div class="site" itemscope itemtype="http://schema.org/WebPage">
- <div id="js-flash-container">
-
- </div>
- <div class="pagehead repohead instapaper_ignore readability-menu">
- <div class="container">
-
-
-<ul class="pagehead-actions">
-
-
- <li>
- <a href="/login?return_to=%2Frgrove%2Fsanitize"
- class="minibutton with-count star-button tooltipped tooltipped-n"
- aria-label="You must be signed in to star a repository" rel="nofollow">
- <span class="octicon octicon-star"></span>Star
- </a>
-
- <a class="social-count js-social-count" href="/rgrove/sanitize/stargazers">
- 1,152
- </a>
-
- </li>
-
- <li>
- <a href="/login?return_to=%2Frgrove%2Fsanitize"
- class="minibutton with-count js-toggler-target fork-button tooltipped tooltipped-n"
- aria-label="You must be signed in to fork a repository" rel="nofollow">
- <span class="octicon octicon-git-branch"></span>Fork
- </a>
- <a href="/rgrove/sanitize/network" class="social-count">
- 65
- </a>
- </li>
-</ul>
-
- <h1 itemscope itemtype="http://data-vocabulary.org/Breadcrumb" class="entry-title public">
- <span class="repo-label"><span>public</span></span>
- <span class="mega-octicon octicon-repo"></span>
- <span class="author"><a href="/rgrove" class="url fn" itemprop="url" rel="author"><span itemprop="title">rgrove</span></a></span><!--
- --><span class="path-divider">/</span><!--
- --><strong><a href="/rgrove/sanitize" class="js-current-repository js-repo-home-link">sanitize</a></strong>
-
- <span class="page-context-loader">
- <img alt="" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </span>
-
- </h1>
- </div><!-- /.container -->
- </div><!-- /.repohead -->
-
- <div class="container">
- <div class="repository-with-sidebar repo-container new-discussion-timeline js-new-discussion-timeline with-full-navigation ">
- <div class="repository-sidebar clearfix">
-
-
-<div class="sunken-menu vertical-right repo-nav js-repo-nav js-repository-container-pjax js-octicon-loaders">
- <div class="sunken-menu-contents">
- <ul class="sunken-menu-group">
- <li class="tooltipped tooltipped-w" aria-label="Code">
- <a href="/rgrove/sanitize/tree/dev-3.0.0" aria-label="Code" class="selected js-selected-navigation-item sunken-menu-item" data-hotkey="g c" data-pjax="true" data-selected-links="repo_source repo_downloads repo_commits repo_releases repo_tags repo_branches /rgrove/sanitize/tree/dev-3.0.0">
- <span class="octicon octicon-code"></span> <span class="full-word">Code</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
-
- <li class="tooltipped tooltipped-w" aria-label="Issues">
- <a href="/rgrove/sanitize/issues" aria-label="Issues" class="js-selected-navigation-item sunken-menu-item js-disable-pjax" data-hotkey="g i" data-selected-links="repo_issues /rgrove/sanitize/issues">
- <span class="octicon octicon-issue-opened"></span> <span class="full-word">Issues</span>
- <span class='counter'>4</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
-
- <li class="tooltipped tooltipped-w" aria-label="Pull Requests">
- <a href="/rgrove/sanitize/pulls" aria-label="Pull Requests" class="js-selected-navigation-item sunken-menu-item js-disable-pjax" data-hotkey="g p" data-selected-links="repo_pulls /rgrove/sanitize/pulls">
- <span class="octicon octicon-git-pull-request"></span> <span class="full-word">Pull Requests</span>
- <span class='counter'>0</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
-
-
- <li class="tooltipped tooltipped-w" aria-label="Wiki">
- <a href="/rgrove/sanitize/wiki" aria-label="Wiki" class="js-selected-navigation-item sunken-menu-item js-disable-pjax" data-hotkey="g w" data-selected-links="repo_wiki /rgrove/sanitize/wiki">
- <span class="octicon octicon-book"></span> <span class="full-word">Wiki</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
- </ul>
- <div class="sunken-menu-separator"></div>
- <ul class="sunken-menu-group">
-
- <li class="tooltipped tooltipped-w" aria-label="Pulse">
- <a href="/rgrove/sanitize/pulse" aria-label="Pulse" class="js-selected-navigation-item sunken-menu-item" data-pjax="true" data-selected-links="pulse /rgrove/sanitize/pulse">
- <span class="octicon octicon-pulse"></span> <span class="full-word">Pulse</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
-
- <li class="tooltipped tooltipped-w" aria-label="Graphs">
- <a href="/rgrove/sanitize/graphs" aria-label="Graphs" class="js-selected-navigation-item sunken-menu-item" data-pjax="true" data-selected-links="repo_graphs repo_contributors /rgrove/sanitize/graphs">
- <span class="octicon octicon-graph"></span> <span class="full-word">Graphs</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
-
- <li class="tooltipped tooltipped-w" aria-label="Network">
- <a href="/rgrove/sanitize/network" aria-label="Network" class="js-selected-navigation-item sunken-menu-item js-disable-pjax" data-selected-links="repo_network /rgrove/sanitize/network">
- <span class="octicon octicon-git-branch"></span> <span class="full-word">Network</span>
- <img alt="" class="mini-loader" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
-</a> </li>
- </ul>
-
-
- </div>
-</div>
-
- <div class="only-with-full-nav">
-
-
-
-
-<div class="clone-url open"
- data-protocol-type="http"
- data-url="/users/set_protocol?protocol_selector=http&amp;protocol_type=clone">
- <h3><strong>HTTPS</strong> clone URL</h3>
- <div class="clone-url-box">
- <input type="text" class="clone js-url-field"
- value="https://github.com/rgrove/sanitize.git" readonly="readonly">
- <span class="url-box-clippy">
- <button aria-label="copy to clipboard" class="js-zeroclipboard minibutton zeroclipboard-button" data-clipboard-text="https://github.com/rgrove/sanitize.git" data-copied-hint="copied!" type="button"><span class="octicon octicon-clippy"></span></button>
- </span>
- </div>
-</div>
-
-
-
-<div class="clone-url "
- data-protocol-type="subversion"
- data-url="/users/set_protocol?protocol_selector=subversion&amp;protocol_type=clone">
- <h3><strong>Subversion</strong> checkout URL</h3>
- <div class="clone-url-box">
- <input type="text" class="clone js-url-field"
- value="https://github.com/rgrove/sanitize" readonly="readonly">
- <span class="url-box-clippy">
- <button aria-label="copy to clipboard" class="js-zeroclipboard minibutton zeroclipboard-button" data-clipboard-text="https://github.com/rgrove/sanitize" data-copied-hint="copied!" type="button"><span class="octicon octicon-clippy"></span></button>
- </span>
- </div>
-</div>
-
-
-<p class="clone-options">You can clone with
- <a href="#" class="js-clone-selector" data-protocol="http">HTTPS</a>
- or <a href="#" class="js-clone-selector" data-protocol="subversion">Subversion</a>.
- <span class="help tooltipped tooltipped-n" aria-label="Get help on which URL is right for you.">
- <a href="https://help.github.com/articles/which-remote-url-should-i-use">
- <span class="octicon octicon-question"></span>
- </a>
- </span>
-</p>
-
- <a href="http://mac.github.com" data-url="github-mac://openRepo/https://github.com/rgrove/sanitize" class="minibutton sidebar-button js-conduit-rewrite-url" title="Save rgrove/sanitize to your computer and use it in GitHub Desktop." aria-label="Save rgrove/sanitize to your computer and use it in GitHub Desktop.">
- <span class="octicon octicon-device-desktop"></span>
- Clone in Desktop
- </a>
-
-
- <a href="/rgrove/sanitize/archive/dev-3.0.0.zip"
- class="minibutton sidebar-button"
- aria-label="Download rgrove/sanitize as a zip file"
- title="Download rgrove/sanitize as a zip file"
- rel="nofollow">
- <span class="octicon octicon-cloud-download"></span>
- Download ZIP
- </a>
- </div>
- </div><!-- /.repository-sidebar -->
-
- <div id="js-repo-pjax-container" class="repository-content context-loader-container" data-pjax-container>
-
-
-<span id="js-show-full-navigation"></span>
-
-<div class="repository-meta js-details-container ">
- <div class="repository-description js-details-show">
- <p>Whitelist-based Ruby HTML sanitizer.</p>
- </div>
-
-
-
-</div>
-
-<div class="capped-box overall-summary ">
-
- <div class="stats-switcher-viewport js-stats-switcher-viewport">
- <div class="stats-switcher-wrapper">
- <ul class="numbers-summary">
- <li class="commits">
- <a data-pjax href="/rgrove/sanitize/commits/dev-3.0.0">
- <span class="num">
- <span class="octicon octicon-history"></span>
- 241
- </span>
- commits
- </a>
- </li>
- <li>
- <a data-pjax href="/rgrove/sanitize/branches">
- <span class="num">
- <span class="octicon octicon-git-branch"></span>
- 5
- </span>
- branches
- </a>
- </li>
-
- <li>
- <a data-pjax href="/rgrove/sanitize/releases">
- <span class="num">
- <span class="octicon octicon-tag"></span>
- 19
- </span>
- releases
- </a>
- </li>
-
- <li>
-
- <a href="/rgrove/sanitize/graphs/contributors">
- <span class="num">
- <span class="octicon octicon-organization"></span>
- 16
- </span>
- contributors
- </a>
- </li>
- </ul>
-
- <div class="repository-lang-stats">
- <ol class="repository-lang-stats-numbers">
- <li>
- <a href="/rgrove/sanitize/search?l=ruby">
- <span class="color-block language-color" style="background-color:#701516;"></span>
- <span class="lang">Ruby</span>
- <span class="percent">100%</span>
- </a>
- </li>
- </ol>
- </div>
- </div>
- </div>
-
-</div>
-
- <div class="tooltipped tooltipped-s" aria-label="Show language statistics">
- <a href="#"
- class="repository-lang-stats-graph js-toggle-lang-stats"
- style="background-color:#701516">
- <span class="language-color" style="width:100%; background-color:#701516;" itemprop="keywords">Ruby</span>
- </a>
- </div>
-
-
-
-<div class="file-navigation in-mid-page">
- <a href="/rgrove/sanitize/compare/dev-3.0.0" aria-label="Compare, review, create a pull request" class="minibutton compact primary tooltipped tooltipped-s" aria-label="Compare &amp; review" data-pjax>
- <span class="octicon octicon-git-compare"></span>
- </a>
-
-
-
-<div class="select-menu js-menu-container js-select-menu" >
- <span class="minibutton select-menu-button js-menu-target" data-hotkey="w"
- data-master-branch="master"
- data-ref="dev-3.0.0"
- role="button" aria-label="Switch branches or tags" tabindex="0" aria-haspopup="true">
- <span class="octicon octicon-git-branch"></span>
- <i>branch:</i>
- <span class="js-select-button">dev-3.0.0</span>
- </span>
-
- <div class="select-menu-modal-holder js-menu-content js-navigation-container" data-pjax aria-hidden="true">
-
- <div class="select-menu-modal">
- <div class="select-menu-header">
- <span class="select-menu-title">Switch branches/tags</span>
- <span class="octicon octicon-remove-close js-menu-close"></span>
- </div> <!-- /.select-menu-header -->
-
- <div class="select-menu-filters">
- <div class="select-menu-text-filter">
- <input type="text" aria-label="Filter branches/tags" id="context-commitish-filter-field" class="js-filterable-field js-navigation-enable" placeholder="Filter branches/tags">
- </div>
- <div class="select-menu-tabs">
- <ul>
- <li class="select-menu-tab">
- <a href="#" data-tab-filter="branches" class="js-select-menu-tab">Branches</a>
- </li>
- <li class="select-menu-tab">
- <a href="#" data-tab-filter="tags" class="js-select-menu-tab">Tags</a>
- </li>
- </ul>
- </div><!-- /.select-menu-tabs -->
- </div><!-- /.select-menu-filters -->
-
- <div class="select-menu-list select-menu-tab-bucket js-select-menu-tab-bucket" data-tab-filter="branches">
-
- <div data-filterable-for="context-commitish-filter-field" data-filterable-type="substring">
-
-
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/dev-2.2.0"
- data-name="dev-2.2.0"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="dev-2.2.0">dev-2.2.0</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item selected">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/dev-3.0.0"
- data-name="dev-3.0.0"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="dev-3.0.0">dev-3.0.0</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/freeze"
- data-name="freeze"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="freeze">freeze</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/gumbo"
- data-name="gumbo"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="gumbo">gumbo</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/master"
- data-name="master"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="master">master</a>
- </div> <!-- /.select-menu-item -->
- </div>
-
- <div class="select-menu-no-results">Nothing to show</div>
- </div> <!-- /.select-menu-list -->
-
- <div class="select-menu-list select-menu-tab-bucket js-select-menu-tab-bucket" data-tab-filter="tags">
- <div data-filterable-for="context-commitish-filter-field" data-filterable-type="substring">
-
-
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/v2.0.6"
- data-name="v2.0.6"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="v2.0.6">v2.0.6</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/v2.0.5"
- data-name="v2.0.5"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="v2.0.5">v2.0.5</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/v2.0.4"
- data-name="v2.0.4"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="v2.0.4">v2.0.4</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/v2.0.3"
- data-name="v2.0.3"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="v2.0.3">v2.0.3</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-2.0.2"
- data-name="release-2.0.2"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-2.0.2">release-2.0.2</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-2.0.1"
- data-name="release-2.0.1"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-2.0.1">release-2.0.1</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-2.0.0"
- data-name="release-2.0.0"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-2.0.0">release-2.0.0</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.2.1"
- data-name="release-1.2.1"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.2.1">release-1.2.1</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.2.0"
- data-name="release-1.2.0"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.2.0">release-1.2.0</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.1.0"
- data-name="release-1.1.0"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.1.0">release-1.1.0</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.8"
- data-name="release-1.0.8"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.8">release-1.0.8</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.7"
- data-name="release-1.0.7"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.7">release-1.0.7</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.6"
- data-name="release-1.0.6"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.6">release-1.0.6</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.5"
- data-name="release-1.0.5"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.5">release-1.0.5</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.4"
- data-name="release-1.0.4"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.4">release-1.0.4</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.3"
- data-name="release-1.0.3"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.3">release-1.0.3</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.2"
- data-name="release-1.0.2"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.2">release-1.0.2</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.1"
- data-name="release-1.0.1"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.1">release-1.0.1</a>
- </div> <!-- /.select-menu-item -->
- <div class="select-menu-item js-navigation-item ">
- <span class="select-menu-item-icon octicon octicon-check"></span>
- <a href="/rgrove/sanitize/tree/release-1.0.0"
- data-name="release-1.0.0"
- data-skip-pjax="true"
- rel="nofollow"
- class="js-navigation-open select-menu-item-text js-select-button-text css-truncate-target"
- title="release-1.0.0">release-1.0.0</a>
- </div> <!-- /.select-menu-item -->
- </div>
-
- <div class="select-menu-no-results">Nothing to show</div>
- </div> <!-- /.select-menu-list -->
-
- </div> <!-- /.select-menu-modal -->
- </div> <!-- /.select-menu-modal-holder -->
-</div> <!-- /.select-menu -->
-
-
- <div class="breadcrumb"><span class='repo-root js-repo-root'><span itemscope="" itemtype="http://data-vocabulary.org/Breadcrumb"><a href="/rgrove/sanitize/tree/dev-3.0.0" data-branch="dev-3.0.0" data-direction="back" data-pjax="true" itemscope="url"><span itemprop="title">sanitize</span></a></span></span><span class="separator"> / </span><form action="/login?return_to=%2Frgrove%2Fsanitize%2Ftree%2Fdev-3.0.0" aria-label="Sign in to make or propose changes" class="js-new-blob-form tooltipped tooltipped-e new-file-link" method="post"><span aria-label="Sign in to make or propose changes" class="js-new-blob-submit octicon octicon-file-add" data-test-id="create-new-git-file" role="button"></span></form></div>
-</div>
-
-
-
-<a href="/rgrove/sanitize/find/dev-3.0.0"
- data-hotkey="t" class="js-show-file-finder" style="display:none" data-pjax>Show File Finder</a>
-
- <div class="branch-infobar clearfix">
- <p>
- This branch is
- 43 commits ahead and
- 0 commits behind master
- </p>
-
- <ul class="lightweight-actions">
- <li>
- <a href="/rgrove/sanitize/pull/new/dev-3.0.0">
- <span class="octicon octicon-git-pull-request"></span>
- Pull Request
- </a>
- </li>
- <!--
- <li>
- <a href="#">
- <span class="octicon octicon-comment-discussion"></span>
- Branch discussion
- </a>
- </li>
- -->
- <li>
- <a href="/rgrove/sanitize/compare/dev-3.0.0">
- <span class="octicon octicon-diff"></span>
- Compare
- </a>
- </li>
- </ul>
-
- </div>
-
-
- <div class="commit commit-tease js-details-container" >
- <p class="commit-title ">
- <a href="/rgrove/sanitize/commit/2e6c581fa92602e899407f018feb0320c5d130be" class="message" data-pjax="true" title="Add a couple of legacy attributes to the relaxed config.">Add a couple of legacy attributes to the relaxed config.</a>
-
- </p>
- <div class="commit-meta">
- <button aria-label="Copy SHA" class="js-zeroclipboard zeroclipboard-link" data-clipboard-text="2e6c581fa92602e899407f018feb0320c5d130be" data-copied-hint="copied!" type="button"><span class="octicon octicon-clippy"></span></button>
- <a href="/rgrove/sanitize/commit/2e6c581fa92602e899407f018feb0320c5d130be" class="sha-block" data-pjax>latest commit <span class="sha">2e6c581fa9</span></a>
-
- <div class="authorship">
- <img alt="Ryan Grove" class="gravatar js-avatar" data-user="1465" height="20" src="https://avatars3.githubusercontent.com/u/1465?s=140" width="20" />
- <span class="author-name"><a href="/rgrove" data-skip-pjax="true" rel="author">rgrove</a></span>
- authored <time class="updated" datetime="2014-05-20T12:45:52-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-20 12:45:52 -0700">May 20, 2014</time>
-
- </div>
- </div>
- </div>
-
- <div class="file-wrap">
- <table class="files" data-pjax>
-
-
-<tbody class=""
- data-url="/rgrove/sanitize/file-list/dev-3.0.0"
- data-deferred-content-error="Failed to load latest commit information.">
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-directory"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/tree/dev-3.0.0/benchmark" class="js-directory-link" id="07978586e47c8709a63e895fbf3c3c7d-5a95f75a08fc12884dfe0fc2842346d1aec98c4a" title="benchmark">benchmark</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/ce844b7eb13bfee84276d41ba91ff183773f484b" class="message" data-pjax="true" title="Update benchmarks. We got a lot faster. Thanks Gumbo!">Update benchmarks. We got a lot faster. Thanks Gumbo!</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-18T16:41:36-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-18 16:41:36 -0700">May 18, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-directory"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/tree/dev-3.0.0/lib" class="js-directory-link" id="e8acc63b1e238f3255c900eed37254b8-2ba6ea05e193fc4e89efd4072ed1d5e66a263a53" title="lib">lib</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/2e6c581fa92602e899407f018feb0320c5d130be" class="message" data-pjax="true" title="Add a couple of legacy attributes to the relaxed config.">Add a couple of legacy attributes to the relaxed config.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-20T12:45:52-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-20 12:45:52 -0700">May 20, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-directory"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/tree/dev-3.0.0/test" class="js-directory-link" id="098f6bcd4621d373cade4e832627b4f6-a01581020167c9550f0d5a9f9ecef904184a3ef9" title="test">test</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/5f2809e5e13341ff163d90f78981d729bfb00a58" class="message" data-pjax="true" title="Workaround for libxml2 forcibly adding a content-type meta tag.
-
-The version of libxml2 used by Nokogiri forcibly adds a content-type meta
-tag to all documents with a &lt;head&gt; element during serialization, which is
-stupid.
-
-See also: sparklemotion/nokogiri#1008">Workaround for libxml2 forcibly adding a content-type meta tag.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-20T12:45:00-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-20 12:45:00 -0700">May 20, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/.gitignore" class="js-directory-link" id="a084b794bc0759e7a6b77810e01874f2-cb3dddb63c369ba2982d52fe9e28ef45f3d803b8" title=".gitignore">.gitignore</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/21cece27a377d40b405fc54bdf942f8eecfb5008" class="message" data-pjax="true" title="Add .yardopts, and use yard to generate docs.">Add .yardopts, and use yard to generate docs.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2013-09-18T17:09:07-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2013-09-18 17:09:07 -0700">September 18, 2013</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/.travis.yml" class="js-directory-link" id="354f30a63fb0907d4ad57269548329e3-506f40c8582b2e3c0243bf2ebbbdc2220937cdc7" title=".travis.yml">.travis.yml</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/2ca27b786f5acbd48d7905204ff9a5410997eded" class="message" data-pjax="true" title="Travis: Test against Ruby 2.1.2.">Travis: Test against Ruby 2.1.2.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-18T16:12:39-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-18 16:12:39 -0700">May 18, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/.yardopts" class="js-directory-link" id="5808b886486adcc2f1820a316cd5652d-88c462fdff6253794381fc4e452bc34d9b2b3233" title=".yardopts">.yardopts</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/e28fc3ec6ea1db83de0c8dbaf55c08e7f72b4183" class="message" data-pjax="true" title="Include HISTORY.md in the docs.">Include HISTORY.md in the docs.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-19T17:20:29-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-19 17:20:29 -0700">May 19, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/Gemfile" class="js-directory-link" id="8b7db4d5cc4b8f6dc8feb7030baa2478-3be9c3cd812e6cb2d9d029ec79a88bf4662aa68b" title="Gemfile">Gemfile</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/8646dc2fb688cdd035daa35b32b7b873f3feceeb" class="message" data-pjax="true" title="Use https://rubygems.org.">Use</a> <a href="https://rubygems.org">https://rubygems.org</a><a href="/rgrove/sanitize/commit/8646dc2fb688cdd035daa35b32b7b873f3feceeb" class="message" data-pjax="true" title="Use https://rubygems.org.">.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-20T10:29:27-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-20 10:29:27 -0700">May 20, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/HISTORY.md" class="js-directory-link" id="88dc7475eedf918122374be6d7c2c151-41baeafc07a544dd990d21902ecfdcbbb1dbbdad" title="HISTORY.md">HISTORY.md</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/5f2809e5e13341ff163d90f78981d729bfb00a58" class="message" data-pjax="true" title="Workaround for libxml2 forcibly adding a content-type meta tag.
-
-The version of libxml2 used by Nokogiri forcibly adds a content-type meta
-tag to all documents with a &lt;head&gt; element during serialization, which is
-stupid.
-
-See also: sparklemotion/nokogiri#1008">Workaround for libxml2 forcibly adding a content-type meta tag.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-20T12:45:00-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-20 12:45:00 -0700">May 20, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/LICENSE" class="js-directory-link" id="9879d6db96fd29134fc802214163b95a-05549118f3b1bde9a88ac7c642ccb4590915b944" title="LICENSE">LICENSE</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/76ee6e448b5480c49a79ed1f7ed6f212069f272b" class="message" data-pjax="true" title="Release 2.1.0.">Release 2.1.0.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-01-13T15:27:16-08:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-01-13 15:27:16 -0800">January 13, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/README.md" class="js-directory-link" id="04c6e90faac2675aa89e2176d2eec7d8-356a35f993ab9b85d0fb9213e73a1feb92bfb4dd" title="README.md">README.md</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/301eda5ae733f650fe3e2d4498f306c3eddf4ac5" class="message" data-pjax="true" title="Deep freeze the built-in configs.
-
-Based on PR #75 from @pda.
-
-This also adds Sanitize::Config.merge(), which can be used to safely
-deep-merge two configs (details in the readme).">Deep freeze the built-in configs.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-19T17:20:15-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-19 17:20:15 -0700">May 19, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/Rakefile" class="js-directory-link" id="52c976fc38ed2b4e3b1192f8a8e24cff-fd0ea81d9c35ecde722be2005a2b408df03c7fd9" title="Rakefile">Rakefile</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/eb618ad624798a22389b05a095d4c1f946a7e0ec" class="message" data-pjax="true" title="Remove redundant licenses.
-
-These things are so 90s.">Remove redundant licenses.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-17T20:51:02-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-17 20:51:02 -0700">May 17, 2014</time></span>
- </td>
- </tr>
- <tr>
- <td class="icon">
- <span class="octicon octicon-file-text"></span>
- <img alt="" class="spinner" height="16" src="https://assets-cdn.github.com/images/spinners/octocat-spinner-32.gif" width="16" />
- </td>
- <td class="content">
- <span class="css-truncate css-truncate-target"><a href="/rgrove/sanitize/blob/dev-3.0.0/sanitize.gemspec" class="js-directory-link" id="b54405cb6293bf0eed12ec79eee871c8-f8b56a2a688848f3cea0e8e4155ede4f0c524753" title="sanitize.gemspec">sanitize.gemspec</a></span>
- </td>
- <td class="message">
- <span class="css-truncate css-truncate-target ">
- <a href="/rgrove/sanitize/commit/8c29d8480a3d2db7279ccf9c336b6498230e1bd5" class="message" data-pjax="true" title="Require Minitest ~&gt; 5.3.4.">Require Minitest ~&gt; 5.3.4.</a>
- </span>
- </td>
- <td class="age">
- <span class="css-truncate css-truncate-target"><time datetime="2014-05-17T15:36:08-07:00" is="relative-time" title-format="%Y-%m-%d %H:%M:%S %z" title="2014-05-17 15:36:08 -0700">May 17, 2014</time></span>
- </td>
- </tr>
-</tbody>
-
- </table>
- </div>
-
-
- <div id="readme" class="clearfix announce instapaper_body md">
- <span class="name">
- <span class="octicon octicon-book"></span>
- README.md
- </span>
-
- <article class="markdown-body entry-content" itemprop="mainContentOfPage"><h1>
-<a name="user-content-sanitize" class="anchor" href="#sanitize"><span class="octicon octicon-link"></span></a>Sanitize</h1>
-
-<p>Sanitize is a whitelist-based HTML sanitizer. Given a list of acceptable
-elements and attributes, Sanitize will remove all unacceptable HTML from a
-string.</p>
-
-<p>Using a simple configuration syntax, you can tell Sanitize to allow certain
-elements, certain attributes within those elements, and even certain URL
-protocols within attributes that contain URLs. Any HTML elements or attributes
-that you don't explicitly allow will be removed.</p>
-
-<p>Sanitize is based on <a href="https://github.com/google/gumbo-parser">Google's Gumbo HTML5 parser</a>, which parses HTML
-exactly the same way modern browsers do. As long as your whitelist config only
-allows safe markup, even the most malformed or malicious input will be
-transformed into safe output.</p>
-
-<p><a href="https://travis-ci.org/rgrove/sanitize"><img src="https://camo.githubusercontent.com/8cbb6b37206c06dca9aad142aeabdb2db8a5614b/68747470733a2f2f7472617669732d63692e6f72672f7267726f76652f73616e6974697a652e7376673f6272616e63683d6d6173746572" alt="Build Status" data-canonical-src="https://travis-ci.org/rgrove/sanitize.svg?branch=master" style="max-width:100%;"></a>
-<a href="http://badge.fury.io/rb/sanitize"><img src="https://camo.githubusercontent.com/f74d6ba1eba40a388a496b343275302bdb4260d5/68747470733a2f2f62616467652e667572792e696f2f72622f73616e6974697a652e737667" alt="Gem Version" data-canonical-src="https://badge.fury.io/rb/sanitize.svg" style="max-width:100%;"></a></p>
-
-<h2>
-<a name="user-content-links" class="anchor" href="#links"><span class="octicon octicon-link"></span></a>Links</h2>
-
-<ul class="task-list">
-<li><a href="https://github.com/rgrove/sanitize/">Home</a></li>
-<li><a href="http://rubydoc.info/github/rgrove/sanitize/master">API Docs</a></li>
-<li><a href="https://github.com/rgrove/sanitize/issues">Issues</a></li>
-</ul><h2>
-<a name="user-content-installation" class="anchor" href="#installation"><span class="octicon octicon-link"></span></a>Installation</h2>
-
-<pre><code>gem install sanitize
-</code></pre>
-
-<h2>
-<a name="user-content-usage" class="anchor" href="#usage"><span class="octicon octicon-link"></span></a>Usage</h2>
-
-<p>Sanitize can sanitize both HTML fragments and fully qualified documents.</p>
-
-<h3>
-<a name="user-content-fragments" class="anchor" href="#fragments"><span class="octicon octicon-link"></span></a>Fragments</h3>
-
-<p>A fragment is a snippet of HTML that doesn't contain a root-level <code>&lt;html&gt;</code>
-element.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="n">html</span> <span class="o">=</span> <span class="s1">'&lt;b&gt;&lt;a href="http://foo.com/"&gt;foo&lt;/a&gt;&lt;/b&gt;&lt;img src="bar.jpg"&gt;'</span>
-
-<span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">)</span>
-<span class="c1"># =&gt; 'foo'</span>
-</pre></div>
-
-<p>If you don't specify any configuration options, Sanitize will use its strictest
-settings by default, which means it will strip all HTML and leave only safe text
-behind.</p>
-
-<p>To keep certain elements, add them to the element whitelist.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'b'</span><span class="o">]</span><span class="p">)</span>
-<span class="c1"># =&gt; '&lt;b&gt;foo&lt;/b&gt;'</span>
-</pre></div>
-
-<h3>
-<a name="user-content-documents" class="anchor" href="#documents"><span class="octicon octicon-link"></span></a>Documents</h3>
-
-<p>When sanitizing a document, the <code>&lt;html&gt;</code> element must be whitelisted. You can
-also set <code>:allow_doctype</code> to <code>true</code> to allow well-formed document type
-definitions.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="n">html</span> <span class="o">=</span> <span class="sx">%[</span>
-<span class="sx"> &lt;!DOCTYPE html&gt;</span>
-<span class="sx"> &lt;html&gt;</span>
-<span class="sx"> &lt;b&gt;&lt;a href="http://foo.com/"&gt;foo&lt;/a&gt;&lt;/b&gt;&lt;img src="bar.jpg"&gt;</span>
-<span class="sx"> &lt;/html&gt;</span>
-<span class="sx">]</span>
-
-<span class="no">Sanitize</span><span class="o">.</span><span class="n">document</span><span class="p">(</span><span class="n">html</span><span class="p">,</span>
- <span class="ss">:allow_doctype</span> <span class="o">=&gt;</span> <span class="kp">true</span><span class="p">,</span>
- <span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'html'</span><span class="o">]</span>
-<span class="p">)</span>
-<span class="c1"># =&gt; "&lt;!DOCTYPE html&gt;\n&lt;html&gt;foo\n \n&lt;/html&gt;\n"</span>
-</pre></div>
-
-<h2>
-<a name="user-content-configuration" class="anchor" href="#configuration"><span class="octicon octicon-link"></span></a>Configuration</h2>
-
-<p>In addition to the ultra-safe default settings, Sanitize comes with three other
-built-in configurations that you can use out of the box or adapt to meet your
-needs.</p>
-
-<h3>
-<a name="user-content-sanitizeconfigrestricted" class="anchor" href="#sanitizeconfigrestricted"><span class="octicon octicon-link"></span></a>Sanitize::Config::RESTRICTED</h3>
-
-<p>Allows only very simple inline markup. No links, images, or block elements.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">::</span><span class="no">RESTRICTED</span><span class="p">)</span>
-<span class="c1"># =&gt; '&lt;b&gt;foo&lt;/b&gt;'</span>
-</pre></div>
-
-<h3>
-<a name="user-content-sanitizeconfigbasic" class="anchor" href="#sanitizeconfigbasic"><span class="octicon octicon-link"></span></a>Sanitize::Config::BASIC</h3>
-
-<p>Allows a variety of markup including formatting elements, links, and lists.</p>
-
-<p>Images and tables are not allowed, links are limited to FTP, HTTP, HTTPS, and
-mailto protocols, and a <code>rel="nofollow"</code> attribute is added to all links to
-mitigate SEO spam.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">::</span><span class="no">BASIC</span><span class="p">)</span>
-<span class="c1"># =&gt; '&lt;b&gt;&lt;a href="http://foo.com/" rel="nofollow"&gt;foo&lt;/a&gt;&lt;/b&gt;'</span>
-</pre></div>
-
-<h3>
-<a name="user-content-sanitizeconfigrelaxed" class="anchor" href="#sanitizeconfigrelaxed"><span class="octicon octicon-link"></span></a>Sanitize::Config::RELAXED</h3>
-
-<p>Allows an even wider variety of markup, including images and tables. Links are
-still limited to FTP, HTTP, HTTPS, and mailto protocols, while images are
-limited to HTTP and HTTPS. In this mode, <code>rel="nofollow"</code> is not added to links.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">::</span><span class="no">RELAXED</span><span class="p">)</span>
-<span class="c1"># =&gt; '&lt;b&gt;&lt;a href="http://foo.com/"&gt;foo&lt;/a&gt;&lt;/b&gt;&lt;img src="bar.jpg"&gt;'</span>
-</pre></div>
-
-<h3>
-<a name="user-content-custom-configuration" class="anchor" href="#custom-configuration"><span class="octicon octicon-link"></span></a>Custom Configuration</h3>
-
-<p>If the built-in modes don't meet your needs, you can easily specify a custom
-configuration:</p>
-
-<div class="highlight highlight-ruby"><pre><span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span>
- <span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'a'</span><span class="p">,</span> <span class="s1">'span'</span><span class="o">]</span><span class="p">,</span>
-
- <span class="ss">:attributes</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'href'</span><span class="p">,</span> <span class="s1">'title'</span><span class="o">]</span><span class="p">,</span>
- <span class="s1">'span'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'class'</span><span class="o">]</span>
- <span class="p">},</span>
-
- <span class="ss">:protocols</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="p">{</span><span class="s1">'href'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'http'</span><span class="p">,</span> <span class="s1">'https'</span><span class="p">,</span> <span class="s1">'mailto'</span><span class="o">]</span><span class="p">}</span>
- <span class="p">}</span>
-<span class="p">)</span>
-</pre></div>
-
-<p>You can also start with one of Sanitize's built-in configurations and then
-customize it to meet your needs.</p>
-
-<p>The built-in configs are deeply frozen to prevent people from modifying them
-(either accidentally or maliciously). To customize a built-in config, create a
-new copy using <code>Sanitize::Config.merge()</code>, like so:</p>
-
-<div class="highlight highlight-ruby"><pre><span class="c1"># Create a customized copy of the Basic config, adding &lt;div&gt; and &lt;table&gt; to the</span>
-<span class="c1"># existing whitelisted elements.</span>
-<span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">::</span><span class="no">BASIC</span><span class="p">,</span>
- <span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">::</span><span class="no">BASIC</span><span class="o">[</span><span class="ss">:elements</span><span class="o">]</span> <span class="o">+</span> <span class="o">[</span><span class="s1">'div'</span><span class="p">,</span> <span class="s1">'table'</span><span class="o">]</span><span class="p">,</span>
- <span class="ss">:remove_contents</span> <span class="o">=&gt;</span> <span class="kp">true</span>
-<span class="p">))</span>
-</pre></div>
-
-<p>The example above adds the <code>&lt;div&gt;</code> and <code>&lt;table&gt;</code> elements to a copy of the
-existing list of elements in <code>Sanitize::Config::BASIC</code>. If you instead want to
-completely overwrite the elements array with your own, you can omit the <code>+</code>
-operation:</p>
-
-<div class="highlight highlight-ruby"><pre><span class="c1"># Overwrite :elements instead of creating a copy with new entries.</span>
-<span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">.</span><span class="n">merge</span><span class="p">(</span><span class="no">Sanitize</span><span class="o">::</span><span class="no">Config</span><span class="o">::</span><span class="no">BASIC</span><span class="p">,</span>
- <span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'div'</span><span class="p">,</span> <span class="s1">'table'</span><span class="o">]</span><span class="p">,</span>
- <span class="ss">:remove_contents</span> <span class="o">=&gt;</span> <span class="kp">true</span>
-<span class="p">))</span>
-</pre></div>
-
-<h3>
-<a name="user-content-config-settings" class="anchor" href="#config-settings"><span class="octicon octicon-link"></span></a>Config Settings</h3>
-
-<h4>
-<a name="user-content-add_attributes-hash" class="anchor" href="#add_attributes-hash"><span class="octicon octicon-link"></span></a>:add_attributes (Hash)</h4>
-
-<p>Attributes to add to specific elements. If the attribute already exists, it will
-be replaced with the value specified here. Specify all element names and
-attributes in lowercase.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="ss">:add_attributes</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="p">{</span><span class="s1">'rel'</span> <span class="o">=&gt;</span> <span class="s1">'nofollow'</span><span class="p">}</span>
-<span class="p">}</span>
-</pre></div>
-
-<h4>
-<a name="user-content-allow_comments-boolean" class="anchor" href="#allow_comments-boolean"><span class="octicon octicon-link"></span></a>:allow_comments (boolean)</h4>
-
-<p>Whether or not to allow HTML comments. Allowing comments is strongly
-discouraged, since IE allows script execution within conditional comments. The
-default value is <code>false</code>.</p>
-
-<h4>
-<a name="user-content-allow_doctype-boolean" class="anchor" href="#allow_doctype-boolean"><span class="octicon octicon-link"></span></a>:allow_doctype (boolean)</h4>
-
-<p>Whether or not to allow well-formed HTML doctype declarations such as "&lt;!DOCTYPE
-html&gt;" when sanitizing a document. This setting is ignored when sanitizing
-fragments. The default value is <code>false</code>.</p>
-
-<h4>
-<a name="user-content-attributes-hash" class="anchor" href="#attributes-hash"><span class="octicon octicon-link"></span></a>:attributes (Hash)</h4>
-
-<p>Attributes to allow on specific elements. Specify all element names and
-attributes in lowercase.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="ss">:attributes</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'href'</span><span class="p">,</span> <span class="s1">'title'</span><span class="o">]</span><span class="p">,</span>
- <span class="s1">'blockquote'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'cite'</span><span class="o">]</span><span class="p">,</span>
- <span class="s1">'img'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'alt'</span><span class="p">,</span> <span class="s1">'src'</span><span class="p">,</span> <span class="s1">'title'</span><span class="o">]</span>
-<span class="p">}</span>
-</pre></div>
-
-<p>If you'd like to allow certain attributes on all elements, use the symbol <code>:all</code>
-instead of an element name.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="c1"># Allow the class attribute on all elements.</span>
-<span class="ss">:attributes</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="ss">:all</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'class'</span><span class="o">]</span><span class="p">,</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'href'</span><span class="p">,</span> <span class="s1">'title'</span><span class="o">]</span>
-<span class="p">}</span>
-</pre></div>
-
-<p>To allow arbitrary HTML5 <code>data-*</code> attributes, use the symbol <code>:data</code> in place of
-an attribute name.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="c1"># Allow arbitrary HTML5 data-* attributes on &lt;div&gt; elements.</span>
-<span class="ss">:attributes</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'div'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="ss">:data</span><span class="o">]</span>
-<span class="p">}</span>
-</pre></div>
-
-<h4>
-<a name="user-content-elements-array" class="anchor" href="#elements-array"><span class="octicon octicon-link"></span></a>:elements (Array)</h4>
-
-<p>Array of HTML element names to allow. Specify all names in lowercase. Any
-elements not in this array will be removed.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="sx">%w[</span>
-<span class="sx"> a abbr b blockquote br cite code dd dfn dl dt em i kbd li mark ol p pre</span>
-<span class="sx"> q s samp small strike strong sub sup time u ul var</span>
-<span class="sx">]</span>
-</pre></div>
-
-<h4>
-<a name="user-content-protocols-hash" class="anchor" href="#protocols-hash"><span class="octicon octicon-link"></span></a>:protocols (Hash)</h4>
-
-<p>URL protocols to allow in specific attributes. If an attribute is listed here
-and contains a protocol other than those specified (or if it contains no
-protocol at all), it will be removed.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="ss">:protocols</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="p">{</span><span class="s1">'href'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'ftp'</span><span class="p">,</span> <span class="s1">'http'</span><span class="p">,</span> <span class="s1">'https'</span><span class="p">,</span> <span class="s1">'mailto'</span><span class="o">]</span><span class="p">},</span>
- <span class="s1">'img'</span> <span class="o">=&gt;</span> <span class="p">{</span><span class="s1">'src'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'http'</span><span class="p">,</span> <span class="s1">'https'</span><span class="o">]</span><span class="p">}</span>
-<span class="p">}</span>
-</pre></div>
-
-<p>If you'd like to allow the use of relative URLs which don't have a protocol,
-include the symbol <code>:relative</code> in the protocol array:</p>
-
-<div class="highlight highlight-ruby"><pre><span class="ss">:protocols</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'a'</span> <span class="o">=&gt;</span> <span class="p">{</span><span class="s1">'href'</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="s1">'http'</span><span class="p">,</span> <span class="s1">'https'</span><span class="p">,</span> <span class="ss">:relative</span><span class="o">]</span><span class="p">}</span>
-<span class="p">}</span>
-</pre></div>
-
-<h4>
-<a name="user-content-remove_contents-boolean-or-array" class="anchor" href="#remove_contents-boolean-or-array"><span class="octicon octicon-link"></span></a>:remove_contents (boolean or Array)</h4>
-
-<p>If set to <code>true</code>, Sanitize will remove the contents of any non-whitelisted
-elements in addition to the elements themselves. By default, Sanitize leaves the
-safe parts of an element's contents behind when the element is removed.</p>
-
-<p>If set to an array of element names, then only the contents of the specified
-elements (when filtered) will be removed, and the contents of all other filtered
-elements will be left behind.</p>
-
-<p>The default value is <code>false</code>.</p>
-
-<h4>
-<a name="user-content-transformers" class="anchor" href="#transformers"><span class="octicon octicon-link"></span></a>:transformers</h4>
-
-<p>Custom transformer or array of custom transformers. See the Transformers section
-below for details.</p>
-
-<h4>
-<a name="user-content-whitespace_elements-hash" class="anchor" href="#whitespace_elements-hash"><span class="octicon octicon-link"></span></a>:whitespace_elements (Hash)</h4>
-
-<p>Hash of element names which, when removed, should have their contents surrounded
-by whitespace to preserve readability.</p>
-
-<p>Each element name is a key pointing to another Hash, which provides the specific
-whitespace that should be inserted <code>:before</code> and <code>:after</code> the removed element's
-position. The <code>:after</code> value will only be inserted if the removed element has
-children, in which case it will be inserted after those children.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="ss">:whitespace_elements</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'br'</span> <span class="o">=&gt;</span> <span class="p">{</span> <span class="ss">:before</span> <span class="o">=&gt;</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">,</span> <span class="ss">:after</span> <span class="o">=&gt;</span> <span class="s2">""</span> <span class="p">},</span>
- <span class="s1">'div'</span> <span class="o">=&gt;</span> <span class="p">{</span> <span class="ss">:before</span> <span class="o">=&gt;</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">,</span> <span class="ss">:after</span> <span class="o">=&gt;</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span> <span class="p">},</span>
- <span class="s1">'p'</span> <span class="o">=&gt;</span> <span class="p">{</span> <span class="ss">:before</span> <span class="o">=&gt;</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">,</span> <span class="ss">:after</span> <span class="o">=&gt;</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span> <span class="p">}</span>
-<span class="p">}</span>
-</pre></div>
-
-<h2>
-<a name="user-content-transformers-1" class="anchor" href="#transformers-1"><span class="octicon octicon-link"></span></a>Transformers</h2>
-
-<p>Transformers allow you to filter and modify nodes using your own custom logic,
-on top of (or instead of) Sanitize's core filter. A transformer is any object
-that responds to <code>call()</code> (such as a lambda or proc).</p>
-
-<p>To use one or more transformers, pass them to the <code>:transformers</code> config
-setting. You may pass a single transformer or an array of transformers.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="ss">:transformers</span> <span class="o">=&gt;</span> <span class="o">[</span>
- <span class="n">transformer_one</span><span class="p">,</span>
- <span class="n">transformer_two</span>
-<span class="o">]</span><span class="p">)</span>
-</pre></div>
-
-<h3>
-<a name="user-content-input" class="anchor" href="#input"><span class="octicon octicon-link"></span></a>Input</h3>
-
-<p>Each transformer's <code>call()</code> method will be called once for each node in the HTML
-(including elements, text nodes, comments, etc.), and will receive as an
-argument a Hash that contains the following items:</p>
-
-<ul class="task-list">
-<li><p><strong>:config</strong> - The current Sanitize configuration Hash.</p></li>
-<li><p><strong>:is_whitelisted</strong> - <code>true</code> if the current node has been whitelisted by a
-previous transformer, <code>false</code> otherwise. It's generally bad form to remove
-a node that a previous transformer has whitelisted.</p></li>
-<li><p><strong>:node</strong> - A <code>Nokogiri::XML::Node</code> object representing an HTML node. The
-node may be an element, a text node, a comment, a CDATA node, or a document
-fragment. Use Nokogiri's inspection methods (<code>element?</code>, <code>text?</code>, etc.) to
-selectively ignore node types you aren't interested in.</p></li>
-<li><p><strong>:node_name</strong> - The name of the current HTML node, always lowercase (e.g.
-"div" or "span"). For non-element nodes, the name will be something like
-"text", "comment", "#cdata-section", "#document-fragment", etc.</p></li>
-<li><p><strong>:node_whitelist</strong> - Set of <code>Nokogiri::XML::Node</code> objects in the current
-document that have been whitelisted by previous transformers, if any. It's
-generally bad form to remove a node that a previous transformer has
-whitelisted.</p></li>
-</ul><h3>
-<a name="user-content-output" class="anchor" href="#output"><span class="octicon octicon-link"></span></a>Output</h3>
-
-<p>A transformer doesn't have to return anything, but may optionally return a Hash,
-which may contain the following items:</p>
-
-<ul class="task-list">
-<li>
-<strong>:node_whitelist</strong> - Array or Set of specific Nokogiri::XML::Node objects
-to add to the document's whitelist, bypassing the current Sanitize config.
-These specific nodes and all their attributes will be whitelisted, but
-their children will not be.</li>
-</ul><p>If a transformer returns anything other than a Hash, the return value will be
-ignored.</p>
-
-<h3>
-<a name="user-content-processing" class="anchor" href="#processing"><span class="octicon octicon-link"></span></a>Processing</h3>
-
-<p>Each transformer has full access to the <code>Nokogiri::XML::Node</code> that's passed into
-it and to the rest of the document via the node's <code>document()</code> method. Any
-changes made to the current node or to the document will be reflected instantly
-in the document and passed on to subsequently called transformers and to
-Sanitize itself. A transformer may even call Sanitize internally to perform
-custom sanitization if needed.</p>
-
-<p>Nodes are passed into transformers in the order in which they're traversed.
-Sanitize performs top-down traversal, meaning that nodes are traversed in the
-same order you'd read them in the HTML, starting at the top node, then its first
-child, and so on.</p>
-
-<div class="highlight highlight-ruby"><pre><span class="n">html</span> <span class="o">=</span> <span class="sx">%[</span>
-<span class="sx"> &lt;header&gt;</span>
-<span class="sx"> &lt;span&gt;</span>
-<span class="sx"> &lt;strong&gt;foo&lt;/strong&gt;</span>
-<span class="sx"> &lt;/span&gt;</span>
-<span class="sx"> &lt;p&gt;bar&lt;/p&gt;</span>
-<span class="sx"> &lt;/header&gt;</span>
-
-<span class="sx"> &lt;footer&gt;&lt;/footer&gt;</span>
-<span class="sx">]</span>
-
-<span class="n">transformer</span> <span class="o">=</span> <span class="nb">lambda</span> <span class="k">do</span> <span class="o">|</span><span class="n">env</span><span class="o">|</span>
- <span class="nb">puts</span> <span class="n">env</span><span class="o">[</span><span class="ss">:node_name</span><span class="o">]</span> <span class="k">if</span> <span class="n">env</span><span class="o">[</span><span class="ss">:node</span><span class="o">].</span><span class="n">element?</span>
-<span class="k">end</span>
-
-<span class="c1"># Prints "header", "span", "strong", "p", "footer".</span>
-<span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="ss">:transformers</span> <span class="o">=&gt;</span> <span class="n">transformer</span><span class="p">)</span>
-</pre></div>
-
-<p>Transformers have a tremendous amount of power, including the power to
-completely bypass Sanitize's built-in filtering. Be careful! Your safety is in
-your own hands.</p>
-
-<h3>
-<a name="user-content-example-transformer-to-whitelist-youtube-video-embeds" class="anchor" href="#example-transformer-to-whitelist-youtube-video-embeds"><span class="octicon octicon-link"></span></a>Example: Transformer to whitelist YouTube video embeds</h3>
-
-<p>The following example demonstrates how to create a transformer that will safely
-whitelist valid YouTube video embeds without having to blindly allow other kinds
-of embedded content, which would be the case if you tried to do this by just
-whitelisting all <code>&lt;iframe&gt;</code> elements:</p>
-
-<div class="highlight highlight-ruby"><pre><span class="n">youtube_transformer</span> <span class="o">=</span> <span class="nb">lambda</span> <span class="k">do</span> <span class="o">|</span><span class="n">env</span><span class="o">|</span>
- <span class="n">node</span> <span class="o">=</span> <span class="n">env</span><span class="o">[</span><span class="ss">:node</span><span class="o">]</span>
- <span class="n">node_name</span> <span class="o">=</span> <span class="n">env</span><span class="o">[</span><span class="ss">:node_name</span><span class="o">]</span>
-
- <span class="c1"># Don't continue if this node is already whitelisted or is not an element.</span>
- <span class="k">return</span> <span class="k">if</span> <span class="n">env</span><span class="o">[</span><span class="ss">:is_whitelisted</span><span class="o">]</span> <span class="o">||</span> <span class="o">!</span><span class="n">node</span><span class="o">.</span><span class="n">element?</span>
-
- <span class="c1"># Don't continue unless the node is an iframe.</span>
- <span class="k">return</span> <span class="k">unless</span> <span class="n">node_name</span> <span class="o">==</span> <span class="s1">'iframe'</span>
-
- <span class="c1"># Verify that the video URL is actually a valid YouTube video URL.</span>
- <span class="k">return</span> <span class="k">unless</span> <span class="n">node</span><span class="o">[</span><span class="s1">'src'</span><span class="o">]</span> <span class="o">=~</span> <span class="sr">%r|\A(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/|</span>
-
- <span class="c1"># We're now certain that this is a YouTube embed, but we still need to run</span>
- <span class="c1"># it through a special Sanitize step to ensure that no unwanted elements or</span>
- <span class="c1"># attributes that don't belong in a YouTube embed can sneak in.</span>
- <span class="no">Sanitize</span><span class="o">.</span><span class="n">node!</span><span class="p">(</span><span class="n">node</span><span class="p">,</span> <span class="p">{</span>
- <span class="ss">:elements</span> <span class="o">=&gt;</span> <span class="sx">%w[iframe]</span><span class="p">,</span>
-
- <span class="ss">:attributes</span> <span class="o">=&gt;</span> <span class="p">{</span>
- <span class="s1">'iframe'</span> <span class="o">=&gt;</span> <span class="sx">%w[allowfullscreen frameborder height src width]</span>
- <span class="p">}</span>
- <span class="p">})</span>
-
- <span class="c1"># Now that we're sure that this is a valid YouTube embed and that there are</span>
- <span class="c1"># no unwanted elements or attributes hidden inside it, we can tell Sanitize</span>
- <span class="c1"># to whitelist the current node.</span>
- <span class="p">{</span><span class="ss">:node_whitelist</span> <span class="o">=&gt;</span> <span class="o">[</span><span class="n">node</span><span class="o">]</span><span class="p">}</span>
-<span class="k">end</span>
-
-<span class="n">html</span> <span class="o">=</span> <span class="sx">%[</span>
-<span class="sx">&lt;iframe width="420" height="315" src="//www.youtube.com/embed/dQw4w9WgXcQ"</span>
-<span class="sx"> frameborder="0" allowfullscreen&gt;&lt;/iframe&gt;</span>
-<span class="sx">]</span>
-
-<span class="no">Sanitize</span><span class="o">.</span><span class="n">fragment</span><span class="p">(</span><span class="n">html</span><span class="p">,</span> <span class="ss">:transformers</span> <span class="o">=&gt;</span> <span class="n">youtube_transformer</span><span class="p">)</span>
-<span class="c1"># =&gt; '&lt;iframe width="420" height="315" src="//www.youtube.com/embed/dQw4w9WgXcQ" frameborder="0" allowfullscreen=""&gt;&lt;/iframe&gt;'</span>
-</pre></div>
-
-<h2>
-<a name="user-content-license" class="anchor" href="#license"><span class="octicon octicon-link"></span></a>License</h2>
-
-<p>Copyright (c) 2014 Ryan Grove (<a href="mailto:ryan@wonko.com">ryan@wonko.com</a>)</p>
-
-<p>Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the 'Software'), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-the Software, and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:</p>
-
-<p>The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.</p>
-
-<p>THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
-FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
-COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
-IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.</p></article>
- </div>
-
-
- </div>
-
- </div><!-- /.repo-container -->
- <div class="modal-backdrop"></div>
- </div><!-- /.container -->
- </div><!-- /.site -->
-
-
- </div><!-- /.wrapper -->
-
- <div class="container">
- <div class="site-footer">
- <ul class="site-footer-links right">
- <li><a href="https://status.github.com/">Status</a></li>
- <li><a href="http://developer.github.com">API</a></li>
- <li><a href="http://training.github.com">Training</a></li>
- <li><a href="http://shop.github.com">Shop</a></li>
- <li><a href="/blog">Blog</a></li>
- <li><a href="/about">About</a></li>
-
- </ul>
-
- <a href="/">
- <span class="mega-octicon octicon-mark-github" title="GitHub"></span>
- </a>
-
- <ul class="site-footer-links">
- <li>&copy; 2014 <span title="0.08040s from github-fe124-cp1-prd.iad.github.net">GitHub</span>, Inc.</li>
- <li><a href="/site/terms">Terms</a></li>
- <li><a href="/site/privacy">Privacy</a></li>
- <li><a href="/security">Security</a></li>
- <li><a href="/contact">Contact</a></li>
- </ul>
- </div><!-- /.site-footer -->
-</div><!-- /.container -->
-
-
- <div class="fullscreen-overlay js-fullscreen-overlay" id="fullscreen_overlay">
- <div class="fullscreen-container js-fullscreen-container">
- <div class="textarea-wrap">
- <textarea name="fullscreen-contents" id="fullscreen-contents" class="fullscreen-contents js-fullscreen-contents" placeholder="" data-suggester="fullscreen_suggester"></textarea>
- </div>
- </div>
- <div class="fullscreen-sidebar">
- <a href="#" class="exit-fullscreen js-exit-fullscreen tooltipped tooltipped-w" aria-label="Exit Zen Mode">
- <span class="mega-octicon octicon-screen-normal"></span>
- </a>
- <a href="#" class="theme-switcher js-theme-switcher tooltipped tooltipped-w"
- aria-label="Switch themes">
- <span class="octicon octicon-color-mode"></span>
- </a>
- </div>
-</div>
-
-
-
- <div id="ajax-error-message" class="flash flash-error">
- <span class="octicon octicon-alert"></span>
- <a href="#" class="octicon octicon-remove-close close js-ajax-error-dismiss"></a>
- Something went wrong with that request. Please try again.
- </div>
-
-
- <script crossorigin="anonymous" src="https://assets-cdn.github.com/assets/frameworks-9027ad6a9d00434697fea4d0143670c6fb7b2471.js" type="text/javascript"></script>
- <script async="async" crossorigin="anonymous" src="https://assets-cdn.github.com/assets/github-cb8ceb101dbfeeab8bc4a2ee07ea2e5bdd668289.js" type="text/javascript"></script>
-
-
- </body>
-</html>
diff --git a/bench_fixtures/document-small.html.license b/bench_fixtures/document-small.html.license
new file mode 100644
index 0000000..e9b77c3
--- /dev/null
+++ b/bench_fixtures/document-small.html.license
@@ -0,0 +1,2 @@
+SPDX-FileCopyrightText: Copyright © 2008-2012 Aaron Patterson <http://tenderlovemaking.com>, Mike Dalessio <http://mike.daless.io>, Charles Nutter <http://blog.headius.com>, Sergio Arbeo <http://www.serabe.com>, Patrick Mahoney <http://polycrystal.org>, Yoko Harada <http://yokolet.blogspot.com>
+SPDX-License-Identifier: MIT
diff --git a/bench_fixtures/fragment-large.html.license b/bench_fixtures/fragment-large.html.license
new file mode 100644
index 0000000..f9d82b5
--- /dev/null
+++ b/bench_fixtures/fragment-large.html.license
@@ -0,0 +1,2 @@
+Copyright (c) 2014 Ryan Grove <ryan@wonko.com>
+SPDX-License-Identifier: MIT
diff --git a/bench_fixtures/fragment-small.html.license b/bench_fixtures/fragment-small.html.license
new file mode 100644
index 0000000..f9d82b5
--- /dev/null
+++ b/bench_fixtures/fragment-small.html.license
@@ -0,0 +1,2 @@
+Copyright (c) 2014 Ryan Grove <ryan@wonko.com>
+SPDX-License-Identifier: MIT
diff --git a/c_src/fasthtml_worker.c b/c_src/fasthtml_worker.c
index 566672e..0030b2b 100644
--- a/c_src/fasthtml_worker.c
+++ b/c_src/fasthtml_worker.c
@@ -1,534 +1,537 @@
+// SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+// SPDX-License-Identifier: LGPL-2.1-only
+
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <ctype.h>
#ifndef _REENTRANT
#define _REENTRANT /* For some reason __erl_errno is undefined unless _REENTRANT is defined */
#endif
#include "ei.h"
#ifndef OTP_22_OR_NEWER
# include "erl_interface.h"
#endif
#define HEADER_SIZE 4
#include <lexbor/html/html.h>
#include "tstack.h"
#ifdef __GNUC__
# define AFP(x, y) __attribute__((format (printf, x, y)))
#else
# define AFP(x, y)
#endif
#ifdef __GNUC__
# define NORETURN __attribute__((noreturn))
#else
# define NORETURN
#endif
// Worker state shared between main() and the request handlers: the ei buffer
// that holds the request packet currently being decoded.
typedef struct _state_t {
    ei_x_buff buffer;
} state_t;
// Output-format switches, combinable as a bitmask. They are decoded from the
// atoms of the request's option list in decode_parse_flags().
typedef enum parse_flags_e {
    // emit known HTML tag names as atoms instead of binaries
    FLAG_HTML_ATOMS = 1 << 0,
    // emit the atom `nil` as the children of childless void elements
    FLAG_NIL_SELF_CLOSING = 1 << 1,
    // emit comments as {comment, [], Text} instead of {comment, Text}
    FLAG_COMMENT_TUPLE3 = 1 << 2
} parse_flags_t;
char* read_packet(int *len);
static void handle_send(state_t * state);
static void err_term(ei_x_buff * response, const char * error_atom);
static parse_flags_t decode_parse_flags(state_t * state, int arity);
static void decode(state_t * state, ei_x_buff * response, lxb_html_document_t *document, bool fragment, lxb_dom_element_t *context_element, lxb_char_t * bin_data, size_t bin_size, parse_flags_t parse_flags);
static void build_tree(ei_x_buff * response, lxb_dom_node_t* tree, parse_flags_t parse_flags);
static void prepare_node_attrs(ei_x_buff * response, lxb_dom_node_t* node);
static inline char * lowercase(char * c);
/*
 * Report a fatal error and terminate the worker.
 *
 * The printf-style message is written to stderr (stdout is reserved for the
 * packet stream exchanged with the Erlang port) prefixed with
 * "fast_html worker: error: ", then the process exits with EXIT_FAILURE.
 * Messages longer than the 4096-byte scratch buffer are truncated.
 *
 * The declaration carries NORETURN so the compiler knows control never comes
 * back to the caller.
 */
static void panic(const char *fmt, ...) AFP(1, 2) NORETURN;
static void panic(const char *fmt, ...) {
    char buf[4096];
    va_list va;

    va_start (va, fmt);
    vsnprintf (buf, sizeof buf, fmt, va);
    va_end (va);

    fprintf (stderr, "fast_html worker: error: %s\n", buf);
    exit (EXIT_FAILURE);
}
/*
 * Worker entry point: initialise the ei library, then serve requests from
 * stdin forever. The loop never terminates on its own; read_packet() exits
 * the process when stdin reaches EOF (i.e. when the port is closed).
 */
int main(int argc, const char *argv[]) {
    (void) argc;
    (void) argv;

    state_t* state = calloc (1, sizeof(state_t));
    if (state == NULL)
        panic ("insufficient memory for worker state");

#ifdef OTP_22_OR_NEWER
    // initialize erlang client library
    ei_init ();
#else
    erl_init (NULL, -1);
#endif

    ei_x_new (&state->buffer);

    fflush (stdout);

    while (true) {
        int len;
        char* buf = read_packet(&len);

        // release the previous request (or the initial ei_x_new allocation)
        // before adopting the freshly read packet as the decode buffer
        ei_x_free(&state->buffer);
        state->buffer.index = 0;
        state->buffer.buff = buf;
        state->buffer.buffsz = len;

        handle_send (state);
    }

    // not reached: kept so the cleanup is in place should the loop ever
    // gain an exit condition
    ei_x_free (&state->buffer);
    free (state);

    return EXIT_SUCCESS;
}
/*
 * Read up to `count` bytes from stdin into `dst`, retrying after short reads
 * and EINTR. Returns the number of bytes stored, which is less than `count`
 * only when EOF was reached first. Panics on any other read error.
 */
static size_t read_fully(unsigned char *dst, size_t count)
{
    size_t done = 0;

    while (done < count) {
        ssize_t got = read(STDIN_FILENO, dst + done, count - done);

        if (got < 0) {
            if (errno == EINTR)
                continue;
            panic("failed to read from stdin: %s", strerror(errno));
        }
        if (got == 0)
            break; /* EOF */

        done += (size_t) got;
    }

    return done;
}

/*
 * Reads one packet from Erlang. The packet is framed with a HEADER_SIZE-byte
 * (4-byte) big-endian length prefix, i.e. the port's {packet, 4} framing.
 *
 * On a clean EOF before any header byte the process exits with status 0.
 * Any other failure (truncated header or body, read error, out of memory)
 * aborts via panic().
 *
 * len: receives the payload length in bytes.
 * Returns: a malloc()ed buffer holding the payload; the caller owns it.
 */
char *read_packet(int *len)
{
    unsigned char header[HEADER_SIZE];
    size_t header_read = read_fully(header, HEADER_SIZE);

    if (header_read == 0) {
        exit(0);
    }
    if (header_read != HEADER_SIZE) {
        panic("Failed to read packet header, read: %zu", header_read);
    }

    /* Decode the big-endian length prefix. */
    uint32_t packet_length = 0;
    for (int i = 0; i < HEADER_SIZE; i++)
        packet_length = (packet_length << 8) | header[i];

    /* The length is handed on through an int, so it has to fit in one. */
    if (packet_length > 0x7FFFFFFFu) {
        panic("packet of length %u is too large", (unsigned) packet_length);
    }
    *len = (int) packet_length;

    /* malloc(0) may legitimately return NULL; always ask for at least a byte. */
    char *io_buf = malloc(packet_length ? packet_length : 1);
    if (io_buf == NULL) {
        panic("insufficient memory for i/o buffer of size %u", (unsigned) packet_length);
    }

    size_t body_read = read_fully((unsigned char *) io_buf, packet_length);
    if (body_read != packet_length) {
        free(io_buf);
        panic("couldn't read packet of length %u, read: %zu",
              (unsigned) packet_length, body_read);
    }

    return io_buf;
}
// Write exactly `count` bytes from `src` to stdout, retrying after partial
// writes and EINTR. Panics if the response cannot be delivered.
static void write_fully (const void * src, size_t count)
{
    const unsigned char * cursor = src;

    while (count > 0) {
        ssize_t put = write (STDOUT_FILENO, cursor, count);

        if (put < 0) {
            if (errno == EINTR)
                continue;
            panic ("failed to write response: %s", strerror (errno));
        }
        if (put == 0)
            panic ("failed to write response: no progress");

        cursor += put;
        count -= (size_t) put;
    }
}

// handle ERL_SEND message type.
// we expect a tuple with arity of 3 or 4 in state->buffer.
// we expect the first argument to be an atom (`decode` or `decode_fragment`),
// the second argument to be the HTML payload, and the
// third argument to be the argument list.
// In case of `decode_fragment`, the fourth argument should be
// the context tag name.
// any other message: respond with an {error, unknown_call} tuple.
//
// Structurally invalid requests are answered with {error, badmatch}; requests
// that cannot be decoded at all abort the worker via panic(). Every path that
// produces a response goes through `out`, which releases the payload copy and
// the lexbor document before the reply is written.
static void handle_send (state_t * state)
{
    // response holds our response, prepare it
    ei_x_buff response;
    ei_x_new (&response);

    // resources released at `out`; NULL means "not acquired yet"
    char * bin_data = NULL;
    lxb_html_document_t * document = NULL;

    // check the protocol version, if it's unsupported, panic
    int version = 0;
    if (ei_decode_version (state->buffer.buff, &state->buffer.index, &version) < 0)
        panic ("malformed message - bad version (%d).", version);

    // decode the tuple header
    int arity;
    if (ei_decode_tuple_header (state->buffer.buff, &state->buffer.index, &arity) < 0)
    {
        err_term (&response, "badmatch");
        goto out;
    }

    char atom[MAXATOMLEN];
    if (ei_decode_atom (state->buffer.buff, &state->buffer.index, atom) < 0)
    {
        err_term (&response, "badmatch");
        goto out;
    }

    bool fragment = false;
    if (strcmp (atom, "decode") == 0)
    {
        if (arity != 3) {
            err_term (&response, "badmatch");
            goto out;
        }
    }
    else if (strcmp (atom, "decode_fragment") == 0)
    {
        if (arity != 4) {
            err_term (&response, "badmatch");
            goto out;
        }
        fragment = true;
    }
    else
    {
        err_term (&response, "unknown_call");
        goto out;
    }

    // the next argument should be a binary, allocate it dynamically.
    int bin_type, bin_size;
    if (ei_get_type (state->buffer.buff, &state->buffer.index, &bin_type, &bin_size) < 0)
        panic ("failed to decode binary size in message");

    // verify the type
    if (bin_type != ERL_BINARY_EXT)
    {
        err_term (&response, "badmatch");
        goto out;
    }

    // decode the binary (one extra zeroed byte keeps it NUL-terminated)
    bin_data = calloc (1, (size_t) bin_size + 1);
    if (bin_data == NULL)
        panic ("insufficient memory for payload of size %d", bin_size);
    if (ei_decode_binary (state->buffer.buff, &state->buffer.index, bin_data, NULL) < 0)
        panic ("failed to decode binary in message");

    // next should be the options list
    if (ei_decode_list_header (state->buffer.buff, &state->buffer.index, &arity) < 0)
        panic ("failed to decode options list header in message");

    parse_flags_t parse_flags = decode_parse_flags (state, arity);

    // Lists with items always have an empty list as their tail
    if (arity != 0)
        if (ei_decode_list_header (state->buffer.buff, &state->buffer.index, &arity) < 0)
            panic ("failed to decode empty list header after option list in message");

    document = lxb_html_document_create();
    if (document == NULL)
        panic ("failed to create HTML document");

    lxb_dom_element_t *context_element = NULL;

    // if we are parsing a fragment, context tag name should come next
    if (fragment) {
        int context_bin_type, context_bin_size;
        if (ei_get_type (state->buffer.buff, &state->buffer.index, &context_bin_type, &context_bin_size) < 0)
            panic ("failed to decode binary size in message");

        // verify the type
        if (context_bin_type != ERL_BINARY_EXT)
        {
            err_term (&response, "badmatch");
            goto out;
        }

        // decode the binary
        char* context_bin_data = calloc (1, (size_t) context_bin_size + 1);
        if (context_bin_data == NULL)
            panic ("insufficient memory for context tag of size %d", context_bin_size);
        if (ei_decode_binary (state->buffer.buff, &state->buffer.index, context_bin_data, NULL) < 0)
            panic ("failed to decode context binary in message");

        context_element = lxb_dom_document_create_element(&document->dom_document, (lxb_char_t*) context_bin_data, context_bin_size, NULL);
        free (context_bin_data);
    }

    // A fragment needs a usable context element: one that could not be created
    // at all is reported the same way as one whose tag is not a known HTML tag,
    // rather than handing a NULL element to the fragment parser.
    if (fragment && (context_element == NULL || lxb_dom_element_tag_id(context_element) >= LXB_TAG__LAST_ENTRY)) {
        err_term (&response, "unknown_context_tag");
    } else {
        decode (state, &response, document, fragment, context_element, (lxb_char_t *) bin_data, bin_size, parse_flags);
    }

out:
    // release per-request resources on every path, including early errors
    if (document != NULL)
        lxb_html_document_destroy(document);
    free (bin_data);

    // send response, prefixed with its big-endian length
    unsigned char header[HEADER_SIZE];
    uint32_t size = (uint32_t) response.index;

    for (int i = HEADER_SIZE-1; i != -1; i--) {
        header[i] = (unsigned char) (size & 0xFF);
        size = size >> 8;
    }

    write_fully (header, sizeof(header));
    write_fully (response.buff, (size_t) response.index);

    // free response
    ei_x_free (&response);
    return;
}
// Replace whatever has been encoded into `response` so far with the term
// {error, <error_atom>}.
//
// response:   buffer to encode into; its index is reset to 0 first.
// error_atom: name of the atom placed in the second tuple position.
static void err_term (ei_x_buff * response, const char * error_atom)
{
    // restart encoding from the beginning of the buffer
    response->index = 0;
    ei_x_encode_version (response);
    ei_x_encode_tuple_header (response, 2);
    ei_x_encode_atom (response, "error");
    ei_x_encode_atom (response, error_atom);
}
// Decode `arity` option-list elements from state->buffer and fold the
// recognised atoms (html_atoms, nil_self_closing, comment_tuple3) into a
// parse_flags_t bitmask.
//
// Unknown atoms are ignored. Elements that are not atoms are skipped as whole
// terms so that the decode index still ends up on the list tail; if an element
// cannot even be skipped the message is malformed and the worker panics.
static parse_flags_t decode_parse_flags (state_t * state, int arity)
{
    parse_flags_t parse_flags = 0;

    for (int i = 0; i < arity; i++)
    {
        char atom[MAXATOMLEN];

        if (ei_decode_atom (state->buffer.buff, &state->buffer.index, atom) < 0)
        {
            // step over the non-atom element instead of leaving the index on it
            if (ei_skip_term (state->buffer.buff, &state->buffer.index) < 0)
                panic ("failed to skip non-atom option in message");
            continue;
        }

        if (! strcmp ("html_atoms", atom))
            parse_flags |= FLAG_HTML_ATOMS;
        else if (! strcmp ("nil_self_closing", atom))
            parse_flags |= FLAG_NIL_SELF_CLOSING;
        else if (! strcmp ("comment_tuple3", atom))
            parse_flags |= FLAG_COMMENT_TUPLE3;
    }

    return parse_flags;
}
// Parse the HTML payload and encode the outcome into `response`.
//
// Whole documents are parsed into `document` and walked from the document
// node; fragments are parsed relative to `context_element` and walked from the
// node the fragment parser returns. On failure `response` becomes
// {error, parse_failed}; otherwise build_tree() encodes the tree.
static void decode(state_t * state, ei_x_buff * response, lxb_html_document_t *document, bool fragment, lxb_dom_element_t *context_element, lxb_char_t * bin_data, size_t bin_size, parse_flags_t parse_flags)
{
    lxb_dom_node_t *root;
    bool parsed;

    (void) state;

    if (! fragment) {
        parsed = lxb_html_document_parse(document, bin_data, bin_size) == LXB_STATUS_OK;
        root = lxb_dom_interface_node(document);
    } else {
        root = lxb_html_document_parse_fragment(document, context_element, bin_data, bin_size);
        parsed = root != NULL;
    }

    if (parsed) {
        build_tree (response, root, parse_flags);
        return;
    }

    err_term (response, "parse_failed");
}
// An element is encoded as a 3-tuple {Name, Attributes, Children}.
// This function opens the tuple and writes the first two positions:
// - the tag name, as an atom when FLAG_HTML_ATOMS is set and the tag id is a
//   known one, as a binary otherwise
// - the complete attribute list
// The children are appended afterwards by the caller.
static void prepare_tag_header (ei_x_buff * response, const char * tag_string, lxb_dom_node_t* node, parse_flags_t parse_flags)
{
    const lxb_tag_id_t id = lxb_dom_node_tag_id(node);
    const bool known_tag = id != LXB_TAG__UNDEF && id < LXB_TAG__LAST_ENTRY;
    const bool as_atom = (parse_flags & FLAG_HTML_ATOMS) && known_tag;

    ei_x_encode_tuple_header (response, 3);

    if (as_atom) {
        ei_x_encode_atom (response, tag_string);
    } else {
        ei_x_encode_binary (response, tag_string, strlen (tag_string));
    }

    prepare_node_attrs (response, node);
}
// Encode the attributes of `node` as a proper list of {Name, Value} tuples,
// both as binaries, followed by the terminating empty list.
//
// Attributes with an empty qualified name are skipped. An attribute whose
// value pointer comes back NULL is encoded with an empty binary, so that a
// NULL pointer is never handed to ei_x_encode_binary.
static void prepare_node_attrs(ei_x_buff * response, lxb_dom_node_t* node)
{
    lxb_dom_attr_t *attr;

    for (attr = lxb_dom_element_first_attribute(lxb_dom_interface_element(node)); attr != NULL; attr = lxb_dom_element_next_attribute(attr))
    {
        size_t attr_name_len = 0;
        const char *attr_name = (const char*) lxb_dom_attr_qualified_name(attr, &attr_name_len);

        size_t attr_value_len = 0;
        const char *attr_value = (const char*) lxb_dom_attr_value(attr, &attr_value_len);

        /* guard against poisoned attribute nodes */
        if (attr_name == NULL || ! attr_name_len)
            continue;

        /* value-less attribute: encode as <<>> */
        if (attr_value == NULL) {
            attr_value = "";
            attr_value_len = 0;
        }

        ei_x_encode_list_header (response, 1);
        ei_x_encode_tuple_header (response, 2);
        ei_x_encode_binary (response, attr_name, attr_name_len);
        ei_x_encode_binary (response, attr_value, attr_value_len);
    }

    ei_x_encode_empty_list (response);
}
// Encode a comment node: {comment, Text} by default, or {comment, [], Text}
// when FLAG_COMMENT_TUPLE3 is requested. Text is a binary of `comment_len`
// bytes taken from `node_comment`.
static void prepare_comment (ei_x_buff * response, const char * node_comment, size_t comment_len, parse_flags_t parse_flags)
{
    const bool with_attr_slot = (parse_flags & FLAG_COMMENT_TUPLE3) != 0;
    const int tuple_arity = with_attr_slot ? 3 : 2;

    ei_x_encode_tuple_header (response, tuple_arity);
    ei_x_encode_atom (response, "comment");

    if (with_attr_slot) {
        // empty list standing in for the attribute position
        ei_x_encode_list_header (response, 0);
    }

    ei_x_encode_binary (response, node_comment, comment_len);
}
// List-building helpers used by build_tree(); they expect `response` and
// `current_node` to be in scope at the point of use.
//
// With DEBUG_LIST_MANIP defined each helper also traces what it emits. The
// trace goes to stderr because stdout carries the length-prefixed packets sent
// back to the Erlang port, and each helper is wrapped in do/while(0) so it
// behaves as a single statement.
#ifdef DEBUG_LIST_MANIP

#define EMIT_LIST_HDR \
    do { \
        fprintf (stderr, "list hdr for node %p\n", (void *) current_node); \
        ei_x_encode_list_header (response, 1); \
    } while (0)

#define EMIT_EMPTY_LIST_HDR \
    do { \
        fprintf (stderr, "list empty for node %p\n", (void *) current_node); \
        ei_x_encode_list_header (response, 0); \
    } while (0)

#define EMIT_LIST_TAIL \
    do { \
        fprintf (stderr, "list tail for node %p\n", (void *) current_node); \
        ei_x_encode_empty_list (response); \
    } while (0)

#else

#define EMIT_LIST_HDR ei_x_encode_list_header (response, 1)
#define EMIT_EMPTY_LIST_HDR ei_x_encode_list_header (response, 0)
#define EMIT_LIST_TAIL ei_x_encode_empty_list (response)

#endif
// Encode the subtree below `node` into `response` as {ok, Children}.
//
// The tree is walked iteratively, depth first, with an explicit stack of the
// ancestors whose child lists are still open:
// - text nodes become binaries
// - comment nodes are encoded by prepare_comment()
// - element nodes become {Name, Attributes, Children} tuples
// - any other node type is skipped
// Each list element is preceded by a 1-element list header; a list is closed
// with an empty-list tail when the walk climbs back to the parent.
static void build_tree (ei_x_buff * response, lxb_dom_node_t* node, parse_flags_t parse_flags)
{
    tstack stack;
    tstack_init (&stack, 30);
    tstack_push (&stack, node);

    lxb_dom_node_t* current_node = node->first_child;

    // ok we're going to send an actual response so start encoding it
    response->index = 0;
    ei_x_encode_version (response);
    ei_x_encode_tuple_header(response, 2);
    ei_x_encode_atom(response, "ok");

    // a root without children is just the empty list; exactly one NIL is
    // emitted so no stray byte follows the {ok, []} term
    if (current_node == NULL) {
        EMIT_EMPTY_LIST_HDR;
    }

    while (current_node != NULL)
    {
        if (current_node->type == LXB_DOM_NODE_TYPE_TEXT)
        {
            size_t text_len = 0;
            const char * node_text = (char*) lxb_dom_node_text_content(current_node, &text_len);

            // never hand a NULL pointer to the encoder
            if (node_text == NULL) {
                node_text = "";
                text_len = 0;
            }

            EMIT_LIST_HDR;
            ei_x_encode_binary (response, node_text, text_len);
        }
        else if (current_node->type == LXB_DOM_NODE_TYPE_COMMENT)
        {
            size_t comment_len = 0;
            const char* node_comment = (char*) lxb_dom_node_text_content(current_node, &comment_len);

            if (node_comment == NULL) {
                node_comment = "";
                comment_len = 0;
            }

            EMIT_LIST_HDR;
            prepare_comment(response, node_comment, comment_len, parse_flags);
        }
        else if(current_node->type == LXB_DOM_NODE_TYPE_ELEMENT)
        {
            // get name of tag
            size_t tag_name_len;
            const char *tag_name = (char*) lxb_dom_element_qualified_name(lxb_dom_interface_element(current_node), &tag_name_len);

            EMIT_LIST_HDR;
            prepare_tag_header (response, tag_name, current_node, parse_flags);

            if (current_node->first_child)
            {
                // descend: the element's child list stays open until we pop it
                tstack_push (&stack, current_node);
                current_node = current_node->first_child;
                continue;
            }
            else
            {
                if (parse_flags & FLAG_NIL_SELF_CLOSING && lxb_html_tag_is_void(lxb_dom_node_tag_id(current_node))) {
#ifdef DEBUG_LIST_MANIP
                    // stderr only: stdout is the packet channel to the port
                    fprintf (stderr, "self-closing tag %s emit nil?\n", tag_name);
#endif
                    ei_x_encode_atom (response, "nil");
                }
                else
                {
                    EMIT_EMPTY_LIST_HDR;
                }
            }
        }

        if (current_node->next)
            current_node = current_node->next;
        else
        {
            // last sibling: close lists while climbing until an ancestor with
            // a next sibling is found or the stack is exhausted
            while (! current_node->next && stack.used != 0)
            {
                EMIT_LIST_TAIL;
                current_node = tstack_pop (&stack);
            }

            if (current_node->next)
                current_node = current_node->next;
        }

        // are we at root?
        if (current_node == node)
            break;
    }

    tstack_free (&stack);
}
// Lower-case the NUL-terminated string `c` in place, one byte at a time via
// tolower(), and return `c` itself.
static inline char * lowercase(char* c)
{
    for (char * cursor = c; *cursor != '\0'; cursor++)
        *cursor = (char) tolower ((unsigned char) *cursor);

    return c;
}
diff --git a/c_src/tstack.h b/c_src/tstack.h
index ef3f007..1f6223b 100644
--- a/c_src/tstack.h
+++ b/c_src/tstack.h
@@ -1,38 +1,41 @@
+// SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+// SPDX-License-Identifier: LGPL-2.1-only
+
#ifndef TSTACK_H
#define TSTACK_H
// Number of slots added each time a full stack has to grow.
#define GROW_BY 30

// Growable LIFO stack of DOM node pointers.
//   data: heap array of `size` slots
//   used: number of occupied slots (index of the next free slot)
//   size: allocated capacity, in slots
typedef struct {
    lxb_dom_node_t **data;
    size_t used;
    size_t size;
} tstack;

// Initialise `stack` with room for `initial_size` nodes.
// If the allocation fails the stack starts with zero capacity, so the first
// push goes through tstack_resize(), which fails loudly.
void tstack_init(tstack *stack, size_t initial_size) {
    stack->data = (lxb_dom_node_t **) malloc(initial_size * sizeof(lxb_dom_node_t *));
    stack->used = 0;
    stack->size = (stack->data != NULL) ? initial_size : 0;
}

// Release the storage of `stack` and leave it empty.
void tstack_free(tstack *stack) {
    free(stack->data);
    stack->data = NULL;
    stack->used = 0;
    stack->size = 0;
}

// Change the capacity of `stack` to `new_size` slots.
// Aborts the process when memory cannot be obtained: the old buffer is kept
// intact by realloc() in that case, but there is no way to continue the walk.
void tstack_resize(tstack *stack, size_t new_size) {
    lxb_dom_node_t **grown = (lxb_dom_node_t **) realloc(stack->data, new_size * sizeof(lxb_dom_node_t *));

    if (grown == NULL && new_size != 0) {
        abort();
    }

    stack->data = grown;
    stack->size = new_size;
}

// Push `element`, growing the stack by GROW_BY slots when it is full.
void tstack_push(tstack *stack, lxb_dom_node_t * element) {
    if(stack->used == stack->size) {
        tstack_resize(stack, stack->size + GROW_BY);
    }
    stack->data[stack->used++] = element;
}

// Pop and return the most recently pushed node, or NULL if the stack is empty.
lxb_dom_node_t * tstack_pop(tstack *stack) {
    if (stack->used == 0) {
        return NULL;
    }
    return stack->data[--(stack->used)];
}
#endif
diff --git a/config/config.exs b/config/config.exs
index 6c2ac2a..522e3d9 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -1,30 +1,33 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-License-Identifier: LGPL-2.1-only
+
# This file is responsible for configuring your application
# and its dependencies with the aid of the Mix.Config module.
use Mix.Config
# This configuration is loaded before any dependency and is restricted
# to this project. If another project depends on this project, this
# file won't be loaded nor affect the parent project. For this reason,
# if you want to provide default values for your application for
# 3rd-party users, it should be done in your "mix.exs" file.
# You can configure your application as:
#
# config :fast_html, key: :value
#
# and access this configuration in your application as:
#
# Application.get_env(:fast_html, :key)
#
# You can also configure a 3rd-party app:
#
# config :logger, level: :info
#
# It is also possible to import configuration files, relative to this
# directory. For example, you can emulate configuration per environment
# by uncommenting the line below and defining dev.exs, test.exs and such.
# Configuration from the imported file will override the ones defined
# here (which is why it is important to import them last).
#
# import_config "#{Mix.env}.exs"
diff --git a/lib/fast_html.ex b/lib/fast_html.ex
index 89f5919..0fc92bc 100644
--- a/lib/fast_html.ex
+++ b/lib/fast_html.ex
@@ -1,147 +1,151 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
defmodule :fast_html do
  @moduledoc """
  A module to decode html into a tree structure.
  """

  @type tag() :: String.t() | atom()
  @type attr() :: {String.t(), String.t()}
  @type attr_list() :: [] | [attr()]
  @type comment_node() :: {:comment, String.t()}
  @type comment_node3() :: {:comment, [], String.t()}
  @type tree() ::
          {tag(), attr_list(), [tree()]}
          | {tag(), attr_list(), nil}
          | comment_node()
          | comment_node3()
          | String.t()
  @type format_flag() :: :html_atoms | :nil_self_closing | :comment_tuple3
  @type decode_option() ::
          {:format, [format_flag()]}
          | {:timeout, timeout()}
          | {:pool, atom() | pid()}
  @type decode_fragment_option() :: decode_option() | {:context, String.t()}

  @doc """
  Returns a tree representation from the given html string.

  `opts` is a keyword list of options, the options available:

  * `timeout` - Call timeout. If pooling is used and the worker doesn't return
    the result in time, the worker will be killed with a warning.
  * `format` - Format flags for the tree.

  The following format flags are available:

  * `:html_atoms` uses atoms for known html tags (faster), binaries for everything else.
  * `:nil_self_closing` uses `nil` to designate void elements.
    For example `<br>` is then being represented like `{"br", [], nil}`.
    See http://w3c.github.io/html-reference/syntax.html#void-elements for a full list of void elements.
  * `:comment_tuple3` uses 3-tuple elements for comments, instead of the default 2-tuple element.

  ## Examples

      iex> :fast_html.decode("<h1>Hello world</h1>")
      {:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}]}

      iex> :fast_html.decode("Hello world", timeout: 0)
      {:error, :timeout}

      iex> :fast_html.decode("<span class='hello'>Hi there</span>")
      {:ok, [{"html", [],
              [{"head", [], []},
               {"body", [], [{"span", [{"class", "hello"}], ["Hi there"]}]}]}]}

      iex> :fast_html.decode("<body><!-- a comment --!></body>")
      {:ok, [{"html", [], [{"head", [], []}, {"body", [], [comment: " a comment "]}]}]}

      iex> :fast_html.decode("<br>")
      {:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"br", [], []}]}]}]}

      iex> :fast_html.decode("<h1>Hello world</h1>", format: [:html_atoms])
      {:ok, [{:html, [], [{:head, [], []}, {:body, [], [{:h1, [], ["Hello world"]}]}]}]}

      iex> :fast_html.decode("<br>", format: [:nil_self_closing])
      {:ok, [{"html", [], [{"head", [], []}, {"body", [], [{"br", [], nil}]}]}]}

      iex> :fast_html.decode("<body><!-- a comment --!></body>", format: [:comment_tuple3])
      {:ok, [{"html", [], [{"head", [], []}, {"body", [], [{:comment, [], " a comment "}]}]}]}

      iex> html = "<body><!-- a comment --!><unknown /></body>"
      iex> :fast_html.decode(html, format: [:html_atoms, :nil_self_closing, :comment_tuple3])
      {:ok, [{:html, [],
              [{:head, [], []},
               {:body, [], [{:comment, [], " a comment "}, {"unknown", [], []}]}]}]}

  """
  @spec decode(String.t(), [decode_option()]) ::
          {:ok, [tree()]} | {:error, String.t() | atom()}
  def decode(bin, opts \\ []) do
    flags = Keyword.get(opts, :format, [])
    timeout = Keyword.get(opts, :timeout, 10000)
    find_and_use_port({:decode, bin, flags}, timeout, opts)
  end

  @doc """
  Like `decode/2`, but for parsing [HTML fragments](https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments).

  `opts` is a keyword list of options, the options available are the same as in `decode/2` with addition of:

  * `context` - Name of the context element, defaults to `div`

  Example:

      iex> :fast_html.decode_fragment("rin is the <i>best</i> girl")
      {:ok, ["rin is the ", {"i", [], ["best"]}, " girl"]}

      iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "title")
      {:ok, ["rin is the <i>best</i> girl"]}

      iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", context: "objective_truth")
      {:error, :unknown_context_tag}

      iex> :fast_html.decode_fragment("rin is the <i>best</i> girl", format: [:html_atoms])
      {:ok, ["rin is the ", {:i, [], ["best"]}, " girl"]}

  """
  @spec decode_fragment(String.t(), [decode_fragment_option()]) ::
          {:ok, [tree()]} | {:error, String.t() | atom()}
  def decode_fragment(bin, opts \\ []) do
    flags = Keyword.get(opts, :format, [])
    timeout = Keyword.get(opts, :timeout, 10000)
    context = Keyword.get(opts, :context, "div")
    find_and_use_port({:decode_fragment, bin, flags, context}, timeout, opts)
  end

  @default_pool FastHtml.Pool

  # Serialises `term_command`, sends it to a worker port and decodes the reply.
  #
  # The port comes from the pool given in `opts[:pool]`, else from the default
  # pool unless pooling is disabled in the application environment, else from a
  # one-off port that is opened for this call and closed afterwards.
  defp find_and_use_port(term_command, timeout, opts) do
    command = :erlang.term_to_binary(term_command)

    pool =
      cond do
        pool = Keyword.get(opts, :pool) -> pool
        Application.get_env(:fast_html, :pool, enabled: true)[:enabled] -> @default_pool
        true -> nil
      end

    execute_command_fun = fn port ->
      send(port, {self(), {:command, command}})

      receive do
        {^port, {:data, res}} -> {:ok, res}
      after
        timeout ->
          {:error, :timeout}
      end
    end

    result =
      if pool do
        FastHtml.Pool.get_port(pool, execute_command_fun)
      else
        port = open_port()
        result = execute_command_fun.(port)
        Port.close(port)
        result
      end

    case result do
      # the reply was produced by our own worker executable
      {:ok, result} -> :erlang.binary_to_term(result)
      {:error, _} = e -> e
    end
  end

  @doc """
  Opens a port to the `fasthtml_worker` executable shipped in the application's
  `priv` directory, using 4-byte length-prefixed binary packets over stdio.
  """
  @spec open_port() :: port()
  def open_port do
    Port.open({:spawn_executable, Path.join([:code.priv_dir(:fast_html), "fasthtml_worker"])}, [
      :binary,
      {:packet, 4},
      :use_stdio,
      :exit_status
    ])
  end
end
diff --git a/lib/fast_html/application.ex b/lib/fast_html/application.ex
index 68b3c3b..0502701 100644
--- a/lib/fast_html/application.ex
+++ b/lib/fast_html/application.ex
@@ -1,15 +1,18 @@
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
defmodule FastHtml.Application do
  @moduledoc false

  use Application

  # Starts the application supervisor. The default port pool is supervised
  # unless it is disabled with `config :fast_html, :pool, enabled: false`.
  def start(_type, _args) do
    pool_enabled? =
      :fast_html
      |> Application.get_env(:pool, enabled: true)
      |> Access.get(:enabled)

    children = if pool_enabled?, do: [FastHtml.Pool], else: []

    supervisor_opts = [strategy: :one_for_one, name: FastHtml.Supervisor]
    Supervisor.start_link(children, supervisor_opts)
  end
end
diff --git a/lib/fast_html/pool.ex b/lib/fast_html/pool.ex
index 47a29e1..2032f28 100644
--- a/lib/fast_html/pool.ex
+++ b/lib/fast_html/pool.ex
@@ -1,132 +1,135 @@
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
defmodule FastHtml.Pool do
  @moduledoc """
  A `NimblePool` of worker ports opened with `:fast_html.open_port/0`.

  A checked-out port is connected to the client process for the duration of
  the call and connected back to the pool afterwards. A port whose call timed
  out is removed from the pool and closed.
  """

  @behaviour NimblePool

  require Logger

  @doc false
  def child_spec(opts) do
    %{
      id: __MODULE__,
      start: {__MODULE__, :start_link, [opts]},
      type: :worker,
      restart: :permanent
    }
  end

  @doc """
  Starts the port pool.

  ### Options

  - `:size` - Number of ports in the pool. Defaults to `System.schedulers_online/0` if not set.
  - `:name` - Registered name of the pool. Defaults to `#{inspect(__MODULE__)}` if not set, set to `false` to not register the process.
  """
  @type option :: {:size, pos_integer()} | {:name, atom()}
  @spec start_link([option()]) :: term()
  def start_link(options) do
    {size, options} = Keyword.pop(options, :size, System.schedulers_online())
    NimblePool.start_link(worker: {__MODULE__, options}, pool_size: size)
  end

  @type pool :: atom() | pid()
  @type result :: {:ok, term()} | {:error, atom()}

  @doc """
  Checks a port out of `pool`, runs `fun` with it and returns `fun`'s result.

  After `fun` returns, ownership of the port is handed back to the pool
  process. The error reason of an `{:error, reason}` result is passed to the
  pool as the check-in state, so a `:timeout` gets the port removed.
  """
  @spec get_port(pool(), (port() -> result())) :: result()
  def get_port(pool, fun) do
    NimblePool.checkout!(pool, :checkout, fn _from, port ->
      result = fun.(port)

      client_state =
        case result do
          {:ok, _} ->
            :ok

          {:error, reason} ->
            reason
        end

      # hand the port back to the pool process
      send(port, {self(), {:connect, GenServer.whereis(pool)}})

      # NOTE(review): this receive has no timeout; it looks like it would block
      # if the port is already gone and never answers - confirm.
      client_state =
        receive do
          {^port, :connected} -> client_state
          {:EXIT, ^port, reason} -> {:EXIT, reason}
        end

      {result, client_state}
    end)
  end

  @impl NimblePool
  @doc false
  def init_pool(state) do
    {name, options} =
      case Keyword.pop(state, :name) do
        {nil, state} -> {__MODULE__, state}
        {name, state} when is_atom(name) -> {name, state}
        {_, state} -> {nil, state}
      end

    if name, do: Process.register(self(), name)
    {:ok, options}
  end

  @impl NimblePool
  @doc false
  def init_worker(pool_state) do
    port = :fast_html.open_port()
    {:ok, port, pool_state}
  end

  @impl NimblePool
  @doc false
  def terminate_worker({:EXIT, reason}, port, pool_state) do
    # ports (and non-atom exit reasons) do not implement String.Chars, so they
    # have to go through inspect/1 to be interpolated
    Logger.warn(fn ->
      "[#{__MODULE__}]: Port #{inspect(port)} unexpectedly exited with reason: #{inspect(reason)}"
    end)

    {:ok, pool_state}
  end

  def terminate_worker(_reason, port, pool_state) do
    Port.close(port)
    {:ok, pool_state}
  end

  @impl NimblePool
  @doc false
  def handle_checkout(:checkout, {client_pid, _}, port, pool_state) do
    # make the client the port owner so it receives the port's data messages
    send(port, {self(), {:connect, client_pid}})

    receive do
      {^port, :connected} -> {:ok, port, port, pool_state}
      # removal has to carry the pool state, like the :ok tuple above
      {:EXIT, ^port, reason} -> {:remove, {:EXIT, reason}, pool_state}
    end
  end

  @impl NimblePool
  @doc false
  def handle_checkin(:timeout, _, _, pool_state), do: {:remove, :timeout, pool_state}
  def handle_checkin(_, _, port, pool_state), do: {:ok, port, pool_state}

  @impl NimblePool
  @doc false
  def handle_info({:EXIT, port, reason}, port), do: {:remove, {:EXIT, reason}}
  def handle_info({:EXIT, _, _}, port), do: {:ok, port}

  # Port sent data to the pool, this happens when the timeout was reached
  # and the port got disconnected from the client, but not yet killed by the pool.
  # Just discard the message.
  def handle_info({_sending_port, {:data, _}}, port), do: {:ok, port}
end
diff --git a/lib/mix/tasks/fast_html/bench.ex b/lib/mix/tasks/fast_html/bench.ex
index 32823ee..1198615 100644
--- a/lib/mix/tasks/fast_html/bench.ex
+++ b/lib/mix/tasks/fast_html/bench.ex
@@ -1,31 +1,34 @@
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
if Mix.env() == :bench do
  defmodule Mix.Tasks.FastHtml.Bench do
    @moduledoc "Benchmarking task."

    use Mix.Task

    @input_dir "bench_fixtures"

    # Runs every parser against every fixture in `bench_fixtures`, comparing
    # with (and then overwriting) the results saved in "fast_html.bench".
    @impl Mix.Task
    def run(_) do
      Application.ensure_all_started(:fast_html)

      inputs =
        @input_dir
        |> File.ls!()
        # the fixtures have `.license` sidecar files next to them;
        # those are metadata, not benchmark inputs
        |> Enum.reject(&String.ends_with?(&1, ".license"))
        |> Map.new(fn input_name ->
          {input_name, File.read!(Path.join(@input_dir, input_name))}
        end)

      Benchee.run(
        %{
          "fast_html" => fn input -> :fast_html.decode(input) end,
          "myhtmlex nif" => fn input -> Myhtmlex.Nif.decode(input) end,
          "html5ever nif" => fn input -> Html5ever.parse(input) end,
          "mochiweb_html" => fn input -> :mochiweb_html.parse(input) end
        },
        inputs: inputs,
        save: [path: "fast_html.bench"],
        load: "fast_html.bench"
      )
    end
  end
end
diff --git a/mix.exs b/mix.exs
index 15e1574..06f1b7d 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,113 +1,117 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
defmodule FastHtml.Mixfile do
use Mix.Project
# Mix project definition. Besides the usual metadata it prepends the
# `:elixir_make` compiler and passes it the environment from `make_env/0` and
# the failure hint from `make_error_message/0`.
def project do
  [
    app: :fast_html,
    version: "2.0.5",
    elixir: "~> 1.5",
    deps: deps(),
    package: package(),
    # :elixir_make is listed first, ahead of the standard compilers
    compilers: [:elixir_make] ++ Mix.compilers(),
    make_env: make_env(),
    make_error_message: make_error_message(),
    build_embedded: Mix.env() == :prod,
    start_permanent: Mix.env() == :prod,
    name: "FastHtml",
    description: """
    A module to decode HTML into a tree,
    porting all properties of the underlying
    library lexbor, being fast and correct
    in regards to the html spec.
    """,
    docs: docs()
  ]
end
def package do
[
maintainers: ["Ariadne Conill", "rinpatch"],
- licenses: ["GNU LGPL"],
+ licenses: ["LGPL-2.1-only"],
links: %{
"GitLab" => "https://git.pleroma.social/pleroma/elixir-libraries/fast_html/",
"Issues" => "https://git.pleroma.social/pleroma/elixir-libraries/fast_html/issues",
"lexbor" => "https://github.com/lexbor/lexbor"
},
files: hex_files()
]
end
# OTP application definition: depends on :logger and is started through the
# FastHtml.Application callback module.
def application do
  [
    extra_applications: [:logger],
    mod: {FastHtml.Application, []}
  ]
end
# Dependency list. Only :nimble_pool is an unrestricted runtime dependency;
# :elixir_make is marked `runtime: false`, and the remaining entries are
# limited to the :dev, :test or :bench environments.
defp deps do
  [
    # documentation helpers
    {:ex_doc, "~> 0.19", only: :dev},
    # benchmarking helpers
    {:benchee, "~> 1.0", only: :bench, optional: true},
    {:dialyxir, "~> 1.0", only: [:dev, :test], runtime: false},
    # parsers referenced by the benchmark task
    {:myhtmlex, "~> 0.2.0", only: :bench, runtime: false, optional: true},
    {:mochiweb, "~> 2.18", only: :bench, optional: true},
    {:html5ever,
     git: "https://github.com/rusterlium/html5ever_elixir.git", only: :bench, optional: true},
    {:nimble_pool, "~> 0.2.0"},
    {:elixir_make, "~> 0.4", runtime: false}
  ]
end
# ExDoc options: the README is the landing page and the changelog is included
# as an extra page.
defp docs do
  [
    main: "readme",
    extras: ["README.md", "CHANGELOG.md"]
  ]
end
# Files to ship in the hex package: everything git tracks (submodules
# included), minus the benchmark fixtures and minus dotfiles that live outside
# of `priv`.
#
# This runs on every mix invocation, including from inside the published
# package where there is neither a checkout nor necessarily a git binary, so
# an empty list is returned in that case.
defp hex_files do
  git_checkout? = File.exists?(".git") and System.find_executable("git")

  if git_checkout? do
    {listing, 0} = System.cmd("git", ["ls-files", "--recurse-submodules"])

    tracked_paths =
      listing
      |> String.split("\n")
      # the trailing newline leaves a final "", which would make hex include
      # every file in the project folder
      |> List.delete_at(-1)

    Enum.filter(tracked_paths, fn path ->
      dir = Path.dirname(path)

      dir != "bench_fixtures" and
        (dir == "priv" or not String.starts_with?(Path.basename(path), "."))
    end)
  else
    []
  end
end
# Major OTP release of the running VM, as an integer.
defp otp_version do
  release = :erlang.system_info(:otp_release)
  String.to_integer(to_string(release))
end
# True when the running VM is OTP 22 or later.
defp otp_22_or_newer?, do: otp_version() >= 22
# Environment handed to elixir_make: OTP22_DEF is "YES" on OTP 22 or newer
# and "NO" otherwise.
defp make_env do
  otp22_def = if otp_22_or_newer?(), do: "YES", else: "NO"
  %{"OTP22_DEF" => otp22_def}
end
# Hint given to elixir_make (via `make_error_message:` in `project/0`) for
# when the native build fails.
defp make_error_message,
  do:
    "Please check you have: a C compiler, GNU Make, CMake and Erlang development headers installed before reporting an issue."
end
diff --git a/mix.lock.license b/mix.lock.license
new file mode 100644
index 0000000..e7038e9
--- /dev/null
+++ b/mix.lock.license
@@ -0,0 +1,4 @@
+SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+
+SPDX-License-Identifier: LGPL-2.1-only
diff --git a/test/fast_html_test.exs b/test/fast_html_test.exs
index 63246f1..d243c0a 100644
--- a/test/fast_html_test.exs
+++ b/test/fast_html_test.exs
@@ -1,165 +1,169 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
defmodule :fast_html_test do
  use ExUnit.Case
  doctest :fast_html

  # Shape and regression tests for :fast_html.decode/1,2 and
  # :fast_html.decode_fragment/1. Nodes are matched as {tag, attrs, children}
  # tuples; tags are strings unless the :html_atoms format option is given.

  test "doesn't segfault when <!----> is encountered" do
    decoded = :fast_html.decode("<div> <!----> </div>")

    assert {:ok, [{"html", _attrs, _children}]} = decoded
  end

  test "builds a tree, formatted like mochiweb by default" do
    decoded = :fast_html.decode("<br>")

    assert {:ok, [{"html", [], [{"head", [], []}, {"body", [], body}]}]} = decoded
    assert [{"br", [], []}] = body
  end

  test "builds a tree, html tags as atoms" do
    decoded = :fast_html.decode("<br>", format: [:html_atoms])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{:br, [], []}] = body
  end

  test "builds a tree, nil self closing" do
    decoded = :fast_html.decode("<br><esi:include />", format: [:nil_self_closing])

    assert {:ok, [{"html", [], [{"head", [], []}, {"body", [], body}]}]} = decoded
    assert [{"br", [], nil}, {"esi:include", [], []}] = body
  end

  test "builds a tree, multiple format options" do
    decoded = :fast_html.decode("<br>", format: [:html_atoms, :nil_self_closing])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{:br, [], nil}] = body
  end

  test "attributes" do
    decoded =
      :fast_html.decode(~s'<span id="test" class="foo garble"></span>', format: [:html_atoms])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{:span, [{"id", "test"}, {"class", "foo garble"}], []}] = body
  end

  test "single attributes" do
    decoded =
      :fast_html.decode(~s'<button disabled class="foo garble"></span>', format: [:html_atoms])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{:button, [{"disabled", ""}, {"class", "foo garble"}], []}] = body
  end

  test "text nodes" do
    decoded = :fast_html.decode(~s'<body>text node</body>', format: [:html_atoms])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert ["text node"] = body
  end

  test "broken input" do
    decoded = :fast_html.decode(~s'<a <> asdf', format: [:html_atoms])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{:a, [{"<", ""}], [" asdf"]}] = body
  end

  test "custom namespaced tags" do
    decoded = :fast_html.decode(~s'<esi:include />', format: [:html_atoms, :nil_self_closing])

    # Tags that are not known HTML elements stay strings even with :html_atoms.
    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{"esi:include", [], []}] = body
  end

  test "html comments" do
    decoded = :fast_html.decode(~s'<body><!-- a comment --></body>', format: [:html_atoms])

    assert {:ok, [{:html, [], [{:head, [], []}, {:body, [], body}]}]} = decoded
    assert [{:comment, " a comment "}] = body
  end

  test "doesn't go into an infinite loop when there are more than one root tags" do
    expected =
      {:ok, [{:comment, " a comment "}, {"html", [], [{"head", [], []}, {"body", [], []}]}]}

    assert expected == :fast_html.decode("<!-- a comment --> <html> </html>")
  end

  test "doesn't return attribute name in attribute value when the latter is empty" do
    expected = {:ok, [{"meta", [{"content", ""}], []}]}

    assert :fast_html.decode_fragment("<meta content=\"\"/>") == expected
  end
end
diff --git a/test/test_helper.exs b/test/test_helper.exs
index 869559e..021039e 100644
--- a/test/test_helper.exs
+++ b/test/test_helper.exs
@@ -1 +1,5 @@
+# SPDX-FileCopyrightText: 2017-2019 myhtmlex authors <https://github.com/Overbryd/myhtmlex>
+# SPDX-FileCopyrightText: 2019-2022 Pleroma Authors <https://pleroma.social>
+# SPDX-License-Identifier: LGPL-2.1-only
+
# Start the ExUnit test framework.
ExUnit.start()

File Metadata

Mime Type
text/x-diff
Expires
Sun, Nov 24, 7:29 PM (1 d, 4 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
39476
Default Alt Text
(206 KB)

Event Timeline