Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F45707754
html_tree_converter.service.js
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
3 KB
Referenced Files
None
Subscribers
None
html_tree_converter.service.js
View Options
import
{
getTagName
}
from
'./utility.service.js'
import
{
unescape
}
from
'lodash'
/**
* This is a not-so-tiny purpose-built HTML parser/processor. This parses html
* and converts it into a tree structure representing tag openers/closers and
* children.
*
* Structure follows this pattern: [opener, [...children], closer] except root
* node which is just [...children]. Text nodes can only be within children and
* are represented as strings.
*
* Intended use is to convert HTML structure and then recursively iterate over it
* most likely using a map. Very useful for dynamically rendering html replacing
* tags with JSX elements in a render function.
*
* known issue: doesn't handle CDATA so CDATA might not work well
* known issue: doesn't handle HTML comments
*
* @param {Object} input - input data
* @return {string} processed html
*/
export
const
convertHtmlToTree
=
(
html
=
''
)
=>
{
// Elements that are implicitly self-closing
// https://developer.mozilla.org/en-US/docs/Glossary/empty_element
const
emptyElements
=
new
Set
([
'area'
,
'base'
,
'br'
,
'col'
,
'embed'
,
'hr'
,
'img'
,
'input'
,
'keygen'
,
'link'
,
'meta'
,
'param'
,
'source'
,
'track'
,
'wbr'
])
// TODO For future - also parse HTML5 multi-source components?
const
buffer
=
[]
// Current output buffer
const
levels
=
[[
''
,
buffer
]]
// How deep we are in tags and which tags were there
let
textBuffer
=
''
// Current line content
let
tagBuffer
=
null
// Current tag buffer, if null = we are not currently reading a tag
const
getCurrentBuffer
=
()
=>
{
return
levels
[
levels
.
length
-
1
][
1
]
}
const
flushText
=
()
=>
{
// Processes current line buffer, adds it to output buffer and clears line buffer
if
(
textBuffer
===
''
)
return
getCurrentBuffer
().
push
(
textBuffer
)
textBuffer
=
''
}
const
handleSelfClosing
=
(
tag
)
=>
{
getCurrentBuffer
().
push
([
tag
])
}
const
handleOpen
=
(
tag
)
=>
{
const
curBuf
=
getCurrentBuffer
()
const
newLevel
=
[
unescape
(
tag
),
[]]
levels
.
push
(
newLevel
)
curBuf
.
push
(
newLevel
)
}
const
handleClose
=
(
tag
)
=>
{
const
currentTag
=
levels
[
levels
.
length
-
1
]
if
(
getTagName
(
levels
[
levels
.
length
-
1
][
0
])
===
getTagName
(
tag
))
{
currentTag
.
push
(
tag
)
levels
.
pop
()
}
else
{
getCurrentBuffer
().
push
(
tag
)
}
}
for
(
let
i
=
0
;
i
<
html
.
length
;
i
++
)
{
const
char
=
html
[
i
]
if
(
char
===
'<'
&&
tagBuffer
===
null
)
{
flushText
()
tagBuffer
=
char
}
else
if
(
char
!==
'>'
&&
tagBuffer
!==
null
)
{
tagBuffer
+=
char
}
else
if
(
char
===
'>'
&&
tagBuffer
!==
null
)
{
tagBuffer
+=
char
const
tagFull
=
tagBuffer
tagBuffer
=
null
const
tagName
=
getTagName
(
tagFull
)
if
(
tagFull
[
1
]
===
'/'
)
{
handleClose
(
tagFull
)
}
else
if
(
emptyElements
.
has
(
tagName
)
||
tagFull
[
tagFull
.
length
-
2
]
===
'/'
)
{
// self-closing
handleSelfClosing
(
tagFull
)
}
else
{
handleOpen
(
tagFull
)
}
}
else
{
textBuffer
+=
char
}
}
if
(
tagBuffer
)
{
textBuffer
+=
tagBuffer
}
flushText
()
return
buffer
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Mar 7, 9:49 AM (1 d, 3 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1072751
Default Alt Text
html_tree_converter.service.js (3 KB)
Attached To
Mode
rPUFE pleroma-fe-upstream
Attached
Detach File
Event Timeline
Log In to Comment