Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F114740
kazv-markdown.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
5 KB
Referenced Files
None
Subscribers
None
kazv-markdown.cpp
View Options
/*
* This file is part of kazv.
* SPDX-FileCopyrightText: 2024 tusooa <tusooa@kazv.moe>
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include
<kazv-defs.hpp>
#include
<unordered_set>
#include
<boost/algorithm/string.hpp>
#include
<boost/regex.hpp>
#include
<cmark.h>
#include
<QDebug>
#include
"kazv-markdown.hpp"
// Regex fragment for the server-name part of a Matrix id: between 1 and 255
// characters out of letters, digits, '.' and '-', optionally followed by a
// ':' and a port of 1 to 5 digits. It is spliced into larger patterns.
static const std::string serverNameRegexStr{
    R"aaaa([A-Za-z0-9.-]{1,255}(:[0-9]{1,5})?)aaaa"
};
static
const
auto
userIdRegex
=
boost
::
regex
(
// The char before must not be a non-space
// This means either it is the beginning of the string or a space char
R
"
aaaa(
(?<!\S)@[a-z0-9._=/+-]+:
)aaaa
"
+
serverNameRegexStr
// The char after must not be a non-space
// This means either it is the end of the string or a space char
+
R
"
(
(?!\S)
)
"
);
/**
 * Build the matrix.to link for a user id.
 *
 * @param userId The user id; it is appended verbatim (no escaping).
 * @return "https://matrix.to/#/" followed by @c userId.
 */
static std::string makeUserLink(const std::string &userId)
{
    std::string link("https://matrix.to/#/");
    link += userId;
    return link;
}
struct
MarkdownParser
{
MarkdownParser
(
std
::
string
markdown
)
:
m_doc
(
cmark_parse_document
(
markdown
.
data
(),
markdown
.
size
(),
CMARK_OPT_UNSAFE
))
{
linkify
();
m_html
=
cmark_render_html
(
m_doc
,
CMARK_OPT_UNSAFE
);
}
~
MarkdownParser
()
{
cmark_node_free
(
m_doc
);
free
(
m_html
);
}
void
linkify
()
{
cmark_iter
*
iter
=
cmark_iter_new
(
m_doc
);
for
(
cmark_event_type
evType
=
cmark_iter_next
(
iter
);
evType
!=
CMARK_EVENT_DONE
;
evType
=
cmark_iter_next
(
iter
)
)
{
cmark_node
*
cur
=
cmark_iter_get_node
(
iter
);
maybeLinkifyNode
(
cur
,
evType
);
}
cmark_iter_free
(
iter
);
}
bool
isInsideLink
(
cmark_node
*
cur
)
{
if
(
!
cur
)
{
return
false
;
}
if
(
cmark_node_get_type
(
cur
)
==
CMARK_NODE_LINK
)
{
return
true
;
}
return
isInsideLink
(
cmark_node_parent
(
cur
));
}
void
maybeLinkifyNode
(
cmark_node
*
cur
,
cmark_event_type
evType
)
{
auto
nodeType
=
cmark_node_get_type
(
cur
);
if
(
!
((
nodeType
==
CMARK_NODE_HTML_BLOCK
||
nodeType
==
CMARK_NODE_TEXT
)
&&
evType
==
CMARK_EVENT_ENTER
)
||
isInsideLink
(
cur
))
{
return
;
}
auto
textContent
=
cmark_node_get_literal
(
cur
);
if
(
!
textContent
)
{
return
;
}
auto
contentStr
=
std
::
string
(
textContent
);
if
(
nodeType
==
CMARK_NODE_HTML_BLOCK
)
{
auto
replacedStr
=
boost
::
regex_replace
(
contentStr
,
userIdRegex
,
[
this
](
const
boost
::
smatch
&
match
)
{
auto
matched
=
match
.
str
();
m_mentions
.
insert
(
matched
);
return
"<a href=
\"
"
+
makeUserLink
(
matched
)
+
"
\"
>"
+
matched
+
"</a>"
;
}
);
if
(
contentStr
!=
replacedStr
)
{
cmark_node_set_literal
(
cur
,
replacedStr
.
data
());
}
}
else
{
boost
::
smatch
match
;
auto
firstNode
=
cmark_node_new
(
CMARK_NODE_TEXT
);
cmark_node_set_literal
(
firstNode
,
""
);
cmark_node_replace
(
cur
,
firstNode
);
cmark_node_free
(
cur
);
auto
lastNode
=
firstNode
;
auto
appendTextNode
=
[
&
lastNode
](
const
std
::
string
&
str
)
{
auto
textNode
=
cmark_node_new
(
CMARK_NODE_TEXT
);
cmark_node_set_literal
(
textNode
,
str
.
data
());
cmark_node_insert_after
(
lastNode
,
textNode
);
lastNode
=
textNode
;
};
auto
appendLinkNode
=
[
&
lastNode
](
const
std
::
string
&
str
,
const
std
::
string
&
linkTarget
)
{
auto
linkNode
=
cmark_node_new
(
CMARK_NODE_LINK
);
cmark_node_set_url
(
linkNode
,
linkTarget
.
data
());
cmark_node_insert_after
(
lastNode
,
linkNode
);
lastNode
=
linkNode
;
auto
textNode
=
cmark_node_new
(
CMARK_NODE_TEXT
);
cmark_node_set_literal
(
textNode
,
str
.
data
());
cmark_node_append_child
(
linkNode
,
textNode
);
};
while
(
boost
::
regex_search
(
contentStr
,
match
,
userIdRegex
))
{
if
(
match
.
position
())
{
auto
preMatchStr
=
match
.
prefix
().
str
();
appendTextNode
(
preMatchStr
);
}
auto
matched
=
match
.
str
();
m_mentions
.
insert
(
matched
);
appendLinkNode
(
matched
,
makeUserLink
(
matched
));
contentStr
=
match
.
suffix
().
str
();
};
if
(
!
contentStr
.
empty
())
{
appendTextNode
(
contentStr
);
}
}
}
std
::
string
getHtml
()
const
{
auto
htmlStr
=
std
::
string
(
m_html
);
boost
::
algorithm
::
trim
(
htmlStr
);
return
htmlStr
;
}
immer
::
flex_vector
<
std
::
string
>
getMentions
()
const
{
return
immer
::
flex_vector
<
std
::
string
>
(
m_mentions
.
begin
(),
m_mentions
.
end
()
);
}
cmark_node
*
m_doc
;
char
*
m_html
;
std
::
unordered_set
<
std
::
string
>
m_mentions
;
};
/**
 * Convert Markdown text to HTML.
 *
 * @param markdown The Markdown source text.
 * @return The HTML produced by MarkdownParser together with the mentions it
 * collected, in that order.
 */
KazvRichTextParseResult markdownToHtml(const std::string &markdown)
{
    MarkdownParser parser(markdown);
    return {parser.getHtml(), parser.getMentions()};
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Tue, Nov 26, 2:08 PM (1 d, 10 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
40365
Default Alt Text
kazv-markdown.cpp (5 KB)
Attached To
Mode
rK kazv
Attached
Detach File
Event Timeline
Log In to Comment