Page Menu
Home
Phorge
Search
Configure Global Search
Log In
Files
F113321
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Award Token
Flag For Later
Size
14 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/c_src/myhtml_worker.c b/c_src/myhtml_worker.c
index 4b4972e..ff447fc 100644
--- a/c_src/myhtml_worker.c
+++ b/c_src/myhtml_worker.c
@@ -1,444 +1,449 @@
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <ctype.h>
#include "erl_interface.h"
#include "ei.h"
#include <myhtml/myhtml.h>
#include <myhtml/mynamespace.h>
#define BUFFER_SIZE 1000
// Worker state handed to every message handler.
typedef struct _state_t {
// connection to the target Erlang node, as returned by erl_connect() in main()
int fd;
// myhtml tree created once in main() and passed to myhtml_parse() by decode()
myhtml_tree_t* tree;
} state_t;
// Pre-built terms that decode() creates and passes down to the build_* functions.
// NOTE(review): in the builders visible in this file every use of these
// fields is commented out, so the terms are currently created but unused.
typedef struct _prefab_t {
// the atom `nil`
ETERM* atom_nil;
// the atom `comment`
ETERM* atom_comment;
// an empty list term
ETERM* empty_list;
} prefab_t;
// Dispatches a received message by type and frees the terms it carries.
void
handle_emsg(state_t* state, ErlMessage* emsg);
// Handles a send: matches {decode, Bin, Args} and replies to the sender.
void
handle_send(state_t* state, ErlMessage* emsg);
// Parses the binary as HTML and returns the resulting tree term, or an {error, Reason} term.
ETERM*
decode(state_t* state, ErlMessage* emsg, ETERM* bin, ETERM* args);
// Converts a single myhtml node (text, comment or element) into a term, recursing into children.
ETERM*
build_tree(prefab_t* prefab, myhtml_tree_t* tree, myhtml_tree_node_t* node, unsigned char* parse_flags);
// Builds the list of child terms of `parent` (or the atom nil, depending on the flags).
ETERM*
build_node_children(prefab_t* prefab, myhtml_tree_t* tree, myhtml_tree_node_t* parent, unsigned char* parse_flags);
// Builds the list of {Name, Value} attribute tuples of `node`.
ETERM*
build_node_attrs(prefab_t* prefab, myhtml_tree_t* tree, myhtml_tree_node_t* node);
// Builds the term {error, <error_atom>}.
ETERM*
err_term(const char* error_atom);
// Translates the list of option atoms into a bit set of the FLAG_* constants below.
unsigned char
read_parse_flags(ETERM* list);
// Lowercases a NUL-terminated string in place and returns it.
char*
lowercase(char* c);
// Set by the option `html_atoms`: HTML-namespace tag names are emitted as atoms instead of binaries.
const unsigned char FLAG_HTML_ATOMS = 1 << 0;
// Set by the option `nil_self_closing`: self-closing / childless void elements get `nil` as children.
const unsigned char FLAG_NIL_SELF_CLOSING = 1 << 1;
// Set by the option `comment_tuple3`: comments are emitted as {comment, [], Text} instead of {comment, Text}.
const unsigned char FLAG_COMMENT_TUPLE3 = 1 << 2;
int main(int argc, char **argv) {
if (argc != 5 || !strcmp(argv[1],"-h") || !strcmp(argv[1],"--help")) {
printf("\nUsage: ./priv/cnode_server <sname> <hostname> <cookie> <tname>\n\n");
printf(" sname the short name you want this c-node to connect as\n");
printf(" hostname the hostname\n");
printf(" cookie the authentication cookie\n");
printf(" tname the target node short name to connect to");
return 0;
}
char *sname = argv[1];
char *hostname = argv[2];
char *cookie = argv[3];
char *tname = argv[4];
char full_name[1024];
stpcpy(stpcpy(stpcpy(full_name, sname), "@"), hostname);
char target_node[1024];
stpcpy(stpcpy(stpcpy(target_node, tname), "@"), hostname);
struct in_addr addr;
addr.s_addr = htonl(INADDR_ANY);
// fd to erlang node
state_t* state = (state_t*)malloc(sizeof(state_t));
bool looping = true;
int buffer_size = BUFFER_SIZE;
unsigned char* bufferpp = (unsigned char*)malloc(BUFFER_SIZE);
ErlMessage emsg;
// initialize all of Erl_Interface
erl_init(NULL, 0);
// initialize this node
printf("initialising %s\n", full_name); fflush(stdout);
if ( erl_connect_xinit(hostname, sname, full_name, &addr, cookie, 0) == -1 )
erl_err_quit("error erl_connect_init");
// connect to target node
printf("connecting to %s\n", target_node); fflush(stdout);
if ((state->fd = erl_connect(target_node)) < 0)
erl_err_quit("erl_connect");
myhtml_t* myhtml = myhtml_create();
myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
state->tree = myhtml_tree_create();
myhtml_tree_init(state->tree, myhtml);
// signal to stdout that we are ready
printf("%s ready\n", full_name); fflush(stdout);
while (looping)
{
// erl_xreceive_msg adapts the buffer width
switch( erl_xreceive_msg(state->fd, &bufferpp, &buffer_size, &emsg) )
// erl_receive_msg, uses a fixed buffer width
/* switch( erl_receive_msg(state->fd, buffer, BUFFER_SIZE, &emsg) ) */
{
case ERL_TICK:
// ignore
break;
case ERL_ERROR:
// On failure, the function returns ERL_ERROR and sets erl_errno to one of:
//
// EMSGSIZE
// Buffer is too small.
// ENOMEM
// No more memory is available.
// EIO
// I/O error.
//
// TODO: what is the correct reaction?
looping = false;
break;
default:
handle_emsg(state, &emsg);
}
}
}
/*
 * Routes one received message.
 *
 * Ordinary and registered sends are forwarded to handle_send(); link,
 * unlink and exit notifications (and any other type) are ignored.
 * In every case the terms carried by the message are released afterwards.
 */
void
handle_emsg(state_t* state, ErlMessage* emsg)
{
    const int kind = emsg->type;

    if (kind == ERL_REG_SEND || kind == ERL_SEND)
    {
        handle_send(state, emsg);
    }
    // ERL_LINK, ERL_UNLINK, ERL_EXIT: nothing to do

    // the receiver owns these terms, so release them here
    erl_free_compound(emsg->msg);
    erl_free_compound(emsg->to);
    erl_free_compound(emsg->from);
}
/*
 * Answers one send message.
 *
 * A message of the shape {decode, Bin, Args} is passed to decode() and the
 * resulting term is sent back to emsg->from. Any other message is answered
 * with {error, unknown_call}.
 *
 * Fix: the unknown-call branch used to return right after building the
 * error term, so the caller never got a reply and both the reply and the
 * match pattern were leaked. Both branches now share the send/free path.
 */
void
handle_send(state_t* state, ErlMessage* emsg)
{
    ETERM *decode_pattern = erl_format("{decode, Bin, Args}");
    ETERM *response;

    if (erl_match(decode_pattern, emsg->msg))
    {
        ETERM *bin = erl_var_content(decode_pattern, "Bin");
        ETERM *args = erl_var_content(decode_pattern, "Args");
        response = decode(state, emsg, bin, args);
        // free allocated resources
        erl_free_term(bin);
        erl_free_term(args);
    }
    else
    {
        response = err_term("unknown_call");
    }

    // send response
    // NOTE(review): presumably `from` is only populated for registered
    // sends; skip the reply instead of sending to a NULL pid - confirm
    // against the erl_interface ErlMessage documentation.
    if (emsg->from != NULL)
    {
        erl_send(state->fd, emsg->from, response);
    }

    // free allocated resources
    erl_free_compound(response);
    erl_free_term(decode_pattern);
    // free the free-list
    erl_eterm_release();
}
ETERM*
err_term(const char* error_atom)
{
/* ETERM* tuple2[] = {erl_mk_atom("error"), erl_mk_atom(error_atom)}; */
/* return erl_mk_tuple(tuple2, 2); */
return erl_format("{error, ~w}", erl_mk_atom(error_atom));
}
/*
 * Parses `bin` as UTF-8 HTML and converts the document into a term.
 *
 * state - provides the myhtml tree that is parsed into
 * emsg  - the originating message (not used here)
 * bin   - must be a binary holding the HTML source
 * args  - must be a list of option atoms, see read_parse_flags()
 *
 * Returns the tree term built from the last child of the document node,
 * {error, badarg} when the argument types are wrong, or
 * {error, myhtml_parse_failed} when myhtml rejects the input.
 * The caller owns the returned term.
 *
 * Fix: the prefab terms used to be created before the argument checks and
 * were never released, leaking three terms per call. They are now created
 * only when needed and released once the tree has been built.
 */
ETERM*
decode(state_t* state, ErlMessage* emsg, ETERM* bin, ETERM* args)
{
    (void)emsg;

    if (!ERL_IS_BINARY(bin) || !ERL_IS_LIST(args))
    {
        return err_term("badarg");
    }

    // get contents of binary argument
    char* binary = (char*)ERL_BIN_PTR(bin);
    size_t binary_len = ERL_BIN_SIZE(bin);

    // parse tree
    mystatus_t status = myhtml_parse(state->tree, MyENCODING_UTF_8, binary, binary_len);
    if (status != MyHTML_STATUS_OK)
    {
        return err_term("myhtml_parse_failed");
    }

    // read parse flags
    unsigned char parse_flags = read_parse_flags(args);

    // prepare reusable prefab terms
    prefab_t prefab;
    prefab.atom_nil = erl_mk_atom("nil");
    prefab.atom_comment = erl_mk_atom("comment");
    prefab.empty_list = erl_mk_empty_list();

    // build tree
    myhtml_tree_node_t *root = myhtml_tree_get_document(state->tree);
    ETERM* result = build_tree(&prefab, state->tree, myhtml_node_last_child(root), &parse_flags);

    // Drop our references to the prefab terms.
    // NOTE(review): the builders in this file do not store the prefab
    // pointers in the result; if that changes they must hold their own
    // reference - confirm before re-enabling the commented-out prefab uses.
    erl_free_term(prefab.atom_nil);
    erl_free_term(prefab.atom_comment);
    erl_free_term(prefab.empty_list);

    return result;
}
/*
 * Converts a list of option atoms into a FLAG_* bit set.
 *
 * Recognised atoms: html_atoms, nil_self_closing, comment_tuple3.
 * Elements that match none of them are ignored.
 *
 * Fix: the comparison patterns used to be re-created with erl_format() for
 * every list element and never released. They are now built once and
 * freed before returning.
 */
unsigned char
read_parse_flags(ETERM* list)
{
    unsigned char parse_flags = 0;

    ETERM *html_atoms = erl_format("html_atoms");
    ETERM *nil_self_closing = erl_format("nil_self_closing");
    ETERM *comment_tuple3 = erl_format("comment_tuple3");

    for (; !ERL_IS_EMPTY_LIST(list); list = ERL_CONS_TAIL(list)) {
        ETERM *flag = ERL_CONS_HEAD(list);
        if (erl_match(html_atoms, flag))
        {
            parse_flags |= FLAG_HTML_ATOMS;
        }
        else if (erl_match(nil_self_closing, flag))
        {
            parse_flags |= FLAG_NIL_SELF_CLOSING;
        }
        else if (erl_match(comment_tuple3, flag))
        {
            parse_flags |= FLAG_COMMENT_TUPLE3;
        }
    }

    // free allocated resources
    erl_free_term(html_atoms);
    erl_free_term(nil_self_closing);
    erl_free_term(comment_tuple3);

    return parse_flags;
}
/*
 * Converts one myhtml node into a term.
 *
 *   text node    -> binary with the text
 *   comment node -> {comment, Text}, or {comment, [], Text} when
 *                   FLAG_COMMENT_TUPLE3 is set
 *   element      -> {Tag, Attrs, Children}
 *
 * Tag is the tag name, prefixed with the lowercased namespace and a colon
 * (e.g. "svg:path") for nodes outside the HTML namespace. It is an atom
 * when FLAG_HTML_ATOMS is set and the node is a known tag in the HTML
 * namespace, and a binary otherwise.
 *
 * Fixes:
 *  - the namespace copy was malloc'ed with tag_ns_len bytes and filled with
 *    strcpy(), writing the terminating NUL past the allocation;
 *  - the tag buffer had room for namespace + name + 1 byte, one short of
 *    "namespace:name" plus its NUL terminator.
 * The namespace is now written straight into a correctly sized buffer, so
 * no heap allocation (and no matching free) is needed.
 */
ETERM*
build_tree(prefab_t* prefab, myhtml_tree_t* tree, myhtml_tree_node_t* node, unsigned char* parse_flags)
{
    ETERM* result;
    myhtml_tag_id_t tag_id = myhtml_node_tag_id(node);
    myhtml_namespace_t tag_ns = myhtml_node_namespace(node);

    if (tag_id == MyHTML_TAG__TEXT)
    {
        size_t text_len;
        const char* node_text = myhtml_node_text(node, &text_len);
        result = erl_mk_binary(node_text, text_len);
    }
    else if (tag_id == MyHTML_TAG__COMMENT)
    {
        size_t comment_len;
        const char* node_comment = myhtml_node_text(node, &comment_len);
        ETERM* comment = erl_mk_binary(node_comment, comment_len);
        if (*parse_flags & FLAG_COMMENT_TUPLE3)
        {
            result = erl_format("{comment, [], ~w}", comment);
        }
        else
        {
            result = erl_format("{comment, ~w}", comment);
        }
    }
    else
    {
        ETERM* tag;
        ETERM* attrs;
        ETERM* children;

        // get name of tag
        size_t tag_name_len = 0;
        const char *tag_name = myhtml_tag_name_by_id(tree, tag_id, &tag_name_len);
        // get namespace of tag
        size_t tag_ns_len = 0;
        const char *tag_ns_name_ptr = myhtml_namespace_name_by_id(tag_ns, &tag_ns_len);

        // a missing name is treated as an empty string rather than dereferenced
        if (tag_name == NULL)
        {
            tag_name_len = 0;
        }
        if (tag_ns_name_ptr == NULL)
        {
            tag_ns_len = 0;
        }

        // room for namespace + ':' + tag name + terminating NUL
        char tag_string[tag_ns_len + tag_name_len + 2];
        size_t tag_string_len = 0;

        if (tag_ns != MyHTML_NAMESPACE_HTML)
        {
            // tag_ns_name_ptr is read-only: copy it into our buffer and
            // lowercase the copy (a nice to have), e.g. "SVG" -> "svg"
            if (tag_ns_len > 0)
            {
                memcpy(tag_string, tag_ns_name_ptr, tag_ns_len);
            }
            tag_string[tag_ns_len] = '\0';
            lowercase(tag_string);
            // prepend namespace to tag name, e.g. "svg:path"
            tag_string[tag_ns_len] = ':';
            tag_string_len = tag_ns_len + 1; // +1 for colon
        }

        if (tag_name_len > 0)
        {
            memcpy(tag_string + tag_string_len, tag_name, tag_name_len);
        }
        tag_string_len += tag_name_len;
        // erl_mk_atom() below expects a NUL-terminated string
        tag_string[tag_string_len] = '\0';

        // attributes
        attrs = build_node_attrs(prefab, tree, node);
        // children
        children = build_node_children(prefab, tree, node, parse_flags);

        if (!(*parse_flags & FLAG_HTML_ATOMS) || (tag_id == MyHTML_TAG__UNDEF || tag_id == MyHTML_TAG_LAST_ENTRY || tag_ns != MyHTML_NAMESPACE_HTML))
        {
            tag = erl_mk_binary(tag_string, tag_string_len);
        }
        else
        {
            tag = erl_mk_atom(tag_string);
        }
        result = erl_format("{~w, ~w, ~w}", tag, attrs, children);
    }

    return result;
}
/*
 * Builds the children term of `parent`.
 *
 * With FLAG_NIL_SELF_CLOSING set, a self-closing node or a childless void
 * element yields the atom nil. Otherwise the result is a list holding the
 * term of every child in document order (empty when there are none).
 */
ETERM*
build_node_children(prefab_t* prefab, myhtml_tree_t* tree, myhtml_tree_node_t* parent, unsigned char* parse_flags)
{
    const unsigned char nil_wanted = *parse_flags & FLAG_NIL_SELF_CLOSING;

    if (myhtml_node_is_close_self(parent) && nil_wanted)
    {
        return erl_mk_atom("nil");
    }

    myhtml_tree_node_t* current = myhtml_node_last_child(parent);
    if (current == NULL && myhtml_node_is_void_element(parent) && nil_wanted)
    {
        return erl_mk_atom("nil");
    }

    // walk the siblings backwards and prepend, so the list ends up in order
    ETERM* children = erl_mk_empty_list();
    for (; current != NULL; current = myhtml_node_prev(current))
    {
        ETERM* child_term = build_tree(prefab, tree, current, parse_flags);
        children = erl_cons(child_term, children);
    }
    return children;
}
/*
 * Builds the attribute list of `node` as a list of {Name, Value} tuples of
 * binaries, in document order. An attribute without a value gets its own
 * name as value. A node without attributes yields an empty list.
 */
ETERM*
build_node_attrs(prefab_t* prefab, myhtml_tree_t* tree, myhtml_tree_node_t* node)
{
    ETERM* attrs = erl_mk_empty_list();

    // iterate from the last attribute backwards, prepending each tuple
    for (myhtml_tree_attr_t* cur = myhtml_node_attribute_last(node);
         cur != NULL;
         cur = myhtml_attribute_prev(cur))
    {
        size_t key_len;
        const char *key = myhtml_attribute_key(cur, &key_len);
        size_t val_len;
        const char *val = myhtml_attribute_value(cur, &val_len);

        ETERM* value = (val != NULL)
            ? erl_mk_binary(val, val_len)
            : erl_mk_binary(key, key_len);
        ETERM* name = erl_mk_binary(key, key_len);

        ETERM* pair = erl_format("{~w, ~w}", name, value);
        attrs = erl_cons(pair, attrs);
    }

    return attrs;
}
/*
 * Converts the NUL-terminated string `c` to lower case in place.
 * Returns `c` so the call can be nested in an expression.
 */
char*
lowercase(char* c)
{
    for (size_t i = 0; c[i] != '\0'; i++)
    {
        // cast first: tolower() is only defined for unsigned char values
        c[i] = tolower((unsigned char)c[i]);
    }
    return c;
}
diff --git a/mix.exs b/mix.exs
index 405bd57..8b620b6 100644
--- a/mix.exs
+++ b/mix.exs
@@ -1,106 +1,106 @@
# Mix project definition for the :myhtmlex application.
defmodule Myhtmlex.Mixfile do
use Mix.Project
# Project settings. The custom :myhtmlex_make compiler (defined further down
# in this file) runs before the standard Mix compilers.
def project do
[
app: :myhtmlex,
- version: "0.2.0",
+ version: "0.2.1",
elixir: "~> 1.5",
deps: deps(),
package: package(),
compilers: [:myhtmlex_make] ++ Mix.compilers,
build_embedded: Mix.env == :prod,
start_permanent: Mix.env == :prod,
name: "Myhtmlex",
description: """
A module to decode HTML into a tree,
porting all properties of the underlying
library myhtml, being fast and correct
in regards to the html spec.
""",
docs: docs()
]
end
# Package metadata: maintainers, license, links and the list of files that
# are shipped (including the C sources and Makefiles).
def package do
[
maintainers: ["Lukas Rieder"],
licenses: ["GNU LGPL"],
links: %{
"Github" => "https://github.com/Overbryd/myhtmlex",
"Issues" => "https://github.com/Overbryd/myhtmlex/issues",
"MyHTML" => "https://github.com/lexborisov/myhtml"
},
files: [
"lib",
"c_src",
"priv/.gitignore",
"test",
"Makefile",
"Makefile.Darwin",
"Makefile.Linux",
"mix.exs",
"README.md",
"LICENSE"
]
]
end
# OTP application settings: Myhtmlex.Safe is the application callback module
# and also the default value of the :mode environment key.
def application do
[
extra_applications: [:logger],
mod: {Myhtmlex.Safe, []},
# used to detect conflicts with other applications named processes
registered: [Myhtmlex.Safe.Cnode, Myhtmlex.Safe.Supervisor],
env: [
mode: Myhtmlex.Safe
]
]
end
# Dependencies; the documentation and benchmarking helpers are :dev only.
defp deps do
[
# documentation helpers
{:ex_doc, ">= 0.0.0", only: :dev},
# benchmarking helpers
{:benchfella, "~> 0.3.0", only: :dev},
# cnode helpers
{:nodex, "~> 0.1.1"}
]
end
# ExDoc options: the Myhtmlex module is the main page.
defp docs do
[
main: "Myhtmlex"
]
end
end
# Mix compiler that builds the native artifacts by shelling out to `make`.
defmodule Mix.Tasks.Compile.MyhtmlexMake do
  # make targets produced by this compiler
  @artifacts [
    "priv/myhtmlex.so",
    "priv/myhtml_worker"
  ]

  # Runs `make` for all artifacts and echoes its combined stdout/stderr.
  #
  # Returns :ok on success. Exits with 1 on Windows, which is not supported.
  # Raises a Mix error when make exits with a non-zero status: the exit
  # status used to be ignored, so a failed native build was reported as a
  # successful compilation.
  def run(_) do
    if match?({:win32, _}, :os.type()) do
      IO.warn("Windows is not yet a target.")
      exit(1)
    else
      {result, exit_code} =
        System.cmd("make", @artifacts,
          stderr_to_stdout: true,
          env: [{"MIX_ENV", to_string(Mix.env())}]
        )

      IO.binwrite(result)

      if exit_code != 0 do
        Mix.raise("make exited with status #{exit_code} while building #{Enum.join(@artifacts, ", ")}")
      end
    end

    :ok
  end

  # Runs `make clean` and prints its output. Cleaning is best-effort, so the
  # exit status is deliberately not checked here.
  def clean() do
    {result, _exit_code} = System.cmd("make", ["clean"], stderr_to_stdout: true)
    Mix.shell().info(result)
    :ok
  end
end
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Mon, Nov 25, 2:43 AM (1 d, 10 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
39552
Default Alt Text
(14 KB)
Attached To
Mode
R16 fast_html
Attached
Detach File
Event Timeline
Log In to Comment