Page MenuHomePhorge

No OneTemporary

Size
21 KB
Referenced Files
None
Subscribers
None
diff --git a/lib/gen_magic/server.ex b/lib/gen_magic/server.ex
index 5eaed86..09cdd41 100644
--- a/lib/gen_magic/server.ex
+++ b/lib/gen_magic/server.ex
@@ -1,301 +1,302 @@
defmodule GenMagic.Server do
@moduledoc """
Provides access to the underlying libmagic client, which performs file introspection.
The Server needs to be supervised, since it will terminate if it receives any unexpected error.
"""
@behaviour :gen_statem
alias GenMagic.Result
alias GenMagic.Server.Data
alias GenMagic.Server.Status
import Kernel, except: [send: 2]
@typedoc """
Represents the reference to the underlying server, as returned by `:gen_statem`.
"""
@type t :: :gen_statem.server_ref()
@typedoc """
Represents values accepted as startup options, which can be passed to `start_link/1`.
- `:name`: If present, this will be the registered name for the underlying process.
Note that `:gen_statem` requires `{:local, name}`, but given widespread GenServer convention,
atoms are accepted and will be converted to `{:local, name}`.
- `:startup_timeout`: Specifies how long the Server waits for the C program to initialise.
However, if the underlying C program exits, then the process exits immediately.
Can be set to `:infinity`.
- `:process_timeout`: Specifies how long the Server waits for each request to complete.
Can be set to `:infinity`.
Please note that, if you have chosen a custom timeout value, you should also pass it when
using `GenMagic.Server.perform/3`.
- `:recycle_threshold`: Specifies the number of requests processed before the underlying C
program is recycled.
Can be set to `:infinity` if you do not wish for the program to be recycled.
- `:database_patterns`: Specifies what magic databases to load; you can specify a list of either
Path Patterns (see `Path.wildcard/2`) or `:default` to instruct the C program to load the
appropriate databases.
For example, if you have had to add custom magics, then you can set this value to:
[:default, "path/to/my/magic"]
"""
@type option ::
{:name, atom() | :gen_statem.server_name()}
| {:startup_timeout, timeout()}
| {:process_timeout, timeout()}
| {:recycle_threshold, non_neg_integer() | :infinity}
| {:database_patterns, nonempty_list(:default | Path.t())}
@typedoc """
Current state of the Server:
- `:pending`: This is the initial state; the Server will attempt to start the underlying Port
and the libmagic client, then automatically transition to either Available or Crashed.
- `:available`: This is the default state. In this state the Server is able to accept requests
and they will be replied in the same order.
- `:processing`: This is the state the Server will be in if it is processing requests. In this
state, further requests can still be lodged and they will be processed when the Server is
available again.
For proper concurrency, use a process pool like Poolboy, Sbroker, etc.
- `:recycling`: This is the state the Server will be in, if its underlying C program needs to be
recycled. This state is triggered whenever the cycle count reaches the defined value as per
`:recycle_threshold`.
In this state, the Server is able to accept requests, but they will not be processed until the
underlying C server program has been started again.
"""
@type state :: :starting | :processing | :available | :recycling
@spec child_spec([option()]) :: Supervisor.child_spec()
@spec start_link([option()]) :: :gen_statem.start_ret()
@spec perform(t(), Path.t(), timeout()) :: {:ok, Result.t()} | {:error, term() | String.t()}
@spec status(t(), timeout()) :: {:ok, Status.t()} | {:error, term()}
@spec stop(t(), term(), timeout()) :: :ok
@doc """
Returns the default Child Specification for this Server for use in Supervisors.
You can override this with `Supervisor.child_spec/2` as required.
"""
def child_spec(options) do
%{
id: __MODULE__,
start: {__MODULE__, :start_link, [options]},
type: :worker,
restart: :permanent,
shutdown: 500
}
end
@doc """
Starts a new Server.
See `t:option/0` for further details.
"""
def start_link(options) do
{name, options} = Keyword.pop(options, :name)
case name do
nil -> :gen_statem.start_link(__MODULE__, options, [])
name when is_atom(name) -> :gen_statem.start_link({:local, name}, __MODULE__, options, [])
{:global, _} -> :gen_statem.start_link(name, __MODULE__, options, [])
{:via, _, _} -> :gen_statem.start_link(name, __MODULE__, options, [])
{:local, _} -> :gen_statem.start_link(name, __MODULE__, options, [])
end
end
@doc """
Determines the type of the file provided.
"""
def perform(server_ref, path, timeout \\ 5000) do
case :gen_statem.call(server_ref, {:perform, path}, timeout) do
{:ok, %Result{} = result} -> {:ok, result}
{:error, reason} -> {:error, reason}
end
end
@doc """
Returns status of the Server.
"""
def status(server_ref, timeout \\ 5000) do
:gen_statem.call(server_ref, :status, timeout)
end
@doc """
Stops the Server with reason `:normal` and timeout `:infinity`.
"""
def stop(server_ref) do
:gen_statem.stop(server_ref)
end
@doc """
Stops the Server with the specified reason and timeout.
"""
def stop(server_ref, reason, timeout) do
:gen_statem.stop(server_ref, reason, timeout)
end
@impl :gen_statem
def init(options) do
import GenMagic.Config
data = %Data{
port_name: get_port_name(),
port_options: get_port_options(options),
startup_timeout: get_startup_timeout(options),
process_timeout: get_process_timeout(options),
recycle_threshold: get_recycle_threshold(options)
}
{:ok, :starting, data}
end
@impl :gen_statem
def callback_mode do
[:state_functions, :state_enter]
end
@doc false
def starting(:enter, _, %{request: nil, port: nil} = data) do
port = Port.open(data.port_name, data.port_options)
{:keep_state, %{data | port: port}, data.startup_timeout}
end
@doc false
def starting({:call, from}, :status, data) do
handle_status_call(from, :starting, data)
end
@doc false
def starting({:call, _from}, {:perform, _path}, _data) do
{:keep_state_and_data, :postpone}
end
@doc false
def starting(:info, {port, {:data, binary}}, %{port: port} = data) do
case :erlang.binary_to_term(binary) do
:ready ->
{:next_state, :available, data}
end
end
def starting(:info, {port, {:exit_status, code}}, %{port: port} = data) do
error =
case code do
1 -> :no_database
2 -> :no_argument
3 -> :missing_database
end
{:stop, {:error, error}, data}
end
@doc false
def available(:enter, _old_state, %{request: nil}) do
:keep_state_and_data
end
@doc false
def available({:call, from}, {:perform, path}, data) do
data = %{data | cycles: data.cycles + 1, request: {path, from, :erlang.now()}}
arg =
case path do
path when is_binary(path) -> {:file, path}
{:bytes, bytes} -> {:bytes, bytes}
end
send(data.port, arg)
{:next_state, :processing, data}
end
@doc false
def available({:call, from}, :status, data) do
handle_status_call(from, :available, data)
end
@doc false
def processing(:enter, _old_state, %{request: {_path, _from, _time}} = data) do
{:keep_state_and_data, data.process_timeout}
end
@doc false
def processing({:call, _from}, {:perform, _path}, _data) do
{:keep_state_and_data, :postpone}
end
@doc false
def processing({:call, from}, :status, data) do
handle_status_call(from, :processing, data)
end
@doc false
def processing(:info, {port, {:data, response}}, %{port: port} = data) do
{_, from, _} = data.request
data = %{data | request: nil}
response = {:reply, from, handle_response(response)}
next_state = (data.cycles >= data.recycle_threshold && :recycling) || :available
{:next_state, next_state, data, [response, :hibernate]}
end
@doc false
def recycling(:enter, _, %{request: nil, port: port} = data) when is_port(port) do
send(data.port, {:stop, :recycle})
{:keep_state_and_data, data.startup_timeout}
end
@doc false
def recycling({:call, _from}, {:perform, _path}, _data) do
{:keep_state_and_data, :postpone}
end
@doc false
def recycling({:call, from}, :status, data) do
handle_status_call(from, :recycling, data)
end
@doc false
def recycling(:info, {port, {:exit_status, 0}}, %{port: port} = data) do
{:next_state, :starting, %{data | port: nil, cycles: 0}}
end
defp send(port, command) do
Kernel.send(port, {self(), {:command, :erlang.term_to_binary(command)}})
end
@errnos %{
2 => :enoent,
13 => :eaccess,
21 => :eisdir,
20 => :enotdir,
12 => :enomem,
- 24 => :emfile
+ 24 => :emfile,
+ 36 => :enametoolong,
}
@errno Map.keys(@errnos)
defp handle_response(data) do
case :erlang.binary_to_term(data) do
{:ok, {mime_type, encoding, content}} -> {:ok, Result.build(mime_type, encoding, content)}
{:error, {errno, _}} when errno in @errno -> {:error, @errnos[errno]}
{:error, {errno, string}} -> {:error, "#{errno}: #{string}"}
end
end
defp handle_status_call(from, state, data) do
response = {:ok, %__MODULE__.Status{state: state, cycles: data.cycles}}
{:keep_state_and_data, {:reply, from, response}}
end
end
diff --git a/src/apprentice.c b/src/apprentice.c
index b3c578f..33c42b5 100644
--- a/src/apprentice.c
+++ b/src/apprentice.c
@@ -1,396 +1,406 @@
//
// The Sorcerer’s Apprentice
//
// To use this program, compile it with dynamically linked libmagic, as mirrored
// at https://github.com/threatstack/libmagic. You may install it with apt-get,
// yum or brew. Refer to the Makefile for further reference.
//
// This program is designed to run interactively as a backend daemon to the
// GenMagic library, and follows the command line pattern:
//
// $ apprentice --database-file <file> --database-default
//
// Where each argument either refers to a compiled or uncompiled magic database,
// or the default database. They will be loaded in the sequence that they were
// specified. Note that you must specify at least one database. Erlang Term
//
// -- main: send atom ready
// enter loop
//
// -- while
// get {:file, path} -> process_file -> ok | error
// {:bytes, path} -> process_bytes -> ok | error
// ok: {:ok, {type, encoding, name}}
// error: {:error, :badarg} | {:error, {errno, String.t()}}
// {:stop, _} -> exit(ERROR_OK) -> exit 0
//
#include <ei.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <magic.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#define USAGE "[--database-file <path/to/magic.mgc> | --database-default, ...]"
#define DELIMITER "\t"
#define ERROR_OK 0
#define ERROR_NO_DATABASE 1
#define ERROR_NO_ARGUMENT 2
#define ERROR_MISSING_DATABASE 3
#define ERROR_BAD_TERM 4
#define ERROR_EI 5
#define ANSI_INFO "\x1b[37m" // gray
#define ANSI_OK "\x1b[32m" // green
#define ANSI_ERROR "\x1b[31m" // red
#define ANSI_IGNORE "\x1b[90m" // red
#define ANSI_RESET "\x1b[0m"
#define MAGIC_FLAGS_COMMON (MAGIC_CHECK | MAGIC_ERROR)
magic_t magic_setup(int flags);
typedef char byte;
-int read_cmd(byte *buf);
-int write_cmd(byte *buf, int len);
-
void setup_environment();
void setup_options(int argc, char **argv);
void setup_options_file(char *optarg);
void setup_options_default();
void setup_system();
int process_command(byte *buf);
void process_line(char *line);
void process_file(char *path, ei_x_buff *result);
void process_bytes(char *bytes, int size, ei_x_buff *result);
+size_t read_cmd(byte *buf);
+size_t write_cmd(byte *buf, size_t len);
void error(ei_x_buff *result, const char *error);
void handle_magic_error(magic_t handle, int errn, ei_x_buff *result);
struct magic_file {
struct magic_file *prev;
struct magic_file *next;
char *path;
};
static struct magic_file *magic_database;
static magic_t magic_mime_type; // MAGIC_MIME_TYPE
static magic_t magic_mime_encoding; // MAGIC_MIME_ENCODING
static magic_t magic_type_name; // MAGIC_NONE
int main(int argc, char **argv) {
ei_init();
setup_environment();
setup_options(argc, argv);
setup_system();
ei_x_buff ok_buf;
if (ei_x_new_with_version(&ok_buf) || ei_x_encode_atom(&ok_buf, "ready"))
- return 5;
+ exit(ERROR_EI);
write_cmd(ok_buf.buff, ok_buf.index);
if (ei_x_free(&ok_buf) != 0)
exit(ERROR_EI);
- byte buf[4111];
+ byte buf[4112];
while (read_cmd(buf) > 0) {
process_command(buf);
}
- return 0;
+ return 255;
}
int process_command(byte *buf) {
ei_x_buff result;
char atom[128];
int index, version, arity, termtype, termsize;
index = 0;
- if (ei_decode_version(buf, &index, &version) != 0)
+ if (ei_decode_version(buf, &index, &version) != 0) {
exit(ERROR_BAD_TERM);
+ }
// Initialize result
- if (ei_x_new_with_version(&result) || ei_x_encode_tuple_header(&result, 2))
+ if (ei_x_new_with_version(&result) || ei_x_encode_tuple_header(&result, 2)) {
exit(ERROR_EI);
+ }
if (ei_decode_tuple_header(buf, &index, &arity) != 0) {
error(&result, "badarg");
return 1;
}
if (arity != 2) {
error(&result, "badarg");
return 1;
}
if (ei_decode_atom(buf, &index, atom) != 0) {
error(&result, "badarg");
return 1;
}
if (strlen(atom) == 4 && strncmp(atom, "file", 4) == 0) {
char path[4097];
ei_get_type(buf, &index, &termtype, &termsize);
if (termtype == ERL_BINARY_EXT && termsize < 4096) {
long bin_length;
ei_decode_binary(buf, &index, path, &bin_length);
path[termsize] = '\0';
process_file(path, &result);
} else {
error(&result, "badarg");
return 1;
}
} else if (strlen(atom) == 5 && strncmp(atom, "bytes", 5) == 0) {
int termtype;
int termsize;
char bytes[51];
ei_get_type(buf, &index, &termtype, &termsize);
if (termtype == ERL_BINARY_EXT && termsize < 50) {
long bin_length;
ei_decode_binary(buf, &index, bytes, &bin_length);
bytes[termsize] = '\0';
process_bytes(bytes, termsize, &result);
} else {
error(&result, "badarg");
return 1;
}
} else if (strlen(atom) == 4 && strncmp(atom, "stop", 4) == 0) {
exit(ERROR_OK);
} else {
error(&result, "badarg");
return 1;
}
write_cmd(result.buff, result.index);
- if (ei_x_free(&result) != 0)
+ if (ei_x_free(&result) != 0) {
exit(ERROR_EI);
+ }
return 0;
}
void setup_environment() { opterr = 0; }
void setup_options(int argc, char **argv) {
const char *option_string = "f:";
static struct option long_options[] = {
{"database-file", required_argument, 0, 'f'},
{"database-default", no_argument, 0, 'd'},
{0, 0, 0, 0}};
int option_character;
while (1) {
int option_index = 0;
option_character =
getopt_long(argc, argv, option_string, long_options, &option_index);
if (-1 == option_character) {
break;
}
switch (option_character) {
case 'f': {
setup_options_file(optarg);
break;
}
case 'd': {
setup_options_default();
break;
}
case '?':
default: {
exit(ERROR_NO_ARGUMENT);
break;
}
}
}
}
void setup_options_file(char *optarg) {
if (0 != access(optarg, R_OK)) {
exit(ERROR_MISSING_DATABASE);
}
struct magic_file *next = malloc(sizeof(struct magic_file));
size_t path_length = strlen(optarg) + 1;
char *path = malloc(path_length);
memcpy(path, optarg, path_length);
next->path = path;
next->prev = magic_database;
if (magic_database) {
magic_database->next = next;
}
magic_database = next;
}
void setup_options_default() {
struct magic_file *next = malloc(sizeof(struct magic_file));
next->path = NULL;
next->prev = magic_database;
if (magic_database) {
magic_database->next = next;
}
magic_database = next;
}
void setup_system() {
magic_mime_encoding = magic_setup(MAGIC_FLAGS_COMMON | MAGIC_MIME_ENCODING);
magic_mime_type = magic_setup(MAGIC_FLAGS_COMMON | MAGIC_MIME_TYPE);
magic_type_name = magic_setup(MAGIC_FLAGS_COMMON | MAGIC_NONE);
}
magic_t magic_setup(int flags) {
magic_t magic = magic_open(flags);
struct magic_file *current_database = magic_database;
if (!current_database) {
exit(ERROR_NO_DATABASE);
}
while (current_database->prev) {
current_database = current_database->prev;
}
while (current_database) {
magic_load(magic, current_database->path);
current_database = current_database->next;
}
return magic;
}
void process_bytes(char *path, int size, ei_x_buff *result) {
const char *mime_type_result = magic_buffer(magic_mime_type, path, size);
const int mime_type_errno = magic_errno(magic_mime_type);
if (mime_type_errno > 0) {
handle_magic_error(magic_mime_type, mime_type_errno, result);
return;
}
- const char *mime_encoding_result = magic_buffer(magic_mime_encoding, path, size);
+ const char *mime_encoding_result =
+ magic_buffer(magic_mime_encoding, path, size);
int mime_encoding_errno = magic_errno(magic_mime_encoding);
if (mime_encoding_errno > 0) {
handle_magic_error(magic_mime_encoding, mime_encoding_errno, result);
return;
}
const char *type_name_result = magic_buffer(magic_type_name, path, size);
int type_name_errno = magic_errno(magic_type_name);
if (type_name_errno > 0) {
handle_magic_error(magic_type_name, type_name_errno, result);
return;
}
ei_x_encode_atom(result, "ok");
ei_x_encode_tuple_header(result, 3);
ei_x_encode_binary(result, mime_type_result, strlen(mime_type_result));
ei_x_encode_binary(result, mime_encoding_result,
strlen(mime_encoding_result));
ei_x_encode_binary(result, type_name_result, strlen(type_name_result));
return;
}
void handle_magic_error(magic_t handle, int errn, ei_x_buff *result) {
const char *error = magic_error(handle);
ei_x_encode_atom(result, "error");
ei_x_encode_tuple_header(result, 2);
long errlon = (long)errn;
ei_x_encode_long(result, errlon);
ei_x_encode_binary(result, error, strlen(error));
return;
}
void process_file(char *path, ei_x_buff *result) {
const char *mime_type_result = magic_file(magic_mime_type, path);
const int mime_type_errno = magic_errno(magic_mime_type);
if (mime_type_errno > 0) {
handle_magic_error(magic_mime_type, mime_type_errno, result);
return;
}
const char *mime_encoding_result = magic_file(magic_mime_encoding, path);
int mime_encoding_errno = magic_errno(magic_mime_encoding);
if (mime_encoding_errno > 0) {
handle_magic_error(magic_mime_encoding, mime_encoding_errno, result);
return;
}
const char *type_name_result = magic_file(magic_type_name, path);
int type_name_errno = magic_errno(magic_type_name);
if (type_name_errno > 0) {
handle_magic_error(magic_type_name, type_name_errno, result);
return;
}
ei_x_encode_atom(result, "ok");
ei_x_encode_tuple_header(result, 3);
ei_x_encode_binary(result, mime_type_result, strlen(mime_type_result));
ei_x_encode_binary(result, mime_encoding_result,
strlen(mime_encoding_result));
ei_x_encode_binary(result, type_name_result, strlen(type_name_result));
return;
}
-// From https://erlang.org/doc/tutorial/erl_interface.html
-int read_exact(byte *buf, int len) {
+// Adapted from https://erlang.org/doc/tutorial/erl_interface.html
+// Changed `read_cmd`, the original one was buggy given some length (due to
+// endinaness).
+// TODO: Check if `write_cmd` exhibits the same issue.
+size_t read_exact(byte *buf, size_t len) {
int i, got = 0;
do {
if ((i = read(0, buf + got, len - got)) <= 0) {
return (i);
}
got += i;
} while (got < len);
return (len);
}
-int write_exact(byte *buf, int len) {
+size_t write_exact(byte *buf, size_t len) {
int i, wrote = 0;
do {
if ((i = write(1, buf + wrote, len - wrote)) <= 0)
return (i);
wrote += i;
} while (wrote < len);
return (len);
}
-int read_cmd(byte *buf) {
- int len;
-
- if (read_exact(buf, 2) != 2)
- return (-1);
- len = (buf[0] << 8) | buf[1];
- return read_exact(buf, len);
+size_t read_cmd(byte *buf) {
+ int i;
+ if ((i = read(0, buf, sizeof(uint16_t))) <= 0) {
+ return (i);
+ }
+ uint16_t len16 = *(uint16_t *)buf;
+ len16 = ntohs(len16);
+ if (len16 > 4111) {
+ exit(ERROR_BAD_TERM);
+ }
+ return read_exact(buf, len16);
}
-int write_cmd(byte *buf, int len) {
+size_t write_cmd(byte *buf, size_t len) {
byte li;
li = (len >> 8) & 0xff;
write_exact(&li, 1);
li = len & 0xff;
write_exact(&li, 1);
return write_exact(buf, len);
}
void error(ei_x_buff *result, const char *error) {
ei_x_encode_atom(result, "error");
ei_x_encode_atom(result, error);
write_cmd(result->buff, result->index);
if (ei_x_free(result) != 0)
exit(ERROR_EI);
}

File Metadata

Mime Type
text/x-diff
Expires
Fri, Nov 22, 7:38 PM (21 h, 57 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
38702
Default Alt Text
(21 KB)

Event Timeline