Page MenuHomePhorge

No OneTemporary

Size
23 KB
Referenced Files
None
Subscribers
None
diff --git a/lib/gen_magic/helpers.ex b/lib/gen_magic/helpers.ex
index 183fe31..13ab3de 100644
--- a/lib/gen_magic/helpers.ex
+++ b/lib/gen_magic/helpers.ex
@@ -1,28 +1,30 @@
defmodule GenMagic.Helpers do
@moduledoc """
Contains convenience functions for one-off use.
"""
alias GenMagic.Result
alias GenMagic.Server
- @spec perform_once(Path.t(), [Server.option()]) :: {:ok, Result.t()} | {:error, term()}
+
+ @spec perform_once(Path.t() | {:bytes, binary}, [Server.option()]) ::
+ {:ok, Result.t()} | {:error, term()}
@doc """
Runs a one-shot process without supervision.

Starts a temporary `GenMagic.Server`, asks it to identify `path` (either a file
path or a `{:bytes, binary}` tuple) and stops the server again, whether or not
the identification succeeded. If the server cannot be started, the value
returned by `GenMagic.Server.start_link/1` is returned unchanged.

Useful in tests, but not recommended for actual applications.

## Example

    iex(1)> {:ok, result} = GenMagic.Helpers.perform_once(".")
    iex(2)> result
    %GenMagic.Result{content: "directory", encoding: "binary", mime_type: "inode/directory"}
"""
def perform_once(path, options \\ []) do
  with {:ok, pid} <- Server.start_link(options) do
    # Capture the reply first so the linked server is stopped on the error
    # path as well; a failed `perform/2` used to leave the process running.
    reply = Server.perform(pid, path)
    :ok = Server.stop(pid)
    reply
  end
end
end
diff --git a/lib/gen_magic/server.ex b/lib/gen_magic/server.ex
index c0f07d7..f057dda 100644
--- a/lib/gen_magic/server.ex
+++ b/lib/gen_magic/server.ex
@@ -1,303 +1,301 @@
defmodule GenMagic.Server do
@moduledoc """
Provides access to the underlying libmagic client, which performs file introspection.
The Server needs to be supervised, since it will terminate if it receives any unexpected error.
"""
@behaviour :gen_statem
alias GenMagic.Result
alias GenMagic.Server.Data
alias GenMagic.Server.Status
import Kernel, except: [send: 2]
@typedoc """
Represents the reference to the underlying server, as returned by `:gen_statem`.
"""
@type t :: :gen_statem.server_ref()
@typedoc """
Represents values accepted as startup options, which can be passed to `start_link/1`.
- `:name`: If present, this will be the registered name for the underlying process.
Note that `:gen_statem` requires `{:local, name}`, but given widespread GenServer convention,
atoms are accepted and will be converted to `{:local, name}`.
- `:startup_timeout`: Specifies how long the Server waits for the C program to initialise.
However, if the underlying C program exits, then the process exits immediately.
Can be set to `:infinity`.
- `:process_timeout`: Specifies how long the Server waits for each request to complete.
Can be set to `:infinity`.
Please note that, if you have chosen a custom timeout value, you should also pass it when
using `GenMagic.Server.perform/3`.
- `:recycle_threshold`: Specifies the number of requests processed before the underlying C
program is recycled.
Can be set to `:infinity` if you do not wish for the program to be recycled.
- `:database_patterns`: Specifies what magic databases to load; you can specify a list of either
Path Patterns (see `Path.wildcard/2`) or `:default` to instruct the C program to load the
appropriate databases.
For example, if you have had to add custom magics, then you can set this value to:
[:default, "path/to/my/magic"]
"""
@type option ::
{:name, atom() | :gen_statem.server_name()}
| {:startup_timeout, timeout()}
| {:process_timeout, timeout()}
| {:recycle_threshold, non_neg_integer() | :infinity}
| {:database_patterns, nonempty_list(:default | Path.t())}
@typedoc """
Current state of the Server:
- `:starting`: This is the initial state; the Server will attempt to start the underlying Port
and the libmagic client, then automatically transition to either Available or Crashed.
- `:available`: This is the default state. In this state the Server is able to accept requests
and they will be replied in the same order.
- `:processing`: This is the state the Server will be in if it is processing requests. In this
state, further requests can still be lodged and they will be processed when the Server is
available again.
For proper concurrency, use a process pool like Poolboy, Sbroker, etc.
- `:recycling`: This is the state the Server will be in, if its underlying C program needs to be
recycled. This state is triggered whenever the cycle count reaches the defined value as per
`:recycle_threshold`.
In this state, the Server is able to accept requests, but they will not be processed until the
underlying C server program has been started again.
"""
@type state :: :starting | :processing | :available | :recycling
@spec child_spec([option()]) :: Supervisor.child_spec()
@spec start_link([option()]) :: :gen_statem.start_ret()
- @spec perform(t(), Path.t(), timeout()) :: {:ok, Result.t()} | {:error, term() | String.t()}
+ @spec perform(t(), Path.t() | {:bytes, binary()}, timeout()) ::
+ {:ok, Result.t()} | {:error, term() | String.t()}
@spec status(t(), timeout()) :: {:ok, Status.t()} | {:error, term()}
@spec stop(t(), term(), timeout()) :: :ok
@doc """
Builds the default child specification used to run this Server under a Supervisor.

The child is a permanent worker, identified by this module, started through
`start_link/1` with the given `options` and given 500 ms to shut down.
Use `Supervisor.child_spec/2` to override any of these fields.
"""
def child_spec(options) do
  start = {__MODULE__, :start_link, [options]}

  %{
    start: start,
    id: __MODULE__,
    restart: :permanent,
    shutdown: 500,
    type: :worker
  }
end
@doc """
Starts a new Server through `:gen_statem.start_link`.

The `:name` option, when present, is removed from `options` and used to
register the process; a bare atom is wrapped as `{:local, atom}`, while
`{:local, _}`, `{:global, _}` and `{:via, _, _}` names are passed through
unchanged. The remaining options are handed to `init/1`.

See `t:option/0` for further details.
"""
def start_link(options) do
  {name, init_options} = Keyword.pop(options, :name)

  case name do
    nil ->
      :gen_statem.start_link(__MODULE__, init_options, [])

    {scope, _} when scope in [:local, :global] ->
      :gen_statem.start_link(name, __MODULE__, init_options, [])

    {:via, _, _} ->
      :gen_statem.start_link(name, __MODULE__, init_options, [])

    name when is_atom(name) ->
      :gen_statem.start_link({:local, name}, __MODULE__, init_options, [])
  end
end
@doc """
Determines the type of the given input.

`path` is either a filesystem path or a `{:bytes, binary}` tuple. The request
is issued as a `:gen_statem` call using `timeout` (5000 by default).

Returns `{:ok, %GenMagic.Result{}}` on success or `{:error, reason}` otherwise.
"""
def perform(server_ref, path, timeout \\ 5000) do
  reply = :gen_statem.call(server_ref, {:perform, path}, timeout)

  case reply do
    {:error, reason} -> {:error, reason}
    {:ok, %Result{} = result} -> {:ok, result}
  end
end
@doc """
Returns status of the Server.

The request is issued as a `:gen_statem` call using `timeout` (5000 by default).
"""
def status(server_ref, timeout \\ 5000),
  do: :gen_statem.call(server_ref, :status, timeout)

@doc """
Stops the Server with reason `:normal` and timeout `:infinity`.
"""
def stop(server_ref), do: :gen_statem.stop(server_ref)

@doc """
Stops the Server with the specified reason and timeout.
"""
def stop(server_ref, reason, timeout),
  do: :gen_statem.stop(server_ref, reason, timeout)
# Builds the initial server data from the startup options and begins in the
# `:starting` state.
@impl :gen_statem
def init(options) do
  alias GenMagic.Config

  data = %Data{
    port_name: Config.get_port_name(),
    port_options: Config.get_port_options(options),
    startup_timeout: Config.get_startup_timeout(options),
    process_timeout: Config.get_process_timeout(options),
    recycle_threshold: Config.get_recycle_threshold(options)
  }

  {:ok, :starting, data}
end

# Each state is implemented by a function of the same name, and state-enter
# calls are requested.
@impl :gen_statem
def callback_mode, do: [:state_functions, :state_enter]
@doc false
# State-enter call for `:starting`: only matches while no request is in flight
# and no port is open. Opens the port and stores it in the data, passing
# `data.startup_timeout` as the action of the returned tuple.
def starting(:enter, _, %{request: nil, port: nil} = data) do
port = Port.open(data.port_name, data.port_options)
{:keep_state, %{data | port: port}, data.startup_timeout}
end
@doc false
# Status requests are answered immediately, reporting the `:starting` state.
def starting({:call, from}, :status, data) do
handle_status_call(from, :starting, data)
end
@doc false
# Perform requests are postponed while in this state instead of being rejected.
def starting({:call, _from}, {:perform, _path}, _data) do
{:keep_state_and_data, :postpone}
end
@doc false
- def starting(:info, {port, {:data, binary}}, %{port: port} = data) do
- case :erlang.binary_to_term(binary) do
- :ready ->
- {:next_state, :available, data}
+ def starting(:info, {port, {:data, ready}}, %{port: port} = data) do
+ case :erlang.binary_to_term(ready) do
+ :ready -> {:next_state, :available, data}
end
end
def starting(:info, {port, {:exit_status, code}}, %{port: port} = data) do
error =
case code do
1 -> :no_database
2 -> :no_argument
3 -> :missing_database
+ code -> {:unexpected_error, code}
end
{:stop, {:error, error}, data}
end
@doc false
# State-enter call for `:available`: only matches when no request is in flight.
def available(:enter, _old_state, %{request: nil}) do
  :keep_state_and_data
end

@doc false
# Accepts a perform request: counts the cycle, records the request together
# with its caller and a timestamp, forwards it to the port and moves to
# `:processing`.
def available({:call, from}, {:perform, path}, data) do
  # `:erlang.now/0` is deprecated; `:erlang.timestamp/0` returns the same
  # `{mega_secs, secs, micro_secs}` tuple shape.
  data = %{data | cycles: data.cycles + 1, request: {path, from, :erlang.timestamp()}}

  # A plain binary is treated as a file path; `{:bytes, binary}` is passed on
  # as raw content.
  arg =
    case path do
      path when is_binary(path) -> {:file, path}
      {:bytes, bytes} -> {:bytes, bytes}
    end

  send(data.port, arg)
  {:next_state, :processing, data}
end

@doc false
# Status requests are answered immediately, reporting the `:available` state.
def available({:call, from}, :status, data) do
  handle_status_call(from, :available, data)
end
@doc false
# State-enter call for `:processing`: only matches while a request tuple is
# recorded in the data. Passes `data.process_timeout` as the action of the
# returned tuple.
def processing(:enter, _old_state, %{request: {_path, _from, _time}} = data) do
{:keep_state_and_data, data.process_timeout}
end
@doc false
# Further perform requests are postponed while one is in flight.
def processing({:call, _from}, {:perform, _path}, _data) do
{:keep_state_and_data, :postpone}
end
@doc false
# Status requests are answered immediately, reporting the `:processing` state.
def processing({:call, from}, :status, data) do
handle_status_call(from, :processing, data)
end
@doc false
- def processing(:info, {port, {:data, response}}, %{port: port} = data) do
- {_, from, _} = data.request
- data = %{data | request: nil}
+ def processing(:info, {port, {:data, response}}, %{port: port, request: {_, from, _}} = data) do
response = {:reply, from, handle_response(response)}
next_state = (data.cycles >= data.recycle_threshold && :recycling) || :available
- {:next_state, next_state, data, [response, :hibernate]}
+ {:next_state, next_state, %{data | request: nil}, [response, :hibernate]}
end
@doc false
# State-enter call for `:recycling`: only matches when no request is in flight
# and a port is open. Asks the port program to stop and passes
# `data.startup_timeout` as the action of the returned tuple.
def recycling(:enter, _, %{request: nil, port: port} = data) when is_port(port) do
send(data.port, {:stop, :recycle})
{:keep_state_and_data, data.startup_timeout}
end
@doc false
# Perform requests are postponed while in this state.
def recycling({:call, _from}, {:perform, _path}, _data) do
{:keep_state_and_data, :postpone}
end
@doc false
# Status requests are answered immediately, reporting the `:recycling` state.
def recycling({:call, from}, :status, data) do
handle_status_call(from, :recycling, data)
end
@doc false
# The port program exited with status 0: forget the port, reset the cycle
# counter and go back to `:starting`. Only exit status 0 is matched here.
def recycling(:info, {port, {:exit_status, 0}}, %{port: port} = data) do
{:next_state, :starting, %{data | port: nil, cycles: 0}}
end
# Encodes `command` with `:erlang.term_to_binary/1` and delivers it to `port`
# as a `{:command, binary}` message sent on behalf of the current process.
defp send(port, command) do
  payload = :erlang.term_to_binary(command)
  Kernel.send(port, {self(), {:command, payload}})
end
@errnos %{
2 => :enoent,
13 => :eaccess,
- 21 => :eisdir,
20 => :enotdir,
12 => :enomem,
24 => :emfile,
36 => :enametoolong
}
@errno Map.keys(@errnos)
# Decodes a reply packet received from the port and converts it into the value
# returned to the caller:
#   - a successful triple becomes `{:ok, Result.build(...)}`;
#   - an error whose number is listed in `@errnos` becomes `{:error, atom}`;
#   - any other numbered error becomes `{:error, "errno: message"}`;
#   - every other `{:error, _}` tuple is returned as-is.
defp handle_response(data) do
  decoded = :erlang.binary_to_term(data)

  case decoded do
    {:ok, {mime_type, encoding, content}} ->
      {:ok, Result.build(mime_type, encoding, content)}

    {:error, {code, _message}} when code in @errno ->
      {:error, Map.fetch!(@errnos, code)}

    {:error, {code, message}} ->
      {:error, "#{code}: #{message}"}

    {:error, _} = error ->
      error
  end
end
# Replies to a status call with the given state name and the current cycle
# count, leaving both state and data untouched.
defp handle_status_call(from, state, data) do
  status = %Status{state: state, cycles: data.cycles}
  {:keep_state_and_data, {:reply, from, {:ok, status}}}
end
end
diff --git a/src/apprentice.c b/src/apprentice.c
index 1afd53c..10c09b9 100644
--- a/src/apprentice.c
+++ b/src/apprentice.c
@@ -1,445 +1,444 @@
//
// The Sorcerer’s Apprentice
//
// To use this program, compile it with dynamically linked libmagic, as mirrored
// at https://github.com/file/file. You may install it with apt-get,
// yum or brew. Refer to the Makefile for further reference.
//
// This program is designed to run interactively as a backend daemon to the
// GenMagic library, and follows the command line pattern:
//
// $ apprentice --database-file <file> --database-default
//
// Where each argument either refers to a compiled or uncompiled magic database,
// or the default database. They will be loaded in the sequence that they were
// specified. Note that you must specify at least one database.
//
// Communication is done over STDIN/STDOUT as binary packets of 2 bytes length
// plus X bytes payload, where the payload is an erlang term encoded with
// :erlang.term_to_binary/1 and decoded with :erlang.binary_to_term/1.
//
// Once the program is ready, it sends the `:ready` atom. The startup can fail
// for multiple reasons, and the program will exit accordingly:
// - 1: No database
// - 2: Missing/Bad argument
// - 3: Missing database
//
// Commands are sent to the program STDIN as an erlang term of `{Operation,
// Argument}`, and response of `{:ok | :error, Response}`.
//
// Invalid packets will cause the program to exit (exit code 4). This will
// happen if your Erlang Term format doesn't match the version the program has
// been compiled with, or if you send a command that is too large.
//
// The program may exit with error codes 5 or 255 if something went wrong (such
// as error allocating terms, or if stdin is lost).
//
// Commands:
// {:file, path :: String.t()} :: {:ok, {type, encoding, name}} | {:error,
// :badarg} | {:error, {errno :: integer(), String.t()}}
// {:bytes, binary()} :: same as :file
// {:stop, reason :: atom()} :: exit 0
#include <ei.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <magic.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#define ERROR_OK 0
#define ERROR_NO_DATABASE 1
#define ERROR_NO_ARGUMENT 2
#define ERROR_MISSING_DATABASE 3
#define ERROR_BAD_TERM 4
#define ERROR_EI 5
// We use a bigger than possible valid command length (around 4111 bytes) to
// allow more precise errors when using too long paths.
#define COMMAND_LEN 8000
#define COMMAND_BUFFER_SIZE COMMAND_LEN + 1
#define MAGIC_FLAGS_COMMON (MAGIC_CHECK | MAGIC_ERROR)
magic_t magic_setup(int flags);
typedef char byte;
void setup_environment();
void setup_options(int argc, char **argv);
void setup_options_file(char *optarg);
void setup_options_default();
void setup_system();
int process_command(uint16_t len, byte *buf);
void process_file(char *path, ei_x_buff *result);
void process_bytes(char *bytes, int size, ei_x_buff *result);
size_t read_cmd(byte *buf);
size_t write_cmd(byte *buf, size_t len);
void error(ei_x_buff *result, const char *error);
void handle_magic_error(magic_t handle, int errn, ei_x_buff *result);
void fdseek(uint16_t count);
struct magic_file {
struct magic_file *prev;
struct magic_file *next;
char *path;
};
static struct magic_file *magic_database;
static magic_t magic_mime_type; // MAGIC_MIME_TYPE
static magic_t magic_mime_encoding; // MAGIC_MIME_ENCODING
static magic_t magic_type_name; // MAGIC_NONE
// Entry point: initialises ei, parses the command line, loads the magic
// databases, announces readiness with the `ready` atom and then serves
// commands read from stdin until the input ends or can no longer be read.
// Always returns 255 when the command loop ends.
int main(int argc, char **argv) {
  ei_init();
  setup_environment();
  setup_options(argc, argv);
  setup_system();

  ei_x_buff ok_buf;
  if (ei_x_new_with_version(&ok_buf) || ei_x_encode_atom(&ok_buf, "ready"))
    exit(ERROR_EI);
  write_cmd(ok_buf.buff, ok_buf.index);
  if (ei_x_free(&ok_buf) != 0)
    exit(ERROR_EI);

  byte buf[COMMAND_BUFFER_SIZE];
  size_t len;
  // read_cmd returns 0 at end of input and (size_t)-1 when read(2) fails.
  // Keeping the full size_t lets the failure be told apart from a 65535-byte
  // packet; storing it in a uint16_t made a persistent read error loop
  // forever, answering `badarg` each time.
  while ((len = read_cmd(buf)) > 0 && len != (size_t)-1) {
    process_command((uint16_t)len, buf);
  }
  return 255;
}
int process_command(uint16_t len, byte *buf) {
ei_x_buff result;
char atom[128];
int index, version, arity, termtype, termsize;
index = 0;
// Initialize result
if (ei_x_new_with_version(&result) || ei_x_encode_tuple_header(&result, 2)) {
exit(ERROR_EI);
}
if (len >= COMMAND_LEN) {
error(&result, "badarg");
return 1;
}
if (ei_decode_version(buf, &index, &version) != 0) {
exit(ERROR_BAD_TERM);
}
if (ei_decode_tuple_header(buf, &index, &arity) != 0) {
error(&result, "badarg");
return 1;
}
if (arity != 2) {
error(&result, "badarg");
return 1;
}
if (ei_decode_atom(buf, &index, atom) != 0) {
error(&result, "badarg");
return 1;
}
if (strlen(atom) == 4 && strncmp(atom, "file", 4) == 0) {
char path[4097];
ei_get_type(buf, &index, &termtype, &termsize);
if (termtype == ERL_BINARY_EXT) {
if (termsize < 4096) {
long bin_length;
ei_decode_binary(buf, &index, path, &bin_length);
path[termsize] = '\0';
process_file(path, &result);
} else {
error(&result, "enametoolong");
return 1;
}
} else {
error(&result, "badarg");
return 1;
}
} else if (strlen(atom) == 5 && strncmp(atom, "bytes", 5) == 0) {
int termtype;
int termsize;
char bytes[51];
ei_get_type(buf, &index, &termtype, &termsize);
if (termtype == ERL_BINARY_EXT && termsize < 50) {
long bin_length;
ei_decode_binary(buf, &index, bytes, &bin_length);
bytes[termsize] = '\0';
process_bytes(bytes, termsize, &result);
} else {
error(&result, "badarg");
return 1;
}
} else if (strlen(atom) == 4 && strncmp(atom, "stop", 4) == 0) {
exit(ERROR_OK);
} else {
error(&result, "badarg");
return 1;
}
- // Empty the buffer.
write_cmd(result.buff, result.index);
if (ei_x_free(&result) != 0) {
exit(ERROR_EI);
}
return 0;
}
// Clears getopt's `opterr` flag before the options are parsed.
void setup_environment() {
  opterr = 0;
}
// Parses the command line. `--database-file <file>` (or `-f <file>`) registers
// a database file and `--database-default` registers the default database, in
// the order given. Any other option exits with ERROR_NO_ARGUMENT.
void setup_options(int argc, char **argv) {
  static struct option long_options[] = {
      {"database-file", required_argument, 0, 'f'},
      {"database-default", no_argument, 0, 'd'},
      {0, 0, 0, 0}};
  const char *short_options = "f:";
  int long_index = 0;
  int opt;

  while ((opt = getopt_long(argc, argv, short_options, long_options,
                            &long_index)) != -1) {
    if (opt == 'f') {
      setup_options_file(optarg);
    } else if (opt == 'd') {
      setup_options_default();
    } else {
      // Covers '?' as well as anything unexpected.
      exit(ERROR_NO_ARGUMENT);
    }
  }
}
void setup_options_file(char *optarg) {
if (0 != access(optarg, R_OK)) {
exit(ERROR_MISSING_DATABASE);
}
struct magic_file *next = malloc(sizeof(struct magic_file));
size_t path_length = strlen(optarg) + 1;
char *path = malloc(path_length);
memcpy(path, optarg, path_length);
next->path = path;
next->prev = magic_database;
if (magic_database) {
magic_database->next = next;
}
magic_database = next;
}
void setup_options_default() {
struct magic_file *next = malloc(sizeof(struct magic_file));
next->path = NULL;
next->prev = magic_database;
if (magic_database) {
magic_database->next = next;
}
magic_database = next;
}
// Creates the three libmagic handles used to answer requests: one each for
// the MIME encoding, the MIME type and the plain type name.
void setup_system() {
  const int encoding_flags = MAGIC_FLAGS_COMMON | MAGIC_MIME_ENCODING;
  const int type_flags = MAGIC_FLAGS_COMMON | MAGIC_MIME_TYPE;
  const int name_flags = MAGIC_FLAGS_COMMON | MAGIC_NONE;

  magic_mime_encoding = magic_setup(encoding_flags);
  magic_mime_type = magic_setup(type_flags);
  magic_type_name = magic_setup(name_flags);
}
// Opens a libmagic handle with `flags` and loads every registered database
// into it, in the order the databases were given on the command line.
// Exits with ERROR_NO_DATABASE when no database has been registered.
magic_t magic_setup(int flags) {
  magic_t cookie = magic_open(flags);
  struct magic_file *node = magic_database;

  if (node == NULL) {
    exit(ERROR_NO_DATABASE);
  }

  // `magic_database` points at the most recently added entry; rewind to the
  // first one before loading.
  for (; node->prev != NULL; node = node->prev) {
  }

  for (; node != NULL; node = node->next) {
    magic_load(cookie, node->path);
  }

  return cookie;
}
// Identifies the in-memory buffer `bytes` of `size` bytes with each of the
// three libmagic handles and appends the outcome to `result`:
//   - on success: the atom `ok` followed by a 3-tuple of binaries
//     (MIME type, MIME encoding, type name);
//   - on the first failure: whatever handle_magic_error encodes.
void process_bytes(char *bytes, int size, ei_x_buff *result) {
  const char *mime_type_result = magic_buffer(magic_mime_type, bytes, size);
  const int mime_type_errno = magic_errno(magic_mime_type);
  // A NULL result is also a failure, even when no errno was recorded;
  // without this check strlen() below would be handed NULL.
  if (mime_type_result == NULL || mime_type_errno > 0) {
    handle_magic_error(magic_mime_type, mime_type_errno, result);
    return;
  }

  const char *mime_encoding_result =
      magic_buffer(magic_mime_encoding, bytes, size);
  const int mime_encoding_errno = magic_errno(magic_mime_encoding);
  if (mime_encoding_result == NULL || mime_encoding_errno > 0) {
    handle_magic_error(magic_mime_encoding, mime_encoding_errno, result);
    return;
  }

  const char *type_name_result = magic_buffer(magic_type_name, bytes, size);
  const int type_name_errno = magic_errno(magic_type_name);
  if (type_name_result == NULL || type_name_errno > 0) {
    handle_magic_error(magic_type_name, type_name_errno, result);
    return;
  }

  ei_x_encode_atom(result, "ok");
  ei_x_encode_tuple_header(result, 3);
  ei_x_encode_binary(result, mime_type_result, strlen(mime_type_result));
  ei_x_encode_binary(result, mime_encoding_result,
                     strlen(mime_encoding_result));
  ei_x_encode_binary(result, type_name_result, strlen(type_name_result));
}
// Appends an error reply to `result`: the atom `error` followed by a 2-tuple
// of the error number `errn` and the message libmagic reports for `handle`.
void handle_magic_error(magic_t handle, int errn, ei_x_buff *result) {
  const char *message = magic_error(handle);
  // magic_error() may return NULL when libmagic has no message to report;
  // substitute a placeholder rather than passing NULL to strlen().
  if (message == NULL) {
    message = "unknown error";
  }

  ei_x_encode_atom(result, "error");
  ei_x_encode_tuple_header(result, 2);
  ei_x_encode_long(result, (long)errn);
  ei_x_encode_binary(result, message, strlen(message));
}
// Identifies the file at `path` with each of the three libmagic handles and
// appends the outcome to `result`:
//   - on success: the atom `ok` followed by a 3-tuple of binaries
//     (MIME type, MIME encoding, type name);
//   - on the first failure: whatever handle_magic_error encodes.
void process_file(char *path, ei_x_buff *result) {
  const char *mime_type_result = magic_file(magic_mime_type, path);
  const int mime_type_errno = magic_errno(magic_mime_type);
  // A NULL result is also a failure, even when no errno was recorded;
  // without this check strlen() below would be handed NULL.
  if (mime_type_result == NULL || mime_type_errno > 0) {
    handle_magic_error(magic_mime_type, mime_type_errno, result);
    return;
  }

  const char *mime_encoding_result = magic_file(magic_mime_encoding, path);
  const int mime_encoding_errno = magic_errno(magic_mime_encoding);
  if (mime_encoding_result == NULL || mime_encoding_errno > 0) {
    handle_magic_error(magic_mime_encoding, mime_encoding_errno, result);
    return;
  }

  const char *type_name_result = magic_file(magic_type_name, path);
  const int type_name_errno = magic_errno(magic_type_name);
  if (type_name_result == NULL || type_name_errno > 0) {
    handle_magic_error(magic_type_name, type_name_errno, result);
    return;
  }

  ei_x_encode_atom(result, "ok");
  ei_x_encode_tuple_header(result, 3);
  ei_x_encode_binary(result, mime_type_result, strlen(mime_type_result));
  ei_x_encode_binary(result, mime_encoding_result,
                     strlen(mime_encoding_result));
  ei_x_encode_binary(result, type_name_result, strlen(type_name_result));
}
// Adapted from https://erlang.org/doc/tutorial/erl_interface.html
// Changed `read_cmd`, the original one was buggy given some length (due to
// endianness).
// TODO: Check if `write_cmd` exhibits the same issue.
// Reads from stdin until `len` bytes have been stored in `buf`.
// Returns `len` on success. If read(2) returns zero or a negative value, that
// value is returned instead, converted to size_t (so 0 at end of input and
// (size_t)-1 on error).
size_t read_exact(byte *buf, size_t len) {
  int total = 0;

  for (;;) {
    int n = read(0, buf + total, len - total);
    if (n <= 0) {
      return (n);
    }
    total += n;
    if (!(total < len)) {
      break;
    }
  }

  return (len);
}
// Writes to stdout until all `len` bytes of `buf` have been written.
// Returns `len` on success. If write(2) returns zero or a negative value,
// that value is returned instead, converted to size_t.
size_t write_exact(byte *buf, size_t len) {
  int total = 0;

  for (;;) {
    int n = write(1, buf + total, len - total);
    if (n <= 0) {
      return (n);
    }
    total += n;
    if (!(total < len)) {
      break;
    }
  }

  return (len);
}
// Reads one packet from stdin: a 2-byte big-endian length followed by that
// many payload bytes, which are stored in `buf`.
// Returns the payload length; 0 at end of input; (size_t)-1 when reading
// fails. When the announced length exceeds COMMAND_LEN the payload is
// discarded and the announced length is returned without touching `buf`.
size_t read_cmd(byte *buf) {
  unsigned char header[2];

  // Use read_exact so that a short read of the header cannot leave its second
  // byte undefined.
  size_t got = read_exact((byte *)header, sizeof(header));
  if (got != sizeof(header)) {
    return got;
  }

  // Decode the big-endian length byte by byte; this needs neither ntohs() nor
  // an unaligned uint16_t access.
  uint16_t len16 = (uint16_t)((header[0] << 8) | header[1]);

  // Buffer isn't large enough: just return possible len, without reading.
  // It is up to the caller to verify the size again and return an error.
  // buf left unchanged, stdin emptied of X bytes.
  if (len16 > COMMAND_LEN) {
    fdseek(len16);
    return len16;
  }
  return read_exact(buf, len16);
}
// Writes one packet to stdout: the low 16 bits of `len` as two bytes, most
// significant first, followed by `len` bytes of `buf`.
// Returns the result of writing the payload; the results of writing the two
// length bytes are not checked.
size_t write_cmd(byte *buf, size_t len) {
  byte header[2];
  header[0] = (len >> 8) & 0xff;
  header[1] = len & 0xff;

  write_exact(&header[0], 1);
  write_exact(&header[1], 1);
  return write_exact(buf, len);
}
void error(ei_x_buff *result, const char *error) {
ei_x_encode_atom(result, "error");
ei_x_encode_atom(result, error);
write_cmd(result->buff, result->index);
if (ei_x_free(result) != 0)
exit(ERROR_EI);
}
// Discards up to `count` bytes from stdin (file descriptor 0).
// Stops early when the input ends or cannot be read.
void fdseek(uint16_t count) {
  char scratch[512];
  size_t remaining = count;

  // Read straight from the descriptor, like the rest of the packet I/O.
  // getchar() goes through stdio's buffer, which can pull the bytes of later
  // packets out of the descriptor where read(0, ...) will never see them.
  while (remaining > 0) {
    size_t chunk = remaining < sizeof(scratch) ? remaining : sizeof(scratch);
    ssize_t n = read(0, scratch, chunk);
    if (n < 0 && errno == EINTR) {
      continue;
    }
    if (n <= 0) {
      return;
    }
    remaining -= (size_t)n;
  }
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Nov 25, 12:40 AM (1 d, 9 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
39523
Default Alt Text
(23 KB)

Event Timeline