Merge branch 'ls/filter-process'
The smudge/clean filter API expect an external process is spawned to filter the contents for each path that has a filter defined. A new type of "process" filter API has been added to allow the first request to run the filter for a path to spawn a single process, and all filtering need is served by this single process for multiple paths, reducing the process creation overhead. * ls/filter-process: contrib/long-running-filter: add long running filter example convert: add filter.<driver>.process option convert: prepare filter.<driver>.process option convert: make apply_filter() adhere to standard Git error handling pkt-line: add functions to read/write flush terminated packet streams pkt-line: add packet_write_gently() pkt-line: add packet_flush_gently() pkt-line: add packet_write_fmt_gently() pkt-line: extract set_packet_header() pkt-line: rename packet_write() to packet_write_fmt() run-command: add clean_on_exit_handler run-command: move check_pipe() from write_or_die to run_command convert: modernize tests convert: quote filter names in error messages
This commit is contained in:
376
convert.c
376
convert.c
@ -3,6 +3,7 @@
|
||||
#include "run-command.h"
|
||||
#include "quote.h"
|
||||
#include "sigchain.h"
|
||||
#include "pkt-line.h"
|
||||
|
||||
/*
|
||||
* convert.c - convert a file when checking it out and checking it in.
|
||||
@ -416,7 +417,7 @@ static int filter_buffer_or_fd(int in, int out, void *data)
|
||||
child_process.out = out;
|
||||
|
||||
if (start_command(&child_process))
|
||||
return error("cannot fork to run external filter %s", params->cmd);
|
||||
return error("cannot fork to run external filter '%s'", params->cmd);
|
||||
|
||||
sigchain_push(SIGPIPE, SIG_IGN);
|
||||
|
||||
@ -434,19 +435,19 @@ static int filter_buffer_or_fd(int in, int out, void *data)
|
||||
if (close(child_process.in))
|
||||
write_err = 1;
|
||||
if (write_err)
|
||||
error("cannot feed the input to external filter %s", params->cmd);
|
||||
error("cannot feed the input to external filter '%s'", params->cmd);
|
||||
|
||||
sigchain_pop(SIGPIPE);
|
||||
|
||||
status = finish_command(&child_process);
|
||||
if (status)
|
||||
error("external filter %s failed %d", params->cmd, status);
|
||||
error("external filter '%s' failed %d", params->cmd, status);
|
||||
|
||||
strbuf_release(&cmd);
|
||||
return (write_err || status);
|
||||
}
|
||||
|
||||
static int apply_filter(const char *path, const char *src, size_t len, int fd,
|
||||
static int apply_single_file_filter(const char *path, const char *src, size_t len, int fd,
|
||||
struct strbuf *dst, const char *cmd)
|
||||
{
|
||||
/*
|
||||
@ -455,17 +456,11 @@ static int apply_filter(const char *path, const char *src, size_t len, int fd,
|
||||
*
|
||||
* (child --> cmd) --> us
|
||||
*/
|
||||
int ret = 1;
|
||||
int err = 0;
|
||||
struct strbuf nbuf = STRBUF_INIT;
|
||||
struct async async;
|
||||
struct filter_params params;
|
||||
|
||||
if (!cmd || !*cmd)
|
||||
return 0;
|
||||
|
||||
if (!dst)
|
||||
return 1;
|
||||
|
||||
memset(&async, 0, sizeof(async));
|
||||
async.proc = filter_buffer_or_fd;
|
||||
async.data = ¶ms;
|
||||
@ -481,23 +476,304 @@ static int apply_filter(const char *path, const char *src, size_t len, int fd,
|
||||
return 0; /* error was already reported */
|
||||
|
||||
if (strbuf_read(&nbuf, async.out, len) < 0) {
|
||||
error("read from external filter %s failed", cmd);
|
||||
ret = 0;
|
||||
err = error("read from external filter '%s' failed", cmd);
|
||||
}
|
||||
if (close(async.out)) {
|
||||
error("read from external filter %s failed", cmd);
|
||||
ret = 0;
|
||||
err = error("read from external filter '%s' failed", cmd);
|
||||
}
|
||||
if (finish_async(&async)) {
|
||||
error("external filter %s failed", cmd);
|
||||
ret = 0;
|
||||
err = error("external filter '%s' failed", cmd);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
if (!err) {
|
||||
strbuf_swap(dst, &nbuf);
|
||||
}
|
||||
strbuf_release(&nbuf);
|
||||
return ret;
|
||||
return !err;
|
||||
}
|
||||
|
||||
#define CAP_CLEAN (1u<<0)
|
||||
#define CAP_SMUDGE (1u<<1)
|
||||
|
||||
struct cmd2process {
|
||||
struct hashmap_entry ent; /* must be the first member! */
|
||||
unsigned int supported_capabilities;
|
||||
const char *cmd;
|
||||
struct child_process process;
|
||||
};
|
||||
|
||||
static int cmd_process_map_initialized;
|
||||
static struct hashmap cmd_process_map;
|
||||
|
||||
static int cmd2process_cmp(const struct cmd2process *e1,
|
||||
const struct cmd2process *e2,
|
||||
const void *unused)
|
||||
{
|
||||
return strcmp(e1->cmd, e2->cmd);
|
||||
}
|
||||
|
||||
static struct cmd2process *find_multi_file_filter_entry(struct hashmap *hashmap, const char *cmd)
|
||||
{
|
||||
struct cmd2process key;
|
||||
hashmap_entry_init(&key, strhash(cmd));
|
||||
key.cmd = cmd;
|
||||
return hashmap_get(hashmap, &key, NULL);
|
||||
}
|
||||
|
||||
static int packet_write_list(int fd, const char *line, ...)
|
||||
{
|
||||
va_list args;
|
||||
int err;
|
||||
va_start(args, line);
|
||||
for (;;) {
|
||||
if (!line)
|
||||
break;
|
||||
if (strlen(line) > LARGE_PACKET_DATA_MAX)
|
||||
return -1;
|
||||
err = packet_write_fmt_gently(fd, "%s\n", line);
|
||||
if (err)
|
||||
return err;
|
||||
line = va_arg(args, const char*);
|
||||
}
|
||||
va_end(args);
|
||||
return packet_flush_gently(fd);
|
||||
}
|
||||
|
||||
static void read_multi_file_filter_status(int fd, struct strbuf *status)
|
||||
{
|
||||
struct strbuf **pair;
|
||||
char *line;
|
||||
for (;;) {
|
||||
line = packet_read_line(fd, NULL);
|
||||
if (!line)
|
||||
break;
|
||||
pair = strbuf_split_str(line, '=', 2);
|
||||
if (pair[0] && pair[0]->len && pair[1]) {
|
||||
/* the last "status=<foo>" line wins */
|
||||
if (!strcmp(pair[0]->buf, "status=")) {
|
||||
strbuf_reset(status);
|
||||
strbuf_addbuf(status, pair[1]);
|
||||
}
|
||||
}
|
||||
strbuf_list_free(pair);
|
||||
}
|
||||
}
|
||||
|
||||
static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry)
|
||||
{
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
entry->process.clean_on_exit = 0;
|
||||
kill(entry->process.pid, SIGTERM);
|
||||
finish_command(&entry->process);
|
||||
|
||||
hashmap_remove(hashmap, entry, NULL);
|
||||
free(entry);
|
||||
}
|
||||
|
||||
static void stop_multi_file_filter(struct child_process *process)
|
||||
{
|
||||
sigchain_push(SIGPIPE, SIG_IGN);
|
||||
/* Closing the pipe signals the filter to initiate a shutdown. */
|
||||
close(process->in);
|
||||
close(process->out);
|
||||
sigchain_pop(SIGPIPE);
|
||||
/* Finish command will wait until the shutdown is complete. */
|
||||
finish_command(process);
|
||||
}
|
||||
|
||||
static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, const char *cmd)
|
||||
{
|
||||
int err;
|
||||
struct cmd2process *entry;
|
||||
struct child_process *process;
|
||||
const char *argv[] = { cmd, NULL };
|
||||
struct string_list cap_list = STRING_LIST_INIT_NODUP;
|
||||
char *cap_buf;
|
||||
const char *cap_name;
|
||||
|
||||
entry = xmalloc(sizeof(*entry));
|
||||
entry->cmd = cmd;
|
||||
entry->supported_capabilities = 0;
|
||||
process = &entry->process;
|
||||
|
||||
child_process_init(process);
|
||||
process->argv = argv;
|
||||
process->use_shell = 1;
|
||||
process->in = -1;
|
||||
process->out = -1;
|
||||
process->clean_on_exit = 1;
|
||||
process->clean_on_exit_handler = stop_multi_file_filter;
|
||||
|
||||
if (start_command(process)) {
|
||||
error("cannot fork to run external filter '%s'", cmd);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hashmap_entry_init(entry, strhash(cmd));
|
||||
|
||||
sigchain_push(SIGPIPE, SIG_IGN);
|
||||
|
||||
err = packet_write_list(process->in, "git-filter-client", "version=2", NULL);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
|
||||
if (err) {
|
||||
error("external filter '%s' does not support filter protocol version 2", cmd);
|
||||
goto done;
|
||||
}
|
||||
err = strcmp(packet_read_line(process->out, NULL), "version=2");
|
||||
if (err)
|
||||
goto done;
|
||||
err = packet_read_line(process->out, NULL) != NULL;
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = packet_write_list(process->in, "capability=clean", "capability=smudge", NULL);
|
||||
|
||||
for (;;) {
|
||||
cap_buf = packet_read_line(process->out, NULL);
|
||||
if (!cap_buf)
|
||||
break;
|
||||
string_list_split_in_place(&cap_list, cap_buf, '=', 1);
|
||||
|
||||
if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
|
||||
continue;
|
||||
|
||||
cap_name = cap_list.items[1].string;
|
||||
if (!strcmp(cap_name, "clean")) {
|
||||
entry->supported_capabilities |= CAP_CLEAN;
|
||||
} else if (!strcmp(cap_name, "smudge")) {
|
||||
entry->supported_capabilities |= CAP_SMUDGE;
|
||||
} else {
|
||||
warning(
|
||||
"external filter '%s' requested unsupported filter capability '%s'",
|
||||
cmd, cap_name
|
||||
);
|
||||
}
|
||||
|
||||
string_list_clear(&cap_list, 0);
|
||||
}
|
||||
|
||||
done:
|
||||
sigchain_pop(SIGPIPE);
|
||||
|
||||
if (err || errno == EPIPE) {
|
||||
error("initialization for external filter '%s' failed", cmd);
|
||||
kill_multi_file_filter(hashmap, entry);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hashmap_add(hashmap, entry);
|
||||
return entry;
|
||||
}
|
||||
|
||||
static int apply_multi_file_filter(const char *path, const char *src, size_t len,
|
||||
int fd, struct strbuf *dst, const char *cmd,
|
||||
const unsigned int wanted_capability)
|
||||
{
|
||||
int err;
|
||||
struct cmd2process *entry;
|
||||
struct child_process *process;
|
||||
struct strbuf nbuf = STRBUF_INIT;
|
||||
struct strbuf filter_status = STRBUF_INIT;
|
||||
const char *filter_type;
|
||||
|
||||
if (!cmd_process_map_initialized) {
|
||||
cmd_process_map_initialized = 1;
|
||||
hashmap_init(&cmd_process_map, (hashmap_cmp_fn) cmd2process_cmp, 0);
|
||||
entry = NULL;
|
||||
} else {
|
||||
entry = find_multi_file_filter_entry(&cmd_process_map, cmd);
|
||||
}
|
||||
|
||||
fflush(NULL);
|
||||
|
||||
if (!entry) {
|
||||
entry = start_multi_file_filter(&cmd_process_map, cmd);
|
||||
if (!entry)
|
||||
return 0;
|
||||
}
|
||||
process = &entry->process;
|
||||
|
||||
if (!(wanted_capability & entry->supported_capabilities))
|
||||
return 0;
|
||||
|
||||
if (CAP_CLEAN & wanted_capability)
|
||||
filter_type = "clean";
|
||||
else if (CAP_SMUDGE & wanted_capability)
|
||||
filter_type = "smudge";
|
||||
else
|
||||
die("unexpected filter type");
|
||||
|
||||
sigchain_push(SIGPIPE, SIG_IGN);
|
||||
|
||||
assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n"));
|
||||
err = packet_write_fmt_gently(process->in, "command=%s\n", filter_type);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = strlen(path) > LARGE_PACKET_DATA_MAX - strlen("pathname=\n");
|
||||
if (err) {
|
||||
error("path name too long for external filter");
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = packet_write_fmt_gently(process->in, "pathname=%s\n", path);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = packet_flush_gently(process->in);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
if (fd >= 0)
|
||||
err = write_packetized_from_fd(fd, process->in);
|
||||
else
|
||||
err = write_packetized_from_buf(src, len, process->in);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
read_multi_file_filter_status(process->out, &filter_status);
|
||||
err = strcmp(filter_status.buf, "success");
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = read_packetized_to_strbuf(process->out, &nbuf) < 0;
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
read_multi_file_filter_status(process->out, &filter_status);
|
||||
err = strcmp(filter_status.buf, "success");
|
||||
|
||||
done:
|
||||
sigchain_pop(SIGPIPE);
|
||||
|
||||
if (err || errno == EPIPE) {
|
||||
if (!strcmp(filter_status.buf, "error")) {
|
||||
/* The filter signaled a problem with the file. */
|
||||
} else if (!strcmp(filter_status.buf, "abort")) {
|
||||
/*
|
||||
* The filter signaled a permanent problem. Don't try to filter
|
||||
* files with the same command for the lifetime of the current
|
||||
* Git process.
|
||||
*/
|
||||
entry->supported_capabilities &= ~wanted_capability;
|
||||
} else {
|
||||
/*
|
||||
* Something went wrong with the protocol filter.
|
||||
* Force shutdown and restart if another blob requires filtering.
|
||||
*/
|
||||
error("external filter '%s' failed", cmd);
|
||||
kill_multi_file_filter(&cmd_process_map, entry);
|
||||
}
|
||||
} else {
|
||||
strbuf_swap(dst, &nbuf);
|
||||
}
|
||||
strbuf_release(&nbuf);
|
||||
return !err;
|
||||
}
|
||||
|
||||
static struct convert_driver {
|
||||
@ -505,9 +781,35 @@ static struct convert_driver {
|
||||
struct convert_driver *next;
|
||||
const char *smudge;
|
||||
const char *clean;
|
||||
const char *process;
|
||||
int required;
|
||||
} *user_convert, **user_convert_tail;
|
||||
|
||||
static int apply_filter(const char *path, const char *src, size_t len,
|
||||
int fd, struct strbuf *dst, struct convert_driver *drv,
|
||||
const unsigned int wanted_capability)
|
||||
{
|
||||
const char *cmd = NULL;
|
||||
|
||||
if (!drv)
|
||||
return 0;
|
||||
|
||||
if (!dst)
|
||||
return 1;
|
||||
|
||||
if ((CAP_CLEAN & wanted_capability) && !drv->process && drv->clean)
|
||||
cmd = drv->clean;
|
||||
else if ((CAP_SMUDGE & wanted_capability) && !drv->process && drv->smudge)
|
||||
cmd = drv->smudge;
|
||||
|
||||
if (cmd && *cmd)
|
||||
return apply_single_file_filter(path, src, len, fd, dst, cmd);
|
||||
else if (drv->process && *drv->process)
|
||||
return apply_multi_file_filter(path, src, len, fd, dst, drv->process, wanted_capability);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_convert_config(const char *var, const char *value, void *cb)
|
||||
{
|
||||
const char *key, *name;
|
||||
@ -545,6 +847,9 @@ static int read_convert_config(const char *var, const char *value, void *cb)
|
||||
if (!strcmp("clean", key))
|
||||
return git_config_string(&drv->clean, var, value);
|
||||
|
||||
if (!strcmp("process", key))
|
||||
return git_config_string(&drv->process, var, value);
|
||||
|
||||
if (!strcmp("required", key)) {
|
||||
drv->required = git_config_bool(var, value);
|
||||
return 0;
|
||||
@ -846,7 +1151,7 @@ int would_convert_to_git_filter_fd(const char *path)
|
||||
if (!ca.drv->required)
|
||||
return 0;
|
||||
|
||||
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
|
||||
return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN);
|
||||
}
|
||||
|
||||
const char *get_convert_attr_ascii(const char *path)
|
||||
@ -879,18 +1184,12 @@ int convert_to_git(const char *path, const char *src, size_t len,
|
||||
struct strbuf *dst, enum safe_crlf checksafe)
|
||||
{
|
||||
int ret = 0;
|
||||
const char *filter = NULL;
|
||||
int required = 0;
|
||||
struct conv_attrs ca;
|
||||
|
||||
convert_attrs(&ca, path);
|
||||
if (ca.drv) {
|
||||
filter = ca.drv->clean;
|
||||
required = ca.drv->required;
|
||||
}
|
||||
|
||||
ret |= apply_filter(path, src, len, -1, dst, filter);
|
||||
if (!ret && required)
|
||||
ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN);
|
||||
if (!ret && ca.drv && ca.drv->required)
|
||||
die("%s: clean filter '%s' failed", path, ca.drv->name);
|
||||
|
||||
if (ret && dst) {
|
||||
@ -912,9 +1211,9 @@ void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
|
||||
convert_attrs(&ca, path);
|
||||
|
||||
assert(ca.drv);
|
||||
assert(ca.drv->clean);
|
||||
assert(ca.drv->clean || ca.drv->process);
|
||||
|
||||
if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
|
||||
if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN))
|
||||
die("%s: clean filter '%s' failed", path, ca.drv->name);
|
||||
|
||||
crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
|
||||
@ -926,15 +1225,9 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
|
||||
int normalizing)
|
||||
{
|
||||
int ret = 0, ret_filter = 0;
|
||||
const char *filter = NULL;
|
||||
int required = 0;
|
||||
struct conv_attrs ca;
|
||||
|
||||
convert_attrs(&ca, path);
|
||||
if (ca.drv) {
|
||||
filter = ca.drv->smudge;
|
||||
required = ca.drv->required;
|
||||
}
|
||||
|
||||
ret |= ident_to_worktree(path, src, len, dst, ca.ident);
|
||||
if (ret) {
|
||||
@ -943,9 +1236,10 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
|
||||
}
|
||||
/*
|
||||
* CRLF conversion can be skipped if normalizing, unless there
|
||||
* is a smudge filter. The filter might expect CRLFs.
|
||||
* is a smudge or process filter (even if the process filter doesn't
|
||||
* support smudge). The filters might expect CRLFs.
|
||||
*/
|
||||
if (filter || !normalizing) {
|
||||
if ((ca.drv && (ca.drv->smudge || ca.drv->process)) || !normalizing) {
|
||||
ret |= crlf_to_worktree(path, src, len, dst, ca.crlf_action);
|
||||
if (ret) {
|
||||
src = dst->buf;
|
||||
@ -953,8 +1247,8 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
|
||||
}
|
||||
}
|
||||
|
||||
ret_filter = apply_filter(path, src, len, -1, dst, filter);
|
||||
if (!ret_filter && required)
|
||||
ret_filter = apply_filter(path, src, len, -1, dst, ca.drv, CAP_SMUDGE);
|
||||
if (!ret_filter && ca.drv && ca.drv->required)
|
||||
die("%s: smudge filter %s failed", path, ca.drv->name);
|
||||
|
||||
return ret | ret_filter;
|
||||
@ -1406,7 +1700,7 @@ struct stream_filter *get_stream_filter(const char *path, const unsigned char *s
|
||||
struct stream_filter *filter = NULL;
|
||||
|
||||
convert_attrs(&ca, path);
|
||||
if (ca.drv && (ca.drv->smudge || ca.drv->clean))
|
||||
if (ca.drv && (ca.drv->process || ca.drv->smudge || ca.drv->clean))
|
||||
return NULL;
|
||||
|
||||
if (ca.crlf_action == CRLF_AUTO || ca.crlf_action == CRLF_AUTO_CRLF)
|
||||
|
Reference in New Issue
Block a user