Merge branch 'en/ort-perf-batch-9'
The ort merge backend has been optimized by skipping irrelevant renames.

* en/ort-perf-batch-9:
  diffcore-rename: avoid doing basename comparisons for irrelevant sources
  merge-ort: skip rename detection entirely if possible
  merge-ort: use relevant_sources to filter possible rename sources
  merge-ort: precompute whether directory rename detection is needed
  merge-ort: introduce wrappers for alternate tree traversal
  merge-ort: add data structures for an alternate tree traversal
  merge-ort: precompute subset of sources for which we need rename detection
  diffcore-rename: enable filtering possible rename sources
This commit is contained in:
234
merge-ort.c
234
merge-ort.c
@ -51,6 +51,12 @@ enum merge_side {
|
||||
MERGE_SIDE2 = 2
|
||||
};
|
||||
|
||||
struct traversal_callback_data {
|
||||
unsigned long mask;
|
||||
unsigned long dirmask;
|
||||
struct name_entry names[3];
|
||||
};
|
||||
|
||||
struct rename_info {
|
||||
/*
|
||||
* All variables that are arrays of size 3 correspond to data tracked
|
||||
@ -88,6 +94,44 @@ struct rename_info {
|
||||
*/
|
||||
struct strmap dir_renames[3];
|
||||
|
||||
/*
|
||||
* relevant_sources: deleted paths for which we need rename detection
|
||||
*
|
||||
* relevant_sources is a set of deleted paths on each side of
|
||||
* history for which we need rename detection. If a path is deleted
|
||||
* on one side of history, we need to detect if it is part of a
|
||||
* rename if either
|
||||
* * we need to detect renames for an ancestor directory
|
||||
* * the file is modified/deleted on the other side of history
|
||||
* If neither of those is true, we can skip rename detection for
|
||||
* that path.
|
||||
*/
|
||||
struct strset relevant_sources[3];
|
||||
|
||||
/*
|
||||
* dir_rename_mask:
|
||||
* 0: optimization removing unmodified potential rename source okay
|
||||
* 2 or 4: optimization okay, but must check for files added to dir
|
||||
* 7: optimization forbidden; need rename source in case of dir rename
|
||||
*/
|
||||
unsigned dir_rename_mask:3;
|
||||
|
||||
/*
|
||||
* callback_data_*: supporting data structures for alternate traversal
|
||||
*
|
||||
* We sometimes need to be able to traverse through all the files
|
||||
* in a given tree before all immediate subdirectories within that
|
||||
* tree. Since traverse_trees() doesn't do that naturally, we have
|
||||
* a traverse_trees_wrapper() that stores any immediate
|
||||
* subdirectories while traversing files, then traverses the
|
||||
* immediate subdirectories later. These callback_data* variables
|
||||
* store the information for the subdirectories so that we can do
|
||||
* that traversal order.
|
||||
*/
|
||||
struct traversal_callback_data *callback_data;
|
||||
int callback_data_nr, callback_data_alloc;
|
||||
char *callback_data_traverse_path;
|
||||
|
||||
/*
|
||||
* needed_limit: value needed for inexact rename detection to run
|
||||
*
|
||||
@ -358,6 +402,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
|
||||
strmap_clear(&renames->dir_rename_count[i], 1);
|
||||
|
||||
strmap_func(&renames->dir_renames[i], 0);
|
||||
|
||||
strset_func(&renames->relevant_sources[i]);
|
||||
}
|
||||
|
||||
if (!reinitialize) {
|
||||
@ -380,6 +426,12 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
|
||||
}
|
||||
strmap_clear(&opti->output, 0);
|
||||
}
|
||||
|
||||
renames->dir_rename_mask = 0;
|
||||
|
||||
/* Clean out callback_data as well. */
|
||||
FREE_AND_NULL(renames->callback_data);
|
||||
renames->callback_data_nr = renames->callback_data_alloc = 0;
|
||||
}
|
||||
|
||||
static int err(struct merge_options *opt, const char *err, ...)
|
||||
@ -470,6 +522,82 @@ static char *unique_path(struct strmap *existing_paths,
|
||||
|
||||
/*** Function Grouping: functions related to collect_merge_info() ***/
|
||||
|
||||
static int traverse_trees_wrapper_callback(int n,
|
||||
unsigned long mask,
|
||||
unsigned long dirmask,
|
||||
struct name_entry *names,
|
||||
struct traverse_info *info)
|
||||
{
|
||||
struct merge_options *opt = info->data;
|
||||
struct rename_info *renames = &opt->priv->renames;
|
||||
unsigned filemask = mask & ~dirmask;
|
||||
|
||||
assert(n==3);
|
||||
|
||||
if (!renames->callback_data_traverse_path)
|
||||
renames->callback_data_traverse_path = xstrdup(info->traverse_path);
|
||||
|
||||
if (filemask && filemask == renames->dir_rename_mask)
|
||||
renames->dir_rename_mask = 0x07;
|
||||
|
||||
ALLOC_GROW(renames->callback_data, renames->callback_data_nr + 1,
|
||||
renames->callback_data_alloc);
|
||||
renames->callback_data[renames->callback_data_nr].mask = mask;
|
||||
renames->callback_data[renames->callback_data_nr].dirmask = dirmask;
|
||||
COPY_ARRAY(renames->callback_data[renames->callback_data_nr].names,
|
||||
names, 3);
|
||||
renames->callback_data_nr++;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* Much like traverse_trees(), BUT:
|
||||
* - read all the tree entries FIRST, saving them
|
||||
* - note that the above step provides an opportunity to compute necessary
|
||||
* additional details before the "real" traversal
|
||||
* - loop through the saved entries and call the original callback on them
|
||||
*/
|
||||
static int traverse_trees_wrapper(struct index_state *istate,
|
||||
int n,
|
||||
struct tree_desc *t,
|
||||
struct traverse_info *info)
|
||||
{
|
||||
int ret, i, old_offset;
|
||||
traverse_callback_t old_fn;
|
||||
char *old_callback_data_traverse_path;
|
||||
struct merge_options *opt = info->data;
|
||||
struct rename_info *renames = &opt->priv->renames;
|
||||
|
||||
assert(renames->dir_rename_mask == 2 || renames->dir_rename_mask == 4);
|
||||
|
||||
old_callback_data_traverse_path = renames->callback_data_traverse_path;
|
||||
old_fn = info->fn;
|
||||
old_offset = renames->callback_data_nr;
|
||||
|
||||
renames->callback_data_traverse_path = NULL;
|
||||
info->fn = traverse_trees_wrapper_callback;
|
||||
ret = traverse_trees(istate, n, t, info);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
info->traverse_path = renames->callback_data_traverse_path;
|
||||
info->fn = old_fn;
|
||||
for (i = old_offset; i < renames->callback_data_nr; ++i) {
|
||||
info->fn(n,
|
||||
renames->callback_data[i].mask,
|
||||
renames->callback_data[i].dirmask,
|
||||
renames->callback_data[i].names,
|
||||
info);
|
||||
}
|
||||
|
||||
renames->callback_data_nr = old_offset;
|
||||
free(renames->callback_data_traverse_path);
|
||||
renames->callback_data_traverse_path = old_callback_data_traverse_path;
|
||||
info->traverse_path = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void setup_path_info(struct merge_options *opt,
|
||||
struct string_list_item *result,
|
||||
const char *current_dir_name,
|
||||
@ -533,12 +661,22 @@ static void add_pair(struct merge_options *opt,
|
||||
struct name_entry *names,
|
||||
const char *pathname,
|
||||
unsigned side,
|
||||
unsigned is_add /* if false, is_delete */)
|
||||
unsigned is_add /* if false, is_delete */,
|
||||
unsigned match_mask,
|
||||
unsigned dir_rename_mask)
|
||||
{
|
||||
struct diff_filespec *one, *two;
|
||||
struct rename_info *renames = &opt->priv->renames;
|
||||
int names_idx = is_add ? side : 0;
|
||||
|
||||
if (!is_add) {
|
||||
unsigned content_relevant = (match_mask == 0);
|
||||
unsigned location_relevant = (dir_rename_mask == 0x07);
|
||||
|
||||
if (content_relevant || location_relevant)
|
||||
strset_add(&renames->relevant_sources[side], pathname);
|
||||
}
|
||||
|
||||
one = alloc_filespec(pathname);
|
||||
two = alloc_filespec(pathname);
|
||||
fill_filespec(is_add ? two : one,
|
||||
@ -557,6 +695,36 @@ static void collect_rename_info(struct merge_options *opt,
|
||||
struct rename_info *renames = &opt->priv->renames;
|
||||
unsigned side;
|
||||
|
||||
/*
|
||||
* Update dir_rename_mask (determines ignore-rename-source validity)
|
||||
*
|
||||
* dir_rename_mask helps us keep track of when directory rename
|
||||
* detection may be relevant.  Basically, whenever a directory is
|
||||
* removed on one side of history, and a file is added to that
|
||||
* directory on the other side of history, directory rename
|
||||
* detection is relevant (meaning we have to detect renames for all
|
||||
* files within that directory to deduce where the directory
|
||||
* moved). Also, whenever a directory needs directory rename
|
||||
* detection, due to the "majority rules" choice for where to move
|
||||
* it (see t6423 testcase 1f), we also need to detect renames for
|
||||
* all files within subdirectories of that directory as well.
|
||||
*
|
||||
* Here we haven't looked at files within the directory yet, we are
|
||||
* just looking at the directory itself. So, if we aren't yet in
|
||||
* a case where a parent directory needed directory rename detection
|
||||
* (i.e. dir_rename_mask != 0x07), and if the directory was removed
|
||||
* on one side of history, record the mask of the other side of
|
||||
* history in dir_rename_mask.
|
||||
*/
|
||||
if (renames->dir_rename_mask != 0x07 &&
|
||||
(dirmask == 3 || dirmask == 5)) {
|
||||
/* simple sanity check */
|
||||
assert(renames->dir_rename_mask == 0 ||
|
||||
renames->dir_rename_mask == (dirmask & ~1));
|
||||
/* update dir_rename_mask; have it record mask of new side */
|
||||
renames->dir_rename_mask = (dirmask & ~1);
|
||||
}
|
||||
|
||||
/* Update dirs_removed, as needed */
|
||||
if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
|
||||
/* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
|
||||
@ -575,11 +743,15 @@ static void collect_rename_info(struct merge_options *opt,
|
||||
|
||||
/* Check for deletion on side */
|
||||
if ((filemask & 1) && !(filemask & side_mask))
|
||||
add_pair(opt, names, fullname, side, 0 /* delete */);
|
||||
add_pair(opt, names, fullname, side, 0 /* delete */,
|
||||
match_mask & filemask,
|
||||
renames->dir_rename_mask);
|
||||
|
||||
/* Check for addition on side */
|
||||
if (!(filemask & 1) && (filemask & side_mask))
|
||||
add_pair(opt, names, fullname, side, 1 /* add */);
|
||||
add_pair(opt, names, fullname, side, 1 /* add */,
|
||||
match_mask & filemask,
|
||||
renames->dir_rename_mask);
|
||||
}
|
||||
}
|
||||
|
||||
@ -597,12 +769,14 @@ static int collect_merge_info_callback(int n,
|
||||
*/
|
||||
struct merge_options *opt = info->data;
|
||||
struct merge_options_internal *opti = opt->priv;
|
||||
struct rename_info *renames = &opt->priv->renames;
|
||||
struct string_list_item pi; /* Path Info */
|
||||
struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
|
||||
struct name_entry *p;
|
||||
size_t len;
|
||||
char *fullpath;
|
||||
const char *dirname = opti->current_dir_name;
|
||||
unsigned prev_dir_rename_mask = renames->dir_rename_mask;
|
||||
unsigned filemask = mask & ~dirmask;
|
||||
unsigned match_mask = 0; /* will be updated below */
|
||||
unsigned mbase_null = !(mask & 1);
|
||||
@ -743,8 +917,13 @@ static int collect_merge_info_callback(int n,
|
||||
|
||||
original_dir_name = opti->current_dir_name;
|
||||
opti->current_dir_name = pi.string;
|
||||
ret = traverse_trees(NULL, 3, t, &newinfo);
|
||||
if (renames->dir_rename_mask == 0 ||
|
||||
renames->dir_rename_mask == 0x07)
|
||||
ret = traverse_trees(NULL, 3, t, &newinfo);
|
||||
else
|
||||
ret = traverse_trees_wrapper(NULL, 3, t, &newinfo);
|
||||
opti->current_dir_name = original_dir_name;
|
||||
renames->dir_rename_mask = prev_dir_rename_mask;
|
||||
|
||||
for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
|
||||
free(buf[i]);
|
||||
@ -1977,6 +2156,19 @@ static int process_renames(struct merge_options *opt,
|
||||
return clean_merge;
|
||||
}
|
||||
|
||||
static inline int possible_side_renames(struct rename_info *renames,
|
||||
unsigned side_index)
|
||||
{
|
||||
return renames->pairs[side_index].nr > 0 &&
|
||||
!strset_empty(&renames->relevant_sources[side_index]);
|
||||
}
|
||||
|
||||
/*
 * Return 1 if either side of history (index 1 or 2) might have renames
 * worth detecting, 0 if neither does.
 */
static inline int possible_renames(struct rename_info *renames)
{
	unsigned side;

	for (side = 1; side <= 2; side++) {
		if (possible_side_renames(renames, side))
			return 1;
	}
	return 0;
}
|
||||
|
||||
static void resolve_diffpair_statuses(struct diff_queue_struct *q)
|
||||
{
|
||||
/*
|
||||
@ -2013,6 +2205,16 @@ static void detect_regular_renames(struct merge_options *opt,
|
||||
struct diff_options diff_opts;
|
||||
struct rename_info *renames = &opt->priv->renames;
|
||||
|
||||
if (!possible_side_renames(renames, side_index)) {
|
||||
/*
|
||||
* No rename detection needed for this side, but we still need
|
||||
* to make sure 'adds' are marked correctly in case the other
|
||||
* side had directory renames.
|
||||
*/
|
||||
resolve_diffpair_statuses(&renames->pairs[side_index]);
|
||||
return;
|
||||
}
|
||||
|
||||
repo_diff_setup(opt->repo, &diff_opts);
|
||||
diff_opts.flags.recursive = 1;
|
||||
diff_opts.flags.rename_empty = 0;
|
||||
@ -2028,6 +2230,7 @@ static void detect_regular_renames(struct merge_options *opt,
|
||||
diff_queued_diff = renames->pairs[side_index];
|
||||
trace2_region_enter("diff", "diffcore_rename", opt->repo);
|
||||
diffcore_rename_extended(&diff_opts,
|
||||
&renames->relevant_sources[side_index],
|
||||
&renames->dirs_removed[side_index],
|
||||
&renames->dir_rename_count[side_index]);
|
||||
trace2_region_leave("diff", "diffcore_rename", opt->repo);
|
||||
@ -2129,6 +2332,8 @@ static int detect_and_process_renames(struct merge_options *opt,
|
||||
int need_dir_renames, s, clean = 1;
|
||||
|
||||
memset(&combined, 0, sizeof(combined));
|
||||
if (!possible_renames(renames))
|
||||
goto cleanup;
|
||||
|
||||
trace2_region_enter("merge", "regular renames", opt->repo);
|
||||
detect_regular_renames(opt, MERGE_SIDE1);
|
||||
@ -2163,6 +2368,25 @@ static int detect_and_process_renames(struct merge_options *opt,
|
||||
clean &= process_renames(opt, &combined);
|
||||
trace2_region_leave("merge", "process renames", opt->repo);
|
||||
|
||||
goto simple_cleanup; /* collect_renames() handles some of cleanup */
|
||||
|
||||
cleanup:
|
||||
/*
|
||||
* Free now unneeded filepairs, which would have been handled
|
||||
* in collect_renames() normally but we skipped that code.
|
||||
*/
|
||||
for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
|
||||
struct diff_queue_struct *side_pairs;
|
||||
int i;
|
||||
|
||||
side_pairs = &renames->pairs[s];
|
||||
for (i = 0; i < side_pairs->nr; ++i) {
|
||||
struct diff_filepair *p = side_pairs->queue[i];
|
||||
diff_free_filepair(p);
|
||||
}
|
||||
}
|
||||
|
||||
simple_cleanup:
|
||||
/* Free memory for renames->pairs[] and combined */
|
||||
for (s = MERGE_SIDE1; s <= MERGE_SIDE2; s++) {
|
||||
free(renames->pairs[s].queue);
|
||||
@ -3226,6 +3450,8 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
|
||||
NULL, 1);
|
||||
strmap_init_with_options(&renames->dir_renames[i],
|
||||
NULL, 0);
|
||||
strset_init_with_options(&renames->relevant_sources[i],
|
||||
NULL, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user