From 67845745c1ab7dcce72116fb58f99630d14e12cc Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:40 +0000 Subject: [PATCH 1/7] merge-ort: add a few includes Include blob.h for definition of blob_type, and commit-reach.h for declarations of get_merge_bases() and in_merge_bases(). While none of these are used yet, we want to avoid cross-dependencies in the next three series of patches for merge-ort and merge them at the end; adding these "#include"s now avoids textual conflicts. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 51b049358e..ba3ce8d5d5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -17,7 +17,9 @@ #include "cache.h" #include "merge-ort.h" +#include "blob.h" #include "cache-tree.h" +#include "commit-reach.h" #include "diff.h" #include "diffcore.h" #include "dir.h" From 101bc5bc2d73484d288a43fdcf1c00bc04b080e4 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:41 +0000 Subject: [PATCH 2/7] merge-ort: add a clear_internal_opts helper Move most of merge_finalize() into a new helper function, clear_internal_opts(). This is a step to facilitate recursive merges, as well as some future optimizations. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index ba3ce8d5d5..ced6be1f9f 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -221,6 +221,29 @@ static void free_strmap_strings(struct strmap *map) } } +static void clear_internal_opts(struct merge_options_internal *opti, + int reinitialize) +{ + assert(!reinitialize); + + /* + * We marked opti->paths with strdup_strings = 0, so that we + * wouldn't have to make another copy of the fullpath created by + * make_traverse_path from setup_path_info(). But, now that we've + * used it and have no other references to these strings, it is time + * to deallocate them. + */ + free_strmap_strings(&opti->paths); + strmap_clear(&opti->paths, 1); + + /* + * All keys and values in opti->conflicted are a subset of those in + * opti->paths. We don't want to deallocate anything twice, so we + * don't free the keys and we pass 0 for free_values. + */ + strmap_clear(&opti->conflicted, 0); +} + static int err(struct merge_options *opt, const char *err, ...) { va_list params; @@ -1169,22 +1192,7 @@ void merge_finalize(struct merge_options *opt, assert(opt->priv == NULL); - /* - * We marked opti->paths with strdup_strings = 0, so that we - * wouldn't have to make another copy of the fullpath created by - * make_traverse_path from setup_path_info(). But, now that we've - * used it and have no other references to these strings, it is time - * to deallocate them. - */ - free_strmap_strings(&opti->paths); - strmap_clear(&opti->paths, 1); - - /* - * All keys and values in opti->conflicted are a subset of those in - * opti->paths. We don't want to deallocate anything twice, so we - * don't free the keys and we pass 0 for free_values. - */ - strmap_clear(&opti->conflicted, 0); + clear_internal_opts(opti, 0); FREE_AND_NULL(opti); } From 1c7873cdf4a7e84755c54e3f9ef10599041565d0 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:42 +0000 Subject: [PATCH 3/7] merge-ort: add a path_conflict field to merge_options_internal This field is not yet used, but will be used by both the rename handling code, and the conflict type handling code in process_entry(). Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index ced6be1f9f..d88307489b 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -165,6 +165,13 @@ struct conflict_info { /* Whether this path is/was involved in a directory/file conflict */ unsigned df_conflict:1; + /* + * Whether this path is/was involved in a non-content conflict other + * than a directory/file conflict (e.g. rename/rename, rename/delete, + * file location based on possible directory rename). + */ + unsigned path_conflict:1; + /* * For filemask and dirmask, the ith bit corresponds to whether the * ith entry is a file (filemask) or a directory (dirmask). Thus, From 43c1dccb91c0d56b0b00f1b452a1a7204c4242da Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:43 +0000 Subject: [PATCH 4/7] merge-ort: add a paths_to_free field to merge_options_internal This field will be used in future patches to allow removal of paths from opt->priv->paths. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/merge-ort.c b/merge-ort.c index d88307489b..ae855a4ae5 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -58,6 +58,8 @@ struct merge_options_internal { * * these keys serve to intern all the path strings, which allows * us to do pointer comparison on directory names instead of * strcmp; we just have to be careful to use the interned strings. + * (Technically paths_to_free may track some strings that were + * removed from froms paths.) * * The values of paths: * * either a pointer to a merged_info, or a conflict_info struct @@ -92,6 +94,16 @@ struct merge_options_internal { */ struct strmap conflicted; + /* + * paths_to_free: additional list of strings to free + * + * If keys are removed from "paths", they are added to paths_to_free + * to ensure they are later freed. We avoid free'ing immediately since + * other places (e.g. conflict_info.pathnames[]) may still be + * referencing these paths. + */ + struct string_list paths_to_free; + /* * current_dir_name: temporary var used in collect_merge_info_callback() * @@ -249,6 +261,17 @@ static void clear_internal_opts(struct merge_options_internal *opti, * don't free the keys and we pass 0 for free_values. */ strmap_clear(&opti->conflicted, 0); + + /* + * opti->paths_to_free is similar to opti->paths; we created it with + * strdup_strings = 0 to avoid making _another_ copy of the fullpath + * but now that we've used it and have no other references to these + * strings, it is time to deallocate them. We do so by temporarily + * setting strdup_strings to 1. + */ + opti->paths_to_free.strdup_strings = 1; + string_list_clear(&opti->paths_to_free, 0); + opti->paths_to_free.strdup_strings = 0; } static int err(struct merge_options *opt, const char *err, ...) @@ -1243,13 +1266,14 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) * Although we initialize opt->priv->paths with strdup_strings=0, * that's just to avoid making yet another copy of an allocated * string. Putting the entry into paths means we are taking - * ownership, so we will later free it. + * ownership, so we will later free it. paths_to_free is similar. * * In contrast, conflicted just has a subset of keys from paths, so * we don't want to free those (it'd be a duplicate free). */ strmap_init_with_options(&opt->priv->paths, NULL, 0); strmap_init_with_options(&opt->priv->conflicted, NULL, 0); + string_list_init(&opt->priv->paths_to_free, 0); } /* From 04af1879b9313a83aea46791bad8963e14e7651e Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:44 +0000 Subject: [PATCH 5/7] merge-ort: add function grouping comments Commit b658536f59 ("merge-ort: add some high-level algorithm structure", 2020-10-27) added high-level structure of the ort merge algorithm. As we have added more and more functions, that high-level structure has been slightly obscured. Since functions are still grouped according to this high-level structure, add comments denoting sections where all the functions are specifically tied to a piece of the high-level structure. This function groupings include a few sub-divisions of the original high-level structure, including some sub-divisions that are yet to be submitted. Each has (or will have) several functions all serving as helpers to one or two main functions for each section. As an added bonus, the comments will serve to provide a small textual separation between nearby sections and allow the next three patch series to be submitted independently and merge cleanly. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index ae855a4ae5..8a144fbe29 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -210,6 +210,8 @@ struct conflict_info { unsigned match_mask:3; }; +/*** Function Grouping: various utility functions ***/ + /* * For the next three macros, see warning for conflict_info.merged. * @@ -290,6 +292,8 @@ static int err(struct merge_options *opt, const char *err, ...) return -1; } +/*** Function Grouping: functions related to collect_merge_info() ***/ + static void setup_path_info(struct merge_options *opt, struct string_list_item *result, const char *current_dir_name, @@ -544,6 +548,15 @@ static int collect_merge_info(struct merge_options *opt, return ret; } +/*** Function Grouping: functions related to threeway content merges ***/ + +/*** Function Grouping: functions related to detect_and_process_renames(), *** + *** which are split into directory and regular rename detection sections. ***/ + +/*** Function Grouping: functions related to directory rename detection ***/ + +/*** Function Grouping: functions related to regular rename detection ***/ + static int detect_and_process_renames(struct merge_options *opt, struct tree *merge_base, struct tree *side1, @@ -561,6 +574,8 @@ static int detect_and_process_renames(struct merge_options *opt, return clean; } +/*** Function Grouping: functions related to process_entries() ***/ + static int string_list_df_name_compare(const char *one, const char *two) { int onelen = strlen(one); @@ -1039,6 +1054,8 @@ static void process_entries(struct merge_options *opt, string_list_clear(&dir_metadata.offsets, 0); } +/*** Function Grouping: functions related to merge_switch_to_result() ***/ + static int checkout(struct merge_options *opt, struct tree *prev, struct tree *next) @@ -1226,6 +1243,8 @@ void merge_finalize(struct merge_options *opt, FREE_AND_NULL(opti); } +/*** Function Grouping: helper functions for merge_incore_*() ***/ + static void merge_start(struct merge_options *opt, struct merge_result *result) { /* Sanity checks on opt */ @@ -1276,6 +1295,8 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) string_list_init(&opt->priv->paths_to_free, 0); } +/*** Function Grouping: merge_incore_*() and their internal variants ***/ + /* * Originally from merge_trees_internal(); heavily adapted, though. */ From e2e9dc030cb37619256ec995b05412623043e74c Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:45 +0000 Subject: [PATCH 6/7] merge-ort: add die-not-implemented stub handle_content_merge() function This simplistic and weird-looking patch is here to facilitate future patch submissions. Adding this stub allows rename detection code to reference it in one patch series, while a separate patch series can define the implementation, and then both series can merge cleanly and work nicely together at that point. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/merge-ort.c b/merge-ort.c index 8a144fbe29..e6b763de14 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -550,6 +550,18 @@ static int collect_merge_info(struct merge_options *opt, /*** Function Grouping: functions related to threeway content merges ***/ +static int handle_content_merge(struct merge_options *opt, + const char *path, + const struct version_info *o, + const struct version_info *a, + const struct version_info *b, + const char *pathnames[3], + const int extra_marker_size, + struct version_info *result) +{ + die("Not yet implemented"); +} + /*** Function Grouping: functions related to detect_and_process_renames(), *** *** which are split into directory and regular rename detection sections. ***/ @@ -957,6 +969,8 @@ static void process_entry(struct merge_options *opt, ci->merged.clean = 0; ci->merged.result.mode = ci->stages[1].mode; oidcpy(&ci->merged.result.oid, &ci->stages[1].oid); + /* When we fix above, we'll call handle_content_merge() */ + (void)handle_content_merge; } else if (ci->filemask == 3 || ci->filemask == 5) { /* Modify/delete */ die("Not yet implemented."); From c5a6f65527aa3b6f5d7cf25437a88d8727ab0646 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Thu, 3 Dec 2020 15:59:46 +0000 Subject: [PATCH 7/7] merge-ort: add modify/delete handling and delayed output processing The focus here is on adding a path_msg() which will queue up warning/conflict/notice messages about the merge for later processing, storing these in a pathname -> strbuf map. It might seem like a big change, but it really just is: * declaration of necessary map with some comments * initialization and recording of data * a bunch of code to iterate over the map at print/free time * at least one caller in order to avoid an error about having an unused function (which we provide in the form of implementing modify/delete conflict handling). At this stage, it is probably not clear why I am opting for delayed output processing. There are multiple reasons: 1. Merges are supposed to abort if they would overwrite dirty changes in the working tree. We cannot correctly determine whether changes would be overwritten until both rename detection has occurred and full processing of entries with the renames has finalized. Warning/conflict/notice messages come up at intermediate codepaths along the way, so unless we want spurious conflict/warning messages being printed when the merge will be aborted anyway, we need to save these messages and only print them when relevant. 2. There can be multiple messages for a single path, and we want all messages for a give path to appear together instead of having them grouped by conflict/warning type. This was a problem already with merge-recursive.c but became even more important due to the splitting apart of conflict types as discussed in the commit message for 1f3c9ba707 ("t6425: be more flexible with rename/delete conflict messages", 2020-08-10) 3. Some callers might want to avoid showing the output in certain cases, such as if the end result is a clean merge. Rebases have typically done this. 4. Some callers might not want the output to go to stdout or even stderr, but might want to do something else with it entirely. For example, a --remerge-diff option to `git show` or `git log -p` that remerges on the fly and diffs merge commits against the remerged version would benefit from stdout/stderr not being written to in the standard form. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index e6b763de14..414e7b7eea 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -104,6 +104,15 @@ struct merge_options_internal { */ struct string_list paths_to_free; + /* + * output: special messages and conflict notices for various paths + * + * This is a map of pathnames (a subset of the keys in "paths" above) + * to strbufs. It gathers various warning/conflict/notice messages + * for later processing. + */ + struct strmap output; + /* * current_dir_name: temporary var used in collect_merge_info_callback() * @@ -274,6 +283,27 @@ static void clear_internal_opts(struct merge_options_internal *opti, opti->paths_to_free.strdup_strings = 1; string_list_clear(&opti->paths_to_free, 0); opti->paths_to_free.strdup_strings = 0; + + if (!reinitialize) { + struct hashmap_iter iter; + struct strmap_entry *e; + + /* Release and free each strbuf found in output */ + strmap_for_each_entry(&opti->output, &iter, e) { + struct strbuf *sb = e->value; + strbuf_release(sb); + /* + * While strictly speaking we don't need to free(sb) + * here because we could pass free_values=1 when + * calling strmap_clear() on opti->output, that would + * require strmap_clear to do another + * strmap_for_each_entry() loop, so we just free it + * while we're iterating anyway. + */ + free(sb); + } + strmap_clear(&opti->output, 0); + } } static int err(struct merge_options *opt, const char *err, ...) @@ -292,6 +322,27 @@ static int err(struct merge_options *opt, const char *err, ...) return -1; } +__attribute__((format (printf, 4, 5))) +static void path_msg(struct merge_options *opt, + const char *path, + int omittable_hint, /* skippable under --remerge-diff */ + const char *fmt, ...) +{ + va_list ap; + struct strbuf *sb = strmap_get(&opt->priv->output, path); + if (!sb) { + sb = xmalloc(sizeof(*sb)); + strbuf_init(sb, 0); + strmap_put(&opt->priv->output, path, sb); + } + + va_start(ap, fmt); + strbuf_vaddf(sb, fmt, ap); + va_end(ap); + + strbuf_addch(sb, '\n'); +} + /*** Function Grouping: functions related to collect_merge_info() ***/ static void setup_path_info(struct merge_options *opt, @@ -973,7 +1024,23 @@ static void process_entry(struct merge_options *opt, (void)handle_content_merge; } else if (ci->filemask == 3 || ci->filemask == 5) { /* Modify/delete */ - die("Not yet implemented."); + const char *modify_branch, *delete_branch; + int side = (ci->filemask == 5) ? 2 : 1; + int index = opt->priv->call_depth ? 0 : side; + + ci->merged.result.mode = ci->stages[index].mode; + oidcpy(&ci->merged.result.oid, &ci->stages[index].oid); + ci->merged.clean = 0; + + modify_branch = (side == 1) ? opt->branch1 : opt->branch2; + delete_branch = (side == 1) ? opt->branch2 : opt->branch1; + + path_msg(opt, path, 0, + _("CONFLICT (modify/delete): %s deleted in %s " + "and modified in %s. Version %s of %s left " + "in tree."), + path, delete_branch, modify_branch, + modify_branch, path); } else if (ci->filemask == 2 || ci->filemask == 4) { /* Added on one side */ int side = (ci->filemask == 4) ? 2 : 1; @@ -1240,7 +1307,29 @@ void merge_switch_to_result(struct merge_options *opt, } if (display_update_msgs) { - /* TODO: print out CONFLICT and other informational messages. */ + struct merge_options_internal *opti = result->priv; + struct hashmap_iter iter; + struct strmap_entry *e; + struct string_list olist = STRING_LIST_INIT_NODUP; + int i; + + /* Hack to pre-allocate olist to the desired size */ + ALLOC_GROW(olist.items, strmap_get_size(&opti->output), + olist.alloc); + + /* Put every entry from output into olist, then sort */ + strmap_for_each_entry(&opti->output, &iter, e) { + string_list_append(&olist, e->key)->util = e->value; + } + string_list_sort(&olist); + + /* Iterate over the items, printing them */ + for (i = 0; i < olist.nr; ++i) { + struct strbuf *sb = olist.items[i].util; + + printf("%s", sb->buf); + } + string_list_clear(&olist, 0); } merge_finalize(opt, result); @@ -1307,6 +1396,13 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) strmap_init_with_options(&opt->priv->paths, NULL, 0); strmap_init_with_options(&opt->priv->conflicted, NULL, 0); string_list_init(&opt->priv->paths_to_free, 0); + + /* + * keys & strbufs in output will sometimes need to outlive "paths", + * so it will have a copy of relevant keys. It's probably a small + * subset of the overall paths that have special output. + */ + strmap_init(&opt->priv->output); } /*** Function Grouping: merge_incore_*() and their internal variants ***/