From f407f14deaa14ebddd0d27238523ced8eca74393 Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Sun, 17 Feb 2008 19:07:19 +0000
Subject: [PATCH 1/2] xdl_merge(): make XDL_MERGE_ZEALOUS output simpler

When a merge conflicts, there are often no more than three common lines
between two conflicting regions.

Since a conflict takes up as many lines as are conflicting, plus three
lines for the conflict markers, the output will be no longer, and usually
shorter (and thus simpler), in this case if the common lines are merged
into the conflicting regions.

This patch merges up to three common lines into the conflicts.

For example, what looked like this before this patch:

	<<<<<<<
	if (a == 1)
	=======
	if (a != 0)
	>>>>>>>
	{
		int i;
	<<<<<<<
		a = 0;
	=======
		a = !a;
	>>>>>>>

will now look like this:

	<<<<<<<
	if (a == 1)
	{
		int i;
		a = 0;
	=======
	if (a != 0)
	{
		int i;
		a = !a;
	>>>>>>>

Suggested by Linus (based on ideas by "Voltage Spike" -- if that name is
real, it is mighty cool).

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t6023-merge-file.sh | 10 +++++++++
 xdiff/xmerge.c        | 47 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/t/t6023-merge-file.sh b/t/t6023-merge-file.sh
index 86419964b44..869e8d559e8 100755
--- a/t/t6023-merge-file.sh
+++ b/t/t6023-merge-file.sh
@@ -139,4 +139,14 @@ test_expect_success 'binary files cannot be merged' '
 	grep "Cannot merge binary files" merge.err
 '
 
+sed -e "s/deerit.$/deerit;/" -e "s/me;$/me./" < new5.txt > new6.txt
+sed -e "s/deerit.$/deerit,/" -e "s/me;$/me,/" < new5.txt > new7.txt
+
+test_expect_success 'MERGE_ZEALOUS simplifies non-conflicts' '
+	# a conflict is expected (non-zero exit), shown as exactly one ======= line
+	! git merge-file -p new6.txt new5.txt new7.txt > output &&
+	test 1 = $(grep ======= < output | wc -l)
+
+'
+
 test_done
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index b83b3348cc3..ecbdae502c7 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -248,6 +248,49 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
 	return 0;
 }
 
+/*
+ * This function merges m and m->next, marking everything between those hunks
+ * as conflicting, too.
+ */
+static void xdl_merge_two_conflicts(xdmerge_t *m)
+{
+	xdmerge_t *absorbed = m->next; /* the hunk folded into m */
+	m->next = absorbed->next; /* unlink it from the list */
+	m->chg2 = absorbed->i2 + absorbed->chg2 - m->i2;
+	m->chg1 = absorbed->i1 + absorbed->chg1 - m->i1; /* m now ends where it did */
+	free(absorbed);
+}
+
+/*
+ * If there are at most 3 non-conflicting lines between conflicts,
+ * it appears simpler -- because it takes up less (or as many) lines --
+ * if the lines are moved into the conflicts.
+ */
+static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m)
+{
+	int result = 0;
+
+	if (!m)
+		return result; /* empty list: nothing was merged */
+	for (;;) { /* after a merge, m is re-tested against its new successor */
+		xdmerge_t *next_m = m->next;
+		int begin, end; /* [begin, end): the i1 gap between m and next_m */
+
+		if (!next_m)
+			return result; /* end of list: number of merges performed */
+
+		begin = m->i1 + m->chg1;
+		end = next_m->i1;
+
+		if (m->mode != 0 || next_m->mode != 0 || end - begin > 3)
+			m = next_m;
+		else {
+			result++;
+			xdl_merge_two_conflicts(m);
+		}
+	}
+}
+
 /*
  * level == 0: mark all overlapping changes as conflict
  * level == 1: mark overlapping changes as conflict only if not identical
@@ -355,7 +398,9 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, const char *name1,
 	if (!changes)
 		changes = c;
 	/* refine conflicts */
-	if (level > 1 && xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0) {
+	if (level > 1 &&
+	    (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 ||
+	     xdl_simplify_non_conflicts(xe1, changes) < 0)) {
 		xdl_cleanup_merge(changes);
 		return -1;
 	}

From ee95ec5d58d536243966de6ee810d345074b755e Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Date: Sun, 17 Feb 2008 19:07:40 +0000
Subject: [PATCH 2/2] xdl_merge(): introduce XDL_MERGE_ZEALOUS_ALNUM

When a merge conflicts, there are often common lines that are not really
common, such as empty lines or lines containing a single curly bracket.

With XDL_MERGE_ZEALOUS_ALNUM, we use the following heuristics: when a
hunk does not contain any letters or digits, it is treated as conflicting.

In other words, a conflict which used to look like this:

	<<<<<<<
					a = 1;
	=======
					output();
	>>>>>>>
				}
			}
		}

	<<<<<<<
		output();
	=======
		b = 1;
	>>>>>>>

will look like this with ZEALOUS_ALNUM:

	<<<<<<<
					a = 1;
				}
			}
		}

		output();
	=======
					output();
				}
			}
		}

		b = 1;
	>>>>>>>

To demonstrate this, git-merge-file has been switched from
XDL_MERGE_ZEALOUS to XDL_MERGE_ZEALOUS_ALNUM.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-merge-file.c  |  2 +-
 t/t6023-merge-file.sh | 10 ++++++++++
 xdiff/xdiff.h         |  1 +
 xdiff/xmerge.c        | 31 +++++++++++++++++++++++++++----
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/builtin-merge-file.c b/builtin-merge-file.c
index 58deb62ac08..adce6d4635a 100644
--- a/builtin-merge-file.c
+++ b/builtin-merge-file.c
@@ -46,7 +46,7 @@ int cmd_merge_file(int argc, const char **argv, const char *prefix)
 	}
 
 	ret = xdl_merge(mmfs + 1, mmfs + 0, names[0], mmfs + 2, names[2],
-			&xpp, XDL_MERGE_ZEALOUS, &result);
+			&xpp, XDL_MERGE_ZEALOUS_ALNUM, &result);
 
 	for (i = 0; i < 3; i++)
 		free(mmfs[i].ptr);
diff --git a/t/t6023-merge-file.sh b/t/t6023-merge-file.sh
index 869e8d559e8..79dc58b2ce9 100755
--- a/t/t6023-merge-file.sh
+++ b/t/t6023-merge-file.sh
@@ -149,4 +149,14 @@ test_expect_success 'MERGE_ZEALOUS simplifies non-conflicts' '
 
 '
 
+# "\n" in a sed replacement is a GNU extension, so insert "%" markers and let
+# tr turn them into newlines (assumes the fixtures contain no "%" -- confirm).
+sed -e 's/deerit./&%%%%/' -e "s/locavit,/locavit;/" < new6.txt | tr "%" "\012" > new8.txt
+sed -e 's/deerit./&%%%%/' -e "s/locavit,/locavit --/" < new7.txt | tr "%" "\012" > new9.txt
+
+test_expect_success 'ZEALOUS_ALNUM' '
+	! git merge-file -p new8.txt new5.txt new9.txt > merge.out &&
+	test 1 = $(grep ======= < merge.out | wc -l)
+'
+
 test_done
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index c00ddaa6e98..413082e1fdf 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -53,6 +53,7 @@ extern "C" {
 #define XDL_MERGE_MINIMAL 0
 #define XDL_MERGE_EAGER 1
 #define XDL_MERGE_ZEALOUS 2
+#define XDL_MERGE_ZEALOUS_ALNUM 3
 
 typedef struct s_mmfile {
 	char *ptr;
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index ecbdae502c7..82b3573e7ad 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -248,6 +248,23 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
 	return 0;
 }
 
+static int line_contains_alnum(const char *ptr, long size)
+{
+	while (size-- > 0) /* cast below: <ctype.h> is undefined for negative char */
+		if (isalnum((unsigned char)*(ptr++)))
+			return 1;
+	return 0; /* no letter or digit in the size bytes at ptr */
+}
+
+static int lines_contain_alnum(xdfenv_t *xe, int i, int chg)
+{
+	int found = 0; /* becomes 1 at the first record holding a letter or digit */
+	for (; chg && !found; chg--, i++) /* chg records of xe->xdf2 from index i */
+		found = line_contains_alnum(xe->xdf2.recs[i]->ptr,
+					    xe->xdf2.recs[i]->size);
+	return found;
+}
+
 /*
  * This function merges m and m->next, marking everything between those hunks
  * as conflicting, too.
@@ -266,7 +283,8 @@ static void xdl_merge_two_conflicts(xdmerge_t *m)
  * it appears simpler -- because it takes up less (or as many) lines --
  * if the lines are moved into the conflicts.
  */
-static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m)
+static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m,
+				      int simplify_if_no_alnum)
 {
 	int result = 0;
 
@@ -282,9 +300,12 @@ static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m)
 		begin = m->i1 + m->chg1;
 		end = next_m->i1;
 
-		if (m->mode != 0 || next_m->mode != 0 || end - begin > 3)
+		if (m->mode != 0 || next_m->mode != 0 ||
+		    (end - begin > 3 &&
+		     (!simplify_if_no_alnum ||
+		      lines_contain_alnum(xe1, begin, end - begin)))) {
 			m = next_m;
-		else {
+		} else {
 			result++;
 			xdl_merge_two_conflicts(m);
 		}
@@ -295,6 +316,8 @@ static int xdl_simplify_non_conflicts(xdfenv_t *xe1, xdmerge_t *m)
  * level == 0: mark all overlapping changes as conflict
  * level == 1: mark overlapping changes as conflict only if not identical
  * level == 2: analyze non-identical changes for minimal conflict set
+ * level == 3: analyze non-identical changes for minimal conflict set, but
+ *             treat hunks not containing any letter or number as conflicting
  *
  * returns < 0 on error, == 0 for no conflicts, else number of conflicts
  */
@@ -400,7 +423,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, const char *name1,
 	/* refine conflicts */
 	if (level > 1 &&
 	    (xdl_refine_conflicts(xe1, xe2, changes, xpp) < 0 ||
-	     xdl_simplify_non_conflicts(xe1, changes) < 0)) {
+	     xdl_simplify_non_conflicts(xe1, changes, level > 2) < 0)) {
 		xdl_cleanup_merge(changes);
 		return -1;
 	}