From 204f01a2f734fddab95b09123b04b1305620e7b6 Mon Sep 17 00:00:00 2001
From: Johan Herland <johan@herland.net>
Date: Mon, 11 Apr 2011 00:48:50 +0200
Subject: [PATCH 1/4] --dirstat: Describe non-obvious differences relative to
 --stat or regular diff

Also add a testcase documenting the current behavior.

Improved-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Johan Herland <johan@herland.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt                |  4 +++
 t/t4013-diff-various.sh                       | 27 ++++++++++++++-----
 t/t4013/diff.diff_--dirstat_initial_rearrange |  2 ++
 ...-stdout_--cover-letter_-n_initial..master^ |  2 +-
 t/t4013/diff.log_--decorate=full_--all        |  6 +++++
 t/t4013/diff.log_--decorate_--all             |  6 +++++
 6 files changed, 40 insertions(+), 7 deletions(-)
 create mode 100644 t/t4013/diff.diff_--dirstat_initial_rearrange

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index c93124be798..23772d615d2 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -72,6 +72,10 @@ endif::git-format-patch[]
 	a cut-off percent (3% by default) are not shown. The cut-off percent
 	can be set with `--dirstat=<limit>`. Changes in a child directory are not
 	counted for the parent directory, unless `--cumulative` is used.
++
+Note that the `--dirstat` option computes the changes while ignoring
+pure code movements within a file.  In other words, rearranging lines
+in a file is not counted as a change.
 
 --dirstat-by-file[=<limit>]::
 	Same as `--dirstat`, but counts changed files instead of lines.
diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index 5daa0f2a0c9..3b1b392a56b 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -80,18 +80,31 @@ test_expect_success setup '
 
 	git config log.showroot false &&
 	git commit --amend &&
+
+	GIT_AUTHOR_DATE="2006-06-26 00:06:00 +0000" &&
+	GIT_COMMITTER_DATE="2006-06-26 00:06:00 +0000" &&
+	export GIT_AUTHOR_DATE GIT_COMMITTER_DATE &&
+	git checkout -b rearrange initial &&
+	for i in B A; do echo $i; done >dir/sub &&
+	git add dir/sub &&
+	git commit -m "Rearranged lines in dir/sub" &&
+	git checkout master &&
+
 	git show-branch
 '
 
 : <<\EOF
 ! [initial] Initial
  * [master] Merge branch 'side'
-  ! [side] Side
----
- -  [master] Merge branch 'side'
- *+ [side] Side
- *  [master^] Second
-+*+ [initial] Initial
+  ! [rearrange] Rearranged lines in dir/sub
+   ! [side] Side
+----
+  +  [rearrange] Rearranged lines in dir/sub
+ -   [master] Merge branch 'side'
+ * + [side] Side
+ *   [master^] Third
+ *   [master~2] Second
++*++ [initial] Initial
 EOF
 
 V=`git version | sed -e 's/^git version //' -e 's/\./\\./g'`
@@ -287,6 +300,8 @@ diff --no-index --name-status -- dir2 dir
 diff --no-index dir dir3
 diff master master^ side
 diff --dirstat master~1 master~2
+# --dirstat doesn't notice changes that simply rearrange existing lines
+diff --dirstat initial rearrange
 EOF
 
 test_expect_success 'log -S requires an argument' '
diff --git a/t/t4013/diff.diff_--dirstat_initial_rearrange b/t/t4013/diff.diff_--dirstat_initial_rearrange
new file mode 100644
index 00000000000..fb2e17dd2e4
--- /dev/null
+++ b/t/t4013/diff.diff_--dirstat_initial_rearrange
@@ -0,0 +1,2 @@
+$ git diff --dirstat initial rearrange
+$
diff --git a/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^ b/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^
index 1f0f9ad44b2..3b4e1130125 100644
--- a/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^
+++ b/t/t4013/diff.format-patch_--stdout_--cover-letter_-n_initial..master^
@@ -1,7 +1,7 @@
 $ git format-patch --stdout --cover-letter -n initial..master^
 From 9a6d4949b6b76956d9d5e26f2791ec2ceff5fdc0 Mon Sep 17 00:00:00 2001
 From: C O Mitter <committer@example.com>
-Date: Mon, 26 Jun 2006 00:05:00 +0000
+Date: Mon, 26 Jun 2006 00:06:00 +0000
 Subject: [DIFFERENT_PREFIX 0/2] *** SUBJECT HERE ***
 
 *** BLURB HERE ***
diff --git a/t/t4013/diff.log_--decorate=full_--all b/t/t4013/diff.log_--decorate=full_--all
index d155e0bab29..44d45257da7 100644
--- a/t/t4013/diff.log_--decorate=full_--all
+++ b/t/t4013/diff.log_--decorate=full_--all
@@ -1,4 +1,10 @@
 $ git log --decorate=full --all
+commit cd4e72fd96faed3f0ba949dc42967430374e2290 (refs/heads/rearrange)
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:06:00 2006 +0000
+
+    Rearranged lines in dir/sub
+
 commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (HEAD, refs/heads/master)
 Merge: 9a6d494 c7a2ab9
 Author: A U Thor <author@example.com>
diff --git a/t/t4013/diff.log_--decorate_--all b/t/t4013/diff.log_--decorate_--all
index fd7c3e64396..27d3eabc26f 100644
--- a/t/t4013/diff.log_--decorate_--all
+++ b/t/t4013/diff.log_--decorate_--all
@@ -1,4 +1,10 @@
 $ git log --decorate --all
+commit cd4e72fd96faed3f0ba949dc42967430374e2290 (rearrange)
+Author: A U Thor <author@example.com>
+Date:   Mon Jun 26 00:06:00 2006 +0000
+
+    Rearranged lines in dir/sub
+
 commit 59d314ad6f356dd08601a4cd5e530381da3e3c64 (HEAD, master)
 Merge: 9a6d494 c7a2ab9
 Author: A U Thor <author@example.com>

From 0133dab75d8b15c559aa9df66134d72dce0e0476 Mon Sep 17 00:00:00 2001
From: Johan Herland <johan@herland.net>
Date: Mon, 11 Apr 2011 00:48:51 +0200
Subject: [PATCH 2/4] --dirstat-by-file: Make it faster and more correct

Currently, when using --dirstat-by-file, it first does the full --dirstat
analysis (using diffcore_count_changes()), and then resets 'damage' to 1,
if any damage was found by diffcore_count_changes().

But --dirstat-by-file is not interested in the file damage per se. It only
cares if the file changed at all. In that sense it only cares if the blob
object for a file has changed. We therefore only need to compare the
object names of each file pair in the diff queue and we can skip the
entire --dirstat analysis and simply set 'damage' to 1 for each entry
where the object name has changed.

This makes --dirstat-by-file faster, and also bypasses --dirstat's practice
of ignoring rearranged lines within a file.

The patch also contains an added testcase verifying that --dirstat-by-file
now detects changes that only rearrange lines within a file.

Signed-off-by: Johan Herland <johan@herland.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c                                        | 25 +++++++++++++++----
 t/t4013-diff-various.sh                       |  2 ++
 ...f.diff_--dirstat-by-file_initial_rearrange |  3 +++
 3 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 t/t4013/diff.diff_--dirstat-by-file_initial_rearrange

diff --git a/diff.c b/diff.c
index 3fd9e0c7031..4f5270b8dbc 100644
--- a/diff.c
+++ b/diff.c
@@ -1539,9 +1539,27 @@ static void show_dirstat(struct diff_options *options)
 		struct diff_filepair *p = q->queue[i];
 		const char *name;
 		unsigned long copied, added, damage;
+		int content_changed;
 
 		name = p->one->path ? p->one->path : p->two->path;
 
+		if (p->one->sha1_valid && p->two->sha1_valid)
+			content_changed = hashcmp(p->one->sha1, p->two->sha1);
+		else
+			content_changed = 1;
+
+		if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE)) {
+			/*
+			 * In --dirstat-by-file mode, we don't really need to
+			 * look at the actual file contents at all.
+			 * The fact that the SHA1 changed is enough for us to
+			 * add this file to the list of results
+			 * (with each file contributing equal damage).
+			 */
+			damage = content_changed ? 1 : 0;
+			goto found_damage;
+		}
+
 		if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two)) {
 			diff_populate_filespec(p->one, 0);
 			diff_populate_filespec(p->two, 0);
@@ -1564,14 +1582,11 @@ static void show_dirstat(struct diff_options *options)
 		/*
 		 * Original minus copied is the removed material,
 		 * added is the new material.  They are both damages
-		 * made to the preimage. In --dirstat-by-file mode, count
-		 * damaged files, not damaged lines. This is done by
-		 * counting only a single damaged line per file.
+		 * made to the preimage.
 		 */
 		damage = (p->one->size - copied) + added;
-		if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0)
-			damage = 1;
 
+found_damage:
 		ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
 		dir.files[dir.nr].name = name;
 		dir.files[dir.nr].changed = damage;
diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index 3b1b392a56b..6428a905ab7 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -302,6 +302,8 @@ diff master master^ side
 diff --dirstat master~1 master~2
 # --dirstat doesn't notice changes that simply rearrange existing lines
 diff --dirstat initial rearrange
+# ...but --dirstat-by-file does notice changes that only rearrange lines
+diff --dirstat-by-file initial rearrange
 EOF
 
 test_expect_success 'log -S requires an argument' '
diff --git a/t/t4013/diff.diff_--dirstat-by-file_initial_rearrange b/t/t4013/diff.diff_--dirstat-by-file_initial_rearrange
new file mode 100644
index 00000000000..e48e33f6787
--- /dev/null
+++ b/t/t4013/diff.diff_--dirstat-by-file_initial_rearrange
@@ -0,0 +1,3 @@
+$ git diff --dirstat-by-file initial rearrange
+ 100.0% dir/
+$

From 2ff3a80334115797b8446909655e536f43900bc5 Mon Sep 17 00:00:00 2001
From: Johan Herland <johan@herland.net>
Date: Mon, 11 Apr 2011 00:48:52 +0200
Subject: [PATCH 3/4] Teach --dirstat not to completely ignore rearranged lines
 within a file

Currently, the --dirstat analysis ignores when lines within a file are
rearranged, because the "damage" calculated by show_dirstat() is 0.
However, if the object name has changed, we already know that there is
some damage, and it is unintuitive to claim there is _no_ damage.

Teach show_dirstat() to assign a minimum amount of damage (== 1) to
entries for which the analysis otherwise yields zero damage, to still
represent that these files are changed, instead of saying that there
is no change.

Also, skip --dirstat analysis when the object names are the same (e.g. for
a pure file rename).

Signed-off-by: Johan Herland <johan@herland.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/diff-options.txt                |  4 ++--
 diff.c                                        | 19 ++++++++++++++++++-
 t/t4013-diff-various.sh                       |  2 --
 t/t4013/diff.diff_--dirstat_initial_rearrange |  1 +
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 23772d615d2..7e4bd425e1f 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -74,8 +74,8 @@ endif::git-format-patch[]
 	counted for the parent directory, unless `--cumulative` is used.
 +
 Note that the `--dirstat` option computes the changes while ignoring
-pure code movements within a file.  In other words, rearranging lines
-in a file is not counted as a change.
+the amount of pure code movements within a file.  In other words,
+rearranging lines in a file is not counted as much as other changes.
 
 --dirstat-by-file[=<limit>]::
 	Same as `--dirstat`, but counts changed files instead of lines.
diff --git a/diff.c b/diff.c
index 4f5270b8dbc..1f44cb42373 100644
--- a/diff.c
+++ b/diff.c
@@ -1548,6 +1548,16 @@ static void show_dirstat(struct diff_options *options)
 		else
 			content_changed = 1;
 
+		if (!content_changed) {
+			/*
+			 * The SHA1 has not changed, so pre-/post-content is
+			 * identical. We can therefore skip looking at the
+			 * file contents altogether.
+			 */
+			damage = 0;
+			goto found_damage;
+		}
+
 		if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE)) {
 			/*
 			 * In --dirstat-by-file mode, we don't really need to
@@ -1556,7 +1566,7 @@ static void show_dirstat(struct diff_options *options)
 			 * add this file to the list of results
 			 * (with each file contributing equal damage).
 			 */
-			damage = content_changed ? 1 : 0;
+			damage = 1;
 			goto found_damage;
 		}
 
@@ -1583,8 +1593,15 @@ static void show_dirstat(struct diff_options *options)
 		 * Original minus copied is the removed material,
 		 * added is the new material.  They are both damages
 		 * made to the preimage.
+		 * If the resulting damage is zero, we know that
+		 * diffcore_count_changes() considers the two entries to
+		 * be identical, but since content_changed is true, we
+		 * know that there must have been _some_ kind of change,
+		 * so we force all entries to have damage > 0.
 		 */
 		damage = (p->one->size - copied) + added;
+		if (!damage)
+			damage = 1;
 
 found_damage:
 		ALLOC_GROW(dir.files, dir.nr + 1, dir.alloc);
diff --git a/t/t4013-diff-various.sh b/t/t4013-diff-various.sh
index 6428a905ab7..93a6f208710 100755
--- a/t/t4013-diff-various.sh
+++ b/t/t4013-diff-various.sh
@@ -300,9 +300,7 @@ diff --no-index --name-status -- dir2 dir
 diff --no-index dir dir3
 diff master master^ side
 diff --dirstat master~1 master~2
-# --dirstat doesn't notice changes that simply rearrange existing lines
 diff --dirstat initial rearrange
-# ...but --dirstat-by-file does notice changes that only rearrange lines
 diff --dirstat-by-file initial rearrange
 EOF
 
diff --git a/t/t4013/diff.diff_--dirstat_initial_rearrange b/t/t4013/diff.diff_--dirstat_initial_rearrange
index fb2e17dd2e4..5fb02c13bc5 100644
--- a/t/t4013/diff.diff_--dirstat_initial_rearrange
+++ b/t/t4013/diff.diff_--dirstat_initial_rearrange
@@ -1,2 +1,3 @@
 $ git diff --dirstat initial rearrange
+ 100.0% dir/
 $

From 2ca86714703f81f9dd5dfb31f8d97a8a0089634d Mon Sep 17 00:00:00 2001
From: Johan Herland <johan@herland.net>
Date: Tue, 12 Apr 2011 11:24:34 +0200
Subject: [PATCH 4/4] --dirstat: In case of renames, use target filename
 instead of source filename

This changes --dirstat analysis to count "damage" toward the target filename,
rather than the source filename. For renames within a directory, this won't
matter to the final output, but when moving files between diretories, the
output now lists the target directory rather than the source directory.

Signed-off-by: Johan Herland <johan@herland.net>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 diff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index 1f44cb42373..abd9cd5f33d 100644
--- a/diff.c
+++ b/diff.c
@@ -1541,7 +1541,7 @@ static void show_dirstat(struct diff_options *options)
 		unsigned long copied, added, damage;
 		int content_changed;
 
-		name = p->one->path ? p->one->path : p->two->path;
+		name = p->two->path ? p->two->path : p->one->path;
 
 		if (p->one->sha1_valid && p->two->sha1_valid)
 			content_changed = hashcmp(p->one->sha1, p->two->sha1);