From 4e8da1958111796d55ad63b229ebd3ae6c54bf87 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 15 May 2006 11:40:05 -0400
Subject: [PATCH 1/3] simple heuristic for further free packing improvements

Given that the early eviction of objects with maximum delta depth
may exhibit bad packing on its own, why not consider a bias against
deep base objects in try_delta() to mitigate that bad behavior?

This patch adjusts the max_size allowed for a delta based on the depth of
the base object, and enables the early eviction of max depth
objects from the object window.  When used separately, those two things
produce slightly better and much worse results respectively.  But their
combined effect is a surprisingly significant packing improvement.

With this really simple patch the GIT repo gets nearly 15% smaller, and
the Linux kernel repo about 5% smaller, with no significantly measurable
CPU usage difference.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 pack-objects.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pack-objects.c b/pack-objects.c
index 5466b151678..526c090c619 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -1039,8 +1039,8 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 
 	/* Now some size filtering euristics. */
 	size = trg_entry->size;
-	max_size = size / 2 - 20;
-	if (trg_entry->delta)
+	max_size = (size/2 - 20) / (src_entry->depth + 1);
+	if (trg_entry->delta && trg_entry->delta_size <= max_size)
 		max_size = trg_entry->delta_size-1;
 	src_size = src_entry->size;
 	sizediff = src_size < size ? size - src_size : 0;
@@ -1129,15 +1129,12 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 			if (try_delta(n, m, m->index, depth) < 0)
 				break;
 		}
-#if 0
 		/* if we made n a delta, and if n is already at max
 		 * depth, leaving it in the window is pointless.  we
 		 * should evict it first.
-		 * ... in theory only; somehow this makes things worse.
 		 */
 		if (entry->delta && depth <= entry->depth)
 			continue;
-#endif
 		idx++;
 		if (idx >= window)
 			idx = 0;

From ff45715ce50b80ab16ee0d0dc7fff0c47a51959a Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 15 May 2006 13:47:16 -0400
Subject: [PATCH 2/3] pack-object: slightly more efficient

Avoid creating a delta index for objects with maximum depth since they
are not going to be used as a delta base anyway.  This also reduces peak
memory usage slightly, as the current object's delta index is not useful
until the next object in the loop is considered for deltification.  This
saves a bit more than 1% on CPU usage.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 delta.h        |  2 ++
 pack-objects.c | 15 ++++++++-------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/delta.h b/delta.h
index 727ae30e9eb..7b3f86d85f7 100644
--- a/delta.h
+++ b/delta.h
@@ -18,6 +18,8 @@ create_delta_index(const void *buf, unsigned long bufsize);
 
 /*
  * free_delta_index: free the index created by create_delta_index()
+ *
+ * Given pointer must be what create_delta_index() returned, or NULL.
  */
 extern void free_delta_index(struct delta_index *index);
 
diff --git a/pack-objects.c b/pack-objects.c
index 526c090c619..b430b02cf7f 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -1105,17 +1105,14 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 
 		if (entry->size < 50)
 			continue;
-		if (n->index)
-			free_delta_index(n->index);
+		free_delta_index(n->index);
+		n->index = NULL;
 		free(n->data);
 		n->entry = entry;
 		n->data = read_sha1_file(entry->sha1, type, &size);
 		if (size != entry->size)
 			die("object %s inconsistent object length (%lu vs %lu)",
 			    sha1_to_hex(entry->sha1), size, entry->size);
-		n->index = create_delta_index(n->data, size);
-		if (!n->index)
-			die("out of memory");
 
 		j = window;
 		while (--j > 0) {
@@ -1135,6 +1132,11 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		 */
 		if (entry->delta && depth <= entry->depth)
 			continue;
+
+		n->index = create_delta_index(n->data, size);
+		if (!n->index)
+			die("out of memory");
+
 		idx++;
 		if (idx >= window)
 			idx = 0;
@@ -1144,8 +1146,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		fputc('\n', stderr);
 
 	for (i = 0; i < window; ++i) {
-		if (array[i].index)
-			free_delta_index(array[i].index);
+		free_delta_index(array[i].index);
 		free(array[i].data);
 	}
 	free(array);

From c3b06a69ffc41b3ac3600628593dd0fdd3988607 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Tue, 16 May 2006 16:29:14 -0400
Subject: [PATCH 3/3] improve depth heuristic for maximum delta size

This makes the penalty related to delta depth linear instead of a 1/x
function: the maximum allowed delta size is now scaled by
(max_depth - depth) / max_depth.  With this another 5% reduction is
observed on packs for both the GIT repo and the Linux kernel repo, as
well as fixing a pack size regression in another sample repo I have.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 pack-objects.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pack-objects.c b/pack-objects.c
index b430b02cf7f..33751797fa3 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -1037,9 +1037,12 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	if (src_entry->depth >= max_depth)
 		return 0;
 
-	/* Now some size filtering euristics. */
+	/* Now some size filtering heuristics. */
 	size = trg_entry->size;
-	max_size = (size/2 - 20) / (src_entry->depth + 1);
+	max_size = size/2 - 20;
+	max_size = max_size * (max_depth - src_entry->depth) / max_depth;
+	if (max_size == 0)
+		return 0;
 	if (trg_entry->delta && trg_entry->delta_size <= max_size)
 		max_size = trg_entry->delta_size-1;
 	src_size = src_entry->size;