From cc7d5bcf006e92d554cae0aefc9d8681d4659dce Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Thu, 29 Jun 2006 14:48:22 -0700
Subject: [PATCH 01/14] Racy GIT (part #3)

Commit 29e4d3635709778bcc808dbad0477efad82f8d7e fixed the
underlying update-index races but git-commit was not careful
enough to preserve the index file timestamp when copying the
index file.  This caused t3402 test to occasionally fail.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-commit.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/git-commit.sh b/git-commit.sh
index 7e50cf399b7..22c4ce86c3c 100755
--- a/git-commit.sh
+++ b/git-commit.sh
@@ -29,7 +29,7 @@ THIS_INDEX="$GIT_DIR/index"
 NEXT_INDEX="$GIT_DIR/next-index$$"
 rm -f "$NEXT_INDEX"
 save_index () {
-	cp "$THIS_INDEX" "$NEXT_INDEX"
+	cp -p "$THIS_INDEX" "$NEXT_INDEX"
 }
 
 report () {

From 51d1e83f91a16363930023a5c39f79f270c04653 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Thu, 29 Jun 2006 14:04:01 -0700
Subject: [PATCH 02/14] Do not try futile object pairs when repacking.

In the repacking window, if both objects we are looking at already came
from the same (old) pack-file, don't bother delta'ing them against each
other.

That means that we'll still always check for better deltas for (and
against!) _unpacked_ objects, but assuming incremental repacks, you'll
avoid the delta creation 99% of the time.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 pack-objects.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pack-objects.c b/pack-objects.c
index bed2497b797..6e1767652c8 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -987,6 +987,13 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	if (trg_entry->preferred_base)
 		return -1;
 
+	/*
+	 * We do not bother to try a delta that we discarded
+	 * on an earlier try.
+	 */
+	if (trg_entry->in_pack && trg_entry->in_pack == src_entry->in_pack)
+		return 0;
+
 	/*
 	 * If the current object is at pack edge, take the depth the
 	 * objects that depend on the current object into account --

From 8dbbd14ea3ae1b4e825f1e7d314afddf26c67298 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Thu, 29 Jun 2006 23:44:52 -0400
Subject: [PATCH 03/14] consider previous pack undeltified object state only
 when reusing delta data

Without this there would never be a chance to improve packing for
previously undeltified objects.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 pack-objects.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pack-objects.c b/pack-objects.c
index 6e1767652c8..47da33baa3f 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -989,9 +989,10 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 
 	/*
 	 * We do not bother to try a delta that we discarded
-	 * on an earlier try.
+	 * on an earlier try, but only when reusing delta data.
 	 */
-	if (trg_entry->in_pack && trg_entry->in_pack == src_entry->in_pack)
+	if (!no_reuse_delta && trg_entry->in_pack &&
+	    trg_entry->in_pack == src_entry->in_pack)
 		return 0;
 
 	/*

From fc046a75d539a78e6b2c16534c4078617a69a327 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Thu, 29 Jun 2006 21:38:55 -0700
Subject: [PATCH 04/14] Abstract out accesses to object hash array

There are a few special places where some programs accessed the object
hash array directly, which bothered me because I wanted to play with some
simple re-organizations.

So this patch makes the object hash array data structures all entirely
local to object.c, and the few users who wanted to look at it now get to
use a function to query how many object index entries there can be, and to
actually access the array.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 fsck-objects.c |  7 ++++---
 name-rev.c     | 13 ++++++++-----
 object.c       | 15 ++++++++++++---
 object.h       |  5 +++--
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/fsck-objects.c b/fsck-objects.c
index 769bb2a6a75..ef54a8a411b 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -60,12 +60,13 @@ static int objwarning(struct object *obj, const char *err, ...)
 
 static void check_connectivity(void)
 {
-	int i;
+	int i, max;
 
 	/* Look up all the requirements, warn about missing objects.. */
-	for (i = 0; i < obj_allocs; i++) {
+	max = get_max_object_index();
+	for (i = 0; i < max; i++) {
 		const struct object_refs *refs;
-		struct object *obj = objs[i];
+		struct object *obj = get_indexed_object(i);
 
 		if (!obj)
 			continue;
diff --git a/name-rev.c b/name-rev.c
index 3a5ac35d163..6a23f2d8a2d 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -234,12 +234,15 @@ int main(int argc, char **argv)
 				fwrite(p_start, p - p_start, 1, stdout);
 		}
 	} else if (all) {
-		int i;
+		int i, max;
 
-		for (i = 0; i < obj_allocs; i++)
-			if (objs[i])
-				printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
-						get_rev_name(objs[i]));
+		max = get_max_object_index();
+		for (i = 0; i < max; i++) {
+			struct object * obj = get_indexed_object(i);
+			if (!obj)
+				continue;
+			printf("%s %s\n", sha1_to_hex(obj->sha1), get_rev_name(obj));
+		}
 	} else {
 		int i;
 		for (i = 0; i < revs.nr; i++)
diff --git a/object.c b/object.c
index 37784cee9a9..31c77ea03a2 100644
--- a/object.c
+++ b/object.c
@@ -5,9 +5,18 @@
 #include "commit.h"
 #include "tag.h"
 
-struct object **objs;
-static int nr_objs;
-int obj_allocs;
+static struct object **objs;
+static int nr_objs, obj_allocs;
+
+unsigned int get_max_object_index(void)
+{
+	return obj_allocs;
+}
+
+struct object *get_indexed_object(unsigned int idx)
+{
+	return objs[idx];
+}
 
 const char *type_names[] = {
 	"none", "blob", "tree", "commit", "bad"
diff --git a/object.h b/object.h
index 6f23a9a1809..e0125e154fd 100644
--- a/object.h
+++ b/object.h
@@ -40,10 +40,11 @@ struct object {
 };
 
 extern int track_object_refs;
-extern int obj_allocs;
-extern struct object **objs;
 extern const char *type_names[];
 
+extern unsigned int get_max_object_index(void);
+extern struct object *get_indexed_object(unsigned int);
+
 static inline const char *typename(unsigned int type)
 {
 	return type_names[type > TYPE_TAG ? TYPE_BAD : type];

From e3a56298135ed4cce302e7da792793d3e5623c29 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 30 Jun 2006 11:30:29 -0700
Subject: [PATCH 05/14] upload-pack.c: <sys/poll.h> includes <ctype.h> on
 OpenBSD 3.8

Merlyn reports that <sys/poll.h> on OpenBSD 3.8 includes <ctype.h>
and having our custom ctype (done in git-compat-util.h which is
included via cache.h) makes upload-pack.c uncompilable.  Try to
work it around by including the system headers first.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 upload-pack.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/upload-pack.c b/upload-pack.c
index 2b70c3dcb4e..b18eb9ba0db 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1,3 +1,6 @@
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/poll.h>
 #include "cache.h"
 #include "refs.h"
 #include "pkt-line.h"
@@ -5,9 +8,6 @@
 #include "object.h"
 #include "commit.h"
 #include "exec_cmd.h"
-#include <signal.h>
-#include <sys/poll.h>
-#include <sys/wait.h>
 
 static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=nn] <dir>";
 

From 560b25a86f30ad81d2bc3a383da19c3b7e631b8b Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Fri, 30 Jun 2006 22:55:30 -0400
Subject: [PATCH 06/14] don't load objects needlessly when repacking

If no delta is attempted on some objects then it is useless to load them
in memory, neither create any delta index for them.  The best thing to
do is therefore to load and index them only when really needed.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 pack-objects.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/pack-objects.c b/pack-objects.c
index 47da33baa3f..b486ea528af 100644
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -970,11 +970,12 @@ struct unpacked {
  * one.
  */
 static int try_delta(struct unpacked *trg, struct unpacked *src,
-		     struct delta_index *src_index, unsigned max_depth)
+		     unsigned max_depth)
 {
 	struct object_entry *trg_entry = trg->entry;
 	struct object_entry *src_entry = src->entry;
-	unsigned long size, src_size, delta_size, sizediff, max_size;
+	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
+	char type[10];
 	void *delta_buf;
 
 	/* Don't bother doing diffs between different types */
@@ -1009,19 +1010,38 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		return 0;
 
 	/* Now some size filtering heuristics. */
-	size = trg_entry->size;
-	max_size = size/2 - 20;
+	trg_size = trg_entry->size;
+	max_size = trg_size/2 - 20;
 	max_size = max_size * (max_depth - src_entry->depth) / max_depth;
 	if (max_size == 0)
 		return 0;
 	if (trg_entry->delta && trg_entry->delta_size <= max_size)
 		max_size = trg_entry->delta_size-1;
 	src_size = src_entry->size;
-	sizediff = src_size < size ? size - src_size : 0;
+	sizediff = src_size < trg_size ? trg_size - src_size : 0;
 	if (sizediff >= max_size)
 		return 0;
 
-	delta_buf = create_delta(src_index, trg->data, size, &delta_size, max_size);
+	/* Load data if not already done */
+	if (!trg->data) {
+		trg->data = read_sha1_file(trg_entry->sha1, type, &sz);
+		if (sz != trg_size)
+			die("object %s inconsistent object length (%lu vs %lu)",
+			    sha1_to_hex(trg_entry->sha1), sz, trg_size);
+	}
+	if (!src->data) {
+		src->data = read_sha1_file(src_entry->sha1, type, &sz);
+		if (sz != src_size)
+			die("object %s inconsistent object length (%lu vs %lu)",
+			    sha1_to_hex(src_entry->sha1), sz, src_size);
+	}
+	if (!src->index) {
+		src->index = create_delta_index(src->data, src_size);
+		if (!src->index)
+			die("out of memory");
+	}
+
+	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
 	if (!delta_buf)
 		return 0;
 
@@ -1054,8 +1074,6 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 	while (--i >= 0) {
 		struct object_entry *entry = list[i];
 		struct unpacked *n = array + idx;
-		unsigned long size;
-		char type[10];
 		int j;
 
 		if (!entry->preferred_base)
@@ -1082,11 +1100,8 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		free_delta_index(n->index);
 		n->index = NULL;
 		free(n->data);
+		n->data = NULL;
 		n->entry = entry;
-		n->data = read_sha1_file(entry->sha1, type, &size);
-		if (size != entry->size)
-			die("object %s inconsistent object length (%lu vs %lu)",
-			    sha1_to_hex(entry->sha1), size, entry->size);
 
 		j = window;
 		while (--j > 0) {
@@ -1097,7 +1112,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 			m = array + other_idx;
 			if (!m->entry)
 				break;
-			if (try_delta(n, m, m->index, depth) < 0)
+			if (try_delta(n, m, depth) < 0)
 				break;
 		}
 		/* if we made n a delta, and if n is already at max
@@ -1107,10 +1122,6 @@ static void find_deltas(struct object_entry **list, int window, int depth)
 		if (entry->delta && depth <= entry->depth)
 			continue;
 
-		n->index = create_delta_index(n->data, size);
-		if (!n->index)
-			die("out of memory");
-
 		idx++;
 		if (idx >= window)
 			idx = 0;

From 03e0ea871206e50bcd1c5167c6fc9a41c6642abb Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Fri, 30 Jun 2006 21:42:53 -0700
Subject: [PATCH 07/14] git-svn: allow a local target directory to be specified
 for init

	git-svn init url://to/the/repo local-repo

will create the local-repo dirrectory if doesn't exist yet and
populate it as expected.

Original patch by Luca Barbato, cleaned up and made to work for
the current version of git-svn by me (Eric Wong).

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 contrib/git-svn/git-svn.perl | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/contrib/git-svn/git-svn.perl b/contrib/git-svn/git-svn.perl
index b3d3f479da7..1e19aa19b29 100755
--- a/contrib/git-svn/git-svn.perl
+++ b/contrib/git-svn/git-svn.perl
@@ -264,9 +264,19 @@ when you have upgraded your tools and habits to use refs/remotes/$GIT_SVN
 }
 
 sub init {
-	$SVN_URL = shift or die "SVN repository location required " .
+	my $url = shift or die "SVN repository location required " .
 				"as a command-line argument\n";
-	$SVN_URL =~ s!/+$!!; # strip trailing slash
+	$url =~ s!/+$!!; # strip trailing slash
+
+	if (my $repo_path = shift) {
+		unless (-d $repo_path) {
+			mkpath([$repo_path]);
+		}
+		$GIT_DIR = $ENV{GIT_DIR} = $repo_path . "/.git";
+		init_vars();
+	}
+
+	$SVN_URL = $url;
 	unless (-d $GIT_DIR) {
 		my @init_db = ('git-init-db');
 		push @init_db, "--template=$_template" if defined $_template;

From c64ea8521bc81b52da45d3deb3a419928d4b6b29 Mon Sep 17 00:00:00 2001
From: Robin Rosenberg <robin.rosenberg@dewire.com>
Date: Sun, 2 Jul 2006 02:07:40 +0200
Subject: [PATCH 08/14] Minor documentation fixup.

Signed-off-by: Robin Rosenberg <robin.rosenberg@dewire.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git-commit.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-commit.txt b/Documentation/git-commit.txt
index 0fe66f2d0c8..517a86b238a 100644
--- a/Documentation/git-commit.txt
+++ b/Documentation/git-commit.txt
@@ -15,9 +15,9 @@ SYNOPSIS
 DESCRIPTION
 -----------
 Updates the index file for given paths, or all modified files if
-'-a' is specified, and makes a commit object.  The command
-VISUAL and EDITOR environment variables to edit the commit log
-message.
+'-a' is specified, and makes a commit object.  The command specified
+by either the VISUAL or EDITOR environment variables are used to edit
+the commit log message.
 
 Several environment variable are used during commits.  They are
 documented in gitlink:git-commit-tree[1].

From ac3bc6c1d1f549f6809d8a7d29bcaed302f3c193 Mon Sep 17 00:00:00 2001
From: Petr Baudis <pasky@suse.cz>
Date: Sat, 1 Jul 2006 23:56:26 +0200
Subject: [PATCH 09/14] Fix errno usage in connect.c

errno was used after it could've been modified by a subsequent library call.
Spotted by Morten Welinder.

Signed-off-by: Petr Baudis <pasky@suse.cz>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 connect.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/connect.c b/connect.c
index cb4656d79d3..9a87bd999a1 100644
--- a/connect.c
+++ b/connect.c
@@ -328,7 +328,7 @@ static enum protocol get_protocol(const char *name)
  */
 static int git_tcp_connect_sock(char *host)
 {
-	int sockfd = -1;
+	int sockfd = -1, saved_errno = 0;
 	char *colon, *end;
 	const char *port = STR(DEFAULT_GIT_PORT);
 	struct addrinfo hints, *ai0, *ai;
@@ -362,9 +362,12 @@ static int git_tcp_connect_sock(char *host)
 	for (ai0 = ai; ai; ai = ai->ai_next) {
 		sockfd = socket(ai->ai_family,
 				ai->ai_socktype, ai->ai_protocol);
-		if (sockfd < 0)
+		if (sockfd < 0) {
+			saved_errno = errno;
 			continue;
+		}
 		if (connect(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) {
+			saved_errno = errno;
 			close(sockfd);
 			sockfd = -1;
 			continue;
@@ -375,7 +378,7 @@ static int git_tcp_connect_sock(char *host)
 	freeaddrinfo(ai0);
 
 	if (sockfd < 0)
-		die("unable to connect a socket (%s)", strerror(errno));
+		die("unable to connect a socket (%s)", strerror(saved_errno));
 
 	return sockfd;
 }
@@ -387,7 +390,7 @@ static int git_tcp_connect_sock(char *host)
  */
 static int git_tcp_connect_sock(char *host)
 {
-	int sockfd = -1;
+	int sockfd = -1, saved_errno = 0;
 	char *colon, *end;
 	char *port = STR(DEFAULT_GIT_PORT), *ep;
 	struct hostent *he;
@@ -426,8 +429,10 @@ static int git_tcp_connect_sock(char *host)
 
 	for (ap = he->h_addr_list; *ap; ap++) {
 		sockfd = socket(he->h_addrtype, SOCK_STREAM, 0);
-		if (sockfd < 0)
+		if (sockfd < 0) {
+			saved_errno = errno;
 			continue;
+		}
 
 		memset(&sa, 0, sizeof sa);
 		sa.sin_family = he->h_addrtype;
@@ -435,6 +440,7 @@ static int git_tcp_connect_sock(char *host)
 		memcpy(&sa.sin_addr, *ap, he->h_length);
 
 		if (connect(sockfd, (struct sockaddr *)&sa, sizeof sa) < 0) {
+			saved_errno = errno;
 			close(sockfd);
 			sockfd = -1;
 			continue;
@@ -443,7 +449,7 @@ static int git_tcp_connect_sock(char *host)
 	}
 
 	if (sockfd < 0)
-		die("unable to connect a socket (%s)", strerror(errno));
+		die("unable to connect a socket (%s)", strerror(saved_errno));
 
 	return sockfd;
 }

From 6631c73685bea3c6300938f4900db0d0c6bee457 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Fri, 30 Jun 2006 20:21:59 -0700
Subject: [PATCH 10/14] revision.c: --full-history fix.

With history simplification, we still show merges that are required
to make the history _complete_, i.e. say that you had:

	  a
	  |
	  b
	 / \
	c   d
	|   |

and neither "a" nor "b" actually changed the file, but both "c" and "d"
did: in this case we have to leave "b" around just because otherwise there
would be no way to show the _relationship_, even if "b" itself doesn't
actually change the tree in any way what-so-ever.

It would make sense to make that further simplification if the
"--parents" flag wasn't present.  In that case the user is
literally asking for a list of commits and is not interested in
the relationship between them.

This patch also fixes a real bug.  Without this patch, the
"--parents --full-history" combination (which you'd get if you
do something like

	gitk --full-history Makefile

or similar) will actually _drop_ merges where all children are identical.
That's wrong in the --full-history case, because it means that the graph
ends up missing lots of entries.

In the process, this also should make

	git-rev-list --full-history Makefile

give just the _true_ list of all commits that changed Makefile (and
properly ignore merges that were identical in one parent), because now
we're not asking for "--parent", so we don't need the unnecessary merge
commits to keep the history together.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 revision.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/revision.c b/revision.c
index b963f2adfd9..1cf6276ad8f 100644
--- a/revision.c
+++ b/revision.c
@@ -280,7 +280,7 @@ int rev_same_tree_as_empty(struct rev_info *revs, struct tree *t1)
 static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 {
 	struct commit_list **pp, *parent;
-	int tree_changed = 0;
+	int tree_changed = 0, tree_same = 0;
 
 	if (!commit->tree)
 		return;
@@ -298,6 +298,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 		parse_commit(p);
 		switch (rev_compare_tree(revs, p->tree, commit->tree)) {
 		case REV_TREE_SAME:
+			tree_same = 1;
 			if (!revs->simplify_history || (p->object.flags & UNINTERESTING)) {
 				/* Even if a merge with an uninteresting
 				 * side branch brought the entire change
@@ -334,7 +335,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 		}
 		die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
 	}
-	if (tree_changed)
+	if (tree_changed && !tree_same)
 		commit->object.flags |= TREECHANGE;
 }
 
@@ -896,6 +897,8 @@ static int rewrite_one(struct rev_info *revs, struct commit **pp)
 		struct commit *p = *pp;
 		if (!revs->limited)
 			add_parents_to_list(revs, p, &revs->commits);
+		if (p->parents && p->parents->next)
+			return 0;
 		if (p->object.flags & (TREECHANGE | UNINTERESTING))
 			return 0;
 		if (!p->parents)
@@ -988,8 +991,15 @@ struct commit *get_revision(struct rev_info *revs)
 		    commit->parents && commit->parents->next)
 			continue;
 		if (revs->prune_fn && revs->dense) {
-			if (!(commit->object.flags & TREECHANGE))
-				continue;
+			/* Commit without changes? */
+			if (!(commit->object.flags & TREECHANGE)) {
+				/* drop merges unless we want parenthood */
+				if (!revs->parents)
+					continue;
+				/* non-merge - always ignore it */
+				if (commit->parents && !commit->parents->next)
+					continue;
+			}
 			if (revs->parents)
 				rewrite_parents(revs, commit);
 		}

From 0556a11a0df6b4119e01aa77dfb795561e62eb34 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Fri, 30 Jun 2006 11:20:33 -0700
Subject: [PATCH 11/14] git object hash cleanups

This IMNSHO cleans up the object hashing.

The hash expansion is separated out into a function of its own, the hash
array (and size) names are made more obvious, and the code is generally
made to look a bit more like the object-ref hashing.

It also gets rid of "find_object()" returning an index (or negative
position if no object is found), since that is made redundant by the
simplified object rehashing. The basic operation is now "lookup_object()"
which just returns the object itself.

There's an almost unmeasurable speed increase, but more importantly, I
think the end result is more readable.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 object.c | 115 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 62 insertions(+), 53 deletions(-)

diff --git a/object.c b/object.c
index 31c77ea03a2..37277f94384 100644
--- a/object.c
+++ b/object.c
@@ -5,88 +5,97 @@
 #include "commit.h"
 #include "tag.h"
 
-static struct object **objs;
-static int nr_objs, obj_allocs;
+static struct object **obj_hash;
+static int nr_objs, obj_hash_size;
 
 unsigned int get_max_object_index(void)
 {
-	return obj_allocs;
+	return obj_hash_size;
 }
 
 struct object *get_indexed_object(unsigned int idx)
 {
-	return objs[idx];
+	return obj_hash[idx];
 }
 
 const char *type_names[] = {
 	"none", "blob", "tree", "commit", "bad"
 };
 
+static unsigned int hash_obj(struct object *obj, unsigned int n)
+{
+	unsigned int hash = *(unsigned int *)obj->sha1;
+	return hash % n;
+}
+
+static void insert_obj_hash(struct object *obj, struct object **hash, unsigned int size)
+{
+	int j = hash_obj(obj, size);
+
+	while (hash[j]) {
+		j++;
+		if (j >= size)
+			j = 0;
+	}
+	hash[j] = obj;
+}
+
 static int hashtable_index(const unsigned char *sha1)
 {
 	unsigned int i;
 	memcpy(&i, sha1, sizeof(unsigned int));
-	return (int)(i % obj_allocs);
-}
-
-static int find_object(const unsigned char *sha1)
-{
-	int i;
-
-	if (!objs)
-		return -1;
-
-	i = hashtable_index(sha1);
-	while (objs[i]) {
-		if (memcmp(sha1, objs[i]->sha1, 20) == 0)
-			return i;
-		i++;
-		if (i == obj_allocs)
-			i = 0;
-	}
-	return -1 - i;
+	return (int)(i % obj_hash_size);
 }
 
 struct object *lookup_object(const unsigned char *sha1)
 {
-	int pos = find_object(sha1);
-	if (pos >= 0)
-		return objs[pos];
-	return NULL;
+	int i;
+	struct object *obj;
+
+	if (!obj_hash)
+		return NULL;
+
+	i = hashtable_index(sha1);
+	while ((obj = obj_hash[i]) != NULL) {
+		if (!memcmp(sha1, obj->sha1, 20))
+			break;
+		i++;
+		if (i == obj_hash_size)
+			i = 0;
+	}
+	return obj;
+}
+
+static void grow_object_hash(void)
+{
+	int i;
+	int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+	struct object **new_hash;
+
+	new_hash = calloc(new_hash_size, sizeof(struct object *));
+	for (i = 0; i < obj_hash_size; i++) {
+		struct object *obj = obj_hash[i];
+		if (!obj)
+			continue;
+		insert_obj_hash(obj, new_hash, new_hash_size);
+	}
+	free(obj_hash);
+	obj_hash = new_hash;
+	obj_hash_size = new_hash_size;
 }
 
 void created_object(const unsigned char *sha1, struct object *obj)
 {
-	int pos;
-
 	obj->parsed = 0;
-	memcpy(obj->sha1, sha1, 20);
-	obj->type = TYPE_NONE;
 	obj->used = 0;
+	obj->type = TYPE_NONE;
+	obj->flags = 0;
+	memcpy(obj->sha1, sha1, 20);
 
-	if (obj_allocs - 1 <= nr_objs * 2) {
-		int i, count = obj_allocs;
-		obj_allocs = (obj_allocs < 32 ? 32 : 2 * obj_allocs);
-		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
-		memset(objs + count, 0, (obj_allocs - count)
-				* sizeof(struct object *));
-		for (i = 0; i < obj_allocs; i++)
-			if (objs[i]) {
-				int j = find_object(objs[i]->sha1);
-				if (j != i) {
-					j = -1 - j;
-					objs[j] = objs[i];
-					objs[i] = NULL;
-				}
-			}
-	}
+	if (obj_hash_size - 1 <= nr_objs * 2)
+		grow_object_hash();
 
-	pos = find_object(sha1);
-	if (pos >= 0)
-		die("Inserting %s twice\n", sha1_to_hex(sha1));
-	pos = -pos-1;
-
-	objs[pos] = obj;
+	insert_obj_hash(obj, obj_hash, obj_hash_size);
 	nr_objs++;
 }
 

From 02d3dca3bff6a67dead9f5b97dfe3576fe5b14e5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@osdl.org>
Date: Sun, 2 Jul 2006 10:55:59 -0700
Subject: [PATCH 12/14] revision.c: fix "dense" under --remove-empty

It had the wrong test for whether a commit was a merge. What it did was to
say that a non-merge has exactly one parent (which sounds almost right),
but the fact is, initial trees have no parent at all, but they're
obviously not merges.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 revision.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/revision.c b/revision.c
index 1cf6276ad8f..880fb7bb30b 100644
--- a/revision.c
+++ b/revision.c
@@ -997,7 +997,7 @@ struct commit *get_revision(struct rev_info *revs)
 				if (!revs->parents)
 					continue;
 				/* non-merge - always ignore it */
-				if (commit->parents && !commit->parents->next)
+				if (!commit->parents || !commit->parents->next)
 					continue;
 			}
 			if (revs->parents)

From 35c636ec487d0d5c38e0ce8d3d7bc7ca42c6e5c1 Mon Sep 17 00:00:00 2001
From: Robin Rosenberg <robin.rosenberg@dewire.com>
Date: Mon, 3 Jul 2006 00:21:00 +0200
Subject: [PATCH 13/14] Empty author may be presented by svn as an empty string
 or a null value.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-svnimport.perl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/git-svnimport.perl b/git-svnimport.perl
index 38ac732ca9b..26dc4547953 100755
--- a/git-svnimport.perl
+++ b/git-svnimport.perl
@@ -534,7 +534,7 @@ sub commit {
 	my($author_name,$author_email,$dest);
 	my(@old,@new,@parents);
 
-	if (not defined $author) {
+	if (not defined $author or $author eq "") {
 		$author_name = $author_email = "unknown";
 	} elsif (defined $users_file) {
 		die "User $author is not listed in $users_file\n"

From 8fced61cbc32f0c4b81a3dcecfeb40b7d96339ce Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Mon, 3 Jul 2006 00:53:13 -0700
Subject: [PATCH 14/14] Makefile: tighten git-http-{fetch,push} dependencies

Although our "git-%$X:" implicit target had dependency on
$(GITLIBS) which included xdiff/lib.a, git-http-{fetch,push} had
their own building rules and with an obsolete dependency on
$(LIB_FILES).  Update the rules to depend on $(GITLIBS), to make
parallel build work correctly.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index cde619c498d..76abcc473b4 100644
--- a/Makefile
+++ b/Makefile
@@ -587,11 +587,11 @@ git-ssh-push$X: rsh.o
 git-imap-send$X: imap-send.o $(LIB_FILE)
 
 http.o http-fetch.o http-push.o: http.h
-git-http-fetch$X: fetch.o http.o http-fetch.o $(LIB_FILE)
+git-http-fetch$X: fetch.o http.o http-fetch.o $(GITLIBS)
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)
 
-git-http-push$X: revision.o http.o http-push.o $(LIB_FILE)
+git-http-push$X: revision.o http.o http-push.o $(GITLIBS)
 	$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
 		$(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT)