From a39c14af82e973ba1502888e89585b7501721ede Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 15 Jan 2014 03:26:33 -0500
Subject: [PATCH 1/5] interpret_branch_name: factor out upstream handling

This function checks a few different @{}-constructs. The
early part checks for and dispatches us to helpers for each
construct, but the code for handling @{upstream} is inline.

Let's factor this out into its own function. This makes
interpret_branch_name more readable, and will make it much
simpler to further refactor the function in future patches.

While we're at it, let's also break apart the refactored
code into a few helper functions. These will be useful if we
eventually implement similar @{upstream}-like constructs.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_name.c | 83 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 31 deletions(-)

diff --git a/sha1_name.c b/sha1_name.c
index e9c299943b8..03c574e2959 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -1048,6 +1048,54 @@ static int reinterpret(const char *name, int namelen, int len, struct strbuf *bu
 	return ret - used + len;
 }
 
+static void set_shortened_ref(struct strbuf *buf, const char *ref)
+{
+	char *s = shorten_unambiguous_ref(ref, 0);
+	strbuf_reset(buf);
+	strbuf_addstr(buf, s);
+	free(s);
+}
+
+static const char *get_upstream_branch(const char *branch_buf, int len)
+{
+	char *branch = xstrndup(branch_buf, len);
+	struct branch *upstream = branch_get(*branch ? branch : NULL);
+
+	/*
+	 * Upstream can be NULL only if branch refers to HEAD and HEAD
+	 * points to something different than a branch.
+	 */
+	if (!upstream)
+		die(_("HEAD does not point to a branch"));
+	if (!upstream->merge || !upstream->merge[0]->dst) {
+		if (!ref_exists(upstream->refname))
+			die(_("No such branch: '%s'"), branch);
+		if (!upstream->merge) {
+			die(_("No upstream configured for branch '%s'"),
+				upstream->name);
+		}
+		die(
+			_("Upstream branch '%s' not stored as a remote-tracking branch"),
+			upstream->merge[0]->src);
+	}
+	free(branch);
+
+	return upstream->merge[0]->dst;
+}
+
+static int interpret_upstream_mark(const char *name, int namelen,
+				   int at, struct strbuf *buf)
+{
+	int len;
+
+	len = upstream_mark(name + at, namelen - at);
+	if (!len)
+		return -1;
+
+	set_shortened_ref(buf, get_upstream_branch(name, at));
+	return len + at;
+}
+
 /*
  * This reads short-hand syntax that not only evaluates to a commit
  * object name, but also can act as if the end user spelled the name
@@ -1072,9 +1120,7 @@ static int reinterpret(const char *name, int namelen, int len, struct strbuf *bu
 int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 {
 	char *cp;
-	struct branch *upstream;
 	int len = interpret_nth_prior_checkout(name, buf);
-	int tmp_len;
 
 	if (!namelen)
 		namelen = strlen(name);
@@ -1096,36 +1142,11 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 	if (len > 0)
 		return reinterpret(name, namelen, len, buf);
 
-	tmp_len = upstream_mark(cp, namelen - (cp - name));
-	if (!tmp_len)
-		return -1;
+	len = interpret_upstream_mark(name, namelen, cp - name, buf);
+	if (len > 0)
+		return len;
 
-	len = cp + tmp_len - name;
-	cp = xstrndup(name, cp - name);
-	upstream = branch_get(*cp ? cp : NULL);
-	/*
-	 * Upstream can be NULL only if cp refers to HEAD and HEAD
-	 * points to something different than a branch.
-	 */
-	if (!upstream)
-		die(_("HEAD does not point to a branch"));
-	if (!upstream->merge || !upstream->merge[0]->dst) {
-		if (!ref_exists(upstream->refname))
-			die(_("No such branch: '%s'"), cp);
-		if (!upstream->merge) {
-			die(_("No upstream configured for branch '%s'"),
-				upstream->name);
-		}
-		die(
-			_("Upstream branch '%s' not stored as a remote-tracking branch"),
-			upstream->merge[0]->src);
-	}
-	free(cp);
-	cp = shorten_unambiguous_ref(upstream->merge[0]->dst, 0);
-	strbuf_reset(buf);
-	strbuf_addstr(buf, cp);
-	free(cp);
-	return len;
+	return -1;
 }
 
 int strbuf_branchname(struct strbuf *sb, const char *name)

From f278f40f09fd5a4e8e091be489bcb54230d2e3de Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 15 Jan 2014 03:27:32 -0500
Subject: [PATCH 2/5] interpret_branch_name: rename "cp" variable to "at"

In the original version of this function, "cp" acted as a
pointer to many different things. Since the refactoring in
the last patch, it only marks the at-sign in the string.
Let's use a more descriptive variable name.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_name.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sha1_name.c b/sha1_name.c
index 03c574e2959..47a71e310e9 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -1119,7 +1119,7 @@ static int interpret_upstream_mark(const char *name, int namelen,
  */
 int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 {
-	char *cp;
+	char *at;
 	int len = interpret_nth_prior_checkout(name, buf);
 
 	if (!namelen)
@@ -1134,15 +1134,15 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 			return reinterpret(name, namelen, len, buf);
 	}
 
-	cp = strchr(name, '@');
-	if (!cp)
+	at = strchr(name, '@');
+	if (!at)
 		return -1;
 
-	len = interpret_empty_at(name, namelen, cp - name, buf);
+	len = interpret_empty_at(name, namelen, at - name, buf);
 	if (len > 0)
 		return reinterpret(name, namelen, len, buf);
 
-	len = interpret_upstream_mark(name, namelen, cp - name, buf);
+	len = interpret_upstream_mark(name, namelen, at - name, buf);
 	if (len > 0)
 		return len;
 

From 8cd4249c4cdb19fce489a3f6ba31f499f4331341 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 15 Jan 2014 03:31:57 -0500
Subject: [PATCH 3/5] interpret_branch_name: always respect "namelen" parameter

interpret_branch_name gets passed a "name" buffer to parse,
along with a "namelen" parameter representing its length. If
"namelen" is zero, we fallback to the NUL-terminated
string-length of "name".

However, it does not necessarily follow that if we have
gotten a non-zero "namelen", it is the NUL-terminated
string-length of "name". E.g., when get_sha1() is parsing
"foo:bar", we will be asked to operate only on the first
three characters.

Yet in interpret_branch_name and its helpers, we use string
functions like strchr() to operate on "name", looking past
the length we were given.  This can result in us mis-parsing
object names.  We should instead be limiting our search to
"namelen" bytes.

There are three distinct types of object names this patch
addresses:

  - The intrepret_empty_at helper uses strchr to find the
    next @-expression after our potential empty-at.  In an
    expression like "@:foo@bar", it erroneously thinks that
    the second "@" is relevant, even if we were asked only
    to look at the first character. This case is easy to
    trigger (and we test it in this patch).

  - When finding the initial @-mark for @{upstream}, we use
    strchr.  This means we might treat "foo:@{upstream}" as
    the upstream for "foo:", even though we were asked only
    to look at "foo". We cannot test this one in practice,
    because it is masked by another bug (which is fixed in
    the next patch).

  - The interpret_nth_prior_checkout helper did not receive
    the name length at all. This turns out not to be a
    problem in practice, though, because its parsing is so
    limited: it always starts from the far-left of the
    string, and will not tolerate a colon (which is
    currently the only way to get a smaller-than-strlen
    "namelen"). However, it's still worth fixing to make the
    code more obviously correct, and to future-proof us
    against callers with more exotic buffers.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_name.c                | 17 ++++++++++-------
 t/t1508-at-combinations.sh | 15 ++++++++++++++-
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/sha1_name.c b/sha1_name.c
index 47a71e310e9..afdff2f1d55 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -430,7 +430,7 @@ static inline int upstream_mark(const char *string, int len)
 }
 
 static int get_sha1_1(const char *name, int len, unsigned char *sha1, unsigned lookup_flags);
-static int interpret_nth_prior_checkout(const char *name, struct strbuf *buf);
+static int interpret_nth_prior_checkout(const char *name, int namelen, struct strbuf *buf);
 
 static int get_sha1_basic(const char *str, int len, unsigned char *sha1)
 {
@@ -492,7 +492,7 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1)
 		struct strbuf buf = STRBUF_INIT;
 		int detached;
 
-		if (interpret_nth_prior_checkout(str, &buf) > 0) {
+		if (interpret_nth_prior_checkout(str, len, &buf) > 0) {
 			detached = (buf.len == 40 && !get_sha1_hex(buf.buf, sha1));
 			strbuf_release(&buf);
 			if (detached)
@@ -931,7 +931,8 @@ static int grab_nth_branch_switch(unsigned char *osha1, unsigned char *nsha1,
  * Parse @{-N} syntax, return the number of characters parsed
  * if successful; otherwise signal an error with negative value.
  */
-static int interpret_nth_prior_checkout(const char *name, struct strbuf *buf)
+static int interpret_nth_prior_checkout(const char *name, int namelen,
+					struct strbuf *buf)
 {
 	long nth;
 	int retval;
@@ -939,9 +940,11 @@ static int interpret_nth_prior_checkout(const char *name, struct strbuf *buf)
 	const char *brace;
 	char *num_end;
 
+	if (namelen < 4)
+		return -1;
 	if (name[0] != '@' || name[1] != '{' || name[2] != '-')
 		return -1;
-	brace = strchr(name, '}');
+	brace = memchr(name, '}', namelen);
 	if (!brace)
 		return -1;
 	nth = strtol(name + 3, &num_end, 10);
@@ -1014,7 +1017,7 @@ static int interpret_empty_at(const char *name, int namelen, int len, struct str
 		return -1;
 
 	/* make sure it's a single @, or @@{.*}, not @foo */
-	next = strchr(name + len + 1, '@');
+	next = memchr(name + len + 1, '@', namelen - len - 1);
 	if (next && next[1] != '{')
 		return -1;
 	if (!next)
@@ -1120,7 +1123,7 @@ static int interpret_upstream_mark(const char *name, int namelen,
 int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 {
 	char *at;
-	int len = interpret_nth_prior_checkout(name, buf);
+	int len = interpret_nth_prior_checkout(name, namelen, buf);
 
 	if (!namelen)
 		namelen = strlen(name);
@@ -1134,7 +1137,7 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 			return reinterpret(name, namelen, len, buf);
 	}
 
-	at = strchr(name, '@');
+	at = memchr(name, '@', namelen);
 	if (!at)
 		return -1;
 
diff --git a/t/t1508-at-combinations.sh b/t/t1508-at-combinations.sh
index ceb844985f9..078e1195dfa 100755
--- a/t/t1508-at-combinations.sh
+++ b/t/t1508-at-combinations.sh
@@ -9,8 +9,11 @@ check() {
 		if test '$2' = 'commit'
 		then
 			git log -1 --format=%s '$1' >actual
-		else
+		elif test '$2' = 'ref'
+		then
 			git rev-parse --symbolic-full-name '$1' >actual
+		else
+			git cat-file -p '$1' >actual
 		fi &&
 		test_cmp expect actual
 	"
@@ -82,4 +85,14 @@ check HEAD ref refs/heads/old-branch
 check "HEAD@{1}" commit new-two
 check "@{1}" commit old-one
 
+test_expect_success 'create path with @' '
+	echo content >normal &&
+	echo content >fun@ny &&
+	git add normal fun@ny &&
+	git commit -m "funny path"
+'
+
+check "@:normal" blob content
+check "@:fun@ny" blob content
+
 test_done

From 3f6eb30f1dbb6f8f715c9121bba43f2a1d294e28 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 15 Jan 2014 03:37:23 -0500
Subject: [PATCH 4/5] interpret_branch_name: avoid @{upstream} past colon

get_sha1() cannot currently parse a valid object name like
"HEAD:@{upstream}" (assuming that such an oddly named file
exists in the HEAD commit). It takes two passes to parse the
string:

  1. It first considers the whole thing as a ref, which
     results in looking for the upstream of "HEAD:".

  2. It finds the colon, parses "HEAD" as a tree-ish, and then
     finds the path "@{upstream}" in the tree.

For a path that looks like a normal reflog (e.g.,
"HEAD:@{yesterday}"), the first pass is a no-op. We try to
dwim_ref("HEAD:"), that returns zero refs, and we proceed
with colon-parsing.

For "HEAD:@{upstream}", though, the first pass ends up in
interpret_upstream_mark, which tries to find the branch
"HEAD:". When it sees that the branch does not exist, it
actually dies rather than returning an error to the caller.
As a result, we never make it to the second pass.

One obvious way of fixing this would be to teach
interpret_upstream_mark to simply report "no, this isn't an
upstream" in such a case. However, that would make the
error-reporting for legitimate upstream cases significantly
worse. Something like "bogus@{upstream}" would simply report
"unknown revision: bogus@{upstream}", while the current code
diagnoses a wide variety of possible misconfigurations (no
such branch, branch exists but does not have upstream, etc).

However, we can take advantage of the fact that a branch
name cannot contain a colon. Therefore even if we find an
upstream mark, any prefix with a colon must mean that
the upstream mark we found is actually a pathname, and
should be disregarded completely. This patch implements that
logic.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_name.c                   |  3 +++
 t/t1507-rev-parse-upstream.sh | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/sha1_name.c b/sha1_name.c
index afdff2f1d55..26a5811c846 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -1095,6 +1095,9 @@ static int interpret_upstream_mark(const char *name, int namelen,
 	if (!len)
 		return -1;
 
+	if (memchr(name, ':', at))
+		return -1;
+
 	set_shortened_ref(buf, get_upstream_branch(name, at));
 	return len + at;
 }
diff --git a/t/t1507-rev-parse-upstream.sh b/t/t1507-rev-parse-upstream.sh
index 2a19e797eb7..cace1ca4ad4 100755
--- a/t/t1507-rev-parse-upstream.sh
+++ b/t/t1507-rev-parse-upstream.sh
@@ -210,4 +210,20 @@ test_expect_success 'log -g other@{u}@{now}' '
 	test_cmp expect actual
 '
 
+test_expect_success '@{reflog}-parsing does not look beyond colon' '
+	echo content >@{yesterday} &&
+	git add @{yesterday} &&
+	git commit -m "funny reflog file" &&
+	git hash-object @{yesterday} >expect &&
+	git rev-parse HEAD:@{yesterday} >actual
+'
+
+test_expect_success '@{upstream}-parsing does not look beyond colon' '
+	echo content >@{upstream} &&
+	git add @{upstream} &&
+	git commit -m "funny upstream file" &&
+	git hash-object @{upstream} >expect &&
+	git rev-parse HEAD:@{upstream} >actual
+'
+
 test_done

From 9892d5d4541d93a8a6a4fd9ac4178d71d0d308e3 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 15 Jan 2014 03:40:46 -0500
Subject: [PATCH 5/5] interpret_branch_name: find all possible @-marks

When we parse a string like "foo@{upstream}", we look for
the first "@"-sign, and check to see if it is an upstream
mark. However, since branch names can contain an @, we may
also see "@foo@{upstream}". In this case, we check only the
first @, and ignore the second. As a result, we do not find
the upstream.

We can solve this by iterating through all @-marks in the
string, and seeing if any is a legitimate upstream or
empty-at mark.

Another strategy would be to parse from the right-hand side
of the string. However, that does not work for the
"empty_at" case, which allows "@@{upstream}". We need to
find the left-most one in this case (and we then recurse as
"HEAD@{upstream}").

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_name.c                   | 20 +++++++++++---------
 t/t1507-rev-parse-upstream.sh | 21 +++++++++++++++++++++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/sha1_name.c b/sha1_name.c
index 26a5811c846..15854e35ece 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -1126,6 +1126,7 @@ static int interpret_upstream_mark(const char *name, int namelen,
 int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 {
 	char *at;
+	const char *start;
 	int len = interpret_nth_prior_checkout(name, namelen, buf);
 
 	if (!namelen)
@@ -1140,17 +1141,18 @@ int interpret_branch_name(const char *name, int namelen, struct strbuf *buf)
 			return reinterpret(name, namelen, len, buf);
 	}
 
-	at = memchr(name, '@', namelen);
-	if (!at)
-		return -1;
+	for (start = name;
+	     (at = memchr(start, '@', namelen - (start - name)));
+	     start = at + 1) {
 
-	len = interpret_empty_at(name, namelen, at - name, buf);
-	if (len > 0)
-		return reinterpret(name, namelen, len, buf);
+		len = interpret_empty_at(name, namelen, at - name, buf);
+		if (len > 0)
+			return reinterpret(name, namelen, len, buf);
 
-	len = interpret_upstream_mark(name, namelen, at - name, buf);
-	if (len > 0)
-		return len;
+		len = interpret_upstream_mark(name, namelen, at - name, buf);
+		if (len > 0)
+			return len;
+	}
 
 	return -1;
 }
diff --git a/t/t1507-rev-parse-upstream.sh b/t/t1507-rev-parse-upstream.sh
index cace1ca4ad4..178694ee639 100755
--- a/t/t1507-rev-parse-upstream.sh
+++ b/t/t1507-rev-parse-upstream.sh
@@ -17,6 +17,9 @@ test_expect_success 'setup' '
 	 test_commit 4 &&
 	 git branch --track my-side origin/side &&
 	 git branch --track local-master master &&
+	 git branch --track fun@ny origin/side &&
+	 git branch --track @funny origin/side &&
+	 git branch --track funny@ origin/side &&
 	 git remote add -t master master-only .. &&
 	 git fetch master-only &&
 	 git branch bad-upstream &&
@@ -54,6 +57,24 @@ test_expect_success 'my-side@{upstream} resolves to correct full name' '
 	test refs/remotes/origin/side = "$(full_name my-side@{u})"
 '
 
+test_expect_success 'upstream of branch with @ in middle' '
+	full_name fun@ny@{u} >actual &&
+	echo refs/remotes/origin/side >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success 'upstream of branch with @ at start' '
+	full_name @funny@{u} >actual &&
+	echo refs/remotes/origin/side >expect &&
+	test_cmp expect actual
+'
+
+test_expect_success 'upstream of branch with @ at end' '
+	full_name funny@@{u} >actual &&
+	echo refs/remotes/origin/side >expect &&
+	test_cmp expect actual
+'
+
 test_expect_success 'refs/heads/my-side@{upstream} does not resolve to my-side{upstream}' '
 	test_must_fail full_name refs/heads/my-side@{upstream}
 '