1
0
mirror of https://github.com/git/git.git synced 2025-02-06 10:03:06 +00:00

Merge branch 'tb/unsafe-hash-cleanup'

The API around choosing to use unsafe variant of SHA-1
implementation has been updated in an attempt to make it harder to
abuse.

* tb/unsafe-hash-cleanup:
  hash.h: drop unsafe_ function variants
  csum-file: introduce hashfile_checkpoint_init()
  t/helper/test-hash.c: use unsafe_hash_algo()
  csum-file.c: use unsafe_hash_algo()
  hash.h: introduce `unsafe_hash_algo()`
  csum-file.c: extract algop from hashfile_checksum_valid()
  csum-file: store the hash algorithm as a struct field
  t/helper/test-tool: implement sha1-unsafe helper
This commit is contained in:
Junio C Hamano 2025-02-03 10:23:32 -08:00
commit caf17423d3
12 changed files with 107 additions and 70 deletions

View File

@ -1106,7 +1106,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
|| (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size)
cycle_packfile();
the_hash_algo->unsafe_init_fn(&checkpoint.ctx);
hashfile_checkpoint_init(pack_file, &checkpoint);
hashfile_checkpoint(pack_file, &checkpoint);
offset = checkpoint.offset;

View File

@ -261,7 +261,7 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
git_hash_ctx ctx;
unsigned char obuf[16384];
unsigned header_len;
struct hashfile_checkpoint checkpoint = {0};
struct hashfile_checkpoint checkpoint;
struct pack_idx_entry *idx = NULL;
seekback = lseek(fd, 0, SEEK_CUR);
@ -272,12 +272,15 @@ static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
OBJ_BLOB, size);
the_hash_algo->init_fn(&ctx);
the_hash_algo->update_fn(&ctx, obuf, header_len);
the_hash_algo->unsafe_init_fn(&checkpoint.ctx);
/* Note: idx is non-NULL when we are writing */
if ((flags & HASH_WRITE_OBJECT) != 0)
if ((flags & HASH_WRITE_OBJECT) != 0) {
CALLOC_ARRAY(idx, 1);
prepare_to_stream(state, flags);
hashfile_checkpoint_init(state->f, &checkpoint);
}
already_hashed_to = 0;
while (1) {

View File

@ -50,7 +50,7 @@ void hashflush(struct hashfile *f)
if (offset) {
if (!f->skip_hash)
the_hash_algo->unsafe_update_fn(&f->ctx, f->buffer, offset);
f->algop->update_fn(&f->ctx, f->buffer, offset);
flush(f, f->buffer, offset);
f->offset = 0;
}
@ -71,14 +71,14 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
hashflush(f);
if (f->skip_hash)
hashclr(f->buffer, the_repository->hash_algo);
hashclr(f->buffer, f->algop);
else
the_hash_algo->unsafe_final_fn(f->buffer, &f->ctx);
f->algop->final_fn(f->buffer, &f->ctx);
if (result)
hashcpy(result, f->buffer, the_repository->hash_algo);
hashcpy(result, f->buffer, f->algop);
if (flags & CSUM_HASH_IN_STREAM)
flush(f, f->buffer, the_hash_algo->rawsz);
flush(f, f->buffer, f->algop->rawsz);
if (flags & CSUM_FSYNC)
fsync_component_or_die(component, f->fd, f->name);
if (flags & CSUM_CLOSE) {
@ -128,7 +128,7 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
* f->offset is necessarily zero.
*/
if (!f->skip_hash)
the_hash_algo->unsafe_update_fn(&f->ctx, buf, nr);
f->algop->update_fn(&f->ctx, buf, nr);
flush(f, buf, nr);
} else {
/*
@ -174,7 +174,9 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
f->name = name;
f->do_crc = 0;
f->skip_hash = 0;
the_hash_algo->unsafe_init_fn(&f->ctx);
f->algop = unsafe_hash_algo(the_hash_algo);
f->algop->init_fn(&f->ctx);
f->buffer_len = buffer_len;
f->buffer = xmalloc(buffer_len);
@ -204,11 +206,18 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
return hashfd_internal(fd, name, tp, 8 * 1024);
}
void hashfile_checkpoint_init(struct hashfile *f,
struct hashfile_checkpoint *checkpoint)
{
memset(checkpoint, 0, sizeof(*checkpoint));
f->algop->init_fn(&checkpoint->ctx);
}
void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
{
hashflush(f);
checkpoint->offset = f->total;
the_hash_algo->unsafe_clone_fn(&checkpoint->ctx, &f->ctx);
f->algop->clone_fn(&checkpoint->ctx, &f->ctx);
}
int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
@ -219,7 +228,7 @@ int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint
lseek(f->fd, offset, SEEK_SET) != offset)
return -1;
f->total = offset;
the_hash_algo->unsafe_clone_fn(&f->ctx, &checkpoint->ctx);
f->algop->clone_fn(&f->ctx, &checkpoint->ctx);
f->offset = 0; /* hashflush() was called in checkpoint */
return 0;
}
@ -240,14 +249,15 @@ int hashfile_checksum_valid(const unsigned char *data, size_t total_len)
{
unsigned char got[GIT_MAX_RAWSZ];
git_hash_ctx ctx;
size_t data_len = total_len - the_hash_algo->rawsz;
const struct git_hash_algo *algop = unsafe_hash_algo(the_hash_algo);
size_t data_len = total_len - algop->rawsz;
if (total_len < the_hash_algo->rawsz)
if (total_len < algop->rawsz)
return 0; /* say "too short"? */
the_hash_algo->unsafe_init_fn(&ctx);
the_hash_algo->unsafe_update_fn(&ctx, data, data_len);
the_hash_algo->unsafe_final_fn(got, &ctx);
algop->init_fn(&ctx);
algop->update_fn(&ctx, data, data_len);
algop->final_fn(got, &ctx);
return hasheq(got, data + data_len, the_repository->hash_algo);
return hasheq(got, data + data_len, algop);
}

View File

@ -20,6 +20,7 @@ struct hashfile {
size_t buffer_len;
unsigned char *buffer;
unsigned char *check_buffer;
const struct git_hash_algo *algop;
/**
* If non-zero, skip_hash indicates that we should
@ -35,6 +36,7 @@ struct hashfile_checkpoint {
git_hash_ctx ctx;
};
void hashfile_checkpoint_init(struct hashfile *, struct hashfile_checkpoint *);
void hashfile_checkpoint(struct hashfile *, struct hashfile_checkpoint *);
int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);

28
hash.h
View File

@ -282,21 +282,6 @@ struct git_hash_algo {
/* The hash finalization function for object IDs. */
git_hash_final_oid_fn final_oid_fn;
/* The non-cryptographic hash initialization function. */
git_hash_init_fn unsafe_init_fn;
/* The non-cryptographic hash context cloning function. */
git_hash_clone_fn unsafe_clone_fn;
/* The non-cryptographic hash update function. */
git_hash_update_fn unsafe_update_fn;
/* The non-cryptographic hash finalization function. */
git_hash_final_fn unsafe_final_fn;
/* The non-cryptographic hash finalization function. */
git_hash_final_oid_fn unsafe_final_oid_fn;
/* The OID of the empty tree. */
const struct object_id *empty_tree;
@ -305,6 +290,9 @@ struct git_hash_algo {
/* The all-zeros OID. */
const struct object_id *null_oid;
/* The unsafe variant of this hash function, if one exists. */
const struct git_hash_algo *unsafe;
};
extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS];
@ -320,9 +308,17 @@ int hash_algo_by_length(int len);
/* Identical, except for a pointer to struct git_hash_algo. */
static inline int hash_algo_by_ptr(const struct git_hash_algo *p)
{
return p - hash_algos;
size_t i;
for (i = 0; i < GIT_HASH_NALGOS; i++) {
const struct git_hash_algo *algop = &hash_algos[i];
if (p == algop)
return i;
}
return GIT_HASH_UNKNOWN;
}
const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop);
const struct object_id *null_oid(void);
static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop)

View File

@ -202,6 +202,22 @@ static void git_hash_unknown_final_oid(struct object_id *oid UNUSED,
BUG("trying to finalize unknown hash");
}
static const struct git_hash_algo sha1_unsafe_algo = {
.name = "sha1",
.format_id = GIT_SHA1_FORMAT_ID,
.rawsz = GIT_SHA1_RAWSZ,
.hexsz = GIT_SHA1_HEXSZ,
.blksz = GIT_SHA1_BLKSZ,
.init_fn = git_hash_sha1_init_unsafe,
.clone_fn = git_hash_sha1_clone_unsafe,
.update_fn = git_hash_sha1_update_unsafe,
.final_fn = git_hash_sha1_final_unsafe,
.final_oid_fn = git_hash_sha1_final_oid_unsafe,
.empty_tree = &empty_tree_oid,
.empty_blob = &empty_blob_oid,
.null_oid = &null_oid_sha1,
};
const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
{
.name = NULL,
@ -214,11 +230,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
.update_fn = git_hash_unknown_update,
.final_fn = git_hash_unknown_final,
.final_oid_fn = git_hash_unknown_final_oid,
.unsafe_init_fn = git_hash_unknown_init,
.unsafe_clone_fn = git_hash_unknown_clone,
.unsafe_update_fn = git_hash_unknown_update,
.unsafe_final_fn = git_hash_unknown_final,
.unsafe_final_oid_fn = git_hash_unknown_final_oid,
.empty_tree = NULL,
.empty_blob = NULL,
.null_oid = NULL,
@ -234,11 +245,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
.update_fn = git_hash_sha1_update,
.final_fn = git_hash_sha1_final,
.final_oid_fn = git_hash_sha1_final_oid,
.unsafe_init_fn = git_hash_sha1_init_unsafe,
.unsafe_clone_fn = git_hash_sha1_clone_unsafe,
.unsafe_update_fn = git_hash_sha1_update_unsafe,
.unsafe_final_fn = git_hash_sha1_final_unsafe,
.unsafe_final_oid_fn = git_hash_sha1_final_oid_unsafe,
.unsafe = &sha1_unsafe_algo,
.empty_tree = &empty_tree_oid,
.empty_blob = &empty_blob_oid,
.null_oid = &null_oid_sha1,
@ -254,11 +261,6 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
.update_fn = git_hash_sha256_update,
.final_fn = git_hash_sha256_final,
.final_oid_fn = git_hash_sha256_final_oid,
.unsafe_init_fn = git_hash_sha256_init,
.unsafe_clone_fn = git_hash_sha256_clone,
.unsafe_update_fn = git_hash_sha256_update,
.unsafe_final_fn = git_hash_sha256_final,
.unsafe_final_oid_fn = git_hash_sha256_final_oid,
.empty_tree = &empty_tree_oid_sha256,
.empty_blob = &empty_blob_oid_sha256,
.null_oid = &null_oid_sha256,
@ -305,6 +307,15 @@ int hash_algo_by_length(int len)
return GIT_HASH_UNKNOWN;
}
const struct git_hash_algo *unsafe_hash_algo(const struct git_hash_algo *algop)
{
/* If we have a faster "unsafe" implementation, use that. */
if (algop->unsafe)
return algop->unsafe;
/* Otherwise use the default one. */
return algop;
}
/*
* This is meant to hold a *small* number of objects that you would
* want repo_read_object_file() to be able to return, but yet you do not want

View File

@ -1,7 +1,7 @@
#include "test-tool.h"
#include "hex.h"
int cmd_hash_impl(int ac, const char **av, int algo)
int cmd_hash_impl(int ac, const char **av, int algo, int unsafe)
{
git_hash_ctx ctx;
unsigned char hash[GIT_MAX_HEXSZ];
@ -9,6 +9,8 @@ int cmd_hash_impl(int ac, const char **av, int algo)
int binary = 0;
char *buffer;
const struct git_hash_algo *algop = &hash_algos[algo];
if (unsafe)
algop = unsafe_hash_algo(algop);
if (ac == 2) {
if (!strcmp(av[1], "-b"))

View File

@ -3,7 +3,7 @@
int cmd__sha1(int ac, const char **av)
{
return cmd_hash_impl(ac, av, GIT_HASH_SHA1);
return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 0);
}
int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED)
@ -13,3 +13,8 @@ int cmd__sha1_is_sha1dc(int argc UNUSED, const char **argv UNUSED)
#endif
return 1;
}
int cmd__sha1_unsafe(int ac, const char **av)
{
return cmd_hash_impl(ac, av, GIT_HASH_SHA1, 1);
}

View File

@ -3,25 +3,31 @@
dd if=/dev/zero bs=1048576 count=100 2>/dev/null |
/usr/bin/time t/helper/test-tool sha1 >/dev/null
dd if=/dev/zero bs=1048576 count=100 2>/dev/null |
/usr/bin/time t/helper/test-tool sha1-unsafe >/dev/null
while read expect cnt pfx
do
case "$expect" in '#'*) continue ;; esac
actual=$(
{
test -z "$pfx" || echo "$pfx"
dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null |
perl -pe 'y/\000/g/'
} | ./t/helper/test-tool sha1 $cnt
)
if test "$expect" = "$actual"
then
echo "OK: $expect $cnt $pfx"
else
echo >&2 "OOPS: $cnt"
echo >&2 "expect: $expect"
echo >&2 "actual: $actual"
exit 1
fi
for sha1 in sha1 sha1-unsafe
do
actual=$(
{
test -z "$pfx" || echo "$pfx"
dd if=/dev/zero bs=1048576 count=$cnt 2>/dev/null |
perl -pe 'y/\000/g/'
} | ./t/helper/test-tool $sha1 $cnt
)
if test "$expect" = "$actual"
then
echo "OK ($sha1): $expect $cnt $pfx"
else
echo >&2 "OOPS ($sha1): $cnt"
echo >&2 "expect ($sha1): $expect"
echo >&2 "actual ($sha1): $actual"
exit 1
fi
done
done <<EOF
da39a3ee5e6b4b0d3255bfef95601890afd80709 0
3f786850e387550fdab836ed7e6dc881de23001b 0 a

View File

@ -3,5 +3,5 @@
int cmd__sha256(int ac, const char **av)
{
return cmd_hash_impl(ac, av, GIT_HASH_SHA256);
return cmd_hash_impl(ac, av, GIT_HASH_SHA256, 0);
}

View File

@ -71,6 +71,7 @@ static struct test_cmd cmds[] = {
{ "serve-v2", cmd__serve_v2 },
{ "sha1", cmd__sha1 },
{ "sha1-is-sha1dc", cmd__sha1_is_sha1dc },
{ "sha1-unsafe", cmd__sha1_unsafe },
{ "sha256", cmd__sha256 },
{ "sigchain", cmd__sigchain },
{ "simple-ipc", cmd__simple_ipc },

View File

@ -64,6 +64,7 @@ int cmd__scrap_cache_tree(int argc, const char **argv);
int cmd__serve_v2(int argc, const char **argv);
int cmd__sha1(int argc, const char **argv);
int cmd__sha1_is_sha1dc(int argc, const char **argv);
int cmd__sha1_unsafe(int argc, const char **argv);
int cmd__sha256(int argc, const char **argv);
int cmd__sigchain(int argc, const char **argv);
int cmd__simple_ipc(int argc, const char **argv);
@ -82,6 +83,6 @@ int cmd__windows_named_pipe(int argc, const char **argv);
#endif
int cmd__write_cache(int argc, const char **argv);
int cmd_hash_impl(int ac, const char **av, int algo);
int cmd_hash_impl(int ac, const char **av, int algo, int unsafe);
#endif