http-fetch: support fetching packfiles by URL

Teach http-fetch the ability to download packfiles directly, given a
URL, and to verify them.

The http_pack_request suite has been augmented with a function that
takes a URL directly. With this function, the hash is only used to
determine the name of the temporary file.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Author:    Jonathan Tan
Date:      2020-06-10 13:57:18 -07:00
Committer: Junio C Hamano
Commit:    8d5d2a34df (parent 8e6adb69e1)

5 changed files with 123 additions and 18 deletions
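
As a rough illustration of the new mode (the repository name, URL, and pack path below are placeholders, and the hash is arbitrary: it only names the temporary file on disk):

	# Hypothetical invocation; any hash of the right length is accepted.
	git init client
	git -C client http-fetch \
		--packfile=e83c5163316f89bfbde7d9ab23ca2e25604af290 \
		"https://example.com/repo.git/objects/pack/pack-abc123.pack"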

Documentation/git-http-fetch.txt

@@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP
 SYNOPSIS
 --------
 [verse]
-'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] <commit> <url>
+'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile=<hash> | <commit>] <url>
 
 DESCRIPTION
 -----------
@@ -40,6 +40,13 @@ commit-id::
 		<commit-id>['\t'<filename-as-in--w>]
 
+--packfile=<hash>::
+	Instead of a commit id on the command line (which is not expected in
+	this case), 'git http-fetch' fetches the packfile directly at the given
+	URL and uses index-pack to generate corresponding .idx and .keep files.
+	The hash is used to determine the name of the temporary file and is
+	arbitrary. The output of index-pack is printed to stdout.
+
 --recover::
 	Verify that everything reachable from target is fetched. Used after
 	an earlier fetch is interrupted.
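
A sketch of consuming the index-pack output that --packfile prints, along the lines of the test added further down ("client", $URL, and $ARBITRARY are placeholders):

	# index-pack reports the resulting pack as "keep<TAB><hash>" on stdout.
	git -C client http-fetch --packfile=$ARBITRARY "$URL" >out
	packhash=$(cut -c6- out)	# strip the leading "keep<TAB>"
	# The .pack, .idx, and .keep files land in the client object store:
	ls client/.git/objects/pack/pack-$packhash.pack \
	   client/.git/objects/pack/pack-$packhash.idx \
	   client/.git/objects/pack/pack-$packhash.keep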

http-fetch.c

@@ -5,7 +5,7 @@
 #include "walker.h"
 
 static const char http_fetch_usage[] = "git http-fetch "
-"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url";
+"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
 
 static int fetch_using_walker(const char *raw_url, int get_verbosely,
 			      int get_recover, int commits, char **commit_id,
@@ -43,6 +43,37 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
 	return rc;
 }
 
+static void fetch_single_packfile(struct object_id *packfile_hash,
+				  const char *url) {
+	struct http_pack_request *preq;
+	struct slot_results results;
+	int ret;
+
+	http_init(NULL, url, 0);
+
+	preq = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url));
+	if (preq == NULL)
+		die("couldn't create http pack request");
+	preq->slot->results = &results;
+	preq->generate_keep = 1;
+
+	if (start_active_slot(preq->slot)) {
+		run_active_slot(preq->slot);
+		if (results.curl_result != CURLE_OK) {
+			die("Unable to get pack file %s\n%s", preq->url,
+			    curl_errorstr);
+		}
+	} else {
+		die("Unable to start request");
+	}
+
+	if ((ret = finish_http_pack_request(preq)))
+		die("finish_http_pack_request gave result %d", ret);
+
+	release_http_pack_request(preq);
+	http_cleanup();
+}
+
 int cmd_main(int argc, const char **argv)
 {
 	int commits_on_stdin = 0;
@@ -52,8 +83,12 @@ int cmd_main(int argc, const char **argv)
 	int arg = 1;
 	int get_verbosely = 0;
 	int get_recover = 0;
+	int packfile = 0;
+	struct object_id packfile_hash;
 
 	while (arg < argc && argv[arg][0] == '-') {
+		const char *p;
+
 		if (argv[arg][1] == 't') {
 		} else if (argv[arg][1] == 'c') {
 		} else if (argv[arg][1] == 'a') {
@@ -68,25 +103,33 @@ int cmd_main(int argc, const char **argv)
 			get_recover = 1;
 		} else if (!strcmp(argv[arg], "--stdin")) {
 			commits_on_stdin = 1;
+		} else if (skip_prefix(argv[arg], "--packfile=", &p)) {
+			const char *end;
+
+			packfile = 1;
+			if (parse_oid_hex(p, &packfile_hash, &end) || *end)
+				die(_("argument to --packfile must be a valid hash (got '%s')"), p);
 		}
 		arg++;
 	}
-	if (argc != arg + 2 - commits_on_stdin)
+	if (argc != arg + 2 - (commits_on_stdin || packfile))
 		usage(http_fetch_usage);
 
+	setup_git_directory();
+	git_config(git_default_config, NULL);
+
+	if (packfile) {
+		fetch_single_packfile(&packfile_hash, argv[arg]);
+		return 0;
+	}
+
 	if (commits_on_stdin) {
 		commits = walker_targets_stdin(&commit_id, &write_ref);
 	} else {
 		commit_id = (char **) &argv[arg++];
 		commits = 1;
 	}
 
-	setup_git_directory();
-	git_config(git_default_config, NULL);
-
-	if (!argv[arg])
-		BUG("must have one arg remaining");
-
 	return fetch_using_walker(argv[arg], get_verbosely, get_recover,
 				  commits, commit_id, write_ref,
 				  commits_on_stdin);

http.c

@@ -2281,7 +2281,13 @@ int finish_http_pack_request(struct http_pack_request *preq)
 	argv_array_push(&ip.args, "--stdin");
 	ip.git_cmd = 1;
 	ip.in = tmpfile_fd;
-	ip.no_stdout = 1;
+	if (preq->generate_keep) {
+		argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX,
+				 (uintmax_t)getpid());
+		ip.out = 0;
+	} else {
+		ip.no_stdout = 1;
+	}
 
 	if (run_command(&ip)) {
 		ret = -1;
@@ -2307,19 +2313,27 @@ void http_install_packfile(struct packed_git *p,
 }
 
 struct http_pack_request *new_http_pack_request(
-	const unsigned char *packed_git_hash, const char *base_url)
+	const unsigned char *packed_git_hash, const char *base_url) {
+	struct strbuf buf = STRBUF_INIT;
+
+	end_url_with_slash(&buf, base_url);
+	strbuf_addf(&buf, "objects/pack/pack-%s.pack",
+		hash_to_hex(packed_git_hash));
+
+	return new_direct_http_pack_request(packed_git_hash,
+		strbuf_detach(&buf, NULL));
+}
+
+struct http_pack_request *new_direct_http_pack_request(
+	const unsigned char *packed_git_hash, char *url)
 {
 	off_t prev_posn = 0;
-	struct strbuf buf = STRBUF_INIT;
 	struct http_pack_request *preq;
 
 	preq = xcalloc(1, sizeof(*preq));
 	strbuf_init(&preq->tmpfile, 0);
 
-	end_url_with_slash(&buf, base_url);
-	strbuf_addf(&buf, "objects/pack/pack-%s.pack",
-		hash_to_hex(packed_git_hash));
-	preq->url = strbuf_detach(&buf, NULL);
+	preq->url = url;
 
 	strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash));
 	preq->packfile = fopen(preq->tmpfile.buf, "a");

http.h

@@ -216,6 +216,15 @@ int http_get_info_packs(const char *base_url,
 
 struct http_pack_request {
 	char *url;
+
+	/*
+	 * If this is true, finish_http_pack_request() will pass "--keep" to
+	 * index-pack, resulting in the creation of a keep file, and will not
+	 * suppress its stdout (that is, the "keep\t<hash>\n" line will be
+	 * printed to stdout).
+	 */
+	unsigned generate_keep : 1;
+
 	FILE *packfile;
 	struct strbuf tmpfile;
 	struct active_request_slot *slot;
@@ -223,6 +232,8 @@ struct http_pack_request {
 
 struct http_pack_request *new_http_pack_request(
 	const unsigned char *packed_git_hash, const char *base_url);
+struct http_pack_request *new_direct_http_pack_request(
+	const unsigned char *packed_git_hash, char *url);
 int finish_http_pack_request(struct http_pack_request *preq);
 void release_http_pack_request(struct http_pack_request *preq);
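
With generate_keep set, finish_http_pack_request() ends up running index-pack roughly like this (the pid in the keep message and the temp file name are illustrative):

	# Run inside the destination repository; the downloaded pack is fed on stdin.
	git index-pack --stdin "--keep=git 12345" <pack-abc123.pack.temp
	# On success index-pack reports the final pack name, e.g. "keep<TAB><hash>",
	# and that is the line http-fetch --packfile passes through to its stdout.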

t/t5550-http-fetch-dumb.sh

@@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' '
 	git clone $HTTPD_URL/dumb/repo_pack.git
 '
 
+test_expect_success 'http-fetch --packfile' '
+	# Arbitrary hash. Use rev-parse so that we get one of the correct
+	# length.
+	ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+
+	git init packfileclient &&
+	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
+	git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
+
+	grep "^keep.[0-9a-f]\{16,\}$" out &&
+	cut -c6- out >packhash &&
+
+	# Ensure that the expected files are generated
+	test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" &&
+	test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" &&
+	test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" &&
+
+	# Ensure that it has the HEAD of repo_pack, at least
+	HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+	git -C packfileclient cat-file -e "$HASH"
+'
+
 test_expect_success 'fetch notices corrupt pack' '
 	cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
@@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' '
 	)
 '
 
+test_expect_success 'http-fetch --packfile with corrupt pack' '
+	rm -rf packfileclient &&
+	git init packfileclient &&
+	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) &&
+	test_must_fail git -C packfileclient http-fetch --packfile \
+		"$HTTPD_URL"/dumb/repo_bad1.git/$p
+'
+
 test_expect_success 'fetch notices corrupt idx' '
 	cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&