git
/
packfile.c
2307 строк · 59.1 Кб
1#define USE_THE_REPOSITORY_VARIABLE2
3#include "git-compat-util.h"4#include "environment.h"5#include "gettext.h"6#include "hex.h"7#include "list.h"8#include "pack.h"9#include "repository.h"10#include "dir.h"11#include "mergesort.h"12#include "packfile.h"13#include "delta.h"14#include "hash-lookup.h"15#include "commit.h"16#include "object.h"17#include "tag.h"18#include "trace.h"19#include "tree-walk.h"20#include "tree.h"21#include "object-file.h"22#include "object-store-ll.h"23#include "midx.h"24#include "commit-graph.h"25#include "pack-revindex.h"26#include "promisor-remote.h"27
28char *odb_pack_name(struct strbuf *buf,29const unsigned char *hash,30const char *ext)31{
32strbuf_reset(buf);33strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),34hash_to_hex(hash), ext);35return buf->buf;36}
37
38char *sha1_pack_name(const unsigned char *sha1)39{
40static struct strbuf buf = STRBUF_INIT;41return odb_pack_name(&buf, sha1, "pack");42}
43
44char *sha1_pack_index_name(const unsigned char *sha1)45{
46static struct strbuf buf = STRBUF_INIT;47return odb_pack_name(&buf, sha1, "idx");48}
49
/* Bookkeeping shared by the window/fd management code in this file. */

/* Stamp handed out by use_pack() to pack_window.last_used. */
static unsigned int pack_used_ctr;
/* Number of windows use_pack() has mapped so far. */
static unsigned int pack_mmap_calls;
/* Highest value pack_open_windows has reached. */
static unsigned int peak_pack_open_windows;
/* Number of pack windows currently mapped. */
static unsigned int pack_open_windows;
/* Number of pack file descriptors currently accounted as open. */
static unsigned int pack_open_fds;
/* Cap on pack_open_fds; computed on first use in open_packed_git_1(). */
static unsigned int pack_max_fds;
/* Highest value pack_mapped has reached. */
static size_t peak_pack_mapped;
/* Total bytes of pack data currently mapped through windows. */
static size_t pack_mapped;
/* printf conversion to pair with a value passed through sz_fmt(). */
#define SZ_FMT PRIuMAX

/* Widen a size_t to uintmax_t so it can be printed with SZ_FMT. */
static inline uintmax_t sz_fmt(size_t s)
{
	return (uintmax_t)s;
}
62void pack_report(void)63{
64fprintf(stderr,65"pack_report: getpagesize() = %10" SZ_FMT "\n"66"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"67"pack_report: core.packedGitLimit = %10" SZ_FMT "\n",68sz_fmt(getpagesize()),69sz_fmt(packed_git_window_size),70sz_fmt(packed_git_limit));71fprintf(stderr,72"pack_report: pack_used_ctr = %10u\n"73"pack_report: pack_mmap_calls = %10u\n"74"pack_report: pack_open_windows = %10u / %10u\n"75"pack_report: pack_mapped = "76"%10" SZ_FMT " / %10" SZ_FMT "\n",77pack_used_ctr,78pack_mmap_calls,79pack_open_windows, peak_pack_open_windows,80sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));81}
82
83/*
84* Open and mmap the index file at path, perform a couple of
85* consistency checks, then record its information to p. Return 0 on
86* success.
87*/
88static int check_packed_git_idx(const char *path, struct packed_git *p)89{
90void *idx_map;91size_t idx_size;92int fd = git_open(path), ret;93struct stat st;94const unsigned int hashsz = the_hash_algo->rawsz;95
96if (fd < 0)97return -1;98if (fstat(fd, &st)) {99close(fd);100return -1;101}102idx_size = xsize_t(st.st_size);103if (idx_size < 4 * 256 + hashsz + hashsz) {104close(fd);105return error("index file %s is too small", path);106}107idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);108close(fd);109
110ret = load_idx(path, hashsz, idx_map, idx_size, p);111
112if (ret)113munmap(idx_map, idx_size);114
115return ret;116}
117
118int load_idx(const char *path, const unsigned int hashsz, void *idx_map,119size_t idx_size, struct packed_git *p)120{
121struct pack_idx_header *hdr = idx_map;122uint32_t version, nr, i, *index;123
124if (idx_size < 4 * 256 + hashsz + hashsz)125return error("index file %s is too small", path);126if (!idx_map)127return error("empty data");128
129if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {130version = ntohl(hdr->idx_version);131if (version < 2 || version > 2)132return error("index file %s is version %"PRIu32133" and is not supported by this binary"134" (try upgrading GIT to a newer version)",135path, version);136} else137version = 1;138
139nr = 0;140index = idx_map;141if (version > 1)142index += 2; /* skip index header */143for (i = 0; i < 256; i++) {144uint32_t n = ntohl(index[i]);145if (n < nr)146return error("non-monotonic index %s", path);147nr = n;148}149
150if (version == 1) {151/*152* Total size:
153* - 256 index entries 4 bytes each
154* - 24-byte entries * nr (object ID + 4-byte offset)
155* - hash of the packfile
156* - file checksum
157*/
158if (idx_size != st_add(4 * 256 + hashsz + hashsz, st_mult(nr, hashsz + 4)))159return error("wrong index v1 file size in %s", path);160} else if (version == 2) {161/*162* Minimum size:
163* - 8 bytes of header
164* - 256 index entries 4 bytes each
165* - object ID entry * nr
166* - 4-byte crc entry * nr
167* - 4-byte offset entry * nr
168* - hash of the packfile
169* - file checksum
170* And after the 4-byte offset table might be a
171* variable sized table containing 8-byte entries
172* for offsets larger than 2^31.
173*/
174size_t min_size = st_add(8 + 4*256 + hashsz + hashsz, st_mult(nr, hashsz + 4 + 4));175size_t max_size = min_size;176if (nr)177max_size = st_add(max_size, st_mult(nr - 1, 8));178if (idx_size < min_size || idx_size > max_size)179return error("wrong index v2 file size in %s", path);180if (idx_size != min_size &&181/*182* make sure we can deal with large pack offsets.
183* 31-bit signed offset won't be enough, neither
184* 32-bit unsigned one will be.
185*/
186(sizeof(off_t) <= 4))187return error("pack too large for current definition of off_t in %s", path);188p->crc_offset = st_add(8 + 4 * 256, st_mult(nr, hashsz));189}190
191p->index_version = version;192p->index_data = idx_map;193p->index_size = idx_size;194p->num_objects = nr;195return 0;196}
197
198int open_pack_index(struct packed_git *p)199{
200char *idx_name;201size_t len;202int ret;203
204if (p->index_data)205return 0;206
207if (!strip_suffix(p->pack_name, ".pack", &len))208BUG("pack_name does not end in .pack");209idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);210ret = check_packed_git_idx(idx_name, p);211free(idx_name);212return ret;213}
214
215uint32_t get_pack_fanout(struct packed_git *p, uint32_t value)216{
217const uint32_t *level1_ofs = p->index_data;218
219if (!level1_ofs) {220if (open_pack_index(p))221return 0;222level1_ofs = p->index_data;223}224
225if (p->index_version > 1) {226level1_ofs += 2;227}228
229return ntohl(level1_ofs[value]);230}
231
232static struct packed_git *alloc_packed_git(int extra)233{
234struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));235memset(p, 0, sizeof(*p));236p->pack_fd = -1;237return p;238}
239
240struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)241{
242const char *path = sha1_pack_name(sha1);243size_t alloc = st_add(strlen(path), 1);244struct packed_git *p = alloc_packed_git(alloc);245
246memcpy(p->pack_name, path, alloc); /* includes NUL */247hashcpy(p->hash, sha1, the_repository->hash_algo);248if (check_packed_git_idx(idx_path, p)) {249free(p);250return NULL;251}252
253return p;254}
255
256static void scan_windows(struct packed_git *p,257struct packed_git **lru_p,258struct pack_window **lru_w,259struct pack_window **lru_l)260{
261struct pack_window *w, *w_l;262
263for (w_l = NULL, w = p->windows; w; w = w->next) {264if (!w->inuse_cnt) {265if (!*lru_w || w->last_used < (*lru_w)->last_used) {266*lru_p = p;267*lru_w = w;268*lru_l = w_l;269}270}271w_l = w;272}273}
274
275static int unuse_one_window(struct packed_git *current)276{
277struct packed_git *p, *lru_p = NULL;278struct pack_window *lru_w = NULL, *lru_l = NULL;279
280if (current)281scan_windows(current, &lru_p, &lru_w, &lru_l);282for (p = the_repository->objects->packed_git; p; p = p->next)283scan_windows(p, &lru_p, &lru_w, &lru_l);284if (lru_p) {285munmap(lru_w->base, lru_w->len);286pack_mapped -= lru_w->len;287if (lru_l)288lru_l->next = lru_w->next;289else290lru_p->windows = lru_w->next;291free(lru_w);292pack_open_windows--;293return 1;294}295return 0;296}
297
298void close_pack_windows(struct packed_git *p)299{
300while (p->windows) {301struct pack_window *w = p->windows;302
303if (w->inuse_cnt)304die("pack '%s' still has open windows to it",305p->pack_name);306munmap(w->base, w->len);307pack_mapped -= w->len;308pack_open_windows--;309p->windows = w->next;310free(w);311}312}
313
314int close_pack_fd(struct packed_git *p)315{
316if (p->pack_fd < 0)317return 0;318
319close(p->pack_fd);320pack_open_fds--;321p->pack_fd = -1;322
323return 1;324}
325
326void close_pack_index(struct packed_git *p)327{
328if (p->index_data) {329munmap((void *)p->index_data, p->index_size);330p->index_data = NULL;331}332}
333
334static void close_pack_revindex(struct packed_git *p)335{
336if (!p->revindex_map)337return;338
339munmap((void *)p->revindex_map, p->revindex_size);340p->revindex_map = NULL;341p->revindex_data = NULL;342}
343
344static void close_pack_mtimes(struct packed_git *p)345{
346if (!p->mtimes_map)347return;348
349munmap((void *)p->mtimes_map, p->mtimes_size);350p->mtimes_map = NULL;351}
352
353void close_pack(struct packed_git *p)354{
355close_pack_windows(p);356close_pack_fd(p);357close_pack_index(p);358close_pack_revindex(p);359close_pack_mtimes(p);360oidset_clear(&p->bad_objects);361}
362
363void close_object_store(struct raw_object_store *o)364{
365struct packed_git *p;366
367for (p = o->packed_git; p; p = p->next)368if (p->do_not_close)369BUG("want to close pack marked 'do-not-close'");370else371close_pack(p);372
373if (o->multi_pack_index) {374close_midx(o->multi_pack_index);375o->multi_pack_index = NULL;376}377
378close_commit_graph(o);379}
380
381void unlink_pack_path(const char *pack_name, int force_delete)382{
383static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"};384int i;385struct strbuf buf = STRBUF_INIT;386size_t plen;387
388strbuf_addstr(&buf, pack_name);389strip_suffix_mem(buf.buf, &buf.len, ".pack");390plen = buf.len;391
392if (!force_delete) {393strbuf_addstr(&buf, ".keep");394if (!access(buf.buf, F_OK)) {395strbuf_release(&buf);396return;397}398}399
400for (i = 0; i < ARRAY_SIZE(exts); i++) {401strbuf_setlen(&buf, plen);402strbuf_addstr(&buf, exts[i]);403unlink(buf.buf);404}405
406strbuf_release(&buf);407}
408
409/*
410* The LRU pack is the one with the oldest MRU window, preferring packs
411* with no used windows, or the oldest mtime if it has no windows allocated.
412*/
413static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)414{
415struct pack_window *w, *this_mru_w;416int has_windows_inuse = 0;417
418/*419* Reject this pack if it has windows and the previously selected
420* one does not. If this pack does not have windows, reject
421* it if the pack file is newer than the previously selected one.
422*/
423if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))424return;425
426for (w = this_mru_w = p->windows; w; w = w->next) {427/*428* Reject this pack if any of its windows are in use,
429* but the previously selected pack did not have any
430* inuse windows. Otherwise, record that this pack
431* has windows in use.
432*/
433if (w->inuse_cnt) {434if (*accept_windows_inuse)435has_windows_inuse = 1;436else437return;438}439
440if (w->last_used > this_mru_w->last_used)441this_mru_w = w;442
443/*444* Reject this pack if it has windows that have been
445* used more recently than the previously selected pack.
446* If the previously selected pack had windows inuse and
447* we have not encountered a window in this pack that is
448* inuse, skip this check since we prefer a pack with no
449* inuse windows to one that has inuse windows.
450*/
451if (*mru_w && *accept_windows_inuse == has_windows_inuse &&452this_mru_w->last_used > (*mru_w)->last_used)453return;454}455
456/*457* Select this pack.
458*/
459*mru_w = this_mru_w;460*lru_p = p;461*accept_windows_inuse = has_windows_inuse;462}
463
464static int close_one_pack(void)465{
466struct packed_git *p, *lru_p = NULL;467struct pack_window *mru_w = NULL;468int accept_windows_inuse = 1;469
470for (p = the_repository->objects->packed_git; p; p = p->next) {471if (p->pack_fd == -1)472continue;473find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse);474}475
476if (lru_p)477return close_pack_fd(lru_p);478
479return 0;480}
481
/*
 * Return the number of file descriptors this process may have open,
 * trying getrlimit(RLIMIT_NOFILE), then sysconf(_SC_OPEN_MAX), then
 * OPEN_MAX, depending on what the platform defines. Returns 1 when none
 * of them yields an answer.
 */
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
	{
		struct rlimit limit;

		if (!getrlimit(RLIMIT_NOFILE, &limit))
			return limit.rlim_cur;
	}
#endif

#ifdef _SC_OPEN_MAX
	{
		long sc_open_max = sysconf(_SC_OPEN_MAX);

		if (sc_open_max > 0)
			return sc_open_max;
		/*
		 * A result of -1 means one of two things:
		 *
		 *  (1) sysconf() did not understand _SC_OPEN_MAX
		 *      and signaled an error with -1; or
		 *  (2) sysconf() said there is no limit.
		 *
		 * Clearing errno before the call would let us tell the
		 * two apart and return a huge number in the latter
		 * case for the caller to cap, but simply falling
		 * through to the OPEN_MAX code below is a lot simpler.
		 */
	}
#endif

#ifdef OPEN_MAX
	return OPEN_MAX;
#else
	return 1; /* see the caller ;-) */
#endif
}
522
523const char *pack_basename(struct packed_git *p)524{
525const char *ret = strrchr(p->pack_name, '/');526if (ret)527ret = ret + 1; /* skip past slash */528else529ret = p->pack_name; /* we only have a base */530return ret;531}
532
533/*
534* Do not call this directly as this leaks p->pack_fd on error return;
535* call open_packed_git() instead.
536*/
537static int open_packed_git_1(struct packed_git *p)538{
539struct stat st;540struct pack_header hdr;541unsigned char hash[GIT_MAX_RAWSZ];542unsigned char *idx_hash;543ssize_t read_result;544const unsigned hashsz = the_hash_algo->rawsz;545
546if (open_pack_index(p))547return error("packfile %s index unavailable", p->pack_name);548
549if (!pack_max_fds) {550unsigned int max_fds = get_max_fd_limit();551
552/* Save 3 for stdin/stdout/stderr, 22 for work */553if (25 < max_fds)554pack_max_fds = max_fds - 25;555else556pack_max_fds = 1;557}558
559while (pack_max_fds <= pack_open_fds && close_one_pack())560; /* nothing */561
562p->pack_fd = git_open(p->pack_name);563if (p->pack_fd < 0 || fstat(p->pack_fd, &st))564return -1;565pack_open_fds++;566
567/* If we created the struct before we had the pack we lack size. */568if (!p->pack_size) {569if (!S_ISREG(st.st_mode))570return error("packfile %s not a regular file", p->pack_name);571p->pack_size = st.st_size;572} else if (p->pack_size != st.st_size)573return error("packfile %s size changed", p->pack_name);574
575/* Verify we recognize this pack file format. */576read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));577if (read_result < 0)578return error_errno("error reading from %s", p->pack_name);579if (read_result != sizeof(hdr))580return error("file %s is far too short to be a packfile", p->pack_name);581if (hdr.hdr_signature != htonl(PACK_SIGNATURE))582return error("file %s is not a GIT packfile", p->pack_name);583if (!pack_version_ok(hdr.hdr_version))584return error("packfile %s is version %"PRIu32" and not"585" supported (try upgrading GIT to a newer version)",586p->pack_name, ntohl(hdr.hdr_version));587
588/* Verify the pack matches its index. */589if (p->num_objects != ntohl(hdr.hdr_entries))590return error("packfile %s claims to have %"PRIu32" objects"591" while index indicates %"PRIu32" objects",592p->pack_name, ntohl(hdr.hdr_entries),593p->num_objects);594read_result = pread_in_full(p->pack_fd, hash, hashsz,595p->pack_size - hashsz);596if (read_result < 0)597return error_errno("error reading from %s", p->pack_name);598if (read_result != hashsz)599return error("packfile %s signature is unavailable", p->pack_name);600idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2;601if (!hasheq(hash, idx_hash, the_repository->hash_algo))602return error("packfile %s does not match index", p->pack_name);603return 0;604}
605
/*
 * Open and verify the pack file of `p`. Returns 0 on success; on
 * failure the pack's descriptor is closed again and -1 is returned.
 */
static int open_packed_git(struct packed_git *p)
{
	if (open_packed_git_1(p)) {
		close_pack_fd(p);
		return -1;
	}
	return 0;
}
613
614static int in_window(struct pack_window *win, off_t offset)615{
616/* We must promise at least one full hash after the617* offset is available from this window, otherwise the offset
618* is not actually in this window and a different window (which
619* has that one hash excess) must be used. This is to support
620* the object header and delta base parsing routines below.
621*/
622off_t win_off = win->offset;623return win_off <= offset624&& (offset + the_hash_algo->rawsz) <= (win_off + win->len);625}
626
627unsigned char *use_pack(struct packed_git *p,628struct pack_window **w_cursor,629off_t offset,630unsigned long *left)631{
632struct pack_window *win = *w_cursor;633
634/* Since packfiles end in a hash of their content and it's635* pointless to ask for an offset into the middle of that
636* hash, and the in_window function above wouldn't match
637* don't allow an offset too close to the end of the file.
638*/
639if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))640die("packfile %s cannot be accessed", p->pack_name);641if (offset > (p->pack_size - the_hash_algo->rawsz))642die("offset beyond end of packfile (truncated pack?)");643if (offset < 0)644die(_("offset before end of packfile (broken .idx?)"));645
646if (!win || !in_window(win, offset)) {647if (win)648win->inuse_cnt--;649for (win = p->windows; win; win = win->next) {650if (in_window(win, offset))651break;652}653if (!win) {654size_t window_align = packed_git_window_size / 2;655off_t len;656
657if (p->pack_fd == -1 && open_packed_git(p))658die("packfile %s cannot be accessed", p->pack_name);659
660CALLOC_ARRAY(win, 1);661win->offset = (offset / window_align) * window_align;662len = p->pack_size - win->offset;663if (len > packed_git_window_size)664len = packed_git_window_size;665win->len = (size_t)len;666pack_mapped += win->len;667while (packed_git_limit < pack_mapped668&& unuse_one_window(p))669; /* nothing */670win->base = xmmap_gently(NULL, win->len,671PROT_READ, MAP_PRIVATE,672p->pack_fd, win->offset);673if (win->base == MAP_FAILED)674die_errno(_("packfile %s cannot be mapped%s"),675p->pack_name, mmap_os_err());676if (!win->offset && win->len == p->pack_size677&& !p->do_not_close)678close_pack_fd(p);679pack_mmap_calls++;680pack_open_windows++;681if (pack_mapped > peak_pack_mapped)682peak_pack_mapped = pack_mapped;683if (pack_open_windows > peak_pack_open_windows)684peak_pack_open_windows = pack_open_windows;685win->next = p->windows;686p->windows = win;687}688}689if (win != *w_cursor) {690win->last_used = pack_used_ctr++;691win->inuse_cnt++;692*w_cursor = win;693}694offset -= win->offset;695if (left)696*left = win->len - xsize_t(offset);697return win->base + offset;698}
699
700void unuse_pack(struct pack_window **w_cursor)701{
702struct pack_window *w = *w_cursor;703if (w) {704w->inuse_cnt--;705*w_cursor = NULL;706}707}
708
709struct packed_git *add_packed_git(const char *path, size_t path_len, int local)710{
711struct stat st;712size_t alloc;713struct packed_git *p;714
715/*716* Make sure a corresponding .pack file exists and that
717* the index looks sane.
718*/
719if (!strip_suffix_mem(path, &path_len, ".idx"))720return NULL;721
722/*723* ".promisor" is long enough to hold any suffix we're adding (and
724* the use xsnprintf double-checks that)
725*/
726alloc = st_add3(path_len, strlen(".promisor"), 1);727p = alloc_packed_git(alloc);728memcpy(p->pack_name, path, path_len);729
730xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");731if (!access(p->pack_name, F_OK))732p->pack_keep = 1;733
734xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor");735if (!access(p->pack_name, F_OK))736p->pack_promisor = 1;737
738xsnprintf(p->pack_name + path_len, alloc - path_len, ".mtimes");739if (!access(p->pack_name, F_OK))740p->is_cruft = 1;741
742xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");743if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {744free(p);745return NULL;746}747
748/* ok, it looks sane as far as we can check without749* actually mapping the pack file.
750*/
751p->pack_size = st.st_size;752p->pack_local = local;753p->mtime = st.st_mtime;754if (path_len < the_hash_algo->hexsz ||755get_hash_hex(path + path_len - the_hash_algo->hexsz, p->hash))756hashclr(p->hash, the_repository->hash_algo);757return p;758}
759
760void install_packed_git(struct repository *r, struct packed_git *pack)761{
762if (pack->pack_fd != -1)763pack_open_fds++;764
765pack->next = r->objects->packed_git;766r->objects->packed_git = pack;767
768hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name));769hashmap_add(&r->objects->pack_map, &pack->packmap_ent);770}
771
/*
 * Optional callback for reporting unexpected files in the pack
 * directory. It is NULL unless set; the code in this file skips garbage
 * reporting while it is NULL.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);
774static void report_helper(const struct string_list *list,775int seen_bits, int first, int last)776{
777if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))778return;779
780for (; first < last; first++)781report_garbage(seen_bits, list->items[first].string);782}
783
784static void report_pack_garbage(struct string_list *list)785{
786int i, baselen = -1, first = 0, seen_bits = 0;787
788if (!report_garbage)789return;790
791string_list_sort(list);792
793for (i = 0; i < list->nr; i++) {794const char *path = list->items[i].string;795if (baselen != -1 &&796strncmp(path, list->items[first].string, baselen)) {797report_helper(list, seen_bits, first, i);798baselen = -1;799seen_bits = 0;800}801if (baselen == -1) {802const char *dot = strrchr(path, '.');803if (!dot) {804report_garbage(PACKDIR_FILE_GARBAGE, path);805continue;806}807baselen = dot - path + 1;808first = i;809}810if (!strcmp(path + baselen, "pack"))811seen_bits |= 1;812else if (!strcmp(path + baselen, "idx"))813seen_bits |= 2;814}815report_helper(list, seen_bits, first, list->nr);816}
817
818void for_each_file_in_pack_subdir(const char *objdir,819const char *subdir,820each_file_in_pack_dir_fn fn,821void *data)822{
823struct strbuf path = STRBUF_INIT;824size_t dirnamelen;825DIR *dir;826struct dirent *de;827
828strbuf_addstr(&path, objdir);829strbuf_addstr(&path, "/pack");830if (subdir)831strbuf_addf(&path, "/%s", subdir);832dir = opendir(path.buf);833if (!dir) {834if (errno != ENOENT)835error_errno("unable to open object pack directory: %s",836path.buf);837strbuf_release(&path);838return;839}840strbuf_addch(&path, '/');841dirnamelen = path.len;842while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) {843strbuf_setlen(&path, dirnamelen);844strbuf_addstr(&path, de->d_name);845
846fn(path.buf, path.len, de->d_name, data);847}848
849closedir(dir);850strbuf_release(&path);851}
852
853void for_each_file_in_pack_dir(const char *objdir,854each_file_in_pack_dir_fn fn,855void *data)856{
857for_each_file_in_pack_subdir(objdir, NULL, fn, data);858}
859
/* State passed to prepare_pack() while scanning one pack directory. */
struct prepare_pack_data {
	/* repository the discovered packs are installed into */
	struct repository *r;
	/* collects pack-related file names for the garbage report */
	struct string_list *garbage;
	/* value stored as pack_local in packs found in this directory */
	int local;
	/* multi-pack-index of this object directory, or NULL */
	struct multi_pack_index *m;
};
867static void prepare_pack(const char *full_name, size_t full_name_len,868const char *file_name, void *_data)869{
870struct prepare_pack_data *data = (struct prepare_pack_data *)_data;871struct packed_git *p;872size_t base_len = full_name_len;873
874if (strip_suffix_mem(full_name, &base_len, ".idx") &&875!(data->m && midx_contains_pack(data->m, file_name))) {876struct hashmap_entry hent;877char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name);878unsigned int hash = strhash(pack_name);879hashmap_entry_init(&hent, hash);880
881/* Don't reopen a pack we already have. */882if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {883p = add_packed_git(full_name, full_name_len, data->local);884if (p)885install_packed_git(data->r, p);886}887free(pack_name);888}889
890if (!report_garbage)891return;892
893if (!strcmp(file_name, "multi-pack-index") ||894!strcmp(file_name, "multi-pack-index.d"))895return;896if (starts_with(file_name, "multi-pack-index") &&897(ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev")))898return;899if (ends_with(file_name, ".idx") ||900ends_with(file_name, ".rev") ||901ends_with(file_name, ".pack") ||902ends_with(file_name, ".bitmap") ||903ends_with(file_name, ".keep") ||904ends_with(file_name, ".promisor") ||905ends_with(file_name, ".mtimes"))906string_list_append(data->garbage, full_name);907else908report_garbage(PACKDIR_FILE_GARBAGE, full_name);909}
910
911static void prepare_packed_git_one(struct repository *r, char *objdir, int local)912{
913struct prepare_pack_data data;914struct string_list garbage = STRING_LIST_INIT_DUP;915
916data.m = r->objects->multi_pack_index;917
918/* look for the multi-pack-index for this object directory */919while (data.m && strcmp(data.m->object_dir, objdir))920data.m = data.m->next;921
922data.r = r;923data.garbage = &garbage;924data.local = local;925
926for_each_file_in_pack_dir(objdir, prepare_pack, &data);927
928report_pack_garbage(data.garbage);929string_list_clear(data.garbage, 0);930}
931
932static void prepare_packed_git(struct repository *r);933/*
934* Give a fast, rough count of the number of objects in the repository. This
935* ignores loose objects completely. If you have a lot of them, then either
936* you should repack because your performance will be awful, or they are
937* all unreachable objects about to be pruned, in which case they're not really
938* interesting as a measure of repo size in the first place.
939*/
940unsigned long repo_approximate_object_count(struct repository *r)941{
942if (!r->objects->approximate_object_count_valid) {943unsigned long count;944struct multi_pack_index *m;945struct packed_git *p;946
947prepare_packed_git(r);948count = 0;949for (m = get_multi_pack_index(r); m; m = m->next)950count += m->num_objects;951for (p = r->objects->packed_git; p; p = p->next) {952if (open_pack_index(p))953continue;954count += p->num_objects;955}956r->objects->approximate_object_count = count;957r->objects->approximate_object_count_valid = 1;958}959return r->objects->approximate_object_count;960}
961
962DEFINE_LIST_SORT(static, sort_packs, struct packed_git, next);963
964static int sort_pack(const struct packed_git *a, const struct packed_git *b)965{
966int st;967
968/*969* Local packs tend to contain objects specific to our
970* variant of the project than remote ones. In addition,
971* remote ones could be on a network mounted filesystem.
972* Favor local ones for these reasons.
973*/
974st = a->pack_local - b->pack_local;975if (st)976return -st;977
978/*979* Younger packs tend to contain more recent objects,
980* and more recent objects tend to get accessed more
981* often.
982*/
983if (a->mtime < b->mtime)984return 1;985else if (a->mtime == b->mtime)986return 0;987return -1;988}
989
990static void rearrange_packed_git(struct repository *r)991{
992sort_packs(&r->objects->packed_git, sort_pack);993}
994
995static void prepare_packed_git_mru(struct repository *r)996{
997struct packed_git *p;998
999INIT_LIST_HEAD(&r->objects->packed_git_mru);1000
1001for (p = r->objects->packed_git; p; p = p->next)1002list_add_tail(&p->mru, &r->objects->packed_git_mru);1003}
1004
1005static void prepare_packed_git(struct repository *r)1006{
1007struct object_directory *odb;1008
1009if (r->objects->packed_git_initialized)1010return;1011
1012prepare_alt_odb(r);1013for (odb = r->objects->odb; odb; odb = odb->next) {1014int local = (odb == r->objects->odb);1015prepare_multi_pack_index_one(r, odb->path, local);1016prepare_packed_git_one(r, odb->path, local);1017}1018rearrange_packed_git(r);1019
1020prepare_packed_git_mru(r);1021r->objects->packed_git_initialized = 1;1022}
1023
1024void reprepare_packed_git(struct repository *r)1025{
1026struct object_directory *odb;1027
1028obj_read_lock();1029
1030/*1031* Reprepare alt odbs, in case the alternates file was modified
1032* during the course of this process. This only _adds_ odbs to
1033* the linked list, so existing odbs will continue to exist for
1034* the lifetime of the process.
1035*/
1036r->objects->loaded_alternates = 0;1037prepare_alt_odb(r);1038
1039for (odb = r->objects->odb; odb; odb = odb->next)1040odb_clear_loose_cache(odb);1041
1042r->objects->approximate_object_count_valid = 0;1043r->objects->packed_git_initialized = 0;1044prepare_packed_git(r);1045obj_read_unlock();1046}
1047
1048struct packed_git *get_packed_git(struct repository *r)1049{
1050prepare_packed_git(r);1051return r->objects->packed_git;1052}
1053
1054struct multi_pack_index *get_multi_pack_index(struct repository *r)1055{
1056prepare_packed_git(r);1057return r->objects->multi_pack_index;1058}
1059
1060struct multi_pack_index *get_local_multi_pack_index(struct repository *r)1061{
1062struct multi_pack_index *m = get_multi_pack_index(r);1063
1064/* no need to iterate; we always put the local one first (if any) */1065if (m && m->local)1066return m;1067
1068return NULL;1069}
1070
1071struct packed_git *get_all_packs(struct repository *r)1072{
1073struct multi_pack_index *m;1074
1075prepare_packed_git(r);1076for (m = r->objects->multi_pack_index; m; m = m->next) {1077uint32_t i;1078for (i = 0; i < m->num_packs + m->num_packs_in_base; i++)1079prepare_midx_pack(r, m, i);1080}1081
1082return r->objects->packed_git;1083}
1084
1085struct list_head *get_packed_git_mru(struct repository *r)1086{
1087prepare_packed_git(r);1088return &r->objects->packed_git_mru;1089}
1090
1091unsigned long unpack_object_header_buffer(const unsigned char *buf,1092unsigned long len, enum object_type *type, unsigned long *sizep)1093{
1094unsigned shift;1095size_t size, c;1096unsigned long used = 0;1097
1098c = buf[used++];1099*type = (c >> 4) & 7;1100size = c & 15;1101shift = 4;1102while (c & 0x80) {1103if (len <= used || (bitsizeof(long) - 7) < shift) {1104error("bad object header");1105size = used = 0;1106break;1107}1108c = buf[used++];1109size = st_add(size, st_left_shift(c & 0x7f, shift));1110shift += 7;1111}1112*sizep = cast_size_t_to_ulong(size);1113return used;1114}
1115
1116unsigned long get_size_from_delta(struct packed_git *p,1117struct pack_window **w_curs,1118off_t curpos)1119{
1120const unsigned char *data;1121unsigned char delta_head[20], *in;1122git_zstream stream;1123int st;1124
1125memset(&stream, 0, sizeof(stream));1126stream.next_out = delta_head;1127stream.avail_out = sizeof(delta_head);1128
1129git_inflate_init(&stream);1130do {1131in = use_pack(p, w_curs, curpos, &stream.avail_in);1132stream.next_in = in;1133/*1134* Note: the window section returned by use_pack() must be
1135* available throughout git_inflate()'s unlocked execution. To
1136* ensure no other thread will modify the window in the
1137* meantime, we rely on the packed_window.inuse_cnt. This
1138* counter is incremented before window reading and checked
1139* before window disposal.
1140*
1141* Other worrying sections could be the call to close_pack_fd(),
1142* which can close packs even with in-use windows, and to
1143* reprepare_packed_git(). Regarding the former, mmap doc says:
1144* "closing the file descriptor does not unmap the region". And
1145* for the latter, it won't re-open already available packs.
1146*/
1147obj_read_unlock();1148st = git_inflate(&stream, Z_FINISH);1149obj_read_lock();1150curpos += stream.next_in - in;1151} while ((st == Z_OK || st == Z_BUF_ERROR) &&1152stream.total_out < sizeof(delta_head));1153git_inflate_end(&stream);1154if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {1155error("delta data unpack-initial failed");1156return 0;1157}1158
1159/* Examine the initial part of the delta to figure out1160* the result size.
1161*/
1162data = delta_head;1163
1164/* ignore base size */1165get_delta_hdr_size(&data, delta_head+sizeof(delta_head));1166
1167/* Read the result size */1168return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));1169}
1170
1171int unpack_object_header(struct packed_git *p,1172struct pack_window **w_curs,1173off_t *curpos,1174unsigned long *sizep)1175{
1176unsigned char *base;1177unsigned long left;1178unsigned long used;1179enum object_type type;1180
1181/* use_pack() assures us we have [base, base + 20) available1182* as a range that we can look at. (Its actually the hash
1183* size that is assured.) With our object header encoding
1184* the maximum deflated object size is 2^137, which is just
1185* insane, so we know won't exceed what we have been given.
1186*/
1187base = use_pack(p, w_curs, *curpos, &left);1188used = unpack_object_header_buffer(base, left, &type, sizep);1189if (!used) {1190type = OBJ_BAD;1191} else1192*curpos += used;1193
1194return type;1195}
1196
1197void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid)1198{
1199oidset_insert(&p->bad_objects, oid);1200}
1201
1202const struct packed_git *has_packed_and_bad(struct repository *r,1203const struct object_id *oid)1204{
1205struct packed_git *p;1206
1207for (p = r->objects->packed_git; p; p = p->next)1208if (oidset_contains(&p->bad_objects, oid))1209return p;1210return NULL;1211}
1212
1213off_t get_delta_base(struct packed_git *p,1214struct pack_window **w_curs,1215off_t *curpos,1216enum object_type type,1217off_t delta_obj_offset)1218{
1219unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);1220off_t base_offset;1221
1222/* use_pack() assured us we have [base_info, base_info + 20)1223* as a range that we can look at without walking off the
1224* end of the mapped window. Its actually the hash size
1225* that is assured. An OFS_DELTA longer than the hash size
1226* is stupid, as then a REF_DELTA would be smaller to store.
1227*/
1228if (type == OBJ_OFS_DELTA) {1229unsigned used = 0;1230unsigned char c = base_info[used++];1231base_offset = c & 127;1232while (c & 128) {1233base_offset += 1;1234if (!base_offset || MSB(base_offset, 7))1235return 0; /* overflow */1236c = base_info[used++];1237base_offset = (base_offset << 7) + (c & 127);1238}1239base_offset = delta_obj_offset - base_offset;1240if (base_offset <= 0 || base_offset >= delta_obj_offset)1241return 0; /* out of bound */1242*curpos += used;1243} else if (type == OBJ_REF_DELTA) {1244/* The base entry _must_ be in the same pack */1245base_offset = find_pack_entry_one(base_info, p);1246*curpos += the_hash_algo->rawsz;1247} else1248die("I am totally screwed");1249return base_offset;1250}
1251
1252/*
1253* Like get_delta_base above, but we return the sha1 instead of the pack
1254* offset. This means it is cheaper for REF deltas (we do not have to do
1255* the final object lookup), but more expensive for OFS deltas (we
1256* have to load the revidx to convert the offset back into a sha1).
1257*/
1258static int get_delta_base_oid(struct packed_git *p,1259struct pack_window **w_curs,1260off_t curpos,1261struct object_id *oid,1262enum object_type type,1263off_t delta_obj_offset)1264{
1265if (type == OBJ_REF_DELTA) {1266unsigned char *base = use_pack(p, w_curs, curpos, NULL);1267oidread(oid, base, the_repository->hash_algo);1268return 0;1269} else if (type == OBJ_OFS_DELTA) {1270uint32_t base_pos;1271off_t base_offset = get_delta_base(p, w_curs, &curpos,1272type, delta_obj_offset);1273
1274if (!base_offset)1275return -1;1276
1277if (offset_to_pack_pos(p, base_offset, &base_pos) < 0)1278return -1;1279
1280return nth_packed_object_id(oid, p,1281pack_pos_to_index(p, base_pos));1282} else1283return -1;1284}
1285
1286static int retry_bad_packed_offset(struct repository *r,1287struct packed_git *p,1288off_t obj_offset)1289{
1290int type;1291uint32_t pos;1292struct object_id oid;1293if (offset_to_pack_pos(p, obj_offset, &pos) < 0)1294return OBJ_BAD;1295nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos));1296mark_bad_packed_object(p, &oid);1297type = oid_object_info(r, &oid, NULL);1298if (type <= OBJ_NONE)1299return OBJ_BAD;1300return type;1301}
1302
1303#define POI_STACK_PREALLOC 641304
1305static enum object_type packed_to_object_type(struct repository *r,1306struct packed_git *p,1307off_t obj_offset,1308enum object_type type,1309struct pack_window **w_curs,1310off_t curpos)1311{
1312off_t small_poi_stack[POI_STACK_PREALLOC];1313off_t *poi_stack = small_poi_stack;1314int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;1315
1316while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {1317off_t base_offset;1318unsigned long size;1319/* Push the object we're going to leave behind */1320if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {1321poi_stack_alloc = alloc_nr(poi_stack_nr);1322ALLOC_ARRAY(poi_stack, poi_stack_alloc);1323COPY_ARRAY(poi_stack, small_poi_stack, poi_stack_nr);1324} else {1325ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);1326}1327poi_stack[poi_stack_nr++] = obj_offset;1328/* If parsing the base offset fails, just unwind */1329base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);1330if (!base_offset)1331goto unwind;1332curpos = obj_offset = base_offset;1333type = unpack_object_header(p, w_curs, &curpos, &size);1334if (type <= OBJ_NONE) {1335/* If getting the base itself fails, we first1336* retry the base, otherwise unwind */
1337type = retry_bad_packed_offset(r, p, base_offset);1338if (type > OBJ_NONE)1339goto out;1340goto unwind;1341}1342}1343
1344switch (type) {1345case OBJ_BAD:1346case OBJ_COMMIT:1347case OBJ_TREE:1348case OBJ_BLOB:1349case OBJ_TAG:1350break;1351default:1352error("unknown object type %i at offset %"PRIuMAX" in %s",1353type, (uintmax_t)obj_offset, p->pack_name);1354type = OBJ_BAD;1355}1356
1357out:1358if (poi_stack != small_poi_stack)1359free(poi_stack);1360return type;1361
1362unwind:1363while (poi_stack_nr) {1364obj_offset = poi_stack[--poi_stack_nr];1365type = retry_bad_packed_offset(r, p, obj_offset);1366if (type > OBJ_NONE)1367goto out;1368}1369type = OBJ_BAD;1370goto out;1371}
1372
1373static struct hashmap delta_base_cache;1374static size_t delta_base_cached;1375
1376static LIST_HEAD(delta_base_cache_lru);1377
1378struct delta_base_cache_key {1379struct packed_git *p;1380off_t base_offset;1381};1382
1383struct delta_base_cache_entry {1384struct hashmap_entry ent;1385struct delta_base_cache_key key;1386struct list_head lru;1387void *data;1388unsigned long size;1389enum object_type type;1390};1391
/*
 * Hash a (pack, offset) pair for the delta base cache: add the pack
 * pointer and the offset (both truncated to unsigned int), then fold
 * higher bits of the sum into the lower ones.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
	unsigned int hash;

	hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
	hash += (hash >> 8) + (hash >> 16);
	return hash;
}
1401static struct delta_base_cache_entry *1402get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)1403{
1404struct hashmap_entry entry, *e;1405struct delta_base_cache_key key;1406
1407if (!delta_base_cache.cmpfn)1408return NULL;1409
1410hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));1411key.p = p;1412key.base_offset = base_offset;1413e = hashmap_get(&delta_base_cache, &entry, &key);1414return e ? container_of(e, struct delta_base_cache_entry, ent) : NULL;1415}
1416
1417static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,1418const struct delta_base_cache_key *b)1419{
1420return a->p == b->p && a->base_offset == b->base_offset;1421}
1422
1423static int delta_base_cache_hash_cmp(const void *cmp_data UNUSED,1424const struct hashmap_entry *va,1425const struct hashmap_entry *vb,1426const void *vkey)1427{
1428const struct delta_base_cache_entry *a, *b;1429const struct delta_base_cache_key *key = vkey;1430
1431a = container_of(va, const struct delta_base_cache_entry, ent);1432b = container_of(vb, const struct delta_base_cache_entry, ent);1433
1434if (key)1435return !delta_base_cache_key_eq(&a->key, key);1436else1437return !delta_base_cache_key_eq(&a->key, &b->key);1438}
1439
/* Return 1 if the object at `base_offset` in `p` is cached, else 0. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
	return !!get_delta_base_cache_entry(p, base_offset);
}
1445/*
1446* Remove the entry from the cache, but do _not_ free the associated
1447* entry data. The caller takes ownership of the "data" buffer, and
1448* should copy out any fields it wants before detaching.
1449*/
1450static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)1451{
1452hashmap_remove(&delta_base_cache, &ent->ent, &ent->key);1453list_del(&ent->lru);1454delta_base_cached -= ent->size;1455free(ent);1456}
1457
1458static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,1459off_t base_offset, unsigned long *base_size,1460enum object_type *type)1461{
1462struct delta_base_cache_entry *ent;1463
1464ent = get_delta_base_cache_entry(p, base_offset);1465if (!ent)1466return unpack_entry(r, p, base_offset, type, base_size);1467
1468if (type)1469*type = ent->type;1470if (base_size)1471*base_size = ent->size;1472return xmemdupz(ent->data, ent->size);1473}
1474
1475static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)1476{
1477free(ent->data);1478detach_delta_base_cache_entry(ent);1479}
1480
1481void clear_delta_base_cache(void)1482{
1483struct list_head *lru, *tmp;1484list_for_each_safe(lru, tmp, &delta_base_cache_lru) {1485struct delta_base_cache_entry *entry =1486list_entry(lru, struct delta_base_cache_entry, lru);1487release_delta_base_cache(entry);1488}1489}
1490
1491static void add_delta_base_cache(struct packed_git *p, off_t base_offset,1492void *base, unsigned long base_size, enum object_type type)1493{
1494struct delta_base_cache_entry *ent;1495struct list_head *lru, *tmp;1496
1497/*1498* Check required to avoid redundant entries when more than one thread
1499* is unpacking the same object, in unpack_entry() (since its phases I
1500* and III might run concurrently across multiple threads).
1501*/
1502if (in_delta_base_cache(p, base_offset)) {1503free(base);1504return;1505}1506
1507delta_base_cached += base_size;1508
1509list_for_each_safe(lru, tmp, &delta_base_cache_lru) {1510struct delta_base_cache_entry *f =1511list_entry(lru, struct delta_base_cache_entry, lru);1512if (delta_base_cached <= delta_base_cache_limit)1513break;1514release_delta_base_cache(f);1515}1516
1517ent = xmalloc(sizeof(*ent));1518ent->key.p = p;1519ent->key.base_offset = base_offset;1520ent->type = type;1521ent->data = base;1522ent->size = base_size;1523list_add_tail(&ent->lru, &delta_base_cache_lru);1524
1525if (!delta_base_cache.cmpfn)1526hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, NULL, 0);1527hashmap_entry_init(&ent->ent, pack_entry_hash(p, base_offset));1528hashmap_add(&delta_base_cache, &ent->ent);1529}
1530
1531int packed_object_info(struct repository *r, struct packed_git *p,1532off_t obj_offset, struct object_info *oi)1533{
1534struct pack_window *w_curs = NULL;1535unsigned long size;1536off_t curpos = obj_offset;1537enum object_type type;1538
1539/*1540* We always get the representation type, but only convert it to
1541* a "real" type later if the caller is interested.
1542*/
1543if (oi->contentp) {1544*oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,1545&type);1546if (!*oi->contentp)1547type = OBJ_BAD;1548} else {1549type = unpack_object_header(p, &w_curs, &curpos, &size);1550}1551
1552if (!oi->contentp && oi->sizep) {1553if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {1554off_t tmp_pos = curpos;1555off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,1556type, obj_offset);1557if (!base_offset) {1558type = OBJ_BAD;1559goto out;1560}1561*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);1562if (*oi->sizep == 0) {1563type = OBJ_BAD;1564goto out;1565}1566} else {1567*oi->sizep = size;1568}1569}1570
1571if (oi->disk_sizep) {1572uint32_t pos;1573if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {1574error("could not find object at offset %"PRIuMAX" "1575"in pack %s", (uintmax_t)obj_offset, p->pack_name);1576type = OBJ_BAD;1577goto out;1578}1579
1580*oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;1581}1582
1583if (oi->typep || oi->type_name) {1584enum object_type ptot;1585ptot = packed_to_object_type(r, p, obj_offset,1586type, &w_curs, curpos);1587if (oi->typep)1588*oi->typep = ptot;1589if (oi->type_name) {1590const char *tn = type_name(ptot);1591if (tn)1592strbuf_addstr(oi->type_name, tn);1593}1594if (ptot < 0) {1595type = OBJ_BAD;1596goto out;1597}1598}1599
1600if (oi->delta_base_oid) {1601if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {1602if (get_delta_base_oid(p, &w_curs, curpos,1603oi->delta_base_oid,1604type, obj_offset) < 0) {1605type = OBJ_BAD;1606goto out;1607}1608} else1609oidclr(oi->delta_base_oid, the_repository->hash_algo);1610}1611
1612oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :1613OI_PACKED;1614
1615out:1616unuse_pack(&w_curs);1617return type;1618}
1619
1620static void *unpack_compressed_entry(struct packed_git *p,1621struct pack_window **w_curs,1622off_t curpos,1623unsigned long size)1624{
1625int st;1626git_zstream stream;1627unsigned char *buffer, *in;1628
1629buffer = xmallocz_gently(size);1630if (!buffer)1631return NULL;1632memset(&stream, 0, sizeof(stream));1633stream.next_out = buffer;1634stream.avail_out = size + 1;1635
1636git_inflate_init(&stream);1637do {1638in = use_pack(p, w_curs, curpos, &stream.avail_in);1639stream.next_in = in;1640/*1641* Note: we must ensure the window section returned by
1642* use_pack() will be available throughout git_inflate()'s
1643* unlocked execution. Please refer to the comment at
1644* get_size_from_delta() to see how this is done.
1645*/
1646obj_read_unlock();1647st = git_inflate(&stream, Z_FINISH);1648obj_read_lock();1649if (!stream.avail_out)1650break; /* the payload is larger than it should be */1651curpos += stream.next_in - in;1652} while (st == Z_OK || st == Z_BUF_ERROR);1653git_inflate_end(&stream);1654if ((st != Z_STREAM_END) || stream.total_out != size) {1655free(buffer);1656return NULL;1657}1658
1659/* versions of zlib can clobber unconsumed portion of outbuf */1660buffer[size] = '\0';1661
1662return buffer;1663}
1664
1665static void write_pack_access_log(struct packed_git *p, off_t obj_offset)1666{
1667static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);1668trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",1669p->pack_name, (uintmax_t)obj_offset);1670}
1671
1672int do_check_packed_object_crc;1673
1674#define UNPACK_ENTRY_STACK_PREALLOC 641675struct unpack_entry_stack_ent {1676off_t obj_offset;1677off_t curpos;1678unsigned long size;1679};1680
1681void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,1682enum object_type *final_type, unsigned long *final_size)1683{
1684struct pack_window *w_curs = NULL;1685off_t curpos = obj_offset;1686void *data = NULL;1687unsigned long size;1688enum object_type type;1689struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];1690struct unpack_entry_stack_ent *delta_stack = small_delta_stack;1691int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;1692int base_from_cache = 0;1693
1694write_pack_access_log(p, obj_offset);1695
1696/* PHASE 1: drill down to the innermost base object */1697for (;;) {1698off_t base_offset;1699int i;1700struct delta_base_cache_entry *ent;1701
1702ent = get_delta_base_cache_entry(p, curpos);1703if (ent) {1704type = ent->type;1705data = ent->data;1706size = ent->size;1707detach_delta_base_cache_entry(ent);1708base_from_cache = 1;1709break;1710}1711
1712if (do_check_packed_object_crc && p->index_version > 1) {1713uint32_t pack_pos, index_pos;1714off_t len;1715
1716if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {1717error("could not find object at offset %"PRIuMAX" in pack %s",1718(uintmax_t)obj_offset, p->pack_name);1719data = NULL;1720goto out;1721}1722
1723len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;1724index_pos = pack_pos_to_index(p, pack_pos);1725if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) {1726struct object_id oid;1727nth_packed_object_id(&oid, p, index_pos);1728error("bad packed object CRC for %s",1729oid_to_hex(&oid));1730mark_bad_packed_object(p, &oid);1731data = NULL;1732goto out;1733}1734}1735
1736type = unpack_object_header(p, &w_curs, &curpos, &size);1737if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)1738break;1739
1740base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);1741if (!base_offset) {1742error("failed to validate delta base reference "1743"at offset %"PRIuMAX" from %s",1744(uintmax_t)curpos, p->pack_name);1745/* bail to phase 2, in hopes of recovery */1746data = NULL;1747break;1748}1749
1750/* push object, proceed to base */1751if (delta_stack_nr >= delta_stack_alloc1752&& delta_stack == small_delta_stack) {1753delta_stack_alloc = alloc_nr(delta_stack_nr);1754ALLOC_ARRAY(delta_stack, delta_stack_alloc);1755COPY_ARRAY(delta_stack, small_delta_stack,1756delta_stack_nr);1757} else {1758ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);1759}1760i = delta_stack_nr++;1761delta_stack[i].obj_offset = obj_offset;1762delta_stack[i].curpos = curpos;1763delta_stack[i].size = size;1764
1765curpos = obj_offset = base_offset;1766}1767
1768/* PHASE 2: handle the base */1769switch (type) {1770case OBJ_OFS_DELTA:1771case OBJ_REF_DELTA:1772if (data)1773BUG("unpack_entry: left loop at a valid delta");1774break;1775case OBJ_COMMIT:1776case OBJ_TREE:1777case OBJ_BLOB:1778case OBJ_TAG:1779if (!base_from_cache)1780data = unpack_compressed_entry(p, &w_curs, curpos, size);1781break;1782default:1783data = NULL;1784error("unknown object type %i at offset %"PRIuMAX" in %s",1785type, (uintmax_t)obj_offset, p->pack_name);1786}1787
1788/* PHASE 3: apply deltas in order */1789
1790/* invariants:1791* 'data' holds the base data, or NULL if there was corruption
1792*/
1793while (delta_stack_nr) {1794void *delta_data;1795void *base = data;1796void *external_base = NULL;1797unsigned long delta_size, base_size = size;1798int i;1799off_t base_obj_offset = obj_offset;1800
1801data = NULL;1802
1803if (!base) {1804/*1805* We're probably in deep shit, but let's try to fetch
1806* the required base anyway from another pack or loose.
1807* This is costly but should happen only in the presence
1808* of a corrupted pack, and is better than failing outright.
1809*/
1810uint32_t pos;1811struct object_id base_oid;1812if (!(offset_to_pack_pos(p, obj_offset, &pos))) {1813struct object_info oi = OBJECT_INFO_INIT;1814
1815nth_packed_object_id(&base_oid, p,1816pack_pos_to_index(p, pos));1817error("failed to read delta base object %s"1818" at offset %"PRIuMAX" from %s",1819oid_to_hex(&base_oid), (uintmax_t)obj_offset,1820p->pack_name);1821mark_bad_packed_object(p, &base_oid);1822
1823oi.typep = &type;1824oi.sizep = &base_size;1825oi.contentp = &base;1826if (oid_object_info_extended(r, &base_oid, &oi, 0) < 0)1827base = NULL;1828
1829external_base = base;1830}1831}1832
1833i = --delta_stack_nr;1834obj_offset = delta_stack[i].obj_offset;1835curpos = delta_stack[i].curpos;1836delta_size = delta_stack[i].size;1837
1838if (!base)1839continue;1840
1841delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);1842
1843if (!delta_data) {1844error("failed to unpack compressed delta "1845"at offset %"PRIuMAX" from %s",1846(uintmax_t)curpos, p->pack_name);1847data = NULL;1848} else {1849data = patch_delta(base, base_size, delta_data,1850delta_size, &size);1851
1852/*1853* We could not apply the delta; warn the user, but
1854* keep going. Our failure will be noticed either in
1855* the next iteration of the loop, or if this is the
1856* final delta, in the caller when we return NULL.
1857* Those code paths will take care of making a more
1858* explicit warning and retrying with another copy of
1859* the object.
1860*/
1861if (!data)1862error("failed to apply delta");1863}1864
1865/*1866* We delay adding `base` to the cache until the end of the loop
1867* because unpack_compressed_entry() momentarily releases the
1868* obj_read_mutex, giving another thread the chance to access
1869* the cache. Therefore, if `base` was already there, this other
1870* thread could free() it (e.g. to make space for another entry)
1871* before we are done using it.
1872*/
1873if (!external_base)1874add_delta_base_cache(p, base_obj_offset, base, base_size, type);1875
1876free(delta_data);1877free(external_base);1878}1879
1880if (final_type)1881*final_type = type;1882if (final_size)1883*final_size = size;1884
1885out:1886unuse_pack(&w_curs);1887
1888if (delta_stack != small_delta_stack)1889free(delta_stack);1890
1891return data;1892}
1893
1894int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result)1895{
1896const unsigned char *index_fanout = p->index_data;1897const unsigned char *index_lookup;1898const unsigned int hashsz = the_hash_algo->rawsz;1899int index_lookup_width;1900
1901if (!index_fanout)1902BUG("bsearch_pack called without a valid pack-index");1903
1904index_lookup = index_fanout + 4 * 256;1905if (p->index_version == 1) {1906index_lookup_width = hashsz + 4;1907index_lookup += 4;1908} else {1909index_lookup_width = hashsz;1910index_fanout += 8;1911index_lookup += 8;1912}1913
1914return bsearch_hash(oid->hash, (const uint32_t*)index_fanout,1915index_lookup, index_lookup_width, result);1916}
1917
1918int nth_packed_object_id(struct object_id *oid,1919struct packed_git *p,1920uint32_t n)1921{
1922const unsigned char *index = p->index_data;1923const unsigned int hashsz = the_hash_algo->rawsz;1924if (!index) {1925if (open_pack_index(p))1926return -1;1927index = p->index_data;1928}1929if (n >= p->num_objects)1930return -1;1931index += 4 * 256;1932if (p->index_version == 1) {1933oidread(oid, index + st_add(st_mult(hashsz + 4, n), 4),1934the_repository->hash_algo);1935} else {1936index += 8;1937oidread(oid, index + st_mult(hashsz, n),1938the_repository->hash_algo);1939}1940return 0;1941}
1942
1943void check_pack_index_ptr(const struct packed_git *p, const void *vptr)1944{
1945const unsigned char *ptr = vptr;1946const unsigned char *start = p->index_data;1947const unsigned char *end = start + p->index_size;1948if (ptr < start)1949die(_("offset before start of pack index for %s (corrupt index?)"),1950p->pack_name);1951/* No need to check for underflow; .idx files must be at least 8 bytes */1952if (ptr >= end - 8)1953die(_("offset beyond end of pack index for %s (truncated index?)"),1954p->pack_name);1955}
1956
1957off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)1958{
1959const unsigned char *index = p->index_data;1960const unsigned int hashsz = the_hash_algo->rawsz;1961index += 4 * 256;1962if (p->index_version == 1) {1963return ntohl(*((uint32_t *)(index + st_mult(hashsz + 4, n))));1964} else {1965uint32_t off;1966index += st_add(8, st_mult(p->num_objects, hashsz + 4));1967off = ntohl(*((uint32_t *)(index + st_mult(4, n))));1968if (!(off & 0x80000000))1969return off;1970index += st_add(st_mult(p->num_objects, 4),1971st_mult(off & 0x7fffffff, 8));1972check_pack_index_ptr(p, index);1973return get_be64(index);1974}1975}
1976
1977off_t find_pack_entry_one(const unsigned char *sha1,1978struct packed_git *p)1979{
1980const unsigned char *index = p->index_data;1981struct object_id oid;1982uint32_t result;1983
1984if (!index) {1985if (open_pack_index(p))1986return 0;1987}1988
1989hashcpy(oid.hash, sha1, the_repository->hash_algo);1990if (bsearch_pack(&oid, p, &result))1991return nth_packed_object_offset(p, result);1992return 0;1993}
1994
1995int is_pack_valid(struct packed_git *p)1996{
1997/* An already open pack is known to be valid. */1998if (p->pack_fd != -1)1999return 1;2000
2001/* If the pack has one window completely covering the2002* file size, the pack is known to be valid even if
2003* the descriptor is not currently open.
2004*/
2005if (p->windows) {2006struct pack_window *w = p->windows;2007
2008if (!w->offset && w->len == p->pack_size)2009return 1;2010}2011
2012/* Force the pack to open to prove its valid. */2013return !open_packed_git(p);2014}
2015
2016struct packed_git *find_sha1_pack(const unsigned char *sha1,2017struct packed_git *packs)2018{
2019struct packed_git *p;2020
2021for (p = packs; p; p = p->next) {2022if (find_pack_entry_one(sha1, p))2023return p;2024}2025return NULL;2026
2027}
2028
2029static int fill_pack_entry(const struct object_id *oid,2030struct pack_entry *e,2031struct packed_git *p)2032{
2033off_t offset;2034
2035if (oidset_size(&p->bad_objects) &&2036oidset_contains(&p->bad_objects, oid))2037return 0;2038
2039offset = find_pack_entry_one(oid->hash, p);2040if (!offset)2041return 0;2042
2043/*2044* We are about to tell the caller where they can locate the
2045* requested object. We better make sure the packfile is
2046* still here and can be accessed before supplying that
2047* answer, as it may have been deleted since the index was
2048* loaded!
2049*/
2050if (!is_pack_valid(p))2051return 0;2052e->offset = offset;2053e->p = p;2054return 1;2055}
2056
2057int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e)2058{
2059struct list_head *pos;2060struct multi_pack_index *m;2061
2062prepare_packed_git(r);2063if (!r->objects->packed_git && !r->objects->multi_pack_index)2064return 0;2065
2066for (m = r->objects->multi_pack_index; m; m = m->next) {2067if (fill_midx_entry(r, oid, e, m))2068return 1;2069}2070
2071list_for_each(pos, &r->objects->packed_git_mru) {2072struct packed_git *p = list_entry(pos, struct packed_git, mru);2073if (!p->multi_pack_index && fill_pack_entry(oid, e, p)) {2074list_move(&p->mru, &r->objects->packed_git_mru);2075return 1;2076}2077}2078return 0;2079}
2080
2081static void maybe_invalidate_kept_pack_cache(struct repository *r,2082unsigned flags)2083{
2084if (!r->objects->kept_pack_cache.packs)2085return;2086if (r->objects->kept_pack_cache.flags == flags)2087return;2088FREE_AND_NULL(r->objects->kept_pack_cache.packs);2089r->objects->kept_pack_cache.flags = 0;2090}
2091
2092static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)2093{
2094maybe_invalidate_kept_pack_cache(r, flags);2095
2096if (!r->objects->kept_pack_cache.packs) {2097struct packed_git **packs = NULL;2098size_t nr = 0, alloc = 0;2099struct packed_git *p;2100
2101/*2102* We want "all" packs here, because we need to cover ones that
2103* are used by a midx, as well. We need to look in every one of
2104* them (instead of the midx itself) to cover duplicates. It's
2105* possible that an object is found in two packs that the midx
2106* covers, one kept and one not kept, but the midx returns only
2107* the non-kept version.
2108*/
2109for (p = get_all_packs(r); p; p = p->next) {2110if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) ||2111(p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) {2112ALLOC_GROW(packs, nr + 1, alloc);2113packs[nr++] = p;2114}2115}2116ALLOC_GROW(packs, nr + 1, alloc);2117packs[nr] = NULL;2118
2119r->objects->kept_pack_cache.packs = packs;2120r->objects->kept_pack_cache.flags = flags;2121}2122
2123return r->objects->kept_pack_cache.packs;2124}
2125
/*
 * Locate `oid` in one of the kept packs of `r` selected by `flags`.
 * Returns 1 with *e filled in when found, 0 otherwise.
 */
int find_kept_pack_entry(struct repository *r,
			 const struct object_id *oid,
			 unsigned flags,
			 struct pack_entry *e)
{
	struct packed_git **cache;

	for (cache = kept_pack_cache(r, flags); *cache; cache++) {
		struct packed_git *p = *cache;
		if (fill_pack_entry(oid, e, p))
			return 1;
	}

	return 0;
}
2142int has_object_pack(const struct object_id *oid)2143{
2144struct pack_entry e;2145return find_pack_entry(the_repository, oid, &e);2146}
2147
2148int has_object_kept_pack(const struct object_id *oid, unsigned flags)2149{
2150struct pack_entry e;2151return find_kept_pack_entry(the_repository, oid, flags, &e);2152}
2153
/*
 * Return 1 if stat() succeeds on the pack index file name derived from
 * `sha1`, 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
	struct stat st;

	return !stat(sha1_pack_index_name(sha1), &st);
}
2162int for_each_object_in_pack(struct packed_git *p,2163each_packed_object_fn cb, void *data,2164enum for_each_object_flags flags)2165{
2166uint32_t i;2167int r = 0;2168
2169if (flags & FOR_EACH_OBJECT_PACK_ORDER) {2170if (load_pack_revindex(the_repository, p))2171return -1;2172}2173
2174for (i = 0; i < p->num_objects; i++) {2175uint32_t index_pos;2176struct object_id oid;2177
2178/*2179* We are iterating "i" from 0 up to num_objects, but its
2180* meaning may be different, depending on the requested output
2181* order:
2182*
2183* - in object-name order, it is the same as the index order
2184* used by nth_packed_object_id(), so we can pass it
2185* directly
2186*
2187* - in pack-order, it is pack position, which we must
2188* convert to an index position in order to get the oid.
2189*/
2190if (flags & FOR_EACH_OBJECT_PACK_ORDER)2191index_pos = pack_pos_to_index(p, i);2192else2193index_pos = i;2194
2195if (nth_packed_object_id(&oid, p, index_pos) < 0)2196return error("unable to get sha1 of object %u in %s",2197index_pos, p->pack_name);2198
2199r = cb(&oid, p, index_pos, data);2200if (r)2201break;2202}2203return r;2204}
2205
2206int for_each_packed_object(each_packed_object_fn cb, void *data,2207enum for_each_object_flags flags)2208{
2209struct packed_git *p;2210int r = 0;2211int pack_errors = 0;2212
2213prepare_packed_git(the_repository);2214for (p = get_all_packs(the_repository); p; p = p->next) {2215if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local)2216continue;2217if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) &&2218!p->pack_promisor)2219continue;2220if ((flags & FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS) &&2221p->pack_keep_in_core)2222continue;2223if ((flags & FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS) &&2224p->pack_keep)2225continue;2226if (open_pack_index(p)) {2227pack_errors = 1;2228continue;2229}2230r = for_each_object_in_pack(p, cb, data, flags);2231if (r)2232break;2233}2234return r ? r : pack_errors;2235}
2236
2237static int add_promisor_object(const struct object_id *oid,2238struct packed_git *pack UNUSED,2239uint32_t pos UNUSED,2240void *set_)2241{
2242struct oidset *set = set_;2243struct object *obj;2244int we_parsed_object;2245
2246obj = lookup_object(the_repository, oid);2247if (obj && obj->parsed) {2248we_parsed_object = 0;2249} else {2250we_parsed_object = 1;2251obj = parse_object(the_repository, oid);2252}2253
2254if (!obj)2255return 1;2256
2257oidset_insert(set, oid);2258
2259/*2260* If this is a tree, commit, or tag, the objects it refers
2261* to are also promisor objects. (Blobs refer to no objects->)
2262*/
2263if (obj->type == OBJ_TREE) {2264struct tree *tree = (struct tree *)obj;2265struct tree_desc desc;2266struct name_entry entry;2267if (init_tree_desc_gently(&desc, &tree->object.oid,2268tree->buffer, tree->size, 0))2269/*2270* Error messages are given when packs are
2271* verified, so do not print any here.
2272*/
2273return 0;2274while (tree_entry_gently(&desc, &entry))2275oidset_insert(set, &entry.oid);2276if (we_parsed_object)2277free_tree_buffer(tree);2278} else if (obj->type == OBJ_COMMIT) {2279struct commit *commit = (struct commit *) obj;2280struct commit_list *parents = commit->parents;2281
2282oidset_insert(set, get_commit_tree_oid(commit));2283for (; parents; parents = parents->next)2284oidset_insert(set, &parents->item->object.oid);2285} else if (obj->type == OBJ_TAG) {2286struct tag *tag = (struct tag *) obj;2287oidset_insert(set, get_tagged_oid(tag));2288}2289return 0;2290}
2291
2292int is_promisor_object(const struct object_id *oid)2293{
2294static struct oidset promisor_objects;2295static int promisor_objects_prepared;2296
2297if (!promisor_objects_prepared) {2298if (repo_has_promisor_remote(the_repository)) {2299for_each_packed_object(add_promisor_object,2300&promisor_objects,2301FOR_EACH_OBJECT_PROMISOR_ONLY |2302FOR_EACH_OBJECT_PACK_ORDER);2303}2304promisor_objects_prepared = 1;2305}2306return oidset_contains(&promisor_objects, oid);2307}
2308