git
/
bulk-checkin.c
389 строк · 10.4 Кб
/*
 * Copyright (c) 2011, Google Inc.
 */
4
5#define USE_THE_REPOSITORY_VARIABLE6
7#include "git-compat-util.h"8#include "bulk-checkin.h"9#include "environment.h"10#include "gettext.h"11#include "hex.h"12#include "lockfile.h"13#include "repository.h"14#include "csum-file.h"15#include "pack.h"16#include "strbuf.h"17#include "tmp-objdir.h"18#include "packfile.h"19#include "object-file.h"20#include "object-store-ll.h"21
22static int odb_transaction_nesting;23
24static struct tmp_objdir *bulk_fsync_objdir;25
26static struct bulk_checkin_packfile {27char *pack_tmp_name;28struct hashfile *f;29off_t offset;30struct pack_idx_option pack_idx_opts;31
32struct pack_idx_entry **written;33uint32_t alloc_written;34uint32_t nr_written;35} bulk_checkin_packfile;36
/*
 * Hand the temporary pack over to stage_tmp_packfiles() and then let
 * rename_tmp_packfile_idx() deal with the index name it reported back.
 *
 * The index path returned through the out-parameter is owned by this
 * function and released before returning.
 */
static void finish_tmp_packfile(struct strbuf *basename,
				const char *pack_tmp_name,
				struct pack_idx_entry **written_list,
				uint32_t nr_written,
				struct pack_idx_option *pack_idx_opts,
				unsigned char hash[])
{
	char *idx_path = NULL;

	stage_tmp_packfiles(basename, pack_tmp_name,
			    written_list, nr_written,
			    NULL, pack_idx_opts,
			    hash, &idx_path);
	rename_tmp_packfile_idx(basename, &idx_path);
	free(idx_path);
}
52
53static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)54{
55unsigned char hash[GIT_MAX_RAWSZ];56struct strbuf packname = STRBUF_INIT;57int i;58
59if (!state->f)60return;61
62if (state->nr_written == 0) {63close(state->f->fd);64free_hashfile(state->f);65unlink(state->pack_tmp_name);66goto clear_exit;67} else if (state->nr_written == 1) {68finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK,69CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);70} else {71int fd = finalize_hashfile(state->f, hash, FSYNC_COMPONENT_PACK, 0);72fixup_pack_header_footer(fd, hash, state->pack_tmp_name,73state->nr_written, hash,74state->offset);75close(fd);76}77
78strbuf_addf(&packname, "%s/pack/pack-%s.", get_object_directory(),79hash_to_hex(hash));80finish_tmp_packfile(&packname, state->pack_tmp_name,81state->written, state->nr_written,82&state->pack_idx_opts, hash);83for (i = 0; i < state->nr_written; i++)84free(state->written[i]);85
86clear_exit:87free(state->pack_tmp_name);88free(state->written);89memset(state, 0, sizeof(*state));90
91strbuf_release(&packname);92/* Make objects we just wrote available to ourselves */93reprepare_packed_git(the_repository);94}
95
96/*
97* Cleanup after batch-mode fsync_object_files.
98*/
99static void flush_batch_fsync(void)100{
101struct strbuf temp_path = STRBUF_INIT;102struct tempfile *temp;103
104if (!bulk_fsync_objdir)105return;106
107/*108* Issue a full hardware flush against a temporary file to ensure
109* that all objects are durable before any renames occur. The code in
110* fsync_loose_object_bulk_checkin has already issued a writeout
111* request, but it has not flushed any writeback cache in the storage
112* hardware or any filesystem logs. This fsync call acts as a barrier
113* to ensure that the data in each new object file is durable before
114* the final name is visible.
115*/
116strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory());117temp = xmks_tempfile(temp_path.buf);118fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));119delete_tempfile(&temp);120strbuf_release(&temp_path);121
122/*123* Make the object files visible in the primary ODB after their data is
124* fully durable.
125*/
126tmp_objdir_migrate(bulk_fsync_objdir);127bulk_fsync_objdir = NULL;128}
129
130static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)131{
132int i;133
134/* The object may already exist in the repository */135if (repo_has_object_file(the_repository, oid))136return 1;137
138/* Might want to keep the list sorted */139for (i = 0; i < state->nr_written; i++)140if (oideq(&state->written[i]->oid, oid))141return 1;142
143/* This is a new object we need to keep */144return 0;145}
146
147/*
148* Read the contents from fd for size bytes, streaming it to the
149* packfile in state while updating the hash in ctx. Signal a failure
150* by returning a negative value when the resulting pack would exceed
151* the pack size limit and this is not the first object in the pack,
152* so that the caller can discard what we wrote from the current pack
153* by truncating it and opening a new one. The caller will then call
154* us again after rewinding the input fd.
155*
156* The already_hashed_to pointer is kept untouched by the caller to
157* make sure we do not hash the same byte when we are called
158* again. This way, the caller does not have to checkpoint its hash
159* status before calling us just in case we ask it to call us again
160* with a new pack.
161*/
162static int stream_blob_to_pack(struct bulk_checkin_packfile *state,163git_hash_ctx *ctx, off_t *already_hashed_to,164int fd, size_t size, const char *path,165unsigned flags)166{
167git_zstream s;168unsigned char ibuf[16384];169unsigned char obuf[16384];170unsigned hdrlen;171int status = Z_OK;172int write_object = (flags & HASH_WRITE_OBJECT);173off_t offset = 0;174
175git_deflate_init(&s, pack_compression_level);176
177hdrlen = encode_in_pack_object_header(obuf, sizeof(obuf), OBJ_BLOB, size);178s.next_out = obuf + hdrlen;179s.avail_out = sizeof(obuf) - hdrlen;180
181while (status != Z_STREAM_END) {182if (size && !s.avail_in) {183ssize_t rsize = size < sizeof(ibuf) ? size : sizeof(ibuf);184ssize_t read_result = read_in_full(fd, ibuf, rsize);185if (read_result < 0)186die_errno("failed to read from '%s'", path);187if (read_result != rsize)188die("failed to read %d bytes from '%s'",189(int)rsize, path);190offset += rsize;191if (*already_hashed_to < offset) {192size_t hsize = offset - *already_hashed_to;193if (rsize < hsize)194hsize = rsize;195if (hsize)196the_hash_algo->update_fn(ctx, ibuf, hsize);197*already_hashed_to = offset;198}199s.next_in = ibuf;200s.avail_in = rsize;201size -= rsize;202}203
204status = git_deflate(&s, size ? 0 : Z_FINISH);205
206if (!s.avail_out || status == Z_STREAM_END) {207if (write_object) {208size_t written = s.next_out - obuf;209
210/* would we bust the size limit? */211if (state->nr_written &&212pack_size_limit_cfg &&213pack_size_limit_cfg < state->offset + written) {214git_deflate_abort(&s);215return -1;216}217
218hashwrite(state->f, obuf, written);219state->offset += written;220}221s.next_out = obuf;222s.avail_out = sizeof(obuf);223}224
225switch (status) {226case Z_OK:227case Z_BUF_ERROR:228case Z_STREAM_END:229continue;230default:231die("unexpected deflate failure: %d", status);232}233}234git_deflate_end(&s);235return 0;236}
237
238/* Lazily create backing packfile for the state */
239static void prepare_to_stream(struct bulk_checkin_packfile *state,240unsigned flags)241{
242if (!(flags & HASH_WRITE_OBJECT) || state->f)243return;244
245state->f = create_tmp_packfile(&state->pack_tmp_name);246reset_pack_idx_option(&state->pack_idx_opts);247
248/* Pretend we are going to write only one object */249state->offset = write_pack_header(state->f, 1);250if (!state->offset)251die_errno("unable to write pack header");252}
253
254static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,255struct object_id *result_oid,256int fd, size_t size,257const char *path, unsigned flags)258{
259off_t seekback, already_hashed_to;260git_hash_ctx ctx;261unsigned char obuf[16384];262unsigned header_len;263struct hashfile_checkpoint checkpoint = {0};264struct pack_idx_entry *idx = NULL;265
266seekback = lseek(fd, 0, SEEK_CUR);267if (seekback == (off_t) -1)268return error("cannot find the current offset");269
270header_len = format_object_header((char *)obuf, sizeof(obuf),271OBJ_BLOB, size);272the_hash_algo->init_fn(&ctx);273the_hash_algo->update_fn(&ctx, obuf, header_len);274the_hash_algo->init_fn(&checkpoint.ctx);275
276/* Note: idx is non-NULL when we are writing */277if ((flags & HASH_WRITE_OBJECT) != 0)278CALLOC_ARRAY(idx, 1);279
280already_hashed_to = 0;281
282while (1) {283prepare_to_stream(state, flags);284if (idx) {285hashfile_checkpoint(state->f, &checkpoint);286idx->offset = state->offset;287crc32_begin(state->f);288}289if (!stream_blob_to_pack(state, &ctx, &already_hashed_to,290fd, size, path, flags))291break;292/*293* Writing this object to the current pack will make
294* it too big; we need to truncate it, start a new
295* pack, and write into it.
296*/
297if (!idx)298BUG("should not happen");299hashfile_truncate(state->f, &checkpoint);300state->offset = checkpoint.offset;301flush_bulk_checkin_packfile(state);302if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)303return error("cannot seek back");304}305the_hash_algo->final_oid_fn(result_oid, &ctx);306if (!idx)307return 0;308
309idx->crc32 = crc32_end(state->f);310if (already_written(state, result_oid)) {311hashfile_truncate(state->f, &checkpoint);312state->offset = checkpoint.offset;313free(idx);314} else {315oidcpy(&idx->oid, result_oid);316ALLOC_GROW(state->written,317state->nr_written + 1,318state->alloc_written);319state->written[state->nr_written++] = idx;320}321return 0;322}
323
324void prepare_loose_object_bulk_checkin(void)325{
326/*327* We lazily create the temporary object directory
328* the first time an object might be added, since
329* callers may not know whether any objects will be
330* added at the time they call begin_odb_transaction.
331*/
332if (!odb_transaction_nesting || bulk_fsync_objdir)333return;334
335bulk_fsync_objdir = tmp_objdir_create("bulk-fsync");336if (bulk_fsync_objdir)337tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);338}
339
340void fsync_loose_object_bulk_checkin(int fd, const char *filename)341{
342/*343* If we have an active ODB transaction, we issue a call that
344* cleans the filesystem page cache but avoids a hardware flush
345* command. Later on we will issue a single hardware flush
346* before renaming the objects to their final names as part of
347* flush_batch_fsync.
348*/
349if (!bulk_fsync_objdir ||350git_fsync(fd, FSYNC_WRITEOUT_ONLY) < 0) {351if (errno == ENOSYS)352warning(_("core.fsyncMethod = batch is unsupported on this platform"));353fsync_or_die(fd, filename);354}355}
356
357int index_blob_bulk_checkin(struct object_id *oid,358int fd, size_t size,359const char *path, unsigned flags)360{
361int status = deflate_blob_to_pack(&bulk_checkin_packfile, oid, fd, size,362path, flags);363if (!odb_transaction_nesting)364flush_bulk_checkin_packfile(&bulk_checkin_packfile);365return status;366}
367
368void begin_odb_transaction(void)369{
370odb_transaction_nesting += 1;371}
372
373void flush_odb_transaction(void)374{
375flush_batch_fsync();376flush_bulk_checkin_packfile(&bulk_checkin_packfile);377}
378
379void end_odb_transaction(void)380{
381odb_transaction_nesting -= 1;382if (odb_transaction_nesting < 0)383BUG("Unbalanced ODB transaction nesting");384
385if (odb_transaction_nesting)386return;387
388flush_odb_transaction();389}
390