git
/
fsck.c
1391 строка · 36.6 Кб
1#define USE_THE_REPOSITORY_VARIABLE2
3#include "git-compat-util.h"4#include "date.h"5#include "dir.h"6#include "hex.h"7#include "object-store-ll.h"8#include "path.h"9#include "repository.h"10#include "object.h"11#include "attr.h"12#include "blob.h"13#include "tree.h"14#include "tree-walk.h"15#include "commit.h"16#include "tag.h"17#include "fsck.h"18#include "refs.h"19#include "url.h"20#include "utf8.h"21#include "oidset.h"22#include "packfile.h"23#include "submodule-config.h"24#include "config.h"25#include "help.h"26
/*
 * Threshold used by fsck_tree(): an entry whose tree_entry_len() is
 * larger than this value is reported as FSCK_MSG_LARGE_PATHNAME.
 * fsck_set_msg_type() overwrites it when the "largePathname" setting
 * carries a ":<number>" suffix.
 */
static ssize_t max_tree_entry_len = 4096;
/*
 * Table of all known fsck message ids, generated from
 * FOREACH_FSCK_MSG_ID. Each row starts out with only the stringified
 * id and its default message type; "downcased" and "camelcased" are
 * filled in lazily by prepare_msg_ids(). The extra trailing row is a
 * NULL sentinel.
 */
#define STR(x) #x
#define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
static struct {
	const char *id_string;		/* id exactly as spelled in the macro list */
	const char *downcased;		/* lower case, underscores removed */
	const char *camelcased;		/* camelCase spelling used in reports */
	enum fsck_msg_type msg_type;	/* built-in default type */
} msg_id_info[FSCK_MSG_MAX + 1] = {
	FOREACH_FSCK_MSG_ID(MSG_ID)
	{ NULL, NULL, NULL, -1 }
};
#undef MSG_ID
#undef STR
43static void prepare_msg_ids(void)44{
45int i;46
47if (msg_id_info[0].downcased)48return;49
50/* convert id_string to lower case, without underscores. */51for (i = 0; i < FSCK_MSG_MAX; i++) {52const char *p = msg_id_info[i].id_string;53int len = strlen(p);54char *q = xmalloc(len);55
56msg_id_info[i].downcased = q;57while (*p)58if (*p == '_')59p++;60else61*(q)++ = tolower(*(p)++);62*q = '\0';63
64p = msg_id_info[i].id_string;65q = xmalloc(len);66msg_id_info[i].camelcased = q;67while (*p) {68if (*p == '_') {69p++;70if (*p)71*q++ = *p++;72} else {73*q++ = tolower(*p++);74}75}76*q = '\0';77}78}
79
80static int parse_msg_id(const char *text)81{
82int i;83
84prepare_msg_ids();85
86for (i = 0; i < FSCK_MSG_MAX; i++)87if (!strcmp(text, msg_id_info[i].downcased))88return i;89
90return -1;91}
92
93void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)94{
95int i;96
97prepare_msg_ids();98
99for (i = 0; i < FSCK_MSG_MAX; i++)100list_config_item(list, prefix, msg_id_info[i].camelcased);101}
102
103static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,104struct fsck_options *options)105{
106assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);107
108if (!options->msg_type) {109enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;110
111if (options->strict && msg_type == FSCK_WARN)112msg_type = FSCK_ERROR;113return msg_type;114}115
116return options->msg_type[msg_id];117}
118
119static enum fsck_msg_type parse_msg_type(const char *str)120{
121if (!strcmp(str, "error"))122return FSCK_ERROR;123else if (!strcmp(str, "warn"))124return FSCK_WARN;125else if (!strcmp(str, "ignore"))126return FSCK_IGNORE;127else128die("Unknown fsck message type: '%s'", str);129}
130
/*
 * Return 0 when `msg_id` is not a known (downcased) message id.
 * Otherwise run `msg_type` through parse_msg_type() and return 1.
 */
int is_valid_msg_type(const char *msg_id, const char *msg_type)
{
	int known = parse_msg_id(msg_id) >= 0;

	if (known)
		parse_msg_type(msg_type);
	return known;
}
138
139void fsck_set_msg_type_from_ids(struct fsck_options *options,140enum fsck_msg_id msg_id,141enum fsck_msg_type msg_type)142{
143if (!options->msg_type) {144int i;145enum fsck_msg_type *severity;146ALLOC_ARRAY(severity, FSCK_MSG_MAX);147for (i = 0; i < FSCK_MSG_MAX; i++)148severity[i] = fsck_msg_type(i, options);149options->msg_type = severity;150}151
152options->msg_type[msg_id] = msg_type;153}
154
155void fsck_set_msg_type(struct fsck_options *options,156const char *msg_id_str, const char *msg_type_str)157{
158int msg_id = parse_msg_id(msg_id_str);159char *to_free = NULL;160enum fsck_msg_type msg_type;161
162if (msg_id < 0)163die("Unhandled message id: %s", msg_id_str);164
165if (msg_id == FSCK_MSG_LARGE_PATHNAME) {166const char *colon = strchr(msg_type_str, ':');167if (colon) {168msg_type_str = to_free =169xmemdupz(msg_type_str, colon - msg_type_str);170colon++;171if (!git_parse_ssize_t(colon, &max_tree_entry_len))172die("unable to parse max tree entry len: %s", colon);173}174}175msg_type = parse_msg_type(msg_type_str);176
177if (msg_type != FSCK_ERROR && msg_id_info[msg_id].msg_type == FSCK_FATAL)178die("Cannot demote %s to %s", msg_id_str, msg_type_str);179
180fsck_set_msg_type_from_ids(options, msg_id, msg_type);181free(to_free);182}
183
184void fsck_set_msg_types(struct fsck_options *options, const char *values)185{
186char *buf = xstrdup(values), *to_free = buf;187int done = 0;188
189while (!done) {190int len = strcspn(buf, " ,|"), equal;191
192done = !buf[len];193if (!len) {194buf++;195continue;196}197buf[len] = '\0';198
199for (equal = 0;200equal < len && buf[equal] != '=' && buf[equal] != ':';201equal++)202buf[equal] = tolower(buf[equal]);203buf[equal] = '\0';204
205if (!strcmp(buf, "skiplist")) {206if (equal == len)207die("skiplist requires a path");208oidset_parse_file(&options->skip_oids, buf + equal + 1,209the_repository->hash_algo);210buf += len + 1;211continue;212}213
214if (equal == len)215die("Missing '=': '%s'", buf);216
217fsck_set_msg_type(options, buf, buf + equal + 1);218buf += len + 1;219}220free(to_free);221}
222
223static int object_on_skiplist(struct fsck_options *opts,224const struct object_id *oid)225{
226return opts && oid && oidset_contains(&opts->skip_oids, oid);227}
228
229/*
230* Provide the common functionality for either fscking refs or objects.
231* It will get the current msg error type and call the error_func callback
232* which is registered in the "fsck_options" struct.
233*/
234static int fsck_vreport(struct fsck_options *options,235void *fsck_report,236enum fsck_msg_id msg_id, const char *fmt, va_list ap)237{
238struct strbuf sb = STRBUF_INIT;239enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);240int result;241
242if (msg_type == FSCK_IGNORE)243return 0;244
245if (msg_type == FSCK_FATAL)246msg_type = FSCK_ERROR;247else if (msg_type == FSCK_INFO)248msg_type = FSCK_WARN;249
250prepare_msg_ids();251strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);252
253strbuf_vaddf(&sb, fmt, ap);254result = options->error_func(options, fsck_report,255msg_type, msg_id, sb.buf);256strbuf_release(&sb);257
258return result;259}
260
261__attribute__((format (printf, 5, 6)))262static int report(struct fsck_options *options,263const struct object_id *oid, enum object_type object_type,264enum fsck_msg_id msg_id, const char *fmt, ...)265{
266va_list ap;267struct fsck_object_report report = {268.oid = oid,269.object_type = object_type270};271int result;272
273if (object_on_skiplist(options, oid))274return 0;275
276va_start(ap, fmt);277result = fsck_vreport(options, &report, msg_id, fmt, ap);278va_end(ap);279
280return result;281}
282
283int fsck_report_ref(struct fsck_options *options,284struct fsck_ref_report *report,285enum fsck_msg_id msg_id,286const char *fmt, ...)287{
288va_list ap;289int result;290va_start(ap, fmt);291result = fsck_vreport(options, report, msg_id, fmt, ap);292va_end(ap);293return result;294}
295
296void fsck_enable_object_names(struct fsck_options *options)297{
298if (!options->object_names)299options->object_names = kh_init_oid_map();300}
301
302const char *fsck_get_object_name(struct fsck_options *options,303const struct object_id *oid)304{
305khiter_t pos;306if (!options->object_names)307return NULL;308pos = kh_get_oid_map(options->object_names, *oid);309if (pos >= kh_end(options->object_names))310return NULL;311return kh_value(options->object_names, pos);312}
313
314void fsck_put_object_name(struct fsck_options *options,315const struct object_id *oid,316const char *fmt, ...)317{
318va_list ap;319struct strbuf buf = STRBUF_INIT;320khiter_t pos;321int hashret;322
323if (!options->object_names)324return;325
326pos = kh_put_oid_map(options->object_names, *oid, &hashret);327if (!hashret)328return;329va_start(ap, fmt);330strbuf_vaddf(&buf, fmt, ap);331kh_value(options->object_names, pos) = strbuf_detach(&buf, NULL);332va_end(ap);333}
334
335const char *fsck_describe_object(struct fsck_options *options,336const struct object_id *oid)337{
338static struct strbuf bufs[] = {339STRBUF_INIT, STRBUF_INIT, STRBUF_INIT, STRBUF_INIT340};341static int b = 0;342struct strbuf *buf;343const char *name = fsck_get_object_name(options, oid);344
345buf = bufs + b;346b = (b + 1) % ARRAY_SIZE(bufs);347strbuf_reset(buf);348strbuf_addstr(buf, oid_to_hex(oid));349if (name)350strbuf_addf(buf, " (%s)", name);351
352return buf->buf;353}
354
355static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *options)356{
357struct tree_desc desc;358struct name_entry entry;359int res = 0;360const char *name;361
362if (parse_tree(tree))363return -1;364
365name = fsck_get_object_name(options, &tree->object.oid);366if (init_tree_desc_gently(&desc, &tree->object.oid,367tree->buffer, tree->size, 0))368return -1;369while (tree_entry_gently(&desc, &entry)) {370struct object *obj;371int result;372
373if (S_ISGITLINK(entry.mode))374continue;375
376if (S_ISDIR(entry.mode)) {377obj = (struct object *)lookup_tree(the_repository, &entry.oid);378if (name && obj)379fsck_put_object_name(options, &entry.oid, "%s%s/",380name, entry.path);381result = options->walk(obj, OBJ_TREE, data, options);382}383else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode)) {384obj = (struct object *)lookup_blob(the_repository, &entry.oid);385if (name && obj)386fsck_put_object_name(options, &entry.oid, "%s%s",387name, entry.path);388result = options->walk(obj, OBJ_BLOB, data, options);389}390else {391result = error("in tree %s: entry %s has bad mode %.6o",392fsck_describe_object(options, &tree->object.oid),393entry.path, entry.mode);394}395if (result < 0)396return result;397if (!res)398res = result;399}400return res;401}
402
/*
 * Call options->walk on the tree of `commit` and then on each parent.
 *
 * When the commit has a recorded name, the tree is named "<name>:" and
 * the parents get names derived from it:
 *   - second and later parents: "<name>^<n>"
 *   - first parent, when <name> ends in "^" or "~<digits>":
 *     "<prefix>~<generation + 1>"
 *   - first parent otherwise: "<name>^"
 *
 * Returns -1 if the commit cannot be parsed, the first negative walk
 * result immediately, and otherwise the first non-zero result (or 0).
 */
static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_options *options)
{
	int counter = 0, generation = 0, name_prefix_len = 0;
	struct commit_list *parents;
	int res;
	int result;
	const char *name;

	if (repo_parse_commit(the_repository, commit))
		return -1;

	name = fsck_get_object_name(options, &commit->object.oid);
	if (name)
		fsck_put_object_name(options, get_commit_tree_oid(commit),
				     "%s:", name);

	result = options->walk((struct object *) repo_get_commit_tree(the_repository, commit),
			       OBJ_TREE, data, options);
	if (result < 0)
		return result;
	res = result;

	parents = commit->parents;
	if (name && parents) {
		int len = strlen(name), power;

		if (len && name[len - 1] == '^') {
			/* "<prefix>^" counts as generation 1 of <prefix> */
			generation = 1;
			name_prefix_len = len - 1;
		}
		else { /* parse ~<generation> suffix */
			/* read trailing decimal digits right to left */
			for (generation = 0, power = 1;
			     len && isdigit(name[len - 1]);
			     power *= 10)
				generation += power * (name[--len] - '0');
			/* power > 1 means at least one digit was consumed */
			if (power > 1 && len && name[len - 1] == '~')
				name_prefix_len = len - 1;
			else {
				/* Maybe a non-first parent, e.g. HEAD^2 */
				generation = 0;
				name_prefix_len = len;
			}
		}
	}

	while (parents) {
		if (name) {
			struct object_id *oid = &parents->item->object.oid;

			/* counter is 0 only for the first parent */
			if (counter++)
				fsck_put_object_name(options, oid, "%s^%d",
						     name, counter);
			else if (generation > 0)
				fsck_put_object_name(options, oid, "%.*s~%d",
						     name_prefix_len, name,
						     generation + 1);
			else
				fsck_put_object_name(options, oid, "%s^", name);
		}
		result = options->walk((struct object *)parents->item, OBJ_COMMIT, data, options);
		if (result < 0)
			return result;
		if (!res)
			res = result;
		parents = parents->next;
	}
	return res;
}
471
472static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *options)473{
474const char *name = fsck_get_object_name(options, &tag->object.oid);475
476if (parse_tag(tag))477return -1;478if (name)479fsck_put_object_name(options, &tag->tagged->oid, "%s", name);480return options->walk(tag->tagged, OBJ_ANY, data, options);481}
482
483int fsck_walk(struct object *obj, void *data, struct fsck_options *options)484{
485if (!obj)486return -1;487
488if (obj->type == OBJ_NONE)489parse_object(the_repository, &obj->oid);490
491switch (obj->type) {492case OBJ_BLOB:493return 0;494case OBJ_TREE:495return fsck_walk_tree((struct tree *)obj, data, options);496case OBJ_COMMIT:497return fsck_walk_commit((struct commit *)obj, data, options);498case OBJ_TAG:499return fsck_walk_tag((struct tag *)obj, data, options);500default:501error("Unknown object type for %s",502fsck_describe_object(options, &obj->oid));503return -1;504}505}
506
/*
 * Growable stack of borrowed name pointers; the stack stores the
 * pointers only and never copies or frees the strings.
 */
struct name_stack {
	const char **names;	/* array of pushed names */
	size_t nr, alloc;	/* entries in use / entries allocated */
};
512static void name_stack_push(struct name_stack *stack, const char *name)513{
514ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc);515stack->names[stack->nr++] = name;516}
517
518static const char *name_stack_pop(struct name_stack *stack)519{
520return stack->nr ? stack->names[--stack->nr] : NULL;521}
522
523static void name_stack_clear(struct name_stack *stack)524{
525FREE_AND_NULL(stack->names);526stack->nr = stack->alloc = 0;527}
528
/*
 * The entries in a tree are ordered in the _path_ order,
 * which means that a directory entry is ordered by adding
 * a slash to the end of it.
 *
 * So a directory called "a" is ordered _after_ a file
 * called "a.c", because "a/" sorts after "a.c".
 */
/* Non-zero results of verify_ordered(); 0 means correctly ordered. */
#define TREE_UNORDERED (-1)
#define TREE_HAS_DUPS  (-2)
/*
 * True for any non-NUL byte that sorts strictly before '/'.
 */
static int is_less_than_slash(unsigned char c)
{
	if (c == '\0')
		return 0;
	return c < '/';
}
544
/*
 * Check that the entry (mode1, name1) may be directly followed by the
 * entry (mode2, name2) in a tree.
 *
 * Returns 0 when the pair is in order, TREE_UNORDERED when it is not,
 * and TREE_HAS_DUPS when a duplicate name is detected. `candidates`
 * carries state between successive calls for the same tree and is used
 * to find duplicates that are not adjacent (see below).
 */
static int verify_ordered(unsigned mode1, const char *name1,
			  unsigned mode2, const char *name2,
			  struct name_stack *candidates)
{
	int len1 = strlen(name1);
	int len2 = strlen(name2);
	int len = len1 < len2 ? len1 : len2;
	unsigned char c1, c2;
	int cmp;

	cmp = memcmp(name1, name2, len);
	if (cmp < 0)
		return 0;
	if (cmp > 0)
		return TREE_UNORDERED;

	/*
	 * Ok, the first <len> characters are the same.
	 * Now we need to order the next one, but turn
	 * a '\0' into a '/' for a directory entry.
	 */
	c1 = name1[len];
	c2 = name2[len];
	if (!c1 && !c2)
		/*
		 * git-write-tree used to write out a nonsense tree that has
		 * entries with the same name, one blob and one tree.  Make
		 * sure we do not have duplicate entries.
		 */
		return TREE_HAS_DUPS;
	if (!c1 && S_ISDIR(mode1))
		c1 = '/';
	if (!c2 && S_ISDIR(mode2))
		c2 = '/';

	/*
	 * There can be non-consecutive duplicates due to the implicitly
	 * added slash, e.g.:
	 *
	 *   foo
	 *   foo.bar
	 *   foo.bar.baz
	 *   foo.bar/
	 *   foo/
	 *
	 * Record non-directory candidates (like "foo" and "foo.bar" in
	 * the example) on a stack and check directory candidates (like
	 * "foo/" and "foo.bar/") against that stack.
	 */
	if (!c1 && is_less_than_slash(c2)) {
		/* name1 is a non-directory that is a proper prefix of name2 */
		name_stack_push(candidates, name1);
	} else if (c2 == '/' && is_less_than_slash(c1)) {
		for (;;) {
			const char *p;
			const char *f_name = name_stack_pop(candidates);

			if (!f_name)
				break;
			/* candidates that are not a prefix of name2 are dropped */
			if (!skip_prefix(name2, f_name, &p))
				continue;
			if (!*p)
				return TREE_HAS_DUPS;
			if (is_less_than_slash(*p)) {
				/* still a possible match for a later directory */
				name_stack_push(candidates, f_name);
				break;
			}
		}
	}

	return c1 < c2 ? 0 : TREE_UNORDERED;
}
616
/*
 * Check the raw tree object in `buffer` (`size` bytes) named by
 * `tree_oid`.
 *
 * Every entry is examined and the findings are collected in the has_*
 * flags, so each kind of problem is reported at most once per tree
 * after the loop. Symlinked .gitmodules, .gitattributes, .gitignore
 * and .mailmap entries are the exception: they are reported as they
 * are met. Non-symlink .gitmodules and .gitattributes entries are
 * remembered in options->gitmodules_found / gitattributes_found.
 *
 * Returns the sum of the values returned by report().
 */
static int fsck_tree(const struct object_id *tree_oid,
		     const char *buffer, unsigned long size,
		     struct fsck_options *options)
{
	int retval = 0;
	int has_null_sha1 = 0;
	int has_full_path = 0;
	int has_empty_name = 0;
	int has_dot = 0;
	int has_dotdot = 0;
	int has_dotgit = 0;
	int has_zero_pad = 0;
	int has_bad_modes = 0;
	int has_dup_entries = 0;
	int not_properly_sorted = 0;
	int has_large_name = 0;
	struct tree_desc desc;
	unsigned o_mode;
	const char *o_name;
	struct name_stack df_dup_candidates = { NULL };

	if (init_tree_desc_gently(&desc, tree_oid, buffer, size,
				  TREE_DESC_RAW_MODES)) {
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_BAD_TREE,
				 "cannot be parsed as a tree");
		return retval;
	}

	/* mode and name of the previous entry, for the ordering check */
	o_mode = 0;
	o_name = NULL;

	while (desc.size) {
		unsigned short mode;
		const char *name, *backslash;
		const struct object_id *entry_oid;

		entry_oid = tree_entry_extract(&desc, &name, &mode);

		has_null_sha1 |= is_null_oid(entry_oid);
		has_full_path |= !!strchr(name, '/');
		has_empty_name |= !*name;
		has_dot |= !strcmp(name, ".");
		has_dotdot |= !strcmp(name, "..");
		has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
		/* the first byte of the raw entry is a '0' */
		has_zero_pad |= *(char *)desc.buffer == '0';
		has_large_name |= tree_entry_len(&desc.entry) > max_tree_entry_len;

		if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
			if (!S_ISLNK(mode))
				oidset_insert(&options->gitmodules_found,
					      entry_oid);
			else
				retval += report(options,
						 tree_oid, OBJ_TREE,
						 FSCK_MSG_GITMODULES_SYMLINK,
						 ".gitmodules is a symbolic link");
		}

		if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
			if (!S_ISLNK(mode))
				oidset_insert(&options->gitattributes_found,
					      entry_oid);
			else
				retval += report(options, tree_oid, OBJ_TREE,
						 FSCK_MSG_GITATTRIBUTES_SYMLINK,
						 ".gitattributes is a symlink");
		}

		if (S_ISLNK(mode)) {
			if (is_hfs_dotgitignore(name) ||
			    is_ntfs_dotgitignore(name))
				retval += report(options, tree_oid, OBJ_TREE,
						 FSCK_MSG_GITIGNORE_SYMLINK,
						 ".gitignore is a symlink");
			if (is_hfs_dotmailmap(name) ||
			    is_ntfs_dotmailmap(name))
				retval += report(options, tree_oid, OBJ_TREE,
						 FSCK_MSG_MAILMAP_SYMLINK,
						 ".mailmap is a symlink");
		}

		/*
		 * Repeat the NTFS .git / .gitmodules checks on the part of
		 * the name following each backslash.
		 */
		if ((backslash = strchr(name, '\\'))) {
			while (backslash) {
				backslash++;
				has_dotgit |= is_ntfs_dotgit(backslash);
				if (is_ntfs_dotgitmodules(backslash)) {
					if (!S_ISLNK(mode))
						oidset_insert(&options->gitmodules_found,
							      entry_oid);
					else
						retval += report(options, tree_oid, OBJ_TREE,
								 FSCK_MSG_GITMODULES_SYMLINK,
								 ".gitmodules is a symbolic link");
				}
				backslash = strchr(backslash, '\\');
			}
		}

		if (update_tree_entry_gently(&desc)) {
			retval += report(options, tree_oid, OBJ_TREE,
					 FSCK_MSG_BAD_TREE,
					 "cannot be parsed as a tree");
			break;
		}

		switch (mode) {
		/*
		 * Standard modes..
		 */
		case S_IFREG | 0755:
		case S_IFREG | 0644:
		case S_IFLNK:
		case S_IFDIR:
		case S_IFGITLINK:
			break;
		/*
		 * This is nonstandard, but we had a few of these
		 * early on when we honored the full set of mode
		 * bits..
		 */
		case S_IFREG | 0664:
			if (!options->strict)
				break;
			/* fallthrough */
		default:
			has_bad_modes = 1;
		}

		if (o_name) {
			switch (verify_ordered(o_mode, o_name, mode, name,
					       &df_dup_candidates)) {
			case TREE_UNORDERED:
				not_properly_sorted = 1;
				break;
			case TREE_HAS_DUPS:
				has_dup_entries = 1;
				break;
			default:
				break;
			}
		}

		o_mode = mode;
		o_name = name;
	}

	name_stack_clear(&df_dup_candidates);

	if (has_null_sha1)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_NULL_SHA1,
				 "contains entries pointing to null sha1");
	if (has_full_path)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_FULL_PATHNAME,
				 "contains full pathnames");
	if (has_empty_name)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_EMPTY_NAME,
				 "contains empty pathname");
	if (has_dot)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_HAS_DOT,
				 "contains '.'");
	if (has_dotdot)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_HAS_DOTDOT,
				 "contains '..'");
	if (has_dotgit)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_HAS_DOTGIT,
				 "contains '.git'");
	if (has_zero_pad)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_ZERO_PADDED_FILEMODE,
				 "contains zero-padded file modes");
	if (has_bad_modes)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_BAD_FILEMODE,
				 "contains bad file modes");
	if (has_dup_entries)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_DUPLICATE_ENTRIES,
				 "contains duplicate file entries");
	if (not_properly_sorted)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_TREE_NOT_SORTED,
				 "not properly sorted");
	if (has_large_name)
		retval += report(options, tree_oid, OBJ_TREE,
				 FSCK_MSG_LARGE_PATHNAME,
				 "contains excessively large pathname");
	return retval;
}
812
813/*
814* Confirm that the headers of a commit or tag object end in a reasonable way,
815* either with the usual "\n\n" separator, or at least with a trailing newline
816* on the final header line.
817*
818* This property is important for the memory safety of our callers. It allows
819* them to scan the buffer linewise without constantly checking the remaining
820* size as long as:
821*
822* - they check that there are bytes left in the buffer at the start of any
823* line (i.e., that the last newline they saw was not the final one we
824* found here)
825*
826* - any intra-line scanning they do will stop at a newline, which will worst
827* case hit the newline we found here as the end-of-header. This makes it
828* OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
829*/
830static int verify_headers(const void *data, unsigned long size,831const struct object_id *oid, enum object_type type,832struct fsck_options *options)833{
834const char *buffer = (const char *)data;835unsigned long i;836
837for (i = 0; i < size; i++) {838switch (buffer[i]) {839case '\0':840return report(options, oid, type,841FSCK_MSG_NUL_IN_HEADER,842"unterminated header: NUL at offset %ld", i);843case '\n':844if (i + 1 < size && buffer[i + 1] == '\n')845return 0;846}847}848
849/*850* We did not find double-LF that separates the header
851* and the body. Not having a body is not a crime but
852* we do want to see the terminating LF for the last header
853* line.
854*/
855if (size && buffer[size - 1] == '\n')856return 0;857
858return report(options, oid, type,859FSCK_MSG_UNTERMINATED_HEADER, "unterminated header");860}
861
862static int fsck_ident(const char **ident,863const struct object_id *oid, enum object_type type,864struct fsck_options *options)865{
866const char *p = *ident;867char *end;868
869*ident = strchrnul(*ident, '\n');870if (**ident == '\n')871(*ident)++;872
873if (*p == '<')874return report(options, oid, type, FSCK_MSG_MISSING_NAME_BEFORE_EMAIL, "invalid author/committer line - missing space before email");875p += strcspn(p, "<>\n");876if (*p == '>')877return report(options, oid, type, FSCK_MSG_BAD_NAME, "invalid author/committer line - bad name");878if (*p != '<')879return report(options, oid, type, FSCK_MSG_MISSING_EMAIL, "invalid author/committer line - missing email");880if (p[-1] != ' ')881return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_EMAIL, "invalid author/committer line - missing space before email");882p++;883p += strcspn(p, "<>\n");884if (*p != '>')885return report(options, oid, type, FSCK_MSG_BAD_EMAIL, "invalid author/committer line - bad email");886p++;887if (*p != ' ')888return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");889p++;890/*891* Our timestamp parser is based on the C strto*() functions, which
892* will happily eat whitespace, including the newline that is supposed
893* to prevent us walking past the end of the buffer. So do our own
894* scan, skipping linear whitespace but not newlines, and then
895* confirming we found a digit. We _could_ be even more strict here,
896* as we really expect only a single space, but since we have
897* traditionally allowed extra whitespace, we'll continue to do so.
898*/
899while (*p == ' ' || *p == '\t')900p++;901if (!isdigit(*p))902return report(options, oid, type, FSCK_MSG_BAD_DATE,903"invalid author/committer line - bad date");904if (*p == '0' && p[1] != ' ')905return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");906if (date_overflows(parse_timestamp(p, &end, 10)))907return report(options, oid, type, FSCK_MSG_BAD_DATE_OVERFLOW, "invalid author/committer line - date causes integer overflow");908if ((end == p || *end != ' '))909return report(options, oid, type, FSCK_MSG_BAD_DATE, "invalid author/committer line - bad date");910p = end + 1;911if ((*p != '+' && *p != '-') ||912!isdigit(p[1]) ||913!isdigit(p[2]) ||914!isdigit(p[3]) ||915!isdigit(p[4]) ||916(p[5] != '\n'))917return report(options, oid, type, FSCK_MSG_BAD_TIMEZONE, "invalid author/committer line - bad time zone");918p += 6;919return 0;920}
921
/*
 * Check the header of the commit object in `buffer` (`size` bytes).
 *
 * The expected layout is one "tree" line, any number of "parent"
 * lines, exactly one "author" line and a "committer" line, in that
 * order. A NUL byte anywhere in the buffer is reported as well.
 *
 * Returns -1 when verify_headers() fails, the result of report() or
 * fsck_ident() for the first problem that yields non-zero, and 0
 * otherwise.
 */
static int fsck_commit(const struct object_id *oid,
		       const char *buffer, unsigned long size,
		       struct fsck_options *options)
{
	struct object_id tree_oid, parent_oid;
	unsigned author_count;
	int err;
	const char *buffer_begin = buffer;
	const char *buffer_end = buffer + size;
	const char *p;

	/*
	 * We _must_ stop parsing immediately if this reports failure, as the
	 * memory safety of the rest of the function depends on it. See the
	 * comment above the definition of verify_headers() for more details.
	 */
	if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
		return -1;

	if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
	if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
		if (err)
			return err;
	}
	/*
	 * NOTE(review): this reads `p` even when parse_oid_hex() failed and
	 * the report above returned 0; confirm parse_oid_hex() always stores
	 * through its last argument. The same applies to the parent loop.
	 */
	buffer = p + 1;
	while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
		if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
			err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
			if (err)
				return err;
		}
		buffer = p + 1;
	}
	author_count = 0;
	while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
		author_count++;
		err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
		if (err)
			return err;
	}
	/*
	 * With exactly one author line, err still holds the 0 stored by the
	 * loop above, so it is always initialized when tested below.
	 */
	if (author_count < 1)
		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_AUTHOR, "invalid format - expected 'author' line");
	else if (author_count > 1)
		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
	if (err)
		return err;
	if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
		return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
	err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
	if (err)
		return err;
	/* searches the whole object, starting from its first byte */
	if (memchr(buffer_begin, '\0', size)) {
		err = report(options, oid, OBJ_COMMIT, FSCK_MSG_NUL_IN_COMMIT,
			     "NUL byte in the commit object body");
		if (err)
			return err;
	}
	return 0;
}
983
984static int fsck_tag(const struct object_id *oid, const char *buffer,985unsigned long size, struct fsck_options *options)986{
987struct object_id tagged_oid;988int tagged_type;989return fsck_tag_standalone(oid, buffer, size, options, &tagged_oid,990&tagged_type);991}
992
993int fsck_tag_standalone(const struct object_id *oid, const char *buffer,994unsigned long size, struct fsck_options *options,995struct object_id *tagged_oid,996int *tagged_type)997{
998int ret = 0;999char *eol;1000struct strbuf sb = STRBUF_INIT;1001const char *buffer_end = buffer + size;1002const char *p;1003
1004/*1005* We _must_ stop parsing immediately if this reports failure, as the
1006* memory safety of the rest of the function depends on it. See the
1007* comment above the definition of verify_headers() for more details.
1008*/
1009ret = verify_headers(buffer, size, oid, OBJ_TAG, options);1010if (ret)1011goto done;1012
1013if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {1014ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");1015goto done;1016}1017if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') {1018ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");1019if (ret)1020goto done;1021}1022buffer = p + 1;1023
1024if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {1025ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");1026goto done;1027}1028eol = memchr(buffer, '\n', buffer_end - buffer);1029if (!eol) {1030ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");1031goto done;1032}1033*tagged_type = type_from_string_gently(buffer, eol - buffer, 1);1034if (*tagged_type < 0)1035ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_TYPE, "invalid 'type' value");1036if (ret)1037goto done;1038buffer = eol + 1;1039
1040if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {1041ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");1042goto done;1043}1044eol = memchr(buffer, '\n', buffer_end - buffer);1045if (!eol) {1046ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");1047goto done;1048}1049strbuf_addf(&sb, "refs/tags/%.*s", (int)(eol - buffer), buffer);1050if (check_refname_format(sb.buf, 0)) {1051ret = report(options, oid, OBJ_TAG,1052FSCK_MSG_BAD_TAG_NAME,1053"invalid 'tag' name: %.*s",1054(int)(eol - buffer), buffer);1055if (ret)1056goto done;1057}1058buffer = eol + 1;1059
1060if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {1061/* early tags do not contain 'tagger' lines; warn only */1062ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");1063if (ret)1064goto done;1065}1066else1067ret = fsck_ident(&buffer, oid, OBJ_TAG, options);1068
1069if (buffer < buffer_end && !starts_with(buffer, "\n")) {1070/*1071* The verify_headers() check will allow
1072* e.g. "[...]tagger <tagger>\nsome
1073* garbage\n\nmessage" to pass, thinking "some
1074* garbage" could be a custom header. E.g. "mktag"
1075* doesn't want any unknown headers.
1076*/
1077ret = report(options, oid, OBJ_TAG, FSCK_MSG_EXTRA_HEADER_ENTRY, "invalid format - extra header(s) after 'tagger'");1078if (ret)1079goto done;1080}1081
1082done:1083strbuf_release(&sb);1084return ret;1085}
1086
/* State shared between fsck_blob() and the fsck_gitmodules_fn() callback. */
struct fsck_gitmodules_data {
	const struct object_id *oid;	/* the .gitmodules blob being checked */
	struct fsck_options *options;
	int ret;			/* OR of all report() results so far */
};
1093static int fsck_gitmodules_fn(const char *var, const char *value,1094const struct config_context *ctx UNUSED,1095void *vdata)1096{
1097struct fsck_gitmodules_data *data = vdata;1098const char *subsection, *key;1099size_t subsection_len;1100char *name;1101
1102if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 ||1103!subsection)1104return 0;1105
1106name = xmemdupz(subsection, subsection_len);1107if (check_submodule_name(name) < 0)1108data->ret |= report(data->options,1109data->oid, OBJ_BLOB,1110FSCK_MSG_GITMODULES_NAME,1111"disallowed submodule name: %s",1112name);1113if (!strcmp(key, "url") && value &&1114check_submodule_url(value) < 0)1115data->ret |= report(data->options,1116data->oid, OBJ_BLOB,1117FSCK_MSG_GITMODULES_URL,1118"disallowed submodule url: %s",1119value);1120if (!strcmp(key, "path") && value &&1121looks_like_command_line_option(value))1122data->ret |= report(data->options,1123data->oid, OBJ_BLOB,1124FSCK_MSG_GITMODULES_PATH,1125"disallowed submodule path: %s",1126value);1127if (!strcmp(key, "update") && value &&1128parse_submodule_update_type(value) == SM_UPDATE_COMMAND)1129data->ret |= report(data->options, data->oid, OBJ_BLOB,1130FSCK_MSG_GITMODULES_UPDATE,1131"disallowed submodule update setting: %s",1132value);1133free(name);1134
1135return 0;1136}
1137
1138static int fsck_blob(const struct object_id *oid, const char *buf,1139unsigned long size, struct fsck_options *options)1140{
1141int ret = 0;1142
1143if (object_on_skiplist(options, oid))1144return 0;1145
1146if (oidset_contains(&options->gitmodules_found, oid)) {1147struct config_options config_opts = { 0 };1148struct fsck_gitmodules_data data;1149
1150oidset_insert(&options->gitmodules_done, oid);1151
1152if (!buf) {1153/*1154* A missing buffer here is a sign that the caller found the
1155* blob too gigantic to load into memory. Let's just consider
1156* that an error.
1157*/
1158return report(options, oid, OBJ_BLOB,1159FSCK_MSG_GITMODULES_LARGE,1160".gitmodules too large to parse");1161}1162
1163data.oid = oid;1164data.options = options;1165data.ret = 0;1166config_opts.error_action = CONFIG_ERROR_SILENT;1167if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,1168".gitmodules", buf, size, &data,1169CONFIG_SCOPE_UNKNOWN, &config_opts))1170data.ret |= report(options, oid, OBJ_BLOB,1171FSCK_MSG_GITMODULES_PARSE,1172"could not parse gitmodules blob");1173ret |= data.ret;1174}1175
1176if (oidset_contains(&options->gitattributes_found, oid)) {1177const char *ptr;1178
1179oidset_insert(&options->gitattributes_done, oid);1180
1181if (!buf || size > ATTR_MAX_FILE_SIZE) {1182/*1183* A missing buffer here is a sign that the caller found the
1184* blob too gigantic to load into memory. Let's just consider
1185* that an error.
1186*/
1187return report(options, oid, OBJ_BLOB,1188FSCK_MSG_GITATTRIBUTES_LARGE,1189".gitattributes too large to parse");1190}1191
1192for (ptr = buf; *ptr; ) {1193const char *eol = strchrnul(ptr, '\n');1194if (eol - ptr >= ATTR_MAX_LINE_LENGTH) {1195ret |= report(options, oid, OBJ_BLOB,1196FSCK_MSG_GITATTRIBUTES_LINE_LENGTH,1197".gitattributes has too long lines to parse");1198break;1199}1200
1201ptr = *eol ? eol + 1 : eol;1202}1203}1204
1205return ret;1206}
1207
1208int fsck_object(struct object *obj, void *data, unsigned long size,1209struct fsck_options *options)1210{
1211if (!obj)1212return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");1213
1214return fsck_buffer(&obj->oid, obj->type, data, size, options);1215}
1216
1217int fsck_buffer(const struct object_id *oid, enum object_type type,1218const void *data, unsigned long size,1219struct fsck_options *options)1220{
1221if (type == OBJ_BLOB)1222return fsck_blob(oid, data, size, options);1223if (type == OBJ_TREE)1224return fsck_tree(oid, data, size, options);1225if (type == OBJ_COMMIT)1226return fsck_commit(oid, data, size, options);1227if (type == OBJ_TAG)1228return fsck_tag(oid, data, size, options);1229
1230return report(options, oid, type,1231FSCK_MSG_UNKNOWN_TYPE,1232"unknown type '%d' (internal fsck error)",1233type);1234}
1235
1236int fsck_objects_error_function(struct fsck_options *o,1237void *fsck_report,1238enum fsck_msg_type msg_type,1239enum fsck_msg_id msg_id UNUSED,1240const char *message)1241{
1242struct fsck_object_report *report = fsck_report;1243const struct object_id *oid = report->oid;1244
1245if (msg_type == FSCK_WARN) {1246warning("object %s: %s", fsck_describe_object(o, oid), message);1247return 0;1248}1249error("object %s: %s", fsck_describe_object(o, oid), message);1250return 1;1251}
1252
1253int fsck_refs_error_function(struct fsck_options *options UNUSED,1254void *fsck_report,1255enum fsck_msg_type msg_type,1256enum fsck_msg_id msg_id UNUSED,1257const char *message)1258{
1259struct fsck_ref_report *report = fsck_report;1260struct strbuf sb = STRBUF_INIT;1261int ret = 0;1262
1263strbuf_addstr(&sb, report->path);1264
1265if (report->oid)1266strbuf_addf(&sb, " -> (%s)", oid_to_hex(report->oid));1267else if (report->referent)1268strbuf_addf(&sb, " -> (%s)", report->referent);1269
1270if (msg_type == FSCK_WARN)1271warning("%s: %s", sb.buf, message);1272else1273ret = error("%s: %s", sb.buf, message);1274
1275strbuf_release(&sb);1276return ret;1277}
1278
1279static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,1280enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,1281struct fsck_options *options, const char *blob_type)1282{
1283int ret = 0;1284struct oidset_iter iter;1285const struct object_id *oid;1286
1287oidset_iter_init(blobs_found, &iter);1288while ((oid = oidset_iter_next(&iter))) {1289enum object_type type;1290unsigned long size;1291char *buf;1292
1293if (oidset_contains(blobs_done, oid))1294continue;1295
1296buf = repo_read_object_file(the_repository, oid, &type, &size);1297if (!buf) {1298if (is_promisor_object(oid))1299continue;1300ret |= report(options,1301oid, OBJ_BLOB, msg_missing,1302"unable to read %s blob", blob_type);1303continue;1304}1305
1306if (type == OBJ_BLOB)1307ret |= fsck_blob(oid, buf, size, options);1308else1309ret |= report(options, oid, type, msg_type,1310"non-blob found at %s", blob_type);1311free(buf);1312}1313
1314oidset_clear(blobs_found);1315oidset_clear(blobs_done);1316
1317return ret;1318}
1319
1320int fsck_finish(struct fsck_options *options)1321{
1322int ret = 0;1323
1324ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,1325FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,1326options, ".gitmodules");1327ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,1328FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,1329options, ".gitattributes");1330
1331return ret;1332}
1333
1334void fsck_options_clear(struct fsck_options *options)1335{
1336free(options->msg_type);1337oidset_clear(&options->skip_oids);1338oidset_clear(&options->gitmodules_found);1339oidset_clear(&options->gitmodules_done);1340oidset_clear(&options->gitattributes_found);1341oidset_clear(&options->gitattributes_done);1342kh_clear_oid_map(options->object_names);1343}
1344
1345int git_fsck_config(const char *var, const char *value,1346const struct config_context *ctx, void *cb)1347{
1348struct fsck_options *options = cb;1349const char *msg_id;1350
1351if (strcmp(var, "fsck.skiplist") == 0) {1352char *path;1353struct strbuf sb = STRBUF_INIT;1354
1355if (git_config_pathname(&path, var, value))1356return 1;1357strbuf_addf(&sb, "skiplist=%s", path);1358free(path);1359fsck_set_msg_types(options, sb.buf);1360strbuf_release(&sb);1361return 0;1362}1363
1364if (skip_prefix(var, "fsck.", &msg_id)) {1365if (!value)1366return config_error_nonbool(var);1367fsck_set_msg_type(options, msg_id, value);1368return 0;1369}1370
1371return git_default_config(var, value, ctx, cb);1372}
1373
/*
 * Custom error callbacks that are used in more than one place.
 */
1377
1378int fsck_objects_error_cb_print_missing_gitmodules(struct fsck_options *o,1379void *fsck_report,1380enum fsck_msg_type msg_type,1381enum fsck_msg_id msg_id,1382const char *message)1383{
1384if (msg_id == FSCK_MSG_GITMODULES_MISSING) {1385struct fsck_object_report *report = fsck_report;1386puts(oid_to_hex(report->oid));1387return 0;1388}1389return fsck_objects_error_function(o, fsck_report,1390msg_type, msg_id, message);1391}
1392