git
/
pseudo-merge.c
765 строк · 19.7 Кб
1#define USE_THE_REPOSITORY_VARIABLE2
3#include "git-compat-util.h"4#include "pseudo-merge.h"5#include "date.h"6#include "oid-array.h"7#include "strbuf.h"8#include "config.h"9#include "string-list.h"10#include "refs.h"11#include "pack-bitmap.h"12#include "commit.h"13#include "alloc.h"14#include "progress.h"15#include "hex.h"16
17#define DEFAULT_PSEUDO_MERGE_DECAY 1.018#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 6419#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 120#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")21#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")22#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 51223
24static double gitexp(double base, int exp)25{
26double result = 1;27while (1) {28if (exp % 2)29result *= base;30exp >>= 1;31if (!exp)32break;33base *= base;34}35return result;36}
37
38static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group,39const struct pseudo_merge_matches *matches,40uint32_t i)41{
42double C = 0.0f;43uint32_t n;44
45/*46* The size of pseudo-merge groups decays according to a power series,
47* which looks like:
48*
49* f(n) = C * n^-k
50*
51* , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
52* is the decay rate, and 'C' is a scaling value.
53*
54* The value of C depends on the number of groups, decay rate, and total
55* number of commits. It is computed such that if there are M and N
56* total groups and commits, respectively, that:
57*
58* N = f(0) + f(1) + ... f(M-1)
59*
60* Rearranging to isolate C, we get:
61*
62* N = \sum_{n=1}^M C / n^k
63*
64* N / C = \sum_{n=1}^M n^-k
65*
66* C = N / \sum_{n=1}^M n^-k
67*
68* For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
69* total commits equal to 10,000, and 'M' being equal to 6 groups, then
70* the (rounded) group sizes are:
71*
72* { 5469, 1934, 1053, 684, 489, 372 }
73*
74* increasing the number of total groups, say to 10, scales the group
75* sizes appropriately:
76*
77* { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
78*/
79for (n = 0; n < group->max_merges; n++)80C += 1.0 / gitexp(n + 1, group->decay);81C = matches->unstable_nr / C;82
83return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5);84}
85
86static void pseudo_merge_group_init(struct pseudo_merge_group *group)87{
88memset(group, 0, sizeof(struct pseudo_merge_group));89
90strmap_init_with_options(&group->matches, NULL, 0);91
92group->decay = DEFAULT_PSEUDO_MERGE_DECAY;93group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;94group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;95group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD;96group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD;97group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;98}
99
100static int pseudo_merge_config(const char *var, const char *value,101const struct config_context *ctx,102void *cb_data)103{
104struct string_list *list = cb_data;105struct string_list_item *item;106struct pseudo_merge_group *group;107struct strbuf buf = STRBUF_INIT;108const char *sub, *key;109size_t sub_len;110int ret = 0;111
112if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key))113goto done;114
115if (!sub_len)116goto done;117
118strbuf_add(&buf, sub, sub_len);119
120item = string_list_lookup(list, buf.buf);121if (!item) {122item = string_list_insert(list, buf.buf);123
124item->util = xmalloc(sizeof(struct pseudo_merge_group));125pseudo_merge_group_init(item->util);126}127
128group = item->util;129
130if (!strcmp(key, "pattern")) {131struct strbuf re = STRBUF_INIT;132
133free(group->pattern);134if (*value != '^')135strbuf_addch(&re, '^');136strbuf_addstr(&re, value);137
138group->pattern = xcalloc(1, sizeof(regex_t));139if (regcomp(group->pattern, re.buf, REG_EXTENDED))140die(_("failed to load pseudo-merge regex for %s: '%s'"),141sub, re.buf);142
143strbuf_release(&re);144} else if (!strcmp(key, "decay")) {145group->decay = git_config_double(var, value, ctx->kvi);146if (group->decay < 0) {147warning(_("%s must be non-negative, using default"), var);148group->decay = DEFAULT_PSEUDO_MERGE_DECAY;149}150} else if (!strcmp(key, "samplerate")) {151group->sample_rate = git_config_double(var, value, ctx->kvi);152if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {153warning(_("%s must be between 0 and 1, using default"), var);154group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;155}156} else if (!strcmp(key, "threshold")) {157if (git_config_expiry_date(&group->threshold, var, value)) {158ret = -1;159goto done;160}161} else if (!strcmp(key, "maxmerges")) {162group->max_merges = git_config_int(var, value, ctx->kvi);163if (group->max_merges < 0) {164warning(_("%s must be non-negative, using default"), var);165group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;166}167} else if (!strcmp(key, "stablethreshold")) {168if (git_config_expiry_date(&group->stable_threshold, var, value)) {169ret = -1;170goto done;171}172} else if (!strcmp(key, "stablesize")) {173group->stable_size = git_config_int(var, value, ctx->kvi);174if (group->stable_size <= 0) {175warning(_("%s must be positive, using default"), var);176group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;177}178}179
180done:181strbuf_release(&buf);182
183return ret;184}
185
186void load_pseudo_merges_from_config(struct repository *r,187struct string_list *list)188{
189struct string_list_item *item;190
191repo_config(r, pseudo_merge_config, list);192
193for_each_string_list_item(item, list) {194struct pseudo_merge_group *group = item->util;195if (!group->pattern)196die(_("pseudo-merge group '%s' missing required pattern"),197item->string);198if (group->threshold < group->stable_threshold)199die(_("pseudo-merge group '%s' has unstable threshold "200"before stable one"), item->string);201}202}
203
204static int find_pseudo_merge_group_for_ref(const char *refname,205const char *referent UNUSED,206const struct object_id *oid,207int flags UNUSED,208void *_data)209{
210struct bitmap_writer *writer = _data;211struct object_id peeled;212struct commit *c;213uint32_t i;214int has_bitmap;215
216if (!peel_iterated_oid(the_repository, oid, &peeled))217oid = &peeled;218
219c = lookup_commit(the_repository, oid);220if (!c)221return 0;222if (!packlist_find(writer->to_pack, oid))223return 0;224
225has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid);226
227for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {228struct pseudo_merge_group *group;229struct pseudo_merge_matches *matches;230struct strbuf group_name = STRBUF_INIT;231regmatch_t captures[16];232size_t j;233
234group = writer->pseudo_merge_groups.items[i].util;235if (regexec(group->pattern, refname, ARRAY_SIZE(captures),236captures, 0))237continue;238
239if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1)240warning(_("pseudo-merge regex from config has too many capture "241"groups (max=%"PRIuMAX")"),242(uintmax_t)ARRAY_SIZE(captures) - 2);243
244for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) {245regmatch_t *match = &captures[j];246if (match->rm_so == -1)247continue;248
249if (group_name.len)250strbuf_addch(&group_name, '-');251
252strbuf_add(&group_name, refname + match->rm_so,253match->rm_eo - match->rm_so);254}255
256matches = strmap_get(&group->matches, group_name.buf);257if (!matches) {258matches = xcalloc(1, sizeof(*matches));259strmap_put(&group->matches, strbuf_detach(&group_name, NULL),260matches);261}262
263if (c->date <= group->stable_threshold) {264ALLOC_GROW(matches->stable, matches->stable_nr + 1,265matches->stable_alloc);266matches->stable[matches->stable_nr++] = c;267} else if (c->date <= group->threshold && !has_bitmap) {268ALLOC_GROW(matches->unstable, matches->unstable_nr + 1,269matches->unstable_alloc);270matches->unstable[matches->unstable_nr++] = c;271}272
273strbuf_release(&group_name);274}275
276return 0;277}
278
279static struct commit *push_pseudo_merge(struct pseudo_merge_group *group)280{
281struct commit *merge;282
283ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc);284
285merge = alloc_commit_node(the_repository);286merge->object.parsed = 1;287merge->object.flags |= BITMAP_PSEUDO_MERGE;288
289group->merges[group->merges_nr++] = merge;290
291return merge;292}
293
294static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits,295const struct object_id *oid)296
297{
298struct pseudo_merge_commit_idx *pmc;299int hash_ret;300khiter_t hash_pos = kh_put_oid_map(pseudo_merge_commits, *oid,301&hash_ret);302
303if (hash_ret) {304CALLOC_ARRAY(pmc, 1);305kh_value(pseudo_merge_commits, hash_pos) = pmc;306} else {307pmc = kh_value(pseudo_merge_commits, hash_pos);308}309
310return pmc;311}
312
313#define MIN_PSEUDO_MERGE_SIZE 8314
315static void select_pseudo_merges_1(struct bitmap_writer *writer,316struct pseudo_merge_group *group,317struct pseudo_merge_matches *matches)318{
319uint32_t i, j;320uint32_t stable_merges_nr;321
322if (!matches->stable_nr && !matches->unstable_nr)323return; /* all tips in this group already have bitmaps */324
325stable_merges_nr = matches->stable_nr / group->stable_size;326if (matches->stable_nr % group->stable_size)327stable_merges_nr++;328
329/* make stable_merges_nr pseudo merges for stable commits */330for (i = 0, j = 0; i < stable_merges_nr; i++) {331struct commit *merge;332struct commit_list **p;333
334merge = push_pseudo_merge(group);335p = &merge->parents;336
337/*338* For each pseudo-merge created above, add parents to the
339* allocated commit node from the stable set of commits
340* (un-bitmapped, newer than the stable threshold).
341*/
342do {343struct commit *c;344struct pseudo_merge_commit_idx *pmc;345
346if (j >= matches->stable_nr)347break;348
349c = matches->stable[j++];350/*351* Here and below, make sure that we keep our mapping of
352* commits -> pseudo-merge(s) which include the key'd
353* commit up-to-date.
354*/
355pmc = pseudo_merge_idx(writer->pseudo_merge_commits,356&c->object.oid);357
358ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);359
360pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;361p = commit_list_append(c, p);362} while (j % group->stable_size);363
364if (merge->parents) {365bitmap_writer_push_commit(writer, merge, 1);366writer->pseudo_merges_nr++;367}368}369
370/* make up to group->max_merges pseudo merges for unstable commits */371for (i = 0, j = 0; i < group->max_merges; i++) {372struct commit *merge;373struct commit_list **p;374uint32_t size, end;375
376merge = push_pseudo_merge(group);377p = &merge->parents;378
379size = pseudo_merge_group_size(group, matches, i);380end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size;381
382/*383* For each pseudo-merge commit created above, add parents to
384* the allocated commit node from the unstable set of commits
385* (newer than the stable threshold).
386*
387* Account for the sample rate, since not every candidate from
388* the set of stable commits will be included as a pseudo-merge
389* parent.
390*/
391for (; j < end && j < matches->unstable_nr; j++) {392struct commit *c = matches->unstable[j];393struct pseudo_merge_commit_idx *pmc;394
395if (j % (uint32_t)(1.0 / group->sample_rate))396continue;397
398pmc = pseudo_merge_idx(writer->pseudo_merge_commits,399&c->object.oid);400
401ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);402
403pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;404p = commit_list_append(c, p);405}406
407if (merge->parents) {408bitmap_writer_push_commit(writer, merge, 1);409writer->pseudo_merges_nr++; }410if (end >= matches->unstable_nr)411break;412}413}
414
415static int commit_date_cmp(const void *va, const void *vb)416{
417timestamp_t a = (*(const struct commit **)va)->date;418timestamp_t b = (*(const struct commit **)vb)->date;419
420if (a < b)421return -1;422else if (a > b)423return 1;424return 0;425}
426
427static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches)428{
429QSORT(matches->stable, matches->stable_nr, commit_date_cmp);430QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp);431}
432
433void select_pseudo_merges(struct bitmap_writer *writer)434{
435struct progress *progress = NULL;436uint32_t i;437
438if (!writer->pseudo_merge_groups.nr)439return;440
441if (writer->show_progress)442progress = start_progress("Selecting pseudo-merge commits",443writer->pseudo_merge_groups.nr);444
445refs_for_each_ref(get_main_ref_store(the_repository),446find_pseudo_merge_group_for_ref, writer);447
448for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {449struct pseudo_merge_group *group;450struct hashmap_iter iter;451struct strmap_entry *e;452
453group = writer->pseudo_merge_groups.items[i].util;454strmap_for_each_entry(&group->matches, &iter, e) {455struct pseudo_merge_matches *matches = e->value;456
457sort_pseudo_merge_matches(matches);458
459select_pseudo_merges_1(writer, group, matches);460}461
462display_progress(progress, i + 1);463}464
465stop_progress(&progress);466}
467
468void free_pseudo_merge_map(struct pseudo_merge_map *pm)469{
470uint32_t i;471for (i = 0; i < pm->nr; i++) {472ewah_pool_free(pm->v[i].commits);473ewah_pool_free(pm->v[i].bitmap);474}475free(pm->v);476}
477
478struct pseudo_merge_commit_ext {479uint32_t nr;480const unsigned char *ptr;481};482
483static int pseudo_merge_ext_at(const struct pseudo_merge_map *pm,484struct pseudo_merge_commit_ext *ext, size_t at)485{
486if (at >= pm->map_size)487return error(_("extended pseudo-merge read out-of-bounds "488"(%"PRIuMAX" >= %"PRIuMAX")"),489(uintmax_t)at, (uintmax_t)pm->map_size);490if (at + 4 >= pm->map_size)491return error(_("extended pseudo-merge entry is too short "492"(%"PRIuMAX" >= %"PRIuMAX")"),493(uintmax_t)(at + 4), (uintmax_t)pm->map_size);494
495ext->nr = get_be32(pm->map + at);496ext->ptr = pm->map + at + sizeof(uint32_t);497
498return 0;499}
500
501struct ewah_bitmap *pseudo_merge_bitmap(const struct pseudo_merge_map *pm,502struct pseudo_merge *merge)503{
504if (!merge->loaded_commits)505BUG("cannot use unloaded pseudo-merge bitmap");506
507if (!merge->loaded_bitmap) {508size_t at = merge->bitmap_at;509
510merge->bitmap = read_bitmap(pm->map, pm->map_size, &at);511merge->loaded_bitmap = 1;512}513
514return merge->bitmap;515}
516
517struct pseudo_merge *use_pseudo_merge(const struct pseudo_merge_map *pm,518struct pseudo_merge *merge)519{
520if (!merge->loaded_commits) {521size_t pos = merge->at;522
523merge->commits = read_bitmap(pm->map, pm->map_size, &pos);524merge->bitmap_at = pos;525merge->loaded_commits = 1;526}527return merge;528}
529
530static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm,531struct object_id *oid,532size_t want)533{
534size_t lo = 0;535size_t hi = pm->nr;536
537while (lo < hi) {538size_t mi = lo + (hi - lo) / 2;539size_t got = pm->v[mi].at;540
541if (got == want)542return use_pseudo_merge(pm, &pm->v[mi]);543else if (got < want)544hi = mi;545else546lo = mi + 1;547}548
549warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX),550oid_to_hex(oid), (uintmax_t)want);551
552return NULL;553}
554
555struct pseudo_merge_commit {556uint32_t commit_pos;557uint64_t pseudo_merge_ofs;558};559
560#define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t))561
562static void read_pseudo_merge_commit_at(struct pseudo_merge_commit *merge,563const unsigned char *at)564{
565merge->commit_pos = get_be32(at);566merge->pseudo_merge_ofs = get_be64(at + sizeof(uint32_t));567}
568
569static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm,570struct pseudo_merge_commit_ext *ext,571struct pseudo_merge_commit *merge,572uint32_t n)573{
574size_t ofs;575
576if (n >= ext->nr)577return error(_("extended pseudo-merge lookup out-of-bounds "578"(%"PRIu32" >= %"PRIu32")"), n, ext->nr);579
580ofs = get_be64(ext->ptr + st_mult(n, sizeof(uint64_t)));581if (ofs >= pm->map_size)582return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"),583(uintmax_t)ofs, (uintmax_t)pm->map_size);584
585read_pseudo_merge_commit_at(merge, pm->map + ofs);586
587return 0;588}
589
590static unsigned apply_pseudo_merge(const struct pseudo_merge_map *pm,591struct pseudo_merge *merge,592struct bitmap *result,593struct bitmap *roots)594{
595if (merge->satisfied)596return 0;597
598if (!ewah_bitmap_is_subset(merge->commits, roots ? roots : result))599return 0;600
601bitmap_or_ewah(result, pseudo_merge_bitmap(pm, merge));602if (roots)603bitmap_or_ewah(roots, pseudo_merge_bitmap(pm, merge));604merge->satisfied = 1;605
606return 1;607}
608
609static int pseudo_merge_commit_cmp(const void *va, const void *vb)610{
611struct pseudo_merge_commit merge;612uint32_t key = *(uint32_t*)va;613
614read_pseudo_merge_commit_at(&merge, vb);615
616if (key < merge.commit_pos)617return -1;618if (key > merge.commit_pos)619return 1;620return 0;621}
622
623static struct pseudo_merge_commit *find_pseudo_merge(const struct pseudo_merge_map *pm,624uint32_t pos)625{
626if (!pm->commits_nr)627return NULL;628
629return bsearch(&pos, pm->commits, pm->commits_nr,630PSEUDO_MERGE_COMMIT_RAWSZ, pseudo_merge_commit_cmp);631}
632
633int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm,634struct bitmap *result,635struct commit *commit, uint32_t commit_pos)636{
637struct pseudo_merge *merge;638struct pseudo_merge_commit *merge_commit;639int ret = 0;640
641merge_commit = find_pseudo_merge(pm, commit_pos);642if (!merge_commit)643return 0;644
645if (merge_commit->pseudo_merge_ofs & ((uint64_t)1<<63)) {646struct pseudo_merge_commit_ext ext = { 0 };647off_t ofs = merge_commit->pseudo_merge_ofs & ~((uint64_t)1<<63);648uint32_t i;649
650if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) {651warning(_("could not read extended pseudo-merge table "652"for commit %s"),653oid_to_hex(&commit->object.oid));654return ret;655}656
657for (i = 0; i < ext.nr; i++) {658if (nth_pseudo_merge_ext(pm, &ext, merge_commit, i) < 0)659return ret;660
661merge = pseudo_merge_at(pm, &commit->object.oid,662merge_commit->pseudo_merge_ofs);663
664if (!merge)665return ret;666
667if (apply_pseudo_merge(pm, merge, result, NULL))668ret++;669}670} else {671merge = pseudo_merge_at(pm, &commit->object.oid,672merge_commit->pseudo_merge_ofs);673
674if (!merge)675return ret;676
677if (apply_pseudo_merge(pm, merge, result, NULL))678ret++;679}680
681if (ret)682cascade_pseudo_merges(pm, result, NULL);683
684return ret;685}
686
687int cascade_pseudo_merges(const struct pseudo_merge_map *pm,688struct bitmap *result,689struct bitmap *roots)690{
691unsigned any_satisfied;692int ret = 0;693
694do {695struct pseudo_merge *merge;696uint32_t i;697
698any_satisfied = 0;699
700for (i = 0; i < pm->nr; i++) {701merge = use_pseudo_merge(pm, &pm->v[i]);702if (apply_pseudo_merge(pm, merge, result, roots)) {703any_satisfied |= 1;704ret++;705}706}707} while (any_satisfied);708
709return ret;710}
711
712struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,713struct bitmap *parents)714{
715struct pseudo_merge *match = NULL;716size_t i;717
718if (!pm->nr)719return NULL;720
721/*722* NOTE: this loop is quadratic in the worst-case (where no
723* matching pseudo-merge bitmaps are found), but in practice
724* this is OK for a few reasons:
725*
726* - Rejecting pseudo-merge bitmaps that do not match the
727* given commit is done quickly (i.e. `bitmap_equals_ewah()`
728* returns early when we know the two bitmaps aren't equal.
729*
730* - Already matched pseudo-merge bitmaps (which we track with
731* the `->satisfied` bit here) are skipped as potential
732* candidates.
733*
734* - The number of pseudo-merges should be small (in the
735* hundreds for most repositories).
736*
737* If in the future this semi-quadratic behavior does become a
738* problem, another approach would be to keep track of which
739* pseudo-merges are still "viable" after enumerating the
740* pseudo-merge commit's parents:
741*
742* - A pseudo-merge bitmap becomes non-viable when the bit(s)
743* corresponding to one or more parent(s) of the given
744* commit are not set in a candidate pseudo-merge's commits
745* bitmap.
746*
747* - After processing all bits, enumerate the remaining set of
748* viable pseudo-merge bitmaps, and check that their
749* popcount() matches the number of parents in the given
750* commit.
751*/
752for (i = 0; i < pm->nr; i++) {753struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]);754if (!candidate || candidate->satisfied)755continue;756if (!bitmap_equals_ewah(parents, candidate->commits))757continue;758
759match = candidate;760match->satisfied = 1;761break;762}763
764return match;765}
766