git
/
diffcore-break.c
317 строк · 9.4 Кб
1/*
2* Copyright (C) 2005 Junio C Hamano
3*/
4
5#define USE_THE_REPOSITORY_VARIABLE6
7#include "git-compat-util.h"8#include "diffcore.h"9#include "hash.h"10#include "object.h"11#include "promisor-remote.h"12
13static int should_break(struct repository *r,14struct diff_filespec *src,15struct diff_filespec *dst,16int break_score,17int *merge_score_p)18{
19/* dst is recorded as a modification of src. Are they so20* different that we are better off recording this as a pair
21* of delete and create?
22*
23* There are two criteria used in this algorithm. For the
24* purposes of helping later rename/copy, we take both delete
25* and insert into account and estimate the amount of "edit".
26* If the edit is very large, we break this pair so that
27* rename/copy can pick the pieces up to match with other
28* files.
29*
30* On the other hand, we would want to ignore inserts for the
31* pure "complete rewrite" detection. As long as most of the
32* existing contents were removed from the file, it is a
33* complete rewrite, and if sizable chunk from the original
34* still remains in the result, it is not a rewrite. It does
35* not matter how much or how little new material is added to
36* the file.
37*
38* The score we leave for such a broken filepair uses the
39* latter definition so that later clean-up stage can find the
40* pieces that should not have been broken according to the
41* latter definition after rename/copy runs, and merge the
42* broken pair that have a score lower than given criteria
43* back together. The break operation itself happens
44* according to the former definition.
45*
46* The minimum_edit parameter tells us when to break (the
47* amount of "edit" required for us to consider breaking the
48* pair). We leave the amount of deletion in *merge_score_p
49* when we return.
50*
51* The value we return is 1 if we want the pair to be broken,
52* or 0 if we do not.
53*/
54unsigned long delta_size, max_size;55unsigned long src_copied, literal_added, src_removed;56
57struct diff_populate_filespec_options options = { 0 };58
59*merge_score_p = 0; /* assume no deletion --- "do not break"60* is the default.
61*/
62
63if (S_ISREG(src->mode) != S_ISREG(dst->mode)) {64*merge_score_p = (int)MAX_SCORE;65return 1; /* even their types are different */66}67
68if (src->oid_valid && dst->oid_valid &&69oideq(&src->oid, &dst->oid))70return 0; /* they are the same */71
72if (r == the_repository && repo_has_promisor_remote(the_repository)) {73options.missing_object_cb = diff_queued_diff_prefetch;74options.missing_object_data = r;75}76
77if (diff_populate_filespec(r, src, &options) ||78diff_populate_filespec(r, dst, &options))79return 0; /* error but caught downstream */80
81max_size = ((src->size > dst->size) ? src->size : dst->size);82if (max_size < MINIMUM_BREAK_SIZE)83return 0; /* we do not break too small filepair */84
85if (!src->size)86return 0; /* we do not let empty files get renamed */87
88if (diffcore_count_changes(r, src, dst,89&src->cnt_data, &dst->cnt_data,90&src_copied, &literal_added))91return 0;92
93/* sanity */94if (src->size < src_copied)95src_copied = src->size;96if (dst->size < literal_added + src_copied) {97if (src_copied < dst->size)98literal_added = dst->size - src_copied;99else100literal_added = 0;101}102src_removed = src->size - src_copied;103
104/* Compute merge-score, which is "how much is removed105* from the source material". The clean-up stage will
106* merge the surviving pair together if the score is
107* less than the minimum, after rename/copy runs.
108*/
109*merge_score_p = (int)(src_removed * MAX_SCORE / src->size);110if (*merge_score_p > break_score)111return 1;112
113/* Extent of damage, which counts both inserts and114* deletes.
115*/
116delta_size = src_removed + literal_added;117if (delta_size * MAX_SCORE / max_size < break_score)118return 0;119
120/* If you removed a lot without adding new material, that is121* not really a rewrite.
122*/
123if ((src->size * break_score < src_removed * MAX_SCORE) &&124(literal_added * 20 < src_removed) &&125(literal_added * 20 < src_copied))126return 0;127
128return 1;129}
130
131void diffcore_break(struct repository *r, int break_score)132{
133struct diff_queue_struct *q = &diff_queued_diff;134struct diff_queue_struct outq;135
136/* When the filepair has this much edit (insert and delete),137* it is first considered to be a rewrite and broken into a
138* create and delete filepair. This is to help breaking a
139* file that had too much new stuff added, possibly from
140* moving contents from another file, so that rename/copy can
141* match it with the other file.
142*
143* int break_score; we reuse incoming parameter for this.
144*/
145
146/* After a pair is broken according to break_score and147* subjected to rename/copy, both of them may survive intact,
148* due to lack of suitable rename/copy peer. Or, the caller
149* may be calling us without using rename/copy. When that
150* happens, we merge the broken pieces back into one
151* modification together if the pair did not have more than
152* this much delete. For this computation, we do not take
153* insert into account at all. If you start from a 100-line
154* file and delete 97 lines of it, it does not matter if you
155* add 27 lines to it to make a new 30-line file or if you add
156* 997 lines to it to make a 1000-line file. Either way what
157* you did was a rewrite of 97%. On the other hand, if you
158* delete 3 lines, keeping 97 lines intact, it does not matter
159* if you add 3 lines to it to make a new 100-line file or if
160* you add 903 lines to it to make a new 1000-line file.
161* Either way you did a lot of additions and not a rewrite.
162* This merge happens to catch the latter case. A merge_score
163* of 80% would be a good default value (a broken pair that
164* has score lower than merge_score will be merged back
165* together).
166*/
167int merge_score;168int i;169
170/* See comment on DEFAULT_BREAK_SCORE and171* DEFAULT_MERGE_SCORE in diffcore.h
172*/
173merge_score = (break_score >> 16) & 0xFFFF;174break_score = (break_score & 0xFFFF);175
176if (!break_score)177break_score = DEFAULT_BREAK_SCORE;178if (!merge_score)179merge_score = DEFAULT_MERGE_SCORE;180
181DIFF_QUEUE_CLEAR(&outq);182
183for (i = 0; i < q->nr; i++) {184struct diff_filepair *p = q->queue[i];185int score;186
187/*188* We deal only with in-place edit of blobs.
189* We do not break anything else.
190*/
191if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) &&192object_type(p->one->mode) == OBJ_BLOB &&193object_type(p->two->mode) == OBJ_BLOB &&194!strcmp(p->one->path, p->two->path)) {195if (should_break(r, p->one, p->two,196break_score, &score)) {197/* Split this into delete and create */198struct diff_filespec *null_one, *null_two;199struct diff_filepair *dp;200
201/* Set score to 0 for the pair that202* needs to be merged back together
203* should they survive rename/copy.
204* Also we do not want to break very
205* small files.
206*/
207if (score < merge_score)208score = 0;209
210/* deletion of one */211null_one = alloc_filespec(p->one->path);212dp = diff_queue(&outq, p->one, null_one);213dp->score = score;214dp->broken_pair = 1;215
216/* creation of two */217null_two = alloc_filespec(p->two->path);218dp = diff_queue(&outq, null_two, p->two);219dp->score = score;220dp->broken_pair = 1;221
222diff_free_filespec_blob(p->one);223diff_free_filespec_blob(p->two);224free(p); /* not diff_free_filepair(), we are225* reusing one and two here.
226*/
227continue;228}229}230diff_free_filespec_data(p->one);231diff_free_filespec_data(p->two);232diff_q(&outq, p);233}234free(q->queue);235*q = outq;236
237return;238}
239
240static void merge_broken(struct diff_filepair *p,241struct diff_filepair *pp,242struct diff_queue_struct *outq)243{
244/* p and pp are broken pairs we want to merge */245struct diff_filepair *c = p, *d = pp, *dp;246if (DIFF_FILE_VALID(p->one)) {247/* this must be a delete half */248d = p; c = pp;249}250/* Sanity check */251if (!DIFF_FILE_VALID(d->one))252die("internal error in merge #1");253if (DIFF_FILE_VALID(d->two))254die("internal error in merge #2");255if (DIFF_FILE_VALID(c->one))256die("internal error in merge #3");257if (!DIFF_FILE_VALID(c->two))258die("internal error in merge #4");259
260dp = diff_queue(outq, d->one, c->two);261dp->score = p->score;262/*263* We will be one extra user of the same src side of the
264* broken pair, if it was used as the rename source for other
265* paths elsewhere. Increment to mark that the path stays
266* in the resulting tree.
267*/
268d->one->rename_used++;269diff_free_filespec_data(d->two);270diff_free_filespec_data(c->one);271free(d);272free(c);273}
274
275void diffcore_merge_broken(void)276{
277struct diff_queue_struct *q = &diff_queued_diff;278struct diff_queue_struct outq;279int i, j;280
281DIFF_QUEUE_CLEAR(&outq);282
283for (i = 0; i < q->nr; i++) {284struct diff_filepair *p = q->queue[i];285if (!p)286/* we already merged this with its peer */287continue;288else if (p->broken_pair &&289!strcmp(p->one->path, p->two->path)) {290/* If the peer also survived rename/copy, then291* we merge them back together.
292*/
293for (j = i + 1; j < q->nr; j++) {294struct diff_filepair *pp = q->queue[j];295if (pp->broken_pair &&296!strcmp(pp->one->path, pp->two->path) &&297!strcmp(p->one->path, pp->two->path)) {298/* Peer survived. Merge them */299merge_broken(p, pp, &outq);300q->queue[j] = NULL;301goto next;302}303}304/* The peer did not survive, so we keep305* it in the output.
306*/
307diff_q(&outq, p);308}309else310diff_q(&outq, p);311next:;312}313free(q->queue);314*q = outq;315
316return;317}
318