git
/
list-objects-filter.c
825 строк · 21.8 Кб
1#define USE_THE_REPOSITORY_VARIABLE2
3#include "git-compat-util.h"4#include "dir.h"5#include "gettext.h"6#include "hex.h"7#include "commit.h"8#include "diff.h"9#include "revision.h"10#include "list-objects-filter.h"11#include "list-objects-filter-options.h"12#include "oidmap.h"13#include "oidset.h"14#include "object-name.h"15#include "object-store-ll.h"16
/*
 * Object flag private to the list-objects filters.  Remember to update
 * the object flag allocation in object.h when changing it.
 *
 * FILTER_SHOWN_BUT_REVISIT is set on tree objects that have already
 * been shown, but which must still be revisited should they come up
 * again during the traversal (until they are finally marked SEEN).
 * It lets the filter silently de-duplicate calls to show() in the
 * caller.
 *
 * This is subtly different from the "revision.h:SHOWN" and the
 * "object-name.c:ONELINE_SEEN" bits, and also different from the
 * non-de-dup usage in pack-bitmap.c.
 */
#define FILTER_SHOWN_BUT_REVISIT (1 << 21)
29struct subfilter {30struct filter *filter;31struct oidset seen;32struct oidset omits;33struct object_id skip_tree;34unsigned is_skipping_tree : 1;35};36
/*
 * A configured object filter: the per-object decision callback, the
 * optional hooks around it, and the private state they operate on.
 */
struct filter {
	/*
	 * Decides what to do with obj in the given traversal situation.
	 * It is handed this filter's omits set and filter_data.
	 */
	enum list_objects_filter_result (*filter_object_fn)(
		struct repository *r,
		enum list_objects_filter_situation filter_situation,
		struct object *obj,
		const char *pathname,
		const char *filename,
		struct oidset *omits,
		void *filter_data);

	/*
	 * Optional.  When supplied, and the filter needs to collect
	 * omits, this is called once before free_fn is called.
	 *
	 * It exists because both of the following hold:
	 *
	 *   a. A tree filter can add and remove objects while the
	 *      object graph is being traversed.
	 *   b. A combine filter's omit set is the union of the omit
	 *      sets of all its sub-filters, which may include tree:
	 *      filters.
	 *
	 * Hence the omits sets have to be kept separate and can only
	 * be unioned once the traversal has completed.
	 */
	void (*finalize_omits_fn)(struct oidset *omits, void *filter_data);

	/* Releases filter_data. */
	void (*free_fn)(void *filter_data);

	/* Filter-specific state, passed back to the callbacks above. */
	void *filter_data;

	/* If non-NULL, the filter collects a list of the omitted OIDs here. */
	struct oidset *omits;
};
72static enum list_objects_filter_result filter_blobs_none(73struct repository *r UNUSED,74enum list_objects_filter_situation filter_situation,75struct object *obj,76const char *pathname UNUSED,77const char *filename UNUSED,78struct oidset *omits,79void *filter_data_ UNUSED)80{
81switch (filter_situation) {82default:83BUG("unknown filter_situation: %d", filter_situation);84
85case LOFS_TAG:86assert(obj->type == OBJ_TAG);87/* always include all tag objects */88return LOFR_MARK_SEEN | LOFR_DO_SHOW;89
90case LOFS_COMMIT:91assert(obj->type == OBJ_COMMIT);92/* always include all commit objects */93return LOFR_MARK_SEEN | LOFR_DO_SHOW;94
95case LOFS_BEGIN_TREE:96assert(obj->type == OBJ_TREE);97/* always include all tree objects */98return LOFR_MARK_SEEN | LOFR_DO_SHOW;99
100case LOFS_END_TREE:101assert(obj->type == OBJ_TREE);102return LOFR_ZERO;103
104case LOFS_BLOB:105assert(obj->type == OBJ_BLOB);106assert((obj->flags & SEEN) == 0);107
108if (omits)109oidset_insert(omits, &obj->oid);110return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */111}112}
113
114static void filter_blobs_none__init(115struct list_objects_filter_options *filter_options UNUSED,116struct filter *filter)117{
118filter->filter_object_fn = filter_blobs_none;119filter->free_fn = free;120}
121
122/*
123* A filter for list-objects to omit ALL trees and blobs from the traversal.
124* Can OPTIONALLY collect a list of the omitted OIDs.
125*/
126struct filter_trees_depth_data {127/*128* Maps trees to the minimum depth at which they were seen. It is not
129* necessary to re-traverse a tree at deeper or equal depths than it has
130* already been traversed.
131*
132* We can't use LOFR_MARK_SEEN for tree objects since this will prevent
133* it from being traversed at shallower depths.
134*/
135struct oidmap seen_at_depth;136
137unsigned long exclude_depth;138unsigned long current_depth;139};140
141struct seen_map_entry {142struct oidmap_entry base;143size_t depth;144};145
146/* Returns 1 if the oid was in the omits set before it was invoked. */
147static int filter_trees_update_omits(148struct object *obj,149struct oidset *omits,150int include_it)151{
152if (!omits)153return 0;154
155if (include_it)156return oidset_remove(omits, &obj->oid);157else158return oidset_insert(omits, &obj->oid);159}
160
161static enum list_objects_filter_result filter_trees_depth(162struct repository *r UNUSED,163enum list_objects_filter_situation filter_situation,164struct object *obj,165const char *pathname UNUSED,166const char *filename UNUSED,167struct oidset *omits,168void *filter_data_)169{
170struct filter_trees_depth_data *filter_data = filter_data_;171struct seen_map_entry *seen_info;172int include_it = filter_data->current_depth <173filter_data->exclude_depth;174int filter_res;175int already_seen;176
177/*178* Note that we do not use _MARK_SEEN in order to allow re-traversal in
179* case we encounter a tree or blob again at a shallower depth.
180*/
181
182switch (filter_situation) {183default:184BUG("unknown filter_situation: %d", filter_situation);185
186case LOFS_TAG:187assert(obj->type == OBJ_TAG);188/* always include all tag objects */189return LOFR_MARK_SEEN | LOFR_DO_SHOW;190
191case LOFS_COMMIT:192assert(obj->type == OBJ_COMMIT);193/* always include all commit objects */194return LOFR_MARK_SEEN | LOFR_DO_SHOW;195
196case LOFS_END_TREE:197assert(obj->type == OBJ_TREE);198filter_data->current_depth--;199return LOFR_ZERO;200
201case LOFS_BLOB:202filter_trees_update_omits(obj, omits, include_it);203return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO;204
205case LOFS_BEGIN_TREE:206seen_info = oidmap_get(207&filter_data->seen_at_depth, &obj->oid);208if (!seen_info) {209CALLOC_ARRAY(seen_info, 1);210oidcpy(&seen_info->base.oid, &obj->oid);211seen_info->depth = filter_data->current_depth;212oidmap_put(&filter_data->seen_at_depth, seen_info);213already_seen = 0;214} else {215already_seen =216filter_data->current_depth >= seen_info->depth;217}218
219if (already_seen) {220filter_res = LOFR_SKIP_TREE;221} else {222int been_omitted = filter_trees_update_omits(223obj, omits, include_it);224seen_info->depth = filter_data->current_depth;225
226if (include_it)227filter_res = LOFR_DO_SHOW;228else if (omits && !been_omitted)229/*230* Must update omit information of children
231* recursively; they have not been omitted yet.
232*/
233filter_res = LOFR_ZERO;234else235filter_res = LOFR_SKIP_TREE;236}237
238filter_data->current_depth++;239return filter_res;240}241}
242
243static void filter_trees_free(void *filter_data) {244struct filter_trees_depth_data *d = filter_data;245if (!d)246return;247oidmap_free(&d->seen_at_depth, 1);248free(d);249}
250
251static void filter_trees_depth__init(252struct list_objects_filter_options *filter_options,253struct filter *filter)254{
255struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d));256oidmap_init(&d->seen_at_depth, 0);257d->exclude_depth = filter_options->tree_exclude_depth;258d->current_depth = 0;259
260filter->filter_data = d;261filter->filter_object_fn = filter_trees_depth;262filter->free_fn = filter_trees_free;263}
264
/*
 * State for the list-objects filter that omits large blobs.
 * The filter can OPTIONALLY collect a list of the omitted OIDs.
 */
struct filter_blobs_limit_data {
	/* Blobs whose size is at least this many bytes are omitted. */
	unsigned long max_bytes;
};
273static enum list_objects_filter_result filter_blobs_limit(274struct repository *r,275enum list_objects_filter_situation filter_situation,276struct object *obj,277const char *pathname UNUSED,278const char *filename UNUSED,279struct oidset *omits,280void *filter_data_)281{
282struct filter_blobs_limit_data *filter_data = filter_data_;283unsigned long object_length;284enum object_type t;285
286switch (filter_situation) {287default:288BUG("unknown filter_situation: %d", filter_situation);289
290case LOFS_TAG:291assert(obj->type == OBJ_TAG);292/* always include all tag objects */293return LOFR_MARK_SEEN | LOFR_DO_SHOW;294
295case LOFS_COMMIT:296assert(obj->type == OBJ_COMMIT);297/* always include all commit objects */298return LOFR_MARK_SEEN | LOFR_DO_SHOW;299
300case LOFS_BEGIN_TREE:301assert(obj->type == OBJ_TREE);302/* always include all tree objects */303return LOFR_MARK_SEEN | LOFR_DO_SHOW;304
305case LOFS_END_TREE:306assert(obj->type == OBJ_TREE);307return LOFR_ZERO;308
309case LOFS_BLOB:310assert(obj->type == OBJ_BLOB);311assert((obj->flags & SEEN) == 0);312
313t = oid_object_info(r, &obj->oid, &object_length);314if (t != OBJ_BLOB) { /* probably OBJ_NONE */315/*316* We DO NOT have the blob locally, so we cannot
317* apply the size filter criteria. Be conservative
318* and force show it (and let the caller deal with
319* the ambiguity).
320*/
321goto include_it;322}323
324if (object_length < filter_data->max_bytes)325goto include_it;326
327if (omits)328oidset_insert(omits, &obj->oid);329return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */330}331
332include_it:333if (omits)334oidset_remove(omits, &obj->oid);335return LOFR_MARK_SEEN | LOFR_DO_SHOW;336}
337
338static void filter_blobs_limit__init(339struct list_objects_filter_options *filter_options,340struct filter *filter)341{
342struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d));343d->max_bytes = filter_options->blob_limit_value;344
345filter->filter_data = d;346filter->filter_object_fn = filter_blobs_limit;347filter->free_fn = free;348}
349
350/*
351* A filter driven by a sparse-checkout specification to only
352* include blobs that a sparse checkout would populate.
353*
354* The sparse-checkout spec can be loaded from a blob with the
355* given OID or from a local pathname. We allow an OID because
356* the repo may be bare or we may be doing the filtering on the
357* server.
358*/
359struct frame {360/*361* default_match is the usual default include/exclude value that
362* should be inherited as we recurse into directories based
363* upon pattern matching of the directory itself or of a
364* containing directory.
365*/
366enum pattern_match_result default_match;367
368/*369* 1 if the directory (recursively) contains any provisionally
370* omitted objects.
371*
372* 0 if everything (recursively) contained in this directory
373* has been explicitly included (SHOWN) in the result and
374* the directory may be short-cut later in the traversal.
375*/
376unsigned child_prov_omit : 1;377};378
379struct filter_sparse_data {380struct pattern_list pl;381
382size_t nr, alloc;383struct frame *array_frame;384};385
386static enum list_objects_filter_result filter_sparse(387struct repository *r,388enum list_objects_filter_situation filter_situation,389struct object *obj,390const char *pathname,391const char *filename,392struct oidset *omits,393void *filter_data_)394{
395struct filter_sparse_data *filter_data = filter_data_;396int dtype;397struct frame *frame;398enum pattern_match_result match;399
400switch (filter_situation) {401default:402BUG("unknown filter_situation: %d", filter_situation);403
404case LOFS_TAG:405assert(obj->type == OBJ_TAG);406/* always include all tag objects */407return LOFR_MARK_SEEN | LOFR_DO_SHOW;408
409case LOFS_COMMIT:410assert(obj->type == OBJ_COMMIT);411/* always include all commit objects */412return LOFR_MARK_SEEN | LOFR_DO_SHOW;413
414case LOFS_BEGIN_TREE:415assert(obj->type == OBJ_TREE);416dtype = DT_DIR;417match = path_matches_pattern_list(pathname, strlen(pathname),418filename, &dtype, &filter_data->pl,419r->index);420if (match == UNDECIDED)421match = filter_data->array_frame[filter_data->nr - 1].default_match;422
423ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1,424filter_data->alloc);425filter_data->array_frame[filter_data->nr].default_match = match;426filter_data->array_frame[filter_data->nr].child_prov_omit = 0;427filter_data->nr++;428
429/*430* A directory with this tree OID may appear in multiple
431* places in the tree. (Think of a directory move or copy,
432* with no other changes, so the OID is the same, but the
433* full pathnames of objects within this directory are new
434* and may match is_excluded() patterns differently.)
435* So we cannot mark this directory as SEEN (yet), since
436* that will prevent process_tree() from revisiting this
437* tree object with other pathname prefixes.
438*
439* Only _DO_SHOW the tree object the first time we visit
440* this tree object.
441*
442* We always show all tree objects. A future optimization
443* may want to attempt to narrow this.
444*/
445if (obj->flags & FILTER_SHOWN_BUT_REVISIT)446return LOFR_ZERO;447obj->flags |= FILTER_SHOWN_BUT_REVISIT;448return LOFR_DO_SHOW;449
450case LOFS_END_TREE:451assert(obj->type == OBJ_TREE);452assert(filter_data->nr > 1);453
454frame = &filter_data->array_frame[--filter_data->nr];455
456/*457* Tell our parent directory if any of our children were
458* provisionally omitted.
459*/
460filter_data->array_frame[filter_data->nr - 1].child_prov_omit |=461frame->child_prov_omit;462
463/*464* If there are NO provisionally omitted child objects (ALL child
465* objects in this folder were INCLUDED), then we can mark the
466* folder as SEEN (so we will not have to revisit it again).
467*/
468if (!frame->child_prov_omit)469return LOFR_MARK_SEEN;470return LOFR_ZERO;471
472case LOFS_BLOB:473assert(obj->type == OBJ_BLOB);474assert((obj->flags & SEEN) == 0);475
476frame = &filter_data->array_frame[filter_data->nr - 1];477
478dtype = DT_REG;479match = path_matches_pattern_list(pathname, strlen(pathname),480filename, &dtype, &filter_data->pl,481r->index);482if (match == UNDECIDED)483match = frame->default_match;484if (match == MATCHED) {485if (omits)486oidset_remove(omits, &obj->oid);487return LOFR_MARK_SEEN | LOFR_DO_SHOW;488}489
490/*491* Provisionally omit it. We've already established that
492* this pathname is not in the sparse-checkout specification
493* with the CURRENT pathname, so we *WANT* to omit this blob.
494*
495* However, a pathname elsewhere in the tree may also
496* reference this same blob, so we cannot reject it yet.
497* Leave the LOFR_ bits unset so that if the blob appears
498* again in the traversal, we will be asked again.
499*/
500if (omits)501oidset_insert(omits, &obj->oid);502
503/*504* Remember that at least 1 blob in this tree was
505* provisionally omitted. This prevents us from short
506* cutting the tree in future iterations.
507*/
508frame->child_prov_omit = 1;509return LOFR_ZERO;510}511}
512
513
514static void filter_sparse_free(void *filter_data)515{
516struct filter_sparse_data *d = filter_data;517clear_pattern_list(&d->pl);518free(d->array_frame);519free(d);520}
521
522static void filter_sparse_oid__init(523struct list_objects_filter_options *filter_options,524struct filter *filter)525{
526struct filter_sparse_data *d = xcalloc(1, sizeof(*d));527struct object_context oc;528struct object_id sparse_oid;529
530if (get_oid_with_context(the_repository,531filter_options->sparse_oid_name,532GET_OID_BLOB, &sparse_oid, &oc))533die(_("unable to access sparse blob in '%s'"),534filter_options->sparse_oid_name);535if (add_patterns_from_blob_to_list(&sparse_oid, "", 0, &d->pl) < 0)536die(_("unable to parse sparse filter data in %s"),537oid_to_hex(&sparse_oid));538
539ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc);540d->array_frame[d->nr].default_match = 0; /* default to include */541d->array_frame[d->nr].child_prov_omit = 0;542d->nr++;543
544filter->filter_data = d;545filter->filter_object_fn = filter_sparse;546filter->free_fn = filter_sparse_free;547
548object_context_release(&oc);549}
550
551/*
552* A filter for list-objects to omit large blobs.
553* And to OPTIONALLY collect a list of the omitted OIDs.
554*/
555struct filter_object_type_data {556enum object_type object_type;557};558
559static enum list_objects_filter_result filter_object_type(560struct repository *r UNUSED,561enum list_objects_filter_situation filter_situation,562struct object *obj,563const char *pathname UNUSED,564const char *filename UNUSED,565struct oidset *omits UNUSED,566void *filter_data_)567{
568struct filter_object_type_data *filter_data = filter_data_;569
570switch (filter_situation) {571default:572BUG("unknown filter_situation: %d", filter_situation);573
574case LOFS_TAG:575assert(obj->type == OBJ_TAG);576if (filter_data->object_type == OBJ_TAG)577return LOFR_MARK_SEEN | LOFR_DO_SHOW;578return LOFR_MARK_SEEN;579
580case LOFS_COMMIT:581assert(obj->type == OBJ_COMMIT);582if (filter_data->object_type == OBJ_COMMIT)583return LOFR_MARK_SEEN | LOFR_DO_SHOW;584return LOFR_MARK_SEEN;585
586case LOFS_BEGIN_TREE:587assert(obj->type == OBJ_TREE);588
589/*590* If we only want to show commits or tags, then there is no
591* need to walk down trees.
592*/
593if (filter_data->object_type == OBJ_COMMIT ||594filter_data->object_type == OBJ_TAG)595return LOFR_SKIP_TREE;596
597if (filter_data->object_type == OBJ_TREE)598return LOFR_MARK_SEEN | LOFR_DO_SHOW;599
600return LOFR_MARK_SEEN;601
602case LOFS_BLOB:603assert(obj->type == OBJ_BLOB);604
605if (filter_data->object_type == OBJ_BLOB)606return LOFR_MARK_SEEN | LOFR_DO_SHOW;607return LOFR_MARK_SEEN;608
609case LOFS_END_TREE:610return LOFR_ZERO;611}612}
613
614static void filter_object_type__init(615struct list_objects_filter_options *filter_options,616struct filter *filter)617{
618struct filter_object_type_data *d = xcalloc(1, sizeof(*d));619d->object_type = filter_options->object_type;620
621filter->filter_data = d;622filter->filter_object_fn = filter_object_type;623filter->free_fn = free;624}
625
/*
 * State of the combine filter, which only shows the objects that are
 * shown by all of its sub-filters.
 */
struct combine_filter_data {
	/* Array of nr sub-filters together with their bookkeeping. */
	struct subfilter *sub;
	size_t nr;
};
632static enum list_objects_filter_result process_subfilter(633struct repository *r,634enum list_objects_filter_situation filter_situation,635struct object *obj,636const char *pathname,637const char *filename,638struct subfilter *sub)639{
640enum list_objects_filter_result result;641
642/*643* Check and update is_skipping_tree before oidset_contains so
644* that is_skipping_tree gets unset even when the object is
645* marked as seen. As of this writing, no filter uses
646* LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the
647* ordering is only theoretically important. Be cautious if you
648* change the order of the below checks and more filters have
649* been added!
650*/
651if (sub->is_skipping_tree) {652if (filter_situation == LOFS_END_TREE &&653oideq(&obj->oid, &sub->skip_tree))654sub->is_skipping_tree = 0;655else656return LOFR_ZERO;657}658if (oidset_contains(&sub->seen, &obj->oid))659return LOFR_ZERO;660
661result = list_objects_filter__filter_object(662r, filter_situation, obj, pathname, filename, sub->filter);663
664if (result & LOFR_MARK_SEEN)665oidset_insert(&sub->seen, &obj->oid);666
667if (result & LOFR_SKIP_TREE) {668sub->is_skipping_tree = 1;669sub->skip_tree = obj->oid;670}671
672return result;673}
674
675static enum list_objects_filter_result filter_combine(676struct repository *r,677enum list_objects_filter_situation filter_situation,678struct object *obj,679const char *pathname,680const char *filename,681struct oidset *omits UNUSED,682void *filter_data)683{
684struct combine_filter_data *d = filter_data;685enum list_objects_filter_result combined_result =686LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE;687size_t sub;688
689for (sub = 0; sub < d->nr; sub++) {690enum list_objects_filter_result sub_result = process_subfilter(691r, filter_situation, obj, pathname, filename,692&d->sub[sub]);693if (!(sub_result & LOFR_DO_SHOW))694combined_result &= ~LOFR_DO_SHOW;695if (!(sub_result & LOFR_MARK_SEEN))696combined_result &= ~LOFR_MARK_SEEN;697if (!d->sub[sub].is_skipping_tree)698combined_result &= ~LOFR_SKIP_TREE;699}700
701return combined_result;702}
703
704static void filter_combine__free(void *filter_data)705{
706struct combine_filter_data *d = filter_data;707size_t sub;708for (sub = 0; sub < d->nr; sub++) {709list_objects_filter__free(d->sub[sub].filter);710oidset_clear(&d->sub[sub].seen);711if (d->sub[sub].omits.set.size)712BUG("expected oidset to be cleared already");713}714free(d->sub);715free(d);716}
717
718static void filter_combine__finalize_omits(719struct oidset *omits,720void *filter_data)721{
722struct combine_filter_data *d = filter_data;723size_t sub;724
725for (sub = 0; sub < d->nr; sub++) {726oidset_insert_from_set(omits, &d->sub[sub].omits);727oidset_clear(&d->sub[sub].omits);728}729}
730
731static void filter_combine__init(732struct list_objects_filter_options *filter_options,733struct filter* filter)734{
735struct combine_filter_data *d = xcalloc(1, sizeof(*d));736size_t sub;737
738d->nr = filter_options->sub_nr;739CALLOC_ARRAY(d->sub, d->nr);740for (sub = 0; sub < d->nr; sub++)741d->sub[sub].filter = list_objects_filter__init(742filter->omits ? &d->sub[sub].omits : NULL,743&filter_options->sub[sub]);744
745filter->filter_data = d;746filter->filter_object_fn = filter_combine;747filter->free_fn = filter_combine__free;748filter->finalize_omits_fn = filter_combine__finalize_omits;749}
750
/*
 * Signature shared by the per-filter set-up functions: fill in the
 * callbacks and private state of filter according to filter_options.
 */
typedef void (*filter_init_fn)(
	struct list_objects_filter_options *filter_options,
	struct filter *filter);
755/*
756* Must match "enum list_objects_filter_choice".
757*/
758static filter_init_fn s_filters[] = {759NULL,760filter_blobs_none__init,761filter_blobs_limit__init,762filter_trees_depth__init,763filter_sparse_oid__init,764filter_object_type__init,765filter_combine__init,766};767
768struct filter *list_objects_filter__init(769struct oidset *omitted,770struct list_objects_filter_options *filter_options)771{
772struct filter *filter;773filter_init_fn init_fn;774
775assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT);776
777if (!filter_options)778return NULL;779
780if (filter_options->choice >= LOFC__COUNT)781BUG("invalid list-objects filter choice: %d",782filter_options->choice);783
784init_fn = s_filters[filter_options->choice];785if (!init_fn)786return NULL;787
788CALLOC_ARRAY(filter, 1);789filter->omits = omitted;790init_fn(filter_options, filter);791return filter;792}
793
794enum list_objects_filter_result list_objects_filter__filter_object(795struct repository *r,796enum list_objects_filter_situation filter_situation,797struct object *obj,798const char *pathname,799const char *filename,800struct filter *filter)801{
802if (filter && (obj->flags & NOT_USER_GIVEN))803return filter->filter_object_fn(r, filter_situation, obj,804pathname, filename,805filter->omits,806filter->filter_data);807/*808* No filter is active or user gave object explicitly. In this case,
809* always show the object (except when LOFS_END_TREE, since this tree
810* had already been shown when LOFS_BEGIN_TREE).
811*/
812if (filter_situation == LOFS_END_TREE)813return 0;814return LOFR_MARK_SEEN | LOFR_DO_SHOW;815}
816
817void list_objects_filter__free(struct filter *filter)818{
819if (!filter)820return;821if (filter->finalize_omits_fn && filter->omits)822filter->finalize_omits_fn(filter->omits, filter->filter_data);823filter->free_fn(filter->filter_data);824free(filter);825}
826