git
/
git-archimport.perl
1134 строки · 36.1 Кб
1#!/usr/bin/perl
2#
3# This tool is copyright (c) 2005, Martin Langhoff.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to walk the output of tla abrowse,
7# fetch the changesets and apply them.
8#
9
10=head1 Invocation
11
12git archimport [ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ]
13[ -D depth] [ -t tempdir ] <archive>/<branch> [ <archive>/<branch> ]
14
15Imports a project from one or more Arch repositories. It will follow branches
16and repositories within the namespaces defined by the <archive/branch>
17parameters supplied. If it cannot find the remote branch a merge comes from
18it will just import it as a regular commit. If it can find it, it will mark it
19as a merge whenever possible.
20
21See man (1) git-archimport for more details.
22
23=head1 TODO
24
25- create tag objects instead of ref tags
26- audit shell-escaping of filenames
27- hide our private tags somewhere smarter
28- find a way to make "cat *patches | patch" safe even when patchfiles are missing newlines
29- sort and apply patches by graphing ancestry relations instead of just
30relying in dates supplied in the changeset itself.
31tla ancestry-graph -m could be helpful here...
32
33=head1 Devel tricks
34
35Add print in front of the shell commands invoked via backticks.
36
37=head1 Devel Notes
38
39There are several places where Arch and git terminology are intermixed
40and potentially confused.
41
42The notion of a "branch" in git is approximately equivalent to
43a "archive/category--branch--version" in Arch. Also, it should be noted
44that the "--branch" portion of "archive/category--branch--version" is really
45optional in Arch although not many people (nor tools!) seem to know this.
46This means that "archive/category--version" is also a valid "branch"
47in git terms.
48
49We always refer to Arch names by their fully qualified variant (which
50means the "archive" name is prefixed.
51
52For people unfamiliar with Arch, an "archive" is the term for "repository",
53and can contain multiple, unrelated branches.
54
55=cut
56
57use 5.008001;
58use strict;
59use warnings;
60use Getopt::Std;
61use File::Temp qw(tempdir);
62use File::Path qw(mkpath rmtree);
63use File::Basename qw(basename dirname);
64use Data::Dumper qw/ Dumper /;
65use IPC::Open2;
66
67$SIG{'PIPE'}="IGNORE";
68$ENV{'TZ'}="UTC";
69
70my $git_dir = $ENV{"GIT_DIR"} || ".git";
71$ENV{"GIT_DIR"} = $git_dir;
72my $ptag_dir = "$git_dir/archimport/tags";
73
74our($opt_h,$opt_f,$opt_v,$opt_T,$opt_t,$opt_D,$opt_a,$opt_o);
75
76sub usage() {
77print STDERR <<END;
78usage: git archimport # fetch/update GIT from Arch
79[ -h ] [ -v ] [ -o ] [ -a ] [ -f ] [ -T ] [ -D depth ] [ -t tempdir ]
80repository/arch-branch [ repository/arch-branch] ...
81END
82exit(1);
83}
84
85getopts("fThvat:D:") or usage();
86usage if $opt_h;
87
88@ARGV >= 1 or usage();
89# $arch_branches:
90# values associated with keys:
91# =1 - Arch version / git 'branch' detected via abrowse on a limit
92# >1 - Arch version / git 'branch' of an auxiliary branch we've merged
93my %arch_branches = map { my $branch = $_; $branch =~ s/:[^:]*$//; $branch => 1 } @ARGV;
94
95# $branch_name_map:
96# maps arch branches to git branch names
97my %branch_name_map = map { m/^(.*):([^:]*)$/; $1 => $2 } grep { m/:/ } @ARGV;
98
99$ENV{'TMPDIR'} = $opt_t if $opt_t; # $ENV{TMPDIR} will affect tempdir() calls:
100my $tmp = tempdir('git-archimport-XXXXXX', TMPDIR => 1, CLEANUP => 1);
101$opt_v && print "+ Using $tmp as temporary directory\n";
102
103unless (-d $git_dir) { # initial import needs empty directory
104opendir DIR, '.' or die "Unable to open current directory: $!\n";
105while (my $entry = readdir DIR) {
106$entry =~ /^\.\.?$/ or
107die "Initial import needs an empty current working directory.\n"
108}
109closedir DIR
110}
111
112my $default_archive; # default Arch archive
113my %reachable = (); # Arch repositories we can access
114my %unreachable = (); # Arch repositories we can't access :<
115my @psets = (); # the collection
116my %psets = (); # the collection, by name
117my %stats = ( # Track which strategy we used to import:
118get_tag => 0, replay => 0, get_new => 0, get_delta => 0,
119simple_changeset => 0, import_or_tag => 0
120);
121
122my %rptags = (); # my reverse private tags
123# to map a SHA1 to a commitid
124my $TLA = $ENV{'ARCH_CLIENT'} || 'tla';
125
126sub do_abrowse {
127my $stage = shift;
128while (my ($limit, $level) = each %arch_branches) {
129next unless $level == $stage;
130
131open ABROWSE, "$TLA abrowse -fkD --merges $limit |"
132or die "Problems with tla abrowse: $!";
133
134my %ps = (); # the current one
135my $lastseen = '';
136
137while (<ABROWSE>) {
138chomp;
139
140# first record padded w 8 spaces
141if (s/^\s{8}\b//) {
142my ($id, $type) = split(m/\s+/, $_, 2);
143
144my %last_ps;
145# store the record we just captured
146if (%ps && !exists $psets{ $ps{id} }) {
147%last_ps = %ps; # break references
148push (@psets, \%last_ps);
149$psets{ $last_ps{id} } = \%last_ps;
150}
151
152my $branch = extract_versionname($id);
153%ps = ( id => $id, branch => $branch );
154if (%last_ps && ($last_ps{branch} eq $branch)) {
155$ps{parent_id} = $last_ps{id};
156}
157
158$arch_branches{$branch} = 1;
159$lastseen = 'id';
160
161# deal with types (should work with baz or tla):
162if ($type =~ m/\(.*changeset\)/) {
163$ps{type} = 's';
164} elsif ($type =~ /\(.*import\)/) {
165$ps{type} = 'i';
166} elsif ($type =~ m/\(tag.*?(\S+\@\S+).*?\)/) {
167$ps{type} = 't';
168# read which revision we've tagged when we parse the log
169$ps{tag} = $1;
170} else {
171warn "Unknown type $type";
172}
173
174$arch_branches{$branch} = 1;
175$lastseen = 'id';
176} elsif (s/^\s{10}//) {
177# 10 leading spaces or more
178# indicate commit metadata
179
180# date
181if ($lastseen eq 'id' && m/^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)/){
182$ps{date} = $1;
183$lastseen = 'date';
184} elsif ($_ eq 'merges in:') {
185$ps{merges} = [];
186$lastseen = 'merges';
187} elsif ($lastseen eq 'merges' && s/^\s{2}//) {
188my $id = $_;
189push (@{$ps{merges}}, $id);
190
191# aggressive branch finding:
192if ($opt_D) {
193my $branch = extract_versionname($id);
194my $repo = extract_reponame($branch);
195
196if (archive_reachable($repo) &&
197!defined $arch_branches{$branch}) {
198$arch_branches{$branch} = $stage + 1;
199}
200}
201} else {
202warn "more metadata after merges!?: $_\n" unless /^\s*$/;
203}
204}
205}
206
207if (%ps && !exists $psets{ $ps{id} }) {
208my %temp = %ps; # break references
209if (@psets && $psets[$#psets]{branch} eq $ps{branch}) {
210$temp{parent_id} = $psets[$#psets]{id};
211}
212push (@psets, \%temp);
213$psets{ $temp{id} } = \%temp;
214}
215
216close ABROWSE or die "$TLA abrowse failed on $limit\n";
217}
218} # end foreach $root
219
220do_abrowse(1);
221my $depth = 2;
222$opt_D ||= 0;
223while ($depth <= $opt_D) {
224do_abrowse($depth);
225$depth++;
226}
227
228## Order patches by time
229# FIXME see if we can find a more optimal way to do this by graphing
230# the ancestry data and walking it, that way we won't have to rely on
231# client-supplied dates
232@psets = sort {$a->{date}.$b->{id} cmp $b->{date}.$b->{id}} @psets;
233
234#print Dumper \@psets;
235
236##
237## TODO cleanup irrelevant patches
238## and put an initial import
239## or a full tag
240my $import = 0;
241unless (-d $git_dir) { # initial import
242if ($psets[0]{type} eq 'i' || $psets[0]{type} eq 't') {
243print "Starting import from $psets[0]{id}\n";
244`git-init`;
245die $! if $?;
246$import = 1;
247} else {
248die "Need to start from an import or a tag -- cannot use $psets[0]{id}";
249}
250} else { # progressing an import
251# load the rptags
252opendir(DIR, $ptag_dir)
253|| die "can't opendir: $!";
254while (my $file = readdir(DIR)) {
255# skip non-interesting-files
256next unless -f "$ptag_dir/$file";
257
258# convert first '--' to '/' from old git-archimport to use
259# as an archivename/c--b--v private tag
260if ($file !~ m!,!) {
261my $oldfile = $file;
262$file =~ s!--!,!;
263print STDERR "converting old tag $oldfile to $file\n";
264rename("$ptag_dir/$oldfile", "$ptag_dir/$file") or die $!;
265}
266my $sha = ptag($file);
267chomp $sha;
268$rptags{$sha} = $file;
269}
270closedir DIR;
271}
272
273# process patchsets
274# extract the Arch repository name (Arch "archive" in Arch-speak)
275sub extract_reponame {
276my $fq_cvbr = shift; # archivename/[[[[category]branch]version]revision]
277return (split(/\//, $fq_cvbr))[0];
278}
279
280sub extract_versionname {
281my $name = shift;
282$name =~ s/--(?:patch|version(?:fix)?|base)-\d+$//;
283return $name;
284}
285
286# convert a fully-qualified revision or version to a unique dirname:
287# normalperson@yhbt.net-05/mpd--uclinux--1--patch-2
288# becomes: normalperson@yhbt.net-05,mpd--uclinux--1
289#
290# the git notion of a branch is closer to
291# archive/category--branch--version than archive/category--branch, so we
292# use this to convert to git branch names.
293# Also, keep archive names but replace '/' with ',' since it won't require
294# subdirectories, and is safer than swapping '--' which could confuse
295# reverse-mapping when dealing with bastard branches that
296# are just archive/category--version (no --branch)
297sub tree_dirname {
298my $revision = shift;
299my $name = extract_versionname($revision);
300$name =~ s#/#,#;
301return $name;
302}
303
304# old versions of git-archimport just use the <category--branch> part:
305sub old_style_branchname {
306my $id = shift;
307my $ret = safe_pipe_capture($TLA,'parse-package-name','-p',$id);
308chomp $ret;
309return $ret;
310}
311
312*git_default_branchname = $opt_o ? *old_style_branchname : *tree_dirname;
313
314# retrieve default archive, since $branch_name_map keys might not include it
315sub get_default_archive {
316if (!defined $default_archive) {
317$default_archive = safe_pipe_capture($TLA,'my-default-archive');
318chomp $default_archive;
319}
320return $default_archive;
321}
322
323sub git_branchname {
324my $revision = shift;
325my $name = extract_versionname($revision);
326
327if (exists $branch_name_map{$name}) {
328return $branch_name_map{$name};
329
330} elsif ($name =~ m#^([^/]*)/(.*)$#
331&& $1 eq get_default_archive()
332&& exists $branch_name_map{$2}) {
333# the names given in the command-line lacked the archive.
334return $branch_name_map{$2};
335
336} else {
337return git_default_branchname($revision);
338}
339}
340
341sub process_patchset_accurate {
342my $ps = shift;
343
344# switch to that branch if we're not already in that branch:
345if (-e "$git_dir/refs/heads/$ps->{branch}") {
346system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
347
348# remove any old stuff that got leftover:
349my $rm = safe_pipe_capture('git-ls-files','--others','-z');
350rmtree(split(/\0/,$rm)) if $rm;
351}
352
353# Apply the import/changeset/merge into the working tree
354my $dir = sync_to_ps($ps);
355# read the new log entry:
356my @commitlog = safe_pipe_capture($TLA,'cat-log','-d',$dir,$ps->{id});
357die "Error in cat-log: $!" if $?;
358chomp @commitlog;
359
360# grab variables we want from the log, new fields get added to $ps:
361# (author, date, email, summary, message body ...)
362parselog($ps, \@commitlog);
363
364if ($ps->{id} =~ /--base-0$/ && $ps->{id} ne $psets[0]{id}) {
365# this should work when importing continuations
366if ($ps->{tag} && (my $branchpoint = eval { ptag($ps->{tag}) })) {
367
368# find where we are supposed to branch from
369if (! -e "$git_dir/refs/heads/$ps->{branch}") {
370system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
371
372# We trust Arch with the fact that this is just a tag,
373# and it does not affect the state of the tree, so
374# we just tag and move on. If the user really wants us
375# to consolidate more branches into one, don't tag because
376# the tag name would be already taken.
377tag($ps->{id}, $branchpoint);
378ptag($ps->{id}, $branchpoint);
379print " * Tagged $ps->{id} at $branchpoint\n";
380}
381system('git-checkout','-f',$ps->{branch}) == 0 or die "$! $?\n";
382
383# remove any old stuff that got leftover:
384my $rm = safe_pipe_capture('git-ls-files','--others','-z');
385rmtree(split(/\0/,$rm)) if $rm;
386return 0;
387} else {
388warn "Tagging from unknown id unsupported\n" if $ps->{tag};
389}
390# allow multiple bases/imports here since Arch supports cherry-picks
391# from unrelated trees
392}
393
394# update the index with all the changes we got
395system('git-diff-files --name-only -z | '.
396'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
397system('git-ls-files --others -z | '.
398'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
399return 1;
400}
401
402# the native changeset processing strategy. This is very fast, but
403# does not handle permissions or any renames involving directories
404sub process_patchset_fast {
405my $ps = shift;
406#
407# create the branch if needed
408#
409if ($ps->{type} eq 'i' && !$import) {
410die "Should not have more than one 'Initial import' per GIT import: $ps->{id}";
411}
412
413unless ($import) { # skip for import
414if ( -e "$git_dir/refs/heads/$ps->{branch}") {
415# we know about this branch
416system('git-checkout',$ps->{branch});
417} else {
418# new branch! we need to verify a few things
419die "Branch on a non-tag!" unless $ps->{type} eq 't';
420my $branchpoint = ptag($ps->{tag});
421die "Tagging from unknown id unsupported: $ps->{tag}"
422unless $branchpoint;
423
424# find where we are supposed to branch from
425if (! -e "$git_dir/refs/heads/$ps->{branch}") {
426system('git-branch',$ps->{branch},$branchpoint) == 0 or die "$! $?\n";
427
428# We trust Arch with the fact that this is just a tag,
429# and it does not affect the state of the tree, so
430# we just tag and move on. If the user really wants us
431# to consolidate more branches into one, don't tag because
432# the tag name would be already taken.
433tag($ps->{id}, $branchpoint);
434ptag($ps->{id}, $branchpoint);
435print " * Tagged $ps->{id} at $branchpoint\n";
436}
437system('git-checkout',$ps->{branch}) == 0 or die "$! $?\n";
438return 0;
439}
440die $! if $?;
441}
442
443#
444# Apply the import/changeset/merge into the working tree
445#
446if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
447apply_import($ps) or die $!;
448$stats{import_or_tag}++;
449$import=0;
450} elsif ($ps->{type} eq 's') {
451apply_cset($ps);
452$stats{simple_changeset}++;
453}
454
455#
456# prepare update git's index, based on what arch knows
457# about the pset, resolve parents, etc
458#
459
460my @commitlog = safe_pipe_capture($TLA,'cat-archive-log',$ps->{id});
461die "Error in cat-archive-log: $!" if $?;
462
463parselog($ps,\@commitlog);
464
465# imports don't give us good info
466# on added files. Shame on them
467if ($ps->{type} eq 'i' || $ps->{type} eq 't') {
468system('git-ls-files --deleted -z | '.
469'git-update-index --remove -z --stdin') == 0 or die "$! $?\n";
470system('git-ls-files --others -z | '.
471'git-update-index --add -z --stdin') == 0 or die "$! $?\n";
472}
473
474# TODO: handle removed_directories and renamed_directories:
475
476if (my $del = $ps->{removed_files}) {
477unlink @$del;
478while (@$del) {
479my @slice = splice(@$del, 0, 100);
480system('git-update-index','--remove','--',@slice) == 0 or
481die "Error in git-update-index --remove: $! $?\n";
482}
483}
484
485if (my $ren = $ps->{renamed_files}) { # renamed
486if (@$ren % 2) {
487die "Odd number of entries in rename!?";
488}
489
490while (@$ren) {
491my $from = shift @$ren;
492my $to = shift @$ren;
493
494unless (-d dirname($to)) {
495mkpath(dirname($to)); # will die on err
496}
497# print "moving $from $to";
498rename($from, $to) or die "Error renaming '$from' '$to': $!\n";
499system('git-update-index','--remove','--',$from) == 0 or
500die "Error in git-update-index --remove: $! $?\n";
501system('git-update-index','--add','--',$to) == 0 or
502die "Error in git-update-index --add: $! $?\n";
503}
504}
505
506if (my $add = $ps->{new_files}) {
507while (@$add) {
508my @slice = splice(@$add, 0, 100);
509system('git-update-index','--add','--',@slice) == 0 or
510die "Error in git-update-index --add: $! $?\n";
511}
512}
513
514if (my $mod = $ps->{modified_files}) {
515while (@$mod) {
516my @slice = splice(@$mod, 0, 100);
517system('git-update-index','--',@slice) == 0 or
518die "Error in git-update-index: $! $?\n";
519}
520}
521return 1; # we successfully applied the changeset
522}
523
524if ($opt_f) {
525print "Will import patchsets using the fast strategy\n",
526"Renamed directories and permission changes will be missed\n";
527*process_patchset = *process_patchset_fast;
528} else {
529print "Using the default (accurate) import strategy.\n",
530"Things may be a bit slow\n";
531*process_patchset = *process_patchset_accurate;
532}
533
534foreach my $ps (@psets) {
535# process patchsets
536$ps->{branch} = git_branchname($ps->{id});
537
538#
539# ensure we have a clean state
540#
541if (my $dirty = `git-diff-files`) {
542die "Unclean tree when about to process $ps->{id} " .
543" - did we fail to commit cleanly before?\n$dirty";
544}
545die $! if $?;
546
547#
548# skip commits already in repo
549#
550if (ptag($ps->{id})) {
551$opt_v && print " * Skipping already imported: $ps->{id}\n";
552next;
553}
554
555print " * Starting to work on $ps->{id}\n";
556
557process_patchset($ps) or next;
558
559# warn "errors when running git-update-index! $!";
560my $tree = `git-write-tree`;
561die "cannot write tree $!" if $?;
562chomp $tree;
563
564#
565# Who's your daddy?
566#
567my @par;
568if ( -e "$git_dir/refs/heads/$ps->{branch}") {
569if (open HEAD, "<","$git_dir/refs/heads/$ps->{branch}") {
570my $p = <HEAD>;
571close HEAD;
572chomp $p;
573push @par, '-p', $p;
574} else {
575if ($ps->{type} eq 's') {
576warn "Could not find the right head for the branch $ps->{branch}";
577}
578}
579}
580
581if ($ps->{merges}) {
582push @par, find_parents($ps);
583}
584
585#
586# Commit, tag and clean state
587#
588$ENV{TZ} = 'GMT';
589$ENV{GIT_AUTHOR_NAME} = $ps->{author};
590$ENV{GIT_AUTHOR_EMAIL} = $ps->{email};
591$ENV{GIT_AUTHOR_DATE} = $ps->{date};
592$ENV{GIT_COMMITTER_NAME} = $ps->{author};
593$ENV{GIT_COMMITTER_EMAIL} = $ps->{email};
594$ENV{GIT_COMMITTER_DATE} = $ps->{date};
595
596my $pid = open2(*READER, *WRITER,'git-commit-tree',$tree,@par)
597or die $!;
598print WRITER $ps->{summary},"\n\n";
599
600# only print message if it's not empty, to avoid a spurious blank line;
601# also append an extra newline, so there's a blank line before the
602# following "git-archimport-id:" line.
603print WRITER $ps->{message},"\n\n" if ($ps->{message} ne "");
604
605# make it easy to backtrack and figure out which Arch revision this was:
606print WRITER 'git-archimport-id: ',$ps->{id},"\n";
607
608close WRITER;
609my $commitid = <READER>; # read
610chomp $commitid;
611close READER;
612waitpid $pid,0; # close;
613
614if (length $commitid != 40) {
615die "Something went wrong with the commit! $! $commitid";
616}
617#
618# Update the branch
619#
620open HEAD, ">","$git_dir/refs/heads/$ps->{branch}";
621print HEAD $commitid;
622close HEAD;
623system('git-update-ref', 'HEAD', "$ps->{branch}");
624
625# tag accordingly
626ptag($ps->{id}, $commitid); # private tag
627if ($opt_T || $ps->{type} eq 't' || $ps->{type} eq 'i') {
628tag($ps->{id}, $commitid);
629}
630print " * Committed $ps->{id}\n";
631print " + tree $tree\n";
632print " + commit $commitid\n";
633$opt_v && print " + commit date is $ps->{date} \n";
634$opt_v && print " + parents: ",join(' ',@par),"\n";
635}
636
637if ($opt_v) {
638foreach (sort keys %stats) {
639print" $_: $stats{$_}\n";
640}
641}
642exit 0;
643
644# used by the accurate strategy:
645sub sync_to_ps {
646my $ps = shift;
647my $tree_dir = $tmp.'/'.tree_dirname($ps->{id});
648
649$opt_v && print "sync_to_ps($ps->{id}) method: ";
650
651if (-d $tree_dir) {
652if ($ps->{type} eq 't') {
653$opt_v && print "get (tag)\n";
654# looks like a tag-only or (worse,) a mixed tags/changeset branch,
655# can't rely on replay to work correctly on these
656rmtree($tree_dir);
657safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
658$stats{get_tag}++;
659} else {
660my $tree_id = arch_tree_id($tree_dir);
661if ($ps->{parent_id} && ($ps->{parent_id} eq $tree_id)) {
662# the common case (hopefully)
663$opt_v && print "replay\n";
664safe_pipe_capture($TLA,'replay','-d',$tree_dir,$ps->{id});
665$stats{replay}++;
666} else {
667# getting one tree is usually faster than getting two trees
668# and applying the delta ...
669rmtree($tree_dir);
670$opt_v && print "apply-delta\n";
671safe_pipe_capture($TLA,'get','--no-pristine',
672$ps->{id},$tree_dir);
673$stats{get_delta}++;
674}
675}
676} else {
677# new branch work
678$opt_v && print "get (new tree)\n";
679safe_pipe_capture($TLA,'get','--no-pristine',$ps->{id},$tree_dir);
680$stats{get_new}++;
681}
682
683# added -I flag to rsync since we're going to fast! AIEEEEE!!!!
684system('rsync','-aI','--delete','--exclude',$git_dir,
685# '--exclude','.arch-inventory',
686'--exclude','.arch-ids','--exclude','{arch}',
687'--exclude','+*','--exclude',',*',
688"$tree_dir/",'./') == 0 or die "Cannot rsync $tree_dir: $! $?";
689return $tree_dir;
690}
691
692sub apply_import {
693my $ps = shift;
694my $bname = git_branchname($ps->{id});
695
696mkpath($tmp);
697
698safe_pipe_capture($TLA,'get','-s','--no-pristine',$ps->{id},"$tmp/import");
699die "Cannot get import: $!" if $?;
700system('rsync','-aI','--delete', '--exclude',$git_dir,
701'--exclude','.arch-ids','--exclude','{arch}',
702"$tmp/import/", './');
703die "Cannot rsync import:$!" if $?;
704
705rmtree("$tmp/import");
706die "Cannot remove tempdir: $!" if $?;
707
708
709return 1;
710}
711
712sub apply_cset {
713my $ps = shift;
714
715mkpath($tmp);
716
717# get the changeset
718safe_pipe_capture($TLA,'get-changeset',$ps->{id},"$tmp/changeset");
719die "Cannot get changeset: $!" if $?;
720
721# apply patches
722if (`find $tmp/changeset/patches -type f -name '*.patch'`) {
723# this can be sped up considerably by doing
724# (find | xargs cat) | patch
725# but that can get mucked up by patches
726# with missing trailing newlines or the standard
727# 'missing newline' flag in the patch - possibly
728# produced with an old/buggy diff.
729# slow and safe, we invoke patch once per patchfile
730`find $tmp/changeset/patches -type f -name '*.patch' -print0 | grep -zv '{arch}' | xargs -iFILE -0 --no-run-if-empty patch -p1 --forward -iFILE`;
731die "Problem applying patches! $!" if $?;
732}
733
734# apply changed binary files
735if (my @modified = `find $tmp/changeset/patches -type f -name '*.modified'`) {
736foreach my $mod (@modified) {
737chomp $mod;
738my $orig = $mod;
739$orig =~ s/\.modified$//; # lazy
740$orig =~ s!^\Q$tmp\E/changeset/patches/!!;
741#print "rsync -p '$mod' '$orig'";
742system('rsync','-p',$mod,"./$orig");
743die "Problem applying binary changes! $!" if $?;
744}
745}
746
747# bring in new files
748system('rsync','-aI','--exclude',$git_dir,
749'--exclude','.arch-ids',
750'--exclude', '{arch}',
751"$tmp/changeset/new-files-archive/",'./');
752
753# deleted files are hinted from the commitlog processing
754
755rmtree("$tmp/changeset");
756}
757
758
759# =for reference
760# notes: *-files/-directories keys cannot have spaces, they're always
761# pika-escaped. Everything after the first newline
762# A log entry looks like:
763# Revision: moodle-org--moodle--1.3.3--patch-15
764# Archive: arch-eduforge@catalyst.net.nz--2004
765# Creator: Penny Leach <penny@catalyst.net.nz>
766# Date: Wed May 25 14:15:34 NZST 2005
767# Standard-date: 2005-05-25 02:15:34 GMT
768# New-files: lang/de/.arch-ids/block_glossary_random.php.id
769# lang/de/.arch-ids/block_html.php.id
770# New-directories: lang/de/help/questionnaire
771# lang/de/help/questionnaire/.arch-ids
772# Renamed-files: .arch-ids/db_sears.sql.id db/.arch-ids/db_sears.sql.id
773# db_sears.sql db/db_sears.sql
774# Removed-files: lang/be/docs/.arch-ids/release.html.id
775# lang/be/docs/.arch-ids/releaseold.html.id
776# Modified-files: admin/cron.php admin/delete.php
777# admin/editor.html backup/lib.php backup/restore.php
778# New-patches: arch-eduforge@catalyst.net.nz--2004/moodle-org--moodle--1.3.3--patch-15
779# Summary: Updating to latest from MOODLE_14_STABLE (1.4.5+)
780# summary can be multiline with a leading space just like the above fields
781# Keywords:
782#
783# Updating yadda tadda tadda madda
784sub parselog {
785my ($ps, $log) = @_;
786my $key = undef;
787
788# headers we want that contain filenames:
789my %want_headers = (
790new_files => 1,
791modified_files => 1,
792renamed_files => 1,
793renamed_directories => 1,
794removed_files => 1,
795removed_directories => 1,
796);
797
798chomp (@$log);
799while ($_ = shift @$log) {
800if (/^Continuation-of:\s*(.*)/) {
801$ps->{tag} = $1;
802$key = undef;
803} elsif (/^Summary:\s*(.*)$/ ) {
804# summary can be multiline as long as it has a leading space.
805# we squeeze it onto a single line, though.
806$ps->{summary} = [ $1 ];
807$key = 'summary';
808} elsif (/^Creator: (.*)\s*<([^\>]+)>/) {
809$ps->{author} = $1;
810$ps->{email} = $2;
811$key = undef;
812# any *-files or *-directories can be read here:
813} elsif (/^([A-Z][a-z\-]+):\s*(.*)$/) {
814my $val = $2;
815$key = lc $1;
816$key =~ tr/-/_/; # too lazy to quote :P
817if ($want_headers{$key}) {
818push @{$ps->{$key}}, split(/\s+/, $val);
819} else {
820$key = undef;
821}
822} elsif (/^$/) {
823last; # remainder of @$log that didn't get shifted off is message
824} elsif ($key) {
825if (/^\s+(.*)$/) {
826if ($key eq 'summary') {
827push @{$ps->{$key}}, $1;
828} else { # files/directories:
829push @{$ps->{$key}}, split(/\s+/, $1);
830}
831} else {
832$key = undef;
833}
834}
835}
836
837# drop leading empty lines from the log message
838while (@$log && $log->[0] eq '') {
839shift @$log;
840}
841if (exists $ps->{summary} && @{$ps->{summary}}) {
842$ps->{summary} = join(' ', @{$ps->{summary}});
843}
844elsif (@$log == 0) {
845$ps->{summary} = 'empty commit message';
846} else {
847$ps->{summary} = $log->[0] . '...';
848}
849$ps->{message} = join("\n",@$log);
850
851# skip Arch control files, unescape pika-escaped files
852foreach my $k (keys %want_headers) {
853next unless (defined $ps->{$k});
854my @tmp = ();
855foreach my $t (@{$ps->{$k}}) {
856next unless length ($t);
857next if $t =~ m!\{arch\}/!;
858next if $t =~ m!\.arch-ids/!;
859# should we skip this?
860next if $t =~ m!\.arch-inventory$!;
861# tla cat-archive-log will give us filenames with spaces as file\(sp)name - why?
862# we can assume that any filename with \ indicates some pika escaping that we want to get rid of.
863if ($t =~ /\\/ ){
864$t = (safe_pipe_capture($TLA,'escape','--unescaped',$t))[0];
865}
866push @tmp, $t;
867}
868$ps->{$k} = \@tmp;
869}
870}
871
872# write/read a tag
873sub tag {
874my ($tag, $commit) = @_;
875
876if ($opt_o) {
877$tag =~ s|/|--|g;
878} else {
879my $patchname = $tag;
880$patchname =~ s/.*--//;
881$tag = git_branchname ($tag) . '--' . $patchname;
882}
883
884if ($commit) {
885open(C,">","$git_dir/refs/tags/$tag")
886or die "Cannot create tag $tag: $!\n";
887print C "$commit\n"
888or die "Cannot write tag $tag: $!\n";
889close(C)
890or die "Cannot write tag $tag: $!\n";
891print " * Created tag '$tag' on '$commit'\n" if $opt_v;
892} else { # read
893open(C,"<","$git_dir/refs/tags/$tag")
894or die "Cannot read tag $tag: $!\n";
895$commit = <C>;
896chomp $commit;
897die "Error reading tag $tag: $!\n" unless length $commit == 40;
898close(C)
899or die "Cannot read tag $tag: $!\n";
900return $commit;
901}
902}
903
904# write/read a private tag
905# reads fail softly if the tag isn't there
906sub ptag {
907my ($tag, $commit) = @_;
908
909# don't use subdirs for tags yet, it could screw up other porcelains
910$tag =~ s|/|,|g;
911
912my $tag_file = "$ptag_dir/$tag";
913my $tag_branch_dir = dirname($tag_file);
914mkpath($tag_branch_dir) unless (-d $tag_branch_dir);
915
916if ($commit) { # write
917open(C,">",$tag_file)
918or die "Cannot create tag $tag: $!\n";
919print C "$commit\n"
920or die "Cannot write tag $tag: $!\n";
921close(C)
922or die "Cannot write tag $tag: $!\n";
923$rptags{$commit} = $tag
924unless $tag =~ m/--base-0$/;
925} else { # read
926# if the tag isn't there, return 0
927unless ( -s $tag_file) {
928return 0;
929}
930open(C,"<",$tag_file)
931or die "Cannot read tag $tag: $!\n";
932$commit = <C>;
933chomp $commit;
934die "Error reading tag $tag: $!\n" unless length $commit == 40;
935close(C)
936or die "Cannot read tag $tag: $!\n";
937unless (defined $rptags{$commit}) {
938$rptags{$commit} = $tag;
939}
940return $commit;
941}
942}
943
944sub find_parents {
945#
946# Identify what branches are merging into me
947# and whether we are fully merged
948# git-merge-base <headsha> <headsha> should tell
949# me what the base of the merge should be
950#
951my $ps = shift;
952
953my %branches; # holds an arrayref per branch
954# the arrayref contains a list of
955# merged patches between the base
956# of the merge and the current head
957
958my @parents; # parents found for this commit
959
960# simple loop to split the merges
961# per branch
962foreach my $merge (@{$ps->{merges}}) {
963my $branch = git_branchname($merge);
964unless (defined $branches{$branch} ){
965$branches{$branch} = [];
966}
967push @{$branches{$branch}}, $merge;
968}
969
970#
971# foreach branch find a merge base and walk it to the
972# head where we are, collecting the merged patchsets that
973# Arch has recorded. Keep that in @have
974# Compare that with the commits on the other branch
975# between merge-base and the tip of the branch (@need)
976# and see if we have a series of consecutive patches
977# starting from the merge base. The tip of the series
978# of consecutive patches merged is our new parent for
979# that branch.
980#
981foreach my $branch (keys %branches) {
982
983# check that we actually know about the branch
984next unless -e "$git_dir/refs/heads/$branch";
985
986my $mergebase = safe_pipe_capture(qw(git-merge-base), $branch, $ps->{branch});
987if ($?) {
988# Don't die here, Arch supports one-way cherry-picking
989# between branches with no common base (or any relationship
990# at all beforehand)
991warn "Cannot find merge base for $branch and $ps->{branch}";
992next;
993}
994chomp $mergebase;
995
996# now walk up to the mergepoint collecting what patches we have
997my $branchtip = git_rev_parse($ps->{branch});
998my @ancestors = `git-rev-list --topo-order $branchtip ^$mergebase`;
999my %have; # collected merges this branch has
1000foreach my $merge (@{$ps->{merges}}) {
1001$have{$merge} = 1;
1002}
1003my %ancestorshave;
1004foreach my $par (@ancestors) {
1005$par = commitid2pset($par);
1006if (defined $par->{merges}) {
1007foreach my $merge (@{$par->{merges}}) {
1008$ancestorshave{$merge}=1;
1009}
1010}
1011}
1012# print "++++ Merges in $ps->{id} are....\n";
1013# my @have = sort keys %have; print Dumper(\@have);
1014
1015# merge what we have with what ancestors have
1016%have = (%have, %ancestorshave);
1017
1018# see what the remote branch has - these are the merges we
1019# will want to have in a consecutive series from the mergebase
1020my $otherbranchtip = git_rev_parse($branch);
1021my @needraw = `git-rev-list --topo-order $otherbranchtip ^$mergebase`;
1022my @need;
1023foreach my $needps (@needraw) { # get the psets
1024$needps = commitid2pset($needps);
1025# git-rev-list will also
1026# list commits merged in via earlier
1027# merges. we are only interested in commits
1028# from the branch we're looking at
1029if ($branch eq $needps->{branch}) {
1030push @need, $needps->{id};
1031}
1032}
1033
1034# print "++++ Merges from $branch we want are....\n";
1035# print Dumper(\@need);
1036
1037my $newparent;
1038while (my $needed_commit = pop @need) {
1039if ($have{$needed_commit}) {
1040$newparent = $needed_commit;
1041} else {
1042last; # break out of the while
1043}
1044}
1045if ($newparent) {
1046push @parents, $newparent;
1047}
1048
1049
1050} # end foreach branch
1051
1052# prune redundant parents
1053my %parents;
1054foreach my $p (@parents) {
1055$parents{$p} = 1;
1056}
1057foreach my $p (@parents) {
1058next unless exists $psets{$p}{merges};
1059next unless ref $psets{$p}{merges};
1060my @merges = @{$psets{$p}{merges}};
1061foreach my $merge (@merges) {
1062if ($parents{$merge}) {
1063delete $parents{$merge};
1064}
1065}
1066}
1067
1068@parents = ();
1069foreach (keys %parents) {
1070push @parents, '-p', ptag($_);
1071}
1072return @parents;
1073}
1074
1075sub git_rev_parse {
1076my $name = shift;
1077my $val = safe_pipe_capture(qw(git-rev-parse), $name);
1078die "Error: git-rev-parse $name" if $?;
1079chomp $val;
1080return $val;
1081}
1082
1083# resolve a SHA1 to a known patchset
1084sub commitid2pset {
1085my $commitid = shift;
1086chomp $commitid;
1087my $name = $rptags{$commitid}
1088|| die "Cannot find reverse tag mapping for $commitid";
1089$name =~ s|,|/|;
1090my $ps = $psets{$name}
1091|| (print Dumper(sort keys %psets)) && die "Cannot find patchset for $name";
1092return $ps;
1093}
1094
1095
1096# an alternative to `command` that allows input to be passed as an array
1097# to work around shell problems with weird characters in arguments
1098sub safe_pipe_capture {
1099my @output;
1100if (my $pid = open my $child, '-|') {
1101@output = (<$child>);
1102close $child or die join(' ',@_).": $! $?";
1103} else {
1104exec(@_) or die "$! $?"; # exec() can fail the executable can't be found
1105}
1106return wantarray ? @output : join('',@output);
1107}
1108
1109# `tla logs -rf -d <dir> | head -n1` or `baz tree-id <dir>`
1110sub arch_tree_id {
1111my $dir = shift;
1112chomp( my $ret = (safe_pipe_capture($TLA,'logs','-rf','-d',$dir))[0] );
1113return $ret;
1114}
1115
1116sub archive_reachable {
1117my $archive = shift;
1118return 1 if $reachable{$archive};
1119return 0 if $unreachable{$archive};
1120
1121if (system "$TLA whereis-archive $archive >/dev/null") {
1122if ($opt_a && (system($TLA,'register-archive',
1123"http://mirrors.sourcecontrol.net/$archive") == 0)) {
1124$reachable{$archive} = 1;
1125return 1;
1126}
1127print STDERR "Archive is unreachable: $archive\n";
1128$unreachable{$archive} = 1;
1129return 0;
1130} else {
1131$reachable{$archive} = 1;
1132return 1;
1133}
1134}
1135