git
/
git-cvsimport.perl
1183 строки · 31.3 Кб
1#!/usr/bin/perl
2
3# This tool is copyright (c) 2005, Matthias Urlichs.
4# It is released under the Gnu Public License, version 2.
5#
6# The basic idea is to aggregate CVS check-ins into related changes.
7# Fortunately, "cvsps" does that for us; all we have to do is to parse
8# its output.
9#
10# Checking out the files is done by a single long-running CVS connection
11# / server process.
12#
13# The head revision is on branch "origin" by default.
14# You can change that with the '-o' option.
15
16use 5.008001;
17use strict;
18use warnings;
19use Getopt::Long;
20use File::Spec;
21use File::Temp qw(tempfile tmpnam);
22use File::Path qw(mkpath);
23use File::Basename qw(basename dirname);
24use Time::Local;
25use IO::Socket;
26use IO::Pipe;
27use POSIX qw(strftime tzset dup2 ENOENT);
28use IPC::Open2;
29use Git qw(get_tz_offset);
30
# Writes to a CVS server that has gone away should fail with an error
# instead of killing the whole import with SIGPIPE.
$SIG{PIPE} = 'IGNORE';

# Dates are handled in UTC except while a per-author zone is in effect.
set_timezone('UTC');

# Command line / configuration options.  These are package variables so
# that read_repo_config() can assign them through symbolic references.
our (
    $opt_h, $opt_o, $opt_v, $opt_k, $opt_u,
    $opt_d, $opt_p, $opt_C, $opt_z, $opt_i,
    $opt_P, $opt_s, $opt_m, @opt_M, $opt_A,
    $opt_S, $opt_L, $opt_a, $opt_r, $opt_R,
);

# Author conversion tables, each keyed by the CVS user name.
my (%conv_author_name, %conv_author_email, %conv_author_tz);
36
# Print an optional error message followed by the usage summary on
# STDERR, then terminate the program with exit status 1.
sub usage(;$) {
    my ($reason) = @_;
    if ($reason) {
        print STDERR "Error: $reason\n";
    }
    print STDERR <<END;
usage: git cvsimport # fetch/update GIT from CVS
[-o branch-for-HEAD] [-h] [-v] [-d CVSROOT] [-A author-conv-file]
[-p opts-for-cvsps] [-P file] [-C GIT_repository] [-z fuzz] [-i] [-k]
[-u] [-s subst] [-a] [-m] [-M regex] [-S regex] [-L commitlimit]
[-r remote] [-R] [CVS_module]
END
    exit 1;
}
49
# Load author mappings from $file into %conv_author_name,
# %conv_author_email and %conv_author_tz.  Dies if the file cannot be
# opened; lines in no recognized format are silently ignored.
sub read_author_info($) {
    my ($file) = @_;
    open my $in, '<', "$file" or die("Failed to open $file: $!\n");

    while (my $line = <$in>) {
        # Expected format is this:
        #   exon=Andreas Ericsson <ae@op5.se>
        if ($line =~ m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*$/) {
            my ($login, $name, $email) = ($1, $2, $3);
            $conv_author_name{$login}  = $name;
            $conv_author_email{$login} = $email;
        }
        # or with an optional timezone:
        #   spawn=Simon Pawn <spawn@frog-pond.org> America/Chicago
        elsif ($line =~ m/^(\S+?)\s*=\s*(.+?)\s*<(.+)>\s*(\S+?)\s*$/) {
            my ($login, $name, $email, $zone) = ($1, $2, $3, $4);
            $conv_author_name{$login}  = $name;
            $conv_author_email{$login} = $email;
            $conv_author_tz{$login}    = $zone;
        }
        # However, we also read from CVSROOT/users format
        # to ease migration.
        elsif ($line =~ /^(\w+):(['"]?)(.+?)\2\s*$/) {
            my ($login, $mapped) = ($1, $3);
            if ($mapped =~ /^\s*(.*?)\s*<(.*)>\s*$/) {
                my ($name, $email) = ($1, $2);
                $conv_author_name{$login}  = $name;
                $conv_author_email{$login} = $email;
            }
            elsif ($mapped =~ /^<?(.*)>?$/) {
                # Only an address was given; the login doubles as the name.
                my $email = $1;
                $conv_author_name{$login}  = $login;
                $conv_author_email{$login} = $email;
            }
        }
        # NEEDSWORK: Maybe warn on unrecognized lines?
    }
    close($in);
}
89
# Write the author conversion tables to $file, one "user=Name <email>"
# line per CVS user, followed by " timezone" when one is known.  This is
# a format read_author_info() accepts.
#
# Users are written in sorted order so that the file is stable between
# runs.  Dies if the file cannot be opened or if writing fails; buffered
# write errors only show up at close time, so close is checked too.
sub write_author_info($) {
    my ($file) = @_;
    open my $out, '>', $file or
        die("Failed to open $file for writing: $!");

    foreach my $user (sort keys %conv_author_name) {
        my $entry = "$user=$conv_author_name{$user} <$conv_author_email{$user}>";
        $entry .= " $conv_author_tz{$user}" if ($conv_author_tz{$user});
        print $out "$entry\n"
            or die("Failed to write to $file: $!");
    }
    close($out)
        or die("Failed to write to $file: $!");
}
102
# Switch the process time zone to the given zone name.
#
# Versions of perl before 5.10.0 may not automatically check $TZ each
# time localtime is run (most platforms will do so only the first time).
# Calling tzset() after every change of the environment variable works
# around that.
sub set_timezone {
    my ($zone) = @_;
    $ENV{TZ} = $zone;
    tzset();
}
111
# Maps getopts specs to the name used for them under "cvsimport." in the
# git configuration.  Specs not listed here use their option letter; an
# undef value means the option is never read from the configuration.
my %longmap = (
    'A:' => 'authors-file',
    'M:' => 'merge-regex',
    'S:' => 'ignore-paths',
    'R'  => 'track-revisions',
    'P:' => undef,
);
120
# Seed the $opt_* variables from "git config cvsimport.*".
#
# Takes the getopts specification string.  Options without a trailing
# ':' are read with --bool.  A value is only stored when the matching
# $opt_* variable is still false, so it acts as a default.
sub read_repo_config {
    # Split the string between characters, unless there is a ':'
    # So "abc:de" becomes ["a", "b", "c:", "d", "e"]
    my @specs = split(/ *(?!:)/, shift);

    for my $spec (@specs) {
        (my $letter = $spec) =~ s/://g;

        my $cmd = 'git config';
        $cmd .= ' --bool' unless $spec =~ /:$/;

        my $conf_key = $letter;
        if (exists $longmap{$spec}) {
            # An uppercase option like -R cannot be
            # expressed in the configuration, as the
            # variable names are downcased.
            $conf_key = $longmap{$spec};
            next unless defined $conf_key;
            $conf_key =~ s/-//g;
        }

        chomp(my $value = `$cmd --get cvsimport.$conf_key`);
        next unless $value;
        next if $cmd =~ /--bool/ && $value eq 'false';

        # Assign to the package variable $opt_<letter> by name.
        no strict 'refs';
        my $var = "opt_" . $letter;
        $$var = $value unless $$var;
    }
}
150
my $opts = "haivmkuo:d:p:r:C:z:s:M:P:A:S:L:R";
read_repo_config($opts);
Getopt::Long::Configure( 'no_ignore_case', 'bundling' );

# Turn the Getopt::Std specification into a Getopt::Long one,
# with support for multiple -M options.
my @getopt_specs;
for my $spec (split( /(?!:)/, $opts )) {
    $spec =~ s/:/=s/;
    push @getopt_specs, ($spec =~ /M/ ? "$spec\@" : $spec);
}
GetOptions(@getopt_specs) or usage();
usage() if $opt_h;

# Without a module on the command line, fall back to the configured one.
if (@ARGV == 0) {
    chomp(my $module = `git config --get cvsimport.module`);
    if ($? == 0) {
        push(@ARGV, $module);
    }
}
usage("You can't specify more than one CVS module") unless @ARGV <= 1;

# CVSROOT comes from -d, then a CVS checkout in the current directory,
# then the environment.
if ($opt_d) {
    $ENV{"CVSROOT"} = $opt_d;
}
elsif (-f 'CVS/Root') {
    open my $root_fh, '<', 'CVS/Root' or die 'Failed to open CVS/Root';
    $opt_d = <$root_fh>;
    chomp $opt_d;
    close $root_fh;
    $ENV{"CVSROOT"} = $opt_d;
}
elsif ($ENV{"CVSROOT"}) {
    $opt_d = $ENV{"CVSROOT"};
}
else {
    usage("CVSROOT needs to be set");
}
$opt_s ||= "-";
$opt_a ||= 0;

my $git_tree = $opt_C || ".";

# With -r the branches are stored as remote-tracking refs.
my $remote;
if (defined $opt_r) {
    $opt_o ||= "master";
    $remote = 'refs/remotes/' . $opt_r;
}
else {
    $opt_o ||= "origin";
    $remote = 'refs/heads';
}

# The CVS module: command line (or config) first, then the checkout in
# the current directory.
my $cvs_tree;
if (@ARGV == 1) {
    $cvs_tree = $ARGV[0];
}
elsif (-f 'CVS/Repository') {
    open my $repo_fh, '<', 'CVS/Repository' or
        die 'Failed to open CVS/Repository';
    $cvs_tree = <$repo_fh>;
    chomp $cvs_tree;
    close $repo_fh;
}
else {
    usage("CVS module has to be specified");
}

# Patterns used to guess merge parents from commit messages.
our @mergerx = ();
if ($opt_m) {
    @mergerx = ( qr/\b(?:from|of|merge|merging|merged) ([-\w]+)/i );
}
if (@opt_M) {
    push (@mergerx, map { qr/$_/ } @opt_M);
}

# Remember UTC of our starting time
# we'll want to avoid importing commits
# that are too recent
our $starttime = time();

# Unbuffer STDERR, then make STDOUT the default output handle again.
select(STDERR);
$| = 1;
select(STDOUT);
222
223
224package CVSconn;
225# Basic CVS dialog.
226# We're only interested in connecting and downloading, so ...
227
228use File::Spec;
229use File::Temp qw(tempfile);
230use POSIX qw(strftime dup2);
231
# Constructor: CVSconn->new($cvsroot, $subdir).
#
# Strips trailing slashes from the CVSROOT, connects to the server right
# away via conn(), and remembers $subdir as the prefix for file requests.
# May be called on a class name or on an existing object.
sub new {
    my ($what, $repo, $subdir) = @_;
    my $class = ref($what) || $what;

    my $self = bless { 'buffer' => "" }, $class;

    $repo =~ s#/+$##;
    $self->{'fullrep'} = $repo;
    $self->conn();

    $self->{'subdir'} = $subdir;
    $self->{'lines'}  = undef;

    return $self;
}
249
# Look up a scrambled password in one CVS password file.
#
#   $cvspass - array ref [ $file, $delim ]: path of the password file and
#              the regex separating the CVSROOT from the password
#   @cvsroot - CVSROOT spellings to accept
#
# Returns the password of the first line whose CVSROOT equals one of
# @cvsroot, or undef if there is none or the file cannot be opened.
sub find_password_entry {
    my ($cvspass, @cvsroot) = @_;
    my ($file, $delim) = @$cvspass;
    my $pass;

    # Three-argument open: the path is taken literally, so whitespace or
    # mode characters in it cannot change what gets opened.
    if (open(my $fh, '<', $file)) {
        # :pserver:cvs@mea.tmt.tele.fi:/cvsroot/zmailer Ah<Z
        CVSPASSFILE:
        while (my $entry = <$fh>) {
            chomp $entry;
            # Drop the "/1 " style version prefix of newer files.
            $entry =~ s/^\/\d+\s+//;
            my ($root, $secret) = split($delim, $entry, 2);
            # An empty line yields no fields at all.
            next CVSPASSFILE unless defined $root;
            for my $wanted (@cvsroot) {
                if ($root eq $wanted) {
                    $pass = $secret;
                    last CVSPASSFILE;
                }
            }
        }
        close($fh);
    }
    return $pass;
}
274
# (Re)connect to the CVS server named by $self->{'fullrep'} and perform
# the initial protocol handshake.
#
# For ":pserver:" roots a TCP connection is opened, optionally through an
# HTTP proxy, and the pserver authentication exchange is done.  Any other
# root forks a "cvs server" process, run through rsh for remote roots,
# and talks to it over two pipes.  On success the handles are stored in
# $self->{'socketo'} / $self->{'socketi'} and the repository path in
# $self->{'repo'}.  Dies on any connection or handshake failure.
sub conn {
    my $self = shift;
    my $repo = $self->{'fullrep'};
    if ($repo =~ s/^:pserver(?:([^:]*)):(?:(.*?)(?::(.*?))?@)?([^:\/]*)(?::(\d*))?//) {
        my ($param,$user,$pass,$serv,$port) = ($1,$2,$3,$4,$5);

        my ($proxyhost,$proxyport);
        if ($param && ($param =~ m/proxy=([^;]+)/)) {
            $proxyhost = $1;
            # Default proxyport, if not specified, is 8080.
            $proxyport = 8080;
            if ($ENV{"CVS_PROXY_PORT"}) {
                $proxyport = $ENV{"CVS_PROXY_PORT"};
            }
            if ($param =~ m/proxyport=([^;]+)/) {
                $proxyport = $1;
            }
        }
        $repo ||= '/';

        # if username is not explicit in CVSROOT, then use current user, as cvs would
        $user=(getlogin() || $ENV{'LOGNAME'} || $ENV{'USER'} || "anonymous") unless $user;
        # $rr2 is the CVSROOT spelling without a port; it is only a valid
        # lookup key when the port was left at its default.
        my $rr2 = "-";
        unless ($port) {
            $rr2 = ":pserver:$user\@$serv:$repo";
            $port=2401;
        }
        my $rr = ":pserver:$user\@$serv:$port$repo";

        if ($pass) {
            $pass = $self->_scramble($pass);
        } else {
            my @cvspass = ([$ENV{'HOME'}."/.cvspass", qr/\s/],
                           [$ENV{'HOME'}."/.cvs/cvspass", qr/=/]);
            my @loc = ();
            foreach my $cvspass (@cvspass) {
                my $p = find_password_entry($cvspass, $rr, $rr2);
                if ($p) {
                    push @loc, $cvspass->[0];
                    $pass = $p;
                }
            }

            if (1 < @loc) {
                die("Multiple cvs password files have ".
                    "entries for CVSROOT $opt_d: @loc");
            } elsif (!$pass) {
                # "A" is the scrambled form of the empty password.
                $pass = "A";
            }
        }

        my ($s, $rep);
        if ($proxyhost) {

            # Use a HTTP Proxy. Only works for HTTP proxies that
            # don't require user authentication
            #
            # See: https://www.ietf.org/rfc/rfc2817.txt

            $s = IO::Socket::INET->new(PeerHost => $proxyhost, PeerPort => $proxyport);
            die "Socket to $proxyhost: $!\n" unless defined $s;
            $s->write("CONNECT $serv:$port HTTP/1.1\r\nHost: $serv:$port\r\n\r\n")
                or die "Write to $proxyhost: $!\n";
            $s->flush();

            $rep = <$s>;
            die "Proxy connect: no response from $proxyhost\n"
                unless defined $rep;

            # The answer should look like 'HTTP/1.x 2yy ....'
            if (!($rep =~ m#^HTTP/1\.. 2[0-9][0-9]#)) {
                die "Proxy connect: $rep\n";
            }
            # Skip up to the empty line of the proxy server output
            # including the response headers.
            while ($rep = <$s>) {
                last if (!defined $rep ||
                         $rep eq "\n" ||
                         $rep eq "\r\n");
            }
        } else {
            $s = IO::Socket::INET->new(PeerHost => $serv, PeerPort => $port);
            die "Socket to $serv: $!\n" unless defined $s;
        }

        $s->write("BEGIN AUTH REQUEST\n$repo\n$user\n$pass\nEND AUTH REQUEST\n")
            or die "Write to $serv: $!\n";
        $s->flush();

        $rep = <$s>;

        # EOF here means the server hung up without answering.
        if (!defined $rep || $rep ne "I LOVE YOU\n") {
            $rep="<unknown>" unless $rep;
            die "AuthReply: $rep\n";
        }
        $self->{'socketo'} = $s;
        $self->{'socketi'} = $s;
    } else { # local or ext: Fork off our own cvs server.
        my $pr = IO::Pipe->new();
        my $pw = IO::Pipe->new();
        my $pid = fork();
        die "Fork: $!\n" unless defined $pid;
        my $cvs = 'cvs';
        $cvs = $ENV{CVS_SERVER} if exists $ENV{CVS_SERVER};
        my $rsh = 'rsh';
        $rsh = $ENV{CVS_RSH} if exists $ENV{CVS_RSH};

        my @cvs = ($cvs, 'server');
        my ($local, $user, $host);
        $local = $repo =~ s/:local://;
        if (!$local) {
            $repo =~ s/:ext://;
            $local = !($repo =~ s/^(?:([^\@:]+)\@)?([^:]+)://);
            ($user, $host) = ($1, $2);
        }
        if (!$local) {
            if ($user) {
                unshift @cvs, $rsh, '-l', $user, $host;
            } else {
                unshift @cvs, $rsh, $host;
            }
        }

        unless ($pid) {
            # Child: wire the pipes to stdin/stdout and become the server.
            $pr->writer();
            $pw->reader();
            dup2($pw->fileno(),0);
            dup2($pr->fileno(),1);
            $pr->close();
            $pw->close();
            exec(@cvs) or do {
                # exec failed.  Leave immediately: falling through would
                # make the child continue the import as a second copy of
                # the parent.  _exit() also skips END blocks and object
                # cleanup that belong to the parent.
                print STDERR "Cannot exec @cvs: $!\n";
                POSIX::_exit(127);
            };
        }
        $pw->writer();
        $pr->reader();
        $self->{'socketo'} = $pw;
        $self->{'socketi'} = $pr;
    }
    $self->{'socketo'}->write("Root $repo\n");

    # Trial and error says that this probably is the minimum set
    $self->{'socketo'}->write("Valid-responses ok error Valid-requests Mode M Mbinary E Checked-in Created Updated Merged Removed\n");

    $self->{'socketo'}->write("valid-requests\n");
    $self->{'socketo'}->flush();

    my $rep=$self->readline();
    die "Failed to read from server" unless defined $rep;
    chomp($rep);
    if ($rep !~ s/^Valid-requests\s*//) {
        $rep="<unknown>" unless $rep;
        die "Expected Valid-requests from server, but got: $rep\n";
    }
    chomp(my $res=$self->readline());
    die "validReply: $res\n" if $res ne "ok";

    # Only send UseUnchanged when the server lists it as a valid request.
    $self->{'socketo'}->write("UseUnchanged\n") if $rep =~ /\bUseUnchanged\b/;
    $self->{'repo'} = $repo;
}
431
# Return the next line from the server's output handle, as delivered by
# that handle's getline() method.
sub readline {
    my $self = shift;
    my $from_server = $self->{'socketi'};
    return $from_server->getline();
}
436
# Send the request for revision $rev of file $fn (relative to the
# connection's subdir) to the server.
#
# Returns 1 once the whole request has been written and flushed, and
# marks the connection as expecting a response by setting 'lines' to 0.
# Returns undef as soon as a write or the flush fails.
sub _file {
    # Trial and error says this is a good way to do it. :-/
    my ($self, $fn, $rev) = @_;
    my $to_server = $self->{'socketo'};

    my @request = (
        "Argument -N\n",
        "Argument -P\n",
        # -kk: Linus' version doesn't use it - defaults to off
        ($opt_k ? ("Argument -kk\n") : ()),
        "Argument -r\n",
        "Argument $rev\n",
        "Argument --\n",
        "Argument $self->{'subdir'}/$fn\n",
        "Directory .\n",
        "$self->{'repo'}\n",
        # "Sticky T1.0\n",
        "co\n",
    );

    for my $chunk (@request) {
        $to_server->write($chunk) or return undef;
    }
    $to_server->flush() or return undef;

    $self->{'lines'} = 0;
    return 1;
}
# Consume the server's response to a file request, writing any file
# content to $fh.
#
# Returns the number of content bytes written when the response ends
# with "ok", -1 when the server reports the file as removed, and undef
# if the connection ends before the response is complete.  Dies on
# malformed or unexpected responses.
sub _line {
    # ... 'line' is a misnomer: one response may carry an entire file. ;-)
    my ($self, $fh) = @_;
    die "Not in lines" unless defined $self->{'lines'};

    my $bytes = 0;
    my $line;
    while (defined($line = $self->readline())) {
        # A typical response looks like this:
        #   M U gnupg-cvs-rep/AUTHORS
        #   Updated gnupg-cvs-rep/
        #   /daten/src/rsync/gnupg-cvs-rep/AUTHORS
        #   /AUTHORS/1.1///T1.1
        #   u=rw,g=rw,o=rw
        #   0
        #   ok

        if ($line =~ /^(?:Created|Updated) /) {
            $self->readline();    # path
            $self->readline();    # Entries line
            my $mode = $self->readline();
            chomp $mode;
            $self->{'mode'} = $mode;
            my $cnt = $self->readline();
            defined $cnt
                or die "EOF from server after 'Changed'\n";
            chomp $cnt;
            die "Duh: Filesize $cnt" unless $cnt =~ /^\d+$/;
            # The file body replaces anything counted so far.
            $bytes = $self->_fetchfile($fh, $cnt);
        }
        elsif ($line =~ s/^ //) {
            print $fh $line;
            $bytes += length($line);
        }
        elsif ($line =~ /^M\b/) {
            # output, do nothing
        }
        elsif ($line =~ /^Mbinary\b/) {
            my $cnt = $self->readline();
            die "EOF from server after 'Mbinary'" unless defined $cnt;
            chomp $cnt;
            die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1;
            $bytes += $self->_fetchfile($fh, $cnt);
        }
        else {
            chomp $line;
            return $bytes if $line eq "ok";

            if ($line =~ /^E /) {
                # error text from the server; ignored
            }
            elsif ($line =~ /^(Remove-entry|Removed) /i) {
                $self->readline();            # filename
                $line = $self->readline();    # OK
                chomp $line;
                die "Unknown: $line" if $line ne "ok";
                return -1;
            }
            else {
                die "Unknown: $line\n";
            }
        }
    }
    return undef;
}
# Fetch revision $rev of $fn into a temporary file.
#
# Returns ($tempfile_name, $result) where $result is what _line()
# returned: the byte count, or -1 if the file was removed.  If the first
# attempt fails the connection is re-established and the request retried
# once; a second failure is fatal.
sub file {
    my ($self, $fn, $rev) = @_;

    my ($fh, $name) = tempfile('gitcvs.XXXXXX',
                               DIR => File::Spec->tmpdir(), UNLINK => 1);

    my $res;
    if ($self->_file($fn, $rev)) {
        $res = $self->_line($fh);
    }

    unless (defined $res) {
        print STDERR "Server has gone away while fetching $fn $rev, retrying...\n";
        # Discard whatever the failed attempt wrote.
        truncate $fh, 0;
        $self->conn();
        $self->_file($fn, $rev) or die "No file command send";
        $res = $self->_line($fh);
        defined $res or die "Retry failed";
    }
    close($fh);

    return ($name, $res);
}
# Copy exactly $cnt bytes from the server to $fh, reading at most 1 MiB
# per call.  Returns the number of bytes copied; dies if a read fails or
# returns no data before $cnt bytes have arrived.
sub _fetchfile {
    my ($self, $fh, $cnt) = @_;
    my $copied = 0;
    my $chunk  = 1024 * 1024;

    while ($cnt) {
        # Never ask for more than is still outstanding.
        $chunk = $cnt if $chunk > $cnt;

        my $buf;
        my $num = $self->{'socketi'}->read($buf, $chunk);
        if (not defined $num or $num <= 0) {
            die "Server: Filesize $cnt: $num: $!\n";
        }
        print $fh $buf;
        $copied += $num;
        $cnt    -= $num;
    }
    return $copied;
}
557
# Scramble a clear-text password for the pserver protocol.  The result
# is "A" followed by one substituted character per input character; a
# false password (undef, "", "0") yields just "A".
sub _scramble {
    my ($self, $pass) = @_;

    return "A" unless $pass;

    # from cvs/src/scramble.c
    my @shifts = (
          0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        114,120, 53, 79, 96,109, 72,108, 70, 64, 76, 67,116, 74, 68, 87,
        111, 52, 75,119, 49, 34, 82, 81, 95, 65,112, 86,118,110,122,105,
         41, 57, 83, 43, 46,102, 40, 89, 38,103, 45, 50, 42,123, 91, 35,
        125, 55, 54, 66,124,126, 59, 47, 92, 71,115, 78, 88,107,106, 56,
         36,121,117,104,101,100, 69, 73, 99, 63, 94, 93, 39, 37, 61, 48,
         58,113, 32, 90, 44, 98, 60, 51, 33, 97, 62, 77, 84, 80, 85,223,
        225,216,187,166,229,189,222,188,141,249,148,200,184,136,248,190,
        199,170,181,204,138,232,218,183,255,234,220,247,213,203,226,193,
        174,172,228,252,217,201,131,230,197,211,145,238,161,179,160,212,
        207,221,254,173,202,146,224,151,140,196,205,130,135,133,143,246,
        192,159,244,239,185,168,215,144,139,165,180,157,147,186,214,176,
        227,231,219,169,175,156,206,198,129,164,150,210,154,177,134,127,
        182,128,158,208,162,132,167,209,149,241,153,251,237,236,171,195,
        243,233,253,240,194,250,191,155,142,137,245,235,163,242,178,152
    );

    my $scrambled = "A";
    foreach my $char (split("", $pass)) {
        $scrambled .= pack("C", $shifts[ord($char)]);
    }
    return $scrambled;
}
594
package main;

# Connect to the CVS server once; every file revision is fetched through
# this object.
my $cvs = 'CVSconn'->new($opt_d, $cvs_tree);
598
599
# Convert a cvsps date, "YYYY/MM/DD hh:mm[:ss]" taken as UTC, to seconds
# since the epoch.  Years below 70 are read as 20xx, other years below
# 1000 as 19xx.  Dies with "Unparsable date" if $d does not contain a
# date in that format.
sub pdate($) {
    my ($d) = @_;
    # Match the argument itself, not whatever $_ happens to hold.
    my ($y, $mon, $day, $hour, $min, $sec) =
        $d =~ m#(\d{2,4})/(\d\d)/(\d\d)\s(\d\d):(\d\d)(?::(\d\d))?#
        or die "Unparsable date: $d\n";
    $y+=100 if $y<70;
    $y+=1900 if $y<1000;
    return timegm($sec||0,$min,$hour,$day,$mon-1,$y);
}
609
# Convert a CVS mode string such as "u=rw,g=rw,o=rw" to numeric
# permission bits.  Each comma-separated clause grants the listed
# permissions (r, w, x) to the listed classes (u, g, o).  Dies on any
# other character.
sub pmode($) {
    my ($mode) = @_;

    my %class_mask = (u => 0700, g => 0070, o => 0007);
    my %perm_mask  = (r => 0444, w => 0222, x => 0111);

    my ($bits, $perms, $classes) = (0, 0, 0);
    foreach my $ch (split(//, $mode)) {
        if ($ch eq ",") {
            # End of a clause: commit it and start over.
            $bits |= $perms & $classes;
            ($perms, $classes) = (0, 0);
        }
        elsif (exists $class_mask{$ch}) {
            $classes |= $class_mask{$ch};
        }
        elsif (exists $perm_mask{$ch}) {
            $perms |= $perm_mask{$ch};
        }
        elsif ($ch ne "=") {
            die "Unknown mode: $mode\n";
        }
    }
    return $bits | ($perms & $classes);
}
633
# Return the current working directory as printed by the external "pwd"
# command, without the trailing newline.
sub getwd() {
    chomp(my $cwd = `pwd`);
    return $cwd;
}
639
# True if the argument looks like a git object name: 40 or 64 lowercase
# hexadecimal digits.
sub is_oid {
    my ($candidate) = @_;
    return $candidate =~ /^(?:[a-f0-9]{40}|[a-f0-9]{64})$/;
}
644
# Resolve a ref name to an object name with "git rev-parse --verify".
# Returns the object name, or undef if git reports failure.
sub get_headref ($) {
    my ($ref) = @_;

    # Quote for the shell: close the quote, add an escaped ', reopen.
    (my $quoted = $ref) =~ s/'/'\\''/g;

    my $oid = `git rev-parse --verify '$quoted' 2>/dev/null`;
    if ($? != 0) {
        return undef;
    }
    chomp $oid;
    return $oid;
}
653
# Prefix put in front of relative file names given by the user; empty
# until the script changes into the git tree.
my $user_filename_prepend = '';

# Return a user-supplied file name unchanged when it is absolute, and
# with $user_filename_prepend in front otherwise.
sub munge_user_filename {
    my ($name) = @_;
    if (File::Spec->file_name_is_absolute($name)) {
        return $name;
    }
    return $user_filename_prepend . $name;
}
661
# Make sure the target tree exists, then work from inside it.
-d $git_tree
    or mkdir($git_tree,0777)
    or die "Could not create $git_tree: $!";
if ($git_tree ne '.') {
    # Remember where we started so that relative file names given on the
    # command line can still be resolved after the chdir.
    $user_filename_prepend = getwd() . '/';
    # Every git command below relies on the working directory, so a
    # failed chdir must not go unnoticed.
    chdir($git_tree)
        or die "Could not change directory to $git_tree: $!\n";
}

my $last_branch = "";
my $orig_branch = "";
my %branch_date;
my $tip_at_start = undef;

my $git_dir = $ENV{"GIT_DIR"} || ".git";
$git_dir = getwd()."/".$git_dir unless $git_dir =~ m#^/#;
$ENV{"GIT_DIR"} = $git_dir;
my $orig_git_index;
$orig_git_index = $ENV{GIT_INDEX_FILE} if exists $ENV{GIT_INDEX_FILE};

my %index; # holds filenames of one index per branch

unless (-d $git_dir) {
    # Fresh import: create the repository and start from an empty index.
    system(qw(git init));
    die "Cannot init the GIT db at $git_tree: $?\n" if $?;
    system(qw(git read-tree --empty));
    die "Cannot init an empty tree: $?\n" if $?;

    $last_branch = $opt_o;
    $orig_branch = "";
} else {
    # Incremental import: find the current branch and the date of the
    # newest commit on every branch imported so far.
    open(my $head_fh, "-|", qw(git symbolic-ref HEAD)) or
        die "Cannot run git symbolic-ref: $!\n";
    chomp ($last_branch = <$head_fh>);
    $last_branch = basename($last_branch);
    close($head_fh);
    unless ($last_branch) {
        warn "Cannot read the last branch name: $! -- assuming 'master'\n";
        $last_branch = "master";
    }
    $orig_branch = $last_branch;
    $tip_at_start = `git rev-parse --verify HEAD`;

    # Get the last import timestamps
    my $fmt = '($ref, $author) = (%(refname), %(author));';
    my @cmd = ('git', 'for-each-ref', '--perl', "--format=$fmt", $remote);
    open(my $refs_fh, "-|", @cmd) or die "Cannot run git for-each-ref: $!\n";
    while (defined(my $entry = <$refs_fh>)) {
        # Each line is Perl code, produced by --perl, that assigns these two.
        my ($ref, $author);
        eval($entry) || die "cannot eval refs list: $@";
        my ($head) = ($ref =~ m|^$remote/(.*)|);
        # Without this check a failed match would leave $1 holding the
        # branch name captured just above.
        $author =~ /^.*\s(\d+)\s[-+]\d{4}$/
            or die "Cannot parse author date of $ref: $author\n";
        $branch_date{$head} = $1;
    }
    close($refs_fh);
    if (!exists $branch_date{$opt_o}) {
        die "Branch '$opt_o' does not exist.\n".
            "Either use the correct '-o branch' option,\n".
            "or import to a new repository.\n";
    }
}

-d $git_dir
    or die "Could not create git subdir ($git_dir).\n";

# now we read (and possibly save) author-info as well
-f "$git_dir/cvs-authors" and
    read_author_info("$git_dir/cvs-authors");
if ($opt_A) {
    read_author_info(munge_user_filename($opt_A));
    write_author_info("$git_dir/cvs-authors");
}

# open .git/cvs-revisions, if requested
my $revision_map;
if (defined $opt_R) {
    open $revision_map, '>>', "$git_dir/cvs-revisions"
        or die "Can't open $git_dir/cvs-revisions for appending: $!\n";
}


#
# run cvsps into a file unless we are getting
# it passed as a file via $opt_P
#
my $cvspsfile;
unless ($opt_P) {
    print "Running cvsps...\n" if $opt_v;
    # Forking open: the child's STDOUT becomes our read handle.
    my $pid = open(my $cvsps_out, "-|");
    die "Cannot fork: $!\n" unless defined $pid;
    unless ($pid) {
        my @opt;
        @opt = split(/,/,$opt_p) if defined $opt_p;
        unshift @opt, '-z', $opt_z if defined $opt_z;
        unshift @opt, '-q' unless defined $opt_v;
        unless (defined($opt_p) && $opt_p =~ m/--no-cvs-direct/) {
            push @opt, '--cvs-direct';
        }
        exec("cvsps","--norc",@opt,"-u","-A",'--root',$opt_d,$cvs_tree);
        die "Could not start cvsps: $!\n";
    }
    my $cvspsfh;
    ($cvspsfh, $cvspsfile) = tempfile('gitXXXXXX', SUFFIX => '.cvsps',
                                      DIR => File::Spec->tmpdir());
    while (my $chunk = <$cvsps_out>) {
        print $cvspsfh $chunk;
    }
    close $cvsps_out;
    $? == 0 or die "git cvsimport: fatal: cvsps reported error\n";
    close $cvspsfh;
} else {
    $cvspsfile = munge_user_filename($opt_P);
}

# Three-argument open: the name may come straight from -P and must be
# taken literally.
open(CVS, '<', $cvspsfile) or die "Cannot open $cvspsfile: $!\n";

## cvsps output:
#---------------------
#PatchSet 314
#Date: 1999/09/18 13:03:59
#Author: wkoch
#Branch: STABLE-BRANCH-1-0
#Ancestor branch: HEAD
#Tag: (none)
#Log:
#    See ChangeLog: Sat Sep 18 13:03:28 CEST 1999 Werner Koch
#Members:
#    README:1.57->1.57.2.1
#    VERSION:1.96->1.96.2.1
#
#---------------------

# Position of the parser within a patchset.
my $state = 0;
792
# Apply one patchset to the current index through
# "git update-index -z --index-info".
#
#   @old - paths to remove from the index
#   @new - [ $mode, $object_id, $path ] triples to add or replace
#
# Dies if the command cannot be started, written to, or reports failure.
sub update_index (\@\@) {
    my ($removed, $added) = @_;

    open(my $fh, '|-', qw(git update-index -z --index-info))
        or die "unable to open git update-index: $!";

    # Mode 0 with an all-zero object name removes the path.
    my @deletions =
        map { "0 0000000000000000000000000000000000000000\t$_\0" } @$removed;
    my @additions =
        map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" } @$added;

    print $fh @deletions, @additions
        or die "unable to write to git update-index: $!";
    close $fh
        or die "unable to write to git update-index: $!";
    $? and die "git update-index reported error: $?";
}
808
# Run "git write-tree" on the current index and return the tree's object
# name.  Dies if the command fails or prints something that is not an
# object name.
sub write_tree () {
    open(my $fh, '-|', qw(git write-tree))
        or die "unable to open git write-tree: $!";
    chomp(my $tree = <$fh>);
    unless (is_oid($tree)) {
        die "Cannot get tree id ($tree): $!";
    }
    unless (close($fh)) {
        die "Error running git write-tree: $?\n";
    }
    print "Tree ID $tree\n" if $opt_v;
    return $tree;
}
820
# Fields of the patchset currently being parsed.
my (
    $patchset, $date,
    $author_name, $author_email, $author_tz,
    $branch, $ancestor, $tag, $logmsg,
);

# Work collected for the next commit: paths to delete, entries to add,
# paths skipped because of -S, and [path, revision] pairs for the
# revision map.  %ignorebranch lists branches that are not imported.
my (@old, @new, @skipped, %ignorebranch, @commit_revisions);

# commits that cvsps cannot place anywhere...
$ignorebranch{'#CVSPS_NO_BRANCH'} = 1;
826
# Turn the patchset collected in the file-level variables ($branch,
# $logmsg, @old, @new, ...) into a git commit on $remote/$branch, and
# create the tag named by $tag, if any.
#
# Dies if any of the git commands involved fails.  Returns early, after
# the commit has been made, when the sanitized tag name is empty.
sub commit {
    # Check the index recorded for THIS branch, not a literal key "branch".
    if ($branch eq $opt_o && !$index{$branch} &&
        !get_headref("$remote/$branch")) {
        # looks like an initial commit
        # use the index primed by git init
        $ENV{GIT_INDEX_FILE} = "$git_dir/index";
        $index{$branch} = "$git_dir/index";
    } else {
        # use an index per branch to speed up
        # imports of projects with many branches
        unless ($index{$branch}) {
            $index{$branch} = tmpnam();
            $ENV{GIT_INDEX_FILE} = $index{$branch};
            if ($ancestor) {
                system("git", "read-tree", "$remote/$ancestor");
            } else {
                system("git", "read-tree", "$remote/$branch");
            }
            die "read-tree failed: $?\n" if $?;
        }
    }
    $ENV{GIT_INDEX_FILE} = $index{$branch};

    update_index(@old, @new);
    @old = @new = ();
    my $tree = write_tree();
    my $parent = get_headref("$remote/$last_branch");
    print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v;

    my @commit_args;
    push @commit_args, ("-p", $parent) if $parent;

    # loose detection of merges
    # based on the commit msg
    foreach my $rx (@mergerx) {
        next unless $logmsg =~ $rx && $1;
        my $mparent = $1 eq 'HEAD' ? $opt_o : $1;
        if (my $sha1 = get_headref("$remote/$mparent")) {
            push @commit_args, '-p', "$remote/$mparent";
            print "Merge parent branch: $mparent\n" if $opt_v;
        }
    }

    # Compute the UTC offset in the author's zone, then switch back.
    set_timezone($author_tz);
    # $date is in the seconds since epoch format
    my $tz_offset = get_tz_offset($date);
    my $commit_date = "$date $tz_offset";
    set_timezone('UTC');
    $ENV{GIT_AUTHOR_NAME} = $author_name;
    $ENV{GIT_AUTHOR_EMAIL} = $author_email;
    $ENV{GIT_AUTHOR_DATE} = $commit_date;
    $ENV{GIT_COMMITTER_NAME} = $author_name;
    $ENV{GIT_COMMITTER_EMAIL} = $author_email;
    $ENV{GIT_COMMITTER_DATE} = $commit_date;
    my $pid = open2(my $commit_read, my $commit_write,
        'git', 'commit-tree', $tree, @commit_args);

    # compatibility with git2cvs
    substr($logmsg,32767) = "" if length($logmsg) > 32767;
    $logmsg =~ s/[\s\n]+\z//;

    if (@skipped) {
        $logmsg .= "\n\n\nSKIPPED:\n\t";
        $logmsg .= join("\n\t", @skipped) . "\n";
        @skipped = ();
    }

    print($commit_write "$logmsg\n") && close($commit_write)
        or die "Error writing to git commit-tree: $!\n";

    print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v;
    chomp(my $cid = <$commit_read>);
    is_oid($cid) or die "Cannot get commit id ($cid): $!\n";
    print "Commit ID $cid\n" if $opt_v;
    close($commit_read);

    waitpid($pid,0);
    die "Error running git commit-tree: $?\n" if $?;

    system('git' , 'update-ref', "$remote/$branch", $cid) == 0
        or die "Cannot write branch $branch for update: $!\n";

    if ($revision_map) {
        print $revision_map "@$_ $cid\n" for @commit_revisions;
    }
    @commit_revisions = ();

    if ($tag) {
        my ($xtag) = $tag;
        $xtag =~ s/\s+\*\*.*$//; # Remove stuff like ** INVALID ** and ** FUNKY **
        $xtag =~ tr/_/\./ if ( $opt_u );
        $xtag =~ s/[\/]/$opt_s/g;

        # See refs.c for these rules.
        # Tag cannot contain bad chars. (See bad_ref_char in refs.c.)
        $xtag =~ s/[ ~\^:\\\*\?\[]//g;
        # Other bad strings for tags:
        # (See check_refname_component in refs.c.)
        1 while $xtag =~ s/
            (?: \.\.        # Tag cannot contain '..'.
            |   \@\{        # Tag cannot contain '@{'.
            | ^ -           # Tag cannot begin with '-'.
            |   \.lock $    # Tag cannot end with '.lock'.
            | ^ \.          # Tag cannot begin...
            |   \. $        # ...or end with '.'
            )//xg;
        # Tag cannot be empty.
        if ($xtag eq '') {
            warn("warning: ignoring tag '$tag'",
            " with invalid tagname\n");
            return;
        }

        if (system('git' , 'tag', '-f', $xtag, $cid) != 0) {
            # We did our best to sanitize the tag, but still failed
            # for whatever reason. Bail out, and give the user
            # enough information to understand if/how we should
            # improve the translation in the future.
            if ($tag ne $xtag) {
                print "Translated '$tag' tag to '$xtag'\n";
            }
            die "Cannot create tag $xtag: $!\n";
        }

        print "Created tag '$xtag' on '$branch'\n" if $opt_v;
    }
};
954
# Parse the cvsps output one line at a time.  $state tracks the position
# within a patchset:
#    0  before the first separator line
#    1  expecting "PatchSet"
#    2  expecting "Date:"
#    3  expecting "Author:"
#    4  expecting "Branch:"
#    5  expecting the optional "Ancestor branch:"
#    6  expecting "Tag:"
#    7  expecting "Log:"
#    8  inside the log message, until "Members:"
#    9  inside the member list
#   10  after the blank line that ends the member list
#   11  skipping the rest of this patchset
my $commitcount = 1;
while (<CVS>) {
    chomp;
    if ($state == 0 and /^-+$/) {
        $state = 1;
    }
    elsif ($state == 0) {
        # No leading separator: re-examine this line in state 1.
        $state = 1;
        redo;
    }
    elsif (($state==0 or $state==1) and s/^PatchSet\s+//) {
        $patchset = 0+$_;
        $state = 2;
    }
    elsif ($state == 2 and s/^Date:\s+//) {
        $date = pdate($_);
        unless ($date) {
            print STDERR "Could not parse date: $_\n";
            $state = 0;
            next;
        }
        $state = 3;
    }
    elsif ($state == 3 and s/^Author:\s+//) {
        $author_tz = "UTC";
        s/\s+$//;
        if (/^(.*?)\s+<(.*)>/) {
            ($author_name, $author_email) = ($1, $2);
        }
        elsif ($conv_author_name{$_}) {
            $author_name  = $conv_author_name{$_};
            $author_email = $conv_author_email{$_};
            $author_tz    = $conv_author_tz{$_} if ($conv_author_tz{$_});
        }
        else {
            # Unknown user: the CVS login serves as both name and email.
            $author_name = $author_email = $_;
        }
        $state = 4;
    }
    elsif ($state == 4 and s/^Branch:\s+//) {
        s/\s+$//;
        tr/_/\./ if ( $opt_u );
        s/[\/]/$opt_s/g;
        $branch = $_;
        $state = 5;
    }
    elsif ($state == 5 and s/^Ancestor branch:\s+//) {
        s/\s+$//;
        $ancestor = $_;
        $ancestor = $opt_o if $ancestor eq "HEAD";
        $state = 6;
    }
    elsif ($state == 5) {
        # No ancestor line: re-examine this line in state 6.
        $ancestor = undef;
        $state = 6;
        redo;
    }
    elsif ($state == 6 and s/^Tag:\s+//) {
        s/\s+$//;
        $tag = ($_ eq "(none)") ? undef : $_;
        $state = 7;
    }
    elsif ($state == 7 and /^Log:/) {
        $logmsg = "";
        $state = 8;
    }
    elsif ($state == 8 and /^Members:/) {
        $branch = $opt_o if $branch eq "HEAD";
        if (defined $branch_date{$branch} and $branch_date{$branch} >= $date) {
            # skip
            print "skip patchset $patchset: $date before $branch_date{$branch}\n" if $opt_v;
            $state = 11;
            next;
        }
        if (!$opt_a && $starttime - 300 - (defined $opt_z ? $opt_z : 300) <= $date) {
            # skip if the commit is too recent
            # given that the cvsps default fuzz is 300s, we give ourselves another
            # 300s just in case -- this also prevents skipping commits
            # due to server clock drift
            print "skip patchset $patchset: $date too recent\n" if $opt_v;
            $state = 11;
            next;
        }
        if (exists $ignorebranch{$branch}) {
            print STDERR "Skipping $branch\n";
            $state = 11;
            next;
        }
        if ($ancestor) {
            # First patchset of a new branch: create it at its ancestor.
            if ($ancestor eq $branch) {
                print STDERR "Branch $branch erroneously stems from itself -- changed ancestor to $opt_o\n";
                $ancestor = $opt_o;
            }
            if (defined get_headref("$remote/$branch")) {
                print STDERR "Branch $branch already exists!\n";
                $state = 11;
                next;
            }
            my $ancestor_id = get_headref("$remote/$ancestor");
            unless ($ancestor_id) {
                print STDERR "Branch $ancestor does not exist!\n";
                $ignorebranch{$branch} = 1;
                $state = 11;
                next;
            }

            system(qw(git update-ref -m cvsimport),
                   "$remote/$branch", $ancestor_id);
            if ($? != 0) {
                print STDERR "Could not create branch $branch\n";
                $ignorebranch{$branch} = 1;
                $state = 11;
                next;
            }
        }
        $last_branch = $branch;
        $state = 9;
    }
    elsif ($state == 8) {
        $logmsg .= "$_\n";
    }
    elsif ($state == 9 and /^\s+(.+?):(INITIAL|\d+(?:\.\d+)+)->(\d+(?:\.\d+)+)\s*$/) {
        #	VERSION:1.96->1.96.2.1
        my ($path, $from, $rev) = ($1, $2, $3);
        my $is_initial = ($from eq "INITIAL");
        $path =~ s#^/+##;
        if ($opt_S && $path =~ m/$opt_S/) {
            print "SKIPPING $path v $rev\n";
            push(@skipped, $path);
            next;
        }
        push @commit_revisions, [$path, $rev];
        print "Fetching $path v $rev\n" if $opt_v;
        my ($tmpname, $size) = $cvs->file($path, $rev);
        if ($size == -1) {
            push(@old, $path);
            print "Drop $path\n" if $opt_v;
        }
        else {
            print "".($is_initial ? "New" : "Update")." $path: $size bytes\n" if $opt_v;
            # Store the content as a blob and read back its object name.
            my $pid = open(my $hash_out, '-|');
            die $! unless defined $pid;
            if (!$pid) {
                exec("git", "hash-object", "-w", $tmpname)
                    or die "Cannot create object: $!\n";
            }
            my $blob = <$hash_out>;
            chomp $blob;
            close $hash_out;
            my $mode = pmode($cvs->{'mode'});
            push(@new, [$mode, $blob, $path]); # may be resurrected!
        }
        unlink($tmpname);
    }
    elsif ($state == 9 and /^\s+(.+?):\d+(?:\.\d+)+->(\d+(?:\.\d+)+)\(DEAD\)\s*$/) {
        my ($path, $rev) = ($1, $2);
        $path =~ s#^/+##;
        push @commit_revisions, [$path, $rev];
        push(@old, $path);
        print "Delete $path\n" if $opt_v;
    }
    elsif ($state == 9 and /^\s*$/) {
        $state = 10;
    }
    elsif (($state == 9 or $state == 10) and /^-+$/) {
        # End of a patchset that is to be imported.
        $commitcount++;
        last if $opt_L && $commitcount > $opt_L;
        commit();
        if (($commitcount & 1023) == 0) {
            system(qw(git repack -a -d));
        }
        $state = 1;
    }
    elsif ($state == 11 and /^-+$/) {
        $state = 1;
    }
    elsif (/^-+$/) { # end of unknown-line processing
        $state = 1;
    }
    elsif ($state != 11) { # ignore stuff when skipping
        print STDERR "* UNKNOWN LINE * $_\n";
    }
}
# Commit whatever patchset was still pending when the input ended.
commit() if $branch and $state != 11;
1126
# The cvsps output file is ours to delete unless the user supplied it.
if (!$opt_P) {
    unlink($cvspsfile);
}

# The heuristic of repacking every 1024 commits can leave a
# lot of unpacked data.  If there is more than 1MB worth of
# not-packed objects, repack once more.
my $count_report = `git count-objects`;
if ($count_report =~ /^(\d+) objects, (\d+) kilobytes$/) {
    my $kb = $2;
    if (1024 < $kb) {
        system(qw(git repack -a -d));
    }
}

# Remove the temporary per-branch index files, but never the real index.
foreach my $git_index (values %index) {
    next if $git_index eq "$git_dir/index";
    unlink($git_index);
}

# Put GIT_INDEX_FILE back the way we found it.
if (defined $orig_git_index) {
    $ENV{GIT_INDEX_FILE} = $orig_git_index;
}
else {
    delete $ENV{GIT_INDEX_FILE};
}

# Now switch back to the branch we were in before all of this happened
if ($orig_branch) {
    print "DONE.\n" if $opt_v;
    exit 0 if $opt_i;

    my $tip_at_end = `git rev-parse --verify HEAD`;
    if ($tip_at_start ne $tip_at_end) {
        # HEAD moved during the import: bring the work tree up to date.
        chomp($tip_at_start);
        chomp($tip_at_end);
        print "Fetched into the current branch.\n" if $opt_v;
        system(qw(git read-tree -u -m),
               $tip_at_start, $tip_at_end);
        die "Fast-forward update failed: $?\n" if $?;
    }
    else {
        system(qw(git merge -m cvsimport), "$remote/$opt_o");
        die "Could not merge $opt_o into the current branch.\n" if $?;
    }
}
else {
    # Fresh repository: point master at the imported head and check out.
    $orig_branch = "master";
    print "DONE; creating $orig_branch branch\n" if $opt_v;
    unless (defined get_headref('refs/heads/master')) {
        system("git", "update-ref", "refs/heads/master", "$remote/$opt_o");
    }
    if ($opt_r && $opt_o ne 'HEAD') {
        system("git", "symbolic-ref", "$remote/HEAD", "$remote/$opt_o");
    }
    system('git', 'update-ref', 'HEAD', "$orig_branch");
    unless ($opt_i) {
        system(qw(git checkout -f));
        die "checkout failed: $?\n" if $?;
    }
}
1184