git
416 строк · 10.9 Кб
1#!/usr/bin/perl
2#
3# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
4#
5# ------------------------------------------------------------------------
6#
7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program; if not, see <http://www.gnu.org/licenses/>.
18#
19# ------------------------------------------------------------------------
20
21=pod
22
23=head1 NAME
24
25import-directories - Import bits and pieces to Git.
26
27=head1 SYNOPSIS
28
29B<import-directories.perl> F<configfile> F<outputfile>
30
31=head1 DESCRIPTION
32
Script to import projects that have been "version controlled" by copying
the source directory to a new location and editing it there into a real
version control system. Handles projects with arbitrary branching
and version trees, taking a file describing the inputs and generating a
file compatible with the L<git-fast-import(1)> format.
38
39=head1 CONFIGURATION FILE
40
41=head2 Format
42
43The configuration file is based on the standard I<.ini> format.
44
45; Comments start with semi-colons
46[section]
47key=value
48
49Please see below for information on how to escape special characters.
50
51=head2 Global configuration
52
53Global configuration is done in the B<[config]> section, which should be
54the first section in the file. Configuration can be changed by
55repeating configuration sections later on.
56
57[config]
58; configure conversion of CRLFs. "convert" means that all CRLFs
59; should be converted into LFs (suitable for the core.autocrlf
60; setting set to true in Git). "none" means that all data is
61; treated as binary.
62crlf=convert
63
64=head2 Revision configuration
65
66Each revision that is to be imported is described in three
67sections. Revisions should be defined in topological order, so
68that a revision's parent has always been defined when a new revision
69is introduced. All the sections for one revision must be defined
70before defining the next revision.
71
72Each revision is assigned a unique numerical identifier. The
73numbers do not need to be consecutive, nor monotonically
74increasing.
75
76For instance, if your configuration file contains only the two
77revisions 4711 and 42, where 4711 is the initial commit, the
78only requirement is that 4711 is completely defined before 42.
79
80=pod
81
82=head3 Revision description section
83
84A section whose section name is just an integer gives meta-data
85about the revision.
86
87[3]
88; author sets the author of the revisions
89author=Peter Krefting <peter@softwolves.pp.se>
90; branch sets the branch that the revision should be committed to
91branch=master
92; parent describes the revision that is the parent of this commit
93; (optional)
94parent=1
95; merges describes a revision that is merged into this commit
96; (optional; can be repeated)
97merges=2
98; selects one file to take the timestamp from
99; (optional; if unspecified, the most recent file from the .files
100; section is used)
101timestamp=3/source.c
102
103=head3 Revision contents section
104
A section whose section name is an integer followed by B<.files>
describes all the files included in this revision. If a file that
was available previously is not included in this revision, it will
be removed.
109
110If an on-disk revision is incomplete, you can point to files from
111a previous revision. There are no restrictions on where the source
112files are located, nor on their names.
113
114[3.files]
115; the key is the path inside the repository, the value is the path
116; as seen from the importer script.
117source.c=ver-3.00/source.c
118source.h=ver-2.99/source.h
119readme.txt=ver-3.00/introduction to the project.txt
120
121File names are treated as byte strings (but please see below on
122quoting rules), and should be stored in the configuration file in
123the encoding that should be used in the generated repository.
124
125=head3 Revision commit message section
126
127A section whose section name is an integer followed by B<.message>
128gives the commit message. This section is read verbatim, up until
129the beginning of the next section. As such, a commit message may not
130contain a line that begins with an opening square bracket ("[") and
131ends with a closing square bracket ("]"), unless they are surrounded
132by whitespace or other characters.
133
134[3.message]
135Implement foobar.
136; trailing blank lines are ignored.
137
138=cut
139
# Globals: pragmas plus the parser and generator state shared by the
# whole script.
use strict;
use warnings;
use integer;

# CRLF handling currently in effect: 1 converts CRLF to LF, 0 leaves
# file data untouched.
my $crlfmode = 0;

# Revision identifiers in the order in which they were read.
my @revs;

# Revision identifier => mark of its commit. Every table below is keyed
# by that mark.
my %revmap;
my %message;    # commit message
my %files;      # path inside the repository => mark of the file's blob
my %author;     # value of the "author" option
my %branch;     # value of the "branch" option
my %parent;     # value of the "parent" option (a revision identifier)
my %merges;     # list of values of the "merges" option
my %time;       # time stamp used for the commit
my %timesource; # value of the "timestamp" option

# Type of the section being read: 0 before the first section,
# 1 for [config], 2 for [N], 3 for [N.files] and 4 for [N.message].
my $sectiontype = 0;

# Identifier of the revision being read.
my $rev = 0;

# Next unused mark; commits and blobs draw from the same sequence.
my $mark = 1;
150
# Check command line: a configuration file and an output file are both
# required. Show the embedded documentation when fewer than two
# arguments are given, or when the first one looks like a help option
# (-h, --h, --help, ...).
if (@ARGV < 2 || $ARGV[0] =~ /^--?h/)
{
    exec 'perldoc', $0;
    # Only reached if perldoc could not be started.
    exit 1;
}
157
# Open configuration (first command line argument) for reading. The
# system error is included in the message so that the user can tell a
# missing file from, for instance, a permission problem.
my $config = $ARGV[0];
open CFG, '<', $config or die "Cannot open configuration file \"$config\": $!\n";

# Open output (second command line argument) for writing. The stream
# contains raw file data, so it is written in binary mode.
my $output = $ARGV[1];
open OUT, '>', $output or die "Cannot create output file \"$output\": $!\n";
binmode OUT;
166
# Marks of the revisions whose time stamp has been taken from the file
# selected with their "timestamp" option. Once a revision is listed
# here, more recent files no longer override its time stamp.
my %timepinned;

# Read the configuration file one line at a time. Section headers
# switch $sectiontype (1 = [config], 2 = [N], 3 = [N.files],
# 4 = [N.message]); all other lines are interpreted according to the
# section they appear in. File blobs are written to the output as soon
# as their line in a .files section is read.
LINE: while (my $line = <CFG>)
{
    $line =~ s/\r?\n$//;
    # Blank lines are only significant inside commit messages
    next LINE if $sectiontype != 4 && $line eq '';
    # Comment lines are skipped in every section, commit messages included
    next LINE if $line =~ /^;/;
    my $oldrev = $rev;

    # Sections
    if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
    {
        my ($name, $number, $suffix) = ($1, $2, $3);
        if ($name eq 'config')
        {
            $sectiontype = 1;
        }
        elsif ($suffix eq '')
        {
            $sectiontype = 2;
            $rev = $number;
            # Create a new revision
            die "Duplicate rev: $line\n " if defined $revmap{$rev};
            print "Reading revision $rev\n";
            push @revs, $rev;
            $revmap{$rev} = $mark ++;
            $time{$revmap{$rev}} = 0;
        }
        elsif ($suffix eq '.files' || $suffix eq '.message')
        {
            $sectiontype = $suffix eq '.files' ? 3 : 4;
            $rev = $number;
            # The section must belong to the revision being defined, and
            # that revision must have had its description section first;
            # otherwise there is no mark to store the data under.
            die "Revision mismatch: $line\n "
                unless $rev == $oldrev && defined $revmap{$rev};
        }
        else
        {
            die "Internal parse error: $line\n ";
        }
        next LINE;
    }

    # Commit message: collected verbatim, lines joined by newlines
    if ($sectiontype == 4)
    {
        my $current = $revmap{$rev};
        $message{$current} .= "\n" if defined $message{$current};
        $message{$current} .= $line;
        next LINE;
    }

    # Everything else is a key and a value, with surrounding
    # whitespace ignored
    die "Parse error: $line\n "
        unless $line =~ m"^\s*([^\s].*=.*[^\s])\s*$";
    my ($key, $value) = parsekeyvaluepair($1);

    # Global configuration
    if (1 == $sectiontype)
    {
        if ($key eq 'crlf')
        {
            if ($value eq 'convert')
            {
                $crlfmode = 1;
                next LINE;
            }
            if ($value eq 'none')
            {
                $crlfmode = 0;
                next LINE;
            }
        }
        die "Unknown configuration option: $line\n ";
    }

    # Revision specification
    if (2 == $sectiontype)
    {
        my $current = $revmap{$rev};
        if ($key eq 'author')
        {
            $author{$current} = $value;
        }
        elsif ($key eq 'branch')
        {
            $branch{$current} = $value;
        }
        elsif ($key eq 'parent')
        {
            $parent{$current} = $value;
        }
        elsif ($key eq 'timestamp')
        {
            $timesource{$current} = $value;
        }
        elsif ($key eq 'merges')
        {
            push @{$merges{$current}}, $value;
        }
        else
        {
            die "Unknown revision option: $line\n ";
        }
        next LINE;
    }

    # Filespecs
    if (3 == $sectiontype)
    {
        # Add the file and create a marker
        die "File not found: $line\n " unless -f $value;
        my $current = $revmap{$rev};
        ${$files{$current}}{$key} = $mark;
        my $time = fileblob($value, $crlfmode, $mark ++);

        # The file selected with the "timestamp" directive decides the
        # revision time stamp, no matter where in the section it is
        # listed. Until that file has been seen (or if none was
        # selected), the most recent file seen so far is used.
        if (defined $timesource{$current} && $timesource{$current} eq $value)
        {
            $time{$current} = $time;
            $timepinned{$current} = 1;
        }
        elsif (!$timepinned{$current} && $time > $time{$current})
        {
            $time{$current} = $time;
        }
    }

    # Key/value pairs appearing before the first section are ignored.
}
close CFG;
277
# Start spewing out data for git-fast-import: one commit per revision,
# in the order in which the revisions were read. Each commit replaces
# the whole tree ("deleteall") with the files listed for the revision.
foreach my $commit (@revs)
{
    my $mark = $revmap{$commit};

    # Refuse to write a commit that git-fast-import cannot accept. The
    # time stamp starts out as 0 and is only set by the revision's
    # files, so a false value means that no file was listed.
    die "No branch defined for $commit\n" unless defined $branch{$mark};
    die "No author defined for $commit\n" unless defined $author{$mark};
    die "No timestamp defined for $commit (no files?)\n" unless $time{$mark};
    die "No message defined for $commit\n" unless defined $message{$mark};

    # Progress
    print OUT "progress Creating revision $commit\n";

    # Branch and commit id
    print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";

    # Author and timestamp
    print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";

    # Commit message, without any trailing blank lines
    my $message = $message{$mark};
    $message =~ s/\n+\z//;
    print OUT "data ", length($message), "\n", $message, "\n";

    # Parent and any merges; both must refer to revisions that have
    # been defined, since they are written as marks
    if (defined $parent{$mark})
    {
        my $parentmark = $revmap{$parent{$mark}};
        die "Unknown parent revision $parent{$mark} for $commit\n"
            unless defined $parentmark;
        print OUT "from :", $parentmark, "\n";
    }
    if (defined $merges{$mark})
    {
        foreach my $merge (@{$merges{$mark}})
        {
            my $mergemark = $revmap{$merge};
            die "Unknown merged revision $merge for $commit\n"
                unless defined $mergemark;
            print OUT "merge :", $mergemark, "\n";
        }
    }

    # Output file marks
    print OUT "deleteall\n"; # start from scratch
    foreach my $file (sort keys %{$files{$mark}})
    {
        print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
    }
    print OUT "\n";
}

# Buffered write errors (a full disk, for instance) only surface when
# the file is closed, so check for them here.
close OUT or die "Cannot write output file \"$output\": $!\n";
318
# Create one file blob
#
# Reads $filename and writes it to OUT as a git-fast-import "blob"
# command carrying the mark $mark, preceded by a progress line. If
# $crlfmode is true, every CRLF in the data is converted to LF first.
#
# Returns the modification time of the file.
# Dies, including the system error in the message, if the file cannot
# be opened or read.
sub fileblob
{
    my ($filename, $crlfmode, $mark) = @_;

    # Read the whole file as raw bytes. This is done before anything is
    # written so that an unreadable file does not leave a blob header
    # without data in the output.
    open my $fh, '<', $filename or die "Cannot read $filename: $!\n";
    binmode $fh;
    my ($size, $mtime) = (stat($fh))[7,9];
    my $data = '';
    defined(read $fh, $data, $size) or die "Cannot read $filename: $!\n";
    close $fh;
    $data =~ s/\r\n/\n/g if $crlfmode;

    # Import the file; the size is that of the converted data
    print OUT "progress Importing $filename\nblob\nmark :$mark\n";
    print OUT "data ", length($data), "\n", $data, "\n";

    return $mtime;
}
337
# Parse a key=value pair
#
# Takes one "key=value" string (without surrounding whitespace) and
# returns the list (key, value), each of them unquoted and unescaped by
# unescape(). Dies with "Parse error" if no separating equal sign is
# found.
sub parsekeyvaluepair
{
=pod

=head2 Escaping special characters

Key and value strings may be enclosed in quotes, in which case
whitespace inside the quotes is preserved. Additionally, an equal
sign may be included in the key by preceding it with a backslash.
For example:

    "key1 "=value1
    key2=" value2"
    key\=3=value3
    key4=value=4
    "key5""=value5

Here the first key is "key1 " (note the trailing white-space) and the
second value is " value2" (note the leading white-space). The third
key contains an equal sign "key=3" and so does the fourth value, which
does not need to be escaped. The fifth key contains a trailing quote,
which does not need to be escaped since it is inside a surrounding
quote.

=cut
    my $pair = shift;

    # Separate key and value by the first non-quoted equal sign.
    #
    # A key that starts with a quote extends to the first unescaped
    # quote directly followed by an equal sign, so it may contain equal
    # signs itself. Any other key ends at the first equal sign that is
    # not preceded by a backslash. The matches are non-greedy so that
    # equal signs in the value are left alone.
    my ($key, $value);
    if ($pair =~ /^("(?:.*?[^\\])?")=(.*)$/ || $pair =~ /^(.*?[^\\])=(.*)$/)
    {
        ($key, $value) = ($1, $2);
    }
    else
    {
        die "Parse error: $pair\n ";
    }

    # Unquote and unescape the key and value separately
    return (unescape($key), unescape($value));
}
380
# Unquote and unescape
#
# Takes one key or value string and returns it with a pair of enclosing
# quotes removed, if present, and with every backslash escape replaced
# by the character that follows the backslash.
sub unescape
{
    my ($text) = @_;

    # Strip the enclosing quotes, if any. A closing quote preceded by a
    # backslash does not count, in which case both quotes stay.
    my ($inner) = $text =~ /^"(.*[^\\])"$/;
    $text = $inner if defined $inner;

    # Then drop the backslash from each escaped character.
    # For later: Handle special sequences like \t ?
    $text =~ s/\\(.)/$1/g;

    return $text;
}
399
400__END__
401
402=pod
403
404=head1 EXAMPLES
405
B<import-directories.perl> F<project.import> F<project.fi>
407
408=head1 AUTHOR
409
Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.seE<gt>
411
412This program is free software; you can redistribute it and/or modify
413it under the terms of the GNU General Public License as published by
414the Free Software Foundation.
415
416=cut
417