git
/
convert.c
2056 строк · 49.7 Кб
1#define USE_THE_REPOSITORY_VARIABLE2
3#include "git-compat-util.h"4#include "advice.h"5#include "config.h"6#include "convert.h"7#include "copy.h"8#include "gettext.h"9#include "hex.h"10#include "object-store-ll.h"11#include "attr.h"12#include "run-command.h"13#include "quote.h"14#include "read-cache-ll.h"15#include "sigchain.h"16#include "pkt-line.h"17#include "sub-process.h"18#include "trace.h"19#include "utf8.h"20#include "merge-ll.h"21
/*
 * convert.c - convert a file when checking it out and checking it in.
 *
 * This should use the pathname to decide on whether it wants to do some
 * more interesting conversions (automatic gzip/unzip, general format
 * conversions etc etc), but by default it just does automatic CRLF<->LF
 * translation when the "text" attribute or "auto_crlf" option is set.
 */

/*
 * Stat bits reported by gather_convert_stats().  Consumers test BIN
 * first; when BIN is set the TXT bits are not meaningful.
 */
#define CONVERT_STAT_BITS_TXT_LF    0x1
#define CONVERT_STAT_BITS_TXT_CRLF  0x2
#define CONVERT_STAT_BITS_BIN       0x4

/* Byte-class counters filled in by gather_stats(). */
struct text_stat {
	/* NUL, CR, LF and CRLF counts */
	unsigned nul, lonecr, lonelf, crlf;

	/* These are just approximations! */
	unsigned printable, nonprintable;
};

44static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)45{
46unsigned long i;47
48memset(stats, 0, sizeof(*stats));49
50for (i = 0; i < size; i++) {51unsigned char c = buf[i];52if (c == '\r') {53if (i+1 < size && buf[i+1] == '\n') {54stats->crlf++;55i++;56} else57stats->lonecr++;58continue;59}60if (c == '\n') {61stats->lonelf++;62continue;63}64if (c == 127)65/* DEL */66stats->nonprintable++;67else if (c < 32) {68switch (c) {69/* BS, HT, ESC and FF */70case '\b': case '\t': case '\033': case '\014':71stats->printable++;72break;73case 0:74stats->nul++;75/* fall through */76default:77stats->nonprintable++;78}79}80else81stats->printable++;82}83
84/* If file ends with EOF then don't count this EOF as non-printable. */85if (size >= 1 && buf[size-1] == '\032')86stats->nonprintable--;87}
88
89/*
90* The same heuristics as diff.c::mmfile_is_binary()
91* We treat files with bare CR as binary
92*/
93static int convert_is_binary(const struct text_stat *stats)94{
95if (stats->lonecr)96return 1;97if (stats->nul)98return 1;99if ((stats->printable >> 7) < stats->nonprintable)100return 1;101return 0;102}
103
104static unsigned int gather_convert_stats(const char *data, unsigned long size)105{
106struct text_stat stats;107int ret = 0;108if (!data || !size)109return 0;110gather_stats(data, size, &stats);111if (convert_is_binary(&stats))112ret |= CONVERT_STAT_BITS_BIN;113if (stats.crlf)114ret |= CONVERT_STAT_BITS_TXT_CRLF;115if (stats.lonelf)116ret |= CONVERT_STAT_BITS_TXT_LF;117
118return ret;119}
120
121static const char *gather_convert_stats_ascii(const char *data, unsigned long size)122{
123unsigned int convert_stats = gather_convert_stats(data, size);124
125if (convert_stats & CONVERT_STAT_BITS_BIN)126return "-text";127switch (convert_stats) {128case CONVERT_STAT_BITS_TXT_LF:129return "lf";130case CONVERT_STAT_BITS_TXT_CRLF:131return "crlf";132case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:133return "mixed";134default:135return "none";136}137}
138
/*
 * Line-ending description (see gather_convert_stats_ascii()) for the
 * blob recorded in the index for `path`.  The returned string is static
 * and must not be freed.
 */
const char *get_cached_convert_stats_ascii(struct index_state *istate,
					   const char *path)
{
	const char *ret;
	/* initialised so a failed read never forwards an indeterminate size */
	unsigned long sz = 0;
	void *data = read_blob_data_from_index(istate, path, &sz);
	ret = gather_convert_stats_ascii(data, sz);
	free(data);
	return ret;
}

150const char *get_wt_convert_stats_ascii(const char *path)151{
152const char *ret = "";153struct strbuf sb = STRBUF_INIT;154if (strbuf_read_file(&sb, path, 0) >= 0)155ret = gather_convert_stats_ascii(sb.buf, sb.len);156strbuf_release(&sb);157return ret;158}
159
160static int text_eol_is_crlf(void)161{
162if (auto_crlf == AUTO_CRLF_TRUE)163return 1;164else if (auto_crlf == AUTO_CRLF_INPUT)165return 0;166if (core_eol == EOL_CRLF)167return 1;168if (core_eol == EOL_UNSET && EOL_NATIVE == EOL_CRLF)169return 1;170return 0;171}
172
173static enum eol output_eol(enum convert_crlf_action crlf_action)174{
175switch (crlf_action) {176case CRLF_BINARY:177return EOL_UNSET;178case CRLF_TEXT_CRLF:179return EOL_CRLF;180case CRLF_TEXT_INPUT:181return EOL_LF;182case CRLF_UNDEFINED:183case CRLF_AUTO_CRLF:184return EOL_CRLF;185case CRLF_AUTO_INPUT:186return EOL_LF;187case CRLF_TEXT:188case CRLF_AUTO:189/* fall through */190return text_eol_is_crlf() ? EOL_CRLF : EOL_LF;191}192warning(_("illegal crlf_action %d"), (int)crlf_action);193return core_eol;194}
195
196static void check_global_conv_flags_eol(const char *path,197struct text_stat *old_stats, struct text_stat *new_stats,198int conv_flags)199{
200if (old_stats->crlf && !new_stats->crlf ) {201/*202* CRLFs would not be restored by checkout
203*/
204if (conv_flags & CONV_EOL_RNDTRP_DIE)205die(_("CRLF would be replaced by LF in %s"), path);206else if (conv_flags & CONV_EOL_RNDTRP_WARN)207warning(_("in the working copy of '%s', CRLF will be"208" replaced by LF the next time Git touches"209" it"), path);210} else if (old_stats->lonelf && !new_stats->lonelf ) {211/*212* CRLFs would be added by checkout
213*/
214if (conv_flags & CONV_EOL_RNDTRP_DIE)215die(_("LF would be replaced by CRLF in %s"), path);216else if (conv_flags & CONV_EOL_RNDTRP_WARN)217warning(_("in the working copy of '%s', LF will be"218" replaced by CRLF the next time Git touches"219" it"), path);220}221}
222
223static int has_crlf_in_index(struct index_state *istate, const char *path)224{
225unsigned long sz;226void *data;227const char *crp;228int has_crlf = 0;229
230data = read_blob_data_from_index(istate, path, &sz);231if (!data)232return 0;233
234crp = memchr(data, '\r', sz);235if (crp) {236unsigned int ret_stats;237ret_stats = gather_convert_stats(data, sz);238if (!(ret_stats & CONVERT_STAT_BITS_BIN) &&239(ret_stats & CONVERT_STAT_BITS_TXT_CRLF))240has_crlf = 1;241}242free(data);243return has_crlf;244}
245
246static int will_convert_lf_to_crlf(struct text_stat *stats,247enum convert_crlf_action crlf_action)248{
249if (output_eol(crlf_action) != EOL_CRLF)250return 0;251/* No "naked" LF? Nothing to convert, regardless. */252if (!stats->lonelf)253return 0;254
255if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {256/* If we have any CR or CRLF line endings, we do not touch it */257/* This is the new safer autocrlf-handling */258if (stats->lonecr || stats->crlf)259return 0;260
261if (convert_is_binary(stats))262return 0;263}264return 1;265
266}
267
/*
 * Check `data` for detectable BOM problems when `enc` names a UTF
 * encoding.  On a problem, advice is printed and the function either
 * dies (`die_on_error`) or returns the result of error().  Returns 0
 * when nothing wrong was detected or `enc` is not a UTF encoding.
 */
static int validate_encoding(const char *path, const char *enc,
		      const char *data, size_t len, int die_on_error)
{
	const char *stripped;

	/* We only check for UTF here as UTF?? can be an alias for UTF-?? */
	if (skip_iprefix(enc, "UTF", &stripped)) {
		skip_prefix(stripped, "-", &stripped);

		/*
		 * Check for detectable errors in UTF encodings
		 */
		if (has_prohibited_utf_bom(enc, data, len)) {
			const char *error_msg = _(
				"BOM is prohibited in '%s' if encoded as %s");
			/*
			 * This advice is shown for UTF-??BE and UTF-??LE encodings.
			 * We cut off the last two characters of the encoding name
			 * to generate the encoding name suitable for BOMs.
			 */
			const char *advise_msg = _(
				"The file '%s' contains a byte order "
				"mark (BOM). Please use UTF-%.*s as "
				"working-tree-encoding.");
			int stripped_len = strlen(stripped) - strlen("BE");
			advise(advise_msg, path, stripped_len, stripped);
			if (die_on_error)
				die(error_msg, path, enc);
			else {
				return error(error_msg, path, enc);
			}

		} else if (is_missing_required_utf_bom(enc, data, len)) {
			const char *error_msg = _(
				"BOM is required in '%s' if encoded as %s");
			const char *advise_msg = _(
				"The file '%s' is missing a byte order "
				"mark (BOM). Please use UTF-%sBE or UTF-%sLE "
				"(depending on the byte order) as "
				"working-tree-encoding.");
			advise(advise_msg, path, stripped, stripped);
			if (die_on_error)
				die(error_msg, path, enc);
			else {
				return error(error_msg, path, enc);
			}
		}

	}
	return 0;
}

320static void trace_encoding(const char *context, const char *path,321const char *encoding, const char *buf, size_t len)322{
323static struct trace_key coe = TRACE_KEY_INIT(WORKING_TREE_ENCODING);324struct strbuf trace = STRBUF_INIT;325int i;326
327if (!trace_want(&coe))328return;329
330strbuf_addf(&trace, "%s (%s, considered %s):\n", context, path, encoding);331for (i = 0; i < len && buf; ++i) {332strbuf_addf(333&trace, "| \033[2m%2i:\033[0m %2x \033[2m%c\033[0m%c",334i,335(unsigned char) buf[i],336(buf[i] > 32 && buf[i] < 127 ? buf[i] : ' '),337((i+1) % 8 && (i+1) < len ? ' ' : '\n')338);339}340strbuf_addchars(&trace, '\n', 1);341
342trace_strbuf(&coe, &trace);343strbuf_release(&trace);344}
345
346static int check_roundtrip(const char *enc_name)347{
348/*349* check_roundtrip_encoding contains a string of comma and/or
350* space separated encodings (eg. "UTF-16, ASCII, CP1125").
351* Search for the given encoding in that string.
352*/
353const char *encoding = check_roundtrip_encoding ?354check_roundtrip_encoding : "SHIFT-JIS";355const char *found = strcasestr(encoding, enc_name);356const char *next;357int len;358if (!found)359return 0;360next = found + strlen(enc_name);361len = strlen(encoding);362return (found && (363/*364* Check that the found encoding is at the beginning of
365* encoding or that it is prefixed with a space or
366* comma.
367*/
368found == encoding || (369(isspace(found[-1]) || found[-1] == ',')370)371) && (372/*373* Check that the found encoding is at the end of
374* encoding or that it is suffixed with a space
375* or comma.
376*/
377next == encoding + len || (378next < encoding + len &&379(isspace(next[0]) || next[0] == ',')380)381));382}
383
/* Encoding that encode_to_git() converts to and encode_to_worktree() converts from. */
static const char *default_encoding = "UTF-8";

386static int encode_to_git(const char *path, const char *src, size_t src_len,387struct strbuf *buf, const char *enc, int conv_flags)388{
389char *dst;390size_t dst_len;391int die_on_error = conv_flags & CONV_WRITE_OBJECT;392
393/*394* No encoding is specified or there is nothing to encode.
395* Tell the caller that the content was not modified.
396*/
397if (!enc || (src && !src_len))398return 0;399
400/*401* Looks like we got called from "would_convert_to_git()".
402* This means Git wants to know if it would encode (= modify!)
403* the content. Let's answer with "yes", since an encoding was
404* specified.
405*/
406if (!buf && !src)407return 1;408
409if (validate_encoding(path, enc, src, src_len, die_on_error))410return 0;411
412trace_encoding("source", path, enc, src, src_len);413dst = reencode_string_len(src, src_len, default_encoding, enc,414&dst_len);415if (!dst) {416/*417* We could add the blob "as-is" to Git. However, on checkout
418* we would try to re-encode to the original encoding. This
419* would fail and we would leave the user with a messed-up
420* working tree. Let's try to avoid this by screaming loud.
421*/
422const char* msg = _("failed to encode '%s' from %s to %s");423if (die_on_error)424die(msg, path, enc, default_encoding);425else {426error(msg, path, enc, default_encoding);427return 0;428}429}430trace_encoding("destination", path, default_encoding, dst, dst_len);431
432/*433* UTF supports lossless conversion round tripping [1] and conversions
434* between UTF and other encodings are mostly round trip safe as
435* Unicode aims to be a superset of all other character encodings.
436* However, certain encodings (e.g. SHIFT-JIS) are known to have round
437* trip issues [2]. Check the round trip conversion for all encodings
438* listed in core.checkRoundtripEncoding.
439*
440* The round trip check is only performed if content is written to Git.
441* This ensures that no information is lost during conversion to/from
442* the internal UTF-8 representation.
443*
444* Please note, the code below is not tested because I was not able to
445* generate a faulty round trip without an iconv error. Iconv errors
446* are already caught above.
447*
448* [1] http://unicode.org/faq/utf_bom.html#gen2
449* [2] https://support.microsoft.com/en-us/help/170559/prb-conversion-problem-between-shift-jis-and-unicode
450*/
451if (die_on_error && check_roundtrip(enc)) {452char *re_src;453size_t re_src_len;454
455re_src = reencode_string_len(dst, dst_len,456enc, default_encoding,457&re_src_len);458
459trace_printf("Checking roundtrip encoding for %s...\n", enc);460trace_encoding("reencoded source", path, enc,461re_src, re_src_len);462
463if (!re_src || src_len != re_src_len ||464memcmp(src, re_src, src_len)) {465const char* msg = _("encoding '%s' from %s to %s and "466"back is not the same");467die(msg, path, enc, default_encoding);468}469
470free(re_src);471}472
473strbuf_attach(buf, dst, dst_len, dst_len + 1);474return 1;475}
476
477static int encode_to_worktree(const char *path, const char *src, size_t src_len,478struct strbuf *buf, const char *enc)479{
480char *dst;481size_t dst_len;482
483/*484* No encoding is specified or there is nothing to encode.
485* Tell the caller that the content was not modified.
486*/
487if (!enc || (src && !src_len))488return 0;489
490dst = reencode_string_len(src, src_len, enc, default_encoding,491&dst_len);492if (!dst) {493error(_("failed to encode '%s' from %s to %s"),494path, default_encoding, enc);495return 0;496}497
498strbuf_attach(buf, dst, dst_len, dst_len + 1);499return 1;500}
501
502static int crlf_to_git(struct index_state *istate,503const char *path, const char *src, size_t len,504struct strbuf *buf,505enum convert_crlf_action crlf_action, int conv_flags)506{
507struct text_stat stats;508char *dst;509int convert_crlf_into_lf;510
511if (crlf_action == CRLF_BINARY ||512(src && !len))513return 0;514
515/*516* If we are doing a dry-run and have no source buffer, there is
517* nothing to analyze; we must assume we would convert.
518*/
519if (!buf && !src)520return 1;521
522gather_stats(src, len, &stats);523/* Optimization: No CRLF? Nothing to convert, regardless. */524convert_crlf_into_lf = !!stats.crlf;525
526if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {527if (convert_is_binary(&stats))528return 0;529/*530* If the file in the index has any CR in it, do not
531* convert. This is the new safer autocrlf handling,
532* unless we want to renormalize in a merge or
533* cherry-pick.
534*/
535if ((!(conv_flags & CONV_EOL_RENORMALIZE)) &&536has_crlf_in_index(istate, path))537convert_crlf_into_lf = 0;538}539if (((conv_flags & CONV_EOL_RNDTRP_WARN) ||540((conv_flags & CONV_EOL_RNDTRP_DIE) && len))) {541struct text_stat new_stats;542memcpy(&new_stats, &stats, sizeof(new_stats));543/* simulate "git add" */544if (convert_crlf_into_lf) {545new_stats.lonelf += new_stats.crlf;546new_stats.crlf = 0;547}548/* simulate "git checkout" */549if (will_convert_lf_to_crlf(&new_stats, crlf_action)) {550new_stats.crlf += new_stats.lonelf;551new_stats.lonelf = 0;552}553check_global_conv_flags_eol(path, &stats, &new_stats, conv_flags);554}555if (!convert_crlf_into_lf)556return 0;557
558/*559* At this point all of our source analysis is done, and we are sure we
560* would convert. If we are in dry-run mode, we can give an answer.
561*/
562if (!buf)563return 1;564
565/* only grow if not in place */566if (strbuf_avail(buf) + buf->len < len)567strbuf_grow(buf, len - buf->len);568dst = buf->buf;569if (crlf_action == CRLF_AUTO || crlf_action == CRLF_AUTO_INPUT || crlf_action == CRLF_AUTO_CRLF) {570/*571* If we guessed, we already know we rejected a file with
572* lone CR, and we can strip a CR without looking at what
573* follow it.
574*/
575do {576unsigned char c = *src++;577if (c != '\r')578*dst++ = c;579} while (--len);580} else {581do {582unsigned char c = *src++;583if (! (c == '\r' && (1 < len && *src == '\n')))584*dst++ = c;585} while (--len);586}587strbuf_setlen(buf, dst - buf->buf);588return 1;589}
590
591static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,592enum convert_crlf_action crlf_action)593{
594char *to_free = NULL;595struct text_stat stats;596
597if (!len || output_eol(crlf_action) != EOL_CRLF)598return 0;599
600gather_stats(src, len, &stats);601if (!will_convert_lf_to_crlf(&stats, crlf_action))602return 0;603
604/* are we "faking" in place editing ? */605if (src == buf->buf)606to_free = strbuf_detach(buf, NULL);607
608strbuf_grow(buf, len + stats.lonelf);609for (;;) {610const char *nl = memchr(src, '\n', len);611if (!nl)612break;613if (nl > src && nl[-1] == '\r') {614strbuf_add(buf, src, nl + 1 - src);615} else {616strbuf_add(buf, src, nl - src);617strbuf_addstr(buf, "\r\n");618}619len -= nl + 1 - src;620src = nl + 1;621}622strbuf_add(buf, src, len);623
624free(to_free);625return 1;626}
627
/*
 * Arguments handed to filter_buffer_or_fd() through async.data.
 * The filter input is `src`/`size` when `src` is non-NULL, otherwise
 * it is read from `fd`.
 */
struct filter_params {
	const char *src;
	size_t size;
	int fd;
	const char *cmd;
	const char *path;
};

636static int filter_buffer_or_fd(int in UNUSED, int out, void *data)637{
638/*639* Spawn cmd and feed the buffer contents through its stdin.
640*/
641struct child_process child_process = CHILD_PROCESS_INIT;642struct filter_params *params = (struct filter_params *)data;643const char *format = params->cmd;644int write_err, status;645
646/* apply % substitution to cmd */647struct strbuf cmd = STRBUF_INIT;648
649/* expand all %f with the quoted path; quote to preserve space, etc. */650while (strbuf_expand_step(&cmd, &format)) {651if (skip_prefix(format, "%", &format))652strbuf_addch(&cmd, '%');653else if (skip_prefix(format, "f", &format))654sq_quote_buf(&cmd, params->path);655else656strbuf_addch(&cmd, '%');657}658
659strvec_push(&child_process.args, cmd.buf);660child_process.use_shell = 1;661child_process.in = -1;662child_process.out = out;663
664if (start_command(&child_process)) {665strbuf_release(&cmd);666return error(_("cannot fork to run external filter '%s'"),667params->cmd);668}669
670sigchain_push(SIGPIPE, SIG_IGN);671
672if (params->src) {673write_err = (write_in_full(child_process.in,674params->src, params->size) < 0);675if (errno == EPIPE)676write_err = 0;677} else {678write_err = copy_fd(params->fd, child_process.in);679if (write_err == COPY_WRITE_ERROR && errno == EPIPE)680write_err = 0;681}682
683if (close(child_process.in))684write_err = 1;685if (write_err)686error(_("cannot feed the input to external filter '%s'"),687params->cmd);688
689sigchain_pop(SIGPIPE);690
691status = finish_command(&child_process);692if (status)693error(_("external filter '%s' failed %d"), params->cmd, status);694
695strbuf_release(&cmd);696return (write_err || status);697}
698
699static int apply_single_file_filter(const char *path, const char *src, size_t len, int fd,700struct strbuf *dst, const char *cmd)701{
702/*703* Create a pipeline to have the command filter the buffer's
704* contents.
705*
706* (child --> cmd) --> us
707*/
708int err = 0;709struct strbuf nbuf = STRBUF_INIT;710struct async async;711struct filter_params params;712
713memset(&async, 0, sizeof(async));714async.proc = filter_buffer_or_fd;715async.data = ¶ms;716async.out = -1;717params.src = src;718params.size = len;719params.fd = fd;720params.cmd = cmd;721params.path = path;722
723fflush(NULL);724if (start_async(&async))725return 0; /* error was already reported */726
727if (strbuf_read(&nbuf, async.out, 0) < 0) {728err = error(_("read from external filter '%s' failed"), cmd);729}730if (close(async.out)) {731err = error(_("read from external filter '%s' failed"), cmd);732}733if (finish_async(&async)) {734err = error(_("external filter '%s' failed"), cmd);735}736
737if (!err) {738strbuf_swap(dst, &nbuf);739}740strbuf_release(&nbuf);741return !err;742}
743
/* Capability bits a long-running filter process can advertise. */
#define CAP_CLEAN    (1u<<0)
#define CAP_SMUDGE   (1u<<1)
#define CAP_DELAY    (1u<<2)

748struct cmd2process {749struct subprocess_entry subprocess; /* must be the first member! */750unsigned int supported_capabilities;751};752
753static int subprocess_map_initialized;754static struct hashmap subprocess_map;755
756static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)757{
758static int versions[] = {2, 0};759static struct subprocess_capability capabilities[] = {760{ "clean", CAP_CLEAN },761{ "smudge", CAP_SMUDGE },762{ "delay", CAP_DELAY },763{ NULL, 0 }764};765struct cmd2process *entry = (struct cmd2process *)subprocess;766return subprocess_handshake(subprocess, "git-filter", versions, NULL,767capabilities,768&entry->supported_capabilities);769}
770
771static void handle_filter_error(const struct strbuf *filter_status,772struct cmd2process *entry,773const unsigned int wanted_capability)774{
775if (!strcmp(filter_status->buf, "error"))776; /* The filter signaled a problem with the file. */777else if (!strcmp(filter_status->buf, "abort") && wanted_capability) {778/*779* The filter signaled a permanent problem. Don't try to filter
780* files with the same command for the lifetime of the current
781* Git process.
782*/
783entry->supported_capabilities &= ~wanted_capability;784} else {785/*786* Something went wrong with the protocol filter.
787* Force shutdown and restart if another blob requires filtering.
788*/
789error(_("external filter '%s' failed"), entry->subprocess.cmd);790subprocess_stop(&subprocess_map, &entry->subprocess);791free(entry);792}793}
794
795static int apply_multi_file_filter(const char *path, const char *src, size_t len,796int fd, struct strbuf *dst, const char *cmd,797const unsigned int wanted_capability,798const struct checkout_metadata *meta,799struct delayed_checkout *dco)800{
801int err;802int can_delay = 0;803struct cmd2process *entry;804struct child_process *process;805struct strbuf nbuf = STRBUF_INIT;806struct strbuf filter_status = STRBUF_INIT;807const char *filter_type;808
809if (!subprocess_map_initialized) {810subprocess_map_initialized = 1;811hashmap_init(&subprocess_map, cmd2process_cmp, NULL, 0);812entry = NULL;813} else {814entry = (struct cmd2process *)subprocess_find_entry(&subprocess_map, cmd);815}816
817fflush(NULL);818
819if (!entry) {820entry = xmalloc(sizeof(*entry));821entry->supported_capabilities = 0;822
823if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, start_multi_file_filter_fn)) {824free(entry);825return 0;826}827}828process = &entry->subprocess.process;829
830if (!(entry->supported_capabilities & wanted_capability))831return 0;832
833if (wanted_capability & CAP_CLEAN)834filter_type = "clean";835else if (wanted_capability & CAP_SMUDGE)836filter_type = "smudge";837else838die(_("unexpected filter type"));839
840sigchain_push(SIGPIPE, SIG_IGN);841
842assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n"));843err = packet_write_fmt_gently(process->in, "command=%s\n", filter_type);844if (err)845goto done;846
847err = strlen(path) > LARGE_PACKET_DATA_MAX - strlen("pathname=\n");848if (err) {849error(_("path name too long for external filter"));850goto done;851}852
853err = packet_write_fmt_gently(process->in, "pathname=%s\n", path);854if (err)855goto done;856
857if (meta && meta->refname) {858err = packet_write_fmt_gently(process->in, "ref=%s\n", meta->refname);859if (err)860goto done;861}862
863if (meta && !is_null_oid(&meta->treeish)) {864err = packet_write_fmt_gently(process->in, "treeish=%s\n", oid_to_hex(&meta->treeish));865if (err)866goto done;867}868
869if (meta && !is_null_oid(&meta->blob)) {870err = packet_write_fmt_gently(process->in, "blob=%s\n", oid_to_hex(&meta->blob));871if (err)872goto done;873}874
875if ((entry->supported_capabilities & CAP_DELAY) &&876dco && dco->state == CE_CAN_DELAY) {877can_delay = 1;878err = packet_write_fmt_gently(process->in, "can-delay=1\n");879if (err)880goto done;881}882
883err = packet_flush_gently(process->in);884if (err)885goto done;886
887if (fd >= 0)888err = write_packetized_from_fd_no_flush(fd, process->in);889else890err = write_packetized_from_buf_no_flush(src, len, process->in);891if (err)892goto done;893
894err = packet_flush_gently(process->in);895if (err)896goto done;897
898err = subprocess_read_status(process->out, &filter_status);899if (err)900goto done;901
902if (can_delay && !strcmp(filter_status.buf, "delayed")) {903string_list_insert(&dco->filters, cmd);904string_list_insert(&dco->paths, path);905} else {906/* The filter got the blob and wants to send us a response. */907err = strcmp(filter_status.buf, "success");908if (err)909goto done;910
911err = read_packetized_to_strbuf(process->out, &nbuf,912PACKET_READ_GENTLE_ON_EOF) < 0;913if (err)914goto done;915
916err = subprocess_read_status(process->out, &filter_status);917if (err)918goto done;919
920err = strcmp(filter_status.buf, "success");921}922
923done:924sigchain_pop(SIGPIPE);925
926if (err)927handle_filter_error(&filter_status, entry, wanted_capability);928else929strbuf_swap(dst, &nbuf);930strbuf_release(&nbuf);931strbuf_release(&filter_status);932return !err;933}
934
935
936int async_query_available_blobs(const char *cmd, struct string_list *available_paths)937{
938int err;939char *line;940struct cmd2process *entry;941struct child_process *process;942struct strbuf filter_status = STRBUF_INIT;943
944assert(subprocess_map_initialized);945entry = (struct cmd2process *)subprocess_find_entry(&subprocess_map, cmd);946if (!entry) {947error(_("external filter '%s' is not available anymore although "948"not all paths have been filtered"), cmd);949return 0;950}951process = &entry->subprocess.process;952sigchain_push(SIGPIPE, SIG_IGN);953
954err = packet_write_fmt_gently(955process->in, "command=list_available_blobs\n");956if (err)957goto done;958
959err = packet_flush_gently(process->in);960if (err)961goto done;962
963while ((line = packet_read_line(process->out, NULL))) {964const char *path;965if (skip_prefix(line, "pathname=", &path))966string_list_insert(available_paths, path);967else968; /* ignore unknown keys */969}970
971err = subprocess_read_status(process->out, &filter_status);972if (err)973goto done;974
975err = strcmp(filter_status.buf, "success");976
977done:978sigchain_pop(SIGPIPE);979
980if (err)981handle_filter_error(&filter_status, entry, 0);982strbuf_release(&filter_status);983return !err;984}
985
/*
 * One configured "filter.<name>.*" driver.  Drivers form a singly
 * linked list headed by user_convert; user_convert_tail points at the
 * link where the next driver is appended.
 */
static struct convert_driver {
	const char *name;
	struct convert_driver *next;
	char *smudge;
	char *clean;
	char *process;
	int required;
} *user_convert, **user_convert_tail;

995static int apply_filter(const char *path, const char *src, size_t len,996int fd, struct strbuf *dst, struct convert_driver *drv,997const unsigned int wanted_capability,998const struct checkout_metadata *meta,999struct delayed_checkout *dco)1000{
1001const char *cmd = NULL;1002
1003if (!drv)1004return 0;1005
1006if (!dst)1007return 1;1008
1009if ((wanted_capability & CAP_CLEAN) && !drv->process && drv->clean)1010cmd = drv->clean;1011else if ((wanted_capability & CAP_SMUDGE) && !drv->process && drv->smudge)1012cmd = drv->smudge;1013
1014if (cmd && *cmd)1015return apply_single_file_filter(path, src, len, fd, dst, cmd);1016else if (drv->process && *drv->process)1017return apply_multi_file_filter(path, src, len, fd, dst,1018drv->process, wanted_capability, meta, dco);1019
1020return 0;1021}
1022
1023static int read_convert_config(const char *var, const char *value,1024const struct config_context *ctx UNUSED,1025void *cb UNUSED)1026{
1027const char *key, *name;1028size_t namelen;1029struct convert_driver *drv;1030
1031/*1032* External conversion drivers are configured using
1033* "filter.<name>.variable".
1034*/
1035if (parse_config_key(var, "filter", &name, &namelen, &key) < 0 || !name)1036return 0;1037for (drv = user_convert; drv; drv = drv->next)1038if (!xstrncmpz(drv->name, name, namelen))1039break;1040if (!drv) {1041CALLOC_ARRAY(drv, 1);1042drv->name = xmemdupz(name, namelen);1043*user_convert_tail = drv;1044user_convert_tail = &(drv->next);1045}1046
1047/*1048* filter.<name>.smudge and filter.<name>.clean specifies
1049* the command line:
1050*
1051* command-line
1052*
1053* The command-line will not be interpolated in any way.
1054*/
1055
1056if (!strcmp("smudge", key)) {1057FREE_AND_NULL(drv->smudge);1058return git_config_string(&drv->smudge, var, value);1059}1060
1061if (!strcmp("clean", key)) {1062FREE_AND_NULL(drv->clean);1063return git_config_string(&drv->clean, var, value);1064}1065
1066if (!strcmp("process", key)) {1067FREE_AND_NULL(drv->process);1068return git_config_string(&drv->process, var, value);1069}1070
1071if (!strcmp("required", key)) {1072drv->required = git_config_bool(var, value);1073return 0;1074}1075
1076return 0;1077}
1078
/*
 * Count the ident keywords in the first `size` bytes of `cp`.
 *
 * Both the collapsed form "$Id$" and the expanded form "$Id: ... $"
 * are counted; an expanded keyword must be closed by '$' before the
 * end of the line.
 */
static int count_ident(const char *cp, unsigned long size)
{
	/*
	 * "$Id: 0000000000000000000000000000000000000000 $" <=> "$Id$"
	 */
	int cnt = 0;
	char ch;

	while (size) {
		ch = *cp++;
		size--;
		if (ch != '$')
			continue;
		/* need at least "Id" plus one more byte after the '$' */
		if (size < 3)
			break;
		if (memcmp("Id", cp, 2))
			continue;
		ch = cp[2];
		cp += 3;
		size -= 3;
		if (ch == '$')
			cnt++; /* $Id$ */
		if (ch != ':')
			continue;

		/*
		 * "$Id: ... "; scan up to the closing dollar sign and discard.
		 */
		while (size) {
			ch = *cp++;
			size--;
			if (ch == '$') {
				cnt++;
				break;
			}
			if (ch == '\n')
				break;
		}
	}
	return cnt;
}

1121static int ident_to_git(const char *src, size_t len,1122struct strbuf *buf, int ident)1123{
1124char *dst, *dollar;1125
1126if (!ident || (src && !count_ident(src, len)))1127return 0;1128
1129if (!buf)1130return 1;1131
1132/* only grow if not in place */1133if (strbuf_avail(buf) + buf->len < len)1134strbuf_grow(buf, len - buf->len);1135dst = buf->buf;1136for (;;) {1137dollar = memchr(src, '$', len);1138if (!dollar)1139break;1140memmove(dst, src, dollar + 1 - src);1141dst += dollar + 1 - src;1142len -= dollar + 1 - src;1143src = dollar + 1;1144
1145if (len > 3 && !memcmp(src, "Id:", 3)) {1146dollar = memchr(src + 3, '$', len - 3);1147if (!dollar)1148break;1149if (memchr(src + 3, '\n', dollar - src - 3)) {1150/* Line break before the next dollar. */1151continue;1152}1153
1154memcpy(dst, "Id$", 3);1155dst += 3;1156len -= dollar + 1 - src;1157src = dollar + 1;1158}1159}1160memmove(dst, src, len);1161strbuf_setlen(buf, dst + len - buf->buf);1162return 1;1163}
1164
1165static int ident_to_worktree(const char *src, size_t len,1166struct strbuf *buf, int ident)1167{
1168struct object_id oid;1169char *to_free = NULL, *dollar, *spc;1170int cnt;1171
1172if (!ident)1173return 0;1174
1175cnt = count_ident(src, len);1176if (!cnt)1177return 0;1178
1179/* are we "faking" in place editing ? */1180if (src == buf->buf)1181to_free = strbuf_detach(buf, NULL);1182hash_object_file(the_hash_algo, src, len, OBJ_BLOB, &oid);1183
1184strbuf_grow(buf, len + cnt * (the_hash_algo->hexsz + 3));1185for (;;) {1186/* step 1: run to the next '$' */1187dollar = memchr(src, '$', len);1188if (!dollar)1189break;1190strbuf_add(buf, src, dollar + 1 - src);1191len -= dollar + 1 - src;1192src = dollar + 1;1193
1194/* step 2: does it looks like a bit like Id:xxx$ or Id$ ? */1195if (len < 3 || memcmp("Id", src, 2))1196continue;1197
1198/* step 3: skip over Id$ or Id:xxxxx$ */1199if (src[2] == '$') {1200src += 3;1201len -= 3;1202} else if (src[2] == ':') {1203/*1204* It's possible that an expanded Id has crept its way into the
1205* repository, we cope with that by stripping the expansion out.
1206* This is probably not a good idea, since it will cause changes
1207* on checkout, which won't go away by stash, but let's keep it
1208* for git-style ids.
1209*/
1210dollar = memchr(src + 3, '$', len - 3);1211if (!dollar) {1212/* incomplete keyword, no more '$', so just quit the loop */1213break;1214}1215
1216if (memchr(src + 3, '\n', dollar - src - 3)) {1217/* Line break before the next dollar. */1218continue;1219}1220
1221spc = memchr(src + 4, ' ', dollar - src - 4);1222if (spc && spc < dollar-1) {1223/* There are spaces in unexpected places.1224* This is probably an id from some other
1225* versioning system. Keep it for now.
1226*/
1227continue;1228}1229
1230len -= dollar + 1 - src;1231src = dollar + 1;1232} else {1233/* it wasn't a "Id$" or "Id:xxxx$" */1234continue;1235}1236
1237/* step 4: substitute */1238strbuf_addstr(buf, "Id: ");1239strbuf_addstr(buf, oid_to_hex(&oid));1240strbuf_addstr(buf, " $");1241}1242strbuf_add(buf, src, len);1243
1244free(to_free);1245return 1;1246}
1247
1248static const char *git_path_check_encoding(struct attr_check_item *check)1249{
1250const char *value = check->value;1251
1252if (ATTR_UNSET(value) || !strlen(value))1253return NULL;1254
1255if (ATTR_TRUE(value) || ATTR_FALSE(value)) {1256die(_("true/false are no valid working-tree-encodings"));1257}1258
1259/* Don't encode to the default encoding */1260if (same_encoding(value, default_encoding))1261return NULL;1262
1263return value;1264}
1265
1266static enum convert_crlf_action git_path_check_crlf(struct attr_check_item *check)1267{
1268const char *value = check->value;1269
1270if (ATTR_TRUE(value))1271return CRLF_TEXT;1272else if (ATTR_FALSE(value))1273return CRLF_BINARY;1274else if (ATTR_UNSET(value))1275;1276else if (!strcmp(value, "input"))1277return CRLF_TEXT_INPUT;1278else if (!strcmp(value, "auto"))1279return CRLF_AUTO;1280return CRLF_UNDEFINED;1281}
1282
1283static enum eol git_path_check_eol(struct attr_check_item *check)1284{
1285const char *value = check->value;1286
1287if (ATTR_UNSET(value))1288;1289else if (!strcmp(value, "lf"))1290return EOL_LF;1291else if (!strcmp(value, "crlf"))1292return EOL_CRLF;1293return EOL_UNSET;1294}
1295
1296static struct convert_driver *git_path_check_convert(struct attr_check_item *check)1297{
1298const char *value = check->value;1299struct convert_driver *drv;1300
1301if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))1302return NULL;1303for (drv = user_convert; drv; drv = drv->next)1304if (!strcmp(value, drv->name))1305return drv;1306return NULL;1307}
1308
1309static int git_path_check_ident(struct attr_check_item *check)1310{
1311const char *value = check->value;1312
1313return !!ATTR_TRUE(value);1314}
1315
1316static struct attr_check *check;1317
1318void convert_attrs(struct index_state *istate,1319struct conv_attrs *ca, const char *path)1320{
1321struct attr_check_item *ccheck = NULL;1322
1323if (!check) {1324check = attr_check_initl("crlf", "ident", "filter",1325"eol", "text", "working-tree-encoding",1326NULL);1327user_convert_tail = &user_convert;1328git_config(read_convert_config, NULL);1329}1330
1331git_check_attr(istate, path, check);1332ccheck = check->items;1333ca->crlf_action = git_path_check_crlf(ccheck + 4);1334if (ca->crlf_action == CRLF_UNDEFINED)1335ca->crlf_action = git_path_check_crlf(ccheck + 0);1336ca->ident = git_path_check_ident(ccheck + 1);1337ca->drv = git_path_check_convert(ccheck + 2);1338if (ca->crlf_action != CRLF_BINARY) {1339enum eol eol_attr = git_path_check_eol(ccheck + 3);1340if (ca->crlf_action == CRLF_AUTO && eol_attr == EOL_LF)1341ca->crlf_action = CRLF_AUTO_INPUT;1342else if (ca->crlf_action == CRLF_AUTO && eol_attr == EOL_CRLF)1343ca->crlf_action = CRLF_AUTO_CRLF;1344else if (eol_attr == EOL_LF)1345ca->crlf_action = CRLF_TEXT_INPUT;1346else if (eol_attr == EOL_CRLF)1347ca->crlf_action = CRLF_TEXT_CRLF;1348}1349ca->working_tree_encoding = git_path_check_encoding(ccheck + 5);1350
1351/* Save attr and make a decision for action */1352ca->attr_action = ca->crlf_action;1353if (ca->crlf_action == CRLF_TEXT)1354ca->crlf_action = text_eol_is_crlf() ? CRLF_TEXT_CRLF : CRLF_TEXT_INPUT;1355if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_FALSE)1356ca->crlf_action = CRLF_BINARY;1357if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_TRUE)1358ca->crlf_action = CRLF_AUTO_CRLF;1359if (ca->crlf_action == CRLF_UNDEFINED && auto_crlf == AUTO_CRLF_INPUT)1360ca->crlf_action = CRLF_AUTO_INPUT;1361}
1362
1363void reset_parsed_attributes(void)1364{
1365struct convert_driver *drv, *next;1366
1367attr_check_free(check);1368check = NULL;1369reset_merge_attributes();1370
1371for (drv = user_convert; drv; drv = next) {1372next = drv->next;1373free((void *)drv->name);1374free(drv);1375}1376user_convert = NULL;1377user_convert_tail = NULL;1378}
1379
1380int would_convert_to_git_filter_fd(struct index_state *istate, const char *path)1381{
1382struct conv_attrs ca;1383
1384convert_attrs(istate, &ca, path);1385if (!ca.drv)1386return 0;1387
1388/*1389* Apply a filter to an fd only if the filter is required to succeed.
1390* We must die if the filter fails, because the original data before
1391* filtering is not available.
1392*/
1393if (!ca.drv->required)1394return 0;1395
1396return apply_filter(path, NULL, 0, -1, NULL, ca.drv, CAP_CLEAN, NULL, NULL);1397}
1398
1399const char *get_convert_attr_ascii(struct index_state *istate, const char *path)1400{
1401struct conv_attrs ca;1402
1403convert_attrs(istate, &ca, path);1404switch (ca.attr_action) {1405case CRLF_UNDEFINED:1406return "";1407case CRLF_BINARY:1408return "-text";1409case CRLF_TEXT:1410return "text";1411case CRLF_TEXT_INPUT:1412return "text eol=lf";1413case CRLF_TEXT_CRLF:1414return "text eol=crlf";1415case CRLF_AUTO:1416return "text=auto";1417case CRLF_AUTO_CRLF:1418return "text=auto eol=crlf";1419case CRLF_AUTO_INPUT:1420return "text=auto eol=lf";1421}1422return "";1423}
1424
1425int convert_to_git(struct index_state *istate,1426const char *path, const char *src, size_t len,1427struct strbuf *dst, int conv_flags)1428{
1429int ret = 0;1430struct conv_attrs ca;1431
1432convert_attrs(istate, &ca, path);1433
1434ret |= apply_filter(path, src, len, -1, dst, ca.drv, CAP_CLEAN, NULL, NULL);1435if (!ret && ca.drv && ca.drv->required)1436die(_("%s: clean filter '%s' failed"), path, ca.drv->name);1437
1438if (ret && dst) {1439src = dst->buf;1440len = dst->len;1441}1442
1443ret |= encode_to_git(path, src, len, dst, ca.working_tree_encoding, conv_flags);1444if (ret && dst) {1445src = dst->buf;1446len = dst->len;1447}1448
1449if (!(conv_flags & CONV_EOL_KEEP_CRLF)) {1450ret |= crlf_to_git(istate, path, src, len, dst, ca.crlf_action, conv_flags);1451if (ret && dst) {1452src = dst->buf;1453len = dst->len;1454}1455}1456return ret | ident_to_git(src, len, dst, ca.ident);1457}
1458
1459void convert_to_git_filter_fd(struct index_state *istate,1460const char *path, int fd, struct strbuf *dst,1461int conv_flags)1462{
1463struct conv_attrs ca;1464convert_attrs(istate, &ca, path);1465
1466assert(ca.drv);1467
1468if (!apply_filter(path, NULL, 0, fd, dst, ca.drv, CAP_CLEAN, NULL, NULL))1469die(_("%s: clean filter '%s' failed"), path, ca.drv->name);1470
1471encode_to_git(path, dst->buf, dst->len, dst, ca.working_tree_encoding, conv_flags);1472crlf_to_git(istate, path, dst->buf, dst->len, dst, ca.crlf_action, conv_flags);1473ident_to_git(dst->buf, dst->len, dst, ca.ident);1474}
1475
1476static int convert_to_working_tree_ca_internal(const struct conv_attrs *ca,1477const char *path, const char *src,1478size_t len, struct strbuf *dst,1479int normalizing,1480const struct checkout_metadata *meta,1481struct delayed_checkout *dco)1482{
1483int ret = 0, ret_filter = 0;1484
1485ret |= ident_to_worktree(src, len, dst, ca->ident);1486if (ret) {1487src = dst->buf;1488len = dst->len;1489}1490/*1491* CRLF conversion can be skipped if normalizing, unless there
1492* is a smudge or process filter (even if the process filter doesn't
1493* support smudge). The filters might expect CRLFs.
1494*/
1495if ((ca->drv && (ca->drv->smudge || ca->drv->process)) || !normalizing) {1496ret |= crlf_to_worktree(src, len, dst, ca->crlf_action);1497if (ret) {1498src = dst->buf;1499len = dst->len;1500}1501}1502
1503ret |= encode_to_worktree(path, src, len, dst, ca->working_tree_encoding);1504if (ret) {1505src = dst->buf;1506len = dst->len;1507}1508
1509ret_filter = apply_filter(1510path, src, len, -1, dst, ca->drv, CAP_SMUDGE, meta, dco);1511if (!ret_filter && ca->drv && ca->drv->required)1512die(_("%s: smudge filter %s failed"), path, ca->drv->name);1513
1514return ret | ret_filter;1515}
1516
/*
 * Worktree conversion (not normalizing) that forwards "dco" to the
 * internal helper as its delayed-checkout argument.
 */
int async_convert_to_working_tree_ca(const struct conv_attrs *ca,
				     const char *path, const char *src,
				     size_t len, struct strbuf *dst,
				     const struct checkout_metadata *meta,
				     void *dco)
{
	const int normalizing = 0;

	return convert_to_working_tree_ca_internal(ca, path, src, len, dst,
						   normalizing, meta, dco);
}
1526
/*
 * Worktree conversion (not normalizing) with no delayed-checkout
 * argument passed to the internal helper.
 */
int convert_to_working_tree_ca(const struct conv_attrs *ca,
			       const char *path, const char *src,
			       size_t len, struct strbuf *dst,
			       const struct checkout_metadata *meta)
{
	const int normalizing = 0;

	return convert_to_working_tree_ca_internal(ca, path, src, len, dst,
						   normalizing, meta, NULL);
}
1535
1536int renormalize_buffer(struct index_state *istate, const char *path,1537const char *src, size_t len, struct strbuf *dst)1538{
1539struct conv_attrs ca;1540int ret;1541
1542convert_attrs(istate, &ca, path);1543ret = convert_to_working_tree_ca_internal(&ca, path, src, len, dst, 1,1544NULL, NULL);1545if (ret) {1546src = dst->buf;1547len = dst->len;1548}1549return ret | convert_to_git(istate, path, src, len, dst, CONV_EOL_RENORMALIZE);1550}
1551
1552/*****************************************************************
1553*
1554* Streaming conversion support
1555*
1556*****************************************************************/
1557
struct stream_filter;

/*
 * Filter callback.  On entry *isize_p and *osize_p hold the bytes
 * available in "input" and the room in "output"; implementations
 * decrement them by what they consumed and produced.  A NULL "input"
 * asks the filter to drain.
 */
typedef int (*filter_fn)(struct stream_filter *,
			 const char *input, size_t *isize_p,
			 char *output, size_t *osize_p);

/* Callback that releases a filter instance. */
typedef void (*free_fn)(struct stream_filter *);

/* Operations implemented by each kind of stream filter. */
struct stream_filter_vtbl {
	filter_fn filter;
	free_fn free;
};

/* Common header of every filter instance. */
struct stream_filter {
	struct stream_filter_vtbl *vtbl;
};
1572static int null_filter_fn(struct stream_filter *filter UNUSED,1573const char *input, size_t *isize_p,1574char *output, size_t *osize_p)1575{
1576size_t count;1577
1578if (!input)1579return 0; /* we do not keep any states */1580count = *isize_p;1581if (*osize_p < count)1582count = *osize_p;1583if (count) {1584memmove(output, input, count);1585*isize_p -= count;1586*osize_p -= count;1587}1588return 0;1589}
1590
1591static void null_free_fn(struct stream_filter *filter UNUSED)1592{
1593; /* nothing -- null instances are shared */1594}
1595
1596static struct stream_filter_vtbl null_vtbl = {1597.filter = null_filter_fn,1598.free = null_free_fn,1599};1600
1601static struct stream_filter null_filter_singleton = {1602.vtbl = &null_vtbl,1603};1604
1605int is_null_stream_filter(struct stream_filter *filter)1606{
1607return filter == &null_filter_singleton;1608}
1609
1610
1611/*
1612* LF-to-CRLF filter
1613*/
1614
1615struct lf_to_crlf_filter {1616struct stream_filter filter;1617unsigned has_held:1;1618char held;1619};1620
1621static int lf_to_crlf_filter_fn(struct stream_filter *filter,1622const char *input, size_t *isize_p,1623char *output, size_t *osize_p)1624{
1625size_t count, o = 0;1626struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter;1627
1628/*1629* We may be holding onto the CR to see if it is followed by a
1630* LF, in which case we would need to go to the main loop.
1631* Otherwise, just emit it to the output stream.
1632*/
1633if (lf_to_crlf->has_held && (lf_to_crlf->held != '\r' || !input)) {1634output[o++] = lf_to_crlf->held;1635lf_to_crlf->has_held = 0;1636}1637
1638/* We are told to drain */1639if (!input) {1640*osize_p -= o;1641return 0;1642}1643
1644count = *isize_p;1645if (count || lf_to_crlf->has_held) {1646size_t i;1647int was_cr = 0;1648
1649if (lf_to_crlf->has_held) {1650was_cr = 1;1651lf_to_crlf->has_held = 0;1652}1653
1654for (i = 0; o < *osize_p && i < count; i++) {1655char ch = input[i];1656
1657if (ch == '\n') {1658output[o++] = '\r';1659} else if (was_cr) {1660/*1661* Previous round saw CR and it is not followed
1662* by a LF; emit the CR before processing the
1663* current character.
1664*/
1665output[o++] = '\r';1666}1667
1668/*1669* We may have consumed the last output slot,
1670* in which case we need to break out of this
1671* loop; hold the current character before
1672* returning.
1673*/
1674if (*osize_p <= o) {1675lf_to_crlf->has_held = 1;1676lf_to_crlf->held = ch;1677continue; /* break but increment i */1678}1679
1680if (ch == '\r') {1681was_cr = 1;1682continue;1683}1684
1685was_cr = 0;1686output[o++] = ch;1687}1688
1689*osize_p -= o;1690*isize_p -= i;1691
1692if (!lf_to_crlf->has_held && was_cr) {1693lf_to_crlf->has_held = 1;1694lf_to_crlf->held = '\r';1695}1696}1697return 0;1698}
1699
/* Release an instance created by lf_to_crlf_filter(). */
static void lf_to_crlf_free_fn(struct stream_filter *filter)
{
	struct lf_to_crlf_filter *self = (struct lf_to_crlf_filter *)filter;

	free(self);
}
1704
1705static struct stream_filter_vtbl lf_to_crlf_vtbl = {1706.filter = lf_to_crlf_filter_fn,1707.free = lf_to_crlf_free_fn,1708};1709
1710static struct stream_filter *lf_to_crlf_filter(void)1711{
1712struct lf_to_crlf_filter *lf_to_crlf = xcalloc(1, sizeof(*lf_to_crlf));1713
1714lf_to_crlf->filter.vtbl = &lf_to_crlf_vtbl;1715return (struct stream_filter *)lf_to_crlf;1716}
1717
1718/*
1719* Cascade filter
1720*/
1721#define FILTER_BUFFER 10241722struct cascade_filter {1723struct stream_filter filter;1724struct stream_filter *one;1725struct stream_filter *two;1726char buf[FILTER_BUFFER];1727int end, ptr;1728};1729
1730static int cascade_filter_fn(struct stream_filter *filter,1731const char *input, size_t *isize_p,1732char *output, size_t *osize_p)1733{
1734struct cascade_filter *cas = (struct cascade_filter *) filter;1735size_t filled = 0;1736size_t sz = *osize_p;1737size_t to_feed, remaining;1738
1739/*1740* input -- (one) --> buf -- (two) --> output
1741*/
1742while (filled < sz) {1743remaining = sz - filled;1744
1745/* do we already have something to feed two with? */1746if (cas->ptr < cas->end) {1747to_feed = cas->end - cas->ptr;1748if (stream_filter(cas->two,1749cas->buf + cas->ptr, &to_feed,1750output + filled, &remaining))1751return -1;1752cas->ptr += (cas->end - cas->ptr) - to_feed;1753filled = sz - remaining;1754continue;1755}1756
1757/* feed one from upstream and have it emit into our buffer */1758to_feed = input ? *isize_p : 0;1759if (input && !to_feed)1760break;1761remaining = sizeof(cas->buf);1762if (stream_filter(cas->one,1763input, &to_feed,1764cas->buf, &remaining))1765return -1;1766cas->end = sizeof(cas->buf) - remaining;1767cas->ptr = 0;1768if (input) {1769size_t fed = *isize_p - to_feed;1770*isize_p -= fed;1771input += fed;1772}1773
1774/* do we know that we drained one completely? */1775if (input || cas->end)1776continue;1777
1778/* tell two to drain; we have nothing more to give it */1779to_feed = 0;1780remaining = sz - filled;1781if (stream_filter(cas->two,1782NULL, &to_feed,1783output + filled, &remaining))1784return -1;1785if (remaining == (sz - filled))1786break; /* completely drained two */1787filled = sz - remaining;1788}1789*osize_p -= filled;1790return 0;1791}
1792
1793static void cascade_free_fn(struct stream_filter *filter)1794{
1795struct cascade_filter *cas = (struct cascade_filter *)filter;1796free_stream_filter(cas->one);1797free_stream_filter(cas->two);1798free(filter);1799}
1800
1801static struct stream_filter_vtbl cascade_vtbl = {1802.filter = cascade_filter_fn,1803.free = cascade_free_fn,1804};1805
1806static struct stream_filter *cascade_filter(struct stream_filter *one,1807struct stream_filter *two)1808{
1809struct cascade_filter *cascade;1810
1811if (!one || is_null_stream_filter(one))1812return two;1813if (!two || is_null_stream_filter(two))1814return one;1815
1816cascade = xmalloc(sizeof(*cascade));1817cascade->one = one;1818cascade->two = two;1819cascade->end = cascade->ptr = 0;1820cascade->filter.vtbl = &cascade_vtbl;1821return (struct stream_filter *)cascade;1822}
1823
1824/*
1825* ident filter
1826*/
1827#define IDENT_DRAINING (-1)1828#define IDENT_SKIPPING (-2)1829struct ident_filter {1830struct stream_filter filter;1831struct strbuf left;1832int state;1833char ident[GIT_MAX_HEXSZ + 5]; /* ": x40 $" */1834};1835
/*
 * Returns 1 when "str" starts with "$Id: " and the text after that
 * prefix contains a whitespace character that is not immediately
 * followed by '$'; returns 0 otherwise.
 */
static int is_foreign_ident(const char *str)
{
	const char *p;

	if (!skip_prefix(str, "$Id: ", &str))
		return 0;
	for (p = str; *p; p++)
		if (isspace(*p) && p[1] != '$')
			return 1;
	return 0;
}
1848
1849static void ident_drain(struct ident_filter *ident, char **output_p, size_t *osize_p)1850{
1851size_t to_drain = ident->left.len;1852
1853if (*osize_p < to_drain)1854to_drain = *osize_p;1855if (to_drain) {1856memcpy(*output_p, ident->left.buf, to_drain);1857strbuf_remove(&ident->left, 0, to_drain);1858*output_p += to_drain;1859*osize_p -= to_drain;1860}1861if (!ident->left.len)1862ident->state = 0;1863}
1864
1865static int ident_filter_fn(struct stream_filter *filter,1866const char *input, size_t *isize_p,1867char *output, size_t *osize_p)1868{
1869struct ident_filter *ident = (struct ident_filter *)filter;1870static const char head[] = "$Id";1871
1872if (!input) {1873/* drain upon eof */1874switch (ident->state) {1875default:1876strbuf_add(&ident->left, head, ident->state);1877/* fallthrough */1878case IDENT_SKIPPING:1879/* fallthrough */1880case IDENT_DRAINING:1881ident_drain(ident, &output, osize_p);1882}1883return 0;1884}1885
1886while (*isize_p || (ident->state == IDENT_DRAINING)) {1887int ch;1888
1889if (ident->state == IDENT_DRAINING) {1890ident_drain(ident, &output, osize_p);1891if (!*osize_p)1892break;1893continue;1894}1895
1896ch = *(input++);1897(*isize_p)--;1898
1899if (ident->state == IDENT_SKIPPING) {1900/*1901* Skipping until '$' or LF, but keeping them
1902* in case it is a foreign ident.
1903*/
1904strbuf_addch(&ident->left, ch);1905if (ch != '\n' && ch != '$')1906continue;1907if (ch == '$' && !is_foreign_ident(ident->left.buf)) {1908strbuf_setlen(&ident->left, sizeof(head) - 1);1909strbuf_addstr(&ident->left, ident->ident);1910}1911ident->state = IDENT_DRAINING;1912continue;1913}1914
1915if (ident->state < sizeof(head) &&1916head[ident->state] == ch) {1917ident->state++;1918continue;1919}1920
1921if (ident->state)1922strbuf_add(&ident->left, head, ident->state);1923if (ident->state == sizeof(head) - 1) {1924if (ch != ':' && ch != '$') {1925strbuf_addch(&ident->left, ch);1926ident->state = 0;1927continue;1928}1929
1930if (ch == ':') {1931strbuf_addch(&ident->left, ch);1932ident->state = IDENT_SKIPPING;1933} else {1934strbuf_addstr(&ident->left, ident->ident);1935ident->state = IDENT_DRAINING;1936}1937continue;1938}1939
1940strbuf_addch(&ident->left, ch);1941ident->state = IDENT_DRAINING;1942}1943return 0;1944}
1945
1946static void ident_free_fn(struct stream_filter *filter)1947{
1948struct ident_filter *ident = (struct ident_filter *)filter;1949strbuf_release(&ident->left);1950free(filter);1951}
1952
1953static struct stream_filter_vtbl ident_vtbl = {1954.filter = ident_filter_fn,1955.free = ident_free_fn,1956};1957
1958static struct stream_filter *ident_filter(const struct object_id *oid)1959{
1960struct ident_filter *ident = xmalloc(sizeof(*ident));1961
1962xsnprintf(ident->ident, sizeof(ident->ident),1963": %s $", oid_to_hex(oid));1964strbuf_init(&ident->left, 0);1965ident->filter.vtbl = &ident_vtbl;1966ident->state = 0;1967return (struct stream_filter *)ident;1968}
1969
1970/*
1971* Return an appropriately constructed filter for the given ca, or NULL if
1972* the contents cannot be filtered without reading the whole thing
1973* in-core.
1974*
1975* Note that you would be crazy to set CRLF, smudge/clean or ident to a
1976* large binary blob you would want us not to slurp into the memory!
1977*/
1978struct stream_filter *get_stream_filter_ca(const struct conv_attrs *ca,1979const struct object_id *oid)1980{
1981struct stream_filter *filter = NULL;1982
1983if (classify_conv_attrs(ca) != CA_CLASS_STREAMABLE)1984return NULL;1985
1986if (ca->ident)1987filter = ident_filter(oid);1988
1989if (output_eol(ca->crlf_action) == EOL_CRLF)1990filter = cascade_filter(filter, lf_to_crlf_filter());1991else1992filter = cascade_filter(filter, &null_filter_singleton);1993
1994return filter;1995}
1996
1997struct stream_filter *get_stream_filter(struct index_state *istate,1998const char *path,1999const struct object_id *oid)2000{
2001struct conv_attrs ca;2002convert_attrs(istate, &ca, path);2003return get_stream_filter_ca(&ca, oid);2004}
2005
2006void free_stream_filter(struct stream_filter *filter)2007{
2008filter->vtbl->free(filter);2009}
2010
2011int stream_filter(struct stream_filter *filter,2012const char *input, size_t *isize_p,2013char *output, size_t *osize_p)2014{
2015return filter->vtbl->filter(filter, input, isize_p, output, osize_p);2016}
2017
2018void init_checkout_metadata(struct checkout_metadata *meta, const char *refname,2019const struct object_id *treeish,2020const struct object_id *blob)2021{
2022memset(meta, 0, sizeof(*meta));2023if (refname)2024meta->refname = refname;2025if (treeish)2026oidcpy(&meta->treeish, treeish);2027if (blob)2028oidcpy(&meta->blob, blob);2029}
2030
2031void clone_checkout_metadata(struct checkout_metadata *dst,2032const struct checkout_metadata *src,2033const struct object_id *blob)2034{
2035memcpy(dst, src, sizeof(*dst));2036if (blob)2037oidcpy(&dst->blob, blob);2038}
2039
2040enum conv_attrs_classification classify_conv_attrs(const struct conv_attrs *ca)2041{
2042if (ca->drv) {2043if (ca->drv->process)2044return CA_CLASS_INCORE_PROCESS;2045if (ca->drv->smudge || ca->drv->clean)2046return CA_CLASS_INCORE_FILTER;2047}2048
2049if (ca->working_tree_encoding)2050return CA_CLASS_INCORE;2051
2052if (ca->crlf_action == CRLF_AUTO || ca->crlf_action == CRLF_AUTO_CRLF)2053return CA_CLASS_INCORE;2054
2055return CA_CLASS_STREAMABLE;2056}
2057