scikit-image
71 строка · 1.6 Кб
#!/usr/bin/env python
# Requires package 'editdistance'

# A mailmap file is used (by GitHub and other tools) to associate multiple
# commit emails with one user. This helps when counting commits,
# contributors, etc.
7
8import subprocess9import shlex10import numpy as np11from collections import defaultdict12
13from editdistance import eval as dist14
# Two author names whose edit distance is strictly below this value are
# treated as spellings of the same person when names are merged further down.
threshold = 5
17
def call(cmd):
    """Run a command line and return its standard output as a list of lines.

    Parameters
    ----------
    cmd : str
        Command line. It is tokenised with ``shlex.split`` and executed
        directly, without going through a shell.

    Returns
    -------
    list of str
        The captured output split on ``'\\n'``. Output that ends with a
        newline therefore yields a trailing empty string.

    Raises
    ------
    subprocess.CalledProcessError
        If the command exits with a non-zero status.
    """
    argv = shlex.split(cmd)
    output = subprocess.check_output(argv, text=True)
    return output.split('\n')
21
def _clean_email(email):
    """Normalise an e-mail address so aliases of one mailbox compare equal.

    The ``+tag`` suffix of the local part (everything from the first ``+``
    up to the ``@``) is dropped and the whole address is lower-cased.

    Parameters
    ----------
    email : str
        Raw address as recorded in a commit.

    Returns
    -------
    str or None
        The normalised address, or ``None`` when `email` contains no ``@``.
    """
    if '@' not in email:
        return None

    # Split on the last '@' only: a plain two-way ``split('@')`` raises
    # ValueError on addresses that contain more than one '@'.
    local, _, domain = email.rpartition('@')
    local = local.split('+', 1)[0]

    return f'{local}@{domain}'.lower()
31
# Remove any existing .mailmap first: the ``%aN`` / ``%aE`` placeholders used
# below are git's mailmap-aware variants, so a leftover file would rewrite
# the raw identities this script wants to inspect.
call("rm -f .mailmap")
authors = call("git log --format='%aN::%aE'")

# Parallel lists: names[k] is the author name first seen with emails[k].
names, emails = [], []

# Set mirror of ``emails`` so the "already seen?" check does not rescan the
# whole list for every commit.
_seen_emails = set()

for author in authors:
    if not author.strip():
        # ``call`` splits the output on '\n', which leaves blank entries.
        continue
    # Split on the last '::' only, so a name that itself contains '::'
    # cannot break the two-way unpacking.
    name, email = author.rsplit('::', 1)
    if email not in _seen_emails:
        _seen_emails.add(email)
        names.append(name)
        emails.append(email)
# Pairwise edit distances between author names. Only the strict lower
# triangle (row > column) is filled in; every other cell stays at +inf so it
# can never compare below the threshold.
N = len(names)
D = np.full((N, N), np.inf)

for row in range(1, N):
    for col in range(row):
        D[row, col] = dist(names[row], names[col])

# Column ``first`` holds the distances from name ``first`` to every later
# name. Each later name that is close enough is overwritten with the current
# value of names[first].
for first in range(N):
    close = D[:, first] < threshold
    for later in np.where(close)[0]:
        names[later] = names[first]
# Group the normalised e-mail addresses under each (merged) author name.
mailmap = defaultdict(set)
for author, address in zip(names, emails):
    cleaned = _clean_email(address)
    if not cleaned:
        # ``_clean_email`` produced nothing usable for this address.
        continue
    mailmap[author].add(cleaned)

# Drop authors that have fewer than two distinct addresses, or whose name is
# not made of at least two whitespace-separated words.
unwanted = [
    author
    for author, addresses in mailmap.items()
    if len(addresses) < 2 or len(author.split()) < 2
]
for author in unwanted:
    del mailmap[author]
# One output line per author: the name followed by each of its addresses in
# angle brackets. The addresses are stored in a set, whose iteration order is
# not stable from run to run, so they are sorted to keep the output
# reproducible.
entries = [
    [name, *(f'<{address}>' for address in sorted(addresses))]
    for name, addresses in mailmap.items()
]

# Order the lines by the last word of the author name.
entries.sort(key=lambda entry: entry[0].split()[-1])
for entry in entries:
    print(' '.join(entry))