leaky-repo
166 строк · 6.1 Кб
1# For py2 compat
2from __future__ import division
3import os
4import csv
5import json
6import subprocess
7from io import StringIO
8from subprocess import PIPE
9
def get_secret_counts():
    '''
    A generator for the expected secret counts listed in ``secrets.csv``.

    The file is read from the current working directory. Blank lines and
    comment lines (first non-blank character is ``#``) are skipped; every
    remaining line is parsed as a comma-separated row whose first three
    columns are a file path and two integer counts.

    :returns: yields ``[filepath, risk_count, informative_count]`` lists
    :raises ValueError: if a count column cannot be converted to int
    :raises IndexError: if a data row has fewer than three columns
    '''
    with open('secrets.csv') as f:
        # Check the stripped line so that indented comments are skipped as
        # well, instead of being handed to the CSV parser as data rows.
        data_lines = [line for line in f
                      if line.strip() and not line.lstrip().startswith('#')]
    # The file is closed at this point; csv.reader accepts any iterable
    # of lines, so the filtered list can be parsed directly.
    for row in csv.reader(data_lines, delimiter=','):
        # Yield str, int, int.
        yield [row[0], int(row[1]), int(row[2])]
24
def get_command_stdout(cmd, cwd='..'):
    '''
    Run a command and capture what it writes to stdout and stderr.

    The exit status of the command is not inspected.

    :param cmd: the command and its arguments, as a list
    :param cwd: directory to run the command in; defaults to the parent
                of the current working directory
    :returns: (stdout, stderr) -- stdout decoded as UTF-8; stderr decoded
              as UTF-8, or None when nothing was written to it
    '''
    # Resolve the directory up front so the child process is started with
    # an absolute working directory.
    cwd = os.path.abspath(cwd)
    p = subprocess.Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
    stdout, stderr = p.communicate()
    # The conditional applies to the stderr element only; stdout is always
    # decoded and returned.
    return stdout.decode('utf-8'), (stderr.decode('utf-8') if stderr else None)
30
def get_secret_count_detectsecrets():
    '''
    Run ``detect-secrets scan`` and count its findings per file.

    The command's stdout is parsed as JSON and the ``results`` object is
    read from it.

    :returns: (cmd, finds) -- the command list that was run, and a dict
              mapping every key of ``results`` to the number of entries
              stored under that key
    '''
    scan_cmd = ['detect-secrets', 'scan']
    output, _ = get_command_stdout(scan_cmd)
    report = json.loads(output).get('results')
    # One entry per key in the report, valued by how many findings it has.
    counts = {path: len(hits) for path, hits in report.items()}
    return scan_cmd, counts
40
def get_secret_count_gitleaks():
    '''
    Run gitleaks and count its findings per file.

    gitleaks is told to write its report to ``.leaky-meta/gitleaks.json``
    (relative to the directory the command runs in). The report is then
    read from ``gitleaks.json`` in the current working directory and
    deleted afterwards.
    NOTE(review): presumably the script is run from inside ``.leaky-meta``
    so both paths name the same file -- confirm.

    :returns: (cmd, finds) -- the command list that was run, and a dict
              mapping each report entry's ``file`` value to the number of
              entries that carry it
    '''
    gitleaks_cmd = [
        'gitleaks',
        '--config=.leaky-meta/gitleaks-config.toml',
        '--report=.leaky-meta/gitleaks.json',
        '--repo-path',
        '.',
    ]
    # The captured output is not used; findings come from the report file.
    get_command_stdout(gitleaks_cmd)
    with open('gitleaks.json') as report:
        entries = json.load(report)

    counts = {}
    for entry in entries:
        path = entry.get('file')
        counts[path] = counts.get(path, 0) + 1

    # Clean up
    os.remove('gitleaks.json')
    return gitleaks_cmd, counts
56
def get_secret_count_trufflehog():
    '''
    Run trufflehog and count its findings per file.

    The command's stdout is split on newlines and every non-empty line is
    parsed as its own JSON document. For each document the length of
    ``stringsFound`` is stored under its ``path``; a later document with
    the same ``path`` replaces the earlier value.

    :returns: (cmd, finds) -- the command list that was run, and a dict
              mapping ``path`` to the ``stringsFound`` length
    '''
    scan_cmd = ['trufflehog', '--json', '--regex', '.']
    output, _ = get_command_stdout(scan_cmd)

    counts = {}
    # Empty lines are skipped before parsing.
    documents = (json.loads(raw) for raw in output.split('\n') if len(raw) != 0)
    for doc in documents:
        counts[doc.get('path')] = len(doc.get('stringsFound'))

    return scan_cmd, counts
69
def build_markdown_rows(secrets_function, expected_counts):
    '''
    Compare a tool's findings against the expected counts, file by file.

    :param secrets_function: callable taking no arguments and returning
                             ``(cmd, finds)``, where ``finds`` maps a file
                             name to the number of secrets found in it
    :param expected_counts: iterable of rows; index 0 is the file name and
                            indexes 1 and 2 are counts that are summed to
                            get the expected total
    :returns: (cmd, rows) -- ``cmd`` as returned by ``secrets_function``,
              and a dict keyed by file name whose values are dicts with
              the keys ``name``, ``found``, ``expected`` and
              ``false_positives``
    '''
    cmd, found_by_file = secrets_function()
    rows = {}
    for entry in expected_counts:
        name = entry[0]
        expected = entry[1] + entry[2]
        if name in found_by_file:
            found = found_by_file[name]
            # Anything found beyond the expected total counts as a false
            # positive; the difference is clamped so it is never negative.
            false_positives = max(found - expected, 0)
        else:
            # The tool reported nothing for this file.
            found = 0
            false_positives = 0
        rows[name] = {'name': name, 'found': found, 'expected': expected,
                      'false_positives': false_positives}
    return cmd, rows
87
def build_table_header(filename_cols):
    '''
    Build the two header lines of the results table.

    :param filename_cols: width, in characters, of the file name column
    :returns: the title line followed by the dashed separator line, each
              terminated by a newline
    '''
    # Pad the title out to the column width; a width shorter than the
    # title leaves it unpadded.
    title = 'File Name'.ljust(filename_cols)
    rule = '-' * filename_cols
    return (title + '| Found/Total | False Positives |\n'
            + rule + '|----------------|-----------------|\n')
92
def build_md_table(secrets_function):
    '''
    Build the per-file results table for one tool and tally its totals.

    Expected counts come from ``get_secret_counts()``; the tool's findings
    come from ``secrets_function`` via ``build_markdown_rows``. Table rows
    are ordered by number of finds, highest first.

    :param secrets_function: callable returning ``(cmd, finds)`` for a tool
    :returns: a 7-tuple ``(cmd_used, total_files, files_covered,
              total_finds, total_expected, total_false_positives, table)``;
              ``total_finds`` excludes false positives and ``table`` is
              the table text including its header
    '''
    # {name}{padding}| {found}/{total} |{false positives}
    row_template = '{}{}| {}/{} | {}\n'

    expected_counts = list(get_secret_counts())
    # Width of the file name column: the longest name plus two, so the
    # column separator lands after every name.
    sep_col = max(len(entry[0]) for entry in expected_counts) + 2
    lines = [build_table_header(sep_col)]

    cmd_used, rows_by_name = build_markdown_rows(secrets_function, expected_counts)
    ranked = sorted(rows_by_name.values(), key=lambda row: -row['found'])

    total_finds = 0
    total_expected = 0
    total_false_positives = 0
    files_covered = 0
    for row in ranked:
        name = row['name']
        found = row['found']
        expected = row['expected']
        false_positives = row['false_positives']

        # Pad the name out to the separator column.
        padding = ' ' * (sep_col - len(name))

        # For metrics we exclude false positives.
        total_finds += found - false_positives
        total_expected += expected
        total_false_positives += false_positives
        if found != 0:
            files_covered += 1

        lines.append(row_template.format(name, padding, found, expected, false_positives))

    return (cmd_used, len(expected_counts), files_covered, total_finds,
            total_expected, total_false_positives, ''.join(lines))
129
def build_md(secrets_function, tool_url):
    '''
    Build the full markdown report for one tool.

    The report is a summary header (tool, command used, file coverage,
    find coverage, false positives) followed by the table produced by
    ``build_md_table``.

    :param secrets_function: callable returning ``(cmd, finds)`` for a tool
    :param tool_url: text placed after ``Tool:`` in the header
    :returns: the report text
    '''
    header_fmt = ''.join([
        'Tool: {} ',
        '\nCommand Used: `{}` ',
        '\nFiles covered: {}/{} ({}% coverage) ',
        '\nTotal finds: {}/{} ({}% coverage) ',
        '\nFalse Positives: {} ',
        '\n\n{}',
    ])

    (cmd, total_files, files_covered, total_finds,
     total_expected, false_positives, table) = build_md_table(secrets_function)
    # The command arrives as a list; show it as a single string.
    cmd_text = ' '.join(cmd)

    # Coverage percentages, rounded to two decimal places.
    file_coverage = round((files_covered / total_files) * 100, 2)
    find_coverage = round((total_finds / total_expected) * 100, 2)

    return header_fmt.format(
        tool_url, cmd_text,
        files_covered, total_files, file_coverage,
        total_finds, total_expected, find_coverage,
        false_positives, table,
    )
156
if __name__ == '__main__':
    # Run every tool first; the report files are only written once all
    # three reports have been built.
    detect_secrets = build_md(get_secret_count_detectsecrets, 'https://github.com/Yelp/detect-secrets')
    truffle_hog = build_md(get_secret_count_trufflehog, 'https://github.com/dxa4481/truffleHog')
    gitleaks = build_md(get_secret_count_gitleaks, 'https://github.com/zricethezav/gitleaks')

    # Each report goes to its own file in the benchmarking directory.
    reports = (
        ('TRUFFLEHOG.md', truffle_hog),
        ('DETECT-SECRETS.md', detect_secrets),
        ('GITLEAKS.md', gitleaks),
    )
    for report_name, report_text in reports:
        with open(os.path.join('benchmarking', report_name), 'w+') as f:
            f.write(report_text)
167