leaky-repo

benchmark.py
166 строк · 6.1 Кб
Перенос по словам
1
# For py2 compat
2
from __future__ import division
3
import os
4
import csv
5
import json
6
import subprocess
7
from io import StringIO
8
from subprocess import PIPE
9

10
def get_secret_counts():
    '''
    Generate the expected secret counts listed in `secrets.csv`.

    The file is read from the current working directory. Lines that are
    blank (after stripping whitespace) or that start with `#` are ignored;
    the remaining lines are parsed as comma-separated values.

    :returns: yields [filepath, risk_count, informative_count] lists, with
              the two counts converted to int.
    '''
    with open('secrets.csv') as handle:
        kept_lines = []
        for line in handle:
            if not line.strip():
                # Nothing but whitespace on this line.
                continue
            if line.startswith('#'):
                # Comment line.
                continue
            kept_lines.append(line)

    # csv.reader accepts any iterable of lines, so the filtered list is enough.
    for record in csv.reader(kept_lines, delimiter=','):
        # Yield str, int, int.
        yield [record[0], int(record[1]), int(record[2])]
24

25
def get_command_stdout(cmd, cwd='..'):
    '''
    Run a command and capture what it writes to stdout and stderr.

    :param cmd: argument list passed to subprocess.Popen.
    :param cwd: directory the command is run in (defaults to the parent of
                the current working directory). It is resolved to an
                absolute path before the process is started.
    :returns: (stdout, stderr), both decoded as UTF-8. stderr is None when
              the command wrote nothing to it.
    '''
    # Keep the resolved path: calling abspath without using its result has
    # no effect on the directory handed to Popen.
    cwd = os.path.abspath(cwd)
    p = subprocess.Popen(cmd, stdout=PIPE, stderr=PIPE, cwd=cwd)
    stdout, stderr = p.communicate()
    decoded_stdout = stdout.decode('utf-8')
    # Empty stderr is reported as None rather than as an empty string.
    decoded_stderr = stderr.decode('utf-8') if stderr else None
    return decoded_stdout, decoded_stderr
30

31
def get_secret_count_detectsecrets():
    '''
    Count detect-secrets findings per entry of its JSON report.

    Runs `detect-secrets scan` through get_command_stdout, parses the
    command's stdout as JSON and reads its `results` object.

    :returns: (command argument list, dict mapping every key of `results`
              to the length of the value stored under it)
    '''
    cmd = ['detect-secrets', 'scan']
    scan_output, _unused_stderr = get_command_stdout(cmd)
    report = json.loads(scan_output)
    per_file = report.get('results')
    finds = {path: len(hits) for path, hits in per_file.items()}

    return cmd, finds
40

41
def get_secret_count_gitleaks():
    '''
    Count gitleaks findings per file.

    Runs gitleaks through get_command_stdout, loads the JSON report from
    `gitleaks.json` in the current directory, and counts one find for every
    report entry, keyed by the entry's `file` field. The report file is
    removed afterwards, including when it cannot be parsed.

    :returns: (command argument list, dict of file name -> number of finds)
    '''
    # NOTE(review): gitleaks is told to write '.leaky-meta/gitleaks.json'
    # while the report is read from 'gitleaks.json' here. With the default
    # cwd='..' of get_command_stdout this presumably relies on the script
    # being started from inside the .leaky-meta directory -- confirm.
    report_path = 'gitleaks.json'
    cmd = ['gitleaks', '--config=.leaky-meta/gitleaks-config.toml', '--report=.leaky-meta/gitleaks.json', '--repo-path', '.']
    get_command_stdout(cmd)

    try:
        with open(report_path) as f:
            data = json.load(f)
    finally:
        # Clean up even when the report is unreadable, so a broken report
        # does not linger. The existence check keeps a missing report from
        # masking the original error raised by open().
        if os.path.exists(report_path):
            os.remove(report_path)

    finds = {}
    for obj in data:
        filename = obj.get('file')
        finds[filename] = finds.get(filename, 0) + 1

    return cmd, finds
56

57
def get_secret_count_trufflehog():
    '''
    Count truffleHog findings per path.

    Runs truffleHog through get_command_stdout and parses every non-empty
    line of its stdout as a separate JSON document. For each document the
    length of `stringsFound` is stored under the document's `path`; a later
    document with the same path replaces the earlier value.

    :returns: (command argument list, dict of path -> number of strings found)
    '''
    trufflehog_cmd = ['trufflehog', '--json', '--regex', '.']
    output, _unused_stderr = get_command_stdout(trufflehog_cmd)

    finds = {}
    # Empty lines are skipped; everything else must be valid JSON.
    for record in (json.loads(raw) for raw in output.split('\n') if raw):
        finds[record.get('path')] = len(record.get('stringsFound'))

    return trufflehog_cmd, finds
69

70
def build_markdown_rows(secrets_function, expected_counts):
    '''
    Compare a tool's finds against the expected counts, file by file.

    :param secrets_function: callable taking no arguments and returning
                             (command, dict of file name -> number of finds).
    :param expected_counts: iterable of rows whose item 0 is the file name
                            and whose items 1 and 2 are added together to
                            give the expected number of secrets.
    :returns: (command, dict of file name -> row dict with the keys `name`,
              `found`, `expected` and `false_positives`)
    '''
    cmd, secrets = secrets_function()
    rows = {}
    for entry in expected_counts:
        name = entry[0]
        expected = entry[1] + entry[2]

        if name in secrets:
            found = secrets[name]
            # Anything found beyond the expected amount is counted as a
            # false positive; a shortfall is clamped to zero.
            surplus = found - expected
            false_positives = surplus if surplus > 0 else 0
        else:
            # The tool reported nothing for this file.
            found = 0
            false_positives = 0

        rows[name] = {
            'name': name,
            'found': found,
            'expected': expected,
            'false_positives': false_positives,
        }
    return cmd, rows
87

88
def build_table_header(filename_cols):
    '''
    Build the two header lines of the markdown results table.

    :param filename_cols: width, in characters, of the file name column.
    :returns: the title line followed by the dashed separator line, each
              terminated by a newline.
    '''
    template = 'File Name{}|  Found/Total   | False Positives |\n{}|----------------|-----------------|\n'
    # Pad the 'File Name' title out to the column width.
    title_padding = ' ' * (filename_cols - len('File Name'))
    column_rule = '-' * filename_cols
    return template.format(title_padding, column_rule)
92

93
def build_md_table(secrets_function):
    '''
    Render the markdown results table for one tool and collect its totals.

    Expected counts come from get_secret_counts, the tool's finds from
    build_markdown_rows. Rows are listed by number of finds, highest first.

    :param secrets_function: callable forwarded to build_markdown_rows.
    :returns: (command used, total files, files with at least one find,
              total finds excluding false positives, total expected,
              total false positives, table text)
    '''
    # {name}{padding}| {found}/{total} |{false positives}
    print_template = '{}{}| {}/{} | {}\n'

    expected_counts = list(get_secret_counts())
    # The name column is as wide as the longest file name plus two, so the
    # column separator lines up on every row.
    sep_col = 2 + max(len(entry[0]) for entry in expected_counts)
    pieces = [build_table_header(sep_col)]
    total_files = len(expected_counts)

    cmd_used, rows_by_name = build_markdown_rows(secrets_function, expected_counts)
    ordered = sorted(rows_by_name.values(),
                     key=lambda row: row['found'], reverse=True)

    for row in ordered:
        name = row['name']
        # Right padding that brings the name up to the column width.
        padding = ' ' * (sep_col - len(name))
        pieces.append(print_template.format(
            name, padding, row['found'], row['expected'], row['false_positives']))

    # For metrics we exclude false positives.
    total_finds = sum(row['found'] - row['false_positives'] for row in ordered)
    total_expected = sum(row['expected'] for row in ordered)
    total_false_positives = sum(row['false_positives'] for row in ordered)
    files_covered = sum(1 for row in ordered if row['found'] != 0)

    out = ''.join(pieces)
    return cmd_used, total_files, files_covered, total_finds, total_expected, total_false_positives, out
129

130
def build_md(secrets_function, tool_url):
    '''
    Build the full markdown report for one tool.

    The report consists of a summary header (tool, command, file coverage,
    find coverage, false positives) followed by the table produced by
    build_md_table.

    :param secrets_function: callable forwarded to build_md_table.
    :param tool_url: text placed after `Tool:` in the header.
    :returns: the report as a single string.
    '''
    header_fmt = ('Tool: {}  \n'
                  'Command Used: `{}`  \n'
                  'Files covered: {}/{} ({}% coverage)  \n'
                  'Total finds: {}/{} ({}% coverage)  \n'
                  'False Positives: {}  \n'
                  '\n'
                  '{}')

    (cmd_parts, total_files, files_covered, total_finds,
     total_expected, false_positives, table) = build_md_table(secrets_function)

    # The command arrives as an argument list; show it as one string.
    cmd = ' '.join(cmd_parts)

    # Coverage percentages, rounded to two decimals for readability.
    file_coverage = round(files_covered / total_files * 100, 2)
    find_coverage = round(total_finds / total_expected * 100, 2)

    return header_fmt.format(
        tool_url, cmd,
        files_covered, total_files, file_coverage,
        total_finds, total_expected, find_coverage,
        false_positives, table)
156

157
if __name__ == '__main__':
    # Run every tool first, then write the reports.
    detect_secrets = build_md(get_secret_count_detectsecrets, 'https://github.com/Yelp/detect-secrets')
    truffle_hog = build_md(get_secret_count_trufflehog, 'https://github.com/dxa4481/truffleHog')
    gitleaks = build_md(get_secret_count_gitleaks, 'https://github.com/zricethezav/gitleaks')

    # (report file name, report text), in the order the files are written.
    reports = [
        ('TRUFFLEHOG.md', truffle_hog),
        ('DETECT-SECRETS.md', detect_secrets),
        ('GITLEAKS.md', gitleaks),
    ]
    for report_name, markdown in reports:
        with open('benchmarking' + os.path.sep + report_name, 'w+') as f:
            f.write(markdown)
167
leaky-repo

Использование cookies