llvm-project
283 строки · 8.0 Кб
1#!/usr/bin/env python
2
3from __future__ import absolute_import, division, print_function
4import os
5import re
6import subprocess
7import sys
8import tempfile
9
10###
11
12
13class DeltaAlgorithm(object):
14def __init__(self):
15self.cache = set()
16
17def test(self, changes):
18abstract
19
20###
21
22def getTestResult(self, changes):
23# There is no reason to cache successful tests because we will
24# always reduce the changeset when we see one.
25
26changeset = frozenset(changes)
27if changeset in self.cache:
28return False
29elif not self.test(changes):
30self.cache.add(changeset)
31return False
32else:
33return True
34
35def run(self, changes, force=False):
36# Make sure the initial test passes, if not then (a) either
37# the user doesn't expect monotonicity, and we may end up
38# doing O(N^2) tests, or (b) the test is wrong. Avoid the
39# O(N^2) case unless user requests it.
40if not force:
41if not self.getTestResult(changes):
42raise ValueError("Initial test passed to delta fails.")
43
44# Check empty set first to quickly find poor test functions.
45if self.getTestResult(set()):
46return set()
47else:
48return self.delta(changes, self.split(changes))
49
50def split(self, S):
51"""split(set) -> [sets]
52
53Partition a set into one or two pieces.
54"""
55
56# There are many ways to split, we could do a better job with more
57# context information (but then the API becomes grosser).
58L = list(S)
59mid = len(L) // 2
60if mid == 0:
61return (L,)
62else:
63return L[:mid], L[mid:]
64
65def delta(self, c, sets):
66# assert(reduce(set.union, sets, set()) == c)
67
68# If there is nothing left we can remove, we are done.
69if len(sets) <= 1:
70return c
71
72# Look for a passing subset.
73res = self.search(c, sets)
74if res is not None:
75return res
76
77# Otherwise, partition sets if possible; if not we are done.
78refined = sum(map(list, map(self.split, sets)), [])
79if len(refined) == len(sets):
80return c
81
82return self.delta(c, refined)
83
84def search(self, c, sets):
85for i, S in enumerate(sets):
86# If test passes on this subset alone, recurse.
87if self.getTestResult(S):
88return self.delta(S, self.split(S))
89
90# Otherwise if we have more than two sets, see if test
91# pases without this subset.
92if len(sets) > 2:
93complement = sum(sets[:i] + sets[i + 1 :], [])
94if self.getTestResult(complement):
95return self.delta(complement, sets[:i] + sets[i + 1 :])
96
97
98###
99
100
101class Token(object):
102def __init__(self, type, data, flags, file, line, column):
103self.type = type
104self.data = data
105self.flags = flags
106self.file = file
107self.line = line
108self.column = column
109
110
111kTokenRE = re.compile(
112r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", re.DOTALL | re.MULTILINE
113)
114
115
116def getTokens(path):
117p = subprocess.Popen(
118["clang", "-dump-raw-tokens", path],
119stdin=subprocess.PIPE,
120stdout=subprocess.PIPE,
121stderr=subprocess.PIPE,
122)
123out, err = p.communicate()
124
125tokens = []
126collect = None
127for ln in err.split("\n"):
128# Silly programmers refuse to print in simple machine readable
129# formats. Whatever.
130if collect is None:
131collect = ln
132else:
133collect = collect + "\n" + ln
134if "Loc=<" in ln and ln.endswith(">"):
135ln, collect = collect, None
136tokens.append(Token(*kTokenRE.match(ln).groups()))
137
138return tokens
139
140
141###
142
143
144class TMBDDelta(DeltaAlgorithm):
145def __init__(self, testProgram, tokenLists, log):
146def patchName(name, suffix):
147base, ext = os.path.splitext(name)
148return base + "." + suffix + ext
149
150super(TMBDDelta, self).__init__()
151self.testProgram = testProgram
152self.tokenLists = tokenLists
153self.tempFiles = [patchName(f, "tmp") for f, _ in self.tokenLists]
154self.targetFiles = [patchName(f, "ok") for f, _ in self.tokenLists]
155self.log = log
156self.numTests = 0
157
158def writeFiles(self, changes, fileNames):
159assert len(fileNames) == len(self.tokenLists)
160byFile = [[] for i in self.tokenLists]
161for i, j in changes:
162byFile[i].append(j)
163
164for i, (file, tokens) in enumerate(self.tokenLists):
165f = open(fileNames[i], "w")
166for j in byFile[i]:
167f.write(tokens[j])
168f.close()
169
170return byFile
171
172def test(self, changes):
173self.numTests += 1
174
175byFile = self.writeFiles(changes, self.tempFiles)
176
177if self.log:
178print("TEST - ", end=" ", file=sys.stderr)
179if self.log > 1:
180for i, (file, _) in enumerate(self.tokenLists):
181indices = byFile[i]
182if i:
183sys.stderr.write("\n ")
184sys.stderr.write("%s:%d tokens: [" % (file, len(byFile[i])))
185prev = None
186for j in byFile[i]:
187if prev is None or j != prev + 1:
188if prev:
189sys.stderr.write("%d][" % prev)
190sys.stderr.write(str(j))
191sys.stderr.write(":")
192prev = j
193if byFile[i]:
194sys.stderr.write(str(byFile[i][-1]))
195sys.stderr.write("] ")
196else:
197print(
198", ".join(
199[
200"%s:%d tokens" % (file, len(byFile[i]))
201for i, (file, _) in enumerate(self.tokenLists)
202]
203),
204end=" ",
205file=sys.stderr,
206)
207
208p = subprocess.Popen([self.testProgram] + self.tempFiles)
209res = p.wait() == 0
210
211if res:
212self.writeFiles(changes, self.targetFiles)
213
214if self.log:
215print("=> %s" % res, file=sys.stderr)
216else:
217if res:
218print("\nSUCCESS (%d tokens)" % len(changes))
219else:
220sys.stderr.write(".")
221
222return res
223
224def run(self):
225res = super(TMBDDelta, self).run(
226[
227(i, j)
228for i, (file, tokens) in enumerate(self.tokenLists)
229for j in range(len(tokens))
230]
231)
232self.writeFiles(res, self.targetFiles)
233if not self.log:
234print(file=sys.stderr)
235return res
236
237
238def tokenBasedMultiDelta(program, files, log):
239# Read in the lists of tokens.
240tokenLists = [(file, [t.data for t in getTokens(file)]) for file in files]
241
242numTokens = sum([len(tokens) for _, tokens in tokenLists])
243print("Delta on %s with %d tokens." % (", ".join(files), numTokens))
244
245tbmd = TMBDDelta(program, tokenLists, log)
246
247res = tbmd.run()
248
249print(
250"Finished %s with %d tokens (in %d tests)."
251% (", ".join(tbmd.targetFiles), len(res), tbmd.numTests)
252)
253
254
255def main():
256from optparse import OptionParser, OptionGroup
257
258parser = OptionParser("%prog <test program> {files+}")
259parser.add_option(
260"",
261"--debug",
262dest="debugLevel",
263help="set debug level [default %default]",
264action="store",
265type=int,
266default=0,
267)
268(opts, args) = parser.parse_args()
269
270if len(args) <= 1:
271parser.error("Invalid number of arguments.")
272
273program, files = args[0], args[1:]
274
275md = tokenBasedMultiDelta(program, files, log=opts.debugLevel)
276
277
278if __name__ == "__main__":
279try:
280main()
281except KeyboardInterrupt:
282print("Interrupted.", file=sys.stderr)
283os._exit(1) # Avoid freeing our giant cache.
284