1#!/usr/bin/env python 2 3from __future__ import absolute_import, division, print_function 4import os 5import re 6import subprocess 7import sys 8import tempfile 9 10### 11 12class DeltaAlgorithm(object): 13 def __init__(self): 14 self.cache = set() 15 16 def test(self, changes): 17 abstract 18 19 ### 20 21 def getTestResult(self, changes): 22 # There is no reason to cache successful tests because we will 23 # always reduce the changeset when we see one. 24 25 changeset = frozenset(changes) 26 if changeset in self.cache: 27 return False 28 elif not self.test(changes): 29 self.cache.add(changeset) 30 return False 31 else: 32 return True 33 34 def run(self, changes, force=False): 35 # Make sure the initial test passes, if not then (a) either 36 # the user doesn't expect monotonicity, and we may end up 37 # doing O(N^2) tests, or (b) the test is wrong. Avoid the 38 # O(N^2) case unless user requests it. 39 if not force: 40 if not self.getTestResult(changes): 41 raise ValueError('Initial test passed to delta fails.') 42 43 # Check empty set first to quickly find poor test functions. 44 if self.getTestResult(set()): 45 return set() 46 else: 47 return self.delta(changes, self.split(changes)) 48 49 def split(self, S): 50 """split(set) -> [sets] 51 52 Partition a set into one or two pieces. 53 """ 54 55 # There are many ways to split, we could do a better job with more 56 # context information (but then the API becomes grosser). 57 L = list(S) 58 mid = len(L)//2 59 if mid==0: 60 return L, 61 else: 62 return L[:mid],L[mid:] 63 64 def delta(self, c, sets): 65 # assert(reduce(set.union, sets, set()) == c) 66 67 # If there is nothing left we can remove, we are done. 68 if len(sets) <= 1: 69 return c 70 71 # Look for a passing subset. 72 res = self.search(c, sets) 73 if res is not None: 74 return res 75 76 # Otherwise, partition sets if possible; if not we are done. 77 refined = sum(map(list, map(self.split, sets)), []) 78 if len(refined) == len(sets): 79 return c 80 81 return self.delta(c, refined) 82 83 def search(self, c, sets): 84 for i,S in enumerate(sets): 85 # If test passes on this subset alone, recurse. 86 if self.getTestResult(S): 87 return self.delta(S, self.split(S)) 88 89 # Otherwise if we have more than two sets, see if test 90 # pases without this subset. 91 if len(sets) > 2: 92 complement = sum(sets[:i] + sets[i+1:],[]) 93 if self.getTestResult(complement): 94 return self.delta(complement, sets[:i] + sets[i+1:]) 95 96### 97 98class Token(object): 99 def __init__(self, type, data, flags, file, line, column): 100 self.type = type 101 self.data = data 102 self.flags = flags 103 self.file = file 104 self.line = line 105 self.column = column 106 107kTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", 108 re.DOTALL | re.MULTILINE) 109 110def getTokens(path): 111 p = subprocess.Popen(['clang','-dump-raw-tokens',path], 112 stdin=subprocess.PIPE, 113 stdout=subprocess.PIPE, 114 stderr=subprocess.PIPE) 115 out,err = p.communicate() 116 117 tokens = [] 118 collect = None 119 for ln in err.split('\n'): 120 # Silly programmers refuse to print in simple machine readable 121 # formats. Whatever. 122 if collect is None: 123 collect = ln 124 else: 125 collect = collect + '\n' + ln 126 if 'Loc=<' in ln and ln.endswith('>'): 127 ln,collect = collect,None 128 tokens.append(Token(*kTokenRE.match(ln).groups())) 129 130 return tokens 131 132### 133 134class TMBDDelta(DeltaAlgorithm): 135 def __init__(self, testProgram, tokenLists, log): 136 def patchName(name, suffix): 137 base,ext = os.path.splitext(name) 138 return base + '.' + suffix + ext 139 super(TMBDDelta, self).__init__() 140 self.testProgram = testProgram 141 self.tokenLists = tokenLists 142 self.tempFiles = [patchName(f,'tmp') 143 for f,_ in self.tokenLists] 144 self.targetFiles = [patchName(f,'ok') 145 for f,_ in self.tokenLists] 146 self.log = log 147 self.numTests = 0 148 149 def writeFiles(self, changes, fileNames): 150 assert len(fileNames) == len(self.tokenLists) 151 byFile = [[] for i in self.tokenLists] 152 for i,j in changes: 153 byFile[i].append(j) 154 155 for i,(file,tokens) in enumerate(self.tokenLists): 156 f = open(fileNames[i],'w') 157 for j in byFile[i]: 158 f.write(tokens[j]) 159 f.close() 160 161 return byFile 162 163 def test(self, changes): 164 self.numTests += 1 165 166 byFile = self.writeFiles(changes, self.tempFiles) 167 168 if self.log: 169 print('TEST - ', end=' ', file=sys.stderr) 170 if self.log > 1: 171 for i,(file,_) in enumerate(self.tokenLists): 172 indices = byFile[i] 173 if i: 174 sys.stderr.write('\n ') 175 sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i]))) 176 prev = None 177 for j in byFile[i]: 178 if prev is None or j != prev + 1: 179 if prev: 180 sys.stderr.write('%d][' % prev) 181 sys.stderr.write(str(j)) 182 sys.stderr.write(':') 183 prev = j 184 if byFile[i]: 185 sys.stderr.write(str(byFile[i][-1])) 186 sys.stderr.write('] ') 187 else: 188 print(', '.join(['%s:%d tokens' % (file, len(byFile[i])) 189 for i,(file,_) in enumerate(self.tokenLists)]), end=' ', file=sys.stderr) 190 191 p = subprocess.Popen([self.testProgram] + self.tempFiles) 192 res = p.wait() == 0 193 194 if res: 195 self.writeFiles(changes, self.targetFiles) 196 197 if self.log: 198 print('=> %s' % res, file=sys.stderr) 199 else: 200 if res: 201 print('\nSUCCESS (%d tokens)' % len(changes)) 202 else: 203 sys.stderr.write('.') 204 205 return res 206 207 def run(self): 208 res = super(TMBDDelta, self).run([(i,j) 209 for i,(file,tokens) in enumerate(self.tokenLists) 210 for j in range(len(tokens))]) 211 self.writeFiles(res, self.targetFiles) 212 if not self.log: 213 print(file=sys.stderr) 214 return res 215 216def tokenBasedMultiDelta(program, files, log): 217 # Read in the lists of tokens. 218 tokenLists = [(file, [t.data for t in getTokens(file)]) 219 for file in files] 220 221 numTokens = sum([len(tokens) for _,tokens in tokenLists]) 222 print("Delta on %s with %d tokens." % (', '.join(files), numTokens)) 223 224 tbmd = TMBDDelta(program, tokenLists, log) 225 226 res = tbmd.run() 227 228 print("Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles), 229 len(res), 230 tbmd.numTests)) 231 232def main(): 233 from optparse import OptionParser, OptionGroup 234 parser = OptionParser("%prog <test program> {files+}") 235 parser.add_option("", "--debug", dest="debugLevel", 236 help="set debug level [default %default]", 237 action="store", type=int, default=0) 238 (opts, args) = parser.parse_args() 239 240 if len(args) <= 1: 241 parser.error('Invalid number of arguments.') 242 243 program,files = args[0],args[1:] 244 245 md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) 246 247if __name__ == '__main__': 248 try: 249 main() 250 except KeyboardInterrupt: 251 print('Interrupted.', file=sys.stderr) 252 os._exit(1) # Avoid freeing our giant cache. 253