1#!/usr/bin/env python 2'''A source code database 3 4 SourceDB is a database of file information used to determine whether files 5 should be rebuilt by the build system. All files names are stored relative 6 to a given root, which is intended as the root of a Project. 7 8 Relative or absolute pathnames may be used as keys, but absolute pathnames 9 must fall under the database root. The value format is a tuple of the following: 10 11 Checksum: The md5 checksum of the file 12 Mod Time: The time the file was last modified 13 Timestamp: The time theentry was last modified 14 Dependencies: A tuple of files upon which this entry depends 15 16 This script also provides some default actions: 17 18 - insert <database file> <filename> 19 Inserts this file from the database, or updates its entry if it 20 already exists. 21 22 - remove <database file> <filename> 23 Removes this file from the database. The filename may also be a 24 regular expression. 25 26''' 27from __future__ import print_function 28from __future__ import absolute_import 29import logger 30 31import errno 32import os 33import re 34import time 35 36import pickle 37 38try: 39 from hashlib import md5 as new_md5 40except ImportError: 41 from md5 import new as new_md5 # novermin 42 43 44class SourceDB (dict, logger.Logger): 45 '''A SourceDB is a dictionary of file data used during the build process.''' 46 includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 47 isLoading = 0 48 49 def __init__(self, root, filename = None): 50 dict.__init__(self) 51 logger.Logger.__init__(self) 52 self.root = root 53 self.filename = filename 54 if self.filename is None: 55 self.filename = os.path.join(str(root), 'bsSource.db') 56 self.isDirty = 0 57 return 58 59 def __str__(self): 60 output = '' 61 for source in self: 62 (checksum, mtime, timestamp, dependencies) = self[source] 63 output += source+'\n' 64 output += ' Checksum: '+str(checksum)+'\n' 65 output += ' Mod Time: '+str(mtime)+'\n' 66 output += ' Timestamp: '+str(timestamp)+'\n' 67 output += ' Deps: '+str(dependencies)+'\n' 68 return output 69 70 def __setstate__(self, d): 71 logger.Logger.__setstate__(self, d) 72 # We have to prevent recursive calls to this when the pickled database is loaded in load() 73 # This is to ensure that fresh copies of the database are obtained after unpickling 74 if not SourceDB.isLoading: 75 SourceDB.isLoading = 1 76 self.load() 77 SourceDB.isLoading = 0 78 return 79 80 def getRelativePath(self, path): 81 '''Returns a relative source file path using the root''' 82 if os.path.isabs(path): 83 root = str(self.root) 84 if not path.startswith(root+os.sep): 85 raise ValueError('Absolute path '+path+' conflicts with root '+root) 86 else: 87 path = path[len(root)+1:] 88 return path 89 90 def checkValue(self, value): 91 '''Validate the value, raising ValueError for problems''' 92 if not isinstance(value, tuple): 93 raise ValueError('Source database values must be tuples, '+str(type(value))+' given') 94 if not len(value) == 4: 95 raise ValueError('Source database values must have 4 items, '+str(len(value))+' given') 96 (checksum, mtime, timestamp, dependencies) = value 97 if not isinstance(checksum, str): 98 raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given') 99 if not isinstance(mtime, int): 100 raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given') 101 elif mtime < 0: 102 raise ValueError('Negative modification time for source database, '+str(mtime)) 103 if not isinstance(timestamp, float): 104 raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given') 105 elif timestamp < 0: 106 raise ValueError('Negative timestamp for source database, '+str(timestamp)) 107 if not isinstance(dependencies, tuple): 108 raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given') 109 return value 110 111 def __getitem__(self, key): 112 '''Converts the key to a relative source file path using the root''' 113 return dict.__getitem__(self, self.getRelativePath(key)) 114 115 def __setitem__(self, key, value): 116 '''Converts the key to a relative source file path using the root, and checks the validity of the value''' 117 self.isDirty = 1 118 return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value)) 119 120 def __delitem__(self, key): 121 '''Converts the key to a relative source file path using the root''' 122 self.isDirty = 1 123 return dict.__delitem__(self, self.getRelativePath(key)) 124 125 def __contains__(self, key): 126 '''Converts the key to a relative source file path using the root''' 127 return dict.__contains__(self, self.getRelativePath(key)) 128 129 def has_key(self, key): 130 '''This method just calls self.__contains__(key)''' 131 return self.__contains__(key) 132 133 def items(self): 134 '''Converts each key to a relative source file path using the root''' 135 return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)] 136 137 def keys(self): 138 '''Converts each key to a relative source file path using the root''' 139 return map(self.getRelativePath, dict.keys(self)) 140 141 def update(self, d): 142 '''Update the dictionary with the contents of d''' 143 self.isDirty = 1 144 for k in d: 145 self[k] = d[k] 146 return 147 148 def getChecksum(source, chunkSize = 1024*1024): 149 '''Return the md5 checksum for a given file, which may also be specified by its filename 150 - The chunkSize argument specifies the size of blocks read from the file''' 151 if hasattr(source, 'close'): 152 f = source 153 else: 154 f = open(source) 155 m = new_md5() 156 size = chunkSize 157 buf = f.read(size) 158 while buf: 159 m.update(buf) 160 buf = f.read(size) 161 f.close() 162 return m.hexdigest() 163 getChecksum = staticmethod(getChecksum) 164 165 def getModificationTime(source): 166 t = os.path.getmtime(source) 167 if isinstance(t, float): 168 t = int(t) 169 return t 170 getModificationTime = staticmethod(getModificationTime) 171 172 def updateSource(self, source, noChecksum = 0): 173 self.isDirty = 1 174 dependencies = () 175 try: 176 (checksum, mtime, timestamp, dependencies) = self[source] 177 except KeyError: 178 pass 179 self.logPrint('Updating '+source+' in source database', 3, 'sourceDB') 180 if noChecksum: 181 checksum = '' 182 else: 183 checksum = SourceDB.getChecksum(source) 184 self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies) 185 return 186 187 def clearSource(self, source): 188 '''This removes source information, but preserved dependencies''' 189 if source in self: 190 self.isDirty = 1 191 self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB') 192 (checksum, mtime, timestamp, dependencies) = self[source] 193 self[source] = ('', 0, time.time(), dependencies) 194 return 195 196 def getDependencies(self, source): 197 try: 198 (checksum, mtime, timestamp, dependencies) = self[source] 199 except KeyError: 200 dependencies = () 201 return dependencies 202 203 def addDependency(self, source, dependency): 204 self.isDirty = 1 205 dependencies = () 206 try: 207 (checksum, mtime, timestamp, dependencies) = self[source] 208 except KeyError: 209 checksum = '' 210 mtime = 0 211 if not dependency in dependencies: 212 self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB') 213 dependencies = dependencies+(dependency,) 214 self[source] = (checksum, mtime, time.time(), dependencies) 215 return 216 217 def calculateDependencies(self): 218 self.logPrint('Recalculating dependencies', 1, 'sourceDB') 219 for source in self: 220 self.logPrint('Calculating '+source, 3, 'sourceDB') 221 (checksum, mtime, timestamp, dependencies) = self[source] 222 newDep = [] 223 try: 224 file = open(source) 225 except IOError as e: 226 if e.errno == errno.ENOENT: 227 del self[source] 228 else: 229 raise e 230 comps = source.split('/') 231 for line in file: 232 m = self.includeRE.match(line) 233 if m: 234 filename = m.group('includeFile') 235 matchNum = 0 236 matchName = filename 237 self.logPrint(' Includes '+filename, 3, 'sourceDB') 238 for s in self: 239 if s.find(filename) >= 0: 240 self.logPrint(' Checking '+s, 3, 'sourceDB') 241 c = s.split('/') 242 for i in range(len(c)): 243 if not comps[i] == c[i]: break 244 if i > matchNum: 245 self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 246 matchName = s 247 matchNum = i 248 newDep.append(matchName) 249 # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm 250 self[source] = (checksum, mtime, timestamp, tuple(newDep)) 251 file.close() 252 253 def load(self): 254 '''Load the source database from the saved filename''' 255 filename = str(self.filename) 256 if os.path.exists(filename): 257 self.clear() 258 self.logPrint('Loading source database from '+filename, 2, 'sourceDB') 259 dbFile = open(filename) 260 newDB = pickle.load(dbFile) 261 dbFile.close() 262 self.update(newDB) 263 else: 264 self.logPrint('Could not load source database from '+filename, 1, 'sourceDB') 265 return 266 267 def save(self, force = 0): 268 '''Save the source database to a file. The saved database with have path names relative to the root.''' 269 if not self.isDirty and not force: 270 self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB') 271 return 272 filename = str(self.filename) 273 if os.path.exists(os.path.dirname(filename)): 274 self.logPrint('Saving source database in '+filename, 2, 'sourceDB') 275 dbFile = open(filename, 'w') 276 pickle.dump(self, dbFile) 277 dbFile.close() 278 self.isDirty = 0 279 else: 280 self.logPrint('Could not save source database in '+filename, 1, 'sourceDB') 281 return 282 283class DependencyAnalyzer (logger.Logger): 284 def __init__(self, sourceDB): 285 logger.Logger.__init__(self) 286 self.sourceDB = sourceDB 287 self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1') 288 return 289 290 def resolveDependency(self, source, dep): 291 if dep in self.sourceDB: return dep 292 # Choose the entry in sourceDB whose base matches dep, 293 # and who has the most path components in common with source 294 # This should be replaced by an appeal to cpp 295 matchNum = 0 296 matchName = dep 297 components = source.split(os.sep) 298 self.logPrint(' Includes '+filename, 3, 'sourceDB') 299 for s in self.sourceDB: 300 if s.find(dep) >= 0: 301 self.logPrint(' Checking '+s, 3, 'sourceDB') 302 comp = s.split(os.sep) 303 for i in range(len(comp)): 304 if not components[i] == comp[i]: break 305 if i > matchNum: 306 self.logPrint(' Choosing '+s+'('+str(i)+')', 3, 'sourceDB') 307 matchName = s 308 matchNum = i 309 if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source) 310 return matchName 311 312 def getNeighbors(self, source): 313 file = open(source) 314 adj = [] 315 for line in file: 316 match = self.includeRE.match(line) 317 if match: 318 adj.append(self.resolveDependency(source, m.group('includeFile'))) 319 file.close() 320 return adj 321 322 def calculateDependencies(self): 323 '''Should this be a generator? 324 First assemble the DAG using #include relations 325 Then calculate the depdencies with all pairs shortest-path 326 - I think Floyd-Warshell and N-source Dijkstra are just as good 327 ''' 328 # Assembling DAG 329 dag = {} 330 for source in self.sourceDB: 331 try: 332 dag[source] = self.getNeighbors(self, source) 333 except IOError as e: 334 if e.errno == errno.ENOENT: 335 del self[source] 336 else: 337 raise e 338 # Finding all-pairs shortest path 339 340if __name__ == '__main__': 341 import sys 342 try: 343 if len(sys.argv) < 3: 344 print('sourceDatabase.py <database filename> [insert | remove] <filename>') 345 else: 346 if os.path.exists(sys.argv[1]): 347 dbFile = open(sys.argv[1]) 348 sourceDB = pickle.load(dbFile) 349 dbFile.close() 350 else: 351 sys.exit('Could not load source database from '+sys.argv[1]) 352 if sys.argv[2] == 'insert': 353 if sys.argv[3] in sourceDB: 354 self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB') 355 else: 356 self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB') 357 self.sourceDB.updateSource(sys.argv[3]) 358 elif sys.argv[2] == 'remove': 359 if sys.argv[3] in sourceDB: 360 sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB') 361 del self.sourceDB[sys.argv[3]] 362 else: 363 sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB') 364 removeRE = re.compile(sys.argv[3]) 365 removes = list(filter(removeRE.match, sourceDB.keys())) 366 for source in removes: 367 self.logPrint('Removing '+source, 3, 'sourceDB') 368 del self.sourceDB[source] 369 else: 370 sys.exit('Unknown source database action: '+sys.argv[2]) 371 sourceDB.save() 372 except Exception as e: 373 import traceback 374 print(traceback.print_tb(sys.exc_info()[2])) 375 sys.exit(str(e)) 376 sys.exit(0) 377