1#!/usr/bin/env python
2'''A source code database
3
4    SourceDB is a database of file information used to determine whether files
5    should be rebuilt by the build system. All files names are stored relative
6    to a given root, which is intended as the root of a Project.
7
8    Relative or absolute pathnames may be used as keys, but absolute pathnames
9    must fall under the database root. The value format is a tuple of the following:
10
11      Checksum:     The md5 checksum of the file
12      Mod Time:     The time the file was last modified
13      Timestamp:    The time theentry was last modified
14      Dependencies: A tuple of files upon which this entry depends
15
16    This script also provides some default actions:
17
18      - insert <database file> <filename>
19        Inserts this file from the database, or updates its entry if it
20        already exists.
21
22      - remove <database file> <filename>
23        Removes this file from the database. The filename may also be a
24        regular expression.
25
26'''
27from __future__ import print_function
28from __future__ import absolute_import
29import logger
30
31import errno
32import os
33import re
34import time
35
36import pickle
37
38try:
39  from hashlib import md5 as new_md5
40except ImportError:
41  from md5 import new as new_md5 # novermin
42
43
44class SourceDB (dict, logger.Logger):
45  '''A SourceDB is a dictionary of file data used during the build process.'''
46  includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
47  isLoading = 0
48
49  def __init__(self, root, filename = None):
50    dict.__init__(self)
51    logger.Logger.__init__(self)
52    self.root       = root
53    self.filename   = filename
54    if self.filename is None:
55      self.filename = os.path.join(str(root), 'bsSource.db')
56    self.isDirty    = 0
57    return
58
59  def __str__(self):
60    output = ''
61    for source in self:
62      (checksum, mtime, timestamp, dependencies) = self[source]
63      output += source+'\n'
64      output += '  Checksum:  '+str(checksum)+'\n'
65      output += '  Mod Time:  '+str(mtime)+'\n'
66      output += '  Timestamp: '+str(timestamp)+'\n'
67      output += '  Deps:      '+str(dependencies)+'\n'
68    return output
69
70  def __setstate__(self, d):
71    logger.Logger.__setstate__(self, d)
72    # We have to prevent recursive calls to this when the pickled database is loaded in load()
73    #   This is to ensure that fresh copies of the database are obtained after unpickling
74    if not SourceDB.isLoading:
75      SourceDB.isLoading = 1
76      self.load()
77      SourceDB.isLoading = 0
78    return
79
80  def getRelativePath(self, path):
81    '''Returns a relative source file path using the root'''
82    if os.path.isabs(path):
83      root = str(self.root)
84      if not path.startswith(root+os.sep):
85        raise ValueError('Absolute path '+path+' conflicts with root '+root)
86      else:
87        path = path[len(root)+1:]
88    return path
89
90  def checkValue(self, value):
91    '''Validate the value, raising ValueError for problems'''
92    if not isinstance(value, tuple):
93      raise ValueError('Source database values must be tuples, '+str(type(value))+' given')
94    if not len(value) == 4:
95      raise ValueError('Source database values must have 4 items, '+str(len(value))+' given')
96    (checksum, mtime, timestamp, dependencies) = value
97    if not isinstance(checksum, str):
98      raise ValueError('Invalid checksum for source database, '+str(type(checksum))+' given')
99    if not isinstance(mtime, int):
100      raise ValueError('Invalid modification time for source database, '+str(type(mtime))+' given')
101    elif mtime < 0:
102      raise ValueError('Negative modification time for source database, '+str(mtime))
103    if not isinstance(timestamp, float):
104      raise ValueError('Invalid timestamp for source database, '+str(type(timestamp))+' given')
105    elif timestamp < 0:
106      raise ValueError('Negative timestamp for source database, '+str(timestamp))
107    if not isinstance(dependencies, tuple):
108      raise ValueError('Invalid dependencies for source database, '+str(type(dependencies))+' given')
109    return value
110
111  def __getitem__(self, key):
112    '''Converts the key to a relative source file path using the root'''
113    return dict.__getitem__(self, self.getRelativePath(key))
114
115  def __setitem__(self, key, value):
116    '''Converts the key to a relative source file path using the root, and checks the validity of the value'''
117    self.isDirty = 1
118    return dict.__setitem__(self, self.getRelativePath(key), self.checkValue(value))
119
120  def __delitem__(self, key):
121    '''Converts the key to a relative source file path using the root'''
122    self.isDirty = 1
123    return dict.__delitem__(self, self.getRelativePath(key))
124
125  def __contains__(self, key):
126    '''Converts the key to a relative source file path using the root'''
127    return dict.__contains__(self, self.getRelativePath(key))
128
129  def has_key(self, key):
130    '''This method just calls self.__contains__(key)'''
131    return self.__contains__(key)
132
133  def items(self):
134    '''Converts each key to a relative source file path using the root'''
135    return [(self.getRelativePath(item[0]), item[1]) for item in dict.items(self)]
136
137  def keys(self):
138    '''Converts each key to a relative source file path using the root'''
139    return map(self.getRelativePath, dict.keys(self))
140
141  def update(self, d):
142    '''Update the dictionary with the contents of d'''
143    self.isDirty = 1
144    for k in d:
145      self[k] = d[k]
146    return
147
148  def getChecksum(source, chunkSize = 1024*1024):
149    '''Return the md5 checksum for a given file, which may also be specified by its filename
150       - The chunkSize argument specifies the size of blocks read from the file'''
151    if hasattr(source, 'close'):
152      f = source
153    else:
154      f = open(source)
155    m = new_md5()
156    size = chunkSize
157    buf  = f.read(size)
158    while buf:
159      m.update(buf)
160      buf = f.read(size)
161    f.close()
162    return m.hexdigest()
163  getChecksum = staticmethod(getChecksum)
164
165  def getModificationTime(source):
166    t = os.path.getmtime(source)
167    if isinstance(t, float):
168      t = int(t)
169    return t
170  getModificationTime = staticmethod(getModificationTime)
171
172  def updateSource(self, source, noChecksum = 0):
173    self.isDirty = 1
174    dependencies = ()
175    try:
176      (checksum, mtime, timestamp, dependencies) = self[source]
177    except KeyError:
178      pass
179    self.logPrint('Updating '+source+' in source database', 3, 'sourceDB')
180    if noChecksum:
181      checksum   = ''
182    else:
183      checksum   = SourceDB.getChecksum(source)
184    self[source] = (checksum, SourceDB.getModificationTime(source), time.time(), dependencies)
185    return
186
187  def clearSource(self, source):
188    '''This removes source information, but preserved dependencies'''
189    if source in self:
190      self.isDirty = 1
191      self.logPrint('Clearing '+source+' from source database', 3, 'sourceDB')
192      (checksum, mtime, timestamp, dependencies) = self[source]
193      self[source] = ('', 0, time.time(), dependencies)
194    return
195
196  def getDependencies(self, source):
197    try:
198      (checksum, mtime, timestamp, dependencies) = self[source]
199    except KeyError:
200      dependencies = ()
201    return dependencies
202
203  def addDependency(self, source, dependency):
204    self.isDirty = 1
205    dependencies = ()
206    try:
207      (checksum, mtime, timestamp, dependencies) = self[source]
208    except KeyError:
209      checksum = ''
210      mtime    = 0
211    if not dependency in dependencies:
212      self.logPrint('Adding dependency '+dependency+' to source '+source+' in source database', 3, 'sourceDB')
213      dependencies = dependencies+(dependency,)
214    self[source] = (checksum, mtime, time.time(), dependencies)
215    return
216
217  def calculateDependencies(self):
218    self.logPrint('Recalculating dependencies', 1, 'sourceDB')
219    for source in self:
220      self.logPrint('Calculating '+source, 3, 'sourceDB')
221      (checksum, mtime, timestamp, dependencies) = self[source]
222      newDep = []
223      try:
224        file = open(source)
225      except IOError as e:
226        if e.errno == errno.ENOENT:
227          del self[source]
228        else:
229          raise e
230      comps  = source.split('/')
231      for line in file:
232        m = self.includeRE.match(line)
233        if m:
234          filename  = m.group('includeFile')
235          matchNum  = 0
236          matchName = filename
237          self.logPrint('  Includes '+filename, 3, 'sourceDB')
238          for s in self:
239            if s.find(filename) >= 0:
240              self.logPrint('    Checking '+s, 3, 'sourceDB')
241              c = s.split('/')
242              for i in range(len(c)):
243                if not comps[i] == c[i]: break
244              if i > matchNum:
245                self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
246                matchName = s
247                matchNum  = i
248          newDep.append(matchName)
249      # Grep for #include, then put these files in a tuple, we can be recursive later in a fixpoint algorithm
250      self[source] = (checksum, mtime, timestamp, tuple(newDep))
251      file.close()
252
253  def load(self):
254    '''Load the source database from the saved filename'''
255    filename = str(self.filename)
256    if os.path.exists(filename):
257      self.clear()
258      self.logPrint('Loading source database from '+filename, 2, 'sourceDB')
259      dbFile = open(filename)
260      newDB  = pickle.load(dbFile)
261      dbFile.close()
262      self.update(newDB)
263    else:
264      self.logPrint('Could not load source database from '+filename, 1, 'sourceDB')
265    return
266
267  def save(self, force = 0):
268    '''Save the source database to a file. The saved database with have path names relative to the root.'''
269    if not self.isDirty and not force:
270      self.logPrint('No need to save source database in '+str(self.filename), 2, 'sourceDB')
271      return
272    filename = str(self.filename)
273    if os.path.exists(os.path.dirname(filename)):
274      self.logPrint('Saving source database in '+filename, 2, 'sourceDB')
275      dbFile = open(filename, 'w')
276      pickle.dump(self, dbFile)
277      dbFile.close()
278      self.isDirty = 0
279    else:
280      self.logPrint('Could not save source database in '+filename, 1, 'sourceDB')
281    return
282
283class DependencyAnalyzer (logger.Logger):
284  def __init__(self, sourceDB):
285    logger.Logger.__init__(self)
286    self.sourceDB  = sourceDB
287    self.includeRE = re.compile(r'^#include (<|")(?P<includeFile>.+)\1')
288    return
289
290  def resolveDependency(self, source, dep):
291    if dep in self.sourceDB: return dep
292    # Choose the entry in sourceDB whose base matches dep,
293    #   and who has the most path components in common with source
294    # This should be replaced by an appeal to cpp
295    matchNum   = 0
296    matchName  = dep
297    components = source.split(os.sep)
298    self.logPrint('  Includes '+filename, 3, 'sourceDB')
299    for s in self.sourceDB:
300      if s.find(dep) >= 0:
301        self.logPrint('    Checking '+s, 3, 'sourceDB')
302        comp = s.split(os.sep)
303        for i in range(len(comp)):
304          if not components[i] == comp[i]: break
305        if i > matchNum:
306          self.logPrint('    Choosing '+s+'('+str(i)+')', 3, 'sourceDB')
307          matchName = s
308          matchNum  = i
309    if not matchName in self.sourceDB: raise RuntimeError('Invalid #include '+matchName+' in '+source)
310    return matchName
311
312  def getNeighbors(self, source):
313    file = open(source)
314    adj  = []
315    for line in file:
316      match = self.includeRE.match(line)
317      if match:
318        adj.append(self.resolveDependency(source, m.group('includeFile')))
319    file.close()
320    return adj
321
322  def calculateDependencies(self):
323    '''Should this be a generator?
324    First assemble the DAG using #include relations
325    Then calculate the depdencies with all pairs shortest-path
326      - I think Floyd-Warshell and N-source Dijkstra are just as good
327    '''
328    # Assembling DAG
329    dag = {}
330    for source in self.sourceDB:
331      try:
332        dag[source] = self.getNeighbors(self, source)
333      except IOError as e:
334        if e.errno == errno.ENOENT:
335          del self[source]
336        else:
337          raise e
338    # Finding all-pairs shortest path
339
340if __name__ == '__main__':
341  import sys
342  try:
343    if len(sys.argv) < 3:
344      print('sourceDatabase.py <database filename> [insert | remove] <filename>')
345    else:
346      if os.path.exists(sys.argv[1]):
347        dbFile   = open(sys.argv[1])
348        sourceDB = pickle.load(dbFile)
349        dbFile.close()
350      else:
351        sys.exit('Could not load source database from '+sys.argv[1])
352      if sys.argv[2] == 'insert':
353        if sys.argv[3] in sourceDB:
354          self.logPrint('Updating '+sys.argv[3], 3, 'sourceDB')
355        else:
356          self.logPrint('Inserting '+sys.argv[3], 3, 'sourceDB')
357        self.sourceDB.updateSource(sys.argv[3])
358      elif sys.argv[2] == 'remove':
359        if sys.argv[3] in sourceDB:
360          sourceDB.logPrint('Removing '+sys.argv[3], 3, 'sourceDB')
361          del self.sourceDB[sys.argv[3]]
362        else:
363          sourceDB.logPrint('Matching regular expression '+sys.argv[3]+' over source database', 1, 'sourceDB')
364          removeRE = re.compile(sys.argv[3])
365          removes  = list(filter(removeRE.match, sourceDB.keys()))
366          for source in removes:
367            self.logPrint('Removing '+source, 3, 'sourceDB')
368            del self.sourceDB[source]
369      else:
370        sys.exit('Unknown source database action: '+sys.argv[2])
371      sourceDB.save()
372  except Exception as e:
373    import traceback
374    print(traceback.print_tb(sys.exc_info()[2]))
375    sys.exit(str(e))
376  sys.exit(0)
377