1#! /usr/bin/env python
2# encoding: utf-8
3# Thomas Nagy, 2017-2018 (ita)
4
5"""
6A system for fast partial rebuilds
7
8Creating a large amount of task objects up front can take some time.
9By making a few assumptions, it is possible to avoid posting creating
10task objects for targets that are already up-to-date.
11
12On a silly benchmark the gain observed for 1M tasks can be 5m->10s
13for a single file change.
14
15Usage::
16
17	def options(opt):
18		opt.load('fast_partial')
19
20Assuptions:
21* Mostly for C/C++/Fortran targets with link tasks (object-only targets are not handled)
22* For full project builds: no --targets and no pruning from subfolders
23* The installation phase is ignored
24* `use=` dependencies are specified up front even across build groups
25* Task generator source files are not obtained from globs
26
27Implementation details:
28* The first layer obtains file timestamps to recalculate file hashes only
29  when necessary (similar to md5_tstamp); the timestamps are then stored
30  in a dedicated pickle file
31* A second layer associates each task generator to a file set to help
32  detecting changes. Task generators are to create their tasks only when
33  the related files have been modified. A specific db file is created
34  to store such data (5m -> 1m10)
35* A third layer binds build context proxies onto task generators, replacing
36  the default context. While loading data for the full build uses more memory
37  (4GB -> 9GB), partial builds are then much faster (1m10 -> 13s)
38* A fourth layer enables a 2-level cache on file signatures to
39  reduce the size of the main pickle file (13s -> 10s)
40"""
41
42import os
43from waflib import Build, Context, Errors, Logs, Task, TaskGen, Utils
44from waflib.TaskGen import feature, after_method, taskgen_method
45import waflib.Node
46
47DONE = 0
48DIRTY = 1
49NEEDED = 2
50
51SKIPPABLE = ['cshlib', 'cxxshlib', 'cstlib', 'cxxstlib', 'cprogram', 'cxxprogram']
52
53TSTAMP_DB = '.wafpickle_tstamp_db_file'
54
55SAVED_ATTRS = 'root node_sigs task_sigs imp_sigs raw_deps node_deps'.split()
56
57class bld_proxy(object):
58	def __init__(self, bld):
59		object.__setattr__(self, 'bld', bld)
60
61		object.__setattr__(self, 'node_class', type('Nod3', (waflib.Node.Node,), {}))
62		self.node_class.__module__ = 'waflib.Node'
63		self.node_class.ctx = self
64
65		object.__setattr__(self, 'root', self.node_class('', None))
66		for x in SAVED_ATTRS:
67			if x != 'root':
68				object.__setattr__(self, x, {})
69
70		self.fix_nodes()
71
72	def __setattr__(self, name, value):
73		bld = object.__getattribute__(self, 'bld')
74		setattr(bld, name, value)
75
76	def __delattr__(self, name):
77		bld = object.__getattribute__(self, 'bld')
78		delattr(bld, name)
79
80	def __getattribute__(self, name):
81		try:
82			return object.__getattribute__(self, name)
83		except AttributeError:
84			bld = object.__getattribute__(self, 'bld')
85			return getattr(bld, name)
86
87	def __call__(self, *k, **kw):
88		return self.bld(*k, **kw)
89
90	def fix_nodes(self):
91		for x in ('srcnode', 'path', 'bldnode'):
92			node = self.root.find_dir(getattr(self.bld, x).abspath())
93			object.__setattr__(self, x, node)
94
95	def set_key(self, store_key):
96		object.__setattr__(self, 'store_key', store_key)
97
98	def fix_tg_path(self, *tgs):
99		# changing Node objects on task generators is possible
100		# yet, all Node objects must belong to the same parent
101		for tg in tgs:
102			tg.path = self.root.make_node(tg.path.abspath())
103
104	def restore(self):
105		dbfn = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)
106		Logs.debug('rev_use: reading %s', dbfn)
107		try:
108			data = Utils.readf(dbfn, 'rb')
109		except (EnvironmentError, EOFError):
110			# handle missing file/empty file
111			Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
112		else:
113			try:
114				waflib.Node.pickle_lock.acquire()
115				waflib.Node.Nod3 = self.node_class
116				try:
117					data = Build.cPickle.loads(data)
118				except Exception as e:
119					Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
120				else:
121					for x in SAVED_ATTRS:
122						object.__setattr__(self, x, data.get(x, {}))
123			finally:
124				waflib.Node.pickle_lock.release()
125		self.fix_nodes()
126
127	def store(self):
128		data = {}
129		for x in Build.SAVED_ATTRS:
130			data[x] = getattr(self, x)
131		db = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)
132
133		try:
134			waflib.Node.pickle_lock.acquire()
135			waflib.Node.Nod3 = self.node_class
136			x = Build.cPickle.dumps(data, Build.PROTOCOL)
137		finally:
138			waflib.Node.pickle_lock.release()
139
140		Logs.debug('rev_use: storing %s', db)
141		Utils.writef(db + '.tmp', x, m='wb')
142		try:
143			st = os.stat(db)
144			os.remove(db)
145			if not Utils.is_win32:
146				os.chown(db + '.tmp', st.st_uid, st.st_gid)
147		except (AttributeError, OSError):
148			pass
149		os.rename(db + '.tmp', db)
150
151class bld(Build.BuildContext):
152	def __init__(self, **kw):
153		super(bld, self).__init__(**kw)
154		self.hashes_md5_tstamp = {}
155
156	def __call__(self, *k, **kw):
157		# this is one way of doing it, one could use a task generator method too
158		bld = kw['bld'] = bld_proxy(self)
159		ret = TaskGen.task_gen(*k, **kw)
160		self.task_gen_cache_names = {}
161		self.add_to_group(ret, group=kw.get('group'))
162		ret.bld = bld
163		bld.set_key(ret.path.abspath().replace(os.sep, '') + str(ret.idx))
164		return ret
165
166	def is_dirty(self):
167		return True
168
169	def store_tstamps(self):
170		# Called after a build is finished
171		# For each task generator, record all files involved in task objects
172		# optimization: done only if there was something built
173		do_store = False
174		try:
175			f_deps = self.f_deps
176		except AttributeError:
177			f_deps = self.f_deps = {}
178			self.f_tstamps = {}
179
180		allfiles = set()
181		for g in self.groups:
182			for tg in g:
183				try:
184					staleness = tg.staleness
185				except AttributeError:
186					staleness = DIRTY
187
188				if staleness != DIRTY:
189					# DONE case: there was nothing built
190					# NEEDED case: the tg was brought in because of 'use' propagation
191					# but nothing really changed for them, there may be incomplete
192					# tasks (object files) and in this case it is best to let the next build
193					# figure out if an input/output file changed
194					continue
195
196				do_cache = False
197				for tsk in tg.tasks:
198					if tsk.hasrun == Task.SUCCESS:
199						do_cache = True
200						pass
201					elif tsk.hasrun == Task.SKIPPED:
202						pass
203					else:
204						# one failed task, clear the cache for this tg
205						try:
206							del f_deps[(tg.path.abspath(), tg.idx)]
207						except KeyError:
208							pass
209						else:
210							# just store the new state because there is a change
211							do_store = True
212
213						# skip the rest because there is no valid cache possible
214						break
215				else:
216					if not do_cache:
217						# all skipped, but is there anything in cache?
218						try:
219							f_deps[(tg.path.abspath(), tg.idx)]
220						except KeyError:
221							# probably cleared because a wscript file changed
222							# store it
223							do_cache = True
224
225					if do_cache:
226
227						# there was a rebuild, store the data structure too
228						tg.bld.store()
229
230						# all tasks skipped but no cache
231						# or a successful task build
232						do_store = True
233						st = set()
234						for tsk in tg.tasks:
235							st.update(tsk.inputs)
236							st.update(self.node_deps.get(tsk.uid(), []))
237
238						# TODO do last/when loading the tgs?
239						lst = []
240						for k in ('wscript', 'wscript_build'):
241							n = tg.path.find_node(k)
242							if n:
243								n.get_bld_sig()
244								lst.append(n.abspath())
245
246						lst.extend(sorted(x.abspath() for x in st))
247						allfiles.update(lst)
248						f_deps[(tg.path.abspath(), tg.idx)] = lst
249
250		for x in allfiles:
251			# f_tstamps has everything, while md5_tstamp can be relatively empty on partial builds
252			self.f_tstamps[x] = self.hashes_md5_tstamp[x][0]
253
254		if do_store:
255			dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
256			Logs.debug('rev_use: storing %s', dbfn)
257			dbfn_tmp = dbfn + '.tmp'
258			x = Build.cPickle.dumps([self.f_tstamps, f_deps], Build.PROTOCOL)
259			Utils.writef(dbfn_tmp, x, m='wb')
260			os.rename(dbfn_tmp, dbfn)
261			Logs.debug('rev_use: stored %s', dbfn)
262
263	def store(self):
264		self.store_tstamps()
265		if self.producer.dirty:
266			Build.BuildContext.store(self)
267
268	def compute_needed_tgs(self):
269		# assume the 'use' keys are not modified during the build phase
270
271		dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
272		Logs.debug('rev_use: Loading %s', dbfn)
273		try:
274			data = Utils.readf(dbfn, 'rb')
275		except (EnvironmentError, EOFError):
276			Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
277			self.f_deps = {}
278			self.f_tstamps = {}
279		else:
280			try:
281				self.f_tstamps, self.f_deps = Build.cPickle.loads(data)
282			except Exception as e:
283				Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
284				self.f_deps = {}
285				self.f_tstamps = {}
286			else:
287				Logs.debug('rev_use: Loaded %s', dbfn)
288
289
290		# 1. obtain task generators that contain rebuilds
291		# 2. obtain the 'use' graph and its dual
292		stales = set()
293		reverse_use_map = Utils.defaultdict(list)
294		use_map = Utils.defaultdict(list)
295
296		for g in self.groups:
297			for tg in g:
298				if tg.is_stale():
299					stales.add(tg)
300
301				try:
302					lst = tg.use = Utils.to_list(tg.use)
303				except AttributeError:
304					pass
305				else:
306					for x in lst:
307						try:
308							xtg = self.get_tgen_by_name(x)
309						except Errors.WafError:
310							pass
311						else:
312							use_map[tg].append(xtg)
313							reverse_use_map[xtg].append(tg)
314
315		Logs.debug('rev_use: found %r stale tgs', len(stales))
316
317		# 3. dfs to post downstream tg as stale
318		visited = set()
319		def mark_down(tg):
320			if tg in visited:
321				return
322			visited.add(tg)
323			Logs.debug('rev_use: marking down %r as stale', tg.name)
324			tg.staleness = DIRTY
325			for x in reverse_use_map[tg]:
326				mark_down(x)
327		for tg in stales:
328			mark_down(tg)
329
330		# 4. dfs to find ancestors tg to mark as needed
331		self.needed_tgs = needed_tgs = set()
332		def mark_needed(tg):
333			if tg in needed_tgs:
334				return
335			needed_tgs.add(tg)
336			if tg.staleness == DONE:
337				Logs.debug('rev_use: marking up %r as needed', tg.name)
338				tg.staleness = NEEDED
339			for x in use_map[tg]:
340				mark_needed(x)
341		for xx in visited:
342			mark_needed(xx)
343
344		# so we have the whole tg trees to post in the set "needed"
345		# load their build trees
346		for tg in needed_tgs:
347			tg.bld.restore()
348			tg.bld.fix_tg_path(tg)
349
350		# the stale ones should be fully build, while the needed ones
351		# may skip a few tasks, see create_compiled_task and apply_link_after below
352		Logs.debug('rev_use: amount of needed task gens: %r', len(needed_tgs))
353
354	def post_group(self):
355		# assumption: we can ignore the folder/subfolders cuts
356		def tgpost(tg):
357			try:
358				f = tg.post
359			except AttributeError:
360				pass
361			else:
362				f()
363
364		if not self.targets or self.targets == '*':
365			for tg in self.groups[self.current_group]:
366				# this can cut quite a lot of tg objects
367				if tg in self.needed_tgs:
368					tgpost(tg)
369		else:
370			# default implementation
371			return Build.BuildContext.post_group()
372
373	def get_build_iterator(self):
374		if not self.targets or self.targets == '*':
375			self.compute_needed_tgs()
376		return Build.BuildContext.get_build_iterator(self)
377
378@taskgen_method
379def is_stale(self):
380	# assume no globs
381	self.staleness = DIRTY
382
383	# 1. the case of always stale targets
384	if getattr(self, 'always_stale', False):
385		return True
386
387	# 2. check if the db file exists
388	db = os.path.join(self.bld.variant_dir, Context.DBFILE)
389	try:
390		dbstat = os.stat(db).st_mtime
391	except OSError:
392		Logs.debug('rev_use: must post %r because this is a clean build')
393		return True
394
395	# 3. check if the configuration changed
396	if os.stat(self.bld.bldnode.find_node('c4che/build.config.py').abspath()).st_mtime > dbstat:
397		Logs.debug('rev_use: must post %r because the configuration has changed', self.name)
398		return True
399
400	# 3.a any tstamp data?
401	try:
402		f_deps = self.bld.f_deps
403	except AttributeError:
404		Logs.debug('rev_use: must post %r because there is no f_deps', self.name)
405		return True
406
407	# 4. check if this is the first build (no cache)
408	try:
409		lst = f_deps[(self.path.abspath(), self.idx)]
410	except KeyError:
411		Logs.debug('rev_use: must post %r because there it has no cached data', self.name)
412		return True
413
414	try:
415		cache = self.bld.cache_tstamp_rev_use
416	except AttributeError:
417		cache = self.bld.cache_tstamp_rev_use = {}
418
419	# 5. check the timestamp of each dependency files listed is unchanged
420	f_tstamps = self.bld.f_tstamps
421	for x in lst:
422		try:
423			old_ts = f_tstamps[x]
424		except KeyError:
425			Logs.debug('rev_use: must post %r because %r is not in cache', self.name, x)
426			return True
427
428		try:
429			try:
430				ts = cache[x]
431			except KeyError:
432				ts = cache[x] = os.stat(x).st_mtime
433		except OSError:
434			del f_deps[(self.path.abspath(), self.idx)]
435			Logs.debug('rev_use: must post %r because %r does not exist anymore', self.name, x)
436			return True
437		else:
438			if ts != old_ts:
439				Logs.debug('rev_use: must post %r because the timestamp on %r changed %r %r', self.name, x, old_ts, ts)
440				return True
441
442	self.staleness = DONE
443	return False
444
445@taskgen_method
446def create_compiled_task(self, name, node):
447	# skip the creation of object files
448	# assumption: object-only targets are not skippable
449	if self.staleness == NEEDED:
450		# only libraries/programs can skip object files
451		for x in SKIPPABLE:
452			if x in self.features:
453				return None
454
455	out = '%s.%d.o' % (node.name, self.idx)
456	task = self.create_task(name, node, node.parent.find_or_declare(out))
457	try:
458		self.compiled_tasks.append(task)
459	except AttributeError:
460		self.compiled_tasks = [task]
461	return task
462
463@feature(*SKIPPABLE)
464@after_method('apply_link')
465def apply_link_after(self):
466	# cprogram/cxxprogram might be unnecessary
467	if self.staleness != NEEDED:
468		return
469	for tsk in self.tasks:
470		tsk.hasrun = Task.SKIPPED
471
472def path_from(self, node):
473	# handle nodes of distinct types
474	if node.ctx is not self.ctx:
475		node = self.ctx.root.make_node(node.abspath())
476	return self.default_path_from(node)
477waflib.Node.Node.default_path_from = waflib.Node.Node.path_from
478waflib.Node.Node.path_from = path_from
479
480def h_file(self):
481	# similar to md5_tstamp.py, but with 2-layer cache
482	# global_cache for the build context common for all task generators
483	# local_cache for the build context proxy (one by task generator)
484	#
485	# the global cache is not persistent
486	# the local cache is persistent and meant for partial builds
487	#
488	# assume all calls are made from a single thread
489	#
490	filename = self.abspath()
491	st = os.stat(filename)
492
493	global_cache = self.ctx.bld.hashes_md5_tstamp
494	local_cache = self.ctx.hashes_md5_tstamp
495
496	if filename in global_cache:
497		# value already calculated in this build
498		cval = global_cache[filename]
499
500		# the value in global cache is assumed to be calculated once
501		# reverifying it could cause task generators
502		# to get distinct tstamp values, thus missing rebuilds
503		local_cache[filename] = cval
504		return cval[1]
505
506	if filename in local_cache:
507		cval = local_cache[filename]
508		if cval[0] == st.st_mtime:
509			# correct value from a previous build
510			# put it in the global cache
511			global_cache[filename] = cval
512			return cval[1]
513
514	ret = Utils.h_file(filename)
515	local_cache[filename] = global_cache[filename] = (st.st_mtime, ret)
516	return ret
517waflib.Node.Node.h_file = h_file
518
519