1#!/usr/bin/env python3.7
2#
3#  BLIS
4#  An object-based framework for developing high-performance BLAS-like
5#  libraries.
6#
7#  Copyright (C) 2014, The University of Texas at Austin
8#
9#  Redistribution and use in source and binary forms, with or without
10#  modification, are permitted provided that the following conditions are
11#  met:
12#   - Redistributions of source code must retain the above copyright
13#     notice, this list of conditions and the following disclaimer.
14#   - Redistributions in binary form must reproduce the above copyright
15#     notice, this list of conditions and the following disclaimer in the
16#     documentation and/or other materials provided with the distribution.
17#   - Neither the name(s) of the copyright holder(s) nor the names of its
18#     contributors may be used to endorse or promote products derived
19#     from this software without specific prior written permission.
20#
21#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32#
33#
34
35# Import modules
36import os
37import sys
38import getopt
39import re
40
def print_usage():
	"""Print the script's help text to standard output, one line at a time.

	The two lines that mention the script are formatted with the
	module-level 'script_name'. Every line is emitted through my_print().
	"""

	# Continuation indent used by the argument and option descriptions.
	usage_lines = (
		" ",
		" %s" % script_name,
		" ",
		" Field G. Van Zee",
		" ",
		" Generate a monolithic header by recursively replacing all #include",
		" directives in a selected file with the contents of the header files",
		" they reference.",
		" ",
		" Usage:",
		" ",
		"   %s header header_out temp_dir dir_list" % script_name,
		" ",
		" Arguments:",
		" ",
		"   header        The filepath to the top-level header, which is the file",
		"                 that will #include all other header files.",
		" ",
		"   header_out    The filepath of the file into which the script will output",
		"                 the monolithic header.",
		" ",
		"   temp_dir      A directory in which temporary files may be created.",
		"                 NOTE: No temporary files are created in the current",
		"                 implementation, but this argument must still be specified.",
		" ",
		"   dir_list      The list of directory paths in which to search for the",
		"                 headers that are #included by 'header'. By default, these",
		"                 directories are scanned for .h files, but sub-directories",
		"                 within the various directories are not inspected. If the",
		"                 -r option is given, these directories are recursively",
		"                 scanned. In either case, the subset of directories scanned",
		"                 that actually contains .h files is then searched whenever",
		"                 a #include directive is encountered in 'header' (or any",
		"                 file subsequently #included). If a referenced header file",
		"                 is not found, the #include directive is left untouched and",
		"                 translated directly into 'header_out'.",
		" ",
		" The following options are accepted:",
		" ",
		"   -r          recursive",
		"                 Scan the directories listed in 'dir_list' recursively when",
		"                 searching for .h header files. By default, the directories",
		"                 are not searched recursively.",
		" ",
		"   -c          strip C-style comments",
		"                 Strip comments enclosed in /* */ delimiters from the",
		"                 output, including multi-line comments. By default, C-style",
		"                 comments are not stripped.",
		" ",
		"   -o SCRIPT   output script name",
		"                 Use SCRIPT as a prefix when outputting messages instead",
		"                 the script's actual name. Useful when the current script",
		"                 is going to be called from within another, higher-level",
		"                 driver script and seeing the current script's name might",
		"                 unnecessarily confuse the user.",
		" ",
		"   -v [0|1|2]  verboseness level",
		"                 level 0: silent  (no output)",
		"                 level 1: default (single character '.' per header)",
		"                 level 2: verbose (several lines per header).",
		" ",
		"   -h          help",
		"                 Output this information and exit.",
		" ",
	)

	for usage_line in usage_lines:
		my_print( usage_line )
107
108
109# ------------------------------------------------------------------------------
110
def canonicalize_ws( s ):
	"""Return s with each run of whitespace collapsed to one space.

	Leading and trailing whitespace is removed from the result.
	"""

	# The pattern is a raw string: '\s' inside an ordinary literal is an
	# invalid escape sequence, which newer Python versions warn about.
	return re.sub( r'\s+', ' ', s ).strip()
114
115# ---
116
def my_print( s ):
	"""Write s, followed by a newline, to standard output."""

	text = "%s\n" % s
	sys.stdout.write( text )
120
121# ---
122
123#def echov1( s ):
124#
125#	if verbose_flag == "1":
126#		print "%s: %s" % ( output_name, s )
127
def echov1_n( s ):
	"""Write s to standard output, with no newline, at verbosity level "1".

	Nothing is written when the module-level 'verbose_flag' has any other
	value. The stream is flushed after writing.
	"""

	if verbose_flag != "1":
		return

	out = sys.stdout
	out.write( s )
	out.flush()
133
def echov1_n2( s ):
	"""Write s plus a newline to standard output at verbosity level "1".

	Nothing is written when the module-level 'verbose_flag' has any other
	value. The stream is flushed after writing.
	"""

	if verbose_flag != "1":
		return

	out = sys.stdout
	out.write( "%s\n" % s )
	out.flush()
139
140# ---
141
def echov2( s ):
	"""Write "<output_name>: <s>" as a full line at verbosity level "2".

	Nothing is written when the module-level 'verbose_flag' has any other
	value. The stream is flushed after writing.
	"""

	if verbose_flag != "2":
		return

	message = "%s: %s\n" % ( output_name, s )
	sys.stdout.write( message )
	sys.stdout.flush()
147
def echov2_n( s ):
	"""Write "<output_name>: <s>" with no trailing newline at level "2".

	Nothing is written when the module-level 'verbose_flag' has any other
	value. The line is typically completed by a later call to echov2_n2().
	"""

	if verbose_flag != "2":
		return

	# The prefix, the separator and the message are written as three
	# separate pieces, then flushed together.
	for piece in ( output_name, ": ", s ):
		sys.stdout.write( piece )
	sys.stdout.flush()
155
def echov2_n2( s ):
	"""Write s plus a newline, without any prefix, at verbosity level "2".

	Nothing is written when the module-level 'verbose_flag' has any other
	value. The stream is flushed after writing.
	"""

	if verbose_flag != "2":
		return

	out = sys.stdout
	out.write( "%s\n" % s )
	out.flush()
161
162# ------------------------------------------------------------------------------
163
def list_contains_header( items ):
	"""Return True if any string in items contains the text ".h".

	Arguments:
	  items  an iterable of names (e.g. the entries of a directory).

	Returns False for an empty iterable.

	NOTE: the match is deliberately left unanchored, as in the original
	implementation, so a name such as "foo.hpp" or ".hidden" also counts.
	"""

	# The pattern is a raw string: "\." inside an ordinary literal is an
	# invalid escape sequence, which newer Python versions warn about.
	return any( re.search( r"\.h", item ) for item in items )
176
177# ------------------------------------------------------------------------------
178
def get_header_path( filename, header_dirpaths ):
	"""Return the path to filename in the first directory that has it.

	Arguments:
	  filename         the header name as written in a #include directive.
	  header_dirpaths  the directories to search, in priority order.

	Returns "<dirpath>/<filename>" for the first directory in which that
	path is a regular file, or None if no directory contains it.
	"""

	# Search each directory path for the filename given.
	for dirpath in header_dirpaths:

		# Construct a possible path to the sought-after file.
		candidate = "%s/%s" % ( dirpath, filename )

		# Require a regular file rather than mere existence. An empty
		# header name, or a name that matches a sub-directory, would
		# otherwise resolve to a directory that the caller cannot open.
		if os.path.isfile( candidate ):
			return candidate

	return None
196
197# ------------------------------------------------------------------------------
198
def strip_cstyle_comments( string ):
	"""Return string with every /* ... */ comment removed.

	The match is non-greedy and uses re.S, so each comment ends at the
	first following "*/" and may span several lines. The pattern is purely
	textual: a "/*" inside a string literal or a // comment is treated as
	the start of a comment as well.
	"""

	# The pattern is a raw string: "\*" inside an ordinary literal is an
	# invalid escape sequence, which newer Python versions warn about.
	return re.sub( r"/\*.*?\*/", "", string, flags=re.S )
202
203# ------------------------------------------------------------------------------
204
def flatten_header( inputfile, header_dirpaths, cursp ):
	"""Return the text of inputfile with its #include directives expanded.

	Each line of inputfile is matched against the module-level compiled
	pattern 'regex'. Lines that do not match are copied verbatim. For a
	line that does match, the referenced header name is handled as follows:

	- If it equals the module-level 'root_inputfile', the directive is
	  emitted commented out ("// skipped #include ..."), so that the root
	  header is never expanded into itself.
	- Otherwise, if get_header_path() locates it in header_dirpaths, that
	  file is flattened recursively and its text is inserted between
	  "// begin <name>" and "// end <name>" marker lines.
	- Otherwise the directive is kept and tagged with a trailing
	  "// skipped" comment.

	In all three cases the output line is rebuilt from the matched groups,
	so any text that followed the directive on the original line is dropped.

	Arguments:
	  inputfile        path of the file to read.
	  header_dirpaths  directories searched, in order, for referenced headers.
	  cursp            indentation prefix for level-2 verbose messages; two
	                   spaces are appended for each level of recursion.

	NOTE(review): only the root header is guarded against re-inclusion. A
	cycle among other headers would recurse without bound.
	"""

	# This string is inserted after #include directives after having
	# determined that they are not present in the directory tree.
	skipstr  = "// skipped"
	beginstr = "// begin "
	endstr   = "// end "

	# Output fragments are collected in a list and joined once at the end,
	# rather than repeatedly re-allocating one growing string.
	pieces = []

	# The 'with' block guarantees that the file is closed even when a
	# recursive call (or a read) raises.
	with open( inputfile, "r" ) as ifile:

		for line in ifile:

			# Check for the #include directive; the header name is isolated
			# in the second group of the precompiled pattern.
			result = regex.search( line )

			# If the line did not contain a #include directive, simply
			# output the line verbatim.
			if not result:
				pieces.append( line )
				continue

			# Group 1 is the opening delimiter (" or <), group 2 the header
			# name and group 3 the closing delimiter (" or >).
			markl, header, markr = result.group( 1, 2, 3 )

			echov2( "%sfound reference to '%s'." % ( cursp, header ) )

			# Search for the path to the header referenced in the #include
			# directive.
			header_path = get_header_path( header, header_dirpaths )

			if header == root_inputfile:

				echov2( "%sthis is the root header '%s'; commenting out / skipping." \
				        % ( cursp, header ) )

				# If the header found is our root header, then we cannot
				# recurse into it lest we enter an infinite loop. Output the
				# line but make sure it's commented out entirely.
				pieces.append( "%s #include %s%s%s \n" \
				               % ( skipstr, markl, header, markr ) )

			elif header_path:

				echov2( "%slocated file '%s'; recursing." \
				        % ( cursp, header_path ) )

				# Mark the beginning of the header being inserted.
				pieces.append( "%s%s\n" % ( beginstr, header ) )

				# Recurse on the header, accumulating its flattened text.
				pieces.append( flatten_header( header_path, header_dirpaths,
				                               cursp + "  " ) )

				# Mark the end of the header being inserted.
				pieces.append( "%s%s\n" % ( endstr, header ) )

				echov2( "%sheader file '%s' fully processed." \
				        % ( cursp, header_path ) )

			else:

				echov2( "%scould not locate file '%s'; marking as skipped." \
				        % ( cursp, header ) )

				# If the header was not found, output the line with a
				# comment that the header was skipped.
				pieces.append( "#include %s%s%s %s\n" \
				               % ( markl, header, markr, skipstr ) )

	# Emit one progress dot per processed file (verbosity level 1 only).
	echov1_n( "." )

	return "".join( pieces )
310
311# ------------------------------------------------------------------------------
312
def find_header_dirs( dirpath ):
	"""Walk dirpath top-down and return the directories holding headers.

	A directory is included when list_contains_header() accepts the list of
	file names directly inside it. The returned paths are the directory
	paths reported by os.walk(), in the order they were visited. A progress
	line is printed for every directory at verbosity level 2.
	"""

	dirs_with_headers = []

	for current_dir, _subdirs, filenames in os.walk( dirpath, topdown=True ):

		echov2_n( "scanning contents of %s" % current_dir )

		has_headers = list_contains_header( filenames )

		# Finish the progress line begun above, then record the directory.
		echov2_n2( "...found headers" if has_headers else "" )

		if has_headers:
			dirs_with_headers.append( current_dir )

	return dirs_with_headers
333
334
335# ------------------------------------------------------------------------------
336
# Global variables. All of them are placeholders here; main() assigns the
# real values before any other function reads them.

# Names used when printing messages.
output_name    = None  # prefix for level-2 messages (-o overrides it)
script_name    = None  # basename of this script, shown in the usage text

# Command-line option state.
verbose_flag   = None  # verboseness level as a string: "0", "1" or "2" (-v)
recursive_flag = None  # True when -r was given
strip_comments = None  # True when -c was given

# State shared with flatten_header().
root_inputfile = None  # basename of the top-level header being flattened
regex          = None  # compiled pattern that recognizes #include lines
345
def main():
	"""Parse the command line, flatten the root header and write the result.

	Expects exactly four positional arguments (header, header_out, temp_dir,
	dir_list) after any of the options -o, -r, -c, -v and -h; see
	print_usage() for their meaning. The function assigns the module-level
	globals that the echo helpers and flatten_header() read.

	Returns 0 on success. Exits via sys.exit() after printing the usage text
	when -h is given or the argument count is wrong, and with status 2 on
	an unrecognized option or an invalid verboseness level.
	"""

	global script_name
	global output_name
	global strip_comments
	global recursive_flag
	global verbose_flag
	global regex
	global root_inputfile

	# Obtain the script name.
	script_name    = os.path.basename( sys.argv[0] )

	output_name    = script_name

	strip_comments = False
	recursive_flag = False
	verbose_flag   = "1"

	nestsp         = "  "

	# Process our command line options.
	try:
		opts, args = getopt.getopt( sys.argv[1:], "o:rchv:" )

	except getopt.GetoptError as err:
		# print help information and exit:
		my_print( str(err) ) # will print something like "option -a not recognized"
		print_usage()
		sys.exit(2)

	for opt, optarg in opts:
		if   opt == "-o":
			output_name = optarg
		elif opt == "-r":
			recursive_flag = True
		elif opt == "-c":
			strip_comments = True
		elif opt == "-v":
			verbose_flag = optarg
		elif opt == "-h":
			print_usage()
			sys.exit()
		else:
			print_usage()
			sys.exit()

	# Make sure that the verboseness level is valid.
	if verbose_flag not in ( "0", "1", "2" ):
		# Both values must be passed as a tuple. Applying '%' to
		# output_name alone raised TypeError instead of printing the
		# message.
		my_print( "%s Invalid verboseness argument: %s" \
		          % ( output_name, verbose_flag ) )
		# Exit non-zero, as the invalid-option path above does.
		sys.exit(2)

	# Print usage if we don't have exactly four arguments.
	if len( args ) != 4:
		print_usage()
		sys.exit()

	# Acquire the required arguments:
	# - the input header file,
	# - the output header file,
	# - the list of directories in which to search for the headers.
	# The third argument (the temporary directory) must be present but is
	# not used by the current implementation.
	inputfile  = args[0]
	outputfile = args[1]
	dir_list   = args[3]

	# Save the filename (basename) part of the input file (or root file) into a
	# global variable that we can access later from within flatten_header().
	root_inputfile = os.path.basename( inputfile )

	# Separate the directories into distinct strings.
	dir_list = dir_list.split()

	# First, confirm that the directories in dir_list are valid.
	dir_list_checked = []
	for item in dir_list:

		echov2_n( "checking " + item )

		# Require an actual directory. A path that exists but is a regular
		# file would later make os.listdir() fail.
		if os.path.isdir( item ):
			dir_list_checked.append( item )
			echov2_n2( "...directory exists." )
		else:
			echov2_n2( "...invalid directory; omitting." )

	# endfor

	# Overwrite the original dir_list with the updated copy that omits
	# invalid directories.
	dir_list = dir_list_checked

	echov2( "check summary:" )
	echov2( "  accessible directories:" )
	echov2( "  %s" % ' '.join( dir_list ) )

	# Generate a list of directories (header_dirpaths) which will be searched
	# whenever a #include directive is encountered. The method by which
	# header_dirpaths is compiled will depend on whether the recursive flag
	# was given.
	header_dirpaths = []

	if recursive_flag:

		for d in dir_list:

			# For each directory in dir_list, recursively walk that directory
			# and add every directory that contains headers to the running
			# list.
			header_dirpaths.extend( find_header_dirs( d ) )

		# endfor

	else:

		# If the recursive flag was not given, we can just use dir_list
		# as-is, though we opt to filter out the directories that don't
		# contain .h files.
		for d in dir_list:

			echov2_n( "scanning %s" % d )

			# If there is at least one header among the directory's
			# contents, add the current directory to the list of header
			# directories.
			if list_contains_header( os.listdir( d ) ):
				header_dirpaths.append( d )
				echov2_n2( "...found headers." )
			else:
				echov2_n2( "...no headers found." )
			# endif

		# endfor

	# endif

	echov2( "scan summary:" )
	echov2( "  headers found in:" )
	echov2( "  %s" % ' '.join( header_dirpaths ) )

	echov2( "preparing to monolithify '%s'" % inputfile )

	echov2( "new header will be saved to '%s'" % outputfile )

	echov1_n( "." )

	# Precompile the main regular expression used to isolate #include
	# directives and the headers they reference. This regex object will
	# get reused over and over again in flatten_header(). A raw string is
	# used so that the backslashes reach the regex engine without relying
	# on invalid string escape sequences.
	regex = re.compile( r'^[\s]*#include (["<])([\w\.\-/]*)([">])' )

	# Recursively substitute headers for occurrences of #include directives.
	final_string = flatten_header( inputfile, header_dirpaths, nestsp )

	# Strip C-style comments from the final output, if requested.
	if strip_comments:
		final_string = strip_cstyle_comments( final_string )

	# Open the output file only once the full text is available. A failure
	# while flattening then cannot leave behind a truncated output file,
	# and the 'with' block closes the file even if the write fails.
	with open( outputfile, "w" ) as ofile:
		ofile.write( final_string )

	echov2( "substitution complete." )
	echov2( "monolithic header saved as '%s'" % outputfile )

	echov1_n2( "." )

	return 0
530
531
532
533
# Run main() only when the file is executed as a script, and hand its
# return value to the interpreter as the process exit status.
if __name__ == "__main__":
	sys.exit( main() )
536