1#!/usr/local/bin/python3.8
2#--------------------------------------------------------------------
3#
4# preproc.py
5#
6# General purpose macro preprocessor
7#
8#--------------------------------------------------------------------
9# Usage:
10#
11#	preproc.py input_file [output_file] [-D<variable> ...]
12#
13# Where <variable> may be a keyword or a key=value pair
14#
15# Syntax:  Basically like cpp.  However, this preprocessor handles
16# only a limited set of keywords, so it does not otherwise mangle
17# the file in the belief that it must be C code.  Handling of boolean
18# relations is important, so these are thoroughly defined (see below)
19#
20#	#if defined(<variable>) [...]
21#	#ifdef <variable>
22#	#ifndef <variable>
23#	#elseif <variable>
24#	#else
25#	#endif
26#
27#	#define <variable> [...]
28#	#define <variable>(<parameters>) [...]
29#	#undef <variable>
30#
31#	#include <filename>
32#
33# <variable> may be
34#	<keyword>
35#	<keyword>=<value>
36#
37#	<keyword> without '=' is effectively the same as <keyword>=1
38#	Lack of a keyword is equivalent to <keyword>=0, in a conditional.
39#
40# Boolean operators (in order of precedence):
41#	!	NOT
42#	&&	AND
43#	||	OR
44#
45# Comments:
46#       Most comments (C-like or Tcl-like) are output as-is.  A
47#	line beginning with "###" is treated as a preprocessor
48#	comment and is not copied to the output.
49#
# Examples:
51#	#if defined(X) || defined(Y)
52#	#else
53#	#if defined(Z)
54#	#endif
55#--------------------------------------------------------------------
56
57import re
58import sys
59
def solve_statement(condition):
    """Reduce a boolean preprocessor expression to its value.

    The expression is expected to have had all macro names substituted
    already (see solve_condition), so a defined keyword normally appears
    as the literal '1'.  An operand counts as true only when it is exactly
    '1' after surrounding whitespace is removed; anything else is false.

    Evaluation order (highest precedence first), matching the file header:
        defined(K)   '1' if K is '1', otherwise '0'
        ( ... )      innermost parentheses are solved first
        !X           NOT
        A && B       AND
        A || B       OR

    Each occurrence of defined(), a parenthesized group or an operator is
    evaluated on its own operands.

    Parameters:
        condition   the expression text (str)

    Returns:
        '1' or '0' when the expression contained something to evaluate,
        otherwise the input text with surrounding whitespace stripped
        (e.g. an unsubstituted keyword is returned unchanged).
    """

    defrex = re.compile(r'defined[ \t]*\(([^)]+)\)')
    # Innermost group only, so nested parentheses unwind from the inside out
    parenrex = re.compile(r'\(([^()]+)\)')

    def flag(truth):
        return '1' if truth else '0'

    # defined(K):  K must be a single keyword.  If the keyword was defined,
    # it has already been replaced by 1.  A replacement function is used so
    # that every occurrence is judged by its own key.
    condition = defrex.sub(lambda m: flag(m.group(1).strip() == '1'), condition)

    # Solve parenthesized groups until none remain.  Each pass removes at
    # least one pair of parentheses, so this terminates.
    while True:
        condition, nsubs = parenrex.subn(
                lambda m: solve_statement(m.group(1)), condition)
        if nsubs == 0:
            break

    # Split on the lowest-precedence operator first;  this is what makes
    # && bind more tightly than ||.
    left, oper, right = condition.rpartition('||')
    if oper:
        first = solve_statement(left)
        second = solve_statement(right)
        return flag(first == '1' or second == '1')

    left, oper, right = condition.rpartition('&&')
    if oper:
        first = solve_statement(left)
        second = solve_statement(right)
        return flag(first == '1' and second == '1')

    # Remove whitespace (including any stray carriage return)
    condition = condition.strip()

    # What is left is a single operand, possibly negated
    if len(condition) > 1 and condition.startswith('!'):
        return flag(solve_statement(condition[1:]) != '1')

    return condition
138
def solve_condition(condition, keys, defines, keyrex):
    """Evaluate a conditional after expanding every known definition.

    Each name in 'keys' is substituted, in list order, using its compiled
    pattern from 'keyrex' and its replacement text from 'defines'.  The
    expanded text is then handed to solve_statement().

    Returns the integer 1 if the statement solves to '1', otherwise 0.
    """
    expanded = condition
    for name in keys:
        pattern = keyrex[name]
        expanded = pattern.sub(defines[name], expanded)

    return 1 if solve_statement(expanded) == '1' else 0
149
def sortkeys(keys):
    """Return a new list of keys in which no key follows one of its substrings.

    Each key is placed immediately before the first already-placed key
    that occurs inside it;  if there is no such key it goes at the end.
    Substitution in list order therefore handles the longer name before
    any shorter name embedded in it.  The input list is not modified.
    """
    ordered = []
    for candidate in keys:
        # Position of the first placed key contained in the candidate,
        # or the end of the list when nothing is contained in it.
        slot = next(
            (pos for pos, placed in enumerate(ordered) if placed in candidate),
            len(ordered))
        ordered.insert(slot, candidate)
    return ordered
165
def runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile):
    """Preprocess one input file and write the result to 'ofile'.

    Parameters:
        keys        list of defined names, in substitution order.  It is
                    updated in place by #define / #undef so that changes
                    made inside an included file are seen by the caller.
        keyrex      dict mapping each name to its compiled search pattern
                    (updated in place).
        defines     dict mapping each name to its replacement text
                    (updated in place).
        ccomm       if true, C-style /* ... */ comments are removed.
        incdirs     directories tried, in order, when 'inputfile' cannot
                    be opened as given.
        inputfile   name of the file to read.
        ofile       open text stream that receives the output.

    Returns None.  Problems (unreadable file, malformed or unbalanced
    conditionals) are reported on stderr and processing continues where
    possible.  #include is handled by calling runpp() recursively;  'if'
    blocks that cross file boundaries are not supported.
    """

    includerex = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
    definerex = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
    paramrex = re.compile(r'^([^\(]+)\(([^\)]+)\)')
    defrex = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
    undefrex = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
    ifdefrex = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
    ifndefrex = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
    ifrex = re.compile(r'^[ \t]*#if[ \t]+(.+)')
    elseifrex = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
    elserex = re.compile(r'^[ \t]*#else')
    endifrex = re.compile(r'^[ \t]*#endif')
    commentrex = re.compile(r'^###[^#]*$')
    ccstartrex = re.compile(r'/\*')		# C-style comment start
    ccendrex = re.compile(r'\*/')		# C-style comment end
    contrex = re.compile(r'.*\\$')		# Backslash continuation line

    badifrex = re.compile(r'^[ \t]*#if[ \t]*.*')
    badelserex = re.compile(r'^[ \t]*#else[ \t]*.*')

    # This code is not designed to operate on huge files.  Neither is it
    # designed to be efficient.

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    # Try the name as given first, then each include directory in turn.
    # The whole file is read up front so the handle is closed before any
    # recursive #include is processed.
    filetext = None
    candidates = [inputfile] + [incdir + '/' + inputfile for incdir in incdirs]
    for candidate in candidates:
        try:
            with open(candidate, 'r') as ifile:
                filetext = ifile.readlines()
        except FileNotFoundError:
            continue
        break

    if filetext is None:
        print("Error:  Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    ccblock = -1
    ifblock = -1
    ifstack = []
    lineno = 0
    lastline = ''

    for line in filetext:
        lineno += 1

        # C-style comments override everything else
        if ccomm:
            if ccblock == -1:
                pmatch = ccstartrex.search(line)
                if pmatch:
                    ematch = ccendrex.search(line[pmatch.end(0):])
                    if ematch:
                        # ematch offsets are relative to the text after "/*"
                        line = line[0:pmatch.start(0)] + line[pmatch.end(0) + ematch.end(0):]
                    else:
                        line = line[0:pmatch.start(0)]
                        ccblock = 1
            elif ccblock == 1:
                ematch = ccendrex.search(line)
                if ematch:
                    # end(0) already points just past the closing "*/"
                    line = line[ematch.end(0):]
                    ccblock = -1
                else:
                    continue

        # Handle continuation detected in previous line
        if lastline:
            # The saved line ends with the backslash followed by the
            # newline, so strip the last two characters before joining.
            line = lastline[0:-2] + line
            lastline = ''

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.

        cmatch = contrex.match(line)

        # Ignore lines beginning with "###"
        pmatch = commentrex.match(line)
        if pmatch:
            continue

        # Handle ifdef
        pmatch = ifdefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                # Enclosing block is not being output;  skip this one too
                ifblock = 2
            continue

        # Handle ifndef
        pmatch = ifndefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
                ifblock = 1 if ifblock == 0 else 0
            else:
                ifblock = 2
            continue

        # Handle if
        pmatch = ifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            else:
                ifblock = 2
            continue

        # Handle elseif
        pmatch = elseifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                condition = pmatch.group(1)
                ifblock = solve_condition(condition, keys, defines, keyrex)
            continue

        # Handle else
        pmatch = elserex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        pmatch = endifrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            if ifblock == -1:
                print("Error:  #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' or 'else' that were not properly formed
        pmatch = badifrex.match(line)
        if pmatch:
            print("Error:  Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        pmatch = badelserex.match(line)
        if pmatch:
            print("Error:  Badly formed #else statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = includerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            inclfile = pmatch.group(1)
            runpp(keys, keyrex, defines, ccomm, incdirs, inclfile, ofile)
            continue

        # Handle define (with value)
        pmatch = definerex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = paramrex.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)

                # 'pcondition' is the actual search regexp and must capture all
                # the parameters individually for substitution

                parameters = rmatch.group(2).split(',')

                # Generate the regexp string to match comma-separate values
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses;  e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.

                pcondition = condition + r'\('
                for param in parameters[0:-1]:
                    pcondition += r'(.*),'
                pcondition += r'(.*)\)'

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                idx = 1
                for param in parameters:
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')
                    idx = idx + 1

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                parameters = []
                value = pmatch.group(2)
                # Note:  Need to check for infinite recursion here, but it's tricky.
                defines[condition] = value
                keyrex[condition] = re.compile(condition)

            if condition not in keys:
                # Parameterized keys go to the front of the list
                if parameters:
                    keys.insert(0, condition)
                else:
                    keys.append(condition)
                # Re-sort in place so the caller's list (shared across
                # #include recursion) stays in step with defines/keyrex.
                keys[:] = sortkeys(keys)
            continue

        # Handle define (simple case, no value)
        pmatch = defrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            if condition not in keys:
                keys.append(condition)
                keys[:] = sortkeys(keys)
            continue

        # Handle undef
        pmatch = undefrex.match(line)
        if pmatch:
            if cmatch:
                lastline = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition)
                keyrex.pop(condition)
                keys.remove(condition)
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.

        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        print(line, file=ofile, end='')

    if ifblock != -1 or ifstack != []:
        print("Error:  input file ended with an unterminated #if block.", file=sys.stderr)

    return
498
def printusage(progname):
    """Write the command-line synopsis and option summary to stdout.

    'progname' is the program name shown on the usage line.
    """
    help_text = (
        'Usage: ' + progname + ' input_file [output_file] [-options]',
        '   Options are:',
        '      -help         Print this help text.',
        '      -ccomm        Remove C comments in /* ... */ delimiters.',
        '      -D<def>       Define word <def> and set its value to 1.',
        '      -D<def>=<val> Define word <def> and set its value to <val>.',
        '      -I<dir>       Add <dir> to search path for input files.',
    )
    for text in help_text:
        print(text)
    return
508
if __name__ == '__main__':

    # Parse command line for options and arguments.  Anything starting
    # with '-' is an option;  everything else is a positional argument.
    options = []
    arguments = []
    for item in sys.argv[1:]:
        if item.startswith('-'):
            options.append(item)
        else:
            arguments.append(item)

    if not arguments:
        printusage(sys.argv[0])
        sys.exit(0)

    inputfile = arguments[0]
    # No output file means the result is written to stdout
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    for item in options:
        # Split at the first '=' only, so a value may itself contain '='
        name, hasvalue, value = item.partition('=')
        if name == '-help':
            printusage(sys.argv[0])
            sys.exit(0)
        elif name == '-ccomm':
            ccomm = True
        elif name[0:2] == '-I':
            # Use the whole item so a directory name containing '=' is kept
            incdirs.append(item[2:])
        elif name[0:2] == '-D':
            keyword = name[2:]
            # A keyword given without '=' is the same as keyword=1
            if not hasvalue:
                value = '1'
            defines[keyword] = value
            keyrex[keyword] = re.compile(keyword)
            # A repeated -D overrides the value without duplicating the key
            if keyword not in keys:
                keys.append(keyword)
            keys = sortkeys(keys)
        else:
            print('Bad option ' + item + ', options are -help, -ccomm, -D<def> -I<dir>\n')
            sys.exit(1)

    if outputfile:
        try:
            ofile = open(outputfile, 'w')
        except OSError:
            print("Error:  Cannot open file " + outputfile + " for writing.", file=sys.stderr)
            sys.exit(1)
    else:
        ofile = sys.stdout

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last;  otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.

    keys = sortkeys(keys)

    try:
        runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile)
    finally:
        # Close the output even if preprocessing raised
        if ofile != sys.stdout:
            ofile.close()
    sys.exit(0)
577