#!/usr/local/bin/python3.8
#--------------------------------------------------------------------
#
# preproc.py
#
# General purpose macro preprocessor
#
#--------------------------------------------------------------------
# Usage:
#
#    preproc.py input_file [output_file] [-D<variable> ...]
#
# Where <variable> may be a keyword or a key=value pair
#
# Syntax:  Basically like cpp.  However, this preprocessor handles
# only a limited set of keywords, so it does not otherwise mangle
# the file in the belief that it must be C code.  Handling of boolean
# relations is important, so these are thoroughly defined (see below)
#
#    #if defined(<variable>) [...]
#    #ifdef <variable>
#    #ifndef <variable>
#    #elseif <variable>
#    #else
#    #endif
#
#    #define <variable> [...]
#    #define <variable>(<parameters>) [...]
#    #undef <variable>
#
#    #include <filename>
#
# <variable> may be
#    <keyword>
#    <keyword>=<value>
#
#    <keyword> without '=' is effectively the same as <keyword>=1
#    Lack of a keyword is equivalent to <keyword>=0, in a conditional.
#
# Boolean operators (in order of precedence):
#    !    NOT
#    &&   AND
#    ||   OR
#
# Comments:
#    Most comments (C-like or Tcl-like) are output as-is.  A
#    line beginning with "###" is treated as a preprocessor
#    comment and is not copied to the output.
#
# Examples:
#    #if defined(X) || defined(Y)
#    #else
#    #if defined(Z)
#    #endif
#--------------------------------------------------------------------

import re
import sys

# Patterns used by the boolean expression evaluator.
_DEFINED_REX = re.compile(r'defined[ \t]*\(([^\)]+)\)')
_PAREN_REX = re.compile(r'\(([^()]+)\)')        # innermost (...) group

# Patterns for the preprocessor directives.  They are compiled once at
# import time because runpp() calls itself recursively for every #include.
_INCLUDE_REX = re.compile(r'^[ \t]*#include[ \t]+"*([^ \t\n\r"]+)')
_DEFINE_VALUE_REX = re.compile(r'^[ \t]*#define[ \t]+([^ \t]+)[ \t]+(.+)')
_PARAM_REX = re.compile(r'^([^\(]+)\(([^\)]+)\)')
_DEFINE_REX = re.compile(r'^[ \t]*#define[ \t]+([^ \t\n\r]+)')
_UNDEF_REX = re.compile(r'^[ \t]*#undef[ \t]+([^ \t\n\r]+)')
_IFDEF_REX = re.compile(r'^[ \t]*#ifdef[ \t]+(.+)')
_IFNDEF_REX = re.compile(r'^[ \t]*#ifndef[ \t]+(.+)')
_IF_REX = re.compile(r'^[ \t]*#if[ \t]+(.+)')
_ELSEIF_REX = re.compile(r'^[ \t]*#elseif[ \t]+(.+)')
_ELSE_REX = re.compile(r'^[ \t]*#else')
_ENDIF_REX = re.compile(r'^[ \t]*#endif')
_COMMENT_REX = re.compile(r'^###[^#]*$')
_CONT_REX = re.compile(r'.*\\$')                # Backslash continuation line
_BADIF_REX = re.compile(r'^[ \t]*#if[ \t]*.*')


def _truth(flag):
    """Return the string form of a boolean used by the evaluator."""
    return '1' if flag else '0'


def _solve_flat(expression):
    """Evaluate an expression that contains no parentheses.

    The operators are split lowest precedence first ('||', then '&&',
    then a leading '!'), so '&&' binds tighter than '||' and '!' binds
    tightest.  An operand counts as true only if it is exactly '1'.
    A bare operand with no operator is returned with surrounding
    whitespace removed.
    """
    if '||' in expression:
        return _truth(any(_solve_flat(term) == '1'
                          for term in expression.split('||')))
    if '&&' in expression:
        return _truth(all(_solve_flat(term) == '1'
                          for term in expression.split('&&')))
    term = expression.strip()
    if term.startswith('!'):
        return _truth(_solve_flat(term[1:]) != '1')
    return term


def solve_statement(condition):
    """Reduce a boolean condition string to its value.

    'condition' is expected to have had its keywords substituted already
    (see solve_condition), so that "defined(K)" reads "defined(1)" when K
    was defined with the value 1.

    Returns '1' or '0' for an expression built from defined(), '!', '&&',
    '||' and parentheses.  A bare operand is returned stripped of
    surrounding whitespace.
    """
    # Each defined(...) is judged on its own argument; a callback is used
    # so that one occurrence cannot overwrite the result of another.
    condition = _DEFINED_REX.sub(
        lambda m: _truth(m.group(1).strip() == '1'), condition)

    # Collapse parenthesized groups from the innermost outwards.
    while True:
        condition, count = _PAREN_REX.subn(
            lambda m: _solve_flat(m.group(1)), condition)
        if not count:
            break

    return _solve_flat(condition)


def solve_condition(condition, keys, defines, keyrex):
    """Substitute all known keys into 'condition' and evaluate it.

    keys    -- list of defined keywords, in substitution order
    defines -- dict mapping keyword to its replacement text
    keyrex  -- dict mapping keyword to its compiled search pattern

    Returns 1 if the condition evaluates to '1', otherwise 0.
    """
    # Do definition replacement on the conditional
    for keyword in keys:
        condition = keyrex[keyword].sub(defines[keyword], condition)

    return 1 if solve_statement(condition) == '1' else 0


def sortkeys(keys):
    """Return a new list in which a keyword precedes any keyword it contains.

    Each keyword is inserted in front of the first already-placed keyword
    that is a substring of it, or appended if there is none.  This keeps a
    short keyword from being substituted inside a longer one.
    """
    ordered = []
    for keyword in keys:
        for position, existing in enumerate(ordered):
            if existing in keyword:
                ordered.insert(position, keyword)
                break
        else:
            ordered.append(keyword)
    return ordered


def _add_key(keys, keyword, front=False):
    """Add 'keyword' to 'keys' (if absent) and re-sort the list IN PLACE.

    The list is shared between runpp() invocations (includes) and the
    caller, so it must be updated in place rather than rebound.
    """
    if keyword in keys:
        return
    if front:
        keys.insert(0, keyword)
    else:
        keys.append(keyword)
    keys[:] = sortkeys(keys)


def _read_input(inputfile, incdirs):
    """Return the lines of 'inputfile', or None if it cannot be opened.

    The name is tried as given first, then relative to each directory in
    'incdirs', in order.
    """
    candidates = [inputfile] + [incdir + '/' + inputfile for incdir in incdirs]
    for path in candidates:
        try:
            with open(path, 'r') as ifile:
                return ifile.readlines()
        except OSError:
            continue
    return None


def _strip_c_comments(line, in_comment):
    """Remove /* ... */ comment text from one line.

    'in_comment' tells whether the line starts inside a comment opened on
    an earlier line.  Returns (remaining text, in_comment at end of line).
    Text following an unterminated '/*' is dropped, newline included.
    """
    kept = []
    pos = 0
    while True:
        if in_comment:
            end = line.find('*/', pos)
            if end < 0:
                break
            pos = end + 2
            in_comment = False
        else:
            start = line.find('/*', pos)
            if start < 0:
                kept.append(line[pos:])
                break
            kept.append(line[pos:start])
            pos = start + 2
            in_comment = True
    return ''.join(kept), in_comment


def _match_if(line):
    """Match #ifdef / #ifndef / #if.

    Returns (condition text, negate) or None if the line is none of them.
    """
    for rex, negate in ((_IFDEF_REX, False), (_IFNDEF_REX, True),
                        (_IF_REX, False)):
        pmatch = rex.match(line)
        if pmatch:
            return pmatch.group(1), negate
    return None


def runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile):
    """Preprocess 'inputfile' and write the result to the file object 'ofile'.

    keys, keyrex, defines -- the macro tables (see solve_condition); they
        are updated in place by #define and #undef, so definitions made in
        an included file are visible to the including file.
    ccomm   -- if true, strip C-style /* ... */ comments
    incdirs -- directories searched when a file is not found as named

    Errors are reported on stderr; nothing is returned.
    """
    # This code is not designed to operate on huge files.  Neither is it
    # designed to be efficient.

    # ifblock state:
    # -1 : not in an if/else block
    #  0 : no condition satisfied yet
    #  1 : condition satisfied
    #  2 : condition was handled, waiting for endif

    filetext = _read_input(inputfile, incdirs)
    if filetext is None:
        print("Error: Cannot open file " + inputfile + " for reading.\n", file=sys.stderr)
        return

    in_comment = False
    ifblock = -1
    ifstack = []
    pending = ''        # directive text awaiting its continuation line

    for lineno, line in enumerate(filetext, start=1):

        # C-style comments override everything else
        if ccomm:
            if in_comment and '*/' not in line:
                continue
            line, in_comment = _strip_c_comments(line, in_comment)

        # Handle continuation detected in previous line:  drop the line
        # ending and the backslash before joining.
        if pending:
            line = pending.rstrip('\r\n')[:-1] + line
            pending = ''

        # Continuation lines have the next highest priority.  However, this
        # script will attempt to keep continuation lines in the body of the
        # text and only collapse lines where continuation lines occur in
        # a preprocessor statement.
        continued = _CONT_REX.match(line) is not None

        # Ignore lines beginning with "###"
        if _COMMENT_REX.match(line):
            continue

        # Handle ifdef, ifndef, if
        opened = _match_if(line)
        if opened:
            if continued:
                pending = line
                continue
            condition, negate = opened
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = solve_condition(condition, keys, defines, keyrex)
                if negate:
                    ifblock = 1 - ifblock
            else:
                # Enclosing block is not being output; neither is this one.
                ifblock = 2
            continue

        # Handle elseif
        pmatch = _ELSEIF_REX.match(line)
        if pmatch:
            if continued:
                pending = line
                continue
            if ifblock == -1:
                print("Error: #elseif without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock != 2:
                ifblock = solve_condition(pmatch.group(1), keys, defines, keyrex)
            continue

        # Handle else.  The pattern matches any line that begins with
        # "#else", so malformed variants are treated as a plain #else.
        if _ELSE_REX.match(line):
            if continued:
                pending = line
                continue
            if ifblock == -1:
                print("Error: #else without preceding #if at line " + str(lineno) + ".", file=sys.stderr)
                ifblock = 0

            if ifblock == 1:
                ifblock = 2
            elif ifblock == 0:
                ifblock = 1
            continue

        # Handle endif
        if _ENDIF_REX.match(line):
            if continued:
                pending = line
                continue
            if ifblock == -1:
                print("Error: #endif outside of #if block at line " + str(lineno) + " (ignored)", file=sys.stderr)
            elif ifstack:
                ifblock = ifstack.pop()
            else:
                ifblock = -1
            continue

        # Check for 'if' that was not properly formed
        if _BADIF_REX.match(line):
            print("Error: Badly formed #if statement at line " + str(lineno) + " (ignored)", file=sys.stderr)
            if ifblock != -1:
                ifstack.append(ifblock)

            if ifblock == 1 or ifblock == -1:
                ifblock = 0
            else:
                ifblock = 2
            continue

        # Ignore all lines that are not satisfied by a conditional
        if ifblock == 0 or ifblock == 2:
            continue

        # Handle include.  Note that this code does not expect or
        # handle 'if' blocks that cross file boundaries.
        pmatch = _INCLUDE_REX.match(line)
        if pmatch:
            if continued:
                pending = line
                continue
            runpp(keys, keyrex, defines, ccomm, incdirs, pmatch.group(1), ofile)
            continue

        # Handle define (with value)
        pmatch = _DEFINE_VALUE_REX.match(line)
        if pmatch:
            if continued:
                pending = line
                continue
            condition = pmatch.group(1)

            # Additional handling of definition w/parameters: #define X(a,b,c) ..."
            rmatch = _PARAM_REX.match(condition)
            if rmatch:
                # 'condition' as a key into keyrex only needs to be unique.
                # Use the definition word without everything in parentheses
                condition = rmatch.group(1)
                parameters = rmatch.group(2).split(',')

                # 'pcondition' is the actual search regexp and must capture
                # all the parameters individually for substitution.
                # Note that this is based on the cpp preprocessor, which
                # apparently allows commas in arguments if surrounded by
                # parentheses; e.g., "def(a, b, (c1,c2))".  This is NOT
                # handled.
                pcondition = (condition + r'\('
                              + '(.*),' * (len(parameters) - 1)
                              + r'(.*)\)')

                # Generate the substitution string with group substitutions
                pvalue = pmatch.group(2)
                for idx, param in enumerate(parameters, start=1):
                    pvalue = pvalue.replace(param, r'\g<' + str(idx) + '>')

                defines[condition] = pvalue
                keyrex[condition] = re.compile(pcondition)
            else:
                # Note: Need to check for infinite recursion here, but it's tricky.
                defines[condition] = pmatch.group(2)
                keyrex[condition] = re.compile(condition)

            # Parameterized keys go to the front of the list
            _add_key(keys, condition, front=rmatch is not None)
            continue

        # Handle define (simple case, no value)
        pmatch = _DEFINE_REX.match(line)
        if pmatch:
            if continued:
                pending = line
                continue
            condition = pmatch.group(1)
            defines[condition] = '1'
            keyrex[condition] = re.compile(condition)
            _add_key(keys, condition)
            continue

        # Handle undef
        pmatch = _UNDEF_REX.match(line)
        if pmatch:
            if continued:
                pending = line
                continue
            condition = pmatch.group(1)
            if condition in keys:
                defines.pop(condition, None)
                keyrex.pop(condition, None)
                # In place, and every occurrence, so that the shared list
                # never names a key that is missing from the dictionaries.
                keys[:] = [key for key in keys if key != condition]
            continue

        # Now do definition replacement on what's left (if anything)
        # This must be done repeatedly from the top until there are no
        # more substitutions to make.
        while True:
            origline = line
            for keyword in keys:
                newline = keyrex[keyword].sub(defines[keyword], line)
                if newline != line:
                    line = newline
                    break

            if line == origline:
                break

        # Output the line
        print(line, file=ofile, end='')

    if ifblock != -1 or ifstack:
        print("Error: input file ended with an unterminated #if block.", file=sys.stderr)


def printusage(progname):
    """Print the command-line help text to standard output."""
    print('Usage: ' + progname + ' input_file [output_file] [-options]')
    print('   Options are:')
    print('      -help         Print this help text.')
    print('      -ccomm        Remove C comments in /* ... */ delimiters.')
    print('      -D<def>       Define word <def> and set its value to 1.')
    print('      -D<def>=<val> Define word <def> and set its value to <val>.')
    print('      -I<dir>       Add <dir> to search path for input files.')


def main(argv):
    """Run the preprocessor from a command line; return the exit status."""
    # Parse command line for options and arguments
    options = [item for item in argv[1:] if item.startswith('-')]
    arguments = [item for item in argv[1:] if not item.startswith('-')]

    if not arguments:
        printusage(argv[0])
        return 0

    inputfile = arguments[0]
    outputfile = arguments[1] if len(arguments) > 1 else None

    defines = {}
    keyrex = {}
    keys = []
    incdirs = []
    ccomm = False
    for item in options:
        # Split at the FIRST '=' only, so values may themselves contain '='.
        name, equals, value = item.partition('=')
        if name == '-help':
            printusage(argv[0])
            return 0
        elif name == '-ccomm':
            ccomm = True
        elif item.startswith('-I'):
            incdirs.append(item[2:])
        elif name.startswith('-D') and name[2:]:
            keyword = name[2:]
            defines[keyword] = value if equals else '1'
            keyrex[keyword] = re.compile(keyword)
            _add_key(keys, keyword)
        else:
            print('Bad option ' + item + ', options are -help, -ccomm, -D<def> -I<dir>\n', file=sys.stderr)
            return 1

    # Sort keys so that if any definition contains another definition, the
    # subset word is handled last;  otherwise the subset word will get
    # substituted, screwing up the definition names in which it occurs.
    keys = sortkeys(keys)

    if not outputfile:
        runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, sys.stdout)
        return 0

    try:
        ofile = open(outputfile, 'w')
    except OSError:
        print("Error: Cannot open file " + outputfile + " for writing.", file=sys.stderr)
        return 1

    with ofile:
        runpp(keys, keyrex, defines, ccomm, incdirs, inputfile, ofile)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))