1#!/usr/bin/python 2 3# Usage: Just specify files and it will output all the combinations possible 4# from what's given in each file. 5# See optional args below. 6 7# .template files are in the following format: 8# 9# Possibilities are surrounded by "@(" and ")@" with each choice 10# delimited by "@@". There is currently no escape character, so 11# be careful if you have at-signs or parens in adjacent text. 12# 13# It will output the file for every possible option to fill that spot. 14# If there are multiple places with this, you will get a sort of cartesian 15# product and it's easy to have an explosion in the number of generated files. 16# Note that the replacement spot can cross line boundaries, so you can 17# substitute whole structures. 18# Here's an example, and all the possible lines it can produce: 19# The @(quick@@slow)@ brown fox @(napped@@jumped over the @(lazy@@tired)@ dog)@. 20# 21# The quick brown fox napped. 22# The quick brown fox jumped over the lazy dog. 23# The quick brown fox jumped over the tired dog. 24# The slow brown fox napped. 25# The slow brown fox jumped over the lazy dog. 26# The slow brown fox jumped over the tired dog. 27 28# To avoid a combinatorial explosion when you want the same substitution 29# in several places, you can use back-references via this syntax: 30# @(BACKREF:0)@ 31# The 0 can be any number that has a valid reference. It refers back to the 32# nth (0 == first, 1 == second, etc.) choice block. 33# For example, "@(a@@b@@c)@ @(BACKREF:0)@ @(BACKREF:0)@" would generate just 3 files, 34# with these lines: 35# a a a 36# b b b 37# c c c 38# 39# You can reference any "@(...something...)@" block, unless it's 40# inside another one. Careful because you may produce infinite recursion 41# resulting in an error (circular dependency), 42# such as when a backref refs itself. 43 44# You can also have number ranges. Example: 45# @(NUMRANGE:3:10)@ 46# This produces output with 3, 4, 5, ..., 9. 47# You can optionally have a step. E.g.: 48# @(NUMRANGE:3:10:2)@ 49# ( Gives: 3, 5, 7 and 9 ) 50 51# This program also takes command-line arguments: 52# -d 53# dumps debug data, and shows the number of files it can produce. 54# -n 55# just test syntax, don't actually produce the files 56# 57# The combination "-d -n" is great for just seeing how many files 58# your .template file could produce. 59 60# Initially created on January 13, 2011 61# Jan 18, 2011: Sep now defaults to "@" if not specified. 62# Jan 24, 2011: Added NUMRANGE 63# Jan 25, 2011: Added ability to specify empty strings 64# Mar 2 & 3, 2011: Added "nested" ability ( almost a rewrite ) 65# Apr 7, 2011: Added "-1" option to write everything in one file 66# Apr 11, 2011: Finally checked into repository. 67 68import sys 69import re 70import collections 71from operator import mul 72 73###################################################################### 74# These are "constants", although in the future it might 75# be possible for them to be changed at the beginning of the program. 76 77OPEN_SEP="@(" 78CLOSE_SEP=")@" 79DIVIDER="@@" 80NUMRANGE="NUMRANGE" 81BACKREF="BACKREF" 82 83 84###################################################################### 85def die(msg): 86 print msg 87 sys.exit(1) 88 89###################################################################### 90# A Block is a class that holds all the different possible texts 91# that could be in that area of the file. 92# You call "next()" for it to rotate to the next possibile choice 93# ( which returns True when it's rolled over to its initial state ), 94# and "echo()" to print the current possible choice. 95# "next()" will rotate back to the first choice if you 96# go past the last 97class Block: 98 99 _children = [] 100 101 def echo(self, output_file): 102 raise NotImplementedError 103 104 def next(self): 105 raise NotImplementedError 106 107 def num_possibilities(self): 108 raise NotImplementedError 109 110 def dump(self, indent): 111 raise NotImplementedError 112 113 def is_backrefable_type(self): 114 return False 115 116 def is_backref_type(self): 117 return False 118 119 def get_children(self): 120 return self._children 121 122###################################################################### 123# This merely holds a piece of text 124class TextBlock(Block): 125 126 _text = "" 127 128 def __init__(self, text): 129 self._text = text 130 131 # simply writes its text out 132 def echo(self, output_file): 133 output_file.write( self._text ) 134 135 # This always rotates over, since it only has one choice 136 def next(self): 137 return True 138 139 def num_possibilities(self): 140 return 1 141 142 def dump(self, indent): 143 print( " " * indent + "TextBlock [\"" + self._text + "\"]" ) 144 145###################################################################### 146# This holds a choice of blocks 147class ChoiceBlock(Block): 148 149 _current_choice = 0 150 151 def __init__(self, children): 152 self._children = children 153 154 def echo(self, output_file): 155 self._children[self._current_choice].echo( output_file ) 156 157 def next(self): 158 # next() our current choice, and rotate ourselves if 159 # it rolls over. 160 choice_is_done = self._children[self._current_choice].next() 161 if choice_is_done: 162 self._current_choice += 1 163 if self._current_choice >= len(self._children): 164 self._current_choice = 0 165 return True 166 return False 167 168 def num_possibilities(self): 169 return sum( [ choice.num_possibilities() for choice in self._children ] ) 170 171 def dump(self, indent): 172 print( " " * indent + "ChoiceBlock: [" ) 173 for choice in self._children: 174 choice.dump(indent + 2) 175 print( " " * indent + "]" ) 176 177 def is_backrefable_type(self): 178 return True 179 180###################################################################### 181# This holds a sequence of blocks 182class SequenceBlock(Block): 183 184 def __init__(self, children): 185 self._children = children 186 187 def echo(self, output_file): 188 for part in self._children: 189 part.echo(output_file) 190 191 # increment the parts from right to left until we reach 192 # one that doesn't roll over (This is somewhat akin 193 # to incrementing a number. You increase the digits from 194 # right to left until you no longer have a carry digit ) 195 def next(self): 196 # increment parts until we find one that hasn't rolled over 197 for part in reversed(self._children): 198 if not part.next(): 199 return False 200 # If every part rolled over, then we consider ourselves rolled over 201 return True 202 203 def num_possibilities(self): 204 return reduce( mul, [ \ 205 part.num_possibilities() for part in self._children ], 1 ) 206 207 def dump(self, indent): 208 print( " " * indent + "SequenceBlock: [" ) 209 for part in self._children: 210 part.dump(indent + 2) 211 print( " " * indent + "]" ) 212 213###################################################################### 214# This holds a reference to another block. 215class RefBlock(Block): 216 217 _index = -1 218 _underlying = None 219 220 def __init__(self, index): 221 self._index = index 222 223 def echo(self, output_file): 224 self._underlying.echo(output_file) 225 226 def next(self): 227 # We always roll over, since we consider our value to have 228 # only one choice: whatever the reference Block has 229 return True 230 231 def num_possibilities(self): 232 return 1 233 234 def dump(self, indent): 235 print( " " * indent + "RefBlock: [" ) 236 print( " " * (indent+2) + str(self._index) ) 237 print( " " * indent + "]" ) 238 239 def is_backref_type(self): 240 return True 241 242 # Used by the function that resolves backrefs 243 def get_index(self): 244 return self._index; 245 def set_underlying(self, underlying): 246 self._underlying = underlying 247 248 249###################################################################### 250# This is the highest level, which holds one inner Block. 251class MasterBlock: 252 _file_name_base = None 253 _highest_block = None 254 _next_file_number = 0 # The number of the next file we're going to print 255 256 _output_file = None 257 258 def __init__(self, file_name_base, highest_block): 259 self._file_name_base = file_name_base 260 self._highest_block = highest_block 261 self._next_file_number = 0 262 263 # Print all choices to files 264 def echo_all(self): 265 266 self._echo_master() 267 while not self._highest_block.next(): 268 self._next_file_number += 1 269 self._echo_master() 270 271 self._output_file.close() 272 273 274 # Print our current choice to a file, 275 # including opening and closing files as necessary 276 def _echo_master(self): 277 if "-1" in sys.argv: 278 if not self._output_file: 279 self._output_file = open( self._file_name_base + ".template_expanded", "w" ) 280 else: 281 self._output_file = open( self._file_name_base + "." + \ 282 str(self._next_file_number) + ".test", "w" ) 283 284 # the actual write 285 self._highest_block.echo( self._output_file ) 286 287 if "-1" not in sys.argv: 288 self._output_file.close() 289 290###################################################################### 291# Break the contents of a file into a list of tokens, where 292# a token is "@(", ")@", etc. or just plain text 293def tokenize( file_string ): 294 295 token_pattern = re.escape(OPEN_SEP) + "|" + re.escape(CLOSE_SEP) + "|" + \ 296 re.escape(DIVIDER) + "|" + \ 297 re.escape(BACKREF) + ":[0-9]+" + "|" + \ 298 re.escape(NUMRANGE) + ":[0-9]+:[0-9]+(?::[0-9]+)?" 299 token_list = re.split( "(" + token_pattern + ")", file_string ) 300 301 # remove empty strings from list 302 token_list = [ x for x in token_list if x != "" ] 303 304 return token_list 305 306###################################################################### 307# For debugging: 308def print_deque( a_deque ): 309 310 for x in a_deque: 311 print x 312 313###################################################################### 314# Give it a numrange block and it creates ChoiceBlock 315# representing all the numbers in the sequence 316def create_numrange_block( piece ): 317 318 a_match = re.match( "^" + re.escape(NUMRANGE) + ":([0-9])+:([0-9]+)(:([0-9]+))?" + "$", piece ) 319 if not a_match: 320 die( "This is not a valid NUMRANGE: \"" + piece + "\"" ) 321 322 match_pieces = a_match.groups() 323 324 start = int(match_pieces[0]) 325 stop = int(match_pieces[1]) 326 if match_pieces[3]: 327 step = int(match_pieces[3]) 328 else: 329 step = 1 330 331 _choices = [ TextBlock(str(num)) for num in xrange(start,stop,step) ] 332 return ChoiceBlock( _choices ) 333 334###################################################################### 335# Creates a backref_block from the given pattern 336def create_backref_block( piece ): 337 338 a_match = re.match( "^" + re.escape(BACKREF) + ":([0-9]+)" + "$", piece ) 339 if not a_match: 340 die( "Invalid backref statement: \"" + piece + "\"" ) 341 342 index = int(a_match.group(1)) 343 return RefBlock( index ) 344 345###################################################################### 346# Creates a sequence_block, removing any parts of the token_deque 347# that it used. 348def create_choice_block( token_deque ): 349 350 if len(token_deque) < 1 or token_deque[0] != OPEN_SEP: 351 die( "bug in this program: tried to start a choice at a place that doesn't have an opening separator" ) 352 353 # pop off the opening separator 354 token_deque.popleft() 355 356 choices = [] 357 358 while len(token_deque) > 0: 359 choices.append( create_sequence_block(token_deque) ) 360 361 if len(token_deque) < 1: 362 die("unclosed choice") 363 364 piece = token_deque[0] 365 if piece == DIVIDER: 366 token_deque.popleft() # keep going 367 elif piece == CLOSE_SEP: 368 token_deque.popleft() 369 break 370 else: 371 die( "bug in this program: create_sequence_block " + \ 372 "returned on bad item") 373 374 if len(choices) == 1: 375 return choices[0] 376 else: 377 return ChoiceBlock( choices ) 378 379###################################################################### 380# Creates a sequence_block, removing any parts of the token_deque 381# that it used. 382def create_sequence_block( token_deque ): 383 384 parts = [] 385 386 while len(token_deque) > 0: 387 piece = token_deque[0] 388 if piece == OPEN_SEP: 389 parts.append( create_choice_block( token_deque ) ) 390 elif piece == CLOSE_SEP or piece == DIVIDER: 391 break 392 elif piece.startswith(NUMRANGE): 393 parts.append( create_numrange_block( piece ) ) 394 token_deque.popleft() 395 elif piece.startswith(BACKREF): 396 parts.append( create_backref_block( piece ) ) 397 token_deque.popleft() 398 else: 399 parts.append( TextBlock( token_deque[0] ) ) 400 token_deque.popleft() 401 402 403 if len(parts) == 1: 404 return parts[0] 405 else: 406 return SequenceBlock( parts ) 407 408###################################################################### 409def find_backrefs_recurse( block, backrefs ): 410 411 if block.is_backrefable_type(): 412 backrefs.append( block ) 413 else: 414 for child in block.get_children(): 415 find_backrefs_recurse( child, backrefs ) 416 417###################################################################### 418def resolve_backrefs_recurse( block, backrefs ): 419 420 if block.is_backref_type(): 421 # resolve this one 422 index = block.get_index() 423 if( index < 0 or index >= len(backrefs) ): 424 die( "could not resolve backref: " + str(index) ) 425 else: 426 block.set_underlying( backrefs[index] ) 427 else: 428 for child in block.get_children(): 429 resolve_backrefs_recurse( child, backrefs ) 430 431###################################################################### 432def resolve_backrefs( highest_block ): 433 434 backrefs = [] 435 436 # recurse downwards to determine all Blocks that 437 # can be used by RefBlocks 438 find_backrefs_recurse( highest_block, backrefs ) 439 440 # now find all the RefBlocks and assign them 441 resolve_backrefs_recurse( highest_block, backrefs ) 442 443###################################################################### 444def process_file(file_name): 445 446 file_name_base = file_name.replace(".template", "") 447 448 # slurp the entire file into one string (file_string) 449 file_handle = open( file_name, "r" ) 450 file_string = file_handle.read() 451 file_handle.close() 452 453 # Divide the text into tokens 454 token_list = tokenize( file_string ) 455 456 # create the master_block and use it to print all 457 # possibilities 458 token_deque = collections.deque(token_list) 459 highest_block = create_sequence_block( token_deque ) 460 if len(token_deque) != 0: 461 die( "stray token such as a divider or separator at the top level" ) 462 463 resolve_backrefs( highest_block ) 464 if "-d" in sys.argv: 465 highest_block.dump(0) 466 print("total files potentially created: " + \ 467 str(highest_block.num_possibilities()) ) 468 master_block = MasterBlock( file_name_base, highest_block ) 469 470 if not "-n" in sys.argv: 471 master_block.echo_all() 472 473###################################################################### 474def main(): 475 476 # load args (and direct derivatives of args) 477 if len(sys.argv) < 2: 478 die( "Specify the files on which to perform substitution" ) 479 480 # process each file 481 for file_name in sys.argv[1:]: 482 if not file_name.startswith("-"): 483 process_file( file_name ) 484 485###################################################################### 486main() 487