1#!/usr/bin/python
2
3# Usage: Just specify files and it will output all the combinations possible
4# from what's given in each file.
5# See optional args below.
6
7# .template files are in the following format:
8#
9# Possibilities are surrounded by "@(" and ")@" with each choice
10# delimited by "@@".  There is currently no escape character, so
11# be careful if you have at-signs or parens in adjacent text.
12#
13# It will output the file for every possible option to fill that spot.
14# If there are multiple places with this, you will get a sort of cartesian
15# product and it's easy to have an explosion in the number of generated files.
16# Note that the replacement spot can cross line boundaries, so you can
17# substitute whole structures.
18# Here's an example, and all the possible lines it can produce:
19# The @(quick@@slow)@ brown fox @(napped@@jumped over the @(lazy@@tired)@ dog)@.
20#
21# The quick brown fox napped.
22# The quick brown fox jumped over the lazy dog.
23# The quick brown fox jumped over the tired dog.
24# The slow brown fox napped.
25# The slow brown fox jumped over the lazy dog.
26# The slow brown fox jumped over the tired dog.
27
28# To avoid a combinatorial explosion when you want the same substitution
29# in several places, you can use back-references via this syntax:
30# @(BACKREF:0)@
31# The 0 can be any number that has a valid reference.  It refers back to the
32# nth (0 == first, 1 == second, etc.) choice block.
33# For example, "@(a@@b@@c)@ @(BACKREF:0)@ @(BACKREF:0)@" would generate just 3 files,
34# with these lines:
35# a a a
36# b b b
37# c c c
38#
39# You can reference any "@(...something...)@" block, unless it's
40# inside another one.  Careful because you may produce infinite recursion
41# resulting in an error (circular dependency),
42# such as when a backref refs itself.
43
44# You can also have number ranges. Example:
45# @(NUMRANGE:3:10)@
46# This produces output with 3, 4, 5, ..., 9.
47# You can optionally have a step.  E.g.:
48# @(NUMRANGE:3:10:2)@
49# ( Gives: 3, 5, 7 and 9 )
50
51# This program also takes command-line arguments:
52# -d
53#     dumps debug data, and shows the number of files it can produce.
54# -n
55#     just test syntax, don't actually produce the files
56#
57# The combination "-d -n" is great for just seeing how many files
58# your .template file could produce.
59
60# Initially created on January 13, 2011
61# Jan 18, 2011: Sep now defaults to "@" if not specified.
62# Jan 24, 2011: Added NUMRANGE
63# Jan 25, 2011: Added ability to specify empty strings
64# Mar 2 & 3, 2011: Added "nested" ability ( almost a rewrite )
65# Apr 7, 2011: Added "-1" option to write everything in one file
66# Apr 11, 2011: Finally checked into repository.
67
68import sys
69import re
70import collections
71from operator import mul
72
73######################################################################
74# These are "constants", although in the future it might
75# be possible for them to be changed at the beginning of the program.
76
77OPEN_SEP="@("
78CLOSE_SEP=")@"
79DIVIDER="@@"
80NUMRANGE="NUMRANGE"
81BACKREF="BACKREF"
82
83
84######################################################################
85def die(msg):
86    print msg
87    sys.exit(1)
88
89######################################################################
90# A Block is a class that holds all the different possible texts
91# that could be in that area of the file.
92# You call "next()" for it to rotate to the next possibile choice
93# ( which returns True when it's rolled over to its initial state ),
94# and "echo()" to print the current possible choice.
95# "next()" will rotate back to the first choice if you
96# go past the last
97class Block:
98
99    _children = []
100
101    def echo(self, output_file):
102        raise NotImplementedError
103
104    def next(self):
105        raise NotImplementedError
106
107    def num_possibilities(self):
108        raise NotImplementedError
109
110    def dump(self, indent):
111        raise NotImplementedError
112
113    def is_backrefable_type(self):
114        return False
115
116    def is_backref_type(self):
117        return False
118
119    def get_children(self):
120        return self._children
121
122######################################################################
123# This merely holds a piece of text
124class TextBlock(Block):
125
126    _text = ""
127
128    def __init__(self, text):
129        self._text = text
130
131    # simply writes its text out
132    def echo(self, output_file):
133        output_file.write( self._text )
134
135    # This always rotates over, since it only has one choice
136    def next(self):
137        return True
138
139    def num_possibilities(self):
140        return 1
141
142    def dump(self, indent):
143        print( " " * indent + "TextBlock [\"" + self._text + "\"]" )
144
145######################################################################
146# This holds a choice of blocks
147class ChoiceBlock(Block):
148
149    _current_choice = 0
150
151    def __init__(self, children):
152        self._children = children
153
154    def echo(self, output_file):
155        self._children[self._current_choice].echo( output_file )
156
157    def next(self):
158        # next() our current choice, and rotate ourselves if
159        # it rolls over.
160        choice_is_done = self._children[self._current_choice].next()
161        if choice_is_done:
162            self._current_choice += 1
163            if self._current_choice >= len(self._children):
164                self._current_choice = 0
165                return True
166        return False
167
168    def num_possibilities(self):
169        return sum( [ choice.num_possibilities() for choice in self._children ] )
170
171    def dump(self, indent):
172        print( " " * indent + "ChoiceBlock: [" )
173        for choice in self._children:
174            choice.dump(indent + 2)
175        print( " " * indent + "]" )
176
177    def is_backrefable_type(self):
178        return True
179
180######################################################################
181# This holds a sequence of blocks
182class SequenceBlock(Block):
183
184    def __init__(self, children):
185        self._children = children
186
187    def echo(self, output_file):
188        for part in self._children:
189            part.echo(output_file)
190
191    # increment the parts from right to left until we reach
192    # one that doesn't roll over (This is somewhat akin
193    # to incrementing a number.  You increase the digits from
194    # right to left until you no longer have a carry digit )
195    def next(self):
196        # increment parts until we find one that hasn't rolled over
197        for part in reversed(self._children):
198            if not part.next():
199                return False
200        # If every part rolled over, then we consider ourselves rolled over
201        return True
202
203    def num_possibilities(self):
204        return reduce( mul, [ \
205                part.num_possibilities() for part in self._children ], 1 )
206
207    def dump(self, indent):
208        print( " " * indent + "SequenceBlock: [" )
209        for part in self._children:
210            part.dump(indent + 2)
211        print( " " * indent + "]" )
212
213######################################################################
214# This holds a reference to another block.
215class RefBlock(Block):
216
217    _index = -1
218    _underlying = None
219
220    def __init__(self, index):
221        self._index = index
222
223    def echo(self, output_file):
224        self._underlying.echo(output_file)
225
226    def next(self):
227        # We always roll over, since we consider our value to have
228        # only one choice: whatever the reference Block has
229        return True
230
231    def num_possibilities(self):
232        return 1
233
234    def dump(self, indent):
235        print( " " * indent + "RefBlock: [" )
236        print( " " * (indent+2) + str(self._index) )
237        print( " " * indent + "]" )
238
239    def is_backref_type(self):
240        return True
241
242    # Used by the function that resolves backrefs
243    def get_index(self):
244        return self._index;
245    def set_underlying(self, underlying):
246        self._underlying = underlying
247
248
249######################################################################
250# This is the highest level, which holds one inner Block.
251class MasterBlock:
252    _file_name_base = None
253    _highest_block = None
254    _next_file_number = 0 # The number of the next file we're going to print
255
256    _output_file = None
257
258    def __init__(self, file_name_base, highest_block):
259        self._file_name_base = file_name_base
260        self._highest_block = highest_block
261        self._next_file_number = 0
262
263    # Print all choices to files
264    def echo_all(self):
265
266        self._echo_master()
267        while not self._highest_block.next():
268            self._next_file_number += 1
269            self._echo_master()
270
271        self._output_file.close()
272
273
274    # Print our current choice to a file,
275    # including opening and closing files as necessary
276    def _echo_master(self):
277        if "-1" in sys.argv:
278            if not self._output_file:
279                self._output_file = open( self._file_name_base + ".template_expanded", "w" )
280        else:
281            self._output_file = open( self._file_name_base + "." + \
282                                str(self._next_file_number) + ".test", "w" )
283
284        # the actual write
285        self._highest_block.echo( self._output_file )
286
287        if "-1" not in sys.argv:
288            self._output_file.close()
289
290######################################################################
291# Break the contents of a file into a list of tokens, where
292# a token is "@(", ")@", etc. or just plain text
293def tokenize( file_string ):
294
295    token_pattern = re.escape(OPEN_SEP) + "|" + re.escape(CLOSE_SEP) + "|" + \
296        re.escape(DIVIDER) + "|" + \
297        re.escape(BACKREF) + ":[0-9]+" + "|" + \
298        re.escape(NUMRANGE) + ":[0-9]+:[0-9]+(?::[0-9]+)?"
299    token_list = re.split( "(" + token_pattern + ")", file_string )
300
301    # remove empty strings from list
302    token_list = [ x for x in token_list if x != "" ]
303
304    return token_list
305
306######################################################################
307# For debugging:
308def print_deque( a_deque ):
309
310    for x in a_deque:
311        print x
312
313######################################################################
314# Give it a numrange block and it creates ChoiceBlock
315# representing all the numbers in the sequence
316def create_numrange_block( piece ):
317
318    a_match = re.match( "^" + re.escape(NUMRANGE) + ":([0-9])+:([0-9]+)(:([0-9]+))?" + "$", piece )
319    if not a_match:
320        die( "This is not a valid NUMRANGE: \"" + piece + "\"" )
321
322    match_pieces = a_match.groups()
323
324    start = int(match_pieces[0])
325    stop  = int(match_pieces[1])
326    if match_pieces[3]:
327        step = int(match_pieces[3])
328    else:
329        step = 1
330
331    _choices = [ TextBlock(str(num)) for num in xrange(start,stop,step) ]
332    return ChoiceBlock( _choices )
333
334######################################################################
335# Creates a backref_block from the given pattern
336def create_backref_block( piece ):
337
338    a_match = re.match( "^" + re.escape(BACKREF) + ":([0-9]+)" + "$", piece )
339    if not a_match:
340        die( "Invalid backref statement: \"" + piece + "\"" )
341
342    index = int(a_match.group(1))
343    return RefBlock( index )
344
345######################################################################
346# Creates a sequence_block, removing any parts of the token_deque
347# that it used.
348def create_choice_block( token_deque ):
349
350    if len(token_deque) < 1 or token_deque[0] != OPEN_SEP:
351        die( "bug in this program: tried to start a choice at a place that doesn't have an opening separator" )
352
353    # pop off the opening separator
354    token_deque.popleft()
355
356    choices = []
357
358    while len(token_deque) > 0:
359        choices.append( create_sequence_block(token_deque) )
360
361        if len(token_deque) < 1:
362            die("unclosed choice")
363
364        piece = token_deque[0]
365        if piece == DIVIDER:
366            token_deque.popleft() # keep going
367        elif piece == CLOSE_SEP:
368            token_deque.popleft()
369            break
370        else:
371            die( "bug in this program: create_sequence_block " + \
372                     "returned on bad item")
373
374    if len(choices) == 1:
375        return choices[0]
376    else:
377        return ChoiceBlock( choices )
378
379######################################################################
380# Creates a sequence_block, removing any parts of the token_deque
381# that it used.
382def create_sequence_block( token_deque ):
383
384    parts = []
385
386    while len(token_deque) > 0:
387        piece = token_deque[0]
388        if piece == OPEN_SEP:
389            parts.append( create_choice_block( token_deque ) )
390        elif piece == CLOSE_SEP or piece == DIVIDER:
391            break
392        elif piece.startswith(NUMRANGE):
393            parts.append( create_numrange_block( piece ) )
394            token_deque.popleft()
395        elif piece.startswith(BACKREF):
396            parts.append( create_backref_block( piece ) )
397            token_deque.popleft()
398        else:
399            parts.append( TextBlock( token_deque[0] ) )
400            token_deque.popleft()
401
402
403    if len(parts) == 1:
404        return parts[0]
405    else:
406        return SequenceBlock( parts )
407
408######################################################################
409def find_backrefs_recurse( block, backrefs ):
410
411    if block.is_backrefable_type():
412        backrefs.append( block )
413    else:
414        for child in block.get_children():
415            find_backrefs_recurse( child, backrefs )
416
417######################################################################
418def resolve_backrefs_recurse( block, backrefs ):
419
420    if block.is_backref_type():
421        # resolve this one
422        index = block.get_index()
423        if( index < 0 or index >= len(backrefs) ):
424            die( "could not resolve backref: " + str(index) )
425        else:
426            block.set_underlying( backrefs[index] )
427    else:
428        for child in block.get_children():
429            resolve_backrefs_recurse( child, backrefs )
430
431######################################################################
432def resolve_backrefs( highest_block ):
433
434    backrefs = []
435
436    # recurse downwards to determine all Blocks that
437    # can be used by RefBlocks
438    find_backrefs_recurse( highest_block, backrefs )
439
440    # now find all the RefBlocks and assign them
441    resolve_backrefs_recurse( highest_block, backrefs )
442
443######################################################################
444def process_file(file_name):
445
446    file_name_base = file_name.replace(".template", "")
447
448    # slurp the entire file into one string (file_string)
449    file_handle = open( file_name, "r" )
450    file_string = file_handle.read()
451    file_handle.close()
452
453    # Divide the text into tokens
454    token_list = tokenize( file_string )
455
456    # create the master_block and use it to print all
457    # possibilities
458    token_deque = collections.deque(token_list)
459    highest_block = create_sequence_block( token_deque )
460    if len(token_deque) != 0:
461        die( "stray token such as a divider or separator at the top level" )
462
463    resolve_backrefs( highest_block )
464    if "-d" in sys.argv:
465        highest_block.dump(0)
466        print("total files potentially created: " + \
467                  str(highest_block.num_possibilities()) )
468    master_block = MasterBlock( file_name_base, highest_block )
469
470    if not "-n" in sys.argv:
471        master_block.echo_all()
472
473######################################################################
474def main():
475
476    # load args (and direct derivatives of args)
477    if len(sys.argv) < 2:
478        die( "Specify the files on which to perform substitution" )
479
480    # process each file
481    for file_name in sys.argv[1:]:
482        if not file_name.startswith("-"):
483            process_file( file_name )
484
485######################################################################
486main()
487