1"""Conversion pipeline templates.
2
3The problem:
4------------
5
6Suppose you have some data that you want to convert to another format,
7such as from GIF image format to PPM image format.  Maybe the
8conversion involves several steps (e.g. piping it through compress or
9uuencode).  Some of the conversion steps may require that their input
10is a disk file, others may be able to read standard input; similar for
11their output.  The input to the entire conversion may also be read
12from a disk file or from an open file, and similar for its output.
13
14The module lets you construct a pipeline template by sticking one or
15more conversion steps together.  It will take care of creating and
16removing temporary files if they are necessary to hold intermediate
17data.  You can then use the template to do conversions from many
18different sources to many different destinations.  The temporary
19file names used are different each time the template is used.
20
21The templates are objects so you can create templates for many
22different conversion steps and store them in a dictionary, for
23instance.
24
25
26Directions:
27-----------
28
29To create a template:
30    t = Template()
31
32To add a conversion step to a template:
33   t.append(command, kind)
34where kind is a string of two characters: the first is '-' if the
35command reads its standard input or 'f' if it requires a file; the
36second likewise for the output. The command must be valid /bin/sh
37syntax.  If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.
40
41To add a conversion step at the beginning:
42   t.prepend(command, kind)
43
44To convert a file to another file using a template:
45  sts = t.copy(infile, outfile)
46If infile or outfile are the empty string, standard input is read or
47standard output is written, respectively.  The return value is the
48exit status of the conversion pipeline.
49
50To open a file for reading or writing through a conversion pipeline:
51   fp = t.open(file, mode)
52where mode is 'r' to read the file, or 'w' to write it -- just like
53for the built-in function open() or for os.popen().
54
55To create a new template object initialized to a given one:
56   t2 = t.clone()
57"""                                     # '
58
59
60import re
61import os
62import tempfile
63# we import the quote function rather than the module for backward compat
64# (quote used to be an undocumented but used function in pipes)
65from shlex import quote
66
# Names exported by "from <module> import *": only the Template class.
__all__ = ["Template"]
68
# Conversion step kinds.
#
# Each kind is a two-character string: the first character describes the
# input side of a step and the second its output side.  'f' means the
# step needs a real file on that side, '-' means it uses the standard
# stream on that side.

# Must read & write real files
FILEIN_FILEOUT = 'ff'
# Must write a real file
STDIN_FILEOUT = '-f'
# Must read a real file
FILEIN_STDOUT = 'f-'
# Normal pipeline element
STDIN_STDOUT = '--'
# Must be first, writes stdout
SOURCE = '.-'
# Must be last, reads stdin
SINK = '-.'

# Every kind accepted for a step.
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]
80
81
class Template:
    """Class representing a pipeline template.

    A template keeps an ordered list of (command, kind) steps in
    self.steps.  The steps are turned into a single shell command by the
    module-level makepipeline() function, and that command is run with
    os.system() (see copy) or os.popen() (see open_r and open_w).
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later changes to either template stay private.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def _check_step(self, where, cmd, kind):
        """Check the type of cmd and that kind is a known step kind.

        where is the public method name used in the error message.
        Raises TypeError if cmd is not a string and ValueError if kind
        is not listed in stepkinds.
        """
        # isinstance() rather than an exact type comparison, so that
        # subclasses of str are accepted as commands too.
        if not isinstance(cmd, str):
            raise TypeError('Template.%s: cmd must be a string' % where)
        if kind not in stepkinds:
            raise ValueError('Template.%s: bad kind %r' % (where, kind))

    def _check_files(self, where, cmd, kind):
        """Check that cmd mentions $IN / $OUT when kind requires files.

        where is the public method name used in the error message.
        Raises ValueError if a required variable is missing from cmd.
        """
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.%s: missing $IN in cmd' % where)
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.%s: missing $OUT in cmd' % where)

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SOURCE, if the template already ends with
        a SINK step, or if cmd lacks a $IN / $OUT that kind requires.
        """
        self._check_step('append', cmd, kind)
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        self._check_files('append', cmd, kind)
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is unknown or is SINK, if the template already begins with
        a SOURCE step, or if cmd lacks a $IN / $OUT that kind requires.
        """
        self._check_step('prepend', cmd, kind)
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        self._check_files('prepend', cmd, kind)
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError("Template.open: rw must be 'r' or 'w', not %r"
                         % (rw,))

    def open_r(self, file):
        """t.open_r(file) implements t.open(file, 'r').

        With no steps the file is simply opened for reading.  Otherwise
        the pipeline is started with file as its input and the returned
        object reads the pipeline's output.  Raises ValueError if the
        last step is a SINK.
        """
        if not self.steps:
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w').

        With no steps the file is simply opened for writing.  Otherwise
        the pipeline is started with file as its output and the returned
        object writes to the pipeline's input.  Raises ValueError if the
        first step is a SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for it."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command for
        this template's steps, as built by the module-level
        makepipeline() function.

        When debugging is on, the command is printed and prefixed with
        'set -x; ' so the shell traces it as well.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
179
180
def makepipeline(infile, steps, outfile):
    """Return the shell command that runs steps from infile to outfile.

    steps is a sequence of (command, kind) pairs.  infile and outfile
    are file names; an empty string means no file is attached on that
    end.  Temporary files are created wherever two neighbouring stages
    must be connected through a real file, and the returned command
    removes them again, both at its end and from a signal trap.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # An empty template still needs something to run.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # A first stage that wants an input file but has none is fed by an
    # extra 'cat'; likewise for a last stage that wants an output file.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][3] = outfile

    # Neighbouring stages talk through a temporary file as soon as one
    # side of the connection needs a real file.
    temps = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            handle, path = tempfile.mkstemp()
            os.close(handle)
            temps.append(path)
            left[3] = path
            right[0] = path

    # Attach the file names to each command: as $IN/$OUT assignments for
    # file-based sides, as redirections for stream-based sides.
    for stage in stages:
        src, cmd, kind, dst = stage
        pieces = []
        if kind[0] == 'f':
            pieces.append('IN=' + quote(src) + '; ')
        if kind[1] == 'f':
            pieces.append('OUT=' + quote(dst) + '; ')
        pieces.append(cmd)
        if kind[0] == '-' and src:
            pieces.append(' <' + quote(src))
        if kind[1] == '-' and dst:
            pieces.append(' >' + quote(dst))
        stage[1] = ''.join(pieces)

    # Chain the stages: a stage without an input file is piped to, a
    # stage with one simply runs after its predecessor.
    script = stages[0][1]
    for src, cmd, kind, _dst in stages[1:]:
        if src != '':
            script = script + '\n' + cmd
            continue
        if 'f' in kind:
            # Group the variable assignment with its command so the
            # whole thing is a single pipeline element.
            cmd = '{ ' + cmd + '; }'
        script = script + ' |\n' + cmd

    if not temps:
        return script

    # Remove the temporary files at the end and when a signal arrives.
    rmcmd = ' '.join(['rm -f'] + [quote(path) for path in temps])
    trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
    return '\n'.join([trapcmd, script, rmcmd])