1#########################################################################
2#                                                                       #
3#                                                                       #
4#   copyright 2002 Paul Henry Tremblay                                  #
5#                                                                       #
6#   This program is distributed in the hope that it will be useful,     #
7#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
8#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU    #
9#   General Public License for more details.                            #
10#                                                                       #
11#                                                                       #
12#########################################################################
13import sys, os, re
14
15from calibre.ebooks.rtf2xml import copy
16from calibre.ptempfile import better_mktemp
17from . import open_for_read, open_for_write
18
19
class Info:
    """
    Make tags for document-information.

    The input file is read one line at a time and driven through a small
    state machine.  Lines before and after the information table are copied
    unchanged; lines inside the table are rewritten as ``mi<tg<...`` tag
    lines.  The result replaces the original file.
    """

    def __init__(self,
            in_file,
            bug_handler,
            copy=None,
            run_level=1,
            ):
        """
        Required:
            'in_file'--file to parse (it is overwritten by fix_info)
            'bug_handler'--exception class raised when an inconsistency is
            found and the run level is greater than 3
        Optional:
            'copy'-- whether to make a copy of result for debugging
            'run_level'--strictness; values greater than 3 turn
            unrecognised input into raised errors
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # Scratch file that receives the rewritten output.
        self.__write_to = better_mktemp()

    def __initiate_values(self):
        """
        Initiate all values.
        """
        self.__text_string = ''
        self.__state = 'before_info_table'
        self.rmspace = re.compile(r'\s+')
        # state name => method that handles a line while in that state
        self.__state_dict = {
        'before_info_table': self.__before_info_table_func,
        'after_info_table': self.__after_info_table_func,
        'in_info_table'    : self.__in_info_table_func,
        'collect_text'      : self.__collect_text_func,
        'collect_tokens'      : self.__collect_tokens_func,
        }
        # first 16 characters of a line => (handler, name of the output tag)
        self.__info_table_dict = {
        'cw<di<title_____'  : (self.__found_tag_with_text_func, 'title'),
        'cw<di<author____'  : (self.__found_tag_with_text_func, 'author'),
        'cw<di<operator__'  : (self.__found_tag_with_text_func, 'operator'),
        'cw<di<manager___'  : (self.__found_tag_with_text_func, 'manager'),
        'cw<di<company___'  : (self.__found_tag_with_text_func, 'company'),
        'cw<di<keywords__'  : (self.__found_tag_with_text_func, 'keywords'),
        'cw<di<category__'  : (self.__found_tag_with_text_func, 'category'),
        'cw<di<doc-notes_'  : (self.__found_tag_with_text_func, 'doc-notes'),
        'cw<di<subject___'  : (self.__found_tag_with_text_func, 'subject'),
        'cw<di<linkbase__'  : (self.__found_tag_with_text_func, 'hyperlink-base'),

        'cw<di<create-tim'  : (self.__found_tag_with_tokens_func, 'creation-time'),
        'cw<di<revis-time'  : (self.__found_tag_with_tokens_func, 'revision-time'),
        'cw<di<print-time'  : (self.__found_tag_with_tokens_func, 'printing-time'),
        'cw<di<backuptime'  : (self.__found_tag_with_tokens_func, 'backup-time'),

        'cw<di<num-of-wor'  : (self.__single_field_func, 'number-of-words'),
        'cw<di<num-of-chr'  : (self.__single_field_func, 'number-of-characters'),
        'cw<di<numofchrws'  : (self.__single_field_func, 'number-of-characters-without-space'),
        'cw<di<num-of-pag'  : (self.__single_field_func, 'number-of-pages'),
        'cw<di<version___'  : (self.__single_field_func, 'version'),
        'cw<di<edit-time_'  : (self.__single_field_func, 'editing-time'),
        'cw<di<intern-ver'  : (self.__single_field_func, 'internal-version-number'),
        'cw<di<internalID'  : (self.__single_field_func, 'internal-id-number'),
        }
        # abbreviated token name (characters 6-15 of a line) => attribute name
        self.__token_dict = {
        'year______'        : 'year',
        'month_____'        : 'month',
        'day_______'        : 'day',
        'minute____'        : 'minute',
        'second____'        : 'second',
        'revis-time'        : 'revision-time',
        'create-tim'        : 'creation-time',
        'edit-time_'        : 'editing-time',
        'print-time'        : 'printing-time',
        'backuptime'        : 'backup-time',
        'num-of-wor'        : 'number-of-words',
        'num-of-chr'        : 'number-of-characters',
        'numofchrws'        : 'number-of-characters-without-space',
        'num-of-pag'        : 'number-of-pages',
        'version___'        : 'version',
        'intern-ver'        : 'internal-version-number',
        'internalID'        : 'internal-id-number',
        }

    def __before_info_table_func(self, line):
        """
        Required:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Check for the beginning of the information table. When found, set
            the state to the information table. Always write the line.
        """
        if self.__token_info == 'mi<mk<doc-in-beg':
            self.__state = 'in_info_table'
        self.__write_obj.write(line)

    def __in_info_table_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing.
        Logic:
            Check for the end of information. If not found, check if the
            token has a special value in the info table dictionary. If it
            does, execute that function.
            Otherwise, output the line to the file.
        """
        if self.__token_info == 'mi<mk<doc-in-end':
            # NOTE(review): the end marker itself is not written to the
            # output, unlike the begin marker -- confirm this is intended.
            self.__state = 'after_info_table'
            return
        action, tag = self.__info_table_dict.get(self.__token_info, (None, None))
        if action:
            action(line, tag)
        else:
            self.__write_obj.write(line)

    def __found_tag_with_text_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag --what kind of line
        Returns:
            nothing
        Logic:
            This function marks the beginning of information fields that have
            text that must be collected.  Set the type of information field
            with the tag option. Set the state to collecting text
        """
        self.__tag = tag
        self.__state = 'collect_text'

    def __collect_text_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            If the end of the information field is found, write the text
            string to the file (unless it holds nothing but whitespace) and
            go back to the information table state.
            Otherwise, if the line contains text, add it to the text string.
            Any other line is dropped.
        """
        if self.__token_info == 'mi<mk<docinf-end':
            self.__state = 'in_info_table'
            # Don't print empty tags
            if self.rmspace.sub('', self.__text_string):
                self.__write_obj.write(
                    'mi<tg<open______<%s\n'
                    'tx<nu<__________<%s\n'
                    'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
                )
            self.__text_string = ''
        elif line.startswith('tx'):
            # Skip the 17-character prefix and the trailing newline.
            self.__text_string += line[17:-1]

    def __found_tag_with_tokens_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- type of field
        Returns:
            nothing
        Logic:
            Some fields have a series of tokens (cw<di<year______<nu<2003)
            that must be parsed as attributes for the element.
            Set the state to collect tokens, and set the text string to
            start an empty element with attributes.
        """
        self.__state = 'collect_tokens'
        self.__text_string = 'mi<tg<empty-att_<%s' % tag
        # mi<tg<empty-att_<page-definition<margin>33\n

    def __collect_tokens_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Raises:
            the bug handler, when the token name is unknown and the run
            level is greater than 3
        Logic:
            This function collects all the token information and adds it to
            the text string until the end of the field is found.
            First check for the end of the information field. If found, write
            the text string to the file.
            If not found, get the relevant information from the line.
            This information cannot be directly added to the text string,
            because it exists in abbreviated form.  (num-of-wor)
            Look the abbreviation up in a dictionary to convert it to a
            longer, readable form. If the key does not exist in the
            dictionary, raise an error at high run levels and otherwise
            ignore the line. If it exists, add the value to the text string.
            (num-of-wor => number-of-words)
        """
        # cw<di<year______<nu<2003
        if self.__token_info == 'mi<mk<docinf-end':
            self.__state = 'in_info_table'
            self.__write_obj.write(
            '%s\n' % self.__text_string
            )
            self.__text_string = ''
            return
        att = line[6:16]
        value = line[20:-1]
        att_changed = self.__token_dict.get(att)
        if att_changed is None:
            if self.__run_level > 3:
                msg = 'No dictionary match for %s\n' % att
                raise self.__bug_handler(msg)
        else:
            self.__text_string += '<%s>%s' % (att_changed, value)

    def __single_field_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- name of the output element
        Returns:
            nothing
        Logic:
            The value starts at character 20 of the line. Write it at once
            as an empty element whose single attribute has the same name as
            the element.
        """
        value = line[20:-1]
        self.__write_obj.write(
        'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
        )

    def __after_info_table_func(self, line):
        """
        Requires:
            line --line to write to file
        Returns:
            nothing
        Logic:
            After the end of the information table, simply write the line to
            the file.
        """
        self.__write_obj.write(line)

    def __process_line(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Raises:
            the bug handler, when the current state has no handler and the
            run level is greater than 3
        Logic:
            Store the first 16 characters of the line as the current token
            and hand the line to the method registered for the current
            state.
            If no method is registered for the state, report it on stderr.
            At high run levels raise an error; otherwise copy the line
            unchanged so that no input is lost.
        """
        self.__token_info = line[:16]
        action = self.__state_dict.get(self.__state)
        if action is None:
            sys.stderr.write('No matching state in module %s\n' % __name__)
            sys.stderr.write(self.__state + '\n')
            if self.__run_level > 3:
                msg = 'No matching state for %s\n' % self.__state
                raise self.__bug_handler(msg)
            self.__write_obj.write(line)
            return
        action(line)

    def fix_info(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time. Determine what action to take based on
            the state. If the state is before the information table, look for
            the beginning of the information table.
            If the state is in the information table, use other methods to
            parse the information.  If the state is after the
            information table, simply write the line to the output file.
            When all lines are processed, optionally keep a debugging copy
            of the result, move the result over the original file, and
            remove the temporary file.
        """
        self.__initiate_values()
        with open_for_read(self.__file) as read_obj:
            with open_for_write(self.__write_to) as self.__write_obj:
                for line in read_obj:
                    self.__process_line(line)
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "info.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)
285