#########################################################################
#                                                                       #
#                                                                       #
#   copyright 2002 Paul Henry Tremblay                                  #
#                                                                       #
#   This program is distributed in the hope that it will be useful,     #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of      #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU   #
#   General Public License for more details.                            #
#                                                                       #
#                                                                       #
#########################################################################
import sys, os, re

from calibre.ebooks.rtf2xml import copy
from calibre.ptempfile import better_mktemp
from . import open_for_read, open_for_write


class Info:
    """
    Make tags for document-information.

    The input file is read one line at a time.  The first 16 characters of
    each line are treated as the token identifier; a small state machine
    (before / in / after the information table, plus two collecting states)
    decides what to do with every line.
    """

    def __init__(self,
            in_file,
            bug_handler,
            copy=None,
            run_level=1,
            ):
        """
        Required:
            'in_file' -- file to parse (it is overwritten by fix_info)
            'bug_handler' -- callable invoked with a message; its result is
            raised when an unknown token is met at a high run level.  It is
            also handed to copy.Copy.
        Optional:
            'copy' -- whether to make a copy of result for debugging
            'run_level' -- unknown date/time tokens are only reported as
            bugs when this is greater than 3
        Returns:
            nothing
        """
        self.__file = in_file
        self.__bug_handler = bug_handler
        self.__copy = copy
        self.__run_level = run_level
        # Temporary file that receives the rewritten output.
        self.__write_to = better_mktemp()

    def __initiate_values(self):
        """
        Initiate all values.

        Sets the starting state, the empty text buffer, the whitespace
        pattern and the three lookup tables used by the state machine.
        """
        self.__text_string = ''
        self.__state = 'before_info_table'
        self.rmspace = re.compile(r'\s+')
        # state name => method that handles one line in that state
        self.__state_dict = {
            'before_info_table': self.__before_info_table_func,
            'after_info_table': self.__after_info_table_func,
            'in_info_table'    : self.__in_info_table_func,
            'collect_text'      : self.__collect_text_func,
            'collect_tokens'      : self.__collect_tokens_func,
        }
        # token => (method to call, name of the element to write)
        self.__info_table_dict = {
            'cw<di<title_____'  : (self.__found_tag_with_text_func, 'title'),
            'cw<di<author____'  : (self.__found_tag_with_text_func, 'author'),
            'cw<di<operator__'  : (self.__found_tag_with_text_func, 'operator'),
            'cw<di<manager___'  : (self.__found_tag_with_text_func, 'manager'),
            'cw<di<company___'  : (self.__found_tag_with_text_func, 'company'),
            'cw<di<keywords__'  : (self.__found_tag_with_text_func, 'keywords'),
            'cw<di<category__'  : (self.__found_tag_with_text_func, 'category'),
            'cw<di<doc-notes_'  : (self.__found_tag_with_text_func, 'doc-notes'),
            'cw<di<subject___'  : (self.__found_tag_with_text_func, 'subject'),
            'cw<di<linkbase__'  : (self.__found_tag_with_text_func, 'hyperlink-base'),

            'cw<di<create-tim'  : (self.__found_tag_with_tokens_func, 'creation-time'),
            'cw<di<revis-time'  : (self.__found_tag_with_tokens_func, 'revision-time'),
            'cw<di<print-time'  : (self.__found_tag_with_tokens_func, 'printing-time'),
            'cw<di<backuptime'  : (self.__found_tag_with_tokens_func, 'backup-time'),

            'cw<di<num-of-wor'  : (self.__single_field_func, 'number-of-words'),
            'cw<di<num-of-chr'  : (self.__single_field_func, 'number-of-characters'),
            'cw<di<numofchrws'  : (self.__single_field_func, 'number-of-characters-without-space'),
            'cw<di<num-of-pag'  : (self.__single_field_func, 'number-of-pages'),
            'cw<di<version___'  : (self.__single_field_func, 'version'),
            'cw<di<edit-time_'  : (self.__single_field_func, 'editing-time'),
            'cw<di<intern-ver'  : (self.__single_field_func, 'internal-version-number'),
            'cw<di<internalID'  : (self.__single_field_func, 'internal-id-number'),
        }
        # abbreviated attribute name => readable attribute name
        self.__token_dict = {
            'year______'        : 'year',
            'month_____'        : 'month',
            'day_______'        : 'day',
            'minute____'        : 'minute',
            'second____'        : 'second',
            'revis-time'        : 'revision-time',
            'create-tim'        : 'creation-time',
            'edit-time_'        : 'editing-time',
            'print-time'        : 'printing-time',
            'backuptime'        : 'backup-time',
            'num-of-wor'        : 'number-of-words',
            'num-of-chr'        : 'number-of-characters',
            'numofchrws'        : 'number-of-characters-without-space',
            'num-of-pag'        : 'number-of-pages',
            'version___'        : 'version',
            'intern-ver'        : 'internal-version-number',
            'internalID'        : 'internal-id-number',
        }

    def __before_info_table_func(self, line):
        """
        Required:
            line -- the line to parse
        Returns:
            nothing
        Logic:
            Check for the beginning of the information table. When found, set
            the state to the information table. Always write the line.
        """
        if self.__token_info == 'mi<mk<doc-in-beg':
            self.__state = 'in_info_table'
        self.__write_obj.write(line)

    def __in_info_table_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing.
        Logic:
            Check for the end of information. If not found, check if the
            token has a special value in the info table dictionary. If it
            does, execute that function.
            Otherwise, output the line to the file.
        """
        if self.__token_info == 'mi<mk<doc-in-end':
            # The end marker itself is not written out.
            self.__state = 'after_info_table'
        else:
            action, tag = self.__info_table_dict.get(self.__token_info, (None, None))
            if action:
                action(line, tag)
            else:
                self.__write_obj.write(line)

    def __found_tag_with_text_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- what kind of line
        Returns:
            nothing
        Logic:
            This function marks the beginning of information fields that have
            text that must be collected.  Set the type of information field
            with the tag option. Set the state to collecting text.
        """
        self.__tag = tag
        self.__state = 'collect_text'

    def __collect_text_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            If the end of the information field is found, write the text
            string to the file (unless it is empty or only whitespace) and
            go back to the information table state.
            Otherwise, if the line contains text, add it to the text string.
        """
        if self.__token_info == 'mi<mk<docinf-end':
            self.__state = 'in_info_table'
            # Don't print empty tags
            if self.rmspace.sub('', self.__text_string):
                self.__write_obj.write(
                    'mi<tg<open______<%s\n'
                    'tx<nu<__________<%s\n'
                    'mi<tg<close_____<%s\n' % (self.__tag, self.__text_string, self.__tag)
                )
            # Always reset, so ignored whitespace cannot leak into the next field.
            self.__text_string = ''
        elif line[0:2] == 'tx':
            # Skip the 17-character prefix and the trailing newline.
            self.__text_string += line[17:-1]

    def __found_tag_with_tokens_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- type of field
        Returns:
            nothing
        Logic:
            Some fields have a series of tokens (cw<di<year______<nu<2003)
            that must be parsed as attributes for the element.
            Set the state to collect tokens, and set the text string to
            start an empty element with attributes.
        """
        self.__state = 'collect_tokens'
        self.__text_string = 'mi<tg<empty-att_<%s' % tag
        # mi<tg<empty-att_<page-definition<margin>33\n

    def __collect_tokens_func(self, line):
        """
        Requires:
            line -- line to parse
        Returns:
            nothing
        Logic:
            This function collects all the token information and adds it to
            the text string until the end of the field is found.
            First check for the end of the information field. If found, write
            the text string to the file.
            If not found, get the relevant information from the line.
            This information cannot be directly added to the text string,
            because it exists in abbreviated form.  (num-of-wor)
            I want to check this information in a dictionary to convert it
            to a longer, readable form.  If the key does not exist in the
            dictionary, raise a bug (only when the run level is greater
            than 3; otherwise the token is silently dropped).  Otherwise add
            the value to the text string.
            (num-of-wor => number-of-words)
        """
        # cw<di<year______<nu<2003
        if self.__token_info == 'mi<mk<docinf-end':
            self.__state = 'in_info_table'
            self.__write_obj.write(
                '%s\n' % self.__text_string
            )
            self.__text_string = ''
        else:
            att = line[6:16]
            value = line[20:-1]
            att_changed = self.__token_dict.get(att)
            if att_changed is None:
                if self.__run_level > 3:
                    msg = 'No dictionary match for %s\n' % att
                    raise self.__bug_handler(msg)
            else:
                self.__text_string += '<%s>%s' % (att_changed, value)

    def __single_field_func(self, line, tag):
        """
        Requires:
            line -- line to parse
            tag -- name of the element (also used as the attribute name)
        Returns:
            nothing
        Logic:
            Fields that carry a single value on the token line itself are
            written straight away as an empty element with one attribute.
        """
        value = line[20:-1]
        self.__write_obj.write(
            'mi<tg<empty-att_<%s<%s>%s\n' % (tag, tag, value)
        )

    def __after_info_table_func(self, line):
        """
        Requires:
            line -- line to write to file
        Returns:
            nothing
        Logic:
            After the end of the information table, simply write the line to
            the file.
        """
        self.__write_obj.write(line)

    def fix_info(self):
        """
        Requires:
            nothing
        Returns:
            nothing (changes the original file)
        Logic:
            Read one line in at a time. Determine what action to take based on
            the state. If the state is before the information table, look for
            the beginning of the information table.
            If the state is in the information table, use other methods to
            parse the information.
            If the state is after the information table, simply write the
            line to the output file.
            When every line has been handled, optionally keep a debugging
            copy, put the result in place of the original file and remove
            the temporary file.
        """
        self.__initiate_values()
        with open_for_read(self.__file) as read_obj:
            with open_for_write(self.__write_to) as self.__write_obj:
                for line in read_obj:
                    self.__token_info = line[:16]
                    action = self.__state_dict.get(self.__state)
                    if action is None:
                        sys.stderr.write('No matching state in module info.py\n')
                        sys.stderr.write(self.__state + '\n')
                        # There is no handler to call; pass the line through
                        # unchanged rather than calling None.
                        self.__write_obj.write(line)
                        continue
                    action(line)
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        if self.__copy:
            copy_obj.copy_file(self.__write_to, "info.data")
        copy_obj.rename(self.__write_to, self.__file)
        os.remove(self.__write_to)