#!/neo/opt/bin/python
"""Extract translatable strings from templates and manage their translations.

Run as a script, this module does one of two things:

  * By default it scans the template tree under the configured root (or a
    single file given with -f), extracts strings from ClearSilver
    (.cs/.cst), HTML (.html/.htm) and HDF (.hdf) files, stores them in the
    translation database and writes the string dumps and generated
    templates.
  * With --load=FILE it reads strings from an HDF file and stores them in
    the translation database as translations for --lang.

This is Python 2 code; it depends on the project modules imported below.
"""

import fcntl
import getopt
import os
import pwd
import re
import shutil
import signal
import string
import sys
import time

# Project modules.  Their relative order is kept exactly as it was
# (tstart first, neo_cgi before neo_util) in case importing them has
# order-dependent side effects -- verify before reordering.
import tstart

import db_trans
from log import *
import neo_cgi
import neo_util
import odb


class eTransError(Exception):
    """Raised when the translation data is inconsistent.

    This used to be a string exception; ``except eTransError:`` in
    existing callers keeps working with the class.
    """


class eNoString(eTransError):
    """Raised by Translator.fetchString when a string id is unknown."""


DONE = 0
DEBUG = 0

# Divisors used to derive the two intermediate path components of a
# "tiered" HDF name: prefix.<id/TIER1_DIV>.<id/TIER2_DIV>.<id>
TIER2_DIV = 11
TIER1_DIV = 11 * TIER2_DIV

if not DEBUG:
    LOGGING_STATUS[DEV_UPDATE] = 0


def handleSignal(*arg):
    """Signal handler: set the module-level DONE flag."""
    global DONE
    DONE = 1


def usage(progname=None):
    """Print a short command-line summary.

    progname -- name to show for the program; defaults to sys.argv[0].
    """
    if progname is None:
        progname = sys.argv[0]
    print("usage: %s [--help] [-f file] [-v level] "
          "[--load=file [--lang=lang]]" % progname)


def exceptionString():
    """Return the traceback of the exception being handled, as a string."""
    import StringIO, traceback

    sfp = StringIO.StringIO()
    try:
        traceback.print_exc(file=sfp)
        return sfp.getvalue()
    finally:
        sfp.close()


def _tieredName(prefix, s_id):
    """Return the tiered HDF name "prefix.<t1>.<t2>.<s_id>" for a string id."""
    n = int(s_id)
    # divmod()[0] is floor division regardless of interpreter settings.
    return "%s.%d.%d.%s" % (prefix, divmod(n, TIER1_DIV)[0],
                            divmod(n, TIER2_DIV)[0], s_id)


def _ensureDir(path):
    """Create directory `path` (and parents); an existing directory is fine."""
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def _readFile(path):
    """Return the full contents of `path`, closing the file afterwards."""
    fp = open(path, 'r')
    try:
        return fp.read()
    finally:
        fp.close()


def _writeFile(path, data):
    """Write `data` to `path` (truncating it), closing the file afterwards."""
    fp = open(path, 'w')
    try:
        fp.write(data)
    finally:
        fp.close()


class TransLoc:
    """One place where a translatable string occurs.

    string_id -- id of the string in the strings table
    filename  -- template file (relative to the root) containing it
    location  -- "ofs:<offset>:<length>" for text files, or
                 "hdf:<name>" for HDF files
    """

    def __init__(self, string_id, filename, location):
        self.string_id = string_id
        self.filename = filename
        self.location = location


class Translator:
    """Finds translatable strings in templates and reads/writes translations.

    Extraction results are lists of (text, where, ishtml) tuples: `where`
    is an integer offset into the file for CS/HTML text, or the dotted HDF
    name (a string) for HDF values; `ishtml` is 1 for CS/HTML text and 0
    for HDF values.
    """

    _HTML_TAG_REGEX = '<[^!][^>]*?>'
    _HTML_TAG_RE = re.compile(_HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
    _HTML_CMT_REGEX = '<!--.*?-->'
    _HTML_CMT_RE = re.compile(_HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
    _CS_TAG_REGEX = '<\\?.+?\\?>'
    _CS_TAG_RE = re.compile(_CS_TAG_REGEX, re.MULTILINE | re.DOTALL)

    # One attribute inside a tag: name, optionally followed by =value where
    # value is single-quoted, double-quoted or bare.
    _ATTR_RE = re.compile(
        r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
        r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')

    def __init__(self):
        self.tdb = db_trans.trans_connect()

        # configuration data ......
        #  - we should stop hardcoding this... - jeske

        self.root = "testroot"
        self.languages = ['es', 'en']

        self.ignore_paths = ['tmpl/m']            # common place for mockups
        self.ignore_files = ['blah_ignore.cs']    # ignore clearsilver file

        # ignore clearsilver javascript files
        self.ignore_patterns = ['tmpl/[^ ]*_js.cs']

        # ............................

        if self.root is None:
            raise eTransError("Unable to determine installation root")

        # parseHTML state carried between chunks:
        #   0 = in text, 1 = inside an unfinished tag,
        #   2 = inside an unfinished comment
        self._html_state = 0

    def parseHTMLTag(self, data):
        """Return translatable attribute values found in one complete tag.

        `data` is the tag text without the surrounding '<' and '>'.  Only
        the `value` of <input type="submit"> / <input type="button"> is
        extracted.  Returns a list of (text, offset, 1) tuples with
        offsets relative to `data`.
        """
        # this is only called if we see a full tag in one parse...
        if len(data) == 0:
            return []
        if data[0] in '/?':
            return []
        i = 0
        while i < len(data) and data[i] not in ' \n\r\t>':
            i = i + 1
        if i == len(data):
            return []       # tag name only, no attributes
        tag = data[:i].lower()

        attrs = {}
        attrs_beg = {}
        k = i
        while k < len(data):
            match = Translator._ATTR_RE.match(data, k)
            if not match:
                break
            attrname, rest, attrvalue = match.group(1, 2, 3)
            if not rest:
                # valueless attribute: the name doubles as the value
                attrvalue = attrname
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
            attrname = attrname.lower()
            if attrname in attrs:
                log("Can't handle duplicate attrs: %s" % attrname)
            attrs[attrname] = attrvalue
            # start of the (possibly quoted) value; -1 if there is none
            attrs_beg[attrname] = match.start(3)
            k = match.end(0)

        candidates = []
        if tag == "input":
            if attrs.get('type', "").lower() in ["submit", "button"]:
                candidates.append((attrs.get('value', ''),
                                   attrs_beg.get('value', 0)))

        results = []
        for (text, beg) in candidates:
            if text and beg >= 0:
                # search from the value start to skip over any quote char
                pos = data.find(text, beg)
                if pos != -1:
                    results.append((text, pos, 1))
        return results

    def _trimmedSpan(self, data, start, end):
        """Return (text, offset, 1) for data[start:end] minus outer whitespace.

        An all-whitespace span is reduced to a single character and an
        empty span yields ''; callers filter those out later.
        """
        x = start
        y = end - 1
        while x < y and data[x] in string.whitespace:
            x = x + 1
        while y > x and data[y] in string.whitespace:
            y = y - 1
        return (data[x:y+1], x, 1)

    def parseHTML(self, data, reset=1):
        """Extract the text between tags/comments of an HTML fragment.

        data  -- the fragment to scan
        reset -- if true, forget any unfinished tag/comment left over from
                 the previous call; pass 0 when feeding consecutive chunks
                 of one document (as parseCS does)

        Returns a list of (text, offset, 1) tuples with offsets relative
        to `data`.  If the fragment ends inside a tag or comment, that is
        remembered in self._html_state for the next call.
        """
        if reset:
            self._html_state = 0
        if DEBUG:
            print("- %d ---------\n%s\n- E ---------"
                  % (self._html_state, data))

        results = []
        i = 0
        n = len(data)
        # if we had state from the last parse... find its end
        if self._html_state:
            if self._html_state == 2:
                closer = '-->'
            else:
                closer = '>'
            x = data.find(closer)
            if x == -1:
                return results      # still inside the tag/comment
            i = x + len(closer)
            self._html_state = 0

        while i < n:
            if DEBUG:
                print("MATCHING>%s<MATCHING" % data[i:])
            tag_b = data.find('<', i)
            if tag_b == -1:
                # no more markup: the rest is text
                results.append(self._trimmedSpan(data, i, n))
                break
            is_comment = (data[tag_b:tag_b+4] == '<!--')
            if DEBUG:
                print("B> tag_b=%d comment=%d <B" % (tag_b, is_comment))

            results.append(self._trimmedSpan(data, i, tag_b))

            # The closing marker is searched from the opening '<', not from
            # `i`: a stray '>' or '-->' in the preceding text must not be
            # mistaken for the end of this tag/comment.
            if is_comment:
                cmt_e = data.find('-->', tag_b)
                if cmt_e == -1:     # partial comment
                    self._html_state = 2
                    break
                i = cmt_e + 3
            else:
                tag_e = data.find('>', tag_b)
                if tag_e == -1:     # partial tag
                    self._html_state = 1
                    break
                base = tag_b + 1
                for (text, ofs, ishtml) in self.parseHTMLTag(data[base:tag_e]):
                    results.append((text, ofs + base, ishtml))
                i = tag_e + 1
        return results

    def parseCS(self, data):
        """Extract HTML text from a ClearSilver template.

        Everything inside <? ... ?> is skipped; the pieces in between are
        fed to parseHTML in order, so an HTML tag interrupted by a CS tag
        is still recognised.  Returns (text, offset, 1) tuples with
        offsets relative to `data`.
        """
        chunks = []
        i = 0
        n = len(data)
        while i < n:
            m = Translator._CS_TAG_RE.search(data, i)
            if not m:
                # search for a partial (unterminated) CS tag and drop it
                x = data.find('<?', i)
                if x == -1:
                    chunks.append((data[i:], i))
                else:
                    chunks.append((data[i:x], i))
                break
            (b, e) = m.span()
            if i != b:
                chunks.append((data[i:b], i))
            i = e

        # start this file with a clean HTML state; chunks then share it
        self._html_state = 0
        results = []
        for (chunk, base) in chunks:
            for (text, ofs, ishtml) in self.parseHTML(chunk, reset=0):
                results.append((text, ofs + base, ishtml))
        return results

    def descendHDF(self, obj, prefix):
        """Collect values of HDF nodes carrying a `Lang` attribute.

        Walks `obj` and its following siblings, recursing into children.
        Returns a list of (value, dotted_name, 0) tuples, where
        dotted_name is the node's name prefixed with `prefix`.
        """
        results = []
        while obj is not None:
            if prefix:
                name = "%s.%s" % (prefix, obj.name())
            else:
                name = obj.name()
            if obj.value():
                attr_names = [attr[0] for attr in obj.attrs()]
                if "Lang" in attr_names:
                    results.append((obj.value(), "%s" % name, 0))
            if obj.child():
                results = results + self.descendHDF(obj.child(), name)
            obj = obj.next()
        return results

    def parseHDF(self, data):
        """Extract translatable values from HDF source text.

        Only entire HDF elements which have the attribute Lang are
        returned; see descendHDF for the result format.
        """
        hdf = neo_util.HDF()
        hdf.readString(data, 1)
        return self.descendHDF(hdf, "")

    def handleFile(self, file):
        """Extract the strings of one template file.

        file -- path relative to self.root

        Returns the parser's result list, or [] when the file is ignored,
        has no or an unsupported extension, or yields nothing.
        """
        if file in self.ignore_files:
            return []
        for a_re in self.ignore_patterns:
            if re.match(a_re, file):
                return []
        x = file.rfind('.')
        if x == -1:
            return []
        ext = file[x:]
        if ext in ('.cst', '.cs'):
            parser = self.parseCS
        elif ext in ('.html', '.htm'):
            parser = self.parseHTML
        elif ext == '.hdf':
            parser = self.parseHDF
        else:
            # unsupported type: don't even read the file
            return []
        strings = parser(_readFile(self.root + '/' + file))
        if len(strings):
            print("Found %d strings in %s" % (len(strings), file))
            return strings
        return []

    def walkDirectory(self, path):
        """Recursively extract strings from every file below `path`.

        path -- directory relative to self.root

        Dot-files, directories listed in self.ignore_paths and
        sub-directories named "release" are skipped.  Returns a list of
        (relative_filename, strings) pairs for files that yielded strings.
        """
        if path in self.ignore_paths:
            return []
        fpath = self.root + '/' + path
        subdirs = []
        results = []
        for entry in os.listdir(fpath):
            if entry[0] == '.':
                continue
            if os.path.isdir(fpath + '/' + entry):
                subdirs.append(entry)
            else:
                rel = path + '/' + entry
                strings = self.handleFile(rel)
                if len(strings):
                    results.append((rel, strings))
        for subdir in subdirs:
            if subdir not in ["release"]:
                results = results + self.walkDirectory(path + '/' + subdir)
        return results

    def cleanHtmlString(self, s):
        """Collapse each whitespace run in `s` to one space and strip the ends."""
        return re.sub(r"\s+", " ", s).strip()

    def containsWords(self, s, ishtml):
        """Return 1 if `s` contains an ASCII letter or digit, else 0.

        For HTML text a few common entities are decoded first so that
        their names do not count as words.
        """
        if ishtml:
            # NOTE(review): entity names restored here; the reviewed copy
            # had them already decoded (making these replacements no-ops)
            # -- confirm against version control.
            s = s.replace('&nbsp;', ' ')
            s = s.replace('&quot;', '"')
            s = s.replace('&copy;', '')
            s = s.replace('&lt;', '<')
            s = s.replace('&gt;', '>')
            s = s.replace('&amp;', '&')
        for ch in s:
            n = ord(ch)
            # '0'-'9', 'A'-'Z', 'a'-'z'
            if (47 < n < 58) or (64 < n < 91) or (96 < n < 123):
                return 1
        return 0

    def findString(self, s):
        """Return the string_id for text `s`, adding a new row if needed.

        Raises eTransError if the text is stored more than once.
        """
        rows = self.tdb.strings.fetchRows(('string', s))
        if len(rows) == 0:
            row = self.tdb.strings.newRow()
            row.string = s
            row.save()
            return row.string_id
        if len(rows) > 1:
            raise eTransError("String %s exists multiple times!" % s)
        return rows[0].string_id

    def loadStrings(self, one_file=None, verbose=0):
        """Extract strings, write the "map" file and register them in the db.

        one_file -- if given, only this file (relative to the root) is
                    processed; otherwise the whole 'tmpl' tree is walked
        verbose  -- accepted for compatibility; currently unused

        Writes a file named "map" in the current directory listing every
        unique string with its locations.  Returns a list of TransLoc, one
        per occurrence.
        """
        if one_file is not None:
            results = [(one_file, self.handleFile(one_file))]
        else:
            results = self.walkDirectory('tmpl')

        uniq = {}       # text -> [(filename, where, raw_length), ...]
        seen_hdf = {}   # hdf name -> (text, filename) first seen
        cnt = 0
        for (fname, strings) in results:
            for (s, ofs, ishtml) in strings:
                if not (s and s.strip()):
                    continue
                # length of the raw text, needed to replace it in the
                # file later; must be taken before whitespace cleanup
                l = len(s)
                if ishtml:
                    s = self.cleanHtmlString(s)
                if not self.containsWords(s, ishtml):
                    continue
                if type(ofs) == type(""):   # HDF
                    prev = seen_hdf.get(ofs)
                    if prev is None:
                        seen_hdf[ofs] = (s, fname)
                    elif prev[0] != s:
                        log("Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s"
                            % (ofs, prev[1], prev[0], fname, s))
                uniq.setdefault(s, []).append((fname, ofs, l))
                cnt = cnt + 1

        print("%d strings, %d unique" % (cnt, len(uniq)))
        fp = open("map", 'w')
        try:
            for (s, locs) in uniq.items():
                loc_strs = ["%s:%s:%d" % loc for loc in locs]
                fp.write('#: %s\n' % ','.join(loc_strs))
                fp.write('msgid=%s\n\n' % repr(s))
        finally:
            fp.close()

        log("Loading strings/locations into database")
        locations = []
        for (s, locs) in uniq.items():
            s_id = self.findString(s)
            for (fname, ofs, l) in locs:
                if type(ofs) == type(""):   # ie, its HDF
                    location = "hdf:%s" % ofs
                else:
                    location = "ofs:%d:%d" % (ofs, l)
                locations.append(TransLoc(s_id, fname, location))
        return locations

    def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
        """Build an HDF dataset of the strings referenced by `locations`.

        prefix    -- HDF name under which the strings are stored
        locations -- objects with a string_id attribute (not modified)
        lang      -- language whose translations are preferred; strings
                     without a translation fall back to the original text
        exist     -- if true, emit only strings that have NO translation
                     for `lang` (always empty for 'en') and log their count
        tiered    -- if true, store under prefix.<t1>.<t2>.<id> instead of
                     prefix.<id>

        Returns the neo_util.HDF object.
        """
        hdf = neo_util.HDF()
        if exist and lang == 'en':
            return hdf

        maps_d = {}
        for map_row in self.tdb.maps.fetchRows(('lang', lang)):
            maps_d[int(map_row.string_id)] = map_row
        strings_d = {}
        for string_row in self.tdb.strings.fetchRows():
            strings_d[int(string_row.string_id)] = string_row

        # Each id once, in ascending order, so the output is deterministic.
        wanted = {}
        for loc in locations:
            wanted[int(loc.string_id)] = 1
        s_ids = list(wanted.keys())
        s_ids.sort()

        count = 0
        for s_id in s_ids:
            s_row = maps_d.get(s_id)
            if s_row is not None:
                if exist:
                    continue    # translated: not "missing"
            else:
                s_row = strings_d.get(s_id)
                if s_row is None:
                    log("Missing string_id %d, skipping" % s_id)
                    continue
            count = count + 1
            if tiered:
                hdf.setValue(_tieredName(prefix, s_id), s_row.string)
            else:
                hdf.setValue("%s.%s" % (prefix, s_id), s_row.string)
        if exist == 1:
            log("Missing %d strings for lang %s" % (count, lang))
        return hdf

    def dumpStrings(self, locations, lang=None):
        """Write strings_<lang>.hdf and strings_missing_<lang>.hdf files.

        lang -- a single language to dump; if None, 'en' plus every
                language present in nt_trans_maps is dumped

        Files are written to the current directory; the "missing" file is
        only written when it would be non-empty.
        """
        log("Dumping strings to HDF")
        if lang is None:
            langs = ['en']
            sql = "select lang from nt_trans_maps group by lang"
            cursor = self.tdb.defaultCursor()
            cursor.execute(sql)
            for row in cursor.fetchall():
                if row[0] not in langs:     # don't dump a language twice
                    langs.append(row[0])
        else:
            langs = [lang]

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang)
            hdf.writeFile("strings_%s.hdf" % a_lang)

        for a_lang in langs:
            hdf = self.stringsHDF('S', locations, a_lang, exist=1)
            if hdf.child():
                hdf.writeFile("strings_missing_%s.hdf" % a_lang)

    def fetchString(self, s_id, lang):
        """Return the text to substitute for string `s_id` in language `lang`.

        For the pseudo-language "hdf" a ClearSilver <?cs var: ?> reference
        to the tiered Lang.Extracted name is returned.  Otherwise the
        translation is returned, falling back to the original string (and
        logging that, unless lang is 'en').

        Raises eNoString if the id is not in the strings table.
        """
        if lang == "hdf":
            return "<?cs var:%s ?>" % _tieredName('Lang.Extracted', s_id)
        rows = self.tdb.maps.fetchRows([('string_id', s_id), ('lang', lang)])
        if len(rows) != 0:
            return rows[0].string
        try:
            row = self.tdb.strings.fetchRow(('string_id', s_id))
        except odb.eNoMatchingRows:
            log("Unable to find string id %s" % s_id)
            raise eNoString("Unable to find string id %s" % s_id)
        if lang != 'en':
            log("Untranslated string for id %s" % s_id)
        return row.string

    def dumpFiles(self, locations, lang):
        """Regenerate <root>/gen/tmpl with the strings substituted.

        locations -- TransLoc list as returned by loadStrings
        lang      -- language to substitute, or "hdf" to substitute
                     <?cs var: ?> references and write language files for
                     every language in self.languages

        The existing <root>/gen/tmpl tree is removed first.  Non-HDF
        templates are rewritten with each located string replaced; for
        HDF templates only the name -> string id mapping is collected and
        written to the per-language lang_map.hdf files.
        """
        log("Dumping files for %s" % lang)
        files = {}
        for row in locations:
            files.setdefault(row.filename, []).append(row)

        # Equivalent of "rm -rf" without going through a shell.
        gen_tmpl = "%s/gen/tmpl" % self.root
        if os.path.islink(gen_tmpl):
            os.remove(gen_tmpl)
        else:
            shutil.rmtree(gen_tmpl, 1)      # 1 = ignore_errors

        hdf_map = []    # (hdf name, string_id) pairs from .hdf templates
        for file in files.keys():
            fname = "%s/gen/%s" % (self.root, file)
            _ensureDir(os.path.dirname(fname))
            is_hdf = file.endswith('.hdf')
            offsets = []
            for loc in files[file]:
                parts = loc.location.split(':')
                if len(parts) == 3 and parts[0] == 'ofs' and not is_hdf:
                    offsets.append((int(parts[1]), int(parts[2]),
                                    loc.string_id))
                elif len(parts) == 2 and parts[0] == 'hdf' and is_hdf:
                    hdf_map.append((parts[1], loc.string_id))
                else:
                    log("Invalid location %s in %s (loc_id %s)"
                        % (loc.location, file, getattr(loc, 'loc_id', '?')))
            if not is_hdf:
                offsets.sort()
                self._writeTranslatedFile(file, fname, offsets, lang)

        self._writeLangFiles(locations, hdf_map, lang)

    def _writeTranslatedFile(self, file, fname, offsets, lang):
        """Copy template `file` to `fname`, replacing the located strings.

        offsets -- sorted list of (offset, length, string_id)
        """
        data = _readFile(self.root + '/' + file)
        out = []
        x = 0       # everything before x has been emitted
        for (start, length, s_id) in offsets:
            if start < x:
                # overlaps the previous replacement; give up on the rest
                log("How did we get here? %s x=%d ofs=%d sid=%s"
                    % (file, x, start, s_id))
                log("Data[x:20]: %s" % data[x:x+20])
                log("Data[ofs:20]: %s" % data[start:start+20])
                break
            if start > x:
                out.append(data[x:start])
            out.append(self.fetchString(s_id, lang))
            x = start + length
        if len(data) > x:
            out.append(data[x:])
        _writeFile(fname, ''.join(out))

    def _writeLangFiles(self, locations, hdf_map, lang):
        """Write lang_<lang>.hdf and lang_map.hdf under <root>/gen/tmpl.

        hdf_map -- (hdf name, string_id) pairs to put in lang_map.hdf
        """
        if lang == "hdf":
            langs = self.languages
        else:
            langs = [lang]

        for d_lang in langs:
            # dumping the extracted strings
            hdf = self.stringsHDF('Lang.Extracted', locations, d_lang,
                                  tiered=1)
            fname = "%s/gen/tmpl/lang_%s.hdf" % (self.root, d_lang)
            _ensureDir(os.path.dirname(fname))
            hdf.writeFile(fname)
            # wrap the generated file with a header and the map include
            data = _readFile(fname)
            _writeFile(fname,
                       '## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n'
                       + data
                       + '\n#include "lang_map.hdf"\n')

            # dumping the hdf strings file
            if d_lang == "en":
                map_file = "%s/gen/tmpl/lang_map.hdf" % (self.root)
            else:
                map_file = "%s/gen/tmpl/%s/lang_map.hdf" % (self.root, d_lang)
            _ensureDir(os.path.dirname(map_file))
            map_hdf = neo_util.HDF()
            for (name, s_id) in hdf_map:
                text = hdf.getValue(_tieredName('Lang.Extracted', s_id), '')
                map_hdf.setValue(name, text)
            map_hdf.writeFile(map_file)

    def loadMap(self, file, prefix, lang):
        """Store the strings of an HDF file as translations for `lang`.

        file   -- HDF file to read
        prefix -- HDF node whose children are named by string id and hold
                  the translated text
        lang   -- language the translations belong to

        Existing rows are updated only when the text differs; missing
        rows are created.
        """
        log("Loading map for language %s" % lang)
        hdf = neo_util.HDF()
        hdf.readFile(file)
        obj = hdf.getChild(prefix)
        updates = 0
        new_r = 0
        while obj is not None:
            s_id = obj.name()
            text = obj.value()

            try:
                map_r = self.tdb.maps.fetchRow(
                    [('string_id', s_id), ('lang', lang)])
            except odb.eNoMatchingRows:
                map_r = self.tdb.maps.newRow()
                map_r.string_id = s_id
                map_r.lang = lang
                new_r = new_r + 1

            if map_r.string != text:
                updates = updates + 1
                map_r.string = text
                map_r.save()

            obj = obj.next()
        log("New maps: %d Updates: %d" % (new_r, updates - new_r))


def main(argv):
    """Command-line entry point.

    Options:
      --help        print usage and return -1
      -f FILE       extract from this single file instead of the tree
      -v LEVEL      verbosity (integer), passed to loadStrings
      --load=FILE   load translations from FILE instead of extracting
      --lang=LANG   language of the file given with --load (default 'en')

    Returns -1 for --help or bad options, None otherwise.
    """
    try:
        alist, args = getopt.getopt(argv[1:], "f:v:",
                                    ["help", "load=", "lang="])
    except getopt.error:
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        usage(argv[0])
        return -1

    one_file = None
    verbose = 0
    load_file = None
    lang = 'en'
    for (field, val) in alist:
        if field == "--help":
            usage(argv[0])
            return -1
        if field == "-f":
            one_file = val
        if field == "-v":
            verbose = int(val)
        if field == "--load":
            load_file = val
        if field == "--lang":
            lang = val

    # NOTE: handleSignal is not installed for SIGTERM/SIGINT; Ctrl-C is
    # handled through KeyboardInterrupt below.

    log("trans: start")

    try:
        t = Translator()
        if load_file:
            t.loadMap(load_file, 'S', lang)
        else:
            locations = t.loadStrings(one_file, verbose=verbose)
            t.dumpStrings(locations)
            t.dumpFiles(locations, 'hdf')
    except KeyboardInterrupt:
        pass
    except:
        # Top-level boundary: report anything else through the project's
        # error handler rather than dying with a bare traceback.
        import handle_error
        handle_error.handleException("Translation Error")


if __name__ == "__main__":
    main(sys.argv)