1#!/neo/opt/bin/python
2
3import sys, string, os, getopt, pwd, signal, time, re
4import fcntl
5
6import tstart
7
8import db_trans
9from log import *
10import neo_cgi, neo_util
11import odb
12
# Error raised for translation-database consistency problems.
# BUGFIX: this was a bare string ("string exception"), which Python 2.6+
# refuses to raise; an Exception subclass stays compatible with both
# `raise eTransError, msg` and `except eTransError` used below.
class eTransError(Exception):
  pass

DONE = 0    # set to 1 by handleSignal() to request a clean shutdown
DEBUG = 0   # non-zero enables verbose parser tracing

# Divisors used to bucket string ids into a two-level HDF hierarchy
# (Lang.Extracted.<id/121>.<id/11>.<id>) so no single HDF node gets an
# unbounded number of children.
TIER2_DIV = 11
TIER1_DIV = 11 * TIER2_DIV

# silence developer-update logging unless we are debugging
if not DEBUG: LOGGING_STATUS[DEV_UPDATE] = 0
22
def handleSignal(*_args):
  """Signal handler: flag the main loop to finish up and exit."""
  global DONE
  DONE = 1
26
def usage(progname=None):
  """Print command-line usage.

  Accepts the optional program name that main() passes as usage(argv[0]);
  the original zero-argument signature made that call raise TypeError.
  The old body only printed a "usage info!!" placeholder.
  """
  if progname is None:
    progname = sys.argv[0]
  print("usage: %s [-f file] [-v level] [--load file] [--lang lang] [--help]" % progname)
29
def exceptionString():
  """Return the formatted traceback of the exception being handled.

  Must be called from inside an except block.  Uses the stdlib
  traceback.format_exc() (available since Python 2.4) instead of
  hand-rolling the same output with StringIO + traceback.print_exc().
  """
  import traceback

  return traceback.format_exc()
40
class TransLoc:
    """Value object tying a string id to one occurrence in a file."""

    def __init__ (self, string_id, filename, location):
        # location is either "ofs:<offset>:<length>" or "hdf:<hdf.name>"
        self.string_id = string_id
        self.filename = filename
        self.location = location
46
47class Translator:
48    _HTML_TAG_RE = None
49    _HTML_TAG_REGEX = '<[^!][^>]*?>'
50    _HTML_CMT_RE = None
51    _HTML_CMT_REGEX = '<!--.*?-->'
52    _CS_TAG_RE = None
53    _CS_TAG_REGEX = '<\\?.+?\\?>'
54
55    def __init__ (self):
56        self.tdb = db_trans.trans_connect()
57
58        # configuration data ......
59        #  - we should stop hardcoding this... - jeske
60
61        self.root = "testroot"
62        self.languages = ['es', 'en']
63
64        self.ignore_paths = ['tmpl/m']  # common place for mockups
65        self.ignore_files = ['blah_ignore.cs'] # ignore clearsilver file
66
67        # ignore clearsilver javascript files
68        self.ignore_patterns = ['tmpl/[^ ]*_js.cs']
69
70        # ............................
71
72
73        if self.root is None:
74            raise "Unable to determine installation root"
75
76
77        if Translator._HTML_TAG_RE is None:
78            Translator._HTML_TAG_RE = re.compile(Translator._HTML_TAG_REGEX, re.MULTILINE | re.DOTALL)
79        if Translator._HTML_CMT_RE is None:
80            Translator._HTML_CMT_RE = re.compile(Translator._HTML_CMT_REGEX, re.MULTILINE | re.DOTALL)
81        if Translator._CS_TAG_RE is None:
82            Translator._CS_TAG_RE = re.compile(Translator._CS_TAG_REGEX, re.MULTILINE | re.DOTALL)
83
84        self._html_state = 0
85
86
87    def parseHTMLTag(self, data):
88        # this is only called if we see a full tag in one parse...
89        i = 0
90        if len(data) == 0: return []
91        if data[0] in '/?': return []
92        while i < len(data) and data[i] not in ' \n\r\t>': i = i + 1
93        if i == len(data): return []
94        tag = data[:i].lower()
95        #print "Searching tag: %s" % data
96        #print "Found tag: %s" % tag
97        results = []
98        attrfind = re.compile(
99            r'\s*([a-zA-Z_][-.a-zA-Z_0-9]*)(\s*=\s*'
100            r'(\'[^\']*\'|"[^"]*"|[^ \t\n<>]*))?')
101        k = i
102        attrs = {}
103        attrs_beg = {}
104        while k < len(data):
105            match = attrfind.match(data, k)
106            if not match: break
107            attrname, rest, attrvalue = match.group(1, 2, 3)
108            if not rest:
109               attrvalue = attrname
110            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
111                 attrvalue[:1] == '"' == attrvalue[-1:]:
112               attrvalue = attrvalue[1:-1]
113            attrname = attrname.lower()
114            if attrs.has_key(attrname):
115                log("Can't handle duplicate attrs: %s" % attrname)
116            attrs[attrname] = attrvalue
117            attrs_beg[attrname] = match.start(3)
118            k = match.end(0)
119
120        find_l = []
121        if tag == "input":
122            if attrs.get('type', "").lower() in ["submit", "button"]:
123                find_l.append((attrs.get('value', ''), attrs_beg.get('value', 0)))
124
125        for s,k in find_l:
126            if s:
127                x = data[k:].find(s)
128                if x != -1: results.append((s, x+k, 1))
129
130        return results
131
    def parseHTML(self, data, reset=1):
        """Extract translatable text chunks from HTML data.

        Returns a list of (string, offset, is_html=1) tuples: the text
        runs outside of tags/comments, plus whatever parseHTMLTag() finds
        inside complete tags.  With reset=0 the parse resumes from the
        tag/comment state left by the previous call (used by parseCS,
        because a tag can straddle a <?cs ?> section).
        self._html_state: 0 = plain text, 1 = inside a tag, 2 = inside a
        comment.
        """
        if reset: self._html_state = 0
        if DEBUG: print "- %d ---------\n%s\n- E ---------" % (self._html_state, data)

        results = []
        i = 0
        n = len(data)
        # if we had state from the last parse... find it
        if self._html_state:
            if self._html_state == 2:
                # we were inside a comment: skip past its terminator
                x = string.find(data[i:], '-->')
                l = 3
            else:
                # we were inside a tag: skip past the closing '>'
                x = string.find(data[i:], '>')
                l = 1
            if x == -1: return results
            i = i + x + l
            self._html_state = 0
        while i < n:
            if DEBUG: print "MATCHING>%s<MATCHING" % data[i:]
            # all four offsets below are relative to data[i:]
            cmt_b = string.find(data[i:], '<!--')
            cmt_e = string.find(data[i:], '-->')
            tag_b = string.find(data[i:], '<')
            tag_e = string.find(data[i:], '>')
            if DEBUG: print "B> %d %d %d %d <B" % (cmt_b, cmt_e, tag_b, tag_e)
            if cmt_b != -1 and cmt_b <= tag_b:
                # comment starts first: emit the text before it, trimmed
                # of surrounding whitespace
                x = i
                y = i+cmt_b-1
                while x < y and data[x] in string.whitespace: x+=1
                while y > x and data[y] in string.whitespace: y-=1
                results.append((data[x:y+1], x, 1))
                if cmt_e == -1: # partial comment:
                    self._html_state = 2
                    break
                i = i + cmt_e + 3
            elif tag_b != -1:
                # tag starts first: emit the text before it, trimmed
                x = i
                y = i+tag_b-1
                while x < y and data[x] in string.whitespace: x+=1
                while y > x and data[y] in string.whitespace: y-=1
                results.append((data[x:y+1], x, 1))
                if tag_e == -1: # partial tag
                    self._html_state = 1
                    break
                # complete tag: mine it for translatable attributes and
                # shift the sub-offsets back into data coordinates
                h_results = self.parseHTMLTag(data[i+tag_b+1:i+tag_e])
                h_results = map(lambda x: (x[0], x[1] + i+tag_b+1, x[2]), h_results)
                results = results + h_results
                i = i + tag_e + 1
            else:
                # no more markup: the remainder is plain text
                x = i
                y = n-1
                while x < y and data[x] in string.whitespace: x+=1
                while y > x and data[y] in string.whitespace: y-=1
                results.append((data[x:y+1], x, 1))
                break
        return results
188
189    def parseCS(self, data):
190        results = []
191        i = 0
192        n = len(data)
193        while i < n:
194            m = Translator._CS_TAG_RE.search(data, i)
195            if not m:
196                # search for a partial...
197                x = string.find(data[i:], '<?')
198                if x == -1:
199                    results.append((data[i:], i))
200                else:
201                    results.append((data[i:x], i))
202                break
203            (b, e) = m.span()
204            if i != b: results.append((data[i:b], i))
205            i = e
206        t_results = []
207        self._html_in = 0
208        for (s, ofs) in results:
209            r = self.parseHTML(s, reset=0)
210            r = map(lambda x: (x[0], x[1] + ofs, x[2]), r)
211            t_results = t_results + r
212        return t_results
213
214    def descendHDF(self, obj, prefix):
215        results = []
216        while obj is not None:
217            if obj.value():
218                attrs = obj.attrs()
219                attrs = map(lambda x: x[0], attrs)
220                if "Lang" in attrs:
221                    if prefix:
222                        results.append((obj.value(), "%s.%s" % (prefix, obj.name()), 0))
223                    else:
224                        results.append((obj.value(), "%s" % (obj.name()), 0))
225            if obj.child():
226                if prefix:
227                    results = results + self.descendHDF(obj.child(), "%s.%s" % (prefix, obj.name()))
228                else:
229                    results = results + self.descendHDF(obj.child(), (obj.name()))
230            obj = obj.next()
231        return results
232
233    def parseHDF(self, data):
234        # Ok, we handle HDF files specially.. the theory is, we only
235        # extract entire HDF elements which have the attribute Lang
236        hdf = neo_util.HDF()
237        hdf.readString(data, 1)
238        return self.descendHDF(hdf, "")
239
240    def handleFile(self, file):
241        if file in self.ignore_files: return []
242        for a_re in self.ignore_patterns:
243            if re.match(a_re,file):
244                return []
245        fpath = self.root + '/' + file
246        x = string.rfind(file, '.')
247        if x == -1: return []
248        data = open(fpath, 'r').read()
249        ext = file[x:]
250        strings = []
251        if ext in ['.cst', '.cs']:
252            strings = self.parseCS(data)
253        elif ext in ['.html', '.htm']:
254            strings = self.parseHTML(data)
255        elif ext in ['.hdf']:
256            strings = self.parseHDF(data)
257        if len(strings):
258            print "Found %d strings in %s" % (len(strings), file)
259            return strings
260        return []
261
262    def walkDirectory(self, path):
263        if path in self.ignore_paths: return []
264        fpath = self.root + '/' + path
265        files = os.listdir(fpath)
266        dirs = []
267        results = []
268        for file in files:
269            if file[0] == '.': continue
270            fname = fpath + '/' + file
271            if os.path.isdir(fname):
272                dirs.append(file)
273            else:
274                strings = self.handleFile(path + '/' + file)
275                if len(strings):
276                    results.append((path + '/' + file, strings))
277        for dir in dirs:
278            if dir not in ["release"]:
279                results = results + self.walkDirectory(path + '/' + dir)
280        return results
281
282    def cleanHtmlString(self, s):
283        s = re.sub("\s+", " ", s)
284        return string.strip(s)
285
286    def containsWords(self, s, ishtml):
287        if ishtml:
288            s = string.replace(s, '&nbsp;', ' ')
289            s = string.replace(s, '&quot;', '"')
290            s = string.replace(s, '&copy;', '')
291            s = string.replace(s, '&lt;', '<')
292            s = string.replace(s, '&gt;', '>')
293            s = string.replace(s, '&amp;', '&')
294        for x in range (len (s)):
295          n = ord(s[x])
296          if (n>47 and n<58) or (n>64 and n<91) or (n>96 and n<123): return 1
297        return 0
298
299    def findString(self, s):
300        rows = self.tdb.strings.fetchRows( ('string', s) )
301        if len(rows) == 0:
302            row = self.tdb.strings.newRow()
303            row.string = s
304            row.save()
305            return row.string_id
306        elif len(rows) > 1:
307            raise eTransError, "String %s exists multiple times!" % s
308        else:
309            return rows[0].string_id
310
    def loadStrings(self, one_file=None, verbose=0):
        """Collect translatable strings (whole tmpl/ tree, or just
        one_file), write a "map" file of locations/msgids, sync the
        strings table, and return a TransLoc list for every occurrence.

        NOTE(review): the verbose argument is currently unused.
        """
        if one_file is not None:
            strings = self.handleFile(one_file)
            results = [(one_file, strings)]
        else:
            results = self.walkDirectory('tmpl')
        # uniq: cleaned string -> [(filename, offset-or-hdfname, raw length)]
        uniq = {}
        cnt = 0
        # seen_hdf: hdf name -> (string, filename), to warn on conflicts
        seen_hdf = {}
        for fname, strings in results:
            for (s, ofs, ishtml) in strings:
                if s and string.strip(s):
                    # keep the raw length: file offsets refer to the
                    # un-cleaned text
                    l = len(s)
                    if ishtml:
                        s = self.cleanHtmlString(s)
                    if self.containsWords(s, ishtml):
                        # a string ofs means byte offset; HDF entries are
                        # keyed by their dotted name instead
                        if type(ofs) == type(""): # HDF
                            if seen_hdf.has_key(ofs):
                                if seen_hdf[ofs][0] != s:
                                    log("Duplicate HDF Name %s:\n\t file %s = %s\n\t file %s = %s" % (ofs, seen_hdf[ofs][1], seen_hdf[ofs][0], fname, s))
                            else:
                                seen_hdf[ofs] = (s, fname)
                        try:
                            uniq[s].append((fname, ofs, l))
                        except KeyError:
                            uniq[s] = [(fname, ofs, l)]
                        cnt = cnt + 1
        print "%d strings, %d unique" % (cnt, len(uniq.keys()))
        # dump a human-readable "#: locations / msgid=" map file
        fp = open("map", 'w')
        for (s, locs) in uniq.items():
            locs = map(lambda x: "%s:%s:%d" % x, locs)
            fp.write('#: %s\n' % (string.join(locs, ',')))
            fp.write('msgid=%s\n\n' % repr(s))

        log("Loading strings/locations into database")
        locations = []
        for (s, locs) in uniq.items():
            s_id = self.findString(s)
            for (fname, ofs, l) in locs:
                if type(ofs) == type(""): # ie, its HDF
                    location = "hdf:%s" % ofs
                else:
                    location = "ofs:%d:%d" % (ofs, l)
                loc_r = TransLoc(s_id, fname, location)
                locations.append(loc_r)
        return locations
357
358    def stringsHDF(self, prefix, locations, lang='en', exist=0, tiered=0):
359        hdf = neo_util.HDF()
360        if exist and lang == 'en': return hdf
361        done = {}
362        locations.sort()
363        maps = self.tdb.maps.fetchRows( ('lang', lang) )
364        maps_d = {}
365        for map in maps:
366            maps_d[int(map.string_id)] = map
367        strings = self.tdb.strings.fetchRows()
368        strings_d = {}
369        for string in strings:
370            strings_d[int(string.string_id)] = string
371        count = 0
372        for loc in locations:
373            s_id = int(loc.string_id)
374            if done.has_key(s_id): continue
375            try:
376                s_row = maps_d[s_id]
377                if exist: continue
378            except KeyError:
379                try:
380                    s_row = strings_d[s_id]
381                except KeyError:
382                    log("Missing string_id %d, skipping" % s_id)
383                    continue
384            count = count + 1
385            if tiered:
386                hdf.setValue("%s.%d.%d.%s" % (prefix, int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), s_row.string)
387            else:
388                hdf.setValue("%s.%s" % (prefix, s_id), s_row.string)
389            done[s_id] = 1
390        if exist == 1: log("Missing %d strings for lang %s" % (count, lang))
391        return hdf
392
393    def dumpStrings(self, locations, lang=None):
394        log("Dumping strings to HDF")
395        if lang is None:
396            langs = ['en']
397            sql = "select lang from nt_trans_maps group by lang"
398            cursor = self.tdb.defaultCursor()
399            cursor.execute(sql)
400            rows = cursor.fetchall()
401            for row in rows:
402                langs.append(row[0])
403        else:
404            langs = [lang]
405
406        for a_lang in langs:
407            hdf = self.stringsHDF('S', locations, a_lang)
408            hdf.writeFile("strings_%s.hdf" % a_lang)
409
410        for a_lang in langs:
411            hdf = self.stringsHDF('S', locations, a_lang, exist=1)
412            if hdf.child():
413                hdf.writeFile("strings_missing_%s.hdf" % a_lang)
414
415    def fetchString(self, s_id, lang):
416        if lang == "hdf":
417            return "<?cs var:Lang.Extracted.%d.%d.%s ?>" % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id)
418        rows = self.tdb.maps.fetchRows( [('string_id', s_id), ('lang', lang)] )
419        if len(rows) == 0:
420            try:
421                row = self.tdb.strings.fetchRow( ('string_id', s_id) )
422            except odb.eNoMatchingRows:
423                log("Unable to find string id %s" % s_id)
424                raise eNoString
425            if lang != 'en':
426                log("Untranslated string for id %s" % s_id)
427            return row.string
428        else:
429            return rows[0].string
430
    def dumpFiles(self, locations, lang):
        """Regenerate the template tree under <root>/gen with each
        recorded string occurrence replaced by its lang translation.

        With lang == "hdf", occurrences are replaced by Clearsilver
        <?cs var: ?> references and per-language lang_<lang>.hdf plus
        lang_map.hdf files are written for every configured language.
        """
        log("Dumping files for %s" % lang)
        # group the locations by source file
        files = {}
        for row in locations:
            try:
                files[row.filename].append(row)
            except KeyError:
                files[row.filename] = [row]

        # (hdf name, string_id) pairs collected from .hdf files
        hdf_map = []

        os.system("rm -rf %s/gen/tmpl" % (self.root))
        for file in files.keys():
            fname = "%s/gen/%s" % (self.root, file)
            try:
                os.makedirs(os.path.dirname(fname))
            except OSError, reason:
                # errno 17 == EEXIST: directory already present is fine
                if reason[0] != 17:
                    raise
            do_hdf = 0
            x = string.rfind(file, '.')
            if x != -1 and file[x:] == '.hdf':
                do_hdf = 1
            # ofs accumulates (offset, length, string_id) patches
            ofs = []
            for loc in files[file]:
                parts = string.split(loc.location, ':')
                if len(parts) == 3 and parts[0] == 'ofs' and do_hdf == 0:
                    ofs.append((int(parts[1]), int(parts[2]), loc.string_id))
                elif len(parts) == 2 and parts[0] == 'hdf' and do_hdf == 1:
                    hdf_map.append((parts[1], loc.string_id))
                else:
                    log("Invalid location for loc_id %s" % loc.loc_id)
                    continue
            if not do_hdf:
                # splice the translations in at ascending offsets
                ofs.sort()
                data = open(self.root + '/' + file).read()
                # ok, now we split up the original data into sections
                x = 0
                n = len(data)
                out = []
                #sys.stderr.write("%s\n" % repr(ofs))
                while len(ofs):
                    if ofs[0][0] > x:
                        # copy untranslated text up to the next patch
                        out.append(data[x:ofs[0][0]])
                        x = ofs[0][0]
                    elif ofs[0][0] == x:
                        # emit the translation and skip the original text
                        out.append(self.fetchString(ofs[0][2], lang))
                        x = ofs[0][0] + ofs[0][1]
                        ofs = ofs[1:]
                    else:
                        # overlapping/duplicate offsets: give up on file
                        log("How did we get here? %s x=%d ofs=%d sid=%d" % (file, x, ofs[0][0], ofs[0][2]))
                        log("Data[x:20]: %s" % data[x:20])
                        log("Data[ofs:20]: %s" % data[ofs[0][0]:20])
                        break
                if n > x:
                    out.append(data[x:])
                odata = string.join(out, '')
                open(fname, 'w').write(odata)

        if lang == "hdf":
            langs = self.languages
        else:
            langs = [lang]

        for d_lang in langs:
          # dumping the extracted strings
          hdf = self.stringsHDF('Lang.Extracted', locations, d_lang, tiered=1)
          fname = "%s/gen/tmpl/lang_%s.hdf" % (self.root, d_lang)
          hdf.writeFile(fname)
          # re-write the file with a generated-file banner and chain in
          # the name map
          data = open(fname).read()
          fp = open(fname, 'w')
          fp.write('## AUTOMATICALLY GENERATED -- DO NOT EDIT\n\n')
          fp.write(data)
          fp.write('\n#include "lang_map.hdf"\n')

          # dumping the hdf strings file
          if d_lang == "en":
            map_file = "%s/gen/tmpl/lang_map.hdf" % (self.root)
          else:
            map_file = "%s/gen/tmpl/%s/lang_map.hdf" % (self.root, d_lang)
          try:
              os.makedirs(os.path.dirname(map_file))
          except OSError, reason:
              # errno 17 == EEXIST
              if reason[0] != 17: raise
          map_hdf = neo_util.HDF()
          # map each original HDF name to its (tiered) translated string
          for (name, s_id) in hdf_map:
              str = hdf.getValue('Lang.Extracted.%d.%d.%s' % (int(s_id) / TIER1_DIV, int(s_id) / TIER2_DIV, s_id), '')
              map_hdf.setValue(name, str)
          map_hdf.writeFile(map_file)
520
521    def loadMap(self, file, prefix, lang):
522        log("Loading map for language %s" % lang)
523        hdf = neo_util.HDF()
524        hdf.readFile(file)
525        obj = hdf.getChild(prefix)
526        updates = 0
527        new_r = 0
528        while obj is not None:
529            s_id = obj.name()
530            str = obj.value()
531
532            try:
533                map_r = self.tdb.maps.fetchRow( [('string_id', s_id), ('lang', lang)])
534            except odb.eNoMatchingRows:
535                map_r = self.tdb.maps.newRow()
536                map_r.string_id = s_id
537                map_r.lang = lang
538                new_r = new_r + 1
539
540            if map_r.string != str:
541                updates = updates + 1
542                map_r.string = str
543                map_r.save()
544
545            obj = obj.next()
546        log("New maps: %d  Updates: %d" % (new_r, updates - new_r))
547
548
def main(argv):
  """Command-line entry point.

  Flags: -f FILE (process a single file), -v LEVEL (verbosity),
  --load FILE (import a translation map for --lang LANG), --help.
  Default action: extract strings, dump them to HDF, and regenerate the
  template tree with "hdf" placeholder references.
  """
  alist, args = getopt.getopt(argv[1:], "f:v:", ["help", "load=", "lang="])

  one_file = None
  verbose = 0
  load_file = None
  lang = 'en'
  for (field, val) in alist:
    if field == "--help":
      # NOTE(review): usage() is defined without parameters in this file,
      # so this call raises TypeError -- verify usage()'s signature.
      usage(argv[0])
      return -1
    if field == "-f":
      one_file = val
    if field == "-v":
      verbose = int(val)
    if field == "--load":
        load_file = val
    if field == "--lang":
        lang = val


  global DONE

  #signal.signal(signal.SIGTERM, handleSignal)
  #signal.signal(signal.SIGINT, handleSignal)

  log("trans: start")

  start_time = time.time()

  try:
    t = Translator()
    if load_file:
        t.loadMap(load_file, 'S', lang)
    else:
        locations = t.loadStrings(one_file, verbose=verbose)
        t.dumpStrings(locations)
        t.dumpFiles(locations, 'hdf')
  except KeyboardInterrupt:
    pass
  except:
    # broad catch: delegate anything unexpected to the project-wide
    # error handler rather than crashing with a raw traceback
    import handle_error
    handle_error.handleException("Translation Error")
592
# script entry point
if __name__ == "__main__":
  main(sys.argv)
595