1#!/usr/bin/env python3
2import polib
3import os
4import re
5import sys
# Conversion characters that terminate a printf-style format specifier;
# decompose_format_strings() scans forward from a "%" until it meets one.
types = [
    "d", "i", "u",  # integer conversions
    "f", "e", "g",  # floating-point conversions
    "x",            # hexadecimal
    "s", "c",       # string and single character
    "p",            # pointer
]
7
8
def findall(p, s):
    """Yield the index of every occurrence of ``p`` in ``s``.

    The search resumes one character after each hit, so overlapping
    occurrences are reported as well.
    """
    start = 0
    while True:
        hit = s.find(p, start)
        if hit == -1:
            return
        yield hit
        start = hit + 1
14
15
def is_formatted_entry(msg):
    """Heuristically decide whether ``msg`` is a printf-style format string.

    Returns True only when ``msg`` contains at least one "%" and every "%"
    that is not part of a "%%" escape is immediately followed by an ASCII
    digit or by one of ``d``, ``s``, ``c``, ``l``, ``f``, ``.`` (a "."
    must itself be followed by an ASCII digit).  A "%" as the very last
    character makes the message count as not formatted.
    """
    def is_ascii_digit(ch):
        # str.isnumeric() also accepts CJK numerals, fractions, superscripts
        # and so on, which printf never does; a translation such as
        # "100%<CJK numeral>" must not be mistaken for a format string.
        return "0" <= ch <= "9"

    allowed_followers = ("d", "s", "c", "l", "f", ".")
    length = len(msg)
    pos = msg.find("%")
    if pos == -1:
        return False
    while pos != -1:
        if pos + 1 == length:
            return False
        follower = msg[pos + 1]
        if follower == "%":
            # "%%" is an escaped percent sign: jump over both characters.
            pos = msg.find("%", pos + 2)
            continue
        if not is_ascii_digit(follower) and follower not in allowed_followers:
            return False
        if follower == "." and \
           (pos + 2 == length or not is_ascii_digit(msg[pos + 2])):
            return False
        pos = msg.find("%", pos + 1)
    return True
34
35
def decompose_format_strings(msg):
    """Extract the printf-style format specifiers found in ``msg``.

    Returns a tuple ``(non_positional, positional, segments)``:

    * ``non_positional`` -- specifiers such as ``%s`` or ``%3d`` in order of
      appearance, with every run of digits removed so that ``%d`` and
      ``%2d`` compare equal;
    * ``positional`` -- specifiers of the form ``%1$s``; exactly four
      characters are taken starting at the "%";
    * ``segments`` -- one ``(start, end)`` index pair per specifier, with
      ``end`` exclusive.

    "%%" escapes are skipped.  A "%" that is the last character, or that is
    not followed anywhere by one of the characters in ``types``, ends the
    scan without producing a specifier.
    """
    non_positional = []
    positional = []
    segments = []
    length = len(msg)
    index_iter = findall("%", msg)
    for i in index_iter:
        if i + 1 == length:
            break
        if msg[i + 1] == "%":  # skip %%
            next(index_iter, None)
            continue
        # Only ASCII digits may form a position; str.isnumeric() would also
        # accept characters such as CJK numerals.  The bounds check keeps a
        # trailing "%<digit>" from indexing past the end of the string.
        if "0" <= msg[i + 1] <= "9" and \
           i + 2 < length and msg[i + 2] == "$":
            # positional format strings %1$s, %2$d, etc.
            positional.append(msg[i:i + 4])
            segments.append((i, i + 4))
            continue
        # non-positional format strings %s, %3d, etc.
        idx = i + 1
        while idx < length and msg[idx] not in types:
            idx += 1
        if idx == length:
            # No conversion character follows this "%": not a specifier.
            break
        # ignore the cases where "%d" is translated to "%2d"
        non_positional.append(re.sub(r'[0-9]+', '', msg[i:idx + 1]))
        segments.append((i, idx + 1))
    return (non_positional, positional, segments)
63
64
def get_type(arg):
    """Return the trailing part of ``arg`` that names the conversion type.

    Scanning from the end, everything after the last numeric character,
    ".", "$" or "%" is returned (for example "s" for "%1$s", "ld" for
    "%ld").  If none of those characters occurs, ``arg`` is returned whole.
    """
    cut = len(arg)
    while cut > 0:
        ch = arg[cut - 1]
        if ch.isnumeric() or ch in (".", "$", "%"):
            break
        cut -= 1
    return arg[cut:]
73
74
def check_message(entry):
    """Compare the format specifiers of ``entry.msgid`` and ``entry.msgstr``.

    Returns ``(ok, seg_msgid, seg_msgstr, reason)``.  When ``ok`` is True
    the two segment lists are empty and ``reason`` is "".  Otherwise the
    segment lists hold the ``(start, end)`` spans of the specifiers in the
    original and/or translated text and ``reason`` describes the problem.
    """
    passed = (True, [], [], "")
    source, translation = entry.msgid, entry.msgstr
    if not (is_formatted_entry(source) or is_formatted_entry(translation)):
        return passed

    mixed_reason = ("Cannot mix positional and non-positional arguments"
                    " in format string")
    src_plain, src_numbered, src_spans = decompose_format_strings(source)
    if src_plain and src_numbered:
        return (False, src_spans, [], mixed_reason)
    dst_plain, dst_numbered, dst_spans = decompose_format_strings(translation)
    if dst_plain and dst_numbered:
        return (False, [], dst_spans, mixed_reason)

    src_numbered = sorted(src_numbered)
    dst_numbered = sorted(dst_numbered)
    if src_plain == dst_plain and src_numbered == dst_numbered:
        return passed

    # "%2$d %1$s" is considered equivalent to "%s %d"
    # as the order of types is preserved
    src_types = [get_type(spec) for spec in src_plain + src_numbered]
    dst_types = [get_type(spec) for spec in dst_plain + dst_numbered]
    if len(src_types) != len(dst_types):
        return (False, src_spans, dst_spans, "The number of arguments differ")
    if src_types != dst_types:
        return (False, src_spans, dst_spans, "The types of arguments differ")
    return passed
108
109
def check_po_file(file):
    """Run check_message() over the translated entries of one .po file.

    ``file`` is handed to ``polib.pofile``.  Returns a list with one
    ``(msgid, msgstr, seg_msgid, seg_msgstr, reason)`` tuple per entry
    that failed the check; entries with a plural form are skipped.
    """
    failures = []
    for entry in polib.pofile(file).translated_entries():
        if entry.msgid_plural:
            # TODO: implement proper check for plural messages
            continue
        ok, seg_msgid, seg_msgstr, reason = check_message(entry)
        if ok:
            continue
        failures.append(
            (entry.msgid, entry.msgstr, seg_msgid, seg_msgstr, reason))
    return failures
122
123
class bcolors:
    """ANSI terminal escape sequences used to decorate the report."""

    # Resets every attribute set by the sequences below.
    ENDC = '\033[0m'

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours.
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'
    OKCYAN = '\033[96m'
134
135
def print_message(msg, segments):
    """Print ``msg`` with every ``(start, end)`` span in ``segments`` marked.

    The FAIL and UNDERLINE sequences are emitted before the character at
    each ``start`` index and ENDC before the character at each ``end``
    index (a start takes precedence when an index is both).  The line is
    always terminated with ENDC.
    """
    starts = {lo for (lo, _) in segments}
    stops = {hi for (_, hi) in segments}
    for pos, ch in enumerate(msg):
        if pos in starts:
            print(bcolors.FAIL + bcolors.UNDERLINE, end="")
        elif pos in stops:
            print(bcolors.ENDC, end="")
        print(ch, end="")
    print(bcolors.ENDC)
147
148
# Script body: check the catalogues found in lang/po (all of them, or only
# the languages named on the command line) and report every mismatch.

# Every *.po file except names ending in "en.po" (presumably the English
# source catalogue -- confirm against the repository layout).
po_files = [
    name for name in sorted(os.listdir("lang/po"))
    if name.endswith(".po") and not name.endswith("en.po")
]

if len(sys.argv) == 1:
    files_to_check = po_files
else:
    files_to_check = []
    for language in sys.argv[1:]:
        candidate = language + ".po"
        if candidate in po_files:
            files_to_check.append(candidate)
        else:
            print("Warning: Unknown language", language)

num_errors = 0
for file in sorted(files_to_check):
    print("Checking", file, end="", flush=True)
    errors = check_po_file("lang/po/" + file)
    n = len(errors)
    num_errors += n
    if n > 0:
        print(f" => {bcolors.FAIL}{n} error(s) detected:{bcolors.ENDC}")
        for (msgid, msgstr, seg_msgid, seg_msgstr, reason) in errors:
            print(f"{bcolors.BOLD}problem   :{bcolors.ENDC}", end="")
            print(reason)
            print(f"{bcolors.BOLD}original  :{bcolors.ENDC}", end="")
            print_message(msgid.replace("\n\n", "\n"), seg_msgid)
            print(f"{bcolors.BOLD}translated:{bcolors.ENDC}", end="")
            print_message(msgstr.replace("\n\n", "\n"), seg_msgstr)
            print()
    else:
        print(f" => {bcolors.OKGREEN}No error detected.{bcolors.ENDC}")
    print()

# The process exit status keeps only the low 8 bits on POSIX, so an error
# count that is a multiple of 256 would be reported as success.  Cap the
# status at 255; counts up to 255 are still passed through unchanged.
# sys.exit is used because the bare exit() helper is provided by the site
# module and is not guaranteed to exist.
sys.exit(min(num_errors, 255))
182