1from test.test_support import run_unittest, verbose
2import unittest
3import locale
4import sys
5import codecs
6
7
# Name of the English locale selected by get_enUS_locale(); remains None
# until that function has completed successfully.
enUS_locale = None


def get_enUS_locale():
    """Pick a usable English locale and store its name in ``enUS_locale``.

    A platform-specific list of candidate names is tried in order against
    the ``LC_NUMERIC`` category.  The first name that ``setlocale()``
    accepts is recorded in the module-level ``enUS_locale``.  The
    ``LC_NUMERIC`` setting that was active on entry is restored before
    returning.

    Raises:
        unittest.SkipTest: on Mac OS X when the kernel release major
            number is below 10, or when none of the candidate names is
            accepted on this system.
    """
    global enUS_locale
    if sys.platform == 'darwin':
        import os
        tlocs = ("en_US.UTF-8", "en_US.ISO8859-1", "en_US")
        # os.uname()[2] is the kernel release string; only its major
        # number is inspected.
        if int(os.uname()[2].split('.')[0]) < 10:
            # The locale test work fine on OSX 10.6, I (ronaldoussoren)
            # haven't had time yet to verify if tests work on OSX 10.5
            # (10.4 is known to be bad)
            raise unittest.SkipTest("Locale support on MacOSX is minimal")
    # This must be ``elif``: with a plain ``if`` the trailing ``else``
    # replaced the darwin-specific candidate list chosen above.
    elif sys.platform.startswith("win"):
        tlocs = ("En", "English")
    else:
        tlocs = ("en_US.UTF-8", "en_US.US-ASCII", "en_US")
    oldlocale = locale.setlocale(locale.LC_NUMERIC)
    try:
        for tloc in tlocs:
            try:
                locale.setlocale(locale.LC_NUMERIC, tloc)
            except locale.Error:
                continue
            break
        else:
            raise unittest.SkipTest(
                "Test locale not supported (tried %s)" % (', '.join(tlocs)))
    finally:
        # Probing must never leave the process in a different locale,
        # whatever the outcome.
        locale.setlocale(locale.LC_NUMERIC, oldlocale)
    enUS_locale = tloc
36
37
38class BaseLocalizedTest(unittest.TestCase):
39    #
40    # Base class for tests using a real locale
41    #
42
43    def setUp(self):
44        self.oldlocale = locale.setlocale(self.locale_type)
45        locale.setlocale(self.locale_type, enUS_locale)
46        if verbose:
47            print "testing with \"%s\"..." % enUS_locale,
48
49    def tearDown(self):
50        locale.setlocale(self.locale_type, self.oldlocale)
51
52
class BaseCookedTest(unittest.TestCase):
    #
    # Shared fixture for tests that run against hand-written ("cooked")
    # localeconv() values rather than a locale installed on the system.
    #
    # Subclasses define ``cooked_values``, a dict of localeconv() entries.
    #

    def setUp(self):
        # Publish this class's canned values through the locale module's
        # private override attribute.
        overrides = self.cooked_values
        locale._override_localeconv = overrides

    def tearDown(self):
        # Reset the override to a fresh empty dict once the test is done.
        locale._override_localeconv = dict()
63
64
class CCookedTest(BaseCookedTest):
    # Cooked localeconv() values standing in for the "C" locale: empty
    # separators and symbols, empty grouping lists, and 127 for every
    # numeric field.

    cooked_values = {
        # Plain number formatting
        'decimal_point': '.',
        'thousands_sep': '',
        'grouping': [],
        # Monetary separators and grouping
        'mon_decimal_point': '',
        'mon_thousands_sep': '',
        'mon_grouping': [],
        # Currency symbols and precision
        'currency_symbol': '',
        'int_curr_symbol': '',
        'frac_digits': 127,
        'int_frac_digits': 127,
        # Sign strings and placement for positive amounts
        'positive_sign': '',
        'p_cs_precedes': 127,
        'p_sep_by_space': 127,
        'p_sign_posn': 127,
        # Sign strings and placement for negative amounts
        'negative_sign': '',
        'n_cs_precedes': 127,
        'n_sep_by_space': 127,
        'n_sign_posn': 127,
    }
88
class EnUSCookedTest(BaseCookedTest):
    # Cooked localeconv() values standing in for "en_US": '.' as decimal
    # point, ',' as thousands separator, groups of three digits, and a
    # leading '$' / 'USD ' currency symbol.

    cooked_values = {
        # Plain number formatting
        'decimal_point': '.',
        'thousands_sep': ',',
        'grouping': [3, 3, 0],
        # Monetary separators and grouping
        'mon_decimal_point': '.',
        'mon_thousands_sep': ',',
        'mon_grouping': [3, 3, 0],
        # Currency symbols and precision
        'currency_symbol': '$',
        'int_curr_symbol': 'USD ',
        'frac_digits': 2,
        'int_frac_digits': 2,
        # Sign strings and placement for positive amounts
        'positive_sign': '',
        'p_cs_precedes': 1,
        'p_sep_by_space': 0,
        'p_sign_posn': 1,
        # Sign strings and placement for negative amounts
        'negative_sign': '-',
        'n_cs_precedes': 1,
        'n_sep_by_space': 0,
        'n_sign_posn': 1,
    }
112
113
class FrFRCookedTest(BaseCookedTest):
    # Cooked localeconv() values standing in for "fr_FR": ',' as decimal
    # point, a space character as thousands separator, and a non-ASCII
    # currency symbol (the byte string '\xe2\x82\xac') placed after the
    # amount.

    cooked_values = {
        # Plain number formatting
        'decimal_point': ',',
        'thousands_sep': ' ',
        'grouping': [3, 3, 0],
        # Monetary separators and grouping
        'mon_decimal_point': ',',
        'mon_thousands_sep': ' ',
        'mon_grouping': [3, 3, 0],
        # Currency symbols and precision
        'currency_symbol': '\xe2\x82\xac',
        'int_curr_symbol': 'EUR ',
        'frac_digits': 2,
        'int_frac_digits': 2,
        # Sign strings and placement for positive amounts
        'positive_sign': '',
        'p_cs_precedes': 0,
        'p_sep_by_space': 1,
        'p_sign_posn': 1,
        # Sign strings and placement for negative amounts
        'negative_sign': '-',
        'n_cs_precedes': 0,
        'n_sep_by_space': 1,
        'n_sign_posn': 1,
    }
138
139
class BaseFormattingTest(object):
    #
    # Mixin with assertion helpers for the formatting tests.  It relies on
    # the concrete test class to provide assertEqual().
    #

    def _test_formatfunc(self, format, value, out, func, **format_opts):
        # Call func(format, value, **format_opts) and require the result
        # to equal ``out``.
        produced = func(format, value, **format_opts)
        self.assertEqual(produced, out)

    def _test_format(self, format, value, out, **format_opts):
        # Same check, with locale.format as the function under test.
        self._test_formatfunc(
            format, value, out, locale.format, **format_opts)

    def _test_format_string(self, format, value, out, **format_opts):
        # Same check, with locale.format_string as the function under test.
        self._test_formatfunc(
            format, value, out, locale.format_string, **format_opts)

    def _test_currency(self, value, out, **format_opts):
        # Require locale.currency(value, **format_opts) to equal ``out``.
        produced = locale.currency(value, **format_opts)
        self.assertEqual(produced, out)
159
160
class EnUSNumberFormatting(BaseFormattingTest):
    # Number-formatting checks for an English locale.  Expected strings
    # are built from the active thousands separator, read once in setUp().
    #
    # XXX there is a grouping + padding bug when the thousands separator
    # is empty but the grouping array contains values (e.g. Solaris 10)

    def setUp(self):
        conv = locale.localeconv()
        self.sep = conv['thousands_sep']

    def test_grouping(self):
        cases = (
            ("%f", 1024, '1%s024.000000' % self.sep),
            ("%f", 102, '102.000000'),
            ("%f", -42, '-42.000000'),
            ("%+f", -42, '-42.000000'),
        )
        for fmt, number, expected in cases:
            self._test_format(fmt, number, grouping=1, out=expected)

    def test_grouping_and_padding(self):
        self._test_format("%20.f", -42, grouping=1, out='-42'.rjust(20))
        if not self.sep:
            # The remaining checks only apply when a separator exists.
            return
        grouped = '-4%s200' % self.sep
        self._test_format("%+10.f", -4200, grouping=1,
            out=grouped.rjust(10))
        self._test_format("%-10.f", -4200, grouping=1,
            out=grouped.ljust(10))

    def test_integer_grouping(self):
        cases = (
            ("%d", 4200, '4%s200' % self.sep),
            ("%+d", 4200, '+4%s200' % self.sep),
            ("%+d", -4200, '-4%s200' % self.sep),
        )
        for fmt, number, expected in cases:
            self._test_format(fmt, number, grouping=True, out=expected)

    def test_integer_grouping_and_padding(self):
        right_aligned = ('4%s200' % self.sep).rjust(10)
        self._test_format("%10d", 4200, grouping=True, out=right_aligned)
        left_aligned = ('-4%s200' % self.sep).ljust(10)
        self._test_format("%-10d", -4200, grouping=True, out=left_aligned)

    def test_simple(self):
        cases = (
            ("%f", 1024, '1024.000000'),
            ("%f", 102, '102.000000'),
            ("%f", -42, '-42.000000'),
            ("%+f", -42, '-42.000000'),
        )
        for fmt, number, expected in cases:
            self._test_format(fmt, number, grouping=0, out=expected)

    def test_padding(self):
        cases = (
            ("%20.f", -42, '-42'.rjust(20)),
            ("%+10.f", -4200, '-4200'.rjust(10)),
            ("%-10.f", 4200, '4200'.ljust(10)),
        )
        for fmt, number, expected in cases:
            self._test_format(fmt, number, grouping=0, out=expected)

    def test_complex_formatting(self):
        sep = self.sep
        million = '1%s000%s000' % (sep, sep)
        # Spaces in formatting string
        self._test_format_string("One million is %i", 1000000, grouping=1,
            out='One million is ' + million)
        self._test_format_string("One  million is %i", 1000000, grouping=1,
            out='One  million is ' + million)
        # Dots in formatting string
        self._test_format_string(".%f.", 1000.0, out='.1000.000000.')
        # Padding
        if sep:
            padded = ('4%s200.00' % sep).rjust(10)
            self._test_format_string("-->  %10.2f", 4200, grouping=1,
                out='-->  ' + padded)
        # Asterisk formats
        self._test_format_string("%10.*f", (2, 1000), grouping=0,
            out='1000.00'.rjust(10))
        if sep:
            self._test_format_string("%*.*f", (10, 2, 1000), grouping=1,
                out=('1%s000.00' % sep).rjust(10))
            # Test more-in-one
            self._test_format_string("int %i float %.2f str %s",
                (1000, 1000.0, 'str'), grouping=1,
                out='int 1%s000 float 1%s000.00 str str' % (sep, sep))
228
229
class TestFormatPatternArg(unittest.TestCase):
    # Checks how locale.format validates its pattern argument.

    def test_onlyOnePattern(self):
        # Issue 2522: accept exactly one % pattern, and no extra chars.
        rejected_patterns = (
            "%f\n",
            "%f\r",
            "%f\r\n",
            " %f",
            "%fg",
            "%^g",
            "%f%%",
        )
        for pattern in rejected_patterns:
            self.assertRaises(ValueError, locale.format, pattern, 'foo')
242
243
class TestLocaleFormatString(unittest.TestCase):
    """Checks that locale.format_string agrees with the plain % operator."""

    def test_percent_escape(self):
        # Literal '%%' must come through exactly as the % operator
        # renders it, for scalar, tuple and mapping arguments.
        cases = (
            ('%f%%', 1.0),
            ('%d %f%%d', (1, 1.0)),
            ('%(foo)s %%d', {'foo': 'bar'}),
        )
        for template, argument in cases:
            self.assertEqual(locale.format_string(template, argument),
                template % argument)

    def test_mapping(self):
        # Named placeholders filled from a dict.
        mapping = {'foo': 'bar'}
        for template in ('%(foo)s bing.', '%(foo)s'):
            self.assertEqual(locale.format_string(template, mapping),
                template % mapping)
259
260
261
class TestNumberFormatting(BaseLocalizedTest, EnUSNumberFormatting):
    # Runs the English number-formatting checks under a real locale,
    # switching only the LC_NUMERIC category.

    locale_type = locale.LC_NUMERIC

    def setUp(self):
        # Both bases define setUp(); each is invoked explicitly.  The
        # locale switch comes first so the separator is read afterwards.
        for base in (BaseLocalizedTest, EnUSNumberFormatting):
            base.setUp(self)
270
271
class TestEnUSNumberFormatting(EnUSCookedTest, EnUSNumberFormatting):
    # Runs the English number-formatting checks, plus a currency check,
    # against the cooked "en_US" values.

    def setUp(self):
        # Both bases define setUp(); each is invoked explicitly.  The
        # cooked values are installed first so the separator is read
        # afterwards.
        for base in (EnUSCookedTest, EnUSNumberFormatting):
            base.setUp(self)

    def test_currency(self):
        cases = (
            ("$50000.00", {}),
            ("$50,000.00", {'grouping': True}),
            ("USD 50,000.00", {'grouping': True, 'international': True}),
        )
        for expected, options in cases:
            self._test_currency(50000, expected, **options)
284
285
class TestCNumberFormatting(CCookedTest, BaseFormattingTest):
    # Number-formatting checks against the cooked "C" values: asking for
    # grouping must not insert any separator.

    def test_grouping(self):
        expected = '12345.67'
        self._test_format("%.2f", 12345.67, expected, grouping=True)

    def test_grouping_and_padding(self):
        expected = ' 12345.67'
        self._test_format("%9.2f", 12345.67, expected, grouping=True)
294
295
class TestFrFRNumberFormatting(FrFRCookedTest, BaseFormattingTest):
    # Number-formatting checks against the cooked "fr_FR" values (comma
    # decimal point, space thousands separator, trailing currency symbol).

    def test_decimal_point(self):
        expected = '12345,67'
        self._test_format("%.2f", 12345.67, out=expected)

    def test_grouping(self):
        cases = (
            (345.67, '345,67'),
            (12345.67, '12 345,67'),
        )
        for number, expected in cases:
            self._test_format("%.2f", number, grouping=True, out=expected)

    def test_grouping_and_padding(self):
        cases = (
            # Right-aligned (default) padding
            ("%6.2f", 345.67, '345,67'),
            ("%7.2f", 345.67, ' 345,67'),
            ("%8.2f", 12345.67, '12 345,67'),
            ("%9.2f", 12345.67, '12 345,67'),
            ("%10.2f", 12345.67, ' 12 345,67'),
            # Left-aligned padding
            ("%-6.2f", 345.67, '345,67'),
            ("%-7.2f", 345.67, '345,67 '),
            ("%-8.2f", 12345.67, '12 345,67'),
            ("%-9.2f", 12345.67, '12 345,67'),
            ("%-10.2f", 12345.67, '12 345,67 '),
        )
        for fmt, number, expected in cases:
            self._test_format(fmt, number, grouping=True, out=expected)

    def test_integer_grouping(self):
        cases = (
            (200, '200'),
            (4200, '4 200'),
        )
        for number, expected in cases:
            self._test_format("%d", number, grouping=True, out=expected)

    def test_integer_grouping_and_padding(self):
        grouped = '4 200'
        cases = (
            ("%4d", grouped),
            ("%5d", grouped),
            ("%10d", grouped.rjust(10)),
            ("%-4d", grouped),
            ("%-5d", grouped),
            ("%-10d", grouped.ljust(10)),
        )
        for fmt, expected in cases:
            self._test_format(fmt, 4200, grouping=True, out=expected)

    def test_currency(self):
        amount = 50000
        euro = u'\u20ac'.encode('utf-8')
        ungrouped = "50000,00 " + euro
        self._test_currency(amount, ungrouped)
        grouped = "50 000,00 " + euro
        self._test_currency(amount, grouped, grouping=True)
        # XXX is the trailing space a bug?
        self._test_currency(amount, "50 000,00 EUR ",
            grouping=True, international=True)
337
338
class TestStringMethods(BaseLocalizedTest):
    # String-method checks run with the LC_CTYPE category switched to the
    # test locale.  The checks are not defined at all on 'sunos5' or on
    # Windows platforms.

    locale_type = locale.LC_CTYPE

    if not (sys.platform == 'sunos5' or sys.platform.startswith("win")):
        # Test BSD Rune locale's bug for isctype functions.

        def test_isspace(self):
            cases = (('\x20', True), ('\xa0', False), ('\xa1', False))
            for char, expected in cases:
                self.assertEqual(char.isspace(), expected)

        def test_isalpha(self):
            char = '\xc0'
            self.assertEqual(char.isalpha(), False)

        def test_isalnum(self):
            char = '\xc0'
            self.assertEqual(char.isalnum(), False)

        def test_isupper(self):
            char = '\xc0'
            self.assertEqual(char.isupper(), False)

        def test_islower(self):
            char = '\xc0'
            self.assertEqual(char.islower(), False)

        def test_lower(self):
            # The string must come back unchanged.
            raw = '\xcc\x85'
            self.assertEqual(raw.lower(), raw)

        def test_upper(self):
            # The string must come back unchanged.
            raw = '\xed\x95\xa0'
            self.assertEqual(raw.upper(), raw)

        def test_strip(self):
            # Nothing may be stripped from either end.
            raw = '\xed\x95\xa0'
            self.assertEqual(raw.strip(), raw)

        def test_split(self):
            # The string must stay in one piece.
            raw = '\xec\xa0\xbc'
            self.assertEqual(raw.split(), [raw])
373
374
class NormalizeTest(unittest.TestCase):
    # Checks locale.normalize() against the module's alias table and a
    # set of hand-picked locale names.

    def check(self, localename, expected):
        # The input name is passed as the failure message so a mismatch
        # identifies which name was being normalized.
        normalized = locale.normalize(localename)
        self.assertEqual(normalized, expected, msg=localename)

    def _check_pairs(self, pairs):
        # Run check() over (localename, expected) pairs, in order.
        for localename, expected in pairs:
            self.check(localename, expected)

    def test_locale_alias(self):
        aliases = locale.locale_alias
        for localename in aliases:
            self.check(localename, aliases[localename])

    def test_empty(self):
        self._check_pairs((
            ('', ''),
        ))

    def test_c(self):
        self._check_pairs((
            ('c', 'C'),
            ('posix', 'C'),
        ))

    def test_english(self):
        self._check_pairs((
            ('en', 'en_US.ISO8859-1'),
            ('EN', 'en_US.ISO8859-1'),
            ('en_US', 'en_US.ISO8859-1'),
            ('en_us', 'en_US.ISO8859-1'),
            ('en_GB', 'en_GB.ISO8859-1'),
            ('en_US.UTF-8', 'en_US.UTF-8'),
            ('en_US.utf8', 'en_US.UTF-8'),
            ('en_US:UTF-8', 'en_US.UTF-8'),
            ('en_US.ISO8859-1', 'en_US.ISO8859-1'),
            ('en_US.US-ASCII', 'en_US.ISO8859-1'),
            ('english', 'en_EN.ISO8859-1'),
        ))

    def test_hyphenated_encoding(self):
        self._check_pairs((
            ('az_AZ.iso88599e', 'az_AZ.ISO8859-9E'),
            ('az_AZ.ISO8859-9E', 'az_AZ.ISO8859-9E'),
            ('tt_RU.koi8c', 'tt_RU.KOI8-C'),
            ('tt_RU.KOI8-C', 'tt_RU.KOI8-C'),
            ('lo_LA.cp1133', 'lo_LA.IBM-CP1133'),
            ('lo_LA.ibmcp1133', 'lo_LA.IBM-CP1133'),
            ('lo_LA.IBM-CP1133', 'lo_LA.IBM-CP1133'),
            ('uk_ua.microsoftcp1251', 'uk_UA.CP1251'),
            ('uk_ua.microsoft-cp1251', 'uk_UA.CP1251'),
            ('ka_ge.georgianacademy', 'ka_GE.GEORGIAN-ACADEMY'),
            ('ka_GE.GEORGIAN-ACADEMY', 'ka_GE.GEORGIAN-ACADEMY'),
            ('cs_CZ.iso88592', 'cs_CZ.ISO8859-2'),
            ('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2'),
        ))

    def test_euro_modifier(self):
        self._check_pairs((
            ('de_DE@euro', 'de_DE.ISO8859-15'),
            ('en_US.ISO8859-15@euro', 'en_US.ISO8859-15'),
        ))

    def test_latin_modifier(self):
        self._check_pairs((
            ('be_BY.UTF-8@latin', 'be_BY.UTF-8@latin'),
            ('sr_RS.UTF-8@latin', 'sr_RS.UTF-8@latin'),
        ))

    def test_valencia_modifier(self):
        self._check_pairs((
            ('ca_ES.UTF-8@valencia', 'ca_ES.UTF-8@valencia'),
            ('ca_ES@valencia', 'ca_ES.UTF-8@valencia'),
            ('ca@valencia', 'ca_ES.ISO8859-1@valencia'),
        ))

    def test_devanagari_modifier(self):
        self._check_pairs((
            ('ks_IN.UTF-8@devanagari', 'ks_IN.UTF-8@devanagari'),
            ('ks_IN@devanagari', 'ks_IN.UTF-8@devanagari'),
            ('ks@devanagari', 'ks_IN.UTF-8@devanagari'),
            ('ks_IN.UTF-8', 'ks_IN.UTF-8'),
            ('ks_IN', 'ks_IN.UTF-8'),
            ('ks', 'ks_IN.UTF-8'),
            ('sd_IN.UTF-8@devanagari', 'sd_IN.UTF-8@devanagari'),
            ('sd_IN@devanagari', 'sd_IN.UTF-8@devanagari'),
            ('sd@devanagari', 'sd_IN.UTF-8@devanagari'),
            ('sd_IN.UTF-8', 'sd_IN.UTF-8'),
            ('sd_IN', 'sd_IN.UTF-8'),
            ('sd', 'sd_IN.UTF-8'),
        ))
444
445
446class TestMiscellaneous(unittest.TestCase):
447    def test_getpreferredencoding(self):
448        # Invoke getpreferredencoding to make sure it does not cause exceptions.
449        enc = locale.getpreferredencoding()
450        if enc:
451            # If encoding non-empty, make sure it is valid
452            codecs.lookup(enc)
453
454    if hasattr(locale, "strcoll"):
455        def test_strcoll_3303(self):
456            # test crasher from bug #3303
457            self.assertRaises(TypeError, locale.strcoll, u"a", None)
458
459    def test_setlocale_category(self):
460        locale.setlocale(locale.LC_ALL)
461        locale.setlocale(locale.LC_TIME)
462        locale.setlocale(locale.LC_CTYPE)
463        locale.setlocale(locale.LC_COLLATE)
464        locale.setlocale(locale.LC_MONETARY)
465        locale.setlocale(locale.LC_NUMERIC)
466
467        # crasher from bug #7419
468        self.assertRaises(locale.Error, locale.setlocale, 12345)
469
470    def test_getsetlocale_issue1813(self):
471        # Issue #1813: setting and getting the locale under a Turkish locale
472        oldlocale = locale.getlocale()
473        self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
474        for loc in ('tr_TR', 'tr_TR.UTF-8', 'tr_TR.ISO8859-9'):
475            try:
476                locale.setlocale(locale.LC_CTYPE, loc)
477                break
478            except locale.Error:
479                continue
480        else:
481            # Unsupported locale on this system
482            self.skipTest('test needs Turkish locale')
483        loc = locale.getlocale()
484        try:
485            locale.setlocale(locale.LC_CTYPE, loc)
486        except Exception as e:
487            self.fail("Failed to set locale %r (default locale is %r): %r" %
488                      (loc, oldlocale, e))
489        self.assertEqual(loc, locale.getlocale())
490
491    def test_normalize_issue12752(self):
492        # Issue #1813 caused a regression where locale.normalize() would no
493        # longer accept unicode strings.
494        self.assertEqual(locale.normalize(u'en_US'), 'en_US.ISO8859-1')
495
496    def test_setlocale_unicode(self):
497        oldlocale = locale.getlocale()
498        self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
499
500        user_locale = locale.setlocale(locale.LC_CTYPE, '')
501        unicode_locale = user_locale.decode('utf-8')
502
503        user_locale2 = locale.setlocale(locale.LC_CTYPE, unicode_locale)
504        self.assertEqual(user_locale, user_locale2)
505
506
507def test_main():
508    tests = [
509        TestMiscellaneous,
510        TestFormatPatternArg,
511        TestLocaleFormatString,
512        TestEnUSNumberFormatting,
513        TestCNumberFormatting,
514        TestFrFRNumberFormatting,
515    ]
516    # SkipTest can't be raised inside unittests, handle it manually instead
517    try:
518        get_enUS_locale()
519    except unittest.SkipTest as e:
520        if verbose:
521            print "Some tests will be disabled: %s" % e
522    else:
523        tests += [TestNumberFormatting, TestStringMethods]
524    run_unittest(*tests)
525
526if __name__ == '__main__':
527    test_main()
528