1# Copyright (C) 2006, 2009, 2010, 2011 Canonical Ltd
2# -*- coding: utf-8 -*-
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program; if not, write to the Free Software
16# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
18"""Adapter for running test cases against multiple encodings."""
19
20# prefix for micro (1/1000000)
21_mu = u'\xb5'
22
23# greek letter omega, not to be confused with
24# the Ohm sign, u'\u2126'. Though they are probably identical
25# cp437 can handle the first, but not the second
26_omega = u'\u03a9'
27
28# smallest error possible, epsilon
29# cp437 handles u03b5, but not u2208 the 'element of' operator
30_epsilon = u'\u03b5'
31
32# Swedish?
33_erik = u'Erik B\xe5gfors'
34
35# Swedish 'räksmörgås' means shrimp sandwich
36_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'
37
38# Arabic, probably only Unicode encodings can handle this one
39_juju = u'\u062c\u0648\u062c\u0648'
40
41# iso-8859-1 alternative for juju
42_juju_alt = u'j\xfbj\xfa'
43
44# Russian, 'Alexander' in russian
45_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
46# The word 'test' in Russian
47_russian_test = u'\u0422\u0435\u0441\u0442'
48
49# Kanji
50# It is a kanji sequence for nihonjin, or Japanese in English.
51#
52# '\u4eba' being person, 'u\65e5' sun and '\u672c' origin. Ie,
53# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
54# not a fluent speaker, so this is just my crude breakdown.
55#
56# Wouter van Heyst
57_nihonjin = u'\u65e5\u672c\u4eba'
58
59# Czech
60# It's what is usually used for showing how fonts look, because it contains
61# most accented characters, ie. in places where Englishman use 'Quick brown fox
62# jumped over a lazy dog'. The literal translation of the Czech version would
63# be something like 'Yellow horse groaned devilish codes'. Actually originally
64# the last word used to be 'ódy' (odes). The 'k' was added as a pun when using
65# the sentece to check whether one has properly set encoding.
66_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
67                 u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
68_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
69_someone = u'Some\u016f\u0148\u011b'
70_something = u'\u0165ou\u010dk\xfd'
71
72# Hebrew
73# Shalom -> 'hello' or 'peace', used as a common greeting
74_shalom = u'\u05e9\u05dc\u05d5\u05dd'
75
76
# Each scenario is a (scenario name, attributes) pair. The attributes hold the
# name of the encoding ('encoding') and a set of sample strings ('info') keyed
# by the role they play: committer, message, filename and directory.
encoding_scenarios = [
    # utf-8, first permutation
    ('utf-8,1', dict(
        info=dict(
            committer=_erik,
            message=_yellow_horse,
            filename=_shrimp_sandwich,
            directory=_nihonjin,
        ),
        encoding='utf-8',
    )),
    # utf-8, second permutation
    ('utf-8,2', dict(
        info=dict(
            committer=_alexander,
            message=u'Testing ' + _mu,
            filename=_shalom,
            directory=_juju,
        ),
        encoding='utf-8',
    )),
    ('iso-8859-1', dict(
        info=dict(
            committer=_erik,
            message=u'Testing ' + _mu,
            filename=_juju_alt,
            directory=_shrimp_sandwich,
        ),
        encoding='iso-8859-1',
    )),
    ('iso-8859-2', dict(
        info=dict(
            committer=_someone,
            message=_yellow_horse,
            filename=_yellow,
            directory=_something,
        ),
        encoding='iso-8859-2',
    )),
    ('cp1251', dict(
        info=dict(
            committer=_alexander,
            message=u'Testing ' + _mu,
            filename=_russian_test,
            directory=_russian_test + 'dir',
        ),
        encoding='cp1251',
    )),
    # The iso-8859-1 tests run on a default Windows cp437 installation, and it
    # takes a long time to run an extra permutation of the tests, so cp437 is
    # not enabled. But just in case we want to add it back in (written in the
    # same shape as the entries above):
    #    ('cp437', dict(
    #        info=dict(
    #            committer=_erik,
    #            message=u'Testing ' + _mu,
    #            filename='file_' + _omega,
    #            directory=_epsilon + '_dir',
    #        ),
    #        encoding='cp437',
    #    )),
    ]
134