# Copyright (C) 2006, 2009, 2010, 2011 Canonical Ltd
# -*- coding: utf-8 -*-
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Adapter for running test cases against multiple encodings."""

# All sample strings below are written with escape sequences so that the
# values do not depend on how this file itself is decoded.

# prefix for micro (1/1000000)
_mu = u'\xb5'

# greek letter omega, not to be confused with
# the Ohm sign, u'\u2126'. Though they are probably identical
# cp437 can handle the first, but not the second
_omega = u'\u03a9'

# smallest error possible, epsilon
# cp437 handles u03b5, but not u2208 the 'element of' operator
_epsilon = u'\u03b5'

# Swedish?
_erik = u'Erik B\xe5gfors'

# Swedish 'räksmörgås' means shrimp sandwich
_shrimp_sandwich = u'r\xe4ksm\xf6rg\xe5s'

# Arabic, probably only Unicode encodings can handle this one
_juju = u'\u062c\u0648\u062c\u0648'

# iso-8859-1 alternative for juju
_juju_alt = u'j\xfbj\xfa'

# Russian, 'Alexander' in russian
_alexander = u'\u0410\u043b\u0435\u043a\u0441\u0430\u043d\u0434\u0440'
# The word 'test' in Russian
_russian_test = u'\u0422\u0435\u0441\u0442'

# Kanji
# It is a kanji sequence for nihonjin, or Japanese in English.
#
# '\u4eba' being person, '\u65e5' sun and '\u672c' origin. Ie,
# sun-origin-person, 'native from the land where the sun rises'. Note, I'm
# not a fluent speaker, so this is just my crude breakdown.
#
# Wouter van Heyst
_nihonjin = u'\u65e5\u672c\u4eba'

# Czech
# It's what is usually used for showing how fonts look, because it contains
# most accented characters, ie. in places where Englishman use 'Quick brown fox
# jumped over a lazy dog'. The literal translation of the Czech version would
# be something like 'Yellow horse groaned devilish codes'. Actually originally
# the last word used to be 'ódy' (odes). The 'k' was added as a pun when using
# the sentence to check whether one has properly set encoding.
_yellow_horse = (u'\u017dlu\u0165ou\u010dk\xfd k\u016f\u0148'
                 u' \xfap\u011bl \u010f\xe1belsk\xe9 k\xf3dy')
_yellow = u'\u017dlu\u0165ou\u010dk\xfd'
_someone = u'Some\u016f\u0148\u011b'
_something = u'\u0165ou\u010dk\xfd'

# Hebrew
# Shalom -> 'hello' or 'peace', used as a common greeting
_shalom = u'\u05e9\u05dc\u05d5\u05dd'


# List of (scenario name, parameters) pairs.  Each parameters dict has:
#   'info'     - sample 'committer', 'message', 'filename' and 'directory'
#                strings, chosen so that they can be represented in the
#                scenario's encoding
#   'encoding' - the codec name the scenario is about
encoding_scenarios = [
    # Permutation 1 of utf-8
    ('utf-8,1', {
        'info': {
            'committer': _erik,
            'message': _yellow_horse,
            'filename': _shrimp_sandwich,
            'directory': _nihonjin,
        },
        'encoding': 'utf-8',
    }),
    # Permutation 2 of utf-8
    ('utf-8,2', {
        'info': {
            'committer': _alexander,
            'message': u'Testing ' + _mu,
            'filename': _shalom,
            'directory': _juju,
        },
        'encoding': 'utf-8',
    }),
    ('iso-8859-1', {
        'info': {
            'committer': _erik,
            'message': u'Testing ' + _mu,
            'filename': _juju_alt,
            'directory': _shrimp_sandwich,
        },
        'encoding': 'iso-8859-1',
    }),
    ('iso-8859-2', {
        'info': {
            'committer': _someone,
            'message': _yellow_horse,
            'filename': _yellow,
            'directory': _something,
        },
        'encoding': 'iso-8859-2',
    }),
    ('cp1251', {
        'info': {
            'committer': _alexander,
            'message': u'Testing ' + _mu,
            'filename': _russian_test,
            # u'' prefix keeps the concatenation unicode-with-unicode; the
            # resulting value is the same as with a plain 'dir' literal.
            'directory': _russian_test + u'dir',
        },
        'encoding': 'cp1251',
    }),
    # The iso-8859-1 tests run on a default windows cp437 installation
    # and it takes a long time to run an extra permutation of the tests
    # But just in case we want to add this back in (written in the same
    # shape as the scenarios above):
    # ('cp437', {
    #     'info': {
    #         'committer': _erik,
    #         'message': u'Testing ' + _mu,
    #         'filename': u'file_' + _omega,
    #         'directory': _epsilon + u'_dir',
    #     },
    #     'encoding': 'cp437',
    # }),
]