#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#  utf8_tests.py:  testing the svn client's utf8 (i18n) handling
#
#  Subversion is a tool for revision control.
#  See http://subversion.apache.org for more information.
#
# ====================================================================
#    Licensed to the Apache Software Foundation (ASF) under one
#    or more contributor license agreements.  See the NOTICE file
#    distributed with this work for additional information
#    regarding copyright ownership.  The ASF licenses this file
#    to you under the Apache License, Version 2.0 (the
#    "License"); you may not use this file except in compliance
#    with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing,
#    software distributed under the License is distributed on an
#    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
#    KIND, either express or implied.  See the License for the
#    specific language governing permissions and limitations
#    under the License.
######################################################################

# General modules
import locale
import os
import re
import sys

# Our testing module
import svntest
from svntest import wc

# (abbreviation)
# Short aliases for the svntest state item class and test decorators.
Item = wc.StateItem
Skip = svntest.testcase.Skip_deco
SkipUnless = svntest.testcase.SkipUnless_deco
XFail = svntest.testcase.XFail_deco
Issues = svntest.testcase.Issues_deco
Issue = svntest.testcase.Issue_deco
Wimp = svntest.testcase.Wimp_deco

#--------------------------------------------------------------------
# Data

# Here's a filename and a log message which contain some high-ascii
# data.  In theory this data has different interpretations when
# converting from 2 different charsets into UTF-8.

### "bÔçÅ" in ISO-8859-1 encoding:
i18n_filename = 'b\xd4\xe7\xc5'

### "drieëntwintig keer was één keer teveel" in ISO-8859-1 encoding:
# NOTE(review): \xc3\xab and \xc3\xa9 are the UTF-8 byte sequences for
# "ë" and "é"; in ISO-8859-1 those characters would be \xeb and \xe9.
# Confirm which encoding is actually intended before relying on the
# comment above.
i18n_logmsg = 'drie\xc3\xabntwintig keer was \xc3\xa9\xc3\xa9n keer teveel'


######################################################################
# Tests
#
#   Each test must return on success or raise on failure.

# The test is unconditionally skipped (see the notes after the function
# for what a complete version of it would have to do).
# NOTE(review): the docstring is presumably consumed by the svntest
# harness as the test description -- keep it a single short line.
@Skip()
def basic_utf8_conversion(sbox):
    "conversion of paths and logs to/from utf8"

    sbox.build()
    wc_dir = sbox.wc_dir
    i18n_path = os.path.join(wc_dir, i18n_filename)

    # Create the new i18n file and schedule it for addition
    svntest.main.file_append(i18n_path, "hi")
    svntest.actions.run_and_verify_svn(
        "Failed to schedule i18n filename for addition", None, [],
        'add', i18n_path)

    # Commit it, using the high-ascii log message.
    svntest.actions.run_and_verify_svn(
        "Failed to commit i18n filename", None, [],
        'commit', '-m', i18n_logmsg, wc_dir)

# Here's how the test should really work:

# 1. sh LC_ALL=ISO-8859-1 svn commit <filename> -m "<logmsg>"

# 2. sh LC_ALL=UTF-8 svn log -rHEAD > output

# 3. verify that output is the exact UTF-8 data that we expect.

# 4. repeat the process using some other locale other than ISO8859-1,
#    preferably some locale which will convert the high-ascii data to
#    *different* UTF-8.



#----------------------------------------------------------------------

########################################################################
# Set up the locale the tests expect

try:
    # Generic setlocale so that getlocale returns something sensible
    locale.setlocale(locale.LC_ALL, '')

    # Try to make these tests run in an ISO-8859-1 environment, otherwise
    # they would run in whatever random locale the testing platform
    # happens to have, and then we couldn't predict the exact results.
    if svntest.main.windows:
        # In this case, it would probably be "english_usa.1252", but you
        # should be able to set just the encoding by using ".1252" (that's
        # codepage 1252, which is almost but not quite entirely unlike tea;
        # um, I mean it's very similar to ISO-8859-1).
        #                                     -- Branko Čibej <brane@xbc.nu>
        locale.setlocale(locale.LC_ALL, '.1252')
    else:
        locale.setlocale(locale.LC_ALL, 'en_US.ISO8859-1')

    if os.putenv:
        # propagate to the svn* executables, so they do the correct
        # translation.  The line below works for Linux systems if they
        # have the particular locale installed.
        os.environ['LC_ALL'] = "en_US.ISO8859-1"
except Exception:
    # Best effort only: if the locale cannot be set, the checks below
    # decide whether the environment is usable.  Catching Exception
    # (rather than a bare except) lets KeyboardInterrupt and SystemExit
    # propagate.
    pass

# Check to see if the locale uses ISO-8859-1 encoding.  The regex is
# necessary because some systems omit the first hyphen or use lowercase
# letters for ISO.
if sys.platform == 'win32':
    localematch = 1
else:
    localeenc = locale.getlocale()[1]
    if localeenc:
        localeregex = re.compile('^ISO-?8859-1$', re.I)
        localematch = localeregex.search(localeenc)
        try:
            svntest.actions.run_and_verify_svn(None, svntest.SVNAnyOutput, [],
                                               "help")
        except Exception:
            # We won't be able to run the client; this might be because the
            # system does not support the iso-8859-1 locale.  Anyhow, it
            # makes no sense to run the test.
            localematch = None
    else:
        localematch = None

# Also check that the environment contains the expected locale settings
# either by default, or because we set them above.
if localematch:
    # Raw string: '\.' is not a valid string escape, so the non-raw form
    # triggers an invalid-escape warning on newer Python versions.
    localeregex = re.compile(r'^en_US\.ISO-?8859-1$', re.I)
    for env in [ 'LC_ALL', 'LC_CTYPE', 'LANG' ]:
        env_value = os.getenv(env)
        if env_value:
            # The first variable that is set decides the outcome: it must
            # name an en_US ISO-8859-1 locale, otherwise the match is
            # dropped.  Later variables are not consulted.
            if not localeregex.search(env_value):
                localematch = None
            break


########################################################################
# Run the tests

# list all tests here, starting with None:
test_list = [ None,
              basic_utf8_conversion,
            ]

if __name__ == '__main__':
    svntest.main.run_tests(test_list)
    # NOTREACHED


### End of file.