1#!/usr/bin/env python
2#  -*- coding: utf-8 -*-
3#
4#  utf8_tests.py:  testing the svn client's utf8 (i18n) handling
5#
6#  Subversion is a tool for revision control.
7#  See http://subversion.apache.org for more information.
8#
9# ====================================================================
10#    Licensed to the Apache Software Foundation (ASF) under one
11#    or more contributor license agreements.  See the NOTICE file
12#    distributed with this work for additional information
13#    regarding copyright ownership.  The ASF licenses this file
14#    to you under the Apache License, Version 2.0 (the
15#    "License"); you may not use this file except in compliance
16#    with the License.  You may obtain a copy of the License at
17#
18#      http://www.apache.org/licenses/LICENSE-2.0
19#
20#    Unless required by applicable law or agreed to in writing,
21#    software distributed under the License is distributed on an
22#    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
23#    KIND, either express or implied.  See the License for the
24#    specific language governing permissions and limitations
25#    under the License.
26######################################################################
27
28# General modules
29import sys, re, os, locale
30
31# Our testing module
32import svntest
33from svntest import wc
34
# Short module-level aliases used by the tests in this file.

# Alias for wc.StateItem.
Item = wc.StateItem

# Test decorators re-exported from svntest.testcase (alphabetical order).
Issue = svntest.testcase.Issue_deco
Issues = svntest.testcase.Issues_deco
Skip = svntest.testcase.Skip_deco
SkipUnless = svntest.testcase.SkipUnless_deco
Wimp = svntest.testcase.Wimp_deco
XFail = svntest.testcase.XFail_deco
43
44#--------------------------------------------------------------------
45# Data
46
47# Here's a filename and a log message which contain some high-ascii
48# data.  In theory this data has different interpretations when
49# converting from 2 different charsets into UTF-8.
50
### "bÔçÅ" in ISO-8859-1 encoding:
52i18n_filename = 'b\xd4\xe7\xc5'
53
### "drieëntwintig keer was één keer teveel" in UTF-8 encoding:
55i18n_logmsg = 'drie\xc3\xabntwintig keer was \xc3\xa9\xc3\xa9n keer teveel'
56
57
58######################################################################
59# Tests
60#
61#   Each test must return on success or raise on failure.
62
@Skip()
def basic_utf8_conversion(sbox):
  "conversion of paths and logs to/from utf8"

  # Build the sandbox and locate the i18n-named file inside its
  # working copy.
  sbox.build()
  wc_dir = sbox.wc_dir
  i18n_path = os.path.join(wc_dir, i18n_filename)

  # Create the file with the high-ascii name, then schedule it for
  # addition with 'svn add' (no stderr output is expected).
  svntest.main.file_append(i18n_path, "hi")
  add_msg = "Failed to schedule i18n filename for addition"
  svntest.actions.run_and_verify_svn(add_msg, None, [], 'add', i18n_path)

  # Commit the whole working copy, passing the high-ascii log message
  # via '-m'.
  commit_msg = "Failed to commit i18n filename"
  svntest.actions.run_and_verify_svn(commit_msg, None, [],
                                     'commit', '-m', i18n_logmsg, wc_dir)
79
80# Here's how the test should really work:
81
82# 1. sh LC_ALL=ISO-8859-1 svn commit <filename> -m "<logmsg>"
83
84# 2. sh LC_ALL=UTF-8 svn log -rHEAD > output
85
86# 3. verify that output is the exact UTF-8 data that we expect.
87
# 4. repeat the process using some locale other than ISO-8859-1,
#    preferably one which will convert the high-ascii data to
#    *different* UTF-8.
91
92
93
94#----------------------------------------------------------------------
95
96########################################################################
# Set up the locale for the tests
98
try:
  # Generic setlocale so that getlocale returns something sensible
  locale.setlocale(locale.LC_ALL, '')

  # Try to make these tests run in an ISO-8859-1 environment, otherwise
  # they would run in whatever random locale the testing platform
  # happens to have, and then we couldn't predict the exact results.
  if svntest.main.windows:
    # In this case, it would probably be "english_usa.1252", but you should
    # be able to set just the encoding by using ".1252" (that's codepage
    # 1252, which is almost but not quite entirely unlike tea; um, I mean
    # it's very similar to ISO-8859-1).
    #                                     -- Branko Čibej <brane@xbc.nu>
    locale.setlocale(locale.LC_ALL, '.1252')
  else:
    locale.setlocale(locale.LC_ALL, 'en_US.ISO8859-1')

    # os.putenv is a function object and therefore always truthy; what
    # matters is whether the platform provides it at all.
    if hasattr(os, 'putenv'):
      # propagate to the svn* executables, so they do the correct translation
      # the line below works for Linux systems if they have the particular
      # locale installed
      os.environ['LC_ALL'] = "en_US.ISO8859-1"
except Exception:
  # Best effort only: if any step above fails (for example the requested
  # locale is not installed), carry on without the remaining steps.
  # Catching Exception instead of using a bare "except:" lets
  # KeyboardInterrupt and SystemExit propagate.
  pass
123
124# Check to see if the locale uses ISO-8859-1 encoding.  The regex is necessary
# because some systems omit the first hyphen or use lowercase letters for ISO.
# Compute `localematch`: truthy when the active locale looks like
# ISO-8859-1 and the svn client can actually be run, None otherwise.
if sys.platform == 'win32':
  localematch = 1
else:
  try:
    localeenc = locale.getlocale()[1]
  except ValueError:
    # getlocale() raises ValueError when it cannot parse the current
    # locale name; treat that the same as "no encoding reported".
    localeenc = None

  localematch = None
  if localeenc:
    localeregex = re.compile('^ISO-?8859-1$', re.I)
    localematch = localeregex.search(localeenc)

  # Only bother launching the client when the encoding matched; a
  # non-matching encoding leaves localematch as None either way.
  if localematch:
    try:
      svntest.actions.run_and_verify_svn(None, svntest.SVNAnyOutput, [],
                                         "help")
    except Exception:
      # We won't be able to run the client; this might be because the
      # system does not support the iso-8859-1 locale. Anyhow, it makes
      # no sense to run the test.
      localematch = None
142
143# Also check that the environment contains the expected locale settings
144# either by default, or because we set them above.
if localematch:
  # Raw string: '\.' is not a valid escape sequence in an ordinary
  # string literal and triggers a warning on modern Python versions.
  localeregex = re.compile(r'^en_US\.ISO-?8859-1$', re.I)
  for env in ('LC_ALL', 'LC_CTYPE', 'LANG'):
    env_value = os.getenv(env)
    if env_value:
      # Only the first variable that is set (in the order listed above)
      # decides the outcome; unset or empty variables are skipped.
      if not localeregex.search(env_value):
        localematch = None
      break
155
156
157########################################################################
158# Run the tests
159
# All tests in this file, in order.  The leading None is a required
# placeholder (see "starting with None" convention of this test suite).
test_list = [
  None,
  basic_utf8_conversion,
]

if __name__ == '__main__':
  # Hand the list to the svntest runner when executed as a script.
  svntest.main.run_tests(test_list)
  # NOTREACHED
168
169
170### End of file.
171