1dnl PSPP - a program for statistical analysis.
2dnl Copyright (C) 2017 Free Software Foundation, Inc.
3dnl
4dnl This program is free software: you can redistribute it and/or modify
5dnl it under the terms of the GNU General Public License as published by
6dnl the Free Software Foundation, either version 3 of the License, or
7dnl (at your option) any later version.
8dnl
9dnl This program is distributed in the hope that it will be useful,
10dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12dnl GNU General Public License for more details.
13dnl
14dnl You should have received a copy of the GNU General Public License
15dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16dnl
dnl Banner under which the encoding guesser test groups are reported.
AT_BANNER([encoding guesser])
18
dnl Pure ASCII input ("string" plus the newline echo appends).
dnl The guesser is expected to print ASCII.
AT_SETUP([ASCII])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [echo string | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [ASCII
])
AT_CLEANUP
24
dnl Input is three 3-byte UTF-8 sequences (E6 97 A5, E6 9C AC, E8 AA 9E)
dnl followed by a newline.  The guesser is expected to print UTF-8.
AT_SETUP([UTF-8])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings ISO-8859-1], [0])
AT_CHECK(
  [printf '\346\227\245\346\234\254\350\252\236\n' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-8
])
AT_CLEANUP
31
dnl Input opens with a run of ASCII 'x' characters and only then contains
dnl three 3-byte UTF-8 sequences.  The extra argument 32 is presumably the
dnl buffer size handed to the guesser (confirm in encoding-guesser-test).
dnl The guesser is expected to print UTF-8.
AT_SETUP([UTF-8 starting with ASCII])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings ISO-8859-1], [0])
AT_CHECK(
  [printf 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\346\227\245\346\234\254\350\252\236\n' | encoding-guesser-test Auto,ISO-8859-1 32],
  [0],
  [UTF-8
])
AT_CLEANUP
38
dnl Input consists solely of the two bytes FE FF.
dnl The guesser is expected to print the generic name UTF-16.
AT_SETUP([UTF-16 with big-endian byte order mark])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\376\377' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-16
])
AT_CLEANUP
45
dnl Input consists solely of the two bytes FF FE.
dnl The guesser is expected to print the generic name UTF-16.
AT_SETUP([UTF-16 with little-endian byte order mark])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\377\376' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-16
])
AT_CLEANUP
52
dnl Input has no byte order mark: each character of "entr\351e" and the final
dnl newline is written as a zero byte followed by the character byte.
dnl The guesser is expected to print UTF-16BE.
AT_SETUP([UTF-16BE])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\0e\0n\0t\0r\0\351\0e\0\n' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-16BE
])
AT_CLEANUP
59
dnl A UTF-16 code unit of the form U+XX00 looks as if it were written in the
dnl opposite byte order.  This group puts U+0100 (bytes 01 00) first, ahead of
dnl otherwise ordinary big-endian text, and still expects UTF-16BE.
AT_SETUP([UTF-16BE starting with U+0100])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\1\0\0e\0n\0t\0r\0\351\0e\0\n' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-16BE
])
AT_CLEANUP
68
dnl Input has no byte order mark: each character of "entr\351e" and the final
dnl newline is written as the character byte followed by a zero byte.
dnl The guesser is expected to print UTF-16LE.
AT_SETUP([UTF-16LE])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf 'e\0n\0t\0r\0\351\0e\0\n\0' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-16LE
])
AT_CLEANUP
75
dnl A UTF-16 code unit of the form U+XX00 looks as if it were written in the
dnl opposite byte order.  This group puts U+0100 (bytes 00 01) first, ahead of
dnl otherwise ordinary little-endian text, and still expects UTF-16LE.
AT_SETUP([UTF-16LE starting with U+0100])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\0\1e\0n\0t\0r\0\351\0e\0\n\0' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-16LE
])
AT_CLEANUP
84
dnl Input consists solely of the four bytes 00 00 FE FF.
dnl The guesser is expected to print the generic name UTF-32.
AT_SETUP([UTF-32 with big-endian byte order mark])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\0\0\376\377' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-32
])
AT_CLEANUP
91
dnl Input consists solely of the four bytes FF FE 00 00.  Its first two bytes
dnl are the same as the little-endian UTF-16 mark, yet the expected output
dnl here is the generic name UTF-32.
AT_SETUP([UTF-32 with little-endian byte order mark])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\377\376\0\0' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-32
])
AT_CLEANUP
98
dnl Input has no byte order mark: each character of "entr\351e" and the final
dnl newline is written as three zero bytes followed by the character byte.
dnl The guesser is expected to print UTF-32BE.
AT_SETUP([UTF-32BE])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf '\0\0\0e\0\0\0n\0\0\0t\0\0\0r\0\0\0\351\0\0\0e\0\0\0\n' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-32BE
])
AT_CLEANUP
105
dnl Input has no byte order mark: each character of "entr\351e" and the final
dnl newline is written as the character byte followed by three zero bytes.
dnl The guesser is expected to print UTF-32LE.
AT_SETUP([UTF-32LE])
AT_KEYWORDS([encoding guesser])
AT_CHECK(
  [printf 'e\0\0\0n\0\0\0t\0\0\0r\0\0\0\351\0\0\0e\0\0\0\n\0\0\0' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-32LE
])
AT_CLEANUP
112
dnl Single-byte text containing the byte E9 between ASCII letters, followed
dnl by a newline.  The guesser is expected to print ISO-8859-1, the encoding
dnl named after "Auto," on its command line.
AT_SETUP([ISO-8859-1])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings ISO-8859-1], [0])
AT_CHECK(
  [printf 'entr\351e\n' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [ISO-8859-1
])
AT_CLEANUP
120
dnl Input consists solely of the four bytes 84 31 95 33, which the group
dnl title identifies as the GB-18030 byte order mark.
dnl The guesser is expected to print GB-18030.
AT_SETUP([GB-18030 with byte order mark])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings ISO-8859-1], [0])
AT_CHECK(
  [printf '\204\061\225\063' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [GB-18030
])
AT_CLEANUP
128
dnl Input consists solely of the four bytes DD 73 66 73, which the group
dnl title identifies as the UTF-EBCDIC byte order mark.
dnl The guesser is expected to print UTF-EBCDIC.
AT_SETUP([UTF-EBCDIC with byte order mark])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings ISO-8859-1], [0])
AT_CHECK(
  [printf '\335\163\146\163' | encoding-guesser-test Auto,ISO-8859-1],
  [0],
  [UTF-EBCDIC
])
AT_CLEANUP
136
dnl Input is ten two-byte sequences, A4 A1 through A4 AA, separated by spaces
dnl and ended by a newline.  The guesser is run with Auto,EUC-JP and is
dnl expected to print EUC-JP.
AT_SETUP([EUC-JP as Auto,EUC-JP])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings EUC-JP], [0])
AT_CHECK(
  [printf '\244\241 \244\242 \244\243 \244\244 \244\245 \244\246 \244\247 \244\250 \244\251 \244\252\n' | encoding-guesser-test Auto,EUC-JP],
  [0],
  [EUC-JP
])
AT_CLEANUP
144
dnl Input opens with a run of ASCII 'x' characters and only then contains the
dnl two-byte sequences A4 A1 through A4 AA.  The extra argument 32 is
dnl presumably the buffer size handed to the guesser (confirm in
dnl encoding-guesser-test).  The guesser is expected to print EUC-JP.
AT_SETUP([EUC-JP starting with ASCII as Auto,EUC-JP])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings EUC-JP], [0])
AT_CHECK(
  [printf 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx \244\241 \244\242 \244\243 \244\244 \244\245 \244\246 \244\247 \244\250 \244\251 \244\252\n' | encoding-guesser-test Auto,EUC-JP 32],
  [0],
  [EUC-JP
])
AT_CLEANUP
152
dnl Input is twenty 3-byte UTF-8 sequences (60 bytes) plus a newline.  If the
dnl extra argument 32 is the buffer size, as the group title suggests, then
dnl the 32-byte boundary falls inside a character, since 32 is not a multiple
dnl of 3.  The guesser is expected to print UTF-8.
AT_SETUP([UTF-8 with character split across input buffers])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings ISO-8859-1], [0])
AT_CHECK(
  [printf '\343\201\201\343\201\202\343\201\203\343\201\204\343\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201\212\343\201\201\343\201\202\343\201\203\343\201\204\343\201\205\343\201\206\343\201\207\343\201\210\343\201\211\343\201\212\n' | encoding-guesser-test Auto,ISO-8859-1 32],
  [0],
  [UTF-8
])
AT_CLEANUP
160
dnl Input is "entr", the byte E9, then "e", with no trailing newline.  E9 is
dnl followed by an ASCII letter rather than a continuation byte, so the input
dnl is not valid UTF-8.  Run with Auto,UTF-8, the guesser is expected to
dnl print windows-1252.
AT_SETUP([windows-1252 as Auto,UTF-8])
AT_KEYWORDS([encoding guesser])
dnl Precondition: the helper must exit 0 and print nothing.
AT_CHECK([i18n-test supports_encodings windows-1252], [0])
AT_CHECK(
  [printf 'entr\351e' | encoding-guesser-test Auto,UTF-8 32],
  [0],
  [windows-1252
])
AT_CLEANUP
168