1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd.
4 ** Contact: http://www.qt.io/licensing/
5 **
6 ** This file is part of the test suite of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see http://www.qt.io/terms-conditions. For further
15 ** information use the contact form at http://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 or version 3 as published by the Free
20 ** Software Foundation and appearing in the file LICENSE.LGPLv21 and
21 ** LICENSE.LGPLv3 included in the packaging of this file. Please review the
22 ** following information to ensure the GNU Lesser General Public License
23 ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
24 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 **
26 ** As a special exception, The Qt Company gives you certain additional
27 ** rights. These rights are described in The Qt Company LGPL Exception
28 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 **
30 ** GNU General Public License Usage
31 ** Alternatively, this file may be used under the terms of the GNU
32 ** General Public License version 3.0 as published by the Free Software
33 ** Foundation and appearing in the file LICENSE.GPL included in the
34 ** packaging of this file. Please review the following information to
35 ** ensure the GNU General Public License version 3.0 requirements will be
36 ** met: http://www.gnu.org/copyleft/gpl.html.
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41
42
43 #include <QtTest/QtTest>
44
45 #include <qtextcodec.h>
46 #include <qfile.h>
47 #include <qtextdocument.h>
48 #include <time.h>
49 #include <qprocess.h>
50 #include <QtConcurrentMap>
51 #include <QThreadPool>
52
53 #ifdef Q_OS_SYMBIAN
54 #define SRCDIR ""
55 #endif
56
57
58 class tst_QTextCodec : public QObject
59 {
60 Q_OBJECT
61
62 private slots:
63
64 void threadSafety();
65
66 void toUnicode_data();
67 void toUnicode();
68 void codecForName_data();
69 void codecForName();
70 void fromUnicode_data();
71 void fromUnicode();
72 void toUnicode_codecForHtml();
73 void toUnicode_incremental();
74 void codecForLocale();
75
76 void asciiToIscii() const;
77 void flagCodepointFFFF() const;
78 void flagF7808080() const;
79 void flagEFBFBF() const;
80 void decode0D() const;
81 void codecForIndex() const;
82 void aliasForUTF16() const;
83 void mibForTSCII() const;
84
85 void utf8Codec_data();
86 void utf8Codec();
87
88 void utf8bom_data();
89 void utf8bom();
90
91 void utfHeaders_data();
92 void utfHeaders();
93
94 void codecForHtml();
95
96 void codecForUtfText_data();
97 void codecForUtfText();
98
99 #ifdef Q_OS_UNIX
100 void toLocal8Bit();
101 #endif
102
103 void invalidNames();
104 void checkAliases_data();
105 void checkAliases();
106
107 void moreToFromUnicode_data();
108 void moreToFromUnicode();
109
110 void shiftJis();
111 };
112
toUnicode_data()113 void tst_QTextCodec::toUnicode_data()
114 {
115 QTest::addColumn<QString>("fileName");
116 QTest::addColumn<QString>("codecName");
117
118 QTest::newRow( "korean-eucKR" ) << SRCDIR "korean.txt" << "eucKR";
119 QTest::newRow( "UTF-8" ) << SRCDIR "utf8.txt" << "UTF-8";
120 }
121
toUnicode()122 void tst_QTextCodec::toUnicode()
123 {
124 QFETCH( QString, fileName );
125 QFETCH( QString, codecName );
126
127 QFile file( fileName );
128
129 if ( file.open( QIODevice::ReadOnly ) ) {
130 QByteArray ba = file.readAll();
131 QVERIFY(!ba.isEmpty());
132 QTextCodec *c = QTextCodec::codecForName( codecName.toLatin1() );
133 QVERIFY(c != 0);
134 QString uniString = c->toUnicode( ba );
135 if (codecName == QLatin1String("UTF-8")) {
136 QCOMPARE(uniString, QString::fromUtf8(ba));
137 QCOMPARE(ba, uniString.toUtf8());
138 }
139 QVERIFY(!uniString.isEmpty());
140 QCOMPARE( ba, c->fromUnicode( uniString ) );
141
142 if (codecName == QLatin1String("eucKR")) {
143 char ch = '\0';
144 QVERIFY(c->toUnicode(&ch, 1).isEmpty());
145 QVERIFY(c->toUnicode(&ch, 1).isNull());
146 }
147 } else {
148 QFAIL(qPrintable("File could not be opened: " + file.errorString()));
149 }
150 }
151
codecForName_data()152 void tst_QTextCodec::codecForName_data()
153 {
154 QTest::addColumn<QString>("hint");
155 QTest::addColumn<QString>("actualCodecName");
156
157 QTest::newRow("data1") << "iso88591" << "ISO-8859-1";
158 QTest::newRow("data2") << "iso88592" << "ISO-8859-2";
159 QTest::newRow("data3") << " IsO(8)8/5*9-2 " << "ISO-8859-2";
160 QTest::newRow("data4") << " IsO(8)8/5*2-9 " << "";
161 QTest::newRow("data5") << "latin2" << "ISO-8859-2";
162 }
163
codecForName()164 void tst_QTextCodec::codecForName()
165 {
166 QFETCH(QString, hint);
167 QFETCH(QString, actualCodecName);
168
169 QTextCodec *codec = QTextCodec::codecForName(hint.toLatin1());
170 if (actualCodecName.isEmpty()) {
171 QVERIFY(codec == 0);
172 } else {
173 QVERIFY(codec != 0);
174 QCOMPARE(QString(codec->name()), actualCodecName);
175 }
176 }
177
fromUnicode_data()178 void tst_QTextCodec::fromUnicode_data()
179 {
180 QTest::addColumn<QString>("codecName");
181 QTest::addColumn<bool>("eightBit");
182
183 QTest::newRow("data1") << "ISO-8859-1" << true;
184 QTest::newRow("data2") << "ISO-8859-2" << true;
185 QTest::newRow("data3") << "ISO-8859-3" << true;
186 QTest::newRow("data4") << "ISO-8859-4" << true;
187 QTest::newRow("data5") << "ISO-8859-5" << true;
188 QTest::newRow("data6") << "ISO-8859-6" << true;
189 QTest::newRow("data7") << "ISO-8859-7" << true;
190 QTest::newRow("data8") << "ISO-8859-8" << true;
191 QTest::newRow("data9") << "ISO-8859-9" << true;
192 QTest::newRow("data10") << "ISO-8859-10" << true;
193 QTest::newRow("data13") << "ISO-8859-13" << true;
194 QTest::newRow("data14") << "ISO-8859-14" << true;
195 QTest::newRow("data15") << "ISO-8859-15" << true;
196 QTest::newRow("data16") << "ISO-8859-16" << true;
197
198 QTest::newRow("data18") << "IBM850" << true;
199 #ifndef Q_OS_SYMBIAN //symbian implementation will return empty string if all char are invalid
200 QTest::newRow("data19") << "IBM874" << true;
201 #endif
202 QTest::newRow("data20") << "IBM866" << true;
203
204 QTest::newRow("data21") << "windows-1250" << true;
205 QTest::newRow("data22") << "windows-1251" << true;
206 QTest::newRow("data23") << "windows-1252" << true;
207 QTest::newRow("data24") << "windows-1253" << true;
208 QTest::newRow("data25") << "windows-1254" << true;
209 #ifndef Q_OS_SYMBIAN //symbian implementation will return empty string if all char are invalid
210 QTest::newRow("data26") << "windows-1255" << true;
211 #endif
212 QTest::newRow("data27") << "windows-1256" << true;
213 QTest::newRow("data28") << "windows-1257" << true;
214 QTest::newRow("data28") << "windows-1258" << true;
215
216 QTest::newRow("data29") << "Apple Roman" << true;
217 QTest::newRow("data29") << "WINSAMI2" << true;
218 QTest::newRow("data30") << "TIS-620" << true;
219 QTest::newRow("data31") << "roman8" << true;
220
221 QTest::newRow("data32") << "SJIS" << false;
222 QTest::newRow("data33") << "EUC-KR" << false;
223
224 // all codecs from documentation
225 QTest::newRow("doc2") << "Big5" << false;
226 QTest::newRow("doc3") << "Big5-HKSCS" << false;
227 QTest::newRow("doc4") << "CP949" << false;
228 QTest::newRow("doc5") << "EUC-JP" << false;
229 QTest::newRow("doc6") << "EUC-KR" << false;
230 //QTest::newRow("doc7") << "GB18030-0" << false; // only GB18030 works
231 QTest::newRow("doc7-bis") << "GB18030" << false;
232 QTest::newRow("doc8") << "IBM 850" << false;
233 QTest::newRow("doc9") << "IBM 866" << false;
234 QTest::newRow("doc10") << "IBM 874" << false;
235 QTest::newRow("doc11") << "ISO 2022-JP" << false;
236 //ISO 8859-1 to 10 and ISO 8859-13 to 16 tested previously
237 // Iscii-Bng, Dev, Gjr, Knd, Mlm, Ori, Pnj, Tlg, and Tml tested in Iscii test
238 //QTest::newRow("doc12") << "JIS X 0201" << false; //actually not there
239 //QTest::newRow("doc13") << "JIS X 0208" << false; // actually not there
240 QTest::newRow("doc14") << "KOI8-R" << false;
241 QTest::newRow("doc15") << "KOI8-U" << false;
242 //QTest::newRow("doc16") << "MuleLao-1" << false; //only on x11
243 QTest::newRow("doc17") << "ROMAN8" << false;
244 QTest::newRow("doc18") << "Shift-JIS" << false;
245 QTest::newRow("doc19") << "TIS-620" << false;
246 QTest::newRow("doc20") << "TSCII" << false;
247 QTest::newRow("doc21") << "UTF-8" << false;
248 QTest::newRow("doc22") << "UTF-16" << false;
249 QTest::newRow("doc23") << "UTF-16BE" << false;
250 QTest::newRow("doc24") << "UTF-16LE" << false;
251 QTest::newRow("doc25") << "UTF-32" << false;
252 QTest::newRow("doc26") << "UTF-32BE" << false;
253 QTest::newRow("doc27") << "UTF-32LE" << false;
254 //Windows-1250 to 1258 tested previously
255 QTest::newRow("doc3") << "WINSAMI2" << false;
256 }
257
fromUnicode()258 void tst_QTextCodec::fromUnicode()
259 {
260 QFETCH(QString, codecName);
261 QFETCH(bool, eightBit);
262
263 QTextCodec *codec = QTextCodec::codecForName(codecName.toLatin1());
264 QVERIFY(codec != 0);
265
266 // Check if the reverse lookup is what we expect
267 if (eightBit) {
268 char chars[128];
269 for (int i = 0; i < 128; ++i)
270 chars[i] = i + 128;
271 QString s = codec->toUnicode(chars, 128);
272 QByteArray c = codec->fromUnicode(s);
273 QCOMPARE(c.size(), 128);
274
275 int numberOfQuestionMarks = 0;
276 for (int i = 0; i < 128; ++i) {
277 if (c.at(i) == '?')
278 ++numberOfQuestionMarks;
279 else
280 QCOMPARE(c.at(i), char(i + 128));
281 }
282 QVERIFY(numberOfQuestionMarks != 128);
283 }
284
285 /*
286 If the encoding is a superset of ASCII, test that the byte
287 array is correct (no off by one, no trailing '\0').
288 */
289 QByteArray result = codec->fromUnicode(QString("abc"));
290 if (result.startsWith("a")) {
291 QCOMPARE(result.size(), 3);
292 QCOMPARE(result, QByteArray("abc"));
293 } else {
294 QVERIFY(true);
295 }
296 }
297
toUnicode_codecForHtml()298 void tst_QTextCodec::toUnicode_codecForHtml()
299 {
300 QFile file(QString(SRCDIR "QT4-crashtest.txt"));
301 QVERIFY(file.open(QFile::ReadOnly));
302
303 QByteArray data = file.readAll();
304 QTextCodec *codec = Qt::codecForHtml(data);
305 codec->toUnicode(data); // this line crashes
306 }
307
308
toUnicode_incremental()309 void tst_QTextCodec::toUnicode_incremental()
310 {
311 QByteArray ba;
312 ba += char(0xf0);
313 ba += char(0x90);
314 ba += char(0x80);
315 ba += char(0x80);
316 ba += char(0xf4);
317 ba += char(0x8f);
318 ba += char(0xbf);
319 ba += char(0xbd);
320
321 QString expected = QString::fromUtf8(ba);
322
323 QString incremental;
324 QTextDecoder *utf8Decoder = QTextCodec::codecForMib(106)->makeDecoder();
325
326 QString actual;
327 for (int i = 0; i < ba.size(); ++i)
328 utf8Decoder->toUnicode(&actual, ba.constData() + i, 1);
329
330 QCOMPARE(actual, expected);
331
332
333 delete utf8Decoder;
334 }
335
codecForLocale()336 void tst_QTextCodec::codecForLocale()
337 {
338 QTextCodec *codec = QTextCodec::codecForLocale();
339 QVERIFY(codec != 0);
340
341 #if defined(Q_OS_UNIX)
342 // get a time string that is locale-encoded
343 QByteArray originalLocaleEncodedTimeString;
344 originalLocaleEncodedTimeString.resize(1024);
345 time_t t;
346 time(&t);
347 int r = strftime(originalLocaleEncodedTimeString.data(),
348 originalLocaleEncodedTimeString.size(),
349 "%A%a%B%b%Z",
350 localtime(&t));
351 if (r == 0)
352 QSKIP("strftime() failed", SkipAll);
353 originalLocaleEncodedTimeString.resize(r);
354
355 QString unicodeTimeString = codec->toUnicode(originalLocaleEncodedTimeString);
356 QByteArray localeEncodedTimeString = codec->fromUnicode(unicodeTimeString);
357 QCOMPARE(localeEncodedTimeString, originalLocaleEncodedTimeString);
358 #else
359 QSKIP("This test is not implemented on Windows", SkipAll);
360 #endif
361
362 // find a codec that is not the codecForLocale()
363 QTextCodec *codec2 = 0;
364 foreach (int mib, QTextCodec::availableMibs()) {
365 if (mib != codec->mibEnum()) {
366 codec2 = QTextCodec::codecForMib(mib);
367 if (codec2)
368 break;
369 }
370 }
371 if (!codec2) {
372 QSKIP("Could not find a codec that is not already the codecForLocale()", SkipAll);
373 }
374
375 // set it, codecForLocale() should return it now
376 QTextCodec::setCodecForLocale(codec2);
377 QCOMPARE(QTextCodec::codecForLocale(), codec2);
378
379 // reset back to the default
380 QTextCodec::setCodecForLocale(0);
381 QCOMPARE(QTextCodec::codecForLocale(), codec);
382 }
383
asciiToIscii() const384 void tst_QTextCodec::asciiToIscii() const
385 {
386 /* Add all low, 7-bit ASCII characters. */
387 QString ascii;
388 const int len = 0xA0 - 1;
389 ascii.resize(len);
390
391 for(int i = 0; i < len; ++i)
392 ascii[i] = QChar(i + 1);
393
394 static const char *const isciiCodecs[] =
395 {
396 "Iscii-Mlm",
397 "Iscii-Knd",
398 "Iscii-Tlg",
399 "Iscii-Tml",
400 "Iscii-Ori",
401 "Iscii-Gjr",
402 "Iscii-Pnj",
403 "Iscii-Bng",
404 "Iscii-Dev"
405 };
406 const int isciiCodecsLen = sizeof(isciiCodecs) / sizeof(const char *);
407
408 for(int i = 0; i < isciiCodecsLen; ++i) {
409 /* For each codec. */
410
411 const QTextCodec *const textCodec = QTextCodec::codecForName(isciiCodecs[i]);
412 QVERIFY(textCodec);
413
414 for(int i2 = 0; i2 < len; ++i2) {
415 /* For each character in ascii. */
416 const QChar c(ascii[i2]);
417 QVERIFY2(textCodec->canEncode(c), qPrintable(QString::fromLatin1("Failed to encode %1 with encoding %2")
418 .arg(QString::number(c.unicode()), QString::fromLatin1(textCodec->name().constData()))));
419 }
420
421 QVERIFY2(textCodec->canEncode(ascii), qPrintable(QString::fromLatin1("Failed for full string with encoding %1")
422 .arg(QString::fromLatin1(textCodec->name().constData()))));
423 }
424 }
425
flagCodepointFFFF() const426 void tst_QTextCodec::flagCodepointFFFF() const
427 {
428 // This is an invalid Unicode codepoint.
429 const QChar ch(0xFFFF);
430 QString input(ch);
431
432 QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
433 QVERIFY(codec);
434
435 const QByteArray asDecoded(codec->fromUnicode(input));
436 QCOMPARE(asDecoded, QByteArray("?"));
437
438 QByteArray ffff("\357\277\277");
439 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
440 QVERIFY(codec->toUnicode(ffff.constData(), ffff.length(), &state) == QChar(0));
441 QVERIFY(codec->toUnicode(ffff) == QChar(0xfffd));
442 }
443
flagF7808080() const444 void tst_QTextCodec::flagF7808080() const
445 {
446 /* This test case stems from test not-wf-sa-170, tests/qxmlstream/XML-Test-Suite/xmlconf/xmltest/not-wf/sa/166.xml,
447 * whose description reads:
448 *
449 * "Four byte UTF-8 encodings can encode UCS-4 characters
450 * which are beyond the range of legal XML characters
451 * (and can't be expressed in Unicode surrogate pairs).
452 * This document holds such a character."
453 *
454 * In binary, this is:
455 * 11110111100000001000000010000000
456 * * * * *
457 * 11110www10xxxxxx10yyyyyy10zzzzzz
458 *
459 * With multibyte logic removed it is the codepoint 0x1C0000.
460 */
461 QByteArray input;
462 input.resize(4);
463 input[0] = char(0xF7);
464 input[1] = char(0x80);
465 input[2] = char(0x80);
466 input[3] = char(0x80);
467
468
469 QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
470 QVERIFY(codec);
471
472 //QVERIFY(!codec->canEncode(QChar(0x1C0000)));
473
474 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
475 QVERIFY(codec->toUnicode(input.constData(), input.length(), &state) == QChar(0));
476 }
477
flagEFBFBF() const478 void tst_QTextCodec::flagEFBFBF() const
479 {
480 QByteArray invalidInput;
481 invalidInput.resize(3);
482 invalidInput[0] = char(0xEF);
483 invalidInput[1] = char(0xBF);
484 invalidInput[2] = char(0xBF);
485
486 const QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
487 QVERIFY(codec);
488
489 {
490 //QVERIFY(!codec->canEncode(QChar(0xFFFF)));
491 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
492 QVERIFY(codec->toUnicode(invalidInput.constData(), invalidInput.length(), &state) == QChar(0));
493
494 QByteArray start("<?pi ");
495 start.append(invalidInput);
496 start.append("?>");
497 }
498
499 /* When 0xEFBFBF is preceded by what seems to be an arbitrary character,
500 * QTextCodec fails to flag it. */
501 {
502 QByteArray start("B");
503 start.append(invalidInput);
504
505 QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
506 QVERIFY(codec->toUnicode(start.constData(), start.length(), &state) == QString::fromLatin1("B\0", 2));
507 }
508 }
509
decode0D() const510 void tst_QTextCodec::decode0D() const
511 {
512 QByteArray input;
513 input.resize(3);
514 input[0] = 'A';
515 input[1] = '\r';
516 input[2] = 'B';
517
518 QCOMPARE(QString::fromUtf8(input.constData()).toUtf8(), input);
519 }
520
/*
    Currently has no body: this test function performs no checks.
*/
void tst_QTextCodec::codecForIndex() const
{
}
524
aliasForUTF16() const525 void tst_QTextCodec::aliasForUTF16() const
526 {
527 QVERIFY(QTextCodec::codecForName("UTF-16")->aliases().isEmpty());
528 }
529
mibForTSCII() const530 void tst_QTextCodec::mibForTSCII() const
531 {
532 QTextCodec *codec = QTextCodec::codecForName("TSCII");
533 QVERIFY(codec);
534 QCOMPARE(codec->mibEnum(), 2107);
535 }
536
fromInvalidUtf8Sequence(const QByteArray & ba)537 static QString fromInvalidUtf8Sequence(const QByteArray &ba)
538 {
539 return QString().fill(QChar::ReplacementCharacter, ba.size());
540 }
541
542 // copied from tst_QString::fromUtf8_data()
utf8Codec_data()543 void tst_QTextCodec::utf8Codec_data()
544 {
545 QTest::addColumn<QByteArray>("utf8");
546 QTest::addColumn<QString>("res");
547 QTest::addColumn<int>("len");
548 QString str;
549
550 QTest::newRow("str0") << QByteArray("abcdefgh") << QString("abcdefgh") << -1;
551 QTest::newRow("str0-len") << QByteArray("abcdefgh") << QString("abc") << 3;
552 QTest::newRow("str1") << QByteArray("\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205") << QString("\366\344\374\326\304\334\370\346\345\330\306\305") << -1;
553 QTest::newRow("str1-len") << QByteArray("\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205") << QString("\366\344\374\326\304") << 10;
554
555 str += QChar(0x05e9);
556 str += QChar(0x05d3);
557 str += QChar(0x05d2);
558 QTest::newRow("str2") << QByteArray("\327\251\327\223\327\222") << str << -1;
559
560 str = QChar(0x05e9);
561 QTest::newRow("str2-len") << QByteArray("\327\251\327\223\327\222") << str << 2;
562
563 str = QChar(0x20ac);
564 str += " some text";
565 QTest::newRow("str3") << QByteArray("\342\202\254 some text") << str << -1;
566
567 str = QChar(0x20ac);
568 str += " some ";
569 QTest::newRow("str3-len") << QByteArray("\342\202\254 some text") << str << 9;
570
571 str = "hello";
572 str += QChar::ReplacementCharacter;
573 str += QChar(0x68);
574 str += QChar::ReplacementCharacter;
575 str += QChar::ReplacementCharacter;
576 str += QChar::ReplacementCharacter;
577 str += QChar::ReplacementCharacter;
578 str += QChar(0x61);
579 str += QChar::ReplacementCharacter;
580 QTest::newRow("invalid utf8") << QByteArray("hello\344h\344\344\366\344a\304") << str << -1;
581 QTest::newRow("invalid utf8-len") << QByteArray("hello\344h\344\344\366\344a\304") << QString("hello") << 5;
582
583 str = "Prohl";
584 str += QChar::ReplacementCharacter;
585 str += QChar::ReplacementCharacter;
586 str += "e";
587 str += QChar::ReplacementCharacter;
588 str += " plugin";
589 str += QChar::ReplacementCharacter;
590 str += " Netscape";
591
592 QTest::newRow("task28417") << QByteArray("Prohl\355\276e\350 plugin\371 Netscape") << str << -1;
593 QTest::newRow("task28417-len") << QByteArray("Prohl\355\276e\350 plugin\371 Netscape") << QString("") << 0;
594
595 QTest::newRow("null-1") << QByteArray() << QString() << -1;
596 QTest::newRow("null0") << QByteArray() << QString() << 0;
597 // QTest::newRow("null5") << QByteArray() << QString() << 5;
598 QTest::newRow("empty-1") << QByteArray("\0abcd", 5) << QString() << -1;
599 QTest::newRow("empty0") << QByteArray() << QString() << 0;
600 QTest::newRow("empty5") << QByteArray("\0abcd", 5) << QString::fromAscii("\0abcd", 5) << 5;
601 QTest::newRow("other-1") << QByteArray("ab\0cd", 5) << QString::fromAscii("ab") << -1;
602 QTest::newRow("other5") << QByteArray("ab\0cd", 5) << QString::fromAscii("ab\0cd", 5) << 5;
603
604 str = "Old Italic: ";
605 str += QChar(0xd800);
606 str += QChar(0xdf00);
607 str += QChar(0xd800);
608 str += QChar(0xdf01);
609 str += QChar(0xd800);
610 str += QChar(0xdf02);
611 str += QChar(0xd800);
612 str += QChar(0xdf03);
613 str += QChar(0xd800);
614 str += QChar(0xdf04);
615 QTest::newRow("surrogate") << QByteArray("Old Italic: \360\220\214\200\360\220\214\201\360\220\214\202\360\220\214\203\360\220\214\204") << str << -1;
616
617 QTest::newRow("surrogate-len") << QByteArray("Old Italic: \360\220\214\200\360\220\214\201\360\220\214\202\360\220\214\203\360\220\214\204") << str.left(16) << 20;
618
619 // from http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html
620
621 // 2.1.1 U+00000000
622 QByteArray utf8;
623 utf8 += char(0x00);
624 str = QChar(QChar::Null);
625 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.1") << utf8 << str << 1;
626
627 // 2.1.2 U+00000080
628 utf8.clear();
629 utf8 += char(0xc2);
630 utf8 += char(0x80);
631 str = QChar(0x80);
632 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.2") << utf8 << str << -1;
633
634 // 2.1.3 U+00000800
635 utf8.clear();
636 utf8 += char(0xe0);
637 utf8 += char(0xa0);
638 utf8 += char(0x80);
639 str = QChar(0x800);
640 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.3") << utf8 << str << -1;
641
642 // 2.1.4 U+00010000
643 utf8.clear();
644 utf8 += char(0xf0);
645 utf8 += char(0x90);
646 utf8 += char(0x80);
647 utf8 += char(0x80);
648 str.clear();
649 str += QChar(0xd800);
650 str += QChar(0xdc00);
651 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.4") << utf8 << str << -1;
652
653 // 2.1.5 U+00200000 (not a valid Unicode character)
654 utf8.clear();
655 utf8 += char(0xf8);
656 utf8 += char(0x88);
657 utf8 += char(0x80);
658 utf8 += char(0x80);
659 utf8 += char(0x80);
660 str = fromInvalidUtf8Sequence(utf8);
661 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.5") << utf8 << str << -1;
662
663 // 2.1.6 U+04000000 (not a valid Unicode character)
664 utf8.clear();
665 utf8 += char(0xfc);
666 utf8 += char(0x84);
667 utf8 += char(0x80);
668 utf8 += char(0x80);
669 utf8 += char(0x80);
670 utf8 += char(0x80);
671 str = fromInvalidUtf8Sequence(utf8);
672 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.6") << utf8 << str << -1;
673
674 // 2.2.1 U+0000007F
675 utf8.clear();
676 utf8 += char(0x7f);
677 str = QChar(0x7f);
678 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.1") << utf8 << str << -1;
679
680 // 2.2.2 U+000007FF
681 utf8.clear();
682 utf8 += char(0xdf);
683 utf8 += char(0xbf);
684 str = QChar(0x7ff);
685 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.2") << utf8 << str << -1;
686
687 // 2.2.3 U+000FFFF
688 utf8.clear();
689 utf8 += char(0xef);
690 utf8 += char(0xbf);
691 utf8 += char(0xbf);
692 str.clear();
693 str += QChar::ReplacementCharacter;
694 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.3") << utf8 << str << -1;
695
696 // 2.2.4 U+001FFFFF
697 utf8.clear();
698 utf8 += char(0xf7);
699 utf8 += char(0xbf);
700 utf8 += char(0xbf);
701 utf8 += char(0xbf);
702 str.clear();
703 str += QChar(QChar::ReplacementCharacter);
704 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.4") << utf8 << str << -1;
705
706 // 2.2.5 U+03FFFFFF (not a valid Unicode character)
707 utf8.clear();
708 utf8 += char(0xfb);
709 utf8 += char(0xbf);
710 utf8 += char(0xbf);
711 utf8 += char(0xbf);
712 utf8 += char(0xbf);
713 str = fromInvalidUtf8Sequence(utf8);
714 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.5") << utf8 << str << -1;
715
716 // 2.2.6 U+7FFFFFFF
717 utf8.clear();
718 utf8 += char(0xfd);
719 utf8 += char(0xbf);
720 utf8 += char(0xbf);
721 utf8 += char(0xbf);
722 utf8 += char(0xbf);
723 utf8 += char(0xbf);
724 str = fromInvalidUtf8Sequence(utf8);
725 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.6") << utf8 << str << -1;
726
727 // 2.3.1 U+0000D7FF
728 utf8.clear();
729 utf8 += char(0xed);
730 utf8 += char(0x9f);
731 utf8 += char(0xbf);
732 str = QChar(0xd7ff);
733 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.1") << utf8 << str << -1;
734
735 // 2.3.2 U+0000E000
736 utf8.clear();
737 utf8 += char(0xee);
738 utf8 += char(0x80);
739 utf8 += char(0x80);
740 str = QChar(0xe000);
741 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.2") << utf8 << str << -1;
742
743 // 2.3.3 U+0000FFFD
744 utf8.clear();
745 utf8 += char(0xef);
746 utf8 += char(0xbf);
747 utf8 += char(0xbd);
748 str = QChar(QChar::ReplacementCharacter);
749 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.3") << utf8 << str << -1;
750
751 // 2.3.4 U+0010FFFD
752 utf8.clear();
753 utf8 += char(0xf4);
754 utf8 += char(0x8f);
755 utf8 += char(0xbf);
756 utf8 += char(0xbd);
757 str.clear();
758 str += QChar(0xdbff);
759 str += QChar(0xdffd);
760 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.4") << utf8 << str << -1;
761
762 // 2.3.5 U+00110000
763 utf8.clear();
764 utf8 += char(0xf4);
765 utf8 += char(0x90);
766 utf8 += char(0x80);
767 utf8 += char(0x80);
768 str.clear();
769 str += QChar(QChar::ReplacementCharacter);
770 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.5") << utf8 << str << -1;
771
772 // 3.1.1
773 utf8.clear();
774 utf8 += char(0x80);
775 str = fromInvalidUtf8Sequence(utf8);
776 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.1") << utf8 << str << -1;
777
778 // 3.1.2
779 utf8.clear();
780 utf8 += char(0xbf);
781 str = fromInvalidUtf8Sequence(utf8);
782 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.2") << utf8 << str << -1;
783
784 // 3.1.3
785 utf8.clear();
786 utf8 += char(0x80);
787 utf8 += char(0xbf);
788 str = fromInvalidUtf8Sequence(utf8);
789 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.3") << utf8 << str << -1;
790
791 // 3.1.4
792 utf8.clear();
793 utf8 += char(0x80);
794 utf8 += char(0xbf);
795 utf8 += char(0x80);
796 str = fromInvalidUtf8Sequence(utf8);
797 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.4") << utf8 << str << -1;
798
799 // 3.1.5
800 utf8.clear();
801 utf8 += char(0x80);
802 utf8 += char(0xbf);
803 utf8 += char(0x80);
804 utf8 += char(0xbf);
805 str = fromInvalidUtf8Sequence(utf8);
806 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.5") << utf8 << str << -1;
807
808 // 3.1.6
809 utf8.clear();
810 utf8 += char(0x80);
811 utf8 += char(0xbf);
812 utf8 += char(0x80);
813 utf8 += char(0xbf);
814 utf8 += char(0x80);
815 str = fromInvalidUtf8Sequence(utf8);
816 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.6") << utf8 << str << -1;
817
818 // 3.1.7
819 utf8.clear();
820 utf8 += char(0x80);
821 utf8 += char(0xbf);
822 utf8 += char(0x80);
823 utf8 += char(0xbf);
824 utf8 += char(0x80);
825 utf8 += char(0xbf);
826 str = fromInvalidUtf8Sequence(utf8);
827 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.7") << utf8 << str << -1;
828
829 // 3.1.8
830 utf8.clear();
831 utf8 += char(0x80);
832 utf8 += char(0xbf);
833 utf8 += char(0x80);
834 utf8 += char(0xbf);
835 utf8 += char(0x80);
836 utf8 += char(0xbf);
837 utf8 += char(0x80);
838 str = fromInvalidUtf8Sequence(utf8);
839 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.8") << utf8 << str << -1;
840
841 // 3.1.9
842 utf8.clear();
843 for (uint i = 0x80; i<= 0xbf; ++i)
844 utf8 += i;
845 str = fromInvalidUtf8Sequence(utf8);
846 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.9") << utf8 << str << -1;
847
848 // 3.2.1
849 utf8.clear();
850 str.clear();
851 for (uint i = 0xc8; i <= 0xdf; ++i) {
852 utf8 += i;
853 utf8 += char(0x20);
854
855 str += QChar::ReplacementCharacter;
856 str += QChar(0x0020);
857 }
858 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.1") << utf8 << str << -1;
859
860 // 3.2.2
861 utf8.clear();
862 str.clear();
863 for (uint i = 0xe0; i <= 0xef; ++i) {
864 utf8 += i;
865 utf8 += char(0x20);
866
867 str += QChar::ReplacementCharacter;
868 str += QChar(0x0020);
869 }
870 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.2") << utf8 << str << -1;
871
872 // 3.2.3
873 utf8.clear();
874 str.clear();
875 for (uint i = 0xf0; i <= 0xf7; ++i) {
876 utf8 += i;
877 utf8 += 0x20;
878
879 str += QChar::ReplacementCharacter;
880 str += QChar(0x0020);
881 }
882 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.3") << utf8 << str << -1;
883
884 // 3.2.4
885 utf8.clear();
886 str.clear();
887 for (uint i = 0xf8; i <= 0xfb; ++i) {
888 utf8 += i;
889 utf8 += 0x20;
890
891 str += QChar::ReplacementCharacter;
892 str += QChar(0x0020);
893 }
894 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.4") << utf8 << str << -1;
895
896 // 3.2.5
897 utf8.clear();
898 str.clear();
899 for (uint i = 0xfc; i <= 0xfd; ++i) {
900 utf8 += i;
901 utf8 += 0x20;
902
903 str += QChar::ReplacementCharacter;
904 str += QChar(0x0020);
905 }
906 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.5") << utf8 << str << -1;
907
908 // 3.3.1
909 utf8.clear();
910 utf8 += char(0xc0);
911 str = fromInvalidUtf8Sequence(utf8);
912 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.1") << utf8 << str << -1;
913 utf8 += char(0x30);
914 str += 0x30;
915 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.1-1") << utf8 << str << -1;
916
917 // 3.3.2
918 utf8.clear();
919 utf8 += char(0xe0);
920 utf8 += char(0x80);
921 str = fromInvalidUtf8Sequence(utf8);
922 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2") << utf8 << str << -1;
923 utf8 += char(0x30);
924 str += 0x30;
925 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-1") << utf8 << str << -1;
926
927 utf8.clear();
928 utf8 += char(0xe0);
929 str = fromInvalidUtf8Sequence(utf8);
930 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-2") << utf8 << str << -1;
931 utf8 += 0x30;
932 str += 0x30;
933 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-3") << utf8 << str << -1;
934
935 // 3.3.3
936 utf8.clear();
937 utf8 += char(0xf0);
938 utf8 += char(0x80);
939 utf8 += char(0x80);
940 str = fromInvalidUtf8Sequence(utf8);
941 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3") << utf8 << str << -1;
942 utf8 += char(0x30);
943 str += 0x30;
944 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-1") << utf8 << str << -1;
945
946 utf8.clear();
947 utf8 += char(0xf0);
948 str = fromInvalidUtf8Sequence(utf8);
949 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-2") << utf8 << str << -1;
950 utf8 += char(0x30);
951 str += 0x30;
952 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-3") << utf8 << str << -1;
953
954 utf8.clear();
955 utf8 += char(0xf0);
956 utf8 += char(0x80);
957 str = fromInvalidUtf8Sequence(utf8);
958 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-4") << utf8 << str << -1;
959 utf8 += char(0x30);
960 str += 0x30;
961 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-5") << utf8 << str << -1;
962
963 // 3.3.4
964 utf8.clear();
965 utf8 += char(0xf8);
966 utf8 += char(0x80);
967 utf8 += char(0x80);
968 utf8 += char(0x80);
969 str = fromInvalidUtf8Sequence(utf8);
970 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4") << utf8 << str << -1;
971 utf8 += char(0x30);
972 str += 0x30;
973 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-1") << utf8 << str << -1;
974
975 utf8.clear();
976 utf8 += char(0xf8);
977 utf8 += char(0x80);
978 utf8 += char(0x80);
979 str = fromInvalidUtf8Sequence(utf8);
980 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-2") << utf8 << str << -1;
981 utf8 += char(0x30);
982 str += 0x30;
983 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-3") << utf8 << str << -1;
984
985 utf8.clear();
986 utf8 += char(0xf8);
987 utf8 += char(0x80);
988 str = fromInvalidUtf8Sequence(utf8);
989 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-4") << utf8 << str << -1;
990 utf8 += char(0x30);
991 str += 0x30;
992 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-5") << utf8 << str << -1;
993
994 utf8.clear();
995 utf8 += char(0xf8);
996 str = fromInvalidUtf8Sequence(utf8);
997 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-6") << utf8 << str << -1;
998 utf8 += char(0x30);
999 str += 0x30;
1000 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-7") << utf8 << str << -1;
1001
1002 // 3.3.5
1003 utf8.clear();
1004 utf8 += char(0xfc);
1005 utf8 += char(0x80);
1006 utf8 += char(0x80);
1007 utf8 += char(0x80);
1008 utf8 += char(0x80);
1009 str = fromInvalidUtf8Sequence(utf8);
1010 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5") << utf8 << str << -1;
1011 utf8 += char(0x30);
1012 str += 0x30;
1013 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-1") << utf8 << str << -1;
1014
1015 utf8.clear();
1016 utf8 += char(0xfc);
1017 utf8 += char(0x80);
1018 utf8 += char(0x80);
1019 utf8 += char(0x80);
1020 str = fromInvalidUtf8Sequence(utf8);
1021 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-2") << utf8 << str << -1;
1022 utf8 += char(0x30);
1023 str += 0x30;
1024 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-3") << utf8 << str << -1;
1025
1026 utf8.clear();
1027 utf8 += char(0xfc);
1028 utf8 += char(0x80);
1029 utf8 += char(0x80);
1030 str = fromInvalidUtf8Sequence(utf8);
1031 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-4") << utf8 << str << -1;
1032 utf8 += char(0x30);
1033 str += 0x30;
1034 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-5") << utf8 << str << -1;
1035
1036 utf8.clear();
1037 utf8 += char(0xfc);
1038 utf8 += char(0x80);
1039 str = fromInvalidUtf8Sequence(utf8);
1040 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-6") << utf8 << str << -1;
1041 utf8 += char(0x30);
1042 str += 0x30;
1043 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-7") << utf8 << str << -1;
1044
1045 utf8.clear();
1046 utf8 += char(0xfc);
1047 str = fromInvalidUtf8Sequence(utf8);
1048 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-8") << utf8 << str << -1;
1049 utf8 += char(0x30);
1050 str += 0x30;
1051 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-9") << utf8 << str << -1;
1052
1053 // 3.3.6
1054 utf8.clear();
1055 utf8 += char(0xdf);
1056 str = fromInvalidUtf8Sequence(utf8);
1057 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.6") << utf8 << str << -1;
1058 utf8 += char(0x30);
1059 str += 0x30;
1060 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.6-1") << utf8 << str << -1;
1061
1062 // 3.3.7
1063 utf8.clear();
1064 utf8 += char(0xef);
1065 utf8 += char(0xbf);
1066 str = fromInvalidUtf8Sequence(utf8);
1067 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7") << utf8 << str << -1;
1068 utf8 += char(0x30);
1069 str += 0x30;
1070 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-1") << utf8 << str << -1;
1071
1072 utf8.clear();
1073 utf8 += char(0xef);
1074 str = fromInvalidUtf8Sequence(utf8);
1075 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-2") << utf8 << str << -1;
1076 utf8 += char(0x30);
1077 str += 0x30;
1078 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-3") << utf8 << str << -1;
1079
1080 // 3.3.8
1081 utf8.clear();
1082 utf8 += char(0xf7);
1083 utf8 += char(0xbf);
1084 utf8 += char(0xbf);
1085 str = fromInvalidUtf8Sequence(utf8);
1086 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8") << utf8 << str << -1;
1087 utf8 += char(0x30);
1088 str += 0x30;
1089 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-1") << utf8 << str << -1;
1090
1091 utf8.clear();
1092 utf8 += char(0xf7);
1093 utf8 += char(0xbf);
1094 str = fromInvalidUtf8Sequence(utf8);
1095 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-2") << utf8 << str << -1;
1096 utf8 += char(0x30);
1097 str += 0x30;
1098 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-3") << utf8 << str << -1;
1099
1100 utf8.clear();
1101 utf8 += char(0xf7);
1102 str = fromInvalidUtf8Sequence(utf8);
1103 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-4") << utf8 << str << -1;
1104 utf8 += char(0x30);
1105 str += 0x30;
1106 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-5") << utf8 << str << -1;
1107
1108 // 3.3.9
1109 utf8.clear();
1110 utf8 += char(0xfb);
1111 utf8 += char(0xbf);
1112 utf8 += char(0xbf);
1113 utf8 += char(0xbf);
1114 str = fromInvalidUtf8Sequence(utf8);
1115 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9") << utf8 << str << -1;
1116 utf8 += char(0x30);
1117 str += 0x30;
1118 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-1") << utf8 << str << -1;
1119
1120 utf8.clear();
1121 utf8 += char(0xfb);
1122 utf8 += char(0xbf);
1123 utf8 += char(0xbf);
1124 str = fromInvalidUtf8Sequence(utf8);
1125 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-2") << utf8 << str << -1;
1126 utf8 += char(0x30);
1127 str += 0x30;
1128 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-3") << utf8 << str << -1;
1129
1130 utf8.clear();
1131 utf8 += char(0xfb);
1132 utf8 += char(0xbf);
1133 str = fromInvalidUtf8Sequence(utf8);
1134 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-4") << utf8 << str << -1;
1135 utf8 += char(0x30);
1136 str += 0x30;
1137 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-5") << utf8 << str << -1;
1138
1139 utf8.clear();
1140 utf8 += char(0xfb);
1141 str = fromInvalidUtf8Sequence(utf8);
1142 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-6") << utf8 << str << -1;
1143 utf8 += char(0x30);
1144 str += 0x30;
1145 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-7") << utf8 << str << -1;
1146
1147 // 3.3.10
1148 utf8.clear();
1149 utf8 += char(0xfd);
1150 utf8 += char(0xbf);
1151 utf8 += char(0xbf);
1152 utf8 += char(0xbf);
1153 utf8 += char(0xbf);
1154 str = fromInvalidUtf8Sequence(utf8);
1155 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10") << utf8 << str << -1;
1156 utf8 += char(0x30);
1157 str += 0x30;
1158 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-1") << utf8 << str << -1;
1159
1160 utf8.clear();
1161 utf8 += char(0xfd);
1162 utf8 += char(0xbf);
1163 utf8 += char(0xbf);
1164 utf8 += char(0xbf);
1165 str = fromInvalidUtf8Sequence(utf8);
1166 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-2") << utf8 << str << -1;
1167 utf8 += char(0x30);
1168 str += 0x30;
1169 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-3") << utf8 << str << -1;
1170
1171 utf8.clear();
1172 utf8 += char(0xfd);
1173 utf8 += char(0xbf);
1174 utf8 += char(0xbf);
1175 str = fromInvalidUtf8Sequence(utf8);
1176 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-4") << utf8 << str << -1;
1177 utf8 += char(0x30);
1178 str += 0x30;
1179 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-5") << utf8 << str << -1;
1180
1181 utf8.clear();
1182 utf8 += char(0xfd);
1183 utf8 += char(0xbf);
1184 str = fromInvalidUtf8Sequence(utf8);
1185 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-6") << utf8 << str << -1;
1186 utf8 += char(0x30);
1187 str += 0x30;
1188 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-7") << utf8 << str << -1;
1189
1190 utf8.clear();
1191 utf8 += char(0xfd);
1192 str = fromInvalidUtf8Sequence(utf8);
1193 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-8") << utf8 << str << -1;
1194 utf8 += char(0x30);
1195 str += 0x30;
1196 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-9") << utf8 << str << -1;
1197
1198 // 3.4
1199 utf8.clear();
1200 utf8 += char(0xc0);
1201 utf8 += char(0xe0);
1202 utf8 += char(0x80);
1203 utf8 += char(0xf0);
1204 utf8 += char(0x80);
1205 utf8 += char(0x80);
1206 utf8 += char(0xf8);
1207 utf8 += char(0x80);
1208 utf8 += char(0x80);
1209 utf8 += char(0x80);
1210 utf8 += char(0xfc);
1211 utf8 += char(0x80);
1212 utf8 += char(0x80);
1213 utf8 += char(0x80);
1214 utf8 += char(0x80);
1215 utf8 += char(0xdf);
1216 utf8 += char(0xef);
1217 utf8 += char(0xbf);
1218 utf8 += char(0xf7);
1219 utf8 += char(0xbf);
1220 utf8 += char(0xbf);
1221 utf8 += char(0xfb);
1222 utf8 += char(0xbf);
1223 utf8 += char(0xbf);
1224 utf8 += char(0xbf);
1225 utf8 += char(0xfd);
1226 utf8 += char(0xbf);
1227 utf8 += char(0xbf);
1228 utf8 += char(0xbf);
1229 utf8 += char(0xbf);
1230 str = fromInvalidUtf8Sequence(utf8);
1231 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.4") << utf8 << str << -1;
1232
1233 // 3.5.1
1234 utf8.clear();
1235 utf8 += char(0xfe);
1236 str = fromInvalidUtf8Sequence(utf8);
1237 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.1") << utf8 << str << -1;
1238
1239 // 3.5.2
1240 utf8.clear();
1241 utf8 += char(0xff);
1242 str = fromInvalidUtf8Sequence(utf8);
1243 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.1") << utf8 << str << -1;
1244
    // 3.5.3
1246 utf8.clear();
1247 utf8 += char(0xfe);
1248 utf8 += char(0xfe);
1249 utf8 += char(0xff);
1250 str = fromInvalidUtf8Sequence(utf8);
1251 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.1") << utf8 << str << -1;
1252
1253 // 4.1.1
1254 utf8.clear();
1255 utf8 += char(0xc0);
1256 utf8 += char(0xaf);
1257 str = QChar(QChar::ReplacementCharacter);
1258 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.1") << utf8 << str << -1;
1259
1260 // 4.1.2
1261 utf8.clear();
1262 utf8 += char(0xe0);
1263 utf8 += char(0x80);
1264 utf8 += char(0xaf);
1265 str = QChar(QChar::ReplacementCharacter);
1266 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.2") << utf8 << str << -1;
1267
1268 // 4.1.3
1269 utf8.clear();
1270 utf8 += char(0xf0);
1271 utf8 += char(0x80);
1272 utf8 += char(0x80);
1273 utf8 += char(0xaf);
1274 str = QChar(QChar::ReplacementCharacter);
1275 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.3") << utf8 << str << -1;
1276
1277 // 4.1.4
1278 utf8.clear();
1279 utf8 += char(0xf8);
1280 utf8 += char(0x80);
1281 utf8 += char(0x80);
1282 utf8 += char(0x80);
1283 utf8 += char(0xaf);
1284 str = fromInvalidUtf8Sequence(utf8);
1285 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.4") << utf8 << str << -1;
1286
1287 // 4.1.5
1288 utf8.clear();
1289 utf8 += char(0xfc);
1290 utf8 += char(0x80);
1291 utf8 += char(0x80);
1292 utf8 += char(0x80);
1293 utf8 += char(0x80);
1294 utf8 += char(0xaf);
1295 str = fromInvalidUtf8Sequence(utf8);
1296 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.5") << utf8 << str << -1;
1297
1298 // 4.2.1
1299 utf8.clear();
1300 utf8 += char(0xc1);
1301 utf8 += char(0xbf);
1302 str = QChar(QChar::ReplacementCharacter);
1303 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.1") << utf8 << str << -1;
1304
1305 // 4.2.2
1306 utf8.clear();
1307 utf8 += char(0xe0);
1308 utf8 += char(0x9f);
1309 utf8 += char(0xbf);
1310 str = QChar(QChar::ReplacementCharacter);
1311 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.2") << utf8 << str << -1;
1312
1313 // 4.2.3
1314 utf8.clear();
1315 utf8 += char(0xf0);
1316 utf8 += char(0x8f);
1317 utf8 += char(0xbf);
1318 utf8 += char(0xbf);
1319 str = QChar(QChar::ReplacementCharacter);
1320 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.3") << utf8 << str << -1;
1321
1322 // 4.2.4
1323 utf8.clear();
1324 utf8 += char(0xf8);
1325 utf8 += char(0x87);
1326 utf8 += char(0xbf);
1327 utf8 += char(0xbf);
1328 utf8 += char(0xbf);
1329 str = fromInvalidUtf8Sequence(utf8);
1330 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.4") << utf8 << str << -1;
1331
1332 // 4.2.5
1333 utf8.clear();
1334 utf8 += char(0xfc);
1335 utf8 += char(0x83);
1336 utf8 += char(0xbf);
1337 utf8 += char(0xbf);
1338 utf8 += char(0xbf);
1339 utf8 += char(0xbf);
1340 str = fromInvalidUtf8Sequence(utf8);
1341 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.5") << utf8 << str << -1;
1342
1343 // 4.3.1
1344 utf8.clear();
1345 utf8 += char(0xc0);
1346 utf8 += char(0x80);
1347 str = QChar(QChar::ReplacementCharacter);
1348 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.1") << utf8 << str << -1;
1349
1350 // 4.3.2
1351 utf8.clear();
1352 utf8 += char(0xe0);
1353 utf8 += char(0x80);
1354 utf8 += char(0x80);
1355 str = QChar(QChar::ReplacementCharacter);
1356 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.2") << utf8 << str << -1;
1357
1358 // 4.3.3
1359 utf8.clear();
1360 utf8 += char(0xf0);
1361 utf8 += char(0x80);
1362 utf8 += char(0x80);
1363 utf8 += char(0x80);
1364 str = QChar(QChar::ReplacementCharacter);
1365 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.3") << utf8 << str << -1;
1366
1367 // 4.3.4
1368 utf8.clear();
1369 utf8 += char(0xf8);
1370 utf8 += char(0x80);
1371 utf8 += char(0x80);
1372 utf8 += char(0x80);
1373 utf8 += char(0x80);
1374 str = fromInvalidUtf8Sequence(utf8);
1375 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.4") << utf8 << str << -1;
1376
1377 // 4.3.5
1378 utf8.clear();
1379 utf8 += char(0xfc);
1380 utf8 += char(0x80);
1381 utf8 += char(0x80);
1382 utf8 += char(0x80);
1383 utf8 += char(0x80);
1384 utf8 += char(0x80);
1385 str = fromInvalidUtf8Sequence(utf8);
1386 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.5") << utf8 << str << -1;
1387
1388 // 5.1.1
1389 utf8.clear();
1390 utf8 += char(0xed);
1391 utf8 += char(0xa0);
1392 utf8 += char(0x80);
1393 str = QChar(QChar::ReplacementCharacter);
1394 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.1") << utf8 << str << -1;
1395
1396 // 5.1.2
1397 utf8.clear();
1398 utf8 += char(0xed);
1399 utf8 += char(0xad);
1400 utf8 += char(0xbf);
1401 str = QChar(QChar::ReplacementCharacter);
1402 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.2") << utf8 << str << -1;
1403
1404 // 5.1.3
1405 utf8.clear();
1406 utf8 += char(0xed);
1407 utf8 += char(0xae);
1408 utf8 += char(0x80);
1409 str = QChar(QChar::ReplacementCharacter);
1410 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.3") << utf8 << str << -1;
1411
1412 // 5.1.4
1413 utf8.clear();
1414 utf8 += char(0xed);
1415 utf8 += char(0xaf);
1416 utf8 += char(0xbf);
1417 str = QChar(QChar::ReplacementCharacter);
1418 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.4") << utf8 << str << -1;
1419
1420 // 5.1.5
1421 utf8.clear();
1422 utf8 += char(0xed);
1423 utf8 += char(0xb0);
1424 utf8 += char(0x80);
1425 str = QChar(QChar::ReplacementCharacter);
1426 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.5") << utf8 << str << -1;
1427
1428 // 5.1.6
1429 utf8.clear();
1430 utf8 += char(0xed);
1431 utf8 += char(0xbe);
1432 utf8 += char(0x80);
1433 str = QChar(QChar::ReplacementCharacter);
1434 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.6") << utf8 << str << -1;
1435
1436 // 5.1.7
1437 utf8.clear();
1438 utf8 += char(0xed);
1439 utf8 += char(0xbf);
1440 utf8 += char(0xbf);
1441 str = QChar(QChar::ReplacementCharacter);
1442 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.7") << utf8 << str << -1;
1443
1444 // 5.2.1
1445 utf8.clear();
1446 utf8 += char(0xed);
1447 utf8 += char(0xa0);
1448 utf8 += char(0x80);
1449 utf8 += char(0xed);
1450 utf8 += char(0xb0);
1451 utf8 += char(0x80);
1452 str.clear();
1453 str += QChar(QChar::ReplacementCharacter);
1454 str += QChar(QChar::ReplacementCharacter);
1455 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.1") << utf8 << str << -1;
1456
1457 // 5.2.2
1458 utf8.clear();
1459 utf8 += char(0xed);
1460 utf8 += char(0xa0);
1461 utf8 += char(0x80);
1462 utf8 += char(0xed);
1463 utf8 += char(0xbf);
1464 utf8 += char(0xbf);
1465 str.clear();
1466 str += QChar(QChar::ReplacementCharacter);
1467 str += QChar(QChar::ReplacementCharacter);
1468 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.2") << utf8 << str << -1;
1469
1470 // 5.2.3
1471 utf8.clear();
1472 utf8 += char(0xed);
1473 utf8 += char(0xad);
1474 utf8 += char(0xbf);
1475 utf8 += char(0xed);
1476 utf8 += char(0xb0);
1477 utf8 += char(0x80);
1478 str.clear();
1479 str += QChar(QChar::ReplacementCharacter);
1480 str += QChar(QChar::ReplacementCharacter);
1481 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.3") << utf8 << str << -1;
1482
1483 // 5.2.4
1484 utf8.clear();
1485 utf8 += char(0xed);
1486 utf8 += char(0xad);
1487 utf8 += char(0xbf);
1488 utf8 += char(0xed);
1489 utf8 += char(0xbf);
1490 utf8 += char(0xbf);
1491 str.clear();
1492 str += QChar(QChar::ReplacementCharacter);
1493 str += QChar(QChar::ReplacementCharacter);
1494 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.4") << utf8 << str << -1;
1495
1496 // 5.2.5
1497 utf8.clear();
1498 utf8 += char(0xed);
1499 utf8 += char(0xae);
1500 utf8 += char(0x80);
1501 utf8 += char(0xed);
1502 utf8 += char(0xb0);
1503 utf8 += char(0x80);
1504 str.clear();
1505 str += QChar(QChar::ReplacementCharacter);
1506 str += QChar(QChar::ReplacementCharacter);
1507 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.5") << utf8 << str << -1;
1508
1509 // 5.2.6
1510 utf8.clear();
1511 utf8 += char(0xed);
1512 utf8 += char(0xae);
1513 utf8 += char(0x80);
1514 utf8 += char(0xed);
1515 utf8 += char(0xbf);
1516 utf8 += char(0xbf);
1517 str.clear();
1518 str += QChar(QChar::ReplacementCharacter);
1519 str += QChar(QChar::ReplacementCharacter);
1520 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.6") << utf8 << str << -1;
1521
1522 // 5.2.7
1523 utf8.clear();
1524 utf8 += char(0xed);
1525 utf8 += char(0xaf);
1526 utf8 += char(0xbf);
1527 utf8 += char(0xed);
1528 utf8 += char(0xb0);
1529 utf8 += char(0x80);
1530 str.clear();
1531 str += QChar(QChar::ReplacementCharacter);
1532 str += QChar(QChar::ReplacementCharacter);
1533 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.7") << utf8 << str << -1;
1534
1535 // 5.2.8
1536 utf8.clear();
1537 utf8 += char(0xed);
1538 utf8 += char(0xaf);
1539 utf8 += char(0xbf);
1540 utf8 += char(0xed);
1541 utf8 += char(0xbf);
1542 utf8 += char(0xbf);
1543 str.clear();
1544 str += QChar(QChar::ReplacementCharacter);
1545 str += QChar(QChar::ReplacementCharacter);
1546 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
1547
1548 // 5.3.1
1549 utf8.clear();
1550 utf8 += char(0xef);
1551 utf8 += char(0xbf);
1552 utf8 += char(0xbe);
1553 str = QChar(QChar::ReplacementCharacter);
1554 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
1555
1556 // 5.3.2
1557 utf8.clear();
1558 utf8 += char(0xef);
1559 utf8 += char(0xbf);
1560 utf8 += char(0xbf);
1561 str = QChar(QChar::ReplacementCharacter);
1562 QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
1563 }
1564
utf8Codec()1565 void tst_QTextCodec::utf8Codec()
1566 {
1567 QTextCodec *codec = QTextCodec::codecForMib(106); // UTF-8
1568 QVERIFY(codec != 0);
1569
1570 QFETCH(QByteArray, utf8);
1571 QFETCH(QString, res);
1572 QFETCH(int, len);
1573
1574 QString str = codec->toUnicode(utf8.isNull() ? 0 : utf8.constData(),
1575 len < 0 ? qstrlen(utf8.constData()) : len);
1576 QCOMPARE(str, res);
1577
1578 str = QString::fromUtf8(utf8.isNull() ? 0 : utf8.constData(), len);
1579 QCOMPARE(str, res);
1580 }
1581
utf8bom_data()1582 void tst_QTextCodec::utf8bom_data()
1583 {
1584 QTest::addColumn<QByteArray>("data");
1585 QTest::addColumn<QString>("result");
1586
1587 QTest::newRow("nobom")
1588 << QByteArray("\302\240", 2)
1589 << QString("\240");
1590
1591 {
1592 static const ushort data[] = { 0x201d };
1593 QTest::newRow("nobom 2")
1594 << QByteArray("\342\200\235", 3)
1595 << QString::fromUtf16(data, sizeof(data)/sizeof(short));
1596 }
1597
1598 {
1599 static const ushort data[] = { 0xf000 };
1600 QTest::newRow("bom1")
1601 << QByteArray("\357\200\200", 3)
1602 << QString::fromUtf16(data, sizeof(data)/sizeof(short));
1603 }
1604
1605 {
1606 static const ushort data[] = { 0xfec0 };
1607 QTest::newRow("bom2")
1608 << QByteArray("\357\273\200", 3)
1609 << QString::fromUtf16(data, sizeof(data)/sizeof(short));
1610 }
1611
1612 {
1613 QTest::newRow("normal-bom")
1614 << QByteArray("\357\273\277a", 4)
1615 << QString("a");
1616 }
1617
1618 {
1619 static const ushort data[] = { 0x61, 0xfeff, 0x62 };
1620 QTest::newRow("middle-bom")
1621 << QByteArray("a\357\273\277b", 5)
1622 << QString::fromUtf16(data, sizeof(data)/sizeof(short));
1623 }
1624 }
1625
utf8bom()1626 void tst_QTextCodec::utf8bom()
1627 {
1628 QFETCH(QByteArray, data);
1629 QFETCH(QString, result);
1630
1631 QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
1632 QVERIFY(codec);
1633
1634 QCOMPARE(codec->toUnicode(data.constData(), data.length(), 0), result);
1635
1636 QTextCodec::ConverterState state;
1637 QCOMPARE(codec->toUnicode(data.constData(), data.length(), &state), result);
1638 }
1639
utfHeaders_data()1640 void tst_QTextCodec::utfHeaders_data()
1641 {
1642 QTest::addColumn<QByteArray>("codecName");
1643 QTest::addColumn<int>("flags");
1644 QTest::addColumn<QByteArray>("encoded");
1645 QTest::addColumn<QString>("unicode");
1646 QTest::addColumn<bool>("toUnicode");
1647
1648 QTest::newRow("utf8 bom")
1649 << QByteArray("UTF-8")
1650 << 0
1651 << QByteArray("\xef\xbb\xbfhello")
1652 << QString::fromLatin1("hello")
1653 << true;
1654 QTest::newRow("utf8 nobom")
1655 << QByteArray("UTF-8")
1656 << 0
1657 << QByteArray("hello")
1658 << QString::fromLatin1("hello")
1659 << true;
1660 QTest::newRow("utf8 bom ignore header")
1661 << QByteArray("UTF-8")
1662 << (int)QTextCodec::IgnoreHeader
1663 << QByteArray("\xef\xbb\xbfhello")
1664 << (QString(QChar(0xfeff)) + QString::fromLatin1("hello"))
1665 << true;
1666 QTest::newRow("utf8 nobom ignore header")
1667 << QByteArray("UTF-8")
1668 << (int)QTextCodec::IgnoreHeader
1669 << QByteArray("hello")
1670 << QString::fromLatin1("hello")
1671 << true;
1672
1673 QTest::newRow("utf16 bom be")
1674 << QByteArray("UTF-16")
1675 << 0
1676 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1677 << QString::fromLatin1("hel")
1678 << true;
1679 QTest::newRow("utf16 bom le")
1680 << QByteArray("UTF-16")
1681 << 0
1682 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1683 << QString::fromLatin1("hel")
1684 << true;
1685 if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
1686 QTest::newRow("utf16 nobom")
1687 << QByteArray("UTF-16")
1688 << 0
1689 << QByteArray("\0h\0e\0l", 6)
1690 << QString::fromLatin1("hel")
1691 << true;
1692 QTest::newRow("utf16 bom be ignore header")
1693 << QByteArray("UTF-16")
1694 << (int)QTextCodec::IgnoreHeader
1695 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1696 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1697 << true;
1698 } else {
1699 QTest::newRow("utf16 nobom")
1700 << QByteArray("UTF-16")
1701 << 0
1702 << QByteArray("h\0e\0l\0", 6)
1703 << QString::fromLatin1("hel")
1704 << true;
1705 QTest::newRow("utf16 bom le ignore header")
1706 << QByteArray("UTF-16")
1707 << (int)QTextCodec::IgnoreHeader
1708 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1709 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1710 << true;
1711 }
1712
1713 QTest::newRow("utf16-be bom be")
1714 << QByteArray("UTF-16BE")
1715 << 0
1716 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1717 << QString::fromLatin1("hel")
1718 << true;
1719 QTest::newRow("utf16-be nobom")
1720 << QByteArray("UTF-16BE")
1721 << 0
1722 << QByteArray("\0h\0e\0l", 6)
1723 << QString::fromLatin1("hel")
1724 << true;
1725 QTest::newRow("utf16-be bom be ignore header")
1726 << QByteArray("UTF-16BE")
1727 << (int)QTextCodec::IgnoreHeader
1728 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1729 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1730 << true;
1731
1732 QTest::newRow("utf16-le bom le")
1733 << QByteArray("UTF-16LE")
1734 << 0
1735 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1736 << QString::fromLatin1("hel")
1737 << true;
1738 QTest::newRow("utf16-le nobom")
1739 << QByteArray("UTF-16LE")
1740 << 0
1741 << QByteArray("h\0e\0l\0", 6)
1742 << QString::fromLatin1("hel")
1743 << true;
1744 QTest::newRow("utf16-le bom le ignore header")
1745 << QByteArray("UTF-16LE")
1746 << (int)QTextCodec::IgnoreHeader
1747 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1748 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1749 << true;
1750
1751
1752 QTest::newRow("utf32 bom be")
1753 << QByteArray("UTF-32")
1754 << 0
1755 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1756 << QString::fromLatin1("hel")
1757 << true;
1758 QTest::newRow("utf32 bom le")
1759 << QByteArray("UTF-32")
1760 << 0
1761 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1762 << QString::fromLatin1("hel")
1763 << true;
1764 if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
1765 QTest::newRow("utf32 nobom")
1766 << QByteArray("UTF-32")
1767 << 0
1768 << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
1769 << QString::fromLatin1("hel")
1770 << true;
1771 QTest::newRow("utf32 bom be ignore header")
1772 << QByteArray("UTF-32")
1773 << (int)QTextCodec::IgnoreHeader
1774 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1775 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1776 << true;
1777 } else {
1778 QTest::newRow("utf32 nobom")
1779 << QByteArray("UTF-32")
1780 << 0
1781 << QByteArray("h\0\0\0e\0\0\0l\0\0\0", 12)
1782 << QString::fromLatin1("hel")
1783 << true;
1784 QTest::newRow("utf32 bom le ignore header")
1785 << QByteArray("UTF-32")
1786 << (int)QTextCodec::IgnoreHeader
1787 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1788 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1789 << true;
1790 }
1791
1792
1793 QTest::newRow("utf32-be bom be")
1794 << QByteArray("UTF-32BE")
1795 << 0
1796 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1797 << QString::fromLatin1("hel")
1798 << true;
1799 QTest::newRow("utf32-be nobom")
1800 << QByteArray("UTF-32BE")
1801 << 0
1802 << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
1803 << QString::fromLatin1("hel")
1804 << true;
1805 QTest::newRow("utf32-be bom be ignore header")
1806 << QByteArray("UTF-32BE")
1807 << (int)QTextCodec::IgnoreHeader
1808 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1809 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1810 << true;
1811
1812
1813 QTest::newRow("utf32-le bom le")
1814 << QByteArray("UTF-32LE")
1815 << 0
1816 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1817 << QString::fromLatin1("hel")
1818 << true;
1819 QTest::newRow("utf32-le nobom")
1820 << QByteArray("UTF-32LE")
1821 << 0
1822 << QByteArray("h\0\0\0e\0\0\0l\0\0\0", 12)
1823 << QString::fromLatin1("hel")
1824 << true;
1825 QTest::newRow("utf32-le bom le ignore header")
1826 << QByteArray("UTF-32LE")
1827 << (int)QTextCodec::IgnoreHeader
1828 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1829 << (QString(QChar(0xfeff)) + QString::fromLatin1("hel"))
1830 << true;
1831 }
1832
utfHeaders()1833 void tst_QTextCodec::utfHeaders()
1834 {
1835 QFETCH(QByteArray, codecName);
1836 QTextCodec *codec = QTextCodec::codecForName(codecName);
1837 QVERIFY(codec != 0);
1838
1839 QFETCH(int, flags);
1840 QTextCodec::ConversionFlags cFlags = QTextCodec::ConversionFlags(flags);
1841 QTextCodec::ConverterState state(cFlags);
1842
1843 QFETCH(QByteArray, encoded);
1844 QFETCH(QString, unicode);
1845
1846 QFETCH(bool, toUnicode);
1847
1848 QLatin1String ignoreReverseTestOn = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? QLatin1String(" le") : QLatin1String(" be");
1849 QString rowName(QTest::currentDataTag());
1850
1851 /*for (int i = 0; i < encoded.length(); ++i)
1852 qDebug() << hex << " " << (uint)(uchar)encoded.at(i);*/
1853 if (toUnicode) {
1854 QString result = codec->toUnicode(encoded.constData(), encoded.length(), &state);
1855 /*for (int i = 0; i < result.length(); ++i)
1856 qDebug() << hex << " " << (uint)result.at(i).unicode();*/
1857 QCOMPARE(result.length(), unicode.length());
1858 QCOMPARE(result, unicode);
1859
1860 if (!rowName.endsWith("nobom") && !rowName.contains(ignoreReverseTestOn)) {
1861 QTextCodec::ConverterState state2(cFlags);
1862 QByteArray reencoded = codec->fromUnicode(unicode.unicode(), unicode.length(), &state2);
1863 QCOMPARE(reencoded, encoded);
1864 }
1865 } else {
1866 QByteArray result = codec->fromUnicode(unicode.unicode(), unicode.length(), &state);
1867 QCOMPARE(result, encoded);
1868 }
1869 }
1870
codecForHtml()1871 void tst_QTextCodec::codecForHtml()
1872 {
1873 QByteArray html("<html><head></head><body>blah</body></html>");
1874
1875 QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), 4); // latin 1
1876
1877 QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 106); // UTF-8
1878
1879 html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=ISO-8859-15\" /></head></html>";
1880 QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15
1881
1882 html = "<html><head><meta content=\"text/html; charset=ISO-8859-15\" http-equiv=\"content-type\" /></head></html>";
1883 QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15
1884
1885 html = "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=invalid-foo\" /></head></html>";
1886 QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 106); // UTF-8
1887 QCOMPARE(QTextCodec::codecForHtml(html)->mibEnum(), 4); // latin 1
1888 }
1889
codecForUtfText_data()1890 void tst_QTextCodec::codecForUtfText_data()
1891 {
1892 QTest::addColumn<QByteArray>("encoded");
1893 QTest::addColumn<bool>("detected");
1894 QTest::addColumn<int>("mib");
1895
1896
1897 QTest::newRow("utf8 bom")
1898 << QByteArray("\xef\xbb\xbfhello")
1899 << true
1900 << 106;
1901 QTest::newRow("utf8 nobom")
1902 << QByteArray("hello")
1903 << false
1904 << 0;
1905
1906 QTest::newRow("utf16 bom be")
1907 << QByteArray("\xfe\xff\0h\0e\0l", 8)
1908 << true
1909 << 1013;
1910 QTest::newRow("utf16 bom le")
1911 << QByteArray("\xff\xfeh\0e\0l\0", 8)
1912 << true
1913 << 1014;
1914 QTest::newRow("utf16 nobom")
1915 << QByteArray("\0h\0e\0l", 6)
1916 << false
1917 << 0;
1918
1919 QTest::newRow("utf32 bom be")
1920 << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
1921 << true
1922 << 1018;
1923 QTest::newRow("utf32 bom le")
1924 << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
1925 << true
1926 << 1019;
1927 QTest::newRow("utf32 nobom")
1928 << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
1929 << false
1930 << 0;
1931 }
1932
codecForUtfText()1933 void tst_QTextCodec::codecForUtfText()
1934 {
1935 QFETCH(QByteArray, encoded);
1936 QFETCH(bool, detected);
1937 QFETCH(int, mib);
1938
1939 QTextCodec *codec = QTextCodec::codecForUtfText(encoded, 0);
1940 if (detected)
1941 QCOMPARE(codec->mibEnum(), mib);
1942 else
1943 QVERIFY(codec == 0);
1944 }
1945
#ifdef Q_OS_UNIX
// Launches the "echo/echo" helper, writes one UTF-16 code unit (U+0410) to
// its stdin, and checks that the helper exits normally with exit code 0.
// Skipped when QProcess is unavailable or on Symbian.
void tst_QTextCodec::toLocal8Bit()
{
#ifdef QT_NO_PROCESS
    QSKIP("This test requires QProcess", SkipAll);
#elif defined(Q_OS_SYMBIAN)
    QSKIP("This test requires streams support in QProcess", SkipAll);
#else
    QProcess process;
    process.start("echo/echo");
    QString string(QChar(0x410));
    process.write((const char*)string.utf16(), string.length()*2);

    process.closeWriteChannel();
    // Check the wait itself: without this, the exit status/code comparisons
    // below would run even if the helper never started or never finished,
    // and so would not prove anything about the helper.
    QVERIFY2(process.waitForFinished(), qPrintable(process.errorString()));
    QCOMPARE(process.exitStatus(), QProcess::NormalExit);
    QCOMPARE(process.exitCode(), 0);
#endif
}
#endif
1966
// Map function used by threadSafety(): looks up the codec called codecName
// and runs the name itself through an encode/decode pass with that codec.
// Returns codecName when the codec exists; otherwise logs a warning and
// returns an empty QByteArray.
static QByteArray loadAndConvert(const QByteArray &codecName)
{
    QTextCodec *codec = QTextCodec::codecForName(codecName);
    if (codec) {
        const QString nameAsText = QString::fromLatin1(codecName);
        const QByteArray encodedName = codec->fromUnicode(nameAsText);
        // Only the conversion call matters here; its result is not inspected.
        codec->toUnicode(encodedName);
        return codecName;
    }

    qDebug() << "WARNING " << codecName << " not found? ";
    return QByteArray();
}
1979
loadAndConvertMIB(int mib)1980 static int loadAndConvertMIB(int mib)
1981 {
1982 QTextCodec *c = QTextCodec::codecForMib(mib);
1983 if (!c) {
1984 qDebug() << "WARNING " << mib << " not found? ";
1985 return 0;
1986 }
1987 QString str = QString::number(mib);
1988 QByteArray b = c->fromUnicode(str);
1989 c->toUnicode(b);
1990 return mib;
1991 }
1992
1993
threadSafety()1994 void tst_QTextCodec::threadSafety()
1995 {
1996 QList<QByteArray> codecList = QTextCodec::availableCodecs();
1997 QList<int> mibList = QTextCodec::availableMibs();
1998 #ifndef QT_NO_CONCURRENT
1999 QThreadPool::globalInstance()->setMaxThreadCount(12);
2000
2001 QFuture<QByteArray> res = QtConcurrent::mapped(codecList, loadAndConvert);
2002
2003
2004 QFuture<int> res2 = QtConcurrent::mapped(mibList, loadAndConvertMIB);
2005
2006 QCOMPARE(res.results(), codecList);
2007 QCOMPARE(res2.results(), mibList);
2008 #else
2009 QSKIP("This function is not yet supported with QT_NO_CONCURRENT defined.", SkipAll);
2010 #endif
2011 }
2012
invalidNames()2013 void tst_QTextCodec::invalidNames()
2014 {
2015 QVERIFY(!QTextCodec::codecForName(""));
2016 QVERIFY(!QTextCodec::codecForName(QByteArray()));
2017 QVERIFY(!QTextCodec::codecForName("-"));
2018 QVERIFY(!QTextCodec::codecForName("\1a\2b\3a\4d\5c\6s\7a\xffr\xec_\x9c_"));
2019 QVERIFY(!QTextCodec::codecForName("\n"));
2020 QVERIFY(!QTextCodec::codecForName("don't exist"));
2021 QByteArray huge = "azertyuiop^$qsdfghjklm<wxcvbn,;:=1234567890�_";
2022 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2023 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2024 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2025 huge = huge + huge + huge + huge + huge + huge + huge + huge;
2026 QVERIFY(!QTextCodec::codecForName(huge));
2027 }
2028
checkAliases_data()2029 void tst_QTextCodec::checkAliases_data()
2030 {
2031 QTest::addColumn<QByteArray>("codecName");
2032 QList<QByteArray> codecList = QTextCodec::availableCodecs();
2033 foreach (const QByteArray &a, codecList) {
2034 QTest::newRow( a.constData() ) << a;
2035 }
2036 }
2037
checkAliases()2038 void tst_QTextCodec::checkAliases()
2039 {
2040 QFETCH( QByteArray, codecName );
2041 QTextCodec *c = QTextCodec::codecForName(codecName);
2042 QVERIFY(c);
2043 QCOMPARE(QTextCodec::codecForName(codecName), c);
2044 QCOMPARE(QTextCodec::codecForName(c->name()), c);
2045
2046 foreach(const QByteArray &a, c->aliases()) {
2047 QCOMPARE(QTextCodec::codecForName(a), c);
2048 }
2049 }
2050
2051
moreToFromUnicode_data()2052 void tst_QTextCodec::moreToFromUnicode_data() {
2053 QTest::addColumn<QByteArray>("codecName");
2054 QTest::addColumn<QByteArray>("testData");
2055
2056 QTest::newRow("russian") << QByteArray("ISO-8859-5")
2057 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF\x00");
2058
2059 QTest::newRow("arabic") << QByteArray("ISO-8859-6")
2060 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA4\xAC\xAD\xBB\xBF\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2");
2061
2062 QTest::newRow("greek") << QByteArray("ISO-8859-7")
2063 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA6\xA7\xA8\xA9\xAB\xAC\xAD\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE");
2064
2065 QTest::newRow("turkish") << QByteArray("ISO-8859-9")
2066 << QByteArray("\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2067
2068 #ifdef Q_OS_SYMBIAN
2069 QTest::newRow("thai") << QByteArray("TIS-620")
2070 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB");
2071 #endif
2072
2073 QTest::newRow("latin1") << QByteArray("ISO-8859-1")
2074 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2075
2076 QByteArray sms7bit_ba;
2077 for (int i=1; i <= 0x7f; ++i) {
2078 if (i!='\x1b') {
2079 sms7bit_ba.append(i);
2080 }
2081 }
2082 #ifdef Q_OS_SYMBIAN
2083 QTest::newRow("sms7bit") << QByteArray("SMS 7-bit") << sms7bit_ba;
2084 #endif
2085
2086 QTest::newRow("latin2") << QByteArray("ISO-8859-2")
2087 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2088
2089 QTest::newRow("latin3") << QByteArray("ISO-8859-3")
2090 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBF\xC0\xC1\xC2\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2091
2092 QTest::newRow("latin4") << QByteArray("ISO-8859-4")
2093 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2094
2095 QTest::newRow("russian 2") << QByteArray("ISO-8859-5")
2096 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2097
2098 QTest::newRow("arabic 2") << QByteArray("ISO-8859-6")
2099 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA4\xAC\xAD\xBB\xBF\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2");
2100
2101 QTest::newRow("greek 2") << QByteArray("ISO-8859-7")
2102 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA6\xA7\xA8\xA9\xAB\xAC\xAD\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE");
2103
2104 #ifdef Q_OS_SYMBIAN
2105 QTest::newRow("hebriew") << QByteArray("ISO-8859-8")
2106 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFD\xFE");
2107 #endif
2108
2109 QTest::newRow("latin5") << QByteArray("ISO-8859-9")
2110 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2111
2112 QTest::newRow("latin6") << QByteArray("ISO-8859-10")
2113 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2114
2115 #if 0
2116 QByteArray iso8859_11_ba;
2117 for (int x=0x20; x<=0x7f; ++x) {
2118 iso8859_11_ba.append(x);
2119 }
2120 for (int x=0xa0; x<0xff; ++x) {
2121 if ((x>=0xdb && x<0xdf) || x>0xfb){
2122 continue;
2123 }
2124 iso8859_11_ba.append(x);
2125 }
2126 QTest::newRow("latin-thai") << QByteArray("ISO-8859-11") << iso8859_11_ba;
2127 #endif
2128
2129 QTest::newRow("latin7") << QByteArray("ISO-8859-13")
2130 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2131
2132 QTest::newRow("celtic") << QByteArray("ISO-8859-14")
2133 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2134
2135 QTest::newRow("latin9") << QByteArray("ISO-8859-15")
2136 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2137
2138 QTest::newRow("latin10") << QByteArray("ISO-8859-16")
2139 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2140
2141 QTest::newRow("cp850") << QByteArray("CP850")
2142 << QByteArray("\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff");
2143
2144 QTest::newRow("cp874") << QByteArray("CP874")
2145 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x85\x91\x92\x93\x94\x95\x96\x97\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB");
2146
2147 QTest::newRow("cp1250") << QByteArray("CP1250")
2148 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x84\x85\x86\x87\x89\x8A\x8B\x8C\x8D\x8E\x8F\x91\x92\x93\x94\x95\x96\x97\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2149
2150 QTest::newRow("cp1251") << QByteArray("CP1251")
2151 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2152
2153 QTest::newRow("cp1252") << QByteArray("CP1252")
2154 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8E\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2155
2156 QTest::newRow("cp1253") << QByteArray("CP1253")
2157 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x89\x8B\x91\x92\x93\x94\x95\x96\x97\x99\x9B\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE");
2158
2159 QTest::newRow("cp1254") << QByteArray("CP1254")
2160 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2161
2162 QTest::newRow("cp1255") << QByteArray("CP1255")
2163 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89,x8B\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9B\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFD\xFE");
2164
2165 QTest::newRow("cp1256") << QByteArray("CP1256")
2166 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2167
2168 QTest::newRow("cp1257") << QByteArray("CP1257")
2169 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x84\x85\x86\x87\x89\x8B\x8D\x8E\x8F\x91\x92\x93\x94\x95\x96\x97\x99\x9B\x9D\x9E\xA0\xA2\xA3\xA4\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2170
2171 QTest::newRow("cp1258") << QByteArray("CP1258")
2172 << QByteArray("\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x82\x83\x84\x85\x86\x87\x88\x89\x8B\x8C\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9B\x9C\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
2173
2174 QByteArray koi8_r_ba;
2175 for (int x=0x20; x<=0xff; ++x) {
2176 if (x!=0x9A && x!=0xbf) {
2177 koi8_r_ba.append(x);
2178 }
2179 }
2180 QTest::newRow("KOI8-R") << QByteArray("KOI8-R") << koi8_r_ba;
2181
2182 QByteArray koi8_u_ba;
2183 for (int x=0x20; x<=0xff; ++x) {
2184 koi8_u_ba.append(x);
2185 }
2186 QTest::newRow("KOI8-U") << QByteArray("KOI8-U") << koi8_u_ba;
2187
2188
2189 QByteArray big5_ba;
2190 for (unsigned char u=0xa1; u<=0xf9; u++) {
2191 if (u==0xc8) {
2192 continue;
2193 }
2194 for (unsigned char v=0x40; v<=0x7e; v++) {
2195 big5_ba.append(u);
2196 big5_ba.append(v);
2197 }
2198 unsigned char v_up;
2199 switch (u) {
2200 case 0xa3: v_up=0xbf; break;
2201 case 0xc7: v_up=0xfc; break;
2202 case 0xf9: v_up=0xd5; break;
2203 default: v_up=0xfe;
2204 }
2205
2206 for (unsigned char v=0xa1; v<=v_up; v++) {
2207 if (u==0xa2 && (v==0xcc || v==0xce)) {
2208 continue;
2209 }
2210 big5_ba.append(u);
2211 big5_ba.append(v);
2212 }
2213 }
2214
2215 QTest::newRow("BIG5") << QByteArray("BIG5") << big5_ba;
2216
2217 QByteArray gb2312_ba;
2218 for (unsigned char u=0xa1; u<=0xf7; u++) {
2219 for (unsigned char v=0xa1; v<=0xfe; v++) {
2220 gb2312_ba.append(u);
2221 gb2312_ba.append(v);
2222 }
2223 }
2224
2225 QTest::newRow("GB2312") << QByteArray("GB2312") << gb2312_ba;
2226 }
2227
moreToFromUnicode()2228 void tst_QTextCodec::moreToFromUnicode()
2229 {
2230 QFETCH( QByteArray, codecName );
2231 QFETCH( QByteArray, testData );
2232
2233 QTextCodec *c = QTextCodec::codecForName( codecName.data() );
2234 QVERIFY(c);
2235
2236 QString uStr = c->toUnicode(testData);
2237 QByteArray cStr = c->fromUnicode(uStr);
2238 QCOMPARE(testData, cStr);
2239 }
2240
shiftJis()2241 void tst_QTextCodec::shiftJis()
2242 {
2243 QByteArray backslashTilde("\\~");
2244 QTextCodec* codec = QTextCodec::codecForName("shift_jis");
2245 QString string = codec->toUnicode(backslashTilde);
2246 QCOMPARE(string.length(), 2);
2247 QCOMPARE(string.at(0), QChar(QLatin1Char('\\')));
2248 QCOMPARE(string.at(1), QChar(QLatin1Char('~')));
2249
2250 QByteArray encoded = codec->fromUnicode(string);
2251 QCOMPARE(encoded, backslashTilde);
2252 }
2253
2254 struct DontCrashAtExit {
~DontCrashAtExitDontCrashAtExit2255 ~DontCrashAtExit() {
2256 QTextCodec *c = QTextCodec::codecForName("utf8");
2257 if (c)
2258 c->toUnicode("azerty");
2259
2260 }
2261 } dontCrashAtExit;
2262
2263
2264 QTEST_MAIN(tst_QTextCodec)
2265 #include "tst_qtextcodec.moc"
2266