1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 #include "qisciicodec_p.h"
40 #include "qtextcodec_p.h"
41 #include "qlist.h"
42 
43 QT_BEGIN_NAMESPACE
44 
45 /*!
46     \class QIsciiCodec
47     \inmodule QtCore
48     \brief The QIsciiCodec class provides conversion to and from the ISCII encoding.
49 
50     \internal
51 */
52 
53 
// Describes one ISCII script variant handled by this file: the codec name it
// is registered under and the code point its Unicode offsets are relative to.
struct Codecs {
    const char name[10];    // codec name; the entries below are 9 characters + NUL
    ushort base;            // subtracted from / added to code points by the converters
};
58 
// Supported ISCII variants. create() hands the index of the matching row to
// the QIsciiCodec constructor; name() and the converters pick their row via idx.
// Each base is the first code point of the corresponding Unicode script block.
static const Codecs codecs [] = {
    { "iscii-dev", 0x900 }, // Devanagari
    { "iscii-bng", 0x980 }, // Bengali
    { "iscii-pnj", 0xa00 }, // Gurmukhi (Punjabi)
    { "iscii-gjr", 0xa80 }, // Gujarati
    { "iscii-ori", 0xb00 }, // Oriya
    { "iscii-tml", 0xb80 }, // Tamil
    { "iscii-tlg", 0xc00 }, // Telugu
    { "iscii-knd", 0xc80 }, // Kannada
    { "iscii-mlm", 0xd00 }  // Malayalam
};
70 
create(const char * name)71 QTextCodec *QIsciiCodec::create(const char *name)
72 {
73     for (int i = 0; i < 9; ++i) {
74         if (qTextCodecNameMatch(name, codecs[i].name))
75             return new QIsciiCodec(i);
76     }
77     return nullptr;
78 }
79 
~QIsciiCodec()80 QIsciiCodec::~QIsciiCodec()
81 {
82 }
83 
name() const84 QByteArray QIsciiCodec::name() const
85 {
86   return codecs[idx].name;
87 }
88 
mibEnum() const89 int QIsciiCodec::mibEnum() const
90 {
91     /* There is no MIBEnum for Iscii */
92     return -3000-idx;
93 }
94 
// Marker stored in iscii_to_uni_table; convertToUnicode() checks for it when
// the previous byte was a halant (0xe8) and then emits U+200D instead of
// adding the value to the script base.
static const uchar inv = 0xFF;
96 
/* Maps the ISCII bytes 0xa0 - 0xff to Unicode.
 *
 * The table is indexed with (ISCII byte - 0xa0). convertToUnicode() adds the
 * stored value to the base code point of the selected script to obtain the
 * resulting character. The single `inv` entry sits at index 0x39, i.e. ISCII
 * byte 0xd9. Bytes below 0xa0 never reach this table; they are copied through
 * unchanged.
 */
static const uchar iscii_to_uni_table[0x60] = {
    0x00, 0x01, 0x02, 0x03,
    0x05, 0x06, 0x07, 0x08,
    0x09, 0x0a, 0x0b, 0x0e,
    0x0f, 0x20, 0x0d, 0x12,

    0x13, 0x14, 0x11, 0x15,
    0x16, 0x17, 0x18, 0x19,
    0x1a, 0x1b, 0x1c, 0x1d,
    0x1e, 0x1f, 0x20, 0x21,

    0x22, 0x23, 0x24, 0x25,
    0x26, 0x27, 0x28, 0x29,
    0x2a, 0x2b, 0x2c, 0x2d,
    0x2e, 0x2f, 0x5f, 0x30,

    0x31, 0x32, 0x33, 0x34,
    0x35, 0x36, 0x37, 0x38,
    0x39,  inv, 0x3e, 0x3f,
    0x40, 0x41, 0x42, 0x43,

    0x46, 0x47, 0x48, 0x45,
    0x4a, 0x4b, 0x4c, 0x49,
    0x4d, 0x3c, 0x64, 0x00,
    0x00, 0x00, 0x00, 0x00,

    0x00, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x6b, 0x6c,
    0x6d, 0x6e, 0x6f, 0x00,
    0x00, 0x00, 0x00, 0x00
};
129 
/* Maps Unicode to ISCII.
 *
 * The table is indexed with (code point - base of the selected script);
 * convertFromUnicode() only consults it for offsets 0x01 - 0x7f.
 *   - A value above 0x80 is the ISCII byte for that character.
 *   - A non-zero value of 0x80 or less (0x01 - 0x08 here) is a row number in
 *     uni_to_iscii_pairs.
 *   - 0x00 means there is no mapping; the converter emits its replacement
 *     character and counts the input as invalid.
 */
static const uchar uni_to_iscii_table[0x80] = {
    0x00, 0xa1, 0xa2, 0xa3,
    0x00, 0xa4, 0xa5, 0xa6,
    0xa7, 0xa8, 0xa9, 0xaa,
    0x00, 0xae, 0xab, 0xac,

    0xad, 0xb2, 0xaf, 0xb0,
    0xb1, 0xb3, 0xb4, 0xb5,
    0xb6, 0xb7, 0xb8, 0xb9,
    0xba, 0xbb, 0xbc, 0xbd,

    0xbe, 0xbf, 0xc0, 0xc1,
    0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9,
    0xca, 0xcb, 0xcc, 0xcd,

    0xcf, 0xd0, 0xd1, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6,
    0xd7, 0xd8, 0x00, 0x00,
    0xe9, 0x00, 0xda, 0xdb,

    0xdc, 0xdd, 0xde, 0xdf,
    0x00, 0xe3, 0xe0, 0xe1,
    0xe2, 0xe7, 0xe4, 0xe5,
    0xe6, 0xe8, 0x00, 0x00,

    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
    0x05, 0x06, 0x07, 0xce,

    0x00, 0x00, 0x00, 0x00,
    0xea, 0x08, 0xf1, 0xf2,
    0xf3, 0xf4, 0xf5, 0xf6,
    0xf7, 0xf8, 0xf9, 0xfa,

    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00
};
171 
/* Two-element rows for characters that uni_to_iscii_table maps to a small
 * row number k instead of a single ISCII byte; convertFromUnicode() reads the
 * row starting at index 2 * k. Row 0 is a placeholder because a table value
 * of 0 means "no mapping". The trailing comments give the code point (relative
 * to the 0x900 block) that selects each row. The stored values are the "uc
 * codes" referred to in uni_to_iscii_table, i.e. offsets from the script
 * base rather than ISCII bytes (0x3c is the offset whose ISCII byte is 0xe9).
 */
static const uchar uni_to_iscii_pairs[] = {
    0x00, 0x00,
    0x15, 0x3c, // 0x958
    0x16, 0x3c, // 0x959
    0x17, 0x3c, // 0x95a
    0x1c, 0x3c, // 0x95b
    0x21, 0x3c, // 0x95c
    0x22, 0x3c, // 0x95d
    0x2b, 0x3c, // 0x95e
    0x64, 0x64  // 0x965
};
183 
184 
convertFromUnicode(const QChar * uc,int len,ConverterState * state) const185 QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
186 {
187     char replacement = '?';
188     bool halant = false;
189     if (state) {
190         if (state->flags & ConvertInvalidToNull)
191             replacement = 0;
192         halant = state->state_data[0];
193     }
194     int invalid = 0;
195 
196     QByteArray result(2 * len, Qt::Uninitialized); //worst case
197 
198     uchar *ch = reinterpret_cast<uchar *>(result.data());
199 
200     const int base = codecs[idx].base;
201 
202     for (int i =0; i < len; ++i) {
203         const ushort codePoint = uc[i].unicode();
204 
205         /* The low 7 bits of ISCII is plain ASCII. However, we go all the
206          * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
207          * behavior. */
208         if(codePoint < 0xA0) {
209             *ch++ = static_cast<uchar>(codePoint);
210             continue;
211         }
212 
213         const int pos = codePoint - base;
214         if (pos > 0 && pos < 0x80) {
215             uchar iscii = uni_to_iscii_table[pos];
216             if (iscii > 0x80) {
217                 *ch++ = iscii;
218             } else if (iscii) {
219                 Q_ASSERT((2 * iscii) < (sizeof(uni_to_iscii_pairs) / sizeof(uni_to_iscii_pairs[0])));
220                 const uchar *pair = uni_to_iscii_pairs + 2*iscii;
221                 *ch++ = *pair++;
222                 *ch++ = *pair++;
223             } else {
224                 *ch++ = replacement;
225                 ++invalid;
226             }
227         } else {
228             if (uc[i].unicode() == 0x200c) { // ZWNJ
229                 if (halant)
230                     // Consonant Halant ZWNJ -> Consonant Halant Halant
231                     *ch++ = 0xe8;
232             } else if (uc[i].unicode() == 0x200d) { // ZWJ
233                 if (halant)
234                     // Consonant Halant ZWJ -> Consonant Halant Nukta
235                     *ch++ = 0xe9;
236             } else {
237                 *ch++ = replacement;
238                 ++invalid;
239             }
240         }
241         halant = (pos == 0x4d);
242     }
243     result.truncate(ch - (uchar *)result.data());
244 
245     if (state) {
246         state->invalidChars += invalid;
247         state->state_data[0] = halant;
248     }
249     return result;
250 }
251 
convertToUnicode(const char * chars,int len,ConverterState * state) const252 QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
253 {
254     bool halant = false;
255     if (state) {
256         halant = state->state_data[0];
257     }
258 
259     QString result(len, Qt::Uninitialized);
260     QChar *uc = result.data();
261 
262     const int base = codecs[idx].base;
263 
264     for (int i = 0; i < len; ++i) {
265         ushort ch = (uchar) chars[i];
266         if (ch < 0xa0)
267             *uc++ = ch;
268         else {
269             ushort c = iscii_to_uni_table[ch - 0xa0];
270             if (halant && (c == inv || c == 0xe9)) {
271                 // Consonant Halant inv -> Consonant Halant ZWJ
272                 // Consonant Halant Nukta -> Consonant Halant ZWJ
273                 *uc++ = QChar(0x200d);
274             } else if (halant && c == 0xe8) {
275                 // Consonant Halant Halant -> Consonant Halant ZWNJ
276                 *uc++ = QChar(0x200c);
277             } else {
278                 *uc++ = QChar(c+base);
279             }
280         }
281         halant = ((uchar)chars[i] == 0xe8);
282     }
283     result.resize(uc - result.unicode());
284 
285     if (state) {
286         state->state_data[0] = halant;
287     }
288     return result;
289 }
290 
291 QT_END_NAMESPACE
292