1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 // Most of the code here was originally written by Serika Kurusugawa
41 // a.k.a. Junji Takagi, and is included in Qt with the author's permission,
42 // and the grateful thanks of the Qt team.
43 
44 /*! \class QEucJpCodec
45     \inmodule QtCore
46     \reentrant
47     \internal
48 */
49 
50 /*
51  * Copyright (C) 1999 Serika Kurusugawa, All rights reserved.
52  *
53  * Redistribution and use in source and binary forms, with or without
54  * modification, are permitted provided that the following conditions
55  * are met:
56  * 1. Redistributions of source code must retain the above copyright
57  *    notice, this list of conditions and the following disclaimer.
58  * 2. Redistributions in binary form must reproduce the above copyright
59  *    notice, this list of conditions and the following disclaimer in the
60  *    documentation and/or other materials provided with the distribution.
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72  * SUCH DAMAGE.
73  */
74 
75 #include "qeucjpcodec_p.h"
76 
77 QT_BEGIN_NAMESPACE
78 
// EUC-JP single-shift lead bytes, as emitted by convertFromUnicode() and
// recognized by convertToUnicode() in this file.
static const uchar Ss2 = 0x8e;        // Single Shift 2: precedes one JIS X 0201 kana byte
static const uchar Ss3 = 0x8f;        // Single Shift 3: precedes a two-byte JIS X 0212 character
81 
// True when byte value c lies in 0xa1..0xdf, the range accepted after Ss2.
#define IsKana(c)       ((0xa1 <= (c)) && ((c) <= 0xdf))
// True when byte value c lies in 0xa1..0xfe, the range accepted for the
// bytes of a two-byte character.
#define IsEucChar(c)    ((0xa1 <= (c)) && ((c) <= 0xfe))

// Wraps a converter result in a QChar; a zero result becomes
// QChar::ReplacementCharacter. NOTE: the argument is evaluated twice, so
// pass a plain variable rather than a call expression.
#define QValidChar(u)   (((u) != 0) ? QChar(static_cast<ushort>(u)) : QChar(QChar::ReplacementCharacter))
86 
87 /*!
88   Constructs a QEucJpCodec.
89 */
QEucJpCodec()90 QEucJpCodec::QEucJpCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
91 {
92 }
93 
94 /*!
95   Destroys the codec.
96 */
~QEucJpCodec()97 QEucJpCodec::~QEucJpCodec()
98 {
99     delete (const QJpUnicodeConv*)conv;
100     conv = 0;
101 }
102 
convertFromUnicode(const QChar * uc,int len,ConverterState * state) const103 QByteArray QEucJpCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
104 {
105     char replacement = '?';
106     if (state) {
107         if (state->flags & ConvertInvalidToNull)
108             replacement = 0;
109     }
110     int invalid = 0;
111 
112     int rlen = 3*len + 1;
113     QByteArray rstr;
114     rstr.resize(rlen);
115     uchar* cursor = (uchar*)rstr.data();
116     for (int i = 0; i < len; i++) {
117         QChar ch = uc[i];
118         uint j;
119         if (ch.unicode() < 0x80) {
120             // ASCII
121             *cursor++ = ch.cell();
122         } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
123             if (j < 0x80) {
124                 // JIS X 0201 Latin ?
125                 *cursor++ = j;
126             } else {
127                 // JIS X 0201 Kana
128                 *cursor++ = Ss2;
129                 *cursor++ = j;
130             }
131         } else if ((j = conv->unicodeToJisx0208(ch.row(), ch.cell())) != 0) {
132             // JIS X 0208
133             *cursor++ = (j >> 8)   | 0x80;
134             *cursor++ = (j & 0xff) | 0x80;
135         } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
136             // JIS X 0212
137             *cursor++ = Ss3;
138             *cursor++ = (j >> 8)   | 0x80;
139             *cursor++ = (j & 0xff) | 0x80;
140         } else {
141             // Error
142             *cursor++ = replacement;
143             ++invalid;
144         }
145     }
146     rstr.resize(cursor - (const uchar*)rstr.constData());
147 
148     if (state) {
149         state->invalidChars += invalid;
150     }
151     return rstr;
152 }
153 
154 
convertToUnicode(const char * chars,int len,ConverterState * state) const155 QString QEucJpCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
156 {
157     uchar buf[2] = {0, 0};
158     int nbuf = 0;
159     QChar replacement = QChar::ReplacementCharacter;
160     if (state) {
161         if (state->flags & ConvertInvalidToNull)
162             replacement = QChar::Null;
163         nbuf = state->remainingChars;
164         buf[0] = state->state_data[0];
165         buf[1] = state->state_data[1];
166     }
167     int invalid = 0;
168 
169     QString result;
170     for (int i=0; i<len; i++) {
171         uchar ch = chars[i];
172         switch (nbuf) {
173         case 0:
174             if (ch < 0x80) {
175                 // ASCII
176                 result += QLatin1Char(ch);
177             } else if (ch == Ss2 || ch == Ss3) {
178                 // JIS X 0201 Kana or JIS X 0212
179                 buf[0] = ch;
180                 nbuf = 1;
181             } else if (IsEucChar(ch)) {
182                 // JIS X 0208
183                 buf[0] = ch;
184                 nbuf = 1;
185             } else {
186                 // Invalid
187                 result += replacement;
188                 ++invalid;
189             }
190             break;
191         case 1:
192             if (buf[0] == Ss2) {
193                 // JIS X 0201 Kana
194                 if (IsKana(ch)) {
195                     uint u = conv->jisx0201ToUnicode(ch);
196                     result += QValidChar(u);
197                 } else {
198                     result += replacement;
199                     ++invalid;
200                 }
201                 nbuf = 0;
202             } else if (buf[0] == Ss3) {
203                 // JIS X 0212-1990
204                 if (IsEucChar(ch)) {
205                     buf[1] = ch;
206                     nbuf = 2;
207                 } else {
208                     // Error
209                     result += replacement;
210                     ++invalid;
211                     nbuf = 0;
212                 }
213             } else {
214                 // JIS X 0208-1990
215                 if (IsEucChar(ch)) {
216                     uint u = conv->jisx0208ToUnicode(buf[0] & 0x7f, ch & 0x7f);
217                     result += QValidChar(u);
218                 } else {
219                     // Error
220                     result += replacement;
221                     ++invalid;
222                 }
223                 nbuf = 0;
224             }
225             break;
226         case 2:
227             // JIS X 0212
228             if (IsEucChar(ch)) {
229                 uint u = conv->jisx0212ToUnicode(buf[1] & 0x7f, ch & 0x7f);
230                 result += QValidChar(u);
231             } else {
232                 result += replacement;
233                 ++invalid;
234             }
235             nbuf = 0;
236         }
237     }
238     if (state) {
239         state->remainingChars = nbuf;
240         state->state_data[0] = buf[0];
241         state->state_data[1] = buf[1];
242         state->invalidChars += invalid;
243     }
244     return result;
245 }
246 
_mibEnum()247 int QEucJpCodec::_mibEnum()
248 {
249     return 18;
250 }
251 
_name()252 QByteArray QEucJpCodec::_name()
253 {
254     return "EUC-JP";
255 }
256 
257 QT_END_NAMESPACE
258