1 /*
2     This file is part of the Okteta Kasten Framework, made within the KDE community.
3 
4     SPDX-FileCopyrightText: 2011 Alex Richardson <alex.richardson@gmx.de>
5 
6     SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
7 */
8 
9 #include "utf16stringdata.hpp"
10 
11 #include "../../structlogging.hpp"
12 
13 #include <QVarLengthArray>
14 
15 #include <KLocalizedString>
16 
17 #include <Okteta/AbstractByteArrayModel>
18 
19 #include "../topleveldatainformation.hpp"
20 #include "../dummydatainformation.hpp"
21 #include "stringdatainformation.hpp"
22 
Utf16StringData(StringDataInformation * parent)23 Utf16StringData::Utf16StringData(StringDataInformation* parent)
24     : StringData(parent)
25 {
26 }
27 
// Nothing to release by hand: the compiler-generated destructor is sufficient.
Utf16StringData::~Utf16StringData() = default;
29 
charType() const30 QString Utf16StringData::charType() const
31 {
32     return mLittleEndian ? i18n("UTF16-LE char") : i18n("UTF16-BE char");
33 }
34 
typeName() const35 QString Utf16StringData::typeName() const
36 {
37     return mLittleEndian ? i18n("UTF16-LE string") : i18n("UTF16-BE string");
38 }
39 
count() const40 uint Utf16StringData::count() const
41 {
42     return mCodePoints.size();
43 }
44 
stringValue(int row) const45 QString Utf16StringData::stringValue(int row) const
46 {
47     // TODO details
48     Q_ASSERT((uint)row < count());
49     // TODO show invalid values
50     uint val = mCodePoints.at(row);
51     QString number = QString::number(val, 16).toUpper();
52     if (number.length() == 1) {
53         number = QLatin1Char('0') + number;
54     }
55     if (val > UNICODE_MAX) {
56         return i18n("Value too big: 0x%1", number);
57     }
58     if (val > BMP_MAX) {
59         QString ret(2, Qt::Uninitialized);
60         ret[0] = QChar::highSurrogate(val);
61         ret[1] = QChar::lowSurrogate(val);
62         return i18n("%1 (U+%2)", ret, number);
63     }
64 
65     return i18n("%1 (U+%2)", QString(QChar(mCodePoints.at(row))), number);
66 }
67 
completeString(bool skipInvalid) const68 QString Utf16StringData::completeString(bool skipInvalid) const
69 {
70     QVarLengthArray<QChar> data(mCodePoints.size() + mNonBMPCount);
71     int codePointCount = mCodePoints.size();
72     int i = 0;
73     for (int idx = 0; idx < codePointCount; ++idx) {
74         uint val = mCodePoints.at(idx);
75         if (val > UNICODE_MAX) {
76             if (skipInvalid) {
77                 continue;
78             }
79 
80             data[i] = QChar::ReplacementCharacter;
81         } else if (val > BMP_MAX) {
82             data[i] = QChar::highSurrogate(val);
83             i++;
84             data[i] = QChar::lowSurrogate(val);
85         } else {
86             data[i] = QChar((ushort)val);
87         }
88         i++;
89     }
90 
91     return QString(data.constData(), i);
92 }
93 
read(Okteta::AbstractByteArrayModel * input,Okteta::Address address,BitCount64 bitsRemaining)94 qint64 Utf16StringData::read(Okteta::AbstractByteArrayModel* input, Okteta::Address address,
95                              BitCount64 bitsRemaining)
96 {
97     const int oldSize = count();
98     mNonBMPCount = 0;
99     if (mMode == CharCount) {
100         mCodePoints.reserve(mLength.maxChars);
101     } else if (mMode == ByteCount) {
102         mCodePoints.reserve(mLength.maxBytes / 2);
103     }
104 
105     mParent->topLevelDataInformation()->_childCountAboutToChange(mParent, oldSize, 0);
106     mParent->topLevelDataInformation()->_childCountChanged(mParent, oldSize, 0);
107 
108     const uint oldMax = mCodePoints.size();
109     quint64 remaining = bitsRemaining;
110     Okteta::Address addr = address;
111     uint count = 0;
112     mEofReached = false;
113     if (((mMode & CharCount) && mLength.maxChars == 0)
114         || ((mMode & ByteCount) && mLength.maxBytes < 2)) {
115         return 0;
116     }
117 
118     bool eofAtStart = false;
119     if (bitsRemaining < 16) {
120         eofAtStart = true;
121     }
122 
123     while (true) {
124         if (remaining < 16) {
125             mEofReached = true;
126             break;
127         }
128         uint codePoint;
129         ushort val;
130         bool terminate = false;
131 
132         if (mLittleEndian) {
133             val = input->byte(addr) | (input->byte(addr + 1) << 8);
134         } else {
135             val = (input->byte(addr) << 8) | input->byte(addr + 1);
136         }
137         // high surrogate -> if is followed by low surrogate we have a 4 bit char
138         if (QChar::isHighSurrogate(val)) {
139             if (remaining < 32 || ((mMode & ByteCount) && (addr + 2 - address) / 2 >= Okteta::Address(mLength.maxBytes / 2))) {
140                 codePoint = val;
141                 mEofReached = true;
142                 terminate = true;
143             } else {
144                 ushort val2;
145                 if (mLittleEndian) {
146                     val2 = input->byte(addr + 2) | (input->byte(addr + 3) << 8);
147                 } else {
148                     val2 = (input->byte(addr + 2) << 8) | input->byte(addr + 3);
149                 }
150 
151                 if (QChar::isLowSurrogate(val2)) {
152                     codePoint = QChar::surrogateToUcs4(val, val2);
153                     remaining -= 16;
154                     addr += 2;
155                     mNonBMPCount++; // codepoint > 0xffff -> non BMP
156                 } else {
157                     codePoint = val;
158                 }
159             }
160         } else {
161             codePoint = val;
162         }
163 
164         if (count < oldMax) {
165             mCodePoints[count] = codePoint;
166         } else {
167             mCodePoints.append(codePoint);
168         }
169 
170         remaining -= 16;
171         addr += 2;
172         count++;
173 
174         // now check if we have to terminate
175         if (mMode & Sequence) {
176             if (codePoint == mTerminationCodePoint) {
177                 terminate = true;
178             }
179         }
180         if (mMode & ByteCount) {
181             // divide by two in case someone set length to an odd number of bytes
182             if ((addr - address) / 2 >= Okteta::Address(mLength.maxBytes / 2)) {
183                 terminate = true;
184             }
185         }
186         if (mMode & CharCount) {
187             if (count >= mLength.maxChars) {
188                 terminate = true;
189             }
190         }
191         if (mMode == None) {
192             qCDebug(LOG_KASTEN_OKTETA_CONTROLLERS_STRUCTURES) << "no termination mode set!!";
193             Q_ASSERT(false);
194         }
195         if (terminate) {
196             break;
197         }
198     }
199     mCodePoints.resize(count);
200     mParent->topLevelDataInformation()->_childCountAboutToChange(mParent, 0, count);
201     mParent->topLevelDataInformation()->_childCountChanged(mParent, 0, count);
202 
203     if (eofAtStart) {
204         return -1;
205     }
206     return (addr - address) * 8;
207 }
208 
size() const209 BitCount32 Utf16StringData::size() const
210 {
211     // add 16 for every non BMP char, since they use 32 bits
212     return (mCodePoints.size() + mNonBMPCount) * 16;
213 }
214 
sizeAt(uint i) const215 BitCount32 Utf16StringData::sizeAt(uint i) const
216 {
217     Q_ASSERT(i <= count());
218     uint val = mCodePoints.at(i);
219     return val > 0xffff ? 32 : 16;
220 }
221