1 /*
2 This file is part of the Okteta Kasten Framework, made within the KDE community.
3
4 SPDX-FileCopyrightText: 2011 Alex Richardson <alex.richardson@gmx.de>
5
6 SPDX-License-Identifier: LGPL-2.1-only OR LGPL-3.0-only OR LicenseRef-KDE-Accepted-LGPL
7 */
8
9 #include "utf16stringdata.hpp"
10
11 #include "../../structlogging.hpp"
12
13 #include <QVarLengthArray>
14
15 #include <KLocalizedString>
16
17 #include <Okteta/AbstractByteArrayModel>
18
19 #include "../topleveldatainformation.hpp"
20 #include "../dummydatainformation.hpp"
21 #include "stringdatainformation.hpp"
22
Utf16StringData(StringDataInformation * parent)23 Utf16StringData::Utf16StringData(StringDataInformation* parent)
24 : StringData(parent)
25 {
26 }
27
28 Utf16StringData::~Utf16StringData() = default;
29
charType() const30 QString Utf16StringData::charType() const
31 {
32 return mLittleEndian ? i18n("UTF16-LE char") : i18n("UTF16-BE char");
33 }
34
typeName() const35 QString Utf16StringData::typeName() const
36 {
37 return mLittleEndian ? i18n("UTF16-LE string") : i18n("UTF16-BE string");
38 }
39
count() const40 uint Utf16StringData::count() const
41 {
42 return mCodePoints.size();
43 }
44
stringValue(int row) const45 QString Utf16StringData::stringValue(int row) const
46 {
47 // TODO details
48 Q_ASSERT((uint)row < count());
49 // TODO show invalid values
50 uint val = mCodePoints.at(row);
51 QString number = QString::number(val, 16).toUpper();
52 if (number.length() == 1) {
53 number = QLatin1Char('0') + number;
54 }
55 if (val > UNICODE_MAX) {
56 return i18n("Value too big: 0x%1", number);
57 }
58 if (val > BMP_MAX) {
59 QString ret(2, Qt::Uninitialized);
60 ret[0] = QChar::highSurrogate(val);
61 ret[1] = QChar::lowSurrogate(val);
62 return i18n("%1 (U+%2)", ret, number);
63 }
64
65 return i18n("%1 (U+%2)", QString(QChar(mCodePoints.at(row))), number);
66 }
67
completeString(bool skipInvalid) const68 QString Utf16StringData::completeString(bool skipInvalid) const
69 {
70 QVarLengthArray<QChar> data(mCodePoints.size() + mNonBMPCount);
71 int codePointCount = mCodePoints.size();
72 int i = 0;
73 for (int idx = 0; idx < codePointCount; ++idx) {
74 uint val = mCodePoints.at(idx);
75 if (val > UNICODE_MAX) {
76 if (skipInvalid) {
77 continue;
78 }
79
80 data[i] = QChar::ReplacementCharacter;
81 } else if (val > BMP_MAX) {
82 data[i] = QChar::highSurrogate(val);
83 i++;
84 data[i] = QChar::lowSurrogate(val);
85 } else {
86 data[i] = QChar((ushort)val);
87 }
88 i++;
89 }
90
91 return QString(data.constData(), i);
92 }
93
read(Okteta::AbstractByteArrayModel * input,Okteta::Address address,BitCount64 bitsRemaining)94 qint64 Utf16StringData::read(Okteta::AbstractByteArrayModel* input, Okteta::Address address,
95 BitCount64 bitsRemaining)
96 {
97 const int oldSize = count();
98 mNonBMPCount = 0;
99 if (mMode == CharCount) {
100 mCodePoints.reserve(mLength.maxChars);
101 } else if (mMode == ByteCount) {
102 mCodePoints.reserve(mLength.maxBytes / 2);
103 }
104
105 mParent->topLevelDataInformation()->_childCountAboutToChange(mParent, oldSize, 0);
106 mParent->topLevelDataInformation()->_childCountChanged(mParent, oldSize, 0);
107
108 const uint oldMax = mCodePoints.size();
109 quint64 remaining = bitsRemaining;
110 Okteta::Address addr = address;
111 uint count = 0;
112 mEofReached = false;
113 if (((mMode & CharCount) && mLength.maxChars == 0)
114 || ((mMode & ByteCount) && mLength.maxBytes < 2)) {
115 return 0;
116 }
117
118 bool eofAtStart = false;
119 if (bitsRemaining < 16) {
120 eofAtStart = true;
121 }
122
123 while (true) {
124 if (remaining < 16) {
125 mEofReached = true;
126 break;
127 }
128 uint codePoint;
129 ushort val;
130 bool terminate = false;
131
132 if (mLittleEndian) {
133 val = input->byte(addr) | (input->byte(addr + 1) << 8);
134 } else {
135 val = (input->byte(addr) << 8) | input->byte(addr + 1);
136 }
137 // high surrogate -> if is followed by low surrogate we have a 4 bit char
138 if (QChar::isHighSurrogate(val)) {
139 if (remaining < 32 || ((mMode & ByteCount) && (addr + 2 - address) / 2 >= Okteta::Address(mLength.maxBytes / 2))) {
140 codePoint = val;
141 mEofReached = true;
142 terminate = true;
143 } else {
144 ushort val2;
145 if (mLittleEndian) {
146 val2 = input->byte(addr + 2) | (input->byte(addr + 3) << 8);
147 } else {
148 val2 = (input->byte(addr + 2) << 8) | input->byte(addr + 3);
149 }
150
151 if (QChar::isLowSurrogate(val2)) {
152 codePoint = QChar::surrogateToUcs4(val, val2);
153 remaining -= 16;
154 addr += 2;
155 mNonBMPCount++; // codepoint > 0xffff -> non BMP
156 } else {
157 codePoint = val;
158 }
159 }
160 } else {
161 codePoint = val;
162 }
163
164 if (count < oldMax) {
165 mCodePoints[count] = codePoint;
166 } else {
167 mCodePoints.append(codePoint);
168 }
169
170 remaining -= 16;
171 addr += 2;
172 count++;
173
174 // now check if we have to terminate
175 if (mMode & Sequence) {
176 if (codePoint == mTerminationCodePoint) {
177 terminate = true;
178 }
179 }
180 if (mMode & ByteCount) {
181 // divide by two in case someone set length to an odd number of bytes
182 if ((addr - address) / 2 >= Okteta::Address(mLength.maxBytes / 2)) {
183 terminate = true;
184 }
185 }
186 if (mMode & CharCount) {
187 if (count >= mLength.maxChars) {
188 terminate = true;
189 }
190 }
191 if (mMode == None) {
192 qCDebug(LOG_KASTEN_OKTETA_CONTROLLERS_STRUCTURES) << "no termination mode set!!";
193 Q_ASSERT(false);
194 }
195 if (terminate) {
196 break;
197 }
198 }
199 mCodePoints.resize(count);
200 mParent->topLevelDataInformation()->_childCountAboutToChange(mParent, 0, count);
201 mParent->topLevelDataInformation()->_childCountChanged(mParent, 0, count);
202
203 if (eofAtStart) {
204 return -1;
205 }
206 return (addr - address) * 8;
207 }
208
size() const209 BitCount32 Utf16StringData::size() const
210 {
211 // add 16 for every non BMP char, since they use 32 bits
212 return (mCodePoints.size() + mNonBMPCount) * 16;
213 }
214
sizeAt(uint i) const215 BitCount32 Utf16StringData::sizeAt(uint i) const
216 {
217 Q_ASSERT(i <= count());
218 uint val = mCodePoints.at(i);
219 return val > 0xffff ? 32 : 16;
220 }
221