1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Copyright (C) 2016 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtCore module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40
41 #ifndef QBINARYJSON_P_H
42 #define QBINARYJSON_P_H
43
44 //
45 // W A R N I N G
46 // -------------
47 //
48 // This file is not part of the Qt API. It exists purely as an
49 // implementation detail. This header file may change from version to
50 // version without notice, or even be removed.
51 //
52 // We mean it.
53 //
54
55 #include <private/qbinaryjsonvalue_p.h>
56 #include <private/qendian_p.h>
57
58 #include <qjsondocument.h>
59
60 #include <limits>
61
62 QT_REQUIRE_CONFIG(binaryjson);
63
64 QT_BEGIN_NAMESPACE
65
66 // in qstring.cpp
67 void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len);
68 void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept;
69
70 /*
71 This defines a binary data structure for Json data. The data structure is optimised for fast reading
72 and minimum allocations. The whole data structure can be mmap'ed and used directly.
73
74 In most cases the binary structure is not as space efficient as a utf8 encoded text representation, but
75 much faster to access.
76
77 The size requirements are:
78
79 String:
80 Latin1 data: 2 bytes header + string.length()
81 Full Unicode: 4 bytes header + 2*(string.length())
82
83 Values: 4 bytes + size of data (size can be 0 for some data)
84 bool: 0 bytes
85 double: 8 bytes (0 if integer with less than 27bits)
86 string: see above
87 array: size of array
88 object: size of object
89 Array: 12 bytes + 4*length + size of Value data
90 Object: 12 bytes + 8*length + size of Key Strings + size of Value data
91
92 For an example such as
93
94 { // object: 12 + 5*8 = 52
95 "firstName": "John", // key 12, value 8 = 20
96 "lastName" : "Smith", // key 12, value 8 = 20
97 "age" : 25, // key 8, value 0 = 8
98 "address" : // key 12, object below = 140
99 { // object: 12 + 4*8
100 "streetAddress": "21 2nd Street", // key 16, value 16
101 "city" : "New York", // key 8, value 12
102 "state" : "NY", // key 8, value 4
103 "postalCode" : "10021" // key 12, value 8
104 }, // object total: 128
105 "phoneNumber": // key: 16, value array below = 172
106 [ // array: 12 + 2*4 + values below: 156
107 { // object 12 + 2*8
108 "type" : "home", // key 8, value 8
109 "number": "212 555-1234" // key 8, value 16
110 }, // object total: 68
111 { // object 12 + 2*8
112 "type" : "fax", // key 8, value 8
113 "number": "646 555-4567" // key 8, value 16
114 } // object total: 68
115 ] // array total: 156
116 } // great total: 412 bytes
117
118 The uncompressed text file used roughly 500 bytes, so in this case we end up using about
119 the same space as the text representation.
120
121 Other measurements have shown a slightly bigger binary size than a compact text
122 representation where all possible whitespace was stripped out.
123 */
124 namespace QBinaryJsonPrivate {
125
126 class Array;
127 class Object;
128 class Value;
129 class Entry;
130
131 template<typename T>
132 using q_littleendian = QLEInteger<T>;
133
134 using qle_short = q_littleendian<short>;
135 using qle_ushort = q_littleendian<unsigned short>;
136 using qle_int = q_littleendian<int>;
137 using qle_uint = q_littleendian<unsigned int>;
138
139 template<int pos, int width>
140 using qle_bitfield = QLEIntegerBitfield<uint, pos, width>;
141
142 template<int pos, int width>
143 using qle_signedbitfield = QLEIntegerBitfield<int, pos, width>;
144
145 using offset = qle_uint;
146
147 // round the size up to the next 4 byte boundary
alignedSize(uint size)148 inline uint alignedSize(uint size) { return (size + 3) & ~3; }
149
150 const int MaxLatin1Length = 0x7fff;
151
useCompressed(QStringView s)152 static inline bool useCompressed(QStringView s)
153 {
154 if (s.length() > MaxLatin1Length)
155 return false;
156 return QtPrivate::isLatin1(s);
157 }
158
useCompressed(QLatin1String s)159 static inline bool useCompressed(QLatin1String s)
160 {
161 return s.size() <= MaxLatin1Length;
162 }
163
qStringSize(const QString & string,bool compress)164 static inline uint qStringSize(const QString &string, bool compress)
165 {
166 uint l = 2 + string.size();
167 if (!compress)
168 l *= 2;
169 return alignedSize(l);
170 }
171
172 // returns INT_MAX if it can't compress it into 28 bits
compressedNumber(double d)173 static inline int compressedNumber(double d)
174 {
175 // this relies on details of how ieee floats are represented
176 const int exponent_off = 52;
177 const quint64 fraction_mask = 0x000fffffffffffffULL;
178 const quint64 exponent_mask = 0x7ff0000000000000ULL;
179
180 quint64 val;
181 memcpy (&val, &d, sizeof(double));
182 int exp = (int)((val & exponent_mask) >> exponent_off) - 1023;
183 if (exp < 0 || exp > 25)
184 return std::numeric_limits<int>::max();
185
186 quint64 non_int = val & (fraction_mask >> exp);
187 if (non_int)
188 return std::numeric_limits<int>::max();
189
190 bool neg = (val >> 63) != 0;
191 val &= fraction_mask;
192 val |= ((quint64)1 << 52);
193 int res = (int)(val >> (52 - exp));
194 return neg ? -res : res;
195 }
196
197 class Latin1String;
198
199 class String
200 {
201 public:
String(const char * data)202 explicit String(const char *data) : d(reinterpret_cast<const Data *>(data)) {}
203
204 struct Data {
205 qle_uint length;
206 qle_ushort utf16[1];
207 };
208 const Data *d;
209
byteSize()210 uint byteSize() const { return sizeof(uint) + sizeof(ushort) * d->length; }
isValid(uint maxSize)211 bool isValid(uint maxSize) const
212 {
213 // Check byteSize() <= maxSize, avoiding integer overflow
214 return maxSize >= sizeof(uint)
215 && uint(d->length) <= (maxSize - sizeof(uint)) / sizeof(ushort);
216 }
217
copy(char * dest,QStringView str)218 static void copy(char *dest, QStringView str)
219 {
220 Data *data = reinterpret_cast<Data *>(dest);
221 data->length = str.length();
222 qToLittleEndian<quint16>(str.utf16(), str.length(), data->utf16);
223 fillTrailingZeros(data);
224 }
225
fillTrailingZeros(Data * data)226 static void fillTrailingZeros(Data *data)
227 {
228 if (data->length & 1)
229 data->utf16[data->length] = 0;
230 }
231
232 bool operator ==(QStringView str) const
233 {
234 int slen = str.length();
235 int l = d->length;
236 if (slen != l)
237 return false;
238 const auto *s = reinterpret_cast<const ushort *>(str.utf16());
239 const qle_ushort *a = d->utf16;
240 const ushort *b = s;
241 while (l-- && *a == *b)
242 a++,b++;
243 return (l == -1);
244 }
245
246 bool operator ==(const String &str) const
247 {
248 if (d->length != str.d->length)
249 return false;
250 return !memcmp(d->utf16, str.d->utf16, d->length * sizeof(ushort));
251 }
252
toString()253 QString toString() const
254 {
255 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
256 return QString(reinterpret_cast<const QChar *>(d->utf16), d->length);
257 #else
258 const uint l = d->length;
259 QString str(l, Qt::Uninitialized);
260 QChar *ch = str.data();
261 for (uint i = 0; i < l; ++i)
262 ch[i] = QChar(d->utf16[i]);
263 return str;
264 #endif
265 }
266 };
267
268 class Latin1String
269 {
270 public:
Latin1String(const char * data)271 explicit Latin1String(const char *data) : d(reinterpret_cast<const Data *>(data)) {}
272
273 struct Data {
274 qle_ushort length;
275 char latin1[1];
276 };
277 const Data *d;
278
byteSize()279 uint byteSize() const { return sizeof(ushort) + sizeof(char) * (d->length); }
isValid(uint maxSize)280 bool isValid(uint maxSize) const { return byteSize() <= maxSize; }
281
copy(char * dest,QStringView src)282 static void copy(char *dest, QStringView src)
283 {
284 Data *data = reinterpret_cast<Data *>(dest);
285 data->length = src.length();
286 auto *l = reinterpret_cast<uchar *>(data->latin1);
287 const auto *uc = reinterpret_cast<const ushort *>(src.utf16());
288 qt_to_latin1_unchecked(l, uc, data->length);
289
290 for (uint len = data->length; quintptr(l + len) & 0x3; ++len)
291 l[len] = 0;
292 }
293
toQLatin1String()294 QLatin1String toQLatin1String() const noexcept { return QLatin1String(d->latin1, d->length); }
toString()295 QString toString() const { return QString::fromLatin1(d->latin1, d->length); }
296 };
297
copyString(char * dest,QStringView str,bool compress)298 static inline void copyString(char *dest, QStringView str, bool compress)
299 {
300 if (compress)
301 Latin1String::copy(dest, str);
302 else
303 String::copy(dest, str);
304 }
305
306 /*
307 Base is the base class for both Object and Array. Both classes work more or less the same way.
308 The class starts with a header (defined by the struct below), then followed by data (the data for
309 values in the Array case and Entry's (see below) for objects.
310
311 After the data a table follows (tableOffset points to it) containing Value objects for Arrays, and
312 offsets from the beginning of the object to Entry's in the case of Object.
313
314 Entry's in the Object's table are lexicographically sorted by key in the table(). This allows the usage
315 of a binary search over the keys in an Object.
316 */
317 class Base
318 {
319 public:
320 qle_uint size;
321 union {
322 uint _dummy;
323 qle_bitfield<0, 1> is_object;
324 qle_bitfield<1, 31> length;
325 };
326 offset tableOffset;
327 // content follows here
328
isObject()329 bool isObject() const { return !!is_object; }
isArray()330 bool isArray() const { return !isObject(); }
331
table()332 offset *table()
333 {
334 return reinterpret_cast<offset *>(reinterpret_cast<char *>(this) + tableOffset);
335 }
336
table()337 const offset *table() const
338 {
339 return reinterpret_cast<const offset *>(reinterpret_cast<const char *>(this) + tableOffset);
340 }
341
342 uint reserveSpace(uint dataSize, uint posInTable, uint numItems, bool replace);
343 };
344
345 class Object : public Base
346 {
347 public:
entryAt(uint i)348 const Entry *entryAt(uint i) const
349 {
350 return reinterpret_cast<const Entry *>(reinterpret_cast<const char *>(this) + table()[i]);
351 }
352
entryAt(uint i)353 Entry *entryAt(uint i)
354 {
355 return reinterpret_cast<Entry *>(reinterpret_cast<char *>(this) + table()[i]);
356 }
357
358 uint indexOf(QStringView key, bool *exists) const;
359 QJsonObject toJsonObject() const;
360 bool isValid(uint maxSize) const;
361 };
362
363 class Array : public Base
364 {
365 public:
at(uint i)366 const Value *at(uint i) const { return reinterpret_cast<const Value *>(table() + i); }
at(uint i)367 Value *at(uint i) { return reinterpret_cast<Value *>(table() + i); }
368
369 QJsonArray toJsonArray() const;
370 bool isValid(uint maxSize) const;
371 };
372
373 class Value
374 {
375 public:
376 enum {
377 MaxSize = (1 << 27) - 1
378 };
379 union {
380 uint _dummy;
381 qle_bitfield<0, 3> type;
382 qle_bitfield<3, 1> latinOrIntValue;
383 qle_bitfield<4, 1> latinKey;
384 qle_bitfield<5, 27> value;
385 qle_signedbitfield<5, 27> int_value;
386 };
387
data(const Base * b)388 inline const char *data(const Base *b) const
389 {
390 return reinterpret_cast<const char *>(b) + value;
391 }
392
393 uint usedStorage(const Base *b) const;
394
toBoolean()395 bool toBoolean() const
396 {
397 Q_ASSERT(type == QJsonValue::Bool);
398 return value != 0;
399 }
400
toDouble(const Base * b)401 double toDouble(const Base *b) const
402 {
403 Q_ASSERT(type == QJsonValue::Double);
404 if (latinOrIntValue)
405 return int_value;
406
407 auto i = qFromLittleEndian<quint64>(reinterpret_cast<const uchar *>(b) + value);
408 double d;
409 memcpy(&d, &i, sizeof(double));
410 return d;
411 }
412
toString(const Base * b)413 QString toString(const Base *b) const
414 {
415 return latinOrIntValue
416 ? asLatin1String(b).toString()
417 : asString(b).toString();
418 }
419
asString(const Base * b)420 String asString(const Base *b) const
421 {
422 Q_ASSERT(type == QJsonValue::String && !latinOrIntValue);
423 return String(data(b));
424 }
425
asLatin1String(const Base * b)426 Latin1String asLatin1String(const Base *b) const
427 {
428 Q_ASSERT(type == QJsonValue::String && latinOrIntValue);
429 return Latin1String(data(b));
430 }
431
base(const Base * b)432 const Base *base(const Base *b) const
433 {
434 Q_ASSERT(type == QJsonValue::Array || type == QJsonValue::Object);
435 return reinterpret_cast<const Base *>(data(b));
436 }
437
438 QJsonValue toJsonValue(const Base *b) const;
439 bool isValid(const Base *b) const;
440
441 static uint requiredStorage(const QBinaryJsonValue &v, bool *compressed);
442 static uint valueToStore(const QBinaryJsonValue &v, uint offset);
443 static void copyData(const QBinaryJsonValue &v, char *dest, bool compressed);
444 };
445
446 class Entry {
447 public:
448 Value value;
449 // key
450 // value data follows key
451
size()452 uint size() const
453 {
454 uint s = sizeof(Entry);
455 if (value.latinKey)
456 s += shallowLatin1Key().byteSize();
457 else
458 s += shallowKey().byteSize();
459 return alignedSize(s);
460 }
461
usedStorage(Base * b)462 uint usedStorage(Base *b) const
463 {
464 return size() + value.usedStorage(b);
465 }
466
shallowKey()467 String shallowKey() const
468 {
469 Q_ASSERT(!value.latinKey);
470 return String(reinterpret_cast<const char *>(this) + sizeof(Entry));
471 }
472
shallowLatin1Key()473 Latin1String shallowLatin1Key() const
474 {
475 Q_ASSERT(value.latinKey);
476 return Latin1String(reinterpret_cast<const char *>(this) + sizeof(Entry));
477 }
478
key()479 QString key() const
480 {
481 return value.latinKey
482 ? shallowLatin1Key().toString()
483 : shallowKey().toString();
484 }
485
isValid(uint maxSize)486 bool isValid(uint maxSize) const
487 {
488 if (maxSize < sizeof(Entry))
489 return false;
490 maxSize -= sizeof(Entry);
491 return value.latinKey
492 ? shallowLatin1Key().isValid(maxSize)
493 : shallowKey().isValid(maxSize);
494 }
495
496 bool operator ==(QStringView key) const
497 {
498 return value.latinKey
499 ? (shallowLatin1Key().toQLatin1String() == key)
500 : (shallowKey() == key);
501 }
502
503 bool operator >=(QStringView key) const
504 {
505 return value.latinKey
506 ? (shallowLatin1Key().toQLatin1String() >= key)
507 : (shallowKey().toString() >= key);
508 }
509 };
510
511 class Header {
512 public:
513 qle_uint tag; // 'qbjs'
514 qle_uint version; // 1
root()515 Base *root() { return reinterpret_cast<Base *>(this + 1); }
root()516 const Base *root() const { return reinterpret_cast<const Base *>(this + 1); }
517 };
518
519 class ConstData
520 {
Q_DISABLE_COPY_MOVE(ConstData)521 Q_DISABLE_COPY_MOVE(ConstData)
522 public:
523 const uint alloc;
524 union {
525 const char *rawData;
526 const Header *header;
527 };
528
ConstData(const char * raw,uint a)529 ConstData(const char *raw, uint a) : alloc(a), rawData(raw) {}
530 bool isValid() const;
531 QJsonDocument toJsonDocument() const;
532 };
533
534 class MutableData
535 {
536 Q_DISABLE_COPY_MOVE(MutableData)
537 public:
538 QAtomicInt ref;
539 uint alloc;
540 union {
541 char *rawData;
542 Header *header;
543 };
544 uint compactionCounter : 31;
545
MutableData(char * raw,uint a)546 MutableData(char *raw, uint a)
547 : alloc(a), rawData(raw), compactionCounter(0)
548 {
549 }
550
MutableData(uint reserved,QJsonValue::Type valueType)551 MutableData(uint reserved, QJsonValue::Type valueType)
552 : rawData(nullptr), compactionCounter(0)
553 {
554 Q_ASSERT(valueType == QJsonValue::Array || valueType == QJsonValue::Object);
555
556 alloc = sizeof(Header) + sizeof(Base) + reserved + sizeof(offset);
557 header = reinterpret_cast<Header *>(malloc(alloc));
558 Q_CHECK_PTR(header);
559 header->tag = QJsonDocument::BinaryFormatTag;
560 header->version = 1;
561 Base *b = header->root();
562 b->size = sizeof(Base);
563 b->is_object = (valueType == QJsonValue::Object);
564 b->tableOffset = sizeof(Base);
565 b->length = 0;
566 }
567
~MutableData()568 ~MutableData()
569 {
570 free(rawData);
571 }
572
573 MutableData *clone(const Base *b, uint reserve = 0)
574 {
575 uint size = sizeof(Header) + b->size;
576 if (b == header->root() && ref.loadRelaxed() == 1 && alloc >= size + reserve)
577 return this;
578
579 if (reserve) {
580 if (reserve < 128)
581 reserve = 128;
582 size = qMax(size + reserve, qMin(size *2, uint(Value::MaxSize)));
583 if (size > Value::MaxSize) {
584 qWarning("QJson: Document too large to store in data structure");
585 return nullptr;
586 }
587 }
588 char *raw = reinterpret_cast<char *>(malloc(size));
589 Q_CHECK_PTR(raw);
590 memcpy(raw + sizeof(Header), b, b->size);
591 auto *h = reinterpret_cast<Header *>(raw);
592 h->tag = QJsonDocument::BinaryFormatTag;
593 h->version = 1;
594 auto *d = new MutableData(raw, size);
595 d->compactionCounter = (b == header->root()) ? compactionCounter : 0;
596 return d;
597 }
598
takeRawData(uint * size)599 char *takeRawData(uint *size)
600 {
601 *size = alloc;
602 char *result = rawData;
603 rawData = nullptr;
604 alloc = 0;
605 return result;
606 }
607
608 void compact();
609 };
610
611 } // namespace QBinaryJsonPrivate
612
613 Q_DECLARE_TYPEINFO(QBinaryJsonPrivate::Value, Q_PRIMITIVE_TYPE);
614
615 QT_END_NAMESPACE
616
617 #endif // QBINARYJSON_P_H
618