1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Copyright (C) 2016 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtCore module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40 
41 #ifndef QBINARYJSON_P_H
42 #define QBINARYJSON_P_H
43 
44 //
45 //  W A R N I N G
46 //  -------------
47 //
48 // This file is not part of the Qt API.  It exists purely as an
49 // implementation detail.  This header file may change from version to
50 // version without notice, or even be removed.
51 //
52 // We mean it.
53 //
54 
55 #include <private/qbinaryjsonvalue_p.h>
56 #include <private/qendian_p.h>
57 
58 #include <qjsondocument.h>
59 
60 #include <limits>
61 
62 QT_REQUIRE_CONFIG(binaryjson);
63 
64 QT_BEGIN_NAMESPACE
65 
66 // in qstring.cpp
67 void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len);
68 void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept;
69 
70 /*
71   This defines a binary data structure for Json data. The data structure is optimised for fast reading
72   and minimum allocations. The whole data structure can be mmap'ed and used directly.
73 
74   In most cases the binary structure is not as space efficient as a utf8 encoded text representation, but
75   much faster to access.
76 
77   The size requirements are:
78 
79   String:
80     Latin1 data: 2 bytes header + string.length()
81     Full Unicode: 4 bytes header + 2*(string.length())
82 
83   Values: 4 bytes + size of data (size can be 0 for some data)
84     bool: 0 bytes
85     double: 8 bytes (0 if integer with less than 27bits)
86     string: see above
87     array: size of array
88     object: size of object
89   Array: 12 bytes + 4*length + size of Value data
90   Object: 12 bytes + 8*length + size of Key Strings + size of Value data
91 
92   For an example such as
93 
94     {                                           // object: 12 + 5*8                   = 52
95          "firstName": "John",                   // key 12, value 8                    = 20
96          "lastName" : "Smith",                  // key 12, value 8                    = 20
97          "age"      : 25,                       // key 8, value 0                     = 8
98          "address"  :                           // key 12, object below               = 140
99          {                                      // object: 12 + 4*8
100              "streetAddress": "21 2nd Street",  // key 16, value 16
101              "city"         : "New York",       // key 8, value 12
102              "state"        : "NY",             // key 8, value 4
103              "postalCode"   : "10021"           // key 12, value 8
104          },                                     // object total: 128
105          "phoneNumber":                         // key: 16, value array below         = 172
106          [                                      // array: 12 + 2*4 + values below: 156
107              {                                  // object 12 + 2*8
108                "type"  : "home",                // key 8, value 8
109                "number": "212 555-1234"         // key 8, value 16
110              },                                 // object total: 68
111              {                                  // object 12 + 2*8
112                "type"  : "fax",                 // key 8, value 8
113                "number": "646 555-4567"         // key 8, value 16
114              }                                  // object total: 68
115          ]                                      // array total: 156
116     }                                           // great total:                         412 bytes
117 
118     The uncompressed text file used roughly 500 bytes, so in this case we end up using about
119     the same space as the text representation.
120 
121     Other measurements have shown a slightly bigger binary size than a compact text
122     representation where all possible whitespace was stripped out.
123 */
124 namespace QBinaryJsonPrivate {
125 
126 class Array;
127 class Object;
128 class Value;
129 class Entry;
130 
131 template<typename T>
132 using q_littleendian = QLEInteger<T>;
133 
134 using qle_short = q_littleendian<short>;
135 using qle_ushort = q_littleendian<unsigned short>;
136 using qle_int = q_littleendian<int>;
137 using qle_uint = q_littleendian<unsigned int>;
138 
139 template<int pos, int width>
140 using qle_bitfield = QLEIntegerBitfield<uint, pos, width>;
141 
142 template<int pos, int width>
143 using qle_signedbitfield = QLEIntegerBitfield<int, pos, width>;
144 
145 using offset = qle_uint;
146 
147 // round the size up to the next 4 byte boundary
alignedSize(uint size)148 inline uint alignedSize(uint size) { return (size + 3) & ~3; }
149 
150 const int MaxLatin1Length = 0x7fff;
151 
useCompressed(QStringView s)152 static inline bool useCompressed(QStringView s)
153 {
154     if (s.length() > MaxLatin1Length)
155         return false;
156     return QtPrivate::isLatin1(s);
157 }
158 
useCompressed(QLatin1String s)159 static inline bool useCompressed(QLatin1String s)
160 {
161     return s.size() <= MaxLatin1Length;
162 }
163 
qStringSize(const QString & string,bool compress)164 static inline uint qStringSize(const QString &string, bool compress)
165 {
166     uint l = 2 + string.size();
167     if (!compress)
168         l *= 2;
169     return alignedSize(l);
170 }
171 
172 // returns INT_MAX if it can't compress it into 28 bits
compressedNumber(double d)173 static inline int compressedNumber(double d)
174 {
175     // this relies on details of how ieee floats are represented
176     const int exponent_off = 52;
177     const quint64 fraction_mask = 0x000fffffffffffffULL;
178     const quint64 exponent_mask = 0x7ff0000000000000ULL;
179 
180     quint64 val;
181     memcpy (&val, &d, sizeof(double));
182     int exp = (int)((val & exponent_mask) >> exponent_off) - 1023;
183     if (exp < 0 || exp > 25)
184         return std::numeric_limits<int>::max();
185 
186     quint64 non_int = val & (fraction_mask >> exp);
187     if (non_int)
188         return std::numeric_limits<int>::max();
189 
190     bool neg = (val >> 63) != 0;
191     val &= fraction_mask;
192     val |= ((quint64)1 << 52);
193     int res = (int)(val >> (52 - exp));
194     return neg ? -res : res;
195 }
196 
197 class Latin1String;
198 
199 class String
200 {
201 public:
String(const char * data)202     explicit String(const char *data) : d(reinterpret_cast<const Data *>(data)) {}
203 
204     struct Data {
205         qle_uint length;
206         qle_ushort utf16[1];
207     };
208     const Data *d;
209 
byteSize()210     uint byteSize() const { return sizeof(uint) + sizeof(ushort) * d->length; }
isValid(uint maxSize)211     bool isValid(uint maxSize) const
212     {
213         // Check byteSize() <= maxSize, avoiding integer overflow
214         return maxSize >= sizeof(uint)
215                 && uint(d->length) <= (maxSize - sizeof(uint)) / sizeof(ushort);
216     }
217 
copy(char * dest,QStringView str)218     static void copy(char *dest, QStringView str)
219     {
220         Data *data = reinterpret_cast<Data *>(dest);
221         data->length = str.length();
222         qToLittleEndian<quint16>(str.utf16(), str.length(), data->utf16);
223         fillTrailingZeros(data);
224     }
225 
fillTrailingZeros(Data * data)226     static void fillTrailingZeros(Data *data)
227     {
228         if (data->length & 1)
229             data->utf16[data->length] = 0;
230     }
231 
232     bool operator ==(QStringView str) const
233     {
234         int slen = str.length();
235         int l = d->length;
236         if (slen != l)
237             return false;
238         const auto *s = reinterpret_cast<const ushort *>(str.utf16());
239         const qle_ushort *a = d->utf16;
240         const ushort *b = s;
241         while (l-- && *a == *b)
242             a++,b++;
243         return (l == -1);
244     }
245 
246     bool operator ==(const String &str) const
247     {
248         if (d->length != str.d->length)
249             return false;
250         return !memcmp(d->utf16, str.d->utf16, d->length * sizeof(ushort));
251     }
252 
toString()253     QString toString() const
254     {
255 #if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
256         return QString(reinterpret_cast<const QChar *>(d->utf16), d->length);
257 #else
258         const uint l = d->length;
259         QString str(l, Qt::Uninitialized);
260         QChar *ch = str.data();
261         for (uint i = 0; i < l; ++i)
262             ch[i] = QChar(d->utf16[i]);
263         return str;
264 #endif
265     }
266 };
267 
268 class Latin1String
269 {
270 public:
Latin1String(const char * data)271     explicit Latin1String(const char *data) : d(reinterpret_cast<const Data *>(data)) {}
272 
273     struct Data {
274         qle_ushort length;
275         char latin1[1];
276     };
277     const Data *d;
278 
byteSize()279     uint byteSize() const { return sizeof(ushort) + sizeof(char) * (d->length); }
isValid(uint maxSize)280     bool isValid(uint maxSize) const { return byteSize() <= maxSize; }
281 
copy(char * dest,QStringView src)282     static void copy(char *dest, QStringView src)
283     {
284         Data *data = reinterpret_cast<Data *>(dest);
285         data->length = src.length();
286         auto *l = reinterpret_cast<uchar *>(data->latin1);
287         const auto *uc = reinterpret_cast<const ushort *>(src.utf16());
288         qt_to_latin1_unchecked(l, uc, data->length);
289 
290         for (uint len = data->length; quintptr(l + len) & 0x3; ++len)
291             l[len] = 0;
292     }
293 
toQLatin1String()294     QLatin1String toQLatin1String() const noexcept { return QLatin1String(d->latin1, d->length); }
toString()295     QString toString() const { return QString::fromLatin1(d->latin1, d->length); }
296 };
297 
copyString(char * dest,QStringView str,bool compress)298 static inline void copyString(char *dest, QStringView str, bool compress)
299 {
300     if (compress)
301         Latin1String::copy(dest, str);
302     else
303         String::copy(dest, str);
304 }
305 
306 /*
307  Base is the base class for both Object and Array. Both classes work more or less the same way.
308  The class starts with a header (defined by the struct below), then followed by data (the data for
309  values in the Array case and Entry's (see below) for objects.
310 
311  After the data a table follows (tableOffset points to it) containing Value objects for Arrays, and
312  offsets from the beginning of the object to Entry's in the case of Object.
313 
314  Entry's in the Object's table are lexicographically sorted by key in the table(). This allows the usage
315  of a binary search over the keys in an Object.
316  */
317 class Base
318 {
319 public:
320     qle_uint size;
321     union {
322         uint _dummy;
323         qle_bitfield<0, 1> is_object;
324         qle_bitfield<1, 31> length;
325     };
326     offset tableOffset;
327     // content follows here
328 
isObject()329     bool isObject() const { return !!is_object; }
isArray()330     bool isArray() const { return !isObject(); }
331 
table()332     offset *table()
333     {
334         return reinterpret_cast<offset *>(reinterpret_cast<char *>(this) + tableOffset);
335     }
336 
table()337     const offset *table() const
338     {
339         return reinterpret_cast<const offset *>(reinterpret_cast<const char *>(this) + tableOffset);
340     }
341 
342     uint reserveSpace(uint dataSize, uint posInTable, uint numItems, bool replace);
343 };
344 
345 class Object : public Base
346 {
347 public:
entryAt(uint i)348     const Entry *entryAt(uint i) const
349     {
350         return reinterpret_cast<const Entry *>(reinterpret_cast<const char *>(this) + table()[i]);
351     }
352 
entryAt(uint i)353     Entry *entryAt(uint i)
354     {
355         return reinterpret_cast<Entry *>(reinterpret_cast<char *>(this) + table()[i]);
356     }
357 
358     uint indexOf(QStringView key, bool *exists) const;
359     QJsonObject toJsonObject() const;
360     bool isValid(uint maxSize) const;
361 };
362 
363 class Array : public Base
364 {
365 public:
at(uint i)366     const Value *at(uint i) const { return reinterpret_cast<const Value *>(table() + i); }
at(uint i)367     Value *at(uint i) { return reinterpret_cast<Value *>(table() + i); }
368 
369     QJsonArray toJsonArray() const;
370     bool isValid(uint maxSize) const;
371 };
372 
373 class Value
374 {
375 public:
376     enum {
377         MaxSize = (1 << 27) - 1
378     };
379     union {
380         uint _dummy;
381         qle_bitfield<0, 3> type;
382         qle_bitfield<3, 1> latinOrIntValue;
383         qle_bitfield<4, 1> latinKey;
384         qle_bitfield<5, 27> value;
385         qle_signedbitfield<5, 27> int_value;
386     };
387 
data(const Base * b)388     inline const char *data(const Base *b) const
389     {
390         return reinterpret_cast<const char *>(b) + value;
391     }
392 
393     uint usedStorage(const Base *b) const;
394 
toBoolean()395     bool toBoolean() const
396     {
397         Q_ASSERT(type == QJsonValue::Bool);
398         return value != 0;
399     }
400 
toDouble(const Base * b)401     double toDouble(const Base *b) const
402     {
403         Q_ASSERT(type == QJsonValue::Double);
404         if (latinOrIntValue)
405             return int_value;
406 
407         auto i = qFromLittleEndian<quint64>(reinterpret_cast<const uchar *>(b) + value);
408         double d;
409         memcpy(&d, &i, sizeof(double));
410         return d;
411     }
412 
toString(const Base * b)413     QString toString(const Base *b) const
414     {
415         return latinOrIntValue
416                 ? asLatin1String(b).toString()
417                 : asString(b).toString();
418     }
419 
asString(const Base * b)420     String asString(const Base *b) const
421     {
422         Q_ASSERT(type == QJsonValue::String && !latinOrIntValue);
423         return String(data(b));
424     }
425 
asLatin1String(const Base * b)426     Latin1String asLatin1String(const Base *b) const
427     {
428         Q_ASSERT(type == QJsonValue::String && latinOrIntValue);
429         return Latin1String(data(b));
430     }
431 
base(const Base * b)432     const Base *base(const Base *b) const
433     {
434         Q_ASSERT(type == QJsonValue::Array || type == QJsonValue::Object);
435         return reinterpret_cast<const Base *>(data(b));
436     }
437 
438     QJsonValue toJsonValue(const Base *b) const;
439     bool isValid(const Base *b) const;
440 
441     static uint requiredStorage(const QBinaryJsonValue &v, bool *compressed);
442     static uint valueToStore(const QBinaryJsonValue &v, uint offset);
443     static void copyData(const QBinaryJsonValue &v, char *dest, bool compressed);
444 };
445 
446 class Entry {
447 public:
448     Value value;
449     // key
450     // value data follows key
451 
size()452     uint size() const
453     {
454         uint s = sizeof(Entry);
455         if (value.latinKey)
456             s += shallowLatin1Key().byteSize();
457         else
458             s += shallowKey().byteSize();
459         return alignedSize(s);
460     }
461 
usedStorage(Base * b)462     uint usedStorage(Base *b) const
463     {
464         return size() + value.usedStorage(b);
465     }
466 
shallowKey()467     String shallowKey() const
468     {
469         Q_ASSERT(!value.latinKey);
470         return String(reinterpret_cast<const char *>(this) + sizeof(Entry));
471     }
472 
shallowLatin1Key()473     Latin1String shallowLatin1Key() const
474     {
475         Q_ASSERT(value.latinKey);
476         return Latin1String(reinterpret_cast<const char *>(this) + sizeof(Entry));
477     }
478 
key()479     QString key() const
480     {
481         return value.latinKey
482                 ? shallowLatin1Key().toString()
483                 : shallowKey().toString();
484     }
485 
isValid(uint maxSize)486     bool isValid(uint maxSize) const
487     {
488         if (maxSize < sizeof(Entry))
489             return false;
490         maxSize -= sizeof(Entry);
491         return value.latinKey
492                 ? shallowLatin1Key().isValid(maxSize)
493                 : shallowKey().isValid(maxSize);
494     }
495 
496     bool operator ==(QStringView key) const
497     {
498         return value.latinKey
499                 ? (shallowLatin1Key().toQLatin1String() == key)
500                 : (shallowKey() == key);
501     }
502 
503     bool operator >=(QStringView key) const
504     {
505         return value.latinKey
506                 ? (shallowLatin1Key().toQLatin1String() >= key)
507                 : (shallowKey().toString() >= key);
508     }
509 };
510 
511 class Header {
512 public:
513     qle_uint tag; // 'qbjs'
514     qle_uint version; // 1
root()515     Base *root() { return reinterpret_cast<Base *>(this + 1); }
root()516     const Base *root() const { return reinterpret_cast<const Base *>(this + 1); }
517 };
518 
519 class ConstData
520 {
Q_DISABLE_COPY_MOVE(ConstData)521     Q_DISABLE_COPY_MOVE(ConstData)
522 public:
523     const uint alloc;
524     union {
525         const char *rawData;
526         const Header *header;
527     };
528 
ConstData(const char * raw,uint a)529     ConstData(const char *raw, uint a) : alloc(a), rawData(raw) {}
530     bool isValid() const;
531     QJsonDocument toJsonDocument() const;
532 };
533 
534 class MutableData
535 {
536     Q_DISABLE_COPY_MOVE(MutableData)
537 public:
538     QAtomicInt ref;
539     uint alloc;
540     union {
541         char *rawData;
542         Header *header;
543     };
544     uint compactionCounter : 31;
545 
MutableData(char * raw,uint a)546     MutableData(char *raw, uint a)
547         : alloc(a), rawData(raw), compactionCounter(0)
548     {
549     }
550 
MutableData(uint reserved,QJsonValue::Type valueType)551     MutableData(uint reserved, QJsonValue::Type valueType)
552         : rawData(nullptr), compactionCounter(0)
553     {
554         Q_ASSERT(valueType == QJsonValue::Array || valueType == QJsonValue::Object);
555 
556         alloc = sizeof(Header) + sizeof(Base) + reserved + sizeof(offset);
557         header = reinterpret_cast<Header *>(malloc(alloc));
558         Q_CHECK_PTR(header);
559         header->tag = QJsonDocument::BinaryFormatTag;
560         header->version = 1;
561         Base *b = header->root();
562         b->size = sizeof(Base);
563         b->is_object = (valueType == QJsonValue::Object);
564         b->tableOffset = sizeof(Base);
565         b->length = 0;
566     }
567 
~MutableData()568     ~MutableData()
569     {
570         free(rawData);
571     }
572 
573     MutableData *clone(const Base *b, uint reserve = 0)
574     {
575         uint size = sizeof(Header) + b->size;
576         if (b == header->root() && ref.loadRelaxed() == 1 && alloc >= size + reserve)
577             return this;
578 
579         if (reserve) {
580             if (reserve < 128)
581                 reserve = 128;
582             size = qMax(size + reserve, qMin(size *2, uint(Value::MaxSize)));
583             if (size > Value::MaxSize) {
584                 qWarning("QJson: Document too large to store in data structure");
585                 return nullptr;
586             }
587         }
588         char *raw = reinterpret_cast<char *>(malloc(size));
589         Q_CHECK_PTR(raw);
590         memcpy(raw + sizeof(Header), b, b->size);
591         auto *h = reinterpret_cast<Header *>(raw);
592         h->tag = QJsonDocument::BinaryFormatTag;
593         h->version = 1;
594         auto *d = new MutableData(raw, size);
595         d->compactionCounter = (b == header->root()) ? compactionCounter : 0;
596         return d;
597     }
598 
takeRawData(uint * size)599     char *takeRawData(uint *size)
600     {
601         *size = alloc;
602         char *result = rawData;
603         rawData = nullptr;
604         alloc = 0;
605         return result;
606     }
607 
608     void compact();
609 };
610 
611 } // namespace QBinaryJsonPrivate
612 
613 Q_DECLARE_TYPEINFO(QBinaryJsonPrivate::Value, Q_PRIMITIVE_TYPE);
614 
615 QT_END_NAMESPACE
616 
617 #endif // QBINARYJSON_P_H
618