1 /* packed_data.c : implement the packed binary stream data structure
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 #include <apr_tables.h>
24 
25 #include "svn_string.h"
26 #include "svn_sorts.h"
27 #include "private/svn_string_private.h"
28 #include "private/svn_subr_private.h"
29 #include "private/svn_delta_private.h"
30 #include "private/svn_packed_data.h"
31 
32 #include "svn_private_config.h"
33 
34 
35 
36 /* Private int stream data referenced by svn_packed__int_stream_t.
37  */
38 typedef struct packed_int_private_t
39 {
40   /* First sub-stream, if any.  NULL otherwise. */
41   svn_packed__int_stream_t *first_substream;
42 
43   /* Last sub-stream, if any.  NULL otherwise. */
44   svn_packed__int_stream_t *last_substream;
45 
46   /* Current sub-stream to read from / write to, if any.  NULL otherwise.
47      This will be initialized to FIRST_SUBSTREAM and then advanced in a
48      round-robin scheme after each number being read. */
49   svn_packed__int_stream_t *current_substream;
50 
51   /* Number of sub-streams. */
52   apr_size_t substream_count;
53 
54   /* Next (sibling) integer stream.  If this is the last one, points to
55      the first in the list (i.e. this forms a ring list).  Never NULL. */
56   svn_packed__int_stream_t *next;
57 
58   /* 7b/8b encoded integer values (previously diff'ed and sign-handled,
59      if indicated by the flags below).  The contents are disjoint from
60      the unparsed number buffer.  May be NULL while not written to. */
61   svn_stringbuf_t *packed;
62 
63   /* Initialized to 0.  Latest value written to / read from PACKED.
64      Undefined if DIFF is FALSE. */
65   apr_uint64_t last_value;
66 
67   /* Deltify data before storing it in PACKED. */
68   svn_boolean_t diff;
69 
70   /* Numbers are likely to contain negative values with small absolutes.
71      If TRUE, store the signed bit in LSB before encoding. */
72   svn_boolean_t is_signed;
73 
74   /* Number of integers in this stream. */
75   apr_size_t item_count;
76 
77   /* TRUE for the last stream in a list of siblings. */
78   svn_boolean_t is_last;
79 
80   /* Pool to use for allocations. */
81   apr_pool_t *pool;
82 } packed_int_private_t;
83 
84 /* A byte sequence stream.  Please note that NEXT is defined different
85  * from the NEXT member in integer streams.
86  */
87 struct svn_packed__byte_stream_t
88 {
89   /* First sub-stream, if any.  NULL otherwise. */
90   svn_packed__byte_stream_t *first_substream;
91 
92   /* Last sub-stream, if any.  NULL otherwise. */
93   svn_packed__byte_stream_t *last_substream;
94 
95   /* Next (sibling) byte sequence stream, if any.  NULL otherwise. */
96   svn_packed__byte_stream_t *next;
97 
98   /* Stream to store the sequence lengths. */
99   svn_packed__int_stream_t *lengths_stream;
100 
101   /* It's index (relative to its parent). */
102   apr_size_t lengths_stream_index;
103 
104   /* Concatenated byte sequences. */
105   svn_stringbuf_t *packed;
106 
107   /* Pool to use for allocations. */
108   apr_pool_t *pool;
109 };
110 
111 /* The serialization root object.  It references the top-level streams.
112  */
113 struct svn_packed__data_root_t
114 {
115   /* First top-level integer stream, if any.  NULL otherwise. */
116   svn_packed__int_stream_t *first_int_stream;
117 
118   /* Last top-level integer stream, if any.  NULL otherwise. */
119   svn_packed__int_stream_t *last_int_stream;
120 
121   /* Number of top-level integer streams. */
122   apr_size_t int_stream_count;
123 
124   /* First top-level byte sequence stream, if any.  NULL otherwise. */
125   svn_packed__byte_stream_t *first_byte_stream;
126 
127   /* Last top-level byte sequence stream, if any.  NULL otherwise. */
128   svn_packed__byte_stream_t *last_byte_stream;
129 
130   /* Number of top-level byte sequence streams. */
131   apr_size_t byte_stream_count;
132 
133   /* Pool to use for allocations. */
134   apr_pool_t *pool;
135 };
136 
137 /* Write access. */
138 
139 svn_packed__data_root_t *
svn_packed__data_create_root(apr_pool_t * pool)140 svn_packed__data_create_root(apr_pool_t *pool)
141 {
142   svn_packed__data_root_t *root = apr_pcalloc(pool, sizeof(*root));
143   root->pool = pool;
144 
145   return root;
146 }
147 
148 svn_packed__int_stream_t *
svn_packed__create_int_stream(svn_packed__data_root_t * root,svn_boolean_t diff,svn_boolean_t signed_ints)149 svn_packed__create_int_stream(svn_packed__data_root_t *root,
150                               svn_boolean_t diff,
151                               svn_boolean_t signed_ints)
152 {
153   /* allocate and initialize the stream node */
154   packed_int_private_t *private_data
155     = apr_pcalloc(root->pool, sizeof(*private_data));
156   svn_packed__int_stream_t *stream
157     = apr_palloc(root->pool, sizeof(*stream));
158 
159   private_data->diff = diff;
160   private_data->is_signed = signed_ints;
161   private_data->is_last = TRUE;
162   private_data->pool = root->pool;
163 
164   stream->buffer_used = 0;
165   stream->private_data = private_data;
166 
167   /* maintain the ring list */
168   if (root->last_int_stream)
169     {
170       packed_int_private_t *previous_private_data
171         = root->last_int_stream->private_data;
172       previous_private_data->next = stream;
173       previous_private_data->is_last = FALSE;
174     }
175   else
176     {
177       root->first_int_stream = stream;
178     }
179 
180   root->last_int_stream = stream;
181   root->int_stream_count++;
182 
183   return stream;
184 }
185 
186 svn_packed__int_stream_t *
svn_packed__create_int_substream(svn_packed__int_stream_t * parent,svn_boolean_t diff,svn_boolean_t signed_ints)187 svn_packed__create_int_substream(svn_packed__int_stream_t *parent,
188                                  svn_boolean_t diff,
189                                  svn_boolean_t signed_ints)
190 {
191   packed_int_private_t *parent_private = parent->private_data;
192 
193   /* allocate and initialize the stream node */
194   packed_int_private_t *private_data
195     = apr_pcalloc(parent_private->pool, sizeof(*private_data));
196   svn_packed__int_stream_t *stream
197     = apr_palloc(parent_private->pool, sizeof(*stream));
198 
199   private_data->diff = diff;
200   private_data->is_signed = signed_ints;
201   private_data->is_last = TRUE;
202   private_data->pool = parent_private->pool;
203 
204   stream->buffer_used = 0;
205   stream->private_data = private_data;
206 
207   /* maintain the ring list */
208   if (parent_private->last_substream)
209     {
210       packed_int_private_t *previous_private_data
211         = parent_private->last_substream->private_data;
212       previous_private_data->next = stream;
213       previous_private_data->is_last = FALSE;
214     }
215   else
216     {
217       parent_private->first_substream = stream;
218       parent_private->current_substream = stream;
219     }
220 
221   parent_private->last_substream = stream;
222   parent_private->substream_count++;
223   private_data->next = parent_private->first_substream;
224 
225   return stream;
226 }
227 
228 /* Returns a new top-level byte sequence stream for ROOT but does not
229  * initialize the LENGTH_STREAM member.
230  */
231 static svn_packed__byte_stream_t *
create_bytes_stream_body(svn_packed__data_root_t * root)232 create_bytes_stream_body(svn_packed__data_root_t *root)
233 {
234   svn_packed__byte_stream_t *stream
235     = apr_pcalloc(root->pool, sizeof(*stream));
236 
237   stream->packed = svn_stringbuf_create_empty(root->pool);
238 
239   if (root->last_byte_stream)
240     root->last_byte_stream->next = stream;
241   else
242     root->first_byte_stream = stream;
243 
244   root->last_byte_stream = stream;
245   root->byte_stream_count++;
246 
247   return stream;
248 }
249 
250 svn_packed__byte_stream_t *
svn_packed__create_bytes_stream(svn_packed__data_root_t * root)251 svn_packed__create_bytes_stream(svn_packed__data_root_t *root)
252 {
253   svn_packed__byte_stream_t *stream
254     = create_bytes_stream_body(root);
255 
256   stream->lengths_stream_index = root->int_stream_count;
257   stream->lengths_stream = svn_packed__create_int_stream(root, FALSE, FALSE);
258 
259   return stream;
260 }
261 
262 /* Write the 7b/8b representation of VALUE into BUFFER.  BUFFER must
263  * provide at least 10 bytes space.
264  * Returns the first position behind the written data.
265  */
266 static unsigned char *
write_packed_uint_body(unsigned char * buffer,apr_uint64_t value)267 write_packed_uint_body(unsigned char *buffer, apr_uint64_t value)
268 {
269   while (value >= 0x80)
270     {
271       *(buffer++) = (unsigned char)((value % 0x80) + 0x80);
272       value /= 0x80;
273     }
274 
275   *(buffer++) = (unsigned char)value;
276   return buffer;
277 }
278 
279 /* Return remapped VALUE.
280  *
281  * Due to sign conversion and diff underflow, values close to UINT64_MAX
282  * are almost as frequent as those close to 0.  Remap them such that the
283  * MSB is stored in the LSB and the remainder stores the absolute distance
284  * to 0.
285  *
286  * This minimizes the absolute value to store in many scenarios.
287  * Hence, the variable-length representation on disk is shorter, too.
288  */
289 static apr_uint64_t
remap_uint(apr_uint64_t value)290 remap_uint(apr_uint64_t value)
291 {
292   return value & APR_UINT64_C(0x8000000000000000)
293        ? APR_UINT64_MAX - (2 * value)
294        : 2 * value;
295 }
296 
297 /* Invert remap_uint. */
298 static apr_uint64_t
unmap_uint(apr_uint64_t value)299 unmap_uint(apr_uint64_t value)
300 {
301   return value & 1
302        ? (APR_UINT64_MAX - value / 2)
303        : value / 2;
304 }
305 
306 /* Empty the unprocessed integer buffer in STREAM by either pushing the
307  * data to the sub-streams or writing to the packed data (in case there
308  * are no sub-streams).
309  */
310 static void
data_flush_buffer(svn_packed__int_stream_t * stream)311 data_flush_buffer(svn_packed__int_stream_t *stream)
312 {
313   packed_int_private_t *private_data = stream->private_data;
314   apr_size_t i;
315 
316   /* if we have sub-streams, push the data down to them */
317   if (private_data->current_substream)
318     for (i = 0; i < stream->buffer_used; ++i)
319       {
320         packed_int_private_t *current_private_data
321           = private_data->current_substream->private_data;
322 
323         svn_packed__add_uint(private_data->current_substream,
324                              stream->buffer[i]);
325         private_data->current_substream = current_private_data->next;
326       }
327   else
328     {
329       /* pack the numbers into our local PACKED buffer */
330 
331       /* temporary buffer, max 10 bytes required per 7b/8b encoded number */
332       unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE];
333       unsigned char *p = local_buffer;
334 
335       /* if configured, deltify numbers before packing them.
336          Since delta may be negative, always use the 'signed' encoding. */
337       if (private_data->diff)
338         {
339           apr_uint64_t last_value = private_data->last_value;
340           for (i = 0; i < stream->buffer_used; ++i)
341             {
342               apr_uint64_t temp = stream->buffer[i];
343               stream->buffer[i] = remap_uint(temp - last_value);
344               last_value = temp;
345             }
346 
347           private_data->last_value = last_value;
348         }
349 
350       /* if configured and not already done by the deltification above,
351          transform to 'signed' encoding.  Store the sign in the LSB and
352          the absolute value (-1 for negative values) in the remaining
353          63 bits. */
354       if (!private_data->diff && private_data->is_signed)
355         for (i = 0; i < stream->buffer_used; ++i)
356           stream->buffer[i] = remap_uint(stream->buffer[i]);
357 
358       /* auto-create packed data buffer.  Give it some reasonable initial
359          size - just enough for a few tens of values. */
360       if (private_data->packed == NULL)
361         private_data->packed
362           = svn_stringbuf_create_ensure(256, private_data->pool);
363 
364       /* encode numbers into our temp buffer. */
365       for (i = 0; i < stream->buffer_used; ++i)
366         p = write_packed_uint_body(p, stream->buffer[i]);
367 
368       /* append them to the final packed data */
369       svn_stringbuf_appendbytes(private_data->packed,
370                                 (char *)local_buffer,
371                                 p - local_buffer);
372     }
373 
374   /* maintain counters */
375   private_data->item_count += stream->buffer_used;
376   stream->buffer_used = 0;
377 }
378 
379 void
svn_packed__add_uint(svn_packed__int_stream_t * stream,apr_uint64_t value)380 svn_packed__add_uint(svn_packed__int_stream_t *stream,
381                      apr_uint64_t value)
382 {
383   stream->buffer[stream->buffer_used] = value;
384   if (++stream->buffer_used == SVN__PACKED_DATA_BUFFER_SIZE)
385     data_flush_buffer(stream);
386 }
387 
388 void
svn_packed__add_int(svn_packed__int_stream_t * stream,apr_int64_t value)389 svn_packed__add_int(svn_packed__int_stream_t *stream,
390                     apr_int64_t value)
391 {
392   svn_packed__add_uint(stream, (apr_uint64_t)value);
393 }
394 
395 void
svn_packed__add_bytes(svn_packed__byte_stream_t * stream,const char * data,apr_size_t len)396 svn_packed__add_bytes(svn_packed__byte_stream_t *stream,
397                       const char *data,
398                       apr_size_t len)
399 {
400   svn_packed__add_uint(stream->lengths_stream, len);
401   svn_stringbuf_appendbytes(stream->packed, data, len);
402 }
403 
404 /* Append the 7b/8b encoded representation of VALUE to PACKED.
405  */
406 static void
write_packed_uint(svn_stringbuf_t * packed,apr_uint64_t value)407 write_packed_uint(svn_stringbuf_t* packed, apr_uint64_t value)
408 {
409   if (value < 0x80)
410     {
411       svn_stringbuf_appendbyte(packed, (char)value);
412     }
413   else
414     {
415       unsigned char buffer[10];
416       unsigned char *p = write_packed_uint_body(buffer, value);
417 
418       svn_stringbuf_appendbytes(packed, (char *)buffer, p - buffer);
419     }
420 }
421 
422 /* Recursively write the structure (config parameters, sub-streams, data
423  * sizes) of the STREAM and all its siblings to the TREE_STRUCT buffer.
424  */
425 static void
write_int_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__int_stream_t * stream)426 write_int_stream_structure(svn_stringbuf_t* tree_struct,
427                            svn_packed__int_stream_t* stream)
428 {
429   while (stream)
430     {
431       /* store config parameters and number of sub-streams in 1 number */
432       packed_int_private_t *private_data = stream->private_data;
433       write_packed_uint(tree_struct, (private_data->substream_count << 2)
434                                    + (private_data->diff ? 1 : 0)
435                                    + (private_data->is_signed ? 2 : 0));
436 
437       /* store item count and length their of packed representation */
438       data_flush_buffer(stream);
439 
440       write_packed_uint(tree_struct, private_data->item_count);
441       write_packed_uint(tree_struct, private_data->packed
442                                    ? private_data->packed->len
443                                    : 0);
444 
445       /* append all sub-stream structures */
446       write_int_stream_structure(tree_struct, private_data->first_substream);
447 
448       /* continue with next sibling */
449       stream = private_data->is_last ? NULL : private_data->next;
450     }
451 }
452 
453 /* Recursively write the structure (sub-streams, data sizes) of the STREAM
454  * and all its siblings to the TREE_STRUCT buffer.
455  */
456 static void
write_byte_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__byte_stream_t * stream)457 write_byte_stream_structure(svn_stringbuf_t* tree_struct,
458                             svn_packed__byte_stream_t* stream)
459 {
460   /* for this and all siblings */
461   for (; stream; stream = stream->next)
462     {
463       /* this stream's structure and size */
464       write_packed_uint(tree_struct, 0);
465       write_packed_uint(tree_struct, stream->lengths_stream_index);
466       write_packed_uint(tree_struct, stream->packed->len);
467 
468       /* followed by all its sub-streams */
469       write_byte_stream_structure(tree_struct, stream->first_substream);
470     }
471 }
472 
473 /* Write the 7b/8b encoded representation of VALUE to STREAM.
474  */
475 static svn_error_t *
write_stream_uint(svn_stream_t * stream,apr_uint64_t value)476 write_stream_uint(svn_stream_t *stream,
477                   apr_uint64_t value)
478 {
479   unsigned char buffer[10];
480   apr_size_t count = write_packed_uint_body(buffer, value) - buffer;
481 
482   SVN_ERR(svn_stream_write(stream, (char *)buffer, &count));
483 
484   return SVN_NO_ERROR;
485 }
486 
487 /* Return the total size of all packed data in STREAM, its siblings and
488  * all sub-streams.  To get an accurate value, flush all buffers prior to
489  * calling this function.
490  */
491 static apr_size_t
packed_int_stream_length(svn_packed__int_stream_t * stream)492 packed_int_stream_length(svn_packed__int_stream_t *stream)
493 {
494   packed_int_private_t *private_data = stream->private_data;
495   apr_size_t result = private_data->packed ? private_data->packed->len : 0;
496 
497   stream = private_data->first_substream;
498   while (stream)
499     {
500       private_data = stream->private_data;
501       result += packed_int_stream_length(stream);
502       stream = private_data->is_last ? NULL : private_data->next;
503     }
504 
505   return result;
506 }
507 
508 /* Return the total size of all byte sequences data in STREAM, its siblings
509  * and all sub-streams.
510  */
511 static apr_size_t
packed_byte_stream_length(svn_packed__byte_stream_t * stream)512 packed_byte_stream_length(svn_packed__byte_stream_t *stream)
513 {
514   apr_size_t result = stream->packed->len;
515 
516   for (stream = stream->first_substream; stream; stream = stream->next)
517     result += packed_byte_stream_length(stream);
518 
519   return result;
520 }
521 
522 /* Append all packed data in STREAM, its siblings and all sub-streams to
523  * COMBINED.
524  */
525 static void
append_int_stream(svn_packed__int_stream_t * stream,svn_stringbuf_t * combined)526 append_int_stream(svn_packed__int_stream_t *stream,
527                   svn_stringbuf_t *combined)
528 {
529   packed_int_private_t *private_data = stream->private_data;
530   if (private_data->packed)
531     svn_stringbuf_appendstr(combined, private_data->packed);
532 
533   stream = private_data->first_substream;
534   while (stream)
535     {
536       private_data = stream->private_data;
537       append_int_stream(stream, combined);
538       stream = private_data->is_last ? NULL : private_data->next;
539     }
540 }
541 
542 /* Append all byte sequences in STREAM, its siblings and all sub-streams
543  * to COMBINED.
544  */
545 static void
append_byte_stream(svn_packed__byte_stream_t * stream,svn_stringbuf_t * combined)546 append_byte_stream(svn_packed__byte_stream_t *stream,
547                    svn_stringbuf_t *combined)
548 {
549   svn_stringbuf_appendstr(combined, stream->packed);
550 
551   for (stream = stream->first_substream; stream; stream = stream->next)
552     append_byte_stream(stream, combined);
553 }
554 
555 /* Take the binary data in UNCOMPRESSED, zip it into COMPRESSED and write
556  * it to STREAM.  COMPRESSED simply acts as a re-usable memory buffer.
557  * Clear all buffers (COMPRESSED, UNCOMPRESSED) at the end of the function.
558  */
559 static svn_error_t *
write_stream_data(svn_stream_t * stream,svn_stringbuf_t * uncompressed,svn_stringbuf_t * compressed)560 write_stream_data(svn_stream_t *stream,
561                   svn_stringbuf_t *uncompressed,
562                   svn_stringbuf_t *compressed)
563 {
564   SVN_ERR(svn__compress_zlib(uncompressed->data, uncompressed->len,
565                              compressed,
566                              SVN_DELTA_COMPRESSION_LEVEL_DEFAULT));
567 
568   SVN_ERR(write_stream_uint(stream, compressed->len));
569   SVN_ERR(svn_stream_write(stream, compressed->data, &compressed->len));
570 
571   svn_stringbuf_setempty(uncompressed);
572   svn_stringbuf_setempty(compressed);
573 
574   return SVN_NO_ERROR;
575 }
576 
577 svn_error_t *
svn_packed__data_write(svn_stream_t * stream,svn_packed__data_root_t * root,apr_pool_t * scratch_pool)578 svn_packed__data_write(svn_stream_t *stream,
579                        svn_packed__data_root_t *root,
580                        apr_pool_t *scratch_pool)
581 {
582   svn_packed__int_stream_t *int_stream;
583   svn_packed__byte_stream_t *byte_stream;
584 
585   /* re-usable data buffers */
586   svn_stringbuf_t *compressed
587     = svn_stringbuf_create_ensure(1024, scratch_pool);
588   svn_stringbuf_t *uncompressed
589     = svn_stringbuf_create_ensure(1024, scratch_pool);
590 
591   /* write tree structure */
592   svn_stringbuf_t *tree_struct
593     = svn_stringbuf_create_ensure(127, scratch_pool);
594 
595   write_packed_uint(tree_struct, root->int_stream_count);
596   write_int_stream_structure(tree_struct, root->first_int_stream);
597 
598   write_packed_uint(tree_struct, root->byte_stream_count);
599   write_byte_stream_structure(tree_struct, root->first_byte_stream);
600 
601   SVN_ERR(write_stream_uint(stream, tree_struct->len));
602   SVN_ERR(svn_stream_write(stream, tree_struct->data, &tree_struct->len));
603 
604   /* flatten sub-streams, zip them and write them to disk */
605 
606   for (int_stream = root->first_int_stream;
607        int_stream;
608        int_stream = ((packed_int_private_t*)int_stream->private_data)->next)
609     {
610       apr_size_t len = packed_int_stream_length(int_stream);
611       svn_stringbuf_ensure(uncompressed, len);
612 
613       append_int_stream(int_stream, uncompressed);
614       SVN_ERR(write_stream_data(stream, uncompressed, compressed));
615     }
616 
617   for (byte_stream = root->first_byte_stream;
618        byte_stream;
619        byte_stream = byte_stream->next)
620     {
621       apr_size_t len = packed_byte_stream_length(byte_stream);
622       svn_stringbuf_ensure(uncompressed, len);
623 
624       append_byte_stream(byte_stream, uncompressed);
625       SVN_ERR(write_stream_data(stream, uncompressed, compressed));
626     }
627 
628   return SVN_NO_ERROR;
629 }
630 
631 
632 /* Read access. */
633 
634 svn_packed__int_stream_t *
svn_packed__first_int_stream(svn_packed__data_root_t * root)635 svn_packed__first_int_stream(svn_packed__data_root_t *root)
636 {
637   return root->first_int_stream;
638 }
639 
640 svn_packed__byte_stream_t *
svn_packed__first_byte_stream(svn_packed__data_root_t * root)641 svn_packed__first_byte_stream(svn_packed__data_root_t *root)
642 {
643   return root->first_byte_stream;
644 }
645 
646 svn_packed__int_stream_t *
svn_packed__next_int_stream(svn_packed__int_stream_t * stream)647 svn_packed__next_int_stream(svn_packed__int_stream_t *stream)
648 {
649   packed_int_private_t *private_data = stream->private_data;
650   return private_data->is_last ? NULL : private_data->next;
651 }
652 
653 svn_packed__byte_stream_t *
svn_packed__next_byte_stream(svn_packed__byte_stream_t * stream)654 svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream)
655 {
656   return stream->next;
657 }
658 
659 svn_packed__int_stream_t *
svn_packed__first_int_substream(svn_packed__int_stream_t * stream)660 svn_packed__first_int_substream(svn_packed__int_stream_t *stream)
661 {
662   packed_int_private_t *private_data = stream->private_data;
663   return private_data->first_substream;
664 }
665 
666 apr_size_t
svn_packed__int_count(svn_packed__int_stream_t * stream)667 svn_packed__int_count(svn_packed__int_stream_t *stream)
668 {
669   packed_int_private_t *private_data = stream->private_data;
670   return private_data->item_count + stream->buffer_used;
671 }
672 
673 apr_size_t
svn_packed__byte_count(svn_packed__byte_stream_t * stream)674 svn_packed__byte_count(svn_packed__byte_stream_t *stream)
675 {
676   return stream->packed->len;
677 }
678 
679 apr_size_t
svn_packed__byte_block_count(svn_packed__byte_stream_t * stream)680 svn_packed__byte_block_count(svn_packed__byte_stream_t *stream)
681 {
682   return svn_packed__int_count(stream->lengths_stream);
683 }
684 
685 /* Read one 7b/8b encoded value from *P and return it in *RESULT.  Returns
686  * the first position after the parsed data.
687  *
688  * Overflows will be detected in the sense that it will end parsing the
689  * input but the result is undefined.
690  */
691 static unsigned char *
read_packed_uint_body(unsigned char * p,apr_uint64_t * result)692 read_packed_uint_body(unsigned char *p, apr_uint64_t *result)
693 {
694   if (*p < 0x80)
695     {
696       *result = *p;
697     }
698   else
699     {
700       apr_uint64_t shift = 0;
701       apr_uint64_t value = 0;
702       while (*p >= 0x80)
703         {
704           value += (apr_uint64_t)(*p & 0x7f) << shift;
705           ++p;
706 
707           shift += 7;
708           if (shift > 64)
709             {
710               /* a definite overflow.  Note, that numbers of 65 .. 70
711                  bits will not be detected as an overflow as they don't
712                  threaten to exceed the input buffer. */
713               *result = 0;
714               return p;
715             }
716         }
717 
718       *result = value + ((apr_uint64_t)*p << shift);
719     }
720 
721   return ++p;
722 }
723 
724 /* Read one 7b/8b encoded value from STREAM and return it in *RESULT.
725  *
726  * Overflows will be detected in the sense that it will end parsing the
727  * input but the result is undefined.
728  */
729 static svn_error_t *
read_stream_uint(svn_stream_t * stream,apr_uint64_t * result)730 read_stream_uint(svn_stream_t *stream, apr_uint64_t *result)
731 {
732   apr_uint64_t shift = 0;
733   apr_uint64_t value = 0;
734   unsigned char c;
735 
736   do
737     {
738       apr_size_t len = 1;
739       SVN_ERR(svn_stream_read_full(stream, (char *)&c, &len));
740       if (len != 1)
741         return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
742                                 _("Unexpected end of stream"));
743 
744       value += (apr_uint64_t)(c & 0x7f) << shift;
745       shift += 7;
746       if (shift > 64)
747         return svn_error_create(SVN_ERR_CORRUPT_PACKED_DATA, NULL,
748                                 _("Integer representation too long"));
749     }
750   while (c >= 0x80);
751 
752   *result = value;
753   return SVN_NO_ERROR;
754 }
755 
756 /* Extract and return the next integer from PACKED and make PACKED point
757  * to the next integer.
758  */
759 static apr_uint64_t
read_packed_uint(svn_stringbuf_t * packed)760 read_packed_uint(svn_stringbuf_t *packed)
761 {
762   apr_uint64_t result = 0;
763   unsigned char *p = (unsigned char *)packed->data;
764   apr_size_t read = read_packed_uint_body(p, &result) - p;
765 
766   if (read > packed->len)
767     read = packed->len;
768 
769   packed->data += read;
770   packed->blocksize -= read;
771   packed->len -= read;
772 
773   return result;
774 }
775 
776 /* Ensure that STREAM contains at least one item in its buffer.
777  */
778 static void
svn_packed__data_fill_buffer(svn_packed__int_stream_t * stream)779 svn_packed__data_fill_buffer(svn_packed__int_stream_t *stream)
780 {
781   packed_int_private_t *private_data = stream->private_data;
782   apr_size_t i;
783   apr_size_t end = MIN(SVN__PACKED_DATA_BUFFER_SIZE,
784                        private_data->item_count);
785 
786   /* in case, some user calls us explicitly without a good reason ... */
787   if (stream->buffer_used)
788     return;
789 
790   /* can we get data from the sub-streams or do we have to decode it from
791      our local packed container? */
792   if (private_data->current_substream)
793     for (i = end; i > 0; --i)
794       {
795         packed_int_private_t *current_private_data
796           = private_data->current_substream->private_data;
797         stream->buffer[i-1]
798           = svn_packed__get_uint(private_data->current_substream);
799         private_data->current_substream = current_private_data->next;
800       }
801   else
802     {
803       /* use this local buffer only if the packed data is shorter than this.
804          The goal is that read_packed_uint_body doesn't need check for
805          overflows. */
806       unsigned char local_buffer[10 * SVN__PACKED_DATA_BUFFER_SIZE];
807       unsigned char *p;
808       unsigned char *start;
809       apr_size_t packed_read;
810 
811       if (private_data->packed->len < sizeof(local_buffer))
812         {
813           apr_size_t trail = sizeof(local_buffer) - private_data->packed->len;
814           memcpy(local_buffer,
815                  private_data->packed->data,
816                  private_data->packed->len);
817           memset(local_buffer + private_data->packed->len, 0, MIN(trail, end));
818 
819           p = local_buffer;
820         }
821       else
822         p = (unsigned char *)private_data->packed->data;
823 
824       /* unpack numbers */
825       start = p;
826       for (i = end; i > 0; --i)
827         p = read_packed_uint_body(p, &stream->buffer[i-1]);
828 
829       /* adjust remaining packed data buffer */
830       packed_read = p - start;
831       private_data->packed->data += packed_read;
832       private_data->packed->len -= packed_read;
833       private_data->packed->blocksize -= packed_read;
834 
835       /* undeltify numbers, if configured */
836       if (private_data->diff)
837         {
838           apr_uint64_t last_value = private_data->last_value;
839           for (i = end; i > 0; --i)
840             {
841               last_value += unmap_uint(stream->buffer[i-1]);
842               stream->buffer[i-1] = last_value;
843             }
844 
845           private_data->last_value = last_value;
846         }
847 
848       /* handle signed values, if configured and not handled already */
849       if (!private_data->diff && private_data->is_signed)
850         for (i = 0; i < end; ++i)
851           stream->buffer[i] = unmap_uint(stream->buffer[i]);
852     }
853 
854   stream->buffer_used = end;
855   private_data->item_count -= end;
856 }
857 
858 apr_uint64_t
svn_packed__get_uint(svn_packed__int_stream_t * stream)859 svn_packed__get_uint(svn_packed__int_stream_t *stream)
860 {
861   if (stream->buffer_used == 0)
862     svn_packed__data_fill_buffer(stream);
863 
864   return stream->buffer_used ? stream->buffer[--stream->buffer_used] : 0;
865 }
866 
867 apr_int64_t
svn_packed__get_int(svn_packed__int_stream_t * stream)868 svn_packed__get_int(svn_packed__int_stream_t *stream)
869 {
870   return (apr_int64_t)svn_packed__get_uint(stream);
871 }
872 
873 const char *
svn_packed__get_bytes(svn_packed__byte_stream_t * stream,apr_size_t * len)874 svn_packed__get_bytes(svn_packed__byte_stream_t *stream,
875                       apr_size_t *len)
876 {
877   const char *result = stream->packed->data;
878   apr_size_t count = (apr_size_t)svn_packed__get_uint(stream->lengths_stream);
879 
880   if (count > stream->packed->len)
881     count = stream->packed->len;
882 
883   /* advance packed buffer */
884   stream->packed->data += count;
885   stream->packed->len -= count;
886   stream->packed->blocksize -= count;
887 
888   *len = count;
889   return result;
890 }
891 
892 /* Read the integer stream structure and recreate it in STREAM, including
893  * sub-streams, from TREE_STRUCT.
894  */
895 static void
read_int_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__int_stream_t * stream)896 read_int_stream_structure(svn_stringbuf_t *tree_struct,
897                           svn_packed__int_stream_t *stream)
898 {
899   packed_int_private_t *private_data = stream->private_data;
900   apr_uint64_t value = read_packed_uint(tree_struct);
901   apr_size_t substream_count;
902   apr_size_t i;
903 
904   /* extract local parameters */
905   private_data->diff = (value & 1) != 0;
906   private_data->is_signed = (value & 2) != 0;
907   substream_count = (apr_size_t)(value >> 2);
908 
909   /* read item count & packed size; allocate packed data buffer */
910   private_data->item_count = (apr_size_t)read_packed_uint(tree_struct);
911   value = read_packed_uint(tree_struct);
912   if (value)
913     {
914       private_data->packed = svn_stringbuf_create_ensure((apr_size_t)value,
915                                                          private_data->pool);
916       private_data->packed->len = (apr_size_t)value;
917     }
918 
919   /* add sub-streams and read their config, too */
920   for (i = 0; i < substream_count; ++i)
921     read_int_stream_structure(tree_struct,
922                               svn_packed__create_int_substream(stream,
923                                                                FALSE,
924                                                                FALSE));
925 }
926 
927 /* Read the integer stream structure and recreate it in STREAM, including
928  * sub-streams, from TREE_STRUCT.  FIRST_INT_STREAM is the integer stream
929  * that would correspond to lengths_stream_index 0.
930  */
931 static void
read_byte_stream_structure(svn_stringbuf_t * tree_struct,svn_packed__byte_stream_t * stream,svn_packed__int_stream_t * first_int_stream)932 read_byte_stream_structure(svn_stringbuf_t *tree_struct,
933                            svn_packed__byte_stream_t *stream,
934                            svn_packed__int_stream_t *first_int_stream)
935 {
936   apr_size_t lengths_stream_index;
937   apr_size_t packed_size;
938   apr_size_t i;
939 
940   /* read parameters from the TREE_STRUCT buffer */
941   (void) (apr_size_t)read_packed_uint(tree_struct); /* discard first uint */
942   lengths_stream_index = (apr_size_t)read_packed_uint(tree_struct);
943   packed_size = (apr_size_t)read_packed_uint(tree_struct);
944 
945   /* allocate byte sequence buffer size */
946   svn_stringbuf_ensure(stream->packed, packed_size);
947   stream->packed->len = packed_size;
948 
949   /* navigate to the (already existing) lengths_stream */
950   stream->lengths_stream_index = lengths_stream_index;
951   stream->lengths_stream = first_int_stream;
952   for (i = 0; i < lengths_stream_index; ++i)
953     {
954       packed_int_private_t *length_private
955         = stream->lengths_stream->private_data;
956       stream->lengths_stream = length_private->next;
957     }
958 }
959 
960 /* Read a compressed block from STREAM and uncompress it into UNCOMPRESSED.
961  * UNCOMPRESSED_LEN is the expected size of the stream.  COMPRESSED is a
962  * re-used buffer for temporary data.
963  */
964 static svn_error_t *
read_stream_data(svn_stream_t * stream,apr_size_t uncompressed_len,svn_stringbuf_t * uncompressed,svn_stringbuf_t * compressed)965 read_stream_data(svn_stream_t *stream,
966                  apr_size_t uncompressed_len,
967                  svn_stringbuf_t *uncompressed,
968                  svn_stringbuf_t *compressed)
969 {
970   apr_uint64_t len;
971   apr_size_t compressed_len;
972 
973   SVN_ERR(read_stream_uint(stream, &len));
974   compressed_len = (apr_size_t)len;
975 
976   svn_stringbuf_ensure(compressed, compressed_len);
977   compressed->len = compressed_len;
978   SVN_ERR(svn_stream_read_full(stream, compressed->data, &compressed->len));
979   compressed->data[compressed_len] = '\0';
980 
981   SVN_ERR(svn__decompress_zlib(compressed->data, compressed->len,
982                                uncompressed, uncompressed_len));
983 
984   return SVN_NO_ERROR;
985 }
986 
987 /* Read the packed contents from COMBINED, starting at *OFFSET and store
988  * it in STREAM.  Update *OFFSET to point to the next stream's data and
989  * continue with the sub-streams.
990  */
991 static void
unflatten_int_stream(svn_packed__int_stream_t * stream,svn_stringbuf_t * combined,apr_size_t * offset)992 unflatten_int_stream(svn_packed__int_stream_t *stream,
993                      svn_stringbuf_t *combined,
994                      apr_size_t *offset)
995 {
996   packed_int_private_t *private_data = stream->private_data;
997   if (private_data->packed)
998     {
999       memcpy(private_data->packed->data,
1000              combined->data + *offset,
1001              private_data->packed->len);
1002 
1003       private_data->packed->data[private_data->packed->len] = '\0';
1004       *offset += private_data->packed->len;
1005     }
1006 
1007   stream = private_data->first_substream;
1008   while (stream)
1009     {
1010       private_data = stream->private_data;
1011       unflatten_int_stream(stream, combined, offset);
1012       stream = private_data->is_last ? NULL : private_data->next;
1013     }
1014 }
1015 
1016 /* Read the packed contents from COMBINED, starting at *OFFSET and store
1017  * it in STREAM.  Update *OFFSET to point to the next stream's data and
1018  * continue with the sub-streams.
1019  */
1020 static void
unflatten_byte_stream(svn_packed__byte_stream_t * stream,svn_stringbuf_t * combined,apr_size_t * offset)1021 unflatten_byte_stream(svn_packed__byte_stream_t *stream,
1022                       svn_stringbuf_t *combined,
1023                       apr_size_t *offset)
1024 {
1025   memcpy(stream->packed->data,
1026          combined->data + *offset,
1027          stream->packed->len);
1028   stream->packed->data[stream->packed->len] = '\0';
1029 
1030   *offset += stream->packed->len;
1031   for (stream = stream->first_substream; stream; stream = stream->next)
1032     unflatten_byte_stream(stream, combined, offset);
1033 }
1034 
1035 svn_error_t *
svn_packed__data_read(svn_packed__data_root_t ** root_p,svn_stream_t * stream,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1036 svn_packed__data_read(svn_packed__data_root_t **root_p,
1037                       svn_stream_t *stream,
1038                       apr_pool_t *result_pool,
1039                       apr_pool_t *scratch_pool)
1040 {
1041   apr_uint64_t i;
1042   apr_uint64_t count;
1043 
1044   svn_packed__int_stream_t *int_stream;
1045   svn_packed__byte_stream_t *byte_stream;
1046   svn_packed__data_root_t *root = svn_packed__data_create_root(result_pool);
1047 
1048   svn_stringbuf_t *compressed
1049     = svn_stringbuf_create_ensure(1024, scratch_pool);
1050   svn_stringbuf_t *uncompressed
1051     = svn_stringbuf_create_ensure(1024, scratch_pool);
1052 
1053   /* read tree structure */
1054 
1055   apr_uint64_t tree_struct_size;
1056   svn_stringbuf_t *tree_struct;
1057 
1058   SVN_ERR(read_stream_uint(stream, &tree_struct_size));
1059   tree_struct
1060     = svn_stringbuf_create_ensure((apr_size_t)tree_struct_size, scratch_pool);
1061   tree_struct->len = (apr_size_t)tree_struct_size;
1062 
1063   SVN_ERR(svn_stream_read_full(stream, tree_struct->data, &tree_struct->len));
1064   tree_struct->data[tree_struct->len] = '\0';
1065 
1066   /* reconstruct tree structure */
1067 
1068   count = read_packed_uint(tree_struct);
1069   for (i = 0; i < count; ++i)
1070     read_int_stream_structure(tree_struct,
1071                               svn_packed__create_int_stream(root, FALSE,
1072                                                                  FALSE));
1073 
1074   count = read_packed_uint(tree_struct);
1075   for (i = 0; i < count; ++i)
1076     read_byte_stream_structure(tree_struct,
1077                                create_bytes_stream_body(root),
1078                                root->first_int_stream);
1079 
1080   /* read sub-stream data from disk, unzip it and buffer it */
1081 
1082   for (int_stream = root->first_int_stream;
1083        int_stream;
1084        int_stream = ((packed_int_private_t*)int_stream->private_data)->next)
1085     {
1086       apr_size_t offset = 0;
1087       SVN_ERR(read_stream_data(stream,
1088                                packed_int_stream_length(int_stream),
1089                                uncompressed, compressed));
1090       unflatten_int_stream(int_stream, uncompressed, &offset);
1091     }
1092 
1093   for (byte_stream = root->first_byte_stream;
1094        byte_stream;
1095        byte_stream = byte_stream->next)
1096     {
1097       apr_size_t offset = 0;
1098       SVN_ERR(read_stream_data(stream,
1099                                packed_byte_stream_length(byte_stream),
1100                                uncompressed, compressed));
1101       unflatten_byte_stream(byte_stream, uncompressed, &offset);
1102     }
1103 
1104   *root_p = root;
1105   return SVN_NO_ERROR;
1106 }
1107