1 /* packed_data.h : Interface to the packed binary stream data structure
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 #ifndef SVN_PACKED_DATA_H
24 #define SVN_PACKED_DATA_H
25 
26 #include "svn_string.h"
27 #include "svn_io.h"
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif /* __cplusplus */
32 
33 /* This API provides Yet Another Serialization Framework.
34  *
35  * It is geared towards efficiently encoding collections of structured
36  * binary data (e.g. an array of noderev objects).  The basic idea is to
37  * transform them into hierarchies of streams with each stream usually
38  * corresponding to a single attribute in the original data structures.
39  * The user is free to model the structure <-> streams mapping as she
40  * sees fit.
41  *
42  * With all data inside the same (sub-)stream carrying similar attribute
43  * values, the whole stream lends itself to data compression.  Strings /
44  * plain byte sequences will be stored as is.  Numbers use a 7b/8b encoding
45  * scheme to eliminate leading zeros.  Because values are often dependent
46  * (increasing offsets, roughly similar revision number, etc.), streams
47  * can be configured as storing (hopefully shorter) deltas instead of the
48  * original value.
49  *
50  * Two stream types are provided: integer and byte streams.  While the
51  * first store 64 bit integers only and can be configured to assume
52  * signed and / or deltifyable data, the second will store arbitrary
53  * byte sequences including their length.  At the root level, you may
54  * create an arbitrary number of integer and byte streams.  Any stream
55  * may have an arbitrary number of sub-streams of the same kind.  You
56  * should create the full stream hierarchy before writing any data to it.
57  *
58  * As a convenience, when an integer stream has sub-streams, you may write
59  * to the parent stream instead of all sub-streams individually and the
60  * values will be passed down automatically in a round-robin fashion.
61  * Reading from the parent stream is similarly supported.
62  *
63  * When all data has been added to the stream, it can be written to an
64  * ordinary svn_stream_t.  First, we write a description of the stream
65  * structure (types, sub-streams, sizes and configurations) followed by
66  * zlib compressed stream content.  For each top-level stream, all sub-
67  * stream data will be concatenated and then compressed as a single block.
68  * To maximize the effect of this, make sure all data in that stream
69  * hierarchy has a similar value distribution.
70  *
71  * Reading data starts with an svn_stream_t and automatically recreates
72  * the stream hierarchies.  You only need to extract data from it in the
73  * same order as you wrote it.
74  *
75  * Although not enforced programmatically, you may either only write to a
76  * stream hierarchy or only read from it but you cannot do both on the
77  * same data structure.
78  */
79 
80 
81 
82 /* We pack / unpack integers en bloc to minimize calling and setup overhead.
83  * This is the number of integers we put into a buffer before writing them
84  * to / after reading them from the 7b/8b stream.  Under 64 bits, this
85  * value creates a 128 byte data structure (14 + 2 integers, 8 bytes each).
86  */
87 #define SVN__PACKED_DATA_BUFFER_SIZE 14
88 
89 
90 /* Data types. */
91 
92 /* Opaque type for the root object, which all top-level streams belong to.
93  */
94 typedef struct svn_packed__data_root_t svn_packed__data_root_t;
95 
96 /* Opaque type for byte streams, i.e. streams of arbitrary byte sequences.
97  */
98 typedef struct svn_packed__byte_stream_t svn_packed__byte_stream_t;
99 
100 /* Semi-opaque type for integer streams.  Only the unpacked value buffer
101  * is exposed, so svn_packed__add_uint and friends may be replaced by macros.
102  */
103 typedef struct svn_packed__int_stream_t
104 {
105   /* pointer to the remainder (the non-exposed part) of the data structure */
106   void *private_data;
107 
108   /* number of valid value entries in BUFFER */
109   apr_size_t buffer_used;
110 
111   /* unpacked integers (either yet to be packed or pre-fetched from the
112    * packed buffers).  Only the first BUFFER_USED entries are valid. */
113   apr_uint64_t buffer[SVN__PACKED_DATA_BUFFER_SIZE];
114 } svn_packed__int_stream_t;
115 
116 
117 /* Writing data. */
118 
119 /* Create and return a new serialization root object, allocated in POOL.
120  * Add streams with svn_packed__create_int_stream and friends. */
121 svn_packed__data_root_t *
122 svn_packed__data_create_root(apr_pool_t *pool);
123 
124 /* Create and return a new top-level integer stream in ROOT.  If signed,
125  * i.e. possibly negative, numbers will be put into that stream, SIGNED_INTS
126  * should be TRUE as a more efficient encoding will be used in that case.
127  * Set DIFF to TRUE if you expect the difference between consecutive numbers
128  * to be much smaller (~100 times) than the actual numbers.
129  */
130 svn_packed__int_stream_t *
131 svn_packed__create_int_stream(svn_packed__data_root_t *root,
132                               svn_boolean_t diff,
133                               svn_boolean_t signed_ints);
134 
135 /* Create and return a sub-stream to the existing integer stream PARENT.
136  * If signed (possibly negative) numbers will be put into that stream,
137  * SIGNED_INTS should be TRUE as a more efficient encoding will be used in
138  * that case.  Set DIFF to TRUE if you expect the difference between
139  * consecutive numbers to be much smaller (~100x) than the actual numbers.
140  */
141 svn_packed__int_stream_t *
142 svn_packed__create_int_substream(svn_packed__int_stream_t *parent,
143                                  svn_boolean_t diff,
144                                  svn_boolean_t signed_ints);
145 
146 /* Create and return a new top-level byte sequence stream in ROOT.
147  * Byte sequences get written to it with svn_packed__add_bytes. */
148 svn_packed__byte_stream_t *
149 svn_packed__create_bytes_stream(svn_packed__data_root_t *root);
150 
151 /* Write the unsigned integer VALUE to STREAM.
152  */
153 void
154 svn_packed__add_uint(svn_packed__int_stream_t *stream,
155                      apr_uint64_t value);
156 
157 /* Write the signed integer VALUE to STREAM.
158  */
159 void
160 svn_packed__add_int(svn_packed__int_stream_t *stream,
161                     apr_int64_t value);
162 
163 /* Write the sequence starting at DATA containing LEN bytes to STREAM.
164  */
165 void
166 svn_packed__add_bytes(svn_packed__byte_stream_t *stream,
167                       const char *data,
168                       apr_size_t len);
169 
170 /* Write all contents of ROOT (including all sub-streams) to STREAM.
171  * Use SCRATCH_POOL for temporary allocations.  svn_packed__data_read is
172  * the matching function for reading the data back. */
173 svn_error_t *
174 svn_packed__data_write(svn_stream_t *stream,
175                        svn_packed__data_root_t *root,
176                        apr_pool_t *scratch_pool);
177 
178 
179 /* Reading data. */
180 
181 /* Return the first integer stream in ROOT.  Returns NULL in case ROOT
182  * does not contain any integer streams.
183  */
184 svn_packed__int_stream_t *
185 svn_packed__first_int_stream(svn_packed__data_root_t *root);
186 
187 /* Return the first byte sequence stream in ROOT.  Returns NULL in case
188  * ROOT does not contain any byte sequence streams.
189  */
190 svn_packed__byte_stream_t *
191 svn_packed__first_byte_stream(svn_packed__data_root_t *root);
192 
193 /* Return the next (sibling) integer stream to STREAM.  Returns NULL in
194  * case STREAM has no further sibling.
195  */
196 svn_packed__int_stream_t *
197 svn_packed__next_int_stream(svn_packed__int_stream_t *stream);
198 
199 /* Return the next (sibling) byte sequence stream to STREAM.  Returns NULL
200  * in case STREAM has no further sibling.
201  */
202 svn_packed__byte_stream_t *
203 svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream);
204 
205 /* Return the first sub-stream of the integer stream STREAM.  Returns NULL
206  * in case STREAM has no sub-streams.
207  */
208 svn_packed__int_stream_t *
209 svn_packed__first_int_substream(svn_packed__int_stream_t *stream);
210 
211 /* Return the number of integers that are still left to read from STREAM.
212  */
213 apr_size_t
214 svn_packed__int_count(svn_packed__int_stream_t *stream);
215 
216 /* Return the number of bytes left to read from STREAM (as opposed to
217  * the number of entries, see svn_packed__byte_block_count). */
218 apr_size_t
219 svn_packed__byte_count(svn_packed__byte_stream_t *stream);
220 
221 /* Return the number of entries (presumably whole byte sequences, as
222  * opposed to single bytes) left to read from STREAM. */
223 apr_size_t
224 svn_packed__byte_block_count(svn_packed__byte_stream_t *stream);
225 
226 /* Return the next number from STREAM as unsigned integer.  Returns 0 when
227  * reading beyond the end of the stream; svn_packed__int_count tells how
228  * many numbers are actually left. */
229 apr_uint64_t
230 svn_packed__get_uint(svn_packed__int_stream_t *stream);
231 
232 /* Return the next number from STREAM as signed integer.  Returns 0 when
233  * reading beyond the end of the stream; svn_packed__int_count tells how
234  * many numbers are actually left. */
235 apr_int64_t
236 svn_packed__get_int(svn_packed__int_stream_t *stream);
237 
238 /* Return the next byte sequence from STREAM and set *LEN to the length
239  * of that sequence.  Sets *LEN to 0 when reading beyond the end of the
240  * stream.  The returned data is const and must not be modified.
241  */
242 const char *
243 svn_packed__get_bytes(svn_packed__byte_stream_t *stream,
244                       apr_size_t *len);
245 
246 /* Allocate a new packed data root in RESULT_POOL, read its structure and
247  * stream contents from STREAM and return it in *ROOT_P.  Use SCRATCH_POOL
248  * for temporary allocations.  Counterpart to svn_packed__data_write.
249  */
250 svn_error_t *
251 svn_packed__data_read(svn_packed__data_root_t **root_p,
252                       svn_stream_t *stream,
253                       apr_pool_t *result_pool,
254                       apr_pool_t *scratch_pool);
255 
256 #ifdef __cplusplus
257 }
258 #endif /* __cplusplus */
259 
260 #endif /* SVN_PACKED_DATA_H */
261