1 /* packed_data.h : Interface to the packed binary stream data structure 2 * 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 */ 22 23 #ifndef SVN_PACKED_DATA_H 24 #define SVN_PACKED_DATA_H 25 26 #include "svn_string.h" 27 #include "svn_io.h" 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif /* __cplusplus */ 32 33 /* This API provides Yet Another Serialization Framework. 34 * 35 * It is geared towards efficiently encoding collections of structured 36 * binary data (e.g. an array of noderev objects). The basic idea is to 37 * transform them into hierarchies of streams with each stream usually 38 * corresponding to a single attribute in the original data structures. 39 * The user is free model the mapping structure <-> streams mapping as she 40 * sees fit. 41 * 42 * With all data inside the same (sub-)stream carrying similar attribute 43 * values, the whole stream lends itself to data compression. Strings / 44 * plain byte sequences will be stored as is. Numbers use a 7b/8b encoding 45 * scheme to eliminate leading zeros. Because values are often dependent 46 * (increasing offsets, roughly similar revision number, etc.), streams 47 * can be configured as storing (hopefully shorter) deltas instead of the 48 * original value. 49 * 50 * Two stream types are provided: integer and byte streams. While the 51 * first store 64 bit integers only and can be configured to assume 52 * signed and / or deltifyable data, the second will store arbitrary 53 * byte sequences including their length. At the root level, you may 54 * create an arbitrary number of integer and byte streams. Any stream 55 * may have an arbitrary number of sub-streams of the same kind. You 56 * should create the full stream hierarchy before writing any data to it. 57 * 58 * As a convenience, when an integer stream has sub-streams, you may write 59 * to the parent stream instead of all sub-streams individually and the 60 * values will be passed down automatically in a round-robin fashion. 61 * Reading from the parent stream is similarly supported. 62 * 63 * When all data has been added to the stream, it can be written to an 64 * ordinary svn_stream_t. First, we write a description of the stream 65 * structure (types, sub-streams, sizes and configurations) followed by 66 * zlib compressed stream content. For each top-level stream, all sub- 67 * stream data will be concatenated and then compressed as a single block. 68 * To maximize the effect of this, make sure all data in that stream 69 * hierarchy has a similar value distribution. 70 * 71 * Reading data starts with an svn_stream_t and automatically recreates 72 * the stream hierarchies. You only need to extract data from it in the 73 * same order as you wrote it. 74 * 75 * Although not enforced programmatically, you may either only write to a 76 * stream hierarchy or only read from it but you cannot do both on the 77 * same data structure. 78 */ 79 80 81 82 /* We pack / unpack integers en block to minimize calling and setup overhead. 83 * This is the number of integers we put into a buffer before writing them 84 * them to / after reading them from the 7b/8b stream. Under 64 bits, this 85 * value creates a 128 byte data structure (14 + 2 integers, 8 bytes each). 86 */ 87 #define SVN__PACKED_DATA_BUFFER_SIZE 14 88 89 90 /* Data types. */ 91 92 /* Opaque type for the root object. 93 */ 94 typedef struct svn_packed__data_root_t svn_packed__data_root_t; 95 96 /* Opaque type for byte streams. 97 */ 98 typedef struct svn_packed__byte_stream_t svn_packed__byte_stream_t; 99 100 /* Semi-opaque type for integer streams. We expose the unpacked buffer 101 * to allow for replacing svn_packed__add_uint and friends by macros. 102 */ 103 typedef struct svn_packed__int_stream_t 104 { 105 /* pointer to the remainder of the data structure */ 106 void *private_data; 107 108 /* number of value entries in BUFFER */ 109 apr_size_t buffer_used; 110 111 /* unpacked integers (either yet to be packed or pre-fetched from the 112 * packed buffers). Only the first BUFFER_USED entries are valid. */ 113 apr_uint64_t buffer[SVN__PACKED_DATA_BUFFER_SIZE]; 114 } svn_packed__int_stream_t; 115 116 117 /* Writing data. */ 118 119 /* Return a new serialization root object, allocated in POOL. 120 */ 121 svn_packed__data_root_t * 122 svn_packed__data_create_root(apr_pool_t *pool); 123 124 /* Create and return a new top-level integer stream in ROOT. If signed, 125 * negative numbers will be put into that stream, SIGNED_INTS should be 126 * TRUE as a more efficient encoding will be used in that case. Set 127 * DIFF to TRUE if you expect the difference between consecutive numbers 128 * to be much smaller (~100 times) than the actual numbers. 129 */ 130 svn_packed__int_stream_t * 131 svn_packed__create_int_stream(svn_packed__data_root_t *root, 132 svn_boolean_t diff, 133 svn_boolean_t signed_ints); 134 135 /* Create and return a sub-stream to the existing integer stream PARENT. 136 * If signed, negative numbers will be put into that stream, SIGNED_INTS 137 * should be TRUE as a more efficient encoding will be used in that case. 138 * Set DIFF to TRUE if you expect the difference between consecutive numbers 139 * to be much smaller (~100 times) than the actual numbers. 140 */ 141 svn_packed__int_stream_t * 142 svn_packed__create_int_substream(svn_packed__int_stream_t *parent, 143 svn_boolean_t diff, 144 svn_boolean_t signed_ints); 145 146 /* Create and return a new top-level byte sequence stream in ROOT. 147 */ 148 svn_packed__byte_stream_t * 149 svn_packed__create_bytes_stream(svn_packed__data_root_t *root); 150 151 /* Write the unsigned integer VALUE to STEAM. 152 */ 153 void 154 svn_packed__add_uint(svn_packed__int_stream_t *stream, 155 apr_uint64_t value); 156 157 /* Write the signed integer VALUE to STEAM. 158 */ 159 void 160 svn_packed__add_int(svn_packed__int_stream_t *stream, 161 apr_int64_t value); 162 163 /* Write the sequence stating at DATA containing LEN bytes to STEAM. 164 */ 165 void 166 svn_packed__add_bytes(svn_packed__byte_stream_t *stream, 167 const char *data, 168 apr_size_t len); 169 170 /* Write all contents of ROOT (including all sub-streams) to STREAM. 171 * Use SCRATCH_POOL for temporary allocations. 172 */ 173 svn_error_t * 174 svn_packed__data_write(svn_stream_t *stream, 175 svn_packed__data_root_t *root, 176 apr_pool_t *scratch_pool); 177 178 179 /* Reading data. */ 180 181 /* Return the first integer stream in ROOT. Returns NULL in case there 182 * aren't any. 183 */ 184 svn_packed__int_stream_t * 185 svn_packed__first_int_stream(svn_packed__data_root_t *root); 186 187 /* Return the first byte sequence stream in ROOT. Returns NULL in case 188 * there aren't any. 189 */ 190 svn_packed__byte_stream_t * 191 svn_packed__first_byte_stream(svn_packed__data_root_t *root); 192 193 /* Return the next (sibling) integer stream to STREAM. Returns NULL in 194 * case there isn't any. 195 */ 196 svn_packed__int_stream_t * 197 svn_packed__next_int_stream(svn_packed__int_stream_t *stream); 198 199 /* Return the next (sibling) byte sequence stream to STREAM. Returns NULL 200 * in case there isn't any. 201 */ 202 svn_packed__byte_stream_t * 203 svn_packed__next_byte_stream(svn_packed__byte_stream_t *stream); 204 205 /* Return the first sub-stream of STREAM. Returns NULL in case there 206 * isn't any. 207 */ 208 svn_packed__int_stream_t * 209 svn_packed__first_int_substream(svn_packed__int_stream_t *stream); 210 211 /* Return the number of integers left to read from STREAM. 212 */ 213 apr_size_t 214 svn_packed__int_count(svn_packed__int_stream_t *stream); 215 216 /* Return the number of bytes left to read from STREAM. 217 */ 218 apr_size_t 219 svn_packed__byte_count(svn_packed__byte_stream_t *stream); 220 221 /* Return the number of entries left to read from STREAM. 222 */ 223 apr_size_t 224 svn_packed__byte_block_count(svn_packed__byte_stream_t *stream); 225 226 /* Return the next number from STREAM as unsigned integer. Returns 0 when 227 * reading beyond the end of the stream. 228 */ 229 apr_uint64_t 230 svn_packed__get_uint(svn_packed__int_stream_t *stream); 231 232 /* Return the next number from STREAM as signed integer. Returns 0 when 233 * reading beyond the end of the stream. 234 */ 235 apr_int64_t 236 svn_packed__get_int(svn_packed__int_stream_t *stream); 237 238 /* Return the next byte sequence from STREAM and set *LEN to the length 239 * of that sequence. Sets *LEN to 0 when reading beyond the end of the 240 * stream. 241 */ 242 const char * 243 svn_packed__get_bytes(svn_packed__byte_stream_t *stream, 244 apr_size_t *len); 245 246 /* Allocate a new packed data root in RESULT_POOL, read its structure and 247 * stream contents from STREAM and return it in *ROOT_P. Use SCRATCH_POOL 248 * for temporary allocations. 249 */ 250 svn_error_t * 251 svn_packed__data_read(svn_packed__data_root_t **root_p, 252 svn_stream_t *stream, 253 apr_pool_t *result_pool, 254 apr_pool_t *scratch_pool); 255 256 #ifdef __cplusplus 257 } 258 #endif /* __cplusplus */ 259 260 #endif /* SVN_PACKED_DATA_H */ 261