1 /* index.h : interface to FSX indexing functionality
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 #ifndef SVN_LIBSVN_FS_X_INDEX_H
24 #define SVN_LIBSVN_FS_X_INDEX_H
25 
26 #include "fs.h"
27 #include "rev_file.h"
28 
29 /* Pre-defined item index values.  They identify empty or mandatory
30  * items; values from ..._FIRST_USER upwards may be assigned freely.
31  */
32 #define SVN_FS_X__ITEM_INDEX_UNUSED     0  /* invalid / reserved value */
33 #define SVN_FS_X__ITEM_INDEX_CHANGES    1  /* list of changed paths */
34 #define SVN_FS_X__ITEM_INDEX_ROOT_NODE  2  /* the root noderev */
35 #define SVN_FS_X__ITEM_INDEX_FIRST_USER 3  /* first noderev to be freely
36                                                assigned */
37 
38 /* Data / item types as stored in the phys-to-log index, i.e. the values
39  * used for the TYPE member of svn_fs_x__p2l_entry_t.  */
40 #define SVN_FS_X__ITEM_TYPE_UNUSED     0  /* file section not used */
41 #define SVN_FS_X__ITEM_TYPE_FILE_REP   1  /* item is a file representation */
42 #define SVN_FS_X__ITEM_TYPE_DIR_REP    2  /* item is a directory rep. */
43 #define SVN_FS_X__ITEM_TYPE_FILE_PROPS 3  /* item is a file property rep. */
44 #define SVN_FS_X__ITEM_TYPE_DIR_PROPS  4  /* item is a directory prop rep */
45 #define SVN_FS_X__ITEM_TYPE_NODEREV    5  /* item is a noderev */
46 #define SVN_FS_X__ITEM_TYPE_CHANGES    6  /* item is a changed paths list */
47 
48 #define SVN_FS_X__ITEM_TYPE_ANY_REP    7  /* item is any representation.
49                                               Only used in pre-format7. */
50 
51 #define SVN_FS_X__ITEM_TYPE_CHANGES_CONT  8  /* item is a changes container */
52 #define SVN_FS_X__ITEM_TYPE_NODEREVS_CONT 9  /* item is a noderevs container */
53 #define SVN_FS_X__ITEM_TYPE_REPS_CONT    10  /* item is a representations
54                                                  container */
55 
56 /* We put this string in front of the L2P (log-to-phys) index header. */
57 #define SVN_FS_X__L2P_STREAM_PREFIX "L2P-INDEX\n"
58 
59 /* We put this string in front of the P2L (phys-to-log) index header. */
60 #define SVN_FS_X__P2L_STREAM_PREFIX "P2L-INDEX\n"
61 
62 
63 /* Create and open a packed number stream reading from offsets START to
64  * END in FILE and return it in *STREAM.  Access the file in chunks of
65  * BLOCK_SIZE bytes.  Expect the stream to be prefixed by STREAM_PREFIX
66  * (presumably one of SVN_FS_X__L2P_STREAM_PREFIX / ..P2L_STREAM_PREFIX).
67  * Allocate *STREAM in RESULT_POOL and use SCRATCH_POOL for temporaries. */
68 svn_error_t *
69 svn_fs_x__packed_stream_open(svn_fs_x__packed_number_stream_t **stream,
70                              apr_file_t *file,
71                              apr_off_t start,
72                              apr_off_t end,
73                              const char *stream_prefix,
74                              apr_size_t block_size,
75                              apr_pool_t *result_pool,
76                              apr_pool_t *scratch_pool);
77 
78 /* (User visible) entry in the phys-to-log index.  It describes a section
79  * of some packed / non-packed rev file as containing a specific item.
80  * There must be no overlapping / conflicting entries.
81  */
82 typedef struct svn_fs_x__p2l_entry_t
83 {
84   /* offset of the first byte that belongs to the item */
85   apr_off_t offset;
86 
87   /* length of the item in bytes */
88   apr_off_t size;
89 
90   /* type of the item (see the SVN_FS_X__ITEM_TYPE_* defines) */
91   apr_uint32_t type;
92 
93   /* modified FNV-1a checksum.  0 if the checksum is unknown */
94   apr_uint32_t fnv1_checksum;
95 
96   /* Number of items in this block / container.  Their list can be found
97    * in *ITEMS.  0 for unused sections.  1 for non-container items,
98    * > 1 for containers. */
99   apr_uint32_t item_count;
100 
101   /* List of the ITEM_COUNT items in that block / container */
102   svn_fs_x__id_t *items;
103 } svn_fs_x__p2l_entry_t;
104 
105 /* Return a (deep) copy of ENTRY, allocated in RESULT_POOL.  "Deep"
106  * presumably means ENTRY->ITEMS gets duplicated, too (TODO confirm). */
107 svn_fs_x__p2l_entry_t *
108 svn_fs_x__p2l_entry_dup(const svn_fs_x__p2l_entry_t *entry,
109                         apr_pool_t *result_pool);
110 
111 /* Open / create a log-to-phys proto index file with the full file path
112  * name FILE_NAME.  Return the open file in *PROTO_INDEX, allocated in
113  * RESULT_POOL.
114  */
115 svn_error_t *
116 svn_fs_x__l2p_proto_index_open(apr_file_t **proto_index,
117                                const char *file_name,
118                                apr_pool_t *result_pool);
119 
120 /* Call this function before adding entries for the next revision to the
121  * log-to-phys proto index file PROTO_INDEX.  Use SCRATCH_POOL for
122  * temporary allocations.
123  */
124 svn_error_t *
125 svn_fs_x__l2p_proto_index_add_revision(apr_file_t *proto_index,
126                                        apr_pool_t *scratch_pool);
127 
128 /* Add a new mapping, ITEM_INDEX to the (OFFSET, SUB_ITEM) pair, to the
129  * log-to-phys proto index file PROTO_INDEX.  Mappings may be added in any
130  * order but duplicate entries for the same ITEM_INDEX, SUB_ITEM are not
131  * supported.  Not all possible index values need to be used.
132  * (OFFSET, SUB_ITEM) may be (-1, 0) to mark 'invalid' item indexes but
133  * that is already implied for all item indexes not explicitly given a
134  * mapping.
135  *
136  * Use SCRATCH_POOL for temporary allocations.
137  */
138 svn_error_t *
139 svn_fs_x__l2p_proto_index_add_entry(apr_file_t *proto_index,
140                                     apr_off_t offset,
141                                     apr_uint32_t sub_item,
142                                     apr_uint64_t item_index,
143                                     apr_pool_t *scratch_pool);
144 
145 /* Use the proto index file stored at PROTO_FILE_NAME, construct the final
146  * log-to-phys index and append it to INDEX_FILE.  The first revision will
147  * be REVISION, entries of the next revision will be assigned to REVISION+1
148  * and so forth (see svn_fs_x__l2p_proto_index_add_revision).
149  *
150  * Return the MD5 checksum of the on-disk index data in *CHECKSUM, allocated
151  * in RESULT_POOL.  Use SCRATCH_POOL for temporary allocations.
152  */
153 svn_error_t *
154 svn_fs_x__l2p_index_append(svn_checksum_t **checksum,
155                            svn_fs_t *fs,
156                            apr_file_t *index_file,
157                            const char *proto_file_name,
158                            svn_revnum_t revision,
159                            apr_pool_t *result_pool,
160                            apr_pool_t *scratch_pool);
161 
162 /* Open / create a phys-to-log proto index file with the full file path
163  * name FILE_NAME.  Return the open file in *PROTO_INDEX, allocated in
164  * RESULT_POOL.
165  */
166 svn_error_t *
167 svn_fs_x__p2l_proto_index_open(apr_file_t **proto_index,
168                                const char *file_name,
169                                apr_pool_t *result_pool);
170 
171 /* Add a new mapping ENTRY to the phys-to-log proto index file PROTO_INDEX.
172  * Entries must be added in ascending offset order and must not leave
173  * intermittent ranges uncovered.  The revision value in ENTRY (NOTE(review):
174  * presumably that of its ITEMS; the struct has no such member) may be
175  * SVN_INVALID_REVISION.  Use SCRATCH_POOL for temporary allocations. */
176 svn_error_t *
177 svn_fs_x__p2l_proto_index_add_entry(apr_file_t *proto_index,
178                                     const svn_fs_x__p2l_entry_t *entry,
179                                     apr_pool_t *scratch_pool);
180 
181 /* Set *NEXT_OFFSET to the first offset behind the last entry in the
182  * phys-to-log proto index file PROTO_INDEX, i.e. where the next entry
183  * added must begin.  This will be 0 for empty index files.  Use
184  * SCRATCH_POOL for temporary allocations. */
185 svn_error_t *
186 svn_fs_x__p2l_proto_index_next_offset(apr_off_t *next_offset,
187                                       apr_file_t *proto_index,
188                                       apr_pool_t *scratch_pool);
189 
190 /* Use the proto index file stored at PROTO_FILE_NAME, construct the final
191  * phys-to-log index and append it to INDEX_FILE.  Entries without a valid
192  * revision (SVN_INVALID_REVISION) will be assigned to the REVISION given.
193  *
194  * Return the MD5 checksum of the on-disk index data in *CHECKSUM, allocated
195  * in RESULT_POOL.  Use SCRATCH_POOL for temporary allocations.
196  */
197 svn_error_t *
198 svn_fs_x__p2l_index_append(svn_checksum_t **checksum,
199                            svn_fs_t *fs,
200                            apr_file_t *index_file,
201                            const char *proto_file_name,
202                            svn_revnum_t revision,
203                            apr_pool_t *result_pool,
204                            apr_pool_t *scratch_pool);
205 
206 /* Use the phys-to-log mapping files in FS to build a list of entries
207  * that (at least partly) overlap with the range given by BLOCK_START
208  * offset and BLOCK_SIZE in the rev / pack file containing REVISION.
209  * Return the array in *ENTRIES with svn_fs_x__p2l_entry_t as elements,
210  * allocated in RESULT_POOL.  REV_FILE determines whether to access single
211  * rev or pack file data.  If that is not available anymore (neither in
212  * cache nor on disk), return an error.  Use SCRATCH_POOL for temporary
213  * allocations.
214  *
215  * Note that (only) the first and the last mapping may cross a cluster
216  * boundary.
217  */
218 svn_error_t *
219 svn_fs_x__p2l_index_lookup(apr_array_header_t **entries,
220                            svn_fs_t *fs,
221                            svn_fs_x__revision_file_t *rev_file,
222                            svn_revnum_t revision,
223                            apr_off_t block_start,
224                            apr_off_t block_size,
225                            apr_pool_t *result_pool,
226                            apr_pool_t *scratch_pool);
227 
228 /* Use the phys-to-log mapping files in FS to return the entry for the
229  * container or single item starting at global OFFSET in the rev / pack file
230  * containing REVISION in *ENTRY, allocated in RESULT_POOL.  Sets *ENTRY
231  * to NULL if no item starts at exactly that offset.  REV_FILE determines
232  * whether to access single rev or pack file data.  If that is not available
233  * anymore (neither in cache nor on disk), return an error.
234  * Use SCRATCH_POOL for temporary allocations.
235  */
236 svn_error_t *
237 svn_fs_x__p2l_entry_lookup(svn_fs_x__p2l_entry_t **entry,
238                            svn_fs_t *fs,
239                            svn_fs_x__revision_file_t *rev_file,
240                            svn_revnum_t revision,
241                            apr_off_t offset,
242                            apr_pool_t *result_pool,
243                            apr_pool_t *scratch_pool);
244 
245 /* Use the phys-to-log mapping files in FS to return the svn_fs_x__id_t
246  * for the SUB_ITEM of the container starting at global OFFSET in the rev /
247  * pack file containing REVISION in *ITEM, allocated in RESULT_POOL.  Sets
248  * *ITEM to NULL if no element starts at exactly that offset or if it
249  * contains no more than SUB_ITEM sub-items.
250  * REV_FILE presumably works as in svn_fs_x__p2l_entry_lookup (TODO confirm).
251  * Use SCRATCH_POOL for temporary allocations.
252  */
253 svn_error_t *
254 svn_fs_x__p2l_item_lookup(svn_fs_x__id_t **item,
255                           svn_fs_t *fs,
256                           svn_fs_x__revision_file_t *rev_file,
257                           svn_revnum_t revision,
258                           apr_off_t offset,
259                           apr_uint32_t sub_item,
260                           apr_pool_t *result_pool,
261                           apr_pool_t *scratch_pool);
262 
263 /* For ITEM_ID in FS, return its position in the respective rev or pack
264  * file in *ABSOLUTE_POSITION and, in *SUB_ITEM, its number within the object
265  * at that location.  *SUB_ITEM will be 0 for non-container items.
266  *
267  * REV_FILE determines whether to access single rev or pack file data.
268  * If that is not available anymore (neither in cache nor on disk), re-open
269  * the rev / pack file and retry to open the index file.  For transaction
270  * content, REV_FILE may be NULL.
271  *
272  * Use SCRATCH_POOL for temporary allocations.
273  */
274 svn_error_t *
275 svn_fs_x__item_offset(apr_off_t *absolute_position,
276                       apr_uint32_t *sub_item,
277                       svn_fs_t *fs,
278                       svn_fs_x__revision_file_t *rev_file,
279                       const svn_fs_x__id_t *item_id,
280                       apr_pool_t *scratch_pool);
281 
282 /* Use the log-to-phys indexes in FS to determine the maximum item indexes
283  * assigned to revisions START_REV to START_REV + COUNT - 1.  That is a
284  * close upper limit to the actual number of items in the respective revs.
285  * Return the results in *MAX_IDS, allocated in RESULT_POOL.
286  * Use SCRATCH_POOL for temporary allocations.
287  */
288 svn_error_t *
289 svn_fs_x__l2p_get_max_ids(apr_array_header_t **max_ids,
290                           svn_fs_t *fs,
291                           svn_revnum_t start_rev,
292                           apr_size_t count,
293                           apr_pool_t *result_pool,
294                           apr_pool_t *scratch_pool);
295 
296 /* In *OFFSET, return the first offset in the pack / rev file containing
297  * REVISION in FS not covered by the phys-to-log index.
298  * Use SCRATCH_POOL for temporary allocations.
299  */
300 svn_error_t *
301 svn_fs_x__p2l_get_max_offset(apr_off_t *offset,
302                              svn_fs_t *fs,
303                              svn_fs_x__revision_file_t *rev_file,
304                              svn_revnum_t revision,
305                              apr_pool_t *scratch_pool);
306 
307 /* Index (re-)creation utilities.
308  */
309 
310 /* For FS, create a new L2P auto-deleting proto index file and return
311  * its name in *PROTONAME.  All entries to write are given in ENTRIES and
312  * entries are of type svn_fs_x__p2l_entry_t* (sic!).  The ENTRIES array
313  * will be reordered.  Give the proto index file the lifetime of RESULT_POOL
314  * and use SCRATCH_POOL for temporary allocations.
315  */
316 svn_error_t *
317 svn_fs_x__l2p_index_from_p2l_entries(const char **protoname,
318                                      svn_fs_t *fs,
319                                      apr_array_header_t *entries,
320                                      apr_pool_t *result_pool,
321                                      apr_pool_t *scratch_pool);
322 
323 /* For FS, create a new P2L auto-deleting proto index file and return
324  * its name in *PROTONAME.  All entries to write are given in ENTRIES and
325  * of type svn_fs_x__p2l_entry_t*.  The FNV1 checksums are not taken from
326  * ENTRIES but are being calculated from the current contents of REV_FILE
327  * as we go.  Give the proto index file the lifetime of RESULT_POOL and use
328  * SCRATCH_POOL for temporary allocations.
329  */
330 svn_error_t *
331 svn_fs_x__p2l_index_from_p2l_entries(const char **protoname,
332                                      svn_fs_t *fs,
333                                      svn_fs_x__revision_file_t *rev_file,
334                                      apr_array_header_t *entries,
335                                      apr_pool_t *result_pool,
336                                      apr_pool_t *scratch_pool);
337 
338 /* Serialization and caching interface
339  */
340 
341 /* We use this key type to address individual pages from both index types
342  * (log-to-phys and phys-to-log). */
343 typedef struct svn_fs_x__page_cache_key_t
344 {
345   /* in l2p: this is the revision of the items being mapped
346      in p2l: this is the start revision identifying the pack / rev file */
347   apr_uint32_t revision;
348 
349   /* if TRUE, this is the index to a pack file
350    */
351   svn_boolean_t is_packed;
352 
353   /* in l2p: page number within the revision
354    * in p2l: page number within the rev / pack file
355    */
356   apr_uint64_t page;
357 } svn_fs_x__page_cache_key_t;
358 
359 /* Implements svn_cache__serialize_func_t for l2p_header_t objects.
360  * See that function type for the contract of DATA, DATA_LEN, IN and POOL.
361  */
362 svn_error_t *
363 svn_fs_x__serialize_l2p_header(void **data,
364                                apr_size_t *data_len,
365                                void *in,
366                                apr_pool_t *pool);
367 
368 /* Implements svn_cache__deserialize_func_t for l2p_header_t objects.  See
369  * that function type for the OUT, DATA, DATA_LEN and RESULT_POOL contract.
370  */
371 svn_error_t *
372 svn_fs_x__deserialize_l2p_header(void **out,
373                                  void *data,
374                                  apr_size_t data_len,
375                                  apr_pool_t *result_pool);
376 
377 /* Implements svn_cache__serialize_func_t for l2p_page_t objects.
378  * See that function type for the contract of DATA, DATA_LEN, IN and POOL.
379  */
380 svn_error_t *
381 svn_fs_x__serialize_l2p_page(void **data,
382                              apr_size_t *data_len,
383                              void *in,
384                              apr_pool_t *pool);
385 
386 /* Implements svn_cache__deserialize_func_t for l2p_page_t objects.  See
387  * that function type for the OUT, DATA, DATA_LEN and RESULT_POOL contract.
388  */
389 svn_error_t *
390 svn_fs_x__deserialize_l2p_page(void **out,
391                                void *data,
392                                apr_size_t data_len,
393                                apr_pool_t *result_pool);
394 
395 /* Implements svn_cache__serialize_func_t for p2l_header_t objects.
396  * See that function type for the contract of DATA, DATA_LEN, IN and POOL.
397  */
398 svn_error_t *
399 svn_fs_x__serialize_p2l_header(void **data,
400                                apr_size_t *data_len,
401                                void *in,
402                                apr_pool_t *pool);
403 
404 /* Implements svn_cache__deserialize_func_t for p2l_header_t objects.  See
405  * that function type for the OUT, DATA, DATA_LEN and RESULT_POOL contract.
406  */
407 svn_error_t *
408 svn_fs_x__deserialize_p2l_header(void **out,
409                                  void *data,
410                                  apr_size_t data_len,
411                                  apr_pool_t *result_pool);
412 
413 /* Implements svn_cache__serialize_func_t for apr_array_header_t objects
414  * with elements of type svn_fs_x__p2l_entry_t.  See that function type
415  * for the contract of DATA, DATA_LEN, IN and POOL.
416  */
417 svn_error_t *
418 svn_fs_x__serialize_p2l_page(void **data,
419                              apr_size_t *data_len,
420                              void *in,
421                              apr_pool_t *pool);
422 
423 /* Implements svn_cache__deserialize_func_t for apr_array_header_t objects
424  * with elements of type svn_fs_x__p2l_entry_t.  See that function type
425  * for the contract of OUT, DATA, DATA_LEN and RESULT_POOL.
426  */
427 svn_error_t *
428 svn_fs_x__deserialize_p2l_page(void **out,
429                                void *data,
430                                apr_size_t data_len,
431                                apr_pool_t *result_pool);
432 
433 #endif
434