1 /* index.c indexing support for FSX support
2  *
3  * ====================================================================
4  *    Licensed to the Apache Software Foundation (ASF) under one
5  *    or more contributor license agreements.  See the NOTICE file
6  *    distributed with this work for additional information
7  *    regarding copyright ownership.  The ASF licenses this file
8  *    to you under the Apache License, Version 2.0 (the
9  *    "License"); you may not use this file except in compliance
10  *    with the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *    Unless required by applicable law or agreed to in writing,
15  *    software distributed under the License is distributed on an
16  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  *    KIND, either express or implied.  See the License for the
18  *    specific language governing permissions and limitations
19  *    under the License.
20  * ====================================================================
21  */
22 
23 #include <assert.h>
24 
25 #include "svn_io.h"
26 #include "svn_pools.h"
27 #include "svn_sorts.h"
28 
29 #include "index.h"
30 #include "util.h"
31 #include "pack.h"
32 
33 #include "private/svn_dep_compat.h"
34 #include "private/svn_sorts_private.h"
35 #include "private/svn_subr_private.h"
36 #include "private/svn_temp_serializer.h"
37 
38 #include "svn_private_config.h"
39 #include "temp_serializer.h"
40 #include "fs_x.h"
41 
42 #include "../libsvn_fs/fs-loader.h"
43 
44 /* maximum length of a uint64 in an 7/8b encoding */
45 #define ENCODED_INT_LENGTH 10
46 
47 /* APR is missing an APR_OFF_T_MAX.  So, define one.  We will use it to
48  * limit file offsets stored in the indexes.
49  *
50  * We assume that everything shorter than 64 bits, it is at least 32 bits.
51  * We also assume that the type is always signed meaning we only have an
52  * effective positive range of 63 or 31 bits, respectively.
53  */
54 static
55 const apr_uint64_t off_t_max = (sizeof(apr_off_t) == sizeof(apr_int64_t))
56                              ? APR_INT64_MAX
57                              : APR_INT32_MAX;
58 
59 /* We store P2L proto-index entries as 6 values, 64 bits each on disk.
60  * See also svn_fs_x__p2l_proto_index_add_entry().
61  */
62 #define P2L_PROTO_INDEX_ENTRY_SIZE (6 * sizeof(apr_uint64_t))
63 
64 /* Size of the buffer that will fit the index header prefixes. */
65 #define STREAM_PREFIX_LEN MAX(sizeof(SVN_FS_X__L2P_STREAM_PREFIX), \
66                               sizeof(SVN_FS_X__P2L_STREAM_PREFIX))
67 
68 /* Page tables in the log-to-phys index file exclusively contain entries
69  * of this type to describe position and size of a given page.
70  */
71 typedef struct l2p_page_table_entry_t
72 {
73   /* global offset on the page within the index file */
74   apr_uint64_t offset;
75 
76   /* number of mapping entries in that page */
77   apr_uint32_t entry_count;
78 
79   /* size of the page on disk (in the index file) */
80   apr_uint32_t size;
81 } l2p_page_table_entry_t;
82 
83 /* Master run-time data structure of an log-to-phys index.  It contains
84  * the page tables of every revision covered by that index - but not the
85  * pages themselves.
86  */
87 typedef struct l2p_header_t
88 {
89   /* first revision covered by this index */
90   svn_revnum_t first_revision;
91 
92   /* number of revisions covered */
93   apr_size_t revision_count;
94 
95   /* (max) number of entries per page */
96   apr_uint32_t page_size;
97 
98   /* indexes into PAGE_TABLE that mark the first page of the respective
99    * revision.  PAGE_TABLE_INDEX[REVISION_COUNT] points to the end of
100    * PAGE_TABLE.
101    */
102   apr_size_t * page_table_index;
103 
104   /* Page table covering all pages in the index */
105   l2p_page_table_entry_t * page_table;
106 } l2p_header_t;
107 
108 /* Run-time data structure containing a single log-to-phys index page.
109  */
110 typedef struct l2p_page_t
111 {
112   /* number of entries in the OFFSETS array */
113   apr_uint32_t entry_count;
114 
115   /* global file offsets (item index is the array index) within the
116    * packed or non-packed rev file.  Offset will be -1 for unused /
117    * invalid item index values. */
118   apr_off_t *offsets;
119 
120   /* In case that the item is stored inside a container, this is the
121    * identifying index of the item within that container.  0 for the
122    * container itself or for items that aren't containers. */
123   apr_uint32_t *sub_items;
124 } l2p_page_t;
125 
126 /* All of the log-to-phys proto index file consist of entries of this type.
127  */
128 typedef struct l2p_proto_entry_t
129 {
130   /* phys offset + 1 of the data container. 0 for "new revision" entries. */
131   apr_uint64_t offset;
132 
133   /* corresponding item index. 0 for "new revision" entries. */
134   apr_uint64_t item_index;
135 
136   /* index within the container starting @ offset.  0 for "new revision"
137    * entries and for items with no outer container. */
138   apr_uint32_t sub_item;
139 } l2p_proto_entry_t;
140 
141 /* Master run-time data structure of an phys-to-log index.  It contains
142  * an array with one offset value for each rev file cluster.
143  */
144 typedef struct p2l_header_t
145 {
146   /* first revision covered by the index (and rev file) */
147   svn_revnum_t first_revision;
148 
149   /* number of bytes in the rev files covered by each p2l page */
150   apr_uint64_t page_size;
151 
152   /* number of pages / clusters in that rev file */
153   apr_size_t page_count;
154 
155   /* number of bytes in the rev file */
156   apr_uint64_t file_size;
157 
158   /* offsets of the pages / cluster descriptions within the index file */
159   apr_off_t *offsets;
160 } p2l_header_t;
161 
162 /*
163  * packed stream array
164  */
165 
166 /* How many numbers we will pre-fetch and buffer in a packed number stream.
167  */
168 enum { MAX_NUMBER_PREFETCH = 64 };
169 
170 /* Prefetched number entry in a packed number stream.
171  */
172 typedef struct value_position_pair_t
173 {
174   /* prefetched number */
175   apr_uint64_t value;
176 
177   /* number of bytes read, *including* this number, since the buffer start */
178   apr_size_t total_len;
179 } value_position_pair_t;
180 
181 /* State of a prefetching packed number stream.  It will read compressed
182  * index data efficiently and present it as a series of non-packed uint64.
183  */
184 struct svn_fs_x__packed_number_stream_t
185 {
186   /* underlying data file containing the packed values */
187   apr_file_t *file;
188 
189   /* Offset within FILE at which the stream data starts
190    * (i.e. which offset will reported as offset 0 by packed_stream_offset). */
191   apr_off_t stream_start;
192 
193   /* First offset within FILE after the stream data.
194    * Attempts to read beyond this will cause an "Unexpected End Of Stream"
195    * error. */
196   apr_off_t stream_end;
197 
198   /* number of used entries in BUFFER (starting at index 0) */
199   apr_size_t used;
200 
201   /* index of the next number to read from the BUFFER (0 .. USED).
202    * If CURRENT == USED, we need to read more data upon get() */
203   apr_size_t current;
204 
205   /* offset in FILE from which the first entry in BUFFER has been read */
206   apr_off_t start_offset;
207 
208   /* offset in FILE from which the next number has to be read */
209   apr_off_t next_offset;
210 
211   /* read the file in chunks of this size */
212   apr_size_t block_size;
213 
214   /* pool to be used for file ops etc. */
215   apr_pool_t *pool;
216 
217   /* buffer for prefetched values */
218   value_position_pair_t buffer[MAX_NUMBER_PREFETCH];
219 };
220 
221 /* Return an svn_error_t * object for error ERR on STREAM with the given
222  * MESSAGE string.  The latter must have a placeholder for the index file
223  * name ("%s") and the current read offset (e.g. "0x%lx").
224  */
225 static svn_error_t *
stream_error_create(svn_fs_x__packed_number_stream_t * stream,apr_status_t err,const char * message)226 stream_error_create(svn_fs_x__packed_number_stream_t *stream,
227                     apr_status_t err,
228                     const char *message)
229 {
230   const char *file_name;
231   apr_off_t offset;
232   SVN_ERR(svn_io_file_name_get(&file_name, stream->file,
233                                stream->pool));
234   SVN_ERR(svn_io_file_get_offset(&offset, stream->file, stream->pool));
235 
236   return svn_error_createf(err, NULL, message, file_name,
237                            apr_psprintf(stream->pool,
238                                         "%" APR_UINT64_T_HEX_FMT,
239                                         (apr_uint64_t)offset));
240 }
241 
242 /* Read up to MAX_NUMBER_PREFETCH numbers from the STREAM->NEXT_OFFSET in
243  * STREAM->FILE and buffer them.
244  *
245  * We don't want GCC and others to inline this (infrequently called)
246  * function into packed_stream_get() because it prevents the latter from
247  * being inlined itself.
248  */
249 SVN__PREVENT_INLINE
250 static svn_error_t *
packed_stream_read(svn_fs_x__packed_number_stream_t * stream)251 packed_stream_read(svn_fs_x__packed_number_stream_t *stream)
252 {
253   unsigned char buffer[MAX_NUMBER_PREFETCH];
254   apr_size_t bytes_read = 0;
255   apr_size_t i;
256   value_position_pair_t *target;
257   apr_off_t block_start = 0;
258   apr_off_t block_left = 0;
259   apr_status_t err;
260 
261   /* all buffered data will have been read starting here */
262   stream->start_offset = stream->next_offset;
263 
264   /* packed numbers are usually not aligned to MAX_NUMBER_PREFETCH blocks,
265    * i.e. the last number has been incomplete (and not buffered in stream)
266    * and need to be re-read.  Therefore, always correct the file pointer.
267    */
268   SVN_ERR(svn_io_file_aligned_seek(stream->file, stream->block_size,
269                                    &block_start, stream->next_offset,
270                                    stream->pool));
271 
272   /* prefetch at least one number but, if feasible, don't cross block
273    * boundaries.  This shall prevent jumping back and forth between two
274    * blocks because the extra data was not actually request _now_.
275    */
276   bytes_read = sizeof(buffer);
277   block_left = stream->block_size - (stream->next_offset - block_start);
278   if (block_left >= 10 && block_left < bytes_read)
279     bytes_read = (apr_size_t)block_left;
280 
281   /* Don't read beyond the end of the file section that belongs to this
282    * index / stream. */
283   bytes_read = (apr_size_t)MIN(bytes_read,
284                                stream->stream_end - stream->next_offset);
285 
286   err = apr_file_read(stream->file, buffer, &bytes_read);
287   if (err && !APR_STATUS_IS_EOF(err))
288     return stream_error_create(stream, err,
289       _("Can't read index file '%s' at offset 0x%"));
290 
291   /* if the last number is incomplete, trim it from the buffer */
292   while (bytes_read > 0 && buffer[bytes_read-1] >= 0x80)
293     --bytes_read;
294 
295   /* we call read() only if get() requires more data.  So, there must be
296    * at least *one* further number. */
297   if SVN__PREDICT_FALSE(bytes_read == 0)
298     return stream_error_create(stream, err,
299       _("Unexpected end of index file %s at offset 0x%"));
300 
301   /* parse file buffer and expand into stream buffer */
302   target = stream->buffer;
303   for (i = 0; i < bytes_read;)
304     {
305       if (buffer[i] < 0x80)
306         {
307           /* numbers < 128 are relatively frequent and particularly easy
308            * to decode.  Give them special treatment. */
309           target->value = buffer[i];
310           ++i;
311           target->total_len = i;
312           ++target;
313         }
314       else
315         {
316           apr_uint64_t value = 0;
317           apr_uint64_t shift = 0;
318           while (buffer[i] >= 0x80)
319             {
320               value += ((apr_uint64_t)buffer[i] & 0x7f) << shift;
321               shift += 7;
322               ++i;
323             }
324 
325           target->value = value + ((apr_uint64_t)buffer[i] << shift);
326           ++i;
327           target->total_len = i;
328           ++target;
329 
330           /* let's catch corrupted data early.  It would surely cause
331            * havoc further down the line. */
332           if SVN__PREDICT_FALSE(shift > 8 * sizeof(value))
333             return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
334                                      _("Corrupt index: number too large"));
335        }
336     }
337 
338   /* update stream state */
339   stream->used = target - stream->buffer;
340   stream->next_offset = stream->start_offset + i;
341   stream->current = 0;
342 
343   return SVN_NO_ERROR;
344 }
345 
346 svn_error_t *
svn_fs_x__packed_stream_open(svn_fs_x__packed_number_stream_t ** stream,apr_file_t * file,apr_off_t start,apr_off_t end,const char * stream_prefix,apr_size_t block_size,apr_pool_t * result_pool,apr_pool_t * scratch_pool)347 svn_fs_x__packed_stream_open(svn_fs_x__packed_number_stream_t **stream,
348                              apr_file_t *file,
349                              apr_off_t start,
350                              apr_off_t end,
351                              const char *stream_prefix,
352                              apr_size_t block_size,
353                              apr_pool_t *result_pool,
354                              apr_pool_t *scratch_pool)
355 {
356   char buffer[STREAM_PREFIX_LEN + 1] = { 0 };
357   apr_size_t len = strlen(stream_prefix);
358   svn_fs_x__packed_number_stream_t *result;
359 
360   /* If this is violated, we forgot to adjust STREAM_PREFIX_LEN after
361    * changing the index header prefixes. */
362   SVN_ERR_ASSERT(len < sizeof(buffer));
363 
364   /* Read the header prefix and compare it with the expected prefix */
365   SVN_ERR(svn_io_file_aligned_seek(file, block_size, NULL, start,
366                                    scratch_pool));
367   SVN_ERR(svn_io_file_read_full2(file, buffer, len, NULL, NULL,
368                                  scratch_pool));
369 
370   if (strncmp(buffer, stream_prefix, len))
371     return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
372                              _("Index stream header prefix mismatch.\n"
373                                "  expected: %s"
374                                "  found: %s"), stream_prefix, buffer);
375 
376   /* Construct the actual stream object. */
377   result = apr_palloc(result_pool, sizeof(*result));
378 
379   result->pool = result_pool;
380   result->file = file;
381   result->stream_start = start + len;
382   result->stream_end = end;
383 
384   result->used = 0;
385   result->current = 0;
386   result->start_offset = result->stream_start;
387   result->next_offset = result->stream_start;
388   result->block_size = block_size;
389 
390   *stream = result;
391 
392   return SVN_NO_ERROR;
393 }
394 
395 /*
396  * The forced inline is required for performance reasons:  This is a very
397  * hot code path (called for every item we read) but e.g. GCC would rather
398  * chose to inline packed_stream_read() here, preventing packed_stream_get
399  * from being inlined itself.
400  */
401 SVN__FORCE_INLINE
402 static svn_error_t*
packed_stream_get(apr_uint64_t * value,svn_fs_x__packed_number_stream_t * stream)403 packed_stream_get(apr_uint64_t *value,
404                   svn_fs_x__packed_number_stream_t *stream)
405 {
406   if (stream->current == stream->used)
407     SVN_ERR(packed_stream_read(stream));
408 
409   *value = stream->buffer[stream->current].value;
410   ++stream->current;
411 
412   return SVN_NO_ERROR;
413 }
414 
415 /* Navigate STREAM to packed stream offset OFFSET.  There will be no checks
416  * whether the given OFFSET is valid.
417  */
418 static void
packed_stream_seek(svn_fs_x__packed_number_stream_t * stream,apr_off_t offset)419 packed_stream_seek(svn_fs_x__packed_number_stream_t *stream,
420                    apr_off_t offset)
421 {
422   apr_off_t file_offset = offset + stream->stream_start;
423 
424   if (   stream->used == 0
425       || offset < stream->start_offset
426       || offset >= stream->next_offset)
427     {
428       /* outside buffered data.  Next get() will read() from OFFSET. */
429       stream->start_offset = file_offset;
430       stream->next_offset = file_offset;
431       stream->current = 0;
432       stream->used = 0;
433     }
434   else
435     {
436       /* Find the suitable location in the stream buffer.
437        * Since our buffer is small, it is efficient enough to simply scan
438        * it for the desired position. */
439       apr_size_t i;
440       for (i = 0; i < stream->used; ++i)
441         if (stream->buffer[i].total_len > file_offset - stream->start_offset)
442           break;
443 
444       stream->current = i;
445     }
446 }
447 
448 /* Return the packed stream offset of at which the next number in the stream
449  * can be found.
450  */
451 static apr_off_t
packed_stream_offset(svn_fs_x__packed_number_stream_t * stream)452 packed_stream_offset(svn_fs_x__packed_number_stream_t *stream)
453 {
454   apr_off_t file_offset
455        = stream->current == 0
456        ? stream->start_offset
457        : stream->buffer[stream->current-1].total_len + stream->start_offset;
458 
459   return file_offset - stream->stream_start;
460 }
461 
462 /* Write VALUE to the PROTO_INDEX file, using SCRATCH_POOL for temporary
463  * allocations.
464  *
465  * The point of this function is to ensure an architecture-independent
466  * proto-index file format.  All data is written as unsigned 64 bits ints
467  * in little endian byte order.  64 bits is the largest portable integer
468  * we have and unsigned values have well-defined conversions in C.
469  */
470 static svn_error_t *
write_uint64_to_proto_index(apr_file_t * proto_index,apr_uint64_t value,apr_pool_t * scratch_pool)471 write_uint64_to_proto_index(apr_file_t *proto_index,
472                             apr_uint64_t value,
473                             apr_pool_t *scratch_pool)
474 {
475   apr_byte_t buffer[sizeof(value)];
476   int i;
477   apr_size_t written;
478 
479   /* Split VALUE into 8 bytes using LE ordering. */
480   for (i = 0; i < sizeof(buffer); ++i)
481     {
482       /* Unsigned conversions are well-defined ... */
483       buffer[i] = (apr_byte_t)value;
484       value >>= CHAR_BIT;
485     }
486 
487   /* Write it all to disk. */
488   SVN_ERR(svn_io_file_write_full(proto_index, buffer, sizeof(buffer),
489                                  &written, scratch_pool));
490   SVN_ERR_ASSERT(written == sizeof(buffer));
491 
492   return SVN_NO_ERROR;
493 }
494 
495 /* Read one unsigned 64 bit value from PROTO_INDEX file and return it in
496  * *VALUE_P.  If EOF is NULL, error out when trying to read beyond EOF.
497  * Use SCRATCH_POOL for temporary allocations.
498  *
499  * This function is the inverse to write_uint64_to_proto_index (see there),
500  * reading the external LE byte order and convert it into host byte order.
501  */
502 static svn_error_t *
read_uint64_from_proto_index(apr_file_t * proto_index,apr_uint64_t * value_p,svn_boolean_t * eof,apr_pool_t * scratch_pool)503 read_uint64_from_proto_index(apr_file_t *proto_index,
504                              apr_uint64_t *value_p,
505                              svn_boolean_t *eof,
506                              apr_pool_t *scratch_pool)
507 {
508   apr_byte_t buffer[sizeof(*value_p)];
509   apr_size_t bytes_read;
510 
511   /* Read the full 8 bytes or our 64 bit value, unless we hit EOF.
512    * Assert that we never read partial values. */
513   SVN_ERR(svn_io_file_read_full2(proto_index, buffer, sizeof(buffer),
514                                  &bytes_read, eof, scratch_pool));
515   SVN_ERR_ASSERT((eof && *eof) || bytes_read == sizeof(buffer));
516 
517   /* If we did not hit EOF, reconstruct the uint64 value and return it. */
518   if (!eof || !*eof)
519     {
520       int i;
521       apr_uint64_t value;
522 
523       /* This could only overflow if CHAR_BIT had a value that is not
524        * a divisor of 64. */
525       value = 0;
526       for (i = sizeof(buffer) - 1; i >= 0; --i)
527         value = (value << CHAR_BIT) + buffer[i];
528 
529       *value_p = value;
530     }
531 
532   return SVN_NO_ERROR;
533 }
534 
535 /* Convenience function similar to read_uint64_from_proto_index, but returns
536  * an uint32 value in VALUE_P.  Return an error if the value does not fit.
537  */
538 static svn_error_t *
read_uint32_from_proto_index(apr_file_t * proto_index,apr_uint32_t * value_p,svn_boolean_t * eof,apr_pool_t * scratch_pool)539 read_uint32_from_proto_index(apr_file_t *proto_index,
540                              apr_uint32_t *value_p,
541                              svn_boolean_t *eof,
542                              apr_pool_t *scratch_pool)
543 {
544   apr_uint64_t value;
545   SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof,
546                                        scratch_pool));
547   if (!eof || !*eof)
548     {
549       if (value > APR_UINT32_MAX)
550         return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
551                                  _("UINT32 0x%s too large, max = 0x%s"),
552                                  apr_psprintf(scratch_pool,
553                                               "%" APR_UINT64_T_HEX_FMT,
554                                               value),
555                                  apr_psprintf(scratch_pool,
556                                               "%" APR_UINT64_T_HEX_FMT,
557                                              (apr_uint64_t)APR_UINT32_MAX));
558 
559       /* This conversion is not lossy because the value can be represented
560        * in the target type. */
561       *value_p = (apr_uint32_t)value;
562     }
563 
564   return SVN_NO_ERROR;
565 }
566 
567 /* Convenience function similar to read_uint64_from_proto_index, but returns
568  * an off_t value in VALUE_P.  Return an error if the value does not fit.
569  */
570 static svn_error_t *
read_off_t_from_proto_index(apr_file_t * proto_index,apr_off_t * value_p,svn_boolean_t * eof,apr_pool_t * scratch_pool)571 read_off_t_from_proto_index(apr_file_t *proto_index,
572                             apr_off_t *value_p,
573                             svn_boolean_t *eof,
574                             apr_pool_t *scratch_pool)
575 {
576   apr_uint64_t value;
577   SVN_ERR(read_uint64_from_proto_index(proto_index, &value, eof,
578                                        scratch_pool));
579   if (!eof || !*eof)
580     {
581       if (value > off_t_max)
582         return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
583                                 _("File offset 0x%s too large, max = 0x%s"),
584                                  apr_psprintf(scratch_pool,
585                                               "%" APR_UINT64_T_HEX_FMT,
586                                               value),
587                                  apr_psprintf(scratch_pool,
588                                               "%" APR_UINT64_T_HEX_FMT,
589                                               off_t_max));
590 
591       /* Shortening conversion from unsigned to signed int is well-defined
592        * and not lossy in C because the value can be represented in the
593        * target type. */
594       *value_p = (apr_off_t)value;
595     }
596 
597   return SVN_NO_ERROR;
598 }
599 
600 /*
601  * log-to-phys index
602  */
603 svn_error_t *
svn_fs_x__l2p_proto_index_open(apr_file_t ** proto_index,const char * file_name,apr_pool_t * result_pool)604 svn_fs_x__l2p_proto_index_open(apr_file_t **proto_index,
605                                const char *file_name,
606                                apr_pool_t *result_pool)
607 {
608   SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE
609                            | APR_CREATE | APR_APPEND | APR_BUFFERED,
610                            APR_OS_DEFAULT, result_pool));
611 
612   return SVN_NO_ERROR;
613 }
614 
615 /* Append ENTRY to log-to-phys PROTO_INDEX file.
616  * Use SCRATCH_POOL for temporary allocations.
617  */
618 static svn_error_t *
write_l2p_entry_to_proto_index(apr_file_t * proto_index,l2p_proto_entry_t entry,apr_pool_t * scratch_pool)619 write_l2p_entry_to_proto_index(apr_file_t *proto_index,
620                                l2p_proto_entry_t entry,
621                                apr_pool_t *scratch_pool)
622 {
623   SVN_ERR(write_uint64_to_proto_index(proto_index, entry.offset,
624                                       scratch_pool));
625   SVN_ERR(write_uint64_to_proto_index(proto_index, entry.item_index,
626                                       scratch_pool));
627   SVN_ERR(write_uint64_to_proto_index(proto_index, entry.sub_item,
628                                       scratch_pool));
629 
630   return SVN_NO_ERROR;
631 }
632 
633 /* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file
634  * in *EOF, or error out in that case if EOF is NULL.  *ENTRY is in an
635  * undefined state if an end-of-file occurred.
636  * Use SCRATCH_POOL for temporary allocations.
637  */
638 static svn_error_t *
read_l2p_entry_from_proto_index(apr_file_t * proto_index,l2p_proto_entry_t * entry,svn_boolean_t * eof,apr_pool_t * scratch_pool)639 read_l2p_entry_from_proto_index(apr_file_t *proto_index,
640                                 l2p_proto_entry_t *entry,
641                                 svn_boolean_t *eof,
642                                 apr_pool_t *scratch_pool)
643 {
644   SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->offset, eof,
645                                        scratch_pool));
646   SVN_ERR(read_uint64_from_proto_index(proto_index, &entry->item_index, eof,
647                                        scratch_pool));
648   SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->sub_item, eof,
649                                        scratch_pool));
650 
651   return SVN_NO_ERROR;
652 }
653 
654 svn_error_t *
svn_fs_x__l2p_proto_index_add_revision(apr_file_t * proto_index,apr_pool_t * scratch_pool)655 svn_fs_x__l2p_proto_index_add_revision(apr_file_t *proto_index,
656                                        apr_pool_t *scratch_pool)
657 {
658   l2p_proto_entry_t entry = { 0 };
659   return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry,
660                                                         scratch_pool));
661 }
662 
663 svn_error_t *
svn_fs_x__l2p_proto_index_add_entry(apr_file_t * proto_index,apr_off_t offset,apr_uint32_t sub_item,apr_uint64_t item_index,apr_pool_t * scratch_pool)664 svn_fs_x__l2p_proto_index_add_entry(apr_file_t *proto_index,
665                                     apr_off_t offset,
666                                     apr_uint32_t sub_item,
667                                     apr_uint64_t item_index,
668                                     apr_pool_t *scratch_pool)
669 {
670   l2p_proto_entry_t entry = { 0 };
671 
672   /* make sure the conversion to uint64 works */
673   SVN_ERR_ASSERT(offset >= -1);
674 
675   /* we support offset '-1' as a "not used" indication */
676   entry.offset = (apr_uint64_t)offset + 1;
677 
678   /* make sure we can use item_index as an array index when building the
679    * final index file */
680   SVN_ERR_ASSERT(item_index < UINT_MAX / 2);
681   entry.item_index = item_index;
682 
683   /* no limits on the container sub-item index */
684   entry.sub_item = sub_item;
685 
686   return svn_error_trace(write_l2p_entry_to_proto_index(proto_index, entry,
687                                                         scratch_pool));
688 }
689 
690 /* Encode VALUE as 7/8b into P and return the number of bytes written.
691  * This will be used when _writing_ packed data.  packed_stream_* is for
692  * read operations only.
693  */
694 static apr_size_t
encode_uint(unsigned char * p,apr_uint64_t value)695 encode_uint(unsigned char *p,
696             apr_uint64_t value)
697 {
698   unsigned char *start = p;
699   while (value >= 0x80)
700     {
701       *p = (unsigned char)((value % 0x80) + 0x80);
702       value /= 0x80;
703       ++p;
704     }
705 
706   *p = (unsigned char)(value % 0x80);
707   return (p - start) + 1;
708 }
709 
710 /* Encode VALUE as 7/8b into P and return the number of bytes written.
711  * This maps signed ints onto unsigned ones.
712  */
713 static apr_size_t
encode_int(unsigned char * p,apr_int64_t value)714 encode_int(unsigned char *p,
715            apr_int64_t value)
716 {
717   return encode_uint(p, (apr_uint64_t)(value < 0 ? -1 - 2*value : 2*value));
718 }
719 
720 /* Append VALUE to STREAM in 7/8b encoding.
721  */
722 static svn_error_t *
stream_write_encoded(svn_stream_t * stream,apr_uint64_t value)723 stream_write_encoded(svn_stream_t *stream,
724                      apr_uint64_t value)
725 {
726   unsigned char encoded[ENCODED_INT_LENGTH];
727 
728   apr_size_t len = encode_uint(encoded, value);
729   return svn_error_trace(svn_stream_write(stream, (char *)encoded, &len));
730 }
731 
732 /* Run-length-encode the uint64 numbers in ARRAY starting at index START
733  * up to but not including END.  All numbers must be > 0.
734  * Return the number of remaining entries in ARRAY after START.
735  */
736 static int
rle_array(apr_array_header_t * array,int start,int end)737 rle_array(apr_array_header_t *array,
738           int start,
739           int end)
740 {
741   int i;
742   int target = start;
743   for (i = start; i < end; ++i)
744     {
745       apr_uint64_t value = APR_ARRAY_IDX(array, i, apr_uint64_t);
746       assert(value > 0);
747 
748       if (value == 1)
749         {
750           int counter;
751           for (counter = 1; i + counter < end; ++counter)
752             if (APR_ARRAY_IDX(array, i + counter, apr_uint64_t) != 1)
753               break;
754 
755           if (--counter)
756             {
757               APR_ARRAY_IDX(array, target, apr_uint64_t) = 0;
758               APR_ARRAY_IDX(array, target + 1, apr_uint64_t) = counter;
759               target += 2;
760               i += counter;
761               continue;
762             }
763         }
764 
765       APR_ARRAY_IDX(array, target, apr_uint64_t) = value;
766       ++target;
767     }
768 
769   return target;
770 }
771 
772 /* Utility data structure describing an log-2-phys page entry.
773  * This is only used as a transient representation during index creation.
774  */
775 typedef struct l2p_page_entry_t
776 {
777   apr_uint64_t offset;
778   apr_uint32_t sub_item;
779 } l2p_page_entry_t;
780 
781 /* qsort-compatible compare function taking two l2p_page_entry_t and
782  * ordering them by offset.
783  */
784 static int
compare_l2p_entries_by_offset(const l2p_page_entry_t * lhs,const l2p_page_entry_t * rhs)785 compare_l2p_entries_by_offset(const l2p_page_entry_t *lhs,
786                               const l2p_page_entry_t *rhs)
787 {
788   return lhs->offset > rhs->offset ? 1
789                                    : lhs->offset == rhs->offset ? 0 : -1;
790 }
791 
792 /* Write the log-2-phys index page description for the l2p_page_entry_t
793  * array ENTRIES, starting with element START up to but not including END.
794  * Write the resulting representation into BUFFER.  Use SCRATCH_POOL for
795  * temporary allocations.
796  */
797 static svn_error_t *
encode_l2p_page(apr_array_header_t * entries,int start,int end,svn_spillbuf_t * buffer,apr_pool_t * scratch_pool)798 encode_l2p_page(apr_array_header_t *entries,
799                 int start,
800                 int end,
801                 svn_spillbuf_t *buffer,
802                 apr_pool_t *scratch_pool)
803 {
804   unsigned char encoded[ENCODED_INT_LENGTH];
805   apr_hash_t *containers = apr_hash_make(scratch_pool);
806   int count = end - start;
807   int container_count = 0;
808   apr_uint64_t last_offset = 0;
809   int i;
810 
811   apr_size_t data_size = count * sizeof(l2p_page_entry_t);
812   svn_stringbuf_t *container_offsets
813     = svn_stringbuf_create_ensure(count * 2, scratch_pool);
814 
815   /* SORTED: relevant items from ENTRIES, sorted by offset */
816   l2p_page_entry_t *sorted
817     = apr_pmemdup(scratch_pool,
818                   entries->elts + start * sizeof(l2p_page_entry_t),
819                   data_size);
820   qsort(sorted, end - start, sizeof(l2p_page_entry_t),
821         (int (*)(const void *, const void *))compare_l2p_entries_by_offset);
822 
823   /* identify container offsets and create container list */
824   for (i = 0; i < count; ++i)
825     {
826       /* skip "unused" entries */
827       if (sorted[i].offset == 0)
828         continue;
829 
830       /* offset already covered? */
831       if (i > 0 && sorted[i].offset == sorted[i-1].offset)
832         continue;
833 
834       /* is this a container item
835        * (appears more than once or accesses to sub-items other than 0)? */
836       if (   (i != count-1 && sorted[i].offset == sorted[i+1].offset)
837           || (sorted[i].sub_item != 0))
838         {
839           svn_stringbuf_appendbytes(container_offsets, (const char *)encoded,
840                                     encode_uint(encoded,   sorted[i].offset
841                                                          - last_offset));
842           last_offset = sorted[i].offset;
843           apr_hash_set(containers,
844                        &sorted[i].offset,
845                        sizeof(sorted[i].offset),
846                        (void *)(apr_uintptr_t)++container_count);
847         }
848     }
849 
850   /* write container list to BUFFER */
851   SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
852                               encode_uint(encoded, container_count),
853                               scratch_pool));
854   SVN_ERR(svn_spillbuf__write(buffer, (const char *)container_offsets->data,
855                               container_offsets->len, scratch_pool));
856 
857   /* encode items */
858   for (i = start; i < end; ++i)
859     {
860       l2p_page_entry_t *entry = &APR_ARRAY_IDX(entries, i, l2p_page_entry_t);
861       if (entry->offset == 0)
862         {
863           SVN_ERR(svn_spillbuf__write(buffer, "\0", 1, scratch_pool));
864         }
865       else
866         {
867           void *void_idx = apr_hash_get(containers, &entry->offset,
868                                         sizeof(entry->offset));
869           if (void_idx == NULL)
870             {
871               apr_uint64_t value = entry->offset + container_count;
872               SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
873                                           encode_uint(encoded, value),
874                                           scratch_pool));
875             }
876           else
877             {
878               apr_uintptr_t idx = (apr_uintptr_t)void_idx;
879               apr_uint64_t value = entry->sub_item;
880               SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
881                                           encode_uint(encoded, idx),
882                                           scratch_pool));
883               SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
884                                           encode_uint(encoded, value),
885                                           scratch_pool));
886             }
887         }
888     }
889 
890   return SVN_NO_ERROR;
891 }
892 
893 svn_error_t *
svn_fs_x__l2p_index_append(svn_checksum_t ** checksum,svn_fs_t * fs,apr_file_t * index_file,const char * proto_file_name,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)894 svn_fs_x__l2p_index_append(svn_checksum_t **checksum,
895                            svn_fs_t *fs,
896                            apr_file_t *index_file,
897                            const char *proto_file_name,
898                            svn_revnum_t revision,
899                            apr_pool_t * result_pool,
900                            apr_pool_t *scratch_pool)
901 {
902   svn_fs_x__data_t *ffd = fs->fsap_data;
903   apr_file_t *proto_index = NULL;
904   svn_stream_t *stream;
905   int i;
906   int end;
907   apr_uint64_t entry;
908   svn_boolean_t eof = FALSE;
909 
910   int last_page_count = 0;          /* total page count at the start of
911                                        the current revision */
912 
913   /* temporary data structures that collect the data which will be moved
914      to the target file in a second step */
915   apr_pool_t *local_pool = svn_pool_create(scratch_pool);
916   apr_pool_t *iterpool = svn_pool_create(local_pool);
917   apr_array_header_t *page_counts
918     = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
919   apr_array_header_t *page_sizes
920     = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
921   apr_array_header_t *entry_counts
922     = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
923 
924   /* collect the item offsets and sub-item value for the current revision */
925   apr_array_header_t *entries
926     = apr_array_make(local_pool, 256, sizeof(l2p_page_entry_t));
927 
928   /* 64k blocks, spill after 16MB */
929   svn_spillbuf_t *buffer
930     = svn_spillbuf__create(0x10000, 0x1000000, local_pool);
931 
932   /* Paranoia check that makes later casting to int32 safe.
933    * The current implementation is limited to 2G entries per page. */
934   if (ffd->l2p_page_size > APR_INT32_MAX)
935     return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
936                             _("L2P index page size  %s"
937                               " exceeds current limit of 2G entries"),
938                             apr_psprintf(local_pool, "%" APR_UINT64_T_FMT,
939                                          ffd->l2p_page_size));
940 
941   /* start at the beginning of the source file */
942   SVN_ERR(svn_io_file_open(&proto_index, proto_file_name,
943                            APR_READ | APR_CREATE | APR_BUFFERED,
944                            APR_OS_DEFAULT, local_pool));
945 
946   /* process all entries until we fail due to EOF */
947   for (entry = 0; !eof; ++entry)
948     {
949       l2p_proto_entry_t proto_entry;
950 
951       /* (attempt to) read the next entry from the source */
952       SVN_ERR(read_l2p_entry_from_proto_index(proto_index, &proto_entry,
953                                               &eof, local_pool));
954 
955       /* handle new revision */
956       if ((entry > 0 && proto_entry.offset == 0) || eof)
957         {
958           /* dump entries, grouped into pages */
959 
960           int entry_count = 0;
961           for (i = 0; i < entries->nelts; i += entry_count)
962             {
963               /* 1 page with up to L2P_PAGE_SIZE entries.
964                * fsfs.conf settings validation guarantees this to fit into
965                * our address space. */
966               apr_uint64_t last_buffer_size
967                 = (apr_uint64_t)svn_spillbuf__get_size(buffer);
968 
969               svn_pool_clear(iterpool);
970 
971               entry_count = ffd->l2p_page_size < entries->nelts - i
972                           ? (int)ffd->l2p_page_size
973                           : entries->nelts - i;
974               SVN_ERR(encode_l2p_page(entries, i, i + entry_count,
975                                       buffer, iterpool));
976 
977               APR_ARRAY_PUSH(entry_counts, apr_uint64_t) = entry_count;
978               APR_ARRAY_PUSH(page_sizes, apr_uint64_t)
979                 = svn_spillbuf__get_size(buffer) - last_buffer_size;
980             }
981 
982           apr_array_clear(entries);
983 
984           /* store the number of pages in this revision */
985           APR_ARRAY_PUSH(page_counts, apr_uint64_t)
986             = page_sizes->nelts - last_page_count;
987 
988           last_page_count = page_sizes->nelts;
989         }
990       else
991         {
992           int idx;
993 
994           /* store the mapping in our array */
995           l2p_page_entry_t page_entry = { 0 };
996 
997           if (proto_entry.item_index > APR_INT32_MAX)
998             return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
999                                     _("Item index %s too large "
1000                                       "in l2p proto index for revision %ld"),
1001                                     apr_psprintf(local_pool,
1002                                                  "%" APR_UINT64_T_FMT,
1003                                                 proto_entry.item_index),
1004                                     revision + page_counts->nelts);
1005 
1006           idx = (int)proto_entry.item_index;
1007           while (idx >= entries->nelts)
1008             APR_ARRAY_PUSH(entries, l2p_page_entry_t) = page_entry;
1009 
1010           page_entry.offset = proto_entry.offset;
1011           page_entry.sub_item = proto_entry.sub_item;
1012           APR_ARRAY_IDX(entries, idx, l2p_page_entry_t) = page_entry;
1013         }
1014     }
1015 
1016   /* we are now done with the source file */
1017   SVN_ERR(svn_io_file_close(proto_index, local_pool));
1018 
1019   /* Paranoia check that makes later casting to int32 safe.
1020    * The current implementation is limited to 2G pages per index. */
1021   if (page_counts->nelts > APR_INT32_MAX)
1022     return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1023                             _("L2P index page count  %d"
1024                               " exceeds current limit of 2G pages"),
1025                             page_counts->nelts);
1026 
1027   /* open target stream. */
1028   stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE,
1029                                                             local_pool),
1030                                    NULL, checksum, svn_checksum_md5, FALSE,
1031                                    result_pool);
1032 
1033 
1034   /* write header info */
1035   SVN_ERR(svn_stream_puts(stream, SVN_FS_X__L2P_STREAM_PREFIX));
1036   SVN_ERR(stream_write_encoded(stream, revision));
1037   SVN_ERR(stream_write_encoded(stream, page_counts->nelts));
1038   SVN_ERR(stream_write_encoded(stream, ffd->l2p_page_size));
1039   SVN_ERR(stream_write_encoded(stream, page_sizes->nelts));
1040 
1041   /* write the revision table */
1042   end = rle_array(page_counts, 0, page_counts->nelts);
1043   for (i = 0; i < end; ++i)
1044     {
1045       apr_uint64_t value = APR_ARRAY_IDX(page_counts, i, apr_uint64_t);
1046       SVN_ERR(stream_write_encoded(stream, value));
1047     }
1048 
1049   /* write the page table */
1050   for (i = 0; i < page_sizes->nelts; ++i)
1051     {
1052       apr_uint64_t value = APR_ARRAY_IDX(page_sizes, i, apr_uint64_t);
1053       SVN_ERR(stream_write_encoded(stream, value));
1054       value = APR_ARRAY_IDX(entry_counts, i, apr_uint64_t);
1055       SVN_ERR(stream_write_encoded(stream, value));
1056     }
1057 
1058   /* append page contents and implicitly close STREAM */
1059   SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool),
1060                            stream, NULL, NULL, local_pool));
1061 
1062   svn_pool_destroy(local_pool);
1063 
1064   return SVN_NO_ERROR;
1065 }
1066 
1067 /* Return the base revision used to identify the p2l or lp2 index covering
1068  * REVISION in FS.
1069  */
1070 static svn_revnum_t
base_revision(svn_fs_t * fs,svn_revnum_t revision)1071 base_revision(svn_fs_t *fs,
1072               svn_revnum_t revision)
1073 {
1074   svn_fs_x__data_t *ffd = fs->fsap_data;
1075   return svn_fs_x__is_packed_rev(fs, revision)
1076        ? revision - (revision % ffd->max_files_per_dir)
1077        : revision;
1078 }
1079 
1080 /* Data structure that describes which l2p page info shall be extracted
1081  * from the cache and contains the fields that receive the result.
1082  */
1083 typedef struct l2p_page_info_baton_t
1084 {
1085   /* input data: we want the page covering (REVISION,ITEM_INDEX) */
1086   svn_revnum_t revision;
1087   apr_uint64_t item_index;
1088 
1089   /* out data */
1090   /* page location and size of the page within the l2p index file */
1091   l2p_page_table_entry_t entry;
1092 
1093   /* page number within the pages for REVISION (not l2p index global!) */
1094   apr_uint32_t page_no;
1095 
1096   /* offset of ITEM_INDEX within that page */
1097   apr_uint32_t page_offset;
1098 
1099   /* revision identifying the l2p index file, also the first rev in that */
1100   svn_revnum_t first_revision;
1101 } l2p_page_info_baton_t;
1102 
1103 
1104 /* Utility function that copies the info requested by BATON->REVISION and
1105  * BATON->ITEM_INDEX and from HEADER and PAGE_TABLE into the output fields
1106  * of *BATON.  Use SCRATCH_POOL for temporary allocations.
1107  */
1108 static svn_error_t *
l2p_header_copy(l2p_page_info_baton_t * baton,const l2p_header_t * header,const l2p_page_table_entry_t * page_table,const apr_size_t * page_table_index,apr_pool_t * scratch_pool)1109 l2p_header_copy(l2p_page_info_baton_t *baton,
1110                 const l2p_header_t *header,
1111                 const l2p_page_table_entry_t *page_table,
1112                 const apr_size_t *page_table_index,
1113                 apr_pool_t *scratch_pool)
1114 {
1115   /* revision offset within the index file */
1116   apr_size_t rel_revision = baton->revision - header->first_revision;
1117   if (rel_revision >= header->revision_count)
1118     return svn_error_createf(SVN_ERR_FS_INDEX_REVISION , NULL,
1119                              _("Revision %ld not covered by item index"),
1120                              baton->revision);
1121 
1122   /* select the relevant page */
1123   if (baton->item_index < header->page_size)
1124     {
1125       /* most revs fit well into a single page */
1126       baton->page_offset = (apr_uint32_t)baton->item_index;
1127       baton->page_no = 0;
1128       baton->entry = page_table[page_table_index[rel_revision]];
1129     }
1130   else
1131     {
1132       const l2p_page_table_entry_t *first_entry;
1133       const l2p_page_table_entry_t *last_entry;
1134       apr_uint64_t max_item_index;
1135 
1136       /* range of pages for this rev */
1137       first_entry = page_table + page_table_index[rel_revision];
1138       last_entry = page_table + page_table_index[rel_revision + 1];
1139 
1140       /* do we hit a valid index page? */
1141       max_item_index =   (apr_uint64_t)header->page_size
1142                        * (last_entry - first_entry);
1143       if (baton->item_index >= max_item_index)
1144         return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1145                                 _("Item index %s exceeds l2p limit "
1146                                   "of %s for revision %ld"),
1147                                 apr_psprintf(scratch_pool,
1148                                              "%" APR_UINT64_T_FMT,
1149                                              baton->item_index),
1150                                 apr_psprintf(scratch_pool,
1151                                              "%" APR_UINT64_T_FMT,
1152                                              max_item_index),
1153                                 baton->revision);
1154 
1155       /* all pages are of the same size and full, except for the last one */
1156       baton->page_offset = (apr_uint32_t)(baton->item_index % header->page_size);
1157       baton->page_no = (apr_uint32_t)(baton->item_index / header->page_size);
1158       baton->entry = first_entry[baton->page_no];
1159     }
1160 
1161   baton->first_revision = header->first_revision;
1162 
1163   return SVN_NO_ERROR;
1164 }
1165 
1166 /* Implement svn_cache__partial_getter_func_t: copy the data requested in
1167  * l2p_page_info_baton_t *BATON from l2p_header_t *DATA into the output
1168  * fields in *BATON.
1169  */
1170 static svn_error_t *
l2p_header_access_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)1171 l2p_header_access_func(void **out,
1172                        const void *data,
1173                        apr_size_t data_len,
1174                        void *baton,
1175                        apr_pool_t *result_pool)
1176 {
1177   /* resolve all pointer values of in-cache data */
1178   const l2p_header_t *header = data;
1179   const l2p_page_table_entry_t *page_table
1180     = svn_temp_deserializer__ptr(header,
1181                                  (const void *const *)&header->page_table);
1182   const apr_size_t *page_table_index
1183     = svn_temp_deserializer__ptr(header,
1184                            (const void *const *)&header->page_table_index);
1185 
1186   /* copy the info */
1187   return l2p_header_copy(baton, header, page_table, page_table_index,
1188                          result_pool);
1189 }
1190 
1191 /* Read COUNT run-length-encoded (see rle_array) uint64 from STREAM and
1192  * return them in VALUES.
1193  */
1194 static svn_error_t *
expand_rle(apr_array_header_t * values,svn_fs_x__packed_number_stream_t * stream,apr_size_t count)1195 expand_rle(apr_array_header_t *values,
1196            svn_fs_x__packed_number_stream_t *stream,
1197            apr_size_t count)
1198 {
1199   apr_array_clear(values);
1200 
1201   while (count)
1202     {
1203       apr_uint64_t value;
1204       SVN_ERR(packed_stream_get(&value, stream));
1205 
1206       if (value)
1207         {
1208           APR_ARRAY_PUSH(values, apr_uint64_t) = value;
1209           --count;
1210         }
1211       else
1212         {
1213           apr_uint64_t i;
1214           apr_uint64_t repetitions;
1215           SVN_ERR(packed_stream_get(&repetitions, stream));
1216           if (++repetitions > count)
1217             repetitions = count;
1218 
1219           for (i = 0; i < repetitions; ++i)
1220             APR_ARRAY_PUSH(values, apr_uint64_t) = 1;
1221 
1222           count -= repetitions;
1223         }
1224     }
1225 
1226   return SVN_NO_ERROR;
1227 }
1228 
1229 /* Read the header data structure of the log-to-phys index for REVISION
1230  * in FS and return it in *HEADER, allocated in RESULT_POOL.  Use REV_FILE
1231  * to access on-disk data.  Use SCRATCH_POOL for temporary allocations.
1232  */
1233 static svn_error_t *
get_l2p_header_body(l2p_header_t ** header,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1234 get_l2p_header_body(l2p_header_t **header,
1235                     svn_fs_x__revision_file_t *rev_file,
1236                     svn_fs_t *fs,
1237                     svn_revnum_t revision,
1238                     apr_pool_t *result_pool,
1239                     apr_pool_t *scratch_pool)
1240 {
1241   svn_fs_x__data_t *ffd = fs->fsap_data;
1242   apr_uint64_t value;
1243   apr_size_t i;
1244   apr_size_t page, page_count;
1245   apr_off_t offset;
1246   l2p_header_t *result = apr_pcalloc(result_pool, sizeof(*result));
1247   apr_size_t page_table_index;
1248   svn_revnum_t next_rev;
1249   apr_array_header_t *expanded_values
1250     = apr_array_make(scratch_pool, 16, sizeof(apr_uint64_t));
1251   svn_fs_x__packed_number_stream_t *stream;
1252   svn_fs_x__rev_file_info_t file_info;
1253   svn_fs_x__index_info_t index_info;
1254 
1255   /* What to look for. */
1256   svn_fs_x__pair_cache_key_t key;
1257   SVN_ERR(svn_fs_x__rev_file_info(&file_info, rev_file));
1258   key.revision = file_info.start_revision;
1259   key.second = file_info.is_packed;
1260 
1261   /* Access the L2P index stream. */
1262   SVN_ERR(svn_fs_x__rev_file_l2p_index(&stream, rev_file));
1263   SVN_ERR(svn_fs_x__rev_file_l2p_info(&index_info, rev_file));
1264   packed_stream_seek(stream, 0);
1265 
1266   /* Read the table sizes.  Check the data for plausibility and
1267    * consistency with other bits. */
1268   SVN_ERR(packed_stream_get(&value, stream));
1269   result->first_revision = (svn_revnum_t)value;
1270   if (result->first_revision != file_info.start_revision)
1271     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1272                   _("Index rev / pack file revision numbers do not match"));
1273 
1274   SVN_ERR(packed_stream_get(&value, stream));
1275   result->revision_count = (int)value;
1276   if (   result->revision_count != 1
1277       && result->revision_count != (apr_uint64_t)ffd->max_files_per_dir)
1278     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1279                             _("Invalid number of revisions in L2P index"));
1280 
1281   SVN_ERR(packed_stream_get(&value, stream));
1282   result->page_size = (apr_uint32_t)value;
1283   if (!result->page_size || (result->page_size & (result->page_size - 1)))
1284     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1285                             _("L2P index page size is not a power of two"));
1286 
1287   SVN_ERR(packed_stream_get(&value, stream));
1288   page_count = (apr_size_t)value;
1289   if (page_count < result->revision_count)
1290     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1291                             _("Fewer L2P index pages than revisions"));
1292   if (page_count > (index_info.end - index_info.start) / 2)
1293     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1294                             _("L2P index page count implausibly large"));
1295 
1296   next_rev = result->first_revision + (svn_revnum_t)result->revision_count;
1297   if (result->first_revision > revision || next_rev <= revision)
1298     return svn_error_createf(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1299                       _("Corrupt L2P index for r%ld only covers r%ld:%ld"),
1300                       revision, result->first_revision, next_rev);
1301 
1302   /* allocate the page tables */
1303   result->page_table
1304     = apr_pcalloc(result_pool, page_count * sizeof(*result->page_table));
1305   result->page_table_index
1306     = apr_pcalloc(result_pool, (result->revision_count + 1)
1307                              * sizeof(*result->page_table_index));
1308 
1309   /* read per-revision page table sizes (i.e. number of pages per rev) */
1310   page_table_index = 0;
1311   result->page_table_index[0] = page_table_index;
1312   SVN_ERR(expand_rle(expanded_values, stream, result->revision_count));
1313   for (i = 0; i < result->revision_count; ++i)
1314     {
1315       value = (apr_size_t)APR_ARRAY_IDX(expanded_values, i, apr_uint64_t);
1316       if (value == 0)
1317         return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1318                                 _("Revision with no L2P index pages"));
1319 
1320       page_table_index += (apr_size_t)value;
1321       if (page_table_index > page_count)
1322         return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1323                                 _("L2P page table exceeded"));
1324 
1325       result->page_table_index[i+1] = page_table_index;
1326     }
1327 
1328   if (page_table_index != page_count)
1329     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1330                  _("Revisions do not cover the full L2P index page table"));
1331 
1332   /* read actual page tables */
1333   for (page = 0; page < page_count; ++page)
1334     {
1335       SVN_ERR(packed_stream_get(&value, stream));
1336       if (value == 0)
1337         return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1338                                 _("Empty L2P index page"));
1339 
1340       result->page_table[page].size = (apr_uint32_t)value;
1341       SVN_ERR(packed_stream_get(&value, stream));
1342       if (value > result->page_size)
1343         return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1344                                 _("Page exceeds L2P index page size"));
1345 
1346       result->page_table[page].entry_count = (apr_uint32_t)value;
1347     }
1348 
1349   /* correct the page description offsets */
1350   offset = packed_stream_offset(stream);
1351   for (page = 0; page < page_count; ++page)
1352     {
1353       result->page_table[page].offset = offset;
1354       offset += result->page_table[page].size;
1355     }
1356 
1357   /* return and cache the header */
1358   *header = result;
1359   SVN_ERR(svn_cache__set(ffd->l2p_header_cache, &key, result, scratch_pool));
1360 
1361   return SVN_NO_ERROR;
1362 }
1363 
1364 /* Get the page info requested in *BATON from FS and set the output fields
1365  * in *BATON.
1366  * To maximize efficiency, use or return the data stream in *STREAM.
1367  * Use SCRATCH_POOL for temporary allocations.
1368  */
1369 static svn_error_t *
get_l2p_page_info(l2p_page_info_baton_t * baton,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,apr_pool_t * scratch_pool)1370 get_l2p_page_info(l2p_page_info_baton_t *baton,
1371                   svn_fs_x__revision_file_t *rev_file,
1372                   svn_fs_t *fs,
1373                   apr_pool_t *scratch_pool)
1374 {
1375   svn_fs_x__data_t *ffd = fs->fsap_data;
1376   l2p_header_t *result;
1377   svn_boolean_t is_cached = FALSE;
1378   void *dummy = NULL;
1379 
1380   /* try to find the info in the cache */
1381   svn_fs_x__pair_cache_key_t key;
1382   key.revision = base_revision(fs, baton->revision);
1383   key.second = svn_fs_x__is_packed_rev(fs, baton->revision);
1384   SVN_ERR(svn_cache__get_partial((void**)&dummy, &is_cached,
1385                                  ffd->l2p_header_cache, &key,
1386                                  l2p_header_access_func, baton,
1387                                  scratch_pool));
1388   if (is_cached)
1389     return SVN_NO_ERROR;
1390 
1391   /* read from disk, cache and copy the result */
1392   SVN_ERR(get_l2p_header_body(&result, rev_file, fs, baton->revision,
1393                               scratch_pool, scratch_pool));
1394   SVN_ERR(l2p_header_copy(baton, result, result->page_table,
1395                           result->page_table_index, scratch_pool));
1396 
1397   return SVN_NO_ERROR;
1398 }
1399 
1400 /* Read the log-to-phys header info of the index covering REVISION from FS
1401  * and return it in *HEADER.  REV_FILE provides the pack / rev status.
1402  * Allocate *HEADER in RESULT_POOL, use SCRATCH_POOL for temporary
1403  * allocations.
1404  */
1405 static svn_error_t *
get_l2p_header(l2p_header_t ** header,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1406 get_l2p_header(l2p_header_t **header,
1407                svn_fs_x__revision_file_t *rev_file,
1408                svn_fs_t *fs,
1409                svn_revnum_t revision,
1410                apr_pool_t *result_pool,
1411                apr_pool_t *scratch_pool)
1412 {
1413   svn_fs_x__data_t *ffd = fs->fsap_data;
1414   svn_boolean_t is_cached = FALSE;
1415   svn_fs_x__rev_file_info_t file_info;
1416 
1417   /* first, try cache lookop */
1418   svn_fs_x__pair_cache_key_t key;
1419   SVN_ERR(svn_fs_x__rev_file_info(&file_info, rev_file));
1420   key.revision = file_info.start_revision;
1421   key.second = file_info.is_packed;
1422   SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->l2p_header_cache,
1423                          &key, result_pool));
1424   if (is_cached)
1425     return SVN_NO_ERROR;
1426 
1427   /* read from disk and cache the result */
1428   SVN_ERR(get_l2p_header_body(header, rev_file, fs, revision, result_pool,
1429                               scratch_pool));
1430 
1431   return SVN_NO_ERROR;
1432 }
1433 
1434 /* From the log-to-phys index in REV_FILE, read the mapping page identified
1435  * by TABLE_ENTRY and return it in *PAGE, allocated in RESULT_POOL.
1436  */
1437 static svn_error_t *
get_l2p_page(l2p_page_t ** page,svn_fs_x__revision_file_t * rev_file,l2p_page_table_entry_t * table_entry,apr_pool_t * result_pool)1438 get_l2p_page(l2p_page_t **page,
1439              svn_fs_x__revision_file_t *rev_file,
1440              l2p_page_table_entry_t *table_entry,
1441              apr_pool_t *result_pool)
1442 {
1443   apr_uint64_t value, last_value = 0;
1444   apr_uint32_t i;
1445   l2p_page_t *result = apr_pcalloc(result_pool, sizeof(*result));
1446   apr_uint64_t container_count;
1447   apr_off_t *container_offsets;
1448   svn_fs_x__packed_number_stream_t *stream;
1449 
1450   /* open index file and select page */
1451   SVN_ERR(svn_fs_x__rev_file_l2p_index(&stream, rev_file));
1452   packed_stream_seek(stream, table_entry->offset);
1453 
1454   /* initialize the page content */
1455   result->entry_count = table_entry->entry_count;
1456   result->offsets = apr_pcalloc(result_pool, result->entry_count
1457                                            * sizeof(*result->offsets));
1458   result->sub_items = apr_pcalloc(result_pool, result->entry_count
1459                                              * sizeof(*result->sub_items));
1460 
1461   /* container offsets array */
1462 
1463   SVN_ERR(packed_stream_get(&container_count, stream));
1464   container_offsets = apr_pcalloc(result_pool,
1465                                   container_count * sizeof(*result));
1466   for (i = 0; i < container_count; ++i)
1467     {
1468       SVN_ERR(packed_stream_get(&value, stream));
1469       last_value += value;
1470       container_offsets[i] = (apr_off_t)last_value - 1;
1471       /* '-1' is represented as '0' in the index file */
1472     }
1473 
1474   /* read all page entries (offsets in rev file and container sub-items) */
1475   for (i = 0; i < result->entry_count; ++i)
1476     {
1477       SVN_ERR(packed_stream_get(&value, stream));
1478       if (value == 0)
1479         {
1480           result->offsets[i] = -1;
1481           result->sub_items[i] = 0;
1482         }
1483       else if (value <= container_count)
1484         {
1485           result->offsets[i] = container_offsets[value - 1];
1486           SVN_ERR(packed_stream_get(&value, stream));
1487           result->sub_items[i] = (apr_uint32_t)value;
1488         }
1489       else
1490         {
1491           result->offsets[i] = (apr_off_t)(value - 1 - container_count);
1492           result->sub_items[i] = 0;
1493         }
1494     }
1495 
1496   /* After reading all page entries, the read cursor must have moved by
1497    * TABLE_ENTRY->SIZE bytes. */
1498   if (   packed_stream_offset(stream)
1499       != table_entry->offset + table_entry->size)
1500     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
1501                 _("L2P actual page size does not match page table value."));
1502 
1503   *page = result;
1504 
1505   return SVN_NO_ERROR;
1506 }
1507 
1508 /* Request data structure for l2p_page_access_func.
1509  */
1510 typedef struct l2p_page_baton_t
1511 {
1512   /* in data */
1513   /* revision. Used for error messages only */
1514   svn_revnum_t revision;
1515 
1516   /* item index to look up. Used for error messages only */
1517   apr_uint64_t item_index;
1518 
1519   /* offset within the cached page */
1520   apr_uint32_t page_offset;
1521 
1522   /* out data */
1523   /* absolute item or container offset in rev / pack file */
1524   apr_off_t offset;
1525 
1526   /* 0 -> container / item itself; sub-item in container otherwise */
1527   apr_uint32_t sub_item;
1528 
1529 } l2p_page_baton_t;
1530 
1531 /* Return the rev / pack file offset of the item at BATON->PAGE_OFFSET in
1532  * OFFSETS of PAGE and write it to *OFFSET.
1533  * Allocate temporaries in SCRATCH_POOL.
1534  */
1535 static svn_error_t *
l2p_page_get_offset(l2p_page_baton_t * baton,const l2p_page_t * page,const apr_off_t * offsets,const apr_uint32_t * sub_items,apr_pool_t * scratch_pool)1536 l2p_page_get_offset(l2p_page_baton_t *baton,
1537                     const l2p_page_t *page,
1538                     const apr_off_t *offsets,
1539                     const apr_uint32_t *sub_items,
1540                     apr_pool_t *scratch_pool)
1541 {
1542   /* overflow check */
1543   if (page->entry_count <= baton->page_offset)
1544     return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
1545                              _("Item index %s too large in"
1546                                " revision %ld"),
1547                              apr_psprintf(scratch_pool, "%" APR_UINT64_T_FMT,
1548                                           baton->item_index),
1549                              baton->revision);
1550 
1551   /* return the result */
1552   baton->offset = offsets[baton->page_offset];
1553   baton->sub_item = sub_items[baton->page_offset];
1554 
1555   return SVN_NO_ERROR;
1556 }
1557 
1558 /* Implement svn_cache__partial_getter_func_t: copy the data requested in
1559  * l2p_page_baton_t *BATON from l2p_page_t *DATA into apr_off_t *OUT.
1560  */
1561 static svn_error_t *
l2p_page_access_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)1562 l2p_page_access_func(void **out,
1563                      const void *data,
1564                      apr_size_t data_len,
1565                      void *baton,
1566                      apr_pool_t *result_pool)
1567 {
1568   /* resolve all in-cache pointers */
1569   const l2p_page_t *page = data;
1570   const apr_off_t *offsets
1571     = svn_temp_deserializer__ptr(page, (const void *const *)&page->offsets);
1572   const apr_uint32_t *sub_items
1573     = svn_temp_deserializer__ptr(page, (const void *const *)&page->sub_items);
1574 
1575   /* return the requested data */
1576   return l2p_page_get_offset(baton, page, offsets, sub_items, result_pool);
1577 }
1578 
1579 /* Data request structure used by l2p_page_table_access_func.
1580  */
1581 typedef struct l2p_page_table_baton_t
1582 {
1583   /* revision for which to read the page table */
1584   svn_revnum_t revision;
1585 
1586   /* page table entries (of type l2p_page_table_entry_t).
1587    * Must be created by caller and will be filled by callee. */
1588   apr_array_header_t *pages;
1589 } l2p_page_table_baton_t;
1590 
1591 /* Implement svn_cache__partial_getter_func_t: copy the data requested in
1592  * l2p_page_baton_t *BATON from l2p_page_t *DATA into BATON->PAGES and *OUT.
1593  */
1594 static svn_error_t *
l2p_page_table_access_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)1595 l2p_page_table_access_func(void **out,
1596                            const void *data,
1597                            apr_size_t data_len,
1598                            void *baton,
1599                            apr_pool_t *result_pool)
1600 {
1601   /* resolve in-cache pointers */
1602   l2p_page_table_baton_t *table_baton = baton;
1603   const l2p_header_t *header = (const l2p_header_t *)data;
1604   const l2p_page_table_entry_t *page_table
1605     = svn_temp_deserializer__ptr(header,
1606                                  (const void *const *)&header->page_table);
1607   const apr_size_t *page_table_index
1608     = svn_temp_deserializer__ptr(header,
1609                            (const void *const *)&header->page_table_index);
1610 
1611   /* copy the revision's page table into BATON */
1612   apr_size_t rel_revision = table_baton->revision - header->first_revision;
1613   if (rel_revision < header->revision_count)
1614     {
1615       const l2p_page_table_entry_t *entry
1616         = page_table + page_table_index[rel_revision];
1617       const l2p_page_table_entry_t *last_entry
1618         = page_table + page_table_index[rel_revision + 1];
1619 
1620       for (; entry < last_entry; ++entry)
1621         APR_ARRAY_PUSH(table_baton->pages, l2p_page_table_entry_t)
1622           = *entry;
1623     }
1624 
1625   /* set output as a courtesy to the caller */
1626   *out = table_baton->pages;
1627 
1628   return SVN_NO_ERROR;
1629 }
1630 
1631 /* Read the l2p index page table for REVISION in FS from cache and return
1632  * it in PAGES.  The later must be provided by the caller (and can be
1633  * re-used); existing entries will be removed before writing the result.
1634  * If the data cannot be found in the cache, the result will be empty
1635  * (it never can be empty for a valid REVISION if the data is cached).
1636  * Use the info from REV_FILE to determine pack / rev file properties.
1637  * Use SCRATCH_POOL for temporary allocations.
1638  */
1639 static svn_error_t *
get_l2p_page_table(apr_array_header_t * pages,svn_fs_t * fs,svn_revnum_t revision,apr_pool_t * scratch_pool)1640 get_l2p_page_table(apr_array_header_t *pages,
1641                    svn_fs_t *fs,
1642                    svn_revnum_t revision,
1643                    apr_pool_t *scratch_pool)
1644 {
1645   svn_fs_x__data_t *ffd = fs->fsap_data;
1646   svn_boolean_t is_cached = FALSE;
1647   l2p_page_table_baton_t baton;
1648 
1649   svn_fs_x__pair_cache_key_t key;
1650   key.revision = base_revision(fs, revision);
1651   key.second = svn_fs_x__is_packed_rev(fs, revision);
1652 
1653   apr_array_clear(pages);
1654   baton.revision = revision;
1655   baton.pages = pages;
1656   SVN_ERR(svn_cache__get_partial((void**)&pages, &is_cached,
1657                                  ffd->l2p_header_cache, &key,
1658                                  l2p_page_table_access_func, &baton,
1659                                  scratch_pool));
1660 
1661   return SVN_NO_ERROR;
1662 }
1663 
1664 /* Utility function.  Read the l2p index pages for REVISION in FS from
1665  * STREAM and put them into the cache.  Skip page number EXLCUDED_PAGE_NO
1666  * (use -1 for 'skip none') and pages outside the MIN_OFFSET, MAX_OFFSET
1667  * range in the l2p index file.  PAGES is a scratch container provided by
1668  * the caller.  SCRATCH_POOL is used for temporary allocations.
1669  *
1670  * This function may be a no-op if the header cache lookup fails / misses.
1671  */
1672 static svn_error_t *
prefetch_l2p_pages(svn_boolean_t * end,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,svn_revnum_t revision,apr_array_header_t * pages,int exlcuded_page_no,apr_off_t min_offset,apr_off_t max_offset,apr_pool_t * scratch_pool)1673 prefetch_l2p_pages(svn_boolean_t *end,
1674                    svn_fs_t *fs,
1675                    svn_fs_x__revision_file_t *rev_file,
1676                    svn_revnum_t revision,
1677                    apr_array_header_t *pages,
1678                    int exlcuded_page_no,
1679                    apr_off_t min_offset,
1680                    apr_off_t max_offset,
1681                    apr_pool_t *scratch_pool)
1682 {
1683   svn_fs_x__data_t *ffd = fs->fsap_data;
1684   int i;
1685   apr_pool_t *iterpool;
1686   svn_fs_x__page_cache_key_t key = { 0 };
1687 
1688   /* Parameter check. */
1689   if (min_offset < 0)
1690     min_offset = 0;
1691 
1692   if (max_offset <= 0)
1693     {
1694       /* Nothing to do */
1695       *end = TRUE;
1696       return SVN_NO_ERROR;
1697     }
1698 
1699   /* get the page table for REVISION from cache */
1700   *end = FALSE;
1701   SVN_ERR(get_l2p_page_table(pages, fs, revision, scratch_pool));
1702   if (pages->nelts == 0)
1703     {
1704       /* not found -> we can't continue without hitting the disk again */
1705       *end = TRUE;
1706       return SVN_NO_ERROR;
1707     }
1708 
1709   /* prefetch pages individually until all are done or we found one in
1710    * the cache */
1711   iterpool = svn_pool_create(scratch_pool);
1712   assert(revision <= APR_UINT32_MAX);
1713   key.revision = (apr_uint32_t)revision;
1714   key.is_packed = svn_fs_x__is_packed_rev(fs, revision);
1715 
1716   for (i = 0; i < pages->nelts && !*end; ++i)
1717     {
1718       svn_boolean_t is_cached;
1719 
1720       l2p_page_table_entry_t *entry
1721         = &APR_ARRAY_IDX(pages, i, l2p_page_table_entry_t);
1722       svn_pool_clear(iterpool);
1723 
1724       if (i == exlcuded_page_no)
1725         continue;
1726 
1727       /* skip pages outside the specified index file range */
1728       if (   entry->offset < (apr_uint64_t)min_offset
1729           || entry->offset + entry->size > (apr_uint64_t)max_offset)
1730         {
1731           *end = TRUE;
1732           continue;
1733         }
1734 
1735       /* page already in cache? */
1736       key.page = i;
1737       SVN_ERR(svn_cache__has_key(&is_cached, ffd->l2p_page_cache,
1738                                  &key, iterpool));
1739       if (!is_cached)
1740         {
1741           /* no in cache -> read from stream (data already buffered in APR)
1742            * and cache the result */
1743           l2p_page_t *page = NULL;
1744           SVN_ERR(get_l2p_page(&page, rev_file, entry, iterpool));
1745 
1746           SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page,
1747                                  iterpool));
1748         }
1749     }
1750 
1751   svn_pool_destroy(iterpool);
1752 
1753   return SVN_NO_ERROR;
1754 }
1755 
1756 /* Using the log-to-phys indexes in FS, find the absolute offset in the
1757  * rev file for (REVISION, ITEM_INDEX) and return it in *OFFSET.
1758  * Use SCRATCH_POOL for temporary allocations.
1759  */
1760 static svn_error_t *
l2p_index_lookup(apr_off_t * offset,apr_uint32_t * sub_item,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,svn_revnum_t revision,apr_uint64_t item_index,apr_pool_t * scratch_pool)1761 l2p_index_lookup(apr_off_t *offset,
1762                  apr_uint32_t *sub_item,
1763                  svn_fs_t *fs,
1764                  svn_fs_x__revision_file_t *rev_file,
1765                  svn_revnum_t revision,
1766                  apr_uint64_t item_index,
1767                  apr_pool_t *scratch_pool)
1768 {
1769   svn_fs_x__data_t *ffd = fs->fsap_data;
1770   l2p_page_info_baton_t info_baton;
1771   l2p_page_baton_t page_baton;
1772   l2p_page_t *page = NULL;
1773   svn_fs_x__page_cache_key_t key = { 0 };
1774   svn_boolean_t is_cached = FALSE;
1775   void *dummy = NULL;
1776 
1777   /* read index master data structure and extract the info required to
1778    * access the l2p index page for (REVISION,ITEM_INDEX)*/
1779   info_baton.revision = revision;
1780   info_baton.item_index = item_index;
1781   SVN_ERR(get_l2p_page_info(&info_baton, rev_file, fs, scratch_pool));
1782 
1783   /* try to find the page in the cache and get the OFFSET from it */
1784   page_baton.revision = revision;
1785   page_baton.item_index = item_index;
1786   page_baton.page_offset = info_baton.page_offset;
1787 
1788   assert(revision <= APR_UINT32_MAX);
1789   key.revision = (apr_uint32_t)revision;
1790   key.is_packed = svn_fs_x__is_packed_rev(fs, revision);
1791   key.page = info_baton.page_no;
1792 
1793   SVN_ERR(svn_cache__get_partial(&dummy, &is_cached,
1794                                  ffd->l2p_page_cache, &key,
1795                                  l2p_page_access_func, &page_baton,
1796                                  scratch_pool));
1797 
1798   if (!is_cached)
1799     {
1800       /* we need to read the info from disk (might already be in the
1801        * APR file buffer, though) */
1802       apr_array_header_t *pages;
1803       svn_revnum_t prefetch_revision;
1804       svn_revnum_t last_revision
1805         = info_baton.first_revision
1806           + svn_fs_x__pack_size(fs, info_baton.first_revision);
1807       apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1808       svn_boolean_t end;
1809       apr_off_t max_offset
1810         = APR_ALIGN(info_baton.entry.offset + info_baton.entry.size,
1811                     ffd->block_size);
1812       apr_off_t min_offset = max_offset - ffd->block_size;
1813 
1814       /* read the relevant page */
1815       SVN_ERR(get_l2p_page(&page, rev_file, &info_baton.entry, scratch_pool));
1816 
1817       /* cache the page and extract the result we need */
1818       SVN_ERR(svn_cache__set(ffd->l2p_page_cache, &key, page, scratch_pool));
1819       SVN_ERR(l2p_page_get_offset(&page_baton, page, page->offsets,
1820                                   page->sub_items, scratch_pool));
1821 
1822       /* prefetch pages from following and preceding revisions */
1823       pages = apr_array_make(scratch_pool, 16,
1824                              sizeof(l2p_page_table_entry_t));
1825       end = FALSE;
1826       for (prefetch_revision = revision;
1827            prefetch_revision < last_revision && !end;
1828            ++prefetch_revision)
1829         {
1830           int excluded_page_no = prefetch_revision == revision
1831                                ? info_baton.page_no
1832                                : -1;
1833           svn_pool_clear(iterpool);
1834 
1835           SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file,
1836                                      prefetch_revision, pages,
1837                                      excluded_page_no, min_offset,
1838                                      max_offset, iterpool));
1839         }
1840 
1841       end = FALSE;
1842       for (prefetch_revision = revision-1;
1843            prefetch_revision >= info_baton.first_revision && !end;
1844            --prefetch_revision)
1845         {
1846           svn_pool_clear(iterpool);
1847 
1848           SVN_ERR(prefetch_l2p_pages(&end, fs, rev_file,
1849                                      prefetch_revision, pages, -1,
1850                                      min_offset, max_offset, iterpool));
1851         }
1852 
1853       svn_pool_destroy(iterpool);
1854     }
1855 
1856   *offset = page_baton.offset;
1857   *sub_item = page_baton.sub_item;
1858 
1859   return SVN_NO_ERROR;
1860 }
1861 
1862 /* Using the log-to-phys proto index in transaction TXN_ID in FS, find the
1863  * absolute offset in the proto rev file for the given ITEM_INDEX and return
1864  * it in *OFFSET.  Use SCRATCH_POOL for temporary allocations.
1865  */
1866 static svn_error_t *
l2p_proto_index_lookup(apr_off_t * offset,apr_uint32_t * sub_item,svn_fs_t * fs,svn_fs_x__txn_id_t txn_id,apr_uint64_t item_index,apr_pool_t * scratch_pool)1867 l2p_proto_index_lookup(apr_off_t *offset,
1868                        apr_uint32_t *sub_item,
1869                        svn_fs_t *fs,
1870                        svn_fs_x__txn_id_t txn_id,
1871                        apr_uint64_t item_index,
1872                        apr_pool_t *scratch_pool)
1873 {
1874   svn_boolean_t eof = FALSE;
1875   apr_file_t *file = NULL;
1876   SVN_ERR(svn_io_file_open(&file,
1877                            svn_fs_x__path_l2p_proto_index(fs, txn_id,
1878                                                           scratch_pool),
1879                            APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1880                            scratch_pool));
1881 
1882   /* process all entries until we fail due to EOF */
1883   *offset = -1;
1884   while (!eof)
1885     {
1886       l2p_proto_entry_t entry;
1887 
1888       /* (attempt to) read the next entry from the source */
1889       SVN_ERR(read_l2p_entry_from_proto_index(file, &entry, &eof,
1890                                               scratch_pool));
1891 
1892       /* handle new revision */
1893       if (!eof && entry.item_index == item_index)
1894         {
1895           *offset = (apr_off_t)entry.offset - 1;
1896           *sub_item = entry.sub_item;
1897           break;
1898         }
1899     }
1900 
1901   SVN_ERR(svn_io_file_close(file, scratch_pool));
1902 
1903   return SVN_NO_ERROR;
1904 }
1905 
1906 svn_error_t *
svn_fs_x__l2p_get_max_ids(apr_array_header_t ** max_ids,svn_fs_t * fs,svn_revnum_t start_rev,apr_size_t count,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1907 svn_fs_x__l2p_get_max_ids(apr_array_header_t **max_ids,
1908                           svn_fs_t *fs,
1909                           svn_revnum_t start_rev,
1910                           apr_size_t count,
1911                           apr_pool_t *result_pool,
1912                           apr_pool_t *scratch_pool)
1913 {
1914   l2p_header_t *header = NULL;
1915   svn_revnum_t revision;
1916   svn_revnum_t last_rev = (svn_revnum_t)(start_rev + count);
1917   svn_fs_x__revision_file_t *rev_file;
1918   apr_pool_t *header_pool = svn_pool_create(scratch_pool);
1919 
1920   /* read index master data structure for the index covering START_REV */
1921   SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, start_rev, header_pool));
1922   SVN_ERR(get_l2p_header(&header, rev_file, fs, start_rev, header_pool,
1923                          header_pool));
1924   SVN_ERR(svn_fs_x__close_revision_file(rev_file));
1925 
1926   /* Determine the length of the item index list for each rev.
1927    * Read new index headers as required. */
1928   *max_ids = apr_array_make(result_pool, (int)count, sizeof(apr_uint64_t));
1929   for (revision = start_rev; revision < last_rev; ++revision)
1930     {
1931       apr_uint64_t full_page_count;
1932       apr_uint64_t item_count;
1933       apr_size_t first_page_index, last_page_index;
1934 
1935       if (revision - header->first_revision >= header->revision_count)
1936         {
1937           /* need to read the next index. Clear up memory used for the
1938            * previous one.  Note that intermittent pack runs do not change
1939            * the number of items in a revision, i.e. there is no consistency
1940            * issue here. */
1941           svn_pool_clear(header_pool);
1942           SVN_ERR(svn_fs_x__rev_file_init(&rev_file, fs, revision,
1943                                           header_pool));
1944           SVN_ERR(get_l2p_header(&header, rev_file, fs, revision,
1945                                  header_pool, header_pool));
1946           SVN_ERR(svn_fs_x__close_revision_file(rev_file));
1947         }
1948 
1949       /* in a revision with N index pages, the first N-1 index pages are
1950        * "full", i.e. contain HEADER->PAGE_SIZE entries */
1951       first_page_index
1952          = header->page_table_index[revision - header->first_revision];
1953       last_page_index
1954          = header->page_table_index[revision - header->first_revision + 1];
1955       full_page_count = last_page_index - first_page_index - 1;
1956       item_count = full_page_count * header->page_size
1957                  + header->page_table[last_page_index - 1].entry_count;
1958 
1959       APR_ARRAY_PUSH(*max_ids, apr_uint64_t) = item_count;
1960     }
1961 
1962   svn_pool_destroy(header_pool);
1963   return SVN_NO_ERROR;
1964 }
1965 
1966 /*
1967  * phys-to-log index
1968  */
1969 svn_fs_x__p2l_entry_t *
svn_fs_x__p2l_entry_dup(const svn_fs_x__p2l_entry_t * entry,apr_pool_t * result_pool)1970 svn_fs_x__p2l_entry_dup(const svn_fs_x__p2l_entry_t *entry,
1971                         apr_pool_t *result_pool)
1972 {
1973   svn_fs_x__p2l_entry_t *new_entry = apr_pmemdup(result_pool, entry,
1974                                                  sizeof(*new_entry));
1975   if (new_entry->item_count)
1976     new_entry->items = apr_pmemdup(result_pool,
1977                                    entry->items,
1978                                    entry->item_count * sizeof(*entry->items));
1979 
1980   return new_entry;
1981 }
1982 
1983 /*
1984  * phys-to-log index
1985  */
1986 svn_error_t *
svn_fs_x__p2l_proto_index_open(apr_file_t ** proto_index,const char * file_name,apr_pool_t * result_pool)1987 svn_fs_x__p2l_proto_index_open(apr_file_t **proto_index,
1988                                const char *file_name,
1989                                apr_pool_t *result_pool)
1990 {
1991   SVN_ERR(svn_io_file_open(proto_index, file_name, APR_READ | APR_WRITE
1992                            | APR_CREATE | APR_APPEND | APR_BUFFERED,
1993                            APR_OS_DEFAULT, result_pool));
1994 
1995   return SVN_NO_ERROR;
1996 }
1997 
1998 
1999 svn_error_t *
svn_fs_x__p2l_proto_index_add_entry(apr_file_t * proto_index,const svn_fs_x__p2l_entry_t * entry,apr_pool_t * scratch_pool)2000 svn_fs_x__p2l_proto_index_add_entry(apr_file_t *proto_index,
2001                                     const svn_fs_x__p2l_entry_t *entry,
2002                                     apr_pool_t *scratch_pool)
2003 {
2004   apr_uint64_t revision;
2005   apr_int32_t i;
2006 
2007   /* Make sure all signed elements of ENTRY have non-negative values.
2008    *
2009    * For file offsets and sizes, this is a given as we use them to describe
2010    * absolute positions and sizes.  For revisions, SVN_INVALID_REVNUM is
2011    * valid, hence we have to shift it by 1.
2012    */
2013   SVN_ERR_ASSERT(entry->offset >= 0);
2014   SVN_ERR_ASSERT(entry->size >= 0);
2015 
2016   /* Now, all values will nicely convert to uint64. */
2017   /* Make sure to keep P2L_PROTO_INDEX_ENTRY_SIZE consistent with this: */
2018 
2019   SVN_ERR(write_uint64_to_proto_index(proto_index, entry->offset,
2020                                       scratch_pool));
2021   SVN_ERR(write_uint64_to_proto_index(proto_index, entry->size,
2022                                       scratch_pool));
2023   SVN_ERR(write_uint64_to_proto_index(proto_index, entry->type,
2024                                       scratch_pool));
2025   SVN_ERR(write_uint64_to_proto_index(proto_index, entry->fnv1_checksum,
2026                                       scratch_pool));
2027   SVN_ERR(write_uint64_to_proto_index(proto_index, entry->item_count,
2028                                       scratch_pool));
2029 
2030   /* Add sub-items. */
2031   for (i = 0; i < entry->item_count; ++i)
2032     {
2033       const svn_fs_x__id_t *sub_item = &entry->items[i];
2034 
2035       /* Make sure all signed elements of ENTRY have non-negative values.
2036        *
2037        * For file offsets and sizes, this is a given as we use them to
2038        * describe absolute positions and sizes.  For revisions,
2039        * SVN_INVALID_REVNUM is valid, hence we have to shift it by 1.
2040       */
2041       SVN_ERR_ASSERT(   sub_item->change_set >= 0
2042                      || sub_item->change_set == SVN_INVALID_REVNUM);
2043 
2044       /* Write sub- record. */
2045       revision = sub_item->change_set == SVN_INVALID_REVNUM
2046                ? 0
2047                : ((apr_uint64_t)sub_item->change_set + 1);
2048 
2049       SVN_ERR(write_uint64_to_proto_index(proto_index, revision,
2050                                           scratch_pool));
2051       SVN_ERR(write_uint64_to_proto_index(proto_index, sub_item->number,
2052                                           scratch_pool));
2053     }
2054 
2055   /* Add trailer: rev / pack file offset of the next item */
2056   SVN_ERR(write_uint64_to_proto_index(proto_index,
2057                                       entry->offset + entry->size,
2058                                       scratch_pool));
2059 
2060   return SVN_NO_ERROR;
2061 }
2062 
2063 /* Read *ENTRY from log-to-phys PROTO_INDEX file and indicate end-of-file
2064  * in *EOF, or error out in that case if EOF is NULL.  *ENTRY is in an
2065  * undefined state if an end-of-file occurred.
2066  * Use SCRATCH_POOL for temporary allocations.
2067  */
2068 static svn_error_t *
read_p2l_entry_from_proto_index(apr_file_t * proto_index,svn_fs_x__p2l_entry_t * entry,svn_boolean_t * eof,apr_pool_t * scratch_pool)2069 read_p2l_entry_from_proto_index(apr_file_t *proto_index,
2070                                 svn_fs_x__p2l_entry_t *entry,
2071                                 svn_boolean_t *eof,
2072                                 apr_pool_t *scratch_pool)
2073 {
2074   SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->offset,
2075                                       eof, scratch_pool));
2076   SVN_ERR(read_off_t_from_proto_index(proto_index, &entry->size,
2077                                       eof, scratch_pool));
2078   SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->type,
2079                                        eof, scratch_pool));
2080   SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->fnv1_checksum,
2081                                        eof, scratch_pool));
2082   SVN_ERR(read_uint32_from_proto_index(proto_index, &entry->item_count,
2083                                        eof, scratch_pool));
2084 
2085   return SVN_NO_ERROR;
2086 }
2087 
2088 static svn_error_t *
read_p2l_sub_items_from_proto_index(apr_file_t * proto_index,svn_fs_x__p2l_entry_t * entry,svn_boolean_t * eof,apr_pool_t * scratch_pool)2089 read_p2l_sub_items_from_proto_index(apr_file_t *proto_index,
2090                                     svn_fs_x__p2l_entry_t *entry,
2091                                     svn_boolean_t *eof,
2092                                     apr_pool_t *scratch_pool)
2093 {
2094   apr_int32_t i;
2095   for (i = 0; i < entry->item_count; ++i)
2096     {
2097       apr_uint64_t revision;
2098       svn_fs_x__id_t *sub_item = &entry->items[i];
2099 
2100       SVN_ERR(read_uint64_from_proto_index(proto_index, &revision,
2101                                            eof, scratch_pool));
2102       SVN_ERR(read_uint64_from_proto_index(proto_index, &sub_item->number,
2103                                            eof, scratch_pool));
2104 
2105       /* Do the inverse REVSION number conversion (see
2106        * svn_fs_x__p2l_proto_index_add_entry), if we actually read a
2107        * complete record.
2108       */
2109       if (!eof || !*eof)
2110         {
2111           /* Be careful with the arithmetics here (overflows and wrap-around):
2112            */
2113           if (revision > 0 && revision - 1 > LONG_MAX)
2114             return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW, NULL,
2115                                      _("Revision 0x%s too large, max = 0x%s"),
2116                                      apr_psprintf(scratch_pool,
2117                                                   "%" APR_UINT64_T_FMT,
2118                                                   revision),
2119                                      apr_psprintf(scratch_pool,
2120                                                   "%" APR_UINT64_T_FMT,
2121                                                   (apr_uint64_t)LONG_MAX));
2122 
2123           /* Shortening conversion from unsigned to signed int is well-
2124            * defined and not lossy in C because the value can be represented
2125            * in the target type.  Also, cast to 'long' instead of
2126            * 'svn_revnum_t' here to provoke a compiler warning if those
2127            * types should differ and we would need to change the overflow
2128            * checking logic.
2129            */
2130           sub_item->change_set = revision == 0
2131                                ? SVN_INVALID_REVNUM
2132                                : (long)(revision - 1);
2133         }
2134 
2135     }
2136 
2137   return SVN_NO_ERROR;
2138 }
2139 
2140 svn_error_t *
svn_fs_x__p2l_proto_index_next_offset(apr_off_t * next_offset,apr_file_t * proto_index,apr_pool_t * scratch_pool)2141 svn_fs_x__p2l_proto_index_next_offset(apr_off_t *next_offset,
2142                                       apr_file_t *proto_index,
2143                                       apr_pool_t *scratch_pool)
2144 {
2145   apr_off_t offset = 0;
2146 
2147   /* Empty index file? */
2148   SVN_ERR(svn_io_file_seek(proto_index, APR_END, &offset, scratch_pool));
2149   if (offset == 0)
2150     {
2151       *next_offset = 0;
2152     }
2153   else
2154     {
2155       /* The last 64 bits contain the value we are looking for. */
2156       offset -= sizeof(apr_uint64_t);
2157       SVN_ERR(svn_io_file_seek(proto_index, APR_SET, &offset, scratch_pool));
2158       SVN_ERR(read_off_t_from_proto_index(proto_index, next_offset, NULL,
2159                                           scratch_pool));
2160     }
2161 
2162   return SVN_NO_ERROR;
2163 }
2164 
2165 svn_error_t *
svn_fs_x__p2l_index_append(svn_checksum_t ** checksum,svn_fs_t * fs,apr_file_t * index_file,const char * proto_file_name,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2166 svn_fs_x__p2l_index_append(svn_checksum_t **checksum,
2167                            svn_fs_t *fs,
2168                            apr_file_t *index_file,
2169                            const char *proto_file_name,
2170                            svn_revnum_t revision,
2171                            apr_pool_t *result_pool,
2172                            apr_pool_t *scratch_pool)
2173 {
2174   svn_fs_x__data_t *ffd = fs->fsap_data;
2175   apr_uint64_t page_size = ffd->p2l_page_size;
2176   apr_file_t *proto_index = NULL;
2177   svn_stream_t *stream;
2178   int i;
2179   apr_uint32_t sub_item;
2180   svn_boolean_t eof = FALSE;
2181   unsigned char encoded[ENCODED_INT_LENGTH];
2182 
2183   apr_uint64_t last_entry_end = 0;
2184   apr_uint64_t last_page_end = 0;
2185   apr_uint64_t last_buffer_size = 0;  /* byte offset in the spill buffer at
2186                                          the begin of the current revision */
2187   apr_uint64_t file_size = 0;
2188 
2189   /* temporary data structures that collect the data which will be moved
2190      to the target file in a second step */
2191   apr_pool_t *local_pool = svn_pool_create(scratch_pool);
2192   apr_array_header_t *table_sizes
2193      = apr_array_make(local_pool, 16, sizeof(apr_uint64_t));
2194 
2195   /* 64k blocks, spill after 16MB */
2196   svn_spillbuf_t *buffer
2197      = svn_spillbuf__create(0x10000, 0x1000000, local_pool);
2198 
2199   /* for loop temps ... */
2200   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2201 
2202   /* start at the beginning of the source file */
2203   SVN_ERR(svn_io_file_open(&proto_index, proto_file_name,
2204                            APR_READ | APR_CREATE | APR_BUFFERED,
2205                            APR_OS_DEFAULT, local_pool));
2206 
2207   /* process all entries until we fail due to EOF */
2208   while (!eof)
2209     {
2210       svn_fs_x__p2l_entry_t entry;
2211       apr_uint64_t entry_end;
2212       svn_boolean_t new_page = svn_spillbuf__get_size(buffer) == 0;
2213       svn_revnum_t last_revision = revision;
2214       apr_uint64_t last_number = 0;
2215 
2216       svn_pool_clear(iterpool);
2217 
2218       /* (attempt to) read the next entry from the source */
2219       SVN_ERR(read_p2l_entry_from_proto_index(proto_index, &entry,
2220                                               &eof, iterpool));
2221 
2222       if (entry.item_count && !eof)
2223         {
2224           entry.items = apr_palloc(iterpool,
2225                                    entry.item_count * sizeof(*entry.items));
2226           SVN_ERR(read_p2l_sub_items_from_proto_index(proto_index, &entry,
2227                                                       &eof, iterpool));
2228         }
2229 
2230       /* Read entry trailer. However, we won't need its content. */
2231       if (!eof)
2232         {
2233           apr_uint64_t trailer;
2234           SVN_ERR(read_uint64_from_proto_index(proto_index, &trailer, &eof,
2235                                                scratch_pool));
2236         }
2237 
2238       /* "unused" (and usually non-existent) section to cover the offsets
2239          at the end the of the last page. */
2240       if (eof)
2241         {
2242           file_size = last_entry_end;
2243 
2244           entry.offset = last_entry_end;
2245           entry.size = APR_ALIGN(entry.offset, page_size) - entry.offset;
2246           entry.type = SVN_FS_X__ITEM_TYPE_UNUSED;
2247           entry.fnv1_checksum = 0;
2248           entry.item_count = 0;
2249           entry.items = NULL;
2250         }
2251 
2252       for (sub_item = 0; sub_item < entry.item_count; ++sub_item)
2253         if (entry.items[sub_item].change_set == SVN_FS_X__INVALID_CHANGE_SET)
2254           entry.items[sub_item].change_set
2255             = svn_fs_x__change_set_by_rev(revision);
2256 
2257       /* end pages if entry is extending beyond their boundaries */
2258       entry_end = entry.offset + entry.size;
2259       while (entry_end - last_page_end > page_size)
2260         {
2261           apr_uint64_t buffer_size = svn_spillbuf__get_size(buffer);
2262           APR_ARRAY_PUSH(table_sizes, apr_uint64_t)
2263              = buffer_size - last_buffer_size;
2264 
2265           last_buffer_size = buffer_size;
2266           last_page_end += page_size;
2267           new_page = TRUE;
2268         }
2269 
2270       /* this entry starts a new table -> store its offset
2271          (all following entries in the same table will store sizes only) */
2272       if (new_page)
2273         {
2274           SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2275                                       encode_uint(encoded, entry.offset),
2276                                       iterpool));
2277           last_revision = revision;
2278         }
2279 
2280       /* write simple item / container entry */
2281       SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2282                                   encode_uint(encoded, entry.size),
2283                                   iterpool));
2284       SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2285                                   encode_uint(encoded, entry.type
2286                                                      + entry.item_count * 16),
2287                                   iterpool));
2288       SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2289                                   encode_uint(encoded, entry.fnv1_checksum),
2290                                   iterpool));
2291 
2292       /* container contents (only one for non-container items) */
2293       for (sub_item = 0; sub_item < entry.item_count; ++sub_item)
2294         {
2295           svn_revnum_t item_rev
2296             = svn_fs_x__get_revnum(entry.items[sub_item].change_set);
2297           apr_int64_t diff = item_rev - last_revision;
2298           SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2299                                       encode_int(encoded, diff),
2300                                       iterpool));
2301           last_revision = item_rev;
2302         }
2303 
2304       for (sub_item = 0; sub_item < entry.item_count; ++sub_item)
2305         {
2306           apr_int64_t diff = entry.items[sub_item].number - last_number;
2307           SVN_ERR(svn_spillbuf__write(buffer, (const char *)encoded,
2308                                       encode_int(encoded, diff),
2309                                       iterpool));
2310           last_number = entry.items[sub_item].number;
2311         }
2312 
2313       last_entry_end = entry_end;
2314     }
2315 
2316   /* close the source file */
2317   SVN_ERR(svn_io_file_close(proto_index, local_pool));
2318 
2319   /* store length of last table */
2320   APR_ARRAY_PUSH(table_sizes, apr_uint64_t)
2321       = svn_spillbuf__get_size(buffer) - last_buffer_size;
2322 
2323   /* Open target stream. */
2324   stream = svn_stream_checksummed2(svn_stream_from_aprfile2(index_file, TRUE,
2325                                                             local_pool),
2326                                    NULL, checksum, svn_checksum_md5, FALSE,
2327                                    result_pool);
2328 
2329   /* write the start revision, file size and page size */
2330   SVN_ERR(svn_stream_puts(stream, SVN_FS_X__P2L_STREAM_PREFIX));
2331   SVN_ERR(stream_write_encoded(stream, revision));
2332   SVN_ERR(stream_write_encoded(stream, file_size));
2333   SVN_ERR(stream_write_encoded(stream, page_size));
2334 
2335   /* write the page table (actually, the sizes of each page description) */
2336   SVN_ERR(stream_write_encoded(stream, table_sizes->nelts));
2337   for (i = 0; i < table_sizes->nelts; ++i)
2338     {
2339       apr_uint64_t value = APR_ARRAY_IDX(table_sizes, i, apr_uint64_t);
2340       SVN_ERR(stream_write_encoded(stream, value));
2341     }
2342 
2343   /* append page contents and implicitly close STREAM */
2344   SVN_ERR(svn_stream_copy3(svn_stream__from_spillbuf(buffer, local_pool),
2345                            stream, NULL, NULL, local_pool));
2346 
2347   svn_pool_destroy(iterpool);
2348   svn_pool_destroy(local_pool);
2349 
2350   return SVN_NO_ERROR;
2351 }
2352 
2353 /* Data structure that describes which p2l page info shall be extracted
2354  * from the cache and contains the fields that receive the result.
2355  */
2356 typedef struct p2l_page_info_baton_t
2357 {
2358   /* input variables */
2359   /* revision identifying the index file */
2360   svn_revnum_t revision;
2361 
2362   /* offset within the page in rev / pack file */
2363   apr_off_t offset;
2364 
2365   /* output variables */
2366   /* page containing OFFSET */
2367   apr_size_t page_no;
2368 
2369   /* first revision in this p2l index */
2370   svn_revnum_t first_revision;
2371 
2372   /* offset within the p2l index file describing this page */
2373   apr_off_t start_offset;
2374 
2375   /* offset within the p2l index file describing the following page */
2376   apr_off_t next_offset;
2377 
2378   /* PAGE_NO * PAGE_SIZE (if <= OFFSET) */
2379   apr_off_t page_start;
2380 
2381   /* total number of pages indexed */
2382   apr_size_t page_count;
2383 
2384   /* size of each page in pack / rev file */
2385   apr_uint64_t page_size;
2386 } p2l_page_info_baton_t;
2387 
2388 /* From HEADER and the list of all OFFSETS, fill BATON with the page info
2389  * requested by BATON->OFFSET.
2390  */
2391 static void
p2l_page_info_copy(p2l_page_info_baton_t * baton,const p2l_header_t * header,const apr_off_t * offsets)2392 p2l_page_info_copy(p2l_page_info_baton_t *baton,
2393                    const p2l_header_t *header,
2394                    const apr_off_t *offsets)
2395 {
2396   /* if the requested offset is out of bounds, return info for
2397    * a zero-sized empty page right behind the last page.
2398    */
2399   if (baton->offset / header->page_size < header->page_count)
2400     {
2401       /* This cast is safe because the value is < header->page_count. */
2402       baton->page_no = (apr_size_t)(baton->offset / header->page_size);
2403       baton->start_offset = offsets[baton->page_no];
2404       baton->next_offset = offsets[baton->page_no + 1];
2405       baton->page_size = header->page_size;
2406     }
2407   else
2408     {
2409       /* Beyond the last page. */
2410       baton->page_no = header->page_count;
2411       baton->start_offset = offsets[baton->page_no];
2412       baton->next_offset = offsets[baton->page_no];
2413       baton->page_size = 0;
2414     }
2415 
2416   baton->first_revision = header->first_revision;
2417   baton->page_start = (apr_off_t)(header->page_size * baton->page_no);
2418   baton->page_count = header->page_count;
2419 }
2420 
2421 /* Implement svn_cache__partial_getter_func_t: extract the p2l page info
2422  * requested by BATON and return it in BATON.
2423  */
2424 static svn_error_t *
p2l_page_info_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)2425 p2l_page_info_func(void **out,
2426                    const void *data,
2427                    apr_size_t data_len,
2428                    void *baton,
2429                    apr_pool_t *result_pool)
2430 {
2431   /* all the pointers to cached data we need */
2432   const p2l_header_t *header = data;
2433   const apr_off_t *offsets
2434     = svn_temp_deserializer__ptr(header,
2435                                  (const void *const *)&header->offsets);
2436 
2437   /* copy data from cache to BATON */
2438   p2l_page_info_copy(baton, header, offsets);
2439   return SVN_NO_ERROR;
2440 }
2441 
2442 /* Read the header data structure of the phys-to-log index for REVISION in
2443  * FS and return it in *HEADER, allocated in RESULT_POOL. Use REV_FILE to
2444  * access on-disk data.  Use SCRATCH_POOL for temporary allocations.
2445  */
2446 static svn_error_t *
get_p2l_header(p2l_header_t ** header,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t revision,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2447 get_p2l_header(p2l_header_t **header,
2448                svn_fs_x__revision_file_t *rev_file,
2449                svn_fs_t *fs,
2450                svn_revnum_t revision,
2451                apr_pool_t *result_pool,
2452                apr_pool_t *scratch_pool)
2453 {
2454   svn_fs_x__data_t *ffd = fs->fsap_data;
2455   apr_uint64_t value;
2456   apr_size_t i;
2457   apr_off_t offset;
2458   p2l_header_t *result;
2459   svn_boolean_t is_cached = FALSE;
2460   svn_fs_x__packed_number_stream_t *stream;
2461   svn_fs_x__rev_file_info_t file_info;
2462   svn_fs_x__index_info_t l2p_index_info;
2463 
2464   /* look for the header data in our cache */
2465   svn_fs_x__pair_cache_key_t key;
2466   SVN_ERR(svn_fs_x__rev_file_info(&file_info, rev_file));
2467   key.revision = file_info.start_revision;
2468   key.second = file_info.is_packed;
2469 
2470   SVN_ERR(svn_cache__get((void**)header, &is_cached, ffd->p2l_header_cache,
2471                          &key, result_pool));
2472   if (is_cached)
2473     return SVN_NO_ERROR;
2474 
2475   /* not found -> must read it from disk.
2476    * Open index file or position read pointer to the begin of the file */
2477   SVN_ERR(svn_fs_x__rev_file_p2l_index(&stream, rev_file));
2478   SVN_ERR(svn_fs_x__rev_file_l2p_info(&l2p_index_info, rev_file));
2479   packed_stream_seek(stream, 0);
2480 
2481   /* allocate result data structure */
2482   result = apr_pcalloc(result_pool, sizeof(*result));
2483 
2484   /* Read table sizes, check them for plausibility and allocate page array. */
2485   SVN_ERR(packed_stream_get(&value, stream));
2486   result->first_revision = (svn_revnum_t)value;
2487   if (result->first_revision != file_info.start_revision)
2488     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2489                   _("Index rev / pack file revision numbers do not match"));
2490 
2491   SVN_ERR(packed_stream_get(&value, stream));
2492   result->file_size = value;
2493   if (result->file_size != (apr_uint64_t)l2p_index_info.start)
2494     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2495                    _("Index offset and rev / pack file size do not match"));
2496 
2497   SVN_ERR(packed_stream_get(&value, stream));
2498   result->page_size = value;
2499   if (!result->page_size || (result->page_size & (result->page_size - 1)))
2500     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2501                             _("P2L index page size is not a power of two"));
2502 
2503   SVN_ERR(packed_stream_get(&value, stream));
2504   result->page_count = (apr_size_t)value;
2505   if (result->page_count != (result->file_size - 1) / result->page_size + 1)
2506     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2507                    _("P2L page count does not match rev / pack file size"));
2508 
2509   result->offsets
2510     = apr_pcalloc(result_pool, (result->page_count + 1) * sizeof(*result->offsets));
2511 
2512   /* read page sizes and derive page description offsets from them */
2513   result->offsets[0] = 0;
2514   for (i = 0; i < result->page_count; ++i)
2515     {
2516       SVN_ERR(packed_stream_get(&value, stream));
2517       result->offsets[i+1] = result->offsets[i] + (apr_off_t)value;
2518     }
2519 
2520   /* correct the offset values */
2521   offset = packed_stream_offset(stream);
2522   for (i = 0; i <= result->page_count; ++i)
2523     result->offsets[i] += offset;
2524 
2525   /* cache the header data */
2526   SVN_ERR(svn_cache__set(ffd->p2l_header_cache, &key, result, scratch_pool));
2527 
2528   /* return the result */
2529   *header = result;
2530 
2531   return SVN_NO_ERROR;
2532 }
2533 
2534 /* Read the header data structure of the phys-to-log index for revision
2535  * BATON->REVISION in FS.  Return in *BATON all info relevant to read the
2536  * index page for the rev / pack file offset BATON->OFFSET.  Use REV_FILE
2537  * to access on-disk data.  Use SCRATCH_POOL for temporary allocations.
2538  */
2539 static svn_error_t *
get_p2l_page_info(p2l_page_info_baton_t * baton,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,apr_pool_t * scratch_pool)2540 get_p2l_page_info(p2l_page_info_baton_t *baton,
2541                   svn_fs_x__revision_file_t *rev_file,
2542                   svn_fs_t *fs,
2543                   apr_pool_t *scratch_pool)
2544 {
2545   svn_fs_x__data_t *ffd = fs->fsap_data;
2546   p2l_header_t *header;
2547   svn_boolean_t is_cached = FALSE;
2548   void *dummy = NULL;
2549 
2550   /* look for the header data in our cache */
2551   svn_fs_x__pair_cache_key_t key;
2552   key.revision = base_revision(fs, baton->revision);
2553   key.second = svn_fs_x__is_packed_rev(fs, baton->revision);
2554 
2555   SVN_ERR(svn_cache__get_partial(&dummy, &is_cached, ffd->p2l_header_cache,
2556                                  &key, p2l_page_info_func, baton,
2557                                  scratch_pool));
2558   if (is_cached)
2559     return SVN_NO_ERROR;
2560 
2561   SVN_ERR(get_p2l_header(&header, rev_file, fs, baton->revision,
2562                          scratch_pool, scratch_pool));
2563 
2564   /* copy the requested info into *BATON */
2565   p2l_page_info_copy(baton, header, header->offsets);
2566 
2567   return SVN_NO_ERROR;
2568 }
2569 
2570 /* Read a mapping entry from the phys-to-log index STREAM and append it to
2571  * RESULT.  *ITEM_INDEX contains the phys offset for the entry and will
2572  * be moved forward by the size of entry.
2573  */
2574 static svn_error_t *
read_entry(svn_fs_x__packed_number_stream_t * stream,apr_off_t * item_offset,svn_revnum_t revision,apr_array_header_t * result)2575 read_entry(svn_fs_x__packed_number_stream_t *stream,
2576            apr_off_t *item_offset,
2577            svn_revnum_t revision,
2578            apr_array_header_t *result)
2579 {
2580   apr_uint64_t value;
2581   apr_uint64_t number = 0;
2582   apr_uint32_t sub_item;
2583 
2584   svn_fs_x__p2l_entry_t entry;
2585 
2586   entry.offset = *item_offset;
2587   SVN_ERR(packed_stream_get(&value, stream));
2588   entry.size = (apr_off_t)value;
2589   SVN_ERR(packed_stream_get(&value, stream));
2590   entry.type = (int)value % 16;
2591   entry.item_count = (apr_uint32_t)(value / 16);
2592 
2593   /* Verify item type. */
2594   if (entry.type > SVN_FS_X__ITEM_TYPE_REPS_CONT)
2595     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2596                             _("Invalid item type in P2L index"));
2597 
2598   SVN_ERR(packed_stream_get(&value, stream));
2599   entry.fnv1_checksum = (apr_uint32_t)value;
2600 
2601   /* Truncating the checksum to 32 bits may have hidden random data in the
2602    * unused extra bits of the on-disk representation (7/8 bit representation
2603    * uses 5 bytes on disk for the 32 bit value, leaving 3 bits unused). */
2604   if (value > APR_UINT32_MAX)
2605     return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2606                             _("Invalid FNV1 checksum in P2L index"));
2607 
2608   /* Some of the index data for empty rev / pack file sections will not be
2609    * used during normal operation.  Thus, we have strict rules for the
2610    * contents of those unused fields. */
2611   if (entry.type == SVN_FS_X__ITEM_TYPE_UNUSED)
2612     if (   entry.fnv1_checksum != 0
2613         || entry.item_count != 0)
2614       return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2615                  _("Unused regions must be empty and have checksum 0"));
2616 
2617   if (entry.item_count == 0)
2618     {
2619       entry.items = NULL;
2620     }
2621   else
2622     {
2623       entry.items
2624         = apr_pcalloc(result->pool, entry.item_count * sizeof(*entry.items));
2625 
2626       if (   entry.item_count > 1
2627           && entry.type < SVN_FS_X__ITEM_TYPE_CHANGES_CONT)
2628         return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2629                       _("Only containers may have more than one sub-item"));
2630 
2631       for (sub_item = 0; sub_item < entry.item_count; ++sub_item)
2632         {
2633           SVN_ERR(packed_stream_get(&value, stream));
2634           revision += (svn_revnum_t)(value % 2 ? -1 - value / 2 : value / 2);
2635           entry.items[sub_item].change_set
2636             = svn_fs_x__change_set_by_rev(revision);
2637         }
2638 
2639       for (sub_item = 0; sub_item < entry.item_count; ++sub_item)
2640         {
2641           SVN_ERR(packed_stream_get(&value, stream));
2642           number += value % 2 ? -1 - value / 2 : value / 2;
2643           entry.items[sub_item].number = number;
2644 
2645           if (   (   entry.type == SVN_FS_X__ITEM_TYPE_CHANGES
2646                   || entry.type == SVN_FS_X__ITEM_TYPE_CHANGES_CONT)
2647               && number != SVN_FS_X__ITEM_INDEX_CHANGES)
2648             return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2649                             _("Changed path list must have item number 1"));
2650         }
2651     }
2652 
2653   /* Corrupted SIZE values might cause arithmetic overflow.
2654    * The same can happen if you copy a repository from a system with 63 bit
2655    * file lengths to one with 31 bit file lengths. */
2656   if ((apr_uint64_t)entry.offset + (apr_uint64_t)entry.size > off_t_max)
2657     return svn_error_create(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2658                             _("P2L index entry size overflow."));
2659 
2660   APR_ARRAY_PUSH(result, svn_fs_x__p2l_entry_t) = entry;
2661   *item_offset += entry.size;
2662 
2663   return SVN_NO_ERROR;
2664 }
2665 
2666 /* Read the phys-to-log mappings for the cluster beginning at rev file
2667  * offset PAGE_START from the index for START_REVISION in FS.  The data
2668  * can be found in the index page beginning at START_OFFSET with the next
2669  * page beginning at NEXT_OFFSET.  PAGE_SIZE is the L2P index page size.
2670  * Return the relevant index entries in *ENTRIES.  Use REV_FILE to access
2671  * on-disk data.  Allocate *ENTRIES in RESULT_POOL.
2672  */
2673 static svn_error_t *
get_p2l_page(apr_array_header_t ** entries,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t start_revision,apr_off_t start_offset,apr_off_t next_offset,apr_off_t page_start,apr_uint64_t page_size,apr_pool_t * result_pool)2674 get_p2l_page(apr_array_header_t **entries,
2675              svn_fs_x__revision_file_t *rev_file,
2676              svn_fs_t *fs,
2677              svn_revnum_t start_revision,
2678              apr_off_t start_offset,
2679              apr_off_t next_offset,
2680              apr_off_t page_start,
2681              apr_uint64_t page_size,
2682              apr_pool_t *result_pool)
2683 {
2684   apr_uint64_t value;
2685   apr_array_header_t *result
2686     = apr_array_make(result_pool, 16, sizeof(svn_fs_x__p2l_entry_t));
2687   apr_off_t item_offset;
2688   apr_off_t offset;
2689   svn_fs_x__packed_number_stream_t *stream;
2690 
2691   /* open index and navigate to page start */
2692   SVN_ERR(svn_fs_x__rev_file_p2l_index(&stream, rev_file));
2693   packed_stream_seek(stream, start_offset);
2694 
2695   /* read rev file offset of the first page entry (all page entries will
2696    * only store their sizes). */
2697   SVN_ERR(packed_stream_get(&value, stream));
2698   item_offset = (apr_off_t)value;
2699 
2700   /* Special case: empty pages. */
2701   if (start_offset == next_offset)
2702     {
2703       /* Empty page. This only happens if the first entry of the next page
2704        * also covers this page (and possibly more) completely. */
2705       SVN_ERR(read_entry(stream, &item_offset, start_revision, result));
2706     }
2707   else
2708     {
2709       /* Read non-empty page. */
2710       do
2711         {
2712           SVN_ERR(read_entry(stream, &item_offset, start_revision, result));
2713           offset = packed_stream_offset(stream);
2714         }
2715       while (offset < next_offset);
2716 
2717       /* We should now be exactly at the next offset, i.e. the numbers in
2718        * the stream cannot overlap into the next page description. */
2719       if (offset != next_offset)
2720         return svn_error_create(SVN_ERR_FS_INDEX_CORRUPTION, NULL,
2721              _("P2L page description overlaps with next page description"));
2722 
2723       /* if we haven't covered the cluster end yet, we must read the first
2724        * entry of the next page */
2725       if (item_offset < page_start + page_size)
2726         {
2727           SVN_ERR(packed_stream_get(&value, stream));
2728           item_offset = (apr_off_t)value;
2729           SVN_ERR(read_entry(stream, &item_offset,
2730                              start_revision, result));
2731         }
2732     }
2733 
2734   *entries = result;
2735 
2736   return SVN_NO_ERROR;
2737 }
2738 
2739 /* If it cannot be found in FS's caches, read the p2l index page selected
2740  * by BATON->OFFSET from *STREAM.  If the latter is yet to be constructed,
2741  * do so in STREAM_POOL.  Don't read the page if it precedes MIN_OFFSET.
2742  * Set *END to TRUE if the caller should stop refeching.
2743  *
2744  * *BATON will be updated with the selected page's info and SCRATCH_POOL
2745  * will be used for temporary allocations.  If the data is alread in the
2746  * cache, descrease *LEAKING_BUCKET and increase it otherwise.  With that
2747  * pattern we will still read all pages from the block even if some of
2748  * them survived in the cached.
2749  */
2750 static svn_error_t *
prefetch_p2l_page(svn_boolean_t * end,int * leaking_bucket,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,p2l_page_info_baton_t * baton,apr_off_t min_offset,apr_pool_t * scratch_pool)2751 prefetch_p2l_page(svn_boolean_t *end,
2752                   int *leaking_bucket,
2753                   svn_fs_t *fs,
2754                   svn_fs_x__revision_file_t *rev_file,
2755                   p2l_page_info_baton_t *baton,
2756                   apr_off_t min_offset,
2757                   apr_pool_t *scratch_pool)
2758 {
2759   svn_fs_x__data_t *ffd = fs->fsap_data;
2760   svn_boolean_t already_cached;
2761   apr_array_header_t *page;
2762   svn_fs_x__page_cache_key_t key = { 0 };
2763 
2764   /* fetch the page info */
2765   *end = FALSE;
2766   baton->revision = baton->first_revision;
2767   SVN_ERR(get_p2l_page_info(baton, rev_file, fs, scratch_pool));
2768   if (baton->start_offset < min_offset)
2769     {
2770       /* page outside limits -> stop prefetching */
2771       *end = TRUE;
2772       return SVN_NO_ERROR;
2773     }
2774 
2775   /* do we have that page in our caches already? */
2776   assert(baton->first_revision <= APR_UINT32_MAX);
2777   key.revision = (apr_uint32_t)baton->first_revision;
2778   key.is_packed = svn_fs_x__is_packed_rev(fs, baton->first_revision);
2779   key.page = baton->page_no;
2780   SVN_ERR(svn_cache__has_key(&already_cached, ffd->p2l_page_cache,
2781                              &key, scratch_pool));
2782 
2783   /* yes, already cached */
2784   if (already_cached)
2785     {
2786       /* stop prefetching if most pages are already cached. */
2787       if (!--*leaking_bucket)
2788         *end = TRUE;
2789 
2790       return SVN_NO_ERROR;
2791     }
2792 
2793   ++*leaking_bucket;
2794 
2795   /* read from disk */
2796   SVN_ERR(get_p2l_page(&page, rev_file, fs,
2797                        baton->first_revision,
2798                        baton->start_offset,
2799                        baton->next_offset,
2800                        baton->page_start,
2801                        baton->page_size,
2802                        scratch_pool));
2803 
2804   /* and put it into our cache */
2805   SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page, scratch_pool));
2806 
2807   return SVN_NO_ERROR;
2808 }
2809 
2810 /* Lookup & construct the baton and key information that we will need for
2811  * a P2L page cache lookup.  We want the page covering OFFSET in the rev /
2812  * pack file containing REVSION in FS.  Return the results in *PAGE_INFO_P
2813  * and *KEY_P.  Read data through REV_FILE.  Use SCRATCH_POOL for temporary
2814  * allocations.
2815  */
2816 static svn_error_t *
get_p2l_keys(p2l_page_info_baton_t * page_info_p,svn_fs_x__page_cache_key_t * key_p,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t revision,apr_off_t offset,apr_pool_t * scratch_pool)2817 get_p2l_keys(p2l_page_info_baton_t *page_info_p,
2818              svn_fs_x__page_cache_key_t *key_p,
2819              svn_fs_x__revision_file_t *rev_file,
2820              svn_fs_t *fs,
2821              svn_revnum_t revision,
2822              apr_off_t offset,
2823              apr_pool_t *scratch_pool)
2824 {
2825   p2l_page_info_baton_t page_info;
2826 
2827   /* request info for the index pages that describes the pack / rev file
2828    * contents at pack / rev file position OFFSET. */
2829   page_info.offset = offset;
2830   page_info.revision = revision;
2831   SVN_ERR(get_p2l_page_info(&page_info, rev_file, fs, scratch_pool));
2832 
2833   /* if the offset refers to a non-existent page, bail out */
2834   if (page_info.page_count <= page_info.page_no)
2835     return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
2836                              _("Offset %s too large in revision %ld"),
2837                              apr_off_t_toa(scratch_pool, offset), revision);
2838 
2839   /* return results */
2840   if (page_info_p)
2841     *page_info_p = page_info;
2842 
2843   /* construct cache key */
2844   if (key_p)
2845     {
2846       svn_fs_x__page_cache_key_t key = { 0 };
2847       assert(page_info.first_revision <= APR_UINT32_MAX);
2848       key.revision = (apr_uint32_t)page_info.first_revision;
2849       key.is_packed = svn_fs_x__is_packed_rev(fs, revision);
2850       key.page = page_info.page_no;
2851 
2852       *key_p = key;
2853     }
2854 
2855   return SVN_NO_ERROR;
2856 }
2857 
2858 /* qsort-compatible compare function that compares the OFFSET of the
2859  * svn_fs_x__p2l_entry_t in *LHS with the apr_off_t in *RHS. */
2860 static int
compare_start_p2l_entry(const void * lhs,const void * rhs)2861 compare_start_p2l_entry(const void *lhs,
2862                         const void *rhs)
2863 {
2864   const svn_fs_x__p2l_entry_t *entry = lhs;
2865   apr_off_t start = *(const apr_off_t*)rhs;
2866   apr_off_t diff = entry->offset - start;
2867 
2868   /* restrict result to int */
2869   return diff < 0 ? -1 : (diff == 0 ? 0 : 1);
2870 }
2871 
2872 /* From the PAGE_ENTRIES array of svn_fs_x__p2l_entry_t, ordered
2873  * by their OFFSET member, copy all elements overlapping the range
2874  * [BLOCK_START, BLOCK_END) to ENTRIES.  If RESOLVE_PTR is set, the ITEMS
2875  * sub-array in each entry needs to be de-serialized. */
2876 static void
append_p2l_entries(apr_array_header_t * entries,apr_array_header_t * page_entries,apr_off_t block_start,apr_off_t block_end,svn_boolean_t resolve_ptr)2877 append_p2l_entries(apr_array_header_t *entries,
2878                    apr_array_header_t *page_entries,
2879                    apr_off_t block_start,
2880                    apr_off_t block_end,
2881                    svn_boolean_t resolve_ptr)
2882 {
2883   const svn_fs_x__p2l_entry_t *entry;
2884   int idx = svn_sort__bsearch_lower_bound(page_entries, &block_start,
2885                                           compare_start_p2l_entry);
2886 
2887   /* start at the first entry that overlaps with BLOCK_START */
2888   if (idx > 0)
2889     {
2890       entry = &APR_ARRAY_IDX(page_entries, idx - 1, svn_fs_x__p2l_entry_t);
2891       if (entry->offset + entry->size > block_start)
2892         --idx;
2893     }
2894 
2895   /* copy all entries covering the requested range */
2896   for ( ; idx < page_entries->nelts; ++idx)
2897     {
2898       svn_fs_x__p2l_entry_t *copy;
2899       entry = &APR_ARRAY_IDX(page_entries, idx, svn_fs_x__p2l_entry_t);
2900       if (entry->offset >= block_end)
2901         break;
2902 
2903       /* Copy the entry record. */
2904       copy = apr_array_push(entries);
2905       *copy = *entry;
2906 
2907       /* Copy the items of that entries. */
2908       if (entry->item_count)
2909         {
2910           const svn_fs_x__id_t *items
2911             = resolve_ptr
2912             ? svn_temp_deserializer__ptr(page_entries->elts,
2913                                          (const void * const *)&entry->items)
2914             : entry->items;
2915 
2916           copy->items = apr_pmemdup(entries->pool, items,
2917                                     entry->item_count * sizeof(*items));
2918         }
2919     }
2920 }
2921 
2922 /* Auxilliary struct passed to p2l_entries_func selecting the relevant
2923  * data range. */
2924 typedef struct p2l_entries_baton_t
2925 {
2926   apr_off_t start;
2927   apr_off_t end;
2928 } p2l_entries_baton_t;
2929 
2930 /* Implement svn_cache__partial_getter_func_t: extract p2l entries from
2931  * the page in DATA which overlap the p2l_entries_baton_t in BATON.
2932  * The target array is already provided in *OUT.
2933  */
2934 static svn_error_t *
p2l_entries_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)2935 p2l_entries_func(void **out,
2936                  const void *data,
2937                  apr_size_t data_len,
2938                  void *baton,
2939                  apr_pool_t *result_pool)
2940 {
2941   apr_array_header_t *entries = *(apr_array_header_t **)out;
2942   const apr_array_header_t *raw_page = data;
2943   p2l_entries_baton_t *block = baton;
2944 
2945   /* Make PAGE a readable APR array. */
2946   apr_array_header_t page = *raw_page;
2947   page.elts = (void *)svn_temp_deserializer__ptr(raw_page,
2948                                     (const void * const *)&raw_page->elts);
2949 
2950   /* append relevant information to result */
2951   append_p2l_entries(entries, &page, block->start, block->end, TRUE);
2952 
2953   return SVN_NO_ERROR;
2954 }
2955 
2956 
2957 /* Body of svn_fs_x__p2l_index_lookup.  However, do a single index page
2958  * lookup and append the result to the ENTRIES array provided by the caller.
2959  * Use successive calls to cover larger ranges.
2960  */
2961 static svn_error_t *
p2l_index_lookup(apr_array_header_t * entries,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t revision,apr_off_t block_start,apr_off_t block_end,apr_pool_t * scratch_pool)2962 p2l_index_lookup(apr_array_header_t *entries,
2963                  svn_fs_x__revision_file_t *rev_file,
2964                  svn_fs_t *fs,
2965                  svn_revnum_t revision,
2966                  apr_off_t block_start,
2967                  apr_off_t block_end,
2968                  apr_pool_t *scratch_pool)
2969 {
2970   svn_fs_x__data_t *ffd = fs->fsap_data;
2971   svn_fs_x__page_cache_key_t key;
2972   svn_boolean_t is_cached = FALSE;
2973   p2l_page_info_baton_t page_info;
2974   apr_array_header_t *local_result = entries;
2975 
2976   /* baton selecting the relevant entries from the one page we access */
2977   p2l_entries_baton_t block;
2978   block.start = block_start;
2979   block.end = block_end;
2980 
2981   /* if we requested an empty range, the result would be empty */
2982   SVN_ERR_ASSERT(block_start < block_end);
2983 
2984   /* look for the fist page of the range in our cache */
2985   SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, block_start,
2986                        scratch_pool));
2987   SVN_ERR(svn_cache__get_partial((void**)&local_result, &is_cached,
2988                                  ffd->p2l_page_cache, &key, p2l_entries_func,
2989                                  &block, scratch_pool));
2990 
2991   if (!is_cached)
2992     {
2993       svn_boolean_t end;
2994       apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2995       apr_off_t original_page_start = page_info.page_start;
2996       int leaking_bucket = 4;
2997       p2l_page_info_baton_t prefetch_info = page_info;
2998       apr_array_header_t *page_entries;
2999 
3000       apr_off_t max_offset
3001         = APR_ALIGN(page_info.next_offset, ffd->block_size);
3002       apr_off_t min_offset
3003         = APR_ALIGN(page_info.start_offset, ffd->block_size) - ffd->block_size;
3004 
3005       /* Since we read index data in larger chunks, we probably got more
3006        * page data than we requested.  Parse & cache that until either we
3007        * encounter pages already cached or reach the end of the buffer.
3008        */
3009 
3010       /* pre-fetch preceding pages */
3011       end = FALSE;
3012       prefetch_info.offset = original_page_start;
3013       while (prefetch_info.offset >= prefetch_info.page_size && !end)
3014         {
3015           prefetch_info.offset -= prefetch_info.page_size;
3016           SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file,
3017                                     &prefetch_info, min_offset, iterpool));
3018           svn_pool_clear(iterpool);
3019         }
3020 
3021       /* fetch page from disk and put it into the cache */
3022       SVN_ERR(get_p2l_page(&page_entries, rev_file, fs,
3023                            page_info.first_revision,
3024                            page_info.start_offset,
3025                            page_info.next_offset,
3026                            page_info.page_start,
3027                            page_info.page_size, iterpool));
3028 
3029       /* The last cache entry must not end beyond the range covered by
3030        * this index.  The same applies for any subset of entries. */
3031       if (page_entries->nelts)
3032         {
3033           const svn_fs_x__p2l_entry_t *entry
3034             = &APR_ARRAY_IDX(page_entries, page_entries->nelts - 1,
3035                              svn_fs_x__p2l_entry_t);
3036           if (  entry->offset + entry->size
3037               > page_info.page_size * page_info.page_count)
3038             return svn_error_createf(SVN_ERR_FS_INDEX_OVERFLOW , NULL,
3039                                      _("Last P2L index entry extends beyond "
3040                                        "the last page in revision %ld."),
3041                                      revision);
3042         }
3043 
3044       SVN_ERR(svn_cache__set(ffd->p2l_page_cache, &key, page_entries,
3045                              iterpool));
3046 
3047       /* append relevant information to result */
3048       append_p2l_entries(entries, page_entries, block_start, block_end, FALSE);
3049 
3050       /* pre-fetch following pages */
3051       end = FALSE;
3052       leaking_bucket = 4;
3053       prefetch_info = page_info;
3054       prefetch_info.offset = original_page_start;
3055       while (   prefetch_info.next_offset < max_offset
3056              && prefetch_info.page_no + 1 < prefetch_info.page_count
3057              && !end)
3058         {
3059           prefetch_info.offset += prefetch_info.page_size;
3060           SVN_ERR(prefetch_p2l_page(&end, &leaking_bucket, fs, rev_file,
3061                                     &prefetch_info, min_offset, iterpool));
3062           svn_pool_clear(iterpool);
3063         }
3064 
3065       svn_pool_destroy(iterpool);
3066     }
3067 
3068   /* We access a valid page (otherwise, we had seen an error in the
3069    * get_p2l_keys request).  Hence, at least one entry must be found. */
3070   SVN_ERR_ASSERT(entries->nelts > 0);
3071 
3072   /* Add an "unused" entry if it extends beyond the end of the data file.
3073    * Since the index page size might be smaller than the current data
3074    * read block size, the trailing "unused" entry in this index may not
3075    * fully cover the end of the last block. */
3076   if (page_info.page_no + 1 >= page_info.page_count)
3077     {
3078       svn_fs_x__p2l_entry_t *entry
3079         = &APR_ARRAY_IDX(entries, entries->nelts-1, svn_fs_x__p2l_entry_t);
3080 
3081       apr_off_t entry_end = entry->offset + entry->size;
3082       if (entry_end < block_end)
3083         {
3084           if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED)
3085             {
3086               /* extend the terminal filler */
3087               entry->size = block_end - entry->offset;
3088             }
3089           else
3090             {
3091               /* No terminal filler. Add one. */
3092               entry = apr_array_push(entries);
3093               entry->offset = entry_end;
3094               entry->size = block_end - entry_end;
3095               entry->type = SVN_FS_X__ITEM_TYPE_UNUSED;
3096               entry->fnv1_checksum = 0;
3097               entry->item_count = 0;
3098               entry->items = NULL;
3099             }
3100         }
3101     }
3102 
3103   return SVN_NO_ERROR;
3104 }
3105 
3106 svn_error_t *
svn_fs_x__p2l_index_lookup(apr_array_header_t ** entries,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,svn_revnum_t revision,apr_off_t block_start,apr_off_t block_size,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3107 svn_fs_x__p2l_index_lookup(apr_array_header_t **entries,
3108                            svn_fs_t *fs,
3109                            svn_fs_x__revision_file_t *rev_file,
3110                            svn_revnum_t revision,
3111                            apr_off_t block_start,
3112                            apr_off_t block_size,
3113                            apr_pool_t *result_pool,
3114                            apr_pool_t *scratch_pool)
3115 {
3116   apr_off_t block_end = block_start + block_size;
3117 
3118   /* the receiving container */
3119   int last_count = 0;
3120   apr_array_header_t *result = apr_array_make(result_pool, 16,
3121                                               sizeof(svn_fs_x__p2l_entry_t));
3122 
3123   /* Fetch entries page-by-page.  Since the p2l index is supposed to cover
3124    * every single byte in the rev / pack file - even unused sections -
3125    * every iteration must result in some progress. */
3126   while (block_start < block_end)
3127     {
3128       svn_fs_x__p2l_entry_t *entry;
3129       SVN_ERR(p2l_index_lookup(result, rev_file, fs, revision, block_start,
3130                                block_end, scratch_pool));
3131       SVN_ERR_ASSERT(result->nelts > 0);
3132 
3133       /* continue directly behind last item */
3134       entry = &APR_ARRAY_IDX(result, result->nelts-1, svn_fs_x__p2l_entry_t);
3135       block_start = entry->offset + entry->size;
3136 
3137       /* Some paranoia check.  Successive iterations should never return
3138        * duplicates but if it did, we might get into trouble later on. */
3139       if (last_count > 0 && last_count < result->nelts)
3140         {
3141            entry = &APR_ARRAY_IDX(result, last_count - 1,
3142                                   svn_fs_x__p2l_entry_t);
3143            SVN_ERR_ASSERT(APR_ARRAY_IDX(result, last_count,
3144                                         svn_fs_x__p2l_entry_t).offset
3145                           >= entry->offset + entry->size);
3146         }
3147 
3148       last_count = result->nelts;
3149     }
3150 
3151   *entries = result;
3152   return SVN_NO_ERROR;
3153 }
3154 
3155 /* compare_fn_t comparing a svn_fs_x__p2l_entry_t at LHS with an offset
3156  * RHS.
3157  */
3158 static int
compare_p2l_entry_offsets(const void * lhs,const void * rhs)3159 compare_p2l_entry_offsets(const void *lhs,
3160                           const void *rhs)
3161 {
3162   const svn_fs_x__p2l_entry_t *entry = (const svn_fs_x__p2l_entry_t *)lhs;
3163   apr_off_t offset = *(const apr_off_t *)rhs;
3164 
3165   return entry->offset < offset ? -1 : (entry->offset == offset ? 0 : 1);
3166 }
3167 
3168 /* Cached data extraction utility.  DATA is a P2L index page, e.g. an APR
3169  * array of svn_fs_x__p2l_entry_t elements.  Return the entry for the item,
3170  * allocated in RESULT_POOL, starting at OFFSET or NULL if that's not an
3171  * the start offset of any item. Use SCRATCH_POOL for temporary allocations.
3172  */
3173 static svn_fs_x__p2l_entry_t *
get_p2l_entry_from_cached_page(const void * data,apr_off_t offset,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3174 get_p2l_entry_from_cached_page(const void *data,
3175                                apr_off_t offset,
3176                                apr_pool_t *result_pool,
3177                                apr_pool_t *scratch_pool)
3178 {
3179   /* resolve all pointer values of in-cache data */
3180   const apr_array_header_t *page = data;
3181   apr_array_header_t *entries = apr_pmemdup(scratch_pool, page,
3182                                             sizeof(*page));
3183   svn_fs_x__p2l_entry_t *entry;
3184 
3185   entries->elts = (char *)svn_temp_deserializer__ptr(page,
3186                                      (const void *const *)&page->elts);
3187 
3188   /* search of the offset we want */
3189   entry = svn_sort__array_lookup(entries, &offset, NULL,
3190       (int (*)(const void *, const void *))compare_p2l_entry_offsets);
3191 
3192   /* return it, if it is a perfect match */
3193   if (entry)
3194     {
3195       svn_fs_x__p2l_entry_t *result
3196         = apr_pmemdup(result_pool, entry, sizeof(*result));
3197       result->items
3198         = (svn_fs_x__id_t *)svn_temp_deserializer__ptr(entries->elts,
3199                                      (const void *const *)&entry->items);
3200       return result;
3201     }
3202 
3203   return NULL;
3204 }
3205 
3206 /* Implements svn_cache__partial_getter_func_t for P2L index pages, copying
3207  * the entry for the apr_off_t at BATON into *OUT.  *OUT will be NULL if
3208  * there is no matching entry in the index page at DATA.
3209  */
3210 static svn_error_t *
p2l_entry_lookup_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)3211 p2l_entry_lookup_func(void **out,
3212                       const void *data,
3213                       apr_size_t data_len,
3214                       void *baton,
3215                       apr_pool_t *result_pool)
3216 {
3217   svn_fs_x__p2l_entry_t *entry
3218     = get_p2l_entry_from_cached_page(data, *(apr_off_t *)baton, result_pool,
3219                                      result_pool);
3220 
3221   *out = entry && entry->offset == *(apr_off_t *)baton
3222        ? svn_fs_x__p2l_entry_dup(entry, result_pool)
3223        : NULL;
3224 
3225   return SVN_NO_ERROR;
3226 }
3227 
3228 static svn_error_t *
p2l_entry_lookup(svn_fs_x__p2l_entry_t ** entry_p,svn_fs_x__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t revision,apr_off_t offset,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3229 p2l_entry_lookup(svn_fs_x__p2l_entry_t **entry_p,
3230                  svn_fs_x__revision_file_t *rev_file,
3231                  svn_fs_t *fs,
3232                  svn_revnum_t revision,
3233                  apr_off_t offset,
3234                  apr_pool_t *result_pool,
3235                  apr_pool_t *scratch_pool)
3236 {
3237   svn_fs_x__data_t *ffd = fs->fsap_data;
3238   svn_fs_x__page_cache_key_t key = { 0 };
3239   svn_boolean_t is_cached = FALSE;
3240   p2l_page_info_baton_t page_info;
3241 
3242   /* look for this info in our cache */
3243   SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, offset,
3244                        scratch_pool));
3245   SVN_ERR(svn_cache__get_partial((void**)entry_p, &is_cached,
3246                                  ffd->p2l_page_cache, &key,
3247                                  p2l_entry_lookup_func, &offset,
3248                                  result_pool));
3249   if (!is_cached)
3250     {
3251       /* do a standard index lookup.  This is will automatically prefetch
3252        * data to speed up future lookups. */
3253       apr_array_header_t *entries = apr_array_make(result_pool, 1,
3254                                                    sizeof(**entry_p));
3255       SVN_ERR(p2l_index_lookup(entries, rev_file, fs, revision, offset,
3256                                offset + 1, scratch_pool));
3257 
3258       /* Find the entry that we want. */
3259       *entry_p = svn_sort__array_lookup(entries, &offset, NULL,
3260           (int (*)(const void *, const void *))compare_p2l_entry_offsets);
3261     }
3262 
3263   return SVN_NO_ERROR;
3264 }
3265 
3266 svn_error_t *
svn_fs_x__p2l_entry_lookup(svn_fs_x__p2l_entry_t ** entry_p,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,svn_revnum_t revision,apr_off_t offset,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3267 svn_fs_x__p2l_entry_lookup(svn_fs_x__p2l_entry_t **entry_p,
3268                            svn_fs_t *fs,
3269                            svn_fs_x__revision_file_t *rev_file,
3270                            svn_revnum_t revision,
3271                            apr_off_t offset,
3272                            apr_pool_t *result_pool,
3273                            apr_pool_t *scratch_pool)
3274 {
3275   /* look for this info in our cache */
3276   SVN_ERR(p2l_entry_lookup(entry_p, rev_file, fs, revision, offset,
3277                            result_pool, scratch_pool));
3278 
3279   return SVN_NO_ERROR;
3280 }
3281 
3282 /* Baton structure for p2l_item_lookup_func.  It describes which sub_item
3283  * info shall be returned.
3284  */
3285 typedef struct p2l_item_lookup_baton_t
3286 {
3287   /* file offset to find the P2L index entry for */
3288   apr_off_t offset;
3289 
3290   /* return the sub-item at this position within that entry */
3291   apr_uint32_t sub_item;
3292 } p2l_item_lookup_baton_t;
3293 
3294 /* Implements svn_cache__partial_getter_func_t for P2L index pages, copying
3295  * the svn_fs_x__id_t for the item described 2l_item_lookup_baton_t
3296  * *BATON.  *OUT will be NULL if there is no matching index entry or the
3297  * sub-item is out of range.
3298  */
3299 static svn_error_t *
p2l_item_lookup_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)3300 p2l_item_lookup_func(void **out,
3301                      const void *data,
3302                      apr_size_t data_len,
3303                      void *baton,
3304                      apr_pool_t *result_pool)
3305 {
3306   p2l_item_lookup_baton_t *lookup_baton = baton;
3307   svn_fs_x__p2l_entry_t *entry
3308     = get_p2l_entry_from_cached_page(data, lookup_baton->offset, result_pool,
3309                                      result_pool);
3310 
3311   *out =    entry
3312          && entry->offset == lookup_baton->offset
3313          && entry->item_count > lookup_baton->sub_item
3314        ? apr_pmemdup(result_pool,
3315                      entry->items + lookup_baton->sub_item,
3316                      sizeof(*entry->items))
3317        : NULL;
3318 
3319   return SVN_NO_ERROR;
3320 }
3321 
3322 svn_error_t *
svn_fs_x__p2l_item_lookup(svn_fs_x__id_t ** item,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,svn_revnum_t revision,apr_off_t offset,apr_uint32_t sub_item,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3323 svn_fs_x__p2l_item_lookup(svn_fs_x__id_t **item,
3324                           svn_fs_t *fs,
3325                           svn_fs_x__revision_file_t *rev_file,
3326                           svn_revnum_t revision,
3327                           apr_off_t offset,
3328                           apr_uint32_t sub_item,
3329                           apr_pool_t *result_pool,
3330                           apr_pool_t *scratch_pool)
3331 {
3332   svn_fs_x__data_t *ffd = fs->fsap_data;
3333   svn_fs_x__page_cache_key_t key = { 0 };
3334   svn_boolean_t is_cached = FALSE;
3335   p2l_page_info_baton_t page_info;
3336   p2l_item_lookup_baton_t baton;
3337 
3338   *item = NULL;
3339 
3340   /* look for this info in our cache */
3341   SVN_ERR(get_p2l_keys(&page_info, &key, rev_file, fs, revision, offset,
3342                        scratch_pool));
3343   baton.offset = offset;
3344   baton.sub_item = sub_item;
3345   SVN_ERR(svn_cache__get_partial((void**)item, &is_cached,
3346                                  ffd->p2l_page_cache, &key,
3347                                  p2l_item_lookup_func, &baton, result_pool));
3348   if (!is_cached)
3349     {
3350       /* do a standard index lookup.  This is will automatically prefetch
3351        * data to speed up future lookups. */
3352       svn_fs_x__p2l_entry_t *entry;
3353       SVN_ERR(p2l_entry_lookup(&entry, rev_file, fs, revision, offset,
3354                                result_pool, scratch_pool));
3355 
3356       /* return result */
3357       if (entry && entry->item_count > sub_item)
3358         *item = apr_pmemdup(result_pool, entry->items + sub_item,
3359                             sizeof(**item));
3360     }
3361 
3362   return SVN_NO_ERROR;
3363 }
3364 
3365 /* Implements svn_cache__partial_getter_func_t for P2L headers, setting *OUT
3366  * to the largest the first offset not covered by this P2L index.
3367  */
3368 static svn_error_t *
p2l_get_max_offset_func(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)3369 p2l_get_max_offset_func(void **out,
3370                         const void *data,
3371                         apr_size_t data_len,
3372                         void *baton,
3373                         apr_pool_t *result_pool)
3374 {
3375   const p2l_header_t *header = data;
3376   apr_off_t max_offset = header->file_size;
3377   *out = apr_pmemdup(result_pool, &max_offset, sizeof(max_offset));
3378 
3379   return SVN_NO_ERROR;
3380 }
3381 
3382 svn_error_t *
svn_fs_x__p2l_get_max_offset(apr_off_t * offset,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,svn_revnum_t revision,apr_pool_t * scratch_pool)3383 svn_fs_x__p2l_get_max_offset(apr_off_t *offset,
3384                              svn_fs_t *fs,
3385                              svn_fs_x__revision_file_t *rev_file,
3386                              svn_revnum_t revision,
3387                              apr_pool_t *scratch_pool)
3388 {
3389   svn_fs_x__data_t *ffd = fs->fsap_data;
3390   p2l_header_t *header;
3391   svn_boolean_t is_cached = FALSE;
3392   apr_off_t *offset_p;
3393 
3394   /* look for the header data in our cache */
3395   svn_fs_x__pair_cache_key_t key;
3396   key.revision = base_revision(fs, revision);
3397   key.second = svn_fs_x__is_packed_rev(fs, revision);
3398 
3399   SVN_ERR(svn_cache__get_partial((void **)&offset_p, &is_cached,
3400                                  ffd->p2l_header_cache, &key,
3401                                  p2l_get_max_offset_func, NULL,
3402                                  scratch_pool));
3403   if (is_cached)
3404     {
3405       *offset = *offset_p;
3406       return SVN_NO_ERROR;
3407     }
3408 
3409   SVN_ERR(get_p2l_header(&header, rev_file, fs, revision, scratch_pool,
3410                          scratch_pool));
3411   *offset = header->file_size;
3412 
3413   return SVN_NO_ERROR;
3414 }
3415 
3416 svn_error_t *
svn_fs_x__item_offset(apr_off_t * absolute_position,apr_uint32_t * sub_item,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,const svn_fs_x__id_t * item_id,apr_pool_t * scratch_pool)3417 svn_fs_x__item_offset(apr_off_t *absolute_position,
3418                       apr_uint32_t *sub_item,
3419                       svn_fs_t *fs,
3420                       svn_fs_x__revision_file_t *rev_file,
3421                       const svn_fs_x__id_t *item_id,
3422                       apr_pool_t *scratch_pool)
3423 {
3424   if (svn_fs_x__is_txn(item_id->change_set))
3425     SVN_ERR(l2p_proto_index_lookup(absolute_position, sub_item, fs,
3426                                    svn_fs_x__get_txn_id(item_id->change_set),
3427                                    item_id->number, scratch_pool));
3428   else
3429     SVN_ERR(l2p_index_lookup(absolute_position, sub_item, fs, rev_file,
3430                              svn_fs_x__get_revnum(item_id->change_set),
3431                              item_id->number, scratch_pool));
3432 
3433   return SVN_NO_ERROR;
3434 }
3435 
3436 /* Calculate the FNV1 checksum over the offset range in REV_FILE, covered by
3437  * ENTRY.  Store the result in ENTRY->FNV1_CHECKSUM.  Use SCRATCH_POOL for
3438  * temporary allocations. */
3439 static svn_error_t *
calc_fnv1(svn_fs_x__p2l_entry_t * entry,svn_fs_x__revision_file_t * rev_file,apr_pool_t * scratch_pool)3440 calc_fnv1(svn_fs_x__p2l_entry_t *entry,
3441           svn_fs_x__revision_file_t *rev_file,
3442           apr_pool_t *scratch_pool)
3443 {
3444   unsigned char buffer[4096];
3445   svn_checksum_t *checksum;
3446   svn_checksum_ctx_t *context
3447     = svn_checksum_ctx_create(svn_checksum_fnv1a_32x4, scratch_pool);
3448   apr_off_t size = entry->size;
3449 
3450   /* Special rules apply to unused sections / items.  The data must be a
3451    * sequence of NUL bytes (not checked here) and the checksum is fixed to 0.
3452    */
3453   if (entry->type == SVN_FS_X__ITEM_TYPE_UNUSED)
3454     {
3455       entry->fnv1_checksum = 0;
3456       return SVN_NO_ERROR;
3457     }
3458 
3459   /* Read the block and feed it to the checksum calculator. */
3460   SVN_ERR(svn_fs_x__rev_file_seek(rev_file, NULL, entry->offset));
3461   while (size > 0)
3462     {
3463       apr_size_t to_read = size > sizeof(buffer)
3464                          ? sizeof(buffer)
3465                          : (apr_size_t)size;
3466       SVN_ERR(svn_fs_x__rev_file_read(rev_file, buffer, to_read));
3467       SVN_ERR(svn_checksum_update(context, buffer, to_read));
3468       size -= to_read;
3469     }
3470 
3471   /* Store final checksum in ENTRY. */
3472   SVN_ERR(svn_checksum_final(&checksum, context, scratch_pool));
3473   entry->fnv1_checksum = ntohl(*(const apr_uint32_t *)checksum->digest);
3474 
3475   return SVN_NO_ERROR;
3476 }
3477 
3478 /*
3479  * Index (re-)creation utilities.
3480  */
3481 
3482 svn_error_t *
svn_fs_x__p2l_index_from_p2l_entries(const char ** protoname,svn_fs_t * fs,svn_fs_x__revision_file_t * rev_file,apr_array_header_t * entries,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3483 svn_fs_x__p2l_index_from_p2l_entries(const char **protoname,
3484                                      svn_fs_t *fs,
3485                                      svn_fs_x__revision_file_t *rev_file,
3486                                      apr_array_header_t *entries,
3487                                      apr_pool_t *result_pool,
3488                                      apr_pool_t *scratch_pool)
3489 {
3490   apr_file_t *proto_index;
3491 
3492   /* Use a subpool for immediate temp file cleanup at the end of this
3493    * function. */
3494   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3495   int i;
3496 
3497   /* Create a proto-index file. */
3498   SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL,
3499                                    svn_io_file_del_on_pool_cleanup,
3500                                    result_pool, scratch_pool));
3501   SVN_ERR(svn_fs_x__p2l_proto_index_open(&proto_index, *protoname,
3502                                          scratch_pool));
3503 
3504   /* Write ENTRIES to proto-index file and calculate checksums as we go. */
3505   for (i = 0; i < entries->nelts; ++i)
3506     {
3507       svn_fs_x__p2l_entry_t *entry
3508         = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
3509       svn_pool_clear(iterpool);
3510 
3511       SVN_ERR(calc_fnv1(entry, rev_file, iterpool));
3512       SVN_ERR(svn_fs_x__p2l_proto_index_add_entry(proto_index, entry,
3513                                                   iterpool));
3514     }
3515 
3516   /* Convert proto-index into final index and move it into position.
3517    * Note that REV_FILE contains the start revision of the shard file if it
3518    * has been packed while REVISION may be somewhere in the middle.  For
3519    * non-packed shards, they will have identical values. */
3520   SVN_ERR(svn_io_file_close(proto_index, iterpool));
3521 
3522   /* Temp file cleanup. */
3523   svn_pool_destroy(iterpool);
3524 
3525   return SVN_NO_ERROR;
3526 }
3527 
3528 /* Decorator for svn_fs_x__p2l_entry_t that associates it with a sorted
3529  * variant of its ITEMS array.
3530  */
3531 typedef struct sub_item_ordered_t
3532 {
3533   /* ENTRY that got wrapped */
3534   svn_fs_x__p2l_entry_t *entry;
3535 
3536   /* Array of pointers into ENTRY->ITEMS, sorted by their revision member
3537    * _descending_ order.  May be NULL if ENTRY->ITEM_COUNT < 2. */
3538   svn_fs_x__id_t **order;
3539 } sub_item_ordered_t;
3540 
3541 /* implements compare_fn_t. Place LHS before RHS, if the latter is younger.
3542  * Used to sort sub_item_ordered_t::order
3543  */
3544 static int
compare_sub_items(const svn_fs_x__id_t * const * lhs,const svn_fs_x__id_t * const * rhs)3545 compare_sub_items(const svn_fs_x__id_t * const * lhs,
3546                   const svn_fs_x__id_t * const * rhs)
3547 {
3548   return (*lhs)->change_set < (*rhs)->change_set
3549        ? 1
3550        : ((*lhs)->change_set > (*rhs)->change_set ? -1 : 0);
3551 }
3552 
3553 /* implements compare_fn_t. Place LHS before RHS, if the latter belongs to
3554  * a newer revision.
3555  */
3556 static int
compare_p2l_info_rev(const sub_item_ordered_t * lhs,const sub_item_ordered_t * rhs)3557 compare_p2l_info_rev(const sub_item_ordered_t * lhs,
3558                      const sub_item_ordered_t * rhs)
3559 {
3560   svn_fs_x__id_t *lhs_part;
3561   svn_fs_x__id_t *rhs_part;
3562 
3563   assert(lhs != rhs);
3564   if (lhs->entry->item_count == 0)
3565     return rhs->entry->item_count == 0 ? 0 : -1;
3566   if (rhs->entry->item_count == 0)
3567     return 1;
3568 
3569   lhs_part = lhs->order ? lhs->order[lhs->entry->item_count - 1]
3570                         : &lhs->entry->items[0];
3571   rhs_part = rhs->order ? rhs->order[rhs->entry->item_count - 1]
3572                         : &rhs->entry->items[0];
3573 
3574   if (lhs_part->change_set == rhs_part->change_set)
3575     return 0;
3576 
3577   return lhs_part->change_set < rhs_part->change_set ? -1 : 1;
3578 }
3579 
3580 svn_error_t *
svn_fs_x__l2p_index_from_p2l_entries(const char ** protoname,svn_fs_t * fs,apr_array_header_t * entries,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3581 svn_fs_x__l2p_index_from_p2l_entries(const char **protoname,
3582                                      svn_fs_t *fs,
3583                                      apr_array_header_t *entries,
3584                                      apr_pool_t *result_pool,
3585                                      apr_pool_t *scratch_pool)
3586 {
3587   apr_file_t *proto_index;
3588 
3589   /* Use a subpool for immediate temp file cleanup at the end of this
3590    * function. */
3591   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
3592   svn_revnum_t prev_rev = SVN_INVALID_REVNUM;
3593   int i;
3594   apr_uint32_t k;
3595   svn_priority_queue__t *queue;
3596   apr_size_t count = 0;
3597   apr_array_header_t *sub_item_orders;
3598 
3599   /* Create the temporary proto-rev file. */
3600   SVN_ERR(svn_io_open_unique_file3(NULL, protoname, NULL,
3601                                    svn_io_file_del_on_pool_cleanup,
3602                                    result_pool, scratch_pool));
3603   SVN_ERR(svn_fs_x__l2p_proto_index_open(&proto_index, *protoname,
3604                                          scratch_pool));
3605 
3606 
3607   /* wrap P2L entries such that we have access to the sub-items in revision
3608      order.  The ENTRY_COUNT member will point to the next item to read+1. */
3609   sub_item_orders = apr_array_make(scratch_pool, entries->nelts,
3610                                    sizeof(sub_item_ordered_t));
3611   sub_item_orders->nelts = entries->nelts;
3612 
3613   for (i = 0; i < entries->nelts; ++i)
3614     {
3615       svn_fs_x__p2l_entry_t *entry
3616         = APR_ARRAY_IDX(entries, i, svn_fs_x__p2l_entry_t *);
3617       sub_item_ordered_t *ordered
3618         = &APR_ARRAY_IDX(sub_item_orders, i, sub_item_ordered_t);
3619 
3620       /* skip unused regions (e.g. padding) */
3621       if (entry->item_count == 0)
3622         {
3623           --sub_item_orders->nelts;
3624           continue;
3625         }
3626 
3627       assert(entry);
3628       ordered->entry = entry;
3629       count += entry->item_count;
3630 
3631       if (entry->item_count > 1)
3632         {
3633           ordered->order
3634             = apr_palloc(scratch_pool,
3635                          sizeof(*ordered->order) * entry->item_count);
3636           for (k = 0; k < entry->item_count; ++k)
3637             ordered->order[k] = &entry->items[k];
3638 
3639           qsort(ordered->order, entry->item_count, sizeof(*ordered->order),
3640                 (int (*)(const void *, const void *))compare_sub_items);
3641         }
3642     }
3643 
3644   /* we need to write the index in ascending revision order */
3645   queue = svn_priority_queue__create
3646             (sub_item_orders,
3647              (int (*)(const void *, const void *))compare_p2l_info_rev);
3648 
3649   /* write index entries */
3650   for (i = 0; i < count; ++i)
3651     {
3652       svn_fs_x__id_t *sub_item;
3653       sub_item_ordered_t *ordered = svn_priority_queue__peek(queue);
3654 
3655       if (ordered->entry->item_count > 0)
3656         {
3657           /* if there is only one item, we skip the overhead of having an
3658              extra array for the item order */
3659           sub_item = ordered->order
3660                    ? ordered->order[ordered->entry->item_count - 1]
3661                    : &ordered->entry->items[0];
3662 
3663           /* next revision? */
3664           if (prev_rev != svn_fs_x__get_revnum(sub_item->change_set))
3665             {
3666               prev_rev = svn_fs_x__get_revnum(sub_item->change_set);
3667               SVN_ERR(svn_fs_x__l2p_proto_index_add_revision
3668                           (proto_index, iterpool));
3669             }
3670 
3671           /* add entry */
3672           SVN_ERR(svn_fs_x__l2p_proto_index_add_entry
3673                       (proto_index, ordered->entry->offset,
3674                       (apr_uint32_t)(sub_item - ordered->entry->items),
3675                       sub_item->number, iterpool));
3676 
3677           /* make ITEM_COUNT point the next sub-item to use+1 */
3678           --ordered->entry->item_count;
3679         }
3680 
3681       /* process remaining sub-items (if any) of that container later */
3682       if (ordered->entry->item_count)
3683         svn_priority_queue__update(queue);
3684       else
3685         svn_priority_queue__pop(queue);
3686 
3687       /* keep memory usage in check */
3688       if (i % 256 == 0)
3689         svn_pool_clear(iterpool);
3690     }
3691 
3692   /* Convert proto-index into final index and move it into position. */
3693   SVN_ERR(svn_io_file_close(proto_index, iterpool));
3694 
3695   /* Temp file cleanup. */
3696   svn_pool_destroy(iterpool);
3697 
3698   return SVN_NO_ERROR;
3699 }
3700 
3701 
3702 /*
3703  * Standard (de-)serialization functions
3704  */
3705 
3706 svn_error_t *
svn_fs_x__serialize_l2p_header(void ** data,apr_size_t * data_len,void * in,apr_pool_t * pool)3707 svn_fs_x__serialize_l2p_header(void **data,
3708                                apr_size_t *data_len,
3709                                void *in,
3710                                apr_pool_t *pool)
3711 {
3712   l2p_header_t *header = in;
3713   svn_temp_serializer__context_t *context;
3714   svn_stringbuf_t *serialized;
3715   apr_size_t page_count = header->page_table_index[header->revision_count];
3716   apr_size_t page_table_size = page_count * sizeof(*header->page_table);
3717   apr_size_t index_size
3718     = (header->revision_count + 1) * sizeof(*header->page_table_index);
3719   apr_size_t data_size = sizeof(*header) + index_size + page_table_size;
3720 
3721   /* serialize header and all its elements */
3722   context = svn_temp_serializer__init(header,
3723                                       sizeof(*header),
3724                                       data_size + 32,
3725                                       pool);
3726 
3727   /* page table index array */
3728   svn_temp_serializer__add_leaf(context,
3729                                 (const void * const *)&header->page_table_index,
3730                                 index_size);
3731 
3732   /* page table array */
3733   svn_temp_serializer__add_leaf(context,
3734                                 (const void * const *)&header->page_table,
3735                                 page_table_size);
3736 
3737   /* return the serialized result */
3738   serialized = svn_temp_serializer__get(context);
3739 
3740   *data = serialized->data;
3741   *data_len = serialized->len;
3742 
3743   return SVN_NO_ERROR;
3744 }
3745 
3746 svn_error_t *
svn_fs_x__deserialize_l2p_header(void ** out,void * data,apr_size_t data_len,apr_pool_t * result_pool)3747 svn_fs_x__deserialize_l2p_header(void **out,
3748                                  void *data,
3749                                  apr_size_t data_len,
3750                                  apr_pool_t *result_pool)
3751 {
3752   l2p_header_t *header = (l2p_header_t *)data;
3753 
3754   /* resolve the pointers in the struct */
3755   svn_temp_deserializer__resolve(header, (void**)&header->page_table_index);
3756   svn_temp_deserializer__resolve(header, (void**)&header->page_table);
3757 
3758   /* done */
3759   *out = header;
3760 
3761   return SVN_NO_ERROR;
3762 }
3763 
3764 svn_error_t *
svn_fs_x__serialize_l2p_page(void ** data,apr_size_t * data_len,void * in,apr_pool_t * pool)3765 svn_fs_x__serialize_l2p_page(void **data,
3766                              apr_size_t *data_len,
3767                              void *in,
3768                              apr_pool_t *pool)
3769 {
3770   l2p_page_t *page = in;
3771   svn_temp_serializer__context_t *context;
3772   svn_stringbuf_t *serialized;
3773   apr_size_t of_table_size = page->entry_count * sizeof(*page->offsets);
3774   apr_size_t si_table_size = page->entry_count * sizeof(*page->sub_items);
3775 
3776   /* serialize struct and all its elements */
3777   context = svn_temp_serializer__init(page,
3778                                       sizeof(*page),
3779                                         of_table_size + si_table_size
3780                                       + sizeof(*page) + 32,
3781                                       pool);
3782 
3783   /* offsets and sub_items arrays */
3784   svn_temp_serializer__add_leaf(context,
3785                                 (const void * const *)&page->offsets,
3786                                 of_table_size);
3787   svn_temp_serializer__add_leaf(context,
3788                                 (const void * const *)&page->sub_items,
3789                                 si_table_size);
3790 
3791   /* return the serialized result */
3792   serialized = svn_temp_serializer__get(context);
3793 
3794   *data = serialized->data;
3795   *data_len = serialized->len;
3796 
3797   return SVN_NO_ERROR;
3798 }
3799 
3800 svn_error_t *
svn_fs_x__deserialize_l2p_page(void ** out,void * data,apr_size_t data_len,apr_pool_t * result_pool)3801 svn_fs_x__deserialize_l2p_page(void **out,
3802                                void *data,
3803                                apr_size_t data_len,
3804                                apr_pool_t *result_pool)
3805 {
3806   l2p_page_t *page = data;
3807 
3808   /* resolve the pointers in the struct */
3809   svn_temp_deserializer__resolve(page, (void**)&page->offsets);
3810   svn_temp_deserializer__resolve(page, (void**)&page->sub_items);
3811 
3812   /* done */
3813   *out = page;
3814 
3815   return SVN_NO_ERROR;
3816 }
3817 
3818 svn_error_t *
svn_fs_x__serialize_p2l_header(void ** data,apr_size_t * data_len,void * in,apr_pool_t * pool)3819 svn_fs_x__serialize_p2l_header(void **data,
3820                                apr_size_t *data_len,
3821                                void *in,
3822                                apr_pool_t *pool)
3823 {
3824   p2l_header_t *header = in;
3825   svn_temp_serializer__context_t *context;
3826   svn_stringbuf_t *serialized;
3827   apr_size_t table_size = (header->page_count + 1) * sizeof(*header->offsets);
3828 
3829   /* serialize header and all its elements */
3830   context = svn_temp_serializer__init(header,
3831                                       sizeof(*header),
3832                                       table_size + sizeof(*header) + 32,
3833                                       pool);
3834 
3835   /* offsets array */
3836   svn_temp_serializer__add_leaf(context,
3837                                 (const void * const *)&header->offsets,
3838                                 table_size);
3839 
3840   /* return the serialized result */
3841   serialized = svn_temp_serializer__get(context);
3842 
3843   *data = serialized->data;
3844   *data_len = serialized->len;
3845 
3846   return SVN_NO_ERROR;
3847 }
3848 
3849 svn_error_t *
svn_fs_x__deserialize_p2l_header(void ** out,void * data,apr_size_t data_len,apr_pool_t * result_pool)3850 svn_fs_x__deserialize_p2l_header(void **out,
3851                                  void *data,
3852                                  apr_size_t data_len,
3853                                  apr_pool_t *result_pool)
3854 {
3855   p2l_header_t *header = data;
3856 
3857   /* resolve the only pointer in the struct */
3858   svn_temp_deserializer__resolve(header, (void**)&header->offsets);
3859 
3860   /* done */
3861   *out = header;
3862 
3863   return SVN_NO_ERROR;
3864 }
3865 
3866 svn_error_t *
svn_fs_x__serialize_p2l_page(void ** data,apr_size_t * data_len,void * in,apr_pool_t * pool)3867 svn_fs_x__serialize_p2l_page(void **data,
3868                              apr_size_t *data_len,
3869                              void *in,
3870                              apr_pool_t *pool)
3871 {
3872   apr_array_header_t *page = in;
3873   svn_temp_serializer__context_t *context;
3874   svn_stringbuf_t *serialized;
3875   apr_size_t table_size = page->elt_size * page->nelts;
3876   svn_fs_x__p2l_entry_t *entries = (svn_fs_x__p2l_entry_t *)page->elts;
3877   int i;
3878 
3879   /* serialize array header and all its elements */
3880   context = svn_temp_serializer__init(page,
3881                                       sizeof(*page),
3882                                       table_size + sizeof(*page) + 32,
3883                                       pool);
3884 
3885   /* items in the array */
3886   svn_temp_serializer__push(context,
3887                             (const void * const *)&page->elts,
3888                             table_size);
3889 
3890   for (i = 0; i < page->nelts; ++i)
3891     svn_temp_serializer__add_leaf(context,
3892                                   (const void * const *)&entries[i].items,
3893                                     entries[i].item_count
3894                                   * sizeof(*entries[i].items));
3895 
3896   svn_temp_serializer__pop(context);
3897 
3898   /* return the serialized result */
3899   serialized = svn_temp_serializer__get(context);
3900 
3901   *data = serialized->data;
3902   *data_len = serialized->len;
3903 
3904   return SVN_NO_ERROR;
3905 }
3906 
3907 svn_error_t *
svn_fs_x__deserialize_p2l_page(void ** out,void * data,apr_size_t data_len,apr_pool_t * result_pool)3908 svn_fs_x__deserialize_p2l_page(void **out,
3909                                void *data,
3910                                apr_size_t data_len,
3911                                apr_pool_t *result_pool)
3912 {
3913   apr_array_header_t *page = (apr_array_header_t *)data;
3914   svn_fs_x__p2l_entry_t *entries;
3915   int i;
3916 
3917   /* resolve the only pointer in the struct */
3918   svn_temp_deserializer__resolve(page, (void**)&page->elts);
3919 
3920   /* resolve sub-struct pointers*/
3921   entries = (svn_fs_x__p2l_entry_t *)page->elts;
3922   for (i = 0; i < page->nelts; ++i)
3923     svn_temp_deserializer__resolve(entries, (void**)&entries[i].items);
3924 
3925   /* patch up members */
3926   page->pool = result_pool;
3927   page->nalloc = page->nelts;
3928 
3929   /* done */
3930   *out = page;
3931 
3932   return SVN_NO_ERROR;
3933 }
3934