1 /* cached_data.c --- cached (read) access to FSFS data
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22
23 #include "cached_data.h"
24
25 #include <assert.h>
26
27 #include "svn_hash.h"
28 #include "svn_ctype.h"
29 #include "svn_sorts.h"
30 #include "private/svn_delta_private.h"
31 #include "private/svn_io_private.h"
32 #include "private/svn_sorts_private.h"
33 #include "private/svn_subr_private.h"
34 #include "private/svn_temp_serializer.h"
35
36 #include "fs_fs.h"
37 #include "id.h"
38 #include "index.h"
39 #include "low_level.h"
40 #include "pack.h"
41 #include "util.h"
42 #include "temp_serializer.h"
43
44 #include "../libsvn_fs/fs-loader.h"
45 #include "../libsvn_delta/delta.h" /* for SVN_DELTA_WINDOW_SIZE */
46
47 #include "svn_private_config.h"
48
49 /* forward-declare. See implementation for the docstring */
50 static svn_error_t *
51 block_read(void **result,
52 svn_fs_t *fs,
53 svn_revnum_t revision,
54 apr_uint64_t item_index,
55 svn_fs_fs__revision_file_t *revision_file,
56 apr_pool_t *result_pool,
57 apr_pool_t *scratch_pool);
58
59
60 /* Define this to enable access logging via dbg_log_access
61 #define SVN_FS_FS__LOG_ACCESS
62 */
63
64 /* When SVN_FS_FS__LOG_ACCESS has been defined, write a line to console
65 * showing where REVISION, ITEM_INDEX is located in FS and use ITEM to
66 * show details on it's contents if not NULL. To support format 6 and
67 * earlier repos, ITEM_TYPE (SVN_FS_FS__ITEM_TYPE_*) must match ITEM.
68 * Use SCRATCH_POOL for temporary allocations.
69 *
70 * For pre-format7 repos, the display will be restricted.
71 */
72 static svn_error_t *
dbg_log_access(svn_fs_t * fs,svn_revnum_t revision,apr_uint64_t item_index,void * item,apr_uint32_t item_type,apr_pool_t * scratch_pool)73 dbg_log_access(svn_fs_t *fs,
74 svn_revnum_t revision,
75 apr_uint64_t item_index,
76 void *item,
77 apr_uint32_t item_type,
78 apr_pool_t *scratch_pool)
79 {
80 /* no-op if this macro is not defined */
81 #ifdef SVN_FS_FS__LOG_ACCESS
82 fs_fs_data_t *ffd = fs->fsap_data;
83 apr_off_t end_offset = 0;
84 svn_fs_fs__p2l_entry_t *entry = NULL;
85 static const char *types[] = {"<n/a>", "frep ", "drep ", "fprop", "dprop",
86 "node ", "chgs ", "rep "};
87 const char *description = "";
88 const char *type = types[item_type];
89 const char *pack = "";
90 apr_off_t offset;
91 svn_fs_fs__revision_file_t *rev_file;
92
93 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, revision,
94 scratch_pool, scratch_pool));
95
96 /* determine rev / pack file offset */
97 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, revision, NULL,
98 item_index, scratch_pool));
99
100 /* constructing the pack file description */
101 if (revision < ffd->min_unpacked_rev)
102 pack = apr_psprintf(scratch_pool, "%4ld|",
103 revision / ffd->max_files_per_dir);
104
105 /* construct description if possible */
106 if (item_type == SVN_FS_FS__ITEM_TYPE_NODEREV && item != NULL)
107 {
108 node_revision_t *node = item;
109 const char *data_rep
110 = node->data_rep
111 ? apr_psprintf(scratch_pool, " d=%ld/%" APR_UINT64_T_FMT,
112 node->data_rep->revision,
113 node->data_rep->item_index)
114 : "";
115 const char *prop_rep
116 = node->prop_rep
117 ? apr_psprintf(scratch_pool, " p=%ld/%" APR_UINT64_T_FMT,
118 node->prop_rep->revision,
119 node->prop_rep->item_index)
120 : "";
121 description = apr_psprintf(scratch_pool, "%s (pc=%d%s%s)",
122 node->created_path,
123 node->predecessor_count,
124 data_rep,
125 prop_rep);
126 }
127 else if (item_type == SVN_FS_FS__ITEM_TYPE_ANY_REP)
128 {
129 svn_fs_fs__rep_header_t *header = item;
130 if (header == NULL)
131 description = " (txdelta window)";
132 else if (header->type == svn_fs_fs__rep_plain)
133 description = " PLAIN";
134 else if (header->type == svn_fs_fs__rep_self_delta)
135 description = " DELTA";
136 else
137 description = apr_psprintf(scratch_pool,
138 " DELTA against %ld/%" APR_UINT64_T_FMT,
139 header->base_revision,
140 header->base_item_index);
141 }
142 else if (item_type == SVN_FS_FS__ITEM_TYPE_CHANGES && item != NULL)
143 {
144 apr_array_header_t *changes = item;
145 switch (changes->nelts)
146 {
147 case 0: description = " no change";
148 break;
149 case 1: description = " 1 change";
150 break;
151 default: description = apr_psprintf(scratch_pool, " %d changes",
152 changes->nelts);
153 }
154 }
155
156 /* some info is only available in format7 repos */
157 if (svn_fs_fs__use_log_addressing(fs))
158 {
159 /* reverse index lookup: get item description in ENTRY */
160 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file, revision,
161 offset, scratch_pool,
162 scratch_pool));
163 if (entry)
164 {
165 /* more details */
166 end_offset = offset + entry->size;
167 type = types[entry->type];
168 }
169
170 /* line output */
171 printf("%5s%4lx:%04lx -%4lx:%04lx %s %7ld %5"APR_UINT64_T_FMT" %s\n",
172 pack, (long)(offset / ffd->block_size),
173 (long)(offset % ffd->block_size),
174 (long)(end_offset / ffd->block_size),
175 (long)(end_offset % ffd->block_size),
176 type, revision, item_index, description);
177 }
178 else
179 {
180 /* reduced logging for format 6 and earlier */
181 printf("%5s%10" APR_UINT64_T_HEX_FMT " %s %7ld %7" APR_UINT64_T_FMT \
182 " %s\n",
183 pack, (apr_uint64_t)(offset), type, revision, item_index,
184 description);
185 }
186
187 /* We don't know when SCRATCH_POOL will be cleared, so close the rev file
188 explicitly. */
189 SVN_ERR(svn_fs_fs__close_revision_file(rev_file));
190
191 #endif
192
193 return SVN_NO_ERROR;
194 }
195
196 /* Convenience wrapper around svn_io_file_aligned_seek, taking filesystem
197 FS instead of a block size. */
198 static svn_error_t *
aligned_seek(svn_fs_t * fs,apr_file_t * file,apr_off_t * buffer_start,apr_off_t offset,apr_pool_t * pool)199 aligned_seek(svn_fs_t *fs,
200 apr_file_t *file,
201 apr_off_t *buffer_start,
202 apr_off_t offset,
203 apr_pool_t *pool)
204 {
205 fs_fs_data_t *ffd = fs->fsap_data;
206 return svn_error_trace(svn_io_file_aligned_seek(file, ffd->block_size,
207 buffer_start, offset,
208 pool));
209 }
210
211 /* Open the revision file for revision REV in filesystem FS and store
212 the newly opened file in FILE. Seek to location OFFSET before
213 returning. Perform temporary allocations in POOL. */
214 static svn_error_t *
open_and_seek_revision(svn_fs_fs__revision_file_t ** file,svn_fs_t * fs,svn_revnum_t rev,apr_uint64_t item,apr_pool_t * pool)215 open_and_seek_revision(svn_fs_fs__revision_file_t **file,
216 svn_fs_t *fs,
217 svn_revnum_t rev,
218 apr_uint64_t item,
219 apr_pool_t *pool)
220 {
221 svn_fs_fs__revision_file_t *rev_file;
222 apr_off_t offset = -1;
223
224 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, pool));
225
226 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rev, pool, pool));
227 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rev, NULL, item,
228 pool));
229
230 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
231
232 *file = rev_file;
233
234 return SVN_NO_ERROR;
235 }
236
237 /* Open the representation REP for a node-revision in filesystem FS, seek
238 to its position and store the newly opened file in FILE. Perform
239 temporary allocations in POOL. */
240 static svn_error_t *
open_and_seek_transaction(svn_fs_fs__revision_file_t ** file,svn_fs_t * fs,representation_t * rep,apr_pool_t * pool)241 open_and_seek_transaction(svn_fs_fs__revision_file_t **file,
242 svn_fs_t *fs,
243 representation_t *rep,
244 apr_pool_t *pool)
245 {
246 apr_off_t offset;
247
248 SVN_ERR(svn_fs_fs__open_proto_rev_file(file, fs, &rep->txn_id, pool, pool));
249
250 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, NULL, SVN_INVALID_REVNUM,
251 &rep->txn_id, rep->item_index, pool));
252 SVN_ERR(aligned_seek(fs, (*file)->file, NULL, offset, pool));
253
254 return SVN_NO_ERROR;
255 }
256
257 /* Given a node-id ID, and a representation REP in filesystem FS, open
258 the correct file and seek to the correction location. Store this
259 file in *FILE_P. Perform any allocations in POOL. */
260 static svn_error_t *
open_and_seek_representation(svn_fs_fs__revision_file_t ** file_p,svn_fs_t * fs,representation_t * rep,apr_pool_t * pool)261 open_and_seek_representation(svn_fs_fs__revision_file_t **file_p,
262 svn_fs_t *fs,
263 representation_t *rep,
264 apr_pool_t *pool)
265 {
266 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
267 return open_and_seek_revision(file_p, fs, rep->revision, rep->item_index,
268 pool);
269 else
270 return open_and_seek_transaction(file_p, fs, rep, pool);
271 }
272
273
274
275 static svn_error_t *
err_dangling_id(svn_fs_t * fs,const svn_fs_id_t * id)276 err_dangling_id(svn_fs_t *fs, const svn_fs_id_t *id)
277 {
278 svn_string_t *id_str = svn_fs_fs__id_unparse(id, fs->pool);
279 return svn_error_createf
280 (SVN_ERR_FS_ID_NOT_FOUND, 0,
281 _("Reference to non-existent node '%s' in filesystem '%s'"),
282 id_str->data, fs->path);
283 }
284
285 /* Return TRUE, if FS is of a format that supports block-read and the
286 feature has been enabled. */
287 static svn_boolean_t
use_block_read(svn_fs_t * fs)288 use_block_read(svn_fs_t *fs)
289 {
290 fs_fs_data_t *ffd = fs->fsap_data;
291 return svn_fs_fs__use_log_addressing(fs) && ffd->use_block_read;
292 }
293
294 svn_error_t *
svn_fs_fs__fixup_expanded_size(svn_fs_t * fs,representation_t * rep,apr_pool_t * scratch_pool)295 svn_fs_fs__fixup_expanded_size(svn_fs_t *fs,
296 representation_t *rep,
297 apr_pool_t *scratch_pool)
298 {
299 svn_checksum_t checksum;
300 svn_checksum_t *empty_md5;
301 svn_fs_fs__revision_file_t *revision_file;
302 svn_fs_fs__rep_header_t *rep_header;
303
304 /* Anything to do at all?
305 *
306 * Note that a 0 SIZE is only possible for PLAIN reps due to the SVN\1
307 * magic prefix in any DELTA rep. */
308 if (!rep || rep->expanded_size != 0 || rep->size == 0)
309 return SVN_NO_ERROR;
310
311 /* This function may only be called for committed data. */
312 assert(!svn_fs_fs__id_txn_used(&rep->txn_id));
313
314 /* EXPANDED_SIZE is 0. If the MD5 does not match the one for empty
315 * contents, we know that EXPANDED_SIZE == 0 is wrong and needs to
316 * be set to the actual value given by SIZE.
317 *
318 * Using svn_checksum_match() will also accept all-zero values for
319 * the MD5 digest and only report a mismatch if the MD5 has actually
320 * been given. */
321 empty_md5 = svn_checksum_empty_checksum(svn_checksum_md5, scratch_pool);
322
323 checksum.digest = rep->md5_digest;
324 checksum.kind = svn_checksum_md5;
325 if (!svn_checksum_match(empty_md5, &checksum))
326 {
327 rep->expanded_size = rep->size;
328 return SVN_NO_ERROR;
329 }
330
331 /* Data in the rep-cache.db does not have MD5 checksums (all zero) on it.
332 * Compare SHA1 instead. */
333 if (rep->has_sha1)
334 {
335 svn_checksum_t *empty_sha1
336 = svn_checksum_empty_checksum(svn_checksum_sha1, scratch_pool);
337
338 checksum.digest = rep->sha1_digest;
339 checksum.kind = svn_checksum_sha1;
340 if (!svn_checksum_match(empty_sha1, &checksum))
341 {
342 rep->expanded_size = rep->size;
343 return SVN_NO_ERROR;
344 }
345 }
346
347 /* Only two cases are left here.
348 * (1) A non-empty PLAIN rep with a MD5 collision on EMPTY_MD5.
349 * (2) A DELTA rep with zero-length output. */
350
351 /* SVN always stores a DELTA rep with zero-length output as an empty
352 * sequence of txdelta windows, i.e. as "SVN\1". In that case, SIZE is
353 * 4 bytes. There is no other possible DELTA rep of that size and any
354 * PLAIN rep of 4 bytes would produce a different MD5. Hence, if SIZE is
355 * actually 4 here, we know that this is an empty DELTA rep.
356 *
357 * Note that it is technically legal to have DELTA reps with a 0 length
358 * output window. Their on-disk size would be longer. We handle that
359 * case later together with the equally unlikely MD5 collision. */
360 if (rep->size == 4)
361 {
362 /* EXPANDED_SIZE is already 0. */
363 return SVN_NO_ERROR;
364 }
365
366 /* We still have the two options, PLAIN or DELTA rep. At this point, we
367 * are in an extremely unlikely case and can spend some time to figure it
368 * out. So, let's just look at the representation header. */
369 SVN_ERR(open_and_seek_revision(&revision_file, fs, rep->revision,
370 rep->item_index, scratch_pool));
371 SVN_ERR(svn_fs_fs__read_rep_header(&rep_header, revision_file->stream,
372 scratch_pool, scratch_pool));
373 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
374
375 /* Only for PLAIN reps do we have to correct EXPANDED_SIZE. */
376 if (rep_header->type == svn_fs_fs__rep_plain)
377 rep->expanded_size = rep->size;
378
379 return SVN_NO_ERROR;
380 }
381
382 /* Correct known issues with committed NODEREV in FS.
383 * Uses SCRATCH_POOL for temporaries.
384 */
385 static svn_error_t *
fixup_node_revision(svn_fs_t * fs,node_revision_t * noderev,apr_pool_t * scratch_pool)386 fixup_node_revision(svn_fs_t *fs,
387 node_revision_t *noderev,
388 apr_pool_t *scratch_pool)
389 {
390 /* Workaround issue #4031: is-fresh-txn-root in revision files. */
391 noderev->is_fresh_txn_root = FALSE;
392
393 /* Make sure EXPANDED_SIZE has the correct value for every rep. */
394 SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->data_rep,
395 scratch_pool));
396 SVN_ERR(svn_fs_fs__fixup_expanded_size(fs, noderev->prop_rep,
397 scratch_pool));
398
399 return SVN_NO_ERROR;
400 }
401
402 /* Get the node-revision for the node ID in FS.
403 Set *NODEREV_P to the new node-revision structure, allocated in POOL.
404 See svn_fs_fs__get_node_revision, which wraps this and adds another
405 error. */
406 static svn_error_t *
get_node_revision_body(node_revision_t ** noderev_p,svn_fs_t * fs,const svn_fs_id_t * id,apr_pool_t * result_pool,apr_pool_t * scratch_pool)407 get_node_revision_body(node_revision_t **noderev_p,
408 svn_fs_t *fs,
409 const svn_fs_id_t *id,
410 apr_pool_t *result_pool,
411 apr_pool_t *scratch_pool)
412 {
413 svn_error_t *err;
414 svn_boolean_t is_cached = FALSE;
415 fs_fs_data_t *ffd = fs->fsap_data;
416
417 if (svn_fs_fs__id_is_txn(id))
418 {
419 apr_file_t *file;
420
421 /* This is a transaction node-rev. Its storage logic is very
422 different from that of rev / pack files. */
423 err = svn_io_file_open(&file,
424 svn_fs_fs__path_txn_node_rev(fs, id,
425 scratch_pool),
426 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
427 scratch_pool);
428 if (err && APR_STATUS_IS_ENOENT(err->apr_err))
429 {
430 svn_error_clear(err);
431 return svn_error_trace(err_dangling_id(fs, id));
432 }
433 else if (err)
434 {
435 return svn_error_trace(err);
436 }
437
438 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
439 svn_stream_from_aprfile2(file,
440 FALSE,
441 scratch_pool),
442 result_pool, scratch_pool));
443 }
444 else
445 {
446 svn_fs_fs__revision_file_t *revision_file;
447
448 /* noderevs in rev / pack files can be cached */
449 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
450 pair_cache_key_t key = { 0 };
451 key.revision = rev_item->revision;
452 key.second = rev_item->number;
453
454 /* Not found or not applicable. Try a noderev cache lookup.
455 * If that succeeds, we are done here. */
456 if (ffd->node_revision_cache)
457 {
458 SVN_ERR(svn_cache__get((void **) noderev_p,
459 &is_cached,
460 ffd->node_revision_cache,
461 &key,
462 result_pool));
463 if (is_cached)
464 return SVN_NO_ERROR;
465 }
466
467 /* read the data from disk */
468 SVN_ERR(open_and_seek_revision(&revision_file, fs,
469 rev_item->revision,
470 rev_item->number,
471 scratch_pool));
472
473 if (use_block_read(fs))
474 {
475 /* block-read will parse the whole block and will also return
476 the one noderev that we need right now. */
477 SVN_ERR(block_read((void **)noderev_p, fs,
478 rev_item->revision,
479 rev_item->number,
480 revision_file,
481 result_pool,
482 scratch_pool));
483 }
484 else
485 {
486 /* physical addressing mode reading, parsing and caching */
487 SVN_ERR(svn_fs_fs__read_noderev(noderev_p,
488 revision_file->stream,
489 result_pool,
490 scratch_pool));
491 SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
492
493 /* The noderev is not in cache, yet. Add it, if caching has been enabled. */
494 if (ffd->node_revision_cache)
495 SVN_ERR(svn_cache__set(ffd->node_revision_cache,
496 &key,
497 *noderev_p,
498 scratch_pool));
499 }
500
501 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
502 }
503
504 return SVN_NO_ERROR;
505 }
506
507 svn_error_t *
svn_fs_fs__get_node_revision(node_revision_t ** noderev_p,svn_fs_t * fs,const svn_fs_id_t * id,apr_pool_t * result_pool,apr_pool_t * scratch_pool)508 svn_fs_fs__get_node_revision(node_revision_t **noderev_p,
509 svn_fs_t *fs,
510 const svn_fs_id_t *id,
511 apr_pool_t *result_pool,
512 apr_pool_t *scratch_pool)
513 {
514 const svn_fs_fs__id_part_t *rev_item = svn_fs_fs__id_rev_item(id);
515
516 svn_error_t *err = get_node_revision_body(noderev_p, fs, id,
517 result_pool, scratch_pool);
518 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
519 {
520 svn_string_t *id_string = svn_fs_fs__id_unparse(id, scratch_pool);
521 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
522 "Corrupt node-revision '%s'",
523 id_string->data);
524 }
525
526 SVN_ERR(dbg_log_access(fs,
527 rev_item->revision,
528 rev_item->number,
529 *noderev_p,
530 SVN_FS_FS__ITEM_TYPE_NODEREV,
531 scratch_pool));
532
533 return svn_error_trace(err);
534 }
535
536
537 /* Given a revision file REV_FILE, opened to REV in FS, find the Node-ID
538 of the header located at OFFSET and store it in *ID_P. Allocate
539 temporary variables from POOL. */
540 static svn_error_t *
get_fs_id_at_offset(svn_fs_id_t ** id_p,svn_fs_fs__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t rev,apr_off_t offset,apr_pool_t * pool)541 get_fs_id_at_offset(svn_fs_id_t **id_p,
542 svn_fs_fs__revision_file_t *rev_file,
543 svn_fs_t *fs,
544 svn_revnum_t rev,
545 apr_off_t offset,
546 apr_pool_t *pool)
547 {
548 node_revision_t *noderev;
549
550 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, pool));
551 SVN_ERR(svn_fs_fs__read_noderev(&noderev,
552 rev_file->stream,
553 pool, pool));
554
555 /* noderev->id is const, get rid of that */
556 *id_p = svn_fs_fs__id_copy(noderev->id, pool);
557
558 /* assert that the txn_id is REV
559 * (asserting on offset would be harder because we the rev_offset is not
560 * known here) */
561 assert(svn_fs_fs__id_rev(*id_p) == rev);
562
563 return SVN_NO_ERROR;
564 }
565
566
567 /* Given an open revision file REV_FILE in FS for REV, locate the trailer that
568 specifies the offset to the root node-id and to the changed path
569 information. Store the root node offset in *ROOT_OFFSET and the
570 changed path offset in *CHANGES_OFFSET. If either of these
571 pointers is NULL, do nothing with it.
572
573 Allocate temporary variables from POOL. */
574 static svn_error_t *
get_root_changes_offset(apr_off_t * root_offset,apr_off_t * changes_offset,svn_fs_fs__revision_file_t * rev_file,svn_fs_t * fs,svn_revnum_t rev,apr_pool_t * pool)575 get_root_changes_offset(apr_off_t *root_offset,
576 apr_off_t *changes_offset,
577 svn_fs_fs__revision_file_t *rev_file,
578 svn_fs_t *fs,
579 svn_revnum_t rev,
580 apr_pool_t *pool)
581 {
582 fs_fs_data_t *ffd = fs->fsap_data;
583 apr_off_t rev_offset;
584 apr_seek_where_t seek_relative;
585 svn_stringbuf_t *trailer;
586 char buffer[64];
587 apr_off_t start;
588 apr_off_t end;
589 apr_size_t len;
590
591 /* Determine where to seek to in the file.
592
593 If we've got a pack file, we want to seek to the end of the desired
594 revision. But we don't track that, so we seek to the beginning of the
595 next revision.
596
597 Unless the next revision is in a different file, in which case, we can
598 just seek to the end of the pack file -- just like we do in the
599 non-packed case. */
600 if (rev_file->is_packed && ((rev + 1) % ffd->max_files_per_dir != 0))
601 {
602 SVN_ERR(svn_fs_fs__get_packed_offset(&end, fs, rev + 1, pool));
603 seek_relative = APR_SET;
604 }
605 else
606 {
607 seek_relative = APR_END;
608 end = 0;
609 }
610
611 /* Offset of the revision from the start of the pack file, if applicable. */
612 if (rev_file->is_packed)
613 SVN_ERR(svn_fs_fs__get_packed_offset(&rev_offset, fs, rev, pool));
614 else
615 rev_offset = 0;
616
617 /* We will assume that the last line containing the two offsets
618 will never be longer than 64 characters. */
619 SVN_ERR(svn_io_file_seek(rev_file->file, seek_relative, &end, pool));
620
621 if (end < sizeof(buffer))
622 {
623 len = (apr_size_t)end;
624 start = 0;
625 }
626 else
627 {
628 len = sizeof(buffer);
629 start = end - sizeof(buffer);
630 }
631
632 /* Read in this last block, from which we will identify the last line. */
633 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, start, pool));
634 SVN_ERR(svn_io_file_read_full2(rev_file->file, buffer, len, NULL, NULL,
635 pool));
636
637 /* Parse the last line. */
638 trailer = svn_stringbuf_ncreate(buffer, len, pool);
639 SVN_ERR(svn_fs_fs__parse_revision_trailer(root_offset,
640 changes_offset,
641 trailer,
642 rev));
643
644 /* return absolute offsets */
645 if (root_offset)
646 *root_offset += rev_offset;
647 if (changes_offset)
648 *changes_offset += rev_offset;
649
650 return SVN_NO_ERROR;
651 }
652
653 svn_error_t *
svn_fs_fs__rev_get_root(svn_fs_id_t ** root_id_p,svn_fs_t * fs,svn_revnum_t rev,apr_pool_t * result_pool,apr_pool_t * scratch_pool)654 svn_fs_fs__rev_get_root(svn_fs_id_t **root_id_p,
655 svn_fs_t *fs,
656 svn_revnum_t rev,
657 apr_pool_t *result_pool,
658 apr_pool_t *scratch_pool)
659 {
660 fs_fs_data_t *ffd = fs->fsap_data;
661 SVN_ERR(svn_fs_fs__ensure_revision_exists(rev, fs, scratch_pool));
662
663 if (svn_fs_fs__use_log_addressing(fs))
664 {
665 *root_id_p = svn_fs_fs__id_create_root(rev, result_pool);
666 }
667 else
668 {
669 svn_fs_fs__revision_file_t *revision_file;
670 apr_off_t root_offset;
671 svn_fs_id_t *root_id = NULL;
672 svn_boolean_t is_cached;
673
674 SVN_ERR(svn_cache__get((void **) root_id_p, &is_cached,
675 ffd->rev_root_id_cache, &rev, result_pool));
676 if (is_cached)
677 return SVN_NO_ERROR;
678
679 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&revision_file, fs, rev,
680 scratch_pool, scratch_pool));
681 SVN_ERR(get_root_changes_offset(&root_offset, NULL,
682 revision_file, fs, rev,
683 scratch_pool));
684
685 SVN_ERR(get_fs_id_at_offset(&root_id, revision_file, fs, rev,
686 root_offset, result_pool));
687
688 SVN_ERR(svn_fs_fs__close_revision_file(revision_file));
689
690 SVN_ERR(svn_cache__set(ffd->rev_root_id_cache, &rev, root_id,
691 scratch_pool));
692
693 *root_id_p = root_id;
694 }
695
696 return SVN_NO_ERROR;
697 }
698
/* Describes a lazily opened rev / pack file.  Instances will be shared
   between multiple instances of rep_state_t.  The file gets opened on
   demand by auto_open_shared_file. */
typedef struct shared_file_t
{
  /* The opened file.  NULL while the file has not been opened, yet. */
  svn_fs_fs__revision_file_t *rfile;

  /* file system to open the file in */
  svn_fs_t *fs;

  /* a revision contained in RFILE.  Since this file may be shared,
     that value may be different from REP_STATE_T->REVISION. */
  svn_revnum_t revision;

  /* pool to use when opening RFILE.  This guarantees that the file
     remains open / valid beyond the respective local context that required
     the file to be opened eventually. */
  apr_pool_t *pool;
} shared_file_t;
718
/* Represents where in the current svndiff data block each
   representation is. */
typedef struct rep_state_t
{
                    /* shared lazy-open rev/pack file structure */
  shared_file_t *sfile;
                    /* Caches raw (unparsed) windows.  May be NULL; it is
                       only set when block-read is used. */
  svn_cache__t *raw_window_cache;
                    /* The txdelta window cache to use or NULL. */
  svn_cache__t *window_cache;
                    /* Caches un-deltified windows. May be NULL. */
  svn_cache__t *combined_cache;
                    /* revision containing the representation */
  svn_revnum_t revision;
                    /* representation's item index in REVISION */
  apr_uint64_t item_index;
                    /* length of the header at the start of the rep.
                       0 iff this rep is stored in a container
                       (i.e. does not have a header) */
  apr_size_t header_size;
  apr_off_t start;  /* The starting offset for the raw
                       svndiff/plaintext data minus header.
                       -1 if the offset is yet unknown. */
  apr_off_t current;/* The current offset relative to START. */
  apr_off_t size;   /* The on-disk size of the representation. */
  int ver;          /* If a delta, what svndiff version?
                       -1 for unknown delta version. */
  int chunk_index;  /* number of the window to read */
} rep_state_t;
748
749 /* Simple wrapper around svn_io_file_get_offset to simplify callers. */
750 static svn_error_t *
get_file_offset(apr_off_t * offset,rep_state_t * rs,apr_pool_t * pool)751 get_file_offset(apr_off_t *offset,
752 rep_state_t *rs,
753 apr_pool_t *pool)
754 {
755 return svn_error_trace(svn_io_file_get_offset(offset,
756 rs->sfile->rfile->file,
757 pool));
758 }
759
760 /* Simple wrapper around svn_io_file_aligned_seek to simplify callers. */
761 static svn_error_t *
rs_aligned_seek(rep_state_t * rs,apr_off_t * buffer_start,apr_off_t offset,apr_pool_t * pool)762 rs_aligned_seek(rep_state_t *rs,
763 apr_off_t *buffer_start,
764 apr_off_t offset,
765 apr_pool_t *pool)
766 {
767 fs_fs_data_t *ffd = rs->sfile->fs->fsap_data;
768 return svn_error_trace(svn_io_file_aligned_seek(rs->sfile->rfile->file,
769 ffd->block_size,
770 buffer_start, offset,
771 pool));
772 }
773
774 /* Open FILE->FILE and FILE->STREAM if they haven't been opened, yet. */
775 static svn_error_t*
auto_open_shared_file(shared_file_t * file)776 auto_open_shared_file(shared_file_t *file)
777 {
778 if (file->rfile == NULL)
779 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&file->rfile, file->fs,
780 file->revision, file->pool,
781 file->pool));
782
783 return SVN_NO_ERROR;
784 }
785
786 /* Set RS->START to the begin of the representation raw in RS->FILE->FILE,
787 if that hasn't been done yet. Use POOL for temporary allocations. */
788 static svn_error_t*
auto_set_start_offset(rep_state_t * rs,apr_pool_t * pool)789 auto_set_start_offset(rep_state_t *rs, apr_pool_t *pool)
790 {
791 if (rs->start == -1)
792 {
793 SVN_ERR(svn_fs_fs__item_offset(&rs->start, rs->sfile->fs,
794 rs->sfile->rfile, rs->revision, NULL,
795 rs->item_index, pool));
796 rs->start += rs->header_size;
797 }
798
799 return SVN_NO_ERROR;
800 }
801
802 /* Set RS->VER depending on what is found in the already open RS->FILE->FILE
803 if the diff version is still unknown. Use POOL for temporary allocations.
804 */
805 static svn_error_t*
auto_read_diff_version(rep_state_t * rs,apr_pool_t * pool)806 auto_read_diff_version(rep_state_t *rs, apr_pool_t *pool)
807 {
808 if (rs->ver == -1)
809 {
810 char buf[4];
811 SVN_ERR(rs_aligned_seek(rs, NULL, rs->start, pool));
812 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
813 sizeof(buf), NULL, NULL, pool));
814
815 /* ### Layering violation */
816 if (! ((buf[0] == 'S') && (buf[1] == 'V') && (buf[2] == 'N')))
817 return svn_error_create
818 (SVN_ERR_FS_CORRUPT, NULL,
819 _("Malformed svndiff data in representation"));
820 rs->ver = buf[3];
821
822 rs->chunk_index = 0;
823 rs->current = 4;
824 }
825
826 return SVN_NO_ERROR;
827 }
828
829 /* See create_rep_state, which wraps this and adds another error. */
830 static svn_error_t *
create_rep_state_body(rep_state_t ** rep_state,svn_fs_fs__rep_header_t ** rep_header,shared_file_t ** shared_file,representation_t * rep,svn_fs_t * fs,apr_pool_t * result_pool,apr_pool_t * scratch_pool)831 create_rep_state_body(rep_state_t **rep_state,
832 svn_fs_fs__rep_header_t **rep_header,
833 shared_file_t **shared_file,
834 representation_t *rep,
835 svn_fs_t *fs,
836 apr_pool_t *result_pool,
837 apr_pool_t *scratch_pool)
838 {
839 fs_fs_data_t *ffd = fs->fsap_data;
840 rep_state_t *rs = apr_pcalloc(result_pool, sizeof(*rs));
841 svn_fs_fs__rep_header_t *rh;
842 svn_boolean_t is_cached = FALSE;
843 apr_uint64_t estimated_window_storage;
844
845 /* If the hint is
846 * - given,
847 * - refers to a valid revision,
848 * - refers to a packed revision,
849 * - as does the rep we want to read, and
850 * - refers to the same pack file as the rep
851 * we can re-use the same, already open file object
852 */
853 svn_boolean_t reuse_shared_file
854 = shared_file && *shared_file && (*shared_file)->rfile
855 && SVN_IS_VALID_REVNUM((*shared_file)->revision)
856 && (*shared_file)->revision < ffd->min_unpacked_rev
857 && rep->revision < ffd->min_unpacked_rev
858 && ( ((*shared_file)->revision / ffd->max_files_per_dir)
859 == (rep->revision / ffd->max_files_per_dir));
860
861 pair_cache_key_t key;
862 key.revision = rep->revision;
863 key.second = rep->item_index;
864
865 /* continue constructing RS and RA */
866 rs->size = rep->size;
867 rs->revision = rep->revision;
868 rs->item_index = rep->item_index;
869 rs->raw_window_cache = use_block_read(fs) ? ffd->raw_window_cache : NULL;
870 rs->ver = -1;
871 rs->start = -1;
872
873 /* Very long files stored as self-delta will produce a huge number of
874 delta windows. Don't cache them lest we don't thrash the cache.
875 Since we don't know the depth of the delta chain, let's assume, the
876 whole contents get rewritten 3 times.
877 */
878 estimated_window_storage = 4 * (rep->expanded_size + SVN_DELTA_WINDOW_SIZE);
879 estimated_window_storage = MIN(estimated_window_storage, APR_SIZE_MAX);
880
881 rs->window_cache = ffd->txdelta_window_cache
882 && svn_cache__is_cachable(ffd->txdelta_window_cache,
883 (apr_size_t)estimated_window_storage)
884 ? ffd->txdelta_window_cache
885 : NULL;
886 rs->combined_cache = ffd->combined_window_cache
887 && svn_cache__is_cachable(ffd->combined_window_cache,
888 (apr_size_t)estimated_window_storage)
889 ? ffd->combined_window_cache
890 : NULL;
891
892 /* cache lookup, i.e. skip reading the rep header if possible */
893 if (ffd->rep_header_cache && !svn_fs_fs__id_txn_used(&rep->txn_id))
894 SVN_ERR(svn_cache__get((void **) &rh, &is_cached,
895 ffd->rep_header_cache, &key, result_pool));
896
897 /* initialize the (shared) FILE member in RS */
898 if (reuse_shared_file)
899 {
900 rs->sfile = *shared_file;
901 }
902 else
903 {
904 shared_file_t *file = apr_pcalloc(result_pool, sizeof(*file));
905 file->revision = rep->revision;
906 file->pool = result_pool;
907 file->fs = fs;
908 rs->sfile = file;
909
910 /* remember the current file, if suggested by the caller */
911 if (shared_file)
912 *shared_file = file;
913 }
914
915 /* read rep header, if necessary */
916 if (!is_cached)
917 {
918 /* ensure file is open and navigate to the start of rep header */
919 if (reuse_shared_file)
920 {
921 apr_off_t offset;
922
923 /* ... we can re-use the same, already open file object.
924 * This implies that we don't read from a txn.
925 */
926 rs->sfile = *shared_file;
927 SVN_ERR(auto_open_shared_file(rs->sfile));
928 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rs->sfile->rfile,
929 rep->revision, NULL, rep->item_index,
930 scratch_pool));
931 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
932 }
933 else
934 {
935 /* otherwise, create a new file object. May or may not be
936 * an in-txn file.
937 */
938 SVN_ERR(open_and_seek_representation(&rs->sfile->rfile, fs, rep,
939 result_pool));
940 }
941
942 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
943 result_pool, scratch_pool));
944 SVN_ERR(get_file_offset(&rs->start, rs, result_pool));
945
946 /* populate the cache if appropriate */
947 if (! svn_fs_fs__id_txn_used(&rep->txn_id))
948 {
949 if (use_block_read(fs))
950 SVN_ERR(block_read(NULL, fs, rep->revision, rep->item_index,
951 rs->sfile->rfile, result_pool, scratch_pool));
952 else
953 if (ffd->rep_header_cache)
954 SVN_ERR(svn_cache__set(ffd->rep_header_cache, &key, rh,
955 scratch_pool));
956 }
957 }
958
959 /* finalize */
960 SVN_ERR(dbg_log_access(fs, rep->revision, rep->item_index, rh,
961 SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
962
963 rs->header_size = rh->header_size;
964 *rep_state = rs;
965 *rep_header = rh;
966
967 if (rh->type == svn_fs_fs__rep_plain)
968 /* This is a plaintext, so just return the current rep_state. */
969 return SVN_NO_ERROR;
970
971 /* skip "SVNx" diff marker */
972 rs->current = 4;
973
974 return SVN_NO_ERROR;
975 }
976
977 /* Read the rep args for REP in filesystem FS and create a rep_state
978 for reading the representation. Return the rep_state in *REP_STATE
979 and the rep header in *REP_HEADER, both allocated in POOL.
980
981 When reading multiple reps, i.e. a skip delta chain, you may provide
982 non-NULL SHARED_FILE. (If SHARED_FILE is not NULL, in the first
983 call it should be a pointer to NULL.) The function will use this
984 variable to store the previous call results and tries to re-use it.
985 This may result in significant savings in I/O for packed files and
986 number of open file handles.
987 */
988 static svn_error_t *
create_rep_state(rep_state_t ** rep_state,svn_fs_fs__rep_header_t ** rep_header,shared_file_t ** shared_file,representation_t * rep,svn_fs_t * fs,apr_pool_t * result_pool,apr_pool_t * scratch_pool)989 create_rep_state(rep_state_t **rep_state,
990 svn_fs_fs__rep_header_t **rep_header,
991 shared_file_t **shared_file,
992 representation_t *rep,
993 svn_fs_t *fs,
994 apr_pool_t *result_pool,
995 apr_pool_t *scratch_pool)
996 {
997 svn_error_t *err = create_rep_state_body(rep_state, rep_header,
998 shared_file, rep, fs,
999 result_pool, scratch_pool);
1000 if (err && err->apr_err == SVN_ERR_FS_CORRUPT)
1001 {
1002 fs_fs_data_t *ffd = fs->fsap_data;
1003 const char *rep_str;
1004
1005 /* ### This always returns "-1" for transaction reps, because
1006 ### this particular bit of code doesn't know if the rep is
1007 ### stored in the protorev or in the mutable area (for props
1008 ### or dir contents). It is pretty rare for FSFS to *read*
1009 ### from the protorev file, though, so this is probably OK.
1010 ### And anyone going to debug corruption errors is probably
1011 ### going to jump straight to this comment anyway! */
1012 rep_str = rep
1013 ? svn_fs_fs__unparse_representation
1014 (rep, ffd->format, TRUE, scratch_pool, scratch_pool)->data
1015 : "(null)";
1016
1017 return svn_error_createf(SVN_ERR_FS_CORRUPT, err,
1018 "Corrupt representation '%s'",
1019 rep_str);
1020 }
1021 /* ### Call representation_string() ? */
1022 return svn_error_trace(err);
1023 }
1024
1025 svn_error_t *
svn_fs_fs__check_rep(representation_t * rep,svn_fs_t * fs,void ** hint,apr_pool_t * scratch_pool)1026 svn_fs_fs__check_rep(representation_t *rep,
1027 svn_fs_t *fs,
1028 void **hint,
1029 apr_pool_t *scratch_pool)
1030 {
1031 if (svn_fs_fs__use_log_addressing(fs))
1032 {
1033 apr_off_t offset;
1034 svn_fs_fs__p2l_entry_t *entry;
1035 svn_fs_fs__revision_file_t *rev_file = NULL;
1036
1037 /* Reuse the revision file provided by *HINT, if it is given and
1038 * actually the rev / pack file that we want. */
1039 svn_revnum_t start_rev = svn_fs_fs__packed_base_rev(fs, rep->revision);
1040 if (hint)
1041 rev_file = *(svn_fs_fs__revision_file_t **)hint;
1042
1043 if (rev_file == NULL || rev_file->start_revision != start_rev)
1044 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&rev_file, fs, rep->revision,
1045 scratch_pool, scratch_pool));
1046
1047 if (hint)
1048 *hint = rev_file;
1049
1050 /* This will auto-retry if there was a background pack. */
1051 SVN_ERR(svn_fs_fs__item_offset(&offset, fs, rev_file, rep->revision,
1052 NULL, rep->item_index, scratch_pool));
1053
1054 /* This may fail if there is a background pack operation (can't auto-
1055 retry because the item offset lookup has to be redone as well). */
1056 SVN_ERR(svn_fs_fs__p2l_entry_lookup(&entry, fs, rev_file,
1057 rep->revision, offset,
1058 scratch_pool, scratch_pool));
1059
1060 if ( entry == NULL
1061 || entry->type < SVN_FS_FS__ITEM_TYPE_FILE_REP
1062 || entry->type > SVN_FS_FS__ITEM_TYPE_DIR_PROPS)
1063 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
1064 _("No representation found at offset %s "
1065 "for item %s in revision %ld"),
1066 apr_off_t_toa(scratch_pool, offset),
1067 apr_psprintf(scratch_pool,
1068 "%" APR_UINT64_T_FMT,
1069 rep->item_index),
1070 rep->revision);
1071 }
1072 else
1073 {
1074 rep_state_t *rs;
1075 svn_fs_fs__rep_header_t *rep_header;
1076
1077 /* ### Should this be using read_rep_line() directly? */
1078 SVN_ERR(create_rep_state(&rs, &rep_header, (shared_file_t**)hint,
1079 rep, fs, scratch_pool, scratch_pool));
1080 }
1081
1082 return SVN_NO_ERROR;
1083 }
1084
1085 svn_error_t *
svn_fs_fs__rep_chain_length(int * chain_length,int * shard_count,representation_t * rep,svn_fs_t * fs,apr_pool_t * scratch_pool)1086 svn_fs_fs__rep_chain_length(int *chain_length,
1087 int *shard_count,
1088 representation_t *rep,
1089 svn_fs_t *fs,
1090 apr_pool_t *scratch_pool)
1091 {
1092 fs_fs_data_t *ffd = fs->fsap_data;
1093 svn_revnum_t shard_size = ffd->max_files_per_dir
1094 ? ffd->max_files_per_dir
1095 : 1;
1096 apr_pool_t *subpool = svn_pool_create(scratch_pool);
1097 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
1098 svn_boolean_t is_delta = FALSE;
1099 int count = 0;
1100 int shards = 1;
1101 svn_revnum_t last_shard = rep->revision / shard_size;
1102
1103 /* Check whether the length of the deltification chain is acceptable.
1104 * Otherwise, shared reps may form a non-skipping delta chain in
1105 * extreme cases. */
1106 representation_t base_rep = *rep;
1107
1108 /* re-use open files between iterations */
1109 shared_file_t *file_hint = NULL;
1110
1111 svn_fs_fs__rep_header_t *header;
1112
1113 /* follow the delta chain towards the end but for at most
1114 * MAX_CHAIN_LENGTH steps. */
1115 do
1116 {
1117 rep_state_t *rep_state;
1118
1119 svn_pool_clear(iterpool);
1120
1121 if (base_rep.revision / shard_size != last_shard)
1122 {
1123 last_shard = base_rep.revision / shard_size;
1124 ++shards;
1125 }
1126
1127 SVN_ERR(create_rep_state_body(&rep_state,
1128 &header,
1129 &file_hint,
1130 &base_rep,
1131 fs,
1132 subpool,
1133 iterpool));
1134
1135 base_rep.revision = header->base_revision;
1136 base_rep.item_index = header->base_item_index;
1137 base_rep.size = header->base_length;
1138 svn_fs_fs__id_txn_reset(&base_rep.txn_id);
1139 is_delta = header->type == svn_fs_fs__rep_delta;
1140
1141 /* Clear it the SUBPOOL once in a while. Doing it too frequently
1142 * renders the FILE_HINT ineffective. Doing too infrequently, may
1143 * leave us with too many open file handles.
1144 *
1145 * Note that this is mostly about efficiency, with larger values
1146 * being more efficient, and any non-zero value is legal here. When
1147 * reading deltified contents, we may keep 10s of rev files open at
1148 * the same time and the system has to cope with that. Thus, the
1149 * limit of 16 chosen below is in the same ballpark.
1150 */
1151 ++count;
1152 if (count % 16 == 0)
1153 {
1154 file_hint = NULL;
1155 svn_pool_clear(subpool);
1156 }
1157 }
1158 while (is_delta && base_rep.revision);
1159
1160 *chain_length = count;
1161 *shard_count = shards;
1162 svn_pool_destroy(subpool);
1163 svn_pool_destroy(iterpool);
1164
1165 return SVN_NO_ERROR;
1166 }
1167
/* State of a representation read.  Instances are created and
   initialized by rep_read_get_baton(). */
struct rep_read_baton
{
  /* The FS from which we're reading. */
  svn_fs_t *fs;

  /* Representation to read (a copy of the caller's struct). */
  representation_t rep;

  /* If not NULL, this is the base for the first delta window in rs_list */
  svn_stringbuf_t *base_window;

  /* The state of all prior delta representations. */
  apr_array_header_t *rs_list;

  /* The plaintext state, if there is a plaintext. */
  rep_state_t *src_state;

  /* The index of the current delta chunk, if we are reading a delta. */
  int chunk_index;

  /* The buffer where we store undeltified data: BUF points to the data,
     BUF_LEN is its total length and BUF_POS the number of bytes that
     have already been handed out.  BUF is NULL if there is no buffered
     data. */
  char *buf;
  apr_size_t buf_pos;
  apr_size_t buf_len;

  /* A checksum context for summing the data read in order to verify it.
     Note: we don't need to use the sha1 checksum because we're only doing
     data verification, for which md5 is perfectly safe.  */
  svn_checksum_ctx_t *md5_checksum_ctx;

  svn_boolean_t checksum_finalized;

  /* The stored checksum of the representation we are reading, its
     length, and the amount we've read so far.  Some of this
     information is redundant with rs_list and src_state, but it's
     convenient for the checksumming code to have it here. */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];

  svn_filesize_t len;
  svn_filesize_t off;

  /* The key for the fulltext cache for this rep, if there is a
     fulltext cache. */
  pair_cache_key_t fulltext_cache_key;
  /* The text we've been reading, if we're going to cache it. */
  svn_stringbuf_t *current_fulltext;

  /* If not NULL, attempt to read the data from this cache.
     Once that lookup fails, reset it to NULL. */
  svn_cache__t *fulltext_cache;

  /* Bytes delivered from the FULLTEXT_CACHE so far.  If the next
     lookup fails, we need to skip that much data from the reconstructed
     window stream before we continue normal operation. */
  svn_filesize_t fulltext_delivered;

  /* Used for temporary allocations during the read. */
  apr_pool_t *pool;

  /* Pool used to store file handles and other data that is persistent
     for the entire stream read. */
  apr_pool_t *filehandle_pool;
};
1231
1232 /* Set window key in *KEY to address the window described by RS.
1233 For convenience, return the KEY. */
1234 static window_cache_key_t *
get_window_key(window_cache_key_t * key,rep_state_t * rs)1235 get_window_key(window_cache_key_t *key, rep_state_t *rs)
1236 {
1237 assert(rs->revision <= APR_UINT32_MAX);
1238 key->revision = (apr_uint32_t)rs->revision;
1239 key->item_index = rs->item_index;
1240 key->chunk_index = rs->chunk_index;
1241
1242 return key;
1243 }
1244
1245 /* Implement svn_cache__partial_getter_func_t for raw txdelta windows.
1246 * Parse the raw data and return a svn_fs_fs__txdelta_cached_window_t.
1247 */
1248 static svn_error_t *
parse_raw_window(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)1249 parse_raw_window(void **out,
1250 const void *data,
1251 apr_size_t data_len,
1252 void *baton,
1253 apr_pool_t *result_pool)
1254 {
1255 svn_string_t raw_window;
1256 svn_stream_t *stream;
1257
1258 /* unparsed and parsed window */
1259 const svn_fs_fs__raw_cached_window_t *window
1260 = (const svn_fs_fs__raw_cached_window_t *)data;
1261 svn_fs_fs__txdelta_cached_window_t *result
1262 = apr_pcalloc(result_pool, sizeof(*result));
1263
1264 /* create a read stream taking the raw window as input */
1265 raw_window.data = svn_temp_deserializer__ptr(window,
1266 (const void * const *)&window->window.data);
1267 raw_window.len = window->window.len;
1268 stream = svn_stream_from_string(&raw_window, result_pool);
1269
1270 /* parse it */
1271 SVN_ERR(svn_txdelta_read_svndiff_window(&result->window, stream, window->ver,
1272 result_pool));
1273
1274 /* complete the window and return it */
1275 result->end_offset = window->end_offset;
1276 *out = result;
1277
1278 return SVN_NO_ERROR;
1279 }
1280
1281
1282 /* Read the WINDOW_P number CHUNK_INDEX for the representation given in
1283 * rep state RS from the current FSFS session's cache. This will be a
1284 * no-op and IS_CACHED will be set to FALSE if no cache has been given.
1285 * If a cache is available IS_CACHED will inform the caller about the
1286 * success of the lookup. Allocations of the window in will be made
1287 * from RESULT_POOL. Use SCRATCH_POOL for temporary allocations.
1288 *
1289 * If the information could be found, put RS to CHUNK_INDEX.
1290 */
1291 static svn_error_t *
get_cached_window(svn_txdelta_window_t ** window_p,rep_state_t * rs,int chunk_index,svn_boolean_t * is_cached,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1292 get_cached_window(svn_txdelta_window_t **window_p,
1293 rep_state_t *rs,
1294 int chunk_index,
1295 svn_boolean_t *is_cached,
1296 apr_pool_t *result_pool,
1297 apr_pool_t *scratch_pool)
1298 {
1299 if (! rs->window_cache)
1300 {
1301 /* txdelta window has not been enabled */
1302 *is_cached = FALSE;
1303 }
1304 else
1305 {
1306 /* ask the cache for the desired txdelta window */
1307 svn_fs_fs__txdelta_cached_window_t *cached_window;
1308 window_cache_key_t key = { 0 };
1309 get_window_key(&key, rs);
1310 key.chunk_index = chunk_index;
1311 SVN_ERR(svn_cache__get((void **) &cached_window,
1312 is_cached,
1313 rs->window_cache,
1314 &key,
1315 result_pool));
1316
1317 /* If we did not find a parsed txdelta window, we might have a raw
1318 version of it in our cache. If so, read, parse and re-cache it. */
1319 if (!*is_cached && rs->raw_window_cache)
1320 {
1321 SVN_ERR(svn_cache__get_partial((void **) &cached_window, is_cached,
1322 rs->raw_window_cache, &key,
1323 parse_raw_window, NULL, result_pool));
1324 if (*is_cached)
1325 SVN_ERR(svn_cache__set(rs->window_cache, &key, cached_window,
1326 scratch_pool));
1327 }
1328
1329 /* Return cached information. */
1330 if (*is_cached)
1331 {
1332 /* found it. Pass it back to the caller. */
1333 *window_p = cached_window->window;
1334
1335 /* manipulate the RS as if we just read the data */
1336 rs->current = cached_window->end_offset;
1337 rs->chunk_index = chunk_index;
1338 }
1339 }
1340
1341 return SVN_NO_ERROR;
1342 }
1343
1344 /* Store the WINDOW read for the rep state RS in the current FSFS
1345 * session's cache. This will be a no-op if no cache has been given.
1346 * Temporary allocations will be made from SCRATCH_POOL. */
1347 static svn_error_t *
set_cached_window(svn_txdelta_window_t * window,rep_state_t * rs,apr_pool_t * scratch_pool)1348 set_cached_window(svn_txdelta_window_t *window,
1349 rep_state_t *rs,
1350 apr_pool_t *scratch_pool)
1351 {
1352 if (rs->window_cache)
1353 {
1354 /* store the window and the first offset _past_ it */
1355 svn_fs_fs__txdelta_cached_window_t cached_window;
1356 window_cache_key_t key = {0};
1357
1358 cached_window.window = window;
1359 cached_window.end_offset = rs->current;
1360
1361 /* but key it with the start offset because that is the known state
1362 * when we will look it up */
1363 SVN_ERR(svn_cache__set(rs->window_cache,
1364 get_window_key(&key, rs),
1365 &cached_window,
1366 scratch_pool));
1367 }
1368
1369 return SVN_NO_ERROR;
1370 }
1371
1372 /* Read the WINDOW_P for the rep state RS from the current FSFS session's
1373 * cache. This will be a no-op and IS_CACHED will be set to FALSE if no
1374 * cache has been given. If a cache is available IS_CACHED will inform
1375 * the caller about the success of the lookup. Allocations (of the window
1376 * in particular) will be made from POOL.
1377 */
1378 static svn_error_t *
get_cached_combined_window(svn_stringbuf_t ** window_p,rep_state_t * rs,svn_boolean_t * is_cached,apr_pool_t * pool)1379 get_cached_combined_window(svn_stringbuf_t **window_p,
1380 rep_state_t *rs,
1381 svn_boolean_t *is_cached,
1382 apr_pool_t *pool)
1383 {
1384 if (! rs->combined_cache)
1385 {
1386 /* txdelta window has not been enabled */
1387 *is_cached = FALSE;
1388 }
1389 else
1390 {
1391 /* ask the cache for the desired txdelta window */
1392 window_cache_key_t key = { 0 };
1393 return svn_cache__get((void **)window_p,
1394 is_cached,
1395 rs->combined_cache,
1396 get_window_key(&key, rs),
1397 pool);
1398 }
1399
1400 return SVN_NO_ERROR;
1401 }
1402
1403 /* Store the WINDOW read for the rep state RS in the current FSFS session's
1404 * cache. This will be a no-op if no cache has been given.
1405 * Temporary allocations will be made from SCRATCH_POOL. */
1406 static svn_error_t *
set_cached_combined_window(svn_stringbuf_t * window,rep_state_t * rs,apr_pool_t * scratch_pool)1407 set_cached_combined_window(svn_stringbuf_t *window,
1408 rep_state_t *rs,
1409 apr_pool_t *scratch_pool)
1410 {
1411 if (rs->combined_cache)
1412 {
1413 /* but key it with the start offset because that is the known state
1414 * when we will look it up */
1415 window_cache_key_t key = { 0 };
1416 return svn_cache__set(rs->combined_cache,
1417 get_window_key(&key, rs),
1418 window,
1419 scratch_pool);
1420 }
1421
1422 return SVN_NO_ERROR;
1423 }
1424
1425 /* Build an array of rep_state structures in *LIST giving the delta
1426 reps from first_rep to a plain-text or self-compressed rep. Set
1427 *SRC_STATE to the plain-text rep we find at the end of the chain,
1428 or to NULL if the final delta representation is self-compressed.
1429 The representation to start from is designated by filesystem FS, id
1430 ID, and representation REP.
1431 Also, set *WINDOW_P to the base window content for *LIST, if it
1432 could be found in cache. Otherwise, *LIST will contain the base
1433 representation for the whole delta chain. */
1434 static svn_error_t *
build_rep_list(apr_array_header_t ** list,svn_stringbuf_t ** window_p,rep_state_t ** src_state,svn_fs_t * fs,representation_t * first_rep,apr_pool_t * pool)1435 build_rep_list(apr_array_header_t **list,
1436 svn_stringbuf_t **window_p,
1437 rep_state_t **src_state,
1438 svn_fs_t *fs,
1439 representation_t *first_rep,
1440 apr_pool_t *pool)
1441 {
1442 representation_t rep;
1443 rep_state_t *rs = NULL;
1444 svn_fs_fs__rep_header_t *rep_header;
1445 svn_boolean_t is_cached = FALSE;
1446 shared_file_t *shared_file = NULL;
1447 apr_pool_t *iterpool = svn_pool_create(pool);
1448
1449 *list = apr_array_make(pool, 1, sizeof(rep_state_t *));
1450 rep = *first_rep;
1451
1452 /* for the top-level rep, we need the rep_args */
1453 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file, &rep, fs, pool,
1454 iterpool));
1455 while (1)
1456 {
1457 svn_pool_clear(iterpool);
1458
1459 /* fetch state, if that has not been done already */
1460 if (!rs)
1461 SVN_ERR(create_rep_state(&rs, &rep_header, &shared_file,
1462 &rep, fs, pool, iterpool));
1463
1464 /* for txn reps, there won't be a cached combined window */
1465 if ( !svn_fs_fs__id_txn_used(&rep.txn_id)
1466 && rep.expanded_size < SVN_DELTA_WINDOW_SIZE)
1467 SVN_ERR(get_cached_combined_window(window_p, rs, &is_cached, pool));
1468
1469 if (is_cached)
1470 {
1471 /* We already have a reconstructed window in our cache.
1472 Write a pseudo rep_state with the full length. */
1473 rs->start = 0;
1474 rs->current = 0;
1475 rs->size = (*window_p)->len;
1476 *src_state = rs;
1477 break;
1478 }
1479
1480 if (rep_header->type == svn_fs_fs__rep_plain)
1481 {
1482 /* This is a plaintext, so just return the current rep_state. */
1483 *src_state = rs;
1484 break;
1485 }
1486
1487 /* Push this rep onto the list. If it's self-compressed, we're done. */
1488 APR_ARRAY_PUSH(*list, rep_state_t *) = rs;
1489 if (rep_header->type == svn_fs_fs__rep_self_delta)
1490 {
1491 *src_state = NULL;
1492 break;
1493 }
1494
1495 rep.revision = rep_header->base_revision;
1496 rep.item_index = rep_header->base_item_index;
1497 rep.size = rep_header->base_length;
1498 svn_fs_fs__id_txn_reset(&rep.txn_id);
1499
1500 rs = NULL;
1501 }
1502 svn_pool_destroy(iterpool);
1503
1504 return SVN_NO_ERROR;
1505 }
1506
1507
1508 /* Create a rep_read_baton structure for node revision NODEREV in
1509 filesystem FS and store it in *RB_P. Perform all allocations in
1510 POOL. If rep is mutable, it must be for file contents. */
1511 static svn_error_t *
rep_read_get_baton(struct rep_read_baton ** rb_p,svn_fs_t * fs,representation_t * rep,pair_cache_key_t fulltext_cache_key,apr_pool_t * pool)1512 rep_read_get_baton(struct rep_read_baton **rb_p,
1513 svn_fs_t *fs,
1514 representation_t *rep,
1515 pair_cache_key_t fulltext_cache_key,
1516 apr_pool_t *pool)
1517 {
1518 struct rep_read_baton *b;
1519
1520 b = apr_pcalloc(pool, sizeof(*b));
1521 b->fs = fs;
1522 b->rep = *rep;
1523 b->base_window = NULL;
1524 b->chunk_index = 0;
1525 b->buf = NULL;
1526 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1527 b->checksum_finalized = FALSE;
1528 memcpy(b->md5_digest, rep->md5_digest, sizeof(rep->md5_digest));
1529 b->len = rep->expanded_size;
1530 b->off = 0;
1531 b->fulltext_cache_key = fulltext_cache_key;
1532 b->pool = svn_pool_create(pool);
1533 b->filehandle_pool = svn_pool_create(pool);
1534 b->fulltext_cache = NULL;
1535 b->fulltext_delivered = 0;
1536 b->current_fulltext = NULL;
1537
1538 /* Save our output baton. */
1539 *rb_p = b;
1540
1541 return SVN_NO_ERROR;
1542 }
1543
1544 /* Skip forwards to THIS_CHUNK in REP_STATE and then read the next delta
1545 window into *NWIN. Note that RS->CHUNK_INDEX will be THIS_CHUNK rather
1546 than THIS_CHUNK + 1 when this function returns. */
1547 static svn_error_t *
read_delta_window(svn_txdelta_window_t ** nwin,int this_chunk,rep_state_t * rs,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1548 read_delta_window(svn_txdelta_window_t **nwin, int this_chunk,
1549 rep_state_t *rs, apr_pool_t *result_pool,
1550 apr_pool_t *scratch_pool)
1551 {
1552 svn_boolean_t is_cached;
1553 apr_off_t start_offset;
1554 apr_off_t end_offset;
1555 apr_pool_t *iterpool;
1556
1557 SVN_ERR_ASSERT(rs->chunk_index <= this_chunk);
1558
1559 SVN_ERR(dbg_log_access(rs->sfile->fs, rs->revision, rs->item_index,
1560 NULL, SVN_FS_FS__ITEM_TYPE_ANY_REP, scratch_pool));
1561
1562 /* Read the next window. But first, try to find it in the cache. */
1563 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1564 result_pool, scratch_pool));
1565 if (is_cached)
1566 return SVN_NO_ERROR;
1567
1568 /* someone has to actually read the data from file. Open it */
1569 SVN_ERR(auto_open_shared_file(rs->sfile));
1570
1571 /* invoke the 'block-read' feature for non-txn data.
1572 However, don't do that if we are in the middle of some representation,
1573 because the block is unlikely to contain other data. */
1574 if ( rs->chunk_index == 0
1575 && SVN_IS_VALID_REVNUM(rs->revision)
1576 && use_block_read(rs->sfile->fs)
1577 && rs->raw_window_cache)
1578 {
1579 SVN_ERR(block_read(NULL, rs->sfile->fs, rs->revision, rs->item_index,
1580 rs->sfile->rfile, result_pool, scratch_pool));
1581
1582 /* reading the whole block probably also provided us with the
1583 desired txdelta window */
1584 SVN_ERR(get_cached_window(nwin, rs, this_chunk, &is_cached,
1585 result_pool, scratch_pool));
1586 if (is_cached)
1587 return SVN_NO_ERROR;
1588 }
1589
1590 /* data is still not cached -> we need to read it.
1591 Make sure we have all the necessary info. */
1592 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1593 SVN_ERR(auto_read_diff_version(rs, scratch_pool));
1594
1595 /* RS->FILE may be shared between RS instances -> make sure we point
1596 * to the right data. */
1597 start_offset = rs->start + rs->current;
1598 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, scratch_pool));
1599
1600 /* Skip windows to reach the current chunk if we aren't there yet. */
1601 iterpool = svn_pool_create(scratch_pool);
1602 while (rs->chunk_index < this_chunk)
1603 {
1604 svn_pool_clear(iterpool);
1605 SVN_ERR(svn_txdelta_skip_svndiff_window(rs->sfile->rfile->file,
1606 rs->ver, iterpool));
1607 rs->chunk_index++;
1608 SVN_ERR(get_file_offset(&start_offset, rs, iterpool));
1609 rs->current = start_offset - rs->start;
1610 if (rs->current >= rs->size)
1611 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1612 _("Reading one svndiff window read "
1613 "beyond the end of the "
1614 "representation"));
1615 }
1616 svn_pool_destroy(iterpool);
1617
1618 /* Actually read the next window. */
1619 SVN_ERR(svn_txdelta_read_svndiff_window(nwin, rs->sfile->rfile->stream,
1620 rs->ver, result_pool));
1621 SVN_ERR(get_file_offset(&end_offset, rs, scratch_pool));
1622 rs->current = end_offset - rs->start;
1623 if (rs->current > rs->size)
1624 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1625 _("Reading one svndiff window read beyond "
1626 "the end of the representation"));
1627
1628 /* the window has not been cached before, thus cache it now
1629 * (if caching is used for them at all) */
1630 if (SVN_IS_VALID_REVNUM(rs->revision))
1631 SVN_ERR(set_cached_window(*nwin, rs, scratch_pool));
1632
1633 return SVN_NO_ERROR;
1634 }
1635
1636 /* Read SIZE bytes from the representation RS and return it in *NWIN. */
1637 static svn_error_t *
read_plain_window(svn_stringbuf_t ** nwin,rep_state_t * rs,apr_size_t size,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1638 read_plain_window(svn_stringbuf_t **nwin, rep_state_t *rs,
1639 apr_size_t size, apr_pool_t *result_pool,
1640 apr_pool_t *scratch_pool)
1641 {
1642 apr_off_t offset;
1643
1644 /* RS->FILE may be shared between RS instances -> make sure we point
1645 * to the right data. */
1646 SVN_ERR(auto_open_shared_file(rs->sfile));
1647 SVN_ERR(auto_set_start_offset(rs, scratch_pool));
1648
1649 offset = rs->start + rs->current;
1650 SVN_ERR(rs_aligned_seek(rs, NULL, offset, scratch_pool));
1651
1652 /* Read the plain data. */
1653 *nwin = svn_stringbuf_create_ensure(size, result_pool);
1654 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, (*nwin)->data, size,
1655 NULL, NULL, result_pool));
1656 (*nwin)->data[size] = 0;
1657
1658 /* Update RS. */
1659 rs->current += (apr_off_t)size;
1660
1661 return SVN_NO_ERROR;
1662 }
1663
1664 /* Skip SIZE bytes from the PLAIN representation RS. */
1665 static svn_error_t *
skip_plain_window(rep_state_t * rs,apr_size_t size)1666 skip_plain_window(rep_state_t *rs,
1667 apr_size_t size)
1668 {
1669 /* Update RS. */
1670 rs->current += (apr_off_t)size;
1671
1672 return SVN_NO_ERROR;
1673 }
1674
1675 /* Get the undeltified window that is a result of combining all deltas
1676 from the current desired representation identified in *RB with its
1677 base representation. Store the window in *RESULT. */
1678 static svn_error_t *
get_combined_window(svn_stringbuf_t ** result,struct rep_read_baton * rb)1679 get_combined_window(svn_stringbuf_t **result,
1680 struct rep_read_baton *rb)
1681 {
1682 apr_pool_t *pool, *new_pool, *window_pool;
1683 int i;
1684 apr_array_header_t *windows;
1685 svn_stringbuf_t *source, *buf = rb->base_window;
1686 rep_state_t *rs;
1687 apr_pool_t *iterpool;
1688
1689 /* Read all windows that we need to combine. This is fine because
1690 the size of each window is relatively small (100kB) and skip-
1691 delta limits the number of deltas in a chain to well under 100.
1692 Stop early if one of them does not depend on its predecessors. */
1693 window_pool = svn_pool_create(rb->pool);
1694 windows = apr_array_make(window_pool, 0, sizeof(svn_txdelta_window_t *));
1695 iterpool = svn_pool_create(rb->pool);
1696 for (i = 0; i < rb->rs_list->nelts; ++i)
1697 {
1698 svn_txdelta_window_t *window;
1699
1700 svn_pool_clear(iterpool);
1701
1702 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1703 SVN_ERR(read_delta_window(&window, rb->chunk_index, rs, window_pool,
1704 iterpool));
1705
1706 APR_ARRAY_PUSH(windows, svn_txdelta_window_t *) = window;
1707 if (window->src_ops == 0)
1708 {
1709 ++i;
1710 break;
1711 }
1712 }
1713
1714 /* Combine in the windows from the other delta reps. */
1715 pool = svn_pool_create(rb->pool);
1716 for (--i; i >= 0; --i)
1717 {
1718 svn_txdelta_window_t *window;
1719
1720 svn_pool_clear(iterpool);
1721
1722 rs = APR_ARRAY_IDX(rb->rs_list, i, rep_state_t *);
1723 window = APR_ARRAY_IDX(windows, i, svn_txdelta_window_t *);
1724
1725 /* Maybe, we've got a PLAIN start representation. If we do, read
1726 as much data from it as the needed for the txdelta window's source
1727 view.
1728 Note that BUF / SOURCE may only be NULL in the first iteration.
1729 Also note that we may have short-cut reading the delta chain --
1730 in which case SRC_OPS is 0 and it might not be a PLAIN rep. */
1731 source = buf;
1732 if (source == NULL && rb->src_state != NULL)
1733 {
1734 /* Even if we don't need the source rep now, we still must keep
1735 * its read offset in sync with what we might need for the next
1736 * window. */
1737 if (window->src_ops)
1738 SVN_ERR(read_plain_window(&source, rb->src_state,
1739 window->sview_len,
1740 pool, iterpool));
1741 else
1742 SVN_ERR(skip_plain_window(rb->src_state, window->sview_len));
1743 }
1744
1745 /* Combine this window with the current one. */
1746 new_pool = svn_pool_create(rb->pool);
1747 buf = svn_stringbuf_create_ensure(window->tview_len, new_pool);
1748 buf->len = window->tview_len;
1749
1750 svn_txdelta_apply_instructions(window, source ? source->data : NULL,
1751 buf->data, &buf->len);
1752 if (buf->len != window->tview_len)
1753 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
1754 _("svndiff window length is "
1755 "corrupt"));
1756
1757 /* Cache windows only if the whole rep content could be read as a
1758 single chunk. Only then will no other chunk need a deeper RS
1759 list than the cached chunk. */
1760 if ( (rb->chunk_index == 0) && (rs->current == rs->size)
1761 && SVN_IS_VALID_REVNUM(rs->revision))
1762 SVN_ERR(set_cached_combined_window(buf, rs, new_pool));
1763
1764 rs->chunk_index++;
1765
1766 /* Cycle pools so that we only need to hold three windows at a time. */
1767 svn_pool_destroy(pool);
1768 pool = new_pool;
1769 }
1770 svn_pool_destroy(iterpool);
1771
1772 svn_pool_destroy(window_pool);
1773
1774 *result = buf;
1775 return SVN_NO_ERROR;
1776 }
1777
1778 /* Returns whether or not the expanded fulltext of the file is cachable
1779 * based on its size SIZE. The decision depends on the cache used by FFD.
1780 */
1781 static svn_boolean_t
fulltext_size_is_cachable(fs_fs_data_t * ffd,svn_filesize_t size)1782 fulltext_size_is_cachable(fs_fs_data_t *ffd, svn_filesize_t size)
1783 {
1784 return (size < APR_SIZE_MAX)
1785 && svn_cache__is_cachable(ffd->fulltext_cache, (apr_size_t)size);
1786 }
1787
1788 /* Close method used on streams returned by read_representation().
1789 */
1790 static svn_error_t *
rep_read_contents_close(void * baton)1791 rep_read_contents_close(void *baton)
1792 {
1793 struct rep_read_baton *rb = baton;
1794
1795 svn_pool_destroy(rb->pool);
1796 svn_pool_destroy(rb->filehandle_pool);
1797
1798 return SVN_NO_ERROR;
1799 }
1800
1801 /* Return the next *LEN bytes of the rep from our plain / delta windows
1802 and store them in *BUF. */
1803 static svn_error_t *
get_contents_from_windows(struct rep_read_baton * rb,char * buf,apr_size_t * len)1804 get_contents_from_windows(struct rep_read_baton *rb,
1805 char *buf,
1806 apr_size_t *len)
1807 {
1808 apr_size_t copy_len, remaining = *len;
1809 char *cur = buf;
1810 rep_state_t *rs;
1811
1812 /* Special case for when there are no delta reps, only a plain
1813 text. */
1814 if (rb->rs_list->nelts == 0)
1815 {
1816 copy_len = remaining;
1817 rs = rb->src_state;
1818
1819 if (rb->base_window != NULL)
1820 {
1821 /* We got the desired rep directly from the cache.
1822 This is where we need the pseudo rep_state created
1823 by build_rep_list(). */
1824 apr_size_t offset = (apr_size_t)rs->current;
1825 if (offset >= rb->base_window->len)
1826 copy_len = 0ul;
1827 else if (copy_len > rb->base_window->len - offset)
1828 copy_len = rb->base_window->len - offset;
1829
1830 memcpy (cur, rb->base_window->data + offset, copy_len);
1831 }
1832 else
1833 {
1834 apr_off_t offset;
1835 if (((apr_off_t) copy_len) > rs->size - rs->current)
1836 copy_len = (apr_size_t) (rs->size - rs->current);
1837
1838 SVN_ERR(auto_open_shared_file(rs->sfile));
1839 SVN_ERR(auto_set_start_offset(rs, rb->pool));
1840
1841 offset = rs->start + rs->current;
1842 SVN_ERR(rs_aligned_seek(rs, NULL, offset, rb->pool));
1843 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, cur,
1844 copy_len, NULL, NULL, rb->pool));
1845 }
1846
1847 rs->current += copy_len;
1848 *len = copy_len;
1849 return SVN_NO_ERROR;
1850 }
1851
1852 while (remaining > 0)
1853 {
1854 /* If we have buffered data from a previous chunk, use that. */
1855 if (rb->buf)
1856 {
1857 /* Determine how much to copy from the buffer. */
1858 copy_len = rb->buf_len - rb->buf_pos;
1859 if (copy_len > remaining)
1860 copy_len = remaining;
1861
1862 /* Actually copy the data. */
1863 memcpy(cur, rb->buf + rb->buf_pos, copy_len);
1864 rb->buf_pos += copy_len;
1865 cur += copy_len;
1866 remaining -= copy_len;
1867
1868 /* If the buffer is all used up, clear it and empty the
1869 local pool. */
1870 if (rb->buf_pos == rb->buf_len)
1871 {
1872 svn_pool_clear(rb->pool);
1873 rb->buf = NULL;
1874 }
1875 }
1876 else
1877 {
1878 svn_stringbuf_t *sbuf = NULL;
1879
1880 rs = APR_ARRAY_IDX(rb->rs_list, 0, rep_state_t *);
1881 if (rs->current == rs->size)
1882 break;
1883
1884 /* Get more buffered data by evaluating a chunk. */
1885 SVN_ERR(get_combined_window(&sbuf, rb));
1886
1887 rb->chunk_index++;
1888 rb->buf_len = sbuf->len;
1889 rb->buf = sbuf->data;
1890 rb->buf_pos = 0;
1891 }
1892 }
1893
1894 *len = cur - buf;
1895
1896 return SVN_NO_ERROR;
1897 }
1898
/* Baton type for get_fulltext_partial.  The caller fills in BUFFER, START
 * and LEN to describe the requested range; get_fulltext_partial reports
 * the number of bytes it copied in READ. */
typedef struct fulltext_baton_t
{
  /* Target buffer to write to; of at least LEN bytes. */
  char *buffer;

  /* Offset within the respective fulltext at which we shall start to
     copy data into BUFFER. */
  apr_size_t start;

  /* Number of bytes to copy.  The actual amount may be less in case
     the fulltext is short(er). */
  apr_size_t len;

  /* Number of bytes actually copied into BUFFER. */
  apr_size_t read;
} fulltext_baton_t;
1916
1917 /* Implement svn_cache__partial_getter_func_t for fulltext caches.
1918 * From the fulltext in DATA, we copy the range specified by the
1919 * fulltext_baton_t* BATON into the buffer provided by that baton.
1920 * OUT and RESULT_POOL are not used.
1921 */
1922 static svn_error_t *
get_fulltext_partial(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)1923 get_fulltext_partial(void **out,
1924 const void *data,
1925 apr_size_t data_len,
1926 void *baton,
1927 apr_pool_t *result_pool)
1928 {
1929 fulltext_baton_t *fulltext_baton = baton;
1930
1931 /* We cached the fulltext with an NUL appended to it. */
1932 apr_size_t fulltext_len = data_len - 1;
1933
1934 /* Clip the copy range to what the fulltext size allows. */
1935 apr_size_t start = MIN(fulltext_baton->start, fulltext_len);
1936 fulltext_baton->read = MIN(fulltext_len - start, fulltext_baton->len);
1937
1938 /* Copy the data to the output buffer and be done. */
1939 memcpy(fulltext_baton->buffer, (const char *)data + start,
1940 fulltext_baton->read);
1941
1942 return SVN_NO_ERROR;
1943 }
1944
1945 /* Find the fulltext specified in BATON in the fulltext cache given
1946 * as well by BATON. If that succeeds, set *CACHED to TRUE and copy
1947 * up to the next *LEN bytes into BUFFER. Set *LEN to the actual
1948 * number of bytes copied.
1949 */
1950 static svn_error_t *
get_contents_from_fulltext(svn_boolean_t * cached,struct rep_read_baton * baton,char * buffer,apr_size_t * len)1951 get_contents_from_fulltext(svn_boolean_t *cached,
1952 struct rep_read_baton *baton,
1953 char *buffer,
1954 apr_size_t *len)
1955 {
1956 void *dummy;
1957 fulltext_baton_t fulltext_baton;
1958
1959 SVN_ERR_ASSERT((apr_size_t)baton->fulltext_delivered
1960 == baton->fulltext_delivered);
1961 fulltext_baton.buffer = buffer;
1962 fulltext_baton.start = (apr_size_t)baton->fulltext_delivered;
1963 fulltext_baton.len = *len;
1964 fulltext_baton.read = 0;
1965
1966 SVN_ERR(svn_cache__get_partial(&dummy, cached, baton->fulltext_cache,
1967 &baton->fulltext_cache_key,
1968 get_fulltext_partial, &fulltext_baton,
1969 baton->pool));
1970
1971 if (*cached)
1972 {
1973 baton->fulltext_delivered += fulltext_baton.read;
1974 *len = fulltext_baton.read;
1975 }
1976
1977 return SVN_NO_ERROR;
1978 }
1979
/* Return the capacity to request for a string buffer that shall receive
 * a (full-)text of NEEDED bytes.  The result is never below NEEDED.
 *
 * Such buffers may be very large and can cause memory fragmentation.
 * Simple heuristics make fragmentation less likely.
 */
static apr_size_t
optimimal_allocation_size(apr_size_t needed)
{
  /* Assumed per-allocation overhead, shared between OS memory
   * management, APR memory management and svn_stringbuf_t. */
  const apr_size_t overhead = 0x400;
  apr_size_t candidate = SVN__STREAM_CHUNK_SIZE;

  /* Two cases where NEEDED is returned as is:
   * - Sizes that are safe for other ephemeral buffers are safe for ours.
   * - Paranoia: sizes for which the doubling below might overflow.
   *   This comparison also works for NEEDED == APR_SIZE_MAX. */
  if (needed <= SVN__STREAM_CHUNK_SIZE
      || needed >= APR_SIZE_MAX / 2 - overhead)
    return needed;

  /* SVN__STREAM_CHUNK_SIZE is a power of two by definition and NEEDED is
   * known to be larger, so start there.  Keep doubling until NEEDED plus
   * OVERHEAD fits.  The APR allocator will round the request up to the
   * next full page size. */
  while (candidate - overhead < needed)
    candidate <<= 1;

  /* This is above or equal to NEEDED. */
  return candidate - overhead;
}
2021
2022 /* After a fulltext cache lookup failure, we will continue to read from
2023 * combined delta or plain windows. However, we must first make that data
2024 * stream in BATON catch up tho the position LEN already delivered from the
2025 * fulltext cache. Also, we need to store the reconstructed fulltext if we
2026 * want to cache it at the end.
2027 */
2028 static svn_error_t *
skip_contents(struct rep_read_baton * baton,svn_filesize_t len)2029 skip_contents(struct rep_read_baton *baton,
2030 svn_filesize_t len)
2031 {
2032 svn_error_t *err = SVN_NO_ERROR;
2033
2034 /* Do we want to cache the reconstructed fulltext? */
2035 if (SVN_IS_VALID_REVNUM(baton->fulltext_cache_key.revision))
2036 {
2037 char *buffer;
2038 svn_filesize_t to_alloc = MAX(len, baton->len);
2039
2040 /* This should only be happening if BATON->LEN and LEN are
2041 * cacheable, implying they fit into memory. */
2042 SVN_ERR_ASSERT((apr_size_t)to_alloc == to_alloc);
2043
2044 /* Allocate the fulltext buffer. */
2045 baton->current_fulltext = svn_stringbuf_create_ensure(
2046 optimimal_allocation_size((apr_size_t)to_alloc),
2047 baton->filehandle_pool);
2048
2049 /* Read LEN bytes from the window stream and store the data
2050 * in the fulltext buffer (will be filled by further reads later). */
2051 baton->current_fulltext->len = (apr_size_t)len;
2052 baton->current_fulltext->data[(apr_size_t)len] = 0;
2053
2054 buffer = baton->current_fulltext->data;
2055 while (len > 0 && !err)
2056 {
2057 apr_size_t to_read = (apr_size_t)len;
2058 err = get_contents_from_windows(baton, buffer, &to_read);
2059 len -= to_read;
2060 buffer += to_read;
2061 }
2062
2063 /* Make the MD5 calculation catch up with the data delivered
2064 * (we did not run MD5 on the data that we took from the cache). */
2065 if (!err)
2066 {
2067 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2068 baton->current_fulltext->data,
2069 baton->current_fulltext->len));
2070 baton->off += baton->current_fulltext->len;
2071 }
2072 }
2073 else if (len > 0)
2074 {
2075 /* Simply drain LEN bytes from the window stream. */
2076 apr_pool_t *subpool = svn_pool_create(baton->pool);
2077 char *buffer = apr_palloc(subpool, SVN__STREAM_CHUNK_SIZE);
2078
2079 while (len > 0 && !err)
2080 {
2081 apr_size_t to_read = len > SVN__STREAM_CHUNK_SIZE
2082 ? SVN__STREAM_CHUNK_SIZE
2083 : (apr_size_t)len;
2084
2085 err = get_contents_from_windows(baton, buffer, &to_read);
2086 len -= to_read;
2087
2088 /* Make the MD5 calculation catch up with the data delivered
2089 * (we did not run MD5 on the data that we took from the cache). */
2090 if (!err)
2091 {
2092 SVN_ERR(svn_checksum_update(baton->md5_checksum_ctx,
2093 buffer, to_read));
2094 baton->off += to_read;
2095 }
2096 }
2097
2098 svn_pool_destroy(subpool);
2099 }
2100
2101 return svn_error_trace(err);
2102 }
2103
2104 /* BATON is of type `rep_read_baton'; read the next *LEN bytes of the
2105 representation and store them in *BUF. Sum as we read and verify
2106 the MD5 sum at the end. This is a READ_FULL_FN for svn_stream_t. */
2107 static svn_error_t *
rep_read_contents(void * baton,char * buf,apr_size_t * len)2108 rep_read_contents(void *baton,
2109 char *buf,
2110 apr_size_t *len)
2111 {
2112 struct rep_read_baton *rb = baton;
2113 apr_size_t len_requested = *len;
2114
2115 /* Get data from the fulltext cache for as long as we can. */
2116 if (rb->fulltext_cache)
2117 {
2118 svn_boolean_t cached;
2119 SVN_ERR(get_contents_from_fulltext(&cached, rb, buf, len));
2120 if (cached)
2121 return SVN_NO_ERROR;
2122
2123 /* Cache miss. From now on, we will never read from the fulltext
2124 * cache for this representation anymore. */
2125 rb->fulltext_cache = NULL;
2126 }
2127
2128 /* No fulltext cache to help us. We must read from the window stream. */
2129 if (!rb->rs_list)
2130 {
2131 /* Window stream not initialized, yet. Do it now. */
2132 rb->len = rb->rep.expanded_size;
2133 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2134 &rb->src_state, rb->fs, &rb->rep,
2135 rb->filehandle_pool));
2136
2137 /* In case we did read from the fulltext cache before, make the
2138 * window stream catch up. Also, initialize the fulltext buffer
2139 * if we want to cache the fulltext at the end. */
2140 SVN_ERR(skip_contents(rb, rb->fulltext_delivered));
2141 }
2142
2143 /* Get the next block of data.
2144 * Keep in mind that the representation might be empty and leave us
2145 * already positioned at the end of the rep. */
2146 if (rb->off == rb->len)
2147 *len = 0;
2148 else
2149 SVN_ERR(get_contents_from_windows(rb, buf, len));
2150
2151 if (rb->current_fulltext)
2152 svn_stringbuf_appendbytes(rb->current_fulltext, buf, *len);
2153
2154 /* This is a FULL_READ_FN so a short read implies EOF and we can
2155 verify the length. */
2156 rb->off += *len;
2157 if (*len < len_requested && rb->off != rb->len)
2158 {
2159 /* A warning rather than an error to allow the data to be
2160 retrieved when the length is wrong but the data is
2161 present, i.e. if repository corruption has stored the wrong
2162 expanded length. */
2163 svn_error_t *err = svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2164 _("Length mismatch while reading representation:"
2165 " expected %s,"
2166 " got %s"),
2167 apr_psprintf(rb->pool, "%" SVN_FILESIZE_T_FMT,
2168 rb->len),
2169 apr_psprintf(rb->pool, "%" SVN_FILESIZE_T_FMT,
2170 rb->off));
2171
2172 rb->fs->warning(rb->fs->warning_baton, err);
2173 svn_error_clear(err);
2174 }
2175
2176 /* Perform checksumming. We want to check the checksum as soon as
2177 the last byte of data is read, in case the caller never performs
2178 a short read, but we don't want to finalize the MD5 context
2179 twice. */
2180 if (!rb->checksum_finalized)
2181 {
2182 SVN_ERR(svn_checksum_update(rb->md5_checksum_ctx, buf, *len));
2183 if (rb->off == rb->len)
2184 {
2185 svn_checksum_t *md5_checksum;
2186 svn_checksum_t expected;
2187 expected.kind = svn_checksum_md5;
2188 expected.digest = rb->md5_digest;
2189
2190 rb->checksum_finalized = TRUE;
2191 SVN_ERR(svn_checksum_final(&md5_checksum, rb->md5_checksum_ctx,
2192 rb->pool));
2193 if (!svn_checksum_match(md5_checksum, &expected))
2194 return svn_error_create(SVN_ERR_FS_CORRUPT,
2195 svn_checksum_mismatch_err(&expected, md5_checksum,
2196 rb->pool,
2197 _("Checksum mismatch while reading representation")),
2198 NULL);
2199 }
2200 }
2201
2202 if (rb->off == rb->len && rb->current_fulltext)
2203 {
2204 fs_fs_data_t *ffd = rb->fs->fsap_data;
2205 SVN_ERR(svn_cache__set(ffd->fulltext_cache, &rb->fulltext_cache_key,
2206 rb->current_fulltext, rb->pool));
2207 rb->current_fulltext = NULL;
2208 }
2209
2210 return SVN_NO_ERROR;
2211 }
2212
2213 svn_error_t *
svn_fs_fs__get_contents(svn_stream_t ** contents_p,svn_fs_t * fs,representation_t * rep,svn_boolean_t cache_fulltext,apr_pool_t * pool)2214 svn_fs_fs__get_contents(svn_stream_t **contents_p,
2215 svn_fs_t *fs,
2216 representation_t *rep,
2217 svn_boolean_t cache_fulltext,
2218 apr_pool_t *pool)
2219 {
2220 if (! rep)
2221 {
2222 *contents_p = svn_stream_empty(pool);
2223 }
2224 else
2225 {
2226 fs_fs_data_t *ffd = fs->fsap_data;
2227 struct rep_read_baton *rb;
2228
2229 pair_cache_key_t fulltext_cache_key = { 0 };
2230 fulltext_cache_key.revision = rep->revision;
2231 fulltext_cache_key.second = rep->item_index;
2232
2233 /* Initialize the reader baton. Some members may added lazily
2234 * while reading from the stream */
2235 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2236
2237 /* Make the stream attempt fulltext cache lookups if the fulltext
2238 * is cacheable. If it is not, then also don't try to buffer and
2239 * cache it. */
2240 if (ffd->fulltext_cache && cache_fulltext
2241 && SVN_IS_VALID_REVNUM(rep->revision)
2242 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2243 {
2244 rb->fulltext_cache = ffd->fulltext_cache;
2245 }
2246 else
2247 {
2248 /* This will also prevent the reconstructed fulltext from being
2249 put into the cache. */
2250 rb->fulltext_cache_key.revision = SVN_INVALID_REVNUM;
2251 }
2252
2253 *contents_p = svn_stream_create(rb, pool);
2254 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2255 rep_read_contents);
2256 svn_stream_set_close(*contents_p, rep_read_contents_close);
2257 }
2258
2259 return SVN_NO_ERROR;
2260 }
2261
2262 svn_error_t *
svn_fs_fs__get_contents_from_file(svn_stream_t ** contents_p,svn_fs_t * fs,representation_t * rep,apr_file_t * file,apr_off_t offset,apr_pool_t * pool)2263 svn_fs_fs__get_contents_from_file(svn_stream_t **contents_p,
2264 svn_fs_t *fs,
2265 representation_t *rep,
2266 apr_file_t *file,
2267 apr_off_t offset,
2268 apr_pool_t *pool)
2269 {
2270 struct rep_read_baton *rb;
2271 pair_cache_key_t fulltext_cache_key = { SVN_INVALID_REVNUM, 0 };
2272 rep_state_t *rs = apr_pcalloc(pool, sizeof(*rs));
2273 svn_fs_fs__rep_header_t *rh;
2274
2275 /* Initialize the reader baton. Some members may added lazily
2276 * while reading from the stream. */
2277 SVN_ERR(rep_read_get_baton(&rb, fs, rep, fulltext_cache_key, pool));
2278
2279 /* Continue constructing RS. Leave caches as NULL. */
2280 rs->size = rep->size;
2281 rs->revision = SVN_INVALID_REVNUM;
2282 rs->item_index = 0;
2283 rs->ver = -1;
2284 rs->start = -1;
2285
2286 /* Provide just enough file access info to allow for a basic read from
2287 * FILE but leave all index / footer info with empty values b/c FILE
2288 * probably is not a complete revision file. */
2289 rs->sfile = apr_pcalloc(pool, sizeof(*rs->sfile));
2290 rs->sfile->revision = rep->revision;
2291 rs->sfile->pool = pool;
2292 rs->sfile->fs = fs;
2293 rs->sfile->rfile = apr_pcalloc(pool, sizeof(*rs->sfile->rfile));
2294 rs->sfile->rfile->start_revision = SVN_INVALID_REVNUM;
2295 rs->sfile->rfile->file = file;
2296 rs->sfile->rfile->stream = svn_stream_from_aprfile2(file, TRUE, pool);
2297
2298 /* Read the rep header. */
2299 SVN_ERR(aligned_seek(fs, file, NULL, offset, pool));
2300 SVN_ERR(svn_fs_fs__read_rep_header(&rh, rs->sfile->rfile->stream,
2301 pool, pool));
2302 SVN_ERR(get_file_offset(&rs->start, rs, pool));
2303 rs->header_size = rh->header_size;
2304
2305 /* Log the access. */
2306 SVN_ERR(dbg_log_access(fs, SVN_INVALID_REVNUM, 0, rh,
2307 SVN_FS_FS__ITEM_TYPE_ANY_REP, pool));
2308
2309 /* Build the representation list (delta chain). */
2310 if (rh->type == svn_fs_fs__rep_plain)
2311 {
2312 rb->rs_list = apr_array_make(pool, 0, sizeof(rep_state_t *));
2313 rb->src_state = rs;
2314 }
2315 else if (rh->type == svn_fs_fs__rep_self_delta)
2316 {
2317 rb->rs_list = apr_array_make(pool, 1, sizeof(rep_state_t *));
2318 APR_ARRAY_PUSH(rb->rs_list, rep_state_t *) = rs;
2319 rb->src_state = NULL;
2320 }
2321 else
2322 {
2323 representation_t next_rep = { 0 };
2324
2325 /* skip "SVNx" diff marker */
2326 rs->current = 4;
2327
2328 /* REP's base rep is inside a proper revision.
2329 * It can be reconstructed in the usual way. */
2330 next_rep.revision = rh->base_revision;
2331 next_rep.item_index = rh->base_item_index;
2332 next_rep.size = rh->base_length;
2333 svn_fs_fs__id_txn_reset(&next_rep.txn_id);
2334
2335 SVN_ERR(build_rep_list(&rb->rs_list, &rb->base_window,
2336 &rb->src_state, rb->fs, &next_rep,
2337 rb->filehandle_pool));
2338
2339 /* Insert the access to REP as the first element of the delta chain. */
2340 SVN_ERR(svn_sort__array_insert2(rb->rs_list, &rs, 0));
2341 }
2342
2343 /* Now, the baton is complete and we can assemble the stream around it. */
2344 *contents_p = svn_stream_create(rb, pool);
2345 svn_stream_set_read2(*contents_p, NULL /* only full read support */,
2346 rep_read_contents);
2347 svn_stream_set_close(*contents_p, rep_read_contents_close);
2348
2349 return SVN_NO_ERROR;
2350 }
2351
/* Baton for cache_access_wrapper.  Wraps the original parameters of
 * svn_fs_fs__try_process_file_contents().
 */
typedef struct cache_access_wrapper_baton_t
{
  /* The content processor that cache_access_wrapper shall invoke. */
  svn_fs_process_contents_func_t func;

  /* The baton to pass through to FUNC. */
  void* baton;
} cache_access_wrapper_baton_t;
2360
2361 /* Wrapper to translate between svn_fs_process_contents_func_t and
2362 * svn_cache__partial_getter_func_t.
2363 */
2364 static svn_error_t *
cache_access_wrapper(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * pool)2365 cache_access_wrapper(void **out,
2366 const void *data,
2367 apr_size_t data_len,
2368 void *baton,
2369 apr_pool_t *pool)
2370 {
2371 cache_access_wrapper_baton_t *wrapper_baton = baton;
2372
2373 SVN_ERR(wrapper_baton->func((const unsigned char *)data,
2374 data_len - 1, /* cache adds terminating 0 */
2375 wrapper_baton->baton,
2376 pool));
2377
2378 /* non-NULL value to signal the calling cache that all went well */
2379 *out = baton;
2380
2381 return SVN_NO_ERROR;
2382 }
2383
2384 svn_error_t *
svn_fs_fs__try_process_file_contents(svn_boolean_t * success,svn_fs_t * fs,node_revision_t * noderev,svn_fs_process_contents_func_t processor,void * baton,apr_pool_t * pool)2385 svn_fs_fs__try_process_file_contents(svn_boolean_t *success,
2386 svn_fs_t *fs,
2387 node_revision_t *noderev,
2388 svn_fs_process_contents_func_t processor,
2389 void* baton,
2390 apr_pool_t *pool)
2391 {
2392 representation_t *rep = noderev->data_rep;
2393 if (rep)
2394 {
2395 fs_fs_data_t *ffd = fs->fsap_data;
2396 pair_cache_key_t fulltext_cache_key = { 0 };
2397
2398 fulltext_cache_key.revision = rep->revision;
2399 fulltext_cache_key.second = rep->item_index;
2400 if (ffd->fulltext_cache && SVN_IS_VALID_REVNUM(rep->revision)
2401 && fulltext_size_is_cachable(ffd, rep->expanded_size))
2402 {
2403 cache_access_wrapper_baton_t wrapper_baton;
2404 void *dummy = NULL;
2405
2406 wrapper_baton.func = processor;
2407 wrapper_baton.baton = baton;
2408 return svn_cache__get_partial(&dummy, success,
2409 ffd->fulltext_cache,
2410 &fulltext_cache_key,
2411 cache_access_wrapper,
2412 &wrapper_baton,
2413 pool);
2414 }
2415 }
2416
2417 *success = FALSE;
2418 return SVN_NO_ERROR;
2419 }
2420
2421
/* Baton used when reading delta windows. */
struct delta_read_baton
{
  /* State of the representation whose delta windows get read. */
  rep_state_t *rs;

  /* MD5 digest reported by delta_read_md5_digest(). */
  unsigned char md5_digest[APR_MD5_DIGESTSIZE];
};
2428
2429 /* This implements the svn_txdelta_next_window_fn_t interface. */
2430 static svn_error_t *
delta_read_next_window(svn_txdelta_window_t ** window,void * baton,apr_pool_t * pool)2431 delta_read_next_window(svn_txdelta_window_t **window, void *baton,
2432 apr_pool_t *pool)
2433 {
2434 struct delta_read_baton *drb = baton;
2435 apr_pool_t *scratch_pool = svn_pool_create(pool);
2436
2437 *window = NULL;
2438 if (drb->rs->current < drb->rs->size)
2439 {
2440 SVN_ERR(read_delta_window(window, drb->rs->chunk_index, drb->rs, pool,
2441 scratch_pool));
2442 drb->rs->chunk_index++;
2443 }
2444
2445 svn_pool_destroy(scratch_pool);
2446
2447 return SVN_NO_ERROR;
2448 }
2449
2450 /* This implements the svn_txdelta_md5_digest_fn_t interface. */
2451 static const unsigned char *
delta_read_md5_digest(void * baton)2452 delta_read_md5_digest(void *baton)
2453 {
2454 struct delta_read_baton *drb = baton;
2455 return drb->md5_digest;
2456 }
2457
2458 /* Return a txdelta stream for on-disk representation REP_STATE
2459 * of TARGET. Allocate the result in POOL.
2460 */
2461 static svn_txdelta_stream_t *
get_storaged_delta_stream(rep_state_t * rep_state,node_revision_t * target,apr_pool_t * pool)2462 get_storaged_delta_stream(rep_state_t *rep_state,
2463 node_revision_t *target,
2464 apr_pool_t *pool)
2465 {
2466 /* Create the delta read baton. */
2467 struct delta_read_baton *drb = apr_pcalloc(pool, sizeof(*drb));
2468 drb->rs = rep_state;
2469 memcpy(drb->md5_digest, target->data_rep->md5_digest,
2470 sizeof(drb->md5_digest));
2471 return svn_txdelta_stream_create(drb, delta_read_next_window,
2472 delta_read_md5_digest, pool);
2473 }
2474
2475 svn_error_t *
svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t ** stream_p,svn_fs_t * fs,node_revision_t * source,node_revision_t * target,apr_pool_t * pool)2476 svn_fs_fs__get_file_delta_stream(svn_txdelta_stream_t **stream_p,
2477 svn_fs_t *fs,
2478 node_revision_t *source,
2479 node_revision_t *target,
2480 apr_pool_t *pool)
2481 {
2482 svn_stream_t *source_stream, *target_stream;
2483 rep_state_t *rep_state;
2484 svn_fs_fs__rep_header_t *rep_header;
2485 fs_fs_data_t *ffd = fs->fsap_data;
2486
2487 /* Try a shortcut: if the target is stored as a delta against the source,
2488 then just use that delta. However, prefer using the fulltext cache
2489 whenever that is available. */
2490 if (target->data_rep && (source || ! ffd->fulltext_cache))
2491 {
2492 /* Read target's base rep if any. */
2493 SVN_ERR(create_rep_state(&rep_state, &rep_header, NULL,
2494 target->data_rep, fs, pool, pool));
2495
2496 if (source && source->data_rep && target->data_rep)
2497 {
2498 /* If that matches source, then use this delta as is.
2499 Note that we want an actual delta here. E.g. a self-delta would
2500 not be good enough. */
2501 if (rep_header->type == svn_fs_fs__rep_delta
2502 && rep_header->base_revision == source->data_rep->revision
2503 && rep_header->base_item_index == source->data_rep->item_index)
2504 {
2505 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2506 return SVN_NO_ERROR;
2507 }
2508 }
2509 else if (!source)
2510 {
2511 /* We want a self-delta. There is a fair chance that TARGET got
2512 added in this revision and is already stored in the requested
2513 format. */
2514 if (rep_header->type == svn_fs_fs__rep_self_delta)
2515 {
2516 *stream_p = get_storaged_delta_stream(rep_state, target, pool);
2517 return SVN_NO_ERROR;
2518 }
2519 }
2520
2521 /* Don't keep file handles open for longer than necessary. */
2522 if (rep_state->sfile->rfile)
2523 {
2524 SVN_ERR(svn_fs_fs__close_revision_file(rep_state->sfile->rfile));
2525 rep_state->sfile->rfile = NULL;
2526 }
2527 }
2528
2529 /* Read both fulltexts and construct a delta. */
2530 if (source)
2531 SVN_ERR(svn_fs_fs__get_contents(&source_stream, fs, source->data_rep,
2532 TRUE, pool));
2533 else
2534 source_stream = svn_stream_empty(pool);
2535 SVN_ERR(svn_fs_fs__get_contents(&target_stream, fs, target->data_rep,
2536 TRUE, pool));
2537
2538 /* Because source and target stream will already verify their content,
2539 * there is no need to do this once more. In particular if the stream
2540 * content is being fetched from cache. */
2541 svn_txdelta2(stream_p, source_stream, target_stream, FALSE, pool);
2542
2543 return SVN_NO_ERROR;
2544 }
2545
2546 /* Return TRUE when all svn_fs_dirent_t* in ENTRIES are already sorted
2547 by their respective name. */
2548 static svn_boolean_t
sorted(apr_array_header_t * entries)2549 sorted(apr_array_header_t *entries)
2550 {
2551 int i;
2552
2553 const svn_fs_dirent_t * const *dirents = (const void *)entries->elts;
2554 for (i = 0; i < entries->nelts-1; ++i)
2555 if (strcmp(dirents[i]->name, dirents[i+1]->name) > 0)
2556 return FALSE;
2557
2558 return TRUE;
2559 }
2560
2561 /* Compare the names of the two dirents given in **A and **B. */
2562 static int
compare_dirents(const void * a,const void * b)2563 compare_dirents(const void *a, const void *b)
2564 {
2565 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2566 const svn_fs_dirent_t *rhs = *((const svn_fs_dirent_t * const *) b);
2567
2568 return strcmp(lhs->name, rhs->name);
2569 }
2570
2571 /* Compare the name of the dirents given in **A with the C string in *B. */
2572 static int
compare_dirent_name(const void * a,const void * b)2573 compare_dirent_name(const void *a, const void *b)
2574 {
2575 const svn_fs_dirent_t *lhs = *((const svn_fs_dirent_t * const *) a);
2576 const char *rhs = b;
2577
2578 return strcmp(lhs->name, rhs);
2579 }
2580
2581 /* Into *ENTRIES_P, read all directories entries from the key-value text in
2582 * STREAM. If INCREMENTAL is TRUE, read until the end of the STREAM and
2583 * update the data. ID is provided for nicer error messages.
2584 */
2585 static svn_error_t *
read_dir_entries(apr_array_header_t ** entries_p,svn_stream_t * stream,svn_boolean_t incremental,const svn_fs_id_t * id,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2586 read_dir_entries(apr_array_header_t **entries_p,
2587 svn_stream_t *stream,
2588 svn_boolean_t incremental,
2589 const svn_fs_id_t *id,
2590 apr_pool_t *result_pool,
2591 apr_pool_t *scratch_pool)
2592 {
2593 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2594 apr_hash_t *hash = NULL;
2595 const char *terminator = SVN_HASH_TERMINATOR;
2596 apr_array_header_t *entries = NULL;
2597
2598 if (incremental)
2599 hash = svn_hash__make(scratch_pool);
2600 else
2601 entries = apr_array_make(result_pool, 16, sizeof(svn_fs_dirent_t *));
2602
2603 /* Read until the terminator (non-incremental) or the end of STREAM
2604 (incremental mode). In the latter mode, we use a temporary HASH
2605 to make updating and removing entries cheaper. */
2606 while (1)
2607 {
2608 svn_hash__entry_t entry;
2609 svn_fs_dirent_t *dirent;
2610 char *str;
2611
2612 svn_pool_clear(iterpool);
2613 SVN_ERR_W(svn_hash__read_entry(&entry, stream, terminator,
2614 incremental, iterpool),
2615 apr_psprintf(iterpool,
2616 _("Directory representation corrupt in '%s'"),
2617 svn_fs_fs__id_unparse(id, scratch_pool)->data));
2618
2619 /* End of directory? */
2620 if (entry.key == NULL)
2621 {
2622 /* In incremental mode, we skip the terminator and read the
2623 increments following it until the end of the stream. */
2624 if (incremental && terminator)
2625 terminator = NULL;
2626 else
2627 break;
2628 }
2629
2630 /* Deleted entry? */
2631 if (entry.val == NULL)
2632 {
2633 /* We must be in incremental mode */
2634 assert(hash);
2635 apr_hash_set(hash, entry.key, entry.keylen, NULL);
2636 continue;
2637 }
2638
2639 /* Add a new directory entry. */
2640 dirent = apr_pcalloc(result_pool, sizeof(*dirent));
2641 dirent->name = apr_pstrmemdup(result_pool, entry.key, entry.keylen);
2642
2643 str = svn_cstring_tokenize(" ", &entry.val);
2644 if (str == NULL)
2645 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2646 _("Directory entry corrupt in '%s'"),
2647 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2648
2649 if (strcmp(str, SVN_FS_FS__KIND_FILE) == 0)
2650 {
2651 dirent->kind = svn_node_file;
2652 }
2653 else if (strcmp(str, SVN_FS_FS__KIND_DIR) == 0)
2654 {
2655 dirent->kind = svn_node_dir;
2656 }
2657 else
2658 {
2659 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2660 _("Directory entry corrupt in '%s'"),
2661 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2662 }
2663
2664 str = svn_cstring_tokenize(" ", &entry.val);
2665 if (str == NULL)
2666 return svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,
2667 _("Directory entry corrupt in '%s'"),
2668 svn_fs_fs__id_unparse(id, scratch_pool)->data);
2669
2670 SVN_ERR(svn_fs_fs__id_parse(&dirent->id, str, result_pool));
2671
2672 /* In incremental mode, update the hash; otherwise, write to the
2673 * final array. Be sure to use hash keys that survive this iteration.
2674 */
2675 if (incremental)
2676 apr_hash_set(hash, dirent->name, entry.keylen, dirent);
2677 else
2678 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = dirent;
2679 }
2680
2681 /* Convert container to a sorted array. */
2682 if (incremental)
2683 {
2684 apr_hash_index_t *hi;
2685
2686 entries = apr_array_make(result_pool, apr_hash_count(hash),
2687 sizeof(svn_fs_dirent_t *));
2688 for (hi = apr_hash_first(iterpool, hash); hi; hi = apr_hash_next(hi))
2689 APR_ARRAY_PUSH(entries, svn_fs_dirent_t *) = apr_hash_this_val(hi);
2690 }
2691
2692 if (!sorted(entries))
2693 svn_sort__array(entries, compare_dirents);
2694
2695 svn_pool_destroy(iterpool);
2696
2697 *entries_p = entries;
2698 return SVN_NO_ERROR;
2699 }
2700
2701 /* For directory NODEREV in FS, return the *FILESIZE of its in-txn
2702 * representation. If the directory representation is comitted data,
2703 * set *FILESIZE to SVN_INVALID_FILESIZE. Use SCRATCH_POOL for temporaries.
2704 */
2705 static svn_error_t *
get_txn_dir_info(svn_filesize_t * filesize,svn_fs_t * fs,node_revision_t * noderev,apr_pool_t * scratch_pool)2706 get_txn_dir_info(svn_filesize_t *filesize,
2707 svn_fs_t *fs,
2708 node_revision_t *noderev,
2709 apr_pool_t *scratch_pool)
2710 {
2711 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2712 {
2713 const svn_io_dirent2_t *dirent;
2714 const char *filename;
2715
2716 filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
2717 scratch_pool);
2718
2719 SVN_ERR(svn_io_stat_dirent2(&dirent, filename, FALSE, FALSE,
2720 scratch_pool, scratch_pool));
2721 *filesize = dirent->filesize;
2722 }
2723 else
2724 {
2725 *filesize = SVN_INVALID_FILESIZE;
2726 }
2727
2728 return SVN_NO_ERROR;
2729 }
2730
2731 /* Fetch the contents of a directory into DIR. Values are stored
2732 as filename to string mappings; further conversion is necessary to
2733 convert them into svn_fs_dirent_t values. */
2734 static svn_error_t *
get_dir_contents(svn_fs_fs__dir_data_t * dir,svn_fs_t * fs,node_revision_t * noderev,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2735 get_dir_contents(svn_fs_fs__dir_data_t *dir,
2736 svn_fs_t *fs,
2737 node_revision_t *noderev,
2738 apr_pool_t *result_pool,
2739 apr_pool_t *scratch_pool)
2740 {
2741 svn_stream_t *contents;
2742
2743 /* Initialize the result. */
2744 dir->txn_filesize = SVN_INVALID_FILESIZE;
2745
2746 /* Read dir contents - unless there is none in which case we are done. */
2747 if (noderev->data_rep && svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2748 {
2749 /* Get location & current size of the directory representation. */
2750 const char *filename;
2751 apr_file_t *file;
2752
2753 filename = svn_fs_fs__path_txn_node_children(fs, noderev->id,
2754 scratch_pool);
2755
2756 /* The representation is mutable. Read the old directory
2757 contents from the mutable children file, followed by the
2758 changes we've made in this transaction. */
2759 SVN_ERR(svn_io_file_open(&file, filename, APR_READ | APR_BUFFERED,
2760 APR_OS_DEFAULT, scratch_pool));
2761
2762 /* Obtain txn children file size. */
2763 SVN_ERR(svn_io_file_size_get(&dir->txn_filesize, file, scratch_pool));
2764
2765 contents = svn_stream_from_aprfile2(file, FALSE, scratch_pool);
2766 SVN_ERR(read_dir_entries(&dir->entries, contents, TRUE, noderev->id,
2767 result_pool, scratch_pool));
2768 SVN_ERR(svn_stream_close(contents));
2769 }
2770 else if (noderev->data_rep)
2771 {
2772 /* Undeltify content before parsing it. Otherwise, we could only
2773 * parse it byte-by-byte.
2774 */
2775 apr_size_t len = noderev->data_rep->expanded_size;
2776 svn_stringbuf_t *text;
2777
2778 /* The representation is immutable. Read it normally. */
2779 SVN_ERR(svn_fs_fs__get_contents(&contents, fs, noderev->data_rep,
2780 FALSE, scratch_pool));
2781 SVN_ERR(svn_stringbuf_from_stream(&text, contents, len, scratch_pool));
2782 SVN_ERR(svn_stream_close(contents));
2783
2784 /* de-serialize hash */
2785 contents = svn_stream_from_stringbuf(text, scratch_pool);
2786 SVN_ERR(read_dir_entries(&dir->entries, contents, FALSE, noderev->id,
2787 result_pool, scratch_pool));
2788 }
2789 else
2790 {
2791 dir->entries = apr_array_make(result_pool, 0, sizeof(svn_fs_dirent_t *));
2792 }
2793
2794 return SVN_NO_ERROR;
2795 }
2796
2797
2798 /* Return the cache object in FS responsible to storing the directory the
2799 * NODEREV plus the corresponding *KEY. If no cache exists, return NULL.
2800 * PAIR_KEY must point to some key struct, which does not need to be
2801 * initialized. We use it to avoid dynamic allocation.
2802 */
2803 static svn_cache__t *
locate_dir_cache(svn_fs_t * fs,const void ** key,pair_cache_key_t * pair_key,node_revision_t * noderev,apr_pool_t * pool)2804 locate_dir_cache(svn_fs_t *fs,
2805 const void **key,
2806 pair_cache_key_t *pair_key,
2807 node_revision_t *noderev,
2808 apr_pool_t *pool)
2809 {
2810 fs_fs_data_t *ffd = fs->fsap_data;
2811 if (!noderev->data_rep)
2812 {
2813 /* no data rep -> empty directory.
2814 A NULL key causes a cache miss. */
2815 *key = NULL;
2816 return ffd->dir_cache;
2817 }
2818
2819 if (svn_fs_fs__id_txn_used(&noderev->data_rep->txn_id))
2820 {
2821 /* data in txns requires the expensive fs_id-based addressing mode */
2822 *key = svn_fs_fs__id_unparse(noderev->id, pool)->data;
2823
2824 return ffd->txn_dir_cache;
2825 }
2826 else
2827 {
2828 /* committed data can use simple rev,item pairs */
2829 pair_key->revision = noderev->data_rep->revision;
2830 pair_key->second = noderev->data_rep->item_index;
2831 *key = pair_key;
2832
2833 return ffd->dir_cache;
2834 }
2835 }
2836
2837 svn_error_t *
svn_fs_fs__rep_contents_dir(apr_array_header_t ** entries_p,svn_fs_t * fs,node_revision_t * noderev,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2838 svn_fs_fs__rep_contents_dir(apr_array_header_t **entries_p,
2839 svn_fs_t *fs,
2840 node_revision_t *noderev,
2841 apr_pool_t *result_pool,
2842 apr_pool_t *scratch_pool)
2843 {
2844 pair_cache_key_t pair_key = { 0 };
2845 const void *key;
2846 svn_fs_fs__dir_data_t *dir;
2847
2848 /* find the cache we may use */
2849 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2850 scratch_pool);
2851 if (cache)
2852 {
2853 svn_boolean_t found;
2854
2855 SVN_ERR(svn_cache__get((void **)&dir, &found, cache, key,
2856 result_pool));
2857 if (found)
2858 {
2859 /* Verify that the cached dir info is not stale
2860 * (no-op for committed data). */
2861 svn_filesize_t filesize;
2862 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2863
2864 if (filesize == dir->txn_filesize)
2865 {
2866 /* Still valid. Done. */
2867 *entries_p = dir->entries;
2868 return SVN_NO_ERROR;
2869 }
2870 }
2871 }
2872
2873 /* Read in the directory contents. */
2874 dir = apr_pcalloc(scratch_pool, sizeof(*dir));
2875 SVN_ERR(get_dir_contents(dir, fs, noderev, result_pool, scratch_pool));
2876 *entries_p = dir->entries;
2877
2878 /* Update the cache, if we are to use one.
2879 *
2880 * Don't even attempt to serialize very large directories; it would cause
2881 * an unnecessary memory allocation peak. 150 bytes/entry is about right.
2882 */
2883 if (cache && svn_cache__is_cachable(cache, 150 * dir->entries->nelts))
2884 SVN_ERR(svn_cache__set(cache, key, dir, scratch_pool));
2885
2886 return SVN_NO_ERROR;
2887 }
2888
2889 svn_fs_dirent_t *
svn_fs_fs__find_dir_entry(apr_array_header_t * entries,const char * name,int * hint)2890 svn_fs_fs__find_dir_entry(apr_array_header_t *entries,
2891 const char *name,
2892 int *hint)
2893 {
2894 svn_fs_dirent_t **result
2895 = svn_sort__array_lookup(entries, name, hint, compare_dirent_name);
2896 return result ? *result : NULL;
2897 }
2898
2899 svn_error_t *
svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t ** dirent,svn_fs_t * fs,node_revision_t * noderev,const char * name,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2900 svn_fs_fs__rep_contents_dir_entry(svn_fs_dirent_t **dirent,
2901 svn_fs_t *fs,
2902 node_revision_t *noderev,
2903 const char *name,
2904 apr_pool_t *result_pool,
2905 apr_pool_t *scratch_pool)
2906 {
2907 extract_dir_entry_baton_t baton;
2908 svn_boolean_t found = FALSE;
2909
2910 /* find the cache we may use */
2911 pair_cache_key_t pair_key = { 0 };
2912 const void *key;
2913 svn_cache__t *cache = locate_dir_cache(fs, &key, &pair_key, noderev,
2914 scratch_pool);
2915 if (cache)
2916 {
2917 svn_filesize_t filesize;
2918 SVN_ERR(get_txn_dir_info(&filesize, fs, noderev, scratch_pool));
2919
2920 /* Cache lookup. */
2921 baton.txn_filesize = filesize;
2922 baton.name = name;
2923 SVN_ERR(svn_cache__get_partial((void **)dirent,
2924 &found,
2925 cache,
2926 key,
2927 svn_fs_fs__extract_dir_entry,
2928 &baton,
2929 result_pool));
2930 }
2931
2932 /* fetch data from disk if we did not find it in the cache */
2933 if (! found || baton.out_of_date)
2934 {
2935 svn_fs_dirent_t *entry;
2936 svn_fs_dirent_t *entry_copy = NULL;
2937 svn_fs_fs__dir_data_t dir;
2938
2939 /* Read in the directory contents. */
2940 SVN_ERR(get_dir_contents(&dir, fs, noderev, scratch_pool,
2941 scratch_pool));
2942
2943 /* Update the cache, if we are to use one.
2944 *
2945 * Don't even attempt to serialize very large directories; it would
2946 * cause an unnecessary memory allocation peak. 150 bytes / entry is
2947 * about right. */
2948 if (cache && svn_cache__is_cachable(cache, 150 * dir.entries->nelts))
2949 SVN_ERR(svn_cache__set(cache, key, &dir, scratch_pool));
2950
2951 /* find desired entry and return a copy in POOL, if found */
2952 entry = svn_fs_fs__find_dir_entry(dir.entries, name, NULL);
2953 if (entry)
2954 {
2955 entry_copy = apr_palloc(result_pool, sizeof(*entry_copy));
2956 entry_copy->name = apr_pstrdup(result_pool, entry->name);
2957 entry_copy->id = svn_fs_fs__id_copy(entry->id, result_pool);
2958 entry_copy->kind = entry->kind;
2959 }
2960
2961 *dirent = entry_copy;
2962 }
2963
2964 return SVN_NO_ERROR;
2965 }
2966
2967 svn_error_t *
svn_fs_fs__get_proplist(apr_hash_t ** proplist_p,svn_fs_t * fs,node_revision_t * noderev,apr_pool_t * pool)2968 svn_fs_fs__get_proplist(apr_hash_t **proplist_p,
2969 svn_fs_t *fs,
2970 node_revision_t *noderev,
2971 apr_pool_t *pool)
2972 {
2973 apr_hash_t *proplist;
2974 svn_stream_t *stream;
2975
2976 if (noderev->prop_rep && svn_fs_fs__id_txn_used(&noderev->prop_rep->txn_id))
2977 {
2978 svn_error_t *err;
2979 const char *filename
2980 = svn_fs_fs__path_txn_node_props(fs, noderev->id, pool);
2981 proplist = apr_hash_make(pool);
2982
2983 SVN_ERR(svn_stream_open_readonly(&stream, filename, pool, pool));
2984 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
2985 if (err)
2986 {
2987 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
2988
2989 err = svn_error_compose_create(err, svn_stream_close(stream));
2990 return svn_error_quick_wrapf(err,
2991 _("malformed property list for node-revision '%s' in '%s'"),
2992 id_str->data, filename);
2993 }
2994 SVN_ERR(svn_stream_close(stream));
2995 }
2996 else if (noderev->prop_rep)
2997 {
2998 svn_error_t *err;
2999 fs_fs_data_t *ffd = fs->fsap_data;
3000 representation_t *rep = noderev->prop_rep;
3001 pair_cache_key_t key = { 0 };
3002
3003 key.revision = rep->revision;
3004 key.second = rep->item_index;
3005 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
3006 {
3007 svn_boolean_t is_cached;
3008 SVN_ERR(svn_cache__get((void **) proplist_p, &is_cached,
3009 ffd->properties_cache, &key, pool));
3010 if (is_cached)
3011 return SVN_NO_ERROR;
3012 }
3013
3014 proplist = apr_hash_make(pool);
3015 SVN_ERR(svn_fs_fs__get_contents(&stream, fs, noderev->prop_rep, FALSE,
3016 pool));
3017 err = svn_hash_read2(proplist, stream, SVN_HASH_TERMINATOR, pool);
3018 if (err)
3019 {
3020 svn_string_t *id_str = svn_fs_fs__id_unparse(noderev->id, pool);
3021
3022 err = svn_error_compose_create(err, svn_stream_close(stream));
3023 return svn_error_quick_wrapf(err,
3024 _("malformed property list for node-revision '%s'"),
3025 id_str->data);
3026 }
3027 SVN_ERR(svn_stream_close(stream));
3028
3029 if (ffd->properties_cache && SVN_IS_VALID_REVNUM(rep->revision))
3030 SVN_ERR(svn_cache__set(ffd->properties_cache, &key, proplist, pool));
3031 }
3032 else
3033 {
3034 /* return an empty prop list if the node doesn't have any props */
3035 proplist = apr_hash_make(pool);
3036 }
3037
3038 *proplist_p = proplist;
3039
3040 return SVN_NO_ERROR;
3041 }
3042
3043 svn_error_t *
svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t ** context,svn_fs_t * fs,svn_revnum_t rev,apr_pool_t * result_pool)3044 svn_fs_fs__create_changes_context(svn_fs_fs__changes_context_t **context,
3045 svn_fs_t *fs,
3046 svn_revnum_t rev,
3047 apr_pool_t *result_pool)
3048 {
3049 svn_fs_fs__changes_context_t *result = apr_pcalloc(result_pool,
3050 sizeof(*result));
3051 result->fs = fs;
3052 result->revision = rev;
3053 result->rev_file_pool = result_pool;
3054
3055 *context = result;
3056 return SVN_NO_ERROR;
3057 }
3058
3059 svn_error_t *
svn_fs_fs__get_changes(apr_array_header_t ** changes,svn_fs_fs__changes_context_t * context,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3060 svn_fs_fs__get_changes(apr_array_header_t **changes,
3061 svn_fs_fs__changes_context_t *context,
3062 apr_pool_t *result_pool,
3063 apr_pool_t *scratch_pool)
3064 {
3065 apr_off_t item_index = SVN_FS_FS__ITEM_INDEX_CHANGES;
3066 svn_boolean_t found;
3067 fs_fs_data_t *ffd = context->fs->fsap_data;
3068 svn_fs_fs__changes_list_t *changes_list;
3069
3070 pair_cache_key_t key;
3071 key.revision = context->revision;
3072 key.second = context->next;
3073
3074 /* try cache lookup first */
3075
3076 if (ffd->changes_cache)
3077 {
3078 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
3079 ffd->changes_cache, &key, result_pool));
3080 }
3081 else
3082 {
3083 found = FALSE;
3084 }
3085
3086 if (!found)
3087 {
3088 /* read changes from revision file */
3089
3090 if (!context->revision_file)
3091 {
3092 SVN_ERR(svn_fs_fs__ensure_revision_exists(context->revision,
3093 context->fs,
3094 scratch_pool));
3095 SVN_ERR(svn_fs_fs__open_pack_or_rev_file(&context->revision_file,
3096 context->fs,
3097 context->revision,
3098 context->rev_file_pool,
3099 scratch_pool));
3100 }
3101
3102 if (use_block_read(context->fs))
3103 {
3104 /* 'block-read' will probably populate the cache with the data
3105 * that we want. However, we won't want to force it to process
3106 * very large change lists as part of this prefetching mechanism.
3107 * Those would be better handled by the iterative code below. */
3108 SVN_ERR(block_read(NULL, context->fs,
3109 context->revision, SVN_FS_FS__ITEM_INDEX_CHANGES,
3110 context->revision_file, scratch_pool,
3111 scratch_pool));
3112
3113 /* This may succeed now ... */
3114 SVN_ERR(svn_cache__get((void **)&changes_list, &found,
3115 ffd->changes_cache, &key, result_pool));
3116 }
3117
3118 /* If we still have no data, read it here. */
3119 if (!found)
3120 {
3121 apr_off_t changes_offset;
3122
3123 /* Addressing is very different for old formats
3124 * (needs to read the revision trailer). */
3125 if (svn_fs_fs__use_log_addressing(context->fs))
3126 {
3127 SVN_ERR(svn_fs_fs__item_offset(&changes_offset, context->fs,
3128 context->revision_file,
3129 context->revision, NULL,
3130 SVN_FS_FS__ITEM_INDEX_CHANGES,
3131 scratch_pool));
3132 }
3133 else
3134 {
3135 SVN_ERR(get_root_changes_offset(NULL, &changes_offset,
3136 context->revision_file,
3137 context->fs, context->revision,
3138 scratch_pool));
3139
3140 /* This variable will be used for debug logging only. */
3141 item_index = changes_offset;
3142 }
3143
3144 /* Actual reading and parsing are the same, though. */
3145 SVN_ERR(aligned_seek(context->fs, context->revision_file->file,
3146 NULL, changes_offset + context->next_offset,
3147 scratch_pool));
3148
3149 SVN_ERR(svn_fs_fs__read_changes(changes,
3150 context->revision_file->stream,
3151 SVN_FS_FS__CHANGES_BLOCK_SIZE,
3152 result_pool, scratch_pool));
3153
3154 /* Construct the info object for the entries block we just read. */
3155 changes_list = apr_pcalloc(scratch_pool, sizeof(*changes_list));
3156 SVN_ERR(svn_io_file_get_offset(&changes_list->end_offset,
3157 context->revision_file->file,
3158 scratch_pool));
3159 changes_list->end_offset -= changes_offset;
3160 changes_list->start_offset = context->next_offset;
3161 changes_list->count = (*changes)->nelts;
3162 changes_list->changes = (change_t **)(*changes)->elts;
3163 changes_list->eol = changes_list->count < SVN_FS_FS__CHANGES_BLOCK_SIZE;
3164
3165 /* cache for future reference */
3166
3167 if (ffd->changes_cache)
3168 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, changes_list,
3169 scratch_pool));
3170 }
3171 }
3172
3173 if (found)
3174 {
3175 /* Return the block as a "proper" APR array. */
3176 (*changes) = apr_array_make(result_pool, 0, sizeof(void *));
3177 (*changes)->elts = (char *)changes_list->changes;
3178 (*changes)->nelts = changes_list->count;
3179 (*changes)->nalloc = changes_list->count;
3180 }
3181
3182 /* Where to look next - if there is more data. */
3183 context->next += (*changes)->nelts;
3184 context->next_offset = changes_list->end_offset;
3185 context->eol = changes_list->eol;
3186
3187 /* Close the revision file after we read all data. */
3188 if (context->eol && context->revision_file)
3189 {
3190 SVN_ERR(svn_fs_fs__close_revision_file(context->revision_file));
3191 context->revision_file = NULL;
3192 }
3193
3194 SVN_ERR(dbg_log_access(context->fs, context->revision, item_index, *changes,
3195 SVN_FS_FS__ITEM_TYPE_CHANGES, scratch_pool));
3196
3197 return SVN_NO_ERROR;
3198 }
3199
3200 /* Inialize the representation read state RS for the given REP_HEADER and
3201 * p2l index ENTRY. If not NULL, assign FILE and STREAM to RS.
3202 * Use RESULT_POOL for allocations.
3203 */
3204 static svn_error_t *
init_rep_state(rep_state_t * rs,svn_fs_fs__rep_header_t * rep_header,svn_fs_t * fs,svn_fs_fs__revision_file_t * file,svn_fs_fs__p2l_entry_t * entry,apr_pool_t * result_pool)3205 init_rep_state(rep_state_t *rs,
3206 svn_fs_fs__rep_header_t *rep_header,
3207 svn_fs_t *fs,
3208 svn_fs_fs__revision_file_t *file,
3209 svn_fs_fs__p2l_entry_t* entry,
3210 apr_pool_t *result_pool)
3211 {
3212 fs_fs_data_t *ffd = fs->fsap_data;
3213 shared_file_t *shared_file = apr_pcalloc(result_pool, sizeof(*shared_file));
3214
3215 /* this function does not apply to representation containers */
3216 SVN_ERR_ASSERT(entry->type >= SVN_FS_FS__ITEM_TYPE_FILE_REP
3217 && entry->type <= SVN_FS_FS__ITEM_TYPE_DIR_PROPS);
3218
3219 shared_file->rfile = file;
3220 shared_file->fs = fs;
3221 shared_file->revision = entry->item.revision;
3222 shared_file->pool = result_pool;
3223
3224 rs->sfile = shared_file;
3225 rs->revision = entry->item.revision;
3226 rs->item_index = entry->item.number;
3227 rs->header_size = rep_header->header_size;
3228 rs->start = entry->offset + rs->header_size;
3229 rs->current = rep_header->type == svn_fs_fs__rep_plain ? 0 : 4;
3230 rs->size = entry->size - rep_header->header_size - 7;
3231 rs->ver = -1;
3232 rs->chunk_index = 0;
3233 rs->raw_window_cache = ffd->raw_window_cache;
3234 rs->window_cache = ffd->txdelta_window_cache;
3235 rs->combined_cache = ffd->combined_window_cache;
3236
3237 return SVN_NO_ERROR;
3238 }
3239
3240 /* Implement svn_cache__partial_getter_func_t for txdelta windows.
3241 * Instead of the whole window data, return only END_OFFSET member.
3242 */
3243 static svn_error_t *
get_txdelta_window_end(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)3244 get_txdelta_window_end(void **out,
3245 const void *data,
3246 apr_size_t data_len,
3247 void *baton,
3248 apr_pool_t *result_pool)
3249 {
3250 const svn_fs_fs__txdelta_cached_window_t *window
3251 = (const svn_fs_fs__txdelta_cached_window_t *)data;
3252 *(apr_off_t*)out = window->end_offset;
3253
3254 return SVN_NO_ERROR;
3255 }
3256
3257 /* Implement svn_cache__partial_getter_func_t for raw windows.
3258 * Instead of the whole window data, return only END_OFFSET member.
3259 */
3260 static svn_error_t *
get_raw_window_end(void ** out,const void * data,apr_size_t data_len,void * baton,apr_pool_t * result_pool)3261 get_raw_window_end(void **out,
3262 const void *data,
3263 apr_size_t data_len,
3264 void *baton,
3265 apr_pool_t *result_pool)
3266 {
3267 const svn_fs_fs__raw_cached_window_t *window
3268 = (const svn_fs_fs__raw_cached_window_t *)data;
3269 *(apr_off_t*)out = window->end_offset;
3270
3271 return SVN_NO_ERROR;
3272 }
3273
3274 /* Walk through all windows in the representation addressed by RS in FS
3275 * (excluding the delta bases) and put those not already cached into the
3276 * window caches. If MAX_OFFSET is not -1, don't read windows that start
3277 * at or beyond that offset. Use POOL for temporary allocations.
3278 *
3279 * This function requires RS->RAW_WINDOW_CACHE and RS->WINDOW_CACHE to
3280 * be non-NULL.
3281 */
3282 static svn_error_t *
cache_windows(svn_fs_t * fs,rep_state_t * rs,apr_off_t max_offset,apr_pool_t * pool)3283 cache_windows(svn_fs_t *fs,
3284 rep_state_t *rs,
3285 apr_off_t max_offset,
3286 apr_pool_t *pool)
3287 {
3288 apr_pool_t *iterpool = svn_pool_create(pool);
3289
3290 SVN_ERR(auto_read_diff_version(rs, iterpool));
3291
3292 while (rs->current < rs->size)
3293 {
3294 apr_off_t end_offset;
3295 svn_boolean_t found = FALSE;
3296 window_cache_key_t key = { 0 };
3297
3298 svn_pool_clear(iterpool);
3299
3300 if (max_offset != -1 && rs->start + rs->current >= max_offset)
3301 {
3302 svn_pool_destroy(iterpool);
3303 return SVN_NO_ERROR;
3304 }
3305
3306 /* We don't need to read the data again if it is already in cache.
3307 * It might be cached as either raw or parsed window.
3308 */
3309 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
3310 rs->raw_window_cache,
3311 get_window_key(&key, rs),
3312 get_raw_window_end, NULL,
3313 iterpool));
3314 if (! found)
3315 SVN_ERR(svn_cache__get_partial((void **) &end_offset, &found,
3316 rs->window_cache, &key,
3317 get_txdelta_window_end, NULL,
3318 iterpool));
3319
3320 if (found)
3321 {
3322 rs->current = end_offset;
3323 }
3324 else
3325 {
3326 /* Read, decode and cache the window. */
3327 svn_fs_fs__raw_cached_window_t window;
3328 apr_off_t start_offset = rs->start + rs->current;
3329 apr_size_t window_len;
3330 char *buf;
3331
3332 /* navigate to the current window */
3333 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
3334 SVN_ERR(svn_txdelta__read_raw_window_len(&window_len,
3335 rs->sfile->rfile->stream,
3336 iterpool));
3337
3338 /* Read the raw window. */
3339 buf = apr_palloc(iterpool, window_len + 1);
3340 SVN_ERR(rs_aligned_seek(rs, NULL, start_offset, iterpool));
3341 SVN_ERR(svn_io_file_read_full2(rs->sfile->rfile->file, buf,
3342 window_len, NULL, NULL, iterpool));
3343 buf[window_len] = 0;
3344
3345 /* update relative offset in representation */
3346 rs->current += window_len;
3347
3348 /* Construct the cachable raw window object. */
3349 window.end_offset = rs->current;
3350 window.window.len = window_len;
3351 window.window.data = buf;
3352 window.ver = rs->ver;
3353
3354 /* cache the window now */
3355 SVN_ERR(svn_cache__set(rs->raw_window_cache, &key, &window,
3356 iterpool));
3357 }
3358
3359 if (rs->current > rs->size)
3360 return svn_error_create(SVN_ERR_FS_CORRUPT, NULL,
3361 _("Reading one svndiff window read beyond "
3362 "the end of the representation"));
3363
3364 rs->chunk_index++;
3365 }
3366
3367 svn_pool_destroy(iterpool);
3368 return SVN_NO_ERROR;
3369 }
3370
3371 /* Read all txdelta / plain windows following REP_HEADER in FS as described
3372 * by ENTRY. Read the data from the already open FILE and the wrapping
3373 * STREAM object. If MAX_OFFSET is not -1, don't read windows that start
3374 * at or beyond that offset. Use SCRATCH_POOL for temporary allocations.
3375 * If caching is not enabled, this is a no-op.
3376 */
3377 static svn_error_t *
block_read_windows(svn_fs_fs__rep_header_t * rep_header,svn_fs_t * fs,svn_fs_fs__revision_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_off_t max_offset,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3378 block_read_windows(svn_fs_fs__rep_header_t *rep_header,
3379 svn_fs_t *fs,
3380 svn_fs_fs__revision_file_t *rev_file,
3381 svn_fs_fs__p2l_entry_t* entry,
3382 apr_off_t max_offset,
3383 apr_pool_t *result_pool,
3384 apr_pool_t *scratch_pool)
3385 {
3386 fs_fs_data_t *ffd = fs->fsap_data;
3387 rep_state_t rs = { 0 };
3388 apr_off_t offset;
3389 window_cache_key_t key = { 0 };
3390
3391 if ( (rep_header->type != svn_fs_fs__rep_plain
3392 && (!ffd->txdelta_window_cache || !ffd->raw_window_cache))
3393 || (rep_header->type == svn_fs_fs__rep_plain
3394 && !ffd->combined_window_cache))
3395 return SVN_NO_ERROR;
3396
3397 SVN_ERR(init_rep_state(&rs, rep_header, fs, rev_file, entry,
3398 result_pool));
3399
3400 /* RS->FILE may be shared between RS instances -> make sure we point
3401 * to the right data. */
3402 offset = rs.start + rs.current;
3403 if (rep_header->type == svn_fs_fs__rep_plain)
3404 {
3405 svn_stringbuf_t *plaintext;
3406 svn_boolean_t is_cached;
3407
3408 /* already in cache? */
3409 SVN_ERR(svn_cache__has_key(&is_cached, rs.combined_cache,
3410 get_window_key(&key, &rs),
3411 scratch_pool));
3412 if (is_cached)
3413 return SVN_NO_ERROR;
3414
3415 /* for larger reps, the header may have crossed a block boundary.
3416 * make sure we still read blocks properly aligned, i.e. don't use
3417 * plain seek here. */
3418 SVN_ERR(aligned_seek(fs, rev_file->file, NULL, offset, scratch_pool));
3419
3420 plaintext = svn_stringbuf_create_ensure(rs.size, result_pool);
3421 SVN_ERR(svn_io_file_read_full2(rev_file->file, plaintext->data,
3422 rs.size, &plaintext->len, NULL,
3423 result_pool));
3424 plaintext->data[plaintext->len] = 0;
3425 rs.current += rs.size;
3426
3427 SVN_ERR(set_cached_combined_window(plaintext, &rs, scratch_pool));
3428 }
3429 else
3430 {
3431 SVN_ERR(cache_windows(fs, &rs, max_offset, scratch_pool));
3432 }
3433
3434 return SVN_NO_ERROR;
3435 }
3436
3437 /* Try to get the representation header identified by KEY from FS's cache.
3438 * If it has not been cached, read it from the current position in STREAM
3439 * and put it into the cache (if caching has been enabled for rep headers).
3440 * Return the result in *REP_HEADER. Use POOL for allocations.
3441 */
3442 static svn_error_t *
read_rep_header(svn_fs_fs__rep_header_t ** rep_header,svn_fs_t * fs,svn_stream_t * stream,pair_cache_key_t * key,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3443 read_rep_header(svn_fs_fs__rep_header_t **rep_header,
3444 svn_fs_t *fs,
3445 svn_stream_t *stream,
3446 pair_cache_key_t *key,
3447 apr_pool_t *result_pool,
3448 apr_pool_t *scratch_pool)
3449 {
3450 fs_fs_data_t *ffd = fs->fsap_data;
3451 svn_boolean_t is_cached = FALSE;
3452
3453 if (ffd->rep_header_cache)
3454 {
3455 SVN_ERR(svn_cache__get((void**)rep_header, &is_cached,
3456 ffd->rep_header_cache, key,
3457 result_pool));
3458 if (is_cached)
3459 return SVN_NO_ERROR;
3460 }
3461
3462 SVN_ERR(svn_fs_fs__read_rep_header(rep_header, stream, result_pool,
3463 scratch_pool));
3464
3465 if (ffd->rep_header_cache)
3466 SVN_ERR(svn_cache__set(ffd->rep_header_cache, key, *rep_header,
3467 scratch_pool));
3468
3469 return SVN_NO_ERROR;
3470 }
3471
3472 /* Fetch the representation data (header, txdelta / plain windows)
3473 * addressed by ENTRY->ITEM in FS and cache it if caches are enabled.
3474 * Read the data from REV_FILE. If MAX_OFFSET is not -1, don't read
3475 * windows that start at or beyond that offset.
3476 * Use SCRATCH_POOL for temporary allocations.
3477 */
3478 static svn_error_t *
block_read_contents(svn_fs_t * fs,svn_fs_fs__revision_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_off_t max_offset,apr_pool_t * scratch_pool)3479 block_read_contents(svn_fs_t *fs,
3480 svn_fs_fs__revision_file_t *rev_file,
3481 svn_fs_fs__p2l_entry_t* entry,
3482 apr_off_t max_offset,
3483 apr_pool_t *scratch_pool)
3484 {
3485 pair_cache_key_t header_key = { 0 };
3486 svn_fs_fs__rep_header_t *rep_header;
3487
3488 header_key.revision = (apr_int32_t)entry->item.revision;
3489 header_key.second = entry->item.number;
3490
3491 SVN_ERR(read_rep_header(&rep_header, fs, rev_file->stream, &header_key,
3492 scratch_pool, scratch_pool));
3493 SVN_ERR(block_read_windows(rep_header, fs, rev_file, entry, max_offset,
3494 scratch_pool, scratch_pool));
3495
3496 return SVN_NO_ERROR;
3497 }
3498
3499 /* For the given REV_FILE in FS, in *STREAM return a stream covering the
3500 * item specified by ENTRY. Also, verify the item's content by low-level
3501 * checksum. Allocate the result in POOL.
3502 */
3503 static svn_error_t *
read_item(svn_stream_t ** stream,svn_fs_t * fs,svn_fs_fs__revision_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_pool_t * pool)3504 read_item(svn_stream_t **stream,
3505 svn_fs_t *fs,
3506 svn_fs_fs__revision_file_t *rev_file,
3507 svn_fs_fs__p2l_entry_t* entry,
3508 apr_pool_t *pool)
3509 {
3510 apr_uint32_t digest;
3511 svn_checksum_t *expected, *actual;
3512 apr_uint32_t plain_digest;
3513
3514 /* Read item into string buffer. */
3515 svn_stringbuf_t *text = svn_stringbuf_create_ensure(entry->size, pool);
3516 text->len = entry->size;
3517 text->data[text->len] = 0;
3518 SVN_ERR(svn_io_file_read_full2(rev_file->file, text->data, text->len,
3519 NULL, NULL, pool));
3520
3521 /* Return (construct, calculate) stream and checksum. */
3522 *stream = svn_stream_from_stringbuf(text, pool);
3523 digest = svn__fnv1a_32x4(text->data, text->len);
3524
3525 /* Checksums will match most of the time. */
3526 if (entry->fnv1_checksum == digest)
3527 return SVN_NO_ERROR;
3528
3529 /* Construct proper checksum objects from their digests to allow for
3530 * nice error messages. */
3531 plain_digest = htonl(entry->fnv1_checksum);
3532 expected = svn_checksum__from_digest_fnv1a_32x4(
3533 (const unsigned char *)&plain_digest, pool);
3534 plain_digest = htonl(digest);
3535 actual = svn_checksum__from_digest_fnv1a_32x4(
3536 (const unsigned char *)&plain_digest, pool);
3537
3538 /* Construct the full error message with all the info we have. */
3539 return svn_checksum_mismatch_err(expected, actual, pool,
3540 _("Low-level checksum mismatch while reading\n"
3541 "%s bytes of meta data at offset %s "
3542 "for item %s in revision %ld"),
3543 apr_off_t_toa(pool, entry->size),
3544 apr_off_t_toa(pool, entry->offset),
3545 apr_psprintf(pool, "%" APR_UINT64_T_FMT, entry->item.number),
3546 entry->item.revision);
3547 }
3548
3549 /* If not already cached, read the changed paths list addressed by ENTRY in
3550 * FS and cache it if it has no more than SVN_FS_FS__CHANGES_BLOCK_SIZE
3551 * entries and caching is enabled. Read the data from REV_FILE.
3552 * Allocate temporaries in SCRATCH_POOL.
3553 */
3554 static svn_error_t *
block_read_changes(svn_fs_t * fs,svn_fs_fs__revision_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,apr_pool_t * scratch_pool)3555 block_read_changes(svn_fs_t *fs,
3556 svn_fs_fs__revision_file_t *rev_file,
3557 svn_fs_fs__p2l_entry_t *entry,
3558 apr_pool_t *scratch_pool)
3559 {
3560 fs_fs_data_t *ffd = fs->fsap_data;
3561 svn_stream_t *stream;
3562 apr_array_header_t *changes;
3563
3564 pair_cache_key_t key;
3565 key.revision = entry->item.revision;
3566 key.second = 0;
3567
3568 if (!ffd->changes_cache)
3569 return SVN_NO_ERROR;
3570
3571 /* already in cache? */
3572 if (ffd->changes_cache)
3573 {
3574 svn_boolean_t is_cached;
3575 SVN_ERR(svn_cache__has_key(&is_cached, ffd->changes_cache, &key,
3576 scratch_pool));
3577 if (is_cached)
3578 return SVN_NO_ERROR;
3579 }
3580
3581 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3582
3583 /* Read changes from revision file. But read just past the first block to
3584 enable us to determine whether the first block already hit the EOL.
3585
3586 Note: A 100 entries block is already > 10kB on disk. With a 4kB default
3587 disk block size, this function won't even be called for larger
3588 changed paths lists. */
3589 SVN_ERR(svn_fs_fs__read_changes(&changes, stream,
3590 SVN_FS_FS__CHANGES_BLOCK_SIZE + 1,
3591 scratch_pool, scratch_pool));
3592
3593 /* We can only cache small lists that don't need to be split up.
3594 For longer lists, we miss the file offset info for the respective */
3595 if (changes->nelts <= SVN_FS_FS__CHANGES_BLOCK_SIZE)
3596 {
3597 svn_fs_fs__changes_list_t changes_list;
3598
3599 /* Construct the info object for the entries block we just read. */
3600 changes_list.end_offset = entry->size;
3601 changes_list.start_offset = 0;
3602 changes_list.count = changes->nelts;
3603 changes_list.changes = (change_t **)changes->elts;
3604 changes_list.eol = TRUE;
3605
3606 SVN_ERR(svn_cache__set(ffd->changes_cache, &key, &changes_list,
3607 scratch_pool));
3608 }
3609
3610 return SVN_NO_ERROR;
3611 }
3612
3613 /* If not already cached or if MUST_READ is set, read the node revision
3614 * addressed by ENTRY in FS and retúrn it in *NODEREV_P. Cache the
3615 * result if caching is enabled. Read the data from REV_FILE. Allocate
3616 * *NODEREV_P in RESUSLT_POOL and allocate temporaries in SCRATCH_POOL.
3617 */
3618 static svn_error_t *
block_read_noderev(node_revision_t ** noderev_p,svn_fs_t * fs,svn_fs_fs__revision_file_t * rev_file,svn_fs_fs__p2l_entry_t * entry,svn_boolean_t must_read,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3619 block_read_noderev(node_revision_t **noderev_p,
3620 svn_fs_t *fs,
3621 svn_fs_fs__revision_file_t *rev_file,
3622 svn_fs_fs__p2l_entry_t *entry,
3623 svn_boolean_t must_read,
3624 apr_pool_t *result_pool,
3625 apr_pool_t *scratch_pool)
3626 {
3627 fs_fs_data_t *ffd = fs->fsap_data;
3628 svn_stream_t *stream;
3629
3630 pair_cache_key_t key = { 0 };
3631 key.revision = entry->item.revision;
3632 key.second = entry->item.number;
3633
3634 if (!must_read && !ffd->node_revision_cache)
3635 return SVN_NO_ERROR;
3636
3637 /* already in cache? */
3638 if (!must_read && ffd->node_revision_cache)
3639 {
3640 svn_boolean_t is_cached;
3641 SVN_ERR(svn_cache__has_key(&is_cached, ffd->node_revision_cache,
3642 &key, scratch_pool));
3643 if (is_cached)
3644 return SVN_NO_ERROR;
3645 }
3646
3647 SVN_ERR(read_item(&stream, fs, rev_file, entry, scratch_pool));
3648
3649 /* read node rev from revision file */
3650 SVN_ERR(svn_fs_fs__read_noderev(noderev_p, stream,
3651 result_pool, scratch_pool));
3652 SVN_ERR(fixup_node_revision(fs, *noderev_p, scratch_pool));
3653
3654 if (ffd->node_revision_cache)
3655 SVN_ERR(svn_cache__set(ffd->node_revision_cache, &key, *noderev_p,
3656 scratch_pool));
3657
3658 return SVN_NO_ERROR;
3659 }
3660
3661 /* Read the whole (e.g. 64kB) block containing ITEM_INDEX of REVISION in FS
3662 * and put all data into cache. If necessary and depending on heuristics,
3663 * neighboring blocks may also get read. The data is being read from
3664 * already open REVISION_FILE, which must be the correct rev / pack file
3665 * w.r.t. REVISION.
3666 *
3667 * For noderevs and changed path lists, the item fetched can be allocated
3668 * RESULT_POOL and returned in *RESULT. Otherwise, RESULT must be NULL.
3669 */
3670 static svn_error_t *
block_read(void ** result,svn_fs_t * fs,svn_revnum_t revision,apr_uint64_t item_index,svn_fs_fs__revision_file_t * revision_file,apr_pool_t * result_pool,apr_pool_t * scratch_pool)3671 block_read(void **result,
3672 svn_fs_t *fs,
3673 svn_revnum_t revision,
3674 apr_uint64_t item_index,
3675 svn_fs_fs__revision_file_t *revision_file,
3676 apr_pool_t *result_pool,
3677 apr_pool_t *scratch_pool)
3678 {
3679 fs_fs_data_t *ffd = fs->fsap_data;
3680 apr_off_t offset, wanted_offset = 0;
3681 apr_off_t block_start = 0;
3682 apr_array_header_t *entries;
3683 int run_count = 0;
3684 int i;
3685 apr_pool_t *iterpool;
3686
3687 /* Block read is an optional feature. If the caller does not want anything
3688 * specific we may not have to read anything. */
3689 if (!result)
3690 return SVN_NO_ERROR;
3691
3692 iterpool = svn_pool_create(scratch_pool);
3693
3694 /* don't try this on transaction protorev files */
3695 SVN_ERR_ASSERT(SVN_IS_VALID_REVNUM(revision));
3696
3697 /* index lookup: find the OFFSET of the item we *must* read plus (in the
3698 * "do-while" block) the list of items in the same block. */
3699 SVN_ERR(svn_fs_fs__item_offset(&wanted_offset, fs, revision_file,
3700 revision, NULL, item_index, iterpool));
3701
3702 offset = wanted_offset;
3703
3704 /* Heuristics:
3705 *
3706 * Read this block. If the last item crosses the block boundary, read
3707 * the next block but stop there. Because cross-boundary items cause
3708 * blocks to be read twice, this heuristics will limit this effect to
3709 * approx. 50% of blocks, probably less, while providing a sensible
3710 * amount of read-ahead.
3711 */
3712 do
3713 {
3714 /* fetch list of items in the block surrounding OFFSET */
3715 block_start = offset - (offset % ffd->block_size);
3716 SVN_ERR(svn_fs_fs__p2l_index_lookup(&entries, fs, revision_file,
3717 revision, block_start,
3718 ffd->block_size, scratch_pool,
3719 scratch_pool));
3720
3721 SVN_ERR(aligned_seek(fs, revision_file->file, &block_start, offset,
3722 iterpool));
3723
3724 /* read all items from the block */
3725 for (i = 0; i < entries->nelts; ++i)
3726 {
3727 svn_boolean_t is_result, is_wanted;
3728 apr_pool_t *pool;
3729 svn_fs_fs__p2l_entry_t* entry;
3730
3731 svn_pool_clear(iterpool);
3732
3733 /* skip empty sections */
3734 entry = &APR_ARRAY_IDX(entries, i, svn_fs_fs__p2l_entry_t);
3735 if (entry->type == SVN_FS_FS__ITEM_TYPE_UNUSED)
3736 continue;
3737
3738 /* the item / container we were looking for? */
3739 is_wanted = entry->offset == wanted_offset
3740 && entry->item.revision == revision
3741 && entry->item.number == item_index;
3742 is_result = result && is_wanted;
3743
3744 /* select the pool that we want the item to be allocated in */
3745 pool = is_result ? result_pool : iterpool;
3746
3747 /* handle all items that start within this block and are relatively
3748 * small (i.e. < block size). Always read the item we need to return.
3749 */
3750 if (is_result || ( entry->offset >= block_start
3751 && entry->size < ffd->block_size))
3752 {
3753 void *item = NULL;
3754 SVN_ERR(svn_io_file_seek(revision_file->file, APR_SET,
3755 &entry->offset, iterpool));
3756 switch (entry->type)
3757 {
3758 case SVN_FS_FS__ITEM_TYPE_FILE_REP:
3759 case SVN_FS_FS__ITEM_TYPE_DIR_REP:
3760 case SVN_FS_FS__ITEM_TYPE_FILE_PROPS:
3761 case SVN_FS_FS__ITEM_TYPE_DIR_PROPS:
3762 SVN_ERR(block_read_contents(fs, revision_file, entry,
3763 is_wanted
3764 ? -1
3765 : block_start + ffd->block_size,
3766 iterpool));
3767 break;
3768
3769 case SVN_FS_FS__ITEM_TYPE_NODEREV:
3770 if (ffd->node_revision_cache || is_result)
3771 SVN_ERR(block_read_noderev((node_revision_t **)&item,
3772 fs, revision_file,
3773 entry, is_result, pool,
3774 iterpool));
3775 break;
3776
3777 case SVN_FS_FS__ITEM_TYPE_CHANGES:
3778 SVN_ERR(block_read_changes(fs, revision_file,
3779 entry, iterpool));
3780 break;
3781
3782 default:
3783 break;
3784 }
3785
3786 if (is_result)
3787 *result = item;
3788
3789 /* if we crossed a block boundary, read the remainder of
3790 * the last block as well */
3791 offset = entry->offset + entry->size;
3792 if (offset - block_start > ffd->block_size)
3793 ++run_count;
3794 }
3795 }
3796
3797 }
3798 while(run_count++ == 1); /* can only be true once and only if a block
3799 * boundary got crossed */
3800
3801 /* if the caller requested a result, we must have provided one by now */
3802 assert(!result || *result);
3803 svn_pool_destroy(iterpool);
3804
3805 return SVN_NO_ERROR;
3806 }
3807