1 /* dag_cache.c : DAG walker and node cache.
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22
23
24 /* The job of this layer is to take a filesystem with lots of node
25 sharing going on --- the real DAG filesystem as it appears in the
26 database --- and make it look and act like an ordinary tree
27 filesystem, with no sharing.
28
29 We do just-in-time cloning: you can walk from some unfinished
30 transaction's root down into directories and files shared with
31 committed revisions; as soon as you try to change something, the
32 appropriate nodes get cloned (and parent directory entries updated)
33 invisibly, behind your back. Any other references you have to
34 nodes that have been cloned by other changes, even made by other
35 processes, are automatically updated to point to the right clones. */
36
37
38 #include <stdlib.h>
39 #include <string.h>
40 #include <assert.h>
41 #include <apr_pools.h>
42 #include <apr_hash.h>
43
44 #include "svn_hash.h"
45 #include "svn_private_config.h"
46 #include "svn_pools.h"
47 #include "svn_error.h"
48 #include "svn_path.h"
49 #include "svn_mergeinfo.h"
50 #include "svn_fs.h"
51 #include "svn_props.h"
52 #include "svn_sorts.h"
53
54 #include "fs.h"
55 #include "dag.h"
56 #include "dag_cache.h"
57 #include "lock.h"
58 #include "tree.h"
59 #include "fs_x.h"
60 #include "fs_id.h"
61 #include "temp_serializer.h"
62 #include "cached_data.h"
63 #include "transaction.h"
64 #include "pack.h"
65 #include "util.h"
66
67 #include "private/svn_mergeinfo_private.h"
68 #include "private/svn_subr_private.h"
69 #include "private/svn_fs_util.h"
70 #include "private/svn_fspath.h"
71 #include "../libsvn_fs/fs-loader.h"
72
73
74 /*** Path handling ***/
75
76 /* DAG caching uses "normalized" paths - which are a relaxed form of
77 canonical relpaths. They omit the leading '/' of the abspath and trim
78 any trailing '/'. Any sequences of '//' will be kept as the path walker
79 simply skips over them.
80
81 Non-canonical sections of the path will therefore only impact efficiency
82 (extra walker iterations and possibly duplicated entries in the cache)
83 but not correctness.
84
85 Another optimization is that we don't NUL-terminate the path but strictly
86 use its length info. That way, it can be traversed easily without
87 chopping it up and patching it together again. ultimately, however,
88 the path string is NUL-terminated because we wrapped a NUL-terminated
89 C string.
90 */
91
92 /* Set *RESULT to a normalized version of PATH without actually copying any
93 string contents.
94
95 For convenience, return the RESULT pointer as the function value too. */
96 static svn_string_t *
normalize_path(svn_string_t * result,const char * path)97 normalize_path(svn_string_t *result,
98 const char *path)
99 {
100 apr_size_t len;
101
102 if (path[0] == '/')
103 ++path;
104
105 len = strlen(path);
106 while (len && path[len-1] == '/')
107 --len;
108
109 result->data = path;
110 result->len = len;
111
112 return result;
113 }
114
115 /* Extend PATH, i.e. increase its LEN, to cover the next segment. Skip
116 sequences of '/'. Store the segment in ENTRY and return a pointer to
117 it C string representation. If no segment has been found (end of path),
118 return NULL. */
119 static const char *
next_entry_name(svn_string_t * path,svn_stringbuf_t * entry)120 next_entry_name(svn_string_t *path,
121 svn_stringbuf_t *entry)
122 {
123 const char *segment_start;
124 const char *segment_end;
125
126 /* Moving to the next segment, skip separators
127 (normalized does not imply canonical). */
128 segment_start = path->data + path->len;
129 while (*segment_start == '/')
130 ++segment_start;
131
132 /* End of path? */
133 if (*segment_start == '\0')
134 return NULL;
135
136 /* Find the end of this segment. Note that strchr would not give us the
137 length of the last segment. */
138 segment_end = segment_start;
139 while (*segment_end != '/' && *segment_end != '\0')
140 ++segment_end;
141
142 /* Copy the segment into the result buffer. */
143 svn_stringbuf_setempty(entry);
144 svn_stringbuf_appendbytes(entry, segment_start,
145 segment_end - segment_start);
146
147 /* Extend the "visible" part of the path to the end of that segment. */
148 path->len = segment_end - path->data;
149
150 /* Indicate that we found something. */
151 return entry->data;
152 }
153
154 /* Split the normalized PATH into its last segment the corresponding parent
155 directory. Store them in ENTRY and DIRECTORY, respectively.
156
157 If PATH is empty, return FALSE and produce no output.
158 Otherwise, return TRUE.
159 */
160 static svn_boolean_t
extract_last_segment(const svn_string_t * path,svn_string_t * directory,svn_stringbuf_t * entry)161 extract_last_segment(const svn_string_t *path,
162 svn_string_t *directory,
163 svn_stringbuf_t *entry)
164 {
165 const char *segment_start;
166 const char *parent_end;
167
168 /* Edge case. We can't navigate in empty paths. */
169 if (path->len == 0)
170 return FALSE;
171
172 /* Find the start of the last segment. Note that normalized paths never
173 start nor end with a '/'. */
174 segment_start = path->data + path->len - 1;
175 while (*segment_start != '/' && segment_start != path->data)
176 --segment_start;
177
178 /* At root level already, i.e. no parent? */
179 if (segment_start == path->data)
180 {
181 /* Construct result. */
182 directory->data = "";
183 directory->len = 0;
184
185 svn_stringbuf_setempty(entry);
186 svn_stringbuf_appendbytes(entry, path->data, path->len);
187
188 return TRUE;
189 }
190
191 /* Find the end of the parent directory. */
192 parent_end = segment_start;
193 while (parent_end[-1] == '/')
194 --parent_end;
195
196 /* Construct result. */
197 directory->data = path->data;
198 directory->len = parent_end - path->data;
199
200 ++segment_start; /* previously pointed to the last '/'. */
201 svn_stringbuf_setempty(entry);
202 svn_stringbuf_appendbytes(entry, segment_start,
203 path->len - (segment_start - path->data));
204
205 return TRUE;
206 }
207
208
209 /*** Node Caching ***/
210
211 /* 1st level cache */
212
213 /* An entry in the first-level cache. REVISION and PATH form the key that
214 will ultimately be matched.
215 */
216 typedef struct cache_entry_t
217 {
218 /* hash value derived from PATH, REVISION.
219 Used to short-circuit failed lookups. */
220 apr_uint32_t hash_value;
221
222 /* change set to which the NODE belongs */
223 svn_fs_x__change_set_t change_set;
224
225 /* path of the NODE */
226 char *path;
227
228 /* cached value of strlen(PATH). */
229 apr_size_t path_len;
230
231 /* the node allocated in the cache's pool. NULL for empty entries. */
232 dag_node_t *node;
233 } cache_entry_t;
234
235 /* Number of entries in the cache. Keep this low to keep pressure on the
236 CPU caches low as well. A binary value is most efficient. If we walk
237 a directory tree, we want enough entries to store nodes for all files
238 without overwriting the nodes for the parent folder. That way, there
239 will be no unnecessary misses (except for a few random ones caused by
240 hash collision).
241
242 The actual number of instances may be higher but entries that got
243 overwritten are no longer visible.
244 */
245 enum { BUCKET_COUNT = 256 };
246
247 /* The actual cache structure. All nodes will be allocated in POOL.
248 When the number of INSERTIONS (i.e. objects created form that pool)
249 exceeds a certain threshold, the pool will be cleared and the cache
250 with it.
251 */
252 struct svn_fs_x__dag_cache_t
253 {
254 /* fixed number of (possibly empty) cache entries */
255 cache_entry_t buckets[BUCKET_COUNT];
256
257 /* pool used for all node allocation */
258 apr_pool_t *pool;
259
260 /* number of entries created from POOL since the last cleanup */
261 apr_size_t insertions;
262
263 /* Property lookups etc. have a very high locality (75% re-hit).
264 Thus, remember the last hit location for optimistic lookup. */
265 apr_size_t last_hit;
266
267 /* Position of the last bucket hit that actually had a DAG node in it.
268 LAST_HIT may refer to a bucket that matches path@rev but has not
269 its NODE element set, yet.
270 This value is a mere hint for optimistic lookup and any value is
271 valid (as long as it is < BUCKET_COUNT). */
272 apr_size_t last_non_empty;
273 };
274
275 svn_fs_x__dag_cache_t*
svn_fs_x__create_dag_cache(apr_pool_t * result_pool)276 svn_fs_x__create_dag_cache(apr_pool_t *result_pool)
277 {
278 svn_fs_x__dag_cache_t *result = apr_pcalloc(result_pool, sizeof(*result));
279 result->pool = svn_pool_create(result_pool);
280
281 return result;
282 }
283
284 /* Clears the CACHE at regular intervals (destroying all cached nodes).
285 * Return TRUE if the cache got cleared and previously obtained references
286 * to cache contents have become invalid.
287 */
288 static svn_boolean_t
auto_clear_dag_cache(svn_fs_x__dag_cache_t * cache)289 auto_clear_dag_cache(svn_fs_x__dag_cache_t* cache)
290 {
291 if (cache->insertions <= BUCKET_COUNT)
292 return FALSE;
293
294 svn_pool_clear(cache->pool);
295
296 memset(cache->buckets, 0, sizeof(cache->buckets));
297 cache->insertions = 0;
298
299 return TRUE;
300 }
301
302 /* For the given CHANGE_SET and PATH, return the respective entry in CACHE.
303 If the entry is empty, its NODE member will be NULL and the caller
304 may then set it to the corresponding DAG node allocated in CACHE->POOL.
305 */
306 static cache_entry_t *
cache_lookup(svn_fs_x__dag_cache_t * cache,svn_fs_x__change_set_t change_set,const svn_string_t * path)307 cache_lookup(svn_fs_x__dag_cache_t *cache,
308 svn_fs_x__change_set_t change_set,
309 const svn_string_t *path)
310 {
311 apr_size_t i, bucket_index;
312 apr_size_t path_len = path->len;
313 apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set;
314
315 #if SVN_UNALIGNED_ACCESS_IS_OK
316 /* "randomizing" / distributing factor used in our hash function */
317 const apr_uint32_t factor = 0xd1f3da69;
318 #endif
319
320 /* optimistic lookup: hit the same bucket again? */
321 cache_entry_t *result = &cache->buckets[cache->last_hit];
322 if ( (result->change_set == change_set)
323 && (result->path_len == path_len)
324 && !memcmp(result->path, path->data, path_len))
325 {
326 /* Remember the position of the last node we found in this cache. */
327 if (result->node)
328 cache->last_non_empty = cache->last_hit;
329
330 return result;
331 }
332
333 /* need to do a full lookup. Calculate the hash value
334 (HASH_VALUE has been initialized to REVISION). */
335 i = 0;
336 #if SVN_UNALIGNED_ACCESS_IS_OK
337 /* We relax the dependency chain between iterations by processing
338 two chunks from the input per hash_value self-multiplication.
339 The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
340 per 2 chunks instead of 1 chunk.
341 */
342 for (; i + 8 <= path_len; i += 8)
343 hash_value = hash_value * factor * factor
344 + ( *(const apr_uint32_t*)(path->data + i) * factor
345 + *(const apr_uint32_t*)(path->data + i + 4));
346 #endif
347
348 for (; i < path_len; ++i)
349 /* Help GCC to minimize the HASH_VALUE update latency by splitting the
350 MUL 33 of the naive implementation: h = h * 33 + path[i]. This
351 shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
352 */
353 hash_value = hash_value * 32 + (hash_value + (apr_byte_t)path->data[i]);
354
355 bucket_index = hash_value + (hash_value >> 16);
356 bucket_index = (bucket_index + (bucket_index >> 8)) % BUCKET_COUNT;
357
358 /* access the corresponding bucket and remember its location */
359 result = &cache->buckets[bucket_index];
360 cache->last_hit = bucket_index;
361
362 /* if it is *NOT* a match, clear the bucket, expect the caller to fill
363 in the node and count it as an insertion */
364 if ( (result->hash_value != hash_value)
365 || (result->change_set != change_set)
366 || (result->path_len != path_len)
367 || memcmp(result->path, path->data, path_len))
368 {
369 result->hash_value = hash_value;
370 result->change_set = change_set;
371
372 if (result->path_len < path_len || result->path_len == 0)
373 result->path = apr_palloc(cache->pool, path_len + 1);
374 result->path_len = path_len;
375
376 memcpy(result->path, path->data, path_len);
377 result->path[path_len] = 0;
378
379 result->node = NULL;
380
381 cache->insertions++;
382 }
383 else if (result->node)
384 {
385 /* This bucket is valid & has a suitable DAG node in it.
386 Remember its location. */
387 cache->last_non_empty = bucket_index;
388 }
389
390 return result;
391 }
392
393 /* Optimistic lookup using the last seen non-empty location in CACHE.
394 Return the node of that entry, if it is still in use and matches PATH.
395 Return NULL otherwise. */
396 static dag_node_t *
cache_lookup_last_path(svn_fs_x__dag_cache_t * cache,const svn_string_t * path)397 cache_lookup_last_path(svn_fs_x__dag_cache_t *cache,
398 const svn_string_t *path)
399 {
400 cache_entry_t *result = &cache->buckets[cache->last_non_empty];
401
402 if ( result->node
403 && (result->path_len == path->len)
404 && !memcmp(result->path, path->data, path->len))
405 {
406 return result->node;
407 }
408
409 return NULL;
410 }
411
412 /* Return the cached DAG node for PATH from ROOT's node cache, or NULL if
413 the node isn't cached.
414 */
415 static dag_node_t *
dag_node_cache_get(svn_fs_root_t * root,const svn_string_t * path)416 dag_node_cache_get(svn_fs_root_t *root,
417 const svn_string_t *path)
418 {
419 svn_fs_x__data_t *ffd = root->fs->fsap_data;
420 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
421
422 auto_clear_dag_cache(ffd->dag_node_cache);
423 return cache_lookup(ffd->dag_node_cache, change_set, path)->node;
424 }
425
426
427 void
svn_fs_x__update_dag_cache(dag_node_t * node)428 svn_fs_x__update_dag_cache(dag_node_t *node)
429 {
430 svn_fs_x__data_t *ffd = svn_fs_x__dag_get_fs(node)->fsap_data;
431 const char *path = svn_fs_x__dag_get_created_path(node);
432 svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
433
434 cache_entry_t *bucket;
435 svn_string_t normalized;
436
437 auto_clear_dag_cache(cache);
438 bucket = cache_lookup(cache, svn_fs_x__dag_get_id(node)->change_set,
439 normalize_path(&normalized, path));
440 bucket->node = svn_fs_x__dag_dup(node, cache->pool);
441 }
442
443 void
svn_fs_x__invalidate_dag_cache(svn_fs_root_t * root,const char * path)444 svn_fs_x__invalidate_dag_cache(svn_fs_root_t *root,
445 const char *path)
446 {
447 svn_fs_x__data_t *ffd = root->fs->fsap_data;
448 svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
449 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
450
451 apr_size_t i;
452 for (i = 0; i < BUCKET_COUNT; ++i)
453 {
454 cache_entry_t *bucket = &cache->buckets[i];
455 if (bucket->change_set == change_set && bucket->node)
456 {
457 /* The call to svn_relpath_skip_ancestor() will require both
458 parameters to be canonical. Since we allow for non-canonical
459 paths in our cache (unlikely to actually happen), we drop all
460 such entries.
461 */
462 if (!svn_relpath_is_canonical(bucket->path)
463 || svn_relpath_skip_ancestor(path + 1, bucket->path))
464 bucket->node = NULL;
465 }
466 }
467 }
468
469
470 /* Traversing directory paths. */
471
472 /* Try a short-cut for the open_path() function using the last node accessed.
473 * If that ROOT is that nodes's "created rev" and PATH matches its "created-
474 * path", return the node in *NODE_P. Set it to NULL otherwise.
475 *
476 * This function is used to support ra_serf-style access patterns where we
477 * are first asked for path@rev and then for path@c_rev of the same node.
478 * The shortcut works by ignoring the "rev" part of the cache key and then
479 * checking whether we got lucky. Lookup and verification are both quick
480 * plus there are many early outs for common types of mismatch.
481 */
482 static svn_error_t *
try_match_last_node(dag_node_t ** node_p,svn_fs_root_t * root,const svn_string_t * path)483 try_match_last_node(dag_node_t **node_p,
484 svn_fs_root_t *root,
485 const svn_string_t *path)
486 {
487 svn_fs_x__data_t *ffd = root->fs->fsap_data;
488
489 /* Optimistic lookup: if the last node returned from the cache applied to
490 the same PATH, return it in NODE. */
491 dag_node_t *node = cache_lookup_last_path(ffd->dag_node_cache, path);
492
493 /* Did we get a bucket with a committed node? */
494 if (node && !svn_fs_x__dag_check_mutable(node))
495 {
496 /* Get the path&rev pair at which this node was created.
497 This is repository location for which this node is _known_ to be
498 the right lookup result irrespective of how we found it. */
499 const char *created_path
500 = svn_fs_x__dag_get_created_path(node) + 1;
501 svn_revnum_t revision = svn_fs_x__dag_get_revision(node);
502
503 /* Is it an exact match? */
504 if ( revision == root->rev
505 && strlen(created_path) == path->len
506 && memcmp(created_path, path->data, path->len) == 0)
507 {
508 svn_fs_x__dag_cache_t *cache = ffd->dag_node_cache;
509
510 /* Insert NODE into the cache at a second location.
511 In a fraction of all calls, the auto-cleanup may
512 cause NODE to become invalid. */
513 if (!auto_clear_dag_cache(cache))
514 {
515 /* Cache it under its full path@rev access path. */
516 svn_fs_x__change_set_t change_set
517 = svn_fs_x__change_set_by_rev(revision);
518 cache_entry_t *bucket = cache_lookup(cache, change_set, path);
519 bucket->node = node;
520
521 *node_p = node;
522 return SVN_NO_ERROR;
523 }
524 }
525 }
526
527 *node_p = NULL;
528 return SVN_NO_ERROR;
529 }
530
531
532 /* From directory node PARENT, under ROOT, go one step down to the entry
533 NAME and return a reference to it in *CHILD_P.
534
535 PATH is the combination of PARENT's path and NAME and is provided by
536 the caller such that we don't have to construct it here ourselves.
537 Similarly, CHANGE_SET is redundant with ROOT.
538
539 If the directory entry cannot be found, instead of returning an error,
540 *CHILD_P will be set to NULL if ALLOW_EMPTY is TRUE.
541
542 NOTE: *NODE_P will live within the DAG cache and we merely return a
543 reference to it. Hence, it will invalid upon the next cache insertion.
544 Callers must create a copy if they want a non-temporary object.
545 */
546 static svn_error_t *
dag_step(dag_node_t ** child_p,svn_fs_root_t * root,dag_node_t * parent,const char * name,const svn_string_t * path,svn_fs_x__change_set_t change_set,svn_boolean_t allow_empty,apr_pool_t * scratch_pool)547 dag_step(dag_node_t **child_p,
548 svn_fs_root_t *root,
549 dag_node_t *parent,
550 const char *name,
551 const svn_string_t *path,
552 svn_fs_x__change_set_t change_set,
553 svn_boolean_t allow_empty,
554 apr_pool_t *scratch_pool)
555 {
556 svn_fs_t *fs = svn_fs_x__dag_get_fs(parent);
557 svn_fs_x__data_t *ffd = fs->fsap_data;
558 cache_entry_t *bucket;
559 svn_fs_x__id_t node_id;
560
561 /* Locate the corresponding cache entry. We may need PARENT to remain
562 valid for later use, so don't call auto_clear_dag_cache() here. */
563 bucket = cache_lookup(ffd->dag_node_cache, change_set, path);
564 if (bucket->node)
565 {
566 /* Already cached. Return a reference to the cached object. */
567 *child_p = bucket->node;
568 return SVN_NO_ERROR;
569 }
570
571 /* Get the ID of the node we are looking for. The function call checks
572 for various error conditions such like PARENT not being a directory. */
573 SVN_ERR(svn_fs_x__dir_entry_id(&node_id, parent, name, scratch_pool));
574 if (! svn_fs_x__id_used(&node_id))
575 {
576 const char *dir;
577
578 /* No such directory entry. Is a simple NULL result o.k.? */
579 if (allow_empty)
580 {
581 *child_p = NULL;
582 return SVN_NO_ERROR;
583 }
584
585 /* Produce an appropriate error message. */
586 dir = apr_pstrmemdup(scratch_pool, path->data, path->len);
587 dir = svn_fs__canonicalize_abspath(dir, scratch_pool);
588
589 return SVN_FS__NOT_FOUND(root, dir);
590 }
591
592 /* We are about to add a new entry to the cache. Periodically clear it.
593 If we had to clear it just now (< 1% chance), re-add the entry for our
594 item. */
595 if (auto_clear_dag_cache(ffd->dag_node_cache))
596 bucket = cache_lookup(ffd->dag_node_cache, change_set, path);
597
598 /* Construct the DAG node object for NODE_ID. Let it live in the cache. */
599 SVN_ERR(svn_fs_x__dag_get_node(&bucket->node, fs, &node_id,
600 ffd->dag_node_cache->pool,
601 scratch_pool));
602
603 /* Return a reference to the cached object. */
604 *child_p = bucket->node;
605 return SVN_NO_ERROR;
606 }
607
608 /* Return the CHANGE_SET's root node in *NODE_P. ROOT is the FS API root
609 object for CHANGE_SET. Use SCRATCH_POOL for temporary allocations.
610
611 NOTE: *NODE_P will live within the DAG cache and we merely return a
612 reference to it. Hence, it will invalid upon the next cache insertion.
613 Callers must create a copy if they want a non-temporary object.
614 */
615 static svn_error_t *
get_root_node(dag_node_t ** node_p,svn_fs_root_t * root,svn_fs_x__change_set_t change_set,apr_pool_t * scratch_pool)616 get_root_node(dag_node_t **node_p,
617 svn_fs_root_t *root,
618 svn_fs_x__change_set_t change_set,
619 apr_pool_t *scratch_pool)
620 {
621 svn_fs_t *fs = root->fs;
622 svn_fs_x__data_t *ffd = fs->fsap_data;
623 cache_entry_t *bucket;
624 const svn_string_t path = { "", 0 };
625
626 /* Auto-insert the node in the cache. */
627 auto_clear_dag_cache(ffd->dag_node_cache);
628 bucket = cache_lookup(ffd->dag_node_cache, change_set, &path);
629
630 /* If it is not already cached, construct the DAG node object for NODE_ID.
631 Let it live in the cache. Sadly, we often can't reuse txn DAG nodes. */
632 if (bucket->node == NULL)
633 SVN_ERR(svn_fs_x__dag_root(&bucket->node, fs, change_set,
634 ffd->dag_node_cache->pool, scratch_pool));
635
636 /* Return a reference to the cached object. */
637 *node_p = bucket->node;
638 return SVN_NO_ERROR;
639 }
640
641 /* Walk the DAG starting at ROOT, following PATH and return a reference to
642 the target node in *NODE_P. Use SCRATCH_POOL for temporary allocations.
643
644 NOTE: *NODE_P will live within the DAG cache and we merely return a
645 reference to it. Hence, it will invalid upon the next cache insertion.
646 Callers must create a copy if they want a non-temporary object.
647 */
648 static svn_error_t *
walk_dag_path(dag_node_t ** node_p,svn_fs_root_t * root,svn_string_t * path,apr_pool_t * scratch_pool)649 walk_dag_path(dag_node_t **node_p,
650 svn_fs_root_t *root,
651 svn_string_t *path,
652 apr_pool_t *scratch_pool)
653 {
654 dag_node_t *here = NULL; /* The directory we're currently looking at. */
655 apr_pool_t *iterpool;
656 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
657 const char *entry;
658 svn_string_t directory;
659 svn_stringbuf_t *entry_buffer;
660
661 /* Special case: root directory.
662 We will later assume that all paths have at least one parent level,
663 so we must check here for those that don't. */
664 if (path->len == 0)
665 return svn_error_trace(get_root_node(node_p, root, change_set,
666 scratch_pool));
667
668 /* Callers often traverse the DAG in some path-based order or along the
669 history segments. That allows us to try a few guesses about where to
670 find the next item. This is only useful if the caller didn't request
671 the full parent chain. */
672
673 /* First attempt: Assume that we access the DAG for the same path as
674 in the last lookup but for a different revision that happens to be
675 the last revision that touched the respective node. This is a
676 common pattern when e.g. checking out over ra_serf. Note that this
677 will only work for committed data as the revision info for nodes in
678 txns is bogus.
679
680 This shortcut is quick and will exit this function upon success.
681 So, try it first. */
682 if (!root->is_txn_root)
683 {
684 SVN_ERR(try_match_last_node(node_p, root, path));
685
686 /* Did the shortcut work? */
687 if (*node_p)
688 return SVN_NO_ERROR;
689 }
690
691 /* Second attempt: Try starting the lookup immediately at the parent
692 node. We will often have recently accessed either a sibling or
693 said parent directory itself for the same revision. ENTRY will
694 point to the last '/' in PATH. */
695 entry_buffer = svn_stringbuf_create_ensure(64, scratch_pool);
696 if (extract_last_segment(path, &directory, entry_buffer))
697 {
698 here = dag_node_cache_get(root, &directory);
699
700 /* Did the shortcut work? */
701 if (here)
702 return svn_error_trace(dag_step(node_p, root, here,
703 entry_buffer->data, path,
704 change_set, FALSE, scratch_pool));
705 }
706
707 /* Now there is something to iterate over. Thus, create the ITERPOOL. */
708 iterpool = svn_pool_create(scratch_pool);
709
710 /* Make a parent_path item for the root node, using its own current
711 copy id. */
712 SVN_ERR(get_root_node(&here, root, change_set, iterpool));
713 path->len = 0;
714
715 /* Walk the path segment by segment. */
716 for (entry = next_entry_name(path, entry_buffer);
717 entry;
718 entry = next_entry_name(path, entry_buffer))
719 {
720 svn_pool_clear(iterpool);
721
722 /* Note that HERE is allocated from the DAG node cache and will
723 therefore survive the ITERPOOL cleanup. */
724 SVN_ERR(dag_step(&here, root, here, entry, path, change_set, FALSE,
725 iterpool));
726 }
727
728 svn_pool_destroy(iterpool);
729 *node_p = here;
730
731 return SVN_NO_ERROR;
732 }
733
734
735 /* Return a text string describing the absolute path of parent path
736 DAG_PATH. It will be allocated in POOL. */
737 static const char *
parent_path_path(svn_fs_x__dag_path_t * dag_path,apr_pool_t * pool)738 parent_path_path(svn_fs_x__dag_path_t *dag_path,
739 apr_pool_t *pool)
740 {
741 const char *path_so_far = "/";
742 if (dag_path->parent)
743 path_so_far = parent_path_path(dag_path->parent, pool);
744 return dag_path->entry
745 ? svn_fspath__join(path_so_far, dag_path->entry, pool)
746 : path_so_far;
747 }
748
749
750 /* Choose a copy ID inheritance method *INHERIT_P to be used in the
751 event that immutable node CHILD in FS needs to be made mutable. If
752 the inheritance method is copy_id_inherit_new, also return a
753 *COPY_SRC_PATH on which to base the new copy ID (else return NULL
754 for that path). CHILD must have a parent (it cannot be the root
755 node). Temporary allocations are taken from SCRATCH_POOL. */
756 static svn_error_t *
get_copy_inheritance(svn_fs_x__copy_id_inherit_t * inherit_p,const char ** copy_src_path,svn_fs_t * fs,svn_fs_x__dag_path_t * child,apr_pool_t * scratch_pool)757 get_copy_inheritance(svn_fs_x__copy_id_inherit_t *inherit_p,
758 const char **copy_src_path,
759 svn_fs_t *fs,
760 svn_fs_x__dag_path_t *child,
761 apr_pool_t *scratch_pool)
762 {
763 svn_fs_x__id_t child_copy_id, parent_copy_id;
764 const char *id_path = NULL;
765 svn_fs_root_t *copyroot_root;
766 dag_node_t *copyroot_node;
767 svn_revnum_t copyroot_rev;
768 const char *copyroot_path;
769
770 SVN_ERR_ASSERT(child && child->parent);
771
772 /* Initialize some convenience variables. */
773 child_copy_id = *svn_fs_x__dag_get_copy_id(child->node);
774 parent_copy_id = *svn_fs_x__dag_get_copy_id(child->parent->node);
775
776 /* By default, there is no copy source. */
777 *copy_src_path = NULL;
778
779 /* If this child is already mutable, we have nothing to do. */
780 if (svn_fs_x__dag_check_mutable(child->node))
781 {
782 *inherit_p = svn_fs_x__copy_id_inherit_self;
783 return SVN_NO_ERROR;
784 }
785
786 /* From this point on, we'll assume that the child will just take
787 its copy ID from its parent. */
788 *inherit_p = svn_fs_x__copy_id_inherit_parent;
789
790 /* Special case: if the child's copy ID is '0', use the parent's
791 copy ID. */
792 if (svn_fs_x__id_is_root(&child_copy_id))
793 return SVN_NO_ERROR;
794
795 /* Compare the copy IDs of the child and its parent. If they are
796 the same, then the child is already on the same branch as the
797 parent, and should use the same mutability copy ID that the
798 parent will use. */
799 if (svn_fs_x__id_eq(&child_copy_id, &parent_copy_id))
800 return SVN_NO_ERROR;
801
802 /* If the child is on the same branch that the parent is on, the
803 child should just use the same copy ID that the parent would use.
804 Else, the child needs to generate a new copy ID to use should it
805 need to be made mutable. We will claim that child is on the same
806 branch as its parent if the child itself is not a branch point,
807 or if it is a branch point that we are accessing via its original
808 copy destination path. */
809 svn_fs_x__dag_get_copyroot(©root_rev, ©root_path, child->node);
810 SVN_ERR(svn_fs_x__revision_root(©root_root, fs, copyroot_rev,
811 scratch_pool));
812 SVN_ERR(svn_fs_x__get_temp_dag_node(©root_node, copyroot_root,
813 copyroot_path, scratch_pool));
814
815 if (!svn_fs_x__dag_related_node(copyroot_node, child->node))
816 return SVN_NO_ERROR;
817
818 /* Determine if we are looking at the child via its original path or
819 as a subtree item of a copied tree. */
820 id_path = svn_fs_x__dag_get_created_path(child->node);
821 if (strcmp(id_path, parent_path_path(child, scratch_pool)) == 0)
822 {
823 *inherit_p = svn_fs_x__copy_id_inherit_self;
824 return SVN_NO_ERROR;
825 }
826
827 /* We are pretty sure that the child node is an unedited nested
828 branched node. When it needs to be made mutable, it should claim
829 a new copy ID. */
830 *inherit_p = svn_fs_x__copy_id_inherit_new;
831 *copy_src_path = id_path;
832 return SVN_NO_ERROR;
833 }
834
835 /* Allocate a new svn_fs_x__dag_path_t node from RESULT_POOL, containing
836 NODE, ENTRY and PARENT; NODE and ENTRY are copied into RESULT_POOL. */
837 static svn_fs_x__dag_path_t *
make_parent_path(dag_node_t * node,const svn_stringbuf_t * entry,svn_fs_x__dag_path_t * parent,apr_pool_t * result_pool)838 make_parent_path(dag_node_t *node,
839 const svn_stringbuf_t *entry,
840 svn_fs_x__dag_path_t *parent,
841 apr_pool_t *result_pool)
842 {
843 svn_fs_x__dag_path_t *dag_path
844 = apr_pcalloc(result_pool, sizeof(*dag_path));
845 if (node)
846 dag_path->node = svn_fs_x__dag_dup(node, result_pool);
847 dag_path->entry = apr_pstrmemdup(result_pool, entry->data, entry->len);
848 dag_path->parent = parent;
849 dag_path->copy_inherit = svn_fs_x__copy_id_inherit_unknown;
850 dag_path->copy_src_path = NULL;
851 return dag_path;
852 }
853
854 svn_error_t *
svn_fs_x__get_dag_path(svn_fs_x__dag_path_t ** dag_path_p,svn_fs_root_t * root,const char * fs_path,int flags,svn_boolean_t is_txn_path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)855 svn_fs_x__get_dag_path(svn_fs_x__dag_path_t **dag_path_p,
856 svn_fs_root_t *root,
857 const char *fs_path,
858 int flags,
859 svn_boolean_t is_txn_path,
860 apr_pool_t *result_pool,
861 apr_pool_t *scratch_pool)
862 {
863 svn_fs_t *fs = root->fs;
864 dag_node_t *here = NULL; /* The directory we're currently looking at. */
865 svn_fs_x__dag_path_t *dag_path; /* The path from HERE up to the root. */
866 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
867
868 svn_fs_x__change_set_t change_set = svn_fs_x__root_change_set(root);
869 const char *entry;
870 svn_string_t path;
871 svn_stringbuf_t *entry_buffer = svn_stringbuf_create_ensure(64,
872 scratch_pool);
873 apr_size_t path_len;
874
875 /* Normalize the FS_PATH to be compatible with our DAG walk utils. */
876 normalize_path(&path, fs_path); /* "" */
877
878 /* Make a DAG_PATH for the root node, using its own current copy id. */
879 SVN_ERR(get_root_node(&here, root, change_set, iterpool));
880 dag_path = make_parent_path(here, entry_buffer, NULL, result_pool);
881 dag_path->copy_inherit = svn_fs_x__copy_id_inherit_self;
882
883 path_len = path.len;
884 path.len = 0;
885
886 /* Walk the path segment by segment. Add to the DAG_PATH as we go. */
887 for (entry = next_entry_name(&path, entry_buffer);
888 entry;
889 entry = next_entry_name(&path, entry_buffer))
890 {
891 svn_pool_clear(iterpool);
892
893 /* If the current node is not a directory and we are just here to
894 * check for the path's existence, then that's o.k.
895 * Otherwise, non-dir nodes will cause an error in dag_step. */
896 if ( (flags & svn_fs_x__dag_path_allow_null)
897 && (svn_fs_x__dag_node_kind(dag_path->node) != svn_node_dir))
898 {
899 dag_path = NULL;
900 break;
901 }
902
903 /* Find the sub-node. */
904 SVN_ERR(dag_step(&here, root, dag_path->node, entry, &path, change_set,
905 TRUE, iterpool));
906
907 /* "node not found" requires special handling. */
908 if (here == NULL)
909 {
910 /* If this was the last path component, and the caller
911 said it was optional, then don't return an error;
912 just put a NULL node pointer in the path.
913 */
914 if ((flags & svn_fs_x__dag_path_last_optional)
915 && (path_len == path.len))
916 {
917 dag_path = make_parent_path(NULL, entry_buffer, dag_path,
918 result_pool);
919 break;
920 }
921 else if (flags & svn_fs_x__dag_path_allow_null)
922 {
923 dag_path = NULL;
924 break;
925 }
926 else
927 {
928 /* Build a better error message than svn_fs_x__dag_open
929 can provide, giving the root and full path name. */
930 return SVN_FS__NOT_FOUND(root, fs_path);
931 }
932 }
933
934 /* Now, make a parent_path item for CHILD. */
935 dag_path = make_parent_path(here, entry_buffer, dag_path, result_pool);
936 if (is_txn_path)
937 {
938 SVN_ERR(get_copy_inheritance(&dag_path->copy_inherit,
939 &dag_path->copy_src_path,
940 fs, dag_path, iterpool));
941 }
942 }
943
944 svn_pool_destroy(iterpool);
945 *dag_path_p = dag_path;
946 return SVN_NO_ERROR;
947 }
948
949 /* Set *NODE_P to a mutable root directory for ROOT, cloning if
950 necessary, allocating in RESULT_POOL. ROOT must be a transaction root.
951 Use ERROR_PATH in error messages. Use SCRATCH_POOL for temporaries.*/
952 static svn_error_t *
mutable_root_node(dag_node_t ** node_p,svn_fs_root_t * root,const char * error_path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)953 mutable_root_node(dag_node_t **node_p,
954 svn_fs_root_t *root,
955 const char *error_path,
956 apr_pool_t *result_pool,
957 apr_pool_t *scratch_pool)
958 {
959 /* If it's not a transaction root, we can't change its contents. */
960 if (!root->is_txn_root)
961 return SVN_FS__ERR_NOT_MUTABLE(root->fs, root->rev, error_path);
962
963 /* It's a transaction root.
964 Get the appropriate DAG root node and copy it into RESULT_POOL. */
965 SVN_ERR(get_root_node(node_p, root, svn_fs_x__root_change_set(root),
966 scratch_pool));
967 *node_p = svn_fs_x__dag_dup(*node_p, result_pool);
968
969 return SVN_NO_ERROR;
970 }
971
972 svn_error_t *
svn_fs_x__make_path_mutable(svn_fs_root_t * root,svn_fs_x__dag_path_t * parent_path,const char * error_path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)973 svn_fs_x__make_path_mutable(svn_fs_root_t *root,
974 svn_fs_x__dag_path_t *parent_path,
975 const char *error_path,
976 apr_pool_t *result_pool,
977 apr_pool_t *scratch_pool)
978 {
979 dag_node_t *clone;
980 svn_fs_x__txn_id_t txn_id = svn_fs_x__root_txn_id(root);
981
982 /* Is the node mutable already? */
983 if (svn_fs_x__dag_check_mutable(parent_path->node))
984 return SVN_NO_ERROR;
985
986 /* Are we trying to clone the root, or somebody's child node? */
987 if (parent_path->parent)
988 {
989 svn_fs_x__id_t copy_id = { SVN_INVALID_REVNUM, 0 };
990 svn_fs_x__id_t *copy_id_ptr = ©_id;
991 svn_fs_x__copy_id_inherit_t inherit = parent_path->copy_inherit;
992 const char *clone_path, *copyroot_path;
993 svn_revnum_t copyroot_rev;
994 svn_boolean_t is_parent_copyroot = FALSE;
995 svn_fs_root_t *copyroot_root;
996 dag_node_t *copyroot_node;
997 apr_pool_t *subpool;
998
999 /* We're trying to clone somebody's child. Make sure our parent
1000 is mutable. */
1001 SVN_ERR(svn_fs_x__make_path_mutable(root, parent_path->parent,
1002 error_path, result_pool,
1003 scratch_pool));
1004
1005 /* Allocate all temporaries in a sub-pool that we control locally.
1006 That way, we keep only the data of one level of recursion around
1007 at any time. */
1008 subpool = svn_pool_create(scratch_pool);
1009 switch (inherit)
1010 {
1011 case svn_fs_x__copy_id_inherit_parent:
1012 copy_id = *svn_fs_x__dag_get_copy_id(parent_path->parent->node);
1013 break;
1014
1015 case svn_fs_x__copy_id_inherit_new:
1016 SVN_ERR(svn_fs_x__reserve_copy_id(©_id, root->fs, txn_id,
1017 subpool));
1018 break;
1019
1020 case svn_fs_x__copy_id_inherit_self:
1021 copy_id_ptr = NULL;
1022 break;
1023
1024 case svn_fs_x__copy_id_inherit_unknown:
1025 default:
1026 SVN_ERR_MALFUNCTION(); /* uh-oh -- somebody didn't calculate copy-ID
1027 inheritance data. */
1028 }
1029
1030 /* Determine what copyroot our new child node should use. */
1031 svn_fs_x__dag_get_copyroot(©root_rev, ©root_path,
1032 parent_path->node);
1033 SVN_ERR(svn_fs_x__revision_root(©root_root, root->fs,
1034 copyroot_rev, subpool));
1035 SVN_ERR(svn_fs_x__get_temp_dag_node(©root_node, copyroot_root,
1036 copyroot_path, subpool));
1037
1038 if (!svn_fs_x__dag_related_node(copyroot_node, parent_path->node))
1039 is_parent_copyroot = TRUE;
1040
1041 /* Now make this node mutable. */
1042 clone_path = parent_path_path(parent_path->parent, subpool);
1043 SVN_ERR(svn_fs_x__dag_clone_child(&clone,
1044 parent_path->parent->node,
1045 clone_path,
1046 parent_path->entry,
1047 copy_id_ptr, txn_id,
1048 is_parent_copyroot,
1049 result_pool,
1050 subpool));
1051
1052 /* Update the path cache. */
1053 svn_fs_x__update_dag_cache(clone);
1054 svn_pool_destroy(subpool);
1055 }
1056 else
1057 {
1058 /* We're trying to clone the root directory. */
1059 SVN_ERR(mutable_root_node(&clone, root, error_path, result_pool,
1060 scratch_pool));
1061 }
1062
1063 /* Update the PARENT_PATH link to refer to the clone. */
1064 parent_path->node = clone;
1065
1066 return SVN_NO_ERROR;
1067 }
1068
1069
1070 svn_error_t *
svn_fs_x__get_temp_dag_node(dag_node_t ** node_p,svn_fs_root_t * root,const char * path,apr_pool_t * scratch_pool)1071 svn_fs_x__get_temp_dag_node(dag_node_t **node_p,
1072 svn_fs_root_t *root,
1073 const char *path,
1074 apr_pool_t *scratch_pool)
1075 {
1076 svn_string_t normalized;
1077
1078 /* First we look for the DAG in our cache. */
1079 *node_p = dag_node_cache_get(root, normalize_path(&normalized, path));
1080
1081 /* If it is not there, walk the DAG and fill the cache. */
1082 if (! *node_p)
1083 SVN_ERR(walk_dag_path(node_p, root, &normalized, scratch_pool));
1084
1085 return SVN_NO_ERROR;
1086 }
1087
1088
1089 svn_error_t *
svn_fs_x__get_dag_node(dag_node_t ** dag_node_p,svn_fs_root_t * root,const char * path,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1090 svn_fs_x__get_dag_node(dag_node_t **dag_node_p,
1091 svn_fs_root_t *root,
1092 const char *path,
1093 apr_pool_t *result_pool,
1094 apr_pool_t *scratch_pool)
1095 {
1096 dag_node_t *node = NULL;
1097 SVN_ERR(svn_fs_x__get_temp_dag_node(&node, root, path, scratch_pool));
1098
1099 /* We want the returned node to live in POOL. */
1100 *dag_node_p = svn_fs_x__dag_dup(node, result_pool);
1101
1102 return SVN_NO_ERROR;
1103 }
1104