1 /*
2 * missing-branches.c -- Efficiently scan for missing branches.
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24 /* ==================================================================== */
25
26
27
28 /*** Includes. ***/
29
30 #include <assert.h>
31
32 #include "svn_hash.h"
33 #include "svn_pools.h"
34 #include "private/svn_sorts_private.h"
35 #include "private/svn_subr_private.h"
36
37 #include "mergeinfo-normalizer.h"
38
39
40 /*** Code. ***/
41
42 struct svn_min__branch_lookup_t
43 {
44 /* Connection to the repository where we are looking for paths.
45 If this is NULL, then only local lookups may be performed. */
46 svn_ra_session_t *session;
47
48 /* Keyed by const char * FS paths that are known not to exist.
49 It is implied that sub-paths won't and can't exist either. */
50 apr_hash_t *deleted;
51
52 /* Keyed by const char * FS paths that are known to exist. */
53 apr_hash_t *existing;
54 };
55
56 /* Return the location of the last '/' in PATH before LEN.
57 Return 0 for root and empty paths. PATH must be a canonical FS path. */
58 static apr_size_t
parent_segment(const char * path,apr_size_t len)59 parent_segment(const char *path,
60 apr_size_t len)
61 {
62 assert(path[0] == '/');
63
64 if (len <= 1)
65 return 0;
66
67 --len;
68 while (path[len] != '/')
69 --len;
70
71 return len;
72 }
73
74 /* Look for BRANCH in LOOKUP without connecting to the server. Return
75 * svn_tristate_true, if it is known to exist, svn_tristate_false if it is
76 * known to not exist. Otherwise return svn_tristate_unknown. */
77 static svn_tristate_t
local_lookup(const svn_min__branch_lookup_t * lookup,const char * branch)78 local_lookup(const svn_min__branch_lookup_t *lookup,
79 const char *branch)
80 {
81 apr_size_t len;
82
83 /* Non-canonical paths are bad but we let the remote lookup take care of
84 * them. Our hashes simply have no info on them. */
85 if (branch[0] != '/')
86 return svn_tristate_unknown;
87
88 /* Hard-coded: "/" always exists. */
89 if (branch[1] == '\0')
90 return svn_tristate_true;
91
92 /* For every existing path that we encountered, there is an entry in the
93 EXISITING hash. So, we can just use that. */
94 len = strlen(branch);
95 if (apr_hash_get(lookup->existing, branch, len))
96 return svn_tristate_true;
97
98 /* Not known to exist and might be known to not exist. We only record
99 the top level deleted directory for DELETED branches, so we need to
100 walk up the path until we either find that deletion or an existing
101 path. In the latter case, we don't know what happened to the levels
102 below that, including BRANCH. */
103 while (len > 0)
104 {
105 /* Known deleted? Note that we checked BRANCH for existence but not
106 for deletion, yet. */
107 if (apr_hash_get(lookup->deleted, branch, len))
108 return svn_tristate_false;
109
110 /* Parent known to exist?
111 Then, we don't know what happened to the BRANCH. */
112 len = parent_segment(branch, len);
113
114 if (apr_hash_get(lookup->existing, branch, len))
115 return svn_tristate_unknown;
116 }
117
118 /* We don't know. */
119 return svn_tristate_unknown;
120 }
121
122 /* Set *DELETED to TRUE, if PATH can't be found at HEAD in SESSION.
123 Use SCRATCH_POOL for temporary allocations. */
124 static svn_error_t *
path_deleted(svn_boolean_t * deleted,svn_ra_session_t * session,const char * path,apr_pool_t * scratch_pool)125 path_deleted(svn_boolean_t *deleted,
126 svn_ra_session_t *session,
127 const char *path,
128 apr_pool_t *scratch_pool)
129 {
130 svn_node_kind_t kind;
131
132 SVN_ERR_ASSERT(*path == '/');
133 SVN_ERR(svn_ra_check_path(session, path + 1, SVN_INVALID_REVNUM, &kind,
134 scratch_pool));
135 *deleted = kind == svn_node_none;
136
137 return SVN_NO_ERROR;
138 }
139
140 /* Chop the last segment off PATH. PATH must be a canonical FS path.
141 No-op for the root path. */
142 static void
to_parent(svn_stringbuf_t * path)143 to_parent(svn_stringbuf_t *path)
144 {
145 path->len = parent_segment(path->data, path->len);
146 if (path->len == 0)
147 path->len = 1;
148
149 path->data[path->len] = '\0';
150 }
151
152 /* Contact the repository used by LOOKUP and set *DELETED to TRUE, if path
153 BRANCH does not exist at HEAD. Cache the lookup results in LOOKUP and
154 use SCRATCH_POOL for temporary allocations. Call this only if
155 local_lookup returned svn_tristate_unknown. */
156 static svn_error_t *
remote_lookup(svn_boolean_t * deleted,const svn_min__branch_lookup_t * lookup,const char * branch,apr_pool_t * scratch_pool)157 remote_lookup(svn_boolean_t *deleted,
158 const svn_min__branch_lookup_t *lookup,
159 const char *branch,
160 apr_pool_t *scratch_pool)
161 {
162 svn_stringbuf_t *path = svn_stringbuf_create(branch, scratch_pool);
163
164 /* We shall call this function only after the local lookup failed. */
165 assert(local_lookup(lookup, branch) == svn_tristate_unknown);
166
167 /* Actual repository lookup. */
168 SVN_ERR(path_deleted(deleted, lookup->session, branch, scratch_pool));
169
170 /* If the path did not exist, store the furthest non-existent parent. */
171 if (*deleted)
172 {
173 apr_pool_t *iterpool;
174 svn_boolean_t is_deleted;
175 const char *deleted_path;
176 apr_size_t len;
177
178 /* Find the closest parent that exists. Often, that is something like
179 "branches" and the next level already does not exist. So, use that
180 as a heuristics to minimize the number of lookups. */
181
182 /* Set LEN to the length of the last unknown to exist sub-path. */
183 svn_stringbuf_t *temp = svn_stringbuf_dup(path, scratch_pool);
184 do
185 {
186 len = temp->len;
187 to_parent(temp);
188 }
189 while (local_lookup(lookup, temp->data) != svn_tristate_true);
190
191 /* Check whether that path actually does not exist. */
192 if (len == path->len)
193 {
194 /* We already know that the full PATH does not exist.
195 We get here if the immediate parent of PATH is known to exist. */
196 is_deleted = TRUE;
197 }
198 else
199 {
200 temp = svn_stringbuf_ncreate(branch, len, scratch_pool);
201 SVN_ERR(path_deleted(&is_deleted, lookup->session, temp->data,
202 scratch_pool));
203 }
204
205 /* Whether or not that path does not exist, we know now and should
206 store that in LOOKUP. */
207 if (is_deleted)
208 {
209 /* We are almost done here. The existing parent is already in
210 LOOKUP and we only need to add the deleted path. */
211 deleted_path = apr_pstrmemdup(apr_hash_pool_get(lookup->deleted),
212 branch, len);
213 apr_hash_set(lookup->deleted, deleted_path, len, deleted_path);
214
215 return SVN_NO_ERROR;
216 }
217 else
218 {
219 /* We just learned that TEMP does exist. Remember this fact and
220 later continue the search for the deletion boundary. */
221 const char *hash_path
222 = apr_pstrmemdup(apr_hash_pool_get(lookup->existing), temp->data,
223 temp->len);
224
225 /* Only add HASH_PATH. Its parents are already in that hash. */
226 apr_hash_set(lookup->existing, hash_path, temp->len, hash_path);
227 }
228
229 /* Find the closest parent that does exist.
230 "/" exists, hence, this will terminate. */
231 iterpool = svn_pool_create(scratch_pool);
232 do
233 {
234 svn_pool_clear(iterpool);
235
236 len = path->len;
237 to_parent(path);
238
239 /* We often know that "/branches" etc. exist. So, we can skip
240 the final lookup in that case. */
241 if (local_lookup(lookup, path->data) == svn_tristate_true)
242 break;
243
244 /* Get the info from the repository. */
245 SVN_ERR(path_deleted(&is_deleted, lookup->session, path->data,
246 iterpool));
247 }
248 while (is_deleted);
249 svn_pool_destroy(iterpool);
250
251 /* PATH exists, it's sub-path of length LEN does not. */
252 deleted_path = apr_pstrmemdup(apr_hash_pool_get(lookup->deleted),
253 branch, len);
254 apr_hash_set(lookup->deleted, deleted_path, len, deleted_path);
255 }
256
257 /* PATH and all its parents exist. Add them to the EXISITING hash.
258 Make sure to allocate only the longest path and then reference
259 sub-sequences of it to keep memory usage in check. */
260 if (!apr_hash_get(lookup->existing, path->data, path->len))
261 {
262 const char *hash_path
263 = apr_pstrmemdup(apr_hash_pool_get(lookup->existing), path->data,
264 path->len);
265
266 /* Note that we don't need to check for exiting entries here because
267 the APR hash will reuse existing nodes and we are not allocating
268 anything else here. So, this does not allocate duplicate nodes. */
269 for (; path->len > 1; to_parent(path))
270 apr_hash_set(lookup->existing, hash_path, path->len, hash_path);
271 }
272
273 return SVN_NO_ERROR;
274 }
275
276 svn_min__branch_lookup_t *
svn_min__branch_lookup_create(svn_ra_session_t * session,apr_pool_t * result_pool)277 svn_min__branch_lookup_create(svn_ra_session_t *session,
278 apr_pool_t *result_pool)
279 {
280 svn_min__branch_lookup_t *result = apr_pcalloc(result_pool,
281 sizeof(*result));
282 result->session = session;
283 result->deleted = svn_hash__make(result_pool);
284 result->existing = svn_hash__make(result_pool);
285
286 return result;
287 }
288
289 svn_min__branch_lookup_t *
svn_min__branch_lookup_from_paths(apr_array_header_t * paths,apr_pool_t * result_pool)290 svn_min__branch_lookup_from_paths(apr_array_header_t *paths,
291 apr_pool_t *result_pool)
292 {
293 svn_min__branch_lookup_t *result
294 = svn_min__branch_lookup_create(NULL, result_pool);
295
296 int i;
297 for (i = 0; i < paths->nelts; ++i)
298 {
299 const char *path = APR_ARRAY_IDX(paths, i, const char *);
300 if (strlen(path) > 0)
301 {
302 path = apr_pstrdup(result_pool, path);
303 svn_hash_sets(result->deleted, path, path);
304 }
305 }
306
307 return result;
308 }
309
310 svn_error_t *
svn_min__branch_lookup(svn_boolean_t * deleted,svn_min__branch_lookup_t * lookup,const char * branch,svn_boolean_t local_only,apr_pool_t * scratch_pool)311 svn_min__branch_lookup(svn_boolean_t *deleted,
312 svn_min__branch_lookup_t *lookup,
313 const char *branch,
314 svn_boolean_t local_only,
315 apr_pool_t *scratch_pool)
316 {
317 switch (local_lookup(lookup, branch))
318 {
319 case svn_tristate_false:
320 *deleted = TRUE;
321 return SVN_NO_ERROR;
322
323 case svn_tristate_true:
324 *deleted = FALSE;
325 return SVN_NO_ERROR;
326
327 default:
328 /* If the state is unknown and we are only allowed to do a local
329 lookup, default to a possible false negative. Note that not
330 having the session available implies local-only lookup. */
331 if (local_only || !lookup->session)
332 {
333 *deleted = FALSE;
334 return SVN_NO_ERROR;
335 }
336 }
337
338 return svn_error_trace(remote_lookup(deleted, lookup, branch,
339 scratch_pool));
340 }
341
342 apr_array_header_t *
svn_min__branch_deleted_list(svn_min__branch_lookup_t * lookup,apr_pool_t * result_pool,apr_pool_t * scratch_pool)343 svn_min__branch_deleted_list(svn_min__branch_lookup_t *lookup,
344 apr_pool_t *result_pool,
345 apr_pool_t *scratch_pool)
346 {
347 apr_array_header_t *result = apr_array_make(result_pool,
348 apr_hash_count(lookup->deleted),
349 sizeof(const char *));
350 apr_hash_index_t *hi;
351 for (hi = apr_hash_first(scratch_pool, lookup->deleted);
352 hi;
353 hi = apr_hash_next(hi))
354 {
355 const char *path = apr_hash_this_key(hi);
356 apr_size_t len = apr_hash_this_key_len(hi);
357
358 APR_ARRAY_PUSH(result, const char *) = apr_pstrmemdup(result_pool,
359 path, len);
360 }
361
362 svn_sort__array(result, svn_sort_compare_paths);
363
364 return result;
365 }
366