1 #include "cache.h"
2 #include "repository.h"
3 #include "commit.h"
4 #include "walker.h"
5 #include "http.h"
6 #include "list.h"
7 #include "transport.h"
8 #include "packfile.h"
9 #include "object-store.h"
10 
/*
 * One repository base that objects and packs may be fetched from.
 * Entries form a singly linked list: the head is created in
 * get_http_walker() and further entries are appended by
 * process_alternates_response() as alternates are discovered.
 */
struct alt_base {
	/* Base URL string; heap-allocated and freed in cleanup(). */
	char *base;
	/* Set to 1 by fetch_indices() once the pack list request was answered. */
	int got_indices;
	/* Pack list for this base, filled in via http_get_info_packs(). */
	struct packed_git *packs;
	/* Next base in the list, or NULL for the last entry. */
	struct alt_base *next;
};
17 
/* Lifecycle of a queued struct object_request. */
enum object_request_state {
	/* Queued by prefetch(); no HTTP request has been started yet. */
	WAITING,
	/* The HTTP request could not be created or could not be started. */
	ABORTED,
	/* The HTTP request has been handed to an active slot. */
	ACTIVE,
	/*
	 * The response was processed, or fill_active_slot() found the
	 * object already present locally.
	 */
	COMPLETE
};
24 
/* A single loose-object download tracked in object_queue_head. */
struct object_request {
	/* Walker on whose behalf the object is fetched (set in prefetch()). */
	struct walker *walker;
	/* Id of the object being requested. */
	struct object_id oid;
	/*
	 * Base currently being tried; process_object_response() advances
	 * it to the next alternate when the target is missing.
	 */
	struct alt_base *repo;
	/* Current position in the request lifecycle. */
	enum object_request_state state;
	/* Underlying HTTP request; NULL until start_object_request() creates one. */
	struct http_object_request *req;
	/* Link in object_queue_head. */
	struct list_head node;
};
33 
/*
 * State shared between fetch_alternates() and its slot callback
 * process_alternates_response(). The instance lives on the stack of
 * fetch_alternates().
 */
struct alternates_request {
	/* Walker whose alternates list is being extended. */
	struct walker *walker;
	/* Base URL the alternates files are requested from. */
	const char *base;
	/* Request URL; rewritten by the callback when falling back. */
	struct strbuf *url;
	/* Buffer the response body is written into. */
	struct strbuf *buffer;
	/* Slot carrying the request; reused for the fallback request. */
	struct active_request_slot *slot;
	/*
	 * 1 while requesting objects/info/http-alternates, 0 after falling
	 * back to objects/info/alternates.
	 */
	int http_specific;
};
42 
/* HTTP-specific state hung off walker->data. */
struct walker_data {
	/* NOTE(review): not assigned anywhere in the code visible here. */
	const char *url;
	/*
	 * -1: alternates not fetched yet (or the fetch failed),
	 *  0: a fetch is in progress,
	 *  1: alternates have been fetched and processed.
	 */
	int got_alternates;
	/* Head of the list of bases; the first entry is the primary URL. */
	struct alt_base *alt;
};
48 
/* Queue of object requests; entries are linked through object_request.node. */
static LIST_HEAD(object_queue_head);

/* Forward declaration: called from process_object_response() below. */
static void fetch_alternates(struct walker *walker, const char *base);

/* Forward declaration: installed as slot callback by start_object_request(). */
static void process_object_response(void *callback_data);
54 
start_object_request(struct walker * walker,struct object_request * obj_req)55 static void start_object_request(struct walker *walker,
56 				 struct object_request *obj_req)
57 {
58 	struct active_request_slot *slot;
59 	struct http_object_request *req;
60 
61 	req = new_http_object_request(obj_req->repo->base, &obj_req->oid);
62 	if (req == NULL) {
63 		obj_req->state = ABORTED;
64 		return;
65 	}
66 	obj_req->req = req;
67 
68 	slot = req->slot;
69 	slot->callback_func = process_object_response;
70 	slot->callback_data = obj_req;
71 
72 	/* Try to get the request started, abort the request on error */
73 	obj_req->state = ACTIVE;
74 	if (!start_active_slot(slot)) {
75 		obj_req->state = ABORTED;
76 		release_http_object_request(req);
77 		return;
78 	}
79 }
80 
finish_object_request(struct object_request * obj_req)81 static void finish_object_request(struct object_request *obj_req)
82 {
83 	if (finish_http_object_request(obj_req->req))
84 		return;
85 
86 	if (obj_req->req->rename == 0)
87 		walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid));
88 }
89 
process_object_response(void * callback_data)90 static void process_object_response(void *callback_data)
91 {
92 	struct object_request *obj_req =
93 		(struct object_request *)callback_data;
94 	struct walker *walker = obj_req->walker;
95 	struct walker_data *data = walker->data;
96 	struct alt_base *alt = data->alt;
97 
98 	process_http_object_request(obj_req->req);
99 	obj_req->state = COMPLETE;
100 
101 	normalize_curl_result(&obj_req->req->curl_result,
102 			      obj_req->req->http_code,
103 			      obj_req->req->errorstr,
104 			      sizeof(obj_req->req->errorstr));
105 
106 	/* Use alternates if necessary */
107 	if (missing_target(obj_req->req)) {
108 		fetch_alternates(walker, alt->base);
109 		if (obj_req->repo->next != NULL) {
110 			obj_req->repo =
111 				obj_req->repo->next;
112 			release_http_object_request(obj_req->req);
113 			start_object_request(walker, obj_req);
114 			return;
115 		}
116 	}
117 
118 	finish_object_request(obj_req);
119 }
120 
release_object_request(struct object_request * obj_req)121 static void release_object_request(struct object_request *obj_req)
122 {
123 	if (obj_req->req !=NULL && obj_req->req->localfile != -1)
124 		error("fd leakage in release: %d", obj_req->req->localfile);
125 
126 	list_del(&obj_req->node);
127 	free(obj_req);
128 }
129 
fill_active_slot(struct walker * walker)130 static int fill_active_slot(struct walker *walker)
131 {
132 	struct object_request *obj_req;
133 	struct list_head *pos, *tmp, *head = &object_queue_head;
134 
135 	list_for_each_safe(pos, tmp, head) {
136 		obj_req = list_entry(pos, struct object_request, node);
137 		if (obj_req->state == WAITING) {
138 			if (has_object_file(&obj_req->oid))
139 				obj_req->state = COMPLETE;
140 			else {
141 				start_object_request(walker, obj_req);
142 				return 1;
143 			}
144 		}
145 	}
146 	return 0;
147 }
148 
prefetch(struct walker * walker,unsigned char * sha1)149 static void prefetch(struct walker *walker, unsigned char *sha1)
150 {
151 	struct object_request *newreq;
152 	struct walker_data *data = walker->data;
153 
154 	newreq = xmalloc(sizeof(*newreq));
155 	newreq->walker = walker;
156 	oidread(&newreq->oid, sha1);
157 	newreq->repo = data->alt;
158 	newreq->state = WAITING;
159 	newreq->req = NULL;
160 
161 	http_is_verbose = walker->get_verbosely;
162 	list_add_tail(&newreq->node, &object_queue_head);
163 
164 	fill_active_slots();
165 	step_active_slots();
166 }
167 
is_alternate_allowed(const char * url)168 static int is_alternate_allowed(const char *url)
169 {
170 	const char *protocols[] = {
171 		"http", "https", "ftp", "ftps"
172 	};
173 	int i;
174 
175 	if (http_follow_config != HTTP_FOLLOW_ALWAYS) {
176 		warning("alternate disabled by http.followRedirects: %s", url);
177 		return 0;
178 	}
179 
180 	for (i = 0; i < ARRAY_SIZE(protocols); i++) {
181 		const char *end;
182 		if (skip_prefix(url, protocols[i], &end) &&
183 		    starts_with(end, "://"))
184 			break;
185 	}
186 
187 	if (i >= ARRAY_SIZE(protocols)) {
188 		warning("ignoring alternate with unknown protocol: %s", url);
189 		return 0;
190 	}
191 	if (!is_transport_allowed(protocols[i], 0)) {
192 		warning("ignoring alternate with restricted protocol: %s", url);
193 		return 0;
194 	}
195 
196 	return 1;
197 }
198 
process_alternates_response(void * callback_data)199 static void process_alternates_response(void *callback_data)
200 {
201 	struct alternates_request *alt_req =
202 		(struct alternates_request *)callback_data;
203 	struct walker *walker = alt_req->walker;
204 	struct walker_data *cdata = walker->data;
205 	struct active_request_slot *slot = alt_req->slot;
206 	struct alt_base *tail = cdata->alt;
207 	const char *base = alt_req->base;
208 	const char null_byte = '\0';
209 	char *data;
210 	int i = 0;
211 
212 	normalize_curl_result(&slot->curl_result, slot->http_code,
213 			      curl_errorstr, sizeof(curl_errorstr));
214 
215 	if (alt_req->http_specific) {
216 		if (slot->curl_result != CURLE_OK ||
217 		    !alt_req->buffer->len) {
218 
219 			/* Try reusing the slot to get non-http alternates */
220 			alt_req->http_specific = 0;
221 			strbuf_reset(alt_req->url);
222 			strbuf_addf(alt_req->url, "%s/objects/info/alternates",
223 				    base);
224 			curl_easy_setopt(slot->curl, CURLOPT_URL,
225 					 alt_req->url->buf);
226 			active_requests++;
227 			slot->in_use = 1;
228 			if (slot->finished != NULL)
229 				(*slot->finished) = 0;
230 			if (!start_active_slot(slot)) {
231 				cdata->got_alternates = -1;
232 				slot->in_use = 0;
233 				if (slot->finished != NULL)
234 					(*slot->finished) = 1;
235 			}
236 			return;
237 		}
238 	} else if (slot->curl_result != CURLE_OK) {
239 		if (!missing_target(slot)) {
240 			cdata->got_alternates = -1;
241 			return;
242 		}
243 	}
244 
245 	fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
246 	alt_req->buffer->len--;
247 	data = alt_req->buffer->buf;
248 
249 	while (i < alt_req->buffer->len) {
250 		int posn = i;
251 		while (posn < alt_req->buffer->len && data[posn] != '\n')
252 			posn++;
253 		if (data[posn] == '\n') {
254 			int okay = 0;
255 			int serverlen = 0;
256 			struct alt_base *newalt;
257 			if (data[i] == '/') {
258 				/*
259 				 * This counts
260 				 * http://git.host/pub/scm/linux.git/
261 				 * -----------here^
262 				 * so memcpy(dst, base, serverlen) will
263 				 * copy up to "...git.host".
264 				 */
265 				const char *colon_ss = strstr(base,"://");
266 				if (colon_ss) {
267 					serverlen = (strchr(colon_ss + 3, '/')
268 						     - base);
269 					okay = 1;
270 				}
271 			} else if (!memcmp(data + i, "../", 3)) {
272 				/*
273 				 * Relative URL; chop the corresponding
274 				 * number of subpath from base (and ../
275 				 * from data), and concatenate the result.
276 				 *
277 				 * The code first drops ../ from data, and
278 				 * then drops one ../ from data and one path
279 				 * from base.  IOW, one extra ../ is dropped
280 				 * from data than path is dropped from base.
281 				 *
282 				 * This is not wrong.  The alternate in
283 				 *     http://git.host/pub/scm/linux.git/
284 				 * to borrow from
285 				 *     http://git.host/pub/scm/linus.git/
286 				 * is ../../linus.git/objects/.  You need
287 				 * two ../../ to borrow from your direct
288 				 * neighbour.
289 				 */
290 				i += 3;
291 				serverlen = strlen(base);
292 				while (i + 2 < posn &&
293 				       !memcmp(data + i, "../", 3)) {
294 					do {
295 						serverlen--;
296 					} while (serverlen &&
297 						 base[serverlen - 1] != '/');
298 					i += 3;
299 				}
300 				/* If the server got removed, give up. */
301 				okay = strchr(base, ':') - base + 3 <
302 				       serverlen;
303 			} else if (alt_req->http_specific) {
304 				char *colon = strchr(data + i, ':');
305 				char *slash = strchr(data + i, '/');
306 				if (colon && slash && colon < data + posn &&
307 				    slash < data + posn && colon < slash) {
308 					okay = 1;
309 				}
310 			}
311 			if (okay) {
312 				struct strbuf target = STRBUF_INIT;
313 				strbuf_add(&target, base, serverlen);
314 				strbuf_add(&target, data + i, posn - i);
315 				if (!strbuf_strip_suffix(&target, "objects")) {
316 					warning("ignoring alternate that does"
317 						" not end in 'objects': %s",
318 						target.buf);
319 					strbuf_release(&target);
320 				} else if (is_alternate_allowed(target.buf)) {
321 					warning("adding alternate object store: %s",
322 						target.buf);
323 					newalt = xmalloc(sizeof(*newalt));
324 					newalt->next = NULL;
325 					newalt->base = strbuf_detach(&target, NULL);
326 					newalt->got_indices = 0;
327 					newalt->packs = NULL;
328 
329 					while (tail->next != NULL)
330 						tail = tail->next;
331 					tail->next = newalt;
332 				} else {
333 					strbuf_release(&target);
334 				}
335 			}
336 		}
337 		i = posn + 1;
338 	}
339 
340 	cdata->got_alternates = 1;
341 }
342 
fetch_alternates(struct walker * walker,const char * base)343 static void fetch_alternates(struct walker *walker, const char *base)
344 {
345 	struct strbuf buffer = STRBUF_INIT;
346 	struct strbuf url = STRBUF_INIT;
347 	struct active_request_slot *slot;
348 	struct alternates_request alt_req;
349 	struct walker_data *cdata = walker->data;
350 
351 	/*
352 	 * If another request has already started fetching alternates,
353 	 * wait for them to arrive and return to processing this request's
354 	 * curl message
355 	 */
356 	while (cdata->got_alternates == 0) {
357 		step_active_slots();
358 	}
359 
360 	/* Nothing to do if they've already been fetched */
361 	if (cdata->got_alternates == 1)
362 		return;
363 
364 	/* Start the fetch */
365 	cdata->got_alternates = 0;
366 
367 	if (walker->get_verbosely)
368 		fprintf(stderr, "Getting alternates list for %s\n", base);
369 
370 	strbuf_addf(&url, "%s/objects/info/http-alternates", base);
371 
372 	/*
373 	 * Use a callback to process the result, since another request
374 	 * may fail and need to have alternates loaded before continuing
375 	 */
376 	slot = get_active_slot();
377 	slot->callback_func = process_alternates_response;
378 	alt_req.walker = walker;
379 	slot->callback_data = &alt_req;
380 
381 	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &buffer);
382 	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
383 	curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
384 
385 	alt_req.base = base;
386 	alt_req.url = &url;
387 	alt_req.buffer = &buffer;
388 	alt_req.http_specific = 1;
389 	alt_req.slot = slot;
390 
391 	if (start_active_slot(slot))
392 		run_active_slot(slot);
393 	else
394 		cdata->got_alternates = -1;
395 
396 	strbuf_release(&buffer);
397 	strbuf_release(&url);
398 }
399 
fetch_indices(struct walker * walker,struct alt_base * repo)400 static int fetch_indices(struct walker *walker, struct alt_base *repo)
401 {
402 	int ret;
403 
404 	if (repo->got_indices)
405 		return 0;
406 
407 	if (walker->get_verbosely)
408 		fprintf(stderr, "Getting pack list for %s\n", repo->base);
409 
410 	switch (http_get_info_packs(repo->base, &repo->packs)) {
411 	case HTTP_OK:
412 	case HTTP_MISSING_TARGET:
413 		repo->got_indices = 1;
414 		ret = 0;
415 		break;
416 	default:
417 		repo->got_indices = 0;
418 		ret = -1;
419 	}
420 
421 	return ret;
422 }
423 
http_fetch_pack(struct walker * walker,struct alt_base * repo,unsigned char * sha1)424 static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1)
425 {
426 	struct packed_git *target;
427 	int ret;
428 	struct slot_results results;
429 	struct http_pack_request *preq;
430 
431 	if (fetch_indices(walker, repo))
432 		return -1;
433 	target = find_sha1_pack(sha1, repo->packs);
434 	if (!target)
435 		return -1;
436 	close_pack_index(target);
437 
438 	if (walker->get_verbosely) {
439 		fprintf(stderr, "Getting pack %s\n",
440 			hash_to_hex(target->hash));
441 		fprintf(stderr, " which contains %s\n",
442 			hash_to_hex(sha1));
443 	}
444 
445 	preq = new_http_pack_request(target->hash, repo->base);
446 	if (preq == NULL)
447 		goto abort;
448 	preq->slot->results = &results;
449 
450 	if (start_active_slot(preq->slot)) {
451 		run_active_slot(preq->slot);
452 		if (results.curl_result != CURLE_OK) {
453 			error("Unable to get pack file %s\n%s", preq->url,
454 			      curl_errorstr);
455 			goto abort;
456 		}
457 	} else {
458 		error("Unable to start request");
459 		goto abort;
460 	}
461 
462 	ret = finish_http_pack_request(preq);
463 	release_http_pack_request(preq);
464 	if (ret)
465 		return ret;
466 	http_install_packfile(target, &repo->packs);
467 
468 	return 0;
469 
470 abort:
471 	return -1;
472 }
473 
/*
 * Drop a request that is no longer needed. This simply releases it:
 * the request is unlinked from the queue and freed.
 */
static void abort_object_request(struct object_request *obj_req)
{
	release_object_request(obj_req);
}
478 
fetch_object(struct walker * walker,unsigned char * hash)479 static int fetch_object(struct walker *walker, unsigned char *hash)
480 {
481 	char *hex = hash_to_hex(hash);
482 	int ret = 0;
483 	struct object_request *obj_req = NULL;
484 	struct http_object_request *req;
485 	struct list_head *pos, *head = &object_queue_head;
486 
487 	list_for_each(pos, head) {
488 		obj_req = list_entry(pos, struct object_request, node);
489 		if (hasheq(obj_req->oid.hash, hash))
490 			break;
491 	}
492 	if (obj_req == NULL)
493 		return error("Couldn't find request for %s in the queue", hex);
494 
495 	if (has_object_file(&obj_req->oid)) {
496 		if (obj_req->req != NULL)
497 			abort_http_object_request(obj_req->req);
498 		abort_object_request(obj_req);
499 		return 0;
500 	}
501 
502 	while (obj_req->state == WAITING)
503 		step_active_slots();
504 
505 	/*
506 	 * obj_req->req might change when fetching alternates in the callback
507 	 * process_object_response; therefore, the "shortcut" variable, req,
508 	 * is used only after we're done with slots.
509 	 */
510 	while (obj_req->state == ACTIVE)
511 		run_active_slot(obj_req->req->slot);
512 
513 	req = obj_req->req;
514 
515 	if (req->localfile != -1) {
516 		close(req->localfile);
517 		req->localfile = -1;
518 	}
519 
520 	normalize_curl_result(&req->curl_result, req->http_code,
521 			      req->errorstr, sizeof(req->errorstr));
522 
523 	if (obj_req->state == ABORTED) {
524 		ret = error("Request for %s aborted", hex);
525 	} else if (req->curl_result != CURLE_OK &&
526 		   req->http_code != 416) {
527 		if (missing_target(req))
528 			ret = -1; /* Be silent, it is probably in a pack. */
529 		else
530 			ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
531 				    req->errorstr, req->curl_result,
532 				    req->http_code, hex);
533 	} else if (req->zret != Z_STREAM_END) {
534 		walker->corrupt_object_found++;
535 		ret = error("File %s (%s) corrupt", hex, req->url);
536 	} else if (!oideq(&obj_req->oid, &req->real_oid)) {
537 		ret = error("File %s has bad hash", hex);
538 	} else if (req->rename < 0) {
539 		struct strbuf buf = STRBUF_INIT;
540 		loose_object_path(the_repository, &buf, &req->oid);
541 		ret = error("unable to write sha1 filename %s", buf.buf);
542 		strbuf_release(&buf);
543 	}
544 
545 	release_http_object_request(req);
546 	release_object_request(obj_req);
547 	return ret;
548 }
549 
fetch(struct walker * walker,unsigned char * hash)550 static int fetch(struct walker *walker, unsigned char *hash)
551 {
552 	struct walker_data *data = walker->data;
553 	struct alt_base *altbase = data->alt;
554 
555 	if (!fetch_object(walker, hash))
556 		return 0;
557 	while (altbase) {
558 		if (!http_fetch_pack(walker, altbase, hash))
559 			return 0;
560 		fetch_alternates(walker, data->alt->base);
561 		altbase = altbase->next;
562 	}
563 	return error("Unable to find %s under %s", hash_to_hex(hash),
564 		     data->alt->base);
565 }
566 
fetch_ref(struct walker * walker,struct ref * ref)567 static int fetch_ref(struct walker *walker, struct ref *ref)
568 {
569 	struct walker_data *data = walker->data;
570 	return http_fetch_ref(data->alt->base, ref);
571 }
572 
cleanup(struct walker * walker)573 static void cleanup(struct walker *walker)
574 {
575 	struct walker_data *data = walker->data;
576 	struct alt_base *alt, *alt_next;
577 
578 	if (data) {
579 		alt = data->alt;
580 		while (alt) {
581 			alt_next = alt->next;
582 
583 			free(alt->base);
584 			free(alt);
585 
586 			alt = alt_next;
587 		}
588 		free(data);
589 		walker->data = NULL;
590 	}
591 }
592 
get_http_walker(const char * url)593 struct walker *get_http_walker(const char *url)
594 {
595 	char *s;
596 	struct walker_data *data = xmalloc(sizeof(struct walker_data));
597 	struct walker *walker = xmalloc(sizeof(struct walker));
598 
599 	data->alt = xmalloc(sizeof(*data->alt));
600 	data->alt->base = xstrdup(url);
601 	for (s = data->alt->base + strlen(data->alt->base) - 1; *s == '/'; --s)
602 		*s = 0;
603 
604 	data->alt->got_indices = 0;
605 	data->alt->packs = NULL;
606 	data->alt->next = NULL;
607 	data->got_alternates = -1;
608 
609 	walker->corrupt_object_found = 0;
610 	walker->fetch = fetch;
611 	walker->fetch_ref = fetch_ref;
612 	walker->prefetch = prefetch;
613 	walker->cleanup = cleanup;
614 	walker->data = data;
615 
616 	add_fill_function(walker, (int (*)(void *)) fill_active_slot);
617 
618 	return walker;
619 }
620