1 /* Copyright (c) 2014, Daniel Martí
2  * Copyright (c) 2014-2021, The Tor Project, Inc. */
3 /* See LICENSE for licensing information */
4 
5 /**
6  * \file consdiff.c
7  * \brief Consensus diff implementation, including both the generation and the
8  * application of diffs in a minimal ed format.
9  *
10  * The consensus diff application is done in consdiff_apply_diff, which relies
11  * on apply_ed_diff for the main ed diff part and on some digest helper
12  * functions to check the digest hashes found in the consensus diff header.
13  *
14  * The consensus diff generation is more complex. consdiff_gen_diff generates
15  * it, relying on gen_ed_diff to generate the ed diff and some digest helper
16  * functions to generate the digest hashes.
17  *
18  * gen_ed_diff is the tricky bit. In it simplest form, it will take quadratic
19  * time and linear space to generate an ed diff given two smartlists. As shown
20  * in its comment section, calling calc_changes on the entire two consensuses
21  * will calculate what is to be added and what is to be deleted in the diff.
22  * Its comment section briefly explains how it works.
23  *
24  * In our case specific to consensuses, we take advantage of the fact that
25  * consensuses list routers sorted by their identities. We use that
26  * information to avoid running calc_changes on the whole smartlists.
27  * gen_ed_diff will navigate through the two consensuses identity by identity
28  * and will send small couples of slices to calc_changes, keeping the running
29  * time near-linear. This is explained in more detail in the gen_ed_diff
30  * comments.
31  *
32  * The allocation strategy tries to save time and memory by avoiding needless
33  * copies.  Instead of actually splitting the inputs into separate strings, we
34  * allocate cdline_t objects, each of which represents a line in the original
35  * object or in the output.  We use memarea_t allocators to manage the
36  * temporary memory we use when generating or applying diffs.
37  **/
38 
39 #define CONSDIFF_PRIVATE
40 
41 #include "core/or/or.h"
42 #include "feature/dircommon/consdiff.h"
43 #include "lib/memarea/memarea.h"
44 #include "feature/dirparse/ns_parse.h"
45 
46 static const char* ns_diff_version = "network-status-diff-version 1";
47 static const char* hash_token = "hash";
48 
49 static char *consensus_join_lines(const smartlist_t *inp);
50 
51 /** Return true iff a and b have the same contents. */
52 STATIC int
lines_eq(const cdline_t * a,const cdline_t * b)53 lines_eq(const cdline_t *a, const cdline_t *b)
54 {
55   return a->len == b->len && fast_memeq(a->s, b->s, a->len);
56 }
57 
58 /** Return true iff a has the same contents as the nul-terminated string b. */
59 STATIC int
line_str_eq(const cdline_t * a,const char * b)60 line_str_eq(const cdline_t *a, const char *b)
61 {
62   const size_t len = strlen(b);
63   tor_assert(len <= UINT32_MAX);
64   cdline_t bline = { b, (uint32_t)len };
65   return lines_eq(a, &bline);
66 }
67 
68 /** Return true iff a begins with the same contents as the nul-terminated
69  * string b. */
70 static int
line_starts_with_str(const cdline_t * a,const char * b)71 line_starts_with_str(const cdline_t *a, const char *b)
72 {
73   const size_t len = strlen(b);
74   tor_assert(len <= UINT32_MAX);
75   return a->len >= len && fast_memeq(a->s, b, len);
76 }
77 
78 /** Return a new cdline_t holding as its contents the nul-terminated
79  * string s. Use the provided memory area for storage. */
80 static cdline_t *
cdline_linecpy(memarea_t * area,const char * s)81 cdline_linecpy(memarea_t *area, const char *s)
82 {
83   size_t len = strlen(s);
84   const char *ss = memarea_memdup(area, s, len);
85   cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
86   line->s = ss;
87   line->len = (uint32_t)len;
88   return line;
89 }
90 
91 /** Add a cdline_t to <b>lst</b> holding as its contents the nul-terminated
92  * string s.  Use the provided memory area for storage. */
93 STATIC void
smartlist_add_linecpy(smartlist_t * lst,memarea_t * area,const char * s)94 smartlist_add_linecpy(smartlist_t *lst, memarea_t *area, const char *s)
95 {
96   smartlist_add(lst, cdline_linecpy(area, s));
97 }
98 
99 /** Compute the digest of <b>cons</b>, and store the result in
100  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
101 /* This is a separate, mockable function so that we can override it when
102  * fuzzing. */
103 MOCK_IMPL(STATIC int,
104 consensus_compute_digest,(const char *cons, size_t len,
105                           consensus_digest_t *digest_out))
106 {
107   int r = crypto_digest256((char*)digest_out->sha3_256,
108                            cons, len, DIGEST_SHA3_256);
109   return r;
110 }
111 
112 /** Compute the digest-as-signed of <b>cons</b>, and store the result in
113  * <b>digest_out</b>. Return 0 on success, -1 on failure. */
114 /* This is a separate, mockable function so that we can override it when
115  * fuzzing. */
116 MOCK_IMPL(STATIC int,
117 consensus_compute_digest_as_signed,(const char *cons, size_t len,
118                                     consensus_digest_t *digest_out))
119 {
120   return router_get_networkstatus_v3_sha3_as_signed(digest_out->sha3_256,
121                                                     cons, len);
122 }
123 
124 /** Return true iff <b>d1</b> and <b>d2</b> contain the same digest */
125 /* This is a separate, mockable function so that we can override it when
126  * fuzzing. */
127 MOCK_IMPL(STATIC int,
128 consensus_digest_eq,(const uint8_t *d1,
129                      const uint8_t *d2))
130 {
131   return fast_memeq(d1, d2, DIGEST256_LEN);
132 }
133 
134 /** Create (allocate) a new slice from a smartlist. Assumes that the start
135  * and the end indexes are within the bounds of the initial smartlist. The end
136  * element is not part of the resulting slice. If end is -1, the slice is to
137  * reach the end of the smartlist.
138  */
139 STATIC smartlist_slice_t *
smartlist_slice(const smartlist_t * list,int start,int end)140 smartlist_slice(const smartlist_t *list, int start, int end)
141 {
142   int list_len = smartlist_len(list);
143   tor_assert(start >= 0);
144   tor_assert(start <= list_len);
145   if (end == -1) {
146     end = list_len;
147   }
148   tor_assert(start <= end);
149 
150   smartlist_slice_t *slice = tor_malloc(sizeof(smartlist_slice_t));
151   slice->list = list;
152   slice->offset = start;
153   slice->len = end - start;
154   return slice;
155 }
156 
157 /** Helper: Compute the longest common subsequence lengths for the two slices.
158  * Used as part of the diff generation to find the column at which to split
159  * slice2 while still having the optimal solution.
160  * If direction is -1, the navigation is reversed. Otherwise it must be 1.
161  * The length of the resulting integer array is that of the second slice plus
162  * one.
163  */
164 STATIC int *
lcs_lengths(const smartlist_slice_t * slice1,const smartlist_slice_t * slice2,int direction)165 lcs_lengths(const smartlist_slice_t *slice1, const smartlist_slice_t *slice2,
166             int direction)
167 {
168   size_t a_size = sizeof(int) * (slice2->len+1);
169 
170   /* Resulting lcs lengths. */
171   int *result = tor_malloc_zero(a_size);
172   /* Copy of the lcs lengths from the last iteration. */
173   int *prev = tor_malloc(a_size);
174 
175   tor_assert(direction == 1 || direction == -1);
176 
177   int si = slice1->offset;
178   if (direction == -1) {
179     si += (slice1->len-1);
180   }
181 
182   for (int i = 0; i < slice1->len; ++i, si+=direction) {
183 
184     const cdline_t *line1 = smartlist_get(slice1->list, si);
185     /* Store the last results. */
186     memcpy(prev, result, a_size);
187 
188     int sj = slice2->offset;
189     if (direction == -1) {
190       sj += (slice2->len-1);
191     }
192 
193     for (int j = 0; j < slice2->len; ++j, sj+=direction) {
194 
195       const cdline_t *line2 = smartlist_get(slice2->list, sj);
196       if (lines_eq(line1, line2)) {
197         /* If the lines are equal, the lcs is one line longer. */
198         result[j + 1] = prev[j] + 1;
199       } else {
200         /* If not, see what lcs parent path is longer. */
201         result[j + 1] = MAX(result[j], prev[j + 1]);
202       }
203     }
204   }
205   tor_free(prev);
206   return result;
207 }
208 
209 /** Helper: Trim any number of lines that are equally at the start or the end
210  * of both slices.
211  */
212 STATIC void
trim_slices(smartlist_slice_t * slice1,smartlist_slice_t * slice2)213 trim_slices(smartlist_slice_t *slice1, smartlist_slice_t *slice2)
214 {
215   while (slice1->len>0 && slice2->len>0) {
216     const cdline_t *line1 = smartlist_get(slice1->list, slice1->offset);
217     const cdline_t *line2 = smartlist_get(slice2->list, slice2->offset);
218     if (!lines_eq(line1, line2)) {
219       break;
220     }
221     slice1->offset++; slice1->len--;
222     slice2->offset++; slice2->len--;
223   }
224 
225   int i1 = (slice1->offset+slice1->len)-1;
226   int i2 = (slice2->offset+slice2->len)-1;
227 
228   while (slice1->len>0 && slice2->len>0) {
229     const cdline_t *line1 = smartlist_get(slice1->list, i1);
230     const cdline_t *line2 = smartlist_get(slice2->list, i2);
231     if (!lines_eq(line1, line2)) {
232       break;
233     }
234     i1--;
235     slice1->len--;
236     i2--;
237     slice2->len--;
238   }
239 }
240 
241 /** Like smartlist_string_pos, but uses a cdline_t, and is restricted to the
242  * bounds of the slice.
243  */
244 STATIC int
smartlist_slice_string_pos(const smartlist_slice_t * slice,const cdline_t * string)245 smartlist_slice_string_pos(const smartlist_slice_t *slice,
246                            const cdline_t *string)
247 {
248   int end = slice->offset + slice->len;
249   for (int i = slice->offset; i < end; ++i) {
250     const cdline_t *el = smartlist_get(slice->list, i);
251     if (lines_eq(el, string)) {
252       return i;
253     }
254   }
255   return -1;
256 }
257 
258 /** Helper: Set all the appropriate changed booleans to true. The first slice
259  * must be of length 0 or 1. All the lines of slice1 and slice2 which are not
260  * present in the other slice will be set to changed in their bool array.
261  * The two changed bool arrays are passed in the same order as the slices.
262  */
263 STATIC void
set_changed(bitarray_t * changed1,bitarray_t * changed2,const smartlist_slice_t * slice1,const smartlist_slice_t * slice2)264 set_changed(bitarray_t *changed1, bitarray_t *changed2,
265             const smartlist_slice_t *slice1, const smartlist_slice_t *slice2)
266 {
267   int toskip = -1;
268   tor_assert(slice1->len == 0 || slice1->len == 1);
269 
270   if (slice1->len == 1) {
271     const cdline_t *line_common = smartlist_get(slice1->list, slice1->offset);
272     toskip = smartlist_slice_string_pos(slice2, line_common);
273     if (toskip == -1) {
274       bitarray_set(changed1, slice1->offset);
275     }
276   }
277   int end = slice2->offset + slice2->len;
278   for (int i = slice2->offset; i < end; ++i) {
279     if (i != toskip) {
280       bitarray_set(changed2, i);
281     }
282   }
283 }
284 
285 /*
286  * Helper: Given that slice1 has been split by half into top and bot, we want
287  * to fetch the column at which to split slice2 so that we are still on track
288  * to the optimal diff solution, i.e. the shortest one. We use lcs_lengths
289  * since the shortest diff is just another way to say the longest common
290  * subsequence.
291  */
292 static int
optimal_column_to_split(const smartlist_slice_t * top,const smartlist_slice_t * bot,const smartlist_slice_t * slice2)293 optimal_column_to_split(const smartlist_slice_t *top,
294                         const smartlist_slice_t *bot,
295                         const smartlist_slice_t *slice2)
296 {
297   int *lens_top = lcs_lengths(top, slice2, 1);
298   int *lens_bot = lcs_lengths(bot, slice2, -1);
299   int column=0, max_sum=-1;
300 
301   for (int i = 0; i < slice2->len+1; ++i) {
302     int sum = lens_top[i] + lens_bot[slice2->len-i];
303     if (sum > max_sum) {
304       column = i;
305       max_sum = sum;
306     }
307   }
308   tor_free(lens_top);
309   tor_free(lens_bot);
310 
311   return column;
312 }
313 
314 /**
315  * Helper: Figure out what elements are new or gone on the second smartlist
316  * relative to the first smartlist, and store the booleans in the bitarrays.
317  * True on the first bitarray means the element is gone, true on the second
318  * bitarray means it's new.
319  *
320  * In its base case, either of the smartlists is of length <= 1 and we can
321  * quickly see what elements are new or are gone. In the other case, we will
322  * split one smartlist by half and we'll use optimal_column_to_split to find
323  * the optimal column at which to split the second smartlist so that we are
324  * finding the smallest diff possible.
325  */
326 STATIC void
calc_changes(smartlist_slice_t * slice1,smartlist_slice_t * slice2,bitarray_t * changed1,bitarray_t * changed2)327 calc_changes(smartlist_slice_t *slice1,
328              smartlist_slice_t *slice2,
329              bitarray_t *changed1, bitarray_t *changed2)
330 {
331   trim_slices(slice1, slice2);
332 
333   if (slice1->len <= 1) {
334     set_changed(changed1, changed2, slice1, slice2);
335 
336   } else if (slice2->len <= 1) {
337     set_changed(changed2, changed1, slice2, slice1);
338 
339   /* Keep on splitting the slices in two. */
340   } else {
341     smartlist_slice_t *top, *bot, *left, *right;
342 
343     /* Split the first slice in half. */
344     int mid = slice1->len/2;
345     top = smartlist_slice(slice1->list, slice1->offset, slice1->offset+mid);
346     bot = smartlist_slice(slice1->list, slice1->offset+mid,
347         slice1->offset+slice1->len);
348 
349     /* Split the second slice by the optimal column. */
350     int mid2 = optimal_column_to_split(top, bot, slice2);
351     left = smartlist_slice(slice2->list, slice2->offset, slice2->offset+mid2);
352     right = smartlist_slice(slice2->list, slice2->offset+mid2,
353         slice2->offset+slice2->len);
354 
355     calc_changes(top, left, changed1, changed2);
356     calc_changes(bot, right, changed1, changed2);
357     tor_free(top);
358     tor_free(bot);
359     tor_free(left);
360     tor_free(right);
361   }
362 }
363 
364 /* This table is from crypto.c. The SP and PAD defines are different. */
365 #define NOT_VALID_BASE64 255
366 #define X NOT_VALID_BASE64
367 #define SP NOT_VALID_BASE64
368 #define PAD NOT_VALID_BASE64
369 static const uint8_t base64_compare_table[256] = {
370   X, X, X, X, X, X, X, X, X, SP, SP, SP, X, SP, X, X,
371   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
372   SP, X, X, X, X, X, X, X, X, X, X, 62, X, X, X, 63,
373   52, 53, 54, 55, 56, 57, 58, 59, 60, 61, X, X, X, PAD, X, X,
374   X, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
375   15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, X, X, X, X, X,
376   X, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
377   41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, X, X, X, X, X,
378   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
379   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
380   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
381   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
382   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
383   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
384   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
385   X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
386 };
387 
388 /** Helper: Get the identity hash from a router line, assuming that the line
389  * at least appears to be a router line and thus starts with "r ".
390  *
391  * If an identity hash is found, store it (without decoding it) in
392  * <b>hash_out</b>, and return 0.  On failure, return -1.
393  */
394 STATIC int
get_id_hash(const cdline_t * line,cdline_t * hash_out)395 get_id_hash(const cdline_t *line, cdline_t *hash_out)
396 {
397   if (line->len < 2)
398     return -1;
399 
400   /* Skip the router name. */
401   const char *hash = memchr(line->s + 2, ' ', line->len - 2);
402   if (!hash) {
403     return -1;
404   }
405 
406   hash++;
407   const char *hash_end = hash;
408   /* Stop when the first non-base64 character is found. Use unsigned chars to
409    * avoid negative indexes causing crashes.
410    */
411   while (base64_compare_table[*((unsigned char*)hash_end)]
412            != NOT_VALID_BASE64 &&
413          hash_end < line->s + line->len) {
414     hash_end++;
415   }
416 
417   /* Empty hash. */
418   if (hash_end == hash) {
419     return -1;
420   }
421 
422   hash_out->s = hash;
423   /* Always true because lines are limited to this length */
424   tor_assert(hash_end >= hash);
425   tor_assert((size_t)(hash_end - hash) <= UINT32_MAX);
426   hash_out->len = (uint32_t)(hash_end - hash);
427 
428   return 0;
429 }
430 
431 /** Helper: Check that a line is a valid router entry. We must at least be
432  * able to fetch a proper identity hash from it for it to be valid.
433  */
434 STATIC int
is_valid_router_entry(const cdline_t * line)435 is_valid_router_entry(const cdline_t *line)
436 {
437   if (line->len < 2 || fast_memneq(line->s, "r ", 2))
438     return 0;
439   cdline_t tmp;
440   return (get_id_hash(line, &tmp) == 0);
441 }
442 
443 /** Helper: Find the next router line starting at the current position.
444  * Assumes that cur is lower than the length of the smartlist, i.e. it is a
445  * line within the bounds of the consensus. The only exception is when we
446  * don't want to skip the first line, in which case cur will be -1.
447  */
448 STATIC int
next_router(const smartlist_t * cons,int cur)449 next_router(const smartlist_t *cons, int cur)
450 {
451   int len = smartlist_len(cons);
452   tor_assert(cur >= -1 && cur < len);
453 
454   if (++cur >= len) {
455     return len;
456   }
457 
458   const cdline_t *line = smartlist_get(cons, cur);
459   while (!is_valid_router_entry(line)) {
460     if (++cur >= len) {
461       return len;
462     }
463     line = smartlist_get(cons, cur);
464   }
465   return cur;
466 }
467 
468 /** Helper: compare two base64-encoded identity hashes, which may be of
469  * different lengths. Comparison ends when the first non-base64 char is found.
470  */
471 STATIC int
base64cmp(const cdline_t * hash1,const cdline_t * hash2)472 base64cmp(const cdline_t *hash1, const cdline_t *hash2)
473 {
474   /* NULL is always lower, useful for last_hash which starts at NULL. */
475   if (!hash1->s && !hash2->s) {
476     return 0;
477   }
478   if (!hash1->s) {
479     return -1;
480   }
481   if (!hash2->s) {
482     return 1;
483   }
484 
485   /* Don't index with a char; char may be signed. */
486   const unsigned char *a = (unsigned char*)hash1->s;
487   const unsigned char *b = (unsigned char*)hash2->s;
488   const unsigned char *a_end = a + hash1->len;
489   const unsigned char *b_end = b + hash2->len;
490   while (1) {
491     uint8_t av = base64_compare_table[*a];
492     uint8_t bv = base64_compare_table[*b];
493     if (av == NOT_VALID_BASE64) {
494       if (bv == NOT_VALID_BASE64) {
495         /* Both ended with exactly the same characters. */
496         return 0;
497       } else {
498         /* hash2 goes on longer than hash1 and thus hash1 is lower. */
499         return -1;
500       }
501     } else if (bv == NOT_VALID_BASE64) {
502       /* hash1 goes on longer than hash2 and thus hash1 is greater. */
503       return 1;
504     } else if (av < bv) {
505       /* The first difference shows that hash1 is lower. */
506       return -1;
507     } else if (av > bv) {
508       /* The first difference shows that hash1 is greater. */
509       return 1;
510     } else {
511       a++;
512       b++;
513       if (a == a_end) {
514         if (b == b_end) {
515           return 0;
516         } else {
517           return -1;
518         }
519       } else if (b == b_end) {
520         return 1;
521       }
522     }
523   }
524 }
525 
526 /** Structure used to remember the previous and current identity hash of
527  * the "r " lines in a consensus, to enforce well-ordering. */
528 typedef struct router_id_iterator_t {
529   cdline_t last_hash;
530   cdline_t hash;
531 } router_id_iterator_t;
532 
533 #ifndef COCCI
534 /**
535  * Initializer for a router_id_iterator_t.
536  */
537 #define ROUTER_ID_ITERATOR_INIT { { NULL, 0 }, { NULL, 0 } }
538 #endif /* !defined(COCCI) */
539 
540 /** Given an index *<b>idxp</b> into the consensus at <b>cons</b>, advance
541  * the index to the next router line ("r ...") in the consensus, or to
542  * an index one after the end of the list if there is no such line.
543  *
544  * Use <b>iter</b> to record the hash of the found router line, if any,
545  * and to enforce ordering on the hashes.  If the hashes are mis-ordered,
546  * return -1.  Else, return 0.
547  **/
548 static int
find_next_router_line(const smartlist_t * cons,const char * consname,int * idxp,router_id_iterator_t * iter)549 find_next_router_line(const smartlist_t *cons,
550                       const char *consname,
551                       int *idxp,
552                       router_id_iterator_t *iter)
553 {
554   *idxp = next_router(cons, *idxp);
555   if (*idxp < smartlist_len(cons)) {
556     memcpy(&iter->last_hash, &iter->hash, sizeof(cdline_t));
557     if (get_id_hash(smartlist_get(cons, *idxp), &iter->hash) < 0 ||
558         base64cmp(&iter->hash, &iter->last_hash) <= 0) {
559       log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
560                "the %s consensus doesn't have its router entries sorted "
561                "properly.", consname);
562       return -1;
563     }
564   }
565   return 0;
566 }
567 
568 /** Line-prefix indicating the beginning of the signatures section that we
569  * intend to delete. */
570 #define START_OF_SIGNATURES_SECTION "directory-signature "
571 
572 /** Pre-process a consensus in <b>cons</b> (represented as a list of cdline_t)
573  * to remove the signatures from it.  If the footer is removed, return a
574  * cdline_t containing a delete command to delete the footer, allocated in
575  * <b>area</b>.  If no footer is removed, return NULL.
576  *
577  * We remove the signatures here because they are not themselves signed, and
578  * as such there might be different encodings for them.
579  */
580 static cdline_t *
preprocess_consensus(memarea_t * area,smartlist_t * cons)581 preprocess_consensus(memarea_t *area,
582                      smartlist_t *cons)
583 {
584   int idx;
585   int dirsig_idx = -1;
586   for (idx = 0; idx < smartlist_len(cons); ++idx) {
587     cdline_t *line = smartlist_get(cons, idx);
588     if (line_starts_with_str(line, START_OF_SIGNATURES_SECTION)) {
589       dirsig_idx = idx;
590       break;
591     }
592   }
593   if (dirsig_idx >= 0) {
594     char buf[64];
595     while (smartlist_len(cons) > dirsig_idx)
596       smartlist_del(cons, dirsig_idx);
597     tor_snprintf(buf, sizeof(buf), "%d,$d", dirsig_idx+1);
598     return cdline_linecpy(area, buf);
599   } else {
600     return NULL;
601   }
602 }
603 
604 /** Generate an ed diff as a smartlist from two consensuses, also given as
605  * smartlists. Will return NULL if the diff could not be generated, which can
606  * happen if any lines the script had to add matched "." or if the routers
607  * were not properly ordered.
608  *
609  * All cdline_t objects in the resulting object are either references to lines
610  * in one of the inputs, or are newly allocated lines in the provided memarea.
611  *
612  * This implementation is consensus-specific. To generate an ed diff for any
613  * given input in quadratic time, you can replace all the code until the
614  * navigation in reverse order with the following:
615  *
616  *   int len1 = smartlist_len(cons1);
617  *   int len2 = smartlist_len(cons2);
618  *   bitarray_t *changed1 = bitarray_init_zero(len1);
619  *   bitarray_t *changed2 = bitarray_init_zero(len2);
620  *   cons1_sl = smartlist_slice(cons1, 0, -1);
621  *   cons2_sl = smartlist_slice(cons2, 0, -1);
622  *   calc_changes(cons1_sl, cons2_sl, changed1, changed2);
623  */
624 STATIC smartlist_t *
gen_ed_diff(const smartlist_t * cons1_orig,const smartlist_t * cons2,memarea_t * area)625 gen_ed_diff(const smartlist_t *cons1_orig, const smartlist_t *cons2,
626             memarea_t *area)
627 {
628   smartlist_t *cons1 = smartlist_new();
629   smartlist_add_all(cons1, cons1_orig);
630   cdline_t *remove_trailer = preprocess_consensus(area, cons1);
631 
632   int len1 = smartlist_len(cons1);
633   int len2 = smartlist_len(cons2);
634   smartlist_t *result = smartlist_new();
635 
636   if (remove_trailer) {
637     /* There's a delete-the-trailer line at the end, so add it here. */
638     smartlist_add(result, remove_trailer);
639   }
640 
641   /* Initialize the changed bitarrays to zero, so that calc_changes only needs
642    * to set the ones that matter and leave the rest untouched.
643    */
644   bitarray_t *changed1 = bitarray_init_zero(len1);
645   bitarray_t *changed2 = bitarray_init_zero(len2);
646   int i1=-1, i2=-1;
647   int start1=0, start2=0;
648 
649   /* To check that hashes are ordered properly */
650   router_id_iterator_t iter1 = ROUTER_ID_ITERATOR_INIT;
651   router_id_iterator_t iter2 = ROUTER_ID_ITERATOR_INIT;
652 
653   /* i1 and i2 are initialized at the first line of each consensus. They never
654    * reach past len1 and len2 respectively, since next_router doesn't let that
655    * happen. i1 and i2 are advanced by at least one line at each iteration as
656    * long as they have not yet reached len1 and len2, so the loop is
657    * guaranteed to end, and each pair of (i1,i2) will be inspected at most
658    * once.
659    */
660   while (i1 < len1 || i2 < len2) {
661 
662     /* Advance each of the two navigation positions by one router entry if not
663      * yet at the end.
664      */
665     if (i1 < len1) {
666       if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
667         goto error_cleanup;
668       }
669     }
670 
671     if (i2 < len2) {
672       if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
673         goto error_cleanup;
674       }
675     }
676 
677     /* If we have reached the end of both consensuses, there is no need to
678      * compare hashes anymore, since this is the last iteration.
679      */
680     if (i1 < len1 || i2 < len2) {
681 
682       /* Keep on advancing the lower (by identity hash sorting) position until
683        * we have two matching positions. The only other possible outcome is
684        * that a lower position reaches the end of the consensus before it can
685        * reach a hash that is no longer the lower one. Since there will always
686        * be a lower hash for as long as the loop runs, one of the two indexes
687        * will always be incremented, thus assuring that the loop must end
688        * after a finite number of iterations. If that cannot be because said
689        * consensus has already reached the end, both are extended to their
690        * respecting ends since we are done.
691        */
692       int cmp = base64cmp(&iter1.hash, &iter2.hash);
693       while (cmp != 0) {
694         if (i1 < len1 && cmp < 0) {
695           if (find_next_router_line(cons1, "base", &i1, &iter1) < 0) {
696             goto error_cleanup;
697           }
698           if (i1 == len1) {
699             /* We finished the first consensus, so grab all the remaining
700              * lines of the second consensus and finish up.
701              */
702             i2 = len2;
703             break;
704           }
705         } else if (i2 < len2 && cmp > 0) {
706           if (find_next_router_line(cons2, "target", &i2, &iter2) < 0) {
707             goto error_cleanup;
708           }
709           if (i2 == len2) {
710             /* We finished the second consensus, so grab all the remaining
711              * lines of the first consensus and finish up.
712              */
713             i1 = len1;
714             break;
715           }
716         } else {
717           i1 = len1;
718           i2 = len2;
719           break;
720         }
721         cmp = base64cmp(&iter1.hash, &iter2.hash);
722       }
723     }
724 
725     /* Make slices out of these chunks (up to the common router entry) and
726      * calculate the changes for them.
727      * Error if any of the two slices are longer than 10K lines. That should
728      * never happen with any pair of real consensuses. Feeding more than 10K
729      * lines to calc_changes would be very slow anyway.
730      */
731 #define MAX_LINE_COUNT (10000)
732     if (i1-start1 > MAX_LINE_COUNT || i2-start2 > MAX_LINE_COUNT) {
733       log_warn(LD_CONSDIFF, "Refusing to generate consensus diff because "
734           "we found too few common router ids.");
735       goto error_cleanup;
736     }
737 
738     smartlist_slice_t *cons1_sl = smartlist_slice(cons1, start1, i1);
739     smartlist_slice_t *cons2_sl = smartlist_slice(cons2, start2, i2);
740     calc_changes(cons1_sl, cons2_sl, changed1, changed2);
741     tor_free(cons1_sl);
742     tor_free(cons2_sl);
743     start1 = i1, start2 = i2;
744   }
745 
746   /* Navigate the changes in reverse order and generate one ed command for
747    * each chunk of changes.
748    */
749   i1=len1-1, i2=len2-1;
750   char buf[128];
751   while (i1 >= 0 || i2 >= 0) {
752 
753     int start1x, start2x, end1, end2, added, deleted;
754 
755     /* We are at a point were no changed bools are true, so just keep going. */
756     if (!(i1 >= 0 && bitarray_is_set(changed1, i1)) &&
757         !(i2 >= 0 && bitarray_is_set(changed2, i2))) {
758       if (i1 >= 0) {
759         i1--;
760       }
761       if (i2 >= 0) {
762         i2--;
763       }
764       continue;
765     }
766 
767     end1 = i1, end2 = i2;
768 
769     /* Grab all contiguous changed lines */
770     while (i1 >= 0 && bitarray_is_set(changed1, i1)) {
771       i1--;
772     }
773     while (i2 >= 0 && bitarray_is_set(changed2, i2)) {
774       i2--;
775     }
776 
777     start1x = i1+1, start2x = i2+1;
778     added = end2-i2, deleted = end1-i1;
779 
780     if (added == 0) {
781       if (deleted == 1) {
782         tor_snprintf(buf, sizeof(buf), "%id", start1x+1);
783         smartlist_add_linecpy(result, area, buf);
784       } else {
785         tor_snprintf(buf, sizeof(buf), "%i,%id", start1x+1, start1x+deleted);
786         smartlist_add_linecpy(result, area, buf);
787       }
788     } else {
789       int i;
790       if (deleted == 0) {
791         tor_snprintf(buf, sizeof(buf), "%ia", start1x);
792         smartlist_add_linecpy(result, area, buf);
793       } else if (deleted == 1) {
794         tor_snprintf(buf, sizeof(buf), "%ic", start1x+1);
795         smartlist_add_linecpy(result, area, buf);
796       } else {
797         tor_snprintf(buf, sizeof(buf), "%i,%ic", start1x+1, start1x+deleted);
798         smartlist_add_linecpy(result, area, buf);
799       }
800 
801       for (i = start2x; i <= end2; ++i) {
802         cdline_t *line = smartlist_get(cons2, i);
803         if (line_str_eq(line, ".")) {
804           log_warn(LD_CONSDIFF, "Cannot generate consensus diff because "
805               "one of the lines to be added is \".\".");
806           goto error_cleanup;
807         }
808         smartlist_add(result, line);
809       }
810       smartlist_add_linecpy(result, area, ".");
811     }
812   }
813 
814   smartlist_free(cons1);
815   bitarray_free(changed1);
816   bitarray_free(changed2);
817 
818   return result;
819 
820  error_cleanup:
821 
822   smartlist_free(cons1);
823   bitarray_free(changed1);
824   bitarray_free(changed2);
825 
826   smartlist_free(result);
827 
828   return NULL;
829 }
830 
831 /* Helper: Read a base-10 number between 0 and INT32_MAX from <b>s</b> and
832  * store it in <b>num_out</b>.  Advance <b>s</b> to the character immediately
833  * after the number.  Return 0 on success, -1 on failure. */
834 static int
get_linenum(const char ** s,int * num_out)835 get_linenum(const char **s, int *num_out)
836 {
837   int ok;
838   char *next;
839   if (!TOR_ISDIGIT(**s)) {
840     return -1;
841   }
842   *num_out = (int) tor_parse_long(*s, 10, 0, INT32_MAX, &ok, &next);
843   if (ok && next) {
844     *s = next;
845     return 0;
846   } else {
847     return -1;
848   }
849 }
850 
851 /** Apply the ed diff, starting at <b>diff_starting_line</b>, to the consensus
852  * and return a new consensus, also as a line-based smartlist. Will return
853  * NULL if the ed diff is not properly formatted.
854  *
855  * All cdline_t objects in the resulting object are references to lines
856  * in one of the inputs; nothing is copied.
857  */
858 STATIC smartlist_t *
apply_ed_diff(const smartlist_t * cons1,const smartlist_t * diff,int diff_starting_line)859 apply_ed_diff(const smartlist_t *cons1, const smartlist_t *diff,
860               int diff_starting_line)
861 {
862   int diff_len = smartlist_len(diff);
863   int j = smartlist_len(cons1);
864   smartlist_t *cons2 = smartlist_new();
865 
866   for (int i=diff_starting_line; i<diff_len; ++i) {
867     const cdline_t *diff_cdline = smartlist_get(diff, i);
868     char diff_line[128];
869 
870     if (diff_cdline->len > sizeof(diff_line) - 1) {
871       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
872                "an ed command was far too long");
873       goto error_cleanup;
874     }
875     /* Copy the line to make it nul-terminated. */
876     memcpy(diff_line, diff_cdline->s, diff_cdline->len);
877     diff_line[diff_cdline->len] = 0;
878     const char *ptr = diff_line;
879     int start = 0, end = 0;
880     int had_range = 0;
881     int end_was_eof = 0;
882     if (get_linenum(&ptr, &start) < 0) {
883       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
884                "an ed command was missing a line number.");
885       goto error_cleanup;
886     }
887     if (*ptr == ',') {
888       /* Two-item range */
889       had_range = 1;
890       ++ptr;
891       if (*ptr == '$') {
892         end_was_eof = 1;
893         end = smartlist_len(cons1);
894         ++ptr;
895       } else if (get_linenum(&ptr, &end) < 0) {
896         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
897                  "an ed command was missing a range end line number.");
898         goto error_cleanup;
899       }
900       /* Incoherent range. */
901       if (end <= start) {
902         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
903                  "an invalid range was found in an ed command.");
904         goto error_cleanup;
905       }
906     } else {
907       /* We'll take <n1> as <n1>,<n1> for simplicity. */
908       end = start;
909     }
910 
911     if (end > j) {
912       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
913           "its commands are not properly sorted in reverse order.");
914       goto error_cleanup;
915     }
916 
917     if (*ptr == '\0') {
918       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
919                "a line with no ed command was found");
920       goto error_cleanup;
921     }
922 
923     if (*(ptr+1) != '\0') {
924       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
925           "an ed command longer than one char was found.");
926       goto error_cleanup;
927     }
928 
929     char action = *ptr;
930 
931     switch (action) {
932       case 'a':
933       case 'c':
934       case 'd':
935         break;
936       default:
937         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
938             "an unrecognised ed command was found.");
939         goto error_cleanup;
940     }
941 
942     /** $ is not allowed with non-d actions. */
943     if (end_was_eof && action != 'd') {
944       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
945                "it wanted to use $ with a command other than delete");
946       goto error_cleanup;
947     }
948 
949     /* 'a' commands are not allowed to have ranges. */
950     if (had_range && action == 'a') {
951       log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
952           "it wanted to add lines after a range.");
953       goto error_cleanup;
954     }
955 
956     /* Add unchanged lines. */
957     for (; j && j > end; --j) {
958       cdline_t *cons_line = smartlist_get(cons1, j-1);
959       smartlist_add(cons2, cons_line);
960     }
961 
962     /* Ignore removed lines. */
963     if (action == 'c' || action == 'd') {
964       while (--j >= start) {
965         /* Skip line */
966       }
967     }
968 
969     /* Add new lines in reverse order, since it will all be reversed at the
970      * end.
971      */
972     if (action == 'a' || action == 'c') {
973       int added_end = i;
974 
975       i++; /* Skip the line with the range and command. */
976       while (i < diff_len) {
977         if (line_str_eq(smartlist_get(diff, i), ".")) {
978           break;
979         }
980         if (++i == diff_len) {
981           log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
982               "it has lines to be inserted that don't end with a \".\".");
983           goto error_cleanup;
984         }
985       }
986 
987       int added_i = i-1;
988 
989       /* It would make no sense to add zero new lines. */
990       if (added_i == added_end) {
991         log_warn(LD_CONSDIFF, "Could not apply consensus diff because "
992             "it has an ed command that tries to insert zero lines.");
993         goto error_cleanup;
994       }
995 
996       while (added_i > added_end) {
997         cdline_t *added_line = smartlist_get(diff, added_i--);
998         smartlist_add(cons2, added_line);
999       }
1000     }
1001   }
1002 
1003   /* Add remaining unchanged lines. */
1004   for (; j > 0; --j) {
1005     cdline_t *cons_line = smartlist_get(cons1, j-1);
1006     smartlist_add(cons2, cons_line);
1007   }
1008 
1009   /* Reverse the whole thing since we did it from the end. */
1010   smartlist_reverse(cons2);
1011   return cons2;
1012 
1013  error_cleanup:
1014 
1015   smartlist_free(cons2);
1016 
1017   return NULL;
1018 }
1019 
1020 /** Generate a consensus diff as a smartlist from two given consensuses, also
1021  * as smartlists. Will return NULL if the consensus diff could not be
1022  * generated. Neither of the two consensuses are modified in any way, so it's
1023  * up to the caller to free their resources.
1024  */
1025 smartlist_t *
consdiff_gen_diff(const smartlist_t * cons1,const smartlist_t * cons2,const consensus_digest_t * digests1,const consensus_digest_t * digests2,memarea_t * area)1026 consdiff_gen_diff(const smartlist_t *cons1,
1027                   const smartlist_t *cons2,
1028                   const consensus_digest_t *digests1,
1029                   const consensus_digest_t *digests2,
1030                   memarea_t *area)
1031 {
1032   smartlist_t *ed_diff = gen_ed_diff(cons1, cons2, area);
1033   /* ed diff could not be generated - reason already logged by gen_ed_diff. */
1034   if (!ed_diff) {
1035     goto error_cleanup;
1036   }
1037 
1038   /* See that the script actually produces what we want. */
1039   smartlist_t *ed_cons2 = apply_ed_diff(cons1, ed_diff, 0);
1040   if (!ed_cons2) {
1041     /* LCOV_EXCL_START -- impossible if diff generation is correct */
1042     log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1043         "the generated ed diff could not be tested to successfully generate "
1044         "the target consensus.");
1045     goto error_cleanup;
1046     /* LCOV_EXCL_STOP */
1047   }
1048 
1049   int cons2_eq = 1;
1050   if (smartlist_len(cons2) == smartlist_len(ed_cons2)) {
1051     SMARTLIST_FOREACH_BEGIN(cons2, const cdline_t *, line1) {
1052       const cdline_t *line2 = smartlist_get(ed_cons2, line1_sl_idx);
1053       if (!lines_eq(line1, line2)) {
1054         cons2_eq = 0;
1055         break;
1056       }
1057     } SMARTLIST_FOREACH_END(line1);
1058   } else {
1059     cons2_eq = 0;
1060   }
1061   smartlist_free(ed_cons2);
1062   if (!cons2_eq) {
1063     /* LCOV_EXCL_START -- impossible if diff generation is correct. */
1064     log_warn(LD_BUG|LD_CONSDIFF, "Refusing to generate consensus diff because "
1065         "the generated ed diff did not generate the target consensus "
1066         "successfully when tested.");
1067     goto error_cleanup;
1068     /* LCOV_EXCL_STOP */
1069   }
1070 
1071   char cons1_hash_hex[HEX_DIGEST256_LEN+1];
1072   char cons2_hash_hex[HEX_DIGEST256_LEN+1];
1073   base16_encode(cons1_hash_hex, HEX_DIGEST256_LEN+1,
1074                 (const char*)digests1->sha3_256, DIGEST256_LEN);
1075   base16_encode(cons2_hash_hex, HEX_DIGEST256_LEN+1,
1076                 (const char*)digests2->sha3_256, DIGEST256_LEN);
1077 
1078   /* Create the resulting consensus diff. */
1079   char buf[160];
1080   smartlist_t *result = smartlist_new();
1081   tor_snprintf(buf, sizeof(buf), "%s", ns_diff_version);
1082   smartlist_add_linecpy(result, area, buf);
1083   tor_snprintf(buf, sizeof(buf), "%s %s %s", hash_token,
1084       cons1_hash_hex, cons2_hash_hex);
1085   smartlist_add_linecpy(result, area, buf);
1086   smartlist_add_all(result, ed_diff);
1087   smartlist_free(ed_diff);
1088   return result;
1089 
1090  error_cleanup:
1091 
1092   if (ed_diff) {
1093     /* LCOV_EXCL_START -- ed_diff is NULL except in unreachable cases above */
1094     smartlist_free(ed_diff);
1095     /* LCOV_EXCL_STOP */
1096   }
1097 
1098   return NULL;
1099 }
1100 
1101 /** Fetch the digest of the base consensus in the consensus diff, encoded in
1102  * base16 as found in the diff itself. digest1_out and digest2_out must be of
1103  * length DIGEST256_LEN or larger if not NULL.
1104  */
1105 int
consdiff_get_digests(const smartlist_t * diff,char * digest1_out,char * digest2_out)1106 consdiff_get_digests(const smartlist_t *diff,
1107                      char *digest1_out,
1108                      char *digest2_out)
1109 {
1110   smartlist_t *hash_words = NULL;
1111   const cdline_t *format;
1112   char cons1_hash[DIGEST256_LEN], cons2_hash[DIGEST256_LEN];
1113   char *cons1_hash_hex, *cons2_hash_hex;
1114   if (smartlist_len(diff) < 2) {
1115     log_info(LD_CONSDIFF, "The provided consensus diff is too short.");
1116     goto error_cleanup;
1117   }
1118 
1119   /* Check that it's the format and version we know. */
1120   format = smartlist_get(diff, 0);
1121   if (!line_str_eq(format, ns_diff_version)) {
1122     log_warn(LD_CONSDIFF, "The provided consensus diff format is not known.");
1123     goto error_cleanup;
1124   }
1125 
1126   /* Grab the base16 digests. */
1127   hash_words = smartlist_new();
1128   {
1129     const cdline_t *line2 = smartlist_get(diff, 1);
1130     char *h = tor_memdup_nulterm(line2->s, line2->len);
1131     smartlist_split_string(hash_words, h, " ", 0, 0);
1132     tor_free(h);
1133   }
1134 
1135   /* There have to be three words, the first of which must be hash_token. */
1136   if (smartlist_len(hash_words) != 3 ||
1137       strcmp(smartlist_get(hash_words, 0), hash_token)) {
1138     log_info(LD_CONSDIFF, "The provided consensus diff does not include "
1139         "the necessary digests.");
1140     goto error_cleanup;
1141   }
1142 
1143   /* Expected hashes as found in the consensus diff header. They must be of
1144    * length HEX_DIGEST256_LEN, normally 64 hexadecimal characters.
1145    * If any of the decodings fail, error to make sure that the hashes are
1146    * proper base16-encoded digests.
1147    */
1148   cons1_hash_hex = smartlist_get(hash_words, 1);
1149   cons2_hash_hex = smartlist_get(hash_words, 2);
1150   if (strlen(cons1_hash_hex) != HEX_DIGEST256_LEN ||
1151       strlen(cons2_hash_hex) != HEX_DIGEST256_LEN) {
1152     log_info(LD_CONSDIFF, "The provided consensus diff includes "
1153         "base16-encoded digests of incorrect size.");
1154     goto error_cleanup;
1155   }
1156 
1157   if (base16_decode(cons1_hash, DIGEST256_LEN,
1158           cons1_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN ||
1159       base16_decode(cons2_hash, DIGEST256_LEN,
1160           cons2_hash_hex, HEX_DIGEST256_LEN) != DIGEST256_LEN) {
1161     log_info(LD_CONSDIFF, "The provided consensus diff includes "
1162         "malformed digests.");
1163     goto error_cleanup;
1164   }
1165 
1166   if (digest1_out) {
1167     memcpy(digest1_out, cons1_hash, DIGEST256_LEN);
1168   }
1169   if (digest2_out) {
1170     memcpy(digest2_out, cons2_hash, DIGEST256_LEN);
1171   }
1172 
1173   SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1174   smartlist_free(hash_words);
1175   return 0;
1176 
1177  error_cleanup:
1178 
1179   if (hash_words) {
1180     SMARTLIST_FOREACH(hash_words, char *, cp, tor_free(cp));
1181     smartlist_free(hash_words);
1182   }
1183   return 1;
1184 }
1185 
1186 /** Apply the consensus diff to the given consensus and return a new
1187  * consensus, also as a line-based smartlist. Will return NULL if the diff
1188  * could not be applied. Neither the consensus nor the diff are modified in
1189  * any way, so it's up to the caller to free their resources.
1190  */
1191 char *
consdiff_apply_diff(const smartlist_t * cons1,const smartlist_t * diff,const consensus_digest_t * digests1)1192 consdiff_apply_diff(const smartlist_t *cons1,
1193                     const smartlist_t *diff,
1194                     const consensus_digest_t *digests1)
1195 {
1196   smartlist_t *cons2 = NULL;
1197   char *cons2_str = NULL;
1198   char e_cons1_hash[DIGEST256_LEN];
1199   char e_cons2_hash[DIGEST256_LEN];
1200 
1201   if (consdiff_get_digests(diff, e_cons1_hash, e_cons2_hash) != 0) {
1202     goto error_cleanup;
1203   }
1204 
1205   /* See that the consensus that was given to us matches its hash. */
1206   if (!consensus_digest_eq(digests1->sha3_256,
1207                            (const uint8_t*)e_cons1_hash)) {
1208     char hex_digest1[HEX_DIGEST256_LEN+1];
1209     char e_hex_digest1[HEX_DIGEST256_LEN+1];
1210     log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1211         "the base consensus doesn't match the digest as found in "
1212         "the consensus diff header.");
1213     base16_encode(hex_digest1, HEX_DIGEST256_LEN+1,
1214                   (const char *)digests1->sha3_256, DIGEST256_LEN);
1215     base16_encode(e_hex_digest1, HEX_DIGEST256_LEN+1,
1216                   e_cons1_hash, DIGEST256_LEN);
1217     log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1218              hex_digest1, e_hex_digest1);
1219     goto error_cleanup;
1220   }
1221 
1222   /* Grab the ed diff and calculate the resulting consensus. */
1223   /* Skip the first two lines. */
1224   cons2 = apply_ed_diff(cons1, diff, 2);
1225 
1226   /* ed diff could not be applied - reason already logged by apply_ed_diff. */
1227   if (!cons2) {
1228     goto error_cleanup;
1229   }
1230 
1231   cons2_str = consensus_join_lines(cons2);
1232 
1233   consensus_digest_t cons2_digests;
1234   if (consensus_compute_digest(cons2_str, strlen(cons2_str),
1235                                &cons2_digests) < 0) {
1236     /* LCOV_EXCL_START -- digest can't fail */
1237     log_warn(LD_CONSDIFF, "Could not compute digests of the consensus "
1238         "resulting from applying a consensus diff.");
1239     goto error_cleanup;
1240     /* LCOV_EXCL_STOP */
1241   }
1242 
1243   /* See that the resulting consensus matches its hash. */
1244   if (!consensus_digest_eq(cons2_digests.sha3_256,
1245                            (const uint8_t*)e_cons2_hash)) {
1246     log_warn(LD_CONSDIFF, "Refusing to apply consensus diff because "
1247         "the resulting consensus doesn't match the digest as found in "
1248         "the consensus diff header.");
1249     char hex_digest2[HEX_DIGEST256_LEN+1];
1250     char e_hex_digest2[HEX_DIGEST256_LEN+1];
1251     base16_encode(hex_digest2, HEX_DIGEST256_LEN+1,
1252         (const char *)cons2_digests.sha3_256, DIGEST256_LEN);
1253     base16_encode(e_hex_digest2, HEX_DIGEST256_LEN+1,
1254         e_cons2_hash, DIGEST256_LEN);
1255     log_warn(LD_CONSDIFF, "Expected: %s; found: %s",
1256              hex_digest2, e_hex_digest2);
1257     goto error_cleanup;
1258   }
1259 
1260   goto done;
1261 
1262  error_cleanup:
1263   tor_free(cons2_str); /* Sets it to NULL */
1264 
1265  done:
1266   if (cons2) {
1267     smartlist_free(cons2);
1268   }
1269 
1270   return cons2_str;
1271 }
1272 
1273 /** Any consensus line longer than this means that the input is invalid. */
1274 #define CONSENSUS_LINE_MAX_LEN (1<<20)
1275 
1276 /**
1277  * Helper: For every NL-terminated line in <b>s</b>, add a cdline referring to
1278  * that line (without trailing newline) to <b>out</b>.  Return -1 if there are
1279  * any non-NL terminated lines; 0 otherwise.
1280  *
1281  * Unlike tor_split_lines, this function avoids ambiguity on its
1282  * handling of a final line that isn't NL-terminated.
1283  *
1284  * All cdline_t objects are allocated in the provided memarea.  Strings
1285  * are not copied: if <b>s</b> changes or becomes invalid, then all
1286  * generated cdlines will become invalid.
1287  */
1288 STATIC int
consensus_split_lines(smartlist_t * out,const char * s,size_t len,memarea_t * area)1289 consensus_split_lines(smartlist_t *out,
1290                       const char *s, size_t len,
1291                       memarea_t *area)
1292 {
1293   const char *end_of_str = s + len;
1294 
1295   while (s < end_of_str) {
1296     const char *eol = memchr(s, '\n', end_of_str - s);
1297     if (!eol) {
1298       /* File doesn't end with newline. */
1299       return -1;
1300     }
1301     if (eol - s > CONSENSUS_LINE_MAX_LEN) {
1302       /* Line is far too long. */
1303       return -1;
1304     }
1305     cdline_t *line = memarea_alloc(area, sizeof(cdline_t));
1306     line->s = s;
1307     line->len = (uint32_t)(eol - s);
1308     smartlist_add(out, line);
1309     s = eol+1;
1310   }
1311   return 0;
1312 }
1313 
1314 /** Given a list of cdline_t, return a newly allocated string containing
1315  * all of the lines, terminated with NL, concatenated.
1316  *
1317  * Unlike smartlist_join_strings(), avoids lossy operations on empty
1318  * lists.  */
1319 static char *
consensus_join_lines(const smartlist_t * inp)1320 consensus_join_lines(const smartlist_t *inp)
1321 {
1322   size_t n = 0;
1323   SMARTLIST_FOREACH(inp, const cdline_t *, cdline, n += cdline->len + 1);
1324   n += 1;
1325   char *result = tor_malloc(n);
1326   char *out = result;
1327   SMARTLIST_FOREACH_BEGIN(inp, const cdline_t *, cdline) {
1328     memcpy(out, cdline->s, cdline->len);
1329     out += cdline->len;
1330     *out++ = '\n';
1331   } SMARTLIST_FOREACH_END(cdline);
1332   *out++ = '\0';
1333   tor_assert(out == result+n);
1334   return result;
1335 }
1336 
1337 /** Given two consensus documents, try to compute a diff between them.  On
1338  * success, return a newly allocated string containing that diff.  On failure,
1339  * return NULL. */
1340 char *
consensus_diff_generate(const char * cons1,size_t cons1len,const char * cons2,size_t cons2len)1341 consensus_diff_generate(const char *cons1, size_t cons1len,
1342                         const char *cons2, size_t cons2len)
1343 {
1344   consensus_digest_t d1, d2;
1345   smartlist_t *lines1 = NULL, *lines2 = NULL, *result_lines = NULL;
1346   int r1, r2;
1347   char *result = NULL;
1348 
1349   r1 = consensus_compute_digest_as_signed(cons1, cons1len, &d1);
1350   r2 = consensus_compute_digest(cons2, cons2len, &d2);
1351   if (BUG(r1 < 0 || r2 < 0))
1352     return NULL; // LCOV_EXCL_LINE
1353 
1354   memarea_t *area = memarea_new();
1355   lines1 = smartlist_new();
1356   lines2 = smartlist_new();
1357   if (consensus_split_lines(lines1, cons1, cons1len, area) < 0)
1358     goto done;
1359   if (consensus_split_lines(lines2, cons2, cons2len, area) < 0)
1360     goto done;
1361 
1362   result_lines = consdiff_gen_diff(lines1, lines2, &d1, &d2, area);
1363 
1364  done:
1365   if (result_lines) {
1366     result = consensus_join_lines(result_lines);
1367     smartlist_free(result_lines);
1368   }
1369 
1370   memarea_drop_all(area);
1371   smartlist_free(lines1);
1372   smartlist_free(lines2);
1373 
1374   return result;
1375 }
1376 
1377 /** Given a consensus document and a diff, try to apply the diff to the
1378  * consensus.  On success return a newly allocated string containing the new
1379  * consensus.  On failure, return NULL. */
1380 char *
consensus_diff_apply(const char * consensus,size_t consensus_len,const char * diff,size_t diff_len)1381 consensus_diff_apply(const char *consensus,
1382                      size_t consensus_len,
1383                      const char *diff,
1384                      size_t diff_len)
1385 {
1386   consensus_digest_t d1;
1387   smartlist_t *lines1 = NULL, *lines2 = NULL;
1388   int r1;
1389   char *result = NULL;
1390   memarea_t *area = memarea_new();
1391 
1392   r1 = consensus_compute_digest_as_signed(consensus, consensus_len, &d1);
1393   if (BUG(r1 < 0))
1394     goto done;
1395 
1396   lines1 = smartlist_new();
1397   lines2 = smartlist_new();
1398   if (consensus_split_lines(lines1, consensus, consensus_len, area) < 0)
1399     goto done;
1400   if (consensus_split_lines(lines2, diff, diff_len, area) < 0)
1401     goto done;
1402 
1403   result = consdiff_apply_diff(lines1, lines2, &d1);
1404 
1405  done:
1406   smartlist_free(lines1);
1407   smartlist_free(lines2);
1408   memarea_drop_all(area);
1409 
1410   return result;
1411 }
1412 
1413 /** Return true iff, based on its header, <b>document</b> is likely
1414  * to be a consensus diff. */
1415 int
looks_like_a_consensus_diff(const char * document,size_t len)1416 looks_like_a_consensus_diff(const char *document, size_t len)
1417 {
1418   return (len >= strlen(ns_diff_version) &&
1419           fast_memeq(document, ns_diff_version, strlen(ns_diff_version)));
1420 }
1421