1 #include "tree.h"
2
3 #ifndef WIN32
4 #include <sys/mman.h>
5 #else
6 #include "windows_mman.h"
7 #endif
8
9 #include <sys/socket.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <inttypes.h>
16 #include <netdb.h>
17 #include <stdbool.h>
18 #include <stdio.h>
19
20 #ifdef __GNUC__
21 #define UNUSED(x) UNUSED_##x __attribute__((__unused__))
22 #else
23 #define UNUSED(x) UNUSED_##x
24 #endif
25
26 /* This is also defined in MaxMind::DB::Common but we don't want to have to
27 * fetch it every time we need it. */
28 #define DATA_SECTION_SEPARATOR_SIZE (16)
29
30 #define SHA1_KEY_LENGTH (27)
31
32 #define MERGE_KEY_SIZE (57)
33
34 typedef struct freeze_args_s {
35 FILE *file;
36 char *filename;
37 HV *data_hash;
38 } freeze_args_s;
39
40 typedef struct thawed_network_s {
41 MMDBW_network_s *network;
42 MMDBW_record_s *record;
43 } thawed_network_s;
44
45 typedef struct encode_args_s {
46 PerlIO *output_io;
47 SV *root_data_type;
48 SV *serializer;
49 HV *data_pointer_cache;
50 } encode_args_s;
51
52 struct network {
53 const char *const ipstr;
54 const uint8_t prefix_length;
55 };
56
57 static void verify_ip(MMDBW_tree_s *tree, const char *ipstr);
58 static int128_t ip_string_to_integer(const char *ipstr, int family);
59 static int128_t ip_bytes_to_integer(uint8_t *bytes, int family);
60 static void
61 integer_to_ip_bytes(int tree_ip_version, uint128_t ip, uint8_t *bytes);
62 static void integer_to_ip_string(int tree_ip_version,
63 uint128_t ip,
64 char *dst,
65 int dst_length);
66 static int prefix_length_for_largest_subnet(uint128_t start_ip,
67 uint128_t end_ip,
68 int family,
69 uint128_t *reverse_mask);
70 static const char *
71 store_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv);
72 static const char *increment_data_reference_count(MMDBW_tree_s *tree,
73 const char *const key);
74 static void
75 set_stored_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv);
76 static void decrement_data_reference_count(MMDBW_tree_s *tree,
77 const char *const key);
78 static MMDBW_network_s resolve_network(MMDBW_tree_s *tree,
79 const char *const ipstr,
80 uint8_t prefix_length);
81 static MMDBW_status
82 resolve_ip(int tree_ip_version, const char *const ipstr, uint8_t *bytes);
83 static void free_network(MMDBW_network_s *network);
84 static void alias_ipv4_networks(MMDBW_tree_s *tree);
85 static MMDBW_status insert_reserved_networks_as_fixed_empty(MMDBW_tree_s *tree);
86 static MMDBW_status
87 insert_networks_as_fixed_empty(MMDBW_tree_s *tree,
88 struct network const *const networks,
89 const size_t num_networks);
90 static MMDBW_status
91 insert_record_for_network(MMDBW_tree_s *tree,
92 MMDBW_network_s *network,
93 MMDBW_record_s *new_record,
94 MMDBW_merge_strategy merge_strategy,
95 bool is_internal_insert);
96 static MMDBW_status
97 insert_record_into_next_node(MMDBW_tree_s *tree,
98 MMDBW_record_s *current_record,
99 MMDBW_network_s *network,
100 int current_bit,
101 MMDBW_record_s *new_record,
102 MMDBW_merge_strategy merge_strategy,
103 bool is_internal_insert);
104 static MMDBW_status
105 insert_record_into_current_record(MMDBW_tree_s *tree,
106 MMDBW_record_s *current_record,
107 MMDBW_network_s *network,
108 MMDBW_record_s *new_record,
109 MMDBW_merge_strategy merge_strategy);
110 static const char *maybe_merge_records(MMDBW_tree_s *tree,
111 MMDBW_network_s *network,
112 MMDBW_record_s *new_record,
113 MMDBW_record_s *record_to_set,
114 MMDBW_merge_strategy merge_strategy);
115 static int network_bit_value(MMDBW_network_s *network, uint8_t current_bit);
116 static int tree_depth0(MMDBW_tree_s *tree);
117 static SV *merge_hashes(MMDBW_tree_s *tree,
118 SV *from,
119 SV *into,
120 MMDBW_merge_strategy merge_strategy);
121 static void merge_new_from_hash_into_hash(MMDBW_tree_s *tree,
122 HV *from,
123 HV *to,
124 MMDBW_merge_strategy merge_strategy);
125 static SV *merge_values(MMDBW_tree_s *tree,
126 SV *from,
127 SV *into,
128 MMDBW_merge_strategy merge_strategy);
129 static SV *merge_arrays(MMDBW_tree_s *tree,
130 SV *from,
131 SV *into,
132 MMDBW_merge_strategy merge_strategy);
133 static MMDBW_status find_record_for_network(MMDBW_tree_s *tree,
134 MMDBW_network_s *network,
135 MMDBW_record_s **record);
136 static MMDBW_node_s *new_node_from_record(MMDBW_tree_s *tree,
137 MMDBW_record_s *record);
138 static MMDBW_status free_node_and_subnodes(MMDBW_tree_s *tree,
139 MMDBW_node_s *node,
140 bool remove_alias_and_fixed_nodes);
141 static MMDBW_status free_record_value(MMDBW_tree_s *tree,
142 MMDBW_record_s *record,
143 bool remove_alias_and_fixed_nodes);
144 static void assign_node_number(MMDBW_tree_s *tree,
145 MMDBW_node_s *node,
146 uint128_t UNUSED(network),
147 uint8_t UNUSED(depth),
148 void *UNUSED(args));
149 static void freeze_search_tree(MMDBW_tree_s *tree, freeze_args_s *args);
150 static void freeze_node(MMDBW_tree_s *tree,
151 MMDBW_node_s *node,
152 uint128_t network,
153 uint8_t depth,
154 void *void_args);
155 static void freeze_data_record(MMDBW_tree_s *UNUSED(tree),
156 uint128_t network,
157 uint8_t depth,
158 const char *key,
159 freeze_args_s *args);
160 static void freeze_to_file(freeze_args_s *args, void *data, size_t size);
161 static void freeze_data_to_file(freeze_args_s *args, MMDBW_tree_s *tree);
162 static SV *freeze_hash(HV *hash);
163 static uint8_t thaw_uint8(uint8_t **buffer);
164 static thawed_network_s *thaw_network(MMDBW_tree_s *tree, uint8_t **buffer);
165 static uint8_t *thaw_bytes(uint8_t **buffer, size_t size);
166 static uint128_t thaw_uint128(uint8_t **buffer);
167 static STRLEN thaw_strlen(uint8_t **buffer);
168 static const char *thaw_data_key(uint8_t **buffer);
169 static HV *thaw_data_hash(SV *data_to_decode);
170 static void encode_node(MMDBW_tree_s *tree,
171 MMDBW_node_s *node,
172 uint128_t UNUSED(network),
173 uint8_t UNUSED(depth),
174 void *void_args);
175 static void
176 check_record_sanity(MMDBW_node_s *node, MMDBW_record_s *record, char *side);
177 static uint32_t record_value_as_number(MMDBW_tree_s *tree,
178 MMDBW_record_s *record,
179 encode_args_s *args);
180 static void iterate_tree(MMDBW_tree_s *tree,
181 MMDBW_record_s *record,
182 uint128_t network,
183 const uint8_t depth,
184 bool depth_first,
185 void *args,
186 MMDBW_iterator_callback callback);
187 static SV *key_for_data(SV *data);
188 static const char *merge_cache_lookup(MMDBW_tree_s *tree,
189 char *merge_cache_key);
190 static void store_in_merge_cache(MMDBW_tree_s *tree,
191 char *merge_cache_key,
192 const char *const new_key);
193 static void *checked_malloc(size_t size);
194 static void
195 checked_fwrite(FILE *file, char *filename, void *buffer, size_t count);
196 static void check_perlio_result(SSize_t result, SSize_t expected, char *op);
197 static char *status_error_message(MMDBW_status status);
198 static const char *record_type_name(MMDBW_record_type type);
199
200 // Create a new tree.
201 //
202 // For a description of `alias_ipv6' and `remove_reserved_networks', refer to
203 // the MaxMind::DB::Writer::Tree documentation about these options.
new_tree(const uint8_t ip_version,uint8_t record_size,MMDBW_merge_strategy merge_strategy,const bool alias_ipv6,const bool remove_reserved_networks)204 MMDBW_tree_s *new_tree(const uint8_t ip_version,
205 uint8_t record_size,
206 MMDBW_merge_strategy merge_strategy,
207 const bool alias_ipv6,
208 const bool remove_reserved_networks) {
209 if (merge_strategy == MMDBW_MERGE_STRATEGY_UNKNOWN) {
210 croak("Unknown merge_strategy encountered");
211 }
212 if (ip_version != 4 && ip_version != 6) {
213 croak("Unexpected IP version of %u", ip_version);
214 }
215 if (record_size != 24 && record_size != 28 && record_size != 32) {
216 croak("Only record sizes of 24, 28, and 32 are supported. Received %u.",
217 record_size);
218 }
219
220 MMDBW_tree_s *tree = checked_malloc(sizeof(MMDBW_tree_s));
221 tree->ip_version = ip_version;
222
223 tree->record_size = record_size;
224 tree->merge_strategy = merge_strategy;
225 tree->merge_cache = NULL;
226 tree->data_table = NULL;
227 tree->root_record = (MMDBW_record_s){
228 .type = MMDBW_RECORD_TYPE_EMPTY,
229 };
230 tree->node_count = 0;
231
232 if (alias_ipv6) {
233 alias_ipv4_networks(tree);
234 }
235
236 if (remove_reserved_networks) {
237 MMDBW_status const status =
238 insert_reserved_networks_as_fixed_empty(tree);
239 if (status != MMDBW_SUCCESS) {
240 free_tree(tree);
241 croak("Error inserting reserved networks: %s",
242 status_error_message(status));
243 }
244 }
245
246 return tree;
247 }
248
insert_network(MMDBW_tree_s * tree,const char * ipstr,const uint8_t prefix_length,SV * key_sv,SV * data,MMDBW_merge_strategy merge_strategy)249 void insert_network(MMDBW_tree_s *tree,
250 const char *ipstr,
251 const uint8_t prefix_length,
252 SV *key_sv,
253 SV *data,
254 MMDBW_merge_strategy merge_strategy) {
255 verify_ip(tree, ipstr);
256
257 MMDBW_network_s network = resolve_network(tree, ipstr, prefix_length);
258
259 const char *const key =
260 store_data_in_tree(tree, SvPVbyte_nolen(key_sv), data);
261 MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_DATA,
262 .value = {.key = key}};
263
264 MMDBW_status status = insert_record_for_network(
265 tree, &network, &new_record, merge_strategy, false);
266
267 // The data's ref count gets incremented by the insert each time it is
268 // inserted. As such, we need to decrement it here.
269 decrement_data_reference_count(tree, key);
270 free_network(&network);
271
272 if (MMDBW_SUCCESS != status) {
273 croak("%s (when inserting %s/%" PRIu8 ")",
274 status_error_message(status),
275 ipstr,
276 prefix_length);
277 }
278 }
279
verify_ip(MMDBW_tree_s * tree,const char * ipstr)280 static void verify_ip(MMDBW_tree_s *tree, const char *ipstr) {
281 if (tree->ip_version == 4 && strchr(ipstr, ':')) {
282 croak("You cannot insert an IPv6 address (%s) into an IPv4 tree.",
283 ipstr);
284 }
285 }
286
insert_range(MMDBW_tree_s * tree,const char * start_ipstr,const char * end_ipstr,SV * key_sv,SV * data_sv,MMDBW_merge_strategy merge_strategy)287 void insert_range(MMDBW_tree_s *tree,
288 const char *start_ipstr,
289 const char *end_ipstr,
290 SV *key_sv,
291 SV *data_sv,
292 MMDBW_merge_strategy merge_strategy) {
293 verify_ip(tree, start_ipstr);
294 verify_ip(tree, end_ipstr);
295
296 uint128_t start_ip = ip_string_to_integer(start_ipstr, tree->ip_version);
297 uint128_t end_ip = ip_string_to_integer(end_ipstr, tree->ip_version);
298
299 if (end_ip < start_ip) {
300 croak("First IP (%s) in range comes before last IP (%s)",
301 start_ipstr,
302 end_ipstr);
303 }
304
305 const char *const key =
306 store_data_in_tree(tree, SvPVbyte_nolen(key_sv), data_sv);
307
308 uint8_t bytes[tree->ip_version == 6 ? 16 : 4];
309
310 MMDBW_status status = MMDBW_SUCCESS;
311
312 // Eventually we could change the code to walk the tree and break up the
313 // range at the same time, saving some unnecessary computation. However,
314 // that would require more significant refactoring of the insertion and
315 // merging code.
316 while (start_ip <= end_ip) {
317 uint128_t reverse_mask;
318 int prefix_length = prefix_length_for_largest_subnet(
319 start_ip, end_ip, tree->ip_version, &reverse_mask);
320
321 integer_to_ip_bytes(tree->ip_version, start_ip, bytes);
322
323 MMDBW_network_s network = {
324 .bytes = bytes,
325 .prefix_length = prefix_length,
326 };
327
328 MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_DATA,
329 .value = {.key = key}};
330
331 status = insert_record_for_network(
332 tree, &network, &new_record, merge_strategy, false);
333
334 if (MMDBW_SUCCESS != status) {
335 break;
336 }
337
338 start_ip = (start_ip | reverse_mask) + 1;
339
340 // The +1 caused an overflow and we are done.
341 if (start_ip == 0) {
342 break;
343 }
344 }
345 // store_data_in_tree starts at a reference count of 1, so we need to
346 // decrement in order to account for that.
347 decrement_data_reference_count(tree, key);
348
349 if (MMDBW_SUCCESS != status) {
350 croak("%s (when inserting %s - %s)",
351 status_error_message(status),
352 start_ipstr,
353 end_ipstr);
354 }
355 }
356
ip_string_to_integer(const char * ipstr,int family)357 static int128_t ip_string_to_integer(const char *ipstr, int family) {
358 uint8_t bytes[family == 6 ? 16 : 4];
359 if (resolve_ip(family, ipstr, bytes) != MMDBW_SUCCESS) {
360 croak("Invalid IP address: %s", ipstr);
361 }
362 return ip_bytes_to_integer(bytes, family);
363 }
364
ip_bytes_to_integer(uint8_t * bytes,int family)365 static int128_t ip_bytes_to_integer(uint8_t *bytes, int family) {
366 int length = family == 6 ? 16 : 4;
367
368 int128_t ipint = 0;
369 for (int i = 0; i < length; i++) {
370 ipint = (ipint << 8) | bytes[i];
371 }
372 return ipint;
373 }
374
375 static void
integer_to_ip_bytes(int tree_ip_version,uint128_t ip,uint8_t * bytes)376 integer_to_ip_bytes(int tree_ip_version, uint128_t ip, uint8_t *bytes) {
377 int bytes_length = tree_ip_version == 6 ? 16 : 4;
378 for (int i = 1; i <= bytes_length; i++) {
379 bytes[bytes_length - i] = 0xFF & ip;
380 ip >>= 8;
381 }
382 }
383
integer_to_ip_string(int tree_ip_version,uint128_t ip,char * dst,int dst_length)384 static void integer_to_ip_string(int tree_ip_version,
385 uint128_t ip,
386 char *dst,
387 int dst_length) {
388 uint8_t bytes[tree_ip_version == 6 ? 16 : 4];
389 integer_to_ip_bytes(tree_ip_version, ip, bytes);
390
391 if (NULL == inet_ntop(tree_ip_version == 6 ? AF_INET6 : AF_INET,
392 bytes,
393 dst,
394 dst_length)) {
395 croak("Error converting IP integer to string");
396 }
397 }
398
prefix_length_for_largest_subnet(uint128_t start_ip,uint128_t end_ip,int family,uint128_t * reverse_mask)399 static int prefix_length_for_largest_subnet(uint128_t start_ip,
400 uint128_t end_ip,
401 int family,
402 uint128_t *reverse_mask) {
403
404 if (start_ip > end_ip) {
405 croak("Start IP of the range must be less than or equal to end IP");
406 }
407
408 int prefix_length = family == 6 ? 128 : 32;
409 *reverse_mask = 1;
410
411 while (
412 // First IP of the subnet must be the start IP
413 (start_ip & ~*reverse_mask) == start_ip
414 // the last IP of the subnet must be <= the end IP
415 && (start_ip | *reverse_mask) <= end_ip
416 // stop if we have all IPs (shouldn't be required, but safety measure)
417 && prefix_length > 0) {
418 prefix_length--;
419 *reverse_mask = (*reverse_mask << 1) | 1;
420 }
421
422 // We overshoot by one shift
423 *reverse_mask >>= 1;
424
425 return prefix_length;
426 }
427
remove_network(MMDBW_tree_s * tree,const char * ipstr,const uint8_t prefix_length)428 void remove_network(MMDBW_tree_s *tree,
429 const char *ipstr,
430 const uint8_t prefix_length) {
431 verify_ip(tree, ipstr);
432
433 MMDBW_network_s network = resolve_network(tree, ipstr, prefix_length);
434
435 MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_EMPTY};
436
437 MMDBW_status status = insert_record_for_network(
438 tree, &network, &new_record, MMDBW_MERGE_STRATEGY_NONE, false);
439
440 free_network(&network);
441 if (status != MMDBW_SUCCESS) {
442 croak(status_error_message(status));
443 }
444 }
445
446 static const char *
store_data_in_tree(MMDBW_tree_s * tree,const char * const key,SV * data_sv)447 store_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv) {
448 const char *const new_key = increment_data_reference_count(tree, key);
449 set_stored_data_in_tree(tree, key, data_sv);
450
451 return new_key;
452 }
453
increment_data_reference_count(MMDBW_tree_s * tree,const char * const key)454 static const char *increment_data_reference_count(MMDBW_tree_s *tree,
455 const char *const key) {
456 MMDBW_data_hash_s *data = NULL;
457 HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
458
459 /* We allow this possibility as we need to create the record separately
460 from updating the data when thawing */
461 if (NULL == data) {
462 data = checked_malloc(sizeof(MMDBW_data_hash_s));
463 data->reference_count = 0;
464
465 data->data_sv = NULL;
466
467 data->key = checked_malloc(SHA1_KEY_LENGTH + 1);
468 strcpy((char *)data->key, key);
469
470 HASH_ADD_KEYPTR(hh, tree->data_table, data->key, SHA1_KEY_LENGTH, data);
471 }
472 data->reference_count++;
473
474 return data->key;
475 }
476
set_stored_data_in_tree(MMDBW_tree_s * tree,const char * const key,SV * data_sv)477 static void set_stored_data_in_tree(MMDBW_tree_s *tree,
478 const char *const key,
479 SV *data_sv) {
480 MMDBW_data_hash_s *data = NULL;
481 HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
482
483 if (NULL == data) {
484 croak("Attempt to set unknown data record in tree");
485 }
486
487 if (NULL != data->data_sv) {
488 return;
489 }
490
491 SvREFCNT_inc_simple_void_NN(data_sv);
492 data->data_sv = data_sv;
493 }
494
decrement_data_reference_count(MMDBW_tree_s * tree,const char * const key)495 static void decrement_data_reference_count(MMDBW_tree_s *tree,
496 const char *const key) {
497 MMDBW_data_hash_s *data = NULL;
498 HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
499
500 if (NULL == data) {
501 croak("Attempt to remove data that does not exist from tree");
502 }
503
504 data->reference_count--;
505 if (0 == data->reference_count) {
506 HASH_DEL(tree->data_table, data);
507 SvREFCNT_dec(data->data_sv);
508 free((char *)data->key);
509 free(data);
510 }
511 }
512
resolve_network(MMDBW_tree_s * tree,const char * const ipstr,uint8_t prefix_length)513 static MMDBW_network_s resolve_network(MMDBW_tree_s *tree,
514 const char *const ipstr,
515 uint8_t prefix_length) {
516 uint8_t *bytes = checked_malloc(tree->ip_version == 6 ? 16 : 4);
517
518 if (resolve_ip(tree->ip_version, ipstr, bytes) != MMDBW_SUCCESS) {
519 free(bytes);
520 croak("Invalid IP address: %s", ipstr);
521 }
522
523 if (NULL == strchr(ipstr, ':')) {
524 // IPv4
525 if (prefix_length > 32) {
526 free(bytes);
527 croak("Prefix length greater than 32 on an IPv4 network (%s/%d)",
528 ipstr,
529 prefix_length);
530 }
531 if (tree->ip_version == 6) {
532 // Inserting IPv4 network into an IPv6 tree
533 prefix_length += 96;
534 }
535 } else if (prefix_length > 128) {
536 free(bytes);
537 croak("Prefix length greater than 128 on an IPv6 network (%s/%d)",
538 ipstr,
539 prefix_length);
540 }
541
542 MMDBW_network_s network = {
543 .bytes = bytes,
544 .prefix_length = prefix_length,
545 };
546
547 return network;
548 }
549
550 static MMDBW_status
resolve_ip(int tree_ip_version,const char * const ipstr,uint8_t * bytes)551 resolve_ip(int tree_ip_version, const char *const ipstr, uint8_t *bytes) {
552 bool is_ipv4_address = NULL == strchr(ipstr, ':');
553 int family = is_ipv4_address ? AF_INET : AF_INET6;
554 if (tree_ip_version == 6 && is_ipv4_address) {
555 // We are inserting/looking up an IPv4 address in an IPv6 tree.
556 // The canonical location for this in our database is ::a.b.c.d.
557 // To get this address, we zero out the first 12 bytes of bytes
558 // and then put the IPv4 address in the remaining 4. The reason to
559 // not use getaddrinfo with AI_V4MAPPED is that it gives us
560 // ::FFFF:a.b.c.d and AI_V4MAPPED doesn't work on all platforms.
561 // See GitHub #7 and #51.
562 memset(bytes, 0, 12);
563 bytes += 12;
564 }
565 if (!inet_pton(family, ipstr, bytes)) {
566 return MMDBW_RESOLVING_IP_ERROR;
567 }
568 return MMDBW_SUCCESS;
569 }
570
free_network(MMDBW_network_s * network)571 static void free_network(MMDBW_network_s *network) {
572 free((char *)network->bytes);
573 }
574
575 static struct network ipv4_aliases[] = {
576 {
577 .ipstr = "::ffff:0:0",
578 .prefix_length = 96,
579 },
580 {.ipstr = "2001::", .prefix_length = 32},
581 {.ipstr = "2002::", .prefix_length = 16}};
582
alias_ipv4_networks(MMDBW_tree_s * tree)583 static void alias_ipv4_networks(MMDBW_tree_s *tree) {
584 if (tree->ip_version == 4) {
585 return;
586 }
587
588 MMDBW_network_s ipv4_root_network = resolve_network(tree, "::0.0.0.0", 96);
589 MMDBW_node_s *ipv4_root_node = new_node();
590 MMDBW_record_s ipv4_root_record = {
591 .type = MMDBW_RECORD_TYPE_FIXED_NODE,
592 .value.node = ipv4_root_node,
593 };
594
595 MMDBW_status status = insert_record_for_network(tree,
596 &ipv4_root_network,
597 &ipv4_root_record,
598 MMDBW_MERGE_STRATEGY_NONE,
599 true);
600
601 free_network(&ipv4_root_network);
602
603 if (status != MMDBW_SUCCESS) {
604 croak("Unable to create IPv4 root node when setting up aliases: %s",
605 status_error_message(status));
606 }
607
608 for (size_t i = 0; i < sizeof(ipv4_aliases) / sizeof(struct network); i++) {
609 MMDBW_network_s alias_network = resolve_network(
610 tree, ipv4_aliases[i].ipstr, ipv4_aliases[i].prefix_length);
611
612 MMDBW_record_s record_for_alias = {
613 .type = MMDBW_RECORD_TYPE_ALIAS,
614 .value.node = ipv4_root_node,
615 };
616
617 MMDBW_status status =
618 insert_record_for_network(tree,
619 &alias_network,
620 &record_for_alias,
621 MMDBW_MERGE_STRATEGY_NONE,
622 true);
623
624 free_network(&alias_network);
625
626 if (MMDBW_SUCCESS != status) {
627 croak("Unexpected error when searching for last node for alias: %s",
628 status_error_message(status));
629 }
630 }
631 }
632
633 // https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
634 static struct network reserved_networks_ipv4[] = {
635 {.ipstr = "0.0.0.0", .prefix_length = 8},
636 {.ipstr = "10.0.0.0", .prefix_length = 8},
637 {.ipstr = "100.64.0.0", .prefix_length = 10},
638 {.ipstr = "127.0.0.0", .prefix_length = 8},
639 {.ipstr = "169.254.0.0", .prefix_length = 16},
640 {.ipstr = "172.16.0.0", .prefix_length = 12},
641 // This is an odd case. 192.0.0.0/24 is reserved, but there is a note that
642 // says "Not useable unless by virtue of a more specific reservation". As
643 // such, since 192.0.0.0/29 was more recently reserved, it's possible the
644 // intention is that the rest is not reserved any longer. I'm not too clear
645 // on this, but I believe that is the rationale, so I choose to leave it.
646 {.ipstr = "192.0.0.0", .prefix_length = 29},
647 // TODO(wstorey@maxmind.com): 192.168.0.8/32
648 // TODO(wstorey@maxmind.com): 192.168.0.9/32
649 // TODO(wstorey@maxmind.com): 192.168.0.10/32
650 // TODO(wstorey@maxmind.com): 192.168.0.170/32
651 // TODO(wstorey@maxmind.com): 192.168.0.171/32
652 {.ipstr = "192.0.2.0", .prefix_length = 24},
653 // 192.31.196.0/24 is routable I believe
654 // TODO(wstorey@maxmnd.com): 192.52.193.0/24
655 // TODO(wstorey@maxmind.com): Looks like 192.88.99.0/24 may no longer be
656 // reserved?
657 {.ipstr = "192.88.99.0", .prefix_length = 24},
658 {.ipstr = "192.168.0.0", .prefix_length = 16},
659 // 192.175.48.0/24 is routable I believe
660 {.ipstr = "198.18.0.0", .prefix_length = 15},
661 {.ipstr = "198.51.100.0", .prefix_length = 24},
662 {.ipstr = "203.0.113.0", .prefix_length = 24},
663 // The above IANA page doesn't list 224.0.0.0/4, but at least some parts
664 // are listed in https://tools.ietf.org/html/rfc5771
665 {.ipstr = "224.0.0.0", .prefix_length = 4},
666 {.ipstr = "240.0.0.0", .prefix_length = 4},
667 // 255.255.255.255/32 gets brought in by 240.0.0.0/4.
668 };
669
670 // https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
671 static struct network reserved_networks_ipv6[] = {
672 // ::/128 and ::1/128 are reserved under IPv6 but these are already
673 // covered under 0.0.0.0/8.
674 //
675 // ::ffff:0:0/96 - IPv4 mapped addresses. We treat it specially with the
676 // `alias_ipv6_to_ipv4' option.
677 //
678 // 64:ff9b::/96 - well known prefix mapping, covered by alias_ipv6_to_ipv4
679 //
680 // TODO(wstorey@maxmind.com): 64:ff9b:1::/48 should be in
681 // alias_ipv6_to_ipv4?
682
683 {.ipstr = "100::", .prefix_length = 64},
684
685 // 2001::/23 is reserved. We include all of it here other than 2001::/32
686 // as it is Teredo which is globally routable.
687 {.ipstr = "2001:1::", .prefix_length = 32},
688 {.ipstr = "2001:2::", .prefix_length = 31},
689 {.ipstr = "2001:4::", .prefix_length = 30},
690 {.ipstr = "2001:8::", .prefix_length = 29},
691 {.ipstr = "2001:10::", .prefix_length = 28},
692 {.ipstr = "2001:20::", .prefix_length = 27},
693 {.ipstr = "2001:40::", .prefix_length = 26},
694 {.ipstr = "2001:80::", .prefix_length = 25},
695 {.ipstr = "2001:100::", .prefix_length = 24},
696
697 {.ipstr = "2001:db8::", .prefix_length = 32},
698 // 2002::/16 - 6to4, part of alias_ipv6_to_ipv4
699 // 2620:4f:8000::/48 is routable I believe
700 {.ipstr = "fc00::", .prefix_length = 7},
701 {.ipstr = "fe80::", .prefix_length = 10},
702 // Multicast
703 {.ipstr = "ff00::", .prefix_length = 8},
704 };
705
706 // Insert a FIXED_EMPTY record for all private and reserved networks.
707 //
708 // This is to avoid the case where we might otherwise accidentally add
709 // information about such networks.
710 static MMDBW_status
insert_reserved_networks_as_fixed_empty(MMDBW_tree_s * tree)711 insert_reserved_networks_as_fixed_empty(MMDBW_tree_s *tree) {
712 MMDBW_status const status = insert_networks_as_fixed_empty(
713 tree,
714 reserved_networks_ipv4,
715 sizeof(reserved_networks_ipv4) / sizeof(struct network));
716 if (status != MMDBW_SUCCESS) {
717 return status;
718 }
719
720 if (tree->ip_version == 6) {
721 MMDBW_status const status = insert_networks_as_fixed_empty(
722 tree,
723 reserved_networks_ipv6,
724 sizeof(reserved_networks_ipv6) / sizeof(struct network));
725 if (status != MMDBW_SUCCESS) {
726 return status;
727 }
728 }
729
730 return MMDBW_SUCCESS;
731 }
732
733 // Insert a FIXED_EMPTY record for each network.
734 static MMDBW_status
insert_networks_as_fixed_empty(MMDBW_tree_s * tree,struct network const * const networks,const size_t num_networks)735 insert_networks_as_fixed_empty(MMDBW_tree_s *tree,
736 struct network const *const networks,
737 const size_t num_networks) {
738 for (size_t i = 0; i < num_networks; i++) {
739 MMDBW_network_s resolved_network =
740 resolve_network(tree, networks[i].ipstr, networks[i].prefix_length);
741
742 MMDBW_record_s record = {
743 .type = MMDBW_RECORD_TYPE_FIXED_EMPTY,
744 };
745
746 MMDBW_status const status = insert_record_for_network(
747 tree, &resolved_network, &record, MMDBW_MERGE_STRATEGY_NONE, true);
748
749 free_network(&resolved_network);
750
751 if (status != MMDBW_SUCCESS) {
752 return status;
753 }
754 }
755
756 return MMDBW_SUCCESS;
757 }
758
759 static MMDBW_status
insert_record_for_network(MMDBW_tree_s * tree,MMDBW_network_s * network,MMDBW_record_s * new_record,MMDBW_merge_strategy merge_strategy,bool is_internal_insert)760 insert_record_for_network(MMDBW_tree_s *tree,
761 MMDBW_network_s *network,
762 MMDBW_record_s *new_record,
763 MMDBW_merge_strategy merge_strategy,
764 bool is_internal_insert) {
765 if (merge_strategy == MMDBW_MERGE_STRATEGY_UNKNOWN) {
766 merge_strategy = tree->merge_strategy;
767 }
768
769 return insert_record_into_next_node(tree,
770 &(tree->root_record),
771 network,
772 0,
773 new_record,
774 merge_strategy,
775 is_internal_insert);
776 }
777
778 static MMDBW_status
insert_record_into_next_node(MMDBW_tree_s * tree,MMDBW_record_s * current_record,MMDBW_network_s * network,int current_bit,MMDBW_record_s * new_record,MMDBW_merge_strategy merge_strategy,bool is_internal_insert)779 insert_record_into_next_node(MMDBW_tree_s *tree,
780 MMDBW_record_s *current_record,
781 MMDBW_network_s *network,
782 int current_bit,
783 MMDBW_record_s *new_record,
784 MMDBW_merge_strategy merge_strategy,
785 bool is_internal_insert) {
786 // We've reached the record where the network belongs. Depending on the
787 // type of record it is, we insert right here.
788 if (current_bit >= network->prefix_length &&
789 (current_record->type == MMDBW_RECORD_TYPE_EMPTY ||
790 current_record->type == MMDBW_RECORD_TYPE_DATA ||
791 (current_record->type == MMDBW_RECORD_TYPE_NODE &&
792 merge_strategy == MMDBW_MERGE_STRATEGY_NONE))) {
793 return insert_record_into_current_record(
794 tree, current_record, network, new_record, merge_strategy);
795 }
796
797 if (current_bit == network->prefix_length &&
798 current_record->type == MMDBW_RECORD_TYPE_FIXED_NODE &&
799 new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
800 // We could potentially make this work, but it's tricky. One of the
801 // purposes of fixed nodes is for alias nodes to point at them.
802 // Returning success here without doing anything is not what we want to
803 // do: We should be setting the node value to what is in the new record
804 // since it may be setting up alias nodes pointing at it. Changing the
805 // current node value is not an option either: Aliases may already be
806 // pointing to it.
807 //
808 // If we don't have this case, we'll continue to descend the tree,
809 // inserting the new record's fixed node in potentially multiple
810 // locations. If that happens, we end up freeing the node multiple
811 // times when destroying the tree.
812 //
813 // Consider the case where we previously inserted fixed nodes to a
814 // greater depth than we are right now. Because we'd continue to
815 // descend the tree, we'd insert this new fixed node record at
816 // potentially multiple spots in the tree, at each point we encounter
817 // an EMPTY, DATA, or NODE record (the conditions where we can enter
818 // insert_record_into_current_record()). The stopping condition would
819 // not be correct.
820 return MMDBW_FIXED_NODE_OVERWRITE_ATTEMPT_ERROR;
821 }
822
823 // Figure out the next node.
824 MMDBW_node_s *next_node = NULL;
825 switch (current_record->type) {
826 case MMDBW_RECORD_TYPE_EMPTY:
827 case MMDBW_RECORD_TYPE_DATA: {
828 // In this case we create a new node to point to. We make the new
829 // nodes left and right identical to us.
830 next_node = new_node_from_record(tree, current_record);
831 current_record->value.node = next_node;
832 current_record->type = MMDBW_RECORD_TYPE_NODE;
833 break;
834 }
835 case MMDBW_RECORD_TYPE_FIXED_EMPTY:
836 // We're a) trying to overwrite a fixed empty network, b) trying to
837 // insert a network containing a fixed empty network, or c) trying
838 // to insert inside a fixed empty network. (You can see these 3
839 // cases handled separately in the ALIAS case.) We ignore the insert
840 // with the assumption that it was not intended. e.g., you probably
841 // didn't mean to insert data about a reserved network. Doing so
842 // makes working with dirty data easier. Note this may change to be
843 // more strict and return an error in the future.
844 return MMDBW_SUCCESS;
845 break;
846 case MMDBW_RECORD_TYPE_ALIAS: {
847 // The insert is trying to overwrite an aliased network. We do not
848 // allow this.
849 if (current_bit == network->prefix_length) {
850 return MMDBW_ALIAS_OVERWRITE_ATTEMPT_ERROR;
851 }
852
853 // We don't follow aliases when inserting a network that contains
854 // an aliased network within it.
855 if (current_bit > network->prefix_length) {
856 // We return success to silently ignore when we try to insert
857 // here. If we raise an error, it makes working with dirty data
858 // more difficult. We assume such inserts were not intended and
859 // can be safely skipped.
860 return MMDBW_SUCCESS;
861 }
862 // current_bit < network->prefix_length. This means we contain the
863 // new network already as ALIAS. Inserting the network is not valid
864 // because of that.
865 return MMDBW_INSERT_INTO_ALIAS_NODE_ERROR;
866 break;
867 }
868 case MMDBW_RECORD_TYPE_FIXED_NODE:
869 case MMDBW_RECORD_TYPE_NODE: {
870 // We're a node already.
871 next_node = current_record->value.node;
872 break;
873 }
874 }
875
876 // If we are inserting an alias, a fixed node, or a fixed empty record, we
877 // make all of the nodes down to that record fixed nodes. This makes it
878 // easier to not accidentally delete or modify them.
879 if (new_record->type == MMDBW_RECORD_TYPE_ALIAS ||
880 new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
881 new_record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
882 current_record->type = MMDBW_RECORD_TYPE_FIXED_NODE;
883 }
884
885 bool next_is_right = network_bit_value(network, current_bit);
886
887 // if we are beyond the prefix length, both records are within the
888 // network we are inserting.
889 bool insert_into_both = current_bit >= network->prefix_length;
890
891 if (next_is_right || insert_into_both) {
892 MMDBW_status status =
893 insert_record_into_next_node(tree,
894 &(next_node->right_record),
895 network,
896 current_bit + 1,
897 new_record,
898 merge_strategy,
899 is_internal_insert);
900 if (status != MMDBW_SUCCESS) {
901 return status;
902 }
903 }
904
905 if (!next_is_right || insert_into_both) {
906 MMDBW_status status =
907 insert_record_into_next_node(tree,
908 &(next_node->left_record),
909 network,
910 current_bit + 1,
911 new_record,
912 merge_strategy,
913 is_internal_insert);
914 if (status != MMDBW_SUCCESS) {
915 return status;
916 }
917 }
918
919 // We inserted the new record into the right and/or left record of the next
920 // node. We now need to trim the tree upwards by merging identical records.
921 // Basically what we do here is take care of the case where the record
922 // we're at points at another node, and the records in that node are both
923 // the same. In that case, we delete the node we point at and take its
924 // value on ourselves.
925
926 if (next_node->left_record.type == next_node->right_record.type &&
927 // We don't allow merging into aliases or fixed nodes
928 current_record->type == MMDBW_RECORD_TYPE_NODE) {
929 switch (next_node->left_record.type) {
930 case MMDBW_RECORD_TYPE_EMPTY: {
931 MMDBW_status status =
932 free_node_and_subnodes(tree, next_node, false);
933 if (status != MMDBW_SUCCESS) {
934 return MMDBW_SUCCESS;
935 }
936 current_record->type = MMDBW_RECORD_TYPE_EMPTY;
937 break;
938 }
939 case MMDBW_RECORD_TYPE_DATA: {
940 // If the two keys are the same, the records can be merged.
941 // Otherwise, break.
942 if (strcmp(next_node->left_record.value.key,
943 next_node->right_record.value.key)) {
944 break;
945 }
946 const char *key = increment_data_reference_count(
947 tree, next_node->left_record.value.key);
948 MMDBW_status status =
949 free_node_and_subnodes(tree, next_node, false);
950 if (status != MMDBW_SUCCESS) {
951 return MMDBW_SUCCESS;
952 }
953 current_record->type = MMDBW_RECORD_TYPE_DATA;
954 current_record->value.key = key;
955 break;
956 }
957 case MMDBW_RECORD_TYPE_ALIAS:
958 case MMDBW_RECORD_TYPE_FIXED_NODE:
959 case MMDBW_RECORD_TYPE_FIXED_EMPTY:
960 case MMDBW_RECORD_TYPE_NODE: {
961 // Do nothing in these cases. We don't trim immutable nodes.
962 break;
963 }
964 }
965 }
966 return MMDBW_SUCCESS;
967 }
968
969 static MMDBW_status
insert_record_into_current_record(MMDBW_tree_s * tree,MMDBW_record_s * current_record,MMDBW_network_s * network,MMDBW_record_s * new_record,MMDBW_merge_strategy merge_strategy)970 insert_record_into_current_record(MMDBW_tree_s *tree,
971 MMDBW_record_s *current_record,
972 MMDBW_network_s *network,
973 MMDBW_record_s *new_record,
974 MMDBW_merge_strategy merge_strategy) {
975 // We only get called when we have a current_record with these record
976 // types. There was previously logic for other types, but that was
977 // confusing.
978 if (current_record->type != MMDBW_RECORD_TYPE_EMPTY &&
979 current_record->type != MMDBW_RECORD_TYPE_DATA &&
980 current_record->type != MMDBW_RECORD_TYPE_NODE) {
981 return MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR;
982 }
983
984 if (current_record->type == MMDBW_RECORD_TYPE_EMPTY &&
985 merge_strategy == MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS) {
986 // We do not create a new record when using "only if parent exists"
987 return MMDBW_SUCCESS;
988 }
989
990 const char *merged_key = maybe_merge_records(
991 tree, network, new_record, current_record, merge_strategy);
992
993 MMDBW_status status = free_record_value(tree, current_record, false);
994 if (status != MMDBW_SUCCESS) {
995 return status;
996 }
997
998 // Update the record to match the new one. Replace what's there.
999 current_record->type = new_record->type;
1000 if (new_record->type == MMDBW_RECORD_TYPE_DATA) {
1001 const char *const key = increment_data_reference_count(
1002 tree, merged_key == NULL ? new_record->value.key : merged_key);
1003 current_record->value.key = key;
1004 } else if (new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1005 new_record->type == MMDBW_RECORD_TYPE_NODE ||
1006 new_record->type == MMDBW_RECORD_TYPE_ALIAS) {
1007 current_record->value.node = new_record->value.node;
1008 } else if (new_record->type == MMDBW_RECORD_TYPE_EMPTY ||
1009 new_record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
1010 current_record->value.key = NULL;
1011 current_record->value.node = NULL;
1012 } else {
1013 return MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR;
1014 }
1015
1016 if (merged_key) {
1017 decrement_data_reference_count(tree, merged_key);
1018 }
1019
1020 return status;
1021 }
1022
maybe_merge_records(MMDBW_tree_s * tree,MMDBW_network_s * network,MMDBW_record_s * new_record,MMDBW_record_s * record_to_set,MMDBW_merge_strategy merge_strategy)1023 static const char *maybe_merge_records(MMDBW_tree_s *tree,
1024 MMDBW_network_s *network,
1025 MMDBW_record_s *new_record,
1026 MMDBW_record_s *record_to_set,
1027 MMDBW_merge_strategy merge_strategy) {
1028 if (MMDBW_RECORD_TYPE_DATA != new_record->type ||
1029 merge_strategy == MMDBW_MERGE_STRATEGY_NONE) {
1030 return NULL;
1031 }
1032
1033 /* This must come before the node pruning code in
1034 insert_record_for_network, as we only want to prune nodes where the
1035 merged record matches. */
1036 if (MMDBW_RECORD_TYPE_DATA != record_to_set->type
1037 // If the two keys are equal, there is no point in trying to merge
1038 // the contents.
1039 || strcmp(new_record->value.key, record_to_set->value.key) == 0) {
1040 return NULL;
1041 }
1042
1043 char merge_cache_key[MERGE_KEY_SIZE + 1];
1044 snprintf(merge_cache_key,
1045 MERGE_KEY_SIZE + 1,
1046 "%d-%s-%s",
1047 merge_strategy,
1048 new_record->value.key,
1049 record_to_set->value.key);
1050
1051 const char *cached_key = merge_cache_lookup(tree, merge_cache_key);
1052 if (cached_key != NULL) {
1053 const char *const new_key =
1054 increment_data_reference_count(tree, cached_key);
1055 return new_key;
1056 }
1057
1058 SV *merged = merge_hashes_for_keys(tree,
1059 new_record->value.key,
1060 record_to_set->value.key,
1061 network,
1062 merge_strategy);
1063
1064 SV *key_sv = key_for_data(merged);
1065 const char *const new_key =
1066 store_data_in_tree(tree, SvPVbyte_nolen(key_sv), merged);
1067 SvREFCNT_dec(key_sv);
1068
1069 /* The ref count was incremented in store_data_in_tree */
1070 SvREFCNT_dec(merged);
1071
1072 store_in_merge_cache(tree, merge_cache_key, new_key);
1073
1074 return new_key;
1075 }
1076
network_bit_value(MMDBW_network_s * network,uint8_t current_bit)1077 static int network_bit_value(MMDBW_network_s *network, uint8_t current_bit) {
1078 return network->bytes[current_bit >> 3] & (1U << (~current_bit & 7));
1079 }
1080
tree_depth0(MMDBW_tree_s * tree)1081 static int tree_depth0(MMDBW_tree_s *tree) {
1082 return tree->ip_version == 6 ? 127 : 31;
1083 }
1084
merge_hashes_for_keys(MMDBW_tree_s * tree,const char * const key_from,const char * const key_into,MMDBW_network_s * network,MMDBW_merge_strategy merge_strategy)1085 SV *merge_hashes_for_keys(MMDBW_tree_s *tree,
1086 const char *const key_from,
1087 const char *const key_into,
1088 MMDBW_network_s *network,
1089 MMDBW_merge_strategy merge_strategy) {
1090
1091 SV *data_from = data_for_key(tree, key_from);
1092 SV *data_into = data_for_key(tree, key_into);
1093
1094 if (!(SvROK(data_from) && SvROK(data_into) &&
1095 SvTYPE(SvRV(data_from)) == SVt_PVHV &&
1096 SvTYPE(SvRV(data_into)) == SVt_PVHV)) {
1097 /* We added key_into earlier during insert_record_for_network, so we
1098 have to make sure here that it's removed again after we decide to
1099 not actually store this network. It might be nicer to not insert
1100 anything into the tree until we're sure we really want to. */
1101 decrement_data_reference_count(tree, key_from);
1102
1103 bool is_ipv6 = tree->ip_version == 6;
1104 char address_string[is_ipv6 ? INET6_ADDRSTRLEN : INET_ADDRSTRLEN];
1105 inet_ntop(is_ipv6 ? AF_INET6 : AF_INET,
1106 network->bytes,
1107 address_string,
1108 sizeof(address_string));
1109
1110 croak("Cannot merge data records unless both records are hashes - "
1111 "inserting %s/%" PRIu8,
1112 address_string,
1113 network->prefix_length);
1114 }
1115
1116 return merge_hashes(tree, data_from, data_into, merge_strategy);
1117 }
1118
merge_hashes(MMDBW_tree_s * tree,SV * from,SV * into,MMDBW_merge_strategy merge_strategy)1119 static SV *merge_hashes(MMDBW_tree_s *tree,
1120 SV *from,
1121 SV *into,
1122 MMDBW_merge_strategy merge_strategy) {
1123 HV *hash_from = (HV *)SvRV(from);
1124 HV *hash_into = (HV *)SvRV(into);
1125 HV *hash_new = newHV();
1126
1127 merge_new_from_hash_into_hash(tree, hash_into, hash_new, false);
1128 merge_new_from_hash_into_hash(tree, hash_from, hash_new, merge_strategy);
1129
1130 return newRV_noinc((SV *)hash_new);
1131 }
1132
1133 // Note: unlike the other merge functions, this does _not_ replace existing
1134 // values.
merge_new_from_hash_into_hash(MMDBW_tree_s * tree,HV * from,HV * to,MMDBW_merge_strategy merge_strategy)1135 static void merge_new_from_hash_into_hash(MMDBW_tree_s *tree,
1136 HV *from,
1137 HV *to,
1138 MMDBW_merge_strategy merge_strategy) {
1139 (void)hv_iterinit(from);
1140 HE *he;
1141 while (NULL != (he = hv_iternext(from))) {
1142 STRLEN key_length;
1143 const char *const key = HePV(he, key_length);
1144 U32 hash = 0;
1145 SV *value = HeVAL(he);
1146 if (hv_exists(to, key, key_length)) {
1147 if (merge_strategy == MMDBW_MERGE_STRATEGY_RECURSE ||
1148 merge_strategy ==
1149 MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS) {
1150 SV **existing_value = hv_fetch(to, key, key_length, 0);
1151 if (existing_value == NULL) {
1152 // This should never happen as we just did an hv_exists
1153 croak("Received an unexpected NULL from hv_fetch");
1154 }
1155
1156 value =
1157 merge_values(tree, value, *existing_value, merge_strategy);
1158 } else {
1159 // We are replacing the current value
1160 hash = HeHASH(he);
1161 SvREFCNT_inc_simple_void_NN(value);
1162 }
1163 } else if (merge_strategy ==
1164 MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS &&
1165 SvROK(value)) {
1166 continue;
1167 } else {
1168 hash = HeHASH(he);
1169 SvREFCNT_inc_simple_void_NN(value);
1170 }
1171
1172 (void)hv_store(to, key, key_length, value, hash);
1173 }
1174
1175 return;
1176 }
1177
merge_values(MMDBW_tree_s * tree,SV * from,SV * into,MMDBW_merge_strategy merge_strategy)1178 static SV *merge_values(MMDBW_tree_s *tree,
1179 SV *from,
1180 SV *into,
1181 MMDBW_merge_strategy merge_strategy) {
1182 if (SvROK(from) != SvROK(into)) {
1183 croak("Attempt to merge a reference value and non-refrence value");
1184 }
1185
1186 if (!SvROK(from)) {
1187 // If the two values are scalars, we prefer the one in the hash being
1188 // inserted.
1189 SvREFCNT_inc_simple_void_NN(from);
1190 return from;
1191 }
1192
1193 if (SvTYPE(SvRV(from)) == SVt_PVHV && SvTYPE(SvRV(into)) == SVt_PVHV) {
1194 return merge_hashes(tree, from, into, merge_strategy);
1195 }
1196
1197 if (SvTYPE(SvRV(from)) == SVt_PVAV && SvTYPE(SvRV(into)) == SVt_PVAV) {
1198 return merge_arrays(tree, from, into, merge_strategy);
1199 }
1200
1201 croak("Only arrayrefs, hashrefs, and scalars can be merged.");
1202 }
1203
merge_arrays(MMDBW_tree_s * tree,SV * from,SV * into,MMDBW_merge_strategy merge_strategy)1204 static SV *merge_arrays(MMDBW_tree_s *tree,
1205 SV *from,
1206 SV *into,
1207 MMDBW_merge_strategy merge_strategy) {
1208 AV *from_array = (AV *)SvRV(from);
1209 AV *into_array = (AV *)SvRV(into);
1210
1211 // Note that av_len() is really the index of the last element. In newer
1212 // Perl versions, it is also called av_top_index() or av_tindex()
1213 SSize_t from_top_index = av_len(from_array);
1214 SSize_t into_top_index = av_len(into_array);
1215
1216 SSize_t new_top_index =
1217 from_top_index > into_top_index ? from_top_index : into_top_index;
1218
1219 AV *new_array = newAV();
1220 for (SSize_t i = 0; i <= new_top_index; i++) {
1221 SV *new_value = NULL;
1222 SV **from_value = av_fetch(from_array, i, 0);
1223 SV **into_value = av_fetch(into_array, i, 0);
1224 if (from_value != NULL && into_value != NULL) {
1225 new_value =
1226 merge_values(tree, *from_value, *into_value, merge_strategy);
1227 } else if (from_value != NULL) {
1228 if (merge_strategy ==
1229 MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS &&
1230 SvROK(*from_value)) {
1231 break;
1232 }
1233 new_value = *from_value;
1234 SvREFCNT_inc_simple_void_NN(new_value);
1235 } else if (into_value != NULL) {
1236 new_value = *into_value;
1237 SvREFCNT_inc_simple_void_NN(new_value);
1238 } else {
1239 croak("Received unexpected NULLs when merging arrays");
1240 }
1241
1242 av_push(new_array, new_value);
1243 }
1244 return newRV_noinc((SV *)new_array);
1245 }
1246
lookup_ip_address(MMDBW_tree_s * tree,const char * const ipstr)1247 SV *lookup_ip_address(MMDBW_tree_s *tree, const char *const ipstr) {
1248 bool is_ipv6_address = NULL != strchr(ipstr, ':');
1249 if (tree->ip_version == 4 && is_ipv6_address) {
1250 return &PL_sv_undef;
1251 }
1252 MMDBW_network_s network =
1253 resolve_network(tree, ipstr, is_ipv6_address ? 128 : 32);
1254
1255 MMDBW_record_s *record_for_address;
1256 MMDBW_status status =
1257 find_record_for_network(tree, &network, &record_for_address);
1258
1259 free_network(&network);
1260
1261 if (MMDBW_SUCCESS != status) {
1262 croak("Received an unexpected NULL when looking up %s: %s",
1263 ipstr,
1264 status_error_message(status));
1265 }
1266
1267 if (record_for_address->type == MMDBW_RECORD_TYPE_NODE ||
1268 record_for_address->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1269 record_for_address->type == MMDBW_RECORD_TYPE_ALIAS) {
1270 croak("WTF - found a node or alias record for an address lookup - "
1271 "%s" PRIu8,
1272 ipstr);
1273 return &PL_sv_undef;
1274 }
1275
1276 if (record_for_address->type == MMDBW_RECORD_TYPE_EMPTY ||
1277 record_for_address->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
1278 return &PL_sv_undef;
1279 }
1280
1281 return newSVsv(data_for_key(tree, record_for_address->value.key));
1282 }
1283
find_record_for_network(MMDBW_tree_s * tree,MMDBW_network_s * network,MMDBW_record_s ** record)1284 static MMDBW_status find_record_for_network(MMDBW_tree_s *tree,
1285 MMDBW_network_s *network,
1286 MMDBW_record_s **record) {
1287 *record = &(tree->root_record);
1288
1289 for (int current_bit = 0; current_bit < network->prefix_length;
1290 current_bit++) {
1291
1292 MMDBW_node_s *node;
1293 if ((*record)->type == MMDBW_RECORD_TYPE_NODE ||
1294 (*record)->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1295 (*record)->type == MMDBW_RECORD_TYPE_ALIAS) {
1296 node = (*record)->value.node;
1297 } else {
1298 break;
1299 }
1300
1301 if (network_bit_value(network, current_bit)) {
1302 *record = &(node->right_record);
1303 } else {
1304 *record = &(node->left_record);
1305 }
1306 }
1307
1308 return MMDBW_SUCCESS;
1309 }
1310
new_node_from_record(MMDBW_tree_s * tree,MMDBW_record_s * record)1311 static MMDBW_node_s *new_node_from_record(MMDBW_tree_s *tree,
1312 MMDBW_record_s *record) {
1313 MMDBW_node_s *node = new_node();
1314 if (record->type == MMDBW_RECORD_TYPE_DATA) {
1315 /* We only need to increment the reference count once as we are
1316 replacing the parent record */
1317 increment_data_reference_count(tree, record->value.key);
1318
1319 node->left_record.type = MMDBW_RECORD_TYPE_DATA;
1320 node->left_record.value.key = record->value.key;
1321
1322 node->right_record.type = MMDBW_RECORD_TYPE_DATA;
1323 node->right_record.value.key = record->value.key;
1324 }
1325
1326 return node;
1327 }
1328
new_node()1329 MMDBW_node_s *new_node() {
1330 MMDBW_node_s *node = checked_malloc(sizeof(MMDBW_node_s));
1331
1332 node->number = 0;
1333 node->left_record.type = node->right_record.type = MMDBW_RECORD_TYPE_EMPTY;
1334
1335 return node;
1336 }
1337
free_node_and_subnodes(MMDBW_tree_s * tree,MMDBW_node_s * node,bool remove_alias_and_fixed_nodes)1338 static MMDBW_status free_node_and_subnodes(MMDBW_tree_s *tree,
1339 MMDBW_node_s *node,
1340 bool remove_alias_and_fixed_nodes) {
1341 MMDBW_status status = free_record_value(
1342 tree, &(node->left_record), remove_alias_and_fixed_nodes);
1343 if (status != MMDBW_SUCCESS) {
1344 return status;
1345 }
1346
1347 status = free_record_value(
1348 tree, &(node->right_record), remove_alias_and_fixed_nodes);
1349 if (status != MMDBW_SUCCESS) {
1350 return status;
1351 }
1352
1353 free(node);
1354 return MMDBW_SUCCESS;
1355 }
1356
free_record_value(MMDBW_tree_s * tree,MMDBW_record_s * record,bool remove_alias_and_fixed_nodes)1357 static MMDBW_status free_record_value(MMDBW_tree_s *tree,
1358 MMDBW_record_s *record,
1359 bool remove_alias_and_fixed_nodes) {
1360 if (record->type == MMDBW_RECORD_TYPE_FIXED_NODE &&
1361 !remove_alias_and_fixed_nodes) {
1362 return MMDBW_FREED_FIXED_NODE_ERROR;
1363 }
1364
1365 if (record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY &&
1366 !remove_alias_and_fixed_nodes) {
1367 return MMDBW_FREED_FIXED_EMPTY_ERROR;
1368 }
1369
1370 if (record->type == MMDBW_RECORD_TYPE_NODE ||
1371 record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1372 return free_node_and_subnodes(
1373 tree, record->value.node, remove_alias_and_fixed_nodes);
1374 }
1375
1376 if (record->type == MMDBW_RECORD_TYPE_DATA) {
1377 decrement_data_reference_count(tree, record->value.key);
1378 }
1379
1380 /* Alias nodes should only be removed explicitly. We can't just croak
1381 as it will leave the tree in an inconsistent state causing a segfault
1382 during unwinding. */
1383 if (record->type == MMDBW_RECORD_TYPE_ALIAS &&
1384 !remove_alias_and_fixed_nodes) {
1385 return MMDBW_FREED_ALIAS_NODE_ERROR;
1386 }
1387 return MMDBW_SUCCESS;
1388 }
1389
assign_node_numbers(MMDBW_tree_s * tree)1390 void assign_node_numbers(MMDBW_tree_s *tree) {
1391 tree->node_count = 0;
1392 start_iteration(tree, false, (void *)NULL, &assign_node_number);
1393 }
1394
assign_node_number(MMDBW_tree_s * tree,MMDBW_node_s * node,uint128_t UNUSED (network),uint8_t UNUSED (depth),void * UNUSED (args))1395 static void assign_node_number(MMDBW_tree_s *tree,
1396 MMDBW_node_s *node,
1397 uint128_t UNUSED(network),
1398 uint8_t UNUSED(depth),
1399 void *UNUSED(args)) {
1400 node->number = tree->node_count++;
1401 return;
1402 }
1403
1404 /* 16 bytes for an IP address, 1 byte for the prefix length */
1405 #define FROZEN_RECORD_MAX_SIZE (16 + 1 + SHA1_KEY_LENGTH)
1406 #define FROZEN_NODE_MAX_SIZE (FROZEN_RECORD_MAX_SIZE * 2)
1407
1408 /* 17 bytes of NULLs followed by something that cannot be an SHA1 key are a
1409 clear indicator that there are no more frozen networks in the buffer. */
1410 #define SEVENTEEN_NULLS "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
1411 #define FREEZE_SEPARATOR "not an SHA1 key"
1412 /* We subtract 1 as we treat this as a sequence of bytes rather than a null
1413 terminated string. */
1414 #define FREEZE_SEPARATOR_LENGTH (sizeof(FREEZE_SEPARATOR) - 1)
1415
freeze_tree(MMDBW_tree_s * tree,char * filename,char * frozen_params,size_t frozen_params_size)1416 void freeze_tree(MMDBW_tree_s *tree,
1417 char *filename,
1418 char *frozen_params,
1419 size_t frozen_params_size) {
1420 FILE *file = fopen(filename, "wb");
1421 if (!file) {
1422 croak("Could not open file %s: %s", filename, strerror(errno));
1423 }
1424
1425 freeze_args_s args = {
1426 .file = file,
1427 .filename = filename,
1428 };
1429
1430 freeze_to_file(&args, &frozen_params_size, 4);
1431 freeze_to_file(&args, frozen_params, frozen_params_size);
1432
1433 freeze_search_tree(tree, &args);
1434
1435 freeze_to_file(&args, SEVENTEEN_NULLS, 17);
1436 freeze_to_file(&args, FREEZE_SEPARATOR, FREEZE_SEPARATOR_LENGTH);
1437
1438 freeze_data_to_file(&args, tree);
1439
1440 if (fclose(file) != 0) {
1441 croak("Could not close file %s: %s", filename, strerror(errno));
1442 }
1443
1444 /* When the hash is _freed_, Perl decrements the ref count for each value
1445 * so we don't need to mess with them. */
1446 SvREFCNT_dec((SV *)args.data_hash);
1447 }
1448
freeze_search_tree(MMDBW_tree_s * tree,freeze_args_s * args)1449 static void freeze_search_tree(MMDBW_tree_s *tree, freeze_args_s *args) {
1450 if (tree->root_record.type == MMDBW_RECORD_TYPE_DATA) {
1451 croak("A tree that only contains a data record for /0 cannot be "
1452 "frozen");
1453 }
1454
1455 if (tree->root_record.type == MMDBW_RECORD_TYPE_NODE ||
1456 tree->root_record.type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1457 start_iteration(tree, false, (void *)args, &freeze_node);
1458 return;
1459 }
1460
1461 croak("Unexected root record type when freezing tree: %s",
1462 record_type_name(tree->root_record.type));
1463 }
1464
freeze_node(MMDBW_tree_s * tree,MMDBW_node_s * node,uint128_t network,uint8_t depth,void * void_args)1465 static void freeze_node(MMDBW_tree_s *tree,
1466 MMDBW_node_s *node,
1467 uint128_t network,
1468 uint8_t depth,
1469 void *void_args) {
1470 freeze_args_s *args = (freeze_args_s *)void_args;
1471
1472 const uint8_t next_depth = depth + 1;
1473
1474 if (node->left_record.type == MMDBW_RECORD_TYPE_DATA) {
1475 freeze_data_record(
1476 tree, network, next_depth, node->left_record.value.key, args);
1477 }
1478
1479 if (node->right_record.type == MMDBW_RECORD_TYPE_DATA) {
1480 uint128_t right_network = flip_network_bit(tree, network, depth);
1481 freeze_data_record(tree,
1482 right_network,
1483 next_depth,
1484 node->right_record.value.key,
1485 args);
1486 }
1487 }
1488
freeze_data_record(MMDBW_tree_s * UNUSED (tree),uint128_t network,uint8_t depth,const char * key,freeze_args_s * args)1489 static void freeze_data_record(MMDBW_tree_s *UNUSED(tree),
1490 uint128_t network,
1491 uint8_t depth,
1492 const char *key,
1493 freeze_args_s *args) {
1494 /* It'd save some space to shrink this to 4 bytes for IPv4-only trees, but
1495 * that would also complicated thawing quite a bit. */
1496 freeze_to_file(args, &network, 16);
1497 freeze_to_file(args, &(depth), 1);
1498 freeze_to_file(args, (char *)key, SHA1_KEY_LENGTH);
1499 }
1500
freeze_to_file(freeze_args_s * args,void * data,size_t size)1501 static void freeze_to_file(freeze_args_s *args, void *data, size_t size) {
1502 checked_fwrite(args->file, args->filename, data, size);
1503 }
1504
freeze_data_to_file(freeze_args_s * args,MMDBW_tree_s * tree)1505 static void freeze_data_to_file(freeze_args_s *args, MMDBW_tree_s *tree) {
1506 HV *data_hash = newHV();
1507
1508 MMDBW_data_hash_s *item, *tmp;
1509 HASH_ITER(hh, tree->data_table, item, tmp) {
1510 SvREFCNT_inc_simple_void_NN(item->data_sv);
1511 (void)hv_store(data_hash, item->key, SHA1_KEY_LENGTH, item->data_sv, 0);
1512 }
1513
1514 SV *frozen_data = freeze_hash(data_hash);
1515 STRLEN frozen_data_size;
1516 char *frozen_data_chars = SvPV(frozen_data, frozen_data_size);
1517
1518 checked_fwrite(
1519 args->file, args->filename, &frozen_data_size, sizeof(STRLEN));
1520 checked_fwrite(
1521 args->file, args->filename, frozen_data_chars, frozen_data_size);
1522
1523 SvREFCNT_dec(frozen_data);
1524 SvREFCNT_dec((SV *)data_hash);
1525 }
1526
freeze_hash(HV * hash)1527 static SV *freeze_hash(HV *hash) {
1528 dSP;
1529 ENTER;
1530 SAVETMPS;
1531
1532 SV *hashref = sv_2mortal(newRV_inc((SV *)hash));
1533
1534 PUSHMARK(SP);
1535 EXTEND(SP, 1);
1536 PUSHs(hashref);
1537 PUTBACK;
1538
1539 int count = call_pv("Sereal::Encoder::encode_sereal", G_SCALAR);
1540
1541 SPAGAIN;
1542
1543 if (count != 1) {
1544 croak("Expected 1 item back from Sereal::Encoder::encode_sereal call");
1545 }
1546
1547 SV *frozen = POPs;
1548 if (!SvPOK(frozen)) {
1549 croak("The Sereal::Encoder::encode_sereal sub returned an SV which is "
1550 "not SvPOK!");
1551 }
1552
1553 /* The SV will be mortal so it's about to lose a ref with the FREETMPS
1554 call below. */
1555 SvREFCNT_inc_simple_void_NN(frozen);
1556
1557 PUTBACK;
1558 FREETMPS;
1559 LEAVE;
1560
1561 return frozen;
1562 }
1563
thaw_tree(char * filename,uint32_t initial_offset,uint8_t ip_version,uint8_t record_size,MMDBW_merge_strategy merge_strategy,const bool alias_ipv6,const bool remove_reserved_networks)1564 MMDBW_tree_s *thaw_tree(char *filename,
1565 uint32_t initial_offset,
1566 uint8_t ip_version,
1567 uint8_t record_size,
1568 MMDBW_merge_strategy merge_strategy,
1569 const bool alias_ipv6,
1570 const bool remove_reserved_networks) {
1571 #ifdef WIN32
1572 int fd = open(filename, O_RDONLY);
1573 #else
1574 int fd = open(filename, O_RDONLY, 0);
1575 #endif
1576 if (fd == -1) {
1577 croak("Could not open file %s: %s", filename, strerror(errno));
1578 }
1579
1580 struct stat fileinfo;
1581 if (fstat(fd, &fileinfo) == -1) {
1582 close(fd);
1583 croak("Could not stat file: %s: %s", filename, strerror(errno));
1584 }
1585
1586 uint8_t *buffer =
1587 (uint8_t *)mmap(NULL, fileinfo.st_size, PROT_READ, MAP_SHARED, fd, 0);
1588 close(fd);
1589
1590 buffer += initial_offset;
1591
1592 MMDBW_tree_s *tree = new_tree(ip_version,
1593 record_size,
1594 merge_strategy,
1595 alias_ipv6,
1596 remove_reserved_networks);
1597
1598 thawed_network_s *thawed;
1599 while (NULL != (thawed = thaw_network(tree, &buffer))) {
1600 // We should never need to merge when thawing a tree.
1601 MMDBW_status status =
1602 insert_record_for_network(tree,
1603 thawed->network,
1604 thawed->record,
1605 MMDBW_MERGE_STRATEGY_NONE,
1606 true);
1607 free_network(thawed->network);
1608 free(thawed->network);
1609 if (thawed->record->type == MMDBW_RECORD_TYPE_DATA) {
1610 free((char *)thawed->record->value.key);
1611 }
1612 free(thawed->record);
1613 free(thawed);
1614 if (status != MMDBW_SUCCESS) {
1615 croak(status_error_message(status));
1616 }
1617 }
1618
1619 STRLEN frozen_data_size = thaw_strlen(&buffer);
1620
1621 /* per perlapi newSVpvn copies the string */
1622 SV *data_to_decode = sv_2mortal(newSVpvn((char *)buffer, frozen_data_size));
1623 HV *data_hash = thaw_data_hash(data_to_decode);
1624
1625 hv_iterinit(data_hash);
1626 char *key;
1627 I32 keylen;
1628 SV *value;
1629 while (NULL != (value = hv_iternextsv(data_hash, &key, &keylen))) {
1630 set_stored_data_in_tree(tree, key, value);
1631 }
1632
1633 SvREFCNT_dec((SV *)data_hash);
1634
1635 return tree;
1636 }
1637
thaw_uint8(uint8_t ** buffer)1638 static uint8_t thaw_uint8(uint8_t **buffer) {
1639 uint8_t value;
1640 memcpy(&value, *buffer, 1);
1641 *buffer += 1;
1642 return value;
1643 }
1644
thaw_network(MMDBW_tree_s * tree,uint8_t ** buffer)1645 static thawed_network_s *thaw_network(MMDBW_tree_s *tree, uint8_t **buffer) {
1646 uint128_t start_ip = thaw_uint128(buffer);
1647 uint8_t prefix_length = thaw_uint8(buffer);
1648
1649 if (start_ip == 0 && prefix_length == 0) {
1650 uint8_t *maybe_separator = thaw_bytes(buffer, FREEZE_SEPARATOR_LENGTH);
1651 if (memcmp(maybe_separator,
1652 FREEZE_SEPARATOR,
1653 FREEZE_SEPARATOR_LENGTH) == 0) {
1654
1655 free(maybe_separator);
1656 return NULL;
1657 }
1658
1659 croak("Found a ::0/0 network but that should never happen!");
1660 }
1661
1662 uint8_t *start_ip_bytes = (uint8_t *)&start_ip;
1663 uint8_t temp;
1664 for (int i = 0; i < 8; i++) {
1665 temp = start_ip_bytes[i];
1666 start_ip_bytes[i] = start_ip_bytes[15 - i];
1667 start_ip_bytes[15 - i] = temp;
1668 }
1669
1670 thawed_network_s *thawed = checked_malloc(sizeof(thawed_network_s));
1671
1672 uint8_t *bytes;
1673 if (tree->ip_version == 4) {
1674 bytes = checked_malloc(4);
1675 memcpy(bytes, start_ip_bytes + 12, 4);
1676 } else {
1677 bytes = checked_malloc(16);
1678 memcpy(bytes, &start_ip, 16);
1679 }
1680
1681 MMDBW_network_s network = {
1682 .bytes = bytes,
1683 .prefix_length = prefix_length,
1684 };
1685
1686 thawed->network = checked_malloc(sizeof(MMDBW_network_s));
1687 memcpy(thawed->network, &network, sizeof(MMDBW_network_s));
1688
1689 MMDBW_record_s *record = checked_malloc(sizeof(MMDBW_record_s));
1690 record->type = MMDBW_RECORD_TYPE_DATA;
1691
1692 record->value.key = thaw_data_key(buffer);
1693
1694 thawed->record = record;
1695
1696 return thawed;
1697 }
1698
thaw_bytes(uint8_t ** buffer,size_t size)1699 static uint8_t *thaw_bytes(uint8_t **buffer, size_t size) {
1700 uint8_t *value = checked_malloc(size);
1701 memcpy(value, *buffer, size);
1702 *buffer += size;
1703 return value;
1704 }
1705
thaw_uint128(uint8_t ** buffer)1706 static uint128_t thaw_uint128(uint8_t **buffer) {
1707 uint128_t value;
1708 memcpy(&value, *buffer, 16);
1709 *buffer += 16;
1710 return value;
1711 }
1712
thaw_strlen(uint8_t ** buffer)1713 static STRLEN thaw_strlen(uint8_t **buffer) {
1714 STRLEN value;
1715 memcpy(&value, *buffer, sizeof(STRLEN));
1716 *buffer += sizeof(STRLEN);
1717 return value;
1718 }
1719
thaw_data_key(uint8_t ** buffer)1720 static const char *thaw_data_key(uint8_t **buffer) {
1721 char *value = checked_malloc(SHA1_KEY_LENGTH + 1);
1722 memcpy(value, *buffer, SHA1_KEY_LENGTH);
1723 *buffer += SHA1_KEY_LENGTH;
1724 value[SHA1_KEY_LENGTH] = '\0';
1725 return (const char *)value;
1726 }
1727
thaw_data_hash(SV * data_to_decode)1728 static HV *thaw_data_hash(SV *data_to_decode) {
1729 dSP;
1730 ENTER;
1731 SAVETMPS;
1732
1733 PUSHMARK(SP);
1734 EXTEND(SP, 1);
1735 PUSHs(data_to_decode);
1736 PUTBACK;
1737
1738 int count = call_pv("Sereal::Decoder::decode_sereal", G_SCALAR);
1739
1740 SPAGAIN;
1741
1742 if (count != 1) {
1743 croak("Expected 1 item back from Sereal::Decoder::decode_sereal call");
1744 }
1745
1746 SV *thawed = POPs;
1747 if (!SvROK(thawed)) {
1748 croak("The Sereal::Decoder::decode_sereal sub returned an SV which is "
1749 "not SvROK!");
1750 }
1751
1752 SV *data_hash = SvREFCNT_inc_simple_NN(SvRV(thawed));
1753
1754 PUTBACK;
1755 FREETMPS;
1756 LEAVE;
1757
1758 return (HV *)data_hash;
1759 }
1760
write_search_tree(MMDBW_tree_s * tree,SV * output,SV * root_data_type,SV * serializer)1761 void write_search_tree(MMDBW_tree_s *tree,
1762 SV *output,
1763 SV *root_data_type,
1764 SV *serializer) {
1765 assign_node_numbers(tree);
1766
1767 /* This is a gross way to get around the fact that with C function
1768 * pointers we can't easily pass different params to different
1769 * callbacks. */
1770 encode_args_s args = {.output_io = IoOFP(sv_2io(output)),
1771 .root_data_type = root_data_type,
1772 .serializer = serializer,
1773 .data_pointer_cache = newHV()};
1774
1775 start_iteration(tree, false, (void *)&args, &encode_node);
1776
1777 /* When the hash is _freed_, Perl decrements the ref count for each value
1778 * so we don't need to mess with them. */
1779 SvREFCNT_dec((SV *)args.data_pointer_cache);
1780
1781 return;
1782 }
1783
encode_node(MMDBW_tree_s * tree,MMDBW_node_s * node,uint128_t UNUSED (network),uint8_t UNUSED (depth),void * void_args)1784 static void encode_node(MMDBW_tree_s *tree,
1785 MMDBW_node_s *node,
1786 uint128_t UNUSED(network),
1787 uint8_t UNUSED(depth),
1788 void *void_args) {
1789 encode_args_s *args = (encode_args_s *)void_args;
1790
1791 check_record_sanity(node, &(node->left_record), "left");
1792 check_record_sanity(node, &(node->right_record), "right");
1793
1794 uint32_t left =
1795 htonl(record_value_as_number(tree, &(node->left_record), args));
1796 uint32_t right =
1797 htonl(record_value_as_number(tree, &(node->right_record), args));
1798
1799 uint8_t *left_bytes = (uint8_t *)&left;
1800 uint8_t *right_bytes = (uint8_t *)&right;
1801
1802 if (tree->record_size == 24) {
1803 check_perlio_result(PerlIO_printf(args->output_io,
1804 "%c%c%c%c%c%c",
1805 left_bytes[1],
1806 left_bytes[2],
1807 left_bytes[3],
1808 right_bytes[1],
1809 right_bytes[2],
1810 right_bytes[3]),
1811 6,
1812 "PerlIO_printf");
1813 } else if (tree->record_size == 28) {
1814 check_perlio_result(
1815 PerlIO_printf(args->output_io,
1816 "%c%c%c%c%c%c%c",
1817 left_bytes[1],
1818 left_bytes[2],
1819 left_bytes[3],
1820 (left_bytes[0] << 4) | (right_bytes[0] & 15),
1821 right_bytes[1],
1822 right_bytes[2],
1823 right_bytes[3]),
1824 7,
1825 "PerlIO_printf");
1826 } else {
1827 check_perlio_result(PerlIO_printf(args->output_io,
1828 "%c%c%c%c%c%c%c%c",
1829 left_bytes[0],
1830 left_bytes[1],
1831 left_bytes[2],
1832 left_bytes[3],
1833 right_bytes[0],
1834 right_bytes[1],
1835 right_bytes[2],
1836 right_bytes[3]),
1837 8,
1838 "PerlIO_printf");
1839 }
1840 }
1841
1842 /* Note that for data records, we will ensure that the key they contain does
1843 * match a data record in the record_value_as_number() subroutine. */
1844 static void
check_record_sanity(MMDBW_node_s * node,MMDBW_record_s * record,char * side)1845 check_record_sanity(MMDBW_node_s *node, MMDBW_record_s *record, char *side) {
1846 if (record->type == MMDBW_RECORD_TYPE_NODE ||
1847 record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1848 if (record->value.node->number == node->number) {
1849 croak("%s record of node %" PRIu32 " points to the same node",
1850 side,
1851 node->number);
1852 }
1853
1854 if (record->value.node->number < node->number) {
1855 croak("%s record of node %" PRIu32
1856 " points to a node number (%" PRIu32 ")",
1857 side,
1858 node->number,
1859 record->value.node->number);
1860 }
1861 }
1862
1863 // This is a simple check that we aren't pointing at the tree root.
1864 if (record->type == MMDBW_RECORD_TYPE_ALIAS) {
1865 if (record->value.node->number == 0) {
1866 croak("%s record of node %" PRIu32 " is an alias to node 0",
1867 side,
1868 node->number);
1869 }
1870 }
1871 }
1872
record_value_as_number(MMDBW_tree_s * tree,MMDBW_record_s * record,encode_args_s * args)1873 static uint32_t record_value_as_number(MMDBW_tree_s *tree,
1874 MMDBW_record_s *record,
1875 encode_args_s *args) {
1876 uint32_t record_value = 0;
1877
1878 switch (record->type) {
1879 case MMDBW_RECORD_TYPE_FIXED_EMPTY:
1880 case MMDBW_RECORD_TYPE_EMPTY: {
1881 // Say that the IP isn't here.
1882 record_value = tree->node_count;
1883 break;
1884 }
1885 case MMDBW_RECORD_TYPE_NODE:
1886 case MMDBW_RECORD_TYPE_ALIAS:
1887 case MMDBW_RECORD_TYPE_FIXED_NODE: {
1888 record_value = record->value.node->number;
1889 break;
1890 }
1891 case MMDBW_RECORD_TYPE_DATA: {
1892 SV **cache_record = hv_fetch(args->data_pointer_cache,
1893 record->value.key,
1894 SHA1_KEY_LENGTH,
1895 0);
1896 if (cache_record) {
1897 /* It is ok to return this without the size check below as it
1898 would have already croaked when it was inserted if it was too
1899 big. */
1900 return SvIV(*cache_record);
1901 }
1902
1903 SV *data = newSVsv(data_for_key(tree, record->value.key));
1904 if (!SvOK(data)) {
1905 croak("No data associated with key - %s", record->value.key);
1906 }
1907
1908 dSP;
1909 ENTER;
1910 SAVETMPS;
1911
1912 PUSHMARK(SP);
1913 EXTEND(SP, 5);
1914 PUSHs(args->serializer);
1915 PUSHs(args->root_data_type);
1916 mPUSHs(data);
1917 PUSHs(&PL_sv_undef);
1918 mPUSHp(record->value.key, strlen(record->value.key));
1919 PUTBACK;
1920
1921 int count = call_method("store_data", G_SCALAR);
1922
1923 SPAGAIN;
1924
1925 if (count != 1) {
1926 croak("Expected 1 item back from ->store_data() call");
1927 }
1928
1929 SV *rval = POPs;
1930 if (!(SvIOK(rval) || SvUOK(rval))) {
1931 croak("The serializer's store_data() method returned an SV "
1932 "which is not SvIOK or SvUOK!");
1933 }
1934 uint32_t position = (uint32_t)SvUV(rval);
1935
1936 PUTBACK;
1937 FREETMPS;
1938 LEAVE;
1939
1940 record_value =
1941 position + tree->node_count + DATA_SECTION_SEPARATOR_SIZE;
1942
1943 SV *value = newSViv(record_value);
1944 (void)hv_store(args->data_pointer_cache,
1945 record->value.key,
1946 SHA1_KEY_LENGTH,
1947 value,
1948 0);
1949 break;
1950 }
1951 }
1952
1953 if (record_value > max_record_value(tree)) {
1954 croak("Node value of %" PRIu32 " exceeds the record size of %" PRIu8
1955 " bits",
1956 record_value,
1957 tree->record_size);
1958 }
1959
1960 return record_value;
1961 }
1962
max_record_value(MMDBW_tree_s * tree)1963 uint32_t max_record_value(MMDBW_tree_s *tree) {
1964 uint8_t record_size = tree->record_size;
1965 return record_size == 32 ? UINT32_MAX : (uint32_t)(1 << record_size) - 1;
1966 }
1967
start_iteration(MMDBW_tree_s * tree,bool depth_first,void * args,MMDBW_iterator_callback callback)1968 void start_iteration(MMDBW_tree_s *tree,
1969 bool depth_first,
1970 void *args,
1971 MMDBW_iterator_callback callback) {
1972 uint128_t network = 0;
1973 uint8_t depth = 0;
1974
1975 // We disallow this as the callback is based on nodes rather than records,
1976 // and changing that is a rabbit hole that I don't want to go down
1977 // currently. (I stuck my head in and regretted it.)
1978 if (MMDBW_RECORD_TYPE_NODE != tree->root_record.type &&
1979 MMDBW_RECORD_TYPE_FIXED_NODE != tree->root_record.type) {
1980 croak("Iteration is not currently allowed in trees with no nodes. "
1981 "Record type: %s",
1982 record_type_name(tree->root_record.type));
1983 }
1984
1985 iterate_tree(
1986 tree, &tree->root_record, network, depth, depth_first, args, callback);
1987
1988 return;
1989 }
1990
iterate_tree(MMDBW_tree_s * tree,MMDBW_record_s * record,uint128_t network,const uint8_t depth,bool depth_first,void * args,MMDBW_iterator_callback callback)1991 static void iterate_tree(MMDBW_tree_s *tree,
1992 MMDBW_record_s *record,
1993 uint128_t network,
1994 const uint8_t depth,
1995 bool depth_first,
1996 void *args,
1997 MMDBW_iterator_callback callback) {
1998 if (depth > tree_depth0(tree) + 1) {
1999 char ip[INET6_ADDRSTRLEN];
2000 integer_to_ip_string(tree->ip_version, network, ip, sizeof(ip));
2001 croak("Depth during iteration is greater than 127 (depth: %u, "
2002 "start IP: %s)! The tree is wonky.\n",
2003 depth,
2004 ip);
2005 }
2006
2007 if (record->type == MMDBW_RECORD_TYPE_NODE ||
2008 record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
2009 MMDBW_node_s *node = record->value.node;
2010
2011 if (!depth_first) {
2012 callback(tree, node, network, depth, args);
2013 }
2014
2015 iterate_tree(tree,
2016 &node->left_record,
2017 network,
2018 depth + 1,
2019 depth_first,
2020 args,
2021 callback);
2022
2023 if (depth_first) {
2024 callback(tree, node, network, depth, args);
2025 }
2026
2027 iterate_tree(tree,
2028 &node->right_record,
2029 flip_network_bit(tree, network, depth),
2030 depth + 1,
2031 depth_first,
2032 args,
2033 callback);
2034 }
2035 }
2036
2037 uint128_t
flip_network_bit(MMDBW_tree_s * tree,uint128_t network,uint8_t depth)2038 flip_network_bit(MMDBW_tree_s *tree, uint128_t network, uint8_t depth) {
2039 return network | ((uint128_t)1 << (tree_depth0(tree) - depth));
2040 }
2041
key_for_data(SV * data)2042 static SV *key_for_data(SV *data) {
2043 dSP;
2044 ENTER;
2045 SAVETMPS;
2046
2047 PUSHMARK(SP);
2048 EXTEND(SP, 1);
2049 PUSHs(data);
2050 PUTBACK;
2051
2052 const char *const sub = "MaxMind::DB::Writer::Util::key_for_data";
2053 int count = call_pv(sub, G_SCALAR);
2054
2055 SPAGAIN;
2056
2057 if (count != 1) {
2058 croak("Expected 1 item back from %s() call", sub);
2059 }
2060
2061 SV *key = POPs;
2062 SvREFCNT_inc_simple_void_NN(key);
2063
2064 PUTBACK;
2065 FREETMPS;
2066 LEAVE;
2067
2068 return key;
2069 }
2070
data_for_key(MMDBW_tree_s * tree,const char * const key)2071 SV *data_for_key(MMDBW_tree_s *tree, const char *const key) {
2072 MMDBW_data_hash_s *data = NULL;
2073 HASH_FIND(hh, tree->data_table, key, strlen(key), data);
2074
2075 if (NULL != data) {
2076 return data->data_sv;
2077 } else {
2078 return &PL_sv_undef;
2079 }
2080 }
2081
merge_cache_lookup(MMDBW_tree_s * tree,char * merge_cache_key)2082 static const char *merge_cache_lookup(MMDBW_tree_s *tree,
2083 char *merge_cache_key) {
2084 MMDBW_merge_cache_s *cache = NULL;
2085 HASH_FIND(hh, tree->merge_cache, merge_cache_key, MERGE_KEY_SIZE, cache);
2086
2087 if (!cache) {
2088 return NULL;
2089 }
2090
2091 // We have to check that the value has not been removed from the data
2092 // table
2093 MMDBW_data_hash_s *data = NULL;
2094 HASH_FIND(hh, tree->data_table, cache->value, SHA1_KEY_LENGTH, data);
2095 if (data != NULL) {
2096 return cache->value;
2097 }
2098
2099 // Item has been removed from data table. Remove the cached merge too.
2100 HASH_DEL(tree->merge_cache, cache);
2101 return NULL;
2102 }
2103
store_in_merge_cache(MMDBW_tree_s * tree,char * merge_cache_key,const char * const new_key)2104 static void store_in_merge_cache(MMDBW_tree_s *tree,
2105 char *merge_cache_key,
2106 const char *const new_key) {
2107 MMDBW_merge_cache_s *data = checked_malloc(sizeof(MMDBW_merge_cache_s));
2108 data->value = checked_malloc(SHA1_KEY_LENGTH + 1);
2109 strncpy((char *)data->value, new_key, SHA1_KEY_LENGTH + 1);
2110
2111 data->key = checked_malloc(MERGE_KEY_SIZE + 1);
2112 strncpy((char *)data->key, merge_cache_key, MERGE_KEY_SIZE + 1);
2113
2114 HASH_ADD_KEYPTR(hh, tree->merge_cache, data->key, MERGE_KEY_SIZE, data);
2115 }
2116
free_tree(MMDBW_tree_s * tree)2117 void free_tree(MMDBW_tree_s *tree) {
2118 free_record_value(tree, &tree->root_record, true);
2119 free_merge_cache(tree);
2120
2121 int hash_count = HASH_COUNT(tree->data_table);
2122 if (0 != hash_count) {
2123 croak("%d elements left in data table after freeing all nodes!",
2124 hash_count);
2125 }
2126
2127 free(tree);
2128 }
2129
free_merge_cache(MMDBW_tree_s * tree)2130 void free_merge_cache(MMDBW_tree_s *tree) {
2131 MMDBW_merge_cache_s *cache, *tmp = NULL;
2132 HASH_ITER(hh, tree->merge_cache, cache, tmp) {
2133 HASH_DEL(tree->merge_cache, cache);
2134 free((void *)cache->key);
2135 free((void *)cache->value);
2136 free(cache);
2137 }
2138 }
2139
checked_malloc(size_t size)2140 static void *checked_malloc(size_t size) {
2141 void *ptr = malloc(size);
2142 if (!ptr) {
2143 abort();
2144 }
2145
2146 return ptr;
2147 }
2148
2149 static void
checked_fwrite(FILE * file,char * filename,void * buffer,size_t count)2150 checked_fwrite(FILE *file, char *filename, void *buffer, size_t count) {
2151 size_t result = fwrite(buffer, 1, count, file);
2152 if (result != count) {
2153 fclose(file);
2154 croak("Write to %s did not write the expected amount of data (wrote "
2155 "%zu instead of %zu): %s",
2156 filename,
2157 result,
2158 count,
2159 strerror(errno));
2160 }
2161 }
2162
check_perlio_result(SSize_t result,SSize_t expected,char * op)2163 static void check_perlio_result(SSize_t result, SSize_t expected, char *op) {
2164 if (result < 0) {
2165 croak("%s operation failed: %s\n", op, strerror(result));
2166 } else if (result != expected) {
2167 croak("%s operation wrote %zd bytes when we expected to write %zd",
2168 op,
2169 result,
2170 expected);
2171 }
2172 }
2173
status_error_message(MMDBW_status status)2174 static char *status_error_message(MMDBW_status status) {
2175 switch (status) {
2176 case MMDBW_SUCCESS:
2177 return "Success";
2178 case MMDBW_INSERT_INTO_ALIAS_NODE_ERROR:
2179 return "Attempted to insert into an aliased network.";
2180 case MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR:
2181 return "Invalid record type given to insert.";
2182 case MMDBW_FREED_ALIAS_NODE_ERROR:
2183 return "Attempted to free an IPv4 alias node. Did you try to "
2184 "overwrite an alias network?";
2185 case MMDBW_FREED_FIXED_EMPTY_ERROR:
2186 return "Attempted to free a fixed empty node. This should never "
2187 "happen.";
2188 case MMDBW_FREED_FIXED_NODE_ERROR:
2189 return "Attempted to free a fixed node. This should never happen.";
2190 case MMDBW_ALIAS_OVERWRITE_ATTEMPT_ERROR:
2191 return "Attempted to overwrite an aliased network.";
2192 case MMDBW_FIXED_NODE_OVERWRITE_ATTEMPT_ERROR:
2193 return "Attempted to overwrite a fixed node.";
2194 case MMDBW_RESOLVING_IP_ERROR:
2195 return "Failed to resolve IP address.";
2196 }
2197 // We should get a compile time warning if an enum is missing
2198 return "Unknown error";
2199 }
2200
record_type_name(MMDBW_record_type type)2201 static const char *record_type_name(MMDBW_record_type type) {
2202 switch (type) {
2203 case MMDBW_RECORD_TYPE_EMPTY:
2204 return "empty";
2205 case MMDBW_RECORD_TYPE_FIXED_EMPTY:
2206 return "fixed_empty";
2207 case MMDBW_RECORD_TYPE_DATA:
2208 return "data";
2209 case MMDBW_RECORD_TYPE_NODE:
2210 return "node";
2211 case MMDBW_RECORD_TYPE_FIXED_NODE:
2212 return "fixed_node";
2213 case MMDBW_RECORD_TYPE_ALIAS:
2214 return "alias";
2215 }
2216 return "unknown type";
2217 }
2218