1 #include "tree.h"
2 
3 #ifndef WIN32
4 #include <sys/mman.h>
5 #else
6 #include "windows_mman.h"
7 #endif
8 
9 #include <sys/socket.h>
10 #include <sys/stat.h>
11 #include <sys/types.h>
12 
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <inttypes.h>
16 #include <netdb.h>
17 #include <stdbool.h>
18 #include <stdio.h>
19 
20 #ifdef __GNUC__
21 #define UNUSED(x) UNUSED_##x __attribute__((__unused__))
22 #else
23 #define UNUSED(x) UNUSED_##x
24 #endif
25 
26 /* This is also defined in MaxMind::DB::Common but we don't want to have to
27  * fetch it every time we need it. */
28 #define DATA_SECTION_SEPARATOR_SIZE (16)
29 
30 #define SHA1_KEY_LENGTH (27)
31 
32 #define MERGE_KEY_SIZE (57)
33 
34 typedef struct freeze_args_s {
35     FILE *file;
36     char *filename;
37     HV *data_hash;
38 } freeze_args_s;
39 
40 typedef struct thawed_network_s {
41     MMDBW_network_s *network;
42     MMDBW_record_s *record;
43 } thawed_network_s;
44 
45 typedef struct encode_args_s {
46     PerlIO *output_io;
47     SV *root_data_type;
48     SV *serializer;
49     HV *data_pointer_cache;
50 } encode_args_s;
51 
52 struct network {
53     const char *const ipstr;
54     const uint8_t prefix_length;
55 };
56 
57 static void verify_ip(MMDBW_tree_s *tree, const char *ipstr);
58 static int128_t ip_string_to_integer(const char *ipstr, int family);
59 static int128_t ip_bytes_to_integer(uint8_t *bytes, int family);
60 static void
61 integer_to_ip_bytes(int tree_ip_version, uint128_t ip, uint8_t *bytes);
62 static void integer_to_ip_string(int tree_ip_version,
63                                  uint128_t ip,
64                                  char *dst,
65                                  int dst_length);
66 static int prefix_length_for_largest_subnet(uint128_t start_ip,
67                                             uint128_t end_ip,
68                                             int family,
69                                             uint128_t *reverse_mask);
70 static const char *
71 store_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv);
72 static const char *increment_data_reference_count(MMDBW_tree_s *tree,
73                                                   const char *const key);
74 static void
75 set_stored_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv);
76 static void decrement_data_reference_count(MMDBW_tree_s *tree,
77                                            const char *const key);
78 static MMDBW_network_s resolve_network(MMDBW_tree_s *tree,
79                                        const char *const ipstr,
80                                        uint8_t prefix_length);
81 static MMDBW_status
82 resolve_ip(int tree_ip_version, const char *const ipstr, uint8_t *bytes);
83 static void free_network(MMDBW_network_s *network);
84 static void alias_ipv4_networks(MMDBW_tree_s *tree);
85 static MMDBW_status insert_reserved_networks_as_fixed_empty(MMDBW_tree_s *tree);
86 static MMDBW_status
87 insert_networks_as_fixed_empty(MMDBW_tree_s *tree,
88                                struct network const *const networks,
89                                const size_t num_networks);
90 static MMDBW_status
91 insert_record_for_network(MMDBW_tree_s *tree,
92                           MMDBW_network_s *network,
93                           MMDBW_record_s *new_record,
94                           MMDBW_merge_strategy merge_strategy,
95                           bool is_internal_insert);
96 static MMDBW_status
97 insert_record_into_next_node(MMDBW_tree_s *tree,
98                              MMDBW_record_s *current_record,
99                              MMDBW_network_s *network,
100                              int current_bit,
101                              MMDBW_record_s *new_record,
102                              MMDBW_merge_strategy merge_strategy,
103                              bool is_internal_insert);
104 static MMDBW_status
105 insert_record_into_current_record(MMDBW_tree_s *tree,
106                                   MMDBW_record_s *current_record,
107                                   MMDBW_network_s *network,
108                                   MMDBW_record_s *new_record,
109                                   MMDBW_merge_strategy merge_strategy);
110 static const char *maybe_merge_records(MMDBW_tree_s *tree,
111                                        MMDBW_network_s *network,
112                                        MMDBW_record_s *new_record,
113                                        MMDBW_record_s *record_to_set,
114                                        MMDBW_merge_strategy merge_strategy);
115 static int network_bit_value(MMDBW_network_s *network, uint8_t current_bit);
116 static int tree_depth0(MMDBW_tree_s *tree);
117 static SV *merge_hashes(MMDBW_tree_s *tree,
118                         SV *from,
119                         SV *into,
120                         MMDBW_merge_strategy merge_strategy);
121 static void merge_new_from_hash_into_hash(MMDBW_tree_s *tree,
122                                           HV *from,
123                                           HV *to,
124                                           MMDBW_merge_strategy merge_strategy);
125 static SV *merge_values(MMDBW_tree_s *tree,
126                         SV *from,
127                         SV *into,
128                         MMDBW_merge_strategy merge_strategy);
129 static SV *merge_arrays(MMDBW_tree_s *tree,
130                         SV *from,
131                         SV *into,
132                         MMDBW_merge_strategy merge_strategy);
133 static MMDBW_status find_record_for_network(MMDBW_tree_s *tree,
134                                             MMDBW_network_s *network,
135                                             MMDBW_record_s **record);
136 static MMDBW_node_s *new_node_from_record(MMDBW_tree_s *tree,
137                                           MMDBW_record_s *record);
138 static MMDBW_status free_node_and_subnodes(MMDBW_tree_s *tree,
139                                            MMDBW_node_s *node,
140                                            bool remove_alias_and_fixed_nodes);
141 static MMDBW_status free_record_value(MMDBW_tree_s *tree,
142                                       MMDBW_record_s *record,
143                                       bool remove_alias_and_fixed_nodes);
144 static void assign_node_number(MMDBW_tree_s *tree,
145                                MMDBW_node_s *node,
146                                uint128_t UNUSED(network),
147                                uint8_t UNUSED(depth),
148                                void *UNUSED(args));
149 static void freeze_search_tree(MMDBW_tree_s *tree, freeze_args_s *args);
150 static void freeze_node(MMDBW_tree_s *tree,
151                         MMDBW_node_s *node,
152                         uint128_t network,
153                         uint8_t depth,
154                         void *void_args);
155 static void freeze_data_record(MMDBW_tree_s *UNUSED(tree),
156                                uint128_t network,
157                                uint8_t depth,
158                                const char *key,
159                                freeze_args_s *args);
160 static void freeze_to_file(freeze_args_s *args, void *data, size_t size);
161 static void freeze_data_to_file(freeze_args_s *args, MMDBW_tree_s *tree);
162 static SV *freeze_hash(HV *hash);
163 static uint8_t thaw_uint8(uint8_t **buffer);
164 static thawed_network_s *thaw_network(MMDBW_tree_s *tree, uint8_t **buffer);
165 static uint8_t *thaw_bytes(uint8_t **buffer, size_t size);
166 static uint128_t thaw_uint128(uint8_t **buffer);
167 static STRLEN thaw_strlen(uint8_t **buffer);
168 static const char *thaw_data_key(uint8_t **buffer);
169 static HV *thaw_data_hash(SV *data_to_decode);
170 static void encode_node(MMDBW_tree_s *tree,
171                         MMDBW_node_s *node,
172                         uint128_t UNUSED(network),
173                         uint8_t UNUSED(depth),
174                         void *void_args);
175 static void
176 check_record_sanity(MMDBW_node_s *node, MMDBW_record_s *record, char *side);
177 static uint32_t record_value_as_number(MMDBW_tree_s *tree,
178                                        MMDBW_record_s *record,
179                                        encode_args_s *args);
180 static void iterate_tree(MMDBW_tree_s *tree,
181                          MMDBW_record_s *record,
182                          uint128_t network,
183                          const uint8_t depth,
184                          bool depth_first,
185                          void *args,
186                          MMDBW_iterator_callback callback);
187 static SV *key_for_data(SV *data);
188 static const char *merge_cache_lookup(MMDBW_tree_s *tree,
189                                       char *merge_cache_key);
190 static void store_in_merge_cache(MMDBW_tree_s *tree,
191                                  char *merge_cache_key,
192                                  const char *const new_key);
193 static void *checked_malloc(size_t size);
194 static void
195 checked_fwrite(FILE *file, char *filename, void *buffer, size_t count);
196 static void check_perlio_result(SSize_t result, SSize_t expected, char *op);
197 static char *status_error_message(MMDBW_status status);
198 static const char *record_type_name(MMDBW_record_type type);
199 
200 // Create a new tree.
201 //
202 // For a description of `alias_ipv6' and `remove_reserved_networks', refer to
203 // the MaxMind::DB::Writer::Tree documentation about these options.
new_tree(const uint8_t ip_version,uint8_t record_size,MMDBW_merge_strategy merge_strategy,const bool alias_ipv6,const bool remove_reserved_networks)204 MMDBW_tree_s *new_tree(const uint8_t ip_version,
205                        uint8_t record_size,
206                        MMDBW_merge_strategy merge_strategy,
207                        const bool alias_ipv6,
208                        const bool remove_reserved_networks) {
209     if (merge_strategy == MMDBW_MERGE_STRATEGY_UNKNOWN) {
210         croak("Unknown merge_strategy encountered");
211     }
212     if (ip_version != 4 && ip_version != 6) {
213         croak("Unexpected IP version of %u", ip_version);
214     }
215     if (record_size != 24 && record_size != 28 && record_size != 32) {
216         croak("Only record sizes of 24, 28, and 32 are supported. Received %u.",
217               record_size);
218     }
219 
220     MMDBW_tree_s *tree = checked_malloc(sizeof(MMDBW_tree_s));
221     tree->ip_version = ip_version;
222 
223     tree->record_size = record_size;
224     tree->merge_strategy = merge_strategy;
225     tree->merge_cache = NULL;
226     tree->data_table = NULL;
227     tree->root_record = (MMDBW_record_s){
228         .type = MMDBW_RECORD_TYPE_EMPTY,
229     };
230     tree->node_count = 0;
231 
232     if (alias_ipv6) {
233         alias_ipv4_networks(tree);
234     }
235 
236     if (remove_reserved_networks) {
237         MMDBW_status const status =
238             insert_reserved_networks_as_fixed_empty(tree);
239         if (status != MMDBW_SUCCESS) {
240             free_tree(tree);
241             croak("Error inserting reserved networks: %s",
242                   status_error_message(status));
243         }
244     }
245 
246     return tree;
247 }
248 
insert_network(MMDBW_tree_s * tree,const char * ipstr,const uint8_t prefix_length,SV * key_sv,SV * data,MMDBW_merge_strategy merge_strategy)249 void insert_network(MMDBW_tree_s *tree,
250                     const char *ipstr,
251                     const uint8_t prefix_length,
252                     SV *key_sv,
253                     SV *data,
254                     MMDBW_merge_strategy merge_strategy) {
255     verify_ip(tree, ipstr);
256 
257     MMDBW_network_s network = resolve_network(tree, ipstr, prefix_length);
258 
259     const char *const key =
260         store_data_in_tree(tree, SvPVbyte_nolen(key_sv), data);
261     MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_DATA,
262                                  .value = {.key = key}};
263 
264     MMDBW_status status = insert_record_for_network(
265         tree, &network, &new_record, merge_strategy, false);
266 
267     // The data's ref count gets incremented by the insert each time it is
268     // inserted. As such, we need to decrement it here.
269     decrement_data_reference_count(tree, key);
270     free_network(&network);
271 
272     if (MMDBW_SUCCESS != status) {
273         croak("%s (when inserting %s/%" PRIu8 ")",
274               status_error_message(status),
275               ipstr,
276               prefix_length);
277     }
278 }
279 
verify_ip(MMDBW_tree_s * tree,const char * ipstr)280 static void verify_ip(MMDBW_tree_s *tree, const char *ipstr) {
281     if (tree->ip_version == 4 && strchr(ipstr, ':')) {
282         croak("You cannot insert an IPv6 address (%s) into an IPv4 tree.",
283               ipstr);
284     }
285 }
286 
insert_range(MMDBW_tree_s * tree,const char * start_ipstr,const char * end_ipstr,SV * key_sv,SV * data_sv,MMDBW_merge_strategy merge_strategy)287 void insert_range(MMDBW_tree_s *tree,
288                   const char *start_ipstr,
289                   const char *end_ipstr,
290                   SV *key_sv,
291                   SV *data_sv,
292                   MMDBW_merge_strategy merge_strategy) {
293     verify_ip(tree, start_ipstr);
294     verify_ip(tree, end_ipstr);
295 
296     uint128_t start_ip = ip_string_to_integer(start_ipstr, tree->ip_version);
297     uint128_t end_ip = ip_string_to_integer(end_ipstr, tree->ip_version);
298 
299     if (end_ip < start_ip) {
300         croak("First IP (%s) in range comes before last IP (%s)",
301               start_ipstr,
302               end_ipstr);
303     }
304 
305     const char *const key =
306         store_data_in_tree(tree, SvPVbyte_nolen(key_sv), data_sv);
307 
308     uint8_t bytes[tree->ip_version == 6 ? 16 : 4];
309 
310     MMDBW_status status = MMDBW_SUCCESS;
311 
312     // Eventually we could change the code to walk the tree and break up the
313     // range at the same time, saving some unnecessary computation. However,
314     // that would require more significant refactoring of the insertion and
315     // merging code.
316     while (start_ip <= end_ip) {
317         uint128_t reverse_mask;
318         int prefix_length = prefix_length_for_largest_subnet(
319             start_ip, end_ip, tree->ip_version, &reverse_mask);
320 
321         integer_to_ip_bytes(tree->ip_version, start_ip, bytes);
322 
323         MMDBW_network_s network = {
324             .bytes = bytes,
325             .prefix_length = prefix_length,
326         };
327 
328         MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_DATA,
329                                      .value = {.key = key}};
330 
331         status = insert_record_for_network(
332             tree, &network, &new_record, merge_strategy, false);
333 
334         if (MMDBW_SUCCESS != status) {
335             break;
336         }
337 
338         start_ip = (start_ip | reverse_mask) + 1;
339 
340         // The +1 caused an overflow and we are done.
341         if (start_ip == 0) {
342             break;
343         }
344     }
345     // store_data_in_tree starts at a reference count of 1, so we need to
346     // decrement in order to account for that.
347     decrement_data_reference_count(tree, key);
348 
349     if (MMDBW_SUCCESS != status) {
350         croak("%s (when inserting %s - %s)",
351               status_error_message(status),
352               start_ipstr,
353               end_ipstr);
354     }
355 }
356 
ip_string_to_integer(const char * ipstr,int family)357 static int128_t ip_string_to_integer(const char *ipstr, int family) {
358     uint8_t bytes[family == 6 ? 16 : 4];
359     if (resolve_ip(family, ipstr, bytes) != MMDBW_SUCCESS) {
360         croak("Invalid IP address: %s", ipstr);
361     }
362     return ip_bytes_to_integer(bytes, family);
363 }
364 
ip_bytes_to_integer(uint8_t * bytes,int family)365 static int128_t ip_bytes_to_integer(uint8_t *bytes, int family) {
366     int length = family == 6 ? 16 : 4;
367 
368     int128_t ipint = 0;
369     for (int i = 0; i < length; i++) {
370         ipint = (ipint << 8) | bytes[i];
371     }
372     return ipint;
373 }
374 
375 static void
integer_to_ip_bytes(int tree_ip_version,uint128_t ip,uint8_t * bytes)376 integer_to_ip_bytes(int tree_ip_version, uint128_t ip, uint8_t *bytes) {
377     int bytes_length = tree_ip_version == 6 ? 16 : 4;
378     for (int i = 1; i <= bytes_length; i++) {
379         bytes[bytes_length - i] = 0xFF & ip;
380         ip >>= 8;
381     }
382 }
383 
integer_to_ip_string(int tree_ip_version,uint128_t ip,char * dst,int dst_length)384 static void integer_to_ip_string(int tree_ip_version,
385                                  uint128_t ip,
386                                  char *dst,
387                                  int dst_length) {
388     uint8_t bytes[tree_ip_version == 6 ? 16 : 4];
389     integer_to_ip_bytes(tree_ip_version, ip, bytes);
390 
391     if (NULL == inet_ntop(tree_ip_version == 6 ? AF_INET6 : AF_INET,
392                           bytes,
393                           dst,
394                           dst_length)) {
395         croak("Error converting IP integer to string");
396     }
397 }
398 
prefix_length_for_largest_subnet(uint128_t start_ip,uint128_t end_ip,int family,uint128_t * reverse_mask)399 static int prefix_length_for_largest_subnet(uint128_t start_ip,
400                                             uint128_t end_ip,
401                                             int family,
402                                             uint128_t *reverse_mask) {
403 
404     if (start_ip > end_ip) {
405         croak("Start IP of the range must be less than or equal to end IP");
406     }
407 
408     int prefix_length = family == 6 ? 128 : 32;
409     *reverse_mask = 1;
410 
411     while (
412         // First IP of the subnet must be the start IP
413         (start_ip & ~*reverse_mask) == start_ip
414         // the last IP of the subnet must be <= the end IP
415         && (start_ip | *reverse_mask) <= end_ip
416         // stop if we have all IPs (shouldn't be required, but safety measure)
417         && prefix_length > 0) {
418         prefix_length--;
419         *reverse_mask = (*reverse_mask << 1) | 1;
420     }
421 
422     // We overshoot by one shift
423     *reverse_mask >>= 1;
424 
425     return prefix_length;
426 }
427 
remove_network(MMDBW_tree_s * tree,const char * ipstr,const uint8_t prefix_length)428 void remove_network(MMDBW_tree_s *tree,
429                     const char *ipstr,
430                     const uint8_t prefix_length) {
431     verify_ip(tree, ipstr);
432 
433     MMDBW_network_s network = resolve_network(tree, ipstr, prefix_length);
434 
435     MMDBW_record_s new_record = {.type = MMDBW_RECORD_TYPE_EMPTY};
436 
437     MMDBW_status status = insert_record_for_network(
438         tree, &network, &new_record, MMDBW_MERGE_STRATEGY_NONE, false);
439 
440     free_network(&network);
441     if (status != MMDBW_SUCCESS) {
442         croak(status_error_message(status));
443     }
444 }
445 
446 static const char *
store_data_in_tree(MMDBW_tree_s * tree,const char * const key,SV * data_sv)447 store_data_in_tree(MMDBW_tree_s *tree, const char *const key, SV *data_sv) {
448     const char *const new_key = increment_data_reference_count(tree, key);
449     set_stored_data_in_tree(tree, key, data_sv);
450 
451     return new_key;
452 }
453 
increment_data_reference_count(MMDBW_tree_s * tree,const char * const key)454 static const char *increment_data_reference_count(MMDBW_tree_s *tree,
455                                                   const char *const key) {
456     MMDBW_data_hash_s *data = NULL;
457     HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
458 
459     /* We allow this possibility as we need to create the record separately
460        from updating the data when thawing */
461     if (NULL == data) {
462         data = checked_malloc(sizeof(MMDBW_data_hash_s));
463         data->reference_count = 0;
464 
465         data->data_sv = NULL;
466 
467         data->key = checked_malloc(SHA1_KEY_LENGTH + 1);
468         strcpy((char *)data->key, key);
469 
470         HASH_ADD_KEYPTR(hh, tree->data_table, data->key, SHA1_KEY_LENGTH, data);
471     }
472     data->reference_count++;
473 
474     return data->key;
475 }
476 
set_stored_data_in_tree(MMDBW_tree_s * tree,const char * const key,SV * data_sv)477 static void set_stored_data_in_tree(MMDBW_tree_s *tree,
478                                     const char *const key,
479                                     SV *data_sv) {
480     MMDBW_data_hash_s *data = NULL;
481     HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
482 
483     if (NULL == data) {
484         croak("Attempt to set unknown data record in tree");
485     }
486 
487     if (NULL != data->data_sv) {
488         return;
489     }
490 
491     SvREFCNT_inc_simple_void_NN(data_sv);
492     data->data_sv = data_sv;
493 }
494 
decrement_data_reference_count(MMDBW_tree_s * tree,const char * const key)495 static void decrement_data_reference_count(MMDBW_tree_s *tree,
496                                            const char *const key) {
497     MMDBW_data_hash_s *data = NULL;
498     HASH_FIND(hh, tree->data_table, key, SHA1_KEY_LENGTH, data);
499 
500     if (NULL == data) {
501         croak("Attempt to remove data that does not exist from tree");
502     }
503 
504     data->reference_count--;
505     if (0 == data->reference_count) {
506         HASH_DEL(tree->data_table, data);
507         SvREFCNT_dec(data->data_sv);
508         free((char *)data->key);
509         free(data);
510     }
511 }
512 
resolve_network(MMDBW_tree_s * tree,const char * const ipstr,uint8_t prefix_length)513 static MMDBW_network_s resolve_network(MMDBW_tree_s *tree,
514                                        const char *const ipstr,
515                                        uint8_t prefix_length) {
516     uint8_t *bytes = checked_malloc(tree->ip_version == 6 ? 16 : 4);
517 
518     if (resolve_ip(tree->ip_version, ipstr, bytes) != MMDBW_SUCCESS) {
519         free(bytes);
520         croak("Invalid IP address: %s", ipstr);
521     }
522 
523     if (NULL == strchr(ipstr, ':')) {
524         // IPv4
525         if (prefix_length > 32) {
526             free(bytes);
527             croak("Prefix length greater than 32 on an IPv4 network (%s/%d)",
528                   ipstr,
529                   prefix_length);
530         }
531         if (tree->ip_version == 6) {
532             // Inserting IPv4 network into an IPv6 tree
533             prefix_length += 96;
534         }
535     } else if (prefix_length > 128) {
536         free(bytes);
537         croak("Prefix length greater than 128 on an IPv6 network (%s/%d)",
538               ipstr,
539               prefix_length);
540     }
541 
542     MMDBW_network_s network = {
543         .bytes = bytes,
544         .prefix_length = prefix_length,
545     };
546 
547     return network;
548 }
549 
550 static MMDBW_status
resolve_ip(int tree_ip_version,const char * const ipstr,uint8_t * bytes)551 resolve_ip(int tree_ip_version, const char *const ipstr, uint8_t *bytes) {
552     bool is_ipv4_address = NULL == strchr(ipstr, ':');
553     int family = is_ipv4_address ? AF_INET : AF_INET6;
554     if (tree_ip_version == 6 && is_ipv4_address) {
555         // We are inserting/looking up an IPv4 address in an IPv6 tree.
556         // The canonical location for this in our database is ::a.b.c.d.
557         // To get this address, we zero out the first 12 bytes of bytes
558         // and then put the IPv4 address in the remaining 4. The reason to
559         // not use getaddrinfo with AI_V4MAPPED is that it gives us
560         // ::FFFF:a.b.c.d and AI_V4MAPPED doesn't work on all platforms.
561         // See GitHub #7 and #51.
562         memset(bytes, 0, 12);
563         bytes += 12;
564     }
565     if (!inet_pton(family, ipstr, bytes)) {
566         return MMDBW_RESOLVING_IP_ERROR;
567     }
568     return MMDBW_SUCCESS;
569 }
570 
free_network(MMDBW_network_s * network)571 static void free_network(MMDBW_network_s *network) {
572     free((char *)network->bytes);
573 }
574 
575 static struct network ipv4_aliases[] = {
576     {
577         .ipstr = "::ffff:0:0",
578         .prefix_length = 96,
579     },
580     {.ipstr = "2001::", .prefix_length = 32},
581     {.ipstr = "2002::", .prefix_length = 16}};
582 
alias_ipv4_networks(MMDBW_tree_s * tree)583 static void alias_ipv4_networks(MMDBW_tree_s *tree) {
584     if (tree->ip_version == 4) {
585         return;
586     }
587 
588     MMDBW_network_s ipv4_root_network = resolve_network(tree, "::0.0.0.0", 96);
589     MMDBW_node_s *ipv4_root_node = new_node();
590     MMDBW_record_s ipv4_root_record = {
591         .type = MMDBW_RECORD_TYPE_FIXED_NODE,
592         .value.node = ipv4_root_node,
593     };
594 
595     MMDBW_status status = insert_record_for_network(tree,
596                                                     &ipv4_root_network,
597                                                     &ipv4_root_record,
598                                                     MMDBW_MERGE_STRATEGY_NONE,
599                                                     true);
600 
601     free_network(&ipv4_root_network);
602 
603     if (status != MMDBW_SUCCESS) {
604         croak("Unable to create IPv4 root node when setting up aliases: %s",
605               status_error_message(status));
606     }
607 
608     for (size_t i = 0; i < sizeof(ipv4_aliases) / sizeof(struct network); i++) {
609         MMDBW_network_s alias_network = resolve_network(
610             tree, ipv4_aliases[i].ipstr, ipv4_aliases[i].prefix_length);
611 
612         MMDBW_record_s record_for_alias = {
613             .type = MMDBW_RECORD_TYPE_ALIAS,
614             .value.node = ipv4_root_node,
615         };
616 
617         MMDBW_status status =
618             insert_record_for_network(tree,
619                                       &alias_network,
620                                       &record_for_alias,
621                                       MMDBW_MERGE_STRATEGY_NONE,
622                                       true);
623 
624         free_network(&alias_network);
625 
626         if (MMDBW_SUCCESS != status) {
627             croak("Unexpected error when searching for last node for alias: %s",
628                   status_error_message(status));
629         }
630     }
631 }
632 
633 // https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml
634 static struct network reserved_networks_ipv4[] = {
635     {.ipstr = "0.0.0.0", .prefix_length = 8},
636     {.ipstr = "10.0.0.0", .prefix_length = 8},
637     {.ipstr = "100.64.0.0", .prefix_length = 10},
638     {.ipstr = "127.0.0.0", .prefix_length = 8},
639     {.ipstr = "169.254.0.0", .prefix_length = 16},
640     {.ipstr = "172.16.0.0", .prefix_length = 12},
641     // This is an odd case. 192.0.0.0/24 is reserved, but there is a note that
642     // says "Not useable unless by virtue of a more specific reservation". As
643     // such, since 192.0.0.0/29 was more recently reserved, it's possible the
644     // intention is that the rest is not reserved any longer. I'm not too clear
645     // on this, but I believe that is the rationale, so I choose to leave it.
646     {.ipstr = "192.0.0.0", .prefix_length = 29},
647     // TODO(wstorey@maxmind.com): 192.168.0.8/32
648     // TODO(wstorey@maxmind.com): 192.168.0.9/32
649     // TODO(wstorey@maxmind.com): 192.168.0.10/32
650     // TODO(wstorey@maxmind.com): 192.168.0.170/32
651     // TODO(wstorey@maxmind.com): 192.168.0.171/32
652     {.ipstr = "192.0.2.0", .prefix_length = 24},
653     // 192.31.196.0/24 is routable I believe
654     // TODO(wstorey@maxmnd.com): 192.52.193.0/24
655     // TODO(wstorey@maxmind.com): Looks like 192.88.99.0/24 may no longer be
656     // reserved?
657     {.ipstr = "192.88.99.0", .prefix_length = 24},
658     {.ipstr = "192.168.0.0", .prefix_length = 16},
659     // 192.175.48.0/24 is routable I believe
660     {.ipstr = "198.18.0.0", .prefix_length = 15},
661     {.ipstr = "198.51.100.0", .prefix_length = 24},
662     {.ipstr = "203.0.113.0", .prefix_length = 24},
663     // The above IANA page doesn't list 224.0.0.0/4, but at least some parts
664     // are listed in https://tools.ietf.org/html/rfc5771
665     {.ipstr = "224.0.0.0", .prefix_length = 4},
666     {.ipstr = "240.0.0.0", .prefix_length = 4},
667     // 255.255.255.255/32 gets brought in by 240.0.0.0/4.
668 };
669 
670 // https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml
671 static struct network reserved_networks_ipv6[] = {
672     // ::/128 and ::1/128 are reserved under IPv6 but these are already
673     // covered under 0.0.0.0/8.
674     //
675     // ::ffff:0:0/96 - IPv4 mapped addresses. We treat it specially with the
676     // `alias_ipv6_to_ipv4' option.
677     //
678     // 64:ff9b::/96 - well known prefix mapping, covered by alias_ipv6_to_ipv4
679     //
680     // TODO(wstorey@maxmind.com): 64:ff9b:1::/48 should be in
681     // alias_ipv6_to_ipv4?
682 
683     {.ipstr = "100::", .prefix_length = 64},
684 
685     // 2001::/23 is reserved. We include all of it here other than 2001::/32
686     // as it is Teredo which is globally routable.
687     {.ipstr = "2001:1::", .prefix_length = 32},
688     {.ipstr = "2001:2::", .prefix_length = 31},
689     {.ipstr = "2001:4::", .prefix_length = 30},
690     {.ipstr = "2001:8::", .prefix_length = 29},
691     {.ipstr = "2001:10::", .prefix_length = 28},
692     {.ipstr = "2001:20::", .prefix_length = 27},
693     {.ipstr = "2001:40::", .prefix_length = 26},
694     {.ipstr = "2001:80::", .prefix_length = 25},
695     {.ipstr = "2001:100::", .prefix_length = 24},
696 
697     {.ipstr = "2001:db8::", .prefix_length = 32},
698     // 2002::/16 - 6to4, part of alias_ipv6_to_ipv4
699     // 2620:4f:8000::/48 is routable I believe
700     {.ipstr = "fc00::", .prefix_length = 7},
701     {.ipstr = "fe80::", .prefix_length = 10},
702     // Multicast
703     {.ipstr = "ff00::", .prefix_length = 8},
704 };
705 
706 // Insert a FIXED_EMPTY record for all private and reserved networks.
707 //
708 // This is to avoid the case where we might otherwise accidentally add
709 // information about such networks.
710 static MMDBW_status
insert_reserved_networks_as_fixed_empty(MMDBW_tree_s * tree)711 insert_reserved_networks_as_fixed_empty(MMDBW_tree_s *tree) {
712     MMDBW_status const status = insert_networks_as_fixed_empty(
713         tree,
714         reserved_networks_ipv4,
715         sizeof(reserved_networks_ipv4) / sizeof(struct network));
716     if (status != MMDBW_SUCCESS) {
717         return status;
718     }
719 
720     if (tree->ip_version == 6) {
721         MMDBW_status const status = insert_networks_as_fixed_empty(
722             tree,
723             reserved_networks_ipv6,
724             sizeof(reserved_networks_ipv6) / sizeof(struct network));
725         if (status != MMDBW_SUCCESS) {
726             return status;
727         }
728     }
729 
730     return MMDBW_SUCCESS;
731 }
732 
733 // Insert a FIXED_EMPTY record for each network.
734 static MMDBW_status
insert_networks_as_fixed_empty(MMDBW_tree_s * tree,struct network const * const networks,const size_t num_networks)735 insert_networks_as_fixed_empty(MMDBW_tree_s *tree,
736                                struct network const *const networks,
737                                const size_t num_networks) {
738     for (size_t i = 0; i < num_networks; i++) {
739         MMDBW_network_s resolved_network =
740             resolve_network(tree, networks[i].ipstr, networks[i].prefix_length);
741 
742         MMDBW_record_s record = {
743             .type = MMDBW_RECORD_TYPE_FIXED_EMPTY,
744         };
745 
746         MMDBW_status const status = insert_record_for_network(
747             tree, &resolved_network, &record, MMDBW_MERGE_STRATEGY_NONE, true);
748 
749         free_network(&resolved_network);
750 
751         if (status != MMDBW_SUCCESS) {
752             return status;
753         }
754     }
755 
756     return MMDBW_SUCCESS;
757 }
758 
759 static MMDBW_status
insert_record_for_network(MMDBW_tree_s * tree,MMDBW_network_s * network,MMDBW_record_s * new_record,MMDBW_merge_strategy merge_strategy,bool is_internal_insert)760 insert_record_for_network(MMDBW_tree_s *tree,
761                           MMDBW_network_s *network,
762                           MMDBW_record_s *new_record,
763                           MMDBW_merge_strategy merge_strategy,
764                           bool is_internal_insert) {
765     if (merge_strategy == MMDBW_MERGE_STRATEGY_UNKNOWN) {
766         merge_strategy = tree->merge_strategy;
767     }
768 
769     return insert_record_into_next_node(tree,
770                                         &(tree->root_record),
771                                         network,
772                                         0,
773                                         new_record,
774                                         merge_strategy,
775                                         is_internal_insert);
776 }
777 
778 static MMDBW_status
insert_record_into_next_node(MMDBW_tree_s * tree,MMDBW_record_s * current_record,MMDBW_network_s * network,int current_bit,MMDBW_record_s * new_record,MMDBW_merge_strategy merge_strategy,bool is_internal_insert)779 insert_record_into_next_node(MMDBW_tree_s *tree,
780                              MMDBW_record_s *current_record,
781                              MMDBW_network_s *network,
782                              int current_bit,
783                              MMDBW_record_s *new_record,
784                              MMDBW_merge_strategy merge_strategy,
785                              bool is_internal_insert) {
786     // We've reached the record where the network belongs. Depending on the
787     // type of record it is, we insert right here.
788     if (current_bit >= network->prefix_length &&
789         (current_record->type == MMDBW_RECORD_TYPE_EMPTY ||
790          current_record->type == MMDBW_RECORD_TYPE_DATA ||
791          (current_record->type == MMDBW_RECORD_TYPE_NODE &&
792           merge_strategy == MMDBW_MERGE_STRATEGY_NONE))) {
793         return insert_record_into_current_record(
794             tree, current_record, network, new_record, merge_strategy);
795     }
796 
797     if (current_bit == network->prefix_length &&
798         current_record->type == MMDBW_RECORD_TYPE_FIXED_NODE &&
799         new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
800         // We could potentially make this work, but it's tricky. One of the
801         // purposes of fixed nodes is for alias nodes to point at them.
802         // Returning success here without doing anything is not what we want to
803         // do: We should be setting the node value to what is in the new record
804         // since it may be setting up alias nodes pointing at it. Changing the
805         // current node value is not an option either: Aliases may already be
806         // pointing to it.
807         //
808         // If we don't have this case, we'll continue to descend the tree,
809         // inserting the new record's fixed node in potentially multiple
810         // locations. If that happens, we end up freeing the node multiple
811         // times when destroying the tree.
812         //
813         // Consider the case where we previously inserted fixed nodes to a
814         // greater depth than we are right now. Because we'd continue to
815         // descend the tree, we'd insert this new fixed node record at
816         // potentially multiple spots in the tree, at each point we encounter
817         // an EMPTY, DATA, or NODE record (the conditions where we can enter
818         // insert_record_into_current_record()). The stopping condition would
819         // not be correct.
820         return MMDBW_FIXED_NODE_OVERWRITE_ATTEMPT_ERROR;
821     }
822 
823     // Figure out the next node.
824     MMDBW_node_s *next_node = NULL;
825     switch (current_record->type) {
826         case MMDBW_RECORD_TYPE_EMPTY:
827         case MMDBW_RECORD_TYPE_DATA: {
828             // In this case we create a new node to point to. We make the new
829             // nodes left and right identical to us.
830             next_node = new_node_from_record(tree, current_record);
831             current_record->value.node = next_node;
832             current_record->type = MMDBW_RECORD_TYPE_NODE;
833             break;
834         }
835         case MMDBW_RECORD_TYPE_FIXED_EMPTY:
836             // We're a) trying to overwrite a fixed empty network, b) trying to
837             // insert a network containing a fixed empty network, or c) trying
838             // to insert inside a fixed empty network. (You can see these 3
839             // cases handled separately in the ALIAS case.) We ignore the insert
840             // with the assumption that it was not intended. e.g., you probably
841             // didn't mean to insert data about a reserved network. Doing so
842             // makes working with dirty data easier. Note this may change to be
843             // more strict and return an error in the future.
844             return MMDBW_SUCCESS;
845             break;
846         case MMDBW_RECORD_TYPE_ALIAS: {
847             // The insert is trying to overwrite an aliased network. We do not
848             // allow this.
849             if (current_bit == network->prefix_length) {
850                 return MMDBW_ALIAS_OVERWRITE_ATTEMPT_ERROR;
851             }
852 
853             // We don't follow aliases when inserting a network that contains
854             // an aliased network within it.
855             if (current_bit > network->prefix_length) {
856                 // We return success to silently ignore when we try to insert
857                 // here. If we raise an error, it makes working with dirty data
858                 // more difficult. We assume such inserts were not intended and
859                 // can be safely skipped.
860                 return MMDBW_SUCCESS;
861             }
862             // current_bit < network->prefix_length. This means we contain the
863             // new network already as ALIAS. Inserting the network is not valid
864             // because of that.
865             return MMDBW_INSERT_INTO_ALIAS_NODE_ERROR;
866             break;
867         }
868         case MMDBW_RECORD_TYPE_FIXED_NODE:
869         case MMDBW_RECORD_TYPE_NODE: {
870             // We're a node already.
871             next_node = current_record->value.node;
872             break;
873         }
874     }
875 
876     // If we are inserting an alias, a fixed node, or a fixed empty record, we
877     // make all of the nodes down to that record fixed nodes. This makes it
878     // easier to not accidentally delete or modify them.
879     if (new_record->type == MMDBW_RECORD_TYPE_ALIAS ||
880         new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
881         new_record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
882         current_record->type = MMDBW_RECORD_TYPE_FIXED_NODE;
883     }
884 
885     bool next_is_right = network_bit_value(network, current_bit);
886 
887     // if we are beyond the prefix length, both records are within the
888     // network we are inserting.
889     bool insert_into_both = current_bit >= network->prefix_length;
890 
891     if (next_is_right || insert_into_both) {
892         MMDBW_status status =
893             insert_record_into_next_node(tree,
894                                          &(next_node->right_record),
895                                          network,
896                                          current_bit + 1,
897                                          new_record,
898                                          merge_strategy,
899                                          is_internal_insert);
900         if (status != MMDBW_SUCCESS) {
901             return status;
902         }
903     }
904 
905     if (!next_is_right || insert_into_both) {
906         MMDBW_status status =
907             insert_record_into_next_node(tree,
908                                          &(next_node->left_record),
909                                          network,
910                                          current_bit + 1,
911                                          new_record,
912                                          merge_strategy,
913                                          is_internal_insert);
914         if (status != MMDBW_SUCCESS) {
915             return status;
916         }
917     }
918 
919     // We inserted the new record into the right and/or left record of the next
920     // node. We now need to trim the tree upwards by merging identical records.
921     // Basically what we do here is take care of the case where the record
922     // we're at points at another node, and the records in that node are both
923     // the same. In that case, we delete the node we point at and take its
924     // value on ourselves.
925 
926     if (next_node->left_record.type == next_node->right_record.type &&
927         // We don't allow merging into aliases or fixed nodes
928         current_record->type == MMDBW_RECORD_TYPE_NODE) {
929         switch (next_node->left_record.type) {
930             case MMDBW_RECORD_TYPE_EMPTY: {
931                 MMDBW_status status =
932                     free_node_and_subnodes(tree, next_node, false);
933                 if (status != MMDBW_SUCCESS) {
934                     return MMDBW_SUCCESS;
935                 }
936                 current_record->type = MMDBW_RECORD_TYPE_EMPTY;
937                 break;
938             }
939             case MMDBW_RECORD_TYPE_DATA: {
940                 // If the two keys are the same, the records can be merged.
941                 // Otherwise, break.
942                 if (strcmp(next_node->left_record.value.key,
943                            next_node->right_record.value.key)) {
944                     break;
945                 }
946                 const char *key = increment_data_reference_count(
947                     tree, next_node->left_record.value.key);
948                 MMDBW_status status =
949                     free_node_and_subnodes(tree, next_node, false);
950                 if (status != MMDBW_SUCCESS) {
951                     return MMDBW_SUCCESS;
952                 }
953                 current_record->type = MMDBW_RECORD_TYPE_DATA;
954                 current_record->value.key = key;
955                 break;
956             }
957             case MMDBW_RECORD_TYPE_ALIAS:
958             case MMDBW_RECORD_TYPE_FIXED_NODE:
959             case MMDBW_RECORD_TYPE_FIXED_EMPTY:
960             case MMDBW_RECORD_TYPE_NODE: {
961                 // Do nothing in these cases. We don't trim immutable nodes.
962                 break;
963             }
964         }
965     }
966     return MMDBW_SUCCESS;
967 }
968 
969 static MMDBW_status
insert_record_into_current_record(MMDBW_tree_s * tree,MMDBW_record_s * current_record,MMDBW_network_s * network,MMDBW_record_s * new_record,MMDBW_merge_strategy merge_strategy)970 insert_record_into_current_record(MMDBW_tree_s *tree,
971                                   MMDBW_record_s *current_record,
972                                   MMDBW_network_s *network,
973                                   MMDBW_record_s *new_record,
974                                   MMDBW_merge_strategy merge_strategy) {
975     // We only get called when we have a current_record with these record
976     // types. There was previously logic for other types, but that was
977     // confusing.
978     if (current_record->type != MMDBW_RECORD_TYPE_EMPTY &&
979         current_record->type != MMDBW_RECORD_TYPE_DATA &&
980         current_record->type != MMDBW_RECORD_TYPE_NODE) {
981         return MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR;
982     }
983 
984     if (current_record->type == MMDBW_RECORD_TYPE_EMPTY &&
985         merge_strategy == MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS) {
986         // We do not create a new record when using "only if parent exists"
987         return MMDBW_SUCCESS;
988     }
989 
990     const char *merged_key = maybe_merge_records(
991         tree, network, new_record, current_record, merge_strategy);
992 
993     MMDBW_status status = free_record_value(tree, current_record, false);
994     if (status != MMDBW_SUCCESS) {
995         return status;
996     }
997 
998     // Update the record to match the new one. Replace what's there.
999     current_record->type = new_record->type;
1000     if (new_record->type == MMDBW_RECORD_TYPE_DATA) {
1001         const char *const key = increment_data_reference_count(
1002             tree, merged_key == NULL ? new_record->value.key : merged_key);
1003         current_record->value.key = key;
1004     } else if (new_record->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1005                new_record->type == MMDBW_RECORD_TYPE_NODE ||
1006                new_record->type == MMDBW_RECORD_TYPE_ALIAS) {
1007         current_record->value.node = new_record->value.node;
1008     } else if (new_record->type == MMDBW_RECORD_TYPE_EMPTY ||
1009                new_record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
1010         current_record->value.key = NULL;
1011         current_record->value.node = NULL;
1012     } else {
1013         return MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR;
1014     }
1015 
1016     if (merged_key) {
1017         decrement_data_reference_count(tree, merged_key);
1018     }
1019 
1020     return status;
1021 }
1022 
maybe_merge_records(MMDBW_tree_s * tree,MMDBW_network_s * network,MMDBW_record_s * new_record,MMDBW_record_s * record_to_set,MMDBW_merge_strategy merge_strategy)1023 static const char *maybe_merge_records(MMDBW_tree_s *tree,
1024                                        MMDBW_network_s *network,
1025                                        MMDBW_record_s *new_record,
1026                                        MMDBW_record_s *record_to_set,
1027                                        MMDBW_merge_strategy merge_strategy) {
1028     if (MMDBW_RECORD_TYPE_DATA != new_record->type ||
1029         merge_strategy == MMDBW_MERGE_STRATEGY_NONE) {
1030         return NULL;
1031     }
1032 
1033     /* This must come before the node pruning code in
1034        insert_record_for_network, as we only want to prune nodes where the
1035        merged record matches. */
1036     if (MMDBW_RECORD_TYPE_DATA != record_to_set->type
1037         // If the two keys are equal, there is no point in trying to merge
1038         // the contents.
1039         || strcmp(new_record->value.key, record_to_set->value.key) == 0) {
1040         return NULL;
1041     }
1042 
1043     char merge_cache_key[MERGE_KEY_SIZE + 1];
1044     snprintf(merge_cache_key,
1045              MERGE_KEY_SIZE + 1,
1046              "%d-%s-%s",
1047              merge_strategy,
1048              new_record->value.key,
1049              record_to_set->value.key);
1050 
1051     const char *cached_key = merge_cache_lookup(tree, merge_cache_key);
1052     if (cached_key != NULL) {
1053         const char *const new_key =
1054             increment_data_reference_count(tree, cached_key);
1055         return new_key;
1056     }
1057 
1058     SV *merged = merge_hashes_for_keys(tree,
1059                                        new_record->value.key,
1060                                        record_to_set->value.key,
1061                                        network,
1062                                        merge_strategy);
1063 
1064     SV *key_sv = key_for_data(merged);
1065     const char *const new_key =
1066         store_data_in_tree(tree, SvPVbyte_nolen(key_sv), merged);
1067     SvREFCNT_dec(key_sv);
1068 
1069     /* The ref count was incremented in store_data_in_tree */
1070     SvREFCNT_dec(merged);
1071 
1072     store_in_merge_cache(tree, merge_cache_key, new_key);
1073 
1074     return new_key;
1075 }
1076 
network_bit_value(MMDBW_network_s * network,uint8_t current_bit)1077 static int network_bit_value(MMDBW_network_s *network, uint8_t current_bit) {
1078     return network->bytes[current_bit >> 3] & (1U << (~current_bit & 7));
1079 }
1080 
tree_depth0(MMDBW_tree_s * tree)1081 static int tree_depth0(MMDBW_tree_s *tree) {
1082     return tree->ip_version == 6 ? 127 : 31;
1083 }
1084 
merge_hashes_for_keys(MMDBW_tree_s * tree,const char * const key_from,const char * const key_into,MMDBW_network_s * network,MMDBW_merge_strategy merge_strategy)1085 SV *merge_hashes_for_keys(MMDBW_tree_s *tree,
1086                           const char *const key_from,
1087                           const char *const key_into,
1088                           MMDBW_network_s *network,
1089                           MMDBW_merge_strategy merge_strategy) {
1090 
1091     SV *data_from = data_for_key(tree, key_from);
1092     SV *data_into = data_for_key(tree, key_into);
1093 
1094     if (!(SvROK(data_from) && SvROK(data_into) &&
1095           SvTYPE(SvRV(data_from)) == SVt_PVHV &&
1096           SvTYPE(SvRV(data_into)) == SVt_PVHV)) {
1097         /* We added key_into earlier during insert_record_for_network, so we
1098            have to make sure here that it's removed again after we decide to
1099            not actually store this network. It might be nicer to not insert
1100            anything into the tree until we're sure we really want to. */
1101         decrement_data_reference_count(tree, key_from);
1102 
1103         bool is_ipv6 = tree->ip_version == 6;
1104         char address_string[is_ipv6 ? INET6_ADDRSTRLEN : INET_ADDRSTRLEN];
1105         inet_ntop(is_ipv6 ? AF_INET6 : AF_INET,
1106                   network->bytes,
1107                   address_string,
1108                   sizeof(address_string));
1109 
1110         croak("Cannot merge data records unless both records are hashes - "
1111               "inserting %s/%" PRIu8,
1112               address_string,
1113               network->prefix_length);
1114     }
1115 
1116     return merge_hashes(tree, data_from, data_into, merge_strategy);
1117 }
1118 
merge_hashes(MMDBW_tree_s * tree,SV * from,SV * into,MMDBW_merge_strategy merge_strategy)1119 static SV *merge_hashes(MMDBW_tree_s *tree,
1120                         SV *from,
1121                         SV *into,
1122                         MMDBW_merge_strategy merge_strategy) {
1123     HV *hash_from = (HV *)SvRV(from);
1124     HV *hash_into = (HV *)SvRV(into);
1125     HV *hash_new = newHV();
1126 
1127     merge_new_from_hash_into_hash(tree, hash_into, hash_new, false);
1128     merge_new_from_hash_into_hash(tree, hash_from, hash_new, merge_strategy);
1129 
1130     return newRV_noinc((SV *)hash_new);
1131 }
1132 
1133 // Note: unlike the other merge functions, this does _not_ replace existing
1134 // values.
merge_new_from_hash_into_hash(MMDBW_tree_s * tree,HV * from,HV * to,MMDBW_merge_strategy merge_strategy)1135 static void merge_new_from_hash_into_hash(MMDBW_tree_s *tree,
1136                                           HV *from,
1137                                           HV *to,
1138                                           MMDBW_merge_strategy merge_strategy) {
1139     (void)hv_iterinit(from);
1140     HE *he;
1141     while (NULL != (he = hv_iternext(from))) {
1142         STRLEN key_length;
1143         const char *const key = HePV(he, key_length);
1144         U32 hash = 0;
1145         SV *value = HeVAL(he);
1146         if (hv_exists(to, key, key_length)) {
1147             if (merge_strategy == MMDBW_MERGE_STRATEGY_RECURSE ||
1148                 merge_strategy ==
1149                     MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS) {
1150                 SV **existing_value = hv_fetch(to, key, key_length, 0);
1151                 if (existing_value == NULL) {
1152                     // This should never happen as we just did an hv_exists
1153                     croak("Received an unexpected NULL from hv_fetch");
1154                 }
1155 
1156                 value =
1157                     merge_values(tree, value, *existing_value, merge_strategy);
1158             } else {
1159                 // We are replacing the current value
1160                 hash = HeHASH(he);
1161                 SvREFCNT_inc_simple_void_NN(value);
1162             }
1163         } else if (merge_strategy ==
1164                        MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS &&
1165                    SvROK(value)) {
1166             continue;
1167         } else {
1168             hash = HeHASH(he);
1169             SvREFCNT_inc_simple_void_NN(value);
1170         }
1171 
1172         (void)hv_store(to, key, key_length, value, hash);
1173     }
1174 
1175     return;
1176 }
1177 
merge_values(MMDBW_tree_s * tree,SV * from,SV * into,MMDBW_merge_strategy merge_strategy)1178 static SV *merge_values(MMDBW_tree_s *tree,
1179                         SV *from,
1180                         SV *into,
1181                         MMDBW_merge_strategy merge_strategy) {
1182     if (SvROK(from) != SvROK(into)) {
1183         croak("Attempt to merge a reference value and non-refrence value");
1184     }
1185 
1186     if (!SvROK(from)) {
1187         // If the two values are scalars, we prefer the one in the hash being
1188         // inserted.
1189         SvREFCNT_inc_simple_void_NN(from);
1190         return from;
1191     }
1192 
1193     if (SvTYPE(SvRV(from)) == SVt_PVHV && SvTYPE(SvRV(into)) == SVt_PVHV) {
1194         return merge_hashes(tree, from, into, merge_strategy);
1195     }
1196 
1197     if (SvTYPE(SvRV(from)) == SVt_PVAV && SvTYPE(SvRV(into)) == SVt_PVAV) {
1198         return merge_arrays(tree, from, into, merge_strategy);
1199     }
1200 
1201     croak("Only arrayrefs, hashrefs, and scalars can be merged.");
1202 }
1203 
merge_arrays(MMDBW_tree_s * tree,SV * from,SV * into,MMDBW_merge_strategy merge_strategy)1204 static SV *merge_arrays(MMDBW_tree_s *tree,
1205                         SV *from,
1206                         SV *into,
1207                         MMDBW_merge_strategy merge_strategy) {
1208     AV *from_array = (AV *)SvRV(from);
1209     AV *into_array = (AV *)SvRV(into);
1210 
1211     // Note that av_len() is really the index of the last element. In newer
1212     // Perl versions, it is also called av_top_index() or av_tindex()
1213     SSize_t from_top_index = av_len(from_array);
1214     SSize_t into_top_index = av_len(into_array);
1215 
1216     SSize_t new_top_index =
1217         from_top_index > into_top_index ? from_top_index : into_top_index;
1218 
1219     AV *new_array = newAV();
1220     for (SSize_t i = 0; i <= new_top_index; i++) {
1221         SV *new_value = NULL;
1222         SV **from_value = av_fetch(from_array, i, 0);
1223         SV **into_value = av_fetch(into_array, i, 0);
1224         if (from_value != NULL && into_value != NULL) {
1225             new_value =
1226                 merge_values(tree, *from_value, *into_value, merge_strategy);
1227         } else if (from_value != NULL) {
1228             if (merge_strategy ==
1229                     MMDBW_MERGE_STRATEGY_ADD_ONLY_IF_PARENT_EXISTS &&
1230                 SvROK(*from_value)) {
1231                 break;
1232             }
1233             new_value = *from_value;
1234             SvREFCNT_inc_simple_void_NN(new_value);
1235         } else if (into_value != NULL) {
1236             new_value = *into_value;
1237             SvREFCNT_inc_simple_void_NN(new_value);
1238         } else {
1239             croak("Received unexpected NULLs when merging arrays");
1240         }
1241 
1242         av_push(new_array, new_value);
1243     }
1244     return newRV_noinc((SV *)new_array);
1245 }
1246 
lookup_ip_address(MMDBW_tree_s * tree,const char * const ipstr)1247 SV *lookup_ip_address(MMDBW_tree_s *tree, const char *const ipstr) {
1248     bool is_ipv6_address = NULL != strchr(ipstr, ':');
1249     if (tree->ip_version == 4 && is_ipv6_address) {
1250         return &PL_sv_undef;
1251     }
1252     MMDBW_network_s network =
1253         resolve_network(tree, ipstr, is_ipv6_address ? 128 : 32);
1254 
1255     MMDBW_record_s *record_for_address;
1256     MMDBW_status status =
1257         find_record_for_network(tree, &network, &record_for_address);
1258 
1259     free_network(&network);
1260 
1261     if (MMDBW_SUCCESS != status) {
1262         croak("Received an unexpected NULL when looking up %s: %s",
1263               ipstr,
1264               status_error_message(status));
1265     }
1266 
1267     if (record_for_address->type == MMDBW_RECORD_TYPE_NODE ||
1268         record_for_address->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1269         record_for_address->type == MMDBW_RECORD_TYPE_ALIAS) {
1270         croak("WTF - found a node or alias record for an address lookup - "
1271               "%s" PRIu8,
1272               ipstr);
1273         return &PL_sv_undef;
1274     }
1275 
1276     if (record_for_address->type == MMDBW_RECORD_TYPE_EMPTY ||
1277         record_for_address->type == MMDBW_RECORD_TYPE_FIXED_EMPTY) {
1278         return &PL_sv_undef;
1279     }
1280 
1281     return newSVsv(data_for_key(tree, record_for_address->value.key));
1282 }
1283 
find_record_for_network(MMDBW_tree_s * tree,MMDBW_network_s * network,MMDBW_record_s ** record)1284 static MMDBW_status find_record_for_network(MMDBW_tree_s *tree,
1285                                             MMDBW_network_s *network,
1286                                             MMDBW_record_s **record) {
1287     *record = &(tree->root_record);
1288 
1289     for (int current_bit = 0; current_bit < network->prefix_length;
1290          current_bit++) {
1291 
1292         MMDBW_node_s *node;
1293         if ((*record)->type == MMDBW_RECORD_TYPE_NODE ||
1294             (*record)->type == MMDBW_RECORD_TYPE_FIXED_NODE ||
1295             (*record)->type == MMDBW_RECORD_TYPE_ALIAS) {
1296             node = (*record)->value.node;
1297         } else {
1298             break;
1299         }
1300 
1301         if (network_bit_value(network, current_bit)) {
1302             *record = &(node->right_record);
1303         } else {
1304             *record = &(node->left_record);
1305         }
1306     }
1307 
1308     return MMDBW_SUCCESS;
1309 }
1310 
new_node_from_record(MMDBW_tree_s * tree,MMDBW_record_s * record)1311 static MMDBW_node_s *new_node_from_record(MMDBW_tree_s *tree,
1312                                           MMDBW_record_s *record) {
1313     MMDBW_node_s *node = new_node();
1314     if (record->type == MMDBW_RECORD_TYPE_DATA) {
1315         /* We only need to increment the reference count once as we are
1316            replacing the parent record */
1317         increment_data_reference_count(tree, record->value.key);
1318 
1319         node->left_record.type = MMDBW_RECORD_TYPE_DATA;
1320         node->left_record.value.key = record->value.key;
1321 
1322         node->right_record.type = MMDBW_RECORD_TYPE_DATA;
1323         node->right_record.value.key = record->value.key;
1324     }
1325 
1326     return node;
1327 }
1328 
new_node()1329 MMDBW_node_s *new_node() {
1330     MMDBW_node_s *node = checked_malloc(sizeof(MMDBW_node_s));
1331 
1332     node->number = 0;
1333     node->left_record.type = node->right_record.type = MMDBW_RECORD_TYPE_EMPTY;
1334 
1335     return node;
1336 }
1337 
free_node_and_subnodes(MMDBW_tree_s * tree,MMDBW_node_s * node,bool remove_alias_and_fixed_nodes)1338 static MMDBW_status free_node_and_subnodes(MMDBW_tree_s *tree,
1339                                            MMDBW_node_s *node,
1340                                            bool remove_alias_and_fixed_nodes) {
1341     MMDBW_status status = free_record_value(
1342         tree, &(node->left_record), remove_alias_and_fixed_nodes);
1343     if (status != MMDBW_SUCCESS) {
1344         return status;
1345     }
1346 
1347     status = free_record_value(
1348         tree, &(node->right_record), remove_alias_and_fixed_nodes);
1349     if (status != MMDBW_SUCCESS) {
1350         return status;
1351     }
1352 
1353     free(node);
1354     return MMDBW_SUCCESS;
1355 }
1356 
free_record_value(MMDBW_tree_s * tree,MMDBW_record_s * record,bool remove_alias_and_fixed_nodes)1357 static MMDBW_status free_record_value(MMDBW_tree_s *tree,
1358                                       MMDBW_record_s *record,
1359                                       bool remove_alias_and_fixed_nodes) {
1360     if (record->type == MMDBW_RECORD_TYPE_FIXED_NODE &&
1361         !remove_alias_and_fixed_nodes) {
1362         return MMDBW_FREED_FIXED_NODE_ERROR;
1363     }
1364 
1365     if (record->type == MMDBW_RECORD_TYPE_FIXED_EMPTY &&
1366         !remove_alias_and_fixed_nodes) {
1367         return MMDBW_FREED_FIXED_EMPTY_ERROR;
1368     }
1369 
1370     if (record->type == MMDBW_RECORD_TYPE_NODE ||
1371         record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1372         return free_node_and_subnodes(
1373             tree, record->value.node, remove_alias_and_fixed_nodes);
1374     }
1375 
1376     if (record->type == MMDBW_RECORD_TYPE_DATA) {
1377         decrement_data_reference_count(tree, record->value.key);
1378     }
1379 
1380     /* Alias nodes should only be removed explicitly. We can't just croak
1381        as it will leave the tree in an inconsistent state causing a segfault
1382        during unwinding. */
1383     if (record->type == MMDBW_RECORD_TYPE_ALIAS &&
1384         !remove_alias_and_fixed_nodes) {
1385         return MMDBW_FREED_ALIAS_NODE_ERROR;
1386     }
1387     return MMDBW_SUCCESS;
1388 }
1389 
assign_node_numbers(MMDBW_tree_s * tree)1390 void assign_node_numbers(MMDBW_tree_s *tree) {
1391     tree->node_count = 0;
1392     start_iteration(tree, false, (void *)NULL, &assign_node_number);
1393 }
1394 
assign_node_number(MMDBW_tree_s * tree,MMDBW_node_s * node,uint128_t UNUSED (network),uint8_t UNUSED (depth),void * UNUSED (args))1395 static void assign_node_number(MMDBW_tree_s *tree,
1396                                MMDBW_node_s *node,
1397                                uint128_t UNUSED(network),
1398                                uint8_t UNUSED(depth),
1399                                void *UNUSED(args)) {
1400     node->number = tree->node_count++;
1401     return;
1402 }
1403 
1404 /* 16 bytes for an IP address, 1 byte for the prefix length */
1405 #define FROZEN_RECORD_MAX_SIZE (16 + 1 + SHA1_KEY_LENGTH)
1406 #define FROZEN_NODE_MAX_SIZE (FROZEN_RECORD_MAX_SIZE * 2)
1407 
1408 /* 17 bytes of NULLs followed by something that cannot be an SHA1 key are a
1409    clear indicator that there are no more frozen networks in the buffer. */
1410 #define SEVENTEEN_NULLS "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
1411 #define FREEZE_SEPARATOR "not an SHA1 key"
1412 /* We subtract 1 as we treat this as a sequence of bytes rather than a null
1413    terminated string. */
1414 #define FREEZE_SEPARATOR_LENGTH (sizeof(FREEZE_SEPARATOR) - 1)
1415 
freeze_tree(MMDBW_tree_s * tree,char * filename,char * frozen_params,size_t frozen_params_size)1416 void freeze_tree(MMDBW_tree_s *tree,
1417                  char *filename,
1418                  char *frozen_params,
1419                  size_t frozen_params_size) {
1420     FILE *file = fopen(filename, "wb");
1421     if (!file) {
1422         croak("Could not open file %s: %s", filename, strerror(errno));
1423     }
1424 
1425     freeze_args_s args = {
1426         .file = file,
1427         .filename = filename,
1428     };
1429 
1430     freeze_to_file(&args, &frozen_params_size, 4);
1431     freeze_to_file(&args, frozen_params, frozen_params_size);
1432 
1433     freeze_search_tree(tree, &args);
1434 
1435     freeze_to_file(&args, SEVENTEEN_NULLS, 17);
1436     freeze_to_file(&args, FREEZE_SEPARATOR, FREEZE_SEPARATOR_LENGTH);
1437 
1438     freeze_data_to_file(&args, tree);
1439 
1440     if (fclose(file) != 0) {
1441         croak("Could not close file %s: %s", filename, strerror(errno));
1442     }
1443 
1444     /* When the hash is _freed_, Perl decrements the ref count for each value
1445      * so we don't need to mess with them. */
1446     SvREFCNT_dec((SV *)args.data_hash);
1447 }
1448 
freeze_search_tree(MMDBW_tree_s * tree,freeze_args_s * args)1449 static void freeze_search_tree(MMDBW_tree_s *tree, freeze_args_s *args) {
1450     if (tree->root_record.type == MMDBW_RECORD_TYPE_DATA) {
1451         croak("A tree that only contains a data record for /0 cannot be "
1452               "frozen");
1453     }
1454 
1455     if (tree->root_record.type == MMDBW_RECORD_TYPE_NODE ||
1456         tree->root_record.type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1457         start_iteration(tree, false, (void *)args, &freeze_node);
1458         return;
1459     }
1460 
1461     croak("Unexected root record type when freezing tree: %s",
1462           record_type_name(tree->root_record.type));
1463 }
1464 
freeze_node(MMDBW_tree_s * tree,MMDBW_node_s * node,uint128_t network,uint8_t depth,void * void_args)1465 static void freeze_node(MMDBW_tree_s *tree,
1466                         MMDBW_node_s *node,
1467                         uint128_t network,
1468                         uint8_t depth,
1469                         void *void_args) {
1470     freeze_args_s *args = (freeze_args_s *)void_args;
1471 
1472     const uint8_t next_depth = depth + 1;
1473 
1474     if (node->left_record.type == MMDBW_RECORD_TYPE_DATA) {
1475         freeze_data_record(
1476             tree, network, next_depth, node->left_record.value.key, args);
1477     }
1478 
1479     if (node->right_record.type == MMDBW_RECORD_TYPE_DATA) {
1480         uint128_t right_network = flip_network_bit(tree, network, depth);
1481         freeze_data_record(tree,
1482                            right_network,
1483                            next_depth,
1484                            node->right_record.value.key,
1485                            args);
1486     }
1487 }
1488 
freeze_data_record(MMDBW_tree_s * UNUSED (tree),uint128_t network,uint8_t depth,const char * key,freeze_args_s * args)1489 static void freeze_data_record(MMDBW_tree_s *UNUSED(tree),
1490                                uint128_t network,
1491                                uint8_t depth,
1492                                const char *key,
1493                                freeze_args_s *args) {
1494     /* It'd save some space to shrink this to 4 bytes for IPv4-only trees, but
1495      * that would also complicated thawing quite a bit. */
1496     freeze_to_file(args, &network, 16);
1497     freeze_to_file(args, &(depth), 1);
1498     freeze_to_file(args, (char *)key, SHA1_KEY_LENGTH);
1499 }
1500 
freeze_to_file(freeze_args_s * args,void * data,size_t size)1501 static void freeze_to_file(freeze_args_s *args, void *data, size_t size) {
1502     checked_fwrite(args->file, args->filename, data, size);
1503 }
1504 
freeze_data_to_file(freeze_args_s * args,MMDBW_tree_s * tree)1505 static void freeze_data_to_file(freeze_args_s *args, MMDBW_tree_s *tree) {
1506     HV *data_hash = newHV();
1507 
1508     MMDBW_data_hash_s *item, *tmp;
1509     HASH_ITER(hh, tree->data_table, item, tmp) {
1510         SvREFCNT_inc_simple_void_NN(item->data_sv);
1511         (void)hv_store(data_hash, item->key, SHA1_KEY_LENGTH, item->data_sv, 0);
1512     }
1513 
1514     SV *frozen_data = freeze_hash(data_hash);
1515     STRLEN frozen_data_size;
1516     char *frozen_data_chars = SvPV(frozen_data, frozen_data_size);
1517 
1518     checked_fwrite(
1519         args->file, args->filename, &frozen_data_size, sizeof(STRLEN));
1520     checked_fwrite(
1521         args->file, args->filename, frozen_data_chars, frozen_data_size);
1522 
1523     SvREFCNT_dec(frozen_data);
1524     SvREFCNT_dec((SV *)data_hash);
1525 }
1526 
freeze_hash(HV * hash)1527 static SV *freeze_hash(HV *hash) {
1528     dSP;
1529     ENTER;
1530     SAVETMPS;
1531 
1532     SV *hashref = sv_2mortal(newRV_inc((SV *)hash));
1533 
1534     PUSHMARK(SP);
1535     EXTEND(SP, 1);
1536     PUSHs(hashref);
1537     PUTBACK;
1538 
1539     int count = call_pv("Sereal::Encoder::encode_sereal", G_SCALAR);
1540 
1541     SPAGAIN;
1542 
1543     if (count != 1) {
1544         croak("Expected 1 item back from Sereal::Encoder::encode_sereal call");
1545     }
1546 
1547     SV *frozen = POPs;
1548     if (!SvPOK(frozen)) {
1549         croak("The Sereal::Encoder::encode_sereal sub returned an SV which is "
1550               "not SvPOK!");
1551     }
1552 
1553     /* The SV will be mortal so it's about to lose a ref with the FREETMPS
1554        call below. */
1555     SvREFCNT_inc_simple_void_NN(frozen);
1556 
1557     PUTBACK;
1558     FREETMPS;
1559     LEAVE;
1560 
1561     return frozen;
1562 }
1563 
thaw_tree(char * filename,uint32_t initial_offset,uint8_t ip_version,uint8_t record_size,MMDBW_merge_strategy merge_strategy,const bool alias_ipv6,const bool remove_reserved_networks)1564 MMDBW_tree_s *thaw_tree(char *filename,
1565                         uint32_t initial_offset,
1566                         uint8_t ip_version,
1567                         uint8_t record_size,
1568                         MMDBW_merge_strategy merge_strategy,
1569                         const bool alias_ipv6,
1570                         const bool remove_reserved_networks) {
1571 #ifdef WIN32
1572     int fd = open(filename, O_RDONLY);
1573 #else
1574     int fd = open(filename, O_RDONLY, 0);
1575 #endif
1576     if (fd == -1) {
1577         croak("Could not open file %s: %s", filename, strerror(errno));
1578     }
1579 
1580     struct stat fileinfo;
1581     if (fstat(fd, &fileinfo) == -1) {
1582         close(fd);
1583         croak("Could not stat file: %s: %s", filename, strerror(errno));
1584     }
1585 
1586     uint8_t *buffer =
1587         (uint8_t *)mmap(NULL, fileinfo.st_size, PROT_READ, MAP_SHARED, fd, 0);
1588     close(fd);
1589 
1590     buffer += initial_offset;
1591 
1592     MMDBW_tree_s *tree = new_tree(ip_version,
1593                                   record_size,
1594                                   merge_strategy,
1595                                   alias_ipv6,
1596                                   remove_reserved_networks);
1597 
1598     thawed_network_s *thawed;
1599     while (NULL != (thawed = thaw_network(tree, &buffer))) {
1600         // We should never need to merge when thawing a tree.
1601         MMDBW_status status =
1602             insert_record_for_network(tree,
1603                                       thawed->network,
1604                                       thawed->record,
1605                                       MMDBW_MERGE_STRATEGY_NONE,
1606                                       true);
1607         free_network(thawed->network);
1608         free(thawed->network);
1609         if (thawed->record->type == MMDBW_RECORD_TYPE_DATA) {
1610             free((char *)thawed->record->value.key);
1611         }
1612         free(thawed->record);
1613         free(thawed);
1614         if (status != MMDBW_SUCCESS) {
1615             croak(status_error_message(status));
1616         }
1617     }
1618 
1619     STRLEN frozen_data_size = thaw_strlen(&buffer);
1620 
1621     /* per perlapi newSVpvn copies the string */
1622     SV *data_to_decode = sv_2mortal(newSVpvn((char *)buffer, frozen_data_size));
1623     HV *data_hash = thaw_data_hash(data_to_decode);
1624 
1625     hv_iterinit(data_hash);
1626     char *key;
1627     I32 keylen;
1628     SV *value;
1629     while (NULL != (value = hv_iternextsv(data_hash, &key, &keylen))) {
1630         set_stored_data_in_tree(tree, key, value);
1631     }
1632 
1633     SvREFCNT_dec((SV *)data_hash);
1634 
1635     return tree;
1636 }
1637 
thaw_uint8(uint8_t ** buffer)1638 static uint8_t thaw_uint8(uint8_t **buffer) {
1639     uint8_t value;
1640     memcpy(&value, *buffer, 1);
1641     *buffer += 1;
1642     return value;
1643 }
1644 
thaw_network(MMDBW_tree_s * tree,uint8_t ** buffer)1645 static thawed_network_s *thaw_network(MMDBW_tree_s *tree, uint8_t **buffer) {
1646     uint128_t start_ip = thaw_uint128(buffer);
1647     uint8_t prefix_length = thaw_uint8(buffer);
1648 
1649     if (start_ip == 0 && prefix_length == 0) {
1650         uint8_t *maybe_separator = thaw_bytes(buffer, FREEZE_SEPARATOR_LENGTH);
1651         if (memcmp(maybe_separator,
1652                    FREEZE_SEPARATOR,
1653                    FREEZE_SEPARATOR_LENGTH) == 0) {
1654 
1655             free(maybe_separator);
1656             return NULL;
1657         }
1658 
1659         croak("Found a ::0/0 network but that should never happen!");
1660     }
1661 
1662     uint8_t *start_ip_bytes = (uint8_t *)&start_ip;
1663     uint8_t temp;
1664     for (int i = 0; i < 8; i++) {
1665         temp = start_ip_bytes[i];
1666         start_ip_bytes[i] = start_ip_bytes[15 - i];
1667         start_ip_bytes[15 - i] = temp;
1668     }
1669 
1670     thawed_network_s *thawed = checked_malloc(sizeof(thawed_network_s));
1671 
1672     uint8_t *bytes;
1673     if (tree->ip_version == 4) {
1674         bytes = checked_malloc(4);
1675         memcpy(bytes, start_ip_bytes + 12, 4);
1676     } else {
1677         bytes = checked_malloc(16);
1678         memcpy(bytes, &start_ip, 16);
1679     }
1680 
1681     MMDBW_network_s network = {
1682         .bytes = bytes,
1683         .prefix_length = prefix_length,
1684     };
1685 
1686     thawed->network = checked_malloc(sizeof(MMDBW_network_s));
1687     memcpy(thawed->network, &network, sizeof(MMDBW_network_s));
1688 
1689     MMDBW_record_s *record = checked_malloc(sizeof(MMDBW_record_s));
1690     record->type = MMDBW_RECORD_TYPE_DATA;
1691 
1692     record->value.key = thaw_data_key(buffer);
1693 
1694     thawed->record = record;
1695 
1696     return thawed;
1697 }
1698 
thaw_bytes(uint8_t ** buffer,size_t size)1699 static uint8_t *thaw_bytes(uint8_t **buffer, size_t size) {
1700     uint8_t *value = checked_malloc(size);
1701     memcpy(value, *buffer, size);
1702     *buffer += size;
1703     return value;
1704 }
1705 
thaw_uint128(uint8_t ** buffer)1706 static uint128_t thaw_uint128(uint8_t **buffer) {
1707     uint128_t value;
1708     memcpy(&value, *buffer, 16);
1709     *buffer += 16;
1710     return value;
1711 }
1712 
thaw_strlen(uint8_t ** buffer)1713 static STRLEN thaw_strlen(uint8_t **buffer) {
1714     STRLEN value;
1715     memcpy(&value, *buffer, sizeof(STRLEN));
1716     *buffer += sizeof(STRLEN);
1717     return value;
1718 }
1719 
thaw_data_key(uint8_t ** buffer)1720 static const char *thaw_data_key(uint8_t **buffer) {
1721     char *value = checked_malloc(SHA1_KEY_LENGTH + 1);
1722     memcpy(value, *buffer, SHA1_KEY_LENGTH);
1723     *buffer += SHA1_KEY_LENGTH;
1724     value[SHA1_KEY_LENGTH] = '\0';
1725     return (const char *)value;
1726 }
1727 
thaw_data_hash(SV * data_to_decode)1728 static HV *thaw_data_hash(SV *data_to_decode) {
1729     dSP;
1730     ENTER;
1731     SAVETMPS;
1732 
1733     PUSHMARK(SP);
1734     EXTEND(SP, 1);
1735     PUSHs(data_to_decode);
1736     PUTBACK;
1737 
1738     int count = call_pv("Sereal::Decoder::decode_sereal", G_SCALAR);
1739 
1740     SPAGAIN;
1741 
1742     if (count != 1) {
1743         croak("Expected 1 item back from Sereal::Decoder::decode_sereal call");
1744     }
1745 
1746     SV *thawed = POPs;
1747     if (!SvROK(thawed)) {
1748         croak("The Sereal::Decoder::decode_sereal sub returned an SV which is "
1749               "not SvROK!");
1750     }
1751 
1752     SV *data_hash = SvREFCNT_inc_simple_NN(SvRV(thawed));
1753 
1754     PUTBACK;
1755     FREETMPS;
1756     LEAVE;
1757 
1758     return (HV *)data_hash;
1759 }
1760 
write_search_tree(MMDBW_tree_s * tree,SV * output,SV * root_data_type,SV * serializer)1761 void write_search_tree(MMDBW_tree_s *tree,
1762                        SV *output,
1763                        SV *root_data_type,
1764                        SV *serializer) {
1765     assign_node_numbers(tree);
1766 
1767     /* This is a gross way to get around the fact that with C function
1768      * pointers we can't easily pass different params to different
1769      * callbacks. */
1770     encode_args_s args = {.output_io = IoOFP(sv_2io(output)),
1771                           .root_data_type = root_data_type,
1772                           .serializer = serializer,
1773                           .data_pointer_cache = newHV()};
1774 
1775     start_iteration(tree, false, (void *)&args, &encode_node);
1776 
1777     /* When the hash is _freed_, Perl decrements the ref count for each value
1778      * so we don't need to mess with them. */
1779     SvREFCNT_dec((SV *)args.data_pointer_cache);
1780 
1781     return;
1782 }
1783 
encode_node(MMDBW_tree_s * tree,MMDBW_node_s * node,uint128_t UNUSED (network),uint8_t UNUSED (depth),void * void_args)1784 static void encode_node(MMDBW_tree_s *tree,
1785                         MMDBW_node_s *node,
1786                         uint128_t UNUSED(network),
1787                         uint8_t UNUSED(depth),
1788                         void *void_args) {
1789     encode_args_s *args = (encode_args_s *)void_args;
1790 
1791     check_record_sanity(node, &(node->left_record), "left");
1792     check_record_sanity(node, &(node->right_record), "right");
1793 
1794     uint32_t left =
1795         htonl(record_value_as_number(tree, &(node->left_record), args));
1796     uint32_t right =
1797         htonl(record_value_as_number(tree, &(node->right_record), args));
1798 
1799     uint8_t *left_bytes = (uint8_t *)&left;
1800     uint8_t *right_bytes = (uint8_t *)&right;
1801 
1802     if (tree->record_size == 24) {
1803         check_perlio_result(PerlIO_printf(args->output_io,
1804                                           "%c%c%c%c%c%c",
1805                                           left_bytes[1],
1806                                           left_bytes[2],
1807                                           left_bytes[3],
1808                                           right_bytes[1],
1809                                           right_bytes[2],
1810                                           right_bytes[3]),
1811                             6,
1812                             "PerlIO_printf");
1813     } else if (tree->record_size == 28) {
1814         check_perlio_result(
1815             PerlIO_printf(args->output_io,
1816                           "%c%c%c%c%c%c%c",
1817                           left_bytes[1],
1818                           left_bytes[2],
1819                           left_bytes[3],
1820                           (left_bytes[0] << 4) | (right_bytes[0] & 15),
1821                           right_bytes[1],
1822                           right_bytes[2],
1823                           right_bytes[3]),
1824             7,
1825             "PerlIO_printf");
1826     } else {
1827         check_perlio_result(PerlIO_printf(args->output_io,
1828                                           "%c%c%c%c%c%c%c%c",
1829                                           left_bytes[0],
1830                                           left_bytes[1],
1831                                           left_bytes[2],
1832                                           left_bytes[3],
1833                                           right_bytes[0],
1834                                           right_bytes[1],
1835                                           right_bytes[2],
1836                                           right_bytes[3]),
1837                             8,
1838                             "PerlIO_printf");
1839     }
1840 }
1841 
1842 /* Note that for data records, we will ensure that the key they contain does
1843  * match a data record in the record_value_as_number() subroutine. */
1844 static void
check_record_sanity(MMDBW_node_s * node,MMDBW_record_s * record,char * side)1845 check_record_sanity(MMDBW_node_s *node, MMDBW_record_s *record, char *side) {
1846     if (record->type == MMDBW_RECORD_TYPE_NODE ||
1847         record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
1848         if (record->value.node->number == node->number) {
1849             croak("%s record of node %" PRIu32 " points to the same node",
1850                   side,
1851                   node->number);
1852         }
1853 
1854         if (record->value.node->number < node->number) {
1855             croak("%s record of node %" PRIu32
1856                   " points to a node number (%" PRIu32 ")",
1857                   side,
1858                   node->number,
1859                   record->value.node->number);
1860         }
1861     }
1862 
1863     // This is a simple check that we aren't pointing at the tree root.
1864     if (record->type == MMDBW_RECORD_TYPE_ALIAS) {
1865         if (record->value.node->number == 0) {
1866             croak("%s record of node %" PRIu32 " is an alias to node 0",
1867                   side,
1868                   node->number);
1869         }
1870     }
1871 }
1872 
record_value_as_number(MMDBW_tree_s * tree,MMDBW_record_s * record,encode_args_s * args)1873 static uint32_t record_value_as_number(MMDBW_tree_s *tree,
1874                                        MMDBW_record_s *record,
1875                                        encode_args_s *args) {
1876     uint32_t record_value = 0;
1877 
1878     switch (record->type) {
1879         case MMDBW_RECORD_TYPE_FIXED_EMPTY:
1880         case MMDBW_RECORD_TYPE_EMPTY: {
1881             // Say that the IP isn't here.
1882             record_value = tree->node_count;
1883             break;
1884         }
1885         case MMDBW_RECORD_TYPE_NODE:
1886         case MMDBW_RECORD_TYPE_ALIAS:
1887         case MMDBW_RECORD_TYPE_FIXED_NODE: {
1888             record_value = record->value.node->number;
1889             break;
1890         }
1891         case MMDBW_RECORD_TYPE_DATA: {
1892             SV **cache_record = hv_fetch(args->data_pointer_cache,
1893                                          record->value.key,
1894                                          SHA1_KEY_LENGTH,
1895                                          0);
1896             if (cache_record) {
1897                 /* It is ok to return this without the size check below as it
1898                    would have already croaked when it was inserted if it was too
1899                    big. */
1900                 return SvIV(*cache_record);
1901             }
1902 
1903             SV *data = newSVsv(data_for_key(tree, record->value.key));
1904             if (!SvOK(data)) {
1905                 croak("No data associated with key - %s", record->value.key);
1906             }
1907 
1908             dSP;
1909             ENTER;
1910             SAVETMPS;
1911 
1912             PUSHMARK(SP);
1913             EXTEND(SP, 5);
1914             PUSHs(args->serializer);
1915             PUSHs(args->root_data_type);
1916             mPUSHs(data);
1917             PUSHs(&PL_sv_undef);
1918             mPUSHp(record->value.key, strlen(record->value.key));
1919             PUTBACK;
1920 
1921             int count = call_method("store_data", G_SCALAR);
1922 
1923             SPAGAIN;
1924 
1925             if (count != 1) {
1926                 croak("Expected 1 item back from ->store_data() call");
1927             }
1928 
1929             SV *rval = POPs;
1930             if (!(SvIOK(rval) || SvUOK(rval))) {
1931                 croak("The serializer's store_data() method returned an SV "
1932                       "which is not SvIOK or SvUOK!");
1933             }
1934             uint32_t position = (uint32_t)SvUV(rval);
1935 
1936             PUTBACK;
1937             FREETMPS;
1938             LEAVE;
1939 
1940             record_value =
1941                 position + tree->node_count + DATA_SECTION_SEPARATOR_SIZE;
1942 
1943             SV *value = newSViv(record_value);
1944             (void)hv_store(args->data_pointer_cache,
1945                            record->value.key,
1946                            SHA1_KEY_LENGTH,
1947                            value,
1948                            0);
1949             break;
1950         }
1951     }
1952 
1953     if (record_value > max_record_value(tree)) {
1954         croak("Node value of %" PRIu32 " exceeds the record size of %" PRIu8
1955               " bits",
1956               record_value,
1957               tree->record_size);
1958     }
1959 
1960     return record_value;
1961 }
1962 
max_record_value(MMDBW_tree_s * tree)1963 uint32_t max_record_value(MMDBW_tree_s *tree) {
1964     uint8_t record_size = tree->record_size;
1965     return record_size == 32 ? UINT32_MAX : (uint32_t)(1 << record_size) - 1;
1966 }
1967 
start_iteration(MMDBW_tree_s * tree,bool depth_first,void * args,MMDBW_iterator_callback callback)1968 void start_iteration(MMDBW_tree_s *tree,
1969                      bool depth_first,
1970                      void *args,
1971                      MMDBW_iterator_callback callback) {
1972     uint128_t network = 0;
1973     uint8_t depth = 0;
1974 
1975     // We disallow this as the callback is based on nodes rather than records,
1976     // and changing that is a rabbit hole that I don't want to go down
1977     // currently. (I stuck my head in and regretted it.)
1978     if (MMDBW_RECORD_TYPE_NODE != tree->root_record.type &&
1979         MMDBW_RECORD_TYPE_FIXED_NODE != tree->root_record.type) {
1980         croak("Iteration is not currently allowed in trees with no nodes. "
1981               "Record type: %s",
1982               record_type_name(tree->root_record.type));
1983     }
1984 
1985     iterate_tree(
1986         tree, &tree->root_record, network, depth, depth_first, args, callback);
1987 
1988     return;
1989 }
1990 
iterate_tree(MMDBW_tree_s * tree,MMDBW_record_s * record,uint128_t network,const uint8_t depth,bool depth_first,void * args,MMDBW_iterator_callback callback)1991 static void iterate_tree(MMDBW_tree_s *tree,
1992                          MMDBW_record_s *record,
1993                          uint128_t network,
1994                          const uint8_t depth,
1995                          bool depth_first,
1996                          void *args,
1997                          MMDBW_iterator_callback callback) {
1998     if (depth > tree_depth0(tree) + 1) {
1999         char ip[INET6_ADDRSTRLEN];
2000         integer_to_ip_string(tree->ip_version, network, ip, sizeof(ip));
2001         croak("Depth during iteration is greater than 127 (depth: %u, "
2002               "start IP: %s)! The tree is wonky.\n",
2003               depth,
2004               ip);
2005     }
2006 
2007     if (record->type == MMDBW_RECORD_TYPE_NODE ||
2008         record->type == MMDBW_RECORD_TYPE_FIXED_NODE) {
2009         MMDBW_node_s *node = record->value.node;
2010 
2011         if (!depth_first) {
2012             callback(tree, node, network, depth, args);
2013         }
2014 
2015         iterate_tree(tree,
2016                      &node->left_record,
2017                      network,
2018                      depth + 1,
2019                      depth_first,
2020                      args,
2021                      callback);
2022 
2023         if (depth_first) {
2024             callback(tree, node, network, depth, args);
2025         }
2026 
2027         iterate_tree(tree,
2028                      &node->right_record,
2029                      flip_network_bit(tree, network, depth),
2030                      depth + 1,
2031                      depth_first,
2032                      args,
2033                      callback);
2034     }
2035 }
2036 
2037 uint128_t
flip_network_bit(MMDBW_tree_s * tree,uint128_t network,uint8_t depth)2038 flip_network_bit(MMDBW_tree_s *tree, uint128_t network, uint8_t depth) {
2039     return network | ((uint128_t)1 << (tree_depth0(tree) - depth));
2040 }
2041 
key_for_data(SV * data)2042 static SV *key_for_data(SV *data) {
2043     dSP;
2044     ENTER;
2045     SAVETMPS;
2046 
2047     PUSHMARK(SP);
2048     EXTEND(SP, 1);
2049     PUSHs(data);
2050     PUTBACK;
2051 
2052     const char *const sub = "MaxMind::DB::Writer::Util::key_for_data";
2053     int count = call_pv(sub, G_SCALAR);
2054 
2055     SPAGAIN;
2056 
2057     if (count != 1) {
2058         croak("Expected 1 item back from %s() call", sub);
2059     }
2060 
2061     SV *key = POPs;
2062     SvREFCNT_inc_simple_void_NN(key);
2063 
2064     PUTBACK;
2065     FREETMPS;
2066     LEAVE;
2067 
2068     return key;
2069 }
2070 
data_for_key(MMDBW_tree_s * tree,const char * const key)2071 SV *data_for_key(MMDBW_tree_s *tree, const char *const key) {
2072     MMDBW_data_hash_s *data = NULL;
2073     HASH_FIND(hh, tree->data_table, key, strlen(key), data);
2074 
2075     if (NULL != data) {
2076         return data->data_sv;
2077     } else {
2078         return &PL_sv_undef;
2079     }
2080 }
2081 
merge_cache_lookup(MMDBW_tree_s * tree,char * merge_cache_key)2082 static const char *merge_cache_lookup(MMDBW_tree_s *tree,
2083                                       char *merge_cache_key) {
2084     MMDBW_merge_cache_s *cache = NULL;
2085     HASH_FIND(hh, tree->merge_cache, merge_cache_key, MERGE_KEY_SIZE, cache);
2086 
2087     if (!cache) {
2088         return NULL;
2089     }
2090 
2091     // We have to check that the value has not been removed from the data
2092     // table
2093     MMDBW_data_hash_s *data = NULL;
2094     HASH_FIND(hh, tree->data_table, cache->value, SHA1_KEY_LENGTH, data);
2095     if (data != NULL) {
2096         return cache->value;
2097     }
2098 
2099     // Item has been removed from data table. Remove the cached merge too.
2100     HASH_DEL(tree->merge_cache, cache);
2101     return NULL;
2102 }
2103 
store_in_merge_cache(MMDBW_tree_s * tree,char * merge_cache_key,const char * const new_key)2104 static void store_in_merge_cache(MMDBW_tree_s *tree,
2105                                  char *merge_cache_key,
2106                                  const char *const new_key) {
2107     MMDBW_merge_cache_s *data = checked_malloc(sizeof(MMDBW_merge_cache_s));
2108     data->value = checked_malloc(SHA1_KEY_LENGTH + 1);
2109     strncpy((char *)data->value, new_key, SHA1_KEY_LENGTH + 1);
2110 
2111     data->key = checked_malloc(MERGE_KEY_SIZE + 1);
2112     strncpy((char *)data->key, merge_cache_key, MERGE_KEY_SIZE + 1);
2113 
2114     HASH_ADD_KEYPTR(hh, tree->merge_cache, data->key, MERGE_KEY_SIZE, data);
2115 }
2116 
free_tree(MMDBW_tree_s * tree)2117 void free_tree(MMDBW_tree_s *tree) {
2118     free_record_value(tree, &tree->root_record, true);
2119     free_merge_cache(tree);
2120 
2121     int hash_count = HASH_COUNT(tree->data_table);
2122     if (0 != hash_count) {
2123         croak("%d elements left in data table after freeing all nodes!",
2124               hash_count);
2125     }
2126 
2127     free(tree);
2128 }
2129 
free_merge_cache(MMDBW_tree_s * tree)2130 void free_merge_cache(MMDBW_tree_s *tree) {
2131     MMDBW_merge_cache_s *cache, *tmp = NULL;
2132     HASH_ITER(hh, tree->merge_cache, cache, tmp) {
2133         HASH_DEL(tree->merge_cache, cache);
2134         free((void *)cache->key);
2135         free((void *)cache->value);
2136         free(cache);
2137     }
2138 }
2139 
checked_malloc(size_t size)2140 static void *checked_malloc(size_t size) {
2141     void *ptr = malloc(size);
2142     if (!ptr) {
2143         abort();
2144     }
2145 
2146     return ptr;
2147 }
2148 
2149 static void
checked_fwrite(FILE * file,char * filename,void * buffer,size_t count)2150 checked_fwrite(FILE *file, char *filename, void *buffer, size_t count) {
2151     size_t result = fwrite(buffer, 1, count, file);
2152     if (result != count) {
2153         fclose(file);
2154         croak("Write to %s did not write the expected amount of data (wrote "
2155               "%zu instead of %zu): %s",
2156               filename,
2157               result,
2158               count,
2159               strerror(errno));
2160     }
2161 }
2162 
check_perlio_result(SSize_t result,SSize_t expected,char * op)2163 static void check_perlio_result(SSize_t result, SSize_t expected, char *op) {
2164     if (result < 0) {
2165         croak("%s operation failed: %s\n", op, strerror(result));
2166     } else if (result != expected) {
2167         croak("%s operation wrote %zd bytes when we expected to write %zd",
2168               op,
2169               result,
2170               expected);
2171     }
2172 }
2173 
status_error_message(MMDBW_status status)2174 static char *status_error_message(MMDBW_status status) {
2175     switch (status) {
2176         case MMDBW_SUCCESS:
2177             return "Success";
2178         case MMDBW_INSERT_INTO_ALIAS_NODE_ERROR:
2179             return "Attempted to insert into an aliased network.";
2180         case MMDBW_INSERT_INVALID_RECORD_TYPE_ERROR:
2181             return "Invalid record type given to insert.";
2182         case MMDBW_FREED_ALIAS_NODE_ERROR:
2183             return "Attempted to free an IPv4 alias node. Did you try to "
2184                    "overwrite an alias network?";
2185         case MMDBW_FREED_FIXED_EMPTY_ERROR:
2186             return "Attempted to free a fixed empty node. This should never "
2187                    "happen.";
2188         case MMDBW_FREED_FIXED_NODE_ERROR:
2189             return "Attempted to free a fixed node. This should never happen.";
2190         case MMDBW_ALIAS_OVERWRITE_ATTEMPT_ERROR:
2191             return "Attempted to overwrite an aliased network.";
2192         case MMDBW_FIXED_NODE_OVERWRITE_ATTEMPT_ERROR:
2193             return "Attempted to overwrite a fixed node.";
2194         case MMDBW_RESOLVING_IP_ERROR:
2195             return "Failed to resolve IP address.";
2196     }
2197     // We should get a compile time warning if an enum is missing
2198     return "Unknown error";
2199 }
2200 
record_type_name(MMDBW_record_type type)2201 static const char *record_type_name(MMDBW_record_type type) {
2202     switch (type) {
2203         case MMDBW_RECORD_TYPE_EMPTY:
2204             return "empty";
2205         case MMDBW_RECORD_TYPE_FIXED_EMPTY:
2206             return "fixed_empty";
2207         case MMDBW_RECORD_TYPE_DATA:
2208             return "data";
2209         case MMDBW_RECORD_TYPE_NODE:
2210             return "node";
2211         case MMDBW_RECORD_TYPE_FIXED_NODE:
2212             return "fixed_node";
2213         case MMDBW_RECORD_TYPE_ALIAS:
2214             return "alias";
2215     }
2216     return "unknown type";
2217 }
2218