1 #ifndef ADDRESS_PARSER_IO_H
2 #define ADDRESS_PARSER_IO_H
3 
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <stdbool.h>
7 
8 #include "address_parser.h"
9 #include "collections.h"
10 #include "file_utils.h"
11 #include "scanner.h"
12 #include "string_utils.h"
13 
/*
 * Placeholder language codes (three-character string literals).
 * NOTE(review): presumably compared against / stored in the language field
 * of a training example when the language is ambiguous or not known --
 * confirm against the users of these macros.
 */
#define AMBIGUOUS_LANGUAGE "xxx"
#define UNKNOWN_LANGUAGE "unk"
16 
/*
 * Field identifiers for address parser training data.
 *
 * The enumerators take the implicit values 0, 1, 2, ... in declaration
 * order, so the final enumerator equals the number of field identifiers
 * declared before it.
 * NOTE(review): presumably these are the column positions of one record in
 * the training file -- confirm against the code that reads the file.
 */
enum address_parser_training_data_fields {
    ADDRESS_PARSER_FIELD_LANGUAGE,   /* 0 */
    ADDRESS_PARSER_FIELD_COUNTRY,    /* 1 */
    ADDRESS_PARSER_FIELD_ADDRESS,    /* 2 */
    ADDRESS_PARSER_FILE_NUM_TOKENS   /* 3: count of the fields above; keep last */
};
23 
24 typedef struct address_parser_data_set {
25     FILE *f;
26     token_array *tokens;
27     tokenized_string_t *tokenized_str;
28     cstring_array *normalizations;
29     size_t norm;
30     cstring_array *labels;
31     uint32_array *separators;
32     char_array *language;
33     char_array *country;
34 } address_parser_data_set_t;
35 
36 
37 address_parser_data_set_t *address_parser_data_set_init(char *filename);
38 bool address_parser_data_set_rewind(address_parser_data_set_t *self);
39 bool address_parser_data_set_tokenize_line(address_parser_data_set_t *self, char *input);
40 bool address_parser_data_set_next(address_parser_data_set_t *self);
41 void address_parser_data_set_destroy(address_parser_data_set_t *self);
42 
43 #endif