1 #ifndef ADDRESS_PARSER_IO_H 2 #define ADDRESS_PARSER_IO_H 3 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <stdbool.h> 7 8 #include "address_parser.h" 9 #include "collections.h" 10 #include "file_utils.h" 11 #include "scanner.h" 12 #include "string_utils.h" 13 14 #define AMBIGUOUS_LANGUAGE "xxx" 15 #define UNKNOWN_LANGUAGE "unk" 16 17 enum address_parser_training_data_fields { 18 ADDRESS_PARSER_FIELD_LANGUAGE, 19 ADDRESS_PARSER_FIELD_COUNTRY, 20 ADDRESS_PARSER_FIELD_ADDRESS, 21 ADDRESS_PARSER_FILE_NUM_TOKENS 22 }; 23 24 typedef struct address_parser_data_set { 25 FILE *f; 26 token_array *tokens; 27 tokenized_string_t *tokenized_str; 28 cstring_array *normalizations; 29 size_t norm; 30 cstring_array *labels; 31 uint32_array *separators; 32 char_array *language; 33 char_array *country; 34 } address_parser_data_set_t; 35 36 37 address_parser_data_set_t *address_parser_data_set_init(char *filename); 38 bool address_parser_data_set_rewind(address_parser_data_set_t *self); 39 bool address_parser_data_set_tokenize_line(address_parser_data_set_t *self, char *input); 40 bool address_parser_data_set_next(address_parser_data_set_t *self); 41 void address_parser_data_set_destroy(address_parser_data_set_t *self); 42 43 #endif