1 /*************************************************************************/ 2 /* Copyright (c) 2004 */ 3 /* Daniel Sleator, David Temperley, and John Lafferty */ 4 /* Copyright (c) 2013, 2014 Linas Vepstas */ 5 /* All rights reserved */ 6 /* */ 7 /* Use of the link grammar parsing system is subject to the terms of the */ 8 /* license set forth in the LICENSE file included with this software. */ 9 /* This license allows free redistribution and use in source and binary */ 10 /* forms, with or without modification, subject to certain conditions. */ 11 /* */ 12 /*************************************************************************/ 13 14 #ifndef _LG_DICT_COMMON_H_ 15 #define _LG_DICT_COMMON_H_ 16 17 #include "api-types.h" // pp_knowledge 18 #include "connectors.h" // ConTable 19 #include "dict-structures.h" 20 #include "memory-pool.h" // Pool_desc 21 #include "utilities.h" // locale_t 22 23 #define EMPTY_CONNECTOR "ZZZ" 24 #define UNLIMITED_CONNECTORS_WORD ("UNLIMITED-CONNECTORS") 25 #define LIMITED_CONNECTORS_WORD ("LENGTH-LIMIT-") 26 27 /* Forward decls */ 28 typedef struct Afdict_class_struct Afdict_class; 29 typedef struct Regex_node_s Regex_node; 30 31 typedef struct X_node_struct X_node; 32 struct X_node_struct 33 { 34 const char * string; /* the word itself */ 35 Exp * exp; 36 X_node *next; 37 const Gword *word; /* originating Wordgraph word */ 38 }; 39 40 /* The regexes are stored as a linked list of the following nodes. */ 41 struct Regex_node_s 42 { 43 char *name; /* The identifying name of the regex */ 44 char *pattern; /* The regular expression pattern */ 45 bool neg; /* Negate the match */ 46 void *re; /* The compiled regex. void * to avoid 47 having re library details invading the 48 rest of the LG system; regex-morph.c 49 takes care of all matching. 50 */ 51 Regex_node *next; 52 }; 53 54 struct Afdict_class_struct 55 { 56 size_t mem_elems; /* number of memory elements allocated */ 57 size_t length; /* number of strings */ 58 char const ** string; 59 }; 60 61 #define MAX_TOKEN_LENGTH 250 /* Maximum number of chars in a token */ 62 #define IDIOM_LINK_SZ 5 63 64 #ifdef HAVE_SQLITE 65 #define IS_DB_DICT(dict) (NULL != dict->db_handle) 66 #else 67 #define IS_DB_DICT(dict) false 68 #endif /* HAVE_SQLITE */ 69 70 typedef struct 71 { 72 String_id *set; /* Expression tag names */ 73 const char **name; /* Tag name (indexed by tag id) */ 74 unsigned int num; /* Number of tags */ 75 unsigned int size; /* Allocated tag array size */ 76 } expression_tag; 77 78 struct Dictionary_s 79 { 80 Dict_node * root; 81 Regex_node * regex_root; 82 const char * name; 83 const char * lang; 84 const char * version; 85 const char * locale; /* Locale name */ 86 locale_t lctype; /* Locale argument for the *_l() functions */ 87 int num_entries; 88 89 bool use_unknown_word; 90 bool unknown_word_defined; 91 bool left_wall_defined; 92 bool right_wall_defined; 93 bool shuffle_linkages; 94 95 Dialect *dialect; /* "4.0.dialect" info */ 96 expression_tag dialect_tag; /* Expression dialect tag info */ 97 expression_tag *macro_tag; /* Macro tags for expression debug */ 98 void *cached_dialect; /* Only for dialect cache validation */ 99 100 /* Affixes are used during the tokenization stage. */ 101 Dictionary affix_table; 102 Afdict_class * afdict_class; 103 bool pre_suf_class_exists; /* True iff PRE or SUF exists */ 104 105 /* Random morphology generator */ 106 struct anysplit_params * anysplit; 107 108 /* If not null, then use spelling guesser for unknown words */ 109 void * spell_checker; /* spell checker handle */ 110 #ifdef HAVE_SQLITE 111 void * db_handle; /* database handle */ 112 #endif 113 114 void (*insert_entry)(Dictionary, Dict_node *, int); 115 Dict_node* (*lookup_list)(Dictionary, const char*); 116 Dict_node* (*lookup_wild)(Dictionary, const char*); 117 void (*free_lookup)(Dictionary, Dict_node*); 118 bool (*lookup)(Dictionary, const char*); 119 void (*close)(Dictionary); 120 121 pp_knowledge * base_knowledge; /* Core post-processing rules */ 122 pp_knowledge * hpsg_knowledge; /* Head-Phrase Structure rules */ 123 String_set * string_set; /* Set of link names in the dictionary */ 124 Word_file * word_file_header; 125 ConTable contable; 126 127 Pool_desc * Exp_pool; 128 129 /* Private data elements that come in play only while the 130 * dictionary is being read, and are not otherwise used. 131 */ 132 const char * input; 133 const char * pin; 134 bool recursive_error; 135 const char * suppress_warning; 136 bool is_special; 137 int already_got_it; /* For char, but needs to hold EOF */ 138 int line_number; 139 char current_idiom[IDIOM_LINK_SZ]; 140 char token[MAX_TOKEN_LENGTH]; 141 }; 142 /* The functions here are intended for use by the tokenizer, only, 143 * and pretty much no one else. If you are not the tokenizer, you 144 * probably don't need these. */ 145 146 bool dict_has_word(const Dictionary dict, const char *); 147 Exp *Exp_create(Pool_desc *); 148 Exp *Exp_create_dup(Pool_desc *, Exp *); 149 Exp *make_unary_node(Pool_desc *, Exp *); 150 void add_empty_word(Sentence, X_node *); 151 152 #endif /* _LG_DICT_COMMON_H_ */ 153