1 /*************************************************************************/
2 /* Copyright (c) 2004                                                    */
3 /* Daniel Sleator, David Temperley, and John Lafferty                    */
4 /* Copyright (c) 2013, 2014 Linas Vepstas                                */
5 /* All rights reserved                                                   */
6 /*                                                                       */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software.    */
9 /* This license allows free redistribution and use in source and binary  */
10 /* forms, with or without modification, subject to certain conditions.   */
11 /*                                                                       */
12 /*************************************************************************/
13 
14 #ifndef _LG_DICT_COMMON_H_
15 #define  _LG_DICT_COMMON_H_
16 
17 #include "api-types.h"                  // pp_knowledge
18 #include "connectors.h"                 // ConTable
19 #include "dict-structures.h"
20 #include "memory-pool.h"                // Pool_desc
21 #include "utilities.h"                  // locale_t
22 
/* Connector name string for the "empty" connector.
 * NOTE(review): its exact role is determined by the code that uses it. */
#define EMPTY_CONNECTOR "ZZZ"
/* Special word names related to connector length limits.
 * NOTE(review): presumably these are looked up as dictionary words;
 * LIMITED_CONNECTORS_WORD ends in '-', so it looks like a prefix that
 * gets a suffix appended -- confirm against the users of these macros.
 * As written, the enclosing parentheses mean these expansions cannot be
 * concatenated with an adjacent string literal. */
#define UNLIMITED_CONNECTORS_WORD ("UNLIMITED-CONNECTORS")
#define LIMITED_CONNECTORS_WORD ("LENGTH-LIMIT-")
26 
/* Forward declarations; both struct bodies are defined later in this file. */
typedef struct Afdict_class_struct Afdict_class;
typedef struct Regex_node_s Regex_node;
30 
/* A chainable node that ties a word string to an expression and to the
 * Wordgraph word it originated from. */
typedef struct X_node_struct X_node;
struct X_node_struct
{
	const char * string;       /* the word itself */
	Exp * exp;                 /* expression attached to this word */
	X_node *next;              /* next node in the chain, if any */
	const Gword *word;         /* originating Wordgraph word */
};
39 
/* The regexes are stored as a linked list of the following nodes. */
struct Regex_node_s
{
	char *name;      /* The identifying name of the regex */
	char *pattern;   /* The regular expression pattern */
	bool neg;        /* Negate the match */
	void *re;        /* The compiled regex. void * to avoid
	                    having re library details invading the
	                    rest of the LG system; regex-morph.c
	                    takes care of all matching.
	                  */
	Regex_node *next; /* Next node in the linked list */
};
53 
/* An array of strings that tracks both its allocated size and the
 * number of strings currently stored.
 * NOTE(review): presumably one such record exists per affix class
 * (see Dictionary_s.afdict_class) -- confirm. */
struct Afdict_class_struct
{
	size_t mem_elems;     /* number of memory elements allocated */
	size_t length;        /* number of strings */
	char const ** string; /* the strings themselves */
};
60 
/* Size of the Dictionary_s.token buffer.
 * NOTE(review): if the token is stored NUL-terminated, the usable
 * length is one less than this -- confirm against the reader code. */
#define MAX_TOKEN_LENGTH 250     /* Maximum number of chars in a token */
/* Size of the Dictionary_s.current_idiom buffer. */
#define IDIOM_LINK_SZ 5
63 
/* IS_DB_DICT(dict): true iff the dictionary's db_handle is non-NULL.
 * The db_handle field exists only when HAVE_SQLITE is defined; without
 * it the macro is the constant false and `dict` is not evaluated.
 * The argument is parenthesized so that any pointer expression
 * (e.g. `p + 1` or `c ? a : b`) can be passed safely. */
#ifdef HAVE_SQLITE
#define IS_DB_DICT(dict) (NULL != (dict)->db_handle)
#else
#define IS_DB_DICT(dict) false
#endif /* HAVE_SQLITE */
69 
/* Bookkeeping for a collection of named expression tags: a string-id
 * set of the names plus an array mapping tag id to name. */
typedef struct
{
	String_id *set;                    /* Expression tag names */
	const char **name;                 /* Tag name (indexed by tag id) */
	unsigned int num;                  /* Number of tags */
	unsigned int size;                 /* Allocated tag array size */
} expression_tag;
77 
/* The dictionary object: its entries and regexes, locale and dialect
 * information, affix data used for tokenization, operation callbacks,
 * post-processing knowledge, and the transient state used while the
 * dictionary is being read. */
struct Dictionary_s
{
	Dict_node *  root;      /* Root Dict_node of the dictionary */
	Regex_node * regex_root; /* Head of the regex linked list */
	const char * name;
	const char * lang;
	const char * version;
	const char * locale;    /* Locale name */
	locale_t     lctype;    /* Locale argument for the *_l() functions */
	int          num_entries;

	/* Boolean flags.
	 * NOTE(review): presumably set while the dictionary is loaded;
	 * see the dictionary setup code for their exact meaning. */
	bool         use_unknown_word;
	bool         unknown_word_defined;
	bool         left_wall_defined;
	bool         right_wall_defined;
	bool         shuffle_linkages;

	Dialect *dialect;                  /* "4.0.dialect" info */
	expression_tag dialect_tag;        /* Expression dialect tag info */
	expression_tag *macro_tag;         /* Macro tags for expression debug */
	void *cached_dialect;              /* Only for dialect cache validation */

	/* Affixes are used during the tokenization stage. */
	Dictionary      affix_table;
	Afdict_class *  afdict_class;
	bool pre_suf_class_exists;         /* True iff PRE or SUF exists */

	/* Random morphology generator */
	struct anysplit_params * anysplit;

	/* If not null, then use spelling guesser for unknown words */
	void *          spell_checker;     /* spell checker handle */
#ifdef HAVE_SQLITE
	/* Tested by IS_DB_DICT(); non-NULL means a database dictionary. */
	void *          db_handle;         /* database handle */
#endif

	/* Dictionary operations, dispatched through function pointers.
	 * NOTE(review): presumably assigned differently depending on the
	 * kind of dictionary (e.g. file vs. database) -- confirm. */
	void (*insert_entry)(Dictionary, Dict_node *, int);
	Dict_node* (*lookup_list)(Dictionary, const char*);
	Dict_node* (*lookup_wild)(Dictionary, const char*);
	void (*free_lookup)(Dictionary, Dict_node*);
	bool (*lookup)(Dictionary, const char*);
	void (*close)(Dictionary);

	pp_knowledge  * base_knowledge;    /* Core post-processing rules */
	pp_knowledge  * hpsg_knowledge;    /* Head-Phrase Structure rules */
	String_set *    string_set;        /* Set of link names in the dictionary */
	Word_file *     word_file_header;
	ConTable        contable;

	Pool_desc  * Exp_pool;             /* Memory pool; Exp_create() and friends
	                                      take a Pool_desc * argument */

	/* Private data elements that come in play only while the
	 * dictionary is being read, and are not otherwise used.
	 */
	const char    * input;
	const char    * pin;
	bool            recursive_error;
	const char    * suppress_warning;
	bool            is_special;
	int             already_got_it; /* For char, but needs to hold EOF */
	int             line_number;
	char            current_idiom[IDIOM_LINK_SZ];
	char            token[MAX_TOKEN_LENGTH];
};
142 /* The functions here are intended for use by the tokenizer, only,
143  * and pretty much no one else. If you are not the tokenizer, you
144  * probably don't need these. */
145 
146 bool dict_has_word(const Dictionary dict, const char *);
147 Exp *Exp_create(Pool_desc *);
148 Exp *Exp_create_dup(Pool_desc *, Exp *);
149 Exp *make_unary_node(Pool_desc *, Exp *);
150 void add_empty_word(Sentence, X_node *);
151 
152 #endif /* _LG_DICT_COMMON_H_ */
153