1 /*
2 * Copyright (C) 2005 Martin Sevior <msevior@physics.unimelb.edu.au>
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA.
18 */
19 /********************************************************************************/
20 /* Copyright (c) 2004
21 */
22 /* Daniel Sleator, David Temperley, and John Lafferty
23 */
24 /* All rights reserved
25 */
26 /*
27 */
28 /* Use of the link grammar parsing system is subject to the terms of the
29 */
30 /* license set forth in the LICENSE file included with this software,
31 */
32 /* and also available at http://www.link.cs.cmu.edu/link/license.html
33 */
34 /* This license allows free redistribution and use in source and binary
35 */
36 /* forms, with or without modification, subject to certain conditions.
37 */
38 /*
39 */
40 /********************************************************************************/
41
42 #include "config.h"
43 #include "xap_App.h"
44 #include "ut_locale.h"
45 #include "ut_string_class.h"
46 #include "ut_types.h"
47 #include "ut_debugmsg.h"
48 #include "ut_vector.h"
49 #include "ut_string.h"
50 #include "../xp/AbiGrammarUtil.h"
51
52 #include "LinkGrammarWrap.h"
53
LinkGrammarWrap(void)54 LinkGrammarWrap::LinkGrammarWrap(void)
55 {
56 m_Opts = parse_options_create();
57 #ifdef _MSC_VER
58 gchar* dict_path = g_build_filename (XAP_App::getApp()->getAbiSuiteLibDir(), "grammar", NULL);
59 dictionary_set_data_dir(dict_path);
60 g_free(dict_path);
61 #endif
62 UT_LocaleTransactor t(LC_ALL, "");
63 m_Dict = dictionary_create_lang("en");
64 parse_options_set_max_parse_time(m_Opts, 1); // 1 second max parse time
65 }
66
~LinkGrammarWrap(void)67 LinkGrammarWrap::~LinkGrammarWrap(void)
68 {
69 if(m_Dict)
70 dictionary_delete(m_Dict);
71 if(m_Opts)
72 parse_options_delete(m_Opts);
73 }
74
parseSentence(PieceOfText * pT)75 bool LinkGrammarWrap::parseSentence(PieceOfText * pT)
76 {
77 if(!m_Dict)
78 {
79 UT_DEBUGMSG(("No dictionary!!\n"));
80 return true; // default to no grammar checking.
81 }
82 // UT_DEBUGMSG(("Sentence received |%s|\n",pT->sText.utf8_str()));
83 Sentence sent = sentence_create(const_cast<char *>(pT->sText.utf8_str()),m_Dict);
84 if (!sent) return true;
85
86 /* First parse with cost 0 or 1 and no null links */
87 parse_options_set_disjunct_cost(m_Opts, 2);
88 parse_options_set_min_null_count(m_Opts, 0);
89 parse_options_set_max_null_count(m_Opts, 0);
90 parse_options_set_islands_ok(m_Opts, 0);
91 #ifndef HAVE_LINK_GRAMMAR_51
92 parse_options_set_panic_mode(m_Opts, TRUE);
93 #endif
94 parse_options_reset_resources(m_Opts);
95 UT_sint32 num_linkages = sentence_parse(sent, m_Opts);
96 bool res = (num_linkages >= 1);
97 if(TRUE == parse_options_timer_expired(m_Opts))
98 {
99 UT_DEBUGMSG(("!!! Timer expired! Mark valid anyway!\n"));
100 res= true; // Mark valid if it's too hard. FIXME. We can attempt to recover
101 // by tweaking paramters once we know what we're doing.
102 }
103 UT_UTF8String errStr = "";
104 if(!res && (num_linkages == 0))
105 {
106 // Now proces with NULL links. to find out what went wrong.
107 parse_options_set_min_null_count(m_Opts, 1);
108 parse_options_set_max_null_count(m_Opts, sentence_length(sent));
109 parse_options_set_islands_ok(m_Opts, 1);
110 parse_options_reset_resources(m_Opts);
111 num_linkages = sentence_parse(sent, m_Opts);
112 }
113 pT->m_bGrammarChecked = true;
114 pT->m_bGrammarOK = res;
115 if(!res)
116 {
117 UT_GenericVector<AbiGrammarError *> vecMapOfWords;
118 //
119 // Get first linkage
120 //
121 AbiGrammarError * pErr = NULL;
122 if(num_linkages > 0)
123 {
124 Linkage linkage = linkage_create(0, sent, m_Opts);
125 if(linkage != NULL)
126 {
127 UT_sint32 i = 0;
128 UT_sint32 iLow= 0;
129 UT_sint32 iHigh= 0;
130 UT_sint32 iOff = pT->iInLow;
131 const char * szSent = pT->sText.utf8_str();
132 UT_sint32 totlen = strlen(szSent);
133 for (i=1; i<sentence_length(sent) && (iLow < totlen); i++)
134 {
135 //
136 // NULL link island.
137 //
138 // UT_DEBUGMSG((" iLow %d szSent[iLow] %c\n",iLow,szSent[iLow]));
139 while((szSent[iLow] == ' ') && (iLow < totlen))
140 {
141 //UT_DEBUGMSG((" iLow %d szSent[iLow] %c\n",iLow,szSent[i]));
142 iLow++;
143 }
144 if(iLow >= totlen)
145 {
146 //UT_DEBUGMSG(("Error ! ran off the end! iLow %d \n Text |%s|\n",iLow,szSent));
147 break;
148 }
149 AbiGrammarError * pWordMap = new AbiGrammarError();
150 pWordMap->m_iErrLow = iLow;
151 pWordMap->m_iErrHigh = iLow + strlen(linkage_get_word(linkage, i));
152 pWordMap->m_iWordNum = i;
153 vecMapOfWords.addItem(pWordMap);
154 bool bNew = false;
155
156 //UT_DEBUGMSG(("|%s| NULL LINK\n",sent->word[i].string));
157 if(pErr == NULL)
158 {
159 pErr = new AbiGrammarError();
160 bNew = true;
161 }
162 if(bNew || (pErr->m_iWordNum + 1 < i))
163 {
164 if(!bNew)
165 {
166 if(pErr)
167 {
168 delete pErr;
169 }
170 pErr = new AbiGrammarError();
171 }
172 iHigh = iLow + strlen(linkage_get_word(linkage, i));
173 pErr->m_iErrLow = iLow + iOff -1;
174 pErr->m_iErrHigh = iHigh + iOff -1;
175 if(pErr->m_iErrLow < 0)
176 {
177 pErr->m_iErrLow = 0;
178 }
179 if(pErr->m_iErrHigh < totlen-1)
180 {
181 pErr->m_iErrHigh += 1;
182 }
183 pErr->m_iWordNum = i;
184 // UT_DEBUGMSG(("Add Error %x low %d High %d\n",pErr,pErr->m_iErrLow,pErr->m_iErrHigh));
185 pT->m_vecGrammarErrors.addItem(pErr);
186 pErr = NULL;
187 }
188 else
189 {
190 //
191 // Expand the sqiggle
192 //
193 iHigh = iLow + strlen(linkage_get_word(linkage, i)) + iOff;
194 pErr->m_iErrHigh = iHigh;
195 if(pErr->m_iErrHigh < totlen-1)
196 {
197 pErr->m_iErrHigh += 1;
198 }
199 pErr->m_iWordNum = i;
200 }
201 iLow += strlen(linkage_get_word(linkage, i));
202 }
203 //
204 // No NULL links but still an error , mark the whole sentence bad.
205 //
206 if(pT->m_vecGrammarErrors.getItemCount() == 0)
207 {
208 if(pErr)
209 {
210 delete pErr;
211 }
212 pErr = new AbiGrammarError();
213 pErr->m_iErrLow = pT->iInLow;
214 pErr->m_iErrHigh = pT->iInHigh;
215 if(pErr->m_iErrLow < 0)
216 {
217 pErr->m_iErrLow = 0;
218 }
219 // UT_DEBUGMSG(("Add Error %x low %d High %d\n",pErr,pErr->m_iErrLow,pErr->m_iErrHigh));
220 pT->m_vecGrammarErrors.addItem(pErr);
221 pErr->m_sErrorDesc = linkage_get_violation_name(linkage);
222 //UT_DEBUGMSG(("Complete Sentence had error %s\n",pErr->m_sErrorDesc.utf8_str()));
223 pErr = NULL;
224 }
225
226 // for(i=0; i< pT->m_vecGrammarErrors.getItemCount(); i++)
227 // {
228 // pErr = pT->m_vecGrammarErrors.getNthItem(i);
229 // UT_DEBUGMSG((" err %d iLow %d iHigh %d\n",i,pErr->m_iErrLow,pErr->m_iErrHigh));
230 // }
231 UT_UTF8String sErr = linkage_get_violation_name(linkage);
232 // UT_DEBUGMSG(("Top Level error message |%s|\n",sErr.utf8_str()));
233 linkage_delete(linkage);
234 for(i=0; i< vecMapOfWords.getItemCount(); i++)
235 {
236 AbiGrammarError * p = vecMapOfWords.getNthItem(i);
237 delete p;
238 }
239 }
240 }
241 else
242 {
243 if(pErr)
244 {
245 delete pErr;
246 }
247 pErr = new AbiGrammarError();
248 pErr->m_iErrLow = pT->iInLow;
249 pErr->m_iErrHigh = pT->iInHigh;
250 if(pErr->m_iErrLow < 0)
251 {
252 pErr->m_iErrLow = 0;
253 }
254 // UT_DEBUGMSG(("Final Add Error %x low %d High %d\n",pErr,pErr->m_iErrLow,pErr->m_iErrHigh));
255 pT->m_vecGrammarErrors.addItem(pErr);
256 pErr = NULL;
257 }
258 if(pErr)
259 delete pErr;
260 }
261 sentence_delete(sent);
262 return res;
263 }
264
clear(void)265 bool LinkGrammarWrap::clear(void)
266 {
267 return true;
268 }
269