1 /*
2  * Copyright (C) 2005 Martin Sevior <msevior@physics.unimelb.edu.au>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301 USA.
18  */
19 /********************************************************************************/
20 /* Copyright (c) 2004
21 */
22 /* Daniel Sleator, David Temperley, and John Lafferty
23 */
24 /* All rights reserved
25 */
26 /*
27 */
28 /* Use of the link grammar parsing system is subject to the terms of the
29 */
30 /* license set forth in the LICENSE file included with this software,
31 */
32 /* and also available at http://www.link.cs.cmu.edu/link/license.html
33 */
34 /* This license allows free redistribution and use in source and binary
35 */
36 /* forms, with or without modification, subject to certain conditions.
37 */
38 /*
39 */
40 /********************************************************************************/
41 
42 #include "config.h"
43 #include "xap_App.h"
44 #include "ut_locale.h"
45 #include "ut_string_class.h"
46 #include "ut_types.h"
47 #include "ut_debugmsg.h"
48 #include "ut_vector.h"
49 #include "ut_string.h"
50 #include "../xp/AbiGrammarUtil.h"
51 
52 #include "LinkGrammarWrap.h"
53 
LinkGrammarWrap(void)54 LinkGrammarWrap::LinkGrammarWrap(void)
55 {
56   m_Opts = parse_options_create();
57 #ifdef _MSC_VER
58   gchar* dict_path = g_build_filename (XAP_App::getApp()->getAbiSuiteLibDir(), "grammar", NULL);
59   dictionary_set_data_dir(dict_path);
60   g_free(dict_path);
61 #endif
62   UT_LocaleTransactor t(LC_ALL, "");
63   m_Dict = dictionary_create_lang("en");
64   parse_options_set_max_parse_time(m_Opts, 1); // 1 second max parse time
65 }
66 
~LinkGrammarWrap(void)67 LinkGrammarWrap::~LinkGrammarWrap(void)
68 {
69   if(m_Dict)
70     dictionary_delete(m_Dict);
71   if(m_Opts)
72     parse_options_delete(m_Opts);
73 }
74 
parseSentence(PieceOfText * pT)75 bool LinkGrammarWrap::parseSentence(PieceOfText * pT)
76 {
77   if(!m_Dict)
78   {
79     UT_DEBUGMSG(("No dictionary!!\n"));
80     return true; // default to no grammar checking.
81   }
82   //  UT_DEBUGMSG(("Sentence received |%s|\n",pT->sText.utf8_str()));
83   Sentence  sent = sentence_create(const_cast<char *>(pT->sText.utf8_str()),m_Dict);
84   if (!sent) return true;
85 
86   /* First parse with cost 0 or 1 and no null links */
87   parse_options_set_disjunct_cost(m_Opts, 2);
88   parse_options_set_min_null_count(m_Opts, 0);
89   parse_options_set_max_null_count(m_Opts, 0);
90   parse_options_set_islands_ok(m_Opts, 0);
91 #ifndef HAVE_LINK_GRAMMAR_51
92   parse_options_set_panic_mode(m_Opts, TRUE);
93 #endif
94   parse_options_reset_resources(m_Opts);
95   UT_sint32 num_linkages = sentence_parse(sent, m_Opts);
96   bool res =  (num_linkages >= 1);
97   if(TRUE == parse_options_timer_expired(m_Opts))
98   {
99     UT_DEBUGMSG(("!!! Timer expired! Mark valid anyway!\n"));
100     res= true; // Mark valid if it's too hard. FIXME. We can attempt to recover
101                // by tweaking paramters once we know what we're doing.
102   }
103   UT_UTF8String errStr = "";
104   if(!res && (num_linkages == 0))
105   {
106     // Now proces with NULL links. to find out what went wrong.
107     parse_options_set_min_null_count(m_Opts, 1);
108     parse_options_set_max_null_count(m_Opts, sentence_length(sent));
109     parse_options_set_islands_ok(m_Opts, 1);
110     parse_options_reset_resources(m_Opts);
111     num_linkages = sentence_parse(sent, m_Opts);
112   }
113   pT->m_bGrammarChecked = true;
114   pT->m_bGrammarOK = res;
115   if(!res)
116   {
117     UT_GenericVector<AbiGrammarError *> vecMapOfWords;
118     //
119     // Get first linkage
120     //
121     AbiGrammarError * pErr = NULL;
122     if(num_linkages > 0)
123     {
124       Linkage linkage = linkage_create(0, sent, m_Opts);
125       if(linkage != NULL)
126       {
127 	UT_sint32 i = 0;
128 	UT_sint32 iLow= 0;
129 	UT_sint32 iHigh= 0;
130 	UT_sint32 iOff = pT->iInLow;
131 	const char * szSent = pT->sText.utf8_str();
132 	UT_sint32 totlen = strlen(szSent);
133 	for (i=1; i<sentence_length(sent) && (iLow < totlen); i++)
134 	{
135 	  //
136 	  // NULL link island.
137 	  //
138 	  //    UT_DEBUGMSG((" iLow %d szSent[iLow] %c\n",iLow,szSent[iLow]));
139 	  while((szSent[iLow] == ' ') && (iLow < totlen))
140 	  {
141 	    //UT_DEBUGMSG((" iLow %d szSent[iLow] %c\n",iLow,szSent[i]));
142 	    iLow++;
143 	  }
144 	  if(iLow >= totlen)
145 	  {
146 	    //UT_DEBUGMSG(("Error ! ran off the end! iLow %d \n Text |%s|\n",iLow,szSent));
147 	    break;
148 	  }
149 	  AbiGrammarError * pWordMap = new  AbiGrammarError();
150 	  pWordMap->m_iErrLow = iLow;
151 	  pWordMap->m_iErrHigh = iLow + strlen(linkage_get_word(linkage, i));
152 	  pWordMap->m_iWordNum = i;
153 	  vecMapOfWords.addItem(pWordMap);
154 	  bool bNew = false;
155 
156 	  //UT_DEBUGMSG(("|%s| NULL LINK\n",sent->word[i].string));
157 	  if(pErr == NULL)
158 	  {
159 	    pErr = new AbiGrammarError();
160 	    bNew = true;
161 	  }
162 	  if(bNew || (pErr->m_iWordNum + 1 < i))
163 	  {
164 	    if(!bNew)
165 	    {
166 		  if(pErr)
167 		  {
168 		    delete pErr;
169 		  }
170 		  pErr = new AbiGrammarError();
171 	    }
172 	    iHigh = iLow + strlen(linkage_get_word(linkage, i));
173 	    pErr->m_iErrLow = iLow + iOff -1;
174 	    pErr->m_iErrHigh = iHigh + iOff -1;
175 	    if(pErr->m_iErrLow < 0)
176 	    {
177 		  pErr->m_iErrLow = 0;
178 	    }
179 	    if(pErr->m_iErrHigh < totlen-1)
180 	    {
181 		  pErr->m_iErrHigh += 1;
182 	    }
183 	    pErr->m_iWordNum = i;
184 	    // UT_DEBUGMSG(("Add Error %x low %d High %d\n",pErr,pErr->m_iErrLow,pErr->m_iErrHigh));
185 	    pT->m_vecGrammarErrors.addItem(pErr);
186 		pErr = NULL;
187 	  }
188 	  else
189 	  {
190 	    //
191 	    // Expand the sqiggle
192 	    //
193 	    iHigh = iLow + strlen(linkage_get_word(linkage, i)) + iOff;
194 	    pErr->m_iErrHigh = iHigh;
195 	    if(pErr->m_iErrHigh < totlen-1)
196 	    {
197 		  pErr->m_iErrHigh += 1;
198 	    }
199 	    pErr->m_iWordNum = i;
200 	  }
201 	  iLow += strlen(linkage_get_word(linkage, i));
202 	}
203 	//
204 	// No NULL links but still an error , mark the whole sentence bad.
205 	//
206 	if(pT->m_vecGrammarErrors.getItemCount() == 0)
207 	{
208       if(pErr)
209       {
210         delete pErr;
211       }
212 	  pErr = new AbiGrammarError();
213 	  pErr->m_iErrLow = pT->iInLow;
214 	  pErr->m_iErrHigh = pT->iInHigh;
215 	  if(pErr->m_iErrLow < 0)
216 	  {
217 	    pErr->m_iErrLow = 0;
218 	  }
219 	  // UT_DEBUGMSG(("Add Error %x low %d High %d\n",pErr,pErr->m_iErrLow,pErr->m_iErrHigh));
220 	  pT->m_vecGrammarErrors.addItem(pErr);
221 	  pErr->m_sErrorDesc = linkage_get_violation_name(linkage);
222 	  //UT_DEBUGMSG(("Complete Sentence had error %s\n",pErr->m_sErrorDesc.utf8_str()));
223 	  pErr = NULL;
224 	}
225 
226 	//	  for(i=0; i< pT->m_vecGrammarErrors.getItemCount(); i++)
227 	// {
228 	//    pErr = pT->m_vecGrammarErrors.getNthItem(i);
229 	//    UT_DEBUGMSG((" err %d iLow %d iHigh %d\n",i,pErr->m_iErrLow,pErr->m_iErrHigh));
230 	//  }
231 	UT_UTF8String sErr = linkage_get_violation_name(linkage);
232 	//	UT_DEBUGMSG(("Top Level error message |%s|\n",sErr.utf8_str()));
233 	linkage_delete(linkage);
234 	for(i=0; i<  vecMapOfWords.getItemCount(); i++)
235 	{
236 	  AbiGrammarError * p = vecMapOfWords.getNthItem(i);
237 	  delete p;
238 	}
239       }
240     }
241     else
242     {
243       if(pErr)
244       {
245         delete pErr;
246       }
247       pErr = new AbiGrammarError();
248       pErr->m_iErrLow = pT->iInLow;
249       pErr->m_iErrHigh = pT->iInHigh;
250       if(pErr->m_iErrLow < 0)
251       {
252 	pErr->m_iErrLow = 0;
253       }
254       //      UT_DEBUGMSG(("Final Add Error %x low %d High %d\n",pErr,pErr->m_iErrLow,pErr->m_iErrHigh));
255       pT->m_vecGrammarErrors.addItem(pErr);
256 	  pErr = NULL;
257     }
258     if(pErr)
259       delete pErr;
260   }
261   sentence_delete(sent);
262   return res;
263 }
264 
clear(void)265 bool LinkGrammarWrap::clear(void)
266 {
267   return true;
268 }
269