1 /*
2   Copyright (c) 1998 - 2015
3   ILK   - Tilburg University
4   CLiPS - University of Antwerp
5 
6   This file is part of timbl
7 
8   timbl is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3 of the License, or
11   (at your option) any later version.
12 
13   timbl is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, see <http://www.gnu.org/licenses/>.
20 
21   For questions and suggestions, see:
22       http://ilk.uvt.nl/software.html
23   or send mail to:
24       timbl@uvt.nl
25 */
26 
27 #include <algorithm>
28 #include <iostream>
29 #include <string>
30 #include <fstream>
31 
32 #include "timbl/TimblAPI.h"
33 
34 using namespace Timbl;
35 
36 using std::ifstream;
37 using std::ofstream;
38 using std::ios;
39 using std::cerr;
40 using std::cin;
41 using std::cout;
42 using std::endl;
43 using std::istream;
44 using std::string;
45 using std::equal;
46 using std::getline;
47 
// Upper bound on the number of experiments that can exist at the same time;
// it sizes the 'experiments' table.
#define MAX_EXP 10
// Upper bound on the number of parameters fill_params() extracts from one
// script line.
#define MAX_PARAMS 256
50 
// Compare two characters while ignoring their case.
// Both characters are converted to unsigned char before they are handed to
// toupper(): calling toupper() with a negative value (which a plain char
// can hold for bytes above 127) is undefined behaviour.
bool nocase_cmp( char c1, char c2 ){
  return toupper( static_cast<unsigned char>( c1 ) )
    == toupper( static_cast<unsigned char>( c2 ) );
}

// Check whether the first n characters of s1 and s2 are equal, ignoring case.
// Returns false when either string holds fewer than n characters, so no
// iterator is ever moved beyond the end of a string.
bool compare_nocase_n( const std::string& s1, const std::string& s2, size_t n ){
  if ( s1.size() < n || s2.size() < n ){
    return false;
  }
  return std::equal( s1.begin(), s1.begin()+n, s2.begin(), nocase_cmp );
}
63 
64 //
65 // here are the various scripting actions:
66 //
enum ActionType {
  UnknownAct,  // the line could not be turned into a valid action
  New,         // NEW
  Free,        // FREE
  Set,         // <name>.SET
  Show,        // <name>.SHOW
  Classify,    // <name>.CLASSIFY
  Train,       // <name>.TRAIN
  Test,        // <name>.TEST
  Increment,   // <name>.ADD
  Decrement,   // <name>.REM
  Expand,      // <name>.EXPAND
  Remove,      // <name>.REMOVE
  SaveTree,    // <name>.SAVE
  GetTree,     // <name>.GET
  SaveW,       // <name>.SAVEW
  GetW,        // <name>.GETW
  Quit         // QUIT
};
74 /*
75   The following scripting commands are implemented:
76   QUIT
77       stop all further actions.
78   NEW name <algo>
79       create an experiment with name 'name' of type 'algo'
80       algo can be IB1, IB2, TRIBL or IGTREE. Default: IB1
81   FREE name
82       delete the experiment with name 'name'
83   <name>.SET option value
84       set option of experiment name to value
85   <name>.SHOW OPTIONS
86       show all possible options with their default and current
87       values for experiment 'name'
88   <name>.SHOW SETTINGS
89       show all options with current settings of 'name'
90   <name>.TRAIN file
91       build an instancebase from file.
92   <name>.TEST file1 [file2]
93       classify all lines from file1, write results to file2 or
94       to file1.out if parameter file2 is not present
95   <name>.EXPAND file1
96       increment the database with contents of file1
97   <name>.REMOVE file1
98       decrement the database with contents of file1
99   <name>.CLASSIFY line
100       classify this line
101   <name>.ADD line
102       increment the database with line
103   <name>.REM line
104       decrement the database with line
105   <name>.SAVE file
106       save the instancebase of experiment name to file.
107   <name>.GET file
108       get the instancebase for experiment name from file.
109   <name>.SAVEW file
110       save the current weights of experiment name to file.
111   <name>.GETW file
112       get new weights for experiment name from file.
113  */
114 
115 TimblAPI *experiments[MAX_EXP];
116 int exp_cnt = 0;
117 
118 
119 // the following functions implement a simple parser to parse the
120 // script file, recognize experiment names and actions to perform
121 // on those experiments
122 
fill_params(string * params,const string & line)123 int fill_params( string *params, const string& line ){
124   // chop line into a bunch of parameters.
125   int i;
126   for ( i=0; i < MAX_PARAMS; i++ )
127     params[i] = "";
128   i = 0;
129   size_t len = line.length();
130   if ( line[0] == '"' && line[len-1] == '"' ) {
131     params[0] = string( line, 1, len-2 );
132     return 1;
133   }
134   for ( size_t u_i = 0; u_i < len; u_i++) {
135     if ( line[u_i] == ',' || line[u_i] == ' ' ){
136       if ( params[i] != "" ) // Don't accept zero length strings !
137 	++i;
138       if ( i >= MAX_PARAMS )
139 	break;
140     }
141     else
142       params[i] += line[u_i];
143   } // u_i
144   if ( i >= MAX_PARAMS ){
145     cerr << "too many parameters!" << endl;
146     return MAX_PARAMS+1;
147   }
148   if ( params[i] != "" ){ // last param ended at line end
149     i++;
150   }
151   return i;
152 }
153 
lookup(const string & name)154 int lookup( const string& name ){
155   // search for an experiment with the name 'name' in the list of
156   // known experiments.
157   int i;
158   for ( i=0; i < exp_cnt; i++ ){
159     if ( name == experiments[i]->ExpName() )
160       return i;
161   }
162   return -1;
163 }
164 
parse(const string & Buffer,int & pos,string * pars,int & len)165 ActionType parse( const string& Buffer, int &pos, string *pars, int &len ){
166   // here we parse lines of the script-file:
167   // first we take the first part and see if it is a NEW or FREE
168   // command which need special attention.
169   // otherwise we asume it to be the name of an experiment.
170   string Buf = TiCC::trim( Buffer );
171   len = 0;
172   if ( compare_nocase_n( Buf, "NEW ", 4 ) ){
173     len = fill_params( pars, Buf.substr(4) );
174     if ( ( pos = lookup( pars[0] ) ) != -1 ){
175       cerr << "you can't renew an experiment: " << Buf << endl;
176       return UnknownAct;
177     }
178     return New;
179   }
180   else if ( compare_nocase_n( Buf, "FREE ", 5 ) ){
181     len = fill_params( pars, Buf.substr(5) );
182     if ( (pos = lookup( pars[0] ) ) == -1 ){
183       cerr << "you can't free this unknown experiment: " << Buf << endl;
184       return UnknownAct;
185     }
186     return Free;
187   }
188   else if ( compare_nocase_n( Buf, "QUIT", 4 ) ){
189     return Quit;
190   }
191   else {
192     string expname;
193     string::size_type p = Buf.find( '.' );
194     if ( p == string::npos ){
195       cerr << "missing experiment reference!" << endl;
196       return UnknownAct;
197     }
198     else{
199       expname = Buf.substr(0, p );
200       pos = lookup( expname ); // do we know it.
201       if ( pos == -1 )
202 	return UnknownAct; // error
203       Buf = Buf.substr( p+1 );
204       // A well known experiment, so now we can see what we
205       // must do.
206       if ( compare_nocase_n( Buf, "SET ", 4 ) ){
207 	len = fill_params( pars, Buf.substr(4) );
208 	return Set;
209       }
210       else if ( compare_nocase_n( Buf, "SHOW ", 5 ) ){
211 	len = fill_params( pars, Buf.substr(5) );
212 	return Show;
213       }
214       else  if ( compare_nocase_n( Buf, "GET ", 4 ) ){
215 	len = fill_params( pars, Buf.substr(4) );
216 	return GetTree;
217       }
218       else if ( compare_nocase_n( Buf, "GETW ", 5 ) ){
219 	len = fill_params( pars, Buf.substr(5) );
220 	return GetW;
221       }
222       else if ( compare_nocase_n( Buf, "SAVE ", 5 ) ){
223 	len = fill_params( pars, Buf.substr(5) );
224 	return SaveTree;
225       }
226       else if ( compare_nocase_n( Buf, "SAVEW ", 6 ) ){
227 	len = fill_params( pars, Buf.substr(6) );
228 	return SaveW;
229       }
230       else if ( compare_nocase_n( Buf, "TRAIN ", 6 ) ){
231 	len = fill_params( pars, Buf.substr(6) );
232 	return Train;
233       }
234       else if ( compare_nocase_n( Buf, "EXPAND ", 7 ) ){
235 	len = fill_params( pars, Buf.substr(7) );
236 	return Expand;
237       }
238       else if ( compare_nocase_n( Buf, "REMOVE ", 7 ) ){
239 	len = fill_params( pars, Buf.substr(7) );
240 	return Remove;
241       }
242       else if ( compare_nocase_n( Buf, "TEST ", 5 ) ){
243 	len = fill_params( pars, Buf.substr(5) );
244 	return Test;
245       }
246       else if ( compare_nocase_n( Buf, "CLASSIFY ", 9 ) ){
247 	len = fill_params( pars, Buf.substr(9) );
248 	return Classify;
249       }
250       else if ( compare_nocase_n( Buf, "ADD ", 4 ) ){
251 	len = fill_params( pars, Buf.substr(4) );
252 	return Increment;
253       }
254       else if ( compare_nocase_n( Buf, "REM ", 4 ) ){
255 	len = fill_params( pars, Buf.substr(4) );
256 	return Decrement;
257       }
258       else
259 	return UnknownAct;
260     }
261   }
262 }
263 
one_command(istream & in_file,int & line_count)264 void one_command( istream &in_file, int &line_count ) {
265   // the actual "engine"
266   // get a line from in_file, parse it and take appropiate action
267   // Most of the time by directly calling a MBL Class function.
268   // of course some sanity checking is done here and there
269   static string *params = NULL;
270   int pos = -1, len;
271   if ( params == 0 ){
272     params = new string[MAX_PARAMS+1];
273   }
274   string Buffer;
275   getline( in_file, Buffer );
276   line_count++;
277   if ( Buffer == "" || Buffer[0] == '#' ){
278     return;
279   }
280   cerr << "TSE script, executing line: " << line_count<< endl
281        << "=== " << Buffer << endl;
282   ActionType action = parse( Buffer, pos, params, len );
283   if ( len >= MAX_PARAMS ){
284     cerr << "Too many parameters, skipping....." << endl;
285     return;
286   }
287   switch ( action ){
288   case Quit:
289     exit(1);
290     break;
291   case New: {
292     if ( exp_cnt == MAX_EXP ){
293       cerr << "To many different experiments in one run" << endl;
294       exit(1);
295     }
296     if ( len == 0 ){
297       cerr << " Wrong number of parameters for New" << endl;
298       exit(1);
299     }
300     string cmnd;
301     if ( len == 1 ){
302       cerr << "1 parameters " << params[0] << endl;
303       cmnd = "-a IB1";
304     }
305     else {
306       for ( int i=1; i < len; ++i )
307 	cmnd += params[i] + " ";
308     }
309     experiments[exp_cnt++] = new TimblAPI( cmnd, params[0] );
310     cerr << "Created a new experiment: "
311 	 << experiments[exp_cnt-1]->ExpName() << endl;
312     break;
313   }
314   case Free:
315     delete experiments[pos];
316     exp_cnt--;
317     for ( ; pos < exp_cnt; pos++ ){
318       experiments[pos] = experiments[pos+1];
319     }
320     experiments[exp_cnt] = 0;
321     break;
322   case GetTree:
323     if ( len == 0 )
324       cerr << "missing filename to retrieve InstanceBase" << endl;
325     else
326       experiments[pos]->GetInstanceBase( params[0] );
327     break;
328   case SaveTree:
329     if ( len == 0 ){
330       params[0] = experiments[pos]->ExpName() + ".tree";
331     }
332     else
333       experiments[pos]->WriteInstanceBase(params[0]);
334     break;
335   case GetW:
336     if ( len == 0 ) {
337       params[0] = experiments[pos]->ExpName() + ".weights";
338     }
339     else
340       experiments[pos]->GetWeights(params[0]);
341     break;
342   case SaveW:
343     if ( len == 0 ){
344       params[0] = experiments[pos]->ExpName() + ".weights";
345     }
346     else
347       experiments[pos]->SaveWeights(params[0]);
348     break;
349   case Show:
350     if ( len != 1 )
351       cerr << "missing information about WHAT to show" << endl;
352     else {
353       if ( compare_nocase( params[0], "OPTIONS" ) )
354 	experiments[pos]->ShowOptions( cerr );
355       else if ( compare_nocase( params[0], "SETTING" ) )
356 	experiments[pos]->ShowSettings( cerr );
357       else
358 	cerr << "don't know how to show '" << params[0] << "'" << endl;
359     }
360     break;
361   case Train:
362     if ( len == 1 )
363       experiments[pos]->Learn(params[0]);
364     else
365       cerr << "missing filename for Train" << endl;
366     break;
367   case Expand:
368     if ( len == 1 )
369       experiments[pos]->Expand(params[0]);
370     else
371       cerr << "missing filename for Expand" << endl;
372     break;
373   case Remove:
374     if ( len == 1 )
375       experiments[pos]->Remove(params[0]);
376     else
377       cerr << "missing filename for Remove" << endl;
378     break;
379   case Test: {
380     switch ( len ){
381     case 0:
382       cerr << "missing filename for Test" << endl;
383       return;
384       break;
385     case 1:
386       params[1] = params[0] + ".out";
387       break;
388     case 2:
389       break;
390     default:
391       cerr << "too many parameters for Test, (ignored)" << endl;
392     }
393     experiments[pos]->Test( params[0], params[1] );
394     break;
395   }
396   case Classify:
397     if ( len == 1 ){
398       const TargetValue *tv = experiments[pos]->Classify(params[0]);
399       cout << "classify: " << params[0] << " ==> " << tv << endl;
400     }
401     else
402       cerr << "missing instancestring for Add" << endl;
403     break;
404   case Increment:
405     if ( len == 1 )
406       experiments[pos]->Increment(params[0]);
407     else
408       cerr << "missing instancestring for Add" << endl;
409     break;
410   case Decrement:
411     if ( len == 1 )
412       experiments[pos]->Decrement(params[0]);
413     else
414       cerr << "missing instancestring for Remove" << endl;
415     break;
416   case Set:
417     if ( len != 1 ){
418       for ( int j=1; j < len; j++ )
419 	params[0] += params[j];
420     }
421     if ( !experiments[pos]->SetOptions( params[0] ) )
422       cerr << "problem with Set " << params[0] << endl;
423     break;
424   case UnknownAct:
425     if ( pos < 0 )
426       cerr << "[" << line_count << "]" << Buffer
427 	   << "  ==> Unknown experiment, skipped\n" << endl;
428     else
429       cerr << "[" << line_count << "] " << Buffer
430 	   << "  ==> Unknown action, skipped\n" << endl;
431     break;
432   }
433 }
434 
main(int argc,char * argv[])435 int main(int argc, char *argv[] ){
436   // the following trick makes it possible to parse lines from cin
437   // as well from a user supplied file.
438   istream *script_file;
439   ifstream test_file;
440   if ( argc > 1 ){
441     if ( (test_file.open( argv[1], ios::in ), !test_file.good() ) ){
442       cerr << argv[0] << " - couldn't open scriptfile " << argv[1] << endl;
443       exit(1);
444     }
445     cout << "reading script from: " << argv[1] << endl;
446     script_file = &test_file;
447   }
448   else
449     script_file = &cin;
450   int line = 0;
451   while ( !(*script_file).eof() )
452     one_command( *script_file, line );
453   exit(0);
454 }
455 
456