1 /*
2 Copyright (c) 1998 - 2015
3 ILK - Tilburg University
4 CLiPS - University of Antwerp
5
6 This file is part of timbl
7
8 timbl is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 timbl is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 For questions and suggestions, see:
22 http://ilk.uvt.nl/software.html
23 or send mail to:
24 timbl@uvt.nl
25 */
26
27 #include <algorithm>
28 #include <iostream>
29 #include <string>
30 #include <fstream>
31
32 #include "timbl/TimblAPI.h"
33
34 using namespace Timbl;
35
36 using std::ifstream;
37 using std::ofstream;
38 using std::ios;
39 using std::cerr;
40 using std::cin;
41 using std::cout;
42 using std::endl;
43 using std::istream;
44 using std::string;
45 using std::equal;
46 using std::getline;
47
// Upper bound on the number of experiments that can exist at the same time;
// it is the size of the global 'experiments' table.
const int MAX_EXP = 10;
// Upper bound on the number of parameters accepted on one script line.
const int MAX_PARAMS = 256;
50
// Case-insensitive equality test for two single characters.
// Returns true when c1 and c2 are equal after converting both to upper case.
bool nocase_cmp( char c1, char c2 ){
  // toupper() is only defined for values that fit in an unsigned char
  // (or EOF). A plain char may be negative, so convert before the call.
  return toupper( static_cast<unsigned char>( c1 ) )
    == toupper( static_cast<unsigned char>( c2 ) );
}

// Case-insensitive comparison of the first n characters of s1 and s2.
//
// Returns true when both strings hold at least n characters and those first
// n characters are pairwise equal according to nocase_cmp().
// Returns false otherwise, including when either string is shorter than n.
bool compare_nocase_n( const string& s1, const string& s2, size_t n ){
  // equal() walks n positions in BOTH strings; refuse to do so when either
  // of them ends earlier, instead of reading past its end.
  if ( s1.size() < n || s2.size() < n ){
    return false;
  }
  return equal( s1.begin(), s1.begin() + n, s2.begin(), nocase_cmp );
}
63
64 //
65 // here are the various scripting actions:
66 //
// Every action a script line can resolve to. parse() produces one of these
// values and one_command() dispatches on it. The keyword that parse() maps
// onto each action is given alongside.
enum ActionType {
  UnknownAct,  // the line could not be resolved to a known action
  New,         // NEW
  Free,        // FREE
  Set,         // <name>.SET
  Show,        // <name>.SHOW
  Classify,    // <name>.CLASSIFY
  Train,       // <name>.TRAIN
  Test,        // <name>.TEST
  Increment,   // <name>.ADD
  Decrement,   // <name>.REM
  Expand,      // <name>.EXPAND
  Remove,      // <name>.REMOVE
  SaveTree,    // <name>.SAVE
  GetTree,     // <name>.GET
  SaveW,       // <name>.SAVEW
  GetW,        // <name>.GETW
  Quit         // QUIT
};
74 /*
75 The following scripting commands are implemented:
76 QUIT
77 stop all further actions.
78 NEW name <algo>
79 create an experiment with name 'name' of type 'algo'
80 algo can be IB1, IB2, TRIBL or IGTREE. Default: IB1
81 FREE name
82 delete the experiment with name 'name'
83 <name>.SET option value
84 set option of experiment name to value
85 <name>.SHOW OPTIONS
86 show all possible options with their default and current
87 values for experiment 'name'
88 <name>.SHOW SETTINGS
89 show all options with current settings of 'name'
90 <name>.TRAIN file
91 build an instancebase from file.
92 <name>.TEST file1 [file2]
93 classify all lines from file1, write results to file2 or
94 to file1.out if parameter file2 is not present
95 <name>.EXPAND file1
96 increment the database with contents of file1
97 <name>.REMOVE file1
98 decrement the database with contents of file1
99 <name>.CLASSIFY line
100 classify this line
101 <name>.ADD line
102 increment the database with line
103 <name>.REM line
104 decrement the database with line
105 <name>.SAVE file
106 save the instancebase of experiment name to file.
107 <name>.GET file
108 get the instancebase for experiment name from file.
109 <name>.SAVEW file
110 save the current weights of experiment name to file.
111 <name>.GETW file
112 get new weights for experiment name from file.
113 */
114
115 TimblAPI *experiments[MAX_EXP];
116 int exp_cnt = 0;
117
118
119 // the following functions implement a simple parser to parse the
120 // script file, recognize experiment names and actions to perform
121 // on those experiments
122
fill_params(string * params,const string & line)123 int fill_params( string *params, const string& line ){
124 // chop line into a bunch of parameters.
125 int i;
126 for ( i=0; i < MAX_PARAMS; i++ )
127 params[i] = "";
128 i = 0;
129 size_t len = line.length();
130 if ( line[0] == '"' && line[len-1] == '"' ) {
131 params[0] = string( line, 1, len-2 );
132 return 1;
133 }
134 for ( size_t u_i = 0; u_i < len; u_i++) {
135 if ( line[u_i] == ',' || line[u_i] == ' ' ){
136 if ( params[i] != "" ) // Don't accept zero length strings !
137 ++i;
138 if ( i >= MAX_PARAMS )
139 break;
140 }
141 else
142 params[i] += line[u_i];
143 } // u_i
144 if ( i >= MAX_PARAMS ){
145 cerr << "too many parameters!" << endl;
146 return MAX_PARAMS+1;
147 }
148 if ( params[i] != "" ){ // last param ended at line end
149 i++;
150 }
151 return i;
152 }
153
lookup(const string & name)154 int lookup( const string& name ){
155 // search for an experiment with the name 'name' in the list of
156 // known experiments.
157 int i;
158 for ( i=0; i < exp_cnt; i++ ){
159 if ( name == experiments[i]->ExpName() )
160 return i;
161 }
162 return -1;
163 }
164
parse(const string & Buffer,int & pos,string * pars,int & len)165 ActionType parse( const string& Buffer, int &pos, string *pars, int &len ){
166 // here we parse lines of the script-file:
167 // first we take the first part and see if it is a NEW or FREE
168 // command which need special attention.
169 // otherwise we asume it to be the name of an experiment.
170 string Buf = TiCC::trim( Buffer );
171 len = 0;
172 if ( compare_nocase_n( Buf, "NEW ", 4 ) ){
173 len = fill_params( pars, Buf.substr(4) );
174 if ( ( pos = lookup( pars[0] ) ) != -1 ){
175 cerr << "you can't renew an experiment: " << Buf << endl;
176 return UnknownAct;
177 }
178 return New;
179 }
180 else if ( compare_nocase_n( Buf, "FREE ", 5 ) ){
181 len = fill_params( pars, Buf.substr(5) );
182 if ( (pos = lookup( pars[0] ) ) == -1 ){
183 cerr << "you can't free this unknown experiment: " << Buf << endl;
184 return UnknownAct;
185 }
186 return Free;
187 }
188 else if ( compare_nocase_n( Buf, "QUIT", 4 ) ){
189 return Quit;
190 }
191 else {
192 string expname;
193 string::size_type p = Buf.find( '.' );
194 if ( p == string::npos ){
195 cerr << "missing experiment reference!" << endl;
196 return UnknownAct;
197 }
198 else{
199 expname = Buf.substr(0, p );
200 pos = lookup( expname ); // do we know it.
201 if ( pos == -1 )
202 return UnknownAct; // error
203 Buf = Buf.substr( p+1 );
204 // A well known experiment, so now we can see what we
205 // must do.
206 if ( compare_nocase_n( Buf, "SET ", 4 ) ){
207 len = fill_params( pars, Buf.substr(4) );
208 return Set;
209 }
210 else if ( compare_nocase_n( Buf, "SHOW ", 5 ) ){
211 len = fill_params( pars, Buf.substr(5) );
212 return Show;
213 }
214 else if ( compare_nocase_n( Buf, "GET ", 4 ) ){
215 len = fill_params( pars, Buf.substr(4) );
216 return GetTree;
217 }
218 else if ( compare_nocase_n( Buf, "GETW ", 5 ) ){
219 len = fill_params( pars, Buf.substr(5) );
220 return GetW;
221 }
222 else if ( compare_nocase_n( Buf, "SAVE ", 5 ) ){
223 len = fill_params( pars, Buf.substr(5) );
224 return SaveTree;
225 }
226 else if ( compare_nocase_n( Buf, "SAVEW ", 6 ) ){
227 len = fill_params( pars, Buf.substr(6) );
228 return SaveW;
229 }
230 else if ( compare_nocase_n( Buf, "TRAIN ", 6 ) ){
231 len = fill_params( pars, Buf.substr(6) );
232 return Train;
233 }
234 else if ( compare_nocase_n( Buf, "EXPAND ", 7 ) ){
235 len = fill_params( pars, Buf.substr(7) );
236 return Expand;
237 }
238 else if ( compare_nocase_n( Buf, "REMOVE ", 7 ) ){
239 len = fill_params( pars, Buf.substr(7) );
240 return Remove;
241 }
242 else if ( compare_nocase_n( Buf, "TEST ", 5 ) ){
243 len = fill_params( pars, Buf.substr(5) );
244 return Test;
245 }
246 else if ( compare_nocase_n( Buf, "CLASSIFY ", 9 ) ){
247 len = fill_params( pars, Buf.substr(9) );
248 return Classify;
249 }
250 else if ( compare_nocase_n( Buf, "ADD ", 4 ) ){
251 len = fill_params( pars, Buf.substr(4) );
252 return Increment;
253 }
254 else if ( compare_nocase_n( Buf, "REM ", 4 ) ){
255 len = fill_params( pars, Buf.substr(4) );
256 return Decrement;
257 }
258 else
259 return UnknownAct;
260 }
261 }
262 }
263
one_command(istream & in_file,int & line_count)264 void one_command( istream &in_file, int &line_count ) {
265 // the actual "engine"
266 // get a line from in_file, parse it and take appropiate action
267 // Most of the time by directly calling a MBL Class function.
268 // of course some sanity checking is done here and there
269 static string *params = NULL;
270 int pos = -1, len;
271 if ( params == 0 ){
272 params = new string[MAX_PARAMS+1];
273 }
274 string Buffer;
275 getline( in_file, Buffer );
276 line_count++;
277 if ( Buffer == "" || Buffer[0] == '#' ){
278 return;
279 }
280 cerr << "TSE script, executing line: " << line_count<< endl
281 << "=== " << Buffer << endl;
282 ActionType action = parse( Buffer, pos, params, len );
283 if ( len >= MAX_PARAMS ){
284 cerr << "Too many parameters, skipping....." << endl;
285 return;
286 }
287 switch ( action ){
288 case Quit:
289 exit(1);
290 break;
291 case New: {
292 if ( exp_cnt == MAX_EXP ){
293 cerr << "To many different experiments in one run" << endl;
294 exit(1);
295 }
296 if ( len == 0 ){
297 cerr << " Wrong number of parameters for New" << endl;
298 exit(1);
299 }
300 string cmnd;
301 if ( len == 1 ){
302 cerr << "1 parameters " << params[0] << endl;
303 cmnd = "-a IB1";
304 }
305 else {
306 for ( int i=1; i < len; ++i )
307 cmnd += params[i] + " ";
308 }
309 experiments[exp_cnt++] = new TimblAPI( cmnd, params[0] );
310 cerr << "Created a new experiment: "
311 << experiments[exp_cnt-1]->ExpName() << endl;
312 break;
313 }
314 case Free:
315 delete experiments[pos];
316 exp_cnt--;
317 for ( ; pos < exp_cnt; pos++ ){
318 experiments[pos] = experiments[pos+1];
319 }
320 experiments[exp_cnt] = 0;
321 break;
322 case GetTree:
323 if ( len == 0 )
324 cerr << "missing filename to retrieve InstanceBase" << endl;
325 else
326 experiments[pos]->GetInstanceBase( params[0] );
327 break;
328 case SaveTree:
329 if ( len == 0 ){
330 params[0] = experiments[pos]->ExpName() + ".tree";
331 }
332 else
333 experiments[pos]->WriteInstanceBase(params[0]);
334 break;
335 case GetW:
336 if ( len == 0 ) {
337 params[0] = experiments[pos]->ExpName() + ".weights";
338 }
339 else
340 experiments[pos]->GetWeights(params[0]);
341 break;
342 case SaveW:
343 if ( len == 0 ){
344 params[0] = experiments[pos]->ExpName() + ".weights";
345 }
346 else
347 experiments[pos]->SaveWeights(params[0]);
348 break;
349 case Show:
350 if ( len != 1 )
351 cerr << "missing information about WHAT to show" << endl;
352 else {
353 if ( compare_nocase( params[0], "OPTIONS" ) )
354 experiments[pos]->ShowOptions( cerr );
355 else if ( compare_nocase( params[0], "SETTING" ) )
356 experiments[pos]->ShowSettings( cerr );
357 else
358 cerr << "don't know how to show '" << params[0] << "'" << endl;
359 }
360 break;
361 case Train:
362 if ( len == 1 )
363 experiments[pos]->Learn(params[0]);
364 else
365 cerr << "missing filename for Train" << endl;
366 break;
367 case Expand:
368 if ( len == 1 )
369 experiments[pos]->Expand(params[0]);
370 else
371 cerr << "missing filename for Expand" << endl;
372 break;
373 case Remove:
374 if ( len == 1 )
375 experiments[pos]->Remove(params[0]);
376 else
377 cerr << "missing filename for Remove" << endl;
378 break;
379 case Test: {
380 switch ( len ){
381 case 0:
382 cerr << "missing filename for Test" << endl;
383 return;
384 break;
385 case 1:
386 params[1] = params[0] + ".out";
387 break;
388 case 2:
389 break;
390 default:
391 cerr << "too many parameters for Test, (ignored)" << endl;
392 }
393 experiments[pos]->Test( params[0], params[1] );
394 break;
395 }
396 case Classify:
397 if ( len == 1 ){
398 const TargetValue *tv = experiments[pos]->Classify(params[0]);
399 cout << "classify: " << params[0] << " ==> " << tv << endl;
400 }
401 else
402 cerr << "missing instancestring for Add" << endl;
403 break;
404 case Increment:
405 if ( len == 1 )
406 experiments[pos]->Increment(params[0]);
407 else
408 cerr << "missing instancestring for Add" << endl;
409 break;
410 case Decrement:
411 if ( len == 1 )
412 experiments[pos]->Decrement(params[0]);
413 else
414 cerr << "missing instancestring for Remove" << endl;
415 break;
416 case Set:
417 if ( len != 1 ){
418 for ( int j=1; j < len; j++ )
419 params[0] += params[j];
420 }
421 if ( !experiments[pos]->SetOptions( params[0] ) )
422 cerr << "problem with Set " << params[0] << endl;
423 break;
424 case UnknownAct:
425 if ( pos < 0 )
426 cerr << "[" << line_count << "]" << Buffer
427 << " ==> Unknown experiment, skipped\n" << endl;
428 else
429 cerr << "[" << line_count << "] " << Buffer
430 << " ==> Unknown action, skipped\n" << endl;
431 break;
432 }
433 }
434
main(int argc,char * argv[])435 int main(int argc, char *argv[] ){
436 // the following trick makes it possible to parse lines from cin
437 // as well from a user supplied file.
438 istream *script_file;
439 ifstream test_file;
440 if ( argc > 1 ){
441 if ( (test_file.open( argv[1], ios::in ), !test_file.good() ) ){
442 cerr << argv[0] << " - couldn't open scriptfile " << argv[1] << endl;
443 exit(1);
444 }
445 cout << "reading script from: " << argv[1] << endl;
446 script_file = &test_file;
447 }
448 else
449 script_file = &cin;
450 int line = 0;
451 while ( !(*script_file).eof() )
452 one_command( *script_file, line );
453 exit(0);
454 }
455
456