1 // crm_neural_net.c - a neural net classifier
2
3 // Copyright 2009 William S. Yerazunis.
4 // This file is under GPLv3, as described in COPYING.
5
6 /////////////////////////////////////////////////////////////////////
7 // derived from crm_osb_hyperspace.c by Joe Langeway, rehacked by
8 // Bill Yerazunis. Since this code is directly derived from
9 // crm_osb_hyperspace.c and produced for the crm114 project so:
10 //
11 /////////////////////////////////////////////////////////////////////
12 //
13 // Original spec by Bill Yerazunis, original code by Joe Langeway,
14 // recode for CRM114 use by Bill Yerazunis.
15 //
16 // This file of code (crm_neural_net* and subsidiary routines) is
17 // dual-licensed to both William S. Yerazunis and Joe Langeway,
18 // including the right to reuse this code in any way desired,
19 // including the right to relicense it under any other terms as
20 // desired.
21 //
22 //////////////////////////////////////////////////////////////////////
23
24 //////////////////////////////////////////////////////////////////////
25 // This file is part of on going research and should not be considered
26 // a finished product, a reliable tool, an example of good software
27 // engineering, or a reflection of any quality of Joe's besides his
28 // tendancy towards long hours.
29 //////////////////////////////////////////////////////////////////////
30
31 // Here's what's going on:
32 //
33 // It's three-layer Kohonen-flavor feed-forward neural net, trained by
34 // accumulating weight updates, calculated with simple gradiant descent
35 // and back propagation, to apply once for all documents.
36 //
37 // NB: the <bychunk> flag changes this behavior to train the results of each
38 // document as the document is encountered. This is _usually_ a good idea.
39 //
40 // The <fromstart> means "reinitialize the net". Use this if things get
41 // wedged.
42 //
43 // The <append> flag means _do not retrain_. Instead, just log this data
44 // into the file and return ASAP (use this to load up a bunch of data
45 // and then train it all, which is computationally cheaper)
46
47 // include some standard files
48 #include "crm114_sysincludes.h"
49
50 // include any local crm114 configuration file
51 #include "crm114_config.h"
52
53 // include the crm114 data structures file
54 #include "crm114_structs.h"
55
56 // and include the routine declarations file
57 #include "crm114.h"
58
59 // the globals used when we need a big buffer - allocated once, used
60 // wherever needed. These are sized to the same size as the data window.
61 // do not mut these or random binary shall be shat upon thee
62 extern char *outbuf;
63
64 //static int neural_trace = 1;
65 static int neural_trace = 0;
66
67 #define HEADER_SIZE 1024
68
69 #define STOCHASTIC
70 //#define TRAIN_PER_DOCUMENT
71 //#define ACCUMULATE_TRAINING
72
73 // DESCRIPTION OF A NEURAL NETWORK
74 //
75 // retina_size is number of inputs to the first layer; feature IDs need
76 // to be moduloed down to this value.
77 // first_layer_size is the number neurons in the first layer. Every neuron
78 // sums retina_size weighted values from each of the
79 // retina inputs
80 // hidden_layer_size is the number of neurons in the hidden layer. Each of
81 // these neurons sums first_layer_size weighted outputs
82 // of the first layer
83 // ... and of course, in this implementation, the final layer is just TWO
84 // neurons (in-class and out-of-class), summing up
85 // hidden_layer_size weighted outputs.
86
87
88 typedef struct mythical_neural_net_header
89 {
90 int retina_size, first_layer_size, hidden_layer_size;
91
92 } NEURAL_NET_HEAD_STRUCT;
93
94 typedef struct mythical_neural_net
95 {
96 // The W's are the weights
97 float *Win, *Whid, *Wout;
98 // these are the activation intensities
99 float *retina, *first_layer, *hidden_layer, output_layer[2];
100 // these are the learning deltas
101 float *delta_first_layer, *delta_hidden_layer, delta_output_layer[2];
102 unsigned *docs_start, *docs_end;
103 int retina_size, first_layer_size, hidden_layer_size;
104 void *file_origin;
105 } NEURAL_NET_STRUCT;
106
107 // Three little functions to turn bare pointers to the neural
108 // network coefficient arrays into *floats so they can be read
109 // or written. This hack is because C doesn't do multidimensional
110 // arrays with runtime defined dimensions very well.
111 //
112 // Just to remind you- nn is the neural net, "neuron" is the neuron being
113 // activated, and "channel" is the input channel on that neuron.
114 // (note that this means actual storage is in "odometer" format -
115 // the last arg varying fastest).
116 //
arefWin(NEURAL_NET_STRUCT * nn,int neuron,int channel)117 inline static float *arefWin(NEURAL_NET_STRUCT *nn, int neuron, int channel)
118 {
119 // if (neuron < 0 || neuron > nn->first_layer_size)
120 // fprintf (stderr, "bad neuron number %ld in first layer calc\n", neuron);
121 //if (channel < 0 || channel > nn->retina_size)
122 // fprintf (stderr, "bad channel number %ld in first layer calc\n", channel);
123 // fprintf (stderr, "nn = %lx, Win = %lx, neuron = %ld, channel = %ld\n",
124 // nn, nn->Win, neuron, channel);
125 return &(nn->Win [(neuron * nn->retina_size) + channel]);
126 }
127
128 // Same deal, nn is the net, "neuron" is the neuron number, "channel"
129 // is the channel on that neuron.
arefWhid(NEURAL_NET_STRUCT * nn,int neuron,int channel)130 inline static float *arefWhid(NEURAL_NET_STRUCT *nn, int neuron, int channel)
131 {
132 if (neuron < 0 || neuron > nn->hidden_layer_size)
133 fprintf (stderr, "bad neuron number %d in hidden layer calc\n", neuron);
134 if (channel < 0 || channel > nn->first_layer_size)
135 fprintf (stderr, "bad channel number %d in hidden layer calc\n", channel);
136 return & (nn->Whid[( neuron * nn->first_layer_size) + channel]);
137 }
138
139 // Final layer, same deal;, nn is the net, "neuron" is the neuron
140 // number (here, 0 or 1), "channel" is the channel on that neuron.
arefWout(NEURAL_NET_STRUCT * nn,int neuron,int channel)141 inline static float *arefWout(NEURAL_NET_STRUCT *nn, int neuron, int channel)
142 {
143 if (neuron < 0 || neuron > 1)
144 fprintf (stderr, "bad neuron number %d in final layer calc\n", neuron);
145 if (channel < 0 || channel > nn->hidden_layer_size)
146 fprintf (stderr, "bad channel number %d in final layer calc\n", channel);
147 return &(nn->Wout [(neuron * nn->hidden_layer_size) + channel]);
148 }
149
sign(float x)150 inline static float sign (float x)
151 {
152 if (x < 0.0) return -1;
153 return 1;
154 }
155
156 // Stochastic_factor is a "noise term" to keep the net shaken up a bit;
157 // this prevents getting stuck in local minima (one of which can occur
158 // when a weight approaches zero, because then the motion becomes *very*
159 // small (alpha, the motion factor, is usually << 1.0) and it's not
160 // possible for the weight value to "jump over" zero and
161 // go negative. This causes that weight to lock; if enough weights lock,
162 // the net fails to converge.
163 //
164 // There are two parameters used (both are static local vars!). One is
165 // the maximum noise added (recommended to be about the same value as
166 // alpha) and the other is the drop rate, which is how fast the noise
167 // diminishes with increasing learning cycles. This is measured in
168 // terms of the soft cycle limit and epoch number; perhaps it would be
169 // better if the drop was actually a 1/R curve (Boltzmann curve) but
170 // this works well.
171 //
172 // Note: the following also worked pretty well:
173 // ((0.05 * (double)rand() / (double) RAND_MAX) - 0.025)
174
rand0to1()175 inline static double rand0to1()
176 {
177 return ((double)rand() / (double)RAND_MAX);
178 }
179
stochastic_factor(float stoch_noise,int soft_cycle_limit,int epoch_num)180 inline static float stochastic_factor (float stoch_noise,
181 int soft_cycle_limit,
182 int epoch_num)
183 {
184 float v;
185
186 v = ( rand0to1() - 0.5) * stoch_noise;
187 // v = ((((double)rand ()) / ((double) RAND_MAX))) * stoch_noise;
188 // v = v * ( 1 / ( epoch_num + 1));
189 // v = v *
190 // (1 - (epoch_num / soft_cycle_limit));
191 return (v);
192 };
193
194 // Gain noise is noise applied to the training impulse, rather than
195 // gain applied to the weights
gain_noise_factor()196 inline static double gain_noise_factor ()
197 {
198 return ( rand0to1() * NN_DEFAULT_GAIN_NOISE) ;
199 };
200
201
202 // These are used _only_ when a new net needs to be created;
203 // the actual net dimensions used usually come from the file header
204 int retina_size = NN_RETINA_SIZE;
205 int first_layer_size = NN_FIRST_LAYER_SIZE;
206 int hidden_layer_size = NN_HIDDEN_LAYER_SIZE;
207
208 /*
209
210 Another (as yet unimplemented) idea is to stripe the retina in a
211 pattern such that each neuron is "up against" every other neuron at
212 least once, but not as many times as the current "full crossbar"
213 system, which takes a long time to train.
214
215 One way to do this is to label the points onto a fully connected graph
216 nodes, and then to take the graph nodes in pairwise (or larger) groups
217 such as, on edges, faces, or hyperfaces, so that every graph node is
218 taken against every other node and hence every input at one level is
219 "against" every other input. However, we haven't implemented this.
220
221 Another yet unimplemented option is to go with a square "Hopfield"
222 type network instead of the three-layer Kohonen network implemented
223 here. The problem with the Hopfield network is that although the
224 learning method is simpler (simple Hebbian learning works - that is,
225 clamp the known inputs and known outputs, then for each neuron if the
226 inputs match outputs, increase the weights, else decrease the
227 weights.) However, in a Hopfield the learning ability is smaller; for
228 N neurons the number of patterns that can be learned is only N / (2
229 log N), from McLiece (1987). So, with 1000 neurons (1,000,000 coeffs,
230 or a 4 megabyte backing storage file) we could learn only about 70
231 patterns.
232
233 This isn't to say we'll never implement a modified Hopfield net
234 (with a large number of clamped "neurons") but it's not here yet.
235
236 */
237
238
239
make_new_backing_file(char * filename)240 static int make_new_backing_file(char *filename)
241 {
242 int i;
243 FILE *f;
244 NEURAL_NET_HEAD_STRUCT h;
245 float a;
246 f = fopen(filename, "wb");
247 if(!f)
248 {
249 nonfatalerror5("unable to create neural network backing file", filename,
250 CRM_ENGINE_HERE);
251 return -1;
252 }
253
254
255 h.retina_size = retina_size;
256 h.first_layer_size = first_layer_size;
257 h.hidden_layer_size = hidden_layer_size;
258
259 if(sparse_spectrum_file_length)
260 {
261 h.first_layer_size = sqrt (sparse_spectrum_file_length / 1024);
262 if (h.first_layer_size < 4) h.first_layer_size = 4;
263 h.hidden_layer_size = h.first_layer_size ;
264 h.retina_size = h.first_layer_size * 1024 ;
265 if (h.retina_size < 1024) h.retina_size = 1024;
266 }
267 if (neural_trace || user_trace )
268 fprintf (stderr, "Input sparse_spectrum_file_length = %ld. \n"
269 "new neural net dimensions:\n retina width=%d, "
270 "first layer neurons = %d, hidden layer neurons = %d\n",
271 sparse_spectrum_file_length, retina_size,
272 first_layer_size, hidden_layer_size);
273
274 // Write out the header fields
275 dontcare = fwrite(&h, 1, sizeof(NEURAL_NET_HEAD_STRUCT), f);
276
277 // Pad out the header space to header_size
278 for(i = sizeof(NEURAL_NET_HEAD_STRUCT); i < HEADER_SIZE; i++)
279 {
280 fputc('\0', f);
281 };
282
283 // and write random small floating points into the
284 // weighting.
285 i = (retina_size * first_layer_size)
286 + (first_layer_size * hidden_layer_size)
287 + (hidden_layer_size * 2);
288 if (neural_trace)
289 {
290 fprintf (stderr, "Putting out %d coefficients.\n", i);
291 fprintf (stderr, "Initial weight ");
292 i--;
293 a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
294 fprintf (stderr, "%f ", a);
295 dontcare = fwrite(&a, 1, sizeof(float), f);
296 i--;
297 a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
298 fprintf (stderr, "%f ", a);
299 dontcare = fwrite(&a, 1, sizeof(float), f);
300 i--;
301 a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
302 fprintf (stderr, "%f ", a);
303 dontcare = fwrite(&a, 1, sizeof(float), f);
304 fprintf (stderr, "\n");
305 };
306 while(i--)
307 {
308 a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
309 // fprintf (stderr, "%f ", a);
310 dontcare = fwrite(&a, 1, sizeof(float), f);
311 }
312 fclose(f);
313 return 0;
314 }
315
map_file(NEURAL_NET_STRUCT * nn,char * filename)316 static int map_file(NEURAL_NET_STRUCT *nn, char *filename)
317 {
318 NEURAL_NET_HEAD_STRUCT *h;
319 struct stat statbuf;
320 float *w;
321 nn-> file_origin = MAP_FAILED;
322 if(stat(filename, &statbuf))
323 {
324 // nonfatalerror5("unable to map neural network backing file",
325 // "filename", CRM_ENGINE_HERE);
326 // fprintf (stderr, "static mmap: No such file; nn = %lx\n", (long) nn );
327
328 return -1;
329 }
330 // fprintf (stderr, "File is %s\n", filename);
331 nn->file_origin = crm_mmap_file
332 ( filename,
333 0, statbuf.st_size,
334 PROT_READ | PROT_WRITE,
335 MAP_SHARED,
336 NULL );
337 if(nn->file_origin == MAP_FAILED)
338 {
339 nonfatalerror5("unable to map neural network backing file", "filename",
340 CRM_ENGINE_HERE);
341 return -1;
342 }
343 h = nn->file_origin;
344 nn->retina_size = h->retina_size;
345 nn->first_layer_size = h->first_layer_size;
346 nn->hidden_layer_size = h->hidden_layer_size;
347
348 if (internal_trace)
349 fprintf (stderr, "Neural net dimensions: retina width=%d, "
350 "first layer neurons = %d, hidden layer neurons = %d\n",
351 retina_size, first_layer_size, hidden_layer_size);
352
353
354 // These are the sums of the weighted input activations for these layers
355 nn->retina = (float *) malloc (sizeof(float) * h->retina_size);
356 nn->first_layer = (float *) malloc(sizeof(float) * h->first_layer_size);
357 nn->hidden_layer = (float *) malloc(sizeof(float) * h->hidden_layer_size);
358
359 // These are the deltas used for learning (only).
360 nn->delta_first_layer=(float *) malloc(sizeof(float) * h->first_layer_size);
361 nn->delta_hidden_layer=(float *)malloc(sizeof(float) * h->hidden_layer_size);
362
363 // remember output layer is statically allocated!
364
365 //skip w over the header
366 w = (float *) ( (char *)h + HEADER_SIZE);
367 nn->Win = w;
368 nn->Whid = & w[ h->retina_size * h->first_layer_size];
369
370 nn->Wout = & w[ h->retina_size * h->first_layer_size
371 + h->first_layer_size * h->hidden_layer_size ];
372
373 nn->docs_start = (unsigned *) & w
374 [ h->retina_size * h->first_layer_size
375 + h->first_layer_size * h->hidden_layer_size
376 + h->hidden_layer_size * 2 ];
377
378
379 nn->docs_end = (unsigned *) ((char *)h + statbuf.st_size);
380
381 if (internal_trace)
382 {
383 fprintf (stderr, "Header start at %lx, end at %lx (bytes %lu )\n",
384 (unsigned long) h,
385 (unsigned long) nn->Win ,
386 (unsigned long) nn->Win - (unsigned long) h);
387
388 fprintf (stderr, "Coeffs start at %lx, end at %lx (ints %lu )\n",
389 (unsigned long) nn->Win,
390 (unsigned long) nn->docs_start,
391 ((unsigned long) nn->docs_start - (unsigned long) nn->Win)
392 / sizeof (float));
393
394 fprintf (stderr, "Docs start at %lx, end at %lx (features %lu )\n",
395 (unsigned long) nn->docs_start,
396 (unsigned long) nn->docs_end,
397 ((unsigned long) nn->docs_end - (unsigned long) nn->docs_start)
398 / sizeof (unsigned));
399 };
400 return 0;
401 }
402
unmap_file(NEURAL_NET_STRUCT * nn,char * filename)403 static void unmap_file(NEURAL_NET_STRUCT *nn, char *filename)
404 {
405 free(nn->retina);
406 free(nn->first_layer);
407 free(nn->hidden_layer);
408 free(nn->delta_first_layer);
409 free(nn->delta_hidden_layer);
410 crm_munmap_file((void *) nn->file_origin);
411 #ifndef CRM_WINDOWS
412 // Because mmap/munmap doesn't set atime, nor set the "modified"
413 // flag, some network filesystems will fail to mark the file as
414 // modified and so their cacheing will make a mistake.
415 //
416 // The fix is to do a trivial read/write on the file, to force
417 // the filesystem to repropagate it's caches.
418 //
419 {
420 int hfd; // hashfile fd
421 char foo;
422 hfd = open (filename, O_RDWR);
423 dontcare = read (hfd, &foo, sizeof(foo));
424 lseek (hfd, 0, SEEK_SET);
425 dontcare = write (hfd, &foo, sizeof(foo));
426 close (hfd);
427 }
428 #endif // !CRM_WINDOWS
429 }
430
431 // Do a _forced_ unmap. This is needed if the file will be
432 // written to with fwrite, etc.
force_unmap_file(NEURAL_NET_STRUCT * nn,char * filename)433 static void force_unmap_file(NEURAL_NET_STRUCT *nn, char *filename)
434 {
435 unmap_file (nn, filename);
436 crm_force_munmap_filename(filename);
437 #ifndef CRM_WINDOWS
438 // Because mmap/munmap doesn't set atime, nor set the "modified"
439 // flag, some network filesystems will fail to mark the file as
440 // modified and so their cacheing will make a mistake.
441 //
442 // The fix is to do a trivial read/write on the file, to force
443 // the filesystem to repropagate it's caches.
444 //
445 {
446 int hfd; // hashfile fd
447 char foo;
448 hfd = open (filename, O_RDWR);
449 dontcare = read (hfd, &foo, sizeof(foo));
450 lseek (hfd, 0, SEEK_SET);
451 dontcare = write (hfd, &foo, sizeof(foo));
452 close (hfd);
453 }
454 #endif // !CRM_WINDOWS
455 }
456
457 //cache this if this thing is too slow, we'll need crazy interpolation though
logistic(float a)458 static float logistic(float a)
459 {
460 float y;
461 if(isnan(a))
462 {
463 char *foo;
464 foo = malloc (32);
465 fprintf (stderr, "Logistic of a NAN\n");
466 foo = NULL;
467 strcpy ("hello, world", foo);
468 fatalerror5("Tried to compute the logistic function of a NAN!!", "",
469 CRM_ENGINE_HERE);
470 return 0.5; //perhaps we should escape all the way
471 }
472
473 y = 1.0 / (1.0 + exp(-a));
474 //if (y < 0.05) y = 0.05;
475 //if (y > 0.95) y = 0.95;
476 if(isnan(y))
477 {
478 fatalerror5("Computed a NAN as a RESULT of the logistic function!", "",
479 CRM_ENGINE_HERE);
480 return 0.5;
481 }
482 return y;
483 }
484
485
486 // A totally ad-hoc way to calculate pR. (well, since all pR's
487 // are really ad-hoc underneath, it's perfectly reasonable).
488 //
489 #ifdef NEED_SINGLE_ARG_PR
get_pR(double p)490 static double get_pR(double p)
491 {
492 double a = fabs(p - 0.5);
493 // scale 0..0.1 to 0..1.0
494 if( a < 0.1 )
495 a *= 10.0;
496 // scale 0.1...0.2 to 1.0...10.0
497 else if(a < 0.2)
498 a = 1.0 + 90.0 * (a - 0.1);
499 // scale 0.2...0.3 to 10.0...100.0
500 else if(a < 0.3)
501 a = 10.0 + 900.0 * (a - 0.2);
502 // scale 0.3... to 100.0...and up
503 else
504 a = 100.0 + 1000.0 * (a - 0.3);
505 return p >= 0.5 ? a : -a;
506 }
507 #endif // NEED_SINGLE_ARG_PR
508
get_pR2(double icnr,double ocnr)509 static double get_pR2 (double icnr, double ocnr)
510 {
511 double sum;
512 double diff;
513 double output;
514
515 sum = icnr + ocnr;
516 diff = icnr - ocnr;
517
518 output = 100 * sum * diff;
519 return output;
520 }
521
522
523
524 // Actually evaluate the neural net, given
525 // the "bag" of features.
526
do_net(NEURAL_NET_STRUCT * nn,unsigned * bag,long baglen)527 static void do_net (NEURAL_NET_STRUCT *nn, unsigned *bag, long baglen)
528 {
529 int i;
530 int neuron, channel;
531
532 int pauser;
533
534 pauser = 0;
535 if (baglen == 0) pauser = 1;
536 while (pauser==1) ;
537 pauser = 0;
538
539 // Initialize the activation levels to zeroes everywhere
540
541 // First layer:
542 for(neuron = 0; neuron < nn->first_layer_size; neuron++)
543 nn->first_layer[neuron] = 0.00;
544
545 // Second (hidden) layer:
546 for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
547 nn->hidden_layer[neuron] = 0.00;
548
549 // Final layer:
550 nn->output_layer[0] = 0.00;
551 nn->output_layer[1] = 0.00;
552
553 // Sum up the activations on the first layer of the net.
554 // Note that this code (amazingly) works fine for both _non_-projected
555 // as well as projected feature bags, by inclusion of the modulo on
556 // the retina size.
557
558 if (neural_trace)
559 {
560 fprintf (stderr, "First six items in the bag are: \n "
561 " %d %d %d %d %d %d\n",
562 bag[0], bag[1], bag[2], bag[3], bag[4], bag[5]);
563 };
564
565 // Empty the retina:
566 for (channel = 0; channel < nn->retina_size; channel++)
567 nn->retina[channel] = 0.00;
568
569 // for each feature in the bag
570 for(i = 0; i < baglen; i++)
571 {
572 channel = bag[i] % nn-> retina_size;
573 // Channel 0 is reserved as "constants", so if the modulo
574 // puts a stimulus onto the 0th retina channel, we actually
575 // push it over one channel, to retina channel 1.
576 if (channel == 0) channel = 1;
577 nn->retina[channel] += 1.0;
578 };
579 // debugging check
580 if (neural_trace)
581 fprintf (stderr, "Running do_net %lx on doc at %lx length %d\n",
582 (long) nn, (long) bag, i);
583
584 // Now we actually calculate the neuron activation values.
585 //
586 // First, the column 0 "always activated" constants, since channel
587 // zero is verboten as an actual channel (we roll channel 0 activations
588 // over onto channel 1)
589 //
590
591 nn->retina[0] = 1.0;
592
593 // Major confusion prevention debugging statement.
594 if ( 0 )
595 for (channel = 0; channel < nn->retina_size; channel++)
596 if (nn->retina[channel] > 0)
597 fprintf (stderr, " %d", channel);
598
599 for ( neuron = 0; neuron < nn->first_layer_size; neuron++)
600 for (channel = 0; channel < nn->retina_size; channel++)
601 // sum the activations on the first layer for this channel
602 nn->first_layer[neuron] +=
603 // avalWin(nn, neuron, channel)
604 nn->Win[(neuron*nn->retina_size) + channel]
605 * nn->retina[channel];
606
607 // Do a nonlinear function (logistic) in-place on the first layer outputs.
608 for(neuron = 0; neuron < nn->first_layer_size; neuron++)
609 {
610 nn->first_layer[neuron] = logistic(nn->first_layer[neuron]);
611 };
612
613 // For each neuron output in the first layer, generate activation
614 // levels for the second (hidden) layer inputs.
615 //
616 // Like the retina, we "sacrifice" the input "neuron zero" to be the
617 // bias-from-zero weight
618
619 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
620 nn->hidden_layer[neuron] = *arefWhid (nn, neuron, 0);
621
622 for( channel = 1; channel < nn->first_layer_size; channel++)
623 for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
624 nn->hidden_layer[neuron] +=
625 *arefWhid(nn, neuron, channel)
626 * nn->first_layer[channel];
627
628
629 // Do our nonlinear function (logistic) here on the second layer outputs.
630 for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
631 nn->hidden_layer[neuron] = logistic(nn->hidden_layer[neuron]);
632
633 // Generate the values in the final output layer (both neurons)
634 //
635 // Again, we sacrifice neuron 0 of the hidden layer to be the
636 // bias weights.
637
638 nn->output_layer[0] = *arefWout (nn, 0, 0);
639 nn->output_layer[0] = *arefWout (nn, 1, 0);
640
641 for(channel = 1; channel < nn->hidden_layer_size; channel++)
642 {
643 nn->output_layer[0] += *arefWout(nn, 0, channel)
644 * nn->hidden_layer[channel];
645 nn->output_layer[1] += *arefWout(nn, 1, channel)
646 * nn->hidden_layer[channel];
647 };
648
649 nn->output_layer[0] = logistic( nn->output_layer[0] );
650 nn->output_layer[1] = logistic( nn->output_layer[1] );
651 if (internal_trace || neural_trace)
652 fprintf (stderr, "Network final outputs: %lf vs %lf\n",
653 nn->output_layer[0], nn->output_layer[1]);
654
655 }
656
657
compare_features(const void * a,const void * b)658 static int compare_features(const void *a, const void *b)
659 {
660 if(*(unsigned int *)a < *(unsigned int *)b)
661 return -1;
662 if(*(unsigned int *)a > *(unsigned int *)b)
663 return 1;
664 return 0;
665 }
666
667
668 // Convert a text into a bag of features, obeying things
669 // like the tokenizer regex and the UNIQUE flag. The result
670 // is a bag of sorted features. This does _not_ do projection
671 // onto the retina, however (see "project_features" for that).
672 // Note- in later versions of this code, we no longer project
673 // separately. Instead, we modulo at runtime onto the retina.
674 //
eat_document(char * text,long text_len,long * ate,regex_t * regee,unsigned * feature_space,long max_features,long long flags,unsigned * sum)675 static long eat_document
676 ( char *text, long text_len, long *ate,
677 regex_t *regee,
678 unsigned *feature_space, long max_features,
679 long long flags, unsigned *sum)
680 {
681 long n_features = 0;
682 long i, j;
683 long long unigram, unique, string;
684 long next_offset;
685
686 unique = apb->sflags & CRM_UNIQUE;
687 unigram = apb->sflags & CRM_UNIGRAM;
688 string = apb->sflags & CRM_STRING;
689
690 *sum = 0;
691 *ate = 0;
692
693 // Use the flagged vector tokenizer
694 //
695 // GROT GROT GROT note that we _disregard_ the passed-in "regee" (the
696 // previously precompiled regex) and use the automagical code in
697 // vector_tokenize_selector to get the "right" parse regex from the
698 // user program's APB.
699 crm_vector_tokenize_selector
700 (apb, // the APB
701 text, // input string buffer
702 0, // start offset
703 text_len, // length
704 NULL, // parser regex
705 0, // parser regex len
706 NULL, // tokenizer coeff array
707 0, // tokenizer pipeline len
708 0, // tokenizer pipeline iterations
709 feature_space, // where to put the hashed results
710 max_features - 1, // max number of hashes
711 &n_features, // how many hashes we actually got
712 &next_offset); // where to start again for more hashes
713
714 // GROT GROT GROT
715 // maybe we should force a constant into one column of input
716 // so we can change the threshold of level activation?
717
718 // Sort the features for speed in matching later - NO NEED ANY MORE
719 // qsort(feature_space, n_features, sizeof(unsigned int), compare_features);
720
721 // anything that hashed to 0 or 1 needs to be promoted to 2 or 3, because
722 // 0 and 1 are our "in-class" and "out-of-class" sentinels. Also
723 // calculate the checksum for this document.
724
725 for (i = 0; i < n_features; i++)
726 {
727 if (feature_space[i] == 0)
728 feature_space[i] = 2;
729 if (feature_space[i] == 1)
730 feature_space[i] = 3;
731 }
732
733 // Do a uniquifying pass.
734 if(unique)
735 {
736 // Sort the features for uniquifying
737 qsort(feature_space, n_features, sizeof(unsigned int), compare_features);
738 // do "two finger" uniquifying on the sorted result.
739 i = 0;
740 for(j = 1; j < n_features; j++)
741 if ( feature_space[j] != feature_space [i] )
742 feature_space[++i] = feature_space[j];
743 if (n_features > 0)
744 n_features = i + 1;
745 };
746
747 // and do the checksum update
748 *sum = 0;
749 for (i = 0; i < n_features; i++)
750 *sum += feature_space[i];
751
752 // All done.
753 return n_features;
754 }
755
756 // Nuke the net - basically fill it with small uniformly distributed
757 // random weighting coefficients. This gets the net out of a "stuck
758 // on center" condition. A flag of 0 means "from zero", a flag of 1
759 // means "in addition to current state".
760 //
nuke(NEURAL_NET_STRUCT * nn,long flag)761 static void nuke(NEURAL_NET_STRUCT *nn, long flag)
762 {
763 long j;
764 float *f;
765 float dsn, dsno;
766
767 dsn = NN_DEFAULT_STOCH_NOISE ;
768 dsno = - dsn / 2.0;
769
770 if (neural_trace)
771 fprintf (stderr, "********Nuking the net to random values\n");
772
773 f = nn->Win;
774 for (j = 0; j < nn->retina_size * nn->first_layer_size; j++)
775 f[j] = flag*f[j] + dsno + dsn * rand0to1();
776
777 f = nn->Whid;
778 for (j = 0; j < nn->first_layer_size * nn-> hidden_layer_size; j++)
779 f[j] = flag*f[j] + dsno + dsn * rand0to1();
780
781 f = nn->Wout;
782 for (j = 0; j < nn-> hidden_layer_size * 2; j++)
783 f[j] = flag*f[j] + dsno + dsn * rand0to1();
784
785 }
786
787
788 // Entry point for learning
789 // Basically, we use gradient descent.
790 //
crm_neural_net_learn(CSL_CELL * csl,ARGPARSE_BLOCK * apb,char * txtptr,long txtstart,long txtlen)791 int crm_neural_net_learn
792 ( CSL_CELL *csl, ARGPARSE_BLOCK *apb,
793 char *txtptr, long txtstart, long txtlen )
794 {
795 NEURAL_NET_STRUCT my_nn, *nn = &my_nn;
796 char filename[MAX_PATTERN];
797 char htext[MAX_PATTERN];
798 long htext_len;
799 struct stat statbuf;
800
801 regex_t regee;
802
803 unsigned bag[NN_MAX_FEATURES];
804
805 unsigned sum;
806
807 long i, j, n_features;
808 long old_file_size;
809 unsigned *new_doc_start, *current_doc, *k, *l;
810 unsigned out_of_class;
811 long found_duplicate, n_docs, n_docs_trained, current_doc_len;
812 long n_cycles, filesize_on_disk, soft_cycle_limit;
813 unsigned n_no_train;
814
815 FILE *f;
816
817 float *dWin, *dWhid, *dWout;
818
819 float stoch_noise, internal_training_threshold;
820 double alpha = NN_DEFAULT_ALPHA;
821 double alphalocal;
822
823 long this_doc_was_wrong;
824 double errmag, epoch_errmag;
825
826 // in_class_docs and out_class_docs are lookaside lists so
827 // we can alternate training of in-class and out-of-class
828 // examples, so huge batches are not a problem.
829 // GROT GROT GROT this goes in later... coded in .crm,
830 // it works really well, but not coded into the C yet.
831 //
832 // long in_class_docs[MAX_PATTERN], out_class_docs[MAX_PATTERN];
833 // long icd, icd_max, ocd, ocd_max;
834
835 long neuron, channel;
836
837 if(internal_trace)
838 neural_trace = 1;
839
840 if(neural_trace)
841 fprintf(stderr, "entered crm_neural_net_learn\n");
842
843 // Get the filename
844 crm_get_pgm_arg (htext, MAX_PATTERN, apb->p1start, apb->p1len);
845 htext_len = apb->p1len;
846 htext_len = crm_nexpandvar (htext, htext_len, MAX_PATTERN);
847
848 i = 0;
849 while (htext[i] < 0x021) i++;
850 j = i;
851 while (htext[j] >= 0x021) j++;
852 htext[j] = '\000';
853 strcpy (filename, &htext[i]);
854
855 // Set a nominal epoch cycle limiter
856 //
857 soft_cycle_limit = NN_MAX_TRAINING_CYCLES;
858 if(apb->sflags & CRM_FROMSTART) // FROMSTART reinits the coeffs.
859 {
860 soft_cycle_limit = NN_MAX_TRAINING_CYCLES_FROMSTART;
861 }
862 else
863 {
864 soft_cycle_limit = NN_MAX_TRAINING_CYCLES;
865 };
866
867 // Get alpha, the stochastic noise level, and the cycle limit from
868 // the s2 parameter
869 //
870 {
871 char s2_buf[MAX_PATTERN];
872 long s2_len;
873
874 alpha = NN_DEFAULT_ALPHA;
875 stoch_noise = NN_DEFAULT_STOCH_NOISE;
876 internal_training_threshold = NN_INTERNAL_TRAINING_THRESHOLD;
877
878 crm_get_pgm_arg (s2_buf, MAX_PATTERN, apb->s2start, apb->s2len);
879 s2_len = apb->s2len;
880 s2_len = crm_nexpandvar (s2_buf, s2_len, MAX_PATTERN);
881 if (s2_len > 0)
882 sscanf (s2_buf, "%lf %f %ld %f",
883 &alpha,
884 &stoch_noise,
885 &soft_cycle_limit,
886 &internal_training_threshold);
887 if (alpha < 0.0 ) alpha = NN_DEFAULT_ALPHA;
888 if (alpha < 0.000001 ) alpha = 0.000001;
889 if (alpha > 1.0) alpha = 1.0;
890 if (stoch_noise > 1.0) stoch_noise = 1.0;
891 if (stoch_noise < 0) stoch_noise = NN_DEFAULT_STOCH_NOISE;
892 if (soft_cycle_limit < 0)
893 soft_cycle_limit = NN_MAX_TRAINING_CYCLES_FROMSTART;
894
895 // fprintf (stderr, "**Alpha = %lf, noise = %f cycle limit = %ld\n",
896 // alpha, stoch_noise, soft_cycle_limit);
897 }
898
899 // Convert the text form of the document into a bag of features
900 // according to the parsing regex, and compute the checksum "sum".
901 //
902 n_features = eat_document ( txtptr + txtstart, txtlen, &i,
903 ®ee, bag, NN_MAX_FEATURES, apb->sflags, &sum);
904
905 if (user_trace)
906 fprintf (stderr, "***total input features = %ld (1st 3:) %d %d %d \n\n",
907 n_features, bag[0], bag[1], bag[2]);
908
909 // Put the document's features into the file by appending to the
910 // end of the file. Note that we have a header - a 0 for normal
911 // (in-class) learning, and a 1 for "not in class" learning, then
912 // then a document checksum for fast finding of duplicate documents,
913 // then a reserved number for how many times this hasn't needed training
914 // (a speedup that is currently unused)
915 //
916
917 found_duplicate = 0;
918
919 i = stat (filename, &statbuf);
920
921 if (i != 0)
922 {
923 // Nope, file does not exist. Make a new one.
924 if (user_trace)
925 fprintf (stderr, "Making a new backing file: '%s'\n", filename);
926 make_new_backing_file(filename);
927 stat(filename, &statbuf);
928 if (neural_trace)
929 fprintf (stderr, "Initial file size (just coeffs): %ld\n",
930 statbuf.st_size);
931 }
932
933 // The file now ought to exist. Map it.
934 i = map_file (nn, filename);
935 if ( i < 0)
936 {
937 fatalerror5 ("Could not create the neural net backing file ",
938 filename, CRM_ENGINE_HERE);
939 };
940 // Scan the file for documents with the matching checksum
941 // so we can selectively forget / refute documents.
942 // (GROT GROT GROT - I hate this method of refutation- it
943 // depends on absolutely identical documents. On the other
944 // hand, it's how a NN can be told of negative examples, which
945 // back-propagation needs. - wsy. ).
946 //
947 // This is admittedly ugly to map the file, scan it, unmap it,
948 // write append to it, and map it again. Bleah.... Yes, I know.
949
950 found_duplicate = 0;
951 k = nn->docs_start;
952 for(k = nn->docs_start;
953 k < (nn->docs_end - 100)
954 && (k[0] == 0 || k[0] == 1) // Find a start-of-doc sentinel
955 && k[2] != sum; // does the checksum match?
956 k++);
957
958 if (k[2] == sum)
959 found_duplicate = 1;
960 // Did we find a matching sum
961 if(found_duplicate)
962 {
963 k[1] = 0;
964 if (apb->sflags & CRM_REFUTE)
965 {
966 if (neural_trace)
967 fprintf (stderr, "Marking this doc as out-of-class.\n");
968 k[1] = 1;
969 };
970 if (apb->sflags & CRM_ABSENT)
971 {
972 // erasing something out of the
973 // input set. Cut it out of the
974 // knowledge base.
975 unsigned *dest;
976 long len;
977 if (neural_trace)
978 fprintf (stderr, "Obliterating data...");
979
980 dest = k;
981 // Find the next zero sentinel
982 for(k += 3; *k; k++);
983 // and copy from here to end of file over our
984 // incorrectly attributed data, overwriting it.
985 len = (unsigned long) k - (unsigned long) dest;
986 if (neural_trace)
987 fprintf (stderr, "start %lx length %ld\n",
988 (unsigned long) dest, len);
989 for (; k< nn->docs_end; k++)
990 {
991 *dest = *k;
992 dest++;
993 };
994 //
995 // now unmap the file, msync it, unmap it, and truncate
996 // it. Then remap it back in; viola- the bad data's gone.
997 force_unmap_file (nn, filename);
998 dontcare = truncate (filename, statbuf.st_size - len);
999 map_file (nn, filename);
1000 if (neural_trace)
1001 fprintf (stderr, "Dealt with a duplicate at %lx\n",
1002 (unsigned long) k);
1003 }
1004 else
1005 {
1006 if(neural_trace)
1007 fprintf(stderr, "***Learning same file twice. W.T.F. \n");
1008 };
1009 };
1010 retina_size = nn->retina_size;
1011
1012 // then this data file has never been seen before so append it and remap
1013 //
1014 if(! found_duplicate)
1015 { // adding to the old file
1016 if (neural_trace) fprintf (stderr, "Appending new data.\n");
1017 old_file_size = statbuf.st_size;
1018 if(neural_trace)
1019 fprintf(stderr, "NN: new file data will start at offset %ld\n",
1020 old_file_size);
1021
1022 // make sure that there's something to add.
1023 if(n_features < 0.5 )
1024 {
1025 if(neural_trace)
1026 fprintf(stderr,
1027 "NN: Can't add a null example to the net.\n");
1028 }
1029 else
1030 {
1031
1032 // unmap the file so we can write-append to it.
1033 force_unmap_file(nn, filename);
1034 f = fopen(filename, "ab+");
1035 out_of_class = ((apb->sflags & CRM_REFUTE) != 0);
1036 if(internal_trace || neural_trace)
1037 {
1038 fprintf (stderr, "Sense writing %u\n", out_of_class);
1039 if(out_of_class)
1040 fprintf(stderr, "learning out of class\n");
1041 else
1042 fprintf(stderr, "learning in class\n");
1043 }
1044
1045 // Offset 0 is the sentinel - always either 0 or 1
1046 // Offset 0 - Write 0 or 1 (in-class or out-class)
1047 dontcare = fwrite(&out_of_class, 1, sizeof(unsigned), f);
1048
1049 // Offset 1 - Write the number of passes without a
1050 // retrain on this one
1051 n_no_train = 0; //number of times we went without training this guy
1052 dontcare = fwrite(&n_no_train, 1, sizeof(unsigned), f);
1053
1054 // Offset 2 - Write the sum of the document feature hashes.
1055 dontcare = fwrite(&sum, 1, sizeof(unsigned), f); //hash of whole doc
1056
1057 // ALERT ALERT ALERT
1058 // CHANGED to save the bag, _not_ the projection!
1059 // This means we must project during training, however
1060 // it also means we don't bust the CPU cache nearly as
1061 // badly!
1062 dontcare = fwrite (bag, n_features, sizeof(unsigned), f);
1063 if (neural_trace)
1064 {
1065 fprintf (stderr,
1066 "Appending 3 marker unsigneds and %ld unsigneds of features\n",
1067 n_features);
1068 fprintf (stderr, "First three features are %d %d %d\n",
1069 bag[0], bag[1], bag[2]);
1070 }
1071 // Close the file and we've now updated the disk image.
1072 fclose(f);
1073 }
1074 stat(filename, &statbuf);
1075 filesize_on_disk = statbuf.st_size;
1076 if(neural_trace)
1077 fprintf(stderr, "NN: statted filesize is now %ld.\n", statbuf.st_size);
1078
1079 // MMap the file back into memory and fix up the nn->blah structs
1080 if(map_file(nn, filename))
1081 {
1082 //nonfatalerror("Couldn't mmap file!", filename);
1083 return -1;
1084 }
1085
1086 if (neural_trace)
1087 fprintf (stderr, "Neural network mapped at %lx\n", (long) nn);
1088
1089 new_doc_start =
1090 (unsigned *) ((char *)(nn->file_origin) + old_file_size);
1091
1092 // Print out a telltale to see if we're in the right place.
1093 if (neural_trace)
1094 {
1095 fprintf (stderr, "\nAfter mmap, offsets of weight arrays"
1096 " are:\n Win: %lx, Whid: %lx, Wout: %lx\n",
1097 (long)nn->Win, (long)nn->Whid, (long)nn->Wout);
1098 fprintf (stderr, "First weights (in/hid/out): %lf, %lf, %lf\n",
1099 nn->Win[0], nn->Whid[0], nn->Wout[0]);
1100 fprintf (stderr, "Weight ptrs from map start : "
1101 "Win: %lx, Whid: %lx, Wout: %lx\n",
1102 (long) ((void *)&nn->Win[0]),
1103 (long) ((void *)&nn->Whid[0]),
1104 (long) ((void *)&nn->Wout[0]));
1105 fprintf (stderr, "Weight ptrs (arefWxxx funcs: "
1106 "Win: %lx, Whid: %lx, Wout: %lx\n",
1107 (long) ((void *)arefWin(nn, 0, 0)),
1108 (long) ((void *)arefWhid(nn, 0, 0)),
1109 (long) ((void *)arefWout(nn, 0, 0)));
1110 };
1111
1112 }
1113 else
1114 new_doc_start = k;
1115
1116 // If we're in APPEND mode, we don't train yet. So we're
1117 // complete at this point and can return to the caller.
1118 //
1119 if(apb->sflags & CRM_APPEND)
1120 {
1121 if (neural_trace || internal_trace)
1122 fprintf (stderr,
1123 "Append mode- exiting neural_learn without recalc\n");
1124 unmap_file (nn, filename);
1125 return -1;
1126 }
1127
1128 // Are we going from the absolute start?
1129 if (apb->sflags & CRM_FROMSTART)
1130 nuke(nn, 0);
1131
1132
1133 // Tally the number of example documents in this file
1134 n_docs = 0;
1135 for(k = nn->docs_start; k < nn->docs_end; k++)
1136 {
1137 if (*k < 2)
1138 n_docs++;
1139 };
1140
1141 if (user_trace)
1142 fprintf (stderr, "Total documents in file: %lu\n", n_docs);
1143
1144 n_cycles = 0;
1145
1146 n_docs_trained = 1;
1147
1148
1149 // Malloc the internal allocations for back propagation. These
1150 // are the deltas used in the "blame game" of back-propagation itself.
1151 //
1152 dWin = malloc(sizeof(float) * nn->retina_size * nn->first_layer_size);
1153 dWhid = malloc(sizeof(float) * nn->first_layer_size * nn->hidden_layer_size);
1154 dWout = malloc(sizeof(float) * nn->hidden_layer_size * 2);
1155
1156 // Check - did we malloc successfully?
1157 if(!dWin || !dWhid || !dWout)
1158 {
1159 if(dWin)
1160 free(dWin);
1161 if(dWhid)
1162 free(dWhid);
1163 if(dWout)
1164 free(dWout);
1165 unmap_file(nn, filename);
1166 fatalerror5("unable to malloc!", "Neural network 3", CRM_ENGINE_HERE);
1167 return -1;
1168 }
1169
1170 alphalocal = alpha;
1171 // Now the learning loop itself
1172 n_docs_trained = 1;
1173 while(n_cycles < soft_cycle_limit && n_docs_trained > 0)
1174 {
1175 float eic0, eic1, eoc0, eoc1;
1176 if (neural_trace)
1177 fprintf (stderr, "\n\n**** Start of epoch %ld on net at %lx\n",
1178 n_cycles, (long)nn);
1179 if (user_trace)
1180 fprintf (stderr, "\nE %4f%7ld ",
1181 alphalocal,
1182 n_cycles);
1183 n_docs_trained = 0;
1184 epoch_errmag = 0.0;
1185 errmag = 0.0;
1186
1187
1188 // Zero out any accumulated delta errors, in the reverse
1189 // (back-propagation) order, so we can template this code
1190 // again for the actual backprop.
1191 //
1192 // mid-to-out layer deltas first
1193 for( channel = 0; channel < nn->hidden_layer_size; channel++)
1194 for (neuron = 0; neuron < 2; neuron++)
1195 dWout[channel * 2 + neuron] = 0.0;
1196
1197 // in-to-mid deltas next
1198 for( channel = 0; channel < nn->first_layer_size; channel++)
1199 for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1200 dWhid[channel * nn->hidden_layer_size + neuron] = 0.0;
1201
1202 // Retina-to-in layer deltas last.
1203 for(channel = 0; channel < nn->retina_size; channel++)
1204 for(neuron = 0; neuron < nn->first_layer_size; neuron++)
1205 dWin[channel * nn->first_layer_size + neuron] = 0.0;
1206
1207 // Add stochastic noise to the network... this happens
1208 // only once per epoch no matter what the protocol...
1209 //
1210 if (stoch_noise > 0.0)
1211 {
1212 // First, noodge the output layer's input-side weights:
1213 for (neuron = 0; neuron < 2; neuron++)
1214 for(channel = 0; channel < nn->hidden_layer_size; channel++)
1215 {
1216 *arefWout(nn, neuron, channel) +=
1217 stochastic_factor (stoch_noise, soft_cycle_limit, n_cycles);
1218 };
1219
1220 // Then for the hidden layer's input weights
1221 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1222 for(channel = 0; channel < nn->first_layer_size; channel++)
1223 {
1224 *arefWhid(nn, neuron, channel) +=
1225 stochastic_factor (stoch_noise, soft_cycle_limit, n_cycles);
1226 };
1227
1228 // Finally, the input layer's input weights
1229 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1230 for(channel = 0; channel < nn->retina_size; channel++)
1231 {
1232 *arefWin(nn, neuron, channel) +=
1233 stochastic_factor (stoch_noise, soft_cycle_limit, n_cycles);
1234 };
1235 };
1236
1237 // "Track" the weights closer to zero. This keeps unused
1238 // weights from going haywire. Note that the static magnitude
1239 // of this is equal to the average stochastic noise.
1240 //
1241 if (stoch_noise > 0.0)
1242 {
1243 // First, noodge the output layer's input-side weights:
1244 for (neuron = 0; neuron < 2; neuron++)
1245 for(channel = 0; channel < nn->hidden_layer_size; channel++)
1246 {
1247 *arefWout(nn, neuron, channel) *= NN_ZERO_TRACKING ;
1248 };
1249
1250 // Then for the hidden layer's input weights
1251 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1252 for(channel = 0; channel < nn->first_layer_size; channel++)
1253 {
1254 *arefWhid(nn, neuron, channel) *= NN_ZERO_TRACKING ;
1255 };
1256
1257 // Finally, the input layer's input weights
1258 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1259 for(channel = 0; channel < nn->retina_size; channel++)
1260 {
1261 *arefWin(nn, neuron, channel) *= NN_ZERO_TRACKING ;
1262 };
1263 };
1264
1265 // Loop around and train each document.
1266 for(k = nn->docs_start; k < nn->docs_end;) // the per-document loop
1267 {
1268 // Start on the 0/1 (in-class/out-of-class) sentinel
1269 out_of_class = k[0];
1270 if (out_of_class != 0 && out_of_class != 1)
1271 fatalerror5 ( "SYNCH ERROR in training data file: ",
1272 "filename",
1273 CRM_ENGINE_HERE);
1274
1275 if (neural_trace)
1276 {
1277 fprintf (stderr, " Doc %lx out_of_class %u\n",
1278 (unsigned long int) k,
1279 out_of_class);
1280 fprintf (stderr,
1281 "First weights (in/hid/out): %lf, %lf, %lf\n",
1282 nn->Win[0], nn->Whid[0], nn->Wout[0]);
1283 };
1284
1285 k++;
1286 // save a pointer to the "OK this many times" counter
1287 l = k;
1288
1289 k++;
1290 // Now we point to the document checksum.
1291 // Skip over the doc checksum
1292 k++;
1293
1294 // Now k is the first feature in the current document.
1295 // Find end of the current doc
1296 current_doc = k;
1297 current_doc_len = 0;
1298 while (k < nn->docs_end && *k > 1)
1299 {
1300 k++;
1301 current_doc_len++;
1302 };
1303 // k now points to the next document
1304 if (neural_trace)
1305 fprintf (stderr,
1306 "Current doc %lx class %ld len %lx next doc start %lx next sense %lx\n",
1307 (unsigned long) current_doc,
1308 (unsigned long) out_of_class,
1309 (unsigned long) current_doc_len,
1310 (unsigned long) k,
1311 (unsigned long) *k);
1312
1313 // Run the net on the document
1314 do_net(nn, current_doc, current_doc_len);
1315 if( neural_trace )
1316 fprintf(stderr, "-- doc @ %lx got network outputs of %f v. %f\n",
1317 (unsigned long)current_doc,
1318 nn->output_layer[0], nn->output_layer[1]);
1319
1320 // Keep track of the total error of this epoch.
1321 errmag = (! (out_of_class)) ?
1322 nn->output_layer[1] + (1 - nn->output_layer[0])
1323 :
1324 nn->output_layer[0] + (1 - nn->output_layer[1]);
1325 epoch_errmag += errmag;
1326
1327 // Now, the test- output channel 0 is "in class", channel
1328 // 1 is "not in class". We see if the output of the
1329 // net is acceptable here, and if not, backprop our errors.
1330 //
1331 // Note that the SRI document says 'Always backprop every
1332 // training sample on every training epoch." Maybe that's a
1333 // good idea, maybe not. Maybe it causes overfitting.
1334 // It's a good question either way.
1335 //
1336 // These error terms are all cast such that a value less than
1337 // zero is a "retrain" condition.
1338 eic0 = nn->output_layer[0] - (1.0 - internal_training_threshold);
1339 eoc0 = (internal_training_threshold) - nn->output_layer[0];
1340 eic1 = (internal_training_threshold) - nn->output_layer[1];
1341 eoc1 = nn->output_layer[1] - (1.0 - internal_training_threshold);
1342
1343 // Do the error bounds comparison
1344 this_doc_was_wrong = 0;
1345 if(
1346 ( ! out_of_class &&
1347 (( eic0 < 0 ) || ( eic1 < 0 ))
1348 )
1349 ||
1350 ( out_of_class &&
1351 (( eoc0 < 0 ) || (eoc1 < 0 ))
1352 )
1353 )
1354 {
1355 n_docs_trained ++;
1356 this_doc_was_wrong = 1;
1357 *l = 0; // reset the "OK this many times" pointer.
1358 };
1359
1360 if (user_trace)
1361 {
1362 if (this_doc_was_wrong)
1363 {
1364 if ( ! out_of_class )
1365 {
1366 if (nn->output_layer[0] > nn->output_layer[1])
1367 fprintf (stderr, "+");
1368 else
1369 fprintf (stderr, "#");
1370 }
1371 else
1372 {
1373 if (nn->output_layer[0] < nn->output_layer[1])
1374 fprintf (stderr, "x");
1375 else
1376 fprintf (stderr, "X");
1377 };
1378 }
1379 else
1380 {
1381 fprintf (stderr, " ");
1382 };
1383 };
1384 if (internal_trace)
1385 fprintf (stderr, " %6lf ", errmag);
1386
1387 // Do we want to train everything, even if it's right?
1388 //#define TRAIN_ALWAYS 1
1389 #define TRAIN_ALWAYS 0
1390
1391
1392 if (this_doc_was_wrong
1393 || TRAIN_ALWAYS
1394 || (!(apb->sflags & CRM_BYCHUNK) ))
1395 {
1396 // this block is the "actually train this document" section
1397 // If we got here, we're training, either because tne neural
1398 // net got it wrong, or because we're in TRAIN_ALWAYS.
1399 // So, it's time to run the ugly backprop.
1400 //
1401 if(neural_trace) fprintf(stderr, " Need backprop on doc at %lx\n",
1402 (long) current_doc );
1403
1404 // Start setting the errors. Work backwards, from the
1405 // known desired states.
1406 //
1407 // Now, for a bit of tricky math. One could train
1408 // with heuristics (a la the Winnow algorithm) or one could
1409 // train with a gradient descent algorithm.
1410 //
1411 // Luckily, the gradient descent is actually quite simple;
1412 // it turns out the partial derivative of the logistic
1413 // function dL(x)/dx is just (1-L(x)) * L(x) . Plotting this
1414 // on a spreadsheet is quite illuminating - it ranges from
1415 // nearly zero near infinity to 0.25 at zero.
1416 //
1417 // See http://www.speech.sri.com/people/anand/771/html/node37.html
1418 // for details of this math. SRI: here means "from the SRI paper"
1419 //
1420 // SRI: del_j = -(target - output)*(1 - output)*(output)
1421 //
1422 #define GRADIENT_DESCENT
1423 #ifdef GRADIENT_DESCENT
1424 if (neural_trace)
1425 fprintf (stderr, "Generating error deltas for output layer\n");
1426
1427 // 1.0 and 0.0 are the desired final layer outputs for "in class".
1428 nn->delta_output_layer[0] = 1.0;
1429 nn->delta_output_layer[1] = 0.0;
1430 // out-of-class gets them reversed
1431 if (out_of_class)
1432 {
1433 nn->delta_output_layer[0] = 0.0;
1434 nn->delta_output_layer[1] = 1.0;
1435 };
1436
1437 if (neural_trace)
1438 fprintf (stderr, "Output layer desired values: %lf, %lf\n",
1439 nn->delta_output_layer[0],
1440 nn->delta_output_layer[1]);
1441
1442
1443 // Now generate the values "in place" from the desired values
1444 // MAYBE TYPO in the SRI document- it says this is
1445 // SRI: -del_k = (t_j - o_j) (1 - o_j) o_j) and it is unclear if
1446 // the minus sign should _not_ be there.
1447 //
1448 // However, a spreadsheet calculation of the actual values for
1449 // the derivative of the logistic function show that in fact
1450 // del_k is positive for positive errors (that is, when the
1451 // desired value is higher than the output value, del_k
1452 // should be positive for when the desired output is greater
1453 // than the actual output)
1454
1455 // Del_k on the output layer first
1456 //
1457 for (neuron = 0; neuron < 2; neuron++)
1458 nn->delta_output_layer[neuron] =
1459 (
1460 ( nn->delta_output_layer[neuron] // target - actual
1461 - nn->output_layer[neuron] )
1462 * (1.0 - nn->output_layer[neuron]) // 1 - actual
1463 * (nn->output_layer[neuron]) // actual
1464 );
1465
1466 if (neural_trace)
1467 fprintf (stderr, "Output layer output error delta-sub_j: %lf, %lf\n",
1468 nn->delta_output_layer[0],
1469 nn->delta_output_layer[1]);
1470
1471 // Now we calculate the delta weight we want to apply to the
1472 // middle layer's inputs, again, this is "in place".
1473 //
1474 // The corrected del_j formula for hidden and input neurons is:
1475 // del_j = o_j (1 - o_j) SUM (del_k * w_kj)
1476 // (where the sum is over all neurons downstream of neuron j)
1477 if (neural_trace)
1478 fprintf (stderr, "Doing the hidden layer.\n");
1479
1480 if (neural_trace)
1481 fprintf (stderr, "Now doing the sum of the del_k * w_kj\n");
1482
1483 // Initialize the hidden layer deltas
1484 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1485 nn->delta_hidden_layer[neuron] = 0;
1486
1487 // Calculate the SUM del_k * w_kj "in place"
1488 //
1489 // Start with the error term (which is del_k of the output
1490 // node times the weight coupling the hidden layer to that
1491 // output node). This is the error for a middle layer.
1492 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1493 for (channel = 0; channel < 2; channel++)
1494 nn->delta_hidden_layer[neuron] +=
1495 nn->delta_output_layer[channel]
1496 * (*arefWout(nn, channel, neuron ));
1497
1498 // Now multiply by the o_j * (1-o_j) term
1499 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1500 nn->delta_hidden_layer[neuron] =
1501 (nn->delta_hidden_layer[neuron]
1502 * nn->hidden_layer[neuron]
1503 * (1.0 - nn->hidden_layer[neuron]));
1504
1505 if (neural_trace)
1506 fprintf (stderr, "First three hidden delta_subj's: %lf %lf %lf\n",
1507 nn->delta_hidden_layer[0],
1508 nn->delta_hidden_layer[1],
1509 nn->delta_hidden_layer[2]);
1510
1511
1512 // Do the calculation for the retina-to-input layer. Same
1513 // deal as before.
1514 //
1515 // The corrected del_j formula for hidden and input neurons is:
1516 // del_j = o_j (1 - o_j) SUM (del_k * w_kj)
1517 // (where the sum is over all neurons downstream of neuron j)
1518 if (neural_trace)
1519 fprintf (stderr, "Doing the input layer.\n");
1520
1521 if (neural_trace)
1522 fprintf (stderr, "Now doing the sum of the del_k * w_kj\n");
1523
1524 // Initialize the input layer deltas
1525 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1526 nn->delta_first_layer[neuron] = 0;
1527
1528 // Calculate the SUM "in place" (the error mag term)
1529 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1530 for (channel = 0; channel < nn->hidden_layer_size; channel++)
1531 nn->delta_first_layer[neuron] +=
1532 nn->delta_hidden_layer[neuron]
1533 * (*arefWhid(nn, channel, neuron));
1534
1535 // Now multiply by the o_j * (1-)_j) term
1536 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1537 nn->delta_first_layer[neuron] =
1538 ( nn->delta_first_layer[neuron]
1539 * nn->first_layer[neuron]
1540 * (1.0 - nn->first_layer[neuron]));
1541
1542 if (neural_trace)
1543 fprintf (stderr, "First three input delta_subj's: %lf %lf %lf\n",
1544 nn->delta_first_layer[0],
1545 nn->delta_first_layer[1],
1546 nn->delta_first_layer[2]);
1547
1548 #endif // GRADIENT_DESCENT
1549
1550 // The SRI document suggests that the right thing to do
1551 // is to update after each training example is calculated.
1552 // So, we'll try that. :) RESULT: It works fine for _one_
1553 // document, but not multiple documents tend to oscillate
1554 // back and forth. Conclusion: sometimes you need to sum
1555 // up the deltas and error terms.
1556 //
1557 //
1558 // We have the del_k's all calculated for all three
1559 // layers, and we can use them to calculate the desired
1560 // change in weights for each layer.
1561 // These weight changes do not take place immediately;
1562 // we might do them at the end of a training epoch.
1563
1564 alphalocal = alpha * (1.0 + gain_noise_factor());
1565
1566 // Are we training after each document?
1567 if (apb->sflags & CRM_BYCHUNK)
1568 {
1569
1570 // First, the output layer's input-side weights:
1571 for (neuron = 0; neuron < 2; neuron++)
1572 for(channel = 0; channel < nn->hidden_layer_size; channel++)
1573 {
1574 *arefWout(nn, neuron, channel) =
1575 *arefWout (nn, neuron, channel)
1576 + ( alphalocal
1577 * nn->delta_output_layer[neuron]
1578 * nn->hidden_layer[channel]);
1579 };
1580
1581 // Then for the hidden layer's input weights
1582 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1583 for(channel = 0; channel < nn->first_layer_size; channel++)
1584 {
1585 *arefWhid(nn, neuron, channel) =
1586 *arefWhid (nn, neuron, channel)
1587 + ( alphalocal
1588 * nn->delta_hidden_layer[neuron]
1589 * nn->first_layer[channel]);
1590 };
1591
1592 // Finally, the input layer's input weights
1593 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1594 for(channel = 0; channel < nn->retina_size; channel++)
1595 {
1596 *arefWin(nn, neuron, channel) =
1597 *arefWin (nn, neuron, channel)
1598 + ( alphalocal
1599 * nn->delta_first_layer[neuron]
1600 * nn->retina[channel]);
1601 };
1602 }
1603 else
1604 // Are we doing all docs, then training on the sum?
1605 {
1606
1607 // First, the output layer's input-side weights:
1608 for (neuron = 0; neuron < 2; neuron++)
1609 for(channel = 0; channel < nn->hidden_layer_size; channel++)
1610 {
1611 dWout[channel*2 + neuron] +=
1612 ( alphalocal
1613 * nn->delta_output_layer[neuron]
1614 * nn->hidden_layer[channel]);
1615 };
1616
1617 // Then for the hidden layer's input weights
1618 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1619 for(channel = 0; channel < nn->first_layer_size; channel++)
1620 {
1621 dWhid [channel * nn->hidden_layer_size + neuron] +=
1622 ( alphalocal
1623 * nn->delta_hidden_layer[neuron]
1624 * nn->first_layer[channel]);
1625 };
1626
1627 // Finally, the input layer's input weights
1628 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1629 for(channel = 0; channel < nn->retina_size; channel++)
1630 {
1631 dWin [ channel * nn->first_layer_size + neuron] +=
1632 ( alphalocal
1633 * nn->delta_first_layer[neuron]
1634 * nn->retina[channel]);
1635 };
1636 };
1637
1638 } // end of train this document
1639 else
1640 {
1641 if(neural_trace)
1642 fprintf(stderr, "Doc %lx / %u OK, looks good, not trained\n",
1643 (long unsigned) current_doc, out_of_class);
1644 }; // END OF THE PER-DOCUMENT LOOP
1645
1646 if (neural_trace)
1647 fprintf (stderr,
1648 "Moving to next document at k=%lx to docs_end= %lx\n",
1649 (long int)k, (long int)nn->docs_end);
1650 };
1651 n_cycles++;
1652 if (neural_trace)
1653 fprintf (stderr, "All documents processed.\n"
1654 "Now first weight coeffs: %lf %lf, %lf\n",
1655 nn->Win[0], nn->Whid[0], nn->Wout[0]);
1656
1657 // If we're _not_ doing immediate-per-document training...
1658 if (! (apb->sflags & CRM_BYCHUNK))
1659 {
1660
1661 if (neural_trace)
1662 fprintf (stderr, "putting accumulated deltas out.\n");
1663
1664 // First, the output layer's input-side weights:
1665 for (neuron = 0; neuron < 2; neuron++)
1666 for(channel = 0; channel < nn->hidden_layer_size; channel++)
1667 {
1668 *arefWout(nn, neuron, channel) +=
1669 dWout [ channel * 2 + neuron ];
1670 };
1671
1672 // Then for the hidden layer's input weights
1673 for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1674 for(channel = 0; channel < nn->first_layer_size; channel++)
1675 {
1676 *arefWhid(nn, neuron, channel) +=
1677 dWhid [channel * nn->hidden_layer_size + neuron];
1678 };
1679
1680 // Finally, the input layer's input weights
1681 for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1682 for(channel = 0; channel < nn->retina_size; channel++)
1683 {
1684 *arefWin(nn, neuron, channel) +=
1685 dWin[ channel * nn->first_layer_size + neuron];
1686 };
1687 };
1688
1689 if (user_trace)
1690 fprintf (stderr, " %6lf", epoch_errmag);
1691
1692 if (neural_trace)
1693 fprintf (stderr, "End of epoch;\n"
1694 "after training first weight coeffs: %lf %lf, %lf",
1695 nn->Win[0], nn->Whid[0], nn->Wout[0]);
1696
1697 // Sometimes, it's better to dropkick.
1698 if (n_cycles % NN_FROMSTART_PUNTING == 0)
1699 {
1700 fprintf (stderr, "punt...");
1701 nuke (nn, 0);
1702 };
1703
1704 }; // End of training epoch loop - back to linear code
1705
1706
1707 free(dWin);
1708 free(dWhid);
1709 free(dWout);
1710
1711 // if(n_cycles >= soft_cycle_limit)
1712 // nonfatalerror5 ( "neural: failed to converge within the training limit. ",
1713 // "Beware your results.\n You might want to consider"
1714 // " a larger network as well.", CRM_ENGINE_HERE);
1715
1716 if(neural_trace)
1717 fprintf(stderr, "\nn_cycles = %ld\n", n_cycles);
1718
1719 // Do we microgroom?
1720 // GROT GROT GROT don't do this yet - this code isn't right.
1721 // GROT GROT GROT
1722 if(apb->sflags & CRM_MICROGROOM & 0)
1723 {
1724 int trunced = 0;
1725 for(k = nn->docs_start; k < nn->docs_end;)
1726 {
1727 current_doc = k;
1728 k += 3;
1729 while(*k++);
1730 if(current_doc[1] > NN_MICROGROOM_THRESHOLD)
1731 {
1732 memmove(current_doc, k, sizeof(unsigned long) * (nn->docs_end - k));
1733 nn->docs_end -= k - current_doc;
1734 k = current_doc;
1735 trunced = sizeof(char)
1736 * ( (char *)(nn->docs_end) - (char *)(nn->file_origin) );
1737 n_docs--;
1738 }
1739 }
1740 if(trunced)
1741 {
1742 crm_force_munmap_filename(filename);
1743 dontcare = truncate(filename, trunced);
1744 if(neural_trace)
1745 fprintf(stderr, "\nleaving neural net learn after truncating"
1746 ", n_docs = %ld\n", n_docs);
1747 return 0;
1748 }
1749 }
1750
1751 unmap_file(nn, filename);
1752
1753 if(neural_trace)
1754 fprintf(stderr, "\nleaving neural net learn"
1755 ", n_docs = %ld\n", n_docs);
1756 return 0;
1757 }
1758
crm_neural_net_classify(CSL_CELL * csl,ARGPARSE_BLOCK * apb,char * txtptr,long txtstart,long txtlen)1759 int crm_neural_net_classify (
1760 CSL_CELL *csl, ARGPARSE_BLOCK *apb,
1761 char *txtptr, long txtstart, long txtlen )
1762 {
1763 char filenames_field[MAX_PATTERN];
1764 long filenames_field_len;
1765 char filenames[MAX_CLASSIFIERS][MAX_FILE_NAME_LEN];
1766
1767 NEURAL_NET_STRUCT NN, *nn = &NN;
1768
1769 regex_t regee;
1770
1771 unsigned bag[NN_MAX_FEATURES], sum;
1772 long baglen;
1773
1774 long i, j, k, n, n_classifiers, out_pos;
1775 long fail_on = MAX_CLASSIFIERS;
1776
1777 float output[MAX_CLASSIFIERS][2];
1778 long example_length [MAX_CLASSIFIERS];
1779 double total_icnr, total_ocnr;
1780 double p[MAX_CLASSIFIERS], pR[MAX_CLASSIFIERS], suc_p, suc_pR, tot;
1781
1782 char out_var[MAX_PATTERN];
1783 long out_var_len;
1784
1785
1786 n = 0;
1787
1788 if(internal_trace)
1789 neural_trace = 1;
1790
1791 if(neural_trace)
1792 fprintf(stderr, "entered crm_neural_net_classify\n");
1793
1794
1795 //grab filenames field
1796 crm_get_pgm_arg (filenames_field, MAX_PATTERN, apb->p1start, apb->p1len);
1797 filenames_field_len = apb->p1len;
1798 filenames_field_len =
1799 crm_nexpandvar(filenames_field, filenames_field_len, MAX_PATTERN);
1800
1801 //grab output variable name
1802 crm_get_pgm_arg (out_var, MAX_PATTERN, apb->p2start, apb->p2len);
1803 out_var_len = apb->p2len;
1804 out_var_len = crm_nexpandvar (out_var, out_var_len, MAX_PATTERN);
1805
1806 //a tiny automata for your troubles to grab the names of our classifier files
1807 // and figure out what side of the "|" they're on, hey Bill, why isn't
1808 // this in the stringy stuff?
1809 for(i = 0, j = 0, k = 0; i < filenames_field_len && j < MAX_CLASSIFIERS; i++)
1810 if(filenames_field[i] == '\\') //allow escaped in case filename is wierd
1811 filenames[j][k++] = filenames_field[++i];
1812 else if(isspace(filenames_field[i]) && k > 0)
1813 { //white space terminates filenames
1814 filenames[j][k] = '\0';
1815 k = 0;
1816 j++;
1817 }
1818 else if(filenames_field[i] == '|')
1819 { //found the bar, terminate filename if we're in one
1820 if(k > 0)
1821 {
1822 k = 0;
1823 j++;
1824 }
1825 fail_on = j;
1826 }
1827 else if(isgraph(filenames_field[i])) //just copy char otherwise
1828 filenames[j][k++] = filenames_field[i];
1829
1830
1831 if(j < MAX_CLASSIFIERS)
1832 filenames[j][k] = '\0';
1833 if(k > 0)
1834 n_classifiers = j + 1;
1835 else
1836 n_classifiers = j;
1837
1838 if(fail_on > n_classifiers)
1839 fail_on = n_classifiers;
1840
1841 if(neural_trace)
1842 {
1843 fprintf(stderr, "fail_on = %ld\n", fail_on);
1844 for(i = 0; i < n_classifiers; i++)
1845 fprintf(stderr, "filenames[%ld] = %s\n", i, filenames[i]);
1846 };
1847
1848 baglen = eat_document( txtptr + txtstart, txtlen, &j,
1849 ®ee, bag, NN_MAX_FEATURES, apb->sflags, &sum);
1850
1851 //loop over classifiers and calc scores
1852 for(i = 0; i < n_classifiers; i++)
1853 {
1854 if(map_file(nn, filenames[i]))
1855 {
1856 nonfatalerror("Couldn't mmap file!", filenames[i]);
1857 output[i][0] = 0.5;
1858 output[i][1] = 0.5;
1859 continue;
1860 }
1861 // *** now we do projection in real time because it's smaller!
1862 // this allows different sized nn's to be compared.
1863 // ret = malloc( sizeof(unsigned long) * nn->retina_size);
1864 // rn = project_features(bag, n, ret, nn->retina_size);
1865 example_length[i] = nn->docs_end - nn->docs_start;
1866 do_net(nn, bag, baglen);
1867 output[i][0] = nn->output_layer[0];
1868 output[i][1] = nn->output_layer[1];
1869 // free(ret);
1870 unmap_file(nn, filenames[i]);
1871 if(neural_trace)
1872 fprintf(stderr, "Network outputs on file %s:\t%f\t%f\n",
1873 filenames[i], output[i][0], output[i][1]);
1874 }
1875
1876 // Calculate the winners and losers.
1877
1878 // Get total activation output for all of the networks first:
1879 tot = 0.0;
1880 for(i = 0; i < n_classifiers; i++)
1881 {
1882 // Normalize the classifiers to a [0...1] range
1883 tot += p[i] = 0.5 * (1.0 + output[i][0] - output[i][1]);
1884 }
1885
1886 if(neural_trace)
1887 fprintf(stderr, "tot = %f\n", tot);
1888
1889 for(i = 0; i < n_classifiers; i++)
1890 {
1891 // response fraction of this class.
1892 p[i] /= tot;
1893 if(neural_trace)
1894 fprintf(stderr, "p[%ld] = %f (normalized)\n", i, p[i]);
1895 }
1896
1897 // Find the one "best matching" network
1898 j = 0;
1899 for(i = 1; i < n_classifiers; i++)
1900 if( p[i] > p[j])
1901 j = i;
1902
1903 // Find the overall winning side - success, or fail
1904 suc_p = 0.0;
1905 for(i = 0; i < fail_on; i++)
1906 suc_p += p[i];
1907
1908 // Calculate pR's for all of the classifiers.
1909 total_icnr = 0;
1910 total_ocnr = 0;
1911 for(i = 0; i < n_classifiers; i++)
1912 {
1913 pR[i] = get_pR2 (output[i][0], output[i][1]);
1914 total_icnr += i < fail_on ? output[i][0] : output[i][1];
1915 total_ocnr += i < fail_on ? output[i][1] : output[i][0];
1916 };
1917 // renormalize total responses
1918 total_icnr = total_icnr / n_classifiers;
1919 total_ocnr = total_ocnr / n_classifiers;
1920
1921 suc_pR = get_pR2 (total_icnr, total_ocnr);
1922 out_pos = 0;
1923
1924 if(suc_p > 0.5 ) //test for nan as well
1925 out_pos += sprintf
1926 ( outbuf + out_pos,
1927 "CLASSIFY succeeds; success probability: %f pR: %6.4f\n",
1928 suc_p, suc_pR);
1929 else
1930 out_pos += sprintf
1931 ( outbuf + out_pos,
1932 "CLASSIFY fails; success probability: %f pR: %6.4f\n",
1933 suc_p, suc_pR);
1934
1935 out_pos += sprintf
1936 ( outbuf + out_pos,
1937 "Best match to file #%ld (%s) prob: %6.4f pR: %6.4f \n",
1938 j,
1939 filenames[j],
1940 p[j], pR[j]);
1941
1942 out_pos += sprintf
1943 ( outbuf + out_pos,
1944 "Total features in input file: %ld\n",
1945 baglen );
1946
1947 for(i = 0; i < n_classifiers; i++)
1948 out_pos += sprintf
1949 ( outbuf + out_pos,
1950 "#%ld (%s): feats: %ld ic: %3.2f oc: %3.2f prob: %3.2e, pR: %6.2f\n",
1951 i, filenames[i], example_length[i],
1952 output [i][0], output[i][1], p[i], pR[i] );
1953
1954 if(out_var_len)
1955 crm_destructive_alter_nvariable(out_var, out_var_len, outbuf, out_pos);
1956
1957 if (suc_p <= 0.5)
1958 {
1959 csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1;
1960 csl->aliusstk [csl->mct[csl->cstmt]->nest_level] = -1;
1961 }
1962 return 0;
1963 }
1964