1 //	crm_neural_net.c - a neural net classifier
2 
3 // Copyright 2009 William S. Yerazunis.
4 // This file is under GPLv3, as described in COPYING.
5 
6 /////////////////////////////////////////////////////////////////////
7 //  derived from crm_osb_hyperspace.c by Joe Langeway, rehacked by
8 //  Bill Yerazunis.  Since this code is directly derived from
9 //  crm_osb_hyperspace.c and produced for the crm114 project so:
10 //
11 /////////////////////////////////////////////////////////////////////
12 //
13 //     Original spec by Bill Yerazunis, original code by Joe Langeway,
14 //     recode for CRM114 use by Bill Yerazunis.
15 //
16 //     This file of code (crm_neural_net* and subsidiary routines) is
17 //     dual-licensed to both William S. Yerazunis and Joe Langeway,
18 //     including the right to reuse this code in any way desired,
19 //     including the right to relicense it under any other terms as
20 //     desired.
21 //
22 //////////////////////////////////////////////////////////////////////
23 
24 //////////////////////////////////////////////////////////////////////
//  This file is part of ongoing research and should not be considered
//  a finished product, a reliable tool, an example of good software
//  engineering, or a reflection of any quality of Joe's besides his
//  tendency towards long hours.
29 //////////////////////////////////////////////////////////////////////
30 
31 //  Here's what's going on:
32 //
//  It's a three-layer, Kohonen-flavored, feed-forward neural net, trained
//  by accumulating weight updates calculated with simple gradient descent
//  and back propagation, then applying them once for all documents.
36 //
37 //  NB:  the <bychunk> flag changes this behavior to train the results of each
38 //  document as the document is encountered.  This is _usually_ a good idea.
39 //
40 //  The <fromstart> means "reinitialize the net".  Use this if things get
41 //  wedged.
42 //
43 //  The <append> flag means _do not retrain_. Instead, just log this data
44 //  into the file and return ASAP (use this to load up a bunch of data
45 //  and then train it all, which is computationally cheaper)
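//
//  Purely as an illustrative sketch (not normative syntax documentation),
//  these flags would typically ride on the learn statement in a CRM114
//  script, something along the lines of:
//
//      learn <neural append>    (sample.css) /[[:graph:]]+/
//      learn <neural bychunk>   (sample.css) /[[:graph:]]+/
//      learn <neural fromstart> (sample.css) /[[:graph:]]+/
//
//  where "sample.css" and the token regex are hypothetical placeholders.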
46 
47 //  include some standard files
48 #include "crm114_sysincludes.h"
49 
50 //  include any local crm114 configuration file
51 #include "crm114_config.h"
52 
53 //  include the crm114 data structures file
54 #include "crm114_structs.h"
55 
56 //  and include the routine declarations file
57 #include "crm114.h"
58 
//    The globals used when we need a big buffer - allocated once, used
//    wherever needed.  These are sized to the same size as the data window.
//    Do not mutate these or random binary shall be shat upon thee.
62 extern char *outbuf;
63 
64 //static int neural_trace = 1;
65 static int neural_trace = 0;
66 
67 #define HEADER_SIZE 1024
68 
69 #define STOCHASTIC
70 //#define TRAIN_PER_DOCUMENT
71 //#define ACCUMULATE_TRAINING
72 
73 //    DESCRIPTION OF A NEURAL NETWORK
74 //
75 //   retina_size is number of inputs to the first layer; feature IDs need
76 //                 to be moduloed down to this value.
//   first_layer_size is the number of neurons in the first layer.  Every
//                 neuron sums retina_size weighted values, one from each
//                 of the retina inputs.
80 //   hidden_layer_size is the number of neurons in the hidden layer.  Each of
81 //                 these neurons sums first_layer_size weighted outputs
82 //                  of the first layer
83 //   ... and of course, in this implementation, the final layer is just TWO
84 //                 neurons (in-class and out-of-class), summing up
85 //                 hidden_layer_size weighted outputs.
86 
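//   A quick worked example, using made-up dimensions purely for scale:
//   with retina_size = 4096, first_layer_size = 64, and
//   hidden_layer_size = 64, the total weight count is
//        4096*64 + 64*64 + 64*2  =  262144 + 4096 + 128  =  266368 floats,
//   or roughly one megabyte of coefficients at four bytes each, plus the
//   fixed-size header and whatever example documents get appended after
//   the weights.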
87 
88 typedef struct mythical_neural_net_header
89 {
90   int retina_size, first_layer_size, hidden_layer_size;
91 
92 } NEURAL_NET_HEAD_STRUCT;
93 
94 typedef struct mythical_neural_net
95 {
96   //             The W's are the weights
97   float *Win, *Whid, *Wout;
98   //                these are the activation intensities
99   float *retina, *first_layer, *hidden_layer, output_layer[2];
100   //                      these are the learning deltas
101   float *delta_first_layer, *delta_hidden_layer, delta_output_layer[2];
102   unsigned *docs_start, *docs_end;
103   int retina_size, first_layer_size, hidden_layer_size;
104   void *file_origin;
105 } NEURAL_NET_STRUCT;
106 
107 //   Three little functions to turn bare pointers to the neural
108 //   network coefficient arrays into *floats so they can be read
109 //   or written.  This hack is because C doesn't do multidimensional
110 //   arrays with runtime defined dimensions very well.
111 //
112 //    Just to remind you- nn is the neural net, "neuron" is the neuron being
113 //    activated, and "channel" is the input channel on that neuron.
114 //    (note that this means actual storage is in "odometer" format -
115 //    the last arg varying fastest).
116 //
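//    For example (this just restates what arefWin below computes): the
//    weight connecting retina channel c to first-layer neuron n lives at
//    Win[n * retina_size + c].
//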
inline static float *arefWin(NEURAL_NET_STRUCT *nn, int neuron, int channel)
118 {
119   //  if (neuron < 0 || neuron > nn->first_layer_size)
120   //  fprintf (stderr, "bad neuron number %ld in first layer calc\n", neuron);
121   //if (channel < 0 || channel > nn->retina_size)
122   //  fprintf (stderr, "bad channel number %ld in first layer calc\n", channel);
123   //  fprintf (stderr, "nn = %lx, Win = %lx, neuron = %ld, channel = %ld\n",
124   //	   nn, nn->Win, neuron, channel);
125   return &(nn->Win [(neuron * nn->retina_size) + channel]);
126 }
127 
128 //     Same deal, nn is the net, "neuron" is the neuron number, "channel"
129 //      is the channel on that neuron.
inline static float *arefWhid(NEURAL_NET_STRUCT *nn, int neuron, int channel)
131 {
132   if (neuron < 0 || neuron > nn->hidden_layer_size)
133     fprintf (stderr, "bad neuron number %d in hidden layer calc\n", neuron);
134   if (channel < 0 || channel > nn->first_layer_size)
135     fprintf (stderr, "bad channel number %d in hidden layer calc\n", channel);
136   return & (nn->Whid[( neuron * nn->first_layer_size) + channel]);
137 }
138 
//     Final layer, same deal; nn is the net, "neuron" is the neuron
140 //      number (here, 0 or 1), "channel" is the channel on that neuron.
inline static float *arefWout(NEURAL_NET_STRUCT *nn, int neuron, int channel)
142 {
143   if (neuron < 0 || neuron > 1)
144     fprintf (stderr, "bad neuron number %d in final layer calc\n", neuron);
145   if (channel < 0 || channel > nn->hidden_layer_size)
146     fprintf (stderr, "bad channel number %d in final layer calc\n", channel);
147   return &(nn->Wout [(neuron * nn->hidden_layer_size) + channel]);
148 }
149 
inline static float sign (float x)
151 {
152   if (x < 0.0) return -1;
153   return 1;
154 }
155 
156 //    Stochastic_factor is a "noise term" to keep the net shaken up a bit;
//    this prevents getting stuck in local minima (one of which can occur
//    when a weight approaches zero: the motion becomes *very* small, since
//    alpha, the motion factor, is usually << 1.0, so it's not possible
//    for the weight value to "jump over" zero and go negative).  That
//    causes the weight to lock; if enough weights lock,
162 //    the net fails to converge.
163 //
164 //    There are two parameters used (both are static local vars!).  One is
165 //    the maximum noise added (recommended to be about the same value as
166 //    alpha) and the other is the drop rate, which is how fast the noise
167 //    diminishes with increasing learning cycles.  This is measured in
168 //    terms of the soft cycle limit and epoch number; perhaps it would be
169 //    better if the drop was actually a 1/R curve (Boltzmann curve) but
170 //    this works well.
171 //
172 //    Note: the following also worked pretty well:
173 //     ((0.05 * (double)rand() / (double) RAND_MAX) - 0.025)
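//
//    Concretely: stochastic_factor() below returns a uniform value in
//    (-stoch_noise/2, +stoch_noise/2), so with stoch_noise = 0.05 each
//    epoch nudges every weight by something in roughly (-0.025, +0.025).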
174 
inline static double rand0to1()
176 {
177   return ((double)rand() / (double)RAND_MAX);
178 }
179 
inline static float stochastic_factor (float stoch_noise,
181 				       int soft_cycle_limit,
182 				       int epoch_num)
183 {
184   float v;
185 
186   v = ( rand0to1() - 0.5) * stoch_noise;
187   //  v = ((((double)rand ()) / ((double) RAND_MAX))) * stoch_noise;
188   //  v = v * ( 1 / ( epoch_num + 1));
189   //  v = v *
190   //    (1 - (epoch_num / soft_cycle_limit));
191   return (v);
192 };
193 
194 //     Gain noise is noise applied to the training impulse, rather than
195 //     gain applied to the weights
inline static double gain_noise_factor ()
197 {
198   return ( rand0to1() * NN_DEFAULT_GAIN_NOISE) ;
199 };
200 
201 
202 //  These are used _only_ when a new net needs to be created;
203 //  the actual net dimensions used usually come from the file header
204 int    retina_size = NN_RETINA_SIZE;
205 int    first_layer_size = NN_FIRST_LAYER_SIZE;
206 int    hidden_layer_size = NN_HIDDEN_LAYER_SIZE;
207 
208 /*
209 
210 Another (as yet unimplemented) idea is to stripe the retina in a
211 pattern such that each neuron is "up against" every other neuron at
212 least once, but not as many times as the current "full crossbar"
213 system, which takes a long time to train.
214 
One way to do this is to map the inputs onto the nodes of a fully
connected graph, and then to take the graph nodes in pairwise (or
larger) groups such as edges, faces, or hyperfaces, so that every graph
node is taken against every other node and hence every input at one
level is "against" every other input.  However, we haven't implemented
this.
220 
221 Another yet unimplemented option is to go with a square "Hopfield"
222 type network instead of the three-layer Kohonen network implemented
here.  Although the Hopfield learning method is simpler (simple Hebbian
learning works - that is, clamp the known inputs and known outputs, then
for each neuron, if the inputs match the outputs, increase the weights,
else decrease the weights), the learning capacity of a Hopfield net is
smaller; for N neurons the number of patterns that can be learned is
only N / (2 log N), from McEliece et al. (1987).  So, with 1000 neurons
(1,000,000 coeffs,
230 or a 4 megabyte backing storage file) we could learn only about 70
231 patterns.
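
(Sanity check on that arithmetic: with N = 1000 and natural logarithms,
N / (2 log N) = 1000 / (2 * 6.9), or roughly 72 patterns, so "about 70"
is the right ballpark.)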
232 
233 This isn't to say we'll never implement a modified Hopfield net
234 (with a large number of clamped "neurons") but it's not here yet.
235 
236 */
237 
238 
239 
static int make_new_backing_file(char *filename)
241 {
242   int i;
243   FILE *f;
244   NEURAL_NET_HEAD_STRUCT h;
245   float a;
246   f = fopen(filename, "wb");
247   if(!f)
248   {
249     nonfatalerror5("unable to create neural network backing file", filename,
250                         CRM_ENGINE_HERE);
251     return -1;
252   }
253 
254 
255   h.retina_size = retina_size;
256   h.first_layer_size = first_layer_size;
257   h.hidden_layer_size = hidden_layer_size;
258 
259   if(sparse_spectrum_file_length)
260   {
261     h.first_layer_size = sqrt (sparse_spectrum_file_length / 1024);
262     if (h.first_layer_size < 4) h.first_layer_size = 4;
263     h.hidden_layer_size = h.first_layer_size ;
264     h.retina_size = h.first_layer_size * 1024  ;
265     if (h.retina_size < 1024) h.retina_size = 1024;
266   }
  if (neural_trace || user_trace )
    fprintf (stderr, "Input sparse_spectrum_file_length = %ld. \n"
	     "new neural net dimensions:\n retina width=%d, "
	     "first layer neurons = %d, hidden layer neurons = %d\n",
	     sparse_spectrum_file_length, h.retina_size,
	     h.first_layer_size, h.hidden_layer_size);
273 
274   //    Write out the header fields
275   dontcare = fwrite(&h, 1, sizeof(NEURAL_NET_HEAD_STRUCT), f);
276 
277   //     Pad out the header space to header_size
278   for(i = sizeof(NEURAL_NET_HEAD_STRUCT); i < HEADER_SIZE; i++)
279     {
280        fputc('\0', f);
281     };
282 
  //      and write random small floating points into the
  //      weighting.  (Use the header's dimensions here, since they may
  //      have been overridden by sparse_spectrum_file_length above.)
  i = (h.retina_size * h.first_layer_size)
    + (h.first_layer_size * h.hidden_layer_size)
    + (h.hidden_layer_size * 2);
288   if (neural_trace)
289     {
290       fprintf (stderr, "Putting out %d coefficients.\n", i);
291       fprintf (stderr, "Initial weight ");
292       i--;
293       a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
294       fprintf (stderr, "%f ", a);
295       dontcare = fwrite(&a, 1, sizeof(float), f);
296       i--;
297       a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
298       fprintf (stderr, "%f ", a);
299       dontcare = fwrite(&a, 1, sizeof(float), f);
300       i--;
301       a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
302       fprintf (stderr, "%f ", a);
303       dontcare = fwrite(&a, 1, sizeof(float), f);
304       fprintf (stderr, "\n");
305     };
306   while(i--)
307   {
308       a = rand0to1 () * NN_INITIALIZATION_NOISE_MAGNITUDE;
309       //      fprintf (stderr, "%f ", a);
310       dontcare = fwrite(&a, 1, sizeof(float), f);
311   }
312   fclose(f);
313   return 0;
314 }
315 
static int map_file(NEURAL_NET_STRUCT *nn, char *filename)
317 {
318   NEURAL_NET_HEAD_STRUCT *h;
319   struct stat statbuf;
320   float *w;
321   nn-> file_origin = MAP_FAILED;
322   if(stat(filename, &statbuf))
323   {
324     //    nonfatalerror5("unable to map neural network backing file",
325     //	   "filename", CRM_ENGINE_HERE);
326     //  fprintf (stderr, "static mmap: No such file; nn = %lx\n", (long) nn );
327 
328     return -1;
329   }
330   //  fprintf (stderr, "File is %s\n", filename);
331   nn->file_origin = crm_mmap_file
332                      (  filename,
333                         0, statbuf.st_size,
334                         PROT_READ | PROT_WRITE,
335                         MAP_SHARED,
336                         NULL     );
337   if(nn->file_origin == MAP_FAILED)
338   {
339     nonfatalerror5("unable to map neural network backing file", "filename",
340                          CRM_ENGINE_HERE);
341     return -1;
342   }
343   h = nn->file_origin;
344   nn->retina_size = h->retina_size;
345   nn->first_layer_size = h->first_layer_size;
346   nn->hidden_layer_size = h->hidden_layer_size;
347 
  if (internal_trace)
    fprintf (stderr, "Neural net dimensions: retina width=%d, "
	     "first layer neurons = %d, hidden layer neurons = %d\n",
	     nn->retina_size, nn->first_layer_size, nn->hidden_layer_size);
352 
353 
354   //  These are the sums of the weighted input activations for these layers
355   nn->retina = (float *) malloc (sizeof(float) * h->retina_size);
356   nn->first_layer = (float *) malloc(sizeof(float) * h->first_layer_size);
357   nn->hidden_layer = (float *) malloc(sizeof(float) * h->hidden_layer_size);
358 
359   //  These are the deltas used for learning (only).
360   nn->delta_first_layer=(float *) malloc(sizeof(float) * h->first_layer_size);
361   nn->delta_hidden_layer=(float *)malloc(sizeof(float) * h->hidden_layer_size);
362 
363   // remember output layer is statically allocated!
364 
365   //skip w over the header
366   w = (float *)  ( (char *)h + HEADER_SIZE);
367   nn->Win = w;
368   nn->Whid =  & w[ h->retina_size * h->first_layer_size];
369 
370   nn->Wout = & w[ h->retina_size * h->first_layer_size
371 		  + h->first_layer_size * h->hidden_layer_size ];
372 
373   nn->docs_start = (unsigned *) & w
374     [ h->retina_size * h->first_layer_size
375       + h->first_layer_size * h->hidden_layer_size
376       + h->hidden_layer_size * 2 ];
377 
378 
379   nn->docs_end = (unsigned *) ((char *)h + statbuf.st_size);
380 
381   if (internal_trace)
382     {
383       fprintf (stderr, "Header start at %lx, end at %lx (bytes %lu )\n",
384 	       (unsigned long) h,
385 	       (unsigned long) nn->Win ,
386 	       (unsigned long) nn->Win - (unsigned long) h);
387 
388       fprintf (stderr, "Coeffs start at %lx, end at %lx (ints %lu )\n",
389 	       (unsigned long) nn->Win,
390 	       (unsigned long) nn->docs_start,
391 	       ((unsigned long) nn->docs_start - (unsigned long) nn->Win)
392 	       / sizeof (float));
393 
394       fprintf (stderr, "Docs start at %lx, end at %lx (features %lu )\n",
395 	       (unsigned long) nn->docs_start,
396 	       (unsigned long) nn->docs_end,
397 	       ((unsigned long) nn->docs_end - (unsigned long) nn->docs_start)
398 	       / sizeof (unsigned));
399     };
400   return 0;
401 }
402 
static void unmap_file(NEURAL_NET_STRUCT *nn, char *filename)
404 {
405   free(nn->retina);
406   free(nn->first_layer);
407   free(nn->hidden_layer);
408   free(nn->delta_first_layer);
409   free(nn->delta_hidden_layer);
410   crm_munmap_file((void *) nn->file_origin);
411 #ifndef CRM_WINDOWS
  //    Because mmap/munmap doesn't set atime, nor set the "modified"
  //    flag, some network filesystems will fail to mark the file as
  //    modified and so their caching will make a mistake.
  //
  //    The fix is to do a trivial read/write on the file, to force
  //    the filesystem to repropagate its caches.
418   //
419   {
420     int hfd;                  //  hashfile fd
421     char foo;
422     hfd = open (filename, O_RDWR);
423     dontcare = read (hfd, &foo, sizeof(foo));
424     lseek (hfd, 0, SEEK_SET);
425     dontcare = write (hfd, &foo, sizeof(foo));
426     close (hfd);
427   }
428 #endif	// !CRM_WINDOWS
429 }
430 
431 //     Do a _forced_ unmap.  This is needed if the file will be
432 //     written to with fwrite, etc.
static void force_unmap_file(NEURAL_NET_STRUCT *nn, char *filename)
434 {
435   unmap_file (nn, filename);
436   crm_force_munmap_filename(filename);
437 #ifndef CRM_WINDOWS
  //    Because mmap/munmap doesn't set atime, nor set the "modified"
  //    flag, some network filesystems will fail to mark the file as
  //    modified and so their caching will make a mistake.
  //
  //    The fix is to do a trivial read/write on the file, to force
  //    the filesystem to repropagate its caches.
444   //
445   {
446     int hfd;                  //  hashfile fd
447     char foo;
448     hfd = open (filename, O_RDWR);
449     dontcare = read (hfd, &foo, sizeof(foo));
450     lseek (hfd, 0, SEEK_SET);
451     dontcare = write (hfd, &foo, sizeof(foo));
452     close (hfd);
453   }
454 #endif	// !CRM_WINDOWS
455 }
456 
457 //cache this if this thing is too slow, we'll need crazy interpolation though
static float logistic(float a)
459 {
460   float y;
  if(isnan(a))
    {
      //  Deliberately crash here with a NULL-pointer write, so a core
      //  dump / debugger shows exactly where the NAN came from.
      char *foo;
      fprintf (stderr, "Logistic of a NAN\n");
      foo = NULL;
      strcpy (foo, "hello, world");
468       fatalerror5("Tried to compute the logistic function of a NAN!!", "",
469 		  CRM_ENGINE_HERE);
470       return 0.5; //perhaps we should escape all the way
471     }
472 
473     y = 1.0 / (1.0 + exp(-a));
474     //if (y < 0.05) y = 0.05;
475     //if (y > 0.95) y = 0.95;
476     if(isnan(y))
477     {
478       fatalerror5("Computed a NAN as a RESULT of the logistic function!", "",
479 		  CRM_ENGINE_HERE);
480       return 0.5;
481     }
482   return y;
483 }
484 
485 
486 //    A totally ad-hoc way to calculate pR.  (well, since all pR's
487 //     are really ad-hoc underneath, it's perfectly reasonable).
488 //
489 #ifdef NEED_SINGLE_ARG_PR
static double get_pR(double p)
491 {
492   double a = fabs(p - 0.5);
493   // scale 0..0.1 to 0..1.0
494   if( a < 0.1 )
495     a *= 10.0;
496   //  scale 0.1...0.2 to 1.0...10.0
497   else if(a < 0.2)
498     a = 1.0 + 90.0 * (a - 0.1);
499   //   scale 0.2...0.3 to 10.0...100.0
500   else if(a < 0.3)
501     a = 10.0 + 900.0 * (a - 0.2);
502   //    scale 0.3... to 100.0...and up
503   else
504     a = 100.0 + 1000.0 * (a - 0.3);
505   return p >= 0.5 ? a : -a;
506 }
507 #endif	// NEED_SINGLE_ARG_PR
508 
static double get_pR2 (double icnr, double ocnr)
510 {
511   double sum;
512   double diff;
513   double output;
514 
515   sum = icnr + ocnr;
516   diff = icnr - ocnr;
517 
518   output = 100 * sum * diff;
519   return output;
520 }
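
//    Note that 100 * sum * diff is just 100 * (icnr^2 - ocnr^2); for
//    example, icnr = 0.9 and ocnr = 0.1 yield a pR of 100 * 1.0 * 0.8 = 80,
//    while a dead heat (icnr == ocnr) yields a pR of exactly zero.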
521 
522 
523 
524 //    Actually evaluate the neural net, given
525 //    the "bag" of features.
526 
static void do_net (NEURAL_NET_STRUCT *nn, unsigned *bag, long baglen)
528 {
529   int i;
530   int neuron, channel;
531 
  int pauser;

  //  Debugging hook: if we somehow get handed an empty bag, spin here
  //  forever so a debugger can be attached and the caller inspected.
  pauser = 0;
  if (baglen == 0) pauser = 1;
  while (pauser==1) ;
  pauser = 0;
538 
539   //    Initialize the activation levels to zeroes everywhere
540 
541   //   First layer:
542   for(neuron = 0; neuron < nn->first_layer_size; neuron++)
543     nn->first_layer[neuron] = 0.00;
544 
545   //   Second (hidden) layer:
546   for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
547     nn->hidden_layer[neuron] = 0.00;
548 
549   //   Final layer:
550   nn->output_layer[0] = 0.00;
551   nn->output_layer[1] = 0.00;
552 
553   //    Sum up the activations on the first layer of the net.
554   //    Note that this code (amazingly) works fine for both _non_-projected
555   //    as well as projected feature bags, by inclusion of the modulo on
556   //    the retina size.
557 
558   if (neural_trace)
559     {
560       fprintf (stderr, "First six items in the bag are: \n "
561 	       "     %d %d %d %d %d %d\n",
562 	       bag[0], bag[1], bag[2], bag[3], bag[4], bag[5]);
563     };
564 
565   //   Empty the retina:
566   for (channel = 0; channel < nn->retina_size; channel++)
567     nn->retina[channel] = 0.00;
568 
569   // for each feature in the bag
570   for(i = 0; i < baglen; i++)
571     {
572       channel = bag[i] % nn-> retina_size;
573       //     Channel 0 is reserved as "constants", so if the modulo
574       //     puts a stimulus onto the 0th retina channel, we actually
575       //     push it over one channel, to retina channel 1.
576       if (channel == 0) channel = 1;
577       nn->retina[channel] += 1.0;
578     };
579   //     debugging check
580   if (neural_trace)
581     fprintf (stderr, "Running do_net %lx on doc at %lx length %d\n",
582 	     (long) nn, (long) bag, i);
583 
584   //    Now we actually calculate the neuron activation values.
585   //
586   //    First, the column 0 "always activated" constants, since channel
587   //    zero is verboten as an actual channel (we roll channel 0 activations
588   //    over onto channel 1)
589   //
590 
591   nn->retina[0] = 1.0;
592 
593   //   Major confusion prevention debugging statement.
594   if ( 0 )
595     for (channel = 0; channel < nn->retina_size; channel++)
596       if (nn->retina[channel] > 0)
597 	fprintf (stderr, " %d", channel);
598 
599   for ( neuron = 0; neuron < nn->first_layer_size; neuron++)
600     for (channel = 0; channel < nn->retina_size; channel++)
601       //  sum the activations on the first layer for this channel
602       nn->first_layer[neuron] +=
603 	//	avalWin(nn, neuron, channel)
604 	nn->Win[(neuron*nn->retina_size) + channel]
605 	* nn->retina[channel];
606 
607   //   Do a nonlinear function (logistic) in-place on the first layer outputs.
608   for(neuron = 0; neuron < nn->first_layer_size; neuron++)
609     {
610       nn->first_layer[neuron] = logistic(nn->first_layer[neuron]);
611     };
612 
613   //    For each neuron output in the first layer, generate activation
614   //    levels for the second (hidden) layer inputs.
615   //
616   //     Like the retina, we "sacrifice" the input "neuron zero" to be the
617   //     bias-from-zero weight
618 
619   for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
620     nn->hidden_layer[neuron] = *arefWhid (nn, neuron, 0);
621 
622   for( channel = 1; channel < nn->first_layer_size; channel++)
623     for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
624       nn->hidden_layer[neuron] +=
625 	*arefWhid(nn, neuron, channel)
626 	* nn->first_layer[channel];
627 
628 
629   //   Do our nonlinear function (logistic) here on the second layer outputs.
630   for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
631     nn->hidden_layer[neuron] = logistic(nn->hidden_layer[neuron]);
632 
633   //     Generate the values in the final output layer (both neurons)
634   //
635   //     Again, we sacrifice neuron 0 of the hidden layer to be the
636   //     bias weights.
637 
  nn->output_layer[0] = *arefWout (nn, 0, 0);
  nn->output_layer[1] = *arefWout (nn, 1, 0);
640 
641   for(channel = 1; channel < nn->hidden_layer_size; channel++)
642     {
643       nn->output_layer[0] += *arefWout(nn, 0, channel)
644 	* nn->hidden_layer[channel];
645       nn->output_layer[1] += *arefWout(nn, 1, channel)
646 	* nn->hidden_layer[channel];
647     };
648 
649   nn->output_layer[0] = logistic( nn->output_layer[0] );
650   nn->output_layer[1] = logistic( nn->output_layer[1] );
651   if (internal_trace || neural_trace)
652     fprintf (stderr, "Network final outputs: %lf vs %lf\n",
653 	     nn->output_layer[0], nn->output_layer[1]);
654 
655 }
656 
657 
static int compare_features(const void *a, const void *b)
659 {
660   if(*(unsigned int *)a < *(unsigned int *)b)
661     return -1;
662   if(*(unsigned int *)a > *(unsigned int *)b)
663     return 1;
664   return 0;
665 }
666 
667 
668 //     Convert a text into a bag of features, obeying things
669 //     like the tokenizer regex and the UNIQUE flag.  The result
670 //     is a bag of sorted features.  This does _not_ do projection
671 //     onto the retina, however (see "project_features" for that).
672 //     Note- in later versions of this code, we no longer project
673 //     separately.  Instead, we modulo at runtime onto the retina.
674 //
static long eat_document
676         (       char *text, long text_len, long *ate,
677                 regex_t *regee,
678                 unsigned *feature_space, long max_features,
679                 long long flags, unsigned *sum)
680 {
681   long n_features = 0;
682   long i, j;
683   long long unigram, unique, string;
684   long next_offset;
685 
686   unique = apb->sflags & CRM_UNIQUE;
687   unigram = apb->sflags & CRM_UNIGRAM;
688   string = apb->sflags & CRM_STRING;
689 
690   *sum = 0;
691   *ate = 0;
692 
693   //   Use the flagged vector tokenizer
694   //
695   //     GROT GROT GROT note that we _disregard_ the passed-in "regee" (the
696   //     previously precompiled regex) and use the automagical code in
697   //     vector_tokenize_selector to get the "right" parse regex from the
698   //     user program's APB.
699   crm_vector_tokenize_selector
700     (apb,                   // the APB
701      text,                   // input string buffer
702      0,                      // start offset
703      text_len,               // length
704      NULL,                  // parser regex
705      0,                     // parser regex len
706      NULL,                   // tokenizer coeff array
707      0,                      // tokenizer pipeline len
708      0,                      // tokenizer pipeline iterations
709      feature_space,           // where to put the hashed results
710      max_features - 1,        //  max number of hashes
711      &n_features,             // how many hashes we actually got
712      &next_offset);           // where to start again for more hashes
713 
714   //     GROT GROT GROT
715   //     maybe we should force a constant into one column of input
716   //     so we can change the threshold of level activation?
717 
718   //   Sort the features for speed in matching later - NO NEED ANY MORE
719   //  qsort(feature_space, n_features, sizeof(unsigned int), compare_features);
720 
721   // anything that hashed to 0 or 1 needs to be promoted to 2 or 3, because
722   //   0 and 1 are our "in-class" and "out-of-class" sentinels.  Also
723   //   calculate the checksum for this document.
724 
725   for (i = 0; i < n_features; i++)
726     {
727       if (feature_space[i] == 0)
728 	feature_space[i] = 2;
729       if (feature_space[i] == 1)
730 	feature_space[i] = 3;
731     }
732 
733   //    Do a uniquifying pass.
734   if(unique)
735     {
736       //   Sort the features for uniquifying
737       qsort(feature_space, n_features, sizeof(unsigned int), compare_features);
738       //      do "two finger" uniquifying on the sorted result.
739       i = 0;
740       for(j = 1; j < n_features; j++)
741 	if ( feature_space[j] != feature_space [i] )
742 	  feature_space[++i] = feature_space[j];
743       if (n_features > 0)
744 	n_features = i + 1;
745     };
746 
747   //  and do the checksum update
748   *sum = 0;
749   for (i = 0; i < n_features; i++)
750     *sum += feature_space[i];
751 
752   //    All done.
753   return n_features;
754 }
755 
756 //     Nuke the net - basically fill it with small uniformly distributed
757 //     random weighting coefficients.  This gets the net out of a "stuck
758 //     on center" condition.  A flag of 0 means "from zero", a flag of 1
759 //     means "in addition to current state".
760 //
static void nuke(NEURAL_NET_STRUCT *nn, long flag)
762 {
763   long j;
764   float *f;
765   float dsn, dsno;
766 
767   dsn = NN_DEFAULT_STOCH_NOISE  ;
768   dsno = - dsn / 2.0;
769 
770   if (neural_trace)
771     fprintf (stderr, "********Nuking the net to random values\n");
772 
773   f = nn->Win;
774   for (j = 0; j < nn->retina_size * nn->first_layer_size; j++)
775     f[j] = flag*f[j] + dsno + dsn * rand0to1();
776 
777   f = nn->Whid;
778   for (j = 0; j < nn->first_layer_size * nn-> hidden_layer_size; j++)
779     f[j] = flag*f[j] + dsno + dsn * rand0to1();
780 
781   f = nn->Wout;
782   for (j = 0; j < nn-> hidden_layer_size  * 2; j++)
783     f[j] = flag*f[j] + dsno + dsn * rand0to1();
784 
785 }
786 
787 
788 //   Entry point for learning
789 //    Basically, we use gradient descent.
790 //
int crm_neural_net_learn
792     (   CSL_CELL *csl, ARGPARSE_BLOCK *apb,
793         char *txtptr, long txtstart, long txtlen   )
794 {
795   NEURAL_NET_STRUCT my_nn, *nn = &my_nn;
796   char filename[MAX_PATTERN];
797   char htext[MAX_PATTERN];
798   long htext_len;
799   struct stat statbuf;
800 
801   regex_t regee;
802 
803   unsigned bag[NN_MAX_FEATURES];
804 
805   unsigned sum;
806 
807   long i, j, n_features;
808   long old_file_size;
809   unsigned *new_doc_start, *current_doc, *k, *l;
810   unsigned out_of_class;
811   long found_duplicate, n_docs, n_docs_trained, current_doc_len;
812   long n_cycles, filesize_on_disk, soft_cycle_limit;
813   unsigned n_no_train;
814 
815   FILE *f;
816 
817   float *dWin, *dWhid, *dWout;
818 
819   float stoch_noise, internal_training_threshold;
820   double alpha = NN_DEFAULT_ALPHA;
821   double alphalocal;
822 
823   long this_doc_was_wrong;
824   double errmag, epoch_errmag;
825 
826   //     in_class_docs and out_class_docs are lookaside lists so
827   //     we can alternate training of in-class and out-of-class
828   //     examples, so huge batches are not a problem.
829   //      GROT GROT GROT this goes in later...  coded in .crm,
830   //      it works really well, but not coded into the C yet.
831   //
832   //  long in_class_docs[MAX_PATTERN], out_class_docs[MAX_PATTERN];
833   //  long icd, icd_max, ocd, ocd_max;
834 
835   long neuron, channel;
836 
837   if(internal_trace)
838     neural_trace = 1;
839 
840   if(neural_trace)
841     fprintf(stderr, "entered crm_neural_net_learn\n");
842 
843   //    Get the filename
844   crm_get_pgm_arg (htext, MAX_PATTERN, apb->p1start, apb->p1len);
845   htext_len = apb->p1len;
846   htext_len = crm_nexpandvar (htext, htext_len, MAX_PATTERN);
847 
848   i = 0;
849   while (htext[i] < 0x021) i++;
850   j = i;
851   while (htext[j] >= 0x021) j++;
852   htext[j] = '\000';
853   strcpy (filename, &htext[i]);
854 
  //     Set a nominal epoch cycle limiter
  //
  if(apb->sflags & CRM_FROMSTART)    //  FROMSTART reinits the coeffs.
    {
      soft_cycle_limit = NN_MAX_TRAINING_CYCLES_FROMSTART;
    }
  else
    {
      soft_cycle_limit = NN_MAX_TRAINING_CYCLES;
    };
866 
867   //    Get alpha, the stochastic noise level, and the cycle limit from
868   //      the s2 parameter
869   //
870   {
871     char s2_buf[MAX_PATTERN];
872     long s2_len;
873 
874     alpha = NN_DEFAULT_ALPHA;
875     stoch_noise = NN_DEFAULT_STOCH_NOISE;
876     internal_training_threshold = NN_INTERNAL_TRAINING_THRESHOLD;
877 
878     crm_get_pgm_arg (s2_buf, MAX_PATTERN, apb->s2start, apb->s2len);
879     s2_len = apb->s2len;
880     s2_len = crm_nexpandvar (s2_buf, s2_len, MAX_PATTERN);
881     if (s2_len > 0)
882       sscanf (s2_buf, "%lf %f %ld %f",
883 	      &alpha,
884 	      &stoch_noise,
885 	      &soft_cycle_limit,
886 	      &internal_training_threshold);
887     if (alpha < 0.0 ) alpha = NN_DEFAULT_ALPHA;
888     if (alpha < 0.000001 ) alpha = 0.000001;
889     if (alpha > 1.0) alpha = 1.0;
890     if (stoch_noise > 1.0) stoch_noise = 1.0;
891     if (stoch_noise < 0) stoch_noise = NN_DEFAULT_STOCH_NOISE;
892     if (soft_cycle_limit < 0)
893       soft_cycle_limit = NN_MAX_TRAINING_CYCLES_FROMSTART;
894 
895     //    fprintf (stderr, "**Alpha = %lf, noise = %f cycle limit = %ld\n",
896     //     alpha, stoch_noise, soft_cycle_limit);
897   }
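
  //    For example (an illustrative value, not a recommendation): an s2
  //    parameter of "0.2 0.05 1000 0.1" would set alpha = 0.2, stochastic
  //    noise = 0.05, a soft cycle limit of 1000 epochs, and an internal
  //    training threshold of 0.1, matching the sscanf format above.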
898 
899   //    Convert the text form of the document into a bag of features
900   //    according to the parsing regex, and compute the checksum "sum".
901   //
902   n_features = eat_document ( txtptr + txtstart, txtlen, &i,
903                       &regee, bag, NN_MAX_FEATURES, apb->sflags, &sum);
904 
905   if (user_trace)
906     fprintf (stderr, "***total input features = %ld (1st 3:) %d %d %d \n\n",
907 	     n_features, bag[0], bag[1], bag[2]);
908 
  //    Put the document's features into the file by appending to the
  //    end of the file.  Note that we have a header - a 0 for normal
  //    (in-class) learning, and a 1 for "not in class" learning, then
  //    a document checksum for fast finding of duplicate documents,
  //    then a reserved number for how many times this hasn't needed training
  //    (a speedup that is currently unused).
915   //
916 
917   found_duplicate = 0;
918 
919   i = stat (filename, &statbuf);
920 
921   if (i != 0)
922     {
923       //   Nope, file does not exist.  Make a new one.
924       if (user_trace)
925 	fprintf (stderr, "Making a new backing file: '%s'\n", filename);
926       make_new_backing_file(filename);
927       stat(filename, &statbuf);
928       if (neural_trace)
929 	fprintf (stderr, "Initial file size (just coeffs): %ld\n",
930 		 statbuf.st_size);
931     }
932 
933   //   The file now ought to exist.  Map it.
934   i = map_file (nn, filename);
935   if ( i < 0)
936     {
937       fatalerror5 ("Could not create the neural net backing file ",
938 		   filename, CRM_ENGINE_HERE);
939     };
940   //    Scan the file for documents with the matching checksum
941   //    so we can selectively forget / refute documents.
942   //      (GROT GROT GROT - I hate this method of refutation- it
943   //      depends on absolutely identical documents.  On the other
944   //      hand, it's how a NN can be told of negative examples, which
945   //      back-propagation needs. - wsy.  ).
946   //
947   //    This is admittedly ugly to map the file, scan it, unmap it,
948   //    write append to it, and map it again.  Bleah.... Yes, I know.
949 
950   found_duplicate = 0;
951   k = nn->docs_start;
952   for(k = nn->docs_start;
953       k < (nn->docs_end - 100)
954 	&& (k[0] == 0 || k[0] == 1) //   Find a start-of-doc sentinel
955 	&& k[2] != sum;             //  does the checksum match?
956       k++);
957 
958   if (k[2] == sum)
959     found_duplicate = 1;
960   //   Did we find a matching sum
961   if(found_duplicate)
962     {
963       k[1] = 0;
964       if (apb->sflags & CRM_REFUTE)
965 	{
966 	  if (neural_trace)
967 	    fprintf (stderr, "Marking this doc as out-of-class.\n");
968 	  k[1] = 1;
969 	};
970       if (apb->sflags & CRM_ABSENT)
971 	{
972 	  //   erasing something out of the
973 	  //   input set.   Cut it out of the
974 	  //   knowledge base.
975 	  unsigned *dest;
976 	  long len;
977 	  if (neural_trace)
978 	    fprintf (stderr, "Obliterating data...");
979 
980 	  dest = k;
981 	  //   Find the next zero sentinel
982 	  for(k += 3; *k; k++);
983 	  //   and copy from here to end of file over our
984 	  //    incorrectly attributed data, overwriting it.
985 	  len = (unsigned long) k - (unsigned long) dest;
986 	  if (neural_trace)
987 	    fprintf (stderr, "start %lx length %ld\n",
988 		     (unsigned long) dest, len);
989 	  for (; k< nn->docs_end; k++)
990 	    {
991 	      *dest = *k;
992 	      dest++;
993 	    };
994 	  //
	  //    now unmap the file (which syncs it to disk) and truncate
	  //    it.  Then remap it back in; voila - the bad data's gone.
997 	  force_unmap_file (nn, filename);
998 	  dontcare = truncate (filename, statbuf.st_size - len);
999 	  map_file (nn, filename);
1000 	  if (neural_trace)
1001 	    fprintf (stderr, "Dealt with a duplicate at %lx\n",
1002 		     (unsigned long) k);
1003 	}
1004       else
1005 	{
1006 	  if(neural_trace)
1007 	    fprintf(stderr, "***Learning same file twice.  W.T.F. \n");
1008 	};
1009     };
1010   retina_size = nn->retina_size;
1011 
1012   //   then this data file has never been seen before so append it and remap
1013   //
1014   if(! found_duplicate)
1015     {                         // adding to the old file
1016       if (neural_trace) fprintf (stderr, "Appending new data.\n");
1017       old_file_size = statbuf.st_size;
1018       if(neural_trace)
1019 	fprintf(stderr, "NN: new file data will start at offset %ld\n",
1020 		old_file_size);
1021 
1022       //   make sure that there's something to add.
1023       if(n_features < 0.5 )
1024 	{
1025 	  if(neural_trace)
1026 	    fprintf(stderr,
1027 		    "NN: Can't add a null example to the net.\n");
1028 	}
1029       else
1030 	{
1031 
1032 	  // unmap the file so we can write-append to it.
1033 	  force_unmap_file(nn, filename);
1034 	  f = fopen(filename, "ab+");
1035 	  out_of_class = ((apb->sflags & CRM_REFUTE) != 0);
1036 	  if(internal_trace || neural_trace)
1037 	    {
1038 	      fprintf (stderr, "Sense writing %u\n", out_of_class);
1039 	      if(out_of_class)
1040 		fprintf(stderr, "learning out of class\n");
1041 	      else
1042 		fprintf(stderr, "learning in class\n");
1043 	    }
1044 
1045 	  //     Offset 0 is the sentinel - always either 0 or 1
1046 	  //   Offset 0 -  Write 0 or 1 (in-class or out-class)
1047 	  dontcare = fwrite(&out_of_class, 1, sizeof(unsigned), f);
1048 
1049 	  //  Offset 1 - Write the number of passes without a
1050 	  //  retrain on this one
1051 	  n_no_train = 0; //number of times we went without training this guy
1052 	  dontcare = fwrite(&n_no_train, 1, sizeof(unsigned), f);
1053 
1054 	  //  Offset 2 - Write the sum of the document feature hashes.
1055 	  dontcare = fwrite(&sum, 1, sizeof(unsigned), f);   //hash of whole doc
1056 
1057 	  //       ALERT ALERT ALERT
1058 	  //    CHANGED to save the bag, _not_ the projection!
1059 	  //    This means we must project during training, however
1060 	  //    it also means we don't bust the CPU cache nearly as
1061 	  //    badly!
1062 	  dontcare = fwrite (bag, n_features, sizeof(unsigned), f);
1063 	  if (neural_trace)
1064 	    {
1065 	      fprintf (stderr,
1066 		       "Appending 3 marker unsigneds and %ld unsigneds of features\n",
1067 		       n_features);
1068 	      fprintf (stderr, "First three features are %d %d %d\n",
1069 		       bag[0], bag[1], bag[2]);
1070 	    }
1071 	  //    Close the file and we've now updated the disk image.
1072 	  fclose(f);
1073 	}
1074       stat(filename, &statbuf);
1075       filesize_on_disk = statbuf.st_size;
1076       if(neural_trace)
1077         fprintf(stderr, "NN: statted filesize is now %ld.\n", statbuf.st_size);
1078 
1079       //   MMap the file back into memory and fix up the nn->blah structs
1080       if(map_file(nn, filename))
1081 	{
1082 	  //nonfatalerror("Couldn't mmap file!", filename);
1083 	  return -1;
1084 	}
1085 
1086       if (neural_trace)
1087 	fprintf (stderr, "Neural network mapped at %lx\n", (long) nn);
1088 
1089       new_doc_start =
1090 	(unsigned *) ((char *)(nn->file_origin) + old_file_size);
1091 
1092       //    Print out a telltale to see if we're in the right place.
1093       if (neural_trace)
1094 	{
1095 	  fprintf (stderr, "\nAfter mmap, offsets of weight arrays"
1096 		   " are:\n   Win: %lx, Whid: %lx, Wout: %lx\n",
1097 		   (long)nn->Win, (long)nn->Whid, (long)nn->Wout);
1098 	  fprintf (stderr, "First weights (in/hid/out): %lf, %lf, %lf\n",
1099 		   nn->Win[0], nn->Whid[0], nn->Wout[0]);
1100 	  fprintf (stderr, "Weight ptrs from map start : "
1101 		   "Win: %lx, Whid: %lx, Wout: %lx\n",
1102 		   (long) ((void *)&nn->Win[0]),
1103 	           (long) ((void *)&nn->Whid[0]),
1104 		   (long) ((void *)&nn->Wout[0]));
1105 	  fprintf (stderr, "Weight ptrs (arefWxxx funcs: "
1106 		   "Win: %lx, Whid: %lx, Wout: %lx\n",
1107 		   (long) ((void *)arefWin(nn, 0, 0)),
1108 		   (long) ((void *)arefWhid(nn, 0, 0)),
1109 		   (long) ((void *)arefWout(nn, 0, 0)));
1110 	};
1111 
1112     }
1113   else
1114     new_doc_start = k;
1115 
1116   //    If we're in APPEND mode, we don't train yet.  So we're
1117   //    complete at this point and can return to the caller.
1118   //
1119   if(apb->sflags & CRM_APPEND)
1120     {
1121       if (neural_trace || internal_trace)
1122 	fprintf (stderr,
1123 		 "Append mode- exiting neural_learn without recalc\n");
1124       unmap_file (nn, filename);
1125       return -1;
1126     }
1127 
1128   //   Are we going from the absolute start?
1129   if (apb->sflags & CRM_FROMSTART)
1130       nuke(nn, 0);
1131 
1132 
1133   //   Tally the number of example documents in this file
1134   n_docs = 0;
  for(k = nn->docs_start; k < nn->docs_end; k++)
    {
      if (*k < 2)
	n_docs++;
    };
1140 
1141   if (user_trace)
1142     fprintf (stderr, "Total documents in file: %lu\n", n_docs);
1143 
1144   n_cycles = 0;
1145 
1146   n_docs_trained = 1;
1147 
1148 
1149   //    Malloc the internal allocations for back propagation.  These
1150   //    are the deltas used in the "blame game" of back-propagation itself.
1151   //
1152   dWin = malloc(sizeof(float) * nn->retina_size * nn->first_layer_size);
1153   dWhid = malloc(sizeof(float) * nn->first_layer_size * nn->hidden_layer_size);
1154   dWout = malloc(sizeof(float) * nn->hidden_layer_size * 2);
1155 
1156   //    Check - did we malloc successfully?
1157   if(!dWin || !dWhid || !dWout)
1158     {
1159       if(dWin)
1160 	free(dWin);
1161       if(dWhid)
1162 	free(dWhid);
1163       if(dWout)
1164 	free(dWout);
1165       unmap_file(nn, filename);
1166       fatalerror5("unable to malloc!", "Neural network 3", CRM_ENGINE_HERE);
1167       return -1;
1168     }
1169 
1170   alphalocal = alpha;
1171   //    Now the learning loop itself
1172   n_docs_trained = 1;
1173   while(n_cycles < soft_cycle_limit && n_docs_trained > 0)
1174     {
1175       float eic0, eic1, eoc0, eoc1;
1176       if (neural_trace)
1177 	fprintf (stderr, "\n\n**** Start of epoch %ld on net at %lx\n",
1178 		 n_cycles, (long)nn);
1179       if (user_trace)
1180 	fprintf (stderr, "\nE %4f%7ld ",
1181 		 alphalocal,
1182 		 n_cycles);
1183       n_docs_trained = 0;
1184       epoch_errmag = 0.0;
1185       errmag = 0.0;
1186 
1187 
1188       //    Zero out any accumulated delta errors, in the reverse
1189       //    (back-propagation) order, so we can template this code
1190       //    again for the actual backprop.
1191       //
1192       //       mid-to-out layer deltas first
      for( channel = 0; channel < nn->hidden_layer_size; channel++)
	for (neuron = 0; neuron < 2; neuron++)
	  dWout[channel * 2 + neuron] = 0.0;
1196 
1197       //       in-to-mid deltas next
1198       for( channel = 0; channel < nn->first_layer_size; channel++)
1199 	for(neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1200 	  dWhid[channel * nn->hidden_layer_size + neuron] = 0.0;
1201 
1202       //       Retina-to-in layer deltas last.
1203       for(channel = 0; channel < nn->retina_size; channel++)
1204 	for(neuron = 0; neuron < nn->first_layer_size; neuron++)
1205 	  dWin[channel * nn->first_layer_size + neuron] = 0.0;
1206 
1207       //    Add stochastic noise to the network... this happens
1208       //    only once per epoch no matter what the protocol...
1209       //
1210       if (stoch_noise > 0.0)
1211 	{
1212 	  //   First, noodge the output layer's input-side weights:
1213 	  for (neuron = 0; neuron < 2; neuron++)
1214 	    for(channel = 0; channel < nn->hidden_layer_size; channel++)
1215 	      {
1216 		*arefWout(nn, neuron, channel) +=
1217 		  stochastic_factor (stoch_noise, soft_cycle_limit, n_cycles);
1218 	      };
1219 
1220 	  //     Then for the hidden layer's input weights
1221 	  for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1222 	    for(channel = 0; channel < nn->first_layer_size; channel++)
1223 	      {
1224 		*arefWhid(nn, neuron, channel) +=
1225 		  stochastic_factor (stoch_noise, soft_cycle_limit, n_cycles);
1226 	      };
1227 
1228 	  //     Finally, the input layer's input weights
1229 	  for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1230 	    for(channel = 0; channel < nn->retina_size; channel++)
1231 	      {
1232 		*arefWin(nn, neuron, channel) +=
1233 		  stochastic_factor (stoch_noise, soft_cycle_limit, n_cycles);
1234 	      };
1235 	};
1236 
1237       //    "Track" the weights closer to zero.  This keeps unused
1238       //    weights from going haywire.  Note that the static magnitude
1239       //    of this is equal to the average stochastic noise.
1240       //
1241       if (stoch_noise > 0.0)
1242 	{
1243 	  //   First, noodge the output layer's input-side weights:
1244 	  for (neuron = 0; neuron < 2; neuron++)
1245 	    for(channel = 0; channel < nn->hidden_layer_size; channel++)
1246 	      {
1247 		*arefWout(nn, neuron, channel) *= NN_ZERO_TRACKING ;
1248 	      };
1249 
1250 	  //     Then for the hidden layer's input weights
1251 	  for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1252 	    for(channel = 0; channel < nn->first_layer_size; channel++)
1253 	      {
1254 		*arefWhid(nn, neuron, channel) *= NN_ZERO_TRACKING ;
1255 	      };
1256 
1257 	  //     Finally, the input layer's input weights
1258 	  for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1259 	    for(channel = 0; channel < nn->retina_size; channel++)
1260 	      {
1261 		*arefWin(nn, neuron, channel) *= NN_ZERO_TRACKING ;
1262 	      };
1263 	};
1264 
1265       //   Loop around and train each document.
1266       for(k = nn->docs_start; k < nn->docs_end;)  // the per-document loop
1267 	{
1268 	  //  Start on the 0/1 (in-class/out-of-class) sentinel
1269 	  out_of_class = k[0];
1270 	  if (out_of_class != 0 && out_of_class != 1)
1271 	    fatalerror5 ( "SYNCH ERROR in training data file: ",
1272 			  "filename",
1273 			  CRM_ENGINE_HERE);
1274 
1275 	  if (neural_trace)
1276 	    {
1277 	      fprintf (stderr, " Doc %lx out_of_class %u\n",
1278 		       (unsigned long int) k,
1279 		       out_of_class);
1280 	      fprintf (stderr,
1281 		       "First weights (in/hid/out): %lf, %lf, %lf\n",
1282 		       nn->Win[0], nn->Whid[0], nn->Wout[0]);
1283 	    };
1284 
1285 	  k++;
1286 	  //  save a pointer to the "OK this many times" counter
1287 	  l = k;
1288 
1289 	  k++;
1290 	  //   Now we point to the document checksum.
1291 	  //     Skip over the doc checksum
1292 	  k++;
1293 
1294 	  //      Now k is the first feature in the current document.
1295 	  //      Find end of the current doc
1296 	  current_doc = k;
1297 	  current_doc_len = 0;
1298 	  while (k < nn->docs_end && *k > 1)
1299 	    {
1300 	      k++;
1301 	      current_doc_len++;
1302 	    };
1303 	  //    k now points to the next document
1304 	  if (neural_trace)
1305 	    fprintf (stderr,
1306 		     "Current doc %lx class %ld len %lx next doc start %lx next sense %lx\n",
1307 		     (unsigned long) current_doc,
1308 		     (unsigned long) out_of_class,
1309 		     (unsigned long) current_doc_len,
1310 		     (unsigned long) k,
1311 		     (unsigned long) *k);
1312 
1313      	  //      Run the net on the document
1314 	  do_net(nn, current_doc, current_doc_len);
1315 	  if( neural_trace )
1316 	    fprintf(stderr, "-- doc @ %lx got network outputs of %f v. %f\n",
1317 		    (unsigned long)current_doc,
1318 		    nn->output_layer[0], nn->output_layer[1]);
1319 
1320 	  //       Keep track of the total error of this epoch.
1321 	  errmag = (! (out_of_class)) ?
1322 	    nn->output_layer[1] + (1 - nn->output_layer[0])
1323 	    :
1324 	    nn->output_layer[0] + (1 - nn->output_layer[1]);
1325 	  epoch_errmag += errmag;
1326 
1327 	  //    Now, the test- output channel 0 is "in class", channel
1328 	  //    1 is "not in class".  We see if the output of the
1329 	  //    net is acceptable here, and if not, backprop our errors.
1330 	  //
	  //     Note that the SRI document says "Always backprop every
	  //     training sample on every training epoch."  Maybe that's a
1333 	  //     good idea, maybe not.  Maybe it causes overfitting.
1334 	  //     It's a good question either way.
1335 	  //
1336 	  //     These error terms are all cast such that a value less than
1337 	  //     zero is a "retrain" condition.
1338 	  eic0 = nn->output_layer[0] - (1.0 - internal_training_threshold);
1339 	  eoc0 = (internal_training_threshold) - nn->output_layer[0];
1340 	  eic1 = (internal_training_threshold) - nn->output_layer[1];
1341 	  eoc1 = nn->output_layer[1] - (1.0 - internal_training_threshold);
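	  //   (Illustration with a made-up threshold of 0.25: an in-class
	  //    doc avoids retraining only if output[0] > 0.75 and
	  //    output[1] < 0.25, and an out-of-class doc needs the reverse;
	  //    any of the four terms above going negative flags a retrain.)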
1342 
1343 	  //    Do the error bounds comparison
1344 	  this_doc_was_wrong = 0;
1345 	  if(
1346 	     ( ! out_of_class &&
1347 	       (( eic0 < 0 ) || ( eic1 < 0 ))
1348 	       )
1349 	     ||
1350 	     ( out_of_class &&
1351 	       (( eoc0 < 0 ) || (eoc1 < 0 ))
1352 	       )
1353 	     )
1354 	    {
1355 	      n_docs_trained ++;
1356 	      this_doc_was_wrong = 1;
1357 	      *l = 0;      // reset the "OK this many times" pointer.
1358 	    };
1359 
1360 	  if (user_trace)
1361 	  {
1362 	    if (this_doc_was_wrong)
1363 	      {
1364 		if ( ! out_of_class )
1365 		  {
1366 		    if (nn->output_layer[0] > nn->output_layer[1])
1367 		      fprintf (stderr, "+");
1368 		    else
1369 		      fprintf (stderr, "#");
1370 		  }
1371 		else
1372 		  {
1373 		    if (nn->output_layer[0] < nn->output_layer[1])
1374 		      fprintf (stderr, "x");
1375 		    else
1376 		      fprintf (stderr, "X");
1377 		  };
1378 	      }
1379 	    else
1380 	      {
1381 		fprintf (stderr, " ");
1382 	      };
1383 	  };
1384 	if (internal_trace)
1385 	  fprintf (stderr, " %6lf ", errmag);
1386 
1387 	//  Do we want to train everything, even if it's right?
1388 //#define TRAIN_ALWAYS 1
1389 #define TRAIN_ALWAYS 0
1390 
1391 
1392 	  if  (this_doc_was_wrong
1393 	       || TRAIN_ALWAYS
1394 	       || (!(apb->sflags & CRM_BYCHUNK) ))
1395       {
	//    this block is the "actually train this document" section.
	//   If we got here, we're training, either because the neural
1398 	//   net got it wrong, or because we're in TRAIN_ALWAYS.
1399 	//   So, it's time to run the ugly backprop.
1400 	//
1401 	if(neural_trace) fprintf(stderr, " Need backprop on doc at %lx\n",
1402 				 (long) current_doc );
1403 
1404 		//       Start setting the errors.  Work backwards, from the
1405 	//       known desired states.
1406 	//
1407 	//     Now, for a bit of tricky math.  One could train
1408 	//     with heuristics (a la the Winnow algorithm) or one could
1409 	//     train with a gradient descent algorithm.
1410 	//
1411 	//      Luckily, the gradient descent is actually quite simple;
1412 	//      it turns out the partial derivative of the logistic
1413 	//      function dL(x)/dx is just  (1-L(x)) * L(x) .  Plotting this
1414 	//      on a spreadsheet is quite illuminating - it ranges from
1415 	//      nearly zero near infinity to 0.25 at zero.
1416 	//
1417 	//   See http://www.speech.sri.com/people/anand/771/html/node37.html
1418 	//   for details of this math.  SRI: here means "from the SRI paper"
1419 	//
1420 	//     SRI: del_j = -(target - output)*(1 - output)*(output)
1421 	//
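	//     (Derivation sketch of that derivative, for reference: with
	//      L(x) = 1 / (1 + exp(-x)),
	//      dL/dx = exp(-x) / (1 + exp(-x))^2
	//            = L(x) * (1 - L(x)),
	//      which peaks at 0.25 when x = 0 and falls toward zero as
	//      |x| grows - exactly the shape described above.)
	//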
1422 #define GRADIENT_DESCENT
1423 #ifdef GRADIENT_DESCENT
1424 	if (neural_trace)
1425 	  fprintf (stderr, "Generating error deltas for output layer\n");
1426 
1427 	//  1.0 and 0.0 are the desired final layer outputs for "in class".
1428 	nn->delta_output_layer[0] = 1.0;
1429 	nn->delta_output_layer[1] = 0.0;
1430 	//     out-of-class gets them reversed
1431 	if (out_of_class)
1432 	  {
1433 	    nn->delta_output_layer[0] = 0.0;
1434 	    nn->delta_output_layer[1] = 1.0;
1435 	  };
1436 
1437 	if (neural_trace)
1438 	  fprintf (stderr, "Output layer desired values: %lf, %lf\n",
1439 		   nn->delta_output_layer[0],
1440 		   nn->delta_output_layer[1]);
1441 
1442 
1443 	//    Now generate the values "in place" from the desired values
1444 	//    MAYBE TYPO in the SRI document- it says this is
1445 	//    SRI: -del_k = (t_j - o_j) (1 - o_j) o_j) and it is unclear if
1446 	//    the minus sign should _not_ be there.
1447 	//
	//    However, a spreadsheet calculation of the actual values of
	//    the derivative of the logistic function shows that in fact
	//    del_k is positive for positive errors (that is, del_k
	//    should be positive whenever the desired output is greater
	//    than the actual output).
1454 
1455 	//   Del_k on the output layer first
1456 	//
1457 	for (neuron = 0; neuron < 2; neuron++)
1458 	  nn->delta_output_layer[neuron] =
1459 	    (
1460 	     ( nn->delta_output_layer[neuron]            //  target - actual
1461 	       -  nn->output_layer[neuron] )
1462 	     * (1.0 - nn->output_layer[neuron])         // 1 - actual
1463 	     * (nn->output_layer[neuron])               // actual
1464 	      );
1465 
1466 	if (neural_trace)
1467 	  fprintf (stderr, "Output layer output error delta-sub_j: %lf, %lf\n",
1468 		   nn->delta_output_layer[0],
1469 		   nn->delta_output_layer[1]);
1470 
1471 	//       Now we calculate the delta weight we want to apply to the
1472 	//       middle layer's inputs, again, this is "in place".
1473 	//
1474 	//    The corrected del_j formula for hidden and input neurons is:
1475 	//     del_j = o_j (1 - o_j) SUM (del_k * w_kj)
1476 	//     (where the sum is over all neurons downstream of neuron j)
1477 	if (neural_trace)
1478 	  fprintf (stderr, "Doing the hidden layer.\n");
1479 
1480 	if (neural_trace)
1481 	  fprintf (stderr, "Now doing the sum of the del_k * w_kj\n");
1482 
1483 	//     Initialize the hidden layer deltas
1484 	for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1485 	  nn->delta_hidden_layer[neuron] = 0;
1486 
1487 	//    Calculate the SUM del_k * w_kj "in place"
1488 	//
1489 	//    Start with the error term (which is del_k of the output
1490 	//    node times the weight coupling the hidden layer to that
1491 	//    output node).  This is the error for a middle layer.
1492 	for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1493 	  for (channel = 0; channel < 2; channel++)
1494 	    nn->delta_hidden_layer[neuron] +=
1495 	      nn->delta_output_layer[channel]
1496 	      * (*arefWout(nn, channel, neuron ));
1497 
1498 	//     Now multiply by the o_j * (1-o_j) term
1499 	for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1500 	  nn->delta_hidden_layer[neuron] =
1501 	    (nn->delta_hidden_layer[neuron]
1502 	     * nn->hidden_layer[neuron]
1503 	     * (1.0 - nn->hidden_layer[neuron]));
1504 
1505 	if (neural_trace)
1506 	  fprintf (stderr, "First three hidden delta_subj's: %lf %lf %lf\n",
1507 		   nn->delta_hidden_layer[0],
1508 		   nn->delta_hidden_layer[1],
1509 		   nn->delta_hidden_layer[2]);
1510 
1511 
1512 	//   Do the calculation for the retina-to-input layer.  Same
1513 	//    deal as before.
1514 	//
1515 	//    The corrected del_j formula for hidden and input neurons is:
1516 	//     del_j = o_j (1 - o_j) SUM (del_k * w_kj)
1517 	//     (where the sum is over all neurons downstream of neuron j)
1518 	if (neural_trace)
1519 	  fprintf (stderr, "Doing the input layer.\n");
1520 
1521 	if (neural_trace)
1522 	  fprintf (stderr, "Now doing the sum of the del_k * w_kj\n");
1523 
1524 	//     Initialize the input layer deltas
1525 	for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1526 	  nn->delta_first_layer[neuron] = 0;
1527 
1528 	//    Calculate the SUM  "in place" (the error mag term)
        for (neuron = 0; neuron < nn->first_layer_size; neuron++)
          for (channel = 0; channel < nn->hidden_layer_size; channel++)
            nn->delta_first_layer[neuron] +=
              nn->delta_hidden_layer[channel]     //  del_k of the downstream hidden neuron
              * (*arefWhid(nn, channel, neuron));
1534 
        //     Now multiply by the o_j * (1 - o_j) term
1536 	for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1537 	  nn->delta_first_layer[neuron] =
1538 	    ( nn->delta_first_layer[neuron]
1539 	      * nn->first_layer[neuron]
1540 	      * (1.0 - nn->first_layer[neuron]));
1541 
1542        	if (neural_trace)
1543 	  fprintf (stderr, "First three input delta_subj's: %lf %lf %lf\n",
1544 		   nn->delta_first_layer[0],
1545 		   nn->delta_first_layer[1],
1546 		   nn->delta_first_layer[2]);
1547 
1548 #endif	// GRADIENT_DESCENT
1549 
        //    The SRI document suggests that the right thing to do
        //    is to update after each training example is calculated.
        //    So, we'll try that.  :)   RESULT: it works fine for _one_
        //    document, but with multiple documents the weights tend to
        //    oscillate back and forth.  Conclusion: sometimes you need
        //    to sum up the deltas and error terms over the whole epoch.
1556 	//
1557 	//
1558 	//   We have the del_k's all calculated for all three
1559 	//    layers, and we can use them to calculate the desired
1560 	//     change in weights for each layer.
1561 	//      These weight changes do not take place immediately;
1562 	//       we might do them at the end of a training epoch.
1563 
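        //   alphalocal is the base learning rate alpha jittered by
        //   gain_noise_factor(); the intent (see the STOCHASTIC define
        //   near the top of this file) appears to be to randomize the
        //   step size a little so training is less likely to stall or
        //   lock into an oscillation.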
1564 	alphalocal = alpha * (1.0 + gain_noise_factor());
1565 
1566 	//  Are we training after each document?
1567 	if (apb->sflags & CRM_BYCHUNK)
1568 	  {
1569 
1570 	    //   First, the output layer's input-side weights:
1571 	    for (neuron = 0; neuron < 2; neuron++)
1572 	      for(channel = 0; channel < nn->hidden_layer_size; channel++)
1573 		{
1574 		  *arefWout(nn, neuron, channel) =
1575 		    *arefWout (nn, neuron, channel)
1576 		    + ( alphalocal
1577 			* nn->delta_output_layer[neuron]
1578 			* nn->hidden_layer[channel]);
1579 		};
1580 
1581 	    //     Then for the hidden layer's input weights
1582 	    for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1583 	      for(channel = 0; channel < nn->first_layer_size; channel++)
1584 		{
1585 		  *arefWhid(nn, neuron, channel) =
1586 		    *arefWhid (nn, neuron, channel)
1587 		    + ( alphalocal
1588 			* nn->delta_hidden_layer[neuron]
1589 			* nn->first_layer[channel]);
1590 		};
1591 
1592 	    //     Finally, the input layer's input weights
1593 	    for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1594 	      for(channel = 0; channel < nn->retina_size; channel++)
1595 		{
1596 		  *arefWin(nn, neuron, channel) =
1597 		    *arefWin (nn, neuron, channel)
1598 		    + ( alphalocal
1599 			* nn->delta_first_layer[neuron]
1600 			* nn->retina[channel]);
1601 		};
1602 	  }
1603 	else
1604 	  //  Are we doing all docs, then training on the sum?
1605 	  {
1606 
1607 	    //   First, the output layer's input-side weights:
1608 	    for (neuron = 0; neuron < 2; neuron++)
1609 	      for(channel = 0; channel < nn->hidden_layer_size; channel++)
1610 		{
1611 		  dWout[channel*2 + neuron] +=
1612 		    ( alphalocal
1613 		      * nn->delta_output_layer[neuron]
1614 		      * nn->hidden_layer[channel]);
1615 		};
1616 
1617 	    //     Then for the hidden layer's input weights
1618 	    for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1619 	      for(channel = 0; channel < nn->first_layer_size; channel++)
1620 		{
1621 		  dWhid [channel * nn->hidden_layer_size + neuron] +=
1622 		    ( alphalocal
1623 		      * nn->delta_hidden_layer[neuron]
1624 		      * nn->first_layer[channel]);
1625 		};
1626 
1627 	    //     Finally, the input layer's input weights
1628 	    for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1629 	      for(channel = 0; channel < nn->retina_size; channel++)
1630 		{
1631 		  dWin [ channel * nn->first_layer_size + neuron] +=
1632 		    ( alphalocal
1633 		      * nn->delta_first_layer[neuron]
1634 		      *  nn->retina[channel]);
1635 		};
1636 	  };
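        //   Either way, the update is the usual delta rule
        //       w_kj  +=  alphalocal * del_k * o_j
        //   applied immediately in the <bychunk> case, or accumulated in
        //   the flattened dWout / dWhid / dWin buffers (indexed as
        //   channel * (receiving layer size) + neuron, the same expressions
        //   used when they are finally added into the weights after the
        //   epoch, below).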
1637 
1638       }       // end of train this document
1639 	  else
1640 	    {
1641 	      if(neural_trace)
1642 		fprintf(stderr, "Doc %lx / %u OK, looks good, not trained\n",
1643 			(long unsigned) current_doc, out_of_class);
1644 	    };      //      END OF THE PER-DOCUMENT LOOP
1645 
1646 	  if (neural_trace)
1647 	    fprintf (stderr,
1648 		     "Moving to next document at k=%lx to docs_end= %lx\n",
1649 		     (long int)k, (long int)nn->docs_end);
1650 	};
1651       n_cycles++;
1652       if (neural_trace)
1653 	fprintf (stderr, "All documents processed.\n"
1654 		 "Now first weight coeffs: %lf %lf, %lf\n",
1655 		 nn->Win[0], nn->Whid[0], nn->Wout[0]);
1656 
1657       //    If we're _not_ doing immediate-per-document training...
1658       if (! (apb->sflags & CRM_BYCHUNK))
1659 	{
1660 
1661 	  if (neural_trace)
1662 	    fprintf (stderr, "putting accumulated deltas out.\n");
1663 
1664 	  //   First, the output layer's input-side weights:
1665 	  for (neuron = 0; neuron < 2; neuron++)
1666 	    for(channel = 0; channel < nn->hidden_layer_size; channel++)
1667 	      {
1668 		*arefWout(nn, neuron, channel) +=
1669 		  dWout [ channel * 2 + neuron ];
1670 	      };
1671 
1672 	  //     Then for the hidden layer's input weights
1673 	  for (neuron = 0; neuron < nn->hidden_layer_size; neuron++)
1674 	    for(channel = 0; channel < nn->first_layer_size; channel++)
1675 	      {
1676 		*arefWhid(nn, neuron, channel) +=
1677 		  dWhid [channel * nn->hidden_layer_size + neuron];
1678 	      };
1679 
1680 	  //     Finally, the input layer's input weights
1681 	  for (neuron = 0; neuron < nn->first_layer_size; neuron++)
1682 	    for(channel = 0; channel < nn->retina_size; channel++)
1683 	      {
1684 		*arefWin(nn, neuron, channel) +=
1685 		  dWin[ channel * nn->first_layer_size + neuron];
1686 	      };
1687 	};
1688 
1689       if (user_trace)
1690 	fprintf (stderr, "    %6lf", epoch_errmag);
1691 
1692       if (neural_trace)
1693 	fprintf (stderr, "End of epoch;\n"
1694 		 "after training first weight coeffs: %lf %lf, %lf",
1695 		 nn->Win[0], nn->Whid[0], nn->Wout[0]);
1696 
      //   Sometimes it's better to dropkick: every NN_FROMSTART_PUNTING
      //   epochs, punt and reinitialize the whole net from scratch.
1698       if (n_cycles % NN_FROMSTART_PUNTING == 0)
1699       	{
1700 	  fprintf (stderr, "punt...");
1701       	  nuke (nn, 0);
1702       	};
1703 
1704     };        //    End of training epoch loop - back to linear code
1705 
1706 
1707   free(dWin);
1708   free(dWhid);
1709   free(dWout);
1710 
1711   //  if(n_cycles >= soft_cycle_limit)
1712   //  nonfatalerror5 ( "neural: failed to converge within the training limit.  ",
1713   //			 "Beware your results.\n You might want to consider"
1714   //		     " a larger network as well.", CRM_ENGINE_HERE);
1715 
1716   if(neural_trace)
1717     fprintf(stderr, "\nn_cycles = %ld\n", n_cycles);
1718 
1719   //    Do we microgroom?
1720   //    GROT GROT GROT don't do this yet - this code isn't right.
1721   //    GROT GROT GROT
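  //    Note: the "& 0" in the test below forces the condition false, so
  //    this whole microgroom block is compiled but never executed until
  //    the code gets fixed.  The walk over the stored documents appears
  //    to skip a small per-document header ("k += 3") and then scan past
  //    the zero that terminates that document's feature list.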
1722   if(apb->sflags & CRM_MICROGROOM & 0)
1723     {
1724       int trunced = 0;
1725       for(k = nn->docs_start; k < nn->docs_end;)
1726 	{
1727 	  current_doc = k;
1728 	  k += 3;
1729 	  while(*k++);
1730 	  if(current_doc[1] > NN_MICROGROOM_THRESHOLD)
1731 	    {
1732 	      memmove(current_doc, k, sizeof(unsigned long) * (nn->docs_end - k));
1733 	      nn->docs_end -= k - current_doc;
1734 	      k = current_doc;
1735 	      trunced = sizeof(char)
1736 		* ( (char *)(nn->docs_end) - (char *)(nn->file_origin) );
1737 	      n_docs--;
1738 	    }
1739 	}
1740       if(trunced)
1741 	{
1742 	  crm_force_munmap_filename(filename);
1743 	  dontcare = truncate(filename, trunced);
1744 	  if(neural_trace)
1745 	    fprintf(stderr, "\nleaving neural net learn after truncating"
1746 		    ", n_docs = %ld\n", n_docs);
1747 	  return 0;
1748 	}
1749     }
1750 
1751   unmap_file(nn, filename);
1752 
1753   if(neural_trace)
1754     fprintf(stderr, "\nleaving neural net learn"
1755 	    ", n_docs = %ld\n", n_docs);
1756   return 0;
1757 }
1758 
int crm_neural_net_classify (
1760 			     CSL_CELL *csl, ARGPARSE_BLOCK *apb,
1761 			     char *txtptr, long txtstart, long txtlen   )
1762 {
1763   char filenames_field[MAX_PATTERN];
1764   long filenames_field_len;
1765   char filenames[MAX_CLASSIFIERS][MAX_FILE_NAME_LEN];
1766 
1767   NEURAL_NET_STRUCT NN, *nn = &NN;
1768 
1769   regex_t regee;
1770 
1771   unsigned bag[NN_MAX_FEATURES], sum;
1772   long baglen;
1773 
1774   long i, j, k, n, n_classifiers, out_pos;
1775   long fail_on = MAX_CLASSIFIERS;
1776 
1777   float output[MAX_CLASSIFIERS][2];
1778   long example_length [MAX_CLASSIFIERS];
1779   double total_icnr, total_ocnr;
1780   double p[MAX_CLASSIFIERS], pR[MAX_CLASSIFIERS], suc_p, suc_pR, tot;
1781 
1782   char out_var[MAX_PATTERN];
1783   long out_var_len;
1784 
1785 
1786   n = 0;
1787 
1788   if(internal_trace)
1789     neural_trace = 1;
1790 
1791   if(neural_trace)
1792     fprintf(stderr, "entered crm_neural_net_classify\n");
1793 
1794 
1795   //grab filenames field
1796   crm_get_pgm_arg (filenames_field, MAX_PATTERN, apb->p1start, apb->p1len);
1797   filenames_field_len = apb->p1len;
1798   filenames_field_len =
1799     crm_nexpandvar(filenames_field, filenames_field_len, MAX_PATTERN);
1800 
1801   //grab output variable name
1802   crm_get_pgm_arg (out_var, MAX_PATTERN, apb->p2start, apb->p2len);
1803   out_var_len = apb->p2len;
1804   out_var_len = crm_nexpandvar (out_var, out_var_len, MAX_PATTERN);
1805 
  //  a tiny automaton, for your troubles, to grab the names of our
  //  classifier files and figure out which side of the "|" they're on.
  //  (Hey Bill, why isn't this in the stringy stuff?)
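  //  For example (illustrative filenames only), a first-paren field of
  //      "good.nn  |  spam.nn"
  //  puts good.nn on the success side (index < fail_on) and spam.nn on
  //  the failure side; if there is no "|" at all, every file is counted
  //  on the success side.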
1809   for(i = 0, j = 0, k = 0; i < filenames_field_len && j < MAX_CLASSIFIERS; i++)
    if(filenames_field[i] == '\\') //allow escapes in case the filename is weird
1811       filenames[j][k++] = filenames_field[++i];
1812     else if(isspace(filenames_field[i]) && k > 0)
1813       {   //white space terminates filenames
1814 	filenames[j][k] = '\0';
1815 	k = 0;
1816 	j++;
1817       }
1818     else if(filenames_field[i] == '|')
1819       { //found the bar, terminate filename if we're in one
1820 	if(k > 0)
1821 	  {
1822 	    k = 0;
1823 	    j++;
1824 	  }
1825 	fail_on = j;
1826       }
1827     else if(isgraph(filenames_field[i])) //just copy char otherwise
1828       filenames[j][k++] = filenames_field[i];
1829 
1830 
1831   if(j < MAX_CLASSIFIERS)
1832     filenames[j][k] = '\0';
1833   if(k > 0)
1834     n_classifiers = j + 1;
1835   else
1836     n_classifiers = j;
1837 
1838   if(fail_on > n_classifiers)
1839     fail_on = n_classifiers;
1840 
1841   if(neural_trace)
1842     {
1843       fprintf(stderr, "fail_on = %ld\n", fail_on);
1844       for(i = 0; i < n_classifiers; i++)
1845 	fprintf(stderr, "filenames[%ld] = %s\n", i, filenames[i]);
1846     };
1847 
1848   baglen = eat_document( txtptr + txtstart, txtlen, &j,
1849 		    &regee, bag, NN_MAX_FEATURES, apb->sflags, &sum);
1850 
1851   //loop over classifiers and calc scores
1852   for(i = 0; i < n_classifiers; i++)
1853     {
1854       if(map_file(nn, filenames[i]))
1855         {
1856           nonfatalerror("Couldn't mmap file!", filenames[i]);
1857           output[i][0] = 0.5;
1858           output[i][1] = 0.5;
1859           continue;
1860         }
1861       //   ***  now we do projection in real time because it's smaller!
1862       //     this allows different sized nn's to be compared.
1863       // ret = malloc( sizeof(unsigned long) * nn->retina_size);
1864       // rn = project_features(bag, n, ret, nn->retina_size);
1865       example_length[i] = nn->docs_end - nn->docs_start;
1866       do_net(nn, bag, baglen);
1867       output[i][0] = nn->output_layer[0];
1868       output[i][1] = nn->output_layer[1];
1869       // free(ret);
1870       unmap_file(nn, filenames[i]);
1871       if(neural_trace)
1872 	fprintf(stderr, "Network outputs on file %s:\t%f\t%f\n",
1873 		filenames[i], output[i][0], output[i][1]);
1874     }
1875 
1876   //    Calculate the winners and losers.
1877 
1878   //    Get total activation output for all of the networks first:
1879   tot = 0.0;
1880   for(i = 0; i < n_classifiers; i++)
1881     {
1882       //    Normalize the classifiers to a [0...1] range
1883       tot += p[i] = 0.5 * (1.0 + output[i][0] - output[i][1]);
1884     }
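  //   Sanity check on that mapping: outputs (1,0) give 0.5*(1+1-0) = 1.0,
  //   outputs (0,1) give 0.5*(1+0-1) = 0.0, and an undecided (0.5,0.5)
  //   gives 0.5, so each network's vote lands in [0...1] before being
  //   normalized into the probability-like p[i] below.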
1885 
1886   if(neural_trace)
1887     fprintf(stderr, "tot = %f\n", tot);
1888 
1889   for(i = 0; i < n_classifiers; i++)
1890     {
1891       //     response fraction of this class.
1892       p[i] /= tot;
1893       if(neural_trace)
1894 	fprintf(stderr, "p[%ld] = %f (normalized)\n", i, p[i]);
1895     }
1896 
1897   //      Find the one "best matching" network
1898   j = 0;
1899   for(i = 1; i < n_classifiers; i++)
1900     if( p[i] > p[j])
1901       j = i;
1902 
1903   //      Find the overall winning side - success, or fail
1904   suc_p = 0.0;
1905   for(i = 0; i < fail_on; i++)
1906     suc_p += p[i];
1907 
1908   //      Calculate pR's for all of the classifiers.
1909   total_icnr = 0;
1910   total_ocnr = 0;
1911   for(i = 0; i < n_classifiers; i++)
1912     {
1913       pR[i] = get_pR2 (output[i][0], output[i][1]);
1914       total_icnr += i < fail_on ? output[i][0] : output[i][1];
1915       total_ocnr += i < fail_on ? output[i][1] : output[i][0];
1916     };
1917   //      renormalize total responses
1918   total_icnr = total_icnr / n_classifiers;
1919   total_ocnr = total_ocnr / n_classifiers;
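  //   At this point total_icnr is the average (over classifiers) of the
  //   responses favoring the success side - the in-class output of each
  //   success-side file plus the out-of-class output of each fail-side
  //   file - and total_ocnr is the average of the responses against it;
  //   get_pR2 turns that response pair into the overall pR reported below.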
1920 
1921   suc_pR = get_pR2 (total_icnr, total_ocnr);
1922   out_pos = 0;
1923 
  if(suc_p > 0.5 ) //  a NaN suc_p also fails this comparison and drops into the CLASSIFY-fails branch
1925     out_pos += sprintf
1926       ( outbuf + out_pos,
1927 	"CLASSIFY succeeds; success probability: %f  pR: %6.4f\n",
1928 	suc_p, suc_pR);
1929   else
1930     out_pos += sprintf
1931       ( outbuf + out_pos,
1932 	"CLASSIFY fails; success probability: %f  pR: %6.4f\n",
1933 	suc_p, suc_pR);
1934 
1935   out_pos += sprintf
1936     ( outbuf + out_pos,
1937       "Best match to file #%ld (%s) prob: %6.4f  pR: %6.4f  \n",
1938       j,
1939       filenames[j],
1940       p[j], pR[j]);
1941 
1942   out_pos += sprintf
1943     ( outbuf + out_pos,
1944       "Total features in input file: %ld\n",
1945       baglen  );
1946 
1947   for(i = 0; i < n_classifiers; i++)
1948     out_pos += sprintf
1949       ( outbuf + out_pos,
1950 	"#%ld (%s): feats: %ld ic: %3.2f oc: %3.2f prob: %3.2e, pR: %6.2f\n",
1951 	i, filenames[i], example_length[i],
1952 	output [i][0], output[i][1], p[i], pR[i] );
1953 
1954   if(out_var_len)
1955     crm_destructive_alter_nvariable(out_var, out_var_len, outbuf, out_pos);
1956 
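  //   A failed CLASSIFY transfers control: back the statement pointer up
  //   so execution resumes at this statement's fail target, and flag the
  //   alius stack - the same convention the other CRM114 classifiers use,
  //   as far as I can tell.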
1957   if (suc_p <= 0.5)
1958     {
1959       csl->cstmt = csl->mct[csl->cstmt]->fail_index - 1;
1960       csl->aliusstk [csl->mct[csl->cstmt]->nest_level] = -1;
1961     }
1962   return 0;
1963 }
1964