/** @file perceptron.h

@brief CPerceptronNN header - Perceptron Neural Network

@author Jakub Adámek
Last modified $Id: perceptron.h,v 1.21 2002/04/23 15:49:41 jakubadamek Exp $
*/

#ifndef __BANG_FFNN_H__
#define __BANG_FFNN_H__

#include "math.h"
#include "time.h"
#include "traindata.h"
#include "percstruct.h"

//#include "../../base/bing.h"

/// abstract neuron transfer function
typedef double TFloat2FloatFn (double,double);

//template class TVector <CFloat, TVectorFloat_label_>;

/** + tf... are neuron transfer functions,
    + wif... are weight initialization functions */

static const int fnsCount = 3;

struct TFloat2FloatFns {
	/// "linear" : linear transfer function
	static double tfLinear (double n, double) { return n; }
	static double tfLinearDeriv (double, double) { return 1; }
	/// "logsig" : logistic sigmoid transfer function
	static double tfLogSig (double n, double lambda) { return 1.0 / (1.0 + exp (-n*lambda)); }
	static double tfLogSigDeriv (double n, double lambda) { return lambda * n * (1 - n); }
	/// "tanh" : hyperbolic tangent
	static double tfTanH (double n, double) { return 2 / (1 + exp(-2*n)) - 1; }
	static double tfTanHDeriv (double n, double) { return 1 - sqr (n); }
};

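/* Note: the ...Deriv functions take the neuron's *output* y = f(x) as their
   first argument, not the input x - e.g. for the logistic sigmoid
   f'(x) = lambda * y * (1 - y). A minimal self-check against a numerical
   derivative (an illustrative sketch, not part of this header; `main` and
   the step `h` are assumptions):

	#include <cstdio>
	int main () {
		double x = 0.3, lambda = 1, h = 1e-6;
		double y = TFloat2FloatFns::tfLogSig (x, lambda);
		double numeric = (TFloat2FloatFns::tfLogSig (x+h, lambda)
				- TFloat2FloatFns::tfLogSig (x-h, lambda)) / (2*h);
		double analytic = TFloat2FloatFns::tfLogSigDeriv (y, lambda);
		printf ("%g vs %g\n", numeric, analytic);	// should agree closely
	}
*/
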
/// perceptron neuron
STRUCT_BEGIN (TPerceptron)
public:
	void Init() { error=0; output=0; layerSize=0; inputCount=0; layer=-1;
		transferFn=-1; lambda=1; }
	/// accumulates error over all training data
	CFloat error;
	/// current output (during calculation)
	CFloat output;
	/// the second transfer fn parameter
	CFloat lambda;
	/// partial derivative of Ek
	CFloat partialEk;
	CInt transferFn;
	/// returns the transfer function applied to output
	inline double transfer ();
	/// returns the transfer function derivative needed in backpropagation
	inline double transferDeriv ();
	CInt layerSize;
	/// count of incoming connections including bias
	CInt inputCount;
	CInt layer;
	/// for debugging only - index in the neurons vector
	CInt index;
STRUCT_END (TPerceptron)
VECTOR (TPerceptron, TVectorPerceptron);
VECTOR (TVectorPerceptron, TSeriesNeurons);

STRUCT_BEGIN (TConIndex)
public:
	/// neuron from which the connection goes
	CInt start;
	/// neuron to which it goes
	CInt end;
	/// is it a recurrent connection?
	CInt recurrent;
	bool operator > (const TConIndex &y) const { return end > y.end; }
	bool operator < (const TConIndex &y) const { return end < y.end; }
STRUCT_END (TConIndex)

SET (TConIndex, TConIndexes)

/** @brief Encapsulates all the functionality of a Perceptron Neural Network

    Neurons are indexed over layers, 0..n1-1 in the 1st layer, n1..n1+n2-1 in the 2nd etc.
    Biases are considered as weights from neuron -1, with output value = 1.
*/

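/* Example of the indexing scheme (illustrative sizes): with layer sizes
   (3,2,1) the inputs are neurons 0..2, the hidden layer is 3..4 and the
   output neuron is 5; the bias is the virtual neuron -1, whose output is
   fixed to 1. */
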
STRUCT_BEGIN_FROM (CPerceptronNN, TPerceptronStructure)
public:
	friend class PerceptronNN;

	void Init() { weights=NULL; gradient=NULL; oldGradient=NULL; D=NULL; oldD=NULL;
		pushGoalInterval = 0; runWeightsPerturbation=0; }

	/// allows the bias (neuron -1) to be indexed as well
	STRUCT_BEGIN (TPerceptrons)
		TVectorPerceptron data;
		TPerceptron & operator [] (int i) { return data[i+1]; }
		const TPerceptron & operator [] (int i) const { return data[i+1]; }
		int size() const { return data.size() - 1; }
	STRUCT_END (TPerceptrons)

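	/* Usage sketch: operator[] shifts indices by one, so neurons[-1] addresses
	   the bias neuron (output fixed to 1) and neurons[0] the first input
	   neuron; size() reports the count without the bias. */
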
	/// all neurons including inputs (although inputs don't have transferFn etc.)
	TPerceptrons neurons;

	/// stores previous neuron output values when computing time series with a recurrent network
	TSeriesNeurons seriesNeurons;

	/// attribute for all connections - e.g. weights. Includes a fictitious connection to the bias.
	STRUCT_BEGIN (TConnectionsAttribs)
		void Init() { name=""; permanent=false; }
		/// name which identifies this attribute
		CString name;
		/// data in one vector over all connections
		TSmallVectorFloat d;
		/** Should this attribute be saved to file? If it takes a long time
		    to compute, it should be permanent, otherwise not. */
		CInt permanent;
	STRUCT_END (TConnectionsAttribs)

	VECTORC (TConnectionsAttribs, TConAttribs);
	TConAttribs conAttribs;

	TConIndexes conIndexes;

	TConAttribs::iterator
		weights,
		/** weights at which the error was lowest - when training with an
		    evaluation set, the evaluation-set error is used */
		bestWeights,
		gradient,
		/// not necessary in all algorithms
		oldGradient,
		/// stable conjugate gradient
		D, oldD;

	const TSmallVectorFloat & getWeights () const { return weights->d; }

	/** main function - runs the whole training procedure;
	    returns which goal was met */
	TLearningGoal train (CTrainingData *, const CString &filename);
	/** number of milliseconds between two goal writes */
	CInt pushGoalInterval;

	/// Creates and sets neurons, uses initSeriesNeurons().
	void initNeurons ();
	/// creates and sets connection attributes (weights etc.)
	void initConAttribs ();
	CXml * printWeights (const CString &filename) const;
	/** Reads weights from the <stream> tag, returns an error description or an empty string.
	    The filename of the config file is used to get the file path. */
	CString readWeights (CRox &stream, const CString &filename);
	/// initializes weights
	void weightInit ();
	/// initializes weights by Nguyen-Widrow - called from weightInit()
	void weightInitNguyenWidrow();

	/// initializes the weights with 1 or 0 indicating whether the connection exists
	void processRecurrentConnections();

	/// runs the given inputs through the network and accumulates the error
	void runNetwork (const TSmallVectorFloat &inputs, int irow = -1);
	/** runs error backpropagation to calculate the first derivatives of the error function */
	void backPropagate (const TSmallVectorFloat &outputs, int irow = -1);
	/** Runs the network on all input data (batch training) and calculates the gradient
	    by backpropagation. Works well with recurrent networks. */
	void calculateGradient (const CTrainingData *);

	/// copies neuron outputs to the given outputs
	void copyOutputs (TSmallVectorFloat &outputs) const;
	/// calculates all outputs and overwrites the outputs in traindata, dumps the traindata
	void calculateOutputs (CTrainingData &trData, CString fileName = "");

	void putWeights (bang_ofstream & str);

	/** Returns: sum of errors on output neurons.
	    If trData is supplied, iterates through all data from the given set first.
	    If postprocessed==true, postprocesses the error back to the magnitude of the source data. */
	double getError (const CTrainingData * trData = NULL, CInt set=ST_TRAIN, bool postprocessed = false);

	//private:
	/// index of first output layer neuron
	int outOffset;
	/// error sizes
	TVectorFloat epochError;

	/** Iterates through all connections. Parameters:
	    nStart - neuron the connection starts in
	    nEnd - ditto, ends in
	    con - index of the connection

	    Usage:
	    + TPerceptron *nStart, *nEnd = NULL; int con;
	    + while (next_connection (nStart, nEnd, con)) do_something;

	    You are ensured that all weights for nEnd are evaluated in a row,
	    i.e. nEnd grows steadily neuron by neuron.

	    This will run a terrible lot of times - make it as quick as possible!
	*/
	inline bool next_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu = NULL);

	/** Like next_connection, but you are ensured nEnd decreases steadily neuron by neuron. */
	inline bool prev_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu = NULL);

	inline bool next_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu);
	inline bool prev_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu);

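	/* Iteration sketch (illustrative only, not part of the API): accumulating
	   the net input of every neuron with the usage pattern above. Assumes the
	   outputs of non-input neurons were reset to 0 first; the transfer
	   function still has to be applied per neuron afterwards.

		TPerceptron *nStart, *nEnd = NULL;
		int con;
		while (next_connection (nStart, nEnd, con))
			nEnd->output += weights->d[con] * nStart->output;
	*/
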
	/** Returns "con" - index of connection between given neurons */
	inline int getConnection (int inStart, int inEnd);

	/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
			LEARNING ALGORITHMS
	 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

	/// special powerful algorithm by Stefan Rueger
	void stableConjugateGradient (int epoch, const CTrainingData *);
	/// Levenberg-Marquardt with modified line search by the Croatian team
	void modifiedLevenbergMarquardt (int epoch, const CTrainingData *trData);
	/// the simple basic gradient descent with learning rate only
	void gradientDescent (const CTrainingData *trData);
	void gradientDescent_recurrent (const CTrainingData *trData);
	/** This is a simple numerical technique to calculate the gradient. It is implemented here just
	    to prove the correctness of the much more efficient backpropagation technique.
	    Returns the difference between the gradient computed this way and the current gradient
	    (which should have been computed beforehand by backpropagation). */
	double weightsPerturbation (const CTrainingData *trData);

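	/* Sketch of the perturbation idea (the general central-difference check,
	   not necessarily this method's exact code; the step `h` is an assumption):
	   nudge each weight, re-evaluate the error and compare the finite
	   difference with the backpropagated gradient:

		for (int i = 0; i < (int)weights->d.size(); ++i) {
			double w0 = weights->d[i], h = 1e-5;
			weights->d[i] = w0 + h; double ePlus  = getError (trData);
			weights->d[i] = w0 - h; double eMinus = getError (trData);
			weights->d[i] = w0;
			double numeric = (ePlus - eMinus) / (2*h);	// compare with gradient->d[i]
		}
	*/
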
	/** If the trData parameter is given, the training data configuration is added as a subtag */
	virtual CXml *print_all (const CString &filename, CTrainingData *trData = NULL) const;
	/** If the trData parameter is given, the training data configuration is read from a subtag */
	virtual CString read_all (CRox *xml, const CString &filename, CTrainingData *trData = NULL);

	CString start (CTrainingData *trData, const CString &filename);
	int stopCriterion ();
	CString nextIteration (CTrainingData *trData, const CString &filename);
	void end(CTrainingData *trData);

private:
	/// Jacobian matrix and error vector used in the MLM method
	CMatrix<double> J, e;
	double Emin;
	int iData;

	time_t startTime;
	int lastBackupTime, lastGoalWrite;
	double bestError, currentError;
	bang_ofstream errorLog;
	/// local variables used in iterations
	TLearningGoal goalState;
	CString filename;
STRUCT_END_FROM (CPerceptronNN, TPerceptronStructure)

/** Connections are organized this way:
    [x,y] means connection x->y.
    For CR_FEEDFORWARD - consider a network with 62 inputs, 20 hidden units in the 1st layer, 10 in the 2nd, 8 outputs:
	[-1,62][0,62]..[61,62]		(63 connections)
	[-1,63][0,63]..[61,63]		(63)
	...				(20 times 63 cons)
	[-1,82][62,82][63,82]..[81,82]	(21 connections)
	[-1,83][62,83]..[81,83]		(21)
	...				(10 times 21 cons)
	[-1,92][82,92]..[91,92]		(11)
	...
	[-1,99][82,99]..[91,99]		(8 times 11 cons)
*/

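/* Sanity check of the layout above (a sketch, not part of the API): each
   non-input layer l contributes layerSizes[l] * (layerSizes[l-1] + 1)
   connections, the +1 being the bias:

	int count = 0;
	for (int l = 1; l < (int)layerSizes.size(); ++l)
		count += (int)layerSizes[l] * ((int)layerSizes[l-1] + 1);
	// 62-20-10-8 example: 20*63 + 10*21 + 8*11 = 1558 connections
*/
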
inline int CPerceptronNN::getConnection (int inStart, int inEnd)
{
	return 0;	// NB: short-circuits the function - the code below is currently disabled

	if ((int)conRestrict == CR_LAYERED) {
		int con = 0;
		int layer = neurons[inEnd].layer;
		bool bias = inStart == -1;
		int ilayer;
		for (ilayer=1; ilayer < layer; ++ilayer) {
			con += (int)layerSizes[ilayer] * ((int)layerSizes[ilayer-1]+1);
			inStart -= (int)layerSizes[ilayer-1];
			inEnd -= (int)layerSizes[ilayer-1];
		}
		inEnd -= (int)layerSizes[ilayer-1];
		if (bias) con += inEnd * ((int)layerSizes[ilayer-1]+1);
		else con += inEnd * ((int)layerSizes[ilayer-1]+1) + inStart + 1;
		return con;
	}
	else if ((int)conRestrict == CR_NONE || conRestrict == CR_LAYERED)	// NB: the CR_LAYERED test appears unreachable (handled above)
		return (inEnd - layerSizes[0]) * (neurons.size()+1) + inStart + 1;
	return 0;
}

// "neu" are the neurons stored from the previous input in the same series
inline bool CPerceptronNN::next_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int wCount, inputCount;

	if (nEnd == NULL) {
		con = -1;
		wCount = weights->d.size();
		inputCount = layerSizes[0];
	}

	while (++con < wCount && weights->d[con] == 0);
	if (con == wCount) return false;

	nEnd = &neurons [conIndexes[con].end];
	if (neu != NULL && conIndexes[con].recurrent)
		nStart = &(*neu) [conIndexes[con].start - inputCount];
	else nStart = &neurons [conIndexes[con].start];
	return true;
}


inline bool CPerceptronNN::prev_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int inputCount;

	if (nEnd == NULL) {
		con = weights->d.size();
		inputCount = layerSizes[0];
	}

	while (--con >= 0 && weights->d[con] == 0);
	if (con == -1) return false;

	nEnd = &neurons [conIndexes[con].end];
	if (neu != NULL && conIndexes[con].recurrent)
		nStart = &(*neu) [conIndexes[con].start - inputCount];
	else nStart = &neurons [conIndexes[con].start];
	return true;
}


/*

inline bool CPerceptronNN::next_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int _conCount, inStart, inEnd;
	static int maxStart;
	static TVectorInt layerStarts;

	if (conRestrict == CR_NONE || conRestrict == CR_LAYERED)
		return next_connection_recurrent (nStart, nEnd, con, neu);
	else if ((int)conRestrict == CR_LAYERED) {
		// initialize
		if (nEnd == NULL) {
			layerStarts.Realloc (layerSizes.size());
			_conCount = weights->d.size();
			con = 0;
			layerStarts[0] = 0;
			for (int ilayer=1; ilayer < layerSizes.size(); ++ilayer)
				layerStarts[ilayer] = layerStarts[ilayer-1] + layerSizes[ilayer-1];
			inStart = -1;
			inEnd = layerSizes[0];
			maxStart = layerStarts[neurons[inEnd].layer]-1;
			nStart = &neurons[-1];
			nEnd = &neurons[inEnd];
			return true;
		}

		if (++con == _conCount)
			return false;

		if (inStart == -1)
			inStart = (int)layerStarts[neurons[inEnd].layer-1];
		else if (++inStart > maxStart) {
			inStart = -1;
			nEnd = &neurons[++inEnd];
			maxStart = (int)layerStarts[nEnd->layer]-1;
		}
		nStart = &neurons[inStart];
		return true;
	}

	return false;
}

inline bool CPerceptronNN::prev_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int conCount, inStart, inEnd;
	static int minStart;
	static TVectorInt layerStarts;

	if (conRestrict == CR_NONE || conRestrict == CR_LAYERED)
		return prev_connection_recurrent (nStart, nEnd, con, neu);
	if ((int)conRestrict == CR_LAYERED) {
		// initialize
		if (nEnd == NULL) {
			layerStarts.Realloc (layerSizes.size());
			layerStarts[0] = 0;
			for (int ilayer=1; ilayer < layerSizes.size(); ++ilayer)
				layerStarts[ilayer] = (int)layerStarts[ilayer-1] + (int)layerSizes[ilayer-1];
			conCount = weights->d.size();
			con = conCount-1;
			inEnd = neurons.size()-1;
			inStart = (int)layerStarts[neurons[inEnd].layer]-1;
			minStart = (int)layerStarts[neurons[inStart].layer];
			nStart = &neurons[inStart];
			nEnd = &neurons[inEnd];
			return true;
		}

		if (--con < 0)
			return false;

		if (inStart == -1) {
			nEnd = &neurons[--inEnd];
			inStart = (int)layerStarts[nEnd->layer]-1;
			minStart = (int)layerStarts[neurons[inStart].layer];
		}
		else if (--inStart < minStart)
			inStart = -1;
		nStart = &neurons[inStart];
		return true;
	}

	return false;
}


// "neu" are the neurons stored from previous input in the same series
inline bool CPerceptronNN::next_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int inputCount, neuCount;

	if (nEnd == NULL) {
		con = -1;
		inputCount = layerSizes[0];
		neuCount = neurons.size();
	}

	while (++con < weights->d.size() && weights->d[con] == 0);
	if (con == weights->d.size()) return false;

	int inEnd = con / (neuCount+1) + inputCount;
	int inStart = con % (neuCount+1) - 1;

	nEnd = &neurons[inEnd];
	nStart = &neurons[inStart];
	if (inStart != -1 && nStart->layer != nEnd->layer - 1 && neu != NULL)
		nStart = &(*neu)[inStart - layerSizes[0]];
	return true;
}


inline bool CPerceptronNN::prev_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int inputCount, neuCount;

	if (nEnd == NULL) {
		con = weights->d.size();
		inputCount = layerSizes[0];
		neuCount = neurons.size();
	}

	while (--con >= 0 && weights->d[con] == 0);
	if (con == -1) return false;

	int inEnd = con / (neuCount+1) + inputCount;
	int inStart = con % (neuCount+1) - 1;

	nEnd = &neurons[inEnd];
	nStart = &neurons[inStart];
	if (inStart != -1 && nStart->layer != nEnd->layer - 1 && neu != NULL)
		nStart = &(*neu)[inStart - layerSizes[0]];
	return true;
}
*/
inline double TPerceptron::transfer ()
{
	switch((int)transferFn) {
		case TF_LINEAR: return output;
		case TF_LOGSIG: return 1.0 / (1.0 + exp (-output*lambda));
		case TF_TANSIG: return 2 / (1 + exp(-2*output)) - 1;
		default: assert (false); return 0;
	}
}

inline double TPerceptron::transferDeriv ()
{
	switch((int)transferFn) {
		case TF_LINEAR: return 1;
		case TF_LOGSIG: return lambda * output * (1 - output);
		case TF_TANSIG: return 1 - sqr (output);
		default: assert (false); return 0;
	}
}

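/* Note on the calling convention (inferred from the formulas above, so treat
   it as an assumption): transfer() reads `output` as the accumulated net
   input, while transferDeriv() reads it as the already-transferred
   activation - the derivatives are expressed in terms of the function value,
   e.g. logsig' = lambda * y * (1 - y). A sketch of the usual order:

	neuron.output = netInput;			// weighted sum of inputs
	neuron.output = neuron.transfer ();		// now holds the activation
	double deriv = neuron.transferDeriv ();		// uses the activation
*/
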
#endif