/** @file perceptron.h

  @brief CPerceptronNN header - Perceptron Neural Network

  @author Jakub Adámek
  Last modified $Id: perceptron.h,v 1.21 2002/04/23 15:49:41 jakubadamek Exp $
*/

#ifndef __BANG_FFNN_H__
#define __BANG_FFNN_H__

#include "math.h"
#include "time.h"
#include "traindata.h"
#include "percstruct.h"

//#include "../../base/bing.h"

/// abstract neuron transfer function
typedef double TFloat2FloatFn (double,double);

//template class TVector <CFloat, TVectorFloat_label_>;

/**	+ tf... are neuron transfer functions,
	+ wif... are weight initialization functions */

static const int fnsCount = 3;

struct TFloat2FloatFns {
	/// "linear" : linear transfer function
	static double tfLinear (double n, double) { return n; }
	static double tfLinearDeriv (double, double) { return 1; }
	/// "logsig" : logistic sigmoid transfer function
	static double tfLogSig (double n, double lambda) { return 1.0 / (1.0 + exp (-n*lambda)); }
	static double tfLogSigDeriv (double n, double lambda) { return lambda * n * (1 - n); }
	/// "tanh" : hyperbolic tangent
	static double tfTanH (double n, double) { return 2 / (1 + exp(-2*n)) - 1; }
	static double tfTanHDeriv (double n, double) { return 1 - sqr (n); }
};
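
/* Usage note (a minimal illustrative sketch, not part of the library API): the ...Deriv
   functions take the already computed neuron output as their first argument, not the
   net input - compare TPerceptron::transferDeriv at the end of this file. For example:

	double net = 0.37, lambda = 1.0;	// hypothetical values
	double out   = TFloat2FloatFns::tfLogSig      (net, lambda);
	double deriv = TFloat2FloatFns::tfLogSigDeriv (out, lambda);	// == lambda*out*(1-out)
*/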

/// perceptron neuron
STRUCT_BEGIN (TPerceptron)
public:
	void Init()	{ error=0; output=0; layerSize=0; inputCount=0; layer=-1;
			  transferFn=-1; lambda=1; }
	/// accumulates error over all trained data
	CFloat error;
	/// current output (during calculation)
	CFloat output;
	/// the second transfer fn parameter
	CFloat lambda;
	/// partial derivative of Ek
	CFloat partialEk;
	CInt transferFn;
	/// returns the transfer function applied to the output
	inline double transfer ();
	/// returns the transfer function derivative needed in backpropagation
	inline double transferDeriv ();
	CInt layerSize;
	/// count of incoming connections including bias
	CInt inputCount;
	CInt layer;
	/// just for debugging reasons - index in the neurons vector
	CInt index;
STRUCT_END (TPerceptron)
VECTOR (TPerceptron, TVectorPerceptron);
VECTOR (TVectorPerceptron, TSeriesNeurons);

STRUCT_BEGIN (TConIndex)
public:
	/// neuron from which the connection goes
	CInt start;
	/// neuron to which it goes
	CInt end;
	/// is it a recurrent connection?
	CInt recurrent;
	bool operator > (const TConIndex &y) const { return end > y.end; }
	bool operator < (const TConIndex &y) const { return end < y.end; }
STRUCT_END (TConIndex)

SET (TConIndex, TConIndexes)

/** @brief Encapsulates all the functionality of the Perceptron Neural Network

	Neurons are indexed over layers: 0..n1-1 in the 1st layer, n1..n1+n2-1 in the 2nd etc.
	Biases are treated as weights from neuron -1, whose output value is always 1.
*/

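/* Worked example (illustration only), using the topology described near the end of this
   file (62 inputs, 20 units in the 1st hidden layer, 10 in the 2nd, 8 outputs): the inputs
   are neurons 0..61, the 1st hidden layer is 62..81, the 2nd 82..91 and the outputs 92..99;
   the bias is the virtual neuron -1 whose output is always 1. */
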
STRUCT_BEGIN_FROM (CPerceptronNN, TPerceptronStructure)
public:
	friend class PerceptronNN;

	void Init()	{ weights=NULL; gradient=NULL; oldGradient=NULL; D=NULL; oldD=NULL;
			  pushGoalInterval = 0; runWeightsPerturbation=0; }

	/// wrapper which allows the bias (neuron -1) to be indexed as well
	STRUCT_BEGIN (TPerceptrons)
		TVectorPerceptron data;
		TPerceptron & operator [] (int i)	{ return data[i+1]; }
		const TPerceptron & operator [] (int i) const { return data[i+1]; }
		int size() const		{ return data.size() - 1; }
	STRUCT_END (TPerceptrons)

	/// all neurons including inputs (although inputs don't have transferFn etc.)
	TPerceptrons neurons;

	/// used to store previous neuron output values when computing time series with a recurrent network
	TSeriesNeurons seriesNeurons;

	/// attribute for all connections - e.g. weights. Includes a fictive connection to the bias.
	STRUCT_BEGIN (TConnectionsAttribs)
		void Init()	{ name=""; permanent=false; }
		/// name which identifies this attrib
		CString name;
		/// data in one vector over all connections
		TSmallVectorFloat d;
		/** bool - should it be saved to a file? If it takes a long time to calculate,
			the attrib should be permanent, otherwise not. */
		CInt permanent;
	STRUCT_END (TConnectionsAttribs)

	VECTORC (TConnectionsAttribs, TConAttribs);
	TConAttribs conAttribs;

	TConIndexes conIndexes;

	TConAttribs::iterator
		weights,
		/** weights on which the error was lowest - when trained with an eval set,
			the error on the eval set is taken */
		bestWeights,
		gradient,
		/// not necessary in all algorithms
		oldGradient,
		/// stable conjugate gradient
		D, oldD;

	const TSmallVectorFloat & getWeights () const { return weights->d; }

	/** main function - runs the whole training procedure,
		returns which goal was met */
	TLearningGoal train (CTrainingData *, const CString &filename);
	/** number of milliseconds between two goal writes */
	CInt pushGoalInterval;

	/// Creates and sets neurons, uses initSeriesNeurons().
	void initNeurons ();
	/// creates and sets connection attributes (weights etc.)
	void initConAttribs ();
	CXml * printWeights (const CString &filename) const;
	/** Reads weights from the <stream> tag, returns an error description or an empty string.
		The filename of the config file is used to get the file path. */
	CString readWeights (CRox &stream, const CString &filename);
	/// initializes weights
	void weightInit ();
	/// initializes weights by Nguyen-Widrow - called from weightInit()
	void weightInitNguyenWidrow();
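	/* Background note (the exact variant used is in the implementation): Nguyen-Widrow
	   initialization typically draws small random weights and rescales each hidden
	   neuron's weight vector to a norm of roughly 0.7 * h^(1/n), where h is the number
	   of hidden neurons and n the number of inputs. */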

	/// initializes the weights with 1 or 0 indicating whether a connection exists
	void processRecurrentConnections();

	/// runs the given inputs through the network and accumulates the error
	void runNetwork (const TSmallVectorFloat &inputs, int irow = -1);
	/** runs the error backpropagation to calculate the first derivatives of the error function */
	void backPropagate (const TSmallVectorFloat &outputs, int irow = -1);
	/** Runs the network on all input data (batch training) and calculates the gradient
		by backpropagation. Works well with recurrent networks. */
	void calculateGradient (const CTrainingData *);
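	/* A rough sketch of the batch pass these functions presumably implement
	   (illustration only; the loop and variable names are hypothetical):

		for (each training row irow with inputs x and desired outputs y) {
			runNetwork (x, irow);		// forward pass, accumulates the error
			backPropagate (y, irow);	// adds this row's contribution to the gradient
		}
	*/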

	/// copies neuron outputs to the given outputs
	void copyOutputs (TSmallVectorFloat &outputs) const;
	/// calculates all outputs and overwrites the outputs in traindata, dumps the traindata
	void calculateOutputs (CTrainingData &trData, CString fileName = "");

	void putWeights (bang_ofstream & str);

	/** Returns: sum of errors on the output neurons.
		If trData is supplied, iterates through all data from the given set first.
		If postprocessed == true, the error is postprocessed back to the magnitude of the source data */
	double getError (const CTrainingData * trData = NULL, CInt set=ST_TRAIN, bool postprocessed = false);

//private:
	/// index of the first output layer neuron
	int outOffset;
	/// error sizes
	TVectorFloat epochError;

	/** Iterates through all connections. Parameters:
		nStart	- neuron in which the connection starts
		nEnd	- neuron in which the connection ends
		con	- index of the connection

	    Usage (see also the sketch below the declarations):
	    + TPerceptron *nStart, *nEnd = NULL; int con;
	    + while (next_connection (nStart, nEnd, con)) do_something;

	    You are guaranteed that all weights for a given end neuron are evaluated in a row,
	    i.e. the end neuron grows steadily neuron by neuron.

	    This will run an enormous number of times - keep it as fast as possible!
	*/
	inline bool next_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu = NULL);

	/** Like next_connection, but you are guaranteed that the end neuron decreases steadily neuron by neuron. */
	inline bool prev_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu = NULL);

	inline bool next_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu);
	inline bool prev_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu);
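
	/* Typical iteration sketch (illustration only - the loop body is hypothetical; setting
	   nEnd to NULL tells the iterator to start from the first connection, see the inline
	   implementations below):

		TPerceptron *nStart, *nEnd = NULL;
		int con;
		while (next_connection (nStart, nEnd, con))
			nEnd->output += weights->d[con] * nStart->output;
	*/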

	/** Returns "con" - the index of the connection between the given neurons */
	inline int getConnection (int inStart, int inEnd);

	/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
		           LEARNING ALGORITHMS
	 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

	/// special powerful algorithm by Stefan Rueger
	void stableConjugateGradient (int epoch, const CTrainingData *);
	/// Levenberg-Marquardt with modified line search by a Croatian team
	void modifiedLevenbergMarquardt (int epoch, const CTrainingData *trData);
	/// the simple basic gradient descent with learning rate only
	void gradientDescent (const CTrainingData *trData);
	void gradientDescent_recurrent (const CTrainingData *trData);
	/** This is a simple numerical technique for calculating the gradient. It is implemented here just
		to prove the correctness of the much more efficient backpropagation technique.
		Returns the difference between the gradient computed this way and the current gradient
		(which should have been computed beforehand by backpropagation) */
	double weightsPerturbation (const CTrainingData *trData);
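	/* The idea, sketched (the implementation may differ in details such as using a
	   one-sided difference): each weight w_i is perturbed by a small eps and the error
	   is re-evaluated, giving the numerical estimate

		dE/dw_i  ~=  (E(w_i + eps) - E(w_i - eps)) / (2 * eps)

	   which is then compared against the gradient obtained by backpropagation. */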

	/** If the trData parameter is given, the training data configuration is added as a subtag */
	virtual CXml *print_all (const CString &filename, CTrainingData *trData = NULL) const;
	/** If the trData parameter is given, the training data configuration is read from a subtag */
	virtual CString read_all (CRox *xml, const CString &filename, CTrainingData *trData = NULL);

	CString start (CTrainingData *trData, const CString &filename);
	int stopCriterion ();
	CString nextIteration (CTrainingData *trData, const CString &filename);
	void end(CTrainingData *trData);

private:
	/// Jacobian matrix and error vector used in MLM method
	CMatrix<double> J, e;
	double Emin;
	int iData;

	time_t startTime;
	int lastBackupTime, lastGoalWrite;
	double bestError, currentError;
	bang_ofstream errorLog;
	/// local variables used in iterations
	TLearningGoal goalState;
	CString filename;
STRUCT_END_FROM(CPerceptronNN,TPerceptronStructure)

/** Connections are organized this way:
	[x,y] means connection x->y
	For CR_FEEDFORWARD - consider a network with 62 inputs, 20 hidden units in the 1st layer, 10 in the 2nd, 8 outputs
	[-1,62][0,62]..[61,62] (63 connections)
	[-1,63][0,63]..[61,63] (63)
	...		       (20 times 63 cons)
	[-1,82][62,82][63,82]..[81,82] (21 connections)
	[-1,83][62,83]..[81,83] (21)
	...			(10 times 21 cons)
	[-1,92][82,92]..[91,92] (11)
	...
	[-1,99][82,99]..[91,99] (8 times 11 cons)
*/
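
/* For the topology above this amounts to (62+1)*20 + (20+1)*10 + (10+1)*8
   = 1260 + 210 + 88 = 1558 connections in total, bias connections included. */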

inline int CPerceptronNN::getConnection (int inStart, int inEnd)
{
	return 0;	// note: this early return makes the index computation below unreachable
	if ((int)conRestrict == CR_LAYERED) {
		int con = 0;
		int layer = neurons[inEnd].layer;
		bool bias = inStart == -1;
		int ilayer;
		for (ilayer=1; ilayer < layer; ++ilayer) {
			con += (int)layerSizes[ilayer] * ((int)layerSizes[ilayer-1]+1);
			inStart -= (int)layerSizes[ilayer-1];
			inEnd -= (int)layerSizes[ilayer-1];
		}
		inEnd -= layerSizes[ilayer-1];
		if (bias) con += inEnd * ((int)layerSizes[ilayer-1]+1);
		else con += inEnd * ((int)layerSizes[ilayer-1]+1) + inStart + 1;
		return con;
	}
	else if ((int)conRestrict == CR_NONE || conRestrict == CR_LAYERED)
		return (inEnd - layerSizes[0]) * (neurons.size()+1) + inStart + 1;
	return 0;
}

// "neu" are the neurons stored from previous input in the same series
inline bool CPerceptronNN::next_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int wCount, inputCount;

	if (nEnd == NULL) {
		con = -1;
		wCount = weights->d.size();
		inputCount = layerSizes[0];
	}

	while (++con < wCount && weights->d[con] == 0);
	if (con == wCount) return false;

	nEnd = &neurons [conIndexes[con].end];
	if (neu != NULL && conIndexes[con].recurrent)
		nStart = &(*neu) [conIndexes[con].start - inputCount];
	else nStart = &neurons [conIndexes[con].start];
	return true;
}


inline bool CPerceptronNN::prev_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int inputCount;

	if (nEnd == NULL) {
		con = weights->d.size();
		inputCount = layerSizes[0];
	}

	while (--con >= 0 && weights->d[con] == 0);
	if (con == -1) return false;

	nEnd = &neurons [conIndexes[con].end];
	if (neu != NULL && conIndexes[con].recurrent)
		nStart = &(*neu) [conIndexes[con].start - inputCount];
	else nStart = &neurons [conIndexes[con].start];
	return true;
}


/*

inline bool CPerceptronNN::next_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int _conCount, inStart, inEnd;
	static int maxStart;
	static TVectorInt layerStarts;

	if (conRestrict == CR_NONE || conRestrict == CR_LAYERED)
		return next_connection_recurrent (nStart, nEnd, con, neu);
	else if ((int)conRestrict == CR_LAYERED) {
		// initialize
		if (nEnd == NULL) {
			layerStarts.Realloc (layerSizes.size());
			_conCount = weights->d.size();
			con = 0;
			layerStarts[0] = 0;
			for (int ilayer=1; ilayer < layerSizes.size(); ++ilayer)
				layerStarts[ilayer] = layerStarts[ilayer-1] + layerSizes[ilayer-1];
			inStart = -1;
			inEnd = layerSizes[0];
			maxStart = layerStarts[neurons[inEnd].layer]-1;
			nStart = &neurons[-1];
			nEnd = &neurons[inEnd];
			return true;
		}

		if (++con == _conCount)
			return false;

		if (inStart == -1)
			inStart = (int)layerStarts[neurons[inEnd].layer-1];
		else if (++inStart > maxStart) {
			inStart = -1;
			nEnd = &neurons[++inEnd];
			maxStart = (int)layerStarts[nEnd->layer]-1;
		}
		nStart = &neurons[inStart];
		return true;
	}

	return false;
}

inline bool CPerceptronNN::prev_connection (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int conCount, inStart, inEnd;
	static int minStart;
	static TVectorInt layerStarts;

	if (conRestrict == CR_NONE || conRestrict == CR_LAYERED)
		return prev_connection_recurrent (nStart, nEnd, con, neu);
	if ((int)conRestrict == CR_LAYERED) {
		// initialize
		if (nEnd == NULL) {
			layerStarts.Realloc (layerSizes.size());
			layerStarts[0] = 0;
			for (int ilayer=1; ilayer < layerSizes.size(); ++ilayer)
				layerStarts[ilayer] = (int)layerStarts[ilayer-1] + (int)layerSizes[ilayer-1];
			conCount = weights->d.size();
			con = conCount-1;
			inEnd = neurons.size()-1;
			inStart = (int)layerStarts[neurons[inEnd].layer]-1;
			minStart = (int)layerStarts[neurons[inStart].layer];
			nStart = &neurons[inStart];
			nEnd = &neurons[inEnd];
			return true;
		}

		if (--con < 0)
			return false;

		if (inStart == -1) {
			nEnd = &neurons[--inEnd];
			inStart = (int)layerStarts[nEnd->layer]-1;
			minStart = (int)layerStarts[neurons[inStart].layer];
		}
		else if (--inStart < minStart)
			inStart = -1;
		nStart = &neurons[inStart];
		return true;
	}

	return false;
}


// "neu" are the neurons stored from previous input in the same series
inline bool CPerceptronNN::next_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int inputCount, neuCount;

	if (nEnd == NULL) {
		con = -1;
		inputCount = layerSizes[0];
		neuCount = neurons.size();
	}

	while (++con < weights->d.size() && weights->d[con] == 0);
	if (con == weights->d.size()) return false;

	int inEnd = con / (neuCount+1) + inputCount;
	int inStart = con % (neuCount+1) - 1;

	nEnd = &neurons[inEnd];
	nStart = &neurons[inStart];
	if (inStart != -1 && nStart->layer != nEnd->layer - 1 && neu != NULL)
		nStart = &(*neu)[inStart - layerSizes[0]];
	return true;
}


inline bool CPerceptronNN::prev_connection_recurrent (TPerceptron *&nStart, TPerceptron *&nEnd, int &con, TVectorPerceptron *neu)
{
	static int inputCount, neuCount;

	if (nEnd == NULL) {
		con = weights->d.size();
		inputCount = layerSizes[0];
		neuCount = neurons.size();
	}

	while (--con >= 0 && weights->d[con] == 0);
	if (con == -1) return false;

	int inEnd = con / (neuCount+1) + inputCount;
	int inStart = con % (neuCount+1) - 1;

	nEnd = &neurons[inEnd];
	nStart = &neurons[inStart];
	if (inStart != -1 && nStart->layer != nEnd->layer - 1 && neu != NULL)
		nStart = &(*neu)[inStart - layerSizes[0]];
	return true;
}
*/
inline double TPerceptron::transfer ()
{
	switch((int)transferFn) {
	case TF_LINEAR: return output;
	case TF_LOGSIG: return 1.0 / (1.0 + exp (-output*lambda));
	case TF_TANSIG: return 2 / (1 + exp(-2*output)) - 1;
	default: assert (false); return 0;
	}
}

inline double TPerceptron::transferDeriv ()
{
	switch((int)transferFn) {
	case TF_LINEAR: return 1;
	case TF_LOGSIG: return lambda * output * (1 - output);
	case TF_TANSIG: return 1 - sqr (output);
	default: assert (false); return 0;
	}
}

#endif