1 /*
2  *  Open BEAGLE
3  *  Copyright (C) 2001-2007 by Christian Gagne and Marc Parizeau
4  *
5  *  This library is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public
7  *  License as published by the Free Software Foundation; either
8  *  version 2.1 of the License, or (at your option) any later version.
9  *
10  *  This library is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *  Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public
16  *  License along with this library; if not, write to the Free Software
17  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  *  Contact:
20  *  Laboratoire de Vision et Systemes Numeriques
21  *  Departement de genie electrique et de genie informatique
22  *  Universite Laval, Quebec, Canada, G1K 7P4
23  *  http://vision.gel.ulaval.ca
24  *
25  */
26 
27 /*!
28  *  \file   SpambaseEvalOp.hpp
29  *  \brief  Definition of the type SpambaseEvalOp.
30  *  \author Christian Gagne
31  *  \author Marc Parizeau
32  *  $Revision: 1.7.2.1 $
33  *  $Date: 2007/05/09 01:51:24 $
34  */
35 
36 /*!
37  *  \defgroup Spambase Spambase Example
38  *  \brief SPAM e-mail database (spambase): Machine learning using strongly-typed GP
39  *     with Open BEAGLE.
40  *
41  *  \par Objective
 *  Find a program that will successfully predict whether a given e-mail is spam
43  *  or not from some extracted features.
44  *
45  *  \par Comments
 *  The evolved programs work on floating-point values AND Boolean values.
47  *  The programs must return a Boolean value which must be true if e-mail is
48  *  spam, and false otherwise. Don't expect too much from this program as
49  *  it is quite basic and not oriented toward performance. It is there mainly
50  *  to illustrate the use of strongly-typed GP with Open BEAGLE.
51  *
52  *  \par Terminal set
53  *  - IN0, IN1, ...  up to IN56, the e-mail features.      [floating-point]
54  *  - 0 and 1, two Boolean constants.                      [Boolean]
55  *  - Ephemeral constants randomly generated in $[0,100]$  [floating-point]
56  *
57  *  \par Function set
 *  - AND               [Inputs: Booleans,        Output: Boolean]
 *  - OR                [Inputs: Booleans,        Output: Boolean]
 *  - NOT               [Input:  Boolean,         Output: Boolean]
61  *  - +                 [Inputs: floating-points, Output: floating-point]
62  *  - -                 [Inputs: floating-points, Output: floating-point]
63  *  - *                 [Inputs: floating-points, Output: floating-point]
64  *  - /                 [Inputs: floating-points, Output: floating-point]
 *  - <                 [Inputs: floating-points, Output: Boolean]
 *  - ==                [Inputs: floating-points, Output: Boolean]
 *  - if-then-else      [1st Input: Boolean, 2nd & 3rd Inputs: floating-points,
68  *                       Output: floating-point]
69  *
70  *  \par Fitness cases
71  *  A random sample of 400 e-mails over the database, re-chosen for
72  *  each fitness evaluation.
73  *
74  *  \par Hits
75  *  Number of correct outputs obtained over the 400 fitness cases.
76  *
77  *  \par Raw fitness
78  *  Ignored (always 0).
79  *
80  *  \par Standardized fitness
81  *  Rate of correct outputs over the fitness cases where
82  *  the desired output was 0 (non-spam).
83  *
84  *  \par Adjusted fitness
85  *  Rate of correct outputs over the fitness cases where
86  *  the desired output was 1 (spam).
87  *
88  *  \par Normalized fitness
89  *  Rate of correct outputs obtained over all the 400 fitness cases.
90  *
91  *  \par Stopping criteria
92  *  When the best individual scores 400 hits or when the evolution reaches
93  *  the maximum number of generations.
94  *
95  *  \par Reference
96  *  Machine learning repository, http://www.ics.uci.edu/~mlearn/MLRepository.html
97  *
98  */
99 
100 #ifndef SpambaseEvalOp_hpp
101 #define SpambaseEvalOp_hpp
102 
103 #include "beagle/GP.hpp"
104 #include <string>
105 #include <vector>
106 
//! Size of the spambase data set, in e-mail records.
//! NOTE(review): presumably the value handed to SpambaseEvalOp::readData() as
//! inSizeData -- confirm against SpambaseEvalOp.cpp.
#define Spambase_DataSize 4601

//! Number of e-mails sampled from the data set as fitness cases for one
//! fitness evaluation (see the "Fitness cases" paragraph of the Spambase group).
#define Spambase_TestSize 400
109 
110 
111 /*!
112  *  \class SpambaseEvalOp SpambaseEvalOp.hpp "SpambaseEvalOp.hpp"
113  *  \brief The individual evaluation class operator for the spambase problem.
114  *  \ingroup Spambase
115  */
116 class SpambaseEvalOp : public Beagle::GP::EvaluationOp {
117 
118 public:
119 
120   //! SpambaseEvalOp allocator type.
121   typedef Beagle::AllocatorT<SpambaseEvalOp,Beagle::GP::EvaluationOp::Alloc>
122           Alloc;
123   //!< SpambaseEvalOp handle type.
124   typedef Beagle::PointerT<SpambaseEvalOp,Beagle::GP::EvaluationOp::Handle>
125           Handle;
126   //!< SpambaseEvalOp bag type.
127   typedef Beagle::ContainerT<SpambaseEvalOp,Beagle::GP::EvaluationOp::Bag>
128           Bag;
129 
130   explicit SpambaseEvalOp(Beagle::string inFilename="spambase.data");
131 
132   virtual void initialize(Beagle::System& ioSystem);
133   virtual Beagle::Fitness::Handle evaluate(Beagle::GP::Individual& inIndividual,
134                                            Beagle::GP::Context& ioContext);
135   virtual void postInit(Beagle::System& ioSystem);
136           void readData(Beagle::string inFilename, unsigned int inSizeData);
137 
138 protected:
139   Beagle::String::Handle                     mFilename;        //!< Filename containing the data.
140   std::vector< std::vector<Beagle::Double> > mInputs;          //!< Spambase data inputs.
141   std::vector<Beagle::Bool>                  mOutputs;         //!< Desired tags (1=spam,0=non-spam).
142   std::vector<unsigned int>                  mShuffledTable;   //!< Shuffled index table of the data.
143   Beagle::string                             mFilenameDefault; //!< Default filename used.
144 
145 };
146 
147 #endif // SpambaseEvalOp_hpp
148