/*
* Open BEAGLE
* Copyright (C) 2001-2007 by Christian Gagne and Marc Parizeau
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Contact:
* Laboratoire de Vision et Systemes Numeriques
* Departement de genie electrique et de genie informatique
* Universite Laval, Quebec, Canada, G1K 7P4
* http://vision.gel.ulaval.ca
*
*/

/*!
* \file SpambaseEvalOp.hpp
* \brief Definition of the type SpambaseEvalOp.
* \author Christian Gagne
* \author Marc Parizeau
* $Revision: 1.7.2.1 $
* $Date: 2007/05/09 01:51:24 $
*/

/*!
* \defgroup Spambase Spambase Example
* \brief SPAM e-mail database (spambase): Machine learning using strongly-typed GP
* with Open BEAGLE.
*
* \par Objective
* Find a program that will successfully predict whether a given e-mail is spam
* or not from some extracted features.
*
* \par Comments
* The evolved programs work on floating-point values AND Boolean values.
* The programs must return a Boolean value which must be true if the e-mail is
* spam, and false otherwise. Don't expect too much from this program as
* it is quite basic and not oriented toward performance. It is there mainly
* to illustrate the use of strongly-typed GP with Open BEAGLE.
51 * 52 * \par Terminal set 53 * - IN0, IN1, ... up to IN56, the e-mail features. [floating-point] 54 * - 0 and 1, two Boolean constants. [Boolean] 55 * - Ephemeral constants randomly generated in $[0,100]$ [floating-point] 56 * 57 * \par Function set 58 * - AND [Inputs: Booleans, Output: Boolean] 59 * - OR [Input: Boolean, Output: Boolean] 60 * - NOT [Inputs: Booleans, Output: Boolean] 61 * - + [Inputs: floating-points, Output: floating-point] 62 * - - [Inputs: floating-points, Output: floating-point] 63 * - * [Inputs: floating-points, Output: floating-point] 64 * - / [Inputs: floating-points, Output: floating-point] 65 * - < [Inputs: floating-points, Output: Booleans] 66 * - == [Inputs: floating-points, Output: Booleans] 67 * - if-then-else [1st Input: Boolean, 2nd & 3rd Input: floating-points, 68 * Output: floating-point] 69 * 70 * \par Fitness cases 71 * A random sample of 400 e-mails over the database, re-chosen for 72 * each fitness evaluation. 73 * 74 * \par Hits 75 * Number of correct outputs obtained over the 400 fitness cases. 76 * 77 * \par Raw fitness 78 * Ignored (always 0). 79 * 80 * \par Standardized fitness 81 * Rate of correct outputs over the fitness cases where 82 * the desired output was 0 (non-spam). 83 * 84 * \par Adjusted fitness 85 * Rate of correct outputs over the fitness cases where 86 * the desired output was 1 (spam). 87 * 88 * \par Normalized fitness 89 * Rate of correct outputs obtained over all the 400 fitness cases. 90 * 91 * \par Stopping criteria 92 * When the best individual scores 400 hits or when the evolution reaches 93 * the maximum number of generations. 94 * 95 * \par Reference 96 * Machine learning repository, http://www.ics.uci.edu/~mlearn/MLRepository.html 97 * 98 */ 99 100 #ifndef SpambaseEvalOp_hpp 101 #define SpambaseEvalOp_hpp 102 103 #include "beagle/GP.hpp" 104 #include <string> 105 #include <vector> 106 107 #define Spambase_DataSize 4601 108 #define Spambase_TestSize 400 109 110 111 /*! 
112 * \class SpambaseEvalOp SpambaseEvalOp.hpp "SpambaseEvalOp.hpp" 113 * \brief The individual evaluation class operator for the spambase problem. 114 * \ingroup Spambase 115 */ 116 class SpambaseEvalOp : public Beagle::GP::EvaluationOp { 117 118 public: 119 120 //! SpambaseEvalOp allocator type. 121 typedef Beagle::AllocatorT<SpambaseEvalOp,Beagle::GP::EvaluationOp::Alloc> 122 Alloc; 123 //!< SpambaseEvalOp handle type. 124 typedef Beagle::PointerT<SpambaseEvalOp,Beagle::GP::EvaluationOp::Handle> 125 Handle; 126 //!< SpambaseEvalOp bag type. 127 typedef Beagle::ContainerT<SpambaseEvalOp,Beagle::GP::EvaluationOp::Bag> 128 Bag; 129 130 explicit SpambaseEvalOp(Beagle::string inFilename="spambase.data"); 131 132 virtual void initialize(Beagle::System& ioSystem); 133 virtual Beagle::Fitness::Handle evaluate(Beagle::GP::Individual& inIndividual, 134 Beagle::GP::Context& ioContext); 135 virtual void postInit(Beagle::System& ioSystem); 136 void readData(Beagle::string inFilename, unsigned int inSizeData); 137 138 protected: 139 Beagle::String::Handle mFilename; //!< Filename containing the data. 140 std::vector< std::vector<Beagle::Double> > mInputs; //!< Spambase data inputs. 141 std::vector<Beagle::Bool> mOutputs; //!< Desired tags (1=spam,0=non-spam). 142 std::vector<unsigned int> mShuffledTable; //!< Shuffled index table of the data. 143 Beagle::string mFilenameDefault; //!< Default filename used. 144 145 }; 146 147 #endif // SpambaseEvalOp_hpp 148