1
2package AI::Genetic;
3
4use strict;
5use Carp;
6
7use vars qw/$VERSION/;
8
9$VERSION = 0.05;
10
11use AI::Genetic::Defaults;
12
13# new AI::Genetic. More modular.
14# Not too many checks are done still.
15
16##### Shared private vars
17# this hash predefines some strategies
18
19my %_strategy = (
20		 rouletteSinglePoint => \&AI::Genetic::Defaults::rouletteSinglePoint,
21		 rouletteTwoPoint    => \&AI::Genetic::Defaults::rouletteTwoPoint,
22		 rouletteUniform     => \&AI::Genetic::Defaults::rouletteUniform,
23
24		 tournamentSinglePoint => \&AI::Genetic::Defaults::tournamentSinglePoint,
25		 tournamentTwoPoint    => \&AI::Genetic::Defaults::tournamentTwoPoint,
26		 tournamentUniform     => \&AI::Genetic::Defaults::tournamentUniform,
27
28		 randomSinglePoint => \&AI::Genetic::Defaults::randomSinglePoint,
29		 randomTwoPoint    => \&AI::Genetic::Defaults::randomTwoPoint,
30		 randomUniform     => \&AI::Genetic::Defaults::randomUniform,
31		);
32
33# this hash maps the genome types to the
34# classes they're defined in.
35
36my %_genome2class = (
37		     bitvector   => 'AI::Genetic::IndBitVector',
38		     rangevector => 'AI::Genetic::IndRangeVector',
39		     listvector  => 'AI::Genetic::IndListVector',
40		    );
41
42##################
43
44# sub new():
45# This is the constructor. It creates a new AI::Genetic
46# object. Options are:
47# -population: set the population size
48# -crossover:  set the crossover probability
49# -mutation:   set the mutation probability
50# -fitness:    set the fitness function
51# -type:       set the genome type. See docs.
52# -terminate:  set termination sub.
53
54sub new {
55  my ($class, %args) = @_;
56
57  my $self = bless {
58		    ADDSEL => {},   # user-defined selections
59		    ADDCRS => {},   # user-defined crossovers
60		    ADDMUT => {},   # user-defined mutations
61		    ADDSTR => {},   # user-defined strategies
62		   } => $class;
63
64  $self->{FITFUNC}    = $args{-fitness}    || sub { 1 };
65  $self->{CROSSRATE}  = $args{-crossover}  || 0.95;
66  $self->{MUTPROB}    = $args{-mutation}   || 0.05;
67  $self->{POPSIZE}    = $args{-population} || 100;
68  $self->{TYPE}       = $args{-type}       || 'bitvector';
69  $self->{TERM}       = $args{-terminate}  || sub { 0 };
70
71  $self->{PEOPLE}     = [];   # list of individuals
72  $self->{GENERATION} = 0;    # current gen.
73
74  $self->{INIT}       = 0;    # whether pop is initialized or not.
75  $self->{SORTED}     = 0;    # whether the population is sorted by score or not.
76  $self->{INDIVIDUAL} = '';   # name of individual class to use().
77
78  return $self;
79}
80
81# sub createStrategy():
82# This method creates a new strategy.
83# It takes two arguments: name of strategy, and
84# anon sub that implements it.
85
86sub createStrategy {
87  my ($self, $name, $sub) = @_;
88
89  if (ref($sub) eq 'CODE') {
90    $self->{ADDSTR}{$name} = $sub;
91  } else {
92    # we don't know what this operation is.
93    carp <<EOC;
94ERROR: Must specify anonymous subroutine for strategy.
95       Strategy '$name' will be deleted.
96EOC
97    ;
98    delete $self->{ADDSTR}{$name};
99    return undef;
100  }
101
102  return $name;
103}
104
105# sub evolve():
106# This method evolves the population using a specific strategy
107# for a specific number of generations.
108
109sub evolve {
110  my ($self, $strategy, $gens) = @_;
111
112  unless ($self->{INIT}) {
113    carp "can't evolve() before init()";
114    return undef;
115  }
116
117  my $strSub;
118  if      (exists $self->{ADDSTR}{$strategy}) {
119    $strSub = $self->{ADDSTR}{$strategy};
120  } elsif (exists $_strategy{$strategy}) {
121    $strSub = $_strategy{$strategy};
122  } else {
123    carp "ERROR: Do not know what strategy '$strategy' is,";
124    return undef;
125  }
126
127  $gens ||= 1;
128
129  for my $i (1 .. $gens) {
130    $self->sortPopulation;
131    $strSub->($self);
132
133    $self->{GENERATION}++;
134    $self->{SORTED} = 0;
135
136    last if $self->{TERM}->($self);
137
138#    my @f = $self->getFittest(10);
139#    for my $f (@f) {
140#      print STDERR "    Fitness = ", $f->score, "..\n";
141#      print STDERR "    Genes are: @{$f->genes}.\n";
142#    }
143  }
144}
145
146# sub sortIndividuals():
147# This method takes as input an anon list of individuals, and returns
148# another anon list of the same individuals but sorted in decreasing
149# score.
150
151sub sortIndividuals {
152  my ($self, $list) = @_;
153
154  # make sure all score's are calculated.
155  # This is to avoid a bug in Perl where a sort is called from whithin another
156  # sort, and they are in different packages, then you get a use of uninit value
157  # warning. See http://rt.perl.org/rt3/Ticket/Display.html?id=7063
158  $_->score for @$list;
159
160  return [sort {$b->score <=> $a->score} @$list];
161}
162
163# sub sortPopulation():
164# This method sorts the population of individuals.
165
166sub sortPopulation {
167  my $self = shift;
168
169  return if $self->{SORTED};
170
171  $self->{PEOPLE} = $self->sortIndividuals($self->{PEOPLE});
172  $self->{SORTED} = 1;
173}
174
175# sub getFittest():
176# This method returns the fittest individuals.
177
178sub getFittest {
179  my ($self, $N) = @_;
180
181  $N ||= 1;
182  $N = 1 if $N < 1;
183
184  $N = @{$self->{PEOPLE}} if $N > @{$self->{PEOPLE}};
185
186  $self->sortPopulation;
187
188  my @r = @{$self->{PEOPLE}}[0 .. $N-1];
189
190  return $r[0] if $N == 1 && not wantarray;
191
192  return @r;
193}
194
195# sub init():
196# This method initializes the population to completely
197# random individuals. It deletes all current individuals!!!
198# It also examines the type of individuals we want, and
199# require()s the proper class. Throws an error if it can't.
200# Must pass to it an anon list that will be passed to the
201# newRandom method of the individual.
202
203# In case of bitvector, $newArgs is length of bitvector.
204# In case of rangevector, $newArgs is anon list of anon lists.
205# each sub-anon list has two elements, min number and max number.
206# In case of listvector, $newArgs is anon list of anon lists.
207# Each sub-anon list contains possible values of gene.
208
209sub init {
210  my ($self, $newArgs) = @_;
211
212  $self->{INIT} = 0;
213
214  my $ind;
215  if (exists $_genome2class{$self->{TYPE}}) {
216    $ind = $_genome2class{$self->{TYPE}};
217  } else {
218    $ind = $self->{TYPE};
219  }
220
221  eval "use $ind";  # does this work if package is in same file?
222  if ($@) {
223    carp "ERROR: Init failed. Can't require '$ind': $@,";
224    return undef;
225  }
226
227  $self->{INDIVIDUAL} = $ind;
228  $self->{PEOPLE}     = [];
229  $self->{SORTED}     = 0;
230  $self->{GENERATION} = 0;
231  $self->{INITARGS}   = $newArgs;
232
233  push @{$self->{PEOPLE}} =>
234    $ind->newRandom($newArgs) for 1 .. $self->{POPSIZE};
235
236  $_->fitness($self->{FITFUNC}) for @{$self->{PEOPLE}};
237
238  $self->{INIT} = 1;
239}
240
241# sub people():
242# returns the current list of individuals in the population.
243# note: this returns the actual array ref, so any changes
244# made to it (ex, shift/pop/etc) will be reflected in the
245# population.
246
247sub people {
248  my $self = shift;
249
250  if (@_) {
251    $self->{PEOPLE} = shift;
252    $self->{SORTED} = 0;
253  }
254
255  $self->{PEOPLE};
256}
257
258# useful little methods to set/query parameters.
259sub size       { $_[0]{POPSIZE}    = $_[1] if defined $_[1]; $_[0]{POPSIZE}   }
260sub crossProb  { $_[0]{CROSSRATE}  = $_[1] if defined $_[1]; $_[0]{CROSSRATE} }
261sub mutProb    { $_[0]{MUTPROB}    = $_[1] if defined $_[1]; $_[0]{MUTPROB}   }
262sub indType    { $_[0]{INDIVIDUAL} }
263sub generation { $_[0]{GENERATION} }
264
265# sub inject():
266# This method is used to add individuals to the current population.
267# The point of it is that sometimes the population gets stagnant,
268# so it could be useful add "fresh blood".
269# Takes a variable number of arguments. The first argument is the
270# total number, N, of new individuals to add. The remaining arguments
271# are genomes to inject. There must be at most N genomes to inject.
272# If the number, n, of genomes to inject is less than N, N - n random
273# genomes are added. Perhaps an example will help?
274# returns 1 on success and undef on error.
275
276sub inject {
277  my ($self, $count, @genomes) = @_;
278
279  unless ($self->{INIT}) {
280    carp "can't inject() before init()";
281    return undef;
282  }
283
284  my $ind = $self->{INDIVIDUAL};
285
286  my @newInds;
287  for my $i (1 .. $count) {
288    my $genes = shift @genomes;
289
290    if ($genes) {
291      push @newInds => $ind->newSpecific($genes, $self->{INITARGS});
292    } else {
293      push @newInds => $ind->newRandom  ($self->{INITARGS});
294    }
295  }
296
297  $_->fitness($self->{FITFUNC}) for @newInds;
298
299  push @{$self->{PEOPLE}} => @newInds;
300
301  return 1;
302}
303
304__END__
305
306=head1 NAME
307
308AI::Genetic - A pure Perl genetic algorithm implementation.
309
310=head1 SYNOPSIS
311
312    use AI::Genetic;
313    my $ga = new AI::Genetic(
314        -fitness    => \&fitnessFunc,
315        -type       => 'bitvector',
316        -population => 500,
317        -crossover  => 0.9,
318        -mutation   => 0.01,
319	-terminate  => \&terminateFunc,
320       );
321
322     $ga->init(10);
323     $ga->evolve('rouletteTwoPoint', 100);
324     print "Best score = ", $ga->getFittest->score, ".\n";
325
326     sub fitnessFunc {
327         my $genes = shift;
328
329         my $fitness;
330         # assign a number to $fitness based on the @$genes
331         # ...
332
333         return $fitness;
334      }
335
336      sub terminateFunc {
337         my $ga = shift;
338
339         # terminate if reached some threshold.
340         return 1 if $ga->getFittest->score > $THRESHOLD;
341         return 0;
342      }
343
344=head1 DESCRIPTION
345
346This module implements a Genetic Algorithm (GA) in pure Perl.
347Other Perl modules that achieve the same thing (perhaps better,
348perhaps worse) do exist. Please check CPAN. I mainly wrote this
349module to satisfy my own needs, and to learn something about GAs
350along the way.
351
352B<PLEASE NOTE:> As of v0.02, AI::Genetic has been re-written from
353scratch to be more modular and expandable. To achieve this, I had
354to modify the API, so it is not backward-compatible with v0.01.
355As a result, I do not plan on supporting v0.01.
356
357I will not go into the details of GAs here, but here are the
358bare basics. Plenty of information can be found on the web.
359
360In a GA, a population of individuals compete for survival. Each
361individual is designated by a set of genes that define its
362behaviour. Individuals that perform better (as defined by the
363fitness function) have a higher chance of mating with other
364individuals. When two individuals mate, they swap some of
365their genes, resulting in an individual that has properties
366from both of its "parents". Every now and then, a mutation
367occurs where some gene randomly changes value, resulting in
368a different individual. If all is well defined, after a few
369generations, the population should converge on a "good-enough"
370solution to the problem being tackled.
371
372A GA implementation runs for a discrete number of time steps
373called I<generations>. What happens during each generation can
374vary greatly depending on the strategy being used (See
375L</"STRATEGIES"> for more info).
376Typically, a variation of the following happens at
377each generation:
378
379=over 4
380
381=item B<1. Selection>
382
383Here the performance of all the individuals is evaluated
384based on the fitness function, and each is given a specific
385fitness value. The higher the value, the bigger the chance
386of an individual passing its genes on in future generations
387through mating (crossover).
388
389=item B<2. Crossover>
390
391Here, individuals selected are randomly paired up for
392crossover (aka I<sexual reproduction>). This is further
393controlled by the crossover rate specified and may result in
394a new offspring individual that contains genes common to
395both parents. New individuals are injected into the current
396population.
397
398=item B<3. Mutation>
399
400In this step, each individual is given the chance to mutate
401based on the mutation probability specified. If an individual
402is to mutate, each of its genes is given the chance to randomly
403switch its value to some other state.
404
405=back
406
407=head1 CLASS METHODS
408
409Here are the public methods.
410
411=over 4
412
413=item I<$ga>-E<gt>B<new>(I<options>)
414
415This is the constructor. It accepts options in the form of
416hash-value pairs. These are:
417
418=over 8
419
420=item B<-population>
421
422This defines the size of the population, i.e. how many individuals
423to simultaneously exist at each generation. Defaults to 100.
424
425=item B<-crossover>
426
427This defines the crossover rate. Defaults to 0.95.
428
429=item B<-mutation>
430
431This defines the mutation rate. Defaults to 0.05.
432
433=item I<-fitness>
434
435This defines a fitness function. It expects a reference to a subroutine.
436More details are given in L</"FITNESS FUNCTION">.
437
438=item I<-type>
439
440This defines the type of the genome. Currently, AI::Genetic
441supports only three types:
442
443=over
444
445=item I<bitvector>
446
447Individuals of this type have genes that are bits. Each gene
448can be in one of two possible states, on or off.
449
450=item I<listvector>
451
452Each gene of a listvector individual can assume one string value from
453a specified list of possible string values.
454
455=item I<rangevector>
456
457Each gene of a rangevector individual can assume one integer value
458from a range of possible integer values. Note that only integers are
459supported. The user can always transform any desired fractional values
460by multiplying and dividing by an appropriate power of 10.
461
462=back
463
464Defaults to I<bitvector>.
465
466=item I<-terminate>
467
468This option allows the definition of a termination subroutine.
469It expects a subroutine reference. This sub will be called at
470the end of each generation with one argument: the AI::Genetic
471object. Evolution terminates if the sub returns a true value.
472
473=back
474
475=item I<$ga>-E<gt>B<createStrategy>(I<strategy_name>, I<sub_ref>)
476
477This method allows the creation of a custom-made strategy to be used
478during evolution. It expects a unique strategy name, and a subroutine
479reference as arguments. The subroutine will be called with one argument:
480the AI::Genetic object. It is expected to alter the population at each
481generation. See L</"STRATEGIES"> for more information.
482
483=item I<$ga>-E<gt>B<init>(I<initArgs>)
484
485This method initializes the population with random individuals. It B<MUST>
486be called before any call to I<evolve()> or I<inject()>. As a side effect,
487any already existing individuals in the population are deleted. It expects
488one argument, which depends on the type of individuals:
489
490=over
491
492=item o
493
494For bitvectors, the argument is simply the length of the bitvector.
495
496    $ga->init(10);
497
498this initializes a population where each individual has 10 genes.
499
500=item o
501
502For listvectors, the argument is an anonymous list of lists. The
503number of sub-lists is equal to the number of genes of each individual.
504Each sub-list defines the possible string values that the corresponding gene
505can assume.
506
507    $ga->init([
508               [qw/red blue green/],
509               [qw/big medium small/],
510               [qw/very_fat fat fit thin very_thin/],
511              ]);
512
513this initializes a population where each individual has 3 genes, and each gene
514can assume one of the given values.
515
516=item o
517
518For rangevectors, the argument is an anonymous list of lists. The
519number of sub-lists is equal to the number of genes of each individual.
520Each sub-list defines the minimum and maximum integer values that the
521corresponding gene can assume.
522
523    $ga->init([
524               [1, 5],
525               [0, 20],
526               [4, 9],
527              ]);
528
529this initializes a population where each individual has 3 genes, and each gene
530can assume an integer within the corresponding range.
531
532=back
533
534=item I<$ga>-E<gt>B<inject>(I<N>, ?I<args>?)
535
536This method can be used to add more individuals to the population. New individuals
537can be randomly generated, or be explicitly specified. The first argument specifies
538the number, I<N>, of new individuals to add. This can be followed by at most I<N>
539arguments, each of which is an anonymous list that specifies the genome of a
540single individual to add. If the number of genomes given, I<n>, is less than I<N>, then
541I<N> - I<n> random individuals are added for a total of I<N> new individuals. Random
542individuals are generated using the same arguments passed to the I<init()> method.
543For example:
544
545  $ga->inject(5,
546              [qw/red big thin/],
547              [qw/blue small fat/],
548             );
549
550this adds 5 new individuals, 2 with the specified genetic coding, and 3 randomly
551generated.
552
553=item I<$ga>-E<gt>B<evolve>(I<strategy>, ?I<num_generations>?)
554
555This method causes the GA to evolve the population using the specified strategy.
556A strategy name has to be specified as the first argument. The second argument
557is optional and specifies the number of generations to evolve. It defaults to
5581. See L</"STRATEGIES"> for more information on the default strategies.
559
560Each generation consists of the following steps:
561
562=over
563
564=item o
565
566The population is sorted according to the individuals' fitnesses.
567
568=item o
569
570The subroutine corresponding to the named strategy is called with one argument,
571the AI::Genetic object. This subroutine is expected to alter the object itself.
572
573=item o
574
575If a termination subroutine is given, it is executed and the return value is
576checked. Evolution terminates if this sub returns a true value.
577
578=back
579
580=item I<$ga>-E<gt>B<getFittest>(?I<N>?)
581
582This returns the I<N> fittest individuals. If not specified,
583I<N> defaults to 1. As a side effect, it sorts the population by
584fitness score. The actual AI::Genetic::Individual objects are returned.
585You can use the C<genes()> and C<score()> methods to get the genes and the
586scores of the individuals. Please check L<AI::Genetic::Individual> for details.
587
588=item I<$ga>-E<gt>B<sortPopulation>
589
590This method sorts the population according to fitness function. The results
591are cached for speed.
592
593=item I<$ga>-E<gt>B<sortIndividuals>(?[I<ListOfIndividuals>]?)
594
595Given an anonymous list of individuals, this method sorts them according
596to fitness, returning an anonymous list of the sorted individuals.
597
598=item I<$ga>-E<gt>B<people>()
599
600Returns an anonymous list of individuals of the current population.
601B<IMPORTANT>: the actual array reference used by the AI::Genetic object
602is returned, so any changes to it will be reflected in I<$ga>.
603
604=item I<$ga>-E<gt>B<size>(?I<newSize>?)
605
606This method is used to query and set the population size.
607
608=item I<$ga>-E<gt>B<crossProb>(?I<newProb>?)
609
610This method is used to query and set the crossover rate.
611
612=item I<$ga>-E<gt>B<mutProb>(?I<newProb>?)
613
614This method is used to query and set the mutation rate.
615
616=item I<$ga>-E<gt>B<indType>()
617
618This method returns the type of individual: I<bitvector>, I<listvector>,
619or I<rangevector>.
620
621=item I<$ga>-E<gt>B<generation>()
622
623This method returns the current generation.
624
625=back
626
627=head1 FITNESS FUNCTION
628
629Very quickly you will realize that properly defining the fitness function
630is the most important aspect of a GA. Most of the time that a genetic
631algorithm takes to run is spent in running the fitness function for each
632separate individual to get its fitness. AI::Genetic tries to minimize this
633time by caching the fitness result for each individual. But, B<you should
634spend a lot of time optimizing your fitness function to achieve decent run
635times.>
636
637The fitness function should expect only one argument, an anonymous list of
638genes, corresponding to the individual being analyzed. It is expected
639to return a number which defines the fitness score of the said individual.
640The higher the score, the more fit the individual, the more the chance it
641has to be chosen for crossover.
642
643=head1 STRATEGIES
644
645AI::Genetic comes with 9 predefined strategies. These are:
646
647=over
648
649=item rouletteSinglePoint
650
651This strategy implements roulette-wheel selection and single-point crossover.
652
653=item rouletteTwoPoint
654
655This strategy implements roulette-wheel selection and two-point crossover.
656
657=item rouletteUniform
658
659This strategy implements roulette-wheel selection and uniform crossover.
660
661=item tournamentSinglePoint
662
663This strategy implements tournament selection and single-point crossover.
664
665=item tournamentTwoPoint
666
667This strategy implements tournament selection and two-point crossover.
668
669=item tournamentUniform
670
671This strategy implements tournament selection and uniform crossover.
672
673=item randomSinglePoint
674
675This strategy implements random selection and single-point crossover.
676
677=item randomTwoPoint
678
679This strategy implements random selection and two-point crossover.
680
681=item randomUniform
682
683This strategy implements random selection and uniform crossover.
684
685=back
686
687More detail on these strategies and how to call them in your own
688custom strategies can be found in L<AI::Genetic::OpSelection>,
689L<AI::Genetic::OpCrossover> and L<AI::Genetic::OpMutation>.
690
691You can use the functions defined in the above modules in your
692own custom-made strategy. Consult their manpages for more info.
693A custom-made strategy can be defined using the I<strategy()>
694method and is called at the beginning of each generation. The only
695argument to it is the AI::Genetic object itself. Note that the
696population at this point is sorted accoring to each individual's
697fitness score. It is expected that the strategy sub will modify
698the population stored in the AI::Genetic object. Here's the
699pseudo-code of events:
700
701    for (1 .. num_generations) {
702      sort population;
703      call strategy_sub;
704      if (termination_sub exists) {
705        call termination_sub;
706        last if returned true value;
707      }
708    }
709
710=head1 A NOTE ON SPEED/EFFICIENCY
711
712Genetic algorithms are inherently slow.
713Perl can be pretty fast, but will never reach the speed of optimized
714C code (at least my Perl coding will not). I wrote AI::Genetic mainly
715for my own learning experience, but still tried to optimize it as
716much as I can while trying to keep it as flexible as possible.
717
718To do that, I resorted to some well-known tricks like passing a
719reference of a long list instead of the list itself (for example,
720when calling the fitness function, a reference of the gene list
721is passed), and caching fitness scores (if you try to evaluate
722the fitness of the same individual more than once, then the fitness
723function will not be called, and the cached result is returned).
724
725To help speed up your run times, you should pay special attention
726to the design of your fitness function since this will be called once
727for each unique individual in each generation. If you can shave off a
728few clock cycles here and there, then it will be greatly magnified in
729the total run time.
730
731=head1 BUGS
732
733I have tested this module quite a bit, and even used it to solve a
734work-related problem successfully. But, if you think you found a bug
735then please let me know, and I promise to look at it.
736
737Also, if you have any requests, comments or suggestions, then feel
738free to email me.
739
740=head1 INSTALLATION
741
742Either the usual:
743
744    perl Makefile.PL
745    make
746    make install
747
748or just stick it somewhere in @INC where perl can find it. It is in pure Perl.
749
750=head1 AUTHOR & CREDITS
751
752Written by Ala Qumsieh I<aqumsieh@cpan.org>.
753
754Special thanks go to John D. Porter and Oliver Smith for stimulating
755discussions and great suggestions. Daniel Martin and Ivan Tubert-Brohman
756uncovered various bugs and for this I'm grateful.
757
758=head1 COPYRIGHTS
759
760(c) 2003-2005 Ala Qumsieh. All rights reserved.
761This module is distributed under the same terms as Perl itself.
762
763=cut
764