1 2package AI::Genetic; 3 4use strict; 5use Carp; 6 7use vars qw/$VERSION/; 8 9$VERSION = 0.05; 10 11use AI::Genetic::Defaults; 12 13# new AI::Genetic. More modular. 14# Not too many checks are done still. 15 16##### Shared private vars 17# this hash predefines some strategies 18 19my %_strategy = ( 20 rouletteSinglePoint => \&AI::Genetic::Defaults::rouletteSinglePoint, 21 rouletteTwoPoint => \&AI::Genetic::Defaults::rouletteTwoPoint, 22 rouletteUniform => \&AI::Genetic::Defaults::rouletteUniform, 23 24 tournamentSinglePoint => \&AI::Genetic::Defaults::tournamentSinglePoint, 25 tournamentTwoPoint => \&AI::Genetic::Defaults::tournamentTwoPoint, 26 tournamentUniform => \&AI::Genetic::Defaults::tournamentUniform, 27 28 randomSinglePoint => \&AI::Genetic::Defaults::randomSinglePoint, 29 randomTwoPoint => \&AI::Genetic::Defaults::randomTwoPoint, 30 randomUniform => \&AI::Genetic::Defaults::randomUniform, 31 ); 32 33# this hash maps the genome types to the 34# classes they're defined in. 35 36my %_genome2class = ( 37 bitvector => 'AI::Genetic::IndBitVector', 38 rangevector => 'AI::Genetic::IndRangeVector', 39 listvector => 'AI::Genetic::IndListVector', 40 ); 41 42################## 43 44# sub new(): 45# This is the constructor. It creates a new AI::Genetic 46# object. Options are: 47# -population: set the population size 48# -crossover: set the crossover probability 49# -mutation: set the mutation probability 50# -fitness: set the fitness function 51# -type: set the genome type. See docs. 52# -terminate: set termination sub. 53 54sub new { 55 my ($class, %args) = @_; 56 57 my $self = bless { 58 ADDSEL => {}, # user-defined selections 59 ADDCRS => {}, # user-defined crossovers 60 ADDMUT => {}, # user-defined mutations 61 ADDSTR => {}, # user-defined strategies 62 } => $class; 63 64 $self->{FITFUNC} = $args{-fitness} || sub { 1 }; 65 $self->{CROSSRATE} = $args{-crossover} || 0.95; 66 $self->{MUTPROB} = $args{-mutation} || 0.05; 67 $self->{POPSIZE} = $args{-population} || 100; 68 $self->{TYPE} = $args{-type} || 'bitvector'; 69 $self->{TERM} = $args{-terminate} || sub { 0 }; 70 71 $self->{PEOPLE} = []; # list of individuals 72 $self->{GENERATION} = 0; # current gen. 73 74 $self->{INIT} = 0; # whether pop is initialized or not. 75 $self->{SORTED} = 0; # whether the population is sorted by score or not. 76 $self->{INDIVIDUAL} = ''; # name of individual class to use(). 77 78 return $self; 79} 80 81# sub createStrategy(): 82# This method creates a new strategy. 83# It takes two arguments: name of strategy, and 84# anon sub that implements it. 85 86sub createStrategy { 87 my ($self, $name, $sub) = @_; 88 89 if (ref($sub) eq 'CODE') { 90 $self->{ADDSTR}{$name} = $sub; 91 } else { 92 # we don't know what this operation is. 93 carp <<EOC; 94ERROR: Must specify anonymous subroutine for strategy. 95 Strategy '$name' will be deleted. 96EOC 97 ; 98 delete $self->{ADDSTR}{$name}; 99 return undef; 100 } 101 102 return $name; 103} 104 105# sub evolve(): 106# This method evolves the population using a specific strategy 107# for a specific number of generations. 108 109sub evolve { 110 my ($self, $strategy, $gens) = @_; 111 112 unless ($self->{INIT}) { 113 carp "can't evolve() before init()"; 114 return undef; 115 } 116 117 my $strSub; 118 if (exists $self->{ADDSTR}{$strategy}) { 119 $strSub = $self->{ADDSTR}{$strategy}; 120 } elsif (exists $_strategy{$strategy}) { 121 $strSub = $_strategy{$strategy}; 122 } else { 123 carp "ERROR: Do not know what strategy '$strategy' is,"; 124 return undef; 125 } 126 127 $gens ||= 1; 128 129 for my $i (1 .. $gens) { 130 $self->sortPopulation; 131 $strSub->($self); 132 133 $self->{GENERATION}++; 134 $self->{SORTED} = 0; 135 136 last if $self->{TERM}->($self); 137 138# my @f = $self->getFittest(10); 139# for my $f (@f) { 140# print STDERR " Fitness = ", $f->score, "..\n"; 141# print STDERR " Genes are: @{$f->genes}.\n"; 142# } 143 } 144} 145 146# sub sortIndividuals(): 147# This method takes as input an anon list of individuals, and returns 148# another anon list of the same individuals but sorted in decreasing 149# score. 150 151sub sortIndividuals { 152 my ($self, $list) = @_; 153 154 # make sure all score's are calculated. 155 # This is to avoid a bug in Perl where a sort is called from whithin another 156 # sort, and they are in different packages, then you get a use of uninit value 157 # warning. See http://rt.perl.org/rt3/Ticket/Display.html?id=7063 158 $_->score for @$list; 159 160 return [sort {$b->score <=> $a->score} @$list]; 161} 162 163# sub sortPopulation(): 164# This method sorts the population of individuals. 165 166sub sortPopulation { 167 my $self = shift; 168 169 return if $self->{SORTED}; 170 171 $self->{PEOPLE} = $self->sortIndividuals($self->{PEOPLE}); 172 $self->{SORTED} = 1; 173} 174 175# sub getFittest(): 176# This method returns the fittest individuals. 177 178sub getFittest { 179 my ($self, $N) = @_; 180 181 $N ||= 1; 182 $N = 1 if $N < 1; 183 184 $N = @{$self->{PEOPLE}} if $N > @{$self->{PEOPLE}}; 185 186 $self->sortPopulation; 187 188 my @r = @{$self->{PEOPLE}}[0 .. $N-1]; 189 190 return $r[0] if $N == 1 && not wantarray; 191 192 return @r; 193} 194 195# sub init(): 196# This method initializes the population to completely 197# random individuals. It deletes all current individuals!!! 198# It also examines the type of individuals we want, and 199# require()s the proper class. Throws an error if it can't. 200# Must pass to it an anon list that will be passed to the 201# newRandom method of the individual. 202 203# In case of bitvector, $newArgs is length of bitvector. 204# In case of rangevector, $newArgs is anon list of anon lists. 205# each sub-anon list has two elements, min number and max number. 206# In case of listvector, $newArgs is anon list of anon lists. 207# Each sub-anon list contains possible values of gene. 208 209sub init { 210 my ($self, $newArgs) = @_; 211 212 $self->{INIT} = 0; 213 214 my $ind; 215 if (exists $_genome2class{$self->{TYPE}}) { 216 $ind = $_genome2class{$self->{TYPE}}; 217 } else { 218 $ind = $self->{TYPE}; 219 } 220 221 eval "use $ind"; # does this work if package is in same file? 222 if ($@) { 223 carp "ERROR: Init failed. Can't require '$ind': $@,"; 224 return undef; 225 } 226 227 $self->{INDIVIDUAL} = $ind; 228 $self->{PEOPLE} = []; 229 $self->{SORTED} = 0; 230 $self->{GENERATION} = 0; 231 $self->{INITARGS} = $newArgs; 232 233 push @{$self->{PEOPLE}} => 234 $ind->newRandom($newArgs) for 1 .. $self->{POPSIZE}; 235 236 $_->fitness($self->{FITFUNC}) for @{$self->{PEOPLE}}; 237 238 $self->{INIT} = 1; 239} 240 241# sub people(): 242# returns the current list of individuals in the population. 243# note: this returns the actual array ref, so any changes 244# made to it (ex, shift/pop/etc) will be reflected in the 245# population. 246 247sub people { 248 my $self = shift; 249 250 if (@_) { 251 $self->{PEOPLE} = shift; 252 $self->{SORTED} = 0; 253 } 254 255 $self->{PEOPLE}; 256} 257 258# useful little methods to set/query parameters. 259sub size { $_[0]{POPSIZE} = $_[1] if defined $_[1]; $_[0]{POPSIZE} } 260sub crossProb { $_[0]{CROSSRATE} = $_[1] if defined $_[1]; $_[0]{CROSSRATE} } 261sub mutProb { $_[0]{MUTPROB} = $_[1] if defined $_[1]; $_[0]{MUTPROB} } 262sub indType { $_[0]{INDIVIDUAL} } 263sub generation { $_[0]{GENERATION} } 264 265# sub inject(): 266# This method is used to add individuals to the current population. 267# The point of it is that sometimes the population gets stagnant, 268# so it could be useful add "fresh blood". 269# Takes a variable number of arguments. The first argument is the 270# total number, N, of new individuals to add. The remaining arguments 271# are genomes to inject. There must be at most N genomes to inject. 272# If the number, n, of genomes to inject is less than N, N - n random 273# genomes are added. Perhaps an example will help? 274# returns 1 on success and undef on error. 275 276sub inject { 277 my ($self, $count, @genomes) = @_; 278 279 unless ($self->{INIT}) { 280 carp "can't inject() before init()"; 281 return undef; 282 } 283 284 my $ind = $self->{INDIVIDUAL}; 285 286 my @newInds; 287 for my $i (1 .. $count) { 288 my $genes = shift @genomes; 289 290 if ($genes) { 291 push @newInds => $ind->newSpecific($genes, $self->{INITARGS}); 292 } else { 293 push @newInds => $ind->newRandom ($self->{INITARGS}); 294 } 295 } 296 297 $_->fitness($self->{FITFUNC}) for @newInds; 298 299 push @{$self->{PEOPLE}} => @newInds; 300 301 return 1; 302} 303 304__END__ 305 306=head1 NAME 307 308AI::Genetic - A pure Perl genetic algorithm implementation. 309 310=head1 SYNOPSIS 311 312 use AI::Genetic; 313 my $ga = new AI::Genetic( 314 -fitness => \&fitnessFunc, 315 -type => 'bitvector', 316 -population => 500, 317 -crossover => 0.9, 318 -mutation => 0.01, 319 -terminate => \&terminateFunc, 320 ); 321 322 $ga->init(10); 323 $ga->evolve('rouletteTwoPoint', 100); 324 print "Best score = ", $ga->getFittest->score, ".\n"; 325 326 sub fitnessFunc { 327 my $genes = shift; 328 329 my $fitness; 330 # assign a number to $fitness based on the @$genes 331 # ... 332 333 return $fitness; 334 } 335 336 sub terminateFunc { 337 my $ga = shift; 338 339 # terminate if reached some threshold. 340 return 1 if $ga->getFittest->score > $THRESHOLD; 341 return 0; 342 } 343 344=head1 DESCRIPTION 345 346This module implements a Genetic Algorithm (GA) in pure Perl. 347Other Perl modules that achieve the same thing (perhaps better, 348perhaps worse) do exist. Please check CPAN. I mainly wrote this 349module to satisfy my own needs, and to learn something about GAs 350along the way. 351 352B<PLEASE NOTE:> As of v0.02, AI::Genetic has been re-written from 353scratch to be more modular and expandable. To achieve this, I had 354to modify the API, so it is not backward-compatible with v0.01. 355As a result, I do not plan on supporting v0.01. 356 357I will not go into the details of GAs here, but here are the 358bare basics. Plenty of information can be found on the web. 359 360In a GA, a population of individuals compete for survival. Each 361individual is designated by a set of genes that define its 362behaviour. Individuals that perform better (as defined by the 363fitness function) have a higher chance of mating with other 364individuals. When two individuals mate, they swap some of 365their genes, resulting in an individual that has properties 366from both of its "parents". Every now and then, a mutation 367occurs where some gene randomly changes value, resulting in 368a different individual. If all is well defined, after a few 369generations, the population should converge on a "good-enough" 370solution to the problem being tackled. 371 372A GA implementation runs for a discrete number of time steps 373called I<generations>. What happens during each generation can 374vary greatly depending on the strategy being used (See 375L</"STRATEGIES"> for more info). 376Typically, a variation of the following happens at 377each generation: 378 379=over 4 380 381=item B<1. Selection> 382 383Here the performance of all the individuals is evaluated 384based on the fitness function, and each is given a specific 385fitness value. The higher the value, the bigger the chance 386of an individual passing its genes on in future generations 387through mating (crossover). 388 389=item B<2. Crossover> 390 391Here, individuals selected are randomly paired up for 392crossover (aka I<sexual reproduction>). This is further 393controlled by the crossover rate specified and may result in 394a new offspring individual that contains genes common to 395both parents. New individuals are injected into the current 396population. 397 398=item B<3. Mutation> 399 400In this step, each individual is given the chance to mutate 401based on the mutation probability specified. If an individual 402is to mutate, each of its genes is given the chance to randomly 403switch its value to some other state. 404 405=back 406 407=head1 CLASS METHODS 408 409Here are the public methods. 410 411=over 4 412 413=item I<$ga>-E<gt>B<new>(I<options>) 414 415This is the constructor. It accepts options in the form of 416hash-value pairs. These are: 417 418=over 8 419 420=item B<-population> 421 422This defines the size of the population, i.e. how many individuals 423to simultaneously exist at each generation. Defaults to 100. 424 425=item B<-crossover> 426 427This defines the crossover rate. Defaults to 0.95. 428 429=item B<-mutation> 430 431This defines the mutation rate. Defaults to 0.05. 432 433=item I<-fitness> 434 435This defines a fitness function. It expects a reference to a subroutine. 436More details are given in L</"FITNESS FUNCTION">. 437 438=item I<-type> 439 440This defines the type of the genome. Currently, AI::Genetic 441supports only three types: 442 443=over 444 445=item I<bitvector> 446 447Individuals of this type have genes that are bits. Each gene 448can be in one of two possible states, on or off. 449 450=item I<listvector> 451 452Each gene of a listvector individual can assume one string value from 453a specified list of possible string values. 454 455=item I<rangevector> 456 457Each gene of a rangevector individual can assume one integer value 458from a range of possible integer values. Note that only integers are 459supported. The user can always transform any desired fractional values 460by multiplying and dividing by an appropriate power of 10. 461 462=back 463 464Defaults to I<bitvector>. 465 466=item I<-terminate> 467 468This option allows the definition of a termination subroutine. 469It expects a subroutine reference. This sub will be called at 470the end of each generation with one argument: the AI::Genetic 471object. Evolution terminates if the sub returns a true value. 472 473=back 474 475=item I<$ga>-E<gt>B<createStrategy>(I<strategy_name>, I<sub_ref>) 476 477This method allows the creation of a custom-made strategy to be used 478during evolution. It expects a unique strategy name, and a subroutine 479reference as arguments. The subroutine will be called with one argument: 480the AI::Genetic object. It is expected to alter the population at each 481generation. See L</"STRATEGIES"> for more information. 482 483=item I<$ga>-E<gt>B<init>(I<initArgs>) 484 485This method initializes the population with random individuals. It B<MUST> 486be called before any call to I<evolve()> or I<inject()>. As a side effect, 487any already existing individuals in the population are deleted. It expects 488one argument, which depends on the type of individuals: 489 490=over 491 492=item o 493 494For bitvectors, the argument is simply the length of the bitvector. 495 496 $ga->init(10); 497 498this initializes a population where each individual has 10 genes. 499 500=item o 501 502For listvectors, the argument is an anonymous list of lists. The 503number of sub-lists is equal to the number of genes of each individual. 504Each sub-list defines the possible string values that the corresponding gene 505can assume. 506 507 $ga->init([ 508 [qw/red blue green/], 509 [qw/big medium small/], 510 [qw/very_fat fat fit thin very_thin/], 511 ]); 512 513this initializes a population where each individual has 3 genes, and each gene 514can assume one of the given values. 515 516=item o 517 518For rangevectors, the argument is an anonymous list of lists. The 519number of sub-lists is equal to the number of genes of each individual. 520Each sub-list defines the minimum and maximum integer values that the 521corresponding gene can assume. 522 523 $ga->init([ 524 [1, 5], 525 [0, 20], 526 [4, 9], 527 ]); 528 529this initializes a population where each individual has 3 genes, and each gene 530can assume an integer within the corresponding range. 531 532=back 533 534=item I<$ga>-E<gt>B<inject>(I<N>, ?I<args>?) 535 536This method can be used to add more individuals to the population. New individuals 537can be randomly generated, or be explicitly specified. The first argument specifies 538the number, I<N>, of new individuals to add. This can be followed by at most I<N> 539arguments, each of which is an anonymous list that specifies the genome of a 540single individual to add. If the number of genomes given, I<n>, is less than I<N>, then 541I<N> - I<n> random individuals are added for a total of I<N> new individuals. Random 542individuals are generated using the same arguments passed to the I<init()> method. 543For example: 544 545 $ga->inject(5, 546 [qw/red big thin/], 547 [qw/blue small fat/], 548 ); 549 550this adds 5 new individuals, 2 with the specified genetic coding, and 3 randomly 551generated. 552 553=item I<$ga>-E<gt>B<evolve>(I<strategy>, ?I<num_generations>?) 554 555This method causes the GA to evolve the population using the specified strategy. 556A strategy name has to be specified as the first argument. The second argument 557is optional and specifies the number of generations to evolve. It defaults to 5581. See L</"STRATEGIES"> for more information on the default strategies. 559 560Each generation consists of the following steps: 561 562=over 563 564=item o 565 566The population is sorted according to the individuals' fitnesses. 567 568=item o 569 570The subroutine corresponding to the named strategy is called with one argument, 571the AI::Genetic object. This subroutine is expected to alter the object itself. 572 573=item o 574 575If a termination subroutine is given, it is executed and the return value is 576checked. Evolution terminates if this sub returns a true value. 577 578=back 579 580=item I<$ga>-E<gt>B<getFittest>(?I<N>?) 581 582This returns the I<N> fittest individuals. If not specified, 583I<N> defaults to 1. As a side effect, it sorts the population by 584fitness score. The actual AI::Genetic::Individual objects are returned. 585You can use the C<genes()> and C<score()> methods to get the genes and the 586scores of the individuals. Please check L<AI::Genetic::Individual> for details. 587 588=item I<$ga>-E<gt>B<sortPopulation> 589 590This method sorts the population according to fitness function. The results 591are cached for speed. 592 593=item I<$ga>-E<gt>B<sortIndividuals>(?[I<ListOfIndividuals>]?) 594 595Given an anonymous list of individuals, this method sorts them according 596to fitness, returning an anonymous list of the sorted individuals. 597 598=item I<$ga>-E<gt>B<people>() 599 600Returns an anonymous list of individuals of the current population. 601B<IMPORTANT>: the actual array reference used by the AI::Genetic object 602is returned, so any changes to it will be reflected in I<$ga>. 603 604=item I<$ga>-E<gt>B<size>(?I<newSize>?) 605 606This method is used to query and set the population size. 607 608=item I<$ga>-E<gt>B<crossProb>(?I<newProb>?) 609 610This method is used to query and set the crossover rate. 611 612=item I<$ga>-E<gt>B<mutProb>(?I<newProb>?) 613 614This method is used to query and set the mutation rate. 615 616=item I<$ga>-E<gt>B<indType>() 617 618This method returns the type of individual: I<bitvector>, I<listvector>, 619or I<rangevector>. 620 621=item I<$ga>-E<gt>B<generation>() 622 623This method returns the current generation. 624 625=back 626 627=head1 FITNESS FUNCTION 628 629Very quickly you will realize that properly defining the fitness function 630is the most important aspect of a GA. Most of the time that a genetic 631algorithm takes to run is spent in running the fitness function for each 632separate individual to get its fitness. AI::Genetic tries to minimize this 633time by caching the fitness result for each individual. But, B<you should 634spend a lot of time optimizing your fitness function to achieve decent run 635times.> 636 637The fitness function should expect only one argument, an anonymous list of 638genes, corresponding to the individual being analyzed. It is expected 639to return a number which defines the fitness score of the said individual. 640The higher the score, the more fit the individual, the more the chance it 641has to be chosen for crossover. 642 643=head1 STRATEGIES 644 645AI::Genetic comes with 9 predefined strategies. These are: 646 647=over 648 649=item rouletteSinglePoint 650 651This strategy implements roulette-wheel selection and single-point crossover. 652 653=item rouletteTwoPoint 654 655This strategy implements roulette-wheel selection and two-point crossover. 656 657=item rouletteUniform 658 659This strategy implements roulette-wheel selection and uniform crossover. 660 661=item tournamentSinglePoint 662 663This strategy implements tournament selection and single-point crossover. 664 665=item tournamentTwoPoint 666 667This strategy implements tournament selection and two-point crossover. 668 669=item tournamentUniform 670 671This strategy implements tournament selection and uniform crossover. 672 673=item randomSinglePoint 674 675This strategy implements random selection and single-point crossover. 676 677=item randomTwoPoint 678 679This strategy implements random selection and two-point crossover. 680 681=item randomUniform 682 683This strategy implements random selection and uniform crossover. 684 685=back 686 687More detail on these strategies and how to call them in your own 688custom strategies can be found in L<AI::Genetic::OpSelection>, 689L<AI::Genetic::OpCrossover> and L<AI::Genetic::OpMutation>. 690 691You can use the functions defined in the above modules in your 692own custom-made strategy. Consult their manpages for more info. 693A custom-made strategy can be defined using the I<strategy()> 694method and is called at the beginning of each generation. The only 695argument to it is the AI::Genetic object itself. Note that the 696population at this point is sorted accoring to each individual's 697fitness score. It is expected that the strategy sub will modify 698the population stored in the AI::Genetic object. Here's the 699pseudo-code of events: 700 701 for (1 .. num_generations) { 702 sort population; 703 call strategy_sub; 704 if (termination_sub exists) { 705 call termination_sub; 706 last if returned true value; 707 } 708 } 709 710=head1 A NOTE ON SPEED/EFFICIENCY 711 712Genetic algorithms are inherently slow. 713Perl can be pretty fast, but will never reach the speed of optimized 714C code (at least my Perl coding will not). I wrote AI::Genetic mainly 715for my own learning experience, but still tried to optimize it as 716much as I can while trying to keep it as flexible as possible. 717 718To do that, I resorted to some well-known tricks like passing a 719reference of a long list instead of the list itself (for example, 720when calling the fitness function, a reference of the gene list 721is passed), and caching fitness scores (if you try to evaluate 722the fitness of the same individual more than once, then the fitness 723function will not be called, and the cached result is returned). 724 725To help speed up your run times, you should pay special attention 726to the design of your fitness function since this will be called once 727for each unique individual in each generation. If you can shave off a 728few clock cycles here and there, then it will be greatly magnified in 729the total run time. 730 731=head1 BUGS 732 733I have tested this module quite a bit, and even used it to solve a 734work-related problem successfully. But, if you think you found a bug 735then please let me know, and I promise to look at it. 736 737Also, if you have any requests, comments or suggestions, then feel 738free to email me. 739 740=head1 INSTALLATION 741 742Either the usual: 743 744 perl Makefile.PL 745 make 746 make install 747 748or just stick it somewhere in @INC where perl can find it. It is in pure Perl. 749 750=head1 AUTHOR & CREDITS 751 752Written by Ala Qumsieh I<aqumsieh@cpan.org>. 753 754Special thanks go to John D. Porter and Oliver Smith for stimulating 755discussions and great suggestions. Daniel Martin and Ivan Tubert-Brohman 756uncovered various bugs and for this I'm grateful. 757 758=head1 COPYRIGHTS 759 760(c) 2003-2005 Ala Qumsieh. All rights reserved. 761This module is distributed under the same terms as Perl itself. 762 763=cut 764