1 /*
2  *  cBGGenotypeManager.cc
3  *  Avida
4  *
5  *  Created by David on 11/11/09.
6  *  Copyright 2009-2011 Michigan State University. All rights reserved.
7  *
8  *
9  *  This file is part of Avida.
10  *
11  *  Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
12  *  as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
13  *
14  *  Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public License along with Avida.
18  *  If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 #include "cBGGenotypeManager.h"
23 
24 #include "avida/core/Sequence.h"
25 
26 #include "cBGGenotype.h"
27 #include "cDataFile.h"
28 #include "cStats.h"
29 #include "cStringUtil.h"
30 #include "cWorld.h"
31 #include "tArrayMap.h"
32 #include "tAutoRelease.h"
33 #include "tDataCommandManager.h"
34 
35 using namespace Avida;
36 using namespace AvidaTools;
37 
38 
cBGGenotypeManager(cWorld * world)39 cBGGenotypeManager::cBGGenotypeManager(cWorld* world)
40   : m_world(world)
41   , m_active_sz(1)
42   , m_coalescent(NULL)
43   , m_best(0)
44   , m_next_id(1)
45   , m_dom_prev(-1)
46   , m_dom_time(0)
47   , m_dcm(NULL)
48 {
49 }
50 
~cBGGenotypeManager()51 cBGGenotypeManager::~cBGGenotypeManager()
52 {
53   tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[0].Iterator());
54   while (list_it->Next() != NULL) {
55     assert(list_it->Get()->GetActiveReferenceCount() == 0);
56     removeGenotype(list_it->Get());
57   }
58 
59   assert(m_historic.GetSize() == 0);
60   assert(m_best == 0);
61   delete m_dcm;
62 }
63 
64 
ClassifyNewBioUnit(cBioUnit * bu,tArrayMap<cString,cString> * hints)65 cBioGroup* cBGGenotypeManager::ClassifyNewBioUnit(cBioUnit* bu, tArrayMap<cString, cString>* hints) { return ClassifyNewBioUnit(bu, NULL, hints); }
66 
67 
UpdateReset()68 void cBGGenotypeManager::UpdateReset()
69 {
70   if (m_active_sz.GetSize() < nBGGenotypeManager::HASH_SIZE) {
71     for (int i = 0; i < m_active_sz.GetSize(); i++) {
72       tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
73       while (list_it->Next() != NULL) if (list_it->Get()->IsThreshold()) list_it->Get()->UpdateReset();
74     }
75   } else {
76     for (int i = 0; i < nBGGenotypeManager::HASH_SIZE; i++) {
77       tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_hash[i].Iterator());
78       while (list_it->Next() != NULL) if (list_it->Get()->IsThreshold()) list_it->Get()->UpdateReset();
79     }
80   }
81 
82   tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
83   while (list_it->Next() != NULL) if (!list_it->Get()->GetReferenceCount()) this->removeGenotype(list_it->Get());
84 }
85 
86 
UpdateStats(cStats & stats)87 void cBGGenotypeManager::UpdateStats(cStats& stats)
88 {
89   // @TODO - genotype manager should stash stats in cStats as a classification "role" stats object so that multiple roles can report stats
90 
91   // Clear out genotype sums...
92   stats.SumGenotypeAge().Clear();
93   stats.SumAbundance().Clear();
94   stats.SumGenotypeDepth().Clear();
95   stats.SumSize().Clear();
96   stats.SumThresholdAge().Clear();
97 
98   if(m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
99     stats.SumPreySize().Clear();
100     stats.SumPredSize().Clear();
101   }
102 
103   double entropy = 0.0;
104   double prey_entropy = 0.0;
105   double pred_entropy = 0.0;
106   int active_count = 0;
107   for (int i = 1; i < m_active_sz.GetSize(); i++) {
108     active_count += m_active_sz[i].GetSize();
109     tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
110     while (list_it->Next() != NULL) {
111       cBGGenotype* bg = list_it->Get();
112       const int abundance = bg->GetNumUnits();
113 
114       // Update stats...
115       const int age = stats.GetUpdate() - bg->GetUpdateBorn();
116       stats.SumGenotypeAge().Add(age, abundance);
117       stats.SumAbundance().Add(abundance);
118       stats.SumGenotypeDepth().Add(bg->GetDepth(), abundance);
119       stats.SumSize().Add(bg->GetGenome().GetSequence().GetSize(), abundance);
120 
121       if(m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
122         if (bg->GetLastForagerType() !=-2) stats.SumPreySize().Add(bg->GetGenome().GetSequence().GetSize(), abundance);
123         else stats.SumPredSize().Add(bg->GetGenome().GetSequence().GetSize(), abundance);
124       }
125 
126       // Calculate this genotype's contribution to entropy
127       // - when p = 1.0, partial_ent calculation would return -0.0. This may propagate
128       //   to the output stage, but behavior is dependent on compiler used and optimization
129       //   level.  For consistent output, ensures that 0.0 is returned.
130       const double p = ((double) abundance) / (double) stats.GetNumCreatures();
131       const double partial_ent = (abundance == stats.GetNumCreatures()) ? 0.0 : -(p * Log(p));
132       entropy += partial_ent;
133 
134       if (m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
135         if (bg->GetLastForagerType() > -2) {
136           const double prey_p = ((double) abundance) / (double) stats.GetNumPreyCreatures();
137           const double prey_partial_ent = (abundance == stats.GetNumPreyCreatures()) ? 0.0 : -(prey_p * Log(prey_p));
138           prey_entropy += prey_partial_ent;
139         }
140         else {
141           const double pred_p = ((double) abundance) / (double) stats.GetNumPredCreatures();
142           const double pred_partial_ent = (abundance == stats.GetNumPredCreatures()) ? 0.0 : -(pred_p * Log(pred_p));
143           pred_entropy += pred_partial_ent;
144         }
145       }
146 
147       // Do any special calculations for threshold genotypes.
148       if (bg->IsThreshold()) stats.SumThresholdAge().Add(age, abundance);
149     }
150   }
151 
152   stats.SetEntropy(entropy);
153   stats.SetNumGenotypes(active_count, m_historic.GetSize());
154 
155   if (m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
156     stats.SetPreyEntropy(prey_entropy);
157     stats.SetPredEntropy(pred_entropy);
158   }
159 
160   // Handle dominant genotype stats
161   cBGGenotype* dom_genotype = getBest();
162   if (dom_genotype == NULL) return;
163 
164   stats.SetDomMerit(dom_genotype->GetMerit());
165   stats.SetDomGestation(dom_genotype->GetGestationTime());
166   stats.SetDomReproRate(dom_genotype->GetReproRate());
167   stats.SetDomFitness(dom_genotype->GetFitness());
168   stats.SetDomCopiedSize(dom_genotype->GetCopiedSize());
169   stats.SetDomExeSize(dom_genotype->GetExecutedSize());
170 
171   stats.SetDomSize(dom_genotype->GetGenome().GetSequence().GetSize());
172   stats.SetDomID(dom_genotype->GetID());
173   stats.SetDomName(dom_genotype->GetName());
174 
175   if (dom_genotype->IsThreshold()) {
176     stats.SetDomBirths(dom_genotype->GetLastBirths());
177     stats.SetDomBreedTrue(dom_genotype->GetLastBreedTrue());
178     stats.SetDomBreedIn(dom_genotype->GetLastBreedIn());
179     stats.SetDomBreedOut(dom_genotype->GetLastBreedOut());
180   } else {
181     stats.SetDomBirths(dom_genotype->GetThisBirths());
182     stats.SetDomBreedTrue(dom_genotype->GetThisBreedTrue());
183     stats.SetDomBreedIn(dom_genotype->GetThisBreedIn());
184     stats.SetDomBreedOut(dom_genotype->GetThisBreedOut());
185   }
186 
187   stats.SetDomAbundance(dom_genotype->GetNumUnits());
188   stats.SetDomGeneDepth(dom_genotype->GetDepth());
189   stats.SetDomSequence(dom_genotype->GetGenome().GetSequence().AsString());
190 
191   stats.SetDomLastBirthCell(dom_genotype->GetLastBirthCell());
192   stats.SetDomLastGroup(dom_genotype->GetLastGroupID());
193   stats.SetDomLastForagerType(dom_genotype->GetLastForagerType());
194 
195 }
196 
197 
198 
GetBioGroup(int bg_id)199 cBioGroup* cBGGenotypeManager::GetBioGroup(int bg_id)
200 {
201   for (int i = m_best; i >= 0; i--) {
202     tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
203     while (list_it->Next() != NULL) if (list_it->Get()->GetID() == bg_id) return list_it->Get();
204   }
205 
206   tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
207   while (list_it->Next() != NULL) if (list_it->Get()->GetID() == bg_id) return list_it->Get();
208 
209   return NULL;
210 }
211 
212 
LoadBioGroup(const tDictionary<cString> & props)213 cBioGroup* cBGGenotypeManager::LoadBioGroup(const tDictionary<cString>& props)
214 {
215   cBGGenotype* bg = new cBGGenotype(this, m_next_id++, props, m_world);
216   m_historic.Push(bg, &bg->m_handle);
217   return bg;
218 }
219 
220 
SaveBioGroups(cDataFile & df)221 void cBGGenotypeManager::SaveBioGroups(cDataFile& df)
222 {
223   // @TODO - Just dump historic for now.  Need structured output format to support top down save
224   //         With a structured save (and save params passed through), a "structured population save" could be attained
225   //         by simply calling the bio group save.  As it stands right now, cPopulation must decorate columns with additional
226   //         data about active genotypes, yet the bio group interface really shouldn't know about active/inactive genotypes.
227   //         Thus it is not proper to split bgm save into a save historic and save active.  Right now we'll just make
228   //         cPopulation do the work.
229 
230   tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
231   while (list_it->Next() != NULL) {
232     list_it->Get()->Save(df);
233     df.Endl();
234   }
235 }
236 
237 
Iterator()238 tIterator<cBioGroup>* cBGGenotypeManager::Iterator()
239 {
240   return new cGenotypeIterator(this);
241 }
242 
243 
244 
ClassifyNewBioUnit(cBioUnit * bu,tArray<cBioGroup * > * parents,tArrayMap<cString,cString> * hints)245 cBGGenotype* cBGGenotypeManager::ClassifyNewBioUnit(cBioUnit* bu, tArray<cBioGroup*>* parents, tArrayMap<cString, cString>* hints)
246 {
247   int list_num = hashGenome(bu->GetGenome().GetSequence());
248 
249   cBGGenotype* found = NULL;
250 
251   cString gid_str;
252   if (hints && hints->Get("id", gid_str)) {
253     int gid = gid_str.AsInt();
254 
255     // Search all lists attempting to locate the referenced genotype by ID
256     for (int i = 0; i < m_active_sz.GetSize() && !found; i++) {
257       tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
258       while (list_it->Next() != NULL) {
259         if (list_it->Get()->GetID() == gid) {
260           found = list_it->Get();
261           found->NotifyNewBioUnit(bu);
262           break;
263         }
264       }
265     }
266 
267     if (!found) {
268       tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
269       while (list_it->Next() != NULL) {
270         if (list_it->Get()->GetID() == gid) {
271           found = list_it->Get();
272           m_active_hash[hashGenome(found->GetGenome().GetSequence())].Push(found);
273           found->m_handle->Remove(); // Remove from historic list
274           m_active_sz[found->GetNumUnits()].PushRear(found, &found->m_handle);
275           found->NotifyNewBioUnit(bu);
276           m_world->GetStats().AddGenotype();
277           if (found->GetNumUnits() > m_best) {
278             m_best = found->GetNumUnits();
279             found->SetThreshold();
280             found->SetName(nameGenotype(found->GetGenome().GetSequence().GetSize()));
281             NotifyListeners(found, BG_EVENT_ADD_THRESHOLD);
282           }
283         }
284       }
285     }
286   }
287 
288   // No hints or unable to locate hinted genome, search for a matching genotype
289   if (!found) {
290     tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_hash[list_num].Iterator());
291     while (list_it->Next() != NULL) {
292       if (list_it->Get()->Matches(bu)) {
293         found = list_it->Get();
294         found->NotifyNewBioUnit(bu);
295         break;
296       }
297     }
298   }
299 
300   // No matching genotype (hinted or otherwise), so create a new one
301   if (!found) {
302     //@CHC: If genotype classification is disabled, we don't want to keep track of the parents, or else
303     //      we won't end up saving any memory
304     if (!m_world->GetConfig().DISABLE_GENOTYPE_CLASSIFICATION.Get()) { //It's enabled, so keep the parents
305       found = new cBGGenotype(this, m_next_id++, bu, m_world->GetStats().GetUpdate(), parents);
306     } else { //It's disabled, so toss the parents
307       found = new cBGGenotype(this, m_next_id++, bu, m_world->GetStats().GetUpdate(), NULL);
308     }
309     m_active_hash[list_num].Push(found);
310     resizeActiveList(found->GetNumUnits());
311     m_active_sz[found->GetNumUnits()].PushRear(found, &found->m_handle);
312     m_world->GetStats().AddGenotype();
313     if (found->GetNumUnits() > m_best) {
314       m_best = found->GetNumUnits();
315       found->SetThreshold();
316       found->SetName(nameGenotype(found->GetGenome().GetSequence().GetSize()));
317       NotifyListeners(found, BG_EVENT_ADD_THRESHOLD);
318     }
319   }
320 
321   return found;
322 }
323 
324 
AdjustGenotype(cBGGenotype * genotype,int old_size,int new_size)325 void cBGGenotypeManager::AdjustGenotype(cBGGenotype* genotype, int old_size, int new_size)
326 {
327   // Remove from old size list
328   genotype->m_handle->Remove();
329   if (m_coalescent == genotype) m_coalescent = NULL;
330 
331   // Handle best genotype pointer
332   bool was_best = (old_size && old_size == m_best);
333   if (was_best && m_active_sz[old_size].GetSize() == 0) {
334     for (m_best--; m_best > 0; m_best--) if (m_active_sz[m_best].GetSize()) break;
335   }
336 
337   // Handle defunct genotypes
338   if (new_size == 0 && genotype->GetActiveReferenceCount() == 0) {
339     removeGenotype(genotype);
340     return;
341   }
342 
343   // Add to new size list
344   resizeActiveList(new_size);
345   if (was_best && m_best == new_size) {
346     // Special case to keep the current best genotype as best when shrinking to the same size as other genotypes
347     m_active_sz[new_size].Push(genotype, &genotype->m_handle);
348   } else {
349     m_active_sz[new_size].PushRear(genotype, &genotype->m_handle);
350     if (new_size > m_best) m_best = new_size;
351   }
352 
353   if (!genotype->IsThreshold() && (new_size >= m_world->GetConfig().THRESHOLD.Get() || genotype == getBest())) {
354     genotype->SetThreshold();
355     genotype->SetName(nameGenotype(genotype->GetGenome().GetSequence().GetSize()));
356     NotifyListeners(genotype, BG_EVENT_ADD_THRESHOLD);
357   }
358 }
359 
360 
361 
GetBioGroupPropertyList() const362 const tArray<cString>& cBGGenotypeManager::GetBioGroupPropertyList() const
363 {
364   if (!m_dcm) buildDataCommandManager();
365   return m_dcm->GetEntryNames();
366 }
367 
BioGroupHasProperty(const cString & prop) const368 bool cBGGenotypeManager::BioGroupHasProperty(const cString& prop) const
369 {
370   if (!m_dcm) buildDataCommandManager();
371   tAutoRelease<tDataEntryCommand<cBGGenotype> > dc(m_dcm->GetDataCommand(prop));
372   return (!dc.IsNull());
373 }
374 
GetBioGroupProperty(const cBGGenotype * genotype,const cString & prop) const375 cFlexVar cBGGenotypeManager::GetBioGroupProperty(const cBGGenotype* genotype, const cString& prop) const
376 {
377   if (!m_dcm) buildDataCommandManager();
378   tAutoRelease<tDataEntryCommand<cBGGenotype> > dc(m_dcm->GetDataCommand(prop));
379 
380   if (!dc.IsNull()) return dc->GetValue(genotype);
381 
382   return cFlexVar();
383 }
384 
385 
386 
hashGenome(const Sequence & genome) const387 unsigned int cBGGenotypeManager::hashGenome(const Sequence& genome) const
388 {
389   unsigned int total = 0;
390 
391   for (int i = 0; i < genome.GetSize(); i++) {
392     total += (genome[i].GetOp() + 3) * i;
393   }
394 
395   return total % nBGGenotypeManager::HASH_SIZE;
396 }
397 
nameGenotype(int size)398 cString cBGGenotypeManager::nameGenotype(int size)
399 {
400   if (m_sz_count.GetSize() <= size) m_sz_count.Resize(size + 1, 0);
401   int num = m_sz_count[size]++;
402 
403   char alpha[6];
404 
405   for (int i = 4; i >= 0; i--) {
406     alpha[i] = (num % 26) + 'a';
407     num /= 26;
408   }
409   alpha[5] = '\0';
410 
411   return cStringUtil::Stringf("%03d-%s", size, alpha);
412 }
413 
removeGenotype(cBGGenotype * genotype)414 void cBGGenotypeManager::removeGenotype(cBGGenotype* genotype)
415 {
416   if (genotype->GetActiveReferenceCount()) return;
417 
418   if (genotype->IsActive()) {
419     int list_num = hashGenome(genotype->GetGenome().GetSequence());
420     m_active_hash[list_num].Remove(genotype);
421     genotype->Deactivate(m_world->GetStats().GetUpdate());
422     //@CHC: If classification of historical genotypes is turned off, then we'll
423     //      just skip the step of adding a removed genotype to the historic list.
424     //      Note: the list will stay empty and the reported total number of genotypes
425     //      will not be correct
426     if (!m_world->GetConfig().DISABLE_GENOTYPE_CLASSIFICATION.Get()) {
427       m_historic.Push(genotype, &genotype->m_handle);
428     }
429   }
430 
431   if (genotype->IsThreshold()) {
432     NotifyListeners(genotype, BG_EVENT_REMOVE_THRESHOLD);
433     genotype->ClearThreshold();
434   }
435 
436   if (genotype->GetPassiveReferenceCount()) return;
437 
438   const tArray<cBGGenotype*>& parents = genotype->GetParents();
439   for (int i = 0; i < parents.GetSize(); i++) {
440     parents[i]->RemovePassiveReference();
441     updateCoalescent();
442 
443     // Pre-check for active genotypes to avoid recursion costs
444     if (!parents[i]->GetActiveReferenceCount()) removeGenotype(parents[i]);
445   }
446 
447   assert(genotype->m_handle);
448   genotype->m_handle->Remove(); // Remove from historic list
449   delete genotype;
450 }
451 
updateCoalescent()452 void cBGGenotypeManager::updateCoalescent()
453 {
454   if (m_coalescent && (m_coalescent->GetActiveReferenceCount() > 0 || m_coalescent->GetPassiveReferenceCount() > 1)) return;
455 
456   if (m_best == 0) {
457     m_coalescent = NULL;
458     m_world->GetStats().SetCoalescentGenotypeDepth(-1);
459     return;
460   }
461 
462   // @TODO - update coalescent assumes asexual population
463   cBGGenotype* test_gen = getBest();
464   cBGGenotype* found_gen = test_gen;
465   cBGGenotype* parent_gen = (found_gen->GetParents().GetSize()) ? found_gen->GetParents()[0] : NULL;
466 
467   while (parent_gen != NULL) {
468     if (test_gen->GetActiveReferenceCount() > 0 || test_gen->GetPassiveReferenceCount() > 1) found_gen = test_gen;
469 
470     test_gen = parent_gen;
471     parent_gen = (test_gen->GetParents().GetSize()) ? test_gen->GetParents()[0] : NULL;
472   }
473 
474   m_coalescent = found_gen;
475   m_world->GetStats().SetCoalescentGenotypeDepth(m_coalescent->GetDepth());
476 }
477 
buildDataCommandManager() const478 void cBGGenotypeManager::buildDataCommandManager() const
479 {
480   m_dcm = new tDataCommandManager<cBGGenotype>;
481 
482 #define ADD_PROP(NAME, TYPE, GET, DESC) \
483   m_dcm->Add(NAME, new tDataEntryOfType<cBGGenotype, TYPE>(NAME, DESC, &cBGGenotype::GET));
484 
485   ADD_PROP("genome", cString (), GetGenomeString, "Genome");
486   ADD_PROP("name", const cString& (), GetName, "Name");
487   ADD_PROP("parents", const cString& (), GetParentString, "Parents");
488   ADD_PROP("threshold", bool (), IsThreshold, "Threshold");
489   ADD_PROP("update_born", int (), GetUpdateBorn, "Update Born");
490   ADD_PROP("fitness", double (), GetFitness, "Average Fitness");
491   ADD_PROP("repro_rate", double (), GetReproRate, "Repro Rate");
492   ADD_PROP("recent_births", int (), GetThisBirths, "Recent Births (during update)");
493   ADD_PROP("recent_deaths", int (), GetThisDeaths, "Recent Deaths (during update)");
494   ADD_PROP("recent_breed_true", int (), GetThisBreedTrue, "Recent Breed True (during update)");
495   ADD_PROP("recent_breed_in", int (), GetThisBreedIn, "Recent Breed In (during update)");
496   ADD_PROP("recent_breed_out", int (), GetThisBreedOut, "Recent Breed Out (during update)");
497   ADD_PROP("total_organisms", int (), GetTotalOrganisms, "Total Organisms");
498   ADD_PROP("last_births", int (), GetLastBirths, "Births (during last update)");
499   ADD_PROP("last_breed_true", int (), GetLastBreedTrue, "Breed True (during last update)");
500   ADD_PROP("last_breed_in", int (), GetLastBreedIn, "Breed In (during last update)");
501   ADD_PROP("last_breed_out", int (), GetLastBreedOut, "Breed Out (during last update)");
502   ADD_PROP("last_birth_cell", int (), GetLastBirthCell, "Last birth cell");
503   ADD_PROP("last_group_id", int (), GetLastGroupID, "Last birth group");
504   ADD_PROP("last_forager_type", int (), GetLastForagerType, "Last birth forager type");
505 }
506 
Get()507 cBioGroup* cBGGenotypeManager::cGenotypeIterator::Get() { return m_it->Get(); }
508 
509 
Next()510 cBioGroup* cBGGenotypeManager::cGenotypeIterator::Next()
511 {
512   if (!m_it->Next()) {
513     for (m_sz_i--; m_sz_i > 0; m_sz_i--) {
514       if (m_bgm->m_active_sz[m_sz_i].GetSize()) {
515         delete m_it;
516         m_it = m_bgm->m_active_sz[m_sz_i].Iterator();
517         m_it->Next();
518         break;
519       }
520     }
521   }
522 
523   return m_it->Get();
524 }
525