1 /*
2 * cBGGenotypeManager.cc
3 * Avida
4 *
5 * Created by David on 11/11/09.
6 * Copyright 2009-2011 Michigan State University. All rights reserved.
7 *
8 *
9 * This file is part of Avida.
10 *
11 * Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
13 *
14 * Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License along with Avida.
18 * If not, see <http://www.gnu.org/licenses/>.
19 *
20 */
21
22 #include "cBGGenotypeManager.h"
23
24 #include "avida/core/Sequence.h"
25
26 #include "cBGGenotype.h"
27 #include "cDataFile.h"
28 #include "cStats.h"
29 #include "cStringUtil.h"
30 #include "cWorld.h"
31 #include "tArrayMap.h"
32 #include "tAutoRelease.h"
33 #include "tDataCommandManager.h"
34
35 using namespace Avida;
36 using namespace AvidaTools;
37
38
cBGGenotypeManager(cWorld * world)39 cBGGenotypeManager::cBGGenotypeManager(cWorld* world)
40 : m_world(world)
41 , m_active_sz(1)
42 , m_coalescent(NULL)
43 , m_best(0)
44 , m_next_id(1)
45 , m_dom_prev(-1)
46 , m_dom_time(0)
47 , m_dcm(NULL)
48 {
49 }
50
~cBGGenotypeManager()51 cBGGenotypeManager::~cBGGenotypeManager()
52 {
53 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[0].Iterator());
54 while (list_it->Next() != NULL) {
55 assert(list_it->Get()->GetActiveReferenceCount() == 0);
56 removeGenotype(list_it->Get());
57 }
58
59 assert(m_historic.GetSize() == 0);
60 assert(m_best == 0);
61 delete m_dcm;
62 }
63
64
ClassifyNewBioUnit(cBioUnit * bu,tArrayMap<cString,cString> * hints)65 cBioGroup* cBGGenotypeManager::ClassifyNewBioUnit(cBioUnit* bu, tArrayMap<cString, cString>* hints) { return ClassifyNewBioUnit(bu, NULL, hints); }
66
67
UpdateReset()68 void cBGGenotypeManager::UpdateReset()
69 {
70 if (m_active_sz.GetSize() < nBGGenotypeManager::HASH_SIZE) {
71 for (int i = 0; i < m_active_sz.GetSize(); i++) {
72 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
73 while (list_it->Next() != NULL) if (list_it->Get()->IsThreshold()) list_it->Get()->UpdateReset();
74 }
75 } else {
76 for (int i = 0; i < nBGGenotypeManager::HASH_SIZE; i++) {
77 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_hash[i].Iterator());
78 while (list_it->Next() != NULL) if (list_it->Get()->IsThreshold()) list_it->Get()->UpdateReset();
79 }
80 }
81
82 tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
83 while (list_it->Next() != NULL) if (!list_it->Get()->GetReferenceCount()) this->removeGenotype(list_it->Get());
84 }
85
86
UpdateStats(cStats & stats)87 void cBGGenotypeManager::UpdateStats(cStats& stats)
88 {
89 // @TODO - genotype manager should stash stats in cStats as a classification "role" stats object so that multiple roles can report stats
90
91 // Clear out genotype sums...
92 stats.SumGenotypeAge().Clear();
93 stats.SumAbundance().Clear();
94 stats.SumGenotypeDepth().Clear();
95 stats.SumSize().Clear();
96 stats.SumThresholdAge().Clear();
97
98 if(m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
99 stats.SumPreySize().Clear();
100 stats.SumPredSize().Clear();
101 }
102
103 double entropy = 0.0;
104 double prey_entropy = 0.0;
105 double pred_entropy = 0.0;
106 int active_count = 0;
107 for (int i = 1; i < m_active_sz.GetSize(); i++) {
108 active_count += m_active_sz[i].GetSize();
109 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
110 while (list_it->Next() != NULL) {
111 cBGGenotype* bg = list_it->Get();
112 const int abundance = bg->GetNumUnits();
113
114 // Update stats...
115 const int age = stats.GetUpdate() - bg->GetUpdateBorn();
116 stats.SumGenotypeAge().Add(age, abundance);
117 stats.SumAbundance().Add(abundance);
118 stats.SumGenotypeDepth().Add(bg->GetDepth(), abundance);
119 stats.SumSize().Add(bg->GetGenome().GetSequence().GetSize(), abundance);
120
121 if(m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
122 if (bg->GetLastForagerType() !=-2) stats.SumPreySize().Add(bg->GetGenome().GetSequence().GetSize(), abundance);
123 else stats.SumPredSize().Add(bg->GetGenome().GetSequence().GetSize(), abundance);
124 }
125
126 // Calculate this genotype's contribution to entropy
127 // - when p = 1.0, partial_ent calculation would return -0.0. This may propagate
128 // to the output stage, but behavior is dependent on compiler used and optimization
129 // level. For consistent output, ensures that 0.0 is returned.
130 const double p = ((double) abundance) / (double) stats.GetNumCreatures();
131 const double partial_ent = (abundance == stats.GetNumCreatures()) ? 0.0 : -(p * Log(p));
132 entropy += partial_ent;
133
134 if (m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
135 if (bg->GetLastForagerType() > -2) {
136 const double prey_p = ((double) abundance) / (double) stats.GetNumPreyCreatures();
137 const double prey_partial_ent = (abundance == stats.GetNumPreyCreatures()) ? 0.0 : -(prey_p * Log(prey_p));
138 prey_entropy += prey_partial_ent;
139 }
140 else {
141 const double pred_p = ((double) abundance) / (double) stats.GetNumPredCreatures();
142 const double pred_partial_ent = (abundance == stats.GetNumPredCreatures()) ? 0.0 : -(pred_p * Log(pred_p));
143 pred_entropy += pred_partial_ent;
144 }
145 }
146
147 // Do any special calculations for threshold genotypes.
148 if (bg->IsThreshold()) stats.SumThresholdAge().Add(age, abundance);
149 }
150 }
151
152 stats.SetEntropy(entropy);
153 stats.SetNumGenotypes(active_count, m_historic.GetSize());
154
155 if (m_world->GetConfig().PRED_PREY_SWITCH.Get() == -2 || m_world->GetConfig().PRED_PREY_SWITCH.Get() > -1) {
156 stats.SetPreyEntropy(prey_entropy);
157 stats.SetPredEntropy(pred_entropy);
158 }
159
160 // Handle dominant genotype stats
161 cBGGenotype* dom_genotype = getBest();
162 if (dom_genotype == NULL) return;
163
164 stats.SetDomMerit(dom_genotype->GetMerit());
165 stats.SetDomGestation(dom_genotype->GetGestationTime());
166 stats.SetDomReproRate(dom_genotype->GetReproRate());
167 stats.SetDomFitness(dom_genotype->GetFitness());
168 stats.SetDomCopiedSize(dom_genotype->GetCopiedSize());
169 stats.SetDomExeSize(dom_genotype->GetExecutedSize());
170
171 stats.SetDomSize(dom_genotype->GetGenome().GetSequence().GetSize());
172 stats.SetDomID(dom_genotype->GetID());
173 stats.SetDomName(dom_genotype->GetName());
174
175 if (dom_genotype->IsThreshold()) {
176 stats.SetDomBirths(dom_genotype->GetLastBirths());
177 stats.SetDomBreedTrue(dom_genotype->GetLastBreedTrue());
178 stats.SetDomBreedIn(dom_genotype->GetLastBreedIn());
179 stats.SetDomBreedOut(dom_genotype->GetLastBreedOut());
180 } else {
181 stats.SetDomBirths(dom_genotype->GetThisBirths());
182 stats.SetDomBreedTrue(dom_genotype->GetThisBreedTrue());
183 stats.SetDomBreedIn(dom_genotype->GetThisBreedIn());
184 stats.SetDomBreedOut(dom_genotype->GetThisBreedOut());
185 }
186
187 stats.SetDomAbundance(dom_genotype->GetNumUnits());
188 stats.SetDomGeneDepth(dom_genotype->GetDepth());
189 stats.SetDomSequence(dom_genotype->GetGenome().GetSequence().AsString());
190
191 stats.SetDomLastBirthCell(dom_genotype->GetLastBirthCell());
192 stats.SetDomLastGroup(dom_genotype->GetLastGroupID());
193 stats.SetDomLastForagerType(dom_genotype->GetLastForagerType());
194
195 }
196
197
198
GetBioGroup(int bg_id)199 cBioGroup* cBGGenotypeManager::GetBioGroup(int bg_id)
200 {
201 for (int i = m_best; i >= 0; i--) {
202 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
203 while (list_it->Next() != NULL) if (list_it->Get()->GetID() == bg_id) return list_it->Get();
204 }
205
206 tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
207 while (list_it->Next() != NULL) if (list_it->Get()->GetID() == bg_id) return list_it->Get();
208
209 return NULL;
210 }
211
212
LoadBioGroup(const tDictionary<cString> & props)213 cBioGroup* cBGGenotypeManager::LoadBioGroup(const tDictionary<cString>& props)
214 {
215 cBGGenotype* bg = new cBGGenotype(this, m_next_id++, props, m_world);
216 m_historic.Push(bg, &bg->m_handle);
217 return bg;
218 }
219
220
SaveBioGroups(cDataFile & df)221 void cBGGenotypeManager::SaveBioGroups(cDataFile& df)
222 {
223 // @TODO - Just dump historic for now. Need structured output format to support top down save
224 // With a structured save (and save params passed through), a "structured population save" could be attained
225 // by simply calling the bio group save. As it stands right now, cPopulation must decorate columns with additional
226 // data about active genotypes, yet the bio group interface really shouldn't know about active/inactive genotypes.
227 // Thus it is not proper to split bgm save into a save historic and save active. Right now we'll just make
228 // cPopulation do the work.
229
230 tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
231 while (list_it->Next() != NULL) {
232 list_it->Get()->Save(df);
233 df.Endl();
234 }
235 }
236
237
Iterator()238 tIterator<cBioGroup>* cBGGenotypeManager::Iterator()
239 {
240 return new cGenotypeIterator(this);
241 }
242
243
244
ClassifyNewBioUnit(cBioUnit * bu,tArray<cBioGroup * > * parents,tArrayMap<cString,cString> * hints)245 cBGGenotype* cBGGenotypeManager::ClassifyNewBioUnit(cBioUnit* bu, tArray<cBioGroup*>* parents, tArrayMap<cString, cString>* hints)
246 {
247 int list_num = hashGenome(bu->GetGenome().GetSequence());
248
249 cBGGenotype* found = NULL;
250
251 cString gid_str;
252 if (hints && hints->Get("id", gid_str)) {
253 int gid = gid_str.AsInt();
254
255 // Search all lists attempting to locate the referenced genotype by ID
256 for (int i = 0; i < m_active_sz.GetSize() && !found; i++) {
257 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_sz[i].Iterator());
258 while (list_it->Next() != NULL) {
259 if (list_it->Get()->GetID() == gid) {
260 found = list_it->Get();
261 found->NotifyNewBioUnit(bu);
262 break;
263 }
264 }
265 }
266
267 if (!found) {
268 tAutoRelease<tIterator<cBGGenotype> > list_it(m_historic.Iterator());
269 while (list_it->Next() != NULL) {
270 if (list_it->Get()->GetID() == gid) {
271 found = list_it->Get();
272 m_active_hash[hashGenome(found->GetGenome().GetSequence())].Push(found);
273 found->m_handle->Remove(); // Remove from historic list
274 m_active_sz[found->GetNumUnits()].PushRear(found, &found->m_handle);
275 found->NotifyNewBioUnit(bu);
276 m_world->GetStats().AddGenotype();
277 if (found->GetNumUnits() > m_best) {
278 m_best = found->GetNumUnits();
279 found->SetThreshold();
280 found->SetName(nameGenotype(found->GetGenome().GetSequence().GetSize()));
281 NotifyListeners(found, BG_EVENT_ADD_THRESHOLD);
282 }
283 }
284 }
285 }
286 }
287
288 // No hints or unable to locate hinted genome, search for a matching genotype
289 if (!found) {
290 tAutoRelease<tIterator<cBGGenotype> > list_it(m_active_hash[list_num].Iterator());
291 while (list_it->Next() != NULL) {
292 if (list_it->Get()->Matches(bu)) {
293 found = list_it->Get();
294 found->NotifyNewBioUnit(bu);
295 break;
296 }
297 }
298 }
299
300 // No matching genotype (hinted or otherwise), so create a new one
301 if (!found) {
302 //@CHC: If genotype classification is disabled, we don't want to keep track of the parents, or else
303 // we won't end up saving any memory
304 if (!m_world->GetConfig().DISABLE_GENOTYPE_CLASSIFICATION.Get()) { //It's enabled, so keep the parents
305 found = new cBGGenotype(this, m_next_id++, bu, m_world->GetStats().GetUpdate(), parents);
306 } else { //It's disabled, so toss the parents
307 found = new cBGGenotype(this, m_next_id++, bu, m_world->GetStats().GetUpdate(), NULL);
308 }
309 m_active_hash[list_num].Push(found);
310 resizeActiveList(found->GetNumUnits());
311 m_active_sz[found->GetNumUnits()].PushRear(found, &found->m_handle);
312 m_world->GetStats().AddGenotype();
313 if (found->GetNumUnits() > m_best) {
314 m_best = found->GetNumUnits();
315 found->SetThreshold();
316 found->SetName(nameGenotype(found->GetGenome().GetSequence().GetSize()));
317 NotifyListeners(found, BG_EVENT_ADD_THRESHOLD);
318 }
319 }
320
321 return found;
322 }
323
324
AdjustGenotype(cBGGenotype * genotype,int old_size,int new_size)325 void cBGGenotypeManager::AdjustGenotype(cBGGenotype* genotype, int old_size, int new_size)
326 {
327 // Remove from old size list
328 genotype->m_handle->Remove();
329 if (m_coalescent == genotype) m_coalescent = NULL;
330
331 // Handle best genotype pointer
332 bool was_best = (old_size && old_size == m_best);
333 if (was_best && m_active_sz[old_size].GetSize() == 0) {
334 for (m_best--; m_best > 0; m_best--) if (m_active_sz[m_best].GetSize()) break;
335 }
336
337 // Handle defunct genotypes
338 if (new_size == 0 && genotype->GetActiveReferenceCount() == 0) {
339 removeGenotype(genotype);
340 return;
341 }
342
343 // Add to new size list
344 resizeActiveList(new_size);
345 if (was_best && m_best == new_size) {
346 // Special case to keep the current best genotype as best when shrinking to the same size as other genotypes
347 m_active_sz[new_size].Push(genotype, &genotype->m_handle);
348 } else {
349 m_active_sz[new_size].PushRear(genotype, &genotype->m_handle);
350 if (new_size > m_best) m_best = new_size;
351 }
352
353 if (!genotype->IsThreshold() && (new_size >= m_world->GetConfig().THRESHOLD.Get() || genotype == getBest())) {
354 genotype->SetThreshold();
355 genotype->SetName(nameGenotype(genotype->GetGenome().GetSequence().GetSize()));
356 NotifyListeners(genotype, BG_EVENT_ADD_THRESHOLD);
357 }
358 }
359
360
361
GetBioGroupPropertyList() const362 const tArray<cString>& cBGGenotypeManager::GetBioGroupPropertyList() const
363 {
364 if (!m_dcm) buildDataCommandManager();
365 return m_dcm->GetEntryNames();
366 }
367
BioGroupHasProperty(const cString & prop) const368 bool cBGGenotypeManager::BioGroupHasProperty(const cString& prop) const
369 {
370 if (!m_dcm) buildDataCommandManager();
371 tAutoRelease<tDataEntryCommand<cBGGenotype> > dc(m_dcm->GetDataCommand(prop));
372 return (!dc.IsNull());
373 }
374
GetBioGroupProperty(const cBGGenotype * genotype,const cString & prop) const375 cFlexVar cBGGenotypeManager::GetBioGroupProperty(const cBGGenotype* genotype, const cString& prop) const
376 {
377 if (!m_dcm) buildDataCommandManager();
378 tAutoRelease<tDataEntryCommand<cBGGenotype> > dc(m_dcm->GetDataCommand(prop));
379
380 if (!dc.IsNull()) return dc->GetValue(genotype);
381
382 return cFlexVar();
383 }
384
385
386
hashGenome(const Sequence & genome) const387 unsigned int cBGGenotypeManager::hashGenome(const Sequence& genome) const
388 {
389 unsigned int total = 0;
390
391 for (int i = 0; i < genome.GetSize(); i++) {
392 total += (genome[i].GetOp() + 3) * i;
393 }
394
395 return total % nBGGenotypeManager::HASH_SIZE;
396 }
397
nameGenotype(int size)398 cString cBGGenotypeManager::nameGenotype(int size)
399 {
400 if (m_sz_count.GetSize() <= size) m_sz_count.Resize(size + 1, 0);
401 int num = m_sz_count[size]++;
402
403 char alpha[6];
404
405 for (int i = 4; i >= 0; i--) {
406 alpha[i] = (num % 26) + 'a';
407 num /= 26;
408 }
409 alpha[5] = '\0';
410
411 return cStringUtil::Stringf("%03d-%s", size, alpha);
412 }
413
removeGenotype(cBGGenotype * genotype)414 void cBGGenotypeManager::removeGenotype(cBGGenotype* genotype)
415 {
416 if (genotype->GetActiveReferenceCount()) return;
417
418 if (genotype->IsActive()) {
419 int list_num = hashGenome(genotype->GetGenome().GetSequence());
420 m_active_hash[list_num].Remove(genotype);
421 genotype->Deactivate(m_world->GetStats().GetUpdate());
422 //@CHC: If classification of historical genotypes is turned off, then we'll
423 // just skip the step of adding a removed genotype to the historic list.
424 // Note: the list will stay empty and the reported total number of genotypes
425 // will not be correct
426 if (!m_world->GetConfig().DISABLE_GENOTYPE_CLASSIFICATION.Get()) {
427 m_historic.Push(genotype, &genotype->m_handle);
428 }
429 }
430
431 if (genotype->IsThreshold()) {
432 NotifyListeners(genotype, BG_EVENT_REMOVE_THRESHOLD);
433 genotype->ClearThreshold();
434 }
435
436 if (genotype->GetPassiveReferenceCount()) return;
437
438 const tArray<cBGGenotype*>& parents = genotype->GetParents();
439 for (int i = 0; i < parents.GetSize(); i++) {
440 parents[i]->RemovePassiveReference();
441 updateCoalescent();
442
443 // Pre-check for active genotypes to avoid recursion costs
444 if (!parents[i]->GetActiveReferenceCount()) removeGenotype(parents[i]);
445 }
446
447 assert(genotype->m_handle);
448 genotype->m_handle->Remove(); // Remove from historic list
449 delete genotype;
450 }
451
updateCoalescent()452 void cBGGenotypeManager::updateCoalescent()
453 {
454 if (m_coalescent && (m_coalescent->GetActiveReferenceCount() > 0 || m_coalescent->GetPassiveReferenceCount() > 1)) return;
455
456 if (m_best == 0) {
457 m_coalescent = NULL;
458 m_world->GetStats().SetCoalescentGenotypeDepth(-1);
459 return;
460 }
461
462 // @TODO - update coalescent assumes asexual population
463 cBGGenotype* test_gen = getBest();
464 cBGGenotype* found_gen = test_gen;
465 cBGGenotype* parent_gen = (found_gen->GetParents().GetSize()) ? found_gen->GetParents()[0] : NULL;
466
467 while (parent_gen != NULL) {
468 if (test_gen->GetActiveReferenceCount() > 0 || test_gen->GetPassiveReferenceCount() > 1) found_gen = test_gen;
469
470 test_gen = parent_gen;
471 parent_gen = (test_gen->GetParents().GetSize()) ? test_gen->GetParents()[0] : NULL;
472 }
473
474 m_coalescent = found_gen;
475 m_world->GetStats().SetCoalescentGenotypeDepth(m_coalescent->GetDepth());
476 }
477
buildDataCommandManager() const478 void cBGGenotypeManager::buildDataCommandManager() const
479 {
480 m_dcm = new tDataCommandManager<cBGGenotype>;
481
482 #define ADD_PROP(NAME, TYPE, GET, DESC) \
483 m_dcm->Add(NAME, new tDataEntryOfType<cBGGenotype, TYPE>(NAME, DESC, &cBGGenotype::GET));
484
485 ADD_PROP("genome", cString (), GetGenomeString, "Genome");
486 ADD_PROP("name", const cString& (), GetName, "Name");
487 ADD_PROP("parents", const cString& (), GetParentString, "Parents");
488 ADD_PROP("threshold", bool (), IsThreshold, "Threshold");
489 ADD_PROP("update_born", int (), GetUpdateBorn, "Update Born");
490 ADD_PROP("fitness", double (), GetFitness, "Average Fitness");
491 ADD_PROP("repro_rate", double (), GetReproRate, "Repro Rate");
492 ADD_PROP("recent_births", int (), GetThisBirths, "Recent Births (during update)");
493 ADD_PROP("recent_deaths", int (), GetThisDeaths, "Recent Deaths (during update)");
494 ADD_PROP("recent_breed_true", int (), GetThisBreedTrue, "Recent Breed True (during update)");
495 ADD_PROP("recent_breed_in", int (), GetThisBreedIn, "Recent Breed In (during update)");
496 ADD_PROP("recent_breed_out", int (), GetThisBreedOut, "Recent Breed Out (during update)");
497 ADD_PROP("total_organisms", int (), GetTotalOrganisms, "Total Organisms");
498 ADD_PROP("last_births", int (), GetLastBirths, "Births (during last update)");
499 ADD_PROP("last_breed_true", int (), GetLastBreedTrue, "Breed True (during last update)");
500 ADD_PROP("last_breed_in", int (), GetLastBreedIn, "Breed In (during last update)");
501 ADD_PROP("last_breed_out", int (), GetLastBreedOut, "Breed Out (during last update)");
502 ADD_PROP("last_birth_cell", int (), GetLastBirthCell, "Last birth cell");
503 ADD_PROP("last_group_id", int (), GetLastGroupID, "Last birth group");
504 ADD_PROP("last_forager_type", int (), GetLastForagerType, "Last birth forager type");
505 }
506
Get()507 cBioGroup* cBGGenotypeManager::cGenotypeIterator::Get() { return m_it->Get(); }
508
509
Next()510 cBioGroup* cBGGenotypeManager::cGenotypeIterator::Next()
511 {
512 if (!m_it->Next()) {
513 for (m_sz_i--; m_sz_i > 0; m_sz_i--) {
514 if (m_bgm->m_active_sz[m_sz_i].GetSize()) {
515 delete m_it;
516 m_it = m_bgm->m_active_sz[m_sz_i].Iterator();
517 m_it->Next();
518 break;
519 }
520 }
521 }
522
523 return m_it->Get();
524 }
525