1 /*
2 * cAnalyzeGenotype.cc
3 * Avida
4 *
5 * Called "analyze_genotype.cc" prior to 12/2/05.
6 * Copyright 1999-2011 Michigan State University. All rights reserved.
7 * Copyright 1993-2003 California Institute of Technology.
8 *
9 *
10 * This file is part of Avida.
11 *
12 * Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14 *
15 * Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License along with Avida.
19 * If not, see <http://www.gnu.org/licenses/>.
20 *
21 */
22
23 #include "cAnalyzeGenotype.h"
24
25 #include "avida/core/WorldDriver.h"
26
27 #include "cAvidaContext.h"
28 #include "cCPUTestInfo.h"
29 #include "cHardwareBase.h"
30 #include "cHardwareManager.h"
31 #include "cInstSet.h"
32 #include "cOrganism.h"
33 #include "cPhenotype.h"
34 #include "cPhenPlastGenotype.h"
35 #include "cPlasticPhenotype.h"
36 #include "cTestCPU.h"
37 #include "cEnvironment.h"
38 #include "cHardwareManager.h"
39 #include "cWorld.h"
40
41 #include "tArray.h"
42 #include "tAutoRelease.h"
43 #include "tDataCommandManager.h"
44 #include "tDMSingleton.h"
45
46
47 #include <cmath>
48 using namespace std;
49 using namespace Avida;
50
51
cAnalyzeGenotype(cWorld * world,const Genome & genome)52 cAnalyzeGenotype::cAnalyzeGenotype(cWorld* world, const Genome& genome)
53 : m_world(world)
54 , m_genome(genome)
55 , name("")
56 , m_cpu_test_info()
57 , m_data(new sGenotypeDatastore)
58 , aligned_sequence("")
59 , tag("")
60 , viable(false)
61 , id_num(-1)
62 , parent_id(-1)
63 , parent2_id(-1)
64 , num_cpus(0)
65 , total_cpus(0)
66 , update_born(0)
67 , update_dead(0)
68 , depth(0)
69 , length(0)
70 , copy_length(0)
71 , exe_length(0)
72 , merit(0.0)
73 , gest_time(INT_MAX)
74 , fitness(0.0)
75 , errors(0)
76 , inst_executed_counts(0)
77 , task_counts(0)
78 , task_qualities(0)
79 , internal_task_counts(0)
80 , internal_task_qualities(0)
81 , rbins_total(0)
82 , rbins_avail(0)
83 , collect_spec_counts(0)
84 , m_mating_type(MATING_TYPE_JUVENILE)
85 , m_mate_preference(MATE_PREFERENCE_RANDOM)
86 , m_mating_display_a(0)
87 , m_mating_display_b(0)
88 , fitness_ratio(0.0)
89 , efficiency_ratio(0.0)
90 , comp_merit_ratio(0.0)
91 , parent_dist(0)
92 , ancestor_dist(0)
93 , parent_muts("")
94 , knockout_stats(NULL)
95 , m_land(NULL)
96 , m_phenplast_stats(NULL)
97 {
98
99 }
100
// Copy constructor: duplicates the listed statistics of _gen member by member.
// Knockout statistics and the phenotypic-plasticity summary are deep-copied when the
// source has them; the landscape pointer is NOT copied and always starts out NULL.
// m_data is initialized directly from _gen.m_data.
// NOTE(review): members such as m_env_inputs, executed_flags, div_type, mate_id and
// m_parent_str are assigned elsewhere in this file but do not appear in this
// initializer list -- confirm against the class declaration whether that is intended.
cAnalyzeGenotype::cAnalyzeGenotype(const cAnalyzeGenotype& _gen) :
  m_world(_gen.m_world),
  m_genome(_gen.m_genome),
  name(_gen.name),
  m_cpu_test_info(_gen.m_cpu_test_info),
  m_data(_gen.m_data),
  aligned_sequence(_gen.aligned_sequence),
  tag(_gen.tag),
  viable(_gen.viable),
  id_num(_gen.id_num),
  parent_id(_gen.parent_id),
  parent2_id(_gen.parent2_id),
  num_cpus(_gen.num_cpus),
  total_cpus(_gen.total_cpus),
  update_born(_gen.update_born),
  update_dead(_gen.update_dead),
  depth(_gen.depth),
  length(_gen.length),
  copy_length(_gen.copy_length),
  exe_length(_gen.exe_length),
  merit(_gen.merit),
  gest_time(_gen.gest_time),
  fitness(_gen.fitness),
  errors(_gen.errors),
  inst_executed_counts(_gen.inst_executed_counts),
  task_counts(_gen.task_counts),
  task_qualities(_gen.task_qualities),
  internal_task_counts(_gen.internal_task_counts),
  internal_task_qualities(_gen.internal_task_qualities),
  rbins_total(_gen.rbins_total),
  rbins_avail(_gen.rbins_avail),
  collect_spec_counts(_gen.collect_spec_counts),
  m_mating_type(_gen.m_mating_type),
  m_mate_preference(_gen.m_mate_preference),
  m_mating_display_a(_gen.m_mating_display_a),
  m_mating_display_b(_gen.m_mating_display_b),
  fitness_ratio(_gen.fitness_ratio),
  efficiency_ratio(_gen.efficiency_ratio),
  comp_merit_ratio(_gen.comp_merit_ratio),
  parent_dist(_gen.parent_dist),
  ancestor_dist(_gen.ancestor_dist),
  parent_muts(_gen.parent_muts),
  knockout_stats(NULL),
  m_land(NULL),
  m_phenplast_stats(NULL)
{
  // Deep-copy the optional knockout statistics so the two genotypes never share them.
  if (_gen.knockout_stats) {
    knockout_stats = new cAnalyzeKnockouts;
    *knockout_stats = *_gen.knockout_stats;
  }

  // Likewise clone the plasticity summary, if the source has one.
  if (_gen.m_phenplast_stats) {
    m_phenplast_stats = new cPhenPlastSummary(*_gen.m_phenplast_stats);
  }
}
154
~cAnalyzeGenotype()155 cAnalyzeGenotype::~cAnalyzeGenotype()
156 {
157 if (knockout_stats != NULL) delete knockout_stats;
158 if (m_phenplast_stats != NULL) delete m_phenplast_stats;
159 Unlink();
160 }
161
162
// Initializes the tDMSingleton that holds the tDataCommandManager for cAnalyzeGenotype,
// handing it buildDataCommandManager() as the function used to build the manager.
void cAnalyzeGenotype::Initialize()
{
  tDMSingleton<tDataCommandManager<cAnalyzeGenotype> >::Initialize(&cAnalyzeGenotype::buildDataCommandManager);
}
167
168
// Allocates a new tDataCommandManager and registers every keyword through which a
// cAnalyzeGenotype statistic can be read (and, where a setter exists, written).
// Returns the heap-allocated manager to the caller.
// Several keywords are deliberate aliases that share the same accessors
// (e.g. "num_cpus"/"num_units"/"dom_num_cpus", "update_born"/"gen_born",
// "update_dead"/"update_deactivated"/"update").
tDataCommandManager<cAnalyzeGenotype>* cAnalyzeGenotype::buildDataCommandManager()
{
  tDataCommandManager<cAnalyzeGenotype>* dcm = new tDataCommandManager<cAnalyzeGenotype>;

  // A basic macro to link a keyword to a description and Get and Set methods in cAnalyzeGenotype.
  // NSTR and HSTR are stringified so that a literal 0 argument can be detected and
  // replaced by the defaults ("0" and "align=center" respectively).
#define ADD_GDATA(TYPE, KEYWORD, DESC, GET, SET, COMP, NSTR, HSTR) \
  { \
    cString nstr_str(#NSTR), hstr_str(#HSTR); \
    cString null_str = "0"; \
    if (nstr_str != "0") null_str = NSTR; \
    cString html_str = "align=center"; \
    if (hstr_str != "0") html_str = HSTR; \
    \
    dcm->Add(KEYWORD, new tDataEntryOfType<cAnalyzeGenotype, TYPE> \
      (KEYWORD, DESC, &cAnalyzeGenotype::GET, &cAnalyzeGenotype::SET, COMP, null_str, html_str)); \
  }

  // To add a new keyword connected to a stat in cAnalyzeGenotype, you need to connect all of the pieces here.
  // The ADD_GDATA macro takes eight arguments:
  // type : The type of the variables being linked in.
  // keyword : The short word used to reference this variable from analyze mode.
  // description : A slightly fuller description of what this variable is; used in data legends.
  // "get" accessor : The accessor method to retrieve the value of this variable from cAnalyzeGenotype
  // "set" accessor : The method to set this variable in cAnalyzeGenotype (use SetNULL if none exists).
  // comparison method : A method that will take two genotypes and compare this value between them (or CompareNULL)
  // null keyword : A string to represent what should be printed if this stat is zero. (0 for default)
  // html flags : A string to be included in the <td> when stat is printed in HTML table (0 for "align=center")

  // As a reminder about the compare types:
  // FLEX_COMPARE_NONE = 0 -- No comparisons should be done at all.
  // FLEX_COMPARE_DIFF = 1 -- Only track if a stat has changed, don't worry about direction.
  // FLEX_COMPARE_MAX = 2 -- Color higher values as beneficial, lower as harmful.
  // FLEX_COMPARE_MIN = 3 -- Color lower values as beneficial, higher as harmful.
  // FLEX_COMPARE_DIFF2 = 4 -- Same as FLEX_COMPARE_DIFF, but 0 indicates trait is off.
  // FLEX_COMPARE_MAX2 = 5 -- Same as FLEX_COMPARE_MAX, and 0 indicates trait is off.
  // FLEX_COMPARE_MIN2 = 6 -- Same as FLEX_COMPARE_MIN, BUT 0 still indicates off.

  // Identity, ancestry and basic per-genotype statistics.
  ADD_GDATA(const cString& (), "name", "Genotype Name", GetName, SetName, 0, 0, 0);
  ADD_GDATA(bool (), "viable", "Is Viable (0/1)", GetViable, SetViable, 5, 0, 0);
  ADD_GDATA(int (), "id", "Genotype ID", GetID, SetID, 0, 0, 0);
  ADD_GDATA(int (), "src", "Genotype Source", GetSource, SetSource, 0, 0, 0);
  ADD_GDATA(const cString& (), "src_args", "Genotype Source Arguments", GetSourceArgs, SetSourceArgs, 0, "(none)", 0);
  ADD_GDATA(const cString& (), "tag", "Genotype Tag", GetTag, SetTag, 0, "(none)","");
  ADD_GDATA(const cString& (), "parents", "Parent String", GetParents, SetParents, 0, "(none)", 0);
  ADD_GDATA(int (), "parent_id", "Parent ID", GetParentID, SetParentID, 0, 0, 0);
  ADD_GDATA(int (), "parent2_id", "Second Parent ID (sexual orgs)",GetParent2ID, SetParent2ID, 0, 0, 0);
  ADD_GDATA(int (), "parent_dist", "Parent Distance", GetParentDist, SetParentDist, 0, 0, 0);
  ADD_GDATA(int (), "ancestor_dist","Ancestor Distance", GetAncestorDist, SetAncestorDist, 0, 0, 0);
  ADD_GDATA(int (), "lineage", "Unique Lineage Label", GetLineageLabel, SetLineageLabel, 0, 0, 0);
  ADD_GDATA(int (), "num_cpus", "Number of CPUs", GetNumCPUs, SetNumCPUs, 0, 0, 0);
  ADD_GDATA(int (), "total_cpus", "Total CPUs Ever", GetTotalCPUs, SetTotalCPUs, 0, 0, 0);
  ADD_GDATA(int (), "num_units", "Number of CPUs", GetNumCPUs, SetNumCPUs, 0, 0, 0);
  ADD_GDATA(int (), "total_units", "Total CPUs Ever", GetTotalCPUs, SetTotalCPUs, 0, 0, 0);
  ADD_GDATA(int (), "length", "Genome Length", GetLength, SetLength, 4, 0, 0);
  ADD_GDATA(int (), "copy_length", "Copied Length", GetCopyLength, SetCopyLength, 0, 0, 0);
  ADD_GDATA(int (), "exe_length", "Executed Length", GetExeLength, SetExeLength, 0, 0, 0);
  ADD_GDATA(double (), "merit", "Merit", GetMerit, SetMerit, 5, 0, 0);
  ADD_GDATA(double (), "comp_merit", "Computational Merit", GetCompMerit, SetNULL, 5, 0, 0);
  ADD_GDATA(double (), "comp_merit_ratio", "Computational Merit Ratio", GetCompMeritRatio, SetNULL, 5, 0, 0);
  ADD_GDATA(int (), "gest_time", "Gestation Time", GetGestTime, SetGestTime, 6, "Inf", 0);
  ADD_GDATA(double (), "efficiency", "Rep. Efficiency", GetEfficiency, SetNULL, 5, 0, 0);
  ADD_GDATA(double (), "efficiency_ratio", "Rep. Efficiency Ratio", GetEfficiencyRatio,SetNULL, 5, 0, 0);
  ADD_GDATA(double (), "fitness", "Fitness", GetFitness, SetFitness, 5, 0, 0);
  ADD_GDATA(double (), "div_type", "Divide Type", GetDivType, SetDivType, 0, 0, 0);
  ADD_GDATA(int (), "mate_id", "Mate Selection ID Number", GetMateID, SetMateID, 0, 0, 0);
  ADD_GDATA(double (), "fitness_ratio","Fitness Ratio", GetFitnessRatio, SetNULL, 5, 0, 0);
  ADD_GDATA(int (), "update_born", "Update Born", GetUpdateBorn, SetUpdateBorn, 0, 0, 0);
  ADD_GDATA(int (), "gen_born", "Update Born", GetUpdateBorn, SetUpdateBorn, 0, 0, 0);
  ADD_GDATA(int (), "update_dead", "Update Dead", GetUpdateDead, SetUpdateDead, 0, 0, 0);
  ADD_GDATA(int (), "update_deactivated", "Update Dead", GetUpdateDead, SetUpdateDead, 0, 0, 0);
  ADD_GDATA(int (), "depth", "Tree Depth", GetDepth, SetDepth, 0, 0, 0);
  ADD_GDATA(const cString& (), "cells", "Cells", GetCells, SetCells, 0, 0, 0);
  ADD_GDATA(const cString& (), "gest_offset", "Gest Offsets", GetGestOffsets, SetGestOffsets, 0, 0, 0);
  ADD_GDATA(double (), "frac_dead", "Fraction Mutations Lethal", GetFracDead, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "frac_neg", "Fraction Mutations Detrimental",GetFracNeg, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "frac_neut", "Fraction Mutations Neutral", GetFracNeut, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "frac_pos", "Fraction Mutations Beneficial", GetFracPos, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "complexity", "Basic Complexity (beneficial muts are neutral)", GetComplexity, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "land_fitness", "Average Lanscape Fitness", GetLandscapeFitness, SetNULL, 0, 0, 0);

  // Mating-related statistics.
  ADD_GDATA(int(), "mating_type", "Mating type (-1 = juvenile; 0 = female; 1 = male)", GetMatingType, SetMatingType, 0, 0, 0);
  ADD_GDATA(int(), "mate_preference", "Mate preference", GetMatePreference, SetMatePreference, 0, 0, 0);
  ADD_GDATA(int(), "mating_display_a", "Mating display A", GetMatingDisplayA, SetMatingDisplayA, 0, 0, 0);
  ADD_GDATA(int(), "mating_display_b", "Mating display B", GetMatingDisplayB, SetMatingDisplayB, 0, 0, 0);

  // Phenotypic-plasticity statistics (read-only: all registered with SetNULL).
  ADD_GDATA(int (), "num_phen", "Number of Plastic Phenotypes", GetNumPhenotypes, SetNULL, 0, 0, 0);
  ADD_GDATA(int (), "num_trials", "Number of Recalculation Trials", GetNumTrials, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_entropy", "Phenotpyic Entropy", GetPhenotypicEntropy, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_max_fitness", "Phen Plast Maximum Fitness", GetMaximumFitness, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_max_fit_freq", "Phen Plast Maximum Fitness Frequency", GetMaximumFitnessFrequency,SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_min_fitness", "Phen Plast Minimum Fitness", GetMinimumFitness, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_min_freq", "Phen Plast Minimum Fitness Frequency", GetMinimumFitnessFrequency,SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_avg_fitness", "Phen Plast Wtd Avg Fitness", GetAverageFitness, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_likely_freq", "Freq of Most Likely Phenotype", GetLikelyFrequency, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "phen_likely_fitness","Fitness of Most Likely Phenotype", GetLikelyFitness, SetNULL, 0, 0, 0);
  ADD_GDATA(double (), "prob_viable", "Probability Viable", GetViableProbability, SetNULL, 0, 0, 0);


  // @JEB There is a difference between these two. parent_muts is based on an alignment. mut_steps is based on recorded mutations during run.
  ADD_GDATA(const cString& (), "parent_muts", "Mutations from Parent", GetParentMuts, SetParentMuts, 0, "(none)", "");
  ADD_GDATA(const cString (), "mut_steps", "Mutation Steps from Parent", GetMutSteps, SetMutSteps, 0, "", "");

  ADD_GDATA(const cString& (), "task_order", "Task Performance Order", GetTaskOrder, SetTaskOrder, 0, "(none)", "");
  ADD_GDATA(int (), "hw_type", "Hardware Type", GetHWType, SetHWType, 0, "(N/A)", "");
  ADD_GDATA(const cString& (), "inst_set", "Instruction Set", GetInstSet, SetInstSet, 0, "(N/A)", "");
  ADD_GDATA(cString (), "sequence", "Genome Sequence", GetSequence, SetSequence, 0, "(N/A)", "");
  ADD_GDATA(const cString& (), "alignment", "Aligned Sequence", GetAlignedSequence, SetAlignedSequence, 0, "(N/A)", "");

  ADD_GDATA(cString (), "executed_flags", "Executed Flags", GetExecutedFlags, SetNULL, 0, "(N/A)", "");
  ADD_GDATA(cString (), "alignment_executed_flags", "Alignment Executed Flags", GetAlignmentExecutedFlags, SetNULL, 0, "(N/A)", "");
  ADD_GDATA(cString (), "task_list", "List of all tasks performed", GetTaskList, SetNULL, 0, "(N/A)", "");

  // @TODO - the following were link.tasksites and html.sequence, respectively. The period character is now separated as
  // an argument passed into the function, thus they are matched as the component before the period. For now
  // I have simply removed the argument part, since there are not any existing name clashes. However, in future
  // versions we should rename these.
  ADD_GDATA(cString (), "link", "Phenotype Map", GetMapLink, SetNULL, 0, 0, 0);
  ADD_GDATA(cString (), "html", "Genome Sequence", GetHTMLSequence, SetNULL, 0, "(N/A)", "");

  // coarse-grained task stats
  ADD_GDATA(int (), "total_task_count","# Different Tasks", GetTotalTaskCount, SetNULL, 1, 0, 0);
  ADD_GDATA(int (), "total_task_performance_count", "Total Tasks Performed", GetTotalTaskPerformanceCount, SetNULL, 1, 0, 0);


  // Indexed entries: these are registered directly (not via ADD_GDATA) because their
  // accessors take an index argument and their descriptions come from a Desc* method.
  dcm->Add("task", new tDataEntryOfType<cAnalyzeGenotype, int (int, const cStringList&)>
    ("task", &cAnalyzeGenotype::DescTask, &cAnalyzeGenotype::GetTaskCount, 5));
  dcm->Add("task_quality", new tDataEntryOfType<cAnalyzeGenotype, double (int)>
    ("task_quality", &cAnalyzeGenotype::DescTask, &cAnalyzeGenotype::GetTaskQuality, 5));
  dcm->Add("env_input", new tDataEntryOfType<cAnalyzeGenotype, int (int)>
    ("env_input", &cAnalyzeGenotype::DescEnvInput, &cAnalyzeGenotype::GetEnvInput));
  dcm->Add("inst", new tDataEntryOfType<cAnalyzeGenotype, int (int)>
    ("inst", &cAnalyzeGenotype::DescInstExe, &cAnalyzeGenotype::GetInstExecutedCount));
  dcm->Add("r_tot", new tDataEntryOfType<cAnalyzeGenotype, double (int)>
    ("r_tot", &cAnalyzeGenotype::DescRTot, &cAnalyzeGenotype::GetRBinTotal));
  dcm->Add("r_avail", new tDataEntryOfType<cAnalyzeGenotype, double (int)>
    ("r_avail", &cAnalyzeGenotype::DescRAvail, &cAnalyzeGenotype::GetRBinAvail));
  dcm->Add("prob_task", new tDataEntryOfType<cAnalyzeGenotype, double (int)>
    ("prob_task", &cAnalyzeGenotype::DescTaskProb, &cAnalyzeGenotype::GetTaskProbability, 5));
  dcm->Add("r_spec", new tDataEntryOfType<cAnalyzeGenotype, int (int)>
    ("r_spec", &cAnalyzeGenotype::DescRSpec, &cAnalyzeGenotype::GetRSpec));


  // The remaining values should actually go in a separate list called
  // "population_data_list", but for the moment we're going to put them
  // here so that we only need to worry about a single system to load and
  // save genotype information.
  ADD_GDATA(int (), "update", "Update Output", GetUpdateDead, SetUpdateDead, 0, 0, 0);
  ADD_GDATA(int (), "dom_num_cpus", "Number of Dominant Organisms", GetNumCPUs, SetNumCPUs, 0, 0, 0);
  ADD_GDATA(int (), "dom_depth", "Tree Depth of Dominant Genotype", GetDepth, SetDepth, 0, 0, 0);
  ADD_GDATA(int (), "dom_id", "Dominant Genotype ID", GetID, SetID, 0, 0, 0);
  ADD_GDATA(cString (), "dom_sequence", "Dominant Genotype Sequence", GetSequence, SetSequence, 0, "(N/A)", "");


  return dcm;
  // The macro is only meaningful inside this function; the directive below is handled
  // by the preprocessor regardless of the return statement above it.
#undef ADD_GDATA
}
325
// Returns a reference to the tDataCommandManager instance held by the tDMSingleton
// (the same singleton that Initialize() sets up).
tDataCommandManager<cAnalyzeGenotype>& cAnalyzeGenotype::GetDataCommandManager()
{
  return tDMSingleton<tDataCommandManager<cAnalyzeGenotype> >::GetInstance();
}
330
331
DescTask(int task_id) const332 cString cAnalyzeGenotype::DescTask(int task_id) const
333 {
334 if (task_id > m_world->GetEnvironment().GetNumTasks()) return "";
335 return m_world->GetEnvironment().GetTask(task_id).GetDesc();
336 }
337
DescTaskProb(int task_id) const338 cString cAnalyzeGenotype::DescTaskProb(int task_id) const
339 {
340 if (task_id > m_world->GetEnvironment().GetNumTasks()) return "";
341 return DescTask(task_id) + " (Probability)";
342 }
343
344
~sGenotypeDatastore()345 cAnalyzeGenotype::sGenotypeDatastore::~sGenotypeDatastore()
346 {
347 for (tArrayMap<int, cGenotypeData*>::iterator it = dmap.begin(); it != dmap.end(); it++) delete it->Value();
348 }
349
// Stores `data` under key `data_id` in this genotype's datastore map.
// The map update is bracketed by the datastore's write lock.
// NOTE(review): whether dmap.Set() releases a value previously stored under the same
// key is not visible here -- confirm to rule out a leak on overwrite.
void cAnalyzeGenotype::SetGenotypeData(int data_id, cGenotypeData* data)
{
  m_data->rwlock.WriteLock();
  m_data->dmap.Set(data_id, data);
  m_data->rwlock.WriteUnlock();
}
356
357
// Returns the TEST_CPU_TIME_MOD configuration value multiplied by the genome size.
int cAnalyzeGenotype::CalcMaxGestation() const
{
  return m_world->GetConfig().TEST_CPU_TIME_MOD.Get() * m_genome.GetSize();
}
362
// Computes (and caches in knockout_stats) the effect of replacing each genome
// position with the null instruction, classifying every site as lethal, detrimental,
// neutral or beneficial relative to the recalculated base fitness.
//
//   check_pairs : also test pairs of knockouts and fill in the pair_* counters.
//   check_chart : also record the task counts of every single-site knockout.
//
// Results are cached: the function returns immediately when knockout_stats already
// holds everything that was requested. Loop bounds use the `length` member.
// NOTE(review): when the base fitness is 0 the function returns before has_chart_info
// or has_pair_info is set, so a later call requesting pairs/chart will reset and
// recompute -- confirm this is intended.
void cAnalyzeGenotype::CalcKnockouts(bool check_pairs, bool check_chart) const
{
  if (knockout_stats == NULL) {
    // We've never called this before -- setup the stats.
    knockout_stats = new cAnalyzeKnockouts;
  }
  else if (check_pairs == true && knockout_stats->has_pair_info == false) {
    // We don't have the pair stats we need -- keep going.
    knockout_stats->Reset();
  }
  else if (check_chart == true && knockout_stats->has_chart_info == false) {
    // We don't have the phenotype chart we need -- keep going.
    knockout_stats->Reset();
  }
  else {
    // We already have all the info we need -- just quit.
    return;
  }

  cAvidaContext& ctx = m_world->GetDefaultContext();

  // NOTE(review): testcpu is created here and deleted on every return path below, but
  // it is not otherwise used in this function; it would also be leaked if
  // RaiseException() below does not return normally.
  cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(ctx);

  // Calculate the base fitness for the genotype we're working with...
  // (This may not have been run already, and costs negligibly more time
  // considering the number of knockouts we need to do.)
  cAnalyzeGenotype base_genotype(m_world, m_genome);
  base_genotype.Recalculate(ctx);
  double base_fitness = base_genotype.GetFitness();
  const tArray<int> base_task_counts( base_genotype.GetTaskCounts() );

  // If the base fitness is 0, the organism is dead and has no complexity.
  if (base_fitness == 0.0) {
    knockout_stats->neut_count = length;
    delete testcpu;
    return;
  }

  // Working copy of the genome; sites are knocked out and restored in place.
  Genome mod_genome(m_genome);

  // Setup a NULL instruction needed for testing
  const cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(mod_genome.GetInstSet()).ActivateNullInst();

  // If we are keeping track of the specific effects on tasks from the
  // knockouts, setup the matrix.
  if (check_chart == true) {
    knockout_stats->task_counts.Resize(length);
    knockout_stats->has_chart_info = true;
  }

  // Loop through all the lines of code, testing the removal of each.
  // -2=lethal, -1=detrimental, 0=neutral, 1=beneficial
  tArray<int> ko_effect(length);
  for (int line_num = 0; line_num < length; line_num++) {
    // Save a copy of the current instruction and replace it with "NULL"
    int cur_inst = mod_genome.GetSequence()[line_num].GetOp();
    mod_genome.GetSequence()[line_num] = null_inst;
    cAnalyzeGenotype ko_genotype(m_world, mod_genome);
    ko_genotype.Recalculate(ctx);
    if (check_chart == true) {
      const tArray<int> ko_task_counts( ko_genotype.GetTaskCounts() );
      knockout_stats->task_counts[line_num] = ko_task_counts;
    }

    double ko_fitness = ko_genotype.GetFitness();
    if (ko_fitness == 0.0) {
      knockout_stats->dead_count++;
      ko_effect[line_num] = -2;
    } else if (ko_fitness < base_fitness) {
      knockout_stats->neg_count++;
      ko_effect[line_num] = -1;
    } else if (ko_fitness == base_fitness) {
      knockout_stats->neut_count++;
      ko_effect[line_num] = 0;
    } else if (ko_fitness > base_fitness) {
      knockout_stats->pos_count++;
      ko_effect[line_num] = 1;
    } else {
      // Only reachable when every comparison above is false (e.g. a NaN fitness).
      // NOTE(review): ko_effect[line_num] is not assigned on this path.
      m_world->GetDriver().RaiseException("internal: illegal state in CalcKnockouts()");
    }

    // Reset the mod_genome back to the original sequence.
    mod_genome.GetSequence()[line_num].SetOp(cur_inst);
  }

  // Only continue from here if we are looking at all pairs of knockouts
  // as well.
  if (check_pairs == false) {
    delete testcpu;
    return;
  }

  // ko_pair_effect starts as a copy of the single-site effects and is revised
  // whenever a pair of knockouts contradicts what the individual effects suggest.
  tArray<int> ko_pair_effect(ko_effect);
  for (int line1 = 0; line1 < length; line1++) {
    // If this line has already been changed, keep going...
    if (ko_effect[line1] != ko_pair_effect[line1]) continue;

    // Loop through all possibilities for the next line.
    for (int line2 = line1+1; line2 < length; line2++) {
      // If this line has already been changed, keep going...
      if (ko_effect[line2] != ko_pair_effect[line2]) continue;

      // If the two lines are of different types (one is information and the
      // other is not) then we're not interested in testing this combination
      // since any possible result is reasonable.
      if ((ko_effect[line1] < 0 && ko_effect[line2] >= 0) ||
          (ko_effect[line1] >= 0 && ko_effect[line2] < 0)) {
        continue;
      }

      // Calculate the fitness for this pair of knockouts to determine if it's
      // something other than what we expected.

      int cur_inst1 = mod_genome.GetSequence()[line1].GetOp();
      int cur_inst2 = mod_genome.GetSequence()[line2].GetOp();
      mod_genome.GetSequence()[line1] = null_inst;
      mod_genome.GetSequence()[line2] = null_inst;
      cAnalyzeGenotype ko_genotype(m_world, mod_genome);
      ko_genotype.Recalculate(ctx);

      double ko_fitness = ko_genotype.GetFitness();

      // If the individual knockouts are both harmful, but in combination
      // they are neutral or even beneficial, they should not count as
      // information.
      if (ko_fitness >= base_fitness &&
          ko_effect[line1] < 0 && ko_effect[line2] < 0) {
        ko_pair_effect[line1] = 0;
        ko_pair_effect[line2] = 0;
      }

      // If the individual knockouts are both neutral (or beneficial?),
      // but in combination they are harmful, they are likely redundant
      // to each other. For now, count them both as information.
      if (ko_fitness < base_fitness &&
          ko_effect[line1] >= 0 && ko_effect[line2] >= 0) {
        ko_pair_effect[line1] = -1;
        ko_pair_effect[line2] = -1;
      }

      // Reset the mod_genome back to the original sequence.
      mod_genome.GetSequence()[line1].SetOp(cur_inst1);
      mod_genome.GetSequence()[line2].SetOp(cur_inst2);
    }
  }

  // Tally the (possibly revised) per-site classifications into the pair counters.
  for (int i = 0; i < length; i++) {
    if (ko_pair_effect[i] == -2) knockout_stats->pair_dead_count++;
    else if (ko_pair_effect[i] == -1) knockout_stats->pair_neg_count++;
    else if (ko_pair_effect[i] == 0) knockout_stats->pair_neut_count++;
    else if (ko_pair_effect[i] == 1) knockout_stats->pair_pos_count++;
  }

  knockout_stats->has_pair_info = true;
  delete testcpu;
}
519
CheckLand() const520 void cAnalyzeGenotype::CheckLand() const
521 {
522 if (m_land == NULL) {
523 m_land = new cLandscape(m_world, m_genome);
524 m_land->SetCPUTestInfo(m_cpu_test_info);
525 m_land->SetDistance(1);
526 m_land->Process(m_world->GetDefaultContext());
527 }
528 }
529
CheckPhenPlast() const530 void cAnalyzeGenotype::CheckPhenPlast() const
531 {
532 // Implicit genotype recalculation if required
533 if (m_phenplast_stats == NULL) {
534 cCPUTestInfo test_info;
535
536 cPhenPlastGenotype pp(m_genome, 1000, test_info, m_world, m_world->GetDefaultContext());
537 m_phenplast_stats = new cPhenPlastSummary(pp);
538 }
539 }
540
541
542
CalcLandscape(cAvidaContext & ctx)543 void cAnalyzeGenotype::CalcLandscape(cAvidaContext& ctx)
544 {
545 if (m_land == NULL) m_land = new cLandscape(m_world, m_genome);
546 m_land->SetCPUTestInfo(m_cpu_test_info);
547 m_land->SetDistance(1);
548 m_land->Process(ctx);
549 }
550
551
Recalculate(cAvidaContext & ctx,cCPUTestInfo * test_info,cAnalyzeGenotype * parent_genotype,int num_trials)552 void cAnalyzeGenotype::Recalculate(cAvidaContext& ctx, cCPUTestInfo* test_info, cAnalyzeGenotype* parent_genotype, int num_trials)
553 {
554 // Allocate our own test info if it wasn't provided
555 tAutoRelease<cCPUTestInfo> local_test_info;
556 if (!test_info)
557 {
558 test_info = new cCPUTestInfo();
559 local_test_info.Set(test_info);
560 }
561
562 // Handling recalculation here
563 cPhenPlastGenotype recalc_data(m_genome, num_trials, *test_info, m_world, ctx);
564
565 // The most likely phenotype will be assigned to the phenotype stats
566 const cPlasticPhenotype* likely_phenotype = recalc_data.GetMostLikelyPhenotype();
567
568 viable = likely_phenotype->IsViable();
569 m_env_inputs = likely_phenotype->GetEnvInputs();
570 executed_flags = likely_phenotype->GetExecutedFlags();
571 inst_executed_counts = likely_phenotype->GetLastInstCount();
572 length = likely_phenotype->GetGenomeLength();
573 copy_length = likely_phenotype->GetCopiedSize();
574 exe_length = likely_phenotype->GetExecutedSize();
575 merit = likely_phenotype->GetMerit().GetDouble();
576 gest_time = likely_phenotype->GetGestationTime();
577 fitness = likely_phenotype->GetFitness();
578 errors = likely_phenotype->GetLastNumErrors();
579 div_type = likely_phenotype->GetDivType();
580 mate_id = likely_phenotype->MateSelectID();
581 task_counts = likely_phenotype->GetLastTaskCount();
582 task_qualities = likely_phenotype->GetLastTaskQuality();
583 internal_task_counts = likely_phenotype->GetLastInternalTaskCount();
584 internal_task_qualities = likely_phenotype->GetLastInternalTaskQuality();
585 rbins_total = likely_phenotype->GetLastRBinsTotal();
586 rbins_avail = likely_phenotype->GetLastRBinsAvail();
587 collect_spec_counts = likely_phenotype->GetLastCollectSpecCounts();
588 m_mating_type = likely_phenotype->GetMatingType(); //@CHC
589 m_mate_preference = likely_phenotype->GetMatePreference(); //@CHC
590 m_mating_display_a = likely_phenotype->GetCurMatingDisplayA();
591 m_mating_display_b = likely_phenotype->GetCurMatingDisplayB();
592
593
594 // Setup a new parent stats if we have a parent to work with.
595 if (parent_genotype != NULL) {
596 fitness_ratio = GetFitness() / parent_genotype->GetFitness();
597 efficiency_ratio = GetEfficiency() / parent_genotype->GetEfficiency();
598 comp_merit_ratio = GetCompMerit() / parent_genotype->GetCompMerit();
599 parent_dist = cStringUtil::EditDistance(m_genome.GetSequence().AsString(), parent_genotype->GetGenome().GetSequence().AsString(), parent_muts);
600
601 ancestor_dist = parent_genotype->GetAncestorDist() + parent_dist;
602 }
603
604 // Summarize plasticity information if multiple recalculations performed
605 if (num_trials > 1){
606 if (m_phenplast_stats != NULL)
607 delete m_phenplast_stats;
608 m_phenplast_stats = new cPhenPlastSummary(recalc_data);
609 }
610 }
611
612
PrintTasks(ofstream & fp,int min_task,int max_task)613 void cAnalyzeGenotype::PrintTasks(ofstream& fp, int min_task, int max_task)
614 {
615 if (max_task == -1) max_task = task_counts.GetSize();
616
617 for (int i = min_task; i < max_task; i++) {
618 fp << task_counts[i] << " ";
619 }
620 }
621
PrintTasksQuality(ofstream & fp,int min_task,int max_task)622 void cAnalyzeGenotype::PrintTasksQuality(ofstream& fp, int min_task, int max_task)
623 {
624 if (max_task == -1) max_task = task_counts.GetSize();
625
626 for (int i = min_task; i < max_task; i++) {
627 fp << task_qualities[i] << " ";
628 }
629 }
630
PrintInternalTasks(ofstream & fp,int min_task,int max_task)631 void cAnalyzeGenotype::PrintInternalTasks(ofstream& fp, int min_task, int max_task)
632 {
633 if (max_task == -1) max_task = internal_task_counts.GetSize();
634
635 for (int i = min_task; i < max_task; i++) {
636 fp << internal_task_counts[i] << " ";
637 }
638 }
639
PrintInternalTasksQuality(ofstream & fp,int min_task,int max_task)640 void cAnalyzeGenotype::PrintInternalTasksQuality(ofstream& fp, int min_task, int max_task)
641 {
642 if (max_task == -1) max_task = internal_task_counts.GetSize();
643
644 for (int i = min_task; i < max_task; i++) {
645 fp << internal_task_qualities[i] << " ";
646 }
647 }
648
SetParents(const cString & parent_str)649 void cAnalyzeGenotype::SetParents(const cString& parent_str)
650 {
651 cString lps(parent_str);
652 if (lps.GetSize()) parent_id = lps.Pop(',').AsInt();
653 if (lps.GetSize()) parent2_id = lps.Pop(',').AsInt();
654 }
655
SetParentID(int _parent_id)656 void cAnalyzeGenotype::SetParentID(int _parent_id)
657 {
658 parent_id = _parent_id;
659 if (parent_id >= 0) {
660 if (parent2_id >= 0) {
661 m_parent_str = cStringUtil::Stringf("%d,%d", parent_id, parent2_id);
662 } else {
663 m_parent_str = cStringUtil::Stringf("%d", parent_id);
664 }
665 } else {
666 m_parent_str = "";
667 }
668 }
669
SetParent2ID(int _parent2_id)670 void cAnalyzeGenotype::SetParent2ID(int _parent2_id)
671 {
672 parent2_id = _parent2_id;
673 if (parent_id >= 0) {
674 if (parent2_id >= 0) {
675 m_parent_str = cStringUtil::Stringf("%d,%d", parent_id, parent2_id);
676 } else {
677 m_parent_str = cStringUtil::Stringf("%d", parent_id);
678 }
679 } else {
680 m_parent_str = "";
681 }
682 }
683
// Forwards the given hardware type to the underlying genome.
void cAnalyzeGenotype::SetHWType(int hw_type)
{
  m_genome.SetHardwareType(hw_type);
}
688
// Forwards the given instruction set name to the underlying genome.
void cAnalyzeGenotype::SetInstSet(const cString& inst_set)
{
  m_genome.SetInstSet(inst_set);
}
693
SetSequence(cString _sequence)694 void cAnalyzeGenotype::SetSequence(cString _sequence)
695 {
696 Sequence new_genome(_sequence);
697 m_genome.SetSequence(new_genome);
698 }
699
700
GetAlignmentExecutedFlags() const701 cString cAnalyzeGenotype::GetAlignmentExecutedFlags() const
702 {
703 // Make this on the fly from executed flags
704 // and the genome sequence, inserting gaps...
705 cString aligned_executed_flags = GetExecutedFlags();
706 cString aligned_seq = GetAlignedSequence();
707
708 for (int i=0; i<aligned_seq.GetSize(); i++)
709 {
710 if (aligned_seq[i] == '_') aligned_executed_flags.Insert("_", i);
711 }
712
713 return aligned_executed_flags;
714 }
715
GetInstExecutedCount(int _inst_num) const716 int cAnalyzeGenotype::GetInstExecutedCount(int _inst_num) const
717 {
718 if(_inst_num < inst_executed_counts.GetSize() && _inst_num > 0)
719 { return inst_executed_counts[_inst_num]; }
720
721 // If the instruction is not valid, clearly it has never been executed!
722 return 0;
723 }
724
DescInstExe(int _inst_id) const725 cString cAnalyzeGenotype::DescInstExe(int _inst_id) const
726 {
727 if(_inst_id > inst_executed_counts.GetSize() || _inst_id < 0) return "";
728
729 cString desc("# Times ");
730 desc += m_world->GetHardwareManager().GetInstSet(m_genome.GetInstSet()).GetName(_inst_id);
731 desc += " Executed";
732 return desc;
733 }
734
GetKO_DeadCount() const735 int cAnalyzeGenotype::GetKO_DeadCount() const
736 {
737 CalcKnockouts(false); // Make sure knockouts are calculated
738 return knockout_stats->dead_count;
739 }
740
GetKO_NegCount() const741 int cAnalyzeGenotype::GetKO_NegCount() const
742 {
743 CalcKnockouts(false); // Make sure knockouts are calculated
744 return knockout_stats->neg_count;
745 }
746
GetKO_NeutCount() const747 int cAnalyzeGenotype::GetKO_NeutCount() const
748 {
749 CalcKnockouts(false); // Make sure knockouts are calculated
750 return knockout_stats->neut_count;
751 }
752
GetKO_PosCount() const753 int cAnalyzeGenotype::GetKO_PosCount() const
754 {
755 CalcKnockouts(false); // Make sure knockouts are calculated
756 return knockout_stats->pos_count;
757 }
758
GetKO_Complexity() const759 int cAnalyzeGenotype::GetKO_Complexity() const
760 {
761 CalcKnockouts(false); // Make sure knockouts are calculated
762 return knockout_stats->dead_count + knockout_stats->neg_count;
763 }
764
GetKOPair_DeadCount() const765 int cAnalyzeGenotype::GetKOPair_DeadCount() const
766 {
767 CalcKnockouts(true); // Make sure knockouts are calculated
768 return knockout_stats->pair_dead_count;
769 }
770
GetKOPair_NegCount() const771 int cAnalyzeGenotype::GetKOPair_NegCount() const
772 {
773 CalcKnockouts(true); // Make sure knockouts are calculated
774 return knockout_stats->pair_neg_count;
775 }
776
GetKOPair_NeutCount() const777 int cAnalyzeGenotype::GetKOPair_NeutCount() const
778 {
779 CalcKnockouts(true); // Make sure knockouts are calculated
780 return knockout_stats->pair_neut_count;
781 }
782
GetKOPair_PosCount() const783 int cAnalyzeGenotype::GetKOPair_PosCount() const
784 {
785 CalcKnockouts(true); // Make sure knockouts are calculated
786 return knockout_stats->pair_pos_count;
787 }
788
GetKOPair_Complexity() const789 int cAnalyzeGenotype::GetKOPair_Complexity() const
790 {
791 CalcKnockouts(true); // Make sure knockouts are calculated
792 return knockout_stats->pair_dead_count + knockout_stats->pair_neg_count;
793 }
794
GetKO_TaskCounts() const795 const tArray< tArray<int> > & cAnalyzeGenotype::GetKO_TaskCounts() const
796 {
797 CalcKnockouts(false, true); // Make sure knockouts are calculated
798 return knockout_stats->task_counts;
799 }
800
GetTaskList() const801 cString cAnalyzeGenotype::GetTaskList() const
802 {
803 const int num_tasks = task_counts.GetSize();
804 cString out_string(num_tasks);
805
806 for (int i = 0; i < num_tasks; i++) {
807 const int cur_count = task_counts[i];
808 if (cur_count < 10) {
809 out_string[i] = '0' + cur_count;
810 }
811 else if (cur_count < 30) {
812 out_string[i] = 'X';
813 }
814 else if (cur_count < 80) {
815 out_string[i] = 'L';
816 }
817 else if (cur_count < 300) {
818 out_string[i] = 'C';
819 }
820 else if (cur_count < 800) {
821 out_string[i] = 'D';
822 }
823 else if (cur_count < 3000) {
824 out_string[i] = 'M';
825 }
826 else {
827 out_string[i] = '+';
828 }
829 }
830
831 return out_string;
832 }
833
834
GetHTMLSequence() const835 cString cAnalyzeGenotype::GetHTMLSequence() const
836 {
837 cString text_genome = m_genome.GetSequence().AsString();
838 cString html_code("<tt>");
839
840 cString diff_info = parent_muts;
841 char mut_type = 'N';
842 int mut_pos = -1;
843
844 cString cur_mut = diff_info.Pop(',');
845 if (cur_mut != "") {
846 mut_type = cur_mut[0];
847 cur_mut.ClipFront(1); cur_mut.ClipEnd(1);
848 mut_pos = cur_mut.AsInt();
849 }
850
851 int ins_count = 0;
852 for (int i = 0; i < m_genome.GetSize(); i++) {
853 char symbol = text_genome[i];
854 if (i != mut_pos) html_code += symbol;
855 else {
856 // Figure out the information for the type of mutation we had...
857 cString color;
858 if (mut_type == 'M') {
859 color = "#FF0000";
860 } else if (mut_type == 'I') {
861 color = "#00FF00";
862 ins_count++;
863 } else { // if (mut_type == 'D') {
864 color = "#0000FF";
865 symbol = '*';
866 i--; // Rewind - we didn't read the handle character yet!
867 }
868
869 // Move on to the next mutation...
870 cur_mut = diff_info.Pop(',');
871 if (cur_mut != "") {
872 mut_type = cur_mut[0];
873 cur_mut.ClipFront(1); cur_mut.ClipEnd(1);
874 mut_pos = cur_mut.AsInt();
875 if (mut_type == 'D') mut_pos += ins_count;
876 } else mut_pos = -1;
877
878 // Tack on the current symbol...
879 cString symbol_string;
880 symbol_string.Set("<b><font color=\"%s\">%c</font></b>", static_cast<const char*>(color), symbol);
881 html_code += symbol_string;
882 }
883 }
884
885 html_code += "</tt>";
886
887 return html_code;
888 }
889