1 /*
2 * cModularityAnalysis.cc
3 * Avida
4 *
5 * Created by David on 1/11/09.
6 * Copyright 2009-2011 Michigan State University. All rights reserved.
7 *
8 *
9 * This file is part of Avida.
10 *
11 * Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
13 *
14 * Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License along with Avida.
18 * If not, see <http://www.gnu.org/licenses/>.
19 *
20 */
21
22 #include "cModularityAnalysis.h"
23
24 #include "cAnalyzeGenotype.h"
25 #include "cOrganism.h"
26 #include "cPhenotype.h"
27 #include "tDataCommandManager.h"
28 #include "tDataEntry.h"
29
30
Initialize()31 void cModularityAnalysis::Initialize()
32 {
33 tDataCommandManager<cAnalyzeGenotype>& dcm = cAnalyzeGenotype::GetDataCommandManager();
34 // A basic macro to link a keyword to a description and Get and Set methods in cAnalyzeGenotype.
35 #define ADD_GDATA(KEYWORD, DESC, GET, COMP) \
36 dcm.Add(KEYWORD, new tDataEntryProxy<cAnalyzeGenotype, cFlexVar ()> \
37 (KEYWORD, DESC, &cModularityAnalysis::GET, COMP));
38 #define ADD_GDATA_IDX(KEYWORD, DESC, GET, COMP) \
39 dcm.Add(KEYWORD, new tDataEntryProxy<cAnalyzeGenotype, cFlexVar (int)> \
40 (KEYWORD, &cModularityAnalysis::DESC, &cModularityAnalysis::GET, COMP));
41
42 ADD_GDATA("tasks_done", "Number of Tasks Performed", GetTasksDoneFor, 2);
43 ADD_GDATA("insts_tasks", "Number of Instructions Involved in Tasks", GetInstsInvolvedInTasksFor, 1);
44 ADD_GDATA("tasks_prop", "Proportion of Sites in Tasks", GetTaskProportionFor, 4);
45 ADD_GDATA("ave_tasks_per_site", "Average Number of Tasks Per Site", GetAveTasksPerSiteFor, 6);
46 ADD_GDATA("ave_sites_per_task", "Average Number of Sites Per Task", GetAveSitesPerTaskFor, 6);
47 ADD_GDATA("ave_prop_nonoverlap", "Average Proportion of the Non-overlapping Region of a Task", GetTaskProportionFor, 4);
48 ADD_GDATA_IDX("sites_per_task", DescSitesPerTask, GetSitesPerTaskFor, 1);
49 ADD_GDATA_IDX("sites_inv_x_tasks", DescSitesInvolvedInXTasks, GetSitesInvolvedInXTasksFor, 1);
50 ADD_GDATA_IDX("task_length", DescTaskLength, GetTaskLengthFor, 4);
51 ADD_GDATA_IDX("ave_task_position", DescAveTaskPosition, GetAveTaskPositionFor, 4);
52 }
53
CalcFunctionalModularity(cAvidaContext & ctx)54 void cModularityAnalysis::CalcFunctionalModularity(cAvidaContext& ctx)
55 {
56 cTestCPU* testcpu = m_genotype->GetWorld()->GetHardwareManager().CreateTestCPU(ctx);
57 cCPUTestInfo test_info = m_test_info;
58
59 const Genome& base_genome = m_genotype->GetGenome();
60 const Sequence& base_seq = base_genome.GetSequence();
61
62 // Calculate the stats for the genotype we're working with...
63 testcpu->TestGenome(ctx, test_info, base_genome);
64 double base_fitness = test_info.GetColonyFitness();
65
66 // Check if the organism does any tasks
67 bool does_tasks = false;
68 const tArray<int> base_tasks = test_info.GetColonyOrganism()->GetPhenotype().GetLastTaskCount();
69 const int num_tasks = base_tasks.GetSize();
70 for (int i = 0; i < num_tasks; i++) {
71 if (base_tasks[i] > 0) {
72 does_tasks = true;
73 break;
74 }
75 }
76
77 // Don't calculate the modularity if the organism doesn't reproduce. i.e. if the fitness is 0
78 if (base_fitness > 0.0 && does_tasks) {
79 // Set up the instruction set for mapping
80 cInstSet& map_inst_set = m_genotype->GetWorld()->GetHardwareManager().GetInstSet(base_genome.GetInstSet());
81 const cInstruction null_inst = map_inst_set.ActivateNullInst();
82
83 // Genome for testing
84 const int max_line = base_genome.GetSize();
85 Genome mod_genome(base_genome);
86 Sequence& seq = mod_genome.GetSequence();
87
88 // Create and initialize the modularity matrix
89 tMatrix<int> mod_matrix(num_tasks, max_line);
90 mod_matrix.SetAll(0);
91
92 tArray<int> site_num_tasks(max_line, 0); // number of tasks instruction is used in
93 tArray<int> sites_per_task(num_tasks, 0); // number of sites involved in each task
94
95 // Loop through all the lines of code, testing the removal of each.
96 for (int line_num = 0; line_num < max_line; line_num++) {
97 int cur_inst = base_seq[line_num].GetOp();
98
99 seq[line_num] = null_inst;
100
101 // Run the modified genome through the Test CPU
102 testcpu->TestGenome(ctx, test_info, mod_genome);
103
104 if (test_info.GetColonyFitness() > 0.0) {
105 const tArray<int>& test_tasks = test_info.GetColonyOrganism()->GetPhenotype().GetLastTaskCount();
106
107 for (int cur_task = 0; cur_task < num_tasks; cur_task++) {
108 // This is done so that under 'binary' option it marks
109 // the task as being influenced by the mutation iff
110 // it is completely knocked out, not just decreased
111 if (base_tasks[cur_task] && !test_tasks[cur_task]) {
112 // If knocking out an instruction stops the expression of a particular task, mark that in the modularity matrix
113 // and add it to two counts
114 mod_matrix(cur_task, line_num) = 1;
115 sites_per_task[cur_task]++;
116 site_num_tasks[line_num]++;
117 }
118 }
119 }
120
121 // Reset the mod_genome back to the original sequence.
122 seq[line_num].SetOp(cur_inst);
123 }
124
125 tArray<int> sites_inv_x_tasks(num_tasks + 1, 0); // # of inst's involved in 0,1,2,3... tasks
126 tArray<double> ave_task_position(num_tasks, 0.0); // mean positions of the tasks in the genome
127 tArray<int> task_length(num_tasks, 0); // distance between first and last inst involved in a task
128
129 int total_task = 0; // total number of tasks done
130 int total_inst = 0; // total number of instructions involved in tasks
131 int total_all = 0; // sum of mod_matrix
132 double sum_task_overlap = 0; // sum of task overlap for for this genome
133
134
135 // Calculate instruction and task totals
136 for (int i = 0; i < num_tasks; i++) {
137 total_all += sites_per_task[i];
138 if (sites_per_task[i]) total_task++;
139 }
140 for (int i = 0; i < max_line; i++) {
141 sites_inv_x_tasks[site_num_tasks[i]]++;
142 if (site_num_tasks[i] != 0) total_inst++;
143 }
144
145
146 // Calculate average task overlap
147 // first construct num_task x num_task matrix with number of sites overlapping
148 tMatrix<int> task_overlap(num_tasks, num_tasks);
149 task_overlap.SetAll(0);
150
151 for (int i = 0; i < max_line; i++) {
152 for (int j = 0; j < num_tasks; j++) {
153 for (int k = j; k < num_tasks; k++) {
154 if (mod_matrix(j, i) > 0 && mod_matrix(k, i) > 0) {
155 task_overlap(j, k)++;
156 if (j != k) task_overlap(k, j)++;
157 }
158 }
159 }
160 }
161
162 // go though the task_overlap matrix, add and average everything up.
163 if (total_task > 1) {
164 for (int i = 0; i < num_tasks; i++) {
165 if (task_overlap(i, i)) {
166 int overlap_per_task = 0;
167 for (int j = 0; j < num_tasks; j++) if (i != j) overlap_per_task += task_overlap(i,j);
168 sum_task_overlap += (double)overlap_per_task / (task_overlap(i, i) * (total_task - 1));
169 }
170 }
171 }
172
173
174 // Calculate the first/last position of a task, the task "spread"
175 // starting from the top look for the fist command that matters for a task
176 for (int i = 0; i < num_tasks; i++) {
177 for (int j = 0; j < max_line; j++) {
178 if (mod_matrix(i, j) > 0 && task_length[i] == 0) {
179 task_length[i] = j;
180 break;
181 }
182 }
183 }
184
185 // starting from the bottom look for the last command that matters for a task
186 // and subtract it from the first to get the task length
187 // add one in order to account for both the beginning and the end instruction
188 for (int i = 0; i < num_tasks; i++) {
189 for (int j = max_line - 1; j >= 0; j--) {
190 if (mod_matrix(i, j) > 0) {
191 task_length[i] = j - task_length[i] + 1;
192 break;
193 }
194 }
195 }
196
197
198 // Calculate mean positions of the tasks
199 tArray<int> task_position(num_tasks);
200 for (int i = 0; i < num_tasks; i++) {
201 task_position[i] = 0;
202 for (int j = 0; j < max_line; j++) if (mod_matrix(i,j) > 0) task_position[i] += j;
203 }
204 for (int i = 0; i < num_tasks; i++) ave_task_position[i] = (double)task_position[i] / sites_per_task[i];
205
206
207 cModularityData* mod_data = new cModularityData;
208 mod_data->tasks_done = total_task;
209 mod_data->insts_tasks = total_inst;
210 mod_data->tasks_prop = (double)total_inst / max_line;
211 mod_data->ave_tasks_per_site = (total_inst) ? (double)total_all / total_inst : 0.0;
212 mod_data->ave_sites_per_task = (total_task) ? (double)total_all / total_task : 0.0;
213 mod_data->ave_prop_nonoverlap = (total_task) ? sum_task_overlap / total_task : 0.0;
214 mod_data->sites_per_task = sites_per_task;
215 mod_data->sites_inv_x_tasks = sites_inv_x_tasks;
216 mod_data->task_length = task_length;
217 mod_data->ave_task_position = ave_task_position;
218 m_genotype->SetGenotypeData(GD_MD_ID, mod_data);
219 }
220 }
221
222 #ifdef DEBUG
223 #define GET_MD() \
224 cAnalyzeGenotype::ReadToken* tok = genotype->GetReadToken(); \
225 cModularityData* data = dynamic_cast<cModularityData*>(genotype->GetGenotypeData(tok, GD_MD_ID)); \
226 delete tok;
227 #else
228 #define GET_MD() \
229 cAnalyzeGenotype::ReadToken* tok = genotype->GetReadToken(); \
230 cModularityData* data = static_cast<cModularityData*>(genotype->GetGenotypeData(tok, GD_MD_ID)); \
231 delete tok;
232 #endif
233
GetTasksDoneFor(const cAnalyzeGenotype * genotype)234 cFlexVar cModularityAnalysis::GetTasksDoneFor(const cAnalyzeGenotype* genotype)
235 {
236 GET_MD();
237 if (data) return cFlexVar(data->tasks_done);
238 return cFlexVar(0);
239 }
240
GetInstsInvolvedInTasksFor(const cAnalyzeGenotype * genotype)241 cFlexVar cModularityAnalysis::GetInstsInvolvedInTasksFor(const cAnalyzeGenotype* genotype)
242 {
243 GET_MD();
244 if (data) return cFlexVar(data->insts_tasks);
245 return cFlexVar(0);
246 }
247
GetTaskProportionFor(const cAnalyzeGenotype * genotype)248 cFlexVar cModularityAnalysis::GetTaskProportionFor(const cAnalyzeGenotype* genotype)
249 {
250 GET_MD();
251 if (data) return cFlexVar(data->tasks_prop);
252 return cFlexVar(0.0);
253 }
254
GetAveTasksPerSiteFor(const cAnalyzeGenotype * genotype)255 cFlexVar cModularityAnalysis::GetAveTasksPerSiteFor(const cAnalyzeGenotype* genotype)
256 {
257 GET_MD();
258 if (data) return cFlexVar(data->ave_tasks_per_site);
259 return cFlexVar(0.0);
260 }
261
GetAveSitesPerTaskFor(const cAnalyzeGenotype * genotype)262 cFlexVar cModularityAnalysis::GetAveSitesPerTaskFor(const cAnalyzeGenotype* genotype)
263 {
264 GET_MD();
265 if (data) return cFlexVar(data->ave_sites_per_task);
266 return cFlexVar(0.0);
267 }
268
GetPropNonoverlapFor(const cAnalyzeGenotype * genotype)269 cFlexVar cModularityAnalysis::GetPropNonoverlapFor(const cAnalyzeGenotype* genotype)
270 {
271 GET_MD();
272 if (data) return cFlexVar(data->ave_prop_nonoverlap);
273 return cFlexVar(0.0);
274 }
275
276
GetSitesPerTaskFor(const cAnalyzeGenotype * genotype,int idx)277 cFlexVar cModularityAnalysis::GetSitesPerTaskFor(const cAnalyzeGenotype* genotype, int idx)
278 {
279 GET_MD();
280 if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->sites_per_task[idx]);
281 return cFlexVar(0);
282 }
283
DescSitesPerTask(const cAnalyzeGenotype * genotype,int idx)284 cString cModularityAnalysis::DescSitesPerTask(const cAnalyzeGenotype* genotype, int idx)
285 {
286 return cStringUtil::Stringf("Number of Sites Per Task %d", idx);
287 }
288
289
GetSitesInvolvedInXTasksFor(const cAnalyzeGenotype * genotype,int idx)290 cFlexVar cModularityAnalysis::GetSitesInvolvedInXTasksFor(const cAnalyzeGenotype* genotype, int idx)
291 {
292 GET_MD();
293 if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->sites_inv_x_tasks[idx]);
294 return cFlexVar(0);
295 }
296
DescSitesInvolvedInXTasks(const cAnalyzeGenotype * genotype,int idx)297 cString cModularityAnalysis::DescSitesInvolvedInXTasks(const cAnalyzeGenotype* genotype, int idx)
298 {
299 return cStringUtil::Stringf("Number of Sites Involved in %d Tasks", idx);
300 }
301
302
GetTaskLengthFor(const cAnalyzeGenotype * genotype,int idx)303 cFlexVar cModularityAnalysis::GetTaskLengthFor(const cAnalyzeGenotype* genotype, int idx)
304 {
305 GET_MD();
306 if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->task_length[idx]);
307 return cFlexVar(0);
308 }
309
DescTaskLength(const cAnalyzeGenotype * genotype,int idx)310 cString cModularityAnalysis::DescTaskLength(const cAnalyzeGenotype* genotype, int idx)
311 {
312 return cStringUtil::Stringf("Task %d Length", idx);
313 }
314
315
GetAveTaskPositionFor(const cAnalyzeGenotype * genotype,int idx)316 cFlexVar cModularityAnalysis::GetAveTaskPositionFor(const cAnalyzeGenotype* genotype, int idx)
317 {
318 GET_MD();
319 if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->ave_task_position[idx]);
320 return cFlexVar(0.0);
321 }
322
DescAveTaskPosition(const cAnalyzeGenotype * genotype,int idx)323 cString cModularityAnalysis::DescAveTaskPosition(const cAnalyzeGenotype* genotype, int idx)
324 {
325 return cStringUtil::Stringf("Task %d Position", idx);
326 }
327
328 #undef GET_MD
329
330
cModularityData()331 cModularityAnalysis::cModularityData::cModularityData()
332 : tasks_done(0), insts_tasks(0), tasks_prop(0.0), ave_tasks_per_site(0.0), ave_sites_per_task(0.0), ave_prop_nonoverlap(0.0)
333 {
334 }
335