1 /*
2  *  cModularityAnalysis.cc
3  *  Avida
4  *
5  *  Created by David on 1/11/09.
6  *  Copyright 2009-2011 Michigan State University. All rights reserved.
7  *
8  *
9  *  This file is part of Avida.
10  *
11  *  Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
12  *  as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
13  *
14  *  Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public License along with Avida.
18  *  If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 #include "cModularityAnalysis.h"
23 
24 #include "cAnalyzeGenotype.h"
25 #include "cOrganism.h"
26 #include "cPhenotype.h"
27 #include "tDataCommandManager.h"
28 #include "tDataEntry.h"
29 
30 
Initialize()31 void cModularityAnalysis::Initialize()
32 {
33   tDataCommandManager<cAnalyzeGenotype>& dcm = cAnalyzeGenotype::GetDataCommandManager();
34   // A basic macro to link a keyword to a description and Get and Set methods in cAnalyzeGenotype.
35 #define ADD_GDATA(KEYWORD, DESC, GET, COMP) \
36   dcm.Add(KEYWORD, new tDataEntryProxy<cAnalyzeGenotype, cFlexVar ()> \
37     (KEYWORD, DESC, &cModularityAnalysis::GET, COMP));
38 #define ADD_GDATA_IDX(KEYWORD, DESC, GET, COMP) \
39   dcm.Add(KEYWORD, new tDataEntryProxy<cAnalyzeGenotype, cFlexVar (int)> \
40     (KEYWORD, &cModularityAnalysis::DESC, &cModularityAnalysis::GET, COMP));
41 
42   ADD_GDATA("tasks_done", "Number of Tasks Performed", GetTasksDoneFor, 2);
43   ADD_GDATA("insts_tasks", "Number of Instructions Involved in Tasks", GetInstsInvolvedInTasksFor, 1);
44   ADD_GDATA("tasks_prop", "Proportion of Sites in Tasks", GetTaskProportionFor, 4);
45   ADD_GDATA("ave_tasks_per_site", "Average Number of Tasks Per Site", GetAveTasksPerSiteFor, 6);
46   ADD_GDATA("ave_sites_per_task", "Average Number of Sites Per Task", GetAveSitesPerTaskFor, 6);
47   ADD_GDATA("ave_prop_nonoverlap", "Average Proportion of the Non-overlapping Region of a Task", GetTaskProportionFor, 4);
48   ADD_GDATA_IDX("sites_per_task", DescSitesPerTask, GetSitesPerTaskFor, 1);
49   ADD_GDATA_IDX("sites_inv_x_tasks", DescSitesInvolvedInXTasks, GetSitesInvolvedInXTasksFor, 1);
50   ADD_GDATA_IDX("task_length", DescTaskLength, GetTaskLengthFor, 4);
51   ADD_GDATA_IDX("ave_task_position", DescAveTaskPosition, GetAveTaskPositionFor, 4);
52 }
53 
CalcFunctionalModularity(cAvidaContext & ctx)54 void cModularityAnalysis::CalcFunctionalModularity(cAvidaContext& ctx)
55 {
56   cTestCPU* testcpu = m_genotype->GetWorld()->GetHardwareManager().CreateTestCPU(ctx);
57   cCPUTestInfo test_info = m_test_info;
58 
59   const Genome& base_genome = m_genotype->GetGenome();
60   const Sequence& base_seq = base_genome.GetSequence();
61 
62   // Calculate the stats for the genotype we're working with...
63   testcpu->TestGenome(ctx, test_info, base_genome);
64   double base_fitness = test_info.GetColonyFitness();
65 
66   // Check if the organism does any tasks
67   bool does_tasks = false;
68   const tArray<int> base_tasks = test_info.GetColonyOrganism()->GetPhenotype().GetLastTaskCount();
69   const int num_tasks = base_tasks.GetSize();
70   for (int i = 0; i < num_tasks; i++) {
71     if (base_tasks[i] > 0) {
72       does_tasks = true;
73       break;
74     }
75   }
76 
77   // Don't calculate the modularity if the organism doesn't reproduce. i.e. if the fitness is 0
78   if (base_fitness > 0.0 && does_tasks) {
79     // Set up the instruction set for mapping
80     cInstSet& map_inst_set = m_genotype->GetWorld()->GetHardwareManager().GetInstSet(base_genome.GetInstSet());
81     const cInstruction null_inst = map_inst_set.ActivateNullInst();
82 
83     // Genome for testing
84     const int max_line = base_genome.GetSize();
85     Genome mod_genome(base_genome);
86     Sequence& seq = mod_genome.GetSequence();
87 
88     // Create and initialize the modularity matrix
89     tMatrix<int> mod_matrix(num_tasks, max_line);
90     mod_matrix.SetAll(0);
91 
92     tArray<int> site_num_tasks(max_line, 0);  // number of tasks instruction is used in
93     tArray<int> sites_per_task(num_tasks, 0); // number of sites involved in each task
94 
95     // Loop through all the lines of code, testing the removal of each.
96     for (int line_num = 0; line_num < max_line; line_num++) {
97       int cur_inst = base_seq[line_num].GetOp();
98 
99       seq[line_num] = null_inst;
100 
101       // Run the modified genome through the Test CPU
102       testcpu->TestGenome(ctx, test_info, mod_genome);
103 
104       if (test_info.GetColonyFitness() > 0.0) {
105         const tArray<int>& test_tasks = test_info.GetColonyOrganism()->GetPhenotype().GetLastTaskCount();
106 
107         for (int cur_task = 0; cur_task < num_tasks; cur_task++) {
108           // This is done so that under 'binary' option it marks
109           // the task as being influenced by the mutation iff
110           // it is completely knocked out, not just decreased
111           if (base_tasks[cur_task] && !test_tasks[cur_task]) {
112             // If knocking out an instruction stops the expression of a particular task, mark that in the modularity matrix
113             // and add it to two counts
114             mod_matrix(cur_task, line_num) = 1;
115             sites_per_task[cur_task]++;
116             site_num_tasks[line_num]++;
117           }
118         }
119       }
120 
121       // Reset the mod_genome back to the original sequence.
122       seq[line_num].SetOp(cur_inst);
123     }
124 
125     tArray<int> sites_inv_x_tasks(num_tasks + 1, 0);  // # of inst's involved in 0,1,2,3... tasks
126     tArray<double> ave_task_position(num_tasks, 0.0); // mean positions of the tasks in the genome
127     tArray<int> task_length(num_tasks, 0);  // distance between first and last inst involved in a task
128 
129     int total_task = 0;           // total number of tasks done
130     int total_inst = 0;           // total number of instructions involved in tasks
131     int total_all = 0;            // sum of mod_matrix
132     double sum_task_overlap = 0;  // sum of task overlap for for this genome
133 
134 
135     // Calculate instruction and task totals
136     for (int i = 0; i < num_tasks; i++) {
137       total_all += sites_per_task[i];
138       if (sites_per_task[i]) total_task++;
139     }
140     for (int i = 0; i < max_line; i++) {
141       sites_inv_x_tasks[site_num_tasks[i]]++;
142       if (site_num_tasks[i] != 0) total_inst++;
143     }
144 
145 
146     // Calculate average task overlap
147     // first construct num_task x num_task matrix with number of sites overlapping
148     tMatrix<int> task_overlap(num_tasks, num_tasks);
149     task_overlap.SetAll(0);
150 
151     for (int i = 0; i < max_line; i++) {
152       for (int j = 0; j < num_tasks; j++) {
153         for (int k = j; k < num_tasks; k++) {
154           if (mod_matrix(j, i) > 0 && mod_matrix(k, i) > 0) {
155             task_overlap(j, k)++;
156             if (j != k) task_overlap(k, j)++;
157           }
158         }
159       }
160     }
161 
162     // go though the task_overlap matrix, add and average everything up.
163     if (total_task > 1) {
164       for (int i = 0; i < num_tasks; i++) {
165         if (task_overlap(i, i)) {
166           int overlap_per_task = 0;
167           for (int j = 0; j < num_tasks; j++) if (i != j) overlap_per_task += task_overlap(i,j);
168           sum_task_overlap += (double)overlap_per_task / (task_overlap(i, i) * (total_task - 1));
169         }
170       }
171     }
172 
173 
174     // Calculate the first/last position of a task, the task "spread"
175     // starting from the top look for the fist command that matters for a task
176     for (int i = 0; i < num_tasks; i++) {
177       for (int j = 0; j < max_line; j++) {
178         if (mod_matrix(i, j) > 0 && task_length[i] == 0) {
179           task_length[i] = j;
180           break;
181         }
182       }
183     }
184 
185     // starting from the bottom look for the last command that matters for a task
186     // and subtract it from the first to get the task length
187     // add one in order to account for both the beginning and the end instruction
188     for (int i = 0; i < num_tasks; i++) {
189       for (int j = max_line - 1; j >= 0; j--) {
190         if (mod_matrix(i, j) > 0) {
191           task_length[i] = j - task_length[i] + 1;
192           break;
193         }
194       }
195     }
196 
197 
198     // Calculate mean positions of the tasks
199     tArray<int> task_position(num_tasks);
200     for (int i = 0; i < num_tasks; i++) {
201       task_position[i] = 0;
202       for (int j = 0; j < max_line; j++) if (mod_matrix(i,j) > 0) task_position[i] += j;
203     }
204     for (int i = 0; i < num_tasks; i++) ave_task_position[i] = (double)task_position[i] / sites_per_task[i];
205 
206 
207     cModularityData* mod_data = new cModularityData;
208     mod_data->tasks_done = total_task;
209     mod_data->insts_tasks = total_inst;
210     mod_data->tasks_prop = (double)total_inst / max_line;
211     mod_data->ave_tasks_per_site = (total_inst) ? (double)total_all / total_inst : 0.0;
212     mod_data->ave_sites_per_task = (total_task) ? (double)total_all / total_task : 0.0;
213     mod_data->ave_prop_nonoverlap = (total_task) ? sum_task_overlap / total_task : 0.0;
214     mod_data->sites_per_task = sites_per_task;
215     mod_data->sites_inv_x_tasks = sites_inv_x_tasks;
216     mod_data->task_length = task_length;
217     mod_data->ave_task_position = ave_task_position;
218     m_genotype->SetGenotypeData(GD_MD_ID, mod_data);
219   }
220 }
221 
222 #ifdef DEBUG
223 #define GET_MD() \
224   cAnalyzeGenotype::ReadToken* tok = genotype->GetReadToken(); \
225   cModularityData* data = dynamic_cast<cModularityData*>(genotype->GetGenotypeData(tok, GD_MD_ID)); \
226   delete tok;
227 #else
228 #define GET_MD() \
229   cAnalyzeGenotype::ReadToken* tok = genotype->GetReadToken(); \
230   cModularityData* data = static_cast<cModularityData*>(genotype->GetGenotypeData(tok, GD_MD_ID)); \
231   delete tok;
232 #endif
233 
GetTasksDoneFor(const cAnalyzeGenotype * genotype)234 cFlexVar cModularityAnalysis::GetTasksDoneFor(const cAnalyzeGenotype* genotype)
235 {
236   GET_MD();
237   if (data) return cFlexVar(data->tasks_done);
238   return cFlexVar(0);
239 }
240 
GetInstsInvolvedInTasksFor(const cAnalyzeGenotype * genotype)241 cFlexVar cModularityAnalysis::GetInstsInvolvedInTasksFor(const cAnalyzeGenotype* genotype)
242 {
243   GET_MD();
244   if (data) return cFlexVar(data->insts_tasks);
245   return cFlexVar(0);
246 }
247 
GetTaskProportionFor(const cAnalyzeGenotype * genotype)248 cFlexVar cModularityAnalysis::GetTaskProportionFor(const cAnalyzeGenotype* genotype)
249 {
250   GET_MD();
251   if (data) return cFlexVar(data->tasks_prop);
252   return cFlexVar(0.0);
253 }
254 
GetAveTasksPerSiteFor(const cAnalyzeGenotype * genotype)255 cFlexVar cModularityAnalysis::GetAveTasksPerSiteFor(const cAnalyzeGenotype* genotype)
256 {
257   GET_MD();
258   if (data) return cFlexVar(data->ave_tasks_per_site);
259   return cFlexVar(0.0);
260 }
261 
GetAveSitesPerTaskFor(const cAnalyzeGenotype * genotype)262 cFlexVar cModularityAnalysis::GetAveSitesPerTaskFor(const cAnalyzeGenotype* genotype)
263 {
264   GET_MD();
265   if (data) return cFlexVar(data->ave_sites_per_task);
266   return cFlexVar(0.0);
267 }
268 
GetPropNonoverlapFor(const cAnalyzeGenotype * genotype)269 cFlexVar cModularityAnalysis::GetPropNonoverlapFor(const cAnalyzeGenotype* genotype)
270 {
271   GET_MD();
272   if (data) return cFlexVar(data->ave_prop_nonoverlap);
273   return cFlexVar(0.0);
274 }
275 
276 
GetSitesPerTaskFor(const cAnalyzeGenotype * genotype,int idx)277 cFlexVar cModularityAnalysis::GetSitesPerTaskFor(const cAnalyzeGenotype* genotype, int idx)
278 {
279   GET_MD();
280   if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->sites_per_task[idx]);
281   return cFlexVar(0);
282 }
283 
DescSitesPerTask(const cAnalyzeGenotype * genotype,int idx)284 cString cModularityAnalysis::DescSitesPerTask(const cAnalyzeGenotype* genotype, int idx)
285 {
286   return cStringUtil::Stringf("Number of Sites Per Task %d", idx);
287 }
288 
289 
GetSitesInvolvedInXTasksFor(const cAnalyzeGenotype * genotype,int idx)290 cFlexVar cModularityAnalysis::GetSitesInvolvedInXTasksFor(const cAnalyzeGenotype* genotype, int idx)
291 {
292   GET_MD();
293   if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->sites_inv_x_tasks[idx]);
294   return cFlexVar(0);
295 }
296 
DescSitesInvolvedInXTasks(const cAnalyzeGenotype * genotype,int idx)297 cString cModularityAnalysis::DescSitesInvolvedInXTasks(const cAnalyzeGenotype* genotype, int idx)
298 {
299   return cStringUtil::Stringf("Number of Sites Involved in %d Tasks", idx);
300 }
301 
302 
GetTaskLengthFor(const cAnalyzeGenotype * genotype,int idx)303 cFlexVar cModularityAnalysis::GetTaskLengthFor(const cAnalyzeGenotype* genotype, int idx)
304 {
305   GET_MD();
306   if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->task_length[idx]);
307   return cFlexVar(0);
308 }
309 
DescTaskLength(const cAnalyzeGenotype * genotype,int idx)310 cString cModularityAnalysis::DescTaskLength(const cAnalyzeGenotype* genotype, int idx)
311 {
312   return cStringUtil::Stringf("Task %d Length", idx);
313 }
314 
315 
GetAveTaskPositionFor(const cAnalyzeGenotype * genotype,int idx)316 cFlexVar cModularityAnalysis::GetAveTaskPositionFor(const cAnalyzeGenotype* genotype, int idx)
317 {
318   GET_MD();
319   if (data && idx >= 0 && idx < data->sites_per_task.GetSize()) return cFlexVar(data->ave_task_position[idx]);
320   return cFlexVar(0.0);
321 }
322 
DescAveTaskPosition(const cAnalyzeGenotype * genotype,int idx)323 cString cModularityAnalysis::DescAveTaskPosition(const cAnalyzeGenotype* genotype, int idx)
324 {
325   return cStringUtil::Stringf("Task %d Position", idx);
326 }
327 
328 #undef GET_MD
329 
330 
cModularityData()331 cModularityAnalysis::cModularityData::cModularityData()
332 : tasks_done(0), insts_tasks(0), tasks_prop(0.0), ave_tasks_per_site(0.0), ave_sites_per_task(0.0), ave_prop_nonoverlap(0.0)
333 {
334 }
335