1 /*
2  *  cAnalyze.cc
3  *  Avida
4  *
5  *  Called "analyze.cc" prior to 12/1/05.
6  *  Copyright 1999-2011 Michigan State University. All rights reserved.
7  *  Copyright 1993-2003 California Institute of Technology.
8  *
9  *
10  *  This file is part of Avida.
11  *
12  *  Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13  *  as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14  *
15  *  Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
17  *
18  *  You should have received a copy of the GNU Lesser General Public License along with Avida.
19  *  If not, see <http://www.gnu.org/licenses/>.
20  *
21  */
22 
23 #include "cAnalyze.h"
24 
25 #include "avida/Avida.h"
26 
27 #include "avida/core/WorldDriver.h"
28 
29 #include "cActionLibrary.h"
30 #include "cAnalyzeCommand.h"
31 #include "cAnalyzeCommandAction.h"
32 #include "cAnalyzeCommandDef.h"
33 #include "cAnalyzeCommandDefBase.h"
34 #include "cAnalyzeFlowCommand.h"
35 #include "cAnalyzeFlowCommandDef.h"
36 #include "cAnalyzeFunction.h"
37 #include "cAnalyzeGenotype.h"
38 #include "cAnalyzeTreeStats_CumulativeStemminess.h"
39 #include "cAnalyzeTreeStats_Gamma.h"
40 #include "cAvidaContext.h"
41 #include "cCPUTestInfo.h"
42 #include "cDataFile.h"
43 #include "cEnvironment.h"
44 #include "cHardwareBase.h"
45 #include "cHardwareManager.h"
46 #include "cHardwareStatusPrinter.h"
47 #include "cHelpManager.h"
48 #include "cInitFile.h"
49 #include "cInstSet.h"
50 #include "cLandscape.h"
51 #include "cModularityAnalysis.h"
52 #include "cPhenotype.h"
53 #include "cPhenPlastGenotype.h"
54 #include "cPlasticPhenotype.h"
55 #include "cProbSchedule.h"
56 #include "cReaction.h"
57 #include "cReactionProcess.h"
58 #include "cResource.h"
59 #include "cResourceHistory.h"
60 #include "cSchedule.h"
61 #include "cStringIterator.h"
62 #include "cTestCPU.h"
63 #include "cUserFeedback.h"
64 #include "cWorld.h"
65 #include "tAnalyzeJob.h"
66 #include "tAnalyzeJobBatch.h"
67 #include "tDataCommandManager.h"
68 #include "tDataEntry.h"
69 #include "tDataEntryCommand.h"
70 #include "tHashMap.h"
71 #include "tMatrix.h"
72 
73 #include <iomanip>
74 #include <fstream>
75 #include <sstream>
76 #include <string>
77 #include <queue>
78 #include <stack>
79 
80 #include <cerrno>
81 extern "C" {
82 #include <sys/stat.h>
83 }
84 
85 using namespace std;
86 using namespace Avida;
87 using namespace AvidaTools;
88 
cAnalyze(cWorld * world)89 cAnalyze::cAnalyze(cWorld* world)
90 : cur_batch(0)
91 /*
92  FIXME : refactor "temporary_next_id". @kgn
93  - Added as a quick way to provide unique serial ids, per organism, in COMPETE
94  command. @kgn
95  */
96 , temporary_next_id(0)
97 , temporary_next_update(0)
98 , batch(INITIAL_BATCHES)
99 , variables(26)
100 , local_variables(26)
101 , arg_variables(26)
102 , exit_on_error(true)
103 , m_world(world)
104 , m_ctx(world->GetDefaultContext())
105 , m_jobqueue(world)
106 , m_resources(NULL)
107 , m_resource_time_spent_offset(0)
108 , interactive_depth(0)
109 {
110   random.ResetSeed(m_world->GetConfig().RANDOM_SEED.Get());
111   if (m_world->GetDriver().IsInteractive()) exit_on_error = false;
112 
113   for (int i = 0; i < GetNumBatches(); i++) {
114     batch[i].Name().Set("Batch%d", i);
115   }
116 
117 }
118 
119 
120 
~cAnalyze()121 cAnalyze::~cAnalyze()
122 {
123   while (command_list.GetSize()) delete command_list.Pop();
124   while (function_list.GetSize()) delete function_list.Pop();
125 }
126 
127 
RunFile(cString filename)128 void cAnalyze::RunFile(cString filename)
129 {
130   bool saved_analyze = m_ctx.GetAnalyzeMode();
131   m_ctx.SetAnalyzeMode();
132 
133   cInitFile analyze_file(filename, m_world->GetWorkingDir());
134   if (!analyze_file.WasOpened()) {
135     const cUserFeedback& feedback = analyze_file.GetFeedback();
136     for (int i = 0; i < feedback.GetNumMessages(); i++) {
137       switch (feedback.GetMessageType(i)) {
138         case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
139         case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
140         default: break;
141       };
142       cerr << feedback.GetMessage(i) << endl;
143     }
144     cerr << "warning: creating default file: '" << filename << "'" << endl;
145     ofstream fp(filename);
146     fp << "################################################################################################" << endl
147        << "# This file is used to setup avida when it is in analysis-only mode, which can be triggered by"   << endl
148        << "# running \"avida -a\"."                                                                          << endl
149        << "# "                                                                                               << endl
150        << "# Please see the documentation in documentation/analyze.html for information on how to use"       << endl
151        << "# analyze mode."                                                                                  << endl
152        << "################################################################################################" << endl
153       << endl;
154     fp.close();
155   } else {
156     LoadCommandList(analyze_file, command_list);
157     ProcessCommands(command_list);
158   }
159 
160   if (!saved_analyze) m_ctx.ClearAnalyzeMode();
161 }
162 
163 //////////////// Loading methods...
164 
LoadOrganism(cString cur_string)165 void cAnalyze::LoadOrganism(cString cur_string)
166 {
167   // LOAD_ORGANISM command...
168 
169   cString filename = cur_string.PopWord();
170 
171   // Output information about loading file.
172   cout << "Loading: " << filename << '\n';
173 
174 
175 
176   // Setup the genome...
177   Genome genome;
178   cUserFeedback feedback;
179   genome.LoadFromDetailFile(filename, m_world->GetWorkingDir(), m_world->GetHardwareManager(), feedback);
180   for (int i = 0; i < feedback.GetNumMessages(); i++) {
181     switch (feedback.GetMessageType(i)) {
182       case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
183       case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
184       default: break;
185     };
186     cerr << feedback.GetMessage(i) << endl;
187   }
188 
189   // Construct the new genotype..
190   cAnalyzeGenotype* genotype = new cAnalyzeGenotype(m_world, genome);
191 
192   // Determine the organism's original name -- strip off directory...
193   while (filename.Find('/') != -1) filename.Pop('/');
194   while (filename.Find('\\') != -1) filename.Pop('\\');
195   filename.Replace(".gen", "");  // Remove the .gen from the filename.
196   genotype->SetName(filename);
197 
198   // And save it in the current batch.
199   batch[cur_batch].List().PushRear(genotype);
200 
201   // Adjust the flags on this batch
202   batch[cur_batch].SetLineage(false);
203   batch[cur_batch].SetAligned(false);
204 }
205 
206 
LoadSequence(cString cur_string)207 void cAnalyze::LoadSequence(cString cur_string)
208 {
209   // LOAD_SEQUENCE
210 
211   static int sequence_count = 1;
212   cString sequence = cur_string.PopWord();
213   cString seq_name = cur_string.PopWord();
214 
215   cout << "Loading: " << sequence << endl;
216 
217   // Setup the genotype...
218   const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
219   Genome genome(is.GetHardwareType(), is.GetInstSetName(), sequence);
220   cAnalyzeGenotype* genotype = new cAnalyzeGenotype(m_world, genome);
221 
222   genotype->SetNumCPUs(1);      // Initialize to a single organism.
223   if (seq_name == "") {
224     seq_name = cStringUtil::Stringf("org-Seq%d", sequence_count);
225   }
226   genotype->SetName(seq_name);
227   sequence_count++;
228 
229   // Add this genotype to the proper batch.
230   batch[cur_batch].List().PushRear(genotype);
231 
232   // Adjust the flags on this batch
233   batch[cur_batch].SetLineage(false);
234   batch[cur_batch].SetAligned(false);
235 }
236 
237 // Clears the current time oriented list of resources and loads in a new one
238 // from a file specified by the user, or resource.dat by default.
LoadResources(cString cur_string)239 void cAnalyze::LoadResources(cString cur_string)
240 {
241   delete m_resources;
242   m_resources = new cResourceHistory;
243 
244   int words = cur_string.CountNumWords();
245 
246   cString filename = "resource.dat";
247   if (words >= 1)
248 		filename = cur_string.PopWord();
249   if (words >= 2)
250 		m_resource_time_spent_offset = cur_string.PopWord().AsInt();
251 
252   cout << "Loading Resources from: " << filename << endl;
253 
254   if (!m_resources->LoadFile(filename, m_world->GetWorkingDir())) m_world->GetDriver().RaiseException("failed to load resource file");
255 }
256 
AnalyzeEntropy(cAnalyzeGenotype * genotype,double mu)257 double cAnalyze::AnalyzeEntropy(cAnalyzeGenotype* genotype, double mu)
258 {
259   double entropy = 0.0;
260 
261   // If the fitness is 0, the entropy is the length of genotype ...
262   genotype->Recalculate(m_ctx);
263   if (genotype->GetFitness() == 0) {
264     return genotype->GetLength();
265   }
266 
267   // Calculate the stats for the genotype we're working with ...
268   const Genome& base_genome = genotype->GetGenome();
269   const Sequence& base_seq = base_genome.GetSequence();
270   Genome mod_genome(base_genome);
271   Sequence& seq = mod_genome.GetSequence();
272   const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
273   const int num_lines = base_genome.GetSize();
274   double base_fitness = genotype->GetFitness();
275 
276   // Loop through all the lines of code, testing all mutations...
277   tArray<double> test_fitness(num_insts);
278   tArray<double> prob(num_insts);
279   for (int line_no = 0; line_no < num_lines; line_no ++) {
280     int cur_inst = base_seq[line_no].GetOp();
281 
282     // Test fitness of each mutant.
283     for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
284       seq[line_no].SetOp(mod_inst);
285       cAnalyzeGenotype test_genotype(m_world, mod_genome);
286       test_genotype.Recalculate(m_ctx);
287       // Ajust fitness ...
288       if (test_genotype.GetFitness() <= base_fitness) {
289         test_fitness[mod_inst] = test_genotype.GetFitness();
290       } else {
291         test_fitness[mod_inst] = base_fitness;
292       }
293     }
294 
295     // Calculate probabilities at mut-sel balance
296     double w_bar = 1;
297 
298     // Normalize fitness values
299     double maxFitness = 0.0;
300     for(int i=0; i<num_insts; i++) {
301       if(test_fitness[i] > maxFitness) {
302         maxFitness = test_fitness[i];
303       }
304     }
305 
306 
307     for(int i=0; i<num_insts; i++) {
308       test_fitness[i] /= maxFitness;
309     }
310 
311     while(1) {
312       double sum = 0.0;
313       for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
314         prob[mod_inst] = (mu * w_bar) /
315         ((double)num_insts *
316          (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
317         sum = sum + prob[mod_inst];
318       }
319       if ((sum-1.0)*(sum-1.0) <= 0.0001)
320         break;
321       else
322         w_bar = w_bar - 0.000001;
323     }
324 
325     // Calculate entropy ...
326     double this_entropy = 0.0;
327     for (int i = 0; i < num_insts; i ++) {
328       this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
329     }
330     entropy += this_entropy;
331 
332     // Reset the mod_genome back to the original sequence.
333     seq[line_no].SetOp(cur_inst);
334   }
335   return entropy;
336 }
337 
338 //@ MRR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
AnalyzeEntropyPairs(cAnalyzeGenotype * genotype,double mu)339 tMatrix< double > cAnalyze::AnalyzeEntropyPairs(cAnalyzeGenotype * genotype, double mu)
340 {
341 
342   double entropy = 0.0;
343 
344   genotype->Recalculate(m_ctx);
345 
346   // Calculate the stats for the genotype we're working with ...
347   const Genome& base_genome = genotype->GetGenome();
348   const Sequence& base_seq = base_genome.GetSequence();
349   Genome mod_genome(base_genome);
350   Sequence& seq = mod_genome.GetSequence();
351   const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
352   const int num_lines = base_genome.GetSize();
353   double base_fitness = genotype->GetFitness();
354 
355   cout << num_lines << endl;
356   tMatrix< double > pairwiseEntropy(num_lines, num_lines);
357   for (int i=0; i<num_lines; i++)
358     for (int j=-0; j<num_lines; j++)
359       pairwiseEntropy[i][j] = 0.0;
360 
361   cout << pairwiseEntropy.GetNumRows() << endl;
362 
363   // If the fitness is 0, return empty matrix
364 
365   if (genotype->GetFitness() == 0) {
366     return pairwiseEntropy;
367   }
368 
369 
370   tMatrix< double >  test_fitness(num_insts,num_insts);
371   tMatrix< double >  prob(num_insts,num_insts);
372 
373   //Pairwise mutations; the diagonal of the matrix will be the information
374   //stored by that site alone
375   for (int line_1 = 0; line_1 < num_lines; line_1++){
376     for (int line_2 = line_1; line_2 < num_lines; line_2++) {
377 
378       cerr << "[ " << line_1 << ", " << line_2 << " ]" << endl;
379 
380       int cur_inst_1 = base_seq[line_1].GetOp();
381       int cur_inst_2 = base_seq[line_2].GetOp();
382 
383       // Test fitness of each mutant.
384       for (int mod_inst_1 = 0; mod_inst_1 < num_insts; mod_inst_1++){
385         for (int mod_inst_2 = 0; mod_inst_2 < num_insts; mod_inst_2++) {
386           seq[line_1].SetOp(mod_inst_1);
387           seq[line_2].SetOp(mod_inst_2);
388           cAnalyzeGenotype test_genotype(m_world, mod_genome);
389           test_genotype.Recalculate(m_ctx);
390           // Adjust fitness ...
391           if (test_genotype.GetFitness() <= base_fitness) {
392             test_fitness[mod_inst_1][mod_inst_2] = test_genotype.GetFitness();
393           } else {
394             test_fitness[mod_inst_1][mod_inst_2] = base_fitness;
395           }
396         }
397       }
398 
399       // Calculate probabilities at mut-sel balance
400       double w_bar = 1;
401 
402       // Normalize fitness values
403       double maxFitness = 0.0;
404       for(int i=0; i<num_insts; i++) {
405         for (int j = 0; j < num_insts; j++){
406           if(test_fitness[i][j] > maxFitness) {
407             maxFitness = test_fitness[i][j];
408           }
409         }
410       }
411 
412 
413       for(int i=0; i<num_insts; i++) {
414         for (int j=0; j<num_insts; j++){
415           test_fitness[i][j] /= maxFitness;
416         }
417       }
418 
419 
420       while(1) {
421         double sum = 0.0;
422         for (int mod_inst_1 = 0; mod_inst_1 < num_insts; mod_inst_1++) {
423           for (int mod_inst_2 = 0; mod_inst_2 < num_insts; mod_inst_2++){
424 
425             prob[mod_inst_1][mod_inst_2] =
426             (mu * w_bar) /
427             ((double)num_insts *
428              (w_bar + test_fitness[mod_inst_1][mod_inst_2]
429               * mu  - test_fitness[mod_inst_1][mod_inst_2]));
430             sum = sum + prob[mod_inst_1][mod_inst_2];
431           }
432         }
433         if ((sum-1.0)*(sum-1.0) <= 0.0001)
434           break;
435         else
436           w_bar = w_bar - 0.000001;
437       }
438 
439       // Calculate entropy ...
440       double this_entropy = 0.0;
441       for (int i = 0; i < num_insts; i++){
442         for (int j = 0; j < num_insts; j++) {
443           this_entropy += prob[i][j] *
444           log((double) 1.0/prob[i][j]) / log ((double) num_insts);
445         }
446       }
447       entropy += this_entropy;
448       pairwiseEntropy[line_1][line_2] = this_entropy;
449 
450       // Reset the mod_genome back to the original sequence.
451       seq[line_1].SetOp(cur_inst_1);
452       seq[line_2].SetOp(cur_inst_2);
453 
454     }
455   }  //End Loops
456   return pairwiseEntropy;
457 }
458 
459 
460 
461 
AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,cAnalyzeGenotype * parent,double mut_rate)462 double cAnalyze::AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,
463                                            cAnalyzeGenotype * parent, double mut_rate)
464 {
465   double entropy = 0.0;
466 
467   // Calculate the stats for the genotype we're working with ...
468   genotype->Recalculate(m_ctx);
469   const Genome& parent_genome = parent->GetGenome();
470   const Sequence& parent_seq = parent_genome.GetSequence();
471   const Genome& base_genome = genotype->GetGenome();
472   const Sequence& base_seq = base_genome.GetSequence();
473   Genome mod_genome(base_genome);
474   Sequence& seq = mod_genome.GetSequence();
475   const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
476   const int num_lines = base_genome.GetSize();
477 
478   // Loop through all the lines of code, testing all mutations ...
479   tArray<double> test_fitness(num_insts);
480   tArray<double> prob(num_insts);
481   for (int line_no = 0; line_no < num_lines; line_no ++) {
482     int cur_inst = base_seq[line_no].GetOp();
483     int parent_inst = parent_seq[line_no].GetOp();
484 
485     // Test fitness of each mutant.
486     for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
487       seq[line_no].SetOp(mod_inst);
488       cAnalyzeGenotype test_genotype(m_world, mod_genome);
489       test_genotype.Recalculate(m_ctx);
490       test_fitness[mod_inst] = test_genotype.GetFitness();
491     }
492 
493 
494     // Calculate probabilities at mut-sel balance
495     double w_bar = 1;
496 
497     // Normalize fitness values, assert if they are all zero
498     double maxFitness = 0.0;
499     for(int i=0; i<num_insts; i++) {
500       if ( i == parent_inst) { continue; }
501       if (test_fitness[i] > maxFitness) {
502         maxFitness = test_fitness[i];
503       }
504     }
505 
506     if(maxFitness > 0) {
507       for(int i = 0; i < num_insts; i ++) {
508         if (i == parent_inst) { continue; }
509         test_fitness[i] /= maxFitness;
510       }
511     } else {
512       // every other inst is equally likely to be mutated to
513       for (int i = 0; i < num_insts; i ++) {
514         if (i == parent_inst) { continue; }
515         test_fitness[i] = 1;
516       }
517     }
518 
519     double double_num_insts = num_insts * 1.0;
520     while(1) {
521       double sum = 0.0;
522       for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
523         if (mod_inst == parent_inst) { continue; }
524         prob[mod_inst] = (mut_rate * w_bar) /
525         (double_num_insts-2) /
526         (w_bar + test_fitness[mod_inst] * mut_rate * (double_num_insts-1) / (double_num_insts - 2)
527          - test_fitness[mod_inst]);
528         sum = sum + prob[mod_inst];
529       }
530       if ((sum-1.0)*(sum-1.0) <= 0.0001)
531         break;
532       else
533         w_bar = w_bar - 0.000001;
534     }
535 
536     // Calculate entropy ...
537     double this_entropy = 0.0;
538     this_entropy -= (1.0 - mut_rate) * log(1.0 - mut_rate) / log(static_cast<double>(num_insts));
539     for (int i = 0; i < num_insts; i ++) {
540       if (i == parent_inst) { continue; }
541       prob[i] = prob[i] * mut_rate;
542       this_entropy += prob[i] * log(static_cast<double>(1.0/prob[i])) / log (static_cast<double>(num_insts));
543     }
544     entropy += this_entropy;
545 
546     // Reset the mod_genome back to the base_genome.
547     seq[line_no].SetOp(cur_inst);
548   }
549   return entropy;
550 }
551 
IncreasedInfo(cAnalyzeGenotype * genotype1,cAnalyzeGenotype * genotype2,double mu)552 double cAnalyze::IncreasedInfo(cAnalyzeGenotype * genotype1,
553                                cAnalyzeGenotype * genotype2,
554                                double mu)
555 {
556   double increased_info = 0.0;
557 
558   // Calculate the stats for the genotypes we're working with ...
559   if ( genotype1->GetLength() != genotype2->GetLength() ) {
560     cerr << "Error: Two genotypes don't have same length.(cAnalyze::IncreasedInfo)" << endl;
561     if (exit_on_error) exit(1);
562   }
563 
564   genotype1->Recalculate(m_ctx);
565   if (genotype1->GetFitness() == 0) {
566     return 0.0;
567   }
568 
569   const Genome& genotype1_base_genome = genotype1->GetGenome();
570   const Sequence& genotype1_base_seq = genotype1_base_genome.GetSequence();
571   Genome genotype1_mod_genome(genotype1_base_genome);
572   Sequence& genotype1_mod_seq = genotype1_mod_genome.GetSequence();
573   const int num_insts = m_world->GetHardwareManager().GetInstSet(genotype1_base_genome.GetInstSet()).GetSize();
574   const int num_lines = genotype1_base_genome.GetSize();
575   double genotype1_base_fitness = genotype1->GetFitness();
576   vector<double> genotype1_info(num_lines, 0.0);
577 
578   // Loop through all the lines of code, calculate genotype1 information
579   tArray<double> test_fitness(num_insts);
580   tArray<double> prob(num_insts);
581   for (int line_no = 0; line_no < num_lines; line_no ++) {
582     int cur_inst = genotype1_base_seq[line_no].GetOp();
583 
584     // Test fitness of each mutant.
585     for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
586       genotype1_mod_seq[line_no].SetOp(mod_inst);
587       cAnalyzeGenotype test_genotype(m_world, genotype1_mod_genome);
588       test_genotype.Recalculate(m_ctx);
589       // Ajust fitness ...
590       if (test_genotype.GetFitness() <= genotype1_base_fitness) {
591         test_fitness[mod_inst] = test_genotype.GetFitness();
592       } else {
593         test_fitness[mod_inst] = genotype1_base_fitness;
594       }
595     }
596 
597     // Calculate probabilities at mut-sel balance
598     double w_bar = 1;
599 
600     // Normalize fitness values
601     double maxFitness = 0.0;
602     for(int i=0; i<num_insts; i++) {
603       if(test_fitness[i] > maxFitness) {
604         maxFitness = test_fitness[i];
605       }
606     }
607 
608     for(int i=0; i<num_insts; i++) {
609       test_fitness[i] /= maxFitness;
610     }
611 
612     while(1) {
613       double sum = 0.0;
614       for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
615         prob[mod_inst] = (mu * w_bar) /
616         ((double)num_insts *
617          (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
618         sum = sum + prob[mod_inst];
619       }
620       if ((sum-1.0)*(sum-1.0) <= 0.0001)
621         break;
622       else
623         w_bar = w_bar - 0.000001;
624     }
625 
626     // Calculate entropy ...
627     double this_entropy = 0.0;
628     for (int i = 0; i < num_insts; i ++) {
629       this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
630     }
631     genotype1_info[line_no] = 1 - this_entropy;
632 
633     // Reset the mod_genome back to the original sequence.
634     genotype1_mod_seq[line_no].SetOp(cur_inst);
635   }
636 
637   genotype2->Recalculate(m_ctx);
638   if (genotype2->GetFitness() == 0) {
639     for (int line_no = 0; line_no < num_lines; ++ line_no) {
640       increased_info += genotype1_info[line_no];
641     }
642     return increased_info;
643   }
644 
645   const Genome& genotype2_base_genome = genotype2->GetGenome();
646   const Sequence& genotype2_base_seq = genotype2_base_genome.GetSequence();
647   Genome genotype2_mod_genome(genotype2_base_genome);
648   Sequence& genotype2_mod_seq = genotype2_mod_genome.GetSequence();
649   double genotype2_base_fitness = genotype2->GetFitness();
650 
651   // Loop through all the lines of code, calculate increased information
652   for (int line_no = 0; line_no < num_lines; line_no ++) {
653     int cur_inst = genotype2_base_seq[line_no].GetOp();
654 
655     // Test fitness of each mutant.
656     for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
657       genotype2_mod_seq[line_no].SetOp(mod_inst);
658       cAnalyzeGenotype test_genotype(m_world, genotype2_mod_genome);
659       test_genotype.Recalculate(m_ctx);
660       // Ajust fitness ...
661       if (test_genotype.GetFitness() <= genotype2_base_fitness) {
662         test_fitness[mod_inst] = test_genotype.GetFitness();
663       } else {
664         test_fitness[mod_inst] = genotype2_base_fitness;
665       }
666     }
667 
668     // Calculate probabilities at mut-sel balance
669     double w_bar = 1;
670 
671     // Normalize fitness values, assert if they are all zero
672     double maxFitness = 0.0;
673     for(int i=0; i<num_insts; i++) {
674       if(test_fitness[i] > maxFitness) {
675         maxFitness = test_fitness[i];
676       }
677     }
678 
679     for(int i=0; i<num_insts; i++) {
680       test_fitness[i] /= maxFitness;
681     }
682 
683     while(1) {
684       double sum = 0.0;
685       for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
686         prob[mod_inst] = (mu * w_bar) /
687         ((double)num_insts *
688          (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
689         sum = sum + prob[mod_inst];
690       }
691       if ((sum-1.0)*(sum-1.0) <= 0.0001)
692         break;
693       else
694         w_bar = w_bar - 0.000001;
695     }
696 
697     // Calculate entropy ...
698     double this_entropy = 0.0;
699     for (int i = 0; i < num_insts; i ++) {
700       this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
701     }
702 
703     // Compare information
704     if (genotype1_info[line_no] > 1 - this_entropy) {
705       increased_info += genotype1_info[line_no] - (1 - this_entropy);
706     } // else increasing is 0, do nothing
707 
708     // Reset the mod_genome back to the original sequence.
709     genotype2_mod_seq[line_no].SetOp(cur_inst);
710   }
711 
712 
713   return increased_info;
714 }
715 
LoadFile(cString cur_string)716 void cAnalyze::LoadFile(cString cur_string)
717 {
718   // LOAD
719 
720   cString filename = cur_string.PopWord();
721 
722   cout << "Loading: " << filename << endl;
723 
724   cInitFile input_file(filename, m_world->GetWorkingDir());
725   if (!input_file.WasOpened()) {
726     const cUserFeedback& feedback = input_file.GetFeedback();
727     for (int i = 0; i < feedback.GetNumMessages(); i++) {
728       switch (feedback.GetMessageType(i)) {
729         case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
730         case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
731         default: break;
732       };
733       cerr << feedback.GetMessage(i) << endl;
734     }
735     if (exit_on_error) exit(1);
736   }
737 
738   const cString filetype = input_file.GetFiletype();
739   if (filetype != "population_data" &&  // Deprecated
740       filetype != "genotype_data") {
741     cerr << "error: cannot load files of type \"" << filetype << "\"." << endl;
742     if (exit_on_error) exit(1);
743   }
744 
745   if (m_world->GetVerbosity() >= VERBOSE_ON) {
746     cout << "Loading file of type: " << filetype << endl;
747   }
748 
749 
750   // Construct a linked list of data types that can be loaded...
751   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
752   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
753   cUserFeedback feedback;
754   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(input_file.GetFormat(), output_list, &feedback);
755 
756   for (int i = 0; i < feedback.GetNumMessages(); i++) {
757     switch (feedback.GetMessageType(i)) {
758       case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
759       case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
760       default: break;
761     };
762     cerr << feedback.GetMessage(i) << endl;
763   }
764 
765   if (feedback.GetNumErrors()) return;
766 
767   bool id_inc = input_file.GetFormat().HasString("id");
768 
769   // Setup the genome...
770   const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
771   Genome default_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(1));
772   int load_count = 0;
773 
774   for (int line_id = 0; line_id < input_file.GetNumLines(); line_id++) {
775     cString cur_line = input_file.GetLine(line_id);
776 
777     cAnalyzeGenotype* genotype = new cAnalyzeGenotype(m_world, default_genome);
778 
779     output_it.Reset();
780     tDataEntryCommand<cAnalyzeGenotype>* data_command = NULL;
781     while ((data_command = output_it.Next()) != NULL) {
782       data_command->SetValue(genotype, cur_line.PopWord());
783     }
784 
785     // Give this genotype a name.  Base it on the ID if possible.
786     if (id_inc == false) {
787       cString name = cStringUtil::Stringf("org-%d", load_count++);
788       genotype->SetName(name);
789     }
790     else {
791       cString name = cStringUtil::Stringf("org-%d", genotype->GetID());
792       genotype->SetName(name);
793     }
794 
795     // Add this genotype to the proper batch.
796     batch[cur_batch].List().PushRear(genotype);
797   }
798 
799   // Adjust the flags on this batch
800   batch[cur_batch].SetLineage(false);
801   batch[cur_batch].SetAligned(false);
802 }
803 
804 
805 //////////////// Reduction....
806 
CommandFilter(cString cur_string)807 void cAnalyze::CommandFilter(cString cur_string)
808 {
809   // First three arguments are: setting, relation, comparison
810   // Fourth argument is optional batch.
811 
812   const int num_args = cur_string.CountNumWords();
813   cString stat_name = cur_string.PopWord();
814   cString relation = cur_string.PopWord();
815   cString test_value = cur_string.PopWord();
816 
817   // Get the dynamic command to look up the stat we need.
818   tDataEntryCommand<cAnalyzeGenotype>* stat_command = cAnalyzeGenotype::GetDataCommandManager().GetDataCommand(stat_name);
819 
820 
821   // Check for various possible errors before moving on...
822   bool error_found = false;
823   if (num_args < 3 || num_args > 4) {
824     cerr << "Error: Incorrect argument count." << endl;
825     error_found = true;
826   }
827 
828   if (stat_command == NULL) {
829     cerr << "Error: Unknown stat '" << stat_name << "'" << endl;
830     error_found = true;
831   }
832 
833   // Check relationship types.  rel_ok[0] = less_ok; rel_ok[1] = same_ok; rel_ok[2] = gtr_ok
834   tArray<bool> rel_ok(3, false);
835   if (relation == "==")      {                    rel_ok[1] = true;                    }
836   else if (relation == "!=") { rel_ok[0] = true;                     rel_ok[2] = true; }
837   else if (relation == "<")  { rel_ok[0] = true;                                       }
838   else if (relation == ">")  {                                       rel_ok[2] = true; }
839   else if (relation == "<=") { rel_ok[0] = true;  rel_ok[1] = true;                    }
840   else if (relation == ">=") {                    rel_ok[1] = true;  rel_ok[2] = true; }
841   else {
842     cerr << "Error: Unknown relation '" << relation << "'" << endl;
843     error_found = true;
844   }
845 
846   if (error_found == true) {
847     cerr << "Format: FILTER [stat] [relation] [value] [batch=current]" << endl;
848     cerr << "Example: FILTER fitness >= 10.0" << endl;
849     if (exit_on_error) exit(1);
850     if (stat_command != NULL) delete stat_command;
851     return;
852   }
853 
854 
855   // If we made it this far, we're going ahead with the command...
856 
857   if (m_world->GetVerbosity() >= VERBOSE_ON) {
858     cout << "Filtering batch " << cur_batch << " to genotypes where "
859     << stat_name << " " << relation << " " << test_value << endl;
860   }
861 
862 
863   // Loop through the genotypes and remove the entries that don't match.
864   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
865   cAnalyzeGenotype * cur_genotype = NULL;
866   while ((cur_genotype = batch_it.Next()) != NULL) {
867     const cFlexVar value = stat_command->GetValue(cur_genotype);
868     int compare = 1 + CompareFlexStat(value, test_value);
869 
870     // Check if we should eliminate this genotype...
871     if (rel_ok[compare] == false) {
872       delete batch_it.Remove();
873     }
874   }
875   delete stat_command;
876 
877 
878   // Adjust the flags on this batch
879   batch[cur_batch].SetLineage(false);
880   batch[cur_batch].SetAligned(false);
881 }
882 
FindGenotype(cString cur_string)883 void cAnalyze::FindGenotype(cString cur_string)
884 {
885   // If no arguments are passed in, just find max num_cpus.
886   if (cur_string.GetSize() == 0) cur_string = "num_cpus";
887 
888   if (m_world->GetVerbosity() >= VERBOSE_ON) {
889     cout << "Reducing batch " << cur_batch << " to genotypes: ";
890   }
891 
892   tListPlus<cAnalyzeGenotype> & gen_list = batch[cur_batch].List();
893   tListPlus<cAnalyzeGenotype> found_list;
894   while (cur_string.CountNumWords() > 0) {
895     cString gen_desc(cur_string.PopWord());
896     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << gen_desc << " ";
897 
898     // Determine by lin_type which genotype were are tracking...
899     cAnalyzeGenotype * found_gen = PopGenotype(gen_desc, cur_batch);
900 
901     if (found_gen == NULL) {
902       cerr << "  Warning: genotype not found!" << endl;
903       continue;
904     }
905 
906     // Save this genotype...
907     found_list.Push(found_gen);
908   }
909   cout << endl;
910 
911   // Delete all genotypes other than the ones found!
912   while (gen_list.GetSize() > 0) delete gen_list.Pop();
913 
914   // And fill it back in with the good stuff.
915   while (found_list.GetSize() > 0) gen_list.Push(found_list.Pop());
916 
917   // Adjust the flags on this batch
918   batch[cur_batch].SetLineage(false);
919   batch[cur_batch].SetAligned(false);
920 }
921 
FindOrganism(cString cur_string)922 void cAnalyze::FindOrganism(cString cur_string)
923 {
924   // At least one argument is rquired.
925   if (cur_string.GetSize() == 0) {
926     cerr << "Error: At least one argument is required in FIND_ORGANISM." << endl;
927     cerr << " (perhaps you want FIND_GENOTYPE?)" << endl;
928     return;
929   }
930 
931   if (m_world->GetVerbosity() >= VERBOSE_ON) {
932     cout << "Reducing batch " << cur_batch << " to organisms: " << endl;
933   }
934 
935   tListPlus<cAnalyzeGenotype> & gen_list = batch[cur_batch].List();
936   tListPlus<cAnalyzeGenotype> found_list;
937 
938   tArray<int> new_counts(gen_list.GetSize());
939   new_counts.SetAll(0);
940 
941   while (cur_string.CountNumWords() > 0) {
942     cString org_desc(cur_string.PopWord());
943     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << org_desc << " ";
944 
945     // Determine by org_desc which genotype were are tracking...
946     if (org_desc == "random") {
947       bool found = false;
948       int num_orgs = gen_list.Count(&cAnalyzeGenotype::GetNumCPUs);
949       while (found != true) {
950       	int org_chosen = random.GetUInt(num_orgs);
951       	cAnalyzeGenotype * found_genotype =
952           gen_list.FindSummedValue(org_chosen, &cAnalyzeGenotype::GetNumCPUs);
953         if ( found_genotype->GetNumCPUs() != 0 && found_genotype->GetViable()) {
954           found_genotype->SetNumCPUs(found_genotype->GetNumCPUs()-1);
955           new_counts[gen_list.FindPosPtr(found_genotype)] +=1;
956           cout << "Found genotype " << gen_list.FindPosPtr(found_genotype) << endl;
957           found = true;
958         }
959       }
960     }
961 
962     // pick a random organisms, with replacement!
963     if (org_desc == "randomWR") {
964       bool found = false;
965       int num_orgs = gen_list.Count(&cAnalyzeGenotype::GetNumCPUs);
966       while (found != true) {
967         int org_chosen = random.GetUInt(num_orgs);
968         cAnalyzeGenotype * found_genotype =
969           gen_list.FindSummedValue(org_chosen, &cAnalyzeGenotype::GetNumCPUs);
970         if ( found_genotype->GetNumCPUs() != 0 && found_genotype->GetViable()) {
971           new_counts[gen_list.FindPosPtr(found_genotype)] +=1;
972           cout << "Found genotype " << gen_list.FindPosPtr(found_genotype) << endl;
973           found = true;
974         }
975       }
976     }
977   }
978 
979   int pos_count = 0;
980   cAnalyzeGenotype * genotype = NULL;
981   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
982 
983   while ((genotype = batch_it.Next()) != NULL) {
984     genotype->SetNumCPUs(new_counts[pos_count]);
985     if (genotype->GetNumCPUs() == 0) batch_it.Remove();
986     else cout << "Genotype " << pos_count << " has " << new_counts[pos_count] << " organisms." << endl;
987     pos_count++;
988   }
989 
990   // Adjust the flags on this batch
991   batch[cur_batch].SetLineage(false);
992   batch[cur_batch].SetAligned(false);
993 }
994 
FindLineage(cString cur_string)995 void cAnalyze::FindLineage(cString cur_string)
996 {
997   cString lin_type = "num_cpus";
998   if (cur_string.CountNumWords() > 0) lin_type = cur_string.PopWord();
999 
1000   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1001     cout << "Reducing batch " << cur_batch
1002     << " to " << lin_type << " lineage " << endl;
1003   } else cout << "Performing lineage scan..." << endl;
1004 
1005 
1006   // Determine by lin_type which genotype we are tracking...
1007   cAnalyzeGenotype * found_gen = PopGenotype(lin_type, cur_batch);
1008 
1009   if (found_gen == NULL) {
1010     cerr << "  Warning: Genotype " << lin_type
1011     << " not found.  Lineage scan aborted." << endl;
1012     return;
1013   }
1014 
1015   // Otherwise, trace back through the id numbers to mark all of those
1016   // in the ancestral lineage...
1017 
1018   // Construct a list of genotypes found...
1019 
1020   tListPlus<cAnalyzeGenotype> found_list;
1021   found_list.Push(found_gen);
1022   int next_id = found_gen->GetParentID();
1023   bool found = true;
1024   while (found == true) {
1025     found = false;
1026 
1027     tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1028     while ((found_gen = batch_it.Next()) != NULL) {
1029       if (found_gen->GetID() == next_id) {
1030         batch_it.Remove();
1031         found_list.Push(found_gen);
1032         next_id = found_gen->GetParentID();
1033         found = true;
1034         break;
1035       }
1036     }
1037   }
1038 
1039   // We now have all of the genotypes in this lineage, delete everything
1040   // else.
1041 
1042   const int total_removed = batch[cur_batch].List().GetSize();
1043   while (batch[cur_batch].List().GetSize() > 0) {
1044     delete batch[cur_batch].List().Pop();
1045   }
1046 
1047   // And fill it back in with the good stuff.
1048   int total_kept = found_list.GetSize();
1049   while (found_list.GetSize() > 0) {
1050     batch[cur_batch].List().PushRear(found_list.Pop());
1051   }
1052 
1053   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1054     cout << "  Lineage has " << total_kept << " genotypes; "
1055     << total_removed << " were removed." << endl;
1056   }
1057 
1058   // Adjust the flags on this batch
1059   batch[cur_batch].SetLineage(true);
1060   batch[cur_batch].SetAligned(false);
1061 }
1062 
1063 
FindSexLineage(cString cur_string)1064 void cAnalyze::FindSexLineage(cString cur_string)
1065 {
1066 
1067   // detemine the method for construicting a lineage
1068   // by defauly, find the lineage of the final dominant
1069   cString lin_type = "num_cpus";
1070   if (cur_string.CountNumWords() > 0) lin_type = cur_string.PopWord();
1071 
1072   // parent_method determins which of the two parental lineages to use
1073   // "rec_region_size" :
1074   //		"mother" (dominant parent) is the parent contributing
1075   // 		more to the offspring genome (default)
1076   // "genome_size" :
1077   //		"mother" (dominant parent) is the longer parent
1078   cString parent_method = "rec_region_size";
1079   if (cur_string.CountNumWords() > 0) parent_method = cur_string.PopWord();
1080 
1081   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1082     cout << "Reducing batch " << cur_batch
1083     << " to " << lin_type << " sexual lineage "
1084     << " using " << parent_method << " criteria." << endl;
1085   } else cout << "Performing sexual lineage scan..." << endl;
1086 
1087 
1088   // Determine by lin_type which genotype we are tracking...
1089   cAnalyzeGenotype * found_gen = PopGenotype(lin_type, cur_batch);
1090 
1091   cAnalyzeGenotype * found_dad;
1092   cAnalyzeGenotype * found_mom;
1093   cAnalyzeGenotype * found_temp;
1094 
1095   if (found_gen == NULL) {
1096     cerr << "  Warning: Genotype " << lin_type
1097     << " not found.  Sexual lineage scan aborted." << endl;
1098     return;
1099   }
1100 
1101   // Otherwise, trace back through the id numbers to mark all of those
1102   // in the ancestral lineage...
1103 
1104   // Construct a list of genotypes found...
1105 
1106   tListPlus<cAnalyzeGenotype> found_list;
1107   found_list.Push(found_gen);
1108   int next_id1 = found_gen->GetParentID();
1109   int next_id2 = found_gen->GetParent2ID();
1110 
1111   bool found_m = true;
1112   bool found_d = true;
1113 
1114   while (found_m == true & found_d == true) {
1115 
1116     //cout << "Searching for mom=" << next_id1
1117     //	 << " and dad=" << next_id2 << endl;
1118     found_m = false;
1119     found_d = false;
1120 
1121     // Look for the father first....
1122     tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1123     batch_it.Reset();
1124     while ((found_dad = batch_it.Next()) != NULL) {
1125       // Check if the father is found...
1126       if (found_dad->GetID() == next_id2) {
1127         //cout << "Found dad!" << endl;
1128         batch_it.Remove();
1129         found_list.Push(found_dad);
1130         found_d = true;
1131         break;
1132       }
1133     }
1134 
1135     // dad may have already been found, check the find list!
1136     if (found_d == false) {
1137       tListIterator<cAnalyzeGenotype> found_it(found_list);
1138       while ((found_dad = found_it.Next()) != NULL) {
1139         if (found_dad->GetID() == next_id2) {
1140           //cout << "Found dad in found list!" << endl;
1141           found_d = true;
1142           break;
1143         }
1144       }
1145     }
1146 
1147     // Next, look for the mother...
1148     batch_it.Reset();
1149     while ((found_mom = batch_it.Next()) != NULL) {
1150       if (found_mom->GetID() == next_id1) {
1151         //cout << "Found mom!" << endl;
1152         batch_it.Remove();
1153         found_list.Push(found_mom);
1154         // if finding lineages by parental length, may have to swap
1155         if (parent_method == "genome_size" && found_mom->GetLength() < found_dad->GetLength()) {
1156           //cout << "Swapping parents!" << endl;
1157           found_temp = found_mom;
1158           found_mom = found_dad;
1159           found_dad = found_temp;
1160         }
1161         next_id1 = found_mom->GetParentID();
1162         next_id2 = found_mom->GetParent2ID();
1163         found_m = true;
1164         break;
1165       }
1166     }
1167 
1168     // If the mother was not found, it may already have been placed in the
1169     // found list as a father...
1170     if (found_m == false) {
1171       tListIterator<cAnalyzeGenotype> found_it(found_list);
1172       while ((found_mom = found_it.Next()) != NULL) {
1173         if (found_mom->GetID() == next_id1) {
1174           //cout << "Found mom as dad!" << endl;
1175           // Don't move to found list, since its already there, but update
1176           // to the next ids.
1177           // if finding lineages by parental length, may have to swap
1178           if (parent_method == "genome_size" && found_mom->GetLength() < found_dad->GetLength()) {
1179             //cout << "Swapping parents!" << endl;
1180             found_temp = found_mom;
1181             found_mom = found_dad;
1182             found_dad = found_temp;
1183           }
1184           next_id1 = found_mom->GetParentID();
1185           next_id2 = found_mom->GetParent2ID();
1186           found_m = true;
1187           break;
1188         }
1189       }
1190     }
1191   }
1192 
1193   // We now have all of the genotypes in this lineage, delete everything
1194   // else.
1195 
1196   const int total_removed = batch[cur_batch].List().GetSize();
1197   while (batch[cur_batch].List().GetSize() > 0) {
1198     delete batch[cur_batch].List().Pop();
1199   }
1200 
1201   // And fill it back in with the good stuff.
1202   int total_kept = found_list.GetSize();
1203   while (found_list.GetSize() > 0) {
1204     batch[cur_batch].List().PushRear(found_list.Pop());
1205   }
1206 
1207   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1208     cout << "  Sexual lineage has " << total_kept << " genotypes; "
1209     << total_removed << " were removed." << endl;
1210   }
1211 
1212   // Adjust the flags on this batch
1213   batch[cur_batch].SetLineage(false);
1214   batch[cur_batch].SetAligned(false);
1215 }
1216 
FindClade(cString cur_string)1217 void cAnalyze::FindClade(cString cur_string)
1218 {
1219   if (cur_string.GetSize() == 0) {
1220     cerr << "  Warning: No clade specified for FIND_CLADE.  Aborting." << endl;
1221     return;
1222   }
1223 
1224   cString clade_type( cur_string.PopWord() );
1225 
1226   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1227     cout << "Reducing batch " << cur_batch
1228     << " to clade " << clade_type << "." << endl;
1229   } else cout << "Performing clade scan..." << endl;
1230 
1231 
1232   // Determine by clade_type which genotype we are tracking...
1233   cAnalyzeGenotype * found_gen = PopGenotype(clade_type, cur_batch);
1234 
1235   if (found_gen == NULL) {
1236     cerr << "  Warning: Ancestral genotype " << clade_type
1237     << " not found.  Clade scan aborted." << endl;
1238     return;
1239   }
1240 
1241   // Do this the brute force way... scan for one step at a time.
1242 
1243   // Construct a list of genotypes found...
1244 
1245   tListPlus<cAnalyzeGenotype> found_list; // Found and finished.
1246   tListPlus<cAnalyzeGenotype> scan_list;  // Found, but need to scan for children.
1247   scan_list.Push(found_gen);
1248 
1249   // Keep going as long as there is something in the scan list...
1250   while (scan_list.GetSize() > 0) {
1251     // Move the next genotype from the scan list to the found_list.
1252     found_gen = scan_list.Pop();
1253     int parent_id = found_gen->GetID();
1254     found_list.Push(found_gen);
1255 
1256     // Seach for all of the children of this genotype...
1257     tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1258     while ((found_gen = batch_it.Next()) != NULL) {
1259       // If we found a child, place it into the scan list.
1260       if (found_gen->GetParentID() == parent_id) {
1261         batch_it.Remove();
1262         scan_list.Push(found_gen);
1263       }
1264     }
1265   }
1266 
1267   // We now have all of the genotypes in this clade, delete everything else.
1268 
1269   const int total_removed = batch[cur_batch].List().GetSize();
1270   while (batch[cur_batch].List().GetSize() > 0) {
1271     delete batch[cur_batch].List().Pop();
1272   }
1273 
1274   // And fill it back in with the good stuff.
1275   int total_kept = found_list.GetSize();
1276   while (found_list.GetSize() > 0) {
1277     batch[cur_batch].List().PushRear(found_list.Pop());
1278   }
1279 
1280   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1281     cout << "  Clade has " << total_kept << " genotypes; "
1282     << total_removed << " were removed." << endl;
1283   }
1284 
1285   // Adjust the flags on this batch
1286   batch[cur_batch].SetLineage(false);
1287   batch[cur_batch].SetAligned(false);
1288 }
1289 
1290 // @JEB 9-25-2008
FindLastCommonAncestor(cString cur_string)1291 void cAnalyze::FindLastCommonAncestor(cString cur_string)
1292 {
1293 
1294   // Assumes that the current batch contains a population and all of its common ancestors
1295   // Finds the last common ancestor among all current organisms that are still alive,
1296   // i.e. have an update_died of -1.
1297 
1298   cout << "Finding last common ancestor of batch " << cur_batch << endl;
1299 
1300   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1301     cout << "  Connecting genotypes to parents. " << endl;
1302   }
1303 
1304   // Connect each genotype to its parent.
1305   tListIterator<cAnalyzeGenotype> child_it(batch[cur_batch].List());
1306   cAnalyzeGenotype * on_child = NULL;
1307   while ((on_child = child_it.Next()) != NULL) {
1308     tListIterator<cAnalyzeGenotype> parent_it(batch[cur_batch].List());
1309     cAnalyzeGenotype * on_parent = NULL;
1310     while ((on_parent = parent_it.Next()) != NULL) {
1311       if (on_child->GetParentID() == on_parent->GetID()) {
1312         on_child->LinkParent(on_parent);
1313         break;
1314       }
1315     }
1316   }
1317 
1318   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1319     cout << "  Finding earliest genotype. " << endl;
1320   }
1321 
1322   // Find the genotype without a parent (there should only be one)
1323   tListIterator<cAnalyzeGenotype> first_lca_it(batch[cur_batch].List());
1324   cAnalyzeGenotype * lca = NULL;
1325   cAnalyzeGenotype * test_lca = NULL;
1326   while ((test_lca = first_lca_it.Next()) != NULL) {
1327     if (!test_lca->GetParent()) {
1328       // It is an error to get two genotypes without a parent
1329       if (lca != NULL) {
1330         cout << "Error: More than one genotype does not have a parent. " << endl;
1331         cout << "Genotype 1: " << test_lca->GetID() << endl;
1332         cout << "Genotype 2: " << lca->GetID() << endl;
1333         return;
1334       }
1335       lca = test_lca;
1336     }
1337   }
1338 
1339   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1340     cout << "  Following children to last common ancestor. " << endl;
1341   }
1342 
1343   // Follow the children from this parent until we find a genotype with
1344   // more than one child. This is the last common ancestor.
1345   while (lca->GetChildList().GetSize() == 1) {
1346     lca = lca->GetChildList().Pop();
1347   }
1348 
1349   // Delete everything else.
1350   tListIterator<cAnalyzeGenotype> delete_batch_it(batch[cur_batch].List());
1351   cAnalyzeGenotype * delete_genotype = NULL;
1352   while ((delete_genotype = delete_batch_it.Next()) != NULL) {
1353     if (delete_genotype->GetID() != lca->GetID()) {
1354       delete delete_genotype;
1355     }
1356   }
1357 
1358   // And fill it back in with the good stuff.
1359   batch[cur_batch].List().Clear();
1360   batch[cur_batch].List().PushRear(lca);
1361 }
1362 
1363 
SampleOrganisms(cString cur_string)1364 void cAnalyze::SampleOrganisms(cString cur_string)
1365 {
1366   double fraction = cur_string.PopWord().AsDouble();
1367   int init_genotypes = batch[cur_batch].List().GetSize();
1368 
1369   double test_viable = 0;
1370   if (cur_string.GetSize() > 0) {
1371     test_viable = cur_string.PopWord().AsDouble();
1372   }
1373 
1374   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1375     cout << "Sampling " << fraction << " organisms from batch "
1376     << cur_batch << "." << endl;
1377   }
1378   else cout << "Sampling Organisms..." << endl;
1379 
1380   cAnalyzeGenotype * genotype = NULL;
1381   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1382 
1383   // Loop through all genotypes to perform a census
1384   int org_count = 0;
1385   while ((genotype = batch_it.Next()) != NULL) {
1386     // If we require viables, reduce all non-viables to zero organisms.
1387     if (test_viable == 1  &&  genotype->GetViable() == 0) {
1388       genotype->SetNumCPUs(0);
1389     }
1390 
1391     // Count the number of organisms in this genotype.
1392     org_count += genotype->GetNumCPUs();
1393   }
1394 
1395   // Create an array to store pointers to the genotypes and fill it in
1396   // while temporarily resetting all of the organism counts to zero.
1397   tArray<cAnalyzeGenotype *> org_array(org_count);
1398   int cur_org = 0;
1399   batch_it.Reset();
1400   while ((genotype = batch_it.Next()) != NULL) {
1401     for (int i = 0; i < genotype->GetNumCPUs(); i++) {
1402       org_array[cur_org] = genotype;
1403       cur_org++;
1404     }
1405     genotype->SetNumCPUs(0);
1406   }
1407 
1408   assert(cur_org == org_count);
1409 
1410   // Determine how many organisms we want to keep.
1411   int new_org_count = (int) fraction;
1412   if (fraction < 1.0) new_org_count = (int) (fraction * (double) org_count);
1413   if (new_org_count > org_count) {
1414     cerr << "Warning: Trying to sample " << new_org_count
1415     << "organisms from a population of " << org_count
1416     << endl;
1417     new_org_count = org_count;
1418   }
1419 
1420   // Now pick those that we are keeping.
1421   tArray<int> keep_ids(new_org_count);
1422   random.Choose(org_count, keep_ids);
1423 
1424   // And increment the org counts for the associated genotypes.
1425   for (int i = 0; i < new_org_count; i++) {
1426     genotype = org_array[ keep_ids[i] ];
1427     genotype->SetNumCPUs(genotype->GetNumCPUs() + 1);
1428   }
1429 
1430 
1431   // Delete all genotypes with no remaining organisms...
1432   batch_it.Reset();
1433   while ((genotype = batch_it.Next()) != NULL) {
1434     if (genotype->GetNumCPUs() == 0) {
1435       batch_it.Remove();
1436       delete genotype;
1437     }
1438   }
1439 
1440   int num_genotypes = batch[cur_batch].List().GetSize();
1441   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1442     cout << "  Removed " << org_count - new_org_count
1443     << " organisms (" << init_genotypes - num_genotypes
1444     << " genotypes); " << new_org_count
1445     << " orgs (" << num_genotypes << " gens) remaining."
1446     << endl;
1447   }
1448 
1449   // Adjust the flags on this batch
1450   batch[cur_batch].SetLineage(false);
1451   batch[cur_batch].SetAligned(false);
1452 }
1453 
1454 
SampleGenotypes(cString cur_string)1455 void cAnalyze::SampleGenotypes(cString cur_string)
1456 {
1457   double fraction = cur_string.PopWord().AsDouble();
1458   int init_genotypes = batch[cur_batch].List().GetSize();
1459 
1460   double test_viable = 0;
1461   if (cur_string.GetSize() > 0) {
1462     test_viable = cur_string.PopWord().AsDouble();
1463   }
1464 
1465   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1466     cout << "Sampling " << fraction << " genotypes from batch "
1467     << cur_batch << "." << endl;
1468   }
1469   else cout << "Sampling Genotypes..." << endl;
1470 
1471   double frac_remove = 1.0 - fraction;
1472 
1473   cAnalyzeGenotype * genotype = NULL;
1474 
1475   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1476   while ((genotype = batch_it.Next()) != NULL) {
1477     if (random.P(frac_remove) || ((genotype->GetViable())==0 && test_viable==1) ) {
1478       batch_it.Remove();
1479       delete genotype;
1480     }
1481   }
1482 
1483   int num_genotypes = batch[cur_batch].List().GetSize();
1484   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1485     cout << "  Removed " << init_genotypes - num_genotypes
1486     << " genotypes; " << num_genotypes << " remaining."
1487     << endl;
1488   }
1489 
1490   // Adjust the flags on this batch
1491   batch[cur_batch].SetLineage(false);
1492   batch[cur_batch].SetAligned(false);
1493 }
1494 
KeepTopGenotypes(cString cur_string)1495 void cAnalyze::KeepTopGenotypes(cString cur_string)
1496 {
1497   const int num_kept = cur_string.PopWord().AsInt();
1498   const int num_genotypes = batch[cur_batch].List().GetSize();
1499   const int num_removed = num_genotypes - num_kept;
1500 
1501   for (int i = 0; i < num_removed; i++) {
1502     delete batch[cur_batch].List().PopRear();
1503   }
1504 
1505   // Adjust the flags on this batch
1506   // batch[cur_batch].SetLineage(false); // Should not destroy a lineage...
1507   batch[cur_batch].SetAligned(false);
1508 }
1509 
TruncateLineage(cString cur_string)1510 void cAnalyze::TruncateLineage(cString cur_string)
1511 {
1512   cString type("task");
1513   int arg_i = -1;
1514   if (cur_string.GetSize()) type = cur_string.PopWord();
1515   if (type == "task") {
1516     if (cur_string.GetSize()) arg_i = cur_string.PopWord().AsInt();
1517     const int env_size = m_world->GetEnvironment().GetNumTasks();
1518     if (arg_i < 0 || arg_i >= env_size) arg_i = env_size - 1;
1519   }
1520   cString lin_type("num_cpus");
1521   if (cur_string.GetSize()) lin_type = cur_string.PopWord();
1522   FindLineage(lin_type);
1523   BatchRecalculate("");
1524 
1525   if (type == "task") {
1526     if (m_world->GetVerbosity() >= VERBOSE_ON)
1527       cout << "Truncating batch " << cur_batch << " based on task " << arg_i << " emergence..." << endl;
1528     else
1529       cout << "Truncating lineage..." << endl;
1530 
1531     bool found = false;
1532     tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1533     cAnalyzeGenotype* genotype = NULL;
1534 
1535     while ((genotype = batch_it.Next())) {
1536       if (found) {
1537         batch_it.Remove();
1538         delete genotype;
1539         continue;
1540       }
1541       if (genotype->GetTaskCount(arg_i)) found = true;
1542     }
1543   }
1544 }
1545 
1546 // JEB: Creates specified number of offspring by running
1547 // each organism in the test CPU with mutations on.
SampleOffspring(cString cur_string)1548 void cAnalyze::SampleOffspring(cString cur_string)
1549 {
1550   int number_to_sample = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 1000;
1551 
1552   // These parameters copied from BatchRecalculate, they could change what kinds of offspring are produced!!
1553   tArray<int> manual_inputs;  // Used only if manual inputs are specified
1554   cString msg;                // Holds any information we may want to send the driver to display
1555   int use_resources      = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 0;
1556   int update             = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;
1557   bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false;
1558   bool use_manual_inputs = false;
1559 
1560   //Manual inputs will override random input request and must be the last arguments.
1561   if (cur_string.CountNumWords() > 0){
1562     if (cur_string.CountNumWords() == m_world->GetEnvironment().GetInputSize()){
1563       manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
1564       use_random_inputs = false;
1565       use_manual_inputs = true;
1566       for (int k = 0; cur_string.GetSize(); k++)
1567         manual_inputs[k] = cur_string.PopWord().AsInt();
1568     } else if (m_world->GetVerbosity() >= VERBOSE_ON){
1569       msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
1570               cur_string.CountNumWords(), m_world->GetEnvironment().GetInputSize());
1571       m_world->GetDriver().NotifyWarning(msg);
1572     }
1573   }
1574 
1575   cCPUTestInfo test_info(1); //we only allow one generation of testing! v. important to get proper offspring
1576   if (use_manual_inputs)
1577     test_info.UseManualInputs(manual_inputs);
1578   else
1579     test_info.UseRandomInputs(use_random_inputs);
1580   test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
1581 
1582   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1583     msg.Set("Sampling %d offspring from each of the %d organisms in batch %d...", number_to_sample, batch[cur_batch].GetSize(), cur_batch);
1584     m_world->GetDriver().NotifyComment(msg);
1585   } else{
1586     msg.Set("Sampling offspring...");
1587     m_world->GetDriver().NotifyComment(msg);
1588   }
1589 
1590   // Load the mutation rates from the environment.
1591   test_info.MutationRates().Copy(m_world->GetEnvironment().GetMutRates());
1592   // Copy them into the organism
1593   tListPlus<cAnalyzeGenotype> offspring_list;
1594   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1595   cAnalyzeGenotype* parent_genotype = NULL;
1596 
1597   cTestCPU * test_cpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
1598   while ((parent_genotype = batch_it.Next())) {
1599 
1600     // We keep a hash with genome strings as keys
1601     // to save duplication of the same offspring genotype.
1602     // NumCPUs is incremented whenever an offspring is
1603     // created more than once from the same parent.
1604     tDictionary<cAnalyzeGenotype*> genome_hash;
1605 
1606     for (int i=0; i<number_to_sample; i++) {
1607       test_cpu->TestGenome(m_world->GetDefaultContext(), test_info, parent_genotype->GetGenome());
1608       cAnalyzeGenotype * offspring_genotype = NULL;
1609       bool found = genome_hash.Find(test_info.GetTestOrganism(0)->OffspringGenome().GetSequence().AsString(), offspring_genotype);
1610       if (found) {
1611         offspring_genotype->SetNumCPUs(offspring_genotype->GetNumCPUs() + 1);
1612       }
1613       else {
1614         cAnalyzeGenotype* offspring_genotype = new cAnalyzeGenotype(m_world, test_info.GetTestOrganism(0)->OffspringGenome());
1615         offspring_genotype->SetID(parent_genotype->GetID());
1616         offspring_genotype->SetNumCPUs(1);
1617         offspring_list.Push(offspring_genotype);
1618         genome_hash.Set(test_info.GetTestOrganism(0)->OffspringGenome().GetSequence().AsString(), offspring_genotype);
1619       }
1620     }
1621     batch_it.Remove();
1622     delete parent_genotype;
1623   }
1624   delete test_cpu;
1625 
1626   // Fill back in the current batch with the new offspring
1627   while (offspring_list.GetSize() > 0) {
1628     batch[cur_batch].List().PushRear(offspring_list.Pop());
1629   }
1630 
1631 }
1632 
1633 
1634 //////////////// Output Commands...
1635 
CommandPrint(cString cur_string)1636 void cAnalyze::CommandPrint(cString cur_string)
1637 {
1638   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing batch " << cur_batch << endl;
1639   else cout << "Printing organisms..." << endl;
1640 
1641   cString directory = PopDirectory(cur_string, "archive/");
1642   // Weirdly, PopDirectory() doesn't actually pop, so...
1643   cur_string.PopWord();  // There, that actually removes the directory string
1644 
1645   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1646   cAnalyzeGenotype* genotype = NULL;
1647   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
1648   while ((genotype = batch_it.Next()) != NULL) {
1649     cString filename(directory);
1650 
1651     if (cur_string.GetSize() > 0) {
1652       filename += cur_string.PopWord();
1653     }
1654     else {
1655       filename += genotype->GetName();
1656       filename += ".gen";
1657     }
1658 
1659     testcpu->PrintGenome(m_ctx, genotype->GetGenome(), filename);
1660     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing: " << filename << endl;
1661   }
1662   delete testcpu;
1663 }
1664 
CommandTrace(cString cur_string)1665 void cAnalyze::CommandTrace(cString cur_string)
1666 {
1667   cString msg;
1668   tArray<int> manual_inputs;
1669   int sg = 0;
1670 
1671   // Process our arguments; manual inputs must be the last arguments
1672 
1673   cString directory      = PopDirectory(cur_string.PopWord(), cString("archive/"));           // #1
1674   cString first_arg = cur_string.PopWord();
1675 
1676   if (first_arg.IsSubstring("sg=", 0)) {
1677     first_arg.Pop('=');
1678     sg = first_arg.AsInt();
1679     if (sg < 0 || sg >= m_world->GetEnvironment().GetNumStateGrids()) {
1680       msg.Set("invalid state grid selection");
1681       m_world->GetDriver().NotifyWarning(msg);
1682       return;
1683     }
1684     first_arg = cur_string.PopWord();
1685   }
1686 
1687   int use_resources      = (first_arg.GetSize()) ? first_arg.AsInt() : 0;                     // #2
1688   int update             = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;        // #3
1689   bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false; // #4
1690   bool use_manual_inputs = false;                                                             // #5+
1691 
1692   //Manual inputs will override random input request
1693   if (cur_string.CountNumWords() > 0){
1694     if (cur_string.CountNumWords() == m_world->GetEnvironment().GetInputSize()){
1695       manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
1696       use_random_inputs = false;
1697       use_manual_inputs = true;
1698       for (int k = 0; cur_string.GetSize(); k++)
1699         manual_inputs[k] = cur_string.PopWord().AsInt();
1700     } else if (m_world->GetVerbosity() >= VERBOSE_ON){
1701       msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
1702               cur_string.CountNumWords(), m_world->GetEnvironment().GetInputSize());
1703       m_world->GetDriver().NotifyWarning(msg);
1704     }
1705   }
1706 
1707 
1708   if (m_world->GetVerbosity() >= VERBOSE_ON)
1709     msg.Set("Tracing batch %d", cur_batch);
1710   else
1711     msg.Set("Tracing organisms.");
1712   m_world->GetDriver().NotifyComment(msg);
1713 
1714   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
1715 
1716   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1717   cAnalyzeGenotype * genotype = NULL;
1718   while ((genotype = batch_it.Next()) != NULL) {
1719     cString filename = directory + genotype->GetName() + cString(".trace");
1720 
1721     if (genotype->GetGenome().GetSize() == 0)
1722       break;
1723 
1724     // Build the hardware status printer for tracing.
1725     ofstream& trace_fp = m_world->GetDataFileOFStream(filename);
1726     cHardwareStatusPrinter trace_printer(trace_fp);
1727 
1728     // Build the test info for printing.
1729     cCPUTestInfo test_info;
1730     test_info.SetTraceExecution(&trace_printer);
1731     if (use_manual_inputs)
1732       test_info.UseManualInputs(manual_inputs);
1733     else
1734       test_info.UseRandomInputs(use_random_inputs);
1735     test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
1736     test_info.SetCurrentStateGridID(sg);
1737 
1738     if (m_world->GetVerbosity() >= VERBOSE_ON){
1739       msg = cString("Tracing ") + filename;
1740       m_world->GetDriver().NotifyComment(msg);
1741     }
1742 
1743     testcpu->TestGenome(m_ctx, test_info, genotype->GetGenome());
1744 
1745     m_world->GetDataFileManager().Remove(filename);
1746   }
1747 
1748   delete testcpu;
1749 }
1750 
1751 
CommandPrintTasks(cString cur_string)1752 void cAnalyze::CommandPrintTasks(cString cur_string)
1753 {
1754   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing tasks in batch " << cur_batch << endl;
1755   else cout << "Printing tasks..." << endl;
1756 
1757   // Load in the variables...
1758   cString filename("tasks.dat");
1759   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1760 
1761   ofstream& fp = m_world->GetDataFileOFStream(filename);
1762 
1763   // Loop through all of the genotypes in this batch...
1764   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1765   cAnalyzeGenotype * genotype = NULL;
1766   while ((genotype = batch_it.Next()) != NULL) {
1767     fp << genotype->GetID() << " ";
1768     genotype->PrintTasks(fp);
1769     fp << endl;
1770   }
1771 }
1772 
CommandPrintTasksQuality(cString cur_string)1773 void cAnalyze::CommandPrintTasksQuality(cString cur_string)
1774 {
1775   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing task qualities in batch " << cur_batch << endl;
1776   else cout << "Printing task qualities..." << endl;
1777 
1778   // Load in the variables...
1779   cString filename("tasksquality.dat");
1780   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1781 
1782   ofstream& fp = m_world->GetDataFileOFStream(filename);
1783 
1784   // Loop through all of the genotypes in this batch...
1785   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1786   cAnalyzeGenotype * genotype = NULL;
1787   while ((genotype = batch_it.Next()) != NULL) {
1788     fp << genotype->GetID() << " ";
1789     genotype->PrintTasksQuality(fp);
1790     fp << endl;
1791   }
1792 }
1793 
CommandDetail(cString cur_string)1794 void cAnalyze::CommandDetail(cString cur_string)
1795 {
1796   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Detailing batch " << cur_batch << endl;
1797   else cout << "Detailing..." << endl;
1798 
1799   // @JEB return if there are no organisms in the current batch
1800   if (batch[cur_batch].GetSize() == 0) return;
1801 
1802   // Load in the variables...
1803   cString filename("detail.dat");
1804   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1805 
1806   // Construct a linked list of details needed...
1807   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
1808   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
1809   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
1810 
1811   // Determine the file type...
1812   int file_type = FILE_TYPE_TEXT;
1813   cString file_extension(filename);
1814   while (file_extension.Find('.') != -1) file_extension.Pop('.');
1815   if (file_extension == "html") file_type = FILE_TYPE_HTML;
1816 
1817   // Setup the file...
1818   if (filename == "cout") {
1819     CommandDetail_Header(cout, file_type, output_it);
1820     CommandDetail_Body(cout, file_type, output_it);
1821   } else {
1822     ofstream& fp = m_world->GetDataFileOFStream(filename);
1823     CommandDetail_Header(fp, file_type, output_it);
1824     CommandDetail_Body(fp, file_type, output_it);
1825 		m_world->GetDataFileManager().Remove(filename);
1826 	}
1827 
1828   // And clean up...
1829   while (output_list.GetSize() != 0) delete output_list.Pop();
1830 }
1831 
1832 
CommandDetailTimeline(cString cur_string)1833 void cAnalyze::CommandDetailTimeline(cString cur_string)
1834 {
1835   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Detailing batch "
1836     << cur_batch << " based on time" << endl;
1837   else cout << "Detailing..." << endl;
1838 
1839   // Load in the variables...
1840   cString filename("detail_timeline.dat");
1841   int time_step = 100;
1842   int max_time = 100000;
1843   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1844   if (cur_string.GetSize() != 0) time_step = cur_string.PopWord().AsInt();
1845   if (cur_string.GetSize() != 0) max_time = cur_string.PopWord().AsInt();
1846 
1847   if (m_world->GetVerbosity() >= VERBOSE_ON) {
1848     cout << "  Time step = " << time_step << endl
1849     << "  Max time = " << max_time << endl;
1850   }
1851 
1852   // Construct a linked list of details needed...
1853   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
1854   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
1855   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
1856 
1857   // Determine the file type...
1858   int file_type = FILE_TYPE_TEXT;
1859   cString file_extension(filename);
1860   while (file_extension.Find('.') != -1) file_extension.Pop('.');
1861   if (file_extension == "html") file_type = FILE_TYPE_HTML;
1862 
1863   // Setup the file...
1864   if (filename == "cout") {
1865     CommandDetail_Header(cout, file_type, output_it, time_step);
1866     CommandDetail_Body(cout, file_type, output_it, time_step, max_time);
1867   } else {
1868     ofstream& fp = m_world->GetDataFileOFStream(filename);
1869     CommandDetail_Header(fp, file_type, output_it, time_step);
1870     CommandDetail_Body(fp, file_type, output_it, time_step, max_time);
1871   }
1872 
1873   // And clean up...
1874   while (output_list.GetSize() != 0) delete output_list.Pop();
1875 }
1876 
1877 
CommandDetail_Header(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it,int time_step)1878 void cAnalyze::CommandDetail_Header(ostream& fp, int format_type,
1879                                     tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it,
1880                                     int time_step)
1881 {
1882   cAnalyzeGenotype* cur_genotype = batch[cur_batch].List().GetFirst();
1883 
1884   // Write out the header on the file
1885   if (format_type == FILE_TYPE_TEXT) {
1886     fp << "#filetype genotype_data" << endl;
1887     fp << "#format ";
1888     if (time_step > 0) fp << "update ";
1889     while (output_it.Next() != NULL) {
1890       const cString& entry_name = output_it.Get()->GetName();
1891       fp << entry_name << " ";
1892     }
1893     fp << endl << endl;
1894 
1895     // Give the more human-readable legend.
1896     fp << "# Legend:" << endl;
1897     int count = 0;
1898     if (time_step > 0) fp << "# " << ++count << ": Update" << endl;
1899     while (output_it.Next() != NULL) {
1900       const cString& entry_desc = output_it.Get()->GetDesc(cur_genotype);
1901       fp << "# " << ++count << ": " << entry_desc << endl;
1902     }
1903     fp << endl;
1904   } else { // if (format_type == FILE_TYPE_HTML) {
1905     fp << "<html>" << endl
1906     << "<body bgcolor=\"#FFFFFF\"" << endl
1907     << " text=\"#000000\"" << endl
1908     << " link=\"#0000AA\"" << endl
1909     << " alink=\"#0000FF\"" << endl
1910     << " vlink=\"#000044\">" << endl
1911     << endl
1912     << "<h1 align=center>Run " << batch[cur_batch].Name() << endl
1913     << endl
1914     << "<center>" << endl
1915     << "<table border=1 cellpadding=2><tr>" << endl;
1916 
1917     if (time_step > 0) fp << "<th bgcolor=\"#AAAAFF\">Update ";
1918     while (output_it.Next() != NULL) {
1919       const cString& entry_desc = output_it.Get()->GetDesc(cur_genotype);
1920       fp << "<th bgcolor=\"#AAAAFF\">" << entry_desc << " ";
1921     }
1922     fp << "</tr>" << endl;
1923 
1924   }
1925 
1926   }
1927 
1928 
CommandDetail_Body(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it,int time_step,int max_time)1929 void cAnalyze::CommandDetail_Body(ostream& fp, int format_type,
1930                                   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > & output_it,
1931                                   int time_step, int max_time)
1932 {
1933   // Loop through all of the genotypes in this batch...
1934   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1935   cAnalyzeGenotype * cur_genotype = batch_it.Next();
1936   cAnalyzeGenotype * next_genotype = batch_it.Next();
1937   cAnalyzeGenotype * prev_genotype = NULL;
1938 
1939   int cur_time = 0;
1940   while (cur_genotype != NULL && cur_time <= max_time) {
1941     if (m_world->GetVerbosity() >= VERBOSE_DETAILS) {
1942       cout << "Detailing genotype " << cur_genotype->GetID()
1943       << " at depth " << cur_genotype->GetDepth()
1944       << endl;
1945     }
1946     output_it.Reset();
1947     if (format_type == FILE_TYPE_HTML) {
1948       fp << "<tr>";
1949       if (time_step > 0) fp << "<td>" << cur_time << " ";
1950     }
1951     else if (time_step > 0) {  // TEXT file, printing times...
1952       fp << cur_time << " ";
1953     }
1954 
1955     tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
1956     while ((data_command = output_it.Next()) != NULL) {
1957       cFlexVar cur_value = data_command->GetValue(cur_genotype);
1958       if (format_type == FILE_TYPE_HTML) {
1959         int compare = 0;
1960         if (prev_genotype) {
1961           cFlexVar prev_value = data_command->GetValue(prev_genotype);
1962           int compare_type = data_command->GetCompareType();
1963           compare = CompareFlexStat(cur_value, prev_value, compare_type);
1964         }
1965         HTMLPrintStat(cur_value, fp, compare, data_command->GetHtmlCellFlags(), data_command->GetNull());
1966       }
1967       else {  // if (format_type == FILE_TYPE_TEXT) {
1968         fp << data_command->GetValue(cur_genotype) << " ";
1969       }
1970       }
1971     if (format_type == FILE_TYPE_HTML) fp << "</tr>";
1972     fp << endl;
1973 
1974     cur_time += time_step;
1975     if (time_step > 0) {
1976       while (next_genotype && next_genotype->GetUpdateBorn() < cur_time) {
1977         prev_genotype = cur_genotype;
1978         cur_genotype = next_genotype;
1979         next_genotype = batch_it.Next();
1980       }
1981     }
1982     else {
1983       // Always moveon if we're not basing this on time, or if we've run out of genotypes.
1984       prev_genotype = cur_genotype;
1985       cur_genotype = next_genotype;
1986       next_genotype = batch_it.Next();
1987     }
1988 
1989     }
1990 
1991   // If in HTML mode, we need to end the file...
1992   if (format_type == FILE_TYPE_HTML) {
1993     fp << "</table>" << endl
1994     << "</center>" << endl;
1995   }
1996   }
1997 
CommandDetailAverage_Body(ostream & fp,int nucoutputs,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it)1998 void cAnalyze::CommandDetailAverage_Body(ostream& fp, int nucoutputs,
1999                                          tListIterator< tDataEntryCommand<cAnalyzeGenotype> > & output_it)
2000 {
2001   // Loop through all of the genotypes in this batch...
2002   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2003   cAnalyzeGenotype * cur_genotype = batch_it.Next();
2004   cAnalyzeGenotype * next_genotype = batch_it.Next();
2005   cAnalyzeGenotype * prev_genotype = NULL;
2006 
2007   tArray<cDoubleSum> output_counts(nucoutputs);
2008   for (int i = 0; i < nucoutputs; i++) { output_counts[i].Clear();}
2009   int count;
2010   while (cur_genotype != NULL) {
2011     count = 0;
2012     output_it.Reset();
2013     tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
2014     while ((data_command = output_it.Next()) != NULL) {
2015       for (int j = 0; j < cur_genotype->GetNumCPUs(); j++) {
2016         output_counts[count].Add( data_command->GetValue(cur_genotype).AsDouble() );
2017       }
2018       count++;
2019     }
2020 
2021     prev_genotype = cur_genotype;
2022     cur_genotype = next_genotype;
2023     next_genotype = batch_it.Next();
2024   }
2025   fp << batch[cur_batch].Name() << " ";
2026   for (int i = 0; i < nucoutputs; i++) {
2027     fp << output_counts[i].Average() << " ";
2028   }
2029   fp << endl;
2030 }
2031 
CommandDetailAverage(cString cur_string)2032 void cAnalyze::CommandDetailAverage(cString cur_string)
2033 {
2034   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Average detailing batch " << cur_batch << endl;
2035   else cout << "Detailing..." << endl;
2036 
2037   // Load in the variables...
2038   cString filename("detail.dat");
2039   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2040 
2041   // Construct a linked list of details needed...
2042   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
2043   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
2044   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
2045 
2046   // check if file is already in use.
2047   bool file_active = m_world->GetDataFileManager().IsOpen(filename);
2048 
2049   ofstream& fp = m_world->GetDataFileOFStream(filename);
2050 
2051   // if it's a new file print out the header
2052   if (file_active == false) {
2053     CommandDetail_Header(fp, FILE_TYPE_TEXT, output_it);
2054   }
2055   CommandDetailAverage_Body(fp, cur_string.CountNumWords(), output_it);
2056 
2057   while (output_list.GetSize() != 0) delete output_list.Pop();
2058 
2059 }
2060 
CommandDetailBatches(cString cur_string)2061 void cAnalyze::CommandDetailBatches(cString cur_string)
2062 {
2063   // Load in the variables...
2064   cString keyword("num_cpus");
2065   cString filename("detail_batch.dat");
2066   if (cur_string.GetSize() != 0) keyword = cur_string.PopWord();
2067   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2068 
2069   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Detailing batches for " << keyword << endl;
2070   else cout << "Detailing Batches..." << endl;
2071 
2072   // Find its associated command...
2073   tDataEntryCommand<cAnalyzeGenotype>* cur_command = cAnalyzeGenotype::GetDataCommandManager().GetDataCommand(keyword);
2074   if (!cur_command) {
2075     cout << "error: no data entry, unable to detail batches" << endl;
2076     return;
2077   }
2078 
2079 
2080   // Determine the file type...
2081   int file_type = FILE_TYPE_TEXT;
2082   cString file_extension(filename);
2083   while (file_extension.Find('.') != -1) file_extension.Pop('.');
2084   if (file_extension == "html") file_type = FILE_TYPE_HTML;
2085 
2086   ofstream& fp = m_world->GetDataFileOFStream(filename);
2087   cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2088 
2089   // Write out the header on the file
2090   if (file_type == FILE_TYPE_TEXT) {
2091     fp << "#filetype batch_data" << endl
2092     << "#format batch_id " << keyword << endl
2093     << endl;
2094 
2095     // Give the more human-readable legend.
2096     fp << "# Legend:" << endl
2097       << "#  Column 1 = Batch ID" << endl
2098       << "#  Remaining entries: " << cur_command->GetDesc(first_genotype) << endl
2099       << endl;
2100 
2101   } else { // if (file_type == FILE_TYPE_HTML) {
2102     fp << "<html>" << endl
2103     << "<body bgcolor=\"#FFFFFF\"" << endl
2104     << " text=\"#000000\"" << endl
2105     << " link=\"#0000AA\"" << endl
2106     << " alink=\"#0000FF\"" << endl
2107     << " vlink=\"#000044\">" << endl
2108     << endl
2109     << "<h1 align=center> Distribution of " << cur_command->GetDesc(first_genotype)
2110     << endl << endl
2111     << "<center>" << endl
2112     << "<table border=1 cellpadding=2>" << endl
2113     << "<tr><th bgcolor=\"#AAAAFF\">" << cur_command->GetDesc(first_genotype) << "</tr>"
2114     << endl;
2115   }
2116 
2117 
2118   // Loop through all of the batches...
2119   for (int i = 0; i < GetNumBatches(); i++) {
2120     if (batch[i].List().GetSize() == 0) continue;
2121 
2122     if (file_type == FILE_TYPE_HTML) fp << "<tr><td>";
2123     fp << i << " ";
2124 
2125     tListIterator<cAnalyzeGenotype> batch_it(batch[i].List());
2126     cAnalyzeGenotype * genotype = NULL;
2127     while ((genotype = batch_it.Next()) != NULL) {
2128       if (file_type == FILE_TYPE_HTML) fp << "<td>";
2129 
2130       if (file_type == FILE_TYPE_HTML) {
2131         HTMLPrintStat(cur_command->GetValue(genotype), fp, 0, cur_command->GetHtmlCellFlags(), cur_command->GetNull());
2132       }
2133       else {  // if (file_type == FILE_TYPE_TEXT) {
2134         fp << cur_command->GetValue(genotype) << " ";
2135       }
2136       }
2137     if (file_type == FILE_TYPE_HTML) fp << "</tr>";
2138     fp << endl;
2139     }
2140 
2141   // If in HTML mode, we need to end the file...
2142   if (file_type == FILE_TYPE_HTML) {
2143     fp << "</table>" << endl
2144     << "</center>" << endl;
2145   }
2146 
2147   delete cur_command;
2148 }
2149 
2150 
2151 
CommandDetailIndex(cString cur_string)2152 void cAnalyze::CommandDetailIndex(cString cur_string)
2153 {
2154   cout << "Creating a Detail Index..." << endl;
2155 
2156   // A filename and min and max batches must be included.
2157   if (cur_string.CountNumWords() < 3) {
2158     cerr << "Error: must include filename, and min and max batch numbers." << endl;
2159     if (exit_on_error) exit(1);
2160   }
2161 
2162   // Load in the variables...
2163   cString filename(cur_string.PopWord());
2164   int min_batch = cur_string.PopWord().AsInt();
2165   int max_batch = cur_string.PopWord().AsInt();
2166 
2167   if (max_batch < min_batch) {
2168     cerr << "Error: min_batch=" << min_batch
2169     << ", max_batch=" << max_batch << "  (incorrect order?)" << endl;
2170     if (exit_on_error) exit(1);
2171   }
2172 
2173   // Construct a linked list of details needed...
2174   tList<tDataEntryCommand<cAnalyzeGenotype> > output_list;
2175   tListIterator<tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
2176   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cStringList(cur_string), output_list);
2177 
2178 
2179   // Setup the file...
2180   ofstream& fp = m_world->GetDataFileOFStream(filename);
2181   cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2182 
2183   // Determine the file type...
2184   int file_type = FILE_TYPE_TEXT;
2185   while (filename.Find('.') != -1) filename.Pop('.'); // Grab only extension
2186   if (filename == "html") file_type = FILE_TYPE_HTML;
2187 
2188   // Write out the header on the file
2189   if (file_type == FILE_TYPE_TEXT) {
2190     fp << "#filetype genotype_data" << endl;
2191     fp << "#format ";
2192     while (output_it.Next() != NULL) {
2193       const cString & entry_name = output_it.Get()->GetName();
2194       fp << entry_name << " ";
2195     }
2196     fp << endl << endl;
2197 
2198     // Give the more human-readable legend.
2199     fp << "# Legend:" << endl;
2200     fp << "# 1: Batch Name" << endl;
2201     int count = 1;
2202     while (output_it.Next() != NULL) {
2203       const cString& entry_desc = output_it.Get()->GetDesc(first_genotype);
2204       fp << "# " << ++count << ": " << entry_desc << endl;
2205     }
2206     fp << endl;
2207   } else { // if (file_type == FILE_TYPE_HTML) {
2208     fp << "<html>" << endl
2209     << "<body bgcolor=\"#FFFFFF\"" << endl
2210     << " text=\"#000000\"" << endl
2211     << " link=\"#0000AA\"" << endl
2212     << " alink=\"#0000FF\"" << endl
2213     << " vlink=\"#000044\">" << endl
2214     << endl
2215     << "<h1 align=center>Batch Index" << endl
2216     << endl
2217     << "<center>" << endl
2218     << "<table border=1 cellpadding=2><tr>" << endl;
2219 
2220     fp << "<th bgcolor=\"#AAAAFF\">Batch ";
2221     while (output_it.Next() != NULL) {
2222       const cString& entry_desc = output_it.Get()->GetDesc(first_genotype);
2223       fp << "<th bgcolor=\"#AAAAFF\">" << entry_desc << " ";
2224     }
2225     fp << "</tr>" << endl;
2226 
2227   }
2228 
2229   // Loop through all of the batchs...
2230   for (int batch_id = min_batch; batch_id <= max_batch; batch_id++) {
2231     cAnalyzeGenotype * genotype = batch[batch_id].List().GetFirst();
2232     if (genotype == NULL) continue;
2233     output_it.Reset();
2234     tDataEntryCommand<cAnalyzeGenotype>* data_entry = NULL;
2235     const cString & batch_name = batch[batch_id].Name();
2236     if (file_type == FILE_TYPE_HTML) {
2237       fp << "<tr><th><a href=lineage." << batch_name << ".html>"
2238       << batch_name << "</a> ";
2239     } else {
2240       fp << batch_name << " ";
2241     }
2242 
2243     while ((data_entry = output_it.Next()) != NULL) {
2244       if (file_type == FILE_TYPE_HTML) {
2245         fp << "<td align=center><a href=\""
2246         << data_entry->GetName() << "." << batch_name << ".png\">"
2247         << data_entry->GetValue(genotype) << "</a> ";
2248       } else {  // if (file_type == FILE_TYPE_TEXT) {
2249         fp << data_entry->GetValue(genotype) << " ";
2250       }
2251       }
2252     if (file_type == FILE_TYPE_HTML) fp << "</tr>";
2253     fp << endl;
2254     }
2255 
2256   // If in HTML mode, we need to end the file...
2257   if (file_type == FILE_TYPE_HTML) {
2258     fp << "</table>" << endl
2259     << "</center>" << endl;
2260   }
2261   }
2262 
CommandHistogram(cString cur_string)2263 void cAnalyze::CommandHistogram(cString cur_string)
2264 {
2265   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Histogram batch " << cur_batch << endl;
2266   else cout << "Histograming..." << endl;
2267 
2268   // Load in the variables...
2269   cString filename("histogram.dat");
2270   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2271 
2272   // Construct a linked list of details needed...
2273   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
2274   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
2275   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
2276 
2277   // Determine the file type...
2278   int file_type = FILE_TYPE_TEXT;
2279   cString file_extension(filename);
2280   while (file_extension.Find('.') != -1) file_extension.Pop('.');
2281   if (file_extension == "html") file_type = FILE_TYPE_HTML;
2282 
2283   // Setup the file...
2284   if (filename == "cout") {
2285     CommandHistogram_Header(cout, file_type, output_it);
2286     CommandHistogram_Body(cout, file_type, output_it);
2287   } else {
2288     ofstream& fp = m_world->GetDataFileOFStream(filename);
2289     CommandHistogram_Header(fp, file_type, output_it);
2290     CommandHistogram_Body(fp, file_type, output_it);
2291   }
2292 
2293   // And clean up...
2294   while (output_list.GetSize() != 0) delete output_list.Pop();
2295 }
2296 
CommandHistogram_Header(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it)2297 void cAnalyze::CommandHistogram_Header(ostream& fp, int format_type,
2298                                        tListIterator< tDataEntryCommand<cAnalyzeGenotype> > & output_it)
2299 {
2300   cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2301 
2302   // Write out the header on the file
2303   if (format_type == FILE_TYPE_TEXT) {
2304     fp << "#filetype histogram_data" << endl;
2305     fp << "#format ";
2306     while (output_it.Next() != NULL) {
2307       const cString & entry_name = output_it.Get()->GetName();
2308       fp << entry_name << " ";
2309     }
2310     fp << endl << endl;
2311 
2312     // Give the more human-readable legend.
2313     fp << "# Histograms:" << endl;
2314     int count = 0;
2315     while (output_it.Next() != NULL) {
2316       const cString & entry_desc = output_it.Get()->GetDesc(first_genotype);
2317       fp << "# " << ++count << ": " << entry_desc << endl;
2318     }
2319     fp << endl;
2320   } else { // if (format_type == FILE_TYPE_HTML) {
2321     fp << "<html>" << endl
2322     << "<body bgcolor=\"#FFFFFF\"" << endl
2323     << " text=\"#000000\"" << endl
2324     << " link=\"#0000AA\"" << endl
2325     << " alink=\"#0000FF\"" << endl
2326     << " vlink=\"#000044\">" << endl
2327     << endl
2328     << "<h1 align=center>Histograms for " << batch[cur_batch].Name()
2329     << "</h1>" << endl
2330     << endl
2331     << "<center>" << endl
2332     << "<table border=1 cellpadding=2><tr>" << endl;
2333 
2334     while (output_it.Next() != NULL) {
2335       const cString & entry_desc = output_it.Get()->GetDesc(first_genotype);
2336       const cString & entry_name = output_it.Get()->GetName();
2337       fp << "<tr><th bgcolor=\"#AAAAFF\"><a href=\"#"
2338         << entry_name << "\">"
2339         << entry_desc << "</a></tr>";
2340     }
2341     fp << "</tr></table>" << endl;
2342   }
2343   }
2344 
2345 
CommandHistogram_Body(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it)2346 void cAnalyze::CommandHistogram_Body(ostream& fp, int format_type,
2347                                      tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it)
2348 {
2349   output_it.Reset();
2350   tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
2351   cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2352 
2353   while ((data_command = output_it.Next()) != NULL) {
2354     if (format_type == FILE_TYPE_TEXT) {
2355       fp << "# --- " << data_command->GetDesc(first_genotype) << " ---" << endl;
2356     } else {
2357       fp << "<table cellpadding=3>" << endl
2358       << "<tr><th colspan=3><a name=\"" << data_command->GetName() << "\">"
2359       << data_command->GetDesc(first_genotype) << "</th></tr>" << endl;
2360     }
2361 
2362     tDictionary<int> count_dict;
2363 
2364     // Loop through all genotypes in this batch to collect the info we need.
2365     tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2366     cAnalyzeGenotype * cur_genotype;
2367     while ((cur_genotype = batch_it.Next()) != NULL) {
2368       const cString cur_name(data_command->GetValue(cur_genotype).AsString());
2369       int count = 0;
2370       count_dict.Find(cur_name, count);
2371       count += cur_genotype->GetNumCPUs();
2372       count_dict.Set(cur_name, count);
2373     }
2374 
2375     tList<cString> name_list;
2376     tList<int> count_list;
2377     count_dict.AsLists(name_list, count_list);
2378 
2379     // Figure out the maximum count and the maximum widths...
2380     int max_count = 0;
2381     int max_name_width = 0;
2382     int max_count_width = 0;
2383     tListIterator<int> count_it(count_list);
2384     tListIterator<cString> name_it(name_list);
2385     while (count_it.Next() != NULL) {
2386       const cString cur_name( *(name_it.Next()) );
2387       const int cur_count = *(count_it.Get());
2388       const int name_width = cur_name.GetSize();
2389       const int count_width = cStringUtil::Stringf("%d", cur_count).GetSize();
2390       if (cur_count > max_count) max_count = cur_count;
2391       if (name_width > max_name_width) max_name_width = name_width;
2392       if (count_width > max_count_width) max_count_width = count_width;
2393     }
2394 
2395     // Do some final calculations now that we know the maximums...
2396     const int max_stars = 75 - max_name_width - max_count_width;
2397 
2398     // Now print everything out...
2399     count_it.Reset();
2400     name_it.Reset();
2401     while (count_it.Next() != NULL) {
2402       const cString cur_name( *(name_it.Next()) );
2403       const int cur_count = *(count_it.Get());
2404       if (cur_count == 0) continue;
2405       int num_stars = (cur_count * max_stars) / max_count;
2406 
2407       if (format_type == FILE_TYPE_TEXT) {
2408         fp << setw(max_name_width) << cur_name << "  "
2409         << setw(max_count_width) << cur_count << "  ";
2410         for (int i = 0; i < num_stars; i++) { fp << '#'; }
2411         fp << endl;
2412       } else { // FILE_TYPE_HTML
2413         fp << "<tr><td>" << cur_name
2414         << "<td>" << cur_count
2415         << "<td>";
2416         for (int i = 0; i < num_stars; i++) { fp << '#'; }
2417         fp << "</tr>" << endl;
2418       }
2419     }
2420 
2421     if (format_type == FILE_TYPE_TEXT) {
2422       // Skip a line between histograms...
2423       fp << endl;
2424     } else {
2425       fp << "</table><br><br>" << endl << endl;
2426     }
2427   }
2428 
2429   // If in HTML mode, we need to end the file...
2430   if (format_type == FILE_TYPE_HTML) {
2431     fp << "</table>" << endl
2432     << "</center>" << endl;
2433   }
2434 }
2435 
2436 
2437 ///// Population Analysis Commands ////
2438 
2439 // Comparator for p_stat struct: compared by cpu_count
2440 // Higher cpu_count is considered "less" in order to sort greatest-to-least
2441 // Furthermore, within the same cpu_count we sort greatest-to-least
2442 // based on genotype_count
PStatsComparator(const void * elem1,const void * elem2)2443 int cAnalyze::PStatsComparator(const void * elem1, const void * elem2)
2444 {
2445   if (((p_stats*)elem2)->cpu_count > ((p_stats*)elem1)->cpu_count) return 1;
2446   if (((p_stats*)elem2)->cpu_count < ((p_stats*)elem1)->cpu_count) return -1;
2447 
2448   // if the cpu_counts are the same, we'd like to sort greatest-to-least
2449   // on genotype_count
2450   if (((p_stats*)elem2)->genotype_count > ((p_stats*)elem1)->genotype_count) return 1;
2451   if (((p_stats*)elem2)->genotype_count < ((p_stats*)elem1)->genotype_count) return -1;
2452 
2453   // if they have the same cpu_count and genotype_count, we call them the same
2454   return 0;
2455 }
2456 
CommandPrintPhenotypes(cString cur_string)2457 void cAnalyze::CommandPrintPhenotypes(cString cur_string)
2458 {
2459   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing phenotypes in batch "
2460     << cur_batch << endl;
2461   else cout << "Printing phenotypes..." << endl;
2462 
2463   // Load in the variables...
2464   cString filename("phenotype.dat");
2465   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2466 
2467   cString flag("");
2468   bool print_ttc = false;
2469   bool print_ttpc = false;
2470   while (cur_string.GetSize() != 0) {
2471   	flag = cur_string.PopWord();
2472   	if (flag == "total_task_count") print_ttc = true;
2473   	else if (flag == "total_task_performance_count") print_ttpc = true;
2474   }
2475 
2476   // Make sure we have at least one genotype...
2477   if (batch[cur_batch].List().GetSize() == 0) return;
2478 
2479   // Setup the phenotype categories...
2480   const int num_tasks = batch[cur_batch].List().GetFirst()->GetNumTasks();
2481 
2482   tHashMap<cBitArray, p_stats> phenotype_table(HASH_TABLE_SIZE_MEDIUM);
2483 
2484   // Loop through all of the genotypes in this batch...
2485   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2486   cAnalyzeGenotype * genotype = NULL;
2487   while ((genotype = batch_it.Next()) != NULL) {
2488     cBitArray phen_id(num_tasks + 1);  // + 1 because phenotype also depends on viability
2489     phen_id.Clear();
2490     if (genotype->GetViable() == true) phen_id++;
2491     for (int i = 0; i < num_tasks; i++) {
2492       if (genotype->GetTaskCount(i) > 0)  phen_id.Set(i + 1, true);  // again, +1 because we used 0th bit for viability
2493     }
2494 
2495     p_stats phenotype_stats;
2496 
2497     if (phenotype_table.Find(phen_id, phenotype_stats)) {
2498       phenotype_stats.cpu_count      += genotype->GetNumCPUs();
2499       phenotype_stats.genotype_count += 1;
2500       phenotype_stats.total_length   += genotype->GetNumCPUs() * genotype->GetLength();
2501       phenotype_stats.total_gest     += genotype->GetNumCPUs() * genotype->GetGestTime();
2502 
2503       // don't bother tracking these unless asked for
2504       if (print_ttc || print_ttpc) {
2505         for (int i = 0; i < num_tasks; i++) {
2506           phenotype_stats.total_task_count += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
2507           phenotype_stats.total_task_performance_count += genotype->GetTaskCount(i);
2508         }
2509       }
2510     }
2511     else {
2512       phenotype_stats.phen_id        = phen_id;  // this is for ease of printing and sorting
2513       phenotype_stats.cpu_count      = genotype->GetNumCPUs();
2514       phenotype_stats.genotype_count = 1;
2515       phenotype_stats.total_length   = genotype->GetNumCPUs() * genotype->GetLength();
2516       phenotype_stats.total_gest     = genotype->GetNumCPUs() * genotype->GetGestTime();
2517 
2518       phenotype_stats.total_task_count = 0;
2519       phenotype_stats.total_task_performance_count = 0;
2520 
2521       // don't bother actually tracking these unless asked for
2522       if (print_ttc || print_ttpc) {
2523         for (int i = 0; i < num_tasks; i++) {
2524           phenotype_stats.total_task_count += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
2525           phenotype_stats.total_task_performance_count += genotype->GetTaskCount(i);
2526         }
2527       }
2528     }
2529 
2530     // add to / update table
2531     phenotype_table.Set(phen_id, phenotype_stats);
2532   }
2533 
2534   ofstream& fp = m_world->GetDataFileOFStream(filename);
2535 
2536   fp << "# 1: Number of organisms of this phenotype" << endl
2537     << "# 2: Number of genotypes of this phenotye" << endl
2538     << "# 3: Average Genome Length" << endl
2539     << "# 4: Average Gestation Time" << endl
2540     << "# 5: Viability of Phenotype" << endl;
2541   if (print_ttc && print_ttpc) {
2542   	fp << "# 6: Total # of different tasks performed by this phenotype" << endl
2543     	<< "# 7: Average # of tasks performed by this phenotype" << endl
2544     	<< "# 8+: Tasks performed in this phenotype" << endl;
2545   }
2546   else if (print_ttc) {
2547   	fp << "# 6: Total # of different tasks performed by this phenotype" << endl
2548     	<< "# 7+: Tasks performed in this phenotype" << endl;
2549   }
2550   else if (print_ttpc) {
2551   	fp << "# 6: Total # of tasks performed by this phenotype" << endl
2552   	  << "# 7+: Tasks performed in this phenotype" << endl;
2553   }
2554   else { fp << "# 6+: Tasks performed in this phenotype" << endl; }
2555   fp << endl;
2556 
2557   // Print the phenotypes in order from greatest cpu count to least
2558   // Within cpu_count, print in order from greatest genotype count to least
2559   tArray<p_stats> phenotype_array;
2560   phenotype_table.GetValues(phenotype_array);
2561   phenotype_array.MergeSort(&cAnalyze::PStatsComparator);  // sort by cpu_count, greatest to least
2562 
2563   for (int i = 0; i < phenotype_array.GetSize(); i++) {
2564     fp << phenotype_array[i].cpu_count << " "
2565        << phenotype_array[i].genotype_count << " "
2566        << phenotype_array[i].total_length / phenotype_array[i].cpu_count << " "
2567        << phenotype_array[i].total_gest / phenotype_array[i].cpu_count << " "
2568        << phenotype_array[i].phen_id.Get(0) << "  ";  // viability
2569 
2570     if (print_ttc) {
2571       fp << phenotype_array[i].total_task_count / phenotype_array[i].genotype_count << " ";
2572     }
2573     if (print_ttpc) {
2574       fp << phenotype_array[i].total_task_performance_count / phenotype_array[i].genotype_count << " ";
2575     }
2576 
2577     // not using cBitArray::Print because it would print viability bit too
2578     for (int j = 1; j <= num_tasks; j++) { fp << phenotype_array[i].phen_id.Get(j) << " "; }
2579 
2580     fp << endl;
2581   }
2582 
2583   m_world->GetDataFileManager().Remove(filename);
2584 
2585 }
2586 
2587 
2588 // Print various diversity metrics from the current batch of genotypes...
CommandPrintDiversity(cString cur_string)2589 void cAnalyze::CommandPrintDiversity(cString cur_string)
2590 {
2591   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing diversity data for batch "
2592     << cur_batch << endl;
2593   else cout << "Printing diversity data..." << endl;
2594 
2595   // Load in the variables...
2596   cString filename("diversity.dat");
2597   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2598 
2599   // Make sure we have at least one genotype...
2600   if (batch[cur_batch].List().GetSize() == 0) return;
2601 
2602   // Setup the task categories...
2603   const int num_tasks = batch[cur_batch].List().GetFirst()->GetNumTasks();
2604   tArray<int> task_count(num_tasks);
2605   tArray<int> task_gen_count(num_tasks);
2606   tArray<double> task_gen_dist(num_tasks);
2607   tArray<double> task_site_entropy(num_tasks);
2608   task_count.SetAll(0);
2609   task_gen_count.SetAll(0);
2610 
2611   // We must determine the average hamming distance between genotypes in
2612   // this batch that perform each task.  Levenstein distance would be ideal,
2613   // but takes a while, so we'll do it this way first.  For the calculations,
2614   // we need to know home many times each instruction appears at each
2615   // position for each genotype collection that performs a particular task.
2616   const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
2617   const int num_insts = is.GetSize();
2618   const int max_length = BatchUtil_GetMaxLength();
2619   tMatrix<int> inst_freq(max_length, num_insts+1);
2620 
2621   for (int task_id = 0; task_id < num_tasks; task_id++) {
2622     inst_freq.SetAll(0);
2623 
2624     // Loop through all genotypes, singling out those that do current task...
2625     tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2626     cAnalyzeGenotype* genotype = NULL;
2627     while ((genotype = batch_it.Next()) != NULL) {
2628       if (genotype->GetGenome().GetInstSet() != is.GetInstSetName() || genotype->GetTaskCount(task_id) == 0) continue;
2629 
2630       const Sequence& genome = genotype->GetGenome().GetSequence();
2631       const int num_cpus = genotype->GetNumCPUs();
2632       task_count[task_id] += num_cpus;
2633       task_gen_count[task_id]++;
2634       for (int i = 0; i < genotype->GetLength(); i++) {
2635         inst_freq( i, genome[i].GetOp() ) += num_cpus;
2636       }
2637       for (int i = genotype->GetLength(); i < max_length; i++) {
2638         inst_freq(i, num_insts) += num_cpus; // Entry for "past genome end"
2639       }
2640     }
2641 
2642     // Analyze the data for this entry...
2643     const int cur_count = task_count[task_id];
2644     const int total_pairs = cur_count * (cur_count - 1) / 2;
2645     int total_dist = 0;
2646     double total_ent = 0;
2647     for (int pos = 0; pos < max_length; pos++) {
2648       // Calculate distance component...
2649       for (int inst1 = 0; inst1 < num_insts; inst1++) {
2650         if (inst_freq(pos, inst1) == 0) continue;
2651         for (int inst2 = inst1+1; inst2 <= num_insts; inst2++) {
2652           total_dist += inst_freq(pos, inst1) * inst_freq(pos, inst2);
2653         }
2654       }
2655 
2656       // Calculate entropy component...
2657       for (int i = 0; i <= num_insts; i++) {
2658         const int cur_freq = inst_freq(pos, i);
2659         if (cur_freq == 0) continue;
2660         const double p = ((double) cur_freq) / (double) cur_count;
2661         total_ent -= p * log(p);
2662       }
2663     }
2664 
2665     task_gen_dist[task_id] = ((double) total_dist) / (double) total_pairs;
2666     task_site_entropy[task_id] = total_ent;
2667   }
2668 
2669   // Print out the results...
2670   cDataFile & df = m_world->GetDataFile(filename);
2671 
2672   for (int i = 0; i < num_tasks; i++) {
2673     df.Write(i,                    "# 1: Task ID");
2674     df.Write(task_count[i],        "# 2: Number of organisms performing task");
2675     df.Write(task_gen_count[i],    "# 3: Number of genotypes performing task");
2676     df.Write(task_gen_dist[i],     "# 4: Average distance between genotypes performing task");
2677     df.Write(task_site_entropy[i], "# 5: Total per-site entropy of genotypes performing task");
2678     df.Endl();
2679   }
2680 }
2681 
2682 
PhyloCommunityComplexity(cString cur_string)2683 void cAnalyze::PhyloCommunityComplexity(cString cur_string)
2684 {
2685   /////////////////////////////////////////////////////////////////////////
2686   // Calculate the mutual information between all genotypes and environment
2687   /////////////////////////////////////////////////////////////////////////
2688 
2689   cout << "Analyze biocomplexity of current population about environment ...\n";
2690 
2691   // Get the number of genotypes that are gonna be analyzed.
2692   int max_genotypes = cur_string.PopWord().AsInt();
2693 
2694   // Get update
2695   int update = cur_string.PopWord().AsInt();
2696 
2697   // Get the directory
2698   cString directory = PopDirectory(cur_string, "community_cpx/");
2699 
2700   // Get the file name that saves the result
2701   cString filename = cur_string.PopWord();
2702   if (filename.IsEmpty()) {
2703     filename = "community.complexity.dat";
2704   }
2705 
2706   filename.Set("%s%s", static_cast<const char*>(directory), static_cast<const char*>(filename));
2707   ofstream& cpx_fp = m_world->GetDataFileOFStream(filename);
2708 
2709   cpx_fp << "# Legend:" << endl;
2710   cpx_fp << "# 1: Genotype ID" << endl;
2711   cpx_fp << "# 2: Entropy given Known Genotypes" << endl;
2712   cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
2713   cpx_fp << "# 4: New Information about Environment" << endl;
2714   cpx_fp << "# 5: Total Complexity" << endl;
2715   cpx_fp << endl;
2716 
2717 
2718   /////////////////////////////////////////////////////////////////////////////////
2719   // Loop through all genotypes in all batches and build id vs. genotype map
2720 
2721   map<int, cAnalyzeGenotype *> genotype_database;
2722   for (int i = 0; i < GetNumBatches(); ++ i) {
2723     tListIterator<cAnalyzeGenotype> batch_it(batch[i].List());
2724     cAnalyzeGenotype * genotype = NULL;
2725     while ((genotype = batch_it.Next()) != NULL) {
2726       genotype_database.insert(make_pair(genotype->GetID(), genotype));
2727     }
2728   }
2729 
2730 
2731   ////////////////////////////////////////////////
2732   // Check if all the genotypes having same length
2733 
2734   int length_genome = 0;
2735   if (genotype_database.size() > 0) {
2736     length_genome = genotype_database.begin()->second->GetLength();
2737   }
2738   map<int, cAnalyzeGenotype*>::iterator gen_iterator = genotype_database.begin();
2739   for (; gen_iterator != genotype_database.end(); ++ gen_iterator) {
2740     if (gen_iterator->second->GetLength() != length_genome) {
2741       cerr << "Genotype " << gen_iterator->first << " has different genome length." << endl;
2742       if (exit_on_error) exit(1);
2743     }
2744   }
2745 
2746 
2747   ///////////////////////
2748   // Create Test Info
2749   // No choice of use_resources for this analyze command...
2750   cCPUTestInfo test_info;
2751   test_info.SetResourceOptions(RES_CONSTANT, m_resources, update, m_resource_time_spent_offset);
2752 
2753 
2754   ///////////////////////////////////////////////////////////////////////
2755   // Choose the first n most abundant genotypes and put them in community
2756 
2757   vector<cAnalyzeGenotype *> community;
2758   cAnalyzeGenotype * genotype = NULL;
2759   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2760 
2761   while (((genotype = batch_it.Next()) != NULL) && (community.size() < static_cast<unsigned int>(max_genotypes))) {
2762     community.push_back(genotype);
2763   }
2764 
2765 
2766   ///////////////////////////
2767   // Measure hamming distance
2768 
2769   int size_community = community.size();
2770   if (size_community == 0) {
2771     cerr << "There is no genotype in this community." << endl;
2772     if (exit_on_error) exit(1);
2773   }
2774   typedef pair<int,int> gen_pair;
2775   map<gen_pair, int> hamming_dist;
2776 
2777   for (int i = 0; i< size_community; ++ i) {
2778     for (int j = i+1; j < size_community; ++ j) {
2779       int dist = Sequence::FindHammingDistance(community[i]->GetGenome().GetSequence(),
2780                                                   community[j]->GetGenome().GetSequence());
2781       int id1 = community[i]->GetID();
2782       int id2 = community[j]->GetID();
2783 
2784       hamming_dist.insert(make_pair(gen_pair(id1, id2), dist));
2785       hamming_dist.insert(make_pair(gen_pair(id2, id1), dist));
2786     }
2787   }
2788 
2789 
2790   //////////////////////////////////
2791   // Get Most Recent Common Ancestor
2792 
2793   map<gen_pair, cAnalyzeGenotype *> mrca;
2794   map<gen_pair, int> raw_dist;
2795   for (int i = 0; i< size_community; ++ i) {
2796     for (int j = i+1; j < size_community; ++ j) {
2797 
2798       cAnalyzeGenotype * lineage1_genotype = community[i];
2799       cAnalyzeGenotype * lineage2_genotype = community[j];
2800       int total_dist = 0;
2801 
2802       while (lineage1_genotype->GetID() != lineage2_genotype->GetID()) {
2803         if (lineage1_genotype->GetID() > lineage2_genotype->GetID()) {
2804           int parent_id = lineage1_genotype->GetParentID();
2805           cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
2806 
2807           total_dist += Sequence::FindHammingDistance(lineage1_genotype->GetGenome().GetSequence(),
2808                                                          parent->GetGenome().GetSequence());
2809           lineage1_genotype = parent;
2810         } else {
2811           int parent_id = lineage2_genotype->GetParentID();
2812           cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
2813           total_dist += Sequence::FindHammingDistance(lineage2_genotype->GetGenome().GetSequence(),
2814                                                          parent->GetGenome().GetSequence());
2815 
2816           lineage2_genotype = parent;
2817         }
2818       }
2819 
2820       int id1 = community[i]->GetID();
2821       int id2 = community[j]->GetID();
2822       mrca.insert(make_pair(gen_pair(id1, id2), lineage1_genotype));
2823       mrca.insert(make_pair(gen_pair(id2, id1), lineage1_genotype));
2824       raw_dist.insert(make_pair(gen_pair(id1, id2), total_dist));
2825       raw_dist.insert(make_pair(gen_pair(id2, id1), total_dist));
2826     }
2827   }
2828 
2829 
2830   vector<cAnalyzeGenotype *> sorted_community = community;
2831 
2832 
2833   /////////////////////////////////////////////
2834   // Loop through genotypes in sorted community
2835 
2836   double complexity = 0.0;
2837   vector<cAnalyzeGenotype *> given_genotypes;
2838 
2839   for (int i = 0; i < size_community; ++ i) {
2840     genotype = sorted_community[i];
2841 
2842     // Skip the dead organisms
2843     genotype->Recalculate(m_ctx, &test_info);
2844     if (genotype->GetFitness() == 0) continue;
2845 
2846     int num_insts = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet()).GetSize();
2847 
2848     vector<double> one_line_prob(num_insts, 0.0);
2849     vector< vector<double> > prob(length_genome, one_line_prob);
2850 
2851     cout << endl << genotype->GetID() << endl;
2852 
2853     if (given_genotypes.size() >= 1) {
2854       //////////////////////////////////////////////////
2855       // Look for a genotype that is closest to this one
2856 
2857       cAnalyzeGenotype* min_depth_gen = given_genotypes[0];
2858       cAnalyzeGenotype* tmrca = mrca.find(gen_pair(genotype->GetID(), given_genotypes[0]->GetID()))->second;
2859       int min_depth_dist = genotype->GetDepth() + given_genotypes[0]->GetDepth() - 2 * tmrca->GetDepth();
2860 
2861       for (unsigned int i = 1; i < given_genotypes.size() ; ++ i) {
2862         cAnalyzeGenotype* given_genotype = given_genotypes[i];
2863         cAnalyzeGenotype* tmrca = mrca.find(gen_pair(genotype->GetID(), given_genotype->GetID()))->second;
2864         int dist = genotype->GetDepth() + given_genotype->GetDepth() - 2 * tmrca->GetDepth();
2865 
2866         if (dist < min_depth_dist) {
2867           min_depth_dist = dist;
2868           min_depth_gen = given_genotype;
2869         }
2870       }
2871 
2872       const Genome& given_genome = min_depth_gen->GetGenome();
2873       const Genome& base_genome = genotype->GetGenome();
2874       Genome mod_genome(base_genome);
2875 
2876       for (int line = 0; line < length_genome; ++ line) {
2877         int given_inst = given_genome.GetSequence()[line].GetOp();
2878         mod_genome = base_genome;
2879         mod_genome.GetSequence()[line].SetOp(given_inst);
2880         cAnalyzeGenotype test_genotype(m_world, mod_genome);
2881         test_genotype.Recalculate(m_ctx, &test_info);
2882 
2883         // Only when given inst make the genotype alive
2884         if (test_genotype.GetFitness() > 0) {
2885           prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
2886         }
2887       }
2888 
2889       cpx_fp << genotype->GetID() << " " << min_depth_dist << " "
2890         << raw_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << " "
2891         << hamming_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << "   ";
2892     } else {
2893       cpx_fp << genotype->GetID() << " ";
2894     }
2895 
2896 
2897     ///////////////////////////////////////////////////////////////////
2898     // Point mutation at all lines of code to look for neutral mutation
2899     // and the mutations that can make organism alive
2900 
2901     cout << "Test point mutation." << endl;
2902     vector<bool> one_line_neutral(num_insts, false);
2903     vector< vector<bool> > neutral_mut(length_genome, one_line_neutral);
2904     vector< vector<bool> > alive_mut(length_genome, one_line_neutral);
2905 
2906     genotype->Recalculate(m_ctx, &test_info);
2907     double base_fitness = genotype->GetFitness();
2908     cout << base_fitness << endl;
2909     const Genome& base_genome = genotype->GetGenome();
2910     Genome mod_genome(base_genome);
2911 
2912     for (int line = 0; line < length_genome; ++ line) {
2913       int cur_inst = base_genome.GetSequence()[line].GetOp();
2914 
2915       for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
2916         mod_genome.GetSequence()[line].SetOp(mod_inst);
2917         cAnalyzeGenotype test_genotype(m_world, mod_genome);
2918         test_genotype.Recalculate(m_ctx, &test_info);
2919         if (test_genotype.GetFitness() >= base_fitness) {
2920           neutral_mut[line][mod_inst] = true;
2921         }
2922         if (test_genotype.GetFitness() > 0) {
2923           alive_mut[line][mod_inst] = true;
2924         }
2925       }
2926 
2927       mod_genome.GetSequence()[line].SetOp(cur_inst);
2928     }
2929 
2930 
2931     /////////////////////////////////////////
2932     // Normalize the probability at each line
2933 
2934     vector< vector<double> > prob_before_env(length_genome, one_line_prob);
2935 
2936     for (int line = 0; line < length_genome; ++ line) {
2937       double cur_total_prob = 0.0;
2938       int num_alive = 0;
2939       for (int inst = 0; inst < num_insts; ++ inst) {
2940         if (alive_mut[line][inst] == true) {
2941           cur_total_prob += prob[line][inst];
2942           num_alive ++;
2943         }
2944       }
2945       if (cur_total_prob > 1) {
2946         cout << "Total probability at " << line << " is greater than 0." << endl;
2947         if (exit_on_error) exit(1);
2948       }
2949       double left_prob = 1 - cur_total_prob;
2950 
2951       for (int inst = 0; inst < num_insts; ++ inst) {
2952         if (alive_mut[line][inst] == true) {
2953           prob_before_env[line][inst] = prob[line][inst] + left_prob / num_alive;
2954         } else {
2955           prob_before_env[line][inst] = 0;
2956         }
2957       }
2958 
2959     }
2960 
2961 
2962     /////////////////////////////////
2963     // Calculate entropy of each line
2964 
2965     vector<double> entropy(length_genome, 0.0);
2966     for (int line = 0; line < length_genome; ++ line) {
2967       double sum = 0;
2968       for (int inst = 0; inst < num_insts; ++ inst) {
2969         sum += prob_before_env[line][inst];
2970         if (prob_before_env[line][inst] > 0) {
2971           entropy[line] -= prob_before_env[line][inst] * log(prob_before_env[line][inst]) / log(num_insts*1.0);
2972         }
2973       }
2974       if (sum > 1.001 || sum < 0.999) {
2975         cout << "Sum of probability is not 1 at line " << line << endl;
2976         if (exit_on_error) exit(1);
2977       }
2978     }
2979 
2980 
2981     /////////////////////////////////////////////////////
2982     // Redistribute the probability of insts at each line
2983 
2984     vector< vector<double> > prob_given_env(length_genome, one_line_prob);
2985 
2986     for (int line = 0; line < length_genome; ++ line) {
2987       double total_prob = 0.0;
2988       int num_neutral = 0;
2989       for (int inst = 0; inst < num_insts; ++ inst) {
2990         if (neutral_mut[line][inst] == true) {
2991           num_neutral ++;
2992           total_prob += prob[line][inst];
2993         }
2994       }
2995 
2996       double left = 1 - total_prob;
2997 
2998       for (int inst = 0; inst < num_insts; ++ inst) {
2999         if (neutral_mut[line][inst] == true) {
3000           prob_given_env[line][inst] = prob[line][inst] + left / num_neutral;
3001         } else {
3002           prob_given_env[line][inst] = 0.0;
3003         }
3004       }
3005 
3006     }
3007 
3008 
3009     ////////////////////////////////////////////////
3010     // Calculate the entropy given environment
3011 
3012     vector<double> entropy_given_env(length_genome, 0.0);
3013     for (int line = 0; line < length_genome; ++ line) {
3014       double sum = 0;
3015       for (int inst = 0; inst < num_insts; ++ inst) {
3016         sum += prob_given_env[line][inst];
3017         if (prob_given_env[line][inst] > 0) {
3018           entropy_given_env[line] -= prob_given_env[line][inst] * log(prob_given_env[line][inst]) /
3019           log(num_insts*1.0);
3020         }
3021       }
3022       if (sum > 1.001 || sum < 0.999) {
3023         cout << "Sum of probability is not 1 at line " << line << " " << sum << endl;
3024         if (exit_on_error) exit(1);
3025       }
3026     }
3027 
3028 
3029     ///////////////////////////////////////////////////////////////////////////
3030     // Calculate the information between genotype and env given other genotypes
3031     double information = 0.0;
3032     double entropy_before = 0.0;
3033     double entropy_after = 0.0;
3034     for (int line = 0; line < length_genome; ++ line) {
3035       entropy_before += entropy[line];
3036       entropy_after += entropy_given_env[line];
3037 
3038       if (entropy[line] >= entropy_given_env[line]) {
3039         information += entropy[line] - entropy_given_env[line];
3040       } else {    // Negative information is because given condition is not related with this genotype  ...
3041 
3042         // Count the number of insts that can make genotype alive
3043         int num_inst_alive = 0;
3044         for (int inst = 0; inst < num_insts; ++ inst) {
3045           if (alive_mut[line][inst] == true) {
3046             num_inst_alive ++;
3047           }
3048         }
3049 
3050         double entropy_before = - log(1.0/num_inst_alive) / log(num_insts*1.0);
3051         information += entropy_before - entropy_given_env[line];
3052         if (information < 0) {
3053           cout << "Negative information at site " << line << endl;
3054           if (exit_on_error) exit(1);
3055         }
3056       }
3057 
3058     }
3059     complexity += information;
3060 
3061     cpx_fp << entropy_before << " " << entropy_after << " "  << information << " " << complexity << "   ";
3062     genotype->PrintTasks(cpx_fp, 0, -1);
3063     cpx_fp << endl;
3064 
3065 
3066     /////////////////////////////////////////////////////////////
3067     // This genotype becomes the given condition of next genotype
3068 
3069     given_genotypes.push_back(genotype);
3070   }
3071 
3072   m_world->GetDataFileManager().Remove(filename);
3073   return;
3074 }
3075 
3076 
3077 // Calculate Edit Distance stats for all pairs of organisms across the population.
CommandPrintDistances(cString cur_string)3078 void cAnalyze::CommandPrintDistances(cString cur_string)
3079 {
3080   cout << "Calculating Edit Distance between all pairs of genotypes." << endl;
3081 
3082   // Get the maximum distance we care about
3083   int dist_threshold = cur_string.PopWord().AsInt();
3084 
3085   // Get the file name that saves the result
3086   cString filename = cur_string.PopWord();
3087   if (filename.IsEmpty()) {
3088     filename = "edit_distance.dat";
3089   }
3090 
3091   ofstream & fout = m_world->GetDataFileOFStream(filename);
3092 
3093   fout << "# All pairs edit distance" << endl;
3094   fout << "# 1: Num organism pairs" << endl;
3095 	fout << "# 2: Mean distance computed using (n*(n-1)/2) as all pairs." << endl;
3096   fout << "# 3: Mean distance" << endl;
3097   fout << "# 4: Max distance" << endl;
3098   fout << "# 5: Frac distances above threshold (" << dist_threshold << ")" << endl;
3099   fout << endl;
3100 
3101   // Loop through all pairs of organisms.
3102   int dist_total = 0;
3103   int dist_max = 0;
3104   int pair_count = 0;
3105   int threshold_pair_count = 0;
3106 	double count = 0;
3107 
3108   cAnalyzeGenotype * genotype1 = NULL;
3109   cAnalyzeGenotype * genotype2 = NULL;
3110   tListIterator<cAnalyzeGenotype> batch_it1(batch[cur_batch].List());
3111 
3112   int watermark = 0;
3113 
3114   while ((genotype1 = batch_it1.Next()) != NULL) {
3115 		count ++;
3116     const int gen1_count = genotype1->GetNumCPUs();
3117 
3118     // Pair this genotype with itself for a distance of 0.
3119     pair_count += gen1_count * (gen1_count - 1) / 2;
3120 
3121     // Loop through the other genotypes this one can be paired with.
3122     tListIterator<cAnalyzeGenotype> batch_it2(batch_it1);
3123     while ((genotype2 = batch_it2.Next()) != NULL) {
3124       const int gen2_count = genotype2->GetNumCPUs();
3125       const int cur_pairs = gen1_count * gen2_count;
3126       const int cur_dist = Sequence::FindEditDistance(genotype1->GetGenome().GetSequence(), genotype2->GetGenome().GetSequence());
3127       dist_total += cur_pairs * cur_dist;
3128       if (cur_dist > dist_max) dist_max = cur_dist;
3129       pair_count += cur_pairs;
3130       if (cur_dist >= dist_threshold) threshold_pair_count += cur_pairs;
3131 
3132       if (pair_count > watermark) {
3133 	cout << watermark << endl;
3134 	watermark += 100000;
3135       }
3136     }
3137   }
3138 
3139 	count = (count * (count-1) ) /2;
3140   fout << pair_count << " "
3141 	     << ((double) dist_total) / count << " "
3142        << ((double) dist_total) / (double) pair_count << " "
3143        << dist_max << " "
3144        << ((double) threshold_pair_count) / (double) pair_count << " "
3145        << endl;
3146 
3147   return;
3148 }
3149 
3150 
3151 // Calculate various stats for trees in population.
CommandPrintTreeStats(cString cur_string)3152 void cAnalyze::CommandPrintTreeStats(cString cur_string)
3153 {
3154   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing tree stats for batch "
3155     << cur_batch << endl;
3156   else cout << "Printing tree stats..." << endl;
3157 
3158   // Load in the variables...
3159   cString filename("tree_stats.dat");
3160   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3161 
3162   ofstream& fp = m_world->GetDataFileOFStream(filename);
3163 
3164   fp << "# Legend:" << endl;
3165   fp << "# 1: Average cumulative stemminess" << endl;
3166   fp << endl;
3167 
3168   cAnalyzeTreeStats_CumulativeStemminess agts(m_world);
3169   agts.AnalyzeBatchTree(batch[cur_batch].List());
3170 
3171   fp << agts.AverageStemminess();
3172   fp << endl;
3173 }
3174 
3175 
3176 // Calculate cumulative stemmines for trees in population.
CommandPrintCumulativeStemminess(cString cur_string)3177 void cAnalyze::CommandPrintCumulativeStemminess(cString cur_string)
3178 {
3179   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing cumulative stemmines for batch "
3180     << cur_batch << endl;
3181   else cout << "Printing cumulative stemmines..." << endl;
3182 
3183   // Load in the variables...
3184   cString filename("cumulative_stemminess.dat");
3185   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3186 
3187   ofstream& fp = m_world->GetDataFileOFStream(filename);
3188 
3189   fp << "# Legend:" << endl;
3190   fp << "# 1: Average cumulative stemminess" << endl;
3191   fp << endl;
3192 
3193   cAnalyzeTreeStats_CumulativeStemminess agts(m_world);
3194   agts.AnalyzeBatchTree(batch[cur_batch].List());
3195 
3196   fp << agts.AverageStemminess();
3197   fp << endl;
3198 }
3199 
3200 
3201 
3202 // Calculate Pybus-Harvey gamma statistic for trees in population.
CommandPrintGamma(cString cur_string)3203 void cAnalyze::CommandPrintGamma(cString cur_string)
3204 {
3205   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing Pybus-Harvey gamma statistic for batch "
3206     << cur_batch << endl;
3207   else cout << "Printing Pybus-Harvey gamma statistic..." << endl;
3208 
3209   // Load in the variables...
3210   int end_time = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;         // #1
3211   if (end_time < 0) {
3212     cout << "Error: end_time (argument 1) must be specified as nonzero." << endl;
3213     return;
3214   }
3215 
3216   cString filename("gamma.dat");
3217   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3218 
3219   cString lineage_thru_time_fname("");
3220   if (cur_string.GetSize() != 0) lineage_thru_time_fname = cur_string.PopWord();
3221 
3222   /*
3223   I've hardwired the option 'furcation_time_convention' to '1'.
3224 
3225   'furcation_time_convention' refers to the time at which a 'speciation' event
3226   occurs (I'm not sure 'speciation' is the right word for this). If a parent
3227   genotype produces two distinct surviving lineages, then the time of
3228   speciation could be:
3229   - 1: The parent genotype's birth time
3230   - 2: The elder child genotype's birth time
3231   - 3: The younger child genotype's birth time
3232 
3233   @kgn
3234   */
3235   // int furcation_time_convention = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 1;
3236   int furcation_time_convention = 1;
3237 
3238   ofstream& fp = m_world->GetDataFileOFStream(filename);
3239 
3240   fp << "# Legend:" << endl;
3241   fp << "# 1: Pybus-Harvey gamma statistic" << endl;
3242   fp << endl;
3243 
3244   cAnalyzeTreeStats_Gamma atsg(m_world);
3245   atsg.AnalyzeBatch(batch[cur_batch].List(), end_time, furcation_time_convention);
3246 
3247   fp << atsg.Gamma();
3248   fp << endl;
3249 
3250   if(lineage_thru_time_fname != ""){
3251     ofstream& ltt_fp = m_world->GetDataFileOFStream(lineage_thru_time_fname);
3252 
3253     ltt_fp << "# Legend:" << endl;
3254     ltt_fp << "# 1: num_lineages" << endl;
3255     ltt_fp << "# 2: furcation_time" << endl;
3256     ltt_fp << endl;
3257 
3258     int size = atsg.FurcationTimes().GetSize();
3259     for(int i = 0; i < size; i++){
3260       ltt_fp << i+2 << " " << atsg.FurcationTimes()[i] <<  endl;
3261     }
3262   }
3263 }
3264 
3265 
AnalyzeCommunityComplexity(cString cur_string)3266 void cAnalyze::AnalyzeCommunityComplexity(cString cur_string)
3267 {
3268   /////////////////////////////////////////////////////////////////////
3269   // Calculate the mutual information between community and environment
3270   /////////////////////////////////////////////////////////////////////
3271 
3272   cout << "Analyze community complexity of current population about environment with Charles method ...\n";
3273 
3274   // Get the number of genotypes that are gonna be analyzed.
3275   int max_genotypes = cur_string.PopWord().AsInt(); // If it is 0, we sample
3276                                                     //two genotypes for each task.
3277 
3278   // Get update
3279   int update = cur_string.PopWord().AsInt();
3280 
3281   // Get the directory
3282   cString dir = cur_string.PopWord();
3283   cString defaultDir = "community_cpx/";
3284   cString directory = PopDirectory(dir, defaultDir);
3285 
3286   // Get the file name that saves the result
3287   cString filename = cur_string.PopWord();
3288   if (filename.IsEmpty()) {
3289     filename = "community.complexity.dat";
3290   }
3291 
3292   filename.Set("%s%s", static_cast<const char*>(directory), static_cast<const char*>(filename));
3293   ofstream& cpx_fp = m_world->GetDataFileOFStream(filename);
3294 
3295   cpx_fp << "# Legend:" << endl;
3296   cpx_fp << "# 1: Genotype ID" << endl;
3297   cpx_fp << "# 2: Entropy given Known Genotypes" << endl;
3298   cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
3299   cpx_fp << "# 4: New Information about Environment" << endl;
3300   cpx_fp << "# 5: Total Complexity" << endl;
3301   cpx_fp << "# 6: Hamming Distance to Closest Given Genotype" << endl;
3302   cpx_fp << "# 7: Total Hamming Distance to Closest Neighbor" << endl;
3303   cpx_fp << "# 8: Number of Organisms" << endl;
3304   cpx_fp << "# 9: Total Number of Organisms" << endl;
3305   cpx_fp << "# 10 - : Tasks Implemented" << endl;
3306   cpx_fp << endl;
3307 
3308   ///////////////////////
3309   // Backup test CPU data
3310   cCPUTestInfo test_info;
3311   // No choice of use_resources for this analyze command...
3312   test_info.SetResourceOptions(RES_CONSTANT, m_resources, update, m_resource_time_spent_offset);
3313 
3314   vector<cAnalyzeGenotype *> community;
3315   cAnalyzeGenotype * genotype = NULL;
3316   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
3317 
3318 
3319   if (max_genotypes > 0) {
3320 
3321     ///////////////////////////////////////////////////////////////////////
3322     // Choose the first n most abundant genotypes and put them in community
3323 
3324     while (((genotype = batch_it.Next()) != NULL) && (community.size() < static_cast<unsigned int>(max_genotypes))) {
3325       community.push_back(genotype);
3326     }
3327   } else if (max_genotypes == 0) {
3328 
3329     /////////////////////////////////////
3330     // Choose two genotypes for each task
3331 
3332     genotype = batch_it.Next();
3333     if (genotype == NULL) {
3334       m_world->GetDataFileManager().Remove(filename);
3335       return;
3336     }
3337     genotype->Recalculate(m_ctx, &test_info);
3338     int num_tasks = genotype->GetNumTasks();
3339     vector< vector<cAnalyzeGenotype *> > genotype_class(num_tasks);
3340     do {
3341       for (int task_id = 0; task_id < num_tasks; ++ task_id) {
3342         int count = genotype->GetTaskCount(task_id);
3343         if (count > 0) {
3344           genotype_class[task_id].push_back(genotype);
3345         }
3346       }
3347     } while ((genotype = batch_it.Next()) != NULL);
3348 
3349     cRandom random;
3350     for (int task_id = 0; task_id < num_tasks; ++ task_id) {
3351       int num_genotype = genotype_class[task_id].size();
3352       if (num_genotype > 0) {
3353         int index = random.GetUInt(num_genotype);
3354         community.push_back(genotype_class[task_id][index]);
3355         index = random.GetUInt(num_genotype);
3356         community.push_back(genotype_class[task_id][index]);
3357       } else {
3358         // Pick up a class that is not empty
3359         int class_id = random.GetUInt(num_tasks);
3360         while (genotype_class[class_id].size() == 0) {
3361           class_id ++;
3362           if (class_id >= num_tasks) {
3363             class_id = 0;
3364           }
3365         }
3366         int num_genotype = genotype_class[class_id].size();
3367         int index = random.GetUInt(num_genotype);
3368         community.push_back(genotype_class[task_id][index]);
3369         index = random.GetUInt(num_genotype);
3370         community.push_back(genotype_class[task_id][index]);
3371       }
3372     }
3373 
3374   }
3375 
3376   ////////////////////////////////////////////////////
3377   // Test point mutation of each genotype in community
3378 
3379   map<int, tMatrix<double> > point_mut;
3380   int size_community = community.size();
3381   int length_genome = 0;
3382   if (size_community > 1) {
3383     length_genome = community[0]->GetLength();
3384   }
3385 
3386   for (int i = 0; i < size_community; ++ i) {
3387     genotype = community[i];
3388 
3389     ///////////////////////////////////////////////////////////////////
3390     // Point mutation at all lines of code to look for neutral mutation
3391     cout << "Test point mutation for genotype " << genotype->GetID() << endl;
3392 
3393     genotype->Recalculate(m_ctx, &test_info);
3394     const Genome& base_genome = genotype->GetGenome();
3395     const Sequence& base_seq = base_genome.GetSequence();
3396     Genome mod_genome(base_genome);
3397     Sequence& seq = mod_genome.GetSequence();
3398     const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
3399     double base_fitness = genotype->GetFitness();
3400 
3401     tMatrix<double> prob(length_genome, num_insts);
3402 
3403 
3404     for (int line = 0; line < length_genome; ++ line) {
3405       int cur_inst = base_seq[line].GetOp();
3406       int num_neutral = 0;
3407 
3408       for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
3409         seq[line].SetOp(mod_inst);
3410         cAnalyzeGenotype test_genotype(m_world, mod_genome);
3411         test_genotype.Recalculate(m_ctx, &test_info);
3412         if (test_genotype.GetFitness() >= base_fitness) {
3413           prob[line][mod_inst] = 1.0;
3414           num_neutral ++;
3415         } else {
3416           prob[line][mod_inst] = 0.0;
3417         }
3418       }
3419 
3420       for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
3421         prob[line][mod_inst] /= num_neutral;
3422       }
3423 
3424 
3425       seq[line].SetOp(cur_inst);
3426     }
3427 
3428     point_mut.insert(make_pair(genotype->GetID(), prob));
3429   }
3430 
3431   //////////////////////////////////////
3432   // Loop through genotypes in community
3433 
3434   double complexity = 0.0;
3435   int total_dist = 0;
3436   int total_cpus = 0;
3437   vector<cAnalyzeGenotype *> given_genotypes;
3438 
3439   ////////////////////////////////////////
3440   // New information in the first gentoype
3441   genotype = community[0];
3442   double oo_initial_entropy = length_genome;
3443   double oo_conditional_entropy = 0.0;
3444   tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
3445   const int num_insts = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet()).GetSize();
3446 
3447   for (int line = 0; line < length_genome; ++ line) {
3448     double oneline_entropy = 0.0;
3449     for (int inst = 0; inst < num_insts; ++ inst) {
3450       if (this_prob[line][inst] > 0) {
3451         oneline_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
3452                                                     log(1.0*num_insts));
3453       }
3454     }
3455     oo_conditional_entropy += oneline_entropy;
3456   }
3457 
3458   double new_info = oo_initial_entropy - oo_conditional_entropy;
3459   complexity += new_info;
3460   given_genotypes.push_back(genotype);
3461 
3462   cpx_fp << genotype->GetID() << " "
3463     << oo_initial_entropy << " "
3464     << oo_conditional_entropy << " "
3465     << new_info << " "
3466     << complexity << "   "
3467     << "0 0" << "   ";
3468   int num_cpus = genotype->GetNumCPUs();
3469   total_cpus += num_cpus;
3470   cpx_fp << num_cpus << " " << total_cpus << "   ";
3471   genotype->Recalculate(m_ctx, &test_info);
3472   genotype->PrintTasks(cpx_fp, 0, -1);
3473   cpx_fp << endl;
3474 
3475 
3476   //////////////////////////////////////////////////////
3477   // New information in other genotypes in community ...
3478   for (int i = 1; i < size_community; ++ i) {
3479     genotype = community[i];
3480     if (genotype->GetLength() != length_genome) {
3481       cerr << "Genotypes in the community do not same genome length.\n";
3482       if (exit_on_error) exit(1);
3483     }
3484 
3485     // Skip the dead organisms
3486     genotype->Recalculate(m_ctx, &test_info);
3487     cout << genotype->GetID() << " " << genotype->GetFitness() << endl;
3488     if (genotype->GetFitness() == 0) {
3489       continue;
3490     }
3491 
3492     double min_new_info = length_genome;
3493     double oo_initial_entropy = 0.0;
3494     double oo_conditional_entropy = 0.0;
3495     cAnalyzeGenotype* used_genotype = NULL;
3496     tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
3497 
3498     // For any given genotype, calculate the new information in genotype
3499     for (unsigned int j = 0; j < given_genotypes.size(); ++ j) {
3500 
3501       tMatrix<double> given_prob = point_mut.find(given_genotypes[j]->GetID())->second;
3502       double new_info = 0.0;
3503       double total_initial_entropy = 0.0;
3504       double total_conditional_entropy = 0.0;
3505 
3506       for (int line = 0; line < length_genome; ++ line) {
3507 
3508         // H(genotype|known_genotype)
3509         double prob_overlap = 0;
3510         for (int inst = 0; inst < num_insts; ++ inst) {
3511           if (this_prob[line][inst] < given_prob[line][inst]) {
3512             prob_overlap += this_prob[line][inst];
3513           } else {
3514             prob_overlap += given_prob[line][inst];
3515           }
3516         }
3517 
3518         double given_site_entropy = 0.0;
3519         for (int inst = 0; inst < num_insts; ++ inst) {
3520           if (given_prob[line][inst] > 0) {
3521             given_site_entropy -= given_prob[line][inst] * (log(given_prob[line][inst]) /
3522                                                             log(1.0*num_insts));
3523           }
3524         }
3525 
3526 
3527         double entropy_overlap = 0.0;
3528         if (prob_overlap > 0 &&  (1 - prob_overlap > 0)) {
3529           entropy_overlap = (- prob_overlap * log(prob_overlap)
3530                              - (1-prob_overlap) * log(1 - prob_overlap)) / log(1.0*num_insts);
3531         } else {
3532           entropy_overlap = 0;
3533         }
3534 
3535         double initial_entropy = prob_overlap * given_site_entropy
3536           + (1 - prob_overlap) * 1 + entropy_overlap;
3537         total_initial_entropy += initial_entropy;
3538 
3539         // H(genotype|E, known_genotype) = H(genotype|Env)
3540         double conditional_entropy = 0.0;
3541         for (int inst = 0; inst < num_insts; ++ inst) {
3542           if (this_prob[line][inst] > 0) {
3543             conditional_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
3544                                                             log(1.0*num_insts));
3545           }
3546         }
3547         total_conditional_entropy += conditional_entropy;
3548 
3549         if (conditional_entropy > initial_entropy + 0.00001) {
3550           cerr << "Negative Information.\n";
3551           cout << line << endl;
3552           for (int inst = 0; inst < num_insts; ++ inst) {
3553             cout << this_prob[line][inst] << " ";
3554           }
3555           cout << endl;
3556           for (int inst = 0; inst < num_insts; ++ inst) {
3557             cout << given_prob[line][inst] << " ";
3558           }
3559           cout << endl;
3560 
3561           if (exit_on_error) exit(1);
3562         }
3563 
3564         new_info += initial_entropy - conditional_entropy;
3565       }
3566 
3567       if (new_info < min_new_info) {
3568         min_new_info = new_info;
3569         oo_initial_entropy = total_initial_entropy;
3570         oo_conditional_entropy = total_conditional_entropy;
3571         used_genotype = given_genotypes[j];
3572         cout << "        " << "New closest genotype " << used_genotype->GetID()
3573           << " " << new_info << endl;
3574       }
3575 
3576     }
3577     complexity += min_new_info;
3578     cpx_fp << genotype->GetID() << " "
3579       << oo_initial_entropy << " "
3580       << oo_conditional_entropy << " "
3581       << min_new_info << " " << complexity << "   ";
3582 
3583     int hamm_dist = Sequence::FindHammingDistance(genotype->GetGenome().GetSequence(), used_genotype->GetGenome().GetSequence());
3584     total_dist += hamm_dist;
3585     cpx_fp << hamm_dist << " " << total_dist << "   ";
3586 
3587     int num_cpus = genotype->GetNumCPUs();
3588     total_cpus += num_cpus;
3589     cpx_fp << num_cpus << " " << total_cpus << "   ";
3590 
3591 
3592     genotype->PrintTasks(cpx_fp, 0, -1);
3593     cpx_fp << endl;
3594     given_genotypes.push_back(genotype);
3595   }
3596 
3597   m_world->GetDataFileManager().Remove(filename);
3598   return;
3599 }
3600 
3601 /* prints grid with what the fitness of an org in each range box would be given the resource levels
3602 	at given update (10000 by default) SLG*/
CommandPrintResourceFitnessMap(cString cur_string)3603 void cAnalyze::CommandPrintResourceFitnessMap(cString cur_string)
3604 {
3605   cout << "creating resource fitness map...\n";
3606   // at what update do we want to use the resource concentrations from?
3607   int update = 10000;
3608   if (cur_string.GetSize() != 0) update = cur_string.PopWord().AsInt();
3609   // what file to write data to
3610   cString filename("resourcefitmap.dat");
3611   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3612   ofstream& fp = m_world->GetDataFileOFStream(filename);
3613 
3614   int f1=-1, f2=-1, rangecount[2]={0,0}, threshcount[2]={0,0};
3615   double f1Max = 0.0, f1Min = 0.0, f2Max = 0.0, f2Min = 0.0;
3616 
3617   // first need to find out how many thresh and range resources there are on each function
3618   // NOTE! this only works for 2-obj. problems right now!
3619   for (int i=0; i<m_world->GetEnvironment().GetReactionLib().GetSize(); i++)
3620   {
3621 	  cReaction* react = m_world->GetEnvironment().GetReactionLib().GetReaction(i);
3622 	  int fun = react->GetTask()->GetArguments().GetInt(0);
3623 	  double thresh = react->GetTask()->GetArguments().GetDouble(3);
3624 	  double threshMax = react->GetTask()->GetArguments().GetDouble(4);
3625 	  if (i==0)
3626 	  {
3627 		  f1 = fun;
3628 		  f1Max = react->GetTask()->GetArguments().GetDouble(1);
3629 		  f1Min = react->GetTask()->GetArguments().GetDouble(2);
3630 	  }
3631 
3632 	     if (fun==f1 && threshMax>0)
3633 			 rangecount[0]++;
3634 		 else if (fun==f1 && thresh>=0)
3635 			 threshcount[0]++;
3636 		 else if (fun!=f1 && threshcount[1]==0 && rangecount[1]==0)
3637 		 {
3638 			 f2=fun;
3639 			 f2Max = react->GetTask()->GetArguments().GetDouble(1);
3640 			 f2Min = react->GetTask()->GetArguments().GetDouble(2);
3641 		 }
3642 		 if (fun==f2 && threshMax>0)
3643 			 rangecount[1]++;
3644 		 else if (fun==f2 && thresh>=0)
3645 			 threshcount[1]++;
3646 
3647   }
3648   int fsize[2];
3649   fsize[0] = rangecount[0];
3650   if (threshcount[0]>fsize[0])
3651 	  fsize[0]=threshcount[0];
3652   fsize[1]=rangecount[1];
3653   if (threshcount[1]>fsize[1])
3654 	  fsize[1]=threshcount[1];
3655 
3656   cout << "f1 size: " << fsize[0] << "  f2 size: " << fsize[1] << endl;
3657   double stepsize[2];
3658   stepsize[0] = (f1Max-f1Min)/fsize[0];
3659   stepsize[1] = (f2Max-f2Min)/fsize[1];
3660 
3661   // this is our grid where we are going to calculate the fitness of an org in each box
3662   // given current resource contributions
3663   tArray< tArray<double> > fitnesses(fsize[0]+1);
3664   for (int i=0; i<fitnesses.GetSize(); i++)
3665 	  fitnesses[i].Resize(fsize[1]+1,1);
3666 
3667   // Get the resources for the specified update
3668   tArray<double> resources;
3669   if (!m_resources || !m_resources->GetResourceLevelsForUpdate(update, resources, true)) {
3670     cout << "error: did not find the desired update in resource history" << endl;
3671     return;
3672   }
3673 
3674   cout << "creating map using resources at update: " << update << endl;
3675 
3676   for (int i = 0; i < m_world->GetEnvironment().GetResourceLib().GetSize(); i++) {
3677 
3678     // first have to find reaction that matches this resource, so compare names
3679 	  cString name = m_world->GetEnvironment().GetResourceLib().GetResource(i)->GetName();
3680 	  cReaction* react = NULL;
3681 	  for (int j = 0; j < m_world->GetEnvironment().GetReactionLib().GetSize(); j++) {
3682 		  if (m_world->GetEnvironment().GetReactionLib().GetReaction(j)->GetProcesses().GetPos(0)->GetResource()->GetName() == name) {
3683 			  react = m_world->GetEnvironment().GetReactionLib().GetReaction(j);
3684 			  j = m_world->GetEnvironment().GetReactionLib().GetSize();
3685 		  }
3686 	  }
3687 	  if (react == NULL) continue;
3688 
3689 	  // now have proper reaction, pull all the data need from the reaction
3690 	  double frac = react->GetProcesses().GetPos(0)->GetMaxFraction();
3691 	  double max = react->GetProcesses().GetPos(0)->GetMaxNumber();
3692 	  double min = react->GetProcesses().GetPos(0)->GetMinNumber();
3693 	  double value = react->GetValue();
3694 	  int fun = react->GetTask()->GetArguments().GetInt(0);
3695 
3696     if (fun == f1) fun = 0;
3697 	  else if (fun == f2) fun = 1;
3698 	  else cout << "function is neither f1 or f2! doh!\n";
3699 
3700 	  double thresh = react->GetTask()->GetArguments().GetDouble(3);
3701 	  double threshMax = react->GetTask()->GetArguments().GetDouble(4);
3702 	  //double maxFx = react->GetTask()->GetArguments().GetDouble(1);
3703 	  //double minFx = react->GetTask()->GetArguments().GetDouble(2);
3704 
3705 	  // and pull the concentration of this resource from resource object loaded from resource.dat
3706 	  double concentration = resources[i];
3707 
3708 	  // calculate the merit based on this resource concentration, fraction, and value
3709 	  double mer = concentration * frac * value;
3710 	  if (mer > max)
3711 		  mer=max;
3712 	  else if (mer < min)
3713 		  mer=0;
3714 	  double threshMaxAdjusted, threshAdjusted;
3715 	  // if this is a range reaction, need to update one entire row or column in fitnesses array
3716 	  if (threshMax>0)
3717 	  {
3718 		  for (int k=0; k<fsize[fun]; k++)
3719 		  {
3720 			  // function f1
3721 			  if (fun==0)
3722 			  {
3723 				  threshMaxAdjusted = threshMax*(f1Max-f1Min) + f1Min;
3724 				  threshAdjusted = thresh*(f1Max-f1Min) + f1Min;
3725 				  double pos = stepsize[0]*k+f1Min+stepsize[0]/2.0;
3726 				  if (threshAdjusted <= pos && threshMaxAdjusted >= pos)
3727 				  {
3728 					  for (int z=0; z<fsize[1]+1; z++)
3729 						  fitnesses[k+1][z] *= pow(2,mer);
3730 				  // actually solutions right at min possible get range above them too
3731 					  if (k==0)
3732 						  for (int z=0; z<fsize[1]+1; z++)
3733 							  fitnesses[0][z] *= pow(2,mer);
3734 				  }
3735 			  }
3736 			  // function f2
3737 			  else
3738 			  {
3739 				  threshMaxAdjusted = threshMax*(f2Max-f2Min) + f2Min;
3740 				  threshAdjusted = thresh*(f2Max-f2Min) + f2Min;
3741 				  double pos = stepsize[1]*k+f1Min+stepsize[1]/2.0;
3742 				  if (threshAdjusted <= pos && threshMaxAdjusted >= pos)
3743 				  {
3744 					  for (int z=0; z<fsize[0]+1; z++)
3745 						  fitnesses[z][k+1] *= pow(2,mer);
3746 				  // actually solutions right at min possible get range above them too
3747 					  if (k==0)
3748 						  for (int z=0; z<fsize[0]+1; z++)
3749 							  fitnesses[z][0] *= pow(2,mer);
3750 				  }
3751 			  }
3752 		  }
3753 	  }
3754 	  // threshold reaction, need to update all rows or columns above given threshold
3755 	  else if (thresh>=0)
3756 	  {
3757 		  for (int k=0; k<fsize[fun]+1; k++)
3758 		  {
3759 			  // function f1
3760 			  if (fun==0)
3761 			  {
3762 			      threshAdjusted = thresh*(f1Max-f1Min) + f1Min;
3763 			      double pos = stepsize[0]*k+f1Min-stepsize[0]/2.0;
3764 			      if (threshAdjusted >= pos)
3765 				{
3766 				  for (int z=0; z<fsize[1]+1; z++)
3767 				    {
3768 				      fitnesses[k][z] *= pow(2,mer);
3769 				    }
3770 				}
3771 
3772 			  }
3773 			  // function f2
3774 			  else
3775 			  {
3776 			    threshAdjusted = thresh*(f2Max-f2Min) + f2Min;
3777 			    double pos = stepsize[1]*k+f1Min-stepsize[1]/2.0;
3778 			    if (threshAdjusted >= pos)
3779 			      {
3780 				for (int z=0; z<fsize[0]+1; z++)
3781 				  fitnesses[z][k] *= pow(2,mer);
3782 			      }
3783 			  }
3784 		  }
3785 	  }
3786 
3787 	  }
3788 
3789   for (int i=fitnesses[0].GetSize()-1; i>=0; i--)
3790   {
3791     for (int j=0; j<fitnesses.GetSize(); j++)
3792 	fp << fitnesses[j][i] << " ";
3793     fp << endl;
3794   }
3795 }
3796 
3797 
3798 //@ MRR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
CommandPairwiseEntropy(cString cur_string)3799 void cAnalyze::CommandPairwiseEntropy(cString cur_string)
3800 {
3801   if (m_world->GetVerbosity() >= VERBOSE_ON)
3802     cout << "Finding pairwise entropy on batch " << cur_batch << endl;
3803   else
3804     cout << "Finding pairwise entropy..." << endl;
3805 
3806   cout << "@MRR-> This command is being tested!" << endl;
3807 
3808   cString directory = PopDirectory(cur_string, "pairwise_data/");
3809   if (m_world->GetVerbosity() >= VERBOSE_ON)
3810     cout << "\tUsing directory: " << directory << endl;
3811   double mu = cur_string.PopWord().AsDouble();
3812   if (m_world->GetVerbosity() >= VERBOSE_ON)
3813     cout << "\tUsing mu=" << mu << endl;
3814 
3815   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
3816   cAnalyzeGenotype * genotype = batch_it.Next();
3817 
3818   cout << genotype->GetName() << endl;
3819 
3820   while(genotype != NULL)
3821   {
3822     cString genName = genotype->GetName();
3823 
3824     if (m_world->GetVerbosity() >= VERBOSE_ON)
3825       cout << "\t...on genotype " << genName << endl;
3826 
3827     cString filename;
3828     filename.Set("%spairdata.%s.dat", static_cast<const char*>(directory),
3829                  static_cast<const char*>(genName));
3830 
3831     // @DMB -- ofstream& fp = m_world->GetDataFileOFStream(filename);
3832 
3833     if (m_world->GetVerbosity() >= VERBOSE_ON)
3834       cout << "\t\t...with filename:  " << filename << endl;
3835 
3836     cout << "# Pairwise Entropy Information" << endl;
3837 
3838     tMatrix<double> pairdata = AnalyzeEntropyPairs(genotype, mu);
3839 
3840     cout << pairdata.GetNumRows() << endl;
3841 
3842     for (int i=0;  i < pairdata.GetNumRows(); i++){
3843       for (int j=0; j < pairdata.GetNumCols(); j++)
3844         cout << pairdata[i][j] << " ";
3845       cout << endl;
3846     }
3847     m_world->GetDataFileManager().Remove(filename);
3848     genotype = batch_it.Next();
3849   }
3850 }
3851 
3852 
3853 
3854 
3855 
3856 // This command will take the current batch and analyze how well organisms
3857 // cross-over with each other, both across the population and between mates.
3858 
AnalyzeMateSelection(cString cur_string)3859 void cAnalyze::AnalyzeMateSelection(cString cur_string)
3860 {
3861   int sample_size = 10000;
3862   if (cur_string.GetSize() != 0) sample_size = cur_string.PopWord().AsInt();
3863   cString filename("none");
3864   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3865   double min_swap_frac = 0.0;
3866   if (cur_string.GetSize() != 0) min_swap_frac=cur_string.PopWord().AsDouble();
3867   double max_swap_frac = 1.0 - min_swap_frac;
3868 
3869   cout << "Analyzing Mate Selection... " << endl;
3870 
3871   // Do some quick tests before moving on...
3872   if (min_swap_frac < 0.0 || min_swap_frac >= 0.5) {
3873     cerr << "ERROR: Minimum swap fraction out of range [0.0, 0.5)." << endl;
3874   }
3875 
3876   // Next, we create an array that contains pointers to all of the organisms
3877   // in this batch.  Note that we want to select genotypes based on their
3878   // abundance, so they will have one entry in the array per organism.  Note
3879   // that we only consider viable genotypes.
3880 
3881   // Start by counting the total number of organisms (and do other such
3882   // data collection...
3883   tHashMap<int, int> mate_id_counts;
3884 
3885   int org_count = 0;
3886   int gen_count = 0;
3887   cAnalyzeGenotype * genotype = NULL;
3888   tListIterator<cAnalyzeGenotype> list_it(batch[cur_batch].List());
3889   while ((genotype = list_it.Next()) != NULL) {
3890     if (genotype->GetViable() == false || genotype->GetNumCPUs() == 0) {
3891       continue;
3892     }
3893     gen_count++;
3894     org_count += genotype->GetNumCPUs();
3895 
3896     // Keep track of how many organisms have each mate id...
3897     int mate_id = genotype->GetMateID();
3898     int count = 0;
3899     mate_id_counts.Find(mate_id, count);
3900     count += genotype->GetNumCPUs();
3901     mate_id_counts.Set(mate_id, count);
3902   }
3903 
3904   // Create an array of the correct size.
3905   tArray<cAnalyzeGenotype *> genotype_array(org_count);
3906 
3907   // And insert all of the organisms into the array.
3908   int cur_pos = 0;
3909   while ((genotype = list_it.Next()) != NULL) {
3910     if (genotype->GetViable() == false) continue;
3911     int cur_count = genotype->GetNumCPUs();
3912     for (int i = 0; i < cur_count; i++) {
3913       genotype_array[cur_pos++] = genotype;
3914     }
3915   }
3916 
3917 
3918   // Setup some variables to collect statistics.
3919   int total_matches_tested = 0;
3920   int fail_count = 0;
3921   int match_fail_count = 0;
3922 
3923   // Create a Test CPU
3924   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
3925 
3926   // Loop through all of the tests, picking random organisms each time and
3927   // performing a random cross test.
3928   cAnalyzeGenotype * genotype2 = NULL;
3929   for (int test_id = 0; test_id < sample_size; test_id++) {
3930     genotype = genotype_array[ m_world->GetRandom().GetUInt(org_count) ];
3931     genotype2 = genotype_array[ m_world->GetRandom().GetUInt(org_count) ];
3932 
3933     // Stop immediately if we're comparing a genotype to itself.
3934     if (genotype == genotype2) {
3935       total_matches_tested++;
3936       continue;
3937     }
3938 
3939     // Setup the random parameters for this test.
3940     Genome test_genome0 = genotype->GetGenome();
3941     Genome test_genome1 = genotype2->GetGenome();
3942 
3943     double start_frac = -1.0;
3944     double end_frac = -1.0;
3945     double swap_frac = -1.0;
3946     while (swap_frac < min_swap_frac || swap_frac > max_swap_frac) {
3947       start_frac = m_world->GetRandom().GetDouble();
3948       end_frac = m_world->GetRandom().GetDouble();
3949       if (start_frac > end_frac) Swap(start_frac, end_frac);
3950       swap_frac = end_frac - start_frac;
3951     }
3952 
3953     int start0 = (int) (start_frac * (double) test_genome0.GetSize());
3954     int end0   = (int) (end_frac * (double) test_genome0.GetSize());
3955     int size0 = end0 - start0;
3956 
3957     int start1 = (int) (start_frac * (double) test_genome1.GetSize());
3958     int end1   = (int) (end_frac * (double) test_genome1.GetSize());
3959     int size1 = end1 - start1;
3960 
3961     int new_size0 = test_genome0.GetSize() - size0 + size1;
3962     int new_size1 = test_genome1.GetSize() - size1 + size0;
3963 
3964     // Setup some statistics for this particular test.
3965     bool same_mate_id = ( genotype->GetMateID() == genotype2->GetMateID() );
3966     if (same_mate_id == true) total_matches_tested++;
3967 
3968     // Don't Crossover if offspring will be illegal!!!
3969     if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
3970         new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
3971       fail_count++;
3972       if (same_mate_id == true) match_fail_count++;
3973       continue;
3974     }
3975 
3976     // Do the replacement...  We're only going to test genome0, so we only
3977     // need to modify that one.
3978     Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
3979     test_genome0.GetSequence().Replace(start0, size0, cross1);
3980 
3981     // Do the test.
3982     cCPUTestInfo test_info;
3983 
3984     // Run each side, and determine viability...
3985     testcpu->TestGenome(m_ctx, test_info, test_genome0);
3986     if( test_info.IsViable() == false ) {
3987       fail_count++;
3988       if (same_mate_id == true) match_fail_count++;
3989     }
3990   }
3991   delete testcpu;
3992 
3993   // Do some calculations on the sizes of the mate groups...
3994   const int num_mate_groups = mate_id_counts.GetSize();
3995 
3996   // Collect lists on all of the mate groups for the calculations...
3997   tList<int> key_list;
3998   tList<int> count_list;
3999   mate_id_counts.AsLists(key_list, count_list);
4000   tListIterator<int> count_it(count_list);
4001 
4002   int max_group_size = 0;
4003   double mate_id_entropy = 0.0;
4004   while (count_it.Next() != NULL) {
4005     int cur_count = *(count_it.Get());
4006     double cur_frac = ((double) cur_count) / ((double) org_count);
4007     if (cur_count > max_group_size) max_group_size = cur_count;
4008     mate_id_entropy -= cur_frac * log(cur_frac);
4009   }
4010 
4011   // Calculate the final answer
4012   double fail_frac = (double) fail_count / (double) sample_size;
4013   double match_fail_frac =
4014     (double) match_fail_count / (double) total_matches_tested;
4015   cout << "  ave fraction failed = " << fail_frac << endl
4016     << "  ave matches failed = " << match_fail_frac << endl
4017     << "  total mate matches = " <<  total_matches_tested
4018     << " / " << sample_size<< endl;
4019 
4020   if (filename == "none") return;
4021 
4022   cDataFile & df = m_world->GetDataFile(filename);
4023   df.WriteComment( "Mate selection information" );
4024   df.WriteTimeStamp();
4025 
4026   df.Write(fail_frac,       "Average fraction failed");
4027   df.Write(match_fail_frac, "Average fraction of mate matches failed");
4028   df.Write(sample_size, "Total number of crossovers tested");
4029   df.Write(total_matches_tested, "Number of crossovers with matching mate IDs");
4030   df.Write(gen_count, "Number of genotypes in test batch");
4031   df.Write(org_count, "Number of organisms in test batch");
4032   df.Write(num_mate_groups, "Number of distinct mate IDs");
4033   df.Write(max_group_size, "Size of the largest distinct mate ID group");
4034   df.Write(mate_id_entropy, "Diversity of mate IDs (entropy)");
4035   df.Endl();
4036 }
4037 
4038 
AnalyzeComplexityDelta(cString cur_string)4039 void cAnalyze::AnalyzeComplexityDelta(cString cur_string)
4040 {
4041   // This command will examine the current population, and sample mutations
4042   // to see what the distribution of complexity changes is.  Only genotypes
4043   // with a certain abundance (default=3) will be tested to make sure that
4044   // the organism didn't already have hidden complexity due to a downward
4045   // step.
4046   cout << "Testing complexity delta." << endl;
4047 
4048   cString filename = "complexity_delta.dat";
4049   int num_tests = 10;
4050   double copy_mut_prob = m_world->GetConfig().COPY_MUT_PROB.Get();
4051   double ins_mut_prob = m_world->GetConfig().DIVIDE_INS_PROB.Get();
4052   double del_mut_prob = m_world->GetConfig().DIVIDE_DEL_PROB.Get();
4053   int count_threshold = 3;
4054 
4055   if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
4056   if (cur_string.GetSize() > 0) num_tests = cur_string.PopWord().AsInt();
4057   if (cur_string.GetSize() > 0) copy_mut_prob = cur_string.PopWord().AsDouble();
4058   if (cur_string.GetSize() > 0) ins_mut_prob = cur_string.PopWord().AsDouble();
4059   if (cur_string.GetSize() > 0) del_mut_prob = cur_string.PopWord().AsDouble();
4060   if (cur_string.GetSize() > 0) count_threshold = cur_string.PopWord().AsInt();
4061 
4062   if (m_world->GetVerbosity() >= VERBOSE_ON) {
4063     cout << "...using:"
4064     << " filename='" << filename << "'"
4065     << " num_tests=" << num_tests
4066     << " copy_mut_prob=" << copy_mut_prob
4067     << " ins_mut_prob=" << ins_mut_prob
4068     << " del_mut_prob=" << del_mut_prob
4069     << " count_threshold=" << count_threshold
4070     << endl;
4071   }
4072 
4073   // Create an array of all of the genotypes above threshold.
4074   cAnalyzeGenotype * genotype = NULL;
4075   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4076 
4077   // Loop through all genotypes to perform a census
4078   int org_count = 0;
4079   while ((genotype = batch_it.Next()) != NULL) {
4080     // Only count genotypes above threshold
4081     if (genotype->GetNumCPUs() >= count_threshold) {
4082       org_count += genotype->GetNumCPUs();
4083     }
4084   }
4085 
4086   // Create an array to store pointers to the genotypes and fill it in.
4087   tArray<cAnalyzeGenotype *> org_array(org_count);
4088   int cur_org = 0;
4089   batch_it.Reset();
4090   while ((genotype = batch_it.Next()) != NULL) {
4091     // Ignore genotypes below threshold.
4092     if (genotype->GetNumCPUs() < count_threshold) continue;
4093 
4094     // Insert the remaining genotypes into the array.
4095     for (int i = 0; i < genotype->GetNumCPUs(); i++) {
4096       org_array[cur_org] = genotype;
4097       cur_org++;
4098     }
4099   }
4100 
4101   // Open up the file and prepare it for output.
4102   cDataFile & df = m_world->GetDataFile(filename);
4103   df.WriteComment( "An analyze of expected complexity changes between parent and offspring" );
4104   df.WriteTimeStamp();
4105 
4106   // Next check the appropriate number of organisms, perform mutations, and
4107   // store the results.
4108   for (int cur_test = 0; cur_test < num_tests; cur_test++) {
4109     // Pick the genotype to test.
4110     int test_org_id = m_world->GetRandom().GetInt(org_count);
4111     genotype = org_array[test_org_id];
4112 
4113     // Create a copy of the genome.
4114     Genome mod_genome = genotype->GetGenome();
4115     Sequence& mod_seq = mod_genome.GetSequence();
4116     const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(mod_genome.GetInstSet());
4117 
4118     if (copy_mut_prob == 0.0 &&
4119         ins_mut_prob == 0.0 &&
4120         del_mut_prob == 0.0) {
4121       cerr << "ERROR: All mutation rates are zero!  No complexity delta analysis possible." << endl;
4122       return;
4123     }
4124 
4125     // Perform the per-site mutations -- we are going to keep looping until
4126     // we trigger at least one mutation.
4127     int num_mutations = 0;
4128     int ins_line = -1;
4129     int del_line = -1;
4130     while (num_mutations == 0) {
4131       if (copy_mut_prob > 0.0) {
4132         for (int i = 0; i < mod_genome.GetSize(); i++) {
4133           if (m_world->GetRandom().P(copy_mut_prob)) {
4134             mod_seq[i] = inst_set.GetRandomInst(m_ctx);
4135             num_mutations++;
4136           }
4137         }
4138       }
4139 
4140       // Perform an Insertion if it has one.
4141       if (m_world->GetRandom().P(ins_mut_prob)) {
4142         ins_line = m_world->GetRandom().GetInt(mod_genome.GetSize() + 1);
4143         mod_seq.Insert(ins_line, inst_set.GetRandomInst(m_ctx));
4144         num_mutations++;
4145       }
4146 
4147       // Perform a Deletion if it has one.
4148       if (m_world->GetRandom().P(del_mut_prob)) {
4149         del_line = m_world->GetRandom().GetInt(mod_genome.GetSize());
4150         mod_seq.Remove(del_line);
4151         num_mutations++;
4152       }
4153     }
4154 
4155     // Collect basic state before and after the mutations...
4156     genotype->Recalculate(m_ctx);
4157     double start_complexity = genotype->GetKO_Complexity();
4158     double start_fitness = genotype->GetFitness();
4159     int start_length = genotype->GetLength();
4160     int start_gest = genotype->GetGestTime();
4161     const tArray<int>& start_task_counts = genotype->GetTaskCounts();
4162     const tArray< tArray<int> >& start_KO_task_counts = genotype->GetKO_TaskCounts();
4163 
4164     cAnalyzeGenotype new_genotype(m_world, mod_genome);
4165     new_genotype.Recalculate(m_ctx);
4166     double end_complexity = new_genotype.GetKO_Complexity();
4167     double end_fitness = new_genotype.GetFitness();
4168     int end_length = new_genotype.GetLength();
4169     int end_gest = new_genotype.GetGestTime();
4170     const tArray<int> & end_task_counts = new_genotype.GetTaskCounts();
4171     const tArray< tArray<int> >& end_KO_task_counts = new_genotype.GetKO_TaskCounts();
4172 
4173     // Calculate the complexities....
4174     double complexity_change = end_complexity - start_complexity;
4175 
4176     // Loop through each line and determine if each line contributes to
4177     int total_info_new = 0;    // Site didn't encode info, but now does.
4178     int total_info_shift = 0;  // Shift in which tasks this site codes for.
4179     int total_info_pshift = 0; // Partial, but not total shift of tasks.
4180     int total_info_share = 0;  // Site codes for more tasks than before.
4181     int total_info_lost = 0;   // Site list all tasks it encoded for.
4182     int total_info_plost = 0;  // Site reduced tasks it encodes for.
4183     int total_info_kept = 0;   // Site still codes for sames tasks as before
4184     int total_info_lack = 0;   // Site never codes for any tasks.
4185 
4186     const int num_tasks = start_task_counts.GetSize();
4187     tArray<int> mut_effects(num_tasks);
4188     for (int i = 0; i < num_tasks; i++) {
4189       mut_effects[i] = end_task_counts[i] - start_task_counts[i];
4190     }
4191 
4192     int end_line = 0;
4193     for (int start_line = 0; start_line < start_length; start_line++) {
4194       if (start_line == del_line) {
4195         // This line was deleted in the end.  Skip it, but don't increment
4196         // the end_line
4197         continue;
4198       }
4199       if (start_line == ins_line) {
4200         // This position had an insertion.  Deal with it and then skip it.
4201         end_line++;
4202 
4203         // No "continue" here.  With the updated end_line we can move on.
4204       }
4205 
4206       // If we made it this far, the start_line and end_line should be aligned.
4207       int info_maintained_count = 0;
4208       int info_gained_count = 0;
4209       int info_lost_count = 0;
4210 
4211       for (int cur_task = 0; cur_task < num_tasks; cur_task++) {
4212         // At the organism level, the mutation may have caused four options
4213         // for this task  (A) Was never present, (B) Was present and still is,
4214         // (C) Was not present, but is now, or (D) Was present, but was lost.
4215 
4216         // Case A:
4217         if (start_task_counts[cur_task]==0 && end_task_counts[cur_task]==0) {
4218           // This task was never done.  Keep looping.
4219           continue;
4220         }
4221 
4222         // Case B:
4223         if (start_task_counts[cur_task] == end_task_counts[cur_task]) {
4224           // The task hasn't changed.  Has its encoding?
4225           bool KO_start = true;
4226           bool KO_end = true;
4227           if (start_KO_task_counts[start_line][cur_task]  ==
4228               start_task_counts[cur_task]) {
4229             // start_count is unchanged by knocking out this line.
4230             KO_start = false;
4231           }
4232           if (end_KO_task_counts[end_line][cur_task]  ==
4233               end_task_counts[cur_task]) {
4234             // end_count is unchanged by knocking out this line.
4235             KO_end = false;
4236           }
4237 
4238           if (KO_start == true && KO_end == true) info_maintained_count++;
4239           if (KO_start == true && KO_end == false) info_lost_count++;
4240           if (KO_start == false && KO_end == true) info_gained_count++;
4241           continue;
4242         }
4243 
4244         // Case C:
4245         if (start_task_counts[cur_task] < end_task_counts[cur_task]) {
4246           // Task was GAINED...  Is this site important?
4247           if (end_KO_task_counts[end_line][cur_task]  <
4248               end_task_counts[cur_task]) {
4249             info_gained_count++;
4250           }
4251           continue;
4252         }
4253 
4254         // Case D:
4255         if (start_task_counts[cur_task] > end_task_counts[cur_task]) {
4256           // The task was LOST...  Was this site important?
4257           if (start_KO_task_counts[start_line][cur_task]  <
4258               start_task_counts[cur_task]) {
4259             info_lost_count++;
4260           }
4261           continue;
4262         }
4263       }
4264 
4265       // We now have counts and know how often this site was responsible for
4266       // a task gain, a task loss, or a task being maintained.
4267 
4268       bool has_keep = info_maintained_count > 0;
4269       bool has_loss = info_lost_count > 0;
4270       bool has_gain = info_gained_count > 0;
4271 
4272       if      ( !has_loss  &&  !has_gain  &&  !has_keep ) total_info_lack++;
4273       else if ( !has_loss  &&  !has_gain  &&   has_keep ) total_info_kept++;
4274       else if ( !has_loss  &&   has_gain  &&  !has_keep ) total_info_new++;
4275       else if ( !has_loss  &&   has_gain  &&   has_keep ) total_info_share++;
4276       else if (  has_loss  &&  !has_gain  &&  !has_keep ) total_info_lost++;
4277       else if (  has_loss  &&  !has_gain  &&   has_keep ) total_info_plost++;
4278       else if (  has_loss  &&   has_gain  &&  !has_keep ) total_info_shift++;
4279       else if (  has_loss  &&   has_gain  &&   has_keep ) total_info_pshift++;
4280 
4281       end_line++;
4282     }
4283 
4284 
4285     // Output the results.
4286     df.Write(num_mutations, "Number of mutational differences between original organism and mutant.");
4287     df.Write(complexity_change, "Complexity difference between original organism and mutant.");
4288     df.Write(start_complexity, "Total complexity of initial organism.");
4289     df.Write(end_complexity, "Total complexity of mutant.");
4290 
4291     // Broken down complexity info
4292     df.Write(total_info_lack, "Num sites with no info at all.");
4293     df.Write(total_info_kept, "Num sites with info, but no change.");
4294     df.Write(total_info_new, "Num sites with new info (prev. none).");
4295     df.Write(total_info_share, "Num sites with newly shared info.");
4296     df.Write(total_info_lost, "Num sites with lost info.");
4297     df.Write(total_info_plost, "Num sites with parital lost info.");
4298     df.Write(total_info_shift, "Num sites with shift in info.");
4299     df.Write(total_info_pshift, "Num sites with partial shift in info.");
4300 
4301     // Start and End task counts...
4302     for (int i = 0; i < start_task_counts.GetSize(); i++) {
4303       df.Write(start_task_counts[i], cStringUtil::Stringf("Start task %d", i));
4304     }
4305 
4306     for (int i = 0; i < end_task_counts.GetSize(); i++) {
4307       df.Write(end_task_counts[i], cStringUtil::Stringf("End task %d", i));
4308     }
4309 
4310     df.Write(start_fitness, "Fitness of initial organism.");
4311     df.Write(end_fitness, "Fitness of mutant.");
4312     df.Write(start_length, "Length of initial organism.");
4313     df.Write(end_length, "Length of mutant.");
4314     df.Write(start_gest, "Gestation Time of initial organism.");
4315     df.Write(end_gest, "Gestation Time of mutant.");
4316     df.Write(genotype->GetID(), "ID of initial genotype.");
4317     df.Endl();
4318   }
4319 }
4320 
AnalyzeKnockouts(cString cur_string)4321 void cAnalyze::AnalyzeKnockouts(cString cur_string)
4322 {
4323   cout << "Analyzing the effects of knockouts..." << endl;
4324 
4325   cString filename = "knockouts.dat";
4326   if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
4327 
4328   int max_knockouts = 1;
4329   if (cur_string.GetSize() > 0) max_knockouts = cur_string.PopWord().AsInt();
4330 
4331   // Open up the data file...
4332   cDataFile & df = m_world->GetDataFile(filename);
4333   df.WriteComment( "Analysis of knockouts in genomes" );
4334   df.WriteTimeStamp();
4335 
4336 
4337   // Loop through all of the genotypes in this batch...
4338   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4339   cAnalyzeGenotype * genotype = NULL;
4340   while ((genotype = batch_it.Next()) != NULL) {
4341     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "  Knockout: " << genotype->GetName() << endl;
4342 
4343     // Calculate the stats for the genotype we're working with...
4344     genotype->Recalculate(m_ctx);
4345     const double base_fitness = genotype->GetFitness();
4346 
4347     const int max_line = genotype->GetLength();
4348     const Genome& base_genome = genotype->GetGenome();
4349     const Sequence& base_seq = base_genome.GetSequence();
4350     Genome mod_genome(base_genome);
4351     Sequence& seq = mod_genome.GetSequence();
4352     cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
4353 
4354     // Loop through all the lines of code, testing the removal of each.
4355     // -2=lethal, -1=detrimental, 0=neutral, 1=beneficial
4356     int dead_count = 0;
4357     int neg_count = 0;
4358     int neut_count = 0;
4359     int pos_count = 0;
4360     tArray<int> ko_effect(max_line);
4361     for (int line_num = 0; line_num < max_line; line_num++) {
4362       // Save a copy of the current instruction and replace it with "NULL"
4363       int cur_inst = base_seq[line_num].GetOp();
4364       seq[line_num] = null_inst;
4365       cAnalyzeGenotype ko_genotype(m_world, mod_genome);
4366       ko_genotype.Recalculate(m_ctx);
4367 
4368       double ko_fitness = ko_genotype.GetFitness();
4369       if (ko_fitness == 0.0) {
4370         dead_count++;
4371         ko_effect[line_num] = -2;
4372       } else if (ko_fitness < base_fitness) {
4373         neg_count++;
4374         ko_effect[line_num] = -1;
4375       } else if (ko_fitness == base_fitness) {
4376         neut_count++;
4377         ko_effect[line_num] = 0;
4378       } else if (ko_fitness > base_fitness) {
4379         pos_count++;
4380         ko_effect[line_num] = 1;
4381       } else {
4382         cerr << "ERROR: illegal state in AnalyzeKnockouts()" << endl;
4383       }
4384 
4385       // Reset the mod_genome back to the original sequence.
4386       seq[line_num].SetOp(cur_inst);
4387     }
4388 
4389     tArray<int> ko_pair_effect(ko_effect);
4390     if (max_knockouts > 1) {
4391       for (int line1 = 0; line1 < max_line; line1++) {
4392       	for (int line2 = line1+1; line2 < max_line; line2++) {
4393           int cur_inst1 = base_seq[line1].GetOp();
4394           int cur_inst2 = base_seq[line2].GetOp();
4395           seq[line1] = null_inst;
4396           seq[line2] = null_inst;
4397           cAnalyzeGenotype ko_genotype(m_world, mod_genome);
4398           ko_genotype.Recalculate(m_ctx);
4399 
4400           double ko_fitness = ko_genotype.GetFitness();
4401 
4402           // If both individual knockouts are both harmful, but in combination
4403           // they are neutral or even beneficial, they should not count as
4404           // information.
4405           if (ko_fitness >= base_fitness &&
4406               ko_effect[line1] < 0 && ko_effect[line2] < 0) {
4407             ko_pair_effect[line1] = 0;
4408             ko_pair_effect[line2] = 0;
4409           }
4410 
4411           // If the individual knockouts are both neutral (or beneficial?),
4412           // but in combination they are harmful, they are likely redundant
4413           // to each other.  For now, count them both as information.
4414           if (ko_fitness < base_fitness &&
4415               ko_effect[line1] >= 0 && ko_effect[line2] >= 0) {
4416             ko_pair_effect[line1] = -1;
4417             ko_pair_effect[line2] = -1;
4418           }
4419 
4420           // Reset the mod_genome back to the original sequence.
4421           seq[line1].SetOp(cur_inst1);
4422           seq[line2].SetOp(cur_inst2);
4423         }
4424       }
4425     }
4426 
4427     int pair_dead_count = 0;
4428     int pair_neg_count = 0;
4429     int pair_neut_count = 0;
4430     int pair_pos_count = 0;
4431     for (int i = 0; i < max_line; i++) {
4432       if (ko_pair_effect[i] == -2) pair_dead_count++;
4433       else if (ko_pair_effect[i] == -1) pair_neg_count++;
4434       else if (ko_pair_effect[i] == 0) pair_neut_count++;
4435       else if (ko_pair_effect[i] == 1) pair_pos_count++;
4436     }
4437 
4438     // Output data...
4439     df.Write(genotype->GetID(), "Genotype ID");
4440     df.Write(dead_count, "Count of lethal knockouts");
4441     df.Write(neg_count,  "Count of detrimental knockouts");
4442     df.Write(neut_count, "Count of neutral knockouts");
4443     df.Write(pos_count,  "Count of beneficial knockouts");
4444     df.Write(pair_dead_count, "Count of lethal knockouts after paired knockout tests.");
4445     df.Write(pair_neg_count,  "Count of detrimental knockouts after paired knockout tests.");
4446     df.Write(pair_neut_count, "Count of neutral knockouts after paired knockout tests.");
4447     df.Write(pair_pos_count,  "Count of beneficial knockouts after paired knockout tests.");
4448     df.Endl();
4449   }
4450 }
4451 
4452 
CommandMapTasks(cString cur_string)4453 void cAnalyze::CommandMapTasks(cString cur_string)
4454 {
4455   cString msg;  //Use if to construct any messages to send to driver
4456 
4457   m_world->GetDriver().NotifyComment("Constructing genotype-phenotype maps");
4458 
4459   // Load in the variables / default them
4460   cString directory         = PopDirectory(cur_string.PopWord(), "phenotype/");
4461   int     print_mode        = 0;   // 0=Normal, 1=Boolean results
4462   int     file_type         = FILE_TYPE_TEXT;
4463   bool    use_manual_inputs = false;  // Should we use manual inputs?
4464 
4465   // HTML special flags...
4466   bool link_maps = false;  // Should muliple maps be linked together?
4467   bool link_insts = false; // Should links be made to instruction descs?
4468 
4469   // Collect any other format information needed...
4470   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
4471   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
4472   tArray<int> manual_inputs;
4473 
4474   cStringList arg_list(cur_string);
4475 
4476   msg.Set("Found %d args.", arg_list.GetSize());
4477   m_world->GetDriver().NotifyComment(msg);
4478 
4479   int use_resources = 0;
4480 
4481   // Check for some command specific variables, removing them from the list if found.
4482   if (arg_list.PopString("0") != "")                 print_mode = 0;
4483   if (arg_list.PopString("1") != "")                 print_mode = 1;
4484   if (arg_list.PopString("text") != "")              file_type = FILE_TYPE_TEXT;
4485   if (arg_list.PopString("html") != "")              file_type = FILE_TYPE_HTML;
4486   if (arg_list.PopString("link_maps") != "")         link_maps = true;
4487   if (arg_list.PopString("link_insts") != "")        link_insts = true;
4488   if (arg_list.PopString("use_resources=2") != "")   use_resources = 2;
4489   if (arg_list.HasString("use_manual_inputs"))       use_manual_inputs = true;
4490 
4491   if (use_manual_inputs){
4492     int pos = arg_list.LocateString("use_manual_inputs");
4493     arg_list.PopString("use_manual_inputs");
4494     manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
4495     if (arg_list.GetSize() >= pos + m_world->GetEnvironment().GetInputSize() - 1)
4496       for (int k = 0; k < m_world->GetEnvironment().GetInputSize(); k++)
4497         manual_inputs[k] = arg_list.PopLine(pos).AsInt();
4498     else
4499       m_world->GetDriver().RaiseFatalException(1, "CommandMapTask: Invalid use of use_manual_inputs");
4500   }
4501 
4502   msg.Set("There are %d column args.", arg_list.GetSize());
4503   m_world->GetDriver().NotifyComment(msg);
4504 
4505   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(arg_list, output_list);
4506 
4507   m_world->GetDriver().NotifyComment("Args are loaded.");
4508 
4509   const int num_cols = output_list.GetSize();
4510 
4511 
4512   // Give some information in verbose mode.
4513   if (m_world->GetVerbosity() >= VERBOSE_ON) {
4514     cout << "  outputing as ";
4515     if (print_mode == 1) cout << "boolean ";
4516     if (file_type == FILE_TYPE_TEXT) {
4517       cout << "text files." << endl;
4518     } else { // if (file_type == FILE_TYPE_HTML) {
4519       cout << "HTML files";
4520       if (link_maps == true) cout << "; linking files together";
4521       if (link_maps == true) cout << "; linking inst names to descs";
4522       cout << "." << endl;
4523     }
4524     cout << "  Format: ";
4525 
4526     output_it.Reset();
4527     while (output_it.Next() != NULL) {
4528       cout << output_it.Get()->GetName() << " ";
4529     }
4530     cout << endl;
4531   }
4532 
4533 
4534   ///////////////////////////////////////////////////////
4535   // Loop through all of the genotypes in this batch...
4536 
4537   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4538   cAnalyzeGenotype * genotype = NULL;
4539   while ((genotype = batch_it.Next()) != NULL) {
4540     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "  Mapping " << genotype->GetName() << endl;
4541 
4542     // Construct this filename...
4543     cString filename;
4544     if (file_type == FILE_TYPE_TEXT) {
4545       filename.Set("%stasksites.%s.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
4546     } else {   //  if (file_type == FILE_TYPE_HTML) {
4547       filename.Set("%stasksites.%s.html", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
4548     }
4549     ofstream& fp = m_world->GetDataFileOFStream(filename);
4550 
4551     // Construct linked filenames...
4552     cString next_file("");
4553     cString prev_file("");
4554     if (link_maps == true) {
4555       // Check the next genotype on the list...
4556       if (batch_it.Next() != NULL) {
4557         next_file.Set("tasksites.%s.html", static_cast<const char*>(batch_it.Get()->GetName()));
4558       }
4559       batch_it.Prev();  // Put the list back where it was...
4560 
4561       // Check the previous genotype on the list...
4562       if (batch_it.Prev() != NULL) {
4563         prev_file.Set("tasksites.%s.html", static_cast<const char*>(batch_it.Get()->GetName()));
4564       }
4565       batch_it.Next();  // Put the list back where it was...
4566     }
4567 
4568     // Calculate the stats for the genotype we're working with...
4569     cCPUTestInfo test_info;
4570     if (use_manual_inputs)
4571       test_info.UseManualInputs(manual_inputs);
4572     test_info.SetResourceOptions(use_resources, m_resources);
4573     genotype->Recalculate(m_ctx, &test_info);
4574 
4575     // Headers...
4576     if (file_type == FILE_TYPE_TEXT) {
4577       fp << "-1 "  << batch[cur_batch].Name() << " "
4578       << genotype->GetID() << " ";
4579 
4580       tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
4581       while ((data_command = output_it.Next()) != NULL) {
4582         fp << data_command->GetValue(genotype) << " ";
4583       }
4584       fp << endl;
4585 
4586     } else { // if (file_type == FILE_TYPE_HTML) {
4587       // Mark file as html
4588       fp << "<html>" << endl;
4589 
4590       // Setup any javascript macros needed...
4591       fp << "<head>" << endl;
4592       if (link_insts == true) {
4593         fp << "<script language=\"javascript\">" << endl
4594         << "function Inst(inst_name)" << endl
4595         << "{" << endl
4596         << "var filename = \"help.\" + inst_name + \".html\";" << endl
4597         << "newwin = window.open(filename, 'Instruction', "
4598         << "'toolbar=0,status=0,location=0,directories=0,menubar=0,"
4599         << "scrollbars=1,height=150,width=300');" << endl
4600         << "newwin.focus();" << endl
4601         << "}" << endl
4602         << "</script>" << endl;
4603       }
4604       fp << "</head>" << endl;
4605 
4606       // Setup the body...
4607       fp << "<body>" << endl
4608       << "<div align=\"center\">" << endl
4609       << "<h1 align=\"center\">Run " << batch[cur_batch].Name() << ", ID " << genotype->GetID() << "</h1>" << endl
4610       << endl;
4611 
4612       // Links?
4613       fp << "<table width=90%><tr><td align=left>";
4614       if (prev_file != "") fp << "<a href=\"" << prev_file << "\">Prev</a>";
4615       else fp << "&nbsp;";
4616       fp << "<td align=right>";
4617       if (next_file != "") fp << "<a href=\"" << next_file << "\">Next</a>";
4618       else fp << "&nbsp;";
4619       fp << "</tr></table>" << endl;
4620 
4621       // The table
4622       fp << "<table border=1 cellpadding=2>" << endl;
4623 
4624       // The headings...///
4625       fp << "<tr><td colspan=3> ";
4626       output_it.Reset();
4627       while (output_it.Next() != NULL) {
4628         fp << "<th>" << output_it.Get()->GetDesc(genotype) << " ";
4629       }
4630       fp << "</tr>" << endl;
4631 
4632       // The base creature...
4633       fp << "<tr><th colspan=3>Base Creature";
4634       tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
4635       const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
4636       Genome null_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(1));
4637       cAnalyzeGenotype null_genotype(m_world, null_genome);
4638       while ((data_command = output_it.Next()) != NULL) {
4639         const cFlexVar cur_value = data_command->GetValue(genotype);
4640         const cFlexVar null_value = data_command->GetValue(&null_genotype);
4641         int compare = CompareFlexStat(cur_value, null_value, data_command->GetCompareType());
4642         if (compare > 0) {
4643           fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">";
4644         }
4645         else  fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">";
4646 
4647         if (data_command->HasArg("blank") == true) fp << "&nbsp;" << " ";
4648         else fp << cur_value << " ";
4649       }
4650       fp << "</tr>" << endl;
4651     }
4652 
4653 
4654     const int max_line = genotype->GetLength();
4655     const Genome& base_genome = genotype->GetGenome();
4656     const Sequence& base_seq = base_genome.GetSequence();
4657     Genome mod_genome(base_genome);
4658     Sequence& seq = mod_genome.GetSequence();
4659 
4660     // Keep track of the number of failues/successes for attributes...
4661     int * col_pass_count = new int[num_cols];
4662     int * col_fail_count = new int[num_cols];
4663     for (int i = 0; i < num_cols; i++) {
4664       col_pass_count[i] = 0;
4665       col_fail_count[i] = 0;
4666     }
4667 
4668     cInstSet& is = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet());
4669     const cInstruction null_inst = is.ActivateNullInst();
4670 
4671     // Loop through all the lines of code, testing the removal of each.
4672     for (int line_num = 0; line_num < max_line; line_num++) {
4673       int cur_inst = base_seq[line_num].GetOp();
4674       char cur_symbol = base_seq[line_num].GetSymbol();
4675 
4676       seq[line_num] = null_inst;
4677       cAnalyzeGenotype test_genotype(m_world, mod_genome);
4678       test_genotype.Recalculate(m_ctx, &test_info);
4679 
4680       if (file_type == FILE_TYPE_HTML) fp << "<tr><td align=right>";
4681       fp << (line_num + 1) << " ";
4682       if (file_type == FILE_TYPE_HTML) fp << "<td align=center>";
4683       fp << cur_symbol << " ";
4684       if (file_type == FILE_TYPE_HTML) fp << "<td align=center>";
4685       if (link_insts == true) {
4686         fp << "<a href=\"javascript:Inst('"
4687         << is.GetName(cur_inst)
4688         << "')\">";
4689       }
4690       fp << is.GetName(cur_inst) << " ";
4691       if (link_insts == true) fp << "</a>";
4692 
4693 
4694       // Print the individual columns...
4695       output_it.Reset();
4696       tDataEntryCommand<cAnalyzeGenotype>* data_command = NULL;
4697       int cur_col = 0;
4698       while ((data_command = output_it.Next()) != NULL) {
4699         const cFlexVar test_value = data_command->GetValue(&test_genotype);
4700         int compare = CompareFlexStat(test_value, data_command->GetValue(genotype), data_command->GetCompareType());
4701 
4702         if (file_type == FILE_TYPE_HTML) {
4703           HTMLPrintStat(test_value, fp, compare, data_command->GetHtmlCellFlags(), data_command->GetNull(),
4704                         !(data_command->HasArg("blank")));
4705         }
4706         else fp << test_value << " ";
4707 
4708         if (compare == -2) col_fail_count[cur_col]++;
4709         else if (compare == 2) col_pass_count[cur_col]++;
4710         cur_col++;
4711       }
4712       if (file_type == FILE_TYPE_HTML) fp << "</tr>";
4713       fp << endl;
4714 
4715       // Reset the mod_genome back to the original sequence.
4716       seq[line_num].SetOp(cur_inst);
4717     }
4718 
4719 
4720     // Construct the final line of the table with all totals...
4721     if (file_type == FILE_TYPE_HTML) {
4722       fp << "<tr><th colspan=3>Totals";
4723 
4724       for (int i = 0; i < num_cols; i++) {
4725         if (col_pass_count[i] > 0) {
4726           fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">" << col_pass_count[i];
4727         }
4728         else if (col_fail_count[i] > 0) {
4729           fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">" << col_fail_count[i];
4730         }
4731         else fp << "<th>0";
4732       }
4733       fp << "</tr>" << endl;
4734 
4735       // And close everything up...
4736       fp << "</table>" << endl
4737       << "</div>" << endl;
4738     }
4739 
4740     delete [] col_pass_count;
4741     delete [] col_fail_count;
4742     m_world->GetDataFileManager().Remove(filename);  // Close the data file object
4743   }
4744 }
4745 
CommandCalcFunctionalModularity(cString cur_string)4746 void cAnalyze::CommandCalcFunctionalModularity(cString cur_string)
4747 {
4748   cout << "Calculating Functional Modularity..." << endl;
4749 
4750   cCPUTestInfo test_info;
4751   PopCommonCPUTestParameters(m_world, cur_string, test_info, m_resources, m_resource_time_spent_offset);
4752 
4753   tList<cModularityAnalysis> mod_list;
4754   tAnalyzeJobBatch<cModularityAnalysis> jobbatch(m_jobqueue);
4755   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4756   for (cAnalyzeGenotype* cur_genotype = batch_it.Next(); cur_genotype; cur_genotype = batch_it.Next()) {
4757     cModularityAnalysis* mod = new cModularityAnalysis(cur_genotype, test_info);
4758     mod_list.Push(mod);
4759     jobbatch.AddJob(mod, &cModularityAnalysis::CalcFunctionalModularity);
4760   }
4761   jobbatch.RunBatch();
4762   cModularityAnalysis* mod = NULL;
4763   while ((mod = mod_list.Pop())) delete mod;
4764 }
4765 
CommandAverageModularity(cString cur_string)4766 void cAnalyze::CommandAverageModularity(cString cur_string)
4767 {
4768   cout << "Average Modularity calculations" << endl;
4769 
4770   // Load in the variables...
4771   cString filename = cur_string.PopWord();
4772 
4773   int print_mode = 0;   // 0=Normal, 1=Boolean results
4774 
4775   // Collect any other format information needed...
4776   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
4777   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
4778 
4779   cStringList arg_list(cur_string);
4780 
4781   cout << "Found " << arg_list.GetSize() << " args." << endl;
4782 
4783   // Check for some command specific variables.
4784   if (arg_list.PopString("0") != "") print_mode = 0;
4785   if (arg_list.PopString("1") != "") print_mode = 1;
4786 
4787   cout << "There are " << arg_list.GetSize() << " column args." << endl;
4788 
4789   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(arg_list, output_list);
4790 
4791   cout << "Args are loaded." << endl;
4792 
4793   const int num_cols = output_list.GetSize();
4794 
4795   // Give some information in verbose mode.
4796   if (m_world->GetVerbosity() >= VERBOSE_ON) {
4797     cout << "  outputing as ";
4798     if (print_mode == 1) cout << "boolean ";
4799     cout << "text files." << endl;
4800     cout << "  Format: ";
4801 
4802     output_it.Reset();
4803     while (output_it.Next() != NULL) {
4804       cout << output_it.Get()->GetName() << " ";
4805     }
4806     cout << endl;
4807   }
4808 
4809   ofstream& fp = m_world->GetDataFileOFStream(filename);
4810 
4811   // printing the headers
4812   // not done by default since many dumps may be analyzed at the same time
4813   // and results would be put in the same file
4814   if (arg_list.GetSize()==0) {
4815     // Headers
4816     fp << "# Avida analyze modularity data" << endl;
4817     fp << "# 1: organism length" << endl;
4818     fp << "# 2: number of tasks done" << endl;
4819     fp << "# 3: number of sites used in tasks" << endl;
4820     fp << "# 4: proportion of sites used in tasks" << endl;
4821     fp << "# 5: average number of tasks done per site" << endl;
4822     fp << "# 6: average number sites per task done" << endl;
4823     fp << "# 7: average number tasks per site per task" << endl;
4824     fp << "# 8: average proportion of the non-overlaping region of a task" << endl;
4825     fp << "# 9-17: average StDev in positions used for task 1-9" << endl;
4826     fp << "# 18-26: average number of sites necessary for each of the tasks" << endl;
4827     fp << "# 27-36: number of sites involved in 0-9 tasks" << endl;
4828     fp << "# 37-45: average task length (distance from first to last inst used)" << endl;
4829     fp << endl;
4830     return;
4831   }
4832 
4833   // initialize various variables used in calculations
4834 
4835   int num_orgs = 0;		// number of organisms in the dump
4836 
4837   double  av_length = 0; 	// average organism length
4838   double  av_task = 0; 	// average # of tasks done
4839   double  av_inst = 0; 	// average # instructions used in tasks
4840   double  av_inst_len = 0; 	// proportion of sites used for tasks
4841   double  av_site_task = 0; 	// average number of sites per task
4842   double  av_task_site = 0;   // average number of tasks per site
4843   double  av_t_s_norm = 0;	// average number of tasks per site per task
4844   double  av_task_overlap = 0; // average overlap between tasks
4845 
4846   // average StDev in positions used for a task
4847   tArray<double> std_task_position(num_cols);
4848   std_task_position.SetAll(0.0);
4849 
4850   // # of organisms actually doing a task
4851   tArray<double> org_task(num_cols);
4852   org_task.SetAll(0.0);
4853 
4854   // av. # of sites necessary for each of the tasks
4855   tArray<double> av_num_inst(num_cols);
4856   av_num_inst.SetAll(0.0);
4857 
4858   // number of sites involved in 0-9 tasks
4859   tArray<double> av_inst_task(num_cols+1);
4860   av_inst_task.SetAll(0.0);
4861 
4862   // av. # task length (distance from first to last site used)
4863   tArray<double> av_task_length(num_cols);
4864   av_task_length.SetAll(0.0);
4865 
4866 
4867   ///////////////////////////////////////////////////////
4868   // Loop through all of the genotypes in this batch...
4869   ///////////////////////////////////////////////////////
4870 
4871   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4872   cAnalyzeGenotype* genotype = NULL;
4873 
4874   // would like to test oly the viable ones, but they can be non-viable
4875   // and still reproduce and do tasks
4876   // while ((genotype = batch_it.Next()) != NULL && genotype->GetViable()) {
4877   while ((genotype = batch_it.Next()) != NULL) {
4878 
4879     int num_cpus = genotype->GetNumCPUs();
4880 
4881     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "  Mapping " << genotype->GetName() << endl;
4882     cout.flush();
4883 
4884     // Calculate the stats for the genotype we're working with...
4885     genotype->Recalculate(m_ctx);
4886 
4887     // Check if the organism does any tasks.
4888     bool does_tasks = false;
4889     for (int i = 0; i < num_cols; i++) {
4890       if (genotype->GetTaskCount(i) > 0)  {
4891         does_tasks = true;
4892         break;
4893       }
4894     }
4895 
4896     // Don't calculate the modularity if the organism doesn't reproduce
4897     // i.e. if the fitness is 0
4898     if (genotype->GetFitness() > 0.0 && does_tasks) {
4899       num_orgs = num_orgs + num_cpus;
4900 
4901       const int max_line = genotype->GetLength();
4902       const Genome& base_genome = genotype->GetGenome();
4903       const Sequence& base_seq = base_genome.GetSequence();
4904       Genome mod_genome(base_genome);
4905       Sequence& seq = mod_genome.GetSequence();
4906       cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
4907 
4908       // Create and initialize the modularity matrix
4909       tMatrix<int> mod_matrix(num_cols, max_line);
4910       mod_matrix.SetAll(0);
4911 
4912       // Create and initialize the task overalp matrix
4913       tMatrix<int> task_overlap(num_cols, num_cols);
4914       task_overlap.SetAll(0);
4915 
4916       // Create an initialize the counters for modularity
4917       tArray<int> num_task(max_line); // number of tasks instruction is used in
4918       tArray<int> num_inst(num_cols); // number of instructions involved in a task
4919       tArray<int> sum(num_cols); 	    // helps with StDev calculations
4920       tArray<int> sumsq(num_cols);    // helps with StDev calculations
4921       tArray<int> inst_task(num_cols+1); // # of inst's involved in 0,1,2,3... tasks
4922       tArray<int> task_length(num_cols);    // ditance between first and last inst involved in a task
4923 
4924       num_task.SetAll(0);
4925       num_inst.SetAll(0);
4926       sum.SetAll(0);
4927       sumsq.SetAll(0);
4928       inst_task.SetAll(0);
4929       task_length.SetAll(0);
4930 
4931       int total_task = 0;        // total number of tasks done
4932       int total_inst = 0;        // total number of instructions involved in tasks
4933       int total_all = 0;         // sum of mod_matrix
4934       double sum_task_overlap = 0;// task overlap for for this geneome
4935 
4936       // Loop through all the lines of code, testing the removal of each.
4937       for (int line_num = 0; line_num < max_line; line_num++) {
4938         int cur_inst = base_seq[line_num].GetOp();
4939 
4940         seq[line_num] = null_inst;
4941         cAnalyzeGenotype test_genotype(m_world, mod_genome);
4942         test_genotype.Recalculate(m_ctx);
4943 
4944         // Print the individual columns...
4945         output_it.Reset();
4946         tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
4947         int cur_col = 0;
4948         while ((data_command = output_it.Next()) != NULL) {
4949           const cFlexVar test_value = data_command->GetValue(&test_genotype);
4950 
4951           // This is done so that under 'binary' option it marks
4952           // the task as being influenced by the mutation iff
4953           // it is completely knocked out, not just decreased
4954 
4955           int compare_type = data_command->GetCompareType();
4956           int compare = CompareFlexStat(test_value, data_command->GetValue(genotype), compare_type);
4957 
4958           // If knocking out an instruction stops the expression of a
4959           // particular task, mark that in the modularity matrix
4960           // and add it to two counts
4961           // Only do the checking if the test_genotype replicate, i.e.
4962           // if it's fitness is not zeros
4963 
4964           if (compare < 0  && test_genotype.GetFitness() != 0) {
4965             mod_matrix(cur_col,line_num) = 1;
4966             num_inst[cur_col]++;
4967             num_task[line_num]++;
4968           }
4969           cur_col++;
4970         }
4971 
4972         // Reset the mod_genome back to the original sequence.
4973         seq[line_num].SetOp(cur_inst);
4974       } // end of genotype-phenotype mapping for a single organism
4975 
4976       for (int i = 0; i < num_cols; i++) if (num_inst[i] != 0) total_task++;
4977       for (int i = 0; i < max_line; i++) if (num_task[i] != 0) total_inst++;
4978       for (int i = 0; i < num_cols; i++) total_all = total_all + num_inst[i];
4979 
4980       // Add the values to the av_ variables, used for calculating the average
4981       // in order to weigh them by abundance, multiply everything by num_cpus
4982 
4983       av_length = av_length + max_line*num_cpus;
4984       av_task = av_task + total_task*num_cpus;
4985       av_inst = av_inst + total_inst*num_cpus;
4986       av_inst_len = av_inst_len + (double) total_inst*num_cpus/max_line;
4987 
4988       if (total_task !=0)  av_site_task = av_site_task + num_cpus * (double) total_all/total_task;
4989       if (total_inst !=0)  av_task_site = av_task_site + num_cpus * (double) total_all/total_inst;
4990       if (total_inst !=0 && total_task !=0) {
4991         av_t_s_norm = av_t_s_norm + num_cpus * (double) total_all/(total_inst*total_task);
4992       }
4993 
4994       for (int i = 0; i < num_cols; i++) {
4995         if (num_inst[i] > 0) {
4996           av_num_inst[i] = av_num_inst[i] + num_inst[i] * num_cpus;
4997           org_task[i] = org_task[i] + num_cpus;   // count how many are actually doing the task
4998         }
4999       }
5000 
5001       // calculate average task overlap
5002       // first construct num_task x num_task matrix with number of sites overlapping
5003       for (int i = 0; i < max_line; i++) {
5004         for (int j = 0; j < num_cols; j++) {
5005           for (int k = j; k < num_cols; k++) {
5006             if (mod_matrix(j,i)>0 && mod_matrix(k,i)>0) {
5007               task_overlap(j,k)++;
5008               if (j!=k) task_overlap(k,j)++;
5009             }
5010           }
5011         }
5012       }
5013 
5014       // go though the task_overlap matrix, add and average everything up.
5015       if (total_task > 1) {
5016         for (int i = 0; i < num_cols; i++) {
5017           double overlap_per_task = 0;
5018           for (int j = 0; j < num_cols; j++) {
5019             if (i!=j) {overlap_per_task = overlap_per_task + task_overlap(i,j);}
5020           }
5021           if (task_overlap(i,i) !=0){
5022             sum_task_overlap = sum_task_overlap + overlap_per_task / (task_overlap(i,i) * (total_task-1));
5023           }
5024         }
5025       }
5026 
5027       // now, divide that by number of tasks done and add to the grand sum, weigthed by num_cpus
5028       if (total_task!=0) {
5029         av_task_overlap = av_task_overlap + num_cpus * (double) sum_task_overlap/total_task ;
5030       }
5031       // calculate the first/last postion of a task, the task "spread"
5032       // starting from the top look for the fist command that matters for a task
5033 
5034       for (int i = 0; i < num_cols; i++) {
5035         int j = 0;
5036         while (j < max_line) {
5037           if (mod_matrix(i,j) > 0 && task_length[i] == 0 ) {
5038             task_length[i] = j;
5039             break;
5040           }
5041           j++;
5042         }
5043       }
5044 
5045       // starting frm the bottom look for the last command that matters for a task
5046       // and substract it from the first to get the task length
5047       // add one in order to account for both the beginning and the end instruction
5048       for (int i = 0; i < num_cols; i++) {
5049         int j = max_line - 1;
5050         while (j > -1) {
5051           if (mod_matrix(i,j) > 0) {
5052             task_length[i] = j - task_length[i] + 1;
5053             break;
5054           }
5055           j--;
5056         }
5057       }
5058       // add the task lengths to the average for the batch
5059       // weigthed by the number of cpus for that genotype
5060       for (int i = 0; i < num_cols; i++) {
5061         av_task_length[i] = av_task_length[i] +  num_cpus * task_length[i];
5062       }
5063 
5064       // calculate the Standard Deviation in the mean position of the task
5065       for (int i = 0; i < num_cols; i++) {
5066         for (int j = 0; j < max_line; j++) {
5067           if (mod_matrix(i,j)>0) sum[i] = sum[i] + j;
5068         }
5069       }
5070 
5071       double temp = 0;
5072       for (int i = 0; i < num_cols; i++) {
5073         if (num_inst[i]>1) {
5074           double av_sum = sum[i]/num_inst[i];
5075           for (int j = 0; j < max_line; j++) {
5076             if (mod_matrix(i,j)>0) temp = (av_sum - j)*(av_sum - j);
5077           }
5078           std_task_position[i] = std_task_position[i] + sqrt(temp/(num_inst[i]-1))*num_cpus;
5079         }
5080       }
5081 
5082       for (int i = 0; i < max_line; i++) { inst_task[num_task[i]]++ ;}
5083       for (int i = 0; i < num_cols+1; i++) { av_inst_task[i] = av_inst_task[i] + inst_task[i] * num_cpus;}
5084 
5085     }
5086   }  // this is the end of the loop going though all the organisms
5087 
5088   // make sure there are some organisms doing task in this batch
5089   // if not, return all zeros
5090 
5091   if (num_orgs != 0) {
5092     fp << (double) av_length/num_orgs  << " ";  	// 1: average length
5093     fp << (double) av_task/num_orgs << " ";		// 2: av. number of tasks done
5094     fp << (double) av_inst/num_orgs << " ";		// 3: av. number of sites used for tasks
5095     fp << (double) av_inst_len/num_orgs << " ";		// 4: proportion of sites used for tasks
5096     fp << (double) av_task_site/num_orgs << " ";	// 5: av. number of tasks per site
5097     fp << (double) av_site_task/num_orgs << " ";	// 6: av. number of sites per task
5098     fp << (double) av_t_s_norm/num_orgs << " ";		// 7: av. number of tasks per site per task
5099     fp << (double) 1 - av_task_overlap/num_orgs << " ";        // 8: av. proportion of a task that DOESN'T overlap
5100     for (int i = 0; i < num_cols; i++) {
5101       if (org_task[i] > 0) fp << std_task_position[i]/org_task[i]  << " ";
5102       else fp << 0 << " ";
5103     }
5104     for (int i = 0; i < num_cols; i++) {
5105       if (org_task[i] > 0) fp << (double) av_num_inst[i]/org_task[i]  << " ";
5106       else fp << 0 << " ";
5107     }
5108     for (int i = 0; i < num_cols+1; i++) { fp << (double) av_inst_task[i]/num_orgs  << " ";}
5109     for (int i = 0; i < num_cols; i++) { fp << (double) av_task_length[i]/num_orgs  << " ";}
5110     fp << endl;
5111   }
5112 
5113   else {
5114     for (int i = 0; i < 8+4*num_cols+1; i++) {fp << "0 ";}
5115     fp << endl;
5116   }
5117 }
5118 
5119 
CommandAnalyzeModularity(cString cur_string)5120 void cAnalyze::CommandAnalyzeModularity(cString cur_string)
5121 {
5122   cString filename("analyze_modularity.dat");
5123   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5124 
5125   cDataFile & df = m_world->GetDataFile(filename);
5126   df.WriteComment( "Modularity Analysis" );
5127   df.WriteTimeStamp();
5128 
5129   // Determine which phenotypic traits we're working with
5130   tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
5131   tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
5132   cStringList arg_list(cur_string);
5133   cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(arg_list, output_list);
5134   const int num_traits = output_list.GetSize();
5135 
5136   // Loop through all genotypes in this batch.
5137   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
5138   cAnalyzeGenotype * genotype = NULL;
5139   while ((genotype = batch_it.Next()) != NULL) {
5140     const int base_length = genotype->GetLength();
5141     const Genome& base_genome = genotype->GetGenome();
5142     const Sequence& base_seq = base_genome.GetSequence();
5143     Genome mod_genome(base_genome);
5144     Sequence& seq = mod_genome.GetSequence();
5145     genotype->Recalculate(m_ctx);
5146 
5147     const cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
5148 
5149     tMatrix<bool> task_matrix(num_traits, base_length);
5150     tArray<int> num_inst(num_traits);  // Number of instructions for each task
5151     tArray<int> num_task(base_length); // Number of traits at each locus
5152     task_matrix.SetAll(false);
5153     num_inst.SetAll(0);
5154     num_task.SetAll(0);
5155 
5156     // Loop through all lines in this genome
5157     for (int line_num = 0; line_num < base_length; line_num++) {
5158       int cur_inst = base_seq[line_num].GetOp();
5159 
5160       // Determine what happens to this genotype when this line is knocked out
5161       seq[line_num] = null_inst;
5162       cAnalyzeGenotype test_genotype(m_world, mod_genome);
5163       test_genotype.Recalculate(m_ctx);
5164 
5165       // Loop through the individual traits
5166       output_it.Reset();
5167       tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
5168       int cur_trait = 0;
5169       while ((data_command = output_it.Next()) != NULL) {
5170         const cFlexVar test_value = data_command->GetValue(&test_genotype);
5171 
5172         int compare_type = data_command->GetCompareType();
5173         int compare = CompareFlexStat(test_value, data_command->GetValue(genotype), compare_type);
5174 
5175         // If knocking out the instruction turns off this trait, mark it in
5176         // the modularity matrix.  Only check if the test_genotype replicates,
5177         // i.e. if its fitness is not zeros
5178         if (compare < 0  && test_genotype.GetFitness() != 0) {
5179           task_matrix(cur_trait, line_num) = true;
5180           num_inst[cur_trait]++;
5181           num_task[line_num]++;
5182           // cout << line_num << " : true" << endl;
5183         } else {
5184           // cout << line_num << " : false" << endl;
5185         }
5186         cur_trait++;
5187       }
5188 
5189       // Reset the mod_genome back to the original sequence.
5190       seq[line_num].SetOp(cur_inst);
5191     } // end of genotype-phenotype mapping for a single organism
5192 
5193 
5194     // --== PHYSICAL MODULARITY ==--
5195 
5196     double ave_dist = 0.0;  // Average distance between sites in traits.
5197 
5198     // Loop through each task to calculate its physical modularity
5199     int trait_count = 0; // Count active traits...
5200     int site_count = 0;  // Count total sites for all traits...
5201     for (int cur_trait = 0; cur_trait < num_traits; cur_trait++) {
5202       //       cout << "Trait " << cur_trait << ", coded for by "
5203       //         << num_inst[cur_trait] << " instructions." << endl;
5204 
5205       // Ignore traits not coded for in this genome...
5206       if (num_inst[cur_trait] == 0) continue;
5207 
5208       // Keep track of how many traits we're examining...
5209       trait_count++;
5210 
5211       double trait_dist = 0.0;  // Total distance between sites in this trait.
5212       int num_samples = 0;      // Count samples we take for this trait.
5213 
5214       // Compare all pairs of positions.
5215       for (int pos1 = 0; pos1 < base_length; pos1++) {
5216         if (task_matrix(cur_trait, pos1) == false) continue;
5217         site_count++;
5218         for (int pos2 = pos1+1; pos2 < base_length; pos2++) {
5219           if (task_matrix(cur_trait, pos2) == false) continue;
5220 
5221           // We'll only make it this far if both positions code for the trait.
5222           num_samples++;
5223 
5224           // Calculate the distance...
5225           int cur_dist = pos2 - pos1;
5226 
5227           // Remember to consider that the genome is circular.
5228           if (2*cur_dist > base_length) cur_dist = base_length - cur_dist;
5229 
5230           //        cout << "Pos " << pos1 << " and " << pos2 << "; distance="
5231           //             << cur_dist << endl;
5232 
5233           // And add it into the total for this trait.
5234           trait_dist += cur_dist;
5235         }
5236       }
5237 
5238       // Assert that we found the correct number of samples.
5239       //assert(num_samples = num_inst(cur_trait) * (num_inst(cur_trait)-1) / 2);
5240 
5241       // Now that we have all of the distances for this trait, divide by the
5242       // number of samples and add it to the average.
5243       ave_dist += trait_dist / num_samples;
5244     }
5245 
5246 
5247     // Now that we've summed up all of the average distances for this
5248     // genotype, calculate the physical modularity.
5249 
5250     double PM = 1.0 - (ave_dist / (double) (base_length * trait_count));
5251     double ave_sites = ((double) site_count) / (double) trait_count;
5252 
5253     // Write the results to file...
5254     df.Write(PM,          "Physical Modularity");
5255     df.Write(trait_count, "Number of traits used in calculation");
5256     df.Write(ave_sites,   "Average num sites associated with traits");
5257     df.Write(base_length, "Genome length");
5258     df.Write(ave_dist,    "Average Distance between trait sites");
5259     df.Endl();
5260   }
5261 
5262   // @CAO CONTINUE HERE
5263 }
5264 
5265 
5266 // Determine redundancy by calculating the percentage of the lifetimes
5267 // where fitness is decreased over a range of instruction failure probabilities.
5268 // @JEB 9-24-2008
CommandAnalyzeRedundancyByInstFailure(cString cur_string)5269 void cAnalyze::CommandAnalyzeRedundancyByInstFailure(cString cur_string)
5270 {
5271   cout << "Analyzing redundancy by changing instruction failure probability..." << endl;
5272 
5273   cString filename("analyze_redundancy_by_inst_failure.dat");
5274   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5275   int replicates = 1000;
5276   if (cur_string.GetSize() != 0) replicates = cur_string.PopWord().AsInt();
5277   double log10_start_pr_fail = -4;
5278 
5279   // add mode
5280   int mode = 0;
5281   // 0 = average log2 fitness
5282   // 1 = fitness decreased
5283 
5284   if (cur_string.GetSize() != 0) log10_start_pr_fail = cur_string.PopWord().AsDouble();
5285   double log10_end_pr_fail = 0;
5286   if (cur_string.GetSize() != 0) log10_end_pr_fail = cur_string.PopWord().AsDouble();
5287   if (log10_end_pr_fail > 0) {
5288     m_world->GetDriver().NotifyWarning("ANALYZE_REDUNDANCY_BY_INST_FAILURE: End log value greater than 0 set to 0.");
5289   }
5290   double log10_step_size_pr_fail = 0.1;
5291   if (cur_string.GetSize() != 0) log10_step_size_pr_fail = cur_string.PopWord().AsDouble();
5292 
5293   // Output is one line per organism in the current batch with columns.
5294   cDataFile & df = m_world->GetDataFile(filename);
5295   df.WriteComment( "Redundancy calculated by changing the probability of instruction failure" );
5296   cString s;
5297   s.Set("%i replicates at each chance of instruction failure", replicates);
5298   df.WriteComment(s);
5299   df.WriteTimeStamp();
5300 
5301   // Loop through all of the genotypes in this batch...
5302 
5303   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
5304   cAnalyzeGenotype* genotype = NULL;
5305   while ((genotype = batch_it.Next()) != NULL) {
5306 
5307     if (m_world->GetVerbosity() >= VERBOSE_ON) {
5308       cout << "  Determining redundancy by instruction failure for " << genotype->GetName() << endl;
5309     }
5310 
5311     const cInstSet& original_inst_set = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet());
5312     cInstSet* modify_inst_set = new cInstSet(original_inst_set);
5313     cString isname = cString(genotype->GetGenome().GetInstSet()) + ":analyze_redundancy_by_inst_failure";
5314     if (!m_world->GetHardwareManager().RegisterInstSet(isname, modify_inst_set)) {
5315       delete modify_inst_set;
5316       modify_inst_set = &m_world->GetHardwareManager().GetInstSet(isname);
5317     }
5318 
5319     // Modify the instruction set to include the current probability of failure.
5320     int num_pr_fail_insts = 0;
5321     for (int j = 0; j < modify_inst_set->GetSize(); j++)
5322     {
5323       cString inst_name = modify_inst_set->GetName(j);
5324       cInstruction inst = modify_inst_set->GetInst(inst_name);
5325       if (original_inst_set.GetProbFail(inst) > 0) num_pr_fail_insts++;
5326       modify_inst_set->SetProbFail(inst, 0);
5327     }
5328     genotype->GetGenome().SetInstSet(isname);
5329 
5330     // Avoid unintentional use with no instructions having a chance of failure
5331     if (num_pr_fail_insts == 0) {
5332       m_world->GetDriver().RaiseFatalException(1,"ANALYZE_REDUNDANCY_BY_INST_FAILURE: No instructions have a chance of failure in default instruction set.");
5333     }
5334 
5335     // Recalculate the baseline fitness
5336     // May need to calculate multiple times to check for stochastic behavior....
5337     genotype->Recalculate(m_ctx);
5338     double baseline_fitness = genotype->GetFitness();
5339 
5340     if (baseline_fitness > 0) {
5341       // Write information for this
5342       df.Write(genotype->GetName(), "genotype name");
5343       df.Write(genotype->GetID(), "genotype id");
5344       df.Write(baseline_fitness, "fitness");
5345 
5346       // Run the organism the specified number of replicates
5347       for (double log10_fc = log10_start_pr_fail; log10_fc <= log10_end_pr_fail; log10_fc += log10_step_size_pr_fail) {
5348         double fc = exp(log10_fc*log(10.0));
5349 
5350         // Modify the instruction set to include the current probability of failure.
5351         *modify_inst_set = original_inst_set;
5352         for (int j = 0; j < modify_inst_set->GetSize(); j++) {
5353           cString inst_name = modify_inst_set->GetName(j);
5354           cInstruction inst = modify_inst_set->GetInst(inst_name);
5355           if (original_inst_set.GetProbFail(inst) > 0) modify_inst_set->SetProbFail(inst, fc);
5356         }
5357 
5358         // Recalculate the requested number of times
5359         double chance = 0;
5360         double avg_fitness = 0;
5361         for (int i = 0; i < replicates; i++) {
5362           genotype->Recalculate(m_ctx);
5363           if (genotype->GetFitness() < baseline_fitness) chance++;
5364           avg_fitness += genotype->GetFitness();
5365         }
5366 
5367         if (mode == 0) {
5368           s.Set("Avg fitness when inst prob fail %.3g", fc);
5369           df.Write(avg_fitness/replicates, s);
5370         } else {
5371           s.Set("Fraction of replicates with reduced fitness at inst prob fail %.3g", fc);
5372           df.Write(chance/replicates, s);
5373         }
5374       }
5375       df.Endl();
5376     }
5377   }
5378 }
5379 
CommandMapMutations(cString cur_string)5380 void cAnalyze::CommandMapMutations(cString cur_string)
5381 {
5382   cout << "Constructing genome mutations maps..." << endl;
5383 
5384   // Load in the variables...
5385   cString directory = PopDirectory(cur_string, "mutations/");
5386   int file_type = FILE_TYPE_TEXT;
5387 
5388   cStringList arg_list(cur_string);
5389 
5390   // Check for some command specific variables.
5391   if (arg_list.PopString("text") != "") file_type = FILE_TYPE_TEXT;
5392   if (arg_list.PopString("html") != "") file_type = FILE_TYPE_HTML;
5393 
5394   // Give some information in verbose mode.
5395   if (m_world->GetVerbosity() >= VERBOSE_ON) {
5396     cout << "  outputing as ";
5397     if (file_type == FILE_TYPE_TEXT) cout << "text files." << endl;
5398     else cout << "HTML files." << endl;
5399   }
5400 
5401 
5402   ///////////////////////////////////////////////////////
5403   // Loop through all of the genotypes in this batch...
5404 
5405   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
5406   cAnalyzeGenotype * genotype = NULL;
5407   while ((genotype = batch_it.Next()) != NULL) {
5408     if (m_world->GetVerbosity() >= VERBOSE_ON) {
5409       cout << "  Creating mutation map for " << genotype->GetName() << endl;
5410     }
5411 
5412     // Construct this filename...
5413     cString filename;
5414     if (file_type == FILE_TYPE_TEXT) {
5415       filename.Set("%smut_map.%s.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
5416     } else {   //  if (file_type == FILE_TYPE_HTML) {
5417       filename.Set("%smut_map.%s.html", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
5418     }
5419     if (m_world->GetVerbosity() >= VERBOSE_ON) {
5420       cout << "  Using filename \"" << filename << "\"" << endl;
5421     }
5422     ofstream& fp = m_world->GetDataFileOFStream(filename);
5423 
5424     // Calculate the stats for the genotype we're working with...
5425     genotype->Recalculate(m_ctx);
5426     const double base_fitness = genotype->GetFitness();
5427     const int max_line = genotype->GetLength();
5428     const Genome& base_genome = genotype->GetGenome();
5429     const Sequence& base_seq = base_genome.GetSequence();
5430     Genome mod_genome(base_genome);
5431     Sequence& seq = mod_genome.GetSequence();
5432     const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet());
5433     const int num_insts = inst_set.GetSize();
5434 
5435     // Headers...
5436     if (file_type == FILE_TYPE_TEXT) {
5437       fp << "# 1: Genome instruction ID (pre-mutation)" << endl;
5438       for (int i = 0; i < num_insts; i++) {
5439         fp << "# " << i+1 <<": Fit if mutated to '"
5440         << inst_set.GetName(i) << "'" << endl;
5441       }
5442       fp << "# " << num_insts + 2 << ": Knockout" << endl;
5443       fp << "# " << num_insts + 3 << ": Fraction Lethal" << endl;
5444       fp << "# " << num_insts + 4 << ": Fraction Detremental" << endl;
5445       fp << "# " << num_insts + 5 << ": Fraction Neutral" << endl;
5446       fp << "# " << num_insts + 6 << ": Fraction Beneficial" << endl;
5447       fp << "# " << num_insts + 7 << ": Average Fitness" << endl;
5448       fp << "# " << num_insts + 8 << ": Expected Entropy" << endl;
5449       fp << "# " << num_insts + 9 << ": Original Instruction Name" << endl;
5450       fp << endl;
5451 
5452     } else { // if (file_type == FILE_TYPE_HTML) {
5453              // Mark file as html
5454       fp << "<html>" << endl;
5455 
5456       // Setup the body...
5457       fp << "<body bgcolor=\"#FFFFFF\"" << endl
5458         << " text=\"#000000\"" << endl
5459         << " link=\"#0000AA\"" << endl
5460         << " alink=\"#0000FF\"" << endl
5461         << " vlink=\"#000044\">" << endl
5462         << endl
5463         << "<h1 align=center>Mutation Map for Run " << batch[cur_batch].Name()
5464         << ", ID " << genotype->GetID() << "</h1>" << endl
5465         << "<center>" << endl
5466         << endl;
5467 
5468       // The main chart...
5469       fp << "<table border=1 cellpadding=2>" << endl;
5470 
5471       // The headings...///
5472       fp << "<tr><th>Genome ";
5473       for (int i = 0; i < num_insts; i++) {
5474         fp << "<th>" << inst_set.GetName(i) << " ";
5475       }
5476       fp << "<th>Knockout ";
5477       fp << "<th>Frac. Lethal ";
5478       fp << "<th>Frac. Detremental ";
5479       fp << "<th>Frac. Neutral ";
5480       fp << "<th>Frac. Beneficial ";
5481       fp << "<th>Ave. Fitness ";
5482       fp << "<th>Expected Entropy ";
5483       fp << "</tr>" << endl << endl;
5484     }
5485 
5486 
5487     // Keep track of the number of mutations in each category...
5488     int total_dead = 0, total_neg = 0, total_neut = 0, total_pos = 0;
5489     double total_fitness = 0.0;
5490     tArray<double> col_fitness(num_insts + 1);
5491     col_fitness.SetAll(0.0);
5492 
5493     const cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
5494 
5495     cString color_string;  // For coloring cells...
5496 
5497     // Loop through all the lines of code, testing all mutations...
5498     for (int line_num = 0; line_num < max_line; line_num++) {
5499       int cur_inst = base_seq[line_num].GetOp();
5500       char cur_symbol = base_seq[line_num].GetSymbol();
5501       int row_dead = 0, row_neg = 0, row_neut = 0, row_pos = 0;
5502       double row_fitness = 0.0;
5503 
5504       // Column 1... the original instruction in the geneome.
5505       if (file_type == FILE_TYPE_HTML) {
5506         fp << "<tr><td align=right>" << inst_set.GetName(cur_inst)
5507         << " (" << cur_symbol << ") ";
5508       } else {
5509         fp << cur_inst << " ";
5510       }
5511 
5512       // Columns 2 to D+1 (the possible mutations)
5513       for (int mod_inst = 0; mod_inst < num_insts; mod_inst++)
5514       {
5515         if (mod_inst == cur_inst) {
5516           if (file_type == FILE_TYPE_HTML) {
5517             color_string = "#FFFFFF";
5518             fp << "<th bgcolor=\"" << color_string << "\">";
5519           }
5520         }
5521         else {
5522           seq[line_num].SetOp(mod_inst);
5523           cAnalyzeGenotype test_genotype(m_world, mod_genome);
5524           test_genotype.Recalculate(m_ctx);
5525           const double test_fitness = test_genotype.GetFitness() / base_fitness;
5526           row_fitness += test_fitness;
5527           total_fitness += test_fitness;
5528           col_fitness[mod_inst] += test_fitness;
5529 
5530           // Categorize this mutation...
5531           if (test_fitness == 1.0) {           // Neutral Mutation...
5532             row_neut++;
5533             total_neut++;
5534             if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_NEUT.Get();
5535           } else if (test_fitness == 0.0) {    // Lethal Mutation...
5536             row_dead++;
5537             total_dead++;
5538             if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_LETHAL.Get();
5539           } else if (test_fitness < 1.0) {     // Detrimental Mutation...
5540             row_neg++;
5541             total_neg++;
5542             if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_NEG.Get();
5543           } else {                             // Beneficial Mutation...
5544             row_pos++;
5545             total_pos++;
5546             if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_POS.Get();
5547           }
5548 
5549           // Write out this cell...
5550           if (file_type == FILE_TYPE_HTML) {
5551             fp << "<th bgcolor=\"" << color_string << "\">";
5552           }
5553           fp << test_fitness << " ";
5554         }
5555       }
5556 
5557       // Column: Knockout
5558       seq[line_num] = null_inst;
5559       cAnalyzeGenotype test_genotype(m_world, mod_genome);
5560       test_genotype.Recalculate(m_ctx);
5561       const double test_fitness = test_genotype.GetFitness() / base_fitness;
5562       col_fitness[num_insts] += test_fitness;
5563 
5564       // Categorize this mutation if its in HTML mode (color only)...
5565       if (file_type == FILE_TYPE_HTML) {
5566         if (test_fitness == 1.0) color_string =  m_world->GetConfig().COLOR_MUT_NEUT.Get();
5567         else if (test_fitness == 0.0) color_string = m_world->GetConfig().COLOR_MUT_LETHAL.Get();
5568         else if (test_fitness < 1.0) color_string = m_world->GetConfig().COLOR_MUT_NEG.Get();
5569         else color_string = m_world->GetConfig().COLOR_MUT_POS.Get();
5570 
5571         fp << "<th bgcolor=\"" << color_string << "\">";
5572       }
5573 
5574       fp << test_fitness << " ";
5575 
5576       // Fraction Columns...
5577       if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">";
5578       fp << (double) row_dead / (double) (num_insts-1) << " ";
5579 
5580       if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEG.Get() << "\">";
5581       fp << (double) row_neg / (double) (num_insts-1) << " ";
5582 
5583       if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEUT.Get() << "\">";
5584       fp << (double) row_neut / (double) (num_insts-1) << " ";
5585 
5586       if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">";
5587       fp << (double) row_pos / (double) (num_insts-1) << " ";
5588 
5589 
5590       // Column: Average Fitness
5591       if (file_type == FILE_TYPE_HTML) fp << "<th>";
5592       fp << row_fitness / (double) (num_insts-1) << " ";
5593 
5594       // Column: Expected Entropy  @CAO Implement!
5595       if (file_type == FILE_TYPE_HTML) fp << "<th>";
5596       fp << 0.0 << " ";
5597 
5598       // End this row...
5599       if (file_type == FILE_TYPE_HTML) fp << "</tr>";
5600       fp << endl;
5601 
5602       // Reset the mod_genome back to the original sequence.
5603       seq[line_num].SetOp(cur_inst);
5604     }
5605 
5606 
5607     // Construct the final line of the table with all totals...
5608     if (file_type == FILE_TYPE_HTML) {
5609       fp << "<tr><th>Totals";
5610 
5611       // Instructions + Knockout
5612       for (int i = 0; i <= num_insts; i++) {
5613         fp << "<th>" << col_fitness[i] / max_line << " ";
5614       }
5615 
5616       int total_tests = max_line * (num_insts-1);
5617       fp << "<th>" << (double) total_dead / (double) total_tests << " ";
5618       fp << "<th>" << (double) total_neg / (double) total_tests << " ";
5619       fp << "<th>" << (double) total_neut / (double) total_tests << " ";
5620       fp << "<th>" << (double) total_pos / (double) total_tests << " ";
5621       fp << "<th>" << total_fitness / (double) total_tests << " ";
5622       fp << "<th>" << 0.0 << " ";
5623 
5624 
5625       // And close everything up...
5626       fp << "</table>" << endl
5627         << "</center>" << endl;
5628     }
5629   }
5630 }
5631 
5632 
CommandMapDepth(cString cur_string)5633 void cAnalyze::CommandMapDepth(cString cur_string)
5634 {
5635   cout << "Constructing depth map..." << endl;
5636 
5637   cString filename("depth_map.dat");
5638   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5639 
5640   int min_batch = 0;
5641   int max_batch = cur_batch - 1;
5642 
5643   if (cur_string.GetSize() != 0) min_batch = cur_string.PopWord().AsInt();
5644   if (cur_string.GetSize() != 0) max_batch = cur_string.PopWord().AsInt();
5645 
5646   // First, scan all of the batches to find the maximum depth.
5647   int max_depth = -1;
5648   cAnalyzeGenotype * genotype;
5649   for (int i = min_batch; i <= max_batch; i++) {
5650     tListIterator<cAnalyzeGenotype> list_it(batch[i].List());
5651     while ((genotype = list_it.Next()) != NULL) {
5652       if (genotype->GetDepth() > max_depth) max_depth = genotype->GetDepth();
5653     }
5654   }
5655 
5656   cout << "max_depth = " << max_depth << endl;
5657 
5658   ofstream& fp = m_world->GetDataFileOFStream(filename);
5659 
5660   cout << "Output to " << filename << endl;
5661   tArray<int> depth_array(max_depth+1);
5662   for (cur_batch = min_batch; cur_batch <= max_batch; cur_batch++) {
5663     depth_array.SetAll(0);
5664     tListIterator<cAnalyzeGenotype> list_it(batch[cur_batch].List());
5665     while ((genotype = list_it.Next()) != NULL) {
5666       const int cur_depth = genotype->GetDepth();
5667       const int cur_count = genotype->GetNumCPUs();
5668       depth_array[cur_depth] += cur_count;
5669     }
5670 
5671     for (int i = 0; i <= max_depth; i++) {
5672       fp << depth_array[i] << " ";
5673     }
5674     fp << endl;
5675   }
5676 }
5677 
CommandHamming(cString cur_string)5678 void cAnalyze::CommandHamming(cString cur_string)
5679 {
5680   cString filename("hamming.dat");
5681   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5682 
5683   int batch1 = PopBatch(cur_string.PopWord());
5684   int batch2 = PopBatch(cur_string.PopWord());
5685 
5686   // We want batch2 to be the larger one for efficiency...
5687   if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5688     int tmp = batch1;  batch1 = batch2;  batch2 = tmp;
5689   }
5690 
5691   if (m_world->GetVerbosity() <= VERBOSE_NORMAL) {
5692     cout << "Calculating Hamming Distance... ";
5693     cout.flush();
5694   } else {
5695     cout << "Calculating Hamming Distance between batches "
5696     << batch1 << " and " << batch2 << endl;
5697     cout.flush();
5698   }
5699 
5700   // Setup some variables;
5701   cAnalyzeGenotype * genotype1 = NULL;
5702   cAnalyzeGenotype * genotype2 = NULL;
5703   int total_dist = 0;
5704   int total_count = 0;
5705 
5706   tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5707   tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5708 
5709   while ((genotype1 = list1_it.Next()) != NULL) {
5710     list2_it.Reset();
5711     while ((genotype2 = list2_it.Next()) != NULL) {
5712       // Determine the counts...
5713       const int count1 = genotype1->GetNumCPUs();
5714       const int count2 = genotype2->GetNumCPUs();
5715       const int num_pairs = (genotype1 == genotype2) ?
5716         ((count1 - 1) * (count2 - 1)) : (count1 * count2);
5717       if (num_pairs == 0) continue;
5718 
5719       // And do the tests...
5720       const int dist = Sequence::FindHammingDistance(genotype1->GetGenome().GetSequence(), genotype2->GetGenome().GetSequence());
5721       total_dist += dist * num_pairs;
5722       total_count += num_pairs;
5723     }
5724   }
5725 
5726 
5727   // Calculate the final answer
5728   double ave_dist = (double) total_dist / (double) total_count;
5729   cout << " ave distance = " << ave_dist << endl;
5730 
5731   cDataFile & df = m_world->GetDataFile(filename);
5732 
5733   df.WriteComment( "Hamming distance information" );
5734   df.WriteTimeStamp();
5735 
5736   df.Write(batch[batch1].Name(), "Name of First Batch");
5737   df.Write(batch[batch2].Name(), "Name of Second Batch");
5738   df.Write(ave_dist,             "Average Hamming Distance");
5739   df.Write(total_count,          "Total Pairs Test");
5740   df.Endl();
5741 }
5742 
CommandLevenstein(cString cur_string)5743 void cAnalyze::CommandLevenstein(cString cur_string)
5744 {
5745   cString filename("lev.dat");
5746   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5747 
5748   int batch1 = PopBatch(cur_string.PopWord());
5749   int batch2 = PopBatch(cur_string.PopWord());
5750 
5751   // We want batch2 to be the larger one for efficiency...
5752   if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5753     int tmp = batch1;  batch1 = batch2;  batch2 = tmp;
5754   }
5755 
5756   if (m_world->GetVerbosity() <= VERBOSE_NORMAL) {
5757     cout << "Calculating Levenstein Distance... ";
5758     cout.flush();
5759   } else {
5760     cout << "Calculating Levenstein Distance between batch "
5761     << batch1 << " and " << batch2 << endl;
5762     cout.flush();
5763   }
5764 
5765   // Setup some variables;
5766   cAnalyzeGenotype * genotype1 = NULL;
5767   cAnalyzeGenotype * genotype2 = NULL;
5768   int total_dist = 0;
5769   int total_count = 0;
5770 
5771   tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5772   tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5773 
5774   // Loop through all of the genotypes in each batch...
5775   while ((genotype1 = list1_it.Next()) != NULL) {
5776     list2_it.Reset();
5777     while ((genotype2 = list2_it.Next()) != NULL) {
5778       // Determine the counts...
5779       const int count1 = genotype1->GetNumCPUs();
5780       const int count2 = genotype2->GetNumCPUs();
5781       const int num_pairs = (genotype1 == genotype2) ?
5782         ((count1 - 1) * (count2 - 1)) : (count1 * count2);
5783       if (num_pairs == 0) continue;
5784 
5785       // And do the tests...
5786       const int dist = Sequence::FindEditDistance(genotype1->GetGenome().GetSequence(),
5787                                                      genotype2->GetGenome().GetSequence());
5788       total_dist += dist * num_pairs;
5789       total_count += num_pairs;
5790     }
5791   }
5792 
5793   // Calculate the final answer
5794   double ave_dist = (double) total_dist / (double) total_count;
5795   cout << " ave distance = " << ave_dist << endl;
5796 
5797   cDataFile & df = m_world->GetDataFile(filename);
5798 
5799   df.WriteComment( "Levenstein distance information" );
5800   df.WriteTimeStamp();
5801 
5802   df.Write(batch[batch1].Name(), "Name of First Batch");
5803   df.Write(batch[batch2].Name(), "Name of Second Batch");
5804   df.Write(ave_dist,             "Average Levenstein Distance");
5805   df.Write(total_count,          "Total Pairs Test");
5806   df.Endl();
5807 }
5808 
CommandSpecies(cString cur_string)5809 void cAnalyze::CommandSpecies(cString cur_string)
5810 {
5811   cString filename("species.dat");
5812   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5813 
5814   int batch1 = PopBatch(cur_string.PopWord());
5815   int batch2 = PopBatch(cur_string.PopWord());
5816   int num_compare = PopBatch(cur_string.PopWord());
5817 
5818   // We want batch2 to be the larger one for efficiency...
5819   if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5820     int tmp = batch1;  batch1 = batch2;  batch2 = tmp;
5821   }
5822 
5823   if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Calculating Species Distance... " << endl;
5824   else cout << "Calculating Species Distance between batch "
5825     << batch1 << " and " << batch2 << endl;
5826 
5827   // Setup some variables;
5828   cAnalyzeGenotype * genotype1 = NULL;
5829   cAnalyzeGenotype * genotype2 = NULL;
5830   int total_fail = 0;
5831   int total_count = 0;
5832 
5833   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
5834 
5835   tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5836   tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5837 
5838   // Loop through all of the genotypes in each batch...
5839   while ((genotype1 = list1_it.Next()) != NULL) {
5840     list2_it.Reset();
5841     while ((genotype2 = list2_it.Next()) != NULL) {
5842       // Determine the counts...
5843       const int count1 = genotype1->GetNumCPUs();
5844       const int count2 = genotype2->GetNumCPUs();
5845       int num_pairs = count1 * count2;
5846       int fail_count = 0;
5847       bool cross1_viable = true;
5848       bool cross2_viable = true;
5849 
5850 
5851       if (genotype1 == genotype2) {
5852         total_count += num_pairs * 2 * num_compare;
5853       }
5854       else {
5855         assert(num_compare!=0);
5856         // And do the tests...
5857         for (int iter=1; iter < num_compare; iter++) {
5858           Genome test_genome0 = genotype1->GetGenome();
5859           Genome test_genome1 = genotype2->GetGenome();
5860 
5861           double start_frac = m_world->GetRandom().GetDouble();
5862           double end_frac = m_world->GetRandom().GetDouble();
5863           if (start_frac > end_frac) Swap(start_frac, end_frac);
5864 
5865           int start0 = (int) (start_frac * (double) test_genome0.GetSize());
5866           int end0   = (int) (end_frac * (double) test_genome0.GetSize());
5867           int start1 = (int) (start_frac * (double) test_genome1.GetSize());
5868           int end1   = (int) (end_frac * (double) test_genome1.GetSize());
5869           assert( start0 >= 0  &&  start0 < test_genome0.GetSize() );
5870           assert( end0   >= 0  &&  end0   < test_genome0.GetSize() );
5871           assert( start1 >= 0  &&  start1 < test_genome1.GetSize() );
5872           assert( end1   >= 0  &&  end1   < test_genome1.GetSize() );
5873 
5874           // Calculate size of sections crossing over...
5875           int size0 = end0 - start0;
5876           int size1 = end1 - start1;
5877 
5878           int new_size0 = test_genome0.GetSize() - size0 + size1;
5879           int new_size1 = test_genome1.GetSize() - size1 + size0;
5880 
5881           // Don't Crossover if offspring will be illegal!!!
5882           if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
5883               new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
5884             fail_count +=2;
5885             break;
5886           }
5887 
5888           // Swap the components
5889           Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
5890           Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
5891           test_genome0.GetSequence().Replace(start0, size0, cross1);
5892           test_genome1.GetSequence().Replace(start1, size1, cross0);
5893 
5894           // Run each side, and determine viability...
5895           cCPUTestInfo test_info;
5896           testcpu->TestGenome(m_ctx, test_info, test_genome0);
5897           cross1_viable = test_info.IsViable();
5898 
5899           testcpu->TestGenome(m_ctx, test_info, test_genome1);
5900           cross2_viable = test_info.IsViable();
5901 
5902           if (cross1_viable == false) fail_count++;
5903           if (cross2_viable == false) fail_count++;
5904         }
5905 
5906         total_fail += fail_count * num_pairs;
5907         total_count += num_pairs * 2 * num_compare;
5908       }
5909     }
5910   }
5911 
5912   delete testcpu;
5913 
5914   // Calculate the final answer
5915   double ave_dist = (double) total_fail / (double) total_count;
5916   cout << "  ave distance = " << ave_dist  << " in " << total_count << " tests." << endl;
5917 
5918   cDataFile& df = m_world->GetDataFile(filename);
5919 
5920   df.WriteComment( "Species information" );
5921   df.WriteTimeStamp();
5922 
5923   df.Write(batch[batch1].Name(), "Name of First Batch");
5924   df.Write(batch[batch2].Name(), "Name of Second Batch");
5925   df.Write(ave_dist,             "Average Species Distance");
5926   df.Write(total_count,          "Total Recombinants tested");
5927   df.Endl();
5928 }
5929 
CommandRecombine(cString cur_string)5930 void cAnalyze::CommandRecombine(cString cur_string)
5931 {
5932   int batch1 = PopBatch(cur_string.PopWord());
5933   int batch2 = PopBatch(cur_string.PopWord());
5934   int batch3 = PopBatch(cur_string.PopWord());
5935   int num_compare = PopBatch(cur_string.PopWord());
5936 
5937   // We want batch2 to be the larger one for efficiency...
5938   if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5939     int tmp = batch1;  batch1 = batch2;  batch2 = tmp;
5940   }
5941 
5942   if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Creating recombinants...  " << endl;
5943   else cout << "Creating recombinants between batch "
5944     << batch1 << " and " << batch2 << endl;
5945 
5946   // Setup some variables;
5947   cAnalyzeGenotype * genotype1 = NULL;
5948   cAnalyzeGenotype * genotype2 = NULL;
5949 
5950   tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5951   tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5952 
5953   // Loop through all of the genotypes in each batch...
5954   while ((genotype1 = list1_it.Next()) != NULL) {
5955     list2_it.Reset();
5956     while ((genotype2 = list2_it.Next()) != NULL) {
5957       // Determine the counts...
5958       int fail_count = 0;
5959 
5960 
5961       assert(num_compare!=0);
5962       // And do the tests...
5963       for (int iter=1; iter < num_compare; iter++) {
5964         Genome test_genome0 = genotype1->GetGenome();
5965         Genome test_genome1 = genotype2->GetGenome();
5966 
5967         double start_frac = m_world->GetRandom().GetDouble();
5968         double end_frac = m_world->GetRandom().GetDouble();
5969         if (start_frac > end_frac) Swap(start_frac, end_frac);
5970 
5971         int start0 = (int) (start_frac * (double) test_genome0.GetSize());
5972         int end0   = (int) (end_frac * (double) test_genome0.GetSize());
5973         int start1 = (int) (start_frac * (double) test_genome1.GetSize());
5974         int end1   = (int) (end_frac * (double) test_genome1.GetSize());
5975         assert( start0 >= 0  &&  start0 < test_genome0.GetSize() );
5976         assert( end0   >= 0  &&  end0   < test_genome0.GetSize() );
5977         assert( start1 >= 0  &&  start1 < test_genome1.GetSize() );
5978         assert( end1   >= 0  &&  end1   < test_genome1.GetSize() );
5979 
5980         // Calculate size of sections crossing over...
5981         int size0 = end0 - start0;
5982         int size1 = end1 - start1;
5983 
5984         int new_size0 = test_genome0.GetSize() - size0 + size1;
5985         int new_size1 = test_genome1.GetSize() - size1 + size0;
5986 
5987         // Don't Crossover if offspring will be illegal!!!
5988         if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
5989             new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
5990           fail_count +=2;
5991           break;
5992         }
5993 
5994         if (size0 > 0 && size1 > 0) {
5995           Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
5996           Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
5997           test_genome0.GetSequence().Replace(start0, size0, cross1);
5998           test_genome1.GetSequence().Replace(start1, size1, cross0);
5999         }
6000         else if (size0 > 0) {
6001           Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
6002           test_genome1.GetSequence().Replace(start1, size1, cross0);
6003         }
6004         else if (size1 > 0) {
6005           Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
6006           test_genome0.GetSequence().Replace(start0, size0, cross1);
6007         }
6008 
6009         cAnalyzeGenotype* new_genotype0 = new cAnalyzeGenotype(m_world, test_genome0);
6010         cAnalyzeGenotype* new_genotype1 = new cAnalyzeGenotype(m_world, test_genome1);
6011         new_genotype0->SetNumCPUs(1);
6012         new_genotype1->SetNumCPUs(1);
6013         new_genotype0->SetID(0);
6014         new_genotype1->SetID(0);
6015         new_genotype0->SetName("noname");
6016         new_genotype1->SetName("noname");
6017         new_genotype0->SetParentID(genotype1->GetID()); //@CHC: Want to keep track of which two parents generated this offspring
6018         new_genotype0->SetParent2ID(genotype2->GetID());
6019         new_genotype1->SetParentID(genotype1->GetID());
6020         new_genotype1->SetParent2ID(genotype2->GetID());
6021 
6022         batch[batch3].List().PushRear(new_genotype0);
6023         batch[batch3].List().PushRear(new_genotype1);
6024 
6025         //batch[batch3].List().PushRear(new cAnalyzeGenotype(test_genome0, inst_set));
6026         //batch[batch3].List().PushRear(new cAnalyzeGenotype(test_genome1, inst_set));
6027 
6028       }
6029     }
6030   }
6031 }
6032 
CommandRecombineSample(cString cur_string)6033 void cAnalyze::CommandRecombineSample(cString cur_string)
6034 {
6035   int batch1 = PopBatch(cur_string.PopWord());
6036   int batch2 = PopBatch(cur_string.PopWord());
6037   int batch3 = PopBatch(cur_string.PopWord());
6038   int num_compare = PopBatch(cur_string.PopWord());
6039 
6040 
6041   if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Creating recombinants...  " << endl;
6042   else cout << "Creating recombinants between batch "
6043     << batch1 << " and " << batch2 << endl;
6044 
6045   // Setup some variables;
6046   cAnalyzeGenotype * genotype1 = NULL;
6047   cAnalyzeGenotype * genotype2 = NULL;
6048 
6049   //Loop through X number of genotypes
6050   for (int i = 1; i <= num_compare; i++) {
6051     genotype1 = batch[batch1].FindGenotypeRandom(m_world->GetRandom());
6052     genotype2 = batch[batch2].FindGenotypeRandom(m_world->GetRandom());
6053 
6054     //50% chance of swapping genotype1 and genotype2 so that we don't always end up with
6055     //    the same batch contributing the "ends" of the genome to the offspring
6056     if (m_world->GetRandom().P(0.5)) {
6057         cAnalyzeGenotype * temp = genotype1;
6058         genotype1 = genotype2;
6059         genotype2 = temp;
6060     }
6061 
6062     int fail_count = 0;
6063 
6064     Genome test_genome0 = genotype1->GetGenome();
6065     Genome test_genome1 = genotype2->GetGenome();
6066 
6067     double start_frac = m_world->GetRandom().GetDouble();
6068     double end_frac = m_world->GetRandom().GetDouble();
6069     if (start_frac > end_frac) Swap(start_frac, end_frac);
6070 
6071     int start0 = (int) (start_frac * (double) test_genome0.GetSize());
6072     int end0   = (int) (end_frac * (double) test_genome0.GetSize());
6073     int start1 = (int) (start_frac * (double) test_genome1.GetSize());
6074     int end1   = (int) (end_frac * (double) test_genome1.GetSize());
6075     assert( start0 >= 0  &&  start0 < test_genome0.GetSize() );
6076     assert( end0   >= 0  &&  end0   < test_genome0.GetSize() );
6077     assert( start1 >= 0  &&  start1 < test_genome1.GetSize() );
6078     assert( end1   >= 0  &&  end1   < test_genome1.GetSize() );
6079 
6080     // Calculate size of sections crossing over...
6081     int size0 = end0 - start0;
6082     int size1 = end1 - start1;
6083 
6084     int new_size0 = test_genome0.GetSize() - size0 + size1;
6085     int new_size1 = test_genome1.GetSize() - size1 + size0;
6086 
6087     // Don't Crossover if offspring will be illegal!!!
6088     if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
6089         new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
6090       fail_count +=2;
6091       break;
6092     }
6093 
6094     if (size0 > 0 && size1 > 0) {
6095       Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
6096       Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
6097       test_genome0.GetSequence().Replace(start0, size0, cross1);
6098       test_genome1.GetSequence().Replace(start1, size1, cross0);
6099     }
6100     else if (size0 > 0) {
6101       Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
6102       test_genome1.GetSequence().Replace(start1, size1, cross0);
6103     }
6104     else if (size1 > 0) {
6105       Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
6106       test_genome0.GetSequence().Replace(start0, size0, cross1);
6107     }
6108 
6109     cAnalyzeGenotype* new_genotype0 = new cAnalyzeGenotype(m_world, test_genome0);
6110     //cAnalyzeGenotype* new_genotype1 = new cAnalyzeGenotype(m_world, test_genome1);
6111     new_genotype0->SetNumCPUs(1);
6112     //new_genotype1->SetNumCPUs(1);
6113     new_genotype0->SetID(0);
6114     //new_genotype1->SetID(0);
6115     new_genotype0->SetName("noname");
6116     //new_genotype1->SetName("noname");
6117     new_genotype0->SetParentID(genotype1->GetID()); //@CHC: Want to keep track of which two parents generated this offspring
6118     new_genotype0->SetParent2ID(genotype2->GetID());
6119     //new_genotype1->SetParentID(genotype1->GetID());
6120     //new_genotype1->SetParent2ID(genotype2->GetID());
6121 
6122     batch[batch3].List().PushRear(new_genotype0);
6123     //batch[batch3].List().PushRear(new_genotype1);
6124 
6125   }
6126 
6127 }
6128 
6129 // This command will mutate a single locus in every single organism in the current batch
CommandMutagenize(cString cur_string)6130 void cAnalyze::CommandMutagenize(cString cur_string)
6131 {
6132 
6133   // Loop through all the genomes in the current batch
6134 
6135   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6136   cAnalyzeGenotype* genotype = NULL;
6137 
6138   while ((genotype = batch_it.Next()) != NULL) {
6139 
6140     //Add a mutation to it
6141     const int max_line = genotype->GetLength();
6142     Genome& cur_genome = genotype->GetGenome();
6143     Sequence& cur_seq = cur_genome.GetSequence();
6144     const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(cur_genome.GetInstSet());
6145 
6146     int line_num = m_ctx.GetRandom().GetInt(cur_genome.GetSize());
6147 
6148     cur_seq[line_num] = inst_set.GetRandomInst(m_ctx); // Replace it with a random instruction
6149 
6150   }
6151 
6152 }
6153 
CommandAlign(cString cur_string)6154 void cAnalyze::CommandAlign(cString cur_string)
6155 {
6156   // Align does not need any args yet.
6157   (void) cur_string;
6158 
6159   cout << "Aligning sequences..." << endl;
6160 
6161   if (batch[cur_batch].IsLineage() == false && m_world->GetVerbosity() >= VERBOSE_ON) {
6162     cerr << "  Warning: sequences may not be a consecutive lineage."
6163     << endl;
6164   }
6165 
6166   // Create an array of all the sequences we need to align.
6167   tListPlus<cAnalyzeGenotype> & glist = batch[cur_batch].List();
6168   tListIterator<cAnalyzeGenotype> batch_it(glist);
6169   const int num_sequences = glist.GetSize();
6170   cString * sequences = new cString[num_sequences];
6171 
6172   // Move through each sequence and update it.
6173   batch_it.Reset();
6174   cString diff_info;
6175   for (int i = 0; i < num_sequences; i++) {
6176     sequences[i] = batch_it.Next()->GetGenome().GetSequence().AsString();
6177     if (i == 0) continue;
6178     // Track of the number of insertions and deletions to shift properly.
6179     int num_ins = 0;
6180     int num_del = 0;
6181 
6182     // Compare each string to the previous.
6183     cStringUtil::EditDistance(sequences[i], sequences[i-1], diff_info, '_');
6184 
6185     while (diff_info.GetSize() != 0) {
6186       cString cur_mut = diff_info.Pop(',');
6187       const char mut_type = cur_mut[0];
6188       cur_mut.ClipFront(1); cur_mut.ClipEnd(1);
6189       int position = cur_mut.AsInt();
6190 
6191       // Nothing to do with Mutations
6192       if (mut_type == 'M') continue;
6193 
6194       // Handle insertions...
6195       if (mut_type == 'I') {
6196         // Loop back and insert an '_' into all previous sequences.
6197         for (int j = 0; j < i; j++) {
6198           sequences[j].Insert('_', position + num_del);
6199         }
6200         num_ins++;
6201       }
6202 
6203       // Handle Deletions...
6204       else if (mut_type == 'D') {
6205         // Insert '_' into the current sequence at the point of deletions.
6206         sequences[i].Insert("_", position + num_ins);
6207         num_del++;
6208       }
6209 
6210     }
6211   }
6212 
6213   batch_it.Reset();
6214   for (int i = 0; i < num_sequences; i++) {
6215     batch_it.Next()->SetAlignedSequence(sequences[i]);
6216   }
6217 
6218   // Cleanup
6219   delete [] sequences;
6220 
6221   // Adjust the flags on this batch
6222   // batch[cur_batch].SetLineage(false);
6223   batch[cur_batch].SetAligned(true);
6224 }
6225 
// For now, this command does not take a changing environment into account
// and only works for lineages from fixed-length runs.
AnalyzeNewInfo(cString cur_string)6228 void cAnalyze::AnalyzeNewInfo(cString cur_string)
6229 {
6230   cout << "Analyze new information in child about environment ..." << endl;
6231 
6232   // Load in the variables
6233   int words = cur_string.CountNumWords();
6234   if (words < 1) {
6235     cout << "This command requires mutation rate, skipping." << endl;
6236     return;
6237   }
6238 
6239   // Get the mutation rate ...
6240   double mu = cur_string.PopWord().AsDouble();
6241 
6242   // Create the directory using the string given as the second argument
6243   cString dir = cur_string.PopWord();
6244   cString defaultDir = "newinfo/";
6245   cString directory = PopDirectory(dir, defaultDir);
6246 
6247   ///////////////////////////////////////////////////////
6248   // Loop through all of the genotypes in this batch...
6249 
6250   if (batch[cur_batch].IsLineage() != true) {
6251     cout << "This command requires the lineage in the batch, skipping.\n";
6252     return;
6253   }
6254 
6255   cString newinfo_fn;
6256   newinfo_fn.Set("%s%s.newinfo.dat", static_cast<const char*>(directory), "lineage");
6257   ofstream& newinfo_fp = m_world->GetDataFileOFStream(newinfo_fn);
6258 
6259   newinfo_fp << "# Legend:" << endl;
6260   newinfo_fp << "# 1:Child Genotype ID" << endl;
6261   newinfo_fp << "# 2:Parent Genotype ID" << endl;
6262   newinfo_fp << "# 3:Information of Child about Environment I(C:E)" << endl;
6263   newinfo_fp << "# 4:Information of Parent about Environment I(P:E)" << endl;
6264   newinfo_fp << "# 5:I(C:E)-I(P:E)" << endl;
6265   newinfo_fp << "# 6:Information Gained in Child" << endl;
6266   newinfo_fp << "# 7:Information Decreased in Child" << endl;
6267   newinfo_fp << "# 8:Net Increasing of Information in Child" << endl;
6268   newinfo_fp << endl;
6269 
6270   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6271   cAnalyzeGenotype * parent_genotype = batch_it.Next();
6272   if (parent_genotype == NULL) {
6273     m_world->GetDataFileManager().Remove(newinfo_fn);
6274     return;
6275   }
6276   cAnalyzeGenotype * child_genotype = NULL;
6277   double I_P_E; // Information of parent about environment
6278   double H_P_E = AnalyzeEntropy(parent_genotype, mu);
6279   I_P_E = parent_genotype->GetLength() - H_P_E;
6280 
6281   while ((child_genotype = batch_it.Next()) != NULL) {
6282 
6283     if (m_world->GetVerbosity() >= VERBOSE_ON) {
6284       cout << "Analyze new information for " << child_genotype->GetName() << endl;
6285     }
6286 
6287     // Information of parent about its environment should not be zero.
6288     if (I_P_E == 0) {
6289       cerr << "Error: Information between parent and its enviroment is zero."
6290       << "(cAnalyze::AnalyzeNewInfo)" << endl;
6291       if (exit_on_error) exit(1);
6292     }
6293 
6294     double H_C_E = AnalyzeEntropy(child_genotype, mu);
6295     double I_C_E = child_genotype->GetLength() - H_C_E;
6296     double net_gain = I_C_E - I_P_E;
6297 
6298     // Increased information in child compared to parent
6299     double child_increased_info = IncreasedInfo(child_genotype, parent_genotype, mu);
6300 
6301     // Lost information in child compared to parent
6302     double child_lost_info = IncreasedInfo(parent_genotype, child_genotype, mu);
6303 
6304     // Write information to file ...
6305     newinfo_fp << child_genotype->GetID() << " ";
6306     newinfo_fp << parent_genotype->GetID() << " ";
6307     newinfo_fp << I_C_E << " ";
6308     newinfo_fp << I_P_E << " ";
6309     newinfo_fp << net_gain << " ";
6310     newinfo_fp << child_increased_info << " ";
6311     newinfo_fp << child_lost_info << " ";
6312     newinfo_fp << child_increased_info - child_lost_info << endl;
6313 
6314     parent_genotype = child_genotype;
6315     I_P_E = I_C_E;
6316   }
6317 
6318   m_world->GetDataFileManager().Remove(newinfo_fn);
6319   return;
6320 }
6321 
6322 
6323 
WriteClone(cString cur_string)6324 void cAnalyze::WriteClone(cString cur_string)
6325 {
6326   // Load in the variables...
6327   cString filename("clone.dat");
6328   int num_cells = -1;
6329   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6330   if (cur_string.GetSize() != 0) num_cells = cur_string.PopWord().AsInt();
6331 
6332 
6333   ofstream& fp = m_world->GetDataFileOFStream(filename);
6334 
6335   // Start up again at update zero...
6336   fp << "0 ";
6337 
6338   // Setup the archive sizes of lists to all be zero.
6339   fp << MAX_GENOME_LENGTH << " ";
6340   for (int i = 0; i < MAX_GENOME_LENGTH; i++) {
6341     fp << "0 ";
6342   }
6343 
6344   // Save the individual genotypes
6345   fp << batch[cur_batch].List().GetSize() << " ";
6346 
6347   int org_count = 0;
6348   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6349   cAnalyzeGenotype * genotype = NULL;
6350   while ((genotype = batch_it.Next()) != NULL) {
6351     org_count += genotype->GetNumCPUs();
6352     const int length = genotype->GetLength();
6353     const Sequence& genome = genotype->GetGenome().GetSequence();
6354 
6355     fp << genotype->GetID() << " "
6356       << length << " ";
6357 
6358     for (int i = 0; i < length; i++) {
6359       fp << genome[i].GetOp() << " ";
6360     }
6361   }
6362 
6363   // Write out the current state of the grid.
6364 
6365   if (num_cells == 0) num_cells = org_count;
6366   fp << num_cells << " ";
6367 
6368   batch_it.Reset();
6369   while ((genotype = batch_it.Next()) != NULL) {
6370     for (int i = 0; i < genotype->GetNumCPUs(); i++) {
6371       fp << genotype->GetID() << " ";
6372     }
6373   }
6374 
6375   // Fill out the remainder of the grid with -1
6376   for (int i = org_count; i < num_cells; i++) {
6377     fp << "-1 ";
6378   }
6379 }
6380 
6381 
WriteInjectEvents(cString cur_string)6382 void cAnalyze::WriteInjectEvents(cString cur_string)
6383 {
6384   // Load in the variables...
6385   cString filename("events_inj.cfg");
6386   int start_cell = 0;
6387   int lineage = 0;
6388   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6389   if (cur_string.GetSize() != 0) start_cell = cur_string.PopWord().AsInt();
6390   if (cur_string.GetSize() != 0) lineage = cur_string.PopWord().AsInt();
6391 
6392   ofstream& fp = m_world->GetDataFileOFStream(filename);
6393 
6394   int org_count = 0;
6395   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6396   cAnalyzeGenotype * genotype = NULL;
6397   while ((genotype = batch_it.Next()) != NULL) {
6398     const int cur_count = genotype->GetNumCPUs();
6399     org_count += cur_count;
6400     const Sequence& genome = genotype->GetGenome().GetSequence();
6401 
6402     fp << "u 0 InjectSequence "
6403       << genome.AsString() << " "
6404       << start_cell << " "
6405       << start_cell + cur_count << " "
6406       << genotype->GetMerit() << " "
6407       << lineage << " "
6408       << endl;
6409     start_cell += cur_count;
6410   }
6411 }
6412 
6413 
WriteCompetition(cString cur_string)6414 void cAnalyze::WriteCompetition(cString cur_string)
6415 {
6416   cout << "Writing Competition events..." << endl;
6417 
6418   // Load in the variables...
6419   int join_UD = 0;
6420   double start_merit = 50000;
6421   cString filename("events_comp.cfg");
6422   int batch_A = cur_batch - 1;
6423   int batch_B = cur_batch;
6424   int grid_side = -1;
6425   int lineage = 0;
6426 
6427   // Make sure we have reasonable default batches.
6428   if (cur_batch == 0) { batch_A = 0; batch_B = 1; }
6429 
6430   if (cur_string.GetSize() != 0) join_UD = cur_string.PopWord().AsInt();
6431   if (cur_string.GetSize() != 0) start_merit = cur_string.PopWord().AsDouble();
6432   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6433   if (cur_string.GetSize() != 0) batch_A = cur_string.PopWord().AsInt();
6434   if (cur_string.GetSize() != 0) batch_B = cur_string.PopWord().AsInt();
6435   if (cur_string.GetSize() != 0) grid_side = cur_string.PopWord().AsInt();
6436   if (cur_string.GetSize() != 0) lineage = cur_string.PopWord().AsInt();
6437 
6438   // Check inputs...
6439   if (join_UD < 0) join_UD = 0;
6440   if (batch_A < 0 || batch_B < 0) {
6441     cerr << "Error: Batch IDs must be positive!" << endl;
6442     return;
6443   }
6444 
6445   ofstream& fp = m_world->GetDataFileOFStream(filename);
6446 
6447   // Count the number of organisms in each batch...
6448   cAnalyzeGenotype * genotype = NULL;
6449 
6450   int org_count_A = 0;
6451   tListIterator<cAnalyzeGenotype> batchA_it(batch[batch_A].List());
6452   while ((genotype = batchA_it.Next()) != NULL) {
6453     org_count_A += genotype->GetNumCPUs();
6454   }
6455 
6456   int org_count_B = 0;
6457   tListIterator<cAnalyzeGenotype> batchB_it(batch[batch_B].List());
6458   while ((genotype = batchB_it.Next()) != NULL) {
6459     org_count_B += genotype->GetNumCPUs();
6460   }
6461 
6462   int max_count = Max(org_count_A, org_count_B);
6463   if (max_count > 10000) {
6464     cout << "Warning: more than 10,000 organisms in sub-population!" << endl;
6465   }
6466 
6467   if (grid_side <= 0) {
6468     for (grid_side = 5; grid_side < 100; grid_side += 5) {
6469       if (grid_side * grid_side >= max_count) break;
6470     }
6471     if (m_world->GetVerbosity() >= VERBOSE_ON) {
6472       cout << "...assuming population size "
6473       << grid_side << "x" << grid_side << "." << endl;
6474     }
6475   }
6476 
6477 
6478   int pop_size = grid_side * grid_side;
6479 
6480   int inject_pos = 0;
6481   while ((genotype = batchA_it.Next()) != NULL) {
6482     const int cur_count = genotype->GetNumCPUs();
6483     const Sequence& genome = genotype->GetGenome().GetSequence();
6484     double cur_merit = start_merit;
6485     if (cur_merit < 0) cur_merit = genotype->GetMerit();
6486     fp << "u 0 InjectSequence "
6487       << genome.AsString() << " "
6488       << inject_pos << " "
6489       << inject_pos + cur_count << " "
6490       << cur_merit << " "
6491       << lineage << " "
6492       << endl;
6493     inject_pos += cur_count;
6494   }
6495 
6496   inject_pos = pop_size;
6497   while ((genotype = batchB_it.Next()) != NULL) {
6498     const int cur_count = genotype->GetNumCPUs();
6499     const Sequence& genome = genotype->GetGenome().GetSequence();
6500     double cur_merit = start_merit;
6501     if (cur_merit < 0) cur_merit = genotype->GetMerit();
6502     fp << "u 0 InjectSequence "
6503       << genome.AsString() << " "
6504       << inject_pos << " "
6505       << inject_pos + cur_count << " "
6506       << cur_merit << " "
6507       << lineage+1 << " "
6508       << endl;
6509     inject_pos += cur_count;
6510   }
6511 
6512   fp << "u 0 SeverGridRow" << grid_side << endl;
6513   fp << "u " << join_UD << " JoinGridRow " << grid_side << endl;
6514 }
6515 
6516 
6517 // Analyze the mutations along an aligned lineage.
6518 
AnalyzeMuts(cString cur_string)6519 void cAnalyze::AnalyzeMuts(cString cur_string)
6520 {
6521   cout << "Analyzing Mutations" << endl;
6522 
6523   // Make sure we have everything we need.
6524   if (batch[cur_batch].IsAligned() == false) {
6525     cout << "  Error: sequences not aligned." << endl;
6526     return;
6527   }
6528 
6529   // Setup variables...
6530   cString filename("analyze_muts.dat");
6531   bool all_combos = false;
6532   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6533   if (cur_string.GetSize() != 0) all_combos = cur_string.PopWord().AsInt();
6534 
6535   tListPlus<cAnalyzeGenotype> & gen_list = batch[cur_batch].List();
6536   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6537 
6538   const int num_sequences = gen_list.GetSize();
6539   const int sequence_length =
6540     gen_list.GetFirst()->GetAlignedSequence().GetSize();
6541   cString * sequences = new cString[num_sequences];
6542   int * mut_count = new int[sequence_length];
6543   for (int i = 0; i < sequence_length; i++) mut_count[i] = 0;
6544 
6545   // Load in the sequences
6546   batch_it.Reset();
6547   int count = 0;
6548   while (batch_it.Next() != NULL) {
6549     sequences[count] = batch_it.Get()->GetAlignedSequence();
6550     count++;
6551   }
6552 
6553   // Count the number of changes at each site...
6554   for (int i = 1; i < num_sequences; i++) {       // For each pair...
6555     cString & seq1 = sequences[i-1];
6556     cString & seq2 = sequences[i];
6557     for (int j = 0; j < sequence_length; j++) {   // For each site...
6558       if (seq1[j] != seq2[j]) mut_count[j]++;
6559     }
6560   }
6561 
6562   // Grab the two strings we're actively going to be working with.
6563   cString & first_seq = sequences[0];
6564   cString & last_seq = sequences[num_sequences - 1];
6565 
6566   // Print out the header...
6567   ofstream& fp = m_world->GetDataFileOFStream(filename);
6568   fp << "# " << sequences[0] << endl;
6569   fp << "# " << sequences[num_sequences - 1] << endl;
6570   fp << "# ";
6571   for (int i = 0; i < sequence_length; i++) {
6572     if (mut_count[i] == 0) fp << " ";
6573     else if (mut_count[i] > 9) fp << "+";
6574     else fp << mut_count[i];
6575   }
6576   fp << endl;
6577   fp << "# ";
6578   for (int i = 0; i < sequence_length; i++) {
6579     if (first_seq[i] == last_seq[i]) fp << " ";
6580     else fp << "^";
6581   }
6582   fp << endl << endl;
6583 
6584   // Count the number of diffs between the two strings we're interested in.
6585   const int total_diffs = cStringUtil::Distance(first_seq, last_seq);
6586   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "  " << total_diffs << " mutations being tested." << endl;
6587 
6588   // Locate each difference.
6589   int * mut_positions = new int[total_diffs];
6590   int cur_mut = 0;
6591   for (int i = 0; i < first_seq.GetSize(); i++) {
6592     if (first_seq[i] != last_seq[i]) {
6593       mut_positions[cur_mut] = i;
6594       cur_mut++;
6595     }
6596   }
6597 
6598   // The number of mutations we need to deal with will tell us how much
6599   // we can attempt to do. (@CAO should be able to overide defaults)
6600   bool scan_combos = true;  // Scan all possible combos of mutations?
6601   bool detail_muts = true;  // Collect detailed info on all mutations?
6602   bool print_all = true;    // Print everything we collect without digestion?
6603   if (total_diffs > 30) scan_combos = false;
6604   if (total_diffs > 20) detail_muts = false;
6605   if (total_diffs > 10) print_all = false;
6606 
6607   // Start moving through the difference combinations...
6608   if (scan_combos) {
6609     const int total_combos = 1 << total_diffs;
6610     cout << "  Scanning through " << total_combos << " combos." << endl;
6611 
6612     double * total_fitness = new double[total_diffs + 1];
6613     double * total_sqr_fitness = new double[total_diffs + 1];
6614     double * max_fitness = new double[total_diffs + 1];
6615     cString * max_sequence = new cString[total_diffs + 1];
6616     int * test_count = new int[total_diffs + 1];
6617     for (int i = 0; i <= total_diffs; i++) {
6618       total_fitness[i] = 0.0;
6619       total_sqr_fitness[i] = 0.0;
6620       max_fitness[i] = 0.0;
6621       test_count[i] = 0;
6622     }
6623 
6624     cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
6625 
6626     // Loop through all of the combos...
6627     const int combo_step = total_combos / 79;
6628     for (int combo_id = 0; combo_id < total_combos; combo_id++) {
6629       if (combo_id % combo_step == 0) {
6630         cout << '.';
6631         cout.flush();
6632       }
6633       // Start at the first sequence and add needed changes...
6634       cString test_sequence = first_seq;
6635       int diff_count = 0;
6636       for (int mut_id = 0; mut_id < total_diffs; mut_id++) {
6637         if ((combo_id >> mut_id) & 1) {
6638           const int cur_pos = mut_positions[mut_id];
6639           test_sequence[cur_pos] = static_cast<const char*>(last_seq)[cur_pos];
6640           diff_count++;
6641         }
6642       }
6643 
6644       // Determine the fitness of the current sequence...
6645       const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
6646       Genome test_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(test_sequence));
6647       cCPUTestInfo test_info;
6648       testcpu->TestGenome(m_ctx, test_info, test_genome);
6649       const double fitness = test_info.GetGenotypeFitness();
6650 
6651       //cAnalyzeGenotype test_genotype(test_sequence);
6652       //test_genotype.Recalculate(m_ctx, testcpu);
6653       //const double fitness = test_genotype.GetFitness();
6654 
6655       total_fitness[diff_count] += fitness;
6656       total_sqr_fitness[diff_count] += fitness * fitness;
6657       if (fitness > max_fitness[diff_count]) {
6658         max_fitness[diff_count] = fitness;
6659         max_sequence[diff_count] = test_sequence;
6660         //  	cout << endl
6661         //  	     << max_sequence[diff_count] << " "
6662         //  	     << test_info.GetGenotypeMerit() << " "
6663         //  	     << fitness << " "
6664         //  	     << combo_id << endl;
6665       }
6666       test_count[diff_count]++;
6667     }
6668 
6669     // Output the results...
6670 
6671     for (int i = 0; i <= total_diffs; i++) {
6672       const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
6673       Genome max_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(max_sequence[i]));
6674       cAnalyzeGenotype max_genotype(m_world, max_genome);
6675       max_genotype.Recalculate(m_ctx);
6676       fp << i                                         << " "  //  1
6677         << test_count[i]                             << " "  //  2
6678         << total_fitness[i] / (double) test_count[i] << " "  //  3
6679         << max_fitness[i]                            << " "  //  4
6680         << max_genotype.GetMerit()                   << " "  //  5
6681         << max_genotype.GetGestTime()                << " "  //  6
6682         << max_genotype.GetLength()                  << " "  //  7
6683         << max_genotype.GetCopyLength()              << " "  //  8
6684         << max_genotype.GetExeLength()               << " "; //  9
6685       max_genotype.PrintTasks(fp, 3,12);
6686       fp << max_sequence[i] << endl;
6687     }
6688 
6689     // Cleanup
6690     delete [] total_fitness;
6691     delete [] total_sqr_fitness;
6692     delete [] max_fitness;
6693     delete [] max_sequence;
6694     delete [] test_count;
6695 
6696     delete testcpu;
6697   }
6698   // If we can't scan through all combos, give wanring.
6699   else {
6700     cerr << "  Warning: too many mutations (" << total_diffs
6701     << ") to scan through combos." << endl;
6702   }
6703 
6704 
6705   // Cleanup...
6706   delete [] sequences;
6707   delete [] mut_count;
6708   delete [] mut_positions;
6709 }
6710 
6711 
6712 // Analyze the frequency that each instruction appears in the batch, and
6713 // make note of those that appear more or less often than expected.
6714 
AnalyzeInstructions(cString cur_string)6715 void cAnalyze::AnalyzeInstructions(cString cur_string)
6716 {
6717   if (m_world->GetVerbosity() >= VERBOSE_ON) {
6718     cout << "Analyzing Instructions in batch " << cur_batch << endl;
6719   }
6720   else cout << "Analyzing Instructions..." << endl;
6721 
6722   // Load in the variables...
6723   cString filename("inst_analyze.dat");
6724   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6725   cString isname = m_world->GetHardwareManager().GetDefaultInstSet().GetInstSetName();
6726   if (cur_string.GetSize() != 0) isname = cur_string.PopWord();
6727   const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(isname);
6728   const int num_insts = inst_set.GetSize();
6729 
6730   // Setup the file...
6731   ofstream& fp = m_world->GetDataFileOFStream(filename);
6732 
6733   // Determine the file type...
6734   int file_type = FILE_TYPE_TEXT;
6735   while (filename.Find('.') != -1) filename.Pop('.');
6736   if (filename == "html") file_type = FILE_TYPE_HTML;
6737 
6738   // If we're in HTML mode, setup the header...
6739   if (file_type == FILE_TYPE_HTML) {
6740     // Document header...
6741     fp << "<html>" << endl
6742     << "<body bgcolor=\"#FFFFFF\"" << endl
6743     << " text=\"#000000\"" << endl
6744     << " link=\"#0000AA\"" << endl
6745     << " alink=\"#0000FF\"" << endl
6746     << " vlink=\"#000044\">" << endl
6747     << endl
6748     << "<h1 align=center>Instruction Chart: "
6749     << batch[cur_batch].Name() << endl
6750     << "<br><br>" << endl
6751     << endl;
6752 
6753     // Instruction key...
6754     const int num_cols = 6;
6755     const int num_rows = ((num_insts - 1) / num_cols) + 1;
6756     fp << "<table border=2 cellpadding=3>" << endl
6757       << "<tr bgcolor=\"#AAAAFF\"><th colspan=6>Instruction Set Legend</tr>"
6758       << endl;
6759     for (int i = 0; i < num_rows; i++) {
6760       fp << "<tr>";
6761       for (int j = 0; j < num_cols; j++) {
6762         const int inst_id = i + j * num_rows;
6763         if (inst_id < num_insts) {
6764           cInstruction cur_inst(inst_id);
6765           fp << "<td><b>" << cur_inst.GetSymbol() << "</b> : "
6766             << inst_set.GetName(inst_id) << " ";
6767         }
6768         else {
6769           fp << "<td>&nbsp; ";
6770         }
6771       }
6772       fp << "</tr>" << endl;
6773     }
6774     fp << "</table>" << endl
6775       << "<br><br><br>" << endl;
6776 
6777     // Main table header...
6778     fp << "<center>" << endl
6779       << "<table border=1 cellpadding=2>" << endl
6780       << "<tr><th bgcolor=\"#AAAAFF\">Run # <th bgcolor=\"#AAAAFF\">Length"
6781       << endl;
6782     for (int i = 0; i < num_insts; i++) {
6783       cInstruction cur_inst(i);
6784       fp << "<th bgcolor=\"#AAAAFF\">" << cur_inst.GetSymbol() << " ";
6785     }
6786     fp << "</tr>" << endl;
6787   }
6788   else { // if (file_type == FILE_TYPE_TEXT) {
6789     fp << "#RUN_NAME  LENGTH  ";
6790     for (int i = 0; i < num_insts; i++) {
6791       cInstruction cur_inst(i);
6792       fp << cur_inst.GetSymbol() << ":" << inst_set.GetName(i) << " ";
6793     }
6794     fp << endl;
6795   }
6796 
6797   // Figure out how often we expect each instruction to appear...
6798   const double exp_freq = 1.0 / (double) num_insts;
6799   const double min_freq = exp_freq * 0.5;
6800   const double max_freq = exp_freq * 2.0;
6801 
6802   double total_length = 0.0;
6803   tArray<double> total_freq(num_insts);
6804   for (int i = 0; i < num_insts; i++) total_freq[i] = 0.0;
6805 
6806   // Loop through all of the genotypes in this batch...
6807   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6808   cAnalyzeGenotype * genotype = NULL;
6809   while ((genotype = batch_it.Next()) != NULL) {
6810     if (genotype->GetGenome().GetInstSet() != isname) continue;
6811 
6812     // Setup for counting...
6813     tArray<int> inst_bin(num_insts);
6814     for (int i = 0; i < num_insts; i++) inst_bin[i] = 0;
6815 
6816     // Count it up!
6817     const int genome_size = genotype->GetLength();
6818     for (int i = 0; i < genome_size; i++) {
6819       const int inst_id = genotype->GetGenome().GetSequence()[i].GetOp();
6820       inst_bin[inst_id]++;
6821     }
6822 
6823     // Print it out...
6824     if (file_type == FILE_TYPE_HTML) fp << "<tr><th>";
6825     fp << genotype->GetName() << " ";
6826     if (file_type == FILE_TYPE_HTML) fp << "<td align=center>";
6827     total_length += genome_size;
6828     fp << genome_size << " ";
6829     for (int i = 0; i < num_insts; i++) {
6830       const double inst_freq = ((double) inst_bin[i]) / (double) genome_size;
6831       total_freq[i] += inst_freq;
6832       if (file_type == FILE_TYPE_HTML) {
6833         if (inst_freq == 0.0) fp << "<td bgcolor=\"FFAAAA\">";
6834         else if (inst_freq < min_freq) fp << "<td bgcolor=\"FFFFAA\">";
6835         else if (inst_freq < max_freq) fp << "<td bgcolor=\"AAAAFF\">";
6836         else fp << "<td bgcolor=\"AAFFAA\">";
6837       }
6838       fp << cStringUtil::Stringf("%04.3f", inst_freq) << " ";
6839     }
6840     if (file_type == FILE_TYPE_HTML) fp << "</tr>";
6841     fp << endl;
6842   }
6843 
6844   if (file_type == FILE_TYPE_HTML) {
6845     int num_genomes = batch[cur_batch].List().GetSize();
6846     fp << "<tr><th>Average <th>" << total_length / num_genomes << " ";
6847     for (int i = 0; i < num_insts; i++) {
6848       double inst_freq = total_freq[i] / num_genomes;
6849       if (inst_freq == 0.0) fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">";
6850       else if (inst_freq < min_freq) fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEG.Get() << "\">";
6851       else if (inst_freq < max_freq) fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEUT.Get() << "\">";
6852       else fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">";
6853       fp << cStringUtil::Stringf("%04.3f", inst_freq) << " ";
6854     }
6855     fp << "</tr>" << endl
6856       << "</table></center>" << endl;
6857   }
6858   }
6859 
AnalyzeInstPop(cString cur_string)6860 void cAnalyze::AnalyzeInstPop(cString cur_string)
6861 {
6862   if (m_world->GetVerbosity() >= VERBOSE_ON) {
6863     cout << "Analyzing Instructions in batch " << cur_batch << endl;
6864   }
6865   else cout << "Analyzeing Instructions..." << endl;
6866 
6867   // Load in the variables...
6868   cString filename("inst_analyze.dat");
6869   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6870   cString isname = m_world->GetHardwareManager().GetDefaultInstSet().GetInstSetName();
6871   if (cur_string.GetSize() != 0) isname = cur_string.PopWord();
6872   const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(isname);
6873   const int num_insts = inst_set.GetSize();
6874 
6875   // Setup the file...
6876   ofstream& fp = m_world->GetDataFileOFStream(filename);
6877 
6878   for (int i = 0; i < num_insts; i++) {
6879     cInstruction cur_inst(i);
6880     fp << cur_inst.GetSymbol() << ":" << inst_set.GetName(i) << " ";
6881   }
6882   fp << endl;
6883 
6884   double total_length = 0.0;
6885   tArray<double> total_freq(num_insts);
6886   for (int i = 0; i < num_insts; i++) total_freq[i] = 0.0;
6887   int num_orgs = 0;
6888 
6889   // Loop through all of the genotypes in this batch...
6890   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6891   cAnalyzeGenotype * genotype = NULL;
6892   while ((genotype = batch_it.Next()) != NULL) {
6893     if (genotype->GetGenome().GetInstSet() != isname) continue;
6894 
6895     num_orgs++;
6896 
6897     // Setup for counting...
6898     tArray<int> inst_bin(num_insts);
6899     for (int i = 0; i < num_insts; i++) inst_bin[i] = 0;
6900 
6901     // Count it up!
6902     const int genome_size = genotype->GetLength();
6903     for (int i = 0; i < genome_size; i++) {
6904       const int inst_id = genotype->GetGenome().GetSequence()[i].GetOp();
6905       inst_bin[inst_id]++;
6906     }
6907     total_length += genome_size;
6908     for (int i = 0; i < num_insts; i++) {
6909       const double inst_freq = ((double) inst_bin[i]) / (double) genome_size;
6910       total_freq[i] += inst_freq;
6911     }
6912   }
6913   // Print it out...
6914   //    fp << total_length/num_orgs  << " ";
6915   for (int i = 0; i < num_insts; i++) {
6916     fp << cStringUtil::Stringf("%04.3f", total_freq[i]/num_orgs) << " ";
6917   }
6918   fp << endl;
6919 
6920 }
6921 
AnalyzeBranching(cString cur_string)6922 void cAnalyze::AnalyzeBranching(cString cur_string)
6923 {
6924   if (m_world->GetVerbosity() >= VERBOSE_ON) {
6925     cout << "Analyzing branching patterns in batch " << cur_batch << endl;
6926   }
6927   else cout << "Analyzeing Branches..." << endl;
6928 
6929   // Load in the variables...
6930   cString filename("branch_analyze.dat");
6931   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6932 
6933   // Setup the file...
6934   //ofstream& fp = m_world->GetDataFileOFStream(filename);
6935 
6936   // UNFINISHED!
6937   // const int num_insts = inst_set.GetSize();
6938 }
6939 
AnalyzeMutationTraceback(cString cur_string)6940 void cAnalyze::AnalyzeMutationTraceback(cString cur_string)
6941 {
6942   if (m_world->GetVerbosity() >= VERBOSE_ON) {
6943     cout << "Analyzing mutation traceback in batch " << cur_batch << endl;
6944   }
6945   else cout << "Analyzing mutation traceback..." << endl;
6946 
6947   // This works best on lineages, so warn if we don't have one.
6948   if (batch[cur_batch].IsLineage() == false && m_world->GetVerbosity() >= VERBOSE_ON) {
6949     cerr << "  Warning: trying to traceback mutations outside of lineage."
6950     << endl;
6951   }
6952 
6953   if (batch[cur_batch].List().GetSize() == 0) {
6954     cerr << "Error: Trying to traceback mutations with no genotypes in batch."
6955     << endl;
6956     return;
6957   }
6958 
6959   // Make sure all genotypes are the same length.
6960   int size = -1;
6961   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6962   cAnalyzeGenotype * genotype = NULL;
6963   while ((genotype = batch_it.Next()) != NULL) {
6964     if (size == -1) size = genotype->GetLength();
6965     if (size != genotype->GetLength()) {
6966       cerr << "  Error: Trying to traceback mutations in genotypes of differing lengths." << endl;
6967       cerr << "  Aborting." << endl;
6968       return;
6969     }
6970   }
6971 
6972   // Setup variables...
6973   cString filename("analyze_traceback.dat");
6974   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6975 
6976   // Setup a genome to store the previous values before mutations.
6977   tArray<int> prev_inst(size);
6978   prev_inst.SetAll(-1);  // -1 indicates never changed.
6979 
6980   // Open the output file...
6981   ofstream& fp = m_world->GetDataFileOFStream(filename);
6982 
6983   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
6984 
6985   // Loop through all of the genotypes again, testing mutation reversions.
6986   cAnalyzeGenotype * prev_genotype = batch_it.Next();
6987   while ((genotype = batch_it.Next()) != NULL) {
6988     continue;
6989     // Check to see if any sites have changed...
6990     for (int i = 0; i < size; i++) {
6991       if (genotype->GetGenome().GetSequence()[i] != prev_genotype->GetGenome().GetSequence()[i]) {
6992         prev_inst[i] = prev_genotype->GetGenome().GetSequence()[i].GetOp();
6993       }
6994     }
6995 
6996     // Next, determine the fraction of mutations that are currently adaptive.
6997     int num_beneficial = 0;
6998     int num_neutral = 0;
6999     int num_detrimental = 0;
7000     int num_static = 0;      // Sites that were never mutated.
7001 
7002     Genome test_genome = genotype->GetGenome();
7003     cCPUTestInfo test_info;
7004     testcpu->TestGenome(m_ctx, test_info, test_genome);
7005     const double base_fitness = test_info.GetGenotypeFitness();
7006 
7007     for (int i = 0; i < size; i++) {
7008       if (prev_inst[i] == -1) num_static++;
7009       else {
7010         test_genome.GetSequence()[i].SetOp(prev_inst[i]);
7011         testcpu->TestGenome(m_ctx, test_info, test_genome);
7012         const double cur_fitness = test_info.GetGenotypeFitness();
7013         if (cur_fitness > base_fitness) num_detrimental++;
7014         else if (cur_fitness < base_fitness) num_beneficial++;
7015         else num_neutral++;
7016         test_genome.GetSequence()[i] = genotype->GetGenome().GetSequence()[i];
7017       }
7018     }
7019 
7020     fp << genotype->GetDepth() << " "
7021       << num_beneficial << " "
7022       << num_neutral << " "
7023       << num_detrimental << " "
7024       << num_static << " "
7025       << endl;
7026 
7027     prev_genotype = genotype;
7028   }
7029 
7030   delete testcpu;
7031 }
7032 
AnalyzeComplexity(cString cur_string)7033 void cAnalyze::AnalyzeComplexity(cString cur_string)
7034 {
7035   cout << "Analyzing genome complexity..." << endl;
7036 
7037   // Load in the variables...
7038   // This command requires at least on arguement
7039   int words = cur_string.CountNumWords();
7040   if(words < 1) {
7041     cout << "Error: AnalyzeComplexity has no parameters, skipping." << endl;
7042     return;
7043   }
7044 
7045   // Get the mutation rate arguement
7046   double mut_rate = cur_string.PopWord().AsDouble();
7047 
7048   // Create the directory using the string given as the second arguement
7049   cString dir = cur_string.PopWord();
7050   cString defaultDirectory = "complexity/";
7051   cString directory = PopDirectory(dir, defaultDirectory);
7052 
7053   // Default for usage of resources is false
7054   int useResources = 0;
7055   // resource usage flag is an optional arguement, but is always the 3rd arg
7056   if(words >= 3) {
7057     useResources = cur_string.PopWord().AsInt();
7058     // All non-zero values are considered false (Handled by testcpu->InitResources)
7059   }
7060 
7061   // Batch frequency begins with the first organism, but then skips that
7062   // amount ahead in the batch.  It defaults to 1, so that default analyzes
7063   // all the organisms in the batch.  It is always the 4th arg.
7064   int batchFrequency = 1;
7065   if(words == 4) {
7066     batchFrequency = cur_string.PopWord().AsInt();
7067     if(batchFrequency <= 0) {
7068       batchFrequency = 1;
7069     }
7070   }
7071 
7072   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
7073 
7074   ///////////////////////////////////////////////////////
7075   // Loop through all of the genotypes in this batch...
7076 
7077   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7078   cAnalyzeGenotype * genotype = NULL;
7079 
7080   cString lineage_filename;
7081   if (batch[cur_batch].IsLineage()) {
7082     lineage_filename.Set("%s%s.complexity.dat", static_cast<const char*>(directory), "lineage");
7083   } else {
7084     lineage_filename.Set("%s%s.complexity.dat", static_cast<const char*>(directory), "nonlineage");
7085   }
7086   ofstream& lineage_fp = m_world->GetDataFileOFStream(lineage_filename);
7087 
7088   while ((genotype = batch_it.Next()) != NULL) {
7089     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7090       cout << "  Analyzing complexity for " << genotype->GetName() << endl;
7091     }
7092 
7093     // Construct this filename...
7094     cString filename;
7095     filename.Set("%s%s.complexity.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
7096     ofstream& fp = m_world->GetDataFileOFStream(filename);
7097 
7098     lineage_fp << genotype->GetID() << " ";
7099 
7100     int updateBorn = -1;
7101     updateBorn = genotype->GetUpdateBorn();
7102     cCPUTestInfo test_info;
7103     test_info.SetResourceOptions(useResources, m_resources, updateBorn, m_resource_time_spent_offset);
7104 
7105     // Calculate the stats for the genotype we're working with ...
7106     genotype->Recalculate(m_ctx, &test_info);
7107     cout << genotype->GetFitness() << endl;
7108     const int max_line = genotype->GetLength();
7109     const Genome& base_genome = genotype->GetGenome();
7110     const Sequence& base_seq = base_genome.GetSequence();
7111     Genome mod_genome(base_genome);
7112     Sequence& seq = mod_genome.GetSequence();
7113     const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7114 
7115     // Loop through all the lines of code, testing all mutations...
7116     tArray<double> test_fitness(num_insts);
7117     tArray<double> prob(num_insts);
7118     for (int line_num = 0; line_num < max_line; line_num++) {
7119       int cur_inst = base_seq[line_num].GetOp();
7120 
7121       // Column 1 ... the original instruction in the genome.
7122       fp << cur_inst << " ";
7123 
7124       // Test fitness of each mutant.
7125       for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7126         seq[line_num].SetOp(mod_inst);
7127         cAnalyzeGenotype test_genotype(m_world, mod_genome);
7128         test_genotype.Recalculate(m_ctx);
7129         test_fitness[mod_inst] = test_genotype.GetFitness();
7130       }
7131 
7132       // Ajust fitness
7133       double cur_inst_fitness = test_fitness[cur_inst];
7134       for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7135         if (test_fitness[mod_inst] > cur_inst_fitness)
7136           test_fitness[mod_inst] = cur_inst_fitness;
7137         test_fitness[mod_inst] = test_fitness[mod_inst] / cur_inst_fitness;
7138       }
7139 
7140       // Calculate probabilities at mut-sel balance
7141       double w_bar = 1;
7142 
7143       // Normalize fitness values, assert if they are all zero
7144       double maxFitness = 0.0;
7145       for(int i=0; i<num_insts; i++) {
7146         if(test_fitness[i] > maxFitness) {
7147           maxFitness = test_fitness[i];
7148         }
7149       }
7150 
7151       if(maxFitness > 0) {
7152         for(int i=0; i<num_insts; i++) {
7153           test_fitness[i] /= maxFitness;
7154         }
7155       } else {
7156         fp << "All zero fitness, ERROR." << endl;
7157         continue;
7158       }
7159 
7160       while(1) {
7161         double sum = 0.0;
7162         for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
7163           prob[mod_inst] = (mut_rate * w_bar) /
7164           ((double)num_insts * (w_bar + test_fitness[mod_inst] * mut_rate - test_fitness[mod_inst]));
7165           sum = sum + prob[mod_inst];
7166         }
7167         if ((sum-1.0)*(sum-1.0) <= 0.0001)
7168           break;
7169         else
7170           w_bar = w_bar - 0.000001;
7171       }
7172       // Write probability
7173       for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
7174         fp << prob[mod_inst] << " ";
7175       }
7176 
7177       // Calculate complexity
7178       double entropy = 0;
7179       for (int i = 0; i < num_insts; i ++) {
7180         entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
7181       }
7182       double complexity = 1 - entropy;
7183       fp << complexity << endl;
7184 
7185       lineage_fp << complexity << " ";
7186 
7187       // Reset the mod_genome back to the original sequence.
7188       seq[line_num].SetOp(cur_inst);
7189     }
7190 
7191     m_world->GetDataFileManager().Remove(filename);
7192 
7193     lineage_fp << endl;
7194 
7195     // Always grabs the first one
7196     // Skip i-1 times, so that the beginning of the loop will grab the ith one
7197     // where i is the batchFrequency
7198     for(int count=0; genotype != NULL && count < batchFrequency - 1; count++) {
7199       genotype = batch_it.Next();
7200       if(genotype != NULL && m_world->GetVerbosity() >= VERBOSE_ON) {
7201         cout << "Skipping: " << genotype->GetName() << endl;
7202       }
7203     }
7204     if(genotype == NULL) { break; }
7205   }
7206 
7207   m_world->GetDataFileManager().Remove(lineage_filename);
7208 
7209   delete testcpu;
7210 }
7211 
AnalyzeFitnessLandscapeTwoSites(cString cur_string)7212 void cAnalyze::AnalyzeFitnessLandscapeTwoSites(cString cur_string)
7213 {
7214   cout << "Fitness for all instruction combinations at two sites..." << endl;
7215 
7216   /*
7217    * Arguments:
7218    * 1) directory (default: 'fitness_landscape_two_sites/'
7219    * 2) useResources (default: 0 -- no)
7220    * 3) batchFrequency (default: 1 -- all genotypes in batch)
7221    *
7222    */
7223 
7224   // number of arguments provided
7225   int words = cur_string.CountNumWords();
7226   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7227     cout << "  Number of arguments passed: " << words << endl;
7228   }
7229 
7230   //
7231   // argument 1 -- directory
7232   //
7233   cString dir = cur_string.PopWord();
7234   cString defaultDirectory = "fitness_landscape_two_sites/";
7235   cString directory = PopDirectory(dir, defaultDirectory);
7236   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7237     cout << "  - Analysis results to directory: " << directory << endl;
7238   }
7239 
7240   //
7241   // argument 2 -- use resources?
7242   //
7243   // Default for usage of resources is false
7244   int useResources = 0;
7245   if(words >= 2) {
7246     useResources = cur_string.PopWord().AsInt();
7247     // All non-zero values are considered false (Handled by testcpu->InitResources)
7248   }
7249   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7250     cout << "  - Use resorces set to: " << useResources << " (0=false, true other int)" << endl;
7251   }
7252 
7253   //
7254   // argument 3 -- batch frequncy
7255   //   - default batchFrequency=1 (every organism analyzed)
7256   //
7257   int batchFrequency = 1;
7258   if(words >= 3) {
7259     batchFrequency = cur_string.PopWord().AsInt();
7260     if(batchFrequency <= 0) {
7261       batchFrequency = 1;
7262     }
7263   }
7264   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7265     cout << "  - Batch frequency set to: " << batchFrequency << endl;
7266   }
7267 
7268   // test cpu
7269   //cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU();
7270 
7271   // get current batch
7272   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7273   cAnalyzeGenotype * genotype = NULL;
7274 
7275   // analyze each genotype in the batch
7276   while ((genotype = batch_it.Next()) != NULL) {
7277     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7278       cout << "  Analyzing complexity for " << genotype->GetName() << endl;
7279     }
7280 
7281     int updateBorn = -1;
7282     updateBorn = genotype->GetUpdateBorn();
7283     cCPUTestInfo test_info;
7284     test_info.SetResourceOptions(useResources, m_resources, updateBorn, m_resource_time_spent_offset);
7285 
7286     // Calculate the stats for the genotype we're working with ...
7287     genotype->Recalculate(m_ctx, &test_info);
7288     const int max_line = genotype->GetLength();
7289     const Genome& base_genome = genotype->GetGenome();
7290     const Sequence& base_seq = base_genome.GetSequence();
7291     Genome mod_genome(base_genome);
7292     Sequence& seq = mod_genome.GetSequence();
7293     const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7294 
7295     // run throught sites in genome
7296     for (int site1 = 0; site1 < max_line; site1++) {
7297       for (int site2 = site1+1; site2 < max_line; site2++) {
7298 
7299         // Construct filename for this site combination
7300         cString fl_filename;
7301         fl_filename.Set("%s%s_FitLand_sites-%d_and_%d.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()), site1, site2);
7302         cDataFile & fit_land_fp = m_world->GetDataFile(fl_filename);
7303         fit_land_fp.WriteComment( "Two-site fitness landscape, all possible instructions" );
7304         fit_land_fp.WriteComment( cStringUtil::Stringf("Site 1: %d Site 2: %d", site1, site2) );
7305         fit_land_fp.WriteComment( "Rows #- instruction, site 1" );
7306         fit_land_fp.WriteComment( "Columns #- instruction, site 2" );
7307         fit_land_fp.WriteTimeStamp();
7308 
7309         // get current instructions at site 1 and site 2
7310         int curr_inst1 = base_seq[site1].GetOp();
7311         int curr_inst2 = base_seq[site2].GetOp();
7312 
7313         // get current fitness
7314         //double curr_fitness = genotype->GetFitness();
7315 
7316         // run through all possible instruction combinations
7317         // at two sites
7318         for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1++) {
7319           for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2++) {
7320             // modify mod_genome at two sites
7321             seq[site1].SetOp(mod_inst1);
7322             seq[site2].SetOp(mod_inst2);
7323             // analyze mod_genome
7324             cAnalyzeGenotype test_genotype(m_world, mod_genome);
7325             test_genotype.Recalculate(m_ctx);
7326             double mod_fitness = test_genotype.GetFitness();
7327 
7328             // write to file
7329             fit_land_fp.Write(mod_fitness, cStringUtil::Stringf("Instruction, site 2: %d ", mod_inst2));
7330           }
7331           fit_land_fp.Endl();
7332         }
7333         // Reset the mod_genome back to the original sequence.
7334         seq[site1].SetOp(curr_inst1);
7335         seq[site2].SetOp(curr_inst2);
7336 
7337         // close file
7338         m_world->GetDataFileManager().Remove(fl_filename);
7339       }
7340     }
7341   }
7342 }
7343 
AnalyzeLineageComplexitySitesN(cString cur_string)7344 void cAnalyze::AnalyzeLineageComplexitySitesN(cString cur_string)
7345 {
7346   /*
7347   Implemented up to n=2, feel free to expand for greater n's
7348   */
7349   cout << "Analyzing genome complexity of a lineage for n sites..." << endl;
7350 
7351   /*
7352    * Arguments:
7353    * 1) N-mutant (default: 2)
7354    * 2) directory
7355    */
7356 
7357   // number of arguments provided
7358   int words = cur_string.CountNumWords();
7359   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7360     cout << "  Number of arguments passed: " << words << endl;
7361   }
7362 
7363   //
7364   // argument 1 -- N-mutant number
7365   //
7366   int n = 2;
7367   if(words < 1) {
7368     // no mutation n-mutant number provided
7369     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7370       cout << "  - No specific n-mutant selected, using default n-mutant with n = " << n << endl;
7371     }
7372   } else {
7373     // n-mutant number provided
7374     n = cur_string.PopWord().AsInt();
7375     if (n < 1.0) {
7376       // find an n-mutant below 1 is trivial
7377       n = 1.0;
7378     }
7379     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7380       cout << "  - n-mutant passed, using n = " << n << endl;
7381     }
7382   }
7383 
7384   //
7385   // argument 2 -- directory
7386   //
7387   cString dir = cur_string.PopWord();
7388   cString defaultDirectory = "complexity_nmutant_lineage/";
7389   cString directory = PopDirectory(dir, defaultDirectory);
7390   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7391     cout << "  - Analysis results to directory: " << directory << endl;
7392   }
7393 
7394   // test cpu
7395   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
7396 
7397 // get current batch
7398   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7399   cAnalyzeGenotype * genotype = NULL;
7400 
7401     // Construct filename
7402     cString filename_2s;
7403     filename_2s.Set("complexity.dat");
7404     cDataFile & fp_2s = m_world->GetDataFile(filename_2s);
7405     fp_2s.WriteComment( "Lineage Complexity Analysis" );
7406     fp_2s.WriteTimeStamp();
7407 //    m_world->GetDataFileManager().Remove(filename_2s);
7408 
7409 
7410   // analyze each genotype in the batch
7411   while ((genotype = batch_it.Next()) != NULL) {
7412     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7413       cout << "  Analyzing complexity for " << genotype->GetName() << endl;
7414     }
7415 
7416 
7417 
7418     // Calculate the stats for the genotype we're working with ...
7419     const int gen_length = genotype->GetLength();
7420     const Genome& base_genome = genotype->GetGenome();
7421     const double gen_fitness = genotype->GetFitness();
7422     const Sequence& base_seq = base_genome.GetSequence();
7423     Genome mod_genome(base_genome);
7424     Sequence& seq = mod_genome.GetSequence();
7425     const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7426 
7427     //Initialize variables needed for complexity calculations
7428     int posneutmut = 0; //number of positive and nuetral mutations
7429     int posmut = 0;
7430 
7431     cout << "The base genome fitness is: " << gen_fitness << endl;
7432 
7433     /*
7434      *
7435      *  ONE SITE CALCULATIONS
7436      *
7437      */
7438 
7439     // run through each gene in genome
7440     if( n ==  1 ) {
7441       for (int gene_num = 0; gene_num < gen_length; gene_num++) {
7442         // get the current instruction at this line/site
7443         int cur_inst = base_seq[gene_num].GetOp();
7444 
7445         // recalculate fitness of each mutant and count the number of positive and neutral mutations
7446         for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7447           //Check to make sure not re-evaluating the the original genome
7448           if (mod_inst != cur_inst) {
7449             //cout << "Mod Inst, Cur Inst: " << mod_inst << " " << cur_inst << endl;
7450             seq[gene_num].SetOp(mod_inst);
7451             cAnalyzeGenotype test_genotype(m_world, mod_genome);
7452             test_genotype.Recalculate(m_ctx);
7453             double mod_fitness = test_genotype.GetFitness();
7454             cout << "Mod Fitness: " << mod_fitness << endl;
7455             if (mod_fitness >= gen_fitness) {
7456               //cout << "Mutant has better fitness" << endl;
7457               posneutmut += 1;
7458             }
7459             if (mod_fitness > gen_fitness) {
7460               posmut +=1;
7461             }
7462           }
7463         }
7464         seq[gene_num].SetOp(cur_inst);
7465       }
7466     }
7467     /*
7468      *
7469      *  TWO SITE CALCULATIONS
7470      *
7471      */
7472 
7473     // run through genes in genome
7474     // - only consider lin_num2 > lin_num1 so that we don't consider
7475     // Mut Info [1][45] and Mut Info [45][1]
7476     if( n == 2) {
7477       for (int gene_num1 = 0; gene_num1 < (gen_length-1); gene_num1++) {
7478         for (int gene_num2 = gene_num1+1; gene_num2 < gen_length; gene_num2++) {
7479           //cout << "line #1, #2: " << gene_num1 << ", " << gene_num2 << endl;
7480 
7481           // get current instructions at site 1 and site 2
7482           int cur_inst1 = base_seq[gene_num1].GetOp();
7483           int cur_inst2 = base_seq[gene_num2].GetOp();
7484 
7485           // initialize running fitness total
7486           double fitness_total_2s = 0.0;
7487 
7488           // run through all possible instructions
7489           for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1++) {
7490             for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2++) {
7491               // modify mod_genome at two sites
7492               seq[gene_num1].SetOp(mod_inst1);
7493               seq[gene_num2].SetOp(mod_inst2);
7494               // analyze mod_genome
7495               cAnalyzeGenotype test_genotype(m_world, mod_genome);
7496               test_genotype.Recalculate(m_ctx);
7497               double mod_fitness = test_genotype.GetFitness();
7498               //cout << "Mutant Fitness: " << mod_fitness << endl;
7499               if (mod_fitness >= gen_fitness) {
7500                 posneutmut += 1;
7501               }
7502               if (mod_fitness > gen_fitness) {
7503                 posmut += 1;
7504               }
7505             }
7506           }
7507           seq[gene_num1].SetOp(cur_inst1);
7508           seq[gene_num2].SetOp(cur_inst2);
7509         }
7510       }
7511     }
7512 
7513     if ( n >= 3) {
7514         //TODO
7515     }
7516 
7517     //cout << "Genome Length: " << gen_length << endl;
7518     //cout << "Postive & Neutral Mutations: " << posneutmut << endl;
7519 
7520     // calculate complexity
7521     double denominator = 0.0;
7522     if (n == 1) {
7523       denominator = (num_insts*gen_length);
7524     }
7525     else if (n == 2) {
7526         denominator = (pow((double)num_insts,(double)2)*(gen_length)*(gen_length-1)*(0.5));
7527     }
7528 
7529     double wn = ( posneutmut / denominator);
7530 
7531     //cout << "Denom: " << denominator << " wn: " << wn << endl;
7532 
7533     double entropy = 0.0;
7534     double totalcombo = pow((double)num_insts, gen_length);
7535     //cout << "Total Combinations: " << totalcombo << endl;
7536     //cout << "Log of wn and totalcombos: " << log(wn * totalcombo ) << endl;
7537     if (posneutmut > 0) {
7538         entropy = (log(wn * totalcombo ) / log(double(num_insts)));
7539     }
7540 
7541     //cout << "Entropy: " << entropy << endl;
7542 
7543     double complexity = (gen_length - entropy);
7544     cout << "Complexity: " << complexity << endl;
7545 
7546     //write to file
7547 
7548     fp_2s.Write(genotype->GetID(),           "Genotype ID");
7549     fp_2s.Write(genotype->GetFitness(),      "Genotype Fitness");
7550     fp_2s.Write(gen_length,                  "Genotype Length");
7551     fp_2s.Write(posmut,                      "Positive Mutations");
7552     fp_2s.Write(posneutmut,                  "Positive and Neutral Mutations");
7553     fp_2s.Write(entropy,                     "Entropy");
7554     fp_2s.Write(complexity,                  "Complexity");
7555     fp_2s.Endl();
7556 
7557   }
7558   m_world->GetDataFileManager().Remove(filename_2s);
7559 
7560   delete testcpu;
7561 }
7562 
AnalyzeComplexityTwoSites(cString cur_string)7563 void cAnalyze::AnalyzeComplexityTwoSites(cString cur_string)
7564 {
7565   cout << "Analyzing genome complexity (one and two sites)..." << endl;
7566 
7567   /*
7568    * Arguments:
7569    * 1) mutation rate (default: 0.0 - selection only)
7570    * 2) directory for results (default: 'complexity_two_sites/'
7571    * 3) use resources ? -- 0 or 1 (default: 0)
7572    * 4) batch frequency (default: 1 - all genotypes in batch)
7573    *    -- how many genotypes to skip in batch
7574    * 5) convergence accuracy (default: 1.e-10)
7575    *
7576    */
7577 
7578   // number of arguments provided
7579   int words = cur_string.CountNumWords();
7580   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7581     cout << "  Number of arguments passed: " << words << endl;
7582   }
7583 
7584   //
7585   // argument 1 -- mutation rate
7586   //
7587   double mut_rate = 0.0075;
7588   if(words < 1) {
7589     // no mutation rate provided
7590     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7591       cout << "  - No mutation rate passed, using default mu = " << mut_rate << endl;
7592     }
7593   } else {
7594     // mutation rate provided
7595     mut_rate = cur_string.PopWord().AsDouble();
7596     if (mut_rate < 0.0) {
7597       // can't have mutation rate below zero
7598       mut_rate = 0.0;
7599     }
7600     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7601       cout << "  - Mutation rate passed, using mu = " << mut_rate << endl;
7602     }
7603   }
7604 
7605   //
7606   // argument 2 -- directory
7607   //
7608   cString dir = cur_string.PopWord();
7609   cString defaultDirectory = "complexity_two_sites/";
7610   cString directory = PopDirectory(dir, defaultDirectory);
7611   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7612     cout << "  - Analysis results to directory: " << directory << endl;
7613   }
7614 
7615   //
7616   // argument 3 -- use resources?
7617   //
7618   // Default for usage of resources is false
7619   int useResources = 0;
7620   if(words >= 3) {
7621     useResources = cur_string.PopWord().AsInt();
7622     // All non-zero values are considered false (Handled by testcpu->InitResources)
7623   }
7624   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7625     cout << "  - Use resorces set to: " << useResources << " (0=false, true other int)" << endl;
7626   }
7627 
7628   //
7629   // argument 4 -- batch frequncy
7630   //   - default batchFrequency=1 (every organism analyzed)
7631   //
7632   int batchFrequency = 1;
7633   if(words >= 4) {
7634     batchFrequency = cur_string.PopWord().AsInt();
7635     if(batchFrequency <= 0) {
7636       batchFrequency = 1;
7637     }
7638   }
7639   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7640     cout << "  - Batch frequency set to: " << batchFrequency << endl;
7641   }
7642 
7643   //
7644   // argument 5 -- convergence accuracy for mutation-selection balance
7645   //
7646   double converg_accuracy = 1.e-10;
7647   if(words >= 5) {
7648     converg_accuracy = cur_string.PopWord().AsDouble();
7649   }
7650   if (m_world->GetVerbosity() >= VERBOSE_ON) {
7651     cout << "  - Convergence accuracy: " << converg_accuracy << endl;
7652   }
7653 
7654   // test cpu
7655   cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
7656 
7657   // get current batch
7658   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7659   cAnalyzeGenotype * genotype = NULL;
7660 
7661   // create file for batch summary
7662   cString summary_filename;
7663   summary_filename.Set("%scomplexity_batch_summary.dat", static_cast<const char*>(directory));
7664   cDataFile & summary_fp = m_world->GetDataFile(summary_filename);
7665   summary_fp.WriteComment( "One, Two Site Entropy/Complexity Analysis" );
7666   summary_fp.WriteTimeStamp();
7667 
7668   // analyze each genotype in the batch
7669   while ((genotype = batch_it.Next()) != NULL) {
7670     if (m_world->GetVerbosity() >= VERBOSE_ON) {
7671       cout << "  Analyzing complexity for " << genotype->GetName() << endl;
7672     }
7673     // entropy and complexity for whole genome
7674     // in both mers and bits
7675     // >> single site approximation
7676     double genome_ss_entropy_mers = 0.0;
7677     double genome_ss_entropy_bits = 0.0;
7678     double genome_ss_complexity_mers = 0.0;
7679     double genome_ss_complexity_bits = 0.0;
7680     // >> two site approximation
7681     double genome_ds_mut_info_mers = 0.0;
7682     double genome_ds_mut_info_bits = 0.0;
7683     double genome_ds_complexity_mers = 0.0;
7684     double genome_ds_complexity_bits = 0.0;
7685 
7686     // Construct filename
7687     cString filename_2s;
7688     filename_2s.Set("%s%s.twosite.complexity.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
7689     cDataFile & fp_2s = m_world->GetDataFile(filename_2s);
7690     fp_2s.WriteComment( "One, Two Site Entropy/Complexity Analysis" );
7691     fp_2s.WriteComment( "NOTE: mutual information = (col 6 + col 8) - (col 9)" );
7692     fp_2s.WriteComment( "NOTE: possible negative mutual information-- is this real? " );
7693     fp_2s.WriteTimeStamp();
7694 
7695     int updateBorn = -1;
7696     updateBorn = genotype->GetUpdateBorn();
7697     cCPUTestInfo test_info;
7698     test_info.SetResourceOptions(useResources, m_resources, updateBorn, m_resource_time_spent_offset);
7699 
7700     // Calculate the stats for the genotype we're working with ...
7701     genotype->Recalculate(m_ctx, &test_info);
7702     const int max_line = genotype->GetLength();
7703     const Genome& base_genome = genotype->GetGenome();
7704     const Sequence& base_seq = base_genome.GetSequence();
7705     Genome mod_genome(base_genome);
7706     Sequence& seq = mod_genome.GetSequence();
7707     const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7708 
7709     /*
7710      *
7711      *  ONE SITE CALCULATIONS
7712      *
7713      */
7714 
7715     // single site entropies for use with
7716     // two site calculations (below)
7717     tArray<double> entropy_ss_mers(max_line);
7718     tArray<double> entropy_ss_bits(max_line);
7719     // used in single site calculations
7720     tArray<double> test_fitness(num_insts);
7721     tArray<double> prob(num_insts);
7722     tArray<double> prob_next(num_insts);
7723 
7724     // run through lines in genome
7725     for (int line_num = 0; line_num < max_line; line_num++) {
7726       // get the current instruction at this line/site
7727       int cur_inst = base_seq[line_num].GetOp();
7728 
7729       // recalculate fitness of each mutant.
7730       for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7731         seq[line_num].SetOp(mod_inst);
7732         cAnalyzeGenotype test_genotype(m_world, mod_genome);
7733         test_genotype.Recalculate(m_ctx);
7734         test_fitness[mod_inst] = test_genotype.GetFitness();
7735       }
7736 
7737       // Adjust fitness
7738       // - set all fitness values greater than current instruction
7739       // equal to current instruction fitness
7740       // - make the rest of the fitness values relative to
7741       // the current instruction fitness
7742       double cur_inst_fitness = test_fitness[cur_inst];
7743       // test that current fitness greater than zero
7744       // if NOT, all fitnesses will be set to zero
7745       if (cur_inst_fitness > 0.0) {
7746         for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7747           if (test_fitness[mod_inst] > cur_inst_fitness)
7748             test_fitness[mod_inst] = cur_inst_fitness;
7749           test_fitness[mod_inst] /= cur_inst_fitness;
7750         }
7751       } else {
7752         cout << "Fitness of this genotype is ZERO--no information." << endl;
7753         continue;
7754       }
7755 
7756       // initialize prob for
7757       // mutation-selection balance
7758       double fitness_total = 0.0;
7759       for (int i = 0; i < num_insts; i ++ ) {
7760         fitness_total += test_fitness[i];
7761       }
7762       for (int i = 0; i < num_insts; i ++ ) {
7763         prob[i] = test_fitness[i]/fitness_total;
7764         prob_next[i] = 0.0;
7765       }
7766 
7767       double check_sum = 0.0;
7768       while(1) {
7769         check_sum = 0.0;
7770         double delta_prob = 0.0;
7771         //double delta_prob_ex = 0.0;
7772         for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
7773           // calculate the average fitness
7774           double w_avg = 0.0;
7775           for (int i = 0; i < num_insts; i++) {
7776             w_avg += prob[i]*test_fitness[i];
7777           }
7778           if (mut_rate != 0.0) {
7779             // run mutation-selection equation
7780             prob_next[mod_inst] = ((1.0-mut_rate)*test_fitness[mod_inst]*prob[mod_inst])/(w_avg);
7781             prob_next[mod_inst] += mut_rate/((double)num_insts);
7782           } else {
7783             // run selection equation
7784             prob_next[mod_inst] = (test_fitness[mod_inst]*prob[mod_inst])/(w_avg);
7785           }
7786           // increment change in probs
7787           delta_prob += (prob_next[mod_inst]-prob[mod_inst])*(prob_next[mod_inst]-prob[mod_inst]);
7788           //delta_prob_ex += (prob_next[mod_inst]-prob[mod_inst]);
7789         }
7790         // transfer t+1 to t for next iteration
7791         for (int i = 0; i < num_insts; i++) {
7792           prob[i]=prob_next[i];
7793           check_sum += prob[i];
7794         }
7795 
7796         // test for convergence
7797         if (delta_prob < converg_accuracy)
7798           break;
7799       }
7800 
7801       // Calculate complexity and entropy in bits and mers
7802       double entropy_mers = 0;
7803       double entropy_bits = 0;
7804       for (int i = 0; i < num_insts; i ++) {
7805         // watch for prob[i] == 0
7806         // --> 0.0 log(0.0) = 0.0
7807         if (prob[i] != 0.0) {
7808           entropy_mers += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
7809           entropy_bits += prob[i] * log((double) 1.0/prob[i]) / log ((double) 2.0);
7810         }
7811       }
7812       double complexity_mers = 1 - entropy_mers;
7813       double complexity_bits = (log ((double) num_insts) / log ((double) 2.0)) - entropy_bits;
7814 
7815       // update entropy and complexity values
7816       // with this site's values
7817       genome_ss_entropy_mers += entropy_mers;
7818       genome_ss_entropy_bits += entropy_bits;
7819       genome_ss_complexity_mers += complexity_mers;
7820       genome_ss_complexity_bits += complexity_bits;
7821 
7822       // save entropy for this line/site number
7823       entropy_ss_mers[line_num] = entropy_mers;
7824       entropy_ss_bits[line_num] = entropy_bits;
7825 
7826       // Reset the mod_genome back to the original sequence.
7827       seq[line_num].SetOp(cur_inst);
7828     }
7829 
7830     /*
7831      *
7832      *  TWO SITE CALCULATIONS
7833      *
7834      */
7835 
7836     // Loop through all the lines of code,
7837     // testing all TWO SITE mutations...
7838     tMatrix<double> test_fitness_2s(num_insts,num_insts);
7839     tArray<double> prob_1s_i(num_insts);
7840     tArray<double> prob_1s_j(num_insts);
7841     tMatrix<double> prob_2s(num_insts,num_insts);
7842     tMatrix<double> prob_next_2s(num_insts,num_insts);
7843 
7844     // run through lines in genome
7845     // - only consider lin_num2 > lin_num1 so that we don't consider
7846     // Mut Info [1][45] and Mut Info [45][1]
7847     for (int line_num1 = 0; line_num1 < max_line; line_num1++) {
7848       for (int line_num2 = line_num1+1; line_num2 < max_line; line_num2++) {
7849         // debug
7850         //cout << "line #1, #2: " << line_num1 << ", " << line_num2 << endl;
7851 
7852         // get current instructions at site 1 and site 2
7853         int cur_inst1 = base_seq[line_num1].GetOp();
7854         int cur_inst2 = base_seq[line_num2].GetOp();
7855 
7856         // get current fitness
7857         double cur_inst_fitness_2s = genotype->GetFitness();
7858 
7859         // initialize running fitness total
7860         double fitness_total_2s = 0.0;
7861 
7862         // test that current fitness is greater than zero
7863         if (cur_inst_fitness_2s > 0.0) {
7864           // current fitness greater than zero
7865           // run through all possible instructions
7866           for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1++) {
7867             for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2++) {
7868               // modify mod_genome at two sites
7869               seq[line_num1].SetOp(mod_inst1);
7870               seq[line_num2].SetOp(mod_inst2);
7871               // analyze mod_genome
7872               cAnalyzeGenotype test_genotype(m_world, mod_genome);
7873               test_genotype.Recalculate(m_ctx);
7874               test_fitness_2s[mod_inst1][mod_inst2] = test_genotype.GetFitness();
7875 
7876               // if modified fitness is greater than current fitness
7877               //  - set equal to current fitness
7878               if (test_fitness_2s[mod_inst1][mod_inst2] > cur_inst_fitness_2s)
7879                 test_fitness_2s[mod_inst1][mod_inst2] = cur_inst_fitness_2s;
7880 
7881               // in all cases, scale fitness relative to current fitness
7882               test_fitness_2s[mod_inst1][mod_inst2] /= cur_inst_fitness_2s;
7883 
7884               // update fitness total
7885               fitness_total_2s += test_fitness_2s[mod_inst1][mod_inst2];
7886             }
7887           }
7888         } else {
7889           // current fitness is not greater than zero--skip
7890           cout << "Fitness of this genotype is ZERO--no information." << endl;
7891           continue;
7892         }
7893 
7894         // initialize probabilities
7895         for (int i = 0; i < num_insts; i++ ) {
7896           // single site probabilities
7897           // to be built from two site probabilities
7898           prob_1s_i[i] = 0.0;
7899           prob_1s_j[i] = 0.0;
7900           for (int j = 0; j < num_insts; j++ ) {
7901             // intitialize two site probability with
7902             // relative fitness
7903             prob_2s[i][j] = test_fitness_2s[i][j]/fitness_total_2s;
7904             prob_next_2s[i][j] = 0.0;
7905           }
7906         }
7907 
7908         double check_sum_2s = 0.0;
7909         while(1) {
7910           check_sum_2s = 0.0;
7911           double delta_prob_2s = 0.0;
7912           //double delta_prob_ex = 0.0;
7913           for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1 ++) {
7914             for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2 ++) {
7915               // calculate the average fitness
7916               double w_avg_2s = 0.0;
7917               for (int i = 0; i < num_insts; i++) {
7918                 for (int j = 0; j < num_insts; j++) {
7919                   w_avg_2s += prob_2s[i][j]*test_fitness_2s[i][j];
7920                 }
7921               }
7922               if (mut_rate != 0.0) {
7923                 // run mutation-selection equation
7924                 // -term 1
7925                 prob_next_2s[mod_inst1][mod_inst2] = ((1.0-mut_rate)*(1.0-mut_rate)*test_fitness_2s[mod_inst1][mod_inst2]*prob_2s[mod_inst1][mod_inst2])/(w_avg_2s);
7926                 // -term 2
7927                 double sum_term2 = 0.0;
7928                 for (int i = 0; i < num_insts; i++) {
7929                   sum_term2 += (test_fitness_2s[i][mod_inst2]*prob_2s[i][mod_inst2])/(w_avg_2s);
7930                 }
7931                 prob_next_2s[mod_inst1][mod_inst2] += (((mut_rate*(1.0-mut_rate))/((double)num_insts)))*sum_term2;
7932                 // -term 3
7933                 double sum_term3 = 0.0;
7934                 for (int j = 0; j < num_insts; j++) {
7935                   sum_term3 += (test_fitness_2s[mod_inst1][j]*prob_2s[mod_inst1][j])/(w_avg_2s);
7936                 }
7937                 prob_next_2s[mod_inst1][mod_inst2] += (((mut_rate*(1.0-mut_rate))/((double)num_insts)))*sum_term3;
7938                 // -term 4
7939                 prob_next_2s[mod_inst1][mod_inst2] += (mut_rate/((double)num_insts))*(mut_rate/((double)num_insts));
7940               } else {
7941                 // run selection equation
7942                 prob_next_2s[mod_inst1][mod_inst2] = (test_fitness_2s[mod_inst1][mod_inst2]*prob_2s[mod_inst1][mod_inst2])/(w_avg_2s);
7943 
7944               }
7945               // increment change in probs
7946               delta_prob_2s += (prob_next_2s[mod_inst1][mod_inst2]-prob_2s[mod_inst1][mod_inst2])*(prob_next_2s[mod_inst1][mod_inst2]-prob_2s[mod_inst1][mod_inst2]);
7947               //delta_prob_ex += (prob_next[mod_inst]-prob[mod_inst]);
7948             }
7949           }
7950           // transfer probabilities at time t+1
7951           // to t for next iteration
7952           for (int i = 0; i < num_insts; i++) {
7953             for (int j = 0; j < num_insts; j++) {
7954               prob_2s[i][j]=prob_next_2s[i][j];
7955               check_sum_2s += prob_2s[i][j];
7956             }
7957           }
7958 
7959           // test for convergence
7960           if (delta_prob_2s < converg_accuracy)
7961             break;
7962         }
7963 
7964         // get single site probabilites from
7965         // two site probabilities
7966         // site i (first site)
7967         double check_prob_sum_site_1 = 0.0;
7968         double check_prob_sum_site_2 = 0.0;
7969         for (int i = 0; i < num_insts; i++) {
7970           for (int j = 0; j < num_insts; j++) {
7971             prob_1s_i[i] += prob_2s[i][j];
7972           }
7973           check_prob_sum_site_1 += prob_1s_i[i];
7974         }
7975         // site j (second site)
7976         for (int j = 0; j < num_insts; j++) {
7977           for (int i = 0; i < num_insts; i++) {
7978             prob_1s_j[j] += prob_2s[i][j];
7979           }
7980           check_prob_sum_site_2 += prob_1s_j[j];
7981         }
7982 
7983         // Calculate one site and two versions of
7984         // complexity and entropy in bits and mers
7985         //-mers
7986         double entropy_ss_site1_mers = 0.0;
7987         double entropy_ss_site2_mers = 0.0;
7988         double entropy_ds_mers = 0.0;
7989         //-bits
7990         double entropy_ss_site1_bits = 0.0;
7991         double entropy_ss_site2_bits = 0.0;
7992         double entropy_ds_bits = 0.0;
7993 
7994         // single site entropies
7995         for (int i = 0; i < num_insts; i ++) {
7996           // watch for zero probabilities
7997           if (prob_1s_i[i] != 0.0) {
7998             // mers
7999             entropy_ss_site1_mers += prob_1s_i[i] * log((double) 1.0/prob_1s_i[i]) / log ((double) num_insts);
8000             // bits
8001             entropy_ss_site1_bits += prob_1s_i[i] * log((double) 1.0/prob_1s_i[i]) / log ((double) 2.0);
8002           }
8003           if (prob_1s_j[i] != 0.0) {
8004             // mers
8005             entropy_ss_site2_mers += prob_1s_j[i] * log((double) 1.0/prob_1s_j[i]) / log ((double) num_insts);
8006             // bits
8007             entropy_ss_site2_bits += prob_1s_j[i] * log((double) 1.0/prob_1s_j[i]) / log ((double) 2.0);
8008           }
8009         }
8010 
8011         // two site joint entropies
8012         for (int i = 0; i < num_insts; i ++) {
8013           for (int j = 0; j < num_insts; j ++) {
8014             // watch for zero probabilities
8015             if (prob_2s[i][j] != 0.0) {
8016               // two site entropy in mers
8017               entropy_ds_mers += prob_2s[i][j] * log((double) 1.0/prob_2s[i][j]) / log ((double) num_insts);
8018               // two site entropy in bitss
8019               entropy_ds_bits += prob_2s[i][j] * log((double) 1.0/prob_2s[i][j]) / log ((double) 2.0);
8020             }
8021           }
8022         }
8023 
8024         // calculate the mutual information
8025         // - add single site entropies
8026         // - subtract two site joint entropy
8027         // units: mers
8028         double mutual_information_mers = entropy_ss_site1_mers + entropy_ss_site2_mers;
8029         mutual_information_mers -= entropy_ds_mers;
8030 
8031         // units: bits
8032         double mutual_information_bits = entropy_ss_site1_bits + entropy_ss_site2_bits;
8033         mutual_information_bits -= entropy_ds_bits;
8034 
8035         // two site, only update mutatual informtion total
8036         genome_ds_mut_info_mers += mutual_information_mers;
8037         genome_ds_mut_info_bits += mutual_information_bits;
8038 
8039         // write output to file
8040         fp_2s.Write(line_num1,                    "Site 1 in genome");
8041         fp_2s.Write(line_num2,                    "Site 2 in genome");
8042         fp_2s.Write(cur_inst1,                    "Current Instruction, Site 1");
8043         fp_2s.Write(cur_inst2,                    "Current Instruction, Site 2");
8044         fp_2s.Write(entropy_ss_mers[line_num1],   "Entropy (MERS), Site 1 -- single site mut-sel balance");
8045         fp_2s.Write(entropy_ss_site1_mers,        "Entropy (MERS), Site 1 -- TWO site mut-sel balance");
8046         fp_2s.Write(entropy_ss_mers[line_num2],   "Entropy (MERS), Site 2 -- single site mut-sel balance");
8047         fp_2s.Write(entropy_ss_site2_mers,        "Entropy (MERS), Site 2 -- TWO site mut-sel balance");
8048         fp_2s.Write(entropy_ds_mers,              "Joint Entropy (MERS), Site 1 & 2 -- TWO site mut-sel balance");
8049         fp_2s.Write(mutual_information_mers,      "Mutual Information (MERS), Site 1 & 2 -- TWO site mut-sel balance");
8050         fp_2s.Endl();
8051 
8052         // Reset the mod_genome back to the original sequence.
8053         seq[line_num1].SetOp(cur_inst1);
8054         seq[line_num2].SetOp(cur_inst2);
8055 
8056       }// end line 2
8057     }// end line 1
8058 
8059     // cleanup file for this genome
8060     m_world->GetDataFileManager().Remove(filename_2s);
8061 
8062     // calculate the two site complexity
8063     // (2 site complexity) = (1 site complexity) + (total 2 site mutual info)
8064     genome_ds_complexity_mers = genome_ss_complexity_mers + genome_ds_mut_info_mers;
8065     genome_ds_complexity_bits = genome_ss_complexity_bits + genome_ds_mut_info_bits;
8066 
8067     summary_fp.Write(genotype->GetID(),           "Genotype ID");
8068     summary_fp.Write(genotype->GetFitness(),      "Genotype Fitness");
8069     summary_fp.Write(genome_ss_entropy_mers,      "Entropy (single-site) MERS");
8070     summary_fp.Write(genome_ss_complexity_mers,   "Complexity (single-site) MERS");
8071     summary_fp.Write(genome_ds_mut_info_mers,     "Mutual Information MERS");
8072     summary_fp.Write(genome_ds_complexity_mers,   "Complexity (two-site) MERS");
8073     summary_fp.Write(genome_ss_entropy_bits,      "Entropy (single-site) BITS");
8074     summary_fp.Write(genome_ss_complexity_bits,   "Complexity (single-site) BITS");
8075     summary_fp.Write(genome_ds_mut_info_bits,     "Mutual Information BITS");
8076     summary_fp.Write(genome_ds_complexity_bits,   "Complexity (two-site) BITS");
8077     summary_fp.Endl();
8078 
8079     // Always grabs the first one
8080     // Skip i-1 times, so that the beginning of the loop will grab the ith one
8081     // where i is the batchFrequency
8082     for(int count=0; genotype != NULL && count < batchFrequency - 1; count++) {
8083       genotype = batch_it.Next();
8084       if(genotype != NULL && m_world->GetVerbosity() >= VERBOSE_ON) {
8085         cout << "Skipping: " << genotype->GetName() << endl;
8086       }
8087     }
8088     if(genotype == NULL) { break; }
8089   }
8090 
8091   m_world->GetDataFileManager().Remove(summary_filename);
8092 
8093   delete testcpu;
8094 }
8095 
AnalyzePopComplexity(cString cur_string)8096 void cAnalyze::AnalyzePopComplexity(cString cur_string)
8097 {
8098   cout << "Analyzing population complexity ..." << endl;
8099 
8100   // Load in the variables...
8101   cString directory = PopDirectory(cur_string, "pop_complexity/");
8102   cString file = cur_string;
8103 
8104   // Construct filename...
8105   cString filename;
8106   filename.Set("%spop%s.complexity.dat", static_cast<const char*>(directory), static_cast<const char*>(file));
8107   ofstream& fp = m_world->GetDataFileOFStream(filename);
8108 
8109   //////////////////////////////////////////////////////////
8110   // Loop through all of the genotypes in this batch ...
8111 
8112   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8113   cAnalyzeGenotype * genotype = NULL;
8114 
8115 
8116   genotype = batch_it.Next();
8117 
8118 
8119   if (genotype == NULL) return;
8120   int seq_length = genotype->GetLength();
8121   const int num_insts = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet()).GetSize();
8122   tMatrix<int> inst_stat(seq_length, num_insts);
8123 
8124   // Initializing inst_stat ...
8125   for (int line_num = 0; line_num < seq_length; line_num ++)
8126     for (int inst_num = 0; inst_num < num_insts; inst_num ++)
8127       inst_stat(line_num, inst_num) = 0;
8128 
8129   int num_cpus = 0;
8130   int actural_samples = 0;
8131   while (genotype != NULL) {
8132     num_cpus = genotype->GetNumCPUs();
8133     const Genome& base_genome = genotype->GetGenome();
8134     for (int i = 0; i < num_cpus; i++) {   // Stat on every organism with same genotype.
8135       for (int line_num = 0; line_num < seq_length; line_num++) {
8136         int cur_inst = base_genome.GetSequence()[line_num].GetOp();
8137         inst_stat(line_num, cur_inst)++;
8138       }
8139       actural_samples++;
8140     }
8141     genotype = batch_it.Next();
8142   }
8143 
8144 // Calculate complexity
8145 for (int line_num = 0; line_num < seq_length; line_num ++) {
8146   double entropy = 0.0;
8147   for (int inst_num = 0; inst_num < num_insts; inst_num ++) {
8148     if (inst_stat(line_num, inst_num) == 0) continue;
8149     float prob = (float) (inst_stat(line_num, inst_num)) / (float) (actural_samples);
8150     entropy += prob * log((double) 1.0/prob) / log((double) num_insts);
8151   }
8152   double complexity = 1 - entropy;
8153   fp << complexity << " ";
8154 }
8155 fp << endl;
8156 
8157 m_world->GetDataFileManager().Remove(filename);
8158 return;
8159 }
8160 
8161 
8162 
8163 /* MRR
8164  * August 2007
8165  * This function will go through the lineage, align the genotypes, and
8166  * preform mutation reversion a specified number of descendents ahead
8167  * assuming they keep within a certain alignment distance (specified as well).
8168  * The output will give fitness information for the mutation-reverted genotypes
8169  * as described below.
8170 */
MutationRevert(cString cur_string)8171 void cAnalyze::MutationRevert(cString cur_string)
8172 {
8173 
8174   //This function takes in three parameters, all defaulted:
8175   cString filename("XXX.dat");   //The name of the output file
8176   int      max_dist      = -1;    //The maximum edit distance allowed in the search
8177   int	   max_depth     = 5;     //The maximum depth forward one wishes to search
8178 
8179   if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
8180   if (cur_string.GetSize() != 0) max_dist = cur_string.PopWord().AsInt();
8181   if (cur_string.GetSize() != 0) max_depth = cur_string.PopWord().AsInt();
8182 
8183 	//Warning notifications
8184   if (!batch[cur_batch].IsLineage())
8185   {
8186 		cout << "Error: This command requires a lineage.  Skipping." << endl;
8187 		return;
8188   }
8189 
8190 
8191 	//Request a file
8192 	ofstream& FOT = m_world->GetDataFileOFStream(filename);
8193 	/*
8194    FOT output per line
8195    ID
8196    FITNESS
8197    BIRTH
8198    DISTANCE
8199    PID
8200    P_FITNESS
8201    P_BIRTH
8202 			@ea depth past
8203    CHILDX_ID
8204    CHILDX_BIRTH
8205    CHILDX_FITNESS
8206    CHILDX_DISTANCE
8207    CHILDX_FITNESS_SANS_MUT
8208    */
8209 
8210 
8211   //Align the batch... we're going to keep the fitnesses intact from the runs
8212 	CommandAlign("");
8213 
8214 	//Our edit distance is already stored in the historical dump.
8215 
8216 	//Test hardware
8217 	cCPUTestInfo test_info;
8218 	test_info.UseRandomInputs(true);
8219 
8220 	tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8221   cAnalyzeGenotype* parent_genotype = batch_it.Next();
8222 	cAnalyzeGenotype* other_genotype  = NULL;
8223 	cAnalyzeGenotype* genotype        = NULL;
8224 
8225   while( (genotype = batch_it.Next()) != NULL && parent_genotype != NULL)
8226   {
8227 		if (true)
8228 		{
8229 			FOT << genotype->GetID()			<< " "
8230       << genotype->GetFitness()		<< " "
8231       << genotype->GetUpdateBorn() << " "
8232       << genotype->GetParentDist() << " "
8233       << parent_genotype->GetID()				<< " "
8234       << parent_genotype->GetFitness()		<< " "
8235       << parent_genotype->GetUpdateBorn()	<< " ";
8236 
8237 			int cum_dist = 0;
8238 			cString str_parent = parent_genotype->GetSequence();
8239 			cString str_other  = "";
8240 			cString str_align_parent = parent_genotype->GetAlignedSequence();
8241 			cString str_align_other  = genotype->GetAlignedSequence();
8242 			cString reversion  = ""; //Reversion mask
8243 
8244 			//Find what changes to revert
8245 			for (int k = 0; k < str_align_parent.GetSize(); k++)
8246 			{
8247 				char p = str_align_parent[k];
8248 				char c = str_align_other[k];
8249 				if (p == c)
8250 					reversion += " ";	//Nothing
8251 				else if (p == '_' && c != '_')
8252 					reversion += "+";	//Insertion
8253 				else if (p != '_' && c == '_')
8254 					reversion += "-";  //Deletion
8255 				else
8256 					reversion += p;			//Point Mutation
8257 			}
8258 
8259 			tListIterator<cAnalyzeGenotype> next_it(batch_it);
8260 			for (int i = 0; i < max_depth; i++)
8261 			{
8262 				if ( (other_genotype = next_it.Next()) != NULL &&
8263              (cum_dist <= max_dist || max_dist == -1) )
8264 				{
8265 					cum_dist += other_genotype->GetParentDist();
8266 					if (cum_dist > max_dist && max_dist != -1)
8267 						break;
8268 					str_other = other_genotype->GetSequence();
8269 					str_align_other = other_genotype->GetAlignedSequence();
8270 
8271 					//Revert "background" to parental form
8272 					cString reverted = "";
8273 					for (int k = 0; k < reversion.GetSize(); k++)
8274 					{
8275 						if (reversion[k] == '+')       continue;  //Insertion, so skip
8276 						else if (reversion[k] == '-')  reverted += str_align_parent[k]; //Add del
8277 						else if (reversion[k] != ' ')       reverted += reversion[k];        //Revert mut
8278 						else if (str_align_other[k] != '_') reverted += str_align_other[k];  //Keep current
8279 					}
8280 
8281           const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
8282           Genome rev_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(reverted));
8283 					cAnalyzeGenotype new_genotype(m_world, rev_genome);  //Get likely fitness
8284 					new_genotype.Recalculate(m_ctx, &test_info, NULL, 50);
8285 
8286           FOT << other_genotype->GetID()			<< " "
8287             << other_genotype->GetFitness()		<< " "
8288             << other_genotype->GetUpdateBorn() << " "
8289             << cum_dist                        << " "
8290             << new_genotype.GetFitness()       << " ";
8291 				}
8292 				else
8293 				{
8294 					FOT << -1 << " "
8295           << -1 << " "
8296           << -1 << " "
8297           << -1 << " "
8298           << -1 << " ";
8299 				}
8300 			}
8301 			FOT << endl;
8302 		}
8303 		parent_genotype = genotype;
8304   }
8305 
8306   return;
8307 }
8308 
EnvironmentSetup(cString cur_string)8309 void cAnalyze::EnvironmentSetup(cString cur_string)
8310 {
8311   cUserFeedback feedback;
8312   cout << "Running environment command: " << endl << "  " << cur_string << endl;
8313   m_world->GetEnvironment().LoadLine(cur_string, feedback);
8314   for (int i = 0; i < feedback.GetNumMessages(); i++) {
8315     switch (feedback.GetMessageType(i)) {
8316       case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
8317       case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
8318       default: break;
8319     };
8320     cerr << feedback.GetMessage(i) << endl;
8321   }
8322 }
8323 
8324 
CommandHelpfile(cString cur_string)8325 void cAnalyze::CommandHelpfile(cString cur_string)
8326 {
8327   cout << "Printing helpfiles in: " << cur_string << endl;
8328 
8329   cHelpManager help_control;
8330   if (m_world->GetVerbosity() >= VERBOSE_ON) help_control.SetVerbose();
8331   while (cur_string.GetSize() > 0) {
8332     help_control.LoadFile(cur_string.PopWord());
8333   }
8334 
8335   help_control.PrintHTML();
8336 }
8337 
8338 
8339 
8340 
8341 //////////////// Control...
8342 
VarSet(cString cur_string)8343 void cAnalyze::VarSet(cString cur_string)
8344 {
8345   cString var = cur_string.PopWord();
8346 
8347   if (cur_string.GetSize() == 0) {
8348     cerr << "Error: No variable provided in SET command" << endl;
8349     return;
8350   }
8351 
8352   cString& cur_variable = GetVariable(var);
8353   cur_variable = cur_string.PopWord();
8354 
8355   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8356     cout << "Setting " << var << " to " << cur_variable << endl;
8357   }
8358 }
8359 
ConfigGet(cString cur_string)8360 void cAnalyze::ConfigGet(cString cur_string)
8361 {
8362   cString cvar = cur_string.PopWord();
8363   cString var = cur_string.PopWord();
8364 
8365   if (cvar.GetSize() == 0 || var.GetSize() == 0) {
8366     cerr << "Error: Missing variable in CONFIG_GET command" << endl;
8367     return;
8368   }
8369 
8370   cString& cur_variable = GetVariable(var);
8371 
8372   // Get Config Variable
8373   if (!m_world->GetConfig().Get(cvar, cur_variable)) {
8374     cerr << "Error: Configuration Variable '" << var << "' was not found." << endl;
8375     return;
8376   }
8377 
8378   if (m_world->GetVerbosity() >= VERBOSE_ON)
8379     cout << "Setting variable " << var << " to " << cur_variable << endl;
8380 }
8381 
ConfigSet(cString cur_string)8382 void cAnalyze::ConfigSet(cString cur_string)
8383 {
8384   cString cvar = cur_string.PopWord();
8385 
8386   if (cvar.GetSize() == 0) {
8387     cerr << "Error: No variable provided in CONFIG_SET command" << endl;
8388     return;
8389   }
8390 
8391   // Get Config Variable
8392   cString val = cur_string.PopWord();
8393   if (!m_world->GetConfig().Set(cvar, val)) {
8394     cerr << "Error: Configuration Variable '" << cvar << "' was not found." << endl;
8395     return;
8396   }
8397 
8398   if (m_world->GetVerbosity() >= VERBOSE_ON)
8399     cout << "Setting configuration variable " << cvar << " to " << val << endl;
8400 }
8401 
8402 
BatchSet(cString cur_string)8403 void cAnalyze::BatchSet(cString cur_string)
8404 {
8405   int next_batch = 0;
8406   if (cur_string.CountNumWords() > 0) {
8407     next_batch = cur_string.PopWord().AsInt();
8408   }
8409   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Setting current batch to " << next_batch << endl;
8410   if (next_batch >= GetNumBatches()) {
8411     if (next_batch >= MAX_BATCHES) {
8412       cerr << "  Error: max batches is " << MAX_BATCHES << endl;
8413       if (exit_on_error) exit(1);
8414     } else {
8415       int old_num_batches = GetNumBatches();
8416       int num_batchsets_needed = ((next_batch - old_num_batches) / NUM_BATCHES_INCREMENT) + 1;
8417       int new_num_batches = GetNumBatches() + (num_batchsets_needed * NUM_BATCHES_INCREMENT);
8418       if (new_num_batches > MAX_BATCHES) new_num_batches = MAX_BATCHES;
8419 
8420       cout << "Increasing max batches to " << new_num_batches << endl;
8421 
8422       batch.Resize(new_num_batches);
8423       for (int i = old_num_batches; i < new_num_batches; i++) {
8424         batch[i].Name().Set("Batch%d", i);
8425       }
8426       cur_batch = next_batch;
8427     }
8428   } else {
8429     cur_batch = next_batch;
8430   }
8431 }
8432 
BatchName(cString cur_string)8433 void cAnalyze::BatchName(cString cur_string)
8434 {
8435   if (cur_string.CountNumWords() == 0) {
8436     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "  Warning: No name given in NAME_BATCH!" << endl;
8437     return;
8438   }
8439 
8440   batch[cur_batch].Name() = cur_string.PopWord();
8441 }
8442 
BatchTag(cString cur_string)8443 void cAnalyze::BatchTag(cString cur_string)
8444 {
8445   if (cur_string.CountNumWords() == 0) {
8446     if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "  Warning: No tag given in TAG_BATCH!" << endl;
8447     return;
8448   }
8449 
8450   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8451     cout << "Tagging batch " << cur_batch
8452     << " with tag '" << cur_string << "'" << endl;
8453   }
8454 
8455   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8456   cAnalyzeGenotype * genotype = NULL;
8457   while ((genotype = batch_it.Next()) != NULL) {
8458     genotype->SetTag(cur_string);
8459   }
8460 
8461 }
8462 
BatchPurge(cString cur_string)8463 void cAnalyze::BatchPurge(cString cur_string)
8464 {
8465   int batch_id = cur_batch;
8466   if (cur_string.CountNumWords() > 0) batch_id = cur_string.PopWord().AsInt();
8467 
8468   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Purging batch " << batch_id << endl;
8469 
8470   while (batch[batch_id].List().GetSize() > 0) {
8471     delete batch[batch_id].List().Pop();
8472   }
8473 
8474   batch[batch_id].SetLineage(false);
8475   batch[batch_id].SetAligned(false);
8476 }
8477 
BatchDuplicate(cString cur_string)8478 void cAnalyze::BatchDuplicate(cString cur_string)
8479 {
8480   if (cur_string.GetSize() == 0) {
8481     cerr << "Duplicate Error: Must include from ID!" << endl;
8482     if (exit_on_error) exit(1);
8483   }
8484   int batch_from = cur_string.PopWord().AsInt();
8485 
8486   int batch_to = cur_batch;
8487   if (cur_string.GetSize() > 0) batch_to = cur_string.PopWord().AsInt();
8488 
8489   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8490     cout << "Duplicating from batch " << batch_from << " to batch " << batch_to << "." << endl;
8491   }
8492 
8493   tListIterator<cAnalyzeGenotype> batch_from_it(batch[batch_from].List());
8494   cAnalyzeGenotype * genotype = NULL;
8495   while ((genotype = batch_from_it.Next()) != NULL) {
8496     cAnalyzeGenotype * new_genotype = new cAnalyzeGenotype(*genotype);
8497     batch[batch_to].List().PushRear(new_genotype);
8498   }
8499 
8500   batch[batch_to].SetLineage(false);
8501   batch[batch_to].SetAligned(false);
8502 }
8503 
BatchRecalculate(cString cur_string)8504 void cAnalyze::BatchRecalculate(cString cur_string)
8505 {
8506   tArray<int> manual_inputs;  // Used only if manual inputs are specified
8507   cString msg;                // Holds any information we may want to send the driver to display
8508 
8509   int use_resources      = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 0;
8510   int update             = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;
8511   bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false;
8512   bool use_manual_inputs = false;
8513 
8514   //Manual inputs will override random input request and must be the last arguments.
8515   if (cur_string.CountNumWords() > 0){
8516     if (cur_string.CountNumWords() == m_world->GetEnvironment().GetInputSize()){
8517       manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
8518       use_random_inputs = false;
8519       use_manual_inputs = true;
8520       for (int k = 0; cur_string.GetSize(); k++)
8521         manual_inputs[k] = cur_string.PopWord().AsInt();
8522     } else if (m_world->GetVerbosity() >= VERBOSE_ON){
8523       msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
8524               cur_string.CountNumWords(), m_world->GetEnvironment().GetInputSize());
8525       m_world->GetDriver().NotifyWarning(msg);
8526     }
8527   }
8528 
8529   cCPUTestInfo test_info;
8530   if (use_manual_inputs)
8531     test_info.UseManualInputs(manual_inputs);
8532   else
8533     test_info.UseRandomInputs(use_random_inputs);
8534   test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
8535 
8536   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8537     msg.Set("Running batch %d through test CPUs...", cur_batch);
8538     m_world->GetDriver().NotifyComment(msg);
8539   } else{
8540     msg.Set("Running through test CPUs...");
8541     m_world->GetDriver().NotifyComment(msg);
8542   }
8543 
8544   if (m_world->GetVerbosity() >= VERBOSE_ON && batch[cur_batch].IsLineage() == false) {
8545     msg.Set("Batch may not be a lineage; parent and ancestor distances may not be correct");
8546     m_world->GetDriver().NotifyWarning(msg);
8547   }
8548 
8549   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8550   cAnalyzeGenotype * genotype = NULL;
8551   cAnalyzeGenotype * last_genotype = NULL;
8552   while ((genotype = batch_it.Next()) != NULL) {
8553     // If the previous genotype was the parent of this one, pass in a pointer
8554     // to it for improved recalculate (such as distance to parent, etc.)
8555     if (last_genotype != NULL && genotype->GetParentID() == last_genotype->GetID()) {
8556       genotype->Recalculate(m_ctx, &test_info, last_genotype);
8557     } else {
8558       genotype->Recalculate(m_ctx, &test_info);
8559     }
8560     last_genotype = genotype;
8561   }
8562 
8563   return;
8564 }
8565 
8566 
BatchRecalculateWithArgs(cString cur_string)8567 void cAnalyze::BatchRecalculateWithArgs(cString cur_string)
8568 {
8569   // RECALC <use_resources> <random_inputs> <manual_inputs in.1 in.2 in.3> <update N> <num_trials X>
8570 
8571   tArray<int> manual_inputs;  // Used only if manual inputs are specified
8572   cString msg;                // Holds any information we may want to send the driver to display
8573 
8574   // Defaults
8575   bool use_resources     = false;
8576   int  update            = -1;
8577   bool use_random_inputs = false;
8578   bool use_manual_inputs = false;
8579   int  num_trials        = 1;
8580 
8581   // Handle our recalculate arguments
8582   // Really, we should have a generalized tokenizer handle this
8583   cStringList args(cur_string);
8584   int pos = -1;
8585   if (args.PopString("use_resources") != "")      use_resources     = true;
8586   if (args.PopString("use_random_inputs") != "")  use_random_inputs = true;
8587   if ( (pos = args.LocateString("use_manual_inputs") ) != -1){
8588     use_manual_inputs = true;
8589     args.PopString("use_manual_inputs");
8590     int num = m_world->GetEnvironment().GetInputSize();
8591     manual_inputs.Resize(num);
8592     if (args.GetSize() >= pos + num - 2)
8593       for (int k = 0; k < num; k++)
8594         manual_inputs[k] = args.PopLine(pos).AsInt();
8595     else
8596       m_world->GetDriver().RaiseFatalException(1, "RecalculateWithArgs: Invalid use of use_manual_inputs");
8597   }
8598   if ( (pos = args.LocateString("update")) != -1 ){
8599     args.PopString("update");
8600     if (args.GetSize() >= pos - 1){
8601       update = args.PopLine(pos).AsInt();
8602     } else
8603        m_world->GetDriver().RaiseFatalException(1, "RecalculateWithArgs: Invalid use of update (did you specify a value?)");
8604   }
8605   if ( (pos = args.LocateString("num_trials")) != -1){
8606     args.PopString("num_trials");
8607     if (args.GetSize() >= pos - 1)
8608       num_trials = args.PopLine(pos).AsInt();
8609     else
8610       m_world->GetDriver().RaiseFatalException(1, "RecalculateWithArgs: Invalid use of num_trials (did you specify a value?)");
8611   }
8612 
8613   if (use_manual_inputs)
8614     use_random_inputs = false;
8615 
8616   cCPUTestInfo test_info;
8617   if (use_manual_inputs)
8618     test_info.UseManualInputs(manual_inputs);
8619   else
8620     test_info.UseRandomInputs(use_random_inputs);
8621   test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
8622 
8623   // Notifications
8624   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8625     msg.Set("Running batch %d through test CPUs...", cur_batch);
8626     m_world->GetDriver().NotifyComment(msg);
8627   } else{
8628     msg.Set("Running through test CPUs...");
8629     m_world->GetDriver().NotifyComment(msg);
8630   }
8631   if (m_world->GetVerbosity() >= VERBOSE_ON && batch[cur_batch].IsLineage() == false) {
8632     msg.Set("Batch may not be a lineage; parent and ancestor distances may not be correct");
8633     m_world->GetDriver().NotifyWarning(msg);
8634   }
8635 
8636   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8637   cAnalyzeGenotype * genotype = NULL;
8638   cAnalyzeGenotype * last_genotype = NULL;
8639   while ((genotype = batch_it.Next()) != NULL) {
8640     // If the previous genotype was the parent of this one, pass in a pointer
8641     // to it for improved recalculate (such as distance to parent, etc.)
8642     if (last_genotype != NULL && genotype->GetParentID() == last_genotype->GetID()) {
8643       genotype->Recalculate(m_ctx, &test_info, last_genotype, num_trials);
8644     } else {
8645       genotype->Recalculate(m_ctx, &test_info, NULL, num_trials);
8646     }
8647     last_genotype = genotype;
8648   }
8649 
8650   return;
8651 }
8652 
8653 
BatchRename(cString cur_string)8654 void cAnalyze::BatchRename(cString cur_string)
8655 {
8656   if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Renaming organisms..." << endl;
8657   else cout << "Renaming organisms in batch " << cur_batch << endl;
8658 
8659   // If a number is given with rename, start at that number...
8660 
8661   int id_num = cur_string.PopWord().AsInt();
8662   tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8663   cAnalyzeGenotype * genotype = NULL;
8664   while ((genotype = batch_it.Next()) != NULL) {
8665     cString name = cStringUtil::Stringf("org-%d", id_num);
8666     genotype->SetID(id_num);
8667     genotype->SetName(name);
8668     id_num++;
8669   }
8670 }
8671 
CloseFile(cString cur_string)8672 void cAnalyze::CloseFile(cString cur_string)
8673 {
8674   m_world->GetDataFileManager().Remove(cur_string.PopWord());
8675 }
8676 
8677 
PrintStatus(cString cur_string)8678 void cAnalyze::PrintStatus(cString cur_string)
8679 {
8680   // No Args needed...
8681   (void) cur_string;
8682 
8683   cout << "Status Report:" << endl;
8684   for (int i = 0; i < GetNumBatches(); i++) {
8685     if (i == cur_batch || batch[i].List().GetSize() > 0) {
8686       cout << "  Batch " << i << " -- "
8687       << batch[i].List().GetSize() << " genotypes.";
8688       if (i == cur_batch) cout << "  <current>";
8689       if (batch[i].IsLineage() == true) cout << "  <lineage>";
8690       if (batch[i].IsAligned() == true) cout << "  <aligned>";
8691 
8692       cout << endl;
8693     }
8694   }
8695 }
8696 
PrintDebug(cString cur_string)8697 void cAnalyze::PrintDebug(cString cur_string)
8698 {
8699   cout << "::: " << cur_string << '\n';
8700 }
8701 
PrintTestInfo(cString cur_string)8702 void cAnalyze::PrintTestInfo(cString cur_string)
8703 {
8704   cFlexVar var1(1), var2(2.0), var3('3'), var4("four");
8705   cFlexVar var5(9), var6(9.0), var7('9'), var8("9");
8706 
8707   tArray<cFlexVar> vars(10);
8708   vars[0] = "Testing";
8709   vars[1] = 1;
8710   vars[2] = 2.0;
8711   vars[3] = '3';
8712   vars[4] = "four";
8713   vars[5] = 9;
8714   vars[6] = 9.0;
8715   vars[7] = '9';
8716   vars[8] = "9";
8717 
8718   cout << "AsString:  ";
8719   for (int i = 0; i < 10; i++) cout << i << ":" << vars[i].AsString() << " ";
8720   cout << endl;
8721 
8722   cout << "AsInt:  ";
8723   for (int i = 0; i < 10; i++) cout << i << ":" << vars[i].AsInt() << " ";
8724   cout << endl;
8725 
8726   for (int i = 0; i < 10; i++) {
8727     for (int j = i+1; j < 10; j++) {
8728       cout << "     vars[" << i << "] <= vars[" << j << "] ?  " << (vars[i] <= vars[j]);
8729       cout << "     vars[" << j << "] <= vars[" << i << "] ?  " << (vars[j] <= vars[i]);
8730       cout << endl;
8731     }
8732   }
8733 
8734 }
8735 
IncludeFile(cString cur_string)8736 void cAnalyze::IncludeFile(cString cur_string)
8737 {
8738   while (cur_string.GetSize() > 0) {
8739     cString filename = cur_string.PopWord();
8740 
8741     cInitFile include_file(filename, m_world->GetWorkingDir());
8742 
8743     tList<cAnalyzeCommand> include_list;
8744     LoadCommandList(include_file, include_list);
8745     ProcessCommands(include_list);
8746   }
8747 }
8748 
CommandSystem(cString cur_string)8749 void cAnalyze::CommandSystem(cString cur_string)
8750 {
8751   if (cur_string.GetSize() == 0) {
8752     cerr << "Error: Keyword \"system\" must be followed by command to run." << endl;
8753     if (exit_on_error) exit(1);
8754   }
8755 
8756   cout << "Running System Command: " << cur_string << endl;
8757 
8758   system(cur_string);
8759 }
8760 
CommandInteractive(cString cur_string)8761 void cAnalyze::CommandInteractive(cString cur_string)
8762 {
8763   // No Args needed...
8764   (void) cur_string;
8765 
8766   RunInteractive();
8767 }
8768 
8769 
8770 /*
8771  FIXME@kgn
8772  Must categorize COMPETE command.
8773  */
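/* Arguments to COMPETE (positional, in this order): */
/*
 batch_size             : number of offspring to sample into the target batch
 from_id                : batch the parents are drawn from
 to_id=current          : batch that receives the offspring
 initial_next_id=-1     : first ID given to the offspring (a negative value keeps the current counter)
 initial_next_update=-1 : update-born value given to the offspring (a negative value keeps the current counter)
 */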
BatchCompete(cString cur_string)8781 void cAnalyze::BatchCompete(cString cur_string)
8782 {
8783   if (cur_string.GetSize() == 0) {
8784     cerr << "Compete Error: Must include target batch size!" << endl;
8785     if (exit_on_error) exit(1);
8786   }
8787   int batch_size = cur_string.PopWord().AsInt();
8788 
8789   if (cur_string.GetSize() == 0) {
8790     cerr << "Compete Error: Must include from ID!" << endl;
8791     if (exit_on_error) exit(1);
8792   }
8793   int batch_from = cur_string.PopWord().AsInt();
8794 
8795   int batch_to = cur_batch;
8796   if (cur_string.GetSize() > 0) batch_to = cur_string.PopWord().AsInt();
8797 
8798   int initial_next_id = -1;
8799   if (cur_string.GetSize() > 0) {
8800     initial_next_id = cur_string.PopWord().AsInt();
8801   }
8802   if (0 <= initial_next_id) {
8803     SetTempNextID(initial_next_id);
8804   }
8805 
8806   int initial_next_update = -1;
8807   if (cur_string.GetSize() > 0) {
8808     initial_next_update = cur_string.PopWord().AsInt();
8809   }
8810   if (0 <= initial_next_update) {
8811     SetTempNextUpdate(initial_next_update);
8812   }
8813 
8814   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8815     cout << "Compete " << batch_size << " organisms from batch " << batch_from << " to batch " << batch_to << ";" << endl;
8816     cout << "assigning new IDs starting with " << GetTempNextID() << "." << endl;
8817   }
8818 
8819   /* Get iterator into "from" batch. */
8820   tListIterator<cAnalyzeGenotype> batch_it(batch[batch_from].List());
8821   /* Get size of "from" batch. */
8822   const int parent_batch_size = batch[batch_from].List().GetSize();
8823 
8824   /* Create scheduler. */
8825   cSchedule* schedule = new cProbSchedule(
8826                                           parent_batch_size,
8827                                           m_world->GetRandom().GetInt(0x7FFFFFFF)
8828                                           );
8829 
8830   /* Initialize scheduler with fitness values per-organism. */
8831   tArray<cAnalyzeGenotype*> genotype_array(parent_batch_size);
8832   tArray<Genome> offspring_genome_array(parent_batch_size);
8833   tArray<cMerit> fitness_array(parent_batch_size);
8834   cAnalyzeGenotype * genotype = NULL;
8835 
8836   cCPUTestInfo test_info;
8837 
8838   /*
8839    FIXME@kgn
8840    This should be settable by an optional argument.
8841    */
8842   test_info.UseRandomInputs(true);
8843 
8844   int array_pos = 0;
8845   while ((genotype = batch_it.Next()) != NULL) {
8846     genotype_array[array_pos] = genotype;
8847     genotype->Recalculate(m_world->GetDefaultContext(), &test_info, NULL);
8848     if(genotype->GetViable()){
8849       /*
8850        FIXME@kgn
8851        - HACK : multiplication by 1000 because merits less than 1 are truncated
8852        to zero.
8853        */
8854       fitness_array[array_pos] = genotype->GetFitness() * 1000.;
8855       /*
8856        FIXME@kgn
8857        - Need to note somewhere that we are using first descendent of the
8858        parent, if the parent is viable, so that genome of first descendent may
8859        differ from that of parent.
8860        */
8861       offspring_genome_array[array_pos] = test_info.GetTestOrganism(0)->OffspringGenome();
8862     } else {
8863       fitness_array[array_pos] = 0.0;
8864     }
8865     schedule->Adjust(array_pos, fitness_array[array_pos]);
8866     array_pos++;
8867   }
8868 
8869   /* Use scheduler to sample organisms in "from" batch. */
8870   for(int i=0; i<batch_size; /* don't increment i yet */){
8871     /* Sample an organism. */
8872     array_pos = schedule->GetNextID();
8873     if(array_pos < 0){
8874       cout << "Warning: No organisms in origin batch have positive fitness, cannot sample to destination batch." << endl;
8875       break;
8876     }
8877     genotype = genotype_array[array_pos];
8878 
8879     double copy_mut_prob = m_world->GetConfig().COPY_MUT_PROB.Get();
8880     double ins_mut_prob = m_world->GetConfig().DIVIDE_INS_PROB.Get();
8881     double del_mut_prob = m_world->GetConfig().DIVIDE_DEL_PROB.Get();
8882     int ins_line = -1;
8883     int del_line = -1;
8884 
8885     Genome child_genome = offspring_genome_array[array_pos];
8886     Sequence& child_seq = child_genome.GetSequence();
8887     const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(child_genome.GetInstSet());
8888 
8889     if (copy_mut_prob > 0.0) {
8890       for (int n = 0; n < child_genome.GetSize(); n++) {
8891         if (m_world->GetRandom().P(copy_mut_prob)) {
8892           child_seq[n] = inst_set.GetRandomInst(m_ctx);
8893         }
8894       }
8895     }
8896 
8897     /* Perform an Insertion if it has one. */
8898     if (m_world->GetRandom().P(ins_mut_prob)) {
8899       ins_line = m_world->GetRandom().GetInt(child_genome.GetSize() + 1);
8900       child_seq.Insert(ins_line, inst_set.GetRandomInst(m_ctx));
8901     }
8902 
8903     /* Perform a Deletion if it has one. */
8904     if (m_world->GetRandom().P(del_mut_prob)) {
8905       del_line = m_world->GetRandom().GetInt(child_genome.GetSize());
8906       child_seq.Remove(del_line);
8907     }
8908 
8909     /* Create (possibly mutated) offspring. */
8910     cAnalyzeGenotype* new_genotype = new cAnalyzeGenotype(m_world, child_genome);
8911 
8912     int parent_id = genotype->GetID();
8913     int child_id = GetTempNextID();
8914     SetTempNextID(child_id + 1);
8915     cString child_name = cStringUtil::Stringf("org-%d", child_id);
8916 
8917     new_genotype->SetParentID(parent_id);
8918     new_genotype->SetID(child_id);
8919     new_genotype->SetName(child_name);
8920     new_genotype->SetUpdateBorn(GetTempNextUpdate());
8921 
8922     /* Place offspring in "to" batch. */
8923     batch[batch_to].List().PushRear(new_genotype);
8924     /* Increment and continue. */
8925     i++;
8926   }
8927 
8928   SetTempNextUpdate(GetTempNextUpdate() + 1);
8929 
8930   batch[batch_to].SetLineage(false);
8931   batch[batch_to].SetAligned(false);
8932 
8933   if(schedule){ delete schedule; schedule = 0; }
8934 
8935   return;
8936 }
8937 
8938 
FunctionCreate(cString cur_string,tList<cAnalyzeCommand> & clist)8939 void cAnalyze::FunctionCreate(cString cur_string, tList<cAnalyzeCommand>& clist)
8940 {
8941   int num_args = cur_string.CountNumWords();
8942   if (num_args < 1) {
8943     cerr << "Error: Must provide function name when creating function.";
8944     if (exit_on_error) exit(1);
8945   }
8946 
8947   cString fun_name = cur_string.PopWord();
8948 
8949   if (FindAnalyzeCommandDef(fun_name) != NULL) {
8950     cerr << "Error: Cannot create function '" << fun_name
8951     << "'; already exists." << endl;
8952     if (exit_on_error) exit(1);
8953   }
8954 
8955   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Creating function: " << fun_name << endl;
8956 
8957   // Create the new function...
8958   cAnalyzeFunction * new_function = new cAnalyzeFunction(fun_name);
8959   while (clist.GetSize() > 0) {
8960     new_function->GetCommandList()->PushRear(clist.Pop());
8961   }
8962 
8963   // Save the function on the new list...
8964   function_list.PushRear(new_function);
8965 }
8966 
FunctionRun(const cString & fun_name,cString args)8967 bool cAnalyze::FunctionRun(const cString & fun_name, cString args)
8968 {
8969   if (m_world->GetVerbosity() >= VERBOSE_ON) {
8970     cout << "Running function: " << fun_name << endl;
8971     // << " with args: " << args << endl;
8972   }
8973 
8974   // Find the function we're about to run...
8975   cAnalyzeFunction * found_function = NULL;
8976   tListIterator<cAnalyzeFunction> function_it(function_list);
8977   while (function_it.Next() != NULL) {
8978     if (function_it.Get()->GetName() == fun_name) {
8979       found_function = function_it.Get();
8980       break;
8981     }
8982   }
8983 
8984   // If we were unable to find the command we're looking for, return false.
8985   if (found_function == NULL) return false;
8986 
8987   // Back up the local variables
8988   cString backup_arg_vars[10];
8989   cString backup_local_vars[26];
8990   for (int i = 0; i < 10; i++) backup_arg_vars[i] = arg_variables[i];
8991   for (int i = 0; i < 26; i++) backup_local_vars[i] = local_variables[i];
8992 
8993   // Set the arg variables to the passed-in args...
8994   arg_variables[0] = fun_name;
8995   for (int i = 1; i < 10; i++) arg_variables[i] = args.PopWord();
8996   for (int i = 0; i < 26; i++) local_variables[i] = "";
8997 
8998   ProcessCommands(*(found_function->GetCommandList()));
8999 
9000   // Restore the local variables
9001   for (int i = 0; i < 10; i++) arg_variables[i] = backup_arg_vars[i];
9002   for (int i = 0; i < 26; i++) local_variables[i] = backup_local_vars[i];
9003 
9004   return true;
9005 }
9006 
9007 
BatchUtil_GetMaxLength(int batch_id)9008 int cAnalyze::BatchUtil_GetMaxLength(int batch_id)
9009 {
9010   if (batch_id < 0) batch_id = cur_batch;
9011 
9012   int max_length = 0;
9013 
9014   tListIterator<cAnalyzeGenotype> batch_it(batch[batch_id].List());
9015   cAnalyzeGenotype * genotype = NULL;
9016   while ((genotype = batch_it.Next()) != NULL) {
9017     if (genotype->GetLength() > max_length) max_length = genotype->GetLength();
9018   }
9019 
9020   return max_length;
9021 }
9022 
9023 
CommandForeach(cString cur_string,tList<cAnalyzeCommand> & clist)9024 void cAnalyze::CommandForeach(cString cur_string,
9025                               tList<cAnalyzeCommand> & clist)
9026 {
9027   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Initiating Foreach loop..." << endl;
9028 
9029   cString var = cur_string.PopWord();
9030   int num_args = cur_string.CountNumWords();
9031 
9032   cString & cur_variable = GetVariable(var);
9033 
9034   for (int i = 0; i < num_args; i++) {
9035     cur_variable = cur_string.PopWord();
9036 
9037     if (m_world->GetVerbosity() >= VERBOSE_ON) {
9038       cout << "Foreach: setting " << var << " to " << cur_variable << endl;
9039     }
9040     ProcessCommands(clist);
9041   }
9042 
9043   if (m_world->GetVerbosity() >= VERBOSE_ON) {
9044     cout << "Ending Foreach on " << var << endl;
9045   }
9046 }
9047 
9048 
CommandForRange(cString cur_string,tList<cAnalyzeCommand> & clist)9049 void cAnalyze::CommandForRange(cString cur_string,
9050                                tList<cAnalyzeCommand> & clist)
9051 {
9052   if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Initiating FORRANGE loop..." << endl;
9053 
9054   int num_args = cur_string.CountNumWords();
9055   if (num_args < 3) {
9056     cerr << "  Error: Must give variable, min and max with FORRANGE!"
9057     << endl;
9058     if (exit_on_error) exit(1);
9059   }
9060 
9061   cString var = cur_string.PopWord();
9062   double min_val = cur_string.PopWord().AsDouble();
9063   double max_val = cur_string.PopWord().AsDouble();
9064   double step_val = 1.0;
9065   if (num_args >=4 ) step_val = cur_string.PopWord().AsDouble();
9066 
9067   cString & cur_variable = GetVariable(var);
9068 
9069   // Seperate out all ints from not all ints...
9070   if (min_val == (double) ((int) min_val) &&
9071       max_val == (double) ((int) max_val) &&
9072       step_val == (double) ((int) step_val)) {
9073     for (int i = (int) min_val; i <= (int) max_val; i += (int) step_val) {
9074       cur_variable.Set("%d", i);
9075 
9076       if (m_world->GetVerbosity() >= VERBOSE_ON) {
9077         cout << "FORRANGE: setting " << var << " to " << cur_variable << endl;
9078       }
9079       ProcessCommands(clist);
9080     }
9081   } else {
9082     for (double i = min_val; i <= max_val; i += step_val) {
9083       cur_variable.Set("%f", i);
9084 
9085       if (m_world->GetVerbosity() >= VERBOSE_ON) {
9086         cout << "FORRANGE: setting " << var << " to " << cur_variable << endl;
9087       }
9088       ProcessCommands(clist);
9089     }
9090   }
9091 
9092   if (m_world->GetVerbosity() >= VERBOSE_ON) {
9093     cout << "Ending FORRANGE on " << var << endl;
9094   }
9095 }
9096 
9097 
9098 ///////////////////  Private Methods ///////////////////////////
9099 
PopDirectory(cString in_string,const cString default_dir)9100 cString cAnalyze::PopDirectory(cString in_string, const cString default_dir)
9101 {
9102   // Determing the directory name
9103   cString directory(default_dir);
9104   if (in_string.GetSize() != 0) directory = in_string.PopWord();
9105 
9106   // Make sure the directory ends in a slash.  If not, add one.
9107   int last_pos = directory.GetSize() - 1;
9108   if (directory[last_pos] != '/' && directory[last_pos] != '\\') {
9109     directory += '/';
9110   }
9111 
9112   return directory;
9113 }
9114 
PopBatch(const cString & in_string)9115 int cAnalyze::PopBatch(const cString & in_string)
9116 {
9117   int batch = cur_batch;
9118   if (in_string.GetSize() != 0 && in_string != "current") {
9119     batch = in_string.AsInt();
9120   }
9121 
9122   return batch;
9123 }
9124 
PopGenotype(cString gen_desc,int batch_id)9125 cAnalyzeGenotype * cAnalyze::PopGenotype(cString gen_desc, int batch_id)
9126 {
9127   if (batch_id == -1) batch_id = cur_batch;
9128   tListPlus<cAnalyzeGenotype> & gen_list = batch[batch_id].List();
9129   gen_desc.ToLower();
9130 
9131   cAnalyzeGenotype * found_gen = NULL;
9132   if (gen_desc == "num_cpus")
9133     found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetNumCPUs);
9134   else if (gen_desc == "total_cpus")
9135     found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetTotalCPUs);
9136   else if (gen_desc == "merit")
9137     found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetMerit);
9138   else if (gen_desc == "fitness")
9139     found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetFitness);
9140   else if (gen_desc.IsNumeric(0))
9141     found_gen = gen_list.PopValue(&cAnalyzeGenotype::GetID, gen_desc.AsInt());
9142   else if (gen_desc == "random") {
9143     int gen_pos = random.GetUInt(gen_list.GetSize());
9144     found_gen = gen_list.PopPos(gen_pos);
9145   }
9146   else {
9147     cout << "  Error: unknown type " << gen_desc << endl;
9148     if (exit_on_error) exit(1);
9149   }
9150 
9151   return found_gen;
9152 }
9153 
9154 
// Returns a reference to the variable named by the single character in 'var':
//   'a'-'z' -> variables[], 'A'-'Z' -> local_variables[], '0'-'9' -> arg_variables[].
// Any other name is reported as an error.  If the program is not configured to exit on
// errors, a reference to an empty scratch string is returned so that the caller never
// receives a reference outside the variable arrays.
cString& cAnalyze::GetVariable(const cString & var)
{
  if (var.GetSize() != 1 ||
      (var.IsLetter(0) == false && var.IsNumeric(0) == false)) {
    cerr << "Error: Illegal variable " << var << " being used." << endl;
    if (exit_on_error) exit(1);

    // Reset on every use so an earlier write through this reference cannot leak out.
    static cString invalid_variable;
    invalid_variable = "";
    return invalid_variable;
  }

  const char name = var[0];
  if (var.IsLowerLetter(0) == true) {
    return variables[static_cast<int>(name - 'a')];
  }
  if (var.IsUpperLetter(0) == true) {
    return local_variables[static_cast<int>(name - 'A')];
  }

  // Otherwise it must be a number...
  return arg_variables[static_cast<int>(name - '0')];
}
9175 
9176 
LoadCommandList(cInitFile & init_file,tList<cAnalyzeCommand> & clist,int start_at)9177 int cAnalyze::LoadCommandList(cInitFile& init_file, tList<cAnalyzeCommand>& clist, int start_at)
9178 {
9179   for (int i = start_at; i < init_file.GetNumLines(); i++) {
9180     cString cur_string = init_file.GetLine(i);
9181     cString command = cur_string.PopWord();
9182 
9183     cAnalyzeCommand* cur_command;
9184     cAnalyzeCommandDefBase* command_def = FindAnalyzeCommandDef(command);
9185 
9186     if (command == "END") {
9187       // We are done with this section of code; break out...
9188       return i;
9189     } else if (command_def != NULL && command_def->IsFlowCommand() == true) {
9190       // This code has a body to it... fill it out!
9191       cur_command = new cAnalyzeFlowCommand(command, cur_string);
9192       i = LoadCommandList(init_file, *(cur_command->GetCommandList()), i + 1); // Start processing at the next line
9193     } else {
9194       // This is a normal command...
9195       cur_command = new cAnalyzeCommand(command, cur_string);
9196     }
9197 
9198     clist.PushRear(cur_command);
9199   }
9200 
9201   return init_file.GetNumLines();
9202 }
9203 
InteractiveLoadCommandList(tList<cAnalyzeCommand> & clist)9204 void cAnalyze::InteractiveLoadCommandList(tList<cAnalyzeCommand> & clist)
9205 {
9206   interactive_depth++;
9207   char text_input[2048];
9208   while (true) {
9209     for (int i = 0; i <= interactive_depth; i++) {
9210       cout << ">>";
9211     }
9212     cout << " ";
9213     cout.flush();
9214     cin.getline(text_input, 2048);
9215     cString cur_input(text_input);
9216     cString command = cur_input.PopWord();
9217 
9218     cAnalyzeCommand * cur_command;
9219     cAnalyzeCommandDefBase * command_def = FindAnalyzeCommandDef(command);
9220 
9221     if (command == "END") {
9222       // We are done with this section of code; break out...
9223       break;
9224     }
9225     else if (command_def != NULL && command_def->IsFlowCommand() == true) {
9226       // This code has a body to it... fill it out!
9227       cur_command = new cAnalyzeFlowCommand(command, cur_input);
9228       InteractiveLoadCommandList(*(cur_command->GetCommandList()));
9229     }
9230     else {
9231       // This is a normal command...
9232       cur_command = new cAnalyzeCommand(command, cur_input);
9233     }
9234 
9235     clist.PushRear(cur_command);
9236   }
9237   interactive_depth--;
9238 }
9239 
PreProcessArgs(cString & args)9240 void cAnalyze::PreProcessArgs(cString & args)
9241 {
9242   int pos = 0;
9243   int search_start = 0;
9244   while ((pos = args.Find('$', search_start)) != -1) {
9245     // Setup the variable name that was found...
9246     char varlet = args[pos+1];
9247     cString varname("$");
9248     varname += varlet;
9249 
9250     // Determine the variable and act on it.
9251     int varsize = 0;
9252     if (varlet == '$') {
9253       args.Clip(pos+1, 1);
9254       varsize = 1;
9255     }
9256     else if (varlet >= 'a' && varlet <= 'z') {
9257       int var_id = (int) (varlet - 'a');
9258       args.Replace(varname, variables[var_id], pos);
9259       varsize = variables[var_id].GetSize();
9260     }
9261     else if (varlet >= 'A' && varlet <= 'Z') {
9262       int var_id = (int) (varlet - 'A');
9263       args.Replace(varname, local_variables[var_id], pos);
9264       varsize = local_variables[var_id].GetSize();
9265     }
9266     else if (varlet >= '0' && varlet <= '9') {
9267       int var_id = (int) (varlet - '0');
9268       args.Replace(varname, arg_variables[var_id], pos);
9269       varsize = arg_variables[var_id].GetSize();
9270     }
9271     search_start = pos + varsize;
9272   }
9273 }
9274 
ProcessCommands(tList<cAnalyzeCommand> & clist)9275 void cAnalyze::ProcessCommands(tList<cAnalyzeCommand>& clist)
9276 {
9277   // Process the command list...
9278   tListIterator<cAnalyzeCommand> command_it(clist);
9279   command_it.Reset();
9280   cAnalyzeCommand* cur_command = NULL;
9281   while ((cur_command = command_it.Next()) != NULL) {
9282     cString command = cur_command->GetCommand();
9283     cString args = cur_command->GetArgs();
9284     PreProcessArgs(args);
9285 
9286     cAnalyzeCommandDefBase* command_fun = FindAnalyzeCommandDef(command);
9287 
9288     cUserFeedback feedback;
9289     if (command_fun != NULL) {
9290       command_fun->Run(this, args, *cur_command, feedback);
9291       for (int i = 0; i < feedback.GetNumMessages(); i++) {
9292         switch (feedback.GetMessageType(i)) {
9293           case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
9294           case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
9295           default: break;
9296         };
9297         cerr << feedback.GetMessage(i) << endl;
9298         if (exit_on_error && feedback.GetNumErrors()) exit(1);
9299       }
9300     } else if (!FunctionRun(command, args)) {
9301       cerr << "error: Unknown analysis keyword '" << command << "'." << endl;
9302       if (exit_on_error) exit(1);
9303     }
9304   }
9305 }
9306 
9307 
PopCommonCPUTestParameters(cWorld * in_world,cString & cur_string,cCPUTestInfo & test_info,cResourceHistory * in_resource_history,int in_resource_time_spent_offset)9308 void cAnalyze::PopCommonCPUTestParameters(cWorld* in_world, cString& cur_string, cCPUTestInfo& test_info, cResourceHistory* in_resource_history, int in_resource_time_spent_offset)
9309 {
9310   tArray<int> manual_inputs;  // Used only if manual inputs are specified
9311   cString msg;                // Holds any information we may want to send the driver to display
9312   int use_resources      = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 0;
9313   int update             = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;
9314   bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false;
9315   bool use_manual_inputs = false;
9316 
9317   //Manual inputs will override random input request and must be the last arguments.
9318   if (cur_string.CountNumWords() > 0){
9319     if (cur_string.CountNumWords() == in_world->GetEnvironment().GetInputSize()){
9320       manual_inputs.Resize(in_world->GetEnvironment().GetInputSize());
9321       use_random_inputs = false;
9322       use_manual_inputs = true;
9323       for (int k = 0; cur_string.GetSize(); k++)
9324         manual_inputs[k] = cur_string.PopWord().AsInt();
9325     } else if (in_world->GetVerbosity() >= VERBOSE_ON){
9326       msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
9327               cur_string.CountNumWords(), in_world->GetEnvironment().GetInputSize());
9328       in_world->GetDriver().NotifyWarning(msg);
9329     }
9330   }
9331 
9332   if (use_manual_inputs)
9333     test_info.UseManualInputs(manual_inputs);
9334   else
9335     test_info.UseRandomInputs(use_random_inputs);
9336   test_info.SetResourceOptions(use_resources, in_resource_history, update, in_resource_time_spent_offset);
9337 }
9338 
9339 
// The following function prints a table cell whose background color is based on a comparison
// with the organism's parent (the result of which is passed in as the 'compare' argument).  The
// 'cell_flags' argument holds any other attributes you want in the <td> tag; 'null_text' is the text
// printed in place of the value when the comparison result is COMPARE_RESULT_OFF (sometimes "none" or
// "N/A"); and 'print_text' is a bool asking if the text should be included at all, or just the
// background color.
9345 
HTMLPrintStat(const cFlexVar & value,std::ostream & fp,int compare,const cString & cell_flags,const cString & null_text,bool print_text)9346 void cAnalyze::HTMLPrintStat(const cFlexVar & value, std::ostream& fp, int compare,
9347                              const cString & cell_flags, const cString & null_text, bool print_text)
9348 {
9349   fp << "<td " << cell_flags << " ";
9350   if (compare == COMPARE_RESULT_OFF) {
9351     fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_NEG2.Get() << "\">";
9352     if (print_text == true) fp << null_text << " ";
9353     else fp << "&nbsp; ";
9354     return;
9355   }
9356 
9357   if (compare == COMPARE_RESULT_NEG)       fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_NEG1.Get() << "\">";
9358   else if (compare == COMPARE_RESULT_SAME) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_SAME.Get() << "\">";
9359   else if (compare == COMPARE_RESULT_POS)  fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_POS1.Get() << "\">";
9360   else if (compare == COMPARE_RESULT_ON)   fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_POS2.Get() << "\">";
9361   else if (compare == COMPARE_RESULT_DIFF) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_DIFF.Get() << "\">";
9362   else {
9363     std::cerr << "Error! Illegal case in Compare:" << compare << std::endl;
9364     exit(0);
9365   }
9366 
9367   if (print_text == true) fp << value << " ";
9368   else fp << "&nbsp; ";
9369 
9370 }
9371 
CompareFlexStat(const cFlexVar & org_stat,const cFlexVar & parent_stat,int compare_type)9372 int cAnalyze::CompareFlexStat(const cFlexVar & org_stat, const cFlexVar & parent_stat, int compare_type)
9373 {
9374   // If no comparisons need be done, return zero and stop here.
9375   if (compare_type == FLEX_COMPARE_NONE) {
9376     return COMPARE_RESULT_SAME;
9377   }
9378 
9379   // In all cases, if the stats are the same, we should return this and stop.
9380   if (org_stat == parent_stat) return COMPARE_RESULT_SAME;
9381 
9382   // If we made it this far and all we care about is if they differ, return that they do.
9383   if (compare_type == FLEX_COMPARE_DIFF) return COMPARE_RESULT_DIFF;
9384 
9385   // If zero is not special we can calculate our result.
9386   if (compare_type == FLEX_COMPARE_MAX) {     // Color higher values as beneficial, lower as harmful.
9387     if (org_stat > parent_stat) return COMPARE_RESULT_POS;
9388     return COMPARE_RESULT_NEG;
9389   }
9390   if (compare_type == FLEX_COMPARE_MIN) {     // Color lower values as beneficial, higher as harmful.
9391     if (org_stat > parent_stat) return COMPARE_RESULT_NEG;
9392     return COMPARE_RESULT_POS;
9393   }
9394 
9395 
9396   // If we made it this far, it means that zero has a special status.
9397   if (org_stat == 0) return COMPARE_RESULT_OFF;
9398   if (parent_stat == 0) return COMPARE_RESULT_ON;
9399 
9400 
9401   // No zeros are involved, so we can go back to basic checks...
9402   if (compare_type == FLEX_COMPARE_DIFF2) return COMPARE_RESULT_DIFF;
9403 
9404   if (compare_type == FLEX_COMPARE_MAX2) {     // Color higher values as beneficial, lower as harmful.
9405     if (org_stat > parent_stat) return COMPARE_RESULT_POS;
9406     return COMPARE_RESULT_NEG;
9407   }
9408   if (compare_type == FLEX_COMPARE_MIN2) {     // Color lower values as beneficial, higher as harmful.
9409     if (org_stat > parent_stat) return COMPARE_RESULT_NEG;
9410     return COMPARE_RESULT_POS;
9411   }
9412 
9413   assert(false);  // One of the other options should have been chosen.
9414   return 0;
9415 }
9416 
9417 
9418 
9419 
9420 
AddLibraryDef(const cString & name,void (cAnalyze::* _fun)(cString))9421 void cAnalyze::AddLibraryDef(const cString & name,
9422                              void (cAnalyze::*_fun)(cString))
9423 {
9424   command_lib.PushRear(new cAnalyzeCommandDef(name, _fun));
9425 }
9426 
AddLibraryDef(const cString & name,void (cAnalyze::* _fun)(cString,tList<cAnalyzeCommand> &))9427 void cAnalyze::AddLibraryDef(const cString & name,
9428                              void (cAnalyze::*_fun)(cString, tList<cAnalyzeCommand> &))
9429 {
9430   command_lib.PushRear(new cAnalyzeFlowCommandDef(name, _fun));
9431 }
9432 
SetupCommandDefLibrary()9433 void cAnalyze::SetupCommandDefLibrary()
9434 {
9435   if (command_lib.GetSize() != 0) return; // Library already setup.
9436 
9437   AddLibraryDef("LOAD_ORGANISM", &cAnalyze::LoadOrganism);
9438   AddLibraryDef("LOAD_SEQUENCE", &cAnalyze::LoadSequence);
9439   AddLibraryDef("LOAD_RESOURCES", &cAnalyze::LoadResources);
9440   AddLibraryDef("LOAD", &cAnalyze::LoadFile);
9441 
9442   // Reduction and sampling commands...
9443   AddLibraryDef("FILTER", &cAnalyze::CommandFilter);
9444   AddLibraryDef("FIND_GENOTYPE", &cAnalyze::FindGenotype);
9445   AddLibraryDef("FIND_ORGANISM", &cAnalyze::FindOrganism);
9446   AddLibraryDef("FIND_LINEAGE", &cAnalyze::FindLineage);
9447   AddLibraryDef("FIND_SEX_LINEAGE", &cAnalyze::FindSexLineage);
9448   AddLibraryDef("FIND_CLADE", &cAnalyze::FindClade);
9449   AddLibraryDef("FIND_LAST_COMMON_ANCESTOR", &cAnalyze::FindLastCommonAncestor);
9450   AddLibraryDef("SAMPLE_ORGANISMS", &cAnalyze::SampleOrganisms);
9451   AddLibraryDef("SAMPLE_GENOTYPES", &cAnalyze::SampleGenotypes);
9452   AddLibraryDef("KEEP_TOP", &cAnalyze::KeepTopGenotypes);
9453   AddLibraryDef("TRUNCATELINEAGE", &cAnalyze::TruncateLineage); // Depricate!
9454   AddLibraryDef("TRUNCATE_LINEAGE", &cAnalyze::TruncateLineage);
9455   AddLibraryDef("SAMPLE_OFFSPRING", &cAnalyze::SampleOffspring);
9456 
9457   // Direct output commands...
9458   AddLibraryDef("PRINT", &cAnalyze::CommandPrint);
9459   AddLibraryDef("TRACE", &cAnalyze::CommandTrace);
9460   AddLibraryDef("PRINT_TASKS", &cAnalyze::CommandPrintTasks);
9461   AddLibraryDef("PRINT_TASKS_QUALITY", &cAnalyze::CommandPrintTasksQuality);
9462   AddLibraryDef("DETAIL", &cAnalyze::CommandDetail);
9463   AddLibraryDef("DETAIL_TIMELINE", &cAnalyze::CommandDetailTimeline);
9464   AddLibraryDef("DETAIL_BATCHES", &cAnalyze::CommandDetailBatches);
9465   AddLibraryDef("DETAIL_AVERAGE", &cAnalyze::CommandDetailAverage);
9466   AddLibraryDef("DETAIL_INDEX", &cAnalyze::CommandDetailIndex);
9467   AddLibraryDef("HISTOGRAM", &cAnalyze::CommandHistogram);
9468 
9469   // Population analysis commands...
9470   AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
9471   AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
9472   AddLibraryDef("PRINT_DISTANCES", &cAnalyze::CommandPrintDistances);
9473   AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
9474   AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
9475   AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);
9476   AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::AnalyzeCommunityComplexity);
9477   AddLibraryDef("PRINT_RESOURCE_FITNESS_MAP", &cAnalyze::CommandPrintResourceFitnessMap);
9478 
9479   // Individual organism analysis...
9480   AddLibraryDef("MAP", &cAnalyze::CommandMapTasks);  // Deprecated...
9481   AddLibraryDef("MAP_TASKS", &cAnalyze::CommandMapTasks);
9482   AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
9483   AddLibraryDef("CALC_FUNCTIONAL_MODULARITY", &cAnalyze::CommandCalcFunctionalModularity);
9484   AddLibraryDef("ANALYZE_REDUNDANCY_BY_INST_FAILURE", &cAnalyze::CommandAnalyzeRedundancyByInstFailure);
9485   AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
9486   AddLibraryDef("ANALYZE_COMPLEXITY", &cAnalyze::AnalyzeComplexity);
9487   AddLibraryDef("ANALYZE_LINEAGE_COMPLEXITY", &cAnalyze::AnalyzeLineageComplexitySitesN);
9488   AddLibraryDef("ANALYZE_FITNESS_TWO_SITES", &cAnalyze::AnalyzeFitnessLandscapeTwoSites);
9489   AddLibraryDef("ANALYZE_COMPLEXITY_TWO_SITES", &cAnalyze::AnalyzeComplexityTwoSites);
9490   AddLibraryDef("ANALYZE_KNOCKOUTS", &cAnalyze::AnalyzeKnockouts);
9491   AddLibraryDef("ANALYZE_POP_COMPLEXITY", &cAnalyze::AnalyzePopComplexity);
9492   AddLibraryDef("MAP_DEPTH", &cAnalyze::CommandMapDepth);
9493   // (Untested) AddLibraryDef("PAIRWISE_ENTROPY", &cAnalyze::CommandPairwiseEntropy);
9494 
9495   // Population comparison commands...
9496   AddLibraryDef("HAMMING", &cAnalyze::CommandHamming);
9497   AddLibraryDef("LEVENSTEIN", &cAnalyze::CommandLevenstein);
9498   AddLibraryDef("SPECIES", &cAnalyze::CommandSpecies);
9499   AddLibraryDef("RECOMBINE", &cAnalyze::CommandRecombine);
9500   AddLibraryDef("RECOMBINE_SAMPLE", &cAnalyze::CommandRecombineSample);
9501   AddLibraryDef("MUTAGENIZE", &cAnalyze::CommandMutagenize);
9502 
9503   // Lineage analysis commands...
9504   AddLibraryDef("ALIGN", &cAnalyze::CommandAlign);
9505   AddLibraryDef("ANALYZE_NEWINFO", &cAnalyze::AnalyzeNewInfo);
9506   AddLibraryDef("MUTATION_REVERT", &cAnalyze::MutationRevert);
9507 
9508   // Build input files for avida...
9509   AddLibraryDef("WRITE_CLONE", &cAnalyze::WriteClone);
9510   AddLibraryDef("WRITE_INJECT_EVENTS", &cAnalyze::WriteInjectEvents);
9511   AddLibraryDef("WRITE_COMPETITION", &cAnalyze::WriteCompetition);
9512 
9513   // Automated analysis
9514   AddLibraryDef("ANALYZE_MUTS", &cAnalyze::AnalyzeMuts);
9515   AddLibraryDef("ANALYZE_INSTRUCTIONS", &cAnalyze::AnalyzeInstructions);
9516   AddLibraryDef("ANALYZE_INST_POP", &cAnalyze::AnalyzeInstPop);
9517   AddLibraryDef("ANALYZE_BRANCHING", &cAnalyze::AnalyzeBranching);
9518   AddLibraryDef("ANALYZE_MUTATION_TRACEBACK",
9519                 &cAnalyze::AnalyzeMutationTraceback);
9520   AddLibraryDef("ANALYZE_MATE_SELECTION", &cAnalyze::AnalyzeMateSelection);
9521   AddLibraryDef("ANALYZE_COMPLEXITY_DELTA", &cAnalyze::AnalyzeComplexityDelta);
9522 
9523   // Environment manipulation
9524   AddLibraryDef("ENVIRONMENT", &cAnalyze::EnvironmentSetup);
9525 
9526   // Documantation...
9527   AddLibraryDef("HELPFILE", &cAnalyze::CommandHelpfile);
9528 
9529   // Control commands...
9530   AddLibraryDef("SET", &cAnalyze::VarSet);
9531   AddLibraryDef("CONFIG_GET", &cAnalyze::ConfigGet);
9532   AddLibraryDef("CONFIG_SET", &cAnalyze::ConfigSet);
9533   AddLibraryDef("SET_BATCH", &cAnalyze::BatchSet);
9534   AddLibraryDef("NAME_BATCH", &cAnalyze::BatchName);
9535   AddLibraryDef("TAG_BATCH", &cAnalyze::BatchTag);
9536   AddLibraryDef("PURGE_BATCH", &cAnalyze::BatchPurge);
9537   AddLibraryDef("DUPLICATE", &cAnalyze::BatchDuplicate);
9538   AddLibraryDef("RECALCULATE", &cAnalyze::BatchRecalculate);
9539   AddLibraryDef("RECALC", &cAnalyze::BatchRecalculateWithArgs);
9540   AddLibraryDef("RENAME", &cAnalyze::BatchRename);
9541   AddLibraryDef("CLOSE_FILE", &cAnalyze::CloseFile);
9542   AddLibraryDef("STATUS", &cAnalyze::PrintStatus);
9543   AddLibraryDef("ECHO", &cAnalyze::PrintDebug);
9544   AddLibraryDef("DEBUG", &cAnalyze::PrintDebug);
9545   AddLibraryDef("TEST", &cAnalyze::PrintTestInfo);
9546   AddLibraryDef("INCLUDE", &cAnalyze::IncludeFile);
9547   AddLibraryDef("RUN", &cAnalyze::IncludeFile);
9548   AddLibraryDef("SYSTEM", &cAnalyze::CommandSystem);
9549   AddLibraryDef("INTERACTIVE", &cAnalyze::CommandInteractive);
9550 
9551   // Functions...
9552   AddLibraryDef("FUNCTION", &cAnalyze::FunctionCreate);
9553 
9554   // Flow commands...
9555   AddLibraryDef("FOREACH", &cAnalyze::CommandForeach);
9556   AddLibraryDef("FORRANGE", &cAnalyze::CommandForRange);
9557 
9558   // Uncategorized commands...
9559   AddLibraryDef("COMPETE", &cAnalyze::BatchCompete);
9560 }
9561 
FindAnalyzeCommandDef(const cString & name)9562 cAnalyzeCommandDefBase* cAnalyze::FindAnalyzeCommandDef(const cString& name)
9563 {
9564   SetupCommandDefLibrary();
9565 
9566   cString uppername(name);
9567   uppername.ToUpper();
9568   tListIterator<cAnalyzeCommandDefBase> lib_it(command_lib);
9569   while (lib_it.Next() != (void *) NULL) {
9570     if (lib_it.Get()->GetName() == uppername) break;
9571   }
9572   cAnalyzeCommandDefBase* command_def = lib_it.Get();
9573 
9574   if (command_def == NULL && cActionLibrary::GetInstance().Supports(name)) {
9575     command_def = new cAnalyzeCommandAction(name, m_world);
9576     command_lib.PushRear(command_def);
9577   }
9578 
9579   return command_def;
9580 }
9581 
RunInteractive()9582 void cAnalyze::RunInteractive()
9583 {
9584   bool saved_analyze = m_ctx.GetAnalyzeMode();
9585   m_ctx.SetAnalyzeMode();
9586 
9587   cout << "Entering interactive mode..." << endl;
9588 
9589   char text_input[2048];
9590   while (true) {
9591     cout << ">> ";
9592     cout.flush();
9593     cin.getline(text_input, 2048);
9594     cString cur_input(text_input);
9595     cString command = cur_input.PopWord();
9596 
9597     cAnalyzeCommand* cur_command;
9598     cAnalyzeCommandDefBase* command_def = FindAnalyzeCommandDef(command);
9599     if (command == "") {
9600       // Don't worry about blank lines...
9601       continue;
9602     } else if (command == "END" || command == "QUIT" || command == "EXIT") {
9603       // We are done with interactive mode...
9604       break;
9605     } else if (command_def != NULL && command_def->IsFlowCommand() == true) {
9606       // This code has a body to it... fill it out!
9607       cur_command = new cAnalyzeFlowCommand(command, cur_input);
9608       InteractiveLoadCommandList(*(cur_command->GetCommandList()));
9609     } else {
9610       // This is a normal command...
9611       cur_command = new cAnalyzeCommand(command, cur_input);
9612     }
9613 
9614     cString args = cur_command->GetArgs();
9615     PreProcessArgs(args);
9616 
9617     cAnalyzeCommandDefBase* command_fun = FindAnalyzeCommandDef(command);
9618 
9619     if (command_fun != NULL) {                                // First check for built-in functions...
9620       cUserFeedback feedback;
9621       command_fun->Run(this, args, *cur_command, feedback);
9622       for (int i = 0; i < feedback.GetNumMessages(); i++) {
9623         switch (feedback.GetMessageType(i)) {
9624           case cUserFeedback::UF_ERROR:    cerr << "error: "; break;
9625           case cUserFeedback::UF_WARNING:  cerr << "warning: "; break;
9626           default: break;
9627         };
9628         cerr << feedback.GetMessage(i) << endl;
9629       }
9630     } else if (FunctionRun(command, args) == true) {          // Then user functions
9631       /* no additional action */
9632     } else {                                                  // Error
9633       cerr << "Error: Unknown command '" << command << "'." << endl;
9634     }
9635   }
9636 
9637   if (!saved_analyze) m_ctx.ClearAnalyzeMode();
9638 }
9639 
9640