1 /*
2 * cAnalyze.cc
3 * Avida
4 *
5 * Called "analyze.cc" prior to 12/1/05.
6 * Copyright 1999-2011 Michigan State University. All rights reserved.
7 * Copyright 1993-2003 California Institute of Technology.
8 *
9 *
10 * This file is part of Avida.
11 *
12 * Avida is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
14 *
15 * Avida is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public License along with Avida.
19 * If not, see <http://www.gnu.org/licenses/>.
20 *
21 */
22
23 #include "cAnalyze.h"
24
25 #include "avida/Avida.h"
26
27 #include "avida/core/WorldDriver.h"
28
29 #include "cActionLibrary.h"
30 #include "cAnalyzeCommand.h"
31 #include "cAnalyzeCommandAction.h"
32 #include "cAnalyzeCommandDef.h"
33 #include "cAnalyzeCommandDefBase.h"
34 #include "cAnalyzeFlowCommand.h"
35 #include "cAnalyzeFlowCommandDef.h"
36 #include "cAnalyzeFunction.h"
37 #include "cAnalyzeGenotype.h"
38 #include "cAnalyzeTreeStats_CumulativeStemminess.h"
39 #include "cAnalyzeTreeStats_Gamma.h"
40 #include "cAvidaContext.h"
41 #include "cCPUTestInfo.h"
42 #include "cDataFile.h"
43 #include "cEnvironment.h"
44 #include "cHardwareBase.h"
45 #include "cHardwareManager.h"
46 #include "cHardwareStatusPrinter.h"
47 #include "cHelpManager.h"
48 #include "cInitFile.h"
49 #include "cInstSet.h"
50 #include "cLandscape.h"
51 #include "cModularityAnalysis.h"
52 #include "cPhenotype.h"
53 #include "cPhenPlastGenotype.h"
54 #include "cPlasticPhenotype.h"
55 #include "cProbSchedule.h"
56 #include "cReaction.h"
57 #include "cReactionProcess.h"
58 #include "cResource.h"
59 #include "cResourceHistory.h"
60 #include "cSchedule.h"
61 #include "cStringIterator.h"
62 #include "cTestCPU.h"
63 #include "cUserFeedback.h"
64 #include "cWorld.h"
65 #include "tAnalyzeJob.h"
66 #include "tAnalyzeJobBatch.h"
67 #include "tDataCommandManager.h"
68 #include "tDataEntry.h"
69 #include "tDataEntryCommand.h"
70 #include "tHashMap.h"
71 #include "tMatrix.h"
72
73 #include <iomanip>
74 #include <fstream>
75 #include <sstream>
76 #include <string>
77 #include <queue>
78 #include <stack>
79
80 #include <cerrno>
81 extern "C" {
82 #include <sys/stat.h>
83 }
84
85 using namespace std;
86 using namespace Avida;
87 using namespace AvidaTools;
88
cAnalyze(cWorld * world)89 cAnalyze::cAnalyze(cWorld* world)
90 : cur_batch(0)
91 /*
92 FIXME : refactor "temporary_next_id". @kgn
93 - Added as a quick way to provide unique serial ids, per organism, in COMPETE
94 command. @kgn
95 */
96 , temporary_next_id(0)
97 , temporary_next_update(0)
98 , batch(INITIAL_BATCHES)
99 , variables(26)
100 , local_variables(26)
101 , arg_variables(26)
102 , exit_on_error(true)
103 , m_world(world)
104 , m_ctx(world->GetDefaultContext())
105 , m_jobqueue(world)
106 , m_resources(NULL)
107 , m_resource_time_spent_offset(0)
108 , interactive_depth(0)
109 {
110 random.ResetSeed(m_world->GetConfig().RANDOM_SEED.Get());
111 if (m_world->GetDriver().IsInteractive()) exit_on_error = false;
112
113 for (int i = 0; i < GetNumBatches(); i++) {
114 batch[i].Name().Set("Batch%d", i);
115 }
116
117 }
118
119
120
~cAnalyze()121 cAnalyze::~cAnalyze()
122 {
123 while (command_list.GetSize()) delete command_list.Pop();
124 while (function_list.GetSize()) delete function_list.Pop();
125 }
126
127
RunFile(cString filename)128 void cAnalyze::RunFile(cString filename)
129 {
130 bool saved_analyze = m_ctx.GetAnalyzeMode();
131 m_ctx.SetAnalyzeMode();
132
133 cInitFile analyze_file(filename, m_world->GetWorkingDir());
134 if (!analyze_file.WasOpened()) {
135 const cUserFeedback& feedback = analyze_file.GetFeedback();
136 for (int i = 0; i < feedback.GetNumMessages(); i++) {
137 switch (feedback.GetMessageType(i)) {
138 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
139 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
140 default: break;
141 };
142 cerr << feedback.GetMessage(i) << endl;
143 }
144 cerr << "warning: creating default file: '" << filename << "'" << endl;
145 ofstream fp(filename);
146 fp << "################################################################################################" << endl
147 << "# This file is used to setup avida when it is in analysis-only mode, which can be triggered by" << endl
148 << "# running \"avida -a\"." << endl
149 << "# " << endl
150 << "# Please see the documentation in documentation/analyze.html for information on how to use" << endl
151 << "# analyze mode." << endl
152 << "################################################################################################" << endl
153 << endl;
154 fp.close();
155 } else {
156 LoadCommandList(analyze_file, command_list);
157 ProcessCommands(command_list);
158 }
159
160 if (!saved_analyze) m_ctx.ClearAnalyzeMode();
161 }
162
163 //////////////// Loading methods...
164
LoadOrganism(cString cur_string)165 void cAnalyze::LoadOrganism(cString cur_string)
166 {
167 // LOAD_ORGANISM command...
168
169 cString filename = cur_string.PopWord();
170
171 // Output information about loading file.
172 cout << "Loading: " << filename << '\n';
173
174
175
176 // Setup the genome...
177 Genome genome;
178 cUserFeedback feedback;
179 genome.LoadFromDetailFile(filename, m_world->GetWorkingDir(), m_world->GetHardwareManager(), feedback);
180 for (int i = 0; i < feedback.GetNumMessages(); i++) {
181 switch (feedback.GetMessageType(i)) {
182 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
183 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
184 default: break;
185 };
186 cerr << feedback.GetMessage(i) << endl;
187 }
188
189 // Construct the new genotype..
190 cAnalyzeGenotype* genotype = new cAnalyzeGenotype(m_world, genome);
191
192 // Determine the organism's original name -- strip off directory...
193 while (filename.Find('/') != -1) filename.Pop('/');
194 while (filename.Find('\\') != -1) filename.Pop('\\');
195 filename.Replace(".gen", ""); // Remove the .gen from the filename.
196 genotype->SetName(filename);
197
198 // And save it in the current batch.
199 batch[cur_batch].List().PushRear(genotype);
200
201 // Adjust the flags on this batch
202 batch[cur_batch].SetLineage(false);
203 batch[cur_batch].SetAligned(false);
204 }
205
206
LoadSequence(cString cur_string)207 void cAnalyze::LoadSequence(cString cur_string)
208 {
209 // LOAD_SEQUENCE
210
211 static int sequence_count = 1;
212 cString sequence = cur_string.PopWord();
213 cString seq_name = cur_string.PopWord();
214
215 cout << "Loading: " << sequence << endl;
216
217 // Setup the genotype...
218 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
219 Genome genome(is.GetHardwareType(), is.GetInstSetName(), sequence);
220 cAnalyzeGenotype* genotype = new cAnalyzeGenotype(m_world, genome);
221
222 genotype->SetNumCPUs(1); // Initialize to a single organism.
223 if (seq_name == "") {
224 seq_name = cStringUtil::Stringf("org-Seq%d", sequence_count);
225 }
226 genotype->SetName(seq_name);
227 sequence_count++;
228
229 // Add this genotype to the proper batch.
230 batch[cur_batch].List().PushRear(genotype);
231
232 // Adjust the flags on this batch
233 batch[cur_batch].SetLineage(false);
234 batch[cur_batch].SetAligned(false);
235 }
236
237 // Clears the current time oriented list of resources and loads in a new one
238 // from a file specified by the user, or resource.dat by default.
LoadResources(cString cur_string)239 void cAnalyze::LoadResources(cString cur_string)
240 {
241 delete m_resources;
242 m_resources = new cResourceHistory;
243
244 int words = cur_string.CountNumWords();
245
246 cString filename = "resource.dat";
247 if (words >= 1)
248 filename = cur_string.PopWord();
249 if (words >= 2)
250 m_resource_time_spent_offset = cur_string.PopWord().AsInt();
251
252 cout << "Loading Resources from: " << filename << endl;
253
254 if (!m_resources->LoadFile(filename, m_world->GetWorkingDir())) m_world->GetDriver().RaiseException("failed to load resource file");
255 }
256
AnalyzeEntropy(cAnalyzeGenotype * genotype,double mu)257 double cAnalyze::AnalyzeEntropy(cAnalyzeGenotype* genotype, double mu)
258 {
259 double entropy = 0.0;
260
261 // If the fitness is 0, the entropy is the length of genotype ...
262 genotype->Recalculate(m_ctx);
263 if (genotype->GetFitness() == 0) {
264 return genotype->GetLength();
265 }
266
267 // Calculate the stats for the genotype we're working with ...
268 const Genome& base_genome = genotype->GetGenome();
269 const Sequence& base_seq = base_genome.GetSequence();
270 Genome mod_genome(base_genome);
271 Sequence& seq = mod_genome.GetSequence();
272 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
273 const int num_lines = base_genome.GetSize();
274 double base_fitness = genotype->GetFitness();
275
276 // Loop through all the lines of code, testing all mutations...
277 tArray<double> test_fitness(num_insts);
278 tArray<double> prob(num_insts);
279 for (int line_no = 0; line_no < num_lines; line_no ++) {
280 int cur_inst = base_seq[line_no].GetOp();
281
282 // Test fitness of each mutant.
283 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
284 seq[line_no].SetOp(mod_inst);
285 cAnalyzeGenotype test_genotype(m_world, mod_genome);
286 test_genotype.Recalculate(m_ctx);
287 // Ajust fitness ...
288 if (test_genotype.GetFitness() <= base_fitness) {
289 test_fitness[mod_inst] = test_genotype.GetFitness();
290 } else {
291 test_fitness[mod_inst] = base_fitness;
292 }
293 }
294
295 // Calculate probabilities at mut-sel balance
296 double w_bar = 1;
297
298 // Normalize fitness values
299 double maxFitness = 0.0;
300 for(int i=0; i<num_insts; i++) {
301 if(test_fitness[i] > maxFitness) {
302 maxFitness = test_fitness[i];
303 }
304 }
305
306
307 for(int i=0; i<num_insts; i++) {
308 test_fitness[i] /= maxFitness;
309 }
310
311 while(1) {
312 double sum = 0.0;
313 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
314 prob[mod_inst] = (mu * w_bar) /
315 ((double)num_insts *
316 (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
317 sum = sum + prob[mod_inst];
318 }
319 if ((sum-1.0)*(sum-1.0) <= 0.0001)
320 break;
321 else
322 w_bar = w_bar - 0.000001;
323 }
324
325 // Calculate entropy ...
326 double this_entropy = 0.0;
327 for (int i = 0; i < num_insts; i ++) {
328 this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
329 }
330 entropy += this_entropy;
331
332 // Reset the mod_genome back to the original sequence.
333 seq[line_no].SetOp(cur_inst);
334 }
335 return entropy;
336 }
337
338 //@ MRR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
AnalyzeEntropyPairs(cAnalyzeGenotype * genotype,double mu)339 tMatrix< double > cAnalyze::AnalyzeEntropyPairs(cAnalyzeGenotype * genotype, double mu)
340 {
341
342 double entropy = 0.0;
343
344 genotype->Recalculate(m_ctx);
345
346 // Calculate the stats for the genotype we're working with ...
347 const Genome& base_genome = genotype->GetGenome();
348 const Sequence& base_seq = base_genome.GetSequence();
349 Genome mod_genome(base_genome);
350 Sequence& seq = mod_genome.GetSequence();
351 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
352 const int num_lines = base_genome.GetSize();
353 double base_fitness = genotype->GetFitness();
354
355 cout << num_lines << endl;
356 tMatrix< double > pairwiseEntropy(num_lines, num_lines);
357 for (int i=0; i<num_lines; i++)
358 for (int j=-0; j<num_lines; j++)
359 pairwiseEntropy[i][j] = 0.0;
360
361 cout << pairwiseEntropy.GetNumRows() << endl;
362
363 // If the fitness is 0, return empty matrix
364
365 if (genotype->GetFitness() == 0) {
366 return pairwiseEntropy;
367 }
368
369
370 tMatrix< double > test_fitness(num_insts,num_insts);
371 tMatrix< double > prob(num_insts,num_insts);
372
373 //Pairwise mutations; the diagonal of the matrix will be the information
374 //stored by that site alone
375 for (int line_1 = 0; line_1 < num_lines; line_1++){
376 for (int line_2 = line_1; line_2 < num_lines; line_2++) {
377
378 cerr << "[ " << line_1 << ", " << line_2 << " ]" << endl;
379
380 int cur_inst_1 = base_seq[line_1].GetOp();
381 int cur_inst_2 = base_seq[line_2].GetOp();
382
383 // Test fitness of each mutant.
384 for (int mod_inst_1 = 0; mod_inst_1 < num_insts; mod_inst_1++){
385 for (int mod_inst_2 = 0; mod_inst_2 < num_insts; mod_inst_2++) {
386 seq[line_1].SetOp(mod_inst_1);
387 seq[line_2].SetOp(mod_inst_2);
388 cAnalyzeGenotype test_genotype(m_world, mod_genome);
389 test_genotype.Recalculate(m_ctx);
390 // Adjust fitness ...
391 if (test_genotype.GetFitness() <= base_fitness) {
392 test_fitness[mod_inst_1][mod_inst_2] = test_genotype.GetFitness();
393 } else {
394 test_fitness[mod_inst_1][mod_inst_2] = base_fitness;
395 }
396 }
397 }
398
399 // Calculate probabilities at mut-sel balance
400 double w_bar = 1;
401
402 // Normalize fitness values
403 double maxFitness = 0.0;
404 for(int i=0; i<num_insts; i++) {
405 for (int j = 0; j < num_insts; j++){
406 if(test_fitness[i][j] > maxFitness) {
407 maxFitness = test_fitness[i][j];
408 }
409 }
410 }
411
412
413 for(int i=0; i<num_insts; i++) {
414 for (int j=0; j<num_insts; j++){
415 test_fitness[i][j] /= maxFitness;
416 }
417 }
418
419
420 while(1) {
421 double sum = 0.0;
422 for (int mod_inst_1 = 0; mod_inst_1 < num_insts; mod_inst_1++) {
423 for (int mod_inst_2 = 0; mod_inst_2 < num_insts; mod_inst_2++){
424
425 prob[mod_inst_1][mod_inst_2] =
426 (mu * w_bar) /
427 ((double)num_insts *
428 (w_bar + test_fitness[mod_inst_1][mod_inst_2]
429 * mu - test_fitness[mod_inst_1][mod_inst_2]));
430 sum = sum + prob[mod_inst_1][mod_inst_2];
431 }
432 }
433 if ((sum-1.0)*(sum-1.0) <= 0.0001)
434 break;
435 else
436 w_bar = w_bar - 0.000001;
437 }
438
439 // Calculate entropy ...
440 double this_entropy = 0.0;
441 for (int i = 0; i < num_insts; i++){
442 for (int j = 0; j < num_insts; j++) {
443 this_entropy += prob[i][j] *
444 log((double) 1.0/prob[i][j]) / log ((double) num_insts);
445 }
446 }
447 entropy += this_entropy;
448 pairwiseEntropy[line_1][line_2] = this_entropy;
449
450 // Reset the mod_genome back to the original sequence.
451 seq[line_1].SetOp(cur_inst_1);
452 seq[line_2].SetOp(cur_inst_2);
453
454 }
455 } //End Loops
456 return pairwiseEntropy;
457 }
458
459
460
461
AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,cAnalyzeGenotype * parent,double mut_rate)462 double cAnalyze::AnalyzeEntropyGivenParent(cAnalyzeGenotype * genotype,
463 cAnalyzeGenotype * parent, double mut_rate)
464 {
465 double entropy = 0.0;
466
467 // Calculate the stats for the genotype we're working with ...
468 genotype->Recalculate(m_ctx);
469 const Genome& parent_genome = parent->GetGenome();
470 const Sequence& parent_seq = parent_genome.GetSequence();
471 const Genome& base_genome = genotype->GetGenome();
472 const Sequence& base_seq = base_genome.GetSequence();
473 Genome mod_genome(base_genome);
474 Sequence& seq = mod_genome.GetSequence();
475 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
476 const int num_lines = base_genome.GetSize();
477
478 // Loop through all the lines of code, testing all mutations ...
479 tArray<double> test_fitness(num_insts);
480 tArray<double> prob(num_insts);
481 for (int line_no = 0; line_no < num_lines; line_no ++) {
482 int cur_inst = base_seq[line_no].GetOp();
483 int parent_inst = parent_seq[line_no].GetOp();
484
485 // Test fitness of each mutant.
486 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
487 seq[line_no].SetOp(mod_inst);
488 cAnalyzeGenotype test_genotype(m_world, mod_genome);
489 test_genotype.Recalculate(m_ctx);
490 test_fitness[mod_inst] = test_genotype.GetFitness();
491 }
492
493
494 // Calculate probabilities at mut-sel balance
495 double w_bar = 1;
496
497 // Normalize fitness values, assert if they are all zero
498 double maxFitness = 0.0;
499 for(int i=0; i<num_insts; i++) {
500 if ( i == parent_inst) { continue; }
501 if (test_fitness[i] > maxFitness) {
502 maxFitness = test_fitness[i];
503 }
504 }
505
506 if(maxFitness > 0) {
507 for(int i = 0; i < num_insts; i ++) {
508 if (i == parent_inst) { continue; }
509 test_fitness[i] /= maxFitness;
510 }
511 } else {
512 // every other inst is equally likely to be mutated to
513 for (int i = 0; i < num_insts; i ++) {
514 if (i == parent_inst) { continue; }
515 test_fitness[i] = 1;
516 }
517 }
518
519 double double_num_insts = num_insts * 1.0;
520 while(1) {
521 double sum = 0.0;
522 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
523 if (mod_inst == parent_inst) { continue; }
524 prob[mod_inst] = (mut_rate * w_bar) /
525 (double_num_insts-2) /
526 (w_bar + test_fitness[mod_inst] * mut_rate * (double_num_insts-1) / (double_num_insts - 2)
527 - test_fitness[mod_inst]);
528 sum = sum + prob[mod_inst];
529 }
530 if ((sum-1.0)*(sum-1.0) <= 0.0001)
531 break;
532 else
533 w_bar = w_bar - 0.000001;
534 }
535
536 // Calculate entropy ...
537 double this_entropy = 0.0;
538 this_entropy -= (1.0 - mut_rate) * log(1.0 - mut_rate) / log(static_cast<double>(num_insts));
539 for (int i = 0; i < num_insts; i ++) {
540 if (i == parent_inst) { continue; }
541 prob[i] = prob[i] * mut_rate;
542 this_entropy += prob[i] * log(static_cast<double>(1.0/prob[i])) / log (static_cast<double>(num_insts));
543 }
544 entropy += this_entropy;
545
546 // Reset the mod_genome back to the base_genome.
547 seq[line_no].SetOp(cur_inst);
548 }
549 return entropy;
550 }
551
IncreasedInfo(cAnalyzeGenotype * genotype1,cAnalyzeGenotype * genotype2,double mu)552 double cAnalyze::IncreasedInfo(cAnalyzeGenotype * genotype1,
553 cAnalyzeGenotype * genotype2,
554 double mu)
555 {
556 double increased_info = 0.0;
557
558 // Calculate the stats for the genotypes we're working with ...
559 if ( genotype1->GetLength() != genotype2->GetLength() ) {
560 cerr << "Error: Two genotypes don't have same length.(cAnalyze::IncreasedInfo)" << endl;
561 if (exit_on_error) exit(1);
562 }
563
564 genotype1->Recalculate(m_ctx);
565 if (genotype1->GetFitness() == 0) {
566 return 0.0;
567 }
568
569 const Genome& genotype1_base_genome = genotype1->GetGenome();
570 const Sequence& genotype1_base_seq = genotype1_base_genome.GetSequence();
571 Genome genotype1_mod_genome(genotype1_base_genome);
572 Sequence& genotype1_mod_seq = genotype1_mod_genome.GetSequence();
573 const int num_insts = m_world->GetHardwareManager().GetInstSet(genotype1_base_genome.GetInstSet()).GetSize();
574 const int num_lines = genotype1_base_genome.GetSize();
575 double genotype1_base_fitness = genotype1->GetFitness();
576 vector<double> genotype1_info(num_lines, 0.0);
577
578 // Loop through all the lines of code, calculate genotype1 information
579 tArray<double> test_fitness(num_insts);
580 tArray<double> prob(num_insts);
581 for (int line_no = 0; line_no < num_lines; line_no ++) {
582 int cur_inst = genotype1_base_seq[line_no].GetOp();
583
584 // Test fitness of each mutant.
585 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
586 genotype1_mod_seq[line_no].SetOp(mod_inst);
587 cAnalyzeGenotype test_genotype(m_world, genotype1_mod_genome);
588 test_genotype.Recalculate(m_ctx);
589 // Ajust fitness ...
590 if (test_genotype.GetFitness() <= genotype1_base_fitness) {
591 test_fitness[mod_inst] = test_genotype.GetFitness();
592 } else {
593 test_fitness[mod_inst] = genotype1_base_fitness;
594 }
595 }
596
597 // Calculate probabilities at mut-sel balance
598 double w_bar = 1;
599
600 // Normalize fitness values
601 double maxFitness = 0.0;
602 for(int i=0; i<num_insts; i++) {
603 if(test_fitness[i] > maxFitness) {
604 maxFitness = test_fitness[i];
605 }
606 }
607
608 for(int i=0; i<num_insts; i++) {
609 test_fitness[i] /= maxFitness;
610 }
611
612 while(1) {
613 double sum = 0.0;
614 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
615 prob[mod_inst] = (mu * w_bar) /
616 ((double)num_insts *
617 (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
618 sum = sum + prob[mod_inst];
619 }
620 if ((sum-1.0)*(sum-1.0) <= 0.0001)
621 break;
622 else
623 w_bar = w_bar - 0.000001;
624 }
625
626 // Calculate entropy ...
627 double this_entropy = 0.0;
628 for (int i = 0; i < num_insts; i ++) {
629 this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
630 }
631 genotype1_info[line_no] = 1 - this_entropy;
632
633 // Reset the mod_genome back to the original sequence.
634 genotype1_mod_seq[line_no].SetOp(cur_inst);
635 }
636
637 genotype2->Recalculate(m_ctx);
638 if (genotype2->GetFitness() == 0) {
639 for (int line_no = 0; line_no < num_lines; ++ line_no) {
640 increased_info += genotype1_info[line_no];
641 }
642 return increased_info;
643 }
644
645 const Genome& genotype2_base_genome = genotype2->GetGenome();
646 const Sequence& genotype2_base_seq = genotype2_base_genome.GetSequence();
647 Genome genotype2_mod_genome(genotype2_base_genome);
648 Sequence& genotype2_mod_seq = genotype2_mod_genome.GetSequence();
649 double genotype2_base_fitness = genotype2->GetFitness();
650
651 // Loop through all the lines of code, calculate increased information
652 for (int line_no = 0; line_no < num_lines; line_no ++) {
653 int cur_inst = genotype2_base_seq[line_no].GetOp();
654
655 // Test fitness of each mutant.
656 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
657 genotype2_mod_seq[line_no].SetOp(mod_inst);
658 cAnalyzeGenotype test_genotype(m_world, genotype2_mod_genome);
659 test_genotype.Recalculate(m_ctx);
660 // Ajust fitness ...
661 if (test_genotype.GetFitness() <= genotype2_base_fitness) {
662 test_fitness[mod_inst] = test_genotype.GetFitness();
663 } else {
664 test_fitness[mod_inst] = genotype2_base_fitness;
665 }
666 }
667
668 // Calculate probabilities at mut-sel balance
669 double w_bar = 1;
670
671 // Normalize fitness values, assert if they are all zero
672 double maxFitness = 0.0;
673 for(int i=0; i<num_insts; i++) {
674 if(test_fitness[i] > maxFitness) {
675 maxFitness = test_fitness[i];
676 }
677 }
678
679 for(int i=0; i<num_insts; i++) {
680 test_fitness[i] /= maxFitness;
681 }
682
683 while(1) {
684 double sum = 0.0;
685 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
686 prob[mod_inst] = (mu * w_bar) /
687 ((double)num_insts *
688 (w_bar + test_fitness[mod_inst] * mu - test_fitness[mod_inst]));
689 sum = sum + prob[mod_inst];
690 }
691 if ((sum-1.0)*(sum-1.0) <= 0.0001)
692 break;
693 else
694 w_bar = w_bar - 0.000001;
695 }
696
697 // Calculate entropy ...
698 double this_entropy = 0.0;
699 for (int i = 0; i < num_insts; i ++) {
700 this_entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
701 }
702
703 // Compare information
704 if (genotype1_info[line_no] > 1 - this_entropy) {
705 increased_info += genotype1_info[line_no] - (1 - this_entropy);
706 } // else increasing is 0, do nothing
707
708 // Reset the mod_genome back to the original sequence.
709 genotype2_mod_seq[line_no].SetOp(cur_inst);
710 }
711
712
713 return increased_info;
714 }
715
LoadFile(cString cur_string)716 void cAnalyze::LoadFile(cString cur_string)
717 {
718 // LOAD
719
720 cString filename = cur_string.PopWord();
721
722 cout << "Loading: " << filename << endl;
723
724 cInitFile input_file(filename, m_world->GetWorkingDir());
725 if (!input_file.WasOpened()) {
726 const cUserFeedback& feedback = input_file.GetFeedback();
727 for (int i = 0; i < feedback.GetNumMessages(); i++) {
728 switch (feedback.GetMessageType(i)) {
729 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
730 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
731 default: break;
732 };
733 cerr << feedback.GetMessage(i) << endl;
734 }
735 if (exit_on_error) exit(1);
736 }
737
738 const cString filetype = input_file.GetFiletype();
739 if (filetype != "population_data" && // Deprecated
740 filetype != "genotype_data") {
741 cerr << "error: cannot load files of type \"" << filetype << "\"." << endl;
742 if (exit_on_error) exit(1);
743 }
744
745 if (m_world->GetVerbosity() >= VERBOSE_ON) {
746 cout << "Loading file of type: " << filetype << endl;
747 }
748
749
750 // Construct a linked list of data types that can be loaded...
751 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
752 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
753 cUserFeedback feedback;
754 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(input_file.GetFormat(), output_list, &feedback);
755
756 for (int i = 0; i < feedback.GetNumMessages(); i++) {
757 switch (feedback.GetMessageType(i)) {
758 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
759 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
760 default: break;
761 };
762 cerr << feedback.GetMessage(i) << endl;
763 }
764
765 if (feedback.GetNumErrors()) return;
766
767 bool id_inc = input_file.GetFormat().HasString("id");
768
769 // Setup the genome...
770 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
771 Genome default_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(1));
772 int load_count = 0;
773
774 for (int line_id = 0; line_id < input_file.GetNumLines(); line_id++) {
775 cString cur_line = input_file.GetLine(line_id);
776
777 cAnalyzeGenotype* genotype = new cAnalyzeGenotype(m_world, default_genome);
778
779 output_it.Reset();
780 tDataEntryCommand<cAnalyzeGenotype>* data_command = NULL;
781 while ((data_command = output_it.Next()) != NULL) {
782 data_command->SetValue(genotype, cur_line.PopWord());
783 }
784
785 // Give this genotype a name. Base it on the ID if possible.
786 if (id_inc == false) {
787 cString name = cStringUtil::Stringf("org-%d", load_count++);
788 genotype->SetName(name);
789 }
790 else {
791 cString name = cStringUtil::Stringf("org-%d", genotype->GetID());
792 genotype->SetName(name);
793 }
794
795 // Add this genotype to the proper batch.
796 batch[cur_batch].List().PushRear(genotype);
797 }
798
799 // Adjust the flags on this batch
800 batch[cur_batch].SetLineage(false);
801 batch[cur_batch].SetAligned(false);
802 }
803
804
805 //////////////// Reduction....
806
CommandFilter(cString cur_string)807 void cAnalyze::CommandFilter(cString cur_string)
808 {
809 // First three arguments are: setting, relation, comparison
810 // Fourth argument is optional batch.
811
812 const int num_args = cur_string.CountNumWords();
813 cString stat_name = cur_string.PopWord();
814 cString relation = cur_string.PopWord();
815 cString test_value = cur_string.PopWord();
816
817 // Get the dynamic command to look up the stat we need.
818 tDataEntryCommand<cAnalyzeGenotype>* stat_command = cAnalyzeGenotype::GetDataCommandManager().GetDataCommand(stat_name);
819
820
821 // Check for various possible errors before moving on...
822 bool error_found = false;
823 if (num_args < 3 || num_args > 4) {
824 cerr << "Error: Incorrect argument count." << endl;
825 error_found = true;
826 }
827
828 if (stat_command == NULL) {
829 cerr << "Error: Unknown stat '" << stat_name << "'" << endl;
830 error_found = true;
831 }
832
833 // Check relationship types. rel_ok[0] = less_ok; rel_ok[1] = same_ok; rel_ok[2] = gtr_ok
834 tArray<bool> rel_ok(3, false);
835 if (relation == "==") { rel_ok[1] = true; }
836 else if (relation == "!=") { rel_ok[0] = true; rel_ok[2] = true; }
837 else if (relation == "<") { rel_ok[0] = true; }
838 else if (relation == ">") { rel_ok[2] = true; }
839 else if (relation == "<=") { rel_ok[0] = true; rel_ok[1] = true; }
840 else if (relation == ">=") { rel_ok[1] = true; rel_ok[2] = true; }
841 else {
842 cerr << "Error: Unknown relation '" << relation << "'" << endl;
843 error_found = true;
844 }
845
846 if (error_found == true) {
847 cerr << "Format: FILTER [stat] [relation] [value] [batch=current]" << endl;
848 cerr << "Example: FILTER fitness >= 10.0" << endl;
849 if (exit_on_error) exit(1);
850 if (stat_command != NULL) delete stat_command;
851 return;
852 }
853
854
855 // If we made it this far, we're going ahead with the command...
856
857 if (m_world->GetVerbosity() >= VERBOSE_ON) {
858 cout << "Filtering batch " << cur_batch << " to genotypes where "
859 << stat_name << " " << relation << " " << test_value << endl;
860 }
861
862
863 // Loop through the genotypes and remove the entries that don't match.
864 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
865 cAnalyzeGenotype * cur_genotype = NULL;
866 while ((cur_genotype = batch_it.Next()) != NULL) {
867 const cFlexVar value = stat_command->GetValue(cur_genotype);
868 int compare = 1 + CompareFlexStat(value, test_value);
869
870 // Check if we should eliminate this genotype...
871 if (rel_ok[compare] == false) {
872 delete batch_it.Remove();
873 }
874 }
875 delete stat_command;
876
877
878 // Adjust the flags on this batch
879 batch[cur_batch].SetLineage(false);
880 batch[cur_batch].SetAligned(false);
881 }
882
FindGenotype(cString cur_string)883 void cAnalyze::FindGenotype(cString cur_string)
884 {
885 // If no arguments are passed in, just find max num_cpus.
886 if (cur_string.GetSize() == 0) cur_string = "num_cpus";
887
888 if (m_world->GetVerbosity() >= VERBOSE_ON) {
889 cout << "Reducing batch " << cur_batch << " to genotypes: ";
890 }
891
892 tListPlus<cAnalyzeGenotype> & gen_list = batch[cur_batch].List();
893 tListPlus<cAnalyzeGenotype> found_list;
894 while (cur_string.CountNumWords() > 0) {
895 cString gen_desc(cur_string.PopWord());
896 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << gen_desc << " ";
897
898 // Determine by lin_type which genotype were are tracking...
899 cAnalyzeGenotype * found_gen = PopGenotype(gen_desc, cur_batch);
900
901 if (found_gen == NULL) {
902 cerr << " Warning: genotype not found!" << endl;
903 continue;
904 }
905
906 // Save this genotype...
907 found_list.Push(found_gen);
908 }
909 cout << endl;
910
911 // Delete all genotypes other than the ones found!
912 while (gen_list.GetSize() > 0) delete gen_list.Pop();
913
914 // And fill it back in with the good stuff.
915 while (found_list.GetSize() > 0) gen_list.Push(found_list.Pop());
916
917 // Adjust the flags on this batch
918 batch[cur_batch].SetLineage(false);
919 batch[cur_batch].SetAligned(false);
920 }
921
FindOrganism(cString cur_string)922 void cAnalyze::FindOrganism(cString cur_string)
923 {
924 // At least one argument is rquired.
925 if (cur_string.GetSize() == 0) {
926 cerr << "Error: At least one argument is required in FIND_ORGANISM." << endl;
927 cerr << " (perhaps you want FIND_GENOTYPE?)" << endl;
928 return;
929 }
930
931 if (m_world->GetVerbosity() >= VERBOSE_ON) {
932 cout << "Reducing batch " << cur_batch << " to organisms: " << endl;
933 }
934
935 tListPlus<cAnalyzeGenotype> & gen_list = batch[cur_batch].List();
936 tListPlus<cAnalyzeGenotype> found_list;
937
938 tArray<int> new_counts(gen_list.GetSize());
939 new_counts.SetAll(0);
940
941 while (cur_string.CountNumWords() > 0) {
942 cString org_desc(cur_string.PopWord());
943 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << org_desc << " ";
944
945 // Determine by org_desc which genotype were are tracking...
946 if (org_desc == "random") {
947 bool found = false;
948 int num_orgs = gen_list.Count(&cAnalyzeGenotype::GetNumCPUs);
949 while (found != true) {
950 int org_chosen = random.GetUInt(num_orgs);
951 cAnalyzeGenotype * found_genotype =
952 gen_list.FindSummedValue(org_chosen, &cAnalyzeGenotype::GetNumCPUs);
953 if ( found_genotype->GetNumCPUs() != 0 && found_genotype->GetViable()) {
954 found_genotype->SetNumCPUs(found_genotype->GetNumCPUs()-1);
955 new_counts[gen_list.FindPosPtr(found_genotype)] +=1;
956 cout << "Found genotype " << gen_list.FindPosPtr(found_genotype) << endl;
957 found = true;
958 }
959 }
960 }
961
962 // pick a random organisms, with replacement!
963 if (org_desc == "randomWR") {
964 bool found = false;
965 int num_orgs = gen_list.Count(&cAnalyzeGenotype::GetNumCPUs);
966 while (found != true) {
967 int org_chosen = random.GetUInt(num_orgs);
968 cAnalyzeGenotype * found_genotype =
969 gen_list.FindSummedValue(org_chosen, &cAnalyzeGenotype::GetNumCPUs);
970 if ( found_genotype->GetNumCPUs() != 0 && found_genotype->GetViable()) {
971 new_counts[gen_list.FindPosPtr(found_genotype)] +=1;
972 cout << "Found genotype " << gen_list.FindPosPtr(found_genotype) << endl;
973 found = true;
974 }
975 }
976 }
977 }
978
979 int pos_count = 0;
980 cAnalyzeGenotype * genotype = NULL;
981 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
982
983 while ((genotype = batch_it.Next()) != NULL) {
984 genotype->SetNumCPUs(new_counts[pos_count]);
985 if (genotype->GetNumCPUs() == 0) batch_it.Remove();
986 else cout << "Genotype " << pos_count << " has " << new_counts[pos_count] << " organisms." << endl;
987 pos_count++;
988 }
989
990 // Adjust the flags on this batch
991 batch[cur_batch].SetLineage(false);
992 batch[cur_batch].SetAligned(false);
993 }
994
FindLineage(cString cur_string)995 void cAnalyze::FindLineage(cString cur_string)
996 {
997 cString lin_type = "num_cpus";
998 if (cur_string.CountNumWords() > 0) lin_type = cur_string.PopWord();
999
1000 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1001 cout << "Reducing batch " << cur_batch
1002 << " to " << lin_type << " lineage " << endl;
1003 } else cout << "Performing lineage scan..." << endl;
1004
1005
1006 // Determine by lin_type which genotype we are tracking...
1007 cAnalyzeGenotype * found_gen = PopGenotype(lin_type, cur_batch);
1008
1009 if (found_gen == NULL) {
1010 cerr << " Warning: Genotype " << lin_type
1011 << " not found. Lineage scan aborted." << endl;
1012 return;
1013 }
1014
1015 // Otherwise, trace back through the id numbers to mark all of those
1016 // in the ancestral lineage...
1017
1018 // Construct a list of genotypes found...
1019
1020 tListPlus<cAnalyzeGenotype> found_list;
1021 found_list.Push(found_gen);
1022 int next_id = found_gen->GetParentID();
1023 bool found = true;
1024 while (found == true) {
1025 found = false;
1026
1027 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1028 while ((found_gen = batch_it.Next()) != NULL) {
1029 if (found_gen->GetID() == next_id) {
1030 batch_it.Remove();
1031 found_list.Push(found_gen);
1032 next_id = found_gen->GetParentID();
1033 found = true;
1034 break;
1035 }
1036 }
1037 }
1038
1039 // We now have all of the genotypes in this lineage, delete everything
1040 // else.
1041
1042 const int total_removed = batch[cur_batch].List().GetSize();
1043 while (batch[cur_batch].List().GetSize() > 0) {
1044 delete batch[cur_batch].List().Pop();
1045 }
1046
1047 // And fill it back in with the good stuff.
1048 int total_kept = found_list.GetSize();
1049 while (found_list.GetSize() > 0) {
1050 batch[cur_batch].List().PushRear(found_list.Pop());
1051 }
1052
1053 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1054 cout << " Lineage has " << total_kept << " genotypes; "
1055 << total_removed << " were removed." << endl;
1056 }
1057
1058 // Adjust the flags on this batch
1059 batch[cur_batch].SetLineage(true);
1060 batch[cur_batch].SetAligned(false);
1061 }
1062
1063
FindSexLineage(cString cur_string)1064 void cAnalyze::FindSexLineage(cString cur_string)
1065 {
1066
1067 // detemine the method for construicting a lineage
1068 // by defauly, find the lineage of the final dominant
1069 cString lin_type = "num_cpus";
1070 if (cur_string.CountNumWords() > 0) lin_type = cur_string.PopWord();
1071
1072 // parent_method determins which of the two parental lineages to use
1073 // "rec_region_size" :
1074 // "mother" (dominant parent) is the parent contributing
1075 // more to the offspring genome (default)
1076 // "genome_size" :
1077 // "mother" (dominant parent) is the longer parent
1078 cString parent_method = "rec_region_size";
1079 if (cur_string.CountNumWords() > 0) parent_method = cur_string.PopWord();
1080
1081 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1082 cout << "Reducing batch " << cur_batch
1083 << " to " << lin_type << " sexual lineage "
1084 << " using " << parent_method << " criteria." << endl;
1085 } else cout << "Performing sexual lineage scan..." << endl;
1086
1087
1088 // Determine by lin_type which genotype we are tracking...
1089 cAnalyzeGenotype * found_gen = PopGenotype(lin_type, cur_batch);
1090
1091 cAnalyzeGenotype * found_dad;
1092 cAnalyzeGenotype * found_mom;
1093 cAnalyzeGenotype * found_temp;
1094
1095 if (found_gen == NULL) {
1096 cerr << " Warning: Genotype " << lin_type
1097 << " not found. Sexual lineage scan aborted." << endl;
1098 return;
1099 }
1100
1101 // Otherwise, trace back through the id numbers to mark all of those
1102 // in the ancestral lineage...
1103
1104 // Construct a list of genotypes found...
1105
1106 tListPlus<cAnalyzeGenotype> found_list;
1107 found_list.Push(found_gen);
1108 int next_id1 = found_gen->GetParentID();
1109 int next_id2 = found_gen->GetParent2ID();
1110
1111 bool found_m = true;
1112 bool found_d = true;
1113
1114 while (found_m == true & found_d == true) {
1115
1116 //cout << "Searching for mom=" << next_id1
1117 // << " and dad=" << next_id2 << endl;
1118 found_m = false;
1119 found_d = false;
1120
1121 // Look for the father first....
1122 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1123 batch_it.Reset();
1124 while ((found_dad = batch_it.Next()) != NULL) {
1125 // Check if the father is found...
1126 if (found_dad->GetID() == next_id2) {
1127 //cout << "Found dad!" << endl;
1128 batch_it.Remove();
1129 found_list.Push(found_dad);
1130 found_d = true;
1131 break;
1132 }
1133 }
1134
1135 // dad may have already been found, check the find list!
1136 if (found_d == false) {
1137 tListIterator<cAnalyzeGenotype> found_it(found_list);
1138 while ((found_dad = found_it.Next()) != NULL) {
1139 if (found_dad->GetID() == next_id2) {
1140 //cout << "Found dad in found list!" << endl;
1141 found_d = true;
1142 break;
1143 }
1144 }
1145 }
1146
1147 // Next, look for the mother...
1148 batch_it.Reset();
1149 while ((found_mom = batch_it.Next()) != NULL) {
1150 if (found_mom->GetID() == next_id1) {
1151 //cout << "Found mom!" << endl;
1152 batch_it.Remove();
1153 found_list.Push(found_mom);
1154 // if finding lineages by parental length, may have to swap
1155 if (parent_method == "genome_size" && found_mom->GetLength() < found_dad->GetLength()) {
1156 //cout << "Swapping parents!" << endl;
1157 found_temp = found_mom;
1158 found_mom = found_dad;
1159 found_dad = found_temp;
1160 }
1161 next_id1 = found_mom->GetParentID();
1162 next_id2 = found_mom->GetParent2ID();
1163 found_m = true;
1164 break;
1165 }
1166 }
1167
1168 // If the mother was not found, it may already have been placed in the
1169 // found list as a father...
1170 if (found_m == false) {
1171 tListIterator<cAnalyzeGenotype> found_it(found_list);
1172 while ((found_mom = found_it.Next()) != NULL) {
1173 if (found_mom->GetID() == next_id1) {
1174 //cout << "Found mom as dad!" << endl;
1175 // Don't move to found list, since its already there, but update
1176 // to the next ids.
1177 // if finding lineages by parental length, may have to swap
1178 if (parent_method == "genome_size" && found_mom->GetLength() < found_dad->GetLength()) {
1179 //cout << "Swapping parents!" << endl;
1180 found_temp = found_mom;
1181 found_mom = found_dad;
1182 found_dad = found_temp;
1183 }
1184 next_id1 = found_mom->GetParentID();
1185 next_id2 = found_mom->GetParent2ID();
1186 found_m = true;
1187 break;
1188 }
1189 }
1190 }
1191 }
1192
1193 // We now have all of the genotypes in this lineage, delete everything
1194 // else.
1195
1196 const int total_removed = batch[cur_batch].List().GetSize();
1197 while (batch[cur_batch].List().GetSize() > 0) {
1198 delete batch[cur_batch].List().Pop();
1199 }
1200
1201 // And fill it back in with the good stuff.
1202 int total_kept = found_list.GetSize();
1203 while (found_list.GetSize() > 0) {
1204 batch[cur_batch].List().PushRear(found_list.Pop());
1205 }
1206
1207 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1208 cout << " Sexual lineage has " << total_kept << " genotypes; "
1209 << total_removed << " were removed." << endl;
1210 }
1211
1212 // Adjust the flags on this batch
1213 batch[cur_batch].SetLineage(false);
1214 batch[cur_batch].SetAligned(false);
1215 }
1216
FindClade(cString cur_string)1217 void cAnalyze::FindClade(cString cur_string)
1218 {
1219 if (cur_string.GetSize() == 0) {
1220 cerr << " Warning: No clade specified for FIND_CLADE. Aborting." << endl;
1221 return;
1222 }
1223
1224 cString clade_type( cur_string.PopWord() );
1225
1226 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1227 cout << "Reducing batch " << cur_batch
1228 << " to clade " << clade_type << "." << endl;
1229 } else cout << "Performing clade scan..." << endl;
1230
1231
1232 // Determine by clade_type which genotype we are tracking...
1233 cAnalyzeGenotype * found_gen = PopGenotype(clade_type, cur_batch);
1234
1235 if (found_gen == NULL) {
1236 cerr << " Warning: Ancestral genotype " << clade_type
1237 << " not found. Clade scan aborted." << endl;
1238 return;
1239 }
1240
1241 // Do this the brute force way... scan for one step at a time.
1242
1243 // Construct a list of genotypes found...
1244
1245 tListPlus<cAnalyzeGenotype> found_list; // Found and finished.
1246 tListPlus<cAnalyzeGenotype> scan_list; // Found, but need to scan for children.
1247 scan_list.Push(found_gen);
1248
1249 // Keep going as long as there is something in the scan list...
1250 while (scan_list.GetSize() > 0) {
1251 // Move the next genotype from the scan list to the found_list.
1252 found_gen = scan_list.Pop();
1253 int parent_id = found_gen->GetID();
1254 found_list.Push(found_gen);
1255
1256 // Seach for all of the children of this genotype...
1257 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1258 while ((found_gen = batch_it.Next()) != NULL) {
1259 // If we found a child, place it into the scan list.
1260 if (found_gen->GetParentID() == parent_id) {
1261 batch_it.Remove();
1262 scan_list.Push(found_gen);
1263 }
1264 }
1265 }
1266
1267 // We now have all of the genotypes in this clade, delete everything else.
1268
1269 const int total_removed = batch[cur_batch].List().GetSize();
1270 while (batch[cur_batch].List().GetSize() > 0) {
1271 delete batch[cur_batch].List().Pop();
1272 }
1273
1274 // And fill it back in with the good stuff.
1275 int total_kept = found_list.GetSize();
1276 while (found_list.GetSize() > 0) {
1277 batch[cur_batch].List().PushRear(found_list.Pop());
1278 }
1279
1280 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1281 cout << " Clade has " << total_kept << " genotypes; "
1282 << total_removed << " were removed." << endl;
1283 }
1284
1285 // Adjust the flags on this batch
1286 batch[cur_batch].SetLineage(false);
1287 batch[cur_batch].SetAligned(false);
1288 }
1289
1290 // @JEB 9-25-2008
FindLastCommonAncestor(cString cur_string)1291 void cAnalyze::FindLastCommonAncestor(cString cur_string)
1292 {
1293
1294 // Assumes that the current batch contains a population and all of its common ancestors
1295 // Finds the last common ancestor among all current organisms that are still alive,
1296 // i.e. have an update_died of -1.
1297
1298 cout << "Finding last common ancestor of batch " << cur_batch << endl;
1299
1300 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1301 cout << " Connecting genotypes to parents. " << endl;
1302 }
1303
1304 // Connect each genotype to its parent.
1305 tListIterator<cAnalyzeGenotype> child_it(batch[cur_batch].List());
1306 cAnalyzeGenotype * on_child = NULL;
1307 while ((on_child = child_it.Next()) != NULL) {
1308 tListIterator<cAnalyzeGenotype> parent_it(batch[cur_batch].List());
1309 cAnalyzeGenotype * on_parent = NULL;
1310 while ((on_parent = parent_it.Next()) != NULL) {
1311 if (on_child->GetParentID() == on_parent->GetID()) {
1312 on_child->LinkParent(on_parent);
1313 break;
1314 }
1315 }
1316 }
1317
1318 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1319 cout << " Finding earliest genotype. " << endl;
1320 }
1321
1322 // Find the genotype without a parent (there should only be one)
1323 tListIterator<cAnalyzeGenotype> first_lca_it(batch[cur_batch].List());
1324 cAnalyzeGenotype * lca = NULL;
1325 cAnalyzeGenotype * test_lca = NULL;
1326 while ((test_lca = first_lca_it.Next()) != NULL) {
1327 if (!test_lca->GetParent()) {
1328 // It is an error to get two genotypes without a parent
1329 if (lca != NULL) {
1330 cout << "Error: More than one genotype does not have a parent. " << endl;
1331 cout << "Genotype 1: " << test_lca->GetID() << endl;
1332 cout << "Genotype 2: " << lca->GetID() << endl;
1333 return;
1334 }
1335 lca = test_lca;
1336 }
1337 }
1338
1339 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1340 cout << " Following children to last common ancestor. " << endl;
1341 }
1342
1343 // Follow the children from this parent until we find a genotype with
1344 // more than one child. This is the last common ancestor.
1345 while (lca->GetChildList().GetSize() == 1) {
1346 lca = lca->GetChildList().Pop();
1347 }
1348
1349 // Delete everything else.
1350 tListIterator<cAnalyzeGenotype> delete_batch_it(batch[cur_batch].List());
1351 cAnalyzeGenotype * delete_genotype = NULL;
1352 while ((delete_genotype = delete_batch_it.Next()) != NULL) {
1353 if (delete_genotype->GetID() != lca->GetID()) {
1354 delete delete_genotype;
1355 }
1356 }
1357
1358 // And fill it back in with the good stuff.
1359 batch[cur_batch].List().Clear();
1360 batch[cur_batch].List().PushRear(lca);
1361 }
1362
1363
SampleOrganisms(cString cur_string)1364 void cAnalyze::SampleOrganisms(cString cur_string)
1365 {
1366 double fraction = cur_string.PopWord().AsDouble();
1367 int init_genotypes = batch[cur_batch].List().GetSize();
1368
1369 double test_viable = 0;
1370 if (cur_string.GetSize() > 0) {
1371 test_viable = cur_string.PopWord().AsDouble();
1372 }
1373
1374 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1375 cout << "Sampling " << fraction << " organisms from batch "
1376 << cur_batch << "." << endl;
1377 }
1378 else cout << "Sampling Organisms..." << endl;
1379
1380 cAnalyzeGenotype * genotype = NULL;
1381 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1382
1383 // Loop through all genotypes to perform a census
1384 int org_count = 0;
1385 while ((genotype = batch_it.Next()) != NULL) {
1386 // If we require viables, reduce all non-viables to zero organisms.
1387 if (test_viable == 1 && genotype->GetViable() == 0) {
1388 genotype->SetNumCPUs(0);
1389 }
1390
1391 // Count the number of organisms in this genotype.
1392 org_count += genotype->GetNumCPUs();
1393 }
1394
1395 // Create an array to store pointers to the genotypes and fill it in
1396 // while temporarily resetting all of the organism counts to zero.
1397 tArray<cAnalyzeGenotype *> org_array(org_count);
1398 int cur_org = 0;
1399 batch_it.Reset();
1400 while ((genotype = batch_it.Next()) != NULL) {
1401 for (int i = 0; i < genotype->GetNumCPUs(); i++) {
1402 org_array[cur_org] = genotype;
1403 cur_org++;
1404 }
1405 genotype->SetNumCPUs(0);
1406 }
1407
1408 assert(cur_org == org_count);
1409
1410 // Determine how many organisms we want to keep.
1411 int new_org_count = (int) fraction;
1412 if (fraction < 1.0) new_org_count = (int) (fraction * (double) org_count);
1413 if (new_org_count > org_count) {
1414 cerr << "Warning: Trying to sample " << new_org_count
1415 << "organisms from a population of " << org_count
1416 << endl;
1417 new_org_count = org_count;
1418 }
1419
1420 // Now pick those that we are keeping.
1421 tArray<int> keep_ids(new_org_count);
1422 random.Choose(org_count, keep_ids);
1423
1424 // And increment the org counts for the associated genotypes.
1425 for (int i = 0; i < new_org_count; i++) {
1426 genotype = org_array[ keep_ids[i] ];
1427 genotype->SetNumCPUs(genotype->GetNumCPUs() + 1);
1428 }
1429
1430
1431 // Delete all genotypes with no remaining organisms...
1432 batch_it.Reset();
1433 while ((genotype = batch_it.Next()) != NULL) {
1434 if (genotype->GetNumCPUs() == 0) {
1435 batch_it.Remove();
1436 delete genotype;
1437 }
1438 }
1439
1440 int num_genotypes = batch[cur_batch].List().GetSize();
1441 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1442 cout << " Removed " << org_count - new_org_count
1443 << " organisms (" << init_genotypes - num_genotypes
1444 << " genotypes); " << new_org_count
1445 << " orgs (" << num_genotypes << " gens) remaining."
1446 << endl;
1447 }
1448
1449 // Adjust the flags on this batch
1450 batch[cur_batch].SetLineage(false);
1451 batch[cur_batch].SetAligned(false);
1452 }
1453
1454
SampleGenotypes(cString cur_string)1455 void cAnalyze::SampleGenotypes(cString cur_string)
1456 {
1457 double fraction = cur_string.PopWord().AsDouble();
1458 int init_genotypes = batch[cur_batch].List().GetSize();
1459
1460 double test_viable = 0;
1461 if (cur_string.GetSize() > 0) {
1462 test_viable = cur_string.PopWord().AsDouble();
1463 }
1464
1465 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1466 cout << "Sampling " << fraction << " genotypes from batch "
1467 << cur_batch << "." << endl;
1468 }
1469 else cout << "Sampling Genotypes..." << endl;
1470
1471 double frac_remove = 1.0 - fraction;
1472
1473 cAnalyzeGenotype * genotype = NULL;
1474
1475 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1476 while ((genotype = batch_it.Next()) != NULL) {
1477 if (random.P(frac_remove) || ((genotype->GetViable())==0 && test_viable==1) ) {
1478 batch_it.Remove();
1479 delete genotype;
1480 }
1481 }
1482
1483 int num_genotypes = batch[cur_batch].List().GetSize();
1484 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1485 cout << " Removed " << init_genotypes - num_genotypes
1486 << " genotypes; " << num_genotypes << " remaining."
1487 << endl;
1488 }
1489
1490 // Adjust the flags on this batch
1491 batch[cur_batch].SetLineage(false);
1492 batch[cur_batch].SetAligned(false);
1493 }
1494
KeepTopGenotypes(cString cur_string)1495 void cAnalyze::KeepTopGenotypes(cString cur_string)
1496 {
1497 const int num_kept = cur_string.PopWord().AsInt();
1498 const int num_genotypes = batch[cur_batch].List().GetSize();
1499 const int num_removed = num_genotypes - num_kept;
1500
1501 for (int i = 0; i < num_removed; i++) {
1502 delete batch[cur_batch].List().PopRear();
1503 }
1504
1505 // Adjust the flags on this batch
1506 // batch[cur_batch].SetLineage(false); // Should not destroy a lineage...
1507 batch[cur_batch].SetAligned(false);
1508 }
1509
TruncateLineage(cString cur_string)1510 void cAnalyze::TruncateLineage(cString cur_string)
1511 {
1512 cString type("task");
1513 int arg_i = -1;
1514 if (cur_string.GetSize()) type = cur_string.PopWord();
1515 if (type == "task") {
1516 if (cur_string.GetSize()) arg_i = cur_string.PopWord().AsInt();
1517 const int env_size = m_world->GetEnvironment().GetNumTasks();
1518 if (arg_i < 0 || arg_i >= env_size) arg_i = env_size - 1;
1519 }
1520 cString lin_type("num_cpus");
1521 if (cur_string.GetSize()) lin_type = cur_string.PopWord();
1522 FindLineage(lin_type);
1523 BatchRecalculate("");
1524
1525 if (type == "task") {
1526 if (m_world->GetVerbosity() >= VERBOSE_ON)
1527 cout << "Truncating batch " << cur_batch << " based on task " << arg_i << " emergence..." << endl;
1528 else
1529 cout << "Truncating lineage..." << endl;
1530
1531 bool found = false;
1532 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1533 cAnalyzeGenotype* genotype = NULL;
1534
1535 while ((genotype = batch_it.Next())) {
1536 if (found) {
1537 batch_it.Remove();
1538 delete genotype;
1539 continue;
1540 }
1541 if (genotype->GetTaskCount(arg_i)) found = true;
1542 }
1543 }
1544 }
1545
1546 // JEB: Creates specified number of offspring by running
1547 // each organism in the test CPU with mutations on.
SampleOffspring(cString cur_string)1548 void cAnalyze::SampleOffspring(cString cur_string)
1549 {
1550 int number_to_sample = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 1000;
1551
1552 // These parameters copied from BatchRecalculate, they could change what kinds of offspring are produced!!
1553 tArray<int> manual_inputs; // Used only if manual inputs are specified
1554 cString msg; // Holds any information we may want to send the driver to display
1555 int use_resources = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 0;
1556 int update = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;
1557 bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false;
1558 bool use_manual_inputs = false;
1559
1560 //Manual inputs will override random input request and must be the last arguments.
1561 if (cur_string.CountNumWords() > 0){
1562 if (cur_string.CountNumWords() == m_world->GetEnvironment().GetInputSize()){
1563 manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
1564 use_random_inputs = false;
1565 use_manual_inputs = true;
1566 for (int k = 0; cur_string.GetSize(); k++)
1567 manual_inputs[k] = cur_string.PopWord().AsInt();
1568 } else if (m_world->GetVerbosity() >= VERBOSE_ON){
1569 msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
1570 cur_string.CountNumWords(), m_world->GetEnvironment().GetInputSize());
1571 m_world->GetDriver().NotifyWarning(msg);
1572 }
1573 }
1574
1575 cCPUTestInfo test_info(1); //we only allow one generation of testing! v. important to get proper offspring
1576 if (use_manual_inputs)
1577 test_info.UseManualInputs(manual_inputs);
1578 else
1579 test_info.UseRandomInputs(use_random_inputs);
1580 test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
1581
1582 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1583 msg.Set("Sampling %d offspring from each of the %d organisms in batch %d...", number_to_sample, batch[cur_batch].GetSize(), cur_batch);
1584 m_world->GetDriver().NotifyComment(msg);
1585 } else{
1586 msg.Set("Sampling offspring...");
1587 m_world->GetDriver().NotifyComment(msg);
1588 }
1589
1590 // Load the mutation rates from the environment.
1591 test_info.MutationRates().Copy(m_world->GetEnvironment().GetMutRates());
1592 // Copy them into the organism
1593 tListPlus<cAnalyzeGenotype> offspring_list;
1594 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1595 cAnalyzeGenotype* parent_genotype = NULL;
1596
1597 cTestCPU * test_cpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
1598 while ((parent_genotype = batch_it.Next())) {
1599
1600 // We keep a hash with genome strings as keys
1601 // to save duplication of the same offspring genotype.
1602 // NumCPUs is incremented whenever an offspring is
1603 // created more than once from the same parent.
1604 tDictionary<cAnalyzeGenotype*> genome_hash;
1605
1606 for (int i=0; i<number_to_sample; i++) {
1607 test_cpu->TestGenome(m_world->GetDefaultContext(), test_info, parent_genotype->GetGenome());
1608 cAnalyzeGenotype * offspring_genotype = NULL;
1609 bool found = genome_hash.Find(test_info.GetTestOrganism(0)->OffspringGenome().GetSequence().AsString(), offspring_genotype);
1610 if (found) {
1611 offspring_genotype->SetNumCPUs(offspring_genotype->GetNumCPUs() + 1);
1612 }
1613 else {
1614 cAnalyzeGenotype* offspring_genotype = new cAnalyzeGenotype(m_world, test_info.GetTestOrganism(0)->OffspringGenome());
1615 offspring_genotype->SetID(parent_genotype->GetID());
1616 offspring_genotype->SetNumCPUs(1);
1617 offspring_list.Push(offspring_genotype);
1618 genome_hash.Set(test_info.GetTestOrganism(0)->OffspringGenome().GetSequence().AsString(), offspring_genotype);
1619 }
1620 }
1621 batch_it.Remove();
1622 delete parent_genotype;
1623 }
1624 delete test_cpu;
1625
1626 // Fill back in the current batch with the new offspring
1627 while (offspring_list.GetSize() > 0) {
1628 batch[cur_batch].List().PushRear(offspring_list.Pop());
1629 }
1630
1631 }
1632
1633
1634 //////////////// Output Commands...
1635
CommandPrint(cString cur_string)1636 void cAnalyze::CommandPrint(cString cur_string)
1637 {
1638 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing batch " << cur_batch << endl;
1639 else cout << "Printing organisms..." << endl;
1640
1641 cString directory = PopDirectory(cur_string, "archive/");
1642 // Weirdly, PopDirectory() doesn't actually pop, so...
1643 cur_string.PopWord(); // There, that actually removes the directory string
1644
1645 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1646 cAnalyzeGenotype* genotype = NULL;
1647 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
1648 while ((genotype = batch_it.Next()) != NULL) {
1649 cString filename(directory);
1650
1651 if (cur_string.GetSize() > 0) {
1652 filename += cur_string.PopWord();
1653 }
1654 else {
1655 filename += genotype->GetName();
1656 filename += ".gen";
1657 }
1658
1659 testcpu->PrintGenome(m_ctx, genotype->GetGenome(), filename);
1660 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing: " << filename << endl;
1661 }
1662 delete testcpu;
1663 }
1664
CommandTrace(cString cur_string)1665 void cAnalyze::CommandTrace(cString cur_string)
1666 {
1667 cString msg;
1668 tArray<int> manual_inputs;
1669 int sg = 0;
1670
1671 // Process our arguments; manual inputs must be the last arguments
1672
1673 cString directory = PopDirectory(cur_string.PopWord(), cString("archive/")); // #1
1674 cString first_arg = cur_string.PopWord();
1675
1676 if (first_arg.IsSubstring("sg=", 0)) {
1677 first_arg.Pop('=');
1678 sg = first_arg.AsInt();
1679 if (sg < 0 || sg >= m_world->GetEnvironment().GetNumStateGrids()) {
1680 msg.Set("invalid state grid selection");
1681 m_world->GetDriver().NotifyWarning(msg);
1682 return;
1683 }
1684 first_arg = cur_string.PopWord();
1685 }
1686
1687 int use_resources = (first_arg.GetSize()) ? first_arg.AsInt() : 0; // #2
1688 int update = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1; // #3
1689 bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false; // #4
1690 bool use_manual_inputs = false; // #5+
1691
1692 //Manual inputs will override random input request
1693 if (cur_string.CountNumWords() > 0){
1694 if (cur_string.CountNumWords() == m_world->GetEnvironment().GetInputSize()){
1695 manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
1696 use_random_inputs = false;
1697 use_manual_inputs = true;
1698 for (int k = 0; cur_string.GetSize(); k++)
1699 manual_inputs[k] = cur_string.PopWord().AsInt();
1700 } else if (m_world->GetVerbosity() >= VERBOSE_ON){
1701 msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
1702 cur_string.CountNumWords(), m_world->GetEnvironment().GetInputSize());
1703 m_world->GetDriver().NotifyWarning(msg);
1704 }
1705 }
1706
1707
1708 if (m_world->GetVerbosity() >= VERBOSE_ON)
1709 msg.Set("Tracing batch %d", cur_batch);
1710 else
1711 msg.Set("Tracing organisms.");
1712 m_world->GetDriver().NotifyComment(msg);
1713
1714 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
1715
1716 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1717 cAnalyzeGenotype * genotype = NULL;
1718 while ((genotype = batch_it.Next()) != NULL) {
1719 cString filename = directory + genotype->GetName() + cString(".trace");
1720
1721 if (genotype->GetGenome().GetSize() == 0)
1722 break;
1723
1724 // Build the hardware status printer for tracing.
1725 ofstream& trace_fp = m_world->GetDataFileOFStream(filename);
1726 cHardwareStatusPrinter trace_printer(trace_fp);
1727
1728 // Build the test info for printing.
1729 cCPUTestInfo test_info;
1730 test_info.SetTraceExecution(&trace_printer);
1731 if (use_manual_inputs)
1732 test_info.UseManualInputs(manual_inputs);
1733 else
1734 test_info.UseRandomInputs(use_random_inputs);
1735 test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
1736 test_info.SetCurrentStateGridID(sg);
1737
1738 if (m_world->GetVerbosity() >= VERBOSE_ON){
1739 msg = cString("Tracing ") + filename;
1740 m_world->GetDriver().NotifyComment(msg);
1741 }
1742
1743 testcpu->TestGenome(m_ctx, test_info, genotype->GetGenome());
1744
1745 m_world->GetDataFileManager().Remove(filename);
1746 }
1747
1748 delete testcpu;
1749 }
1750
1751
CommandPrintTasks(cString cur_string)1752 void cAnalyze::CommandPrintTasks(cString cur_string)
1753 {
1754 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing tasks in batch " << cur_batch << endl;
1755 else cout << "Printing tasks..." << endl;
1756
1757 // Load in the variables...
1758 cString filename("tasks.dat");
1759 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1760
1761 ofstream& fp = m_world->GetDataFileOFStream(filename);
1762
1763 // Loop through all of the genotypes in this batch...
1764 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1765 cAnalyzeGenotype * genotype = NULL;
1766 while ((genotype = batch_it.Next()) != NULL) {
1767 fp << genotype->GetID() << " ";
1768 genotype->PrintTasks(fp);
1769 fp << endl;
1770 }
1771 }
1772
CommandPrintTasksQuality(cString cur_string)1773 void cAnalyze::CommandPrintTasksQuality(cString cur_string)
1774 {
1775 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing task qualities in batch " << cur_batch << endl;
1776 else cout << "Printing task qualities..." << endl;
1777
1778 // Load in the variables...
1779 cString filename("tasksquality.dat");
1780 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1781
1782 ofstream& fp = m_world->GetDataFileOFStream(filename);
1783
1784 // Loop through all of the genotypes in this batch...
1785 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1786 cAnalyzeGenotype * genotype = NULL;
1787 while ((genotype = batch_it.Next()) != NULL) {
1788 fp << genotype->GetID() << " ";
1789 genotype->PrintTasksQuality(fp);
1790 fp << endl;
1791 }
1792 }
1793
CommandDetail(cString cur_string)1794 void cAnalyze::CommandDetail(cString cur_string)
1795 {
1796 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Detailing batch " << cur_batch << endl;
1797 else cout << "Detailing..." << endl;
1798
1799 // @JEB return if there are no organisms in the current batch
1800 if (batch[cur_batch].GetSize() == 0) return;
1801
1802 // Load in the variables...
1803 cString filename("detail.dat");
1804 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1805
1806 // Construct a linked list of details needed...
1807 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
1808 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
1809 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
1810
1811 // Determine the file type...
1812 int file_type = FILE_TYPE_TEXT;
1813 cString file_extension(filename);
1814 while (file_extension.Find('.') != -1) file_extension.Pop('.');
1815 if (file_extension == "html") file_type = FILE_TYPE_HTML;
1816
1817 // Setup the file...
1818 if (filename == "cout") {
1819 CommandDetail_Header(cout, file_type, output_it);
1820 CommandDetail_Body(cout, file_type, output_it);
1821 } else {
1822 ofstream& fp = m_world->GetDataFileOFStream(filename);
1823 CommandDetail_Header(fp, file_type, output_it);
1824 CommandDetail_Body(fp, file_type, output_it);
1825 m_world->GetDataFileManager().Remove(filename);
1826 }
1827
1828 // And clean up...
1829 while (output_list.GetSize() != 0) delete output_list.Pop();
1830 }
1831
1832
CommandDetailTimeline(cString cur_string)1833 void cAnalyze::CommandDetailTimeline(cString cur_string)
1834 {
1835 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Detailing batch "
1836 << cur_batch << " based on time" << endl;
1837 else cout << "Detailing..." << endl;
1838
1839 // Load in the variables...
1840 cString filename("detail_timeline.dat");
1841 int time_step = 100;
1842 int max_time = 100000;
1843 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
1844 if (cur_string.GetSize() != 0) time_step = cur_string.PopWord().AsInt();
1845 if (cur_string.GetSize() != 0) max_time = cur_string.PopWord().AsInt();
1846
1847 if (m_world->GetVerbosity() >= VERBOSE_ON) {
1848 cout << " Time step = " << time_step << endl
1849 << " Max time = " << max_time << endl;
1850 }
1851
1852 // Construct a linked list of details needed...
1853 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
1854 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
1855 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
1856
1857 // Determine the file type...
1858 int file_type = FILE_TYPE_TEXT;
1859 cString file_extension(filename);
1860 while (file_extension.Find('.') != -1) file_extension.Pop('.');
1861 if (file_extension == "html") file_type = FILE_TYPE_HTML;
1862
1863 // Setup the file...
1864 if (filename == "cout") {
1865 CommandDetail_Header(cout, file_type, output_it, time_step);
1866 CommandDetail_Body(cout, file_type, output_it, time_step, max_time);
1867 } else {
1868 ofstream& fp = m_world->GetDataFileOFStream(filename);
1869 CommandDetail_Header(fp, file_type, output_it, time_step);
1870 CommandDetail_Body(fp, file_type, output_it, time_step, max_time);
1871 }
1872
1873 // And clean up...
1874 while (output_list.GetSize() != 0) delete output_list.Pop();
1875 }
1876
1877
CommandDetail_Header(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it,int time_step)1878 void cAnalyze::CommandDetail_Header(ostream& fp, int format_type,
1879 tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it,
1880 int time_step)
1881 {
1882 cAnalyzeGenotype* cur_genotype = batch[cur_batch].List().GetFirst();
1883
1884 // Write out the header on the file
1885 if (format_type == FILE_TYPE_TEXT) {
1886 fp << "#filetype genotype_data" << endl;
1887 fp << "#format ";
1888 if (time_step > 0) fp << "update ";
1889 while (output_it.Next() != NULL) {
1890 const cString& entry_name = output_it.Get()->GetName();
1891 fp << entry_name << " ";
1892 }
1893 fp << endl << endl;
1894
1895 // Give the more human-readable legend.
1896 fp << "# Legend:" << endl;
1897 int count = 0;
1898 if (time_step > 0) fp << "# " << ++count << ": Update" << endl;
1899 while (output_it.Next() != NULL) {
1900 const cString& entry_desc = output_it.Get()->GetDesc(cur_genotype);
1901 fp << "# " << ++count << ": " << entry_desc << endl;
1902 }
1903 fp << endl;
1904 } else { // if (format_type == FILE_TYPE_HTML) {
1905 fp << "<html>" << endl
1906 << "<body bgcolor=\"#FFFFFF\"" << endl
1907 << " text=\"#000000\"" << endl
1908 << " link=\"#0000AA\"" << endl
1909 << " alink=\"#0000FF\"" << endl
1910 << " vlink=\"#000044\">" << endl
1911 << endl
1912 << "<h1 align=center>Run " << batch[cur_batch].Name() << endl
1913 << endl
1914 << "<center>" << endl
1915 << "<table border=1 cellpadding=2><tr>" << endl;
1916
1917 if (time_step > 0) fp << "<th bgcolor=\"#AAAAFF\">Update ";
1918 while (output_it.Next() != NULL) {
1919 const cString& entry_desc = output_it.Get()->GetDesc(cur_genotype);
1920 fp << "<th bgcolor=\"#AAAAFF\">" << entry_desc << " ";
1921 }
1922 fp << "</tr>" << endl;
1923
1924 }
1925
1926 }
1927
1928
CommandDetail_Body(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it,int time_step,int max_time)1929 void cAnalyze::CommandDetail_Body(ostream& fp, int format_type,
1930 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > & output_it,
1931 int time_step, int max_time)
1932 {
1933 // Loop through all of the genotypes in this batch...
1934 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
1935 cAnalyzeGenotype * cur_genotype = batch_it.Next();
1936 cAnalyzeGenotype * next_genotype = batch_it.Next();
1937 cAnalyzeGenotype * prev_genotype = NULL;
1938
1939 int cur_time = 0;
1940 while (cur_genotype != NULL && cur_time <= max_time) {
1941 if (m_world->GetVerbosity() >= VERBOSE_DETAILS) {
1942 cout << "Detailing genotype " << cur_genotype->GetID()
1943 << " at depth " << cur_genotype->GetDepth()
1944 << endl;
1945 }
1946 output_it.Reset();
1947 if (format_type == FILE_TYPE_HTML) {
1948 fp << "<tr>";
1949 if (time_step > 0) fp << "<td>" << cur_time << " ";
1950 }
1951 else if (time_step > 0) { // TEXT file, printing times...
1952 fp << cur_time << " ";
1953 }
1954
1955 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
1956 while ((data_command = output_it.Next()) != NULL) {
1957 cFlexVar cur_value = data_command->GetValue(cur_genotype);
1958 if (format_type == FILE_TYPE_HTML) {
1959 int compare = 0;
1960 if (prev_genotype) {
1961 cFlexVar prev_value = data_command->GetValue(prev_genotype);
1962 int compare_type = data_command->GetCompareType();
1963 compare = CompareFlexStat(cur_value, prev_value, compare_type);
1964 }
1965 HTMLPrintStat(cur_value, fp, compare, data_command->GetHtmlCellFlags(), data_command->GetNull());
1966 }
1967 else { // if (format_type == FILE_TYPE_TEXT) {
1968 fp << data_command->GetValue(cur_genotype) << " ";
1969 }
1970 }
1971 if (format_type == FILE_TYPE_HTML) fp << "</tr>";
1972 fp << endl;
1973
1974 cur_time += time_step;
1975 if (time_step > 0) {
1976 while (next_genotype && next_genotype->GetUpdateBorn() < cur_time) {
1977 prev_genotype = cur_genotype;
1978 cur_genotype = next_genotype;
1979 next_genotype = batch_it.Next();
1980 }
1981 }
1982 else {
1983 // Always moveon if we're not basing this on time, or if we've run out of genotypes.
1984 prev_genotype = cur_genotype;
1985 cur_genotype = next_genotype;
1986 next_genotype = batch_it.Next();
1987 }
1988
1989 }
1990
1991 // If in HTML mode, we need to end the file...
1992 if (format_type == FILE_TYPE_HTML) {
1993 fp << "</table>" << endl
1994 << "</center>" << endl;
1995 }
1996 }
1997
CommandDetailAverage_Body(ostream & fp,int nucoutputs,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it)1998 void cAnalyze::CommandDetailAverage_Body(ostream& fp, int nucoutputs,
1999 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > & output_it)
2000 {
2001 // Loop through all of the genotypes in this batch...
2002 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2003 cAnalyzeGenotype * cur_genotype = batch_it.Next();
2004 cAnalyzeGenotype * next_genotype = batch_it.Next();
2005 cAnalyzeGenotype * prev_genotype = NULL;
2006
2007 tArray<cDoubleSum> output_counts(nucoutputs);
2008 for (int i = 0; i < nucoutputs; i++) { output_counts[i].Clear();}
2009 int count;
2010 while (cur_genotype != NULL) {
2011 count = 0;
2012 output_it.Reset();
2013 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
2014 while ((data_command = output_it.Next()) != NULL) {
2015 for (int j = 0; j < cur_genotype->GetNumCPUs(); j++) {
2016 output_counts[count].Add( data_command->GetValue(cur_genotype).AsDouble() );
2017 }
2018 count++;
2019 }
2020
2021 prev_genotype = cur_genotype;
2022 cur_genotype = next_genotype;
2023 next_genotype = batch_it.Next();
2024 }
2025 fp << batch[cur_batch].Name() << " ";
2026 for (int i = 0; i < nucoutputs; i++) {
2027 fp << output_counts[i].Average() << " ";
2028 }
2029 fp << endl;
2030 }
2031
CommandDetailAverage(cString cur_string)2032 void cAnalyze::CommandDetailAverage(cString cur_string)
2033 {
2034 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Average detailing batch " << cur_batch << endl;
2035 else cout << "Detailing..." << endl;
2036
2037 // Load in the variables...
2038 cString filename("detail.dat");
2039 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2040
2041 // Construct a linked list of details needed...
2042 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
2043 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
2044 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
2045
2046 // check if file is already in use.
2047 bool file_active = m_world->GetDataFileManager().IsOpen(filename);
2048
2049 ofstream& fp = m_world->GetDataFileOFStream(filename);
2050
2051 // if it's a new file print out the header
2052 if (file_active == false) {
2053 CommandDetail_Header(fp, FILE_TYPE_TEXT, output_it);
2054 }
2055 CommandDetailAverage_Body(fp, cur_string.CountNumWords(), output_it);
2056
2057 while (output_list.GetSize() != 0) delete output_list.Pop();
2058
2059 }
2060
CommandDetailBatches(cString cur_string)2061 void cAnalyze::CommandDetailBatches(cString cur_string)
2062 {
2063 // Load in the variables...
2064 cString keyword("num_cpus");
2065 cString filename("detail_batch.dat");
2066 if (cur_string.GetSize() != 0) keyword = cur_string.PopWord();
2067 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2068
2069 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Detailing batches for " << keyword << endl;
2070 else cout << "Detailing Batches..." << endl;
2071
2072 // Find its associated command...
2073 tDataEntryCommand<cAnalyzeGenotype>* cur_command = cAnalyzeGenotype::GetDataCommandManager().GetDataCommand(keyword);
2074 if (!cur_command) {
2075 cout << "error: no data entry, unable to detail batches" << endl;
2076 return;
2077 }
2078
2079
2080 // Determine the file type...
2081 int file_type = FILE_TYPE_TEXT;
2082 cString file_extension(filename);
2083 while (file_extension.Find('.') != -1) file_extension.Pop('.');
2084 if (file_extension == "html") file_type = FILE_TYPE_HTML;
2085
2086 ofstream& fp = m_world->GetDataFileOFStream(filename);
2087 cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2088
2089 // Write out the header on the file
2090 if (file_type == FILE_TYPE_TEXT) {
2091 fp << "#filetype batch_data" << endl
2092 << "#format batch_id " << keyword << endl
2093 << endl;
2094
2095 // Give the more human-readable legend.
2096 fp << "# Legend:" << endl
2097 << "# Column 1 = Batch ID" << endl
2098 << "# Remaining entries: " << cur_command->GetDesc(first_genotype) << endl
2099 << endl;
2100
2101 } else { // if (file_type == FILE_TYPE_HTML) {
2102 fp << "<html>" << endl
2103 << "<body bgcolor=\"#FFFFFF\"" << endl
2104 << " text=\"#000000\"" << endl
2105 << " link=\"#0000AA\"" << endl
2106 << " alink=\"#0000FF\"" << endl
2107 << " vlink=\"#000044\">" << endl
2108 << endl
2109 << "<h1 align=center> Distribution of " << cur_command->GetDesc(first_genotype)
2110 << endl << endl
2111 << "<center>" << endl
2112 << "<table border=1 cellpadding=2>" << endl
2113 << "<tr><th bgcolor=\"#AAAAFF\">" << cur_command->GetDesc(first_genotype) << "</tr>"
2114 << endl;
2115 }
2116
2117
2118 // Loop through all of the batches...
2119 for (int i = 0; i < GetNumBatches(); i++) {
2120 if (batch[i].List().GetSize() == 0) continue;
2121
2122 if (file_type == FILE_TYPE_HTML) fp << "<tr><td>";
2123 fp << i << " ";
2124
2125 tListIterator<cAnalyzeGenotype> batch_it(batch[i].List());
2126 cAnalyzeGenotype * genotype = NULL;
2127 while ((genotype = batch_it.Next()) != NULL) {
2128 if (file_type == FILE_TYPE_HTML) fp << "<td>";
2129
2130 if (file_type == FILE_TYPE_HTML) {
2131 HTMLPrintStat(cur_command->GetValue(genotype), fp, 0, cur_command->GetHtmlCellFlags(), cur_command->GetNull());
2132 }
2133 else { // if (file_type == FILE_TYPE_TEXT) {
2134 fp << cur_command->GetValue(genotype) << " ";
2135 }
2136 }
2137 if (file_type == FILE_TYPE_HTML) fp << "</tr>";
2138 fp << endl;
2139 }
2140
2141 // If in HTML mode, we need to end the file...
2142 if (file_type == FILE_TYPE_HTML) {
2143 fp << "</table>" << endl
2144 << "</center>" << endl;
2145 }
2146
2147 delete cur_command;
2148 }
2149
2150
2151
CommandDetailIndex(cString cur_string)2152 void cAnalyze::CommandDetailIndex(cString cur_string)
2153 {
2154 cout << "Creating a Detail Index..." << endl;
2155
2156 // A filename and min and max batches must be included.
2157 if (cur_string.CountNumWords() < 3) {
2158 cerr << "Error: must include filename, and min and max batch numbers." << endl;
2159 if (exit_on_error) exit(1);
2160 }
2161
2162 // Load in the variables...
2163 cString filename(cur_string.PopWord());
2164 int min_batch = cur_string.PopWord().AsInt();
2165 int max_batch = cur_string.PopWord().AsInt();
2166
2167 if (max_batch < min_batch) {
2168 cerr << "Error: min_batch=" << min_batch
2169 << ", max_batch=" << max_batch << " (incorrect order?)" << endl;
2170 if (exit_on_error) exit(1);
2171 }
2172
2173 // Construct a linked list of details needed...
2174 tList<tDataEntryCommand<cAnalyzeGenotype> > output_list;
2175 tListIterator<tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
2176 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cStringList(cur_string), output_list);
2177
2178
2179 // Setup the file...
2180 ofstream& fp = m_world->GetDataFileOFStream(filename);
2181 cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2182
2183 // Determine the file type...
2184 int file_type = FILE_TYPE_TEXT;
2185 while (filename.Find('.') != -1) filename.Pop('.'); // Grab only extension
2186 if (filename == "html") file_type = FILE_TYPE_HTML;
2187
2188 // Write out the header on the file
2189 if (file_type == FILE_TYPE_TEXT) {
2190 fp << "#filetype genotype_data" << endl;
2191 fp << "#format ";
2192 while (output_it.Next() != NULL) {
2193 const cString & entry_name = output_it.Get()->GetName();
2194 fp << entry_name << " ";
2195 }
2196 fp << endl << endl;
2197
2198 // Give the more human-readable legend.
2199 fp << "# Legend:" << endl;
2200 fp << "# 1: Batch Name" << endl;
2201 int count = 1;
2202 while (output_it.Next() != NULL) {
2203 const cString& entry_desc = output_it.Get()->GetDesc(first_genotype);
2204 fp << "# " << ++count << ": " << entry_desc << endl;
2205 }
2206 fp << endl;
2207 } else { // if (file_type == FILE_TYPE_HTML) {
2208 fp << "<html>" << endl
2209 << "<body bgcolor=\"#FFFFFF\"" << endl
2210 << " text=\"#000000\"" << endl
2211 << " link=\"#0000AA\"" << endl
2212 << " alink=\"#0000FF\"" << endl
2213 << " vlink=\"#000044\">" << endl
2214 << endl
2215 << "<h1 align=center>Batch Index" << endl
2216 << endl
2217 << "<center>" << endl
2218 << "<table border=1 cellpadding=2><tr>" << endl;
2219
2220 fp << "<th bgcolor=\"#AAAAFF\">Batch ";
2221 while (output_it.Next() != NULL) {
2222 const cString& entry_desc = output_it.Get()->GetDesc(first_genotype);
2223 fp << "<th bgcolor=\"#AAAAFF\">" << entry_desc << " ";
2224 }
2225 fp << "</tr>" << endl;
2226
2227 }
2228
2229 // Loop through all of the batchs...
2230 for (int batch_id = min_batch; batch_id <= max_batch; batch_id++) {
2231 cAnalyzeGenotype * genotype = batch[batch_id].List().GetFirst();
2232 if (genotype == NULL) continue;
2233 output_it.Reset();
2234 tDataEntryCommand<cAnalyzeGenotype>* data_entry = NULL;
2235 const cString & batch_name = batch[batch_id].Name();
2236 if (file_type == FILE_TYPE_HTML) {
2237 fp << "<tr><th><a href=lineage." << batch_name << ".html>"
2238 << batch_name << "</a> ";
2239 } else {
2240 fp << batch_name << " ";
2241 }
2242
2243 while ((data_entry = output_it.Next()) != NULL) {
2244 if (file_type == FILE_TYPE_HTML) {
2245 fp << "<td align=center><a href=\""
2246 << data_entry->GetName() << "." << batch_name << ".png\">"
2247 << data_entry->GetValue(genotype) << "</a> ";
2248 } else { // if (file_type == FILE_TYPE_TEXT) {
2249 fp << data_entry->GetValue(genotype) << " ";
2250 }
2251 }
2252 if (file_type == FILE_TYPE_HTML) fp << "</tr>";
2253 fp << endl;
2254 }
2255
2256 // If in HTML mode, we need to end the file...
2257 if (file_type == FILE_TYPE_HTML) {
2258 fp << "</table>" << endl
2259 << "</center>" << endl;
2260 }
2261 }
2262
CommandHistogram(cString cur_string)2263 void cAnalyze::CommandHistogram(cString cur_string)
2264 {
2265 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Histogram batch " << cur_batch << endl;
2266 else cout << "Histograming..." << endl;
2267
2268 // Load in the variables...
2269 cString filename("histogram.dat");
2270 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2271
2272 // Construct a linked list of details needed...
2273 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
2274 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
2275 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(cur_string, output_list);
2276
2277 // Determine the file type...
2278 int file_type = FILE_TYPE_TEXT;
2279 cString file_extension(filename);
2280 while (file_extension.Find('.') != -1) file_extension.Pop('.');
2281 if (file_extension == "html") file_type = FILE_TYPE_HTML;
2282
2283 // Setup the file...
2284 if (filename == "cout") {
2285 CommandHistogram_Header(cout, file_type, output_it);
2286 CommandHistogram_Body(cout, file_type, output_it);
2287 } else {
2288 ofstream& fp = m_world->GetDataFileOFStream(filename);
2289 CommandHistogram_Header(fp, file_type, output_it);
2290 CommandHistogram_Body(fp, file_type, output_it);
2291 }
2292
2293 // And clean up...
2294 while (output_list.GetSize() != 0) delete output_list.Pop();
2295 }
2296
CommandHistogram_Header(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it)2297 void cAnalyze::CommandHistogram_Header(ostream& fp, int format_type,
2298 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > & output_it)
2299 {
2300 cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2301
2302 // Write out the header on the file
2303 if (format_type == FILE_TYPE_TEXT) {
2304 fp << "#filetype histogram_data" << endl;
2305 fp << "#format ";
2306 while (output_it.Next() != NULL) {
2307 const cString & entry_name = output_it.Get()->GetName();
2308 fp << entry_name << " ";
2309 }
2310 fp << endl << endl;
2311
2312 // Give the more human-readable legend.
2313 fp << "# Histograms:" << endl;
2314 int count = 0;
2315 while (output_it.Next() != NULL) {
2316 const cString & entry_desc = output_it.Get()->GetDesc(first_genotype);
2317 fp << "# " << ++count << ": " << entry_desc << endl;
2318 }
2319 fp << endl;
2320 } else { // if (format_type == FILE_TYPE_HTML) {
2321 fp << "<html>" << endl
2322 << "<body bgcolor=\"#FFFFFF\"" << endl
2323 << " text=\"#000000\"" << endl
2324 << " link=\"#0000AA\"" << endl
2325 << " alink=\"#0000FF\"" << endl
2326 << " vlink=\"#000044\">" << endl
2327 << endl
2328 << "<h1 align=center>Histograms for " << batch[cur_batch].Name()
2329 << "</h1>" << endl
2330 << endl
2331 << "<center>" << endl
2332 << "<table border=1 cellpadding=2><tr>" << endl;
2333
2334 while (output_it.Next() != NULL) {
2335 const cString & entry_desc = output_it.Get()->GetDesc(first_genotype);
2336 const cString & entry_name = output_it.Get()->GetName();
2337 fp << "<tr><th bgcolor=\"#AAAAFF\"><a href=\"#"
2338 << entry_name << "\">"
2339 << entry_desc << "</a></tr>";
2340 }
2341 fp << "</tr></table>" << endl;
2342 }
2343 }
2344
2345
CommandHistogram_Body(ostream & fp,int format_type,tListIterator<tDataEntryCommand<cAnalyzeGenotype>> & output_it)2346 void cAnalyze::CommandHistogram_Body(ostream& fp, int format_type,
2347 tListIterator< tDataEntryCommand<cAnalyzeGenotype> >& output_it)
2348 {
2349 output_it.Reset();
2350 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
2351 cAnalyzeGenotype* first_genotype = batch[cur_batch].List().GetFirst();
2352
2353 while ((data_command = output_it.Next()) != NULL) {
2354 if (format_type == FILE_TYPE_TEXT) {
2355 fp << "# --- " << data_command->GetDesc(first_genotype) << " ---" << endl;
2356 } else {
2357 fp << "<table cellpadding=3>" << endl
2358 << "<tr><th colspan=3><a name=\"" << data_command->GetName() << "\">"
2359 << data_command->GetDesc(first_genotype) << "</th></tr>" << endl;
2360 }
2361
2362 tDictionary<int> count_dict;
2363
2364 // Loop through all genotypes in this batch to collect the info we need.
2365 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2366 cAnalyzeGenotype * cur_genotype;
2367 while ((cur_genotype = batch_it.Next()) != NULL) {
2368 const cString cur_name(data_command->GetValue(cur_genotype).AsString());
2369 int count = 0;
2370 count_dict.Find(cur_name, count);
2371 count += cur_genotype->GetNumCPUs();
2372 count_dict.Set(cur_name, count);
2373 }
2374
2375 tList<cString> name_list;
2376 tList<int> count_list;
2377 count_dict.AsLists(name_list, count_list);
2378
2379 // Figure out the maximum count and the maximum widths...
2380 int max_count = 0;
2381 int max_name_width = 0;
2382 int max_count_width = 0;
2383 tListIterator<int> count_it(count_list);
2384 tListIterator<cString> name_it(name_list);
2385 while (count_it.Next() != NULL) {
2386 const cString cur_name( *(name_it.Next()) );
2387 const int cur_count = *(count_it.Get());
2388 const int name_width = cur_name.GetSize();
2389 const int count_width = cStringUtil::Stringf("%d", cur_count).GetSize();
2390 if (cur_count > max_count) max_count = cur_count;
2391 if (name_width > max_name_width) max_name_width = name_width;
2392 if (count_width > max_count_width) max_count_width = count_width;
2393 }
2394
2395 // Do some final calculations now that we know the maximums...
2396 const int max_stars = 75 - max_name_width - max_count_width;
2397
2398 // Now print everything out...
2399 count_it.Reset();
2400 name_it.Reset();
2401 while (count_it.Next() != NULL) {
2402 const cString cur_name( *(name_it.Next()) );
2403 const int cur_count = *(count_it.Get());
2404 if (cur_count == 0) continue;
2405 int num_stars = (cur_count * max_stars) / max_count;
2406
2407 if (format_type == FILE_TYPE_TEXT) {
2408 fp << setw(max_name_width) << cur_name << " "
2409 << setw(max_count_width) << cur_count << " ";
2410 for (int i = 0; i < num_stars; i++) { fp << '#'; }
2411 fp << endl;
2412 } else { // FILE_TYPE_HTML
2413 fp << "<tr><td>" << cur_name
2414 << "<td>" << cur_count
2415 << "<td>";
2416 for (int i = 0; i < num_stars; i++) { fp << '#'; }
2417 fp << "</tr>" << endl;
2418 }
2419 }
2420
2421 if (format_type == FILE_TYPE_TEXT) {
2422 // Skip a line between histograms...
2423 fp << endl;
2424 } else {
2425 fp << "</table><br><br>" << endl << endl;
2426 }
2427 }
2428
2429 // If in HTML mode, we need to end the file...
2430 if (format_type == FILE_TYPE_HTML) {
2431 fp << "</table>" << endl
2432 << "</center>" << endl;
2433 }
2434 }
2435
2436
2437 ///// Population Analysis Commands ////
2438
2439 // Comparator for p_stat struct: compared by cpu_count
2440 // Higher cpu_count is considered "less" in order to sort greatest-to-least
2441 // Furthermore, within the same cpu_count we sort greatest-to-least
2442 // based on genotype_count
PStatsComparator(const void * elem1,const void * elem2)2443 int cAnalyze::PStatsComparator(const void * elem1, const void * elem2)
2444 {
2445 if (((p_stats*)elem2)->cpu_count > ((p_stats*)elem1)->cpu_count) return 1;
2446 if (((p_stats*)elem2)->cpu_count < ((p_stats*)elem1)->cpu_count) return -1;
2447
2448 // if the cpu_counts are the same, we'd like to sort greatest-to-least
2449 // on genotype_count
2450 if (((p_stats*)elem2)->genotype_count > ((p_stats*)elem1)->genotype_count) return 1;
2451 if (((p_stats*)elem2)->genotype_count < ((p_stats*)elem1)->genotype_count) return -1;
2452
2453 // if they have the same cpu_count and genotype_count, we call them the same
2454 return 0;
2455 }
2456
CommandPrintPhenotypes(cString cur_string)2457 void cAnalyze::CommandPrintPhenotypes(cString cur_string)
2458 {
2459 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing phenotypes in batch "
2460 << cur_batch << endl;
2461 else cout << "Printing phenotypes..." << endl;
2462
2463 // Load in the variables...
2464 cString filename("phenotype.dat");
2465 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2466
2467 cString flag("");
2468 bool print_ttc = false;
2469 bool print_ttpc = false;
2470 while (cur_string.GetSize() != 0) {
2471 flag = cur_string.PopWord();
2472 if (flag == "total_task_count") print_ttc = true;
2473 else if (flag == "total_task_performance_count") print_ttpc = true;
2474 }
2475
2476 // Make sure we have at least one genotype...
2477 if (batch[cur_batch].List().GetSize() == 0) return;
2478
2479 // Setup the phenotype categories...
2480 const int num_tasks = batch[cur_batch].List().GetFirst()->GetNumTasks();
2481
2482 tHashMap<cBitArray, p_stats> phenotype_table(HASH_TABLE_SIZE_MEDIUM);
2483
2484 // Loop through all of the genotypes in this batch...
2485 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2486 cAnalyzeGenotype * genotype = NULL;
2487 while ((genotype = batch_it.Next()) != NULL) {
2488 cBitArray phen_id(num_tasks + 1); // + 1 because phenotype also depends on viability
2489 phen_id.Clear();
2490 if (genotype->GetViable() == true) phen_id++;
2491 for (int i = 0; i < num_tasks; i++) {
2492 if (genotype->GetTaskCount(i) > 0) phen_id.Set(i + 1, true); // again, +1 because we used 0th bit for viability
2493 }
2494
2495 p_stats phenotype_stats;
2496
2497 if (phenotype_table.Find(phen_id, phenotype_stats)) {
2498 phenotype_stats.cpu_count += genotype->GetNumCPUs();
2499 phenotype_stats.genotype_count += 1;
2500 phenotype_stats.total_length += genotype->GetNumCPUs() * genotype->GetLength();
2501 phenotype_stats.total_gest += genotype->GetNumCPUs() * genotype->GetGestTime();
2502
2503 // don't bother tracking these unless asked for
2504 if (print_ttc || print_ttpc) {
2505 for (int i = 0; i < num_tasks; i++) {
2506 phenotype_stats.total_task_count += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
2507 phenotype_stats.total_task_performance_count += genotype->GetTaskCount(i);
2508 }
2509 }
2510 }
2511 else {
2512 phenotype_stats.phen_id = phen_id; // this is for ease of printing and sorting
2513 phenotype_stats.cpu_count = genotype->GetNumCPUs();
2514 phenotype_stats.genotype_count = 1;
2515 phenotype_stats.total_length = genotype->GetNumCPUs() * genotype->GetLength();
2516 phenotype_stats.total_gest = genotype->GetNumCPUs() * genotype->GetGestTime();
2517
2518 phenotype_stats.total_task_count = 0;
2519 phenotype_stats.total_task_performance_count = 0;
2520
2521 // don't bother actually tracking these unless asked for
2522 if (print_ttc || print_ttpc) {
2523 for (int i = 0; i < num_tasks; i++) {
2524 phenotype_stats.total_task_count += ((genotype->GetTaskCount(i) > 0) ? 1 : 0);
2525 phenotype_stats.total_task_performance_count += genotype->GetTaskCount(i);
2526 }
2527 }
2528 }
2529
2530 // add to / update table
2531 phenotype_table.Set(phen_id, phenotype_stats);
2532 }
2533
2534 ofstream& fp = m_world->GetDataFileOFStream(filename);
2535
2536 fp << "# 1: Number of organisms of this phenotype" << endl
2537 << "# 2: Number of genotypes of this phenotye" << endl
2538 << "# 3: Average Genome Length" << endl
2539 << "# 4: Average Gestation Time" << endl
2540 << "# 5: Viability of Phenotype" << endl;
2541 if (print_ttc && print_ttpc) {
2542 fp << "# 6: Total # of different tasks performed by this phenotype" << endl
2543 << "# 7: Average # of tasks performed by this phenotype" << endl
2544 << "# 8+: Tasks performed in this phenotype" << endl;
2545 }
2546 else if (print_ttc) {
2547 fp << "# 6: Total # of different tasks performed by this phenotype" << endl
2548 << "# 7+: Tasks performed in this phenotype" << endl;
2549 }
2550 else if (print_ttpc) {
2551 fp << "# 6: Total # of tasks performed by this phenotype" << endl
2552 << "# 7+: Tasks performed in this phenotype" << endl;
2553 }
2554 else { fp << "# 6+: Tasks performed in this phenotype" << endl; }
2555 fp << endl;
2556
2557 // Print the phenotypes in order from greatest cpu count to least
2558 // Within cpu_count, print in order from greatest genotype count to least
2559 tArray<p_stats> phenotype_array;
2560 phenotype_table.GetValues(phenotype_array);
2561 phenotype_array.MergeSort(&cAnalyze::PStatsComparator); // sort by cpu_count, greatest to least
2562
2563 for (int i = 0; i < phenotype_array.GetSize(); i++) {
2564 fp << phenotype_array[i].cpu_count << " "
2565 << phenotype_array[i].genotype_count << " "
2566 << phenotype_array[i].total_length / phenotype_array[i].cpu_count << " "
2567 << phenotype_array[i].total_gest / phenotype_array[i].cpu_count << " "
2568 << phenotype_array[i].phen_id.Get(0) << " "; // viability
2569
2570 if (print_ttc) {
2571 fp << phenotype_array[i].total_task_count / phenotype_array[i].genotype_count << " ";
2572 }
2573 if (print_ttpc) {
2574 fp << phenotype_array[i].total_task_performance_count / phenotype_array[i].genotype_count << " ";
2575 }
2576
2577 // not using cBitArray::Print because it would print viability bit too
2578 for (int j = 1; j <= num_tasks; j++) { fp << phenotype_array[i].phen_id.Get(j) << " "; }
2579
2580 fp << endl;
2581 }
2582
2583 m_world->GetDataFileManager().Remove(filename);
2584
2585 }
2586
2587
2588 // Print various diversity metrics from the current batch of genotypes...
CommandPrintDiversity(cString cur_string)2589 void cAnalyze::CommandPrintDiversity(cString cur_string)
2590 {
2591 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing diversity data for batch "
2592 << cur_batch << endl;
2593 else cout << "Printing diversity data..." << endl;
2594
2595 // Load in the variables...
2596 cString filename("diversity.dat");
2597 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
2598
2599 // Make sure we have at least one genotype...
2600 if (batch[cur_batch].List().GetSize() == 0) return;
2601
2602 // Setup the task categories...
2603 const int num_tasks = batch[cur_batch].List().GetFirst()->GetNumTasks();
2604 tArray<int> task_count(num_tasks);
2605 tArray<int> task_gen_count(num_tasks);
2606 tArray<double> task_gen_dist(num_tasks);
2607 tArray<double> task_site_entropy(num_tasks);
2608 task_count.SetAll(0);
2609 task_gen_count.SetAll(0);
2610
2611 // We must determine the average hamming distance between genotypes in
2612 // this batch that perform each task. Levenstein distance would be ideal,
2613 // but takes a while, so we'll do it this way first. For the calculations,
2614 // we need to know home many times each instruction appears at each
2615 // position for each genotype collection that performs a particular task.
2616 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
2617 const int num_insts = is.GetSize();
2618 const int max_length = BatchUtil_GetMaxLength();
2619 tMatrix<int> inst_freq(max_length, num_insts+1);
2620
2621 for (int task_id = 0; task_id < num_tasks; task_id++) {
2622 inst_freq.SetAll(0);
2623
2624 // Loop through all genotypes, singling out those that do current task...
2625 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2626 cAnalyzeGenotype* genotype = NULL;
2627 while ((genotype = batch_it.Next()) != NULL) {
2628 if (genotype->GetGenome().GetInstSet() != is.GetInstSetName() || genotype->GetTaskCount(task_id) == 0) continue;
2629
2630 const Sequence& genome = genotype->GetGenome().GetSequence();
2631 const int num_cpus = genotype->GetNumCPUs();
2632 task_count[task_id] += num_cpus;
2633 task_gen_count[task_id]++;
2634 for (int i = 0; i < genotype->GetLength(); i++) {
2635 inst_freq( i, genome[i].GetOp() ) += num_cpus;
2636 }
2637 for (int i = genotype->GetLength(); i < max_length; i++) {
2638 inst_freq(i, num_insts) += num_cpus; // Entry for "past genome end"
2639 }
2640 }
2641
2642 // Analyze the data for this entry...
2643 const int cur_count = task_count[task_id];
2644 const int total_pairs = cur_count * (cur_count - 1) / 2;
2645 int total_dist = 0;
2646 double total_ent = 0;
2647 for (int pos = 0; pos < max_length; pos++) {
2648 // Calculate distance component...
2649 for (int inst1 = 0; inst1 < num_insts; inst1++) {
2650 if (inst_freq(pos, inst1) == 0) continue;
2651 for (int inst2 = inst1+1; inst2 <= num_insts; inst2++) {
2652 total_dist += inst_freq(pos, inst1) * inst_freq(pos, inst2);
2653 }
2654 }
2655
2656 // Calculate entropy component...
2657 for (int i = 0; i <= num_insts; i++) {
2658 const int cur_freq = inst_freq(pos, i);
2659 if (cur_freq == 0) continue;
2660 const double p = ((double) cur_freq) / (double) cur_count;
2661 total_ent -= p * log(p);
2662 }
2663 }
2664
2665 task_gen_dist[task_id] = ((double) total_dist) / (double) total_pairs;
2666 task_site_entropy[task_id] = total_ent;
2667 }
2668
2669 // Print out the results...
2670 cDataFile & df = m_world->GetDataFile(filename);
2671
2672 for (int i = 0; i < num_tasks; i++) {
2673 df.Write(i, "# 1: Task ID");
2674 df.Write(task_count[i], "# 2: Number of organisms performing task");
2675 df.Write(task_gen_count[i], "# 3: Number of genotypes performing task");
2676 df.Write(task_gen_dist[i], "# 4: Average distance between genotypes performing task");
2677 df.Write(task_site_entropy[i], "# 5: Total per-site entropy of genotypes performing task");
2678 df.Endl();
2679 }
2680 }
2681
2682
PhyloCommunityComplexity(cString cur_string)2683 void cAnalyze::PhyloCommunityComplexity(cString cur_string)
2684 {
2685 /////////////////////////////////////////////////////////////////////////
2686 // Calculate the mutual information between all genotypes and environment
2687 /////////////////////////////////////////////////////////////////////////
2688
2689 cout << "Analyze biocomplexity of current population about environment ...\n";
2690
2691 // Get the number of genotypes that are gonna be analyzed.
2692 int max_genotypes = cur_string.PopWord().AsInt();
2693
2694 // Get update
2695 int update = cur_string.PopWord().AsInt();
2696
2697 // Get the directory
2698 cString directory = PopDirectory(cur_string, "community_cpx/");
2699
2700 // Get the file name that saves the result
2701 cString filename = cur_string.PopWord();
2702 if (filename.IsEmpty()) {
2703 filename = "community.complexity.dat";
2704 }
2705
2706 filename.Set("%s%s", static_cast<const char*>(directory), static_cast<const char*>(filename));
2707 ofstream& cpx_fp = m_world->GetDataFileOFStream(filename);
2708
2709 cpx_fp << "# Legend:" << endl;
2710 cpx_fp << "# 1: Genotype ID" << endl;
2711 cpx_fp << "# 2: Entropy given Known Genotypes" << endl;
2712 cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
2713 cpx_fp << "# 4: New Information about Environment" << endl;
2714 cpx_fp << "# 5: Total Complexity" << endl;
2715 cpx_fp << endl;
2716
2717
2718 /////////////////////////////////////////////////////////////////////////////////
2719 // Loop through all genotypes in all batches and build id vs. genotype map
2720
2721 map<int, cAnalyzeGenotype *> genotype_database;
2722 for (int i = 0; i < GetNumBatches(); ++ i) {
2723 tListIterator<cAnalyzeGenotype> batch_it(batch[i].List());
2724 cAnalyzeGenotype * genotype = NULL;
2725 while ((genotype = batch_it.Next()) != NULL) {
2726 genotype_database.insert(make_pair(genotype->GetID(), genotype));
2727 }
2728 }
2729
2730
2731 ////////////////////////////////////////////////
2732 // Check if all the genotypes having same length
2733
2734 int length_genome = 0;
2735 if (genotype_database.size() > 0) {
2736 length_genome = genotype_database.begin()->second->GetLength();
2737 }
2738 map<int, cAnalyzeGenotype*>::iterator gen_iterator = genotype_database.begin();
2739 for (; gen_iterator != genotype_database.end(); ++ gen_iterator) {
2740 if (gen_iterator->second->GetLength() != length_genome) {
2741 cerr << "Genotype " << gen_iterator->first << " has different genome length." << endl;
2742 if (exit_on_error) exit(1);
2743 }
2744 }
2745
2746
2747 ///////////////////////
2748 // Create Test Info
2749 // No choice of use_resources for this analyze command...
2750 cCPUTestInfo test_info;
2751 test_info.SetResourceOptions(RES_CONSTANT, m_resources, update, m_resource_time_spent_offset);
2752
2753
2754 ///////////////////////////////////////////////////////////////////////
2755 // Choose the first n most abundant genotypes and put them in community
2756
2757 vector<cAnalyzeGenotype *> community;
2758 cAnalyzeGenotype * genotype = NULL;
2759 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
2760
2761 while (((genotype = batch_it.Next()) != NULL) && (community.size() < static_cast<unsigned int>(max_genotypes))) {
2762 community.push_back(genotype);
2763 }
2764
2765
2766 ///////////////////////////
2767 // Measure hamming distance
2768
2769 int size_community = community.size();
2770 if (size_community == 0) {
2771 cerr << "There is no genotype in this community." << endl;
2772 if (exit_on_error) exit(1);
2773 }
2774 typedef pair<int,int> gen_pair;
2775 map<gen_pair, int> hamming_dist;
2776
2777 for (int i = 0; i< size_community; ++ i) {
2778 for (int j = i+1; j < size_community; ++ j) {
2779 int dist = Sequence::FindHammingDistance(community[i]->GetGenome().GetSequence(),
2780 community[j]->GetGenome().GetSequence());
2781 int id1 = community[i]->GetID();
2782 int id2 = community[j]->GetID();
2783
2784 hamming_dist.insert(make_pair(gen_pair(id1, id2), dist));
2785 hamming_dist.insert(make_pair(gen_pair(id2, id1), dist));
2786 }
2787 }
2788
2789
2790 //////////////////////////////////
2791 // Get Most Recent Common Ancestor
2792
2793 map<gen_pair, cAnalyzeGenotype *> mrca;
2794 map<gen_pair, int> raw_dist;
2795 for (int i = 0; i< size_community; ++ i) {
2796 for (int j = i+1; j < size_community; ++ j) {
2797
2798 cAnalyzeGenotype * lineage1_genotype = community[i];
2799 cAnalyzeGenotype * lineage2_genotype = community[j];
2800 int total_dist = 0;
2801
2802 while (lineage1_genotype->GetID() != lineage2_genotype->GetID()) {
2803 if (lineage1_genotype->GetID() > lineage2_genotype->GetID()) {
2804 int parent_id = lineage1_genotype->GetParentID();
2805 cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
2806
2807 total_dist += Sequence::FindHammingDistance(lineage1_genotype->GetGenome().GetSequence(),
2808 parent->GetGenome().GetSequence());
2809 lineage1_genotype = parent;
2810 } else {
2811 int parent_id = lineage2_genotype->GetParentID();
2812 cAnalyzeGenotype * parent = genotype_database.find(parent_id)->second;
2813 total_dist += Sequence::FindHammingDistance(lineage2_genotype->GetGenome().GetSequence(),
2814 parent->GetGenome().GetSequence());
2815
2816 lineage2_genotype = parent;
2817 }
2818 }
2819
2820 int id1 = community[i]->GetID();
2821 int id2 = community[j]->GetID();
2822 mrca.insert(make_pair(gen_pair(id1, id2), lineage1_genotype));
2823 mrca.insert(make_pair(gen_pair(id2, id1), lineage1_genotype));
2824 raw_dist.insert(make_pair(gen_pair(id1, id2), total_dist));
2825 raw_dist.insert(make_pair(gen_pair(id2, id1), total_dist));
2826 }
2827 }
2828
2829
2830 vector<cAnalyzeGenotype *> sorted_community = community;
2831
2832
2833 /////////////////////////////////////////////
2834 // Loop through genotypes in sorted community
2835
2836 double complexity = 0.0;
2837 vector<cAnalyzeGenotype *> given_genotypes;
2838
2839 for (int i = 0; i < size_community; ++ i) {
2840 genotype = sorted_community[i];
2841
2842 // Skip the dead organisms
2843 genotype->Recalculate(m_ctx, &test_info);
2844 if (genotype->GetFitness() == 0) continue;
2845
2846 int num_insts = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet()).GetSize();
2847
2848 vector<double> one_line_prob(num_insts, 0.0);
2849 vector< vector<double> > prob(length_genome, one_line_prob);
2850
2851 cout << endl << genotype->GetID() << endl;
2852
2853 if (given_genotypes.size() >= 1) {
2854 //////////////////////////////////////////////////
2855 // Look for a genotype that is closest to this one
2856
2857 cAnalyzeGenotype* min_depth_gen = given_genotypes[0];
2858 cAnalyzeGenotype* tmrca = mrca.find(gen_pair(genotype->GetID(), given_genotypes[0]->GetID()))->second;
2859 int min_depth_dist = genotype->GetDepth() + given_genotypes[0]->GetDepth() - 2 * tmrca->GetDepth();
2860
2861 for (unsigned int i = 1; i < given_genotypes.size() ; ++ i) {
2862 cAnalyzeGenotype* given_genotype = given_genotypes[i];
2863 cAnalyzeGenotype* tmrca = mrca.find(gen_pair(genotype->GetID(), given_genotype->GetID()))->second;
2864 int dist = genotype->GetDepth() + given_genotype->GetDepth() - 2 * tmrca->GetDepth();
2865
2866 if (dist < min_depth_dist) {
2867 min_depth_dist = dist;
2868 min_depth_gen = given_genotype;
2869 }
2870 }
2871
2872 const Genome& given_genome = min_depth_gen->GetGenome();
2873 const Genome& base_genome = genotype->GetGenome();
2874 Genome mod_genome(base_genome);
2875
2876 for (int line = 0; line < length_genome; ++ line) {
2877 int given_inst = given_genome.GetSequence()[line].GetOp();
2878 mod_genome = base_genome;
2879 mod_genome.GetSequence()[line].SetOp(given_inst);
2880 cAnalyzeGenotype test_genotype(m_world, mod_genome);
2881 test_genotype.Recalculate(m_ctx, &test_info);
2882
2883 // Only when given inst make the genotype alive
2884 if (test_genotype.GetFitness() > 0) {
2885 prob[line][given_inst] += pow(1 - 1.0/length_genome, min_depth_dist);
2886 }
2887 }
2888
2889 cpx_fp << genotype->GetID() << " " << min_depth_dist << " "
2890 << raw_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << " "
2891 << hamming_dist.find(gen_pair(genotype->GetID(), min_depth_gen->GetID()))->second << " ";
2892 } else {
2893 cpx_fp << genotype->GetID() << " ";
2894 }
2895
2896
2897 ///////////////////////////////////////////////////////////////////
2898 // Point mutation at all lines of code to look for neutral mutation
2899 // and the mutations that can make organism alive
2900
2901 cout << "Test point mutation." << endl;
2902 vector<bool> one_line_neutral(num_insts, false);
2903 vector< vector<bool> > neutral_mut(length_genome, one_line_neutral);
2904 vector< vector<bool> > alive_mut(length_genome, one_line_neutral);
2905
2906 genotype->Recalculate(m_ctx, &test_info);
2907 double base_fitness = genotype->GetFitness();
2908 cout << base_fitness << endl;
2909 const Genome& base_genome = genotype->GetGenome();
2910 Genome mod_genome(base_genome);
2911
2912 for (int line = 0; line < length_genome; ++ line) {
2913 int cur_inst = base_genome.GetSequence()[line].GetOp();
2914
2915 for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
2916 mod_genome.GetSequence()[line].SetOp(mod_inst);
2917 cAnalyzeGenotype test_genotype(m_world, mod_genome);
2918 test_genotype.Recalculate(m_ctx, &test_info);
2919 if (test_genotype.GetFitness() >= base_fitness) {
2920 neutral_mut[line][mod_inst] = true;
2921 }
2922 if (test_genotype.GetFitness() > 0) {
2923 alive_mut[line][mod_inst] = true;
2924 }
2925 }
2926
2927 mod_genome.GetSequence()[line].SetOp(cur_inst);
2928 }
2929
2930
2931 /////////////////////////////////////////
2932 // Normalize the probability at each line
2933
2934 vector< vector<double> > prob_before_env(length_genome, one_line_prob);
2935
2936 for (int line = 0; line < length_genome; ++ line) {
2937 double cur_total_prob = 0.0;
2938 int num_alive = 0;
2939 for (int inst = 0; inst < num_insts; ++ inst) {
2940 if (alive_mut[line][inst] == true) {
2941 cur_total_prob += prob[line][inst];
2942 num_alive ++;
2943 }
2944 }
2945 if (cur_total_prob > 1) {
2946 cout << "Total probability at " << line << " is greater than 0." << endl;
2947 if (exit_on_error) exit(1);
2948 }
2949 double left_prob = 1 - cur_total_prob;
2950
2951 for (int inst = 0; inst < num_insts; ++ inst) {
2952 if (alive_mut[line][inst] == true) {
2953 prob_before_env[line][inst] = prob[line][inst] + left_prob / num_alive;
2954 } else {
2955 prob_before_env[line][inst] = 0;
2956 }
2957 }
2958
2959 }
2960
2961
2962 /////////////////////////////////
2963 // Calculate entropy of each line
2964
2965 vector<double> entropy(length_genome, 0.0);
2966 for (int line = 0; line < length_genome; ++ line) {
2967 double sum = 0;
2968 for (int inst = 0; inst < num_insts; ++ inst) {
2969 sum += prob_before_env[line][inst];
2970 if (prob_before_env[line][inst] > 0) {
2971 entropy[line] -= prob_before_env[line][inst] * log(prob_before_env[line][inst]) / log(num_insts*1.0);
2972 }
2973 }
2974 if (sum > 1.001 || sum < 0.999) {
2975 cout << "Sum of probability is not 1 at line " << line << endl;
2976 if (exit_on_error) exit(1);
2977 }
2978 }
2979
2980
2981 /////////////////////////////////////////////////////
2982 // Redistribute the probability of insts at each line
2983
2984 vector< vector<double> > prob_given_env(length_genome, one_line_prob);
2985
2986 for (int line = 0; line < length_genome; ++ line) {
2987 double total_prob = 0.0;
2988 int num_neutral = 0;
2989 for (int inst = 0; inst < num_insts; ++ inst) {
2990 if (neutral_mut[line][inst] == true) {
2991 num_neutral ++;
2992 total_prob += prob[line][inst];
2993 }
2994 }
2995
2996 double left = 1 - total_prob;
2997
2998 for (int inst = 0; inst < num_insts; ++ inst) {
2999 if (neutral_mut[line][inst] == true) {
3000 prob_given_env[line][inst] = prob[line][inst] + left / num_neutral;
3001 } else {
3002 prob_given_env[line][inst] = 0.0;
3003 }
3004 }
3005
3006 }
3007
3008
3009 ////////////////////////////////////////////////
3010 // Calculate the entropy given environment
3011
3012 vector<double> entropy_given_env(length_genome, 0.0);
3013 for (int line = 0; line < length_genome; ++ line) {
3014 double sum = 0;
3015 for (int inst = 0; inst < num_insts; ++ inst) {
3016 sum += prob_given_env[line][inst];
3017 if (prob_given_env[line][inst] > 0) {
3018 entropy_given_env[line] -= prob_given_env[line][inst] * log(prob_given_env[line][inst]) /
3019 log(num_insts*1.0);
3020 }
3021 }
3022 if (sum > 1.001 || sum < 0.999) {
3023 cout << "Sum of probability is not 1 at line " << line << " " << sum << endl;
3024 if (exit_on_error) exit(1);
3025 }
3026 }
3027
3028
3029 ///////////////////////////////////////////////////////////////////////////
3030 // Calculate the information between genotype and env given other genotypes
3031 double information = 0.0;
3032 double entropy_before = 0.0;
3033 double entropy_after = 0.0;
3034 for (int line = 0; line < length_genome; ++ line) {
3035 entropy_before += entropy[line];
3036 entropy_after += entropy_given_env[line];
3037
3038 if (entropy[line] >= entropy_given_env[line]) {
3039 information += entropy[line] - entropy_given_env[line];
3040 } else { // Negative information is because given condition is not related with this genotype ...
3041
3042 // Count the number of insts that can make genotype alive
3043 int num_inst_alive = 0;
3044 for (int inst = 0; inst < num_insts; ++ inst) {
3045 if (alive_mut[line][inst] == true) {
3046 num_inst_alive ++;
3047 }
3048 }
3049
3050 double entropy_before = - log(1.0/num_inst_alive) / log(num_insts*1.0);
3051 information += entropy_before - entropy_given_env[line];
3052 if (information < 0) {
3053 cout << "Negative information at site " << line << endl;
3054 if (exit_on_error) exit(1);
3055 }
3056 }
3057
3058 }
3059 complexity += information;
3060
3061 cpx_fp << entropy_before << " " << entropy_after << " " << information << " " << complexity << " ";
3062 genotype->PrintTasks(cpx_fp, 0, -1);
3063 cpx_fp << endl;
3064
3065
3066 /////////////////////////////////////////////////////////////
3067 // This genotype becomes the given condition of next genotype
3068
3069 given_genotypes.push_back(genotype);
3070 }
3071
3072 m_world->GetDataFileManager().Remove(filename);
3073 return;
3074 }
3075
3076
3077 // Calculate Edit Distance stats for all pairs of organisms across the population.
CommandPrintDistances(cString cur_string)3078 void cAnalyze::CommandPrintDistances(cString cur_string)
3079 {
3080 cout << "Calculating Edit Distance between all pairs of genotypes." << endl;
3081
3082 // Get the maximum distance we care about
3083 int dist_threshold = cur_string.PopWord().AsInt();
3084
3085 // Get the file name that saves the result
3086 cString filename = cur_string.PopWord();
3087 if (filename.IsEmpty()) {
3088 filename = "edit_distance.dat";
3089 }
3090
3091 ofstream & fout = m_world->GetDataFileOFStream(filename);
3092
3093 fout << "# All pairs edit distance" << endl;
3094 fout << "# 1: Num organism pairs" << endl;
3095 fout << "# 2: Mean distance computed using (n*(n-1)/2) as all pairs." << endl;
3096 fout << "# 3: Mean distance" << endl;
3097 fout << "# 4: Max distance" << endl;
3098 fout << "# 5: Frac distances above threshold (" << dist_threshold << ")" << endl;
3099 fout << endl;
3100
3101 // Loop through all pairs of organisms.
3102 int dist_total = 0;
3103 int dist_max = 0;
3104 int pair_count = 0;
3105 int threshold_pair_count = 0;
3106 double count = 0;
3107
3108 cAnalyzeGenotype * genotype1 = NULL;
3109 cAnalyzeGenotype * genotype2 = NULL;
3110 tListIterator<cAnalyzeGenotype> batch_it1(batch[cur_batch].List());
3111
3112 int watermark = 0;
3113
3114 while ((genotype1 = batch_it1.Next()) != NULL) {
3115 count ++;
3116 const int gen1_count = genotype1->GetNumCPUs();
3117
3118 // Pair this genotype with itself for a distance of 0.
3119 pair_count += gen1_count * (gen1_count - 1) / 2;
3120
3121 // Loop through the other genotypes this one can be paired with.
3122 tListIterator<cAnalyzeGenotype> batch_it2(batch_it1);
3123 while ((genotype2 = batch_it2.Next()) != NULL) {
3124 const int gen2_count = genotype2->GetNumCPUs();
3125 const int cur_pairs = gen1_count * gen2_count;
3126 const int cur_dist = Sequence::FindEditDistance(genotype1->GetGenome().GetSequence(), genotype2->GetGenome().GetSequence());
3127 dist_total += cur_pairs * cur_dist;
3128 if (cur_dist > dist_max) dist_max = cur_dist;
3129 pair_count += cur_pairs;
3130 if (cur_dist >= dist_threshold) threshold_pair_count += cur_pairs;
3131
3132 if (pair_count > watermark) {
3133 cout << watermark << endl;
3134 watermark += 100000;
3135 }
3136 }
3137 }
3138
3139 count = (count * (count-1) ) /2;
3140 fout << pair_count << " "
3141 << ((double) dist_total) / count << " "
3142 << ((double) dist_total) / (double) pair_count << " "
3143 << dist_max << " "
3144 << ((double) threshold_pair_count) / (double) pair_count << " "
3145 << endl;
3146
3147 return;
3148 }
3149
3150
3151 // Calculate various stats for trees in population.
CommandPrintTreeStats(cString cur_string)3152 void cAnalyze::CommandPrintTreeStats(cString cur_string)
3153 {
3154 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing tree stats for batch "
3155 << cur_batch << endl;
3156 else cout << "Printing tree stats..." << endl;
3157
3158 // Load in the variables...
3159 cString filename("tree_stats.dat");
3160 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3161
3162 ofstream& fp = m_world->GetDataFileOFStream(filename);
3163
3164 fp << "# Legend:" << endl;
3165 fp << "# 1: Average cumulative stemminess" << endl;
3166 fp << endl;
3167
3168 cAnalyzeTreeStats_CumulativeStemminess agts(m_world);
3169 agts.AnalyzeBatchTree(batch[cur_batch].List());
3170
3171 fp << agts.AverageStemminess();
3172 fp << endl;
3173 }
3174
3175
3176 // Calculate cumulative stemmines for trees in population.
CommandPrintCumulativeStemminess(cString cur_string)3177 void cAnalyze::CommandPrintCumulativeStemminess(cString cur_string)
3178 {
3179 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing cumulative stemmines for batch "
3180 << cur_batch << endl;
3181 else cout << "Printing cumulative stemmines..." << endl;
3182
3183 // Load in the variables...
3184 cString filename("cumulative_stemminess.dat");
3185 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3186
3187 ofstream& fp = m_world->GetDataFileOFStream(filename);
3188
3189 fp << "# Legend:" << endl;
3190 fp << "# 1: Average cumulative stemminess" << endl;
3191 fp << endl;
3192
3193 cAnalyzeTreeStats_CumulativeStemminess agts(m_world);
3194 agts.AnalyzeBatchTree(batch[cur_batch].List());
3195
3196 fp << agts.AverageStemminess();
3197 fp << endl;
3198 }
3199
3200
3201
3202 // Calculate Pybus-Harvey gamma statistic for trees in population.
CommandPrintGamma(cString cur_string)3203 void cAnalyze::CommandPrintGamma(cString cur_string)
3204 {
3205 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Printing Pybus-Harvey gamma statistic for batch "
3206 << cur_batch << endl;
3207 else cout << "Printing Pybus-Harvey gamma statistic..." << endl;
3208
3209 // Load in the variables...
3210 int end_time = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1; // #1
3211 if (end_time < 0) {
3212 cout << "Error: end_time (argument 1) must be specified as nonzero." << endl;
3213 return;
3214 }
3215
3216 cString filename("gamma.dat");
3217 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3218
3219 cString lineage_thru_time_fname("");
3220 if (cur_string.GetSize() != 0) lineage_thru_time_fname = cur_string.PopWord();
3221
3222 /*
3223 I've hardwired the option 'furcation_time_convention' to '1'.
3224
3225 'furcation_time_convention' refers to the time at which a 'speciation' event
3226 occurs (I'm not sure 'speciation' is the right word for this). If a parent
3227 genotype produces two distinct surviving lineages, then the time of
3228 speciation could be:
3229 - 1: The parent genotype's birth time
3230 - 2: The elder child genotype's birth time
3231 - 3: The younger child genotype's birth time
3232
3233 @kgn
3234 */
3235 // int furcation_time_convention = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 1;
3236 int furcation_time_convention = 1;
3237
3238 ofstream& fp = m_world->GetDataFileOFStream(filename);
3239
3240 fp << "# Legend:" << endl;
3241 fp << "# 1: Pybus-Harvey gamma statistic" << endl;
3242 fp << endl;
3243
3244 cAnalyzeTreeStats_Gamma atsg(m_world);
3245 atsg.AnalyzeBatch(batch[cur_batch].List(), end_time, furcation_time_convention);
3246
3247 fp << atsg.Gamma();
3248 fp << endl;
3249
3250 if(lineage_thru_time_fname != ""){
3251 ofstream& ltt_fp = m_world->GetDataFileOFStream(lineage_thru_time_fname);
3252
3253 ltt_fp << "# Legend:" << endl;
3254 ltt_fp << "# 1: num_lineages" << endl;
3255 ltt_fp << "# 2: furcation_time" << endl;
3256 ltt_fp << endl;
3257
3258 int size = atsg.FurcationTimes().GetSize();
3259 for(int i = 0; i < size; i++){
3260 ltt_fp << i+2 << " " << atsg.FurcationTimes()[i] << endl;
3261 }
3262 }
3263 }
3264
3265
AnalyzeCommunityComplexity(cString cur_string)3266 void cAnalyze::AnalyzeCommunityComplexity(cString cur_string)
3267 {
3268 /////////////////////////////////////////////////////////////////////
3269 // Calculate the mutual information between community and environment
3270 /////////////////////////////////////////////////////////////////////
3271
3272 cout << "Analyze community complexity of current population about environment with Charles method ...\n";
3273
3274 // Get the number of genotypes that are gonna be analyzed.
3275 int max_genotypes = cur_string.PopWord().AsInt(); // If it is 0, we sample
3276 //two genotypes for each task.
3277
3278 // Get update
3279 int update = cur_string.PopWord().AsInt();
3280
3281 // Get the directory
3282 cString dir = cur_string.PopWord();
3283 cString defaultDir = "community_cpx/";
3284 cString directory = PopDirectory(dir, defaultDir);
3285
3286 // Get the file name that saves the result
3287 cString filename = cur_string.PopWord();
3288 if (filename.IsEmpty()) {
3289 filename = "community.complexity.dat";
3290 }
3291
3292 filename.Set("%s%s", static_cast<const char*>(directory), static_cast<const char*>(filename));
3293 ofstream& cpx_fp = m_world->GetDataFileOFStream(filename);
3294
3295 cpx_fp << "# Legend:" << endl;
3296 cpx_fp << "# 1: Genotype ID" << endl;
3297 cpx_fp << "# 2: Entropy given Known Genotypes" << endl;
3298 cpx_fp << "# 3: Entropy given Both Known Genotypes and Env" << endl;
3299 cpx_fp << "# 4: New Information about Environment" << endl;
3300 cpx_fp << "# 5: Total Complexity" << endl;
3301 cpx_fp << "# 6: Hamming Distance to Closest Given Genotype" << endl;
3302 cpx_fp << "# 7: Total Hamming Distance to Closest Neighbor" << endl;
3303 cpx_fp << "# 8: Number of Organisms" << endl;
3304 cpx_fp << "# 9: Total Number of Organisms" << endl;
3305 cpx_fp << "# 10 - : Tasks Implemented" << endl;
3306 cpx_fp << endl;
3307
3308 ///////////////////////
3309 // Backup test CPU data
3310 cCPUTestInfo test_info;
3311 // No choice of use_resources for this analyze command...
3312 test_info.SetResourceOptions(RES_CONSTANT, m_resources, update, m_resource_time_spent_offset);
3313
3314 vector<cAnalyzeGenotype *> community;
3315 cAnalyzeGenotype * genotype = NULL;
3316 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
3317
3318
3319 if (max_genotypes > 0) {
3320
3321 ///////////////////////////////////////////////////////////////////////
3322 // Choose the first n most abundant genotypes and put them in community
3323
3324 while (((genotype = batch_it.Next()) != NULL) && (community.size() < static_cast<unsigned int>(max_genotypes))) {
3325 community.push_back(genotype);
3326 }
3327 } else if (max_genotypes == 0) {
3328
3329 /////////////////////////////////////
3330 // Choose two genotypes for each task
3331
3332 genotype = batch_it.Next();
3333 if (genotype == NULL) {
3334 m_world->GetDataFileManager().Remove(filename);
3335 return;
3336 }
3337 genotype->Recalculate(m_ctx, &test_info);
3338 int num_tasks = genotype->GetNumTasks();
3339 vector< vector<cAnalyzeGenotype *> > genotype_class(num_tasks);
3340 do {
3341 for (int task_id = 0; task_id < num_tasks; ++ task_id) {
3342 int count = genotype->GetTaskCount(task_id);
3343 if (count > 0) {
3344 genotype_class[task_id].push_back(genotype);
3345 }
3346 }
3347 } while ((genotype = batch_it.Next()) != NULL);
3348
3349 cRandom random;
3350 for (int task_id = 0; task_id < num_tasks; ++ task_id) {
3351 int num_genotype = genotype_class[task_id].size();
3352 if (num_genotype > 0) {
3353 int index = random.GetUInt(num_genotype);
3354 community.push_back(genotype_class[task_id][index]);
3355 index = random.GetUInt(num_genotype);
3356 community.push_back(genotype_class[task_id][index]);
3357 } else {
3358 // Pick up a class that is not empty
3359 int class_id = random.GetUInt(num_tasks);
3360 while (genotype_class[class_id].size() == 0) {
3361 class_id ++;
3362 if (class_id >= num_tasks) {
3363 class_id = 0;
3364 }
3365 }
3366 int num_genotype = genotype_class[class_id].size();
3367 int index = random.GetUInt(num_genotype);
3368 community.push_back(genotype_class[task_id][index]);
3369 index = random.GetUInt(num_genotype);
3370 community.push_back(genotype_class[task_id][index]);
3371 }
3372 }
3373
3374 }
3375
3376 ////////////////////////////////////////////////////
3377 // Test point mutation of each genotype in community
3378
3379 map<int, tMatrix<double> > point_mut;
3380 int size_community = community.size();
3381 int length_genome = 0;
3382 if (size_community > 1) {
3383 length_genome = community[0]->GetLength();
3384 }
3385
3386 for (int i = 0; i < size_community; ++ i) {
3387 genotype = community[i];
3388
3389 ///////////////////////////////////////////////////////////////////
3390 // Point mutation at all lines of code to look for neutral mutation
3391 cout << "Test point mutation for genotype " << genotype->GetID() << endl;
3392
3393 genotype->Recalculate(m_ctx, &test_info);
3394 const Genome& base_genome = genotype->GetGenome();
3395 const Sequence& base_seq = base_genome.GetSequence();
3396 Genome mod_genome(base_genome);
3397 Sequence& seq = mod_genome.GetSequence();
3398 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
3399 double base_fitness = genotype->GetFitness();
3400
3401 tMatrix<double> prob(length_genome, num_insts);
3402
3403
3404 for (int line = 0; line < length_genome; ++ line) {
3405 int cur_inst = base_seq[line].GetOp();
3406 int num_neutral = 0;
3407
3408 for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
3409 seq[line].SetOp(mod_inst);
3410 cAnalyzeGenotype test_genotype(m_world, mod_genome);
3411 test_genotype.Recalculate(m_ctx, &test_info);
3412 if (test_genotype.GetFitness() >= base_fitness) {
3413 prob[line][mod_inst] = 1.0;
3414 num_neutral ++;
3415 } else {
3416 prob[line][mod_inst] = 0.0;
3417 }
3418 }
3419
3420 for (int mod_inst = 0; mod_inst < num_insts; ++ mod_inst) {
3421 prob[line][mod_inst] /= num_neutral;
3422 }
3423
3424
3425 seq[line].SetOp(cur_inst);
3426 }
3427
3428 point_mut.insert(make_pair(genotype->GetID(), prob));
3429 }
3430
3431 //////////////////////////////////////
3432 // Loop through genotypes in community
3433
3434 double complexity = 0.0;
3435 int total_dist = 0;
3436 int total_cpus = 0;
3437 vector<cAnalyzeGenotype *> given_genotypes;
3438
3439 ////////////////////////////////////////
3440 // New information in the first gentoype
3441 genotype = community[0];
3442 double oo_initial_entropy = length_genome;
3443 double oo_conditional_entropy = 0.0;
3444 tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
3445 const int num_insts = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet()).GetSize();
3446
3447 for (int line = 0; line < length_genome; ++ line) {
3448 double oneline_entropy = 0.0;
3449 for (int inst = 0; inst < num_insts; ++ inst) {
3450 if (this_prob[line][inst] > 0) {
3451 oneline_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
3452 log(1.0*num_insts));
3453 }
3454 }
3455 oo_conditional_entropy += oneline_entropy;
3456 }
3457
3458 double new_info = oo_initial_entropy - oo_conditional_entropy;
3459 complexity += new_info;
3460 given_genotypes.push_back(genotype);
3461
3462 cpx_fp << genotype->GetID() << " "
3463 << oo_initial_entropy << " "
3464 << oo_conditional_entropy << " "
3465 << new_info << " "
3466 << complexity << " "
3467 << "0 0" << " ";
3468 int num_cpus = genotype->GetNumCPUs();
3469 total_cpus += num_cpus;
3470 cpx_fp << num_cpus << " " << total_cpus << " ";
3471 genotype->Recalculate(m_ctx, &test_info);
3472 genotype->PrintTasks(cpx_fp, 0, -1);
3473 cpx_fp << endl;
3474
3475
3476 //////////////////////////////////////////////////////
3477 // New information in other genotypes in community ...
3478 for (int i = 1; i < size_community; ++ i) {
3479 genotype = community[i];
3480 if (genotype->GetLength() != length_genome) {
3481 cerr << "Genotypes in the community do not same genome length.\n";
3482 if (exit_on_error) exit(1);
3483 }
3484
3485 // Skip the dead organisms
3486 genotype->Recalculate(m_ctx, &test_info);
3487 cout << genotype->GetID() << " " << genotype->GetFitness() << endl;
3488 if (genotype->GetFitness() == 0) {
3489 continue;
3490 }
3491
3492 double min_new_info = length_genome;
3493 double oo_initial_entropy = 0.0;
3494 double oo_conditional_entropy = 0.0;
3495 cAnalyzeGenotype* used_genotype = NULL;
3496 tMatrix<double> this_prob = point_mut.find(genotype->GetID())->second;
3497
3498 // For any given genotype, calculate the new information in genotype
3499 for (unsigned int j = 0; j < given_genotypes.size(); ++ j) {
3500
3501 tMatrix<double> given_prob = point_mut.find(given_genotypes[j]->GetID())->second;
3502 double new_info = 0.0;
3503 double total_initial_entropy = 0.0;
3504 double total_conditional_entropy = 0.0;
3505
3506 for (int line = 0; line < length_genome; ++ line) {
3507
3508 // H(genotype|known_genotype)
3509 double prob_overlap = 0;
3510 for (int inst = 0; inst < num_insts; ++ inst) {
3511 if (this_prob[line][inst] < given_prob[line][inst]) {
3512 prob_overlap += this_prob[line][inst];
3513 } else {
3514 prob_overlap += given_prob[line][inst];
3515 }
3516 }
3517
3518 double given_site_entropy = 0.0;
3519 for (int inst = 0; inst < num_insts; ++ inst) {
3520 if (given_prob[line][inst] > 0) {
3521 given_site_entropy -= given_prob[line][inst] * (log(given_prob[line][inst]) /
3522 log(1.0*num_insts));
3523 }
3524 }
3525
3526
3527 double entropy_overlap = 0.0;
3528 if (prob_overlap > 0 && (1 - prob_overlap > 0)) {
3529 entropy_overlap = (- prob_overlap * log(prob_overlap)
3530 - (1-prob_overlap) * log(1 - prob_overlap)) / log(1.0*num_insts);
3531 } else {
3532 entropy_overlap = 0;
3533 }
3534
3535 double initial_entropy = prob_overlap * given_site_entropy
3536 + (1 - prob_overlap) * 1 + entropy_overlap;
3537 total_initial_entropy += initial_entropy;
3538
3539 // H(genotype|E, known_genotype) = H(genotype|Env)
3540 double conditional_entropy = 0.0;
3541 for (int inst = 0; inst < num_insts; ++ inst) {
3542 if (this_prob[line][inst] > 0) {
3543 conditional_entropy -= this_prob[line][inst] * (log(this_prob[line][inst]) /
3544 log(1.0*num_insts));
3545 }
3546 }
3547 total_conditional_entropy += conditional_entropy;
3548
3549 if (conditional_entropy > initial_entropy + 0.00001) {
3550 cerr << "Negative Information.\n";
3551 cout << line << endl;
3552 for (int inst = 0; inst < num_insts; ++ inst) {
3553 cout << this_prob[line][inst] << " ";
3554 }
3555 cout << endl;
3556 for (int inst = 0; inst < num_insts; ++ inst) {
3557 cout << given_prob[line][inst] << " ";
3558 }
3559 cout << endl;
3560
3561 if (exit_on_error) exit(1);
3562 }
3563
3564 new_info += initial_entropy - conditional_entropy;
3565 }
3566
3567 if (new_info < min_new_info) {
3568 min_new_info = new_info;
3569 oo_initial_entropy = total_initial_entropy;
3570 oo_conditional_entropy = total_conditional_entropy;
3571 used_genotype = given_genotypes[j];
3572 cout << " " << "New closest genotype " << used_genotype->GetID()
3573 << " " << new_info << endl;
3574 }
3575
3576 }
3577 complexity += min_new_info;
3578 cpx_fp << genotype->GetID() << " "
3579 << oo_initial_entropy << " "
3580 << oo_conditional_entropy << " "
3581 << min_new_info << " " << complexity << " ";
3582
3583 int hamm_dist = Sequence::FindHammingDistance(genotype->GetGenome().GetSequence(), used_genotype->GetGenome().GetSequence());
3584 total_dist += hamm_dist;
3585 cpx_fp << hamm_dist << " " << total_dist << " ";
3586
3587 int num_cpus = genotype->GetNumCPUs();
3588 total_cpus += num_cpus;
3589 cpx_fp << num_cpus << " " << total_cpus << " ";
3590
3591
3592 genotype->PrintTasks(cpx_fp, 0, -1);
3593 cpx_fp << endl;
3594 given_genotypes.push_back(genotype);
3595 }
3596
3597 m_world->GetDataFileManager().Remove(filename);
3598 return;
3599 }
3600
3601 /* prints grid with what the fitness of an org in each range box would be given the resource levels
3602 at given update (10000 by default) SLG*/
CommandPrintResourceFitnessMap(cString cur_string)3603 void cAnalyze::CommandPrintResourceFitnessMap(cString cur_string)
3604 {
3605 cout << "creating resource fitness map...\n";
3606 // at what update do we want to use the resource concentrations from?
3607 int update = 10000;
3608 if (cur_string.GetSize() != 0) update = cur_string.PopWord().AsInt();
3609 // what file to write data to
3610 cString filename("resourcefitmap.dat");
3611 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3612 ofstream& fp = m_world->GetDataFileOFStream(filename);
3613
3614 int f1=-1, f2=-1, rangecount[2]={0,0}, threshcount[2]={0,0};
3615 double f1Max = 0.0, f1Min = 0.0, f2Max = 0.0, f2Min = 0.0;
3616
3617 // first need to find out how many thresh and range resources there are on each function
3618 // NOTE! this only works for 2-obj. problems right now!
3619 for (int i=0; i<m_world->GetEnvironment().GetReactionLib().GetSize(); i++)
3620 {
3621 cReaction* react = m_world->GetEnvironment().GetReactionLib().GetReaction(i);
3622 int fun = react->GetTask()->GetArguments().GetInt(0);
3623 double thresh = react->GetTask()->GetArguments().GetDouble(3);
3624 double threshMax = react->GetTask()->GetArguments().GetDouble(4);
3625 if (i==0)
3626 {
3627 f1 = fun;
3628 f1Max = react->GetTask()->GetArguments().GetDouble(1);
3629 f1Min = react->GetTask()->GetArguments().GetDouble(2);
3630 }
3631
3632 if (fun==f1 && threshMax>0)
3633 rangecount[0]++;
3634 else if (fun==f1 && thresh>=0)
3635 threshcount[0]++;
3636 else if (fun!=f1 && threshcount[1]==0 && rangecount[1]==0)
3637 {
3638 f2=fun;
3639 f2Max = react->GetTask()->GetArguments().GetDouble(1);
3640 f2Min = react->GetTask()->GetArguments().GetDouble(2);
3641 }
3642 if (fun==f2 && threshMax>0)
3643 rangecount[1]++;
3644 else if (fun==f2 && thresh>=0)
3645 threshcount[1]++;
3646
3647 }
3648 int fsize[2];
3649 fsize[0] = rangecount[0];
3650 if (threshcount[0]>fsize[0])
3651 fsize[0]=threshcount[0];
3652 fsize[1]=rangecount[1];
3653 if (threshcount[1]>fsize[1])
3654 fsize[1]=threshcount[1];
3655
3656 cout << "f1 size: " << fsize[0] << " f2 size: " << fsize[1] << endl;
3657 double stepsize[2];
3658 stepsize[0] = (f1Max-f1Min)/fsize[0];
3659 stepsize[1] = (f2Max-f2Min)/fsize[1];
3660
3661 // this is our grid where we are going to calculate the fitness of an org in each box
3662 // given current resource contributions
3663 tArray< tArray<double> > fitnesses(fsize[0]+1);
3664 for (int i=0; i<fitnesses.GetSize(); i++)
3665 fitnesses[i].Resize(fsize[1]+1,1);
3666
3667 // Get the resources for the specified update
3668 tArray<double> resources;
3669 if (!m_resources || !m_resources->GetResourceLevelsForUpdate(update, resources, true)) {
3670 cout << "error: did not find the desired update in resource history" << endl;
3671 return;
3672 }
3673
3674 cout << "creating map using resources at update: " << update << endl;
3675
3676 for (int i = 0; i < m_world->GetEnvironment().GetResourceLib().GetSize(); i++) {
3677
3678 // first have to find reaction that matches this resource, so compare names
3679 cString name = m_world->GetEnvironment().GetResourceLib().GetResource(i)->GetName();
3680 cReaction* react = NULL;
3681 for (int j = 0; j < m_world->GetEnvironment().GetReactionLib().GetSize(); j++) {
3682 if (m_world->GetEnvironment().GetReactionLib().GetReaction(j)->GetProcesses().GetPos(0)->GetResource()->GetName() == name) {
3683 react = m_world->GetEnvironment().GetReactionLib().GetReaction(j);
3684 j = m_world->GetEnvironment().GetReactionLib().GetSize();
3685 }
3686 }
3687 if (react == NULL) continue;
3688
3689 // now have proper reaction, pull all the data need from the reaction
3690 double frac = react->GetProcesses().GetPos(0)->GetMaxFraction();
3691 double max = react->GetProcesses().GetPos(0)->GetMaxNumber();
3692 double min = react->GetProcesses().GetPos(0)->GetMinNumber();
3693 double value = react->GetValue();
3694 int fun = react->GetTask()->GetArguments().GetInt(0);
3695
3696 if (fun == f1) fun = 0;
3697 else if (fun == f2) fun = 1;
3698 else cout << "function is neither f1 or f2! doh!\n";
3699
3700 double thresh = react->GetTask()->GetArguments().GetDouble(3);
3701 double threshMax = react->GetTask()->GetArguments().GetDouble(4);
3702 //double maxFx = react->GetTask()->GetArguments().GetDouble(1);
3703 //double minFx = react->GetTask()->GetArguments().GetDouble(2);
3704
3705 // and pull the concentration of this resource from resource object loaded from resource.dat
3706 double concentration = resources[i];
3707
3708 // calculate the merit based on this resource concentration, fraction, and value
3709 double mer = concentration * frac * value;
3710 if (mer > max)
3711 mer=max;
3712 else if (mer < min)
3713 mer=0;
3714 double threshMaxAdjusted, threshAdjusted;
3715 // if this is a range reaction, need to update one entire row or column in fitnesses array
3716 if (threshMax>0)
3717 {
3718 for (int k=0; k<fsize[fun]; k++)
3719 {
3720 // function f1
3721 if (fun==0)
3722 {
3723 threshMaxAdjusted = threshMax*(f1Max-f1Min) + f1Min;
3724 threshAdjusted = thresh*(f1Max-f1Min) + f1Min;
3725 double pos = stepsize[0]*k+f1Min+stepsize[0]/2.0;
3726 if (threshAdjusted <= pos && threshMaxAdjusted >= pos)
3727 {
3728 for (int z=0; z<fsize[1]+1; z++)
3729 fitnesses[k+1][z] *= pow(2,mer);
3730 // actually solutions right at min possible get range above them too
3731 if (k==0)
3732 for (int z=0; z<fsize[1]+1; z++)
3733 fitnesses[0][z] *= pow(2,mer);
3734 }
3735 }
3736 // function f2
3737 else
3738 {
3739 threshMaxAdjusted = threshMax*(f2Max-f2Min) + f2Min;
3740 threshAdjusted = thresh*(f2Max-f2Min) + f2Min;
3741 double pos = stepsize[1]*k+f1Min+stepsize[1]/2.0;
3742 if (threshAdjusted <= pos && threshMaxAdjusted >= pos)
3743 {
3744 for (int z=0; z<fsize[0]+1; z++)
3745 fitnesses[z][k+1] *= pow(2,mer);
3746 // actually solutions right at min possible get range above them too
3747 if (k==0)
3748 for (int z=0; z<fsize[0]+1; z++)
3749 fitnesses[z][0] *= pow(2,mer);
3750 }
3751 }
3752 }
3753 }
3754 // threshold reaction, need to update all rows or columns above given threshold
3755 else if (thresh>=0)
3756 {
3757 for (int k=0; k<fsize[fun]+1; k++)
3758 {
3759 // function f1
3760 if (fun==0)
3761 {
3762 threshAdjusted = thresh*(f1Max-f1Min) + f1Min;
3763 double pos = stepsize[0]*k+f1Min-stepsize[0]/2.0;
3764 if (threshAdjusted >= pos)
3765 {
3766 for (int z=0; z<fsize[1]+1; z++)
3767 {
3768 fitnesses[k][z] *= pow(2,mer);
3769 }
3770 }
3771
3772 }
3773 // function f2
3774 else
3775 {
3776 threshAdjusted = thresh*(f2Max-f2Min) + f2Min;
3777 double pos = stepsize[1]*k+f1Min-stepsize[1]/2.0;
3778 if (threshAdjusted >= pos)
3779 {
3780 for (int z=0; z<fsize[0]+1; z++)
3781 fitnesses[z][k] *= pow(2,mer);
3782 }
3783 }
3784 }
3785 }
3786
3787 }
3788
3789 for (int i=fitnesses[0].GetSize()-1; i>=0; i--)
3790 {
3791 for (int j=0; j<fitnesses.GetSize(); j++)
3792 fp << fitnesses[j][i] << " ";
3793 fp << endl;
3794 }
3795 }
3796
3797
3798 //@ MRR @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
CommandPairwiseEntropy(cString cur_string)3799 void cAnalyze::CommandPairwiseEntropy(cString cur_string)
3800 {
3801 if (m_world->GetVerbosity() >= VERBOSE_ON)
3802 cout << "Finding pairwise entropy on batch " << cur_batch << endl;
3803 else
3804 cout << "Finding pairwise entropy..." << endl;
3805
3806 cout << "@MRR-> This command is being tested!" << endl;
3807
3808 cString directory = PopDirectory(cur_string, "pairwise_data/");
3809 if (m_world->GetVerbosity() >= VERBOSE_ON)
3810 cout << "\tUsing directory: " << directory << endl;
3811 double mu = cur_string.PopWord().AsDouble();
3812 if (m_world->GetVerbosity() >= VERBOSE_ON)
3813 cout << "\tUsing mu=" << mu << endl;
3814
3815 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
3816 cAnalyzeGenotype * genotype = batch_it.Next();
3817
3818 cout << genotype->GetName() << endl;
3819
3820 while(genotype != NULL)
3821 {
3822 cString genName = genotype->GetName();
3823
3824 if (m_world->GetVerbosity() >= VERBOSE_ON)
3825 cout << "\t...on genotype " << genName << endl;
3826
3827 cString filename;
3828 filename.Set("%spairdata.%s.dat", static_cast<const char*>(directory),
3829 static_cast<const char*>(genName));
3830
3831 // @DMB -- ofstream& fp = m_world->GetDataFileOFStream(filename);
3832
3833 if (m_world->GetVerbosity() >= VERBOSE_ON)
3834 cout << "\t\t...with filename: " << filename << endl;
3835
3836 cout << "# Pairwise Entropy Information" << endl;
3837
3838 tMatrix<double> pairdata = AnalyzeEntropyPairs(genotype, mu);
3839
3840 cout << pairdata.GetNumRows() << endl;
3841
3842 for (int i=0; i < pairdata.GetNumRows(); i++){
3843 for (int j=0; j < pairdata.GetNumCols(); j++)
3844 cout << pairdata[i][j] << " ";
3845 cout << endl;
3846 }
3847 m_world->GetDataFileManager().Remove(filename);
3848 genotype = batch_it.Next();
3849 }
3850 }
3851
3852
3853
3854
3855
3856 // This command will take the current batch and analyze how well organisms
3857 // cross-over with each other, both across the population and between mates.
3858
AnalyzeMateSelection(cString cur_string)3859 void cAnalyze::AnalyzeMateSelection(cString cur_string)
3860 {
3861 int sample_size = 10000;
3862 if (cur_string.GetSize() != 0) sample_size = cur_string.PopWord().AsInt();
3863 cString filename("none");
3864 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
3865 double min_swap_frac = 0.0;
3866 if (cur_string.GetSize() != 0) min_swap_frac=cur_string.PopWord().AsDouble();
3867 double max_swap_frac = 1.0 - min_swap_frac;
3868
3869 cout << "Analyzing Mate Selection... " << endl;
3870
3871 // Do some quick tests before moving on...
3872 if (min_swap_frac < 0.0 || min_swap_frac >= 0.5) {
3873 cerr << "ERROR: Minimum swap fraction out of range [0.0, 0.5)." << endl;
3874 }
3875
3876 // Next, we create an array that contains pointers to all of the organisms
3877 // in this batch. Note that we want to select genotypes based on their
3878 // abundance, so they will have one entry in the array per organism. Note
3879 // that we only consider viable genotypes.
3880
3881 // Start by counting the total number of organisms (and do other such
3882 // data collection...
3883 tHashMap<int, int> mate_id_counts;
3884
3885 int org_count = 0;
3886 int gen_count = 0;
3887 cAnalyzeGenotype * genotype = NULL;
3888 tListIterator<cAnalyzeGenotype> list_it(batch[cur_batch].List());
3889 while ((genotype = list_it.Next()) != NULL) {
3890 if (genotype->GetViable() == false || genotype->GetNumCPUs() == 0) {
3891 continue;
3892 }
3893 gen_count++;
3894 org_count += genotype->GetNumCPUs();
3895
3896 // Keep track of how many organisms have each mate id...
3897 int mate_id = genotype->GetMateID();
3898 int count = 0;
3899 mate_id_counts.Find(mate_id, count);
3900 count += genotype->GetNumCPUs();
3901 mate_id_counts.Set(mate_id, count);
3902 }
3903
3904 // Create an array of the correct size.
3905 tArray<cAnalyzeGenotype *> genotype_array(org_count);
3906
3907 // And insert all of the organisms into the array.
3908 int cur_pos = 0;
3909 while ((genotype = list_it.Next()) != NULL) {
3910 if (genotype->GetViable() == false) continue;
3911 int cur_count = genotype->GetNumCPUs();
3912 for (int i = 0; i < cur_count; i++) {
3913 genotype_array[cur_pos++] = genotype;
3914 }
3915 }
3916
3917
3918 // Setup some variables to collect statistics.
3919 int total_matches_tested = 0;
3920 int fail_count = 0;
3921 int match_fail_count = 0;
3922
3923 // Create a Test CPU
3924 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
3925
3926 // Loop through all of the tests, picking random organisms each time and
3927 // performing a random cross test.
3928 cAnalyzeGenotype * genotype2 = NULL;
3929 for (int test_id = 0; test_id < sample_size; test_id++) {
3930 genotype = genotype_array[ m_world->GetRandom().GetUInt(org_count) ];
3931 genotype2 = genotype_array[ m_world->GetRandom().GetUInt(org_count) ];
3932
3933 // Stop immediately if we're comparing a genotype to itself.
3934 if (genotype == genotype2) {
3935 total_matches_tested++;
3936 continue;
3937 }
3938
3939 // Setup the random parameters for this test.
3940 Genome test_genome0 = genotype->GetGenome();
3941 Genome test_genome1 = genotype2->GetGenome();
3942
3943 double start_frac = -1.0;
3944 double end_frac = -1.0;
3945 double swap_frac = -1.0;
3946 while (swap_frac < min_swap_frac || swap_frac > max_swap_frac) {
3947 start_frac = m_world->GetRandom().GetDouble();
3948 end_frac = m_world->GetRandom().GetDouble();
3949 if (start_frac > end_frac) Swap(start_frac, end_frac);
3950 swap_frac = end_frac - start_frac;
3951 }
3952
3953 int start0 = (int) (start_frac * (double) test_genome0.GetSize());
3954 int end0 = (int) (end_frac * (double) test_genome0.GetSize());
3955 int size0 = end0 - start0;
3956
3957 int start1 = (int) (start_frac * (double) test_genome1.GetSize());
3958 int end1 = (int) (end_frac * (double) test_genome1.GetSize());
3959 int size1 = end1 - start1;
3960
3961 int new_size0 = test_genome0.GetSize() - size0 + size1;
3962 int new_size1 = test_genome1.GetSize() - size1 + size0;
3963
3964 // Setup some statistics for this particular test.
3965 bool same_mate_id = ( genotype->GetMateID() == genotype2->GetMateID() );
3966 if (same_mate_id == true) total_matches_tested++;
3967
3968 // Don't Crossover if offspring will be illegal!!!
3969 if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
3970 new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
3971 fail_count++;
3972 if (same_mate_id == true) match_fail_count++;
3973 continue;
3974 }
3975
3976 // Do the replacement... We're only going to test genome0, so we only
3977 // need to modify that one.
3978 Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
3979 test_genome0.GetSequence().Replace(start0, size0, cross1);
3980
3981 // Do the test.
3982 cCPUTestInfo test_info;
3983
3984 // Run each side, and determine viability...
3985 testcpu->TestGenome(m_ctx, test_info, test_genome0);
3986 if( test_info.IsViable() == false ) {
3987 fail_count++;
3988 if (same_mate_id == true) match_fail_count++;
3989 }
3990 }
3991 delete testcpu;
3992
3993 // Do some calculations on the sizes of the mate groups...
3994 const int num_mate_groups = mate_id_counts.GetSize();
3995
3996 // Collect lists on all of the mate groups for the calculations...
3997 tList<int> key_list;
3998 tList<int> count_list;
3999 mate_id_counts.AsLists(key_list, count_list);
4000 tListIterator<int> count_it(count_list);
4001
4002 int max_group_size = 0;
4003 double mate_id_entropy = 0.0;
4004 while (count_it.Next() != NULL) {
4005 int cur_count = *(count_it.Get());
4006 double cur_frac = ((double) cur_count) / ((double) org_count);
4007 if (cur_count > max_group_size) max_group_size = cur_count;
4008 mate_id_entropy -= cur_frac * log(cur_frac);
4009 }
4010
4011 // Calculate the final answer
4012 double fail_frac = (double) fail_count / (double) sample_size;
4013 double match_fail_frac =
4014 (double) match_fail_count / (double) total_matches_tested;
4015 cout << " ave fraction failed = " << fail_frac << endl
4016 << " ave matches failed = " << match_fail_frac << endl
4017 << " total mate matches = " << total_matches_tested
4018 << " / " << sample_size<< endl;
4019
4020 if (filename == "none") return;
4021
4022 cDataFile & df = m_world->GetDataFile(filename);
4023 df.WriteComment( "Mate selection information" );
4024 df.WriteTimeStamp();
4025
4026 df.Write(fail_frac, "Average fraction failed");
4027 df.Write(match_fail_frac, "Average fraction of mate matches failed");
4028 df.Write(sample_size, "Total number of crossovers tested");
4029 df.Write(total_matches_tested, "Number of crossovers with matching mate IDs");
4030 df.Write(gen_count, "Number of genotypes in test batch");
4031 df.Write(org_count, "Number of organisms in test batch");
4032 df.Write(num_mate_groups, "Number of distinct mate IDs");
4033 df.Write(max_group_size, "Size of the largest distinct mate ID group");
4034 df.Write(mate_id_entropy, "Diversity of mate IDs (entropy)");
4035 df.Endl();
4036 }
4037
4038
AnalyzeComplexityDelta(cString cur_string)4039 void cAnalyze::AnalyzeComplexityDelta(cString cur_string)
4040 {
4041 // This command will examine the current population, and sample mutations
4042 // to see what the distribution of complexity changes is. Only genotypes
4043 // with a certain abundance (default=3) will be tested to make sure that
4044 // the organism didn't already have hidden complexity due to a downward
4045 // step.
4046 cout << "Testing complexity delta." << endl;
4047
4048 cString filename = "complexity_delta.dat";
4049 int num_tests = 10;
4050 double copy_mut_prob = m_world->GetConfig().COPY_MUT_PROB.Get();
4051 double ins_mut_prob = m_world->GetConfig().DIVIDE_INS_PROB.Get();
4052 double del_mut_prob = m_world->GetConfig().DIVIDE_DEL_PROB.Get();
4053 int count_threshold = 3;
4054
4055 if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
4056 if (cur_string.GetSize() > 0) num_tests = cur_string.PopWord().AsInt();
4057 if (cur_string.GetSize() > 0) copy_mut_prob = cur_string.PopWord().AsDouble();
4058 if (cur_string.GetSize() > 0) ins_mut_prob = cur_string.PopWord().AsDouble();
4059 if (cur_string.GetSize() > 0) del_mut_prob = cur_string.PopWord().AsDouble();
4060 if (cur_string.GetSize() > 0) count_threshold = cur_string.PopWord().AsInt();
4061
4062 if (m_world->GetVerbosity() >= VERBOSE_ON) {
4063 cout << "...using:"
4064 << " filename='" << filename << "'"
4065 << " num_tests=" << num_tests
4066 << " copy_mut_prob=" << copy_mut_prob
4067 << " ins_mut_prob=" << ins_mut_prob
4068 << " del_mut_prob=" << del_mut_prob
4069 << " count_threshold=" << count_threshold
4070 << endl;
4071 }
4072
4073 // Create an array of all of the genotypes above threshold.
4074 cAnalyzeGenotype * genotype = NULL;
4075 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4076
4077 // Loop through all genotypes to perform a census
4078 int org_count = 0;
4079 while ((genotype = batch_it.Next()) != NULL) {
4080 // Only count genotypes above threshold
4081 if (genotype->GetNumCPUs() >= count_threshold) {
4082 org_count += genotype->GetNumCPUs();
4083 }
4084 }
4085
4086 // Create an array to store pointers to the genotypes and fill it in.
4087 tArray<cAnalyzeGenotype *> org_array(org_count);
4088 int cur_org = 0;
4089 batch_it.Reset();
4090 while ((genotype = batch_it.Next()) != NULL) {
4091 // Ignore genotypes below threshold.
4092 if (genotype->GetNumCPUs() < count_threshold) continue;
4093
4094 // Insert the remaining genotypes into the array.
4095 for (int i = 0; i < genotype->GetNumCPUs(); i++) {
4096 org_array[cur_org] = genotype;
4097 cur_org++;
4098 }
4099 }
4100
4101 // Open up the file and prepare it for output.
4102 cDataFile & df = m_world->GetDataFile(filename);
4103 df.WriteComment( "An analyze of expected complexity changes between parent and offspring" );
4104 df.WriteTimeStamp();
4105
4106 // Next check the appropriate number of organisms, perform mutations, and
4107 // store the results.
4108 for (int cur_test = 0; cur_test < num_tests; cur_test++) {
4109 // Pick the genotype to test.
4110 int test_org_id = m_world->GetRandom().GetInt(org_count);
4111 genotype = org_array[test_org_id];
4112
4113 // Create a copy of the genome.
4114 Genome mod_genome = genotype->GetGenome();
4115 Sequence& mod_seq = mod_genome.GetSequence();
4116 const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(mod_genome.GetInstSet());
4117
4118 if (copy_mut_prob == 0.0 &&
4119 ins_mut_prob == 0.0 &&
4120 del_mut_prob == 0.0) {
4121 cerr << "ERROR: All mutation rates are zero! No complexity delta analysis possible." << endl;
4122 return;
4123 }
4124
4125 // Perform the per-site mutations -- we are going to keep looping until
4126 // we trigger at least one mutation.
4127 int num_mutations = 0;
4128 int ins_line = -1;
4129 int del_line = -1;
4130 while (num_mutations == 0) {
4131 if (copy_mut_prob > 0.0) {
4132 for (int i = 0; i < mod_genome.GetSize(); i++) {
4133 if (m_world->GetRandom().P(copy_mut_prob)) {
4134 mod_seq[i] = inst_set.GetRandomInst(m_ctx);
4135 num_mutations++;
4136 }
4137 }
4138 }
4139
4140 // Perform an Insertion if it has one.
4141 if (m_world->GetRandom().P(ins_mut_prob)) {
4142 ins_line = m_world->GetRandom().GetInt(mod_genome.GetSize() + 1);
4143 mod_seq.Insert(ins_line, inst_set.GetRandomInst(m_ctx));
4144 num_mutations++;
4145 }
4146
4147 // Perform a Deletion if it has one.
4148 if (m_world->GetRandom().P(del_mut_prob)) {
4149 del_line = m_world->GetRandom().GetInt(mod_genome.GetSize());
4150 mod_seq.Remove(del_line);
4151 num_mutations++;
4152 }
4153 }
4154
4155 // Collect basic state before and after the mutations...
4156 genotype->Recalculate(m_ctx);
4157 double start_complexity = genotype->GetKO_Complexity();
4158 double start_fitness = genotype->GetFitness();
4159 int start_length = genotype->GetLength();
4160 int start_gest = genotype->GetGestTime();
4161 const tArray<int>& start_task_counts = genotype->GetTaskCounts();
4162 const tArray< tArray<int> >& start_KO_task_counts = genotype->GetKO_TaskCounts();
4163
4164 cAnalyzeGenotype new_genotype(m_world, mod_genome);
4165 new_genotype.Recalculate(m_ctx);
4166 double end_complexity = new_genotype.GetKO_Complexity();
4167 double end_fitness = new_genotype.GetFitness();
4168 int end_length = new_genotype.GetLength();
4169 int end_gest = new_genotype.GetGestTime();
4170 const tArray<int> & end_task_counts = new_genotype.GetTaskCounts();
4171 const tArray< tArray<int> >& end_KO_task_counts = new_genotype.GetKO_TaskCounts();
4172
4173 // Calculate the complexities....
4174 double complexity_change = end_complexity - start_complexity;
4175
4176 // Loop through each line and determine if each line contributes to
4177 int total_info_new = 0; // Site didn't encode info, but now does.
4178 int total_info_shift = 0; // Shift in which tasks this site codes for.
4179 int total_info_pshift = 0; // Partial, but not total shift of tasks.
4180 int total_info_share = 0; // Site codes for more tasks than before.
4181 int total_info_lost = 0; // Site list all tasks it encoded for.
4182 int total_info_plost = 0; // Site reduced tasks it encodes for.
4183 int total_info_kept = 0; // Site still codes for sames tasks as before
4184 int total_info_lack = 0; // Site never codes for any tasks.
4185
4186 const int num_tasks = start_task_counts.GetSize();
4187 tArray<int> mut_effects(num_tasks);
4188 for (int i = 0; i < num_tasks; i++) {
4189 mut_effects[i] = end_task_counts[i] - start_task_counts[i];
4190 }
4191
4192 int end_line = 0;
4193 for (int start_line = 0; start_line < start_length; start_line++) {
4194 if (start_line == del_line) {
4195 // This line was deleted in the end. Skip it, but don't increment
4196 // the end_line
4197 continue;
4198 }
4199 if (start_line == ins_line) {
4200 // This position had an insertion. Deal with it and then skip it.
4201 end_line++;
4202
4203 // No "continue" here. With the updated end_line we can move on.
4204 }
4205
4206 // If we made it this far, the start_line and end_line should be aligned.
4207 int info_maintained_count = 0;
4208 int info_gained_count = 0;
4209 int info_lost_count = 0;
4210
4211 for (int cur_task = 0; cur_task < num_tasks; cur_task++) {
4212 // At the organism level, the mutation may have caused four options
4213 // for this task (A) Was never present, (B) Was present and still is,
4214 // (C) Was not present, but is now, or (D) Was present, but was lost.
4215
4216 // Case A:
4217 if (start_task_counts[cur_task]==0 && end_task_counts[cur_task]==0) {
4218 // This task was never done. Keep looping.
4219 continue;
4220 }
4221
4222 // Case B:
4223 if (start_task_counts[cur_task] == end_task_counts[cur_task]) {
4224 // The task hasn't changed. Has its encoding?
4225 bool KO_start = true;
4226 bool KO_end = true;
4227 if (start_KO_task_counts[start_line][cur_task] ==
4228 start_task_counts[cur_task]) {
4229 // start_count is unchanged by knocking out this line.
4230 KO_start = false;
4231 }
4232 if (end_KO_task_counts[end_line][cur_task] ==
4233 end_task_counts[cur_task]) {
4234 // end_count is unchanged by knocking out this line.
4235 KO_end = false;
4236 }
4237
4238 if (KO_start == true && KO_end == true) info_maintained_count++;
4239 if (KO_start == true && KO_end == false) info_lost_count++;
4240 if (KO_start == false && KO_end == true) info_gained_count++;
4241 continue;
4242 }
4243
4244 // Case C:
4245 if (start_task_counts[cur_task] < end_task_counts[cur_task]) {
4246 // Task was GAINED... Is this site important?
4247 if (end_KO_task_counts[end_line][cur_task] <
4248 end_task_counts[cur_task]) {
4249 info_gained_count++;
4250 }
4251 continue;
4252 }
4253
4254 // Case D:
4255 if (start_task_counts[cur_task] > end_task_counts[cur_task]) {
4256 // The task was LOST... Was this site important?
4257 if (start_KO_task_counts[start_line][cur_task] <
4258 start_task_counts[cur_task]) {
4259 info_lost_count++;
4260 }
4261 continue;
4262 }
4263 }
4264
4265 // We now have counts and know how often this site was responsible for
4266 // a task gain, a task loss, or a task being maintained.
4267
4268 bool has_keep = info_maintained_count > 0;
4269 bool has_loss = info_lost_count > 0;
4270 bool has_gain = info_gained_count > 0;
4271
4272 if ( !has_loss && !has_gain && !has_keep ) total_info_lack++;
4273 else if ( !has_loss && !has_gain && has_keep ) total_info_kept++;
4274 else if ( !has_loss && has_gain && !has_keep ) total_info_new++;
4275 else if ( !has_loss && has_gain && has_keep ) total_info_share++;
4276 else if ( has_loss && !has_gain && !has_keep ) total_info_lost++;
4277 else if ( has_loss && !has_gain && has_keep ) total_info_plost++;
4278 else if ( has_loss && has_gain && !has_keep ) total_info_shift++;
4279 else if ( has_loss && has_gain && has_keep ) total_info_pshift++;
4280
4281 end_line++;
4282 }
4283
4284
4285 // Output the results.
4286 df.Write(num_mutations, "Number of mutational differences between original organism and mutant.");
4287 df.Write(complexity_change, "Complexity difference between original organism and mutant.");
4288 df.Write(start_complexity, "Total complexity of initial organism.");
4289 df.Write(end_complexity, "Total complexity of mutant.");
4290
4291 // Broken down complexity info
4292 df.Write(total_info_lack, "Num sites with no info at all.");
4293 df.Write(total_info_kept, "Num sites with info, but no change.");
4294 df.Write(total_info_new, "Num sites with new info (prev. none).");
4295 df.Write(total_info_share, "Num sites with newly shared info.");
4296 df.Write(total_info_lost, "Num sites with lost info.");
4297 df.Write(total_info_plost, "Num sites with parital lost info.");
4298 df.Write(total_info_shift, "Num sites with shift in info.");
4299 df.Write(total_info_pshift, "Num sites with partial shift in info.");
4300
4301 // Start and End task counts...
4302 for (int i = 0; i < start_task_counts.GetSize(); i++) {
4303 df.Write(start_task_counts[i], cStringUtil::Stringf("Start task %d", i));
4304 }
4305
4306 for (int i = 0; i < end_task_counts.GetSize(); i++) {
4307 df.Write(end_task_counts[i], cStringUtil::Stringf("End task %d", i));
4308 }
4309
4310 df.Write(start_fitness, "Fitness of initial organism.");
4311 df.Write(end_fitness, "Fitness of mutant.");
4312 df.Write(start_length, "Length of initial organism.");
4313 df.Write(end_length, "Length of mutant.");
4314 df.Write(start_gest, "Gestation Time of initial organism.");
4315 df.Write(end_gest, "Gestation Time of mutant.");
4316 df.Write(genotype->GetID(), "ID of initial genotype.");
4317 df.Endl();
4318 }
4319 }
4320
AnalyzeKnockouts(cString cur_string)4321 void cAnalyze::AnalyzeKnockouts(cString cur_string)
4322 {
4323 cout << "Analyzing the effects of knockouts..." << endl;
4324
4325 cString filename = "knockouts.dat";
4326 if (cur_string.GetSize() > 0) filename = cur_string.PopWord();
4327
4328 int max_knockouts = 1;
4329 if (cur_string.GetSize() > 0) max_knockouts = cur_string.PopWord().AsInt();
4330
4331 // Open up the data file...
4332 cDataFile & df = m_world->GetDataFile(filename);
4333 df.WriteComment( "Analysis of knockouts in genomes" );
4334 df.WriteTimeStamp();
4335
4336
4337 // Loop through all of the genotypes in this batch...
4338 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4339 cAnalyzeGenotype * genotype = NULL;
4340 while ((genotype = batch_it.Next()) != NULL) {
4341 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << " Knockout: " << genotype->GetName() << endl;
4342
4343 // Calculate the stats for the genotype we're working with...
4344 genotype->Recalculate(m_ctx);
4345 const double base_fitness = genotype->GetFitness();
4346
4347 const int max_line = genotype->GetLength();
4348 const Genome& base_genome = genotype->GetGenome();
4349 const Sequence& base_seq = base_genome.GetSequence();
4350 Genome mod_genome(base_genome);
4351 Sequence& seq = mod_genome.GetSequence();
4352 cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
4353
4354 // Loop through all the lines of code, testing the removal of each.
4355 // -2=lethal, -1=detrimental, 0=neutral, 1=beneficial
4356 int dead_count = 0;
4357 int neg_count = 0;
4358 int neut_count = 0;
4359 int pos_count = 0;
4360 tArray<int> ko_effect(max_line);
4361 for (int line_num = 0; line_num < max_line; line_num++) {
4362 // Save a copy of the current instruction and replace it with "NULL"
4363 int cur_inst = base_seq[line_num].GetOp();
4364 seq[line_num] = null_inst;
4365 cAnalyzeGenotype ko_genotype(m_world, mod_genome);
4366 ko_genotype.Recalculate(m_ctx);
4367
4368 double ko_fitness = ko_genotype.GetFitness();
4369 if (ko_fitness == 0.0) {
4370 dead_count++;
4371 ko_effect[line_num] = -2;
4372 } else if (ko_fitness < base_fitness) {
4373 neg_count++;
4374 ko_effect[line_num] = -1;
4375 } else if (ko_fitness == base_fitness) {
4376 neut_count++;
4377 ko_effect[line_num] = 0;
4378 } else if (ko_fitness > base_fitness) {
4379 pos_count++;
4380 ko_effect[line_num] = 1;
4381 } else {
4382 cerr << "ERROR: illegal state in AnalyzeKnockouts()" << endl;
4383 }
4384
4385 // Reset the mod_genome back to the original sequence.
4386 seq[line_num].SetOp(cur_inst);
4387 }
4388
4389 tArray<int> ko_pair_effect(ko_effect);
4390 if (max_knockouts > 1) {
4391 for (int line1 = 0; line1 < max_line; line1++) {
4392 for (int line2 = line1+1; line2 < max_line; line2++) {
4393 int cur_inst1 = base_seq[line1].GetOp();
4394 int cur_inst2 = base_seq[line2].GetOp();
4395 seq[line1] = null_inst;
4396 seq[line2] = null_inst;
4397 cAnalyzeGenotype ko_genotype(m_world, mod_genome);
4398 ko_genotype.Recalculate(m_ctx);
4399
4400 double ko_fitness = ko_genotype.GetFitness();
4401
4402 // If both individual knockouts are both harmful, but in combination
4403 // they are neutral or even beneficial, they should not count as
4404 // information.
4405 if (ko_fitness >= base_fitness &&
4406 ko_effect[line1] < 0 && ko_effect[line2] < 0) {
4407 ko_pair_effect[line1] = 0;
4408 ko_pair_effect[line2] = 0;
4409 }
4410
4411 // If the individual knockouts are both neutral (or beneficial?),
4412 // but in combination they are harmful, they are likely redundant
4413 // to each other. For now, count them both as information.
4414 if (ko_fitness < base_fitness &&
4415 ko_effect[line1] >= 0 && ko_effect[line2] >= 0) {
4416 ko_pair_effect[line1] = -1;
4417 ko_pair_effect[line2] = -1;
4418 }
4419
4420 // Reset the mod_genome back to the original sequence.
4421 seq[line1].SetOp(cur_inst1);
4422 seq[line2].SetOp(cur_inst2);
4423 }
4424 }
4425 }
4426
4427 int pair_dead_count = 0;
4428 int pair_neg_count = 0;
4429 int pair_neut_count = 0;
4430 int pair_pos_count = 0;
4431 for (int i = 0; i < max_line; i++) {
4432 if (ko_pair_effect[i] == -2) pair_dead_count++;
4433 else if (ko_pair_effect[i] == -1) pair_neg_count++;
4434 else if (ko_pair_effect[i] == 0) pair_neut_count++;
4435 else if (ko_pair_effect[i] == 1) pair_pos_count++;
4436 }
4437
4438 // Output data...
4439 df.Write(genotype->GetID(), "Genotype ID");
4440 df.Write(dead_count, "Count of lethal knockouts");
4441 df.Write(neg_count, "Count of detrimental knockouts");
4442 df.Write(neut_count, "Count of neutral knockouts");
4443 df.Write(pos_count, "Count of beneficial knockouts");
4444 df.Write(pair_dead_count, "Count of lethal knockouts after paired knockout tests.");
4445 df.Write(pair_neg_count, "Count of detrimental knockouts after paired knockout tests.");
4446 df.Write(pair_neut_count, "Count of neutral knockouts after paired knockout tests.");
4447 df.Write(pair_pos_count, "Count of beneficial knockouts after paired knockout tests.");
4448 df.Endl();
4449 }
4450 }
4451
4452
CommandMapTasks(cString cur_string)4453 void cAnalyze::CommandMapTasks(cString cur_string)
4454 {
4455 cString msg; //Use if to construct any messages to send to driver
4456
4457 m_world->GetDriver().NotifyComment("Constructing genotype-phenotype maps");
4458
4459 // Load in the variables / default them
4460 cString directory = PopDirectory(cur_string.PopWord(), "phenotype/");
4461 int print_mode = 0; // 0=Normal, 1=Boolean results
4462 int file_type = FILE_TYPE_TEXT;
4463 bool use_manual_inputs = false; // Should we use manual inputs?
4464
4465 // HTML special flags...
4466 bool link_maps = false; // Should muliple maps be linked together?
4467 bool link_insts = false; // Should links be made to instruction descs?
4468
4469 // Collect any other format information needed...
4470 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
4471 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
4472 tArray<int> manual_inputs;
4473
4474 cStringList arg_list(cur_string);
4475
4476 msg.Set("Found %d args.", arg_list.GetSize());
4477 m_world->GetDriver().NotifyComment(msg);
4478
4479 int use_resources = 0;
4480
4481 // Check for some command specific variables, removing them from the list if found.
4482 if (arg_list.PopString("0") != "") print_mode = 0;
4483 if (arg_list.PopString("1") != "") print_mode = 1;
4484 if (arg_list.PopString("text") != "") file_type = FILE_TYPE_TEXT;
4485 if (arg_list.PopString("html") != "") file_type = FILE_TYPE_HTML;
4486 if (arg_list.PopString("link_maps") != "") link_maps = true;
4487 if (arg_list.PopString("link_insts") != "") link_insts = true;
4488 if (arg_list.PopString("use_resources=2") != "") use_resources = 2;
4489 if (arg_list.HasString("use_manual_inputs")) use_manual_inputs = true;
4490
4491 if (use_manual_inputs){
4492 int pos = arg_list.LocateString("use_manual_inputs");
4493 arg_list.PopString("use_manual_inputs");
4494 manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
4495 if (arg_list.GetSize() >= pos + m_world->GetEnvironment().GetInputSize() - 1)
4496 for (int k = 0; k < m_world->GetEnvironment().GetInputSize(); k++)
4497 manual_inputs[k] = arg_list.PopLine(pos).AsInt();
4498 else
4499 m_world->GetDriver().RaiseFatalException(1, "CommandMapTask: Invalid use of use_manual_inputs");
4500 }
4501
4502 msg.Set("There are %d column args.", arg_list.GetSize());
4503 m_world->GetDriver().NotifyComment(msg);
4504
4505 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(arg_list, output_list);
4506
4507 m_world->GetDriver().NotifyComment("Args are loaded.");
4508
4509 const int num_cols = output_list.GetSize();
4510
4511
4512 // Give some information in verbose mode.
4513 if (m_world->GetVerbosity() >= VERBOSE_ON) {
4514 cout << " outputing as ";
4515 if (print_mode == 1) cout << "boolean ";
4516 if (file_type == FILE_TYPE_TEXT) {
4517 cout << "text files." << endl;
4518 } else { // if (file_type == FILE_TYPE_HTML) {
4519 cout << "HTML files";
4520 if (link_maps == true) cout << "; linking files together";
4521 if (link_maps == true) cout << "; linking inst names to descs";
4522 cout << "." << endl;
4523 }
4524 cout << " Format: ";
4525
4526 output_it.Reset();
4527 while (output_it.Next() != NULL) {
4528 cout << output_it.Get()->GetName() << " ";
4529 }
4530 cout << endl;
4531 }
4532
4533
4534 ///////////////////////////////////////////////////////
4535 // Loop through all of the genotypes in this batch...
4536
4537 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4538 cAnalyzeGenotype * genotype = NULL;
4539 while ((genotype = batch_it.Next()) != NULL) {
4540 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << " Mapping " << genotype->GetName() << endl;
4541
4542 // Construct this filename...
4543 cString filename;
4544 if (file_type == FILE_TYPE_TEXT) {
4545 filename.Set("%stasksites.%s.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
4546 } else { // if (file_type == FILE_TYPE_HTML) {
4547 filename.Set("%stasksites.%s.html", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
4548 }
4549 ofstream& fp = m_world->GetDataFileOFStream(filename);
4550
4551 // Construct linked filenames...
4552 cString next_file("");
4553 cString prev_file("");
4554 if (link_maps == true) {
4555 // Check the next genotype on the list...
4556 if (batch_it.Next() != NULL) {
4557 next_file.Set("tasksites.%s.html", static_cast<const char*>(batch_it.Get()->GetName()));
4558 }
4559 batch_it.Prev(); // Put the list back where it was...
4560
4561 // Check the previous genotype on the list...
4562 if (batch_it.Prev() != NULL) {
4563 prev_file.Set("tasksites.%s.html", static_cast<const char*>(batch_it.Get()->GetName()));
4564 }
4565 batch_it.Next(); // Put the list back where it was...
4566 }
4567
4568 // Calculate the stats for the genotype we're working with...
4569 cCPUTestInfo test_info;
4570 if (use_manual_inputs)
4571 test_info.UseManualInputs(manual_inputs);
4572 test_info.SetResourceOptions(use_resources, m_resources);
4573 genotype->Recalculate(m_ctx, &test_info);
4574
4575 // Headers...
4576 if (file_type == FILE_TYPE_TEXT) {
4577 fp << "-1 " << batch[cur_batch].Name() << " "
4578 << genotype->GetID() << " ";
4579
4580 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
4581 while ((data_command = output_it.Next()) != NULL) {
4582 fp << data_command->GetValue(genotype) << " ";
4583 }
4584 fp << endl;
4585
4586 } else { // if (file_type == FILE_TYPE_HTML) {
4587 // Mark file as html
4588 fp << "<html>" << endl;
4589
4590 // Setup any javascript macros needed...
4591 fp << "<head>" << endl;
4592 if (link_insts == true) {
4593 fp << "<script language=\"javascript\">" << endl
4594 << "function Inst(inst_name)" << endl
4595 << "{" << endl
4596 << "var filename = \"help.\" + inst_name + \".html\";" << endl
4597 << "newwin = window.open(filename, 'Instruction', "
4598 << "'toolbar=0,status=0,location=0,directories=0,menubar=0,"
4599 << "scrollbars=1,height=150,width=300');" << endl
4600 << "newwin.focus();" << endl
4601 << "}" << endl
4602 << "</script>" << endl;
4603 }
4604 fp << "</head>" << endl;
4605
4606 // Setup the body...
4607 fp << "<body>" << endl
4608 << "<div align=\"center\">" << endl
4609 << "<h1 align=\"center\">Run " << batch[cur_batch].Name() << ", ID " << genotype->GetID() << "</h1>" << endl
4610 << endl;
4611
4612 // Links?
4613 fp << "<table width=90%><tr><td align=left>";
4614 if (prev_file != "") fp << "<a href=\"" << prev_file << "\">Prev</a>";
4615 else fp << " ";
4616 fp << "<td align=right>";
4617 if (next_file != "") fp << "<a href=\"" << next_file << "\">Next</a>";
4618 else fp << " ";
4619 fp << "</tr></table>" << endl;
4620
4621 // The table
4622 fp << "<table border=1 cellpadding=2>" << endl;
4623
4624 // The headings...///
4625 fp << "<tr><td colspan=3> ";
4626 output_it.Reset();
4627 while (output_it.Next() != NULL) {
4628 fp << "<th>" << output_it.Get()->GetDesc(genotype) << " ";
4629 }
4630 fp << "</tr>" << endl;
4631
4632 // The base creature...
4633 fp << "<tr><th colspan=3>Base Creature";
4634 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
4635 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
4636 Genome null_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(1));
4637 cAnalyzeGenotype null_genotype(m_world, null_genome);
4638 while ((data_command = output_it.Next()) != NULL) {
4639 const cFlexVar cur_value = data_command->GetValue(genotype);
4640 const cFlexVar null_value = data_command->GetValue(&null_genotype);
4641 int compare = CompareFlexStat(cur_value, null_value, data_command->GetCompareType());
4642 if (compare > 0) {
4643 fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">";
4644 }
4645 else fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">";
4646
4647 if (data_command->HasArg("blank") == true) fp << " " << " ";
4648 else fp << cur_value << " ";
4649 }
4650 fp << "</tr>" << endl;
4651 }
4652
4653
4654 const int max_line = genotype->GetLength();
4655 const Genome& base_genome = genotype->GetGenome();
4656 const Sequence& base_seq = base_genome.GetSequence();
4657 Genome mod_genome(base_genome);
4658 Sequence& seq = mod_genome.GetSequence();
4659
4660 // Keep track of the number of failues/successes for attributes...
4661 int * col_pass_count = new int[num_cols];
4662 int * col_fail_count = new int[num_cols];
4663 for (int i = 0; i < num_cols; i++) {
4664 col_pass_count[i] = 0;
4665 col_fail_count[i] = 0;
4666 }
4667
4668 cInstSet& is = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet());
4669 const cInstruction null_inst = is.ActivateNullInst();
4670
4671 // Loop through all the lines of code, testing the removal of each.
4672 for (int line_num = 0; line_num < max_line; line_num++) {
4673 int cur_inst = base_seq[line_num].GetOp();
4674 char cur_symbol = base_seq[line_num].GetSymbol();
4675
4676 seq[line_num] = null_inst;
4677 cAnalyzeGenotype test_genotype(m_world, mod_genome);
4678 test_genotype.Recalculate(m_ctx, &test_info);
4679
4680 if (file_type == FILE_TYPE_HTML) fp << "<tr><td align=right>";
4681 fp << (line_num + 1) << " ";
4682 if (file_type == FILE_TYPE_HTML) fp << "<td align=center>";
4683 fp << cur_symbol << " ";
4684 if (file_type == FILE_TYPE_HTML) fp << "<td align=center>";
4685 if (link_insts == true) {
4686 fp << "<a href=\"javascript:Inst('"
4687 << is.GetName(cur_inst)
4688 << "')\">";
4689 }
4690 fp << is.GetName(cur_inst) << " ";
4691 if (link_insts == true) fp << "</a>";
4692
4693
4694 // Print the individual columns...
4695 output_it.Reset();
4696 tDataEntryCommand<cAnalyzeGenotype>* data_command = NULL;
4697 int cur_col = 0;
4698 while ((data_command = output_it.Next()) != NULL) {
4699 const cFlexVar test_value = data_command->GetValue(&test_genotype);
4700 int compare = CompareFlexStat(test_value, data_command->GetValue(genotype), data_command->GetCompareType());
4701
4702 if (file_type == FILE_TYPE_HTML) {
4703 HTMLPrintStat(test_value, fp, compare, data_command->GetHtmlCellFlags(), data_command->GetNull(),
4704 !(data_command->HasArg("blank")));
4705 }
4706 else fp << test_value << " ";
4707
4708 if (compare == -2) col_fail_count[cur_col]++;
4709 else if (compare == 2) col_pass_count[cur_col]++;
4710 cur_col++;
4711 }
4712 if (file_type == FILE_TYPE_HTML) fp << "</tr>";
4713 fp << endl;
4714
4715 // Reset the mod_genome back to the original sequence.
4716 seq[line_num].SetOp(cur_inst);
4717 }
4718
4719
4720 // Construct the final line of the table with all totals...
4721 if (file_type == FILE_TYPE_HTML) {
4722 fp << "<tr><th colspan=3>Totals";
4723
4724 for (int i = 0; i < num_cols; i++) {
4725 if (col_pass_count[i] > 0) {
4726 fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">" << col_pass_count[i];
4727 }
4728 else if (col_fail_count[i] > 0) {
4729 fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">" << col_fail_count[i];
4730 }
4731 else fp << "<th>0";
4732 }
4733 fp << "</tr>" << endl;
4734
4735 // And close everything up...
4736 fp << "</table>" << endl
4737 << "</div>" << endl;
4738 }
4739
4740 delete [] col_pass_count;
4741 delete [] col_fail_count;
4742 m_world->GetDataFileManager().Remove(filename); // Close the data file object
4743 }
4744 }
4745
CommandCalcFunctionalModularity(cString cur_string)4746 void cAnalyze::CommandCalcFunctionalModularity(cString cur_string)
4747 {
4748 cout << "Calculating Functional Modularity..." << endl;
4749
4750 cCPUTestInfo test_info;
4751 PopCommonCPUTestParameters(m_world, cur_string, test_info, m_resources, m_resource_time_spent_offset);
4752
4753 tList<cModularityAnalysis> mod_list;
4754 tAnalyzeJobBatch<cModularityAnalysis> jobbatch(m_jobqueue);
4755 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4756 for (cAnalyzeGenotype* cur_genotype = batch_it.Next(); cur_genotype; cur_genotype = batch_it.Next()) {
4757 cModularityAnalysis* mod = new cModularityAnalysis(cur_genotype, test_info);
4758 mod_list.Push(mod);
4759 jobbatch.AddJob(mod, &cModularityAnalysis::CalcFunctionalModularity);
4760 }
4761 jobbatch.RunBatch();
4762 cModularityAnalysis* mod = NULL;
4763 while ((mod = mod_list.Pop())) delete mod;
4764 }
4765
CommandAverageModularity(cString cur_string)4766 void cAnalyze::CommandAverageModularity(cString cur_string)
4767 {
4768 cout << "Average Modularity calculations" << endl;
4769
4770 // Load in the variables...
4771 cString filename = cur_string.PopWord();
4772
4773 int print_mode = 0; // 0=Normal, 1=Boolean results
4774
4775 // Collect any other format information needed...
4776 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
4777 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
4778
4779 cStringList arg_list(cur_string);
4780
4781 cout << "Found " << arg_list.GetSize() << " args." << endl;
4782
4783 // Check for some command specific variables.
4784 if (arg_list.PopString("0") != "") print_mode = 0;
4785 if (arg_list.PopString("1") != "") print_mode = 1;
4786
4787 cout << "There are " << arg_list.GetSize() << " column args." << endl;
4788
4789 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(arg_list, output_list);
4790
4791 cout << "Args are loaded." << endl;
4792
4793 const int num_cols = output_list.GetSize();
4794
4795 // Give some information in verbose mode.
4796 if (m_world->GetVerbosity() >= VERBOSE_ON) {
4797 cout << " outputing as ";
4798 if (print_mode == 1) cout << "boolean ";
4799 cout << "text files." << endl;
4800 cout << " Format: ";
4801
4802 output_it.Reset();
4803 while (output_it.Next() != NULL) {
4804 cout << output_it.Get()->GetName() << " ";
4805 }
4806 cout << endl;
4807 }
4808
4809 ofstream& fp = m_world->GetDataFileOFStream(filename);
4810
4811 // printing the headers
4812 // not done by default since many dumps may be analyzed at the same time
4813 // and results would be put in the same file
4814 if (arg_list.GetSize()==0) {
4815 // Headers
4816 fp << "# Avida analyze modularity data" << endl;
4817 fp << "# 1: organism length" << endl;
4818 fp << "# 2: number of tasks done" << endl;
4819 fp << "# 3: number of sites used in tasks" << endl;
4820 fp << "# 4: proportion of sites used in tasks" << endl;
4821 fp << "# 5: average number of tasks done per site" << endl;
4822 fp << "# 6: average number sites per task done" << endl;
4823 fp << "# 7: average number tasks per site per task" << endl;
4824 fp << "# 8: average proportion of the non-overlaping region of a task" << endl;
4825 fp << "# 9-17: average StDev in positions used for task 1-9" << endl;
4826 fp << "# 18-26: average number of sites necessary for each of the tasks" << endl;
4827 fp << "# 27-36: number of sites involved in 0-9 tasks" << endl;
4828 fp << "# 37-45: average task length (distance from first to last inst used)" << endl;
4829 fp << endl;
4830 return;
4831 }
4832
4833 // initialize various variables used in calculations
4834
4835 int num_orgs = 0; // number of organisms in the dump
4836
4837 double av_length = 0; // average organism length
4838 double av_task = 0; // average # of tasks done
4839 double av_inst = 0; // average # instructions used in tasks
4840 double av_inst_len = 0; // proportion of sites used for tasks
4841 double av_site_task = 0; // average number of sites per task
4842 double av_task_site = 0; // average number of tasks per site
4843 double av_t_s_norm = 0; // average number of tasks per site per task
4844 double av_task_overlap = 0; // average overlap between tasks
4845
4846 // average StDev in positions used for a task
4847 tArray<double> std_task_position(num_cols);
4848 std_task_position.SetAll(0.0);
4849
4850 // # of organisms actually doing a task
4851 tArray<double> org_task(num_cols);
4852 org_task.SetAll(0.0);
4853
4854 // av. # of sites necessary for each of the tasks
4855 tArray<double> av_num_inst(num_cols);
4856 av_num_inst.SetAll(0.0);
4857
4858 // number of sites involved in 0-9 tasks
4859 tArray<double> av_inst_task(num_cols+1);
4860 av_inst_task.SetAll(0.0);
4861
4862 // av. # task length (distance from first to last site used)
4863 tArray<double> av_task_length(num_cols);
4864 av_task_length.SetAll(0.0);
4865
4866
4867 ///////////////////////////////////////////////////////
4868 // Loop through all of the genotypes in this batch...
4869 ///////////////////////////////////////////////////////
4870
4871 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
4872 cAnalyzeGenotype* genotype = NULL;
4873
4874 // would like to test oly the viable ones, but they can be non-viable
4875 // and still reproduce and do tasks
4876 // while ((genotype = batch_it.Next()) != NULL && genotype->GetViable()) {
4877 while ((genotype = batch_it.Next()) != NULL) {
4878
4879 int num_cpus = genotype->GetNumCPUs();
4880
4881 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << " Mapping " << genotype->GetName() << endl;
4882 cout.flush();
4883
4884 // Calculate the stats for the genotype we're working with...
4885 genotype->Recalculate(m_ctx);
4886
4887 // Check if the organism does any tasks.
4888 bool does_tasks = false;
4889 for (int i = 0; i < num_cols; i++) {
4890 if (genotype->GetTaskCount(i) > 0) {
4891 does_tasks = true;
4892 break;
4893 }
4894 }
4895
4896 // Don't calculate the modularity if the organism doesn't reproduce
4897 // i.e. if the fitness is 0
4898 if (genotype->GetFitness() > 0.0 && does_tasks) {
4899 num_orgs = num_orgs + num_cpus;
4900
4901 const int max_line = genotype->GetLength();
4902 const Genome& base_genome = genotype->GetGenome();
4903 const Sequence& base_seq = base_genome.GetSequence();
4904 Genome mod_genome(base_genome);
4905 Sequence& seq = mod_genome.GetSequence();
4906 cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
4907
4908 // Create and initialize the modularity matrix
4909 tMatrix<int> mod_matrix(num_cols, max_line);
4910 mod_matrix.SetAll(0);
4911
4912 // Create and initialize the task overalp matrix
4913 tMatrix<int> task_overlap(num_cols, num_cols);
4914 task_overlap.SetAll(0);
4915
4916 // Create an initialize the counters for modularity
4917 tArray<int> num_task(max_line); // number of tasks instruction is used in
4918 tArray<int> num_inst(num_cols); // number of instructions involved in a task
4919 tArray<int> sum(num_cols); // helps with StDev calculations
4920 tArray<int> sumsq(num_cols); // helps with StDev calculations
4921 tArray<int> inst_task(num_cols+1); // # of inst's involved in 0,1,2,3... tasks
4922 tArray<int> task_length(num_cols); // ditance between first and last inst involved in a task
4923
4924 num_task.SetAll(0);
4925 num_inst.SetAll(0);
4926 sum.SetAll(0);
4927 sumsq.SetAll(0);
4928 inst_task.SetAll(0);
4929 task_length.SetAll(0);
4930
4931 int total_task = 0; // total number of tasks done
4932 int total_inst = 0; // total number of instructions involved in tasks
4933 int total_all = 0; // sum of mod_matrix
4934 double sum_task_overlap = 0;// task overlap for for this geneome
4935
4936 // Loop through all the lines of code, testing the removal of each.
4937 for (int line_num = 0; line_num < max_line; line_num++) {
4938 int cur_inst = base_seq[line_num].GetOp();
4939
4940 seq[line_num] = null_inst;
4941 cAnalyzeGenotype test_genotype(m_world, mod_genome);
4942 test_genotype.Recalculate(m_ctx);
4943
4944 // Print the individual columns...
4945 output_it.Reset();
4946 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
4947 int cur_col = 0;
4948 while ((data_command = output_it.Next()) != NULL) {
4949 const cFlexVar test_value = data_command->GetValue(&test_genotype);
4950
4951 // This is done so that under 'binary' option it marks
4952 // the task as being influenced by the mutation iff
4953 // it is completely knocked out, not just decreased
4954
4955 int compare_type = data_command->GetCompareType();
4956 int compare = CompareFlexStat(test_value, data_command->GetValue(genotype), compare_type);
4957
4958 // If knocking out an instruction stops the expression of a
4959 // particular task, mark that in the modularity matrix
4960 // and add it to two counts
4961 // Only do the checking if the test_genotype replicate, i.e.
4962 // if it's fitness is not zeros
4963
4964 if (compare < 0 && test_genotype.GetFitness() != 0) {
4965 mod_matrix(cur_col,line_num) = 1;
4966 num_inst[cur_col]++;
4967 num_task[line_num]++;
4968 }
4969 cur_col++;
4970 }
4971
4972 // Reset the mod_genome back to the original sequence.
4973 seq[line_num].SetOp(cur_inst);
4974 } // end of genotype-phenotype mapping for a single organism
4975
4976 for (int i = 0; i < num_cols; i++) if (num_inst[i] != 0) total_task++;
4977 for (int i = 0; i < max_line; i++) if (num_task[i] != 0) total_inst++;
4978 for (int i = 0; i < num_cols; i++) total_all = total_all + num_inst[i];
4979
4980 // Add the values to the av_ variables, used for calculating the average
4981 // in order to weigh them by abundance, multiply everything by num_cpus
4982
4983 av_length = av_length + max_line*num_cpus;
4984 av_task = av_task + total_task*num_cpus;
4985 av_inst = av_inst + total_inst*num_cpus;
4986 av_inst_len = av_inst_len + (double) total_inst*num_cpus/max_line;
4987
4988 if (total_task !=0) av_site_task = av_site_task + num_cpus * (double) total_all/total_task;
4989 if (total_inst !=0) av_task_site = av_task_site + num_cpus * (double) total_all/total_inst;
4990 if (total_inst !=0 && total_task !=0) {
4991 av_t_s_norm = av_t_s_norm + num_cpus * (double) total_all/(total_inst*total_task);
4992 }
4993
4994 for (int i = 0; i < num_cols; i++) {
4995 if (num_inst[i] > 0) {
4996 av_num_inst[i] = av_num_inst[i] + num_inst[i] * num_cpus;
4997 org_task[i] = org_task[i] + num_cpus; // count how many are actually doing the task
4998 }
4999 }
5000
5001 // calculate average task overlap
5002 // first construct num_task x num_task matrix with number of sites overlapping
5003 for (int i = 0; i < max_line; i++) {
5004 for (int j = 0; j < num_cols; j++) {
5005 for (int k = j; k < num_cols; k++) {
5006 if (mod_matrix(j,i)>0 && mod_matrix(k,i)>0) {
5007 task_overlap(j,k)++;
5008 if (j!=k) task_overlap(k,j)++;
5009 }
5010 }
5011 }
5012 }
5013
5014 // go though the task_overlap matrix, add and average everything up.
5015 if (total_task > 1) {
5016 for (int i = 0; i < num_cols; i++) {
5017 double overlap_per_task = 0;
5018 for (int j = 0; j < num_cols; j++) {
5019 if (i!=j) {overlap_per_task = overlap_per_task + task_overlap(i,j);}
5020 }
5021 if (task_overlap(i,i) !=0){
5022 sum_task_overlap = sum_task_overlap + overlap_per_task / (task_overlap(i,i) * (total_task-1));
5023 }
5024 }
5025 }
5026
5027 // now, divide that by number of tasks done and add to the grand sum, weigthed by num_cpus
5028 if (total_task!=0) {
5029 av_task_overlap = av_task_overlap + num_cpus * (double) sum_task_overlap/total_task ;
5030 }
5031 // calculate the first/last postion of a task, the task "spread"
5032 // starting from the top look for the fist command that matters for a task
5033
5034 for (int i = 0; i < num_cols; i++) {
5035 int j = 0;
5036 while (j < max_line) {
5037 if (mod_matrix(i,j) > 0 && task_length[i] == 0 ) {
5038 task_length[i] = j;
5039 break;
5040 }
5041 j++;
5042 }
5043 }
5044
5045 // starting frm the bottom look for the last command that matters for a task
5046 // and substract it from the first to get the task length
5047 // add one in order to account for both the beginning and the end instruction
5048 for (int i = 0; i < num_cols; i++) {
5049 int j = max_line - 1;
5050 while (j > -1) {
5051 if (mod_matrix(i,j) > 0) {
5052 task_length[i] = j - task_length[i] + 1;
5053 break;
5054 }
5055 j--;
5056 }
5057 }
5058 // add the task lengths to the average for the batch
5059 // weigthed by the number of cpus for that genotype
5060 for (int i = 0; i < num_cols; i++) {
5061 av_task_length[i] = av_task_length[i] + num_cpus * task_length[i];
5062 }
5063
5064 // calculate the Standard Deviation in the mean position of the task
5065 for (int i = 0; i < num_cols; i++) {
5066 for (int j = 0; j < max_line; j++) {
5067 if (mod_matrix(i,j)>0) sum[i] = sum[i] + j;
5068 }
5069 }
5070
5071 double temp = 0;
5072 for (int i = 0; i < num_cols; i++) {
5073 if (num_inst[i]>1) {
5074 double av_sum = sum[i]/num_inst[i];
5075 for (int j = 0; j < max_line; j++) {
5076 if (mod_matrix(i,j)>0) temp = (av_sum - j)*(av_sum - j);
5077 }
5078 std_task_position[i] = std_task_position[i] + sqrt(temp/(num_inst[i]-1))*num_cpus;
5079 }
5080 }
5081
5082 for (int i = 0; i < max_line; i++) { inst_task[num_task[i]]++ ;}
5083 for (int i = 0; i < num_cols+1; i++) { av_inst_task[i] = av_inst_task[i] + inst_task[i] * num_cpus;}
5084
5085 }
5086 } // this is the end of the loop going though all the organisms
5087
5088 // make sure there are some organisms doing task in this batch
5089 // if not, return all zeros
5090
5091 if (num_orgs != 0) {
5092 fp << (double) av_length/num_orgs << " "; // 1: average length
5093 fp << (double) av_task/num_orgs << " "; // 2: av. number of tasks done
5094 fp << (double) av_inst/num_orgs << " "; // 3: av. number of sites used for tasks
5095 fp << (double) av_inst_len/num_orgs << " "; // 4: proportion of sites used for tasks
5096 fp << (double) av_task_site/num_orgs << " "; // 5: av. number of tasks per site
5097 fp << (double) av_site_task/num_orgs << " "; // 6: av. number of sites per task
5098 fp << (double) av_t_s_norm/num_orgs << " "; // 7: av. number of tasks per site per task
5099 fp << (double) 1 - av_task_overlap/num_orgs << " "; // 8: av. proportion of a task that DOESN'T overlap
5100 for (int i = 0; i < num_cols; i++) {
5101 if (org_task[i] > 0) fp << std_task_position[i]/org_task[i] << " ";
5102 else fp << 0 << " ";
5103 }
5104 for (int i = 0; i < num_cols; i++) {
5105 if (org_task[i] > 0) fp << (double) av_num_inst[i]/org_task[i] << " ";
5106 else fp << 0 << " ";
5107 }
5108 for (int i = 0; i < num_cols+1; i++) { fp << (double) av_inst_task[i]/num_orgs << " ";}
5109 for (int i = 0; i < num_cols; i++) { fp << (double) av_task_length[i]/num_orgs << " ";}
5110 fp << endl;
5111 }
5112
5113 else {
5114 for (int i = 0; i < 8+4*num_cols+1; i++) {fp << "0 ";}
5115 fp << endl;
5116 }
5117 }
5118
5119
CommandAnalyzeModularity(cString cur_string)5120 void cAnalyze::CommandAnalyzeModularity(cString cur_string)
5121 {
5122 cString filename("analyze_modularity.dat");
5123 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5124
5125 cDataFile & df = m_world->GetDataFile(filename);
5126 df.WriteComment( "Modularity Analysis" );
5127 df.WriteTimeStamp();
5128
5129 // Determine which phenotypic traits we're working with
5130 tList< tDataEntryCommand<cAnalyzeGenotype> > output_list;
5131 tListIterator< tDataEntryCommand<cAnalyzeGenotype> > output_it(output_list);
5132 cStringList arg_list(cur_string);
5133 cAnalyzeGenotype::GetDataCommandManager().LoadCommandList(arg_list, output_list);
5134 const int num_traits = output_list.GetSize();
5135
5136 // Loop through all genotypes in this batch.
5137 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
5138 cAnalyzeGenotype * genotype = NULL;
5139 while ((genotype = batch_it.Next()) != NULL) {
5140 const int base_length = genotype->GetLength();
5141 const Genome& base_genome = genotype->GetGenome();
5142 const Sequence& base_seq = base_genome.GetSequence();
5143 Genome mod_genome(base_genome);
5144 Sequence& seq = mod_genome.GetSequence();
5145 genotype->Recalculate(m_ctx);
5146
5147 const cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
5148
5149 tMatrix<bool> task_matrix(num_traits, base_length);
5150 tArray<int> num_inst(num_traits); // Number of instructions for each task
5151 tArray<int> num_task(base_length); // Number of traits at each locus
5152 task_matrix.SetAll(false);
5153 num_inst.SetAll(0);
5154 num_task.SetAll(0);
5155
5156 // Loop through all lines in this genome
5157 for (int line_num = 0; line_num < base_length; line_num++) {
5158 int cur_inst = base_seq[line_num].GetOp();
5159
5160 // Determine what happens to this genotype when this line is knocked out
5161 seq[line_num] = null_inst;
5162 cAnalyzeGenotype test_genotype(m_world, mod_genome);
5163 test_genotype.Recalculate(m_ctx);
5164
5165 // Loop through the individual traits
5166 output_it.Reset();
5167 tDataEntryCommand<cAnalyzeGenotype> * data_command = NULL;
5168 int cur_trait = 0;
5169 while ((data_command = output_it.Next()) != NULL) {
5170 const cFlexVar test_value = data_command->GetValue(&test_genotype);
5171
5172 int compare_type = data_command->GetCompareType();
5173 int compare = CompareFlexStat(test_value, data_command->GetValue(genotype), compare_type);
5174
5175 // If knocking out the instruction turns off this trait, mark it in
5176 // the modularity matrix. Only check if the test_genotype replicates,
5177 // i.e. if its fitness is not zeros
5178 if (compare < 0 && test_genotype.GetFitness() != 0) {
5179 task_matrix(cur_trait, line_num) = true;
5180 num_inst[cur_trait]++;
5181 num_task[line_num]++;
5182 // cout << line_num << " : true" << endl;
5183 } else {
5184 // cout << line_num << " : false" << endl;
5185 }
5186 cur_trait++;
5187 }
5188
5189 // Reset the mod_genome back to the original sequence.
5190 seq[line_num].SetOp(cur_inst);
5191 } // end of genotype-phenotype mapping for a single organism
5192
5193
5194 // --== PHYSICAL MODULARITY ==--
5195
5196 double ave_dist = 0.0; // Average distance between sites in traits.
5197
5198 // Loop through each task to calculate its physical modularity
5199 int trait_count = 0; // Count active traits...
5200 int site_count = 0; // Count total sites for all traits...
5201 for (int cur_trait = 0; cur_trait < num_traits; cur_trait++) {
5202 // cout << "Trait " << cur_trait << ", coded for by "
5203 // << num_inst[cur_trait] << " instructions." << endl;
5204
5205 // Ignore traits not coded for in this genome...
5206 if (num_inst[cur_trait] == 0) continue;
5207
5208 // Keep track of how many traits we're examining...
5209 trait_count++;
5210
5211 double trait_dist = 0.0; // Total distance between sites in this trait.
5212 int num_samples = 0; // Count samples we take for this trait.
5213
5214 // Compare all pairs of positions.
5215 for (int pos1 = 0; pos1 < base_length; pos1++) {
5216 if (task_matrix(cur_trait, pos1) == false) continue;
5217 site_count++;
5218 for (int pos2 = pos1+1; pos2 < base_length; pos2++) {
5219 if (task_matrix(cur_trait, pos2) == false) continue;
5220
5221 // We'll only make it this far if both positions code for the trait.
5222 num_samples++;
5223
5224 // Calculate the distance...
5225 int cur_dist = pos2 - pos1;
5226
5227 // Remember to consider that the genome is circular.
5228 if (2*cur_dist > base_length) cur_dist = base_length - cur_dist;
5229
5230 // cout << "Pos " << pos1 << " and " << pos2 << "; distance="
5231 // << cur_dist << endl;
5232
5233 // And add it into the total for this trait.
5234 trait_dist += cur_dist;
5235 }
5236 }
5237
5238 // Assert that we found the correct number of samples.
5239 //assert(num_samples = num_inst(cur_trait) * (num_inst(cur_trait)-1) / 2);
5240
5241 // Now that we have all of the distances for this trait, divide by the
5242 // number of samples and add it to the average.
5243 ave_dist += trait_dist / num_samples;
5244 }
5245
5246
5247 // Now that we've summed up all of the average distances for this
5248 // genotype, calculate the physical modularity.
5249
5250 double PM = 1.0 - (ave_dist / (double) (base_length * trait_count));
5251 double ave_sites = ((double) site_count) / (double) trait_count;
5252
5253 // Write the results to file...
5254 df.Write(PM, "Physical Modularity");
5255 df.Write(trait_count, "Number of traits used in calculation");
5256 df.Write(ave_sites, "Average num sites associated with traits");
5257 df.Write(base_length, "Genome length");
5258 df.Write(ave_dist, "Average Distance between trait sites");
5259 df.Endl();
5260 }
5261
5262 // @CAO CONTINUE HERE
5263 }
5264
5265
5266 // Determine redundancy by calculating the percentage of the lifetimes
5267 // where fitness is decreased over a range of instruction failure probabilities.
5268 // @JEB 9-24-2008
CommandAnalyzeRedundancyByInstFailure(cString cur_string)5269 void cAnalyze::CommandAnalyzeRedundancyByInstFailure(cString cur_string)
5270 {
5271 cout << "Analyzing redundancy by changing instruction failure probability..." << endl;
5272
5273 cString filename("analyze_redundancy_by_inst_failure.dat");
5274 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5275 int replicates = 1000;
5276 if (cur_string.GetSize() != 0) replicates = cur_string.PopWord().AsInt();
5277 double log10_start_pr_fail = -4;
5278
5279 // add mode
5280 int mode = 0;
5281 // 0 = average log2 fitness
5282 // 1 = fitness decreased
5283
5284 if (cur_string.GetSize() != 0) log10_start_pr_fail = cur_string.PopWord().AsDouble();
5285 double log10_end_pr_fail = 0;
5286 if (cur_string.GetSize() != 0) log10_end_pr_fail = cur_string.PopWord().AsDouble();
5287 if (log10_end_pr_fail > 0) {
5288 m_world->GetDriver().NotifyWarning("ANALYZE_REDUNDANCY_BY_INST_FAILURE: End log value greater than 0 set to 0.");
5289 }
5290 double log10_step_size_pr_fail = 0.1;
5291 if (cur_string.GetSize() != 0) log10_step_size_pr_fail = cur_string.PopWord().AsDouble();
5292
5293 // Output is one line per organism in the current batch with columns.
5294 cDataFile & df = m_world->GetDataFile(filename);
5295 df.WriteComment( "Redundancy calculated by changing the probability of instruction failure" );
5296 cString s;
5297 s.Set("%i replicates at each chance of instruction failure", replicates);
5298 df.WriteComment(s);
5299 df.WriteTimeStamp();
5300
5301 // Loop through all of the genotypes in this batch...
5302
5303 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
5304 cAnalyzeGenotype* genotype = NULL;
5305 while ((genotype = batch_it.Next()) != NULL) {
5306
5307 if (m_world->GetVerbosity() >= VERBOSE_ON) {
5308 cout << " Determining redundancy by instruction failure for " << genotype->GetName() << endl;
5309 }
5310
5311 const cInstSet& original_inst_set = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet());
5312 cInstSet* modify_inst_set = new cInstSet(original_inst_set);
5313 cString isname = cString(genotype->GetGenome().GetInstSet()) + ":analyze_redundancy_by_inst_failure";
5314 if (!m_world->GetHardwareManager().RegisterInstSet(isname, modify_inst_set)) {
5315 delete modify_inst_set;
5316 modify_inst_set = &m_world->GetHardwareManager().GetInstSet(isname);
5317 }
5318
5319 // Modify the instruction set to include the current probability of failure.
5320 int num_pr_fail_insts = 0;
5321 for (int j = 0; j < modify_inst_set->GetSize(); j++)
5322 {
5323 cString inst_name = modify_inst_set->GetName(j);
5324 cInstruction inst = modify_inst_set->GetInst(inst_name);
5325 if (original_inst_set.GetProbFail(inst) > 0) num_pr_fail_insts++;
5326 modify_inst_set->SetProbFail(inst, 0);
5327 }
5328 genotype->GetGenome().SetInstSet(isname);
5329
5330 // Avoid unintentional use with no instructions having a chance of failure
5331 if (num_pr_fail_insts == 0) {
5332 m_world->GetDriver().RaiseFatalException(1,"ANALYZE_REDUNDANCY_BY_INST_FAILURE: No instructions have a chance of failure in default instruction set.");
5333 }
5334
5335 // Recalculate the baseline fitness
5336 // May need to calculate multiple times to check for stochastic behavior....
5337 genotype->Recalculate(m_ctx);
5338 double baseline_fitness = genotype->GetFitness();
5339
5340 if (baseline_fitness > 0) {
5341 // Write information for this
5342 df.Write(genotype->GetName(), "genotype name");
5343 df.Write(genotype->GetID(), "genotype id");
5344 df.Write(baseline_fitness, "fitness");
5345
5346 // Run the organism the specified number of replicates
5347 for (double log10_fc = log10_start_pr_fail; log10_fc <= log10_end_pr_fail; log10_fc += log10_step_size_pr_fail) {
5348 double fc = exp(log10_fc*log(10.0));
5349
5350 // Modify the instruction set to include the current probability of failure.
5351 *modify_inst_set = original_inst_set;
5352 for (int j = 0; j < modify_inst_set->GetSize(); j++) {
5353 cString inst_name = modify_inst_set->GetName(j);
5354 cInstruction inst = modify_inst_set->GetInst(inst_name);
5355 if (original_inst_set.GetProbFail(inst) > 0) modify_inst_set->SetProbFail(inst, fc);
5356 }
5357
5358 // Recalculate the requested number of times
5359 double chance = 0;
5360 double avg_fitness = 0;
5361 for (int i = 0; i < replicates; i++) {
5362 genotype->Recalculate(m_ctx);
5363 if (genotype->GetFitness() < baseline_fitness) chance++;
5364 avg_fitness += genotype->GetFitness();
5365 }
5366
5367 if (mode == 0) {
5368 s.Set("Avg fitness when inst prob fail %.3g", fc);
5369 df.Write(avg_fitness/replicates, s);
5370 } else {
5371 s.Set("Fraction of replicates with reduced fitness at inst prob fail %.3g", fc);
5372 df.Write(chance/replicates, s);
5373 }
5374 }
5375 df.Endl();
5376 }
5377 }
5378 }
5379
CommandMapMutations(cString cur_string)5380 void cAnalyze::CommandMapMutations(cString cur_string)
5381 {
5382 cout << "Constructing genome mutations maps..." << endl;
5383
5384 // Load in the variables...
5385 cString directory = PopDirectory(cur_string, "mutations/");
5386 int file_type = FILE_TYPE_TEXT;
5387
5388 cStringList arg_list(cur_string);
5389
5390 // Check for some command specific variables.
5391 if (arg_list.PopString("text") != "") file_type = FILE_TYPE_TEXT;
5392 if (arg_list.PopString("html") != "") file_type = FILE_TYPE_HTML;
5393
5394 // Give some information in verbose mode.
5395 if (m_world->GetVerbosity() >= VERBOSE_ON) {
5396 cout << " outputing as ";
5397 if (file_type == FILE_TYPE_TEXT) cout << "text files." << endl;
5398 else cout << "HTML files." << endl;
5399 }
5400
5401
5402 ///////////////////////////////////////////////////////
5403 // Loop through all of the genotypes in this batch...
5404
5405 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
5406 cAnalyzeGenotype * genotype = NULL;
5407 while ((genotype = batch_it.Next()) != NULL) {
5408 if (m_world->GetVerbosity() >= VERBOSE_ON) {
5409 cout << " Creating mutation map for " << genotype->GetName() << endl;
5410 }
5411
5412 // Construct this filename...
5413 cString filename;
5414 if (file_type == FILE_TYPE_TEXT) {
5415 filename.Set("%smut_map.%s.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
5416 } else { // if (file_type == FILE_TYPE_HTML) {
5417 filename.Set("%smut_map.%s.html", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
5418 }
5419 if (m_world->GetVerbosity() >= VERBOSE_ON) {
5420 cout << " Using filename \"" << filename << "\"" << endl;
5421 }
5422 ofstream& fp = m_world->GetDataFileOFStream(filename);
5423
5424 // Calculate the stats for the genotype we're working with...
5425 genotype->Recalculate(m_ctx);
5426 const double base_fitness = genotype->GetFitness();
5427 const int max_line = genotype->GetLength();
5428 const Genome& base_genome = genotype->GetGenome();
5429 const Sequence& base_seq = base_genome.GetSequence();
5430 Genome mod_genome(base_genome);
5431 Sequence& seq = mod_genome.GetSequence();
5432 const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet());
5433 const int num_insts = inst_set.GetSize();
5434
5435 // Headers...
5436 if (file_type == FILE_TYPE_TEXT) {
5437 fp << "# 1: Genome instruction ID (pre-mutation)" << endl;
5438 for (int i = 0; i < num_insts; i++) {
5439 fp << "# " << i+1 <<": Fit if mutated to '"
5440 << inst_set.GetName(i) << "'" << endl;
5441 }
5442 fp << "# " << num_insts + 2 << ": Knockout" << endl;
5443 fp << "# " << num_insts + 3 << ": Fraction Lethal" << endl;
5444 fp << "# " << num_insts + 4 << ": Fraction Detremental" << endl;
5445 fp << "# " << num_insts + 5 << ": Fraction Neutral" << endl;
5446 fp << "# " << num_insts + 6 << ": Fraction Beneficial" << endl;
5447 fp << "# " << num_insts + 7 << ": Average Fitness" << endl;
5448 fp << "# " << num_insts + 8 << ": Expected Entropy" << endl;
5449 fp << "# " << num_insts + 9 << ": Original Instruction Name" << endl;
5450 fp << endl;
5451
5452 } else { // if (file_type == FILE_TYPE_HTML) {
5453 // Mark file as html
5454 fp << "<html>" << endl;
5455
5456 // Setup the body...
5457 fp << "<body bgcolor=\"#FFFFFF\"" << endl
5458 << " text=\"#000000\"" << endl
5459 << " link=\"#0000AA\"" << endl
5460 << " alink=\"#0000FF\"" << endl
5461 << " vlink=\"#000044\">" << endl
5462 << endl
5463 << "<h1 align=center>Mutation Map for Run " << batch[cur_batch].Name()
5464 << ", ID " << genotype->GetID() << "</h1>" << endl
5465 << "<center>" << endl
5466 << endl;
5467
5468 // The main chart...
5469 fp << "<table border=1 cellpadding=2>" << endl;
5470
5471 // The headings...///
5472 fp << "<tr><th>Genome ";
5473 for (int i = 0; i < num_insts; i++) {
5474 fp << "<th>" << inst_set.GetName(i) << " ";
5475 }
5476 fp << "<th>Knockout ";
5477 fp << "<th>Frac. Lethal ";
5478 fp << "<th>Frac. Detremental ";
5479 fp << "<th>Frac. Neutral ";
5480 fp << "<th>Frac. Beneficial ";
5481 fp << "<th>Ave. Fitness ";
5482 fp << "<th>Expected Entropy ";
5483 fp << "</tr>" << endl << endl;
5484 }
5485
5486
5487 // Keep track of the number of mutations in each category...
5488 int total_dead = 0, total_neg = 0, total_neut = 0, total_pos = 0;
5489 double total_fitness = 0.0;
5490 tArray<double> col_fitness(num_insts + 1);
5491 col_fitness.SetAll(0.0);
5492
5493 const cInstruction null_inst = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).ActivateNullInst();
5494
5495 cString color_string; // For coloring cells...
5496
5497 // Loop through all the lines of code, testing all mutations...
5498 for (int line_num = 0; line_num < max_line; line_num++) {
5499 int cur_inst = base_seq[line_num].GetOp();
5500 char cur_symbol = base_seq[line_num].GetSymbol();
5501 int row_dead = 0, row_neg = 0, row_neut = 0, row_pos = 0;
5502 double row_fitness = 0.0;
5503
5504 // Column 1... the original instruction in the geneome.
5505 if (file_type == FILE_TYPE_HTML) {
5506 fp << "<tr><td align=right>" << inst_set.GetName(cur_inst)
5507 << " (" << cur_symbol << ") ";
5508 } else {
5509 fp << cur_inst << " ";
5510 }
5511
5512 // Columns 2 to D+1 (the possible mutations)
5513 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++)
5514 {
5515 if (mod_inst == cur_inst) {
5516 if (file_type == FILE_TYPE_HTML) {
5517 color_string = "#FFFFFF";
5518 fp << "<th bgcolor=\"" << color_string << "\">";
5519 }
5520 }
5521 else {
5522 seq[line_num].SetOp(mod_inst);
5523 cAnalyzeGenotype test_genotype(m_world, mod_genome);
5524 test_genotype.Recalculate(m_ctx);
5525 const double test_fitness = test_genotype.GetFitness() / base_fitness;
5526 row_fitness += test_fitness;
5527 total_fitness += test_fitness;
5528 col_fitness[mod_inst] += test_fitness;
5529
5530 // Categorize this mutation...
5531 if (test_fitness == 1.0) { // Neutral Mutation...
5532 row_neut++;
5533 total_neut++;
5534 if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_NEUT.Get();
5535 } else if (test_fitness == 0.0) { // Lethal Mutation...
5536 row_dead++;
5537 total_dead++;
5538 if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_LETHAL.Get();
5539 } else if (test_fitness < 1.0) { // Detrimental Mutation...
5540 row_neg++;
5541 total_neg++;
5542 if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_NEG.Get();
5543 } else { // Beneficial Mutation...
5544 row_pos++;
5545 total_pos++;
5546 if (file_type == FILE_TYPE_HTML) color_string = m_world->GetConfig().COLOR_MUT_POS.Get();
5547 }
5548
5549 // Write out this cell...
5550 if (file_type == FILE_TYPE_HTML) {
5551 fp << "<th bgcolor=\"" << color_string << "\">";
5552 }
5553 fp << test_fitness << " ";
5554 }
5555 }
5556
5557 // Column: Knockout
5558 seq[line_num] = null_inst;
5559 cAnalyzeGenotype test_genotype(m_world, mod_genome);
5560 test_genotype.Recalculate(m_ctx);
5561 const double test_fitness = test_genotype.GetFitness() / base_fitness;
5562 col_fitness[num_insts] += test_fitness;
5563
5564 // Categorize this mutation if its in HTML mode (color only)...
5565 if (file_type == FILE_TYPE_HTML) {
5566 if (test_fitness == 1.0) color_string = m_world->GetConfig().COLOR_MUT_NEUT.Get();
5567 else if (test_fitness == 0.0) color_string = m_world->GetConfig().COLOR_MUT_LETHAL.Get();
5568 else if (test_fitness < 1.0) color_string = m_world->GetConfig().COLOR_MUT_NEG.Get();
5569 else color_string = m_world->GetConfig().COLOR_MUT_POS.Get();
5570
5571 fp << "<th bgcolor=\"" << color_string << "\">";
5572 }
5573
5574 fp << test_fitness << " ";
5575
5576 // Fraction Columns...
5577 if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">";
5578 fp << (double) row_dead / (double) (num_insts-1) << " ";
5579
5580 if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEG.Get() << "\">";
5581 fp << (double) row_neg / (double) (num_insts-1) << " ";
5582
5583 if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEUT.Get() << "\">";
5584 fp << (double) row_neut / (double) (num_insts-1) << " ";
5585
5586 if (file_type == FILE_TYPE_HTML) fp << "<th bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">";
5587 fp << (double) row_pos / (double) (num_insts-1) << " ";
5588
5589
5590 // Column: Average Fitness
5591 if (file_type == FILE_TYPE_HTML) fp << "<th>";
5592 fp << row_fitness / (double) (num_insts-1) << " ";
5593
5594 // Column: Expected Entropy @CAO Implement!
5595 if (file_type == FILE_TYPE_HTML) fp << "<th>";
5596 fp << 0.0 << " ";
5597
5598 // End this row...
5599 if (file_type == FILE_TYPE_HTML) fp << "</tr>";
5600 fp << endl;
5601
5602 // Reset the mod_genome back to the original sequence.
5603 seq[line_num].SetOp(cur_inst);
5604 }
5605
5606
5607 // Construct the final line of the table with all totals...
5608 if (file_type == FILE_TYPE_HTML) {
5609 fp << "<tr><th>Totals";
5610
5611 // Instructions + Knockout
5612 for (int i = 0; i <= num_insts; i++) {
5613 fp << "<th>" << col_fitness[i] / max_line << " ";
5614 }
5615
5616 int total_tests = max_line * (num_insts-1);
5617 fp << "<th>" << (double) total_dead / (double) total_tests << " ";
5618 fp << "<th>" << (double) total_neg / (double) total_tests << " ";
5619 fp << "<th>" << (double) total_neut / (double) total_tests << " ";
5620 fp << "<th>" << (double) total_pos / (double) total_tests << " ";
5621 fp << "<th>" << total_fitness / (double) total_tests << " ";
5622 fp << "<th>" << 0.0 << " ";
5623
5624
5625 // And close everything up...
5626 fp << "</table>" << endl
5627 << "</center>" << endl;
5628 }
5629 }
5630 }
5631
5632
CommandMapDepth(cString cur_string)5633 void cAnalyze::CommandMapDepth(cString cur_string)
5634 {
5635 cout << "Constructing depth map..." << endl;
5636
5637 cString filename("depth_map.dat");
5638 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5639
5640 int min_batch = 0;
5641 int max_batch = cur_batch - 1;
5642
5643 if (cur_string.GetSize() != 0) min_batch = cur_string.PopWord().AsInt();
5644 if (cur_string.GetSize() != 0) max_batch = cur_string.PopWord().AsInt();
5645
5646 // First, scan all of the batches to find the maximum depth.
5647 int max_depth = -1;
5648 cAnalyzeGenotype * genotype;
5649 for (int i = min_batch; i <= max_batch; i++) {
5650 tListIterator<cAnalyzeGenotype> list_it(batch[i].List());
5651 while ((genotype = list_it.Next()) != NULL) {
5652 if (genotype->GetDepth() > max_depth) max_depth = genotype->GetDepth();
5653 }
5654 }
5655
5656 cout << "max_depth = " << max_depth << endl;
5657
5658 ofstream& fp = m_world->GetDataFileOFStream(filename);
5659
5660 cout << "Output to " << filename << endl;
5661 tArray<int> depth_array(max_depth+1);
5662 for (cur_batch = min_batch; cur_batch <= max_batch; cur_batch++) {
5663 depth_array.SetAll(0);
5664 tListIterator<cAnalyzeGenotype> list_it(batch[cur_batch].List());
5665 while ((genotype = list_it.Next()) != NULL) {
5666 const int cur_depth = genotype->GetDepth();
5667 const int cur_count = genotype->GetNumCPUs();
5668 depth_array[cur_depth] += cur_count;
5669 }
5670
5671 for (int i = 0; i <= max_depth; i++) {
5672 fp << depth_array[i] << " ";
5673 }
5674 fp << endl;
5675 }
5676 }
5677
CommandHamming(cString cur_string)5678 void cAnalyze::CommandHamming(cString cur_string)
5679 {
5680 cString filename("hamming.dat");
5681 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5682
5683 int batch1 = PopBatch(cur_string.PopWord());
5684 int batch2 = PopBatch(cur_string.PopWord());
5685
5686 // We want batch2 to be the larger one for efficiency...
5687 if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5688 int tmp = batch1; batch1 = batch2; batch2 = tmp;
5689 }
5690
5691 if (m_world->GetVerbosity() <= VERBOSE_NORMAL) {
5692 cout << "Calculating Hamming Distance... ";
5693 cout.flush();
5694 } else {
5695 cout << "Calculating Hamming Distance between batches "
5696 << batch1 << " and " << batch2 << endl;
5697 cout.flush();
5698 }
5699
5700 // Setup some variables;
5701 cAnalyzeGenotype * genotype1 = NULL;
5702 cAnalyzeGenotype * genotype2 = NULL;
5703 int total_dist = 0;
5704 int total_count = 0;
5705
5706 tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5707 tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5708
5709 while ((genotype1 = list1_it.Next()) != NULL) {
5710 list2_it.Reset();
5711 while ((genotype2 = list2_it.Next()) != NULL) {
5712 // Determine the counts...
5713 const int count1 = genotype1->GetNumCPUs();
5714 const int count2 = genotype2->GetNumCPUs();
5715 const int num_pairs = (genotype1 == genotype2) ?
5716 ((count1 - 1) * (count2 - 1)) : (count1 * count2);
5717 if (num_pairs == 0) continue;
5718
5719 // And do the tests...
5720 const int dist = Sequence::FindHammingDistance(genotype1->GetGenome().GetSequence(), genotype2->GetGenome().GetSequence());
5721 total_dist += dist * num_pairs;
5722 total_count += num_pairs;
5723 }
5724 }
5725
5726
5727 // Calculate the final answer
5728 double ave_dist = (double) total_dist / (double) total_count;
5729 cout << " ave distance = " << ave_dist << endl;
5730
5731 cDataFile & df = m_world->GetDataFile(filename);
5732
5733 df.WriteComment( "Hamming distance information" );
5734 df.WriteTimeStamp();
5735
5736 df.Write(batch[batch1].Name(), "Name of First Batch");
5737 df.Write(batch[batch2].Name(), "Name of Second Batch");
5738 df.Write(ave_dist, "Average Hamming Distance");
5739 df.Write(total_count, "Total Pairs Test");
5740 df.Endl();
5741 }
5742
CommandLevenstein(cString cur_string)5743 void cAnalyze::CommandLevenstein(cString cur_string)
5744 {
5745 cString filename("lev.dat");
5746 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5747
5748 int batch1 = PopBatch(cur_string.PopWord());
5749 int batch2 = PopBatch(cur_string.PopWord());
5750
5751 // We want batch2 to be the larger one for efficiency...
5752 if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5753 int tmp = batch1; batch1 = batch2; batch2 = tmp;
5754 }
5755
5756 if (m_world->GetVerbosity() <= VERBOSE_NORMAL) {
5757 cout << "Calculating Levenstein Distance... ";
5758 cout.flush();
5759 } else {
5760 cout << "Calculating Levenstein Distance between batch "
5761 << batch1 << " and " << batch2 << endl;
5762 cout.flush();
5763 }
5764
5765 // Setup some variables;
5766 cAnalyzeGenotype * genotype1 = NULL;
5767 cAnalyzeGenotype * genotype2 = NULL;
5768 int total_dist = 0;
5769 int total_count = 0;
5770
5771 tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5772 tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5773
5774 // Loop through all of the genotypes in each batch...
5775 while ((genotype1 = list1_it.Next()) != NULL) {
5776 list2_it.Reset();
5777 while ((genotype2 = list2_it.Next()) != NULL) {
5778 // Determine the counts...
5779 const int count1 = genotype1->GetNumCPUs();
5780 const int count2 = genotype2->GetNumCPUs();
5781 const int num_pairs = (genotype1 == genotype2) ?
5782 ((count1 - 1) * (count2 - 1)) : (count1 * count2);
5783 if (num_pairs == 0) continue;
5784
5785 // And do the tests...
5786 const int dist = Sequence::FindEditDistance(genotype1->GetGenome().GetSequence(),
5787 genotype2->GetGenome().GetSequence());
5788 total_dist += dist * num_pairs;
5789 total_count += num_pairs;
5790 }
5791 }
5792
5793 // Calculate the final answer
5794 double ave_dist = (double) total_dist / (double) total_count;
5795 cout << " ave distance = " << ave_dist << endl;
5796
5797 cDataFile & df = m_world->GetDataFile(filename);
5798
5799 df.WriteComment( "Levenstein distance information" );
5800 df.WriteTimeStamp();
5801
5802 df.Write(batch[batch1].Name(), "Name of First Batch");
5803 df.Write(batch[batch2].Name(), "Name of Second Batch");
5804 df.Write(ave_dist, "Average Levenstein Distance");
5805 df.Write(total_count, "Total Pairs Test");
5806 df.Endl();
5807 }
5808
CommandSpecies(cString cur_string)5809 void cAnalyze::CommandSpecies(cString cur_string)
5810 {
5811 cString filename("species.dat");
5812 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
5813
5814 int batch1 = PopBatch(cur_string.PopWord());
5815 int batch2 = PopBatch(cur_string.PopWord());
5816 int num_compare = PopBatch(cur_string.PopWord());
5817
5818 // We want batch2 to be the larger one for efficiency...
5819 if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5820 int tmp = batch1; batch1 = batch2; batch2 = tmp;
5821 }
5822
5823 if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Calculating Species Distance... " << endl;
5824 else cout << "Calculating Species Distance between batch "
5825 << batch1 << " and " << batch2 << endl;
5826
5827 // Setup some variables;
5828 cAnalyzeGenotype * genotype1 = NULL;
5829 cAnalyzeGenotype * genotype2 = NULL;
5830 int total_fail = 0;
5831 int total_count = 0;
5832
5833 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
5834
5835 tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5836 tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5837
5838 // Loop through all of the genotypes in each batch...
5839 while ((genotype1 = list1_it.Next()) != NULL) {
5840 list2_it.Reset();
5841 while ((genotype2 = list2_it.Next()) != NULL) {
5842 // Determine the counts...
5843 const int count1 = genotype1->GetNumCPUs();
5844 const int count2 = genotype2->GetNumCPUs();
5845 int num_pairs = count1 * count2;
5846 int fail_count = 0;
5847 bool cross1_viable = true;
5848 bool cross2_viable = true;
5849
5850
5851 if (genotype1 == genotype2) {
5852 total_count += num_pairs * 2 * num_compare;
5853 }
5854 else {
5855 assert(num_compare!=0);
5856 // And do the tests...
5857 for (int iter=1; iter < num_compare; iter++) {
5858 Genome test_genome0 = genotype1->GetGenome();
5859 Genome test_genome1 = genotype2->GetGenome();
5860
5861 double start_frac = m_world->GetRandom().GetDouble();
5862 double end_frac = m_world->GetRandom().GetDouble();
5863 if (start_frac > end_frac) Swap(start_frac, end_frac);
5864
5865 int start0 = (int) (start_frac * (double) test_genome0.GetSize());
5866 int end0 = (int) (end_frac * (double) test_genome0.GetSize());
5867 int start1 = (int) (start_frac * (double) test_genome1.GetSize());
5868 int end1 = (int) (end_frac * (double) test_genome1.GetSize());
5869 assert( start0 >= 0 && start0 < test_genome0.GetSize() );
5870 assert( end0 >= 0 && end0 < test_genome0.GetSize() );
5871 assert( start1 >= 0 && start1 < test_genome1.GetSize() );
5872 assert( end1 >= 0 && end1 < test_genome1.GetSize() );
5873
5874 // Calculate size of sections crossing over...
5875 int size0 = end0 - start0;
5876 int size1 = end1 - start1;
5877
5878 int new_size0 = test_genome0.GetSize() - size0 + size1;
5879 int new_size1 = test_genome1.GetSize() - size1 + size0;
5880
5881 // Don't Crossover if offspring will be illegal!!!
5882 if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
5883 new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
5884 fail_count +=2;
5885 break;
5886 }
5887
5888 // Swap the components
5889 Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
5890 Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
5891 test_genome0.GetSequence().Replace(start0, size0, cross1);
5892 test_genome1.GetSequence().Replace(start1, size1, cross0);
5893
5894 // Run each side, and determine viability...
5895 cCPUTestInfo test_info;
5896 testcpu->TestGenome(m_ctx, test_info, test_genome0);
5897 cross1_viable = test_info.IsViable();
5898
5899 testcpu->TestGenome(m_ctx, test_info, test_genome1);
5900 cross2_viable = test_info.IsViable();
5901
5902 if (cross1_viable == false) fail_count++;
5903 if (cross2_viable == false) fail_count++;
5904 }
5905
5906 total_fail += fail_count * num_pairs;
5907 total_count += num_pairs * 2 * num_compare;
5908 }
5909 }
5910 }
5911
5912 delete testcpu;
5913
5914 // Calculate the final answer
5915 double ave_dist = (double) total_fail / (double) total_count;
5916 cout << " ave distance = " << ave_dist << " in " << total_count << " tests." << endl;
5917
5918 cDataFile& df = m_world->GetDataFile(filename);
5919
5920 df.WriteComment( "Species information" );
5921 df.WriteTimeStamp();
5922
5923 df.Write(batch[batch1].Name(), "Name of First Batch");
5924 df.Write(batch[batch2].Name(), "Name of Second Batch");
5925 df.Write(ave_dist, "Average Species Distance");
5926 df.Write(total_count, "Total Recombinants tested");
5927 df.Endl();
5928 }
5929
CommandRecombine(cString cur_string)5930 void cAnalyze::CommandRecombine(cString cur_string)
5931 {
5932 int batch1 = PopBatch(cur_string.PopWord());
5933 int batch2 = PopBatch(cur_string.PopWord());
5934 int batch3 = PopBatch(cur_string.PopWord());
5935 int num_compare = PopBatch(cur_string.PopWord());
5936
5937 // We want batch2 to be the larger one for efficiency...
5938 if (batch[batch1].List().GetSize() > batch[batch2].List().GetSize()) {
5939 int tmp = batch1; batch1 = batch2; batch2 = tmp;
5940 }
5941
5942 if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Creating recombinants... " << endl;
5943 else cout << "Creating recombinants between batch "
5944 << batch1 << " and " << batch2 << endl;
5945
5946 // Setup some variables;
5947 cAnalyzeGenotype * genotype1 = NULL;
5948 cAnalyzeGenotype * genotype2 = NULL;
5949
5950 tListIterator<cAnalyzeGenotype> list1_it(batch[batch1].List());
5951 tListIterator<cAnalyzeGenotype> list2_it(batch[batch2].List());
5952
5953 // Loop through all of the genotypes in each batch...
5954 while ((genotype1 = list1_it.Next()) != NULL) {
5955 list2_it.Reset();
5956 while ((genotype2 = list2_it.Next()) != NULL) {
5957 // Determine the counts...
5958 int fail_count = 0;
5959
5960
5961 assert(num_compare!=0);
5962 // And do the tests...
5963 for (int iter=1; iter < num_compare; iter++) {
5964 Genome test_genome0 = genotype1->GetGenome();
5965 Genome test_genome1 = genotype2->GetGenome();
5966
5967 double start_frac = m_world->GetRandom().GetDouble();
5968 double end_frac = m_world->GetRandom().GetDouble();
5969 if (start_frac > end_frac) Swap(start_frac, end_frac);
5970
5971 int start0 = (int) (start_frac * (double) test_genome0.GetSize());
5972 int end0 = (int) (end_frac * (double) test_genome0.GetSize());
5973 int start1 = (int) (start_frac * (double) test_genome1.GetSize());
5974 int end1 = (int) (end_frac * (double) test_genome1.GetSize());
5975 assert( start0 >= 0 && start0 < test_genome0.GetSize() );
5976 assert( end0 >= 0 && end0 < test_genome0.GetSize() );
5977 assert( start1 >= 0 && start1 < test_genome1.GetSize() );
5978 assert( end1 >= 0 && end1 < test_genome1.GetSize() );
5979
5980 // Calculate size of sections crossing over...
5981 int size0 = end0 - start0;
5982 int size1 = end1 - start1;
5983
5984 int new_size0 = test_genome0.GetSize() - size0 + size1;
5985 int new_size1 = test_genome1.GetSize() - size1 + size0;
5986
5987 // Don't Crossover if offspring will be illegal!!!
5988 if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
5989 new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
5990 fail_count +=2;
5991 break;
5992 }
5993
5994 if (size0 > 0 && size1 > 0) {
5995 Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
5996 Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
5997 test_genome0.GetSequence().Replace(start0, size0, cross1);
5998 test_genome1.GetSequence().Replace(start1, size1, cross0);
5999 }
6000 else if (size0 > 0) {
6001 Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
6002 test_genome1.GetSequence().Replace(start1, size1, cross0);
6003 }
6004 else if (size1 > 0) {
6005 Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
6006 test_genome0.GetSequence().Replace(start0, size0, cross1);
6007 }
6008
6009 cAnalyzeGenotype* new_genotype0 = new cAnalyzeGenotype(m_world, test_genome0);
6010 cAnalyzeGenotype* new_genotype1 = new cAnalyzeGenotype(m_world, test_genome1);
6011 new_genotype0->SetNumCPUs(1);
6012 new_genotype1->SetNumCPUs(1);
6013 new_genotype0->SetID(0);
6014 new_genotype1->SetID(0);
6015 new_genotype0->SetName("noname");
6016 new_genotype1->SetName("noname");
6017 new_genotype0->SetParentID(genotype1->GetID()); //@CHC: Want to keep track of which two parents generated this offspring
6018 new_genotype0->SetParent2ID(genotype2->GetID());
6019 new_genotype1->SetParentID(genotype1->GetID());
6020 new_genotype1->SetParent2ID(genotype2->GetID());
6021
6022 batch[batch3].List().PushRear(new_genotype0);
6023 batch[batch3].List().PushRear(new_genotype1);
6024
6025 //batch[batch3].List().PushRear(new cAnalyzeGenotype(test_genome0, inst_set));
6026 //batch[batch3].List().PushRear(new cAnalyzeGenotype(test_genome1, inst_set));
6027
6028 }
6029 }
6030 }
6031 }
6032
CommandRecombineSample(cString cur_string)6033 void cAnalyze::CommandRecombineSample(cString cur_string)
6034 {
6035 int batch1 = PopBatch(cur_string.PopWord());
6036 int batch2 = PopBatch(cur_string.PopWord());
6037 int batch3 = PopBatch(cur_string.PopWord());
6038 int num_compare = PopBatch(cur_string.PopWord());
6039
6040
6041 if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Creating recombinants... " << endl;
6042 else cout << "Creating recombinants between batch "
6043 << batch1 << " and " << batch2 << endl;
6044
6045 // Setup some variables;
6046 cAnalyzeGenotype * genotype1 = NULL;
6047 cAnalyzeGenotype * genotype2 = NULL;
6048
6049 //Loop through X number of genotypes
6050 for (int i = 1; i <= num_compare; i++) {
6051 genotype1 = batch[batch1].FindGenotypeRandom(m_world->GetRandom());
6052 genotype2 = batch[batch2].FindGenotypeRandom(m_world->GetRandom());
6053
6054 //50% chance of swapping genotype1 and genotype2 so that we don't always end up with
6055 // the same batch contributing the "ends" of the genome to the offspring
6056 if (m_world->GetRandom().P(0.5)) {
6057 cAnalyzeGenotype * temp = genotype1;
6058 genotype1 = genotype2;
6059 genotype2 = temp;
6060 }
6061
6062 int fail_count = 0;
6063
6064 Genome test_genome0 = genotype1->GetGenome();
6065 Genome test_genome1 = genotype2->GetGenome();
6066
6067 double start_frac = m_world->GetRandom().GetDouble();
6068 double end_frac = m_world->GetRandom().GetDouble();
6069 if (start_frac > end_frac) Swap(start_frac, end_frac);
6070
6071 int start0 = (int) (start_frac * (double) test_genome0.GetSize());
6072 int end0 = (int) (end_frac * (double) test_genome0.GetSize());
6073 int start1 = (int) (start_frac * (double) test_genome1.GetSize());
6074 int end1 = (int) (end_frac * (double) test_genome1.GetSize());
6075 assert( start0 >= 0 && start0 < test_genome0.GetSize() );
6076 assert( end0 >= 0 && end0 < test_genome0.GetSize() );
6077 assert( start1 >= 0 && start1 < test_genome1.GetSize() );
6078 assert( end1 >= 0 && end1 < test_genome1.GetSize() );
6079
6080 // Calculate size of sections crossing over...
6081 int size0 = end0 - start0;
6082 int size1 = end1 - start1;
6083
6084 int new_size0 = test_genome0.GetSize() - size0 + size1;
6085 int new_size1 = test_genome1.GetSize() - size1 + size0;
6086
6087 // Don't Crossover if offspring will be illegal!!!
6088 if (new_size0 < MIN_GENOME_LENGTH || new_size0 > MAX_GENOME_LENGTH ||
6089 new_size1 < MIN_GENOME_LENGTH || new_size1 > MAX_GENOME_LENGTH) {
6090 fail_count +=2;
6091 break;
6092 }
6093
6094 if (size0 > 0 && size1 > 0) {
6095 Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
6096 Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
6097 test_genome0.GetSequence().Replace(start0, size0, cross1);
6098 test_genome1.GetSequence().Replace(start1, size1, cross0);
6099 }
6100 else if (size0 > 0) {
6101 Sequence cross0 = test_genome0.GetSequence().Crop(start0, end0);
6102 test_genome1.GetSequence().Replace(start1, size1, cross0);
6103 }
6104 else if (size1 > 0) {
6105 Sequence cross1 = test_genome1.GetSequence().Crop(start1, end1);
6106 test_genome0.GetSequence().Replace(start0, size0, cross1);
6107 }
6108
6109 cAnalyzeGenotype* new_genotype0 = new cAnalyzeGenotype(m_world, test_genome0);
6110 //cAnalyzeGenotype* new_genotype1 = new cAnalyzeGenotype(m_world, test_genome1);
6111 new_genotype0->SetNumCPUs(1);
6112 //new_genotype1->SetNumCPUs(1);
6113 new_genotype0->SetID(0);
6114 //new_genotype1->SetID(0);
6115 new_genotype0->SetName("noname");
6116 //new_genotype1->SetName("noname");
6117 new_genotype0->SetParentID(genotype1->GetID()); //@CHC: Want to keep track of which two parents generated this offspring
6118 new_genotype0->SetParent2ID(genotype2->GetID());
6119 //new_genotype1->SetParentID(genotype1->GetID());
6120 //new_genotype1->SetParent2ID(genotype2->GetID());
6121
6122 batch[batch3].List().PushRear(new_genotype0);
6123 //batch[batch3].List().PushRear(new_genotype1);
6124
6125 }
6126
6127 }
6128
6129 // This command will mutate a single locus in every single organism in the current batch
CommandMutagenize(cString cur_string)6130 void cAnalyze::CommandMutagenize(cString cur_string)
6131 {
6132
6133 // Loop through all the genomes in the current batch
6134
6135 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6136 cAnalyzeGenotype* genotype = NULL;
6137
6138 while ((genotype = batch_it.Next()) != NULL) {
6139
6140 //Add a mutation to it
6141 const int max_line = genotype->GetLength();
6142 Genome& cur_genome = genotype->GetGenome();
6143 Sequence& cur_seq = cur_genome.GetSequence();
6144 const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(cur_genome.GetInstSet());
6145
6146 int line_num = m_ctx.GetRandom().GetInt(cur_genome.GetSize());
6147
6148 cur_seq[line_num] = inst_set.GetRandomInst(m_ctx); // Replace it with a random instruction
6149
6150 }
6151
6152 }
6153
CommandAlign(cString cur_string)6154 void cAnalyze::CommandAlign(cString cur_string)
6155 {
6156 // Align does not need any args yet.
6157 (void) cur_string;
6158
6159 cout << "Aligning sequences..." << endl;
6160
6161 if (batch[cur_batch].IsLineage() == false && m_world->GetVerbosity() >= VERBOSE_ON) {
6162 cerr << " Warning: sequences may not be a consecutive lineage."
6163 << endl;
6164 }
6165
6166 // Create an array of all the sequences we need to align.
6167 tListPlus<cAnalyzeGenotype> & glist = batch[cur_batch].List();
6168 tListIterator<cAnalyzeGenotype> batch_it(glist);
6169 const int num_sequences = glist.GetSize();
6170 cString * sequences = new cString[num_sequences];
6171
6172 // Move through each sequence and update it.
6173 batch_it.Reset();
6174 cString diff_info;
6175 for (int i = 0; i < num_sequences; i++) {
6176 sequences[i] = batch_it.Next()->GetGenome().GetSequence().AsString();
6177 if (i == 0) continue;
6178 // Track of the number of insertions and deletions to shift properly.
6179 int num_ins = 0;
6180 int num_del = 0;
6181
6182 // Compare each string to the previous.
6183 cStringUtil::EditDistance(sequences[i], sequences[i-1], diff_info, '_');
6184
6185 while (diff_info.GetSize() != 0) {
6186 cString cur_mut = diff_info.Pop(',');
6187 const char mut_type = cur_mut[0];
6188 cur_mut.ClipFront(1); cur_mut.ClipEnd(1);
6189 int position = cur_mut.AsInt();
6190
6191 // Nothing to do with Mutations
6192 if (mut_type == 'M') continue;
6193
6194 // Handle insertions...
6195 if (mut_type == 'I') {
6196 // Loop back and insert an '_' into all previous sequences.
6197 for (int j = 0; j < i; j++) {
6198 sequences[j].Insert('_', position + num_del);
6199 }
6200 num_ins++;
6201 }
6202
6203 // Handle Deletions...
6204 else if (mut_type == 'D') {
6205 // Insert '_' into the current sequence at the point of deletions.
6206 sequences[i].Insert("_", position + num_ins);
6207 num_del++;
6208 }
6209
6210 }
6211 }
6212
6213 batch_it.Reset();
6214 for (int i = 0; i < num_sequences; i++) {
6215 batch_it.Next()->SetAlignedSequence(sequences[i]);
6216 }
6217
6218 // Cleanup
6219 delete [] sequences;
6220
6221 // Adjust the flags on this batch
6222 // batch[cur_batch].SetLineage(false);
6223 batch[cur_batch].SetAligned(true);
6224 }
6225
6226 // Now this command do not consider changing environment
6227 // and only work for lineage and fixed-length runs.
AnalyzeNewInfo(cString cur_string)6228 void cAnalyze::AnalyzeNewInfo(cString cur_string)
6229 {
6230 cout << "Analyze new information in child about environment ..." << endl;
6231
6232 // Load in the variables
6233 int words = cur_string.CountNumWords();
6234 if (words < 1) {
6235 cout << "This command requires mutation rate, skipping." << endl;
6236 return;
6237 }
6238
6239 // Get the mutation rate ...
6240 double mu = cur_string.PopWord().AsDouble();
6241
6242 // Create the directory using the string given as the second argument
6243 cString dir = cur_string.PopWord();
6244 cString defaultDir = "newinfo/";
6245 cString directory = PopDirectory(dir, defaultDir);
6246
6247 ///////////////////////////////////////////////////////
6248 // Loop through all of the genotypes in this batch...
6249
6250 if (batch[cur_batch].IsLineage() != true) {
6251 cout << "This command requires the lineage in the batch, skipping.\n";
6252 return;
6253 }
6254
6255 cString newinfo_fn;
6256 newinfo_fn.Set("%s%s.newinfo.dat", static_cast<const char*>(directory), "lineage");
6257 ofstream& newinfo_fp = m_world->GetDataFileOFStream(newinfo_fn);
6258
6259 newinfo_fp << "# Legend:" << endl;
6260 newinfo_fp << "# 1:Child Genotype ID" << endl;
6261 newinfo_fp << "# 2:Parent Genotype ID" << endl;
6262 newinfo_fp << "# 3:Information of Child about Environment I(C:E)" << endl;
6263 newinfo_fp << "# 4:Information of Parent about Environment I(P:E)" << endl;
6264 newinfo_fp << "# 5:I(C:E)-I(P:E)" << endl;
6265 newinfo_fp << "# 6:Information Gained in Child" << endl;
6266 newinfo_fp << "# 7:Information Decreased in Child" << endl;
6267 newinfo_fp << "# 8:Net Increasing of Information in Child" << endl;
6268 newinfo_fp << endl;
6269
6270 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6271 cAnalyzeGenotype * parent_genotype = batch_it.Next();
6272 if (parent_genotype == NULL) {
6273 m_world->GetDataFileManager().Remove(newinfo_fn);
6274 return;
6275 }
6276 cAnalyzeGenotype * child_genotype = NULL;
6277 double I_P_E; // Information of parent about environment
6278 double H_P_E = AnalyzeEntropy(parent_genotype, mu);
6279 I_P_E = parent_genotype->GetLength() - H_P_E;
6280
6281 while ((child_genotype = batch_it.Next()) != NULL) {
6282
6283 if (m_world->GetVerbosity() >= VERBOSE_ON) {
6284 cout << "Analyze new information for " << child_genotype->GetName() << endl;
6285 }
6286
6287 // Information of parent about its environment should not be zero.
6288 if (I_P_E == 0) {
6289 cerr << "Error: Information between parent and its enviroment is zero."
6290 << "(cAnalyze::AnalyzeNewInfo)" << endl;
6291 if (exit_on_error) exit(1);
6292 }
6293
6294 double H_C_E = AnalyzeEntropy(child_genotype, mu);
6295 double I_C_E = child_genotype->GetLength() - H_C_E;
6296 double net_gain = I_C_E - I_P_E;
6297
6298 // Increased information in child compared to parent
6299 double child_increased_info = IncreasedInfo(child_genotype, parent_genotype, mu);
6300
6301 // Lost information in child compared to parent
6302 double child_lost_info = IncreasedInfo(parent_genotype, child_genotype, mu);
6303
6304 // Write information to file ...
6305 newinfo_fp << child_genotype->GetID() << " ";
6306 newinfo_fp << parent_genotype->GetID() << " ";
6307 newinfo_fp << I_C_E << " ";
6308 newinfo_fp << I_P_E << " ";
6309 newinfo_fp << net_gain << " ";
6310 newinfo_fp << child_increased_info << " ";
6311 newinfo_fp << child_lost_info << " ";
6312 newinfo_fp << child_increased_info - child_lost_info << endl;
6313
6314 parent_genotype = child_genotype;
6315 I_P_E = I_C_E;
6316 }
6317
6318 m_world->GetDataFileManager().Remove(newinfo_fn);
6319 return;
6320 }
6321
6322
6323
WriteClone(cString cur_string)6324 void cAnalyze::WriteClone(cString cur_string)
6325 {
6326 // Load in the variables...
6327 cString filename("clone.dat");
6328 int num_cells = -1;
6329 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6330 if (cur_string.GetSize() != 0) num_cells = cur_string.PopWord().AsInt();
6331
6332
6333 ofstream& fp = m_world->GetDataFileOFStream(filename);
6334
6335 // Start up again at update zero...
6336 fp << "0 ";
6337
6338 // Setup the archive sizes of lists to all be zero.
6339 fp << MAX_GENOME_LENGTH << " ";
6340 for (int i = 0; i < MAX_GENOME_LENGTH; i++) {
6341 fp << "0 ";
6342 }
6343
6344 // Save the individual genotypes
6345 fp << batch[cur_batch].List().GetSize() << " ";
6346
6347 int org_count = 0;
6348 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6349 cAnalyzeGenotype * genotype = NULL;
6350 while ((genotype = batch_it.Next()) != NULL) {
6351 org_count += genotype->GetNumCPUs();
6352 const int length = genotype->GetLength();
6353 const Sequence& genome = genotype->GetGenome().GetSequence();
6354
6355 fp << genotype->GetID() << " "
6356 << length << " ";
6357
6358 for (int i = 0; i < length; i++) {
6359 fp << genome[i].GetOp() << " ";
6360 }
6361 }
6362
6363 // Write out the current state of the grid.
6364
6365 if (num_cells == 0) num_cells = org_count;
6366 fp << num_cells << " ";
6367
6368 batch_it.Reset();
6369 while ((genotype = batch_it.Next()) != NULL) {
6370 for (int i = 0; i < genotype->GetNumCPUs(); i++) {
6371 fp << genotype->GetID() << " ";
6372 }
6373 }
6374
6375 // Fill out the remainder of the grid with -1
6376 for (int i = org_count; i < num_cells; i++) {
6377 fp << "-1 ";
6378 }
6379 }
6380
6381
WriteInjectEvents(cString cur_string)6382 void cAnalyze::WriteInjectEvents(cString cur_string)
6383 {
6384 // Load in the variables...
6385 cString filename("events_inj.cfg");
6386 int start_cell = 0;
6387 int lineage = 0;
6388 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6389 if (cur_string.GetSize() != 0) start_cell = cur_string.PopWord().AsInt();
6390 if (cur_string.GetSize() != 0) lineage = cur_string.PopWord().AsInt();
6391
6392 ofstream& fp = m_world->GetDataFileOFStream(filename);
6393
6394 int org_count = 0;
6395 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6396 cAnalyzeGenotype * genotype = NULL;
6397 while ((genotype = batch_it.Next()) != NULL) {
6398 const int cur_count = genotype->GetNumCPUs();
6399 org_count += cur_count;
6400 const Sequence& genome = genotype->GetGenome().GetSequence();
6401
6402 fp << "u 0 InjectSequence "
6403 << genome.AsString() << " "
6404 << start_cell << " "
6405 << start_cell + cur_count << " "
6406 << genotype->GetMerit() << " "
6407 << lineage << " "
6408 << endl;
6409 start_cell += cur_count;
6410 }
6411 }
6412
6413
WriteCompetition(cString cur_string)6414 void cAnalyze::WriteCompetition(cString cur_string)
6415 {
6416 cout << "Writing Competition events..." << endl;
6417
6418 // Load in the variables...
6419 int join_UD = 0;
6420 double start_merit = 50000;
6421 cString filename("events_comp.cfg");
6422 int batch_A = cur_batch - 1;
6423 int batch_B = cur_batch;
6424 int grid_side = -1;
6425 int lineage = 0;
6426
6427 // Make sure we have reasonable default batches.
6428 if (cur_batch == 0) { batch_A = 0; batch_B = 1; }
6429
6430 if (cur_string.GetSize() != 0) join_UD = cur_string.PopWord().AsInt();
6431 if (cur_string.GetSize() != 0) start_merit = cur_string.PopWord().AsDouble();
6432 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6433 if (cur_string.GetSize() != 0) batch_A = cur_string.PopWord().AsInt();
6434 if (cur_string.GetSize() != 0) batch_B = cur_string.PopWord().AsInt();
6435 if (cur_string.GetSize() != 0) grid_side = cur_string.PopWord().AsInt();
6436 if (cur_string.GetSize() != 0) lineage = cur_string.PopWord().AsInt();
6437
6438 // Check inputs...
6439 if (join_UD < 0) join_UD = 0;
6440 if (batch_A < 0 || batch_B < 0) {
6441 cerr << "Error: Batch IDs must be positive!" << endl;
6442 return;
6443 }
6444
6445 ofstream& fp = m_world->GetDataFileOFStream(filename);
6446
6447 // Count the number of organisms in each batch...
6448 cAnalyzeGenotype * genotype = NULL;
6449
6450 int org_count_A = 0;
6451 tListIterator<cAnalyzeGenotype> batchA_it(batch[batch_A].List());
6452 while ((genotype = batchA_it.Next()) != NULL) {
6453 org_count_A += genotype->GetNumCPUs();
6454 }
6455
6456 int org_count_B = 0;
6457 tListIterator<cAnalyzeGenotype> batchB_it(batch[batch_B].List());
6458 while ((genotype = batchB_it.Next()) != NULL) {
6459 org_count_B += genotype->GetNumCPUs();
6460 }
6461
6462 int max_count = Max(org_count_A, org_count_B);
6463 if (max_count > 10000) {
6464 cout << "Warning: more than 10,000 organisms in sub-population!" << endl;
6465 }
6466
6467 if (grid_side <= 0) {
6468 for (grid_side = 5; grid_side < 100; grid_side += 5) {
6469 if (grid_side * grid_side >= max_count) break;
6470 }
6471 if (m_world->GetVerbosity() >= VERBOSE_ON) {
6472 cout << "...assuming population size "
6473 << grid_side << "x" << grid_side << "." << endl;
6474 }
6475 }
6476
6477
6478 int pop_size = grid_side * grid_side;
6479
6480 int inject_pos = 0;
6481 while ((genotype = batchA_it.Next()) != NULL) {
6482 const int cur_count = genotype->GetNumCPUs();
6483 const Sequence& genome = genotype->GetGenome().GetSequence();
6484 double cur_merit = start_merit;
6485 if (cur_merit < 0) cur_merit = genotype->GetMerit();
6486 fp << "u 0 InjectSequence "
6487 << genome.AsString() << " "
6488 << inject_pos << " "
6489 << inject_pos + cur_count << " "
6490 << cur_merit << " "
6491 << lineage << " "
6492 << endl;
6493 inject_pos += cur_count;
6494 }
6495
6496 inject_pos = pop_size;
6497 while ((genotype = batchB_it.Next()) != NULL) {
6498 const int cur_count = genotype->GetNumCPUs();
6499 const Sequence& genome = genotype->GetGenome().GetSequence();
6500 double cur_merit = start_merit;
6501 if (cur_merit < 0) cur_merit = genotype->GetMerit();
6502 fp << "u 0 InjectSequence "
6503 << genome.AsString() << " "
6504 << inject_pos << " "
6505 << inject_pos + cur_count << " "
6506 << cur_merit << " "
6507 << lineage+1 << " "
6508 << endl;
6509 inject_pos += cur_count;
6510 }
6511
6512 fp << "u 0 SeverGridRow" << grid_side << endl;
6513 fp << "u " << join_UD << " JoinGridRow " << grid_side << endl;
6514 }
6515
6516
6517 // Analyze the mutations along an aligned lineage.
6518
AnalyzeMuts(cString cur_string)6519 void cAnalyze::AnalyzeMuts(cString cur_string)
6520 {
6521 cout << "Analyzing Mutations" << endl;
6522
6523 // Make sure we have everything we need.
6524 if (batch[cur_batch].IsAligned() == false) {
6525 cout << " Error: sequences not aligned." << endl;
6526 return;
6527 }
6528
6529 // Setup variables...
6530 cString filename("analyze_muts.dat");
6531 bool all_combos = false;
6532 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6533 if (cur_string.GetSize() != 0) all_combos = cur_string.PopWord().AsInt();
6534
6535 tListPlus<cAnalyzeGenotype> & gen_list = batch[cur_batch].List();
6536 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6537
6538 const int num_sequences = gen_list.GetSize();
6539 const int sequence_length =
6540 gen_list.GetFirst()->GetAlignedSequence().GetSize();
6541 cString * sequences = new cString[num_sequences];
6542 int * mut_count = new int[sequence_length];
6543 for (int i = 0; i < sequence_length; i++) mut_count[i] = 0;
6544
6545 // Load in the sequences
6546 batch_it.Reset();
6547 int count = 0;
6548 while (batch_it.Next() != NULL) {
6549 sequences[count] = batch_it.Get()->GetAlignedSequence();
6550 count++;
6551 }
6552
6553 // Count the number of changes at each site...
6554 for (int i = 1; i < num_sequences; i++) { // For each pair...
6555 cString & seq1 = sequences[i-1];
6556 cString & seq2 = sequences[i];
6557 for (int j = 0; j < sequence_length; j++) { // For each site...
6558 if (seq1[j] != seq2[j]) mut_count[j]++;
6559 }
6560 }
6561
6562 // Grab the two strings we're actively going to be working with.
6563 cString & first_seq = sequences[0];
6564 cString & last_seq = sequences[num_sequences - 1];
6565
6566 // Print out the header...
6567 ofstream& fp = m_world->GetDataFileOFStream(filename);
6568 fp << "# " << sequences[0] << endl;
6569 fp << "# " << sequences[num_sequences - 1] << endl;
6570 fp << "# ";
6571 for (int i = 0; i < sequence_length; i++) {
6572 if (mut_count[i] == 0) fp << " ";
6573 else if (mut_count[i] > 9) fp << "+";
6574 else fp << mut_count[i];
6575 }
6576 fp << endl;
6577 fp << "# ";
6578 for (int i = 0; i < sequence_length; i++) {
6579 if (first_seq[i] == last_seq[i]) fp << " ";
6580 else fp << "^";
6581 }
6582 fp << endl << endl;
6583
6584 // Count the number of diffs between the two strings we're interested in.
6585 const int total_diffs = cStringUtil::Distance(first_seq, last_seq);
6586 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << " " << total_diffs << " mutations being tested." << endl;
6587
6588 // Locate each difference.
6589 int * mut_positions = new int[total_diffs];
6590 int cur_mut = 0;
6591 for (int i = 0; i < first_seq.GetSize(); i++) {
6592 if (first_seq[i] != last_seq[i]) {
6593 mut_positions[cur_mut] = i;
6594 cur_mut++;
6595 }
6596 }
6597
6598 // The number of mutations we need to deal with will tell us how much
6599 // we can attempt to do. (@CAO should be able to overide defaults)
6600 bool scan_combos = true; // Scan all possible combos of mutations?
6601 bool detail_muts = true; // Collect detailed info on all mutations?
6602 bool print_all = true; // Print everything we collect without digestion?
6603 if (total_diffs > 30) scan_combos = false;
6604 if (total_diffs > 20) detail_muts = false;
6605 if (total_diffs > 10) print_all = false;
6606
6607 // Start moving through the difference combinations...
6608 if (scan_combos) {
6609 const int total_combos = 1 << total_diffs;
6610 cout << " Scanning through " << total_combos << " combos." << endl;
6611
6612 double * total_fitness = new double[total_diffs + 1];
6613 double * total_sqr_fitness = new double[total_diffs + 1];
6614 double * max_fitness = new double[total_diffs + 1];
6615 cString * max_sequence = new cString[total_diffs + 1];
6616 int * test_count = new int[total_diffs + 1];
6617 for (int i = 0; i <= total_diffs; i++) {
6618 total_fitness[i] = 0.0;
6619 total_sqr_fitness[i] = 0.0;
6620 max_fitness[i] = 0.0;
6621 test_count[i] = 0;
6622 }
6623
6624 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
6625
6626 // Loop through all of the combos...
6627 const int combo_step = total_combos / 79;
6628 for (int combo_id = 0; combo_id < total_combos; combo_id++) {
6629 if (combo_id % combo_step == 0) {
6630 cout << '.';
6631 cout.flush();
6632 }
6633 // Start at the first sequence and add needed changes...
6634 cString test_sequence = first_seq;
6635 int diff_count = 0;
6636 for (int mut_id = 0; mut_id < total_diffs; mut_id++) {
6637 if ((combo_id >> mut_id) & 1) {
6638 const int cur_pos = mut_positions[mut_id];
6639 test_sequence[cur_pos] = static_cast<const char*>(last_seq)[cur_pos];
6640 diff_count++;
6641 }
6642 }
6643
6644 // Determine the fitness of the current sequence...
6645 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
6646 Genome test_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(test_sequence));
6647 cCPUTestInfo test_info;
6648 testcpu->TestGenome(m_ctx, test_info, test_genome);
6649 const double fitness = test_info.GetGenotypeFitness();
6650
6651 //cAnalyzeGenotype test_genotype(test_sequence);
6652 //test_genotype.Recalculate(m_ctx, testcpu);
6653 //const double fitness = test_genotype.GetFitness();
6654
6655 total_fitness[diff_count] += fitness;
6656 total_sqr_fitness[diff_count] += fitness * fitness;
6657 if (fitness > max_fitness[diff_count]) {
6658 max_fitness[diff_count] = fitness;
6659 max_sequence[diff_count] = test_sequence;
6660 // cout << endl
6661 // << max_sequence[diff_count] << " "
6662 // << test_info.GetGenotypeMerit() << " "
6663 // << fitness << " "
6664 // << combo_id << endl;
6665 }
6666 test_count[diff_count]++;
6667 }
6668
6669 // Output the results...
6670
6671 for (int i = 0; i <= total_diffs; i++) {
6672 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
6673 Genome max_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(max_sequence[i]));
6674 cAnalyzeGenotype max_genotype(m_world, max_genome);
6675 max_genotype.Recalculate(m_ctx);
6676 fp << i << " " // 1
6677 << test_count[i] << " " // 2
6678 << total_fitness[i] / (double) test_count[i] << " " // 3
6679 << max_fitness[i] << " " // 4
6680 << max_genotype.GetMerit() << " " // 5
6681 << max_genotype.GetGestTime() << " " // 6
6682 << max_genotype.GetLength() << " " // 7
6683 << max_genotype.GetCopyLength() << " " // 8
6684 << max_genotype.GetExeLength() << " "; // 9
6685 max_genotype.PrintTasks(fp, 3,12);
6686 fp << max_sequence[i] << endl;
6687 }
6688
6689 // Cleanup
6690 delete [] total_fitness;
6691 delete [] total_sqr_fitness;
6692 delete [] max_fitness;
6693 delete [] max_sequence;
6694 delete [] test_count;
6695
6696 delete testcpu;
6697 }
6698 // If we can't scan through all combos, give wanring.
6699 else {
6700 cerr << " Warning: too many mutations (" << total_diffs
6701 << ") to scan through combos." << endl;
6702 }
6703
6704
6705 // Cleanup...
6706 delete [] sequences;
6707 delete [] mut_count;
6708 delete [] mut_positions;
6709 }
6710
6711
6712 // Analyze the frequency that each instruction appears in the batch, and
6713 // make note of those that appear more or less often than expected.
6714
AnalyzeInstructions(cString cur_string)6715 void cAnalyze::AnalyzeInstructions(cString cur_string)
6716 {
6717 if (m_world->GetVerbosity() >= VERBOSE_ON) {
6718 cout << "Analyzing Instructions in batch " << cur_batch << endl;
6719 }
6720 else cout << "Analyzing Instructions..." << endl;
6721
6722 // Load in the variables...
6723 cString filename("inst_analyze.dat");
6724 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6725 cString isname = m_world->GetHardwareManager().GetDefaultInstSet().GetInstSetName();
6726 if (cur_string.GetSize() != 0) isname = cur_string.PopWord();
6727 const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(isname);
6728 const int num_insts = inst_set.GetSize();
6729
6730 // Setup the file...
6731 ofstream& fp = m_world->GetDataFileOFStream(filename);
6732
6733 // Determine the file type...
6734 int file_type = FILE_TYPE_TEXT;
6735 while (filename.Find('.') != -1) filename.Pop('.');
6736 if (filename == "html") file_type = FILE_TYPE_HTML;
6737
6738 // If we're in HTML mode, setup the header...
6739 if (file_type == FILE_TYPE_HTML) {
6740 // Document header...
6741 fp << "<html>" << endl
6742 << "<body bgcolor=\"#FFFFFF\"" << endl
6743 << " text=\"#000000\"" << endl
6744 << " link=\"#0000AA\"" << endl
6745 << " alink=\"#0000FF\"" << endl
6746 << " vlink=\"#000044\">" << endl
6747 << endl
6748 << "<h1 align=center>Instruction Chart: "
6749 << batch[cur_batch].Name() << endl
6750 << "<br><br>" << endl
6751 << endl;
6752
6753 // Instruction key...
6754 const int num_cols = 6;
6755 const int num_rows = ((num_insts - 1) / num_cols) + 1;
6756 fp << "<table border=2 cellpadding=3>" << endl
6757 << "<tr bgcolor=\"#AAAAFF\"><th colspan=6>Instruction Set Legend</tr>"
6758 << endl;
6759 for (int i = 0; i < num_rows; i++) {
6760 fp << "<tr>";
6761 for (int j = 0; j < num_cols; j++) {
6762 const int inst_id = i + j * num_rows;
6763 if (inst_id < num_insts) {
6764 cInstruction cur_inst(inst_id);
6765 fp << "<td><b>" << cur_inst.GetSymbol() << "</b> : "
6766 << inst_set.GetName(inst_id) << " ";
6767 }
6768 else {
6769 fp << "<td> ";
6770 }
6771 }
6772 fp << "</tr>" << endl;
6773 }
6774 fp << "</table>" << endl
6775 << "<br><br><br>" << endl;
6776
6777 // Main table header...
6778 fp << "<center>" << endl
6779 << "<table border=1 cellpadding=2>" << endl
6780 << "<tr><th bgcolor=\"#AAAAFF\">Run # <th bgcolor=\"#AAAAFF\">Length"
6781 << endl;
6782 for (int i = 0; i < num_insts; i++) {
6783 cInstruction cur_inst(i);
6784 fp << "<th bgcolor=\"#AAAAFF\">" << cur_inst.GetSymbol() << " ";
6785 }
6786 fp << "</tr>" << endl;
6787 }
6788 else { // if (file_type == FILE_TYPE_TEXT) {
6789 fp << "#RUN_NAME LENGTH ";
6790 for (int i = 0; i < num_insts; i++) {
6791 cInstruction cur_inst(i);
6792 fp << cur_inst.GetSymbol() << ":" << inst_set.GetName(i) << " ";
6793 }
6794 fp << endl;
6795 }
6796
6797 // Figure out how often we expect each instruction to appear...
6798 const double exp_freq = 1.0 / (double) num_insts;
6799 const double min_freq = exp_freq * 0.5;
6800 const double max_freq = exp_freq * 2.0;
6801
6802 double total_length = 0.0;
6803 tArray<double> total_freq(num_insts);
6804 for (int i = 0; i < num_insts; i++) total_freq[i] = 0.0;
6805
6806 // Loop through all of the genotypes in this batch...
6807 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6808 cAnalyzeGenotype * genotype = NULL;
6809 while ((genotype = batch_it.Next()) != NULL) {
6810 if (genotype->GetGenome().GetInstSet() != isname) continue;
6811
6812 // Setup for counting...
6813 tArray<int> inst_bin(num_insts);
6814 for (int i = 0; i < num_insts; i++) inst_bin[i] = 0;
6815
6816 // Count it up!
6817 const int genome_size = genotype->GetLength();
6818 for (int i = 0; i < genome_size; i++) {
6819 const int inst_id = genotype->GetGenome().GetSequence()[i].GetOp();
6820 inst_bin[inst_id]++;
6821 }
6822
6823 // Print it out...
6824 if (file_type == FILE_TYPE_HTML) fp << "<tr><th>";
6825 fp << genotype->GetName() << " ";
6826 if (file_type == FILE_TYPE_HTML) fp << "<td align=center>";
6827 total_length += genome_size;
6828 fp << genome_size << " ";
6829 for (int i = 0; i < num_insts; i++) {
6830 const double inst_freq = ((double) inst_bin[i]) / (double) genome_size;
6831 total_freq[i] += inst_freq;
6832 if (file_type == FILE_TYPE_HTML) {
6833 if (inst_freq == 0.0) fp << "<td bgcolor=\"FFAAAA\">";
6834 else if (inst_freq < min_freq) fp << "<td bgcolor=\"FFFFAA\">";
6835 else if (inst_freq < max_freq) fp << "<td bgcolor=\"AAAAFF\">";
6836 else fp << "<td bgcolor=\"AAFFAA\">";
6837 }
6838 fp << cStringUtil::Stringf("%04.3f", inst_freq) << " ";
6839 }
6840 if (file_type == FILE_TYPE_HTML) fp << "</tr>";
6841 fp << endl;
6842 }
6843
6844 if (file_type == FILE_TYPE_HTML) {
6845 int num_genomes = batch[cur_batch].List().GetSize();
6846 fp << "<tr><th>Average <th>" << total_length / num_genomes << " ";
6847 for (int i = 0; i < num_insts; i++) {
6848 double inst_freq = total_freq[i] / num_genomes;
6849 if (inst_freq == 0.0) fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_LETHAL.Get() << "\">";
6850 else if (inst_freq < min_freq) fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEG.Get() << "\">";
6851 else if (inst_freq < max_freq) fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_NEUT.Get() << "\">";
6852 else fp << "<td bgcolor=\"#" << m_world->GetConfig().COLOR_MUT_POS.Get() << "\">";
6853 fp << cStringUtil::Stringf("%04.3f", inst_freq) << " ";
6854 }
6855 fp << "</tr>" << endl
6856 << "</table></center>" << endl;
6857 }
6858 }
6859
AnalyzeInstPop(cString cur_string)6860 void cAnalyze::AnalyzeInstPop(cString cur_string)
6861 {
6862 if (m_world->GetVerbosity() >= VERBOSE_ON) {
6863 cout << "Analyzing Instructions in batch " << cur_batch << endl;
6864 }
6865 else cout << "Analyzeing Instructions..." << endl;
6866
6867 // Load in the variables...
6868 cString filename("inst_analyze.dat");
6869 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6870 cString isname = m_world->GetHardwareManager().GetDefaultInstSet().GetInstSetName();
6871 if (cur_string.GetSize() != 0) isname = cur_string.PopWord();
6872 const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(isname);
6873 const int num_insts = inst_set.GetSize();
6874
6875 // Setup the file...
6876 ofstream& fp = m_world->GetDataFileOFStream(filename);
6877
6878 for (int i = 0; i < num_insts; i++) {
6879 cInstruction cur_inst(i);
6880 fp << cur_inst.GetSymbol() << ":" << inst_set.GetName(i) << " ";
6881 }
6882 fp << endl;
6883
6884 double total_length = 0.0;
6885 tArray<double> total_freq(num_insts);
6886 for (int i = 0; i < num_insts; i++) total_freq[i] = 0.0;
6887 int num_orgs = 0;
6888
6889 // Loop through all of the genotypes in this batch...
6890 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6891 cAnalyzeGenotype * genotype = NULL;
6892 while ((genotype = batch_it.Next()) != NULL) {
6893 if (genotype->GetGenome().GetInstSet() != isname) continue;
6894
6895 num_orgs++;
6896
6897 // Setup for counting...
6898 tArray<int> inst_bin(num_insts);
6899 for (int i = 0; i < num_insts; i++) inst_bin[i] = 0;
6900
6901 // Count it up!
6902 const int genome_size = genotype->GetLength();
6903 for (int i = 0; i < genome_size; i++) {
6904 const int inst_id = genotype->GetGenome().GetSequence()[i].GetOp();
6905 inst_bin[inst_id]++;
6906 }
6907 total_length += genome_size;
6908 for (int i = 0; i < num_insts; i++) {
6909 const double inst_freq = ((double) inst_bin[i]) / (double) genome_size;
6910 total_freq[i] += inst_freq;
6911 }
6912 }
6913 // Print it out...
6914 // fp << total_length/num_orgs << " ";
6915 for (int i = 0; i < num_insts; i++) {
6916 fp << cStringUtil::Stringf("%04.3f", total_freq[i]/num_orgs) << " ";
6917 }
6918 fp << endl;
6919
6920 }
6921
AnalyzeBranching(cString cur_string)6922 void cAnalyze::AnalyzeBranching(cString cur_string)
6923 {
6924 if (m_world->GetVerbosity() >= VERBOSE_ON) {
6925 cout << "Analyzing branching patterns in batch " << cur_batch << endl;
6926 }
6927 else cout << "Analyzeing Branches..." << endl;
6928
6929 // Load in the variables...
6930 cString filename("branch_analyze.dat");
6931 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6932
6933 // Setup the file...
6934 //ofstream& fp = m_world->GetDataFileOFStream(filename);
6935
6936 // UNFINISHED!
6937 // const int num_insts = inst_set.GetSize();
6938 }
6939
AnalyzeMutationTraceback(cString cur_string)6940 void cAnalyze::AnalyzeMutationTraceback(cString cur_string)
6941 {
6942 if (m_world->GetVerbosity() >= VERBOSE_ON) {
6943 cout << "Analyzing mutation traceback in batch " << cur_batch << endl;
6944 }
6945 else cout << "Analyzing mutation traceback..." << endl;
6946
6947 // This works best on lineages, so warn if we don't have one.
6948 if (batch[cur_batch].IsLineage() == false && m_world->GetVerbosity() >= VERBOSE_ON) {
6949 cerr << " Warning: trying to traceback mutations outside of lineage."
6950 << endl;
6951 }
6952
6953 if (batch[cur_batch].List().GetSize() == 0) {
6954 cerr << "Error: Trying to traceback mutations with no genotypes in batch."
6955 << endl;
6956 return;
6957 }
6958
6959 // Make sure all genotypes are the same length.
6960 int size = -1;
6961 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
6962 cAnalyzeGenotype * genotype = NULL;
6963 while ((genotype = batch_it.Next()) != NULL) {
6964 if (size == -1) size = genotype->GetLength();
6965 if (size != genotype->GetLength()) {
6966 cerr << " Error: Trying to traceback mutations in genotypes of differing lengths." << endl;
6967 cerr << " Aborting." << endl;
6968 return;
6969 }
6970 }
6971
6972 // Setup variables...
6973 cString filename("analyze_traceback.dat");
6974 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
6975
6976 // Setup a genome to store the previous values before mutations.
6977 tArray<int> prev_inst(size);
6978 prev_inst.SetAll(-1); // -1 indicates never changed.
6979
6980 // Open the output file...
6981 ofstream& fp = m_world->GetDataFileOFStream(filename);
6982
6983 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
6984
6985 // Loop through all of the genotypes again, testing mutation reversions.
6986 cAnalyzeGenotype * prev_genotype = batch_it.Next();
6987 while ((genotype = batch_it.Next()) != NULL) {
6988 continue;
6989 // Check to see if any sites have changed...
6990 for (int i = 0; i < size; i++) {
6991 if (genotype->GetGenome().GetSequence()[i] != prev_genotype->GetGenome().GetSequence()[i]) {
6992 prev_inst[i] = prev_genotype->GetGenome().GetSequence()[i].GetOp();
6993 }
6994 }
6995
6996 // Next, determine the fraction of mutations that are currently adaptive.
6997 int num_beneficial = 0;
6998 int num_neutral = 0;
6999 int num_detrimental = 0;
7000 int num_static = 0; // Sites that were never mutated.
7001
7002 Genome test_genome = genotype->GetGenome();
7003 cCPUTestInfo test_info;
7004 testcpu->TestGenome(m_ctx, test_info, test_genome);
7005 const double base_fitness = test_info.GetGenotypeFitness();
7006
7007 for (int i = 0; i < size; i++) {
7008 if (prev_inst[i] == -1) num_static++;
7009 else {
7010 test_genome.GetSequence()[i].SetOp(prev_inst[i]);
7011 testcpu->TestGenome(m_ctx, test_info, test_genome);
7012 const double cur_fitness = test_info.GetGenotypeFitness();
7013 if (cur_fitness > base_fitness) num_detrimental++;
7014 else if (cur_fitness < base_fitness) num_beneficial++;
7015 else num_neutral++;
7016 test_genome.GetSequence()[i] = genotype->GetGenome().GetSequence()[i];
7017 }
7018 }
7019
7020 fp << genotype->GetDepth() << " "
7021 << num_beneficial << " "
7022 << num_neutral << " "
7023 << num_detrimental << " "
7024 << num_static << " "
7025 << endl;
7026
7027 prev_genotype = genotype;
7028 }
7029
7030 delete testcpu;
7031 }
7032
AnalyzeComplexity(cString cur_string)7033 void cAnalyze::AnalyzeComplexity(cString cur_string)
7034 {
7035 cout << "Analyzing genome complexity..." << endl;
7036
7037 // Load in the variables...
7038 // This command requires at least on arguement
7039 int words = cur_string.CountNumWords();
7040 if(words < 1) {
7041 cout << "Error: AnalyzeComplexity has no parameters, skipping." << endl;
7042 return;
7043 }
7044
7045 // Get the mutation rate arguement
7046 double mut_rate = cur_string.PopWord().AsDouble();
7047
7048 // Create the directory using the string given as the second arguement
7049 cString dir = cur_string.PopWord();
7050 cString defaultDirectory = "complexity/";
7051 cString directory = PopDirectory(dir, defaultDirectory);
7052
7053 // Default for usage of resources is false
7054 int useResources = 0;
7055 // resource usage flag is an optional arguement, but is always the 3rd arg
7056 if(words >= 3) {
7057 useResources = cur_string.PopWord().AsInt();
7058 // All non-zero values are considered false (Handled by testcpu->InitResources)
7059 }
7060
7061 // Batch frequency begins with the first organism, but then skips that
7062 // amount ahead in the batch. It defaults to 1, so that default analyzes
7063 // all the organisms in the batch. It is always the 4th arg.
7064 int batchFrequency = 1;
7065 if(words == 4) {
7066 batchFrequency = cur_string.PopWord().AsInt();
7067 if(batchFrequency <= 0) {
7068 batchFrequency = 1;
7069 }
7070 }
7071
7072 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
7073
7074 ///////////////////////////////////////////////////////
7075 // Loop through all of the genotypes in this batch...
7076
7077 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7078 cAnalyzeGenotype * genotype = NULL;
7079
7080 cString lineage_filename;
7081 if (batch[cur_batch].IsLineage()) {
7082 lineage_filename.Set("%s%s.complexity.dat", static_cast<const char*>(directory), "lineage");
7083 } else {
7084 lineage_filename.Set("%s%s.complexity.dat", static_cast<const char*>(directory), "nonlineage");
7085 }
7086 ofstream& lineage_fp = m_world->GetDataFileOFStream(lineage_filename);
7087
7088 while ((genotype = batch_it.Next()) != NULL) {
7089 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7090 cout << " Analyzing complexity for " << genotype->GetName() << endl;
7091 }
7092
7093 // Construct this filename...
7094 cString filename;
7095 filename.Set("%s%s.complexity.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
7096 ofstream& fp = m_world->GetDataFileOFStream(filename);
7097
7098 lineage_fp << genotype->GetID() << " ";
7099
7100 int updateBorn = -1;
7101 updateBorn = genotype->GetUpdateBorn();
7102 cCPUTestInfo test_info;
7103 test_info.SetResourceOptions(useResources, m_resources, updateBorn, m_resource_time_spent_offset);
7104
7105 // Calculate the stats for the genotype we're working with ...
7106 genotype->Recalculate(m_ctx, &test_info);
7107 cout << genotype->GetFitness() << endl;
7108 const int max_line = genotype->GetLength();
7109 const Genome& base_genome = genotype->GetGenome();
7110 const Sequence& base_seq = base_genome.GetSequence();
7111 Genome mod_genome(base_genome);
7112 Sequence& seq = mod_genome.GetSequence();
7113 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7114
7115 // Loop through all the lines of code, testing all mutations...
7116 tArray<double> test_fitness(num_insts);
7117 tArray<double> prob(num_insts);
7118 for (int line_num = 0; line_num < max_line; line_num++) {
7119 int cur_inst = base_seq[line_num].GetOp();
7120
7121 // Column 1 ... the original instruction in the genome.
7122 fp << cur_inst << " ";
7123
7124 // Test fitness of each mutant.
7125 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7126 seq[line_num].SetOp(mod_inst);
7127 cAnalyzeGenotype test_genotype(m_world, mod_genome);
7128 test_genotype.Recalculate(m_ctx);
7129 test_fitness[mod_inst] = test_genotype.GetFitness();
7130 }
7131
7132 // Ajust fitness
7133 double cur_inst_fitness = test_fitness[cur_inst];
7134 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7135 if (test_fitness[mod_inst] > cur_inst_fitness)
7136 test_fitness[mod_inst] = cur_inst_fitness;
7137 test_fitness[mod_inst] = test_fitness[mod_inst] / cur_inst_fitness;
7138 }
7139
7140 // Calculate probabilities at mut-sel balance
7141 double w_bar = 1;
7142
7143 // Normalize fitness values, assert if they are all zero
7144 double maxFitness = 0.0;
7145 for(int i=0; i<num_insts; i++) {
7146 if(test_fitness[i] > maxFitness) {
7147 maxFitness = test_fitness[i];
7148 }
7149 }
7150
7151 if(maxFitness > 0) {
7152 for(int i=0; i<num_insts; i++) {
7153 test_fitness[i] /= maxFitness;
7154 }
7155 } else {
7156 fp << "All zero fitness, ERROR." << endl;
7157 continue;
7158 }
7159
7160 while(1) {
7161 double sum = 0.0;
7162 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
7163 prob[mod_inst] = (mut_rate * w_bar) /
7164 ((double)num_insts * (w_bar + test_fitness[mod_inst] * mut_rate - test_fitness[mod_inst]));
7165 sum = sum + prob[mod_inst];
7166 }
7167 if ((sum-1.0)*(sum-1.0) <= 0.0001)
7168 break;
7169 else
7170 w_bar = w_bar - 0.000001;
7171 }
7172 // Write probability
7173 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
7174 fp << prob[mod_inst] << " ";
7175 }
7176
7177 // Calculate complexity
7178 double entropy = 0;
7179 for (int i = 0; i < num_insts; i ++) {
7180 entropy += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
7181 }
7182 double complexity = 1 - entropy;
7183 fp << complexity << endl;
7184
7185 lineage_fp << complexity << " ";
7186
7187 // Reset the mod_genome back to the original sequence.
7188 seq[line_num].SetOp(cur_inst);
7189 }
7190
7191 m_world->GetDataFileManager().Remove(filename);
7192
7193 lineage_fp << endl;
7194
7195 // Always grabs the first one
7196 // Skip i-1 times, so that the beginning of the loop will grab the ith one
7197 // where i is the batchFrequency
7198 for(int count=0; genotype != NULL && count < batchFrequency - 1; count++) {
7199 genotype = batch_it.Next();
7200 if(genotype != NULL && m_world->GetVerbosity() >= VERBOSE_ON) {
7201 cout << "Skipping: " << genotype->GetName() << endl;
7202 }
7203 }
7204 if(genotype == NULL) { break; }
7205 }
7206
7207 m_world->GetDataFileManager().Remove(lineage_filename);
7208
7209 delete testcpu;
7210 }
7211
AnalyzeFitnessLandscapeTwoSites(cString cur_string)7212 void cAnalyze::AnalyzeFitnessLandscapeTwoSites(cString cur_string)
7213 {
7214 cout << "Fitness for all instruction combinations at two sites..." << endl;
7215
7216 /*
7217 * Arguments:
7218 * 1) directory (default: 'fitness_landscape_two_sites/'
7219 * 2) useResources (default: 0 -- no)
7220 * 3) batchFrequency (default: 1 -- all genotypes in batch)
7221 *
7222 */
7223
7224 // number of arguments provided
7225 int words = cur_string.CountNumWords();
7226 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7227 cout << " Number of arguments passed: " << words << endl;
7228 }
7229
7230 //
7231 // argument 1 -- directory
7232 //
7233 cString dir = cur_string.PopWord();
7234 cString defaultDirectory = "fitness_landscape_two_sites/";
7235 cString directory = PopDirectory(dir, defaultDirectory);
7236 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7237 cout << " - Analysis results to directory: " << directory << endl;
7238 }
7239
7240 //
7241 // argument 2 -- use resources?
7242 //
7243 // Default for usage of resources is false
7244 int useResources = 0;
7245 if(words >= 2) {
7246 useResources = cur_string.PopWord().AsInt();
7247 // All non-zero values are considered false (Handled by testcpu->InitResources)
7248 }
7249 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7250 cout << " - Use resorces set to: " << useResources << " (0=false, true other int)" << endl;
7251 }
7252
7253 //
7254 // argument 3 -- batch frequncy
7255 // - default batchFrequency=1 (every organism analyzed)
7256 //
7257 int batchFrequency = 1;
7258 if(words >= 3) {
7259 batchFrequency = cur_string.PopWord().AsInt();
7260 if(batchFrequency <= 0) {
7261 batchFrequency = 1;
7262 }
7263 }
7264 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7265 cout << " - Batch frequency set to: " << batchFrequency << endl;
7266 }
7267
7268 // test cpu
7269 //cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU();
7270
7271 // get current batch
7272 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7273 cAnalyzeGenotype * genotype = NULL;
7274
7275 // analyze each genotype in the batch
7276 while ((genotype = batch_it.Next()) != NULL) {
7277 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7278 cout << " Analyzing complexity for " << genotype->GetName() << endl;
7279 }
7280
7281 int updateBorn = -1;
7282 updateBorn = genotype->GetUpdateBorn();
7283 cCPUTestInfo test_info;
7284 test_info.SetResourceOptions(useResources, m_resources, updateBorn, m_resource_time_spent_offset);
7285
7286 // Calculate the stats for the genotype we're working with ...
7287 genotype->Recalculate(m_ctx, &test_info);
7288 const int max_line = genotype->GetLength();
7289 const Genome& base_genome = genotype->GetGenome();
7290 const Sequence& base_seq = base_genome.GetSequence();
7291 Genome mod_genome(base_genome);
7292 Sequence& seq = mod_genome.GetSequence();
7293 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7294
7295 // run throught sites in genome
7296 for (int site1 = 0; site1 < max_line; site1++) {
7297 for (int site2 = site1+1; site2 < max_line; site2++) {
7298
7299 // Construct filename for this site combination
7300 cString fl_filename;
7301 fl_filename.Set("%s%s_FitLand_sites-%d_and_%d.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()), site1, site2);
7302 cDataFile & fit_land_fp = m_world->GetDataFile(fl_filename);
7303 fit_land_fp.WriteComment( "Two-site fitness landscape, all possible instructions" );
7304 fit_land_fp.WriteComment( cStringUtil::Stringf("Site 1: %d Site 2: %d", site1, site2) );
7305 fit_land_fp.WriteComment( "Rows #- instruction, site 1" );
7306 fit_land_fp.WriteComment( "Columns #- instruction, site 2" );
7307 fit_land_fp.WriteTimeStamp();
7308
7309 // get current instructions at site 1 and site 2
7310 int curr_inst1 = base_seq[site1].GetOp();
7311 int curr_inst2 = base_seq[site2].GetOp();
7312
7313 // get current fitness
7314 //double curr_fitness = genotype->GetFitness();
7315
7316 // run through all possible instruction combinations
7317 // at two sites
7318 for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1++) {
7319 for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2++) {
7320 // modify mod_genome at two sites
7321 seq[site1].SetOp(mod_inst1);
7322 seq[site2].SetOp(mod_inst2);
7323 // analyze mod_genome
7324 cAnalyzeGenotype test_genotype(m_world, mod_genome);
7325 test_genotype.Recalculate(m_ctx);
7326 double mod_fitness = test_genotype.GetFitness();
7327
7328 // write to file
7329 fit_land_fp.Write(mod_fitness, cStringUtil::Stringf("Instruction, site 2: %d ", mod_inst2));
7330 }
7331 fit_land_fp.Endl();
7332 }
7333 // Reset the mod_genome back to the original sequence.
7334 seq[site1].SetOp(curr_inst1);
7335 seq[site2].SetOp(curr_inst2);
7336
7337 // close file
7338 m_world->GetDataFileManager().Remove(fl_filename);
7339 }
7340 }
7341 }
7342 }
7343
AnalyzeLineageComplexitySitesN(cString cur_string)7344 void cAnalyze::AnalyzeLineageComplexitySitesN(cString cur_string)
7345 {
7346 /*
7347 Implemented up to n=2, feel free to expand for greater n's
7348 */
7349 cout << "Analyzing genome complexity of a lineage for n sites..." << endl;
7350
7351 /*
7352 * Arguments:
7353 * 1) N-mutant (default: 2)
7354 * 2) directory
7355 */
7356
7357 // number of arguments provided
7358 int words = cur_string.CountNumWords();
7359 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7360 cout << " Number of arguments passed: " << words << endl;
7361 }
7362
7363 //
7364 // argument 1 -- N-mutant number
7365 //
7366 int n = 2;
7367 if(words < 1) {
7368 // no mutation n-mutant number provided
7369 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7370 cout << " - No specific n-mutant selected, using default n-mutant with n = " << n << endl;
7371 }
7372 } else {
7373 // n-mutant number provided
7374 n = cur_string.PopWord().AsInt();
7375 if (n < 1.0) {
7376 // find an n-mutant below 1 is trivial
7377 n = 1.0;
7378 }
7379 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7380 cout << " - n-mutant passed, using n = " << n << endl;
7381 }
7382 }
7383
7384 //
7385 // argument 2 -- directory
7386 //
7387 cString dir = cur_string.PopWord();
7388 cString defaultDirectory = "complexity_nmutant_lineage/";
7389 cString directory = PopDirectory(dir, defaultDirectory);
7390 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7391 cout << " - Analysis results to directory: " << directory << endl;
7392 }
7393
7394 // test cpu
7395 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
7396
7397 // get current batch
7398 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7399 cAnalyzeGenotype * genotype = NULL;
7400
7401 // Construct filename
7402 cString filename_2s;
7403 filename_2s.Set("complexity.dat");
7404 cDataFile & fp_2s = m_world->GetDataFile(filename_2s);
7405 fp_2s.WriteComment( "Lineage Complexity Analysis" );
7406 fp_2s.WriteTimeStamp();
7407 // m_world->GetDataFileManager().Remove(filename_2s);
7408
7409
7410 // analyze each genotype in the batch
7411 while ((genotype = batch_it.Next()) != NULL) {
7412 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7413 cout << " Analyzing complexity for " << genotype->GetName() << endl;
7414 }
7415
7416
7417
7418 // Calculate the stats for the genotype we're working with ...
7419 const int gen_length = genotype->GetLength();
7420 const Genome& base_genome = genotype->GetGenome();
7421 const double gen_fitness = genotype->GetFitness();
7422 const Sequence& base_seq = base_genome.GetSequence();
7423 Genome mod_genome(base_genome);
7424 Sequence& seq = mod_genome.GetSequence();
7425 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7426
7427 //Initialize variables needed for complexity calculations
7428 int posneutmut = 0; //number of positive and nuetral mutations
7429 int posmut = 0;
7430
7431 cout << "The base genome fitness is: " << gen_fitness << endl;
7432
7433 /*
7434 *
7435 * ONE SITE CALCULATIONS
7436 *
7437 */
7438
7439 // run through each gene in genome
7440 if( n == 1 ) {
7441 for (int gene_num = 0; gene_num < gen_length; gene_num++) {
7442 // get the current instruction at this line/site
7443 int cur_inst = base_seq[gene_num].GetOp();
7444
7445 // recalculate fitness of each mutant and count the number of positive and neutral mutations
7446 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7447 //Check to make sure not re-evaluating the the original genome
7448 if (mod_inst != cur_inst) {
7449 //cout << "Mod Inst, Cur Inst: " << mod_inst << " " << cur_inst << endl;
7450 seq[gene_num].SetOp(mod_inst);
7451 cAnalyzeGenotype test_genotype(m_world, mod_genome);
7452 test_genotype.Recalculate(m_ctx);
7453 double mod_fitness = test_genotype.GetFitness();
7454 cout << "Mod Fitness: " << mod_fitness << endl;
7455 if (mod_fitness >= gen_fitness) {
7456 //cout << "Mutant has better fitness" << endl;
7457 posneutmut += 1;
7458 }
7459 if (mod_fitness > gen_fitness) {
7460 posmut +=1;
7461 }
7462 }
7463 }
7464 seq[gene_num].SetOp(cur_inst);
7465 }
7466 }
7467 /*
7468 *
7469 * TWO SITE CALCULATIONS
7470 *
7471 */
7472
7473 // run through genes in genome
7474 // - only consider lin_num2 > lin_num1 so that we don't consider
7475 // Mut Info [1][45] and Mut Info [45][1]
7476 if( n == 2) {
7477 for (int gene_num1 = 0; gene_num1 < (gen_length-1); gene_num1++) {
7478 for (int gene_num2 = gene_num1+1; gene_num2 < gen_length; gene_num2++) {
7479 //cout << "line #1, #2: " << gene_num1 << ", " << gene_num2 << endl;
7480
7481 // get current instructions at site 1 and site 2
7482 int cur_inst1 = base_seq[gene_num1].GetOp();
7483 int cur_inst2 = base_seq[gene_num2].GetOp();
7484
7485 // initialize running fitness total
7486 double fitness_total_2s = 0.0;
7487
7488 // run through all possible instructions
7489 for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1++) {
7490 for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2++) {
7491 // modify mod_genome at two sites
7492 seq[gene_num1].SetOp(mod_inst1);
7493 seq[gene_num2].SetOp(mod_inst2);
7494 // analyze mod_genome
7495 cAnalyzeGenotype test_genotype(m_world, mod_genome);
7496 test_genotype.Recalculate(m_ctx);
7497 double mod_fitness = test_genotype.GetFitness();
7498 //cout << "Mutant Fitness: " << mod_fitness << endl;
7499 if (mod_fitness >= gen_fitness) {
7500 posneutmut += 1;
7501 }
7502 if (mod_fitness > gen_fitness) {
7503 posmut += 1;
7504 }
7505 }
7506 }
7507 seq[gene_num1].SetOp(cur_inst1);
7508 seq[gene_num2].SetOp(cur_inst2);
7509 }
7510 }
7511 }
7512
7513 if ( n >= 3) {
7514 //TODO
7515 }
7516
7517 //cout << "Genome Length: " << gen_length << endl;
7518 //cout << "Postive & Neutral Mutations: " << posneutmut << endl;
7519
7520 // calculate complexity
7521 double denominator = 0.0;
7522 if (n == 1) {
7523 denominator = (num_insts*gen_length);
7524 }
7525 else if (n == 2) {
7526 denominator = (pow((double)num_insts,(double)2)*(gen_length)*(gen_length-1)*(0.5));
7527 }
7528
7529 double wn = ( posneutmut / denominator);
7530
7531 //cout << "Denom: " << denominator << " wn: " << wn << endl;
7532
7533 double entropy = 0.0;
7534 double totalcombo = pow((double)num_insts, gen_length);
7535 //cout << "Total Combinations: " << totalcombo << endl;
7536 //cout << "Log of wn and totalcombos: " << log(wn * totalcombo ) << endl;
7537 if (posneutmut > 0) {
7538 entropy = (log(wn * totalcombo ) / log(double(num_insts)));
7539 }
7540
7541 //cout << "Entropy: " << entropy << endl;
7542
7543 double complexity = (gen_length - entropy);
7544 cout << "Complexity: " << complexity << endl;
7545
7546 //write to file
7547
7548 fp_2s.Write(genotype->GetID(), "Genotype ID");
7549 fp_2s.Write(genotype->GetFitness(), "Genotype Fitness");
7550 fp_2s.Write(gen_length, "Genotype Length");
7551 fp_2s.Write(posmut, "Positive Mutations");
7552 fp_2s.Write(posneutmut, "Positive and Neutral Mutations");
7553 fp_2s.Write(entropy, "Entropy");
7554 fp_2s.Write(complexity, "Complexity");
7555 fp_2s.Endl();
7556
7557 }
7558 m_world->GetDataFileManager().Remove(filename_2s);
7559
7560 delete testcpu;
7561 }
7562
AnalyzeComplexityTwoSites(cString cur_string)7563 void cAnalyze::AnalyzeComplexityTwoSites(cString cur_string)
7564 {
7565 cout << "Analyzing genome complexity (one and two sites)..." << endl;
7566
7567 /*
7568 * Arguments:
7569 * 1) mutation rate (default: 0.0 - selection only)
7570 * 2) directory for results (default: 'complexity_two_sites/'
7571 * 3) use resources ? -- 0 or 1 (default: 0)
7572 * 4) batch frequency (default: 1 - all genotypes in batch)
7573 * -- how many genotypes to skip in batch
7574 * 5) convergence accuracy (default: 1.e-10)
7575 *
7576 */
7577
7578 // number of arguments provided
7579 int words = cur_string.CountNumWords();
7580 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7581 cout << " Number of arguments passed: " << words << endl;
7582 }
7583
7584 //
7585 // argument 1 -- mutation rate
7586 //
7587 double mut_rate = 0.0075;
7588 if(words < 1) {
7589 // no mutation rate provided
7590 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7591 cout << " - No mutation rate passed, using default mu = " << mut_rate << endl;
7592 }
7593 } else {
7594 // mutation rate provided
7595 mut_rate = cur_string.PopWord().AsDouble();
7596 if (mut_rate < 0.0) {
7597 // can't have mutation rate below zero
7598 mut_rate = 0.0;
7599 }
7600 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7601 cout << " - Mutation rate passed, using mu = " << mut_rate << endl;
7602 }
7603 }
7604
7605 //
7606 // argument 2 -- directory
7607 //
7608 cString dir = cur_string.PopWord();
7609 cString defaultDirectory = "complexity_two_sites/";
7610 cString directory = PopDirectory(dir, defaultDirectory);
7611 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7612 cout << " - Analysis results to directory: " << directory << endl;
7613 }
7614
7615 //
7616 // argument 3 -- use resources?
7617 //
7618 // Default for usage of resources is false
7619 int useResources = 0;
7620 if(words >= 3) {
7621 useResources = cur_string.PopWord().AsInt();
7622 // All non-zero values are considered false (Handled by testcpu->InitResources)
7623 }
7624 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7625 cout << " - Use resorces set to: " << useResources << " (0=false, true other int)" << endl;
7626 }
7627
7628 //
7629 // argument 4 -- batch frequncy
7630 // - default batchFrequency=1 (every organism analyzed)
7631 //
7632 int batchFrequency = 1;
7633 if(words >= 4) {
7634 batchFrequency = cur_string.PopWord().AsInt();
7635 if(batchFrequency <= 0) {
7636 batchFrequency = 1;
7637 }
7638 }
7639 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7640 cout << " - Batch frequency set to: " << batchFrequency << endl;
7641 }
7642
7643 //
7644 // argument 5 -- convergence accuracy for mutation-selection balance
7645 //
7646 double converg_accuracy = 1.e-10;
7647 if(words >= 5) {
7648 converg_accuracy = cur_string.PopWord().AsDouble();
7649 }
7650 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7651 cout << " - Convergence accuracy: " << converg_accuracy << endl;
7652 }
7653
7654 // test cpu
7655 cTestCPU* testcpu = m_world->GetHardwareManager().CreateTestCPU(m_ctx);
7656
7657 // get current batch
7658 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
7659 cAnalyzeGenotype * genotype = NULL;
7660
7661 // create file for batch summary
7662 cString summary_filename;
7663 summary_filename.Set("%scomplexity_batch_summary.dat", static_cast<const char*>(directory));
7664 cDataFile & summary_fp = m_world->GetDataFile(summary_filename);
7665 summary_fp.WriteComment( "One, Two Site Entropy/Complexity Analysis" );
7666 summary_fp.WriteTimeStamp();
7667
7668 // analyze each genotype in the batch
7669 while ((genotype = batch_it.Next()) != NULL) {
7670 if (m_world->GetVerbosity() >= VERBOSE_ON) {
7671 cout << " Analyzing complexity for " << genotype->GetName() << endl;
7672 }
7673 // entropy and complexity for whole genome
7674 // in both mers and bits
7675 // >> single site approximation
7676 double genome_ss_entropy_mers = 0.0;
7677 double genome_ss_entropy_bits = 0.0;
7678 double genome_ss_complexity_mers = 0.0;
7679 double genome_ss_complexity_bits = 0.0;
7680 // >> two site approximation
7681 double genome_ds_mut_info_mers = 0.0;
7682 double genome_ds_mut_info_bits = 0.0;
7683 double genome_ds_complexity_mers = 0.0;
7684 double genome_ds_complexity_bits = 0.0;
7685
7686 // Construct filename
7687 cString filename_2s;
7688 filename_2s.Set("%s%s.twosite.complexity.dat", static_cast<const char*>(directory), static_cast<const char*>(genotype->GetName()));
7689 cDataFile & fp_2s = m_world->GetDataFile(filename_2s);
7690 fp_2s.WriteComment( "One, Two Site Entropy/Complexity Analysis" );
7691 fp_2s.WriteComment( "NOTE: mutual information = (col 6 + col 8) - (col 9)" );
7692 fp_2s.WriteComment( "NOTE: possible negative mutual information-- is this real? " );
7693 fp_2s.WriteTimeStamp();
7694
7695 int updateBorn = -1;
7696 updateBorn = genotype->GetUpdateBorn();
7697 cCPUTestInfo test_info;
7698 test_info.SetResourceOptions(useResources, m_resources, updateBorn, m_resource_time_spent_offset);
7699
7700 // Calculate the stats for the genotype we're working with ...
7701 genotype->Recalculate(m_ctx, &test_info);
7702 const int max_line = genotype->GetLength();
7703 const Genome& base_genome = genotype->GetGenome();
7704 const Sequence& base_seq = base_genome.GetSequence();
7705 Genome mod_genome(base_genome);
7706 Sequence& seq = mod_genome.GetSequence();
7707 const int num_insts = m_world->GetHardwareManager().GetInstSet(base_genome.GetInstSet()).GetSize();
7708
7709 /*
7710 *
7711 * ONE SITE CALCULATIONS
7712 *
7713 */
7714
7715 // single site entropies for use with
7716 // two site calculations (below)
7717 tArray<double> entropy_ss_mers(max_line);
7718 tArray<double> entropy_ss_bits(max_line);
7719 // used in single site calculations
7720 tArray<double> test_fitness(num_insts);
7721 tArray<double> prob(num_insts);
7722 tArray<double> prob_next(num_insts);
7723
7724 // run through lines in genome
7725 for (int line_num = 0; line_num < max_line; line_num++) {
7726 // get the current instruction at this line/site
7727 int cur_inst = base_seq[line_num].GetOp();
7728
7729 // recalculate fitness of each mutant.
7730 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7731 seq[line_num].SetOp(mod_inst);
7732 cAnalyzeGenotype test_genotype(m_world, mod_genome);
7733 test_genotype.Recalculate(m_ctx);
7734 test_fitness[mod_inst] = test_genotype.GetFitness();
7735 }
7736
7737 // Adjust fitness
7738 // - set all fitness values greater than current instruction
7739 // equal to current instruction fitness
7740 // - make the rest of the fitness values relative to
7741 // the current instruction fitness
7742 double cur_inst_fitness = test_fitness[cur_inst];
7743 // test that current fitness greater than zero
7744 // if NOT, all fitnesses will be set to zero
7745 if (cur_inst_fitness > 0.0) {
7746 for (int mod_inst = 0; mod_inst < num_insts; mod_inst++) {
7747 if (test_fitness[mod_inst] > cur_inst_fitness)
7748 test_fitness[mod_inst] = cur_inst_fitness;
7749 test_fitness[mod_inst] /= cur_inst_fitness;
7750 }
7751 } else {
7752 cout << "Fitness of this genotype is ZERO--no information." << endl;
7753 continue;
7754 }
7755
7756 // initialize prob for
7757 // mutation-selection balance
7758 double fitness_total = 0.0;
7759 for (int i = 0; i < num_insts; i ++ ) {
7760 fitness_total += test_fitness[i];
7761 }
7762 for (int i = 0; i < num_insts; i ++ ) {
7763 prob[i] = test_fitness[i]/fitness_total;
7764 prob_next[i] = 0.0;
7765 }
7766
7767 double check_sum = 0.0;
7768 while(1) {
7769 check_sum = 0.0;
7770 double delta_prob = 0.0;
7771 //double delta_prob_ex = 0.0;
7772 for (int mod_inst = 0; mod_inst < num_insts; mod_inst ++) {
7773 // calculate the average fitness
7774 double w_avg = 0.0;
7775 for (int i = 0; i < num_insts; i++) {
7776 w_avg += prob[i]*test_fitness[i];
7777 }
7778 if (mut_rate != 0.0) {
7779 // run mutation-selection equation
7780 prob_next[mod_inst] = ((1.0-mut_rate)*test_fitness[mod_inst]*prob[mod_inst])/(w_avg);
7781 prob_next[mod_inst] += mut_rate/((double)num_insts);
7782 } else {
7783 // run selection equation
7784 prob_next[mod_inst] = (test_fitness[mod_inst]*prob[mod_inst])/(w_avg);
7785 }
7786 // increment change in probs
7787 delta_prob += (prob_next[mod_inst]-prob[mod_inst])*(prob_next[mod_inst]-prob[mod_inst]);
7788 //delta_prob_ex += (prob_next[mod_inst]-prob[mod_inst]);
7789 }
7790 // transfer t+1 to t for next iteration
7791 for (int i = 0; i < num_insts; i++) {
7792 prob[i]=prob_next[i];
7793 check_sum += prob[i];
7794 }
7795
7796 // test for convergence
7797 if (delta_prob < converg_accuracy)
7798 break;
7799 }
7800
7801 // Calculate complexity and entropy in bits and mers
7802 double entropy_mers = 0;
7803 double entropy_bits = 0;
7804 for (int i = 0; i < num_insts; i ++) {
7805 // watch for prob[i] == 0
7806 // --> 0.0 log(0.0) = 0.0
7807 if (prob[i] != 0.0) {
7808 entropy_mers += prob[i] * log((double) 1.0/prob[i]) / log ((double) num_insts);
7809 entropy_bits += prob[i] * log((double) 1.0/prob[i]) / log ((double) 2.0);
7810 }
7811 }
7812 double complexity_mers = 1 - entropy_mers;
7813 double complexity_bits = (log ((double) num_insts) / log ((double) 2.0)) - entropy_bits;
7814
7815 // update entropy and complexity values
7816 // with this site's values
7817 genome_ss_entropy_mers += entropy_mers;
7818 genome_ss_entropy_bits += entropy_bits;
7819 genome_ss_complexity_mers += complexity_mers;
7820 genome_ss_complexity_bits += complexity_bits;
7821
7822 // save entropy for this line/site number
7823 entropy_ss_mers[line_num] = entropy_mers;
7824 entropy_ss_bits[line_num] = entropy_bits;
7825
7826 // Reset the mod_genome back to the original sequence.
7827 seq[line_num].SetOp(cur_inst);
7828 }
7829
7830 /*
7831 *
7832 * TWO SITE CALCULATIONS
7833 *
7834 */
7835
7836 // Loop through all the lines of code,
7837 // testing all TWO SITE mutations...
7838 tMatrix<double> test_fitness_2s(num_insts,num_insts);
7839 tArray<double> prob_1s_i(num_insts);
7840 tArray<double> prob_1s_j(num_insts);
7841 tMatrix<double> prob_2s(num_insts,num_insts);
7842 tMatrix<double> prob_next_2s(num_insts,num_insts);
7843
7844 // run through lines in genome
7845 // - only consider lin_num2 > lin_num1 so that we don't consider
7846 // Mut Info [1][45] and Mut Info [45][1]
7847 for (int line_num1 = 0; line_num1 < max_line; line_num1++) {
7848 for (int line_num2 = line_num1+1; line_num2 < max_line; line_num2++) {
7849 // debug
7850 //cout << "line #1, #2: " << line_num1 << ", " << line_num2 << endl;
7851
7852 // get current instructions at site 1 and site 2
7853 int cur_inst1 = base_seq[line_num1].GetOp();
7854 int cur_inst2 = base_seq[line_num2].GetOp();
7855
7856 // get current fitness
7857 double cur_inst_fitness_2s = genotype->GetFitness();
7858
7859 // initialize running fitness total
7860 double fitness_total_2s = 0.0;
7861
7862 // test that current fitness is greater than zero
7863 if (cur_inst_fitness_2s > 0.0) {
7864 // current fitness greater than zero
7865 // run through all possible instructions
7866 for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1++) {
7867 for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2++) {
7868 // modify mod_genome at two sites
7869 seq[line_num1].SetOp(mod_inst1);
7870 seq[line_num2].SetOp(mod_inst2);
7871 // analyze mod_genome
7872 cAnalyzeGenotype test_genotype(m_world, mod_genome);
7873 test_genotype.Recalculate(m_ctx);
7874 test_fitness_2s[mod_inst1][mod_inst2] = test_genotype.GetFitness();
7875
7876 // if modified fitness is greater than current fitness
7877 // - set equal to current fitness
7878 if (test_fitness_2s[mod_inst1][mod_inst2] > cur_inst_fitness_2s)
7879 test_fitness_2s[mod_inst1][mod_inst2] = cur_inst_fitness_2s;
7880
7881 // in all cases, scale fitness relative to current fitness
7882 test_fitness_2s[mod_inst1][mod_inst2] /= cur_inst_fitness_2s;
7883
7884 // update fitness total
7885 fitness_total_2s += test_fitness_2s[mod_inst1][mod_inst2];
7886 }
7887 }
7888 } else {
7889 // current fitness is not greater than zero--skip
7890 cout << "Fitness of this genotype is ZERO--no information." << endl;
7891 continue;
7892 }
7893
7894 // initialize probabilities
7895 for (int i = 0; i < num_insts; i++ ) {
7896 // single site probabilities
7897 // to be built from two site probabilities
7898 prob_1s_i[i] = 0.0;
7899 prob_1s_j[i] = 0.0;
7900 for (int j = 0; j < num_insts; j++ ) {
7901 // intitialize two site probability with
7902 // relative fitness
7903 prob_2s[i][j] = test_fitness_2s[i][j]/fitness_total_2s;
7904 prob_next_2s[i][j] = 0.0;
7905 }
7906 }
7907
7908 double check_sum_2s = 0.0;
7909 while(1) {
7910 check_sum_2s = 0.0;
7911 double delta_prob_2s = 0.0;
7912 //double delta_prob_ex = 0.0;
7913 for (int mod_inst1 = 0; mod_inst1 < num_insts; mod_inst1 ++) {
7914 for (int mod_inst2 = 0; mod_inst2 < num_insts; mod_inst2 ++) {
7915 // calculate the average fitness
7916 double w_avg_2s = 0.0;
7917 for (int i = 0; i < num_insts; i++) {
7918 for (int j = 0; j < num_insts; j++) {
7919 w_avg_2s += prob_2s[i][j]*test_fitness_2s[i][j];
7920 }
7921 }
7922 if (mut_rate != 0.0) {
7923 // run mutation-selection equation
7924 // -term 1
7925 prob_next_2s[mod_inst1][mod_inst2] = ((1.0-mut_rate)*(1.0-mut_rate)*test_fitness_2s[mod_inst1][mod_inst2]*prob_2s[mod_inst1][mod_inst2])/(w_avg_2s);
7926 // -term 2
7927 double sum_term2 = 0.0;
7928 for (int i = 0; i < num_insts; i++) {
7929 sum_term2 += (test_fitness_2s[i][mod_inst2]*prob_2s[i][mod_inst2])/(w_avg_2s);
7930 }
7931 prob_next_2s[mod_inst1][mod_inst2] += (((mut_rate*(1.0-mut_rate))/((double)num_insts)))*sum_term2;
7932 // -term 3
7933 double sum_term3 = 0.0;
7934 for (int j = 0; j < num_insts; j++) {
7935 sum_term3 += (test_fitness_2s[mod_inst1][j]*prob_2s[mod_inst1][j])/(w_avg_2s);
7936 }
7937 prob_next_2s[mod_inst1][mod_inst2] += (((mut_rate*(1.0-mut_rate))/((double)num_insts)))*sum_term3;
7938 // -term 4
7939 prob_next_2s[mod_inst1][mod_inst2] += (mut_rate/((double)num_insts))*(mut_rate/((double)num_insts));
7940 } else {
7941 // run selection equation
7942 prob_next_2s[mod_inst1][mod_inst2] = (test_fitness_2s[mod_inst1][mod_inst2]*prob_2s[mod_inst1][mod_inst2])/(w_avg_2s);
7943
7944 }
7945 // increment change in probs
7946 delta_prob_2s += (prob_next_2s[mod_inst1][mod_inst2]-prob_2s[mod_inst1][mod_inst2])*(prob_next_2s[mod_inst1][mod_inst2]-prob_2s[mod_inst1][mod_inst2]);
7947 //delta_prob_ex += (prob_next[mod_inst]-prob[mod_inst]);
7948 }
7949 }
7950 // transfer probabilities at time t+1
7951 // to t for next iteration
7952 for (int i = 0; i < num_insts; i++) {
7953 for (int j = 0; j < num_insts; j++) {
7954 prob_2s[i][j]=prob_next_2s[i][j];
7955 check_sum_2s += prob_2s[i][j];
7956 }
7957 }
7958
7959 // test for convergence
7960 if (delta_prob_2s < converg_accuracy)
7961 break;
7962 }
7963
7964 // get single site probabilites from
7965 // two site probabilities
7966 // site i (first site)
7967 double check_prob_sum_site_1 = 0.0;
7968 double check_prob_sum_site_2 = 0.0;
7969 for (int i = 0; i < num_insts; i++) {
7970 for (int j = 0; j < num_insts; j++) {
7971 prob_1s_i[i] += prob_2s[i][j];
7972 }
7973 check_prob_sum_site_1 += prob_1s_i[i];
7974 }
7975 // site j (second site)
7976 for (int j = 0; j < num_insts; j++) {
7977 for (int i = 0; i < num_insts; i++) {
7978 prob_1s_j[j] += prob_2s[i][j];
7979 }
7980 check_prob_sum_site_2 += prob_1s_j[j];
7981 }
7982
7983 // Calculate one site and two versions of
7984 // complexity and entropy in bits and mers
7985 //-mers
7986 double entropy_ss_site1_mers = 0.0;
7987 double entropy_ss_site2_mers = 0.0;
7988 double entropy_ds_mers = 0.0;
7989 //-bits
7990 double entropy_ss_site1_bits = 0.0;
7991 double entropy_ss_site2_bits = 0.0;
7992 double entropy_ds_bits = 0.0;
7993
7994 // single site entropies
7995 for (int i = 0; i < num_insts; i ++) {
7996 // watch for zero probabilities
7997 if (prob_1s_i[i] != 0.0) {
7998 // mers
7999 entropy_ss_site1_mers += prob_1s_i[i] * log((double) 1.0/prob_1s_i[i]) / log ((double) num_insts);
8000 // bits
8001 entropy_ss_site1_bits += prob_1s_i[i] * log((double) 1.0/prob_1s_i[i]) / log ((double) 2.0);
8002 }
8003 if (prob_1s_j[i] != 0.0) {
8004 // mers
8005 entropy_ss_site2_mers += prob_1s_j[i] * log((double) 1.0/prob_1s_j[i]) / log ((double) num_insts);
8006 // bits
8007 entropy_ss_site2_bits += prob_1s_j[i] * log((double) 1.0/prob_1s_j[i]) / log ((double) 2.0);
8008 }
8009 }
8010
8011 // two site joint entropies
8012 for (int i = 0; i < num_insts; i ++) {
8013 for (int j = 0; j < num_insts; j ++) {
8014 // watch for zero probabilities
8015 if (prob_2s[i][j] != 0.0) {
8016 // two site entropy in mers
8017 entropy_ds_mers += prob_2s[i][j] * log((double) 1.0/prob_2s[i][j]) / log ((double) num_insts);
8018 // two site entropy in bitss
8019 entropy_ds_bits += prob_2s[i][j] * log((double) 1.0/prob_2s[i][j]) / log ((double) 2.0);
8020 }
8021 }
8022 }
8023
8024 // calculate the mutual information
8025 // - add single site entropies
8026 // - subtract two site joint entropy
8027 // units: mers
8028 double mutual_information_mers = entropy_ss_site1_mers + entropy_ss_site2_mers;
8029 mutual_information_mers -= entropy_ds_mers;
8030
8031 // units: bits
8032 double mutual_information_bits = entropy_ss_site1_bits + entropy_ss_site2_bits;
8033 mutual_information_bits -= entropy_ds_bits;
8034
8035 // two site, only update mutatual informtion total
8036 genome_ds_mut_info_mers += mutual_information_mers;
8037 genome_ds_mut_info_bits += mutual_information_bits;
8038
8039 // write output to file
8040 fp_2s.Write(line_num1, "Site 1 in genome");
8041 fp_2s.Write(line_num2, "Site 2 in genome");
8042 fp_2s.Write(cur_inst1, "Current Instruction, Site 1");
8043 fp_2s.Write(cur_inst2, "Current Instruction, Site 2");
8044 fp_2s.Write(entropy_ss_mers[line_num1], "Entropy (MERS), Site 1 -- single site mut-sel balance");
8045 fp_2s.Write(entropy_ss_site1_mers, "Entropy (MERS), Site 1 -- TWO site mut-sel balance");
8046 fp_2s.Write(entropy_ss_mers[line_num2], "Entropy (MERS), Site 2 -- single site mut-sel balance");
8047 fp_2s.Write(entropy_ss_site2_mers, "Entropy (MERS), Site 2 -- TWO site mut-sel balance");
8048 fp_2s.Write(entropy_ds_mers, "Joint Entropy (MERS), Site 1 & 2 -- TWO site mut-sel balance");
8049 fp_2s.Write(mutual_information_mers, "Mutual Information (MERS), Site 1 & 2 -- TWO site mut-sel balance");
8050 fp_2s.Endl();
8051
8052 // Reset the mod_genome back to the original sequence.
8053 seq[line_num1].SetOp(cur_inst1);
8054 seq[line_num2].SetOp(cur_inst2);
8055
8056 }// end line 2
8057 }// end line 1
8058
8059 // cleanup file for this genome
8060 m_world->GetDataFileManager().Remove(filename_2s);
8061
8062 // calculate the two site complexity
8063 // (2 site complexity) = (1 site complexity) + (total 2 site mutual info)
8064 genome_ds_complexity_mers = genome_ss_complexity_mers + genome_ds_mut_info_mers;
8065 genome_ds_complexity_bits = genome_ss_complexity_bits + genome_ds_mut_info_bits;
8066
8067 summary_fp.Write(genotype->GetID(), "Genotype ID");
8068 summary_fp.Write(genotype->GetFitness(), "Genotype Fitness");
8069 summary_fp.Write(genome_ss_entropy_mers, "Entropy (single-site) MERS");
8070 summary_fp.Write(genome_ss_complexity_mers, "Complexity (single-site) MERS");
8071 summary_fp.Write(genome_ds_mut_info_mers, "Mutual Information MERS");
8072 summary_fp.Write(genome_ds_complexity_mers, "Complexity (two-site) MERS");
8073 summary_fp.Write(genome_ss_entropy_bits, "Entropy (single-site) BITS");
8074 summary_fp.Write(genome_ss_complexity_bits, "Complexity (single-site) BITS");
8075 summary_fp.Write(genome_ds_mut_info_bits, "Mutual Information BITS");
8076 summary_fp.Write(genome_ds_complexity_bits, "Complexity (two-site) BITS");
8077 summary_fp.Endl();
8078
8079 // Always grabs the first one
8080 // Skip i-1 times, so that the beginning of the loop will grab the ith one
8081 // where i is the batchFrequency
8082 for(int count=0; genotype != NULL && count < batchFrequency - 1; count++) {
8083 genotype = batch_it.Next();
8084 if(genotype != NULL && m_world->GetVerbosity() >= VERBOSE_ON) {
8085 cout << "Skipping: " << genotype->GetName() << endl;
8086 }
8087 }
8088 if(genotype == NULL) { break; }
8089 }
8090
8091 m_world->GetDataFileManager().Remove(summary_filename);
8092
8093 delete testcpu;
8094 }
8095
AnalyzePopComplexity(cString cur_string)8096 void cAnalyze::AnalyzePopComplexity(cString cur_string)
8097 {
8098 cout << "Analyzing population complexity ..." << endl;
8099
8100 // Load in the variables...
8101 cString directory = PopDirectory(cur_string, "pop_complexity/");
8102 cString file = cur_string;
8103
8104 // Construct filename...
8105 cString filename;
8106 filename.Set("%spop%s.complexity.dat", static_cast<const char*>(directory), static_cast<const char*>(file));
8107 ofstream& fp = m_world->GetDataFileOFStream(filename);
8108
8109 //////////////////////////////////////////////////////////
8110 // Loop through all of the genotypes in this batch ...
8111
8112 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8113 cAnalyzeGenotype * genotype = NULL;
8114
8115
8116 genotype = batch_it.Next();
8117
8118
8119 if (genotype == NULL) return;
8120 int seq_length = genotype->GetLength();
8121 const int num_insts = m_world->GetHardwareManager().GetInstSet(genotype->GetGenome().GetInstSet()).GetSize();
8122 tMatrix<int> inst_stat(seq_length, num_insts);
8123
8124 // Initializing inst_stat ...
8125 for (int line_num = 0; line_num < seq_length; line_num ++)
8126 for (int inst_num = 0; inst_num < num_insts; inst_num ++)
8127 inst_stat(line_num, inst_num) = 0;
8128
8129 int num_cpus = 0;
8130 int actural_samples = 0;
8131 while (genotype != NULL) {
8132 num_cpus = genotype->GetNumCPUs();
8133 const Genome& base_genome = genotype->GetGenome();
8134 for (int i = 0; i < num_cpus; i++) { // Stat on every organism with same genotype.
8135 for (int line_num = 0; line_num < seq_length; line_num++) {
8136 int cur_inst = base_genome.GetSequence()[line_num].GetOp();
8137 inst_stat(line_num, cur_inst)++;
8138 }
8139 actural_samples++;
8140 }
8141 genotype = batch_it.Next();
8142 }
8143
8144 // Calculate complexity
8145 for (int line_num = 0; line_num < seq_length; line_num ++) {
8146 double entropy = 0.0;
8147 for (int inst_num = 0; inst_num < num_insts; inst_num ++) {
8148 if (inst_stat(line_num, inst_num) == 0) continue;
8149 float prob = (float) (inst_stat(line_num, inst_num)) / (float) (actural_samples);
8150 entropy += prob * log((double) 1.0/prob) / log((double) num_insts);
8151 }
8152 double complexity = 1 - entropy;
8153 fp << complexity << " ";
8154 }
8155 fp << endl;
8156
8157 m_world->GetDataFileManager().Remove(filename);
8158 return;
8159 }
8160
8161
8162
8163 /* MRR
8164 * August 2007
8165 * This function will go through the lineage, align the genotypes, and
8166 * preform mutation reversion a specified number of descendents ahead
8167 * assuming they keep within a certain alignment distance (specified as well).
8168 * The output will give fitness information for the mutation-reverted genotypes
8169 * as described below.
8170 */
MutationRevert(cString cur_string)8171 void cAnalyze::MutationRevert(cString cur_string)
8172 {
8173
8174 //This function takes in three parameters, all defaulted:
8175 cString filename("XXX.dat"); //The name of the output file
8176 int max_dist = -1; //The maximum edit distance allowed in the search
8177 int max_depth = 5; //The maximum depth forward one wishes to search
8178
8179 if (cur_string.GetSize() != 0) filename = cur_string.PopWord();
8180 if (cur_string.GetSize() != 0) max_dist = cur_string.PopWord().AsInt();
8181 if (cur_string.GetSize() != 0) max_depth = cur_string.PopWord().AsInt();
8182
8183 //Warning notifications
8184 if (!batch[cur_batch].IsLineage())
8185 {
8186 cout << "Error: This command requires a lineage. Skipping." << endl;
8187 return;
8188 }
8189
8190
8191 //Request a file
8192 ofstream& FOT = m_world->GetDataFileOFStream(filename);
8193 /*
8194 FOT output per line
8195 ID
8196 FITNESS
8197 BIRTH
8198 DISTANCE
8199 PID
8200 P_FITNESS
8201 P_BIRTH
8202 @ea depth past
8203 CHILDX_ID
8204 CHILDX_BIRTH
8205 CHILDX_FITNESS
8206 CHILDX_DISTANCE
8207 CHILDX_FITNESS_SANS_MUT
8208 */
8209
8210
8211 //Align the batch... we're going to keep the fitnesses intact from the runs
8212 CommandAlign("");
8213
8214 //Our edit distance is already stored in the historical dump.
8215
8216 //Test hardware
8217 cCPUTestInfo test_info;
8218 test_info.UseRandomInputs(true);
8219
8220 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8221 cAnalyzeGenotype* parent_genotype = batch_it.Next();
8222 cAnalyzeGenotype* other_genotype = NULL;
8223 cAnalyzeGenotype* genotype = NULL;
8224
8225 while( (genotype = batch_it.Next()) != NULL && parent_genotype != NULL)
8226 {
8227 if (true)
8228 {
8229 FOT << genotype->GetID() << " "
8230 << genotype->GetFitness() << " "
8231 << genotype->GetUpdateBorn() << " "
8232 << genotype->GetParentDist() << " "
8233 << parent_genotype->GetID() << " "
8234 << parent_genotype->GetFitness() << " "
8235 << parent_genotype->GetUpdateBorn() << " ";
8236
8237 int cum_dist = 0;
8238 cString str_parent = parent_genotype->GetSequence();
8239 cString str_other = "";
8240 cString str_align_parent = parent_genotype->GetAlignedSequence();
8241 cString str_align_other = genotype->GetAlignedSequence();
8242 cString reversion = ""; //Reversion mask
8243
8244 //Find what changes to revert
8245 for (int k = 0; k < str_align_parent.GetSize(); k++)
8246 {
8247 char p = str_align_parent[k];
8248 char c = str_align_other[k];
8249 if (p == c)
8250 reversion += " "; //Nothing
8251 else if (p == '_' && c != '_')
8252 reversion += "+"; //Insertion
8253 else if (p != '_' && c == '_')
8254 reversion += "-"; //Deletion
8255 else
8256 reversion += p; //Point Mutation
8257 }
8258
8259 tListIterator<cAnalyzeGenotype> next_it(batch_it);
8260 for (int i = 0; i < max_depth; i++)
8261 {
8262 if ( (other_genotype = next_it.Next()) != NULL &&
8263 (cum_dist <= max_dist || max_dist == -1) )
8264 {
8265 cum_dist += other_genotype->GetParentDist();
8266 if (cum_dist > max_dist && max_dist != -1)
8267 break;
8268 str_other = other_genotype->GetSequence();
8269 str_align_other = other_genotype->GetAlignedSequence();
8270
8271 //Revert "background" to parental form
8272 cString reverted = "";
8273 for (int k = 0; k < reversion.GetSize(); k++)
8274 {
8275 if (reversion[k] == '+') continue; //Insertion, so skip
8276 else if (reversion[k] == '-') reverted += str_align_parent[k]; //Add del
8277 else if (reversion[k] != ' ') reverted += reversion[k]; //Revert mut
8278 else if (str_align_other[k] != '_') reverted += str_align_other[k]; //Keep current
8279 }
8280
8281 const cInstSet& is = m_world->GetHardwareManager().GetDefaultInstSet();
8282 Genome rev_genome(is.GetHardwareType(), is.GetInstSetName(), Sequence(reverted));
8283 cAnalyzeGenotype new_genotype(m_world, rev_genome); //Get likely fitness
8284 new_genotype.Recalculate(m_ctx, &test_info, NULL, 50);
8285
8286 FOT << other_genotype->GetID() << " "
8287 << other_genotype->GetFitness() << " "
8288 << other_genotype->GetUpdateBorn() << " "
8289 << cum_dist << " "
8290 << new_genotype.GetFitness() << " ";
8291 }
8292 else
8293 {
8294 FOT << -1 << " "
8295 << -1 << " "
8296 << -1 << " "
8297 << -1 << " "
8298 << -1 << " ";
8299 }
8300 }
8301 FOT << endl;
8302 }
8303 parent_genotype = genotype;
8304 }
8305
8306 return;
8307 }
8308
EnvironmentSetup(cString cur_string)8309 void cAnalyze::EnvironmentSetup(cString cur_string)
8310 {
8311 cUserFeedback feedback;
8312 cout << "Running environment command: " << endl << " " << cur_string << endl;
8313 m_world->GetEnvironment().LoadLine(cur_string, feedback);
8314 for (int i = 0; i < feedback.GetNumMessages(); i++) {
8315 switch (feedback.GetMessageType(i)) {
8316 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
8317 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
8318 default: break;
8319 };
8320 cerr << feedback.GetMessage(i) << endl;
8321 }
8322 }
8323
8324
CommandHelpfile(cString cur_string)8325 void cAnalyze::CommandHelpfile(cString cur_string)
8326 {
8327 cout << "Printing helpfiles in: " << cur_string << endl;
8328
8329 cHelpManager help_control;
8330 if (m_world->GetVerbosity() >= VERBOSE_ON) help_control.SetVerbose();
8331 while (cur_string.GetSize() > 0) {
8332 help_control.LoadFile(cur_string.PopWord());
8333 }
8334
8335 help_control.PrintHTML();
8336 }
8337
8338
8339
8340
8341 //////////////// Control...
8342
VarSet(cString cur_string)8343 void cAnalyze::VarSet(cString cur_string)
8344 {
8345 cString var = cur_string.PopWord();
8346
8347 if (cur_string.GetSize() == 0) {
8348 cerr << "Error: No variable provided in SET command" << endl;
8349 return;
8350 }
8351
8352 cString& cur_variable = GetVariable(var);
8353 cur_variable = cur_string.PopWord();
8354
8355 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8356 cout << "Setting " << var << " to " << cur_variable << endl;
8357 }
8358 }
8359
ConfigGet(cString cur_string)8360 void cAnalyze::ConfigGet(cString cur_string)
8361 {
8362 cString cvar = cur_string.PopWord();
8363 cString var = cur_string.PopWord();
8364
8365 if (cvar.GetSize() == 0 || var.GetSize() == 0) {
8366 cerr << "Error: Missing variable in CONFIG_GET command" << endl;
8367 return;
8368 }
8369
8370 cString& cur_variable = GetVariable(var);
8371
8372 // Get Config Variable
8373 if (!m_world->GetConfig().Get(cvar, cur_variable)) {
8374 cerr << "Error: Configuration Variable '" << var << "' was not found." << endl;
8375 return;
8376 }
8377
8378 if (m_world->GetVerbosity() >= VERBOSE_ON)
8379 cout << "Setting variable " << var << " to " << cur_variable << endl;
8380 }
8381
ConfigSet(cString cur_string)8382 void cAnalyze::ConfigSet(cString cur_string)
8383 {
8384 cString cvar = cur_string.PopWord();
8385
8386 if (cvar.GetSize() == 0) {
8387 cerr << "Error: No variable provided in CONFIG_SET command" << endl;
8388 return;
8389 }
8390
8391 // Get Config Variable
8392 cString val = cur_string.PopWord();
8393 if (!m_world->GetConfig().Set(cvar, val)) {
8394 cerr << "Error: Configuration Variable '" << cvar << "' was not found." << endl;
8395 return;
8396 }
8397
8398 if (m_world->GetVerbosity() >= VERBOSE_ON)
8399 cout << "Setting configuration variable " << cvar << " to " << val << endl;
8400 }
8401
8402
BatchSet(cString cur_string)8403 void cAnalyze::BatchSet(cString cur_string)
8404 {
8405 int next_batch = 0;
8406 if (cur_string.CountNumWords() > 0) {
8407 next_batch = cur_string.PopWord().AsInt();
8408 }
8409 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Setting current batch to " << next_batch << endl;
8410 if (next_batch >= GetNumBatches()) {
8411 if (next_batch >= MAX_BATCHES) {
8412 cerr << " Error: max batches is " << MAX_BATCHES << endl;
8413 if (exit_on_error) exit(1);
8414 } else {
8415 int old_num_batches = GetNumBatches();
8416 int num_batchsets_needed = ((next_batch - old_num_batches) / NUM_BATCHES_INCREMENT) + 1;
8417 int new_num_batches = GetNumBatches() + (num_batchsets_needed * NUM_BATCHES_INCREMENT);
8418 if (new_num_batches > MAX_BATCHES) new_num_batches = MAX_BATCHES;
8419
8420 cout << "Increasing max batches to " << new_num_batches << endl;
8421
8422 batch.Resize(new_num_batches);
8423 for (int i = old_num_batches; i < new_num_batches; i++) {
8424 batch[i].Name().Set("Batch%d", i);
8425 }
8426 cur_batch = next_batch;
8427 }
8428 } else {
8429 cur_batch = next_batch;
8430 }
8431 }
8432
BatchName(cString cur_string)8433 void cAnalyze::BatchName(cString cur_string)
8434 {
8435 if (cur_string.CountNumWords() == 0) {
8436 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << " Warning: No name given in NAME_BATCH!" << endl;
8437 return;
8438 }
8439
8440 batch[cur_batch].Name() = cur_string.PopWord();
8441 }
8442
BatchTag(cString cur_string)8443 void cAnalyze::BatchTag(cString cur_string)
8444 {
8445 if (cur_string.CountNumWords() == 0) {
8446 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << " Warning: No tag given in TAG_BATCH!" << endl;
8447 return;
8448 }
8449
8450 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8451 cout << "Tagging batch " << cur_batch
8452 << " with tag '" << cur_string << "'" << endl;
8453 }
8454
8455 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8456 cAnalyzeGenotype * genotype = NULL;
8457 while ((genotype = batch_it.Next()) != NULL) {
8458 genotype->SetTag(cur_string);
8459 }
8460
8461 }
8462
BatchPurge(cString cur_string)8463 void cAnalyze::BatchPurge(cString cur_string)
8464 {
8465 int batch_id = cur_batch;
8466 if (cur_string.CountNumWords() > 0) batch_id = cur_string.PopWord().AsInt();
8467
8468 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Purging batch " << batch_id << endl;
8469
8470 while (batch[batch_id].List().GetSize() > 0) {
8471 delete batch[batch_id].List().Pop();
8472 }
8473
8474 batch[batch_id].SetLineage(false);
8475 batch[batch_id].SetAligned(false);
8476 }
8477
BatchDuplicate(cString cur_string)8478 void cAnalyze::BatchDuplicate(cString cur_string)
8479 {
8480 if (cur_string.GetSize() == 0) {
8481 cerr << "Duplicate Error: Must include from ID!" << endl;
8482 if (exit_on_error) exit(1);
8483 }
8484 int batch_from = cur_string.PopWord().AsInt();
8485
8486 int batch_to = cur_batch;
8487 if (cur_string.GetSize() > 0) batch_to = cur_string.PopWord().AsInt();
8488
8489 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8490 cout << "Duplicating from batch " << batch_from << " to batch " << batch_to << "." << endl;
8491 }
8492
8493 tListIterator<cAnalyzeGenotype> batch_from_it(batch[batch_from].List());
8494 cAnalyzeGenotype * genotype = NULL;
8495 while ((genotype = batch_from_it.Next()) != NULL) {
8496 cAnalyzeGenotype * new_genotype = new cAnalyzeGenotype(*genotype);
8497 batch[batch_to].List().PushRear(new_genotype);
8498 }
8499
8500 batch[batch_to].SetLineage(false);
8501 batch[batch_to].SetAligned(false);
8502 }
8503
BatchRecalculate(cString cur_string)8504 void cAnalyze::BatchRecalculate(cString cur_string)
8505 {
8506 tArray<int> manual_inputs; // Used only if manual inputs are specified
8507 cString msg; // Holds any information we may want to send the driver to display
8508
8509 int use_resources = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 0;
8510 int update = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;
8511 bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false;
8512 bool use_manual_inputs = false;
8513
8514 //Manual inputs will override random input request and must be the last arguments.
8515 if (cur_string.CountNumWords() > 0){
8516 if (cur_string.CountNumWords() == m_world->GetEnvironment().GetInputSize()){
8517 manual_inputs.Resize(m_world->GetEnvironment().GetInputSize());
8518 use_random_inputs = false;
8519 use_manual_inputs = true;
8520 for (int k = 0; cur_string.GetSize(); k++)
8521 manual_inputs[k] = cur_string.PopWord().AsInt();
8522 } else if (m_world->GetVerbosity() >= VERBOSE_ON){
8523 msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
8524 cur_string.CountNumWords(), m_world->GetEnvironment().GetInputSize());
8525 m_world->GetDriver().NotifyWarning(msg);
8526 }
8527 }
8528
8529 cCPUTestInfo test_info;
8530 if (use_manual_inputs)
8531 test_info.UseManualInputs(manual_inputs);
8532 else
8533 test_info.UseRandomInputs(use_random_inputs);
8534 test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
8535
8536 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8537 msg.Set("Running batch %d through test CPUs...", cur_batch);
8538 m_world->GetDriver().NotifyComment(msg);
8539 } else{
8540 msg.Set("Running through test CPUs...");
8541 m_world->GetDriver().NotifyComment(msg);
8542 }
8543
8544 if (m_world->GetVerbosity() >= VERBOSE_ON && batch[cur_batch].IsLineage() == false) {
8545 msg.Set("Batch may not be a lineage; parent and ancestor distances may not be correct");
8546 m_world->GetDriver().NotifyWarning(msg);
8547 }
8548
8549 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8550 cAnalyzeGenotype * genotype = NULL;
8551 cAnalyzeGenotype * last_genotype = NULL;
8552 while ((genotype = batch_it.Next()) != NULL) {
8553 // If the previous genotype was the parent of this one, pass in a pointer
8554 // to it for improved recalculate (such as distance to parent, etc.)
8555 if (last_genotype != NULL && genotype->GetParentID() == last_genotype->GetID()) {
8556 genotype->Recalculate(m_ctx, &test_info, last_genotype);
8557 } else {
8558 genotype->Recalculate(m_ctx, &test_info);
8559 }
8560 last_genotype = genotype;
8561 }
8562
8563 return;
8564 }
8565
8566
BatchRecalculateWithArgs(cString cur_string)8567 void cAnalyze::BatchRecalculateWithArgs(cString cur_string)
8568 {
8569 // RECALC <use_resources> <random_inputs> <manual_inputs in.1 in.2 in.3> <update N> <num_trials X>
8570
8571 tArray<int> manual_inputs; // Used only if manual inputs are specified
8572 cString msg; // Holds any information we may want to send the driver to display
8573
8574 // Defaults
8575 bool use_resources = false;
8576 int update = -1;
8577 bool use_random_inputs = false;
8578 bool use_manual_inputs = false;
8579 int num_trials = 1;
8580
8581 // Handle our recalculate arguments
8582 // Really, we should have a generalized tokenizer handle this
8583 cStringList args(cur_string);
8584 int pos = -1;
8585 if (args.PopString("use_resources") != "") use_resources = true;
8586 if (args.PopString("use_random_inputs") != "") use_random_inputs = true;
8587 if ( (pos = args.LocateString("use_manual_inputs") ) != -1){
8588 use_manual_inputs = true;
8589 args.PopString("use_manual_inputs");
8590 int num = m_world->GetEnvironment().GetInputSize();
8591 manual_inputs.Resize(num);
8592 if (args.GetSize() >= pos + num - 2)
8593 for (int k = 0; k < num; k++)
8594 manual_inputs[k] = args.PopLine(pos).AsInt();
8595 else
8596 m_world->GetDriver().RaiseFatalException(1, "RecalculateWithArgs: Invalid use of use_manual_inputs");
8597 }
8598 if ( (pos = args.LocateString("update")) != -1 ){
8599 args.PopString("update");
8600 if (args.GetSize() >= pos - 1){
8601 update = args.PopLine(pos).AsInt();
8602 } else
8603 m_world->GetDriver().RaiseFatalException(1, "RecalculateWithArgs: Invalid use of update (did you specify a value?)");
8604 }
8605 if ( (pos = args.LocateString("num_trials")) != -1){
8606 args.PopString("num_trials");
8607 if (args.GetSize() >= pos - 1)
8608 num_trials = args.PopLine(pos).AsInt();
8609 else
8610 m_world->GetDriver().RaiseFatalException(1, "RecalculateWithArgs: Invalid use of num_trials (did you specify a value?)");
8611 }
8612
8613 if (use_manual_inputs)
8614 use_random_inputs = false;
8615
8616 cCPUTestInfo test_info;
8617 if (use_manual_inputs)
8618 test_info.UseManualInputs(manual_inputs);
8619 else
8620 test_info.UseRandomInputs(use_random_inputs);
8621 test_info.SetResourceOptions(use_resources, m_resources, update, m_resource_time_spent_offset);
8622
8623 // Notifications
8624 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8625 msg.Set("Running batch %d through test CPUs...", cur_batch);
8626 m_world->GetDriver().NotifyComment(msg);
8627 } else{
8628 msg.Set("Running through test CPUs...");
8629 m_world->GetDriver().NotifyComment(msg);
8630 }
8631 if (m_world->GetVerbosity() >= VERBOSE_ON && batch[cur_batch].IsLineage() == false) {
8632 msg.Set("Batch may not be a lineage; parent and ancestor distances may not be correct");
8633 m_world->GetDriver().NotifyWarning(msg);
8634 }
8635
8636 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8637 cAnalyzeGenotype * genotype = NULL;
8638 cAnalyzeGenotype * last_genotype = NULL;
8639 while ((genotype = batch_it.Next()) != NULL) {
8640 // If the previous genotype was the parent of this one, pass in a pointer
8641 // to it for improved recalculate (such as distance to parent, etc.)
8642 if (last_genotype != NULL && genotype->GetParentID() == last_genotype->GetID()) {
8643 genotype->Recalculate(m_ctx, &test_info, last_genotype, num_trials);
8644 } else {
8645 genotype->Recalculate(m_ctx, &test_info, NULL, num_trials);
8646 }
8647 last_genotype = genotype;
8648 }
8649
8650 return;
8651 }
8652
8653
BatchRename(cString cur_string)8654 void cAnalyze::BatchRename(cString cur_string)
8655 {
8656 if (m_world->GetVerbosity() <= VERBOSE_NORMAL) cout << "Renaming organisms..." << endl;
8657 else cout << "Renaming organisms in batch " << cur_batch << endl;
8658
8659 // If a number is given with rename, start at that number...
8660
8661 int id_num = cur_string.PopWord().AsInt();
8662 tListIterator<cAnalyzeGenotype> batch_it(batch[cur_batch].List());
8663 cAnalyzeGenotype * genotype = NULL;
8664 while ((genotype = batch_it.Next()) != NULL) {
8665 cString name = cStringUtil::Stringf("org-%d", id_num);
8666 genotype->SetID(id_num);
8667 genotype->SetName(name);
8668 id_num++;
8669 }
8670 }
8671
CloseFile(cString cur_string)8672 void cAnalyze::CloseFile(cString cur_string)
8673 {
8674 m_world->GetDataFileManager().Remove(cur_string.PopWord());
8675 }
8676
8677
PrintStatus(cString cur_string)8678 void cAnalyze::PrintStatus(cString cur_string)
8679 {
8680 // No Args needed...
8681 (void) cur_string;
8682
8683 cout << "Status Report:" << endl;
8684 for (int i = 0; i < GetNumBatches(); i++) {
8685 if (i == cur_batch || batch[i].List().GetSize() > 0) {
8686 cout << " Batch " << i << " -- "
8687 << batch[i].List().GetSize() << " genotypes.";
8688 if (i == cur_batch) cout << " <current>";
8689 if (batch[i].IsLineage() == true) cout << " <lineage>";
8690 if (batch[i].IsAligned() == true) cout << " <aligned>";
8691
8692 cout << endl;
8693 }
8694 }
8695 }
8696
PrintDebug(cString cur_string)8697 void cAnalyze::PrintDebug(cString cur_string)
8698 {
8699 cout << "::: " << cur_string << '\n';
8700 }
8701
PrintTestInfo(cString cur_string)8702 void cAnalyze::PrintTestInfo(cString cur_string)
8703 {
8704 cFlexVar var1(1), var2(2.0), var3('3'), var4("four");
8705 cFlexVar var5(9), var6(9.0), var7('9'), var8("9");
8706
8707 tArray<cFlexVar> vars(10);
8708 vars[0] = "Testing";
8709 vars[1] = 1;
8710 vars[2] = 2.0;
8711 vars[3] = '3';
8712 vars[4] = "four";
8713 vars[5] = 9;
8714 vars[6] = 9.0;
8715 vars[7] = '9';
8716 vars[8] = "9";
8717
8718 cout << "AsString: ";
8719 for (int i = 0; i < 10; i++) cout << i << ":" << vars[i].AsString() << " ";
8720 cout << endl;
8721
8722 cout << "AsInt: ";
8723 for (int i = 0; i < 10; i++) cout << i << ":" << vars[i].AsInt() << " ";
8724 cout << endl;
8725
8726 for (int i = 0; i < 10; i++) {
8727 for (int j = i+1; j < 10; j++) {
8728 cout << " vars[" << i << "] <= vars[" << j << "] ? " << (vars[i] <= vars[j]);
8729 cout << " vars[" << j << "] <= vars[" << i << "] ? " << (vars[j] <= vars[i]);
8730 cout << endl;
8731 }
8732 }
8733
8734 }
8735
IncludeFile(cString cur_string)8736 void cAnalyze::IncludeFile(cString cur_string)
8737 {
8738 while (cur_string.GetSize() > 0) {
8739 cString filename = cur_string.PopWord();
8740
8741 cInitFile include_file(filename, m_world->GetWorkingDir());
8742
8743 tList<cAnalyzeCommand> include_list;
8744 LoadCommandList(include_file, include_list);
8745 ProcessCommands(include_list);
8746 }
8747 }
8748
CommandSystem(cString cur_string)8749 void cAnalyze::CommandSystem(cString cur_string)
8750 {
8751 if (cur_string.GetSize() == 0) {
8752 cerr << "Error: Keyword \"system\" must be followed by command to run." << endl;
8753 if (exit_on_error) exit(1);
8754 }
8755
8756 cout << "Running System Command: " << cur_string << endl;
8757
8758 system(cur_string);
8759 }
8760
CommandInteractive(cString cur_string)8761 void cAnalyze::CommandInteractive(cString cur_string)
8762 {
8763 // No Args needed...
8764 (void) cur_string;
8765
8766 RunInteractive();
8767 }
8768
8769
8770 /*
8771 FIXME@kgn
8772 Must categorize COMPETE command.
8773 */
/* Arguments to COMPETE (in order): */
/*
 batch_size : size of target batch
 from_id
 to_id=current
 initial_next_id=-1
 initial_next_update=-1
 */
BatchCompete(cString cur_string)8781 void cAnalyze::BatchCompete(cString cur_string)
8782 {
8783 if (cur_string.GetSize() == 0) {
8784 cerr << "Compete Error: Must include target batch size!" << endl;
8785 if (exit_on_error) exit(1);
8786 }
8787 int batch_size = cur_string.PopWord().AsInt();
8788
8789 if (cur_string.GetSize() == 0) {
8790 cerr << "Compete Error: Must include from ID!" << endl;
8791 if (exit_on_error) exit(1);
8792 }
8793 int batch_from = cur_string.PopWord().AsInt();
8794
8795 int batch_to = cur_batch;
8796 if (cur_string.GetSize() > 0) batch_to = cur_string.PopWord().AsInt();
8797
8798 int initial_next_id = -1;
8799 if (cur_string.GetSize() > 0) {
8800 initial_next_id = cur_string.PopWord().AsInt();
8801 }
8802 if (0 <= initial_next_id) {
8803 SetTempNextID(initial_next_id);
8804 }
8805
8806 int initial_next_update = -1;
8807 if (cur_string.GetSize() > 0) {
8808 initial_next_update = cur_string.PopWord().AsInt();
8809 }
8810 if (0 <= initial_next_update) {
8811 SetTempNextUpdate(initial_next_update);
8812 }
8813
8814 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8815 cout << "Compete " << batch_size << " organisms from batch " << batch_from << " to batch " << batch_to << ";" << endl;
8816 cout << "assigning new IDs starting with " << GetTempNextID() << "." << endl;
8817 }
8818
8819 /* Get iterator into "from" batch. */
8820 tListIterator<cAnalyzeGenotype> batch_it(batch[batch_from].List());
8821 /* Get size of "from" batch. */
8822 const int parent_batch_size = batch[batch_from].List().GetSize();
8823
8824 /* Create scheduler. */
8825 cSchedule* schedule = new cProbSchedule(
8826 parent_batch_size,
8827 m_world->GetRandom().GetInt(0x7FFFFFFF)
8828 );
8829
8830 /* Initialize scheduler with fitness values per-organism. */
8831 tArray<cAnalyzeGenotype*> genotype_array(parent_batch_size);
8832 tArray<Genome> offspring_genome_array(parent_batch_size);
8833 tArray<cMerit> fitness_array(parent_batch_size);
8834 cAnalyzeGenotype * genotype = NULL;
8835
8836 cCPUTestInfo test_info;
8837
8838 /*
8839 FIXME@kgn
8840 This should be settable by an optional argument.
8841 */
8842 test_info.UseRandomInputs(true);
8843
8844 int array_pos = 0;
8845 while ((genotype = batch_it.Next()) != NULL) {
8846 genotype_array[array_pos] = genotype;
8847 genotype->Recalculate(m_world->GetDefaultContext(), &test_info, NULL);
8848 if(genotype->GetViable()){
8849 /*
8850 FIXME@kgn
8851 - HACK : multiplication by 1000 because merits less than 1 are truncated
8852 to zero.
8853 */
8854 fitness_array[array_pos] = genotype->GetFitness() * 1000.;
8855 /*
8856 FIXME@kgn
8857 - Need to note somewhere that we are using first descendent of the
8858 parent, if the parent is viable, so that genome of first descendent may
8859 differ from that of parent.
8860 */
8861 offspring_genome_array[array_pos] = test_info.GetTestOrganism(0)->OffspringGenome();
8862 } else {
8863 fitness_array[array_pos] = 0.0;
8864 }
8865 schedule->Adjust(array_pos, fitness_array[array_pos]);
8866 array_pos++;
8867 }
8868
8869 /* Use scheduler to sample organisms in "from" batch. */
8870 for(int i=0; i<batch_size; /* don't increment i yet */){
8871 /* Sample an organism. */
8872 array_pos = schedule->GetNextID();
8873 if(array_pos < 0){
8874 cout << "Warning: No organisms in origin batch have positive fitness, cannot sample to destination batch." << endl;
8875 break;
8876 }
8877 genotype = genotype_array[array_pos];
8878
8879 double copy_mut_prob = m_world->GetConfig().COPY_MUT_PROB.Get();
8880 double ins_mut_prob = m_world->GetConfig().DIVIDE_INS_PROB.Get();
8881 double del_mut_prob = m_world->GetConfig().DIVIDE_DEL_PROB.Get();
8882 int ins_line = -1;
8883 int del_line = -1;
8884
8885 Genome child_genome = offspring_genome_array[array_pos];
8886 Sequence& child_seq = child_genome.GetSequence();
8887 const cInstSet& inst_set = m_world->GetHardwareManager().GetInstSet(child_genome.GetInstSet());
8888
8889 if (copy_mut_prob > 0.0) {
8890 for (int n = 0; n < child_genome.GetSize(); n++) {
8891 if (m_world->GetRandom().P(copy_mut_prob)) {
8892 child_seq[n] = inst_set.GetRandomInst(m_ctx);
8893 }
8894 }
8895 }
8896
8897 /* Perform an Insertion if it has one. */
8898 if (m_world->GetRandom().P(ins_mut_prob)) {
8899 ins_line = m_world->GetRandom().GetInt(child_genome.GetSize() + 1);
8900 child_seq.Insert(ins_line, inst_set.GetRandomInst(m_ctx));
8901 }
8902
8903 /* Perform a Deletion if it has one. */
8904 if (m_world->GetRandom().P(del_mut_prob)) {
8905 del_line = m_world->GetRandom().GetInt(child_genome.GetSize());
8906 child_seq.Remove(del_line);
8907 }
8908
8909 /* Create (possibly mutated) offspring. */
8910 cAnalyzeGenotype* new_genotype = new cAnalyzeGenotype(m_world, child_genome);
8911
8912 int parent_id = genotype->GetID();
8913 int child_id = GetTempNextID();
8914 SetTempNextID(child_id + 1);
8915 cString child_name = cStringUtil::Stringf("org-%d", child_id);
8916
8917 new_genotype->SetParentID(parent_id);
8918 new_genotype->SetID(child_id);
8919 new_genotype->SetName(child_name);
8920 new_genotype->SetUpdateBorn(GetTempNextUpdate());
8921
8922 /* Place offspring in "to" batch. */
8923 batch[batch_to].List().PushRear(new_genotype);
8924 /* Increment and continue. */
8925 i++;
8926 }
8927
8928 SetTempNextUpdate(GetTempNextUpdate() + 1);
8929
8930 batch[batch_to].SetLineage(false);
8931 batch[batch_to].SetAligned(false);
8932
8933 if(schedule){ delete schedule; schedule = 0; }
8934
8935 return;
8936 }
8937
8938
FunctionCreate(cString cur_string,tList<cAnalyzeCommand> & clist)8939 void cAnalyze::FunctionCreate(cString cur_string, tList<cAnalyzeCommand>& clist)
8940 {
8941 int num_args = cur_string.CountNumWords();
8942 if (num_args < 1) {
8943 cerr << "Error: Must provide function name when creating function.";
8944 if (exit_on_error) exit(1);
8945 }
8946
8947 cString fun_name = cur_string.PopWord();
8948
8949 if (FindAnalyzeCommandDef(fun_name) != NULL) {
8950 cerr << "Error: Cannot create function '" << fun_name
8951 << "'; already exists." << endl;
8952 if (exit_on_error) exit(1);
8953 }
8954
8955 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Creating function: " << fun_name << endl;
8956
8957 // Create the new function...
8958 cAnalyzeFunction * new_function = new cAnalyzeFunction(fun_name);
8959 while (clist.GetSize() > 0) {
8960 new_function->GetCommandList()->PushRear(clist.Pop());
8961 }
8962
8963 // Save the function on the new list...
8964 function_list.PushRear(new_function);
8965 }
8966
FunctionRun(const cString & fun_name,cString args)8967 bool cAnalyze::FunctionRun(const cString & fun_name, cString args)
8968 {
8969 if (m_world->GetVerbosity() >= VERBOSE_ON) {
8970 cout << "Running function: " << fun_name << endl;
8971 // << " with args: " << args << endl;
8972 }
8973
8974 // Find the function we're about to run...
8975 cAnalyzeFunction * found_function = NULL;
8976 tListIterator<cAnalyzeFunction> function_it(function_list);
8977 while (function_it.Next() != NULL) {
8978 if (function_it.Get()->GetName() == fun_name) {
8979 found_function = function_it.Get();
8980 break;
8981 }
8982 }
8983
8984 // If we were unable to find the command we're looking for, return false.
8985 if (found_function == NULL) return false;
8986
8987 // Back up the local variables
8988 cString backup_arg_vars[10];
8989 cString backup_local_vars[26];
8990 for (int i = 0; i < 10; i++) backup_arg_vars[i] = arg_variables[i];
8991 for (int i = 0; i < 26; i++) backup_local_vars[i] = local_variables[i];
8992
8993 // Set the arg variables to the passed-in args...
8994 arg_variables[0] = fun_name;
8995 for (int i = 1; i < 10; i++) arg_variables[i] = args.PopWord();
8996 for (int i = 0; i < 26; i++) local_variables[i] = "";
8997
8998 ProcessCommands(*(found_function->GetCommandList()));
8999
9000 // Restore the local variables
9001 for (int i = 0; i < 10; i++) arg_variables[i] = backup_arg_vars[i];
9002 for (int i = 0; i < 26; i++) local_variables[i] = backup_local_vars[i];
9003
9004 return true;
9005 }
9006
9007
BatchUtil_GetMaxLength(int batch_id)9008 int cAnalyze::BatchUtil_GetMaxLength(int batch_id)
9009 {
9010 if (batch_id < 0) batch_id = cur_batch;
9011
9012 int max_length = 0;
9013
9014 tListIterator<cAnalyzeGenotype> batch_it(batch[batch_id].List());
9015 cAnalyzeGenotype * genotype = NULL;
9016 while ((genotype = batch_it.Next()) != NULL) {
9017 if (genotype->GetLength() > max_length) max_length = genotype->GetLength();
9018 }
9019
9020 return max_length;
9021 }
9022
9023
CommandForeach(cString cur_string,tList<cAnalyzeCommand> & clist)9024 void cAnalyze::CommandForeach(cString cur_string,
9025 tList<cAnalyzeCommand> & clist)
9026 {
9027 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Initiating Foreach loop..." << endl;
9028
9029 cString var = cur_string.PopWord();
9030 int num_args = cur_string.CountNumWords();
9031
9032 cString & cur_variable = GetVariable(var);
9033
9034 for (int i = 0; i < num_args; i++) {
9035 cur_variable = cur_string.PopWord();
9036
9037 if (m_world->GetVerbosity() >= VERBOSE_ON) {
9038 cout << "Foreach: setting " << var << " to " << cur_variable << endl;
9039 }
9040 ProcessCommands(clist);
9041 }
9042
9043 if (m_world->GetVerbosity() >= VERBOSE_ON) {
9044 cout << "Ending Foreach on " << var << endl;
9045 }
9046 }
9047
9048
CommandForRange(cString cur_string,tList<cAnalyzeCommand> & clist)9049 void cAnalyze::CommandForRange(cString cur_string,
9050 tList<cAnalyzeCommand> & clist)
9051 {
9052 if (m_world->GetVerbosity() >= VERBOSE_ON) cout << "Initiating FORRANGE loop..." << endl;
9053
9054 int num_args = cur_string.CountNumWords();
9055 if (num_args < 3) {
9056 cerr << " Error: Must give variable, min and max with FORRANGE!"
9057 << endl;
9058 if (exit_on_error) exit(1);
9059 }
9060
9061 cString var = cur_string.PopWord();
9062 double min_val = cur_string.PopWord().AsDouble();
9063 double max_val = cur_string.PopWord().AsDouble();
9064 double step_val = 1.0;
9065 if (num_args >=4 ) step_val = cur_string.PopWord().AsDouble();
9066
9067 cString & cur_variable = GetVariable(var);
9068
9069 // Seperate out all ints from not all ints...
9070 if (min_val == (double) ((int) min_val) &&
9071 max_val == (double) ((int) max_val) &&
9072 step_val == (double) ((int) step_val)) {
9073 for (int i = (int) min_val; i <= (int) max_val; i += (int) step_val) {
9074 cur_variable.Set("%d", i);
9075
9076 if (m_world->GetVerbosity() >= VERBOSE_ON) {
9077 cout << "FORRANGE: setting " << var << " to " << cur_variable << endl;
9078 }
9079 ProcessCommands(clist);
9080 }
9081 } else {
9082 for (double i = min_val; i <= max_val; i += step_val) {
9083 cur_variable.Set("%f", i);
9084
9085 if (m_world->GetVerbosity() >= VERBOSE_ON) {
9086 cout << "FORRANGE: setting " << var << " to " << cur_variable << endl;
9087 }
9088 ProcessCommands(clist);
9089 }
9090 }
9091
9092 if (m_world->GetVerbosity() >= VERBOSE_ON) {
9093 cout << "Ending FORRANGE on " << var << endl;
9094 }
9095 }
9096
9097
9098 /////////////////// Private Methods ///////////////////////////
9099
PopDirectory(cString in_string,const cString default_dir)9100 cString cAnalyze::PopDirectory(cString in_string, const cString default_dir)
9101 {
9102 // Determing the directory name
9103 cString directory(default_dir);
9104 if (in_string.GetSize() != 0) directory = in_string.PopWord();
9105
9106 // Make sure the directory ends in a slash. If not, add one.
9107 int last_pos = directory.GetSize() - 1;
9108 if (directory[last_pos] != '/' && directory[last_pos] != '\\') {
9109 directory += '/';
9110 }
9111
9112 return directory;
9113 }
9114
PopBatch(const cString & in_string)9115 int cAnalyze::PopBatch(const cString & in_string)
9116 {
9117 int batch = cur_batch;
9118 if (in_string.GetSize() != 0 && in_string != "current") {
9119 batch = in_string.AsInt();
9120 }
9121
9122 return batch;
9123 }
9124
PopGenotype(cString gen_desc,int batch_id)9125 cAnalyzeGenotype * cAnalyze::PopGenotype(cString gen_desc, int batch_id)
9126 {
9127 if (batch_id == -1) batch_id = cur_batch;
9128 tListPlus<cAnalyzeGenotype> & gen_list = batch[batch_id].List();
9129 gen_desc.ToLower();
9130
9131 cAnalyzeGenotype * found_gen = NULL;
9132 if (gen_desc == "num_cpus")
9133 found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetNumCPUs);
9134 else if (gen_desc == "total_cpus")
9135 found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetTotalCPUs);
9136 else if (gen_desc == "merit")
9137 found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetMerit);
9138 else if (gen_desc == "fitness")
9139 found_gen = gen_list.PopMax(&cAnalyzeGenotype::GetFitness);
9140 else if (gen_desc.IsNumeric(0))
9141 found_gen = gen_list.PopValue(&cAnalyzeGenotype::GetID, gen_desc.AsInt());
9142 else if (gen_desc == "random") {
9143 int gen_pos = random.GetUInt(gen_list.GetSize());
9144 found_gen = gen_list.PopPos(gen_pos);
9145 }
9146 else {
9147 cout << " Error: unknown type " << gen_desc << endl;
9148 if (exit_on_error) exit(1);
9149 }
9150
9151 return found_gen;
9152 }
9153
9154
// Map a variable name to its storage slot and return a reference to it:
//   lower-case letter -> variables[letter - 'a']
//   upper-case letter -> local_variables[letter - 'A']
//   digit             -> arg_variables[digit - '0']
//
// A name that is not exactly one letter or digit is reported on cerr, and the
// program exits if exit_on_error is set.  Otherwise a longer name is resolved
// by its first character, as before.  A name that is empty or starts with
// some other character has no slot at all; for those, a reference to an empty
// scratch string is returned so the caller never indexes outside the arrays.
cString& cAnalyze::GetVariable(const cString & var)
{
  if (var.GetSize() != 1 ||
      (var.IsLetter(0) == false && var.IsNumeric(0) == false)) {
    cerr << "Error: Illegal variable " << var << " being used." << endl;
    if (exit_on_error) exit(1);

    if (var.GetSize() == 0 ||
        (var.IsLetter(0) == false && var.IsNumeric(0) == false)) {
      static cString invalid_variable;
      invalid_variable = "";  // Do not leak a value from an earlier bad lookup.
      return invalid_variable;
    }
  }

  if (var.IsLowerLetter(0) == true) {
    int var_id = (int) (var[0] - 'a');
    return variables[var_id];
  }
  else if (var.IsUpperLetter(0) == true) {
    int var_id = (int) (var[0] - 'A');
    return local_variables[var_id];
  }
  // Otherwise it must be a number...
  int var_id = (int) (var[0] - '0');
  return arg_variables[var_id];
}
9175
9176
LoadCommandList(cInitFile & init_file,tList<cAnalyzeCommand> & clist,int start_at)9177 int cAnalyze::LoadCommandList(cInitFile& init_file, tList<cAnalyzeCommand>& clist, int start_at)
9178 {
9179 for (int i = start_at; i < init_file.GetNumLines(); i++) {
9180 cString cur_string = init_file.GetLine(i);
9181 cString command = cur_string.PopWord();
9182
9183 cAnalyzeCommand* cur_command;
9184 cAnalyzeCommandDefBase* command_def = FindAnalyzeCommandDef(command);
9185
9186 if (command == "END") {
9187 // We are done with this section of code; break out...
9188 return i;
9189 } else if (command_def != NULL && command_def->IsFlowCommand() == true) {
9190 // This code has a body to it... fill it out!
9191 cur_command = new cAnalyzeFlowCommand(command, cur_string);
9192 i = LoadCommandList(init_file, *(cur_command->GetCommandList()), i + 1); // Start processing at the next line
9193 } else {
9194 // This is a normal command...
9195 cur_command = new cAnalyzeCommand(command, cur_string);
9196 }
9197
9198 clist.PushRear(cur_command);
9199 }
9200
9201 return init_file.GetNumLines();
9202 }
9203
InteractiveLoadCommandList(tList<cAnalyzeCommand> & clist)9204 void cAnalyze::InteractiveLoadCommandList(tList<cAnalyzeCommand> & clist)
9205 {
9206 interactive_depth++;
9207 char text_input[2048];
9208 while (true) {
9209 for (int i = 0; i <= interactive_depth; i++) {
9210 cout << ">>";
9211 }
9212 cout << " ";
9213 cout.flush();
9214 cin.getline(text_input, 2048);
9215 cString cur_input(text_input);
9216 cString command = cur_input.PopWord();
9217
9218 cAnalyzeCommand * cur_command;
9219 cAnalyzeCommandDefBase * command_def = FindAnalyzeCommandDef(command);
9220
9221 if (command == "END") {
9222 // We are done with this section of code; break out...
9223 break;
9224 }
9225 else if (command_def != NULL && command_def->IsFlowCommand() == true) {
9226 // This code has a body to it... fill it out!
9227 cur_command = new cAnalyzeFlowCommand(command, cur_input);
9228 InteractiveLoadCommandList(*(cur_command->GetCommandList()));
9229 }
9230 else {
9231 // This is a normal command...
9232 cur_command = new cAnalyzeCommand(command, cur_input);
9233 }
9234
9235 clist.PushRear(cur_command);
9236 }
9237 interactive_depth--;
9238 }
9239
PreProcessArgs(cString & args)9240 void cAnalyze::PreProcessArgs(cString & args)
9241 {
9242 int pos = 0;
9243 int search_start = 0;
9244 while ((pos = args.Find('$', search_start)) != -1) {
9245 // Setup the variable name that was found...
9246 char varlet = args[pos+1];
9247 cString varname("$");
9248 varname += varlet;
9249
9250 // Determine the variable and act on it.
9251 int varsize = 0;
9252 if (varlet == '$') {
9253 args.Clip(pos+1, 1);
9254 varsize = 1;
9255 }
9256 else if (varlet >= 'a' && varlet <= 'z') {
9257 int var_id = (int) (varlet - 'a');
9258 args.Replace(varname, variables[var_id], pos);
9259 varsize = variables[var_id].GetSize();
9260 }
9261 else if (varlet >= 'A' && varlet <= 'Z') {
9262 int var_id = (int) (varlet - 'A');
9263 args.Replace(varname, local_variables[var_id], pos);
9264 varsize = local_variables[var_id].GetSize();
9265 }
9266 else if (varlet >= '0' && varlet <= '9') {
9267 int var_id = (int) (varlet - '0');
9268 args.Replace(varname, arg_variables[var_id], pos);
9269 varsize = arg_variables[var_id].GetSize();
9270 }
9271 search_start = pos + varsize;
9272 }
9273 }
9274
ProcessCommands(tList<cAnalyzeCommand> & clist)9275 void cAnalyze::ProcessCommands(tList<cAnalyzeCommand>& clist)
9276 {
9277 // Process the command list...
9278 tListIterator<cAnalyzeCommand> command_it(clist);
9279 command_it.Reset();
9280 cAnalyzeCommand* cur_command = NULL;
9281 while ((cur_command = command_it.Next()) != NULL) {
9282 cString command = cur_command->GetCommand();
9283 cString args = cur_command->GetArgs();
9284 PreProcessArgs(args);
9285
9286 cAnalyzeCommandDefBase* command_fun = FindAnalyzeCommandDef(command);
9287
9288 cUserFeedback feedback;
9289 if (command_fun != NULL) {
9290 command_fun->Run(this, args, *cur_command, feedback);
9291 for (int i = 0; i < feedback.GetNumMessages(); i++) {
9292 switch (feedback.GetMessageType(i)) {
9293 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
9294 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
9295 default: break;
9296 };
9297 cerr << feedback.GetMessage(i) << endl;
9298 if (exit_on_error && feedback.GetNumErrors()) exit(1);
9299 }
9300 } else if (!FunctionRun(command, args)) {
9301 cerr << "error: Unknown analysis keyword '" << command << "'." << endl;
9302 if (exit_on_error) exit(1);
9303 }
9304 }
9305 }
9306
9307
PopCommonCPUTestParameters(cWorld * in_world,cString & cur_string,cCPUTestInfo & test_info,cResourceHistory * in_resource_history,int in_resource_time_spent_offset)9308 void cAnalyze::PopCommonCPUTestParameters(cWorld* in_world, cString& cur_string, cCPUTestInfo& test_info, cResourceHistory* in_resource_history, int in_resource_time_spent_offset)
9309 {
9310 tArray<int> manual_inputs; // Used only if manual inputs are specified
9311 cString msg; // Holds any information we may want to send the driver to display
9312 int use_resources = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : 0;
9313 int update = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() : -1;
9314 bool use_random_inputs = (cur_string.GetSize()) ? cur_string.PopWord().AsInt() == 1: false;
9315 bool use_manual_inputs = false;
9316
9317 //Manual inputs will override random input request and must be the last arguments.
9318 if (cur_string.CountNumWords() > 0){
9319 if (cur_string.CountNumWords() == in_world->GetEnvironment().GetInputSize()){
9320 manual_inputs.Resize(in_world->GetEnvironment().GetInputSize());
9321 use_random_inputs = false;
9322 use_manual_inputs = true;
9323 for (int k = 0; cur_string.GetSize(); k++)
9324 manual_inputs[k] = cur_string.PopWord().AsInt();
9325 } else if (in_world->GetVerbosity() >= VERBOSE_ON){
9326 msg.Set("Invalid number of environment inputs requested for recalculation: %d specified, %d required.",
9327 cur_string.CountNumWords(), in_world->GetEnvironment().GetInputSize());
9328 in_world->GetDriver().NotifyWarning(msg);
9329 }
9330 }
9331
9332 if (use_manual_inputs)
9333 test_info.UseManualInputs(manual_inputs);
9334 else
9335 test_info.UseRandomInputs(use_random_inputs);
9336 test_info.SetResourceOptions(use_resources, in_resource_history, update, in_resource_time_spent_offset);
9337 }
9338
9339
// The following function will print a cell in a table with a background color based on a comparison
// with its parent (the result of which is passed in as the 'compare' argument). The 'cell_flags' argument
// includes any other information you want in the <td> tag; 'null_text' is the text you want to replace a
// zero with (sometimes "none" or "N/A"); and 'print_text' is a bool asking if the text should be included at
// all, or just the background color.
9345
HTMLPrintStat(const cFlexVar & value,std::ostream & fp,int compare,const cString & cell_flags,const cString & null_text,bool print_text)9346 void cAnalyze::HTMLPrintStat(const cFlexVar & value, std::ostream& fp, int compare,
9347 const cString & cell_flags, const cString & null_text, bool print_text)
9348 {
9349 fp << "<td " << cell_flags << " ";
9350 if (compare == COMPARE_RESULT_OFF) {
9351 fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_NEG2.Get() << "\">";
9352 if (print_text == true) fp << null_text << " ";
9353 else fp << " ";
9354 return;
9355 }
9356
9357 if (compare == COMPARE_RESULT_NEG) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_NEG1.Get() << "\">";
9358 else if (compare == COMPARE_RESULT_SAME) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_SAME.Get() << "\">";
9359 else if (compare == COMPARE_RESULT_POS) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_POS1.Get() << "\">";
9360 else if (compare == COMPARE_RESULT_ON) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_POS2.Get() << "\">";
9361 else if (compare == COMPARE_RESULT_DIFF) fp << "bgcolor=\"#" << m_world->GetConfig().COLOR_DIFF.Get() << "\">";
9362 else {
9363 std::cerr << "Error! Illegal case in Compare:" << compare << std::endl;
9364 exit(0);
9365 }
9366
9367 if (print_text == true) fp << value << " ";
9368 else fp << " ";
9369
9370 }
9371
CompareFlexStat(const cFlexVar & org_stat,const cFlexVar & parent_stat,int compare_type)9372 int cAnalyze::CompareFlexStat(const cFlexVar & org_stat, const cFlexVar & parent_stat, int compare_type)
9373 {
9374 // If no comparisons need be done, return zero and stop here.
9375 if (compare_type == FLEX_COMPARE_NONE) {
9376 return COMPARE_RESULT_SAME;
9377 }
9378
9379 // In all cases, if the stats are the same, we should return this and stop.
9380 if (org_stat == parent_stat) return COMPARE_RESULT_SAME;
9381
9382 // If we made it this far and all we care about is if they differ, return that they do.
9383 if (compare_type == FLEX_COMPARE_DIFF) return COMPARE_RESULT_DIFF;
9384
9385 // If zero is not special we can calculate our result.
9386 if (compare_type == FLEX_COMPARE_MAX) { // Color higher values as beneficial, lower as harmful.
9387 if (org_stat > parent_stat) return COMPARE_RESULT_POS;
9388 return COMPARE_RESULT_NEG;
9389 }
9390 if (compare_type == FLEX_COMPARE_MIN) { // Color lower values as beneficial, higher as harmful.
9391 if (org_stat > parent_stat) return COMPARE_RESULT_NEG;
9392 return COMPARE_RESULT_POS;
9393 }
9394
9395
9396 // If we made it this far, it means that zero has a special status.
9397 if (org_stat == 0) return COMPARE_RESULT_OFF;
9398 if (parent_stat == 0) return COMPARE_RESULT_ON;
9399
9400
9401 // No zeros are involved, so we can go back to basic checks...
9402 if (compare_type == FLEX_COMPARE_DIFF2) return COMPARE_RESULT_DIFF;
9403
9404 if (compare_type == FLEX_COMPARE_MAX2) { // Color higher values as beneficial, lower as harmful.
9405 if (org_stat > parent_stat) return COMPARE_RESULT_POS;
9406 return COMPARE_RESULT_NEG;
9407 }
9408 if (compare_type == FLEX_COMPARE_MIN2) { // Color lower values as beneficial, higher as harmful.
9409 if (org_stat > parent_stat) return COMPARE_RESULT_NEG;
9410 return COMPARE_RESULT_POS;
9411 }
9412
9413 assert(false); // One of the other options should have been chosen.
9414 return 0;
9415 }
9416
9417
9418
9419
9420
AddLibraryDef(const cString & name,void (cAnalyze::* _fun)(cString))9421 void cAnalyze::AddLibraryDef(const cString & name,
9422 void (cAnalyze::*_fun)(cString))
9423 {
9424 command_lib.PushRear(new cAnalyzeCommandDef(name, _fun));
9425 }
9426
AddLibraryDef(const cString & name,void (cAnalyze::* _fun)(cString,tList<cAnalyzeCommand> &))9427 void cAnalyze::AddLibraryDef(const cString & name,
9428 void (cAnalyze::*_fun)(cString, tList<cAnalyzeCommand> &))
9429 {
9430 command_lib.PushRear(new cAnalyzeFlowCommandDef(name, _fun));
9431 }
9432
SetupCommandDefLibrary()9433 void cAnalyze::SetupCommandDefLibrary()
9434 {
9435 if (command_lib.GetSize() != 0) return; // Library already setup.
9436
9437 AddLibraryDef("LOAD_ORGANISM", &cAnalyze::LoadOrganism);
9438 AddLibraryDef("LOAD_SEQUENCE", &cAnalyze::LoadSequence);
9439 AddLibraryDef("LOAD_RESOURCES", &cAnalyze::LoadResources);
9440 AddLibraryDef("LOAD", &cAnalyze::LoadFile);
9441
9442 // Reduction and sampling commands...
9443 AddLibraryDef("FILTER", &cAnalyze::CommandFilter);
9444 AddLibraryDef("FIND_GENOTYPE", &cAnalyze::FindGenotype);
9445 AddLibraryDef("FIND_ORGANISM", &cAnalyze::FindOrganism);
9446 AddLibraryDef("FIND_LINEAGE", &cAnalyze::FindLineage);
9447 AddLibraryDef("FIND_SEX_LINEAGE", &cAnalyze::FindSexLineage);
9448 AddLibraryDef("FIND_CLADE", &cAnalyze::FindClade);
9449 AddLibraryDef("FIND_LAST_COMMON_ANCESTOR", &cAnalyze::FindLastCommonAncestor);
9450 AddLibraryDef("SAMPLE_ORGANISMS", &cAnalyze::SampleOrganisms);
9451 AddLibraryDef("SAMPLE_GENOTYPES", &cAnalyze::SampleGenotypes);
9452 AddLibraryDef("KEEP_TOP", &cAnalyze::KeepTopGenotypes);
9453 AddLibraryDef("TRUNCATELINEAGE", &cAnalyze::TruncateLineage); // Depricate!
9454 AddLibraryDef("TRUNCATE_LINEAGE", &cAnalyze::TruncateLineage);
9455 AddLibraryDef("SAMPLE_OFFSPRING", &cAnalyze::SampleOffspring);
9456
9457 // Direct output commands...
9458 AddLibraryDef("PRINT", &cAnalyze::CommandPrint);
9459 AddLibraryDef("TRACE", &cAnalyze::CommandTrace);
9460 AddLibraryDef("PRINT_TASKS", &cAnalyze::CommandPrintTasks);
9461 AddLibraryDef("PRINT_TASKS_QUALITY", &cAnalyze::CommandPrintTasksQuality);
9462 AddLibraryDef("DETAIL", &cAnalyze::CommandDetail);
9463 AddLibraryDef("DETAIL_TIMELINE", &cAnalyze::CommandDetailTimeline);
9464 AddLibraryDef("DETAIL_BATCHES", &cAnalyze::CommandDetailBatches);
9465 AddLibraryDef("DETAIL_AVERAGE", &cAnalyze::CommandDetailAverage);
9466 AddLibraryDef("DETAIL_INDEX", &cAnalyze::CommandDetailIndex);
9467 AddLibraryDef("HISTOGRAM", &cAnalyze::CommandHistogram);
9468
9469 // Population analysis commands...
9470 AddLibraryDef("PRINT_PHENOTYPES", &cAnalyze::CommandPrintPhenotypes);
9471 AddLibraryDef("PRINT_DIVERSITY", &cAnalyze::CommandPrintDiversity);
9472 AddLibraryDef("PRINT_DISTANCES", &cAnalyze::CommandPrintDistances);
9473 AddLibraryDef("PRINT_TREE_STATS", &cAnalyze::CommandPrintTreeStats);
9474 AddLibraryDef("PRINT_CUMULATIVE_STEMMINESS", &cAnalyze::CommandPrintCumulativeStemminess);
9475 AddLibraryDef("PRINT_GAMMA", &cAnalyze::CommandPrintGamma);
9476 AddLibraryDef("COMMUNITY_COMPLEXITY", &cAnalyze::AnalyzeCommunityComplexity);
9477 AddLibraryDef("PRINT_RESOURCE_FITNESS_MAP", &cAnalyze::CommandPrintResourceFitnessMap);
9478
9479 // Individual organism analysis...
9480 AddLibraryDef("MAP", &cAnalyze::CommandMapTasks); // Deprecated...
9481 AddLibraryDef("MAP_TASKS", &cAnalyze::CommandMapTasks);
9482 AddLibraryDef("AVERAGE_MODULARITY", &cAnalyze::CommandAverageModularity);
9483 AddLibraryDef("CALC_FUNCTIONAL_MODULARITY", &cAnalyze::CommandCalcFunctionalModularity);
9484 AddLibraryDef("ANALYZE_REDUNDANCY_BY_INST_FAILURE", &cAnalyze::CommandAnalyzeRedundancyByInstFailure);
9485 AddLibraryDef("MAP_MUTATIONS", &cAnalyze::CommandMapMutations);
9486 AddLibraryDef("ANALYZE_COMPLEXITY", &cAnalyze::AnalyzeComplexity);
9487 AddLibraryDef("ANALYZE_LINEAGE_COMPLEXITY", &cAnalyze::AnalyzeLineageComplexitySitesN);
9488 AddLibraryDef("ANALYZE_FITNESS_TWO_SITES", &cAnalyze::AnalyzeFitnessLandscapeTwoSites);
9489 AddLibraryDef("ANALYZE_COMPLEXITY_TWO_SITES", &cAnalyze::AnalyzeComplexityTwoSites);
9490 AddLibraryDef("ANALYZE_KNOCKOUTS", &cAnalyze::AnalyzeKnockouts);
9491 AddLibraryDef("ANALYZE_POP_COMPLEXITY", &cAnalyze::AnalyzePopComplexity);
9492 AddLibraryDef("MAP_DEPTH", &cAnalyze::CommandMapDepth);
9493 // (Untested) AddLibraryDef("PAIRWISE_ENTROPY", &cAnalyze::CommandPairwiseEntropy);
9494
9495 // Population comparison commands...
9496 AddLibraryDef("HAMMING", &cAnalyze::CommandHamming);
9497 AddLibraryDef("LEVENSTEIN", &cAnalyze::CommandLevenstein);
9498 AddLibraryDef("SPECIES", &cAnalyze::CommandSpecies);
9499 AddLibraryDef("RECOMBINE", &cAnalyze::CommandRecombine);
9500 AddLibraryDef("RECOMBINE_SAMPLE", &cAnalyze::CommandRecombineSample);
9501 AddLibraryDef("MUTAGENIZE", &cAnalyze::CommandMutagenize);
9502
9503 // Lineage analysis commands...
9504 AddLibraryDef("ALIGN", &cAnalyze::CommandAlign);
9505 AddLibraryDef("ANALYZE_NEWINFO", &cAnalyze::AnalyzeNewInfo);
9506 AddLibraryDef("MUTATION_REVERT", &cAnalyze::MutationRevert);
9507
9508 // Build input files for avida...
9509 AddLibraryDef("WRITE_CLONE", &cAnalyze::WriteClone);
9510 AddLibraryDef("WRITE_INJECT_EVENTS", &cAnalyze::WriteInjectEvents);
9511 AddLibraryDef("WRITE_COMPETITION", &cAnalyze::WriteCompetition);
9512
9513 // Automated analysis
9514 AddLibraryDef("ANALYZE_MUTS", &cAnalyze::AnalyzeMuts);
9515 AddLibraryDef("ANALYZE_INSTRUCTIONS", &cAnalyze::AnalyzeInstructions);
9516 AddLibraryDef("ANALYZE_INST_POP", &cAnalyze::AnalyzeInstPop);
9517 AddLibraryDef("ANALYZE_BRANCHING", &cAnalyze::AnalyzeBranching);
9518 AddLibraryDef("ANALYZE_MUTATION_TRACEBACK",
9519 &cAnalyze::AnalyzeMutationTraceback);
9520 AddLibraryDef("ANALYZE_MATE_SELECTION", &cAnalyze::AnalyzeMateSelection);
9521 AddLibraryDef("ANALYZE_COMPLEXITY_DELTA", &cAnalyze::AnalyzeComplexityDelta);
9522
9523 // Environment manipulation
9524 AddLibraryDef("ENVIRONMENT", &cAnalyze::EnvironmentSetup);
9525
  // Documentation...
9527 AddLibraryDef("HELPFILE", &cAnalyze::CommandHelpfile);
9528
9529 // Control commands...
9530 AddLibraryDef("SET", &cAnalyze::VarSet);
9531 AddLibraryDef("CONFIG_GET", &cAnalyze::ConfigGet);
9532 AddLibraryDef("CONFIG_SET", &cAnalyze::ConfigSet);
9533 AddLibraryDef("SET_BATCH", &cAnalyze::BatchSet);
9534 AddLibraryDef("NAME_BATCH", &cAnalyze::BatchName);
9535 AddLibraryDef("TAG_BATCH", &cAnalyze::BatchTag);
9536 AddLibraryDef("PURGE_BATCH", &cAnalyze::BatchPurge);
9537 AddLibraryDef("DUPLICATE", &cAnalyze::BatchDuplicate);
9538 AddLibraryDef("RECALCULATE", &cAnalyze::BatchRecalculate);
9539 AddLibraryDef("RECALC", &cAnalyze::BatchRecalculateWithArgs);
9540 AddLibraryDef("RENAME", &cAnalyze::BatchRename);
9541 AddLibraryDef("CLOSE_FILE", &cAnalyze::CloseFile);
9542 AddLibraryDef("STATUS", &cAnalyze::PrintStatus);
9543 AddLibraryDef("ECHO", &cAnalyze::PrintDebug);
9544 AddLibraryDef("DEBUG", &cAnalyze::PrintDebug);
9545 AddLibraryDef("TEST", &cAnalyze::PrintTestInfo);
9546 AddLibraryDef("INCLUDE", &cAnalyze::IncludeFile);
9547 AddLibraryDef("RUN", &cAnalyze::IncludeFile);
9548 AddLibraryDef("SYSTEM", &cAnalyze::CommandSystem);
9549 AddLibraryDef("INTERACTIVE", &cAnalyze::CommandInteractive);
9550
9551 // Functions...
9552 AddLibraryDef("FUNCTION", &cAnalyze::FunctionCreate);
9553
9554 // Flow commands...
9555 AddLibraryDef("FOREACH", &cAnalyze::CommandForeach);
9556 AddLibraryDef("FORRANGE", &cAnalyze::CommandForRange);
9557
9558 // Uncategorized commands...
9559 AddLibraryDef("COMPETE", &cAnalyze::BatchCompete);
9560 }
9561
FindAnalyzeCommandDef(const cString & name)9562 cAnalyzeCommandDefBase* cAnalyze::FindAnalyzeCommandDef(const cString& name)
9563 {
9564 SetupCommandDefLibrary();
9565
9566 cString uppername(name);
9567 uppername.ToUpper();
9568 tListIterator<cAnalyzeCommandDefBase> lib_it(command_lib);
9569 while (lib_it.Next() != (void *) NULL) {
9570 if (lib_it.Get()->GetName() == uppername) break;
9571 }
9572 cAnalyzeCommandDefBase* command_def = lib_it.Get();
9573
9574 if (command_def == NULL && cActionLibrary::GetInstance().Supports(name)) {
9575 command_def = new cAnalyzeCommandAction(name, m_world);
9576 command_lib.PushRear(command_def);
9577 }
9578
9579 return command_def;
9580 }
9581
RunInteractive()9582 void cAnalyze::RunInteractive()
9583 {
9584 bool saved_analyze = m_ctx.GetAnalyzeMode();
9585 m_ctx.SetAnalyzeMode();
9586
9587 cout << "Entering interactive mode..." << endl;
9588
9589 char text_input[2048];
9590 while (true) {
9591 cout << ">> ";
9592 cout.flush();
9593 cin.getline(text_input, 2048);
9594 cString cur_input(text_input);
9595 cString command = cur_input.PopWord();
9596
9597 cAnalyzeCommand* cur_command;
9598 cAnalyzeCommandDefBase* command_def = FindAnalyzeCommandDef(command);
9599 if (command == "") {
9600 // Don't worry about blank lines...
9601 continue;
9602 } else if (command == "END" || command == "QUIT" || command == "EXIT") {
9603 // We are done with interactive mode...
9604 break;
9605 } else if (command_def != NULL && command_def->IsFlowCommand() == true) {
9606 // This code has a body to it... fill it out!
9607 cur_command = new cAnalyzeFlowCommand(command, cur_input);
9608 InteractiveLoadCommandList(*(cur_command->GetCommandList()));
9609 } else {
9610 // This is a normal command...
9611 cur_command = new cAnalyzeCommand(command, cur_input);
9612 }
9613
9614 cString args = cur_command->GetArgs();
9615 PreProcessArgs(args);
9616
9617 cAnalyzeCommandDefBase* command_fun = FindAnalyzeCommandDef(command);
9618
9619 if (command_fun != NULL) { // First check for built-in functions...
9620 cUserFeedback feedback;
9621 command_fun->Run(this, args, *cur_command, feedback);
9622 for (int i = 0; i < feedback.GetNumMessages(); i++) {
9623 switch (feedback.GetMessageType(i)) {
9624 case cUserFeedback::UF_ERROR: cerr << "error: "; break;
9625 case cUserFeedback::UF_WARNING: cerr << "warning: "; break;
9626 default: break;
9627 };
9628 cerr << feedback.GetMessage(i) << endl;
9629 }
9630 } else if (FunctionRun(command, args) == true) { // Then user functions
9631 /* no additional action */
9632 } else { // Error
9633 cerr << "Error: Unknown command '" << command << "'." << endl;
9634 }
9635 }
9636
9637 if (!saved_analyze) m_ctx.ClearAnalyzeMode();
9638 }
9639
9640