1 /***************************************************************************
2 * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "splitgraph.h"
21 #include <iostream>
22 #include <fstream>
23 #include <cctype>
24 #include <algorithm>
25 #include "tree/node.h"
26 #include "ncl/ncl.h"
27 #include "nclextra/myreader.h"
28 #include "tree/mtree.h"
29 #include "tree/mtreeset.h"
30
31
compareSplit(Split * sp1,Split * sp2)32 bool compareSplit(Split* sp1, Split* sp2) {
33 if (sp1->countTaxa() != sp2->countTaxa())
34 return sp1->countTaxa() < sp2->countTaxa();
35 else
36 return sp1->firstTaxon() < sp2->firstTaxon();
37 }
38
39 //#define MY_DEBUG
40 /********************************************************
41 Defining SplitGraph methods
42 ********************************************************/
SplitGraph()43 SplitGraph::SplitGraph()
44 : vector<Split*>()
45 {
46 pda = NULL;
47 taxa = NULL;
48 splits = NULL;
49 sets = NULL;
50 trees = NULL;
51 mtrees = NULL;
52 areas_boundary = NULL;
53 }
54
SplitGraph(Params & params)55 SplitGraph::SplitGraph(Params ¶ms) : vector<Split*>() {
56 init(params);
57 }
58
createBlocks()59 void SplitGraph::createBlocks() {
60 taxa = new NxsTaxaBlock();
61 splits = new MSplitsBlock(this);
62 pda = new MPdaBlock(this);
63 sets = new MSetsBlock();
64 trees = new TreesBlock(taxa);
65 //mtrees = NULL;
66 }
67
68
init(Params & params)69 void SplitGraph::init(Params ¶ms)
70 {
71 mtrees = NULL;
72 if (params.intype == IN_NEWICK) {
73 // read the input file, can contain more than 1 tree
74 mtrees = new MTreeSet(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count);
75 //mtree = new MTree(params.user_file, params.is_rooted);
76
77 if (params.is_rooted) {
78 params.sub_size++;
79 params.min_size++;
80 }
81 if (mtrees->isRooted() && params.root != NULL)
82 outError(ERR_CONFLICT_ROOT);
83 //SplitIntMap hash_ss;
84 mtrees->convertSplits(*this, params.split_threshold, params.split_weight_summary, params.split_weight_threshold);
85
86 if (verbose_mode >= VB_DEBUG)
87 saveFileStarDot(cout);
88 } else {
89 createBlocks();
90 // if (params.is_rooted)
91 // outError(ERR_ROOT_NET);
92
93 cout << "Reading input file " << params.user_file << "..." << endl;
94
95 MyReader nexus(params.user_file);
96
97 nexus.Add(taxa);
98 nexus.Add(splits);
99 nexus.Add(pda);
100 nexus.Add(sets);
101 nexus.Add(trees);
102
103 MyToken token(nexus.inf);
104 nexus.Execute(token);
105 if (trees->GetNumTrees() > 0) {
106 if (getNSplits() > 0)
107 outError("Ambiguous input file, pls only specify either SPLITS block or TREES block");
108 convertFromTreesBlock(params.tree_burnin, params.tree_max_count, params.split_threshold,
109 params.split_weight_summary, params.split_weight_threshold, params.tree_weight_file);
110 }
111
112 }
113
114 if (verbose_mode >= VB_DEBUG)
115 taxa->Report(cout);
116 //splits->Report(cout);
117 //reportConflict(cout);
118 if (params.pdtaxa_file != NULL) {
119 if (sets->getNSets() > 0)
120 outError("Taxa sets were already specified in the input file");
121 cout << "Reading taxa sets in file " << params.pdtaxa_file << "..." << endl;
122
123 bool nexus_formated = (detectInputFile(params.pdtaxa_file) == IN_NEXUS);
124 if (nexus_formated) {
125 MyReader nexus(params.pdtaxa_file);
126 nexus.Add(sets);
127 MyToken token(nexus.inf);
128 nexus.Execute(token);
129 } else {
130 readTaxaSets(params.pdtaxa_file, sets);
131 }
132 if (sets->getNSets() == 0)
133 outError("No taxa sets found");
134 }
135
136 areas_boundary = NULL;
137 if (params.areas_boundary_file) {
138 if (sets->getNSets() == 0) outError("No taxon sets defined yet");
139 areas_boundary = new double [sets->getNSets() * sets->getNSets()];
140 cout << "Reading sets relation file " << params.areas_boundary_file << "..." << endl;
141 readAreasBoundary(params.areas_boundary_file, sets, areas_boundary);
142 }
143
144 if (verbose_mode >= VB_DEBUG && sets->getNSets() > 0)
145 sets->Report(cout);
146
147 if (sets->getNSets() > 0 && taxa->GetNumTaxonLabels() == 0) {
148 AddTaxaFromSets();
149 }
150 if (taxa->GetNumTaxonLabels() == 0)
151 outError("No taxa found");
152 if (getNSplits() == 0) {
153 //outError(ERR_NO_SPLITS);
154 createStarTree();
155 }
156 cout << getNTaxa()-params.is_rooted <<
157 " taxa and " << getNSplits()-params.is_rooted << " splits." << endl;
158
159 }
160
161
saveCheckpoint()162 void SplitGraph::saveCheckpoint() {
163 if (empty()) return;
164 int ntax = getNTaxa();
165 // checkpoint->startStruct("S");
166 CKP_SAVE(ntax);
167 int nsplits = size();
168 CKP_SAVE(nsplits);
169 checkpoint->startList(size());
170 for (iterator it = begin(); it != end(); it++) {
171 checkpoint->addListElement();
172 stringstream ss;
173 ss << (*it)->getWeight();
174 for (int i = 0; i < ntax; i++)
175 if ((*it)->containTaxon(i))
176 ss << " " << i;
177 checkpoint->put("", ss.str());
178 }
179 checkpoint->endList();
180 // checkpoint->endStruct();
181 CheckpointFactory::saveCheckpoint();
182 }
183
restoreCheckpoint()184 void SplitGraph::restoreCheckpoint() {
185 int ntax, nsplits;
186 CheckpointFactory::restoreCheckpoint();
187 // checkpoint->startStruct("S");
188
189 if (!CKP_RESTORE(ntax)) return;
190 CKP_RESTORE(nsplits);
191 checkpoint->startList(nsplits);
192 for (int split = 0; split < nsplits; split++) {
193 checkpoint->addListElement();
194 string str;
195 bool found = checkpoint->getString("", str);
196 ASSERT(found);
197 stringstream ss(str);
198 double weight;
199 ss >> weight;
200 Split *sp = new Split(ntax, weight);
201 for (int i = 0; i < ntax; i++) {
202 int tax;
203 if (ss >> tax) {
204 sp->addTaxon(tax);
205 } else
206 break;
207 }
208 push_back(sp);
209 }
210 checkpoint->endList();
211 // checkpoint->endStruct();
212 }
213
getNTrivialSplits()214 int SplitGraph::getNTrivialSplits() {
215 int count = 0;
216 for (iterator it = begin(); it != end(); it++)
217 if ((*it)->trivial() >= 0)
218 count++;
219 return count;
220 }
221
222
createStarTree()223 void SplitGraph::createStarTree() {
224 cout << "No splits found, creating a star tree with branch length of 1..." << endl;
225 int ntaxa = taxa->GetNumTaxonLabels();
226 for (int i = 0; i < ntaxa; i++) {
227 Split *sp = new Split(ntaxa, 1.0);
228 sp->addTaxon(i);
229 push_back(sp);
230 }
231 cout << "NOTE: subsequent PD will correspond to species richness." << endl;
232 }
233
234
AddTaxaFromSets()235 void SplitGraph::AddTaxaFromSets() {
236 cout << "Taking taxa from SETS block..." << endl;
237 for (int i = 0; i < sets->getNSets(); i++)
238 for(vector<string>::iterator it = sets->getSet(i)->taxlist.begin();
239 it != sets->getSet(i)->taxlist.end(); it++)
240 if (!taxa->IsAlreadyDefined(NxsString(it->c_str()))) {
241 taxa->AddTaxonLabel(NxsString(it->c_str()));
242 }
243 }
244
freeMem()245 void SplitGraph::freeMem() {
246 for (reverse_iterator it = rbegin(); it != rend(); it++) {
247 //(*it)->report(cout);
248 delete *it;
249 }
250 clear();
251 if (areas_boundary) delete areas_boundary;
252 if (trees) delete trees;
253 if (sets) delete sets;
254 if (pda) delete pda;
255 if (splits) delete splits;
256 if (taxa) delete taxa;
257 if (mtrees) delete mtrees;
258 }
259
~SplitGraph()260 SplitGraph::~SplitGraph()
261 {
262 freeMem();
263 }
264
265
convertFromTreesBlock(int burnin,int max_count,double split_threshold,int split_weight_summary,double weight_threshold,const char * tree_weight_file)266 void SplitGraph::convertFromTreesBlock(int burnin, int max_count, double split_threshold,
267 int split_weight_summary, double weight_threshold, const char *tree_weight_file) {
268 cout << trees->GetNumTrees() << " tree(s) loaded" << endl;
269 if (burnin >= trees->GetNumTrees())
270 outError("Burnin value is too large");
271 if (burnin > 0)
272 cout << burnin << " beginning tree(s) discarded" << endl;
273 mtrees = new MTreeSet();
274
275 for (int i = burnin; i < trees->GetNumTrees() && (i < burnin+max_count); i++) {
276 stringstream strs(trees->GetTranslatedTreeDescription(i), ios::in | ios::out | ios::app);
277 strs << ";";
278 MTree *tree = mtrees->newTree();
279 bool myrooted = trees->IsRootedTree(i);
280 tree->readTree(strs, myrooted);
281 mtrees->push_back(tree);
282 mtrees->tree_weights.push_back(1);
283 }
284 mtrees->checkConsistency();
285 //SplitIntMap hash_ss;
286
287 if (tree_weight_file)
288 readIntVector(tree_weight_file, burnin, max_count, mtrees->tree_weights);
289 /* else if (!weights)
290 tree_weights.resize(size(), 1);*/
291
292 if (mtrees->size() != mtrees->tree_weights.size())
293 outError("Tree file and tree weight file have different number of entries");
294 mtrees->convertSplits(*this, split_threshold, split_weight_summary, weight_threshold);
295 }
296
297
298
report(ostream & out)299 void SplitGraph::report(ostream &out)
300 {
301
302 out << endl;
303 out << "Split network contains ";
304
305 if (size() == 0)
306 {
307 out << "no split" << endl;
308 }
309 else if (size() == 1)
310 out << "one split" << endl;
311 else
312 out << size() << " splits" << endl;
313
314 if (size() == 0)
315 return;
316
317 sort(begin(), end(), compareSplit);
318 int k = 0;
319 for (iterator it = begin(); it != end(); it++,k++)
320 {
321 out << '\t' << (k+1) << '\t';
322 (*it)->report(out);
323 }
324 }
325
reportConflict(ostream & out)326 void SplitGraph::reportConflict(ostream &out)
327 {
328 int k = 0;
329 out << "Compatible splits: " << endl;
330 for (iterator i = begin(); i != end(); i++, k++)
331 {
332 out << (k+1) << '\t';
333 int k2 = 1;
334 for (iterator j = begin(); j != end(); j++, k2++)
335 if ( j != i && (*i)->compatible(*(*j)))
336 {
337 out << k2 << " ";
338 }
339 out << endl;
340 }
341 }
342
343 /**
344 calculate sum of weights of preserved splits in the taxa_set
345 @param taxa_set a set of taxa
346 */
calcWeight(Split & taxa_set)347 double SplitGraph::calcWeight(Split &taxa_set)
348 {
349 double sum = 0.0;
350 for (iterator it = begin(); it != end(); it++)
351 if ((*it)->preserved(taxa_set))
352 sum += (*it)->getWeight();
353 return sum;
354 }
355
countSplits(Split & taxa_set)356 int SplitGraph::countSplits(Split &taxa_set)
357 {
358 int cnt = 0;
359 for (iterator it = begin(); it != end(); it++)
360 if ((*it)->preserved(taxa_set))
361 cnt++;
362 return cnt;
363 }
364
countInternalSplits(Split & taxa_set)365 int SplitGraph::countInternalSplits(Split &taxa_set)
366 {
367 int cnt = 0;
368 for (iterator it = begin(); it != end(); it++)
369 if ((*it)->trivial() < 0 && (*it)->preserved(taxa_set))
370 cnt++;
371 return cnt;
372 }
373
374 /**
375 calculate sum of weights of all splits
376 */
calcWeight()377 double SplitGraph::calcWeight() {
378 double sum = 0.0;
379 for (iterator it = begin(); it != end(); it++)
380 sum += (*it)->weight;
381 return sum;
382 }
383
calcTrivialWeight()384 double SplitGraph::calcTrivialWeight() {
385 double sum = 0.0;
386 for (iterator it = begin(); it != end(); it++)
387 if ((*it)->trivial() >= 0)
388 sum += (*it)->weight;
389 return sum;
390 }
391
maxWeight()392 double SplitGraph::maxWeight() {
393 double m = -1e6;
394 for (iterator it = begin(); it != end(); it++)
395 if (m < (*it)->weight) m = (*it)->weight;
396 return m;
397 }
398
generateTaxaSet(char * filename,int size,int overlap,int times)399 void SplitGraph::generateTaxaSet(char *filename, int size, int overlap, int times) {
400 ofstream out(filename);
401 if (!out.is_open())
402 outError(ERR_WRITE_OUTPUT, filename);
403 ASSERT(overlap <= size);
404 int total = 2*size - overlap;
405 int ntaxa = getNTaxa();
406 for (int cnt = 0; cnt < times; cnt++) {
407 // generate random taxon index
408 IntVector ranvec;
409 BoolVector occur(ntaxa, false);
410 int i;
411 for (i = 0; i < total; i++) {
412 int rnum;
413 do { rnum = random_int(ntaxa); } while (occur[rnum]);
414 ranvec.push_back(rnum);
415 occur[rnum] = true;
416 }
417 // now write the first set
418 out << size << endl;
419 for (i = 0; i < size; i++)
420 out << taxa->GetTaxonLabel(ranvec[i]) << endl;
421 out << endl;
422 // now write the second set
423 out << size << endl;
424 for (i = size-overlap; i < total; i++)
425 out << taxa->GetTaxonLabel(ranvec[i]) << endl;
426 out << endl;
427 }
428 out.close();
429 }
430
calcDistance(char * filename)431 void SplitGraph::calcDistance(char *filename) {
432 ofstream out(filename);
433 if (!out.is_open())
434 outError(ERR_WRITE_OUTPUT, filename);
435 mmatrix(double) dist;
436 int i, j;
437 calcDistance(dist);
438
439 int ntaxa = getNTaxa();
440
441 // now write the distances in phylip .dist format
442 out << ntaxa << endl;
443
444 for (i = 0; i < ntaxa; i++) {
445 out << taxa->GetTaxonLabel(i) << " ";
446 for (j = 0; j < ntaxa; j++) {
447 out << dist[i][j] << " ";
448 }
449 out << endl;
450 }
451 out.close();
452 }
453
calcDistance(mmatrix (double)& dist)454 void SplitGraph::calcDistance(mmatrix(double) &dist) {
455 int ntaxa = getNTaxa();
456 iterator it;
457 vector<int> vi, vj;
458 vector<int>::iterator i, j;
459
460 dist.resize(ntaxa);
461 for (mmatrix(double)::iterator di = dist.begin(); di != dist.end(); di++)
462 (*di).resize(ntaxa, 0);
463
464 for (it = begin(); it != end(); it++) {
465 (*it)->getTaxaList(vi, vj);
466 for (i = vi.begin(); i != vi.end(); i++)
467 for (j = vj.begin(); j < vj.end(); j++) {
468 dist[*i][*j] += (*it)->weight;
469 dist[*j][*i] += (*it)->weight;
470 }
471 }
472
473 }
474
475
calcDistance(mmatrix (double)& dist,vector<int> & taxa_order)476 void SplitGraph::calcDistance(mmatrix(double) &dist, vector<int> &taxa_order) {
477 int ntaxa = getNTaxa();
478 int i, j;
479
480 mmatrix(double) my_dist;
481 calcDistance(my_dist);
482 dist.resize(ntaxa);
483 for (i = 0; i < ntaxa; i++) {
484 dist[i].resize(ntaxa);
485 for (j = 0; j < ntaxa; j++)
486 dist[i][j] = my_dist[taxa_order[i]][taxa_order[j]];
487 }
488 }
489
checkCircular(mmatrix (double)& dist)490 bool SplitGraph::checkCircular(mmatrix(double) &dist) {
491 return true;
492 int ntaxa = getNTaxa();
493 Split taxa_set(ntaxa, 0.0);
494 for (int i = 0; i < ntaxa-2; i++)
495 for (int j = i+1; j < ntaxa-1; j++)
496 for (int k = j+1; k < ntaxa; k++) {
497 taxa_set.addTaxon(i);
498 taxa_set.addTaxon(j);
499 taxa_set.addTaxon(k);
500 taxa_set.weight = calcWeight(taxa_set);
501 if (fabs(2 * taxa_set.weight - (dist[i][j] + dist[i][k] + dist[j][k])) > 0.0001) {
502 cout << "Taxa " << i << " " << j << " " << k;
503 cout << " do not satisfy circular equation!" << endl;
504 cout << "Weight = " << taxa_set.weight << endl;
505 cout << "Sum dist/2 = " << (dist[i][j] + dist[i][k] + dist[j][k]) / 2.0 << endl;
506 cout << "dist = " << dist[i][j] << " " << dist[i][k] << " "
507 << dist[j][k] << endl;
508 return false;
509 }
510 taxa_set.removeTaxon(i);
511 taxa_set.removeTaxon(j);
512 taxa_set.removeTaxon(k);
513 }
514 return true;
515 }
516
generateCircular(Params & params)517 void SplitGraph::generateCircular(Params ¶ms) {
518 int i, j;
519 int ntaxa = params.sub_size;
520 int num_splits = (params.num_splits > 0) ? params.num_splits : 3 * ntaxa;
521 if (num_splits < ntaxa)
522 outError(ERR_FEW_SPLITS);
523
524 taxa = new NxsTaxaBlock();
525 splits = new MSplitsBlock(this);
526
527 double threshold = (ntaxa > 3) ? (double)(num_splits - ntaxa) / (ntaxa*(ntaxa-3)/2) : 0.0;
528
529 // insert all trivial splits
530 for (i = 0; i < ntaxa; i++) {
531 double weight = randomLen(params);
532 Split *sp = new Split(ntaxa, weight);
533 sp->addTaxon(i);
534 push_back(sp);
535 ostringstream str;
536 str << "T" << (i+1);
537 taxa->AddTaxonLabel(NxsString(str.str().c_str()));
538 splits->cycle.push_back(i);
539 }
540
541 // randomly insert internal splits
542 for (i = 0; i < ntaxa-2 && getNSplits() < num_splits; i++)
543 for (j = i+1; j < ntaxa && j < ntaxa-3+i; j++) {
544 double choice = random_double();
545 if (choice > threshold) continue;
546 double weight = randomLen(params);
547 Split *sp = new Split(ntaxa, weight);
548 for (int k = i; k <= j; k++)
549 sp->addTaxon(k);
550 push_back(sp);
551 if (getNSplits() >= num_splits) break;
552 }
553
554 ofstream out(params.user_file);
555 if (!out.is_open()) {
556 outError(ERR_WRITE_OUTPUT, params.user_file);
557 }
558
559 saveFileNexus(out);
560 out.close();
561 }
562
saveFileNexus(ostream & out,bool omit_trivial)563 void SplitGraph::saveFileNexus(ostream &out, bool omit_trivial) {
564 int ntaxa = getNTaxa();
565 int i;
566 out << "#nexus" << endl << endl;
567 out << "BEGIN Taxa;" << endl;
568 out << "DIMENSIONS ntax=" << ntaxa << ";" << endl;
569 out << "TAXLABELS" << endl;
570 for (i = 0; i < ntaxa; i++)
571 out << "[" << i+1 << "] '" << taxa->GetTaxonLabel(i) << "'" << endl;
572 out << ";" << endl << "END; [Taxa]" << endl << endl;
573 out << "BEGIN Splits;" << endl;
574 out << "DIMENSIONS ntax=" << ntaxa << " nsplits=" << ((omit_trivial) ? getNSplits() - getNTrivialSplits() : getNSplits()) << ";" << endl;
575 out << "FORMAT labels=no weights=yes confidences=no intervals=no;" << endl;
576 if (isCircular()) {
577 out << "CYCLE";
578 for (i = 0; i < ntaxa; i++)
579 out << " " << splits->cycle[i] + 1;
580 out << ";" << endl;
581 }
582 out << "MATRIX" << endl;
583 int near_zeros = 0;
584 int zeros = 0;
585 for (iterator it = begin(); it != end(); it++) {
586 if (omit_trivial && (*it)->trivial() >= 0) continue;
587 if ((*it)->weight == 0.0) zeros ++;
588 if ((*it)->weight <= 1e-6) near_zeros ++;
589 out << "\t" << (*it)->weight << "\t";
590 for (i = 0; i < ntaxa; i++)
591 if ((*it)->containTaxon(i))
592 out << " " << i+1;
593 out << "," << endl;
594 }
595 out << ";" << endl << "END; [Splits]" << endl << endl;
596 if (near_zeros) {
597 //outWarning("Some nearly-zero split weights observed");
598 //cout << zeros << " zero-weights and " << near_zeros << " near zero weights!" << endl;
599 }
600 }
601
saveFileStarDot(ostream & out,bool omit_trivial)602 void SplitGraph::saveFileStarDot(ostream &out, bool omit_trivial) {
603 int ntaxa = getNTaxa();
604 int i;
605 for (iterator it = begin(); it != end(); it++) {
606 if (omit_trivial && (*it)->trivial() >= 0) continue;
607 bool swap_code = !(*it)->containTaxon(0);
608 if (swap_code) {
609 for (i = 0; i < ntaxa; i++)
610 out << (((*it)->containTaxon(i)) ? '.' : '*');
611 } else {
612 for (i = 0; i < ntaxa; i++)
613 out << (((*it)->containTaxon(i)) ? '*' : '.');
614 }
615 out << "\t" << (*it)->weight << endl;
616 }
617 }
618
saveFile(const char * out_file,InputType file_format,bool omit_trivial)619 void SplitGraph::saveFile(const char* out_file, InputType file_format, bool omit_trivial) {
620 try {
621 ofstream out;
622 out.exceptions(ios::failbit | ios::badbit);
623 out.open(out_file);
624 if (file_format == IN_NEXUS)
625 saveFileNexus(out, omit_trivial);
626 else
627 saveFileStarDot(out, omit_trivial);
628 out.close();
629 } catch (ios::failure) {
630 outError(ERR_WRITE_OUTPUT, out_file);
631 }
632 }
633
scaleWeight(double norm,bool make_int,int precision)634 void SplitGraph::scaleWeight(double norm, bool make_int, int precision) {
635 for (iterator itg = begin(); itg != end(); itg ++ )
636 if (make_int)
637 (*itg)->setWeight( round((*itg)->getWeight()*norm) );
638 else if (precision < 0)
639 (*itg)->setWeight( (*itg)->getWeight()*norm);
640 else
641 (*itg)->setWeight( round((*itg)->getWeight()*norm*pow((double)10.0,precision))/pow((double)10.0,precision));
642 }
643 // TODO Implement a more efficient function using Hash Table
containSplit(Split & sp)644 bool SplitGraph::containSplit(Split &sp) {
645 Split invert_sp(sp);
646 invert_sp.invert();
647 for (iterator it = begin(); it != end(); it++)
648 if ((*(*it)) == sp || (*(*it)) == invert_sp)
649 return true;
650 return false;
651 }
652
computeBoundary(Split & area)653 double SplitGraph::computeBoundary(Split &area) {
654 if (!areas_boundary) return 0.0;
655 int nareas = sets->getNSets();
656 double boundary = 0.0;
657 for (int i = 0; i < nareas; i++)
658 if (area.containTaxon(i)) {
659 boundary += areas_boundary[i*nareas+i];
660 for (int j = i+1; j < nareas; j++)
661 if (area.containTaxon(j))
662 boundary -= 2.0 * areas_boundary[i*nareas+j];
663 }
664 return boundary;
665 }
666
compatible(Split * sp)667 bool SplitGraph::compatible(Split *sp) {
668 for (iterator it = begin(); it != end(); it++)
669 if (!(*it)->compatible(*sp))
670 return false;
671 return true;
672 }
673
findMaxCompatibleSplits(SplitGraph & maxsg)674 void SplitGraph::findMaxCompatibleSplits(SplitGraph &maxsg) {
675
676 // maximum number of compatible splits = 2n-3!
677 int max_splits = getNTaxa() * 2 - 3;
678
679 // myset will be sorted by weight in descending order
680 SplitSet myset;
681 myset.insert(myset.end(), begin(), end());
682 sort(myset.begin(), myset.end(), splitweightcmp);
683
684 // now build the spset
685 if (!maxsg.taxa)
686 maxsg.taxa = new NxsTaxaBlock();
687 if (!maxsg.splits)
688 maxsg.splits = new MSplitsBlock(&maxsg);
689 if (!maxsg.pda)
690 maxsg.pda = new MPdaBlock(&maxsg);
691
692 for (int i = 0; i < getNTaxa(); i++)
693 maxsg.taxa->AddTaxonLabel(taxa->GetTaxonLabel(i));
694
695 // make the cycle
696 maxsg.splits->cycle = splits->cycle;
697 // make the splits
698
699 for (SplitSet::iterator it = myset.begin(); it != myset.end(); it++)
700 if (maxsg.compatible(*it)){
701 maxsg.push_back(new Split(*(*it)));
702 //(*it)->report(cout);
703 if (maxsg.size() >= max_splits)
704 break;
705 }
706 myset.clear();
707 }
708
isWeaklyCompatible()709 bool SplitGraph::isWeaklyCompatible() {
710 if (getNSplits() < 3) return true;
711 for (iterator it1 = begin(); it1+2 != end(); it1++)
712 for (iterator it2 = it1+1; it2+1 != end(); it2++)
713 for (iterator it3 = it2+1; it3 != end(); it3++) {
714 Split sp1(*(*it1));
715 Split sp2(*(*it2));
716 Split sp3(*(*it3));
717 Split sp(sp1);
718 sp *= sp2;
719 sp *= sp3;
720 if (sp.isEmpty()) continue;
721 sp1.invert();
722 sp2.invert();
723 sp = sp1;
724 sp *= sp2;
725 sp *= sp3;
726 if (sp.isEmpty()) continue;
727 sp2.invert();
728 sp3.invert();
729 sp = sp1;
730 sp *= sp2;
731 sp *= sp3;
732 if (sp.isEmpty()) continue;
733 sp1.invert();
734 sp2.invert();
735 sp = sp1;
736 sp *= sp2;
737 sp *= sp3;
738 if (sp.isEmpty()) continue;
739 return false;
740 }
741 return true;
742 }
743
744
getTaxaName(vector<string> & taxname)745 void SplitGraph::getTaxaName(vector<string> &taxname) {
746 taxname.clear();
747 for (int i = 0; i < getNTaxa(); i++)
748 taxname.push_back(taxa->GetTaxonLabel(i));
749 }
750
findLeafName(string & name)751 int SplitGraph::findLeafName(string &name) {
752 for (int i = 0; i < getNTaxa(); i++)
753 if (taxa->GetTaxonLabel(i) == name)
754 return i;
755 return -1;
756 }
757
removeTrivialSplits()758 int SplitGraph::removeTrivialSplits() {
759 int removed = 0;
760 for (iterator itg = begin(); itg != end(); ) {
761 if ((*itg)->trivial() >= 0) {
762 removed++;
763 delete (*itg);
764 (*itg) = back();
765 pop_back();
766 } else itg++;
767 }
768 return removed;
769 }
770