1 /***************************************************************************
2  *   Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "splitgraph.h"
21 #include <iostream>
22 #include <fstream>
23 #include <cctype>
24 #include <algorithm>
25 #include "tree/node.h"
26 #include "ncl/ncl.h"
27 #include "nclextra/myreader.h"
28 #include "tree/mtree.h"
29 #include "tree/mtreeset.h"
30 
31 
compareSplit(Split * sp1,Split * sp2)32 bool compareSplit(Split* sp1, Split* sp2) {
33     if (sp1->countTaxa() != sp2->countTaxa())
34         return sp1->countTaxa() < sp2->countTaxa();
35     else
36         return sp1->firstTaxon() < sp2->firstTaxon();
37 }
38 
39 //#define MY_DEBUG
40 /********************************************************
41     Defining SplitGraph methods
42 ********************************************************/
SplitGraph()43 SplitGraph::SplitGraph()
44         : vector<Split*>()
45 {
46     pda = NULL;
47     taxa = NULL;
48     splits = NULL;
49     sets = NULL;
50     trees = NULL;
51     mtrees = NULL;
52     areas_boundary = NULL;
53 }
54 
SplitGraph(Params & params)55 SplitGraph::SplitGraph(Params &params) : vector<Split*>() {
56     init(params);
57 }
58 
createBlocks()59 void SplitGraph::createBlocks() {
60     taxa = new NxsTaxaBlock();
61     splits = new MSplitsBlock(this);
62     pda = new MPdaBlock(this);
63     sets = new MSetsBlock();
64     trees = new TreesBlock(taxa);
65     //mtrees = NULL;
66 }
67 
68 
init(Params & params)69 void SplitGraph::init(Params &params)
70 {
71     mtrees = NULL;
72     if (params.intype == IN_NEWICK) {
73         // read the input file, can contain more than 1 tree
74         mtrees = new MTreeSet(params.user_file, params.is_rooted, params.tree_burnin, params.tree_max_count);
75         //mtree = new MTree(params.user_file, params.is_rooted);
76 
77         if (params.is_rooted) {
78             params.sub_size++;
79             params.min_size++;
80         }
81         if (mtrees->isRooted() && params.root != NULL)
82             outError(ERR_CONFLICT_ROOT);
83         //SplitIntMap hash_ss;
84         mtrees->convertSplits(*this, params.split_threshold, params.split_weight_summary, params.split_weight_threshold);
85 
86         if (verbose_mode >= VB_DEBUG)
87             saveFileStarDot(cout);
88     } else {
89         createBlocks();
90 //        if (params.is_rooted)
91 //            outError(ERR_ROOT_NET);
92 
93          cout << "Reading input file " << params.user_file << "..." << endl;
94 
95         MyReader nexus(params.user_file);
96 
97         nexus.Add(taxa);
98         nexus.Add(splits);
99         nexus.Add(pda);
100         nexus.Add(sets);
101         nexus.Add(trees);
102 
103         MyToken token(nexus.inf);
104         nexus.Execute(token);
105         if (trees->GetNumTrees() > 0) {
106             if (getNSplits() > 0)
107                 outError("Ambiguous input file, pls only specify either SPLITS block or TREES block");
108             convertFromTreesBlock(params.tree_burnin, params.tree_max_count, params.split_threshold,
109                 params.split_weight_summary, params.split_weight_threshold, params.tree_weight_file);
110         }
111 
112     }
113 
114     if (verbose_mode >= VB_DEBUG)
115         taxa->Report(cout);
116     //splits->Report(cout);
117     //reportConflict(cout);
118     if (params.pdtaxa_file != NULL) {
119         if (sets->getNSets() > 0)
120             outError("Taxa sets were already specified in the input file");
121         cout << "Reading taxa sets in file " << params.pdtaxa_file << "..." << endl;
122 
123         bool nexus_formated = (detectInputFile(params.pdtaxa_file) == IN_NEXUS);
124         if (nexus_formated) {
125             MyReader nexus(params.pdtaxa_file);
126             nexus.Add(sets);
127             MyToken token(nexus.inf);
128             nexus.Execute(token);
129         } else {
130             readTaxaSets(params.pdtaxa_file, sets);
131         }
132         if (sets->getNSets() == 0)
133             outError("No taxa sets found");
134     }
135 
136     areas_boundary = NULL;
137     if (params.areas_boundary_file) {
138         if (sets->getNSets() == 0) outError("No taxon sets defined yet");
139         areas_boundary = new double [sets->getNSets() * sets->getNSets()];
140         cout << "Reading sets relation file " << params.areas_boundary_file << "..." << endl;
141         readAreasBoundary(params.areas_boundary_file, sets, areas_boundary);
142     }
143 
144     if (verbose_mode >= VB_DEBUG && sets->getNSets() > 0)
145         sets->Report(cout);
146 
147     if (sets->getNSets() > 0 && taxa->GetNumTaxonLabels() == 0) {
148         AddTaxaFromSets();
149     }
150     if (taxa->GetNumTaxonLabels() == 0)
151         outError("No taxa found");
152     if (getNSplits() == 0) {
153         //outError(ERR_NO_SPLITS);
154         createStarTree();
155     }
156     cout << getNTaxa()-params.is_rooted <<
157         " taxa and " << getNSplits()-params.is_rooted << " splits." << endl;
158 
159 }
160 
161 
saveCheckpoint()162 void SplitGraph::saveCheckpoint() {
163     if (empty()) return;
164     int ntax = getNTaxa();
165 //    checkpoint->startStruct("S");
166     CKP_SAVE(ntax);
167     int nsplits = size();
168     CKP_SAVE(nsplits);
169     checkpoint->startList(size());
170     for (iterator it = begin(); it != end(); it++) {
171         checkpoint->addListElement();
172         stringstream ss;
173         ss << (*it)->getWeight();
174         for (int i = 0; i < ntax; i++)
175             if ((*it)->containTaxon(i))
176                 ss << " " << i;
177         checkpoint->put("", ss.str());
178     }
179     checkpoint->endList();
180 //    checkpoint->endStruct();
181     CheckpointFactory::saveCheckpoint();
182 }
183 
restoreCheckpoint()184 void SplitGraph::restoreCheckpoint() {
185     int ntax, nsplits;
186     CheckpointFactory::restoreCheckpoint();
187 //    checkpoint->startStruct("S");
188 
189     if (!CKP_RESTORE(ntax)) return;
190     CKP_RESTORE(nsplits);
191     checkpoint->startList(nsplits);
192     for (int split = 0; split < nsplits; split++) {
193         checkpoint->addListElement();
194         string str;
195         bool found = checkpoint->getString("", str);
196         ASSERT(found);
197         stringstream ss(str);
198         double weight;
199         ss >> weight;
200         Split *sp = new Split(ntax, weight);
201         for (int i = 0; i < ntax; i++) {
202             int tax;
203             if (ss >> tax) {
204                 sp->addTaxon(tax);
205             } else
206                 break;
207         }
208         push_back(sp);
209     }
210     checkpoint->endList();
211 //    checkpoint->endStruct();
212 }
213 
getNTrivialSplits()214 int SplitGraph::getNTrivialSplits() {
215     int count = 0;
216     for (iterator it = begin(); it != end(); it++)
217         if ((*it)->trivial() >= 0)
218             count++;
219     return count;
220 }
221 
222 
createStarTree()223 void SplitGraph::createStarTree() {
224     cout << "No splits found, creating a star tree with branch length of 1..." << endl;
225     int ntaxa = taxa->GetNumTaxonLabels();
226     for (int i = 0; i < ntaxa; i++) {
227         Split *sp = new Split(ntaxa, 1.0);
228         sp->addTaxon(i);
229         push_back(sp);
230     }
231     cout << "NOTE: subsequent PD will correspond to species richness." << endl;
232 }
233 
234 
AddTaxaFromSets()235 void SplitGraph::AddTaxaFromSets() {
236     cout << "Taking taxa from SETS block..." << endl;
237     for (int i = 0; i < sets->getNSets(); i++)
238         for(vector<string>::iterator it = sets->getSet(i)->taxlist.begin();
239             it != sets->getSet(i)->taxlist.end(); it++)
240             if (!taxa->IsAlreadyDefined(NxsString(it->c_str()))) {
241                 taxa->AddTaxonLabel(NxsString(it->c_str()));
242             }
243 }
244 
freeMem()245 void SplitGraph::freeMem() {
246     for (reverse_iterator it = rbegin(); it != rend(); it++) {
247         //(*it)->report(cout);
248         delete *it;
249     }
250     clear();
251     if (areas_boundary) delete areas_boundary;
252     if (trees) delete trees;
253     if (sets) delete sets;
254     if (pda) delete pda;
255     if (splits) delete splits;
256     if (taxa) delete taxa;
257     if (mtrees) delete mtrees;
258 }
259 
~SplitGraph()260 SplitGraph::~SplitGraph()
261 {
262     freeMem();
263 }
264 
265 
convertFromTreesBlock(int burnin,int max_count,double split_threshold,int split_weight_summary,double weight_threshold,const char * tree_weight_file)266 void SplitGraph::convertFromTreesBlock(int burnin, int max_count, double split_threshold,
267     int split_weight_summary, double weight_threshold, const char *tree_weight_file) {
268     cout << trees->GetNumTrees() << " tree(s) loaded" << endl;
269     if (burnin >= trees->GetNumTrees())
270         outError("Burnin value is too large");
271     if (burnin > 0)
272     cout << burnin << " beginning tree(s) discarded" << endl;
273     mtrees = new MTreeSet();
274 
275     for (int i = burnin; i < trees->GetNumTrees() && (i < burnin+max_count); i++) {
276         stringstream strs(trees->GetTranslatedTreeDescription(i), ios::in | ios::out | ios::app);
277         strs << ";";
278         MTree *tree = mtrees->newTree();
279         bool myrooted = trees->IsRootedTree(i);
280         tree->readTree(strs, myrooted);
281         mtrees->push_back(tree);
282         mtrees->tree_weights.push_back(1);
283     }
284     mtrees->checkConsistency();
285     //SplitIntMap hash_ss;
286 
287     if (tree_weight_file)
288         readIntVector(tree_weight_file, burnin, max_count, mtrees->tree_weights);
289 /*    else if (!weights)
290         tree_weights.resize(size(), 1);*/
291 
292     if (mtrees->size() != mtrees->tree_weights.size())
293         outError("Tree file and tree weight file have different number of entries");
294     mtrees->convertSplits(*this, split_threshold, split_weight_summary, weight_threshold);
295 }
296 
297 
298 
report(ostream & out)299 void SplitGraph::report(ostream &out)
300 {
301 
302     out << endl;
303     out << "Split network contains ";
304 
305     if (size() == 0)
306     {
307         out << "no split" << endl;
308     }
309     else if (size() == 1)
310         out << "one split" << endl;
311     else
312         out << size() << " splits" << endl;
313 
314     if (size() == 0)
315         return;
316 
317     sort(begin(), end(), compareSplit);
318     int k = 0;
319     for (iterator it = begin(); it != end(); it++,k++)
320     {
321         out << '\t' << (k+1) << '\t';
322         (*it)->report(out);
323     }
324 }
325 
reportConflict(ostream & out)326 void SplitGraph::reportConflict(ostream &out)
327 {
328     int k = 0;
329     out << "Compatible splits: " << endl;
330     for (iterator i = begin(); i != end(); i++, k++)
331     {
332         out << (k+1) << '\t';
333         int k2 = 1;
334         for (iterator j = begin(); j != end(); j++, k2++)
335             if ( j != i && (*i)->compatible(*(*j)))
336             {
337                 out << k2 << " ";
338             }
339         out << endl;
340     }
341 }
342 
343 /**
344     calculate sum of weights of preserved splits in the taxa_set
345     @param taxa_set a set of taxa
346 */
calcWeight(Split & taxa_set)347 double SplitGraph::calcWeight(Split &taxa_set)
348 {
349     double sum = 0.0;
350     for (iterator it = begin(); it != end(); it++)
351         if ((*it)->preserved(taxa_set))
352             sum += (*it)->getWeight();
353     return sum;
354 }
355 
countSplits(Split & taxa_set)356 int SplitGraph::countSplits(Split &taxa_set)
357 {
358     int cnt = 0;
359     for (iterator it = begin(); it != end(); it++)
360         if ((*it)->preserved(taxa_set))
361             cnt++;
362     return cnt;
363 }
364 
countInternalSplits(Split & taxa_set)365 int SplitGraph::countInternalSplits(Split &taxa_set)
366 {
367     int cnt = 0;
368     for (iterator it = begin(); it != end(); it++)
369         if ((*it)->trivial() < 0 && (*it)->preserved(taxa_set))
370             cnt++;
371     return cnt;
372 }
373 
374 /**
375     calculate sum of weights of all splits
376 */
calcWeight()377 double SplitGraph::calcWeight() {
378     double sum = 0.0;
379     for (iterator it = begin(); it != end(); it++)
380         sum += (*it)->weight;
381     return sum;
382 }
383 
calcTrivialWeight()384 double SplitGraph::calcTrivialWeight() {
385     double sum = 0.0;
386     for (iterator it = begin(); it != end(); it++)
387         if ((*it)->trivial() >= 0)
388             sum += (*it)->weight;
389     return sum;
390 }
391 
maxWeight()392 double SplitGraph::maxWeight() {
393     double m = -1e6;
394     for (iterator it = begin(); it != end(); it++)
395         if (m < (*it)->weight) m = (*it)->weight;
396     return m;
397 }
398 
generateTaxaSet(char * filename,int size,int overlap,int times)399 void SplitGraph::generateTaxaSet(char *filename, int size, int overlap, int times) {
400     ofstream out(filename);
401     if (!out.is_open())
402         outError(ERR_WRITE_OUTPUT, filename);
403     ASSERT(overlap <= size);
404     int total = 2*size - overlap;
405     int ntaxa = getNTaxa();
406     for (int cnt = 0; cnt < times; cnt++) {
407         // generate random taxon index
408         IntVector ranvec;
409         BoolVector occur(ntaxa, false);
410         int i;
411         for (i = 0; i < total; i++) {
412             int rnum;
413             do { rnum = random_int(ntaxa); } while (occur[rnum]);
414             ranvec.push_back(rnum);
415             occur[rnum] = true;
416         }
417         // now write the first set
418         out << size << endl;
419         for (i = 0; i < size; i++)
420             out << taxa->GetTaxonLabel(ranvec[i]) << endl;
421         out << endl;
422         // now write the second set
423         out << size << endl;
424         for (i = size-overlap; i < total; i++)
425             out << taxa->GetTaxonLabel(ranvec[i]) << endl;
426         out << endl;
427     }
428     out.close();
429 }
430 
calcDistance(char * filename)431 void SplitGraph::calcDistance(char *filename) {
432     ofstream out(filename);
433     if (!out.is_open())
434         outError(ERR_WRITE_OUTPUT, filename);
435     mmatrix(double) dist;
436     int i, j;
437     calcDistance(dist);
438 
439     int ntaxa = getNTaxa();
440 
441     // now write the distances in phylip .dist format
442     out << ntaxa << endl;
443 
444     for (i = 0; i < ntaxa; i++) {
445         out << taxa->GetTaxonLabel(i) << "   ";
446         for (j = 0; j < ntaxa; j++) {
447             out << dist[i][j] << "  ";
448         }
449         out << endl;
450     }
451     out.close();
452 }
453 
calcDistance(mmatrix (double)& dist)454 void SplitGraph::calcDistance(mmatrix(double) &dist) {
455     int ntaxa = getNTaxa();
456     iterator it;
457     vector<int> vi, vj;
458     vector<int>::iterator i, j;
459 
460     dist.resize(ntaxa);
461     for (mmatrix(double)::iterator di = dist.begin(); di != dist.end(); di++)
462         (*di).resize(ntaxa, 0);
463 
464     for (it = begin(); it != end(); it++) {
465         (*it)->getTaxaList(vi, vj);
466         for (i = vi.begin(); i != vi.end(); i++)
467             for (j = vj.begin(); j < vj.end(); j++) {
468                 dist[*i][*j] += (*it)->weight;
469                 dist[*j][*i] += (*it)->weight;
470             }
471     }
472 
473 }
474 
475 
calcDistance(mmatrix (double)& dist,vector<int> & taxa_order)476 void SplitGraph::calcDistance(mmatrix(double) &dist, vector<int> &taxa_order) {
477     int ntaxa = getNTaxa();
478     int i, j;
479 
480     mmatrix(double) my_dist;
481     calcDistance(my_dist);
482     dist.resize(ntaxa);
483     for (i = 0; i < ntaxa; i++) {
484         dist[i].resize(ntaxa);
485         for (j = 0; j < ntaxa; j++)
486             dist[i][j] = my_dist[taxa_order[i]][taxa_order[j]];
487     }
488 }
489 
checkCircular(mmatrix (double)& dist)490 bool SplitGraph::checkCircular(mmatrix(double) &dist) {
491     return true;
492     int ntaxa = getNTaxa();
493     Split taxa_set(ntaxa, 0.0);
494     for (int i = 0; i < ntaxa-2; i++)
495         for (int j = i+1; j < ntaxa-1; j++)
496             for (int k = j+1; k < ntaxa; k++) {
497                 taxa_set.addTaxon(i);
498                 taxa_set.addTaxon(j);
499                 taxa_set.addTaxon(k);
500                 taxa_set.weight = calcWeight(taxa_set);
501                 if (fabs(2 * taxa_set.weight - (dist[i][j] + dist[i][k] + dist[j][k])) > 0.0001) {
502                     cout << "Taxa " << i << " " << j << " " << k;
503                     cout << " do not satisfy circular equation!" << endl;
504                     cout << "Weight = " << taxa_set.weight << endl;
505                     cout << "Sum dist/2 = " << (dist[i][j] + dist[i][k] + dist[j][k]) / 2.0 << endl;
506                     cout << "dist = " << dist[i][j] << " " << dist[i][k] << " "
507                          << dist[j][k] << endl;
508                     return false;
509                 }
510                 taxa_set.removeTaxon(i);
511                 taxa_set.removeTaxon(j);
512                 taxa_set.removeTaxon(k);
513             }
514     return true;
515 }
516 
generateCircular(Params & params)517 void SplitGraph::generateCircular(Params &params) {
518     int i, j;
519     int ntaxa = params.sub_size;
520     int num_splits = (params.num_splits > 0) ? params.num_splits : 3 * ntaxa;
521     if (num_splits < ntaxa)
522         outError(ERR_FEW_SPLITS);
523 
524     taxa = new NxsTaxaBlock();
525     splits = new MSplitsBlock(this);
526 
527     double threshold = (ntaxa > 3) ? (double)(num_splits - ntaxa) / (ntaxa*(ntaxa-3)/2) : 0.0;
528 
529     // insert all trivial splits
530     for (i = 0; i < ntaxa; i++) {
531         double weight = randomLen(params);
532         Split *sp = new Split(ntaxa, weight);
533         sp->addTaxon(i);
534         push_back(sp);
535         ostringstream str;
536         str << "T" << (i+1);
537         taxa->AddTaxonLabel(NxsString(str.str().c_str()));
538         splits->cycle.push_back(i);
539     }
540 
541     // randomly insert internal splits
542     for (i = 0; i < ntaxa-2 && getNSplits() < num_splits; i++)
543         for (j = i+1; j < ntaxa && j < ntaxa-3+i; j++) {
544             double choice = random_double();
545             if (choice > threshold) continue;
546             double weight = randomLen(params);
547             Split *sp = new Split(ntaxa, weight);
548             for (int k = i; k <= j; k++)
549                 sp->addTaxon(k);
550             push_back(sp);
551             if (getNSplits() >= num_splits) break;
552         }
553 
554     ofstream out(params.user_file);
555     if (!out.is_open()) {
556         outError(ERR_WRITE_OUTPUT, params.user_file);
557     }
558 
559     saveFileNexus(out);
560     out.close();
561 }
562 
saveFileNexus(ostream & out,bool omit_trivial)563 void SplitGraph::saveFileNexus(ostream &out, bool omit_trivial) {
564     int ntaxa = getNTaxa();
565     int i;
566     out << "#nexus" << endl << endl;
567     out << "BEGIN Taxa;" << endl;
568     out << "DIMENSIONS ntax=" << ntaxa << ";" << endl;
569     out << "TAXLABELS" << endl;
570     for (i = 0; i < ntaxa; i++)
571         out << "[" << i+1 << "] '" << taxa->GetTaxonLabel(i) << "'" << endl;
572     out << ";" << endl << "END; [Taxa]" << endl << endl;
573     out << "BEGIN Splits;" << endl;
574     out << "DIMENSIONS ntax=" << ntaxa << " nsplits=" << ((omit_trivial) ? getNSplits() - getNTrivialSplits() : getNSplits()) << ";" << endl;
575     out << "FORMAT labels=no weights=yes confidences=no intervals=no;" << endl;
576     if (isCircular()) {
577         out << "CYCLE";
578         for (i = 0; i < ntaxa; i++)
579             out << " " << splits->cycle[i] + 1;
580         out << ";" << endl;
581     }
582     out << "MATRIX" << endl;
583     int near_zeros = 0;
584     int zeros = 0;
585     for (iterator it = begin(); it != end(); it++) {
586         if (omit_trivial && (*it)->trivial() >= 0) continue;
587         if ((*it)->weight == 0.0) zeros ++;
588         if ((*it)->weight <= 1e-6) near_zeros ++;
589         out << "\t" << (*it)->weight << "\t";
590         for (i = 0; i < ntaxa; i++)
591             if ((*it)->containTaxon(i))
592                 out << " " << i+1;
593         out << "," << endl;
594     }
595     out << ";" << endl << "END; [Splits]" << endl << endl;
596     if (near_zeros) {
597         //outWarning("Some nearly-zero split weights observed");
598         //cout << zeros << " zero-weights and " << near_zeros << " near zero weights!" << endl;
599     }
600 }
601 
saveFileStarDot(ostream & out,bool omit_trivial)602 void SplitGraph::saveFileStarDot(ostream &out, bool omit_trivial) {
603     int ntaxa = getNTaxa();
604     int i;
605     for (iterator it = begin(); it != end(); it++) {
606         if (omit_trivial && (*it)->trivial() >= 0) continue;
607         bool swap_code = !(*it)->containTaxon(0);
608         if (swap_code) {
609             for (i = 0; i < ntaxa; i++)
610                 out << (((*it)->containTaxon(i)) ? '.' : '*');
611         } else {
612             for (i = 0; i < ntaxa; i++)
613                 out << (((*it)->containTaxon(i)) ? '*' : '.');
614         }
615         out << "\t" << (*it)->weight << endl;
616     }
617 }
618 
saveFile(const char * out_file,InputType file_format,bool omit_trivial)619 void SplitGraph::saveFile(const char* out_file, InputType file_format, bool omit_trivial) {
620     try {
621         ofstream out;
622         out.exceptions(ios::failbit | ios::badbit);
623         out.open(out_file);
624         if (file_format == IN_NEXUS)
625             saveFileNexus(out, omit_trivial);
626         else
627             saveFileStarDot(out, omit_trivial);
628         out.close();
629     } catch (ios::failure) {
630         outError(ERR_WRITE_OUTPUT, out_file);
631     }
632 }
633 
scaleWeight(double norm,bool make_int,int precision)634 void SplitGraph::scaleWeight(double norm, bool make_int, int precision) {
635     for (iterator itg = begin(); itg != end(); itg ++ )
636         if (make_int)
637             (*itg)->setWeight( round((*itg)->getWeight()*norm) );
638         else if (precision < 0)
639             (*itg)->setWeight( (*itg)->getWeight()*norm);
640         else
641             (*itg)->setWeight( round((*itg)->getWeight()*norm*pow((double)10.0,precision))/pow((double)10.0,precision));
642 }
643 // TODO Implement a more efficient function using Hash Table
containSplit(Split & sp)644 bool SplitGraph::containSplit(Split &sp) {
645     Split invert_sp(sp);
646     invert_sp.invert();
647     for (iterator it = begin(); it != end(); it++)
648         if ((*(*it)) == sp || (*(*it)) == invert_sp)
649             return true;
650     return false;
651 }
652 
computeBoundary(Split & area)653 double SplitGraph::computeBoundary(Split &area) {
654     if (!areas_boundary) return 0.0;
655     int nareas = sets->getNSets();
656     double boundary = 0.0;
657     for (int i = 0; i < nareas; i++)
658     if (area.containTaxon(i)) {
659         boundary += areas_boundary[i*nareas+i];
660         for (int j = i+1; j < nareas; j++)
661             if (area.containTaxon(j))
662                 boundary -= 2.0 * areas_boundary[i*nareas+j];
663     }
664     return boundary;
665 }
666 
compatible(Split * sp)667 bool SplitGraph::compatible(Split *sp) {
668     for (iterator it = begin(); it != end(); it++)
669         if (!(*it)->compatible(*sp))
670             return false;
671     return true;
672 }
673 
findMaxCompatibleSplits(SplitGraph & maxsg)674 void SplitGraph::findMaxCompatibleSplits(SplitGraph &maxsg) {
675 
676     // maximum number of compatible splits = 2n-3!
677     int max_splits = getNTaxa() * 2 - 3;
678 
679     // myset will be sorted by weight in descending order
680     SplitSet myset;
681     myset.insert(myset.end(), begin(), end());
682     sort(myset.begin(), myset.end(), splitweightcmp);
683 
684     // now build the spset
685     if (!maxsg.taxa)
686         maxsg.taxa = new NxsTaxaBlock();
687     if (!maxsg.splits)
688         maxsg.splits = new MSplitsBlock(&maxsg);
689     if (!maxsg.pda)
690         maxsg.pda = new MPdaBlock(&maxsg);
691 
692     for (int i = 0; i < getNTaxa(); i++)
693         maxsg.taxa->AddTaxonLabel(taxa->GetTaxonLabel(i));
694 
695     // make the cycle
696     maxsg.splits->cycle = splits->cycle;
697     // make the splits
698 
699     for (SplitSet::iterator it = myset.begin(); it != myset.end(); it++)
700         if (maxsg.compatible(*it)){
701             maxsg.push_back(new Split(*(*it)));
702             //(*it)->report(cout);
703             if (maxsg.size() >= max_splits)
704                 break;
705         }
706     myset.clear();
707 }
708 
isWeaklyCompatible()709 bool SplitGraph::isWeaklyCompatible() {
710     if (getNSplits() < 3) return true;
711     for (iterator it1 = begin(); it1+2 != end(); it1++)
712         for (iterator it2 = it1+1; it2+1 != end(); it2++)
713             for (iterator it3 = it2+1; it3 != end(); it3++) {
714                 Split sp1(*(*it1));
715                 Split sp2(*(*it2));
716                 Split sp3(*(*it3));
717                 Split sp(sp1);
718                 sp *= sp2;
719                 sp *= sp3;
720                 if (sp.isEmpty()) continue;
721                 sp1.invert();
722                 sp2.invert();
723                 sp = sp1;
724                 sp *= sp2;
725                 sp *= sp3;
726                 if (sp.isEmpty()) continue;
727                 sp2.invert();
728                 sp3.invert();
729                 sp = sp1;
730                 sp *= sp2;
731                 sp *= sp3;
732                 if (sp.isEmpty()) continue;
733                 sp1.invert();
734                 sp2.invert();
735                 sp = sp1;
736                 sp *= sp2;
737                 sp *= sp3;
738                 if (sp.isEmpty()) continue;
739                 return false;
740             }
741     return true;
742 }
743 
744 
getTaxaName(vector<string> & taxname)745 void SplitGraph::getTaxaName(vector<string> &taxname) {
746     taxname.clear();
747     for (int i = 0; i < getNTaxa(); i++)
748         taxname.push_back(taxa->GetTaxonLabel(i));
749 }
750 
findLeafName(string & name)751 int SplitGraph::findLeafName(string &name) {
752     for (int i = 0; i < getNTaxa(); i++)
753         if (taxa->GetTaxonLabel(i) == name)
754             return i;
755     return -1;
756 }
757 
removeTrivialSplits()758 int SplitGraph::removeTrivialSplits() {
759     int removed = 0;
760     for (iterator itg = begin(); itg != end(); )  {
761         if ((*itg)->trivial() >= 0) {
762             removed++;
763             delete (*itg);
764             (*itg) = back();
765             pop_back();
766         } else itg++;
767     }
768     return removed;
769 }
770