1 /***************************************************************************
2 * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "tree/mtree.h"
21 #include "pdnetwork.h"
22 #include "ncl/ncl.h"
23 #include "nclextra/msetsblock.h"
24 #include "nclextra/myreader.h"
25 #include "lpwrapper.h"
26 #include "gurobiwrapper.h"
27
28 extern void summarizeSplit(Params ¶ms, PDNetwork &sg, vector<SplitSet> &pd_set, PDRelatedMeasures &pd_more, bool full_report);
29
30
PDNetwork()31 PDNetwork::PDNetwork()
32 : SplitGraph()
33 {
34 extra_pd = 0;
35 min_pd = false;
36 }
37
PDNetwork(Params & params)38 PDNetwork::PDNetwork(Params ¶ms) : SplitGraph(params) {
39 extra_pd = 0;
40 min_pd = false;
41
42 if (params.is_rooted)
43 readRootNode(ROOT_NAME);
44
45 // read the parameter file
46 if (params.param_file != NULL)
47 readParams(params);
48
49 if (params.budget_file != NULL) {
50 if (isPDArea())
51 pda->readBudgetAreaFile(params);
52 else
53 pda->readBudgetFile(params);
54 }
55 // identify the root
56 if (params.root != NULL)
57 readRootNode(params.root);
58
59 // initial PD min
60 if (params.find_pd_min)
61 initPDMin();
62
63 // read the initial set of taxa, incoporate info into the split system
64 if (params.initial_file != NULL && params.eco_dag_file == NULL)
65 readInitialSet(params);
66
67 if (!initialset.empty() && !isPDArea())
68 proceedInitialSet();
69
70 if (params.initial_area_file != NULL)
71 readInitialAreas(params);
72
73
74
75 }
76
77
78 /**
79 Identify the root node if specified, include it into the initial set
80 @param root_name name of the root node
81 */
readRootNode(const char * root_name)82 void PDNetwork::readRootNode(const char *root_name) {
83 int id = -1;
84 try {
85 id = taxa->FindTaxon(root_name);
86 } catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
87 outError(ERR_NO_TAXON, root_name);
88 }
89 initialset.clear();
90 initialset.push_back(id);
91 //if (sets->getNSets() == 0)
92 }
93
94
95
readParams(Params & params)96 void PDNetwork::readParams(Params ¶ms) {
97 int ntaxa = getNTaxa() - params.is_rooted;
98
99 // read parameters from file
100 double scale;
101 StrVector tax_name;
102 DoubleVector ori_weight, tax_weight;
103 readWeightFile(params, ntaxa, scale, tax_name, ori_weight);
104
105 // now convert the weights
106 tax_weight.resize(ntaxa, 0);
107 for (int i = 0; i < tax_name.size(); i++) {
108 int id = -1;
109 try {
110 string name = "";
111 name.append(tax_name[i]);
112 id = taxa->FindTaxon(NxsString(name.c_str()));
113 } catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
114 outError(ERR_NO_TAXON, tax_name[i]);
115 }
116 tax_weight[id] = ori_weight[i];
117 }
118
119 if (params.scaling_factor >= 0) {
120 if (params.scaling_factor > 1) outError("Scaling factor must be between 0 and 1");
121 cout << "Rescaling split weights with " << params.scaling_factor <<
122 " and taxa weights with " << 1 - params.scaling_factor << endl;
123 scale = params.scaling_factor;
124 for (DoubleVector::iterator it = tax_weight.begin(); it != tax_weight.end(); it++)
125 (*it) *= (1 - scale);
126 }
127
128 // incoporate into the split system
129 for (iterator it = begin(); it != end(); it++) {
130 int id = (*it)->trivial();
131 // first, multiply split weight with the coefficient
132 (*it)->weight *= scale;
133
134 // if a trivial split, add the important parameter f
135 if (id >= 0)
136 (*it)->weight += tax_weight[id];
137 }
138 }
139
140
141 /**
142 read the initial set of taxa to be included into PD-tree
143 @param params program parameters
144 */
readInitialSet(Params & params)145 void PDNetwork::readInitialSet(Params ¶ms) {
146 extra_pd = 0.0;
147 int ntaxa = getNTaxa() - params.is_rooted;
148 StrVector tax_name;
149 readInitTaxaFile(params, ntaxa, tax_name);
150 if (tax_name.empty())
151 outError("No taxa found");
152 for (StrVector::iterator it = tax_name.begin(); it != tax_name.end(); it++) {
153 int id = -1;
154 try {
155 string name = "";
156 name.append(*it);
157 id = taxa->FindTaxon(NxsString(name.c_str()));
158 } catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
159 outError(ERR_NO_TAXON, *it);
160 }
161 initialset.push_back(id);
162 }
163
164 if (isPDArea()) return;
165
166 if (isBudgetConstraint()) {
167 int budget = (params.budget >= 0) ? params.budget : pda->budget;
168 if (calcCost(initialset) > budget)
169 outError(ERR_TOO_SMALL_BUDGET);
170 } else {
171 int sub_size = (params.sub_size > 1) ? params.sub_size : pda->sub_size;
172 if (initialset.size() > sub_size)
173 outError(ERR_TOO_SMALL_K);
174 }
175 }
176
proceedInitialSet()177 void PDNetwork::proceedInitialSet() {
178 double total_w = trunc(abs(calcWeight())+1);
179 // get the set of initial taxa
180 set<int> iset;
181 for (IntVector::iterator it2 = initialset.begin(); it2 != initialset.end(); it2++)
182 iset.insert(*it2);
183 // now modifying the split weights
184 for (iterator it = begin(); it != end(); it++) {
185
186 // get the taxa id of trivial split
187 int id = (*it)->trivial();
188 // if not trivial split, continue
189 if (id < 0) continue;
190
191 if (iset.find(id) != iset.end()) {
192 // increase the trivial split weight
193 (*it)->weight += total_w;
194 extra_pd += total_w;
195 }
196 }
197 }
198
readInitialAreas(Params & params)199 void PDNetwork::readInitialAreas(Params ¶ms) {
200 if (!isPDArea())
201 outError("Invalid -ia option: no areas specified");
202 int nareas = sets->getNSets();
203 StrVector area_name;
204 readInitAreaFile(params, nareas, area_name);
205 if (area_name.empty())
206 outError("No area found");
207 for (StrVector::iterator it = area_name.begin(); it != area_name.end(); it++) {
208 int id = -1;
209 id = sets->findArea(*it);
210 if (id < 0)
211 outError(ERR_NO_AREA, *it);
212 initialareas.push_back(id);
213 }
214
215 if (isBudgetConstraint()) {
216 int budget = (params.budget >= 0) ? params.budget : pda->budget;
217 if (calcCost(initialareas) > budget)
218 outError(ERR_TOO_SMALL_BUDGET);
219 } else {
220 int sub_size = (params.sub_size >= 1) ? params.sub_size : pda->sub_size;
221 if (initialareas.size() > sub_size)
222 outError(ERR_TOO_SMALL_K);
223 }
224 }
225
226
initPDMin()227 void PDNetwork::initPDMin() {
228 min_pd = true;
229 for (iterator it = begin(); it != end(); it++) {
230 (*it)->weight = -(*it)->weight;
231 }
232 }
233
234
235 /**
236 compute the required costs to conserve a taxa set
237 @param taxset set of taxa
238 @return minimum budget required
239 */
calcCost(IntVector & taxset)240 int PDNetwork::calcCost(IntVector &taxset) {
241 int sum = 0;
242 for (IntVector::iterator it = taxset.begin(); it != taxset.end(); it++)
243 sum += pda->costs[*it];
244 return sum;
245 }
246
247 /**
248 compute the required costs to conserve a taxa set
249 @param taxset set of taxa
250 @return minimum budget required
251 */
calcCost(Split & taxset)252 int PDNetwork::calcCost(Split &taxset) {
253 IntVector invec;
254 taxset.getTaxaList(invec);
255 return calcCost(invec);
256 }
257
258
259
260 /********************************************************
261 Now comes PD related stuff
262 ********************************************************/
263
264
265
calcPD(Split & id_set)266 void PDNetwork::calcPD(Split &id_set) {
267 if (initialset.empty()) {
268 id_set.weight = calcWeight(id_set);
269 return;
270 }
271 Split id(id_set);
272 for (IntVector::iterator it = initialset.begin(); it != initialset.end(); it++)
273 id.addTaxon(*it);
274 id_set.weight = calcWeight(id);
275 }
276
calcExclusivePD(Split & id_set)277 void PDNetwork::calcExclusivePD(Split &id_set) {
278 id_set.invert();
279 calcPD(id_set);
280 id_set.invert();
281 id_set.weight = calcWeight() - id_set.weight;
282 }
283
284
285
computePD(Params & params,SplitSet & pd_set,PDRelatedMeasures & pd_more)286 void PDNetwork::computePD(Params ¶ms, SplitSet &pd_set, PDRelatedMeasures &pd_more) {
287 //MSetsBlock *sets;
288 //sets = new MSetsBlock();
289
290 //sets->Report(cout);
291 TaxaSetNameVector *allsets = sets->getSets();
292 TaxaSetNameVector::iterator i;
293 for (i = allsets->begin(); i != allsets->end(); i++) {
294 Split *id_set = new Split(getNTaxa());
295 /*
296 for (IntVector::iterator it = initialset.begin(); it != initialset.end(); it++)
297 id_set->addTaxon(*it);
298 */
299 for (vector<string>::iterator it2 = (*i)->taxlist.begin(); it2 != (*i)->taxlist.end(); it2++) {
300 int id = -1;
301 try {
302 id = taxa->FindTaxon(NxsString(it2->c_str()));
303 } catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
304 outError(ERR_NO_TAXON, *it2);
305 }
306 if (id >= 0)
307 id_set->addTaxon(id);
308 }
309 pd_more.setName.push_back((*i)->name);
310 if (params.exclusive_pd) {
311 calcExclusivePD(*id_set);
312 pd_more.exclusivePD.push_back(id_set->getWeight());
313 }
314 calcPD(*id_set);
315 pd_more.PDScore.push_back(id_set->weight);
316 pd_set.push_back(id_set);
317 }
318 //delete sets;
319 }
320
321
322
323 /********************************************************
324 EXHAUSTIVE FUNCTION
325 ********************************************************/
326
updateSplitVector(Split & curset,SplitSet & best_set)327 void PDNetwork::updateSplitVector(Split &curset, SplitSet &best_set)
328 {
329 if (curset.weight > best_set[0]->weight) {
330 for (int it = best_set.size()-1; it >= 0; it--)
331 delete best_set[it];
332 best_set.clear();
333 }
334 best_set.push_back(new Split(curset));
335 }
336
337 /**
338 calculate sum of weights of preserved splits in the taxa_set
339 @param taxa_set a set of taxa
340 */
calcRaisedWeight(Split & taxa_set,IntList & rem_splits,IntList::iterator & rem_it)341 double PDNetwork::calcRaisedWeight(Split &taxa_set,
342 IntList &rem_splits, IntList::iterator &rem_it)
343 {
344 double sum = 0.0;
345 for (IntList::iterator it = rem_splits.begin(); it != rem_it;)
346 if ((*this)[*it]->preserved(taxa_set)) {
347 sum += (*this)[*it]->weight;
348 IntList::iterator prev_it = rem_it;
349 prev_it--;
350 int temp = *it;
351 *it = *prev_it;
352 *prev_it = temp;
353 rem_it = prev_it;
354 } else it++;
355 return sum;
356 }
357
calcMaxBudget()358 int PDNetwork::calcMaxBudget() {
359 int sum = 0;
360 for (DoubleVector::iterator it = pda->costs.begin(); it != pda->costs.end(); it++)
361 sum += (*it);
362 return sum;
363 }
364
365
enterFindPD(Params & params)366 void PDNetwork::enterFindPD(Params ¶ms) {
367 // check parameters
368 if (params.pd_proportion == 0.0) {
369 if (isBudgetConstraint()) {
370 int budget = (params.budget >= 0) ? params.budget : pda->getBudget();
371 if (budget < 0) {
372 outError(ERR_NO_BUDGET);
373 }
374 } else {
375 int min_accepted = !isPDArea() + 1;
376 int sub_size = (params.sub_size >= min_accepted) ? params.sub_size : pda->getSubSize();
377 if (sub_size < min_accepted && params.pdtaxa_file == NULL) {
378 outError(ERR_NO_K);
379 }
380
381 }
382 }
383 if (initialset.size() > 0) {
384 cout << "Consider split network as ROOTED." << endl;
385 } else {
386 cout << "Consider split network as UNROOTED." << endl;
387 }
388
389 cout << "Total split weights: " << calcWeight() << endl;
390 cout << " Internal split weights: " << calcWeight() - calcTrivialWeight() << endl;
391 cout << " Trivial split weights : " << calcTrivialWeight() << endl;
392
393 if (params.pd_proportion == 0.0) {
394
395 if (isBudgetConstraint()) {
396 // fix the budget and min_budget first
397 if (params.budget < 0) params.budget = pda->budget;
398 if (verbose_mode >= VB_DEBUG) {
399 pda->Report(cout);
400 }
401 cout << "Budget constraint with budget = " << params.budget << " ..." << endl;
402 if (params.min_budget < 0)
403 params.min_budget = pda->min_budget;
404 if (params.min_budget < 0) params.min_budget = params.budget;
405
406 // resize the taxa_set
407 int max_budget = calcMaxBudget();
408 if (params.budget > max_budget) {
409 cout << "Only maximum budget of " << max_budget << " required, truncating to that value..." << endl;
410 params.budget = max_budget;
411 if (params.min_budget > params.budget)
412 params.min_budget = params.budget;
413 }
414
415 } else {
416 int min_accepted = !isPDArea() + 1;
417 if (params.sub_size <= 0) params.sub_size = pda->sub_size;
418 if (!isPDArea()) {
419 if (params.sub_size < 2 || params.sub_size > getNTaxa()) {
420 ostringstream str;
421 str <<"k must be between 2 and " << getNTaxa()-params.is_rooted;
422 outError(str.str());
423 }
424 } else if (params.sub_size < 1 || params.sub_size > sets->getNSets()) {
425 ostringstream str;
426 str << "k must be between 1 and " << sets->getNSets();
427 outError(str.str());
428 }
429 if (params.min_size < min_accepted) params.min_size = params.sub_size;
430 }
431 }
432 }
433
printLPVersion(bool gurobi_format)434 void printLPVersion(bool gurobi_format) {
435 if (gurobi_format)
436 cout << "Using GUROBI" << endl;
437 else {
438 //int lp_majorversion, lp_minorversion, lp_release, lp_build;
439 //lp_solve_version_info(&lp_majorversion, &lp_minorversion, &lp_release, &lp_build);
440 //cout << "Using LP_SOLVE " << lp_majorversion << "." << lp_minorversion << "." << lp_release << "." << lp_build << endl;
441 }
442 }
443
findPD(Params & params,vector<SplitSet> & taxa_set,vector<int> & taxa_order)444 void PDNetwork::findPD(Params ¶ms, vector<SplitSet> &taxa_set, vector<int> &taxa_order) {
445
446 // call the entering function
447 enterFindPD(params);
448
449 int ntaxa = getNTaxa();
450 int nsplits = getNSplits();
451 Split curset(ntaxa, 0.0);
452 IntList rem_splits;
453
454 for (int i = 0; i < nsplits; i++)
455 rem_splits.push_back(i);
456 IntList::iterator rem_it = rem_splits.end();
457
458 params.detected_mode = EXHAUSTIVE;
459
460
461 if (isPDArea()) {
462 params.detected_mode = LINEAR_PROGRAMMING;
463 printLPVersion(params.gurobi_format);
464 cout << "Optimizing PD over " << sets->getNSets() << " areas..." << endl;
465 cout << "Linear programming on general split network..." << endl;
466 findPDArea_LP(params, taxa_set);
467 } else if (params.run_mode == GREEDY) {
468 // greedy search, not ensure to give the optimal sets!
469 cout << "Start greedy search..." << endl;
470 greedyPD(params.sub_size, curset, taxa_order);
471 localSearchPD(params.sub_size, curset, taxa_order);
472 taxa_set.resize(1);
473 taxa_set[0].push_back(new Split(curset));
474 } else if (params.run_mode != EXHAUSTIVE) {
475 params.detected_mode = LINEAR_PROGRAMMING;
476 printLPVersion(params.gurobi_format);
477 cout << "Linear programming on general split network..." << endl;
478 findPD_LP(params, taxa_set);
479 }
480 else if (isBudgetConstraint()) {
481 // exhaustive search by the order
482 cout << endl << "Start exhaustive search..." << endl;
483 taxa_set.resize(1);
484 taxa_set[0].push_back(new Split(ntaxa, 0.0));
485 exhaustPDBudget(params.budget, -1, curset, params.find_all, taxa_set[0], taxa_order, rem_splits, rem_it);
486 } else {
487 // exhaustive search by the order
488 cout << endl << "Start exhaustive search..." << endl;
489 taxa_set.resize(1);
490 taxa_set[0].push_back(new Split(ntaxa, 0.0));
491 exhaustPD2(params.sub_size, -1, curset, params.find_all, taxa_set[0], taxa_order, rem_splits, rem_it);
492 }
493
494 // call the leaving function
495 leaveFindPD(taxa_set);
496 }
497
leaveFindPD(vector<SplitSet> & taxa_set)498 void PDNetwork::leaveFindPD(vector<SplitSet> &taxa_set) {
499 // subtract the weights from the extra_pd
500 if (extra_pd > 0)
501 for (vector<SplitSet>::iterator it = taxa_set.begin(); it != taxa_set.end(); it++)
502 for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++)
503 (*it2)->weight -= extra_pd;
504 if (min_pd)
505 for (vector<SplitSet>::iterator it = taxa_set.begin(); it != taxa_set.end(); it++)
506 for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++)
507 (*it2)->weight = -(*it2)->weight;
508 }
509
510
511 /**
512 exhaustive search VERSION 2 for maximal phylogenetic diversity of a given size
513 @param subsize the subset size
514 @param best_set (OUT) the set of taxa in the maximal PD set
515 @param cur_tax current taxon
516 @param curset current set
517 @param taxa_order (OUT) order of inserted taxa
518 @param rem_splits (IN) remaining splits
519 @return the PD score of the maximal set
520 */
exhaustPD2(int subsize,int cur_tax,Split & curset,bool find_all,SplitSet & best_set,vector<int> & taxa_order,IntList & rem_splits,IntList::iterator & rem_it)521 double PDNetwork::exhaustPD2(int subsize, int cur_tax, Split &curset,
522 bool find_all,SplitSet &best_set, vector<int> &taxa_order,
523 IntList &rem_splits, IntList::iterator &rem_it )
524 {
525 int ntaxa = getNTaxa();
526 double saved_score = curset.weight;
527 for (int tax = cur_tax+1; tax <= ntaxa - subsize; tax ++) {
528 curset.addTaxon(taxa_order[tax]);
529 IntList::iterator saved_it = rem_it;
530 curset.weight += calcRaisedWeight(curset, rem_splits, rem_it);
531 if (subsize > 1)
532 exhaustPD2(subsize-1, tax, curset, find_all, best_set, taxa_order, rem_splits, rem_it);
533 else {
534 if (curset.weight >= best_set[0]->weight) {
535 updateSplitVector(curset, best_set);
536 //curset.report(cout);
537 }
538 //curset.report(cout);
539 }
540 curset.removeTaxon(taxa_order[tax]);
541 curset.weight = saved_score;
542 rem_it = saved_it;
543 //restoreSplit(subsize, rem_splits, out_splits);
544 }
545 return best_set[0]->weight;
546 }
547
548
549
exhaustPDBudget(int cur_budget,int cur_tax,Split & curset,bool find_all,SplitSet & best_set,vector<int> & taxa_order,IntList & rem_splits,IntList::iterator & rem_it)550 double PDNetwork::exhaustPDBudget(int cur_budget, int cur_tax, Split &curset,
551 bool find_all,SplitSet &best_set, vector<int> &taxa_order,
552 IntList &rem_splits, IntList::iterator &rem_it )
553 {
554 int ntaxa = getNTaxa();
555 double saved_score = curset.weight;
556 for (int tax = cur_tax+1; tax < ntaxa; tax ++)
557 if (pda->costs[taxa_order[tax]] <= cur_budget)
558 {
559 curset.addTaxon(taxa_order[tax]);
560 IntList::iterator saved_it = rem_it;
561
562 curset.weight += calcRaisedWeight(curset, rem_splits, rem_it);
563 if (curset.weight >= best_set[0]->weight) {
564 updateSplitVector(curset, best_set);
565 //curset.report(cout);
566 }
567
568 if (tax < ntaxa-1)
569 exhaustPDBudget(cur_budget - pda->costs[taxa_order[tax]], tax,
570 curset, find_all, best_set, taxa_order, rem_splits, rem_it);
571
572 //curset.report(cout);
573 curset.removeTaxon(taxa_order[tax]);
574 curset.weight = saved_score;
575 rem_it = saved_it;
576 //restoreSplit(subsize, rem_splits, out_splits);
577 }
578 return best_set[0]->weight;
579 }
580
581
582 /********************************************************
583 GREEDY SEARCH!
584 ********************************************************/
585
586 /**
587 greedy algorithm for phylogenetic diversity of a given size
588 @param subsize the subset size
589 @param taxa_set (OUT) the set of taxa in the PD-set
590 @return the PD score of the maximal set, also returned in taxa_set.weight
591 */
greedyPD(int subsize,Split & taxa_set,vector<int> & taxa_order)592 double PDNetwork::greedyPD(int subsize, Split &taxa_set, vector<int> &taxa_order) {
593 int ntaxa = getNTaxa();
594 taxa_set.setNTaxa(ntaxa);
595 taxa_set.weight = 0;
596 taxa_order.clear();
597 taxa_order.reserve(ntaxa);
598
599 int besti, bestj, i, j;
600
601 // start from the PD-2 set
602 for (i = 0; i < ntaxa - 1; i++)
603 for (j = 0; j < ntaxa; j++) {
604 Split curset;
605 curset.setNTaxa(ntaxa);
606 curset.addTaxon(i);
607 curset.addTaxon(j);
608 curset.weight = calcWeight(curset);
609 if (curset.weight > taxa_set.weight) {
610 taxa_set = curset;
611 besti = i;
612 bestj = j;
613 }
614 }
615
616 //taxa_set.report(cout);
617 taxa_order.push_back(besti);
618 taxa_order.push_back(bestj);
619
620 for (int step = 2; step < subsize; step++) {
621 Split pdk_set = taxa_set;
622 besti = -1;
623 for (i = 0; i < ntaxa; i++)
624 if (!pdk_set.containTaxon(i)) {
625 Split curset;
626 curset.setNTaxa(ntaxa);
627 curset = pdk_set;
628 curset.addTaxon(i);
629 curset.weight = calcWeight(curset);
630 if (curset.weight > taxa_set.weight || besti == -1) {
631 taxa_set = curset;
632 besti = i;
633 }
634 }
635 //taxa_set.report(cout);
636 taxa_order.push_back(besti);
637 }
638 return taxa_set.getWeight();
639 }
640
641
642 /**
643 testing algorithm for phylogenetic diversity of a given size
644 @param subsize the subset size
645 @param taxa_set (OUT) the set of taxa in the PD-set
646 @return the PD score of the maximal set, also returned in taxa_set.weight
647 */
localSearchPD(int subsize,Split & taxa_set,vector<int> & taxa_order)648 double PDNetwork::localSearchPD(int subsize, Split &taxa_set, vector<int> &taxa_order) {
649 int ntaxa = getNTaxa();
650 //int nsplits = getNSplits();
651 int i;
652 taxa_set.setNTaxa(ntaxa);
653 for (i = 0; i < subsize; i++)
654 taxa_set.addTaxon(taxa_order[i]);
655 taxa_set.weight = calcWeight(taxa_set);
656 taxa_set.report(cout);
657 bool stop;
658 do {
659 stop = true;
660 for (i = 0; i < ntaxa; i++) if (taxa_set.containTaxon(i)) {
661 for (int j = 0; j < ntaxa; j++) if (!taxa_set.containTaxon(j))
662 {
663 taxa_set.addTaxon(j);
664 taxa_set.removeTaxon(i);
665 double new_w = calcWeight(taxa_set);
666 if (new_w > taxa_set.weight) {
667 taxa_set.weight = new_w;
668 stop = false;
669 taxa_set.report(cout);
670 break;
671 }
672 taxa_set.removeTaxon(j);
673 taxa_set.addTaxon(i);
674 }
675 if (!stop) break;
676 }
677 } while (!stop);
678 return taxa_set.getWeight();
679 }
680
681
calcPDGain(vector<SplitSet> & pd_set,mmatrix (double)& delta)682 void PDNetwork::calcPDGain(vector<SplitSet> &pd_set, mmatrix(double) &delta) {
683 vector<SplitSet>::iterator it;
684 int ntaxa = pd_set.front().front()->getNTaxa();
685 delta.resize(pd_set.size());
686 int cnt = 0;
687 for (cnt = 0; cnt < delta.size(); cnt++)
688 delta[cnt].resize(ntaxa, 0);
689
690
691
692 for (it = pd_set.begin(), cnt = 0; it != pd_set.end(); it++, cnt++) {
693 ASSERT(!(*it).empty());
694 // take only the first split for calculation
695 Split *sp = (*it).front();
696 for (int tax = 0; tax < ntaxa; tax++)
697 if (!sp->containTaxon(tax)) {
698 sp->addTaxon(tax);
699 delta[cnt][tax] = calcWeight(*sp) - sp->weight;
700 sp->removeTaxon(tax);
701 }
702 }
703 }
704
calcPDEndemism(SplitSet & area_set,DoubleVector & pd_endem)705 void PDNetwork::calcPDEndemism(SplitSet &area_set, DoubleVector &pd_endem) {
706 SplitSet::iterator it_s;
707
708 // make union of all id_sets
709 Split id_union(getNTaxa());
710 for (it_s = area_set.begin(); it_s != area_set.end(); it_s++)
711 id_union += *(*it_s);
712
713 // calculate PD of union
714 calcPD(id_union);
715
716 // now calculate PD endemism
717 pd_endem.clear();
718 for (it_s = area_set.begin(); it_s != area_set.end(); it_s++) {
719 // make union of all other set
720 Split id_other(getNTaxa());
721 for (SplitSet::iterator it_s2 = area_set.begin(); it_s2 != area_set.end(); it_s2++)
722 if (it_s2 != it_s) id_other += *(*it_s2);
723 // calculate PD of all other sets
724 calcPD(id_other);
725
726 // calc PD endemism
727 pd_endem.push_back(id_union.weight - id_other.weight);
728 }
729 }
730
calcPDComplementarity(SplitSet & area_set,char * area_names,vector<string> & all_names,DoubleVector & pd_comp)731 void PDNetwork::calcPDComplementarity(SplitSet &area_set, char *area_names,
732 vector<string> &all_names, DoubleVector &pd_comp) {
733
734 set<string> given_areas;
735
736 parseAreaName(area_names, given_areas);
737
738 /*
739 for (set<string>::iterator it = given_areas.begin(); it != given_areas.end(); it++)
740 cout << (*it) << "!";
741 cout << endl;
742 */
743 SplitSet::iterator it_s;
744 vector<string>::iterator it_n;
745
746 Split given_id(getNTaxa());
747
748 // convert taxa set to id set
749 for (it_s = area_set.begin(), it_n = all_names.begin(); it_s != area_set.end(); it_s++, it_n++) {
750 if (given_areas.find(*it_n) != given_areas.end())
751 given_id += *(*it_s);
752 }
753
754 if (given_id.countTaxa() == 0)
755 outError("Complementary area name(s) not correct");
756
757 calcPD(given_id);
758
759 // now calculate PD complementarity
760 pd_comp.clear();
761 for (it_s = area_set.begin(); it_s != area_set.end(); it_s++) {
762 // make union the two sets
763 Split id_both(*(*it_s));
764 id_both += given_id;
765 // calculate PD of both sets
766 calcPD(id_both);
767 // calc PD complementarity
768 pd_comp.push_back(id_both.weight - given_id.weight);
769 }
770
771 }
772
transformLP2(Params & params,const char * outfile,int total_size,bool make_bin)773 void PDNetwork::transformLP2(Params ¶ms, const char *outfile, int total_size, bool make_bin) {
774 Split included_tax(getNTaxa());
775 IntVector::iterator it2;
776 for (it2 = initialset.begin(); it2 != initialset.end(); it2++)
777 included_tax.addTaxon(*it2);
778 try {
779 ofstream out;
780 out.exceptions(ios::failbit | ios::badbit);
781 out.open(outfile);
782 vector<int> y_value;
783 checkYValue(total_size, y_value);
784
785 lpObjectiveMaxSD(out, params, y_value, total_size);
786 lpSplitConstraint_TS(out, params, y_value, total_size);
787 lpK_BudgetConstraint(out, params, total_size);
788 lpVariableBound(out, params, included_tax, y_value);
789 if (make_bin)
790 lpVariableBinary(out, params, included_tax);
791
792 out.close();
793 //cout << "Transformed LP problem printed to " << outfile << endl;
794 } catch (ios::failure) {
795 outError(ERR_WRITE_OUTPUT, outfile);
796 }
797 }
798
799 //Olga:ECOpd split system
transformEcoLP(Params & params,const char * outfile,int total_size)800 void PDNetwork::transformEcoLP(Params ¶ms, const char *outfile, int total_size) {
801 try {
802 ofstream out;
803 out.exceptions(ios::failbit | ios::badbit);
804 out.open(outfile);
805 vector<int> y_value;
806 y_value.resize(getNSplits(), -1);
807 lpObjectiveMaxSD(out, params, y_value, total_size);
808 lpSplitConstraint_TS(out, params, y_value, total_size);
809 out.close();
810
811 } catch (ios::failure) {
812 outError(ERR_WRITE_OUTPUT, outfile);
813 }
814 }
815
findPD_LP(Params & params,vector<SplitSet> & taxa_set)816 void PDNetwork::findPD_LP(Params ¶ms, vector<SplitSet> &taxa_set) {
817 if (params.find_all)
818 outError("Current linear programming does not support multiple optimal sets!");
819
820 string ofile = params.out_prefix;
821 ofile += ".lp";
822 double score;
823 int lp_ret, i, ntaxa = getNTaxa();
824 int k, min_k, max_k, step_k, index;
825
826 double *variables = new double[ntaxa];
827
828 if (isBudgetConstraint()) { // non-budget case
829 min_k = params.min_budget;
830 max_k = params.budget;
831 step_k = params.step_budget;
832 } else {
833 min_k = params.min_size;
834 max_k = params.sub_size;
835 step_k = params.step_size;
836 }
837 taxa_set.resize((max_k - min_k)/step_k + 1);
838
839 // now construction the optimal PD sets
840 if (isBudgetConstraint())
841 cout << "running budget = ";
842 else
843 cout << "running k = ";
844 for (k = min_k; k <= max_k; k += step_k) {
845 index = (k - min_k) / step_k;
846 if (!params.binary_programming) {
847 transformLP2(params, ofile.c_str(), k, false);
848 cout << " " << k;
849 cout.flush();
850 if (params.gurobi_format)
851 lp_ret = gurobi_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode, params.gurobi_threads);
852 else
853 lp_ret = lp_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode);
854 } else lp_ret = 7;
855 if (lp_ret != 0 && lp_ret != 7)
856 outError("Something went wrong with LP solver!");
857 if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
858 if (params.binary_programming)
859 transformLP2(params, ofile.c_str(), k, true);
860 else
861 lpVariableBinary(ofile.c_str(), params, initialset);
862 cout << " " << k << "(bin)";
863 cout.flush();
864 if (params.gurobi_format)
865 lp_ret = gurobi_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode, params.gurobi_threads);
866 else
867 lp_ret = lp_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode);
868 if (lp_ret != 0) // check error again without allowing non-binary
869 outError("Something went wrong with LP solver!");
870 }
871
872 Split *pd_set = new Split(ntaxa, score);
873 for (i = 0; i < ntaxa; i++)
874 if (1.0 - variables[i] < tolerance) {
875 //pd_set->addTaxon(taxa_order[i]);
876 pd_set->addTaxon(i);
877 }
878 calcPD(*pd_set);
879 taxa_set[index].push_back(pd_set);
880 }
881 cout << endl;
882 delete [] variables;
883 }
884
transformLP_Area2(Params & params,const char * outfile,int total_size,bool make_bin)885 void PDNetwork::transformLP_Area2(Params ¶ms, const char *outfile, int total_size, bool make_bin) {
886 int nareas = getNAreas();
887 Split included_area(nareas);
888 IntVector::iterator it2;
889 for (it2 = initialareas.begin(); it2 != initialareas.end(); it2++)
890 included_area.addTaxon(*it2);
891 try {
892 ofstream out;
893 out.exceptions(ios::failbit | ios::badbit);
894 out.open(outfile);
895 vector<int> y_value, count1, count2;
896 checkYValue_Area(total_size, y_value, count1, count2);
897
898 lpObjectiveMaxSD(out, params, y_value, total_size);
899 lpSplitConstraint_RS(out, params, y_value, count1, count2, total_size);
900 lpInitialArea(out, params);
901 lpK_BudgetConstraint(out, params, total_size);
902 lpBoundaryConstraint(out, params);
903 lpVariableBound(out, params, included_area, y_value);
904 if (make_bin)
905 lpVariableBinary(out, params, included_area);
906
907 out.close();
908 //cout << "Transformed LP problem printed to " << outfile << endl;
909 } catch (ios::failure) {
910 outError(ERR_WRITE_OUTPUT, outfile);
911 }
912
913 }
914
transformMinK_Area2(Params & params,const char * outfile,double pd_proportion,bool make_bin)915 void PDNetwork::transformMinK_Area2(Params ¶ms, const char *outfile, double pd_proportion, bool make_bin) {
916 int nareas = getNAreas();
917 Split included_area(nareas);
918 IntVector::iterator it2;
919 for (it2 = initialareas.begin(); it2 != initialareas.end(); it2++)
920 included_area.addTaxon(*it2);
921 try {
922 ofstream out;
923 out.exceptions(ios::failbit | ios::badbit);
924 out.open(outfile);
925 vector<int> y_value, count1, count2;
926 checkYValue_Area(0, y_value, count1, count2);
927
928 lpObjectiveMinK(out, params);
929 lpMinSDConstraint(out, params, y_value, pd_proportion);
930 lpSplitConstraint_RS(out, params, y_value, count1, count2, 0);
931 lpInitialArea(out, params);
932 lpBoundaryConstraint(out, params);
933 lpVariableBound(out, params, included_area, y_value);
934 if (make_bin)
935 lpVariableBinary(out, params, included_area);
936
937 out.close();
938 //cout << "Transformed LP problem printed to " << outfile << endl;
939 } catch (ios::failure) {
940 outError(ERR_WRITE_OUTPUT, outfile);
941 }
942 }
943
944
findMinKArea_LP(Params & params,const char * filename,double pd_proportion,Split & area)945 double PDNetwork::findMinKArea_LP(Params ¶ms, const char* filename, double pd_proportion, Split &area) {
946 int nareas = area_taxa.size();
947 double *variables = new double[nareas];
948 double score;
949 int lp_ret, i;
950
951
952 if (!params.binary_programming) {
953 cout << " " << pd_proportion;
954 cout.flush();
955 transformMinK_Area2(params, filename, pd_proportion, false);
956 if (params.gurobi_format)
957 lp_ret = gurobi_solve((char*)filename, nareas, &score, variables, verbose_mode, params.gurobi_threads);
958 else
959 lp_ret = lp_solve((char*)filename, nareas, &score, variables, verbose_mode);
960 } else lp_ret = 7;
961 if (lp_ret != 0 && lp_ret != 7)
962 outError("Something went wrong with LP solver!");
963 if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
964 cout << " " << pd_proportion << "(bin)";
965 cout.flush();
966 if (params.binary_programming)
967 transformMinK_Area2(params, filename, pd_proportion, true);
968 else
969 lpVariableBinary(filename, params, initialareas);
970 if (params.gurobi_format)
971 lp_ret = gurobi_solve((char*)filename, nareas, &score, variables, verbose_mode, params.gurobi_threads);
972 else
973 lp_ret = lp_solve((char*)filename, nareas, &score, variables, verbose_mode);
974 if (lp_ret != 0) // check error again without allowing non-binary
975 outError("Something went wrong with LP solver!");
976 }
977 area.setNTaxa(nareas);
978 for (i = 0; i < nareas; i++)
979 if (1.0 - variables[i] < tolerance) {
980 //pd_set->addTaxon(taxa_order[i]);
981 area.addTaxon(i);
982 }
983 calcPDArea(area);
984 cout << " score: " << area.weight;
985 double budget_k;
986 if (isBudgetConstraint()) {
987 budget_k = calcCost(area);
988 } else {
989 budget_k = area.countTaxa();
990 }
991 delete [] variables;
992 return budget_k;
993 }
994
computeFeasibleBudget(Params & params,IntVector & ok_budget)995 void PDNetwork::computeFeasibleBudget(Params ¶ms, IntVector &ok_budget) {
996 if (!isBudgetConstraint()) {
997 ok_budget.resize(params.sub_size+1, 1);
998 return;
999 }
1000 cout << "Computing feasible budget values..." << endl;
1001 IntVector cost_present;
1002 cost_present.resize((*max_element(pda->costs.begin(), pda->costs.end())) + 1, 0);
1003 int i, j, num_cost = 0;
1004 DoubleVector::iterator it;
1005 for (it = pda->costs.begin(); it != pda->costs.end(); it++) {
1006 if ((*it) != round(*it)) {
1007 outError("Non integer cost detected.");
1008 }
1009 if ((*it) != 0 && !(cost_present[*it])) {
1010 num_cost++;
1011 cost_present[*it] = 1;
1012 }
1013 }
1014 if (num_cost == 0) outError("All costs are zero! Please check the input budget file.");
1015 if (cost_present[1]) {
1016 // if cost of 1 detected, all budget values are feasible
1017 ok_budget.resize(params.budget+1, 1);
1018 return;
1019 }
1020 IntVector unique_cost;
1021 IntVector::iterator it2;
1022 for (i = 0, it2 = cost_present.begin(); it2 != cost_present.end(); it2++, i++)
1023 if (*it2) unique_cost.push_back(i);
1024 ASSERT(unique_cost.size() == num_cost);
1025
1026 ok_budget.resize(params.budget+1, 0);
1027 // initialize all entry with corresponding cost
1028 for (it2 = unique_cost.begin(); it2 != unique_cost.end(); it2++)
1029 if (*it2 < ok_budget.size())
1030 ok_budget[*it2] = 1;
1031 // now use dynamic programming to find feasible budgets
1032
1033 for (i = 0; i <= params.budget; i++)
1034 for (it2 = unique_cost.begin(); it2 != unique_cost.end(); it2++) {
1035 j = i - (*it2);
1036 if (j < 0) continue;
1037 if (ok_budget[j]) {
1038 ok_budget[i] = 1;
1039 break;
1040 }
1041 }
1042
1043
1044 if (verbose_mode < VB_MED)
1045 return;
1046 cout << "Feasible budgets:";
1047 for (i = 0; i < ok_budget.size(); i++)
1048 if (ok_budget[i]) cout << " " << i;
1049 cout << endl;
1050 }
1051
1052
printOutputSetScore(Params & params,vector<SplitSet> & pd_set)1053 void PDNetwork::printOutputSetScore(Params ¶ms, vector<SplitSet> &pd_set) {
1054 char filename[300];
1055 //int c_old = -1;
1056 int c_num = 0, i;
1057 //double w_old = -1.0;
1058 char scorename[300];
1059 ofstream scoreout;
1060 ofstream out;
1061 if (params.nr_output == 1) {
1062 if (params.run_mode == PD_USER_SET || !isPDArea()) {
1063 sprintf(filename, "%s.pdtaxa", params.out_prefix);
1064 cout << "All taxa list(s) printed to " << filename << endl;
1065 } else {
1066 sprintf(filename, "%s.pdarea", params.out_prefix);
1067 cout << "All area list(s) printed to " << filename << endl;
1068 }
1069 out.open(filename);
1070 sprintf(scorename, "%s.score", params.out_prefix);
1071 scoreout.open(scorename);
1072 }
1073 double total_weight = calcWeight();
1074
1075 for (vector<SplitSet>::iterator it = pd_set.begin(); it != pd_set.end(); it++) {
1076 // ignore, if get the same PD sets again
1077 //if (it != pd_set.begin() && it->getWeight() == (it-1)->getWeight() && it->size() == (it-1)->size())
1078 //continue;
1079 if ((*it).empty()) continue;
1080 c_num = 0;
1081 if (params.nr_output == 1)
1082 scoreout << (*it)[0]->countTaxa() << " " << (it)->getWeight() << endl;
1083
1084 for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++, c_num++ ){
1085 Split *this_set = *it2;
1086 int count = this_set->countTaxa();
1087 //if (count == 0) continue;
1088
1089 //if (count != c_old) {
1090 if (c_num == 0) {
1091 //c_num = 0;
1092 sprintf(filename, "%s.%d.pdtaxa", params.out_prefix, count);
1093 }
1094 else {
1095 //c_num++;
1096 sprintf(filename, "%s.%d.pdtaxa.%d", params.out_prefix, count, c_num);
1097 }
1098 //if (fabs(w_old - this_set->getWeight()) > 1e-5 || (c_old != count))
1099 // if (params.nr_output == 1)
1100 // scoreout << count << " " << this_set->getWeight() << endl;
1101 //w_old = this_set->getWeight();
1102
1103 //c_old = count;
1104 if (params.nr_output > 10) {
1105 out.open(filename);
1106 if (params.run_mode == PD_USER_SET || !isPDArea()) {
1107 for (i = 0; i < getNTaxa(); i++)
1108 if (this_set->containTaxon(i))
1109 out << getTaxa()->GetTaxonLabel(i) << endl;
1110 } else {
1111 for (i = 0; i < getSetsBlock()->getNSets(); i++)
1112 if (this_set->containTaxon(i))
1113 out << getSetsBlock()->getSet(i)->name << endl;
1114 }
1115 out.close();
1116 //cout << "Taxa list printed to " << filename << endl;
1117 } else if (params.nr_output == 1) {
1118 out << count << " " << this_set->getWeight() << " " <<
1119 this_set->getWeight() / total_weight << " " <<
1120 calcCost(*this_set) << " " << computeBoundary(*this_set) << " " <<
1121 params.boundary_modifier << endl;
1122
1123 if (params.run_mode == PD_USER_SET || !isPDArea()) {
1124 for (i = 0; i < getNTaxa(); i++)
1125 if (this_set->containTaxon(i))
1126 out << getTaxa()->GetTaxonLabel(i) << endl;
1127 } else {
1128 for (i = 0; i < getSetsBlock()->getNSets(); i++)
1129 if (this_set->containTaxon(i))
1130 out << getSetsBlock()->getSet(i)->name << endl;
1131 }
1132 }
1133 }
1134 }
1135
1136 if (params.nr_output == 1) {
1137 out.close();
1138 scoreout.close();
1139 //cout << "PD scores printed to " << scorename << endl;
1140 }
1141 }
1142
1143
findPDArea_LP(Params & params,vector<SplitSet> & areas_set)1144 void PDNetwork::findPDArea_LP(Params ¶ms, vector<SplitSet> &areas_set) {
1145 if (params.find_all)
1146 outError("Current linear programming does not support multiple optimal sets!");
1147 PDRelatedMeasures pd_more;
1148 // get the taxa in the areas, only if EMPTY!
1149 Split *area_coverage = new Split();
1150 int num_area_coverage = params.sub_size;
1151 if (area_taxa.empty()) {
1152 computePD(params, area_taxa, pd_more);
1153 if (params.root || params.is_rooted) {
1154 ASSERT(!initialset.empty());
1155 int root_id = initialset[0];
1156 for (SplitSet::iterator it = area_taxa.begin(); it != area_taxa.end(); it++)
1157 (*it)->addTaxon(root_id);
1158 }
1159 checkAreaCoverage();
1160 num_area_coverage = findMinAreas(params, *area_coverage);
1161 calcPDArea(*area_coverage);
1162 cout << "We found ";
1163 if (isBudgetConstraint())
1164 cout << "a budget of " << num_area_coverage << " is enough";
1165 else
1166 cout << "a number of " << num_area_coverage << " areas are enough";
1167 cout << " to cover all feasible taxa" << endl;
1168 if (isBudgetConstraint()) {
1169 if (params.budget > num_area_coverage) {
1170 params.budget = num_area_coverage;
1171 if (params.min_budget > params.budget)
1172 params.min_budget = params.budget;
1173 cout << "budget is therefore set to a maximum of " << num_area_coverage << endl;
1174 }
1175 } else
1176 if (params.sub_size > num_area_coverage) {
1177 params.sub_size = num_area_coverage;
1178 if (params.min_size > params.sub_size)
1179 params.min_size = params.sub_size;
1180 cout << "k is therefore set to a maximum of " << num_area_coverage << endl;
1181 }
1182 }
1183
1184 string ofile = params.out_prefix;
1185 ofile += ".lp";
1186 double score;
1187 int lp_ret, i;
1188 int nareas = area_taxa.size();
1189 int k, min_k, max_k, step_k, index;
1190
1191 if (params.pd_proportion == 1.0 && params.min_proportion == 0.0) {
1192 if (area_coverage->empty()) num_area_coverage = findMinAreas(params, *area_coverage);
1193 areas_set.resize(1);
1194 areas_set[0].push_back(area_coverage);
1195 if (isBudgetConstraint()) {
1196 params.budget = params.min_budget = num_area_coverage;
1197 } else {
1198 params.sub_size = params.min_size = num_area_coverage;
1199 }
1200 return;
1201 }
1202
1203
1204 double *variables = new double[nareas];
1205
1206 // identifying minimum k/budget to conserve the proportion of SD
1207 if (params.pd_proportion != 0.0) {
1208 if (params.min_proportion == 0.0) params.min_proportion = params.pd_proportion;
1209 cout << "running p = ";
1210 double prop;
1211 areas_set.resize(round((params.pd_proportion-params.min_proportion)/params.step_proportion) + 1);
1212 for (prop = params.min_proportion, index = 0; prop <= params.pd_proportion + 1e-6; prop += params.step_proportion, index++) {
1213 Split *area = new Split(nareas);
1214 if (prop < 1.0)
1215 findMinKArea_LP(params, ofile.c_str(), prop, *area);
1216 else
1217 *area = *area_coverage;
1218 areas_set[index].push_back(area);
1219 }
1220 /* if (params.min_proportion != 0.0)
1221 min_bk = findMinKArea_LP(params, ofile.c_str(), params.min_proportion);
1222 if (isBudgetConstraint()) {
1223 params.budget = bk;
1224 params.min_budget = min_bk;
1225 } else {
1226 params.sub_size = bk;
1227 params.min_size = min_bk;
1228 }
1229 cout << endl << (isBudgetConstraint() ? "budget" : "k") << " from " << min_bk << " to " << bk << endl;*/
1230 cout << endl;
1231 delete [] variables;
1232 delete area_coverage;
1233 return;
1234 }
1235
1236 IntVector list_k;
1237
1238 if (isBudgetConstraint()) { // non-budget case
1239 min_k = params.min_budget;
1240 max_k = params.budget;
1241 step_k = params.step_budget;
1242 } else {
1243 min_k = params.min_size;
1244 max_k = params.sub_size;
1245 step_k = params.step_size;
1246 }
1247 areas_set.resize((max_k - min_k)/step_k + 1);
1248 computeFeasibleBudget(params, list_k);
1249
1250 time_t time_init;
1251 time(&time_init);
1252 // now construction the optimal PD sets
1253 if (isBudgetConstraint())
1254 cout << "running budget = ";
1255 else
1256 cout << "running k = ";
1257 for (k = min_k; k <= max_k; k += step_k) {
1258 if (!list_k[k]) continue;
1259 index = (k - min_k) / step_k;
1260 if (!params.binary_programming) {
1261 cout << " " << k;
1262 cout.flush();
1263 transformLP_Area2(params, ofile.c_str(), k, false);
1264 if (params.gurobi_format)
1265 lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1266 else
1267 lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1268 } else lp_ret = 7;
1269
1270 if (lp_ret != 0 && lp_ret != 7)
1271 outError("Something went wrong with LP solver!");
1272 if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
1273 cout << " " << k << "(bin)";
1274 cout.flush();
1275 if (params.binary_programming)
1276 transformLP_Area2(params, ofile.c_str(), k, true);
1277 else
1278 lpVariableBinary(ofile.c_str(), params, initialareas);
1279 if (params.gurobi_format)
1280 lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1281 else
1282 lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1283 if (lp_ret != 0) // check error again without allowing non-binary
1284 outError("Something went wrong with LP solver!");
1285 }
1286
1287 Split *area = new Split(nareas, score);
1288 for (i = 0; i < nareas; i++)
1289 if (1.0 - variables[i] < tolerance) {
1290 //pd_set->addTaxon(taxa_order[i]);
1291 area->addTaxon(i);
1292 }
1293 calcPDArea(*area);
1294 areas_set[index].push_back(area);
1295 time_t time_cur;
1296 time(&time_cur);
1297 if (difftime(time_cur, time_init) > 10) {
1298 // write output if more than 10 seconds have elapsed
1299 printOutputSetScore(params, areas_set);
1300 PDRelatedMeasures pd_more; // just for called function, nothing
1301 summarizeSplit(params, *this, areas_set, pd_more, false);
1302 time_init = time_cur;
1303 }
1304 }
1305 cout << endl;
1306 delete [] variables;
1307 delete area_coverage;
1308 }
1309
1310
isPDArea()1311 bool PDNetwork::isPDArea() {
1312 return (sets->getNSets() > 0);
1313 }
1314
calcPDArea(Split & area_id_set)1315 void PDNetwork::calcPDArea(Split &area_id_set) {
1316 int ntaxa = getNTaxa();
1317 int nareas = area_taxa.size();
1318 Split sp(ntaxa);
1319 for (int i = 0; i < nareas; i++)
1320 if (area_id_set.containTaxon(i))
1321 sp += *area_taxa[i];
1322 calcPD(sp);
1323 area_id_set.weight = sp.weight;
1324 }
1325
isUniquelyCovered(int taxon,int & area)1326 bool PDNetwork::isUniquelyCovered(int taxon, int &area) {
1327 area = -1;
1328 for (int i = 0; i < getNAreas(); i++)
1329 if (area_taxa[i]->containTaxon(taxon)) {
1330 if (area < 0) area = i; else return false;
1331 }
1332 return (area >= 0);
1333 }
1334
1335
transformLP_Area_Coverage(const char * outfile,Params & params,Split & included_area)1336 void PDNetwork::transformLP_Area_Coverage(const char *outfile, Params ¶ms, Split &included_area) {
1337 int ntaxa = getNTaxa();
1338 int nareas = getNAreas();
1339 int i, j;
1340 IntVector::iterator it;
1341 Split tax_cover(ntaxa);
1342 for (it = initialareas.begin(); it != initialareas.end(); it++) {
1343 tax_cover += *(area_taxa[*it]);
1344 included_area.addTaxon(*it);
1345 }
1346 for (j = 0; j < ntaxa; j++) {
1347 if (isUniquelyCovered(j, i)) {
1348 if (verbose_mode >= VB_MED) {
1349 cout << "Taxon " << taxa->GetTaxonLabel(j) << " is uniquely covered by " << sets->getSet(i)->name << endl;
1350 }
1351 included_area.addTaxon(i);
1352 tax_cover.addTaxon(j);
1353 }
1354 }
1355 try {
1356 ofstream out;
1357 out.exceptions(ios::failbit | ios::badbit);
1358 out.open(outfile);
1359 iterator spit;
1360
1361 lpObjectiveMinK(out, params);
1362
1363 // add constraint: every taxon should be covered by some area
1364 for (j = 0; j < ntaxa; j++) {
1365 if (tax_cover.containTaxon(j)) continue;
1366 bool ok = false;
1367 for (i = 0; i < nareas; i++)
1368 if (area_taxa[i]->containTaxon(j)) {
1369 out << " +x" << i;
1370 ok = true;
1371 }
1372 if (!ok) continue;
1373 out << " >= 1";
1374 if (params.gurobi_format)
1375 out << endl;
1376 else
1377 out << ";" << endl;
1378 }
1379 lpBoundaryConstraint(out, params);
1380
1381 // add bound for variable x
1382 IntVector y_value;
1383 lpVariableBound(out, params, included_area, y_value);
1384 out.close();
1385 } catch (ios::failure) {
1386 outError(ERR_WRITE_OUTPUT, outfile);
1387 }
1388 }
1389
1390
findMinAreas(Params & params,Split & area_id)1391 int PDNetwork::findMinAreas(Params ¶ms, Split &area_id) {
1392 string ofile = params.out_prefix;
1393 ofile += ".lp";
1394 int nareas = getNAreas();
1395 int i;
1396 double *variables = new double[nareas];
1397 double score;
1398 Split included_area(nareas);
1399 transformLP_Area_Coverage(ofile.c_str(), params, included_area);
1400 int lp_ret;
1401 if (params.gurobi_format)
1402 lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1403 else
1404 lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1405
1406 if (lp_ret != 0 && lp_ret != 7)
1407 outError("Something went wrong with LP solver!");
1408 if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
1409 lpVariableBinary(ofile.c_str(), params, included_area);
1410
1411 if (params.gurobi_format)
1412 lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1413 else
1414 lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1415 if (lp_ret != 0) // check error again without allowing non-binary
1416 outError("Something went wrong with LP solver!");
1417 }
1418 area_id.setNTaxa(nareas);
1419 int count = 0;
1420 // for checking purpose
1421 Split taxon_coverage(getNTaxa());
1422
1423 for (i = 0; i < nareas; i++)
1424 if (1.0 - variables[i] < tolerance) {
1425 //pd_set->addTaxon(taxa_order[i]);
1426 area_id.addTaxon(i);
1427 taxon_coverage += *(area_taxa[i]);
1428 if (isBudgetConstraint())
1429 count += pda->getCost(i);
1430 else
1431 count++;
1432 }
1433 ofile = params.out_prefix;
1434 ofile += ".cover";
1435 try {
1436 ofstream out;
1437 out.exceptions(ios::failbit | ios::badbit);
1438 out.open(ofile.c_str());
1439 out << area_id.countTaxa() << " " << count << " " << computeBoundary(area_id) << " " << params.boundary_modifier << endl;
1440 for (i = 0; i < nareas; i++)
1441 if (area_id.containTaxon(i))
1442 out << sets->getSet(i)->name << endl;
1443 out.close();
1444 //cout << "Transformed LP problem printed to " << outfile << endl;
1445 } catch (ios::failure) {
1446 outError(ERR_WRITE_OUTPUT, ofile);
1447 }
1448
1449
1450 /*
1451 if (taxon_coverage.countTaxa() != getNTaxa()) {
1452 outError("Something wrong with LP in determining taxon coverage");
1453 }*/
1454 delete [] variables;
1455 return (count);
1456 }
1457
1458
checkAreaCoverage()1459 bool PDNetwork::checkAreaCoverage() {
1460 int ntaxa = getNTaxa();
1461 Split tax_cover(ntaxa);
1462 for (SplitSet::iterator it = area_taxa.begin(); it != area_taxa.end(); it++) {
1463 tax_cover += *(*it);
1464 }
1465 if (tax_cover.countTaxa() == ntaxa) {
1466 return true;
1467 }
1468
1469 cout << "WARNING: some taxa are not covered by any area including: ";
1470 for (int i = 0; i < ntaxa; i++)
1471 if (!tax_cover.containTaxon(i)) cout << taxa->GetTaxonLabel(i) << " ";
1472 cout << endl;
1473 return false;
1474 }
1475
1476
1477 /***********************************************
1478 ***********************************************
1479 LP-aided functions
1480 ***********************************************/
1481
1482
lpObjectiveMaxSD(ostream & out,Params & params,IntVector & y_value,int total_size)1483 void PDNetwork::lpObjectiveMaxSD(ostream &out, Params ¶ms, IntVector &y_value, int total_size) {
1484 //IntVector y_value, count1, count2;
1485 iterator spit;
1486 int i;
1487 // define the objective function
1488 if (params.gurobi_format)
1489 out << "Maximize" << endl;
1490 else
1491 out << "max: ";
1492
1493 for (spit = begin(),i=0; spit != end(); spit++,i++) {
1494 if (y_value[i] < 0)
1495 out << " +" << (*spit)->getWeight() << " y" << i;
1496 else if (y_value[i] >= 2)
1497 out << " +" << (*spit)->getWeight() << " x" << y_value[i] - 2;
1498 }
1499
1500 if (params.gurobi_format)
1501 out << endl << "Subject to" << endl;
1502 else
1503 out << ";" << endl;
1504 }
1505
1506 ///// TODO FOR taxon selection
lpObjectiveMinK(ostream & out,Params & params)1507 void PDNetwork::lpObjectiveMinK(ostream &out, Params ¶ms) {
1508 iterator spit;
1509 int i, j;
1510 int nareas = area_taxa.size();
1511
1512 // define the objective function
1513 if (params.gurobi_format)
1514 out << "Minimize" << endl;
1515 else
1516 out << "min: ";
1517
1518 for (j = 0; j < nareas; j++) {
1519 double coeff = (isBudgetConstraint()) ? getPdaBlock()->getCost(j) : 1.0;
1520 if (areas_boundary) coeff += areas_boundary[j*nareas+j] * params.boundary_modifier;
1521 out << ((j>0) ? " +" : "") << coeff << " x" << j;
1522
1523
1524 }
1525
1526 if (areas_boundary && params.boundary_modifier != 0.0) {
1527 if (params.quad_programming)
1528 out << " + [";
1529 for (i = 0; i < nareas-1; i++)
1530 for (j = i+1; j < nareas; j++)
1531 if (areas_boundary[i*nareas+j] > 0.0) {
1532 double coeff = 2*areas_boundary[i*nareas+j] * params.boundary_modifier;
1533 if (params.quad_programming)
1534 out << " -" << coeff << " x" << i << " * x" << j;
1535 else
1536 out << " -" << coeff << " y" << i << "_" << j;
1537 }
1538 if (params.quad_programming)
1539 out << " ] / 2";
1540 }
1541
1542 if (params.gurobi_format)
1543 out << endl << "Subject to" << endl;
1544 else
1545 out << ";" << endl;
1546 }
1547
lpK_BudgetConstraint(ostream & out,Params & params,int total_size)1548 void PDNetwork::lpK_BudgetConstraint(ostream &out, Params ¶ms, int total_size) {
1549
1550 int nvars;
1551 int i, j;
1552 if (isPDArea())
1553 nvars = area_taxa.size();
1554 else
1555 nvars = getNTaxa();
1556
1557 for (j = 0; j < nvars; j++) {
1558 double coeff = (isBudgetConstraint()) ? getPdaBlock()->getCost(j) : 1.0;
1559 if (areas_boundary) coeff += areas_boundary[j*nvars+j] * params.boundary_modifier;
1560 out << ((j>0) ? " +" : "") << coeff << " x" << j;
1561
1562 }
1563
1564 if (areas_boundary && params.boundary_modifier != 0.0) {
1565 for (i = 0; i < nvars-1; i++)
1566 for (j = i+1; j < nvars; j++)
1567 if (areas_boundary[i*nvars+j] > 0.0) {
1568 double coeff = 2*areas_boundary[i*nvars+j] * params.boundary_modifier;
1569 out << " -" << coeff << " y" << i << "_" << j;
1570 }
1571 }
1572 out << " <= " << total_size;
1573
1574 // constraint for k-set or total budget
1575 /*
1576 if (isBudgetConstraint()) {
1577 for (j = 0; j < nvars; j++) {
1578 out << ((j==0)? "" : " +") << getPdaBlock()->getCost(j) << " x" << j;
1579 }
1580 out << " <= " << total_size;
1581 } else {
1582 for (j = 0; j < nvars; j++) {
1583 out << ((j==0)? "" : " +") << "x" << j;
1584 }
1585 out << " = " << total_size;
1586 }*/
1587 if (params.gurobi_format)
1588 out << endl;
1589 else
1590 out << ";" << endl;
1591 }
1592
lpBoundaryConstraint(ostream & out,Params & params)1593 void PDNetwork::lpBoundaryConstraint(ostream &out, Params ¶ms) {
1594 // constraint on the variable for the shared boundary between areas
1595 if (!areas_boundary || params.boundary_modifier == 0.0)
1596 return;
1597 if (params.quad_programming) return;
1598 int i, j;
1599 int nareas = area_taxa.size();
1600
1601 for (i = 0; i < nareas-1; i++)
1602 for (j = i+1; j < nareas; j++)
1603 if (areas_boundary[i*nareas+j] > 0.0) {
1604 out << "x" << i << " - y" << i << "_" << j << " >= 0";
1605 if (params.gurobi_format)
1606 out << endl;
1607 else
1608 out << ";" << endl;
1609 out << "x" << j << " - y" << i << "_" << j << " >= 0";
1610 if (params.gurobi_format)
1611 out << endl;
1612 else
1613 out << ";" << endl;
1614 }
1615 }
1616
lpSplitConstraint_RS(ostream & out,Params & params,IntVector & y_value,IntVector & count1,IntVector & count2,int total_size)1617 void PDNetwork::lpSplitConstraint_RS(ostream &out, Params ¶ms, IntVector &y_value, IntVector &count1, IntVector &count2, int total_size) {
1618 iterator spit;
1619 int i,j;
1620 //int root_id = -1;
1621 //if (params.root || params.is_rooted) root_id = initialset[0];
1622 int nareas = area_taxa.size();
1623
1624
1625 // adding the constraint for splits
1626 for (spit = begin(),i=0; spit != end(); spit++,i++) {
1627 if (y_value[i] >= 0) continue;
1628 Split *sp = (*spit);
1629
1630 if (count1[i] < nareas && (isBudgetConstraint() || count1[i] <= nareas - total_size))
1631 {
1632 out << "y" << i;
1633 if (!params.gurobi_format)
1634 out << " <=";
1635 for (j = 0; j < nareas; j++) {
1636 if (sp->overlap(*area_taxa[j])) {
1637 if (params.gurobi_format)
1638 out << " -x" << j;
1639 else
1640 out << " +x" << j;
1641 }
1642 }
1643 if (params.gurobi_format)
1644 out << " <= 0" << endl;
1645 else
1646 out << ";" << endl;
1647 }
1648
1649 if (count2[i] < nareas && (isBudgetConstraint() || count2[i] <= nareas - total_size))
1650 {
1651 sp->invert(); // scan the invert
1652 out << "y" << i;
1653 if (!params.gurobi_format)
1654 out << " <=";
1655 for (j = 0; j < nareas; j++) {
1656 if (sp->overlap(*area_taxa[j])) {
1657 if (params.gurobi_format)
1658 out << " -x" << j;
1659 else
1660 out << " +x" << j;
1661 }
1662 }
1663 if (params.gurobi_format)
1664 out << " <= 0" << endl;
1665 else
1666 out << ";" << endl;
1667 sp->invert(); // invert back to original
1668 }
1669 }
1670 }
1671
lpSplitConstraint_TS(ostream & out,Params & params,IntVector & y_value,int total_size)1672 void PDNetwork::lpSplitConstraint_TS(ostream &out, Params ¶ms, IntVector &y_value, int total_size) {
1673 iterator spit;
1674 int i,j;
1675 int ntaxa = getNTaxa();
1676 // adding the constraint for splits
1677 for (spit = begin(),i=0; spit != end(); spit++,i++) {
1678 if (y_value[i] >= 0) continue;
1679
1680 Split *sp = (*spit);
1681 bool contain_initset = sp->containAny(initialset);
1682
1683 if (!contain_initset && (isBudgetConstraint() || sp->countTaxa() <= ntaxa - total_size)) {
1684 out << "y" << i;
1685 for (j = 0; j < ntaxa; j++)
1686 if (sp->containTaxon(j))
1687 out << " -x" << j;
1688 out << " <= 0";
1689 if (params.gurobi_format)
1690 out << endl;
1691 else
1692 out << ";" << endl;
1693 }
1694 contain_initset = false;
1695 if (initialset.size() > 0) {
1696 sp->invert();
1697 contain_initset = sp->containAny(initialset);
1698 sp->invert();
1699 }
1700 if (!contain_initset && (isBudgetConstraint() || sp->countTaxa() >= total_size)) {
1701 out << "y" << i;
1702 for (j = 0; j < ntaxa; j++)
1703 if (!sp->containTaxon(j))
1704 out << " -x" << j;
1705 out << " <= 0";
1706 if (params.gurobi_format)
1707 out << endl;
1708 else
1709 out << ";" << endl;
1710 }
1711 }
1712 }
1713
1714
lpMinSDConstraint(ostream & out,Params & params,IntVector & y_value,double pd_proportion)1715 void PDNetwork::lpMinSDConstraint(ostream &out, Params ¶ms, IntVector &y_value, double pd_proportion) {
1716 iterator spit;
1717 int i;
1718 double total_weight = calcWeight();
1719 double required_sd = total_weight * pd_proportion;
1720 if (required_sd > total_weight) required_sd = total_weight;
1721 required_sd -= 1e-6;
1722 // adding constraint for min conserved PD proportion
1723 for (spit = begin(),i=0; spit != end(); spit++,i++) {
1724 if (y_value[i] < 0)
1725 out << " +" << (*spit)->getWeight() << " y" << i;
1726 else if (y_value[i] >= 2)
1727 out << " +" << (*spit)->getWeight() << " x" << y_value[i] - 2;
1728 else if (y_value[i] == 1) required_sd -= (*spit)->getWeight();
1729 }
1730 out.precision(12);
1731 out << " >= " << required_sd;
1732 out.precision(6);
1733
1734 if (params.gurobi_format)
1735 out << endl;
1736 else
1737 out << ";" << endl;
1738 }
1739
lpVariableBound(ostream & out,Params & params,Split & included_vars,IntVector & y_value)1740 void PDNetwork::lpVariableBound(ostream &out, Params ¶ms, Split &included_vars, IntVector &y_value) {
1741 IntVector::iterator it2;
1742 int i, j;
1743 // define the variable boundary
1744
1745 if (params.gurobi_format)
1746 out << "Bounds" << endl;
1747
1748
1749 for (j = 0; j < included_vars.getNTaxa(); j++) {
1750 if (included_vars.containTaxon(j)) {
1751 out << "x" << j << " = 1";
1752 } else {
1753 if (params.gurobi_format)
1754 out << "0 <= ";
1755 out << "x" << j << " <= 1";
1756 }
1757 if (params.gurobi_format)
1758 out << endl;
1759 else
1760 out << ";" << endl;
1761 }
1762
1763 if (!y_value.empty()) {
1764 for (i = 0; i < getNSplits(); i++) {
1765 if (y_value[i] >= 0) continue;
1766 if (params.gurobi_format)
1767 out << "0 <= ";
1768 out << "y" << i << " <= 1";
1769 if (params.gurobi_format)
1770 out << endl;
1771 else
1772 out << ";" << endl;
1773 }
1774 }
1775 int nvars = included_vars.getNTaxa();
1776 if (areas_boundary && params.boundary_modifier != 0.0 && !params.quad_programming) {
1777 for (i = 0; i < included_vars.getNTaxa()-1; i++)
1778 for (j = i+1; j < included_vars.getNTaxa(); j++)
1779 if (areas_boundary[i*nvars+j] > 0.0) {
1780 if (params.gurobi_format)
1781 out << "0 <= ";
1782 out << "y" << i << "_" << j << " <= 1";
1783 if (params.gurobi_format)
1784 out << endl;
1785 else
1786 out << ";" << endl;
1787
1788 }
1789 }
1790 }
1791
lpVariableBinary(ostream & out,Params & params,Split & included_vars)1792 void PDNetwork::lpVariableBinary(ostream &out, Params ¶ms, Split &included_vars) {
1793 int nvars;
1794 int j;
1795 if (isPDArea())
1796 nvars = area_taxa.size();
1797 else
1798 nvars = getNTaxa();
1799
1800 bool first = true;
1801 for (j = 0; j < nvars; j++) {
1802 if (included_vars.containTaxon(j)) continue;
1803 if (params.gurobi_format) {
1804 if (!first)
1805 out << " ";
1806 else
1807 out << "Binary" << endl;
1808 } else {
1809 if (!first)
1810 out << ", ";
1811 else
1812 out << "bin ";
1813 }
1814 out << "x" << j;
1815 first = false;
1816 }
1817 if (!first) {
1818 if (params.gurobi_format)
1819 out << endl;
1820 else
1821 out << ";" << endl;
1822 }
1823 }
1824
1825
1826 /**
1827 add binary variables
1828 */
lpVariableBinary(const char * outfile,Params & params,Split & included_vars)1829 void PDNetwork::lpVariableBinary(const char *outfile, Params ¶ms, Split &included_vars) {
1830 try {
1831 ofstream out;
1832 out.exceptions(ios::failbit | ios::badbit);
1833 out.open(outfile, ios::app);
1834 lpVariableBinary(out, params, included_vars);
1835 out.close();
1836 } catch (ios::failure) {
1837 outError(ERR_WRITE_OUTPUT, outfile);
1838 }
1839 }
1840
1841
lpVariableBinary(const char * outfile,Params & params,IntVector & initialset)1842 void PDNetwork::lpVariableBinary(const char *outfile, Params ¶ms, IntVector &initialset) {
1843 int nvars;
1844 if (isPDArea())
1845 nvars = area_taxa.size();
1846 else
1847 nvars = getNTaxa();
1848 Split included_vars(nvars);
1849 for (IntVector::iterator it2 = initialset.begin(); it2 != initialset.end(); it2++)
1850 included_vars.addTaxon(*it2);
1851 lpVariableBinary(outfile, params, included_vars);
1852 }
1853
lpInitialArea(ostream & out,Params & params)1854 void PDNetwork::lpInitialArea(ostream &out, Params ¶ms) {
1855 int nareas = getNAreas();
1856 int j;
1857
1858 // adding constraint for initialset
1859 for (IntVector::iterator it = initialset.begin(); it != initialset.end(); it++) {
1860 if (it == initialset.begin() && (params.root || params.is_rooted)) // ignore the root
1861 continue;
1862 out << "1 <= ";
1863 bool ok = false;
1864 for (j = 0; j < nareas; j++)
1865 if (area_taxa[j]->containTaxon(*it)) {
1866 out << " +x" << j;
1867 ok = true;
1868 }
1869 if (params.gurobi_format)
1870 out << endl;
1871 else
1872 out << ";" << endl;
1873 if (!ok) {
1874 outError("No area contains taxon ", taxa->GetTaxonLabel(*it));
1875 }
1876 }
1877 }
1878
checkYValue(int total_size,vector<int> & y_value)1879 void PDNetwork::checkYValue(int total_size, vector<int> &y_value) {
1880 iterator spit;
1881 int ntaxa = getNTaxa();
1882 int i;
1883
1884 y_value.resize(getNSplits(), -1);
1885 for (spit = begin(),i=0; spit != end(); spit++,i++) {
1886 Split *sp = (*spit);
1887 int id = -1;
1888 int cnt = sp->countTaxa();
1889 if (cnt > ntaxa / 2) {
1890 sp->invert();
1891 cnt = ntaxa - cnt;
1892 }
1893 if (cnt == 1)
1894 id = sp->firstTaxon();
1895 if (id >= 0) {
1896 // if the split is external -> y[i] = x[id]
1897 y_value[i] = id+2;
1898 continue;
1899 }
1900 if (!isBudgetConstraint()) {
1901 if (cnt > ntaxa - total_size && cnt < total_size) {
1902 // if both constraints can be dropped -> y[i] = 1
1903 y_value[i] = 1;
1904 }
1905 }
1906 }
1907
1908 }
1909
1910
1911
1912
checkYValue_Area(int total_size,vector<int> & y_value,vector<int> & count1,vector<int> & count2)1913 void PDNetwork::checkYValue_Area(int total_size, vector<int> &y_value, vector<int> &count1, vector<int> &count2) {
1914 iterator spit;
1915 int nareas = area_taxa.size();
1916 int i, j;
1917
1918 y_value.resize(getNSplits(), -1);
1919 count1.resize(getNSplits(), 0);
1920 count2.resize(getNSplits(), 0);
1921 for (spit = begin(),i=0; spit != end(); spit++,i++) {
1922 Split *sp = (*spit);
1923 int id1 = -1, id2 = -1;
1924 for (j = 0; j < nareas; j++) {
1925 if (sp->overlap(*area_taxa[j])) {
1926 count1[i]++;
1927 id1 = j;
1928 }
1929 }
1930 sp->invert();
1931 for (j = 0; j < nareas; j++) {
1932 if (sp->overlap(*area_taxa[j])) { count2[i]++; id2 = j; }
1933 }
1934 sp->invert(); // invert back to original
1935 if (count1[i] == 0 || count2[i] == 0)
1936 y_value[i] = 0;
1937 else {
1938
1939 if (count1[i] == nareas && count2[i] == nareas) {
1940 y_value[i] = 1;
1941 continue;
1942 }
1943 if (isBudgetConstraint())
1944 continue;
1945
1946 if (count1[i] == 1 && count2[i] > nareas - total_size) {
1947 y_value[i] = id1 + 2;
1948 } else if (count2[i] == 1 && count1[i] > nareas - total_size) {
1949 y_value[i] = id2 + 2;
1950 //continue;
1951 } else if (count1[i] > nareas - total_size && count2[i] > nareas - total_size) {
1952 y_value[i] = 1;
1953 }
1954 }
1955 }
1956
1957 }
1958
speciesList(vector<string> * speciesNames)1959 void PDNetwork::speciesList(vector<string> *speciesNames)
1960 {
1961 for(int i=0; i<getNTaxa();i++)
1962 (*speciesNames).push_back(taxa->GetTaxonLabel(i));
1963
1964 }
1965