1 /***************************************************************************
2  *   Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "tree/mtree.h"
21 #include "pdnetwork.h"
22 #include "ncl/ncl.h"
23 #include "nclextra/msetsblock.h"
24 #include "nclextra/myreader.h"
25 #include "lpwrapper.h"
26 #include "gurobiwrapper.h"
27 
28 extern void summarizeSplit(Params &params, PDNetwork &sg, vector<SplitSet> &pd_set, PDRelatedMeasures &pd_more, bool full_report);
29 
30 
PDNetwork()31 PDNetwork::PDNetwork()
32  : SplitGraph()
33 {
34 	extra_pd = 0;
35 	min_pd = false;
36 }
37 
PDNetwork(Params & params)38 PDNetwork::PDNetwork(Params &params) : SplitGraph(params) {
39 	extra_pd = 0;
40 	min_pd = false;
41 
42 	if (params.is_rooted)
43 		readRootNode(ROOT_NAME);
44 
45 	// read the parameter file
46 	if (params.param_file != NULL)
47 		readParams(params);
48 
49 	if (params.budget_file != NULL) {
50 		if (isPDArea())
51 			pda->readBudgetAreaFile(params);
52 		else
53 			pda->readBudgetFile(params);
54 	}
55 	// identify the root
56 	if (params.root != NULL)
57 		readRootNode(params.root);
58 
59 	// initial PD min
60 	if (params.find_pd_min)
61 		initPDMin();
62 
63 	// read the initial set of taxa, incoporate info into the split system
64 	if (params.initial_file != NULL && params.eco_dag_file == NULL)
65 		readInitialSet(params);
66 
67 	if (!initialset.empty() && !isPDArea())
68 		proceedInitialSet();
69 
70 	if (params.initial_area_file != NULL)
71 		readInitialAreas(params);
72 
73 
74 
75 }
76 
77 
78 /**
79 	Identify the root node if specified, include it into the initial set
80 	@param root_name name of the root node
81 */
readRootNode(const char * root_name)82 void PDNetwork::readRootNode(const char *root_name) {
83 	int id = -1;
84 	try {
85 		id = taxa->FindTaxon(root_name);
86 	} catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
87 		outError(ERR_NO_TAXON, root_name);
88 	}
89 	initialset.clear();
90 	initialset.push_back(id);
91 	//if (sets->getNSets() == 0)
92 }
93 
94 
95 
readParams(Params & params)96 void PDNetwork::readParams(Params &params) {
97 	int ntaxa = getNTaxa() - params.is_rooted;
98 
99 	// read parameters from file
100 	double scale;
101 	StrVector tax_name;
102 	DoubleVector ori_weight, tax_weight;
103 	readWeightFile(params, ntaxa, scale, tax_name, ori_weight);
104 
105 	// now convert the weights
106 	tax_weight.resize(ntaxa, 0);
107 	for (int i = 0; i < tax_name.size(); i++) {
108 		int id = -1;
109 		try {
110 			string name = "";
111 			name.append(tax_name[i]);
112 			id = taxa->FindTaxon(NxsString(name.c_str()));
113 		} catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
114 			outError(ERR_NO_TAXON, tax_name[i]);
115 		}
116 		tax_weight[id] = ori_weight[i];
117 	}
118 
119 	if (params.scaling_factor >= 0) {
120 		if (params.scaling_factor > 1) outError("Scaling factor must be between 0 and 1");
121 		cout << "Rescaling split weights with " << params.scaling_factor <<
122 			" and taxa weights with " << 1 - params.scaling_factor << endl;
123 		scale = params.scaling_factor;
124 		for (DoubleVector::iterator it = tax_weight.begin(); it != tax_weight.end(); it++)
125 			(*it) *= (1 - scale);
126 	}
127 
128 	// incoporate into the split system
129 	for (iterator it = begin(); it != end(); it++) {
130 		int id = (*it)->trivial();
131 		// first, multiply split weight with the coefficient
132 		(*it)->weight *= scale;
133 
134 		// if a trivial split, add the important parameter f
135 		if (id >= 0)
136 			(*it)->weight += tax_weight[id];
137 	}
138 }
139 
140 
141 /**
142 	read the initial set of taxa to be included into PD-tree
143 	@param params program parameters
144 */
readInitialSet(Params & params)145 void PDNetwork::readInitialSet(Params &params) {
146 	extra_pd = 0.0;
147 	int ntaxa = getNTaxa() - params.is_rooted;
148 	StrVector tax_name;
149 	readInitTaxaFile(params, ntaxa, tax_name);
150 	if (tax_name.empty())
151 		outError("No taxa found");
152 	for (StrVector::iterator it = tax_name.begin(); it != tax_name.end(); it++) {
153 		int id = -1;
154 		try {
155 			string name = "";
156 			name.append(*it);
157 			id = taxa->FindTaxon(NxsString(name.c_str()));
158 		} catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
159 			outError(ERR_NO_TAXON, *it);
160 		}
161 		initialset.push_back(id);
162 	}
163 
164 	if (isPDArea()) return;
165 
166 	if (isBudgetConstraint()) {
167 		int budget = (params.budget >= 0) ? params.budget : pda->budget;
168 		if (calcCost(initialset) > budget)
169 			outError(ERR_TOO_SMALL_BUDGET);
170 	} else {
171 		int sub_size = (params.sub_size > 1) ? params.sub_size : pda->sub_size;
172 		if (initialset.size() > sub_size)
173 			outError(ERR_TOO_SMALL_K);
174 	}
175 }
176 
proceedInitialSet()177 void PDNetwork::proceedInitialSet() {
178 	double total_w = trunc(abs(calcWeight())+1);
179 	// get the set of initial taxa
180 	set<int> iset;
181 	for (IntVector::iterator it2 = initialset.begin(); it2 != initialset.end(); it2++)
182 		iset.insert(*it2);
183 	// now modifying the split weights
184 	for (iterator it = begin(); it != end(); it++) {
185 
186 		// get the taxa id of trivial split
187 		int id = (*it)->trivial();
188 		// if not trivial split, continue
189 		if (id < 0) continue;
190 
191 		if (iset.find(id) != iset.end()) {
192 			// increase the trivial split weight
193 			(*it)->weight += total_w;
194 			extra_pd += total_w;
195 		}
196 	}
197 }
198 
readInitialAreas(Params & params)199 void PDNetwork::readInitialAreas(Params &params) {
200 	if (!isPDArea())
201 		outError("Invalid -ia option: no areas specified");
202 	int nareas = sets->getNSets();
203 	StrVector area_name;
204 	readInitAreaFile(params, nareas, area_name);
205 	if (area_name.empty())
206 		outError("No area found");
207 	for (StrVector::iterator it = area_name.begin(); it != area_name.end(); it++) {
208 		int id = -1;
209 		id = sets->findArea(*it);
210 		if (id < 0)
211 			outError(ERR_NO_AREA, *it);
212 		initialareas.push_back(id);
213 	}
214 
215 	if (isBudgetConstraint()) {
216 		int budget = (params.budget >= 0) ? params.budget : pda->budget;
217 		if (calcCost(initialareas) > budget)
218 			outError(ERR_TOO_SMALL_BUDGET);
219 	} else {
220 		int sub_size = (params.sub_size >= 1) ? params.sub_size : pda->sub_size;
221 		if (initialareas.size() > sub_size)
222 			outError(ERR_TOO_SMALL_K);
223 	}
224 }
225 
226 
initPDMin()227 void PDNetwork::initPDMin() {
228 	min_pd = true;
229 	for (iterator it = begin(); it != end(); it++) {
230 		(*it)->weight = -(*it)->weight;
231 	}
232 }
233 
234 
235 /**
236 	compute the required costs to conserve a taxa set
237 	@param taxset set of taxa
238 	@return minimum budget required
239 */
calcCost(IntVector & taxset)240 int PDNetwork::calcCost(IntVector &taxset) {
241 	int sum = 0;
242 	for (IntVector::iterator it = taxset.begin(); it != taxset.end(); it++)
243 		sum += pda->costs[*it];
244 	return sum;
245 }
246 
247 /**
248 	compute the required costs to conserve a taxa set
249 	@param taxset set of taxa
250 	@return minimum budget required
251 */
calcCost(Split & taxset)252 int PDNetwork::calcCost(Split &taxset) {
253 	IntVector invec;
254 	taxset.getTaxaList(invec);
255 	return calcCost(invec);
256 }
257 
258 
259 
260 /********************************************************
261 	Now comes PD related stuff
262 ********************************************************/
263 
264 
265 
calcPD(Split & id_set)266 void PDNetwork::calcPD(Split &id_set) {
267 	if (initialset.empty()) {
268 		id_set.weight = calcWeight(id_set);
269 		return;
270 	}
271 	Split id(id_set);
272 	for (IntVector::iterator it = initialset.begin(); it != initialset.end(); it++)
273 		id.addTaxon(*it);
274 	id_set.weight = calcWeight(id);
275 }
276 
calcExclusivePD(Split & id_set)277 void PDNetwork::calcExclusivePD(Split &id_set) {
278 	id_set.invert();
279 	calcPD(id_set);
280 	id_set.invert();
281 	id_set.weight = calcWeight() - id_set.weight;
282 }
283 
284 
285 
computePD(Params & params,SplitSet & pd_set,PDRelatedMeasures & pd_more)286 void PDNetwork::computePD(Params &params, SplitSet &pd_set, PDRelatedMeasures &pd_more) {
287 	//MSetsBlock *sets;
288 	//sets = new MSetsBlock();
289 
290 	//sets->Report(cout);
291 	TaxaSetNameVector *allsets = sets->getSets();
292 	TaxaSetNameVector::iterator i;
293 	for (i = allsets->begin(); i != allsets->end(); i++) {
294 		Split *id_set = new Split(getNTaxa());
295 		/*
296 		for (IntVector::iterator it = initialset.begin(); it != initialset.end(); it++)
297 			id_set->addTaxon(*it);
298 		*/
299 		for (vector<string>::iterator it2 = (*i)->taxlist.begin(); it2 != (*i)->taxlist.end(); it2++) {
300 			int id = -1;
301 			try {
302 				id = taxa->FindTaxon(NxsString(it2->c_str()));
303 			} catch (NxsTaxaBlock::NxsX_NoSuchTaxon) {
304 				outError(ERR_NO_TAXON, *it2);
305 			}
306 			if (id >= 0)
307 				id_set->addTaxon(id);
308 		}
309 		pd_more.setName.push_back((*i)->name);
310 		if (params.exclusive_pd) {
311 			calcExclusivePD(*id_set);
312 			pd_more.exclusivePD.push_back(id_set->getWeight());
313 		}
314 		calcPD(*id_set);
315 		pd_more.PDScore.push_back(id_set->weight);
316 		pd_set.push_back(id_set);
317 	}
318 	//delete sets;
319 }
320 
321 
322 
323 /********************************************************
324 	EXHAUSTIVE FUNCTION
325 ********************************************************/
326 
updateSplitVector(Split & curset,SplitSet & best_set)327 void PDNetwork::updateSplitVector(Split &curset, SplitSet &best_set)
328 {
329 	if (curset.weight > best_set[0]->weight) {
330 		for (int it = best_set.size()-1; it >= 0; it--)
331 			delete best_set[it];
332 		best_set.clear();
333 	}
334 	best_set.push_back(new Split(curset));
335 }
336 
337 /**
338 	calculate sum of weights of preserved splits in the taxa_set
339 	@param taxa_set a set of taxa
340 */
calcRaisedWeight(Split & taxa_set,IntList & rem_splits,IntList::iterator & rem_it)341 double PDNetwork::calcRaisedWeight(Split &taxa_set,
342 	IntList &rem_splits, IntList::iterator &rem_it)
343 {
344 	double sum = 0.0;
345 	for (IntList::iterator it = rem_splits.begin(); it != rem_it;)
346 		if ((*this)[*it]->preserved(taxa_set)) {
347 			sum += (*this)[*it]->weight;
348 			IntList::iterator prev_it = rem_it;
349 			prev_it--;
350 			int temp = *it;
351 			*it = *prev_it;
352 			*prev_it = temp;
353 			rem_it = prev_it;
354 		} else it++;
355 	return sum;
356 }
357 
calcMaxBudget()358 int PDNetwork::calcMaxBudget() {
359 	int sum = 0;
360 	for (DoubleVector::iterator it = pda->costs.begin(); it != pda->costs.end(); it++)
361 		sum += (*it);
362 	return sum;
363 }
364 
365 
enterFindPD(Params & params)366 void PDNetwork::enterFindPD(Params &params) {
367 	// check parameters
368 	if (params.pd_proportion == 0.0) {
369 		if (isBudgetConstraint()) {
370 			int budget = (params.budget >= 0) ? params.budget : pda->getBudget();
371 			if (budget < 0) {
372 				outError(ERR_NO_BUDGET);
373 			}
374 		} else {
375 			int min_accepted = !isPDArea() + 1;
376 			int sub_size = (params.sub_size >= min_accepted) ? params.sub_size : pda->getSubSize();
377 			if (sub_size < min_accepted && params.pdtaxa_file == NULL) {
378 				outError(ERR_NO_K);
379 			}
380 
381 		}
382 	}
383 	if (initialset.size() > 0) {
384 		cout << "Consider split network as ROOTED." << endl;
385 	} else {
386 		cout << "Consider split network as UNROOTED." << endl;
387 	}
388 
389 	cout << "Total split weights: " << calcWeight() << endl;
390 	cout << "  Internal split weights: " << calcWeight() - calcTrivialWeight() << endl;
391 	cout << "  Trivial split weights : " << calcTrivialWeight() << endl;
392 
393 	if (params.pd_proportion == 0.0) {
394 
395 		if (isBudgetConstraint()) {
396 			// fix the budget and min_budget first
397 			if (params.budget < 0) params.budget = pda->budget;
398 			if (verbose_mode >= VB_DEBUG) {
399 				pda->Report(cout);
400 			}
401 			cout << "Budget constraint with budget = " << params.budget << " ..." << endl;
402 			if (params.min_budget < 0)
403 				params.min_budget = pda->min_budget;
404 			if (params.min_budget < 0) params.min_budget = params.budget;
405 
406 			// resize the taxa_set
407 			int max_budget = calcMaxBudget();
408 			if (params.budget > max_budget) {
409 				cout << "Only maximum budget of " << max_budget << " required, truncating to that value..." << endl;
410 				params.budget = max_budget;
411 				if (params.min_budget > params.budget)
412 					params.min_budget = params.budget;
413 			}
414 
415 		} else	{
416 			int min_accepted = !isPDArea() + 1;
417 			if (params.sub_size <= 0) params.sub_size = pda->sub_size;
418 			if (!isPDArea()) {
419 				if (params.sub_size < 2 || params.sub_size > getNTaxa()) {
420 					ostringstream str;
421 					str <<"k must be between 2 and " << getNTaxa()-params.is_rooted;
422 					outError(str.str());
423 				}
424 			} else if (params.sub_size < 1 || params.sub_size > sets->getNSets()) {
425 					ostringstream str;
426 					str << "k must be between 1 and " << sets->getNSets();
427 					outError(str.str());
428 				}
429 			if (params.min_size < min_accepted) params.min_size = params.sub_size;
430 		}
431 	}
432 }
433 
printLPVersion(bool gurobi_format)434 void printLPVersion(bool gurobi_format) {
435 	if (gurobi_format)
436 		cout << "Using GUROBI" << endl;
437 	else {
438 		//int lp_majorversion, lp_minorversion, lp_release, lp_build;
439 		//lp_solve_version_info(&lp_majorversion, &lp_minorversion, &lp_release, &lp_build);
440 		//cout << "Using LP_SOLVE " << lp_majorversion << "." << lp_minorversion << "." << lp_release << "." << lp_build << endl;
441 	}
442 }
443 
findPD(Params & params,vector<SplitSet> & taxa_set,vector<int> & taxa_order)444 void PDNetwork::findPD(Params &params, vector<SplitSet> &taxa_set, vector<int> &taxa_order) {
445 
446 	// call the entering function
447 	enterFindPD(params);
448 
449 	int ntaxa = getNTaxa();
450 	int nsplits = getNSplits();
451 	Split curset(ntaxa, 0.0);
452 	IntList rem_splits;
453 
454 	for (int i = 0; i < nsplits; i++)
455 		rem_splits.push_back(i);
456 	IntList::iterator rem_it = rem_splits.end();
457 
458 	params.detected_mode = EXHAUSTIVE;
459 
460 
461 	if (isPDArea()) {
462 		params.detected_mode = LINEAR_PROGRAMMING;
463 		printLPVersion(params.gurobi_format);
464 		cout << "Optimizing PD over " << sets->getNSets() << " areas..." << endl;
465 		cout << "Linear programming on general split network..." << endl;
466 		findPDArea_LP(params, taxa_set);
467 	} else if (params.run_mode == GREEDY) {
468 		// greedy search, not ensure to give the optimal sets!
469 		cout << "Start greedy search..." << endl;
470 		greedyPD(params.sub_size, curset, taxa_order);
471 		localSearchPD(params.sub_size, curset, taxa_order);
472 		taxa_set.resize(1);
473 		taxa_set[0].push_back(new Split(curset));
474 	} else if (params.run_mode != EXHAUSTIVE) {
475 		params.detected_mode = LINEAR_PROGRAMMING;
476 		printLPVersion(params.gurobi_format);
477 		cout << "Linear programming on general split network..." << endl;
478 		findPD_LP(params, taxa_set);
479 	}
480 	else if (isBudgetConstraint()) {
481 		// exhaustive search by the order
482 		cout << endl << "Start exhaustive search..." << endl;
483 		taxa_set.resize(1);
484 		taxa_set[0].push_back(new Split(ntaxa, 0.0));
485 		exhaustPDBudget(params.budget, -1, curset, params.find_all, taxa_set[0], taxa_order, rem_splits, rem_it);
486 	} else	{
487 		// exhaustive search by the order
488 		cout << endl << "Start exhaustive search..." << endl;
489 		taxa_set.resize(1);
490 		taxa_set[0].push_back(new Split(ntaxa, 0.0));
491 		exhaustPD2(params.sub_size, -1, curset, params.find_all, taxa_set[0], taxa_order, rem_splits, rem_it);
492 	}
493 
494 	// call the leaving function
495 	leaveFindPD(taxa_set);
496 }
497 
leaveFindPD(vector<SplitSet> & taxa_set)498 void PDNetwork::leaveFindPD(vector<SplitSet> &taxa_set) {
499 	// subtract the weights from the extra_pd
500 	if (extra_pd > 0)
501 		for (vector<SplitSet>::iterator it = taxa_set.begin(); it != taxa_set.end(); it++)
502 			for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++)
503 				(*it2)->weight -= extra_pd;
504 	if (min_pd)
505 		for (vector<SplitSet>::iterator it = taxa_set.begin(); it != taxa_set.end(); it++)
506 			for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++)
507 				(*it2)->weight = -(*it2)->weight;
508 }
509 
510 
511 /**
512 	exhaustive search VERSION 2 for maximal phylogenetic diversity of a given size
513 	@param subsize the subset size
514 	@param best_set (OUT) the set of taxa in the maximal PD set
515 	@param cur_tax current taxon
516 	@param curset current set
517 	@param taxa_order (OUT) order of inserted taxa
518 	@param rem_splits (IN) remaining splits
519 	@return the PD score of the maximal set
520 */
exhaustPD2(int subsize,int cur_tax,Split & curset,bool find_all,SplitSet & best_set,vector<int> & taxa_order,IntList & rem_splits,IntList::iterator & rem_it)521 double PDNetwork::exhaustPD2(int subsize, int cur_tax, Split &curset,
522 	bool find_all,SplitSet &best_set, vector<int> &taxa_order,
523 	IntList &rem_splits, IntList::iterator &rem_it )
524 {
525 	int ntaxa = getNTaxa();
526 	double saved_score = curset.weight;
527 	for (int tax = cur_tax+1; tax <= ntaxa - subsize; tax ++) {
528 		curset.addTaxon(taxa_order[tax]);
529 		IntList::iterator saved_it = rem_it;
530 		curset.weight += calcRaisedWeight(curset, rem_splits, rem_it);
531 		if (subsize > 1)
532 			exhaustPD2(subsize-1, tax, curset, find_all, best_set, taxa_order, rem_splits, rem_it);
533 		else {
534 			if (curset.weight >= best_set[0]->weight) {
535 				updateSplitVector(curset, best_set);
536 				//curset.report(cout);
537 			}
538 			//curset.report(cout);
539 		}
540 		curset.removeTaxon(taxa_order[tax]);
541 		curset.weight = saved_score;
542 		rem_it = saved_it;
543 		//restoreSplit(subsize, rem_splits, out_splits);
544 	}
545 	return best_set[0]->weight;
546 }
547 
548 
549 
exhaustPDBudget(int cur_budget,int cur_tax,Split & curset,bool find_all,SplitSet & best_set,vector<int> & taxa_order,IntList & rem_splits,IntList::iterator & rem_it)550 double PDNetwork::exhaustPDBudget(int cur_budget, int cur_tax, Split &curset,
551 	bool find_all,SplitSet &best_set, vector<int> &taxa_order,
552 	IntList &rem_splits, IntList::iterator &rem_it )
553 {
554 	int ntaxa = getNTaxa();
555 	double saved_score = curset.weight;
556 	for (int tax = cur_tax+1; tax < ntaxa; tax ++)
557 	if (pda->costs[taxa_order[tax]] <= cur_budget)
558 	{
559 		curset.addTaxon(taxa_order[tax]);
560 		IntList::iterator saved_it = rem_it;
561 
562 		curset.weight += calcRaisedWeight(curset, rem_splits, rem_it);
563 		if (curset.weight >= best_set[0]->weight) {
564 			updateSplitVector(curset, best_set);
565 			//curset.report(cout);
566 		}
567 
568 		if (tax < ntaxa-1)
569 			exhaustPDBudget(cur_budget - pda->costs[taxa_order[tax]], tax,
570 				curset, find_all, best_set, taxa_order, rem_splits, rem_it);
571 
572 			//curset.report(cout);
573 		curset.removeTaxon(taxa_order[tax]);
574 		curset.weight = saved_score;
575 		rem_it = saved_it;
576 		//restoreSplit(subsize, rem_splits, out_splits);
577 	}
578 	return best_set[0]->weight;
579 }
580 
581 
582 /********************************************************
583 	GREEDY SEARCH!
584 ********************************************************/
585 
586 /**
587 	greedy algorithm for phylogenetic diversity of a given size
588 	@param subsize the subset size
589 	@param taxa_set (OUT) the set of taxa in the PD-set
590 	@return the PD score of the maximal set, also returned in taxa_set.weight
591 */
greedyPD(int subsize,Split & taxa_set,vector<int> & taxa_order)592 double PDNetwork::greedyPD(int subsize, Split &taxa_set, vector<int> &taxa_order) {
593 	int ntaxa = getNTaxa();
594 	taxa_set.setNTaxa(ntaxa);
595 	taxa_set.weight = 0;
596 	taxa_order.clear();
597 	taxa_order.reserve(ntaxa);
598 
599 	int besti, bestj, i, j;
600 
601 	// start from the PD-2 set
602 	for (i = 0; i < ntaxa - 1; i++)
603 		for (j = 0; j < ntaxa; j++) {
604 			Split curset;
605 			curset.setNTaxa(ntaxa);
606 			curset.addTaxon(i);
607 			curset.addTaxon(j);
608 			curset.weight = calcWeight(curset);
609 			if (curset.weight > taxa_set.weight) {
610 				taxa_set = curset;
611 				besti = i;
612 				bestj = j;
613 			}
614 		}
615 
616 	//taxa_set.report(cout);
617 	taxa_order.push_back(besti);
618 	taxa_order.push_back(bestj);
619 
620 	for (int step = 2; step < subsize; step++) {
621 		Split pdk_set = taxa_set;
622 		besti = -1;
623 		for (i = 0; i < ntaxa; i++)
624 		if (!pdk_set.containTaxon(i)) {
625 			Split curset;
626 			curset.setNTaxa(ntaxa);
627 			curset = pdk_set;
628 			curset.addTaxon(i);
629 			curset.weight = calcWeight(curset);
630 			if (curset.weight > taxa_set.weight || besti == -1) {
631 				taxa_set = curset;
632 				besti = i;
633 			}
634 		}
635 		//taxa_set.report(cout);
636 		taxa_order.push_back(besti);
637 	}
638 	return taxa_set.getWeight();
639 }
640 
641 
642 /**
643 	testing algorithm for phylogenetic diversity of a given size
644 	@param subsize the subset size
645 	@param taxa_set (OUT) the set of taxa in the PD-set
646 	@return the PD score of the maximal set, also returned in taxa_set.weight
647 */
localSearchPD(int subsize,Split & taxa_set,vector<int> & taxa_order)648 double PDNetwork::localSearchPD(int subsize, Split &taxa_set, vector<int> &taxa_order) {
649 	int ntaxa = getNTaxa();
650 	//int nsplits = getNSplits();
651 	int i;
652 	taxa_set.setNTaxa(ntaxa);
653 	for (i = 0; i < subsize; i++)
654 		taxa_set.addTaxon(taxa_order[i]);
655 	taxa_set.weight = calcWeight(taxa_set);
656 	taxa_set.report(cout);
657 	bool stop;
658 	do {
659 		stop = true;
660 		for (i = 0; i < ntaxa; i++) if (taxa_set.containTaxon(i)) {
661 			for (int j = 0; j < ntaxa; j++) if (!taxa_set.containTaxon(j))
662 			{
663 				taxa_set.addTaxon(j);
664 				taxa_set.removeTaxon(i);
665 				double new_w = calcWeight(taxa_set);
666 				if (new_w > taxa_set.weight) {
667 					taxa_set.weight = new_w;
668 					stop = false;
669 					taxa_set.report(cout);
670 					break;
671 				}
672 				taxa_set.removeTaxon(j);
673 				taxa_set.addTaxon(i);
674 			}
675 			if (!stop) break;
676 		}
677 	} while (!stop);
678 	return taxa_set.getWeight();
679 }
680 
681 
calcPDGain(vector<SplitSet> & pd_set,mmatrix (double)& delta)682 void PDNetwork::calcPDGain(vector<SplitSet> &pd_set, mmatrix(double) &delta) {
683 	vector<SplitSet>::iterator it;
684 	int ntaxa = pd_set.front().front()->getNTaxa();
685 	delta.resize(pd_set.size());
686 	int cnt = 0;
687 	for (cnt = 0; cnt < delta.size(); cnt++)
688 		delta[cnt].resize(ntaxa, 0);
689 
690 
691 
692 	for (it = pd_set.begin(), cnt = 0; it != pd_set.end(); it++, cnt++) {
693 		ASSERT(!(*it).empty());
694 		// take only the first split for calculation
695 		Split *sp = (*it).front();
696 		for (int tax = 0; tax < ntaxa; tax++)
697 			if (!sp->containTaxon(tax)) {
698 				sp->addTaxon(tax);
699 				delta[cnt][tax] = calcWeight(*sp) - sp->weight;
700 				sp->removeTaxon(tax);
701 			}
702 	}
703 }
704 
calcPDEndemism(SplitSet & area_set,DoubleVector & pd_endem)705 void PDNetwork::calcPDEndemism(SplitSet &area_set, DoubleVector &pd_endem) {
706 	SplitSet::iterator it_s;
707 
708 	// make union of all id_sets
709 	Split id_union(getNTaxa());
710 	for (it_s = area_set.begin(); it_s != area_set.end(); it_s++)
711 		id_union += *(*it_s);
712 
713 	// calculate PD of union
714 	calcPD(id_union);
715 
716 	// now calculate PD endemism
717 	pd_endem.clear();
718 	for (it_s = area_set.begin(); it_s != area_set.end(); it_s++) {
719 		// make union of all other set
720 		Split id_other(getNTaxa());
721 		for (SplitSet::iterator it_s2 = area_set.begin(); it_s2 != area_set.end(); it_s2++)
722 			if (it_s2 != it_s) id_other += *(*it_s2);
723 		// calculate PD of all other sets
724 		calcPD(id_other);
725 
726 		// calc PD endemism
727 		pd_endem.push_back(id_union.weight - id_other.weight);
728 	}
729 }
730 
calcPDComplementarity(SplitSet & area_set,char * area_names,vector<string> & all_names,DoubleVector & pd_comp)731 void PDNetwork::calcPDComplementarity(SplitSet &area_set, char *area_names,
732 	vector<string> &all_names, DoubleVector &pd_comp) {
733 
734 	set<string> given_areas;
735 
736 	parseAreaName(area_names, given_areas);
737 
738 /*
739 	for (set<string>::iterator it = given_areas.begin(); it != given_areas.end(); it++)
740 		cout << (*it) << "!";
741 	cout << endl;
742 */
743 	SplitSet::iterator it_s;
744 	vector<string>::iterator it_n;
745 
746 	Split given_id(getNTaxa());
747 
748 	// convert taxa set to id set
749 	for (it_s = area_set.begin(), it_n = all_names.begin(); it_s != area_set.end(); it_s++, it_n++) {
750 		if (given_areas.find(*it_n) != given_areas.end())
751 			given_id += *(*it_s);
752 	}
753 
754 	if (given_id.countTaxa() == 0)
755 		outError("Complementary area name(s) not correct");
756 
757 	calcPD(given_id);
758 
759 	// now calculate PD complementarity
760 	pd_comp.clear();
761 	for (it_s = area_set.begin(); it_s != area_set.end(); it_s++) {
762 		// make union the two sets
763 		Split id_both(*(*it_s));
764 		id_both += given_id;
765 		// calculate PD of both sets
766 		calcPD(id_both);
767 		// calc PD complementarity
768 		pd_comp.push_back(id_both.weight - given_id.weight);
769 	}
770 
771 }
772 
transformLP2(Params & params,const char * outfile,int total_size,bool make_bin)773 void PDNetwork::transformLP2(Params &params, const char *outfile, int total_size, bool make_bin) {
774 	Split included_tax(getNTaxa());
775 	IntVector::iterator it2;
776 	for (it2 = initialset.begin(); it2 != initialset.end(); it2++)
777 		included_tax.addTaxon(*it2);
778 	try {
779 		ofstream out;
780 		out.exceptions(ios::failbit | ios::badbit);
781 		out.open(outfile);
782 		vector<int> y_value;
783 		checkYValue(total_size, y_value);
784 
785 		lpObjectiveMaxSD(out, params, y_value, total_size);
786 		lpSplitConstraint_TS(out, params, y_value, total_size);
787 		lpK_BudgetConstraint(out, params, total_size);
788 		lpVariableBound(out, params, included_tax, y_value);
789 		if (make_bin)
790 			lpVariableBinary(out, params, included_tax);
791 
792 		out.close();
793 		//cout << "Transformed LP problem printed to " << outfile << endl;
794 	} catch (ios::failure) {
795 		outError(ERR_WRITE_OUTPUT, outfile);
796 	}
797 }
798 
799 //Olga:ECOpd split system
transformEcoLP(Params & params,const char * outfile,int total_size)800 void PDNetwork::transformEcoLP(Params &params, const char *outfile, int total_size) {
801 	try {
802 		ofstream out;
803 		out.exceptions(ios::failbit | ios::badbit);
804 		out.open(outfile);
805 		vector<int> y_value;
806 		y_value.resize(getNSplits(), -1);
807 		lpObjectiveMaxSD(out, params, y_value, total_size);
808 		lpSplitConstraint_TS(out, params, y_value, total_size);
809 		out.close();
810 
811 	} catch (ios::failure) {
812 		outError(ERR_WRITE_OUTPUT, outfile);
813 	}
814 }
815 
findPD_LP(Params & params,vector<SplitSet> & taxa_set)816 void PDNetwork::findPD_LP(Params &params, vector<SplitSet> &taxa_set) {
817 	if (params.find_all)
818 		outError("Current linear programming does not support multiple optimal sets!");
819 
820 	string ofile = params.out_prefix;
821 	ofile += ".lp";
822 	double score;
823 	int lp_ret, i, ntaxa = getNTaxa();
824 	int k, min_k, max_k, step_k, index;
825 
826 	double *variables = new double[ntaxa];
827 
828 	if (isBudgetConstraint()) { // non-budget case
829 		min_k = params.min_budget;
830 		max_k = params.budget;
831 		step_k = params.step_budget;
832 	} else {
833 		min_k = params.min_size;
834 		max_k = params.sub_size;
835 		step_k = params.step_size;
836 	}
837 	taxa_set.resize((max_k - min_k)/step_k + 1);
838 
839 	// now construction the optimal PD sets
840 	if (isBudgetConstraint())
841 		cout << "running budget = ";
842 	else
843 		cout << "running k = ";
844 	for (k = min_k; k <= max_k; k += step_k) {
845 		index = (k - min_k) / step_k;
846 		if (!params.binary_programming) {
847 			transformLP2(params, ofile.c_str(), k, false);
848 			cout << " " << k;
849 			cout.flush();
850 			if (params.gurobi_format)
851 				lp_ret = gurobi_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode, params.gurobi_threads);
852 			else
853 				lp_ret = lp_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode);
854 		} else lp_ret = 7;
855 		if (lp_ret != 0 && lp_ret != 7)
856 			outError("Something went wrong with LP solver!");
857 		if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
858 			if (params.binary_programming)
859 				transformLP2(params, ofile.c_str(), k, true);
860 			else
861 				lpVariableBinary(ofile.c_str(), params, initialset);
862 			cout << " " << k << "(bin)";
863 			cout.flush();
864 			if (params.gurobi_format)
865 				lp_ret = gurobi_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode, params.gurobi_threads);
866 			else
867 				lp_ret = lp_solve((char*)ofile.c_str(), ntaxa, &score, variables, verbose_mode);
868 			if (lp_ret != 0) // check error again without allowing non-binary
869 				outError("Something went wrong with LP solver!");
870 		}
871 
872 		Split *pd_set = new Split(ntaxa, score);
873 		for (i = 0; i < ntaxa; i++)
874 			if (1.0 - variables[i] < tolerance) {
875 				//pd_set->addTaxon(taxa_order[i]);
876 				pd_set->addTaxon(i);
877 			}
878 		calcPD(*pd_set);
879 		taxa_set[index].push_back(pd_set);
880 	}
881 	cout << endl;
882 	delete [] variables;
883 }
884 
transformLP_Area2(Params & params,const char * outfile,int total_size,bool make_bin)885 void PDNetwork::transformLP_Area2(Params &params, const char *outfile, int total_size, bool make_bin) {
886 	int nareas = getNAreas();
887 	Split included_area(nareas);
888 	IntVector::iterator it2;
889 	for (it2 = initialareas.begin(); it2 != initialareas.end(); it2++)
890 		included_area.addTaxon(*it2);
891 	try {
892 		ofstream out;
893 		out.exceptions(ios::failbit | ios::badbit);
894 		out.open(outfile);
895 		vector<int> y_value, count1, count2;
896 		checkYValue_Area(total_size, y_value, count1, count2);
897 
898 		lpObjectiveMaxSD(out, params, y_value, total_size);
899 		lpSplitConstraint_RS(out, params, y_value, count1, count2, total_size);
900 		lpInitialArea(out, params);
901 		lpK_BudgetConstraint(out, params, total_size);
902 		lpBoundaryConstraint(out, params);
903 		lpVariableBound(out, params, included_area, y_value);
904 		if (make_bin)
905 			lpVariableBinary(out, params, included_area);
906 
907 		out.close();
908 		//cout << "Transformed LP problem printed to " << outfile << endl;
909 	} catch (ios::failure) {
910 		outError(ERR_WRITE_OUTPUT, outfile);
911 	}
912 
913 }
914 
transformMinK_Area2(Params & params,const char * outfile,double pd_proportion,bool make_bin)915 void PDNetwork::transformMinK_Area2(Params &params, const char *outfile, double pd_proportion, bool make_bin) {
916 	int nareas = getNAreas();
917 	Split included_area(nareas);
918 	IntVector::iterator it2;
919 	for (it2 = initialareas.begin(); it2 != initialareas.end(); it2++)
920 		included_area.addTaxon(*it2);
921 	try {
922 		ofstream out;
923 		out.exceptions(ios::failbit | ios::badbit);
924 		out.open(outfile);
925 		vector<int> y_value, count1, count2;
926 		checkYValue_Area(0, y_value, count1, count2);
927 
928 		lpObjectiveMinK(out, params);
929 		lpMinSDConstraint(out, params, y_value, pd_proportion);
930 		lpSplitConstraint_RS(out, params, y_value, count1, count2, 0);
931 		lpInitialArea(out, params);
932 		lpBoundaryConstraint(out, params);
933 		lpVariableBound(out, params, included_area, y_value);
934 		if (make_bin)
935 			lpVariableBinary(out, params, included_area);
936 
937 		out.close();
938 		//cout << "Transformed LP problem printed to " << outfile << endl;
939 	} catch (ios::failure) {
940 		outError(ERR_WRITE_OUTPUT, outfile);
941 	}
942 }
943 
944 
findMinKArea_LP(Params & params,const char * filename,double pd_proportion,Split & area)945 double PDNetwork::findMinKArea_LP(Params &params, const char* filename, double pd_proportion, Split &area) {
946 	int nareas = area_taxa.size();
947 	double *variables = new double[nareas];
948 	double score;
949 	int lp_ret, i;
950 
951 
952 	if (!params.binary_programming) {
953 		cout << " " << pd_proportion;
954 		cout.flush();
955 		transformMinK_Area2(params, filename, pd_proportion, false);
956 		if (params.gurobi_format)
957 			lp_ret = gurobi_solve((char*)filename, nareas, &score, variables, verbose_mode, params.gurobi_threads);
958 		else
959 			lp_ret = lp_solve((char*)filename, nareas, &score, variables, verbose_mode);
960 	} else lp_ret = 7;
961 	if (lp_ret != 0 && lp_ret != 7)
962 		outError("Something went wrong with LP solver!");
963 	if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
964 		cout << " " << pd_proportion << "(bin)";
965 		cout.flush();
966 		if (params.binary_programming)
967 			transformMinK_Area2(params, filename, pd_proportion, true);
968 		else
969 			lpVariableBinary(filename, params, initialareas);
970 		if (params.gurobi_format)
971 			lp_ret = gurobi_solve((char*)filename, nareas, &score, variables, verbose_mode, params.gurobi_threads);
972 		else
973 			lp_ret = lp_solve((char*)filename, nareas, &score, variables, verbose_mode);
974 		if (lp_ret != 0) // check error again without allowing non-binary
975 			outError("Something went wrong with LP solver!");
976 	}
977 	area.setNTaxa(nareas);
978 	for (i = 0; i < nareas; i++)
979 		if (1.0 - variables[i] < tolerance) {
980 			//pd_set->addTaxon(taxa_order[i]);
981 			area.addTaxon(i);
982 		}
983 	calcPDArea(area);
984 	cout << " score: " << area.weight;
985 	double budget_k;
986 	if (isBudgetConstraint()) {
987 		budget_k = calcCost(area);
988 	} else {
989 		budget_k = area.countTaxa();
990 	}
991 	delete [] variables;
992 	return budget_k;
993 }
994 
computeFeasibleBudget(Params & params,IntVector & ok_budget)995 void PDNetwork::computeFeasibleBudget(Params &params, IntVector &ok_budget) {
996 	if (!isBudgetConstraint()) {
997 		ok_budget.resize(params.sub_size+1, 1);
998 		return;
999 	}
1000 	cout << "Computing feasible budget values..." << endl;
1001 	IntVector cost_present;
1002 	cost_present.resize((*max_element(pda->costs.begin(), pda->costs.end())) + 1, 0);
1003 	int i, j, num_cost = 0;
1004 	DoubleVector::iterator it;
1005 	for (it = pda->costs.begin(); it != pda->costs.end(); it++) {
1006 		if ((*it) != round(*it)) {
1007 			outError("Non integer cost detected.");
1008 		}
1009 		if ((*it) != 0 && !(cost_present[*it])) {
1010 			num_cost++;
1011 			cost_present[*it] = 1;
1012 		}
1013 	}
1014 	if (num_cost == 0) outError("All costs are zero! Please check the input budget file.");
1015 	if (cost_present[1]) {
1016 		// if cost of 1 detected, all budget values are feasible
1017 		ok_budget.resize(params.budget+1, 1);
1018 		return;
1019 	}
1020 	IntVector unique_cost;
1021 	IntVector::iterator it2;
1022 	for (i = 0, it2 = cost_present.begin(); it2 != cost_present.end(); it2++, i++)
1023 		if (*it2) unique_cost.push_back(i);
1024 	ASSERT(unique_cost.size() == num_cost);
1025 
1026 	ok_budget.resize(params.budget+1, 0);
1027 	// initialize all entry with corresponding cost
1028 	for (it2 = unique_cost.begin(); it2 != unique_cost.end(); it2++)
1029 	if (*it2 < ok_budget.size())
1030 		ok_budget[*it2] = 1;
1031 	// now use dynamic programming to find feasible budgets
1032 
1033 	for (i = 0; i <= params.budget; i++)
1034 		for (it2 = unique_cost.begin(); it2 != unique_cost.end(); it2++) {
1035 			j = i - (*it2);
1036 			if (j < 0) continue;
1037 			if (ok_budget[j]) {
1038 				ok_budget[i] = 1;
1039 				break;
1040 			}
1041 		}
1042 
1043 
1044 	if (verbose_mode < VB_MED)
1045 		return;
1046 	cout << "Feasible budgets:";
1047 	for (i = 0; i < ok_budget.size(); i++)
1048 		if (ok_budget[i]) cout << " " << i;
1049 	cout << endl;
1050 }
1051 
1052 
printOutputSetScore(Params & params,vector<SplitSet> & pd_set)1053 void PDNetwork::printOutputSetScore(Params &params, vector<SplitSet> &pd_set) {
1054 	char filename[300];
1055 	//int c_old = -1;
1056 	int c_num = 0, i;
1057 	//double w_old = -1.0;
1058 	char scorename[300];
1059 	ofstream scoreout;
1060 	ofstream out;
1061 	if (params.nr_output == 1) {
1062 		if (params.run_mode == PD_USER_SET || !isPDArea()) {
1063 			sprintf(filename, "%s.pdtaxa", params.out_prefix);
1064 			cout << "All taxa list(s) printed to " << filename << endl;
1065 		} else {
1066 			sprintf(filename, "%s.pdarea", params.out_prefix);
1067 			cout << "All area list(s) printed to " << filename << endl;
1068 		}
1069 		out.open(filename);
1070 		sprintf(scorename, "%s.score", params.out_prefix);
1071 		scoreout.open(scorename);
1072 	}
1073 	double total_weight = calcWeight();
1074 
1075 	for (vector<SplitSet>::iterator it = pd_set.begin(); it != pd_set.end(); it++) {
1076 		// ignore, if get the same PD sets again
1077 		//if (it != pd_set.begin() && it->getWeight() == (it-1)->getWeight() && it->size() == (it-1)->size())
1078 			//continue;
1079 		if ((*it).empty()) continue;
1080 		c_num = 0;
1081 		if (params.nr_output == 1)
1082 			scoreout << (*it)[0]->countTaxa() << "  " << (it)->getWeight() << endl;
1083 
1084 		for (SplitSet::iterator it2 = (*it).begin(); it2 != (*it).end(); it2++, c_num++ ){
1085 			Split *this_set = *it2;
1086 			int count = this_set->countTaxa();
1087 			//if (count == 0) continue;
1088 
1089 			//if (count != c_old) {
1090 			if (c_num == 0) {
1091 				//c_num = 0;
1092 				sprintf(filename, "%s.%d.pdtaxa", params.out_prefix, count);
1093 			}
1094 			else {
1095 				//c_num++;
1096 				sprintf(filename, "%s.%d.pdtaxa.%d", params.out_prefix, count, c_num);
1097 			}
1098 			//if (fabs(w_old - this_set->getWeight()) > 1e-5 || (c_old != count))
1099 	//			if (params.nr_output == 1)
1100 		//			scoreout << count << "  " << this_set->getWeight() << endl;
1101 			//w_old = this_set->getWeight();
1102 
1103 			//c_old = count;
1104 			if (params.nr_output > 10) {
1105 				out.open(filename);
1106 				if (params.run_mode == PD_USER_SET || !isPDArea()) {
1107 					for (i = 0; i < getNTaxa(); i++)
1108 						if (this_set->containTaxon(i))
1109 							out << getTaxa()->GetTaxonLabel(i) << endl;
1110 				} else {
1111 					for (i = 0; i < getSetsBlock()->getNSets(); i++)
1112 						if (this_set->containTaxon(i))
1113 							out << getSetsBlock()->getSet(i)->name << endl;
1114 				}
1115 				out.close();
1116 				//cout << "Taxa list printed to " << filename << endl;
1117 			} else if (params.nr_output == 1) {
1118 				out << count << "  " << this_set->getWeight() << " " <<
1119 					this_set->getWeight()  / total_weight << " " <<
1120 					calcCost(*this_set) << " " << computeBoundary(*this_set) << " " <<
1121 					params.boundary_modifier << endl;
1122 
1123 				if (params.run_mode == PD_USER_SET || !isPDArea()) {
1124 					for (i = 0; i < getNTaxa(); i++)
1125 						if (this_set->containTaxon(i))
1126 							out << getTaxa()->GetTaxonLabel(i) << endl;
1127 				} else {
1128 					for (i = 0; i < getSetsBlock()->getNSets(); i++)
1129 						if (this_set->containTaxon(i))
1130 							out << getSetsBlock()->getSet(i)->name << endl;
1131 				}
1132 			}
1133 		}
1134 	}
1135 
1136 	if (params.nr_output == 1) {
1137 		out.close();
1138 		scoreout.close();
1139 		//cout << "PD scores printed to " << scorename << endl;
1140 	}
1141 }
1142 
1143 
findPDArea_LP(Params & params,vector<SplitSet> & areas_set)1144 void PDNetwork::findPDArea_LP(Params &params, vector<SplitSet> &areas_set) {
1145 	if (params.find_all)
1146 		outError("Current linear programming does not support multiple optimal sets!");
1147 	PDRelatedMeasures pd_more;
1148 	// get the taxa in the areas, only if EMPTY!
1149 	Split *area_coverage = new Split();
1150 	int num_area_coverage = params.sub_size;
1151 	if (area_taxa.empty()) {
1152 		computePD(params, area_taxa, pd_more);
1153 		if (params.root || params.is_rooted) {
1154 			ASSERT(!initialset.empty());
1155 			int root_id = initialset[0];
1156 			for (SplitSet::iterator it = area_taxa.begin(); it != area_taxa.end(); it++)
1157 				(*it)->addTaxon(root_id);
1158 		}
1159 		checkAreaCoverage();
1160 		num_area_coverage = findMinAreas(params, *area_coverage);
1161 		calcPDArea(*area_coverage);
1162 		cout << "We found ";
1163 		if (isBudgetConstraint())
1164 			cout << "a budget of " << num_area_coverage << " is enough";
1165 		else
1166 			cout << "a number of " << num_area_coverage << " areas are enough";
1167 		cout << " to cover all feasible taxa" << endl;
1168 		if (isBudgetConstraint()) {
1169 			if (params.budget > num_area_coverage) {
1170 				params.budget = num_area_coverage;
1171 				if (params.min_budget > params.budget)
1172 					params.min_budget = params.budget;
1173 				cout << "budget is therefore set to a maximum of " << num_area_coverage << endl;
1174 			}
1175 		} else
1176 		if (params.sub_size > num_area_coverage) {
1177 			params.sub_size = num_area_coverage;
1178 			if (params.min_size > params.sub_size)
1179 				params.min_size = params.sub_size;
1180 			cout << "k is therefore set to a maximum of " << num_area_coverage << endl;
1181 		}
1182 	}
1183 
1184 	string ofile = params.out_prefix;
1185 	ofile += ".lp";
1186 	double score;
1187 	int lp_ret, i;
1188 	int nareas = area_taxa.size();
1189 	int k, min_k, max_k, step_k, index;
1190 
1191 	if (params.pd_proportion == 1.0 && params.min_proportion == 0.0) {
1192 		if (area_coverage->empty()) num_area_coverage = findMinAreas(params, *area_coverage);
1193 		areas_set.resize(1);
1194 		areas_set[0].push_back(area_coverage);
1195 		if (isBudgetConstraint()) {
1196 			params.budget = params.min_budget = num_area_coverage;
1197 		} else {
1198 			params.sub_size = params.min_size = num_area_coverage;
1199 		}
1200 		return;
1201 	}
1202 
1203 
1204 	double *variables = new double[nareas];
1205 
1206 	// identifying minimum k/budget to conserve the proportion of SD
1207 	if (params.pd_proportion != 0.0) {
1208 		if (params.min_proportion == 0.0) params.min_proportion = params.pd_proportion;
1209 		cout << "running p = ";
1210 		double prop;
1211 		areas_set.resize(round((params.pd_proportion-params.min_proportion)/params.step_proportion) + 1);
1212 		for (prop = params.min_proportion, index = 0; prop <= params.pd_proportion + 1e-6; prop += params.step_proportion, index++) {
1213 			Split *area = new Split(nareas);
1214 			if (prop < 1.0)
1215 				findMinKArea_LP(params, ofile.c_str(), prop, *area);
1216 			else
1217 				*area = *area_coverage;
1218  			areas_set[index].push_back(area);
1219 		}
1220 /*		if (params.min_proportion != 0.0)
1221 			min_bk = findMinKArea_LP(params, ofile.c_str(), params.min_proportion);
1222 		if (isBudgetConstraint()) {
1223 			params.budget = bk;
1224 			params.min_budget = min_bk;
1225 		} else {
1226 			params.sub_size = bk;
1227 			params.min_size = min_bk;
1228 		}
1229 		cout << endl << (isBudgetConstraint() ? "budget" : "k") << " from " << min_bk << " to " << bk << endl;*/
1230 		cout << endl;
1231 		delete [] variables;
1232 		delete area_coverage;
1233 		return;
1234 	}
1235 
1236 	IntVector list_k;
1237 
1238 	if (isBudgetConstraint()) { // non-budget case
1239 		min_k = params.min_budget;
1240 		max_k = params.budget;
1241 		step_k = params.step_budget;
1242 	} else {
1243 		min_k = params.min_size;
1244 		max_k = params.sub_size;
1245 		step_k = params.step_size;
1246 	}
1247 	areas_set.resize((max_k - min_k)/step_k + 1);
1248 	computeFeasibleBudget(params, list_k);
1249 
1250 	time_t time_init;
1251 	time(&time_init);
1252 	// now construction the optimal PD sets
1253 	if (isBudgetConstraint())
1254 		cout << "running budget = ";
1255 	else
1256 		cout << "running k = ";
1257 	for (k = min_k; k <= max_k; k += step_k) {
1258 		if (!list_k[k]) continue;
1259 		index = (k - min_k) / step_k;
1260 		if (!params.binary_programming) {
1261 			cout << " " << k;
1262 			cout.flush();
1263 			transformLP_Area2(params, ofile.c_str(), k, false);
1264 			if (params.gurobi_format)
1265 				lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1266 			else
1267 				lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1268 		} else lp_ret = 7;
1269 
1270 		if (lp_ret != 0 && lp_ret != 7)
1271 			outError("Something went wrong with LP solver!");
1272 		if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
1273 			cout << " " << k << "(bin)";
1274 			cout.flush();
1275 			if (params.binary_programming)
1276 				transformLP_Area2(params, ofile.c_str(), k, true);
1277 			else
1278 				lpVariableBinary(ofile.c_str(), params, initialareas);
1279 			if (params.gurobi_format)
1280 				lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1281 			else
1282 				lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1283 			if (lp_ret != 0) // check error again without allowing non-binary
1284 				outError("Something went wrong with LP solver!");
1285 		}
1286 
1287 		Split *area = new Split(nareas, score);
1288 		for (i = 0; i < nareas; i++)
1289 			if (1.0 - variables[i] < tolerance) {
1290 				//pd_set->addTaxon(taxa_order[i]);
1291 				area->addTaxon(i);
1292 			}
1293 		calcPDArea(*area);
1294 		areas_set[index].push_back(area);
1295 		time_t time_cur;
1296 		time(&time_cur);
1297 		if (difftime(time_cur, time_init) > 10) {
1298 			// write output if more than 10 seconds have elapsed
1299 			printOutputSetScore(params, areas_set);
1300 			PDRelatedMeasures pd_more; // just for called function, nothing
1301 			summarizeSplit(params, *this, areas_set, pd_more, false);
1302 			time_init = time_cur;
1303 		}
1304 	}
1305 	cout << endl;
1306 	delete [] variables;
1307 	delete area_coverage;
1308 }
1309 
1310 
isPDArea()1311 bool PDNetwork::isPDArea() {
1312 	return (sets->getNSets() > 0);
1313 }
1314 
calcPDArea(Split & area_id_set)1315 void PDNetwork::calcPDArea(Split &area_id_set) {
1316 	int ntaxa = getNTaxa();
1317 	int nareas = area_taxa.size();
1318 	Split sp(ntaxa);
1319 	for (int i = 0; i < nareas; i++)
1320 		if (area_id_set.containTaxon(i))
1321 			sp += *area_taxa[i];
1322 	calcPD(sp);
1323 	area_id_set.weight = sp.weight;
1324 }
1325 
isUniquelyCovered(int taxon,int & area)1326 bool PDNetwork::isUniquelyCovered(int taxon, int &area) {
1327 	area = -1;
1328 	for (int i = 0; i < getNAreas(); i++)
1329 		if (area_taxa[i]->containTaxon(taxon)) {
1330 			if (area < 0) area = i;	else return false;
1331 		}
1332 	return (area >= 0);
1333 }
1334 
1335 
transformLP_Area_Coverage(const char * outfile,Params & params,Split & included_area)1336 void PDNetwork::transformLP_Area_Coverage(const char *outfile, Params &params, Split &included_area) {
1337 	int ntaxa = getNTaxa();
1338 	int nareas = getNAreas();
1339 	int i, j;
1340 	IntVector::iterator it;
1341 	Split tax_cover(ntaxa);
1342 	for (it = initialareas.begin(); it != initialareas.end(); it++) {
1343 		tax_cover += *(area_taxa[*it]);
1344 		included_area.addTaxon(*it);
1345 	}
1346 	for (j = 0; j < ntaxa; j++) {
1347 		if (isUniquelyCovered(j, i)) {
1348 			if (verbose_mode >= VB_MED) {
1349 				cout << "Taxon " << taxa->GetTaxonLabel(j) << " is uniquely covered by " << sets->getSet(i)->name << endl;
1350 			}
1351 			included_area.addTaxon(i);
1352 			tax_cover.addTaxon(j);
1353 		}
1354 	}
1355 	try {
1356 		ofstream out;
1357 		out.exceptions(ios::failbit | ios::badbit);
1358 		out.open(outfile);
1359 		iterator spit;
1360 
1361 		lpObjectiveMinK(out, params);
1362 
1363 		// add constraint: every taxon should be covered by some area
1364 		for (j = 0; j < ntaxa; j++) {
1365 			if (tax_cover.containTaxon(j)) continue;
1366 			bool ok = false;
1367 			for (i = 0; i < nareas; i++)
1368 				if (area_taxa[i]->containTaxon(j)) {
1369 					out << " +x" << i;
1370 					ok = true;
1371 				}
1372 			if (!ok) continue;
1373 			out << " >= 1";
1374 			if (params.gurobi_format)
1375 				out << endl;
1376 			else
1377 				out << ";" << endl;
1378 		}
1379 		lpBoundaryConstraint(out, params);
1380 
1381 		// add bound for variable x
1382 		IntVector y_value;
1383 		lpVariableBound(out, params, included_area, y_value);
1384 		out.close();
1385 	} catch (ios::failure) {
1386 		outError(ERR_WRITE_OUTPUT, outfile);
1387 	}
1388 }
1389 
1390 
findMinAreas(Params & params,Split & area_id)1391 int PDNetwork::findMinAreas(Params &params, Split &area_id) {
1392 	string ofile = params.out_prefix;
1393 	ofile += ".lp";
1394 	int nareas = getNAreas();
1395 	int i;
1396 	double *variables = new double[nareas];
1397 	double score;
1398 	Split included_area(nareas);
1399 	transformLP_Area_Coverage(ofile.c_str(), params, included_area);
1400 	int lp_ret;
1401 	if (params.gurobi_format)
1402 		lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1403 	else
1404 		lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1405 
1406 	if (lp_ret != 0 && lp_ret != 7)
1407 		outError("Something went wrong with LP solver!");
1408 	if (lp_ret == 7) { // fail with non-binary case, do again with strict binary
1409 		lpVariableBinary(ofile.c_str(), params, included_area);
1410 
1411 		if (params.gurobi_format)
1412 			lp_ret = gurobi_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode, params.gurobi_threads);
1413 		else
1414 			lp_ret = lp_solve((char*)ofile.c_str(), nareas, &score, variables, verbose_mode);
1415 		if (lp_ret != 0) // check error again without allowing non-binary
1416 			outError("Something went wrong with LP solver!");
1417 	}
1418 	area_id.setNTaxa(nareas);
1419 	int count = 0;
1420 	// for checking purpose
1421 	Split taxon_coverage(getNTaxa());
1422 
1423 	for (i = 0; i < nareas; i++)
1424 		if (1.0 - variables[i] < tolerance) {
1425 			//pd_set->addTaxon(taxa_order[i]);
1426 			area_id.addTaxon(i);
1427 			taxon_coverage += *(area_taxa[i]);
1428 			if (isBudgetConstraint())
1429 				count += pda->getCost(i);
1430 			else
1431 				count++;
1432 		}
1433 	ofile = params.out_prefix;
1434 	ofile += ".cover";
1435 	try {
1436 		ofstream out;
1437 		out.exceptions(ios::failbit | ios::badbit);
1438 		out.open(ofile.c_str());
1439 		out << area_id.countTaxa() << " " << count << " " << computeBoundary(area_id) << " " << params.boundary_modifier << endl;
1440 		for (i = 0; i < nareas; i++)
1441 			if (area_id.containTaxon(i))
1442 				out << sets->getSet(i)->name << endl;
1443 		out.close();
1444 		//cout << "Transformed LP problem printed to " << outfile << endl;
1445 	} catch (ios::failure) {
1446 		outError(ERR_WRITE_OUTPUT, ofile);
1447 	}
1448 
1449 
1450 	/*
1451 	if (taxon_coverage.countTaxa() != getNTaxa()) {
1452 		outError("Something wrong with LP in determining taxon coverage");
1453 	}*/
1454 	delete [] variables;
1455 	return (count);
1456 }
1457 
1458 
checkAreaCoverage()1459 bool PDNetwork::checkAreaCoverage() {
1460 	int ntaxa = getNTaxa();
1461 	Split tax_cover(ntaxa);
1462 	for (SplitSet::iterator it = area_taxa.begin(); it != area_taxa.end(); it++) {
1463 		tax_cover += *(*it);
1464 	}
1465 	if (tax_cover.countTaxa() == ntaxa) {
1466 		return true;
1467 	}
1468 
1469 	cout << "WARNING: some taxa are not covered by any area including: ";
1470 	for (int i = 0; i < ntaxa; i++)
1471 		if (!tax_cover.containTaxon(i)) cout << taxa->GetTaxonLabel(i) << " ";
1472 	cout << endl;
1473 	return false;
1474 }
1475 
1476 
1477 /***********************************************
1478 ***********************************************
1479 	LP-aided functions
1480 ***********************************************/
1481 
1482 
lpObjectiveMaxSD(ostream & out,Params & params,IntVector & y_value,int total_size)1483 void PDNetwork::lpObjectiveMaxSD(ostream &out, Params &params, IntVector &y_value, int total_size) {
1484 	//IntVector y_value, count1, count2;
1485 	iterator spit;
1486 	int i;
1487 	// define the objective function
1488 	if (params.gurobi_format)
1489 		out << "Maximize" << endl;
1490 	else
1491 		out << "max: ";
1492 
1493 	for (spit = begin(),i=0; spit != end(); spit++,i++)	{
1494 		if (y_value[i] < 0)
1495 			out << " +" << (*spit)->getWeight() << " y" << i;
1496 		else if (y_value[i] >= 2)
1497 			out << " +" << (*spit)->getWeight() << " x" << y_value[i] - 2;
1498 	}
1499 
1500 	if (params.gurobi_format)
1501 		out << endl << "Subject to" << endl;
1502 	else
1503 		out << ";" << endl;
1504 }
1505 
1506 ///// TODO FOR taxon selection
lpObjectiveMinK(ostream & out,Params & params)1507 void PDNetwork::lpObjectiveMinK(ostream &out, Params &params) {
1508 	iterator spit;
1509 	int i, j;
1510 	int nareas = area_taxa.size();
1511 
1512 	// define the objective function
1513 	if (params.gurobi_format)
1514 		out << "Minimize" << endl;
1515 	else
1516 		out << "min: ";
1517 
1518 	for (j = 0; j < nareas; j++) {
1519 		double coeff = (isBudgetConstraint()) ? getPdaBlock()->getCost(j) : 1.0;
1520 		if (areas_boundary) coeff += areas_boundary[j*nareas+j] * params.boundary_modifier;
1521 		out << ((j>0) ? " +" : "") << coeff << " x" << j;
1522 
1523 
1524 	}
1525 
1526 	if (areas_boundary && params.boundary_modifier != 0.0) {
1527 		if (params.quad_programming)
1528 			out << " + [";
1529 		for (i = 0; i < nareas-1; i++)
1530 		for (j = i+1; j < nareas; j++)
1531 		if (areas_boundary[i*nareas+j] > 0.0) {
1532 			double coeff = 2*areas_boundary[i*nareas+j] * params.boundary_modifier;
1533 			if (params.quad_programming)
1534 				out << " -" << coeff << " x" << i << " * x" << j;
1535 			else
1536 				out << " -" << coeff << " y" << i << "_" << j;
1537 		}
1538 		if (params.quad_programming)
1539 			out << " ] / 2";
1540 	}
1541 
1542 	if (params.gurobi_format)
1543 		out << endl << "Subject to" << endl;
1544 	else
1545 		out << ";" << endl;
1546 }
1547 
lpK_BudgetConstraint(ostream & out,Params & params,int total_size)1548 void PDNetwork::lpK_BudgetConstraint(ostream &out, Params &params, int total_size) {
1549 
1550 	int nvars;
1551 	int i, j;
1552 	if (isPDArea())
1553 		nvars = area_taxa.size();
1554 	else
1555 		nvars = getNTaxa();
1556 
1557 	for (j = 0; j < nvars; j++) {
1558 		double coeff = (isBudgetConstraint()) ? getPdaBlock()->getCost(j) : 1.0;
1559 		if (areas_boundary) coeff += areas_boundary[j*nvars+j] * params.boundary_modifier;
1560 		out << ((j>0) ? " +" : "") << coeff << " x" << j;
1561 
1562 	}
1563 
1564 	if (areas_boundary && params.boundary_modifier != 0.0) {
1565 		for (i = 0; i < nvars-1; i++)
1566 		for (j = i+1; j < nvars; j++)
1567 		if (areas_boundary[i*nvars+j] > 0.0) {
1568 			double coeff = 2*areas_boundary[i*nvars+j] * params.boundary_modifier;
1569 				out << " -" << coeff << " y" << i << "_" << j;
1570 		}
1571 	}
1572 	out << " <= " << total_size;
1573 
1574 	// constraint for k-set or total budget
1575 	/*
1576 	if (isBudgetConstraint()) {
1577 		for (j = 0; j < nvars; j++) {
1578 			out << ((j==0)? "" : " +") << getPdaBlock()->getCost(j) << " x" << j;
1579 		}
1580 		out << " <= " << total_size;
1581 	} else {
1582 		for (j = 0; j < nvars; j++) {
1583 			out << ((j==0)? "" : " +") << "x" << j;
1584 		}
1585 		out  << " = " << total_size;
1586 	}*/
1587 	if (params.gurobi_format)
1588 		out << endl;
1589 	else
1590 		out << ";" << endl;
1591 }
1592 
lpBoundaryConstraint(ostream & out,Params & params)1593 void PDNetwork::lpBoundaryConstraint(ostream &out, Params &params) {
1594 	// constraint on the variable for the shared boundary between areas
1595 	if (!areas_boundary || params.boundary_modifier == 0.0)
1596 		return;
1597 	if (params.quad_programming) return;
1598 	int i, j;
1599 	int nareas = area_taxa.size();
1600 
1601 	for (i = 0; i < nareas-1; i++)
1602 		for (j = i+1; j < nareas; j++)
1603 			if (areas_boundary[i*nareas+j] > 0.0) {
1604 				out << "x" << i << " - y" << i << "_" << j << " >= 0";
1605 				if (params.gurobi_format)
1606 					out << endl;
1607 				else
1608 					out << ";" << endl;
1609 				out << "x" << j << " - y" << i << "_" << j << " >= 0";
1610 				if (params.gurobi_format)
1611 					out << endl;
1612 				else
1613 					out << ";" << endl;
1614 			}
1615 }
1616 
lpSplitConstraint_RS(ostream & out,Params & params,IntVector & y_value,IntVector & count1,IntVector & count2,int total_size)1617 void PDNetwork::lpSplitConstraint_RS(ostream &out, Params &params, IntVector &y_value, IntVector &count1, IntVector &count2, int total_size) {
1618 	iterator spit;
1619 	int i,j;
1620 	//int root_id = -1;
1621 	//if (params.root || params.is_rooted) root_id = initialset[0];
1622 	int nareas = area_taxa.size();
1623 
1624 
1625 	// adding the constraint for splits
1626 	for (spit = begin(),i=0; spit != end(); spit++,i++) {
1627 		if (y_value[i] >= 0) continue;
1628 		Split *sp = (*spit);
1629 
1630 		if (count1[i] < nareas && (isBudgetConstraint() || count1[i] <= nareas - total_size))
1631 		{
1632 			out << "y" << i;
1633 			if (!params.gurobi_format)
1634 				out << " <=";
1635 			for (j = 0; j < nareas; j++) {
1636 				if (sp->overlap(*area_taxa[j])) {
1637 					if (params.gurobi_format)
1638 						out << " -x" << j;
1639 					else
1640 						out << " +x" << j;
1641 				}
1642 			}
1643 			if (params.gurobi_format)
1644 				out << " <= 0" << endl;
1645 			else
1646 				out << ";" << endl;
1647 		}
1648 
1649 		if (count2[i] < nareas && (isBudgetConstraint() || count2[i] <= nareas - total_size))
1650 		{
1651 			sp->invert(); // scan the invert
1652 			out << "y" << i;
1653 			if (!params.gurobi_format)
1654 				out << " <=";
1655 			for (j = 0; j < nareas; j++) {
1656 				if (sp->overlap(*area_taxa[j])) {
1657 					if (params.gurobi_format)
1658 						out << " -x" << j;
1659 					else
1660 						out << " +x" << j;
1661 				}
1662 			}
1663 			if (params.gurobi_format)
1664 				out << " <= 0" << endl;
1665 			else
1666 				out << ";" << endl;
1667 			sp->invert(); // invert back to original
1668 		}
1669 	}
1670 }
1671 
lpSplitConstraint_TS(ostream & out,Params & params,IntVector & y_value,int total_size)1672 void PDNetwork::lpSplitConstraint_TS(ostream &out, Params &params, IntVector &y_value, int total_size) {
1673 	iterator spit;
1674 	int i,j;
1675 	int ntaxa = getNTaxa();
1676 	// adding the constraint for splits
1677 	for (spit = begin(),i=0; spit != end(); spit++,i++) {
1678 		if (y_value[i] >= 0) continue;
1679 
1680 		Split *sp = (*spit);
1681 		bool contain_initset = sp->containAny(initialset);
1682 
1683 		if (!contain_initset && (isBudgetConstraint() || sp->countTaxa() <= ntaxa - total_size)) {
1684 			out << "y" << i;
1685 			for (j = 0; j < ntaxa; j++)
1686 				if (sp->containTaxon(j))
1687 					out << " -x" << j;
1688 			out << " <= 0";
1689 			if (params.gurobi_format)
1690 				out << endl;
1691 			else
1692 				out << ";" << endl;
1693 		}
1694 		contain_initset = false;
1695 		if (initialset.size() > 0) {
1696 			sp->invert();
1697 			contain_initset =  sp->containAny(initialset);
1698 			sp->invert();
1699 		}
1700 		if (!contain_initset && (isBudgetConstraint() || sp->countTaxa() >= total_size)) {
1701 			out << "y" << i;
1702 			for (j = 0; j < ntaxa; j++)
1703 				if (!sp->containTaxon(j))
1704 					out << " -x" << j;
1705 			out << " <= 0";
1706 			if (params.gurobi_format)
1707 				out << endl;
1708 			else
1709 				out << ";" << endl;
1710 		}
1711 	}
1712 }
1713 
1714 
lpMinSDConstraint(ostream & out,Params & params,IntVector & y_value,double pd_proportion)1715 void PDNetwork::lpMinSDConstraint(ostream &out, Params &params, IntVector &y_value, double pd_proportion) {
1716 	iterator spit;
1717 	int i;
1718 	double total_weight = calcWeight();
1719 	double required_sd = total_weight * pd_proportion;
1720 	if (required_sd > total_weight) required_sd = total_weight;
1721 	required_sd -= 1e-6;
1722 	// adding constraint for min conserved PD proportion
1723 	for (spit = begin(),i=0; spit != end(); spit++,i++)	{
1724 		if (y_value[i] < 0)
1725 			out << " +" << (*spit)->getWeight() << " y" << i;
1726 		else if (y_value[i] >= 2)
1727 			out << " +" << (*spit)->getWeight() << " x" << y_value[i] - 2;
1728 		else if (y_value[i] == 1) required_sd -= (*spit)->getWeight();
1729 	}
1730 	out.precision(12);
1731 	out << " >= " << required_sd;
1732 	out.precision(6);
1733 
1734 	if (params.gurobi_format)
1735 		out << endl;
1736 	else
1737 		out << ";" << endl;
1738 }
1739 
lpVariableBound(ostream & out,Params & params,Split & included_vars,IntVector & y_value)1740 void PDNetwork::lpVariableBound(ostream &out, Params &params, Split &included_vars, IntVector &y_value) {
1741 	IntVector::iterator it2;
1742 	int i, j;
1743 	// define the variable boundary
1744 
1745 	if (params.gurobi_format)
1746 		out << "Bounds" << endl;
1747 
1748 
1749 	for (j = 0; j < included_vars.getNTaxa(); j++) {
1750 		if (included_vars.containTaxon(j)) {
1751 			out << "x" << j << " = 1";
1752 		} else {
1753 			if (params.gurobi_format)
1754 				out << "0 <= ";
1755 			out << "x" << j << " <= 1";
1756 		}
1757 		if (params.gurobi_format)
1758 			out << endl;
1759 		else
1760 			out << ";" << endl;
1761 	}
1762 
1763 	if (!y_value.empty()) {
1764 		for (i = 0; i < getNSplits(); i++) {
1765 			if (y_value[i] >= 0) continue;
1766 			if (params.gurobi_format)
1767 				out << "0 <= ";
1768 			out << "y" << i << " <= 1";
1769 			if (params.gurobi_format)
1770 				out << endl;
1771 			else
1772 				out << ";" << endl;
1773 		}
1774 	}
1775 	int nvars = included_vars.getNTaxa();
1776 	if (areas_boundary && params.boundary_modifier != 0.0 && !params.quad_programming) {
1777 		for (i = 0; i < included_vars.getNTaxa()-1; i++)
1778 		for (j = i+1; j < included_vars.getNTaxa(); j++)
1779 			if (areas_boundary[i*nvars+j] > 0.0) {
1780 				if (params.gurobi_format)
1781 					out << "0 <= ";
1782 				out << "y" << i << "_" << j << " <= 1";
1783 				if (params.gurobi_format)
1784 					out << endl;
1785 				else
1786 					out << ";" << endl;
1787 
1788 			}
1789 	}
1790 }
1791 
lpVariableBinary(ostream & out,Params & params,Split & included_vars)1792 void PDNetwork::lpVariableBinary(ostream &out, Params &params, Split &included_vars) {
1793 	int nvars;
1794 	int j;
1795 	if (isPDArea())
1796 		nvars = area_taxa.size();
1797 	else
1798 		nvars = getNTaxa();
1799 
1800 	bool first = true;
1801 	for (j = 0; j < nvars; j++) {
1802 		if (included_vars.containTaxon(j)) continue;
1803 		if (params.gurobi_format) {
1804 			if (!first)
1805 				out << " ";
1806 			else
1807 				out << "Binary" << endl;
1808 		} else {
1809 			if (!first)
1810 				out << ", ";
1811 			else
1812 				out << "bin ";
1813 		}
1814 		out << "x" << j;
1815 		first = false;
1816 	}
1817 	if (!first) {
1818 		if (params.gurobi_format)
1819 			out << endl;
1820 		else
1821 			out << ";" << endl;
1822 	}
1823 }
1824 
1825 
1826 /**
1827 	add binary variables
1828 */
lpVariableBinary(const char * outfile,Params & params,Split & included_vars)1829 void PDNetwork::lpVariableBinary(const char *outfile, Params &params, Split &included_vars) {
1830 	try {
1831 		ofstream out;
1832 		out.exceptions(ios::failbit | ios::badbit);
1833 		out.open(outfile, ios::app);
1834 		lpVariableBinary(out, params, included_vars);
1835 		out.close();
1836 	} catch (ios::failure) {
1837 		outError(ERR_WRITE_OUTPUT, outfile);
1838 	}
1839 }
1840 
1841 
lpVariableBinary(const char * outfile,Params & params,IntVector & initialset)1842 void PDNetwork::lpVariableBinary(const char *outfile, Params &params, IntVector &initialset) {
1843 	int nvars;
1844 	if (isPDArea())
1845 		nvars = area_taxa.size();
1846 	else
1847 		nvars = getNTaxa();
1848 	Split included_vars(nvars);
1849 	for (IntVector::iterator it2 = initialset.begin(); it2 != initialset.end(); it2++)
1850 		included_vars.addTaxon(*it2);
1851 	lpVariableBinary(outfile, params, included_vars);
1852 }
1853 
lpInitialArea(ostream & out,Params & params)1854 void PDNetwork::lpInitialArea(ostream &out, Params &params) {
1855 	int nareas = getNAreas();
1856 	int j;
1857 
1858 	// adding constraint for initialset
1859 	for (IntVector::iterator it = initialset.begin(); it != initialset.end(); it++) {
1860 		if (it == initialset.begin() && (params.root || params.is_rooted)) // ignore the root
1861 			continue;
1862 		out << "1 <= ";
1863 		bool ok = false;
1864 		for (j = 0; j < nareas; j++)
1865 			if (area_taxa[j]->containTaxon(*it)) {
1866 				out << " +x" << j;
1867 				ok = true;
1868 			}
1869 		if (params.gurobi_format)
1870 			out << endl;
1871 		else
1872 			out << ";" << endl;
1873 		if (!ok) {
1874 			outError("No area contains taxon ", taxa->GetTaxonLabel(*it));
1875 		}
1876 	}
1877 }
1878 
checkYValue(int total_size,vector<int> & y_value)1879 void PDNetwork::checkYValue(int total_size, vector<int> &y_value) {
1880 	iterator spit;
1881 	int ntaxa = getNTaxa();
1882 	int i;
1883 
1884 	y_value.resize(getNSplits(), -1);
1885 	for (spit = begin(),i=0; spit != end(); spit++,i++) {
1886 		Split *sp = (*spit);
1887 		int id = -1;
1888 		int cnt = sp->countTaxa();
1889 		if (cnt > ntaxa / 2) {
1890 			sp->invert();
1891 			cnt = ntaxa - cnt;
1892 		}
1893 		if (cnt == 1)
1894 			id = sp->firstTaxon();
1895 		if (id >= 0) {
1896 			// if the split is external -> y[i] = x[id]
1897 			y_value[i] = id+2;
1898 			continue;
1899 		}
1900 		if (!isBudgetConstraint()) {
1901 			if (cnt > ntaxa - total_size && cnt < total_size) {
1902 				// if both constraints can be dropped -> y[i] = 1
1903 				y_value[i] = 1;
1904 			}
1905 		}
1906 	}
1907 
1908 }
1909 
1910 
1911 
1912 
checkYValue_Area(int total_size,vector<int> & y_value,vector<int> & count1,vector<int> & count2)1913 void PDNetwork::checkYValue_Area(int total_size, vector<int> &y_value, vector<int> &count1, vector<int> &count2) {
1914 	iterator spit;
1915 	int nareas = area_taxa.size();
1916 	int i, j;
1917 
1918 	y_value.resize(getNSplits(), -1);
1919 	count1.resize(getNSplits(), 0);
1920 	count2.resize(getNSplits(), 0);
1921 	for (spit = begin(),i=0; spit != end(); spit++,i++) {
1922 		Split *sp = (*spit);
1923 		int id1 = -1, id2 = -1;
1924 		for (j = 0; j < nareas; j++) {
1925 			if (sp->overlap(*area_taxa[j])) {
1926 				count1[i]++;
1927 				id1 = j;
1928 			}
1929 		}
1930 		sp->invert();
1931 		for (j = 0; j < nareas; j++) {
1932 			if (sp->overlap(*area_taxa[j])) { count2[i]++; id2 = j; }
1933 		}
1934 		sp->invert(); // invert back to original
1935 		if (count1[i] == 0 || count2[i] == 0)
1936 			y_value[i] = 0;
1937 		else {
1938 
1939 			if (count1[i] == nareas && count2[i] == nareas) {
1940 				y_value[i] = 1;
1941 				continue;
1942 			}
1943 			if (isBudgetConstraint())
1944 				continue;
1945 
1946 			if (count1[i] == 1 && count2[i] > nareas - total_size) {
1947 				y_value[i] = id1 + 2;
1948 			} else if (count2[i] == 1 && count1[i] > nareas - total_size) {
1949 				y_value[i] = id2 + 2;
1950 				//continue;
1951 			} else if (count1[i] > nareas - total_size && count2[i] > nareas - total_size) {
1952 				y_value[i] = 1;
1953 			}
1954 		}
1955 	}
1956 
1957 }
1958 
speciesList(vector<string> * speciesNames)1959 void PDNetwork::speciesList(vector<string> *speciesNames)
1960 {
1961 	for(int i=0; i<getNTaxa();i++)
1962 		(*speciesNames).push_back(taxa->GetTaxonLabel(i));
1963 
1964 }
1965