1 /***************************************************************************
2  *   Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #ifndef SPLITGRAPH_H
21 #define SPLITGRAPH_H
22 
23 #include <list>
24 #include <vector>
25 #include <string>
26 #include "split.h"
27 #include "ncl/ncl.h"
28 #include "nclextra/msplitsblock.h"
29 #include "nclextra/mpdablock.h"
30 #include "nclextra/msetsblock.h"
31 #include "tree/node.h"
32 #include "splitset.h"
33 #include "tree/mtree.h"
34 #include "utils/checkpoint.h"
35 
36 class MTreeSet;
37 
38 using namespace std;
39 
40 /**
41 SplitGraph class
42 
43 @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler
44 */
45 class SplitGraph : public vector<Split*>, public CheckpointFactory
46 {
47 public:
48 
49 	friend class MTree;
50 	friend class MTreeSet;
51 	friend class ECOpd;
52 
53 /********************************************************
54 	CONSTRUCTORs, INITIALIZATION AND DESTRUCTORs
55 ********************************************************/
56 
57 	/**
58 		empty constructor
59 	*/
60     SplitGraph();
61 
62 	/**
63 		construct split graph from the parameters by calling init(params).
64 		@param params program parameters
65 	*/
66     SplitGraph(Params &params);
67 
68 	/**
69 		init split graph from the parameters
70 		@param params program parameters
71 	*/
72     void init(Params &params);
73 
74     /**
75         save object into the checkpoint
76     */
77     virtual void saveCheckpoint();
78 
79     /**
80         restore object from the checkpoint
81     */
82     virtual void restoreCheckpoint();
83 
84 	/**
85 		if no taxa block found, but the sets block is present, then
86 		this function will be invoked. It takes the taxa names from the sets block.
87 	*/
88 	void AddTaxaFromSets();
89 
90 	/**
91 		this function is invoked
92 	*/
93 	void createStarTree();
94 
95 
96 	/**
97 		new all blocks: taxa, splits, pda
98 	*/
99 	void createBlocks();
100 
101 	/** free allocated memory, called by destructor */
102 	void freeMem();
103 
104 	/**
105 		destructor
106 	*/
107     virtual ~SplitGraph();
108 
109 	/**
110 		convert the collection of trees in TREES block into this split graph
111 		@param burnin the number of beginning trees to be discarded
112 		@param max_count max number of trees to consider
113 		@param split_threshold only keep those splits which appear more than this threshold
114 		@param weight_threshold minimum weight cutoff
115 	*/
116 	void convertFromTreesBlock(int burnin, int max_count, double split_threshold,
117 		int split_weight_summary, double weight_threshold, const char *tree_weight_file);
118 
119 /********************************************************
120 	PRINT INFORMATION
121 ********************************************************/
122 
123 	/**
124 		print infos of split graph
125 		@param out the output stream
126 	*/
127 	void report(ostream &out);
128 
129 	/**
130 		print infos of compatibility graph of splits
131 		@param out the output stream
132 	*/
133 	void reportConflict(ostream &out);
134 
135 /********************************************************
136 	GET INFORMATION
137 ********************************************************/
138 
139 	/**
140 		calculate sum of weights of all splits
141 	*/
142 	double calcWeight();
143 
144 
145 	/**
146 		calculate sum of weights of all trivial splits
147 	*/
148 	double calcTrivialWeight();
149 
150 	/**
151 		calculate sum of weights of preserved splits in the taxa_set
152 		@param taxa_set a set of taxa
153 	*/
154 	double calcWeight(Split &taxa_set);
155 
156 	/**
157 		count how many splits are covered by the taxon set
158 		@param taxa_set a set of taxa
159 	*/
160 	int countSplits(Split &taxa_set);
161 
162 	/**
163 		count how many internal splits are covered by the taxon set
164 		@param taxa_set a set of taxa
165 	*/
166 	int countInternalSplits(Split &taxa_set);
167 
168 	/**
169 		generate pairs of random taxa set with overlap of taxa in common
170 		@param filename output file name
171 		@param size size of the taxa set
172 		@param overlap number of taxa common in both sets
173 		@param times number of times repeated
174 	*/
175 	void generateTaxaSet(char *filename, int size, int overlap, int times);
176 
177 	/**
178 		scale the weight of all splits to a norm factor
179 		@param norm normalized factor
180 		@param make_int TRUE to round weights to int, FALSE otherwise
181 		@param precision numerical precision, default (-1) for no rounding
182 	*/
183 	void scaleWeight(double norm, bool make_int = false, int precision = -1);
184 
185 
186 	/**
187 		@return TRUE if split sp is contained in the split system
188 		@param sp target split to search for
189 	*/
190 	bool containSplit(Split &sp);
191 
192 	/**
193 		compute the boundary length of the area set, using areas_boundary variable
194 		@param area a set of area ID
195 		@return boundary length
196 	*/
197 	double computeBoundary(Split &area);
198 
199 	 /**
200 	  @return max split weight
201 	 */
202 	double maxWeight();
203 
204 	/**
205 	 * @param name a name string
206 	 * @return ID of leaf corresponding to name, -1 if not found
207 	 */
208 	int findLeafName(string &name);
209 
210 /********************************************************
211 	compatibility
212 ********************************************************/
213 
214 	/**
215 		find the maximum-weight set of compatible splits
216 		@param maxsg (OUT) set of compatible splits in a split graph class
217 	*/
218 	void findMaxCompatibleSplits(SplitGraph &maxsg);
219 
220 	/**
221  		check the compatibility of sp against all splits in this set
222 		@param sp the target split
223 		@return TRUE if sp is compatible with all splits here, otherwise FALSE
224 	*/
225 	bool compatible(Split *sp);
226 
227 /********************************************************
228 	OTHER STUFFS
229 ********************************************************/
230 
231 	/**
232 		@return number of taxa
233 	*/
getNTaxa()234 	int getNTaxa() {
235 		ASSERT(size() > 0);
236 		return (*begin())->ntaxa;
237 	}
238 
239 	/**
240 		@return number of areas
241 	*/
getNAreas()242 	int getNAreas() {
243 		return sets->getNSets();
244 	}
245 
246 	/**
247 		@return number of splits
248 	*/
getNSplits()249 	int getNSplits() {
250 		return size();
251 	}
252 
253 	/**
254 		@return number of trivial splits
255 	*/
256 	int getNTrivialSplits();
257 
258 	/**
259 		@return taxa block
260 	*/
getTaxa()261 	NxsTaxaBlock *getTaxa() {
262 		return taxa;
263 	}
264 
265 	void getTaxaName(vector<string> &taxname);
266 
267 	/**
268 		@return splits block
269 	*/
getSplitsBlock()270 	MSplitsBlock *getSplitsBlock() {
271 		return splits;
272 	}
273 
274 	/**
275 		@return PDA block
276 	*/
getPdaBlock()277 	MPdaBlock *getPdaBlock() {
278 		return pda;
279 	}
280 
281 	/**
282 		@return SETS block
283 	*/
getSetsBlock()284 	MSetsBlock *getSetsBlock() {
285 		return sets;
286 	}
287 
288 	/**
289 		@return TREES block
290 	*/
getTreesBlock()291 	NxsTreesBlock *getTreesBlock() {
292 		return trees;
293 	}
294 
getMTrees()295 	MTreeSet *getMTrees() {
296 		return mtrees;
297 	}
298 
299 	/**
300 		@return TRUE if splits graph is circular
301 	*/
isCircular()302 	bool isCircular() {
303 		return splits->cycle.size() != 0;
304 	}
305 
306 	/**
307 		@return TRUE if split system is weakly compatible
308 	*/
309 	bool isWeaklyCompatible();
310 
311 	/**
312 		@return TRUE if it is the cost-constrained PD problem
313 	*/
isBudgetConstraint()314 	bool isBudgetConstraint() {
315 		return pda->cost_constrained;
316 	}
317 
318 	/**
319 		@return TRUE if the distance matrix presents for circular splits graph
320 		@param mat distance matrix
321 	*/
322 	bool checkCircular(mmatrix(double) &mat);
323 
324 	/**
325 		get the ID of the taxon around the circle in a circular splits graph
326 		@param i a taxon
327 		@return index of taxon on the circle
328 	*/
getCircleId(int i)329 	int getCircleId(int i) {
330 		ASSERT(i >= 0 && i < getNTaxa());
331 		return splits->cycle[i];
332 	}
333 
334 	/**
335 		generate a random circular split graph
336 		@param params program parameters
337 	*/
338 	void generateCircular(Params &params);
339 
340 	/**
341 		save split systems to a file in NEXUS format
342 		@param out output stream
343 		@param omit_trivial TRUE to omit trivial splits, FALSE otherwise
344 	*/
345 	void saveFileNexus(ostream &out, bool omit_trivial = false);
346 
347 	/**
348 		save split systems to a file in star-dot format (eg **...*)
349 		@param out output stream
350 		@param omit_trivial TRUE to omit trivial splits, FALSE otherwise
351 	*/
352 	void saveFileStarDot(ostream &out, bool omit_trivial = false);
353 
354 	/**
355 		save split systems to a file
356 		@param out output file name
357 		@param omit_trivial TRUE to omit trivial splits, FALSE otherwise
358 	*/
359 	void saveFile(const char* out_file, InputType file_format, bool omit_trivial = false);
360 
361 	/**
362 		calculate the distance matrix, print to file in phylip format
363 		@param filename output file name
364 	*/
365 	void calcDistance(char *filename);
366 
367 
368 	/**
369 		calculate the distance matrix
370 		@param dist (OUT) distance matrix
371 	*/
372 	void calcDistance(mmatrix(double) &dist);
373 
374 	/**
375 		calculate the distance matrix, based on the taxa_order
376 		@param dist (OUT) distance matrix
377 		@param taxa_order an order of taxa
378 	*/
379 	void calcDistance(mmatrix(double) &dist, vector<int> &taxa_order);
380 
381 	/**
382 	 * remove all trivial splits
383 	 * @return number of trivial splits removed
384 	*/
385 	int removeTrivialSplits();
386 
387 protected:
388 
389 	/**
390 		taxa block
391 	*/
392 	NxsTaxaBlock *taxa;
393 
394 	/**
395 		splits block
396 	*/
397 	MSplitsBlock *splits;
398 
399 	/**
400 		PDA block
401 	*/
402 	MPdaBlock *pda;
403 
404 
405 	/**
406 		SETS block
407 	*/
408 	MSetsBlock *sets;
409 
410 	/**
411 		relationship between the sets. For example, the common boundary length between two areas.
412 	*/
413 	double *areas_boundary;
414 
415 	/**
416 		TREES block
417 	*/
418 	NxsTreesBlock *trees;
419 
420 	/**
421 		storing set of trees if the split graph is converted from it
422 	*/
423 	MTreeSet *mtrees;
424 
425 };
426 
427 #endif
428