1 /*************************************************************************** 2 * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler * 3 * minh.bui@univie.ac.at * 4 * * 5 * This program is free software; you can redistribute it and/or modify * 6 * it under the terms of the GNU General Public License as published by * 7 * the Free Software Foundation; either version 2 of the License, or * 8 * (at your option) any later version. * 9 * * 10 * This program is distributed in the hope that it will be useful, * 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 13 * GNU General Public License for more details. * 14 * * 15 * You should have received a copy of the GNU General Public License * 16 * along with this program; if not, write to the * 17 * Free Software Foundation, Inc., * 18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * 19 ***************************************************************************/ 20 #ifndef SPLITGRAPH_H 21 #define SPLITGRAPH_H 22 23 #include <list> 24 #include <vector> 25 #include <string> 26 #include "split.h" 27 #include "ncl/ncl.h" 28 #include "nclextra/msplitsblock.h" 29 #include "nclextra/mpdablock.h" 30 #include "nclextra/msetsblock.h" 31 #include "tree/node.h" 32 #include "splitset.h" 33 #include "tree/mtree.h" 34 #include "utils/checkpoint.h" 35 36 class MTreeSet; 37 38 using namespace std; 39 40 /** 41 SplitGraph class 42 43 @author BUI Quang Minh, Steffen Klaere, Arndt von Haeseler 44 */ 45 class SplitGraph : public vector<Split*>, public CheckpointFactory 46 { 47 public: 48 49 friend class MTree; 50 friend class MTreeSet; 51 friend class ECOpd; 52 53 /******************************************************** 54 CONSTRUCTORs, INITIALIZATION AND DESTRUCTORs 55 ********************************************************/ 56 57 /** 58 empty constructor 59 */ 60 SplitGraph(); 61 62 /** 63 construct split graph from the parameters by calling init(params). 64 @param params program parameters 65 */ 66 SplitGraph(Params ¶ms); 67 68 /** 69 init split graph from the parameters 70 @param params program parameters 71 */ 72 void init(Params ¶ms); 73 74 /** 75 save object into the checkpoint 76 */ 77 virtual void saveCheckpoint(); 78 79 /** 80 restore object from the checkpoint 81 */ 82 virtual void restoreCheckpoint(); 83 84 /** 85 if no taxa block found, but the sets block is present, then 86 this function will be invoked. It takes the taxa names from the sets block. 87 */ 88 void AddTaxaFromSets(); 89 90 /** 91 this function is invoked 92 */ 93 void createStarTree(); 94 95 96 /** 97 new all blocks: taxa, splits, pda 98 */ 99 void createBlocks(); 100 101 /** free allocated memory, called by destructor */ 102 void freeMem(); 103 104 /** 105 destructor 106 */ 107 virtual ~SplitGraph(); 108 109 /** 110 convert the collection of trees in TREES block into this split graph 111 @param burnin the number of beginning trees to be discarded 112 @param max_count max number of trees to consider 113 @param split_threshold only keep those splits which appear more than this threshold 114 @param weight_threshold minimum weight cutoff 115 */ 116 void convertFromTreesBlock(int burnin, int max_count, double split_threshold, 117 int split_weight_summary, double weight_threshold, const char *tree_weight_file); 118 119 /******************************************************** 120 PRINT INFORMATION 121 ********************************************************/ 122 123 /** 124 print infos of split graph 125 @param out the output stream 126 */ 127 void report(ostream &out); 128 129 /** 130 print infos of compatibility graph of splits 131 @param out the output stream 132 */ 133 void reportConflict(ostream &out); 134 135 /******************************************************** 136 GET INFORMATION 137 ********************************************************/ 138 139 /** 140 calculate sum of weights of all splits 141 */ 142 double calcWeight(); 143 144 145 /** 146 calculate sum of weights of all trivial splits 147 */ 148 double calcTrivialWeight(); 149 150 /** 151 calculate sum of weights of preserved splits in the taxa_set 152 @param taxa_set a set of taxa 153 */ 154 double calcWeight(Split &taxa_set); 155 156 /** 157 count how many splits are covered by the taxon set 158 @param taxa_set a set of taxa 159 */ 160 int countSplits(Split &taxa_set); 161 162 /** 163 count how many internal splits are covered by the taxon set 164 @param taxa_set a set of taxa 165 */ 166 int countInternalSplits(Split &taxa_set); 167 168 /** 169 generate pairs of random taxa set with overlap of taxa in common 170 @param filename output file name 171 @param size size of the taxa set 172 @param overlap number of taxa common in both sets 173 @param times number of times repeated 174 */ 175 void generateTaxaSet(char *filename, int size, int overlap, int times); 176 177 /** 178 scale the weight of all splits to a norm factor 179 @param norm normalized factor 180 @param make_int TRUE to round weights to int, FALSE otherwise 181 @param precision numerical precision, default (-1) for no rounding 182 */ 183 void scaleWeight(double norm, bool make_int = false, int precision = -1); 184 185 186 /** 187 @return TRUE if split sp is contained in the split system 188 @param sp target split to search for 189 */ 190 bool containSplit(Split &sp); 191 192 /** 193 compute the boundary length of the area set, using areas_boundary variable 194 @param area a set of area ID 195 @return boundary length 196 */ 197 double computeBoundary(Split &area); 198 199 /** 200 @return max split weight 201 */ 202 double maxWeight(); 203 204 /** 205 * @param name a name string 206 * @return ID of leaf corresponding to name, -1 if not found 207 */ 208 int findLeafName(string &name); 209 210 /******************************************************** 211 compatibility 212 ********************************************************/ 213 214 /** 215 find the maximum-weight set of compatible splits 216 @param maxsg (OUT) set of compatible splits in a split graph class 217 */ 218 void findMaxCompatibleSplits(SplitGraph &maxsg); 219 220 /** 221 check the compatibility of sp against all splits in this set 222 @param sp the target split 223 @return TRUE if sp is compatible with all splits here, otherwise FALSE 224 */ 225 bool compatible(Split *sp); 226 227 /******************************************************** 228 OTHER STUFFS 229 ********************************************************/ 230 231 /** 232 @return number of taxa 233 */ getNTaxa()234 int getNTaxa() { 235 ASSERT(size() > 0); 236 return (*begin())->ntaxa; 237 } 238 239 /** 240 @return number of areas 241 */ getNAreas()242 int getNAreas() { 243 return sets->getNSets(); 244 } 245 246 /** 247 @return number of splits 248 */ getNSplits()249 int getNSplits() { 250 return size(); 251 } 252 253 /** 254 @return number of trivial splits 255 */ 256 int getNTrivialSplits(); 257 258 /** 259 @return taxa block 260 */ getTaxa()261 NxsTaxaBlock *getTaxa() { 262 return taxa; 263 } 264 265 void getTaxaName(vector<string> &taxname); 266 267 /** 268 @return splits block 269 */ getSplitsBlock()270 MSplitsBlock *getSplitsBlock() { 271 return splits; 272 } 273 274 /** 275 @return PDA block 276 */ getPdaBlock()277 MPdaBlock *getPdaBlock() { 278 return pda; 279 } 280 281 /** 282 @return SETS block 283 */ getSetsBlock()284 MSetsBlock *getSetsBlock() { 285 return sets; 286 } 287 288 /** 289 @return TREES block 290 */ getTreesBlock()291 NxsTreesBlock *getTreesBlock() { 292 return trees; 293 } 294 getMTrees()295 MTreeSet *getMTrees() { 296 return mtrees; 297 } 298 299 /** 300 @return TRUE if splits graph is circular 301 */ isCircular()302 bool isCircular() { 303 return splits->cycle.size() != 0; 304 } 305 306 /** 307 @return TRUE if split system is weakly compatible 308 */ 309 bool isWeaklyCompatible(); 310 311 /** 312 @return TRUE if it is the cost-constrained PD problem 313 */ isBudgetConstraint()314 bool isBudgetConstraint() { 315 return pda->cost_constrained; 316 } 317 318 /** 319 @return TRUE if the distance matrix presents for circular splits graph 320 @param mat distance matrix 321 */ 322 bool checkCircular(mmatrix(double) &mat); 323 324 /** 325 get the ID of the taxon around the circle in a circular splits graph 326 @param i a taxon 327 @return index of taxon on the circle 328 */ getCircleId(int i)329 int getCircleId(int i) { 330 ASSERT(i >= 0 && i < getNTaxa()); 331 return splits->cycle[i]; 332 } 333 334 /** 335 generate a random circular split graph 336 @param params program parameters 337 */ 338 void generateCircular(Params ¶ms); 339 340 /** 341 save split systems to a file in NEXUS format 342 @param out output stream 343 @param omit_trivial TRUE to omit trivial splits, FALSE otherwise 344 */ 345 void saveFileNexus(ostream &out, bool omit_trivial = false); 346 347 /** 348 save split systems to a file in star-dot format (eg **...*) 349 @param out output stream 350 @param omit_trivial TRUE to omit trivial splits, FALSE otherwise 351 */ 352 void saveFileStarDot(ostream &out, bool omit_trivial = false); 353 354 /** 355 save split systems to a file 356 @param out output file name 357 @param omit_trivial TRUE to omit trivial splits, FALSE otherwise 358 */ 359 void saveFile(const char* out_file, InputType file_format, bool omit_trivial = false); 360 361 /** 362 calculate the distance matrix, print to file in phylip format 363 @param filename output file name 364 */ 365 void calcDistance(char *filename); 366 367 368 /** 369 calculate the distance matrix 370 @param dist (OUT) distance matrix 371 */ 372 void calcDistance(mmatrix(double) &dist); 373 374 /** 375 calculate the distance matrix, based on the taxa_order 376 @param dist (OUT) distance matrix 377 @param taxa_order an order of taxa 378 */ 379 void calcDistance(mmatrix(double) &dist, vector<int> &taxa_order); 380 381 /** 382 * remove all trivial splits 383 * @return number of trivial splits removed 384 */ 385 int removeTrivialSplits(); 386 387 protected: 388 389 /** 390 taxa block 391 */ 392 NxsTaxaBlock *taxa; 393 394 /** 395 splits block 396 */ 397 MSplitsBlock *splits; 398 399 /** 400 PDA block 401 */ 402 MPdaBlock *pda; 403 404 405 /** 406 SETS block 407 */ 408 MSetsBlock *sets; 409 410 /** 411 relationship between the sets. For example, the common boundary length between two areas. 412 */ 413 double *areas_boundary; 414 415 /** 416 TREES block 417 */ 418 NxsTreesBlock *trees; 419 420 /** 421 storing set of trees if the split graph is converted from it 422 */ 423 MTreeSet *mtrees; 424 425 }; 426 427 #endif 428