1 // $Id: fc_status.h,v 1.13 2011/03/08 08:16:42 bobgian Exp $ 2 3 /* 4 * Copyright 2010 Mary Kuhner, Jon Yamato, Joseph Felsenstein, and Bob Giansiracusa 5 * 6 * This software is distributed free of charge for non-commercial use 7 * and is copyrighted. Of course, we do not guarantee that the software 8 * works, and are not responsible for any damage you may cause or have. 9 */ 10 11 //------------------------------------------------------------------------------------ 12 13 #ifndef FC_STATUS_H 14 #define FC_STATUS_H 15 16 #include <set> 17 #include <map> 18 19 // Defines various symbols used to control debugging of experimental code blocks. 20 #include "local_build.h" 21 22 #include "range.h" 23 24 //------------------------------------------------------------------------------------ 25 26 // An object of this type (FC_Grid) maps site range-starting-indices to coalescence branch-counts in the FC_Status 27 // object. The FC_Grid data member is a map populated by objects of type FC_Node, each a pair whose KEY 28 // (first of the pair) represents the (included) beginning startpoint of a range of site index values whose 29 // (excluded) endpoint is given by the starting site index of the next range (in the increasing-index direction). 30 // 31 // Each FC_Node object in the FC_Grid consists of a pair containing two values: a "key" (site index value) and a 32 // "value" (the count of branches at current tree level carrying the corresponding site "live" to some non-empty 33 // set of tips). 34 // 35 // The term "site index" refers to the index number identifying a site, which value is used as the "key" component 36 // of an FC_Node. The term "branch count" refer to the number of branches crossing the current "level" in the tree 37 // which are carrying the set of sites live to the tips, which value is used as the "value" component of an FC_Node. 38 39 // Type of the container object comprising the site-index -> branch-count map: 40 typedef std::map<long int, long int, std::less<long int> > FC_Grid; 41 42 // Type of objects ("nodes") inserted into FC_Grid map. 43 // FIRST (key): Site index (included starting site number for current range, 44 // also serves as excluded site number for "previous" range). 45 // SECOND (value): Live site branch count (number of branches carrying the sites represented by the current range). 46 // ALL sites in this range are indicated as being carried "live" by this value as a count of the 47 // number of branches carrying these sites live to at least one tip. The branch count is the number 48 // of branches that cross a surface cutting the tree at a given level (this "level" is defined by the 49 // "time interval" or phase of the "horserace" portion of the tree-rearrangement algorithm; it is 50 // NOT the total number of branches in the tree which carry these sites "live" to some tips. 51 // 52 typedef std::pair<long int, long int> FC_Node; 53 54 //------------------------------------------------------------------------------------ 55 56 // A class to support maintenance of information on which sites have attained Final Coalescence. Given a site number, 57 // the functions of this class maintain counts of how many branches at a given "level" are "passing" sites through 58 // to the tips. "Final Coalescence" is attained when that count (for a given site or range of sites) decreases to 59 // unity. The class object contains a cached representation (a "rangeset") of such sites. This cache is simply the 60 // set of all ranges whose count value is currently unity, and it is updated whenever the count computation results 61 // in a change in count. 62 // 63 // Note that the "coalesced sites rangeset cache" must be updated whenever branch counts for sites change, both on 64 // increment and on decrement. Decrement makes sense: when the number of branches carrying a site decreases to one, 65 // that is a "final coalescence". However, it is possible that the count may increase from zero to one and then never 66 // change again. If such an increment were not tallied, those sites (which have already coalesced in the tree being 67 // sampled at that level) would be missed altogether. 68 69 // Operation of FC_Status object -- the object contains two sub-objects: 70 // 71 // Data member "m_fc_grid" is an FC_Grid object, which is a map containing nodes, each of whose key is a site index 72 // (an integer representing the site index at which a branch-count changes) and whose "value" is a count of the 73 // number of branches carrying this site "live" (visible at some set of tips) for the region STARTING (inclusive) 74 // at the associated key index value and ENDING (exclusive) at the next higher (in the map's ordering) node's key 75 // index value. To determine the branch count for an arbitrary site, one can invoke the lower_bound() map member 76 // function and interpret the resulting iterator: 77 // 78 // - If the iterator equals end(), then there are no node(s) with keys equal to or greater than the given site 79 // index. Either there are no nodes at all in the FC_Grid, or the branch count is that of the value component 80 // of the first node in the DECREASING direction. It is an error for the value component of the HIGHEST node 81 // in the map to be anything other than zero. Such a lookup can be interpreted (if the assumption is that such 82 // a site in in a range which HAS been entered into the map) as an error condition or as an indication that 83 // information for a range containing the site in question has not yet been entered. 84 // 85 // - If the iterator points to a node whose key matches the input search site index, then associated branch 86 // count is the value component of the corresponding node. 87 // 88 // - If there is no node with this key (so that lower_bound() returns an iterator to the next "higher" node), 89 // one must decrement the iterator to obtain the next "lower" node, whose "value" slot is the count to apply 90 // to the region represented by the range of site indices bounded by the keys of the two nodes (which includes 91 // the site of interest). If there is no "lower" node, then the input site index corresponds to a site for 92 // which no information has been stored, which can be interpreted either as an error condition or as a branch 93 // count of zero. 94 // 95 // As the site-to-branch-count-maintenance operation proceeds, the algorithm watches for regions which abut 96 // at a common node and have the same count value on both sides, combining them into a single region by deleting 97 // the commom middle node. 98 // 99 // Data member "m_coalesced_sites" is simply a rangeset (set of pairs, each indicating a range of sites by index, 100 // with the same convention of including START edge and excluding END edge). The union of the ranges denoted by 101 // the pairs in the rangeset denotes the set of all sites which have come to Final Coalescence (in other words, 102 // whose branch counts have decreased to or now are unity). This information is updated by the FC_Grid maintenance 103 // algorithm when computing branch counts and cached in this data member, making the information easy to retrieve. 104 105 class FC_Status 106 { 107 public: 108 109 // Construct object representing number of live-site-carrying-branches, indexed by (sparse array of) site numbers. 110 // Initial object stores coalescence count of zero for each site. 111 // Prevent accidental conversion from integer to FC_Status object via "explicit". 112 explicit FC_Status(); 113 114 // Interface to the count-maintaining algorithm: Increase the count at a set of sites by ONE. Increment_FC_Counts(const rangeset & sites_to_increment)115 void Increment_FC_Counts(const rangeset & sites_to_increment) 116 { 117 Adjust_FC_Counts(sites_to_increment, +1); 118 } 119 120 // Interface to the count-maintaining algorithm: Decrease the count at a set of sites by ONE. Decrement_FC_Counts(const rangeset & sites_to_decrement)121 void Decrement_FC_Counts(const rangeset & sites_to_decrement) 122 { 123 Adjust_FC_Counts(sites_to_decrement, -1); 124 } 125 126 // Return set of ranges (a "rangeset") representing all sites that have coalesced. Once coalesced at a given 127 // level, those sites are coalesced at all levels rootward; the rangeset represents cumulative coalescence 128 // (this level and all rootward), not just sites that coalesce AT this level. Objects (rangesets) returned 129 // are by-value copies and thus remain valid despite changes in the internal state of the FC_Status object cache 130 // (as the "horserace" proceeds to other time intervals). 131 rangeset Coalesced_Sites(); 132 133 // The FC_Status object is allocated on the stack and is destructed when its containing variable goes out 134 // of scope. The FC_Status object contains two STL containers (which manage their own memory) and an built-in 135 // int as data members (ie, no dynamically-allocated objects or pointers) and therefore there is no need 136 // for other than a default destructor. ~FC_Status()137 ~FC_Status() {} 138 139 // Verification of final-coalescence algorithm: test for equality of two FC_Status objects holding data 140 // computed different ways. 141 friend bool operator==(const FC_Status & lhs, const FC_Status & rhs); 142 friend bool operator!=(const FC_Status & lhs, const FC_Status & rhs); 143 144 #if (LAMARC_QA_FCTEST_DEBUGLEVEL > 0) // For testing; remove in production version. 145 void PrintFCStatus(bool print_output, const rangeset & sites_to_adjust, long int adjustment); 146 #endif 147 148 // JDEBUG: quick and dirty GDB Debugging functions. 149 long GridSize() const; // m_fc_grid.size() 150 void PrintGrid() const; // print the contents of m_fc_grid 151 152 private: 153 154 // Site index value for LOWER end of lowest range inserted so far; effectively, the BEGINNING of the FC_Grid. 155 long int m_site_index_lower_limit; 156 157 // Site index value for UPPER end of highest range inserted so far; effectively, the END of the FC_Grid. 158 long int m_site_index_upper_limit; 159 160 // This FC_Grid maps keys (representing site number ranges) to coalescence counts. The (included) lower endpoint 161 // site index of the range is the key of the FC_Node object (a pair) stored in the FC_Grid (the container). 162 // The range represented is from the node's site index (included) to the site index (excluded) stored in the 163 // adjacent (next in upward direction) FC_Node in the grid. 164 // 165 // The lowest LOWER endpoint is represented by the LOWER range endpoint site index for the LOWEST range entered 166 // so far. This site index value is remembered by the private data member "m_site_index_lower_limit". 167 // 168 // The highest UPPER endpoint is represented by the UPPER range endpoint site index for the HIGHEST range entered 169 // so far. This site index value is remembered by the private data member "m_site_index_upper_limit". 170 // 171 FC_Grid m_fc_grid; 172 173 // Cached representation of coalesced sites (those whose coalescence count has reached unity). 174 // This result is updated by the increment/decrement member functions. 175 rangeset m_coalesced_sites; 176 177 // Privately-declared and undefined functions. 178 FC_Status(FC_Status &); // Don't let clients copy this object. 179 FC_Status & operator=(FC_Status &); // Don't let clients assign to this object. 180 181 // Does all the work of Increment_FC_Counts() and Decrement_FC_Counts(). 182 void Adjust_FC_Counts(const rangeset & sites_to_adjust, long int adjustment); 183 }; 184 185 //------------------------------------------------------------------------------------ 186 187 #endif // FC_STATUS_H 188 189 //____________________________________________________________________________________ 190