// $Id: fc_status.h,v 1.13 2011/03/08 08:16:42 bobgian Exp $ /* * Copyright 2010 Mary Kuhner, Jon Yamato, Joseph Felsenstein, and Bob Giansiracusa * * This software is distributed free of charge for non-commercial use * and is copyrighted. Of course, we do not guarantee that the software * works, and are not responsible for any damage you may cause or have. */ //------------------------------------------------------------------------------------ #ifndef FC_STATUS_H #define FC_STATUS_H #include #include // Defines various symbols used to control debugging of experimental code blocks. #include "local_build.h" #include "range.h" //------------------------------------------------------------------------------------ // An object of this type (FC_Grid) maps site range-starting-indices to coalescence branch-counts in the FC_Status // object. The FC_Grid data member is a map populated by objects of type FC_Node, each a pair whose KEY // (first of the pair) represents the (included) beginning startpoint of a range of site index values whose // (excluded) endpoint is given by the starting site index of the next range (in the increasing-index direction). // // Each FC_Node object in the FC_Grid consists of a pair containing two values: a "key" (site index value) and a // "value" (the count of branches at current tree level carrying the corresponding site "live" to some non-empty // set of tips). // // The term "site index" refers to the index number identifying a site, which value is used as the "key" component // of an FC_Node. The term "branch count" refer to the number of branches crossing the current "level" in the tree // which are carrying the set of sites live to the tips, which value is used as the "value" component of an FC_Node. // Type of the container object comprising the site-index -> branch-count map: typedef std::map > FC_Grid; // Type of objects ("nodes") inserted into FC_Grid map. // FIRST (key): Site index (included starting site number for current range, // also serves as excluded site number for "previous" range). // SECOND (value): Live site branch count (number of branches carrying the sites represented by the current range). // ALL sites in this range are indicated as being carried "live" by this value as a count of the // number of branches carrying these sites live to at least one tip. The branch count is the number // of branches that cross a surface cutting the tree at a given level (this "level" is defined by the // "time interval" or phase of the "horserace" portion of the tree-rearrangement algorithm; it is // NOT the total number of branches in the tree which carry these sites "live" to some tips. // typedef std::pair FC_Node; //------------------------------------------------------------------------------------ // A class to support maintenance of information on which sites have attained Final Coalescence. Given a site number, // the functions of this class maintain counts of how many branches at a given "level" are "passing" sites through // to the tips. "Final Coalescence" is attained when that count (for a given site or range of sites) decreases to // unity. The class object contains a cached representation (a "rangeset") of such sites. This cache is simply the // set of all ranges whose count value is currently unity, and it is updated whenever the count computation results // in a change in count. // // Note that the "coalesced sites rangeset cache" must be updated whenever branch counts for sites change, both on // increment and on decrement. Decrement makes sense: when the number of branches carrying a site decreases to one, // that is a "final coalescence". However, it is possible that the count may increase from zero to one and then never // change again. If such an increment were not tallied, those sites (which have already coalesced in the tree being // sampled at that level) would be missed altogether. // Operation of FC_Status object -- the object contains two sub-objects: // // Data member "m_fc_grid" is an FC_Grid object, which is a map containing nodes, each of whose key is a site index // (an integer representing the site index at which a branch-count changes) and whose "value" is a count of the // number of branches carrying this site "live" (visible at some set of tips) for the region STARTING (inclusive) // at the associated key index value and ENDING (exclusive) at the next higher (in the map's ordering) node's key // index value. To determine the branch count for an arbitrary site, one can invoke the lower_bound() map member // function and interpret the resulting iterator: // // - If the iterator equals end(), then there are no node(s) with keys equal to or greater than the given site // index. Either there are no nodes at all in the FC_Grid, or the branch count is that of the value component // of the first node in the DECREASING direction. It is an error for the value component of the HIGHEST node // in the map to be anything other than zero. Such a lookup can be interpreted (if the assumption is that such // a site in in a range which HAS been entered into the map) as an error condition or as an indication that // information for a range containing the site in question has not yet been entered. // // - If the iterator points to a node whose key matches the input search site index, then associated branch // count is the value component of the corresponding node. // // - If there is no node with this key (so that lower_bound() returns an iterator to the next "higher" node), // one must decrement the iterator to obtain the next "lower" node, whose "value" slot is the count to apply // to the region represented by the range of site indices bounded by the keys of the two nodes (which includes // the site of interest). If there is no "lower" node, then the input site index corresponds to a site for // which no information has been stored, which can be interpreted either as an error condition or as a branch // count of zero. // // As the site-to-branch-count-maintenance operation proceeds, the algorithm watches for regions which abut // at a common node and have the same count value on both sides, combining them into a single region by deleting // the commom middle node. // // Data member "m_coalesced_sites" is simply a rangeset (set of pairs, each indicating a range of sites by index, // with the same convention of including START edge and excluding END edge). The union of the ranges denoted by // the pairs in the rangeset denotes the set of all sites which have come to Final Coalescence (in other words, // whose branch counts have decreased to or now are unity). This information is updated by the FC_Grid maintenance // algorithm when computing branch counts and cached in this data member, making the information easy to retrieve. class FC_Status { public: // Construct object representing number of live-site-carrying-branches, indexed by (sparse array of) site numbers. // Initial object stores coalescence count of zero for each site. // Prevent accidental conversion from integer to FC_Status object via "explicit". explicit FC_Status(); // Interface to the count-maintaining algorithm: Increase the count at a set of sites by ONE. void Increment_FC_Counts(const rangeset & sites_to_increment) { Adjust_FC_Counts(sites_to_increment, +1); } // Interface to the count-maintaining algorithm: Decrease the count at a set of sites by ONE. void Decrement_FC_Counts(const rangeset & sites_to_decrement) { Adjust_FC_Counts(sites_to_decrement, -1); } // Return set of ranges (a "rangeset") representing all sites that have coalesced. Once coalesced at a given // level, those sites are coalesced at all levels rootward; the rangeset represents cumulative coalescence // (this level and all rootward), not just sites that coalesce AT this level. Objects (rangesets) returned // are by-value copies and thus remain valid despite changes in the internal state of the FC_Status object cache // (as the "horserace" proceeds to other time intervals). rangeset Coalesced_Sites(); // The FC_Status object is allocated on the stack and is destructed when its containing variable goes out // of scope. The FC_Status object contains two STL containers (which manage their own memory) and an built-in // int as data members (ie, no dynamically-allocated objects or pointers) and therefore there is no need // for other than a default destructor. ~FC_Status() {} // Verification of final-coalescence algorithm: test for equality of two FC_Status objects holding data // computed different ways. friend bool operator==(const FC_Status & lhs, const FC_Status & rhs); friend bool operator!=(const FC_Status & lhs, const FC_Status & rhs); #if (LAMARC_QA_FCTEST_DEBUGLEVEL > 0) // For testing; remove in production version. void PrintFCStatus(bool print_output, const rangeset & sites_to_adjust, long int adjustment); #endif // JDEBUG: quick and dirty GDB Debugging functions. long GridSize() const; // m_fc_grid.size() void PrintGrid() const; // print the contents of m_fc_grid private: // Site index value for LOWER end of lowest range inserted so far; effectively, the BEGINNING of the FC_Grid. long int m_site_index_lower_limit; // Site index value for UPPER end of highest range inserted so far; effectively, the END of the FC_Grid. long int m_site_index_upper_limit; // This FC_Grid maps keys (representing site number ranges) to coalescence counts. The (included) lower endpoint // site index of the range is the key of the FC_Node object (a pair) stored in the FC_Grid (the container). // The range represented is from the node's site index (included) to the site index (excluded) stored in the // adjacent (next in upward direction) FC_Node in the grid. // // The lowest LOWER endpoint is represented by the LOWER range endpoint site index for the LOWEST range entered // so far. This site index value is remembered by the private data member "m_site_index_lower_limit". // // The highest UPPER endpoint is represented by the UPPER range endpoint site index for the HIGHEST range entered // so far. This site index value is remembered by the private data member "m_site_index_upper_limit". // FC_Grid m_fc_grid; // Cached representation of coalesced sites (those whose coalescence count has reached unity). // This result is updated by the increment/decrement member functions. rangeset m_coalesced_sites; // Privately-declared and undefined functions. FC_Status(FC_Status &); // Don't let clients copy this object. FC_Status & operator=(FC_Status &); // Don't let clients assign to this object. // Does all the work of Increment_FC_Counts() and Decrement_FC_Counts(). void Adjust_FC_Counts(const rangeset & sites_to_adjust, long int adjustment); }; //------------------------------------------------------------------------------------ #endif // FC_STATUS_H //____________________________________________________________________________________