1 // $Id: fc_status.h,v 1.13 2011/03/08 08:16:42 bobgian Exp $
2 
3 /*
4  * Copyright 2010 Mary Kuhner, Jon Yamato, Joseph Felsenstein, and Bob Giansiracusa
5  *
6  * This software is distributed free of charge for non-commercial use
7  * and is copyrighted.  Of course, we do not guarantee that the software
8  * works, and are not responsible for any damage you may cause or have.
9  */
10 
11 //------------------------------------------------------------------------------------
12 
13 #ifndef FC_STATUS_H
14 #define FC_STATUS_H
15 
16 #include <set>
17 #include <map>
18 
19 // Defines various symbols used to control debugging of experimental code blocks.
20 #include "local_build.h"
21 
22 #include "range.h"
23 
24 //------------------------------------------------------------------------------------
25 
26 // An object of this type (FC_Grid) maps site range-starting-indices to coalescence branch-counts in the FC_Status
27 // object.  The FC_Grid data member is a map populated by objects of type FC_Node, each a pair whose KEY
28 // (first of the pair) represents the (included) beginning startpoint of a range of site index values whose
29 // (excluded) endpoint is given by the starting site index of the next range (in the increasing-index direction).
30 //
31 // Each FC_Node object in the FC_Grid consists of a pair containing two values: a "key" (site index value) and a
32 // "value"  (the count of branches at current tree level carrying the corresponding site "live" to some non-empty
33 // set of tips).
34 //
35 // The term "site index" refers to the index number identifying a site, which value is used as the "key" component
36 // of an FC_Node.  The term "branch count" refer to the number of branches crossing the current "level" in the tree
37 // which are carrying the set of sites live to the tips, which value is used as the "value" component of an FC_Node.
38 
39 // Type of the container object comprising the site-index -> branch-count map:
40 typedef   std::map<long int, long int, std::less<long int> >   FC_Grid;
41 
42 // Type of objects ("nodes") inserted into FC_Grid map.
43 // FIRST (key):    Site index (included starting site number for current range,
44 //                 also serves as excluded site number for "previous" range).
45 // SECOND (value): Live site branch count (number of branches carrying the sites represented by the current range).
46 //                 ALL sites in this range are indicated as being carried "live" by this value as a count of the
47 //                 number of branches carrying these sites live to at least one tip.  The branch count is the number
48 //                 of branches that cross a surface cutting the tree at a given level (this "level" is defined by the
49 //                 "time interval" or phase of the "horserace" portion of the tree-rearrangement algorithm; it is
50 //                 NOT the total number of branches in the tree which carry these sites "live" to some tips.
51 //
52 typedef   std::pair<long int, long int>   FC_Node;
53 
54 //------------------------------------------------------------------------------------
55 
56 // A class to support maintenance of information on which sites have attained Final Coalescence.  Given a site number,
57 // the functions of this class maintain counts of how many branches at a given "level" are "passing" sites through
58 // to the tips.  "Final Coalescence" is attained when that count (for a given site or range of sites) decreases to
59 // unity.  The class object contains a cached representation (a "rangeset") of such sites.  This cache is simply the
60 // set of all ranges whose count value is currently unity, and it is updated whenever the count computation results
61 // in a change in count.
62 //
63 // Note that the "coalesced sites rangeset cache" must be updated whenever branch counts for sites change, both on
64 // increment and on decrement.  Decrement makes sense: when the number of branches carrying a site decreases to one,
65 // that is a "final coalescence".  However, it is possible that the count may increase from zero to one and then never
66 // change again.  If such an increment were not tallied, those sites (which have already coalesced in the tree being
67 // sampled at that level) would be missed altogether.
68 
69 // Operation of FC_Status object -- the object contains two sub-objects:
70 //
71 //   Data member "m_fc_grid" is an FC_Grid object, which is a map containing nodes, each of whose key is a site index
72 //   (an integer representing the site index at which a branch-count changes) and whose "value" is a count of the
73 //   number of branches carrying this site "live" (visible at some set of tips) for the region STARTING (inclusive)
74 //   at the associated key index value and ENDING (exclusive) at the next higher (in the map's ordering) node's key
75 //   index value.  To determine the branch count for an arbitrary site, one can invoke the lower_bound() map member
76 //   function and interpret the resulting iterator:
77 //
78 //     - If the iterator equals end(), then there are no node(s) with keys equal to or greater than the given site
79 //       index.  Either there are no nodes at all in the FC_Grid, or the branch count is that of the value component
80 //       of the first node in the DECREASING direction.  It is an error for the value component of the HIGHEST node
81 //       in the map to be anything other than zero.  Such a lookup can be interpreted (if the assumption is that such
82 //       a site in in a range which HAS been entered into the map) as an error condition or as an indication that
83 //       information for a range containing the site in question has not yet been entered.
84 //
85 //     - If the iterator points to a node whose key matches the input search site index, then associated branch
86 //       count is the value component of the corresponding node.
87 //
88 //     - If there is no node with this key (so that lower_bound() returns an iterator to the next "higher" node),
89 //       one must decrement the iterator to obtain the next "lower" node, whose "value" slot is the count to apply
90 //       to the region represented by the range of site indices bounded by the keys of the two nodes (which includes
91 //       the site of interest).  If there is no "lower" node, then the input site index corresponds to a site for
92 //       which no information has been stored, which can be interpreted either as an error condition or as a branch
93 //       count of zero.
94 //
95 //   As the site-to-branch-count-maintenance operation proceeds, the algorithm watches for regions which abut
96 //   at a common node and have the same count value on both sides, combining them into a single region by deleting
97 //   the commom middle node.
98 //
99 //   Data member "m_coalesced_sites" is simply a rangeset (set of pairs, each indicating a range of sites by index,
100 //   with the same convention of including START edge and excluding END edge).  The union of the ranges denoted by
101 //   the pairs in the rangeset denotes the set of all sites which have come to Final Coalescence (in other words,
102 //   whose branch counts have decreased to or now are unity).  This information is updated by the FC_Grid maintenance
103 //   algorithm when computing branch counts and cached in this data member, making the information easy to retrieve.
104 
105 class FC_Status
106 {
107   public:
108 
109     // Construct object representing number of live-site-carrying-branches, indexed by (sparse array of) site numbers.
110     // Initial object stores coalescence count of zero for each site.
111     // Prevent accidental conversion from integer to FC_Status object via "explicit".
112     explicit FC_Status();
113 
114     // Interface to the count-maintaining algorithm: Increase the count at a set of sites by ONE.
Increment_FC_Counts(const rangeset & sites_to_increment)115     void Increment_FC_Counts(const rangeset & sites_to_increment)
116     {
117         Adjust_FC_Counts(sites_to_increment, +1);
118     }
119 
120     // Interface to the count-maintaining algorithm: Decrease the count at a set of sites by ONE.
Decrement_FC_Counts(const rangeset & sites_to_decrement)121     void Decrement_FC_Counts(const rangeset & sites_to_decrement)
122     {
123         Adjust_FC_Counts(sites_to_decrement, -1);
124     }
125 
126     // Return set of ranges (a "rangeset") representing all sites that have coalesced.  Once coalesced at a given
127     // level, those sites are coalesced at all levels rootward; the rangeset represents cumulative coalescence
128     // (this level and all rootward), not just sites that coalesce AT this level.   Objects (rangesets) returned
129     // are by-value copies and thus remain valid despite changes in the internal state of the FC_Status object cache
130     // (as the "horserace" proceeds to other time intervals).
131     rangeset Coalesced_Sites();
132 
133     // The FC_Status object is allocated on the stack and is destructed when its containing variable goes out
134     // of scope.  The FC_Status object contains two STL containers (which manage their own memory) and an built-in
135     // int as data members (ie, no dynamically-allocated objects or pointers) and therefore there is no need
136     // for other than a default destructor.
~FC_Status()137     ~FC_Status() {}
138 
139     // Verification of final-coalescence algorithm: test for equality of two FC_Status objects holding data
140     // computed different ways.
141     friend bool operator==(const FC_Status & lhs, const FC_Status & rhs);
142     friend bool operator!=(const FC_Status & lhs, const FC_Status & rhs);
143 
144 #if (LAMARC_QA_FCTEST_DEBUGLEVEL > 0)   // For testing; remove in production version.
145     void PrintFCStatus(bool print_output, const rangeset & sites_to_adjust, long int adjustment);
146 #endif
147 
148     // JDEBUG: quick and dirty GDB Debugging functions.
149     long GridSize() const;              // m_fc_grid.size()
150     void PrintGrid() const;             // print the contents of m_fc_grid
151 
152   private:
153 
154     // Site index value for LOWER end of lowest range inserted so far; effectively, the BEGINNING of the FC_Grid.
155     long int m_site_index_lower_limit;
156 
157     // Site index value for UPPER end of highest range inserted so far; effectively, the END of the FC_Grid.
158     long int m_site_index_upper_limit;
159 
160     // This FC_Grid maps keys (representing site number ranges) to coalescence counts.  The (included) lower endpoint
161     // site index of the range is the key of the FC_Node object (a pair) stored in the FC_Grid (the container).
162     // The range represented is from the node's site index (included) to the site index (excluded) stored in the
163     // adjacent (next in upward direction) FC_Node in the grid.
164     //
165     // The lowest LOWER endpoint is represented by the LOWER range endpoint site index for the LOWEST range entered
166     // so far.  This site index value is remembered by the private data member "m_site_index_lower_limit".
167     //
168     // The highest UPPER endpoint is represented by the UPPER range endpoint site index for the HIGHEST range entered
169     // so far.  This site index value is remembered by the private data member "m_site_index_upper_limit".
170     //
171     FC_Grid m_fc_grid;
172 
173     // Cached representation of coalesced sites (those whose coalescence count has reached unity).
174     // This result is updated by the increment/decrement member functions.
175     rangeset m_coalesced_sites;
176 
177     // Privately-declared and undefined functions.
178     FC_Status(FC_Status &);             // Don't let clients copy this object.
179     FC_Status & operator=(FC_Status &); // Don't let clients assign to this object.
180 
181     // Does all the work of Increment_FC_Counts() and Decrement_FC_Counts().
182     void Adjust_FC_Counts(const rangeset & sites_to_adjust, long int adjustment);
183 };
184 
185 //------------------------------------------------------------------------------------
186 
187 #endif // FC_STATUS_H
188 
189 //____________________________________________________________________________________
190