1 // $Id: fc_status.cpp,v 1.14 2011/03/08 08:16:42 bobgian Exp $
2
3 /*
4 * Copyright 2010 Mary Kuhner, Jon Yamato, Joseph Felsenstein, and Bob Giansiracusa
5 *
6 * This software is distributed free of charge for non-commercial use
7 * and is copyrighted. Of course, we do not guarantee that the software
8 * works, and are not responsible for any damage you may cause or have.
9 */
10
11 //------------------------------------------------------------------------------------
12
13 #include <set>
14 #include <map>
15
16 // Defines various symbols used to control debugging of experimental code blocks.
17 #include "local_build.h"
18
19 #include "fc_status.h"
20 #include "range.h"
21
22 //------------------------------------------------------------------------------------
23
24 using namespace std;
25
26 //------------------------------------------------------------------------------------
27 // Construct object representing live-site-branch-counts for arbitrary set of sites.
28 // Site indices are arbitrary; site indices (keys in map) are constructed as entries are made via update functions.
29 // Initial object is empty.
30
FC_Status()31 FC_Status::FC_Status()
32 : m_site_index_lower_limit(MAXLONG), // LOWER end of lowest range inserted so far; the BEGINNING of the FC_Grid.
33 m_site_index_upper_limit(0L) // UPPER end of highest range inserted so far; the END of the FC_Grid.
34 {
35 // Member m_fc_grid is an empty map, representing a live-site-branch-count of zero for all sites.
36 // Member m_coalesced_sites contains an empty rangeset at object construction,
37 // representing an empty set of final coalesced sites.
38 }
39
40 //------------------------------------------------------------------------------------
41 // Return set of ranges representing all sites that have coalesced, where "coalesced" means "have branch count one",
42 // whether the count was higher before and has decremented to one in "true coalescence" or the count just started off
43 // at zero and has now incremented to one.
44 //
45 // Objects (rangeset) returned are by-value copies and thus remain constant despite changes in internal state
46 // of FC_Status object (as horserace proceeds to other time intervals). Client owns copied return-value objects.
47
Coalesced_Sites()48 rangeset FC_Status::Coalesced_Sites()
49 {
50 // This function is called often and simply returns a value pre-computed and cached by Adjust_FC_Counts().
51 return m_coalesced_sites;
52 }
53
54 //------------------------------------------------------------------------------------
55 // Verification of final-coalescence algorithm: test for equality of two FC_Status objects computed different ways.
56 // Simply checks that both internal subobjects are equal (equality defined by the respective container classes).
57
operator ==(const FC_Status & fc_counter_lhs,const FC_Status & fc_counter_rhs)58 bool operator==(const FC_Status & fc_counter_lhs, const FC_Status & fc_counter_rhs)
59 {
60 return ((fc_counter_lhs.m_fc_grid == fc_counter_rhs.m_fc_grid)
61 &&
62 (fc_counter_lhs.m_coalesced_sites == fc_counter_rhs.m_coalesced_sites));
63 }
64
65 //----------------------------------------
66 // Both operators are provided for completeness.
67
operator !=(const FC_Status & fc_counter_lhs,const FC_Status & fc_counter_rhs)68 inline bool operator!=(const FC_Status & fc_counter_lhs, const FC_Status & fc_counter_rhs)
69 {
70 return !(fc_counter_lhs == fc_counter_rhs);
71 }
72
73 //------------------------------------------------------------------------------------
74 // Increment_FC_Counts() and Decrement_FC_Counts() are defined inline in class declaration.
75 // They both call this function, which does all the work.
76
Adjust_FC_Counts(const rangeset & sites_to_adjust,long int adjustment)77 void FC_Status::Adjust_FC_Counts(const rangeset & sites_to_adjust, long int adjustment)
78 {
79 // Even though "adjustment" can only be +1 or -1, all the operands it is added to are long ints,
80 // so it will be coerced to long anyway. Might as well start out that way.
81
82 // Used mostly for debugging and error-checking. Once fully debugged, may remove this.
83 int iteration(0);
84
85 // "Current range" is the range that iterator "range_iter" is pointing at for current iteration.
86 for(rangeset::const_iterator range_iter(sites_to_adjust.begin()) ;
87 range_iter != sites_to_adjust.end() ;
88 ++range_iter)
89 {
90 // This loop inserts new nodes if there is none pre-existing at the site index marking the LOWER or UPPER
91 // edge of a range (one of the ranges in the "sites_to_adjust" set). A new node always indicates a change
92 // in the associated branch count. However, it is possible that one endpoint (perhaps both) of an input
93 // range might coincide with pre-existing node in the FC_Grid (if the endpoint index happens to be a value
94 // at which the count stored internally changes). In such a "coincident edge endpoint" case, the
95 // pre-existing node can simply be updated to reflect the new count for the region it represents (upward).
96
97 const long int range_lower_index(range_iter->first);
98 const long int range_upper_index(range_iter->second);
99
100 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 10)
101 cerr << "Adjust_FC_Counts[1]: Start new iteration. Iteration: " << ++iteration << endl
102 << "Current range: ( " << range_lower_index << ' ' << range_upper_index << " )" << endl;
103 PrintFCStatus(false, sites_to_adjust, adjustment);
104 cerr << endl;
105 #endif
106
107 if(range_lower_index < range_upper_index) // Test for empty current range. Shouldn't happen ...
108 {
109 // Iterator "lower_edge" starts pointing to first node with key equal to or greater than current range
110 // LOWER index or equal to m_fc_grid.end() if no nodes have yet been inserted into the FC_Grid above the
111 // current range (because this is the first being inserted or just the highest so far being inserted).
112 FC_Grid::iterator lower_edge(m_fc_grid.lower_bound(range_lower_index));
113
114 // No nodes have yet been inserted into FC_Grid with keys HIGHER than (or EQUAL to) the LOWER end
115 // of the current range (this range is either the first one inserted or the highest one inserted so far).
116 // Insert one with branch count zero (will be adjusted later).
117 if(lower_edge == m_fc_grid.end())
118 {
119 // "Hint" form of insert(), using END iterator. Now "lower_edge" points to the node just inserted.
120 lower_edge = m_fc_grid.insert(lower_edge, pair<const long int, long int>(range_lower_index, 0L));
121
122 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 7)
123 cerr << "Adjust_FC_Counts[2]: Inserted new node, LOWER edge. Iteration: "
124 << iteration << endl
125 << "Current range: ( "
126 << range_lower_index << ' ' << range_upper_index << " )" << endl;
127 PrintFCStatus(false, sites_to_adjust, adjustment);
128 cerr << endl;
129 #endif
130
131 if(range_lower_index < m_site_index_lower_limit)
132 {
133 m_site_index_lower_limit = range_lower_index;
134
135 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 8)
136 cerr << "Adjust_FC_Counts[3]: Updated FC_Grid's LOWER range edge limit. Iteration: "
137 << iteration << endl
138 << "Current range: ( "
139 << range_lower_index << ' ' << range_upper_index << " )" << endl;
140 PrintFCStatus(false, sites_to_adjust, adjustment);
141 cerr << endl;
142 #endif
143 }
144 }
145
146 // Find node marking LOWER end of current range. If it exists, denote it with iterator "lower_edge"
147 // and leave its pre-adjustment count in place (to be updated in traversal of current range later).
148 else if(lower_edge->first == range_lower_index)
149 {
150 // Iterator "lower_edge" points to pre-existing node marking current range LOWER end.
151 // This marks the BEGINNING of the upcoming traversal (the first node that WILL be adjusted).
152
153 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 5)
154 cerr << "Adjust_FC_Counts[4]: Will adjust node, LOWER edge. Iteration: "
155 << iteration << endl
156 << "Current range: ( "
157 << range_lower_index << ' ' << range_upper_index << " )" << endl;
158 PrintFCStatus(false, sites_to_adjust, adjustment);
159 cerr << endl;
160 #endif
161 }
162
163 // If no existing node was found with key searched for above (ie, LOWER end of current range is between
164 // two existing nodes in FC_Grid), find first node going lower from the one pointed at by iterator
165 // "lower_edge)" (which is the first existing node with key going UP from current range LOWER end),
166 // extract its count, and insert new node marking actual LOWER end of current range (using just-extracted
167 // pre-adjustment branch count). If NO node exists with a key below the value being searched for, DON'T
168 // decrement the iterator and DO use zero as the branch count.
169 else
170 {
171 long int branch_count = 0L; // Default branch count if no node exists "below" insert location.
172
173 // Don't attempt to point iterator "below" the LOWEST node in the entire FC_Grid.
174 if(range_lower_index > m_site_index_lower_limit)
175 {
176 // If a node "below" LOWER end of current range exists, read branch count and reset iterator.
177 branch_count = (--lower_edge)->second;
178 ++lower_edge; // Reset iterator to value outside this conditional.
179 }
180
181 // "Hint" form of insert() used here, with "hint" being original position returned by lower_bound().
182 // Now "lower_edge" points to the node just inserted.
183 lower_edge = m_fc_grid.insert(lower_edge, pair<const long, long>(range_lower_index, branch_count));
184
185 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 6)
186 cerr << "Adjust_FC_Counts[5]: Inserted new node, LOWER edge. Iteration: " << iteration << endl
187 << "Current range: ( " << range_lower_index << ' '
188 << range_upper_index << " )" << endl;
189 PrintFCStatus(false, sites_to_adjust, adjustment);
190 cerr << endl;
191 #endif
192
193 if(range_lower_index < m_site_index_lower_limit)
194 {
195 m_site_index_lower_limit = range_lower_index;
196
197 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 8)
198 cerr << "Adjust_FC_Counts[6]: Updated FC_Grid's LOWER range edge limit. Iteration: "
199 << iteration << endl
200 << "Current range: ( "
201 << range_lower_index << ' ' << range_upper_index << " )" << endl;
202 PrintFCStatus(false, sites_to_adjust, adjustment);
203 cerr << endl;
204 #endif
205 }
206 }
207 // Now iterator "lower_edge" points to node (not yet adjusted) at included LOWER end of current range.
208
209 // Iterator "upper_edge" starts pointing to first node with key equal to or greater than current range
210 // UPPER index or equal to m_fc_grid.end() if no nodes have yet been inserted into the FC_Grid above the
211 // current range (because this is first range inserted or just highest range so far inserted).
212 FC_Grid::iterator upper_edge(m_fc_grid.lower_bound(range_upper_index));
213
214 // No nodes have been inserted into FC_Grid with keys HIGHER than (or EQUAL to) the UPPER end of current
215 // range (this range is either first inserted or highest inserted so far). Insert one with branch count
216 // zero (to be adjusted later).
217 if(upper_edge == m_fc_grid.end())
218 {
219 // "Hint" form of insert(), with "hint" being END iterator. Now "upper_edge" points to node inserted.
220 upper_edge = m_fc_grid.insert(upper_edge, pair<const long int, long int>(range_upper_index, 0L));
221
222 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 7)
223 cerr << "Adjust_FC_Counts[7]: Inserted new node, UPPER edge. Iteration: " << iteration << endl
224 << "Current range: ( " << range_lower_index
225 << ' ' << range_upper_index << " )" << endl;
226 PrintFCStatus(false, sites_to_adjust, adjustment);
227 cerr << endl;
228 #endif
229
230 if(range_upper_index > m_site_index_upper_limit)
231 {
232 m_site_index_upper_limit = range_upper_index;
233
234 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 8)
235 cerr << "Adjust_FC_Counts[8]: Updated FC_Grid's UPPER range edge limit. Iteration: "
236 << iteration << endl << "Current range: ( "
237 << range_lower_index << ' ' << range_upper_index << " )" << endl;
238 PrintFCStatus(false, sites_to_adjust, adjustment);
239 cerr << endl;
240 #endif
241 }
242 }
243
244 // Find node marking UPPER end of current range. If it exists, denote it with iterator "upper_edge"
245 // and leave its pre-adjustment count in place (to be updated in traversal of current range later).
246 else if(upper_edge->first == range_upper_index)
247 {
248 // Iterator "upper_edge" points to pre-existing node marking current range UPPER end.
249 // This marks the END of the upcoming traversal (the first node that will NOT be adjusted).
250
251 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 5)
252 cerr << "Adjust_FC_Counts[9]: Will adjust node, UPPER edge. Iteration: "
253 << iteration << endl << "Current range: ( "
254 << range_lower_index << ' ' << range_upper_index << " )" << endl;
255 PrintFCStatus(false, sites_to_adjust, adjustment);
256 cerr << endl;
257 #endif
258 }
259
260 // If no existing node was found with key searched for above (ie, UPPER end of current range is between
261 // two existing nodes in FC_Grid), find first node going lower from the one pointed at by iterator
262 // "upper_edge)" (which is the first existing node with key going DOWN from current range UPPER end),
263 // extract its count, and insert new node marking actual UPPER end of current range (using just-extracted
264 // pre-adjustment branch count).
265 //
266 // If NO node exists with a key below the value being searched for,
267 // DON'T decrement the iterator and DO use zero as the branch count.
268 else
269 {
270 long int branch_count = 0L; // Branch count if no node exists below current insert location.
271
272 // Don't attempt to point iterator "below" the LOWEST node in the entire FC_Grid.
273 if(range_upper_index > m_site_index_lower_limit)
274 {
275 // If a node "below" UPPER end of current range exists, read branch count and reset iterator.
276 branch_count = (--upper_edge)->second;
277 ++upper_edge; // Reset iterator to value outside this conditional.
278 }
279
280 // "Hint" form of insert(), with "hint" being original position returned by lower_bound() above.
281 // Now "upper_edge" points to the node just inserted.
282 upper_edge = m_fc_grid.insert(upper_edge, pair<const long, long>(range_upper_index, branch_count));
283
284 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 6)
285 cerr << "Adjust_FC_Counts[10]: Inserted new node, UPPER edge. Iteration: "
286 << iteration << endl << "Current range: ( " << range_lower_index << ' '
287 << range_upper_index << " )" << endl;
288 PrintFCStatus(false, sites_to_adjust, adjustment);
289 cerr << endl;
290 #endif
291
292 if(range_upper_index > m_site_index_upper_limit)
293 {
294 m_site_index_upper_limit = range_upper_index;
295
296 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 8)
297 cerr << "Adjust_FC_Counts[11]: Updated FC_Grid's UPPER range edge limit. Iteration: "
298 << iteration << endl << "Current range: ( "
299 << range_lower_index << ' ' << range_upper_index << " )" << endl;
300 PrintFCStatus(false, sites_to_adjust, adjustment);
301 cerr << endl;
302 #endif
303 }
304 }
305 // Now iterator "upper_edge" points to node (not yet adjusted) at included UPPER end of current range
306 // (whether pre-existing or just inserted).
307 //
308 // Now we can traverse all nodes within the current range, starting with the node marking the LOWER edge
309 // (inclusive - will be adjusted) and continuing to (but excluded - will not be adjusted) the node
310 // marking UPPER edge of current range.
311 for(FC_Grid::iterator node_iter(lower_edge); node_iter != upper_edge; ++node_iter)
312 {
313 node_iter->second += adjustment;
314 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 10)
315 cerr << "Adjust_FC_Counts[12]: During traversal, per node, after adjustment: Node key: "
316 << node_iter->first << ", Node value: " << node_iter->second << endl << endl;
317 #endif
318 }
319
320 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 9)
321 cerr << "Adjust_FC_Counts[13]: After traversal and node adjustments. Iteration: " << iteration << endl
322 << "Current range: ( " << range_lower_index << ' ' << range_upper_index << " )" << endl;
323 PrintFCStatus(false, sites_to_adjust, adjustment);
324 cerr << endl;
325 #endif
326 }
327 else
328 {
329 // ERROR: Empty or malformed range presented. Here now for debugging purposes.
330 cerr << "Adjust_FC_Counts[14]: Empty or malformed range presented. Iteration: " << iteration << endl;
331 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 1)
332 PrintFCStatus(true, sites_to_adjust, adjustment);
333 #endif
334 cerr << endl << "Emergency exit." << endl;
335 exit(1);
336 }
337 }
338
339 // Now re-compute and cache the "coalesced sites", which is simple a rangeset of all sites whose branch count
340 // is now one. Note that a site may have a count of one either via coalescence (decrement of count to one from a
341 // higher value) or simply because the count is now one (initialized to zero by constructor, incremented
342 // once, and never changed again). Thus we must recompute the cache after a range's incremented branch count
343 // (first increment, in case no more occur) as well as after a decremented branch count (a "true" coalescence).
344 //
345 // Hopefully soon this section will contain a beautiful functional "STL algorithm-style" method for updating a
346 // data-structure. For now, we simply start from scratch each time and build a set holding the appropriate data.
347 // For now, since we are scanning the entire FC_Grid here in order to construct the cached rangeset each time,
348 // we also do a bunch of consistency tests which can be eliminated once this code passes its startup sanity tests.
349
350 // Clear it and start from scratch each time. Later we will update rather than rebuild this object.
351 m_coalesced_sites.clear();
352
353 FC_Grid::iterator fc_grid_limit(m_fc_grid.end());
354 long int previous_site_index = 0L;
355 long int previous_branch_count = 0L;
356 long int current_site_index = 0L;
357 long int current_branch_count = 0L;
358
359 iteration = 0; // Reset iteration variable to count iters through a different loop.
360
361 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 9)
362 cerr << "Adjust_FC_Counts[15]: Beginning traversal on computing cached coalescences. Previous: ("
363 << previous_site_index << ' ' << previous_branch_count << "), Current: ("
364 << current_site_index << ' ' << current_branch_count << ')' << endl;
365 PrintFCStatus(true, sites_to_adjust, adjustment);
366 cerr << endl;
367 #endif
368
369 for(FC_Grid::iterator node_iter( m_fc_grid.begin()) ; node_iter != fc_grid_limit ; /* increment inside loop */ )
370 {
371 current_site_index = node_iter->first;
372 current_branch_count = node_iter->second;
373
374 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 10)
375 cerr << "Adjust_FC_Counts[16]: Traversal iteration on computing cached coalescences. Iteration: "
376 << ++iteration << " Previous: (" << previous_site_index << ' ' << previous_branch_count
377 << "), Current: (" << current_site_index << ' ' << current_branch_count << ')' << endl;
378 PrintFCStatus(true, sites_to_adjust, adjustment);
379 cerr << endl;
380 #endif
381
382 if(current_branch_count < 0L)
383 {
384 cerr << "Adjust_FC_Counts[17]: Negative branch count on computing cached coalescences. Iteration: "
385 << iteration << " Previous: (" << previous_site_index << ' ' << previous_branch_count
386 << "), Current: (" << current_site_index << ' ' << current_branch_count << ')' << endl;
387 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 1)
388 PrintFCStatus(true, sites_to_adjust, adjustment);
389 #endif
390 cerr << endl << "Emergency exit." << endl;
391 exit(1);
392 }
393
394 if(previous_branch_count == current_branch_count)
395 {
396 // If two adjacent ranges meet in a node and have the same (post-adjustment) branch counts, we can merge
397 // those regions by deleting the node marking their junction. It is also OK to delete the LOWEST and/or
398 // HIGHEST node(s) if their branch count values are zero (on the LOWER end)
399 // or if they match that to the left (on the UPPER end).
400
401 FC_Grid::iterator delete_me(node_iter);
402 ++node_iter; // Increment iterator before deleting node; then continue traversal.
403 m_fc_grid.erase(delete_me);
404
405 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 3)
406 cerr << "Adjust_FC_Counts[18]: After region merge, erased node at LOWER edge. Iteration: "
407 << iteration << endl << "Inferior range: (" << previous_site_index << ' '
408 << current_site_index << ") Branch count: " << current_branch_count << endl;
409 PrintFCStatus(true, sites_to_adjust, adjustment);
410 cerr << endl;
411 #endif
412 }
413 else
414 {
415 // Regions abutting at this node have different branch counts. If LOWER region (to left of current node)
416 // has count of 1, then it represents a final coalescence (or a count climbing up from zero). Insert that
417 // region into the cache. Otherwise, keep looking.
418 if(previous_branch_count == 1L)
419 {
420 // "Hinted" insertion location is always just preceding the END (UPPER end of entire FC_Grid) iterator
421 // position, because we are always doing a "push_back" insertion.
422 m_coalesced_sites.insert(m_coalesced_sites.end(),
423 pair<const long, long>(previous_site_index, current_site_index));
424
425 #if (LAMARC_QA_FCTEST_DEBUGLEVEL >= 2)
426 cerr << "Adjust_FC_Counts[19]: Inserted range pair " << previous_site_index << ':'
427 << (current_site_index - 1) << " on computing cached coalescences. Iteration: "
428 << iteration << " Previous: (" << previous_site_index << ' ' << previous_branch_count
429 << "), Current: (" << current_site_index << ' ' << current_branch_count << ')' << endl;
430 PrintFCStatus(true, sites_to_adjust, adjustment);
431 cerr << endl;
432 #endif
433 }
434
435 ++node_iter; // Increment inside loop since we already incremented inside other branch of conditional.
436
437 previous_site_index = current_site_index;
438 previous_branch_count = current_branch_count;
439 }
440 }
441 }
442
443 //------------------------------------------------------------------------------------
444 // JDEBUG: quick and dirty GDB Debugging functions.
445
GridSize() const446 long int FC_Status::GridSize() const
447 {
448 return m_fc_grid.size();
449 }
450
PrintGrid() const451 void FC_Status::PrintGrid() const
452 {
453 for(FC_Grid::const_iterator node_iter = m_fc_grid.begin(); node_iter != m_fc_grid.end(); ++node_iter)
454 {
455 cerr << node_iter->first << ":" << node_iter->second << ",";
456 }
457
458 cout << endl;
459 }
460
461 //------------------------------------------------------------------------------------
462 // Testing and debugging; remove in production version.
463
464 #if (LAMARC_QA_FCTEST_DEBUGLEVEL > 0)
465
PrintFCStatus(bool print_output,const rangeset & sites_to_adjust,long int adjustment)466 void FC_Status::PrintFCStatus(bool print_output, const rangeset & sites_to_adjust, long int adjustment)
467 {
468 cerr << "Input rangeset: " << ToString(sites_to_adjust) << " <==> ( ";
469
470 for(rangeset::const_iterator range_iter(sites_to_adjust.begin()) ;
471 range_iter != sites_to_adjust.end() ;
472 ++range_iter)
473 {
474 cerr << '(' << range_iter->first << ' ' << range_iter->second << ") ";
475 }
476
477 cerr << ')' << endl
478 << "Adjustment: " << adjustment << endl;
479
480 cerr << "site_index_limits: " << m_site_index_lower_limit << " (lower), "
481 << m_site_index_upper_limit << " (upper)" << endl;
482
483 cerr << "m_fc_grid: ( ";
484 for(FC_Grid::const_iterator node_iter(m_fc_grid.begin()); node_iter != m_fc_grid.end(); ++node_iter)
485 {
486 cerr << '(' << node_iter->first << ' ' << node_iter->second << ") ";
487 }
488 cerr << ')' << endl;
489
490 if(print_output)
491 {
492 cerr << "m_coalesced_sites: ( ";
493 for(rangeset::const_iterator range_iter(m_coalesced_sites.begin()) ;
494 range_iter != m_coalesced_sites.end() ;
495 ++range_iter)
496 {
497 cerr << '(' << range_iter->first << ' ' << range_iter->second << ") ";
498 }
499 cerr << ')' << endl;
500 }
501 }
502
503 #endif
504
505 //____________________________________________________________________________________
506