1 // $Id: rangex.cpp,v 1.23 2011/12/29 23:44:19 ewalkup Exp $
2
3 /*
4 Copyright 2002 Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5
6 This software is distributed free of charge for non-commercial use
7 and is copyrighted. Of course, we do not guarantee that the software
8 works, and are not responsible for any damage you may cause or have.
9 */
10
11 #include <cassert>
12
13 #include "local_build.h"
14
15 #include "rangex.h"
16 #include "stringx.h"
17 #include "registry.h"
18
19 using std::string;
20 using std::make_pair;
21
22 //------------------------------------------------------------------------------------
23
ToString(rangepair rpair)24 string ToString(rangepair rpair)
25 {
26 //Note! This prints stuff out in 'user units' instead of 'internal units'.
27 // In other words, if the range is <0,34>, this is an 'open-ended' interval
28 // and the actual active sites are 0:33
29 //
30 //Furthermore, if the user is expecting to have no zeroes in the output, we
31 // need to convert the 0 to -1, and display "-1:33".
32 rpair.second--;
33 rpair = ToNoZeroesIfNeeded(rpair);
34 string retval = ToString(rpair.first);
35 if (rpair.second > rpair.first)
36 {
37 retval += ":" + ToString(rpair.second);
38 }
39 return retval;
40 }
41
42 //------------------------------------------------------------------------------------
43
ToString(rangeset rset)44 string ToString(rangeset rset)
45 {
46 rangeset::iterator rpair = rset.begin();
47 if (rpair == rset.end())
48 {
49 return "(none)";
50 }
51 string retval = ToString(*rpair);
52 rpair++;
53 for ( ; rpair != rset.end(); rpair++)
54 {
55 retval += ", " + ToString(*rpair);
56 }
57 return retval;
58 }
59
60 //------------------------------------------------------------------------------------
61 // Returns a rangeset as a set containing a single pair, that defined by its two arguments.
62
MakeRangeset(long int low,long int high)63 rangeset MakeRangeset(long int low, long int high)
64 {
65 rangeset retset;
66 retset.insert(make_pair(low, high));
67
68 return retset;
69 }
70
71 //------------------------------------------------------------------------------------
72 // Returns a rangeset as the set union of the first arg (as a rangepair) plus the second arg (as a rangeset).
73
AddPairToRange(const rangepair & addpart,const rangeset & rset)74 rangeset AddPairToRange(const rangepair& addpart, const rangeset& rset)
75 {
76 rangeset retset = rset;
77
78 rangepair low = make_pair(addpart.first, addpart.first);
79 rangepair high = make_pair(addpart.second, addpart.second);
80
81 long int newlow = low.first;
82 long int newhigh = high.second;
83
84 rangesetiter early = retset.lower_bound(low);
85 if (early != retset.begin())
86 {
87 early--;
88 if (early->second >= low.first)
89 {
90 //'low' falls within early's interval
91 newlow = early->first;
92 }
93 }
94
95 rangesetiter late = retset.upper_bound(high);
96 //We need to increment this late.first == high.first
97 if (late != retset.end())
98 {
99 if (late->first == high.first)
100 {
101 late++;
102 }
103 }
104 if (late != retset.begin())
105 {
106 late--;
107 if (late->second > high.first)
108 {
109 //'high' falls within late's interval
110 newhigh = late->second;
111 }
112 }
113
114 early = retset.lower_bound(make_pair(newlow, newlow+1));
115 late = retset.upper_bound(make_pair(newhigh-1, newhigh));
116
117 retset.erase(early, late);
118 retset.insert(make_pair(newlow, newhigh));
119
120 return retset;
121 }
122
123 //------------------------------------------------------------------------------------
124 // Returns a rangeset as the set difference of the second arg (as a rangeset) minus the first arg (as a rangepair).
125
RemovePairFromRange(const rangepair & removepart,const rangeset & rset)126 rangeset RemovePairFromRange(const rangepair& removepart, const rangeset& rset)
127 {
128 rangeset retset;
129 for (rangeset::iterator range=rset.begin(); range != rset.end(); range++)
130 {
131 if ((range->first >= removepart.first) && (range->second <= removepart.second))
132 {
133 //Don't add it.
134 }
135 else
136 {
137 if ((range->first < removepart.first) && (range->second > removepart.second))
138 {
139 //Add two outside halves.
140 retset.insert(make_pair(range->first, removepart.first));
141 retset.insert(make_pair(removepart.second, range->second));
142 }
143 else if ((range->second > removepart.first) && (range->second <= removepart.second))
144 {
145 //Add only the first half.
146 retset.insert(make_pair(range->first, removepart.first));
147 }
148 else if ((range->first >= removepart.first) && (range->first <= removepart.second))
149 {
150 //Add only the second half.
151 retset.insert(make_pair(removepart.second, range->second));
152 }
153 else
154 {
155 //Add the whole thing.
156 retset.insert(*range);
157 }
158 }
159 }
160 return retset;
161 }
162
163 //------------------------------------------------------------------------------------
164 // Returns a rangeset as the set difference of the second arg (as a rangeset) minus the first arg (as a rangeset).
165
RemoveRangeFromRange(const rangeset & removerange,const rangeset & rset)166 rangeset RemoveRangeFromRange(const rangeset& removerange, const rangeset& rset)
167 {
168 rangeset retset = rset;
169
170 for (rangeset::iterator removepair = removerange.begin(); removepair != removerange.end(); removepair++)
171 {
172 retset = RemovePairFromRange(*removepair, retset);
173 }
174
175 return retset;
176 }
177
178 //------------------------------------------------------------------------------------
179 // LS NOTE:
180 // These functions originally written for range.cpp, but ended up being needed elsewhere.
181 //
182 // Note that 'Union' (used to be 'OR' but I got confused too often) actually does the exact same thing that
183 // 'AddRangeToRange' used to, but much faster, so I took out AddRangeToRange entirely. Perhaps there's a
184 // similar way to speed up RemoveRangeFromRange? We'll see if it shows up in the profiler.
185
Union(const rangeset & set1,const rangeset & set2)186 rangeset Union(const rangeset& set1, const rangeset& set2)
187 {
188 if (set1.empty()) return set2;
189 if (set2.empty()) return set1;
190
191 rangeset mergeset;
192 merge(set1.begin(),set1.end(),set2.begin(),set2.end(),
193 inserter(mergeset,mergeset.begin()));
194
195 rangeset newset;
196 rangeset::iterator rit;
197
198 for(rit = mergeset.begin(); rit != mergeset.end(); ++rit)
199 {
200 ORAppend(*rit,newset);
201 }
202
203 return newset;
204 } // OR
205
206 //------------------------------------------------------------------------------------
207 // Helper function for 'Union'
208
ORAppend(rangepair newrange,rangeset & oldranges)209 void ORAppend(rangepair newrange, rangeset& oldranges)
210 {
211 if (oldranges.empty())
212 {
213 oldranges.insert(newrange);
214 return;
215 }
216
217 rangeset::iterator last = --oldranges.end();
218
219 assert(newrange.first >= last->first); // Expect sequential, sorted input.
220
221 if (newrange.second <= last->second) return; // New is contained in old.
222
223 if (newrange.first > last->second) // New is after old.
224 {
225 oldranges.insert(oldranges.end(),newrange);
226 return;
227 }
228
229 newrange.first = last->first; // New starts within old and extends past it.
230 oldranges.erase(last);
231 oldranges.insert(oldranges.end(),newrange);
232
233 return;
234 } // ORAppend
235
236 //------------------------------------------------------------------------------------
237 // Used to be 'AND' until I got confused too often.
238
Intersection(const rangeset & mother,const rangeset & father)239 rangeset Intersection(const rangeset& mother, const rangeset& father)
240 {
241 rangeset::const_iterator m = mother.begin();
242 rangeset::const_iterator f = father.begin();
243
244 rangeset result;
245 rangepair newpair;
246
247 if (mother.empty() || father.empty()) return result;
248
249 while (true)
250 {
251 newpair.first = std::max((*m).first, (*f).first);
252 newpair.second = std::min((*m).second, (*f).second);
253
254 if (newpair.first < newpair.second)
255 result.insert(result.end(),newpair);
256
257 if ((*m).second < (*f).second) ++m;
258 else ++f;
259
260 if (m == mother.end() || f == father.end()) return result;
261 }
262 } // Intersection
263
264 //------------------------------------------------------------------------------------
265 // Counts the sites (actually, any object; could be links) in a rangeset.
266
CountSites(rangeset rset)267 long int CountSites(rangeset rset)
268 {
269 long int retval(0);
270
271 for (rangeset::iterator rpair = rset.begin(); rpair != rset.end(); rpair++)
272 {
273 retval += rpair->second - rpair->first;
274 }
275
276 return retval;
277 }
278
279 //------------------------------------------------------------------------------------
280 // Measures the distance, in links, between the two ends of the sites in its argument.
281
CountLinks(const rangeset & sites)282 long CountLinks(const rangeset& sites)
283 {
284 if (sites.empty()) return 0L;
285
286 long start = sites.begin()->first;
287 long end = sites.rbegin()->second;
288 assert(start < end);
289
290 return end - start - 1;
291 } // CountLinks
292
293 //------------------------------------------------------------------------------------
294
TargetLinksFrom(const rangeset & targetsites)295 rangeset TargetLinksFrom(const rangeset& targetsites)
296 {
297 rangeset targetlinks;
298
299 // If the targetsites are empty we really do want to return an empty rangeset.
300 if (!targetsites.empty())
301 {
302 long firstlink(targetsites.begin()->first);
303 long lastlink(targetsites.rbegin()->second-1);
304
305 if (firstlink != lastlink)
306 {
307 rangepair newlinks(firstlink,lastlink);
308 targetlinks = AddPairToRange(newlinks,targetlinks);
309 }
310 }
311
312 return targetlinks;
313 } // TargetLinksFrom
314
315 //------------------------------------------------------------------------------------
316
AddToRangeset(const rangeset & rset,long offset)317 rangeset AddToRangeset(const rangeset& rset, long offset)
318 {
319 rangeset::const_iterator range;
320 rangeset newRangeSet;
321
322 for(range = rset.begin(); range != rset.end(); ++range)
323 {
324 long newFirst = range->first + offset;
325 long newSecond = range->second + offset;
326
327 newRangeSet.insert(make_pair(newFirst,newSecond));
328 }
329 return newRangeSet;
330 }
331
SubtractFromRangeset(const rangeset & rset,long offset)332 rangeset SubtractFromRangeset(const rangeset& rset, long offset)
333 {
334 long invOffset = 0 - offset;
335 return AddToRangeset(rset,invOffset);
336 }
337
338 //------------------------------------------------------------------------------------
339
IsInRangeset(const rangeset & targetset,long target)340 bool IsInRangeset(const rangeset& targetset, long target)
341 {
342 if (targetset.empty())
343 return false;
344
345 rangeset::const_iterator range;
346
347 for(range = targetset.begin(); range != targetset.end(); ++range)
348 {
349 if (range->second <= target) continue;
350 if (range->first <= target) return true;
351 break;
352 }
353
354 return false;
355 } // IsInRangeset
356
357 //------------------------------------------------------------------------------------
358 // Used in Range::AreLowSitesOnInactiveBranch_Rg to diagnose whether sites of interest
359 // occur on both sides of the cutpoint.
360
Surrounds(const rangeset & targetsites,long targetlink)361 bool Surrounds(const rangeset& targetsites, long targetlink)
362 {
363 if (targetsites.empty()) return false;
364
365 return (targetlink >= targetsites.begin()->first && targetlink < targetsites.rbegin()->second - 1);
366 } // Surrounds
367
368 //------------------------------------------------------------------------------------
369 //These functions are to be used when converting site labels for display to
370 // the user, and from *menu* input from the user. It is assumed that in the
371 // XML, the 'user input' has already been converted to the 'sequential' version.
372
ToSequentialIfNeeded(long int input)373 long int ToSequentialIfNeeded(long int input)
374 {
375 //Note: input might equal zero if it was the upper end of a rangepair,
376 // since rangepairs are open-ended by default. In other words, if the user
377 // types in "-20:-1" in the menu, the first thing that happens is that these
378 // numbers are converted to the pair <-20, 0>. To convert these values to
379 // the 'sequential' numbering system, this needs to be converted to the
380 // pair <-19, 1>.
381
382 if (registry.GetConvertOutputToEliminateZeroes() && (input <= 0))
383 {
384 input++;
385 }
386
387 return input;
388 }
389
390 //------------------------------------------------------------------------------------
391
ToNoZeroesIfNeeded(long int input)392 long int ToNoZeroesIfNeeded(long int input)
393 {
394 if (registry.GetConvertOutputToEliminateZeroes() && (input <= 0))
395 {
396 input--;
397 }
398
399 return input;
400 }
401
402 //------------------------------------------------------------------------------------
403
ToSequentialIfNeeded(rangepair input)404 rangepair ToSequentialIfNeeded(rangepair input)
405 {
406 if (registry.GetConvertOutputToEliminateZeroes())
407 {
408 input.first = ToSequentialIfNeeded(input.first);
409 input.second = ToSequentialIfNeeded(input.second);
410 }
411
412 return input;
413 }
414
415 //------------------------------------------------------------------------------------
416
ToNoZeroesIfNeeded(rangepair input)417 rangepair ToNoZeroesIfNeeded(rangepair input)
418 {
419 if (registry.GetConvertOutputToEliminateZeroes())
420 {
421 input.first = ToNoZeroesIfNeeded(input.first);
422 input.second = ToNoZeroesIfNeeded(input.second);
423 }
424
425 return input;
426 }
427
428 //____________________________________________________________________________________
429
430