1 // $Id: rangex.cpp,v 1.23 2011/12/29 23:44:19 ewalkup Exp $
2 
3 /*
4   Copyright 2002  Peter Beerli, Mary Kuhner, Jon Yamato and Joseph Felsenstein
5 
6   This software is distributed free of charge for non-commercial use
7   and is copyrighted.  Of course, we do not guarantee that the software
8   works, and are not responsible for any damage you may cause or have.
9 */
10 
11 #include <cassert>
12 
13 #include "local_build.h"
14 
15 #include "rangex.h"
16 #include "stringx.h"
17 #include "registry.h"
18 
19 using std::string;
20 using std::make_pair;
21 
22 //------------------------------------------------------------------------------------
23 
ToString(rangepair rpair)24 string ToString(rangepair rpair)
25 {
26     //Note!  This prints stuff out in 'user units' instead of 'internal units'.
27     // In other words, if the range is <0,34>, this is an 'open-ended' interval
28     // and the actual active sites are 0:33
29     //
30     //Furthermore, if the user is expecting to have no zeroes in the output, we
31     // need to convert the 0 to -1, and display "-1:33".
32     rpair.second--;
33     rpair = ToNoZeroesIfNeeded(rpair);
34     string retval = ToString(rpair.first);
35     if (rpair.second > rpair.first)
36     {
37         retval += ":" + ToString(rpair.second);
38     }
39     return retval;
40 }
41 
42 //------------------------------------------------------------------------------------
43 
ToString(rangeset rset)44 string ToString(rangeset rset)
45 {
46     rangeset::iterator rpair = rset.begin();
47     if (rpair == rset.end())
48     {
49         return "(none)";
50     }
51     string retval = ToString(*rpair);
52     rpair++;
53     for ( ; rpair != rset.end(); rpair++)
54     {
55         retval += ", " + ToString(*rpair);
56     }
57     return retval;
58 }
59 
60 //------------------------------------------------------------------------------------
61 // Returns a rangeset as a set containing a single pair, that defined by its two arguments.
62 
MakeRangeset(long int low,long int high)63 rangeset MakeRangeset(long int low, long int high)
64 {
65     rangeset retset;
66     retset.insert(make_pair(low, high));
67 
68     return retset;
69 }
70 
71 //------------------------------------------------------------------------------------
72 // Returns a rangeset as the set union of the first arg (as a rangepair) plus the second arg (as a rangeset).
73 
AddPairToRange(const rangepair & addpart,const rangeset & rset)74 rangeset AddPairToRange(const rangepair& addpart, const rangeset& rset)
75 {
76     rangeset retset = rset;
77 
78     rangepair low  = make_pair(addpart.first, addpart.first);
79     rangepair high = make_pair(addpart.second, addpart.second);
80 
81     long int newlow = low.first;
82     long int newhigh = high.second;
83 
84     rangesetiter early = retset.lower_bound(low);
85     if (early != retset.begin())
86     {
87         early--;
88         if (early->second >= low.first)
89         {
90             //'low' falls within early's interval
91             newlow = early->first;
92         }
93     }
94 
95     rangesetiter late = retset.upper_bound(high);
96     //We need to increment this late.first == high.first
97     if (late != retset.end())
98     {
99         if (late->first == high.first)
100         {
101             late++;
102         }
103     }
104     if (late != retset.begin())
105     {
106         late--;
107         if (late->second > high.first)
108         {
109             //'high' falls within late's interval
110             newhigh = late->second;
111         }
112     }
113 
114     early = retset.lower_bound(make_pair(newlow, newlow+1));
115     late  = retset.upper_bound(make_pair(newhigh-1, newhigh));
116 
117     retset.erase(early, late);
118     retset.insert(make_pair(newlow, newhigh));
119 
120     return retset;
121 }
122 
123 //------------------------------------------------------------------------------------
124 // Returns a rangeset as the set difference of the second arg (as a rangeset) minus the first arg (as a rangepair).
125 
RemovePairFromRange(const rangepair & removepart,const rangeset & rset)126 rangeset RemovePairFromRange(const rangepair& removepart, const rangeset& rset)
127 {
128     rangeset retset;
129     for (rangeset::iterator range=rset.begin(); range != rset.end(); range++)
130     {
131         if ((range->first >= removepart.first) && (range->second <= removepart.second))
132         {
133             //Don't add it.
134         }
135         else
136         {
137             if ((range->first < removepart.first) && (range->second > removepart.second))
138             {
139                 //Add two outside halves.
140                 retset.insert(make_pair(range->first, removepart.first));
141                 retset.insert(make_pair(removepart.second, range->second));
142             }
143             else if ((range->second > removepart.first) && (range->second <= removepart.second))
144             {
145                 //Add only the first half.
146                 retset.insert(make_pair(range->first, removepart.first));
147             }
148             else if ((range->first >= removepart.first) && (range->first <= removepart.second))
149             {
150                 //Add only the second half.
151                 retset.insert(make_pair(removepart.second, range->second));
152             }
153             else
154             {
155                 //Add the whole thing.
156                 retset.insert(*range);
157             }
158         }
159     }
160     return retset;
161 }
162 
163 //------------------------------------------------------------------------------------
164 // Returns a rangeset as the set difference of the second arg (as a rangeset) minus the first arg (as a rangeset).
165 
RemoveRangeFromRange(const rangeset & removerange,const rangeset & rset)166 rangeset RemoveRangeFromRange(const rangeset& removerange, const rangeset& rset)
167 {
168     rangeset retset = rset;
169 
170     for (rangeset::iterator removepair = removerange.begin(); removepair != removerange.end(); removepair++)
171     {
172         retset = RemovePairFromRange(*removepair, retset);
173     }
174 
175     return retset;
176 }
177 
178 //------------------------------------------------------------------------------------
179 // LS NOTE:
180 // These functions originally written for range.cpp, but ended up being needed elsewhere.
181 //
182 // Note that 'Union' (used to be 'OR' but I got confused too often) actually does the exact same thing that
183 // 'AddRangeToRange' used to, but much faster, so I took out AddRangeToRange entirely.  Perhaps there's a
184 // similar way to speed up RemoveRangeFromRange?  We'll see if it shows up in the profiler.
185 
Union(const rangeset & set1,const rangeset & set2)186 rangeset Union(const rangeset& set1, const rangeset& set2)
187 {
188     if (set1.empty()) return set2;
189     if (set2.empty()) return set1;
190 
191     rangeset mergeset;
192     merge(set1.begin(),set1.end(),set2.begin(),set2.end(),
193           inserter(mergeset,mergeset.begin()));
194 
195     rangeset newset;
196     rangeset::iterator rit;
197 
198     for(rit = mergeset.begin(); rit != mergeset.end(); ++rit)
199     {
200         ORAppend(*rit,newset);
201     }
202 
203     return newset;
204 } // OR
205 
206 //------------------------------------------------------------------------------------
207 // Helper function for 'Union'
208 
ORAppend(rangepair newrange,rangeset & oldranges)209 void ORAppend(rangepair newrange, rangeset& oldranges)
210 {
211     if (oldranges.empty())
212     {
213         oldranges.insert(newrange);
214         return;
215     }
216 
217     rangeset::iterator last = --oldranges.end();
218 
219     assert(newrange.first >= last->first); // Expect sequential, sorted input.
220 
221     if (newrange.second <= last->second) return; // New is contained in old.
222 
223     if (newrange.first > last->second)  // New is after old.
224     {
225         oldranges.insert(oldranges.end(),newrange);
226         return;
227     }
228 
229     newrange.first = last->first;       // New starts within old and extends past it.
230     oldranges.erase(last);
231     oldranges.insert(oldranges.end(),newrange);
232 
233     return;
234 } // ORAppend
235 
236 //------------------------------------------------------------------------------------
237 // Used to be 'AND' until I got confused too often.
238 
Intersection(const rangeset & mother,const rangeset & father)239 rangeset Intersection(const rangeset& mother, const rangeset& father)
240 {
241     rangeset::const_iterator m = mother.begin();
242     rangeset::const_iterator f = father.begin();
243 
244     rangeset result;
245     rangepair newpair;
246 
247     if (mother.empty() || father.empty()) return result;
248 
249     while (true)
250     {
251         newpair.first = std::max((*m).first, (*f).first);
252         newpair.second = std::min((*m).second, (*f).second);
253 
254         if (newpair.first < newpair.second)
255             result.insert(result.end(),newpair);
256 
257         if ((*m).second < (*f).second) ++m;
258         else ++f;
259 
260         if (m == mother.end() || f == father.end()) return result;
261     }
262 } // Intersection
263 
264 //------------------------------------------------------------------------------------
265 // Counts the sites (actually, any object; could be links) in a rangeset.
266 
CountSites(rangeset rset)267 long int CountSites(rangeset rset)
268 {
269     long int retval(0);
270 
271     for (rangeset::iterator rpair = rset.begin(); rpair != rset.end(); rpair++)
272     {
273         retval += rpair->second - rpair->first;
274     }
275 
276     return retval;
277 }
278 
279 //------------------------------------------------------------------------------------
280 // Measures the distance, in links, between the two ends of the sites in its argument.
281 
CountLinks(const rangeset & sites)282 long CountLinks(const rangeset& sites)
283 {
284     if (sites.empty()) return 0L;
285 
286     long start = sites.begin()->first;
287     long end = sites.rbegin()->second;
288     assert(start < end);
289 
290     return end - start - 1;
291 } // CountLinks
292 
293 //------------------------------------------------------------------------------------
294 
TargetLinksFrom(const rangeset & targetsites)295 rangeset TargetLinksFrom(const rangeset& targetsites)
296 {
297     rangeset targetlinks;
298 
299     // If the targetsites are empty we really do want to return an empty rangeset.
300     if (!targetsites.empty())
301     {
302         long firstlink(targetsites.begin()->first);
303         long lastlink(targetsites.rbegin()->second-1);
304 
305         if (firstlink != lastlink)
306         {
307             rangepair newlinks(firstlink,lastlink);
308             targetlinks = AddPairToRange(newlinks,targetlinks);
309         }
310     }
311 
312     return targetlinks;
313 } // TargetLinksFrom
314 
315 //------------------------------------------------------------------------------------
316 
AddToRangeset(const rangeset & rset,long offset)317 rangeset AddToRangeset(const rangeset& rset, long offset)
318 {
319     rangeset::const_iterator range;
320     rangeset newRangeSet;
321 
322     for(range = rset.begin(); range != rset.end(); ++range)
323     {
324         long newFirst = range->first + offset;
325         long newSecond = range->second + offset;
326 
327         newRangeSet.insert(make_pair(newFirst,newSecond));
328     }
329     return newRangeSet;
330 }
331 
SubtractFromRangeset(const rangeset & rset,long offset)332 rangeset SubtractFromRangeset(const rangeset& rset, long offset)
333 {
334     long invOffset = 0 - offset;
335     return AddToRangeset(rset,invOffset);
336 }
337 
338 //------------------------------------------------------------------------------------
339 
IsInRangeset(const rangeset & targetset,long target)340 bool IsInRangeset(const rangeset& targetset, long target)
341 {
342     if (targetset.empty())
343         return false;
344 
345     rangeset::const_iterator range;
346 
347     for(range = targetset.begin(); range != targetset.end(); ++range)
348     {
349         if (range->second <= target) continue;
350         if (range->first <= target) return true;
351         break;
352     }
353 
354     return false;
355 } // IsInRangeset
356 
357 //------------------------------------------------------------------------------------
358 // Used in Range::AreLowSitesOnInactiveBranch_Rg to diagnose whether sites of interest
359 // occur on both sides of the cutpoint.
360 
Surrounds(const rangeset & targetsites,long targetlink)361 bool Surrounds(const rangeset& targetsites, long targetlink)
362 {
363     if (targetsites.empty()) return false;
364 
365     return (targetlink >= targetsites.begin()->first && targetlink < targetsites.rbegin()->second - 1);
366 } // Surrounds
367 
368 //------------------------------------------------------------------------------------
369 //These functions are to be used when converting site labels for display to
370 // the user, and from *menu* input from the user.  It is assumed that in the
371 // XML, the 'user input' has already been converted to the 'sequential' version.
372 
ToSequentialIfNeeded(long int input)373 long int ToSequentialIfNeeded(long int input)
374 {
375     //Note:  input might equal zero if it was the upper end of a rangepair,
376     // since rangepairs are open-ended by default.  In other words, if the user
377     // types in "-20:-1" in the menu, the first thing that happens is that these
378     // numbers are converted to the pair <-20, 0>.  To convert these values to
379     // the 'sequential' numbering system, this needs to be converted to the
380     // pair <-19, 1>.
381 
382     if (registry.GetConvertOutputToEliminateZeroes() && (input <= 0))
383     {
384         input++;
385     }
386 
387     return input;
388 }
389 
390 //------------------------------------------------------------------------------------
391 
ToNoZeroesIfNeeded(long int input)392 long int ToNoZeroesIfNeeded(long int input)
393 {
394     if (registry.GetConvertOutputToEliminateZeroes() && (input <= 0))
395     {
396         input--;
397     }
398 
399     return input;
400 }
401 
402 //------------------------------------------------------------------------------------
403 
ToSequentialIfNeeded(rangepair input)404 rangepair ToSequentialIfNeeded(rangepair input)
405 {
406     if (registry.GetConvertOutputToEliminateZeroes())
407     {
408         input.first = ToSequentialIfNeeded(input.first);
409         input.second = ToSequentialIfNeeded(input.second);
410     }
411 
412     return input;
413 }
414 
415 //------------------------------------------------------------------------------------
416 
ToNoZeroesIfNeeded(rangepair input)417 rangepair ToNoZeroesIfNeeded(rangepair input)
418 {
419     if (registry.GetConvertOutputToEliminateZeroes())
420     {
421         input.first = ToNoZeroesIfNeeded(input.first);
422         input.second = ToNoZeroesIfNeeded(input.second);
423     }
424 
425     return input;
426 }
427 
428 //____________________________________________________________________________________
429 
430