1 /**
2  * \file DMS.cpp
3  * \brief Implementation for GeographicLib::DMS class
4  *
5  * Copyright (c) Charles Karney (2008-2020) <charles@karney.com> and licensed
6  * under the MIT/X11 License.  For more information, see
7  * https://geographiclib.sourceforge.io/
8  **********************************************************************/
9 
10 #include <GeographicLib/DMS.hpp>
11 #include <GeographicLib/Utility.hpp>
12 
13 #if defined(_MSC_VER)
14 // Squelch warnings about constant conditional expressions
15 #  pragma warning (disable: 4127)
16 #endif
17 
18 namespace GeographicLib {
19 
20   using namespace std;
21 
  // Hemisphere letters.  The parser uses the index k of a letter in this
  // table: k / 2 selects latitude (S, N) versus longitude (W, E) and k % 2
  // selects the sign (an odd index is positive).
  const char* const DMS::hemispheres_ = "SNWE";
  // Sign characters; index 0 is negative, index 1 is positive.
  const char* const DMS::signs_ = "-+";
  // Decimal digits; the index of a character is used as its numeric value.
  const char* const DMS::digits_ = "0123456789";
  // Component indicators: indices 0, 1, 2 mark degrees, minutes and seconds;
  // index 3 (':') is a generic separator standing for "the next component".
  const char* const DMS::dmsindicators_ = "D'\":";
  // Component names, indexed like dmsindicators_, used in error messages.
  const char* const DMS::components_[] = {"degrees", "minutes", "seconds"};
27 
Decode(const std::string & dms,flag & ind)28   Math::real DMS::Decode(const std::string& dms, flag& ind) {
29     // Here's a table of the allowed characters
30 
31     // S unicode   dec  UTF-8      descripton
32 
33     // DEGREE
34     // d U+0064    100  64         d
35     // D U+0044     68  44         D
36     // ° U+00b0    176  c2 b0      degree symbol
37     // º U+00ba    186  c2 ba      alt symbol
38     // ⁰ U+2070   8304  e2 81 b0   sup zero
39     // ˚ U+02da    730  cb 9a      ring above
40     // ∘ U+2218   8728  e2 88 98   compose function
41     // * U+002a     42  2a         GRiD symbol for degrees
42 
43     // MINUTES
44     // ' U+0027     39  27         apostrophe
45     // ` U+0060     96  60         grave accent
46     // ′ U+2032   8242  e2 80 b2   prime
47     // ‵ U+2035   8245  e2 80 b5   back prime
48     // ´ U+00b4    180  c2 b4      acute accent
49     // ‘ U+2018   8216  e2 80 98   left single quote (also ext ASCII 0x91)
50     // ’ U+2019   8217  e2 80 99   right single quote (also ext ASCII 0x92)
51     // ‛ U+201b   8219  e2 80 9b   reversed-9 single quote
52     // ʹ U+02b9    697  ca b9      modifier letter prime
53     // ˊ U+02ca    714  cb 8a      modifier letter acute accent
54     // ˋ U+02cb    715  cb 8b      modifier letter grave accent
55 
56     // SECONDS
57     // " U+0022     34  22         quotation mark
58     // ″ U+2033   8243  e2 80 b3   double prime
59     // ‶ U+2036   8246  e2 80 b6   reversed double prime
60     // ˝ U+02dd    733  cb 9d      double acute accent
61     // “ U+201c   8220  e2 80 9c   left double quote (also ext ASCII 0x93)
62     // ” U+201d   8221  e2 80 9d   right double quote (also ext ASCII 0x94)
63     // ‟ U+201f   8223  e2 80 9f   reversed-9 double quote
64     // ʺ U+02ba    698  ca ba      modifier letter double prime
65 
66     // PLUS
67     // + U+002b     43  2b         plus sign
68     // ➕ U+2795  10133  e2 9e 95   heavy plus
69     //   U+2064   8292  e2 81 a4   invisible plus |⁤|
70 
71     // MINUS
72     // - U+002d     45  2d         hyphen
73     // ‐ U+2010   8208  e2 80 90   dash
74     // ‑ U+2011   8209  e2 80 91   non-breaking hyphen
75     // – U+2013   8211  e2 80 93   en dash (also ext ASCII 0x96)
76     // — U+2014   8212  e2 80 94   em dash (also ext ASCII 0x97)
77     // − U+2212   8722  e2 88 92   minus sign
78     // ➖ U+2796  10134  e2 9e 96   heavy minus
79 
80     // IGNORED
81     //   U+00a0    160  c2 a0      non-breaking space
82     //   U+2007   8199  e2 80 87   figure space | |
83     //   U+2009   8201  e2 80 89   thin space   | |
84     //   U+200a   8202  e2 80 8a   hair space   | |
85     //   U+200b   8203  e2 80 8b   invisible space |​|
86     //   U+202f   8239  e2 80 af   narrow space | |
87     //   U+2063   8291  e2 81 a3   invisible separator |⁣|
88     // « U+00ab    171  c2 ab      left guillemot (for cgi-bin)
89     // » U+00bb    187  c2 bb      right guillemot (for cgi-bin)
90 
91     string dmsa = dms;
92     replace(dmsa, "\xc2\xb0",     'd' ); // U+00b0 degree symbol
93     replace(dmsa, "\xc2\xba",     'd' ); // U+00ba alt symbol
94     replace(dmsa, "\xe2\x81\xb0", 'd' ); // U+2070 sup zero
95     replace(dmsa, "\xcb\x9a",     'd' ); // U+02da ring above
96     replace(dmsa, "\xe2\x88\x98", 'd' ); // U+2218 compose function
97 
98     replace(dmsa, "\xe2\x80\xb2", '\''); // U+2032 prime
99     replace(dmsa, "\xe2\x80\xb5", '\''); // U+2035 back prime
100     replace(dmsa, "\xc2\xb4",     '\''); // U+00b4 acute accent
101     replace(dmsa, "\xe2\x80\x98", '\''); // U+2018 left single quote
102     replace(dmsa, "\xe2\x80\x99", '\''); // U+2019 right single quote
103     replace(dmsa, "\xe2\x80\x9b", '\''); // U+201b reversed-9 single quote
104     replace(dmsa, "\xca\xb9",     '\''); // U+02b9 modifier letter prime
105     replace(dmsa, "\xcb\x8a",     '\''); // U+02ca modifier letter acute accent
106     replace(dmsa, "\xcb\x8b",     '\''); // U+02cb modifier letter grave accent
107 
108     replace(dmsa, "\xe2\x80\xb3", '"' ); // U+2033 double prime
109     replace(dmsa, "\xe2\x80\xb6", '"' ); // U+2036 reversed double prime
110     replace(dmsa, "\xcb\x9d",     '"' ); // U+02dd double acute accent
111     replace(dmsa, "\xe2\x80\x9c", '"' ); // U+201c left double quote
112     replace(dmsa, "\xe2\x80\x9d", '"' ); // U+201d right double quote
113     replace(dmsa, "\xe2\x80\x9f", '"' ); // U+201f reversed-9 double quote
114     replace(dmsa, "\xca\xba",     '"' ); // U+02ba modifier letter double prime
115 
116     replace(dmsa, "\xe2\x9e\x95", '+' ); // U+2795 heavy plus
117     replace(dmsa, "\xe2\x81\xa4", '+' ); // U+2064 invisible plus
118 
119     replace(dmsa, "\xe2\x80\x90", '-' ); // U+2010 dash
120     replace(dmsa, "\xe2\x80\x91", '-' ); // U+2011 non-breaking hyphen
121     replace(dmsa, "\xe2\x80\x93", '-' ); // U+2013 en dash
122     replace(dmsa, "\xe2\x80\x94", '-' ); // U+2014 em dash
123     replace(dmsa, "\xe2\x88\x92", '-' ); // U+2212 minus sign
124     replace(dmsa, "\xe2\x9e\x96", '-' ); // U+2796 heavy minus
125 
126     replace(dmsa, "\xc2\xa0",     '\0'); // U+00a0 non-breaking space
127     replace(dmsa, "\xe2\x80\x87", '\0'); // U+2007 figure space
128     replace(dmsa, "\xe2\x80\x89", '\0'); // U+2007 thin space
129     replace(dmsa, "\xe2\x80\x8a", '\0'); // U+200a hair space
130     replace(dmsa, "\xe2\x80\x8b", '\0'); // U+200b invisible space
131     replace(dmsa, "\xe2\x80\xaf", '\0'); // U+202f narrow space
132     replace(dmsa, "\xe2\x81\xa3", '\0'); // U+2063 invisible separator
133 
134     replace(dmsa, "\xb0",         'd' ); // 0xb0 bare degree symbol
135     replace(dmsa, "\xba",         'd' ); // 0xba bare alt symbol
136     replace(dmsa, "*",            'd' ); // GRiD symbol for degree
137     replace(dmsa, "`",            '\''); // grave accent
138     replace(dmsa, "\xb4",         '\''); // 0xb4 bare acute accent
139     // Don't implement these alternatives; they are only relevant for cgi-bin
140     // replace(dmsa, "\x91",      '\''); // 0x91 ext ASCII left single quote
141     // replace(dmsa, "\x92",      '\''); // 0x92 ext ASCII right single quote
142     // replace(dmsa, "\x93",      '"' ); // 0x93 ext ASCII left double quote
143     // replace(dmsa, "\x94",      '"' ); // 0x94 ext ASCII right double quote
144     // replace(dmsa, "\x96",      '-' ); // 0x96 ext ASCII en dash
145     // replace(dmsa, "\x97",      '-' ); // 0x97 ext ASCII em dash
146     replace(dmsa, "\xa0",         '\0'); // 0xa0 bare non-breaking space
147     replace(dmsa, "''",           '"' ); // '' -> "
148     string::size_type
149       beg = 0,
150       end = unsigned(dmsa.size());
151     while (beg < end && isspace(dmsa[beg]))
152       ++beg;
153     while (beg < end && isspace(dmsa[end - 1]))
154       --end;
155     // The trimmed string in [beg, end)
156     real v = 0;
157     int i = 0;
158     flag ind1 = NONE;
159     // p is pointer to the next piece that needs decoding
160     for (string::size_type p = beg, pb; p < end; p = pb, ++i) {
161       string::size_type pa = p;
162       // Skip over initial hemisphere letter (for i == 0)
163       if (i == 0 && Utility::lookup(hemispheres_, dmsa[pa]) >= 0)
164         ++pa;
165       // Skip over initial sign (checking for it if i == 0)
166       if (i > 0 || (pa < end && Utility::lookup(signs_, dmsa[pa]) >= 0))
167         ++pa;
168       // Find next sign
169       pb = min(dmsa.find_first_of(signs_, pa), end);
170       flag ind2 = NONE;
171       v += InternalDecode(dmsa.substr(p, pb - p), ind2);
172       if (ind1 == NONE)
173         ind1 = ind2;
174       else if (!(ind2 == NONE || ind1 == ind2))
175         throw GeographicErr("Incompatible hemisphere specifier in " +
176                             dmsa.substr(beg, pb - beg));
177     }
178     if (i == 0)
179       throw GeographicErr("Empty or incomplete DMS string " +
180                           dmsa.substr(beg, end - beg));
181     ind = ind1;
182     return v;
183   }
184 
InternalDecode(const string & dmsa,flag & ind)185   Math::real DMS::InternalDecode(const string& dmsa, flag& ind) {
186     string errormsg;
187     do {                       // Executed once (provides the ability to break)
188       int sign = 1;
189       unsigned
190         beg = 0,
191         end = unsigned(dmsa.size());
192       flag ind1 = NONE;
193       int k = -1;
194       if (end > beg && (k = Utility::lookup(hemispheres_, dmsa[beg])) >= 0) {
195         ind1 = (k / 2) ? LONGITUDE : LATITUDE;
196         sign = k % 2 ? 1 : -1;
197         ++beg;
198       }
199       if (end > beg && (k = Utility::lookup(hemispheres_, dmsa[end-1])) >= 0) {
200         if (k >= 0) {
201           if (ind1 != NONE) {
202             if (toupper(dmsa[beg - 1]) == toupper(dmsa[end - 1]))
203               errormsg = "Repeated hemisphere indicators "
204                 + Utility::str(dmsa[beg - 1])
205                 + " in " + dmsa.substr(beg - 1, end - beg + 1);
206             else
207               errormsg = "Contradictory hemisphere indicators "
208                 + Utility::str(dmsa[beg - 1]) + " and "
209                 + Utility::str(dmsa[end - 1]) + " in "
210                 + dmsa.substr(beg - 1, end - beg + 1);
211             break;
212           }
213           ind1 = (k / 2) ? LONGITUDE : LATITUDE;
214           sign = k % 2 ? 1 : -1;
215           --end;
216         }
217       }
218       if (end > beg && (k = Utility::lookup(signs_, dmsa[beg])) >= 0) {
219         if (k >= 0) {
220           sign *= k ? 1 : -1;
221           ++beg;
222         }
223       }
224       if (end == beg) {
225         errormsg = "Empty or incomplete DMS string " + dmsa;
226         break;
227       }
228       real ipieces[] = {0, 0, 0};
229       real fpieces[] = {0, 0, 0};
230       unsigned npiece = 0;
231       real icurrent = 0;
232       real fcurrent = 0;
233       unsigned ncurrent = 0, p = beg;
234       bool pointseen = false;
235       unsigned digcount = 0, intcount = 0;
236       while (p < end) {
237         char x = dmsa[p++];
238         if ((k = Utility::lookup(digits_, x)) >= 0) {
239           ++ncurrent;
240           if (digcount > 0)
241             ++digcount;         // Count of decimal digits
242           else {
243             icurrent = 10 * icurrent + k;
244             ++intcount;
245           }
246         } else if (x == '.') {
247           if (pointseen) {
248             errormsg = "Multiple decimal points in "
249               + dmsa.substr(beg, end - beg);
250             break;
251           }
252           pointseen = true;
253           digcount = 1;
254         } else if ((k = Utility::lookup(dmsindicators_, x)) >= 0) {
255           if (k >= 3) {
256             if (p == end) {
257               errormsg = "Illegal for : to appear at the end of " +
258                 dmsa.substr(beg, end - beg);
259               break;
260             }
261             k = npiece;
262           }
263           if (unsigned(k) == npiece - 1) {
264             errormsg = "Repeated " + string(components_[k]) +
265               " component in " + dmsa.substr(beg, end - beg);
266             break;
267           } else if (unsigned(k) < npiece) {
268             errormsg = string(components_[k]) + " component follows "
269               + string(components_[npiece - 1]) + " component in "
270               + dmsa.substr(beg, end - beg);
271             break;
272           }
273           if (ncurrent == 0) {
274             errormsg = "Missing numbers in " + string(components_[k]) +
275               " component of " + dmsa.substr(beg, end - beg);
276             break;
277           }
278           if (digcount > 0) {
279             istringstream s(dmsa.substr(p - intcount - digcount - 1,
280                                         intcount + digcount));
281             s >> fcurrent;
282             icurrent = 0;
283           }
284           ipieces[k] = icurrent;
285           fpieces[k] = icurrent + fcurrent;
286           if (p < end) {
287             npiece = k + 1;
288             icurrent = fcurrent = 0;
289             ncurrent = digcount = intcount = 0;
290           }
291         } else if (Utility::lookup(signs_, x) >= 0) {
292           errormsg = "Internal sign in DMS string "
293             + dmsa.substr(beg, end - beg);
294           break;
295         } else {
296           errormsg = "Illegal character " + Utility::str(x) + " in DMS string "
297             + dmsa.substr(beg, end - beg);
298           break;
299         }
300       }
301       if (!errormsg.empty())
302         break;
303       if (Utility::lookup(dmsindicators_, dmsa[p - 1]) < 0) {
304         if (npiece >= 3) {
305           errormsg = "Extra text following seconds in DMS string "
306             + dmsa.substr(beg, end - beg);
307           break;
308         }
309         if (ncurrent == 0) {
310           errormsg = "Missing numbers in trailing component of "
311             + dmsa.substr(beg, end - beg);
312           break;
313         }
314         if (digcount > 0) {
315           istringstream s(dmsa.substr(p - intcount - digcount,
316                                       intcount + digcount));
317           s >> fcurrent;
318           icurrent = 0;
319         }
320         ipieces[npiece] = icurrent;
321         fpieces[npiece] = icurrent + fcurrent;
322       }
323       if (pointseen && digcount == 0) {
324         errormsg = "Decimal point in non-terminal component of "
325           + dmsa.substr(beg, end - beg);
326         break;
327       }
328       // Note that we accept 59.999999... even though it rounds to 60.
329       if (ipieces[1] >= 60 || fpieces[1] > 60 ) {
330         errormsg = "Minutes " + Utility::str(fpieces[1])
331           + " not in range [0, 60)";
332         break;
333       }
334       if (ipieces[2] >= 60 || fpieces[2] > 60) {
335         errormsg = "Seconds " + Utility::str(fpieces[2])
336           + " not in range [0, 60)";
337         break;
338       }
339       ind = ind1;
340       // Assume check on range of result is made by calling routine (which
341       // might be able to offer a better diagnostic).
342       return real(sign) *
343         ( fpieces[2] != 0 ?
344           (60*(60*fpieces[0] + fpieces[1]) + fpieces[2]) / 3600 :
345           ( fpieces[1] != 0 ?
346             (60*fpieces[0] + fpieces[1]) / 60 : fpieces[0] ) );
347     } while (false);
348     real val = Utility::nummatch<real>(dmsa);
349     if (val == 0)
350       throw GeographicErr(errormsg);
351     else
352       ind = NONE;
353     return val;
354   }
355 
DecodeLatLon(const string & stra,const string & strb,real & lat,real & lon,bool longfirst)356   void DMS::DecodeLatLon(const string& stra, const string& strb,
357                          real& lat, real& lon,
358                          bool longfirst) {
359     real a, b;
360     flag ia, ib;
361     a = Decode(stra, ia);
362     b = Decode(strb, ib);
363     if (ia == NONE && ib == NONE) {
364       // Default to lat, long unless longfirst
365       ia = longfirst ? LONGITUDE : LATITUDE;
366       ib = longfirst ? LATITUDE : LONGITUDE;
367     } else if (ia == NONE)
368       ia = flag(LATITUDE + LONGITUDE - ib);
369     else if (ib == NONE)
370       ib = flag(LATITUDE + LONGITUDE - ia);
371     if (ia == ib)
372       throw GeographicErr("Both " + stra + " and "
373                           + strb + " interpreted as "
374                           + (ia == LATITUDE ? "latitudes" : "longitudes"));
375     real
376       lat1 = ia == LATITUDE ? a : b,
377       lon1 = ia == LATITUDE ? b : a;
378     if (abs(lat1) > 90)
379       throw GeographicErr("Latitude " + Utility::str(lat1)
380                           + "d not in [-90d, 90d]");
381     lat = lat1;
382     lon = lon1;
383   }
384 
DecodeAngle(const string & angstr)385   Math::real DMS::DecodeAngle(const string& angstr) {
386     flag ind;
387     real ang = Decode(angstr, ind);
388     if (ind != NONE)
389       throw GeographicErr("Arc angle " + angstr
390                           + " includes a hemisphere, N/E/W/S");
391     return ang;
392   }
393 
DecodeAzimuth(const string & azistr)394   Math::real DMS::DecodeAzimuth(const string& azistr) {
395     flag ind;
396     real azi = Decode(azistr, ind);
397     if (ind == LATITUDE)
398       throw GeographicErr("Azimuth " + azistr
399                           + " has a latitude hemisphere, N/S");
400     return Math::AngNormalize(azi);
401   }
402 
Encode(real angle,component trailing,unsigned prec,flag ind,char dmssep)403   string DMS::Encode(real angle, component trailing, unsigned prec, flag ind,
404                      char dmssep) {
405     // Assume check on range of input angle has been made by calling
406     // routine (which might be able to offer a better diagnostic).
407     if (!isfinite(angle))
408       return angle < 0 ? string("-inf") :
409         (angle > 0 ? string("inf") : string("nan"));
410 
411     // 15 - 2 * trailing = ceiling(log10(2^53/90/60^trailing)).
412     // This suffices to give full real precision for numbers in [-90,90]
413     prec = min(15 + Math::extra_digits() - 2 * unsigned(trailing), prec);
414     real scale = 1;
415     for (unsigned i = 0; i < unsigned(trailing); ++i)
416       scale *= 60;
417     for (unsigned i = 0; i < prec; ++i)
418       scale *= 10;
419     if (ind == AZIMUTH)
420       angle -= floor(angle/360) * 360;
421     int sign = angle < 0 ? -1 : 1;
422     angle *= sign;
423 
424     // Break off integer part to preserve precision in manipulation of
425     // fractional part.
426     real
427       idegree = floor(angle),
428       fdegree = (angle - idegree) * scale + real(0.5);
429     {
430       // Implement the "round ties to even" rule
431       real f = floor(fdegree);
432       fdegree = (f == fdegree && fmod(f, real(2)) == 1) ? f - 1 : f;
433     }
434     fdegree /= scale;
435     if (fdegree >= 1) {
436       idegree += 1;
437       fdegree -= 1;
438     }
439     real pieces[3] = {fdegree, 0, 0};
440     for (unsigned i = 1; i <= unsigned(trailing); ++i) {
441       real
442         ip = floor(pieces[i - 1]),
443         fp = pieces[i - 1] - ip;
444       pieces[i] = fp * 60;
445       pieces[i - 1] = ip;
446     }
447     pieces[0] += idegree;
448     ostringstream s;
449     s << fixed << setfill('0');
450     if (ind == NONE && sign < 0)
451       s << '-';
452     switch (trailing) {
453     case DEGREE:
454       if (ind != NONE)
455         s << setw(1 + min(int(ind), 2) + prec + (prec ? 1 : 0));
456       s << Utility::str(pieces[0], prec);
457       // Don't include degree designator (d) if it is the trailing component.
458       break;
459     default:
460       if (ind != NONE)
461         s << setw(1 + min(int(ind), 2));
462       s << int(pieces[0])
463         << (dmssep ? dmssep : char(tolower(dmsindicators_[0])));
464       switch (trailing) {
465       case MINUTE:
466         s << setw(2 + prec + (prec ? 1 : 0)) << Utility::str(pieces[1], prec);
467         if (!dmssep)
468           s << char(tolower(dmsindicators_[1]));
469         break;
470       case SECOND:
471         s << setw(2)
472           << int(pieces[1])
473           << (dmssep ? dmssep : char(tolower(dmsindicators_[1])))
474           << setw(2 + prec + (prec ? 1 : 0)) << Utility::str(pieces[2], prec);
475         if (!dmssep)
476           s << char(tolower(dmsindicators_[2]));
477         break;
478       default:
479         break;
480       }
481     }
482     if (ind != NONE && ind != AZIMUTH)
483       s << hemispheres_[(ind == LATITUDE ? 0 : 2) + (sign < 0 ? 0 : 1)];
484     return s.str();
485   }
486 
487 } // namespace GeographicLib
488