1# Licensed under a 3-clause BSD style license - see LICENSE.rst 2"""Classes to read AAS MRT table format 3 4Ref: https://journals.aas.org/mrt-standards 5 6:Copyright: Smithsonian Astrophysical Observatory (2021) 7:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu), \ 8 Suyog Garg (suyog7130@gmail.com) 9""" 10 11import re 12import math 13import warnings 14import numpy as np 15from io import StringIO 16 17from . import core 18from . import fixedwidth, cds 19 20from astropy import units as u 21 22from astropy.table import Table 23from astropy.table import Column, MaskedColumn 24from string import Template 25from textwrap import wrap 26 27MAX_SIZE_README_LINE = 80 28MAX_COL_INTLIMIT = 100000 29 30 31__doctest_skip__ = ['*'] 32 33 34BYTE_BY_BYTE_TEMPLATE = [ 35 "Byte-by-byte Description of file: $file", 36 "--------------------------------------------------------------------------------", 37 " Bytes Format Units Label Explanations", 38 "--------------------------------------------------------------------------------", 39 "$bytebybyte", 40 "--------------------------------------------------------------------------------"] 41 42MRT_TEMPLATE = [ 43 "Title:", 44 "Authors:", 45 "Table:", 46 "================================================================================", 47 "$bytebybyte", 48 "Notes:", 49 "--------------------------------------------------------------------------------"] 50 51 52class MrtSplitter(fixedwidth.FixedWidthSplitter): 53 """ 54 Contains the join function to left align the MRT columns 55 when writing to a file. 56 """ 57 def join(self, vals, widths): 58 vals = [val + ' ' * (width - len(val)) for val, width in zip(vals, widths)] 59 return self.delimiter.join(vals) 60 61 62class MrtHeader(cds.CdsHeader): 63 _subfmt = 'MRT' 64 65 def _split_float_format(self, value): 66 """ 67 Splits a Float string into different parts to find number 68 of digits after decimal and check if the value is in Scientific 69 notation. 70 71 Parameters 72 ---------- 73 value : str 74 String containing the float value to split. 75 76 Returns 77 ------- 78 fmt: (int, int, int, bool, bool) 79 List of values describing the Float sting. 80 (size, dec, ent, sign, exp) 81 size, length of the given string. 82 ent, number of digits before decimal point. 83 dec, number of digits after decimal point. 84 sign, whether or not given value signed. 85 exp, is value in Scientific notation? 86 """ 87 regfloat = re.compile(r"""(?P<sign> [+-]*) 88 (?P<ent> [^eE.]+) 89 (?P<deciPt> [.]*) 90 (?P<decimals> [0-9]*) 91 (?P<exp> [eE]*-*)[0-9]*""", 92 re.VERBOSE) 93 mo = regfloat.match(value) 94 95 if mo is None: 96 raise Exception(f'{value} is not a float number') 97 return (len(value), 98 len(mo.group('ent')), 99 len(mo.group('decimals')), 100 mo.group('sign') != "", 101 mo.group('exp') != "") 102 103 def _set_column_val_limits(self, col): 104 """ 105 Sets the ``col.min`` and ``col.max`` column attributes, 106 taking into account columns with Null values. 107 """ 108 col.max = max(col) 109 col.min = min(col) 110 if col.max is np.ma.core.MaskedConstant: 111 col.max = None 112 if col.min is np.ma.core.MaskedConstant: 113 col.min = None 114 115 def column_float_formatter(self, col): 116 """ 117 String formatter function for a column containing Float values. 118 Checks if the values in the given column are in Scientific notation, 119 by spliting the value string. It is assumed that the column either has 120 float values or Scientific notation. 121 122 A ``col.formatted_width`` attribute is added to the column. It is not added 123 if such an attribute is already present, say when the ``formats`` argument 124 is passed to the writer. A properly formatted format string is also added as 125 the ``col.format`` attribute. 126 127 Parameters 128 ---------- 129 col : A ``Table.Column`` object. 130 """ 131 # maxsize: maximum length of string containing the float value. 132 # maxent: maximum number of digits places before decimal point. 133 # maxdec: maximum number of digits places after decimal point. 134 # maxprec: maximum precision of the column values, sum of maxent and maxdec. 135 maxsize, maxprec, maxent, maxdec = 1, 0, 1, 0 136 sign = False 137 fformat = 'F' 138 139 # Find maximum sized value in the col 140 for val in col.str_vals: 141 # Skip null values 142 if val is None or val == '': 143 continue 144 145 # Find format of the Float string 146 fmt = self._split_float_format(val) 147 # If value is in Scientific notation 148 if fmt[4] is True: 149 # if the previous column value was in normal Float format 150 # set maxsize, maxprec and maxdec to default. 151 if fformat == 'F': 152 maxsize, maxprec, maxdec = 1, 0, 0 153 # Designate the column to be in Scientific notation. 154 fformat = 'E' 155 else: 156 # Move to next column value if 157 # current value is not in Scientific notation 158 # but the column is designated as such because 159 # one of the previous values was. 160 if fformat == 'E': 161 continue 162 163 if maxsize < fmt[0]: 164 maxsize = fmt[0] 165 if maxent < fmt[1]: 166 maxent = fmt[1] 167 if maxdec < fmt[2]: 168 maxdec = fmt[2] 169 if fmt[3]: 170 sign = True 171 172 if maxprec < fmt[1] + fmt[2]: 173 maxprec = fmt[1] + fmt[2] 174 175 if fformat == 'E': 176 if getattr(col, 'formatted_width', None) is None: # If ``formats`` not passed. 177 col.formatted_width = maxsize 178 if sign: 179 col.formatted_width += 1 180 # Number of digits after decimal is replaced by the precision 181 # for values in Scientific notation, when writing that Format. 182 col.fortran_format = fformat + str(col.formatted_width) + "." + str(maxprec) 183 col.format = str(col.formatted_width) + "." + str(maxdec) + "e" 184 else: 185 lead = '' 186 if getattr(col, 'formatted_width', None) is None: # If ``formats`` not passed. 187 col.formatted_width = maxent + maxdec + 1 188 if sign: 189 col.formatted_width += 1 190 elif col.format.startswith('0'): 191 # Keep leading zero, if already set in format - primarily for `seconds` columns 192 # in coordinates; may need extra case if this is to be also supported with `sign`. 193 lead = '0' 194 col.fortran_format = fformat + str(col.formatted_width) + "." + str(maxdec) 195 col.format = lead + col.fortran_format[1:] + "f" 196 197 def write_byte_by_byte(self): 198 """ 199 Writes the Byte-By-Byte description of the table. 200 201 Columns that are `astropy.coordinates.SkyCoord` or `astropy.time.TimeSeries` 202 objects or columns with values that are such objects are recognized as such, 203 and some predefined labels and description is used for them. 204 See the Vizier MRT Standard documentation in the link below for more details 205 on these. An example Byte-By-Byte table is shown here. 206 207 See: http://vizier.u-strasbg.fr/doc/catstd-3.1.htx 208 209 Example:: 210 211 -------------------------------------------------------------------------------- 212 Byte-by-byte Description of file: table.dat 213 -------------------------------------------------------------------------------- 214 Bytes Format Units Label Explanations 215 -------------------------------------------------------------------------------- 216 1- 8 A8 --- names Description of names 217 10-14 E5.1 --- e [-3160000.0/0.01] Description of e 218 16-23 F8.5 --- d [22.25/27.25] Description of d 219 25-31 E7.1 --- s [-9e+34/2.0] Description of s 220 33-35 I3 --- i [-30/67] Description of i 221 37-39 F3.1 --- sameF [5.0/5.0] Description of sameF 222 41-42 I2 --- sameI [20] Description of sameI 223 44-45 I2 h RAh Right Ascension (hour) 224 47-48 I2 min RAm Right Ascension (minute) 225 50-67 F18.15 s RAs Right Ascension (second) 226 69 A1 --- DE- Sign of Declination 227 70-71 I2 deg DEd Declination (degree) 228 73-74 I2 arcmin DEm Declination (arcmin) 229 76-91 F16.13 arcsec DEs Declination (arcsec) 230 231 -------------------------------------------------------------------------------- 232 """ 233 # Get column widths 234 vals_list = [] 235 col_str_iters = self.data.str_vals() 236 for vals in zip(*col_str_iters): 237 vals_list.append(vals) 238 239 for i, col in enumerate(self.cols): 240 col.width = max([len(vals[i]) for vals in vals_list]) 241 if self.start_line is not None: 242 col.width = max(col.width, len(col.info.name)) 243 widths = [col.width for col in self.cols] 244 245 startb = 1 # Byte count starts at 1. 246 247 # Set default width of the Bytes count column of the Byte-By-Byte table. 248 # This ``byte_count_width`` value helps align byte counts with respect 249 # to the hyphen using a format string. 250 byte_count_width = len(str(sum(widths) + len(self.cols) - 1)) 251 252 # Format string for Start Byte and End Byte 253 singlebfmt = "{:" + str(byte_count_width) + "d}" 254 fmtb = singlebfmt + "-" + singlebfmt 255 # Add trailing single whitespaces to Bytes column for better visibility. 256 singlebfmt += " " 257 fmtb += " " 258 259 # Set default width of Label and Description Byte-By-Byte columns. 260 max_label_width, max_descrip_size = 7, 16 261 262 bbb = Table(names=['Bytes', 'Format', 'Units', 'Label', 'Explanations'], 263 dtype=[str] * 5) 264 265 # Iterate over the columns to write Byte-By-Byte rows. 266 for i, col in enumerate(self.cols): 267 # Check if column is MaskedColumn 268 col.has_null = isinstance(col, MaskedColumn) 269 270 if col.format is not None: 271 col.formatted_width = max([len(sval) for sval in col.str_vals]) 272 273 # Set MRTColumn type, size and format. 274 if np.issubdtype(col.dtype, np.integer): 275 # Integer formatter 276 self._set_column_val_limits(col) 277 if getattr(col, 'formatted_width', None) is None: # If ``formats`` not passed. 278 col.formatted_width = max(len(str(col.max)), len(str(col.min))) 279 col.fortran_format = "I" + str(col.formatted_width) 280 if col.format is None: 281 col.format = ">" + col.fortran_format[1:] 282 283 elif np.issubdtype(col.dtype, np.dtype(float).type): 284 # Float formatter 285 self._set_column_val_limits(col) 286 self.column_float_formatter(col) 287 288 else: 289 # String formatter, ``np.issubdtype(col.dtype, str)`` is ``True``. 290 dtype = col.dtype.str 291 if col.has_null: 292 mcol = col 293 mcol.fill_value = "" 294 coltmp = Column(mcol.filled(), dtype=str) 295 dtype = coltmp.dtype.str 296 if getattr(col, 'formatted_width', None) is None: # If ``formats`` not passed. 297 col.formatted_width = int(re.search(r'(\d+)$', dtype).group(1)) 298 col.fortran_format = "A" + str(col.formatted_width) 299 col.format = str(col.formatted_width) + "s" 300 301 endb = col.formatted_width + startb - 1 302 303 # ``mixin`` columns converted to string valued columns will not have a name 304 # attribute. In those cases, a ``Unknown`` column label is put, indicating that 305 # such columns can be better formatted with some manipulation before calling 306 # the MRT writer. 307 if col.name is None: 308 col.name = "Unknown" 309 310 # Set column description. 311 if col.description is not None: 312 description = col.description 313 else: 314 description = "Description of " + col.name 315 316 # Set null flag in column description 317 nullflag = "" 318 if col.has_null: 319 nullflag = "?" 320 321 # Set column unit 322 if col.unit is not None: 323 col_unit = col.unit.to_string("cds") 324 elif col.name.lower().find("magnitude") > -1: 325 # ``col.unit`` can still be ``None``, if the unit of column values 326 # is ``Magnitude``, because ``astropy.units.Magnitude`` is actually a class. 327 # Unlike other units which are instances of ``astropy.units.Unit``, 328 # application of the ``Magnitude`` unit calculates the logarithm 329 # of the values. Thus, the only way to check for if the column values 330 # have ``Magnitude`` unit is to check the column name. 331 col_unit = "mag" 332 else: 333 col_unit = "---" 334 335 # Add col limit values to col description 336 lim_vals = "" 337 if (col.min and col.max and 338 not any(x in col.name for x in ['RA', 'DE', 'LON', 'LAT', 'PLN', 'PLT'])): 339 # No col limit values for coordinate columns. 340 if col.fortran_format[0] == 'I': 341 if abs(col.min) < MAX_COL_INTLIMIT and abs(col.max) < MAX_COL_INTLIMIT: 342 if col.min == col.max: 343 lim_vals = "[{0}]".format(col.min) 344 else: 345 lim_vals = "[{0}/{1}]".format(col.min, col.max) 346 elif col.fortran_format[0] in ('E', 'F'): 347 lim_vals = "[{0}/{1}]".format(math.floor(col.min * 100) / 100., 348 math.ceil(col.max * 100) / 100.) 349 350 if lim_vals != '' or nullflag != '': 351 description = "{0}{1} {2}".format(lim_vals, nullflag, description) 352 353 # Find the maximum label and description column widths. 354 if len(col.name) > max_label_width: 355 max_label_width = len(col.name) 356 if len(description) > max_descrip_size: 357 max_descrip_size = len(description) 358 359 # Add a row for the Sign of Declination in the bbb table 360 if col.name == 'DEd': 361 bbb.add_row([singlebfmt.format(startb), 362 "A1", "---", "DE-", 363 "Sign of Declination"]) 364 col.fortran_format = 'I2' 365 startb += 1 366 367 # Add Byte-By-Byte row to bbb table 368 bbb.add_row([singlebfmt.format(startb) if startb == endb 369 else fmtb.format(startb, endb), 370 "" if col.fortran_format is None else col.fortran_format, 371 col_unit, 372 "" if col.name is None else col.name, 373 description]) 374 startb = endb + 2 375 376 # Properly format bbb columns 377 bbblines = StringIO() 378 bbb.write(bbblines, format='ascii.fixed_width_no_header', 379 delimiter=' ', bookend=False, delimiter_pad=None, 380 formats={'Format': '<6s', 381 'Units': '<6s', 382 'Label': '<' + str(max_label_width) + 's', 383 'Explanations': '' + str(max_descrip_size) + 's'}) 384 385 # Get formatted bbb lines 386 bbblines = bbblines.getvalue().splitlines() 387 388 # ``nsplit`` is the number of whitespaces to prefix to long description 389 # lines in order to wrap them. It is the sum of the widths of the 390 # previous 4 columns plus the number of single spacing between them. 391 # The hyphen in the Bytes column is also counted. 392 nsplit = byte_count_width * 2 + 1 + 12 + max_label_width + 4 393 394 # Wrap line if it is too long 395 buff = "" 396 for newline in bbblines: 397 if len(newline) > MAX_SIZE_README_LINE: 398 buff += ("\n").join(wrap(newline, 399 subsequent_indent=" " * nsplit, 400 width=MAX_SIZE_README_LINE)) 401 buff += "\n" 402 else: 403 buff += newline + "\n" 404 405 # Last value of ``endb`` is the sum of column widths after formatting. 406 self.linewidth = endb 407 408 # Remove the last extra newline character from Byte-By-Byte. 409 buff = buff[:-1] 410 return buff 411 412 def write(self, lines): 413 """ 414 Writes the Header of the MRT table, aka ReadMe, which 415 also contains the Byte-By-Byte description of the table. 416 """ 417 from astropy.coordinates import SkyCoord 418 419 # Recognised ``SkyCoord.name`` forms with their default column names (helio* require SunPy). 420 coord_systems = {'galactic': ('GLAT', 'GLON', 'b', 'l'), 421 'ecliptic': ('ELAT', 'ELON', 'lat', 'lon'), # 'geocentric*ecliptic' 422 'heliographic': ('HLAT', 'HLON', 'lat', 'lon'), # '_carrington|stonyhurst' 423 'helioprojective': ('HPLT', 'HPLN', 'Ty', 'Tx')} 424 eqtnames = ['RAh', 'RAm', 'RAs', 'DEd', 'DEm', 'DEs'] 425 426 # list to store indices of columns that are modified. 427 to_pop = [] 428 429 # For columns that are instances of ``SkyCoord`` and other ``mixin`` columns 430 # or whose values are objects of these classes. 431 for i, col in enumerate(self.cols): 432 # If col is a ``Column`` object but its values are ``SkyCoord`` objects, 433 # convert the whole column to ``SkyCoord`` object, which helps in applying 434 # SkyCoord methods directly. 435 if not isinstance(col, SkyCoord) and isinstance(col[0], SkyCoord): 436 try: 437 col = SkyCoord(col) 438 except (ValueError, TypeError): 439 # If only the first value of the column is a ``SkyCoord`` object, 440 # the column cannot be converted to a ``SkyCoord`` object. 441 # These columns are converted to ``Column`` object and then converted 442 # to string valued column. 443 if not isinstance(col, Column): 444 col = Column(col) 445 col = Column([str(val) for val in col]) 446 self.cols[i] = col 447 continue 448 449 # Replace single ``SkyCoord`` column by its coordinate components if no coordinate 450 # columns of the correspoding type exist yet. 451 if isinstance(col, SkyCoord): 452 # If coordinates are given in RA/DEC, divide each them into hour/deg, 453 # minute/arcminute, second/arcsecond columns. 454 if ('ra' in col.representation_component_names.keys() and 455 len(set(eqtnames) - set(self.colnames)) == 6): 456 ra_c, dec_c = col.ra.hms, col.dec.dms 457 coords = [ra_c.h.round().astype('i1'), ra_c.m.round().astype('i1'), ra_c.s, 458 dec_c.d.round().astype('i1'), dec_c.m.round().astype('i1'), dec_c.s] 459 coord_units = [u.h, u.min, u.second, 460 u.deg, u.arcmin, u.arcsec] 461 coord_descrip = ['Right Ascension (hour)', 'Right Ascension (minute)', 462 'Right Ascension (second)', 'Declination (degree)', 463 'Declination (arcmin)', 'Declination (arcsec)'] 464 for coord, name, coord_unit, descrip in zip( 465 coords, eqtnames, coord_units, coord_descrip): 466 # Have Sign of Declination only in the DEd column. 467 if name in ['DEm', 'DEs']: 468 coord_col = Column(list(np.abs(coord)), name=name, 469 unit=coord_unit, description=descrip) 470 else: 471 coord_col = Column(list(coord), name=name, unit=coord_unit, 472 description=descrip) 473 # Set default number of digits after decimal point for the 474 # second values, and deg-min to (signed) 2-digit zero-padded integer. 475 if name == 'RAs': 476 coord_col.format = '013.10f' 477 elif name == 'DEs': 478 coord_col.format = '012.9f' 479 elif name == 'RAh': 480 coord_col.format = '2d' 481 elif name == 'DEd': 482 coord_col.format = '+03d' 483 elif name.startswith(('RA', 'DE')): 484 coord_col.format = '02d' 485 self.cols.append(coord_col) 486 to_pop.append(i) # Delete original ``SkyCoord`` column. 487 488 # For all other coordinate types, simply divide into two columns 489 # for latitude and longitude resp. with the unit used been as it is. 490 491 else: 492 frminfo = '' 493 for frame, latlon in coord_systems.items(): 494 if frame in col.name and len(set(latlon[:2]) - set(self.colnames)) == 2: 495 if frame != col.name: 496 frminfo = f' ({col.name})' 497 lon_col = Column(getattr(col, latlon[3]), name=latlon[1], 498 description=f'{frame.capitalize()} Longitude{frminfo}', 499 unit=col.representation_component_units[latlon[3]], 500 format='.12f') 501 lat_col = Column(getattr(col, latlon[2]), name=latlon[0], 502 description=f'{frame.capitalize()} Latitude{frminfo}', 503 unit=col.representation_component_units[latlon[2]], 504 format='+.12f') 505 self.cols.append(lon_col) 506 self.cols.append(lat_col) 507 to_pop.append(i) # Delete original ``SkyCoord`` column. 508 509 # Convert all other ``SkyCoord`` columns that are not in the above three 510 # representations to string valued columns. Those could either be types not 511 # supported yet (e.g. 'helioprojective'), or already present and converted. 512 # If there were any extra ``SkyCoord`` columns of one kind after the first one, 513 # then their decomposition into their component columns has been skipped. 514 # This is done in order to not create duplicate component columns. 515 # Explicit renaming of the extra coordinate component columns by appending some 516 # suffix to their name, so as to distinguish them, is not yet implemented. 517 if i not in to_pop: 518 warnings.warn(f"Coordinate system of type '{col.name}' already stored in table " 519 f"as CDS/MRT-syle columns or of unrecognized type. So column {i} " 520 f"is being skipped with designation of a string valued column " 521 f"`{self.colnames[i]}`.", UserWarning) 522 self.cols.append(Column(col.to_string(), name=self.colnames[i])) 523 to_pop.append(i) # Delete original ``SkyCoord`` column. 524 525 # Convert all other ``mixin`` columns to ``Column`` objects. 526 # Parsing these may still lead to errors! 527 elif not isinstance(col, Column): 528 col = Column(col) 529 # If column values are ``object`` types, convert them to string. 530 if np.issubdtype(col.dtype, np.dtype(object).type): 531 col = Column([str(val) for val in col]) 532 self.cols[i] = col 533 534 # Delete original ``SkyCoord`` columns, if there were any. 535 for i in to_pop[::-1]: 536 self.cols.pop(i) 537 538 # Check for any left over extra coordinate columns. 539 if any(x in self.colnames for x in ['RAh', 'DEd', 'ELON', 'GLAT']): 540 # At this point any extra ``SkyCoord`` columns should have been converted to string 541 # valued columns, together with issuance of a warning, by the coordinate parser above. 542 # This test is just left here as a safeguard. 543 for i, col in enumerate(self.cols): 544 if isinstance(col, SkyCoord): 545 self.cols[i] = Column(col.to_string(), name=self.colnames[i]) 546 message = ('Table already has coordinate system in CDS/MRT-syle columns. ' 547 f'So column {i} should have been replaced already with ' 548 f'a string valued column `{self.colnames[i]}`.') 549 raise core.InconsistentTableError(message) 550 551 # Get Byte-By-Byte description and fill the template 552 bbb_template = Template('\n'.join(BYTE_BY_BYTE_TEMPLATE)) 553 byte_by_byte = bbb_template.substitute({'file': 'table.dat', 554 'bytebybyte': self.write_byte_by_byte()}) 555 556 # Fill up the full ReadMe 557 rm_template = Template('\n'.join(MRT_TEMPLATE)) 558 readme_filled = rm_template.substitute({'bytebybyte': byte_by_byte}) 559 lines.append(readme_filled) 560 561 562class MrtData(cds.CdsData): 563 """MRT table data reader 564 """ 565 _subfmt = 'MRT' 566 splitter_class = MrtSplitter 567 568 def write(self, lines): 569 self.splitter.delimiter = ' ' 570 fixedwidth.FixedWidthData.write(self, lines) 571 572 573class Mrt(core.BaseReader): 574 """AAS MRT (Machine-Readable Table) format table. 575 576 **Reading** 577 :: 578 579 >>> from astropy.io import ascii 580 >>> table = ascii.read('data.mrt', format='mrt') 581 582 **Writing** 583 584 Use ``ascii.write(table, 'data.mrt', format='mrt')`` to write tables to 585 Machine Readable Table (MRT) format. 586 587 Note that the metadata of the table, apart from units, column names and 588 description, will not be written. These have to be filled in by hand later. 589 590 See also: :ref:`cds_mrt_format`. 591 592 Caveats: 593 594 * The Units and Explanations are available in the column ``unit`` and 595 ``description`` attributes, respectively. 596 * The other metadata defined by this format is not available in the output table. 597 """ 598 _format_name = 'mrt' 599 _io_registry_format_aliases = ['mrt'] 600 _io_registry_can_write = True 601 _description = 'MRT format table' 602 603 data_class = MrtData 604 header_class = MrtHeader 605 606 def write(self, table=None): 607 # Construct for writing empty table is not yet done. 608 if len(table) == 0: 609 raise NotImplementedError 610 611 self.data.header = self.header 612 self.header.position_line = None 613 self.header.start_line = None 614 615 # Create a copy of the ``table``, so that it the copy gets modified and 616 # written to the file, while the original table remains as it is. 617 table = table.copy() 618 return super().write(table) 619