1""" 2Implementation of Harwell-Boeing read/write. 3 4At the moment not the full Harwell-Boeing format is supported. Supported 5features are: 6 7 - assembled, non-symmetric, real matrices 8 - integer for pointer/indices 9 - exponential format for float values, and int format 10 11""" 12# TODO: 13# - Add more support (symmetric/complex matrices, non-assembled matrices ?) 14 15# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but 16# takes a lot of memory. Being faster would require compiled code. 17# write is not efficient. Although not a terribly exciting task, 18# having reusable facilities to efficiently read/write fortran-formatted files 19# would be useful outside this module. 20 21import warnings 22 23import numpy as np 24from scipy.sparse import csc_matrix 25from scipy.io.harwell_boeing._fortran_format_parser import \ 26 FortranFormatParser, IntFormat, ExpFormat 27 28__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile", 29 "HBMatrixType"] 30 31 32class MalformedHeader(Exception): 33 pass 34 35 36class LineOverflow(Warning): 37 pass 38 39 40def _nbytes_full(fmt, nlines): 41 """Return the number of bytes to read to get every full lines for the 42 given parsed fortran format.""" 43 return (fmt.repeat * fmt.width + 1) * (nlines - 1) 44 45 46class HBInfo: 47 @classmethod 48 def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None): 49 """Create a HBInfo instance from an existing sparse matrix. 50 51 Parameters 52 ---------- 53 m : sparse matrix 54 the HBInfo instance will derive its parameters from m 55 title : str 56 Title to put in the HB header 57 key : str 58 Key 59 mxtype : HBMatrixType 60 type of the input matrix 61 fmt : dict 62 not implemented 63 64 Returns 65 ------- 66 hb_info : HBInfo instance 67 """ 68 m = m.tocsc(copy=False) 69 70 pointer = m.indptr 71 indices = m.indices 72 values = m.data 73 74 nrows, ncols = m.shape 75 nnon_zeros = m.nnz 76 77 if fmt is None: 78 # +1 because HB use one-based indexing (Fortran), and we will write 79 # the indices /pointer as such 80 pointer_fmt = IntFormat.from_number(np.max(pointer+1)) 81 indices_fmt = IntFormat.from_number(np.max(indices+1)) 82 83 if values.dtype.kind in np.typecodes["AllFloat"]: 84 values_fmt = ExpFormat.from_number(-np.max(np.abs(values))) 85 elif values.dtype.kind in np.typecodes["AllInteger"]: 86 values_fmt = IntFormat.from_number(-np.max(np.abs(values))) 87 else: 88 raise NotImplementedError("type %s not implemented yet" % values.dtype.kind) 89 else: 90 raise NotImplementedError("fmt argument not supported yet.") 91 92 if mxtype is None: 93 if not np.isrealobj(values): 94 raise ValueError("Complex values not supported yet") 95 if values.dtype.kind in np.typecodes["AllInteger"]: 96 tp = "integer" 97 elif values.dtype.kind in np.typecodes["AllFloat"]: 98 tp = "real" 99 else: 100 raise NotImplementedError("type %s for values not implemented" 101 % values.dtype) 102 mxtype = HBMatrixType(tp, "unsymmetric", "assembled") 103 else: 104 raise ValueError("mxtype argument not handled yet.") 105 106 def _nlines(fmt, size): 107 nlines = size // fmt.repeat 108 if nlines * fmt.repeat != size: 109 nlines += 1 110 return nlines 111 112 pointer_nlines = _nlines(pointer_fmt, pointer.size) 113 indices_nlines = _nlines(indices_fmt, indices.size) 114 values_nlines = _nlines(values_fmt, values.size) 115 116 total_nlines = pointer_nlines + indices_nlines + values_nlines 117 118 return cls(title, key, 119 total_nlines, pointer_nlines, indices_nlines, values_nlines, 120 mxtype, nrows, ncols, nnon_zeros, 121 pointer_fmt.fortran_format, indices_fmt.fortran_format, 122 values_fmt.fortran_format) 123 124 @classmethod 125 def from_file(cls, fid): 126 """Create a HBInfo instance from a file object containing a matrix in the 127 HB format. 128 129 Parameters 130 ---------- 131 fid : file-like matrix 132 File or file-like object containing a matrix in the HB format. 133 134 Returns 135 ------- 136 hb_info : HBInfo instance 137 """ 138 # First line 139 line = fid.readline().strip("\n") 140 if not len(line) > 72: 141 raise ValueError("Expected at least 72 characters for first line, " 142 "got: \n%s" % line) 143 title = line[:72] 144 key = line[72:] 145 146 # Second line 147 line = fid.readline().strip("\n") 148 if not len(line.rstrip()) >= 56: 149 raise ValueError("Expected at least 56 characters for second line, " 150 "got: \n%s" % line) 151 total_nlines = _expect_int(line[:14]) 152 pointer_nlines = _expect_int(line[14:28]) 153 indices_nlines = _expect_int(line[28:42]) 154 values_nlines = _expect_int(line[42:56]) 155 156 rhs_nlines = line[56:72].strip() 157 if rhs_nlines == '': 158 rhs_nlines = 0 159 else: 160 rhs_nlines = _expect_int(rhs_nlines) 161 if not rhs_nlines == 0: 162 raise ValueError("Only files without right hand side supported for " 163 "now.") 164 165 # Third line 166 line = fid.readline().strip("\n") 167 if not len(line) >= 70: 168 raise ValueError("Expected at least 72 character for third line, got:\n" 169 "%s" % line) 170 171 mxtype_s = line[:3].upper() 172 if not len(mxtype_s) == 3: 173 raise ValueError("mxtype expected to be 3 characters long") 174 175 mxtype = HBMatrixType.from_fortran(mxtype_s) 176 if mxtype.value_type not in ["real", "integer"]: 177 raise ValueError("Only real or integer matrices supported for " 178 "now (detected %s)" % mxtype) 179 if not mxtype.structure == "unsymmetric": 180 raise ValueError("Only unsymmetric matrices supported for " 181 "now (detected %s)" % mxtype) 182 if not mxtype.storage == "assembled": 183 raise ValueError("Only assembled matrices supported for now") 184 185 if not line[3:14] == " " * 11: 186 raise ValueError("Malformed data for third line: %s" % line) 187 188 nrows = _expect_int(line[14:28]) 189 ncols = _expect_int(line[28:42]) 190 nnon_zeros = _expect_int(line[42:56]) 191 nelementals = _expect_int(line[56:70]) 192 if not nelementals == 0: 193 raise ValueError("Unexpected value %d for nltvl (last entry of line 3)" 194 % nelementals) 195 196 # Fourth line 197 line = fid.readline().strip("\n") 198 199 ct = line.split() 200 if not len(ct) == 3: 201 raise ValueError("Expected 3 formats, got %s" % ct) 202 203 return cls(title, key, 204 total_nlines, pointer_nlines, indices_nlines, values_nlines, 205 mxtype, nrows, ncols, nnon_zeros, 206 ct[0], ct[1], ct[2], 207 rhs_nlines, nelementals) 208 209 def __init__(self, title, key, 210 total_nlines, pointer_nlines, indices_nlines, values_nlines, 211 mxtype, nrows, ncols, nnon_zeros, 212 pointer_format_str, indices_format_str, values_format_str, 213 right_hand_sides_nlines=0, nelementals=0): 214 """Do not use this directly, but the class ctrs (from_* functions).""" 215 self.title = title 216 self.key = key 217 if title is None: 218 title = "No Title" 219 if len(title) > 72: 220 raise ValueError("title cannot be > 72 characters") 221 222 if key is None: 223 key = "|No Key" 224 if len(key) > 8: 225 warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow) 226 227 self.total_nlines = total_nlines 228 self.pointer_nlines = pointer_nlines 229 self.indices_nlines = indices_nlines 230 self.values_nlines = values_nlines 231 232 parser = FortranFormatParser() 233 pointer_format = parser.parse(pointer_format_str) 234 if not isinstance(pointer_format, IntFormat): 235 raise ValueError("Expected int format for pointer format, got %s" 236 % pointer_format) 237 238 indices_format = parser.parse(indices_format_str) 239 if not isinstance(indices_format, IntFormat): 240 raise ValueError("Expected int format for indices format, got %s" % 241 indices_format) 242 243 values_format = parser.parse(values_format_str) 244 if isinstance(values_format, ExpFormat): 245 if mxtype.value_type not in ["real", "complex"]: 246 raise ValueError("Inconsistency between matrix type %s and " 247 "value type %s" % (mxtype, values_format)) 248 values_dtype = np.float64 249 elif isinstance(values_format, IntFormat): 250 if mxtype.value_type not in ["integer"]: 251 raise ValueError("Inconsistency between matrix type %s and " 252 "value type %s" % (mxtype, values_format)) 253 # XXX: fortran int -> dtype association ? 254 values_dtype = int 255 else: 256 raise ValueError("Unsupported format for values %r" % (values_format,)) 257 258 self.pointer_format = pointer_format 259 self.indices_format = indices_format 260 self.values_format = values_format 261 262 self.pointer_dtype = np.int32 263 self.indices_dtype = np.int32 264 self.values_dtype = values_dtype 265 266 self.pointer_nlines = pointer_nlines 267 self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines) 268 269 self.indices_nlines = indices_nlines 270 self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines) 271 272 self.values_nlines = values_nlines 273 self.values_nbytes_full = _nbytes_full(values_format, values_nlines) 274 275 self.nrows = nrows 276 self.ncols = ncols 277 self.nnon_zeros = nnon_zeros 278 self.nelementals = nelementals 279 self.mxtype = mxtype 280 281 def dump(self): 282 """Gives the header corresponding to this instance as a string.""" 283 header = [self.title.ljust(72) + self.key.ljust(8)] 284 285 header.append("%14d%14d%14d%14d" % 286 (self.total_nlines, self.pointer_nlines, 287 self.indices_nlines, self.values_nlines)) 288 header.append("%14s%14d%14d%14d%14d" % 289 (self.mxtype.fortran_format.ljust(14), self.nrows, 290 self.ncols, self.nnon_zeros, 0)) 291 292 pffmt = self.pointer_format.fortran_format 293 iffmt = self.indices_format.fortran_format 294 vffmt = self.values_format.fortran_format 295 header.append("%16s%16s%20s" % 296 (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20))) 297 return "\n".join(header) 298 299 300def _expect_int(value, msg=None): 301 try: 302 return int(value) 303 except ValueError as e: 304 if msg is None: 305 msg = "Expected an int, got %s" 306 raise ValueError(msg % value) from e 307 308 309def _read_hb_data(content, header): 310 # XXX: look at a way to reduce memory here (big string creation) 311 ptr_string = "".join([content.read(header.pointer_nbytes_full), 312 content.readline()]) 313 ptr = np.fromstring(ptr_string, 314 dtype=int, sep=' ') 315 316 ind_string = "".join([content.read(header.indices_nbytes_full), 317 content.readline()]) 318 ind = np.fromstring(ind_string, 319 dtype=int, sep=' ') 320 321 val_string = "".join([content.read(header.values_nbytes_full), 322 content.readline()]) 323 val = np.fromstring(val_string, 324 dtype=header.values_dtype, sep=' ') 325 326 try: 327 return csc_matrix((val, ind-1, ptr-1), 328 shape=(header.nrows, header.ncols)) 329 except ValueError as e: 330 raise e 331 332 333def _write_data(m, fid, header): 334 m = m.tocsc(copy=False) 335 336 def write_array(f, ar, nlines, fmt): 337 # ar_nlines is the number of full lines, n is the number of items per 338 # line, ffmt the fortran format 339 pyfmt = fmt.python_format 340 pyfmt_full = pyfmt * fmt.repeat 341 342 # for each array to write, we first write the full lines, and special 343 # case for partial line 344 full = ar[:(nlines - 1) * fmt.repeat] 345 for row in full.reshape((nlines-1, fmt.repeat)): 346 f.write(pyfmt_full % tuple(row) + "\n") 347 nremain = ar.size - full.size 348 if nremain > 0: 349 f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n") 350 351 fid.write(header.dump()) 352 fid.write("\n") 353 # +1 is for Fortran one-based indexing 354 write_array(fid, m.indptr+1, header.pointer_nlines, 355 header.pointer_format) 356 write_array(fid, m.indices+1, header.indices_nlines, 357 header.indices_format) 358 write_array(fid, m.data, header.values_nlines, 359 header.values_format) 360 361 362class HBMatrixType: 363 """Class to hold the matrix type.""" 364 # q2f* translates qualified names to Fortran character 365 _q2f_type = { 366 "real": "R", 367 "complex": "C", 368 "pattern": "P", 369 "integer": "I", 370 } 371 _q2f_structure = { 372 "symmetric": "S", 373 "unsymmetric": "U", 374 "hermitian": "H", 375 "skewsymmetric": "Z", 376 "rectangular": "R" 377 } 378 _q2f_storage = { 379 "assembled": "A", 380 "elemental": "E", 381 } 382 383 _f2q_type = dict([(j, i) for i, j in _q2f_type.items()]) 384 _f2q_structure = dict([(j, i) for i, j in _q2f_structure.items()]) 385 _f2q_storage = dict([(j, i) for i, j in _q2f_storage.items()]) 386 387 @classmethod 388 def from_fortran(cls, fmt): 389 if not len(fmt) == 3: 390 raise ValueError("Fortran format for matrix type should be 3 " 391 "characters long") 392 try: 393 value_type = cls._f2q_type[fmt[0]] 394 structure = cls._f2q_structure[fmt[1]] 395 storage = cls._f2q_storage[fmt[2]] 396 return cls(value_type, structure, storage) 397 except KeyError as e: 398 raise ValueError("Unrecognized format %s" % fmt) from e 399 400 def __init__(self, value_type, structure, storage="assembled"): 401 self.value_type = value_type 402 self.structure = structure 403 self.storage = storage 404 405 if value_type not in self._q2f_type: 406 raise ValueError("Unrecognized type %s" % value_type) 407 if structure not in self._q2f_structure: 408 raise ValueError("Unrecognized structure %s" % structure) 409 if storage not in self._q2f_storage: 410 raise ValueError("Unrecognized storage %s" % storage) 411 412 @property 413 def fortran_format(self): 414 return self._q2f_type[self.value_type] + \ 415 self._q2f_structure[self.structure] + \ 416 self._q2f_storage[self.storage] 417 418 def __repr__(self): 419 return "HBMatrixType(%s, %s, %s)" % \ 420 (self.value_type, self.structure, self.storage) 421 422 423class HBFile: 424 def __init__(self, file, hb_info=None): 425 """Create a HBFile instance. 426 427 Parameters 428 ---------- 429 file : file-object 430 StringIO work as well 431 hb_info : HBInfo, optional 432 Should be given as an argument for writing, in which case the file 433 should be writable. 434 """ 435 self._fid = file 436 if hb_info is None: 437 self._hb_info = HBInfo.from_file(file) 438 else: 439 #raise IOError("file %s is not writable, and hb_info " 440 # "was given." % file) 441 self._hb_info = hb_info 442 443 @property 444 def title(self): 445 return self._hb_info.title 446 447 @property 448 def key(self): 449 return self._hb_info.key 450 451 @property 452 def type(self): 453 return self._hb_info.mxtype.value_type 454 455 @property 456 def structure(self): 457 return self._hb_info.mxtype.structure 458 459 @property 460 def storage(self): 461 return self._hb_info.mxtype.storage 462 463 def read_matrix(self): 464 return _read_hb_data(self._fid, self._hb_info) 465 466 def write_matrix(self, m): 467 return _write_data(m, self._fid, self._hb_info) 468 469 470def hb_read(path_or_open_file): 471 """Read HB-format file. 472 473 Parameters 474 ---------- 475 path_or_open_file : path-like or file-like 476 If a file-like object, it is used as-is. Otherwise, it is opened 477 before reading. 478 479 Returns 480 ------- 481 data : scipy.sparse.csc_matrix instance 482 The data read from the HB file as a sparse matrix. 483 484 Notes 485 ----- 486 At the moment not the full Harwell-Boeing format is supported. Supported 487 features are: 488 489 - assembled, non-symmetric, real matrices 490 - integer for pointer/indices 491 - exponential format for float values, and int format 492 493 Examples 494 -------- 495 We can read and write a harwell-boeing format file: 496 497 >>> from scipy.io.harwell_boeing import hb_read, hb_write 498 >>> from scipy.sparse import csr_matrix, eye 499 >>> data = csr_matrix(eye(3)) # create a sparse matrix 500 >>> hb_write("data.hb", data) # write a hb file 501 >>> print(hb_read("data.hb")) # read a hb file 502 (0, 0) 1.0 503 (1, 1) 1.0 504 (2, 2) 1.0 505 506 """ 507 def _get_matrix(fid): 508 hb = HBFile(fid) 509 return hb.read_matrix() 510 511 if hasattr(path_or_open_file, 'read'): 512 return _get_matrix(path_or_open_file) 513 else: 514 with open(path_or_open_file) as f: 515 return _get_matrix(f) 516 517 518def hb_write(path_or_open_file, m, hb_info=None): 519 """Write HB-format file. 520 521 Parameters 522 ---------- 523 path_or_open_file : path-like or file-like 524 If a file-like object, it is used as-is. Otherwise, it is opened 525 before writing. 526 m : sparse-matrix 527 the sparse matrix to write 528 hb_info : HBInfo 529 contains the meta-data for write 530 531 Returns 532 ------- 533 None 534 535 Notes 536 ----- 537 At the moment not the full Harwell-Boeing format is supported. Supported 538 features are: 539 540 - assembled, non-symmetric, real matrices 541 - integer for pointer/indices 542 - exponential format for float values, and int format 543 544 Examples 545 -------- 546 We can read and write a harwell-boeing format file: 547 548 >>> from scipy.io.harwell_boeing import hb_read, hb_write 549 >>> from scipy.sparse import csr_matrix, eye 550 >>> data = csr_matrix(eye(3)) # create a sparse matrix 551 >>> hb_write("data.hb", data) # write a hb file 552 >>> print(hb_read("data.hb")) # read a hb file 553 (0, 0) 1.0 554 (1, 1) 1.0 555 (2, 2) 1.0 556 557 """ 558 m = m.tocsc(copy=False) 559 560 if hb_info is None: 561 hb_info = HBInfo.from_data(m) 562 563 def _set_matrix(fid): 564 hb = HBFile(fid, hb_info) 565 return hb.write_matrix(m) 566 567 if hasattr(path_or_open_file, 'write'): 568 return _set_matrix(path_or_open_file) 569 else: 570 with open(path_or_open_file, 'w') as f: 571 return _set_matrix(f) 572