1"""
2Implementation of Harwell-Boeing read/write.
3
4At the moment not the full Harwell-Boeing format is supported. Supported
5features are:
6
7    - assembled, non-symmetric, real matrices
8    - integer for pointer/indices
9    - exponential format for float values, and int format
10
11"""
12# TODO:
13#   - Add more support (symmetric/complex matrices, non-assembled matrices ?)
14
15# XXX: reading is reasonably efficient (>= 85 % is in numpy.fromstring), but
16# takes a lot of memory. Being faster would require compiled code.
17# write is not efficient. Although not a terribly exciting task,
18# having reusable facilities to efficiently read/write fortran-formatted files
19# would be useful outside this module.
20
21import warnings
22
23import numpy as np
24from scipy.sparse import csc_matrix
25from scipy.io.harwell_boeing._fortran_format_parser import \
26        FortranFormatParser, IntFormat, ExpFormat
27
28__all__ = ["MalformedHeader", "hb_read", "hb_write", "HBInfo", "HBFile",
29           "HBMatrixType"]
30
31
32class MalformedHeader(Exception):
33    pass
34
35
36class LineOverflow(Warning):
37    pass
38
39
40def _nbytes_full(fmt, nlines):
41    """Return the number of bytes to read to get every full lines for the
42    given parsed fortran format."""
43    return (fmt.repeat * fmt.width + 1) * (nlines - 1)
44
45
46class HBInfo:
47    @classmethod
48    def from_data(cls, m, title="Default title", key="0", mxtype=None, fmt=None):
49        """Create a HBInfo instance from an existing sparse matrix.
50
51        Parameters
52        ----------
53        m : sparse matrix
54            the HBInfo instance will derive its parameters from m
55        title : str
56            Title to put in the HB header
57        key : str
58            Key
59        mxtype : HBMatrixType
60            type of the input matrix
61        fmt : dict
62            not implemented
63
64        Returns
65        -------
66        hb_info : HBInfo instance
67        """
68        m = m.tocsc(copy=False)
69
70        pointer = m.indptr
71        indices = m.indices
72        values = m.data
73
74        nrows, ncols = m.shape
75        nnon_zeros = m.nnz
76
77        if fmt is None:
78            # +1 because HB use one-based indexing (Fortran), and we will write
79            # the indices /pointer as such
80            pointer_fmt = IntFormat.from_number(np.max(pointer+1))
81            indices_fmt = IntFormat.from_number(np.max(indices+1))
82
83            if values.dtype.kind in np.typecodes["AllFloat"]:
84                values_fmt = ExpFormat.from_number(-np.max(np.abs(values)))
85            elif values.dtype.kind in np.typecodes["AllInteger"]:
86                values_fmt = IntFormat.from_number(-np.max(np.abs(values)))
87            else:
88                raise NotImplementedError("type %s not implemented yet" % values.dtype.kind)
89        else:
90            raise NotImplementedError("fmt argument not supported yet.")
91
92        if mxtype is None:
93            if not np.isrealobj(values):
94                raise ValueError("Complex values not supported yet")
95            if values.dtype.kind in np.typecodes["AllInteger"]:
96                tp = "integer"
97            elif values.dtype.kind in np.typecodes["AllFloat"]:
98                tp = "real"
99            else:
100                raise NotImplementedError("type %s for values not implemented"
101                                          % values.dtype)
102            mxtype = HBMatrixType(tp, "unsymmetric", "assembled")
103        else:
104            raise ValueError("mxtype argument not handled yet.")
105
106        def _nlines(fmt, size):
107            nlines = size // fmt.repeat
108            if nlines * fmt.repeat != size:
109                nlines += 1
110            return nlines
111
112        pointer_nlines = _nlines(pointer_fmt, pointer.size)
113        indices_nlines = _nlines(indices_fmt, indices.size)
114        values_nlines = _nlines(values_fmt, values.size)
115
116        total_nlines = pointer_nlines + indices_nlines + values_nlines
117
118        return cls(title, key,
119            total_nlines, pointer_nlines, indices_nlines, values_nlines,
120            mxtype, nrows, ncols, nnon_zeros,
121            pointer_fmt.fortran_format, indices_fmt.fortran_format,
122            values_fmt.fortran_format)
123
124    @classmethod
125    def from_file(cls, fid):
126        """Create a HBInfo instance from a file object containing a matrix in the
127        HB format.
128
129        Parameters
130        ----------
131        fid : file-like matrix
132            File or file-like object containing a matrix in the HB format.
133
134        Returns
135        -------
136        hb_info : HBInfo instance
137        """
138        # First line
139        line = fid.readline().strip("\n")
140        if not len(line) > 72:
141            raise ValueError("Expected at least 72 characters for first line, "
142                             "got: \n%s" % line)
143        title = line[:72]
144        key = line[72:]
145
146        # Second line
147        line = fid.readline().strip("\n")
148        if not len(line.rstrip()) >= 56:
149            raise ValueError("Expected at least 56 characters for second line, "
150                             "got: \n%s" % line)
151        total_nlines = _expect_int(line[:14])
152        pointer_nlines = _expect_int(line[14:28])
153        indices_nlines = _expect_int(line[28:42])
154        values_nlines = _expect_int(line[42:56])
155
156        rhs_nlines = line[56:72].strip()
157        if rhs_nlines == '':
158            rhs_nlines = 0
159        else:
160            rhs_nlines = _expect_int(rhs_nlines)
161        if not rhs_nlines == 0:
162            raise ValueError("Only files without right hand side supported for "
163                             "now.")
164
165        # Third line
166        line = fid.readline().strip("\n")
167        if not len(line) >= 70:
168            raise ValueError("Expected at least 72 character for third line, got:\n"
169                             "%s" % line)
170
171        mxtype_s = line[:3].upper()
172        if not len(mxtype_s) == 3:
173            raise ValueError("mxtype expected to be 3 characters long")
174
175        mxtype = HBMatrixType.from_fortran(mxtype_s)
176        if mxtype.value_type not in ["real", "integer"]:
177            raise ValueError("Only real or integer matrices supported for "
178                             "now (detected %s)" % mxtype)
179        if not mxtype.structure == "unsymmetric":
180            raise ValueError("Only unsymmetric matrices supported for "
181                             "now (detected %s)" % mxtype)
182        if not mxtype.storage == "assembled":
183            raise ValueError("Only assembled matrices supported for now")
184
185        if not line[3:14] == " " * 11:
186            raise ValueError("Malformed data for third line: %s" % line)
187
188        nrows = _expect_int(line[14:28])
189        ncols = _expect_int(line[28:42])
190        nnon_zeros = _expect_int(line[42:56])
191        nelementals = _expect_int(line[56:70])
192        if not nelementals == 0:
193            raise ValueError("Unexpected value %d for nltvl (last entry of line 3)"
194                             % nelementals)
195
196        # Fourth line
197        line = fid.readline().strip("\n")
198
199        ct = line.split()
200        if not len(ct) == 3:
201            raise ValueError("Expected 3 formats, got %s" % ct)
202
203        return cls(title, key,
204                   total_nlines, pointer_nlines, indices_nlines, values_nlines,
205                   mxtype, nrows, ncols, nnon_zeros,
206                   ct[0], ct[1], ct[2],
207                   rhs_nlines, nelementals)
208
209    def __init__(self, title, key,
210            total_nlines, pointer_nlines, indices_nlines, values_nlines,
211            mxtype, nrows, ncols, nnon_zeros,
212            pointer_format_str, indices_format_str, values_format_str,
213            right_hand_sides_nlines=0, nelementals=0):
214        """Do not use this directly, but the class ctrs (from_* functions)."""
215        self.title = title
216        self.key = key
217        if title is None:
218            title = "No Title"
219        if len(title) > 72:
220            raise ValueError("title cannot be > 72 characters")
221
222        if key is None:
223            key = "|No Key"
224        if len(key) > 8:
225            warnings.warn("key is > 8 characters (key is %s)" % key, LineOverflow)
226
227        self.total_nlines = total_nlines
228        self.pointer_nlines = pointer_nlines
229        self.indices_nlines = indices_nlines
230        self.values_nlines = values_nlines
231
232        parser = FortranFormatParser()
233        pointer_format = parser.parse(pointer_format_str)
234        if not isinstance(pointer_format, IntFormat):
235            raise ValueError("Expected int format for pointer format, got %s"
236                             % pointer_format)
237
238        indices_format = parser.parse(indices_format_str)
239        if not isinstance(indices_format, IntFormat):
240            raise ValueError("Expected int format for indices format, got %s" %
241                             indices_format)
242
243        values_format = parser.parse(values_format_str)
244        if isinstance(values_format, ExpFormat):
245            if mxtype.value_type not in ["real", "complex"]:
246                raise ValueError("Inconsistency between matrix type %s and "
247                                 "value type %s" % (mxtype, values_format))
248            values_dtype = np.float64
249        elif isinstance(values_format, IntFormat):
250            if mxtype.value_type not in ["integer"]:
251                raise ValueError("Inconsistency between matrix type %s and "
252                                 "value type %s" % (mxtype, values_format))
253            # XXX: fortran int -> dtype association ?
254            values_dtype = int
255        else:
256            raise ValueError("Unsupported format for values %r" % (values_format,))
257
258        self.pointer_format = pointer_format
259        self.indices_format = indices_format
260        self.values_format = values_format
261
262        self.pointer_dtype = np.int32
263        self.indices_dtype = np.int32
264        self.values_dtype = values_dtype
265
266        self.pointer_nlines = pointer_nlines
267        self.pointer_nbytes_full = _nbytes_full(pointer_format, pointer_nlines)
268
269        self.indices_nlines = indices_nlines
270        self.indices_nbytes_full = _nbytes_full(indices_format, indices_nlines)
271
272        self.values_nlines = values_nlines
273        self.values_nbytes_full = _nbytes_full(values_format, values_nlines)
274
275        self.nrows = nrows
276        self.ncols = ncols
277        self.nnon_zeros = nnon_zeros
278        self.nelementals = nelementals
279        self.mxtype = mxtype
280
281    def dump(self):
282        """Gives the header corresponding to this instance as a string."""
283        header = [self.title.ljust(72) + self.key.ljust(8)]
284
285        header.append("%14d%14d%14d%14d" %
286                      (self.total_nlines, self.pointer_nlines,
287                       self.indices_nlines, self.values_nlines))
288        header.append("%14s%14d%14d%14d%14d" %
289                      (self.mxtype.fortran_format.ljust(14), self.nrows,
290                       self.ncols, self.nnon_zeros, 0))
291
292        pffmt = self.pointer_format.fortran_format
293        iffmt = self.indices_format.fortran_format
294        vffmt = self.values_format.fortran_format
295        header.append("%16s%16s%20s" %
296                      (pffmt.ljust(16), iffmt.ljust(16), vffmt.ljust(20)))
297        return "\n".join(header)
298
299
300def _expect_int(value, msg=None):
301    try:
302        return int(value)
303    except ValueError as e:
304        if msg is None:
305            msg = "Expected an int, got %s"
306        raise ValueError(msg % value) from e
307
308
309def _read_hb_data(content, header):
310    # XXX: look at a way to reduce memory here (big string creation)
311    ptr_string = "".join([content.read(header.pointer_nbytes_full),
312                           content.readline()])
313    ptr = np.fromstring(ptr_string,
314            dtype=int, sep=' ')
315
316    ind_string = "".join([content.read(header.indices_nbytes_full),
317                       content.readline()])
318    ind = np.fromstring(ind_string,
319            dtype=int, sep=' ')
320
321    val_string = "".join([content.read(header.values_nbytes_full),
322                          content.readline()])
323    val = np.fromstring(val_string,
324            dtype=header.values_dtype, sep=' ')
325
326    try:
327        return csc_matrix((val, ind-1, ptr-1),
328                          shape=(header.nrows, header.ncols))
329    except ValueError as e:
330        raise e
331
332
333def _write_data(m, fid, header):
334    m = m.tocsc(copy=False)
335
336    def write_array(f, ar, nlines, fmt):
337        # ar_nlines is the number of full lines, n is the number of items per
338        # line, ffmt the fortran format
339        pyfmt = fmt.python_format
340        pyfmt_full = pyfmt * fmt.repeat
341
342        # for each array to write, we first write the full lines, and special
343        # case for partial line
344        full = ar[:(nlines - 1) * fmt.repeat]
345        for row in full.reshape((nlines-1, fmt.repeat)):
346            f.write(pyfmt_full % tuple(row) + "\n")
347        nremain = ar.size - full.size
348        if nremain > 0:
349            f.write((pyfmt * nremain) % tuple(ar[ar.size - nremain:]) + "\n")
350
351    fid.write(header.dump())
352    fid.write("\n")
353    # +1 is for Fortran one-based indexing
354    write_array(fid, m.indptr+1, header.pointer_nlines,
355                header.pointer_format)
356    write_array(fid, m.indices+1, header.indices_nlines,
357                header.indices_format)
358    write_array(fid, m.data, header.values_nlines,
359                header.values_format)
360
361
362class HBMatrixType:
363    """Class to hold the matrix type."""
364    # q2f* translates qualified names to Fortran character
365    _q2f_type = {
366        "real": "R",
367        "complex": "C",
368        "pattern": "P",
369        "integer": "I",
370    }
371    _q2f_structure = {
372            "symmetric": "S",
373            "unsymmetric": "U",
374            "hermitian": "H",
375            "skewsymmetric": "Z",
376            "rectangular": "R"
377    }
378    _q2f_storage = {
379        "assembled": "A",
380        "elemental": "E",
381    }
382
383    _f2q_type = dict([(j, i) for i, j in _q2f_type.items()])
384    _f2q_structure = dict([(j, i) for i, j in _q2f_structure.items()])
385    _f2q_storage = dict([(j, i) for i, j in _q2f_storage.items()])
386
387    @classmethod
388    def from_fortran(cls, fmt):
389        if not len(fmt) == 3:
390            raise ValueError("Fortran format for matrix type should be 3 "
391                             "characters long")
392        try:
393            value_type = cls._f2q_type[fmt[0]]
394            structure = cls._f2q_structure[fmt[1]]
395            storage = cls._f2q_storage[fmt[2]]
396            return cls(value_type, structure, storage)
397        except KeyError as e:
398            raise ValueError("Unrecognized format %s" % fmt) from e
399
400    def __init__(self, value_type, structure, storage="assembled"):
401        self.value_type = value_type
402        self.structure = structure
403        self.storage = storage
404
405        if value_type not in self._q2f_type:
406            raise ValueError("Unrecognized type %s" % value_type)
407        if structure not in self._q2f_structure:
408            raise ValueError("Unrecognized structure %s" % structure)
409        if storage not in self._q2f_storage:
410            raise ValueError("Unrecognized storage %s" % storage)
411
412    @property
413    def fortran_format(self):
414        return self._q2f_type[self.value_type] + \
415               self._q2f_structure[self.structure] + \
416               self._q2f_storage[self.storage]
417
418    def __repr__(self):
419        return "HBMatrixType(%s, %s, %s)" % \
420               (self.value_type, self.structure, self.storage)
421
422
423class HBFile:
424    def __init__(self, file, hb_info=None):
425        """Create a HBFile instance.
426
427        Parameters
428        ----------
429        file : file-object
430            StringIO work as well
431        hb_info : HBInfo, optional
432            Should be given as an argument for writing, in which case the file
433            should be writable.
434        """
435        self._fid = file
436        if hb_info is None:
437            self._hb_info = HBInfo.from_file(file)
438        else:
439            #raise IOError("file %s is not writable, and hb_info "
440            #              "was given." % file)
441            self._hb_info = hb_info
442
443    @property
444    def title(self):
445        return self._hb_info.title
446
447    @property
448    def key(self):
449        return self._hb_info.key
450
451    @property
452    def type(self):
453        return self._hb_info.mxtype.value_type
454
455    @property
456    def structure(self):
457        return self._hb_info.mxtype.structure
458
459    @property
460    def storage(self):
461        return self._hb_info.mxtype.storage
462
463    def read_matrix(self):
464        return _read_hb_data(self._fid, self._hb_info)
465
466    def write_matrix(self, m):
467        return _write_data(m, self._fid, self._hb_info)
468
469
470def hb_read(path_or_open_file):
471    """Read HB-format file.
472
473    Parameters
474    ----------
475    path_or_open_file : path-like or file-like
476        If a file-like object, it is used as-is. Otherwise, it is opened
477        before reading.
478
479    Returns
480    -------
481    data : scipy.sparse.csc_matrix instance
482        The data read from the HB file as a sparse matrix.
483
484    Notes
485    -----
486    At the moment not the full Harwell-Boeing format is supported. Supported
487    features are:
488
489        - assembled, non-symmetric, real matrices
490        - integer for pointer/indices
491        - exponential format for float values, and int format
492
493    Examples
494    --------
495    We can read and write a harwell-boeing format file:
496
497    >>> from scipy.io.harwell_boeing import hb_read, hb_write
498    >>> from scipy.sparse import csr_matrix, eye
499    >>> data = csr_matrix(eye(3))  # create a sparse matrix
500    >>> hb_write("data.hb", data)  # write a hb file
501    >>> print(hb_read("data.hb"))  # read a hb file
502      (0, 0)	1.0
503      (1, 1)	1.0
504      (2, 2)	1.0
505
506    """
507    def _get_matrix(fid):
508        hb = HBFile(fid)
509        return hb.read_matrix()
510
511    if hasattr(path_or_open_file, 'read'):
512        return _get_matrix(path_or_open_file)
513    else:
514        with open(path_or_open_file) as f:
515            return _get_matrix(f)
516
517
518def hb_write(path_or_open_file, m, hb_info=None):
519    """Write HB-format file.
520
521    Parameters
522    ----------
523    path_or_open_file : path-like or file-like
524        If a file-like object, it is used as-is. Otherwise, it is opened
525        before writing.
526    m : sparse-matrix
527        the sparse matrix to write
528    hb_info : HBInfo
529        contains the meta-data for write
530
531    Returns
532    -------
533    None
534
535    Notes
536    -----
537    At the moment not the full Harwell-Boeing format is supported. Supported
538    features are:
539
540        - assembled, non-symmetric, real matrices
541        - integer for pointer/indices
542        - exponential format for float values, and int format
543
544    Examples
545    --------
546    We can read and write a harwell-boeing format file:
547
548    >>> from scipy.io.harwell_boeing import hb_read, hb_write
549    >>> from scipy.sparse import csr_matrix, eye
550    >>> data = csr_matrix(eye(3))  # create a sparse matrix
551    >>> hb_write("data.hb", data)  # write a hb file
552    >>> print(hb_read("data.hb"))  # read a hb file
553      (0, 0)	1.0
554      (1, 1)	1.0
555      (2, 2)	1.0
556
557    """
558    m = m.tocsc(copy=False)
559
560    if hb_info is None:
561        hb_info = HBInfo.from_data(m)
562
563    def _set_matrix(fid):
564        hb = HBFile(fid, hb_info)
565        return hb.write_matrix(m)
566
567    if hasattr(path_or_open_file, 'write'):
568        return _set_matrix(path_or_open_file)
569    else:
570        with open(path_or_open_file, 'w') as f:
571            return _set_matrix(f)
572