1import re
2import time
3import numpy as np
4
5from ase.atoms import Atoms
6from ase.utils import reader, writer
7from ase.cell import Cell
8
9__all__ = ['read_rmc6f', 'write_rmc6f']
10
11ncols2style = {9: 'no_labels',
12               10: 'labels',
13               11: 'magnetic'}
14
15
16def _read_construct_regex(lines):
17    """
18    Utility for constructing  regular expressions used by reader.
19    """
20    lines = [l.strip() for l in lines]
21    lines_re = '|'.join(lines)
22    lines_re = lines_re.replace(' ', r'\s+')
23    lines_re = lines_re.replace('(', r'\(')
24    lines_re = lines_re.replace(')', r'\)')
25    return '({})'.format(lines_re)
26
27
28def _read_line_of_atoms_section(fields):
29    """
30    Process `fields` line of Atoms section in rmc6f file and output extracted
31    info as atom id (key) and list of properties for Atoms object (values).
32
33    Parameters
34    ----------
35    fields: list[str]
36        List of columns from line in rmc6f file.
37
38
39    Returns
40    ------
41    atom_id: int
42        Atom ID
43    properties: list[str|float]
44        List of Atoms properties based on rmc6f style.
45        Basically, have 1) element and fractional coordinates for 'labels'
46        or 'no_labels' style and 2) same for 'magnetic' style except adds
47        the spin.
48        Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:
49            1) [element, xf, yf, zf]
50            2) [element, xf, yf, zf, spin]
51    """
52    # Atom id
53    atom_id = int(fields[0])
54
55    # Get style used for rmc6f from file based on number of columns
56    ncols = len(fields)
57    style = ncols2style[ncols]
58
59    # Create the position dictionary
60    properties = list()
61    element = str(fields[1])
62    if style == 'no_labels':
63        # id element xf yf zf ref_num ucell_x ucell_y ucell_z
64        xf = float(fields[2])
65        yf = float(fields[3])
66        zf = float(fields[4])
67        properties = [element, xf, yf, zf]
68
69    elif style == 'labels':
70        # id element label xf yf zf ref_num ucell_x ucell_y ucell_z
71        xf = float(fields[3])
72        yf = float(fields[4])
73        zf = float(fields[5])
74        properties = [element, xf, yf, zf]
75
76    elif style == 'magnetic':
77        # id element xf yf zf ref_num ucell_x ucell_y ucell_z M: spin
78        xf = float(fields[2])
79        yf = float(fields[3])
80        zf = float(fields[4])
81        spin = float(fields[10].strip("M:"))
82        properties = [element, xf, yf, zf, spin]
83    else:
84        raise Exception("Unsupported style for parsing rmc6f file format.")
85
86    return atom_id, properties
87
88
89def _read_process_rmc6f_lines_to_pos_and_cell(lines):
90    """
91    Processes the lines of rmc6f file to atom position dictionary and cell
92
93    Parameters
94    ----------
95    lines: list[str]
96        List of lines from rmc6f file.
97
98    Returns
99    ------
100    pos : dict{int:list[str|float]}
101        Dict for each atom id and Atoms properties based on rmc6f style.
102        Basically, have 1) element and fractional coordinates for 'labels'
103        or 'no_labels' style and 2) same for 'magnetic' style except adds
104        the spin.
105        Examples for 1) 'labels' or 'no_labels' styles or 2) 'magnetic' style:
106            1) pos[aid] = [element, xf, yf, zf]
107            2) pos[aid] = [element, xf, yf, zf, spin]
108    cell: Cell object
109        The ASE Cell object created from cell parameters read from the 'Cell'
110        section of rmc6f file.
111    """
112
113    # Inititalize output pos dictionary
114    pos = {}
115
116    # Defined the header an section lines we process
117    header_lines = [
118        "Number of atoms:",
119        "Supercell dimensions:",
120        "Cell (Ang/deg):",
121        "Lattice vectors (Ang):"]
122    sections = ["Atoms"]
123
124    # Construct header and sections regex
125    header_lines_re = _read_construct_regex(header_lines)
126    sections_re = _read_construct_regex(sections)
127
128    section = None
129    header = True
130
131    # Remove any lines that are blank
132    lines = [line for line in lines if line != '']
133
134    # Process each line of rmc6f file
135    pos = {}
136    for line in lines:
137
138        # check if in a section
139        m = re.match(sections_re, line)
140        if m is not None:
141            section = m.group(0).strip()
142            header = False
143            continue
144
145        # header section
146        if header:
147            field = None
148            val = None
149
150            # Regex that matches whitespace-separated floats
151            float_list_re = r'\s+(\d[\d|\s\.]+[\d|\.])'
152            m = re.search(header_lines_re + float_list_re, line)
153            if m is not None:
154                field = m.group(1)
155                val = m.group(2)
156
157            if field is not None and val is not None:
158
159                if field == "Number of atoms:":
160                    pass
161                    """
162                    NOTE: Can just capture via number of atoms ingested.
163                          Maybe use in future for a check.
164                    code: natoms = int(val)
165                    """
166
167                if field.startswith('Supercell'):
168                    pass
169                    """
170                    NOTE: wrapping back down to unit cell is not
171                          necessarily needed for ASE object.
172
173                    code: supercell = [int(x) for x in val.split()]
174                    """
175
176                if field.startswith('Cell'):
177                    cellpar = [float(x) for x in val.split()]
178                    cell = Cell.fromcellpar(cellpar)
179
180                if field.startswith('Lattice'):
181                    pass
182                    """
183                    NOTE: Have questions about RMC fractionalization matrix for
184                          conversion in data2config vs. the ASE matrix.
185                          Currently, just support the Cell section.
186                    """
187
188        # main body section
189        if section is not None:
190            if section == 'Atoms':
191                atom_id, atom_props = _read_line_of_atoms_section(line.split())
192                pos[atom_id] = atom_props
193
194    return pos, cell
195
196
197def _write_output_column_format(columns, arrays):
198    """
199    Helper function to build output for data columns in rmc6f format
200
201    Parameters
202    ----------
203    columns: list[str]
204        List of keys in arrays. Will be columns in the output file.
205    arrays: dict{str:np.array}
206        Dict with arrays for each column of rmc6f file that are
207        property of Atoms object.
208
209    Returns
210    ------
211    property_ncols : list[int]
212        Number of columns for each property.
213    dtype_obj: np.dtype
214        Data type object for the columns.
215    formats_as_str: str
216        The format for printing the columns.
217
218    """
219    fmt_map = {'d': ('R', '%14.6f '),
220               'f': ('R', '%14.6f '),
221               'i': ('I', '%8d '),
222               'O': ('S', '%s'),
223               'S': ('S', '%s'),
224               'U': ('S', '%s'),
225               'b': ('L', ' %.1s ')}
226
227    property_types = []
228    property_ncols = []
229    dtypes = []
230    formats = []
231
232    # Take each column and setup formatting vectors
233    for column in columns:
234        array = arrays[column]
235        dtype = array.dtype
236
237        property_type, fmt = fmt_map[dtype.kind]
238        property_types.append(property_type)
239
240        # Flags for 1d vectors
241        is_1d = len(array.shape) == 1
242        is_1d_as_2d = len(array.shape) == 2 and array.shape[1] == 1
243
244        # Construct the list of key pairs of column with dtype
245        if (is_1d or is_1d_as_2d):
246            ncol = 1
247            dtypes.append((column, dtype))
248        else:
249            ncol = array.shape[1]
250            for c in range(ncol):
251                dtypes.append((column + str(c), dtype))
252
253        # Add format and number of columns for this property to output array
254        formats.extend([fmt] * ncol)
255        property_ncols.append(ncol)
256
257    # Prepare outputs to correct data structure
258    dtype_obj = np.dtype(dtypes)
259    formats_as_str = ''.join(formats) + '\n'
260
261    return property_ncols, dtype_obj, formats_as_str
262
263
264def _write_output(filename, header_lines, data, fmt, order=None):
265    """
266    Helper function to write information to the filename
267
268    Parameters
269    ----------
270    filename : file|str
271        A file like object or filename
272    header_lines : list[str]
273        Header section of output rmc6f file
274    data: np.array[len(atoms)]
275        Array for the Atoms section to write to file. Has
276        the columns that need to be written on each row
277    fmt: str
278        The format string to use for writing each column in
279        the rows of data.
280    order : list[str]
281        If not None, gives a list of atom types for the order
282        to write out each.
283    """
284    fd = filename
285
286    # Write header section
287    for line in header_lines:
288        fd.write("%s \n" % line)
289
290    # If specifying the order, fix the atom id and write to file
291    natoms = data.shape[0]
292    if order is not None:
293        new_id = 0
294        for atype in order:
295            for i in range(natoms):
296                if atype == data[i][1]:
297                    new_id += 1
298                    data[i][0] = new_id
299                    fd.write(fmt % tuple(data[i]))
300    # ...just write rows to file
301    else:
302        for i in range(natoms):
303            fd.write(fmt % tuple(data[i]))
304
305
306@reader
307def read_rmc6f(filename, atom_type_map=None):
308    """
309    Parse a RMCProfile rmc6f file into ASE Atoms object
310
311    Parameters
312    ----------
313    filename : file|str
314        A file like object or filename.
315    atom_type_map: dict{str:str}
316        Map of atom types for conversions. Mainly used if there is
317        an atom type in the file that is not supported by ASE but
318        want to map to a supported atom type instead.
319
320        Example to map deuterium to hydrogen:
321        atom_type_map = { 'D': 'H' }
322
323    Returns
324    ------
325    structure : Atoms
326        The Atoms object read in from the rmc6f file.
327    """
328
329    fd = filename
330    lines = fd.readlines()
331
332    # Process the rmc6f file to extract positions and cell
333    pos, cell = _read_process_rmc6f_lines_to_pos_and_cell(lines)
334
335    # create an atom type map if one does not exist from unique atomic symbols
336    if atom_type_map is None:
337        symbols = [atom[0] for atom in pos.values()]
338        atom_type_map = {atype: atype for atype in symbols}
339
340    # Use map of tmp symbol to actual symbol
341    for atom in pos.values():
342        atom[0] = atom_type_map[atom[0]]
343
344    # create Atoms from read-in data
345    symbols = []
346    scaled_positions = []
347    spin = None
348    magmoms = []
349    for atom in pos.values():
350        if len(atom) == 4:
351            element, x, y, z = atom
352        else:
353            element, x, y, z, spin = atom
354
355        element = atom_type_map[element]
356        symbols.append(element)
357        scaled_positions.append([x, y, z])
358        if spin is not None:
359            magmoms.append(spin)
360
361    atoms = Atoms(scaled_positions=scaled_positions,
362                  symbols=symbols,
363                  cell=cell,
364                  magmoms=magmoms,
365                  pbc=[True, True, True])
366
367    return atoms
368
369
370@writer
371def write_rmc6f(filename, atoms, order=None, atom_type_map=None):
372    """
373    Write output in rmc6f format - RMCProfile v6 fractional coordinates
374
375    Parameters
376    ----------
377    filename : file|str
378        A file like object or filename.
379    atoms: Atoms object
380        The Atoms object to be written.
381
382    order : list[str]
383        If not None, gives a list of atom types for the order
384        to write out each.
385    atom_type_map: dict{str:str}
386        Map of atom types for conversions. Mainly used if there is
387        an atom type in the Atoms object that is a placeholder
388        for a different atom type. This is used when the atom type
389        is not supported by ASE but is in RMCProfile.
390
391        Example to map hydrogen to deuterium:
392        atom_type_map = { 'H': 'D' }
393    """
394
395    # get atom types and how many of each (and set to order if passed)
396    atom_types = set(atoms.symbols)
397    if order is not None:
398        if set(atom_types) != set(order):
399            raise Exception("The order is not a set of the atom types.")
400        atom_types = order
401
402    atom_count_dict = atoms.symbols.formula.count()
403    natom_types = [str(atom_count_dict[atom_type]) for atom_type in atom_types]
404
405    # create an atom type map if one does not exist from unique atomic symbols
406    if atom_type_map is None:
407        symbols = set(np.array(atoms.symbols))
408        atom_type_map = {atype: atype for atype in symbols}
409
410    # HEADER SECTION
411
412    # get type and number of each atom type
413    atom_types_list = [atom_type_map[atype] for atype in atom_types]
414    atom_types_present = ' '.join(atom_types_list)
415    natom_types_present = ' '.join(natom_types)
416
417    header_lines = [
418        "(Version 6f format configuration file)",
419        "(Generated by ASE - Atomic Simulation Environment https://wiki.fysik.dtu.dk/ase/ )",  # noqa: E501
420        "Metadata date: " + time.strftime('%d-%m-%Y'),
421        "Number of types of atoms:   {} ".format(len(atom_types)),
422        "Atom types present:          {}".format(atom_types_present),
423        "Number of each atom type:   {}".format(natom_types_present),
424        "Number of moves generated:           0",
425        "Number of moves tried:               0",
426        "Number of moves accepted:            0",
427        "Number of prior configuration saves: 0",
428        "Number of atoms:                     {}".format(len(atoms)),
429        "Supercell dimensions:                1 1 1"]
430
431    # magnetic moments
432    if atoms.has('magmoms'):
433        spin_str = "Number of spins:                     {}"
434        spin_line = spin_str.format(len(atoms.get_initial_magnetic_moments()))
435        header_lines.extend([spin_line])
436
437    density_str = "Number density (Ang^-3):              {}"
438    density_line = density_str.format(len(atoms) / atoms.get_volume())
439    cell_angles = [str(x) for x in atoms.cell.cellpar()]
440    cell_line = "Cell (Ang/deg): " + ' '.join(cell_angles)
441    header_lines.extend([density_line, cell_line])
442
443    # get lattice vectors from cell lengths and angles
444    # NOTE: RMCProfile uses a different convention for the fractionalization
445    # matrix
446
447    cell_parameters = atoms.cell.cellpar()
448    cell = Cell.fromcellpar(cell_parameters).T
449    x_line = ' '.join(['{:12.6f}'.format(i) for i in cell[0]])
450    y_line = ' '.join(['{:12.6f}'.format(i) for i in cell[1]])
451    z_line = ' '.join(['{:12.6f}'.format(i) for i in cell[2]])
452    lat_lines = ["Lattice vectors (Ang):", x_line, y_line, z_line]
453    header_lines.extend(lat_lines)
454    header_lines.extend(['Atoms:'])
455
456    # ATOMS SECTION
457
458    # create columns of data for atoms (fr_cols)
459    fr_cols = ['id', 'symbols', 'scaled_positions', 'ref_num', 'ref_cell']
460    if atoms.has('magmoms'):
461        fr_cols.extend('magmom')
462
463    # Collect data to be written out
464    natoms = len(atoms)
465
466    arrays = {}
467    arrays['id'] = np.array(range(1, natoms + 1, 1), int)
468    arrays['symbols'] = np.array(atoms.symbols)
469    arrays['ref_num'] = np.zeros(natoms, int)
470    arrays['ref_cell'] = np.zeros((natoms, 3), int)
471    arrays['scaled_positions'] = np.array(atoms.get_scaled_positions())
472
473    # get formatting for writing output based on atom arrays
474    ncols, dtype_obj, fmt = _write_output_column_format(fr_cols, arrays)
475
476    # Pack fr_cols into record array
477    data = np.zeros(natoms, dtype_obj)
478    for column, ncol in zip(fr_cols, ncols):
479        value = arrays[column]
480        if ncol == 1:
481            data[column] = np.squeeze(value)
482        else:
483            for c in range(ncol):
484                data[column + str(c)] = value[:, c]
485
486    # Use map of tmp symbol to actual symbol
487    for i in range(natoms):
488        data[i][1] = atom_type_map[data[i][1]]
489
490    # Write the output
491    _write_output(filename, header_lines, data, fmt, order=order)
492