1"""This module defines an interface to CASTEP for
2    use by the ASE (Webpage: http://wiki.fysik.dtu.dk/ase)
3
4Authors:
5    Max Hoffmann, max.hoffmann@ch.tum.de
6    Joerg Meyer, joerg.meyer@ch.tum.de
7    Simon P. Rittmeyer, simon.rittmeyer@tum.de
8
9Contributors:
10    Juan M. Lorenzi, juan.lorenzi@tum.de
11    Georg S. Michelitsch, georg.michelitsch@tch.tum.de
12    Reinhard J. Maurer, reinhard.maurer@yale.edu
13    Simone Sturniolo, simone.sturniolo@stfc.ac.uk
14"""
15
16import difflib
17import numpy as np
18import os
19import re
20import glob
21import shutil
22import sys
23import json
24import time
25import tempfile
26import warnings
27import subprocess
28from copy import deepcopy
29from collections import namedtuple
30from itertools import product
31from typing import List, Set
32
33import ase
34import ase.units as units
35from ase.calculators.general import Calculator
36from ase.calculators.calculator import compare_atoms
37from ase.calculators.calculator import PropertyNotImplementedError
38from ase.calculators.calculator import kpts2sizeandoffsets
39from ase.dft.kpoints import BandPath
40from ase.parallel import paropen
41from ase.io.castep import read_param
42from ase.io.castep import read_bands
43from ase.constraints import FixCartesian
44
45__all__ = [
46    'Castep',
47    'CastepCell',
48    'CastepParam',
49    'create_castep_keywords']
50
51contact_email = 'simon.rittmeyer@tum.de'
52
53# A convenient table to avoid the previously used "eval"
54_tf_table = {
55    '': True,  # Just the keyword is equivalent to True
56    'True': True,
57    'False': False}
58
59
60def _self_getter(getf):
61    # A decorator that makes it so that if no 'atoms' argument is passed to a
62    # getter function, self.atoms is used instead
63
64    def decor_getf(self, atoms=None, *args, **kwargs):
65
66        if atoms is None:
67            atoms = self.atoms
68
69        return getf(self, atoms, *args, **kwargs)
70
71    return decor_getf
72
73
74def _parse_tss_block(value, scaled=False):
75    # Parse the assigned value for a Transition State Search structure block
76    is_atoms = isinstance(value, ase.atoms.Atoms)
77    try:
78        is_strlist = all(map(lambda x: isinstance(x, str), value))
79    except TypeError:
80        is_strlist = False
81
82    if not is_atoms:
83        if not is_strlist:
84            # Invalid!
85            raise TypeError('castep.cell.positions_abs/frac_intermediate/'
86                            'product expects Atoms object or list of strings')
87
88        # First line must be Angstroms!
89        if (not scaled) and value[0].strip() != 'ang':
90            raise RuntimeError('Only ang units currently supported in castep.'
91                               'cell.positions_abs_intermediate/product')
92        return '\n'.join(map(str.strip, value))
93    else:
94        text_block = '' if scaled else 'ang\n'
95        positions = (value.get_scaled_positions() if scaled else
96                     value.get_positions())
97        symbols = value.get_chemical_symbols()
98        for s, p in zip(symbols, positions):
99            text_block += '    {0} {1:.3f} {2:.3f} {3:.3f}\n'.format(s, *p)
100
101        return text_block
102
103
104class Castep(Calculator):
105    r"""
106CASTEP Interface Documentation
107
108
109Introduction
110============
111
112CASTEP_ [1]_ W_ is a software package which uses density functional theory to
113provide a good atomic-level description of all manner of materials and
114molecules. CASTEP can give information about total energies, forces and
115stresses on an atomic system, as well as calculating optimum geometries, band
116structures, optical spectra, phonon spectra and much more. It can also perform
117molecular dynamics simulations.
118
119The CASTEP calculator interface class offers intuitive access to all CASTEP
120settings and most results. All CASTEP specific settings are accessible via
121attribute access (*i.e*. ``calc.param.keyword = ...`` or
122``calc.cell.keyword = ...``)
123
124
125Getting Started:
126================
127
128Set the environment variables appropriately for your system.
129
130>>> export CASTEP_COMMAND=' ... '
131>>> export CASTEP_PP_PATH=' ... '
132
133Note: alternatively to CASTEP_PP_PATH one can set PSPOT_DIR
134as CASTEP consults this by default, i.e.
135
136>>> export PSPOT_DIR=' ... '
137
138
139Running the Calculator
140======================
141
142The default initialization command for the CASTEP calculator is
143
144.. class:: Castep(directory='CASTEP', label='castep')
145
146To do a minimal run one only needs to set atoms, this will use all
147default settings of CASTEP, meaning LDA, singlepoint, etc..
148
149With a generated *castep_keywords.json* in place all options are accessible
150by inspection, *i.e.* tab-completion. This works best when using ``ipython``.
151All options can be accessed via ``calc.param.<TAB>`` or ``calc.cell.<TAB>``
152and documentation is printed with ``calc.param.<keyword> ?`` or
153``calc.cell.<keyword> ?``. All options can also be set directly
154using ``calc.keyword = ...`` or ``calc.KEYWORD = ...`` or even
155``ialc.KeYwOrD`` or directly as named arguments in the call to the constructor
156(*e.g.* ``Castep(task='GeometryOptimization')``).
157If using this calculator on a machine without CASTEP, one might choose to copy
158a *castep_keywords.json* file generated elsewhere in order to access this
159feature: the file will be used if located in the working directory,
160*$HOME/.ase/* or *ase/ase/calculators/* within the ASE library. The file should
161be generated the first time it is needed, but you can generate a new keywords
162file in the currect directory with ``python -m ase.calculators.castep``.
163
164All options that go into the ``.param`` file are held in an ``CastepParam``
165instance, while all options that go into the ``.cell`` file and don't belong
166to the atoms object are held in an ``CastepCell`` instance. Each instance can
167be created individually and can be added to calculators by attribute
168assignment, *i.e.* ``calc.param = param`` or ``calc.cell = cell``.
169
170All internal variables of the calculator start with an underscore (_).
171All cell attributes that clearly belong into the atoms object are blocked.
172Setting ``calc.atoms_attribute`` (*e.g.* ``= positions``) is sent directly to
173the atoms object.
174
175
176Arguments:
177==========
178
179=========================  ====================================================
180Keyword                    Description
181=========================  ====================================================
182``directory``              The relative path where all input and output files
183                           will be placed. If this does not exist, it will be
184                           created.  Existing directories will be moved to
185                           directory-TIMESTAMP unless self._rename_existing_dir
186                           is set to false.
187
188``label``                  The prefix of .param, .cell, .castep, etc. files.
189
190``castep_command``         Command to run castep. Can also be set via the bash
191                           environment variable ``CASTEP_COMMAND``. If none is
192                           given or found, will default to ``castep``
193
194``check_castep_version``   Boolean whether to check if the installed castep
195                           version matches the version from which the available
196                           options were deduced. Defaults to ``False``.
197
198``castep_pp_path``         The path where the pseudopotentials are stored. Can
199                           also be set via the bash environment variables
200                           ``PSPOT_DIR`` (preferred) and ``CASTEP_PP_PATH``.
201                           Will default to the current working directory if
202                           none is given or found. Note that pseudopotentials
203                           may be generated on-the-fly if they are not found.
204
205``find_pspots``            Boolean whether to search for pseudopotentials in
206                           ``<castep_pp_path>`` or not. If activated, files in
207                           this directory will be checked for typical names. If
208                           files are not found, they will be generated on the
209                           fly, depending on the ``_build_missing_pspots``
210                           value.  A RuntimeError will be raised in case
211                           multiple files per element are found. Defaults to
212                           ``False``.
213``keyword_tolerance``      Integer to indicate the level of tolerance to apply
214                           validation of any parameters set in the CastepCell
215                           or CastepParam objects against the ones found in
216                           castep_keywords. Levels are as following:
217
218                           0 = no tolerance, keywords not found in
219                           castep_keywords will raise an exception
220
221                           1 = keywords not found will be accepted but produce
222                           a warning (default)
223
224                           2 = keywords not found will be accepted silently
225
226                           3 = no attempt is made to look for
227                           castep_keywords.json at all
228``castep_keywords``        Can be used to pass a CastepKeywords object that is
229                           then used with no attempt to actually load a
230                           castep_keywords.json file. Most useful for debugging
231                           and testing purposes.
232
233=========================  ====================================================
234
235
236Additional Settings
237===================
238
239=========================  ====================================================
240Internal Setting           Description
241=========================  ====================================================
242``_castep_command``        (``=castep``): the actual shell command used to
243                           call CASTEP.
244
245``_check_checkfile``       (``=True``): this makes write_param() only
246                           write a continue or reuse statement if the
247                           addressed .check or .castep_bin file exists in the
248                           directory.
249
250``_copy_pspots``           (``=False``): if set to True the calculator will
251                           actually copy the needed pseudo-potential (\*.usp)
252                           file, usually it will only create symlinks.
253
254``_link_pspots``           (``=True``): if set to True the calculator will
255                           actually will create symlinks to the needed pseudo
256                           potentials. Set this option (and ``_copy_pspots``)
257                           to False if you rather want to access your pseudo
258                           potentials using the PSPOT_DIR environment variable
259                           that is read by CASTEP.
260                           *Note:* This option has no effect if ``copy_pspots``
261                           is True..
262
263``_build_missing_pspots``  (``=True``): if set to True, castep will generate
264                           missing pseudopotentials on the fly. If not, a
265                           RuntimeError will be raised if not all files were
266                           found.
267
268``_export_settings``       (``=True``): if this is set to
269                           True, all calculator internal settings shown here
270                           will be included in the .param in a comment line (#)
271                           and can be read again by merge_param. merge_param
272                           can be forced to ignore this directive using the
273                           optional argument ``ignore_internal_keys=True``.
274
275``_force_write``           (``=True``): this controls wether the \*cell and
276                           \*param will be overwritten.
277
278``_prepare_input_only``    (``=False``): If set to True, the calculator will
279                           create \*cell und \*param file but not
280                           start the calculation itself.
281                           If this is used to prepare jobs locally
282                           and run on a remote cluster it is recommended
283                           to set ``_copy_pspots = True``.
284
285``_castep_pp_path``        (``='.'``) : the place where the calculator
286                           will look for pseudo-potential files.
287
288``_find_pspots``           (``=False``): if set to True, the calculator will
289                           try to find the respective pseudopotentials from
290                           <_castep_pp_path>. As long as there are no multiple
291                           files per element in this directory, the auto-detect
292                           feature should be very robust. Raises a RuntimeError
293                           if required files are not unique (multiple files per
294                           element). Non existing pseudopotentials will be
295                           generated, though this could be dangerous.
296
297``_rename_existing_dir``   (``=True``) : when using a new instance
298                           of the calculator, this will move directories out of
299                           the way that would be overwritten otherwise,
300                           appending a date string.
301
302``_set_atoms``             (``=False``) : setting this to True will overwrite
303                           any atoms object previously attached to the
304                           calculator when reading a \.castep file.  By de-
305                           fault, the read() function will only create a new
306                           atoms object if none has been attached and other-
307                           wise try to assign forces etc. based on the atom's
308                           positions.  ``_set_atoms=True`` could be necessary
309                           if one uses CASTEP's internal geometry optimization
310                           (``calc.param.task='GeometryOptimization'``)
311                           because then the positions get out of sync.
312                           *Warning*: this option is generally not recommended
313                           unless one knows one really needs it. There should
314                           never be any need, if CASTEP is used as a
315                           single-point calculator.
316
317``_track_output``          (``=False``) : if set to true, the interface
318                           will append a number to the label on all input
319                           and output files, where n is the number of calls
320                           to this instance. *Warning*: this setting may con-
321                           sume a lot more disk space because of the additio-
322                           nal \*check files.
323
324``_try_reuse``             (``=_track_output``) : when setting this, the in-
325                           terface will try to fetch the reuse file from the
326                           previous run even if _track_output is True. By de-
327                           fault it is equal to _track_output, but may be
328                           overridden.
329
330                           Since this behavior may not always be desirable for
331                           single-point calculations. Regular reuse for *e.g.*
332                           a geometry-optimization can be achieved by setting
333                           ``calc.param.reuse = True``.
334
335``_pedantic``              (``=False``) if set to true, the calculator will
336                           inform about settings probably wasting a lot of CPU
337                           time or causing numerical inconsistencies.
338
339=========================  ====================================================
340
341Special features:
342=================
343
344
345``.dryrun_ok()``
346  Runs ``castep_command seed -dryrun`` in a temporary directory return True if
347  all variables initialized ok. This is a fast way to catch errors in the
348  input. Afterwards _kpoints_used is set.
349
350``.merge_param()``
351  Takes a filename or filehandler of a .param file or CastepParam instance and
352  merges it into the current calculator instance, overwriting current settings
353
354``.keyword.clear()``
355  Can be used on any option like ``calc.param.keyword.clear()`` or
356  ``calc.cell.keyword.clear()`` to return to the CASTEP default.
357
358``.initialize()``
359  Creates all needed input in the ``_directory``. This can then copied to and
360  run in a place without ASE or even python.
361
362``.set_pspot('<library>')``
363  This automatically sets the pseudo-potential for all present species to
364  ``<Species>_<library>.usp``. Make sure that ``_castep_pp_path`` is set
365  correctly. Note that there is no check, if the file actually exists. If it
366  doesn't castep will crash! You may want to use ``find_pspots()`` instead.
367
368``.find_pspots(pspot=<library>, suffix=<suffix>)``
369  This automatically searches for pseudopotentials of type
370  ``<Species>_<library>.<suffix>`` or ``<Species>-<library>.<suffix>`` in
371  ``castep_pp_path` (make sure this is set correctly). Note that ``<Species>``
372  will be searched for case insensitive.  Regular expressions are accepted, and
373  arguments ``'*'`` will be regarded as bash-like wildcards. Defaults are any
374  ``<library>`` and any ``<suffix>`` from ``['usp', 'UPF', 'recpot']``. If you
375  have well-organized folders with pseudopotentials of one kind, this should
376  work with the defaults.
377
378``print(calc)``
379  Prints a short summary of the calculator settings and atoms.
380
381``ase.io.castep.read_seed('path-to/seed')``
382  Given you have a combination of seed.{param,cell,castep} this will return an
383  atoms object with the last ionic positions in the .castep file and all other
384  settings parsed from the .cell and .param file. If no .castep file is found
385  the positions are taken from the .cell file. The output directory will be
386  set to the same directory, only the label is preceded by 'copy_of\_'  to
387  avoid overwriting.
388
389``.set_kpts(kpoints)``
390  This is equivalent to initialising the calculator with
391  ``calc = Castep(kpts=kpoints)``. ``kpoints`` can be specified in many
392  convenient forms: simple Monkhorst-Pack grids can be specified e.g.
393  ``(2, 2, 3)`` or ``'2 2 3'``; lists of specific weighted k-points can be
394  given in reciprocal lattice coordinates e.g.
395  ``[[0, 0, 0, 0.25], [0.25, 0.25, 0.25, 0.75]]``; a dictionary syntax is
396  available for more complex requirements e.g.
397  ``{'size': (2, 2, 2), 'gamma': True}`` will give a Gamma-centered 2x2x2 M-P
398  grid, ``{'density': 10, 'gamma': False, 'even': False}`` will give a mesh
399  with density of at least 10 Ang (based on the unit cell of currently-attached
400  atoms) with an odd number of points in each direction and avoiding the Gamma
401  point.
402
403``.set_bandpath(bandpath)``
404  This is equivalent to initialialising the calculator with
405  ``calc=Castep(bandpath=bandpath)`` and may be set simultaneously with *kpts*.
406  It allows an electronic band structure path to be set up using ASE BandPath
407  objects. This enables a band structure calculation to be set up conveniently
408  using e.g. calc.set_bandpath(atoms.cell.bandpath().interpolate(npoints=200))
409
410``.band_structure(bandfile=None)``
411  Read a band structure from _seedname.bands_ file. This returns an ase
412  BandStructure object which may be plotted with e.g.
413  ``calc.band_structure().plot()``
414
415Notes/Issues:
416==============
417
418* Currently *only* the FixAtoms *constraint* is fully supported for reading and
419  writing. There is some experimental support for the FixCartesian constraint.
420
421* There is no support for the CASTEP *unit system*. Units of eV and Angstrom
422  are used throughout. In particular when converting total energies from
423  different calculators, one should check that the same CODATA_ version is
424  used for constants and conversion factors, respectively.
425
426.. _CASTEP: http://www.castep.org/
427
428.. _W: https://en.wikipedia.org/wiki/CASTEP
429
430.. _CODATA: https://physics.nist.gov/cuu/Constants/index.html
431
432.. [1] S. J. Clark, M. D. Segall, C. J. Pickard, P. J. Hasnip, M. J. Probert,
433       K. Refson, M. C. Payne Zeitschrift für Kristallographie 220(5-6)
434       pp.567- 570 (2005) PDF_.
435
436.. _PDF: http://www.tcm.phy.cam.ac.uk/castep/papers/ZKristallogr_2005.pdf
437
438
439End CASTEP Interface Documentation
440    """
441
442    # Class attributes !
443    # keys set through atoms object
444    atoms_keys = [
445        'charges',
446        'ionic_constraints',
447        'lattice_abs',
448        'lattice_cart',
449        'positions_abs',
450        'positions_abs_final',
451        'positions_abs_intermediate',
452        'positions_frac',
453        'positions_frac_final',
454        'positions_frac_intermediate']
455
456    atoms_obj_keys = [
457        'dipole',
458        'energy_free',
459        'energy_zero',
460        'fermi',
461        'forces',
462        'nbands',
463        'positions',
464        'stress',
465        'pressure']
466
467    internal_keys = [
468        '_castep_command',
469        '_check_checkfile',
470        '_copy_pspots',
471        '_link_pspots',
472        '_find_pspots',
473        '_build_missing_pspots',
474        '_directory',
475        '_export_settings',
476        '_force_write',
477        '_label',
478        '_prepare_input_only',
479        '_castep_pp_path',
480        '_rename_existing_dir',
481        '_set_atoms',
482        '_track_output',
483        '_try_reuse',
484        '_pedantic']
485
486    def __init__(self, directory='CASTEP', label='castep',
487                 castep_command=None, check_castep_version=False,
488                 castep_pp_path=None, find_pspots=False, keyword_tolerance=1,
489                 castep_keywords=None, **kwargs):
490
491        self.__name__ = 'Castep'
492
493        # initialize the ase.calculators.general calculator
494        Calculator.__init__(self)
495
496        from ase.io.castep import write_cell
497        self._write_cell = write_cell
498
499        if castep_keywords is None:
500            castep_keywords = CastepKeywords(make_param_dict(),
501                                             make_cell_dict(),
502                                             [],
503                                             [],
504                                             0)
505            if keyword_tolerance < 3:
506                try:
507                    castep_keywords = import_castep_keywords(castep_command)
508                except CastepVersionError as e:
509                    if keyword_tolerance == 0:
510                        raise e
511                    else:
512                        warnings.warn(str(e))
513
514        self._kw_tol = keyword_tolerance
515        keyword_tolerance = max(keyword_tolerance, 2)  # 3 not accepted below
516        self.param = CastepParam(castep_keywords,
517                                 keyword_tolerance=keyword_tolerance)
518        self.cell = CastepCell(castep_keywords,
519                               keyword_tolerance=keyword_tolerance)
520
521        ###################################
522        # Calculator state variables      #
523        ###################################
524        self._calls = 0
525        self._castep_version = castep_keywords.castep_version
526
527        # collects warning from .castep files
528        self._warnings = []
529        # collects content from *.err file
530        self._error = None
531        # warnings raised by the ASE interface
532        self._interface_warnings = []
533
534        # store to check if recalculation is necessary
535        self._old_atoms = None
536        self._old_cell = None
537        self._old_param = None
538
539        ###################################
540        # Internal keys                   #
541        # Allow to tweak the behavior     #
542        ###################################
543        self._opt = {}
544        self._castep_command = get_castep_command(castep_command)
545        self._castep_pp_path = get_castep_pp_path(castep_pp_path)
546        self._check_checkfile = True
547        self._copy_pspots = False
548        self._link_pspots = True
549        self._find_pspots = find_pspots
550        self._build_missing_pspots = True
551        self._directory = os.path.abspath(directory)
552        self._export_settings = True
553        self._force_write = True
554        self._label = label
555        self._prepare_input_only = False
556        self._rename_existing_dir = True
557        self._set_atoms = False
558        self._track_output = False
559        self._try_reuse = False
560
561        # turn off the pedantic user warnings
562        self._pedantic = False
563
564        # will be set on during runtime
565        self._seed = None
566
567        ###################################
568        # (Physical) result variables     #
569        ###################################
570        self.atoms = None
571        # initialize result variables
572        self._forces = None
573        self._energy_total = None
574        self._energy_free = None
575        self._energy_0K = None
576        self._energy_total_corr = None
577        self._eigenvalues = None
578        self._efermi = None
579        self._ibz_kpts = None
580        self._ibz_weights = None
581        self._band_structure = None
582
583        # dispersion corrections
584        self._dispcorr_energy_total = None
585        self._dispcorr_energy_free = None
586        self._dispcorr_energy_0K = None
587
588        # spins and hirshfeld volumes
589        self._spins = None
590        self._hirsh_volrat = None
591
592        # Mulliken charges
593        self._mulliken_charges = None
594        # Hirshfeld charges
595        self._hirshfeld_charges = None
596
597        self._number_of_cell_constraints = None
598        self._output_verbosity = None
599        self._stress = None
600        self._pressure = None
601        self._unit_cell = None
602        self._kpoints = None
603
604        # pointers to other files used at runtime
605        self._check_file = None
606        self._castep_bin_file = None
607
608        # plane wave cutoff energy (may be derived during PP generation)
609        self._cut_off_energy = None
610
611        # runtime information
612        self._total_time = None
613        self._peak_memory = None
614
615        # check version of CASTEP options module against current one
616        if check_castep_version:
617            local_castep_version = get_castep_version(self._castep_command)
618            if not hasattr(self, '_castep_version'):
619                warnings.warn('No castep version found')
620                return
621            if not local_castep_version == self._castep_version:
622                warnings.warn('The options module was generated from version %s '
623                              'while your are currently using CASTEP version %s' %
624                              (self._castep_version,
625                               get_castep_version(self._castep_command)))
626                self._castep_version = local_castep_version
627
628        # processes optional arguments in kw style
629        for keyword, value in kwargs.items():
630            # first fetch special keywords issued by ASE CLI
631            if keyword == 'kpts':
632                self.set_kpts(value)
633            elif keyword == 'bandpath':
634                self.set_bandpath(value)
635            elif keyword == 'xc':
636                self.xc_functional = value
637            elif keyword == 'ecut':
638                self.cut_off_energy = value
639            else:  # the general case
640                self.__setattr__(keyword, value)
641
642    def band_structure(self, bandfile=None):
643        from ase.spectrum.band_structure import BandStructure
644
645        if bandfile is None:
646            bandfile = os.path.join(self._directory, self._seed) + '.bands'
647
648        if not os.path.exists(bandfile):
649            raise ValueError('Cannot find band file "{}".'.format(bandfile))
650
651        kpts, weights, eigenvalues, efermi = read_bands(bandfile)
652
653        # Get definitions of high-symmetry points
654        special_points = self.atoms.cell.bandpath(npoints=0).special_points
655        bandpath = BandPath(self.atoms.cell,
656                            kpts=kpts,
657                            special_points=special_points)
658        return BandStructure(bandpath, eigenvalues, reference=efermi)
659
660    def set_bandpath(self, bandpath):
661        """Set a band structure path from ase.dft.kpoints.BandPath object
662
663        This will set the bs_kpoint_list block with a set of specific points
664        determined in ASE. bs_kpoint_spacing will not be used; to modify the
665        number of points, consider using e.g. bandpath.resample(density=20) to
666        obtain a new dense path.
667
668        Args:
669            bandpath (:obj:`ase.dft.kpoints.BandPath` or None):
670                Set to None to remove list of band structure points. Otherwise,
671                sampling will follow BandPath parameters.
672
673        """
674
675        def clear_bs_keywords():
676            bs_keywords = product({'bs_kpoint', 'bs_kpoints'},
677                                  {'path', 'path_spacing',
678                                   'list',
679                                   'mp_grid', 'mp_spacing', 'mp_offset'})
680            for bs_tag in bs_keywords:
681                setattr(self.cell, '_'.join(bs_tag), None)
682
683        if bandpath is None:
684            clear_bs_keywords()
685        elif isinstance(bandpath, BandPath):
686            clear_bs_keywords()
687            self.cell.bs_kpoint_list = [' '.join(map(str, row))
688                                        for row in bandpath.kpts]
689        else:
690            raise TypeError('Band structure path must be an '
691                            'ase.dft.kpoint.BandPath object')
692
693    def set_kpts(self, kpts):
694        """Set k-point mesh/path using a str, tuple or ASE features
695
696        Args:
697            kpts (None, tuple, str, dict):
698
699        This method will set the CASTEP parameters kpoints_mp_grid,
700        kpoints_mp_offset and kpoints_mp_spacing as appropriate. Unused
701        parameters will be set to None (i.e. not included in input files.)
702
703        If kpts=None, all these parameters are set as unused.
704
705        The simplest useful case is to give a 3-tuple of integers specifying
706        a Monkhorst-Pack grid. This may also be formatted as a string separated
707        by spaces; this is the format used internally before writing to the
708        input files.
709
710        A more powerful set of features is available when using a python
711        dictionary with the following allowed keys:
712
713        - 'size' (3-tuple of int) mesh of mesh dimensions
714        - 'density' (float) for BZ sampling density in points per recip. Ang
715          ( kpoint_mp_spacing = 1 / (2pi * density) ). An explicit MP mesh will
716          be set to allow for rounding/centering.
717        - 'spacing' (float) for BZ sampling density for maximum space between
718          sample points in reciprocal space. This is numerically equivalent to
719          the inbuilt ``calc.cell.kpoint_mp_spacing``, but will be converted to
720          'density' to allow for rounding/centering.
721        - 'even' (bool) to round each direction up to the nearest even number;
722          set False for odd numbers, leave as None for no odd/even rounding.
723        - 'gamma' (bool) to offset the Monkhorst-Pack grid to include
724          (0, 0, 0); set False to offset each direction avoiding 0.
725        """
726
727        def clear_mp_keywords():
728            mp_keywords = product({'kpoint', 'kpoints'},
729                                  {'mp_grid', 'mp_offset',
730                                   'mp_spacing', 'list'})
731            for kp_tag in mp_keywords:
732                setattr(self.cell, '_'.join(kp_tag), None)
733
734        # Case 1: Clear parameters with set_kpts(None)
735        if kpts is None:
736            clear_mp_keywords()
737            pass
738
739        # Case 2: list of explicit k-points with weights
740        # e.g. [[ 0,    0,   0,    0.125],
741        #       [ 0,   -0.5, 0,    0.375],
742        #       [-0.5,  0,  -0.5,  0.375],
743        #       [-0.5, -0.5, -0.5, 0.125]]
744
745        elif (isinstance(kpts, (tuple, list))
746              and isinstance(kpts[0], (tuple, list))):
747
748            if not all(map((lambda row: len(row) == 4), kpts)):
749                raise ValueError(
750                    'In explicit kpt list each row should have 4 elements')
751
752            clear_mp_keywords()
753            self.cell.kpoint_list = [' '.join(map(str, row)) for row in kpts]
754
755        # Case 3: list of explicit kpts formatted as list of str
756        # i.e. the internal format of calc.kpoint_list split on \n
757        # e.g. ['0 0 0 0.125', '0 -0.5 0 0.375', '-0.5 0 -0.5 0.375']
758        elif isinstance(kpts, (tuple, list)) and isinstance(kpts[0], str):
759
760            if not all(map((lambda row: len(row.split()) == 4), kpts)):
761                raise ValueError(
762                    'In explicit kpt list each row should have 4 elements')
763
764            clear_mp_keywords()
765            self.cell.kpoint_list = kpts
766
767        # Case 4: list or tuple of MP samples e.g. [3, 3, 2]
768        elif isinstance(kpts, (tuple, list)) and isinstance(kpts[0], int):
769            if len(kpts) != 3:
770                raise ValueError('Monkhorst-pack grid should have 3 values')
771            clear_mp_keywords()
772            self.cell.kpoint_mp_grid = '%d %d %d' % tuple(kpts)
773
774        # Case 5: str representation of Case 3 e.g. '3 3 2'
775        elif isinstance(kpts, str):
776            self.set_kpts([int(x) for x in kpts.split()])
777
778        # Case 6: dict of options e.g. {'size': (3, 3, 2), 'gamma': True}
779        # 'spacing' is allowed but transformed to 'density' to get mesh/offset
780        elif isinstance(kpts, dict):
781            kpts = kpts.copy()
782
783            if (kpts.get('spacing') is not None
784                and kpts.get('density') is not None):
785                raise ValueError(
786                    'Cannot set kpts spacing and density simultaneously.')
787            else:
788                if kpts.get('spacing') is not None:
789                    kpts = kpts.copy()
790                    spacing = kpts.pop('spacing')
791                    kpts['density'] = 1 / (np.pi * spacing)
792
793                clear_mp_keywords()
794                size, offsets = kpts2sizeandoffsets(atoms=self.atoms, **kpts)
795                self.cell.kpoint_mp_grid = '%d %d %d' % tuple(size)
796                self.cell.kpoint_mp_offset = '%f %f %f' % tuple(offsets)
797
798        # Case 7: some other iterator. Try treating as a list:
799        elif hasattr(kpts, '__iter__'):
800            self.set_kpts(list(kpts))
801
802        # Otherwise, give up
803        else:
804            raise TypeError('Cannot interpret kpts of this type')
805
806    def todict(self, skip_default=True):
807        """Create dict with settings of .param and .cell"""
808        dct = {}
809        dct['param'] = self.param.get_attr_dict()
810        dct['cell'] = self.cell.get_attr_dict()
811
812        return dct
813
814    def check_state(self, atoms, tol=1e-15):
815        """Check for system changes since last calculation."""
816        return compare_atoms(self._old_atoms, atoms)
817
818    def _castep_find_last_record(self, castep_file):
819        """Checks wether a given castep file has a regular
820        ending message following the last banner message. If this
821        is the case, the line number of the last banner is message
822        is return, otherwise False.
823
824        returns (record_start, record_end, end_found, last_record_complete)
825        """
826        if isinstance(castep_file, str):
827            castep_file = paropen(castep_file, 'r')
828            file_opened = True
829        else:
830            file_opened = False
831        record_starts = []
832        while True:
833            line = castep_file.readline()
834            if 'Welcome' in line and 'CASTEP' in line:
835                record_starts = [castep_file.tell()] + record_starts
836            if not line:
837                break
838
839        if record_starts == []:
840            warnings.warn('Could not find CASTEP label in result file: %s.'
841                          ' Are you sure this is a .castep file?' % castep_file)
842            return
843
844        # search for regular end of file
845        end_found = False
846        # start to search from record beginning from the back
847        # and see if
848        record_end = -1
849        for record_nr, record_start in enumerate(record_starts):
850            castep_file.seek(record_start)
851            while True:
852                line = castep_file.readline()
853                if not line:
854                    break
855                if 'warn' in line.lower():
856                    self._warnings.append(line)
857
858                if 'Finalisation time   =' in line:
859                    end_found = True
860                    record_end = castep_file.tell()
861                    break
862
863            if end_found:
864                break
865
866        if file_opened:
867            castep_file.close()
868
869        if end_found:
870            # record_nr == 0 corresponds to the last record here
871            if record_nr == 0:
872                return (record_start, record_end, True, True)
873            else:
874                return (record_start, record_end, True, False)
875        else:
876            return (0, record_end, False, False)
877
878    def read(self, castep_file=None):
879        """Read a castep file into the current instance."""
880
881        _close = True
882
883        if castep_file is None:
884            if self._castep_file:
885                castep_file = self._castep_file
886                out = paropen(castep_file, 'r')
887            else:
888                warnings.warn('No CASTEP file specified')
889                return
890            if not os.path.exists(castep_file):
891                warnings.warn('No CASTEP file found')
892
893        elif isinstance(castep_file, str):
894            out = paropen(castep_file, 'r')
895
896        else:
897            # in this case we assume that we have a fileobj already, but check
898            # for attributes in order to avoid extended EAFP blocks.
899            out = castep_file
900
901            # look before you leap...
902            attributes = ['name',
903                          'seek',
904                          'close',
905                          'readline',
906                          'tell']
907
908            for attr in attributes:
909                if not hasattr(out, attr):
910                    raise TypeError(
911                        '"castep_file" is neither str nor valid fileobj')
912
913            castep_file = out.name
914            _close = False
915
916        if self._seed is None:
917            self._seed = os.path.splitext(os.path.basename(castep_file))[0]
918
919        err_file = '%s.0001.err' % self._seed
920        if os.path.exists(err_file):
921            err_file = paropen(err_file)
922            self._error = err_file.read()
923            err_file.close()
924            # we return right-away because it might
925            # just be here from a previous run
926        # look for last result, if several CASTEP
927        # run are appended
928
929        record_start, record_end, end_found, _\
930            = self._castep_find_last_record(out)
931        if not end_found:
932            warnings.warn('No regular end found in %s file. %s' %
933                          (castep_file, self._error))
934            if _close:
935                out.close()
936            return
937            # we return here, because the file has no a regular end
938
939        # now iterate over last CASTEP output in file to extract information
940        # could be generalized as well to extract trajectory from file
941        # holding several outputs
942        n_cell_const = 0
943        forces = []
944
945        # HOTFIX:
946        # we have to initialize the _stress variable as a zero array
947        # otherwise the calculator crashes upon pickling trajectories
948        # Alternative would be to raise a NotImplementedError() which
949        # is also kind of not true, since we can extract stresses if
950        # the user configures CASTEP to print them in the outfile
951        # stress = []
952        stress = np.zeros([3, 3])
953        hirsh_volrat = []
954
955        # Two flags to check whether spin-polarized or not, and whether
956        # Hirshfeld volumes are calculated
957        spin_polarized = False
958        calculate_hirshfeld = False
959        mulliken_analysis = False
960        hirshfeld_analysis = False
961        kpoints = None
962
963        positions_frac_list = []
964
965        out.seek(record_start)
966        while True:
967            # TODO: add a switch if we have a geometry optimization: record
968            # atoms objects for intermediate steps.
969            try:
970                # in case we need to rewind back one line, we memorize the bit
971                # position of this line in the file.
972                # --> see symops problem below
973                _line_start = out.tell()
974                line = out.readline()
975                if not line or out.tell() > record_end:
976                    break
977                elif 'Hirshfeld Analysis' in line:
978                    hirshfeld_charges = []
979
980                    hirshfeld_analysis = True
981                    # skip the separating line
982                    line = out.readline()
983                    # this is the headline
984                    line = out.readline()
985
986                    if 'Charge' in line:
987                        # skip the next separator line
988                        line = out.readline()
989                        while True:
990                            line = out.readline()
991                            fields = line.split()
992                            if len(fields) == 1:
993                                break
994                            else:
995                                hirshfeld_charges.append(float(fields[-1]))
996                elif 'stress calculation' in line:
997                    if line.split()[-1].strip() == 'on':
998                        self.param.calculate_stress = True
999                elif 'basis set accuracy' in line:
1000                    self.param.basis_precision = line.split()[-1]
1001                elif 'plane wave basis set cut-off' in line:
1002                    # NB this is set as a private "result" attribute to avoid
1003                    # conflict with input option basis_precision
1004                    cutoff = float(line.split()[-2])
1005                    self._cut_off_energy = cutoff
1006                    if self.param.basis_precision.value is None:
1007                        self.param.cut_off_energy = cutoff
1008                elif 'total energy / atom convergence tol.' in line:
1009                    elec_energy_tol = float(line.split()[-2])
1010                    self.param.elec_energy_tol = elec_energy_tol
1011                elif 'convergence tolerance window' in line:
1012                    elec_convergence_win = int(line.split()[-2])
1013                    self.param.elec_convergence_win = elec_convergence_win
1014                elif re.match(r'\sfinite basis set correction\s*:', line):
1015                    finite_basis_corr = line.split()[-1]
1016                    fbc_possibilities = {'none': 0,
1017                                         'manual': 1, 'automatic': 2}
1018                    fbc = fbc_possibilities[finite_basis_corr]
1019                    self.param.finite_basis_corr = fbc
1020                elif 'Treating system as non-metallic' in line:
1021                    self.param.fix_occupancy = True
1022                elif 'max. number of SCF cycles:' in line:
1023                    max_no_scf = float(line.split()[-1])
1024                    self.param.max_scf_cycles = max_no_scf
1025                elif 'density-mixing scheme' in line:
1026                    mixing_scheme = line.split()[-1]
1027                    self.param.mixing_scheme = mixing_scheme
1028                elif 'dump wavefunctions every' in line:
1029                    no_dump_cycles = float(line.split()[-3])
1030                    self.param.num_dump_cycles = no_dump_cycles
1031                elif 'optimization strategy' in line:
1032                    lspl = line.split(":")
1033                    if lspl[0].strip() != 'optimization strategy':
1034                        # This can happen in iprint: 3 calculations
1035                        continue
1036                    if 'memory' in line:
1037                        self.param.opt_strategy = 'Memory'
1038                    if 'speed' in line:
1039                        self.param.opt_strategy = 'Speed'
1040                elif 'calculation limited to maximum' in line:
1041                    calc_limit = float(line.split()[-2])
1042                    self.param.run_time = calc_limit
1043                elif 'type of calculation' in line:
1044                    lspl = line.split(":")
1045                    if lspl[0].strip() != 'type of calculation':
1046                        # This can happen in iprint: 3 calculations
1047                        continue
1048                    calc_type = lspl[-1]
1049                    calc_type = re.sub(r'\s+', ' ', calc_type)
1050                    calc_type = calc_type.strip()
1051                    if calc_type != 'single point energy':
1052                        calc_type_possibilities = {
1053                            'geometry optimization': 'GeometryOptimization',
1054                            'band structure': 'BandStructure',
1055                            'molecular dynamics': 'MolecularDynamics',
1056                            'optical properties': 'Optics',
1057                            'phonon calculation': 'Phonon',
1058                            'E-field calculation': 'Efield',
1059                            'Phonon followed by E-field': 'Phonon+Efield',
1060                            'transition state search': 'TransitionStateSearch',
1061                            'Magnetic Resonance': 'MagRes',
1062                            'Core level spectra': 'Elnes',
1063                            'Electronic Spectroscopy': 'ElectronicSpectroscopy'
1064                        }
1065                        ctype = calc_type_possibilities[calc_type]
1066                        self.param.task = ctype
1067                elif 'using functional' in line:
1068                    used_functional = line.split(":")[-1]
1069                    used_functional = re.sub(r'\s+', ' ', used_functional)
1070                    used_functional = used_functional.strip()
1071                    if used_functional != 'Local Density Approximation':
1072                        used_functional_possibilities = {
1073                            'Perdew Wang (1991)': 'PW91',
1074                            'Perdew Burke Ernzerhof': 'PBE',
1075                            'revised Perdew Burke Ernzerhof': 'RPBE',
1076                            'PBE with Wu-Cohen exchange': 'WC',
1077                            'PBE for solids (2008)': 'PBESOL',
1078                            'Hartree-Fock': 'HF',
1079                            'Hartree-Fock +': 'HF-LDA',
1080                            'Screened Hartree-Fock': 'sX',
1081                            'Screened Hartree-Fock + ': 'sX-LDA',
1082                            'hybrid PBE0': 'PBE0',
1083                            'hybrid B3LYP': 'B3LYP',
1084                            'hybrid HSE03': 'HSE03',
1085                            'hybrid HSE06': 'HSE06'
1086                        }
1087                        used_func = used_functional_possibilities[
1088                            used_functional]
1089                        self.param.xc_functional = used_func
1090                elif 'output verbosity' in line:
1091                    iprint = int(line.split()[-1][1])
1092                    if int(iprint) != 1:
1093                        self.param.iprint = iprint
1094                elif 'treating system as spin-polarized' in line:
1095                    spin_polarized = True
1096                    self.param.spin_polarized = spin_polarized
1097                elif 'treating system as non-spin-polarized' in line:
1098                    spin_polarized = False
1099                elif 'Number of kpoints used' in line:
1100                    kpoints = int(line.split('=')[-1].strip())
1101                elif 'Unit Cell' in line:
1102                    lattice_real = []
1103                    lattice_reci = []
1104                    while True:
1105                        line = out.readline()
1106                        fields = line.split()
1107                        if len(fields) == 6:
1108                            break
1109                    for i in range(3):
1110                        lattice_real.append([float(f) for f in fields[0:3]])
1111                        lattice_reci.append([float(f) for f in fields[3:7]])
1112                        line = out.readline()
1113                        fields = line.split()
1114                elif 'Cell Contents' in line:
1115                    while True:
1116                        line = out.readline()
1117                        if 'Total number of ions in cell' in line:
1118                            n_atoms = int(line.split()[7])
1119                        if 'Total number of species in cell' in line:
1120                            int(line.split()[7])
1121                        fields = line.split()
1122                        if len(fields) == 0:
1123                            break
1124                elif 'Fractional coordinates of atoms' in line:
1125                    species = []
1126                    custom_species = None  # A CASTEP special thing
1127                    positions_frac = []
1128                    # positions_cart = []
1129                    while True:
1130                        line = out.readline()
1131                        fields = line.split()
1132                        if len(fields) == 7:
1133                            break
1134                    for n in range(n_atoms):
1135                        spec_custom = fields[1].split(':', 1)
1136                        elem = spec_custom[0]
1137                        if len(spec_custom) > 1 and custom_species is None:
1138                            # Add it to the custom info!
1139                            custom_species = list(species)
1140                        species.append(elem)
1141                        if custom_species is not None:
1142                            custom_species.append(fields[1])
1143                        positions_frac.append([float(s) for s in fields[3:6]])
1144                        line = out.readline()
1145                        fields = line.split()
1146                    positions_frac_list.append(positions_frac)
1147                elif 'Files used for pseudopotentials' in line:
1148                    while True:
1149                        line = out.readline()
1150                        if 'Pseudopotential generated on-the-fly' in line:
1151                            continue
1152                        fields = line.split()
1153                        if (len(fields) >= 2):
1154                            elem, pp_file = fields
1155                            self.cell.species_pot = (elem, pp_file)
1156                        else:
1157                            break
1158                elif 'k-Points For BZ Sampling' in line:
1159                    # TODO: generalize for non-Monkhorst Pack case
1160                    # (i.e. kpoint lists) -
1161                    # kpoints_offset cannot be read this way and
1162                    # is hence always set to None
1163                    while True:
1164                        line = out.readline()
1165                        if not line.strip():
1166                            break
1167                        if 'MP grid size for SCF calculation' in line:
1168                            # kpoints =  ' '.join(line.split()[-3:])
1169                            # self.kpoints_mp_grid = kpoints
1170                            # self.kpoints_mp_offset = '0. 0. 0.'
1171                            # not set here anymore because otherwise
1172                            # two calculator objects go out of sync
1173                            # after each calculation triggering unnecessary
1174                            # recalculation
1175                            break
1176                elif 'Symmetry and Constraints' in line:
1177                    # this is a bit of a hack, but otherwise the read_symops
1178                    # would need to re-read the entire file. --> just rewind
1179                    # back by one line, so the read_symops routine can find the
1180                    # start of this block.
1181                    out.seek(_line_start)
1182                    self.read_symops(castep_castep=out)
1183                elif 'Number of cell constraints' in line:
1184                    n_cell_const = int(line.split()[4])
1185                elif 'Final energy' in line:
1186                    self._energy_total = float(line.split()[-2])
1187                elif 'Final free energy' in line:
1188                    self._energy_free = float(line.split()[-2])
1189                elif 'NB est. 0K energy' in line:
1190                    self._energy_0K = float(line.split()[-2])
1191                # check if we had a finite basis set correction
1192                elif 'Total energy corrected for finite basis set' in line:
1193                    self._energy_total_corr = float(line.split()[-2])
1194
1195                # Add support for dispersion correction
1196                # filtering due to SEDC is done in get_potential_energy
1197                elif 'Dispersion corrected final energy' in line:
1198                    self._dispcorr_energy_total = float(line.split()[-2])
1199                elif 'Dispersion corrected final free energy' in line:
1200                    self._dispcorr_energy_free = float(line.split()[-2])
1201                elif 'dispersion corrected est. 0K energy' in line:
1202                    self._dispcorr_energy_0K = float(line.split()[-2])
1203
1204                # remember to remove constraint labels in force components
1205                # (lacking a space behind the actual floating point number in
1206                # the CASTEP output)
1207                elif '******************** Forces *********************'\
1208                     in line or\
1209                     '************** Symmetrised Forces ***************'\
1210                     in line or\
1211                     '************** Constrained Symmetrised Forces ****'\
1212                     '**********'\
1213                     in line or\
1214                     '******************** Constrained Forces **********'\
1215                     '**********'\
1216                     in line or\
1217                     '******************* Unconstrained Forces *********'\
1218                     '**********'\
1219                     in line:
1220                    fix = []
1221                    fix_cart = []
1222                    forces = []
1223                    while True:
1224                        line = out.readline()
1225                        fields = line.split()
1226                        if len(fields) == 7:
1227                            break
1228                    for n in range(n_atoms):
1229                        consd = np.array([0, 0, 0])
1230                        fxyz = [0, 0, 0]
1231                        for (i, force_component) in enumerate(fields[-4:-1]):
1232                            if force_component.count("(cons'd)") > 0:
1233                                consd[i] = 1
1234                            fxyz[i] = float(force_component.replace(
1235                                "(cons'd)", ''))
1236                        if consd.all():
1237                            fix.append(n)
1238                        elif consd.any():
1239                            fix_cart.append(FixCartesian(n, consd))
1240                        forces.append(fxyz)
1241                        line = out.readline()
1242                        fields = line.split()
1243
1244                # add support for Hirshfeld analysis
1245                elif 'Hirshfeld / free atomic volume :' in line:
1246                    # if we are here, then params must be able to cope with
1247                    # Hirshfeld flag (if castep_keywords.py matches employed
1248                    # castep version)
1249                    calculate_hirshfeld = True
1250                    hirsh_volrat = []
1251                    while True:
1252                        line = out.readline()
1253                        fields = line.split()
1254                        if len(fields) == 1:
1255                            break
1256                    for n in range(n_atoms):
1257                        hirsh_atom = float(fields[0])
1258                        hirsh_volrat.append(hirsh_atom)
1259                        while True:
1260                            line = out.readline()
1261                            if 'Hirshfeld / free atomic volume :' in line or\
1262                               'Hirshfeld Analysis' in line:
1263                                break
1264                        line = out.readline()
1265                        fields = line.split()
1266
1267                elif '***************** Stress Tensor *****************'\
1268                     in line or\
1269                     '*********** Symmetrised Stress Tensor ***********'\
1270                     in line:
1271                    stress = []
1272                    while True:
1273                        line = out.readline()
1274                        fields = line.split()
1275                        if len(fields) == 6:
1276                            break
1277                    for n in range(3):
1278                        stress.append([float(s) for s in fields[2:5]])
1279                        line = out.readline()
1280                        fields = line.split()
1281                    line = out.readline()
1282                    if "Pressure:" in line:
1283                        self._pressure = float(line.split()[-2]) * units.GPa
1284                elif ('BFGS: starting iteration' in line
1285                      or 'BFGS: improving iteration' in line):
1286                    if n_cell_const < 6:
1287                        lattice_real = []
1288                        lattice_reci = []
1289                    # backup previous configuration first:
1290                    # for highly symmetric systems (where essentially only the
1291                    # stress is optimized, but the atomic positions) positions
1292                    # are only printed once.
1293                    if species:
1294                        prev_species = deepcopy(species)
1295                    if positions_frac:
1296                        prev_positions_frac = deepcopy(positions_frac)
1297                    species = []
1298                    positions_frac = []
1299                    forces = []
1300
1301                    # HOTFIX:
1302                    # Same reason for the stress initialization as before
1303                    # stress = []
1304                    stress = np.zeros([3, 3])
1305
1306                # extract info from the Mulliken analysis
1307                elif 'Atomic Populations' in line:
1308                    # sometimes this appears twice in a castep file
1309                    mulliken_charges = []
1310                    spins = []
1311
1312                    mulliken_analysis = True
1313                    # skip the separating line
1314                    line = out.readline()
1315                    # this is the headline
1316                    line = out.readline()
1317
1318                    if 'Charge' in line:
1319                        # skip the next separator line
1320                        line = out.readline()
1321                        while True:
1322                            line = out.readline()
1323                            fields = line.split()
1324                            if len(fields) == 1:
1325                                break
1326
1327                            # the check for len==7 is due to CASTEP 18
1328                            # outformat changes
1329                            if spin_polarized:
1330                                if len(fields) != 7:
1331                                    spins.append(float(fields[-1]))
1332                                    mulliken_charges.append(float(fields[-2]))
1333                            else:
1334                                mulliken_charges.append(float(fields[-1]))
1335
1336                # There is actually no good reason to get out of the loop
1337                # already at this point... or do I miss something?
1338                # elif 'BFGS: Final Configuration:' in line:
1339                #    break
1340                elif 'warn' in line.lower():
1341                    self._warnings.append(line)
1342
1343                # fetch some last info
1344                elif 'Total time' in line:
1345                    pattern = r'.*=\s*([\d\.]+) s'
1346                    self._total_time = float(re.search(pattern, line).group(1))
1347
1348                elif 'Peak Memory Use' in line:
1349                    pattern = r'.*=\s*([\d]+) kB'
1350                    self._peak_memory = int(re.search(pattern, line).group(1))
1351
1352            except Exception as exception:
1353                sys.stderr.write(line + '|-> line triggered exception: '
1354                                 + str(exception))
1355                raise
1356
1357        if _close:
1358            out.close()
1359
1360        # in highly summetric crystals, positions and symmetry are only printed
1361        # upon init, hence we here restore these original values
1362        if not positions_frac:
1363            positions_frac = prev_positions_frac
1364        if not species:
1365            species = prev_species
1366
1367        if not spin_polarized:
1368            # set to zero spin if non-spin polarized calculation
1369            spins = np.zeros(len(positions_frac))
1370
1371        positions_frac_atoms = np.array(positions_frac)
1372        forces_atoms = np.array(forces)
1373        spins_atoms = np.array(spins)
1374
1375        if mulliken_analysis:
1376            mulliken_charges_atoms = np.array(mulliken_charges)
1377        else:
1378            mulliken_charges_atoms = np.zeros(len(positions_frac))
1379
1380        if hirshfeld_analysis:
1381            hirshfeld_charges_atoms = np.array(hirshfeld_charges)
1382        else:
1383            hirshfeld_charges_atoms = None
1384
1385        if calculate_hirshfeld:
1386            hirsh_atoms = np.array(hirsh_volrat)
1387        else:
1388            hirsh_atoms = np.zeros_like(spins)
1389
1390        if self.atoms and not self._set_atoms:
1391            # compensate for internal reordering of atoms by CASTEP
1392            # using the fact that the order is kept within each species
1393
1394            # positions_frac_ase = self.atoms.get_scaled_positions(wrap=False)
1395            atoms_assigned = [False] * len(self.atoms)
1396
1397            # positions_frac_castep_init = np.array(positions_frac_list[0])
1398            positions_frac_castep = np.array(positions_frac_list[-1])
1399
1400            # species_castep = list(species)
1401            forces_castep = np.array(forces)
1402            hirsh_castep = np.array(hirsh_volrat)
1403            spins_castep = np.array(spins)
1404            mulliken_charges_castep = np.array(mulliken_charges_atoms)
1405
1406            # go through the atoms position list and replace
1407            # with the corresponding one from the
1408            # castep file corresponding atomic number
1409            for iase in range(n_atoms):
1410                for icastep in range(n_atoms):
1411                    if (species[icastep] == self.atoms[iase].symbol
1412                            and not atoms_assigned[icastep]):
1413                        positions_frac_atoms[iase] = \
1414                            positions_frac_castep[icastep]
1415                        forces_atoms[iase] = np.array(forces_castep[icastep])
1416                        if iprint > 1 and calculate_hirshfeld:
1417                            hirsh_atoms[iase] = np.array(hirsh_castep[icastep])
1418                        if spin_polarized:
1419                            # reordering not necessary in case all spins == 0
1420                            spins_atoms[iase] = np.array(spins_castep[icastep])
1421                        mulliken_charges_atoms[iase] = np.array(
1422                            mulliken_charges_castep[icastep])
1423                        atoms_assigned[icastep] = True
1424                        break
1425
1426            if not all(atoms_assigned):
1427                not_assigned = [i for (i, assigned)
1428                                in zip(range(len(atoms_assigned)),
1429                                       atoms_assigned) if not assigned]
1430                warnings.warn('%s atoms not assigned. '
1431                              ' DEBUGINFO: The following atoms where not assigned: %s' %
1432                              (atoms_assigned.count(False), not_assigned))
1433            else:
1434                self.atoms.set_scaled_positions(positions_frac_atoms)
1435
1436        else:
1437            # If no atoms, object has been previously defined
1438            # we define it here and set the Castep() instance as calculator.
1439            # This covers the case that we simply want to open a .castep file.
1440
1441            # The next time around we will have an atoms object, since
1442            # set_calculator also set atoms in the calculator.
1443            if self.atoms:
1444                constraints = self.atoms.constraints
1445            else:
1446                constraints = []
1447            atoms = ase.atoms.Atoms(species,
1448                                    cell=lattice_real,
1449                                    constraint=constraints,
1450                                    pbc=True,
1451                                    scaled_positions=positions_frac,
1452                                    )
1453            if custom_species is not None:
1454                atoms.new_array('castep_custom_species',
1455                                np.array(custom_species))
1456
1457            if self.param.spin_polarized:
1458                # only set magnetic moments if this was a spin polarized
1459                # calculation
1460                # this one fails as is
1461                atoms.set_initial_magnetic_moments(magmoms=spins_atoms)
1462
1463            if mulliken_analysis:
1464                atoms.set_initial_charges(charges=mulliken_charges_atoms)
1465            atoms.calc = self
1466
1467        self._kpoints = kpoints
1468        self._forces = forces_atoms
1469        # stress in .castep file is given in GPa:
1470        self._stress = np.array(stress) * units.GPa
1471        self._hirsh_volrat = hirsh_atoms
1472        self._spins = spins_atoms
1473        self._mulliken_charges = mulliken_charges_atoms
1474        self._hirshfeld_charges = hirshfeld_charges_atoms
1475
1476        if self._warnings:
1477            warnings.warn('WARNING: %s contains warnings' % castep_file)
1478            for warning in self._warnings:
1479                warnings.warn(warning)
1480        # reset
1481        self._warnings = []
1482
1483        # Read in eigenvalues from bands file
1484        bands_file = castep_file[:-7] + '.bands'
1485        if (self.param.task.value is not None
1486            and self.param.task.value.lower() == 'bandstructure'):
1487            self._band_structure = self.band_structure(bandfile=bands_file)
1488        else:
1489            try:
1490                (self._ibz_kpts,
1491                 self._ibz_weights,
1492                 self._eigenvalues,
1493                 self._efermi) = read_bands(filename=bands_file)
1494            except FileNotFoundError:
1495                warnings.warn('Could not load .bands file, eigenvalues and '
1496                              'Fermi energy are unknown')
1497
1498    def read_symops(self, castep_castep=None):
1499        # TODO: check that this is really backwards compatible
1500        # with previous routine with this name...
1501        """Read all symmetry operations used from a .castep file."""
1502
1503        if castep_castep is None:
1504            castep_castep = self._seed + '.castep'
1505
1506        if isinstance(castep_castep, str):
1507            if not os.path.isfile(castep_castep):
1508                warnings.warn('Warning: CASTEP file %s not found!' %
1509                              castep_castep)
1510            f = paropen(castep_castep, 'r')
1511            _close = True
1512        else:
1513            # in this case we assume that we have a fileobj already, but check
1514            # for attributes in order to avoid extended EAFP blocks.
1515            f = castep_castep
1516
1517            # look before you leap...
1518            attributes = ['name',
1519                          'readline',
1520                          'close']
1521
1522            for attr in attributes:
1523                if not hasattr(f, attr):
1524                    raise TypeError('read_castep_castep_symops: castep_castep '
1525                                    'is not of type str nor valid fileobj!')
1526
1527            castep_castep = f.name
1528            _close = False
1529
1530        while True:
1531            line = f.readline()
1532            if not line:
1533                return
1534            if 'output verbosity' in line:
1535                iprint = line.split()[-1][1]
1536                # filter out the default
1537                if int(iprint) != 1:
1538                    self.param.iprint = iprint
1539            if 'Symmetry and Constraints' in line:
1540                break
1541
1542        if self.param.iprint.value is None or int(self.param.iprint.value) < 2:
1543            self._interface_warnings.append(
1544                'Warning: No symmetry'
1545                'operations could be read from %s (iprint < 2).' % f.name)
1546            return
1547
1548        while True:
1549            line = f.readline()
1550            if not line:
1551                break
1552            if 'Number of symmetry operations' in line:
1553                nsym = int(line.split()[5])
1554                # print "nsym = %d" % nsym
1555                # information about symmetry related atoms currently not read
1556                symmetry_operations = []
1557                for _ in range(nsym):
1558                    rotation = []
1559                    displacement = []
1560                    while True:
1561                        if 'rotation' in f.readline():
1562                            break
1563                    for _ in range(3):
1564                        line = f.readline()
1565                        rotation.append([float(r) for r in line.split()[1:4]])
1566                    while True:
1567                        if 'displacement' in f.readline():
1568                            break
1569                    line = f.readline()
1570                    displacement = [float(d) for d in line.split()[1:4]]
1571                    symop = {'rotation': rotation,
1572                             'displacement': displacement}
1573                    self.symmetry_ops = symop
1574                self.symmetry = symmetry_operations
1575                warnings.warn('Symmetry operations successfully read from %s. %s' %
1576                              (f.name, self.cell.symmetry_ops))
1577                break
1578
1579        # only close if we opened the file in this routine
1580        if _close:
1581            f.close()
1582
1583    def get_hirsh_volrat(self):
1584        """
1585        Return the Hirshfeld volumes.
1586        """
1587        return self._hirsh_volrat
1588
1589    def get_spins(self):
1590        """
1591        Return the spins from a plane-wave Mulliken analysis.
1592        """
1593        return self._spins
1594
1595    def get_mulliken_charges(self):
1596        """
1597        Return the charges from a plane-wave Mulliken analysis.
1598        """
1599        return self._mulliken_charges
1600
1601    def get_hirshfeld_charges(self):
1602        """
1603        Return the charges from a Hirshfeld analysis.
1604        """
1605        return self._hirshfeld_charges
1606
1607    def get_total_time(self):
1608        """
1609        Return the total runtime
1610        """
1611        return self._total_time
1612
1613    def get_peak_memory(self):
1614        """
1615        Return the peak memory usage
1616        """
1617        return self._peak_memory
1618
1619    def set_label(self, label):
1620        """The label is part of each seed, which in turn is a prefix
1621        in each CASTEP related file.
1622        """
1623        # we may think about changing this in future to set `self._directory`
1624        # and `self._label`, as one would expect
1625        self._label = label
1626
1627    def set_pspot(self, pspot, elems=None,
1628                  notelems=None,
1629                  clear=True,
1630                  suffix='usp'):
1631        """Quickly set all pseudo-potentials: Usually CASTEP psp are named
1632        like <Elem>_<pspot>.<suffix> so this function function only expects
1633        the <LibraryName>. It then clears any previous pseudopotential
1634        settings apply the one with <LibraryName> for each element in the
1635        atoms object. The optional elems and notelems arguments can be used
1636        to exclusively assign to some species, or to exclude with notelemens.
1637
1638        Parameters ::
1639
1640            - elems (None) : set only these elements
1641            - notelems (None): do not set the elements
1642            - clear (True): clear previous settings
1643            - suffix (usp): PP file suffix
1644        """
1645        if self._find_pspots:
1646            if self._pedantic:
1647                warnings.warn('Warning: <_find_pspots> = True. '
1648                              'Do you really want to use `set_pspots()`? '
1649                              'This does not check whether the PP files exist. '
1650                              'You may rather want to use `find_pspots()` with '
1651                              'the same <pspot>.')
1652
1653        if clear and not elems and not notelems:
1654            self.cell.species_pot.clear()
1655        for elem in set(self.atoms.get_chemical_symbols()):
1656            if elems is not None and elem not in elems:
1657                continue
1658            if notelems is not None and elem in notelems:
1659                continue
1660            self.cell.species_pot = (elem, '%s_%s.%s' % (elem, pspot, suffix))
1661
1662    def find_pspots(self, pspot='.+', elems=None,
1663                    notelems=None, clear=True, suffix='(usp|UPF|recpot)'):
1664        r"""Quickly find and set all pseudo-potentials by searching in
1665        castep_pp_path:
1666
1667        This one is more flexible than set_pspots, and also checks if the files
1668        are actually available from the castep_pp_path.
1669
1670        Essentially, the function parses the filenames in <castep_pp_path> and
1671        does a regex matching. The respective pattern is:
1672
1673            r"^(<elem>|<elem.upper()>|elem.lower()>(_|-)<pspot>\.<suffix>$"
1674
1675        In most cases, it will be sufficient to not specify anything, if you
1676        use standard CASTEP USPPs with only one file per element in the
1677        <castep_pp_path>.
1678
1679        The function raises a `RuntimeError` if there is some ambiguity
1680        (multiple files per element).
1681
1682        Parameters ::
1683
1684            - pspots ('.+') : as defined above, will be a wildcard if not
1685                              specified.
1686            - elems (None) : set only these elements
1687            - notelems (None): do not set the elements
1688            - clear (True): clear previous settings
1689            - suffix (usp|UPF|recpot): PP file suffix
1690        """
1691        if clear and not elems and not notelems:
1692            self.cell.species_pot.clear()
1693
1694        if not os.path.isdir(self._castep_pp_path):
1695            if self._pedantic:
1696                warnings.warn('Cannot search directory: {} Folder does not exist'
1697                              .format(self._castep_pp_path))
1698            return
1699
1700        # translate the bash wildcard syntax to regex
1701        if pspot == '*':
1702            pspot = '.*'
1703        if suffix == '*':
1704            suffix = '.*'
1705        if pspot == '*':
1706            pspot = '.*'
1707
1708        # GBRV USPPs have a strnage naming schme
1709        pattern = r'^({elem}|{elem_upper}|{elem_lower})(_|-){pspot}\.{suffix}$'
1710
1711        for elem in set(self.atoms.get_chemical_symbols()):
1712            if elems is not None and elem not in elems:
1713                continue
1714            if notelems is not None and elem in notelems:
1715                continue
1716            p = pattern.format(elem=elem,
1717                               elem_upper=elem.upper(),
1718                               elem_lower=elem.lower(),
1719                               pspot=pspot,
1720                               suffix=suffix)
1721            pps = []
1722            for f in os.listdir(self._castep_pp_path):
1723                if re.match(p, f):
1724                    pps.append(f)
1725            if not pps:
1726                if self._pedantic:
1727                    warnings.warn('Pseudopotential for species {} not found!'
1728                                  .format(elem))
1729            elif not len(pps) == 1:
1730                raise RuntimeError(
1731                    'Pseudopotential for species ''{} not unique!\n'
1732                    .format(elem)
1733                    + 'Found the following files in {}\n'
1734                    .format(self._castep_pp_path)
1735                    + '\n'.join(['    {}'.format(pp) for pp in pps]) +
1736                    '\nConsider a stricter search pattern in `find_pspots()`.')
1737            else:
1738                self.cell.species_pot = (elem, pps[0])
1739
1740    @property
1741    def name(self):
1742        """Return the name of the calculator (string).  """
1743        return self.__name__
1744
1745    def get_property(self, name, atoms=None, allow_calculation=True):
1746        # High-level getter for compliance with the database module...
1747        # in principle this would not be necessary any longer if we properly
1748        # based this class on `Calculator`
1749        if name == 'forces':
1750            return self.get_forces(atoms)
1751        elif name == 'energy':
1752            return self.get_potential_energy(atoms)
1753        elif name == 'stress':
1754            return self.get_stress(atoms)
1755        elif name == 'charges':
1756            return self.get_charges(atoms)
1757        else:
1758            raise PropertyNotImplementedError
1759
1760    @_self_getter
1761    def get_forces(self, atoms):
1762        """Run CASTEP calculation if needed and return forces."""
1763        self.update(atoms)
1764        return np.array(self._forces)
1765
1766    @_self_getter
1767    def get_total_energy(self, atoms):
1768        """Run CASTEP calculation if needed and return total energy."""
1769        self.update(atoms)
1770        return self._energy_total
1771
1772    @_self_getter
1773    def get_total_energy_corrected(self, atoms):
1774        """Run CASTEP calculation if needed and return total energy."""
1775        self.update(atoms)
1776        return self._energy_total_corr
1777
1778    @_self_getter
1779    def get_free_energy(self, atoms):
1780        """Run CASTEP calculation if needed and return free energy.
1781           Only defined with smearing."""
1782        self.update(atoms)
1783        return self._energy_free
1784
1785    @_self_getter
1786    def get_0K_energy(self, atoms):
1787        """Run CASTEP calculation if needed and return 0K energy.
1788           Only defined with smearing."""
1789        self.update(atoms)
1790        return self._energy_0K
1791
1792    @_self_getter
1793    def get_potential_energy(self, atoms, force_consistent=False):
1794        # here for compatibility with ase/calculators/general.py
1795        # but accessing only _name variables
1796        """Return the total potential energy."""
1797        self.update(atoms)
1798        if force_consistent:
1799            # Assumption: If no dispersion correction is applied, then the
1800            # respective value will default to None as initialized.
1801            if self._dispcorr_energy_free is not None:
1802                return self._dispcorr_energy_free
1803            else:
1804                return self._energy_free
1805        else:
1806            if self._energy_0K is not None:
1807                if self._dispcorr_energy_0K is not None:
1808                    return self._dispcorr_energy_0K
1809                else:
1810                    return self._energy_0K
1811            else:
1812                if self._dispcorr_energy_total is not None:
1813                    return self._dispcorr_energy_total
1814                else:
1815                    if self._energy_total_corr is not None:
1816                        return self._energy_total_corr
1817                    else:
1818                        return self._energy_total
1819
1820    @_self_getter
1821    def get_stress(self, atoms):
1822        """Return the stress."""
1823        self.update(atoms)
1824        # modification: we return the Voigt form directly to get rid of the
1825        # annoying user warnings
1826        stress = np.array(
1827            [self._stress[0, 0], self._stress[1, 1], self._stress[2, 2],
1828             self._stress[1, 2], self._stress[0, 2], self._stress[0, 1]])
1829        # return self._stress
1830        return stress
1831
1832    @_self_getter
1833    def get_pressure(self, atoms):
1834        """Return the pressure."""
1835        self.update(atoms)
1836        return self._pressure
1837
1838    @_self_getter
1839    def get_unit_cell(self, atoms):
1840        """Return the unit cell."""
1841        self.update(atoms)
1842        return self._unit_cell
1843
1844    @_self_getter
1845    def get_kpoints(self, atoms):
1846        """Return the kpoints."""
1847        self.update(atoms)
1848        return self._kpoints
1849
1850    @_self_getter
1851    def get_number_cell_constraints(self, atoms):
1852        """Return the number of cell constraints."""
1853        self.update(atoms)
1854        return self._number_of_cell_constraints
1855
1856    @_self_getter
1857    def get_charges(self, atoms):
1858        """Run CASTEP calculation if needed and return Mulliken charges."""
1859        self.update(atoms)
1860        return np.array(self._mulliken_charges)
1861
1862    @_self_getter
1863    def get_magnetic_moments(self, atoms):
1864        """Run CASTEP calculation if needed and return Mulliken charges."""
1865        self.update(atoms)
1866        return np.array(self._spins)
1867
1868    def set_atoms(self, atoms):
1869        """Sets the atoms for the calculator and vice versa."""
1870        atoms.pbc = [True, True, True]
1871        self.__dict__['atoms'] = atoms.copy()
1872        self.atoms._calc = self
1873
1874    def update(self, atoms):
1875        """Checks if atoms object or calculator changed and
1876        runs calculation if so.
1877        """
1878        if self.calculation_required(atoms):
1879            self.calculate(atoms)
1880
1881    def calculation_required(self, atoms, _=None):
1882        """Checks wether anything changed in the atoms object or CASTEP
1883        settings since the last calculation using this instance.
1884        """
1885        # SPR: what happens with the atoms parameter here? Why don't we use it?
1886        # from all that I can tell we need to compare against atoms instead of
1887        # self.atoms
1888        # if not self.atoms == self._old_atoms:
1889        if not atoms == self._old_atoms:
1890            return True
1891        if self._old_param is None or self._old_cell is None:
1892            return True
1893        if not self.param._options == self._old_param._options:
1894            return True
1895        if not self.cell._options == self._old_cell._options:
1896            return True
1897        return False
1898
1899    def calculate(self, atoms):
1900        """Write all necessary input file and call CASTEP."""
1901        self.prepare_input_files(atoms, force_write=self._force_write)
1902        if not self._prepare_input_only:
1903            self.run()
1904            self.read()
1905
1906            # we need to push the old state here!
1907            # although run() pushes it, read() may change the atoms object
1908            # again.
1909            # yet, the old state is supposed to be the one AFTER read()
1910            self.push_oldstate()
1911
1912    def push_oldstate(self):
1913        """This function pushes the current state of the (CASTEP) Atoms object
1914        onto the previous state. Or in other words after calling this function,
1915        calculation_required will return False and enquiry functions just
1916        report the current value, e.g. get_forces(), get_potential_energy().
1917        """
1918        # make a snapshot of all current input
1919        # to be able to test if recalculation
1920        # is necessary
1921        self._old_atoms = self.atoms.copy()
1922        self._old_param = deepcopy(self.param)
1923        self._old_cell = deepcopy(self.cell)
1924
1925    def initialize(self, *args, **kwargs):
1926        """Just an alias for prepar_input_files to comply with standard
1927        function names in ASE.
1928        """
1929        self.prepare_input_files(*args, **kwargs)
1930
1931    def prepare_input_files(self, atoms=None, force_write=None):
1932        """Only writes the input .cell and .param files and return
1933        This can be useful if one quickly needs to prepare input files
1934        for a cluster where no python or ASE is available. One can than
1935        upload the file manually and read out the results using
1936        Castep().read().
1937        """
1938
1939        if self.param.reuse.value is None:
1940            if self._pedantic:
1941                warnings.warn('You have not set e.g. calc.param.reuse = True. '
1942                              'Reusing a previous calculation may save CPU time! '
1943                              'The interface will make sure by default, a .check exists. '
1944                              'file before adding this statement to the .param file.')
1945        if self.param.num_dump_cycles.value is None:
1946            if self._pedantic:
1947                warnings.warn('You have not set e.g. calc.param.num_dump_cycles = 0. '
1948                              'This can save you a lot of disk space. One only needs '
1949                              '*wvfn* if electronic convergence is not achieved.')
1950        from ase.io.castep import write_param
1951
1952        if atoms is None:
1953            atoms = self.atoms
1954        else:
1955            self.atoms = atoms
1956
1957        if force_write is None:
1958            force_write = self._force_write
1959
1960        # if we have new instance of the calculator,
1961        # move existing results out of the way, first
1962        if (os.path.isdir(self._directory)
1963                and self._calls == 0
1964                and self._rename_existing_dir):
1965            if os.listdir(self._directory) == []:
1966                os.rmdir(self._directory)
1967            else:
1968                # rename appending creation date of the directory
1969                ctime = time.localtime(os.lstat(self._directory).st_ctime)
1970                os.rename(self._directory, '%s.bak-%s' %
1971                          (self._directory,
1972                           time.strftime('%Y%m%d-%H%M%S', ctime)))
1973
1974        # create work directory
1975        if not os.path.isdir(self._directory):
1976            os.makedirs(self._directory, 0o775)
1977
1978        # we do this every time, not only upon first call
1979        # if self._calls == 0:
1980        self._fetch_pspots()
1981
1982        # if _try_reuse is requested and this
1983        # is not the first run, we try to find
1984        # the .check file from the previous run
1985        # this is only necessary if _track_output
1986        # is set to true
1987        if self._try_reuse and self._calls > 0:
1988            if os.path.exists(self._abs_path(self._check_file)):
1989                self.param.reuse = self._check_file
1990            elif os.path.exists(self._abs_path(self._castep_bin_file)):
1991                self.param.reuse = self._castep_bin_file
1992        self._seed = self._build_castep_seed()
1993        self._check_file = '%s.check' % self._seed
1994        self._castep_bin_file = '%s.castep_bin' % self._seed
1995        self._castep_file = self._abs_path('%s.castep' % self._seed)
1996
1997        # write out the input file
1998        self._write_cell(self._abs_path('%s.cell' % self._seed),
1999                         self.atoms, castep_cell=self.cell,
2000                         force_write=force_write)
2001
2002        if self._export_settings:
2003            interface_options = self._opt
2004        else:
2005            interface_options = None
2006        write_param(self._abs_path('%s.param' % self._seed), self.param,
2007                    check_checkfile=self._check_checkfile,
2008                    force_write=force_write,
2009                    interface_options=interface_options,)
2010
2011    def _build_castep_seed(self):
2012        """Abstracts to construction of the final castep <seed>
2013        with and without _tracking_output.
2014        """
2015        if self._track_output:
2016            return '%s-%06d' % (self._label, self._calls)
2017        else:
2018            return '%s' % (self._label)
2019
2020    def _abs_path(self, path):
2021        # Create an absolute path for a file to put in the working directory
2022        return os.path.join(self._directory, path)
2023
2024    def run(self):
2025        """Simply call castep. If the first .err file
2026        contains text, this will be printed to the screen.
2027        """
2028        # change to target directory
2029        self._calls += 1
2030
2031        # run castep itself
2032        stdout, stderr = shell_stdouterr('%s %s' % (self._castep_command,
2033                                                    self._seed),
2034                                         cwd=self._directory)
2035        if stdout:
2036            print('castep call stdout:\n%s' % stdout)
2037        if stderr:
2038            print('castep call stderr:\n%s' % stderr)
2039
2040        # shouldn't it be called after read()???
2041        # self.push_oldstate()
2042
2043        # check for non-empty error files
2044        err_file = self._abs_path('%s.0001.err' % self._seed)
2045        if os.path.exists(err_file):
2046            err_file = open(err_file)
2047            self._error = err_file.read()
2048            err_file.close()
2049        if self._error:
2050            raise RuntimeError(self._error)
2051
2052    def __repr__(self):
2053        """Returns generic, fast to capture representation of
2054        CASTEP settings along with atoms object.
2055        """
2056        expr = ''
2057        expr += '-----------------Atoms--------------------\n'
2058        if self.atoms is not None:
2059            expr += str('%20s\n' % self.atoms)
2060        else:
2061            expr += 'None\n'
2062
2063        expr += '-----------------Param keywords-----------\n'
2064        expr += str(self.param)
2065        expr += '-----------------Cell keywords------------\n'
2066        expr += str(self.cell)
2067        expr += '-----------------Internal keys------------\n'
2068        for key in self.internal_keys:
2069            expr += '%20s : %s\n' % (key, self._opt[key])
2070
2071        return expr
2072
2073    def __getattr__(self, attr):
2074        """___getattr___ gets overloaded to reroute the internal keys
2075        and to be able to easily store them in in the param so that
2076        they can be read in again in subsequent calls.
2077        """
2078        if attr in self.internal_keys:
2079            return self._opt[attr]
2080        if attr in ['__repr__', '__str__']:
2081            raise AttributeError
2082        elif attr not in self.__dict__:
2083            raise AttributeError
2084        else:
2085            return self.__dict__[attr]
2086
2087    def __setattr__(self, attr, value):
2088        """We overload the settattr method to make value assignment
2089        as pythonic as possible. Internal values all start with _.
2090        Value assigment is case insensitive!
2091        """
2092
2093        if attr.startswith('_'):
2094            # internal variables all start with _
2095            # let's check first if they are close but not identical
2096            # to one of the switches, that the user accesses directly
2097            similars = difflib.get_close_matches(attr, self.internal_keys,
2098                                                 cutoff=0.9)
2099            if attr not in self.internal_keys and similars:
2100                warnings.warn('Warning: You probably tried one of: %s but typed %s' %
2101                              (similars, attr))
2102            if attr in self.internal_keys:
2103                self._opt[attr] = value
2104                if attr == '_track_output':
2105                    if value:
2106                        self._try_reuse = True
2107                        if self._pedantic:
2108                            warnings.warn('You switched _track_output on. This will '
2109                                          'consume a lot of disk-space. The interface '
2110                                          'also switched _try_reuse on, which will '
2111                                          'try to find the last check file. Set '
2112                                          '_try_reuse = False, if you need '
2113                                          'really separate calculations')
2114                    elif '_try_reuse' in self._opt and self._try_reuse:
2115                        self._try_reuse = False
2116                        if self._pedantic:
2117                            warnings.warn('_try_reuse is set to False, too')
2118            else:
2119                self.__dict__[attr] = value
2120            return
2121        elif attr in ['atoms', 'cell', 'param']:
2122            if value is not None:
2123                if attr == 'atoms' and not isinstance(value, ase.atoms.Atoms):
2124                    raise TypeError(
2125                        '%s is not an instance of ase.atoms.Atoms.' % value)
2126                elif attr == 'cell' and not isinstance(value, CastepCell):
2127                    raise TypeError('%s is not an instance of CastepCell.' %
2128                                    value)
2129                elif attr == 'param' and not isinstance(value, CastepParam):
2130                    raise TypeError('%s is not an instance of CastepParam.' %
2131                                    value)
2132            # These 3 are accepted right-away, no matter what
2133            self.__dict__[attr] = value
2134            return
2135        elif attr in self.atoms_obj_keys:
2136            # keywords which clearly belong to the atoms object are
2137            # rerouted to go there
2138            self.atoms.__dict__[attr] = value
2139            return
2140        elif attr in self.atoms_keys:
2141            # CASTEP keywords that should go into the atoms object
2142            # itself are blocked
2143            warnings.warn('Ignoring setings of "%s", since this has to be set '
2144                          'through the atoms object' % attr)
2145            return
2146
2147        attr = attr.lower()
2148        if attr not in (list(self.cell._options.keys())
2149                        + list(self.param._options.keys())):
2150            # what is left now should be meant to be a castep keyword
2151            # so we first check if it defined, and if not offer some error
2152            # correction
2153            if self._kw_tol == 0:
2154                similars = difflib.get_close_matches(
2155                    attr,
2156                    self.cell._options.keys() + self.param._options.keys())
2157                if similars:
2158                    raise UserWarning('Option "%s" not known! You mean "%s"?' %
2159                                      (attr, similars[0]))
2160                else:
2161                    raise UserWarning('Option "%s" is not known!' % attr)
2162            else:
2163                warnings.warn('Option "%s" is not known - please set any new'
2164                              ' options directly in the .cell or .param '
2165                              'objects' % attr)
2166                return
2167
2168        # here we know it must go into one of the component param or cell
2169        # so we first determine which one
2170        if attr in self.param._options.keys():
2171            comp = 'param'
2172        elif attr in self.cell._options.keys():
2173            comp = 'cell'
2174        else:
2175            raise UserWarning('Programming error: could not attach '
2176                              'the keyword to an input file')
2177
2178        self.__dict__[comp].__setattr__(attr, value)
2179
2180    def merge_param(self, param, overwrite=True, ignore_internal_keys=False):
2181        """Parse a param file and merge it into the current parameters."""
2182        if isinstance(param, CastepParam):
2183            for key, option in param._options.items():
2184                if option.value is not None:
2185                    self.param.__setattr__(key, option.value)
2186            return
2187
2188        elif isinstance(param, str):
2189            param_file = open(param, 'r')
2190            _close = True
2191
2192        else:
2193            # in this case we assume that we have a fileobj already, but check
2194            # for attributes in order to avoid extended EAFP blocks.
2195            param_file = param
2196
2197            # look before you leap...
2198            attributes = ['name',
2199                          'close'
2200                          'readlines']
2201
2202            for attr in attributes:
2203                if not hasattr(param_file, attr):
2204                    raise TypeError('"param" is neither CastepParam nor str '
2205                                    'nor valid fileobj')
2206
2207            param = param_file.name
2208            _close = False
2209
2210        self, int_opts = read_param(fd=param_file, calc=self,
2211                                    get_interface_options=True)
2212
2213        # Add the interface options
2214        for k, val in int_opts.items():
2215            if (k in self.internal_keys and not ignore_internal_keys):
2216                if val in _tf_table:
2217                    val = _tf_table[val]
2218                self._opt[k] = val
2219
2220        if _close:
2221            param_file.close()
2222
2223    def dryrun_ok(self, dryrun_flag='-dryrun'):
2224        """Starts a CASTEP run with the -dryrun flag [default]
2225        in a temporary and check wether all variables are initialized
2226        correctly. This is recommended for every bigger simulation.
2227        """
2228        from ase.io.castep import write_param
2229
2230        temp_dir = tempfile.mkdtemp()
2231        self._fetch_pspots(temp_dir)
2232        seed = 'dryrun'
2233
2234        self._write_cell(os.path.join(temp_dir, '%s.cell' % seed),
2235                         self.atoms, castep_cell=self.cell)
2236        # This part needs to be modified now that we rely on the new formats.py
2237        # interface
2238        if not os.path.isfile(os.path.join(temp_dir, '%s.cell' % seed)):
2239            warnings.warn('%s.cell not written - aborting dryrun' % seed)
2240            return
2241        write_param(os.path.join(temp_dir, '%s.param' % seed), self.param, )
2242
2243        stdout, stderr = shell_stdouterr(('%s %s %s' % (self._castep_command,
2244                                                        seed,
2245                                                        dryrun_flag)),
2246                                         cwd=temp_dir)
2247
2248        if stdout:
2249            print(stdout)
2250        if stderr:
2251            print(stderr)
2252        result_file = open(os.path.join(temp_dir, '%s.castep' % seed))
2253
2254        txt = result_file.read()
2255        ok_string = r'.*DRYRUN finished.*No problems found with input files.*'
2256        match = re.match(ok_string, txt, re.DOTALL)
2257
2258        m = re.search(r'Number of kpoints used =\s*([0-9]+)', txt)
2259        if m:
2260            self._kpoints = int(m.group(1))
2261        else:
2262            warnings.warn(
2263                'Couldn\'t fetch number of kpoints from dryrun CASTEP file')
2264
2265        err_file = os.path.join(temp_dir, '%s.0001.err' % seed)
2266        if match is None and os.path.exists(err_file):
2267            err_file = open(err_file)
2268            self._error = err_file.read()
2269            err_file.close()
2270
2271        result_file.close()
2272        shutil.rmtree(temp_dir)
2273
2274        # re.match return None is the string does not match
2275        return match is not None
2276
2277    # this could go into the Atoms() class at some point...
2278    def _get_number_in_species(self, at, atoms=None):
2279        """Return the number of the atoms within the set of it own
2280        species. If you are an ASE commiter: why not move this into
2281        ase.atoms.Atoms ?"""
2282        if atoms is None:
2283            atoms = self.atoms
2284        numbers = atoms.get_atomic_numbers()
2285        n = numbers[at]
2286        nis = numbers.tolist()[:at + 1].count(n)
2287        return nis
2288
2289    def _get_absolute_number(self, species, nic, atoms=None):
2290        """This is the inverse function to _get_number in species."""
2291        if atoms is None:
2292            atoms = self.atoms
2293        ch = atoms.get_chemical_symbols()
2294        ch.reverse()
2295        total_nr = 0
2296        assert nic > 0, 'Number in species needs to be 1 or larger'
2297        while True:
2298            if ch.pop() == species:
2299                if nic == 1:
2300                    return total_nr
2301                nic -= 1
2302            total_nr += 1
2303
2304    def _fetch_pspots(self, directory=None):
2305        """Put all specified pseudo-potentials into the working directory.
2306        """
2307        # should be a '==' right? Otherwise setting _castep_pp_path is not
2308        # honored.
2309        if (not os.environ.get('PSPOT_DIR', None)
2310                and self._castep_pp_path == os.path.abspath('.')):
2311            # By default CASTEP consults the environment variable
2312            # PSPOT_DIR. If this contains a list of colon separated
2313            # directories it will check those directories for pseudo-
2314            # potential files if not in the current directory.
2315            # Thus if PSPOT_DIR is set there is nothing left to do.
2316            # If however PSPOT_DIR was been accidentally set
2317            # (e.g. with regards to a different program)
2318            # setting CASTEP_PP_PATH to an explicit value will
2319            # still be honored.
2320            return
2321
2322        if directory is None:
2323            directory = self._directory
2324        if not os.path.isdir(self._castep_pp_path):
2325            warnings.warn('PSPs directory %s not found' % self._castep_pp_path)
2326        pspots = {}
2327        if self._find_pspots:
2328            self.find_pspots()
2329        if self.cell.species_pot.value is not None:
2330            for line in self.cell.species_pot.value.split('\n'):
2331                line = line.split()
2332                if line:
2333                    pspots[line[0]] = line[1]
2334        for species in self.atoms.get_chemical_symbols():
2335            if not pspots or species not in pspots.keys():
2336                if self._build_missing_pspots:
2337                    if self._pedantic:
2338                        warnings.warn('Warning: you have no PP specified for %s. '
2339                                      'CASTEP will now generate an on-the-fly potentials. '
2340                                      'For sake of numerical consistency and efficiency '
2341                                      'this is discouraged.' % species)
2342                else:
2343                    raise RuntimeError(
2344                        'Warning: you have no PP specified for %s.' %
2345                        species)
2346        if self.cell.species_pot.value:
2347            for (species, pspot) in pspots.items():
2348                orig_pspot_file = os.path.join(self._castep_pp_path, pspot)
2349                cp_pspot_file = os.path.join(directory, pspot)
2350                if (os.path.exists(orig_pspot_file)
2351                        and not os.path.exists(cp_pspot_file)):
2352                    if self._copy_pspots:
2353                        shutil.copy(orig_pspot_file, directory)
2354                    elif self._link_pspots:
2355                        os.symlink(orig_pspot_file, cp_pspot_file)
2356                    else:
2357                        if self._pedantic:
2358                            warnings.warn('Warning: PP files have neither been '
2359                                          'linked nor copied to the working directory. Make '
2360                                          'sure to set the evironment variable PSPOT_DIR '
2361                                          'accordingly!')
2362
2363
2364def get_castep_version(castep_command):
2365    """This returns the version number as printed in the CASTEP banner.
2366       For newer CASTEP versions ( > 6.1) the --version command line option
2367       has been added; this will be attempted first.
2368    """
2369    import tempfile
2370    with tempfile.TemporaryDirectory() as temp_dir:
2371        return _get_castep_version(castep_command, temp_dir)
2372
2373
2374def _get_castep_version(castep_command, temp_dir):
2375    jname = 'dummy_jobname'
2376    stdout, stderr = '', ''
2377    fallback_version = 16.  # CASTEP 16.0 and 16.1 report version wrongly
2378    try:
2379        stdout, stderr = subprocess.Popen(
2380            castep_command.split() + ['--version'],
2381            stderr=subprocess.PIPE,
2382            stdout=subprocess.PIPE, cwd=temp_dir,
2383            universal_newlines=True).communicate()
2384        if 'CASTEP version' not in stdout:
2385            stdout, stderr = subprocess.Popen(
2386                castep_command.split() + [jname],
2387                stderr=subprocess.PIPE,
2388                stdout=subprocess.PIPE, cwd=temp_dir,
2389                universal_newlines=True).communicate()
2390    except Exception:  # XXX Which kind of exception?
2391        msg = ''
2392        msg += 'Could not determine the version of your CASTEP binary \n'
2393        msg += 'This usually means one of the following \n'
2394        msg += '   * you do not have CASTEP installed \n'
2395        msg += '   * you have not set the CASTEP_COMMAND to call it \n'
2396        msg += '   * you have provided a wrong CASTEP_COMMAND. \n'
2397        msg += '     Make sure it is in your PATH\n\n'
2398        msg += stdout
2399        msg += stderr
2400        raise CastepVersionError(msg)
2401    if 'CASTEP version' in stdout:
2402        output_txt = stdout.split('\n')
2403        version_re = re.compile(r'CASTEP version:\s*([0-9\.]*)')
2404    else:
2405        output = open(os.path.join(temp_dir, '%s.castep' % jname))
2406        output_txt = output.readlines()
2407        output.close()
2408        version_re = re.compile(r'(?<=CASTEP version )[0-9.]*')
2409    # shutil.rmtree(temp_dir)
2410    for line in output_txt:
2411        if 'CASTEP version' in line:
2412            try:
2413                return float(version_re.findall(line)[0])
2414            except ValueError:
2415                # Fallback for buggy --version on CASTEP 16.0, 16.1
2416                return fallback_version
2417
2418
2419def create_castep_keywords(castep_command, filename='castep_keywords.json',
2420                           force_write=True, path='.', fetch_only=None):
2421    """This function allows to fetch all available keywords from stdout
2422    of an installed castep binary. It furthermore collects the documentation
2423    to harness the power of (ipython) inspection and type for some basic
2424    type checking of input. All information is stored in a JSON file that is
2425    not distributed by default to avoid breaking the license of CASTEP.
2426    """
2427    # Takes a while ...
2428    # Fetch all allowed parameters
2429    # fetch_only : only fetch that many parameters (for testsuite only)
2430    suffixes = ['cell', 'param']
2431
2432    filepath = os.path.join(path, filename)
2433
2434    if os.path.exists(filepath) and not force_write:
2435        warnings.warn('CASTEP Options Module file exists. '
2436                      'You can overwrite it by calling '
2437                      'python castep.py -f [CASTEP_COMMAND].')
2438        return False
2439
2440    # Not saving directly to file her to prevent half-generated files
2441    # which will cause problems on future runs
2442
2443    castep_version = get_castep_version(castep_command)
2444
2445    help_all, _ = shell_stdouterr('%s -help all' % castep_command)
2446
2447    # Filter out proper keywords
2448    try:
2449        # The old pattern does not math properly as in CASTEP as of v8.0 there
2450        # are some keywords for the semi-empircal dispersion correction (SEDC)
2451        # which also include numbers.
2452        if castep_version < 7.0:
2453            pattern = r'((?<=^ )[A-Z_]{2,}|(?<=^)[A-Z_]{2,})'
2454        else:
2455            pattern = r'((?<=^ )[A-Z_\d]{2,}|(?<=^)[A-Z_\d]{2,})'
2456
2457        raw_options = re.findall(pattern, help_all, re.MULTILINE)
2458    except Exception:
2459        warnings.warn('Problem parsing: %s' % help_all)
2460        raise
2461
2462    types = set()
2463    levels = set()
2464
2465    processed_n = 0
2466    to_process = len(raw_options[:fetch_only])
2467
2468    processed_options = {sf: {} for sf in suffixes}
2469
2470    for o_i, option in enumerate(raw_options[:fetch_only]):
2471        doc, _ = shell_stdouterr('%s -help %s' % (castep_command, option))
2472
2473        # Stand Back! I know regular expressions (http://xkcd.com/208/) :-)
2474        match = re.match(r'(?P<before_type>.*)Type: (?P<type>.+?)\s+'
2475                         + r'Level: (?P<level>[^ ]+)\n\s*\n'
2476                         + r'(?P<doc>.*?)(\n\s*\n|$)', doc, re.DOTALL)
2477
2478        processed_n += 1
2479
2480        if match is not None:
2481            match = match.groupdict()
2482
2483            # JM: uncomment lines in following block to debug issues
2484            #     with keyword assignment during extraction process from CASTEP
2485            suffix = None
2486            if re.findall(r'PARAMETERS keywords:\n\n\s?None found', doc):
2487                suffix = 'cell'
2488            if re.findall(r'CELL keywords:\n\n\s?None found', doc):
2489                suffix = 'param'
2490            if suffix is None:
2491                warnings.warn('%s -> not assigned to either'
2492                              ' CELL or PARAMETERS keywords' % option)
2493
2494            option = option.lower()
2495            mtyp = match.get('type', None)
2496            mlvl = match.get('level', None)
2497            mdoc = match.get('doc', None)
2498
2499            if mtyp is None:
2500                warnings.warn('Found no type for %s' % option)
2501                continue
2502            if mlvl is None:
2503                warnings.warn('Found no level for %s' % option)
2504                continue
2505            if mdoc is None:
2506                warnings.warn('Found no doc string for %s' % option)
2507                continue
2508
2509            types = types.union([mtyp])
2510            levels = levels.union([mlvl])
2511
2512            processed_options[suffix][option] = {
2513                'keyword': option,
2514                'option_type': mtyp,
2515                'level': mlvl,
2516                'docstring': mdoc
2517            }
2518
2519            processed_n += 1
2520
2521            frac = (o_i + 1.0) / to_process
2522            sys.stdout.write('\rProcessed: [{0}] {1:>3.0f}%'.format(
2523                             '#' * int(frac * 20) + ' '
2524                             * (20 - int(frac * 20)),
2525                             100 * frac))
2526            sys.stdout.flush()
2527
2528        else:
2529            warnings.warn('create_castep_keywords: Could not process %s'
2530                          % option)
2531
2532    sys.stdout.write('\n')
2533    sys.stdout.flush()
2534
2535    processed_options['types'] = list(types)
2536    processed_options['levels'] = list(levels)
2537    processed_options['castep_version'] = castep_version
2538
2539    json.dump(processed_options, open(filepath, 'w'), indent=4)
2540
2541    warnings.warn('CASTEP v%s, fetched %s keywords' %
2542                  (castep_version, processed_n))
2543    return True
2544
2545
2546class CastepOption:
2547    """"A CASTEP option. It handles basic conversions from string to its value
2548    type."""
2549
2550    default_convert_types = {
2551        'boolean (logical)': 'bool',
2552        'defined': 'bool',
2553        'string': 'str',
2554        'integer': 'int',
2555        'real': 'float',
2556        'integer vector': 'int_vector',
2557        'real vector': 'float_vector',
2558        'physical': 'float_physical',
2559        'block': 'block'
2560    }
2561
2562    def __init__(self, keyword, level, option_type, value=None,
2563                 docstring='No information available'):
2564        self.keyword = keyword
2565        self.level = level
2566        self.type = option_type
2567        self._value = value
2568        self.__doc__ = docstring
2569
2570    @property
2571    def value(self):
2572
2573        if self._value is not None:
2574            if self.type.lower() in ('integer vector', 'real vector',
2575                                     'physical'):
2576                return ' '.join(map(str, self._value))
2577            elif self.type.lower() in ('boolean (logical)', 'defined'):
2578                return str(self._value).upper()
2579            else:
2580                return str(self._value)
2581
2582    @property
2583    def raw_value(self):
2584        # The value, not converted to a string
2585        return self._value
2586
2587    @value.setter  # type: ignore
2588    def value(self, val):
2589
2590        if val is None:
2591            self.clear()
2592            return
2593
2594        ctype = self.default_convert_types.get(self.type.lower(), 'str')
2595        typeparse = '_parse_%s' % ctype
2596        try:
2597            self._value = getattr(self, typeparse)(val)
2598        except ValueError:
2599            raise ConversionError(ctype, self.keyword, val)
2600
2601    def clear(self):
2602        """Reset the value of the option to None again"""
2603        self._value = None
2604
2605    @staticmethod
2606    def _parse_bool(value):
2607        try:
2608            value = _tf_table[str(value).strip().title()]
2609        except (KeyError, ValueError):
2610            raise ValueError()
2611        return value
2612
2613    @staticmethod
2614    def _parse_str(value):
2615        value = str(value)
2616        return value
2617
2618    @staticmethod
2619    def _parse_int(value):
2620        value = int(value)
2621        return value
2622
2623    @staticmethod
2624    def _parse_float(value):
2625        value = float(value)
2626        return value
2627
2628    @staticmethod
2629    def _parse_int_vector(value):
2630        # Accepts either a string or an actual list/numpy array of ints
2631        if isinstance(value, str):
2632            if ',' in value:
2633                value = value.replace(',', ' ')
2634            value = list(map(int, value.split()))
2635
2636        value = np.array(value)
2637
2638        if value.shape != (3,) or value.dtype != int:
2639            raise ValueError()
2640
2641        return list(value)
2642
2643    @staticmethod
2644    def _parse_float_vector(value):
2645        # Accepts either a string or an actual list/numpy array of floats
2646        if isinstance(value, str):
2647            if ',' in value:
2648                value = value.replace(',', ' ')
2649            value = list(map(float, value.split()))
2650
2651        value = np.array(value) * 1.0
2652
2653        if value.shape != (3,) or value.dtype != float:
2654            raise ValueError()
2655
2656        return list(value)
2657
2658    @staticmethod
2659    def _parse_float_physical(value):
2660        # If this is a string containing units, saves them
2661        if isinstance(value, str):
2662            value = value.split()
2663
2664        try:
2665            l = len(value)
2666        except TypeError:
2667            l = 1
2668            value = [value]
2669
2670        if l == 1:
2671            try:
2672                value = (float(value[0]), '')
2673            except (TypeError, ValueError):
2674                raise ValueError()
2675        elif l == 2:
2676            try:
2677                value = (float(value[0]), value[1])
2678            except (TypeError, ValueError, IndexError):
2679                raise ValueError()
2680        else:
2681            raise ValueError()
2682
2683        return value
2684
2685    @staticmethod
2686    def _parse_block(value):
2687
2688        if isinstance(value, str):
2689            return value
2690        elif hasattr(value, '__getitem__'):
2691            return '\n'.join(value)  # Arrays of lines
2692        else:
2693            raise ValueError()
2694
2695    def __repr__(self):
2696        if self._value:
2697            expr = ('Option: {keyword}({type}, {level}):\n{_value}\n'
2698                    ).format(**self.__dict__)
2699        else:
2700            expr = ('Option: {keyword}[unset]({type}, {level})'
2701                    ).format(**self.__dict__)
2702        return expr
2703
2704    def __eq__(self, other):
2705        if not isinstance(other, CastepOption):
2706            return False
2707        else:
2708            return self.__dict__ == other.__dict__
2709
2710
2711class CastepOptionDict:
2712    """A dictionary-like object to hold a set of options for .cell or .param
2713    files loaded from a dictionary, for the sake of validation.
2714
2715    Replaces the old CastepCellDict and CastepParamDict that were defined in
2716    the castep_keywords.py file.
2717    """
2718
2719    def __init__(self, options=None):
2720        object.__init__(self)
2721        self._options = {}  # ComparableDict is not needed any more as
2722        # CastepOptions can be compared directly now
2723        for kw in options:
2724            opt = CastepOption(**options[kw])
2725            self._options[opt.keyword] = opt
2726            self.__dict__[opt.keyword] = opt
2727
2728
2729class CastepInputFile:
2730
2731    """Master class for CastepParam and CastepCell to inherit from"""
2732
2733    _keyword_conflicts: List[Set[str]] = []
2734
2735    def __init__(self, options_dict=None, keyword_tolerance=1):
2736        object.__init__(self)
2737
2738        if options_dict is None:
2739            options_dict = CastepOptionDict({})
2740
2741        self._options = options_dict._options
2742        self.__dict__.update(self._options)
2743        # keyword_tolerance means how strict the checks on new attributes are
2744        # 0 = no new attributes allowed
2745        # 1 = new attributes allowed, warning given
2746        # 2 = new attributes allowed, silent
2747        self._perm = np.clip(keyword_tolerance, 0, 2)
2748
2749        # Compile a dictionary for quick check of conflict sets
2750        self._conflict_dict = {kw: set(cset).difference({kw})
2751                               for cset in self._keyword_conflicts for kw in cset}
2752
2753    def __repr__(self):
2754        expr = ''
2755        is_default = True
2756        for key, option in sorted(self._options.items()):
2757            if option.value is not None:
2758                is_default = False
2759                expr += ('%20s : %s\n' % (key, option.value))
2760        if is_default:
2761            expr = 'Default\n'
2762
2763        expr += 'Keyword tolerance: {0}'.format(self._perm)
2764        return expr
2765
2766    def __setattr__(self, attr, value):
2767
2768        # Hidden attributes are treated normally
2769        if attr.startswith('_'):
2770            self.__dict__[attr] = value
2771            return
2772
2773        if attr not in self._options.keys():
2774
2775            if self._perm > 0:
2776                # Do we consider it a string or a block?
2777                is_str = isinstance(value, str)
2778                is_block = False
2779                if ((hasattr(value, '__getitem__') and not is_str)
2780                        or (is_str and len(value.split('\n')) > 1)):
2781                    is_block = True
2782
2783            if self._perm == 0:
2784                similars = difflib.get_close_matches(attr,
2785                                                     self._options.keys())
2786                if similars:
2787                    raise UserWarning(('Option "%s" not known! You mean "%s"?')
2788                                      % (attr, similars[0]))
2789                else:
2790                    raise UserWarning('Option "%s" is not known!' % attr)
2791            elif self._perm == 1:
2792                warnings.warn(('Option "%s" is not known and will '
2793                               'be added as a %s') % (attr,
2794                                                      ('block' if is_block else
2795                                                       'string')))
2796            attr = attr.lower()
2797            opt = CastepOption(keyword=attr, level='Unknown',
2798                               option_type='block' if is_block else 'string')
2799            self._options[attr] = opt
2800            self.__dict__[attr] = opt
2801        else:
2802            attr = attr.lower()
2803            opt = self._options[attr]
2804
2805        if not opt.type.lower() == 'block' and isinstance(value, str):
2806            value = value.replace(':', ' ')
2807
2808        # If it is, use the appropriate parser, unless a custom one is defined
2809        attrparse = '_parse_%s' % attr.lower()
2810
2811        # Check for any conflicts if the value is not None
2812        if not (value is None):
2813            cset = self._conflict_dict.get(attr.lower(), {})
2814            for c in cset:
2815                if (c in self._options and self._options[c].value):
2816                    warnings.warn(
2817                        'option "{attr}" conflicts with "{conflict}" in '
2818                        'calculator. Setting "{conflict}" to '
2819                        'None.'.format(attr=attr, conflict=c))
2820                    self._options[c].value = None
2821
2822        if hasattr(self, attrparse):
2823            self._options[attr].value = self.__getattribute__(attrparse)(value)
2824        else:
2825            self._options[attr].value = value
2826
2827    def __getattr__(self, name):
2828        if name[0] == '_' or self._perm == 0:
2829            raise AttributeError()
2830
2831        if self._perm == 1:
2832            warnings.warn('Option %s is not known, returning None' % (name))
2833
2834        return CastepOption(keyword='none', level='Unknown',
2835                            option_type='string', value=None)
2836
2837    def get_attr_dict(self, raw=False, types=False):
2838        """Settings that go into .param file in a traditional dict"""
2839
2840        attrdict = {k: o.raw_value if raw else o.value
2841                    for k, o in self._options.items() if o.value is not None}
2842
2843        if types:
2844            for key, val in attrdict.items():
2845                attrdict[key] = (val, self._options[key].type)
2846
2847        return attrdict
2848
2849
2850class CastepParam(CastepInputFile):
2851    """CastepParam abstracts the settings that go into the .param file"""
2852
2853    _keyword_conflicts = [{'cut_off_energy', 'basis_precision'}, ]
2854
2855    def __init__(self, castep_keywords, keyword_tolerance=1):
2856        self._castep_version = castep_keywords.castep_version
2857        CastepInputFile.__init__(self, castep_keywords.CastepParamDict(),
2858                                 keyword_tolerance)
2859
2860    @property
2861    def castep_version(self):
2862        return self._castep_version
2863
2864    # .param specific parsers
2865    def _parse_reuse(self, value):
2866        if value is None:
2867            return None  # Reset the value
2868        try:
2869            if self._options['continuation'].value:
2870                warnings.warn('Cannot set reuse if continuation is set, and '
2871                              'vice versa. Set the other to None, if you want '
2872                              'this setting.')
2873                return None
2874        except KeyError:
2875            pass
2876        return 'default' if (value is True) else str(value)
2877
2878    def _parse_continuation(self, value):
2879        if value is None:
2880            return None  # Reset the value
2881        try:
2882            if self._options['reuse'].value:
2883                warnings.warn('Cannot set reuse if continuation is set, and '
2884                              'vice versa. Set the other to None, if you want '
2885                              'this setting.')
2886                return None
2887        except KeyError:
2888            pass
2889        return 'default' if (value is True) else str(value)
2890
2891
2892class CastepCell(CastepInputFile):
2893
2894    """CastepCell abstracts all setting that go into the .cell file"""
2895
2896    _keyword_conflicts = [
2897        {'kpoint_mp_grid', 'kpoint_mp_spacing', 'kpoint_list',
2898         'kpoints_mp_grid', 'kpoints_mp_spacing', 'kpoints_list'},
2899        {'bs_kpoint_mp_grid', 'bs_kpoint_mp_spacing', 'bs_kpoint_list',
2900         'bs_kpoint_path',
2901         'bs_kpoints_mp_grid', 'bs_kpoints_mp_spacing', 'bs_kpoints_list',
2902         'bs_kpoints_path'},
2903        {'spectral_kpoint_mp_grid', 'spectral_kpoint_mp_spacing', 'spectral_kpoint_list',
2904         'spectral_kpoint_path',
2905         'spectral_kpoints_mp_grid', 'spectral_kpoints_mp_spacing', 'spectral_kpoints_list',
2906         'spectral_kpoints_path'},
2907        {'phonon_kpoint_mp_grid', 'phonon_kpoint_mp_spacing', 'phonon_kpoint_list',
2908         'phonon_kpoint_path',
2909         'phonon_kpoints_mp_grid', 'phonon_kpoints_mp_spacing', 'phonon_kpoints_list',
2910         'phonon_kpoints_path'},
2911        {'fine_phonon_kpoint_mp_grid', 'fine_phonon_kpoint_mp_spacing', 'fine_phonon_kpoint_list',
2912         'fine_phonon_kpoint_path'},
2913        {'magres_kpoint_mp_grid', 'magres_kpoint_mp_spacing', 'magres_kpoint_list',
2914         'magres_kpoint_path'},
2915        {'elnes_kpoint_mp_grid', 'elnes_kpoint_mp_spacing', 'elnes_kpoint_list',
2916         'elnes_kpoint_path'},
2917        {'optics_kpoint_mp_grid', 'optics_kpoint_mp_spacing', 'optics_kpoint_list',
2918         'optics_kpoint_path'},
2919        {'supercell_kpoint_mp_grid', 'supercell_kpoint_mp_spacing', 'supercell_kpoint_list',
2920         'supercell_kpoint_path'}, ]
2921
2922    def __init__(self, castep_keywords, keyword_tolerance=1):
2923        self._castep_version = castep_keywords.castep_version
2924        CastepInputFile.__init__(self, castep_keywords.CastepCellDict(),
2925                                 keyword_tolerance)
2926
2927    @property
2928    def castep_version(self):
2929        return self._castep_version
2930
2931    # .cell specific parsers
2932    def _parse_species_pot(self, value):
2933
2934        # Single tuple
2935        if isinstance(value, tuple) and len(value) == 2:
2936            value = [value]
2937        # List of tuples
2938        if hasattr(value, '__getitem__'):
2939            pspots = [tuple(map(str.strip, x)) for x in value]
2940            if not all(map(lambda x: len(x) == 2, value)):
2941                warnings.warn('Please specify pseudopotentials in python as '
2942                              'a tuple or a list of tuples formatted like: '
2943                              '(species, file), e.g. ("O", "path-to/O_OTFG.usp") '
2944                              'Anything else will be ignored')
2945                return None
2946
2947        text_block = self._options['species_pot'].value
2948
2949        text_block = text_block if text_block else ''
2950        # Remove any duplicates
2951        for pp in pspots:
2952            text_block = re.sub(r'\n?\s*%s\s+.*' % pp[0], '', text_block)
2953            if pp[1]:
2954                text_block += '\n%s %s' % pp
2955
2956        return text_block
2957
2958    def _parse_symmetry_ops(self, value):
2959        if not isinstance(value, tuple) \
2960           or not len(value) == 2 \
2961           or not value[0].shape[1:] == (3, 3) \
2962           or not value[1].shape[1:] == (3,) \
2963           or not value[0].shape[0] == value[1].shape[0]:
2964            warnings.warn('Invalid symmetry_ops block, skipping')
2965            return
2966        # Now on to print...
2967        text_block = ''
2968        for op_i, (op_rot, op_tranls) in enumerate(zip(*value)):
2969            text_block += '\n'.join([' '.join([str(x) for x in row])
2970                                     for row in op_rot])
2971            text_block += '\n'
2972            text_block += ' '.join([str(x) for x in op_tranls])
2973            text_block += '\n\n'
2974
2975        return text_block
2976
2977    def _parse_positions_abs_intermediate(self, value):
2978        return _parse_tss_block(value)
2979
2980    def _parse_positions_abs_product(self, value):
2981        return _parse_tss_block(value)
2982
2983    def _parse_positions_frac_intermediate(self, value):
2984        return _parse_tss_block(value, True)
2985
2986    def _parse_positions_frac_product(self, value):
2987        return _parse_tss_block(value, True)
2988
2989
2990CastepKeywords = namedtuple('CastepKeywords',
2991                            ['CastepParamDict', 'CastepCellDict',
2992                             'types', 'levels', 'castep_version'])
2993
2994# We keep this just for naming consistency with older versions
2995
2996
2997def make_cell_dict(data=None):
2998
2999    data = data if data is not None else {}
3000
3001    class CastepCellDict(CastepOptionDict):
3002        def __init__(self):
3003            CastepOptionDict.__init__(self, data)
3004
3005    return CastepCellDict
3006
3007
3008def make_param_dict(data=None):
3009
3010    data = data if data is not None else {}
3011
3012    class CastepParamDict(CastepOptionDict):
3013        def __init__(self):
3014            CastepOptionDict.__init__(self, data)
3015
3016    return CastepParamDict
3017
3018
3019class CastepVersionError(Exception):
3020    """No special behaviour, works to signal when Castep can not be found"""
3021    pass
3022
3023
3024class ConversionError(Exception):
3025
3026    """Print customized error for options that are not converted correctly
3027    and point out that they are maybe not implemented, yet"""
3028
3029    def __init__(self, key_type, attr, value):
3030        Exception.__init__(self)
3031        self.key_type = key_type
3032        self.value = value
3033        self.attr = attr
3034
3035    def __str__(self):
3036        return 'Could not convert %s = %s to %s\n' \
3037            % (self.attr, self.value, self.key_type) \
3038            + 'This means you either tried to set a value of the wrong\n'\
3039            + 'type or this keyword needs some special care. Please feel\n'\
3040            + 'to add it to the corresponding __setattr__ method and send\n'\
3041            + 'the patch to %s, so we can all benefit.' % (contact_email)
3042
3043
3044def get_castep_pp_path(castep_pp_path=''):
3045    """Abstract the quest for a CASTEP PSP directory."""
3046    if castep_pp_path:
3047        return os.path.abspath(os.path.expanduser(castep_pp_path))
3048    elif 'PSPOT_DIR' in os.environ:
3049        return os.environ['PSPOT_DIR']
3050    elif 'CASTEP_PP_PATH' in os.environ:
3051        return os.environ['CASTEP_PP_PATH']
3052    else:
3053        return os.path.abspath('.')
3054
3055
3056def get_castep_command(castep_command=''):
3057    """Abstract the quest for a castep_command string."""
3058    if castep_command:
3059        return castep_command
3060    elif 'CASTEP_COMMAND' in os.environ:
3061        return os.environ['CASTEP_COMMAND']
3062    else:
3063        return 'castep'
3064
3065
3066def shell_stdouterr(raw_command, cwd=None):
3067    """Abstracts the standard call of the commandline, when
3068    we are only interested in the stdout and stderr
3069    """
3070    stdout, stderr = subprocess.Popen(raw_command,
3071                                      stdout=subprocess.PIPE,
3072                                      stderr=subprocess.PIPE,
3073                                      universal_newlines=True,
3074                                      shell=True, cwd=cwd).communicate()
3075    return stdout.strip(), stderr.strip()
3076
3077
3078def import_castep_keywords(castep_command='',
3079                           filename='castep_keywords.json',
3080                           path='.'):
3081
3082    # Search for castep_keywords.json (or however it's called) in multiple
3083    # paths
3084
3085    searchpaths = [path,
3086                   os.path.expanduser('~/.ase'),
3087                   os.path.join(ase.__path__[0], 'calculators')]
3088    try:
3089        kwfile = sum([glob.glob(os.path.join(sp, filename))
3090                      for sp in searchpaths], [])[0]
3091    except IndexError:
3092        warnings.warn("""Generating CASTEP keywords JSON file... hang on.
3093    The CASTEP keywords JSON file contains abstractions for CASTEP input
3094    parameters (for both .cell and .param input files), including some
3095    format checks and descriptions. The latter are extracted from the
3096    internal online help facility of a CASTEP binary, thus allowing to
3097    easily keep the calculator synchronized with (different versions of)
3098    the CASTEP code. Consequently, avoiding licensing issues (CASTEP is
3099    distributed commercially by accelrys), we consider it wise not to
3100    provide the file in the first place.""")
3101        create_castep_keywords(get_castep_command(castep_command),
3102                               filename=filename, path=path)
3103        warnings.warn('Stored %s in %s.  Copy it to your ASE installation under '
3104                      'ase/calculators for system-wide installation. Using a *nix '
3105                      'OS this can be a simple as mv %s %s' %
3106                      (filename, os.path.abspath(path),
3107                       os.path.join(os.path.abspath(path), filename),
3108                       os.path.join(os.path.dirname(ase.__file__),
3109                                    'calculators')))
3110        kwfile = os.path.join(path, filename)
3111
3112    # Now create the castep_keywords object proper
3113    kwdata = json.load(open(kwfile))
3114
3115    # This is a bit awkward, but it's necessary for backwards compatibility
3116    param_dict = make_param_dict(kwdata['param'])
3117    cell_dict = make_cell_dict(kwdata['cell'])
3118
3119    castep_keywords = CastepKeywords(param_dict, cell_dict,
3120                                     kwdata['types'], kwdata['levels'],
3121                                     kwdata['castep_version'])
3122
3123    return castep_keywords
3124
3125
3126if __name__ == '__main__':
3127    warnings.warn('When called directly this calculator will fetch all available '
3128                  'keywords from the binarys help function into a '
3129                  'castep_keywords.json in the current directory %s '
3130                  'For system wide usage, it can be copied into an ase installation '
3131                  'at ASE/calculators. '
3132                  'This castep_keywords.json usually only needs to be generated once '
3133                  'for a CASTEP binary/CASTEP version.' % os.getcwd())
3134
3135    import optparse
3136    parser = optparse.OptionParser()
3137    parser.add_option(
3138        '-f', '--force-write', dest='force_write',
3139        help='Force overwriting existing castep_keywords.json', default=False,
3140        action='store_true')
3141    (options, args) = parser.parse_args()
3142
3143    if args:
3144        opt_castep_command = ''.join(args)
3145    else:
3146        opt_castep_command = ''
3147    generated = create_castep_keywords(get_castep_command(opt_castep_command),
3148                                       force_write=options.force_write)
3149
3150    if generated:
3151        try:
3152            with open('castep_keywords.json') as fd:
3153                json.load(fd)
3154        except Exception as e:
3155            warnings.warn(
3156                '%s Ooops, something went wrong with the CASTEP keywords' % e)
3157        else:
3158            warnings.warn('Import works. Looking good!')
3159