1import pprint
2import re
3from copy import deepcopy
4
5import numpy as np
6
7from ..exceptions import ValidationError
8from ..physical_constants import constants
9from ..util import provenance_stamp, unnp, update_with_error
10from .chgmult import validate_and_fill_chgmult
11from .nucleus import reconcile_nucleus
12from .regex import VERSION_PATTERN
13
14
15def from_input_arrays(
16    *,
17    enable_qm=True,
18    enable_efp=True,
19    missing_enabled_return_qm="error",
20    missing_enabled_return_efp="error",
21    # qm
22    geom=None,
23    elea=None,
24    elez=None,
25    elem=None,
26    mass=None,
27    real=None,
28    elbl=None,
29    name=None,
30    units="Angstrom",
31    input_units_to_au=None,
32    fix_com=None,
33    fix_orientation=None,
34    fix_symmetry=None,
35    fragment_separators=None,
36    fragment_charges=None,
37    fragment_multiplicities=None,
38    molecular_charge=None,
39    molecular_multiplicity=None,
40    # efp
41    fragment_files=None,
42    hint_types=None,
43    geom_hints=None,
44    # qm-vz
45    geom_unsettled=None,
46    variables=None,
47    # processing details
48    speclabel=True,
49    tooclose=0.1,
50    zero_ghost_fragments=False,
51    nonphysical=False,
52    mtol=1.0e-3,
53    copy=True,
54    verbose=1,
55):
56    """Compose a Molecule dict from unvalidated arrays and variables
57    in multiple domains.
58
59    Drives :py:func:`qcelemental.molparse.from_arrays` for sucessive
60    domains and hooks them together (e.g., impose `fix_com` on "qm"
61    when "efp" present.
62
63    """
64    molinit = {}
65    if enable_qm:
66        molinit["qm"] = {}
67    if enable_efp:
68        molinit["efp"] = {}
69
70    if enable_efp:
71        processed = from_arrays(
72            domain="efp",
73            missing_enabled_return=missing_enabled_return_efp,
74            units=units,
75            input_units_to_au=input_units_to_au,
76            fix_com=fix_com,
77            fix_orientation=fix_orientation,
78            fix_symmetry=fix_symmetry,
79            fragment_files=fragment_files,
80            hint_types=hint_types,
81            geom_hints=geom_hints,
82            # which other processing details needed?
83            verbose=verbose,
84        )
85        update_with_error(molinit, {"efp": processed})
86        if molinit["efp"] == {}:
87            del molinit["efp"]
88
89    efp_present = enable_efp and "efp" in molinit and bool(len(molinit["efp"]["geom_hints"]))
90    if efp_present:
91        fix_com = True
92        fix_orientation = True
93        fix_symmetry = "c1"
94
95    if enable_qm:
96        dm = "qmvz" if geom_unsettled else "qm"
97        processed = from_arrays(
98            domain=dm,
99            missing_enabled_return=missing_enabled_return_qm,
100            geom=geom,
101            elea=elea,
102            elez=elez,
103            elem=elem,
104            mass=mass,
105            real=real,
106            elbl=elbl,
107            name=name,
108            units=units,
109            input_units_to_au=input_units_to_au,
110            fix_com=fix_com,
111            fix_orientation=fix_orientation,
112            fix_symmetry=fix_symmetry,
113            fragment_separators=fragment_separators,
114            fragment_charges=fragment_charges,
115            fragment_multiplicities=fragment_multiplicities,
116            molecular_charge=molecular_charge,
117            molecular_multiplicity=molecular_multiplicity,
118            geom_unsettled=geom_unsettled,
119            variables=variables,
120            # processing details
121            speclabel=speclabel,
122            tooclose=tooclose,
123            zero_ghost_fragments=zero_ghost_fragments,
124            nonphysical=nonphysical,
125            mtol=mtol,
126            copy=copy,
127            verbose=1,
128        )
129        update_with_error(molinit, {"qm": processed})
130        if molinit["qm"] == {}:
131            del molinit["qm"]
132
133    return molinit
134
135
136def from_arrays(
137    *,
138    geom=None,
139    elea=None,
140    elez=None,
141    elem=None,
142    mass=None,
143    real=None,
144    elbl=None,
145    name=None,
146    units="Angstrom",
147    input_units_to_au=None,
148    fix_com=None,
149    fix_orientation=None,
150    fix_symmetry=None,
151    fragment_separators=None,
152    fragment_charges=None,
153    fragment_multiplicities=None,
154    molecular_charge=None,
155    molecular_multiplicity=None,
156    comment=None,
157    provenance=None,
158    connectivity=None,
159    fragment_files=None,
160    hint_types=None,
161    geom_hints=None,
162    geom_unsettled=None,
163    variables=None,
164    domain="qm",
165    missing_enabled_return: str = "error",
166    np_out: bool = True,
167    speclabel: bool = True,
168    tooclose: float = 0.1,
169    zero_ghost_fragments=False,
170    nonphysical: bool = False,
171    mtol=1.0e-3,
172    copy=True,
173    verbose=1,
174):
175    r"""Compose a Molecule dict from unvalidated arrays and variables, returning dict.
176
177    See fields of Return molrec below. Required parameters (for QM XYZ)
178    are `geom` and one of `elem`, `elez`, `elbl` (`speclabel=True`)
179
180    Parameters
181    ----------
182    geom : Union[List[List[float]], numpy.ndarray]
183        (nat, 3) or (3 * nat, ) ndarray or list o'lists of Cartesian coordinates.
184    fragment_separators : Union[List[int], numpy.ndarray]
185        (nfr - 1, ) list of atom indices at which to split `geom` into fragments.
186    elbl : Union[List[str], numpy.ndarray]
187        (nat, ) Label extending `elem` symbol, possibly conveying ghosting, isotope, mass, tagging information.
188    tooclose
189        Interatom distance (native `geom` units) nearer than which atoms not allowed.
190    nonphysical
191        Do allow masses outside an element's natural range to pass validation?
192    speclabel
193        If `True`, interpret `elbl` as potentially full nucleus spec including
194        ghosting, isotope, mass, tagging information, e.g., `@13C_mine` or
195        `He4@4.01`. If `False`, interpret `elbl` as only the user/tagging
196        extension to nucleus label, e.g. `_mine` or `4` in the previous examples.
197    missing_enabled_return
198        {'minimal', 'none', 'error'}
199        What to do when an enabled domain is of zero-length? Respectively, return
200        a fully valid but empty molrec, return empty dictionary, or throw error.
201    np_out
202        When `True`, fields geom, elea, elez, elem, mass, real, elbl will be ndarray.
203        Use `False` to get a json-able version.
204
205    Returns
206    -------
207    molrec : dict
208        Molecule dictionary spec follows. Its principles are
209
210        (1) contents are fully validated and defaulted - no error
211        checking necessary,
212
213        (2) contents may be mildly redundant - atomic numbers and
214        element symbols present,
215
216        (3) big system, nat-length single-type arrays, not small system,
217        nat-number heterogeneous objects,
218
219        (4) some fields are optional (e.g., fix_symmetry) but largely
220        self-describing so units or fix_com must be present.
221
222        (5) apart from some mild optional fields, _all_ fields will
223        be present (corollary of "fully validated and defaulted") - no
224        need to check for every key. in some cases like efp, keys will
225        appear in blocks, so pre-handshake there will be a few hint keys
226        and post-handshake they will be joined by full qm-like molrec.
227
228        (6) molrec should be idempotent through this function (equiv to
229        schema validator) but are not idempotent throughout its life. if
230        fields permit, frame may be changed. Future? if fields permit,
231        mol may be symmetrized. Coordinates and angles may change units
232        or range if program returns them in only one form.
233
234    name : str, optional
235        Label for molecule; should be valid Python identifier.
236    units : {'Angstrom', 'Bohr'}
237        Units for `geom`.
238    input_units_to_au : float, optional
239        If `units='Angstrom'`, overrides consumer's value for [A]-->[a0] conversion.
240    fix_com : bool
241        Whether translation of `geom` is allowed or disallowed.
242    fix_orientation : bool
243        Whether rotation of `geom` is allowed or disallowed.
244    fix_symmetry : str, optional
245        Maximal point group symmetry which `geom` should be treated. Lowercase.
246    geom : ndarray of float
247        (3 * nat, ) Cartesian coordinates in `units`.
248    elea : ndarray of int
249        (nat, ) Mass number for atoms, if known isotope, else -1.
250    elez : ndarray of int
251        (nat, ) Number of protons, nuclear charge for atoms.
252    elem : ndarray of str
253        (nat, ) Element symbol for atoms.
254    mass : ndarray of float
255        (nat, ) Atomic mass [u] for atoms.
256    real : ndarray of bool
257        (nat, ) Real/ghostedness for atoms.
258    elbl : ndarray of str
259        (nat, ) Label with any tagging information from element spec.
260    fragment_separators : list of int
261        (nfr - 1, ) list of atom indices at which to split `geom` into fragments.
262    fragment_charges : list of float
263        (nfr, ) list of charge allocated to each fragment.
264    fragment_multiplicities : list of int
265        (nfr, ) list of multiplicity allocated to each fragment.
266    molecular_charge : float
267        total charge on system.
268    molecular_multiplicity : int
269        total multiplicity on system.
270    comment : str, optional
271        Additional comment for molecule.
272    provenance : dict of str
273        Accumulated history of molecule, with fields "creator", "version", "routine".
274    connectivity : list of tuples of int, optional
275        (nbond, 3) list of (0-indexed) (atomA, atomB, bond_order) (int, int, double) tuples
276
277    EFP extension (this + units is minimal)
278
279    fragment_files : list of str
280        (nfr, ) lowercased names of efp meat fragment files.
281    hint_types : {'xyzabc', 'points'}
282        (nfr, ) type of fragment orientation hint.
283    geom_hints : list of lists of float
284        (nfr, ) inner lists have length 6 (xyzabc; to orient the center) or
285        9 (points; to orient the first three atoms) of the EFP fragment.
286
287    QMVZ extension (geom_unsettled replaces geom)
288
289    geom_unsettled : list of lists of str
290        (nat, ) all-string Cartesian and/or zmat anchor and value contents
291        mixing anchors, values, and variables.
292    variables : list of pairs
293        (nvar, 2) pairs of variables (str) and values (float). May be incomplete.
294
295    Raises
296    ------
297    qcelemental.ValidationError
298        For most anything wrong.
299
300    """
301    # <<  domain sorting  >>
302    available_domains = ["qm", "efp", "qmvz"]
303    if domain not in available_domains:
304        raise ValidationError(
305            "Topology domain {} not available for processing. Choose among {}".format(domain, available_domains)
306        )
307
308    if domain == "qm" and (geom is None or np.asarray(geom).size == 0):
309        if missing_enabled_return == "none":
310            return {}
311        elif missing_enabled_return == "minimal":
312            geom = []
313        else:
314            raise ValidationError("""For domain 'qm', `geom` must be provided.""")
315    if domain == "efp" and (geom_hints is None or np.asarray(geom_hints, dtype=object).size == 0):
316        if missing_enabled_return == "none":
317            return {}
318        elif missing_enabled_return == "minimal":
319            geom_hints = []
320            fragment_files = []
321            hint_types = []
322        else:
323            raise ValidationError("""For domain 'efp', `geom_hints` must be provided.""")
324
325    molinit = {}
326    extern = False
327
328    processed = validate_and_fill_units(
329        name=name,
330        units=units,
331        input_units_to_au=input_units_to_au,
332        comment=comment,
333        provenance=provenance,
334        connectivity=connectivity,
335        always_return_iutau=False,
336    )
337    processed["provenance"] = provenance_stamp(__name__)
338    update_with_error(molinit, processed)
339
340    if domain == "efp":
341        processed = validate_and_fill_efp(fragment_files=fragment_files, hint_types=hint_types, geom_hints=geom_hints)
342        update_with_error(molinit, processed)
343        extern = bool(len(molinit["geom_hints"]))
344
345    if domain == "qm" or (domain == "efp" and geom is not None) or domain == "qmvz":
346        if domain == "qmvz":
347            processed = validate_and_fill_unsettled_geometry(geom_unsettled=geom_unsettled, variables=variables)
348            update_with_error(molinit, processed)
349            nat = len(molinit["geom_unsettled"])
350
351        else:
352            processed = validate_and_fill_geometry(geom=geom, tooclose=tooclose, copy=copy)
353            update_with_error(molinit, processed)
354            nat = molinit["geom"].shape[0] // 3
355
356        processed = validate_and_fill_nuclei(
357            nat,
358            elea=elea,
359            elez=elez,
360            elem=elem,
361            mass=mass,
362            real=real,
363            elbl=elbl,
364            speclabel=speclabel,
365            nonphysical=nonphysical,
366            mtol=mtol,
367            verbose=verbose,
368        )
369        update_with_error(molinit, processed)
370
371        processed = validate_and_fill_fragments(
372            nat,
373            fragment_separators=fragment_separators,
374            fragment_charges=fragment_charges,
375            fragment_multiplicities=fragment_multiplicities,
376        )
377        update_with_error(molinit, processed)
378
379        Z_available = molinit["elez"] * molinit["real"] * 1.0
380        processed = validate_and_fill_chgmult(
381            zeff=Z_available,
382            fragment_separators=molinit["fragment_separators"],
383            molecular_charge=molecular_charge,
384            fragment_charges=molinit["fragment_charges"],
385            molecular_multiplicity=molecular_multiplicity,
386            fragment_multiplicities=molinit["fragment_multiplicities"],
387            zero_ghost_fragments=zero_ghost_fragments,
388            verbose=verbose,
389        )
390        del molinit["fragment_charges"]  # sometimes safe update is too picky about overwriting v_a_f_fragments values
391        del molinit["fragment_multiplicities"]
392        update_with_error(molinit, processed)
393
394    extern = domain == "efp"
395
396    processed = validate_and_fill_frame(
397        extern=extern, fix_com=fix_com, fix_orientation=fix_orientation, fix_symmetry=fix_symmetry
398    )
399    update_with_error(molinit, processed)
400
401    if verbose >= 2:
402        print("RETURN FROM qcel.molparse.from_arrays(domain={})".format(domain.upper()))
403        pprint.pprint(molinit)
404
405    if not np_out:
406        molinit = unnp(molinit)
407
408    return molinit
409
410
411def validate_and_fill_units(
412    name=None,
413    units="Angstrom",
414    input_units_to_au=None,
415    comment=None,
416    provenance=None,
417    connectivity=None,
418    always_return_iutau=False,
419):
420    molinit = {}
421
422    if name is not None:
423        molinit["name"] = name
424
425    if comment is not None:
426        molinit["comment"] = comment
427
428    def validate_provenance(dicary):
429        expected_prov_keys = ["creator", "routine", "version"]
430        try:
431            prov_keys = sorted(dicary.keys())
432        except AttributeError:
433            raise ValidationError("Provenance entry is not dictionary: {}".format(dicary))
434
435        if prov_keys == expected_prov_keys:
436            if not isinstance(dicary["creator"], str):
437                raise ValidationError(
438                    """Provenance key 'creator' should be string of creating program's name: {}""".format(
439                        dicary["creator"]
440                    )
441                )
442            if not re.fullmatch(VERSION_PATTERN, dicary["version"], re.VERBOSE):
443                raise ValidationError(
444                    """Provenance key 'version' should be a valid PEP 440 string: {}""".format(dicary["version"])
445                )
446            if not isinstance(dicary["routine"], str):
447                raise ValidationError(
448                    """Provenance key 'routine' should be string of creating function's name: {}""".format(
449                        dicary["routine"]
450                    )
451                )
452            return True
453        else:
454            raise ValidationError("Provenance keys ({}) incorrect: {}".format(expected_prov_keys, prov_keys))
455
456    if provenance is None:
457        molinit["provenance"] = {}
458    else:
459        if validate_provenance(provenance):
460            molinit["provenance"] = deepcopy(provenance)
461
462    if connectivity is not None:
463        conn = []
464        try:
465            for (at1, at2, bondorder) in connectivity:
466                if not (float(at1)).is_integer() or at1 < 0:  # or at1 >= nat:
467                    raise ValidationError("""Connectivity first atom should be int [0, nat): {}""".format(at1))
468                if not (float(at2)).is_integer() or at2 < 0:  # or at2 >= nat:
469                    raise ValidationError("""Connectivity second atom should be int [0, nat): {}""".format(at2))
470                if bondorder < 0 or bondorder > 5:
471                    raise ValidationError("""Connectivity bond order should be float [0, 5]: {}""".format(bondorder))
472                conn.append((int(min(at1, at2)), int(max(at1, at2)), float(bondorder)))
473            conn.sort(key=lambda tup: tup[0])
474            molinit["connectivity"] = conn
475        except ValueError:
476            raise ValidationError(
477                "Connectivity entry is not of form [(at1, at2, bondorder), ...]: {}".format(connectivity)
478            )
479
480    if units.capitalize() in ["Angstrom", "Bohr"]:
481        molinit["units"] = units.capitalize()
482    else:
483        raise ValidationError("Invalid molecule geometry units: {}".format(units))
484
485    if molinit["units"] == "Bohr":
486        iutau = 1.0
487    elif molinit["units"] == "Angstrom":
488        iutau = 1.0 / constants.bohr2angstroms
489
490    if input_units_to_au is not None:
491        if abs(input_units_to_au - iutau) < 0.05:
492            iutau = input_units_to_au
493        else:
494            raise ValidationError(
495                """No big perturbations to physical constants! {} !~= {}""".format(iutau, input_units_to_au)
496            )
497
498    if always_return_iutau or input_units_to_au is not None:
499        molinit["input_units_to_au"] = iutau
500
501    return molinit
502
503
504def validate_and_fill_frame(extern, fix_com=None, fix_orientation=None, fix_symmetry=None):
505
506    if fix_com is True:
507        com = True
508    elif fix_com is False:
509        if extern:
510            raise ValidationError("Invalid fix_com ({}) with extern ({})".format(fix_com, extern))
511        else:
512            com = False
513    elif fix_com is None:
514        com = extern
515    else:
516        raise ValidationError("Invalid fix_com: {}".format(fix_com))
517
518    if fix_orientation is True:
519        orient = True
520    elif fix_orientation is False:
521        if extern:
522            raise ValidationError("Invalid fix_orientation ({}) with extern ({})".format(fix_orientation, extern))
523        else:
524            orient = False
525    elif fix_orientation is None:
526        orient = extern
527    else:
528        raise ValidationError("Invalid fix_orientation: {}".format(fix_orientation))
529
530    symm = None
531    if extern:
532        if fix_symmetry is None:
533            symm = "c1"
534        elif fix_symmetry.lower() == "c1":
535            symm = "c1"
536        else:
537            raise ValidationError("Invalid (non-C1) fix_symmetry ({}) with extern ({})".format(fix_symmetry, extern))
538    else:
539        if fix_symmetry is not None:
540            symm = fix_symmetry.lower()
541
542    molinit = {}
543    molinit["fix_com"] = com
544    molinit["fix_orientation"] = orient
545    if symm:
546        molinit["fix_symmetry"] = symm
547
548    return molinit
549
550
551def validate_and_fill_efp(fragment_files=None, hint_types=None, geom_hints=None):
552
553    if (
554        fragment_files is None
555        or hint_types is None
556        or geom_hints is None
557        or fragment_files == [None]
558        or hint_types == [None]
559        or geom_hints == [None]
560        or not (len(fragment_files) == len(hint_types) == len(geom_hints))
561    ):
562
563        raise ValidationError(
564            """Missing or inconsistent length among efp quantities: fragment_files ({}), hint_types ({}), and geom_hints ({})""".format(
565                fragment_files, hint_types, geom_hints
566            )
567        )
568
569    # NOTE: imposing case on file
570    try:
571        files = [f.lower() for f in fragment_files]
572    except AttributeError:
573        raise ValidationError("""fragment_files not strings: {}""".format(fragment_files))
574
575    if all(f in ["xyzabc", "points", "rotmat"] for f in hint_types):
576        types = hint_types
577    else:
578        raise ValidationError("""hint_types not among 'xyzabc', 'points', 'rotmat': {}""".format(hint_types))
579
580    hints = []
581    hlen = {"xyzabc": 6, "points": 9, "rotmat": 12}
582    for ifr, fr in enumerate(geom_hints):
583        try:
584            hint = [float(f) for f in fr]
585        except (ValueError, TypeError):
586            raise ValidationError("""Un float-able elements in geom_hints[{}]: {}""".format(ifr, fr))
587
588        htype = hint_types[ifr]
589        if len(hint) == hlen[htype]:
590            hints.append(hint)
591        else:
592            raise ValidationError("""EFP hint type {} not {} elements: {}""".format(htype, hlen[htype], hint))
593
594    return {"fragment_files": files, "hint_types": types, "geom_hints": hints}
595
596
597def validate_and_fill_geometry(geom=None, tooclose=0.1, copy=True):
598    """Check `geom` for overlapping atoms. Return flattened"""
599
600    npgeom = np.array(geom, copy=copy, dtype=float).reshape((-1, 3))
601
602    # Upper triangular
603    metric = tooclose ** 2
604    tooclose_inds = []
605    for x in range(npgeom.shape[0]):
606        diffs = npgeom[x] - npgeom[x + 1 :]
607        dists = np.einsum("ij,ij->i", diffs, diffs)
608
609        # Record issues
610        if np.any(dists < metric):
611            indices = np.where(dists < metric)[0]
612            tooclose_inds.extend([(x, y, dist) for y, dist in zip(indices + x + 1, dists[indices] ** 0.5)])
613
614    if tooclose_inds:
615        raise ValidationError(
616            """Following atoms are too close: {}""".format([(i, j, dist) for i, j, dist in tooclose_inds])
617        )
618
619    return {"geom": npgeom.reshape((-1))}
620
621
622def validate_and_fill_nuclei(
623    nat,
624    elea=None,
625    elez=None,
626    elem=None,
627    mass=None,
628    real=None,
629    elbl=None,
630    # processing details
631    speclabel=True,
632    nonphysical=False,
633    mtol=1.0e-3,
634    verbose=1,
635):
636    """Check the nuclear identity arrays for consistency and fill in knowable values."""
637
638    if elea is None:
639        elea = np.asarray([None] * nat)
640    else:
641        # -1 equivalent to None
642        elea = np.asarray(elea)
643        if -1 in elea:
644            elea = np.array([(None if at == -1 else at) for at in elea])  # Rebuild to change dtype if needed.
645
646    if elez is None:
647        elez = np.asarray([None] * nat)
648    else:
649        elez = np.asarray(elez)
650
651    if elem is None:
652        elem = np.asarray([None] * nat)
653    else:
654        elem = np.asarray(elem)
655
656    if mass is None:
657        mass = np.asarray([None] * nat)
658    else:
659        mass = np.asarray(mass)
660
661    if real is None:
662        real = np.asarray([None] * nat)
663    else:
664        real = np.asarray(real)
665
666    if elbl is None:
667        elbl = np.asarray([None] * nat)
668    else:
669        elbl = np.asarray(elbl)
670
671    if not ((nat,) == elea.shape == elez.shape == elem.shape == mass.shape == real.shape == elbl.shape):
672        raise ValidationError(
673            """Dimension mismatch natom {} among A {}, Z {}, E {}, mass {}, real {}, and elbl {}""".format(
674                (nat,), elea.shape, elez.shape, elem.shape, mass.shape, real.shape, elbl.shape
675            )
676        )
677
678    if nat:
679        A, Z, E, mass, real, label = zip(
680            *[
681                reconcile_nucleus(
682                    A=elea[at],
683                    Z=elez[at],
684                    E=elem[at],
685                    mass=mass[at],
686                    real=real[at],
687                    label=elbl[at],
688                    speclabel=speclabel,
689                    nonphysical=nonphysical,
690                    mtol=mtol,
691                    verbose=verbose,
692                )
693                for at in range(nat)
694            ]
695        )
696    else:
697        A = Z = E = mass = real = label = []
698    return {
699        "elea": np.array(A, dtype=int),
700        "elez": np.array(Z, dtype=int),
701        "elem": np.array(E),
702        "mass": np.array(mass, dtype=float),
703        "real": np.array(real, dtype=bool),
704        "elbl": np.array(label),
705    }
706
707
708def validate_and_fill_fragments(nat, fragment_separators=None, fragment_charges=None, fragment_multiplicities=None):
709    """Check consistency of fragment specifiers wrt type and length. For
710    charge & multiplicity, scientific defaults are not computed or applied;
711    rather, missing slots are filled with `None` for later processing.
712
713    """
714    if fragment_separators is None:
715        if fragment_charges is None and fragment_multiplicities is None:
716            frs = []  # np.array([], dtype=int)  # if empty, needs to be both ndarray and int
717            frc = [None]
718            frm = [None]
719        else:
720            raise ValidationError(
721                """Fragment quantities given without separation info: sep ({}), chg ({}), and mult ({})""".format(
722                    fragment_separators, fragment_charges, fragment_multiplicities
723                )
724            )
725    else:
726        trial_geom = np.zeros((nat, 3))
727        try:
728            split_geom = np.split(trial_geom, fragment_separators, axis=0)
729        except TypeError:
730            raise ValidationError(
731                """fragment_separators ({}) unable to perform trial np.split on geometry.""".format(fragment_separators)
732            )
733        if any(len(f) == 0 for f in split_geom):
734            if nat != 0:
735                raise ValidationError(
736                    """fragment_separators ({}) yields zero-length fragment(s) after trial np.split on geometry.""".format(
737                        split_geom
738                    )
739                )
740        if sum(len(f) for f in split_geom) != nat:
741            raise ValidationError(
742                """fragment_separators ({}) yields overlapping fragment(s) after trial np.split on geometry, possibly unsorted.""".format(
743                    split_geom
744                )
745            )
746        frs = fragment_separators
747        nfr = len(split_geom)
748
749        if fragment_charges is None:
750            frc = [None] * nfr
751        else:
752            try:
753                frc = [(f if f is None else float(f)) for f in fragment_charges]
754            except TypeError:
755                raise ValidationError("""fragment_charges not among None or float: {}""".format(fragment_charges))
756
757        if fragment_multiplicities is None:
758            frm = [None] * nfr
759        elif all(f is None or (isinstance(f, (int, np.integer)) and f >= 1) for f in fragment_multiplicities):
760            frm = fragment_multiplicities
761        else:
762            raise ValidationError(
763                """fragment_multiplicities not among None or positive integer: {}""".format(fragment_multiplicities)
764            )
765
766    if not (len(frc) == len(frm) == len(frs) + 1):
767        raise ValidationError(
768            """Dimension mismatch among fragment quantities: sep + 1 ({}), chg ({}), and mult({})""".format(
769                len(frs) + 1, len(frc), len(frm)
770            )
771        )
772
773    return {"fragment_separators": list(frs), "fragment_charges": frc, "fragment_multiplicities": frm}
774
775
776def validate_and_fill_unsettled_geometry(geom_unsettled, variables):
777    lgeom = [len(g) for g in geom_unsettled]
778
779    if lgeom[0] not in [0, 3]:
780        raise ValidationError("""First line must be Cartesian or single atom.""")
781
782    if any(l == 3 for l in lgeom) and not all((l in [3, 6]) for l in lgeom):
783        raise ValidationError(
784            """Mixing Cartesian and Zmat formats must occur in just that order once absolute frame established."""
785        )
786
787    allowed_to_follow = {0: [2], 2: [4], 3: [3, 6], 4: [6], 6: [3, 6]}
788
789    for il in range(len(lgeom) - 1):
790        if lgeom[il + 1] not in allowed_to_follow[lgeom[il]]:
791            raise ValidationError(
792                """This is not how a Zmat works - aim for lower triangular. Line len ({}) may be followed by line len ({}), not ({}).""".format(
793                    lgeom[il], allowed_to_follow[lgeom[il]], lgeom[il + 1]
794                )
795            )
796
797    if not all(len(v) == 2 for v in variables):
798        raise ValidationError("""Variables should come in pairs: {}""".format(variables))
799
800    vvars = [[str(v[0]), float(v[1])] for v in variables]
801
802    return {"geom_unsettled": geom_unsettled, "variables": vvars}
803