qcelemental/molparse/from_arrays.py

import pprint
import re
from copy import deepcopy

import numpy as np

from ..exceptions import ValidationError
from ..physical_constants import constants
from ..util import provenance_stamp, unnp, update_with_error
from .chgmult import validate_and_fill_chgmult
from .nucleus import reconcile_nucleus
from .regex import VERSION_PATTERN


def from_input_arrays(
    *,
    enable_qm=True,
    enable_efp=True,
    missing_enabled_return_qm="error",
    missing_enabled_return_efp="error",
    # qm
    geom=None,
    elea=None,
    elez=None,
    elem=None,
    mass=None,
    real=None,
    elbl=None,
    name=None,
    units="Angstrom",
    input_units_to_au=None,
    fix_com=None,
    fix_orientation=None,
    fix_symmetry=None,
    fragment_separators=None,
    fragment_charges=None,
    fragment_multiplicities=None,
    molecular_charge=None,
    molecular_multiplicity=None,
    # efp
    fragment_files=None,
    hint_types=None,
    geom_hints=None,
    # qm-vz
    geom_unsettled=None,
    variables=None,
    # processing details
    speclabel=True,
    tooclose=0.1,
    zero_ghost_fragments=False,
    nonphysical=False,
    mtol=1.0e-3,
    copy=True,
    verbose=1,
):
    """Compose a Molecule dict from unvalidated arrays and variables
    in multiple domains.

    Drives :py:func:`qcelemental.molparse.from_arrays` for sucessive
    domains and hooks them together (e.g., impose `fix_com` on "qm"
    when "efp" present.

    """
    molinit = {}
    if enable_qm:
        molinit["qm"] = {}
    if enable_efp:
        molinit["efp"] = {}

    if enable_efp:
        processed = from_arrays(
            domain="efp",
            missing_enabled_return=missing_enabled_return_efp,
            units=units,
            input_units_to_au=input_units_to_au,
            fix_com=fix_com,
            fix_orientation=fix_orientation,
            fix_symmetry=fix_symmetry,
            fragment_files=fragment_files,
            hint_types=hint_types,
            geom_hints=geom_hints,
            # which other processing details needed?
            verbose=verbose,
        )
        update_with_error(molinit, {"efp": processed})
        if molinit["efp"] == {}:
            del molinit["efp"]

    efp_present = enable_efp and "efp" in molinit and bool(len(molinit["efp"]["geom_hints"]))
    if efp_present:
        fix_com = True
        fix_orientation = True
        fix_symmetry = "c1"

    if enable_qm:
        dm = "qmvz" if geom_unsettled else "qm"
        processed = from_arrays(
            domain=dm,
            missing_enabled_return=missing_enabled_return_qm,
            geom=geom,
            elea=elea,
            elez=elez,
            elem=elem,
            mass=mass,
            real=real,
            elbl=elbl,
            name=name,
            units=units,
            input_units_to_au=input_units_to_au,
            fix_com=fix_com,
            fix_orientation=fix_orientation,
            fix_symmetry=fix_symmetry,
            fragment_separators=fragment_separators,
            fragment_charges=fragment_charges,
            fragment_multiplicities=fragment_multiplicities,
            molecular_charge=molecular_charge,
            molecular_multiplicity=molecular_multiplicity,
            geom_unsettled=geom_unsettled,
            variables=variables,
            # processing details
            speclabel=speclabel,
            tooclose=tooclose,
            zero_ghost_fragments=zero_ghost_fragments,
            nonphysical=nonphysical,
            mtol=mtol,
            copy=copy,
            verbose=1,
        )
        update_with_error(molinit, {"qm": processed})
        if molinit["qm"] == {}:
            del molinit["qm"]

    return molinit


def from_arrays(
    *,
    geom=None,
    elea=None,
    elez=None,
    elem=None,
    mass=None,
    real=None,
    elbl=None,
    name=None,
    units="Angstrom",
    input_units_to_au=None,
    fix_com=None,
    fix_orientation=None,
    fix_symmetry=None,
    fragment_separators=None,
    fragment_charges=None,
    fragment_multiplicities=None,
    molecular_charge=None,
    molecular_multiplicity=None,
    comment=None,
    provenance=None,
    connectivity=None,
    fragment_files=None,
    hint_types=None,
    geom_hints=None,
    geom_unsettled=None,
    variables=None,
    domain="qm",
    missing_enabled_return: str = "error",
    np_out: bool = True,
    speclabel: bool = True,
    tooclose: float = 0.1,
    zero_ghost_fragments=False,
    nonphysical: bool = False,
    mtol=1.0e-3,
    copy=True,
    verbose=1,
):
    r"""Compose a Molecule dict from unvalidated arrays and variables, returning dict.

    See fields of Return molrec below. Required parameters (for QM XYZ)
    are `geom` and one of `elem`, `elez`, `elbl` (`speclabel=True`)

    Parameters
    ----------
    geom : Union[List[List[float]], numpy.ndarray]
        (nat, 3) or (3 * nat, ) ndarray or list o'lists of Cartesian coordinates.
    fragment_separators : Union[List[int], numpy.ndarray]
        (nfr - 1, ) list of atom indices at which to split `geom` into fragments.
    elbl : Union[List[str], numpy.ndarray]
        (nat, ) Label extending `elem` symbol, possibly conveying ghosting, isotope, mass, tagging information.
    tooclose
        Interatom distance (native `geom` units) nearer than which atoms not allowed.
    nonphysical
        Do allow masses outside an element's natural range to pass validation?
    speclabel
        If `True`, interpret `elbl` as potentially full nucleus spec including
        ghosting, isotope, mass, tagging information, e.g., `@13C_mine` or
        `He4@4.01`. If `False`, interpret `elbl` as only the user/tagging
        extension to nucleus label, e.g. `_mine` or `4` in the previous examples.
    missing_enabled_return
        {'minimal', 'none', 'error'}
        What to do when an enabled domain is of zero-length? Respectively, return
        a fully valid but empty molrec, return empty dictionary, or throw error.
    np_out
        When `True`, fields geom, elea, elez, elem, mass, real, elbl will be ndarray.
        Use `False` to get a json-able version.

    Returns
    -------
    molrec : dict
        Molecule dictionary spec follows. Its principles are

        (1) contents are fully validated and defaulted - no error
        checking necessary,

        (2) contents may be mildly redundant - atomic numbers and
        element symbols present,

        (3) big system, nat-length single-type arrays, not small system,
        nat-number heterogeneous objects,

        (4) some fields are optional (e.g., fix_symmetry) but largely
        self-describing so units or fix_com must be present.

        (5) apart from some mild optional fields, _all_ fields will
        be present (corollary of "fully validated and defaulted") - no
        need to check for every key. in some cases like efp, keys will
        appear in blocks, so pre-handshake there will be a few hint keys
        and post-handshake they will be joined by full qm-like molrec.

        (6) molrec should be idempotent through this function (equiv to
        schema validator) but are not idempotent throughout its life. if
        fields permit, frame may be changed. Future? if fields permit,
        mol may be symmetrized. Coordinates and angles may change units
        or range if program returns them in only one form.

    name : str, optional
        Label for molecule; should be valid Python identifier.
    units : {'Angstrom', 'Bohr'}
        Units for `geom`.
    input_units_to_au : float, optional
        If `units='Angstrom'`, overrides consumer's value for [A]-->[a0] conversion.
    fix_com : bool
        Whether translation of `geom` is allowed or disallowed.
    fix_orientation : bool
        Whether rotation of `geom` is allowed or disallowed.
    fix_symmetry : str, optional
        Maximal point group symmetry which `geom` should be treated. Lowercase.
    geom : ndarray of float
        (3 * nat, ) Cartesian coordinates in `units`.
    elea : ndarray of int
        (nat, ) Mass number for atoms, if known isotope, else -1.
    elez : ndarray of int
        (nat, ) Number of protons, nuclear charge for atoms.
    elem : ndarray of str
        (nat, ) Element symbol for atoms.
    mass : ndarray of float
        (nat, ) Atomic mass [u] for atoms.
    real : ndarray of bool
        (nat, ) Real/ghostedness for atoms.
    elbl : ndarray of str
        (nat, ) Label with any tagging information from element spec.
    fragment_separators : list of int
        (nfr - 1, ) list of atom indices at which to split `geom` into fragments.
    fragment_charges : list of float
        (nfr, ) list of charge allocated to each fragment.
    fragment_multiplicities : list of int
        (nfr, ) list of multiplicity allocated to each fragment.
    molecular_charge : float
        total charge on system.
    molecular_multiplicity : int
        total multiplicity on system.
    comment : str, optional
        Additional comment for molecule.
    provenance : dict of str
        Accumulated history of molecule, with fields "creator", "version", "routine".
    connectivity : list of tuples of int, optional
        (nbond, 3) list of (0-indexed) (atomA, atomB, bond_order) (int, int, double) tuples

    EFP extension (this + units is minimal)

    fragment_files : list of str
        (nfr, ) lowercased names of efp meat fragment files.
    hint_types : {'xyzabc', 'points'}
        (nfr, ) type of fragment orientation hint.
    geom_hints : list of lists of float
        (nfr, ) inner lists have length 6 (xyzabc; to orient the center) or
        9 (points; to orient the first three atoms) of the EFP fragment.

    QMVZ extension (geom_unsettled replaces geom)

    geom_unsettled : list of lists of str
        (nat, ) all-string Cartesian and/or zmat anchor and value contents
        mixing anchors, values, and variables.
    variables : list of pairs
        (nvar, 2) pairs of variables (str) and values (float). May be incomplete.

    Raises
    ------
    qcelemental.ValidationError
        For most anything wrong.

    """
    # <<  domain sorting  >>
    available_domains = ["qm", "efp", "qmvz"]
    if domain not in available_domains:
        raise ValidationError(
            "Topology domain {} not available for processing. Choose among {}".format(domain, available_domains)
        )

    if domain == "qm" and (geom is None or np.asarray(geom).size == 0):
        if missing_enabled_return == "none":
            return {}
        elif missing_enabled_return == "minimal":
            geom = []
        else:
            raise ValidationError("""For domain 'qm', `geom` must be provided.""")
    if domain == "efp" and (geom_hints is None or np.asarray(geom_hints, dtype=object).size == 0):
        if missing_enabled_return == "none":
            return {}
        elif missing_enabled_return == "minimal":
            geom_hints = []
            fragment_files = []
            hint_types = []
        else:
            raise ValidationError("""For domain 'efp', `geom_hints` must be provided.""")

    molinit = {}
    extern = False

    processed = validate_and_fill_units(
        name=name,
        units=units,
        input_units_to_au=input_units_to_au,
        comment=comment,
        provenance=provenance,
        connectivity=connectivity,
        always_return_iutau=False,
    )
    processed["provenance"] = provenance_stamp(__name__)
    update_with_error(molinit, processed)

    if domain == "efp":
        processed = validate_and_fill_efp(fragment_files=fragment_files, hint_types=hint_types, geom_hints=geom_hints)
        update_with_error(molinit, processed)
        extern = bool(len(molinit["geom_hints"]))

    if domain == "qm" or (domain == "efp" and geom is not None) or domain == "qmvz":
        if domain == "qmvz":
            processed = validate_and_fill_unsettled_geometry(geom_unsettled=geom_unsettled, variables=variables)
            update_with_error(molinit, processed)
            nat = len(molinit["geom_unsettled"])

        else:
            processed = validate_and_fill_geometry(geom=geom, tooclose=tooclose, copy=copy)
            update_with_error(molinit, processed)
            nat = molinit["geom"].shape[0] // 3

        processed = validate_and_fill_nuclei(
            nat,
            elea=elea,
            elez=elez,
            elem=elem,
            mass=mass,
            real=real,
            elbl=elbl,
            speclabel=speclabel,
            nonphysical=nonphysical,
            mtol=mtol,
            verbose=verbose,
        )
        update_with_error(molinit, processed)

        processed = validate_and_fill_fragments(
            nat,
            fragment_separators=fragment_separators,
            fragment_charges=fragment_charges,
            fragment_multiplicities=fragment_multiplicities,
        )
        update_with_error(molinit, processed)

        Z_available = molinit["elez"] * molinit["real"] * 1.0
        processed = validate_and_fill_chgmult(
            zeff=Z_available,
            fragment_separators=molinit["fragment_separators"],
            molecular_charge=molecular_charge,
            fragment_charges=molinit["fragment_charges"],
            molecular_multiplicity=molecular_multiplicity,
            fragment_multiplicities=molinit["fragment_multiplicities"],
            zero_ghost_fragments=zero_ghost_fragments,
            verbose=verbose,
        )
        del molinit["fragment_charges"]  # sometimes safe update is too picky about overwriting v_a_f_fragments values
        del molinit["fragment_multiplicities"]
        update_with_error(molinit, processed)

    extern = domain == "efp"

    processed = validate_and_fill_frame(
        extern=extern, fix_com=fix_com, fix_orientation=fix_orientation, fix_symmetry=fix_symmetry
    )
    update_with_error(molinit, processed)

    if verbose >= 2:
        print("RETURN FROM qcel.molparse.from_arrays(domain={})".format(domain.upper()))
        pprint.pprint(molinit)

    if not np_out:
        molinit = unnp(molinit)

    return molinit


def validate_and_fill_units(
    name=None,
    units="Angstrom",
    input_units_to_au=None,
    comment=None,
    provenance=None,
    connectivity=None,
    always_return_iutau=False,
):
    molinit = {}

    if name is not None:
        molinit["name"] = name

    if comment is not None:
        molinit["comment"] = comment

    def validate_provenance(dicary):
        expected_prov_keys = ["creator", "routine", "version"]
        try:
            prov_keys = sorted(dicary.keys())
        except AttributeError:
            raise ValidationError("Provenance entry is not dictionary: {}".format(dicary))

        if prov_keys == expected_prov_keys:
            if not isinstance(dicary["creator"], str):
                raise ValidationError(
                    """Provenance key 'creator' should be string of creating program's name: {}""".format(
                        dicary["creator"]
                    )
                )
            if not re.fullmatch(VERSION_PATTERN, dicary["version"], re.VERBOSE):
                raise ValidationError(
                    """Provenance key 'version' should be a valid PEP 440 string: {}""".format(dicary["version"])
                )
            if not isinstance(dicary["routine"], str):
                raise ValidationError(
                    """Provenance key 'routine' should be string of creating function's name: {}""".format(
                        dicary["routine"]
                    )
                )
            return True
        else:
            raise ValidationError("Provenance keys ({}) incorrect: {}".format(expected_prov_keys, prov_keys))

    if provenance is None:
        molinit["provenance"] = {}
    else:
        if validate_provenance(provenance):
            molinit["provenance"] = deepcopy(provenance)

    if connectivity is not None:
        conn = []
        try:
            for (at1, at2, bondorder) in connectivity:
                if not (float(at1)).is_integer() or at1 < 0:  # or at1 >= nat:
                    raise ValidationError("""Connectivity first atom should be int [0, nat): {}""".format(at1))
                if not (float(at2)).is_integer() or at2 < 0:  # or at2 >= nat:
                    raise ValidationError("""Connectivity second atom should be int [0, nat): {}""".format(at2))
                if bondorder < 0 or bondorder > 5:
                    raise ValidationError("""Connectivity bond order should be float [0, 5]: {}""".format(bondorder))
                conn.append((int(min(at1, at2)), int(max(at1, at2)), float(bondorder)))
            conn.sort(key=lambda tup: tup[0])
            molinit["connectivity"] = conn
        except ValueError:
            raise ValidationError(
                "Connectivity entry is not of form [(at1, at2, bondorder), ...]: {}".format(connectivity)
            )

    if units.capitalize() in ["Angstrom", "Bohr"]:
        molinit["units"] = units.capitalize()
    else:
        raise ValidationError("Invalid molecule geometry units: {}".format(units))

    if molinit["units"] == "Bohr":
        iutau = 1.0
    elif molinit["units"] == "Angstrom":
        iutau = 1.0 / constants.bohr2angstroms

    if input_units_to_au is not None:
        if abs(input_units_to_au - iutau) < 0.05:
            iutau = input_units_to_au
        else:
            raise ValidationError(
                """No big perturbations to physical constants! {} !~= {}""".format(iutau, input_units_to_au)
            )

    if always_return_iutau or input_units_to_au is not None:
        molinit["input_units_to_au"] = iutau

    return molinit


def validate_and_fill_frame(extern, fix_com=None, fix_orientation=None, fix_symmetry=None):

    if fix_com is True:
        com = True
    elif fix_com is False:
        if extern:
            raise ValidationError("Invalid fix_com ({}) with extern ({})".format(fix_com, extern))
        else:
            com = False
    elif fix_com is None:
        com = extern
    else:
        raise ValidationError("Invalid fix_com: {}".format(fix_com))

    if fix_orientation is True:
        orient = True
    elif fix_orientation is False:
        if extern:
            raise ValidationError("Invalid fix_orientation ({}) with extern ({})".format(fix_orientation, extern))
        else:
            orient = False
    elif fix_orientation is None:
        orient = extern
    else:
        raise ValidationError("Invalid fix_orientation: {}".format(fix_orientation))

    symm = None
    if extern:
        if fix_symmetry is None:
            symm = "c1"
        elif fix_symmetry.lower() == "c1":
            symm = "c1"
        else:
            raise ValidationError("Invalid (non-C1) fix_symmetry ({}) with extern ({})".format(fix_symmetry, extern))
    else:
        if fix_symmetry is not None:
            symm = fix_symmetry.lower()

    molinit = {}
    molinit["fix_com"] = com
    molinit["fix_orientation"] = orient
    if symm:
        molinit["fix_symmetry"] = symm

    return molinit


def validate_and_fill_efp(fragment_files=None, hint_types=None, geom_hints=None):

    if (
        fragment_files is None
        or hint_types is None
        or geom_hints is None
        or fragment_files == [None]
        or hint_types == [None]
        or geom_hints == [None]
        or not (len(fragment_files) == len(hint_types) == len(geom_hints))
    ):

        raise ValidationError(
            """Missing or inconsistent length among efp quantities: fragment_files ({}), hint_types ({}), and geom_hints ({})""".format(
                fragment_files, hint_types, geom_hints
            )
        )

    # NOTE: imposing case on file
    try:
        files = [f.lower() for f in fragment_files]
    except AttributeError:
        raise ValidationError("""fragment_files not strings: {}""".format(fragment_files))

    if all(f in ["xyzabc", "points", "rotmat"] for f in hint_types):
        types = hint_types
    else:
        raise ValidationError("""hint_types not among 'xyzabc', 'points', 'rotmat': {}""".format(hint_types))

    hints = []
    hlen = {"xyzabc": 6, "points": 9, "rotmat": 12}
    for ifr, fr in enumerate(geom_hints):
        try:
            hint = [float(f) for f in fr]
        except (ValueError, TypeError):
            raise ValidationError("""Un float-able elements in geom_hints[{}]: {}""".format(ifr, fr))

        htype = hint_types[ifr]
        if len(hint) == hlen[htype]:
            hints.append(hint)
        else:
            raise ValidationError("""EFP hint type {} not {} elements: {}""".format(htype, hlen[htype], hint))

    return {"fragment_files": files, "hint_types": types, "geom_hints": hints}


def validate_and_fill_geometry(geom=None, tooclose=0.1, copy=True):
    """Check `geom` for overlapping atoms. Return flattened"""

    npgeom = np.array(geom, copy=copy, dtype=float).reshape((-1, 3))

    # Upper triangular
    metric = tooclose ** 2
    tooclose_inds = []
    for x in range(npgeom.shape[0]):
        diffs = npgeom[x] - npgeom[x + 1 :]
        dists = np.einsum("ij,ij->i", diffs, diffs)

        # Record issues
        if np.any(dists < metric):
            indices = np.where(dists < metric)[0]
            tooclose_inds.extend([(x, y, dist) for y, dist in zip(indices + x + 1, dists[indices] ** 0.5)])

    if tooclose_inds:
        raise ValidationError(
            """Following atoms are too close: {}""".format([(i, j, dist) for i, j, dist in tooclose_inds])
        )

    return {"geom": npgeom.reshape((-1))}


def validate_and_fill_nuclei(
    nat,
    elea=None,
    elez=None,
    elem=None,
    mass=None,
    real=None,
    elbl=None,
    # processing details
    speclabel=True,
    nonphysical=False,
    mtol=1.0e-3,
    verbose=1,
):
    """Check the nuclear identity arrays for consistency and fill in knowable values."""

    if elea is None:
        elea = np.asarray([None] * nat)
    else:
        # -1 equivalent to None
        elea = np.asarray(elea)
        if -1 in elea:
            elea = np.array([(None if at == -1 else at) for at in elea])  # Rebuild to change dtype if needed.

    if elez is None:
        elez = np.asarray([None] * nat)
    else:
        elez = np.asarray(elez)

    if elem is None:
        elem = np.asarray([None] * nat)
    else:
        elem = np.asarray(elem)

    if mass is None:
        mass = np.asarray([None] * nat)
    else:
        mass = np.asarray(mass)

    if real is None:
        real = np.asarray([None] * nat)
    else:
        real = np.asarray(real)

    if elbl is None:
        elbl = np.asarray([None] * nat)
    else:
        elbl = np.asarray(elbl)

    if not ((nat,) == elea.shape == elez.shape == elem.shape == mass.shape == real.shape == elbl.shape):
        raise ValidationError(
            """Dimension mismatch natom {} among A {}, Z {}, E {}, mass {}, real {}, and elbl {}""".format(
                (nat,), elea.shape, elez.shape, elem.shape, mass.shape, real.shape, elbl.shape
            )
        )

    if nat:
        A, Z, E, mass, real, label = zip(
            *[
                reconcile_nucleus(
                    A=elea[at],
                    Z=elez[at],
                    E=elem[at],
                    mass=mass[at],
                    real=real[at],
                    label=elbl[at],
                    speclabel=speclabel,
                    nonphysical=nonphysical,
                    mtol=mtol,
                    verbose=verbose,
                )
                for at in range(nat)
            ]
        )
    else:
        A = Z = E = mass = real = label = []
    return {
        "elea": np.array(A, dtype=int),
        "elez": np.array(Z, dtype=int),
        "elem": np.array(E),
        "mass": np.array(mass, dtype=float),
        "real": np.array(real, dtype=bool),
        "elbl": np.array(label),
    }


def validate_and_fill_fragments(nat, fragment_separators=None, fragment_charges=None, fragment_multiplicities=None):
    """Check consistency of fragment specifiers wrt type and length. For
    charge & multiplicity, scientific defaults are not computed or applied;
    rather, missing slots are filled with `None` for later processing.

    """
    if fragment_separators is None:
        if fragment_charges is None and fragment_multiplicities is None:
            frs = []  # np.array([], dtype=int)  # if empty, needs to be both ndarray and int
            frc = [None]
            frm = [None]
        else:
            raise ValidationError(
                """Fragment quantities given without separation info: sep ({}), chg ({}), and mult ({})""".format(
                    fragment_separators, fragment_charges, fragment_multiplicities
                )
            )
    else:
        trial_geom = np.zeros((nat, 3))
        try:
            split_geom = np.split(trial_geom, fragment_separators, axis=0)
        except TypeError:
            raise ValidationError(
                """fragment_separators ({}) unable to perform trial np.split on geometry.""".format(fragment_separators)
            )
        if any(len(f) == 0 for f in split_geom):
            if nat != 0:
                raise ValidationError(
                    """fragment_separators ({}) yields zero-length fragment(s) after trial np.split on geometry.""".format(
                        split_geom
                    )
                )
        if sum(len(f) for f in split_geom) != nat:
            raise ValidationError(
                """fragment_separators ({}) yields overlapping fragment(s) after trial np.split on geometry, possibly unsorted.""".format(
                    split_geom
                )
            )
        frs = fragment_separators
        nfr = len(split_geom)

        if fragment_charges is None:
            frc = [None] * nfr
        else:
            try:
                frc = [(f if f is None else float(f)) for f in fragment_charges]
            except TypeError:
                raise ValidationError("""fragment_charges not among None or float: {}""".format(fragment_charges))

        if fragment_multiplicities is None:
            frm = [None] * nfr
        elif all(f is None or (isinstance(f, (int, np.integer)) and f >= 1) for f in fragment_multiplicities):
            frm = fragment_multiplicities
        else:
            raise ValidationError(
                """fragment_multiplicities not among None or positive integer: {}""".format(fragment_multiplicities)
            )

    if not (len(frc) == len(frm) == len(frs) + 1):
        raise ValidationError(
            """Dimension mismatch among fragment quantities: sep + 1 ({}), chg ({}), and mult({})""".format(
                len(frs) + 1, len(frc), len(frm)
            )
        )

    return {"fragment_separators": list(frs), "fragment_charges": frc, "fragment_multiplicities": frm}


def validate_and_fill_unsettled_geometry(geom_unsettled, variables):
    lgeom = [len(g) for g in geom_unsettled]

    if lgeom[0] not in [0, 3]:
        raise ValidationError("""First line must be Cartesian or single atom.""")

    if any(l == 3 for l in lgeom) and not all((l in [3, 6]) for l in lgeom):
        raise ValidationError(
            """Mixing Cartesian and Zmat formats must occur in just that order once absolute frame established."""
        )

    allowed_to_follow = {0: [2], 2: [4], 3: [3, 6], 4: [6], 6: [3, 6]}

    for il in range(len(lgeom) - 1):
        if lgeom[il + 1] not in allowed_to_follow[lgeom[il]]:
            raise ValidationError(
                """This is not how a Zmat works - aim for lower triangular. Line len ({}) may be followed by line len ({}), not ({}).""".format(
                    lgeom[il], allowed_to_follow[lgeom[il]], lgeom[il + 1]
                )
            )

    if not all(len(v) == 2 for v in variables):
        raise ValidationError("""Variables should come in pairs: {}""".format(variables))

    vvars = [[str(v[0]), float(v[1])] for v in variables]

    return {"geom_unsettled": geom_unsettled, "variables": vvars}