1import pprint 2import re 3from copy import deepcopy 4 5import numpy as np 6 7from ..exceptions import ValidationError 8from ..physical_constants import constants 9from ..util import provenance_stamp, unnp, update_with_error 10from .chgmult import validate_and_fill_chgmult 11from .nucleus import reconcile_nucleus 12from .regex import VERSION_PATTERN 13 14 15def from_input_arrays( 16 *, 17 enable_qm=True, 18 enable_efp=True, 19 missing_enabled_return_qm="error", 20 missing_enabled_return_efp="error", 21 # qm 22 geom=None, 23 elea=None, 24 elez=None, 25 elem=None, 26 mass=None, 27 real=None, 28 elbl=None, 29 name=None, 30 units="Angstrom", 31 input_units_to_au=None, 32 fix_com=None, 33 fix_orientation=None, 34 fix_symmetry=None, 35 fragment_separators=None, 36 fragment_charges=None, 37 fragment_multiplicities=None, 38 molecular_charge=None, 39 molecular_multiplicity=None, 40 # efp 41 fragment_files=None, 42 hint_types=None, 43 geom_hints=None, 44 # qm-vz 45 geom_unsettled=None, 46 variables=None, 47 # processing details 48 speclabel=True, 49 tooclose=0.1, 50 zero_ghost_fragments=False, 51 nonphysical=False, 52 mtol=1.0e-3, 53 copy=True, 54 verbose=1, 55): 56 """Compose a Molecule dict from unvalidated arrays and variables 57 in multiple domains. 58 59 Drives :py:func:`qcelemental.molparse.from_arrays` for sucessive 60 domains and hooks them together (e.g., impose `fix_com` on "qm" 61 when "efp" present. 62 63 """ 64 molinit = {} 65 if enable_qm: 66 molinit["qm"] = {} 67 if enable_efp: 68 molinit["efp"] = {} 69 70 if enable_efp: 71 processed = from_arrays( 72 domain="efp", 73 missing_enabled_return=missing_enabled_return_efp, 74 units=units, 75 input_units_to_au=input_units_to_au, 76 fix_com=fix_com, 77 fix_orientation=fix_orientation, 78 fix_symmetry=fix_symmetry, 79 fragment_files=fragment_files, 80 hint_types=hint_types, 81 geom_hints=geom_hints, 82 # which other processing details needed? 83 verbose=verbose, 84 ) 85 update_with_error(molinit, {"efp": processed}) 86 if molinit["efp"] == {}: 87 del molinit["efp"] 88 89 efp_present = enable_efp and "efp" in molinit and bool(len(molinit["efp"]["geom_hints"])) 90 if efp_present: 91 fix_com = True 92 fix_orientation = True 93 fix_symmetry = "c1" 94 95 if enable_qm: 96 dm = "qmvz" if geom_unsettled else "qm" 97 processed = from_arrays( 98 domain=dm, 99 missing_enabled_return=missing_enabled_return_qm, 100 geom=geom, 101 elea=elea, 102 elez=elez, 103 elem=elem, 104 mass=mass, 105 real=real, 106 elbl=elbl, 107 name=name, 108 units=units, 109 input_units_to_au=input_units_to_au, 110 fix_com=fix_com, 111 fix_orientation=fix_orientation, 112 fix_symmetry=fix_symmetry, 113 fragment_separators=fragment_separators, 114 fragment_charges=fragment_charges, 115 fragment_multiplicities=fragment_multiplicities, 116 molecular_charge=molecular_charge, 117 molecular_multiplicity=molecular_multiplicity, 118 geom_unsettled=geom_unsettled, 119 variables=variables, 120 # processing details 121 speclabel=speclabel, 122 tooclose=tooclose, 123 zero_ghost_fragments=zero_ghost_fragments, 124 nonphysical=nonphysical, 125 mtol=mtol, 126 copy=copy, 127 verbose=1, 128 ) 129 update_with_error(molinit, {"qm": processed}) 130 if molinit["qm"] == {}: 131 del molinit["qm"] 132 133 return molinit 134 135 136def from_arrays( 137 *, 138 geom=None, 139 elea=None, 140 elez=None, 141 elem=None, 142 mass=None, 143 real=None, 144 elbl=None, 145 name=None, 146 units="Angstrom", 147 input_units_to_au=None, 148 fix_com=None, 149 fix_orientation=None, 150 fix_symmetry=None, 151 fragment_separators=None, 152 fragment_charges=None, 153 fragment_multiplicities=None, 154 molecular_charge=None, 155 molecular_multiplicity=None, 156 comment=None, 157 provenance=None, 158 connectivity=None, 159 fragment_files=None, 160 hint_types=None, 161 geom_hints=None, 162 geom_unsettled=None, 163 variables=None, 164 domain="qm", 165 missing_enabled_return: str = "error", 166 np_out: bool = True, 167 speclabel: bool = True, 168 tooclose: float = 0.1, 169 zero_ghost_fragments=False, 170 nonphysical: bool = False, 171 mtol=1.0e-3, 172 copy=True, 173 verbose=1, 174): 175 r"""Compose a Molecule dict from unvalidated arrays and variables, returning dict. 176 177 See fields of Return molrec below. Required parameters (for QM XYZ) 178 are `geom` and one of `elem`, `elez`, `elbl` (`speclabel=True`) 179 180 Parameters 181 ---------- 182 geom : Union[List[List[float]], numpy.ndarray] 183 (nat, 3) or (3 * nat, ) ndarray or list o'lists of Cartesian coordinates. 184 fragment_separators : Union[List[int], numpy.ndarray] 185 (nfr - 1, ) list of atom indices at which to split `geom` into fragments. 186 elbl : Union[List[str], numpy.ndarray] 187 (nat, ) Label extending `elem` symbol, possibly conveying ghosting, isotope, mass, tagging information. 188 tooclose 189 Interatom distance (native `geom` units) nearer than which atoms not allowed. 190 nonphysical 191 Do allow masses outside an element's natural range to pass validation? 192 speclabel 193 If `True`, interpret `elbl` as potentially full nucleus spec including 194 ghosting, isotope, mass, tagging information, e.g., `@13C_mine` or 195 `He4@4.01`. If `False`, interpret `elbl` as only the user/tagging 196 extension to nucleus label, e.g. `_mine` or `4` in the previous examples. 197 missing_enabled_return 198 {'minimal', 'none', 'error'} 199 What to do when an enabled domain is of zero-length? Respectively, return 200 a fully valid but empty molrec, return empty dictionary, or throw error. 201 np_out 202 When `True`, fields geom, elea, elez, elem, mass, real, elbl will be ndarray. 203 Use `False` to get a json-able version. 204 205 Returns 206 ------- 207 molrec : dict 208 Molecule dictionary spec follows. Its principles are 209 210 (1) contents are fully validated and defaulted - no error 211 checking necessary, 212 213 (2) contents may be mildly redundant - atomic numbers and 214 element symbols present, 215 216 (3) big system, nat-length single-type arrays, not small system, 217 nat-number heterogeneous objects, 218 219 (4) some fields are optional (e.g., fix_symmetry) but largely 220 self-describing so units or fix_com must be present. 221 222 (5) apart from some mild optional fields, _all_ fields will 223 be present (corollary of "fully validated and defaulted") - no 224 need to check for every key. in some cases like efp, keys will 225 appear in blocks, so pre-handshake there will be a few hint keys 226 and post-handshake they will be joined by full qm-like molrec. 227 228 (6) molrec should be idempotent through this function (equiv to 229 schema validator) but are not idempotent throughout its life. if 230 fields permit, frame may be changed. Future? if fields permit, 231 mol may be symmetrized. Coordinates and angles may change units 232 or range if program returns them in only one form. 233 234 name : str, optional 235 Label for molecule; should be valid Python identifier. 236 units : {'Angstrom', 'Bohr'} 237 Units for `geom`. 238 input_units_to_au : float, optional 239 If `units='Angstrom'`, overrides consumer's value for [A]-->[a0] conversion. 240 fix_com : bool 241 Whether translation of `geom` is allowed or disallowed. 242 fix_orientation : bool 243 Whether rotation of `geom` is allowed or disallowed. 244 fix_symmetry : str, optional 245 Maximal point group symmetry which `geom` should be treated. Lowercase. 246 geom : ndarray of float 247 (3 * nat, ) Cartesian coordinates in `units`. 248 elea : ndarray of int 249 (nat, ) Mass number for atoms, if known isotope, else -1. 250 elez : ndarray of int 251 (nat, ) Number of protons, nuclear charge for atoms. 252 elem : ndarray of str 253 (nat, ) Element symbol for atoms. 254 mass : ndarray of float 255 (nat, ) Atomic mass [u] for atoms. 256 real : ndarray of bool 257 (nat, ) Real/ghostedness for atoms. 258 elbl : ndarray of str 259 (nat, ) Label with any tagging information from element spec. 260 fragment_separators : list of int 261 (nfr - 1, ) list of atom indices at which to split `geom` into fragments. 262 fragment_charges : list of float 263 (nfr, ) list of charge allocated to each fragment. 264 fragment_multiplicities : list of int 265 (nfr, ) list of multiplicity allocated to each fragment. 266 molecular_charge : float 267 total charge on system. 268 molecular_multiplicity : int 269 total multiplicity on system. 270 comment : str, optional 271 Additional comment for molecule. 272 provenance : dict of str 273 Accumulated history of molecule, with fields "creator", "version", "routine". 274 connectivity : list of tuples of int, optional 275 (nbond, 3) list of (0-indexed) (atomA, atomB, bond_order) (int, int, double) tuples 276 277 EFP extension (this + units is minimal) 278 279 fragment_files : list of str 280 (nfr, ) lowercased names of efp meat fragment files. 281 hint_types : {'xyzabc', 'points'} 282 (nfr, ) type of fragment orientation hint. 283 geom_hints : list of lists of float 284 (nfr, ) inner lists have length 6 (xyzabc; to orient the center) or 285 9 (points; to orient the first three atoms) of the EFP fragment. 286 287 QMVZ extension (geom_unsettled replaces geom) 288 289 geom_unsettled : list of lists of str 290 (nat, ) all-string Cartesian and/or zmat anchor and value contents 291 mixing anchors, values, and variables. 292 variables : list of pairs 293 (nvar, 2) pairs of variables (str) and values (float). May be incomplete. 294 295 Raises 296 ------ 297 qcelemental.ValidationError 298 For most anything wrong. 299 300 """ 301 # << domain sorting >> 302 available_domains = ["qm", "efp", "qmvz"] 303 if domain not in available_domains: 304 raise ValidationError( 305 "Topology domain {} not available for processing. Choose among {}".format(domain, available_domains) 306 ) 307 308 if domain == "qm" and (geom is None or np.asarray(geom).size == 0): 309 if missing_enabled_return == "none": 310 return {} 311 elif missing_enabled_return == "minimal": 312 geom = [] 313 else: 314 raise ValidationError("""For domain 'qm', `geom` must be provided.""") 315 if domain == "efp" and (geom_hints is None or np.asarray(geom_hints, dtype=object).size == 0): 316 if missing_enabled_return == "none": 317 return {} 318 elif missing_enabled_return == "minimal": 319 geom_hints = [] 320 fragment_files = [] 321 hint_types = [] 322 else: 323 raise ValidationError("""For domain 'efp', `geom_hints` must be provided.""") 324 325 molinit = {} 326 extern = False 327 328 processed = validate_and_fill_units( 329 name=name, 330 units=units, 331 input_units_to_au=input_units_to_au, 332 comment=comment, 333 provenance=provenance, 334 connectivity=connectivity, 335 always_return_iutau=False, 336 ) 337 processed["provenance"] = provenance_stamp(__name__) 338 update_with_error(molinit, processed) 339 340 if domain == "efp": 341 processed = validate_and_fill_efp(fragment_files=fragment_files, hint_types=hint_types, geom_hints=geom_hints) 342 update_with_error(molinit, processed) 343 extern = bool(len(molinit["geom_hints"])) 344 345 if domain == "qm" or (domain == "efp" and geom is not None) or domain == "qmvz": 346 if domain == "qmvz": 347 processed = validate_and_fill_unsettled_geometry(geom_unsettled=geom_unsettled, variables=variables) 348 update_with_error(molinit, processed) 349 nat = len(molinit["geom_unsettled"]) 350 351 else: 352 processed = validate_and_fill_geometry(geom=geom, tooclose=tooclose, copy=copy) 353 update_with_error(molinit, processed) 354 nat = molinit["geom"].shape[0] // 3 355 356 processed = validate_and_fill_nuclei( 357 nat, 358 elea=elea, 359 elez=elez, 360 elem=elem, 361 mass=mass, 362 real=real, 363 elbl=elbl, 364 speclabel=speclabel, 365 nonphysical=nonphysical, 366 mtol=mtol, 367 verbose=verbose, 368 ) 369 update_with_error(molinit, processed) 370 371 processed = validate_and_fill_fragments( 372 nat, 373 fragment_separators=fragment_separators, 374 fragment_charges=fragment_charges, 375 fragment_multiplicities=fragment_multiplicities, 376 ) 377 update_with_error(molinit, processed) 378 379 Z_available = molinit["elez"] * molinit["real"] * 1.0 380 processed = validate_and_fill_chgmult( 381 zeff=Z_available, 382 fragment_separators=molinit["fragment_separators"], 383 molecular_charge=molecular_charge, 384 fragment_charges=molinit["fragment_charges"], 385 molecular_multiplicity=molecular_multiplicity, 386 fragment_multiplicities=molinit["fragment_multiplicities"], 387 zero_ghost_fragments=zero_ghost_fragments, 388 verbose=verbose, 389 ) 390 del molinit["fragment_charges"] # sometimes safe update is too picky about overwriting v_a_f_fragments values 391 del molinit["fragment_multiplicities"] 392 update_with_error(molinit, processed) 393 394 extern = domain == "efp" 395 396 processed = validate_and_fill_frame( 397 extern=extern, fix_com=fix_com, fix_orientation=fix_orientation, fix_symmetry=fix_symmetry 398 ) 399 update_with_error(molinit, processed) 400 401 if verbose >= 2: 402 print("RETURN FROM qcel.molparse.from_arrays(domain={})".format(domain.upper())) 403 pprint.pprint(molinit) 404 405 if not np_out: 406 molinit = unnp(molinit) 407 408 return molinit 409 410 411def validate_and_fill_units( 412 name=None, 413 units="Angstrom", 414 input_units_to_au=None, 415 comment=None, 416 provenance=None, 417 connectivity=None, 418 always_return_iutau=False, 419): 420 molinit = {} 421 422 if name is not None: 423 molinit["name"] = name 424 425 if comment is not None: 426 molinit["comment"] = comment 427 428 def validate_provenance(dicary): 429 expected_prov_keys = ["creator", "routine", "version"] 430 try: 431 prov_keys = sorted(dicary.keys()) 432 except AttributeError: 433 raise ValidationError("Provenance entry is not dictionary: {}".format(dicary)) 434 435 if prov_keys == expected_prov_keys: 436 if not isinstance(dicary["creator"], str): 437 raise ValidationError( 438 """Provenance key 'creator' should be string of creating program's name: {}""".format( 439 dicary["creator"] 440 ) 441 ) 442 if not re.fullmatch(VERSION_PATTERN, dicary["version"], re.VERBOSE): 443 raise ValidationError( 444 """Provenance key 'version' should be a valid PEP 440 string: {}""".format(dicary["version"]) 445 ) 446 if not isinstance(dicary["routine"], str): 447 raise ValidationError( 448 """Provenance key 'routine' should be string of creating function's name: {}""".format( 449 dicary["routine"] 450 ) 451 ) 452 return True 453 else: 454 raise ValidationError("Provenance keys ({}) incorrect: {}".format(expected_prov_keys, prov_keys)) 455 456 if provenance is None: 457 molinit["provenance"] = {} 458 else: 459 if validate_provenance(provenance): 460 molinit["provenance"] = deepcopy(provenance) 461 462 if connectivity is not None: 463 conn = [] 464 try: 465 for (at1, at2, bondorder) in connectivity: 466 if not (float(at1)).is_integer() or at1 < 0: # or at1 >= nat: 467 raise ValidationError("""Connectivity first atom should be int [0, nat): {}""".format(at1)) 468 if not (float(at2)).is_integer() or at2 < 0: # or at2 >= nat: 469 raise ValidationError("""Connectivity second atom should be int [0, nat): {}""".format(at2)) 470 if bondorder < 0 or bondorder > 5: 471 raise ValidationError("""Connectivity bond order should be float [0, 5]: {}""".format(bondorder)) 472 conn.append((int(min(at1, at2)), int(max(at1, at2)), float(bondorder))) 473 conn.sort(key=lambda tup: tup[0]) 474 molinit["connectivity"] = conn 475 except ValueError: 476 raise ValidationError( 477 "Connectivity entry is not of form [(at1, at2, bondorder), ...]: {}".format(connectivity) 478 ) 479 480 if units.capitalize() in ["Angstrom", "Bohr"]: 481 molinit["units"] = units.capitalize() 482 else: 483 raise ValidationError("Invalid molecule geometry units: {}".format(units)) 484 485 if molinit["units"] == "Bohr": 486 iutau = 1.0 487 elif molinit["units"] == "Angstrom": 488 iutau = 1.0 / constants.bohr2angstroms 489 490 if input_units_to_au is not None: 491 if abs(input_units_to_au - iutau) < 0.05: 492 iutau = input_units_to_au 493 else: 494 raise ValidationError( 495 """No big perturbations to physical constants! {} !~= {}""".format(iutau, input_units_to_au) 496 ) 497 498 if always_return_iutau or input_units_to_au is not None: 499 molinit["input_units_to_au"] = iutau 500 501 return molinit 502 503 504def validate_and_fill_frame(extern, fix_com=None, fix_orientation=None, fix_symmetry=None): 505 506 if fix_com is True: 507 com = True 508 elif fix_com is False: 509 if extern: 510 raise ValidationError("Invalid fix_com ({}) with extern ({})".format(fix_com, extern)) 511 else: 512 com = False 513 elif fix_com is None: 514 com = extern 515 else: 516 raise ValidationError("Invalid fix_com: {}".format(fix_com)) 517 518 if fix_orientation is True: 519 orient = True 520 elif fix_orientation is False: 521 if extern: 522 raise ValidationError("Invalid fix_orientation ({}) with extern ({})".format(fix_orientation, extern)) 523 else: 524 orient = False 525 elif fix_orientation is None: 526 orient = extern 527 else: 528 raise ValidationError("Invalid fix_orientation: {}".format(fix_orientation)) 529 530 symm = None 531 if extern: 532 if fix_symmetry is None: 533 symm = "c1" 534 elif fix_symmetry.lower() == "c1": 535 symm = "c1" 536 else: 537 raise ValidationError("Invalid (non-C1) fix_symmetry ({}) with extern ({})".format(fix_symmetry, extern)) 538 else: 539 if fix_symmetry is not None: 540 symm = fix_symmetry.lower() 541 542 molinit = {} 543 molinit["fix_com"] = com 544 molinit["fix_orientation"] = orient 545 if symm: 546 molinit["fix_symmetry"] = symm 547 548 return molinit 549 550 551def validate_and_fill_efp(fragment_files=None, hint_types=None, geom_hints=None): 552 553 if ( 554 fragment_files is None 555 or hint_types is None 556 or geom_hints is None 557 or fragment_files == [None] 558 or hint_types == [None] 559 or geom_hints == [None] 560 or not (len(fragment_files) == len(hint_types) == len(geom_hints)) 561 ): 562 563 raise ValidationError( 564 """Missing or inconsistent length among efp quantities: fragment_files ({}), hint_types ({}), and geom_hints ({})""".format( 565 fragment_files, hint_types, geom_hints 566 ) 567 ) 568 569 # NOTE: imposing case on file 570 try: 571 files = [f.lower() for f in fragment_files] 572 except AttributeError: 573 raise ValidationError("""fragment_files not strings: {}""".format(fragment_files)) 574 575 if all(f in ["xyzabc", "points", "rotmat"] for f in hint_types): 576 types = hint_types 577 else: 578 raise ValidationError("""hint_types not among 'xyzabc', 'points', 'rotmat': {}""".format(hint_types)) 579 580 hints = [] 581 hlen = {"xyzabc": 6, "points": 9, "rotmat": 12} 582 for ifr, fr in enumerate(geom_hints): 583 try: 584 hint = [float(f) for f in fr] 585 except (ValueError, TypeError): 586 raise ValidationError("""Un float-able elements in geom_hints[{}]: {}""".format(ifr, fr)) 587 588 htype = hint_types[ifr] 589 if len(hint) == hlen[htype]: 590 hints.append(hint) 591 else: 592 raise ValidationError("""EFP hint type {} not {} elements: {}""".format(htype, hlen[htype], hint)) 593 594 return {"fragment_files": files, "hint_types": types, "geom_hints": hints} 595 596 597def validate_and_fill_geometry(geom=None, tooclose=0.1, copy=True): 598 """Check `geom` for overlapping atoms. Return flattened""" 599 600 npgeom = np.array(geom, copy=copy, dtype=float).reshape((-1, 3)) 601 602 # Upper triangular 603 metric = tooclose ** 2 604 tooclose_inds = [] 605 for x in range(npgeom.shape[0]): 606 diffs = npgeom[x] - npgeom[x + 1 :] 607 dists = np.einsum("ij,ij->i", diffs, diffs) 608 609 # Record issues 610 if np.any(dists < metric): 611 indices = np.where(dists < metric)[0] 612 tooclose_inds.extend([(x, y, dist) for y, dist in zip(indices + x + 1, dists[indices] ** 0.5)]) 613 614 if tooclose_inds: 615 raise ValidationError( 616 """Following atoms are too close: {}""".format([(i, j, dist) for i, j, dist in tooclose_inds]) 617 ) 618 619 return {"geom": npgeom.reshape((-1))} 620 621 622def validate_and_fill_nuclei( 623 nat, 624 elea=None, 625 elez=None, 626 elem=None, 627 mass=None, 628 real=None, 629 elbl=None, 630 # processing details 631 speclabel=True, 632 nonphysical=False, 633 mtol=1.0e-3, 634 verbose=1, 635): 636 """Check the nuclear identity arrays for consistency and fill in knowable values.""" 637 638 if elea is None: 639 elea = np.asarray([None] * nat) 640 else: 641 # -1 equivalent to None 642 elea = np.asarray(elea) 643 if -1 in elea: 644 elea = np.array([(None if at == -1 else at) for at in elea]) # Rebuild to change dtype if needed. 645 646 if elez is None: 647 elez = np.asarray([None] * nat) 648 else: 649 elez = np.asarray(elez) 650 651 if elem is None: 652 elem = np.asarray([None] * nat) 653 else: 654 elem = np.asarray(elem) 655 656 if mass is None: 657 mass = np.asarray([None] * nat) 658 else: 659 mass = np.asarray(mass) 660 661 if real is None: 662 real = np.asarray([None] * nat) 663 else: 664 real = np.asarray(real) 665 666 if elbl is None: 667 elbl = np.asarray([None] * nat) 668 else: 669 elbl = np.asarray(elbl) 670 671 if not ((nat,) == elea.shape == elez.shape == elem.shape == mass.shape == real.shape == elbl.shape): 672 raise ValidationError( 673 """Dimension mismatch natom {} among A {}, Z {}, E {}, mass {}, real {}, and elbl {}""".format( 674 (nat,), elea.shape, elez.shape, elem.shape, mass.shape, real.shape, elbl.shape 675 ) 676 ) 677 678 if nat: 679 A, Z, E, mass, real, label = zip( 680 *[ 681 reconcile_nucleus( 682 A=elea[at], 683 Z=elez[at], 684 E=elem[at], 685 mass=mass[at], 686 real=real[at], 687 label=elbl[at], 688 speclabel=speclabel, 689 nonphysical=nonphysical, 690 mtol=mtol, 691 verbose=verbose, 692 ) 693 for at in range(nat) 694 ] 695 ) 696 else: 697 A = Z = E = mass = real = label = [] 698 return { 699 "elea": np.array(A, dtype=int), 700 "elez": np.array(Z, dtype=int), 701 "elem": np.array(E), 702 "mass": np.array(mass, dtype=float), 703 "real": np.array(real, dtype=bool), 704 "elbl": np.array(label), 705 } 706 707 708def validate_and_fill_fragments(nat, fragment_separators=None, fragment_charges=None, fragment_multiplicities=None): 709 """Check consistency of fragment specifiers wrt type and length. For 710 charge & multiplicity, scientific defaults are not computed or applied; 711 rather, missing slots are filled with `None` for later processing. 712 713 """ 714 if fragment_separators is None: 715 if fragment_charges is None and fragment_multiplicities is None: 716 frs = [] # np.array([], dtype=int) # if empty, needs to be both ndarray and int 717 frc = [None] 718 frm = [None] 719 else: 720 raise ValidationError( 721 """Fragment quantities given without separation info: sep ({}), chg ({}), and mult ({})""".format( 722 fragment_separators, fragment_charges, fragment_multiplicities 723 ) 724 ) 725 else: 726 trial_geom = np.zeros((nat, 3)) 727 try: 728 split_geom = np.split(trial_geom, fragment_separators, axis=0) 729 except TypeError: 730 raise ValidationError( 731 """fragment_separators ({}) unable to perform trial np.split on geometry.""".format(fragment_separators) 732 ) 733 if any(len(f) == 0 for f in split_geom): 734 if nat != 0: 735 raise ValidationError( 736 """fragment_separators ({}) yields zero-length fragment(s) after trial np.split on geometry.""".format( 737 split_geom 738 ) 739 ) 740 if sum(len(f) for f in split_geom) != nat: 741 raise ValidationError( 742 """fragment_separators ({}) yields overlapping fragment(s) after trial np.split on geometry, possibly unsorted.""".format( 743 split_geom 744 ) 745 ) 746 frs = fragment_separators 747 nfr = len(split_geom) 748 749 if fragment_charges is None: 750 frc = [None] * nfr 751 else: 752 try: 753 frc = [(f if f is None else float(f)) for f in fragment_charges] 754 except TypeError: 755 raise ValidationError("""fragment_charges not among None or float: {}""".format(fragment_charges)) 756 757 if fragment_multiplicities is None: 758 frm = [None] * nfr 759 elif all(f is None or (isinstance(f, (int, np.integer)) and f >= 1) for f in fragment_multiplicities): 760 frm = fragment_multiplicities 761 else: 762 raise ValidationError( 763 """fragment_multiplicities not among None or positive integer: {}""".format(fragment_multiplicities) 764 ) 765 766 if not (len(frc) == len(frm) == len(frs) + 1): 767 raise ValidationError( 768 """Dimension mismatch among fragment quantities: sep + 1 ({}), chg ({}), and mult({})""".format( 769 len(frs) + 1, len(frc), len(frm) 770 ) 771 ) 772 773 return {"fragment_separators": list(frs), "fragment_charges": frc, "fragment_multiplicities": frm} 774 775 776def validate_and_fill_unsettled_geometry(geom_unsettled, variables): 777 lgeom = [len(g) for g in geom_unsettled] 778 779 if lgeom[0] not in [0, 3]: 780 raise ValidationError("""First line must be Cartesian or single atom.""") 781 782 if any(l == 3 for l in lgeom) and not all((l in [3, 6]) for l in lgeom): 783 raise ValidationError( 784 """Mixing Cartesian and Zmat formats must occur in just that order once absolute frame established.""" 785 ) 786 787 allowed_to_follow = {0: [2], 2: [4], 3: [3, 6], 4: [6], 6: [3, 6]} 788 789 for il in range(len(lgeom) - 1): 790 if lgeom[il + 1] not in allowed_to_follow[lgeom[il]]: 791 raise ValidationError( 792 """This is not how a Zmat works - aim for lower triangular. Line len ({}) may be followed by line len ({}), not ({}).""".format( 793 lgeom[il], allowed_to_follow[lgeom[il]], lgeom[il + 1] 794 ) 795 ) 796 797 if not all(len(v) == 2 for v in variables): 798 raise ValidationError("""Variables should come in pairs: {}""".format(variables)) 799 800 vvars = [[str(v[0]), float(v[1])] for v in variables] 801 802 return {"geom_unsettled": geom_unsettled, "variables": vvars} 803