#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017, the cclib development team
#
# This file is part of cclib (http://cclib.github.io) and is distributed under
# the terms of the BSD 3-Clause License.

"""Script for loading data from computational chemistry files."""


import glob
import logging
import os.path
import difflib
from functools import partial
from pprint import pprint

# This is needed for testing purposes only.
import sys

import numpy

from cclib.parser import ccData
from cclib.io import ccread, URL_PATTERN


# Set up options for pretty-printing output.
pprint = partial(pprint, width=120, compact=True)
numpy.set_printoptions(linewidth=120)


def _build_parser():
    """Create the argparse parser describing the ccget command line."""

    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "attribute_or_compchemlogfile", nargs="+",
        help="one or more attributes to be parsed from one or more logfiles",
    )

    # Only one of --list, --json and --multi may be given at a time.
    group = parser.add_mutually_exclusive_group()

    group.add_argument(
        "--list", "-l",
        action="store_true",
        help="print a list of attributes available in each file",
    )
    group.add_argument(
        "--json", "-j",
        action="store_true",
        help="the given logfile is in CJSON format",
    )
    group.add_argument(
        "--multi", "-m",
        action="store_true",
        help="parse multiple input files as one input stream",
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="more verbose parsing output (only errors by default)",
    )
    parser.add_argument(
        "--future", "-u",
        action="store_true",
        help="use experimental features (currently optdone_as_list)",
    )
    parser.add_argument(
        "--full", "-f",
        action="store_true",
        help="toggle full print behaviour for attributes",
    )

    return parser


def _sort_arguments(arglist, parser):
    """Split positional arguments into attribute names and filenames/links.

    Each argument is classified in this order: exact attribute name,
    URL or existing file, close (fuzzy) match of an attribute name, and
    finally a wildcard pattern expanded with glob. If nothing applies,
    an error and the usage are printed and the parser exits with status 1.

    Returns a tuple (attrnames, filenames).
    """

    attrnames = []
    filenames = []
    for arg in arglist:

        if arg in ccData._attrlist:
            attrnames.append(arg)
            continue

        # Check for real files and links before trying any fuzzy matching,
        # so that an existing logfile whose name happens to resemble an
        # attribute is still parsed instead of being turned into an attribute.
        if URL_PATTERN.match(arg) or os.path.isfile(arg):
            filenames.append(arg)
            continue

        fuzzy_attr = difflib.get_close_matches(arg, ccData._attrlist, n=1, cutoff=0.85)
        if fuzzy_attr:
            fuzzy_attr = fuzzy_attr[0]
            logging.warning("Attribute '{0}' not found, but attribute '{1}' is close. "
                            "Using '{1}' instead.".format(arg, fuzzy_attr))
            attrnames.append(fuzzy_attr)
            continue

        # Note that in Linux, the shell expands wild cards, but not so in
        # Windows, so try to do that here using glob.
        wildcardmatches = glob.glob(arg)
        if wildcardmatches:
            filenames.extend(wildcardmatches)
            continue

        print("%s is neither a filename nor an attribute name." % arg)
        parser.print_usage()
        parser.exit(1)

    return attrnames, filenames


def _list_attributes(data, name, cjsonfile):
    """Print the names of the attributes available in the parsed data.

    For CJSON input the keys of `data` are printed; otherwise every name in
    `data._attrlist` that is actually set on `data` is printed.
    """

    print("cclib can parse the following attributes from %s:" % name)

    if cjsonfile:
        # This path is currently not reachable from the command line, since
        # --list and --json are mutually exclusive. Returning here (rather
        # than breaking out of the caller's loop) only skips the attribute
        # listing below, so any remaining files would still be processed.
        for key in data:
            print(key)
        return

    for attr in data._attrlist:
        if hasattr(data, attr):
            print(" %s" % attr)


def _print_attributes(data, attrnames, cjsonfile, full):
    """Print the requested attributes and report whether all were found.

    For CJSON input, `data` is indexed by attribute name; otherwise the
    attributes are looked up on the `data` object and pretty-printed.
    Returns True if every requested attribute was available, else False.
    """

    all_found = True
    for attr in attrnames:

        if cjsonfile:
            if attr in data:
                print("%s:\n%s" % (attr, data[attr]))
                continue
        elif hasattr(data, attr):
            print(attr)
            attr_val = getattr(data, attr)
            # List of attributes to be printed with new lines
            if attr in data._listsofarrays and full:
                for val in attr_val:
                    pprint(val)
            else:
                pprint(attr_val)
            continue

        print("Could not parse %s from this file." % attr)
        all_found = False

    return all_found


def ccget():
    """Parse files with cclib based on command line arguments.

    The positional arguments are a mix of attribute names and logfiles (or
    links). Each logfile is read with ccread and either the available
    attributes are listed (--list) or the requested attributes are printed.
    With --multi, all logfiles are passed to ccread together as one input.

    On invalid input (wrong number of arguments, unknown argument, missing
    logfiles or attributes, or a file ccread returns None for) a message is
    printed and the parser exits with status 1, raising SystemExit.
    """

    parser = _build_parser()
    args = parser.parse_args()

    arglist = args.attribute_or_compchemlogfile
    showattr = args.list
    cjsonfile = args.json
    multifile = args.multi
    verbose = args.verbose
    future = args.future
    full = args.full

    # Toggle full print behaviour for numpy arrays.
    if full:
        numpy.set_printoptions(threshold=numpy.inf)

    # We need at least one attribute and the filename, so two arguments, or
    # just one filename if we want to list attributes that can be extracted.
    # In multifile mode, we generally want at least two filenames, so the
    # expected number of arguments is one higher.
    minimum = 1 if showattr else 2
    if multifile:
        minimum += 1
    if len(arglist) < minimum:
        print("The number of arguments does not seem to be correct.")
        parser.print_usage()
        parser.exit(1)

    # Figure out which are the attribute names and which are the filenames or links.
    attrnames, filenames = _sort_arguments(arglist, parser)

    # Since there is some ambiguity to the correct number of arguments, check
    # that there is at least one filename (or two in multifile mode), and also
    # at least one attribute to parse if the -l option was not passed.
    if not filenames:
        print("No logfiles given")
        parser.exit(1)
    if multifile and len(filenames) == 1:
        print("Expecting at least two logfiles in multifile mode")
        parser.exit(1)
    if not showattr and not attrnames:
        print("No attributes given")
        parser.exit(1)

    # This is sufficient to correctly handle multiple files, that is to
    # run the loop below only once with all logfiles in the variable `filename`.
    if multifile:
        filenames = [filenames]

    # The keyword dictionary is not used much, but it is useful for passing
    # options downstream. For example, --future triggers experimental or
    # alternative behavior (as with optdone). It does not depend on the file
    # being read, so it is built once here.
    kwargs = {
        'verbose': verbose,
        'loglevel': logging.INFO if verbose else logging.ERROR,
    }
    if future:
        kwargs['future'] = True
    if cjsonfile:
        kwargs['cjson'] = True

    # Now parse each file and print out the requested attributes.
    for filename in filenames:

        if multifile:
            name = ", ".join(filename[:-1]) + " and " + filename[-1]
        else:
            name = filename

        print("Attempting to read %s" % name)
        data = ccread(filename, **kwargs)

        if data is None:
            print("Cannot figure out the format of '%s'" % name)
            print("Report this to the cclib development team if you think it is an error.")
            print("\n" + parser.format_usage())
            parser.exit(1)

        if showattr:
            _list_attributes(data, name, cjsonfile)
        elif not _print_attributes(data, attrnames, cjsonfile, full):
            # At least one requested attribute was missing from this file.
            parser.print_help()


if __name__ == "__main__":

    ccget()