1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Copyright (c) 2017, the cclib development team
5#
6# This file is part of cclib (http://cclib.github.io) and is distributed under
7# the terms of the BSD 3-Clause License.
8
9"""Script for loading data from computational chemistry files."""
10
11
12import glob
13import logging
14import os.path
15import difflib
16from functools import partial
17from pprint import pprint
18
19# This is needed for testing purposes only.
20import sys
21
22import numpy
23
24from cclib.parser import ccData
25from cclib.io import ccread, URL_PATTERN
26
27
# Configure output formatting: numpy arrays and pretty-printed structures may
# both use lines of up to 120 characters, and pprint is asked for its compact
# layout. The module-level name `pprint` is deliberately rebound to the
# preconfigured callable so that later calls pick up these settings.
numpy.set_printoptions(linewidth=120)
pprint = partial(pprint, compact=True, width=120)
31
32
def ccget():
    """Parse files with cclib based on command line arguments.

    The positional command line arguments are sorted into attribute names
    and logfiles (existing paths, URLs or wildcard patterns). Each logfile
    is then read with ccread and either the requested attributes are printed
    or, with --list, the attributes found in the parsed data.

    Exits via ``parser.exit(1)`` (which raises SystemExit) when the arguments
    cannot be interpreted or when ccread returns None for a logfile.
    """

    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "attribute_or_compchemlogfile", nargs="+",
        help="one or more attributes to be parsed from one or more logfiles",
    )

    group = parser.add_mutually_exclusive_group()

    group.add_argument(
        "--list", "-l",
        action="store_true",
        help="print a list of attributes available in each file",
    )
    group.add_argument(
        "--json", "-j",
        action="store_true",
        help="the given logfile is in CJSON format",
    )
    group.add_argument(
        "--multi", "-m",
        action="store_true",
        help="parse multiple input files as one input stream",
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="more verbose parsing output (only errors by default)",
    )
    parser.add_argument(
        "--future", "-u",
        action="store_true",
        help="use experimental features (currently optdone_as_list)",
    )
    parser.add_argument(
        "--full", "-f",
        action="store_true",
        help="toggle full print behaviour for attributes",
    )

    args = parser.parse_args()

    arglist = args.attribute_or_compchemlogfile
    showattr = args.list
    cjsonfile = args.json
    multifile = args.multi
    verbose = args.verbose
    full = args.full

    # Toggle full print behaviour for numpy arrays.
    if full:
        numpy.set_printoptions(threshold=numpy.inf)

    # We need at least one attribute and the filename, so two arguments, or
    # just one filename if we want to list attributes that can be extracted.
    # In multifile mode, we generally want at least two filenames, so one
    # more argument is expected in either case.
    min_args = (1 if showattr else 2) + (1 if multifile else 0)
    if len(arglist) < min_args:
        print("The number of arguments does not seem to be correct.")
        parser.print_usage()
        parser.exit(1)

    # Figure out which are the attribute names and which are the filenames or links.
    # Note that in Linux, the shell expands wild cards, but not so in Windows,
    # so try to do that here using glob.
    attrnames = []
    filenames = []
    for arg in arglist:
        # An exact attribute name always wins.
        if arg in ccData._attrlist:
            attrnames.append(arg)
            continue

        # Next, anything that is a link or an existing file is a logfile.
        if URL_PATTERN.match(arg) or os.path.isfile(arg):
            filenames.append(arg)
            continue

        wildcardmatches = glob.glob(arg)
        if wildcardmatches:
            filenames.extend(wildcardmatches)
            continue

        # Only now fall back to a fuzzy attribute match. Doing this before the
        # file checks would silently turn an existing logfile whose name
        # happens to resemble an attribute into that attribute.
        fuzzy_attr = difflib.get_close_matches(arg, ccData._attrlist, n=1, cutoff=0.85)
        if fuzzy_attr:
            logging.warning("Attribute '{0}' not found, but attribute '{1}' is close. "
                "Using '{1}' instead.".format(arg, fuzzy_attr[0]))
            attrnames.append(fuzzy_attr[0])
            continue

        print("%s is neither a filename nor an attribute name." % arg)
        parser.print_usage()
        parser.exit(1)

    # Since there is some ambiguity to the correct number of arguments, check
    # that there is at least one filename (or two in multifile mode), and also
    # at least one attribute to parse if the -l option was not passed.
    if not filenames:
        print("No logfiles given")
        parser.exit(1)
    if multifile and len(filenames) == 1:
        print("Expecting at least two logfiles in multifile mode")
        parser.exit(1)
    if not showattr and not attrnames:
        print("No attributes given")
        parser.exit(1)

    # The keyword dictionary is not used much, but could be useful for
    # passing options downstream. For example, we might use --future for
    # triggering experimental or alternative behavior (as with optdone).
    # It does not depend on the file being read, so build it only once.
    kwargs = {
        'verbose': verbose,
        'loglevel': logging.INFO if verbose else logging.ERROR,
    }
    if args.future:
        kwargs['future'] = True
    if cjsonfile:
        kwargs['cjson'] = True

    # This should be sufficient to correctly handle multiple files, that is to
    # run the loop below only once with all logfiles in the variable `filename`.
    if multifile:
        filenames = [filenames]

    # Now parse each file and print out the requested attributes.
    for filename in filenames:

        if multifile:
            name = ", ".join(filename[:-1]) + " and " + filename[-1]
        else:
            name = filename

        print("Attempting to read %s" % name)
        data = ccread(filename, **kwargs)

        if data is None:
            print("Cannot figure out the format of '%s'" % name)
            print("Report this to the cclib development team if you think it is an error.")
            print("\n" + parser.format_usage())
            parser.exit(1)

        if showattr:
            print("cclib can parse the following attributes from %s:" % name)
            if cjsonfile:
                for key in data:
                    print(key)
                # The listing is advertised as being per file, so move on to
                # the next file instead of abandoning the remaining ones.
                continue
            for attr in data._attrlist:
                if hasattr(data, attr):
                    print("  %s" % attr)
            continue

        invalid = False
        for attr in attrnames:
            if cjsonfile:
                if attr in data:
                    print("%s:\n%s" % (attr, data[attr]))
                    continue
            elif hasattr(data, attr):
                print(attr)
                attr_val = getattr(data, attr)
                # With --full, attributes that are lists of arrays are
                # printed one element at a time.
                if attr in data._listsofarrays and full:
                    for val in attr_val:
                        pprint(val)
                else:
                    pprint(attr_val)
                continue

            print("Could not parse %s from this file." % attr)
            invalid = True
        if invalid:
            parser.print_help()
216
217
if __name__ == "__main__":

    # Run the command line interface only when this file is executed as a
    # script, not when it is imported as a module.
    ccget()
221