1#!/usr/bin/env python
2
3# Author: Andrew Jewett (jewett.aij at g mail)
4# License: MIT License  (See LICENSE.md)
5# Copyright (c) 2017, California Institute of Technology
6# All rights reserved.
7
8g_program_name = __file__.split('/')[-1]   # = 'charge_by_bond.py'
9g_date_str = '2020-11-04'
10g_version_str = '0.15.0'
11
12
13import sys
14import re
15from collections import defaultdict
16import re
17
18try:
19    from . import ttree_lex
20    from .lttree_styles import AtomStyle2ColNames, ColNames2AidAtypeMolid
21except (ImportError, SystemError, ValueError):
22    # not installed as a package
23    import ttree_lex
24    from lttree_styles import AtomStyle2ColNames, ColNames2AidAtypeMolid
25
26
27
28def LookupChargePairs(chargebyatomid,
29                      # bond_ids,
30                      # bond_types,
31                      # bond_pairs,
32                      lines_atoms,
33                      lines_bonds,
34                      lines_bond_list,
35                      lines_chargebybond,
36                      atom_style,
37                      section_name,
38                      prefix='',
39                      suffix=''):
40                      # bond_ids_offset=0):
41                      # report_progress = False):
42    """
43    LookupChargePairs() looks up partial-charge pair contributions from the
44    types of atoms participating in a bond.
45
46    Output:
47    ...It looks up the corresponding change in the partial charges for
48       each pair of atoms and stores this in the "chargebyatomid" dictionary.
49
50    Input (continued):
51       This function requires:
52    ...a list of bonded pairs of atoms
53        stored in the lines_bonds variable (from the "Data Bond List"
54                                       or "Data Bonds AtomId AtomId" sections)
55    ...and a list of atom types
56        stored in the lines_atoms variable (from the "Data Atoms" section)
57
58    ...and a list of charge-pairs-as-a-function-of-atom-types
59        stored in the lines_chargebybond (from the "Data Bonds By Type" section)
60
61    """
62
63    column_names = AtomStyle2ColNames(atom_style)
64    i_atomid, i_atomtype, i_molid = ColNames2AidAtypeMolid(column_names)
65
66    atomids = []
67    atomtypes = []
68    atomids2types = {}
69
70    for iv in range(0, len(lines_atoms)):
71        line = lines_atoms[iv].strip()
72        if '#' in line:
73            icomment = line.find('#')
74            line = (line[:icomment]).strip()
75        if len(line) > 0:
76            tokens = ttree_lex.SplitQuotedString(line)
77            if ((len(tokens) <= i_atomid) or (len(tokens) <= i_atomtype)):
78                sys.stderr.write("\"" + line + "\"\n")
79                raise(ttree_lex.InputError(
80                    'Error not enough columns on line ' + str(iv + 1) + ' of \"Atoms\" section.'))
81            tokens = ttree_lex.SplitQuotedString(line)
82            atomid = ttree_lex.EscCharStrToChar(tokens[i_atomid])
83            atomids.append(atomid)
84            atomtype = ttree_lex.EscCharStrToChar(tokens[i_atomtype])
85            atomtypes.append(atomtype)
86            atomids2types[atomid] = atomtype
87
88    #assert(isinstance(bond_ids, list))
89    #assert(isinstance(bond_types, list))
90    #assert(isinstance(bond_pairs, list))
91    #del bond_ids[:]
92    #del bond_types[:]
93    #del bond_pairs[:]
94    bond_pairs = []
95
96    for ie in range(0, len(lines_bond_list)):
97        line = lines_bond_list[ie].strip()
98        if '#' in line:
99            icomment = line.find('#')
100            line = (line[:icomment]).strip()
101        if len(line) == 0:
102            continue
103        tokens = ttree_lex.SplitQuotedString(line)
104        if len(tokens) == 3:
105            # bond_ids.append(ttree_lex.EscCharStrToChar(tokens[0]))
106            bond_pairs.append((ttree_lex.EscCharStrToChar(tokens[1]),
107                               ttree_lex.EscCharStrToChar(tokens[2])))
108        else:
109            raise(ttree_lex.InputError('Incorrect number of columns on line ' +
110                                       str(ie + 1) + ' of \"' + section_name + '\" section.'))
111
112    for ie in range(0, len(lines_bonds)):
113        line = lines_bonds[ie].strip()
114        if '#' in line:
115            icomment = line.find('#')
116            line = (line[:icomment]).strip()
117        if len(line) == 0:
118            continue
119        tokens = ttree_lex.SplitQuotedString(line)
120        if len(tokens) == 4:
121            # bond_ids.append(ttree_lex.EscCharStrToChar(tokens[0]))
122            # bond_types.append(ttree_lex.EscCharStrToChar(tokens[1]))
123            bond_pairs.append((ttree_lex.EscCharStrToChar(tokens[2]),
124                               ttree_lex.EscCharStrToChar(tokens[3])))
125        else:
126            raise(ttree_lex.InputError('Incorrect number of columns on line ' +
127                                       str(ie + 1) + ' of \"' + section_name + '\" section.'))
128
129    # for ie in range(0, len(lines_bonds_atomid_atomid)):
130    #    line = lines_bonds_atomid_atomid[ie].strip()
131    #    if '#' in line:
132    #        icomment = line.find('#')
133    #        line = (line[:icomment]).strip()
134    #    if len(line) == 0:
135    #        continue
136    #    tokens = ttree_lex.SplitQuotedString(line)
137    #    if len(tokens) == 2:
138    #        #bondid_n = bond_ids_offset + len(bond_ids) + 1
139    #        #bond_ids.append(prefix+str(bondid_n)+suffix)
140    #        bond_pairs.append( (ttree_lex.EscCharStrToChar(tokens[0]),
141    #                            ttree_lex.EscCharStrToChar(tokens[1])) )
142    #    else:
143    #        raise(ttree_lex.InputError('Incorrect number of columns on line '+str(ie+1)+' of \"'+section_name+'\" section.'))
144
145    #assert(len(bond_types) == 0)
146    typepattern_to_chargepairs = []
147    warning_unassigned_chargepairs = None
148
149    for i in range(0, len(lines_chargebybond)):
150        line = lines_chargebybond[i].strip()
151        if '#' in line:
152            icomment = line.find('#')
153            line = (line[:icomment]).strip()
154        if len(line) > 0:
155            tokens = ttree_lex.SplitQuotedString(line)
156
157            if (len(tokens) != 4):
158                raise(ttree_lex.InputError('Error: Wrong number of columns in the \"Charge Pairs By Type\" section of data file.\n'
159                                           'Offending line:\n' +
160                                           '\"' + line + '\"\n'
161                                           'Expected 4 columns\n'))
162
163            chargepair = (float(tokens[2]),
164                          float(tokens[3]))
165
166            typepattern = []
167
168            for typestr in tokens[:2]:
169                if ttree_lex.HasRE(typestr):
170                    regex_str = VarNameToRegex(typestr)
171                    typepattern.append(re.compile(regex_str))
172                else:
173                    typepattern.append(ttree_lex.EscCharStrToChar(typestr))
174
175            typepattern_to_chargepairs.append([typepattern, chargepair])
176
177    for atomid1, atomid2 in bond_pairs:
178
179        if atomid1 not in atomids2types:
180            raise ttree_lex.InputError('Error: atom \"' + atomid1 + '\" not defined in \"Data Atoms\".\n'
181                                       '       This usually happens when the user mistypes one of the names of the\n'
182                                       '       $atoms in either a \"Data Atoms\" or \"Data Bond List\" section.\n'
183                                       '       To find out where the mistake occured, search the \n'
184                                       '       \"ttree_assignments.txt\" file for:\n'
185                                       '       \"' + atomid1 + '\"\n')
186
187        if atomid2 not in atomids2types:
188            raise ttree_lex.InputError('Error: atom \"' + atomid2 + '\" not defined in \"Data Atoms\".\n'
189                                       '       This usually happens when the user mistypes one of the names of the\n'
190                                       '       $atoms in either a \"Data Atoms\" or \"Data Bond List\" section.\n'
191                                       '       To find out where the mistake occured, search the \n'
192                                       '       \"ttree_assignments.txt\" file for:\n'
193                                       '       \"' + atomid2 + '\"\n')
194
195        atomtype1 = atomids2types[atomid1]
196        atomtype2 = atomids2types[atomid2]
197
198        found = False
199        for typepattern, chargepair in typepattern_to_chargepairs:
200            # use string comparisons to check if atom types match the pattern
201            if ttree_lex.MatchesAll((atomtype1, atomtype2), typepattern):
202                # ("MatchesAll()" defined in "ttree_lex.py")
203                chargebyatomid[atomid1] += chargepair[0]
204                chargebyatomid[atomid2] += chargepair[1]
205                found = True
206            elif ttree_lex.MatchesAll((atomtype2, atomtype1), typepattern):
207                chargebyatomid[atomid1] += chargepair[1]
208                chargebyatomid[atomid2] += chargepair[0]
209                found = True
210        if (not found) and (not warning_unassigned_chargepairs):
211            warning_unassigned_chargepairs = (atomid1, atomid2)
212
213    if warning_unassigned_chargepairs:
214        sys.stderr.write('---------------------------------------------------------------------------\n'
215                         'Warning: bonds found between atoms with no partial-charge rules.\n'
216                         '         This means that somewhere you are using a force-field\n'
217                         '         which assigns atomic charge according to the bonds these atoms\n'
218                         '         participate in, AND at least one pair of bonded atoms does NOT have\n'
219                         '         a rule defined to assign charges to that pair of atoms.\n'
220                         '         This can happen if there is a problem with the force-field file\n'
221                         '         OR if you are defining the charges for these atoms manually\n'
222                         '         In the later case, it is not a problem.\n'
223                         '         The first bond with this problem is between this pair of atoms:\n'
224                         '            ' +
225                         str(warning_unassigned_chargepairs[0]) + '\n'
226                         '            ' +
227                         str(warning_unassigned_chargepairs[1]) + '\n'
228                         '---------------------------------------------------------------------------\n')
229
230def main():
231    """
232    This is is a "main module" wrapper for invoking chargepairs_by_type.py
233    as a stand alone program.  This program:
234    This program reads a LAMMPS data file (or an excerpt of a LAMMPS)
235    data file containing bonded many-body interactions by atom type
236    (and bond type), and generates a list of atom charges in LAMMPS input
237    script format consistent with those types (to the standard out).
238
239    Typical Usage:
240
241    chargepairs_by_type.py -atoms atoms.data \\
242                           -bonds bonds.data \\
243                           -chargebybond chargepairs_by_type.data \\
244                           > list_of_atom_charges.in
245
246    """
247
248    #######  Main Code Below: #######
249    sys.stderr.write(g_program_name + ' v' +
250                     g_version_str + ' ' + g_date_str + ' ')
251    if sys.version < '3':
252        sys.stderr.write(' (python version < 3)\n')
253    else:
254        sys.stderr.write('\n')
255
256    try:
257        fname_atoms = None
258        fname_bonds = None
259        fname_bond_list = None
260        fname_chargebybond = None
261        section_name = 'Data Bond List'  # (This will be replaced later.)
262        atom_style = 'full'
263        prefix = ''
264        suffix = ''
265        bond_lack_types = False
266
267        argv = [arg for arg in sys.argv]
268
269        # Loop over the remaining arguments not processed yet.
270        # These arguments are specific to the lttree.py program
271        # and are not understood by ttree.py:
272        i = 1
273        while i < len(argv):
274            #sys.stderr.write('argv['+str(i)+'] = \"'+argv[i]+'\"\n')
275            if ((argv[i].lower() == '-?') or
276                    (argv[i].lower() == '--?') or
277                    (argv[i].lower() == '-help') or
278                    (argv[i].lower() == '-help')):
279                if i + 1 >= len(argv):
280                    sys.stdout.write(man_page_text + '\n')
281                    sys.exit(0)
282
283            elif argv[i].lower() == '-atoms':
284                if i + 1 >= len(argv):
285                    raise ttree_lex.InputError('Error: ' + argv[i] + ' flag should be followed by a file name containing lines of\n'
286                                               '       text which might appear in the "Atoms" section of a LAMMPS data file.\n')
287                fname_atoms = argv[i + 1]
288                del(argv[i:i + 2])
289
290            elif argv[i].lower() == '-bonds':
291                if i + 1 >= len(argv):
292                    raise ttree_lex.InputError('Error: ' + argv[i] + ' flag should be followed by a file name containing lines of\n'
293                                               '       text which might appear in the "Bonds" section of a LAMMPS data file.\n')
294                fname_bonds = argv[i + 1]
295                del(argv[i:i + 2])
296
297            elif argv[i].lower() == '-bond-list':
298                if i + 1 >= len(argv):
299                    raise ttree_lex.InputError(
300                        'Error: ' + argv[i] + ' flag should be followed by a file name\n')
301                    # raise ttree_lex.InputError('Error: '+argv[i]+' flag should be followed by a file name containing lines of\n'
302                    #                 '       text which might appear in the "Bonds No Types" section of a LAMMPS data file.\n')
303                fname_bond_list = argv[i + 1]
304                section_name = "Data Bond List"
305                del(argv[i:i + 2])
306
307            elif ((argv[i].lower() == '-chargebybond') or
308                  (argv[i].lower() == '-chargesbybond') or
309                  (argv[i].lower() == '-charge-by-bond') or
310                  (argv[i].lower() == '-charges-by-bond') or
311                  (argv[i].lower() == '-chargepairsbytype') or
312                  (argv[i].lower() == '-chargepairs-by-type') or
313                  (argv[i].lower() == '-charge-pairs-by-type')):
314                if i + 1 >= len(argv):
315                    raise ttree_lex.InputError(
316                        'Error: ' + argv[i] + ' flag should be followed by a file name\n')
317
318                    # raise ttree_lex.InputError('Error: '+argv[i]+' flag should be followed by a file name containing\n'
319                    #                 '       text which might appear in the "'+section_name+' By Type" section\n'
320                    #                 '       of a LAMMPS data file.\n')
321                fname_chargebybond = argv[i + 1]
322                del(argv[i:i + 2])
323
324            elif ((argv[i].lower() == '-atom-style') or
325                  (argv[i].lower() == '-atom_style')):
326                if i + 1 >= len(argv):
327                    raise ttree_lex.InputError('Error: ' + argv[i] + ' flag should be followed by a an atom_style name.\n'
328                                               '       (Or single quoted string which includes a space-separated\n'
329                                               '       list of column names.)\n')
330                atom_style = argv[i + 1]
331                del(argv[i:i + 2])
332
333            elif argv[i][0] == '-':
334                raise ttree_lex.InputError('Error(' + g_program_name + '):\n'
335                                           'Unrecogized command line argument \"' + argv[i] + '\"\n')
336            else:
337                i += 1
338
339        if len(argv) != 1:
340            # if there are more than 2 remaining arguments,
341            problem_args = ['\"' + arg + '\"' for arg in argv[1:]]
342            raise ttree_lex.InputError('Syntax Error(' + g_program_name + '):\n\n'
343                                       '       Problem with argument list.\n'
344                                       '       The remaining arguments are:\n\n'
345                                       '         ' +
346                                       (' '.join(problem_args)) + '\n\n'
347                                       '       (The actual problem may be earlier in the argument list.)\n')
348
349        #bond_types = []
350        fatoms = open(fname_atoms, 'r')
351        lines_bonds = []
352        lines_bond_list = []
353        fbonds = fbond_list = None
354        try:
355            if fname_bonds != None:
356                fbonds = open(fname_bonds, 'r')
357                lines_bonds = fbonds.readlines()
358                fbonds.close()
359        except IOError:
360            pass
361        try:
362            if fname_bond_list != None:
363                fbond_list = open(fname_bond_list, 'r')
364                lines_bond_list = fbond_list.readlines()
365                fbond_list.close()
366        except IOError:
367            pass
368        if ((len(lines_bonds) == 0) and (len(lines_bond_list) == 0)):
369            sys.stderr.write('Error(' + g_program_name + '): No bonds defined for this system\n'
370                             '      (This error may be a bug in moltemplate.)\n')
371        fchargebybond = open(fname_chargebybond, 'r')
372        lines_atoms = fatoms.readlines()
373
374        lines_chargebybond = fchargebybond.readlines()
375        fatoms.close()
376        fchargebybond.close()
377        chargebyatomid = defaultdict(float)
378
379        LookupChargePairs(chargebyatomid,
380                          lines_atoms,
381                          lines_bonds,
382                          lines_bond_list,
383                          lines_chargebybond,
384                          atom_style,
385                          section_name)
386
387        for atomid, charge in chargebyatomid.items():
388            sys.stdout.write('  set atom ' + str(atomid) +
389                             ' charge ' + str(charge) + '\n')
390
391    except (ValueError, ttree_lex.InputError) as err:
392        sys.stderr.write('\n' + str(err) + '\n')
393        sys.exit(-1)
394
395
396
397if __name__ == "__main__":
398    main()
399
400