1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4# Author: Andrew Jewett
5# License: MIT License  (See LICENSE.md)
6# Copyright (c) 2014
7
8"""
9dump2data.py
10
11Extract dynamical degrees of freedom from a lammps DUMP file (from the stdin)
12and construct a new DATA file (to the stdout).
13A reference DATA file is needed (argument).
14
15   basic usage
16./dump2data.py orig_file.data < dump.lammpstrj > new_file.data
17   (This extract last frame, uses "full" atom_style.)
18
19   options:
20./dump2data.py [-t t -atomstyle style] orig.data < dump.lammpstrj > new.data
21
22"""
23
24
25#g_program_name = 'dump2data.py'
26g_program_name = __file__.split('/')[-1]
27g_date_str = '2021-4-27'
28g_version_str = '0.61.0'
29
30import sys
31from collections import defaultdict
32from operator import itemgetter, attrgetter
33
34
35class InputError(Exception):
36
37    def __init__(self, err_msg):
38        self.err_msg = err_msg
39
40    def __str__(self):
41        return self.err_msg
42
43
44def ErrorLeader(infile, lineno):
45    return '\"' + infile + '\", line ' + str(lineno) + ': '
46
47
48class MiscSettings(object):
49
50    def __init__(self):
51        self.tstart = None
52        self.tstop = None
53        self.timestep_str = ''
54        self.last_frame = False
55        self.center_frame = False
56        self.output_format = 'data'
57        self.input_format = 'dump'
58        self.multi = True
59        self.skip_interval = 1
60        self.atom_type_intervals = []
61        self.atom_id_intervals = []
62        self.mol_id_intervals = []
63        self.scale = None
64        self.in_coord_file_name = ''
65
66class AtomStyleSettings(object):
67
68    def __init__(self):
69        # The following new member data indicate which columns store
70        # LAMMPS-specific information.
71        # The next 6 members store keep track of the different columns
72        # of the "Atoms" section of a LAMMPS data file:
73        self.column_names = []  # <--A list of column names (optional)
74        # <--A triplet of integers indicating which columns store coordinate data
75        self.i_coords = []
76        # self.ii_coords= [] #<--A list of triplets of column indexes storing
77        # coordinate data
78        self.ii_vects = []  # <--A list of triplets of column indexes storing directional data
79        #   (such as dipole or ellipsoid orientations)
80        self.i_atomid = None  # <--An integer indicating which column has the atomid
81        self.i_atomtype = None  # <--An integer indicating which column has the atomtype
82        self.i_molid = None  # <--An integer indicating which column has the molid, if applicable
83
84
85class DataSettings(AtomStyleSettings):
86
87    def __init__(self):
88        AtomStyleSettings.__init__(self)
89        self.contents = ''
90        self.file_name = ''
91
92
93# Atom Styles in LAMMPS as of 2011-7-29
94g_style_map = {'angle':     ['atom-ID', 'molecule-ID', 'atom-type', 'x', 'y', 'z'],
95               'atomic':    ['atom-ID', 'atom-type', 'x', 'y', 'z'],
96               'body':      ['atom-ID', 'atom-type', 'bodyflag', 'mass', 'x', 'y', 'z'],
97               'bond':      ['atom-ID', 'molecule-ID', 'atom-type', 'x', 'y', 'z'],
98               'charge':    ['atom-ID', 'atom-type', 'q', 'x', 'y', 'z'],
99               'dipole':    ['atom-ID', 'atom-type', 'q', 'x', 'y', 'z', 'mux', 'muy', 'muz'],
100               'dpd':       ['atom-ID', 'atom-type', 'theta', 'x', 'y', 'z'],
101               'electron':  ['atom-ID', 'atom-type', 'q', 'spin', 'eradius', 'x', 'y', 'z'],
102               'ellipsoid': ['atom-ID', 'atom-type', 'ellipsoidflag', 'density', 'x', 'y', 'z'],
103               'full':      ['atom-ID', 'molecule-ID', 'atom-type', 'q', 'x', 'y', 'z'],
104               'line':      ['atom-ID', 'molecule-ID', 'atom-type', 'lineflag', 'density', 'x', 'y', 'z'],
105               'meso':      ['atom-ID', 'atom-type', 'rho', 'e', 'cv', 'x', 'y', 'z'],
106               'molecular': ['atom-ID', 'molecule-ID', 'atom-type', 'x', 'y', 'z'],
107               'peri':      ['atom-ID', 'atom-type', 'volume', 'density', 'x', 'y', 'z'],
108               'smd':       ['atom-ID', 'atom-type', 'molecule-ID' 'volume', 'mass', 'kernel-radius', 'contact-radius', 'x', 'y', 'z'],
109               'sphere':    ['atom-ID', 'atom-type', 'diameter', 'density', 'x', 'y', 'z'],
110               'template':  ['atom-ID', 'molecule-ID', 'template-index', 'template-atom', 'atom-type', 'x', 'y', 'z'],
111               'tri':       ['atom-ID', 'molecule-ID', 'atom-type', 'triangleflag', 'density', 'x', 'y', 'z'],
112               'wavepacket': ['atom-ID', 'atom-type', 'charge', 'spin', 'eradius', 'etag', 'cs_re', 'cs_im', 'x', 'y', 'z'],
113               'hybrid':    ['atom-ID', 'atom-type', 'x', 'y', 'z'],
114               # The following styles were removed from LAMMPS as of 2012-3
115               'colloid':   ['atom-ID', 'atom-type', 'x', 'y', 'z'],
116               'granular':  ['atom-ID', 'atom-type', 'diameter', 'density', 'x', 'y', 'z']}
117
118
119def AtomStyle2ColNames(atom_style_string):
120
121    atom_style_string = atom_style_string.strip()
122    if len(atom_style_string) == 0:
123        raise InputError('Error(dump2data): Invalid atom_style\n'
124                         '       (The atom_style command was followed by an empty string.)\n')
125    atom_style_args = atom_style_string.split()
126    atom_style = atom_style_args[0]
127
128    hybrid_args = atom_style_args[1:]
129
130    if (atom_style not in g_style_map):
131        if (len(atom_style_args) >= 2):
132            # If the atom_style_string includes at least 2 words, then we
133            # interpret this as a list of the individual column names
134            return atom_style_args
135        else:
136            raise InputError(
137                'Error(dump2data): Unrecognized atom_style: \"' + atom_style + '\"\n')
138
139    if (atom_style != 'hybrid'):
140        return g_style_map[atom_style]
141    else:
142        column_names = ['atom-ID', 'atom-type', 'x', 'y', 'z']
143        if (len(hybrid_args) == 0):
144            raise InputError(
145                'Error(dump2data): atom_style hybrid must be followed by a sub_style.\n')
146        for sub_style in hybrid_args:
147            if (sub_style not in g_style_map):
148                raise InputError(
149                    'Error(dump2data): Unrecognized atom_style: \"' + sub_style + '\"\n')
150            for cname in g_style_map[sub_style]:
151                if cname not in column_names:
152                    column_names.append(cname)
153
154        return column_names
155
156
157def ColNames2AidAtypeMolid(column_names):
158    # Because of the diversity of ways that these
159    # numbers are referred to in the LAMMPS documentation,
160    # we have to be flexible and allow the user to refer
161    # to these quantities in a variety of ways.
162    # Hopefully this covers everything:
163
164    i_atomid = None
165    if 'atom-ID' in column_names:
166        i_atomid = column_names.index('atom-ID')
167    elif 'atom−ID' in column_names:  # (− is the character used in the manual)
168        i_atomid = column_names.index('atom−ID')
169    elif 'atomID' in column_names:
170        i_atomid = column_names.index('atomID')
171    elif 'atomid' in column_names:
172        i_atomid = column_names.index('atomid')
173    elif 'id' in column_names:
174        i_atomid = column_names.index('id')
175    elif 'atom' in column_names:
176        i_atomid = column_names.index('atom')
177    elif '$atom' in column_names:
178        i_atomid = column_names.index('$atom')
179    else:
180        raise InputError(
181            'Error(dump2data): List of column names lacks an \"atom-ID\"\n')
182
183    i_atomtype = None
184    if 'atom-type' in column_names:
185        i_atomtype = column_names.index('atom-type')
186    elif 'atom−type' in column_names:  # (− hyphen character used in manual)
187        i_atomtype = column_names.index('atom−type')
188    elif 'atomtype' in column_names:
189        i_atomtype = column_names.index('atomtype')
190    elif 'type' in column_names:
191        i_atomtype = column_names.index('type')
192    elif '@atom' in column_names:
193        i_atomtype = column_names.index('@atom')
194    else:
195        raise InputError(
196            'Error(dump2data): List of column names lacks an \"atom-type\"\n')
197
198    i_molid = None
199    if 'molecule-ID' in column_names:
200        i_molid = column_names.index('molecule-ID')
201    elif 'molecule−ID' in column_names:  # (− hyphen character used in manual)
202        i_molid = column_names.index('molecule−ID')
203    elif 'moleculeID' in column_names:
204        i_molid = column_names.index('moleculeID')
205    elif 'moleculeid' in column_names:
206        i_molid = column_names.index('moleculeid')
207    elif 'molecule' in column_names:
208        i_molid = column_names.index('molecule')
209    elif 'molID' in column_names:
210        i_molid = column_names.index('molID')
211    elif 'molid' in column_names:
212        i_molid = column_names.index('molid')
213    elif 'mol' in column_names:
214        i_molid = column_names.index('mol')
215    elif '$mol' in column_names:
216        i_molid = column_names.index('$mol')
217    else:
218        pass  # some atom_types do not have a valid molecule-ID
219
220    return i_atomid, i_atomtype, i_molid
221
222
223def ColNames2Coords(column_names):
224    """ Which of the columns correspond to coordinates
225        which must be transformed using rigid-body
226        (affine: rotation + translation) transformations?
227        This function outputs a list of lists of triplets of integers.
228
229    """
230    i_x = None
231    i_y = None
232    i_z = None
233    if 'x' in column_names:
234        i_x = column_names.index('x')
235    if 'y' in column_names:
236        i_y = column_names.index('y')
237    if 'z' in column_names:
238        i_z = column_names.index('z')
239    if (((i_x != None) != (i_y != None)) or
240            ((i_y != None) != (i_z != None)) or
241            ((i_z != None) != (i_x != None))):
242        raise InputError(
243            'Error(dump2data): columns must include \"x\", \"y\", and \"z\".\n')
244    return [[i_x, i_y, i_z]]
245
246
247def ColNames2Vects(column_names):
248    """ Which of the columns correspond to coordinates
249        which must be transformed using rotations?
250        Some coordinates like dipole moments and
251        ellipsoid orientations should only be rotated
252        (not translated).
253        This function outputs a list of lists of triplets of integers.
254
255    """
256    vects = []
257    i_mux = None
258    i_muy = None
259    i_muz = None
260    if 'mux' in column_names:
261        i_mux = column_names.index('mux')
262    if 'muy' in column_names:
263        i_muy = column_names.index('muy')
264    if 'muz' in column_names:
265        i_muz = column_names.index('muz')
266    if (((i_mux != None) != (i_muy != None)) or
267            ((i_muy != None) != (i_muz != None)) or
268            ((i_muz != None) != (i_mux != None))):
269        raise InputError(
270            'Error(dump2data): custom atom_style list must define mux, muy, and muz or none.\n')
271    if i_mux != None:
272        vects.append([i_mux, i_muy, i_muz])
273    i_quati = None
274    i_quatj = None
275    i_quatk = None
276    if 'quati' in column_names:
277        i_quati = column_names.index('quati')
278    if 'quatj' in column_names:
279        i_quatj = column_names.index('quatj')
280    if 'quatk' in column_names:
281        i_quatk = column_names.index('quatk')
282    if (((i_quati != None) != (i_quatj != None)) or
283            ((i_quatj != None) != (i_quatk != None)) or
284            ((i_quatk != None) != (i_quati != None))):
285        raise InputError(
286            'Error(dump2data): custom atom_style list must define quati, quatj, and quatk or none.\n')
287    if i_quati != None:
288        vects.append([i_quati, i_quatj, i_quatk])
289    return vects
290
291
292
293def Str2IntervalUnion(s):
294    """
295    Convert strings like '1-3,5,9-7' into [[1,3],[5,5],[9,7]]
296    """
297    try:
298        tokens = s.split(',')
299        intervals = []
300        for token in tokens:
301            a_str = token
302            b_str = token
303            i_separator = token.find('-')
304            if i_separator == -1:
305                i_separator = token.find('*')
306            if i_separator == -1:
307                i_separator = token.find(':')
308            if i_separator != -1:
309                a_str = token[:i_separator]
310                b_str = token[i_separator+1:]
311            if (len(a_str) == 0) or (a_str in ('-','*',':')):
312                a = 0
313            else:
314                a = int(a_str)
315            if (len(b_str) == 0) or (b_str in ('-','*',':')):
316                b = 0
317            else:
318                b = int(b_str)
319            intervals.append((a,b))
320        return intervals
321    except (ValueError) as err:
322        sys.stderr.write('Error: Incorrect format in numeric interval: "'+s+'"\n')
323        sys.exit(-1)
324
325
326
327
328def ParseArgs(argv,
329              misc_settings,
330              data_settings,
331              warning_strings=None):
332
333    # Loop over the remaining arguments not processed yet.
334    # These arguments are specific to the lttree.py program
335    # and are not understood by this program.
336    i = 1
337    while i < len(argv):
338        #sys.stderr.write('argv['+str(i)+'] = \"'+argv[i]+'\"\n')
339        if ((argv[i].lower() == '-atomstyle') or
340            (argv[i].lower() == '-atom_style') or
341            (argv[i].lower() == '-atom-style')):
342            in_init = []
343            if i + 1 >= len(argv):
344                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by a an atom_style name.\n'
345                                 '       (Or single quoted string which includes a space-separated\n'
346                                 '       list of column names.)\n')
347            data_settings.column_names = AtomStyle2ColNames(argv[i + 1])
348            sys.stderr.write('    \"Atoms\" column format:\n')
349            sys.stderr.write(
350                '    ' + (' '.join(data_settings.column_names)) + '\n')
351
352            # ColNames2Coords() and ColNames2Vects() generate lists of
353            # triplets of integers, storing the column numbers containing
354            # x, y, and z coordinate values, and vx,vy,vz direction vectors.
355            data_settings.ii_vects = ColNames2Vects(data_settings.column_names)
356            ii_coords = ColNames2Coords(data_settings.column_names)
357            # This program assumes that there is only one coordinate triplet
358            # (x,y,z) for each atom.  Hence we assume that len(ii_coords)==1
359            assert(len(ii_coords) == 1)
360            data_settings.i_coords = ii_coords[0]
361
362            # Now figure out which columns correspond to atomid, atomtype,
363            # molid
364            data_settings.i_atomid, data_settings.i_atomtype, data_settings.i_molid = ColNames2AidAtypeMolid(
365                data_settings.column_names)
366            del(argv[i:i + 2])
367
368        elif (argv[i].lower() == '-icoord'):
369            if i + 1 >= len(argv):
370                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers\n'
371                                 '       corresponding to column numbers for coordinates in\n'
372                                 '       the \"Atoms\" section of a LAMMPS data file.\n')
373            ilist = argv[i + 1].split()
374            if (len(ilist) % 3) != 0:
375                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers.\n'
376                                 '       This is usually a list of 3 intebers, but it can contain more.\n'
377                                 '       The number of cooridnate columns must be divisible by 3,\n'
378                                 '       (even if the simulation is in 2 dimensions)\n')
379
380            #ii_coords = []
381            # for i in range(0, len(ilist)/3):
382            #    cols = [ilist[3*i]+1, ilist[3*i+1]+1, ilist[3*i+2]+1]
383            #    ii_coords.append(cols)
384            # if ((len(ii_coords) != 0) or (len(ii_coords[0]) != 3)):
385            #    raise InputError('Error(dump2data): Argument \"'+argv[i]+'\" must be followed by exactly 3 integers.\n')
386
387            data_settings.i_coords = ilist
388            if (len(i_coords) != 3):
389                raise InputError('Error(dump2data): Argument \"' +
390                                 argv[i] + '\" must be followed by exactly 3 integers.\n')
391
392            data_settings.i_coords = ii_coords[0]
393
394            del(argv[i:i + 2])
395
396        elif (argv[i].lower() == '-ivect'):
397            if i + 1 >= len(argv):
398                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers\n'
399                                 '       corresponding to column numbers for direction vectors in\n'
400                                 '       the \"Atoms\" section of a LAMMPS data file.\n')
401            ilist = argv[i + 1].split()
402            if (len(ilist) % 3) != 0:
403                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers.\n'
404                                 '       This is usually a list of 3 intebers, but it can contain more.\n'
405                                 '       The number of cooridnate columns must be divisible by 3,\n'
406                                 '       (even if the simulation is in 2 dimensions)\n')
407
408            data_settings.ii_vects = []
409            for i in range(0, len(ilist) / 3):
410                cols = [ilist[3 * i] + 1, ilist[3 * i + 1] +
411                        1, ilist[3 * i + 2] + 1]
412                setting.ii_vects.append(cols)
413            # This should override any earlier settings as a result of the
414            # -atomstyle argument.  So you can specify a custom list of column
415            # names using -atomstyle "list of column names", and then afterwards
416            # specify which of these columns correspond to direction vectors
417            # using the "-ivect" command line argument later on.
418            # This way, in theory you should be able to read columns from
419            # new custom atom-styles that have not been invented yet.
420            # (Although I haven't tested this.)
421
422            del(argv[i:i + 2])
423        # i_atomid is not really needed for this program, but I load it anyway
424        elif ((argv[i].lower() == '-iatomid') or
425              (argv[i].lower() == '-iid') or
426              (argv[i].lower() == '-iatom-id')):
427            if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))):
428                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer\n'
429                                 '       (>=1) indicating which column in the \"Atoms\" section of a\n'
430                                 '       LAMMPS data file contains the atom id number (typically 1).\n'
431                                 '       (This argument is unnecessary if you use the -atomstyle argument.)\n')
432            i_atomid = int(argv[i + 1]) - 1
433            del(argv[i:i + 2])
434        # i_atomtype is not really needed for this program, but I load it
435        # anyway
436        elif ((argv[i].lower() == '-iatomtype') or
437              (argv[i].lower() == '-itype') or
438              (argv[i].lower() == '-iatom-type')):
439            if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))):
440                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer\n'
441                                 '       (>=1) indicating which column in the \"Atoms\" section of a\n'
442                                 '       LAMMPS data file contains the atom type.\n'
443                                 '       (This argument is unnecessary if you use the -atomstyle argument.)\n')
444            i_atomtype = int(argv[i + 1]) - 1
445            del(argv[i:i + 2])
446        # i_molid is not really needed for this program, but I load it anyway
447        elif ((argv[i].lower() == '-imolid') or
448              (argv[i].lower() == '-imol') or
449              (argv[i].lower() == '-imol-id') or
450              (argv[i].lower() == '-imoleculeid') or
451              (argv[i].lower() == '-imolecule-id')):
452            if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))):
453                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer\n'
454                                 '       (>=1) indicating which column in the \"Atoms\" section of a\n'
455                                 '       LAMMPS data file contains the molecule id number.\n'
456                                 '       (This argument is unnecessary if you use the -atomstyle argument.)\n')
457            del(argv[i:i + 2])
458        # Which frame do we want?
459        elif (argv[i].lower() == '-t'):
460            if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))):
461                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer indicating\n'
462                                 '       the frame you want to extract from the dump file (trajectory).\n'
463                                 '       This integer should match the timestep corresponding to the frame\n'
464                                 '       whose coordinates you wish to extract.\n')
465            misc_settings.timestep_str = argv[i + 1]
466            del(argv[i:i + 2])
467            misc_settings.multi = False
468            misc_settings.last_frame = False
469
470        elif (argv[i].lower() == '-tstart'):
471            if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))):
472                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer indicating\n'
473                                 '       the first frame you want to extract from the dump file (trajectory).\n'
474                                 '       This integer should match the timestep corresponding to the frame\n'
475                                 '       (after which) you wish to extract coordinates.\n')
476            misc_settings.tstart = float(argv[i + 1])
477            del(argv[i:i + 2])
478            misc_settings.multi = True
479
480        elif (argv[i].lower() == '-tstop'):
481            if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))):
482                raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an number indicating\n'
483                                 '       the first frame you want to extract from the dump file (trajectory).\n'
484                                 '       Frames after this timestep will be ignored.\n')
485            misc_settings.tstop = float(argv[i + 1])
486            del(argv[i:i + 2])
487            misc_settings.multi = True
488
489        elif (argv[i].lower() == '-center'):
490            misc_settings.center_frame = True
491            del(argv[i:i + 1])
492
493        elif ((argv[i].lower() == '-raw') or (argv[i].lower() == '-rawout')):
494            misc_settings.output_format = 'raw'
495            del(argv[i:i + 1])
496
497        elif (argv[i].lower() == '-rawin'):
498            misc_settings.input_format = 'raw'
499            misc_settings.multi = False
500            del(argv[i:i + 1])
501
502        elif ((argv[i].lower() == '-xyz') or
503              (argv[i].lower() == '-xyz-type') or
504              (argv[i].lower() == '-xyzout')):
505            misc_settings.output_format = 'xyz'
506            del(argv[i:i + 1])
507
508        elif (argv[i].lower() == '-xyz-id'):
509            misc_settings.output_format = 'xyz-id'
510            del(argv[i:i + 1])
511
512        elif (argv[i].lower() == '-xyz-mol'):
513            misc_settings.output_format = 'xyz-mol'
514            del(argv[i:i + 1])
515
516        elif (argv[i].lower() == '-xyz-type-mol'):
517            misc_settings.output_format = 'xyz-type-mol'
518            del(argv[i:i + 1])
519
520        elif (argv[i].lower() == '-xyzin'):
521            misc_settings.input_format = 'xyz'
522            misc_settings.multi = False
523            del(argv[i:i + 1])
524
525        elif (argv[i].lower() == '-multi'):
526            misc_settings.multi = True
527            del(argv[i:i + 1])
528
529        elif (argv[i].lower() == '-last'):
530            misc_settings.last_frame = True
531            misc_settings.multi = False
532            del(argv[i:i + 1])
533
534        elif (argv[i].lower() == '-interval'):
535            misc_settings.skip_interval = int(argv[i + 1])
536            del(argv[i:i + 2])
537
538        elif (argv[i].lower() == '-scale'):
539            misc_settings.scale = float(argv[i + 1])
540            del(argv[i:i + 2])
541
542        elif (argv[i].lower() == '-id'):
543            misc_settings.atom_id_intervals += Str2IntervalUnion(argv[i+1])
544            del(argv[i:i + 2])
545
546        elif (argv[i].lower() == '-type'):
547            misc_settings.atom_type_intervals += Str2IntervalUnion(argv[i+1])
548            del(argv[i:i + 2])
549
550        elif (argv[i].lower() == '-mol'):
551            misc_settings.mol_id_intervals += Str2IntervalUnion(argv[i+1])
552            del(argv[i:i + 2])
553
554        elif ((argv[i].lower() == '-in') or
555              (argv[i].lower() == '-dump')):
556            misc_settings.in_coord_file_name = argv[i+1]
557            del(argv[i:i + 2])
558
559        elif ((argv[i][0] == '-') and (__name__ == "__main__")):
560            raise InputError(
561                'Error(dump2data): Unrecogized command line argument \"' + argv[i] + '\"\n')
562        else:
563            i += 1
564
565    usage_examples = \
566"""    Typical usage:
567dump2data.py orig_file.data < dump.lammpstrj > new_file.data
568      (This extracts last frame, uses "full" atom_style.)
569    Additional options:
570dump2data.py -t t -atomstyle style orig.data < dump.lammpstrj > new.data
571"""
572
573    # if __name__ == "__main__":
574
575    if (len(argv) > 2):
576        # if there are more than 2 remaining arguments,
577        #   AND
578        # no other function will process the remaining argument list
579        # (ie. if __name__ == "__main__")
580        #   THEN
581        raise InputError('    ----\n'
582                         'ERROR(dump2data): You have too many arguments (or unrecognized arguments):\n'
583                         '       \"' + (' '.join(argv)) + '\"\n'
584                         '    ----\n'
585                         + usage_examples)
586    elif (len(argv) < 2):
587        if misc_settings.output_format == 'data':
588            raise InputError('    ----\n'
589                             'ERROR(dump2data): Problem with argument list:\n'
590                             '       Expected a LAMMPS .data file as an argument.\n'
591                             '    ----\n'
592                             + usage_examples)
593    else:
594        in_data_file = open(argv[1], 'r')
595        data_settings.file_name = argv[1]
596        data_settings.contents = in_data_file.readlines()
597        in_data_file.close()
598
599    # end of if-then statement for "if __name__ == "__main__""
600
601    if len(data_settings.i_coords) == 0:
602        if warning_strings != None:
603            warning_strings.append(
604                'WARNING(dump2data): atom_style unknown. (Use -atomstyle style. Assuming \"full\")')
605        warn_atom_style_unspecified = True
606        # The default atom_style is "full"
607        data_settings.column_names = AtomStyle2ColNames('full')
608        ii_coords = ColNames2Coords(data_settings.column_names)
609        # This program assumes that there is only one coordinate triplet
610        # (x,y,z) for each atom.  Hence we assume that len(ii_coords)==1
611        assert(len(ii_coords) == 1)
612        data_settings.i_coords = ii_coords[0]
613        data_settings.ii_vects = ColNames2Vects(data_settings.column_names)
614        data_settings.i_atomid, data_settings.i_atomtype, data_settings.i_molid = ColNames2AidAtypeMolid(
615            data_settings.column_names)
616
617        # sys.stderr.write('########################################################\n'
618        # '##            WARNING: atom_style unspecified         ##\n'
619        # '##    --> \"Atoms\" column data has an unknown format.  ##\n'
620        # '##              Assuming atom_style = \"full\"          ##\n'
621        # '########################################################\n'
622        # '## To specify the \"Atoms\" column format you can:      ##\n'
623        # '##   1) Use the -atom_style \"STYLE\"  argument         ##\n'
624        # '##      where \"STYLE\" is a string indicating a LAMMPS ##\n'
625        # '##      atom_style, including hybrid styles.(Standard ##\n'
626        # '##      atom styles defined in 2011 are supported.)   ##\n'
627        # '##   2) Use the -atom_style \"COL_LIST\"    argument    ##\n'
628        # '##      where \"COL_LIST" is a quoted list of strings  ##\n'
629        # '##      indicating the name of each column.           ##\n'
630        # '##      Names \"x\",\"y\",\"z\" are interpreted as          ##\n'
631        # '##      atomic coordinates. \"mux\",\"muy\",\"muz\"         ##\n'
632        # '##      and \"quati\",\"quatj\",\"quatk\" are               ##\n'
633        # '##      interpreted as direction vectors.             ##\n'
634        # '##   3) Use the -icoord \"cx cy cz...\" argument        ##\n'
635        # '##      where \"cx cy cz\" is a list of integers        ##\n'
636        # '##      indicating the column numbers for the x,y,z   ##\n'
637        # '##      coordinates of each atom.                     ##\n'
638        # '##   4) Use the -ivect \"cmux cmuy cmuz...\" argument   ##\n'
639        # '##      where \"cmux cmuy cmuz...\" is a list of        ##\n'
640        # '##      integers indicating the column numbers for    ##\n'
641        # '##      the vector that determines the direction of a ##\n'
642        # '##      dipole or ellipsoid (ie. a rotateable vector).##\n'
643        # '##      (More than one triplet can be specified. The  ##\n'
644        # '##       number of entries must be divisible by 3.)   ##\n'
645        # '##   5) Include a                                     ##\n'
646        # '##      write(\"in_init.txt\"){atom_style ...}          ##\n'
647        # '##      statement in your .ttree file.                ##\n'
648        # '########################################################\n')
649
650
651def GetIntAtomID(pair):
652    return int(pair[0])
653
654
655def WriteFrameToData(out_file,
656                     descr_str,
657                     misc_settings,
658                     data_settings,
659                     dump_column_names,
660                     natoms,
661                     coords,
662                     coords_ixiyiz,
663                     vects,
664                     velocities,
665                     atomtypes,
666                     molids,
667                     xlo_str, xhi_str,
668                     ylo_str, yhi_str,
669                     zlo_str, zhi_str,
670                     xy_str, xz_str, yz_str):
671    """
672    Open a data file.  Read the LAMMPS DATA file line by line.
673    When the line contains information which is also in the dump file,
674    replace that information with information from the dump file.
675    (Information from a dump file is stored in the arguments to this function.)
676    The resulting file also has LAMMPS DATA format.
677
678    """
679
680    section = ''
681    firstline = True
682    for line_orig in data_settings.contents:
683        ic = line_orig.find('#')
684        if ic != -1:
685            line = line_orig[:ic]
686        else:
687            line = line_orig
688        line = line.strip()
689
690        if firstline:  # Construct a new descriptive header line:
691            if descr_str != None:
692                line = descr_str
693            firstline = False
694
695        if (len(line) > 0):
696            # The initial section (section='') is assumed to be
697            # the "LAMMPS Description" section.  This is where the
698            # box boundaries are specified.
699            if section == '':
700                tokens = line.split()
701                if ((len(tokens) >= 2) and
702                        ((tokens[-2] == 'xlo') and (tokens[-1] == 'xhi')) and
703                        ((xlo_str != None) and (xhi_str != None))):
704                    tokens[0] = xlo_str
705                    tokens[1] = xhi_str
706                    line = ' '.join(tokens)
707                elif ((len(tokens) >= 2) and
708                      ((tokens[-2] == 'ylo') and (tokens[-1] == 'yhi')) and
709                      ((ylo_str != None) and (yhi_str != None))):
710                    tokens[0] = ylo_str
711                    tokens[1] = yhi_str
712                    line = ' '.join(tokens)
713                elif ((len(tokens) >= 2) and
714                      ((tokens[-2] == 'zlo') and (tokens[-1] == 'zhi')) and
715                      ((zlo_str != None) and (zhi_str != None))):
716                    tokens[0] = zlo_str
717                    tokens[1] = zhi_str
718                    line = ' '.join(tokens)
719                elif ((len(tokens) >= 3) and
720                      ((tokens[-3] == 'xy') and
721                       (tokens[-2] == 'xz') and
722                       (tokens[-1] == 'yz')) and
723                      ((xy_str != None) and
724                       (xz_str != None) and
725                       (yz_str != None))):
726                    tokens[0] = xy_str
727                    tokens[1] = xz_str
728                    tokens[2] = yz_str
729                    line = ' '.join(tokens)
730            if (line in set(['Masses', 'Velocities', 'Atoms', 'Ellipsoids',
731                             'Bond Coeffs', 'Angle Coeffs',
732                             'Dihedral Coeffs', 'Improper Coeffs',
733                             'Bonds', 'Angles', 'Dihedrals', 'Impropers'])):
734                section = line
735            else:
736                if (section == 'Atoms'):
737                    tokens = line.split()
738                    atomid = tokens[0]
739
740                    # update the atomtype and molID
741                    # (which may change during the simulation)
742                    if atomtypes:
743                        tokens[data_settings.i_atomtype] = atomtypes[atomid]
744                    if molids and data_settings.i_molid:
745                        tokens[data_settings.i_molid] = molids[atomid]
746
747                    if atomid in coords:
748                        # Loop over all of the vector degrees of
749                        # freedom of the particle, excluding coords
750                        # (for example: mu_x, mu_y, mu_z,
751                        #            or quat_i, quat_j, quat_k)
752                        # In principle, depending on the atom_style,
753                        # there could be multiple vectors per atom.
754                        for I in range(0, len(data_settings.ii_vects)):
755                            i_vx = data_settings.ii_vects[I][0]
756                            i_vy = data_settings.ii_vects[I][1]
757                            i_vz = data_settings.ii_vects[I][2]
758                            if atomid in vects:
759                                vxvyvz = vects[atomid][I]
760                                assert((type(vxvyvz) is tuple) and
761                                       (len(vxvyvz) == 3))
762                                if ((i_vx >= len(tokens)) or
763                                    (i_vy >= len(tokens)) or
764                                    (i_vz >= len(tokens))):
765                                    raise InputError('Error(dump2data): Atom style incompatible with data file.\n'
766                                                     '       Specify the atom_style using -atomstyle style.\n')
767
768                                # Replace the vector components with numbers
769                                # from the dump file
770                                tokens[i_vx] = vxvyvz[0]
771                                tokens[i_vy] = vxvyvz[1]
772                                tokens[i_vz] = vxvyvz[2]
773
774                            else:
775                                if (dump_column_names and
776                                    (data_settings.column_names[
777                                        i_vx] not in dump_column_names)):
778                                    raise InputError('Error(dump2data): You have a vector coordinate in your DATA file named \"' + data_settings.column_names[i_vx] + '\"\n'
779                                                     '       However there are no columns with this name in your DUMP file\n'
780                                                     '       (or the column was not in the expected place).\n'
781                                                     '       Hence, the atom styles in the dump and data files do not match.')
782
783                        # Now loop over the coordinates of each atom.
784                        # for I in range(0,len(data_settings.ii_coords)):
785                        #    xyz = coords[atomid][I]
786                        #            THIS LOOP IS SILLY.
787                        #            EACH ATOM ONLY HAS ONE SET OF X,Y,Z
788                        #            COORDINATES. COMMENTING OUT THIS LOOP:
789                        #    i_x = data_settings.ii_coords[I][0]
790                        #    i_y = data_settings.ii_coords[I][1]
791                        #    i_z = data_settings.ii_coords[I][2]
792                        # USING THIS INSTEAD:
793
794                        xyz = coords[atomid]
795                        i_x = data_settings.i_coords[0]
796                        i_y = data_settings.i_coords[1]
797                        i_z = data_settings.i_coords[2]
798                        if ((i_x >= len(tokens)) or
799                            (i_y >= len(tokens)) or
800                            (i_z >= len(tokens))):
801                            raise InputError('Error(dump2data): Atom style incompatible with data file.\n'
802                                             '       Specify the atom_style using -atomstyle style.\n')
803                        # Replace the coordinates with coordinates from
804                        # the dump file into tokens[i_x]...
805                        tokens[i_x] = str(xyz[0])
806                        tokens[i_y] = str(xyz[1])
807                        tokens[i_z] = str(xyz[2])
808
809                        # Are there there any integer coords
810                        # (ix, iy, iz) in the dump file?
811                        if coords_ixiyiz[atomid]:
812                            assert(len(coords_ixiyiz[atomid]) == 3)
813                            # Integer coords stored in the DATA file too?
814                            if len(tokens) == (len(data_settings.column_names) + 3):
815                                # Then replace the last 3 columns of the
816                                # line in the data file with: ix iy iz
817                                tokens[-3] = coords_ixiyiz[atomid][0]
818                                tokens[-2] = coords_ixiyiz[atomid][1]
819                                tokens[-1] = coords_ixiyiz[atomid][2]
820                            else:
821                                if (not misc_settings.center_frame):
822                                    # Append them to the end of the line:
823                                    tokens.append(coords_ixiyiz[atomid][0])
824                                    tokens.append(coords_ixiyiz[atomid][1])
825                                    tokens.append(coords_ixiyiz[atomid][2])
826
827                        # Now finally paste all the tokens together:
828                        line = ' '.join(tokens)
829
830                elif (section == 'Velocities'):
831                    tokens = line.split()
832                    atomid = tokens[0]
833                    if atomid in velocities:
834
835                        vxvyvz = velocities[atomid]
836                        if len(tokens) < 4:
837                            raise InputError(
838                                'Error(dump2data): Not enough columns in the \"Velocities\" file.\n')
839                        # Replace the coordinates with coordinates from
840                        # the dump file into tokens[i_x]...
841                        tokens[1] = str(vxvyvz[0])
842                        tokens[2] = str(vxvyvz[1])
843                        tokens[3] = str(vxvyvz[2])
844
845                        # Now finally paste all the tokens together:
846                        line = ' '.join(tokens)
847
848                elif section == '':
849                    # If section == '', then we are in the Header section
850                    # of the DATA file.  In that case, the modifications that
851                    # were made to that line were handled earlier, and there
852                    # is nothing left to do here.
853                    pass
854
855                else: # If we are not in one of the sections containing
856                    #   data that comes from the dump file (eg 'Atoms',
857                    #   'Velocities'), then copy the text directly from
858                    #   the original data file without modification.
859                    #   Since "line" has any comments and whitespace removed,
860                    #   replace it with "line_orig" which has everything.
861                    line = line_orig.rstrip('\n')
862
863        out_file.write(line + '\n')
864
865    return
866
867
868
869
870def InIntervalUnion(i, intervals):
871    """
872    Return whether integer i lies within in at least one of the intervals.
873    (Note: If 0 appears in the upper bound of an interval, it means +infinity.)
874    """
875    if len(intervals) == 0:
876        return True
877    accept = False
878    for interval in intervals:
879        assert(len(interval) == 2)
880        if (interval[0] <= i) and ((i <= interval[1]) or (interval[1] == 0)):
881            accept = True
882    return accept
883
884
885
886def main():
887    sys.stderr.write(g_program_name + ' v' +
888                     g_version_str + ' ' + g_date_str + ' ')
889    # if sys.version < '3':
890    #    sys.stderr.write(' (python version < 3)\n')
891    # else:
892    sys.stderr.write('\n')
893
894    try:
895        data_settings = DataSettings()
896        misc_settings = MiscSettings()
897        warning_strings = []
898        ParseArgs(sys.argv,
899                  misc_settings,
900                  data_settings,
901                  warning_strings)
902
903        # Open the lammps dump file (trajectory file)
904        # Skip to the line containing the correct frame/timestep.
905        # (this is the last frame by default).
906        # Read the "BOX BOUNDS" and the "ATOMS" sections.
907        # Store the x,y,z coordinates in the "coords" associative array
908        # (indexed by atom id, which could be non-numeric in general).
909
910        section = ''
911
912        #coords = defaultdict(list)
913        #coords_ixiyiz = defaultdict(list)
914        #vects = defaultdict(list)
915        #xlo_str = xhi_str = ylo_str = yhi_str = zlo_str = zhi_str = None
916        #xy_str = xz_str = yz_str = None
917        #natoms = -1
918        #timestep_str = ''
919
920        frame_coords = defaultdict(list)
921        frame_coords_ixiyiz = defaultdict(list)
922        frame_vects = defaultdict(list)
923        frame_velocities = defaultdict(list)
924        frame_atomtypes = defaultdict(list)
925        frame_molids = defaultdict(list)
926        frame_xlo_str = frame_xhi_str = None
927        frame_ylo_str = frame_yhi_str = None
928        frame_zlo_str = frame_zhi_str = None
929        frame_xy_str = frame_xz_str = frame_yz_str = None
930        frame_natoms = -1
931        frame_timestep_str = ''
932        i_atomid = i_atomtype = i_molid = -1
933        i_x = i_y = i_z = i_xu = i_yu = i_zu = -1
934        i_xs = i_ys = i_zs = i_xsu = i_ysu = i_zsu = -1
935
936        dump_column_names = []
937
938        #num_frames_in = -1
939        num_frames_out = 0
940        finished_reading_frame = False
941        read_last_frame = False
942
943        if misc_settings.in_coord_file_name != '':
944            in_coord_file = open(misc_settings.in_coord_file_name)
945        else:
946            in_coord_file = sys.stdin
947
948        while True:
949
950            line = in_coord_file.readline()
951            if line == '':  # if EOF
952                if len(frame_coords) > 0:
953                    finished_reading_frame = True
954                read_last_frame = True
955
956            line = line.strip()
957            if (line.find('ITEM:') == 0):
958                section = line
959                if (section.find('ITEM: ATOMS ') == 0):
960                    dump_column_names = line[12:].split()
961                    i_atomid, i_atomtype, i_molid = \
962                        ColNames2AidAtypeMolid(dump_column_names)
963                    #ii_coords = ColNames2Coords(dump_column_names)
964
965                    x_already_unwrapped = False
966                    y_already_unwrapped = False
967                    z_already_unwrapped = False
968
969                    if 'x' in dump_column_names:
970                        i_x = dump_column_names.index('x')
971                    elif 'xu' in dump_column_names:
972                        i_xu = dump_column_names.index('xu')
973                        x_already_unwrapped = True
974                    elif 'xs' in dump_column_names:
975                        i_xs = dump_column_names.index('xs')
976                    elif 'xsu' in dump_column_names:
977                        i_xsu = dump_column_names.index('xsu')
978                        x_already_unwrapped = True
979                    else:
980                        raise InputError('Error(dump2data): \"ATOMS\" section of dump file lacks a \"x\" column.\n' +
981                                         '       (excerpt below)\n' + line)
982
983                    if 'y' in dump_column_names:
984                        i_y = dump_column_names.index('y')
985                    elif 'yu' in dump_column_names:
986                        i_yu = dump_column_names.index('yu')
987                        y_already_unwrapped = True
988                    elif 'ys' in dump_column_names:
989                        i_ys = dump_column_names.index('ys')
990                    elif 'ysu' in dump_column_names:
991                        i_ysu = dump_column_names.index('ysu')
992                        y_already_unwrapped = True
993                    else:
994                        raise InputError('Error(dump2data): \"ATOMS\" section of dump file lacks a \"y\" column.\n' +
995                                         '       (excerpt below)\n' + line)
996
997                    if 'z' in dump_column_names:
998                        i_z = dump_column_names.index('z')
999                    elif 'zu' in dump_column_names:
1000                        i_zu = dump_column_names.index('zu')
1001                        z_already_unwrapped = True
1002                    elif 'zs' in dump_column_names:
1003                        i_zs = dump_column_names.index('zs')
1004                    elif 'zsu' in dump_column_names:
1005                        i_zsu = dump_column_names.index('zsu')
1006                        z_already_unwrapped = True
1007                    else:
1008                        raise InputError('Error(dump2data): \"ATOMS\" section of dump file lacks a \"z\" column.\n' +
1009                                         '       (excerpt below)\n' + line)
1010
1011                    ii_vects = ColNames2Vects(dump_column_names)
1012                    if (len(ii_vects) != len(data_settings.ii_vects)):
1013                        raise InputError('Error(dump2data): atom styles in data and dump files differ.\n'
1014                                         '      Some needed columns from the atom_styles are missing in the dump file.')
1015
1016                    i_ix = i_iy = i_iz = -1
1017                    if 'ix' in dump_column_names:
1018                        i_ix = dump_column_names.index('ix')
1019                    if 'iy' in dump_column_names:
1020                        i_iy = dump_column_names.index('iy')
1021                    if 'iz' in dump_column_names:
1022                        i_iz = dump_column_names.index('iz')
1023
1024                    i_vx = i_vy = i_vz = -1
1025                    if 'vx' in dump_column_names:
1026                        i_vx = dump_column_names.index('vx')
1027                    if 'vy' in dump_column_names:
1028                        i_vy = dump_column_names.index('vy')
1029                    if 'vz' in dump_column_names:
1030                        i_vz = dump_column_names.index('vz')
1031
1032                elif (section.find('ITEM: BOX BOUNDS') == 0):
1033                    avec = [1.0, 0.0, 0.0]
1034                    bvec = [0.0, 1.0, 0.0]
1035                    cvec = [0.0, 0.0, 1.0]
1036
1037                elif (section.find('ITEM: TIMESTEP') == 0):
1038                    if len(frame_coords) > 0:
1039                        finished_reading_frame = True
1040
1041            elif ((len(line) > 0) and (line[0] != '#')):
1042                if (section.find('ITEM: TIMESTEP') == 0):
1043                    finished_reading_frame = False
1044                    frame_timestep_str = line
1045                    frame_coords = defaultdict(list)
1046                    frame_coords_ixiyiz = defaultdict(list)
1047                    frame_vects = defaultdict(list)
1048                    frame_velocities = defaultdict(list)
1049                    frame_atomtypes = defaultdict(list)
1050                    frame_molids = defaultdict(list)
1051                    frame_xlo_str = frame_xhi_str = None
1052                    frame_ylo_str = frame_yhi_str = None
1053                    frame_zlo_str = frame_zhi_str = None
1054                    frame_xy_str = frame_xz_str = frame_yz_str = None
1055
1056                elif (section == 'ITEM: NUMBER OF ATOMS'):
1057                    frame_natoms = int(line)
1058
1059                elif (section.find('ITEM: BOX BOUNDS') == 0):
1060                    is_triclinic = (section.find('xy xz yz') == 0)
1061
1062                    tokens = line.split()
1063                    if not frame_xlo_str:
1064                        assert(not frame_xhi_str)
1065                        frame_xlo_str = tokens[0]
1066                        frame_xhi_str = tokens[1]
1067                        avec = [float(frame_xhi_str) - float(frame_xlo_str),
1068                                0.0,
1069                                0.0]
1070                        if (is_triclinic and (len(tokens) > 2)):
1071                            frame_xy_str = tokens[2]
1072                            # We will use this to recompute avec,bvec later. See
1073                            # https://lammps.sandia.gov/doc/Howto_triclinic.html
1074
1075                    elif not frame_ylo_str:
1076                        assert(not frame_yhi_str)
1077                        frame_ylo_str = tokens[0]
1078                        frame_yhi_str = tokens[1]
1079                        bvec = [0.0,
1080                                float(frame_yhi_str) - float(frame_ylo_str),
1081                                0.0]
1082                        if (is_triclinic and (len(tokens) > 2)):
1083                            frame_xz_str = tokens[2]
1084                            # We will use this to recompute bvec later.  See:
1085                            # https://lammps.sandia.gov/doc/Howto_triclinic.html
1086
1087                    elif not frame_zlo_str:
1088                        assert(not frame_zhi_str)
1089                        frame_zlo_str = tokens[0]
1090                        frame_zhi_str = tokens[1]
1091                        cvec = [0.0,
1092                                0.0,
1093                                float(frame_zhi_str) - float(frame_zlo_str)]
1094
1095                        if is_triclinic:
1096                            # https://lammps.sandia.gov/doc/Howto_triclinic.html
1097                            frame_yz_str = "0.0"
1098                            if len(tokens) > 2:
1099                                frame_yz_str = tokens[2]
1100                            # If we are using triclinic boundary conditions,
1101                            # then we need to update the way we compute the
1102                            # lattice vectors: avec, bvec, cvec.
1103                            # This was not possible to do until we had read:
1104                            #   frame_xy_str, frame_xz_str, and frame_yz_str.
1105                            #
1106                            # Now that we have read all 3 of them, recompute
1107                            # the avec, bvec, cvec vectors according to:
1108                            # https://lammps.sandia.gov/doc/Howto_triclinic.html
1109                            #
1110                            # When triclinic boundary conditions are used,
1111                            # the "BOX BOUNDS" section of the dump file stores
1112                            # xlo_bound, xhi_bound, ylo_bound, and yhi_bound
1113                            # instead of xlo, xhi, ylo, yhi.  So we need to use
1114                            # the following formula to calculate xlo,xhi,ylo,yhi
1115                            xlo_bound = float(frame_xlo_str)
1116                            xhi_bound = float(frame_xhi_str)
1117                            ylo_bound = float(frame_ylo_str)
1118                            yhi_bound = float(frame_yhi_str)
1119                            zlo_bound = float(frame_zlo_str)
1120                            zhi_bound = float(frame_zhi_str)
1121                            xy = float(frame_xy_str)
1122                            xz = float(frame_xz_str)
1123                            yz = float(frame_yz_str)
1124                            xlo = xlo_bound - min(0.0, xy, xz, xy+xz)
1125                            xhi = xhi_bound - max(0.0, xy, xz, xy+xz)
1126                            ylo = ylo_bound - min(0.0, yz)
1127                            yhi = yhi_bound - max(0.0, yz)
1128                            zlo = zlo_bound
1129                            zhi = zhi_bound
1130                            # now update "frame_xlo_str" and the other strings:
1131                            frame_xlo_str = str(xlo)
1132                            frame_xhi_str = str(xhi)
1133                            frame_ylo_str = str(ylo)
1134                            frame_yhi_str = str(yhi)
1135                            frame_zlo_str = str(zlo)
1136                            frame_zhi_str = str(zhi)
1137                            # and compute the avec,bvec,cvec unit cell vectors
1138                            avec = [xhi-xlo, 0.0, 0.0]
1139                            bvec = [xy, yhi-ylo, 0.0]
1140                            cvec = [xz, yz, zhi-zlo]
1141
1142                    # sys.stderr.write('avec='+str(avec)+'\n')
1143                    # sys.stderr.write('bvec='+str(bvec)+'\n')
1144                    # sys.stderr.write('cvec='+str(cvec)+'\n')
1145
1146                elif (section.find('ITEM: ATOMS') == 0):
1147                    tokens = line.split()
1148                    atomid = tokens[i_atomid]
1149                    atomtype = tokens[i_atomtype]
1150                    frame_atomtypes[atomid] = atomtype
1151                    if i_molid:
1152                        molid = tokens[i_molid]
1153                        frame_molids[atomid] = molid
1154
1155                    if ((i_x != -1) and (i_y != -1) and (i_z != -1)):
1156                        x = float(tokens[i_x])  # i_x determined above
1157                        y = float(tokens[i_y])
1158                        z = float(tokens[i_z])
1159
1160                    elif ((i_xu != -1) and (i_yu != -1) and (i_zu != -1)):
1161                        x = float(tokens[i_xu])  # i_x determined above
1162                        y = float(tokens[i_yu])
1163                        z = float(tokens[i_zu])
1164
1165                    elif ((i_xs != -1) and (i_ys != -1) and (i_zs != -1)):
1166                        xs = float(tokens[i_xs])  # i_xs determined above
1167                        ys = float(tokens[i_ys])
1168                        zs = float(tokens[i_zs])
1169
1170                        x = float(frame_xlo_str) + xs * \
1171                            avec[0] + ys * bvec[0] + zs * cvec[0]
1172                        y = float(frame_ylo_str) + xs * \
1173                            avec[1] + ys * bvec[1] + zs * cvec[1]
1174                        z = float(frame_zlo_str) + xs * \
1175                            avec[2] + ys * bvec[2] + zs * cvec[2]
1176
1177                    # avec, bvec, cvec described here:
1178                    # https://lammps.sandia.gov/doc/Howto_triclinic.html
1179
1180                    elif ((i_xsu != -1) and (i_ysu != -1) and (i_zsu != -1)):
1181                        xsu = float(tokens[i_xsu])  # i_xs determined above
1182                        ysu = float(tokens[i_ysu])
1183                        zsu = float(tokens[i_zsu])
1184
1185                        x = float(frame_xlo_str) + xsu * \
1186                            avec[0] + ysu * bvec[0] + zsu * cvec[0]
1187                        y = float(frame_ylo_str) + xsu * \
1188                            avec[1] + ysu * bvec[1] + zsu * cvec[1]
1189                        z = float(frame_zlo_str) + xsu * \
1190                            avec[2] + ysu * bvec[2] + zsu * cvec[2]
1191
1192                    # Now deal with ix, iy, iz
1193                    if (i_ix != -1) and (not x_already_unwrapped):
1194                        ix = int(tokens[i_ix])
1195                        if (misc_settings.center_frame or
1196                            (misc_settings.output_format != 'data')):
1197                            #sys.stderr.write('atomid='+str(atomid)+', ix = '+str(ix)+', avec='+str(avec)+'\n')
1198                            x += ix * avec[0]
1199                            y += ix * avec[1]
1200                            z += ix * avec[2]
1201                        else:
1202                            if atomid not in frame_coords_ixiyiz:
1203                                frame_coords_ixiyiz[atomid] = ["0", "0", "0"]
1204                            frame_coords_ixiyiz[atomid][0] = str(ix)
1205
1206                    if (i_iy != -1) and (not y_already_unwrapped):
1207                        iy = int(tokens[i_iy])
1208                        if (misc_settings.center_frame or
1209                            (misc_settings.output_format != 'data')):
1210                            #sys.stderr.write('atomid='+str(atomid)+', iy = '+str(iy)+', bvec='+str(bvec)+'\n')
1211                            x += iy * bvec[0]
1212                            y += iy * bvec[1]
1213                            z += iy * bvec[2]
1214                        else:
1215                            if atomid not in frame_coords_ixiyiz:
1216                                frame_coords_ixiyiz[atomid] = ["0", "0", "0"]
1217                            frame_coords_ixiyiz[atomid][1] = str(iy)
1218
1219                    if (i_iz != -1) and (not z_already_unwrapped):
1220                        iz = int(tokens[i_iz])
1221                        if (misc_settings.center_frame or
1222                            (misc_settings.output_format != 'data')):
1223                            #sys.stderr.write('atomid='+str(atomid)+', iz = '+str(iz)+', cvec='+str(cvec)+'\n')
1224                            x += iz * cvec[0]
1225                            y += iz * cvec[1]
1226                            z += iz * cvec[2]
1227                        else:
1228                            if atomid not in frame_coords_ixiyiz:
1229                                frame_coords_ixiyiz[atomid] = ["0", "0", "0"]
1230                            frame_coords_ixiyiz[atomid][2] = str(iz)
1231
1232                    #frame_coords[atomid] = [str(x), str(y), str(z)]
1233                    frame_coords[atomid] = [x, y, z]
1234
1235                    vx = 0.0
1236                    vy = 0.0
1237                    vz = 0.0
1238                    if i_vx != -1:
1239                        vx = float(tokens[i_vx])
1240                    if i_vy != -1:
1241                        vy = float(tokens[i_vy])
1242                    if i_vz != -1:
1243                        vz = float(tokens[i_vz])
1244
1245                    frame_velocities[atomid] = [vx, vy, vz]
1246
1247                    # NOTE:
1248                    # There can be multiple "vects" associated with each atom
1249                    # (for example, dipole moments, ellipsoid directions, etc..)
1250
1251                    if atomid not in frame_vects:
1252                        frame_vects[atomid] = [
1253                            None for I in range(0, len(ii_vects))]
1254
1255                    for I in range(0, len(ii_vects)):
1256                        i_vx = ii_vects[I][0]
1257                        i_vy = ii_vects[I][1]
1258                        i_vz = ii_vects[I][2]
1259                        vx_str = tokens[i_vx]
1260                        vy_str = tokens[i_vy]
1261                        vz_str = tokens[i_vz]
1262
1263                        # Now the annoying part:
1264                        # Which vect is it (mux,muy,muz) or (quati,quatj,quatk)?
1265                        # The columns could be listed in a different order
1266                        # in the data file and in the dump file.
1267                        # Figure out which vector it is in the data file (stored
1268                        # in the integer "I_data") so that column names match.
1269                        name_vx = dump_column_names[i_vx]
1270                        name_vy = dump_column_names[i_vy]
1271                        name_vz = dump_column_names[i_vz]
1272                        i_vx_data = 0
1273                        I_data = -1
1274                        # This code is dreadful and inneficient.
1275                        # I never want to touch this code again. (Hope it
1276                        # works)
1277                        while i_vx_data < len(data_settings.column_names):
1278                            if name_vx == data_settings.column_names[i_vx_data]:
1279                                I_data = 0
1280                                while I_data < len(data_settings.ii_vects):
1281                                    if (dump_column_names[ii_vects[I][0]] ==
1282                                        data_settings.column_names[data_settings.ii_vects[I_data][0]]):
1283                                        # (This checks the first component, ([0]) I should also
1284                                        # check [1] and [2], but the code is slow enough already.
1285                                        # THIS IS TERRIBLE CODE.
1286                                        break
1287                                    I_data += 1
1288
1289                            if (0 <= I_data) and (I_data < len(data_settings.ii_vects)):
1290                                break
1291
1292                            i_vx_data += 1
1293
1294                        if (0 <= I_data) and (I_data < len(data_settings.ii_vects)):
1295                            frame_vects[atomid][I_data] = (vx_str,vy_str,vz_str)
1296                        else:
1297                            raise InputError('Error(dump2data): You have a vector coordinate in your dump file named \"' + name_vx + '\"\n'
1298                                             '       However there are no columns with this name in your data file\n'
1299                                             '       (or the column was not in the expected place).\n'
1300                                             '       Hence, the atom styles in the dump and data files do not match.')
1301
1302            if finished_reading_frame:
1303
1304                if misc_settings.scale != None:
1305                    for atomid in frame_coords:
1306                        for d in range(0, 3):
1307                            crd = float(frame_coords[atomid][d])
1308                            frame_coords[atomid][d]=str(crd*misc_settings.scale)
1309
1310                if len(frame_coords) != frame_natoms:
1311                    err_msg = 'Number of lines in \"ITEM: ATOMS\" section disagrees with\n' \
1312                        + '           \"ITEM: NUMBER OF ATOMS\" declared earlier in this file.\n'
1313                    raise InputError(err_msg)
1314
1315                if misc_settings.center_frame:
1316                    cm = [0.0, 0.0, 0.0]
1317                    for atomid in frame_coords:
1318                        for d in range(0, 3):
1319                            cm[d] += float(frame_coords[atomid][d])
1320                    for d in range(0, 3):
1321                        cm[d] /= float(len(frame_coords))
1322                    for atomid in frame_coords:
1323                        for d in range(0, 3):
1324                            frame_coords[atomid][d] = "%.7g" % (
1325                                float(frame_coords[atomid][d]) - cm[d])
1326                        frame_coords_ixiyiz[atomid] = ["0", "0", "0"]
1327
1328                    if misc_settings.output_format != 'data':
1329                        frame_coords_ixiyiz[atomid] = ["0", "0", "0"]
1330
1331                # if (num_frames_in == -1):
1332                #    if (misc_settings.timestep_str != ''):
1333                #        if (float(frame_timestep_str) >=
1334                #            float(misc_settings.timestep_str)):
1335                #            num_frames_in = 1
1336                #        if not misc_settings.multi:
1337                #            read_last_frame = True
1338                #    else:
1339                #        num_frames_in = 1
1340
1341                # Should we write out the coordinates in this frame?
1342                write_this_frame = False
1343
1344                if misc_settings.multi:
1345
1346                    write_this_frame = True
1347                    if (misc_settings.tstart and
1348                        (int(frame_timestep_str) < misc_settings.tstart)):
1349                        write_this_frame = False
1350                    if (misc_settings.tstop and
1351                        (int(frame_timestep_str) > misc_settings.tstop)):
1352                        write_this_frame = False
1353                        read_last_frame = True
1354
1355                    if misc_settings.tstart:
1356                        tstart = misc_settings.tstart
1357                    else:
1358                        tstart = 0
1359
1360                    if ((int(frame_timestep_str) - tstart)
1361                            %
1362                            misc_settings.skip_interval) != 0:
1363                        write_this_frame = False
1364
1365                else:
1366                    if misc_settings.last_frame:
1367                        if read_last_frame:
1368                            write_this_frame = True
1369                    else:
1370                        assert(misc_settings.timestep_str)
1371                        if (int(frame_timestep_str) >=
1372                            int(misc_settings.timestep_str)):
1373                            write_this_frame = True
1374                            read_last_frame = True
1375
1376                if write_this_frame:
1377
1378                    num_frames_out += 1
1379
1380                    sys.stderr.write('  (writing frame ' + str(num_frames_out) +
1381                                     ' at timestep ' + frame_timestep_str + ')\n')
1382
1383                    # Print the frame
1384                    # First check which format to output the data:
1385                    if misc_settings.output_format == 'raw':
1386                        # Print out the coordinates in simple 3-column text
1387                        # format
1388                        for atomid, xyz in iter(sorted(frame_coords.items(), key=GetIntAtomID)):
1389                            # Check and see if whether the atom is one of the
1390                            # atoms that was selected by the user.  If not discard.
1391                            # (I don't offer this feature for 'data' files because
1392                            #  it is harder to implement for this file type.)
1393                            if not InIntervalUnion(int(atomid),
1394                                    misc_settings.atom_id_intervals):
1395                                continue
1396                            atype = frame_atomtypes[atomid]
1397                            if not InIntervalUnion(int(atype),
1398                                    misc_settings.atom_type_intervals):
1399                                continue
1400                            molid = frame_molids[atomid]
1401                            if not InIntervalUnion(int(molid),
1402                                    misc_settings.mol_id_intervals):
1403                                continue
1404
1405                            # Write the coordinates
1406                            if misc_settings.scale == None:
1407                                sys.stdout.write(
1408                                    str(xyz[0]) + ' ' + str(xyz[1]) + ' ' + str(xyz[2]) + '\n')
1409                            else:
1410                                # Only convert to float and back if
1411                                # misc_settings.scale != None
1412                                sys.stdout.write(str(misc_settings.scale * float(xyz[0])) + ' ' +
1413                                                 str(misc_settings.scale * float(xyz[1])) + ' ' +
1414                                                 str(misc_settings.scale * float(xyz[2])) + '\n')
1415                        sys.stdout.write('\n')
1416
1417                    elif ((misc_settings.output_format == 'xyz') or
1418                          (misc_settings.output_format == 'xyz-id') or
1419                          (misc_settings.output_format == 'xyz-mol') or
1420                          (misc_settings.output_format == 'xyz-type-mol')):
1421                            # Print out the coordinates in simple 3-column text
1422                            # format
1423                        sys.stdout.write(str(len(frame_coords)) + '\n')
1424                        descr_str = 'LAMMPS data from timestep ' + frame_timestep_str
1425                        sys.stdout.write(descr_str + '\n')
1426                        for atomid, xyz in iter(sorted(frame_coords.items(), key=GetIntAtomID)):
1427
1428                            # Check and see if whether the atom is one of the
1429                            # atoms that was selected by the user. If not discard.
1430                            if not InIntervalUnion(int(atomid),
1431                                    misc_settings.atom_id_intervals):
1432                                continue
1433                            atype = frame_atomtypes[atomid]
1434                            if not InIntervalUnion(int(atype),
1435                                    misc_settings.atom_type_intervals):
1436                                continue
1437                            molid = frame_molids[atomid]
1438                            if not InIntervalUnion(int(molid),
1439                                    misc_settings.mol_id_intervals):
1440                                continue
1441
1442                            if ((misc_settings.output_format == 'xyz') or
1443                                (misc_settings.output_format == 'xyz-type)')):
1444                                atomtype = frame_atomtypes.get(atomid)
1445                                if atomtype == None:
1446                                    raise InputError('xyz ERROR: Your trajectory file lacks atom-type information\n')
1447                                first_column = str(atomtype)
1448                            elif misc_settings.output_format == 'xyz-id':
1449                                first_column = str(atomid)
1450                            elif misc_settings.output_format == 'xyz-mol':
1451                                molid = frame_molids.get(atomid)
1452                                if molid == None:
1453                                    raise InputError('-xyz-mol ERROR: Your trajectory file lacks molecule-id information.\n')
1454                                sys.stderr.write(str(molid)+'\n')
1455                                first_column = str(molid)
1456                            elif misc_settings.output_format == 'xyz-type-mol':
1457                                atomtype = frame_atomtypes.get(atomid)
1458                                if atomtype == None:
1459                                    raise InputError('xyz-type-mol ERROR: Your trajectory file lacks atom-type information.\n')
1460                                molid = frame_molids.get(atomid)
1461                                if molid == None:
1462                                    raise InputError('-xyz-type-mol ERROR: Your trajectory file lacks molecule-id information.\n')
1463                                first_column = str(atomtype)+'_'+str(molid)
1464                            if misc_settings.scale == None:
1465                                sys.stdout.write(first_column + ' ' +
1466                                                 str(xyz[0]) + ' ' +
1467                                                 str(xyz[1]) + ' ' +
1468                                                 str(xyz[2]) + '\n')
1469                            else:
1470                                # Only convert to float and back if
1471                                # misc_settings.scale != None
1472                                sys.stdout.write(first_column + ' ' +
1473                                                 str(misc_settings.scale * float(xyz[0])) + ' ' +
1474                                                 str(misc_settings.scale * float(xyz[1])) + ' ' +
1475                                                 str(misc_settings.scale * float(xyz[2])) + '\n')
1476
1477                    else:
1478                        # Parse the DATA file specified by the user
1479                        # and replace appropriate lines or fields with
1480                        # the corresponding text from the DUMP file.
1481                        descr_str = 'LAMMPS data from timestep ' + frame_timestep_str
1482                        if misc_settings.multi and (misc_settings.output_format == 'data'):
1483                            out_file_name = data_settings.file_name + '.'\
1484                                + str(num_frames_out)
1485                            sys.stderr.write(
1486                                '  (creating file \"' + out_file_name + '\")\n')
1487                            out_file = open(out_file_name, 'w')
1488                        else:
1489                            out_file = sys.stdout
1490
1491                        WriteFrameToData(out_file,
1492                                         descr_str,
1493                                         misc_settings,
1494                                         data_settings,
1495                                         dump_column_names,
1496                                         frame_natoms,
1497                                         frame_coords,
1498                                         frame_coords_ixiyiz,
1499                                         frame_vects,
1500                                         frame_velocities,
1501                                         frame_atomtypes,
1502                                         frame_molids,
1503                                         frame_xlo_str, frame_xhi_str,
1504                                         frame_ylo_str, frame_yhi_str,
1505                                         frame_zlo_str, frame_zhi_str,
1506                                         frame_xy_str, frame_xz_str, frame_yz_str)
1507
1508                        # if misc_settings.multi:
1509                        #    out_file.close()
1510
1511                # if num_frames_in >= 0:
1512                #    num_frames_in += 1
1513
1514                if read_last_frame:
1515                    exit(0)
1516
1517        if misc_settings.in_coord_file_name != '':
1518            in_coord_file.close()
1519
1520        for warning_str in warning_strings:
1521            sys.stderr.write(warning_str + '\n')
1522
1523    except (ValueError, InputError) as err:
1524        sys.stderr.write('\n' + str(err) + '\n')
1525        sys.exit(-1)
1526
1527    return
1528
1529
1530if __name__ == '__main__':
1531    main()
1532