1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3 4# Author: Andrew Jewett 5# License: MIT License (See LICENSE.md) 6# Copyright (c) 2014 7 8""" 9dump2data.py 10 11Extract dynamical degrees of freedom from a lammps DUMP file (from the stdin) 12and construct a new DATA file (to the stdout). 13A reference DATA file is needed (argument). 14 15 basic usage 16./dump2data.py orig_file.data < dump.lammpstrj > new_file.data 17 (This extract last frame, uses "full" atom_style.) 18 19 options: 20./dump2data.py [-t t -atomstyle style] orig.data < dump.lammpstrj > new.data 21 22""" 23 24 25#g_program_name = 'dump2data.py' 26g_program_name = __file__.split('/')[-1] 27g_date_str = '2021-4-27' 28g_version_str = '0.61.0' 29 30import sys 31from collections import defaultdict 32from operator import itemgetter, attrgetter 33 34 35class InputError(Exception): 36 37 def __init__(self, err_msg): 38 self.err_msg = err_msg 39 40 def __str__(self): 41 return self.err_msg 42 43 44def ErrorLeader(infile, lineno): 45 return '\"' + infile + '\", line ' + str(lineno) + ': ' 46 47 48class MiscSettings(object): 49 50 def __init__(self): 51 self.tstart = None 52 self.tstop = None 53 self.timestep_str = '' 54 self.last_frame = False 55 self.center_frame = False 56 self.output_format = 'data' 57 self.input_format = 'dump' 58 self.multi = True 59 self.skip_interval = 1 60 self.atom_type_intervals = [] 61 self.atom_id_intervals = [] 62 self.mol_id_intervals = [] 63 self.scale = None 64 self.in_coord_file_name = '' 65 66class AtomStyleSettings(object): 67 68 def __init__(self): 69 # The following new member data indicate which columns store 70 # LAMMPS-specific information. 71 # The next 6 members store keep track of the different columns 72 # of the "Atoms" section of a LAMMPS data file: 73 self.column_names = [] # <--A list of column names (optional) 74 # <--A triplet of integers indicating which columns store coordinate data 75 self.i_coords = [] 76 # self.ii_coords= [] #<--A list of triplets of column indexes storing 77 # coordinate data 78 self.ii_vects = [] # <--A list of triplets of column indexes storing directional data 79 # (such as dipole or ellipsoid orientations) 80 self.i_atomid = None # <--An integer indicating which column has the atomid 81 self.i_atomtype = None # <--An integer indicating which column has the atomtype 82 self.i_molid = None # <--An integer indicating which column has the molid, if applicable 83 84 85class DataSettings(AtomStyleSettings): 86 87 def __init__(self): 88 AtomStyleSettings.__init__(self) 89 self.contents = '' 90 self.file_name = '' 91 92 93# Atom Styles in LAMMPS as of 2011-7-29 94g_style_map = {'angle': ['atom-ID', 'molecule-ID', 'atom-type', 'x', 'y', 'z'], 95 'atomic': ['atom-ID', 'atom-type', 'x', 'y', 'z'], 96 'body': ['atom-ID', 'atom-type', 'bodyflag', 'mass', 'x', 'y', 'z'], 97 'bond': ['atom-ID', 'molecule-ID', 'atom-type', 'x', 'y', 'z'], 98 'charge': ['atom-ID', 'atom-type', 'q', 'x', 'y', 'z'], 99 'dipole': ['atom-ID', 'atom-type', 'q', 'x', 'y', 'z', 'mux', 'muy', 'muz'], 100 'dpd': ['atom-ID', 'atom-type', 'theta', 'x', 'y', 'z'], 101 'electron': ['atom-ID', 'atom-type', 'q', 'spin', 'eradius', 'x', 'y', 'z'], 102 'ellipsoid': ['atom-ID', 'atom-type', 'ellipsoidflag', 'density', 'x', 'y', 'z'], 103 'full': ['atom-ID', 'molecule-ID', 'atom-type', 'q', 'x', 'y', 'z'], 104 'line': ['atom-ID', 'molecule-ID', 'atom-type', 'lineflag', 'density', 'x', 'y', 'z'], 105 'meso': ['atom-ID', 'atom-type', 'rho', 'e', 'cv', 'x', 'y', 'z'], 106 'molecular': ['atom-ID', 'molecule-ID', 'atom-type', 'x', 'y', 'z'], 107 'peri': ['atom-ID', 'atom-type', 'volume', 'density', 'x', 'y', 'z'], 108 'smd': ['atom-ID', 'atom-type', 'molecule-ID' 'volume', 'mass', 'kernel-radius', 'contact-radius', 'x', 'y', 'z'], 109 'sphere': ['atom-ID', 'atom-type', 'diameter', 'density', 'x', 'y', 'z'], 110 'template': ['atom-ID', 'molecule-ID', 'template-index', 'template-atom', 'atom-type', 'x', 'y', 'z'], 111 'tri': ['atom-ID', 'molecule-ID', 'atom-type', 'triangleflag', 'density', 'x', 'y', 'z'], 112 'wavepacket': ['atom-ID', 'atom-type', 'charge', 'spin', 'eradius', 'etag', 'cs_re', 'cs_im', 'x', 'y', 'z'], 113 'hybrid': ['atom-ID', 'atom-type', 'x', 'y', 'z'], 114 # The following styles were removed from LAMMPS as of 2012-3 115 'colloid': ['atom-ID', 'atom-type', 'x', 'y', 'z'], 116 'granular': ['atom-ID', 'atom-type', 'diameter', 'density', 'x', 'y', 'z']} 117 118 119def AtomStyle2ColNames(atom_style_string): 120 121 atom_style_string = atom_style_string.strip() 122 if len(atom_style_string) == 0: 123 raise InputError('Error(dump2data): Invalid atom_style\n' 124 ' (The atom_style command was followed by an empty string.)\n') 125 atom_style_args = atom_style_string.split() 126 atom_style = atom_style_args[0] 127 128 hybrid_args = atom_style_args[1:] 129 130 if (atom_style not in g_style_map): 131 if (len(atom_style_args) >= 2): 132 # If the atom_style_string includes at least 2 words, then we 133 # interpret this as a list of the individual column names 134 return atom_style_args 135 else: 136 raise InputError( 137 'Error(dump2data): Unrecognized atom_style: \"' + atom_style + '\"\n') 138 139 if (atom_style != 'hybrid'): 140 return g_style_map[atom_style] 141 else: 142 column_names = ['atom-ID', 'atom-type', 'x', 'y', 'z'] 143 if (len(hybrid_args) == 0): 144 raise InputError( 145 'Error(dump2data): atom_style hybrid must be followed by a sub_style.\n') 146 for sub_style in hybrid_args: 147 if (sub_style not in g_style_map): 148 raise InputError( 149 'Error(dump2data): Unrecognized atom_style: \"' + sub_style + '\"\n') 150 for cname in g_style_map[sub_style]: 151 if cname not in column_names: 152 column_names.append(cname) 153 154 return column_names 155 156 157def ColNames2AidAtypeMolid(column_names): 158 # Because of the diversity of ways that these 159 # numbers are referred to in the LAMMPS documentation, 160 # we have to be flexible and allow the user to refer 161 # to these quantities in a variety of ways. 162 # Hopefully this covers everything: 163 164 i_atomid = None 165 if 'atom-ID' in column_names: 166 i_atomid = column_names.index('atom-ID') 167 elif 'atom−ID' in column_names: # (− is the character used in the manual) 168 i_atomid = column_names.index('atom−ID') 169 elif 'atomID' in column_names: 170 i_atomid = column_names.index('atomID') 171 elif 'atomid' in column_names: 172 i_atomid = column_names.index('atomid') 173 elif 'id' in column_names: 174 i_atomid = column_names.index('id') 175 elif 'atom' in column_names: 176 i_atomid = column_names.index('atom') 177 elif '$atom' in column_names: 178 i_atomid = column_names.index('$atom') 179 else: 180 raise InputError( 181 'Error(dump2data): List of column names lacks an \"atom-ID\"\n') 182 183 i_atomtype = None 184 if 'atom-type' in column_names: 185 i_atomtype = column_names.index('atom-type') 186 elif 'atom−type' in column_names: # (− hyphen character used in manual) 187 i_atomtype = column_names.index('atom−type') 188 elif 'atomtype' in column_names: 189 i_atomtype = column_names.index('atomtype') 190 elif 'type' in column_names: 191 i_atomtype = column_names.index('type') 192 elif '@atom' in column_names: 193 i_atomtype = column_names.index('@atom') 194 else: 195 raise InputError( 196 'Error(dump2data): List of column names lacks an \"atom-type\"\n') 197 198 i_molid = None 199 if 'molecule-ID' in column_names: 200 i_molid = column_names.index('molecule-ID') 201 elif 'molecule−ID' in column_names: # (− hyphen character used in manual) 202 i_molid = column_names.index('molecule−ID') 203 elif 'moleculeID' in column_names: 204 i_molid = column_names.index('moleculeID') 205 elif 'moleculeid' in column_names: 206 i_molid = column_names.index('moleculeid') 207 elif 'molecule' in column_names: 208 i_molid = column_names.index('molecule') 209 elif 'molID' in column_names: 210 i_molid = column_names.index('molID') 211 elif 'molid' in column_names: 212 i_molid = column_names.index('molid') 213 elif 'mol' in column_names: 214 i_molid = column_names.index('mol') 215 elif '$mol' in column_names: 216 i_molid = column_names.index('$mol') 217 else: 218 pass # some atom_types do not have a valid molecule-ID 219 220 return i_atomid, i_atomtype, i_molid 221 222 223def ColNames2Coords(column_names): 224 """ Which of the columns correspond to coordinates 225 which must be transformed using rigid-body 226 (affine: rotation + translation) transformations? 227 This function outputs a list of lists of triplets of integers. 228 229 """ 230 i_x = None 231 i_y = None 232 i_z = None 233 if 'x' in column_names: 234 i_x = column_names.index('x') 235 if 'y' in column_names: 236 i_y = column_names.index('y') 237 if 'z' in column_names: 238 i_z = column_names.index('z') 239 if (((i_x != None) != (i_y != None)) or 240 ((i_y != None) != (i_z != None)) or 241 ((i_z != None) != (i_x != None))): 242 raise InputError( 243 'Error(dump2data): columns must include \"x\", \"y\", and \"z\".\n') 244 return [[i_x, i_y, i_z]] 245 246 247def ColNames2Vects(column_names): 248 """ Which of the columns correspond to coordinates 249 which must be transformed using rotations? 250 Some coordinates like dipole moments and 251 ellipsoid orientations should only be rotated 252 (not translated). 253 This function outputs a list of lists of triplets of integers. 254 255 """ 256 vects = [] 257 i_mux = None 258 i_muy = None 259 i_muz = None 260 if 'mux' in column_names: 261 i_mux = column_names.index('mux') 262 if 'muy' in column_names: 263 i_muy = column_names.index('muy') 264 if 'muz' in column_names: 265 i_muz = column_names.index('muz') 266 if (((i_mux != None) != (i_muy != None)) or 267 ((i_muy != None) != (i_muz != None)) or 268 ((i_muz != None) != (i_mux != None))): 269 raise InputError( 270 'Error(dump2data): custom atom_style list must define mux, muy, and muz or none.\n') 271 if i_mux != None: 272 vects.append([i_mux, i_muy, i_muz]) 273 i_quati = None 274 i_quatj = None 275 i_quatk = None 276 if 'quati' in column_names: 277 i_quati = column_names.index('quati') 278 if 'quatj' in column_names: 279 i_quatj = column_names.index('quatj') 280 if 'quatk' in column_names: 281 i_quatk = column_names.index('quatk') 282 if (((i_quati != None) != (i_quatj != None)) or 283 ((i_quatj != None) != (i_quatk != None)) or 284 ((i_quatk != None) != (i_quati != None))): 285 raise InputError( 286 'Error(dump2data): custom atom_style list must define quati, quatj, and quatk or none.\n') 287 if i_quati != None: 288 vects.append([i_quati, i_quatj, i_quatk]) 289 return vects 290 291 292 293def Str2IntervalUnion(s): 294 """ 295 Convert strings like '1-3,5,9-7' into [[1,3],[5,5],[9,7]] 296 """ 297 try: 298 tokens = s.split(',') 299 intervals = [] 300 for token in tokens: 301 a_str = token 302 b_str = token 303 i_separator = token.find('-') 304 if i_separator == -1: 305 i_separator = token.find('*') 306 if i_separator == -1: 307 i_separator = token.find(':') 308 if i_separator != -1: 309 a_str = token[:i_separator] 310 b_str = token[i_separator+1:] 311 if (len(a_str) == 0) or (a_str in ('-','*',':')): 312 a = 0 313 else: 314 a = int(a_str) 315 if (len(b_str) == 0) or (b_str in ('-','*',':')): 316 b = 0 317 else: 318 b = int(b_str) 319 intervals.append((a,b)) 320 return intervals 321 except (ValueError) as err: 322 sys.stderr.write('Error: Incorrect format in numeric interval: "'+s+'"\n') 323 sys.exit(-1) 324 325 326 327 328def ParseArgs(argv, 329 misc_settings, 330 data_settings, 331 warning_strings=None): 332 333 # Loop over the remaining arguments not processed yet. 334 # These arguments are specific to the lttree.py program 335 # and are not understood by this program. 336 i = 1 337 while i < len(argv): 338 #sys.stderr.write('argv['+str(i)+'] = \"'+argv[i]+'\"\n') 339 if ((argv[i].lower() == '-atomstyle') or 340 (argv[i].lower() == '-atom_style') or 341 (argv[i].lower() == '-atom-style')): 342 in_init = [] 343 if i + 1 >= len(argv): 344 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by a an atom_style name.\n' 345 ' (Or single quoted string which includes a space-separated\n' 346 ' list of column names.)\n') 347 data_settings.column_names = AtomStyle2ColNames(argv[i + 1]) 348 sys.stderr.write(' \"Atoms\" column format:\n') 349 sys.stderr.write( 350 ' ' + (' '.join(data_settings.column_names)) + '\n') 351 352 # ColNames2Coords() and ColNames2Vects() generate lists of 353 # triplets of integers, storing the column numbers containing 354 # x, y, and z coordinate values, and vx,vy,vz direction vectors. 355 data_settings.ii_vects = ColNames2Vects(data_settings.column_names) 356 ii_coords = ColNames2Coords(data_settings.column_names) 357 # This program assumes that there is only one coordinate triplet 358 # (x,y,z) for each atom. Hence we assume that len(ii_coords)==1 359 assert(len(ii_coords) == 1) 360 data_settings.i_coords = ii_coords[0] 361 362 # Now figure out which columns correspond to atomid, atomtype, 363 # molid 364 data_settings.i_atomid, data_settings.i_atomtype, data_settings.i_molid = ColNames2AidAtypeMolid( 365 data_settings.column_names) 366 del(argv[i:i + 2]) 367 368 elif (argv[i].lower() == '-icoord'): 369 if i + 1 >= len(argv): 370 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers\n' 371 ' corresponding to column numbers for coordinates in\n' 372 ' the \"Atoms\" section of a LAMMPS data file.\n') 373 ilist = argv[i + 1].split() 374 if (len(ilist) % 3) != 0: 375 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers.\n' 376 ' This is usually a list of 3 intebers, but it can contain more.\n' 377 ' The number of cooridnate columns must be divisible by 3,\n' 378 ' (even if the simulation is in 2 dimensions)\n') 379 380 #ii_coords = [] 381 # for i in range(0, len(ilist)/3): 382 # cols = [ilist[3*i]+1, ilist[3*i+1]+1, ilist[3*i+2]+1] 383 # ii_coords.append(cols) 384 # if ((len(ii_coords) != 0) or (len(ii_coords[0]) != 3)): 385 # raise InputError('Error(dump2data): Argument \"'+argv[i]+'\" must be followed by exactly 3 integers.\n') 386 387 data_settings.i_coords = ilist 388 if (len(i_coords) != 3): 389 raise InputError('Error(dump2data): Argument \"' + 390 argv[i] + '\" must be followed by exactly 3 integers.\n') 391 392 data_settings.i_coords = ii_coords[0] 393 394 del(argv[i:i + 2]) 395 396 elif (argv[i].lower() == '-ivect'): 397 if i + 1 >= len(argv): 398 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers\n' 399 ' corresponding to column numbers for direction vectors in\n' 400 ' the \"Atoms\" section of a LAMMPS data file.\n') 401 ilist = argv[i + 1].split() 402 if (len(ilist) % 3) != 0: 403 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by list of integers.\n' 404 ' This is usually a list of 3 intebers, but it can contain more.\n' 405 ' The number of cooridnate columns must be divisible by 3,\n' 406 ' (even if the simulation is in 2 dimensions)\n') 407 408 data_settings.ii_vects = [] 409 for i in range(0, len(ilist) / 3): 410 cols = [ilist[3 * i] + 1, ilist[3 * i + 1] + 411 1, ilist[3 * i + 2] + 1] 412 setting.ii_vects.append(cols) 413 # This should override any earlier settings as a result of the 414 # -atomstyle argument. So you can specify a custom list of column 415 # names using -atomstyle "list of column names", and then afterwards 416 # specify which of these columns correspond to direction vectors 417 # using the "-ivect" command line argument later on. 418 # This way, in theory you should be able to read columns from 419 # new custom atom-styles that have not been invented yet. 420 # (Although I haven't tested this.) 421 422 del(argv[i:i + 2]) 423 # i_atomid is not really needed for this program, but I load it anyway 424 elif ((argv[i].lower() == '-iatomid') or 425 (argv[i].lower() == '-iid') or 426 (argv[i].lower() == '-iatom-id')): 427 if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))): 428 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer\n' 429 ' (>=1) indicating which column in the \"Atoms\" section of a\n' 430 ' LAMMPS data file contains the atom id number (typically 1).\n' 431 ' (This argument is unnecessary if you use the -atomstyle argument.)\n') 432 i_atomid = int(argv[i + 1]) - 1 433 del(argv[i:i + 2]) 434 # i_atomtype is not really needed for this program, but I load it 435 # anyway 436 elif ((argv[i].lower() == '-iatomtype') or 437 (argv[i].lower() == '-itype') or 438 (argv[i].lower() == '-iatom-type')): 439 if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))): 440 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer\n' 441 ' (>=1) indicating which column in the \"Atoms\" section of a\n' 442 ' LAMMPS data file contains the atom type.\n' 443 ' (This argument is unnecessary if you use the -atomstyle argument.)\n') 444 i_atomtype = int(argv[i + 1]) - 1 445 del(argv[i:i + 2]) 446 # i_molid is not really needed for this program, but I load it anyway 447 elif ((argv[i].lower() == '-imolid') or 448 (argv[i].lower() == '-imol') or 449 (argv[i].lower() == '-imol-id') or 450 (argv[i].lower() == '-imoleculeid') or 451 (argv[i].lower() == '-imolecule-id')): 452 if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))): 453 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer\n' 454 ' (>=1) indicating which column in the \"Atoms\" section of a\n' 455 ' LAMMPS data file contains the molecule id number.\n' 456 ' (This argument is unnecessary if you use the -atomstyle argument.)\n') 457 del(argv[i:i + 2]) 458 # Which frame do we want? 459 elif (argv[i].lower() == '-t'): 460 if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))): 461 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer indicating\n' 462 ' the frame you want to extract from the dump file (trajectory).\n' 463 ' This integer should match the timestep corresponding to the frame\n' 464 ' whose coordinates you wish to extract.\n') 465 misc_settings.timestep_str = argv[i + 1] 466 del(argv[i:i + 2]) 467 misc_settings.multi = False 468 misc_settings.last_frame = False 469 470 elif (argv[i].lower() == '-tstart'): 471 if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))): 472 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an integer indicating\n' 473 ' the first frame you want to extract from the dump file (trajectory).\n' 474 ' This integer should match the timestep corresponding to the frame\n' 475 ' (after which) you wish to extract coordinates.\n') 476 misc_settings.tstart = float(argv[i + 1]) 477 del(argv[i:i + 2]) 478 misc_settings.multi = True 479 480 elif (argv[i].lower() == '-tstop'): 481 if ((i + 1 >= len(argv)) or (not str.isdigit(argv[i + 1]))): 482 raise InputError('Error(dump2data): ' + argv[i] + ' flag should be followed by an number indicating\n' 483 ' the first frame you want to extract from the dump file (trajectory).\n' 484 ' Frames after this timestep will be ignored.\n') 485 misc_settings.tstop = float(argv[i + 1]) 486 del(argv[i:i + 2]) 487 misc_settings.multi = True 488 489 elif (argv[i].lower() == '-center'): 490 misc_settings.center_frame = True 491 del(argv[i:i + 1]) 492 493 elif ((argv[i].lower() == '-raw') or (argv[i].lower() == '-rawout')): 494 misc_settings.output_format = 'raw' 495 del(argv[i:i + 1]) 496 497 elif (argv[i].lower() == '-rawin'): 498 misc_settings.input_format = 'raw' 499 misc_settings.multi = False 500 del(argv[i:i + 1]) 501 502 elif ((argv[i].lower() == '-xyz') or 503 (argv[i].lower() == '-xyz-type') or 504 (argv[i].lower() == '-xyzout')): 505 misc_settings.output_format = 'xyz' 506 del(argv[i:i + 1]) 507 508 elif (argv[i].lower() == '-xyz-id'): 509 misc_settings.output_format = 'xyz-id' 510 del(argv[i:i + 1]) 511 512 elif (argv[i].lower() == '-xyz-mol'): 513 misc_settings.output_format = 'xyz-mol' 514 del(argv[i:i + 1]) 515 516 elif (argv[i].lower() == '-xyz-type-mol'): 517 misc_settings.output_format = 'xyz-type-mol' 518 del(argv[i:i + 1]) 519 520 elif (argv[i].lower() == '-xyzin'): 521 misc_settings.input_format = 'xyz' 522 misc_settings.multi = False 523 del(argv[i:i + 1]) 524 525 elif (argv[i].lower() == '-multi'): 526 misc_settings.multi = True 527 del(argv[i:i + 1]) 528 529 elif (argv[i].lower() == '-last'): 530 misc_settings.last_frame = True 531 misc_settings.multi = False 532 del(argv[i:i + 1]) 533 534 elif (argv[i].lower() == '-interval'): 535 misc_settings.skip_interval = int(argv[i + 1]) 536 del(argv[i:i + 2]) 537 538 elif (argv[i].lower() == '-scale'): 539 misc_settings.scale = float(argv[i + 1]) 540 del(argv[i:i + 2]) 541 542 elif (argv[i].lower() == '-id'): 543 misc_settings.atom_id_intervals += Str2IntervalUnion(argv[i+1]) 544 del(argv[i:i + 2]) 545 546 elif (argv[i].lower() == '-type'): 547 misc_settings.atom_type_intervals += Str2IntervalUnion(argv[i+1]) 548 del(argv[i:i + 2]) 549 550 elif (argv[i].lower() == '-mol'): 551 misc_settings.mol_id_intervals += Str2IntervalUnion(argv[i+1]) 552 del(argv[i:i + 2]) 553 554 elif ((argv[i].lower() == '-in') or 555 (argv[i].lower() == '-dump')): 556 misc_settings.in_coord_file_name = argv[i+1] 557 del(argv[i:i + 2]) 558 559 elif ((argv[i][0] == '-') and (__name__ == "__main__")): 560 raise InputError( 561 'Error(dump2data): Unrecogized command line argument \"' + argv[i] + '\"\n') 562 else: 563 i += 1 564 565 usage_examples = \ 566""" Typical usage: 567dump2data.py orig_file.data < dump.lammpstrj > new_file.data 568 (This extracts last frame, uses "full" atom_style.) 569 Additional options: 570dump2data.py -t t -atomstyle style orig.data < dump.lammpstrj > new.data 571""" 572 573 # if __name__ == "__main__": 574 575 if (len(argv) > 2): 576 # if there are more than 2 remaining arguments, 577 # AND 578 # no other function will process the remaining argument list 579 # (ie. if __name__ == "__main__") 580 # THEN 581 raise InputError(' ----\n' 582 'ERROR(dump2data): You have too many arguments (or unrecognized arguments):\n' 583 ' \"' + (' '.join(argv)) + '\"\n' 584 ' ----\n' 585 + usage_examples) 586 elif (len(argv) < 2): 587 if misc_settings.output_format == 'data': 588 raise InputError(' ----\n' 589 'ERROR(dump2data): Problem with argument list:\n' 590 ' Expected a LAMMPS .data file as an argument.\n' 591 ' ----\n' 592 + usage_examples) 593 else: 594 in_data_file = open(argv[1], 'r') 595 data_settings.file_name = argv[1] 596 data_settings.contents = in_data_file.readlines() 597 in_data_file.close() 598 599 # end of if-then statement for "if __name__ == "__main__"" 600 601 if len(data_settings.i_coords) == 0: 602 if warning_strings != None: 603 warning_strings.append( 604 'WARNING(dump2data): atom_style unknown. (Use -atomstyle style. Assuming \"full\")') 605 warn_atom_style_unspecified = True 606 # The default atom_style is "full" 607 data_settings.column_names = AtomStyle2ColNames('full') 608 ii_coords = ColNames2Coords(data_settings.column_names) 609 # This program assumes that there is only one coordinate triplet 610 # (x,y,z) for each atom. Hence we assume that len(ii_coords)==1 611 assert(len(ii_coords) == 1) 612 data_settings.i_coords = ii_coords[0] 613 data_settings.ii_vects = ColNames2Vects(data_settings.column_names) 614 data_settings.i_atomid, data_settings.i_atomtype, data_settings.i_molid = ColNames2AidAtypeMolid( 615 data_settings.column_names) 616 617 # sys.stderr.write('########################################################\n' 618 # '## WARNING: atom_style unspecified ##\n' 619 # '## --> \"Atoms\" column data has an unknown format. ##\n' 620 # '## Assuming atom_style = \"full\" ##\n' 621 # '########################################################\n' 622 # '## To specify the \"Atoms\" column format you can: ##\n' 623 # '## 1) Use the -atom_style \"STYLE\" argument ##\n' 624 # '## where \"STYLE\" is a string indicating a LAMMPS ##\n' 625 # '## atom_style, including hybrid styles.(Standard ##\n' 626 # '## atom styles defined in 2011 are supported.) ##\n' 627 # '## 2) Use the -atom_style \"COL_LIST\" argument ##\n' 628 # '## where \"COL_LIST" is a quoted list of strings ##\n' 629 # '## indicating the name of each column. ##\n' 630 # '## Names \"x\",\"y\",\"z\" are interpreted as ##\n' 631 # '## atomic coordinates. \"mux\",\"muy\",\"muz\" ##\n' 632 # '## and \"quati\",\"quatj\",\"quatk\" are ##\n' 633 # '## interpreted as direction vectors. ##\n' 634 # '## 3) Use the -icoord \"cx cy cz...\" argument ##\n' 635 # '## where \"cx cy cz\" is a list of integers ##\n' 636 # '## indicating the column numbers for the x,y,z ##\n' 637 # '## coordinates of each atom. ##\n' 638 # '## 4) Use the -ivect \"cmux cmuy cmuz...\" argument ##\n' 639 # '## where \"cmux cmuy cmuz...\" is a list of ##\n' 640 # '## integers indicating the column numbers for ##\n' 641 # '## the vector that determines the direction of a ##\n' 642 # '## dipole or ellipsoid (ie. a rotateable vector).##\n' 643 # '## (More than one triplet can be specified. The ##\n' 644 # '## number of entries must be divisible by 3.) ##\n' 645 # '## 5) Include a ##\n' 646 # '## write(\"in_init.txt\"){atom_style ...} ##\n' 647 # '## statement in your .ttree file. ##\n' 648 # '########################################################\n') 649 650 651def GetIntAtomID(pair): 652 return int(pair[0]) 653 654 655def WriteFrameToData(out_file, 656 descr_str, 657 misc_settings, 658 data_settings, 659 dump_column_names, 660 natoms, 661 coords, 662 coords_ixiyiz, 663 vects, 664 velocities, 665 atomtypes, 666 molids, 667 xlo_str, xhi_str, 668 ylo_str, yhi_str, 669 zlo_str, zhi_str, 670 xy_str, xz_str, yz_str): 671 """ 672 Open a data file. Read the LAMMPS DATA file line by line. 673 When the line contains information which is also in the dump file, 674 replace that information with information from the dump file. 675 (Information from a dump file is stored in the arguments to this function.) 676 The resulting file also has LAMMPS DATA format. 677 678 """ 679 680 section = '' 681 firstline = True 682 for line_orig in data_settings.contents: 683 ic = line_orig.find('#') 684 if ic != -1: 685 line = line_orig[:ic] 686 else: 687 line = line_orig 688 line = line.strip() 689 690 if firstline: # Construct a new descriptive header line: 691 if descr_str != None: 692 line = descr_str 693 firstline = False 694 695 if (len(line) > 0): 696 # The initial section (section='') is assumed to be 697 # the "LAMMPS Description" section. This is where the 698 # box boundaries are specified. 699 if section == '': 700 tokens = line.split() 701 if ((len(tokens) >= 2) and 702 ((tokens[-2] == 'xlo') and (tokens[-1] == 'xhi')) and 703 ((xlo_str != None) and (xhi_str != None))): 704 tokens[0] = xlo_str 705 tokens[1] = xhi_str 706 line = ' '.join(tokens) 707 elif ((len(tokens) >= 2) and 708 ((tokens[-2] == 'ylo') and (tokens[-1] == 'yhi')) and 709 ((ylo_str != None) and (yhi_str != None))): 710 tokens[0] = ylo_str 711 tokens[1] = yhi_str 712 line = ' '.join(tokens) 713 elif ((len(tokens) >= 2) and 714 ((tokens[-2] == 'zlo') and (tokens[-1] == 'zhi')) and 715 ((zlo_str != None) and (zhi_str != None))): 716 tokens[0] = zlo_str 717 tokens[1] = zhi_str 718 line = ' '.join(tokens) 719 elif ((len(tokens) >= 3) and 720 ((tokens[-3] == 'xy') and 721 (tokens[-2] == 'xz') and 722 (tokens[-1] == 'yz')) and 723 ((xy_str != None) and 724 (xz_str != None) and 725 (yz_str != None))): 726 tokens[0] = xy_str 727 tokens[1] = xz_str 728 tokens[2] = yz_str 729 line = ' '.join(tokens) 730 if (line in set(['Masses', 'Velocities', 'Atoms', 'Ellipsoids', 731 'Bond Coeffs', 'Angle Coeffs', 732 'Dihedral Coeffs', 'Improper Coeffs', 733 'Bonds', 'Angles', 'Dihedrals', 'Impropers'])): 734 section = line 735 else: 736 if (section == 'Atoms'): 737 tokens = line.split() 738 atomid = tokens[0] 739 740 # update the atomtype and molID 741 # (which may change during the simulation) 742 if atomtypes: 743 tokens[data_settings.i_atomtype] = atomtypes[atomid] 744 if molids and data_settings.i_molid: 745 tokens[data_settings.i_molid] = molids[atomid] 746 747 if atomid in coords: 748 # Loop over all of the vector degrees of 749 # freedom of the particle, excluding coords 750 # (for example: mu_x, mu_y, mu_z, 751 # or quat_i, quat_j, quat_k) 752 # In principle, depending on the atom_style, 753 # there could be multiple vectors per atom. 754 for I in range(0, len(data_settings.ii_vects)): 755 i_vx = data_settings.ii_vects[I][0] 756 i_vy = data_settings.ii_vects[I][1] 757 i_vz = data_settings.ii_vects[I][2] 758 if atomid in vects: 759 vxvyvz = vects[atomid][I] 760 assert((type(vxvyvz) is tuple) and 761 (len(vxvyvz) == 3)) 762 if ((i_vx >= len(tokens)) or 763 (i_vy >= len(tokens)) or 764 (i_vz >= len(tokens))): 765 raise InputError('Error(dump2data): Atom style incompatible with data file.\n' 766 ' Specify the atom_style using -atomstyle style.\n') 767 768 # Replace the vector components with numbers 769 # from the dump file 770 tokens[i_vx] = vxvyvz[0] 771 tokens[i_vy] = vxvyvz[1] 772 tokens[i_vz] = vxvyvz[2] 773 774 else: 775 if (dump_column_names and 776 (data_settings.column_names[ 777 i_vx] not in dump_column_names)): 778 raise InputError('Error(dump2data): You have a vector coordinate in your DATA file named \"' + data_settings.column_names[i_vx] + '\"\n' 779 ' However there are no columns with this name in your DUMP file\n' 780 ' (or the column was not in the expected place).\n' 781 ' Hence, the atom styles in the dump and data files do not match.') 782 783 # Now loop over the coordinates of each atom. 784 # for I in range(0,len(data_settings.ii_coords)): 785 # xyz = coords[atomid][I] 786 # THIS LOOP IS SILLY. 787 # EACH ATOM ONLY HAS ONE SET OF X,Y,Z 788 # COORDINATES. COMMENTING OUT THIS LOOP: 789 # i_x = data_settings.ii_coords[I][0] 790 # i_y = data_settings.ii_coords[I][1] 791 # i_z = data_settings.ii_coords[I][2] 792 # USING THIS INSTEAD: 793 794 xyz = coords[atomid] 795 i_x = data_settings.i_coords[0] 796 i_y = data_settings.i_coords[1] 797 i_z = data_settings.i_coords[2] 798 if ((i_x >= len(tokens)) or 799 (i_y >= len(tokens)) or 800 (i_z >= len(tokens))): 801 raise InputError('Error(dump2data): Atom style incompatible with data file.\n' 802 ' Specify the atom_style using -atomstyle style.\n') 803 # Replace the coordinates with coordinates from 804 # the dump file into tokens[i_x]... 805 tokens[i_x] = str(xyz[0]) 806 tokens[i_y] = str(xyz[1]) 807 tokens[i_z] = str(xyz[2]) 808 809 # Are there there any integer coords 810 # (ix, iy, iz) in the dump file? 811 if coords_ixiyiz[atomid]: 812 assert(len(coords_ixiyiz[atomid]) == 3) 813 # Integer coords stored in the DATA file too? 814 if len(tokens) == (len(data_settings.column_names) + 3): 815 # Then replace the last 3 columns of the 816 # line in the data file with: ix iy iz 817 tokens[-3] = coords_ixiyiz[atomid][0] 818 tokens[-2] = coords_ixiyiz[atomid][1] 819 tokens[-1] = coords_ixiyiz[atomid][2] 820 else: 821 if (not misc_settings.center_frame): 822 # Append them to the end of the line: 823 tokens.append(coords_ixiyiz[atomid][0]) 824 tokens.append(coords_ixiyiz[atomid][1]) 825 tokens.append(coords_ixiyiz[atomid][2]) 826 827 # Now finally paste all the tokens together: 828 line = ' '.join(tokens) 829 830 elif (section == 'Velocities'): 831 tokens = line.split() 832 atomid = tokens[0] 833 if atomid in velocities: 834 835 vxvyvz = velocities[atomid] 836 if len(tokens) < 4: 837 raise InputError( 838 'Error(dump2data): Not enough columns in the \"Velocities\" file.\n') 839 # Replace the coordinates with coordinates from 840 # the dump file into tokens[i_x]... 841 tokens[1] = str(vxvyvz[0]) 842 tokens[2] = str(vxvyvz[1]) 843 tokens[3] = str(vxvyvz[2]) 844 845 # Now finally paste all the tokens together: 846 line = ' '.join(tokens) 847 848 elif section == '': 849 # If section == '', then we are in the Header section 850 # of the DATA file. In that case, the modifications that 851 # were made to that line were handled earlier, and there 852 # is nothing left to do here. 853 pass 854 855 else: # If we are not in one of the sections containing 856 # data that comes from the dump file (eg 'Atoms', 857 # 'Velocities'), then copy the text directly from 858 # the original data file without modification. 859 # Since "line" has any comments and whitespace removed, 860 # replace it with "line_orig" which has everything. 861 line = line_orig.rstrip('\n') 862 863 out_file.write(line + '\n') 864 865 return 866 867 868 869 870def InIntervalUnion(i, intervals): 871 """ 872 Return whether integer i lies within in at least one of the intervals. 873 (Note: If 0 appears in the upper bound of an interval, it means +infinity.) 874 """ 875 if len(intervals) == 0: 876 return True 877 accept = False 878 for interval in intervals: 879 assert(len(interval) == 2) 880 if (interval[0] <= i) and ((i <= interval[1]) or (interval[1] == 0)): 881 accept = True 882 return accept 883 884 885 886def main(): 887 sys.stderr.write(g_program_name + ' v' + 888 g_version_str + ' ' + g_date_str + ' ') 889 # if sys.version < '3': 890 # sys.stderr.write(' (python version < 3)\n') 891 # else: 892 sys.stderr.write('\n') 893 894 try: 895 data_settings = DataSettings() 896 misc_settings = MiscSettings() 897 warning_strings = [] 898 ParseArgs(sys.argv, 899 misc_settings, 900 data_settings, 901 warning_strings) 902 903 # Open the lammps dump file (trajectory file) 904 # Skip to the line containing the correct frame/timestep. 905 # (this is the last frame by default). 906 # Read the "BOX BOUNDS" and the "ATOMS" sections. 907 # Store the x,y,z coordinates in the "coords" associative array 908 # (indexed by atom id, which could be non-numeric in general). 909 910 section = '' 911 912 #coords = defaultdict(list) 913 #coords_ixiyiz = defaultdict(list) 914 #vects = defaultdict(list) 915 #xlo_str = xhi_str = ylo_str = yhi_str = zlo_str = zhi_str = None 916 #xy_str = xz_str = yz_str = None 917 #natoms = -1 918 #timestep_str = '' 919 920 frame_coords = defaultdict(list) 921 frame_coords_ixiyiz = defaultdict(list) 922 frame_vects = defaultdict(list) 923 frame_velocities = defaultdict(list) 924 frame_atomtypes = defaultdict(list) 925 frame_molids = defaultdict(list) 926 frame_xlo_str = frame_xhi_str = None 927 frame_ylo_str = frame_yhi_str = None 928 frame_zlo_str = frame_zhi_str = None 929 frame_xy_str = frame_xz_str = frame_yz_str = None 930 frame_natoms = -1 931 frame_timestep_str = '' 932 i_atomid = i_atomtype = i_molid = -1 933 i_x = i_y = i_z = i_xu = i_yu = i_zu = -1 934 i_xs = i_ys = i_zs = i_xsu = i_ysu = i_zsu = -1 935 936 dump_column_names = [] 937 938 #num_frames_in = -1 939 num_frames_out = 0 940 finished_reading_frame = False 941 read_last_frame = False 942 943 if misc_settings.in_coord_file_name != '': 944 in_coord_file = open(misc_settings.in_coord_file_name) 945 else: 946 in_coord_file = sys.stdin 947 948 while True: 949 950 line = in_coord_file.readline() 951 if line == '': # if EOF 952 if len(frame_coords) > 0: 953 finished_reading_frame = True 954 read_last_frame = True 955 956 line = line.strip() 957 if (line.find('ITEM:') == 0): 958 section = line 959 if (section.find('ITEM: ATOMS ') == 0): 960 dump_column_names = line[12:].split() 961 i_atomid, i_atomtype, i_molid = \ 962 ColNames2AidAtypeMolid(dump_column_names) 963 #ii_coords = ColNames2Coords(dump_column_names) 964 965 x_already_unwrapped = False 966 y_already_unwrapped = False 967 z_already_unwrapped = False 968 969 if 'x' in dump_column_names: 970 i_x = dump_column_names.index('x') 971 elif 'xu' in dump_column_names: 972 i_xu = dump_column_names.index('xu') 973 x_already_unwrapped = True 974 elif 'xs' in dump_column_names: 975 i_xs = dump_column_names.index('xs') 976 elif 'xsu' in dump_column_names: 977 i_xsu = dump_column_names.index('xsu') 978 x_already_unwrapped = True 979 else: 980 raise InputError('Error(dump2data): \"ATOMS\" section of dump file lacks a \"x\" column.\n' + 981 ' (excerpt below)\n' + line) 982 983 if 'y' in dump_column_names: 984 i_y = dump_column_names.index('y') 985 elif 'yu' in dump_column_names: 986 i_yu = dump_column_names.index('yu') 987 y_already_unwrapped = True 988 elif 'ys' in dump_column_names: 989 i_ys = dump_column_names.index('ys') 990 elif 'ysu' in dump_column_names: 991 i_ysu = dump_column_names.index('ysu') 992 y_already_unwrapped = True 993 else: 994 raise InputError('Error(dump2data): \"ATOMS\" section of dump file lacks a \"y\" column.\n' + 995 ' (excerpt below)\n' + line) 996 997 if 'z' in dump_column_names: 998 i_z = dump_column_names.index('z') 999 elif 'zu' in dump_column_names: 1000 i_zu = dump_column_names.index('zu') 1001 z_already_unwrapped = True 1002 elif 'zs' in dump_column_names: 1003 i_zs = dump_column_names.index('zs') 1004 elif 'zsu' in dump_column_names: 1005 i_zsu = dump_column_names.index('zsu') 1006 z_already_unwrapped = True 1007 else: 1008 raise InputError('Error(dump2data): \"ATOMS\" section of dump file lacks a \"z\" column.\n' + 1009 ' (excerpt below)\n' + line) 1010 1011 ii_vects = ColNames2Vects(dump_column_names) 1012 if (len(ii_vects) != len(data_settings.ii_vects)): 1013 raise InputError('Error(dump2data): atom styles in data and dump files differ.\n' 1014 ' Some needed columns from the atom_styles are missing in the dump file.') 1015 1016 i_ix = i_iy = i_iz = -1 1017 if 'ix' in dump_column_names: 1018 i_ix = dump_column_names.index('ix') 1019 if 'iy' in dump_column_names: 1020 i_iy = dump_column_names.index('iy') 1021 if 'iz' in dump_column_names: 1022 i_iz = dump_column_names.index('iz') 1023 1024 i_vx = i_vy = i_vz = -1 1025 if 'vx' in dump_column_names: 1026 i_vx = dump_column_names.index('vx') 1027 if 'vy' in dump_column_names: 1028 i_vy = dump_column_names.index('vy') 1029 if 'vz' in dump_column_names: 1030 i_vz = dump_column_names.index('vz') 1031 1032 elif (section.find('ITEM: BOX BOUNDS') == 0): 1033 avec = [1.0, 0.0, 0.0] 1034 bvec = [0.0, 1.0, 0.0] 1035 cvec = [0.0, 0.0, 1.0] 1036 1037 elif (section.find('ITEM: TIMESTEP') == 0): 1038 if len(frame_coords) > 0: 1039 finished_reading_frame = True 1040 1041 elif ((len(line) > 0) and (line[0] != '#')): 1042 if (section.find('ITEM: TIMESTEP') == 0): 1043 finished_reading_frame = False 1044 frame_timestep_str = line 1045 frame_coords = defaultdict(list) 1046 frame_coords_ixiyiz = defaultdict(list) 1047 frame_vects = defaultdict(list) 1048 frame_velocities = defaultdict(list) 1049 frame_atomtypes = defaultdict(list) 1050 frame_molids = defaultdict(list) 1051 frame_xlo_str = frame_xhi_str = None 1052 frame_ylo_str = frame_yhi_str = None 1053 frame_zlo_str = frame_zhi_str = None 1054 frame_xy_str = frame_xz_str = frame_yz_str = None 1055 1056 elif (section == 'ITEM: NUMBER OF ATOMS'): 1057 frame_natoms = int(line) 1058 1059 elif (section.find('ITEM: BOX BOUNDS') == 0): 1060 is_triclinic = (section.find('xy xz yz') == 0) 1061 1062 tokens = line.split() 1063 if not frame_xlo_str: 1064 assert(not frame_xhi_str) 1065 frame_xlo_str = tokens[0] 1066 frame_xhi_str = tokens[1] 1067 avec = [float(frame_xhi_str) - float(frame_xlo_str), 1068 0.0, 1069 0.0] 1070 if (is_triclinic and (len(tokens) > 2)): 1071 frame_xy_str = tokens[2] 1072 # We will use this to recompute avec,bvec later. See 1073 # https://lammps.sandia.gov/doc/Howto_triclinic.html 1074 1075 elif not frame_ylo_str: 1076 assert(not frame_yhi_str) 1077 frame_ylo_str = tokens[0] 1078 frame_yhi_str = tokens[1] 1079 bvec = [0.0, 1080 float(frame_yhi_str) - float(frame_ylo_str), 1081 0.0] 1082 if (is_triclinic and (len(tokens) > 2)): 1083 frame_xz_str = tokens[2] 1084 # We will use this to recompute bvec later. See: 1085 # https://lammps.sandia.gov/doc/Howto_triclinic.html 1086 1087 elif not frame_zlo_str: 1088 assert(not frame_zhi_str) 1089 frame_zlo_str = tokens[0] 1090 frame_zhi_str = tokens[1] 1091 cvec = [0.0, 1092 0.0, 1093 float(frame_zhi_str) - float(frame_zlo_str)] 1094 1095 if is_triclinic: 1096 # https://lammps.sandia.gov/doc/Howto_triclinic.html 1097 frame_yz_str = "0.0" 1098 if len(tokens) > 2: 1099 frame_yz_str = tokens[2] 1100 # If we are using triclinic boundary conditions, 1101 # then we need to update the way we compute the 1102 # lattice vectors: avec, bvec, cvec. 1103 # This was not possible to do until we had read: 1104 # frame_xy_str, frame_xz_str, and frame_yz_str. 1105 # 1106 # Now that we have read all 3 of them, recompute 1107 # the avec, bvec, cvec vectors according to: 1108 # https://lammps.sandia.gov/doc/Howto_triclinic.html 1109 # 1110 # When triclinic boundary conditions are used, 1111 # the "BOX BOUNDS" section of the dump file stores 1112 # xlo_bound, xhi_bound, ylo_bound, and yhi_bound 1113 # instead of xlo, xhi, ylo, yhi. So we need to use 1114 # the following formula to calculate xlo,xhi,ylo,yhi 1115 xlo_bound = float(frame_xlo_str) 1116 xhi_bound = float(frame_xhi_str) 1117 ylo_bound = float(frame_ylo_str) 1118 yhi_bound = float(frame_yhi_str) 1119 zlo_bound = float(frame_zlo_str) 1120 zhi_bound = float(frame_zhi_str) 1121 xy = float(frame_xy_str) 1122 xz = float(frame_xz_str) 1123 yz = float(frame_yz_str) 1124 xlo = xlo_bound - min(0.0, xy, xz, xy+xz) 1125 xhi = xhi_bound - max(0.0, xy, xz, xy+xz) 1126 ylo = ylo_bound - min(0.0, yz) 1127 yhi = yhi_bound - max(0.0, yz) 1128 zlo = zlo_bound 1129 zhi = zhi_bound 1130 # now update "frame_xlo_str" and the other strings: 1131 frame_xlo_str = str(xlo) 1132 frame_xhi_str = str(xhi) 1133 frame_ylo_str = str(ylo) 1134 frame_yhi_str = str(yhi) 1135 frame_zlo_str = str(zlo) 1136 frame_zhi_str = str(zhi) 1137 # and compute the avec,bvec,cvec unit cell vectors 1138 avec = [xhi-xlo, 0.0, 0.0] 1139 bvec = [xy, yhi-ylo, 0.0] 1140 cvec = [xz, yz, zhi-zlo] 1141 1142 # sys.stderr.write('avec='+str(avec)+'\n') 1143 # sys.stderr.write('bvec='+str(bvec)+'\n') 1144 # sys.stderr.write('cvec='+str(cvec)+'\n') 1145 1146 elif (section.find('ITEM: ATOMS') == 0): 1147 tokens = line.split() 1148 atomid = tokens[i_atomid] 1149 atomtype = tokens[i_atomtype] 1150 frame_atomtypes[atomid] = atomtype 1151 if i_molid: 1152 molid = tokens[i_molid] 1153 frame_molids[atomid] = molid 1154 1155 if ((i_x != -1) and (i_y != -1) and (i_z != -1)): 1156 x = float(tokens[i_x]) # i_x determined above 1157 y = float(tokens[i_y]) 1158 z = float(tokens[i_z]) 1159 1160 elif ((i_xu != -1) and (i_yu != -1) and (i_zu != -1)): 1161 x = float(tokens[i_xu]) # i_x determined above 1162 y = float(tokens[i_yu]) 1163 z = float(tokens[i_zu]) 1164 1165 elif ((i_xs != -1) and (i_ys != -1) and (i_zs != -1)): 1166 xs = float(tokens[i_xs]) # i_xs determined above 1167 ys = float(tokens[i_ys]) 1168 zs = float(tokens[i_zs]) 1169 1170 x = float(frame_xlo_str) + xs * \ 1171 avec[0] + ys * bvec[0] + zs * cvec[0] 1172 y = float(frame_ylo_str) + xs * \ 1173 avec[1] + ys * bvec[1] + zs * cvec[1] 1174 z = float(frame_zlo_str) + xs * \ 1175 avec[2] + ys * bvec[2] + zs * cvec[2] 1176 1177 # avec, bvec, cvec described here: 1178 # https://lammps.sandia.gov/doc/Howto_triclinic.html 1179 1180 elif ((i_xsu != -1) and (i_ysu != -1) and (i_zsu != -1)): 1181 xsu = float(tokens[i_xsu]) # i_xs determined above 1182 ysu = float(tokens[i_ysu]) 1183 zsu = float(tokens[i_zsu]) 1184 1185 x = float(frame_xlo_str) + xsu * \ 1186 avec[0] + ysu * bvec[0] + zsu * cvec[0] 1187 y = float(frame_ylo_str) + xsu * \ 1188 avec[1] + ysu * bvec[1] + zsu * cvec[1] 1189 z = float(frame_zlo_str) + xsu * \ 1190 avec[2] + ysu * bvec[2] + zsu * cvec[2] 1191 1192 # Now deal with ix, iy, iz 1193 if (i_ix != -1) and (not x_already_unwrapped): 1194 ix = int(tokens[i_ix]) 1195 if (misc_settings.center_frame or 1196 (misc_settings.output_format != 'data')): 1197 #sys.stderr.write('atomid='+str(atomid)+', ix = '+str(ix)+', avec='+str(avec)+'\n') 1198 x += ix * avec[0] 1199 y += ix * avec[1] 1200 z += ix * avec[2] 1201 else: 1202 if atomid not in frame_coords_ixiyiz: 1203 frame_coords_ixiyiz[atomid] = ["0", "0", "0"] 1204 frame_coords_ixiyiz[atomid][0] = str(ix) 1205 1206 if (i_iy != -1) and (not y_already_unwrapped): 1207 iy = int(tokens[i_iy]) 1208 if (misc_settings.center_frame or 1209 (misc_settings.output_format != 'data')): 1210 #sys.stderr.write('atomid='+str(atomid)+', iy = '+str(iy)+', bvec='+str(bvec)+'\n') 1211 x += iy * bvec[0] 1212 y += iy * bvec[1] 1213 z += iy * bvec[2] 1214 else: 1215 if atomid not in frame_coords_ixiyiz: 1216 frame_coords_ixiyiz[atomid] = ["0", "0", "0"] 1217 frame_coords_ixiyiz[atomid][1] = str(iy) 1218 1219 if (i_iz != -1) and (not z_already_unwrapped): 1220 iz = int(tokens[i_iz]) 1221 if (misc_settings.center_frame or 1222 (misc_settings.output_format != 'data')): 1223 #sys.stderr.write('atomid='+str(atomid)+', iz = '+str(iz)+', cvec='+str(cvec)+'\n') 1224 x += iz * cvec[0] 1225 y += iz * cvec[1] 1226 z += iz * cvec[2] 1227 else: 1228 if atomid not in frame_coords_ixiyiz: 1229 frame_coords_ixiyiz[atomid] = ["0", "0", "0"] 1230 frame_coords_ixiyiz[atomid][2] = str(iz) 1231 1232 #frame_coords[atomid] = [str(x), str(y), str(z)] 1233 frame_coords[atomid] = [x, y, z] 1234 1235 vx = 0.0 1236 vy = 0.0 1237 vz = 0.0 1238 if i_vx != -1: 1239 vx = float(tokens[i_vx]) 1240 if i_vy != -1: 1241 vy = float(tokens[i_vy]) 1242 if i_vz != -1: 1243 vz = float(tokens[i_vz]) 1244 1245 frame_velocities[atomid] = [vx, vy, vz] 1246 1247 # NOTE: 1248 # There can be multiple "vects" associated with each atom 1249 # (for example, dipole moments, ellipsoid directions, etc..) 1250 1251 if atomid not in frame_vects: 1252 frame_vects[atomid] = [ 1253 None for I in range(0, len(ii_vects))] 1254 1255 for I in range(0, len(ii_vects)): 1256 i_vx = ii_vects[I][0] 1257 i_vy = ii_vects[I][1] 1258 i_vz = ii_vects[I][2] 1259 vx_str = tokens[i_vx] 1260 vy_str = tokens[i_vy] 1261 vz_str = tokens[i_vz] 1262 1263 # Now the annoying part: 1264 # Which vect is it (mux,muy,muz) or (quati,quatj,quatk)? 1265 # The columns could be listed in a different order 1266 # in the data file and in the dump file. 1267 # Figure out which vector it is in the data file (stored 1268 # in the integer "I_data") so that column names match. 1269 name_vx = dump_column_names[i_vx] 1270 name_vy = dump_column_names[i_vy] 1271 name_vz = dump_column_names[i_vz] 1272 i_vx_data = 0 1273 I_data = -1 1274 # This code is dreadful and inneficient. 1275 # I never want to touch this code again. (Hope it 1276 # works) 1277 while i_vx_data < len(data_settings.column_names): 1278 if name_vx == data_settings.column_names[i_vx_data]: 1279 I_data = 0 1280 while I_data < len(data_settings.ii_vects): 1281 if (dump_column_names[ii_vects[I][0]] == 1282 data_settings.column_names[data_settings.ii_vects[I_data][0]]): 1283 # (This checks the first component, ([0]) I should also 1284 # check [1] and [2], but the code is slow enough already. 1285 # THIS IS TERRIBLE CODE. 1286 break 1287 I_data += 1 1288 1289 if (0 <= I_data) and (I_data < len(data_settings.ii_vects)): 1290 break 1291 1292 i_vx_data += 1 1293 1294 if (0 <= I_data) and (I_data < len(data_settings.ii_vects)): 1295 frame_vects[atomid][I_data] = (vx_str,vy_str,vz_str) 1296 else: 1297 raise InputError('Error(dump2data): You have a vector coordinate in your dump file named \"' + name_vx + '\"\n' 1298 ' However there are no columns with this name in your data file\n' 1299 ' (or the column was not in the expected place).\n' 1300 ' Hence, the atom styles in the dump and data files do not match.') 1301 1302 if finished_reading_frame: 1303 1304 if misc_settings.scale != None: 1305 for atomid in frame_coords: 1306 for d in range(0, 3): 1307 crd = float(frame_coords[atomid][d]) 1308 frame_coords[atomid][d]=str(crd*misc_settings.scale) 1309 1310 if len(frame_coords) != frame_natoms: 1311 err_msg = 'Number of lines in \"ITEM: ATOMS\" section disagrees with\n' \ 1312 + ' \"ITEM: NUMBER OF ATOMS\" declared earlier in this file.\n' 1313 raise InputError(err_msg) 1314 1315 if misc_settings.center_frame: 1316 cm = [0.0, 0.0, 0.0] 1317 for atomid in frame_coords: 1318 for d in range(0, 3): 1319 cm[d] += float(frame_coords[atomid][d]) 1320 for d in range(0, 3): 1321 cm[d] /= float(len(frame_coords)) 1322 for atomid in frame_coords: 1323 for d in range(0, 3): 1324 frame_coords[atomid][d] = "%.7g" % ( 1325 float(frame_coords[atomid][d]) - cm[d]) 1326 frame_coords_ixiyiz[atomid] = ["0", "0", "0"] 1327 1328 if misc_settings.output_format != 'data': 1329 frame_coords_ixiyiz[atomid] = ["0", "0", "0"] 1330 1331 # if (num_frames_in == -1): 1332 # if (misc_settings.timestep_str != ''): 1333 # if (float(frame_timestep_str) >= 1334 # float(misc_settings.timestep_str)): 1335 # num_frames_in = 1 1336 # if not misc_settings.multi: 1337 # read_last_frame = True 1338 # else: 1339 # num_frames_in = 1 1340 1341 # Should we write out the coordinates in this frame? 1342 write_this_frame = False 1343 1344 if misc_settings.multi: 1345 1346 write_this_frame = True 1347 if (misc_settings.tstart and 1348 (int(frame_timestep_str) < misc_settings.tstart)): 1349 write_this_frame = False 1350 if (misc_settings.tstop and 1351 (int(frame_timestep_str) > misc_settings.tstop)): 1352 write_this_frame = False 1353 read_last_frame = True 1354 1355 if misc_settings.tstart: 1356 tstart = misc_settings.tstart 1357 else: 1358 tstart = 0 1359 1360 if ((int(frame_timestep_str) - tstart) 1361 % 1362 misc_settings.skip_interval) != 0: 1363 write_this_frame = False 1364 1365 else: 1366 if misc_settings.last_frame: 1367 if read_last_frame: 1368 write_this_frame = True 1369 else: 1370 assert(misc_settings.timestep_str) 1371 if (int(frame_timestep_str) >= 1372 int(misc_settings.timestep_str)): 1373 write_this_frame = True 1374 read_last_frame = True 1375 1376 if write_this_frame: 1377 1378 num_frames_out += 1 1379 1380 sys.stderr.write(' (writing frame ' + str(num_frames_out) + 1381 ' at timestep ' + frame_timestep_str + ')\n') 1382 1383 # Print the frame 1384 # First check which format to output the data: 1385 if misc_settings.output_format == 'raw': 1386 # Print out the coordinates in simple 3-column text 1387 # format 1388 for atomid, xyz in iter(sorted(frame_coords.items(), key=GetIntAtomID)): 1389 # Check and see if whether the atom is one of the 1390 # atoms that was selected by the user. If not discard. 1391 # (I don't offer this feature for 'data' files because 1392 # it is harder to implement for this file type.) 1393 if not InIntervalUnion(int(atomid), 1394 misc_settings.atom_id_intervals): 1395 continue 1396 atype = frame_atomtypes[atomid] 1397 if not InIntervalUnion(int(atype), 1398 misc_settings.atom_type_intervals): 1399 continue 1400 molid = frame_molids[atomid] 1401 if not InIntervalUnion(int(molid), 1402 misc_settings.mol_id_intervals): 1403 continue 1404 1405 # Write the coordinates 1406 if misc_settings.scale == None: 1407 sys.stdout.write( 1408 str(xyz[0]) + ' ' + str(xyz[1]) + ' ' + str(xyz[2]) + '\n') 1409 else: 1410 # Only convert to float and back if 1411 # misc_settings.scale != None 1412 sys.stdout.write(str(misc_settings.scale * float(xyz[0])) + ' ' + 1413 str(misc_settings.scale * float(xyz[1])) + ' ' + 1414 str(misc_settings.scale * float(xyz[2])) + '\n') 1415 sys.stdout.write('\n') 1416 1417 elif ((misc_settings.output_format == 'xyz') or 1418 (misc_settings.output_format == 'xyz-id') or 1419 (misc_settings.output_format == 'xyz-mol') or 1420 (misc_settings.output_format == 'xyz-type-mol')): 1421 # Print out the coordinates in simple 3-column text 1422 # format 1423 sys.stdout.write(str(len(frame_coords)) + '\n') 1424 descr_str = 'LAMMPS data from timestep ' + frame_timestep_str 1425 sys.stdout.write(descr_str + '\n') 1426 for atomid, xyz in iter(sorted(frame_coords.items(), key=GetIntAtomID)): 1427 1428 # Check and see if whether the atom is one of the 1429 # atoms that was selected by the user. If not discard. 1430 if not InIntervalUnion(int(atomid), 1431 misc_settings.atom_id_intervals): 1432 continue 1433 atype = frame_atomtypes[atomid] 1434 if not InIntervalUnion(int(atype), 1435 misc_settings.atom_type_intervals): 1436 continue 1437 molid = frame_molids[atomid] 1438 if not InIntervalUnion(int(molid), 1439 misc_settings.mol_id_intervals): 1440 continue 1441 1442 if ((misc_settings.output_format == 'xyz') or 1443 (misc_settings.output_format == 'xyz-type)')): 1444 atomtype = frame_atomtypes.get(atomid) 1445 if atomtype == None: 1446 raise InputError('xyz ERROR: Your trajectory file lacks atom-type information\n') 1447 first_column = str(atomtype) 1448 elif misc_settings.output_format == 'xyz-id': 1449 first_column = str(atomid) 1450 elif misc_settings.output_format == 'xyz-mol': 1451 molid = frame_molids.get(atomid) 1452 if molid == None: 1453 raise InputError('-xyz-mol ERROR: Your trajectory file lacks molecule-id information.\n') 1454 sys.stderr.write(str(molid)+'\n') 1455 first_column = str(molid) 1456 elif misc_settings.output_format == 'xyz-type-mol': 1457 atomtype = frame_atomtypes.get(atomid) 1458 if atomtype == None: 1459 raise InputError('xyz-type-mol ERROR: Your trajectory file lacks atom-type information.\n') 1460 molid = frame_molids.get(atomid) 1461 if molid == None: 1462 raise InputError('-xyz-type-mol ERROR: Your trajectory file lacks molecule-id information.\n') 1463 first_column = str(atomtype)+'_'+str(molid) 1464 if misc_settings.scale == None: 1465 sys.stdout.write(first_column + ' ' + 1466 str(xyz[0]) + ' ' + 1467 str(xyz[1]) + ' ' + 1468 str(xyz[2]) + '\n') 1469 else: 1470 # Only convert to float and back if 1471 # misc_settings.scale != None 1472 sys.stdout.write(first_column + ' ' + 1473 str(misc_settings.scale * float(xyz[0])) + ' ' + 1474 str(misc_settings.scale * float(xyz[1])) + ' ' + 1475 str(misc_settings.scale * float(xyz[2])) + '\n') 1476 1477 else: 1478 # Parse the DATA file specified by the user 1479 # and replace appropriate lines or fields with 1480 # the corresponding text from the DUMP file. 1481 descr_str = 'LAMMPS data from timestep ' + frame_timestep_str 1482 if misc_settings.multi and (misc_settings.output_format == 'data'): 1483 out_file_name = data_settings.file_name + '.'\ 1484 + str(num_frames_out) 1485 sys.stderr.write( 1486 ' (creating file \"' + out_file_name + '\")\n') 1487 out_file = open(out_file_name, 'w') 1488 else: 1489 out_file = sys.stdout 1490 1491 WriteFrameToData(out_file, 1492 descr_str, 1493 misc_settings, 1494 data_settings, 1495 dump_column_names, 1496 frame_natoms, 1497 frame_coords, 1498 frame_coords_ixiyiz, 1499 frame_vects, 1500 frame_velocities, 1501 frame_atomtypes, 1502 frame_molids, 1503 frame_xlo_str, frame_xhi_str, 1504 frame_ylo_str, frame_yhi_str, 1505 frame_zlo_str, frame_zhi_str, 1506 frame_xy_str, frame_xz_str, frame_yz_str) 1507 1508 # if misc_settings.multi: 1509 # out_file.close() 1510 1511 # if num_frames_in >= 0: 1512 # num_frames_in += 1 1513 1514 if read_last_frame: 1515 exit(0) 1516 1517 if misc_settings.in_coord_file_name != '': 1518 in_coord_file.close() 1519 1520 for warning_str in warning_strings: 1521 sys.stderr.write(warning_str + '\n') 1522 1523 except (ValueError, InputError) as err: 1524 sys.stderr.write('\n' + str(err) + '\n') 1525 sys.exit(-1) 1526 1527 return 1528 1529 1530if __name__ == '__main__': 1531 main() 1532