1#!/bin/env python
2#
3# Build a super cell of a given PDB file.
4# Because of peculiarities of NWChem we need to produce a
5# super cell by creating more atoms with translated coordinates.
6# Then we need to write all the non-water atoms out first
7# followed by all water atoms.
8#
9def read_pdb_file(filename):
10    '''
11    Read the contents of the PDB file and return a list of lines.
12    '''
13    fp = open(filename,'r')
14    pdb = fp.readlines()
15    fp.close()
16    return pdb
17
18def get_cell_dimensions(pdb):
19    '''
20    Take the PDB file as a list of lines.
21    Read the cell dimensions from the first line (starting with "CRYST1")
22    and return this as a tuple.
23    '''
24    line = pdb[0]
25    elements = line.split()
26    x = float(elements[1])
27    y = float(elements[2])
28    z = float(elements[3])
29    return (x,y,z)
30
31def gen_translation(direction,pdb):
32    '''
33    Generate the translation vector based on the choice of direction (given as a string),
34    and the cell dimension of the PDB file.
35    Return this a tuple.
36    '''
37    (x,y,z) = get_cell_dimensions(pdb)
38    if direction == "x":
39        trans = (x,0.0,0.0)
40    elif direction == "y":
41        trans = (0.0,y,0.0)
42    elif direction == "z":
43        trans = (0.0,0.0,z)
44    else:
45        print("Interesting coordinate system you are using. Your choice of direction is %s\n"%direction)
46    return trans
47
48def new_cell_dimensions(direction,pdb):
49    '''
50    Given the PDB data and the direction compute the new cell dimensions
51    after extending the system in the given dimension.
52    Return the new dimensions as a tuple.
53    '''
54    (x,y,z) = get_cell_dimensions(pdb)
55    if direction == "x":
56        cell = (2*x,y,z)
57    elif direction == "y":
58        cell = (x,2*y,z)
59    elif direction == "z":
60        cell = (x,y,2*z)
61    else:
62        print("Interesting coordinate system you are using. Your choice of direction is %s\n"%direction)
63    return cell
64
65def split_pdb(pdb):
66    '''
67    Given the PDB file, separate the contents into three parts:
68    - the cell definition
69    - the solute atoms
70    - the solvent atoms
71    Return these results as a tuple of lists of lines.
72    '''
73    cryst   = []
74    solute  = []
75    solvent = []
76    for line in pdb:
77        elements = line.split()
78        if elements[0] == "CRYST1":
79            cryst.append(line)
80        elif elements[0] == "ATOM":
81            if elements[3] == "HOH":
82                solvent.append(line)
83            else:
84                solute.append(line)
85        elif elements[0] == "TER":
86            solute.append(line)
87    return (cryst,solute,solvent)
88
89def translate_atoms(atomsin,translation):
90    '''
91    Take a given list of atoms and create a new list of atoms
92    where all atoms are translated by the given translations.
93    Return the new list of atoms. The input list remains unchanged.
94    '''
95    (dx,dy,dz) = translation
96    atomsout = []
97    for atom in atomsin:
98        elements = atom.split()
99        if elements[0] == "ATOM":
100            sx = atom[30:38]
101            sy = atom[38:46]
102            sz = atom[46:54]
103            x  = float(sx)+dx
104            y  = float(sy)+dy
105            z  = float(sz)+dz
106            new_atom = "%s%8.3f%8.3f%8.3f%s"%(atom[:30],x,y,z,atom[54:])
107        else:
108            new_atom = atom
109        atomsout.append(new_atom)
110    return atomsout
111
112def new_pdb(cryst,oldsolute,oldsolvent,newsolute,newsolvent):
113    '''
114    Given the different sections of the old PDB and the translated parts of the
115    solute and solvent lists construct a new PDB file.
116    Return the list of lines of the new PDB file.
117    '''
118    end = ["END"]
119    newpdb = cryst+oldsolute+newsolute+oldsolvent+newsolvent+end
120    return newpdb
121
122def new_cell(oldcryst,newcell):
123    '''
124    Given the old crystal line and the new cell dimensions construct
125    a new crystal line.
126    Return the list of new crystal lines.
127    '''
128    inline = oldcryst[0]
129    (x,y,z) = newcell
130    outline = "%s%9.3f%9.3f%9.3f%s"%(inline[:6],x,y,z,inline[33:])
131    outlist = []
132    outlist.append(outline)
133    return outlist
134
135def write_pdb(filename,pdb):
136    '''
137    Given the name of the output file and the list of PDB lines
138    write the data to the PDB file.
139    '''
140    fp = open(filename,'w')
141    for line in pdb:
142        fp.write(line)
143    fp.close()
144
145def parse_arguments():
146    '''
147    Parse command line arguments.
148    '''
149    from argparse import ArgumentParser
150    prs = ArgumentParser()
151    prs.add_argument("input",help="the input PDF file")
152    prs.add_argument("output",help="the output PDF file")
153    prs.add_argument("direction",help="the direction to expand along")
154    args = prs.parse_args()
155    return args
156
157def execute_with_arguments(args):
158    inputfile  = args.input
159    outputfile = args.output
160    direction  = args.direction
161    inputdata = read_pdb_file(inputfile)
162    trans = gen_translation(direction,inputdata)
163    newcell = new_cell_dimensions(direction,inputdata)
164    (cryst,solute,solvent) = split_pdb(inputdata)
165    newcryst = new_cell(cryst,newcell)
166    newsolute = translate_atoms(solute,trans)
167    newsolvent = translate_atoms(solvent,trans)
168    outputdata = new_pdb(newcryst,solute,solvent,newsolute,newsolvent)
169    write_pdb(outputfile,outputdata)
170
171def main():
172    execute_with_arguments(parse_arguments())
173
174if __name__ == "__main__":
175    main()
176