1#!/usr/local/bin/perl -w
2
3use strict;
4#
5# This script reads an XPLOR input file with distance restraint data
6# as sometimes is found in the pdb database (http://www.pdb.org).
7# From this input file dihedral restrints should be removed, such that
8# only distance restraints are left. The script can handle ambiguous restraints.
9# It converts the distance restraints to GROMACS format.
10# A typical command line would be
11# ./xplor2gmx.pl 33 conf.pdb < restraints.dat > disre.itp
12# You can turn off debugging info, but please do verify your output is correct!
13#
14# David van der Spoel (spoel@gromacs.org), July 2002
15#
16
17# Turn debugging off (0), or on ( > 0).
18my $debug = 1;
19# Turn atom name translation on and off
20my $trans = 1;
21
22my $pdb   = shift || die "I need the name of the pdb file with correct atom numbers\n";
23my $core  = shift || "core.ndx";
24my $tbl   = "$ENV{GMXDATA}/top/atom_nom.tbl";
25
26printf "[ distance_restraints ]\n";
27printf "; Read an xplor distance restraint file, and output GROMACS distance restraints.\n";
28printf "; This also needs a pdb file with correct GROMACS atom numbering.\n";
29printf "; I used $pdb for the atom numbers\n";
30printf "; This also needs the offset in residues.\n";
31
32# Read the pdb file
33# If things go wrong, check whether your pdb file is read correctly.
34my $natom = 0;
35my $nres  = 0;
36my @resname;
37my @aname;
38my @resnr;
39open(PDB,$pdb) || die "Can not open file $pdb\n";
40while (my $line = <PDB>) {
41    if (index($line,"ATOM") >= 0) {
42	my @tmp = split(' ',$line);
43	$aname[$natom] = $tmp[2];
44	$resnr[$natom] = $tmp[4];
45	if (!defined $resname[$tmp[4]]) {
46	    $resname[$tmp[4]] = $tmp[3];
47	    $nres++;
48	}
49	$natom++;
50    }
51}
52close PDB;
53printf "; I found $natom atoms in the pdb file $pdb\n";
54printf "; I found $nres residues in the pdb file $pdb\n";
55if ($debug > 1) {
56    for (my $i = 0; ($i < $natom); $i ++) {
57	printf("; %5d  %5s  %5s  %5d\n",$i+1,$aname[$i],
58	       $resname[$resnr[$i]],$resnr[$i]);
59    }
60}
61
62#
63# Read the name translation table.
64# Source for this comes from: http://www.bmrb.wisc.edu/ref_info/atom_nom.tbl
65# It was adapted slightly for GROMACS use, but only as regards formatting,
66# not for content
67#
68open(TBL,$tbl) || die "Can not open atom-name translation table $tbl\n";
69my $ntbl=0;
70my @tblxplor;
71my @tblgmx;
72my @tblres;
73while (my $line = <TBL>) {
74    my @ttt = split('#',$line);
75    my @tmp = split(' ',$ttt[0]);
76    if ($#tmp == 3) {
77	# New table entry
78	$tblres[$ntbl] = $tmp[0];
79	$tblxplor[$ntbl] = $tmp[1];
80	$tblgmx[$ntbl] = $tmp[3];
81	$ntbl++;
82    }
83}
84close TBL;
85printf "; Read $ntbl entries from $tbl\n";
86
87my $default = "XXX";
88
89my @templates = (
90 [ $default, "HA#", "HA1", "HA2" ],
91 [ $default, "HA*", "HA1", "HA2" ],
92 [ $default, "HB#",  "HB",         "HB1",	"HB2"	],
93 [ $default, "HB*",  "HB",         "HB1",	"HB2"	],
94 [ $default, "HG#",  "HG",         "HG1",	"HG2",	"HG11",	"HG12",	"HG13",	"HG21",	"HG22",	"HG23"  ],
95 [ $default, "HG*",  "HG",         "HG1",	"HG2",	"HG11",	"HG12",	"HG13",	"HG21",	"HG22",	"HG23"  ],
96 [ $default, "HG1#", "HG11",	"HG12",	"HG13"	],
97 [ $default, "HG1*", "HG11",	"HG12",	"HG13"	],
98 [ $default, "HG2#", "HG21",	"HG22",	"HG23"	],
99 [ $default, "HG2*", "HG21",	"HG22",	"HG23"	],
100 [ $default, "HD#",  "HD1",	"HD2",	"HD11",	"HD12",	"HD13",	"HD21",	"HD22",	"HD23" ],
101 [ $default, "HD*",  "HD1",	"HD2",	"HD11",	"HD12",	"HD13",	"HD21",	"HD22",	"HD23" ],
102 [ $default, "HD1#", "HD11",	"HD12"	],
103 [ "ILE",    "HD1*", "HD1",	"HD2",   "HD3"	],
104 [ $default, "HD1*", "HD11",	"HD12"	],
105 [ $default, "HD2#", "HD21",	"HD22"	],
106 [ $default, "HD2*", "HD21",	"HD22"	],
107 [ $default, "HE#",  "HE",      "HE1",	"HE2"	],
108 [ "GLN",    "HE*",  "HE21",	"HE22"	],
109 [ $default, "HE*",  "HE",        "HE1",	"HE2"	],
110 [ $default, "HE2*", "HE2",       "HE21",	"HE22"	],
111 [ $default, "HH#",  "HH11",	"HH12",	"HH21",	"HH22" ],
112 [ $default, "HH*",  "HH11",	"HH12",	"HH21",	"HH22" ],
113 [ $default, "HZ#",  "HZ",         "HZ1",	"HZ2",	"HZ3"	],
114 [ $default, "HZ*",  "HZ",         "HZ1",	"HZ2",	"HZ3"	],
115 [ $default, "HN",   "H" ]
116);
117
118my $ntranslated = 0;
119my $nuntransltd = 0;
120sub transl_aname {
121    my $resnm  = shift;
122    my $atom   = shift;
123
124    if ( $trans == 1 ) {
125	for(my $i = 0; ($i < $ntbl); $i ++) {
126	    if ($tblres[$i] eq $resnm) {
127		if ($tblxplor[$i] eq $atom) {
128		    $ntranslated++;
129		    return $tblgmx[$i];
130		}
131	    }
132	}
133    }
134    $nuntransltd++;
135    if ($debug > 1) {
136	printf "; No name change for $resnm $atom\n";
137    }
138    return $atom;
139}
140
141sub expand_template {
142    my $atom  = shift(@_);
143    my $rnum  = shift(@_);
144    my $bdone = 0;
145    my @atoms;
146    my $jj = 0;
147
148    die("No residue name for residue $rnum") if (!defined ($resname[$rnum]));
149    for (my $tt=0; (($tt <= $#templates) && ($bdone == 0)); $tt++) {
150	my $templ = $templates[$tt];
151        if (($resname[$rnum] eq $templ->[0] ||
152             $default eq $templ->[0]) &&
153            ($atom eq $templ->[1])) {
154	    for ($jj = 2; ($jj <= $#{$templ}); $jj++) {
155		push @atoms, transl_aname($resname[$rnum],$templ->[$jj]);
156	    }
157	    $bdone  = 1;
158	}
159    }
160    if ($bdone == 0) {
161	push @atoms, transl_aname($resname[$rnum],$atom);
162    }
163    if ($debug > 0) {
164	my $natom = $#atoms+1;
165	printf("; Found $natom elements for atom $resname[$rnum] %d $atom:",
166	       $rnum);
167	for my $aa ( @atoms ) {
168	    printf " $aa";
169	}
170	printf "\n";
171    }
172    return @atoms;
173}
174
175if ($debug > 1) {
176    printf "; There are $#templates template entries\n";
177    for (my $tt=0; ($tt <= $#templates); $tt++) {
178	my $templ  = $templates[$tt];
179        my $ntempl = $#{$templ};
180	printf "; Item $tt ($templates[$tt][0] $templates[$tt][1]) has $ntempl entries\n";
181    }
182}
183
184# This index file holds numbers of restraints involving core atoms
185my @protcore = ( "H", "HN", "HA", "HA1", "HA2", "HB", "HB1", "HB2", "HB3", "HG", "HG1", "HG2", "HG3", "N", "O"  );
186open(CORE,">$core") || die "Can not open $core\n";
187print CORE "[ core-restraints ]\n";
188my $ncore = 0;
189
190my $myindex     = 0;
191my $linec       = 0;
192my $npair       = 0;
193my $nunresolved = 0;
194while (my $line = <STDIN>) {
195    my @ttt = split('!',$line);
196    if (index($ttt[0], "dihedral") >= 0) {
197        last;
198    }
199    elsif ((index($ttt[0],"assign") >= 0) && (index($ttt[0],"!assign") < 0)) {
200	my @tmp = split('\(',$ttt[0]);
201	# Find first argument
202        my $rhaak = undef;
203	if (($rhaak  = index($tmp[1],')')) < 0) {
204	    printf "No ) in '$tmp[1]'\n";
205	}
206	my $atres1 = substr($tmp[1],0,$rhaak);
207	my @at1 = split('or',$atres1);
208
209	# Find second argument
210	if (($rhaak  = index($tmp[2],')')) < 0) {
211	    printf "No ) in '$tmp[2]'\n";
212	}
213	my $atres2 = substr($tmp[2],0,$rhaak);
214	my @at2 = split('or',$atres2);
215
216	my @expdata = split('\)',$tmp[2]);
217	my @dist    = split(' ',$expdata[1]);
218
219	my $bOK   = 0;
220	my $bCore = 1;
221	foreach my $a1 ( @at1 ) {
222	    my @info1  = split(' ',$a1);
223	    my $r1     = $info1[1];
224	    my @atoms1 = expand_template($info1[4],$r1);
225
226	    foreach my $a2 ( @at2 ) {
227		my @info2  = split(' ',$a2);
228		my $r2     = $info2[1];
229		my @atoms2 = expand_template($info2[4],$r2);
230
231                my $i = undef;
232		for ($i = 0; ($i < $natom) && ($resnr[$i] < $r1); $i++) { ; }
233		for ( ; ($i < $natom) && ($resnr[$i] == $r1); $i++) {
234		    foreach my $ii ( @atoms1 ) {
235			if ($ii eq $aname[$i]) {
236			    my $bCoreI = 0;
237			    for my $pp ( @protcore ) {
238				if ($ii eq $pp) {
239				    $bCoreI = 1;
240				}
241			    }
242                            my $j = undef;
243			    for ($j = 0; ($j < $natom) && ($resnr[$j] < $r2); $j++) { ; }
244			    for ( ; ($j < $natom) && ($resnr[$j] == $r2); $j++) {
245				foreach my $jj ( @atoms2 ) {
246				    if ($jj eq $aname[$j]) {
247					my $dd     = 0.1*$dist[0];
248					my $dminus = 0.1*$dist[1];
249					my $dplus  = 0.1*$dist[2];
250					my $low    = $dd-$dminus;
251					my $up1    = $dd+$dplus;
252					my $up2    = $up1+1;
253					printf("%5d %5d %5d %5d %5d %7.3f %7.3f %7.3f 1.0; res $r1 $ii - res $r2 $jj\n",$i+1,$j+1,1,$myindex,1,$low,$up1,$up2);
254					# Do some checks
255					$bOK    = 1;
256					my $bCoreJ = 0;
257					for my $pp ( @protcore ) {
258					    if ($jj eq $pp) {
259						$bCoreJ = 1;
260					    }
261					}
262					if (($bCoreI == 0) || ($bCoreJ == 0)) {
263					    if ($debug > 0) {
264						printf "; No core $ii ($bCoreI) $jj ($bCoreJ)\n";
265					    }
266					    $bCore = 0;
267					}
268					$npair++;
269				    }
270				}
271			    }
272			}
273		    }
274		}
275	    }
276	}
277	if ($bCore == 1) {
278	    printf CORE "$myindex\n";
279	    $ncore++;
280	}
281	if ($bOK == 0) {
282	    printf "; UNRESOLVED: $ttt[0]\n";
283	    $nunresolved++;
284	}
285	else {
286	    $myindex++;
287	}
288    }
289    $linec++;
290}
291close CORE;
292
293printf "; A total of $myindex lines with distance restraints were read from $linec input lines\n";
294printf "; From this, $npair actual restraints were generated.\n";
295printf "; A total of $nunresolved lines could not be interpreted\n";
296printf "; In the process $ntranslated atoms had a name change\n";
297printf "; However for $nuntransltd no name change could be found\n";
298printf "; usually these are either HN->H renamings or not-existing atoms\n";
299printf "; generated by the template expansion (e.g. HG#)\n";
300printf "; A total of $ncore restraints were classified as core restraints\n";
301