1#!/usr/local/bin/perl -w 2 3use strict; 4# 5# This script reads an XPLOR input file with distance restraint data 6# as sometimes is found in the pdb database (http://www.pdb.org). 7# From this input file dihedral restrints should be removed, such that 8# only distance restraints are left. The script can handle ambiguous restraints. 9# It converts the distance restraints to GROMACS format. 10# A typical command line would be 11# ./xplor2gmx.pl 33 conf.pdb < restraints.dat > disre.itp 12# You can turn off debugging info, but please do verify your output is correct! 13# 14# David van der Spoel (spoel@gromacs.org), July 2002 15# 16 17# Turn debugging off (0), or on ( > 0). 18my $debug = 1; 19# Turn atom name translation on and off 20my $trans = 1; 21 22my $pdb = shift || die "I need the name of the pdb file with correct atom numbers\n"; 23my $core = shift || "core.ndx"; 24my $tbl = "$ENV{GMXDATA}/top/atom_nom.tbl"; 25 26printf "[ distance_restraints ]\n"; 27printf "; Read an xplor distance restraint file, and output GROMACS distance restraints.\n"; 28printf "; This also needs a pdb file with correct GROMACS atom numbering.\n"; 29printf "; I used $pdb for the atom numbers\n"; 30printf "; This also needs the offset in residues.\n"; 31 32# Read the pdb file 33# If things go wrong, check whether your pdb file is read correctly. 34my $natom = 0; 35my $nres = 0; 36my @resname; 37my @aname; 38my @resnr; 39open(PDB,$pdb) || die "Can not open file $pdb\n"; 40while (my $line = <PDB>) { 41 if (index($line,"ATOM") >= 0) { 42 my @tmp = split(' ',$line); 43 $aname[$natom] = $tmp[2]; 44 $resnr[$natom] = $tmp[4]; 45 if (!defined $resname[$tmp[4]]) { 46 $resname[$tmp[4]] = $tmp[3]; 47 $nres++; 48 } 49 $natom++; 50 } 51} 52close PDB; 53printf "; I found $natom atoms in the pdb file $pdb\n"; 54printf "; I found $nres residues in the pdb file $pdb\n"; 55if ($debug > 1) { 56 for (my $i = 0; ($i < $natom); $i ++) { 57 printf("; %5d %5s %5s %5d\n",$i+1,$aname[$i], 58 $resname[$resnr[$i]],$resnr[$i]); 59 } 60} 61 62# 63# Read the name translation table. 64# Source for this comes from: http://www.bmrb.wisc.edu/ref_info/atom_nom.tbl 65# It was adapted slightly for GROMACS use, but only as regards formatting, 66# not for content 67# 68open(TBL,$tbl) || die "Can not open atom-name translation table $tbl\n"; 69my $ntbl=0; 70my @tblxplor; 71my @tblgmx; 72my @tblres; 73while (my $line = <TBL>) { 74 my @ttt = split('#',$line); 75 my @tmp = split(' ',$ttt[0]); 76 if ($#tmp == 3) { 77 # New table entry 78 $tblres[$ntbl] = $tmp[0]; 79 $tblxplor[$ntbl] = $tmp[1]; 80 $tblgmx[$ntbl] = $tmp[3]; 81 $ntbl++; 82 } 83} 84close TBL; 85printf "; Read $ntbl entries from $tbl\n"; 86 87my $default = "XXX"; 88 89my @templates = ( 90 [ $default, "HA#", "HA1", "HA2" ], 91 [ $default, "HA*", "HA1", "HA2" ], 92 [ $default, "HB#", "HB", "HB1", "HB2" ], 93 [ $default, "HB*", "HB", "HB1", "HB2" ], 94 [ $default, "HG#", "HG", "HG1", "HG2", "HG11", "HG12", "HG13", "HG21", "HG22", "HG23" ], 95 [ $default, "HG*", "HG", "HG1", "HG2", "HG11", "HG12", "HG13", "HG21", "HG22", "HG23" ], 96 [ $default, "HG1#", "HG11", "HG12", "HG13" ], 97 [ $default, "HG1*", "HG11", "HG12", "HG13" ], 98 [ $default, "HG2#", "HG21", "HG22", "HG23" ], 99 [ $default, "HG2*", "HG21", "HG22", "HG23" ], 100 [ $default, "HD#", "HD1", "HD2", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23" ], 101 [ $default, "HD*", "HD1", "HD2", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23" ], 102 [ $default, "HD1#", "HD11", "HD12" ], 103 [ "ILE", "HD1*", "HD1", "HD2", "HD3" ], 104 [ $default, "HD1*", "HD11", "HD12" ], 105 [ $default, "HD2#", "HD21", "HD22" ], 106 [ $default, "HD2*", "HD21", "HD22" ], 107 [ $default, "HE#", "HE", "HE1", "HE2" ], 108 [ "GLN", "HE*", "HE21", "HE22" ], 109 [ $default, "HE*", "HE", "HE1", "HE2" ], 110 [ $default, "HE2*", "HE2", "HE21", "HE22" ], 111 [ $default, "HH#", "HH11", "HH12", "HH21", "HH22" ], 112 [ $default, "HH*", "HH11", "HH12", "HH21", "HH22" ], 113 [ $default, "HZ#", "HZ", "HZ1", "HZ2", "HZ3" ], 114 [ $default, "HZ*", "HZ", "HZ1", "HZ2", "HZ3" ], 115 [ $default, "HN", "H" ] 116); 117 118my $ntranslated = 0; 119my $nuntransltd = 0; 120sub transl_aname { 121 my $resnm = shift; 122 my $atom = shift; 123 124 if ( $trans == 1 ) { 125 for(my $i = 0; ($i < $ntbl); $i ++) { 126 if ($tblres[$i] eq $resnm) { 127 if ($tblxplor[$i] eq $atom) { 128 $ntranslated++; 129 return $tblgmx[$i]; 130 } 131 } 132 } 133 } 134 $nuntransltd++; 135 if ($debug > 1) { 136 printf "; No name change for $resnm $atom\n"; 137 } 138 return $atom; 139} 140 141sub expand_template { 142 my $atom = shift(@_); 143 my $rnum = shift(@_); 144 my $bdone = 0; 145 my @atoms; 146 my $jj = 0; 147 148 die("No residue name for residue $rnum") if (!defined ($resname[$rnum])); 149 for (my $tt=0; (($tt <= $#templates) && ($bdone == 0)); $tt++) { 150 my $templ = $templates[$tt]; 151 if (($resname[$rnum] eq $templ->[0] || 152 $default eq $templ->[0]) && 153 ($atom eq $templ->[1])) { 154 for ($jj = 2; ($jj <= $#{$templ}); $jj++) { 155 push @atoms, transl_aname($resname[$rnum],$templ->[$jj]); 156 } 157 $bdone = 1; 158 } 159 } 160 if ($bdone == 0) { 161 push @atoms, transl_aname($resname[$rnum],$atom); 162 } 163 if ($debug > 0) { 164 my $natom = $#atoms+1; 165 printf("; Found $natom elements for atom $resname[$rnum] %d $atom:", 166 $rnum); 167 for my $aa ( @atoms ) { 168 printf " $aa"; 169 } 170 printf "\n"; 171 } 172 return @atoms; 173} 174 175if ($debug > 1) { 176 printf "; There are $#templates template entries\n"; 177 for (my $tt=0; ($tt <= $#templates); $tt++) { 178 my $templ = $templates[$tt]; 179 my $ntempl = $#{$templ}; 180 printf "; Item $tt ($templates[$tt][0] $templates[$tt][1]) has $ntempl entries\n"; 181 } 182} 183 184# This index file holds numbers of restraints involving core atoms 185my @protcore = ( "H", "HN", "HA", "HA1", "HA2", "HB", "HB1", "HB2", "HB3", "HG", "HG1", "HG2", "HG3", "N", "O" ); 186open(CORE,">$core") || die "Can not open $core\n"; 187print CORE "[ core-restraints ]\n"; 188my $ncore = 0; 189 190my $myindex = 0; 191my $linec = 0; 192my $npair = 0; 193my $nunresolved = 0; 194while (my $line = <STDIN>) { 195 my @ttt = split('!',$line); 196 if (index($ttt[0], "dihedral") >= 0) { 197 last; 198 } 199 elsif ((index($ttt[0],"assign") >= 0) && (index($ttt[0],"!assign") < 0)) { 200 my @tmp = split('\(',$ttt[0]); 201 # Find first argument 202 my $rhaak = undef; 203 if (($rhaak = index($tmp[1],')')) < 0) { 204 printf "No ) in '$tmp[1]'\n"; 205 } 206 my $atres1 = substr($tmp[1],0,$rhaak); 207 my @at1 = split('or',$atres1); 208 209 # Find second argument 210 if (($rhaak = index($tmp[2],')')) < 0) { 211 printf "No ) in '$tmp[2]'\n"; 212 } 213 my $atres2 = substr($tmp[2],0,$rhaak); 214 my @at2 = split('or',$atres2); 215 216 my @expdata = split('\)',$tmp[2]); 217 my @dist = split(' ',$expdata[1]); 218 219 my $bOK = 0; 220 my $bCore = 1; 221 foreach my $a1 ( @at1 ) { 222 my @info1 = split(' ',$a1); 223 my $r1 = $info1[1]; 224 my @atoms1 = expand_template($info1[4],$r1); 225 226 foreach my $a2 ( @at2 ) { 227 my @info2 = split(' ',$a2); 228 my $r2 = $info2[1]; 229 my @atoms2 = expand_template($info2[4],$r2); 230 231 my $i = undef; 232 for ($i = 0; ($i < $natom) && ($resnr[$i] < $r1); $i++) { ; } 233 for ( ; ($i < $natom) && ($resnr[$i] == $r1); $i++) { 234 foreach my $ii ( @atoms1 ) { 235 if ($ii eq $aname[$i]) { 236 my $bCoreI = 0; 237 for my $pp ( @protcore ) { 238 if ($ii eq $pp) { 239 $bCoreI = 1; 240 } 241 } 242 my $j = undef; 243 for ($j = 0; ($j < $natom) && ($resnr[$j] < $r2); $j++) { ; } 244 for ( ; ($j < $natom) && ($resnr[$j] == $r2); $j++) { 245 foreach my $jj ( @atoms2 ) { 246 if ($jj eq $aname[$j]) { 247 my $dd = 0.1*$dist[0]; 248 my $dminus = 0.1*$dist[1]; 249 my $dplus = 0.1*$dist[2]; 250 my $low = $dd-$dminus; 251 my $up1 = $dd+$dplus; 252 my $up2 = $up1+1; 253 printf("%5d %5d %5d %5d %5d %7.3f %7.3f %7.3f 1.0; res $r1 $ii - res $r2 $jj\n",$i+1,$j+1,1,$myindex,1,$low,$up1,$up2); 254 # Do some checks 255 $bOK = 1; 256 my $bCoreJ = 0; 257 for my $pp ( @protcore ) { 258 if ($jj eq $pp) { 259 $bCoreJ = 1; 260 } 261 } 262 if (($bCoreI == 0) || ($bCoreJ == 0)) { 263 if ($debug > 0) { 264 printf "; No core $ii ($bCoreI) $jj ($bCoreJ)\n"; 265 } 266 $bCore = 0; 267 } 268 $npair++; 269 } 270 } 271 } 272 } 273 } 274 } 275 } 276 } 277 if ($bCore == 1) { 278 printf CORE "$myindex\n"; 279 $ncore++; 280 } 281 if ($bOK == 0) { 282 printf "; UNRESOLVED: $ttt[0]\n"; 283 $nunresolved++; 284 } 285 else { 286 $myindex++; 287 } 288 } 289 $linec++; 290} 291close CORE; 292 293printf "; A total of $myindex lines with distance restraints were read from $linec input lines\n"; 294printf "; From this, $npair actual restraints were generated.\n"; 295printf "; A total of $nunresolved lines could not be interpreted\n"; 296printf "; In the process $ntranslated atoms had a name change\n"; 297printf "; However for $nuntransltd no name change could be found\n"; 298printf "; usually these are either HN->H renamings or not-existing atoms\n"; 299printf "; generated by the template expansion (e.g. HG#)\n"; 300printf "; A total of $ncore restraints were classified as core restraints\n"; 301