1 /* molauto.c
2 
3    MolAuto v1.1.1 for MolScript v2.1
4 
5    Create a MolScript input file for a given PDB file.
6 
7    Copyright (C) 1997-1998 Per Kraulis
8     18-Jan-1997  first attempts
9     12-Mar-1997  first usable version
10     29-Aug-1997  rewrote secondary structure output procedure
11      5-Nov-1997  fixed single-residue coil and turn bug
12     23-Jan-1998  mod's for named_data in mol3d
13      5-Jun-1998  mod's for mol3d_init; PDB secondary structure now default
14     17-Jun-1998  fixed bug with 1HIV: convert single aa among ligands to ligand
15     25-Nov-1998  fixed -ss_hb bug: init mol properly
16 */
17 
18 #include <assert.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <ctype.h>
22 #include <string.h>
23 
24 #include <args.h>
25 #include <str_utils.h>
26 #include <dynstring.h>
27 #include <mol3d.h>
28 #include <mol3d_io.h>
29 #include <mol3d_init.h>
30 #include <mol3d_utils.h>
31 #include <mol3d_secstruc.h>
32 #include <aa_lookup.h>
33 
34 
35 /*============================================================*/
36 const char program_str[] = "MolAuto v1.1.1";
37 const char copyright_str[] = "Copyright (C) 1997-1998 Per J. Kraulis";
38 
39 enum ss_modes { SS_PDB, SS_CA, SS_HB };
40 enum ligand_modes { LIGAND_NO, LIGAND_BONDS, LIGAND_STICK, LIGAND_CPK };
41 
42 int title_mode = TRUE;
43 int centre_mode = TRUE;
44 int cylinder_mode = FALSE;
45 int coil_mode = TRUE;
46 int nice_mode = FALSE;
47 int thin_mode = FALSE;
48 int ligand_mode = LIGAND_BONDS;
49 int colour_mode = TRUE;
50 int ss_mode = SS_PDB;
51 
52 static double hue, decrement;
53 
54 
55 /*------------------------------------------------------------*/
56 void
output_hsb_decrement(void)57 output_hsb_decrement (void)
58 {
59   if (colour_mode && !nice_mode) {
60     printf ("  set planecolour hsb %.4g 1 1;\n", hue);
61     hue -= decrement;
62     if (hue < 0.0) hue = 0.0;
63   }
64 }
65 
66 
67 /*------------------------------------------------------------*/
68 void
fatal_error(const char * msg)69 fatal_error (const char *msg)
70 {
71   assert (msg);
72 
73   fprintf (stderr, "MolAuto error: %s\n", msg);
74   exit (1);
75 }
76 
77 
78 /*------------------------------------------------------------*/
79 void
fatal_error_str(const char * msg,const char * str)80 fatal_error_str (const char *msg, const char *str)
81 {
82   assert (msg);
83   assert (str);
84 
85   fprintf (stderr, "MolAuto error: %s %s\n", msg, str);
86   exit (1);
87 }
88 
89 
90 /*------------------------------------------------------------*/
91 char *
process_arguments(void)92 process_arguments (void)
93 {
94   int slot;
95 
96   args_flag (0);
97 
98   if ((args_number <= 1) || (args_exists ("-h"))) {
99     fprintf (stderr, "Usage: molauto [options] pdbfile\n");
100     fprintf (stderr, "  -notitle   do not output a title\n");
101     fprintf (stderr, "  -nocentre  do not centre the molecule\n");
102     fprintf (stderr, "  -cylinder  use cylinders for helices\n");
103     fprintf (stderr, "  -turns     use turns when specified, otherwise coil\n");
104     fprintf (stderr, "  -nice      nicer rendering; rainbow colours, more segments\n");
105     fprintf (stderr, "  -thin      set helix, strand, coil thickness to 0\n");
106     fprintf (stderr, "  -noligand  do not render ligand(s)\n");
107     fprintf (stderr, "  -bonds     render ligand(s) using bonds (default)\n");
108     fprintf (stderr, "  -stick     render ligand(s) using ball-and-stick\n");
109     fprintf (stderr, "  -cpk       render ligand(s) using cpk\n");
110     fprintf (stderr, "  -nocolour  do not use colour for schematics\n");
111     fprintf (stderr, "  -ss_pdb    secondary structure as defined in PDB file (default)\n");
112     fprintf (stderr, "  -ss_hb     secondary structure by H-bonds (DSSP-like) criteria\n");
113     fprintf (stderr, "  -ss_ca     secondary structure by CA-geometry criteria \n");
114     fprintf (stderr, "  -h         output this message\n");
115     fprintf (stderr, "----- %s, %s\n", program_str, copyright_str);
116     fprintf (stderr, "----- http://www.avatar.se/molscript/\n");
117 
118     exit (0);
119   }
120 
121   slot = args_exists ("-nocentre");
122   if (slot) {
123     centre_mode = FALSE;
124     args_flag (slot);
125   }
126 
127   slot = args_exists ("-notitle");
128   if (slot) {
129     title_mode = FALSE;
130     args_flag (slot);
131   }
132 
133   slot = args_exists ("-cylinder");
134   if (slot) {
135     cylinder_mode = TRUE;
136     args_flag (slot);
137   }
138 
139   slot = args_exists ("-turns");
140   if (slot) {
141     coil_mode = FALSE;
142     args_flag (slot);
143   }
144 
145   slot = args_exists ("-nice");
146   if (slot) {
147     nice_mode = TRUE;
148     args_flag (slot);
149   }
150 
151   slot = args_exists ("-thin");
152   if (slot) {
153     thin_mode = TRUE;
154     args_flag (slot);
155   }
156 
157   slot = args_exists ("-noligand");
158   if (slot) {
159     ligand_mode = LIGAND_NO;
160     args_flag (slot);
161   }
162 
163   slot = args_exists ("-bonds");
164   if (slot) {
165     ligand_mode = LIGAND_BONDS;
166     args_flag (slot);
167   }
168 
169   slot = args_exists ("-stick");
170   if (slot) {
171     ligand_mode = LIGAND_STICK;
172     args_flag (slot);
173   }
174 
175   slot = args_exists ("-cpk");
176   if (slot) {
177     ligand_mode = LIGAND_CPK;
178     args_flag (slot);
179   }
180 
181   slot = args_exists ("-nocolour");
182   if (slot) {
183     colour_mode = FALSE;
184     args_flag (slot);
185   }
186 
187   slot = args_exists ("-ss_hb");
188   if (slot) {
189     ss_mode = SS_HB;
190     args_flag (slot);
191   }
192 
193   slot = args_exists ("-ss_pdb");
194   if (slot) {
195     ss_mode = SS_PDB;
196     args_flag (slot);
197   }
198 
199   slot = args_exists ("-ss_ca");
200   if (slot) {
201     ss_mode = SS_CA;
202     args_flag (slot);
203   }
204 
205   slot = args_unflagged();
206   if (slot <= 0) fatal_error ("no PDB file name given");
207   if (args_item (slot) [0] == '-')
208     fatal_error_str ("invalid argument: ", args_item (slot));
209 
210   return args_item (slot);
211 }
212 
213 
214 /*------------------------------------------------------------*/
215 void
set_secondary_structure(mol3d * mol)216 set_secondary_structure (mol3d *mol)
217 {
218   res3d *res, *first;
219   int count;
220   char ss;
221 
222   assert (mol);
223 
224   switch (ss_mode) {
225 
226   case SS_PDB:			/* has already been set, if data was there */
227     if (! (mol->init & MOL3D_INIT_SECSTRUC)) {
228       mol3d_secstruc_ca_geom (mol); /* fall-back: CA-geometry criteria */
229     }
230     break;
231 
232   case SS_CA:
233     mol3d_secstruc_ca_geom (mol);
234     break;
235 
236   case SS_HB:
237     if (mol3d_secstruc_hbonds (mol)) {
238       for (res = mol->first; res; res = res->next) {
239 	switch (res->secstruc) {
240 	case 'i':
241 	case 'I':
242 	case 'b':
243 	case 'B':
244 	  res->secstruc = ' ';
245 	  break;
246 	case 'g':
247 	  res->secstruc = 'h';
248 	  break;
249 	case 'G':
250 	  res->secstruc = 'H';
251 	  break;
252 	default:
253 	  break;
254 	}
255       }
256     } else {
257       mol3d_secstruc_ca_geom (mol); /* fall-back: CA-geometry criteria */
258     }
259     break;
260   }
261 
262   if (coil_mode) {		/* change turn into coil */
263     for (res = mol->first; res; res = res->next) {
264       switch (res->secstruc) {
265       case 't':
266       case 'T':
267 	res->secstruc = ' ';
268 	break;
269       default:
270 	break;
271       }
272     }
273 
274   } else {
275 				/* convert single coil residue between turns */
276     for (res = mol->first; res; res = res->next) {
277       if ((res->secstruc == ' ') && (res->prev) && (res->next) &&
278 	  (res->prev->secstruc == 'T') && (res->next->secstruc == 't')) {
279 	res->secstruc = 'T';
280       }
281     }
282 				/* convert single turn starts between turns */
283     for (res = mol->first; res; res = res->next) {
284       if ((res->secstruc == 't') && (res->prev) &&
285 	  (res->prev->secstruc == 'T')) res->secstruc = 'T';
286     }
287   }
288 				/* remove helix/strand if shorter than 3 */
289   first = mol->first;
290   ss = toupper (first->secstruc);
291   count = 1;
292   for (res = mol->first; res; res = res->next) {
293     if (res->secstruc == ss) {
294       count++;
295     } else {
296       if (((ss == 'H') || (ss == 'E')) && (count < 3)) {
297 	for (; first != res; first = first->next) first->secstruc = ' ';
298       }
299       first = res;
300       ss = toupper (res->secstruc);
301       count = 1;
302     }
303   }
304 				/* convert single aa among ligands to ligand */
305   for (res = mol->first; res; res = res->next) {
306     if (res->secstruc == '-') continue;
307     if (res->prev) {
308       if (res->next) {
309 	if ((res->prev->secstruc == '-') &&
310 	    (res->next->secstruc == '-')) res->secstruc = '-';
311       } else {
312 	if (res->prev->secstruc == '-') res->secstruc = '-';
313       }
314     } else {
315       if (res->next) {
316 	if (res->next->secstruc == '-') res->secstruc = '-';
317       } else {
318 	res->secstruc = '-';
319       }
320     }
321   }
322 }
323 
324 
325 /*------------------------------------------------------------*/
326 void
output_secondary_structure(mol3d * mol)327 output_secondary_structure (mol3d *mol)
328 {
329   res3d *res, *first, *prev;
330   char ss;
331 
332   assert (mol);
333 
334   res = res3d_create();		/* add a sentinel residue, to simplify */
335   strcpy (res->name, "NONE");	/* case with last residue being an AA */
336   strcpy (res->type, res->name);
337   mol3d_append_residue (mol, res);
338 
339   if (colour_mode) {
340     hue = 0.666666;
341 
342     if (nice_mode) {
343       printf ("  set colourparts on, residuecolour amino-acids rainbow;\n\n");
344     } else {
345       int parts = 0;
346       ss = mol->first->secstruc;
347       for (res = mol->first; res; res = res->next) {
348 	if (res->secstruc != ss) parts++;
349 	ss = toupper (res->secstruc);
350       }
351       if (parts > 1) {
352 	decrement = hue / (double) (parts - 1);
353       } else {
354 	decrement = 0.0;
355       }
356     }
357   }
358 
359   first = mol->first;
360   ss = first->secstruc;
361 
362   for (res = mol->first; res; res = res->next) {
363     if (res->secstruc != ss) {
364 
365       switch (ss) {
366 
367       case '-':
368 	first = res;
369 	break;
370 
371       case ' ':
372 	output_hsb_decrement();
373 	printf ("  coil from %s to ", first->name);
374 	switch (toupper (res->secstruc)) {
375 	case '-':
376 	  printf ("%s;\n", prev->name);
377 	  first = NULL;
378 	  break;
379 	case 'T':
380 	  printf ("%s;\n", prev->name);
381 	  first = prev;
382 	  break;
383 	case 'H':
384 	case 'E':
385 	  printf ("%s;\n", res->name);
386 	  first = res;
387 	  break;
388 	}
389 	break;
390 
391       case 'T':
392 	output_hsb_decrement();
393 	printf ("  turn from %s to ", first->name);
394 	switch (toupper (res->secstruc)) {
395 	case '-':
396 	  printf ("%s;\n", prev->name);
397 	  first = NULL;
398 	  break;
399 	case ' ':
400 	case 'H':
401 	case 'E':
402 	  printf ("%s;\n", res->name);
403 	  first = res;
404 	  break;
405 	}
406 	break;
407 
408       case 'H':
409 	output_hsb_decrement();
410 	if (cylinder_mode) {
411 	  printf ("  cylinder from %s to ", first->name);
412 	} else {
413 	  printf ("  helix from %s to ", first->name);
414 	}
415 	switch (toupper (res->secstruc)) {
416 	case '-':
417 	  printf ("%s;\n", prev->name);
418 	  first = NULL;
419 	  break;
420 	case ' ':
421 	case 'T':
422 	  printf ("%s;\n", prev->name);
423 	  first = prev;
424 	  break;
425 	case 'H':
426 	case 'E':
427 	  printf ("%s;\n", prev->name);
428 	  printf ("  turn from %s to %s;\n", prev->name, res->name);
429 	  first = res;
430 	  break;
431 	}
432 	break;
433 
434       case 'E':
435 	output_hsb_decrement();
436 	printf ("  strand from %s to ", first->name);
437 	switch (toupper (res->secstruc)) {
438 	case '-':
439 	  printf ("%s;\n", prev->name);
440 	  first = NULL;
441 	  break;
442 	case ' ':
443 	case 'T':
444 	  printf ("%s;\n", prev->name);
445 	  first = prev;
446 	  break;
447 	case 'H':
448 	case 'E':
449 	  printf ("%s;\n", prev->name);
450 	  printf ("  turn from %s to %s;\n", prev->name, res->name);
451 	  first = res;
452 	  break;
453 	}
454 	break;
455       }
456       ss = toupper (res->secstruc);
457     }
458     prev = res;
459   }
460 }
461 
462 
463 /*------------------------------------------------------------*/
464 void
output_nucleotides(mol3d * mol)465 output_nucleotides (mol3d *mol)
466 {
467   res3d *res;
468 
469   assert (mol);
470 
471   for (res = mol->first; res; res = res->next) {
472     if (is_nucleic_acid_type (res->type)) {
473       if (colour_mode) {
474 	if (nice_mode) {
475 	  printf ("\n  set residuecolour nucleotides rainbow;");
476 	} else {
477 	  printf ("\n  set planecolour rgb 1 0.2 0.2;");
478 	}
479       }
480       printf ("\n  double-helix nucleotides;\n");
481       break;
482     }
483   }
484 }
485 
486 
487 /*------------------------------------------------------------*/
488 void
output_ligand(mol3d * mol)489 output_ligand (mol3d *mol)
490 {
491   res3d *res;
492   char *render_type;
493   int first_ligand = TRUE;
494 
495   assert (mol);
496 
497   switch (ligand_mode) {
498   case LIGAND_NO:
499     return;
500   case LIGAND_BONDS:
501     render_type = "bonds";
502     break;
503   case LIGAND_STICK:
504     render_type = "ball-and-stick";
505     break;
506   case LIGAND_CPK:
507     render_type = "cpk";
508     break;
509   }
510 
511   for (res = mol->first; res; res = res->next) {
512     if (res->secstruc != '-') continue;
513     if (is_water_type (res->type)) continue;
514     if (is_nucleic_acid_type (res->type)) continue;
515     if (res3d_count_atoms (res) == 0) continue;	/* sentinel residue */
516 
517     if (first_ligand) {
518       if (colour_mode) {
519 	printf ("\n  set colourparts on;\n");
520       } else {
521 	printf ("\n");
522       }
523       first_ligand = FALSE;
524     }
525 
526     if (res3d_count_atoms (res) == 1) {
527       if (res3d_unique_name (mol, res)) {
528 	printf ("  cpk in residue %s;\n", res->name);
529       } else {
530 	printf ("  cpk in require residue %s and type %s;\n",
531 		res->name, res->type);
532       }
533     } else if (res3d_unique_name (mol, res)) {
534       printf ("  %s in residue %s;\n", render_type, res->name);
535     } else {
536       printf ("  %s in require residue %s and type %s;\n", render_type,
537 	      res->name, res->type);
538     }
539   }
540 }
541 
542 
543 /*------------------------------------------------------------*/
544 void
output_quoted_string(char * str)545 output_quoted_string (char *str)
546 {
547   assert (str);
548   assert (*str);
549 
550   putchar ('"');
551   for ( ; *str; str++) {
552     if (*str == '"') putchar ('\\');
553     putchar (*str);
554   }
555   putchar ('"');
556 }
557 
558 
559 /*------------------------------------------------------------*/
main(int argc,char * argv[])560 main (int argc, char *argv[])
561 {
562   char *pdbfilename;
563   mol3d *mol;
564 
565   args_initialize (argc, argv);
566   pdbfilename = process_arguments();
567   mol = mol3d_read_pdb_filename (pdbfilename);
568   if ((mol == NULL) &&
569       mol3d_is_pdb_code (pdbfilename)) mol = mol3d_read_pdb_code (pdbfilename);
570   if (mol == NULL) fatal_error ("could not read the PDB file");
571   mol3d_init (mol,
572 	      MOL3D_INIT_NOBLANKS | MOL3D_INIT_COLOURS |
573 	      MOL3D_INIT_RADII | MOL3D_INIT_AACODES |
574 	      MOL3D_INIT_CENTRALS | MOL3D_INIT_RESIDUE_ORDINALS);
575   set_secondary_structure (mol);
576 
577   printf ("! MolScript v2.1 input file\n");
578   printf ("! generated by %s\n\n", program_str);
579 
580   if (title_mode && mol->data) {
581     dynstring *ds = (dynstring *) nd_search_data (mol->data, "COMPND");
582     if (ds) {
583       char *pos = strstr (ds->string, "MOLECULE:");
584       if (pos) {
585 	dynstring *ds2;
586 	pos += 9;
587 	ds2 = ds_allocate (20);
588 	while ((*pos != '\0') && (*pos != ';')) ds_add (ds2, *pos++);
589 	ds_right_adjust (ds2);
590 	ds_left_adjust (ds2);
591 	printf ("title ");
592 	output_quoted_string (ds2->string);
593 	printf ("\n\n");
594 	ds_delete (ds2);
595       } else {
596 	ds_right_adjust (ds);
597 	ds_left_adjust (ds);
598 	printf ("title ");
599 	output_quoted_string (ds->string);
600 	printf ("\n\n");
601       }
602     }
603   }
604 
605   printf ("plot\n\n  read mol ");
606   output_quoted_string (pdbfilename);
607   printf (";\n\n");
608 
609   if (centre_mode)
610     printf ("  transform atom * by centre position atom *;\n\n");
611 
612   if (!nice_mode) printf ("  set segments 2;\n\n");
613 
614   if (thin_mode)
615     printf ("  set strandthickness 0, helixthickness 0, coilradius 0;\n\n");
616 
617   output_secondary_structure (mol);
618   output_nucleotides (mol);
619   output_ligand (mol);
620 
621   printf ("\nend_plot\n");
622 
623   return 0;
624 }
625