1 /* molauto.c
2
3 MolAuto v1.1.1 for MolScript v2.1
4
5 Create a MolScript input file for a given PDB file.
6
7 Copyright (C) 1997-1998 Per Kraulis
8 18-Jan-1997 first attempts
9 12-Mar-1997 first usable version
10 29-Aug-1997 rewrote secondary structure output procedure
11 5-Nov-1997 fixed single-residue coil and turn bug
12 23-Jan-1998 mod's for named_data in mol3d
13 5-Jun-1998 mod's for mol3d_init; PDB secondary structure now default
14 17-Jun-1998 fixed bug with 1HIV: convert single aa among ligands to ligand
15 25-Nov-1998 fixed -ss_hb bug: init mol properly
16 */
17
18 #include <assert.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <ctype.h>
22 #include <string.h>
23
24 #include <args.h>
25 #include <str_utils.h>
26 #include <dynstring.h>
27 #include <mol3d.h>
28 #include <mol3d_io.h>
29 #include <mol3d_init.h>
30 #include <mol3d_utils.h>
31 #include <mol3d_secstruc.h>
32 #include <aa_lookup.h>
33
34
35 /*============================================================*/
36 const char program_str[] = "MolAuto v1.1.1";
37 const char copyright_str[] = "Copyright (C) 1997-1998 Per J. Kraulis";
38
39 enum ss_modes { SS_PDB, SS_CA, SS_HB };
40 enum ligand_modes { LIGAND_NO, LIGAND_BONDS, LIGAND_STICK, LIGAND_CPK };
41
42 int title_mode = TRUE;
43 int centre_mode = TRUE;
44 int cylinder_mode = FALSE;
45 int coil_mode = TRUE;
46 int nice_mode = FALSE;
47 int thin_mode = FALSE;
48 int ligand_mode = LIGAND_BONDS;
49 int colour_mode = TRUE;
50 int ss_mode = SS_PDB;
51
52 static double hue, decrement;
53
54
55 /*------------------------------------------------------------*/
56 void
output_hsb_decrement(void)57 output_hsb_decrement (void)
58 {
59 if (colour_mode && !nice_mode) {
60 printf (" set planecolour hsb %.4g 1 1;\n", hue);
61 hue -= decrement;
62 if (hue < 0.0) hue = 0.0;
63 }
64 }
65
66
67 /*------------------------------------------------------------*/
68 void
fatal_error(const char * msg)69 fatal_error (const char *msg)
70 {
71 assert (msg);
72
73 fprintf (stderr, "MolAuto error: %s\n", msg);
74 exit (1);
75 }
76
77
78 /*------------------------------------------------------------*/
79 void
fatal_error_str(const char * msg,const char * str)80 fatal_error_str (const char *msg, const char *str)
81 {
82 assert (msg);
83 assert (str);
84
85 fprintf (stderr, "MolAuto error: %s %s\n", msg, str);
86 exit (1);
87 }
88
89
90 /*------------------------------------------------------------*/
91 char *
process_arguments(void)92 process_arguments (void)
93 {
94 int slot;
95
96 args_flag (0);
97
98 if ((args_number <= 1) || (args_exists ("-h"))) {
99 fprintf (stderr, "Usage: molauto [options] pdbfile\n");
100 fprintf (stderr, " -notitle do not output a title\n");
101 fprintf (stderr, " -nocentre do not centre the molecule\n");
102 fprintf (stderr, " -cylinder use cylinders for helices\n");
103 fprintf (stderr, " -turns use turns when specified, otherwise coil\n");
104 fprintf (stderr, " -nice nicer rendering; rainbow colours, more segments\n");
105 fprintf (stderr, " -thin set helix, strand, coil thickness to 0\n");
106 fprintf (stderr, " -noligand do not render ligand(s)\n");
107 fprintf (stderr, " -bonds render ligand(s) using bonds (default)\n");
108 fprintf (stderr, " -stick render ligand(s) using ball-and-stick\n");
109 fprintf (stderr, " -cpk render ligand(s) using cpk\n");
110 fprintf (stderr, " -nocolour do not use colour for schematics\n");
111 fprintf (stderr, " -ss_pdb secondary structure as defined in PDB file (default)\n");
112 fprintf (stderr, " -ss_hb secondary structure by H-bonds (DSSP-like) criteria\n");
113 fprintf (stderr, " -ss_ca secondary structure by CA-geometry criteria \n");
114 fprintf (stderr, " -h output this message\n");
115 fprintf (stderr, "----- %s, %s\n", program_str, copyright_str);
116 fprintf (stderr, "----- http://www.avatar.se/molscript/\n");
117
118 exit (0);
119 }
120
121 slot = args_exists ("-nocentre");
122 if (slot) {
123 centre_mode = FALSE;
124 args_flag (slot);
125 }
126
127 slot = args_exists ("-notitle");
128 if (slot) {
129 title_mode = FALSE;
130 args_flag (slot);
131 }
132
133 slot = args_exists ("-cylinder");
134 if (slot) {
135 cylinder_mode = TRUE;
136 args_flag (slot);
137 }
138
139 slot = args_exists ("-turns");
140 if (slot) {
141 coil_mode = FALSE;
142 args_flag (slot);
143 }
144
145 slot = args_exists ("-nice");
146 if (slot) {
147 nice_mode = TRUE;
148 args_flag (slot);
149 }
150
151 slot = args_exists ("-thin");
152 if (slot) {
153 thin_mode = TRUE;
154 args_flag (slot);
155 }
156
157 slot = args_exists ("-noligand");
158 if (slot) {
159 ligand_mode = LIGAND_NO;
160 args_flag (slot);
161 }
162
163 slot = args_exists ("-bonds");
164 if (slot) {
165 ligand_mode = LIGAND_BONDS;
166 args_flag (slot);
167 }
168
169 slot = args_exists ("-stick");
170 if (slot) {
171 ligand_mode = LIGAND_STICK;
172 args_flag (slot);
173 }
174
175 slot = args_exists ("-cpk");
176 if (slot) {
177 ligand_mode = LIGAND_CPK;
178 args_flag (slot);
179 }
180
181 slot = args_exists ("-nocolour");
182 if (slot) {
183 colour_mode = FALSE;
184 args_flag (slot);
185 }
186
187 slot = args_exists ("-ss_hb");
188 if (slot) {
189 ss_mode = SS_HB;
190 args_flag (slot);
191 }
192
193 slot = args_exists ("-ss_pdb");
194 if (slot) {
195 ss_mode = SS_PDB;
196 args_flag (slot);
197 }
198
199 slot = args_exists ("-ss_ca");
200 if (slot) {
201 ss_mode = SS_CA;
202 args_flag (slot);
203 }
204
205 slot = args_unflagged();
206 if (slot <= 0) fatal_error ("no PDB file name given");
207 if (args_item (slot) [0] == '-')
208 fatal_error_str ("invalid argument: ", args_item (slot));
209
210 return args_item (slot);
211 }
212
213
214 /*------------------------------------------------------------*/
215 void
set_secondary_structure(mol3d * mol)216 set_secondary_structure (mol3d *mol)
217 {
218 res3d *res, *first;
219 int count;
220 char ss;
221
222 assert (mol);
223
224 switch (ss_mode) {
225
226 case SS_PDB: /* has already been set, if data was there */
227 if (! (mol->init & MOL3D_INIT_SECSTRUC)) {
228 mol3d_secstruc_ca_geom (mol); /* fall-back: CA-geometry criteria */
229 }
230 break;
231
232 case SS_CA:
233 mol3d_secstruc_ca_geom (mol);
234 break;
235
236 case SS_HB:
237 if (mol3d_secstruc_hbonds (mol)) {
238 for (res = mol->first; res; res = res->next) {
239 switch (res->secstruc) {
240 case 'i':
241 case 'I':
242 case 'b':
243 case 'B':
244 res->secstruc = ' ';
245 break;
246 case 'g':
247 res->secstruc = 'h';
248 break;
249 case 'G':
250 res->secstruc = 'H';
251 break;
252 default:
253 break;
254 }
255 }
256 } else {
257 mol3d_secstruc_ca_geom (mol); /* fall-back: CA-geometry criteria */
258 }
259 break;
260 }
261
262 if (coil_mode) { /* change turn into coil */
263 for (res = mol->first; res; res = res->next) {
264 switch (res->secstruc) {
265 case 't':
266 case 'T':
267 res->secstruc = ' ';
268 break;
269 default:
270 break;
271 }
272 }
273
274 } else {
275 /* convert single coil residue between turns */
276 for (res = mol->first; res; res = res->next) {
277 if ((res->secstruc == ' ') && (res->prev) && (res->next) &&
278 (res->prev->secstruc == 'T') && (res->next->secstruc == 't')) {
279 res->secstruc = 'T';
280 }
281 }
282 /* convert single turn starts between turns */
283 for (res = mol->first; res; res = res->next) {
284 if ((res->secstruc == 't') && (res->prev) &&
285 (res->prev->secstruc == 'T')) res->secstruc = 'T';
286 }
287 }
288 /* remove helix/strand if shorter than 3 */
289 first = mol->first;
290 ss = toupper (first->secstruc);
291 count = 1;
292 for (res = mol->first; res; res = res->next) {
293 if (res->secstruc == ss) {
294 count++;
295 } else {
296 if (((ss == 'H') || (ss == 'E')) && (count < 3)) {
297 for (; first != res; first = first->next) first->secstruc = ' ';
298 }
299 first = res;
300 ss = toupper (res->secstruc);
301 count = 1;
302 }
303 }
304 /* convert single aa among ligands to ligand */
305 for (res = mol->first; res; res = res->next) {
306 if (res->secstruc == '-') continue;
307 if (res->prev) {
308 if (res->next) {
309 if ((res->prev->secstruc == '-') &&
310 (res->next->secstruc == '-')) res->secstruc = '-';
311 } else {
312 if (res->prev->secstruc == '-') res->secstruc = '-';
313 }
314 } else {
315 if (res->next) {
316 if (res->next->secstruc == '-') res->secstruc = '-';
317 } else {
318 res->secstruc = '-';
319 }
320 }
321 }
322 }
323
324
325 /*------------------------------------------------------------*/
326 void
output_secondary_structure(mol3d * mol)327 output_secondary_structure (mol3d *mol)
328 {
329 res3d *res, *first, *prev;
330 char ss;
331
332 assert (mol);
333
334 res = res3d_create(); /* add a sentinel residue, to simplify */
335 strcpy (res->name, "NONE"); /* case with last residue being an AA */
336 strcpy (res->type, res->name);
337 mol3d_append_residue (mol, res);
338
339 if (colour_mode) {
340 hue = 0.666666;
341
342 if (nice_mode) {
343 printf (" set colourparts on, residuecolour amino-acids rainbow;\n\n");
344 } else {
345 int parts = 0;
346 ss = mol->first->secstruc;
347 for (res = mol->first; res; res = res->next) {
348 if (res->secstruc != ss) parts++;
349 ss = toupper (res->secstruc);
350 }
351 if (parts > 1) {
352 decrement = hue / (double) (parts - 1);
353 } else {
354 decrement = 0.0;
355 }
356 }
357 }
358
359 first = mol->first;
360 ss = first->secstruc;
361
362 for (res = mol->first; res; res = res->next) {
363 if (res->secstruc != ss) {
364
365 switch (ss) {
366
367 case '-':
368 first = res;
369 break;
370
371 case ' ':
372 output_hsb_decrement();
373 printf (" coil from %s to ", first->name);
374 switch (toupper (res->secstruc)) {
375 case '-':
376 printf ("%s;\n", prev->name);
377 first = NULL;
378 break;
379 case 'T':
380 printf ("%s;\n", prev->name);
381 first = prev;
382 break;
383 case 'H':
384 case 'E':
385 printf ("%s;\n", res->name);
386 first = res;
387 break;
388 }
389 break;
390
391 case 'T':
392 output_hsb_decrement();
393 printf (" turn from %s to ", first->name);
394 switch (toupper (res->secstruc)) {
395 case '-':
396 printf ("%s;\n", prev->name);
397 first = NULL;
398 break;
399 case ' ':
400 case 'H':
401 case 'E':
402 printf ("%s;\n", res->name);
403 first = res;
404 break;
405 }
406 break;
407
408 case 'H':
409 output_hsb_decrement();
410 if (cylinder_mode) {
411 printf (" cylinder from %s to ", first->name);
412 } else {
413 printf (" helix from %s to ", first->name);
414 }
415 switch (toupper (res->secstruc)) {
416 case '-':
417 printf ("%s;\n", prev->name);
418 first = NULL;
419 break;
420 case ' ':
421 case 'T':
422 printf ("%s;\n", prev->name);
423 first = prev;
424 break;
425 case 'H':
426 case 'E':
427 printf ("%s;\n", prev->name);
428 printf (" turn from %s to %s;\n", prev->name, res->name);
429 first = res;
430 break;
431 }
432 break;
433
434 case 'E':
435 output_hsb_decrement();
436 printf (" strand from %s to ", first->name);
437 switch (toupper (res->secstruc)) {
438 case '-':
439 printf ("%s;\n", prev->name);
440 first = NULL;
441 break;
442 case ' ':
443 case 'T':
444 printf ("%s;\n", prev->name);
445 first = prev;
446 break;
447 case 'H':
448 case 'E':
449 printf ("%s;\n", prev->name);
450 printf (" turn from %s to %s;\n", prev->name, res->name);
451 first = res;
452 break;
453 }
454 break;
455 }
456 ss = toupper (res->secstruc);
457 }
458 prev = res;
459 }
460 }
461
462
463 /*------------------------------------------------------------*/
464 void
output_nucleotides(mol3d * mol)465 output_nucleotides (mol3d *mol)
466 {
467 res3d *res;
468
469 assert (mol);
470
471 for (res = mol->first; res; res = res->next) {
472 if (is_nucleic_acid_type (res->type)) {
473 if (colour_mode) {
474 if (nice_mode) {
475 printf ("\n set residuecolour nucleotides rainbow;");
476 } else {
477 printf ("\n set planecolour rgb 1 0.2 0.2;");
478 }
479 }
480 printf ("\n double-helix nucleotides;\n");
481 break;
482 }
483 }
484 }
485
486
487 /*------------------------------------------------------------*/
488 void
output_ligand(mol3d * mol)489 output_ligand (mol3d *mol)
490 {
491 res3d *res;
492 char *render_type;
493 int first_ligand = TRUE;
494
495 assert (mol);
496
497 switch (ligand_mode) {
498 case LIGAND_NO:
499 return;
500 case LIGAND_BONDS:
501 render_type = "bonds";
502 break;
503 case LIGAND_STICK:
504 render_type = "ball-and-stick";
505 break;
506 case LIGAND_CPK:
507 render_type = "cpk";
508 break;
509 }
510
511 for (res = mol->first; res; res = res->next) {
512 if (res->secstruc != '-') continue;
513 if (is_water_type (res->type)) continue;
514 if (is_nucleic_acid_type (res->type)) continue;
515 if (res3d_count_atoms (res) == 0) continue; /* sentinel residue */
516
517 if (first_ligand) {
518 if (colour_mode) {
519 printf ("\n set colourparts on;\n");
520 } else {
521 printf ("\n");
522 }
523 first_ligand = FALSE;
524 }
525
526 if (res3d_count_atoms (res) == 1) {
527 if (res3d_unique_name (mol, res)) {
528 printf (" cpk in residue %s;\n", res->name);
529 } else {
530 printf (" cpk in require residue %s and type %s;\n",
531 res->name, res->type);
532 }
533 } else if (res3d_unique_name (mol, res)) {
534 printf (" %s in residue %s;\n", render_type, res->name);
535 } else {
536 printf (" %s in require residue %s and type %s;\n", render_type,
537 res->name, res->type);
538 }
539 }
540 }
541
542
543 /*------------------------------------------------------------*/
544 void
output_quoted_string(char * str)545 output_quoted_string (char *str)
546 {
547 assert (str);
548 assert (*str);
549
550 putchar ('"');
551 for ( ; *str; str++) {
552 if (*str == '"') putchar ('\\');
553 putchar (*str);
554 }
555 putchar ('"');
556 }
557
558
559 /*------------------------------------------------------------*/
main(int argc,char * argv[])560 main (int argc, char *argv[])
561 {
562 char *pdbfilename;
563 mol3d *mol;
564
565 args_initialize (argc, argv);
566 pdbfilename = process_arguments();
567 mol = mol3d_read_pdb_filename (pdbfilename);
568 if ((mol == NULL) &&
569 mol3d_is_pdb_code (pdbfilename)) mol = mol3d_read_pdb_code (pdbfilename);
570 if (mol == NULL) fatal_error ("could not read the PDB file");
571 mol3d_init (mol,
572 MOL3D_INIT_NOBLANKS | MOL3D_INIT_COLOURS |
573 MOL3D_INIT_RADII | MOL3D_INIT_AACODES |
574 MOL3D_INIT_CENTRALS | MOL3D_INIT_RESIDUE_ORDINALS);
575 set_secondary_structure (mol);
576
577 printf ("! MolScript v2.1 input file\n");
578 printf ("! generated by %s\n\n", program_str);
579
580 if (title_mode && mol->data) {
581 dynstring *ds = (dynstring *) nd_search_data (mol->data, "COMPND");
582 if (ds) {
583 char *pos = strstr (ds->string, "MOLECULE:");
584 if (pos) {
585 dynstring *ds2;
586 pos += 9;
587 ds2 = ds_allocate (20);
588 while ((*pos != '\0') && (*pos != ';')) ds_add (ds2, *pos++);
589 ds_right_adjust (ds2);
590 ds_left_adjust (ds2);
591 printf ("title ");
592 output_quoted_string (ds2->string);
593 printf ("\n\n");
594 ds_delete (ds2);
595 } else {
596 ds_right_adjust (ds);
597 ds_left_adjust (ds);
598 printf ("title ");
599 output_quoted_string (ds->string);
600 printf ("\n\n");
601 }
602 }
603 }
604
605 printf ("plot\n\n read mol ");
606 output_quoted_string (pdbfilename);
607 printf (";\n\n");
608
609 if (centre_mode)
610 printf (" transform atom * by centre position atom *;\n\n");
611
612 if (!nice_mode) printf (" set segments 2;\n\n");
613
614 if (thin_mode)
615 printf (" set strandthickness 0, helixthickness 0, coilradius 0;\n\n");
616
617 output_secondary_structure (mol);
618 output_nucleotides (mol);
619 output_ligand (mol);
620
621 printf ("\nend_plot\n");
622
623 return 0;
624 }
625