1 /*****
2 This file is part of the Babel Program
3 Copyright (C) 1992-96 W. Patrick Walters and Matthew T. Stahl
4 All Rights Reserved
5 All Rights Reserved
6 All Rights Reserved
7 All Rights Reserved
8
9 For more information please contact :
10
11 babel@mercury.aichem.arizona.edu
12 --------------------------------------------------------------------------------
13 FILE : asstypes.c
14 AUTHOR(S) : Pat Walters
15 DATE : 10-92
16 PURPOSE : Assign specific atom types (i.e. hybridization) to the atoms in the UMS
17
18 The code here is mine, but the ideas are
19 not. The majority of this program is a
20 translation of a fortran program by
21 Elaine Meng - UC San Francisco.
22 For information on the algorithms used
23 here see
24 E. Meng and R. Lewis, J. Comp. Chem., 12,
25 pp 891-898 (1991)
26 ******/
27
28 #include "bbltyp.h"
29
30 #undef DEBUG
31
32
33
34 void type_attached_oxygens(ums_type *mol, int atm);
35
check_atomic_numbers(ums_type * mol)36 void check_atomic_numbers(ums_type *mol)
37 {
38 int i, column;
39 char temp_type[5];
40
41 if (Atomic_number(1) == 0)
42 {
43 column = locate_input_type("INT");
44 for (i = 1; i <= Atoms; i++)
45 Atomic_number(i) = get_input_type(i,column,Type(i),temp_type,dummy);
46 }
47 }
48
49
assign_types(ums_type * mol)50 int assign_types(ums_type *mol)
51 {
52 int i, result;
53
54 for (i = 1; i <= Atoms; i++)
55 Atomic_number(i) = get_atomic_number(Type(i));
56
57 tag_organics(mol);
58 result = phase1(mol);
59 result = phase2(mol);
60 result = phase3(mol);
61 result = phase4(mol);
62 result = phase5(mol);
63 result = phase6(mol);
64
65 check_for_amides(mol);
66 /*
67 find_aromatic_atoms(mol);
68 */
69 return(1);
70 }
71
72
tag_organics(ums_type * mol)73 void tag_organics(ums_type *mol)
74 {
75 int i;
76
77 for (i = 1; i <= Atoms; i++)
78 {
79 if (EQ(Type(i),"C") ||
80 EQ(Type(i),"H") ||
81 EQ(Type(i),"O") ||
82 EQ(Type(i),"N") ||
83 EQ(Type(i),"S") ||
84 EQ(Type(i), "P"))
85 Organic(i) = TRUE;
86 else
87 Organic(i) = FALSE;
88 }
89 }
90
91 /********************************************************************
92 phase1 - type all hydrogens and deuterium by whether they are
93 attached to carbon or not. Calculate the number of heavy atoms
94 bonded to each atom by subtracting the number of hydrogens
95 attached from the valenece
96 *********************************************************************/
97
98 int
phase1(ums_type * mol)99 phase1(ums_type *mol)
100 {
101 int result;
102 result = type_hydrogens(mol);
103 return(1);
104 }
105
106
107 int
phase2(ums_type * mol)108 phase2(ums_type *mol)
109 {
110 int result;
111
112 result = valence_four(mol);
113 result = valence_three(mol);
114 result = valence_two(mol);
115 return(1);
116 }
117
118 int
phase3(ums_type * mol)119 phase3(ums_type *mol)
120 {
121 int result;
122
123 result = valence_one(mol);
124 return(1);
125 }
126
127
128 int
phase4(ums_type * mol)129 phase4(ums_type *mol)
130 {
131 int count,i,j;
132 double bond_length;
133 int flag;
134
135 for (count = 1; count <= Atoms; count ++)
136 {
137 switch(Redo(count))
138 {
139 case 1:
140 for (i = 0; i < Valence(count); i++)
141 {
142 j = Connection(count,i);
143 bond_length = distance(Point(count),Point(j));
144 if ((bond_length <= V2_C2_C_CUTOFF) &&
145 (Type(j)[0] == 'C'))
146 strcpy(Type(count),"C2");
147 else
148 if ((bond_length <= V2_C2_N_CUTOFF) &&
149 (Type(j)[0] == 'N'))
150 strcpy(Type(count),"C2");
151 }
152 for (i = 0; i < Valence(count); i++)
153 {
154 j = Connection(count,i);
155 bond_length = distance(Point(count),Point(j));
156 if ((bond_length > V2_C3_C_CUTOFF) &&
157 (Type(j)[0] == 'C'))
158 strcpy(Type(count),"C3");
159 else
160 if ((bond_length > V2_C3_N_CUTOFF) &&
161 (Type(j)[0] == 'N'))
162 strcpy(Type(count),"C3");
163 else
164 if ((bond_length > V2_C3_O_CUTOFF) &&
165 (Type(j)[0] == 'O'))
166 strcpy(Type(count),"C3");
167 }
168 break;
169 case 2:
170 for (i = 0; i < Valence(count); i++)
171 {
172 j = Connection(count,i);
173 bond_length = distance(Point(count),Point(j));
174 if ((bond_length <= V2_N2_C_CUTOFF) &&
175 (Type(j)[0] == 'C'))
176 strcpy(Type(count),"Npl");
177 else
178 if ((bond_length <= V2_N2_N_CUTOFF) &&
179 (Type(j)[0] == 'N'))
180 strcpy(Type(count),"Npl");
181 }
182 break;
183 case 3:
184 {
185 flag = 0;
186 for (i = 0; i < Valence(count); i++)
187 {
188 j = Connection(count,i);
189 bond_length = distance(Point(count),Point(j));
190 if ((bond_length <= V2_C2_C_CUTOFF) &&
191 (Type(j)[0] == 'C'))
192 {
193 strcpy(Type(count),"C2");
194 flag = 1;
195 }
196 else
197 if ((bond_length <= V2_C2_N_CUTOFF) &&
198 (Type(j)[0] == 'N'))
199 {
200 strcpy(Type(count),"C2");
201 flag = 1;
202 }
203 }
204 if (flag == 0)
205 for (i = 0; i < Valence(count); i++)
206 {
207 j = Connection(count,i);
208 bond_length = distance(Point(count),Point(j));
209 if ((bond_length > V2_C3_C_CUTOFF) &&
210 (Type(j)[0] == 'C'))
211 {
212 strcpy(Type(count),"C3");
213 flag = 1;
214 }
215 else
216 if ((bond_length > V2_C3_N_CUTOFF) &&
217 (Type(j)[0] == 'N'))
218 {
219 strcpy(Type(count),"C3");
220 flag = 1;
221 }
222 else
223 if ((bond_length > V2_C3_O_CUTOFF) &&
224 (Type(j)[0] == 'O'))
225 {
226 strcpy(Type(count),"C3");
227 flag = 1;
228 }
229 else
230 if (flag == 0)
231 if ((bond_length > GEN_C3_C_CUTOFF) &&
232 (Type(j)[0] == 'C'))
233 {
234 strcpy(Type(count),"C3");
235 flag = 1;
236 }
237 }
238 }
239 break;
240 }
241 }
242 return(1);
243 }
244
245 int
phase5(ums_type * mol)246 phase5(ums_type *mol)
247 {
248 int count,i, j;
249 int flag;
250
251 for (count = 1; count <= Atoms; count ++)
252 {
253 if (strcmp(Type(count),"C2") == 0)
254 {
255 flag = 0;
256 for (i = 0; i < Valence(count); i++)
257 {
258 j = Connection(count,i);
259 if ((strstr("C3 DC HC N3 N3+ O3 ",Type(j)) == NULL) &&
260 (strstr("Pac Sac Sox C1 S3 Cac ",Type(j)) == NULL))
261 flag = 1;
262 }
263 if (flag == 0)
264 strcpy(Type(count),"C3");
265 }
266 }
267 return(1);
268 }
269
270
271 int
phase6(ums_type * mol)272 phase6(ums_type *mol)
273 {
274 int i,j,k,l,m,n;
275 int conn;
276 int no_plus;
277 int protonated;
278
279 for (i = 1; i <= Atoms; i ++)
280 {
281 no_plus = 1;
282 protonated = TRUE;
283 if (EQ(Type(i),"N3"))
284 {
285 for (j = 0; j < Valence(i); j++)
286 {
287 conn = Connection(i,j);
288
289 /* If an unsaturated atom is attached to this nitrogen then it should be Npl */
290 if ((Valence(i) == 2) && (IsUnsatType(Type(conn))))
291 {
292 protonated = FALSE;
293 strcpy(Type(i),"Npl");
294 break;
295 }
296
297 /* If the attached atom is something other that C3, H or D the nitrogen is
298 not positively charged */
299 if (NOTEQ(Type(conn),"C3") && (Atomic_number(conn) != 1))
300 {
301 protonated = FALSE;
302 }
303 }
304 if (protonated)
305 strcpy(Type(i),"N3+");
306 }
307 /* look for guanadinium nitrogens */
308
309 else
310 if (EQ(Type(i),"C2"))
311 {
312 /* First see if we have an sp2 carbon surrounded by 3 sp2
313 nitrogens */
314
315 m = 0;
316 for (j= 0; j < Valence(i); j++)
317 {
318 k = Connection(i,j);
319 if ((EQ(Type(k),"Npl")) || (EQ(Type(k),"N2")) || (EQ(Type(k),"Ng+")))
320 m++;
321 }
322 if (m == 3)
323 {
324 strcpy(Type(i),"C+");
325 for (j= 0; j < Valence(i); j++)
326 {
327 k = Connection(i,j);
328 strcpy(Type(k),"Ng+");
329 }
330 }
331 #if 0
332 /* If we have 3 planar nitrogens connected to the sp3 carbon
333 it is possible that this is a guanadinium group. Now check
334 each of the nitrogens to see if they are connected to either
335 a C2 or Npl */
336
337 if (m == 3)
338 {
339 no_plus = FALSE;
340 for (j = 0; j < Valence(i); j++)
341 {
342 k = Connection(i,j);
343 if ((EQ(Type(k),"Npl")) || (EQ(Type(k),"N2")))
344 {
345 strcpy(Type(k),"Ng+");
346 for (l = 0; l < Valence(k); l++)
347 {
348 n = Connection(k,l);
349 if (n != i)
350 {
351 if (EQ(Type(n),"C2") || EQ(Type(n),"Npl"))
352 {
353 no_plus = TRUE;
354 break;
355 }
356 }
357 }
358 }
359 }
360 }
361 if (no_plus == 1)
362 for (j = 0; j < Valence(i); j++)
363 {
364 k = Connection(i,j);
365 if (strcmp(Type(k),"Ng+") == 0)
366 strcpy(Type(k),"Npl");
367 }
368 #endif
369 }
370 else
371 if (strcmp(Type(i),"Cac") == 0)
372 for (j = 0; j < Valence(i); j++)
373 {
374 k = Connection(i,j);
375 if ((strncmp(Type(k),"O",1) == 0) &&
376 (count_heavy_atoms(mol,k) == 1))
377 strcpy(Type(k),"O-");
378 }
379 }
380 return(1);
381 }
382
383
384 int
type_hydrogens(ums_type * mol)385 type_hydrogens(ums_type *mol)
386 {
387 int i, conn;
388
389 for (i = 1; i <= Atoms; i++)
390 {
391 if (Type(i)[0] == 'H')
392 {
393 strcpy(Type(i),"H");
394 conn = Connection(i,0);
395 if (Type(conn)[0] == 'C')
396 {
397 strcpy(Type(i),"HC");
398 }
399 }
400 }
401 return(1);
402 }
403
404
405 int
valence_four(ums_type * mol)406 valence_four(ums_type *mol)
407 {
408 int count;
409
410 for (count = 1; count <= Atoms; count ++)
411 {
412 if ((Valence(count) == 4) && (IsOrganic(count)))
413 {
414 switch(Type(count)[0])
415 {
416 case 'C':
417 if ((strcmp(Type(count),"C") == 0))
418 strcpy(Type(count),"C3");
419 break;
420 case 'N':
421 if (count_free_ox(mol,count) >= 1)
422 strcpy(Type(count),"Nox");
423 else
424 strcpy(Type(count),"N3+");
425 break;
426 case 'P':
427 if (strlen(Type(count)) == 1)
428 {
429 if (count_free_ox(mol,count) >= 2)
430 strcpy(Type(count),"Pac");
431 else
432 if (count_free_ox(mol,count) == 1)
433 strcpy(Type(count),"Pox");
434 else
435 strcpy(Type(count),"P3+");
436 }
437 break;
438 case 'S':
439 if (strcmp(Type(count),"S") == 0)
440 {
441 if (count_free_ox(mol,count) >= 3)
442 strcpy(Type(count),"Sac");
443 else
444 if (count_free_ox(mol,count) >= 1)
445 strcpy(Type(count),"Sox");
446 else
447 strcpy(Type(count),"S");
448 }
449 break;
450 case 'B':
451 if (count_free_ox(mol,count) >= 3)
452 strcpy(Type(count),"Bac");
453 if (count_free_ox(mol,count) >= 1)
454 strcpy(Type(count),"Box");
455 else
456 strcpy(Type(count),"B");
457 break;
458 }
459 }
460 }
461 return(1);
462 }
463
464 int
valence_three(ums_type * mol)465 valence_three(ums_type *mol)
466 {
467 int count;
468 int k,l,m;
469 double angle1,angle2,angle3,avg_angle;
470
471 for (count = 1; count <= Atoms; count ++)
472 {
473 if ((Valence(count) == 3) && (IsOrganic(count)))
474 {
475 k = Connection(count,0);
476 l = Connection(count,1);
477 m = Connection(count,2);
478
479 angle1 = bond_angle(Point(k),
480 Point(count),
481 Point(l));
482 angle2 = bond_angle(Point(k),
483 Point(count),
484 Point(m));
485 angle3 = bond_angle(Point(l),
486 Point(count),
487 Point(m));
488 avg_angle = (angle1 + angle2 + angle3)/3;
489
490 switch(Type(count)[0])
491 {
492 case 'C':
493 if (avg_angle < SP3_MAX)
494 strcpy(Type(count),"C3");
495 else
496 if (count_free_ox(mol,count) >= 2)
497 strcpy(Type(count),"Cac");
498 else
499 strcpy(Type(count),"C2");
500 break;
501 case 'N':
502 if (avg_angle < SP3_MAX)
503 strcpy(Type(count),"N3");
504 else
505 if (count_free_ox(mol,count) >= 2)
506 strcpy(Type(count),"Ntr");
507 else
508 strcpy(Type(count),"Npl");
509 break;
510 case 'B':
511 if (count_free_ox(mol,count) >= 1)
512 strcpy(Type(count),"Box");
513 else
514 strcpy(Type(count),"B");
515 break;
516 case 'S':
517 if (strcmp(Type(count),"S") == 0)
518 {
519 if (count_free_ox(mol,count) >= 1)
520 strcpy(Type(count),"Sox");
521 else
522 strcpy(Type(count),"S3+");
523 }
524 break;
525 }
526 }
527 }
528 return(1);
529 }
530
531
532 int
valence_two(ums_type * mol)533 valence_two(ums_type *mol)
534 {
535 int count;
536 int k,l;
537 double angle1;
538
539 for (count = 1; count <= Atoms; count ++)
540 {
541 if ((Valence(count) == 2) && (IsOrganic(count)))
542 {
543 k = Connection(count,0);
544 l = Connection(count,1);
545 angle1 = bond_angle(Point(k),
546 Point(count),
547 Point(l));
548
549 switch(Type(count)[0])
550 {
551 case 'C':
552 if (strcmp(Type(count),"C") == 0)
553 {
554 if (angle1 < SP3_MAX)
555 {
556 strcpy(Type(count),"C3");
557 Redo(count) = 1;
558 }
559 else
560 if (angle1 < SP_MIN)
561 {
562 strcpy(Type(count),"C2");
563 if (angle1 < MAY_BE_SP2)
564 Redo(count) = 3;
565 }
566 else
567 strcpy(Type(count),"C1");
568 }
569 break;
570 case 'N':
571 if (angle1 <= SP3_MAX)
572 {
573 strcpy(Type(count),"N3");
574 Redo(count) = 2;
575 }
576 else
577 if (angle1 <= SP_MIN)
578 {
579 strcpy(Type(count),"Npl");
580 }
581 else
582 strcpy(Type(count),"N1");
583 break;
584 case 'O':
585 strcpy(Type(count),"O3");
586 break;
587 case 'S':
588 if (strcmp(Type(count),"S") == 0)
589 strcpy(Type(count),"S3");
590 break;
591 }
592 }
593 }
594 return(1);
595 }
596
597
598 int
valence_one(ums_type * mol)599 valence_one(ums_type *mol)
600 {
601 int count;
602 int k;
603 double bond_length;
604
605 for (count = 1; count <= Atoms; count ++)
606 {
607 k = Connection(count,0);
608 bond_length = distance(Point(count),Point(k));
609
610 if ((Valence(count) == 1) && (IsOrganic(count)))
611 switch(Type(count)[0])
612 {
613 case 'C':
614 if (strcmp(Type(count),"C") == 0)
615 {
616 if ((strncmp(Type(k),"C1",2) == 0)
617 && (bond_length <= V1_C1_C1_CUTOFF))
618 strcpy(Type(count),"C1");
619 else
620 if ((strncmp(Type(k),"C",1) == 0)
621 && (bond_length <= V1_C2_C_CUTOFF))
622 strcpy(Type(count),"C2");
623 else
624 strcpy(Type(count),"C3");
625 }
626 if (strncmp(Type(k),"N",1) == 0)
627 {
628 if (bond_length <= V1_C2_N_CUTOFF)
629 strcpy(Type(count),"C2");
630 else
631 strcpy(Type(count),"C3");
632 }
633 break;
634 case 'N':
635 if (strcmp(Type(count),"N") == 0)
636 if ((strncmp(Type(k),"C1",2) == 0)
637 && (bond_length <= V1_N1_C1_CUTOFF))
638 strcpy(Type(count),"N1");
639 else
640 if (((strncmp(Type(k),"C2",2) == 0) ||
641 (strncmp(Type(k),"C3",2) == 0))
642 && ((bond_length > V1_N3_C_CUTOFF)))
643 strcpy(Type(count),"N3");
644 else
645 if (((strncmp(Type(k),"N3",2) == 0))
646 && ((bond_length > V1_N3_N3_CUTOFF)))
647 strcpy(Type(count),"N3");
648 else
649 if (((strncmp(Type(k),"Npl",3) == 0))
650 && (bond_length > V1_N3_N2_CUTOFF))
651 strcpy(Type(count),"N3");
652 else
653 strcpy(Type(count),"Npl");
654 break;
655 case 'O':
656 if (strcmp(Type(count),"O") == 0)
657 if (strstr("Cac Pac Sac Ntr ",Type(k)) != NULL)
658 strcpy(Type(count),"O-");
659 else
660 if (strstr("Nox Pox Sox ",Type(k)) != NULL)
661 strcpy(Type(count),"O2");
662 else
663 if ((Type(k)[0] == 'C')
664 && (bond_length <= V1_O2_C2_CUTOFF))
665 {
666 strcpy(Type(count),"O2");
667 strcpy(Type(k),"C2");
668 Redo(k) = 0;
669 }
670 else
671 if ((strcmp(Type(k),"As") == 0)
672 && (bond_length <= V1_O2_AS_CUTOFF))
673 strcpy(Type(count),"O2");
674 else
675 strcpy(Type(count),"O3");
676 break;
677 case 'S':
678 if (strcmp(Type(count),"S") == 0)
679 if ((strncmp(Type(k),"P",1) == 0))
680 strcpy(Type(count),"S2");
681 else
682 if ((strncmp(Type(k),"C",1) == 0)
683 && (bond_length <= V1_S2_C2_CUTOFF))
684 {
685 strcpy(Type(count),"S2");
686 strcpy(Type(k),"C2");
687 Redo(count) = 0;
688 }
689 else
690 if ((strcmp(Type(k),"As") == 0)
691 && (bond_length <= V1_S2_AS_CUTOFF))
692 strcpy(Type(count),"S2");
693 else
694 strcpy(Type(count),"S3");
695 break;
696 }
697 }
698 return(1);
699 }
700
701
702 double
bond_angle(coord_type a,coord_type b,coord_type c)703 bond_angle(coord_type a, coord_type b, coord_type c)
704 {
705 double angle;
706 double dist;
707
708 double cos_theta;
709
710 dist = distance(a,b) * distance(b,c);
711 cos_theta = ((a.x - b.x) * (c.x - b.x) + (a.y - b.y) * (c.y - b.y) +
712 (a.z - b.z) * (c.z - b.z))/dist;
713 if (cos_theta + 1.0 < 0.0001)
714 angle = 180.0;
715 else
716 angle = (acos(cos_theta)) * RAD_TO_DEG;
717 return(angle);
718 }
719
720
721
722
723 int
count_heavy_atoms(ums_type * mol,int atom_number)724 count_heavy_atoms(ums_type *mol, int atom_number)
725 {
726 int count;
727 int H_count = 0;
728 int bonded_atom;
729
730 for (count = 0; count < Valence(atom_number); count ++)
731 {
732 bonded_atom = (Connection(atom_number,count));
733 if (Type(bonded_atom)[0] == 'H')
734 {
735 H_count ++;
736 }
737 }
738 return( Valence(atom_number) - H_count);
739 }
740
741
742 int
count_free_ox(ums_type * mol,int atom_number)743 count_free_ox(ums_type *mol, int atom_number)
744 {
745 int count;
746 int bonded_atom;
747 int free_O_count = 0;
748
749 for (count = 0; count < Valence(atom_number); count ++)
750 {
751 bonded_atom = (Connection(atom_number,count));
752 if ((Type(bonded_atom)[0] == 'O') &&
753 (count_heavy_atoms(mol,bonded_atom) == 1))
754 {
755 free_O_count ++;
756 }
757 }
758 return(free_O_count);
759 }
760
fix_carboxylates(ums_type * mol)761 void fix_carboxylates(ums_type *mol)
762 {
763 int i;
764
765 for (i = 1; i < Atoms; i++)
766 {
767 if EQ(Type(i),"O-")
768 {
769 switch(Valence(i))
770 {
771 case 1 :
772 strcpy(Type(i),"O2");
773 break;
774 default :
775 strcpy(Type(i),"O3");
776 break;
777 }
778 }
779 }
780 }
781
check_for_amides(ums_type * mol)782 void check_for_amides(ums_type *mol)
783 {
784 int i,j,conn;
785
786 for (i = 1; i <= Atoms; i++)
787 {
788 if (EQ(Type(i),"Npl"))
789 for (j = 0; j < Valence(i); j++)
790 {
791 conn = Connection(i,j);
792
793 if (EQ(Type(conn),"Cac") || EQ(Type(conn),"Sox") || EQ(Type(conn),"So2"))
794 {
795 strcpy(Type(i),"Nam");
796 break;
797 }
798
799 if (EQ(Type(conn),"C2"))
800 if (check_for_carbonyl(mol,Connection(i,j)) == 3)
801 {
802 strcpy(Type(i),"Nam");
803 break;
804 }
805 }
806 }
807 }
808
809
810
811
812
813
814
815
816
817
818