1 /*****
2 This file is part of the Babel Program
3 Copyright (C) 1992-96 W. Patrick Walters and Matthew T. Stahl
4 All Rights Reserved
5 All Rights Reserved
6 All Rights Reserved
7 All Rights Reserved
8 
9 For more information please contact :
10 
11 babel@mercury.aichem.arizona.edu
12 --------------------------------------------------------------------------------
13 FILE : asstypes.c
14 AUTHOR(S) : Pat Walters
15 DATE : 10-92
16 PURPOSE : Assign specific atom types (i.e. hybridization) to the atoms in the UMS
17 
18 The code here is mine, but the ideas are
19 not. The majority of this program is a
20 translation of a fortran program by
21 Elaine Meng - UC San Francisco.
22 For information on the algorithms used
23 here see
24 E. Meng and R. Lewis, J. Comp. Chem., 12,
25 pp 891-898 (1991)
26 ******/
27 
28 #include "bbltyp.h"
29 
30 #undef DEBUG
31 
32 
33 
34 void type_attached_oxygens(ums_type *mol, int atm);
35 
check_atomic_numbers(ums_type * mol)36 void check_atomic_numbers(ums_type *mol)
37 {
38   int i, column;
39   char temp_type[5];
40 
41   if (Atomic_number(1) == 0)
42   {
43     column = locate_input_type("INT");
44     for (i = 1; i <= Atoms; i++)
45 	Atomic_number(i) = get_input_type(i,column,Type(i),temp_type,dummy);
46   }
47 }
48 
49 
assign_types(ums_type * mol)50 int assign_types(ums_type *mol)
51 {
52   int i, result;
53 
54   for (i = 1; i <= Atoms; i++)
55     Atomic_number(i) = get_atomic_number(Type(i));
56 
57   tag_organics(mol);
58   result = phase1(mol);
59   result = phase2(mol);
60   result = phase3(mol);
61   result = phase4(mol);
62   result = phase5(mol);
63   result = phase6(mol);
64 
65   check_for_amides(mol);
66   /*
67     find_aromatic_atoms(mol);
68     */
69   return(1);
70 }
71 
72 
tag_organics(ums_type * mol)73 void tag_organics(ums_type *mol)
74 {
75   int i;
76 
77   for (i = 1; i <= Atoms; i++)
78   {
79     if (EQ(Type(i),"C") ||
80 	EQ(Type(i),"H") ||
81 	EQ(Type(i),"O") ||
82 	EQ(Type(i),"N") ||
83 	EQ(Type(i),"S") ||
84 	EQ(Type(i), "P"))
85       Organic(i) = TRUE;
86     else
87       Organic(i) = FALSE;
88   }
89 }
90 
91 /********************************************************************
92 phase1 - type all hydrogens and deuterium by whether they are
93 attached to carbon or not. Calculate the number of heavy atoms
94 bonded to each atom by subtracting the number of hydrogens
95 attached from the valenece
96 *********************************************************************/
97 
98 int
phase1(ums_type * mol)99 phase1(ums_type *mol)
100 {
101   int result;
102   result = type_hydrogens(mol);
103   return(1);
104 }
105 
106 
107 int
phase2(ums_type * mol)108 phase2(ums_type *mol)
109 {
110   int result;
111 
112   result = valence_four(mol);
113   result = valence_three(mol);
114   result = valence_two(mol);
115   return(1);
116 }
117 
118 int
phase3(ums_type * mol)119 phase3(ums_type *mol)
120 {
121   int result;
122 
123   result = valence_one(mol);
124   return(1);
125 }
126 
127 
128 int
phase4(ums_type * mol)129 phase4(ums_type *mol)
130 {
131   int count,i,j;
132   double bond_length;
133   int flag;
134 
135   for (count = 1; count <= Atoms; count ++)
136   {
137     switch(Redo(count))
138     {
139     case 1:
140       for (i = 0; i < Valence(count); i++)
141       {
142 	j = Connection(count,i);
143 	bond_length = distance(Point(count),Point(j));
144 	if ((bond_length <= V2_C2_C_CUTOFF) &&
145 	    (Type(j)[0] == 'C'))
146 	  strcpy(Type(count),"C2");
147 	else
148 	  if ((bond_length <= V2_C2_N_CUTOFF) &&
149 	      (Type(j)[0] == 'N'))
150 	    strcpy(Type(count),"C2");
151       }
152       for (i = 0; i < Valence(count); i++)
153       {
154 	j = Connection(count,i);
155 	bond_length = distance(Point(count),Point(j));
156 	if ((bond_length > V2_C3_C_CUTOFF) &&
157 	    (Type(j)[0] == 'C'))
158 	  strcpy(Type(count),"C3");
159 	else
160 	  if ((bond_length > V2_C3_N_CUTOFF) &&
161 	      (Type(j)[0] == 'N'))
162 	    strcpy(Type(count),"C3");
163 	  else
164 	    if ((bond_length > V2_C3_O_CUTOFF) &&
165 		(Type(j)[0] == 'O'))
166 	      strcpy(Type(count),"C3");
167       }
168       break;
169     case 2:
170       for (i = 0; i < Valence(count); i++)
171       {
172 	j = Connection(count,i);
173 	bond_length = distance(Point(count),Point(j));
174 	if ((bond_length <= V2_N2_C_CUTOFF) &&
175 	    (Type(j)[0] == 'C'))
176 	  strcpy(Type(count),"Npl");
177 	else
178 	  if ((bond_length <= V2_N2_N_CUTOFF) &&
179 	      (Type(j)[0] == 'N'))
180 	    strcpy(Type(count),"Npl");
181       }
182       break;
183     case 3:
184       {
185 	flag = 0;
186 	for (i = 0; i < Valence(count); i++)
187 	{
188 	  j = Connection(count,i);
189 	  bond_length = distance(Point(count),Point(j));
190 	  if ((bond_length <= V2_C2_C_CUTOFF) &&
191 	      (Type(j)[0] == 'C'))
192 	  {
193 	    strcpy(Type(count),"C2");
194 	    flag = 1;
195 	  }
196 	  else
197 	    if ((bond_length <= V2_C2_N_CUTOFF) &&
198 		(Type(j)[0] == 'N'))
199 	    {
200 	      strcpy(Type(count),"C2");
201 	      flag = 1;
202 	    }
203 	}
204 	if (flag == 0)
205 	  for (i = 0; i < Valence(count); i++)
206 	  {
207 	    j = Connection(count,i);
208 	    bond_length = distance(Point(count),Point(j));
209 	    if ((bond_length > V2_C3_C_CUTOFF) &&
210 		(Type(j)[0] == 'C'))
211 	    {
212 	      strcpy(Type(count),"C3");
213 	      flag = 1;
214 	    }
215 	    else
216 	      if ((bond_length > V2_C3_N_CUTOFF) &&
217 		  (Type(j)[0] == 'N'))
218 	      {
219 		strcpy(Type(count),"C3");
220 		flag = 1;
221 	      }
222 	      else
223 		if ((bond_length > V2_C3_O_CUTOFF) &&
224 		    (Type(j)[0] == 'O'))
225 		{
226 		  strcpy(Type(count),"C3");
227 		  flag = 1;
228 		}
229 		else
230 		  if (flag == 0)
231 		    if ((bond_length > GEN_C3_C_CUTOFF) &&
232 		      (Type(j)[0] == 'C'))
233 		    {
234 		      strcpy(Type(count),"C3");
235 		      flag = 1;
236 		    }
237 	  }
238       }
239       break;
240     }
241   }
242   return(1);
243 }
244 
245 int
phase5(ums_type * mol)246 phase5(ums_type *mol)
247 {
248   int count,i, j;
249   int flag;
250 
251   for (count = 1; count <= Atoms; count ++)
252   {
253     if (strcmp(Type(count),"C2") == 0)
254     {
255       flag = 0;
256       for (i = 0; i < Valence(count); i++)
257       {
258 	j = Connection(count,i);
259 	if ((strstr("C3   DC    HC   N3   N3+   O3   ",Type(j)) == NULL) &&
260 	    (strstr("Pac   Sac   Sox  C1   S3    Cac  ",Type(j)) == NULL))
261 	  flag = 1;
262       }
263       if (flag == 0)
264 	strcpy(Type(count),"C3");
265     }
266   }
267   return(1);
268 }
269 
270 
271 int
phase6(ums_type * mol)272 phase6(ums_type *mol)
273 {
274   int i,j,k,l,m,n;
275   int conn;
276   int no_plus;
277   int protonated;
278 
279   for (i = 1; i <= Atoms; i ++)
280   {
281     no_plus = 1;
282     protonated = TRUE;
283     if (EQ(Type(i),"N3"))
284     {
285       for (j = 0; j < Valence(i); j++)
286       {
287 	conn = Connection(i,j);
288 
289 	/* If an unsaturated atom is attached to this nitrogen then it should be Npl */
290 	if ((Valence(i) == 2) && (IsUnsatType(Type(conn))))
291 	{
292 	  protonated = FALSE;
293 	  strcpy(Type(i),"Npl");
294 	  break;
295 	}
296 
297 	/* If the attached atom is something other that C3, H or D the nitrogen is
298 	   not positively charged */
299 	if (NOTEQ(Type(conn),"C3") && (Atomic_number(conn) != 1))
300 	{
301 	  protonated = FALSE;
302 	}
303       }
304       if (protonated)
305 	strcpy(Type(i),"N3+");
306     }
307     /* look for guanadinium nitrogens */
308 
309     else
310       if (EQ(Type(i),"C2"))
311       {
312 	/* First see if we have an sp2 carbon surrounded by 3 sp2
313 	   nitrogens */
314 
315 	m = 0;
316 	for (j= 0; j < Valence(i); j++)
317 	{
318 	  k = Connection(i,j);
319 	  if ((EQ(Type(k),"Npl")) || (EQ(Type(k),"N2")) || (EQ(Type(k),"Ng+")))
320 	    m++;
321 	}
322 	if (m == 3)
323 	{
324 	  strcpy(Type(i),"C+");
325 	  for (j= 0; j < Valence(i); j++)
326 	  {
327 	    k = Connection(i,j);
328 	    strcpy(Type(k),"Ng+");
329 	  }
330 	}
331 #if 0
332 	/* If we have 3 planar nitrogens connected to the sp3 carbon
333 	   it is possible that this is a guanadinium group.  Now check
334 	   each of the nitrogens to see if they are connected to either
335 	   a C2 or Npl  */
336 
337 	if (m == 3)
338 	{
339 	  no_plus = FALSE;
340 	  for (j = 0; j < Valence(i); j++)
341 	  {
342 	    k = Connection(i,j);
343 	    if ((EQ(Type(k),"Npl")) || (EQ(Type(k),"N2")))
344 	    {
345 	      strcpy(Type(k),"Ng+");
346 	      for (l = 0; l < Valence(k); l++)
347 	      {
348 		n = Connection(k,l);
349 		if (n != i)
350 		{
351 		  if (EQ(Type(n),"C2") || EQ(Type(n),"Npl"))
352 		  {
353 		    no_plus = TRUE;
354 		    break;
355 		  }
356 		}
357 	      }
358 	    }
359 	  }
360 	}
361 	if (no_plus == 1)
362 	  for (j = 0; j < Valence(i); j++)
363 	  {
364 	    k = Connection(i,j);
365 	    if (strcmp(Type(k),"Ng+") == 0)
366 	      strcpy(Type(k),"Npl");
367 	  }
368 #endif
369       }
370       else
371 	if (strcmp(Type(i),"Cac") == 0)
372 	  for (j = 0; j < Valence(i); j++)
373 	  {
374 	    k = Connection(i,j);
375 	    if ((strncmp(Type(k),"O",1) == 0) &&
376 		 (count_heavy_atoms(mol,k) == 1))
377 	      strcpy(Type(k),"O-");
378 	  }
379   }
380   return(1);
381 }
382 
383 
384 int
type_hydrogens(ums_type * mol)385 type_hydrogens(ums_type *mol)
386 {
387   int i, conn;
388 
389   for (i = 1; i <= Atoms; i++)
390   {
391     if (Type(i)[0] == 'H')
392     {
393       strcpy(Type(i),"H");
394       conn = Connection(i,0);
395       if (Type(conn)[0] == 'C')
396       {
397 	strcpy(Type(i),"HC");
398       }
399     }
400   }
401   return(1);
402 }
403 
404 
405 int
valence_four(ums_type * mol)406 valence_four(ums_type *mol)
407 {
408   int count;
409 
410   for (count = 1; count <= Atoms; count ++)
411   {
412     if ((Valence(count) == 4) && (IsOrganic(count)))
413     {
414       switch(Type(count)[0])
415       {
416       case 'C':
417 	if ((strcmp(Type(count),"C") == 0))
418 	  strcpy(Type(count),"C3");
419 	break;
420       case 'N':
421 	if (count_free_ox(mol,count) >= 1)
422 	  strcpy(Type(count),"Nox");
423 	else
424 	  strcpy(Type(count),"N3+");
425 	break;
426       case 'P':
427 	if (strlen(Type(count)) == 1)
428 	{
429 	  if (count_free_ox(mol,count) >= 2)
430 	    strcpy(Type(count),"Pac");
431 	  else
432 	    if (count_free_ox(mol,count) == 1)
433 	      strcpy(Type(count),"Pox");
434 	    else
435 	      strcpy(Type(count),"P3+");
436 	}
437 	break;
438       case 'S':
439 	if (strcmp(Type(count),"S") == 0)
440 	{
441 	  if (count_free_ox(mol,count) >= 3)
442 	    strcpy(Type(count),"Sac");
443 	  else
444 	    if (count_free_ox(mol,count) >= 1)
445 	      strcpy(Type(count),"Sox");
446 	    else
447 	      strcpy(Type(count),"S");
448 	}
449 	break;
450       case 'B':
451 	if (count_free_ox(mol,count) >= 3)
452 	  strcpy(Type(count),"Bac");
453 	if (count_free_ox(mol,count) >= 1)
454 	  strcpy(Type(count),"Box");
455 	else
456 	  strcpy(Type(count),"B");
457 	break;
458       }
459     }
460   }
461   return(1);
462 }
463 
464 int
valence_three(ums_type * mol)465 valence_three(ums_type *mol)
466 {
467   int count;
468   int k,l,m;
469   double angle1,angle2,angle3,avg_angle;
470 
471   for (count = 1; count <= Atoms; count ++)
472   {
473     if ((Valence(count) == 3) && (IsOrganic(count)))
474     {
475       k = Connection(count,0);
476       l = Connection(count,1);
477       m = Connection(count,2);
478 
479       angle1 = bond_angle(Point(k),
480 			  Point(count),
481 			  Point(l));
482       angle2 = bond_angle(Point(k),
483 			  Point(count),
484 			  Point(m));
485       angle3 = bond_angle(Point(l),
486 			  Point(count),
487 			  Point(m));
488       avg_angle = (angle1 + angle2 + angle3)/3;
489 
490       switch(Type(count)[0])
491       {
492       case 'C':
493 	if (avg_angle < SP3_MAX)
494 	  strcpy(Type(count),"C3");
495 	else
496 	  if (count_free_ox(mol,count) >= 2)
497 	    strcpy(Type(count),"Cac");
498 	  else
499 	    strcpy(Type(count),"C2");
500 	break;
501       case 'N':
502 	if (avg_angle < SP3_MAX)
503 	  strcpy(Type(count),"N3");
504 	else
505 	  if (count_free_ox(mol,count) >= 2)
506 	    strcpy(Type(count),"Ntr");
507 	  else
508 	    strcpy(Type(count),"Npl");
509 	break;
510       case 'B':
511 	if (count_free_ox(mol,count) >= 1)
512 	  strcpy(Type(count),"Box");
513 	else
514 	  strcpy(Type(count),"B");
515 	break;
516       case 'S':
517 	if (strcmp(Type(count),"S") == 0)
518 	{
519 	  if (count_free_ox(mol,count) >= 1)
520 	    strcpy(Type(count),"Sox");
521 	  else
522 	    strcpy(Type(count),"S3+");
523 	}
524 	break;
525       }
526     }
527   }
528   return(1);
529 }
530 
531 
532 int
valence_two(ums_type * mol)533 valence_two(ums_type *mol)
534 {
535   int count;
536   int k,l;
537   double angle1;
538 
539   for (count = 1; count <= Atoms; count ++)
540   {
541     if ((Valence(count) == 2) && (IsOrganic(count)))
542     {
543       k = Connection(count,0);
544       l = Connection(count,1);
545       angle1 = bond_angle(Point(k),
546 			  Point(count),
547 			  Point(l));
548 
549       switch(Type(count)[0])
550       {
551       case 'C':
552 	if (strcmp(Type(count),"C") == 0)
553 	{
554 	  if (angle1 < SP3_MAX)
555 	  {
556 	    strcpy(Type(count),"C3");
557 	    Redo(count) = 1;
558 	  }
559 	  else
560 	    if (angle1 < SP_MIN)
561 	    {
562 	      strcpy(Type(count),"C2");
563 	      if (angle1 < MAY_BE_SP2)
564 		Redo(count) = 3;
565 	    }
566 	    else
567 	      strcpy(Type(count),"C1");
568 	}
569 	break;
570       case 'N':
571 	if (angle1 <= SP3_MAX)
572 	{
573 	  strcpy(Type(count),"N3");
574 	  Redo(count) = 2;
575 	}
576 	else
577 	  if (angle1 <= SP_MIN)
578 	  {
579 	    strcpy(Type(count),"Npl");
580 	  }
581 	  else
582 	    strcpy(Type(count),"N1");
583 	break;
584       case 'O':
585 	    strcpy(Type(count),"O3");
586 	break;
587       case 'S':
588 	if (strcmp(Type(count),"S") == 0)
589 	  strcpy(Type(count),"S3");
590 	break;
591       }
592     }
593   }
594   return(1);
595 }
596 
597 
598 int
valence_one(ums_type * mol)599 valence_one(ums_type *mol)
600 {
601   int count;
602   int k;
603   double bond_length;
604 
605   for (count = 1; count <= Atoms; count ++)
606   {
607     k = Connection(count,0);
608     bond_length = distance(Point(count),Point(k));
609 
610     if ((Valence(count) == 1) && (IsOrganic(count)))
611       switch(Type(count)[0])
612       {
613       case 'C':
614 	if (strcmp(Type(count),"C") == 0)
615 	{
616 	  if ((strncmp(Type(k),"C1",2) == 0)
617 	    && (bond_length <= V1_C1_C1_CUTOFF))
618 	    strcpy(Type(count),"C1");
619 	  else
620 	    if ((strncmp(Type(k),"C",1) == 0)
621 	      && (bond_length <= V1_C2_C_CUTOFF))
622 	      strcpy(Type(count),"C2");
623 	    else
624 	      strcpy(Type(count),"C3");
625 	}
626 	if (strncmp(Type(k),"N",1) == 0)
627 	{
628 	  if (bond_length <= V1_C2_N_CUTOFF)
629 	    strcpy(Type(count),"C2");
630 	  else
631 	    strcpy(Type(count),"C3");
632 	}
633 	break;
634       case 'N':
635 	if (strcmp(Type(count),"N") == 0)
636 	  if ((strncmp(Type(k),"C1",2) == 0)
637 	  && (bond_length <= V1_N1_C1_CUTOFF))
638 	      strcpy(Type(count),"N1");
639 	  else
640 	    if (((strncmp(Type(k),"C2",2) == 0) ||
641 		(strncmp(Type(k),"C3",2) == 0))
642 	      && ((bond_length > V1_N3_C_CUTOFF)))
643 		strcpy(Type(count),"N3");
644 	    else
645 	      if (((strncmp(Type(k),"N3",2) == 0))
646 		&& ((bond_length > V1_N3_N3_CUTOFF)))
647 		  strcpy(Type(count),"N3");
648 	      else
649 		if (((strncmp(Type(k),"Npl",3) == 0))
650 		  && (bond_length > V1_N3_N2_CUTOFF))
651 		    strcpy(Type(count),"N3");
652 		else
653 		    strcpy(Type(count),"Npl");
654 	break;
655       case 'O':
656 	if (strcmp(Type(count),"O") == 0)
657 	  if (strstr("Cac  Pac  Sac  Ntr  ",Type(k)) != NULL)
658 	    strcpy(Type(count),"O-");
659 	  else
660 	    if (strstr("Nox  Pox  Sox  ",Type(k)) != NULL)
661 	      strcpy(Type(count),"O2");
662 	    else
663 	      if ((Type(k)[0] == 'C')
664 		  && (bond_length <= V1_O2_C2_CUTOFF))
665 	      {
666 		strcpy(Type(count),"O2");
667 		strcpy(Type(k),"C2");
668 		Redo(k) = 0;
669 	      }
670 	      else
671 		if ((strcmp(Type(k),"As") == 0)
672 		    && (bond_length <= V1_O2_AS_CUTOFF))
673 		  strcpy(Type(count),"O2");
674 		else
675 		  strcpy(Type(count),"O3");
676 	break;
677       case 'S':
678 	if (strcmp(Type(count),"S") == 0)
679 	if ((strncmp(Type(k),"P",1) == 0))
680 	  strcpy(Type(count),"S2");
681 	else
682 	  if ((strncmp(Type(k),"C",1) == 0)
683 	      && (bond_length <= V1_S2_C2_CUTOFF))
684 	  {
685 	    strcpy(Type(count),"S2");
686 	    strcpy(Type(k),"C2");
687 	    Redo(count) = 0;
688 	  }
689 	  else
690 	    if ((strcmp(Type(k),"As") == 0)
691 		&& (bond_length <= V1_S2_AS_CUTOFF))
692 	      strcpy(Type(count),"S2");
693 	    else
694 	      strcpy(Type(count),"S3");
695 	break;
696       }
697   }
698   return(1);
699 }
700 
701 
702 double
bond_angle(coord_type a,coord_type b,coord_type c)703 bond_angle(coord_type a, coord_type b, coord_type c)
704 {
705   double angle;
706   double dist;
707 
708   double cos_theta;
709 
710   dist = distance(a,b) * distance(b,c);
711   cos_theta = ((a.x - b.x) * (c.x - b.x) + (a.y - b.y) * (c.y - b.y) +
712 	       (a.z - b.z) * (c.z - b.z))/dist;
713   if (cos_theta  + 1.0 < 0.0001)
714     angle = 180.0;
715   else
716     angle = (acos(cos_theta)) * RAD_TO_DEG;
717   return(angle);
718 }
719 
720 
721 
722 
723 int
count_heavy_atoms(ums_type * mol,int atom_number)724 count_heavy_atoms(ums_type *mol, int atom_number)
725 {
726   int count;
727   int H_count = 0;
728   int bonded_atom;
729 
730   for (count = 0; count < Valence(atom_number); count ++)
731   {
732     bonded_atom = (Connection(atom_number,count));
733     if (Type(bonded_atom)[0] == 'H')
734     {
735       H_count ++;
736     }
737   }
738   return( Valence(atom_number) - H_count);
739 }
740 
741 
742 int
count_free_ox(ums_type * mol,int atom_number)743 count_free_ox(ums_type *mol, int atom_number)
744 {
745   int count;
746   int bonded_atom;
747   int free_O_count = 0;
748 
749   for (count = 0; count < Valence(atom_number); count ++)
750   {
751     bonded_atom = (Connection(atom_number,count));
752     if ((Type(bonded_atom)[0] == 'O') &&
753 	(count_heavy_atoms(mol,bonded_atom) == 1))
754     {
755       free_O_count ++;
756     }
757   }
758   return(free_O_count);
759 }
760 
fix_carboxylates(ums_type * mol)761 void fix_carboxylates(ums_type *mol)
762 {
763   int i;
764 
765   for (i = 1; i < Atoms; i++)
766   {
767     if EQ(Type(i),"O-")
768     {
769       switch(Valence(i))
770       {
771       case 1 :
772 	strcpy(Type(i),"O2");
773 	break;
774       default :
775 	strcpy(Type(i),"O3");
776 	break;
777       }
778     }
779   }
780 }
781 
check_for_amides(ums_type * mol)782 void check_for_amides(ums_type *mol)
783 {
784   int i,j,conn;
785 
786   for (i = 1; i <= Atoms; i++)
787   {
788     if (EQ(Type(i),"Npl"))
789       for (j = 0; j < Valence(i); j++)
790       {
791 	conn = Connection(i,j);
792 
793 	if (EQ(Type(conn),"Cac") || EQ(Type(conn),"Sox") || EQ(Type(conn),"So2"))
794 	{
795 	  strcpy(Type(i),"Nam");
796 	  break;
797 	}
798 
799 	if (EQ(Type(conn),"C2"))
800 	  if (check_for_carbonyl(mol,Connection(i,j)) == 3)
801 	  {
802 	    strcpy(Type(i),"Nam");
803 	    break;
804 	  }
805       }
806   }
807 }
808 
809 
810 
811 
812 
813 
814 
815 
816 
817 
818