1 /************************************************************/
2 /*							    */
3 /*	prunebiostruc.c 				    */
4 /*							    */
5 /*	pulls out a single biopolymer chain from a complete */
6 /*	biostruc object  and writes it out to a file.       */
7 /*	Kenneth J. Addess August 25, 1998		    */
8 /************************************************************/
9 /*
10 * $Log: prunebsc.c,v $
11 * Revision 6.4  2006/07/13 17:07:27  bollin
12 * removed unused variables
13 *
14 * Revision 6.3  2002/05/31 20:16:37  chenj
15 * PruneModel: Initialize molidx=NULL & if (molidx) Memfree(molidx)
16 *
17 * Revision 6.2  1999/11/06 15:56:15  addess
18 * got rid of two useless lines of code
19 *
20 * Revision 6.1  1999/05/07 20:01:09  kans
21 * renaming prunebiostruc to prunebsc
22 *
23 * Revision 6.4  1998/12/01 15:13:47  addess
24 * cleaned up code to remove memory leaks
25 *
26  * Revision 6.3  1998/11/28  20:43:53  addess
27  * clean up some code that leaked memory
28  *
29  * Revision 6.2  1998/10/14  17:33:52  addess
30  * *** empty log message ***
31  *
32 */
33 
34 #include "prunebsc.h"
35 
36 /*static void My_StringNCpy(CharPtr str1, CharPtr str2, Int4 len)
37 {
38   StringNCpy(str1,str2,len);
39   str1[len] = '\0';
40 }*/
41 
42 
43 /*static Boolean IsLayerOne(ValNodePtr pvn)
44 {
45   Boolean Layer = FALSE;
46   CharPtr comment_txt;
47   CharPtr LayerOne = {"This is Layer 1 Release"};
48   CharPtr temp_string;
49   Int4 len;
50 
51   while (pvn)
52   {
53      if (pvn->choice == BiostrucDescr_pdb_comment)
54      {
55         comment_txt = (CharPtr)pvn->data.ptrvalue;
56         temp_string = StringSave(LayerOne);
57         len = StringLen(LayerOne);
58         My_StringNCpy(temp_string, &comment_txt[18], len);
59         if (!StringICmp(temp_string, LayerOne))
60         {
61           Layer = TRUE;
62           break;
63         }
64      }
65      pvn = pvn->next;
66    }
67 
68    return Layer;
69 }
70 */
71 
72 /*static Int4 NumberOfBioChains(MoleculeGraphPtr mgp)
73 {
74   ValNodePtr vnp;
75   Int4 mtype, nbp = 0;
76 
77   while (mgp != NULL)
78   {
79       vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
80 
81       if (vnp) mtype = vnp->data.intvalue;
82 
83      switch(mtype)
84      {
85 	 case 1:
86 	 case 2:
87 	 case 3:
88 	    nbp++;
89 	    break;
90      }
91 
92       mgp = mgp->next;
93   }
94 
95   return nbp;
96 }*/
97 
MakeBioGraphPtr(MoleculeGraphPtr mgp)98 static MoleculeGraphPtr MakeBioGraphPtr(MoleculeGraphPtr mgp)
99 {
100   MoleculeGraphPtr newbp, bp = NULL, currentbp;
101   ValNodePtr vnp;
102   Int4 mtype;
103 
104   while (mgp != NULL)
105    {
106       vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
107 
108       if (vnp) mtype = vnp->data.intvalue;
109 
110       switch(mtype)
111       {
112 	 case 1:
113 	 case 2:
114 	 case 3:
115 	    newbp = MoleculeGraphNew();
116 	    newbp->id = mgp->id;
117 	    newbp->descr = mgp->descr;
118 	    newbp->seq_id = mgp->seq_id;
119             newbp->residue_sequence = mgp->residue_sequence;
120 	    newbp->inter_residue_bonds = mgp->inter_residue_bonds;
121 	    if (bp == NULL)
122 	    {
123 	       bp = newbp;
124             }
125 	    else
126 	    {
127 	       currentbp->next = newbp;
128             }
129 	    currentbp = newbp;
130 	    break;
131       }
132 
133       mgp = mgp->next;
134    }
135 
136   return bp;
137 }
138 
139 /*static Int4 NumberOfHetChains(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
140 {
141   ValNodePtr vnp;
142   Int4 mtype, molecule_id, nhet = 0;
143   CharPtr mname;
144 
145   while (mgp != NULL)
146   {
147     vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
148 
149     if (vnp) mtype = vnp->data.intvalue;
150 
151     switch(mtype)
152     {
153       case 6:
154       if (vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name))
155       mname = vnp->data.ptrvalue;
156       molecule_id = atoi(mname);
157 
158       if (isBiopoly(molecule_id, bp))
159       {
160         nhet++;
161       }
162       break;
163     }
164    mgp = mgp->next;
165   }
166   return nhet;
167 }
168 */
MakeHetGraphPtr(MoleculeGraphPtr mgp,MoleculeGraphPtr bp)169 static MoleculeGraphPtr MakeHetGraphPtr(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
170 {
171   MoleculeGraphPtr newhet = NULL, het = NULL, currenthet;
172   ValNodePtr vnp;
173   Int4 mtype, molecule_id;
174   CharPtr mname;
175 
176   while (mgp != NULL)
177   {
178       vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
179 
180       if (vnp) mtype = vnp->data.intvalue;
181 
182       switch(mtype)
183       {
184         case 6:
185 	  vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name);
186 
187 	  if (vnp) mname = vnp->data.ptrvalue;
188 
189           molecule_id = atoi(mname);
190 	  if (isBiopoly(molecule_id, bp))
191 	  {
192 	    newhet = MoleculeGraphNew();
193 	    newhet->id = mgp->id;
194 	    newhet->descr = mgp->descr;
195 	    newhet->seq_id = mgp->seq_id;
196             newhet->residue_sequence = mgp->residue_sequence;
197 	    newhet->inter_residue_bonds = mgp->inter_residue_bonds;
198 	    if (het == NULL)
199 	    {
200 	      het = newhet;
201             }
202 	    else
203 	    {
204 	      currenthet->next = newhet;
205             }
206 	    currenthet = newhet;
207 	  }
208 	  break;
209       }
210 
211    mgp = mgp->next;
212   }
213 
214   return het;
215 }
216 
PruneFeatureSet(BiostrucFeatureSetPtr bsfsp,Int4 id)217 static BiostrucFeatureSetPtr PruneFeatureSet(BiostrucFeatureSetPtr bsfsp, Int4 id)
218 {
219   BiostrucFeatureSetPtr pbsfs;
220   BiostrucFeaturePtr bsfpDie, bsfp, bsfpnew = NULL;
221   ResidueIntervalPntrPtr ripp;
222   ResiduePntrsPtr rpp;
223   ChemGraphPntrsPtr cgpp;
224 
225   pbsfs = BiostrucFeatureSetNew();
226   pbsfs->id = bsfsp->id;
227   bsfsp->id = 0;
228   pbsfs->descr = bsfsp->descr;
229   pbsfs->features = NULL;
230   bsfp = bsfsp->features;
231 
232   while (bsfp)
233   {
234     cgpp = (ChemGraphPntrsPtr)bsfp->Location_location->data.ptrvalue;
235     rpp = (ResiduePntrsPtr)cgpp->data.ptrvalue;
236     ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
237 
238     if (ripp->molecule_id == id)
239     {
240       if (pbsfs->features == NULL)
241       {
242         pbsfs->features = bsfp;
243         bsfpnew = pbsfs->features;
244         bsfp = bsfp->next;
245         bsfpnew->next = NULL;
246       }
247       else
248       {
249         bsfpnew->next = bsfp;
250         bsfpnew = bsfp;
251         bsfp = bsfp->next;
252         bsfpnew->next = NULL;
253       }
254     }
255     else
256     {
257       bsfpDie = bsfp;
258       bsfp = bsfp->next;
259       bsfpDie = NULL;
260     }
261   }
262   return pbsfs;
263 }
264 
PruneModel(BiostrucModelPtr bsmp,ValNodePtr pvnIds,Int4 id)265 static BiostrucModelPtr PruneModel(BiostrucModelPtr bsmp, ValNodePtr pvnIds, Int4 id)
266 {
267   BiostrucModelPtr bsmp2;
268   ValNodePtr pvnCoordinates, pvnLiteral, pvnCoordinatesNew, pvnLiteralNew;
269   ValNodePtr pvnMolId, pvnResId, pvnAtmId, pvnId;
270   ValNodePtr pvnX, pvnY, pvnZ;
271   ValNodePtr prev = NULL, current = NULL;
272   ModelCoordinateSetPtr mcs, mcsDie, mcsnew = NULL;
273   SurfaceCoordinatesPtr scp;
274   AtomicCoordinatesPtr acp, acpnew;
275   ResidueIntervalPntrPtr ripp;
276   ResiduePntrsPtr rpp;
277   AtomPntrsPtr atmp;
278   ModelSpacePointsPtr stmp;
279   Int4 numatoms, index, new_numatoms = 0;
280   Boolean *molidx = NULL, found;
281 
282   bsmp2 = BiostrucModelNew();
283   bsmp2->id = bsmp->id;
284   bsmp2->type = bsmp->type;
285   bsmp2->descr = bsmp->descr;
286   bsmp->descr = NULL;
287   bsmp2->model_space = bsmp->model_space;
288   bsmp->model_space = NULL;
289   bsmp2->model_coordinates = NULL;
290 
291   mcs = bsmp->model_coordinates;
292 
293   while (mcs)
294   {
295     if (mcs->descr)
296     {
297       pvnCoordinates = mcs->Coordinates_coordinates;
298       pvnLiteral = pvnCoordinates->data.ptrvalue;
299       scp = (SurfaceCoordinatesPtr)pvnLiteral->data.ptrvalue;
300       rpp = (ResiduePntrsPtr)scp->contents->data.ptrvalue;
301       ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
302 
303       if (ripp->molecule_id == id)
304       {
305         if (!bsmp2->model_coordinates)
306         {
307           bsmp2->model_coordinates = mcs;
308           mcsnew = bsmp2->model_coordinates;
309           mcs = mcs->next;
310           mcsnew->next = NULL;
311         }
312         else
313         {
314           mcsnew->next = mcs;
315           mcsnew = mcs;
316           mcs = mcs->next;
317           mcsnew->next = NULL;
318         }
319       }
320       else
321       {
322         mcsDie = mcs;
323         mcs = mcs->next;
324         mcsDie = NULL;
325       }
326     }
327     else
328     {
329       pvnCoordinates = mcs->Coordinates_coordinates;
330       pvnLiteral = pvnCoordinates->data.ptrvalue;
331       acp = (AtomicCoordinatesPtr)pvnLiteral->data.ptrvalue;
332       atmp = acp->atoms;
333       stmp = acp->sites;
334       numatoms = atmp->number_of_ptrs;
335       molidx = (Boolean *)MemNew(numatoms * sizeof(Boolean));
336 
337       for (index = 0, pvnMolId = atmp->molecule_ids;
338            pvnMolId != NULL; index++, pvnMolId = pvnMolId->next)
339       {
340         pvnId = pvnIds;
341         found = FALSE;
342         while (pvnId)
343         {
344           if (pvnMolId->data.intvalue == pvnId->data.intvalue)
345           {
346             found = TRUE;
347             break;
348           }
349           pvnId = pvnId->next;
350         }
351         if (found)
352         {
353           molidx[index] = TRUE;
354           new_numatoms++;
355         }
356           else molidx[index] = FALSE;
357       }
358 
359       acpnew = AtomicCoordinatesNew();
360       acpnew->number_of_points = new_numatoms;
361       acpnew->atoms = AtomPntrsNew();
362       acpnew->atoms->number_of_ptrs = new_numatoms;
363       pvnMolId = atmp->molecule_ids;
364       for (index = 0; index < numatoms; index++)
365       {
366         if (molidx[index] == TRUE)
367         {
368           current = pvnMolId;
369           pvnMolId = pvnMolId->next;
370           current->next = NULL;
371         }
372         else
373         {
374           pvnMolId = pvnMolId->next;
375           continue;
376         }
377         if (acpnew->atoms->molecule_ids == NULL)
378 	  acpnew->atoms->molecule_ids = current;
379         else
380 	  prev->next = current;
381         prev = current;
382       }
383       prev = NULL;
384       current = NULL;
385       pvnResId = atmp->residue_ids;
386       for (index = 0; index < numatoms; index++)
387       {
388         if (molidx[index] == TRUE)
389         {
390           current = pvnResId;
391           pvnResId = pvnResId->next;
392           current->next = NULL;
393         }
394         else
395         {
396           pvnResId = pvnResId->next;
397           continue;
398         }
399         if (acpnew->atoms->residue_ids == NULL)
400 	  acpnew->atoms->residue_ids = current;
401         else
402 	  prev->next = current;
403         prev = current;
404       }
405       prev = NULL;
406       current = NULL;
407       pvnAtmId = atmp->atom_ids;
408       for (index = 0; index < numatoms; index++)
409       {
410         if (molidx[index] == TRUE)
411         {
412           current = pvnAtmId;
413           pvnAtmId = pvnAtmId->next;
414           current->next = NULL;
415         }
416         else
417         {
418           pvnAtmId = pvnAtmId->next;
419           continue;
420         }
421         if (acpnew->atoms->atom_ids == NULL)
422 	  acpnew->atoms->atom_ids = current;
423         else
424 	  prev->next = current;
425         prev = current;
426       }
427       prev = NULL;
428       current = NULL;
429       acpnew->sites = ModelSpacePointsNew();
430       acpnew->sites->scale_factor = 1000;
431       pvnX = stmp->x;
432       for (index = 0; index < numatoms; index++)
433       {
434         if (molidx[index] == TRUE)
435         {
436           current = pvnX;
437           pvnX = pvnX->next;
438           current->next = NULL;
439         }
440         else
441         {
442           pvnX = pvnX->next;
443           continue;
444         }
445         if (acpnew->sites->x == NULL)
446 	  acpnew->sites->x = current;
447         else
448 	  prev->next = current;
449         prev = current;
450       }
451       prev = NULL;
452       current = NULL;
453       pvnY = stmp->y;
454       for (index = 0; index < numatoms; index++)
455       {
456         if (molidx[index] == TRUE)
457         {
458           current = pvnY;
459           pvnY = pvnY->next;
460           current->next = NULL;
461         }
462         else
463         {
464           pvnY = pvnY->next;
465           continue;
466         }
467         if (acpnew->sites->y == NULL)
468 	  acpnew->sites->y = current;
469         else
470 	  prev->next = current;
471         prev = current;
472       }
473       prev = NULL;
474       current = NULL;
475       pvnZ = stmp->z;
476       for (index = 0; index < numatoms; index++)
477       {
478         if (molidx[index] == TRUE)
479         {
480           current = pvnZ;
481           pvnZ = pvnZ->next;
482           current->next = NULL;
483         }
484         else
485         {
486           pvnZ = pvnZ->next;
487           continue;
488         }
489         if (acpnew->sites->z == NULL)
490 	  acpnew->sites->z = current;
491         else
492 	  prev->next = current;
493         prev = current;
494       }
495 
496       pvnLiteralNew = ValNodeNew(NULL);
497       pvnLiteralNew->choice = 1;
498       pvnLiteralNew->data.ptrvalue = (Pointer)acpnew;
499       pvnCoordinatesNew = ValNodeNew(NULL);
500       pvnCoordinatesNew->choice = 1;
501       pvnCoordinatesNew->data.ptrvalue = (Pointer)pvnLiteralNew;
502       mcsnew = ModelCoordinateSetNew();
503       mcsnew->id = mcs->id;
504       mcsnew->descr = NULL;
505       mcsnew->Coordinates_coordinates = pvnCoordinatesNew;
506 
507       if (!bsmp2->model_coordinates)
508       {
509         bsmp2->model_coordinates = mcsnew;
510         mcs = mcs->next;
511       }
512     }
513   }
514 
515   if (molidx) MemFree(molidx);
516   return bsmp2;
517 }
518 
PruneBiostruc(BiostrucPtr bsp,CharPtr chain)519 BiostrucPtr LIBCALL PruneBiostruc(BiostrucPtr bsp, CharPtr chain)
520 {
521   ValNodePtr vnp, pvnId = NULL, pvnIds = NULL;
522   CharPtr chnid, hetid, feature_name;
523   Int4 mol_id, hetnum, molId1, molId2;
524   BiostrucPtr bsp2;
525   BiostrucGraphPtr bsgp;
526   BiostrucModelPtr bsmp, bsmpHead = NULL, bsmpTail;
527   BiostrucFeatureSetPtr bsfsp, bsfspHead = NULL, bsfspTail;
528   MoleculeGraphPtr bp, het, currenthet, currentbp;
529   InterResidueBondPtr pirb, pirbnew;
530   Boolean found1, found2;
531 
532   if (!bsp)
533   {
534     return NULL;
535   }
536 
537    bsgp = bsp->chemical_graph;
538   /*bsmp = bsp->model;*/
539 
540   bp =  MakeBioGraphPtr(bsgp->molecule_graphs);
541 
542 /* nhet = NumberOfHetChains(bsgp->molecule_graphs, bp);*/
543 
544   het = MakeHetGraphPtr(bsgp->molecule_graphs, bp);
545 
546   bsp2 = BiostrucNew();
547   bsp2->id = bsp->id;
548   bsp->id = NULL;
549   vnp = ValNodeNew(NULL);
550   vnp = bsp->descr;
551   bsp2->descr = vnp;
552   bsp->descr = NULL;
553   bsp2->chemical_graph = BiostrucGraphNew();
554 
555 /* Pull out the relevant parts of the molecule_graphs */
556 
557   for (currentbp = bp; currentbp != NULL; currentbp = currentbp->next)
558   {
559     if (vnp = ValNodeFindNext(currentbp->descr, NULL, BiomolDescr_name))
560       chnid = vnp->data.ptrvalue;
561 
562     if (*chnid == *chain)
563     {
564       bsp2->chemical_graph->molecule_graphs = currentbp;
565       mol_id = currentbp->id;
566       ValNodeAddInt(&pvnIds, 0, mol_id);
567       currentbp->next = NULL;
568       break;
569     }
570   }
571 
572   while (het)
573   {
574     if (vnp = ValNodeFindNext(het->descr, NULL, BiomolDescr_name))
575       hetid = vnp->data.ptrvalue;
576 
577     hetnum = atoi(hetid);
578 
579     if (hetnum == mol_id)
580     {
581       if (!bsp2->chemical_graph->molecule_graphs->next)
582       {
583         bsp2->chemical_graph->molecule_graphs->next = het;
584         currenthet = bsp2->chemical_graph->molecule_graphs->next;
585         ValNodeAddInt(&pvnIds, 0, currenthet->id);
586         het = het->next;
587         currenthet->next = NULL;
588       }
589       else
590       {
591         currenthet->next = het;
592         currenthet = het;
593         ValNodeAddInt(&pvnIds, 0, currenthet->id);
594         het = het->next;
595         currenthet->next = NULL;
596       }
597     }
598     else het = het->next;
599   }
600 
601   bsp->chemical_graph->molecule_graphs = NULL;
602   bsp2->chemical_graph->descr = bsp->chemical_graph->descr;
603   bsp->chemical_graph->descr = NULL;
604   bsp2->chemical_graph->residue_graphs = bsp->chemical_graph->residue_graphs;
605   bsp->chemical_graph->residue_graphs = NULL;
606 
607   pirb = bsp->chemical_graph->inter_molecule_bonds;
608 
609   while (pirb)
610   {
611     molId1 = pirb->atom_id_1->molecule_id;
612     molId2 = pirb->atom_id_2->molecule_id;
613     found1 = FALSE;
614     found2 = FALSE;
615     pvnId = pvnIds;
616 
617     while (pvnId)
618     {
619       if (pvnId->data.intvalue == molId1)
620       {
621         found1 = TRUE;
622         break;
623       }
624       pvnId = pvnId->next;
625     }
626 
627     pvnId = pvnIds;
628     while (pvnId)
629     {
630       if (pvnId->data.intvalue == molId2)
631       {
632         found2 = TRUE;
633         break;
634       }
635       pvnId = pvnId->next;
636     }
637 
638     if ((found1) && (found2))
639     {
640       if (!bsp2->chemical_graph->inter_molecule_bonds)
641       {
642         bsp2->chemical_graph->inter_molecule_bonds = pirb;
643         pirbnew = bsp2->chemical_graph->inter_molecule_bonds;
644         pirb = pirb->next;
645         pirbnew->next = NULL;
646       }
647       else
648       {
649         pirbnew->next = pirb;
650         pirbnew = pirb;
651         pirb = pirb->next;
652         pirbnew->next = NULL;
653       }
654     }
655     else pirb = pirb->next;
656   }
657 
658   bsp->chemical_graph->inter_molecule_bonds = NULL;
659 
660   bsfsp = bsp->features;
661   while (bsfsp)
662   {
663     if (vnp = ValNodeFindNext(bsfsp->descr, NULL, BiostrucFeatureSetDescr_name))
664        feature_name = vnp->data.ptrvalue;
665 
666     if ((!StringICmp("NCBI assigned secondary structure", feature_name)) ||
667        (!StringICmp("NCBI Domains", feature_name)) ||
668        (!StringICmp("PDB secondary structure", feature_name)))
669     {
670       if (!bsfspHead)
671       {
672         bsfspHead = PruneFeatureSet(bsfsp, mol_id);
673         bsfspTail = bsfspHead;
674         bsfspTail->next = NULL;
675       }
676       else
677       {
678         bsfspTail->next = PruneFeatureSet(bsfsp, mol_id);
679         bsfspTail = bsfspTail->next;
680         bsfspTail->next = NULL;
681       }
682     }
683 
684     bsfsp = bsfsp->next;
685   }
686   bsp2->features = bsfspHead;
687   bsp->features = NULL;
688 
689   bsmp = bsp->model;
690 
691   while (bsmp)
692   {
693     if (!bsmpHead)
694     {
695       bsmpHead = PruneModel(bsmp, pvnIds, mol_id);
696       bsmpTail = bsmpHead;
697       bsmpTail->next = NULL;
698     }
699     else
700     {
701       bsmpTail->next = PruneModel(bsmp, pvnIds, mol_id);
702       bsmpTail = bsmpTail->next;
703       bsmpTail->next = NULL;
704     }
705 
706     bsmp = bsmp->next;
707   }
708 
709   bsp2->model = bsmpHead;
710   bsp->model = NULL;
711   bsp->chemical_graph = NULL;
712   BiostrucFree(bsp);
713   ValNodeFree(pvnIds);
714   return bsp2;
715 }
716 
717