1 /************************************************************/
2 /* */
3 /* prunebiostruc.c */
4 /* */
5 /* pulls out a single biopolymer chain from a complete */
6 /* biostruc object and writes it out to a file. */
7 /* Kenneth J. Addess August 25, 1998 */
8 /************************************************************/
9 /*
10 * $Log: prunebsc.c,v $
11 * Revision 6.4 2006/07/13 17:07:27 bollin
12 * removed unused variables
13 *
14 * Revision 6.3 2002/05/31 20:16:37 chenj
15 * PruneModel: Initialize molidx=NULL & if (molidx) Memfree(molidx)
16 *
17 * Revision 6.2 1999/11/06 15:56:15 addess
18 * got rid of two useless lines of code
19 *
20 * Revision 6.1 1999/05/07 20:01:09 kans
21 * renaming prunebiostruc to prunebsc
22 *
23 * Revision 6.4 1998/12/01 15:13:47 addess
24 * cleaned up code to remove memory leaks
25 *
26 * Revision 6.3 1998/11/28 20:43:53 addess
27 * clean up some code that leaked memory
28 *
29 * Revision 6.2 1998/10/14 17:33:52 addess
30 * *** empty log message ***
31 *
32 */
33
34 #include "prunebsc.h"
35
36 /*static void My_StringNCpy(CharPtr str1, CharPtr str2, Int4 len)
37 {
38 StringNCpy(str1,str2,len);
39 str1[len] = '\0';
40 }*/
41
42
43 /*static Boolean IsLayerOne(ValNodePtr pvn)
44 {
45 Boolean Layer = FALSE;
46 CharPtr comment_txt;
47 CharPtr LayerOne = {"This is Layer 1 Release"};
48 CharPtr temp_string;
49 Int4 len;
50
51 while (pvn)
52 {
53 if (pvn->choice == BiostrucDescr_pdb_comment)
54 {
55 comment_txt = (CharPtr)pvn->data.ptrvalue;
56 temp_string = StringSave(LayerOne);
57 len = StringLen(LayerOne);
58 My_StringNCpy(temp_string, &comment_txt[18], len);
59 if (!StringICmp(temp_string, LayerOne))
60 {
61 Layer = TRUE;
62 break;
63 }
64 }
65 pvn = pvn->next;
66 }
67
68 return Layer;
69 }
70 */
71
72 /*static Int4 NumberOfBioChains(MoleculeGraphPtr mgp)
73 {
74 ValNodePtr vnp;
75 Int4 mtype, nbp = 0;
76
77 while (mgp != NULL)
78 {
79 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
80
81 if (vnp) mtype = vnp->data.intvalue;
82
83 switch(mtype)
84 {
85 case 1:
86 case 2:
87 case 3:
88 nbp++;
89 break;
90 }
91
92 mgp = mgp->next;
93 }
94
95 return nbp;
96 }*/
97
MakeBioGraphPtr(MoleculeGraphPtr mgp)98 static MoleculeGraphPtr MakeBioGraphPtr(MoleculeGraphPtr mgp)
99 {
100 MoleculeGraphPtr newbp, bp = NULL, currentbp;
101 ValNodePtr vnp;
102 Int4 mtype;
103
104 while (mgp != NULL)
105 {
106 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
107
108 if (vnp) mtype = vnp->data.intvalue;
109
110 switch(mtype)
111 {
112 case 1:
113 case 2:
114 case 3:
115 newbp = MoleculeGraphNew();
116 newbp->id = mgp->id;
117 newbp->descr = mgp->descr;
118 newbp->seq_id = mgp->seq_id;
119 newbp->residue_sequence = mgp->residue_sequence;
120 newbp->inter_residue_bonds = mgp->inter_residue_bonds;
121 if (bp == NULL)
122 {
123 bp = newbp;
124 }
125 else
126 {
127 currentbp->next = newbp;
128 }
129 currentbp = newbp;
130 break;
131 }
132
133 mgp = mgp->next;
134 }
135
136 return bp;
137 }
138
139 /*static Int4 NumberOfHetChains(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
140 {
141 ValNodePtr vnp;
142 Int4 mtype, molecule_id, nhet = 0;
143 CharPtr mname;
144
145 while (mgp != NULL)
146 {
147 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
148
149 if (vnp) mtype = vnp->data.intvalue;
150
151 switch(mtype)
152 {
153 case 6:
154 if (vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name))
155 mname = vnp->data.ptrvalue;
156 molecule_id = atoi(mname);
157
158 if (isBiopoly(molecule_id, bp))
159 {
160 nhet++;
161 }
162 break;
163 }
164 mgp = mgp->next;
165 }
166 return nhet;
167 }
168 */
MakeHetGraphPtr(MoleculeGraphPtr mgp,MoleculeGraphPtr bp)169 static MoleculeGraphPtr MakeHetGraphPtr(MoleculeGraphPtr mgp, MoleculeGraphPtr bp)
170 {
171 MoleculeGraphPtr newhet = NULL, het = NULL, currenthet;
172 ValNodePtr vnp;
173 Int4 mtype, molecule_id;
174 CharPtr mname;
175
176 while (mgp != NULL)
177 {
178 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_molecule_type);
179
180 if (vnp) mtype = vnp->data.intvalue;
181
182 switch(mtype)
183 {
184 case 6:
185 vnp = ValNodeFindNext(mgp->descr, NULL, BiomolDescr_name);
186
187 if (vnp) mname = vnp->data.ptrvalue;
188
189 molecule_id = atoi(mname);
190 if (isBiopoly(molecule_id, bp))
191 {
192 newhet = MoleculeGraphNew();
193 newhet->id = mgp->id;
194 newhet->descr = mgp->descr;
195 newhet->seq_id = mgp->seq_id;
196 newhet->residue_sequence = mgp->residue_sequence;
197 newhet->inter_residue_bonds = mgp->inter_residue_bonds;
198 if (het == NULL)
199 {
200 het = newhet;
201 }
202 else
203 {
204 currenthet->next = newhet;
205 }
206 currenthet = newhet;
207 }
208 break;
209 }
210
211 mgp = mgp->next;
212 }
213
214 return het;
215 }
216
PruneFeatureSet(BiostrucFeatureSetPtr bsfsp,Int4 id)217 static BiostrucFeatureSetPtr PruneFeatureSet(BiostrucFeatureSetPtr bsfsp, Int4 id)
218 {
219 BiostrucFeatureSetPtr pbsfs;
220 BiostrucFeaturePtr bsfpDie, bsfp, bsfpnew = NULL;
221 ResidueIntervalPntrPtr ripp;
222 ResiduePntrsPtr rpp;
223 ChemGraphPntrsPtr cgpp;
224
225 pbsfs = BiostrucFeatureSetNew();
226 pbsfs->id = bsfsp->id;
227 bsfsp->id = 0;
228 pbsfs->descr = bsfsp->descr;
229 pbsfs->features = NULL;
230 bsfp = bsfsp->features;
231
232 while (bsfp)
233 {
234 cgpp = (ChemGraphPntrsPtr)bsfp->Location_location->data.ptrvalue;
235 rpp = (ResiduePntrsPtr)cgpp->data.ptrvalue;
236 ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
237
238 if (ripp->molecule_id == id)
239 {
240 if (pbsfs->features == NULL)
241 {
242 pbsfs->features = bsfp;
243 bsfpnew = pbsfs->features;
244 bsfp = bsfp->next;
245 bsfpnew->next = NULL;
246 }
247 else
248 {
249 bsfpnew->next = bsfp;
250 bsfpnew = bsfp;
251 bsfp = bsfp->next;
252 bsfpnew->next = NULL;
253 }
254 }
255 else
256 {
257 bsfpDie = bsfp;
258 bsfp = bsfp->next;
259 bsfpDie = NULL;
260 }
261 }
262 return pbsfs;
263 }
264
PruneModel(BiostrucModelPtr bsmp,ValNodePtr pvnIds,Int4 id)265 static BiostrucModelPtr PruneModel(BiostrucModelPtr bsmp, ValNodePtr pvnIds, Int4 id)
266 {
267 BiostrucModelPtr bsmp2;
268 ValNodePtr pvnCoordinates, pvnLiteral, pvnCoordinatesNew, pvnLiteralNew;
269 ValNodePtr pvnMolId, pvnResId, pvnAtmId, pvnId;
270 ValNodePtr pvnX, pvnY, pvnZ;
271 ValNodePtr prev = NULL, current = NULL;
272 ModelCoordinateSetPtr mcs, mcsDie, mcsnew = NULL;
273 SurfaceCoordinatesPtr scp;
274 AtomicCoordinatesPtr acp, acpnew;
275 ResidueIntervalPntrPtr ripp;
276 ResiduePntrsPtr rpp;
277 AtomPntrsPtr atmp;
278 ModelSpacePointsPtr stmp;
279 Int4 numatoms, index, new_numatoms = 0;
280 Boolean *molidx = NULL, found;
281
282 bsmp2 = BiostrucModelNew();
283 bsmp2->id = bsmp->id;
284 bsmp2->type = bsmp->type;
285 bsmp2->descr = bsmp->descr;
286 bsmp->descr = NULL;
287 bsmp2->model_space = bsmp->model_space;
288 bsmp->model_space = NULL;
289 bsmp2->model_coordinates = NULL;
290
291 mcs = bsmp->model_coordinates;
292
293 while (mcs)
294 {
295 if (mcs->descr)
296 {
297 pvnCoordinates = mcs->Coordinates_coordinates;
298 pvnLiteral = pvnCoordinates->data.ptrvalue;
299 scp = (SurfaceCoordinatesPtr)pvnLiteral->data.ptrvalue;
300 rpp = (ResiduePntrsPtr)scp->contents->data.ptrvalue;
301 ripp = (ResidueIntervalPntrPtr)rpp->data.ptrvalue;
302
303 if (ripp->molecule_id == id)
304 {
305 if (!bsmp2->model_coordinates)
306 {
307 bsmp2->model_coordinates = mcs;
308 mcsnew = bsmp2->model_coordinates;
309 mcs = mcs->next;
310 mcsnew->next = NULL;
311 }
312 else
313 {
314 mcsnew->next = mcs;
315 mcsnew = mcs;
316 mcs = mcs->next;
317 mcsnew->next = NULL;
318 }
319 }
320 else
321 {
322 mcsDie = mcs;
323 mcs = mcs->next;
324 mcsDie = NULL;
325 }
326 }
327 else
328 {
329 pvnCoordinates = mcs->Coordinates_coordinates;
330 pvnLiteral = pvnCoordinates->data.ptrvalue;
331 acp = (AtomicCoordinatesPtr)pvnLiteral->data.ptrvalue;
332 atmp = acp->atoms;
333 stmp = acp->sites;
334 numatoms = atmp->number_of_ptrs;
335 molidx = (Boolean *)MemNew(numatoms * sizeof(Boolean));
336
337 for (index = 0, pvnMolId = atmp->molecule_ids;
338 pvnMolId != NULL; index++, pvnMolId = pvnMolId->next)
339 {
340 pvnId = pvnIds;
341 found = FALSE;
342 while (pvnId)
343 {
344 if (pvnMolId->data.intvalue == pvnId->data.intvalue)
345 {
346 found = TRUE;
347 break;
348 }
349 pvnId = pvnId->next;
350 }
351 if (found)
352 {
353 molidx[index] = TRUE;
354 new_numatoms++;
355 }
356 else molidx[index] = FALSE;
357 }
358
359 acpnew = AtomicCoordinatesNew();
360 acpnew->number_of_points = new_numatoms;
361 acpnew->atoms = AtomPntrsNew();
362 acpnew->atoms->number_of_ptrs = new_numatoms;
363 pvnMolId = atmp->molecule_ids;
364 for (index = 0; index < numatoms; index++)
365 {
366 if (molidx[index] == TRUE)
367 {
368 current = pvnMolId;
369 pvnMolId = pvnMolId->next;
370 current->next = NULL;
371 }
372 else
373 {
374 pvnMolId = pvnMolId->next;
375 continue;
376 }
377 if (acpnew->atoms->molecule_ids == NULL)
378 acpnew->atoms->molecule_ids = current;
379 else
380 prev->next = current;
381 prev = current;
382 }
383 prev = NULL;
384 current = NULL;
385 pvnResId = atmp->residue_ids;
386 for (index = 0; index < numatoms; index++)
387 {
388 if (molidx[index] == TRUE)
389 {
390 current = pvnResId;
391 pvnResId = pvnResId->next;
392 current->next = NULL;
393 }
394 else
395 {
396 pvnResId = pvnResId->next;
397 continue;
398 }
399 if (acpnew->atoms->residue_ids == NULL)
400 acpnew->atoms->residue_ids = current;
401 else
402 prev->next = current;
403 prev = current;
404 }
405 prev = NULL;
406 current = NULL;
407 pvnAtmId = atmp->atom_ids;
408 for (index = 0; index < numatoms; index++)
409 {
410 if (molidx[index] == TRUE)
411 {
412 current = pvnAtmId;
413 pvnAtmId = pvnAtmId->next;
414 current->next = NULL;
415 }
416 else
417 {
418 pvnAtmId = pvnAtmId->next;
419 continue;
420 }
421 if (acpnew->atoms->atom_ids == NULL)
422 acpnew->atoms->atom_ids = current;
423 else
424 prev->next = current;
425 prev = current;
426 }
427 prev = NULL;
428 current = NULL;
429 acpnew->sites = ModelSpacePointsNew();
430 acpnew->sites->scale_factor = 1000;
431 pvnX = stmp->x;
432 for (index = 0; index < numatoms; index++)
433 {
434 if (molidx[index] == TRUE)
435 {
436 current = pvnX;
437 pvnX = pvnX->next;
438 current->next = NULL;
439 }
440 else
441 {
442 pvnX = pvnX->next;
443 continue;
444 }
445 if (acpnew->sites->x == NULL)
446 acpnew->sites->x = current;
447 else
448 prev->next = current;
449 prev = current;
450 }
451 prev = NULL;
452 current = NULL;
453 pvnY = stmp->y;
454 for (index = 0; index < numatoms; index++)
455 {
456 if (molidx[index] == TRUE)
457 {
458 current = pvnY;
459 pvnY = pvnY->next;
460 current->next = NULL;
461 }
462 else
463 {
464 pvnY = pvnY->next;
465 continue;
466 }
467 if (acpnew->sites->y == NULL)
468 acpnew->sites->y = current;
469 else
470 prev->next = current;
471 prev = current;
472 }
473 prev = NULL;
474 current = NULL;
475 pvnZ = stmp->z;
476 for (index = 0; index < numatoms; index++)
477 {
478 if (molidx[index] == TRUE)
479 {
480 current = pvnZ;
481 pvnZ = pvnZ->next;
482 current->next = NULL;
483 }
484 else
485 {
486 pvnZ = pvnZ->next;
487 continue;
488 }
489 if (acpnew->sites->z == NULL)
490 acpnew->sites->z = current;
491 else
492 prev->next = current;
493 prev = current;
494 }
495
496 pvnLiteralNew = ValNodeNew(NULL);
497 pvnLiteralNew->choice = 1;
498 pvnLiteralNew->data.ptrvalue = (Pointer)acpnew;
499 pvnCoordinatesNew = ValNodeNew(NULL);
500 pvnCoordinatesNew->choice = 1;
501 pvnCoordinatesNew->data.ptrvalue = (Pointer)pvnLiteralNew;
502 mcsnew = ModelCoordinateSetNew();
503 mcsnew->id = mcs->id;
504 mcsnew->descr = NULL;
505 mcsnew->Coordinates_coordinates = pvnCoordinatesNew;
506
507 if (!bsmp2->model_coordinates)
508 {
509 bsmp2->model_coordinates = mcsnew;
510 mcs = mcs->next;
511 }
512 }
513 }
514
515 if (molidx) MemFree(molidx);
516 return bsmp2;
517 }
518
PruneBiostruc(BiostrucPtr bsp,CharPtr chain)519 BiostrucPtr LIBCALL PruneBiostruc(BiostrucPtr bsp, CharPtr chain)
520 {
521 ValNodePtr vnp, pvnId = NULL, pvnIds = NULL;
522 CharPtr chnid, hetid, feature_name;
523 Int4 mol_id, hetnum, molId1, molId2;
524 BiostrucPtr bsp2;
525 BiostrucGraphPtr bsgp;
526 BiostrucModelPtr bsmp, bsmpHead = NULL, bsmpTail;
527 BiostrucFeatureSetPtr bsfsp, bsfspHead = NULL, bsfspTail;
528 MoleculeGraphPtr bp, het, currenthet, currentbp;
529 InterResidueBondPtr pirb, pirbnew;
530 Boolean found1, found2;
531
532 if (!bsp)
533 {
534 return NULL;
535 }
536
537 bsgp = bsp->chemical_graph;
538 /*bsmp = bsp->model;*/
539
540 bp = MakeBioGraphPtr(bsgp->molecule_graphs);
541
542 /* nhet = NumberOfHetChains(bsgp->molecule_graphs, bp);*/
543
544 het = MakeHetGraphPtr(bsgp->molecule_graphs, bp);
545
546 bsp2 = BiostrucNew();
547 bsp2->id = bsp->id;
548 bsp->id = NULL;
549 vnp = ValNodeNew(NULL);
550 vnp = bsp->descr;
551 bsp2->descr = vnp;
552 bsp->descr = NULL;
553 bsp2->chemical_graph = BiostrucGraphNew();
554
555 /* Pull out the relevant parts of the molecule_graphs */
556
557 for (currentbp = bp; currentbp != NULL; currentbp = currentbp->next)
558 {
559 if (vnp = ValNodeFindNext(currentbp->descr, NULL, BiomolDescr_name))
560 chnid = vnp->data.ptrvalue;
561
562 if (*chnid == *chain)
563 {
564 bsp2->chemical_graph->molecule_graphs = currentbp;
565 mol_id = currentbp->id;
566 ValNodeAddInt(&pvnIds, 0, mol_id);
567 currentbp->next = NULL;
568 break;
569 }
570 }
571
572 while (het)
573 {
574 if (vnp = ValNodeFindNext(het->descr, NULL, BiomolDescr_name))
575 hetid = vnp->data.ptrvalue;
576
577 hetnum = atoi(hetid);
578
579 if (hetnum == mol_id)
580 {
581 if (!bsp2->chemical_graph->molecule_graphs->next)
582 {
583 bsp2->chemical_graph->molecule_graphs->next = het;
584 currenthet = bsp2->chemical_graph->molecule_graphs->next;
585 ValNodeAddInt(&pvnIds, 0, currenthet->id);
586 het = het->next;
587 currenthet->next = NULL;
588 }
589 else
590 {
591 currenthet->next = het;
592 currenthet = het;
593 ValNodeAddInt(&pvnIds, 0, currenthet->id);
594 het = het->next;
595 currenthet->next = NULL;
596 }
597 }
598 else het = het->next;
599 }
600
601 bsp->chemical_graph->molecule_graphs = NULL;
602 bsp2->chemical_graph->descr = bsp->chemical_graph->descr;
603 bsp->chemical_graph->descr = NULL;
604 bsp2->chemical_graph->residue_graphs = bsp->chemical_graph->residue_graphs;
605 bsp->chemical_graph->residue_graphs = NULL;
606
607 pirb = bsp->chemical_graph->inter_molecule_bonds;
608
609 while (pirb)
610 {
611 molId1 = pirb->atom_id_1->molecule_id;
612 molId2 = pirb->atom_id_2->molecule_id;
613 found1 = FALSE;
614 found2 = FALSE;
615 pvnId = pvnIds;
616
617 while (pvnId)
618 {
619 if (pvnId->data.intvalue == molId1)
620 {
621 found1 = TRUE;
622 break;
623 }
624 pvnId = pvnId->next;
625 }
626
627 pvnId = pvnIds;
628 while (pvnId)
629 {
630 if (pvnId->data.intvalue == molId2)
631 {
632 found2 = TRUE;
633 break;
634 }
635 pvnId = pvnId->next;
636 }
637
638 if ((found1) && (found2))
639 {
640 if (!bsp2->chemical_graph->inter_molecule_bonds)
641 {
642 bsp2->chemical_graph->inter_molecule_bonds = pirb;
643 pirbnew = bsp2->chemical_graph->inter_molecule_bonds;
644 pirb = pirb->next;
645 pirbnew->next = NULL;
646 }
647 else
648 {
649 pirbnew->next = pirb;
650 pirbnew = pirb;
651 pirb = pirb->next;
652 pirbnew->next = NULL;
653 }
654 }
655 else pirb = pirb->next;
656 }
657
658 bsp->chemical_graph->inter_molecule_bonds = NULL;
659
660 bsfsp = bsp->features;
661 while (bsfsp)
662 {
663 if (vnp = ValNodeFindNext(bsfsp->descr, NULL, BiostrucFeatureSetDescr_name))
664 feature_name = vnp->data.ptrvalue;
665
666 if ((!StringICmp("NCBI assigned secondary structure", feature_name)) ||
667 (!StringICmp("NCBI Domains", feature_name)) ||
668 (!StringICmp("PDB secondary structure", feature_name)))
669 {
670 if (!bsfspHead)
671 {
672 bsfspHead = PruneFeatureSet(bsfsp, mol_id);
673 bsfspTail = bsfspHead;
674 bsfspTail->next = NULL;
675 }
676 else
677 {
678 bsfspTail->next = PruneFeatureSet(bsfsp, mol_id);
679 bsfspTail = bsfspTail->next;
680 bsfspTail->next = NULL;
681 }
682 }
683
684 bsfsp = bsfsp->next;
685 }
686 bsp2->features = bsfspHead;
687 bsp->features = NULL;
688
689 bsmp = bsp->model;
690
691 while (bsmp)
692 {
693 if (!bsmpHead)
694 {
695 bsmpHead = PruneModel(bsmp, pvnIds, mol_id);
696 bsmpTail = bsmpHead;
697 bsmpTail->next = NULL;
698 }
699 else
700 {
701 bsmpTail->next = PruneModel(bsmp, pvnIds, mol_id);
702 bsmpTail = bsmpTail->next;
703 bsmpTail->next = NULL;
704 }
705
706 bsmp = bsmp->next;
707 }
708
709 bsp2->model = bsmpHead;
710 bsp->model = NULL;
711 bsp->chemical_graph = NULL;
712 BiostrucFree(bsp);
713 ValNodeFree(pvnIds);
714 return bsp2;
715 }
716
717