1 #include <maputil.h>
2 #include <objsub.h>
3
4 /**************************************************
5 *
6 * free_enzyme_list(enp)
7 * Free a list of EnzDataPtr
8 *
9 **************************************************/
free_enzyme_list(EnzPtr enp)10 NLM_EXTERN EnzPtr free_enzyme_list (EnzPtr enp)
11 {
12 EnzDataPtr edp;
13 EnzPtr next;
14
15 while(enp != NULL)
16 {
17 next = enp->next;
18 edp = enp->data.ptrvalue;
19 MemFree(edp->name);
20 MemFree(edp->pattern);
21 MemFree(edp);
22
23 MemFree(enp);
24 enp = next;
25 }
26
27 return enp;
28
29 }
30
31 /****************************************************
32 *
33 * make_enzyme_list(file_name)
34 * build a ValNodeList of EnzDataPtr from teh
35 * input file
36 *
37 ****************************************************/
make_enzyme_list(CharPtr file_name)38 NLM_EXTERN EnzPtr make_enzyme_list(CharPtr file_name)
39 {
40 Char buff[100];
41 Char name[100];
42 Char pattern[100];
43 long cut_pos;
44 EnzPtr head = NULL, e_new;
45 EnzDataPtr data;
46 Uint1 order =0;
47 FILE *ifp;
48
49 if(file_name == NULL)
50 return NULL;
51 if((ifp = FileOpen(file_name, "r")) == NULL){
52 Message(MSG_ERROR, "fail to open Enzyme file %s", file_name);
53 return NULL;
54 }
55
56 while(FileGets(buff, 99, ifp) != NULL){
57 sscanf(buff, "%s\t%s\t%ld\n", name, pattern, &cut_pos);
58 ++order;
59 data = (EnzDataPtr)MemNew(sizeof(EnzData));
60 data->name = StringSave(name);
61 data->pattern = StringSave(pattern);
62 data->cut_pos = 1;
63 /**data->cut_pos = cut_pos;**/
64
65 e_new = ValNodeNew(head);
66 e_new->choice = order;
67 e_new->data.ptrvalue = data;
68 if(head == NULL)
69 head = e_new;
70 }/**end of while**/
71
72 FileClose(ifp);
73 return head;
74 }
75
76
77 /**********************************************************************
78 *
79 * FreeEquivAlign(ealign_list)
80 * Free a list of Seq-annot that is of type Seq-align
81 * mostly, those are the externally loaded Seq-align for showing
82 * the Equiv map
83 *
84 ***********************************************************************/
FreeEquivAlign(ValNodePtr ealign_list)85 NLM_EXTERN ValNodePtr FreeEquivAlign(ValNodePtr ealign_list)
86 {
87 SeqAnnotPtr annot;
88 ValNodePtr next;
89
90 while(ealign_list)
91 {
92 if(ealign_list->choice == 1)
93 {
94 annot = (SeqAnnotPtr)(ealign_list->data.ptrvalue);
95 SeqAnnotFree(annot);
96 }
97 next = ealign_list->next;
98 MemFree(ealign_list);
99 ealign_list = next;
100 }
101
102 return NULL;
103 }
104
105
106 /*******************************************************************
107 *
108 * GetEquivAlignType(annot)
109 * annot stores the alignments of markers mapped by more than
110 * one groups. if return 1, the markers are the consistent markers
111 * if return 2, the markers are inconsistent markers
112 * if return 0, the alignment simply records the mapping to
113 * the sequence map
114 * if return -1, unknown status. Will be treated the same as 1
115 *
116 *******************************************************************/
GetEquivAlignType(SeqAnnotPtr annot)117 NLM_EXTERN Int2 GetEquivAlignType(SeqAnnotPtr annot)
118 {
119 ValNodePtr vnp;
120 UserObjectPtr uop;
121 UserFieldPtr ufp;
122 ObjectIdPtr oip;
123
124 if(annot == NULL)
125 return -1;
126 for(vnp = annot->desc; vnp != NULL; vnp = vnp->next)
127 {
128 if(vnp->choice == Annot_descr_user)
129 {
130 uop = vnp->data.ptrvalue;
131 if(uop != NULL)
132 {
133 oip = uop->type;
134 if(oip->str && StringCmp(oip->str,
135 "Equiv Alignment") == 0)
136 {
137 ufp = uop->data;
138 if(ufp->choice == 2)
139 return (Int2)(ufp->data.intvalue);
140 }
141 }
142 }
143 }
144 return -1;
145 }
146
147
148 /***********************************************************************
149 *
150 * FreeMuskSep(sep_list)
151 * Free the manually loaded Seq-entries
152 * sep_list: a list of MuskSepPtr
153 *
154 ***********************************************************************/
FreeMuskSep(ValNodePtr sep_list)155 NLM_EXTERN ValNodePtr FreeMuskSep(ValNodePtr sep_list)
156 {
157 ValNodePtr next;
158 MuskSepPtr msp;
159
160 while(sep_list)
161 {
162 next = sep_list->next;
163 msp = (MuskSepPtr)(sep_list->data.ptrvalue);
164 switch(msp->datatype)
165 {
166 case OBJ_SEQENTRY:
167 SeqEntryFree((SeqEntryPtr)(msp->dataptr));
168 break;
169 case OBJ_SEQSUB :
170 SeqSubmitFree(msp->dataptr);
171 break;
172 case OBJ_BIOSEQ :
173 BioseqFree((BioseqPtr)(msp->dataptr));
174 break;
175 case OBJ_BIOSEQSET:
176 BioseqSetFree((BioseqSetPtr)(msp->dataptr));
177 break;
178 default:
179 break;
180 }
181
182 MemFree(msp);
183 MemFree(sep_list);
184 sep_list = next;
185 }
186 return NULL;
187 }
188
189
190 /**********************************************************************
191 *
192 * get_Bioseq_type(bsp)
193 * return the type of Bioseq, such as the genetic or physical map
194 * depending on its bsp->repr type and bsp->seq_ext type.
195 *
196 **********************************************************************/
get_Bioseq_type(BioseqPtr bsp)197 NLM_EXTERN Uint1 get_Bioseq_type(BioseqPtr bsp)
198 {
199 SeqFeatPtr sfp;
200 SeqAnnotPtr annot;
201
202 switch (bsp->repr)
203 {
204 case Seq_repr_map:
205 sfp = bsp->seq_ext;
206 if(sfp == NULL)
207 {
208 annot = bsp->annot;
209 while(annot)
210 {
211 if(annot->type == 1)
212 sfp = annot->data;
213 if(sfp != NULL)
214 break;
215 }
216 }
217 if(sfp == NULL)
218 return PHYSICAL_MAP;
219 if(sfp->data.choice == 1)
220 return GENETIC_MAP;
221 if(sfp->data.choice == 13)
222 return RESTRICTION_MAP;
223 if(sfp->data.choice == 14)
224 return CYTO_MAP;
225 return PHYSICAL_MAP;
226
227 case Seq_repr_virtual:
228 return PHYSICAL_MAP;
229 case Seq_repr_seg:
230 case Seq_repr_ref:
231 case Seq_repr_delta:
232 return SEG_SEQ;
233 default:
234 return RAW_SEQ;
235 }
236 }
237
238
239 /********************************************************************
240 *
241 * MapLayoutFree(head)
242 * Free the list of MapPosPtr
243 *
244 ********************************************************************/
MapLayoutFree(MapLayoutPtr head)245 NLM_EXTERN MapLayoutPtr MapLayoutFree(MapLayoutPtr head)
246 {
247 ValNodeFreeData(head);
248 return NULL;
249 }
250
251
252 /***************************************************************************
253 *
254 * getBioseqNumbering(bsp)
255 * get the numbering object from Seq_descr. If no numbering, return NULL
256 *
257 ****************************************************************************/
getBioseqNumbering(BioseqPtr bsp)258 NLM_EXTERN NumberingPtr getBioseqNumbering (BioseqPtr bsp)
259 {
260 NumberingPtr np = NULL;
261 ValNodePtr anp;
262
263 anp = BioseqGetSeqDescr(bsp, Seq_descr_num, NULL);
264 if(anp != NULL)
265 np = (NumberingPtr)anp->data.ptrvalue;
266 if(np != NULL)
267 {
268 if(np->choice != Numbering_real)
269 return NULL;
270 if(np->data.ptrvalue == NULL)
271 return NULL;
272 }
273
274
275 return np;
276 }
277
278 /**********************************************************************
279 *
280 * IS_NUM_GENE(gene_label): kludge function
281 * determine if the gene_label is used as a map unit, such
282 * as the case for C.elegans physical map
283 *
284 **********************************************************************/
IS_NUM_GENE(CharPtr gene_label)285 NLM_EXTERN Boolean IS_NUM_GENE(CharPtr gene_label)
286 {
287 return (gene_label[0]=='0' || atol(gene_label) != 0 );
288 }
289
290 /***********************************************************************
291 *
292 * map_unit_label(): create a label for the map unit
293 *
294 ***********************************************************************/
map_unit_label(Int4 pos,NumberingPtr np,CharPtr label,Boolean use_kb)295 NLM_EXTERN Boolean map_unit_label(Int4 pos, NumberingPtr np, CharPtr label, Boolean use_kb)
296 {
297 DataVal num_val;
298 FloatHi m_val;
299
300 if(np == NULL)
301 {
302 if(use_kb)
303 pos = pos/1000;
304 if(use_kb && pos != 0)
305 sprintf(label, "%ldK", (long) pos);
306 else
307 sprintf(label, "%ld", (long) pos);
308 return FALSE;
309 }
310 else
311 {
312 NumberingValue(np, pos, &num_val); /**Add the last line**/
313 m_val = num_val.realvalue;
314 sprintf(label, "%2.2Lf", (long double) m_val);
315 return TRUE;
316 }
317 }
318
319
start_new_stack(Int4 pre_pos,Int4 pos,Int4 scale,Int2Ptr label_width,Int2 c_width)320 NLM_EXTERN Boolean start_new_stack(Int4 pre_pos, Int4 pos, Int4 scale, Int2Ptr label_width, Int2 c_width)
321 {
322 Int2 max_width;
323 Int4 space;
324
325 max_width = MAX(c_width, *label_width);
326 if(pre_pos != -1 && (*label_width > 0)) /*not the first mark*/
327 {
328 space = (ABS(pre_pos - pos))/scale;
329 if(space < (max_width/2 +2))
330 return FALSE;
331 }
332
333 *label_width = c_width;
334 return TRUE;
335 }
336
337 /*************************************************************************
338 *
339 * is_label_match(obj_id, label)
340 * return TRUE if obj_id->str matches with label
341 *
342 **************************************************************************/
343
is_label_match(ObjectIdPtr obj_id,CharPtr label)344 NLM_EXTERN Boolean is_label_match(ObjectIdPtr obj_id, CharPtr label)
345 {
346 if(obj_id !=NULL && obj_id->str !=NULL)
347 return (StringCmp(obj_id->str, label) == 0);
348 return FALSE;
349 }
350
351
352 /*************************************************************************
353 *
354 * get_band_type, get_band_name(): kludge functions associated with
355 * the cytogenetic map, i.e., the band pattern and the name of the
356 * cytogenetic map
357 *
358 **************************************************************************/
get_uop_type(UserObjectPtr uop)359 static Uint1 get_uop_type(UserObjectPtr uop)
360 {
361 ObjectIdPtr obj_id;
362
363 obj_id = uop->type;
364 if(is_label_match(obj_id, "BND"))
365 return BND;
366 if(is_label_match(obj_id, "HET"))
367 return HET;
368 if(is_label_match(obj_id, "TEL"))
369 return TEL;
370 if(is_label_match(obj_id, "CEN"))
371 return CEN;
372 return 0;
373 }
374
get_band_type(UserObjectPtr uop)375 NLM_EXTERN Uint1 get_band_type(UserObjectPtr uop)
376 {
377 UserFieldPtr ufp;
378 Uint1 type;
379
380 type = get_uop_type(uop);
381 if(type !=0)
382 return type;
383
384 while(uop)
385 {
386 if(uop->data)
387 {
388 ufp = uop->data;
389 while(ufp)
390 {
391 if(is_label_match(ufp->label, "BandType"))
392 {
393 if(StringCmp(ufp->data.ptrvalue, "GiemsaPos") ==0)
394 return GIEMSA_POS;
395 if(StringCmp(ufp->data.ptrvalue, "GiemsaNeg") ==0)
396 return GIEMSA_NEG;
397 if(StringCmp(ufp->data.ptrvalue, "Acrocentric") ==0)
398 return ACRO_CENTRIC;
399 if(StringCmp(ufp->data.ptrvalue, "Point") ==0)
400 return BAND_POINT;
401 if(StringCmp(ufp->data.ptrvalue, "VariableReg") ==0)
402 return VARIABLE_REG;
403 return 0;
404 }
405 ufp = ufp->next;
406 }
407 }
408
409 uop = uop->next;
410 }
411 return 0;
412 }
413
414
415
416 /*********************************************************************
417 *
418 * get_band_name(uop)
419 * parse the band name from a cytogenetic map
420 *
421 *********************************************************************/
get_band_name(UserObjectPtr uop)422 NLM_EXTERN CharPtr get_band_name(UserObjectPtr uop)
423 {
424 UserFieldPtr ufp;
425
426 while(uop)
427 {
428 for(ufp = uop->data; ufp!=NULL; ufp=ufp->next)
429 {
430 if(is_label_match(ufp->label, "BandName"))
431 return (CharPtr)(ufp->data.ptrvalue);
432 }
433 uop = uop->next;
434 }
435 return NULL;
436 }
437
438
439 /************************************************************************
440 *
441 * is_map_segment(slp)
442 * return TRUE if slp is a Seq-loc from a amp
443 * return FALSE if it is not a map or the Bioseq is not loaded to
444 * the memory yet
445 *
446 ************************************************************************/
is_map_segment(SeqLocPtr slp)447 NLM_EXTERN Boolean is_map_segment(SeqLocPtr slp)
448 {
449 BioseqPtr bsp=NULL;
450 SeqLocPtr loc;
451 SeqIdPtr sip;
452 ObjectIdPtr oip;
453
454 if(slp->choice == SEQLOC_NULL || slp->choice == SEQLOC_EMPTY)
455 return TRUE;
456 sip = SeqLocId(slp);
457 if(sip == NULL)
458 return TRUE;
459 if(sip->choice == SEQID_LOCAL)
460 {
461 oip = sip->data.ptrvalue;
462 if(oip && oip->str)
463 {
464 if(StringNCmp(oip->str, "virtual", 7) == 0)
465 return TRUE;
466 if(StringNCmp(oip->str, "virtural", 7) == 0)
467 return TRUE;
468 }
469 }
470 bsp = BioseqFindCore(sip);
471 if(bsp !=NULL)
472 {
473 if(bsp->repr == Seq_repr_map)
474 return TRUE;
475 if(bsp->repr == Seq_repr_virtual)
476 return TRUE;
477 if(bsp->repr == Seq_repr_seg)
478 {
479 loc = bsp->seq_ext;
480 return is_map_segment(loc);
481 }
482
483 }
484
485 return FALSE;
486 }
487
488 /***************************************************************
489 *
490 * figure_map_seqid(ext_loc)
491 * a very unreliable way to figure out the if there is a
492 * Seq-id for the map. It is dependent on the frequency of
493 * the Seq-id in a segmented sequence
494 *
495 ****************************************************************/
496 typedef struct mapseqid_count{
497 SeqIdPtr sip;
498 Int4 count;
499 }MapSeqIdCount, PNTR MapSeqIdCountPtr;
500 #define COUNT_NUM 20
501
figure_map_seqid(SeqLocPtr ext_loc)502 NLM_EXTERN SeqIdPtr figure_map_seqid(SeqLocPtr ext_loc)
503 {
504 MapSeqIdCount id_count[COUNT_NUM];
505 Int4 i, j, maxcount, total, c_total;
506 Int4 n_maxcount;
507 SeqIdPtr sip;
508 BioseqPtr bsp;
509
510 MemSet((Pointer)id_count, 0, (size_t)COUNT_NUM * sizeof(MapSeqIdCount));
511 i =0; j= 0;
512 while(ext_loc)
513 {
514 ++i;
515 sip = SeqLocId(ext_loc);
516 for(j =0; j<COUNT_NUM; ++j)
517 {
518 if(id_count[j].sip == NULL)
519 {
520 id_count[j].sip = sip;
521 id_count[j].count = 1;
522 break;
523 }
524 else
525 {
526 if(SeqIdMatch(id_count[j].sip, sip))
527 {
528 ++(id_count[j].count);
529 break;
530 }
531 }
532 }
533 ext_loc = ext_loc->next;
534 }
535 total = i;
536
537 /* if(total < COUNT_NUM)
538 return NULL; */
539 c_total = MIN(total, COUNT_NUM);
540 maxcount = 0;
541 n_maxcount = 0;
542 j = 0;
543 for(i = 0; i<c_total; ++i)
544 {
545 if(id_count[i].count > maxcount)
546 {
547 j = i;
548 n_maxcount = maxcount;
549 maxcount = id_count[i].count;
550 }
551 }
552 sip = id_count[j].sip;
553 if(sip == NULL)
554 return NULL;
555 if(sip->choice != SEQID_LOCAL && sip->choice != SEQID_GENERAL)
556 return NULL;
557 bsp = BioseqFind(sip);
558 if(bsp != NULL)
559 {
560 if(bsp->repr == Seq_repr_raw || bsp->repr == Seq_repr_const)
561 return NULL;
562 if(bsp->repr == Seq_repr_map || bsp->repr == Seq_repr_virtual)
563 return sip;
564 }
565
566
567 if((FloatHi)maxcount/(FloatHi)total > 0.3)
568 return (id_count[j].sip);
569 if(maxcount>n_maxcount)
570 {
571 if(sip->choice == SEQID_LOCAL || sip->choice == SEQID_GENERAL)
572 return sip;
573 else
574 return NULL;
575 }
576
577 return NULL;
578 }
579
580
ck_cyto_type(SeqFeatPtr sfp)581 NLM_EXTERN Uint1 ck_cyto_type(SeqFeatPtr sfp)
582 {
583 UserObjectPtr uop;
584 Uint1 band;
585
586 if(sfp->data.choice == 14)
587 {
588 uop = sfp->data.value.ptrvalue;
589 band = get_band_type(uop);
590 if(band >= BND && band <= CEN)
591 return FLY_CYTO;
592 if(band >= BAND_POINT && band <= VARIABLE_REG)
593 return HUMAN_CYTO;
594 }
595
596 return 0;
597 }
598
599
600
601 /********************************************************************
602 *
603 * SortAlignPosition(app, dim)
604 * Sort out the order of a multiple alignment in the vertical
605 * display mode. It is sorted to the descending order of
606 * app->top. one app correspond to one aligned segment. It can
607 * be of multiple dimensions
608 * app: alignment position
609 * dim: dimention of alignment
610 *
611 *********************************************************************/
SortAlignPosition(AlignPosPtr app,Int2 dim)612 NLM_EXTERN void SortAlignPosition(AlignPosPtr app, Int2 dim)
613 {
614 Int4 tmp_left, tmp_right, tmp_top, tmp_bottom;
615 Boolean s_witch = TRUE;
616 Int2 i, j;
617
618 for(i =0; i<dim-1 && s_witch; ++i) /*sort the order*/
619 {
620 s_witch = FALSE;
621 for(j = 0; j<dim-i-1; ++j)
622 {
623 if(app->top[j] > app->top[j+1])
624 {
625 s_witch = TRUE;
626 tmp_left = app->left[j];
627 tmp_right = app->right[j];
628 tmp_top = app->top[j];
629 tmp_bottom = app->bottom[j];
630
631 app->left[j] = app->left[j+1];
632 app->right[j] = app->right[j+1];
633 app->top[j] = app->top[j+1];
634 app->bottom[j] = app->bottom[j+1];
635
636 app->left[j+1] = tmp_left;
637 app->right[j+1] = tmp_right;
638 app->top[j+1] = tmp_top;
639 app->bottom[j+1] = tmp_bottom;
640 }
641 }
642 }
643 }
644
free_slp_list(ValNodePtr slp_list)645 NLM_EXTERN ValNodePtr free_slp_list(ValNodePtr slp_list)
646 {
647 ValNodePtr next;
648 SeqLocPtr slp;
649
650 while(slp_list)
651 {
652 next = slp_list->next;
653 slp = (SeqLocPtr)(slp_list->data.ptrvalue);
654 if(slp != NULL)
655 SeqLocSetFree(slp);
656 slp_list->next = NULL;
657 MemFree(slp_list);
658 slp_list = next;
659 }
660
661 return NULL;
662 }
663
/*
 * load_one_label(): append a copy of label to a ValNode list.
 *
 * *list is the head of the list and *prev its last node; both are
 * updated. NULL or empty labels are ignored.
 */
static void load_one_label(CharPtr label, ValNodePtr PNTR list, ValNodePtr PNTR prev)
{
    ValNodePtr fresh;

    if (label == NULL || label[0] == '\0')
        return;

    fresh = ValNodeNew(NULL);
    fresh->data.ptrvalue = StringSave(label);

    if (*prev != NULL)
        (*prev)->next = fresh;
    else
        *list = fresh;	/* first node becomes the head */
    *prev = fresh;
}
679
680
/*
 * load_gene_list(): collect the labels of the gene-like features in
 * the chain starting at sfp.
 *
 * Features of subtype gene, tRNA, rep_origin and CDS contribute
 * their content label (at most 20 characters). For features with
 * data.choice 1 the data pointer is read as a Gene-ref, and its
 * synonyms and the tag strings of its db entries are added as well;
 * db entries whose db name is "GenBank" (case-insensitive) are
 * skipped.
 * Labels are appended through load_one_label, which updates *list
 * and *prev.
 * Nothing is done when the object manager has no Seq-feat type
 * entry or that entry lacks a label or subtype function.
 */
static void load_gene_list(SeqFeatPtr sfp, ValNodePtr PNTR list, ValNodePtr PNTR prev)
{
    ObjMgrPtr omp;
    ObjMgrTypePtr omtp;
    Char label[21];
    GeneRefPtr grp;
    ValNodePtr syn;
    CharPtr str;
    Uint2 subtype;
    ValNodePtr vnp;
    DbtagPtr db_tag;
    ObjectIdPtr oip;

    omp = ObjMgrGet();
    if (omp == NULL)
        return;
    omtp = ObjMgrTypeFind(omp, OBJ_SEQFEAT, NULL, NULL);
    if (omtp == NULL || omtp->labelfunc == NULL || omtp->subtypefunc == NULL)
        return;

    for (; sfp != NULL; sfp = sfp->next)
    {
        subtype = (*(omtp->subtypefunc)) (sfp);
        if (subtype != FEATDEF_GENE && subtype != FEATDEF_tRNA
            && subtype != FEATDEF_rep_origin && subtype != FEATDEF_CDS)
            continue;

        /* start from an empty label so a label function that writes
           nothing cannot leave the previous feature's text behind */
        label[0] = '\0';
        (*(omtp->labelfunc)) (sfp, label, 20, OM_LABEL_CONTENT);
        if (label[0] != '\0')
            load_one_label(label, list, prev);

        /*add synonym for the Gene-ref*/
        if (sfp->data.choice != 1)
            continue;
        grp = (GeneRefPtr) sfp->data.value.ptrvalue;
        if (grp == NULL)
            continue;

        for (syn = grp->syn; syn != NULL; syn = syn->next)
        {
            str = (CharPtr) syn->data.ptrvalue;
            if (str != NULL)
                load_one_label(str, list, prev);
        }
        for (vnp = grp->db; vnp != NULL; vnp = vnp->next)
        {
            db_tag = (DbtagPtr) vnp->data.ptrvalue;
            if (db_tag == NULL)
                continue;
            if (db_tag->db == NULL || StringICmp(db_tag->db, "GenBank") != 0)
            {
                oip = db_tag->tag;
                if (oip != NULL && oip->str != NULL)
                    load_one_label(oip->str, list, prev);
            }
        }
    }
}
746
747
get_seg_num(SeqLocPtr slp)748 NLM_EXTERN Int2 get_seg_num(SeqLocPtr slp)
749 {
750 Int2 i =0;
751
752 while(slp)
753 {
754 ++i;
755 slp = slp->next;
756 }
757
758 return i;
759 }
760
761 #define MAX_SEG_NUM 4 /*maximum segment number to search for features in segments*/
762 typedef struct findgene { /* used by FindGeneCallback */
763 SeqEntryPtr top; /* top seqentry for explore.. used to prevent recursion */
764 ValNodePtr PNTR list; /* the list of feature labels */
765 ValNodePtr PNTR prev;
766 } FindGeneStruct, PNTR FindGeneStructPtr;
767
/*
 * FindGeneCallback(): SeqEntryExplore callback that collects gene
 * labels into the list described by the FindGeneStruct in data.
 *
 * For a Bioseq entry (choice 1):
 *   - a segmented Bioseq with at most MAX_SEG_NUM segments, or with
 *     no features of its own, has the Seq-entries of its segments
 *     explored with this same callback; the entry being explored
 *     (fgsp->top) and the entry handled just before are skipped;
 *   - a map Bioseq contributes the features in its seq_ext;
 *   - unless the segments were explored, the annotations of the
 *     Bioseq are scanned.
 * For any other entry the data pointer is read as a Bioseq-set and
 * its annotations are scanned.
 * Annotations of type 1 have their data passed to load_gene_list.
 * index and indent are not used.
 */
static void FindGeneCallback(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    ValNodePtr PNTR list, PNTR prev;
    BioseqPtr bsp;
    BioseqSetPtr bssp;
    Int2 segnum;
    Boolean check_seg = FALSE;
    SeqEntryPtr s_sep;
    BioseqPtr s_bsp;
    SeqLocPtr slp;
    SeqIdPtr sip;
    SeqFeatPtr sfp;
    SeqAnnotPtr annot = NULL;
    FindGeneStructPtr fgsp;
    FindGeneStruct fgs;
    SeqEntryPtr last = NULL;

    /* validate the user data before looking inside it */
    fgsp = (FindGeneStructPtr)data;
    if (sep == NULL || fgsp == NULL)
        return;
    list = fgsp->list;
    prev = fgsp->prev;
    if (list == NULL || prev == NULL)
        return;

    if (sep->choice == 1)
    {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        if (bsp == NULL)
            return;
        if (bsp->repr == Seq_repr_seg)
        {
            segnum = get_seg_num((SeqLocPtr)(bsp->seq_ext));
            if (segnum <= MAX_SEG_NUM || !BioseqHasFeature(bsp))
            {
                check_seg = TRUE;
                for (slp = (SeqLocPtr)(bsp->seq_ext); slp != NULL; slp = slp->next)
                {
                    sip = SeqLocId(slp);
                    if (sip == NULL)
                        continue;
                    s_bsp = BioseqLockById(sip);
                    if (s_bsp == NULL)
                        continue;

                    s_sep = SeqEntryFind(s_bsp->id);
                    if ((s_sep != NULL) && (s_sep != fgsp->top) && (s_sep != last))
                    {
                        fgs.top = s_sep;
                        fgs.list = list;
                        fgs.prev = prev;
                        last = s_sep;
                        SeqEntryExplore(s_sep, (Pointer) (&fgs), FindGeneCallback);
                    }
                    BioseqUnlock(s_bsp);
                }
            }
        }
        if (bsp->repr == Seq_repr_map)
        {
            sfp = (SeqFeatPtr)(bsp->seq_ext);
            load_gene_list(sfp, list, prev);
        }
        if (check_seg == FALSE)
            annot = bsp->annot;
    }
    else
    {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        if (bssp == NULL)
            return;
        annot = bssp->annot;
    }

    for (; annot != NULL; annot = annot->next)
    {
        if (annot->type == 1)
        {
            sfp = (SeqFeatPtr)(annot->data);
            load_gene_list(sfp, list, prev);
        }
    }
}
853
854
855 /*****************************************************************
856 *
857 * Build a list of gene symbols to supply the Find Gene option in
858 * the global view
859 *
860 ******************************************************************/
BuildGeneList(SeqEntryPtr sep)861 NLM_EXTERN ValNodePtr BuildGeneList(SeqEntryPtr sep)
862 {
863 ValNodePtr list = NULL;
864 ValNodePtr prev = NULL;
865 FindGeneStruct fgs;
866
867 if(sep == NULL)
868 return FALSE;
869
870 fgs.top = sep;
871 fgs.list = &list;
872 fgs.prev = &prev;
873
874 SeqEntryExplore(sep, (Pointer) (&fgs), FindGeneCallback);
875
876 return list;
877
878 }
879
880
881
882 typedef struct querydata {
883 ValNodePtr query_list;
884 GeneDataPtr gdp;
885 SeqLocPtr target;
886
887 ObjMgrPtr omp;
888 ObjMgrTypePtr omtp;
889 Char label[21];
890 }QueryData, PNTR QueryDataPtr;
891
892 /***************************************************************
893 *
894 * all the landmark genes need to be found ONLY ONCE. The user
895 * input data need to be found multiple times
896 *
897 ****************************************************************/
gene_is_loaded(GeneDataPtr gdp,CharPtr symbol)898 static Boolean gene_is_loaded(GeneDataPtr gdp, CharPtr symbol)
899 {
900 if(symbol == NULL || gdp == NULL)
901 return FALSE;
902
903 while(gdp)
904 {
905 if(gdp->symbol)
906 if(StringCmp(gdp->symbol, symbol) == 0)
907 return TRUE;
908 gdp = gdp->next;
909 }
910
911 return FALSE;
912 }
913
/*
 * dup_seq_loc(): build a new interval location from the start, stop,
 * strand and Seq-id reported for slp. Returns NULL for a NULL slp,
 * otherwise whatever SeqLocIntNew returns.
 */
static SeqLocPtr dup_seq_loc(SeqLocPtr slp)
{
    Int4 from, to;
    Uint1 which_strand;

    if (slp != NULL)
    {
        from = SeqLocStart(slp);
        to = SeqLocStop(slp);
        which_strand = SeqLocStrand(slp);
        return SeqLocIntNew(from, to, which_strand, SeqLocId(slp));
    }

    return NULL;
}
928
929
930
931 /*****************************************************************
932 *
933 * if sfp is a Gene-ref and contains the gene in g_list,
934 * return the string in g_list
935 * else return NULL
936 *
937 *****************************************************************/
check_landmark(SeqFeatPtr sfp,CharPtr mark)938 NLM_EXTERN Boolean check_landmark(SeqFeatPtr sfp, CharPtr mark)
939 {
940 GeneRefPtr grp;
941 ValNodePtr curr;
942 DbtagPtr db_tag;
943 ObjectIdPtr oip;
944
945 if(sfp == NULL || sfp->data.choice != 1)
946 return FALSE;
947 grp = sfp->data.value.ptrvalue;
948 if(grp != NULL)
949 {
950 if(grp->locus)
951 {
952 if(StringICmp(grp->locus, mark) == 0)
953 return TRUE;
954 }
955 if(grp->syn)
956 {
957 if(check_syn(grp->syn, mark))
958 return TRUE;
959 }
960 for(curr = grp->db; curr != NULL; curr = curr->next)
961 {
962 db_tag = curr->data.ptrvalue;
963 if(db_tag)
964 {
965 oip = db_tag->tag;
966 if(oip && oip->str)
967 {
968 if(StringICmp(oip->str, mark) == 0)
969 return TRUE;
970 }
971 }
972 }
973 }
974
975 return FALSE;
976 }
977
gmarkfunc(GatherContextPtr gcp)978 static Boolean gmarkfunc(GatherContextPtr gcp)
979 {
980 QueryDataPtr qdp;
981
982 GeneDataPtr gdp;
983 GeneRefPtr grp;
984 SeqFeatPtr sfp;
985 ValNodePtr syn = NULL;
986 Uint2 subtype;
987 ValNodePtr curr;
988 Boolean is_landmark;
989 CharPtr match_str, str;
990 Int4 start, stop;
991 Uint1 strand;
992
993 if(gcp == NULL)
994 return FALSE;
995 qdp = (QueryDataPtr)(gcp->userdata);
996 if(qdp == NULL)
997 return FALSE;
998
999 if(gcp->thistype != OBJ_SEQFEAT && gcp->thistype != OBJ_BIOSEQ_MAPFEAT)
1000 return TRUE;
1001
1002 sfp = (SeqFeatPtr)(gcp->thisitem);
1003 if(sfp == NULL)
1004 return TRUE;
1005
1006 qdp->label[0] = '\0';
1007 subtype = (*(qdp->omtp->subtypefunc)) (sfp);
1008 if(subtype == FEATDEF_GENE || subtype == FEATDEF_tRNA || subtype == FEATDEF_rep_origin || subtype == FEATDEF_CDS)
1009 {
1010 if(subtype == FEATDEF_GENE) /*load the synonom info*/
1011 {
1012 grp = sfp->data.value.ptrvalue;
1013 if(grp != NULL)
1014 syn = grp->syn;
1015 }
1016
1017 (*(qdp->omtp->labelfunc)) (sfp, qdp->label, 20, OM_LABEL_CONTENT);
1018 for(curr = qdp->query_list; curr != NULL; curr = curr->next)
1019 {
1020 str = curr->data.ptrvalue;
1021 match_str = NULL;
1022 if(StringICmp(qdp->label, str) == 0)
1023 match_str = str;
1024 else if(check_landmark(sfp, str))
1025 match_str = str;
1026 if(match_str)
1027 {
1028 is_landmark = (curr->choice != 0);
1029 if(!is_landmark || !gene_is_loaded(qdp->gdp, match_str))
1030 {
1031 gdp = MemNew(sizeof (GeneData));
1032 gdp->landmark = is_landmark;
1033 gdp->symbol = StringSave(match_str);
1034 gdp->entityID = gcp->entityID;
1035 gdp->itemID = gcp->itemID;
1036 gdp->itemType = gcp->thistype;
1037 gdp->subtype = subtype;
1038 gdp->sfp = sfp;
1039 if(qdp->target == NULL)
1040 gdp->location = dup_seq_loc(sfp->location);
1041 else
1042 {
1043 start = gcp->extremes.left;
1044 stop = gcp->extremes.right;
1045 strand = gcp->extremes.strand;
1046 gdp->location = SeqLocIntNew(start, stop, strand, SeqLocId(qdp->target));
1047 }
1048 LinkGeneData(&(qdp->gdp), gdp);
1049 }
1050 }
1051 }
1052 }
1053
1054 return TRUE;
1055
1056 }
1057
1058
1059 /******************************************************************
1060 *
1061 * load_gdata_marks(slp, gene_list, seglevels, sep, gdp)
1062 * Gather the current Seq-entry to create the corresponding list of
1063 * GeneDataPtr for the list of gene symbols
1064 * slp: the target Seq-local. can be set to NULL
1065 * gene_list: a list of query symbols
1066 * seglevels: levels of gather
1067 * sep: the Seq-entry
1068 * gdp: the header of GeneDataPtr
1069 *
1070 ********************************************************************/
load_gdata_marks(SeqLocPtr slp,ValNodePtr gene_list,Int2 seglevels,SeqEntryPtr sep,GeneDataPtr PNTR pgdp)1071 NLM_EXTERN Boolean load_gdata_marks(SeqLocPtr slp, ValNodePtr gene_list, Int2 seglevels, SeqEntryPtr sep, GeneDataPtr PNTR pgdp)
1072 {
1073 GatherScope gs;
1074 QueryData qd;
1075 ObjMgrPtr omp;
1076 ObjMgrTypePtr omtp;
1077
1078 if(gene_list == NULL || sep == NULL || pgdp == NULL)
1079 return FALSE;
1080
1081 omp = ObjMgrGet();
1082 if(omp == NULL)
1083 return FALSE;
1084 omtp = ObjMgrTypeFind(omp, OBJ_SEQFEAT, NULL, NULL);
1085 if(omtp == NULL)
1086 return FALSE;
1087
1088 qd.omp = omp;
1089 qd.omtp = omtp;
1090 qd.label[0] = '\0';
1091 qd.query_list = gene_list;
1092 qd.gdp = *pgdp;
1093 qd.target = slp;
1094
1095
1096 MemSet((Pointer)(&gs), 0, sizeof(GatherScope));
1097 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)OBJ_MAX * sizeof(Boolean));
1098 gs.ignore[OBJ_SEQFEAT] = FALSE;
1099 gs.ignore[OBJ_SEQANNOT] = FALSE;
1100 gs.ignore[OBJ_BIOSEQ_MAPFEAT] = FALSE;
1101 gs.nointervals = TRUE;
1102 gs.target = slp;
1103 if(slp != NULL)
1104 gs.get_feats_location = TRUE;
1105 gs.seglevels = seglevels;
1106
1107 GatherSeqEntry(sep, (Pointer)(&qd), gmarkfunc, &gs);
1108
1109 if(*pgdp == NULL)
1110 *pgdp = qd.gdp;
1111 return TRUE;
1112 }
1113
1114 typedef struct bsp_order {
1115 SeqIdPtr sip;
1116 Int4 order;
1117 Boolean found;
1118 }BspOrder, PNTR BspOrderPtr;
1119
/*
 * FindBspOrder(): explore callback that counts Bioseq entries until
 * one matches the id in the BspOrder passed as data. Entries that are
 * not Bioseqs (choice != 1) are ignored, and nothing is counted once
 * a match has been recorded. index and indent are not used.
 */
static void FindBspOrder(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BspOrderPtr search;

    if (sep->choice != 1 || data == NULL)
        return;

    search = (BspOrderPtr)data;
    if (!search->found)
    {
        ++(search->order);
        search->found = BioseqMatch((BioseqPtr) sep->data.ptrvalue, search->sip);
    }
}
1139
1140
get_current_priority(SeqIdPtr sip,SeqEntryPtr sep,Int4Ptr order)1141 static Boolean get_current_priority(SeqIdPtr sip, SeqEntryPtr sep, Int4Ptr order)
1142 {
1143 BspOrder bo;
1144
1145 bo.sip = sip;
1146 bo.order = 0;
1147 bo.found = FALSE;
1148
1149
1150 if(sep != NULL && sip != NULL)
1151 BioseqExplore(sep, (Pointer)&bo, FindBspOrder);
1152 *order = bo.order;
1153
1154 return bo.found;
1155
1156 }
1157
get_seglevels(BioseqPtr bsp)1158 NLM_EXTERN Int2 get_seglevels (BioseqPtr bsp)
1159 {
1160 SeqLocPtr slp;
1161
1162
1163 if(bsp && bsp->repr == Seq_repr_seg)
1164 {
1165 if(!BioseqHasFeature(bsp))
1166 return 1;
1167 slp = (SeqLocPtr)(bsp->seq_ext);
1168 if(get_seg_num(slp) <= 4) /*4 is the cut off for searching at the second level*/
1169 return 1;
1170 }
1171
1172 return 0;
1173 }
1174
1175 /*************************************************************************
1176 *
1177 * return the best location of the gene from sep.
1178 * best is defined as the Bioseq with the highest searching prioirity
1179 * e_start, e_stop record the extremes of the all the presence of
1180 * the gene
1181 *
1182 *************************************************************************/
get_location_for_query(SeqEntryPtr sep,CharPtr gene,Int4Ptr e_start,Int4Ptr e_stop)1183 NLM_EXTERN SeqLocPtr get_location_for_query(SeqEntryPtr sep, CharPtr gene, Int4Ptr e_start, Int4Ptr e_stop)
1184 {
1185 ValNode vn;
1186 GeneDataPtr gdp = NULL, cgdp, best;
1187 Int2 seglevels;
1188 BioseqPtr bsp;
1189 SeqLocPtr slp;
1190 SeqLoc sl;
1191 Int4Ptr priority_list;
1192 Int4 num;
1193 Int4 left = -1, right = -1;
1194 Int4 order;
1195
1196 if(sep == NULL || gene == NULL)
1197 return NULL;
1198
1199 bsp = find_big_bioseq(sep);
1200 if(bsp == NULL)
1201 return NULL;
1202 seglevels = get_seglevels(bsp);
1203
1204 vn.choice = 0;
1205 vn.data.ptrvalue = gene;
1206 vn.next = NULL;
1207
1208
1209 if(seglevels == 1)
1210 {
1211 sl.choice = SEQLOC_WHOLE;
1212 sl.data.ptrvalue = bsp->id;
1213 sl.next = NULL;
1214 slp = &sl;
1215 }
1216 else
1217 slp = NULL;
1218
1219 if(!load_gdata_marks(slp, &vn, seglevels, sep, &gdp))
1220 return NULL;
1221
1222 priority_list = get_priority_order(sep, &num);
1223 best = NULL;
1224 for(cgdp = gdp; cgdp != NULL; cgdp = cgdp->next)
1225 {
1226 cgdp->priority = 10000;
1227 if(cgdp->location != NULL)
1228 {
1229 if(left == -1)
1230 left = SeqLocStart(cgdp->location);
1231 else
1232 left = MIN(left, SeqLocStart(cgdp->location));
1233 if(right == -1)
1234 right = SeqLocStop(cgdp->location);
1235 else
1236 right = MAX(right, SeqLocStop(cgdp->location));
1237 }
1238
1239 if(num > 0 && priority_list != NULL)
1240 {
1241 if(get_current_priority(SeqLocId(cgdp->location), sep, &order))
1242 {
1243 if(order >=0 && order <= num)
1244 cgdp->priority = (Uint2)(priority_list[order-1]);
1245 }
1246 }
1247 if(best == NULL)
1248 best = cgdp;
1249 else
1250 {
1251 if(cgdp->priority < best->priority)
1252 best = cgdp;
1253 }
1254 }
1255
1256 if(best != NULL)
1257 {
1258 slp = best->location;
1259 best->location = NULL;
1260 }
1261 *e_start = left;
1262 *e_stop = right;
1263 GeneDataFree(gdp);
1264 return slp;
1265 }
1266
1267
1268
1269
1270 typedef struct ra_store{
1271 ValNodePtr rrp_list; /*a collected list for repeats*/
1272 ValNodePtr arp_list; /*a collected list for alignment*/
1273 ObjMgrPtr omp;
1274 ObjMgrTypePtr omtp;
1275 Char annotDB[21]; /*for storing the Seq-annot info*/
1276 Boolean load_align;
1277 Uint1 displayOrder;
1278 }RAStore, PNTR RAStorePtr;
1279
/*
 *	add r_new to the list of repeats. Repeats that share a name
 *	(case-insensitive, two empty names also match) are kept next to
 *	each other and share the same choice value: r_new is inserted
 *	right after the first node with a matching name. A repeat with a
 *	new name is appended with choice = (largest choice seen) + 1.
 *	The first repeat ever stored gets choice 1.
 */
static void add_repeats_to_list(RepeatRegionPtr r_new, ValNodePtr PNTR list)
{
    ValNodePtr node, last = NULL, fresh;
    RepeatRegionPtr known;
    Uint1 highest = 0;
    Boolean same_name;

    if(*list == NULL)
    {
        ValNodeAddPointer(list, 1, r_new);
        return;
    }

    for(node = *list; node != NULL; node = node->next)
    {
        known = node->data.ptrvalue;
        same_name = (Boolean)((known->rep_name[0] == '\0' && r_new->rep_name[0] == '\0')
                    || StringICmp(known->rep_name, r_new->rep_name) == 0);
        if(same_name)
        {
            /*splice the new node in directly behind its namesake*/
            fresh = ValNodeNew(NULL);
            fresh->choice = node->choice;
            fresh->data.ptrvalue = r_new;
            fresh->next = node->next;
            node->next = fresh;
            return;
        }

        highest = MAX(highest, node->choice);
        last = node;
    }

    /*unknown name: goes to the end with the next free order*/
    fresh = ValNodeNew(last);
    fresh->choice = highest + 1;
    fresh->data.ptrvalue = r_new;
}
1322
1323 /*trying to parse the descriptor in Seq-annot for the alignment display*/
get_align_annot_qual(SeqAnnotPtr annot,CharPtr annotDB,Int4 buf_size,Uint1Ptr annot_type)1324 NLM_EXTERN Uint1 get_align_annot_qual(SeqAnnotPtr annot, CharPtr annotDB, Int4 buf_size, Uint1Ptr annot_type)
1325 {
1326 UserObjectPtr uop;
1327 ValNodePtr desc;
1328 ObjectIdPtr oip;
1329 UserFieldPtr ufp;
1330 Boolean match;
1331
1332 if(annot == NULL || annot->type !=2)
1333 return 0;
1334 desc = annot->desc;
1335 match = FALSE;
1336 *annot_type = 0;
1337 while(desc)
1338 {
1339 if(desc->choice == Annot_descr_user)
1340 {
1341 uop = desc->data.ptrvalue;
1342 while(uop)
1343 {
1344 if(uop->type)
1345 {
1346 oip = uop->type;
1347 if(StringCmp(oip->str, "Align Consist?") == 0)
1348 {
1349 *annot_type = ANNOT_CONSIST;
1350 match = TRUE;
1351 }
1352 else if(StringCmp(oip->str, "Blast Type") == 0)
1353 {
1354 match = TRUE;
1355 *annot_type = ANNOT_BLAST;
1356 }
1357 else if(StringCmp(oip->str, "FISH Align") == 0)
1358 *annot_type = ANNOT_FISH;
1359 if(match)
1360 {
1361 ufp = uop->data;
1362 if(ufp && ufp->choice == 2)
1363 {
1364 oip = ufp->label;
1365 if(oip->str && annotDB != NULL)
1366 {
1367 StringNCpy_0(annotDB, oip->str, buf_size);
1368 }
1369 return (Uint1)(ufp->data.intvalue);
1370 }
1371 }
1372 }
1373 uop = uop->next;
1374 }
1375 }
1376 desc = desc->next;
1377 }
1378
1379
1380 desc = annot->desc;
1381 while(desc)
1382 {
1383 if(desc->choice == Annot_descr_user)
1384 {
1385 uop = desc->data.ptrvalue;
1386 while(uop)
1387 {
1388 if(uop->type)
1389 {
1390 oip = uop->type;
1391 if(StringCmp(oip->str, "Hist Seqalign") == 0)
1392 {
1393 ufp = uop->data;
1394 if(ufp->choice == 4 && ufp->data.boolvalue)
1395 {
1396 oip = ufp->label;
1397 if(oip && oip->str && annotDB)
1398 {
1399 StringNCpy_0(annotDB, oip->str, buf_size);
1400 return 0;
1401 }
1402 }
1403 }
1404 }
1405 uop = uop->next;
1406 }
1407 }
1408 desc = desc->next;
1409 }
1410 return 0;
1411 }
1412
1413
/*
 *	load_new_interval(intervals, left, right)
 *	add the range [left, right] to a list of GatherRange.
 *
 *	If the new range overlaps or directly abuts (distance 1) an
 *	existing segment, that segment is widened to cover both, and
 *	every following segment that now overlaps or abuts the widened
 *	segment is absorbed into it and freed.
 *	Otherwise a new segment (choice 1) is allocated and linked in
 *	front of the first segment that starts to the right of the new
 *	range, or at the end of the list if there is none.
 *
 *	If the allocation of the new segment fails the list is left
 *	unchanged.
 */
static void load_new_interval(ValNodePtr PNTR intervals, Int4 left, Int4 right)
{
    GatherRangePtr grp, t_grp;
    ValNodePtr curr, prev, next, vnp;

    /*overlaps the existing segment*/
    for(curr = *intervals; curr != NULL; curr = curr->next)
    {
        grp = curr->data.ptrvalue;
        if(left > (grp->right + 1) || right < (grp->left - 1))
            continue;

        grp->left = MIN(left, grp->left);
        grp->right = MAX(right, grp->right);

        /*check for the following segment*/
        for(next = curr->next; next != NULL; next = curr->next)
        {
            t_grp = next->data.ptrvalue;
            if(t_grp->left > grp->right + 1)
                break;
            /*the absorbed segment may end before the widened one
              does, so never let the right end move backwards*/
            grp->right = MAX(grp->right, t_grp->right);
            curr->next = next->next;
            next->next = NULL;
            ValNodeFreeData(next);
        }
        return;
    }

    /*No overlap*/
    grp = MemNew(sizeof(GatherRange));
    if(grp == NULL)
        return;
    grp->left = left;
    grp->right = right;
    vnp = ValNodeNew(NULL);
    if(vnp == NULL)
    {
        MemFree(grp);
        return;
    }
    vnp->choice = 1;
    vnp->data.ptrvalue = grp;

    /*Nothing overlaps or abuts the new range, so it goes in front of
      the first segment that starts beyond its right end. prev stays
      NULL as long as the head is the insertion point.*/
    prev = NULL;
    for(curr = *intervals; curr != NULL; curr = curr->next)
    {
        grp = curr->data.ptrvalue;
        if(right < grp->left)
            break;
        prev = curr;
    }

    vnp->next = curr;
    if(prev == NULL)
        *intervals = vnp;
    else
        prev->next = vnp;
}
1495
add_alignment_to_list(AlignRegionPtr a_new,ValNodePtr PNTR list)1496 static Boolean add_alignment_to_list(AlignRegionPtr a_new, ValNodePtr PNTR list)
1497 {
1498 ValNodePtr curr;
1499 AlignRegionPtr arp;
1500 ValNodePtr vnp, prev;
1501
1502
1503 curr = *list;
1504 prev = NULL;
1505 while(curr)
1506 {
1507 arp = curr->data.ptrvalue;
1508 if(arp->displayOrder == a_new->displayOrder &&
1509 StringCmp(arp->seq_name, a_new->seq_name) == 0)
1510 {
1511 curr->choice += 1;
1512 arp->gr.left = MIN(arp->gr.left, a_new->gr.left);
1513 arp->gr.right = MAX(arp->gr.right, a_new->gr.right);
1514 load_new_interval(&(arp->intervals), a_new->gr.left, a_new->gr.right);
1515 if(a_new->score > arp->score)
1516 {
1517 arp->score = a_new->score;
1518 arp->p_val = a_new->p_val;
1519 arp->e_val = a_new->e_val;
1520 }
1521 MemFree(a_new);
1522 return FALSE;
1523 }
1524 prev = curr;
1525 curr = curr->next;
1526 }
1527
1528 vnp = ValNodeNew(prev);
1529 vnp->choice = 1;
1530 vnp->data.ptrvalue = a_new;
1531 if(prev == NULL)
1532 *list = vnp;
1533 load_new_interval(&(a_new->intervals), a_new->gr.left, a_new->gr.right);
1534 return TRUE;
1535 }
1536
1537
1538 /*determine the status as absolute values*/
get_alignment_status(FloatHi score)1539 static Uint1 get_alignment_status(FloatHi score)
1540 {
1541 if(score < 40.0)
1542 return 0;
1543 if(score <50.0)
1544 return 1;
1545 if(score < 80.0)
1546 return 2;
1547 if(score < 200)
1548 return 3;
1549 return 4;
1550 }
1551
ArpNodeCompProc(VoidPtr ptr1,VoidPtr ptr2)1552 static int LIBCALLBACK ArpNodeCompProc (VoidPtr ptr1, VoidPtr ptr2)
1553 {
1554 AlignRegionPtr arp1, arp2;
1555 ValNodePtr vnp1;
1556 ValNodePtr vnp2;
1557 GatherRange gr1, gr2;
1558
1559 if (ptr1 != NULL && ptr2 != NULL)
1560 {
1561 vnp1 = *((ValNodePtr PNTR) ptr1);
1562 vnp2 = *((ValNodePtr PNTR) ptr2);
1563 if (vnp1 != NULL && vnp2 != NULL)
1564 {
1565 arp1 = (AlignRegionPtr) vnp1->data.ptrvalue;
1566 arp2 = (AlignRegionPtr) vnp2->data.ptrvalue;
1567 if (arp1 != NULL && arp2 != NULL)
1568 {
1569 if(arp2->score > arp1->score)
1570 return 1;
1571 if(arp1->score > arp2->score)
1572 return -1;
1573
1574 gr1 = arp1->gr;
1575 gr2 = arp2->gr;
1576 if (gr1.left > gr2.left)
1577 return 1;
1578 else if (gr1.left < gr2.left)
1579 return -1;
1580 else if (gr1.right < gr2.right)
1581 return 1;
1582 else if (gr1.right > gr2.right)
1583 return -1;
1584 return 0;
1585 }
1586 }
1587 }
1588 return 0;
1589 }
1590
/*sort a list of AlignRegion with ArpNodeCompProc; returns the new head*/
static ValNodePtr sort_arp_list(ValNodePtr arp_list)
{
    ValNodePtr sorted_head;

    sorted_head = SortValNode(arp_list, ArpNodeCompProc);
    return sorted_head;
}
1595
racollfunc(GatherContextPtr gcp)1596 static Boolean racollfunc (GatherContextPtr gcp)
1597 {
1598 RAStorePtr rasp;
1599 Uint2 subtype;
1600 RepeatRegionPtr rrp;
1601 AlignRegionPtr arp;
1602 AlignDataPtr adp;
1603 SeqAnnotPtr annot;
1604 Uint1 annot_type;
1605 Int4 score, number;
1606
1607
1608 rasp = (RAStorePtr)(gcp->userdata);
1609 if(rasp == NULL)
1610 return FALSE;
1611
1612 switch(gcp->thistype)
1613 {
1614 case OBJ_SEQANNOT:
1615 annot = (SeqAnnotPtr)(gcp->thisitem);
1616 if(annot->type == 2) /*it is a Seq-annot for alignment*/
1617 {
1618 rasp->load_align = is_annot_for_hist_alignment(annot);
1619 if(rasp->load_align)
1620 {
1621 rasp->annotDB[0] = '\0';
1622 get_align_annot_qual(annot, rasp->annotDB, 20, &annot_type);
1623 ++(rasp->displayOrder);
1624 }
1625 }
1626 return TRUE;
1627
1628 case OBJ_SEQFEAT:
1629 if(rasp->omtp == NULL)
1630 return TRUE;
1631 if (rasp->omtp->subtypefunc != NULL)
1632 {
1633 subtype = (*(rasp->omtp->subtypefunc)) (gcp->thisitem);
1634 if(subtype == FEATDEF_repeat_region || subtype == FEATDEF_repeat_unit)
1635 {
1636 rrp = MemNew(sizeof(RepeatRegion));
1637 MemCopy(&(rrp->gr), &(gcp->extremes), sizeof(GatherRange));
1638 (*(rasp->omtp->labelfunc)) (gcp->thisitem, rrp->rep_name, 19, OM_LABEL_CONTENT);
1639 add_repeats_to_list(rrp, &(rasp->rrp_list));
1640 }
1641 }
1642 return TRUE;
1643
1644 case OBJ_SEQALIGN:
1645 case OBJ_SEQHIST_ALIGN:
1646 if(gcp->thistype == OBJ_SEQALIGN)
1647 {
1648 if(gcp->parenttype == OBJ_SEQANNOT)
1649 {
1650 if(rasp->load_align == FALSE)
1651 return TRUE;
1652 }
1653 }
1654
1655 for(adp = gcp->adp; adp != NULL; adp = adp->next)
1656 {
1657 arp = MemNew(sizeof(AlignRegion));
1658 arp->annotDB[0] = '\0';
1659 arp->displayOrder = 0;
1660 score = 0;
1661 arp->score = -1.0;
1662 GetScoreAndEvalue((SeqAlignPtr)gcp->thisitem, &score, &(arp->score), &(arp->e_val), &number);
1663 if(arp->score <=0 && score > 0)
1664 {
1665 arp->score = (FloatHi)score;
1666 }
1667
1668 if(gcp->thistype == OBJ_SEQALIGN)
1669 {
1670 if(gcp->parenttype == OBJ_SEQANNOT)
1671 {
1672 arp->displayOrder = rasp->displayOrder;
1673 StringCpy(arp->annotDB, rasp->annotDB);
1674 }
1675 }
1676 MuskSeqIdWrite(adp->sip, arp->seq_name, 19, PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
1677 MemCopy(&(arp->gr), &(adp->extremes), sizeof(GatherRange));
1678 add_alignment_to_list(arp, &(rasp->arp_list));
1679 }
1680
1681 return TRUE;
1682 default:
1683 return TRUE;
1684 }
1685 }
1686
1687 /* static Uint1 get_alignment_status(FloatHi score, FloatHi min_score, FloatHi max_score)
1688 {
1689 FloatHi val;
1690
1691 if(min_score == max_score)
1692 return 0;
1693 else
1694 {
1695 if(score == max_score)
1696 return 4;
1697
1698 val = (score - min_score)/(max_score - min_score);
1699 return (Uint1)(val * 5.0);
1700 }
1701 } */
1702
1703
1704
1705 /**************************************************************************
1706 *
1707 * collect_repeats_and_align(slp, rrp_list, arp_list, seglevels, sep)
1708 *
1709 * collect repeat features and alignment for global display
1710 * rrp_list: the list of the repeat features
1711 * arp_list: the list of the alignments
1712 *
1713 ***************************************************************************/
collect_repeats_and_align(SeqLocPtr slp,ValNodePtr PNTR rrp_list,ValNodePtr PNTR arp_list,Int2 seglevels,SeqEntryPtr sep,Uint1Ptr align_has_status)1714 NLM_EXTERN Boolean collect_repeats_and_align(SeqLocPtr slp, ValNodePtr PNTR rrp_list, ValNodePtr PNTR arp_list, Int2 seglevels, SeqEntryPtr sep, Uint1Ptr align_has_status)
1715 {
1716 GatherScope gs;
1717 RAStore ras;
1718 ObjMgrPtr omp;
1719 ObjMgrTypePtr omtp;
1720 AlignRegionPtr arp, n_arp;
1721 ValNodePtr prev, next, curr, p_last;
1722
1723 *align_has_status = FALSE;
1724 if(slp == NULL || sep == NULL || (rrp_list == NULL && arp_list == NULL))
1725 return FALSE;
1726
1727 omp = ObjMgrGet();
1728 if(omp == NULL)
1729 return FALSE;
1730 omtp = ObjMgrTypeFind(omp, OBJ_SEQFEAT, NULL, NULL);
1731 ras.omp = omp;
1732 ras.omtp = omtp;
1733 ras.rrp_list = NULL;
1734 ras.arp_list = NULL;
1735 ras.annotDB[0] = '\0';
1736 ras.displayOrder = 0;
1737 ras.load_align = TRUE;
1738
1739
1740 MemSet((Pointer)(&gs), 0, sizeof(GatherScope));
1741 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)OBJ_MAX * sizeof(Boolean));
1742 if(rrp_list != NULL)
1743 gs.ignore[OBJ_SEQFEAT] = FALSE;
1744 if(arp_list != NULL)
1745 gs.ignore[OBJ_SEQHIST_ALIGN] = FALSE;
1746 gs.ignore[OBJ_SEQHIST] = FALSE;
1747 gs.ignore[OBJ_SEQANNOT] = FALSE;
1748 gs.ignore[OBJ_SEQALIGN] = FALSE;
1749
1750 gs.nointervals = TRUE;
1751 gs.target = slp;
1752 gs.get_feats_location = TRUE;
1753 gs.seglevels = seglevels;
1754 gs.mapinsert = TRUE;
1755
1756 GatherSeqEntry(sep, (Pointer)(&ras), racollfunc, &gs);
1757 if(rrp_list != NULL)
1758 *rrp_list = ras.rrp_list;
1759 if(arp_list != NULL)
1760 {
1761 *arp_list = ras.arp_list;
1762 if(*arp_list != NULL)
1763 {
1764 /*sort arp_list in groups according to the displayOrder*/
1765 curr = *arp_list;
1766 p_last = NULL;
1767 while(curr)
1768 {
1769 arp = curr->data.ptrvalue;
1770 next = curr->next;
1771 prev = curr;
1772 while(next)
1773 {
1774 n_arp = next->data.ptrvalue;
1775 if(n_arp->displayOrder == arp->displayOrder)
1776 {
1777 prev = next;
1778 next = next->next;
1779 }
1780 else
1781 break;
1782 }
1783 prev->next = NULL;
1784 curr = sort_arp_list(curr);
1785 if(p_last == NULL)
1786 *arp_list = curr;
1787 else
1788 p_last->next = curr;
1789
1790 while(curr->next != NULL)
1791 curr = curr->next;
1792 p_last = curr;
1793 curr = next;
1794 }
1795
1796
1797 for(curr = *arp_list; curr != NULL; curr = curr->next)
1798 {
1799 arp = curr->data.ptrvalue;
1800 arp->status = get_alignment_status(arp->score);
1801 if(arp->status > 0)
1802 *align_has_status = TRUE;
1803 }
1804 }
1805 }
1806 /* *max_displayOrder = ras.displayOrder; */
1807 return TRUE;
1808 }
1809
1810
1811
1812
1813 /*###################################################################
1814 #
1815 # functions related to make_Bioseq_list
1816 #
1817 ####################################################################*/
1818
/*
 *	SeqEntryExplore callback: every Seq-entry with choice 1 has its
 *	Bioseq appended to the ValNode list that data points to. The
 *	node's choice is SEQINDEX_VAL for a sequence index map and 0
 *	for anything else. index and indent are not used.
 */
static void FindBspCallback(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    BioseqPtr bsp;
    Uint1 choice = 0;

    if(sep->choice != 1)
        return;

    bsp = sep->data.ptrvalue;
    if(IsSeqIndexMap(bsp))
        choice = SEQINDEX_VAL;
    ValNodeAddPointer((ValNodePtr PNTR)data, choice, (Pointer)bsp);
}
1835
is_EQUIV(SeqEntryPtr sep)1836 static Boolean is_EQUIV(SeqEntryPtr sep)
1837 {
1838 BioseqSetPtr bssp;
1839
1840 if(sep->choice == 2)
1841 {
1842 bssp = (BioseqSetPtr)(sep->data.ptrvalue);
1843 return (bssp->_class == 10);
1844 }
1845 return FALSE;
1846 }
1847
get_priority_order(SeqEntryPtr sep,Int4Ptr num)1848 NLM_EXTERN Int4Ptr get_priority_order(SeqEntryPtr sep, Int4Ptr num)
1849 {
1850 BioseqSetPtr bssp;
1851 SeqDescrPtr desc;
1852 UserObjectPtr uop;
1853 UserFieldPtr ufp;
1854 ObjectIdPtr oip;
1855
1856 if(sep == NULL || sep->choice !=2)
1857 return NULL;
1858 bssp = sep->data.ptrvalue;
1859 if(bssp == NULL || bssp->_class != 10)
1860 return NULL;
1861
1862 for(desc = bssp->descr; desc != NULL; desc = desc->next)
1863 {
1864 if(desc->choice == Seq_descr_user)
1865 {
1866 uop = desc->data.ptrvalue;
1867 oip = uop->type;
1868 if(oip != NULL && oip->str != NULL)
1869 {
1870 if(StringCmp(oip->str, "Equiv Search Priority") == 0)
1871 {
1872 ufp = uop->data;
1873 if(ufp->choice == 8)
1874 {
1875 if(num != NULL)
1876 *num = ufp->num;
1877 return (Int4Ptr)(ufp->data.ptrvalue);
1878 }
1879 }
1880 }
1881 }
1882 }
1883
1884 return NULL;
1885 }
1886
1887
1888
1889
1890
1891
1892 /*************************************************************
1893 *
1894 * check the Seq-annot to see if it is designed to be displayed
1895 * as a sequence history or not
1896 *
1897 **************************************************************/
is_annot_for_hist_alignment(SeqAnnotPtr annot)1898 NLM_EXTERN Boolean is_annot_for_hist_alignment(SeqAnnotPtr annot)
1899 {
1900 UserObjectPtr uop;
1901 ValNodePtr desc;
1902 ObjectIdPtr oip;
1903 UserFieldPtr ufp;
1904
1905 if(annot == NULL || annot->type !=2)
1906 return FALSE;
1907 desc = annot->desc;
1908 while(desc)
1909 {
1910 if(desc->choice == Annot_descr_user)
1911 {
1912 uop = desc->data.ptrvalue;
1913 while(uop)
1914 {
1915 if(uop->type)
1916 {
1917 oip = uop->type;
1918 if(StringCmp(oip->str, "Hist Seqalign") == 0)
1919 {
1920 ufp = uop->data;
1921 if(ufp && ufp->choice == 4)
1922 return (ufp->data.boolvalue);
1923 }
1924 }
1925 uop = uop->next;
1926 }
1927 }
1928 desc = desc->next;
1929 }
1930
1931 return FALSE;
1932 }
1933
1934
get_equiv_align(SeqEntryPtr sep)1935 NLM_EXTERN ValNodePtr get_equiv_align(SeqEntryPtr sep)
1936 {
1937 BioseqSetPtr bssp;
1938 SeqAnnotPtr annot;
1939 ValNodePtr ealign_list = NULL;
1940 SeqAlignPtr align;
1941
1942 if(is_EQUIV(sep))
1943 {
1944 bssp = sep->data.ptrvalue;
1945 annot = bssp->annot;
1946 while(annot)
1947 {
1948 if(annot->type == 2)
1949 {
1950 align = annot->data;
1951 if(align && align->segtype == 3)
1952 { /*has to be Std-seg */
1953 if(!is_annot_for_hist_alignment(annot)) /*protection for cases that might include the alignment shown as history*/
1954 ValNodeAddPointer(&ealign_list, 0, annot);
1955 }
1956 }
1957 annot = annot->next;
1958 }
1959 }
1960
1961 return ealign_list;
1962 }
1963
1964
1965
1966
make_Bioseq_list(SeqEntryPtr sep,ValNodePtr PNTR bsp_list,ValNodePtr PNTR equiv_align)1967 NLM_EXTERN Boolean make_Bioseq_list(SeqEntryPtr sep, ValNodePtr PNTR bsp_list, ValNodePtr PNTR equiv_align)
1968 {
1969 BioseqPtr bsp;
1970 ValNodePtr sep_align_list;
1971 ValNodePtr this_list;
1972 Int4Ptr priority_order;
1973 Int4 num, i;
1974 ValNodePtr curr;
1975
1976 if(sep == NULL || bsp_list == NULL)
1977 return FALSE;
1978
1979 if(is_EQUIV(sep)) /*is it an Equiv-seg of Bioseqs*/
1980 {
1981 priority_order = get_priority_order(sep, &num);
1982 this_list = NULL;
1983 SeqEntryExplore(sep, (Pointer) (&this_list), FindBspCallback);
1984 for(i= 0, curr = this_list; curr != NULL && i<num; curr = curr->next)
1985 {
1986 if(curr->choice != SEQINDEX_VAL)
1987 { /*the sequence index map is excluded from
1988 search priority */
1989 if(priority_order != NULL)
1990 curr->choice = (Uint1)(priority_order[i++]);
1991 else
1992 curr->choice = SEQINDEX_VAL-1;
1993 }
1994 }
1995 ValNodeLink(bsp_list, this_list);
1996
1997 if(equiv_align != NULL)
1998 {
1999 sep_align_list = get_equiv_align(sep);
2000 ValNodeLink(equiv_align, sep_align_list);
2001 }
2002 }
2003 else
2004 {
2005 bsp = find_big_bioseq(sep);
2006 if(bsp != NULL)
2007 ValNodeAddPointer(bsp_list, 1, (Pointer)bsp);
2008 }
2009 return TRUE;
2010 }
2011
2012 #define MINDIST 100 /*for drawing the dynamic scaler*/
2013
calculate_ruler(Int4 scaleX)2014 NLM_EXTERN Int4 calculate_ruler(Int4 scaleX)
2015 {
2016 FloatHi logDist;
2017 FloatHi minDist;
2018 FloatHi nextPower;
2019 Int4 ruler;
2020
2021
2022 minDist = (double) (MINDIST * scaleX);
2023 logDist = log10 (minDist);
2024 nextPower = (exp(ceil(logDist) * NCBIMATH_LN10));
2025 if(minDist < nextPower /5.0)
2026 ruler = (Int4)( (nextPower + 0.5)/5.0 );
2027 else if (minDist <nextPower/4){
2028 ruler = (Int4)( (nextPower + 0.5)/4.0 );
2029 }
2030 else if(minDist <nextPower/2){
2031 ruler = (Int4)( (nextPower+0.5)/2.0 );
2032 }
2033 else {
2034 ruler = (Int4) (nextPower+0.5);
2035 }
2036 return ruler;
2037 }
2038
2039
LinkGeneData(GeneDataPtr PNTR head,GeneDataPtr g_new)2040 NLM_EXTERN GeneDataPtr LinkGeneData(GeneDataPtr PNTR head, GeneDataPtr g_new)
2041 {
2042 GeneDataPtr curr;
2043
2044 if(*head == NULL)
2045 *head = g_new;
2046 else
2047 {
2048 curr = *head;
2049 while(curr->next != NULL)
2050 curr = curr->next;
2051 curr->next = g_new;
2052 }
2053
2054 return (*head);
2055 }
2056
2057
2058 /****************************************************************
2059 *
2060 * LoadLandMarkGene(sep)
2061 * get the landmark gene from the User-object in the descriptor
2062 * all the genes are linked to a ValNode and vnp->choice is set
2063 * to 1 to indicate it is a landmark gene
2064 *
2065 ******************************************************************/
LoadLandMarkGene(SeqEntryPtr sep)2066 NLM_EXTERN ValNodePtr LoadLandMarkGene(SeqEntryPtr sep)
2067 {
2068 BioseqPtr bsp;
2069 BioseqSetPtr bssp;
2070 SeqDescrPtr descr = NULL;
2071 UserObjectPtr uop;
2072 UserFieldPtr ufp;
2073 CharPtr symbol;
2074 ValNodePtr list = NULL;
2075
2076 if(sep == NULL)
2077 return NULL;
2078
2079 if(sep->choice == 1)
2080 {
2081 bsp = sep->data.ptrvalue;
2082 descr = bsp->descr;
2083 }
2084 else
2085 {
2086 bssp = sep->data.ptrvalue;
2087 descr = bssp->descr;
2088 }
2089
2090 while(descr)
2091 {
2092 if(descr->choice == Seq_descr_user)
2093 {
2094 uop = descr->data.ptrvalue;
2095 while(uop)
2096 {
2097 if(uop->type != NULL)
2098 {
2099 if(is_label_match(uop->type, "LandMark"))
2100 {
2101 ufp = uop->data;
2102 while(ufp)
2103 {
2104 if(ufp->choice == 1)
2105 {
2106 symbol = (CharPtr)(ufp->data.ptrvalue);
2107 if(symbol != NULL)
2108 ValNodeCopyStr(&list, 1, symbol);
2109 /*set choice =1 to indicate it is a landmark gene*/
2110 }
2111 ufp = ufp->next;
2112 }
2113 }
2114 }
2115 uop = uop->next;
2116 }
2117 }
2118 descr = descr->next;
2119 }
2120 return list;
2121
2122 }
2123
BioseqHasLandMark(BioseqPtr bsp)2124 NLM_EXTERN Boolean BioseqHasLandMark(BioseqPtr bsp)
2125 {
2126 ValNodePtr descr;
2127 UserObjectPtr uop;
2128
2129 if(bsp == NULL)
2130 return FALSE;
2131
2132 for(descr = bsp->descr; descr != NULL; descr = descr->next)
2133 {
2134 if(descr->choice == Seq_descr_user)
2135 {
2136 uop = descr->data.ptrvalue;
2137 if(uop->type != NULL)
2138 {
2139 if(is_label_match(uop->type, "LandMark"))
2140 return TRUE;
2141 }
2142 }
2143 }
2144
2145 return FALSE;
2146 }
2147
2148
GeneDataFree(GeneDataPtr head)2149 NLM_EXTERN GeneDataPtr GeneDataFree(GeneDataPtr head)
2150 {
2151 GeneDataPtr next;
2152
2153 while(head != NULL)
2154 {
2155 next = head->next;
2156 MemFree(head->symbol);
2157 SeqLocFree(head->location);
2158 ValNodeFree(head->align_seg);
2159 MemFree(head);
2160 head = next;
2161 }
2162 return head;
2163 }
2164
2165
2166 /***********************************************************************
2167 *
2168 * for each sequence in alignment stored in Seq-hist, if the aligned
2169 * sequence itself contains alignment, it is temporarily loaded as
2170 * a user-object in the descriptor of the bioseq. This function extract
2171 * the information from the descripor and store it as a list of gi's
2172 * plus the kludge offset value
2173 *
2174 ************************************************************************/
get_seqids_with_alignment(BioseqPtr mbsp)2175 NLM_EXTERN ValNodePtr get_seqids_with_alignment(BioseqPtr mbsp)
2176 {
2177 ValNodePtr descr;
2178 UserObjectPtr uop;
2179 UserFieldPtr ufp;
2180 ValNodePtr align_id_list = NULL;
2181 ObjectIdPtr oip;
2182 Int4 gi;
2183
2184 if(mbsp == NULL)
2185 return NULL;
2186
2187 for (descr = mbsp->descr; descr != NULL; descr = descr->next)
2188 {
2189 if(descr->choice == Seq_descr_user)
2190 {
2191 uop = descr->data.ptrvalue;
2192 if(uop->type != NULL)
2193 {
2194 if(is_label_match(uop->type, "History"))
2195 /*temporary criteria for storing the seq-id with alignment*/
2196 {
2197 ufp = uop->data;
2198 while(ufp)
2199 {
2200 if(ufp->choice ==2) /*it is an integer*/
2201 {
2202 gi = ufp->data.intvalue;
2203 oip = ufp->label;
2204 /*oip->id is the kludge offset factor*/
2205 ValNodeAddInt(&align_id_list, (Uint1)(oip->id), gi);
2206 }
2207 ufp = ufp->next;
2208 }
2209 }
2210 }
2211 }
2212 }
2213 return align_id_list;
2214 }
2215
2216
2217 /***********************************************************************
2218 *
2219 * map the kludge offet factor for Unigene, RICE, MOUSE ,FlyBase, etc
2220 *
2221 ************************************************************************/
get_kludge_factor(SeqIdPtr sip,Int4Ptr gi)2222 NLM_EXTERN Int4 get_kludge_factor(SeqIdPtr sip, Int4Ptr gi)
2223 {
2224 DbtagPtr db_tag;
2225 ObjectIdPtr oip;
2226
2227 *gi = -1;
2228
2229 if(sip == NULL)
2230 return 0;
2231 if(sip->choice == SEQID_GI)
2232 *gi = sip->data.intvalue;
2233 if(sip->choice != SEQID_GENERAL)
2234 return 0;
2235
2236 db_tag = sip->data.ptrvalue;
2237 if(db_tag == NULL || db_tag->db == NULL)
2238 return 0L;
2239 oip = db_tag->tag;
2240 *gi = oip->id;
2241
2242 if(StringCmp(db_tag->db, "UNIGENE") == 0)
2243 return 1L;
2244
2245 if(StringICmp(db_tag->db, "FlyBase") == 0)
2246 return 2L;
2247
2248 if(StringCmp(db_tag->db, "JACKSON") == 0)
2249 return 3L;
2250
2251 if(StringCmp(db_tag->db, "JRGP") == 0)
2252 return 4L;
2253
2254 return 0L;
2255 }
2256
make_gene_data(ValNodePtr gene_list)2257 NLM_EXTERN GeneDataPtr make_gene_data(ValNodePtr gene_list)
2258 {
2259 GeneDataPtr head, g_new;
2260 CharPtr str;
2261
2262 head = NULL;
2263 while(gene_list)
2264 {
2265 str = gene_list->data.ptrvalue;
2266 g_new = MemNew(sizeof(GeneData));
2267 g_new->symbol = StringSave(str);
2268 g_new->priority = 0;
2269 g_new->landmark = (gene_list->choice != 0);
2270 LinkGeneData(&head, g_new);
2271 gene_list = gene_list->next;
2272 }
2273
2274 return head;
2275 }
2276
2277
RefreshGeneData(GeneDataPtr gdp)2278 NLM_EXTERN void RefreshGeneData(GeneDataPtr gdp)
2279 {
2280 while(gdp)
2281 {
2282 gdp->priority = 0;
2283 gdp = gdp->next;
2284 }
2285 }
2286
2287
2288 /**************************************************************
2289 *
2290 * get the alignment for the FISH map
2291 * for the Human Cytogenetic map, if there is a
2292 * Seq-annot stored as Hist-align and the intervals are
2293 * aligned to the FISH map, it will return the Seq-align
2294 * of the alignment to the FISH map
2295 *
2296 ***************************************************************/
get_FISH_align(BioseqPtr bsp)2297 NLM_EXTERN SeqAlignPtr get_FISH_align (BioseqPtr bsp)
2298 {
2299 SeqAnnotPtr annot;
2300 AnnotDescrPtr descr;
2301 Boolean is_hist, is_fish;
2302 UserObjectPtr uop;
2303 UserFieldPtr ufp;
2304 ObjectIdPtr oip;
2305
2306 for(annot = bsp->annot; annot != NULL; annot = annot->next)
2307 {
2308 if(annot->type == 2)
2309 {
2310 descr = annot->desc;
2311 is_hist = FALSE;
2312 is_fish = FALSE;
2313 while(descr)
2314 {
2315 if(descr->choice == Annot_descr_user)
2316 {
2317 uop = descr->data.ptrvalue;
2318 oip = uop->type;
2319 if(oip && oip->str &&
2320 StringCmp(oip->str, "Hist Seqalign") == 0)
2321 {
2322 ufp = uop->data;
2323 while(ufp)
2324 {
2325 oip = ufp->label;
2326 if(StringCmp(oip->str, "Hist Seqalign") == 0)
2327 {
2328 if(ufp->choice == 4)
2329 is_hist = ufp->data.boolvalue;
2330 }
2331 if(StringCmp(oip->str, "FISH Align") == 0)
2332 {
2333 if(ufp->choice == 4)
2334 is_fish = ufp->data.boolvalue;
2335 }
2336 ufp = ufp->next;
2337 }
2338 }
2339 }
2340 descr = descr->next;
2341 }
2342 if(is_hist && is_fish)
2343 return (SeqAlignPtr)(annot->data);
2344 }
2345 }
2346
2347 return NULL;
2348 }
2349
2350
2351 /*******************************************************
2352 *
2353 * annot_is_user_defined(annot)
2354 *
2355 * determine if the Seq-annot contains the features
2356 * that were defined by the user. This is to
2357 * distinguish the local data from the public data
2358 * set
2359 *
2360 ********************************************************/
annot_is_user_defined(SeqAnnotPtr annot)2361 NLM_EXTERN Boolean annot_is_user_defined (SeqAnnotPtr annot)
2362 {
2363 UserObjectPtr uop;
2364 ValNodePtr desc;
2365
2366 if(annot == NULL || annot->desc == NULL)
2367 return FALSE;
2368
2369 for(desc = annot->desc; desc != NULL; desc = desc->next)
2370 {
2371 if(desc->choice == Annot_descr_user)
2372 {
2373 uop = desc->data.ptrvalue;
2374 if(is_label_match(uop->type, "User Feature"))
2375 return TRUE;
2376 }
2377 }
2378
2379 return FALSE;
2380 }
2381
2382
2383 /*
2384 *
2385 * functions related to the map legend of a Bioseq
2386 *
2387 */
BioseqHasMapLegend(BioseqPtr bsp)2388 NLM_EXTERN UserObjectPtr BioseqHasMapLegend (BioseqPtr bsp)
2389 {
2390 ValNodePtr vnp;
2391 UserObjectPtr uop;
2392 ObjectIdPtr oip;
2393
2394
2395 if(bsp == NULL)
2396 return NULL;
2397 for (vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
2398 {
2399 if(vnp->choice == Seq_descr_user)
2400 {
2401 uop = vnp->data.ptrvalue;
2402 if(uop && uop->type != NULL)
2403 {
2404 oip = uop->type;
2405 if(oip->str && StringCmp(oip->str, "MapLegend") == 0)
2406 return uop;
2407 }
2408 }
2409 }
2410
2411
2412 return NULL;
2413 }
2414
2415
SeqLocListHasLegend(ValNodePtr slp_list)2416 NLM_EXTERN Boolean SeqLocListHasLegend (ValNodePtr slp_list)
2417 {
2418 SeqLocPtr slp;
2419 BioseqPtr bsp;
2420
2421 while(slp_list)
2422 {
2423 slp = slp_list->data.ptrvalue;
2424 if(slp != NULL)
2425 {
2426 bsp = BioseqFind(SeqLocId(slp));
2427 if(bsp != NULL)
2428 {
2429 if(BioseqHasMapLegend(bsp) != NULL)
2430 return TRUE;
2431 }
2432 }
2433 slp_list = slp_list->next;
2434 }
2435
2436 return FALSE;
2437 }
2438
2439
2440 /*
2441 *
2442 * Is it a Sequence Index Map
2443 *
2444 */
IsSeqIndexMap(BioseqPtr bsp)2445 NLM_EXTERN Boolean IsSeqIndexMap (BioseqPtr bsp)
2446 {
2447 ValNodePtr vnp;
2448
2449 if(bsp == NULL)
2450 return FALSE;
2451
2452 for(vnp = bsp->descr; vnp != NULL; vnp = vnp->next)
2453 {
2454 if(vnp->choice == Seq_descr_comment)
2455 {
2456 if(vnp->data.ptrvalue != NULL)
2457 {
2458 if(StringCmp(vnp->data.ptrvalue,
2459 "Sequence Index Map") == 0)
2460 return TRUE;
2461 }
2462 }
2463 }
2464
2465
2466 return FALSE;
2467 }
2468
2469
2470 /*find a list of the seqlocs on the contig that maps to the
2471 *current chromosome. For now, only the Whitehead map and the
2472 *Eric Green's map is considered. return a list of Seq-locs that
2473 *contains contigs within the region
2474 */
FindContigDB(SeqIdPtr sip)2475 NLM_EXTERN Uint1 FindContigDB (SeqIdPtr sip)
2476 {
2477 DbtagPtr db_tag;
2478
2479 while(sip)
2480 {
2481 if(sip->choice == SEQID_GENERAL)
2482 {
2483 db_tag = sip->data.ptrvalue;
2484 if(db_tag->db)
2485 {
2486 if(StringCmp(db_tag->db, "MIT") == 0)
2487 return YAC_MIT;
2488 if(StringCmp(db_tag->db, "NHGRI") == 0)
2489 return YAC_NHGRI;
2490 }
2491 }
2492 sip = sip->next;
2493 }
2494 return 0;
2495 }
2496
2497
2498
2499 /*
2500 ################################################################
2501 #
2502 # functions related to mapping the location of the chromosome
2503 # to a location on the YAC Contig
2504 #
2505 ################################################################
2506 */
2507
2508
2509 /*the positions on the contig sequence is calculated by the ratio*/
calculate_percent_pos(Int4 pos,Int4 start,Int4 len)2510 static FloatHi calculate_percent_pos(Int4 pos, Int4 start, Int4 len)
2511 {
2512 return (FloatHi)(pos - start)/(FloatHi)len;
2513 }
2514
/*
 *	map_position_by_ratio(m_pos, m_loc, s_len)
 *	m_pos is a position within m_loc. Return the position that lies
 *	at the same relative distance from the start on a sequence of
 *	length s_len, but never more than s_len-1.
 *	If m_loc has no usable length (SeqLocLen() <= 0) there is no
 *	ratio to compute, and the position maps to the start (0, still
 *	limited by s_len-1).
 */
static Int4 map_position_by_ratio(Int4 m_pos, SeqLocPtr m_loc, Int4 s_len)
{
    FloatHi ratio;
    FloatHi val;
    Int4 m_len;

    m_len = SeqLocLen(m_loc);
    if(m_len <= 0)  /*would divide by zero, or turn the ratio around*/
        return MIN(0, s_len-1);

    ratio = calculate_percent_pos(m_pos, SeqLocStart(m_loc), m_len);
    val = ratio * (FloatHi)s_len;
    return MIN((Int4)val, s_len-1);
}
2524
2525
2526 typedef struct contig_map {
2527 Int4 offset; /*offset of the contig to the chromosome coordinates*/
2528 SeqLocPtr slp; /*location of the contig*/
2529 }ContigMap, PNTR ContigMapPtr;
2530
OffsetCompProc(VoidPtr ptr1,VoidPtr ptr2)2531 static int LIBCALLBACK OffsetCompProc (VoidPtr ptr1, VoidPtr ptr2)
2532 {
2533 ContigMapPtr cmp1, cmp2;
2534 ValNodePtr vnp1;
2535 ValNodePtr vnp2;
2536
2537 if (ptr1 != NULL && ptr2 != NULL) {
2538 vnp1 = *((ValNodePtr PNTR) ptr1);
2539 vnp2 = *((ValNodePtr PNTR) ptr2);
2540 if (vnp1 != NULL && vnp2 != NULL) {
2541 cmp1 = (ContigMapPtr) vnp1->data.ptrvalue;
2542 cmp2 = (ContigMapPtr) vnp2->data.ptrvalue;
2543 if (cmp1 != NULL && cmp2 != NULL) {
2544 if(cmp1->offset > cmp2->offset)
2545 return 1;
2546 else
2547 {
2548 if(cmp1->offset < cmp2->offset)
2549 return -1;
2550 else
2551 return 0;
2552 }
2553 }
2554 }
2555 }
2556 return 0;
2557 }
2558
2559
2560 /*
2561 *
2562 * chr_slp is the position on the genome/chromosome. align is the alignment
2563 * between the contig and the chromosome. It will try to map the chromosome
2564 * coordinates to the contig coordinates. Since more than one contig may be
2565 * identified, the result will be chained to a list. The list of the contigs
2566 * are sorted by their position on the chromosome
2567 */
MapContigPosition(SeqLocPtr chr_slp,SeqAlignPtr align)2568 static ValNodePtr MapContigPosition (SeqLocPtr chr_slp, SeqAlignPtr align)
2569 {
2570 StdSegPtr ssp;
2571 SeqLocPtr slp;
2572 SeqLocPtr m_loc, s_loc;
2573 SeqIdPtr chr_sip;
2574 Int4 offset;
2575 Int4 start, stop;
2576 Int4 chr_start, chr_stop;
2577 Int4 ctg_start, ctg_stop;
2578 ContigMapPtr cmp;
2579 ValNodePtr list, curr, contig_list;
2580 BioseqPtr bsp;
2581
2582 chr_start = SeqLocStart(chr_slp);
2583 chr_stop = SeqLocStop(chr_slp);
2584 chr_sip = SeqLocId(chr_slp);
2585 list = NULL;
2586 while(align)
2587 {
2588 if(align->segtype == 3)
2589 {
2590 ssp = align->segs;
2591 while(ssp)
2592 {
2593 m_loc = NULL;
2594 s_loc = NULL;
2595 for(slp = ssp->loc; slp != NULL; slp = slp->next)
2596 {
2597 if(SeqIdMatch(SeqLocId(slp), chr_sip))
2598 {
2599 if(SeqLocCompare(slp, chr_slp) != SLC_NO_MATCH)
2600 m_loc = slp;
2601 }
2602 else
2603 s_loc = slp;
2604 }
2605 if(m_loc != NULL && s_loc != NULL)
2606 {
2607 if(SeqLocStart(m_loc) > chr_start)
2608 offset = SeqLocStart(m_loc) - chr_start;
2609 else
2610 offset = 0;
2611 start = MAX(SeqLocStart(m_loc), chr_start);
2612 stop = MIN(SeqLocStop(m_loc), chr_stop);
2613
2614 /*assuming the contig is mapped to the chromosome from
2615 head-to-toe. Use the original Bioseq to get the location
2616 on the contig
2617 */
2618 bsp = BioseqLockById(SeqLocId(s_loc));
2619 if(bsp != NULL)
2620 {
2621 ctg_start = map_position_by_ratio(start, m_loc, bsp->length);
2622 ctg_stop = map_position_by_ratio(stop, m_loc, bsp->length);
2623 slp = SeqLocIntNew(ctg_start, ctg_stop, Seq_strand_plus, SeqLocId(s_loc));
2624
2625 cmp = MemNew(sizeof(ContigMap));
2626 cmp->offset = offset;
2627 cmp->slp = slp;
2628 ValNodeAddPointer(&list, 0, cmp);
2629 BioseqUnlock(bsp);
2630 }
2631 }
2632 ssp = ssp->next;
2633 }
2634 }
2635 align = align->next;
2636 }
2637
2638 if(list == NULL)
2639 return NULL;
2640 list = SortValNode(list, OffsetCompProc);
2641 /*extract the Seq-loc from cmp, and free the structure*/
2642 contig_list = NULL;
2643 for(curr = list; curr != NULL; curr = curr->next)
2644 {
2645 cmp = curr->data.ptrvalue;
2646 if(cmp && cmp->slp != NULL)
2647 ValNodeAddPointer(&contig_list, 0, cmp->slp);
2648 MemFree(cmp);
2649 }
2650 ValNodeFree(list);
2651 return contig_list;
2652 }
2653
2654
2655
2656
2657 /*return a list of Seq-locs which are the contigs mapped to
2658 * the current location on the genome
2659 *
2660 */
FindContigList(SeqLocPtr chr_slp)2661 NLM_EXTERN ValNodePtr FindContigList (SeqLocPtr chr_slp)
2662 {
2663 BioseqPtr bsp;
2664 SeqAlignPtr align;
2665 ValNodePtr list = NULL;
2666 SeqAnnotPtr annot;
2667 Uint1 type, annot_type;
2668 Char annotDB[21];
2669
2670
2671 bsp = BioseqLockById(SeqLocId(chr_slp));
2672 if(bsp == NULL)
2673 return NULL;
2674
2675 if(FindContigDB (bsp->id) == 0)
2676 {
2677 BioseqUnlock(bsp);
2678 return NULL;
2679 }
2680 if(bsp->hist && bsp->hist->assembly)
2681 {
2682 align = bsp->hist->assembly;
2683 list = MapContigPosition (chr_slp, align);
2684 }
2685 if(list == NULL)
2686 {
2687 for(annot = bsp->annot; annot != NULL; annot = annot->next)
2688 {
2689 if(annot->type == 2)
2690 {
2691 if(is_annot_for_hist_alignment(annot))
2692 {
2693 type = get_align_annot_qual(annot, annotDB, 20, &annot_type);
2694 if(annot_type == ANNOT_CONSIST && type == 1)
2695 /*for anchored contigs only*/
2696 {
2697 align = annot->data;
2698 list = MapContigPosition(chr_slp, align);
2699 if(list != NULL)
2700 break;
2701 }
2702 }
2703 }
2704 }
2705 }
2706
2707 BioseqUnlock(bsp);
2708 return list;
2709 }
2710
is_lod_score_annot(SeqAnnotPtr annot)2711 NLM_EXTERN Boolean is_lod_score_annot(SeqAnnotPtr annot)
2712 {
2713 SeqFeatPtr sfp;
2714 ValNodePtr desc;
2715
2716 if(annot == NULL || annot->type != 1)
2717 return FALSE;
2718
2719 sfp = annot->data;
2720 if(sfp == NULL || sfp->data.choice != 14)
2721 return FALSE;
2722 for(desc = annot->desc; desc != NULL; desc = desc->next)
2723 {
2724 if(desc->choice == Annot_descr_name)
2725 {
2726 if(StringCmp(desc->data.ptrvalue, "LOD Score") == 0)
2727 return TRUE;
2728 }
2729 }
2730
2731 return FALSE;
2732 }
2733
GetAnnotTitle(SeqAnnotPtr annot)2734 NLM_EXTERN CharPtr GetAnnotTitle(SeqAnnotPtr annot)
2735 {
2736 ValNodePtr desc;
2737
2738 if(annot == NULL)
2739 return NULL;
2740
2741 for(desc = annot->desc; desc != NULL; desc = desc->next)
2742 {
2743 if(desc->choice == Annot_descr_title)
2744 {
2745 if(desc->data.ptrvalue != NULL)
2746 return (desc->data.ptrvalue);
2747 }
2748 }
2749
2750 return NULL;
2751 }
2752
2753
GetLODScoreNumber(BioseqPtr bsp)2754 NLM_EXTERN Int2 GetLODScoreNumber (BioseqPtr bsp)
2755 {
2756 Int2 num = 0;
2757 SeqAnnotPtr annot;
2758
2759 if(bsp == NULL || bsp->annot == NULL)
2760 return 0;
2761 for(annot = bsp->annot; annot != NULL; annot = annot->next)
2762 {
2763 if(is_lod_score_annot(annot))
2764 ++num;
2765 }
2766
2767 return num;
2768 }
2769
GetLODScoreBitValue(SeqFeatPtr sfp)2770 NLM_EXTERN Uint1 GetLODScoreBitValue (SeqFeatPtr sfp)
2771 {
2772 UserObjectPtr uop;
2773 ObjectIdPtr oip;
2774 UserFieldPtr ufp;
2775
2776 if(sfp == NULL || sfp->data.choice != 14)
2777 return 0;
2778
2779 uop = sfp->data.value.ptrvalue;
2780 if(uop != NULL)
2781 {
2782 oip = uop->type;
2783 if(oip && oip->str && StringCmp(oip->str, "LOD Score Value") == 0)
2784 {
2785 ufp = uop->data;
2786 while(ufp)
2787 {
2788 oip = ufp->label;
2789 if(oip->str && StringCmp(oip->str, "Bit Value") == 0)
2790 {
2791 if(ufp->choice == 2)
2792 return (Uint1)(ufp->data.intvalue);
2793 }
2794 ufp = ufp->next;
2795 }
2796 }
2797 }
2798
2799 return 0;
2800 }
2801
2802
2803