1 /* $Id: jzcoll.c,v 6.18 2006/07/13 17:06:38 bollin Exp $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name:  $RCSfile: jzcoll.c,v $
27 *
28 * Author:  Jinghui Zhang
29 *
30 * Initial Version Creation Date: 03/24/97
31 *
32 * $Revision: 6.18 $
33 *
34 * File Description:
35 *         File for various alignments
36 *
37 * $Log: jzcoll.c,v $
38 * Revision 6.18  2006/07/13 17:06:38  bollin
39 * use Uint4 instead of Uint2 for itemID values
40 * removed unused variables
41 * resolved compiler warnings
42 *
43 * Revision 6.17  2001/06/26 16:42:58  vakatov
44 * POINT --> BAND_POINT  (to avoid conflicts with MS-Win standard headers)
45 *
46 * Revision 6.16  2000/11/16 22:10:37  shavirin
47 * Moved many functions from txalign.c - due to move of txalign.c to
48 * distrib/tools directory and libncbitool.a library.
49 *
50 *
51 * ==========================================================================
52 */
53 
54 #include <jzcoll.h>
55 #include <txalign.h>
56 #include <codon.h>
57 
/*
 * Residue letters in the row/column order of webb_blosum62: entry k of
 * pchars names the residue scored by row k and by column k of the table.
 * load_default_matrix() uses this mapping to copy the table into a matrix
 * indexed by character code.
 */
static Char pchars[] = "ARNDCQEGHILKMFPSTWYVBZX";	/* amino acid names */
/*
 * WEBB_asize x WEBB_asize substitution scores, one row and one column per
 * letter of pchars.  Presumably the BLOSUM62 matrix, going by the
 * identifier -- the values have not been checked against a reference here.
 */
static Int4 webb_blosum62[WEBB_asize][WEBB_asize] = {
   { 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-2,-1, 0 },
   {-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-1, 0,-1 },
   {-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3, 3, 0,-1 },
   {-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3, 4, 1,-1 },
   { 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-3,-3,-2 },
   {-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2, 0, 3,-1 },
   {-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2, 1, 4,-1 },
   { 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-1,-2,-1 },
   {-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3, 0, 0,-1 },
   {-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-3,-3,-1 },
   {-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-3,-1 },
   {-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2, 0, 1,-1 },
   {-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-3,-1,-1 },
   {-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-3,-3,-1 },
   {-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-2,-1,-2 },
   { 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2, 0, 0, 0 },
   { 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-1,-1, 0 },
   {-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-3,-2 },
   {-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-3,-2,-1 },
   { 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-3,-2,-1 },
   {-2,-1, 3, 4,-3, 0, 1,-1, 0,-3,-4, 0,-3,-3,-2, 0,-1,-4,-3,-3, 4, 1,-1 },
   {-1, 0, 0, 1,-3, 3, 4,-2, 0,-3,-3, 1,-1,-3,-1, 0,-1,-3,-2,-2, 1, 4,-1 },
   { 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1 },
 };
84 
load_default_matrix(void)85 NLM_EXTERN Int4Ptr PNTR load_default_matrix (void)
86 {
87 	Int4Ptr PNTR ss;
88 	Int2 i, j;
89 
90 	ss = (Int4Ptr PNTR) MemNew((size_t)TX_MATRIX_SIZE * sizeof (Int4Ptr));
91 	for(i = 0; i<TX_MATRIX_SIZE; ++i)
92 		ss[i] = (Int4Ptr) MemNew((size_t)TX_MATRIX_SIZE * sizeof (Int4));
93 
94 	for(i = 0; i < TX_MATRIX_SIZE; i++)
95 		for(j = 0; j < TX_MATRIX_SIZE;j++)
96 			ss[i][j] = -1000;
97 	for(i = 0; i < WEBB_asize; ++i)
98 		for(j = 0; j < WEBB_asize; ++j)
99 			ss[pchars[i]][pchars[j]] = webb_blosum62[i][j];
100 	for(i = 0; i < WEBB_asize; ++i)
101 		ss[pchars[i]]['*'] = ss['*'][pchars[i]] = -4;
102 	ss['*']['*'] = 1;
103 	return ss;
104 }
105 
free_default_matrix(Int4Ptr PNTR matrix)106 NLM_EXTERN void free_default_matrix (Int4Ptr PNTR matrix)
107 {
108 	Int2 i;
109 
110 	for(i = 0; i<TX_MATRIX_SIZE; ++i)
111 		MemFree(matrix[i]);
112 	MemFree(matrix);
113 }
114 
115 NLM_EXTERN SeqIdPtr LIBCALL
ScorePtrUseThisGi(ScorePtr sp)116 ScorePtrUseThisGi (ScorePtr sp)
117 
118 {
119     ObjectIdPtr obid;
120     ScorePtr scrp;
121     SeqIdPtr gilist=NULL;
122 
123     for (scrp=sp; scrp; scrp = scrp->next) {
124         obid = scrp->id;
125         if(obid && obid->str) {
126             if (StringICmp(obid->str, "use_this_gi") == 0) {
127                 ValNodeAddInt(&gilist, SEQID_GI, scrp->value.intvalue);
128             }
129         }
130     }
131 
132     return gilist;
133 }
134 
135 /*
136   GetUseThisGi(SeqAlignPtr) looks for the "use_this_gi" flag in the ScorePtr.
137 */
138 
139 NLM_EXTERN SeqIdPtr LIBCALL
GetUseThisGi(SeqAlignPtr seqalign)140 GetUseThisGi(SeqAlignPtr seqalign)
141 
142 {
143 	Boolean retval=FALSE;
144         DenseDiagPtr ddp;
145         DenseSegPtr dsp;
146         ScorePtr sp;
147 	SeqIdPtr gilist=NULL;
148         StdSegPtr ssp;
149 
150 	sp = seqalign->score;
151 	if (sp == NULL)
152 	{
153 		switch (seqalign->segtype)
154 		{
155 			case 1: /*Dense-diag*/
156 				ddp = (DenseDiagPtr) seqalign->segs;
157 				while (ddp)
158 				{
159 					sp = ddp->scores;
160 					if (sp)
161 						break;
162 					ddp = ddp->next;
163 				}
164 				break;
165 			case 2:
166 				dsp = ( DenseSegPtr) seqalign->segs;
167 				if (dsp)
168 				{
169 					sp = dsp->scores;
170 				}
171 				break;
172 			case 3:
173 				ssp = (StdSegPtr) seqalign->segs;
174 				while (ssp)
175 				{
176 					sp = ssp->scores;
177 					if (sp)
178 						break;
179 					ssp = ssp->next;
180 				}
181 				break;
182 			default:
183 				break;
184 		}
185 	}
186 
187 
188 	gilist = ScorePtrUseThisGi(sp);
189 	return gilist;
190 }
191 
/*************************************************************************
*
*	functions and structures used to build the text buffers for an
*	alignment
*
*************************************************************************/
198 
FreeTextAlignList(ValNodePtr tdp_list)199 NLM_EXTERN ValNodePtr FreeTextAlignList(ValNodePtr tdp_list)
200 {
201 	TextAlignBufPtr tdp;
202 	ValNodePtr next;
203 	Int2 i;
204 
205 	while(tdp_list)
206 	{
207 		next = tdp_list->next;
208 		tdp_list->next = NULL;
209 		tdp = (TextAlignBufPtr) tdp_list->data.ptrvalue;
210 		if(tdp->label)
211 			MemFree(tdp->label);
212 		if(tdp->buf)
213 			MemFree(tdp->buf);
214 		if(tdp->matrix_val)
215 			MemFree(tdp->matrix_val);
216 		if(tdp->exonCount > 0)
217 		{
218 			for(i =0; i<3; ++i)
219 				MemFree(tdp->codon[i]);
220 		}
221 		MemFree(tdp);
222 		MemFree(tdp_list);
223 		tdp_list = next;
224 	}
225 
226 	return NULL;
227 }
228 
229 
230 /*######################################################################
231 #
232 #	functions related to ProcessTextAlignNode
233 #
234 #######################################################################*/
235 
236 
237 /******************************************************************************
238 *
239 *	load_text(bsp, pos1, pos2, l_seq, l_pos, mbuf, maxlen)
240 *	load the sequence into text
241 *	bsp: the Bioseq
242 *	pos1: the first position on the sequence.
243 *	pos2: the second position on the sequence.
244 *		if(pos1 and pos2 are negative val, indicate the region in on the*	minus strand
245 *	l_seq: the buffer for loading the sequence
246 *	l_pos: the current position in l_seq. Will be updated after the sequence
247 *		is loaded
248 *	mbuf: buffer from the master sequence. For checking mismatches and positive scores
249 *	maxlen: the maximum length per line. Used to determine the special
250 *	format used for long insertions
251 *	spacing is the space between the two adjacent residues
252 *	mismatch: if TRUE, show the identical residue with
253 *
254 *****************************************************************************/
255 
load_text(BioseqPtr bsp,Int4 pos1,Int4 pos2,CharPtr l_seq,Int4Ptr l_pos,CharPtr mbuf,Int2 maxlen,Int2 spacing,Boolean translate,Boolean mismatch,Int2Ptr matrix_val,Int4Ptr PNTR matrix,Uint1 strand,Int4Ptr PNTR posMatrix,Int4 q_start)256 static Boolean load_text(BioseqPtr bsp, Int4 pos1, Int4 pos2, CharPtr l_seq, Int4Ptr l_pos, CharPtr mbuf, Int2 maxlen, Int2 spacing, Boolean translate, Boolean mismatch, Int2Ptr matrix_val, Int4Ptr PNTR matrix, Uint1 strand, Int4Ptr PNTR posMatrix, Int4 q_start)
257 {
258     SeqPortPtr spp = NULL;
259     ByteStorePtr b_store = NULL;
260     Uint1 code;
261     Int4 start, stop;
262     Uint1 m_res, t_res, stdaa_res;
263     Int2 i;
264     Int2 val;
265     Int4 length, s_len;
266     Int2 c_pos;
267     Char temp[100];
268     Boolean protein;
269     Boolean overflow;
270     Boolean reverse;
271     Boolean is_real;
272     SeqFeatPtr fake_cds;
273     Boolean check_neg;	/*if aa is negative, load it as lower case char*/
274     SeqMapTablePtr smtp;
275 
276     if(*l_pos >= maxlen )
277         return FALSE;
278 
279     /* posMatrix uses NCBIstdaa encoding */
280 
281     if(posMatrix != NULL) {
282         if((smtp = SeqMapTableFindObj(Seq_code_ncbistdaa,
283                                       Seq_code_ncbieaa)) == NULL)
284             return FALSE;
285     }
286 
287     protein = (bsp->mol == Seq_mol_aa);
288     reverse = FALSE;
289     if(protein)
290         code = Seq_code_ncbieaa;
291     else
292         code = Seq_code_iupacna;
293     check_neg = (matrix_val == NULL && matrix != NULL);
294     if(strand == Seq_strand_minus) {	/*on the minus strand*/
295 
296         start = -pos2;
297         stop = -pos1;
298 
299         if(protein) {
300             strand = Seq_strand_plus;
301             reverse = TRUE;
302         }
303 
304     } else {
305         start = pos1;
306         stop = pos2;
307     }
308     if(translate) {
309         fake_cds = make_fake_cds(bsp, start, stop, strand);
310         b_store = ProteinFromCdRegionEx(fake_cds, TRUE, FALSE);
311         SeqFeatFree(fake_cds);
312         if(b_store == NULL)
313             return FALSE;
314         length = (stop - start +1)/3;
315         BSSeek(b_store, 0, SEEK_SET);
316     } else {
317         spp = SeqPortNew(bsp, start, stop, strand, code);
318         length = stop - start +1;
319     }
320     c_pos = (Int2)(*l_pos);
321     overflow = (c_pos >= maxlen);
322     if(maxlen>0 && (length > maxlen)) {	/*large insertions*/
323 
324         for(i =0; i<5 && !overflow; ++i) {
325             if(translate)
326                 l_seq[c_pos++] = (Uint1)BSGetByte(b_store);
327             else {
328                 if(reverse)
329                     SeqPortSeek(spp, length-1 -i, SEEK_SET);
330                 l_seq[c_pos++] = SeqPortGetResidue(spp);
331             }
332             overflow = (c_pos >= maxlen);
333         }
334         for(i =0; i<3 && !overflow; ++i) {
335             l_seq[c_pos++] = '.';
336             overflow = (c_pos >= maxlen);
337         }
338         if(!overflow) {
339             if(translate)
340                 BSSeek(b_store, length-1, SEEK_SET);
341             else if(!reverse)
342                 SeqPortSeek(spp, length-5, SEEK_SET);
343             for(i =0; i<5 && !overflow; ++i) {
344                 if(translate)
345                     l_seq[c_pos++] = (Uint1)BSGetByte(b_store);
346                 else {
347                     if(reverse)
348                         SeqPortSeek(spp, 4-i, SEEK_SET);
349                     l_seq[c_pos++] = SeqPortGetResidue(spp);
350                 }
351                 overflow = (c_pos >= maxlen);
352             }
353         }
354         if(overflow)
355             l_seq[maxlen-1] = '\0';
356         else
357             l_seq[c_pos] = '\0';
358         sprintf(temp, "(length=%ld)", (long) length);
359         s_len = StringLen(temp);
360         StringCat(l_seq, temp);
361         *l_pos = c_pos+s_len;
362     } else {
363         if(translate) {
364             while((val = BSGetByte(b_store)) != EOF) {
365                 t_res = (Uint1)val;
366                 l_seq[c_pos]= t_res;
367                 if(mbuf != NULL) {
368                     m_res = mbuf[c_pos];
369                     if(matrix_val && matrix)
370                         matrix_val[c_pos] = (Int2)matrix[m_res][t_res];
371                     if(mismatch && t_res == m_res)
372                         l_seq[c_pos] = '.';
373                     else if(check_neg && matrix[t_res][m_res] < 0)
374                         l_seq[c_pos] = TO_LOWER(t_res);
375                 }
376                 c_pos += spacing;
377                 if(c_pos >= maxlen) {
378                     c_pos = maxlen;
379                     break;
380                 }
381             }
382         } else {
383             if(reverse)
384                 SeqPortSeek(spp, length-1, SEEK_SET);
385             s_len = 0;
386             while((t_res = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
387                 is_real = IS_ALPHA(t_res);
388                 if(is_real || t_res == '*' || t_res == '-') {
389                     if(is_real && !protein)
390                         t_res = TO_LOWER(t_res);
391                     l_seq[c_pos] = t_res;
392                     if(mbuf != NULL) {
393                         m_res = mbuf[c_pos];
394                         if(matrix_val) {
395                             if(matrix) {
396                                 if(posMatrix != NULL) {
397                                     if(t_res == m_res) /* complete match */
398                                         matrix_val[c_pos] = INT2_MAX;
399                                     else {
400                                         stdaa_res = SeqMapTableConvert(smtp, t_res);
401                                         matrix_val[c_pos] = (Int2)posMatrix[c_pos + q_start][stdaa_res];
402 
403                                         /*
404                                      if(posMatrix[c_pos + q_start][t_res] ==
405                                      matrix[t_res][t_res]) {
406                                      printf("Got it!");
407                                      } */
408 
409                                     }
410                                 } else {
411                                     matrix_val[c_pos] = (Int2)matrix[m_res][t_res];
412                                 }
413 
414                             } else if(t_res == m_res)
415                                 matrix_val[c_pos] = '|';
416                         }
417 
418                         if(mismatch && t_res == m_res)
419                             l_seq[c_pos] = '.';
420                         else if(posMatrix != NULL) {
421                             stdaa_res = SeqMapTableConvert(smtp, m_res);
422                             if(check_neg && posMatrix[c_pos + q_start][stdaa_res] < 0)
423                                 l_seq[c_pos] = TO_LOWER(t_res);
424                         } else { /*regular BLOSSUM62*/
425                             if(check_neg && matrix[t_res][m_res] < 0)
426                                 l_seq[c_pos] = TO_LOWER(t_res);
427                         }
428                     }
429                     c_pos += spacing;
430                     if(c_pos >= maxlen) {
431                         c_pos = maxlen;
432                         break;
433                     }
434                     ++s_len;
435                 }
436                 if(reverse) {
437                     if(s_len == length)
438                         break;
439                     else
440                         SeqPortSeek(spp, length -1 - s_len, SEEK_SET);
441                 }
442             }
443         }
444         *l_pos = c_pos;
445     }
446 
447     if(translate)
448         BSFree(b_store);
449     else
450         SeqPortFree(spp);
451     return TRUE;
452 }
453 
/*##########################################################################
#
#	functions for adding features to the alignment
#
###########################################################################*/
459 
460 
/*
 * ProtBuf: request/result record passed from buffer_for_feature() to the
 * load_prot_seq() gather callback as user data.  buffer_for_feature() sets
 * buf, start, stop and load_codon and resets pos and cvp_list;
 * load_prot_seq() then fills in either pos or cvp_list.
 */
typedef struct protbuf{	/*for loading the translation of a CDS*/
	CharPtr buf;	/*text buffer that receives the protein sequence; NULL when load_codon is set*/
	Int4 start;	/*start position in the CDS; a negative value selects the minus strand*/
	Int4 stop;	/*stop position in the CDS*/
	Int4 pos;	/*position for the feature; -1 until one has been computed*/
	Boolean load_codon;	/*load the codon data for an aa sequence*/
	ValNodePtr cvp_list;	/*list for loading the codons of an aa*/
}ProtBuf, PNTR ProtBufPtr;
469 
470 
471 
472 /************************************************************************
473 *
474 *	check the protein sequence from CDs feature into the buffer
475 *
476 *************************************************************************/
load_prot_seq(GatherContextPtr gcp)477 static Boolean load_prot_seq(GatherContextPtr gcp)
478 {
479 	SeqFeatPtr sfp;
480 	ProtBufPtr pbp;
481 	SeqLocPtr loc;
482 
483 	if(gcp->thistype != OBJ_SEQFEAT)
484 		return FALSE;
485 	sfp = (SeqFeatPtr)(gcp->thisitem);
486 	if(sfp->data.choice !=3)
487 		return FALSE;
488 
489 	pbp = (ProtBufPtr)(gcp->userdata);
490 	if(pbp->load_codon)	/*looking for codon in aa sequence*/
491 	{
492 		pbp->cvp_list = aa_to_codon(sfp, pbp->start, pbp->stop);
493 		return (pbp->cvp_list !=NULL);
494 	}
495 
496 
497 	if(pbp->start <0)/*minus strand*/
498 		loc = SeqLocIntNew((-pbp->stop), (-pbp->start), Seq_strand_minus, SeqLocId(sfp->location));
499 	else
500 		loc = SeqLocIntNew(pbp->start, pbp->stop, Seq_strand_plus, SeqLocId(sfp->location));
501 
502 	pbp->pos = print_protein_for_cds(sfp, pbp->buf, loc, TRUE);
503 	SeqLocFree(loc);
504 	return (pbp->pos != -1);
505 }
506 
507 
508 
buffer_for_feature(Int4 c_left,Int4 c_right,Int4 seq_start,Int4 seq_stop,ValNodePtr fnp_node,Boolean load_codon,ProtBufPtr pbp)509 static Boolean buffer_for_feature(Int4 c_left, Int4 c_right, Int4 seq_start, Int4 seq_stop, ValNodePtr fnp_node, Boolean load_codon, ProtBufPtr pbp)
510 {
511 	FeatNodePtr fnp;
512 	Uint2 itemtype;
513 	CharPtr buf = NULL;
514 	Int2 i=0;
515 	Char symbol;
516 	ValNodePtr curr;
517 	IvalNodePtr inp;
518 	Int4 i_left, i_right;
519 	Int4 f_len;
520 
521 
522 	itemtype = (Uint2)(fnp_node->choice);
523 
524 	if(itemtype!= OBJ_SEQFEAT)
525 		return FALSE;
526 	fnp = (FeatNodePtr) fnp_node->data.ptrvalue;
527 	f_len = seq_stop - seq_start +1;
528 	if(load_codon)
529 		pbp->buf = NULL;
530 	else
531 		pbp->buf = (CharPtr) MemNew((size_t)(f_len +1)*sizeof(Char));
532 	pbp->start = seq_start;
533 	pbp->stop = seq_stop;
534 	pbp->pos = -1;
535 	pbp->load_codon= load_codon;
536 	pbp->cvp_list = NULL;
537 
538 	buf = pbp->buf;
539 	if(buf !=NULL)
540 		MemSet((Pointer)buf,  '~', (size_t)(f_len) * sizeof(Char));
541 	switch(fnp->feattype)/*check symbol for different features*/
542 	{
543 		case FEATDEF_GENE:
544 			symbol = '+';
545 			break;
546 		case FEATDEF_mRNA:
547 			symbol = '^';
548 			break;
549 		case FEATDEF_CDS:
550 			symbol = '$';
551 			break;
552 		default:
553 			symbol = '*';
554 			break;
555 	}
556 	if(fnp->feattype ==FEATDEF_CDS)
557 
558 		GatherItem(fnp->entityID, fnp->itemID, itemtype, (Pointer)(pbp), load_prot_seq);
559 	else
560 	{
561 		if(fnp->interval !=NULL)
562 		{
563 			for(curr = fnp->interval; curr !=NULL; curr = curr->next)
564 			{
565 				inp = (IvalNodePtr) curr->data.ptrvalue;
566 				i_left = inp->gr.left;
567 				i_right = inp->gr.right;
568 				if(!(i_left > c_right || i_right < c_left))
569 				{
570 					i_left = MAX(i_left, c_left);
571 					i_right = MIN(i_right, c_right);
572 					i_left -= c_left;
573 					i_right -=c_left;
574 					for(; i_left<=i_right; ++i_left)
575 						buf[i_left] = symbol;
576 				}
577 			}
578 		}
579 		else
580 		{
581 			i_left = fnp->extremes.left;
582 			i_right = fnp->extremes.right;
583 			if(!(i_left > c_right || i_right < c_left))
584 			{
585 				i_left = MAX(i_left, c_left);
586 				i_right = MIN(i_right, c_right);
587 				i_left -= c_left;
588 				i_right -=c_left;
589 				for(; i_left<=i_right; ++i_left)
590 					buf[i_left] = symbol;
591 			}
592 		}
593 
594 	}
595 	if(buf!=NULL)
596 		buf[f_len]= '\0';
597 	if(pbp->pos == -1)
598 		pbp->pos = ABS(seq_start);
599 
600 	if(pbp->buf != NULL || pbp->cvp_list !=NULL)
601 		return TRUE;
602 	else
603 		return FALSE;
604 }
605 
606 
607 
load_feature_data(ProtBufPtr pbp,FeatNodePtr fnp,Int4 pos,Int4 maxlen,ValNodePtr PNTR fbp_head)608 static Boolean load_feature_data(ProtBufPtr pbp, FeatNodePtr fnp, Int4 pos, Int4 maxlen, ValNodePtr PNTR fbp_head)
609 {
610 	Boolean found;
611 	TextAlignBufPtr fbp;
612 	ValNodePtr curr, pcvp;
613 	CodonVectorPtr cvp;
614 	Boolean load_codon;
615 	CharPtr PNTR codon;
616 	Int2 i;
617 	Int4 f_len;
618 	Char label[100];
619 	CharPtr buf;
620 	Boolean locus = FALSE;
621 
622 	if(pbp == NULL)
623 		return FALSE;
624 	if(pbp->buf == NULL && pbp->cvp_list == NULL)
625 		return FALSE;
626 	load_codon = (pbp->cvp_list !=NULL);
627 	f_len = pbp->stop - pbp->start +1;
628 
629 	found = FALSE;
630 	for(curr = *fbp_head; curr !=NULL; curr = curr->next)
631 	{
632 	   fbp = (TextAlignBufPtr) curr->data.ptrvalue;
633 	   if(fbp->itemID == fnp->itemID)
634 	   {
635 	     if(load_codon)
636 	     {
637 		for(pcvp = pbp->cvp_list; pcvp!=NULL; pcvp= pcvp->next)
638 		{
639 		   cvp = (CodonVectorPtr) pcvp->data.ptrvalue;
640 		   if(cvp->exonCount == fbp->exonCount)
641 		   {
642 			codon = fbp->codon;
643 			for(i =0; i<3; ++i)
644 			{
645 			   if(pos > fbp->f_pos)
646 				make_empty(codon[i] + fbp->f_pos, (Int2)(pos - fbp->f_pos));
647 			   StringCat(codon[i], (cvp->buf[i]+cvp->aa_index));
648 			}
649 			cvp->exonCount = 0;
650 			fbp->f_pos = pos + f_len;
651 		   }
652 		}/*end of for*/
653 	     }
654 	     else
655 	     {
656 		if(fbp->pos == -1)
657 		   fbp->pos = pbp->pos+1;
658 		if(pos > fbp->f_pos)
659 		   make_empty(fbp->buf+fbp->f_pos, (Int2)(pos - fbp->f_pos));
660 		StringCat(fbp->buf, pbp->buf);
661 		fbp->f_pos = pos + f_len;
662 		found = TRUE;
663 	     }
664 	   }
665 	}
666 
667 
668 	if(load_codon)
669 	{
670 	   for(pcvp = pbp->cvp_list; pcvp!=NULL; pcvp= pcvp->next)
671 	   {
672 		cvp = (CodonVectorPtr) pcvp->data.ptrvalue;
673 		if(cvp->exonCount !=0)
674 		{
675 		   fbp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
676 		   fbp->seqEntityID = fnp->entityID;
677 		   fbp->pos = cvp->dna_pos +1;
678 		   fbp->strand = cvp->strand;
679 		   seqid_name(cvp->sip, label, locus, FALSE);
680 		   fbp->label = StringSave(label);
681 		   fbp->buf = NULL;
682 		   for(i =0; i<3; ++i)
683 		   {
684 		   	buf = (CharPtr) MemNew((size_t)(maxlen+1+1+1) * sizeof(Char));
685 			/*1 for partial start, 1 for partial stop*/
686 			if(pos > 0)
687 				make_empty(buf, (Int2)pos);
688 			StringCat(buf, cvp->buf[i]+cvp->aa_index);
689 			fbp->codon[i] = buf;
690 		   }
691 		   fbp->frame = cvp->frame;
692 		   fbp->f_pos = pos+f_len;
693 		   fbp->exonCount = cvp->exonCount;
694 		   fbp->itemID = fnp->itemID;
695 		   fbp->itemID = fnp->itemID;
696 		   fbp->feattype = fnp->feattype;
697 		   fbp->subtype = fnp->subtype;
698 		   fbp->entityID = fnp->entityID;
699 		   fbp->extra_space = (cvp->aa_index == 0);
700 		   ValNodeAddPointer(fbp_head, 0, fbp);
701 
702 		}
703 	     }
704 	}
705 	else
706 	{
707 	   	if(!found)	/*create a new node*/
708 		{
709 		   fbp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
710 		   buf = (CharPtr) MemNew((size_t)(maxlen+1) * sizeof(Char));
711 		   if(pos > 0)
712 			make_empty(buf, (Int2)pos);
713 		   StringCat(buf, pbp->buf);
714 		   fbp->seqEntityID = fnp->entityID;
715 		   fbp->f_pos = pos + f_len;
716 		   fbp->itemID = fnp->itemID;
717 		   fbp->buf = buf;
718 		   fbp->pos = pbp->pos+1;
719 		   if(fnp->label !=NULL)
720 			fbp->label = StringSave(fnp->label);
721 		   fbp->strand = fnp->extremes.strand;
722 		   fbp->itemID = fnp->itemID;
723 		   fbp->feattype = fnp->feattype;
724 		   fbp->subtype = fnp->subtype;
725 		   fbp->entityID = fnp->entityID;
726 		   fbp->exonCount = 0;
727 		   ValNodeAddPointer(fbp_head, 0, fbp);
728 		}
729 	}
730 	if(pbp->buf)
731 		MemFree(pbp->buf);
732 	if(pbp->cvp_list)
733 		free_cvp_list(pbp->cvp_list);
734 	return TRUE;
735 }
736 
737 
738 
739 /**************************************************************************
740 *
741 *	collect_feature_buf(fnp_list, g_left, g_right, seq_start, l_pos,
742 *	fbp_head, max_len)
743 *	collect the features to be shown together with the alignment
744 *	fnp_list: a list of FeatNode associated with the current segment
745 *	g_left: the left position
746 *
747 ***************************************************************************/
collect_feature_buf(ValNodePtr fnp_list,Int4 g_left,Int4 g_right,Int4 seq_start,Int4 l_pos,ValNodePtr fbp_head,Int4 maxlen,Boolean is_aa)748 static ValNodePtr collect_feature_buf(ValNodePtr fnp_list, Int4 g_left, Int4 g_right, Int4 seq_start, Int4 l_pos, ValNodePtr fbp_head, Int4 maxlen, Boolean is_aa)
749 {
750 	ProtBuf pb;
751 	FeatNodePtr fnp;
752 	Int4 c_left, c_right;
753 	Int4 pos;
754 	Int4 fseq_start, fseq_stop;	/*map sequence start stop to the feature*/
755 	Int4 f_len;		/*length of the feature*/
756 	Boolean load_codon;
757 
758 	if(fnp_list == NULL)
759 		return fbp_head;
760 
761 
762 	while(fnp_list)
763 	{
764 	   fnp = (FeatNodePtr) fnp_list->data.ptrvalue;
765 	   c_left = fnp->extremes.left;
766 	   c_right = fnp->extremes.right;
767 	   load_codon = (is_aa && fnp->feattype == FEATDEF_CDS);
768 	   if(!(c_left > g_right || c_right < g_left))
769 	   {
770 		if(c_left > g_left)	/*map the seq pos from the graphic pos*/
771 			fseq_start = seq_start + (c_left-g_left);
772 		else
773 			fseq_start = seq_start;
774 		c_left = MAX(c_left, g_left);
775 		c_right = MIN(c_right, g_right);
776 		f_len = c_right - c_left+1;
777 		fseq_stop = fseq_start+f_len-1;
778 
779 		if(c_left > g_left)
780 		   pos = l_pos + (c_left - g_left);
781 		else
782 		   pos = l_pos;
783 
784 		if(buffer_for_feature(c_left, c_right, fseq_start, fseq_stop, fnp_list, load_codon, &pb))
785 
786 			load_feature_data(&pb, fnp, pos, maxlen, &fbp_head);
787 	   }
788 	   fnp_list = fnp_list->next;
789 	}
790 
791 	return fbp_head;
792 }
793 
/*
 * map_position_by_spacing(distance, spacing, is_head)
 *
 * Convert a distance measured in columns into a count of `spacing`-wide
 * units.  With spacing == 1 the distance is returned unchanged.  Otherwise
 * the result is distance / spacing, adjusted as follows:
 *   - exact multiple and not a head position: one less (never below 0);
 *   - remainder of 2 and a head position: one more.
 */
static Int4 map_position_by_spacing(Int4 distance, Int4 spacing, Boolean is_head)
{
	Int4 units, rest;

	if(spacing == 1)
		return distance;

	units = distance / spacing;
	rest = distance % spacing;

	if(rest == 0 && !is_head)
		return MAX(units-1, 0);
	if(rest == 2 && is_head)
		return units + 1;
	return units;
}
810 
/*
 * add_empty_space(buf, maxlen)
 *
 * If the string in buf is shorter than maxlen, call make_empty() on its
 * end for the missing (maxlen - length) columns.  Strings that are already
 * long enough are left alone.
 */
static void add_empty_space(CharPtr buf, Int4 maxlen)
{
	Int4 used = StringLen(buf);

	if(used >= maxlen)
		return;
	make_empty(buf+used, (Int2)(maxlen-used));
}
/*
 * copy_insertion_bar(buf, ins_2, sym_pos, len)
 *
 * Extend buf to len columns with add_empty_space(), then copy every '|'
 * found in the first sym_pos columns of ins_2 into buf wherever buf still
 * holds a blank.  Does nothing when either buffer is NULL.
 */
static void copy_insertion_bar(CharPtr buf, CharPtr ins_2, Int2 sym_pos, Int4 len)
{
	Int2 col;

	if(buf != NULL && ins_2 != NULL)
	{
		add_empty_space(buf, len);
		col = 0;
		while(col < sym_pos)
		{
			if(ins_2[col] == '|' && buf[col] == ' ')
				buf[col] = '|';
			++col;
		}
	}
}
830 
/*
 * get_long_insert_len(length)
 *
 * Return the number of columns used for a long insertion of the given
 * length: the size of its "(length=N)" label plus 13 further columns.
 */
static Int4 get_long_insert_len(Int4 length)
{
	Char tag[50];
	Int4 tag_len;

	sprintf(tag, "(length=%ld)", (long) length);
	tag_len = StringLen(tag);
	return (tag_len + 13);
}
838 
/*
 * load_tdp_data(head, label, text, itemID, entityID, seqEntityID, bsp_itemID)
 *
 * Create a TextAlignBuf with pos set to -1 that stores the given label and
 * text pointers (the pointers themselves, not copies) together with the
 * supplied IDs, and append it to the list *head.
 *
 * Returns the value returned by ValNodeAddPointer() for the new record.
 */
static ValNodePtr load_tdp_data(ValNodePtr PNTR head, CharPtr label, CharPtr text, Uint4 itemID, Uint2 entityID, Uint2 seqEntityID, Uint4 bsp_itemID)
{
	TextAlignBufPtr rec = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));

	/* text content */
	rec->label = label;
	rec->buf = text;
	rec->pos = -1;

	/* identifiers */
	rec->itemID = itemID;
	rec->bsp_itemID = bsp_itemID;
	rec->entityID = entityID;
	rec->seqEntityID = seqEntityID;

	return ValNodeAddPointer(head, 0, (Pointer)rec);
}
854 
855 /******************************************************************************
856 *
857 *	ProcessTextInsertion(anp, m_left, m_right, bsp)
858 *	convert the insertions that are located within [m_left, m_right] into
859 *	text buffer (a list of TextDrawPtr)
860 *	anp: AlignNodePtr
861 *	m_left, m_right: the current region for selection
862 *	bsp: the BioseqPtr for this anp
863 *
864 *	return a list of TextDrawPtr
865 *
866 ******************************************************************************/
ProcessTextInsertion(AlignNodePtr anp,Int4 m_left,Int4 m_right,BioseqPtr bsp,Int4 line_len,Int1 m_frame)867 static ValNodePtr ProcessTextInsertion(AlignNodePtr anp, Int4 m_left, Int4 m_right, BioseqPtr bsp, Int4 line_len, Int1 m_frame)
868 {
869 	AlignSegPtr asp;
870 	Int4 inslen;		/*length of insertion*/
871 	Int2 insnum;		/*the number of insertions*/
872 	Int2 i, j;
873 	Int4Ptr inslevel;	/*for layout the level of insertions*/
874 	Int4 level;
875 	Int4 inspos;		/*position for insertion*/
876 	Int4 left;
877 	Int4 len;
878 	Int4 last_ins;
879 
880 	CharPtr ins_1;	/* \ symbols for insertions*/
881 	CharPtr ins_2;	/*| symbols for insertion*/
882 	CharPtr ins_seq;
883 	Int4 sym_pos;
884 	Int4 l_pos;
885 	Int4 seq_offset, seq_start, seq_stop;
886 	ValNodePtr head = NULL;
887 	ValNodePtr fbuf_list = NULL, curr;
888 	TextAlignBufPtr fbp;
889 	Int4 g_left, g_right;
890 	Boolean is_aa;
891 	Int4 seq_expand;
892 	Int4 spacing;
893 	Boolean translate;
894 	Uint1 strand;
895 
896 	strand = Seq_strand_plus;
897 	if(anp->seqpos < 0)
898 		strand = Seq_strand_minus;
899 	else if(anp->seqpos == 0 && anp->extremes.strand == Seq_strand_minus)
900 		strand = Seq_strand_minus;
901 	spacing = 1;
902 	if(m_frame > 0)
903 		spacing = 3;
904 	if(m_frame  == -1)
905 	{
906 		translate = TRUE;
907 		seq_expand = 3;
908 	}
909 	else
910 	{
911 		seq_expand = 1;
912 		translate = FALSE;
913 	}
914 	is_aa = (bsp->mol == Seq_mol_aa);
915 	insnum = 0;
916 	for(asp = anp->segs; asp !=NULL; asp = asp->next)
917 	/*checking the insertion numbers*/
918 	{
919 	   if(asp->type == INS_SEG)
920 	   {
921 	   	inspos = asp->ins_pos;
922 		if (inspos >= m_left && inspos<=m_right)
923 		{
924 			++insnum;
925 			asp->line = 0;
926 		}
927 		else
928 			asp->line = -1;
929 	   }
930 	}
931 	if(insnum == 0)
932 		return head;
933 
934 	/*layout the insertions*/
935 	inslevel = (Int4Ptr) MemNew((size_t)(2*insnum) * sizeof(Int4));	/*layout insert*/
936 	level = 0;
937 	len = MAX(m_right - m_left +1, line_len);
938 	for(asp = anp->segs; asp !=NULL; asp = asp->next)
939 	{
940 	   if(asp->type == INS_SEG && asp->line == 0)
941 	   {
942 	   	inspos = asp->ins_pos;
943 		inslen = asp->gr.right/seq_expand;
944 		/* if(inslen > (m_right-m_left+1)) */
945 		if(inslen > len)
946 			inslen = get_long_insert_len(inslen);
947 		inspos -= m_left;
948 		asp->line = find_insert_ypos(&left, inslen, inspos, 0, len-1, inslevel, 2, insnum);
949 		asp->gr.left = left;
950 		level = MAX(asp->line, level);
951 	   }
952 	}
953 	MemFree(inslevel);
954 
955 
956 	/*comput the insertion text*/
957 	for(j = 0; j< (level+1); ++j)
958 	{
959 	   l_pos = 0;
960 	   sym_pos = 0;
961 	   fbuf_list = NULL;
962 	   ins_seq = (CharPtr) MemNew((size_t)(len+1) * sizeof(Char));
963 	   ins_2 = (CharPtr) MemNew((size_t)(len+1) * sizeof(Char));
964 	   if(j == 0)
965 		ins_1 = (CharPtr) MemNew((size_t)(len+1) * sizeof(Char));
966 	   seq_offset = 0;
967 	   for(asp = anp->segs; asp !=NULL; asp = asp->next)
968 	   {
969 	   	if(asp->type == INS_SEG && asp->line >=j)
970 		{
971 
972 			inspos = asp->ins_pos - m_left;
973 			if(inspos > sym_pos)
974 			{
975 			   if(j == 0)	/*the first level*/
976 				make_empty(ins_1+sym_pos, (Int2)(inspos-sym_pos));
977 			   make_empty(ins_2+sym_pos, (Int2)(inspos-sym_pos));
978 			   sym_pos = inspos;
979 			}
980 			if(j == 0)
981 				ins_1[sym_pos] = '\\';
982 			ins_2[sym_pos] = '|';
983 			if(asp->line == j)
984 				last_ins = inspos+1;
985 			++sym_pos;
986 
987 			if(asp->line == j)
988 			{
989 			   seq_start = anp->seqpos + seq_offset;
990 			   seq_stop = seq_start + asp->gr.right -1;
991 			   /* seq_stop = seq_start + map_position_by_spacing(asp->gr.right, spacing, FALSE) * seq_expand + seq_expand -1; */
992 			   if(asp->gr.left > l_pos)
993 			   {
994 				make_empty(ins_seq+l_pos, (Int2)(asp->gr.left-l_pos));
995 				l_pos = asp->gr.left;
996 			   }
997 
998 			   g_left = asp->ins_pos;
999 			   g_right = asp->ins_pos + asp->gr.right -1;
1000 			   /* g_left = asp->gr.left;
1001 			   g_right = g_left + asp->gr.right -1;*/
1002 
1003 			   if((seq_stop - seq_start+1)>len)/*long insertions*/
1004 			   {
1005 		   	      fbuf_list = collect_feature_buf(asp->cnp, g_left, (g_left+4), seq_start, l_pos, fbuf_list, len, is_aa);	/*check the features first*/
1006 		   	      fbuf_list = collect_feature_buf(asp->cnp, g_left, (g_left+4), seq_stop-4, l_pos+8, fbuf_list, len, is_aa);	/*check the features ffirst. 3 is the 3 dots*/
1007 			   }
1008 			   else
1009 			      fbuf_list = collect_feature_buf(asp->cnp, g_left, g_right, seq_start, l_pos, fbuf_list, len, is_aa);
1010 
1011 			   load_text(bsp, seq_start, seq_stop, ins_seq, &l_pos, NULL, (Int2)len, 1, translate, FALSE, NULL, NULL, strand, NULL, 0);
1012 			}
1013 
1014 		}
1015 		if(asp->type == INS_SEG)
1016 			seq_offset += asp->gr.right;
1017 		if(asp->type == DIAG_SEG || asp->type == REG_SEG || asp->type == STD_SEG)
1018 			seq_offset += map_position_by_spacing(asp->gr.right - asp->gr.left +1,
1019 				spacing, TRUE) * seq_expand;
1020 			/* seq_offset += (asp->gr.right - asp->gr.left +1) * seq_expand; */
1021 	   }
1022 
1023 	   ins_2[sym_pos] = '\0';
1024 	   ins_seq[l_pos] = '\0';
1025 	   if(j == 0)
1026 	   {
1027 		ins_1[sym_pos] = '\0';
1028 		load_tdp_data(&head, NULL, ins_1, 0, 0, 0, 0);
1029 	   }
1030 
1031 	   for(curr = head; curr !=NULL; curr = curr->next)
1032 	   /*for(curr = fbuf_list; curr !=NULL; curr = curr->next)*/
1033 	   {
1034 		fbp = (TextAlignBufPtr) curr->data.ptrvalue;
1035 		if(fbp->buf != NULL)
1036 			copy_insertion_bar(fbp->buf, ins_2, (Int2)sym_pos, len);
1037 		else
1038 		{
1039 			for(i =0; i<3; ++i)
1040 				copy_insertion_bar(fbp->codon[i], ins_2, (Int2)sym_pos, len);
1041 		}
1042 	   }
1043 
1044 	   copy_insertion_bar(ins_seq, ins_2, (Int2)sym_pos, len);
1045 	   load_tdp_data(&head, NULL, ins_2, 0, 0, 0, 0);
1046 	   load_tdp_data(&head, NULL, ins_seq, anp->itemID, anp->entityID, anp->seq_entityID, anp->bsp_itemID);
1047 	   ValNodeLink(&head, fbuf_list);
1048 	   fbuf_list = head;
1049 	}
1050 
1051 	return head;
1052 
1053 }
1054 
1055 /***********************************************************************
1056 *
1057 *	ProcessTextAlignNode(anp, m_left, m_right, p_stop, m_buf, locus)
1058 *	Process the AlignNode to make a list of text buffer on the
1059 *	current region
1060 *	anp: AlignNodePtr
1061 *	m_left, m_right: the region on the alignment. Mapped in response
1062 *	to anp->extremes.left, and anp->extremes.right
1063 *	p_stop: the stop position of the previous segment. Used to label
1064 *	the position of a line composed entirely of gaps
1065 *	m_buf: buffer for the master sequence. Used to compare mismatches
1066 *	locus: if TRUE, show the locus name of the alignment
1067 *
1068 *	frame: frame >0, those are the hits from blastx. So, the
1069 *	protein need to be displayed to the proper frame
1070 *	frame 1-3: match to the plus strand of the master
1071 *	frame 4-6: match to the minus strand of the master
*	frame 0:   no translation, no frame match to the master
1073 *	frame -1:  translate the DNA sequence
1074 *	option:    option for display the alignments
1075 *   matrix:	   the protein alignment matrix
1076 *
1077 *
1078 ************************************************************************/
1079 
/*
 * Build the text rows for one AlignNode over the alignment window
 * [m_left, m_right].
 *
 * Returns a ValNode list whose first node holds the TextAlignBuf for the
 * sequence row; the feature buffers gathered by collect_feature_buf and the
 * nodes returned by ProcessTextInsertion are linked after it.
 * Returns NULL when m_frame is outside [-1, 6], when m_frame > 0 and either
 * anp->m_frame differs or m_buf is NULL, when the Bioseq cannot be locked,
 * when the molecule type conflicts with m_frame, or when the node does not
 * overlap the window and TXALIGN_BLUNT_END is not set.
 *
 * *p_stop is read to label a row that contains no residues and is updated
 * to the last stop position seen.  posMatrix and q_start are only forwarded
 * to load_text.
 */
NLM_EXTERN ValNodePtr ProcessTextAlignNode2(AlignNodePtr anp, Int4 m_left, Int4 m_right, Int4Ptr p_stop, CharPtr m_buf, Int4 line_len, Int1 m_frame, Uint4 option, Int4Ptr PNTR matrix, Int4Ptr PNTR posMatrix, Int4 q_start)
{
    Int4 maxlen;
    Int4 g_left, g_right;
    Int4 len;		/*length of the segment*/
    CharPtr l_seq;	/*the buffer for the sequence*/
    Int2Ptr matrix_val;	/*value of each residue in alignment matrix*/
    Int4 l_pos;		/*the start position on the line*/
    Int4 offset;	/*number of leading whitespace characters in m_buf (m_frame > 0 only)*/
    BioseqPtr bsp;
    SeqEntryPtr sep;

    AlignSegPtr asp;
    Int4 seq_offset, off_len;
    Int4 seq_start, seq_stop;
    Int4 s_start, s_stop;	/*for marking the position on one line*/
    CharPtr str;

    ValNodePtr head = NULL, ins_node;
    ValNodePtr fbuf_list = NULL;
    TextAlignBufPtr tdp;
    Boolean is_aa;
    Int4 spacing;
    Boolean translate;
    Int4 seq_expand;
    Boolean show_mismatch;
    Boolean set_matrix;
    Uint1 strand;


    if(m_frame > 6 || m_frame < -1)	/*check the m_frame. -1 for translate the hits*/
        return NULL;


    g_left = anp->extremes.left;
    g_right = anp->extremes.right;
    if(m_left > g_right || m_right < g_left)/*no overlap*/ {
        if(m_frame > 0) {
            if(anp->m_frame != m_frame)
                return NULL;
            if(m_buf == NULL)
                return NULL;
        }
        /*with blunt ends, a node outside the window still yields a row made entirely of '-'*/
        if(option & TXALIGN_BLUNT_END) {
            maxlen = m_right - m_left +1;
            l_seq = (CharPtr) MemGet((size_t)(maxlen+1)*sizeof(Char), MGET_ERRPOST);
            MemSet((Pointer)l_seq, '-',(size_t)(maxlen) * sizeof(Char));
            l_seq[maxlen] = '\0';
            tdp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
            tdp->pos = *p_stop;
            tdp->strand = anp->extremes.strand;
            tdp->label = StringSave(anp->label);
            tdp->buf = l_seq;
            tdp->matrix_val = NULL;
            tdp->itemID = anp->itemID;
            tdp->feattype = 0;
            tdp->subtype = 0;
            tdp->entityID = anp->entityID;
            tdp->seqEntityID = anp->seq_entityID;
            tdp->bsp_itemID = anp->bsp_itemID;
            ValNodeAddPointer(&head, 0, tdp);
            return head;
        }
        else
            return NULL;
    }

    /*a negative seqpos, or seqpos 0 together with minus-strand extremes, selects the minus strand*/
    strand = Seq_strand_plus;
    if(anp->seqpos < 0)
        strand = Seq_strand_minus;
    else if(anp->seqpos == 0 && anp->extremes.strand == Seq_strand_minus)
        strand = Seq_strand_minus;

    l_pos = 0;
    spacing = 1;
    offset = 0;
    if(m_frame > 0) {
        if(anp->m_frame != m_frame)
            return NULL;
        if(m_buf == NULL)
            return NULL;
        /*add the empty space to reflect the reading frame*/
        for(str = m_buf; *str != '\n' && *str != '\0'; ++str) {
            if(IS_WHITESP(*str))
                ++offset;
            else
                break;
        }
        spacing = 3;
    }
    /*start column: distance from the window start to the node start, otherwise the frame offset*/
    if(m_left < g_left) {
        l_pos += (g_left - m_left);
        if(m_frame > 0)
            ++l_pos;
    } else
        l_pos += offset;

    /*from here on every return path must release the Bioseq lock*/
    bsp = BioseqLockById(anp->sip);
    if(bsp == NULL)
        return NULL;
    is_aa = (bsp->mol == Seq_mol_aa);
    /*m_frame > 0 requires a protein Bioseq, m_frame == -1 requires a non-protein Bioseq*/
    if((m_frame > 0 && !is_aa) || (m_frame == -1 && is_aa)) {
        BioseqUnlock(bsp);
        return NULL;
    }
    /*fill in the IDs on the node the first time they are needed*/
    if(anp->seq_entityID == 0) {
        sep = SeqEntryFind(bsp->id);
        anp->seq_entityID = SeqMgrGetEntityIDForSeqEntry(sep);
    }
    if(anp->bsp_itemID == 0)
        anp->bsp_itemID = get_bioseq_itemID(bsp, anp->seq_entityID);

    if(m_frame == -1) {
        translate = TRUE;
        seq_expand = 3;
    } else {
        translate = FALSE;
        seq_expand = 1;
    }

    /*the row buffer is pre-filled with '-' (blunt ends) or ' ' and NUL-terminated*/
    maxlen = m_right - m_left +1;
    l_seq = (CharPtr) MemGet((size_t)(maxlen+1)*sizeof(Char),
                             MGET_ERRPOST);
    if(option & TXALIGN_BLUNT_END)
        MemSet((Pointer)l_seq, '-',(size_t)maxlen * sizeof(Char));
    else
        MemSet((Pointer)l_seq, ' ',(size_t)maxlen * sizeof(Char));
    l_seq[maxlen] = '\0';


    /*per-residue matrix values are stored only when TXALIGN_MATRIX_VAL is set; outside the
      m_frame == 0 non-protein case a matrix must also have been supplied*/
    set_matrix = FALSE;
    if(m_frame == 0 && bsp->mol != Seq_mol_aa) { /*DNA-DNA alignment*/
        if(option & TXALIGN_MATRIX_VAL)
            set_matrix = TRUE;
    } else {
        if(matrix != NULL && (option & TXALIGN_MATRIX_VAL))
            set_matrix = TRUE;
    }
    if(set_matrix) {
        /*NOTE(review): maxlen+1 entries are allocated but only the first maxlen are zeroed*/
        matrix_val = (Int2Ptr) MemGet((size_t)(maxlen+1)*sizeof(Int2), MGET_ERRPOST);
        MemSet((Pointer)matrix_val, 0,(size_t)maxlen * sizeof(Int2));
    } else
        matrix_val = NULL;
    show_mismatch = (Boolean)(option & TXALIGN_MISMATCH);


    /*process  the GAPs and the DIAGs segs*/
    s_start = -1;
    s_stop = -1;
    off_len = 0;	/*running length of the segments already walked, used to derive sequence offsets*/
    for(asp = anp->segs; asp !=NULL; asp = asp->next) {
        g_left = asp->gr.left;
        g_right = asp->gr.right;
        if(!(g_left > m_right || g_right < m_left)) {
            switch(asp->type) {
            case GAP_SEG:
                /*clip the gap to the window and write '-' for its visible part*/
                g_left = MAX(m_left, g_left);
                g_right = MIN(m_right, g_right);
                len = g_right - g_left +1;
                MemSet((Pointer)(l_seq +l_pos), '-',(size_t)len * sizeof(Char));
                l_pos += len;
                break;

            case REG_SEG:
            case DIAG_SEG:
            case STD_SEG:	/* Std-seg only works if the m_frame != 0 */
                /*len is the distance from the start of the alignment to the first visible column of this segment*/
                if(m_left > g_left)
                    len = off_len + m_left - g_left;
                else
                    len = off_len;
                seq_offset = map_position_by_spacing(len, spacing, TRUE) * seq_expand;
                seq_start = anp->seqpos + seq_offset;
                g_left = MAX(m_left, g_left);
                g_right = MIN(m_right, g_right);
                len += (g_right - g_left);
                seq_stop = anp->seqpos + map_position_by_spacing(len, spacing, FALSE) * seq_expand + seq_expand -1;

                if(seq_start <= seq_stop) {	/*the order of start and stop is reversed*/
                    if(s_start == -1)	/*record the end point*/
                        s_start = ABS(seq_start);
                    s_stop = ABS(seq_stop);

                    if(m_frame == 0)
                        fbuf_list = collect_feature_buf(asp->cnp, g_left, g_right, seq_start, l_pos, fbuf_list, maxlen, is_aa);	/*check the features first*/
                    load_text(bsp, seq_start, seq_stop, l_seq, &l_pos, m_buf, (Int2)maxlen,
                              (Int2)spacing, translate, show_mismatch, matrix_val, matrix, strand, posMatrix, q_start);

                }
                break;

            default:
                break;
            }
        }
        /*advance the running length whether or not the segment was visible; for an insertion gr.right is used as its length*/
        if(asp->type == INS_SEG)
            off_len += (asp->gr.right * spacing);
        if(asp->type == REG_SEG || asp->type == DIAG_SEG || asp->type == STD_SEG)
            off_len+=(asp->gr.right - asp->gr.left +1);
    }


    /*the first segment in the layout is a gap segment*/
    if(s_start == -1)
        s_start = *p_stop;
    if(s_stop == -1)	/*gap across the entire region*/
        s_stop = *p_stop;
    *p_stop = s_stop	/*update the stop value*/;
    /*the sequence row goes first in the returned list; it takes ownership of l_seq and matrix_val*/
    tdp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
    tdp->pos = s_start+1;
    tdp->strand = anp->extremes.strand;
    tdp->label = StringSave(anp->label);
    tdp->buf = l_seq;
    tdp->matrix_val = matrix_val;
    tdp->itemID = anp->itemID;
    tdp->feattype = 0;
    tdp->subtype = 0;
    tdp->entityID = anp->entityID;
    tdp->seqEntityID = anp->seq_entityID;
    tdp->bsp_itemID = anp->bsp_itemID;
    ValNodeAddPointer(&head, 0, tdp);
    ValNodeLink(&head, fbuf_list);

    /*insertion rows are appended after the sequence row and its feature rows*/
    ins_node = ProcessTextInsertion(anp, m_left, m_right, bsp, line_len, m_frame);
    ValNodeLink(&head, ins_node);
    BioseqUnlock(bsp);
    return head;
}
1307 
ProcessTextAlignNode(AlignNodePtr anp,Int4 m_left,Int4 m_right,Int4Ptr p_stop,CharPtr m_buf,Int4 line_len,Int1 m_frame,Uint4 option,Int4Ptr PNTR matrix)1308 NLM_EXTERN ValNodePtr ProcessTextAlignNode(AlignNodePtr anp, Int4 m_left, Int4 m_right, Int4Ptr p_stop, CharPtr m_buf, Int4 line_len, Int1 m_frame, Uint4 option, Int4Ptr PNTR matrix)
1309 {
1310     return ProcessTextAlignNode2(anp, m_left, m_right, p_stop, m_buf, line_len, m_frame, option, matrix, NULL, 0);
1311 }
1312 
clean_annot_for_anp(ValNodePtr PNTR head)1313 NLM_EXTERN ValNodePtr clean_annot_for_anp(ValNodePtr PNTR head)
1314 {
1315 	ValNodePtr prev, next, anp_list;
1316 
1317 	prev = NULL;
1318 	anp_list = *head;
1319 	while(anp_list)
1320 	{
1321 		next = anp_list->next;
1322 		if(anp_list->choice == OBJ_SEQANNOT)
1323 		{
1324 			if(prev == NULL)
1325 				*head = next;
1326 			else
1327 				prev->next = next;
1328 			anp_list->next = NULL;
1329 			FreeAlignNode(anp_list);
1330 		}
1331 		else
1332 			prev = anp_list;
1333 		anp_list = next;
1334 	}
1335 
1336 	return (*head);
1337 }
1338 
1339 
1340 
1341 /***********************************************************************
1342 *
1343 *	FreeFeatureList(list)
1344 *	free a list of FeatNode
1345 *
1346 ***********************************************************************/
FreeFeatureList(ValNodePtr list)1347 NLM_EXTERN ValNodePtr FreeFeatureList  (ValNodePtr list)
1348 {
1349 	FeatNodePtr  fnp;
1350 	ValNodePtr   next;
1351 
1352  	while (list != NULL)
1353  	{
1354 		next = list->next;
1355 		fnp = list->data.ptrvalue;
1356 		if (fnp != NULL)
1357 		{
1358 			ValNodeFreeData (fnp->interval);
1359 			MemFree (fnp->label);
1360 			MemFree(fnp->pos_label);
1361 			if(fnp->supress_node != NULL)	/*hidden features*/
1362 				FreeFeatureList(fnp->supress_node);
1363 			MemFree (fnp);
1364 		}
1365 		MemFree (list);
1366 
1367 		list = next;
1368 	}
1369 	return NULL;
1370 }
1371 
1372 /*********************************************************************
1373 *
*	extract_node_list(head, itemType, entityID, feattype, label_type)
*	extract a list of featnode from head which will have the
*	selected itemType, entityID, feattype, label_type.
*	set entityID or feattype to 0 if it is not considered in the selection
1379 *
1380 *********************************************************************/
do_collect(ValNodePtr vnp,Uint1 itemType,Uint2 entityID,Uint1 feattype,Uint1 label_type)1381 static Boolean do_collect(ValNodePtr vnp, Uint1 itemType, Uint2 entityID, Uint1 feattype, Uint1 label_type)
1382 {
1383 	Boolean is_num;		/*is the gene mark a number*/
1384 	FeatNodePtr fnp;
1385 
1386 	if(vnp->choice != itemType)
1387 		return FALSE;
1388 
1389 	fnp = (FeatNodePtr)(vnp->data.ptrvalue);
1390 	if(fnp == NULL)
1391 		return FALSE;
1392 
1393 	if(entityID !=0)
1394 		if(fnp->entityID !=entityID)
1395 			return FALSE;
1396 
1397 
1398 	if(itemType == OBJ_SEQFEAT)
1399 	{
1400 		if((feattype == 0) || (fnp->feattype == feattype))
1401 		{
1402 			if(label_type == ALL_LABEL)
1403 				return TRUE;
1404 			is_num = IS_NUM_GENE(fnp->label);
1405 			if(label_type == STR_LABEL)
1406 				return (is_num == FALSE);
1407 			if(label_type == NUM_LABEL)
1408 				return (is_num == TRUE);
1409 		}
1410 		else
1411 			return FALSE;
1412 	}
1413 
1414 	return TRUE;
1415 
1416 }
1417 
1418 
1419 
extract_node_list(ValNodePtr PNTR head,Uint1 itemType,Uint2 entityID,Uint1 feattype,Uint1 label_type)1420 NLM_EXTERN ValNodePtr extract_node_list(ValNodePtr PNTR head, Uint1 itemType, Uint2 entityID, Uint1 feattype, Uint1 label_type)
1421 {
1422 	ValNodePtr vnp, prev, list, next;
1423 
1424 
1425 	list = NULL;
1426 	prev = NULL;
1427 	vnp = *head;
1428 	while(vnp)
1429 	{
1430 		next = vnp->next;
1431 		if(do_collect(vnp, itemType, entityID, feattype, label_type))
1432 		{
1433 			if(prev == NULL)
1434 				*head = vnp->next;
1435 			else
1436 				prev->next = vnp->next;
1437 			vnp->next = NULL;
1438 			ValNodeLink(&list, vnp);
1439 		}
1440 		else
1441 			prev = vnp;
1442 		vnp = next;
1443 	}
1444 
1445 	return list;
1446 
1447 }
1448 
extract_lollipop_feature(ValNodePtr PNTR head,Int4 scale,BoolPtr lolli_feature)1449 NLM_EXTERN ValNodePtr extract_lollipop_feature(ValNodePtr PNTR head, Int4 scale, BoolPtr lolli_feature)
1450 {
1451 	ValNodePtr vnp, prev, list, next;
1452 	FeatNodePtr fnp;
1453 	Boolean extract = FALSE;
1454 
1455 
1456 	list = NULL;
1457 	prev = NULL;
1458 	vnp = *head;
1459 	while(vnp)
1460 	{
1461 		next = vnp->next;
1462 		extract = FALSE;
1463 		if(vnp->choice == OBJ_SEQFEAT)
1464 		{
1465 			fnp = vnp->data.ptrvalue;
1466 			if((fnp->extremes.right - fnp->extremes.left +1) <= scale)
1467 				extract = TRUE;
1468 			else if(lolli_feature != NULL)
1469 				extract = lolli_feature[fnp->feattype];
1470 		}
1471 		if(extract)
1472 		{
1473 			if(prev == NULL)
1474 				*head = vnp->next;
1475 			else
1476 				prev->next = vnp->next;
1477 			vnp->next = NULL;
1478 			ValNodeLink(&list, vnp);
1479 		}
1480 		else
1481 			prev = vnp;
1482 		vnp = next;
1483 	}
1484 
1485 	return list;
1486 
1487 }
1488 
/*decide whether the alignment is of different molecules */
get_alignment_type(AnnotInfoPtr annot_info)1490 NLM_EXTERN Uint1 get_alignment_type(AnnotInfoPtr annot_info)
1491 {
1492 	if(annot_info->blast_type == ALIGN_BLASTX)
1493 		return ALIGN_DNA_TO_PROT;
1494 	if(annot_info->blast_type == ALIGN_TBLASTN)
1495 		return ALIGN_PROT_TO_DNA;
1496         if(annot_info->blast_type == ALIGN_PSITBLASTN)
1497                 return ALIGN_PROT_TO_DNA;
1498 	if(annot_info->blast_type == ALIGN_TBLASTX)
1499 		return ALIGN_TDNA_TO_TDNA;
1500 	return 0;
1501 }
1502 
1503 
1504 /*********************************************************************
1505 *
1506 *	FreeAlignNode(list)
1507 *	free a list of AlignNodePtr
1508 *
1509 *********************************************************************/
FreeAlignNode(ValNodePtr list)1510 NLM_EXTERN ValNodePtr FreeAlignNode(ValNodePtr list)
1511 {
1512 	AlignNodePtr anp;
1513 	AlignSegPtr asp, aspnext;
1514 	ValNodePtr   next;
1515 	AlignBlockPtr abp, abpnext;
1516 	AnnotInfoPtr annot_info;
1517 
1518  	while (list != NULL)
1519  	{
1520 		next = list->next;
1521 		if(list->choice == OBJ_SEQANNOT)
1522 		{
1523 			annot_info = list->data.ptrvalue;
1524 			MemFree(annot_info);
1525 		}
1526 		else
1527 		{
1528 			anp = list->data.ptrvalue;
1529 			if (anp != NULL)
1530 			{
1531 				asp = anp->segs;
1532 				while(asp !=NULL)
1533 				{
1534 					aspnext = asp->next;
1535 					asp->next = NULL;
1536 					if(asp->cnp != NULL)
1537 						FreeFeatureList(asp->cnp);
1538 					if(asp->mismatch)
1539 						ValNodeFree(asp->mismatch);
1540 					MemFree(asp);
1541 					asp = aspnext;
1542 				}
1543 				abp = anp->blocks;
1544 				while(abp != NULL)
1545 				{
1546 					abpnext = abp->next;
1547 					MemFree(abp);
1548 					abp = abpnext;
1549 				}
1550 				if(anp->pop_sap !=NULL)
1551 					SeqAnnotFree(anp->pop_sap);
1552 				SeqIdFree(anp->sip);
1553 				MemFree (anp->label);
1554 				MemFree (anp->clone_id);
1555 				MemFree(anp);
1556 			}
1557 		}
1558 		MemFree (list);
1559 
1560 		list = next;
1561 	}
1562 	return NULL;
1563 }
1564 
1565 
1566 
1567 
1568 /***********************************************************************
1569 *
1570 * 	CollectSegmentSeq(bsp, slp, seqID, offset, head)
1571 *		collect the segments in Bioseq
1572 *	bsp: Bioseq
1573 *	slp: the location on bsp to be collected
1574 *	seqID: the order of bsp in the current list
1575 *	offset: the offset to the graphic
1576 *	head: the head of the previous list
1577 *	return the head of new list
1578 *	if bsp is a segmented sequence, the corresponding segments are recorded
1579 *	in inp. Otherwise there is only one inp for slp.
1580 *
1581 ************************************************************************/
1582 
1583 
/*
 * CollectHeader: working state for collecting the data of one sequence
 * display.  Fields without a comment are not described by the code visible
 * next to this definition.
 */
typedef struct collectheader{	/*for collecting data of a sequence display*/
	CollectSeqOptionPtr csop;	/*option for the sequences+features*/
	ValNodePtr features;		/*a list of FeatNode for storing the feature data*/
	ValNodePtr prev_feat;		/*previous node, to speed it up*/
	CollectAlignOptionPtr caop;	/*option for the alignment*/
	ValNodePtr aligns;			/*a list of AlignNode for storing alignment data*/
	ValNodePtr prev_align;		/*the previous node for alignment*/
	SeqLocPtr slp;				/*target Seq-loc*/
	SeqIdPtr maybe_mapid;			/*a possible mapid*/

	ObjMgrPtr omp;				/*for save some space in the collection*/
	Char thislabel[101];		/*NOTE(review): presumably a label scratch buffer, 100 characters plus terminator - confirm*/
	Char ftype[101];			/*NOTE(review): presumably a feature-type name buffer - confirm*/
	Uint2 subtype;
	Int2 filter_level;
	GeneDataPtr gdata;
	Uint2 priority;
	Boolean take_all_annot;		/*take everything in a Seq-annot*/
	Boolean load_align;
	Boolean skip_feature;
	Uint1 index;
	Char annotDB[21];			/*NOTE(review): presumably an annotation database name, 20 characters plus terminator - confirm*/
	Boolean is_lod_score;
}CollectHeader, PNTR CollectHeaderPtr;
1608 
1609 
1610 
/*
 * Append a new ValNode carrying (type, data) after *prev.  When *prev is
 * NULL the new node becomes *head instead.  *prev is then advanced to the
 * new node, so repeated calls append without walking the list.
 */
static void link_data_for_collect (ValNodePtr PNTR head, ValNodePtr PNTR prev, Pointer data, Uint1 type)
{
	ValNodePtr node = ValNodeNew(NULL);

	node->data.ptrvalue = data;
	node->choice = type;

	if(*prev != NULL)
		(*prev)->next = node;
	else
		*head = node;

	*prev = node;
}
1626 
/* Return the last node of the list, or NULL when the list is empty. */
static ValNodePtr get_last_node (ValNodePtr head)
{
	ValNodePtr tail;

	for(tail = head; tail != NULL && tail->next != NULL; tail = tail->next)
		continue;
	return tail;
}
1636 
CreateFeatNode(ValNodePtr PNTR f_head,ValNodePtr PNTR prev,Uint2 itemType,Uint4 itemID,Uint2 entityID,Uint2 feattype)1637 static FeatNodePtr CreateFeatNode (ValNodePtr PNTR f_head, ValNodePtr PNTR prev, Uint2 itemType, Uint4 itemID, Uint2 entityID, Uint2 feattype)
1638 {
1639 	FeatNodePtr  fnp;
1640 
1641 	fnp = MemNew (sizeof (FeatNode));
1642 	fnp->itemID = itemID;
1643 	fnp->entityID = entityID;
1644 	fnp->feattype = (Uint1)feattype;
1645 	link_data_for_collect(f_head, prev, (Pointer)fnp, (Uint1)(itemType));
1646 	return fnp;
1647 }
1648 
collect_feature_label(Uint1 format)1649 static Boolean collect_feature_label(Uint1 format)
1650 {
1651 	return (format <=OM_LABEL_SUMMARY);
1652 }
1653 
collect_sequence_label(Uint1 format)1654 static Boolean collect_sequence_label(Uint1 format)
1655 {
1656 	return (format >= PRINTID_FASTA_SHORT && format <=PRINTID_REPORT);
1657 }
1658 
1659 /*#####################################################################
1660 #
1661 #	functions related to the collection of the features of alignment
1662 #
1663 #####################################################################*/
1664 
1665 
1666 /****************************************************************
1667 *
1668 *	satcollfunc()
1669 *	callback function for collecting features on Sequence
1670 *	alignment. It recalculates the feature intervals based on
1671 *	the intervals in the aligned segments
1672 *
1673 ****************************************************************/
/*
 * AlignFeat: user data handed to satcollfunc while gathering the features
 * that fall on one AlignNode.
 */
typedef struct alignfeat
{
	ObjMgrPtr omp;			/*object manager, passed to ObjMgrTypeFind in satcollfunc*/
	AlignNodePtr anp;		/*the AlignNode whose segments receive the collected features*/
	CollectSeqOptionPtr csop;	/*feature selection, label format and label size options*/
	Int2 filter_level;		/*satcollfunc skips a feature when this equals gcp->seglevel+1*/
	Boolean all_feature;		/*set by satcollfunc for each Seq-annot from is_powerblast_feature; TRUE bypasses the csop->features filter*/
}AlignFeat, PNTR AlignFeatPtr;
1682 
is_powerblast_feature(SeqAnnotPtr annot)1683 static Boolean is_powerblast_feature(SeqAnnotPtr annot)
1684 {
1685 	ValNodePtr desc;
1686 
1687 	if(annot->type != 1)
1688 		return FALSE;
1689 	for(desc = annot->desc; desc != NULL; desc = desc->next)
1690 	{
1691 		if(desc->choice == Annot_descr_name)
1692 		{
1693 			if(StringICmp(desc->data.ptrvalue, "powblast") == 0)
1694 				return TRUE;
1695 			if(StringICmp(desc->data.ptrvalue, "powerblast") == 0)
1696 				return TRUE;
1697 			/*powerBlast feature*/
1698 			if(StringNCmp(desc->data.ptrvalue, "PB:", 3) == 0)
1699 				return TRUE;
1700 		}
1701 	}
1702 	return FALSE;
1703 }
1704 
1705 
/*
 * Gather callback that attaches features to the segments of an AlignNode.
 *
 * gcp->userdata must be an AlignFeat.  For a Seq-annot the callback only
 * records in afp->all_feature whether it is a powerblast annotation.  For a
 * Seq-feat that passes the filters, every non-gap segment whose range
 * overlaps gcp->extremes gets a new FeatNode appended to asp->cnp, with the
 * extremes and intervals clipped to the segment and shifted by the gap and
 * insertion lengths accumulated so far.
 *
 * Returns FALSE when the user data or its csop is missing, or when
 * csop->features is NULL and all_feature is FALSE; TRUE otherwise.
 * NOTE(review): FALSE presumably tells the gather engine to stop - confirm
 * against the gather documentation.
 */
static Boolean satcollfunc(GatherContextPtr gcp)
{
	SeqFeatPtr sfp;
	AlignFeatPtr afp;
	CollectSeqOptionPtr csop;

	AlignNodePtr anp;
	Uint2 feat_subtype;	/*types defined by objfdef.h*/
	SeqLocPtr slp = NULL;	/*NOTE(review): never read in this function*/
	Char label[101];
	ObjMgrTypePtr omtp;
	IvalNodePtr new;
	FeatNodePtr fnp;
	AlignSegPtr asp;
	Int4 current_pos;	/*NOTE(review): assigned below but never read*/
	Uint1 strand;		/*NOTE(review): assigned below but never read*/
	Int4 seglen;
	SeqLocPtr head;		/*NOTE(review): assigned below but never read*/
	Int2 label_size;
	Int4 left, right, e_left, e_right;
	Int4 i_left, i_right;
	Int4 ins_len, gap_len;
	GatherRangePtr grp;
	Int2 i;
	ValNodePtr prev;

	afp= (AlignFeatPtr)(gcp->userdata);
	if(afp == NULL || afp->csop == NULL)
		return FALSE;

	/*a Seq-annot only updates the all_feature flag used for the features that follow*/
	if(gcp->thistype == OBJ_SEQANNOT)
	{
		afp->all_feature = is_powerblast_feature((SeqAnnotPtr)(gcp->thisitem));
		return TRUE;
	}
	if(gcp->thistype != OBJ_SEQFEAT)
		return TRUE;

	if(afp->filter_level == gcp->seglevel+1)
		return TRUE;
	csop = afp->csop;
	label_size = MIN(100, csop->label_size);	/*label[] holds 101 characters*/
	if(csop->features == NULL && afp->all_feature == FALSE)
		return FALSE;

	omtp=ObjMgrTypeFind(afp->omp, OBJ_SEQFEAT, NULL, NULL);
	if(omtp == NULL)
		return TRUE;

	feat_subtype = 0;
	if(omtp->subtypefunc !=NULL)
		feat_subtype =  (*(omtp->subtypefunc)) (gcp->thisitem);
	if((afp->all_feature == FALSE) &&
		(csop->features[feat_subtype] == FALSE))	/*do not collect the current feature*/
		return TRUE;


	anp = afp->anp;
	current_pos = anp->seqpos;
	if(anp->seqpos < 0)
		strand = Seq_strand_minus;
	else
		strand = Seq_strand_plus;
	sfp = gcp->thisitem;
	/*build the label once; it is copied into every FeatNode created below*/
	label[0] = '\0';
	if(collect_feature_label(csop->flabel_format[feat_subtype]))
		if(omtp->labelfunc !=NULL)
			(*(omtp->labelfunc))(sfp, label, label_size, csop->flabel_format[feat_subtype]);

	/*map to the location of aligned segs*/
	if(gcp->product)	/*for protein sequence alignment*/
		head = sfp->product;
	else
		head = sfp->location;
	/*left/right track the range of the current segment, starting at the alignment's left extreme*/
	left = anp->extremes.left;
	ins_len = 0;
	gap_len = 0;
	e_left = gcp->extremes.left;
	e_right = gcp->extremes.right;
	for(asp = anp->segs; asp !=NULL; asp = asp->next)
	{
		if(asp->type != GAP_SEG)
		{
			prev = get_last_node (asp->cnp);	/*so the new FeatNode is appended at the tail*/
			if(asp->type == INS_SEG)
			{
				/*for an insertion gr.right is used as the length*/
				seglen = asp->gr.right;
				/*ins_len += seglen;*/
			}
			else
				seglen = asp->gr.right - asp->gr.left +1;
			right = left + seglen -1;
			if(!(left > e_right || right < e_left))
			{
				/*the feature overlaps this segment: record the clipped, shifted extremes*/
				fnp = CreateFeatNode (&(asp->cnp), &prev, OBJ_SEQFEAT, gcp->itemID, gcp->entityID, feat_subtype);
				fnp->extremes.left = MAX(left, e_left)  + gap_len;
				fnp->extremes.right = MIN(right, e_right) + gap_len;
				fnp->extremes.left -=ins_len;
				fnp->extremes.right -= ins_len;
				fnp->extremes.strand = gcp->extremes.strand;
				if(label[0] != '\0')
					fnp->label = StringSave(label);
				/*repeat the clipping for each interval of the feature that touches the segment*/
				grp = gcp->rdp;
				for(i=0; (grp!=NULL) && i<gcp->num_interval; ++i)
				{
					i_left = grp->left;
					i_right = grp->right;
					if(!(left > i_right || right < i_left))
					{
						new = MemNew(sizeof(IvalNode));
						new->gr.left = MAX(left, i_left) - ins_len + gap_len;
						new->gr.right = MIN(right, i_right) - ins_len + gap_len;
						new->gr.strand = grp->strand;
						ValNodeAddPointer(&(fnp->interval), 0, new);
					}
					++grp;
				}
			}
			left = right +1;
			/*the insertion's own length is counted only for the segments after it*/
			if(asp->type == INS_SEG)
				ins_len += seglen;
		}
		else
			gap_len += (asp->gr.right - asp->gr.left +1);
	}
	return TRUE;
}
1833 
1834 
1835 /******************************************************************
1836 *
1837 *	CollectFeatureForAlignNode(slp, anp, csop)
1838 *	collect feature for the alignment
1839 *	slp: the target Seq-loc
1840 *	anp: the AlignNode belong to the target Seq-loc
1841 *	csop: the option for gathering the features
1842 *
1843 ******************************************************************/
CollectFeatureForAlignNode(SeqLocPtr slp,AlignNodePtr anp,CollectSeqOptionPtr csop)1844 NLM_EXTERN Boolean CollectFeatureForAlignNode(SeqLocPtr slp, AlignNodePtr anp, CollectSeqOptionPtr csop)
1845 {
1846 	GatherScope gs;
1847 	AlignFeat af;
1848 	BioseqPtr bsp;
1849 
1850 	if(slp == NULL || anp == NULL || csop == NULL)
1851 		return FALSE;
1852 
1853 	if(anp->seq_entityID == 0)
1854 		return FALSE;
1855 	bsp = BioseqLockById(SeqLocId(slp));
1856 
1857 
1858 	MemSet((Pointer)&gs, 0, sizeof (GatherScope));
1859 	gs.get_feats_location = TRUE;
1860 	gs.get_feats_product =( bsp->mol == Seq_mol_aa);
1861 	MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
1862 
1863 	gs.ignore[OBJ_SEQANNOT] = FALSE;
1864 	gs.ignore[OBJ_SEQFEAT] = FALSE;
1865 
1866 	gs.nointervals = FALSE;	/*need to recalculate the intervals*/
1867 	/* gs.seglevels = 1;
1868 	gs.seglevels = 1;
1869 	gs.stop_on_annot = TRUE;*/
1870 	gs.ignore_top = FALSE;
1871 	gs.currlevel = 0;
1872 	gs.offset = anp->extremes.left;
1873 	gs.target = slp;
1874 
1875 	af.anp = anp;
1876 	af.csop = csop;
1877 	af.omp = ObjMgrGet();
1878 	af.filter_level = 0;
1879 
1880 	GatherEntity(anp->seq_entityID, (Pointer)(&af), satcollfunc, &gs);
1881 	BioseqUnlock(bsp);
1882 	return TRUE;
1883 }
1884 
1885 
1886 /******************************************************************
1887 *
1888 *	CollectFeatureForAlign(slp, anp, featureOrder, groupOrder)
1889 *	collect feature for the alignment
1890 *	slp: the target Seq-loc
1891 *	anp: the AlignNode belong to the target Seq-loc
1892 *	featureOrder: the order of features
1893 *	groupOrder: the order of the groups
1894 *	it takes the anp->seq_entityID and searches for the features
1895 *
1896 ******************************************************************/
1897 
CollectAlignFeature(SeqLocPtr slp,AlignNodePtr anp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint1Ptr flabel_format)1898 static Boolean CollectAlignFeature(SeqLocPtr slp, AlignNodePtr anp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint1Ptr flabel_format)
1899 {
1900 	CollectSeqOption cs_option;
1901 	Boolean show_feature, collect = FALSE;
1902 	Int2 i;
1903 	ValNode vn;
1904 
1905 	if(featureOrder == NULL || groupOrder == NULL || slp == NULL || anp == NULL)
1906 		return FALSE;
1907 
1908 	cs_option.nointerval = FALSE;
1909 	cs_option.slabel_format = PRINTID_TEXTID_ACCESSION;
1910 	cs_option.seglevels = 0;
1911 	cs_option.label_size = 10;
1912 	for( i =0; i<FEATDEF_ANY; ++i)	/*for checking the features to load*/
1913 	{
1914 		show_feature = (featureOrder[i] != 0);
1915 		cs_option.features[i] = show_feature;
1916 		if(show_feature)
1917 			collect = TRUE;
1918 	}
1919 	if(collect)
1920 	{
1921 		if(flabel_format == NULL)
1922 			MemSet((Pointer)(cs_option.flabel_format), OM_LABEL_CONTENT, (size_t)FEATDEF_ANY*sizeof(Uint1));
1923 		else
1924 			MemCopy(&(cs_option.flabel_format), &flabel_format, (size_t)FEATDEF_ANY*sizeof(Uint1));
1925 		CollectFeatureForAlignNode(slp, anp, &cs_option);
1926 		vn.choice = OBJ_SEQALIGN;
1927 		vn.data.ptrvalue = anp;
1928 		vn.next = NULL;
1929 		SortAlignmentFeature(&vn, featureOrder, groupOrder);
1930 		return TRUE;
1931 	}
1932 	else
1933 		return FALSE;
1934 }
1935 
CollectFeatureForAlign(SeqLocPtr slp,AlignNodePtr anp,Uint1Ptr featureOrder,Uint1Ptr groupOrder)1936 NLM_EXTERN Boolean CollectFeatureForAlign(SeqLocPtr slp, AlignNodePtr anp, Uint1Ptr featureOrder, Uint1Ptr groupOrder)
1937 {
1938 	return CollectAlignFeature(slp, anp, featureOrder, groupOrder, NULL);
1939 }
1940 
1941 /******************************************************************
1942 *
1943 *	SortAlignmentFeature(anp_node, featureOrder, groupOrder)
1944 *	sort the list of FeatNode in aligned segment (asp->cnp) to the
1945 *	proper order of featureOrder and groupOrder
1946 *
1947 *******************************************************************/
SortAlignmentFeature(ValNodePtr anp_node,Uint1Ptr featureOrder,Uint1Ptr groupOrder)1948 NLM_EXTERN void SortAlignmentFeature(ValNodePtr anp_node, Uint1Ptr featureOrder, Uint1Ptr groupOrder)
1949 {
1950 	AlignNodePtr anp;
1951 	AlignSegPtr asp;
1952 
1953 	while(anp_node)
1954 	{
1955 		if(anp_node->choice != OBJ_SEQANNOT)
1956 		{
1957 			anp = anp_node->data.ptrvalue;
1958 			for(asp = anp->segs; asp !=NULL; asp = asp->next)
1959 					if(asp->cnp !=NULL)
1960 						asp->cnp = SortFeatNode(asp->cnp, featureOrder, groupOrder);
1961 		}
1962 		anp_node = anp_node->next;
1963 	}
1964 }
1965 
1966 
make_current_seqport(SeqLocPtr masterloc,Int4 offset,Uint1 code)1967 static SeqPortPtr make_current_seqport(SeqLocPtr masterloc, Int4 offset, Uint1 code)
1968 {
1969 	SeqLocPtr slp;
1970 	Int4 start, stop;
1971 	Uint1 strand;
1972 	SeqPortPtr spp;
1973 
1974 
1975 	start = SeqLocStart(masterloc);
1976 	stop = SeqLocStop(masterloc);
1977 	strand = SeqLocStrand(masterloc);
1978 
1979 	if(strand == Seq_strand_minus)
1980 		stop -= offset;
1981 	else
1982 		start += offset;
1983 	slp = SeqLocIntNew(start, stop, strand, SeqLocId(masterloc));
1984 	spp = SeqPortNewByLoc(slp, code);
1985 	SeqLocFree(slp);
1986 
1987 	return spp;
1988 }
1989 
1990 
/*
*	Append a new ValNode carrying (choice, val) to a list.
*	*prev is the current tail, or NULL when the caller has none, in
*	which case the new node is stored in *head. *prev is updated to
*	the new node so that repeated appends need no list walk.
*/
static void add_int_to_node (ValNodePtr PNTR head, ValNodePtr PNTR prev, Int4 val, Uint1 choice)
{
	ValNodePtr node = ValNodeNew(NULL);

	node->choice = choice;
	node->data.intvalue = val;

	if(*prev != NULL)
		(*prev)->next = node;
	else
		*head = node;
	*prev = node;
}
2005 
2006 
CollectMismatchForAlign(AlignNodePtr anp,SeqLocPtr masterloc,BioseqPtr bsp,Int4 offset)2007 static Boolean CollectMismatchForAlign(AlignNodePtr anp, SeqLocPtr masterloc, BioseqPtr bsp, Int4 offset)
2008 {
2009 
2010 	AlignSegPtr asp;
2011 	SeqPortPtr spp, mspp;
2012 	Uint1 code;
2013 	Uint1 res, mres;
2014 	Int4 start = 0, stop = 0;
2015 	Uint1 strand;
2016 	Int4 current_pos;
2017 	Int4 seglen, j;
2018 	ValNodePtr prev;
2019 
2020 
2021 
2022 	if(anp->is_master || SeqIdForSameBioseq(SeqLocId(masterloc), anp->sip))
2023 		return FALSE;
2024 	if(bsp->mol == Seq_mol_aa)
2025 		code = Seq_code_ncbieaa;
2026 	else
2027 		code = Seq_code_iupacna;
2028 	/* mspp = SeqPortNewByLoc(masterloc, code); */
2029 
2030 	current_pos = anp->seqpos;
2031 	if(anp->seqpos < 0)
2032 	{
2033 		strand = Seq_strand_minus;
2034 		if(ABS(anp->seqpos) < bsp->length-1)
2035 			anp->extremes.l_trunc = TRUE;
2036 	}
2037 	else
2038 	{
2039 		strand = Seq_strand_plus;
2040 		if(anp->seqpos > 0)
2041 			anp->extremes.l_trunc = TRUE;
2042 	}
2043 	for(asp = anp->segs; asp !=NULL; asp = asp->next)
2044 	{
2045 		if(asp->type == INS_SEG)
2046 			seglen = asp->gr.right;
2047 		else
2048 			seglen = asp->gr.right - asp->gr.left + 1;
2049 		switch(asp->type)
2050 		{
2051 			case GAP_SEG:
2052 				break;
2053 
2054 			case INS_SEG:
2055 				current_pos += seglen;
2056 				break;
2057 
2058 			case REG_SEG:
2059 			case DIAG_SEG:
2060 				prev = get_last_node (asp->mismatch);
2061 				if(strand == Seq_strand_minus)
2062 				{
2063 					stop = - current_pos;
2064 					start = stop - (seglen-1);
2065 				}
2066 				else
2067 				{
2068 					start = current_pos;
2069 					stop = start + (seglen -1);
2070 				}
2071 				spp = SeqPortNew(bsp, start, stop, strand, code);
2072 
2073 				mspp = make_current_seqport(masterloc, (asp->gr.left - offset), code);
2074 				/* SeqPortSeek(mspp, (asp->gr.left - offset), SEEK_SET); */
2075 
2076 				for(j =0; j<seglen; ++j)
2077 				{
2078 					res = SeqPortGetResidue(spp);
2079 					mres = SeqPortGetResidue(mspp);
2080 					while(res == SEQPORT_EOS || res == SEQPORT_VIRT)
2081 						res = SeqPortGetResidue(spp);
2082 					while(mres == SEQPORT_EOS || mres == SEQPORT_VIRT)
2083 						mres = SeqPortGetResidue(mspp);
2084 					if(IS_ALPHA(res) && IS_ALPHA(mres))
2085 					{
2086 						if(res != mres)
2087 						{
2088 							if(bsp->mol != Seq_mol_aa && !StrChr("acgtACGT", res))
2089 								add_int_to_node (&(asp->mismatch), &prev, (j+asp->gr.left), MISMATCH_AMB);
2090 							else
2091 								add_int_to_node (&(asp->mismatch), &prev, (j+asp->gr.left), MISMATCH_LINE);
2092 							/* ValNodeAddInt(&(asp->mismatch), 0, (j+asp->gr.left)); */
2093 						}
2094 					}
2095 					else if(res == SEQPORT_EOF || mres == SEQPORT_EOF)
2096 						break;
2097 
2098 				}
2099 				current_pos += seglen;
2100 
2101 				SeqPortFree(spp);
2102 				SeqPortFree(mspp);
2103 				break;
2104 
2105 			default:
2106 				break;
2107 		}
2108 		/*current_pos += seglen;*/
2109 	}
2110 	/* SeqPortFree(mspp); */
2111 	if(strand == Seq_strand_minus)
2112 	{
2113 		if(start > 0)
2114 			anp->extremes.r_trunc = TRUE;
2115 	}
2116 	else
2117 	{
2118 		if(stop < bsp->length-1)
2119 			anp->extremes.r_trunc = TRUE;
2120 	}
2121 
2122 	return TRUE;
2123 }
2124 
2125 
make_one_block(SeqRangePtr srp,Int4 seq_start,Int4 seq_stop,Int4 left,Int4 right,Int2 order,AlignNodePtr anp)2126 static AlignBlockPtr make_one_block(SeqRangePtr srp, Int4 seq_start, Int4 seq_stop, Int4 left, Int4 right, Int2 order, AlignNodePtr anp)
2127 {
2128 	AlignBlockPtr abp;
2129 	Int4 off_left, off_right;
2130 
2131 	if(srp == NULL || anp == NULL)
2132 		return NULL;
2133 	if(srp->start > seq_stop || srp->stop < seq_start)
2134 		return NULL;
2135 
2136 	if(srp->strand == Seq_strand_minus)
2137 	{
2138 		off_left = MAX(0, (srp->stop - seq_stop));
2139 		off_right = MAX(0, (seq_start - srp->start));
2140 	}
2141 	else
2142 	{
2143 		off_left = MAX(0, (seq_start - srp->start));
2144 		off_right = MAX(0, (srp->stop - seq_stop));
2145 	}
2146 
2147 	abp = MemNew(sizeof(AlignBlock));
2148 	abp->gr.left = left + off_left;
2149 	abp->gr.right = right - off_right;
2150 	abp->gr.strand = 0;
2151 	if(abp->gr.left == anp->extremes.left&& anp->extremes.strand == Seq_strand_minus)
2152 		abp->gr.strand = Seq_strand_minus;
2153 	if(abp->gr.right == anp->extremes.right && anp->extremes.strand == Seq_strand_plus)
2154 		abp->gr.strand = Seq_strand_plus;
2155 	abp->order = order;
2156 	return abp;
2157 }
2158 
2159 
/*
*	Append "new" to the end of the AlignBlock list *head (or make
*	it the head of an empty list) and return "new".
*/
static AlignBlockPtr link_align_blocks(AlignBlockPtr PNTR head, AlignBlockPtr new)
{
	AlignBlockPtr tail = *head;

	if(tail == NULL)
	{
		*head = new;
		return new;
	}

	while(tail->next != NULL)
		tail = tail->next;
	tail->next = new;
	return new;
}
2175 
2176 
make_blocks(AlignDataPtr adp,Int4 seq_start,Int4 seq_stop,Int2 order,AlignNodePtr anp)2177 static Boolean make_blocks(AlignDataPtr adp, Int4 seq_start, Int4 seq_stop, Int2 order, AlignNodePtr anp)
2178 {
2179 	AlignBlockPtr abp = NULL;
2180 	AlignRangePtr arp;
2181 
2182 
2183 	if(adp == NULL || anp == NULL)
2184 		return FALSE;
2185 	if(adp->arp == NULL)
2186 	{
2187 		abp = make_one_block(&(adp->seqends), seq_start, seq_stop, anp->extremes.left, anp->extremes.right, order, anp);
2188 		if(abp != NULL)
2189 			link_align_blocks(&(anp->blocks), abp);
2190 	}
2191 	else
2192 	{
2193 		for(arp = adp->arp; arp != NULL; arp = arp->next)
2194 		{
2195 			if(arp->segtype == REG_SEG)
2196 			{
2197 				abp = make_one_block(&(arp->sr), seq_start, seq_stop, arp->gr.left, arp->gr.right, order, anp);
2198 				if(abp != NULL)
2199 				{
2200 					link_align_blocks(&(anp->blocks), abp);
2201 					break;
2202 				}
2203 			}
2204 		}
2205 	}
2206 	return (abp != NULL);
2207 }
2208 
2209 
sequence_has_alignment(ValNodePtr align_id_list,SeqIdPtr sip)2210 static Boolean sequence_has_alignment(ValNodePtr align_id_list, SeqIdPtr sip)
2211 {
2212 	Uint1 kludge_factor;
2213 	Int4 gi;
2214 
2215 
2216 	kludge_factor = (Uint1)get_kludge_factor(sip, &gi);
2217 	if(gi == -1)
2218 		return FALSE;
2219 
2220 	while(align_id_list)
2221 	{
2222 		if(align_id_list->choice == kludge_factor)
2223 		{
2224 			if(align_id_list->data.intvalue == gi)
2225 				return TRUE;
2226 		}
2227 
2228 		align_id_list = align_id_list->next;
2229 	}
2230 
2231 	return FALSE;
2232 }
2233 
2234 
2235 
add_sequence_alignment_info(ValNodePtr align_id_list,ValNodePtr anp_list)2236 static Boolean add_sequence_alignment_info(ValNodePtr align_id_list, ValNodePtr anp_list)
2237 {
2238 
2239 	AlignNodePtr anp;
2240 
2241 	if(align_id_list == NULL || anp_list == NULL)
2242 		return FALSE;
2243 
2244 	while(anp_list)
2245 	{
2246 		if(anp_list->choice != OBJ_SEQANNOT)
2247 		{
2248 			anp = anp_list->data.ptrvalue;
2249 			if(anp->seq_has_align == FALSE)
2250 				anp->seq_has_align = sequence_has_alignment(align_id_list, anp->sip);
2251 		}
2252 		anp_list = anp_list->next;
2253 	}
2254 
2255 	return TRUE;
2256 }
2257 
2258 
2259 typedef struct temp_bsp_data{
2260 	BioseqPtr bsp;
2261 	Uint4 itemID;
2262 	Boolean found;
2263 }TempBsp, PNTR TempBspPtr;
2264 
bspcountfunc(GatherContextPtr gcp)2265 static Boolean bspcountfunc(GatherContextPtr gcp)
2266 {
2267 	TempBspPtr tbp;
2268 	BioseqPtr bsp;
2269 
2270 	if(gcp == NULL)
2271 		return FALSE;
2272 	tbp = (TempBspPtr)(gcp->userdata);
2273 	if(tbp == NULL || tbp->bsp == NULL)
2274 		return FALSE;
2275 	if(tbp->found)
2276 		return FALSE;
2277 	bsp = (BioseqPtr)(gcp->thisitem);
2278 	if(tbp->bsp == bsp)
2279 	{
2280 		tbp->itemID= gcp->itemID;
2281 		tbp->found = TRUE;
2282 		return FALSE;
2283 	}
2284 	else
2285 		return TRUE;
2286 }
2287 
2288 /*****************************************************************
2289 *
2290 *       given the bioseq and its entityID, figure out the
2291 *       itemID for the Bioseq
2292 *
2293 *****************************************************************/
get_bioseq_itemID(BioseqPtr bsp,Uint2 entityID)2294 NLM_EXTERN Uint4 get_bioseq_itemID(BioseqPtr bsp, Uint2 entityID)
2295 {
2296 	GatherScope gs;
2297 	TempBsp tb;
2298 
2299 
2300 	if(bsp == NULL || entityID == 0)
2301 		return 0;
2302 
2303 	tb.bsp = bsp;
2304 	tb.itemID= 0;
2305 	tb.found = FALSE;
2306 
2307 	MemSet((Pointer)(&gs), 0, sizeof(GatherScope));
2308 	MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)OBJ_MAX * sizeof(Boolean));
2309 	gs.ignore[OBJ_BIOSEQ] = FALSE;
2310 	GatherEntity(entityID, &tb, bspcountfunc, &gs);
2311 
2312 	return tb.itemID;
2313 }
2314 
2315 
stop_collecting_alignment(ValNodePtr anp_list,Int4 max_num)2316 static Boolean stop_collecting_alignment(ValNodePtr anp_list, Int4 max_num)
2317 {
2318 	Int2 i;
2319 
2320 	i = 0 ;
2321 	while(anp_list)
2322 	{
2323 		++i;
2324 		if(i > max_num)
2325 		{
2326 			if(anp_list->next == NULL)
2327 				return TRUE;
2328 		}
2329 		anp_list = anp_list->next;
2330 	}
2331 
2332 	return FALSE;
2333 }
2334 
/*
*	SeqEntryExplore callback that fills in anp->clone_id.
*	data is the AlignNode; nothing is done when it already has a
*	clone_id. The descriptors of the Bioseq (sep->choice == 1) or
*	Bioseq-set in sep are scanned for
*	  - a source descriptor with a named subsource of subtype 3, or
*	  - an org descriptor with a modifier starting with "clone=",
*	and the first hit is saved (StringSave) as the clone id.
*	index and indent are part of the callback signature and unused.
*/
static void FindCloneCallback(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
	AlignNodePtr anp;
	BioseqPtr bsp;
	BioseqSetPtr bssp;
	ValNodePtr descr = NULL;
	ValNodePtr curr;
	OrgRefPtr orp;
	ValNodePtr mod;
	CharPtr str;

	BioSourcePtr source;
	SubSourcePtr ssp;

	anp = (AlignNodePtr)data;
	if(sep == NULL || anp == NULL || anp->clone_id != NULL)
		return;

	if(sep->choice == 1)
	{
		bsp = (BioseqPtr)(sep->data.ptrvalue);
		if(bsp != NULL)
			descr = bsp->descr;
	}
	else
	{
		bssp = (BioseqSetPtr)(sep->data.ptrvalue);
		if(bssp != NULL)
			descr = bssp->descr;
	}

	for(curr = descr; curr != NULL; curr = curr->next)
	{
		if(curr->choice == Seq_descr_source)
		{
			source = curr->data.ptrvalue;
			if(source == NULL)
				continue;
			/* search for the clone subsource */
			for(ssp = source->subtype; ssp != NULL; ssp = ssp->next)
			{
				if(ssp->subtype == 3 && ssp->name != NULL)
				{	/* 3 == clone */
					anp->clone_id = StringSave(ssp->name);
					return;
				}
			}
		}
		else if(curr->choice == Seq_descr_org)
		{
			orp = curr->data.ptrvalue;
			if(orp)
			{
				for(mod = orp->mod; mod != NULL; mod = mod->next)
				{
					str = mod->data.ptrvalue;
					if(str != NULL && StringNCmp(str, "clone=", 6) == 0)
					{
						/* stop at the first hit: going on would overwrite, and so leak, this copy */
						anp->clone_id = StringSave(str+6);
						return;
					}
				}
			}
		}
	}
}
2397 
LoadIndexLabelBlock(AlignNodePtr anp)2398 static Boolean LoadIndexLabelBlock(AlignNodePtr anp)
2399 
2400 {
2401 	AlignBlockPtr abp;
2402 	SeqIdPtr sip;
2403 	ObjectIdPtr oip;
2404 	DbtagPtr db_tag;
2405 
2406 	sip = anp->sip;
2407 	if(sip == NULL || sip->choice != SEQID_GENERAL)
2408 		return FALSE;
2409 
2410 	db_tag = sip->data.ptrvalue;
2411 	if(db_tag == NULL || db_tag->db == NULL)
2412 		return FALSE;
2413 	oip = db_tag->tag;
2414 	if(oip== NULL || oip->id <= 0)
2415 		return FALSE;
2416 
2417 
2418 	abp = MemNew(sizeof(AlignBlock));
2419 	MemCopy((Pointer)(&(abp->gr)), (Pointer)(&(anp->extremes)), sizeof(GatherRange));
2420 	abp->order = (Uint2)oip->id;
2421 	anp->blocks = abp;
2422 
2423 	anp->label = StringSave(db_tag->db);
2424 	return TRUE;
2425 }
2426 
2427 
2428 /***********************************************************************
2429 *
2430 *	coll_align_data(align, m_sip, adp, clone, featureOrder, show_mismatch,
2431 *	itemID, entityID, anp_list)
2432 *	convert all the alignment data stored in adp into the drawing
2433 *	structure AlignNode
2434 *
2435 *	align: the current Seq-align
2436 *	m_sip: the master sequence, also the target sequence in gather
2437 *	adp: the collected structure from gather
2438 *	clone: for filtering out unwanted clone type. set to NULL for all
2439 *	featureOrder: for features to be displayed together with alignment
2440 *	show_mismatch: show the mismatched base-pairs
2441 *	itemID: itemID for the current align
2442 *	entityID: entityID for the Seq-entry of m_sip
2443 *
2444 *	NOTE: if either show_mismatch or featureOrder is selected, it puts
2445 *	the newly retrieved sequence for itemID and entityID
2446 *
2447 *	anp_list: the list of AlignNodePtr to stored the coverted result
2448 *
2449 ************************************************************************/
2450 
coll_align_data(SeqAlignPtr align,Uint1 index,AlignDataPtr adp,CollectAlignOptionPtr caop,Uint4 itemID,Int2 entityID,Int2 itemType,SeqLocPtr mloc,ValNodePtr PNTR anp_list,ValNodePtr PNTR prev)2451 static Boolean coll_align_data(SeqAlignPtr align, Uint1 index, AlignDataPtr adp, CollectAlignOptionPtr caop, Uint4 itemID, Int2 entityID, Int2 itemType, SeqLocPtr mloc, ValNodePtr PNTR anp_list, ValNodePtr PNTR prev)
2452 {
2453 	Char label[41];
2454 
2455 	SeqLocPtr slp, extloc;
2456 	SeqIdPtr sip;
2457 	Boolean feat;	/*collect any features?*/
2458 	Boolean show_mismatch;
2459 	Boolean is_master;
2460 
2461 	AlignRangePtr arp;
2462 	AlignNodePtr anp;
2463 	AlignSegPtr asp, pasp;
2464 
2465 	BioseqPtr bsp;
2466 	Uint2 order;
2467 	Int4 e_left = 0, e_right = 0;
2468 	Boolean match_seg;
2469 	Int2 label_size;
2470 	Int4 offset = 0;
2471 	SeqIdPtr best_id;
2472 	SeqEntryPtr sep;
2473 
2474 #ifdef NONO
2475         if(align->segtype == 5) /* Discontinuous aligment not collected */
2476             return TRUE;
2477 #endif
2478 	label_size = MIN(caop->label_size, 100);
2479 	feat = caop->show_feature;
2480 	if(align->segtype == 3)	/*for std-seg, no feature or mismatch*/
2481 	{
2482 		show_mismatch = FALSE;
2483 		feat = FALSE;
2484 	}
2485 	else
2486 		show_mismatch = caop->show_mismatch;
2487 
2488 	if(align->segtype == 2)	/*for Dense-seg or Dense-diag only*/
2489 	{
2490 		if(caop->align_num != -1)
2491 		{
2492 			/* if(stop_collecting_alignment(*anp_list, caop->align_num)) */
2493 			if(caop->curr_align_num > caop->align_num)
2494 			{
2495 				/* ErrPostEx (SEV_WARNING, 0, 0, "The top %ld alignments are displayed. The rest are truncated", caop->align_num);
2496 				return FALSE; */
2497 				feat = FALSE;
2498 				show_mismatch = FALSE;
2499 			}
2500 		}
2501 	}
2502 
2503 	while(adp)
2504 	{
2505 		++(caop->curr_align_num);
2506 		anp = MemNew(sizeof (AlignNode));
2507 		anp->pop_sap = NULL;
2508 		anp->itemID = itemID;
2509 		anp->entityID = entityID;
2510 		anp->seqOrder = adp->order;
2511 		anp->chain = adp->chain;
2512 		anp->seq_has_align = FALSE;
2513 		anp->index = index;
2514 		anp->keep_label = FALSE;
2515 		MemCopy(&(anp->extremes), &(adp->extremes), sizeof(GatherRange));
2516 
2517 		if(adp->seqends.strand == Seq_strand_minus)
2518 			anp->seqpos = -(adp->seqends.stop);
2519 		else
2520 			anp->seqpos = adp->seqends.start;
2521 
2522 		pasp = NULL;
2523 		for(arp = adp->arp; arp !=NULL; arp = arp->next)
2524 		{
2525 			asp = MemNew(sizeof(AlignSeg));
2526 			MemCopy(&(asp->gr), &(arp->gr), sizeof(GatherRange));
2527 			asp->type = arp->segtype;
2528 			if(asp->type == INS_SEG)
2529 				asp->ins_pos = asp->gr.left;
2530 			if(pasp == NULL)
2531 				anp->segs = asp;
2532 			else
2533 				pasp->next = asp;
2534 			pasp = asp;
2535 		}
2536 
2537 		if(index == ALIGN_NON_INDEX  && (feat|| show_mismatch))
2538 		{
2539 			bsp = BioseqLockById(adp->sip);
2540 			if(bsp != NULL)
2541 			{
2542 				if(adp->sip->choice == SEQID_GI)
2543 				{
2544 					sep = SeqEntryFind(adp->sip);
2545 					if(sep != NULL)
2546 						SeqEntryExplore(sep, (Pointer)anp, FindCloneCallback);
2547 				}
2548 				if(bsp->hist && bsp->hist->assembly)
2549 					anp->seq_has_align = TRUE;
2550 				anp->seq_entityID = ObjMgrGetEntityIDForPointer((Pointer)bsp);
2551 				anp->bsp_itemID = get_bioseq_itemID(bsp, anp->seq_entityID);
2552 				best_id = SeqIdFindBest(bsp->id, SEQID_GI);
2553 				if(best_id == NULL)
2554 					best_id = bsp->id;
2555 				anp->sip = SeqIdDup(best_id);
2556 
2557 				if(feat)
2558 				{
2559 					if(BioseqHasFeature(bsp))
2560 						caop->csop->seglevels = 0;
2561 					else
2562 						caop->csop->seglevels = 1;
2563 					slp = SeqLocIntNew(adp->seqends.start, adp->seqends.stop, adp->seqends.strand, best_id);
2564 					CollectFeatureForAlignNode(slp, anp, caop->csop);
2565 					SeqLocFree(slp);
2566 				}
2567 				if(show_mismatch && bsp->repr != Seq_repr_map)
2568 					CollectMismatchForAlign(anp, mloc, bsp, offset+caop->graphic_offset);
2569 
2570 				BioseqUnlock(bsp);
2571 			}
2572 			/* else
2573 				printf("fail to get sequence for %ld\n", adp->sip->data.intvalue);  */
2574 		}
2575 		if(anp->sip == NULL)
2576 			anp->sip = SeqIdDup(adp->sip);
2577 
2578 
2579 
2580 		/*collecting matching piece to show the content of a segmented sequence*/
2581 		if(caop->segloc != NULL && index == ALIGN_NON_INDEX )
2582 		{
2583 			is_master = SeqIdForSameBioseq(adp->sip, SeqLocId(mloc));
2584 			if(is_master)
2585 			{
2586 				e_left = 0;
2587 				e_right = -1;
2588 			}
2589 			order = 0;
2590 			for(extloc = caop->segloc; extloc != NULL; extloc = extloc->next)
2591 			{
2592 				++order;
2593 				match_seg = FALSE;
2594 				if(is_master)
2595 				{
2596 					e_right += SeqLocLen(extloc);
2597 					match_seg = TRUE;
2598 				}
2599 				else
2600 				{
2601 					sip = SeqLocId(extloc);
2602 					match_seg = SeqIdForSameBioseq(sip, anp->sip);
2603 
2604 				}
2605 				if(match_seg)
2606 				{
2607 					if(!is_master)
2608 					{
2609 						e_left = SeqLocStart(extloc);
2610 						e_right = SeqLocStop(extloc);
2611 					}
2612 					match_seg = make_blocks(adp, e_left, e_right, order, anp);
2613 				}
2614 				if(is_master)
2615 					e_left = e_right +1;
2616 				if(match_seg)
2617 					if(e_right > adp->seqends.stop)
2618 						break;
2619 			}
2620 		}
2621 
2622 		/*store the index information in the blocks*/
2623 		if(index != ALIGN_NON_INDEX)
2624 			LoadIndexLabelBlock(anp);
2625 		else if(label_size > 0)
2626 		{
2627 			if(MuskSeqIdWrite (anp->sip, label, label_size, caop->slabel_format, TRUE, TRUE)) {
2628 			    SeqIdPtr	gilist = GetUseThisGi(align);
2629 			    if (gilist) {
2630 				Char	buf[1024];
2631 				sprintf(buf, "%d", gilist->data.intvalue);
2632 				anp->label = StringSave(buf);
2633 				anp->keep_label = TRUE;
2634 				gilist = SeqIdSetFree(gilist);
2635 			    } else {
2636 				anp->label = StringSave(label);
2637 			    }
2638 			}
2639 		}
2640 
2641 		link_data_for_collect (anp_list, prev, (Pointer)anp, (Uint1)itemType);
2642 		adp = adp->next;
2643 	}
2644 	return TRUE;
2645 }
2646 
does_annot_match_target(SeqLocPtr target,SeqAnnotPtr annot)2647 static Boolean does_annot_match_target (SeqLocPtr target, SeqAnnotPtr annot)
2648 {
2649     SeqAlignPtr sap;
2650     SeqIdPtr sip;
2651     DenseDiagPtr ddp;
2652     DenseSegPtr dsp;
2653     StdSegPtr ssp;
2654     SeqIdPtr target_id;
2655     SeqLocPtr slp;
2656     Boolean result;
2657 
2658     if(target == NULL || annot == NULL || annot->type != 2)
2659         return FALSE;
2660     target_id = SeqLocId(target);
2661     sap = annot->data;
2662 
2663     if(sap == NULL)
2664         return FALSE;
2665 
2666     switch(sap->segtype) {
2667     case 1:
2668         ddp = sap->segs;
2669         for(sip = ddp->id; sip != NULL; sip = sip->next)
2670             if(SeqIdForSameBioseq(sip, target_id))
2671                 return TRUE;
2672         break;
2673     case 2:
2674         dsp = sap->segs;
2675         for(sip = dsp->ids; sip != NULL; sip = sip->next)
2676             if(SeqIdForSameBioseq(sip, target_id))
2677                 return TRUE;
2678         break;
2679     case 3:
2680         ssp = sap->segs;
2681         for(slp = ssp->loc; slp != NULL; slp = slp->next)
2682             if(SeqIdForSameBioseq(SeqLocId(slp), target_id))
2683                 return TRUE;
2684         break;
2685     case 5:
2686 
2687         annot->data = (SeqAlignPtr) sap->segs;
2688         result =  does_annot_match_target (target, annot);
2689         annot->data = sap;
2690         return result;
2691 
2692     default:
2693         break;
2694     }
2695 
2696     return FALSE;
2697 }
2698 
collalignfunc(GatherContextPtr gcp)2699 static Boolean collalignfunc(GatherContextPtr gcp)
2700 {
2701 	SeqAnnotPtr annot;
2702 	CollectHeaderPtr chp;
2703 	AnnotInfoPtr info;
2704 	SeqAlignPtr align;
2705 	Uint1 annot_type;
2706 
2707 
2708 	chp= (CollectHeaderPtr)(gcp->userdata);
2709 
2710 	switch(gcp->thistype)
2711 	{
2712 	case OBJ_SEQANNOT:
2713 		annot = (SeqAnnotPtr)(gcp->thisitem);
2714 		if(annot->type == 2)
2715 		{
2716 		chp->caop->curr_align_num = 0;
2717 		chp->load_align = TRUE;
2718 		chp->index = 0;
2719 		if(!chp->take_all_annot)
2720 		{
2721 			if(!is_annot_for_hist_alignment(annot))
2722 			{
2723 				chp->load_align = FALSE;
2724 				return TRUE;
2725 			}
2726 		}
2727 		info = MemNew(sizeof(AnnotInfo));
2728 		info->annotDB[0] = '\0';
2729 		info->displayOrder = get_align_annot_qual(annot, info->annotDB, 20, &annot_type);
2730 		info->annot_type = annot_type;
2731 		if(annot_type == ANNOT_BLAST)
2732 			info->blast_type = info->displayOrder;
2733 		/*load the index values*/
2734 		if(info->annotDB[0] != '\0')
2735 		{
2736 			if(StringCmp(info->annotDB, "Sequencing Status") == 0)
2737 				chp->index = ALIGN_SEQ_INDEX;
2738 			else if(StringCmp(info->annotDB, "Mapping Status") == 0)
2739 				chp->index = ALING_MAP_INDEX;
2740 		}
2741 
2742 		/* Eric Green's un-aligned guys */
2743 		if(annot_type == ANNOT_CONSIST)
2744 		{
2745 			info->consistent = info->displayOrder;
2746 			if(info->consistent == ALIGN_UNKNOWN)
2747 			{	/*un-aligned guys, check if the Seq-loc matches */
2748 				if(!does_annot_match_target (chp->slp, annot))
2749 					info = MemFree(info);
2750 				chp->load_align = FALSE;
2751 			}
2752 		}
2753 		else if(annot_type == ANNOT_FISH)
2754 			info->is_fish_align= TRUE;
2755 		if(info != NULL)
2756 		{
2757 			info->entityID = gcp->entityID;
2758 			info->itemID = gcp->itemID;
2759 			link_data_for_collect (&(chp->aligns), &(chp->prev_align), (Pointer)info, (Uint1)(gcp->thistype));
2760 		}
2761 		}
2762 		return TRUE;
2763 
2764 	case OBJ_SEQALIGN:
2765 		align = (SeqAlignPtr)(gcp->thisitem);
2766 		if(chp->load_align)
2767 			return coll_align_data(align, chp->index, gcp->adp, chp->caop, gcp->itemID, gcp->entityID, gcp->thistype, chp->slp, &(chp->aligns), &(chp->prev_align));
2768 		else
2769 			return TRUE;
2770 	case OBJ_SEQHIST_ALIGN:
2771 		align = (SeqAlignPtr)(gcp->thisitem);
2772 		return coll_align_data(align, chp->index, gcp->adp, chp->caop, gcp->itemID, gcp->entityID, gcp->thistype, chp->slp, &(chp->aligns), &(chp->prev_align));
2773 	case OBJ_SEQHIST:
2774 		chp->caop->curr_align_num = 0;
2775 		return TRUE;
2776 	default:
2777 		return TRUE;
2778 	}
2779 }
2780 
2781 
2782 /*********************************************************************
2783 *
2784 *	CollectItemForAlignment(slp, entityID, left, caop)
2785 *	return a list of AlignNode for the alignment in the target seqloc
2786 *	slp: the target Seq-loc
2787 *	entityID: the entity source for collection
2788 *	left: the left offset on the graphic
2789 *	caop: the option for alignment collection
2790 *
2791 **********************************************************************/
CollectItemForAlignment(SeqLocPtr slp,Uint2 entityID,Int4 left,CollectAlignOptionPtr caop,Boolean take_all_annot)2792 NLM_EXTERN ValNodePtr CollectItemForAlignment(SeqLocPtr slp, Uint2 entityID, Int4 left, CollectAlignOptionPtr caop, Boolean take_all_annot)
2793 {
2794 	GatherScope gs;
2795 	CollectHeader ch;
2796 	BioseqPtr mbsp;
2797 	SeqIdPtr sip;
2798 	ValNodePtr align_id_list = NULL;
2799 	SeqLocPtr curr, next;
2800 
2801 	if(slp == NULL || entityID == 0 || caop == NULL)
2802 		return NULL;
2803 
2804 	sip = SeqLocId(slp);
2805 	ch.aligns = NULL;
2806 	ch.caop = caop;
2807 	ch.take_all_annot = take_all_annot;
2808 	ch.load_align = TRUE;
2809 	ch.prev_feat = NULL;
2810 	ch.prev_align = NULL;
2811 	ch.index = 0;
2812 	/*ch.slp = slp;*/
2813 
2814 
2815 	MemSet((Pointer)&gs, 0, sizeof (GatherScope));
2816 	MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
2817 
2818 	if(caop->only_history == FALSE)
2819 	{
2820 		gs.ignore[OBJ_SEQANNOT] = FALSE;
2821 		gs.ignore[OBJ_SEQALIGN] = FALSE;
2822 	}
2823 	gs.ignore[OBJ_SEQHIST] = FALSE;
2824 	gs.ignore[OBJ_SEQHIST_ALIGN] = FALSE;
2825 
2826 
2827 	gs.nointervals = caop->nointerval;
2828 	gs.seglevels = 0;
2829 	gs.currlevel = 0;
2830 	gs.split_packed_pnt = FALSE;
2831 	gs.mapinsert = caop->map_insert;
2832 
2833 
2834 	curr = slp;
2835 	while(curr)
2836 	{
2837 		next = curr->next;
2838 		curr->next = NULL;
2839 		gs.offset = left;
2840 		gs.target = curr;
2841 		ch.slp = curr;
2842 		caop->graphic_offset = left;
2843 		GatherEntity(entityID, (Pointer)(&ch), collalignfunc, &gs);
2844 		left += SeqLocLen(curr);
2845 		curr->next = next;
2846 		curr = next;
2847 	}
2848 
2849 	if(ch.aligns != NULL)
2850 	{
2851 		mbsp = BioseqLockById(sip);
2852 		align_id_list = get_seqids_with_alignment(mbsp);
2853 		if(align_id_list != NULL)
2854 		{
2855 			add_sequence_alignment_info(align_id_list, ch.aligns);
2856 			ValNodeFree(align_id_list);
2857 		}
2858 		BioseqUnlock(mbsp);
2859 	}
2860 
2861 
2862 	return ch.aligns;
2863 }
2864 
2865 
/*
*	Fold the AlignNode held by new_node into the one held by head:
*	the right extreme is taken over, and the segment and block lists
*	are appended to those of head. When blocks are appended, the
*	strand of head's previously last block is cleared.
*	new_node is then released with FreeAlignNode, its lists having
*	been detached first. Nothing is done when either node or either
*	AlignNode is NULL.
*/
static void merge_master_head(ValNodePtr head, ValNodePtr new_node)
{
	AlignNodePtr dst, src;
	AlignSegPtr last_seg;
	AlignBlockPtr last_block;

	if(head == NULL || new_node == NULL)
		return;
	dst = head->data.ptrvalue;
	src = new_node->data.ptrvalue;
	if(dst == NULL || src == NULL)
		return;

	dst->extremes.right = src->extremes.right;

	if(dst->segs != NULL)
	{
		for(last_seg = dst->segs; last_seg->next != NULL; last_seg = last_seg->next)
			continue;
		last_seg->next = src->segs;
	}
	else
		dst->segs = src->segs;
	src->segs = NULL;

	if(dst->blocks != NULL)
	{
		for(last_block = dst->blocks; last_block->next != NULL; last_block = last_block->next)
			continue;
		if(src->blocks != NULL)
			last_block->gr.strand = 0;
		last_block->next = src->blocks;
	}
	else
		dst->blocks = src->blocks;
	src->blocks = NULL;

	FreeAlignNode(new_node);
}
2907 
2908 
2909 /*****************************************************************************
2910 *
2911 *	cllect_master_align_node(m_loc, featureOrder, groupOrder)
2912 *	in the master-slave alignment, a fake Seq-align is created for the
2913 *	master sequence where the master is aligned to itself. The AlignNode
2914 *	can be computed for this faked alignment. When this is done, the fake
2915 *	Seq-align will be freed
2916 *
2917 *	m_loc: the Seq-loc for the master sequence
2918 *	featureOrder: the selected features
2919 *
2920 *******************************************************************************/
collect_master_align_node(CollectAlignOptionPtr caop,SeqLocPtr m_loc,Uint1 obj_type,Uint2 entityID)2921 NLM_EXTERN ValNodePtr collect_master_align_node(CollectAlignOptionPtr caop, SeqLocPtr m_loc, Uint1 obj_type, Uint2 entityID)
2922 {
2923 	SeqAlignPtr align;
2924 	DenseSegPtr dsp;
2925 	SeqIdPtr m_sip;
2926 	ValNodePtr anp_node, anp_head = NULL, curr;
2927 	ValNodePtr prev = NULL;
2928 	AlignNodePtr anp;
2929 	AlignDataPtr adp;
2930 	Int4 left =0;
2931 	Boolean show_mismatch;
2932 
2933 	if(caop == NULL || m_loc == NULL)
2934 		return NULL;
2935 
2936 	show_mismatch = caop->show_mismatch;
2937 	caop->show_mismatch = FALSE;
2938    while(m_loc)
2939    {
2940 	m_sip = SeqLocId(m_loc);
2941 
2942 	dsp = DenseSegNew();
2943 	dsp->dim = 2;
2944 	dsp->numseg =1;
2945 	dsp->strands = MemNew((size_t)2*sizeof(Uint1));
2946 	dsp->strands[0] = Seq_strand_plus;
2947 	dsp->strands[1] = SeqLocStrand(m_loc);
2948 	dsp->ids = SeqIdDup(m_sip);
2949 	dsp->ids->next = SeqIdDup(m_sip);
2950 	dsp->starts = MemNew((size_t)2*sizeof(Int4));
2951 	dsp->starts[0] = SeqLocStart(m_loc);
2952 	dsp->starts[1] = SeqLocStart(m_loc);
2953 	dsp->lens = MemNew(sizeof(Int4));
2954 	dsp->lens[0] = SeqLocLen(m_loc);
2955 
2956 	align = SeqAlignNew();
2957 	align->type = 3;
2958 	align->segtype = 2;
2959 	align->dim = 2;
2960 	align->segs = dsp;
2961 
2962 	anp_node = NULL;
2963 	adp = gather_align_data(m_loc, align, left, TRUE, TRUE);
2964 	if(adp !=NULL)
2965 	{
2966 		coll_align_data(align, 0, adp, caop, 0, entityID, obj_type, m_loc, &anp_node, &prev);
2967 		FreeAlignData(adp);
2968 	}
2969 	if(anp_head == NULL)
2970 		anp_head = anp_node;
2971 	else
2972 		merge_master_head(anp_head, anp_node);
2973 	SeqAlignFree(align);
2974 	left = SeqLocLen(m_loc);
2975 	m_loc = m_loc->next;
2976    }
2977    for(curr = anp_head; curr != NULL; curr = curr->next)
2978    {
2979 	anp = curr->data.ptrvalue;
2980 	anp->use_seq_ids = TRUE; /*use the Seq-id as the itemID for graphic display*/
2981 	anp->is_master = TRUE;
2982    }
2983 	caop->show_mismatch = show_mismatch;
2984    return anp_head;
2985 }
2986 
set_option_for_collect_align(CollectAlignOptionPtr caop,Int2 label_size,Uint1 style)2987 NLM_EXTERN Boolean set_option_for_collect_align(CollectAlignOptionPtr caop, Int2 label_size, Uint1 style)
2988 {
2989 	if(caop == NULL)
2990 		return FALSE;
2991 
2992 	MemSet((Pointer)caop, 0, sizeof(CollectAlignOption));
2993 	if(style < COLLECT_HISTORY || style > COLLECT_FIXED)
2994 	{
2995 		Message(MSG_ERROR, "Illegal style for alignment display %d", (int)style);
2996 		return FALSE;
2997 	}
2998 
2999 	caop->nointerval = FALSE;
3000 	caop->label_size= label_size;
3001 	if(style == COLLECT_MD || style == COLLECT_FIXED)
3002 	{
3003 		caop->only_history = FALSE;
3004 		caop->map_insert = FALSE;
3005 	}
3006 	else
3007 	{
3008 		caop->only_history = TRUE;
3009 		caop->map_insert = TRUE;
3010 	}
3011 	caop->map_graphic = (style != COLLECT_FIXED);
3012 	caop->show_mismatch = (style != COLLECT_HISTORY);
3013 	caop->show_feature = FALSE;
3014 	caop->slabel_format = PRINTID_TEXTID_ACCESSION;
3015 	caop->segloc = NULL;
3016 	caop->align_num = DEFAULT_ALIGN_NUM;
3017 	caop->graphic_offset = 0;
3018 	return TRUE;
3019 }
3020 
alignment_are_blast_hits(BioseqPtr bsp)3021 static Boolean alignment_are_blast_hits(BioseqPtr bsp)
3022 {
3023 	SeqAnnotPtr annot;
3024 	Char label[101];
3025 	Uint1 annot_type;
3026 
3027 	if(bsp == NULL || bsp->annot == NULL)
3028 		return FALSE;
3029 	for(annot = bsp->annot; annot != NULL; annot = annot->next)
3030 	{
3031 		if(annot->type == 2)
3032 		{
3033 			label[0] = '\0';
3034 			get_align_annot_qual(annot, label, 20, &annot_type);
3035 			if(label[0] != '\0' && StringNCmp(label, "BLAST", 5) == 0)
3036 				return TRUE;
3037 		}
3038 	}
3039 	return FALSE;
3040 }
3041 
collect_anpnode_with_option(CollectAlignOptionPtr caop,SeqLocPtr m_loc,Uint2 entityID,Int4 style,Uint1 itemType,Uint1Ptr f_order,Uint1Ptr g_order,Boolean take_all_annot)3042 NLM_EXTERN ValNodePtr collect_anpnode_with_option(CollectAlignOptionPtr caop, SeqLocPtr m_loc, Uint2 entityID, Int4 style, Uint1 itemType, Uint1Ptr f_order, Uint1Ptr g_order, Boolean take_all_annot)
3043 {
3044 	ValNodePtr anp_list = NULL, list;
3045 	BioseqPtr mbsp;
3046 	Uint1 featureOrder[FEATDEF_ANY];
3047 	Uint1 groupOrder[FEATDEF_ANY];
3048 	Int2 i;
3049 	CollectSeqOptionPtr csop = NULL;
3050 	ValNodePtr align_id_list = NULL;
3051 	AlignNodePtr anp;
3052 	Boolean show_feature;
3053 	ValNodePtr prev = NULL;
3054 
3055 
3056 
3057 	if(caop == NULL || m_loc == NULL || entityID == 0)
3058 		return NULL;
3059 	if(style < COLLECT_HISTORY || style > COLLECT_MD)
3060 		return NULL;
3061 	mbsp = BioseqLockById(SeqLocId(m_loc));
3062 	if(mbsp == NULL)
3063 		return NULL;
3064 
3065 	if(mbsp->repr == Seq_repr_seg)
3066 		caop->segloc  = (SeqLocPtr)(mbsp->seq_ext);
3067 	else
3068 		caop->segloc = NULL;
3069 
3070 	show_feature = FALSE;
3071 	if(style != COLLECT_HISTORY)
3072 	{
3073 		if(f_order != NULL && g_order != NULL)
3074 		{
3075 			MemCopy((Pointer)(featureOrder), (Pointer)f_order, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3076 			MemCopy((Pointer)(groupOrder), (Pointer)g_order, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3077 		}
3078 		else	/*use the default features*/
3079 		{
3080 			if(mbsp->mol == Seq_mol_aa)
3081 			{
3082 				MemSet((Pointer)(featureOrder), 1, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3083 				MemSet((Pointer)(groupOrder), 1, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3084 				featureOrder[FEATDEF_BAD] = 0;
3085 				/* featureOrder[FEATDEF_ANY] = 0; */ /* out of bounds */
3086 				featureOrder[FEATDEF_PUB] = 0;
3087 				featureOrder[FEATDEF_source] = 0;
3088 				featureOrder[FEATDEF_NUM] = 0;
3089 				featureOrder[FEATDEF_BIOSRC] = 0;
3090 				featureOrder[FEATDEF_ORG] = 0;
3091 				featureOrder[FEATDEF_CDS] =0;
3092 				featureOrder[FEATDEF_PROT] =0;
3093 			}
3094 			else
3095 			{
3096 				MemSet((Pointer)(featureOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3097 				MemSet((Pointer)(groupOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3098 				featureOrder[FEATDEF_Imp_CDS] = 1;
3099 				groupOrder[FEATDEF_Imp_CDS] = 1;
3100 				featureOrder[FEATDEF_CDS] = 1;
3101 				groupOrder[FEATDEF_CDS] = 1;
3102 			}
3103 		}
3104 
3105 		csop = caop->csop;
3106 		for(i =0; i<FEATDEF_ANY; ++i)
3107 		{
3108 			if(featureOrder[i] != 0)
3109 			{
3110 				csop->features[i] = TRUE;
3111 				show_feature = TRUE;
3112 			}
3113 			else
3114 				csop->features[i] = FALSE;
3115 		}
3116 	}
3117 	else
3118 		caop->show_mismatch = FALSE;
3119 
3120 
3121 
3122 	if(style == COLLECT_MP)
3123 	{
3124 		if(csop->features[FEATDEF_repeat_region] == FALSE ||
3125 			csop->features[FEATDEF_repeat_unit] == FALSE)
3126 		{
3127 			if(mbsp->repr == Seq_repr_seg || mbsp->repr == Seq_repr_raw
3128 				|| mbsp->repr == Seq_repr_const)
3129 			{
3130 
3131 				if(alignment_are_blast_hits(mbsp))
3132 				{
3133 					csop->features[FEATDEF_repeat_region] = TRUE;
3134 					csop->features[FEATDEF_repeat_unit] = TRUE;
3135 					csop->features[FEATDEF_repeat_region] = TRUE;
3136 					csop->features[FEATDEF_repeat_unit] = TRUE;
3137 					caop->show_feature = TRUE;
3138 				}
3139 			}
3140 		}
3141 		anp_list = collect_master_align_node(caop, m_loc, itemType, entityID);
3142 		if(anp_list == NULL)
3143 		{
3144 			BioseqUnlock(mbsp);
3145 			Message(MSG_ERROR, "Fail to make AlignNode for the master sequence");
3146 			return NULL;
3147 		}
3148 		/* if(caop->map_graphic == FALSE)
3149 		{
3150 			csop->features[FEATDEF_repeat_region] = FALSE;
3151 			csop->features[FEATDEF_repeat_unit] = FALSE;
3152 		} */
3153 
3154 	}
3155 	caop->show_feature = show_feature;
3156 
3157 
3158 	list = CollectItemForAlignment(m_loc, entityID, 0, caop, take_all_annot);
3159 	if(caop->no_sort == FALSE)
3160 		list = SortAlignNode(list);
3161 	ValNodeLink(&anp_list, list);
3162 	if(style == COLLECT_MD)
3163 	{
3164 		for(list = anp_list; list != NULL; list = list->next)
3165 		{
3166 			if(list->choice != OBJ_SEQANNOT)
3167 			{
3168 				anp = list->data.ptrvalue;
3169 				if(anp != NULL)
3170 					anp->use_seq_ids = TRUE;
3171 			}
3172 		}
3173 	}
3174 
3175 
3176 	if(caop->show_feature)
3177 		SortAlignmentFeature(anp_list, featureOrder, groupOrder);
3178 	align_id_list = get_seqids_with_alignment(mbsp);
3179 	if(align_id_list != NULL)
3180 	{
3181 		add_sequence_alignment_info(align_id_list, anp_list);
3182 		ValNodeFree(align_id_list);
3183 	}
3184 	if(style == COLLECT_MP && caop->flat_insert)
3185 		FlatAlignNode(anp_list);
3186 
3187 	BioseqUnlock(mbsp);
3188 	return anp_list;
3189 }
3190 
3191 
3192 
3193 /***************************************************************
3194 *
3195 *	CollAlignFromSeqAnnot(annot, m_loc, featureOrder, groupOrder,
3196 *	style,graphic)
3197 *
3198 *	collect the AlignNode for Seq-aligns stored in Seq-annot
3199 *	annot: the Seq-annot
3200 *	m_loc: the target sequence
3201 *	left: the offset of the leftmost position
3202 *	featureOrder, groupOrde: the features selected to be displayed together
3203 *	with alignment
3204 *	style: the style of the display. Only valid for multiple-pairwise
3205 *	and multiple dimension for now
3206 *	graphic: if TRUE, it is designed to show the display on graphic,
3207 *	so the mismatch data will be collected. Otherwise, it will not
3208 *	collect mismatch data
3209 *
3210 ****************************************************************/
CollAlignFromSeqAnnot(SeqAnnotPtr annot,SeqLocPtr m_loc,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint1 style,Boolean graphic,Boolean sort,Boolean flat_insert)3211 NLM_EXTERN ValNodePtr CollAlignFromSeqAnnot(SeqAnnotPtr annot, SeqLocPtr m_loc, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint1 style, Boolean graphic, Boolean sort, Boolean flat_insert)
3212 {
3213 	Uint2 entityID;
3214 
3215 	CollectAlignOption ca_option;
3216 	CollectSeqOption cs_option;
3217 	Int2 label_size = 32;
3218 
3219 	if(annot->type !=2)	/*it is not an alignment*/
3220 		return NULL;
3221 
3222 	entityID = ObjMgrRegister(OBJ_SEQANNOT, (Pointer)annot);
3223 	if(entityID == 0)
3224 		return NULL;
3225 
3226 
3227 	if(style == COLLECT_MP || style == COLLECT_MD)
3228 	{
3229 		set_option_for_collect_align(&ca_option, label_size, style);
3230 		cs_option.nointerval = FALSE;
3231 		cs_option.slabel_format = PRINTID_TEXTID_ACCESSION;
3232 		MemSet((Pointer)&(cs_option.flabel_format), OM_LABEL_CONTENT, (size_t)FEATDEF_ANY * sizeof(Uint1));
3233 		cs_option.label_size = label_size;
3234 		cs_option.seglevels = 0;
3235 		ca_option.csop = &cs_option;
3236 		ca_option.no_sort = 1- sort;
3237 		if(style == COLLECT_MP && flat_insert)
3238 			ca_option.flat_insert = TRUE;
3239 		else
3240 			ca_option.flat_insert = FALSE;
3241 
3242 		ca_option.only_history = FALSE;
3243 		if(!graphic)
3244 		{
3245 			ca_option.show_mismatch = FALSE;
3246 			ca_option.align_num = -1;
3247 		}
3248 		return collect_anpnode_with_option(&ca_option, m_loc, entityID, style, OBJ_SEQALIGN, featureOrder, groupOrder, TRUE);
3249 	}
3250 	else
3251 		return NULL;
3252 
3253 
3254 }
3255 
/*###################################################################
#
#	functions related to collecting Seq-feat, Bioseq and Bioseq-seg
#
###################################################################*/
3261 
3262 
get_mapmarker_info(UserObjectPtr uop,Uint4Ptr extra,Uint2Ptr bin_order)3263 static void get_mapmarker_info(UserObjectPtr uop, Uint4Ptr extra, Uint2Ptr bin_order)
3264 {
3265 	ObjectIdPtr oip;
3266 	Int4 val;
3267 	Uint4 temp;
3268 	UserFieldPtr ufp;
3269 
3270 	temp = *extra;
3271 
3272 	while(uop)
3273 	{
3274 		oip = uop->type;
3275 		if(oip && oip->str != NULL)
3276 		{
3277 			if(StringCmp(oip->str, "MapMarkerInfo") == 0)
3278 			{
3279 				ufp = uop->data;
3280 				while(ufp)
3281 				{
3282 					oip = ufp->label;
3283 					if(StringCmp(oip->str, "Marker Type") == 0)
3284 					{
3285 						if(ufp->choice == 2)
3286 						{
3287 							val = ufp->data.intvalue;
3288 							switch(val)
3289 							{
3290 								case FRAME_WORK:
3291 									temp |= EXTRA_FRAME_WORK;
3292 									break;
3293 								case RECMIN:
3294 									temp |= EXTRA_RECMIN;
3295 									break;
3296 								case LIKELY:
3297 									temp |= EXTRA_LIKELY;
3298 									break;
3299 								case MDUP:
3300 									temp |= EXTRA_MDUP;
3301 									break;
3302 								case DUP:
3303 									temp |= EXTRA_DUP;
3304 									break;
3305 
3306 								case CONTIG_STS:
3307 									temp |= EXTRA_CONTIG_STS;
3308 									break;
3309 								default:
3310 									break;
3311 							}
3312 							*extra = temp;
3313 						}
3314 					}
3315 					if(StringCmp(oip->str, "Bin Order") == 0)
3316 					{
3317 						if(ufp->choice == 2)
3318 							*bin_order = (Uint2)(ufp->data.intvalue);
3319 					}
3320 
3321 					if(StringCmp(oip->str, "Marker Category") == 0)
3322 					{
3323 						if(ufp->choice == 2)
3324 						{
3325 							val = ufp->data.intvalue;
3326 							switch(val)
3327 							{
3328 								case EG_YAC_END:
3329 									temp |= EXTRA_YAC_END;
3330 									break;
3331 								case EG_RANDOME:
3332 									temp |= EXTRA_RANDOM;
3333 									break;
3334 
3335 								case EG_GENETIC:
3336 									temp |= EXTRA_GENETIC;
3337 									break;
3338 
3339 								case EG_GENE:
3340 									temp |= EXTRA_GENE;
3341 									break;
3342 								case EG_EST:
3343 									temp |= EXTRA_EST;
3344 									break;
3345 								case EG_MISC:
3346 									temp |= EXTRA_MISC;
3347 									break;
3348 								default:
3349 									break;
3350 							}
3351 						}
3352 					}
3353 					ufp = ufp->next;
3354 				}
3355 			}
3356 			else if(StringCmp(oip->str, "Marker Category") == 0)
3357 			{
3358 				ufp = uop->data;
3359 				while(ufp)
3360 				{
3361 					if(ufp->choice == 2)
3362 					{
3363 						val = ufp->data.intvalue;
3364 						switch(val)
3365 						{
3366 							case EG_YAC_END:
3367 								temp |= EXTRA_YAC_END;
3368 								break;
3369 							case EG_RANDOME:
3370 								temp |= EXTRA_RANDOM;
3371 								break;
3372 
3373 							case EG_GENETIC:
3374 								temp |= EXTRA_GENETIC;
3375 								break;
3376 
3377 							case EG_GENE:
3378 								temp |= EXTRA_GENE;
3379 								break;
3380 							case EG_EST:
3381 								temp |= EXTRA_EST;
3382 								break;
3383 							case EG_MISC:
3384 								temp |= EXTRA_MISC;
3385 								break;
3386 							default:
3387 								break;
3388 						}
3389 					}
3390 					ufp = ufp->next;
3391 				}
3392 			}
3393 		}
3394 		uop = uop->next;
3395 	}
3396 	*extra = temp;
3397 }
3398 
3399 
3400 /*******************************************************************
3401 *
3402 *	ck_seqfeat_extra: check if there is  extra data, such as
3403 *	Genbank accessions assocated with a GeneRef or Medlines
3404 *	associated with a Seq-feat
3405 *
3406 *******************************************************************/
ck_seqfeat_extra(SeqFeatPtr sfp)3407 NLM_EXTERN Uint4 ck_seqfeat_extra(SeqFeatPtr sfp)
3408 {
3409 	GeneRefPtr grp;
3410 	ValNodePtr db;
3411 	DbtagPtr db_tag;
3412 	ValNodePtr cit;
3413 	ValNodePtr pub;
3414 	Boolean has_gb = FALSE, has_med = FALSE;
3415 	Uint4 extra_data = 0;
3416 
3417 
3418 	if(sfp->data.choice == 1)
3419 	{
3420 		grp = sfp->data.value.ptrvalue;
3421 
3422 		for(db = grp->db; db!=NULL; db = db->next)
3423 		{
3424 			db_tag = db->data.ptrvalue;
3425 			if(StringICmp(db_tag->db, "GenBank") == 0)
3426 			{
3427 				extra_data |= EXTRA_GENBANK;
3428 				break;
3429 			}
3430 		}
3431 	}
3432 	for(cit = sfp->cit; !has_med && cit!=NULL; cit = cit->next)
3433 	{
3434 		if(cit->choice == 3)
3435 			has_med = TRUE;
3436 		if(cit->choice ==1)
3437 		{
3438 			pub = (ValNodePtr)(cit->data.ptrvalue);
3439 			while(pub)
3440 			{
3441 				if(pub->choice == PUB_Muid)
3442 				{
3443 					has_med = TRUE;
3444 					break;
3445 				}
3446 				pub = pub->next;
3447 			}
3448 		}
3449 	}
3450 
3451 	if(has_med)
3452 		extra_data |= EXTRA_MEDLINE;
3453 	return extra_data;
3454 }
3455 
3456 
3457 
3458 /******************************************************************
3459 *
3460 *	get_bin_order(sfp)
3461 *	get the 1000:1 bin data()
3462 *
3463 *******************************************************************/
get_bin_order(SeqFeatPtr sfp)3464 static Uint2 get_bin_order(SeqFeatPtr sfp)
3465 {
3466 	GeneRefPtr grp;
3467 	ValNodePtr db;
3468 	DbtagPtr db_tag;
3469 	ObjectIdPtr oip;
3470 
3471 	if(sfp->data.choice != 1)
3472 		return 0;
3473 	grp = sfp->data.value.ptrvalue;
3474 	if(grp == NULL)
3475 		return 0;
3476 
3477 	for(db = grp->db; db != NULL; db = db->next)
3478 	{
3479 		db_tag = db->data.ptrvalue;
3480 		if(db_tag != NULL && StringCmp(db_tag->db, "1000:1 Bin") ==0)
3481 		{
3482 			oip = db_tag->tag;
3483 			return (Uint2)(oip->id);
3484 		}
3485 	}
3486 
3487 	return 0;
3488 }
3489 
3490 
load_annot_name(SeqAnnotPtr annot,CharPtr annot_db)3491 static Boolean load_annot_name(SeqAnnotPtr annot, CharPtr annot_db)
3492 {
3493 	ValNodePtr desc;
3494 	CharPtr name, title;
3495 	Int4 len;
3496 
3497 	annot_db[0] = '\0';
3498 	if(annot == NULL)
3499 		return FALSE;
3500 	name = NULL;
3501 	title = NULL;
3502 	for(desc = annot->desc; desc != NULL; desc = desc->next)
3503 	{
3504 		if(desc->choice == Annot_descr_name)
3505 		{
3506 			if(name == NULL)
3507 				name = (CharPtr)(desc->data.ptrvalue);
3508 		}
3509 		if(desc->choice == Annot_descr_title)
3510 		{
3511 			if(title == NULL)
3512 				title = (CharPtr)(desc->data.ptrvalue);
3513 		}
3514 	}
3515 
3516 	if(name != NULL)
3517 		StringNCpy_0(annot_db, name, 20);
3518 	len = StringLen(annot_db);
3519 	if(title != NULL && len < 19)
3520 	{
3521 		StringCat(annot_db, ":");
3522 		++len;
3523 		StringNCpy_0(annot_db+len, title, 20-len);
3524 	}
3525 
3526 	return (annot_db[0] != '\0');
3527 }
3528 
3529 
check_feature_for_landmark(CharPtr label,GeneDataPtr gdata,SeqFeatPtr sfp,GatherContextPtr gcp,Uint2 priority)3530 static Boolean check_feature_for_landmark(CharPtr label, GeneDataPtr gdata, SeqFeatPtr sfp, GatherContextPtr gcp, Uint2 priority)
3531 {
3532 	Boolean found;
3533 	GeneDataPtr c_gdp;
3534 
3535 	if(gdata == NULL || sfp == NULL)
3536 		return FALSE;
3537 	found = FALSE;
3538 	c_gdp = NULL;
3539 	if(label[0] != '\0')
3540 	{
3541 		for(c_gdp = gdata; c_gdp != NULL; c_gdp = c_gdp->next)
3542 		{
3543 			if(StringICmp(c_gdp->symbol, label) == 0)
3544 			{
3545 				found = TRUE;
3546 				break;
3547 			}
3548 		}
3549 	}
3550 
3551 	if(!found && sfp->data.choice == 1)
3552 	{
3553 		for(c_gdp = gdata; c_gdp != NULL; c_gdp = c_gdp->next)
3554 		{
3555 			if(check_landmark(sfp, c_gdp->symbol))
3556 			{
3557 				found = TRUE;
3558 				break;
3559 			}
3560 		}
3561 	}
3562 
3563 	if(!found)
3564 		return FALSE;
3565 
3566 	if(c_gdp->priority == 0 || priority < c_gdp->priority)
3567 	{
3568 		c_gdp->entityID = gcp->entityID;
3569 		c_gdp->itemID = gcp->itemID;
3570 		c_gdp->itemType = gcp->thistype;
3571 		c_gdp->priority = priority;
3572 	}
3573 
3574 	StringCpy(label, c_gdp->symbol);
3575 	return TRUE;
3576 }
3577 
3578 
3579 
3580 /*******************************************************************
3581 *
3582 *	collseqfunc( )
3583 *	callback function for collecting sequence related data in
3584 *	gather, such as segments, features
3585 *
3586 *******************************************************************/
collseqfunc(GatherContextPtr gcp)3587 static Boolean collseqfunc(GatherContextPtr gcp)
3588 {
3589 	SeqFeatPtr sfp;
3590 	SeqLocPtr slp;
3591 	BioseqPtr bsp;
3592 	CollectHeaderPtr chp;
3593 	FeatNodePtr fnp;
3594 	CollectSeqOptionPtr csop;
3595 	ObjMgrTypePtr   omtp;
3596 
3597 	UserObjectPtr uop;
3598 	UserFieldPtr ufp;
3599 	GatherRangePtr grp;
3600 	IvalNodePtr inp;
3601 	Uint1 band;
3602 	Int2 i;
3603 	Int2 label_size;
3604 	ValNodePtr delta_node;
3605 	Boolean is_gap;
3606 	SeqLitPtr slitp;
3607 
3608 
3609 	chp= (CollectHeaderPtr)(gcp->userdata);
3610 	chp->subtype = 0;
3611 	chp->thislabel[0] = '\0';
3612 	chp->ftype [0] = '\0';
3613 	csop = chp->csop;
3614 	label_size = MIN(100, (Int2)(csop->label_size));
3615 
3616 
3617 	switch (gcp->thistype)
3618 	{
3619 		case OBJ_SEQANNOT:	/*for the cytogenetic map, skip certain
3620 						Seq-annot*/
3621 			chp->annotDB[0] = '\0';
3622 			chp->is_lod_score = is_lod_score_annot((SeqAnnotPtr)(gcp->thisitem));
3623 			load_annot_name((SeqAnnotPtr)(gcp->thisitem), chp->annotDB);
3624 			if(csop->bsp_type == CYTO_MAP)
3625 			{
3626 				if(!annot_is_user_defined((SeqAnnotPtr)(gcp->thisitem)))
3627 					chp->skip_feature = TRUE;
3628 				else
3629 					chp->skip_feature = FALSE;
3630 			}
3631 			break;
3632 
3633 		case OBJ_BIOSEQ_SEG:
3634 			slp = (SeqLocPtr)(gcp->thisitem);
3635 			if(is_map_segment(slp))	/*not very reliable*/
3636 				return TRUE;
3637 			if(chp->maybe_mapid != NULL)
3638 				if(SeqIdMatch(chp->maybe_mapid, SeqLocId(slp)))
3639 					return TRUE;
3640 			fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp->entityID, 0);
3641 			MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3642 			if(slp->choice == SEQLOC_NULL || slp->choice == SEQLOC_EMPTY)
3643 				fnp->follower = TRUE;	/*used to present the empty Seq-loc*/
3644 			else
3645 			{
3646 				if(MuskSeqIdWrite (SeqLocId(slp), chp->thislabel, label_size, csop->slabel_format, TRUE, TRUE))
3647 					fnp->label = StringSave(chp->thislabel);
3648 			}
3649 			break;
3650 		case OBJ_BIOSEQ_DELTA:
3651 			delta_node = (ValNodePtr)(gcp->thisitem);
3652 			is_gap = FALSE;
3653 			chp->thislabel[0] = '\0';
3654 			if(delta_node->choice ==1)
3655 			{
3656 				slp = delta_node->data.ptrvalue;
3657 				if(slp->choice == SEQLOC_NULL || slp->choice == SEQLOC_EMPTY)
3658 					is_gap = TRUE;
3659 				else
3660 					MuskSeqIdWrite (SeqLocId(slp), chp->thislabel, label_size, csop->slabel_format, TRUE, TRUE);
3661 
3662 			}
3663 			else
3664 			{
3665 				slitp = delta_node->data.ptrvalue;
3666 				if(slitp->length == 0 || slitp->seq_data == NULL)
3667 				{
3668 					is_gap = TRUE;
3669 					if(slitp->length > 0)
3670 						return TRUE;
3671 				}
3672 			}
3673 
3674 			fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp
3675 ->entityID, 0);
3676 			MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3677 			if(is_gap)
3678 				fnp->follower = TRUE;
3679 			else if(chp->thislabel[0] != '\0')
3680 				fnp->label = StringSave(chp->thislabel);
3681 			break;
3682 
3683 		case OBJ_BIOSEQ:
3684 			fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp->entityID, 0);
3685 			MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3686 			bsp = (BioseqPtr) gcp->thisitem;
3687 			if(MuskSeqIdWrite(bsp->id, chp->thislabel, label_size, csop->slabel_format, TRUE, FALSE))
3688 				fnp->label = StringSave(chp->thislabel);
3689 			break;
3690 
3691 		case OBJ_BIOSEQ_MAPFEAT:
3692 		case OBJ_SEQFEAT:
3693 			if(gcp->thistype == OBJ_SEQFEAT && chp->skip_feature)
3694 				return TRUE;
3695 			sfp = (SeqFeatPtr) gcp->thisitem;
3696 			/* if(gcp->thistype == OBJ_SEQFEAT)
3697 			{
3698 				if(chp->filter_level == gcp->seglevel +1)
3699 					return TRUE;
3700 			} */
3701 			omtp = ObjMgrTypeFind (chp->omp, OBJ_SEQFEAT, NULL, NULL);
3702 			if(omtp == NULL)
3703 				return TRUE;
3704 			if (omtp->subtypefunc != NULL)
3705 				chp->subtype = (*(omtp->subtypefunc)) (gcp->thisitem);
3706 			else
3707 				chp->subtype = 0;
3708 			if(gcp->thistype == OBJ_SEQFEAT &&
3709 				csop->features[chp->subtype] == 0)	/*filter unwanted features*/
3710 				return TRUE;
3711 
3712 			/*tolerate the unknown band*/
3713 			/*
3714 			if(sfp->data.choice == 14 && gcp->thistype == OBJ_BIOSEQ_MAPFEAT)
3715 			{
3716 				uop = sfp->data.value.ptrvalue;
3717 				band = get_band_type(uop);
3718 				if(band == 0)
3719 					return TRUE;
3720 			}
3721 			*/
3722 			fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp->entityID, chp->subtype);
3723 
3724 			/*special collection for the LOD scores*/
3725 			if(chp->is_lod_score && gcp->thistype == OBJ_SEQFEAT)	/*it is the LOD score data*/
3726 			{
3727 				fnp->extra_data = EXTRA_LOD_SCORE;
3728 				fnp->bin_order = GetLODScoreBitValue(sfp);
3729 				StringCpy(fnp->annotDB, chp->annotDB);
3730 				MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3731 				return TRUE;
3732 			}
3733 
3734 			fnp->has_product = (sfp->product !=NULL);
3735 			fnp->extra_data = ck_seqfeat_extra(sfp);	/*extra data associated with a Gene-ref*/
3736 			get_mapmarker_info(sfp->ext, &(fnp->extra_data), &(fnp->bin_order));
3737 			if(fnp->bin_order == 0)	/*just as a backup*/
3738 				fnp->bin_order = get_bin_order(sfp);
3739 			if(gcp->thistype == OBJ_SEQFEAT && chp->annotDB[0] != '\0')
3740 				StringCpy(fnp->annotDB, chp->annotDB);
3741 			else
3742 				fnp->annotDB[0]= '\0';
3743 			MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3744 
3745 			/*special collection for cytogenetic band*/
3746 			if(sfp->data.choice == 14 && gcp->thistype == OBJ_BIOSEQ_MAPFEAT)
3747 			{
3748 				uop = sfp->data.value.ptrvalue;
3749 				band = get_band_type(uop);
3750 				fnp->band = band;
3751 				fnp->label = StringSave(get_band_name(uop));
3752 				if(band < BAND_POINT)	/*for flybase*/
3753 				{
3754 					for(ufp = uop->data; ufp!=NULL; ufp=ufp->next)
3755 					{
3756 						if(is_label_match(ufp->label, "Subdivision"))
3757 							fnp->pos_label = StringSave(ufp->data.ptrvalue);
3758 					}
3759 				}
3760 			}
3761 			else	/*for non-cyto band*/
3762 			{
3763 				/* if((gcp->thistype == OBJ_BIOSEQ_MAPFEAT) || collect_feature_label(csop->flabel_format[chp->subtype])) */
3764 				/* check the landmark genes*/
3765 
3766 				if(collect_feature_label(csop->flabel_format[chp->subtype]))
3767 				{
3768 					if(omtp->labelfunc !=NULL)
3769 						(*(omtp->labelfunc)) (gcp->thisitem, chp->thislabel, label_size, OM_LABEL_CONTENT);
3770 						fnp->landmark = check_feature_for_landmark(chp->thislabel, chp->gdata, sfp, gcp, chp->priority);
3771 					if(fnp->landmark == FALSE && omtp->labelfunc != NULL &&
3772 						csop->flabel_format[chp->subtype] != OM_LABEL_CONTENT)
3773 						(*(omtp->labelfunc)) (gcp->thisitem, chp->thislabel, label_size, csop->flabel_format[chp->subtype]);
3774 
3775 				}
3776 
3777 				if(chp->thislabel[0] != '\0')
3778 					fnp->label = StringSave(chp->thislabel);
3779 				slp = sfp->location;	/*collect the intervals*/
3780 				if(slp->choice == SEQLOC_PACKED_PNT || csop->nointerval == FALSE)
3781 				{
3782 					grp = gcp->rdp;
3783 					for(i=0; (grp!=NULL) && i<gcp->num_interval; ++i)
3784 					{
3785 						inp = MemNew(sizeof(IvalNode));
3786 						MemCopy(&(inp->gr), grp, sizeof(GatherRange));
3787 						ValNodeAddPointer(&(fnp->interval), (Uint1)(i+1), (Pointer)inp);
3788 						++grp;
3789 					}
3790 				}
3791 			}
3792 			break;
3793 		default:
3794 			break;
3795 	}
3796 
3797 	return TRUE;
3798 }
3799 
3800 
ignore_feature(BoolPtr f_list)3801 static Boolean ignore_feature(BoolPtr f_list)
3802 {
3803 	Int2 i;
3804 
3805 	if(f_list == NULL)
3806 		return TRUE;
3807 	for(i = 0; i<FEATDEF_ANY; ++i)
3808 		if(f_list[i])
3809 			return FALSE;
3810 	return TRUE;
3811 }
3812 
3813 /***********************************************************************
3814 *
3815 *	CollectItemForSeqLoc(slp, entityID, left, is_aa, csop)
3816 *	Collect sequences, features for a Seq-loc
3817 *	slp: the target Seq-loc
3818 *	entityID: the top level entityID for the current sequence
3819 *	left: the left offset on the graph
3820 *	is_aa: if TRUE, set get_feats_product flag to TRUE
3821 *	csop: the collection option
3822 *
3823 *
3824 ***********************************************************************/
CollectItemForSeqLocEx(SeqLocPtr slp,Uint2 entityID,Int4 left,Boolean is_aa,SeqIdPtr maybe_mapid,CollectSeqOptionPtr csop,GeneDataPtr gdata,Uint2 priority,Boolean forceSeglevelsTo1)3825 NLM_EXTERN ValNodePtr CollectItemForSeqLocEx(SeqLocPtr slp, Uint2 entityID, Int4 left, Boolean is_aa, SeqIdPtr maybe_mapid, CollectSeqOptionPtr csop, GeneDataPtr gdata, Uint2 priority, Boolean forceSeglevelsTo1)
3826 {
3827 	GatherScope gs;
3828 	CollectHeader ch;
3829 
3830 	if(slp == NULL || entityID == 0 || csop == NULL)
3831 		return NULL;
3832 	ch.omp = ObjMgrGet();	/*set up the options*/
3833 	ch.features = NULL;
3834 	ch.csop = csop;
3835 	ch.maybe_mapid = maybe_mapid;
3836 	ch.filter_level = csop->filter_level;
3837 	ch.gdata = gdata;
3838 	ch.priority = priority;
3839 	ch.skip_feature = FALSE;
3840 	ch.prev_feat = NULL;
3841 	ch.prev_align = NULL;
3842 	ch.index = 0;
3843 	ch.annotDB[0] = '\0';
3844 	ch.is_lod_score = FALSE;
3845 
3846 
3847 	MemSet((Pointer)&gs, 0, sizeof (GatherScope));
3848 	gs.get_feats_location = TRUE;
3849 	gs.get_feats_product = is_aa;
3850 	MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
3851 
3852 	gs.ignore[OBJ_SEQENTRY] = FALSE;
3853 	gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
3854 	gs.ignore[OBJ_BIOSEQ] = FALSE;
3855 	gs.ignore[OBJ_BIOSEQ_MAPFEAT] = FALSE;
3856 	gs.ignore[OBJ_BIOSEQ_DELTA] = FALSE;
3857 	if(!ignore_feature(csop->features))
3858 	{
3859 		gs.ignore[OBJ_SEQANNOT] = FALSE;
3860 		gs.ignore[OBJ_SEQFEAT] = FALSE;
3861 	}
3862 
3863 	gs.nointervals = csop->nointerval;
3864 	gs.seglevels = csop->seglevels;
3865 	/*gs.stop_on_annot = TRUE;*/
3866 	if(gs.seglevels == 0)
3867 	{
3868 		gs.ignore_top = FALSE;
3869 		gs.stop_on_annot = FALSE;
3870 		/* gs.ignore_top = FALSE;
3871 		gs.stop_on_annot = TRUE; */
3872 	}
3873 	else if (forceSeglevelsTo1)
3874 	{
3875 		gs.ignore_top = TRUE; /* JK */
3876 		gs.stop_on_annot = FALSE; /* JK */
3877 	}
3878 	else
3879 	{
3880 		gs.ignore_top = FALSE;
3881 		gs.stop_on_annot = TRUE;
3882 	}
3883 	gs.currlevel = 0;
3884 	gs.split_packed_pnt = TRUE;
3885 
3886 
3887 	for(; slp!= NULL; slp = slp->next)
3888 	{
3889 		gs.offset = left;
3890 		gs.target = slp;
3891 		GatherEntity(entityID, (Pointer)(&ch), collseqfunc, &gs);
3892 		left += SeqLocLen(slp);
3893 	}
3894 	return ch.features;
3895 }
3896 
CollectItemForSeqLoc(SeqLocPtr slp,Uint2 entityID,Int4 left,Boolean is_aa,SeqIdPtr maybe_mapid,CollectSeqOptionPtr csop,GeneDataPtr gdata,Uint2 priority)3897 NLM_EXTERN ValNodePtr CollectItemForSeqLoc(SeqLocPtr slp, Uint2 entityID, Int4 left, Boolean is_aa, SeqIdPtr maybe_mapid, CollectSeqOptionPtr csop, GeneDataPtr gdata, Uint2 priority)
3898 {
3899 	return CollectItemForSeqLocEx(slp, entityID, left, is_aa, maybe_mapid, csop, gdata, priority, FALSE);
3900 }
3901 
is_segmap_align_annot(SeqAnnotPtr annot)3902 static Uint1 is_segmap_align_annot(SeqAnnotPtr annot)
3903 {
3904 	UserObjectPtr uop;
3905 	ValNodePtr desc;
3906 	ObjectIdPtr oip;
3907 	UserFieldPtr ufp;
3908 
3909 	if(annot == NULL)
3910 		return 0;
3911 	if(annot->type != 2)
3912 		return 0;
3913 	if(is_annot_for_hist_alignment(annot))
3914 		return 0;
3915 
3916 	desc =annot->desc;
3917 	while(desc)
3918 	{
3919 		if(desc->choice == Annot_descr_user)
3920 		{
3921 			uop = desc->data.ptrvalue;
3922 			if(uop->type)
3923 			{
3924 				oip = uop->type;
3925 				if(StringCmp(oip->str, "SegMap STS Alignment") == 0)
3926 				{
3927 					ufp = uop->data;
3928 					if(ufp && ufp->choice == 2)
3929 						return (Uint1)(ufp->data.intvalue);
3930 				}
3931 			}
3932 		}
3933 		desc = desc->next;
3934 	}
3935 	return 0;
3936 }
3937 
create_gr_data(SeqAlignPtr align,SeqLocPtr m_loc,Int4 m_left)3938 static GatherRangePtr create_gr_data(SeqAlignPtr align, SeqLocPtr m_loc, Int4 m_left)
3939 {
3940 	Int2 i, num;
3941 	SeqAlignPtr curr;
3942 	GatherRangePtr grp;
3943 
3944 	for(num = 0, curr = align; curr!= NULL; curr = curr->next)
3945 		++num;
3946 	if( num == 0)
3947 		return NULL;
3948 
3949 	grp = MemNew((size_t)num * sizeof(GatherRange));
3950 	for(i =0, curr = align; curr != NULL; curr = curr->next, ++i)
3951 	{
3952 		if(!SeqLocOffset (m_loc, curr->bounds, &(grp[i]), m_left))
3953 		{
3954 			grp[i].left = -1;
3955 			grp[i].right = -1;
3956 		}
3957 	}
3958 	return grp;
3959 }
3960 
/*
*	add_int_with_order(head, type, pos)
*	insert a new ValNode (choice = type, intvalue = pos) into a list
*	that is kept in ascending order of intvalue
*	the new node goes in front of the first node with a larger value,
*	so it ends up behind the nodes with the same value. If there is
*	no larger value, it is added to the end
*/
static void add_int_with_order(ValNodePtr PNTR head, Uint1 type, Int4 pos)
{
	ValNodePtr PNTR link;
	ValNodePtr node;

	if(*head == NULL)
	{
		ValNodeAddInt(head, type, pos);
		return;
	}

	node = ValNodeNew(NULL);
	node->choice = type;
	node->data.intvalue = pos;

	/*link is the pointer that will have to point to the new node:
	either the head itself or the next field of the node before*/
	link = head;
	while(*link != NULL && !(pos < (*link)->data.intvalue))
		link = &((*link)->next);

	node->next = *link;
	*link = node;
}
3994 
3995 
3996 
/*
*	load_open_close_sts_mark(halign, anp_list, annot_type, m_loc, m_left)
*	For every AlignNode in anp_list (nodes whose choice is OBJ_SEQANNOT
*	are skipped, as are AlignNodes whose Seq-id is the same Bioseq as
*	m_loc) look through the alignment chain halign for Std-segs that
*	contain a location on the AlignNode's sequence. Each location that
*	SeqLocOffset() can map adds one mark to the mismatch list of the
*	AlignNode's first AlignSeg: choice = annot_type, value = mapped left
*	position, list kept sorted by add_int_with_order(). When the
*	AlignNode has no segments yet, a single REG_SEG that spans
*	anp->extremes is created to carry the marks.
*
*	NOTE(review): align->segs is read as a StdSegPtr without looking at
*	the alignment's segment type - presumably every alignment in a
*	SegMap STS annotation is a Std-seg; confirm against the caller.
*/
static void load_open_close_sts_mark(SeqAlignPtr halign, ValNodePtr anp_list, Uint1 annot_type, SeqLocPtr m_loc, Int4 m_left)
{
	SeqAlignPtr align;
	AlignNodePtr anp;
	AlignSegPtr asp;
	SeqIdPtr sip;
	SeqLocPtr slp;
	GatherRange gr;
	GatherRangePtr grp;
	Int4 start, stop;
	Uint1 strand;
	SeqInt sint;
	SeqLoc sl;
	StdSegPtr ssp;
	Int4 e_left, e_right;
	Int4 i;

	/* one range per alignment in halign, relative to m_loc */
	grp = create_gr_data(halign, m_loc, m_left);
	if(grp == NULL)
		return;

	for(; anp_list != NULL; anp_list = anp_list->next)
	{
		if(anp_list->choice == OBJ_SEQANNOT)
			continue;
		anp = anp_list->data.ptrvalue;
		if(anp == NULL)
			continue;
		sip = anp->sip;
		if(SeqIdForSameBioseq(sip, SeqLocId(m_loc)))
			continue;

		/* interval on the aligned sequence that corresponds to anp->extremes */
		if(anp->seqpos < 0)	/*minus strand*/
		{
			stop = ABS(anp->seqpos);
			start = stop - (anp->extremes.right - anp->extremes.left);
			strand = Seq_strand_minus;
		}
		else
		{
			start = anp->seqpos;
			stop = start + (anp->extremes.right - anp->extremes.left);
			strand = Seq_strand_plus;
		}

		/* temporary Seq-loc on the stack; it is only handed to SeqLocOffset() */
		sint.from = start;
		sint.to = stop;
		sint.strand = strand;
		sint.id = sip;
		sl.choice = SEQLOC_INT;
		sl.data.ptrvalue = &sint;
		sl.next = NULL;

		e_left = anp->extremes.left;
		e_right = anp->extremes.right;
		asp = anp->segs;
		for(align = halign, i=0; align != NULL; align = align->next, ++i)
		{
			if(grp[i].left == -1)	/* alignment could not be mapped */
				continue;
			if(e_left > grp[i].right || e_right < grp[i].left)	/* no overlap */
				continue;

			for(ssp = align->segs; ssp != NULL; ssp = ssp->next)
			{
				/* location in this Std-seg that lies on the AlignNode's sequence */
				for(slp = ssp->loc; slp != NULL; slp = slp->next)
				{
					if(SeqIdMatch(SeqLocId(slp), sip))
						break;
				}
				if(slp == NULL)
					continue;
				if(!SeqLocOffset (&sl, slp, &gr, e_left))
					continue;

				if(asp == NULL)
				{
					/* no segment yet: create one spanning the node to hold the marks */
					asp = MemNew(sizeof(AlignSeg));
					if(asp == NULL)
					{
						MemFree(grp);
						return;
					}
					MemCopy(&(asp->gr), &(anp->extremes), sizeof(GatherRange));
					asp->type = REG_SEG;
					anp->segs = asp;
				}
				add_int_with_order(&(asp->mismatch), annot_type, gr.left);
			}
		}
	}
	MemFree(grp);
}
4099 
4100 
/* user data that CollectSegMapSTSAlign() hands to coll_segmap_func()
 * through GatherEntity() */
typedef struct segmap_data{
	ValNodePtr anp_list;	/* AlignNode list that receives the STS marks */
	SeqLocPtr m_loc;	/* location passed on to load_open_close_sts_mark() as m_loc */
	Int4 left;	/* value passed on to load_open_close_sts_mark() as m_left */
}SegMapData, PNTR SegMapDataPtr;
4106 
coll_segmap_func(GatherContextPtr gcp)4107 static Boolean coll_segmap_func(GatherContextPtr gcp)
4108 {
4109 	ValNodePtr anp_list;
4110 	SeqAnnotPtr annot;
4111 	Uint1 annot_type;
4112 	SeqAlignPtr align;
4113 	SegMapDataPtr smdp;
4114 
4115 	smdp = (SegMapDataPtr)(gcp->userdata);
4116 	if(smdp == NULL || smdp->anp_list == NULL || smdp->m_loc == NULL)
4117 		return FALSE;
4118 	anp_list = smdp->anp_list;
4119 
4120 	annot = (SeqAnnotPtr)(gcp->thisitem);
4121 	if(annot == NULL || annot->type != 2)
4122 		return TRUE;
4123 	annot_type = is_segmap_align_annot(annot);
4124 	if(annot_type == 0)
4125 		return TRUE;
4126 	align = annot->data;
4127 	load_open_close_sts_mark(align, anp_list, annot_type, smdp->m_loc, smdp->left);
4128 
4129 	return TRUE;
4130 }
4131 
4132 
4133 /*******************************************************************
4134 *
4135 *	void CollectSegMapSTSAlign( entityID, anp_list)
4136 *	look for the sts alignment from segmap stored as Seq-annot in
4137 *	in entityID. Add the alignment as the mismatch marker in the
4138 *	AlignSeg of the anp_list
4139 *
4140 *******************************************************************/
CollectSegMapSTSAlign(Uint2 entityID,ValNodePtr anp_list,SeqLocPtr m_loc,Int4 m_left)4141 NLM_EXTERN void CollectSegMapSTSAlign( Uint2 entityID, ValNodePtr anp_list, SeqLocPtr m_loc, Int4 m_left)
4142 {
4143 	GatherScope gs;
4144 	SegMapData smd;
4145 
4146 	if(entityID == 0 || anp_list == NULL)
4147 		return;
4148 
4149 	MemSet((Pointer)&gs, 0, sizeof (GatherScope));
4150 	MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
4151 	gs.ignore[OBJ_SEQANNOT] = FALSE;
4152 
4153 	smd.anp_list = anp_list;
4154 	smd.m_loc = m_loc;
4155 	smd.left = m_left;
4156 	GatherEntity(entityID, (Pointer)(&smd), coll_segmap_func, &gs);
4157 }
4158 
4159 
4160 /*#####################################################################
4161 #
4162 #	functions related to the layout for FeatNode
4163 #
4164 ######################################################################*/
4165 
4166 
4167 
/***********************************************************************
*  Sort-order tables consulted by FeatNodeCompProc.
*	Both are assigned by SortFeatNode() right before it calls
*	SortValNode() and are indexed by the feattype of a FeatNode.
*	When either one is NULL, FeatNodeCompProc treats FeatNodes with
*	equal interval lists as equal.
*
************************************************************************/
static Uint1Ptr featureSortOrder;
static Uint1Ptr groupSortOrder;
4176 
/*
*	FeatNodeIntervalCompare(vnp1, vnp2)
*	Compare two interval lists pairwise. The data.ptrvalue of every
*	node is read as a GatherRangePtr; pairs in which either pointer is
*	NULL are skipped. For the first pair that differs, the list with
*	the larger left (or, for equal left, the larger right) compares as
*	smaller (-1). When all compared pairs are equal, the list that still
*	has nodes left compares as smaller; two lists of equal length give 0.
*/
static int FeatNodeIntervalCompare (ValNodePtr vnp1, ValNodePtr vnp2)

{
  GatherRangePtr  lhs, rhs;

  for (; vnp1 != NULL && vnp2 != NULL; vnp1 = vnp1->next, vnp2 = vnp2->next) {
    lhs = (GatherRangePtr) vnp1->data.ptrvalue;
    rhs = (GatherRangePtr) vnp2->data.ptrvalue;
    if (lhs == NULL || rhs == NULL) {
      continue;
    }
    if (lhs->left != rhs->left) {
      return (lhs->left > rhs->left) ? -1 : 1;
    }
    if (lhs->right != rhs->right) {
      return (lhs->right > rhs->right) ? -1 : 1;
    }
  }

  /* one list (or both) is exhausted */
  if (vnp1 != NULL) {
    return -1;
  }
  return (vnp2 != NULL) ? 1 : 0;
}
4218 
FeatNodeCompProc(VoidPtr ptr1,VoidPtr ptr2)4219 static int LIBCALLBACK FeatNodeCompProc (VoidPtr ptr1, VoidPtr ptr2)
4220 {
4221   FeatNodePtr  fnp1;
4222   FeatNodePtr  fnp2;
4223   ValNodePtr   vnp1;
4224   ValNodePtr   vnp2;
4225   GatherRange	gr1, gr2;
4226   Uint1 group1, group2;
4227   Uint1 order1, order2;
4228   int rsult;
4229 
4230   if (ptr1 != NULL && ptr2 != NULL) {
4231     vnp1 = *((ValNodePtr PNTR) ptr1);
4232     vnp2 = *((ValNodePtr PNTR) ptr2);
4233     if (vnp1 != NULL && vnp2 != NULL) {
4234       fnp1 = (FeatNodePtr) vnp1->data.ptrvalue;
4235       fnp2 = (FeatNodePtr) vnp2->data.ptrvalue;
4236       if (fnp1 != NULL && fnp2 != NULL) {
4237 	gr1 = fnp1->extremes;
4238 	gr2 = fnp2->extremes;
4239         /*
4240         if (gr1.left > gr2.left) {
4241           return 1;
4242         } else if (gr1.left < gr2.left) {
4243           return -1;
4244         } else if (gr1.right > gr2.right) {
4245           return 1;
4246         } else if (gr1.right < gr2.right) {
4247           return -1;
4248         } else */
4249         if ((rsult = FeatNodeIntervalCompare (fnp1->interval, fnp2->interval)) != 0) {
4250           return rsult;
4251         } else {
4252 	  if(featureSortOrder == NULL || groupSortOrder == NULL)
4253           	return 0;
4254 	  else
4255 	  {
4256 		group1 = groupSortOrder[fnp1->feattype];
4257 		group2 = groupSortOrder[fnp2->feattype];
4258 		if(group1 !=group2)
4259 			return -1;
4260 		order1 = featureSortOrder[fnp1->feattype];
4261 		order2 = featureSortOrder[fnp2->feattype];
4262 		if(order1 < order2)
4263 			return -1;
4264 		if(order1 > order2)
4265 			return 1;
4266 		return 0;
4267 	  }
4268         }
4269       } else {
4270         return 0;
4271       }
4272     } else {
4273       return 0;
4274     }
4275   } else {
4276     return 0;
4277   }
4278 }
4279 
4280 
4281 
4282 
4283 /***********************************************************************
4284 *
4285 *	SortFeatNode(list)
4286 *	sort a list of FeatNode to the ascending order of (extremes.left,
4287 *	extremes.right)
4288 *
4289 **********************************************************************/
4290 
SortFeatNode(ValNodePtr fnp_list,Uint1Ptr featureOrder,Uint1Ptr groupOrder)4291 NLM_EXTERN ValNodePtr SortFeatNode(ValNodePtr fnp_list, Uint1Ptr featureOrder, Uint1Ptr groupOrder)
4292 {
4293 	featureSortOrder = featureOrder;
4294 	groupSortOrder = groupOrder;
4295 	return SortValNode(fnp_list, FeatNodeCompProc);
4296 }
4297 
4298 
AlignNodeCompProc(VoidPtr ptr1,VoidPtr ptr2)4299 static int LIBCALLBACK AlignNodeCompProc (VoidPtr ptr1, VoidPtr ptr2)
4300 {
4301   AlignNodePtr  anp1, anp2;
4302   ValNodePtr   vnp1;
4303   ValNodePtr   vnp2;
4304   GatherRange	gr1, gr2;
4305 
4306   if (ptr1 != NULL && ptr2 != NULL) {
4307     vnp1 = *((ValNodePtr PNTR) ptr1);
4308     vnp2 = *((ValNodePtr PNTR) ptr2);
4309     if (vnp1 != NULL && vnp2 != NULL) {
4310       anp1 = (AlignNodePtr) vnp1->data.ptrvalue;
4311       anp2 = (AlignNodePtr) vnp2->data.ptrvalue;
4312       if (anp1 != NULL && anp2 != NULL) {
4313 	gr1 = anp1->extremes;
4314 	gr2 = anp2->extremes;
4315 	/* len1 = anp1->extremes.right - anp1->extremes.left;
4316 	len2 = anp2->extremes.right - anp2->extremes.left;
4317         if(len1 > len2)
4318           return -1;
4319         else if(len1 < len2)
4320           return 1; */
4321 
4322         if (gr1.left > gr2.left) {
4323           return 1;
4324         } else if (gr1.left < gr2.left) {
4325           return -1;
4326         } else if (gr1.right < gr2.right) {
4327           return 1;
4328         } else if (gr1.right > gr2.right) {
4329           return -1;
4330         } else {
4331           return 0;
4332         }
4333       } else {
4334         return 0;
4335       }
4336     } else {
4337       return 0;
4338     }
4339   } else {
4340     return 0;
4341   }
4342 }
4343 
4344 /*sort to make the display showing the alignment that are
4345 * consistuent of the master sequence first
4346 */
modify_align_node_block(ValNodePtr anp_list)4347 static ValNodePtr modify_align_node_block(ValNodePtr anp_list)
4348 {
4349 	ValNodePtr block_list = NULL;
4350 	ValNodePtr curr, next, prev = NULL;
4351 	AlignNodePtr anp;
4352 
4353 	if(anp_list == NULL)
4354 		return NULL;
4355 
4356 	curr = anp_list;
4357 	while(curr)
4358 	{
4359 		next = curr->next;
4360 		anp = curr->data.ptrvalue;
4361 		if(anp->blocks != NULL)
4362 		{
4363 			if(prev == NULL)
4364 				anp_list = curr->next;
4365 			else
4366 				prev->next = curr->next;
4367 			curr->next = NULL;
4368 			ValNodeLink(&block_list, curr);
4369 		}
4370 		else
4371 			prev = curr;
4372 		curr = next;
4373 	}
4374 
4375 	if(block_list == NULL)
4376 		return anp_list;
4377 	else
4378 	{
4379 		ValNodeLink(&block_list, anp_list);
4380 		return block_list;
4381 	}
4382 }
4383 
4384 
4385 
4386 /***********************************************************************
4387 *
4388 *	SortAlignNode(anp_list)
4389 *	sort a list of AlignNode to the ascending order of (extremes.left,
4390 *	extremes.right)
4391 *
4392 **********************************************************************/
SortAlignNode(ValNodePtr anp_list)4393 NLM_EXTERN ValNodePtr SortAlignNode(ValNodePtr anp_list)
4394 {
4395 	ValNodePtr list, curr, prev, last, next;
4396 	ValNodePtr head;
4397 
4398 	if(anp_list == NULL)
4399 		return NULL;
4400 	list = anp_list;
4401 	prev = NULL;
4402 	head = NULL;
4403 	while(list != NULL)
4404 	{
4405 		if(prev != NULL)
4406 			prev->next = list;
4407 		while(list && list->choice == OBJ_SEQANNOT)
4408 		{
4409 			if(head == NULL)
4410 				head = list;
4411 			prev = list;
4412 			list = list->next;
4413 		}
4414 		if(list != NULL)
4415 		{
4416 			curr = list;
4417 			last = NULL;
4418 			while(curr && curr->choice != OBJ_SEQANNOT)
4419 			{
4420 				last = curr;
4421 				curr = curr->next;
4422 			}
4423 			next = last->next;
4424 			last->next = NULL;
4425 			list = SortValNode(list, AlignNodeCompProc);
4426 			list = modify_align_node_block(list);
4427 			if(prev == NULL)
4428 				head = 	list;
4429 			else
4430 				prev->next = list;
4431 			while(list->next != NULL)
4432 				list = list->next;
4433 			prev = list;
4434 			list = next;
4435 		}
4436 	}
4437 
4438 	return head;
4439 }
4440 
4441 
4442 
4443 /*#######################################################################
4444 #
4445 #	function related to Layout of AlignNode
4446 #
4447 ########################################################################*/
4448 
4449 
4450 /***********************************************************************
4451 *
4452 *	find_insert_ypos(left, seglen, ins, l_bound, r_bound, p_pos, space
4453 *	num)
4454 *	find the level for placing the insertions. Used in both the layout
4455 *	for text and graphic
4456 *	left: to store the left-most position calculated for an insertion
4457 *	seglen: length of the insertion
4458 *	ins: the position for insertions
4459 *	l_bound: the leftmost position in the current line
4460 *	r_bound: the rightmost position in the current line
4461 *	p_pos: position for storing all the layout info
4462 *	num: number of elements in p_pos
4463 *	return the current level found for an insertion
4464 *
4465 ***********************************************************************/
find_insert_ypos(Int4Ptr left,Int4 seglen,Int4 ins,Int4 l_bound,Int4 r_bound,Int4Ptr p_pos,Int4 space,Int2 num)4466 NLM_EXTERN Int2 find_insert_ypos(Int4Ptr left, Int4 seglen, Int4 ins, Int4 l_bound, Int4 r_bound, Int4Ptr p_pos, Int4 space, Int2 num)
4467 {
4468  	Int2 i =0;
4469 	Int4 start, stop;
4470 
4471 	--seglen;
4472 	*left = MAX(l_bound, (ins-seglen));
4473 	start = *left;
4474         for(i =0; i<num; ++i)
4475 	{
4476 		if(p_pos[i] == 0)
4477 		{
4478 			p_pos[i] = (*left + seglen);
4479              		return i;
4480 		}
4481 
4482 		if(ins > (p_pos[i]+space))
4483 		{
4484 			start = (*left);
4485 			start +=MAX(0, (seglen - (ins - (p_pos[i]+space))));
4486 			stop = start+seglen;
4487 			if(stop <= r_bound)
4488 			{
4489 				*left = start;
4490 				p_pos[i] = (*left) + seglen;
4491 				return i;
4492 			}
4493 		}
4494 
4495 
4496 	}
4497 
4498 	return -1;
4499 }
4500 
4501 
4502 
4503 /************************************************************************
4504 *
4505 *	convert_gdata_for_featnode(gdata, cyto_loc, offset)
4506 *	gdata: the GeneDataPtr
4507 *	cyto_loc: the current location on the cytogenetic map
4508 *	offset: the offset of cyto_loc to the graphic viewer1
4509 *	for human cytogenetic map, the markers are not shown. But for
4510 *	the markers that were queried, it will display the interval for
4511 *	gene data
4512 *
4513 ************************************************************************/
4514 
convert_gdata_to_featnode(GeneDataPtr gdata,SeqLocPtr cyto_loc,Int4 offset)4515 NLM_EXTERN ValNodePtr convert_gdata_to_featnode (GeneDataPtr gdata, SeqLocPtr cyto_loc, Int4 offset)
4516 {
4517 	ValNodePtr fnp_node;
4518 	ValNodePtr prev;
4519 	FeatNodePtr fnp;
4520 	SeqLocPtr slp;
4521 	SeqPntPtr spp;
4522 	Boolean mod_fuzz;	/*for the old style of FeatNode. To modify the
4523 					fuzziness on a point*/
4524 	IntFuzzPtr fuzz;
4525 	GatherRange gr;
4526 	SeqFeatPtr sfp;
4527 
4528 
4529 	if(gdata == NULL || cyto_loc == NULL)
4530 		return NULL;
4531 	fnp_node = NULL;
4532 	prev = NULL;
4533 	while(gdata)
4534 	{
4535 		slp = NULL;
4536 		sfp = gdata->sfp;
4537 		if(sfp != NULL && sfp->location != NULL)
4538 		{
4539 			mod_fuzz = FALSE;
4540 			if(sfp->location->choice == SEQLOC_PNT)
4541 			{
4542 				spp = sfp->location->data.ptrvalue;
4543 				if(spp->fuzz != NULL)
4544 				{
4545 					fuzz = spp->fuzz;
4546 					if(fuzz->choice == 2)	/*range */
4547 					{
4548 						mod_fuzz = TRUE;
4549 						slp = SeqLocIntNew(fuzz->b, fuzz->a, 0, spp->id);
4550 					}
4551 				}
4552 			}
4553 			if(!mod_fuzz)
4554 				slp = sfp->location;
4555 			if(SeqLocOffset(cyto_loc, slp, &gr, offset))
4556 			{
4557 				fnp = CreateFeatNode (&fnp_node, &prev, gdata->itemType, gdata->itemID, gdata->entityID, gdata->subtype);
4558 				MemCopy(&(fnp->extremes), &gr,  sizeof(GatherRange));
4559 				fnp->label = StringSave(gdata->symbol);
4560 				fnp->landmark = TRUE;
4561 				if(gdata->sfp != NULL)
4562 				{
4563 					fnp->extra_data = ck_seqfeat_extra(gdata->sfp);
4564 					get_mapmarker_info(gdata->sfp->ext, &(fnp->extra_data), &(fnp->bin_order));
4565 				}
4566 			}
4567 
4568 
4569 			if(mod_fuzz)
4570 				SeqLocFree(slp);
4571 		}
4572 		gdata = gdata->next;
4573 	}
4574 
4575 	return fnp_node;
4576 }
4577 
4578 
4579 /*
4580 *	for AlignNode that includes insertions, map the insertion
4581 *	to gaps on the master sequence
4582 */
4583 
/* the data structure for storing the insertion information; one node
 * per master position, kept in ascending order of master_pos by
 * load_insertion_list() */
typedef struct insert_list {
	Int4 max_size;	/*largest insertion size recorded for this position*/
	Int4 master_pos;	/*position on the master sequence*/
	Boolean  used;	/* this position is actually at an inserted segment, or has already been consumed during mapping*/
	Boolean after;	/*does the insertion occur after the master_pos*/
	struct insert_list PNTR next;
}InsertList, PNTR InsertListPtr;
4592 
4593 
/*
*	load_insertion_list(head, insert_pos, insert_size, after)
*	Record an insertion of insert_size at master position insert_pos.
*	The list *head is kept in ascending order of master_pos with one
*	node per position: when a node for insert_pos already exists only
*	its max_size is raised (its after flag is left as it is);
*	otherwise a new node is linked in at the sorted position.
*	Nothing is recorded when the new node cannot be allocated.
*/
static void load_insertion_list(InsertListPtr PNTR head, Int4 insert_pos, Int4 insert_size, Boolean after)
{
	InsertListPtr curr, prev, ilp;

	prev = NULL;
	for(curr = *head; curr != NULL; prev = curr, curr = curr->next)
	{
		if(curr->master_pos == insert_pos)
		{
			curr->max_size = MAX(curr->max_size, insert_size);
			return;
		}
		if(curr->master_pos > insert_pos)
			break;
	}

	ilp = MemNew(sizeof(InsertList));
	if(ilp == NULL)
		return;
	ilp->max_size = insert_size;
	ilp->master_pos = insert_pos;
	ilp->used = FALSE;
	ilp->after = after;
	ilp->next = curr;

	/* link between prev and curr */
	if(prev == NULL)
		*head = ilp;
	else
		prev->next = ilp;
}
4625 
/*
*	add_offset_to_featnode(fnp_node, offset)
*	Shift every FeatNode in the list by offset: both ends of its
*	extremes and both ends of each of its intervals. A NULL list is
*	a no-op.
*/
static void add_offset_to_featnode(ValNodePtr fnp_node, Int4 offset)
{
	ValNodePtr node, ival_node;
	FeatNodePtr fnp;
	IvalNodePtr ival;

	for(node = fnp_node; node != NULL; node = node->next)
	{
		fnp = node->data.ptrvalue;
		fnp->extremes.left += offset;
		fnp->extremes.right += offset;

		ival_node = fnp->interval;
		while(ival_node != NULL)
		{
			ival = ival_node->data.ptrvalue;
			ival->gr.left += offset;
			ival->gr.right += offset;
			ival_node = ival_node->next;
		}
	}
}
4647 
AddOffsetToAlignNode(AlignNodePtr anp,Int4 offset)4648 NLM_EXTERN void AddOffsetToAlignNode(AlignNodePtr anp, Int4 offset)
4649 {
4650 	AlignSegPtr asp;
4651 	AlignBlockPtr abp;
4652 
4653 	anp->extremes.left += offset;
4654 	anp->extremes.right += offset;
4655 	for(abp = anp->blocks; abp != NULL; abp = abp->next)
4656 	{
4657 		abp->gr.left += offset;
4658 		abp->gr.right += offset;
4659 	}
4660 
4661 	for(asp = anp->segs; asp != NULL; asp = asp->next)
4662 	{
4663 		if(asp->type == INS_SEG)
4664 		{
4665 			asp->ins_pos += offset;
4666 			asp->gr.left += offset;
4667 		}
4668 		else
4669 		{
4670 			asp->gr.left += offset;
4671 			asp->gr.right += offset;
4672 		}
4673 		if(asp->cnp != NULL)
4674 			add_offset_to_featnode(asp->cnp, offset);
4675 	}
4676 }
4677 
/*
*	split_feature_interval(p_interval, offset, ins_pos, ins_size)
*	Cut the interval list *p_interval at ins_pos.
*	Intervals that end at or before ins_pos stay in *p_interval and are
*	shifted by offset. The first interval that starts after ins_pos is
*	cut off together with everything behind it and returned unshifted.
*	An interval that straddles ins_pos is split: the left part
*	(shifted, ending at ins_pos + offset) stays, while a new IvalNode
*	for ins_pos + 1 .. old right, followed by the remaining intervals,
*	is returned unshifted.
*	Returns NULL when nothing lies to the right of ins_pos.
*	ins_size is not used.
*/
static ValNodePtr split_feature_interval(ValNodePtr PNTR p_interval, Int4 offset,
										 Int4 ins_pos, Int4 ins_size)
{
	ValNodePtr node, rest;
	ValNodePtr last_kept = NULL;
	ValNodePtr right_list = NULL;
	IvalNodePtr ival, right_half;

	for(node = *p_interval; node != NULL; node = rest)
	{
		rest = node->next;
		ival = node->data.ptrvalue;

		if(ival->gr.right <= ins_pos)
		{
			/* entirely left of the cut */
			ival->gr.left += offset;
			ival->gr.right += offset;
			last_kept = node;
			continue;
		}

		if(ival->gr.left > ins_pos)
		{
			/* entirely right of the cut: detach from here on */
			if(last_kept == NULL)
				*p_interval = NULL;
			else
				last_kept->next = NULL;
			return node;
		}

		/*there is overlap */
		right_half = MemNew(sizeof(IvalNode));
		right_half->gr.strand = ival->gr.strand;
		right_half->gr.right = ival->gr.right;
		right_half->gr.left = ins_pos +1;

		ival->gr.left += offset;
		ival->gr.right = ins_pos + offset;
		node->next = NULL;

		ValNodeAddPointer(&right_list, 0, right_half);
		ValNodeLink(&right_list, rest);
		return right_list;
	}

	return NULL;
}
4728 
/*
*	add_insertion_to_featnode(pfnp_node, offset, ins_pos, ins_size)
*	Distribute the FeatNodes in *pfnp_node over the two sides of an
*	insertion at ins_pos.
*	FeatNodes that end at or before ins_pos stay in *pfnp_node and are
*	shifted by offset. FeatNodes that start after ins_pos are moved,
*	unshifted, to the returned list. A FeatNode that straddles ins_pos
*	is split: the original keeps the left part (shifted), and a copy
*	with its own label and pos_label strings receives ins_pos + 1 ..
*	old right and the intervals cut off by split_feature_interval().
*	Returns the list for the right side (NULL when it is empty).
*
*	NOTE(review): the copy is made with MemCopy, so any other pointer
*	members of FeatNode are shared between the original and the copy -
*	confirm that whatever frees FeatNodes can cope with that.
*/
static ValNodePtr add_insertion_to_featnode(ValNodePtr PNTR pfnp_node, Int4 offset,
											Int4 ins_pos, Int4 ins_size)
{
	ValNodePtr node, following;
	ValNodePtr last_kept = NULL;
	ValNodePtr right_list = NULL;
	ValNodePtr ival_node;
	FeatNodePtr fnp, right_fnp;
	IvalNodePtr ival;

	for(node = *pfnp_node; node != NULL; node = following)
	{
		following = node->next;
		fnp = node->data.ptrvalue;

		if(fnp->extremes.right <= ins_pos)
		{
			/* left of the insertion: shift in place */
			for(ival_node = fnp->interval; ival_node != NULL; ival_node = ival_node->next)
			{
				ival = ival_node->data.ptrvalue;
				ival->gr.left += offset;
				ival->gr.right += offset;
			}
			fnp->extremes.left += offset;
			fnp->extremes.right += offset;
			last_kept = node;
		}
		else if(fnp->extremes.left > ins_pos)
		{
			/* right of the insertion: move the node to the second list */
			if(last_kept == NULL)
				*pfnp_node = following;
			else
				last_kept->next = following;
			node->next = NULL;
			ValNodeLink(&right_list, node);
		}
		else
		{
			/*resides between the insertion points, needs to split the featnode*/
			right_fnp = MemNew(sizeof(FeatNode));
			MemCopy((Pointer)right_fnp, fnp, sizeof(FeatNode));
			if(fnp->label != NULL)
				right_fnp->label = StringSave	(fnp->label);
			if(fnp->pos_label != NULL)
				right_fnp->pos_label = StringSave	(fnp->pos_label);
			if(fnp->annotDB[0] != '\0')
				StringCpy(right_fnp->annotDB, fnp->annotDB);
			right_fnp->extremes.right = fnp->extremes.right;
			right_fnp->extremes.left = ins_pos + 1;
			right_fnp->extremes.strand = fnp->extremes.strand;

			ValNodeAddPointer(&right_list, node->choice, right_fnp);

			fnp->extremes.left += offset;
			fnp->extremes.right = ins_pos + offset;

			right_fnp->interval = split_feature_interval(&(fnp->interval), offset,
										 ins_pos, ins_size);

			last_kept = node;
		}
	}

	return right_list;
}
4807 
4808 
find_insertion_size(InsertListPtr ilp,Int4Ptr ins_pos)4809 static Int4 find_insertion_size (InsertListPtr ilp, Int4Ptr ins_pos)
4810 {
4811 	while(ilp)
4812 	{
4813 		if(ilp->master_pos == *ins_pos)
4814 		{
4815 			if(ilp->after == FALSE)
4816 				-- (*ins_pos);
4817 			return ilp->max_size;
4818 		}
4819 		ilp = ilp->next;
4820 	}
4821 
4822 	return 0;
4823 }
/*
*	get_max_insert_size(ilp, from, to, insert_pos)
*	Find the first unused node whose master_pos lies in [from, to].
*	The node is marked as used, *insert_pos receives its master_pos
*	(minus one when after == FALSE) and its max_size is returned.
*	Returns -1 when there is no such node; the search also stops at a
*	used node that lies beyond to.
*
*	NOTE(review): the previous version computed from + 1 / to + 1 for
*	nodes with after == FALSE but never used those values in the range
*	test; the test has always been done against from / to as given.
*	Confirm that this is the intended range.
*/
static Int4 get_max_insert_size (InsertListPtr ilp, Int4 from, Int4 to, Int4Ptr insert_pos)
{
	for(; ilp != NULL; ilp = ilp->next)
	{
		if(ilp->used != FALSE)
		{
			/* already taken by an insertion or an earlier mapping */
			if(ilp->master_pos > to)
				return -1;
			continue;
		}

		if(ilp->master_pos < from || ilp->master_pos > to)
			continue;

		*insert_pos = (ilp->after == FALSE) ? ilp->master_pos - 1 : ilp->master_pos;
		ilp->used = TRUE;
		return ilp->max_size;
	}

	return -1;
}
4855 
/*
*	get_max_gap_size(ilp, from, to)
*	Add up max_size of all unused nodes whose master_pos lies in
*	[from, to] and mark those nodes as used. Scanning stops at the
*	first node beyond to. Returns the sum (0 when nothing matched).
*/
static Int4 get_max_gap_size(InsertListPtr ilp, Int4 from, Int4 to)
{
	Int4 total = 0;

	for(; ilp != NULL; ilp = ilp->next)
	{
		if(ilp->used == FALSE && ilp->master_pos >= from && ilp->master_pos <= to)
		{
			ilp->used = TRUE;
			total += ilp->max_size;
		}
		if(ilp->master_pos > to)
			break;
	}
	return total;
}
4877 
4878 
/*
*	add_offset_to_mismatch(mismatch, offset, ins_pos, ins_size)
*	Walk the mismatch list and add offset to every value that is not
*	larger than ins_pos. At the first value larger than ins_pos the
*	list is cut: that node and everything behind it is returned
*	unshifted, and *mismatch keeps only the part in front of it.
*	With ins_pos == -1 every value is shifted and nothing is cut off.
*	Returns NULL when nothing was cut off. ins_size is not used.
*/
static ValNodePtr add_offset_to_mismatch(ValNodePtr PNTR mismatch, Int4 offset, Int4 ins_pos, Int4 ins_size)
{
	ValNodePtr right_part = NULL;
	ValNodePtr node;
	ValNodePtr last_kept = NULL;

	for(node = *mismatch; node != NULL; last_kept = node, node = node->next)
	{
		if(ins_pos == -1 || node->data.intvalue <= ins_pos)
		{
			node->data.intvalue += offset;
			continue;
		}

		/* first mark beyond the insertion: detach the rest of the list */
		ValNodeLink(&right_part, node);
		if(last_kept == NULL)
			*mismatch = NULL;
		else
			last_kept->next = NULL;
		return right_part;
	}
	return right_part;
}
4905 
/*
*	reset_insertion_list(ilp, asp)
*	Recompute the used flag of every node in ilp for one alignment:
*	a node is used when some segment in the list asp has an ins_pos
*	equal to the node's master_pos, otherwise it is unused.
*	(ins_pos is compared for every segment, whatever its type.)
*/
static void reset_insertion_list(InsertListPtr ilp, AlignSegPtr asp)
{
	AlignSegPtr seg;

	for(; ilp != NULL; ilp = ilp->next)
	{
		seg = asp;
		while(seg != NULL && seg->ins_pos != ilp->master_pos)
			seg = seg->next;

		/* seg is non-NULL exactly when a matching segment was found */
		if(seg != NULL)
			ilp->used = TRUE;
		else
			ilp->used = FALSE;
	}
}
4925 
/* clear the used flag of every node in the insertion list */
static void refresh_insertion_list(InsertListPtr ilp)
{
	InsertListPtr entry;

	for(entry = ilp; entry != NULL; entry = entry->next)
		entry->used = FALSE;
}
4934 
/*
*	get_offset_of_insertion(ilp, left)
*	Return the sum of max_size over the leading nodes of ilp whose
*	master_pos is smaller than left; summing stops at the first node
*	with master_pos >= left.
*/
static Int4 get_offset_of_insertion(InsertListPtr ilp, Int4 left)
{
	Int4 total = 0;

	for(; ilp != NULL && ilp->master_pos < left; ilp = ilp->next)
		total += ilp->max_size;

	return total;
}
4950 
/*
*	modify_anp_with_insertion(anp, ilp)
*	Re-map one AlignNode into a coordinate system in which every
*	insertion recorded in ilp occupies max_size positions of its own.
*	offset is the running total of inserted positions to the left of
*	the segment being processed.
*	  INS_SEG: converted into a REG_SEG placed behind its insertion
*	           point; when the insertion is shorter than the maximum
*	           for that position, a GAP_SEG fills the remainder.
*	  GAP_SEG: widened by the unused insertions that fall into it.
*	  REG_SEG: for every unused insertion that falls into it, cut at
*	           the insertion point and a GAP_SEG of the insertion size
*	           put in between; FeatNodes and mismatch marks are
*	           distributed over the two halves.
*	Afterwards the blocks of the node, if any, are cut and shifted in
*	the same way.
*/
static void modify_anp_with_insertion(AlignNodePtr anp, InsertListPtr ilp)
{
	Int4 offset;
	Int4 leftover;
	AlignSegPtr asp, next, new_asp, t_asp, prev;
	Int4 max_insert_size;
	Int4 insert_pos;
	AlignBlockPtr abp, new_abp, next_abp;
	ValNodePtr second_ms_list;


	asp = anp->segs;
	prev = NULL;
	/* insertions this node has itself are "used"; the others become gaps */
	reset_insertion_list(ilp, asp);
	offset = get_offset_of_insertion(ilp, anp->extremes.left);
	anp->extremes.left += offset;
	while(asp)
	{
		next = asp->next;
		if(asp->type == INS_SEG)
		{
			max_insert_size = find_insertion_size (ilp, &(asp->ins_pos));
			if(max_insert_size >0)
			{
				leftover = max_insert_size - asp->gr.right;	/*gr.right is the size of the insertion*/
				/*convert the insertion into a REG_SEG */
				asp->gr.left = asp->ins_pos + offset + 1;	/*insert after */
				asp->gr.right += (asp->gr.left -1);
				asp->type = REG_SEG;
				add_offset_to_featnode(asp->cnp, offset +1);

				/*insert the additional one for gaps*/
				if(leftover > 0)
				{
					new_asp = MemNew(sizeof(AlignSeg));
					new_asp->type = GAP_SEG;
					new_asp->gr.left = asp->gr.right + 1;
					new_asp->gr.right = new_asp->gr.left + leftover -1;
					asp->next = new_asp;
					new_asp->next = next;
				}
				offset += max_insert_size;
			}
			prev = asp;
		}
		else if(asp->type == GAP_SEG)
		{ /*a gap */
			max_insert_size = get_max_gap_size(ilp, asp->gr.left, asp->gr.right);
			asp->gr.left += offset;
			asp->gr.right += max_insert_size + offset;
			offset += max_insert_size;
			prev = asp;
		}
		else if(asp->type == REG_SEG)
		{	/* a diagonal */
			while( asp != NULL && (max_insert_size =
				get_max_insert_size (ilp, asp->gr.left,
				asp->gr.right, &insert_pos)) >0)
			{
				/*insertion at the very beginning */
				if(insert_pos == -1)
				{
					new_asp = MemNew(sizeof(AlignSeg));
					new_asp->type = GAP_SEG;
					new_asp->gr.left = asp->gr.left;
					new_asp->gr.right = asp->gr.left + max_insert_size -1;
					if(prev == NULL)
						anp->segs = new_asp;
					else
						prev->next = new_asp;
					prev = new_asp;
					new_asp->next = asp;
				}
				else
				{
				if(asp->mismatch != NULL)
					second_ms_list = add_offset_to_mismatch(&(asp->mismatch), offset,
						insert_pos, max_insert_size);
				else
					second_ms_list = NULL;

				/* part of the segment that lies behind the insertion point */
				leftover = asp->gr.right - insert_pos;
				asp->gr.left += offset;
				asp->gr.right = insert_pos + offset;

				new_asp = MemNew(sizeof(AlignSeg));
				new_asp->type = GAP_SEG;
				new_asp->gr.left = asp->gr.right + 1;
				new_asp->gr.right = insert_pos + offset + max_insert_size;
				new_asp->next = next;
				t_asp = asp;	/* remember the left half; asp moves on to the gap */
				asp->next = new_asp;
				asp = new_asp;

				if(leftover > 0)
				{
					/* right half, still in unshifted coordinates: it is
					   shifted by a later pass of this loop or by the code
					   that follows the loop */
					new_asp = MemNew(sizeof(AlignSeg));
					new_asp->type = REG_SEG;
					new_asp->gr.left = insert_pos +1;
					new_asp->gr.right = insert_pos + leftover;
					asp->next = new_asp;
					new_asp->next = next;
					new_asp->cnp = add_insertion_to_featnode(&(t_asp->cnp), offset,
										insert_pos, max_insert_size);
					new_asp->mismatch = second_ms_list;
					new_asp->next = next;
					asp->next = new_asp;
					asp = new_asp;
					prev = new_asp;
				}
				else
				{
					/* the insertion sits at the end of the segment. asp
					   now points at the new gap segment, which carries no
					   FeatNodes, so the ones to shift are those of the
					   segment kept in t_asp */
					add_offset_to_featnode(t_asp->cnp, offset);
					prev = asp;
					asp = NULL;
					/* NOTE(review): leaving here skips the
					   "offset += max_insert_size" below although a gap of
					   that size has just been added - confirm whether the
					   following segments should be shifted as well */
					break;
				}
				}

				offset += max_insert_size;
			}	/*end of while*/
			if(asp != NULL)
			{
				/* no (further) insertion inside this segment: plain shift */
				asp->gr.left += offset;
				asp->gr.right += offset;
				if(asp->cnp)
					add_offset_to_featnode(asp->cnp, offset);
				if(asp->mismatch != NULL)
					add_offset_to_mismatch(&(asp->mismatch), offset, -1, -1);
				prev = asp;
			}
		}
		asp = next;
	}
	anp->extremes.right += offset;

	if(offset > 0 && anp->blocks != NULL)
	{
		/* second pass over the blocks, with every insertion available again */
		refresh_insertion_list(ilp);
		abp = anp->blocks;
		while(abp)
		{
			next_abp = abp->next;
			abp->next = NULL;
			offset = get_offset_of_insertion(ilp, abp->gr.left);
			while( abp && (max_insert_size = get_max_insert_size (ilp,
					abp->gr.left, abp->gr.right, &insert_pos)) > 0)
			{
				/*insertion at the very beginning */
				if(insert_pos == -1)
					offset += max_insert_size;
				else
				{
					leftover = abp->gr.right - insert_pos;
					if(leftover > 0)
					{
						/* cut the block; the right half is shifted later */
						new_abp = MemNew(sizeof(AlignBlock));
						new_abp->gr.left = insert_pos + 1;
						new_abp->gr.right = abp->gr.right;
						new_abp->order = abp->order;
						new_abp->next = next_abp;

						abp->gr.left += offset;
						abp->gr.right = insert_pos + offset;
						abp->next = new_abp;
						if(abp->gr.strand != Seq_strand_minus)
						{
							/* the strand value moves on to the right half */
							new_abp->gr.strand = abp->gr.strand;
							abp->gr.strand = 0;
						}
						abp = new_abp;
						offset += max_insert_size;
					}
					else	/*reach the end */
						break;
				}
			}
			if(abp != NULL)
			{
				abp->gr.left += offset;
				abp->gr.right += offset;
				abp->next = next_abp;
			}
			abp = next_abp;
		}
	}
}
5139 
/*
*	Release every node of an insertion list.
*	The successor of a node is read before the node itself is handed to
*	MemFree, so the walk never touches released memory.
*	Passing NULL is a no-op.
*/
static void free_insert_list(InsertListPtr ilp)
{
	InsertListPtr doomed;

	while(ilp != NULL)
	{
		doomed = ilp;
		ilp = ilp->next;	/*step forward first, then release*/
		MemFree(doomed);
	}
}
5151 
5152 
FlatAlignNode(ValNodePtr anp_list)5153 NLM_EXTERN Boolean FlatAlignNode(ValNodePtr anp_list)
5154 {
5155 	ValNodePtr curr;
5156 	AlignNodePtr master_anp, anp;
5157 	AnnotInfoPtr annot_info;
5158 	Uint1 align_type;
5159 	InsertListPtr ilp;
5160 	AlignSegPtr asp;
5161 	Int4 p_pos;
5162 
5163 
5164 	master_anp = NULL;
5165 	for(curr = anp_list; curr != NULL; curr = curr->next)
5166 	{
5167 		if(curr->choice == OBJ_SEQANNOT)
5168 		{
5169 			annot_info = curr->data.ptrvalue;
5170 			align_type = get_alignment_type (annot_info);
5171 			if(align_type == ALIGN_DNA_TO_PROT ||
5172 				align_type == ALIGN_PROT_TO_DNA || align_type == ALIGN_TDNA_TO_TDNA)
5173 				return FALSE;
5174 		}
5175 		else
5176 		{
5177 			anp = curr->data.ptrvalue;
5178 			if(anp->is_master)
5179 				master_anp = anp;
5180 		}
5181 	}
5182 	if(master_anp == NULL)
5183 		return FALSE;
5184 
5185 	/*load all the insertions in the alignments*/
5186 	ilp = NULL;
5187 	for(curr = anp_list; curr != NULL; curr = curr->next)
5188 	{
5189 		if(curr->choice != OBJ_SEQANNOT)
5190 		{
5191 			anp = curr->data.ptrvalue;
5192 			if(anp != master_anp)
5193 			{
5194 				p_pos = -1;
5195 				for(asp = anp->segs; asp != NULL; asp = asp->next)
5196 				{
5197 					if(asp->type == INS_SEG)
5198 						load_insertion_list(&ilp, asp->ins_pos, asp->gr.right, (Boolean)(p_pos == asp->ins_pos));
5199 					else if(asp->type != GAP_SEG)
5200 						p_pos = asp->gr.right;
5201 				}
5202 			}
5203 		}
5204 	}
5205 
5206 	if(ilp == NULL)
5207 		return FALSE;
5208 
5209 	/*do the real flatting*/
5210 	for(curr = anp_list; curr != NULL; curr = curr->next)
5211 	{
5212 		if(curr->choice != OBJ_SEQANNOT)
5213 		{
5214 			anp = curr->data.ptrvalue;
5215 			modify_anp_with_insertion(anp, ilp);
5216 		}
5217 	}
5218 
5219 
5220 	free_insert_list(ilp);
5221 	return TRUE;
5222 }
5223 
5224 
5225 /*
5226 *	Delete all the bad YACs from the list
5227 *	anything on the NHGRI map that is recorded inconsistent will
5228 *	be considered inconsistent. For the Whitehead map, the
5229 *	inconsistent+ambiguous is inconsistent. Inconsistent alone
5230 *	is not considered inconsistent
5231 */
is_ambiguous_annot(AnnotInfoPtr info,Uint1 db)5232 static Boolean is_ambiguous_annot(AnnotInfoPtr info, Uint1 db)
5233 {
5234 	if(info == NULL)
5235 		return FALSE;
5236 	if(info->annot_type == ANNOT_CONSIST)
5237 	{
5238 		if(info->consistent == ALIGN_CONSISTENT)
5239 			return FALSE;
5240 		else
5241 		{
5242 			if(info->consistent == ALIGN_INCONSISTENT)
5243 			{
5244 				if(db == YAC_NHGRI)
5245 					return TRUE;
5246 				else if(db == YAC_MIT)
5247 				{	/*inconsistent and ambiguous are different*/
5248 					if(StringCmp(info->annotDB, "Ambiguous") == 0)
5249 
5250 						return TRUE;
5251 					else
5252 						return FALSE;
5253 				}
5254 			}
5255 		}
5256 	}
5257 
5258 	return FALSE;
5259 }
5260 
5261 /*
5262 *	delete any of the whitehead yacs that only contains
5263 *	ambiguous STS hits
5264 */
delete_alignnode(AlignNodePtr anp,Uint1 db)5265 static Boolean delete_alignnode (AlignNodePtr anp, Uint1 db)
5266 {
5267 	AlignSegPtr asp;
5268 	ValNodePtr curr;
5269 	Boolean has_sts_hits;
5270 
5271 	has_sts_hits = FALSE;
5272 	for(asp = anp->segs; asp != NULL; asp = asp->next)
5273 	{
5274 		if(asp->mismatch != NULL)
5275 		{
5276 			for(curr = asp->mismatch; curr != NULL; curr = curr->next)
5277 			{
5278 				if(curr->choice == MISMATCH_CLOSE)	/*unambiguous hits*/
5279 					return FALSE;
5280 				if(curr->choice == MISMATCH_SQUARE)
5281 				{
5282 					if(db == YAC_NHGRI)
5283 						return FALSE;
5284 				}
5285 			}
5286 			has_sts_hits = TRUE;
5287 		}
5288 	}
5289 
5290 	return (has_sts_hits == FALSE);
5291 }
5292 
5293 
5294 /*
5295 *	Delete all the bad YACs from the list
5296 *	anything on the NHGRI map that is recorded inconsistent will
5297 *	be considered inconsistent. For the Whitehead map, the
5298 *	inconsistent+ambiguous is inconsistent. Inconsistent alone
5299 *	is not considered inconsistent
5300 */
CleanUpAmbiguousYAC(ValNodePtr PNTR anp_node,Uint1 db,SeqIdPtr chr_id)5301 NLM_EXTERN void CleanUpAmbiguousYAC (ValNodePtr PNTR anp_node, Uint1 db, SeqIdPtr chr_id)
5302 {
5303 	AnnotInfoPtr info;
5304 	AlignNodePtr anp;
5305 	ValNodePtr curr, prev, next;
5306 	Boolean del_annot;
5307 	Boolean del;
5308 
5309 	prev = NULL;
5310 	del_annot = FALSE;
5311 
5312 	curr = *anp_node;
5313 	while(curr)
5314 	{
5315 		next = curr->next;
5316 		del = FALSE;
5317 		if(curr->choice == OBJ_SEQANNOT)
5318 		{
5319 			info = curr->data.ptrvalue;
5320 			del_annot = is_ambiguous_annot(info, db);
5321 		}
5322 		else if(!del_annot || chr_id != NULL)
5323 		{
5324 			anp = curr->data.ptrvalue;
5325 			if(chr_id != NULL)
5326 			{
5327 				if(anp->sip != NULL)
5328 				{
5329 					if(SeqIdMatch(chr_id, anp->sip))
5330 						del = TRUE;
5331 				}
5332 			}
5333 			if(!del)
5334 				del = delete_alignnode (anp, db);
5335 		}
5336 		if(del_annot)
5337 			del = TRUE;
5338 
5339 		if(del)
5340 		{
5341 			if(prev == NULL)
5342 				*anp_node = next;
5343 			else
5344 				prev->next = next;
5345 			curr->next = NULL;
5346 			FreeAlignNode(curr);
5347 		}
5348 		else
5349 			prev = curr;
5350 		curr = next;
5351 	}
5352 }
5353 
5354 
5355 
5356 /*****************************************************************
5357 *
5358 *	check if the AlignNode only contains Seq-annot or it
5359 *	has real sequence alignment.
5360 *	the empty Seq-annot may be the unaligned contigs in
5361 *	Eric Green's map
5362 *
5363 ******************************************************************/
alignode_has_alignments(ValNodePtr aligns)5364 NLM_EXTERN Boolean alignode_has_alignments(ValNodePtr aligns)
5365 {
5366 	while(aligns)
5367 	{
5368 		if(aligns->choice != OBJ_SEQANNOT)
5369 			return TRUE;
5370 		aligns = aligns->next;
5371 	}
5372 
5373 	return FALSE;
5374 }
5375 
5376 
5377 
5378 
5379 
5380 
5381 
5382 
5383