1 /* $Id: jzcoll.c,v 6.18 2006/07/13 17:06:38 bollin Exp $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * File Name: $RCSfile: jzcoll.c,v $
27 *
28 * Author: Jinghui Zhang
29 *
30 * Initial Version Creation Date: 03/24/97
31 *
32 * $Revision: 6.18 $
33 *
34 * File Description:
35 * File for various alignments
36 *
37 * $Log: jzcoll.c,v $
38 * Revision 6.18 2006/07/13 17:06:38 bollin
39 * use Uint4 instead of Uint2 for itemID values
40 * removed unused variables
41 * resolved compiler warnings
42 *
43 * Revision 6.17 2001/06/26 16:42:58 vakatov
44 * POINT --> BAND_POINT (to avoid conflicts with MS-Win standard headers)
45 *
46 * Revision 6.16 2000/11/16 22:10:37 shavirin
47 * Moved many functions from txalign.c - due to move of txalign.c to
48 * distrib/tools directory and libncbitool.a library.
49 *
50 *
51 * ==========================================================================
52 */
53
54 #include <jzcoll.h>
55 #include <txalign.h>
56 #include <codon.h>
57
/* Residue letters in table order: pchars[i] is the character that row i and
 * column i of webb_blosum62 stand for (load_default_matrix maps the table
 * through this string). */
58 static Char pchars[] = "ARNDCQEGHILKMFPSTWYVBZX"; /* amino acid names */
/* Pairwise substitution scores, WEBB_asize x WEBB_asize, rows and columns in
 * pchars order.  NOTE(review): the name suggests the BLOSUM62 matrix; the
 * values have not been checked against a reference copy here. */
59 static Int4 webb_blosum62[WEBB_asize][WEBB_asize] = {
60 { 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-2,-1, 0 },
61 {-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-1, 0,-1 },
62 {-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3, 3, 0,-1 },
63 {-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3, 4, 1,-1 },
64 { 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-3,-3,-2 },
65 {-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2, 0, 3,-1 },
66 {-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2, 1, 4,-1 },
67 { 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-1,-2,-1 },
68 {-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3, 0, 0,-1 },
69 {-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-3,-3,-1 },
70 {-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-3,-1 },
71 {-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2, 0, 1,-1 },
72 {-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-3,-1,-1 },
73 {-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-3,-3,-1 },
74 {-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-2,-1,-2 },
75 { 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2, 0, 0, 0 },
76 { 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-1,-1, 0 },
77 {-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-3,-2 },
78 {-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-3,-2,-1 },
79 { 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-3,-2,-1 },
80 {-2,-1, 3, 4,-3, 0, 1,-1, 0,-3,-4, 0,-3,-3,-2, 0,-1,-4,-3,-3, 4, 1,-1 },
81 {-1, 0, 0, 1,-3, 3, 4,-2, 0,-3,-3, 1,-1,-3,-1, 0,-1,-3,-2,-2, 1, 4,-1 },
82 { 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-1,-1,-1 },
83 };
84
load_default_matrix(void)85 NLM_EXTERN Int4Ptr PNTR load_default_matrix (void)
86 {
87 Int4Ptr PNTR ss;
88 Int2 i, j;
89
90 ss = (Int4Ptr PNTR) MemNew((size_t)TX_MATRIX_SIZE * sizeof (Int4Ptr));
91 for(i = 0; i<TX_MATRIX_SIZE; ++i)
92 ss[i] = (Int4Ptr) MemNew((size_t)TX_MATRIX_SIZE * sizeof (Int4));
93
94 for(i = 0; i < TX_MATRIX_SIZE; i++)
95 for(j = 0; j < TX_MATRIX_SIZE;j++)
96 ss[i][j] = -1000;
97 for(i = 0; i < WEBB_asize; ++i)
98 for(j = 0; j < WEBB_asize; ++j)
99 ss[pchars[i]][pchars[j]] = webb_blosum62[i][j];
100 for(i = 0; i < WEBB_asize; ++i)
101 ss[pchars[i]]['*'] = ss['*'][pchars[i]] = -4;
102 ss['*']['*'] = 1;
103 return ss;
104 }
105
free_default_matrix(Int4Ptr PNTR matrix)106 NLM_EXTERN void free_default_matrix (Int4Ptr PNTR matrix)
107 {
108 Int2 i;
109
110 for(i = 0; i<TX_MATRIX_SIZE; ++i)
111 MemFree(matrix[i]);
112 MemFree(matrix);
113 }
114
115 NLM_EXTERN SeqIdPtr LIBCALL
ScorePtrUseThisGi(ScorePtr sp)116 ScorePtrUseThisGi (ScorePtr sp)
117
118 {
119 ObjectIdPtr obid;
120 ScorePtr scrp;
121 SeqIdPtr gilist=NULL;
122
123 for (scrp=sp; scrp; scrp = scrp->next) {
124 obid = scrp->id;
125 if(obid && obid->str) {
126 if (StringICmp(obid->str, "use_this_gi") == 0) {
127 ValNodeAddInt(&gilist, SEQID_GI, scrp->value.intvalue);
128 }
129 }
130 }
131
132 return gilist;
133 }
134
135 /*
136 GetUseThisGi(SeqAlignPtr) looks for the "use_this_gi" flag in the ScorePtr.
137 */
138
139 NLM_EXTERN SeqIdPtr LIBCALL
GetUseThisGi(SeqAlignPtr seqalign)140 GetUseThisGi(SeqAlignPtr seqalign)
141
142 {
143 Boolean retval=FALSE;
144 DenseDiagPtr ddp;
145 DenseSegPtr dsp;
146 ScorePtr sp;
147 SeqIdPtr gilist=NULL;
148 StdSegPtr ssp;
149
150 sp = seqalign->score;
151 if (sp == NULL)
152 {
153 switch (seqalign->segtype)
154 {
155 case 1: /*Dense-diag*/
156 ddp = (DenseDiagPtr) seqalign->segs;
157 while (ddp)
158 {
159 sp = ddp->scores;
160 if (sp)
161 break;
162 ddp = ddp->next;
163 }
164 break;
165 case 2:
166 dsp = ( DenseSegPtr) seqalign->segs;
167 if (dsp)
168 {
169 sp = dsp->scores;
170 }
171 break;
172 case 3:
173 ssp = (StdSegPtr) seqalign->segs;
174 while (ssp)
175 {
176 sp = ssp->scores;
177 if (sp)
178 break;
179 ssp = ssp->next;
180 }
181 break;
182 default:
183 break;
184 }
185 }
186
187
188 gilist = ScorePtrUseThisGi(sp);
189 return gilist;
190 }
191
192 /*************************************************************************
193 *
194 * functions and structure related to create a text buffer for the
195 * alignment
196 *
197 *************************************************************************/
198
FreeTextAlignList(ValNodePtr tdp_list)199 NLM_EXTERN ValNodePtr FreeTextAlignList(ValNodePtr tdp_list)
200 {
201 TextAlignBufPtr tdp;
202 ValNodePtr next;
203 Int2 i;
204
205 while(tdp_list)
206 {
207 next = tdp_list->next;
208 tdp_list->next = NULL;
209 tdp = (TextAlignBufPtr) tdp_list->data.ptrvalue;
210 if(tdp->label)
211 MemFree(tdp->label);
212 if(tdp->buf)
213 MemFree(tdp->buf);
214 if(tdp->matrix_val)
215 MemFree(tdp->matrix_val);
216 if(tdp->exonCount > 0)
217 {
218 for(i =0; i<3; ++i)
219 MemFree(tdp->codon[i]);
220 }
221 MemFree(tdp);
222 MemFree(tdp_list);
223 tdp_list = next;
224 }
225
226 return NULL;
227 }
228
229
230 /*######################################################################
231 #
232 # functions related to ProcessTextAlignNode
233 #
234 #######################################################################*/
235
236
237 /******************************************************************************
238 *
239 * load_text(bsp, pos1, pos2, l_seq, l_pos, mbuf, maxlen)
240 * load the sequence into text
241 * bsp: the Bioseq
242 * pos1: the first position on the sequence.
243 * pos2: the second position on the sequence.
244 * if(pos1 and pos2 are negative val, indicate the region in on the* minus strand
245 * l_seq: the buffer for loading the sequence
246 * l_pos: the current position in l_seq. Will be updated after the sequence
247 * is loaded
248 * mbuf: buffer from the master sequence. For checking mismatches and positive scores
249 * maxlen: the maximum length per line. Used to determine the special
250 * format used for long insertions
251 * spacing is the space between the two adjacent residues
252 * mismatch: if TRUE, show the identical residue with
253 *
254 *****************************************************************************/
255
load_text(BioseqPtr bsp,Int4 pos1,Int4 pos2,CharPtr l_seq,Int4Ptr l_pos,CharPtr mbuf,Int2 maxlen,Int2 spacing,Boolean translate,Boolean mismatch,Int2Ptr matrix_val,Int4Ptr PNTR matrix,Uint1 strand,Int4Ptr PNTR posMatrix,Int4 q_start)256 static Boolean load_text(BioseqPtr bsp, Int4 pos1, Int4 pos2, CharPtr l_seq, Int4Ptr l_pos, CharPtr mbuf, Int2 maxlen, Int2 spacing, Boolean translate, Boolean mismatch, Int2Ptr matrix_val, Int4Ptr PNTR matrix, Uint1 strand, Int4Ptr PNTR posMatrix, Int4 q_start)
257 {
258 SeqPortPtr spp = NULL;
259 ByteStorePtr b_store = NULL;
260 Uint1 code;
261 Int4 start, stop;
262 Uint1 m_res, t_res, stdaa_res;
263 Int2 i;
264 Int2 val;
265 Int4 length, s_len;
266 Int2 c_pos;
267 Char temp[100];
268 Boolean protein;
269 Boolean overflow;
270 Boolean reverse;
271 Boolean is_real;
272 SeqFeatPtr fake_cds;
273 Boolean check_neg; /*if aa is negative, load it as lower case char*/
274 SeqMapTablePtr smtp;
275
276 if(*l_pos >= maxlen )
277 return FALSE;
278
279 /* posMatrix uses NCBIstdaa encoding */
280
281 if(posMatrix != NULL) {
282 if((smtp = SeqMapTableFindObj(Seq_code_ncbistdaa,
283 Seq_code_ncbieaa)) == NULL)
284 return FALSE;
285 }
286
287 protein = (bsp->mol == Seq_mol_aa);
288 reverse = FALSE;
289 if(protein)
290 code = Seq_code_ncbieaa;
291 else
292 code = Seq_code_iupacna;
293 check_neg = (matrix_val == NULL && matrix != NULL);
294 if(strand == Seq_strand_minus) { /*on the minus strand*/
295
296 start = -pos2;
297 stop = -pos1;
298
299 if(protein) {
300 strand = Seq_strand_plus;
301 reverse = TRUE;
302 }
303
304 } else {
305 start = pos1;
306 stop = pos2;
307 }
308 if(translate) {
309 fake_cds = make_fake_cds(bsp, start, stop, strand);
310 b_store = ProteinFromCdRegionEx(fake_cds, TRUE, FALSE);
311 SeqFeatFree(fake_cds);
312 if(b_store == NULL)
313 return FALSE;
314 length = (stop - start +1)/3;
315 BSSeek(b_store, 0, SEEK_SET);
316 } else {
317 spp = SeqPortNew(bsp, start, stop, strand, code);
318 length = stop - start +1;
319 }
320 c_pos = (Int2)(*l_pos);
321 overflow = (c_pos >= maxlen);
322 if(maxlen>0 && (length > maxlen)) { /*large insertions*/
323
324 for(i =0; i<5 && !overflow; ++i) {
325 if(translate)
326 l_seq[c_pos++] = (Uint1)BSGetByte(b_store);
327 else {
328 if(reverse)
329 SeqPortSeek(spp, length-1 -i, SEEK_SET);
330 l_seq[c_pos++] = SeqPortGetResidue(spp);
331 }
332 overflow = (c_pos >= maxlen);
333 }
334 for(i =0; i<3 && !overflow; ++i) {
335 l_seq[c_pos++] = '.';
336 overflow = (c_pos >= maxlen);
337 }
338 if(!overflow) {
339 if(translate)
340 BSSeek(b_store, length-1, SEEK_SET);
341 else if(!reverse)
342 SeqPortSeek(spp, length-5, SEEK_SET);
343 for(i =0; i<5 && !overflow; ++i) {
344 if(translate)
345 l_seq[c_pos++] = (Uint1)BSGetByte(b_store);
346 else {
347 if(reverse)
348 SeqPortSeek(spp, 4-i, SEEK_SET);
349 l_seq[c_pos++] = SeqPortGetResidue(spp);
350 }
351 overflow = (c_pos >= maxlen);
352 }
353 }
354 if(overflow)
355 l_seq[maxlen-1] = '\0';
356 else
357 l_seq[c_pos] = '\0';
358 sprintf(temp, "(length=%ld)", (long) length);
359 s_len = StringLen(temp);
360 StringCat(l_seq, temp);
361 *l_pos = c_pos+s_len;
362 } else {
363 if(translate) {
364 while((val = BSGetByte(b_store)) != EOF) {
365 t_res = (Uint1)val;
366 l_seq[c_pos]= t_res;
367 if(mbuf != NULL) {
368 m_res = mbuf[c_pos];
369 if(matrix_val && matrix)
370 matrix_val[c_pos] = (Int2)matrix[m_res][t_res];
371 if(mismatch && t_res == m_res)
372 l_seq[c_pos] = '.';
373 else if(check_neg && matrix[t_res][m_res] < 0)
374 l_seq[c_pos] = TO_LOWER(t_res);
375 }
376 c_pos += spacing;
377 if(c_pos >= maxlen) {
378 c_pos = maxlen;
379 break;
380 }
381 }
382 } else {
383 if(reverse)
384 SeqPortSeek(spp, length-1, SEEK_SET);
385 s_len = 0;
386 while((t_res = SeqPortGetResidue(spp)) != SEQPORT_EOF) {
387 is_real = IS_ALPHA(t_res);
388 if(is_real || t_res == '*' || t_res == '-') {
389 if(is_real && !protein)
390 t_res = TO_LOWER(t_res);
391 l_seq[c_pos] = t_res;
392 if(mbuf != NULL) {
393 m_res = mbuf[c_pos];
394 if(matrix_val) {
395 if(matrix) {
396 if(posMatrix != NULL) {
397 if(t_res == m_res) /* complete match */
398 matrix_val[c_pos] = INT2_MAX;
399 else {
400 stdaa_res = SeqMapTableConvert(smtp, t_res);
401 matrix_val[c_pos] = (Int2)posMatrix[c_pos + q_start][stdaa_res];
402
403 /*
404 if(posMatrix[c_pos + q_start][t_res] ==
405 matrix[t_res][t_res]) {
406 printf("Got it!");
407 } */
408
409 }
410 } else {
411 matrix_val[c_pos] = (Int2)matrix[m_res][t_res];
412 }
413
414 } else if(t_res == m_res)
415 matrix_val[c_pos] = '|';
416 }
417
418 if(mismatch && t_res == m_res)
419 l_seq[c_pos] = '.';
420 else if(posMatrix != NULL) {
421 stdaa_res = SeqMapTableConvert(smtp, m_res);
422 if(check_neg && posMatrix[c_pos + q_start][stdaa_res] < 0)
423 l_seq[c_pos] = TO_LOWER(t_res);
424 } else { /*regular BLOSSUM62*/
425 if(check_neg && matrix[t_res][m_res] < 0)
426 l_seq[c_pos] = TO_LOWER(t_res);
427 }
428 }
429 c_pos += spacing;
430 if(c_pos >= maxlen) {
431 c_pos = maxlen;
432 break;
433 }
434 ++s_len;
435 }
436 if(reverse) {
437 if(s_len == length)
438 break;
439 else
440 SeqPortSeek(spp, length -1 - s_len, SEEK_SET);
441 }
442 }
443 }
444 *l_pos = c_pos;
445 }
446
447 if(translate)
448 BSFree(b_store);
449 else
450 SeqPortFree(spp);
451 return TRUE;
452 }
453
454 /*##########################################################################
455 #
456 # functions related to add the features to the alignment
457 #
458 ###########################################################################*/
459
460
461 typedef struct protbuf{ /*for loading the translation of a CDs*/
462 CharPtr buf; /*load the protein sequence*/
463 Int4 start; /*start position in CDs*/
464 Int4 stop; /*stop position in CDs*/
465 Int4 pos; /*position for the feature*/
466 Boolean load_codon; /*load the codon data for aa sequence*/
467 ValNodePtr cvp_list; /*list for loading the codon of an aa*/
468 }ProtBuf, PNTR ProtBufPtr;
469
470
471
472 /************************************************************************
473 *
474 * check the protein sequence from CDs feature into the buffer
475 *
476 *************************************************************************/
load_prot_seq(GatherContextPtr gcp)477 static Boolean load_prot_seq(GatherContextPtr gcp)
478 {
479 SeqFeatPtr sfp;
480 ProtBufPtr pbp;
481 SeqLocPtr loc;
482
483 if(gcp->thistype != OBJ_SEQFEAT)
484 return FALSE;
485 sfp = (SeqFeatPtr)(gcp->thisitem);
486 if(sfp->data.choice !=3)
487 return FALSE;
488
489 pbp = (ProtBufPtr)(gcp->userdata);
490 if(pbp->load_codon) /*looking for codon in aa sequence*/
491 {
492 pbp->cvp_list = aa_to_codon(sfp, pbp->start, pbp->stop);
493 return (pbp->cvp_list !=NULL);
494 }
495
496
497 if(pbp->start <0)/*minus strand*/
498 loc = SeqLocIntNew((-pbp->stop), (-pbp->start), Seq_strand_minus, SeqLocId(sfp->location));
499 else
500 loc = SeqLocIntNew(pbp->start, pbp->stop, Seq_strand_plus, SeqLocId(sfp->location));
501
502 pbp->pos = print_protein_for_cds(sfp, pbp->buf, loc, TRUE);
503 SeqLocFree(loc);
504 return (pbp->pos != -1);
505 }
506
507
508
/*
 * buffer_for_feature(c_left, c_right, seq_start, seq_stop, fnp_node,
 *                    load_codon, pbp)
 *   Prepare *pbp for one feature and produce its text (or codon data) for
 *   the interval [seq_start, seq_stop].
 *
 *   c_left, c_right:     the clipped extent of the feature; column 0 of the
 *                        text corresponds to c_left
 *   seq_start, seq_stop: the matching interval on the sequence; its length
 *                        is the length of the text
 *   fnp_node:            list node holding the FeatNode; its choice must be
 *                        OBJ_SEQFEAT
 *   load_codon:          TRUE to collect codon data instead of text (no text
 *                        buffer is allocated in that case)
 *   pbp:                 the record to fill; every field is reset here
 *
 *   A text buffer starts out filled with '~'.  For a CDS the gather callback
 *   load_prot_seq() supplies the content.  Any other feature is drawn with a
 *   symbol over each of its intervals, or over its extremes when it has no
 *   interval list: '+' gene, '^' mRNA, '*' everything else.
 *
 *   Returns TRUE if a text buffer or a codon list is available afterwards.
 *   On TRUE the caller owns pbp->buf / pbp->cvp_list.
 */
static Boolean buffer_for_feature(Int4 c_left, Int4 c_right, Int4 seq_start, Int4 seq_stop, ValNodePtr fnp_node, Boolean load_codon, ProtBufPtr pbp)
{
    FeatNodePtr fnp;
    Uint2 itemtype;
    CharPtr buf = NULL;
    Char symbol;
    ValNodePtr curr;
    IvalNodePtr inp;
    Int4 i_left, i_right;
    Int4 f_len;

    itemtype = (Uint2)(fnp_node->choice);

    if(itemtype!= OBJ_SEQFEAT)
        return FALSE;
    fnp = (FeatNodePtr) fnp_node->data.ptrvalue;
    f_len = seq_stop - seq_start +1;
    if(load_codon)
        pbp->buf = NULL;
    else
        pbp->buf = (CharPtr) MemNew((size_t)(f_len +1)*sizeof(Char));
    pbp->start = seq_start;
    pbp->stop = seq_stop;
    pbp->pos = -1;
    pbp->load_codon= load_codon;
    pbp->cvp_list = NULL;

    buf = pbp->buf;
    if(buf !=NULL)
        MemSet((Pointer)buf, '~', (size_t)(f_len) * sizeof(Char));
    switch(fnp->feattype)/*check symbol for different features*/
    {
        case FEATDEF_GENE:
            symbol = '+';
            break;
        case FEATDEF_mRNA:
            symbol = '^';
            break;
        case FEATDEF_CDS:
            symbol = '$';
            break;
        default:
            symbol = '*';
            break;
    }
    if(fnp->feattype ==FEATDEF_CDS)
        GatherItem(fnp->entityID, fnp->itemID, itemtype, (Pointer)(pbp), load_prot_seq);
    else if(buf != NULL)    /* no text buffer: nothing to draw into */
    {
        if(fnp->interval !=NULL)
        {
            for(curr = fnp->interval; curr !=NULL; curr = curr->next)
            {
                inp = (IvalNodePtr) curr->data.ptrvalue;
                i_left = inp->gr.left;
                i_right = inp->gr.right;
                if(!(i_left > c_right || i_right < c_left))
                {
                    /* clip to the window, then shift to buffer columns */
                    i_left = MAX(i_left, c_left);
                    i_right = MIN(i_right, c_right);
                    i_left -= c_left;
                    i_right -=c_left;
                    for(; i_left<=i_right; ++i_left)
                        buf[i_left] = symbol;
                }
            }
        }
        else
        {
            i_left = fnp->extremes.left;
            i_right = fnp->extremes.right;
            if(!(i_left > c_right || i_right < c_left))
            {
                i_left = MAX(i_left, c_left);
                i_right = MIN(i_right, c_right);
                i_left -= c_left;
                i_right -=c_left;
                for(; i_left<=i_right; ++i_left)
                    buf[i_left] = symbol;
            }
        }
    }
    if(buf!=NULL)
        buf[f_len]= '\0';
    /* no position reported by the callback: fall back to the start */
    if(pbp->pos == -1)
        pbp->pos = ABS(seq_start);

    if(pbp->buf != NULL || pbp->cvp_list !=NULL)
        return TRUE;
    else
        return FALSE;
}
605
606
607
load_feature_data(ProtBufPtr pbp,FeatNodePtr fnp,Int4 pos,Int4 maxlen,ValNodePtr PNTR fbp_head)608 static Boolean load_feature_data(ProtBufPtr pbp, FeatNodePtr fnp, Int4 pos, Int4 maxlen, ValNodePtr PNTR fbp_head)
609 {
610 Boolean found;
611 TextAlignBufPtr fbp;
612 ValNodePtr curr, pcvp;
613 CodonVectorPtr cvp;
614 Boolean load_codon;
615 CharPtr PNTR codon;
616 Int2 i;
617 Int4 f_len;
618 Char label[100];
619 CharPtr buf;
620 Boolean locus = FALSE;
621
622 if(pbp == NULL)
623 return FALSE;
624 if(pbp->buf == NULL && pbp->cvp_list == NULL)
625 return FALSE;
626 load_codon = (pbp->cvp_list !=NULL);
627 f_len = pbp->stop - pbp->start +1;
628
629 found = FALSE;
630 for(curr = *fbp_head; curr !=NULL; curr = curr->next)
631 {
632 fbp = (TextAlignBufPtr) curr->data.ptrvalue;
633 if(fbp->itemID == fnp->itemID)
634 {
635 if(load_codon)
636 {
637 for(pcvp = pbp->cvp_list; pcvp!=NULL; pcvp= pcvp->next)
638 {
639 cvp = (CodonVectorPtr) pcvp->data.ptrvalue;
640 if(cvp->exonCount == fbp->exonCount)
641 {
642 codon = fbp->codon;
643 for(i =0; i<3; ++i)
644 {
645 if(pos > fbp->f_pos)
646 make_empty(codon[i] + fbp->f_pos, (Int2)(pos - fbp->f_pos));
647 StringCat(codon[i], (cvp->buf[i]+cvp->aa_index));
648 }
649 cvp->exonCount = 0;
650 fbp->f_pos = pos + f_len;
651 }
652 }/*end of for*/
653 }
654 else
655 {
656 if(fbp->pos == -1)
657 fbp->pos = pbp->pos+1;
658 if(pos > fbp->f_pos)
659 make_empty(fbp->buf+fbp->f_pos, (Int2)(pos - fbp->f_pos));
660 StringCat(fbp->buf, pbp->buf);
661 fbp->f_pos = pos + f_len;
662 found = TRUE;
663 }
664 }
665 }
666
667
668 if(load_codon)
669 {
670 for(pcvp = pbp->cvp_list; pcvp!=NULL; pcvp= pcvp->next)
671 {
672 cvp = (CodonVectorPtr) pcvp->data.ptrvalue;
673 if(cvp->exonCount !=0)
674 {
675 fbp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
676 fbp->seqEntityID = fnp->entityID;
677 fbp->pos = cvp->dna_pos +1;
678 fbp->strand = cvp->strand;
679 seqid_name(cvp->sip, label, locus, FALSE);
680 fbp->label = StringSave(label);
681 fbp->buf = NULL;
682 for(i =0; i<3; ++i)
683 {
684 buf = (CharPtr) MemNew((size_t)(maxlen+1+1+1) * sizeof(Char));
685 /*1 for partial start, 1 for partial stop*/
686 if(pos > 0)
687 make_empty(buf, (Int2)pos);
688 StringCat(buf, cvp->buf[i]+cvp->aa_index);
689 fbp->codon[i] = buf;
690 }
691 fbp->frame = cvp->frame;
692 fbp->f_pos = pos+f_len;
693 fbp->exonCount = cvp->exonCount;
694 fbp->itemID = fnp->itemID;
695 fbp->itemID = fnp->itemID;
696 fbp->feattype = fnp->feattype;
697 fbp->subtype = fnp->subtype;
698 fbp->entityID = fnp->entityID;
699 fbp->extra_space = (cvp->aa_index == 0);
700 ValNodeAddPointer(fbp_head, 0, fbp);
701
702 }
703 }
704 }
705 else
706 {
707 if(!found) /*create a new node*/
708 {
709 fbp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
710 buf = (CharPtr) MemNew((size_t)(maxlen+1) * sizeof(Char));
711 if(pos > 0)
712 make_empty(buf, (Int2)pos);
713 StringCat(buf, pbp->buf);
714 fbp->seqEntityID = fnp->entityID;
715 fbp->f_pos = pos + f_len;
716 fbp->itemID = fnp->itemID;
717 fbp->buf = buf;
718 fbp->pos = pbp->pos+1;
719 if(fnp->label !=NULL)
720 fbp->label = StringSave(fnp->label);
721 fbp->strand = fnp->extremes.strand;
722 fbp->itemID = fnp->itemID;
723 fbp->feattype = fnp->feattype;
724 fbp->subtype = fnp->subtype;
725 fbp->entityID = fnp->entityID;
726 fbp->exonCount = 0;
727 ValNodeAddPointer(fbp_head, 0, fbp);
728 }
729 }
730 if(pbp->buf)
731 MemFree(pbp->buf);
732 if(pbp->cvp_list)
733 free_cvp_list(pbp->cvp_list);
734 return TRUE;
735 }
736
737
738
739 /**************************************************************************
740 *
741 * collect_feature_buf(fnp_list, g_left, g_right, seq_start, l_pos,
742 * fbp_head, max_len)
743 * collect the features to be shown together with the alignment
744 * fnp_list: a list of FeatNode associated with the current segment
745 * g_left: the left position
746 *
747 ***************************************************************************/
collect_feature_buf(ValNodePtr fnp_list,Int4 g_left,Int4 g_right,Int4 seq_start,Int4 l_pos,ValNodePtr fbp_head,Int4 maxlen,Boolean is_aa)748 static ValNodePtr collect_feature_buf(ValNodePtr fnp_list, Int4 g_left, Int4 g_right, Int4 seq_start, Int4 l_pos, ValNodePtr fbp_head, Int4 maxlen, Boolean is_aa)
749 {
750 ProtBuf pb;
751 FeatNodePtr fnp;
752 Int4 c_left, c_right;
753 Int4 pos;
754 Int4 fseq_start, fseq_stop; /*map sequence start stop to the feature*/
755 Int4 f_len; /*length of the feature*/
756 Boolean load_codon;
757
758 if(fnp_list == NULL)
759 return fbp_head;
760
761
762 while(fnp_list)
763 {
764 fnp = (FeatNodePtr) fnp_list->data.ptrvalue;
765 c_left = fnp->extremes.left;
766 c_right = fnp->extremes.right;
767 load_codon = (is_aa && fnp->feattype == FEATDEF_CDS);
768 if(!(c_left > g_right || c_right < g_left))
769 {
770 if(c_left > g_left) /*map the seq pos from the graphic pos*/
771 fseq_start = seq_start + (c_left-g_left);
772 else
773 fseq_start = seq_start;
774 c_left = MAX(c_left, g_left);
775 c_right = MIN(c_right, g_right);
776 f_len = c_right - c_left+1;
777 fseq_stop = fseq_start+f_len-1;
778
779 if(c_left > g_left)
780 pos = l_pos + (c_left - g_left);
781 else
782 pos = l_pos;
783
784 if(buffer_for_feature(c_left, c_right, fseq_start, fseq_stop, fnp_list, load_codon, &pb))
785
786 load_feature_data(&pb, fnp, pos, maxlen, &fbp_head);
787 }
788 fnp_list = fnp_list->next;
789 }
790
791 return fbp_head;
792 }
793
/*
 * map_position_by_spacing(distance, spacing, is_head)
 *   Convert a distance measured in line columns into a count of residues
 *   when residues are drawn every `spacing` columns.
 *   With spacing 1 the distance is returned unchanged.  Otherwise the result
 *   is distance / spacing, adjusted as follows:
 *     is_head TRUE : one more when the remainder is 2
 *     is_head FALSE: one less (but never below 0) when the remainder is 0
 */
static Int4 map_position_by_spacing(Int4 distance, Int4 spacing, Boolean is_head)
{
    Int4 quotient, remainder;

    if(spacing == 1)
        return distance;

    quotient = distance / spacing;
    remainder = distance % spacing;

    if(is_head)
    {
        if(remainder == 2)
            ++quotient;
    }
    else if(remainder == 0)
    {
        quotient = MAX(quotient-1, 0);
    }

    return quotient;
}
810
/*
 * add_empty_space(buf, maxlen)
 *   If the string in buf is shorter than maxlen, hand the missing part to
 *   make_empty() so that the string is extended to maxlen characters
 *   (presumably with blanks; confirm against make_empty).
 */
static void add_empty_space(CharPtr buf, Int4 maxlen)
{
    Int4 used = StringLen(buf);

    if(used >= maxlen)
        return;
    make_empty(buf+used, (Int2)(maxlen-used));
}
/*
 * copy_insertion_bar(buf, ins_2, sym_pos, len)
 *   Carry the vertical insertion bars of ins_2 over to another line.
 *   buf is first extended to len characters; then, for each of the first
 *   sym_pos columns, a '|' in ins_2 is copied into buf wherever buf holds a
 *   blank.  Does nothing when either pointer is NULL.
 */
static void copy_insertion_bar(CharPtr buf, CharPtr ins_2, Int2 sym_pos, Int4 len)
{
    Int2 col;

    if(buf == NULL || ins_2 == NULL)
        return;

    add_empty_space(buf, len);

    for(col = 0; col < sym_pos; ++col)
    {
        if(ins_2[col] != '|')
            continue;
        /* never overwrite text that is already on the line */
        if(buf[col] == ' ')
            buf[col] = '|';
    }
}
830
/*
 * get_long_insert_len(length)
 *   Width on the line of the short form used for a long insertion: the
 *   "(length=N)" tag plus 13 characters, which matches the 5 leading
 *   residues, 3 dots and 5 trailing residues that load_text() writes.
 */
static Int4 get_long_insert_len(Int4 length)
{
    Char tag[50];
    Int4 tag_len;

    sprintf(tag, "(length=%ld)", (long) length);
    tag_len = StringLen(tag);

    return (tag_len + 13);
}
838
/*
 * load_tdp_data(head, label, text, itemID, entityID, seqEntityID, bsp_itemID)
 *   Wrap a finished text line in a new TextAlignBuf and append it to *head.
 *   The label and text pointers are stored as given (not copied); pos is
 *   set to -1 and the four IDs are recorded.
 *   Returns the list node that was added.
 */
static ValNodePtr load_tdp_data(ValNodePtr PNTR head, CharPtr label, CharPtr text, Uint4 itemID, Uint2 entityID, Uint2 seqEntityID, Uint4 bsp_itemID)
{
    TextAlignBufPtr rec = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));

    rec->buf = text;
    rec->label = label;
    rec->pos = -1;

    rec->bsp_itemID = bsp_itemID;
    rec->seqEntityID = seqEntityID;
    rec->entityID = entityID;
    rec->itemID = itemID;

    return ValNodeAddPointer(head, 0, (Pointer)rec);
}
854
855 /******************************************************************************
856 *
857 * ProcessTextInsertion(anp, m_left, m_right, bsp, line_len, m_frame)
858 * convert the insertions that are located within [m_left, m_right] into
859 * text buffers (a ValNode list of TextAlignBufPtr)
860 * anp: AlignNodePtr
861 * m_left, m_right: the current region for selection
862 * bsp: the BioseqPtr for this anp
863 *
864 * returns the list of TextAlignBufPtr nodes, or NULL if there is no insertion
865 *
866 ******************************************************************************/
/*
 * Build the text lines that display the insertions of one aligned sequence
 * inside the window [m_left, m_right].
 *
 * Steps, as implemented below:
 *   1. mark every INS_SEG whose ins_pos lies in the window (line = 0, the
 *      others get line = -1) and count them; return NULL if there is none
 *   2. let find_insert_ypos() choose a level (stored in asp->line) and a
 *      start column (stored in asp->gr.left) for each marked insertion
 *   3. for each level j, from 0 to the highest level used, produce
 *        - ins_1 (level 0 only): a '\' at the column of each insertion
 *        - ins_2: a '|' at the column of each insertion on level >= j
 *        - ins_seq: the text of the insertions that sit on level j
 *      and append them, plus any feature lines, to the result list
 *
 * line_len: lower bound for the line width (the width is the larger of the
 *           window size and line_len)
 * m_frame:  > 0 sets the residue spacing to 3; -1 requests translation of
 *           the inserted sequence
 *
 * NOTE(review): last_ins is assigned but never read in this function.
 */
ProcessTextInsertion(AlignNodePtr anp,Int4 m_left,Int4 m_right,BioseqPtr bsp,Int4 line_len,Int1 m_frame)867 static ValNodePtr ProcessTextInsertion(AlignNodePtr anp, Int4 m_left, Int4 m_right, BioseqPtr bsp, Int4 line_len, Int1 m_frame)
868 {
869 AlignSegPtr asp;
870 Int4 inslen; /*length of insertion*/
871 Int2 insnum; /*the number of insertions*/
872 Int2 i, j;
873 Int4Ptr inslevel; /*for layout the level of insertions*/
874 Int4 level;
875 Int4 inspos; /*position for insertion*/
876 Int4 left;
877 Int4 len;
878 Int4 last_ins;
879
880 CharPtr ins_1; /* \ symbols for insertions*/
881 CharPtr ins_2; /*| symbols for insertion*/
882 CharPtr ins_seq;
883 Int4 sym_pos;
884 Int4 l_pos;
885 Int4 seq_offset, seq_start, seq_stop;
886 ValNodePtr head = NULL;
887 ValNodePtr fbuf_list = NULL, curr;
888 TextAlignBufPtr fbp;
889 Int4 g_left, g_right;
890 Boolean is_aa;
891 Int4 seq_expand;
892 Int4 spacing;
893 Boolean translate;
894 Uint1 strand;
895
/* minus strand: a negative seqpos, or seqpos 0 with a minus-strand extreme */
896 strand = Seq_strand_plus;
897 if(anp->seqpos < 0)
898 strand = Seq_strand_minus;
899 else if(anp->seqpos == 0 && anp->extremes.strand == Seq_strand_minus)
900 strand = Seq_strand_minus;
901 spacing = 1;
902 if(m_frame > 0)
903 spacing = 3;
/* when translating, three sequence positions make one displayed residue */
904 if(m_frame == -1)
905 {
906 translate = TRUE;
907 seq_expand = 3;
908 }
909 else
910 {
911 seq_expand = 1;
912 translate = FALSE;
913 }
914 is_aa = (bsp->mol == Seq_mol_aa);
915 insnum = 0;
916 for(asp = anp->segs; asp !=NULL; asp = asp->next)
917 /*checking the insertion numbers*/
918 {
919 if(asp->type == INS_SEG)
920 {
921 inspos = asp->ins_pos;
922 if (inspos >= m_left && inspos<=m_right)
923 {
924 ++insnum;
925 asp->line = 0;
926 }
927 else
928 asp->line = -1;
929 }
930 }
931 if(insnum == 0)
932 return head;
933
934 /*layout the insertions*/
935 inslevel = (Int4Ptr) MemNew((size_t)(2*insnum) * sizeof(Int4)); /*layout insert*/
936 level = 0;
937 len = MAX(m_right - m_left +1, line_len);
938 for(asp = anp->segs; asp !=NULL; asp = asp->next)
939 {
940 if(asp->type == INS_SEG && asp->line == 0)
941 {
942 inspos = asp->ins_pos;
943 inslen = asp->gr.right/seq_expand;
944 /* if(inslen > (m_right-m_left+1)) */
/* an insertion wider than the line is laid out with the width of its short form */
945 if(inslen > len)
946 inslen = get_long_insert_len(inslen);
947 inspos -= m_left;
/* asp->line receives the level; gr.left is reused for the start column */
948 asp->line = find_insert_ypos(&left, inslen, inspos, 0, len-1, inslevel, 2, insnum);
949 asp->gr.left = left;
950 level = MAX(asp->line, level);
951 }
952 }
953 MemFree(inslevel);
954
955
956 /*comput the insertion text*/
957 for(j = 0; j< (level+1); ++j)
958 {
959 l_pos = 0;
960 sym_pos = 0;
961 fbuf_list = NULL;
962 ins_seq = (CharPtr) MemNew((size_t)(len+1) * sizeof(Char));
963 ins_2 = (CharPtr) MemNew((size_t)(len+1) * sizeof(Char));
/* the '\' line exists only once, above the first level */
964 if(j == 0)
965 ins_1 = (CharPtr) MemNew((size_t)(len+1) * sizeof(Char));
/* seq_offset: sequence positions consumed by the segments seen so far */
966 seq_offset = 0;
967 for(asp = anp->segs; asp !=NULL; asp = asp->next)
968 {
969 if(asp->type == INS_SEG && asp->line >=j)
970 {
971
972 inspos = asp->ins_pos - m_left;
973 if(inspos > sym_pos)
974 {
975 if(j == 0) /*the first level*/
976 make_empty(ins_1+sym_pos, (Int2)(inspos-sym_pos));
977 make_empty(ins_2+sym_pos, (Int2)(inspos-sym_pos));
978 sym_pos = inspos;
979 }
980 if(j == 0)
981 ins_1[sym_pos] = '\\';
982 ins_2[sym_pos] = '|';
983 if(asp->line == j)
984 last_ins = inspos+1;
985 ++sym_pos;
986
/* only the insertions placed on this level contribute sequence text */
987 if(asp->line == j)
988 {
989 seq_start = anp->seqpos + seq_offset;
990 seq_stop = seq_start + asp->gr.right -1;
991 /* seq_stop = seq_start + map_position_by_spacing(asp->gr.right, spacing, FALSE) * seq_expand + seq_expand -1; */
992 if(asp->gr.left > l_pos)
993 {
994 make_empty(ins_seq+l_pos, (Int2)(asp->gr.left-l_pos));
995 l_pos = asp->gr.left;
996 }
997
998 g_left = asp->ins_pos;
999 g_right = asp->ins_pos + asp->gr.right -1;
1000 /* g_left = asp->gr.left;
1001 g_right = g_left + asp->gr.right -1;*/
1002
1003 if((seq_stop - seq_start+1)>len)/*long insertions*/
1004 {
1005 fbuf_list = collect_feature_buf(asp->cnp, g_left, (g_left+4), seq_start, l_pos, fbuf_list, len, is_aa); /*check the features first*/
1006 fbuf_list = collect_feature_buf(asp->cnp, g_left, (g_left+4), seq_stop-4, l_pos+8, fbuf_list, len, is_aa); /*check the features ffirst. 3 is the 3 dots*/
1007 }
1008 else
1009 fbuf_list = collect_feature_buf(asp->cnp, g_left, g_right, seq_start, l_pos, fbuf_list, len, is_aa);
1010
1011 load_text(bsp, seq_start, seq_stop, ins_seq, &l_pos, NULL, (Int2)len, 1, translate, FALSE, NULL, NULL, strand, NULL, 0);
1012 }
1013
1014 }
/* advance seq_offset past this segment, whether or not it was drawn */
1015 if(asp->type == INS_SEG)
1016 seq_offset += asp->gr.right;
1017 if(asp->type == DIAG_SEG || asp->type == REG_SEG || asp->type == STD_SEG)
1018 seq_offset += map_position_by_spacing(asp->gr.right - asp->gr.left +1,
1019 spacing, TRUE) * seq_expand;
1020 /* seq_offset += (asp->gr.right - asp->gr.left +1) * seq_expand; */
1021 }
1022
1023 ins_2[sym_pos] = '\0';
1024 ins_seq[l_pos] = '\0';
1025 if(j == 0)
1026 {
1027 ins_1[sym_pos] = '\0';
1028 load_tdp_data(&head, NULL, ins_1, 0, 0, 0, 0);
1029 }
1030
/* draw this level's bars through every line already in the result list */
1031 for(curr = head; curr !=NULL; curr = curr->next)
1032 /*for(curr = fbuf_list; curr !=NULL; curr = curr->next)*/
1033 {
1034 fbp = (TextAlignBufPtr) curr->data.ptrvalue;
1035 if(fbp->buf != NULL)
1036 copy_insertion_bar(fbp->buf, ins_2, (Int2)sym_pos, len);
1037 else
1038 {
1039 for(i =0; i<3; ++i)
1040 copy_insertion_bar(fbp->codon[i], ins_2, (Int2)sym_pos, len);
1041 }
1042 }
1043
/* append the bar line, then the sequence line, then the feature lines */
1044 copy_insertion_bar(ins_seq, ins_2, (Int2)sym_pos, len);
1045 load_tdp_data(&head, NULL, ins_2, 0, 0, 0, 0);
1046 load_tdp_data(&head, NULL, ins_seq, anp->itemID, anp->entityID, anp->seq_entityID, anp->bsp_itemID);
1047 ValNodeLink(&head, fbuf_list);
1048 fbuf_list = head;
1049 }
1050
1051 return head;
1052
1053 }
1054
1055 /***********************************************************************
1056 *
1057 * ProcessTextAlignNode(anp, m_left, m_right, p_stop, m_buf, locus)
1058 * Process the AlignNode to make a list of text buffer on the
1059 * current region
1060 * anp: AlignNodePtr
1061 * m_left, m_right: the region on the alignment. Mapped in response
1062 * to anp->extremes.left, and anp->extremes.right
1063 * p_stop: the stop position of the previous segment. Used to label
1064 * the position of a line composed entirely of gaps
1065 * m_buf: buffer for the master sequence. Used to compare mismatches
1066 * locus: if TRUE, show the locus name of the alignment
1067 *
1068 * frame: frame >0, those are the hits from blastx. So, the
1069 * protein need to be displayed to the proper frame
1070 * frame 1-3: match to the plus strand of the master
1071 * frame 4-6: match to the minus strand of the master
1072 * frame 0: no tranlsation, no frame match to the master
1073 * frame -1: translate the DNA sequence
1074 * option: option for display the alignments
1075 * matrix: the protein alignment matrix
1076 *
1077 *
1078 ************************************************************************/
1079
ProcessTextAlignNode2(AlignNodePtr anp,Int4 m_left,Int4 m_right,Int4Ptr p_stop,CharPtr m_buf,Int4 line_len,Int1 m_frame,Uint4 option,Int4Ptr PNTR matrix,Int4Ptr PNTR posMatrix,Int4 q_start)1080 NLM_EXTERN ValNodePtr ProcessTextAlignNode2(AlignNodePtr anp, Int4 m_left, Int4 m_right, Int4Ptr p_stop, CharPtr m_buf, Int4 line_len, Int1 m_frame, Uint4 option, Int4Ptr PNTR matrix, Int4Ptr PNTR posMatrix, Int4 q_start)
1081 {
1082 Int4 maxlen;
1083 Int4 g_left, g_right;
1084 Int4 len; /*length of the segment*/
1085 CharPtr l_seq; /*the buffer for the sequence*/
1086 Int2Ptr matrix_val; /*value of each residue in alignment matrix*/
1087 Int4 l_pos; /*the start position on the line*/
1088 Int4 offset;
1089 BioseqPtr bsp;
1090 SeqEntryPtr sep;
1091
1092 AlignSegPtr asp;
1093 Int4 seq_offset, off_len;
1094 Int4 seq_start, seq_stop;
1095 Int4 s_start, s_stop; /*for marking the position on one line*/
1096 CharPtr str;
1097
1098 ValNodePtr head = NULL, ins_node;
1099 ValNodePtr fbuf_list = NULL;
1100 TextAlignBufPtr tdp;
1101 Boolean is_aa;
1102 Int4 spacing;
1103 Boolean translate;
1104 Int4 seq_expand;
1105 Boolean show_mismatch;
1106 Boolean set_matrix;
1107 Uint1 strand;
1108
1109
1110 if(m_frame > 6 || m_frame < -1) /*check the m_frame. -1 for translate the hits*/
1111 return NULL;
1112
1113
1114 g_left = anp->extremes.left;
1115 g_right = anp->extremes.right;
1116 if(m_left > g_right || m_right < g_left)/*no overlap*/ {
1117 if(m_frame > 0) {
1118 if(anp->m_frame != m_frame)
1119 return NULL;
1120 if(m_buf == NULL)
1121 return NULL;
1122 }
1123 if(option & TXALIGN_BLUNT_END) {
1124 maxlen = m_right - m_left +1;
1125 l_seq = (CharPtr) MemGet((size_t)(maxlen+1)*sizeof(Char), MGET_ERRPOST);
1126 MemSet((Pointer)l_seq, '-',(size_t)(maxlen) * sizeof(Char));
1127 l_seq[maxlen] = '\0';
1128 tdp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
1129 tdp->pos = *p_stop;
1130 tdp->strand = anp->extremes.strand;
1131 tdp->label = StringSave(anp->label);
1132 tdp->buf = l_seq;
1133 tdp->matrix_val = NULL;
1134 tdp->itemID = anp->itemID;
1135 tdp->feattype = 0;
1136 tdp->subtype = 0;
1137 tdp->entityID = anp->entityID;
1138 tdp->seqEntityID = anp->seq_entityID;
1139 tdp->bsp_itemID = anp->bsp_itemID;
1140 ValNodeAddPointer(&head, 0, tdp);
1141 return head;
1142 }
1143 else
1144 return NULL;
1145 }
1146
1147 strand = Seq_strand_plus;
1148 if(anp->seqpos < 0)
1149 strand = Seq_strand_minus;
1150 else if(anp->seqpos == 0 && anp->extremes.strand == Seq_strand_minus)
1151 strand = Seq_strand_minus;
1152
1153 l_pos = 0;
1154 spacing = 1;
1155 offset = 0;
1156 if(m_frame > 0) {
1157 if(anp->m_frame != m_frame)
1158 return NULL;
1159 if(m_buf == NULL)
1160 return NULL;
1161 /*add the empty space to reflect the reading frame*/
1162 for(str = m_buf; *str != '\n' && *str != '\0'; ++str) {
1163 if(IS_WHITESP(*str))
1164 ++offset;
1165 else
1166 break;
1167 }
1168 spacing = 3;
1169 }
1170 if(m_left < g_left) {
1171 l_pos += (g_left - m_left);
1172 if(m_frame > 0)
1173 ++l_pos;
1174 } else
1175 l_pos += offset;
1176
1177 bsp = BioseqLockById(anp->sip);
1178 if(bsp == NULL)
1179 return NULL;
1180 is_aa = (bsp->mol == Seq_mol_aa);
1181 if((m_frame > 0 && !is_aa) || (m_frame == -1 && is_aa)) {
1182 BioseqUnlock(bsp);
1183 return NULL;
1184 }
1185 if(anp->seq_entityID == 0) {
1186 sep = SeqEntryFind(bsp->id);
1187 anp->seq_entityID = SeqMgrGetEntityIDForSeqEntry(sep);
1188 }
1189 if(anp->bsp_itemID == 0)
1190 anp->bsp_itemID = get_bioseq_itemID(bsp, anp->seq_entityID);
1191
1192 if(m_frame == -1) {
1193 translate = TRUE;
1194 seq_expand = 3;
1195 } else {
1196 translate = FALSE;
1197 seq_expand = 1;
1198 }
1199
1200 maxlen = m_right - m_left +1;
1201 l_seq = (CharPtr) MemGet((size_t)(maxlen+1)*sizeof(Char),
1202 MGET_ERRPOST);
1203 if(option & TXALIGN_BLUNT_END)
1204 MemSet((Pointer)l_seq, '-',(size_t)maxlen * sizeof(Char));
1205 else
1206 MemSet((Pointer)l_seq, ' ',(size_t)maxlen * sizeof(Char));
1207 l_seq[maxlen] = '\0';
1208
1209
1210 set_matrix = FALSE;
1211 if(m_frame == 0 && bsp->mol != Seq_mol_aa) { /*DNA-DNA alignment*/
1212 if(option & TXALIGN_MATRIX_VAL)
1213 set_matrix = TRUE;
1214 } else {
1215 if(matrix != NULL && (option & TXALIGN_MATRIX_VAL))
1216 set_matrix = TRUE;
1217 }
1218 if(set_matrix) {
1219 matrix_val = (Int2Ptr) MemGet((size_t)(maxlen+1)*sizeof(Int2), MGET_ERRPOST);
1220 MemSet((Pointer)matrix_val, 0,(size_t)maxlen * sizeof(Int2));
1221 } else
1222 matrix_val = NULL;
1223 show_mismatch = (Boolean)(option & TXALIGN_MISMATCH);
1224
1225
1226 /*process the GAPs and the DIAGs segs*/
1227 s_start = -1;
1228 s_stop = -1;
1229 off_len = 0;
1230 for(asp = anp->segs; asp !=NULL; asp = asp->next) {
1231 g_left = asp->gr.left;
1232 g_right = asp->gr.right;
1233 if(!(g_left > m_right || g_right < m_left)) {
1234 switch(asp->type) {
1235 case GAP_SEG:
1236 g_left = MAX(m_left, g_left);
1237 g_right = MIN(m_right, g_right);
1238 len = g_right - g_left +1;
1239 MemSet((Pointer)(l_seq +l_pos), '-',(size_t)len * sizeof(Char));
1240 l_pos += len;
1241 break;
1242
1243 case REG_SEG:
1244 case DIAG_SEG:
1245 case STD_SEG: /* Std-seg only works if the m_frame != 0 */
1246 if(m_left > g_left)
1247 len = off_len + m_left - g_left;
1248 else
1249 len = off_len;
1250 seq_offset = map_position_by_spacing(len, spacing, TRUE) * seq_expand;
1251 seq_start = anp->seqpos + seq_offset;
1252 g_left = MAX(m_left, g_left);
1253 g_right = MIN(m_right, g_right);
1254 len += (g_right - g_left);
1255 seq_stop = anp->seqpos + map_position_by_spacing(len, spacing, FALSE) * seq_expand + seq_expand -1;
1256
1257 if(seq_start <= seq_stop) { /*the order of start and stop is reversed*/
1258 if(s_start == -1) /*record the end point*/
1259 s_start = ABS(seq_start);
1260 s_stop = ABS(seq_stop);
1261
1262 if(m_frame == 0)
1263 fbuf_list = collect_feature_buf(asp->cnp, g_left, g_right, seq_start, l_pos, fbuf_list, maxlen, is_aa); /*check the features first*/
1264 load_text(bsp, seq_start, seq_stop, l_seq, &l_pos, m_buf, (Int2)maxlen,
1265 (Int2)spacing, translate, show_mismatch, matrix_val, matrix, strand, posMatrix, q_start);
1266
1267 }
1268 break;
1269
1270 default:
1271 break;
1272 }
1273 }
1274 if(asp->type == INS_SEG)
1275 off_len += (asp->gr.right * spacing);
1276 if(asp->type == REG_SEG || asp->type == DIAG_SEG || asp->type == STD_SEG)
1277 off_len+=(asp->gr.right - asp->gr.left +1);
1278 }
1279
1280
1281 /*the first segment in the layout is a gap segment*/
1282 if(s_start == -1)
1283 s_start = *p_stop;
1284 if(s_stop == -1) /*gap across the entire region*/
1285 s_stop = *p_stop;
1286 *p_stop = s_stop /*update the stop value*/;
1287 tdp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
1288 tdp->pos = s_start+1;
1289 tdp->strand = anp->extremes.strand;
1290 tdp->label = StringSave(anp->label);
1291 tdp->buf = l_seq;
1292 tdp->matrix_val = matrix_val;
1293 tdp->itemID = anp->itemID;
1294 tdp->feattype = 0;
1295 tdp->subtype = 0;
1296 tdp->entityID = anp->entityID;
1297 tdp->seqEntityID = anp->seq_entityID;
1298 tdp->bsp_itemID = anp->bsp_itemID;
1299 ValNodeAddPointer(&head, 0, tdp);
1300 ValNodeLink(&head, fbuf_list);
1301
1302 ins_node = ProcessTextInsertion(anp, m_left, m_right, bsp, line_len, m_frame);
1303 ValNodeLink(&head, ins_node);
1304 BioseqUnlock(bsp);
1305 return head;
1306 }
1307
ProcessTextAlignNode(AlignNodePtr anp,Int4 m_left,Int4 m_right,Int4Ptr p_stop,CharPtr m_buf,Int4 line_len,Int1 m_frame,Uint4 option,Int4Ptr PNTR matrix)1308 NLM_EXTERN ValNodePtr ProcessTextAlignNode(AlignNodePtr anp, Int4 m_left, Int4 m_right, Int4Ptr p_stop, CharPtr m_buf, Int4 line_len, Int1 m_frame, Uint4 option, Int4Ptr PNTR matrix)
1309 {
1310 return ProcessTextAlignNode2(anp, m_left, m_right, p_stop, m_buf, line_len, m_frame, option, matrix, NULL, 0);
1311 }
1312
clean_annot_for_anp(ValNodePtr PNTR head)1313 NLM_EXTERN ValNodePtr clean_annot_for_anp(ValNodePtr PNTR head)
1314 {
1315 ValNodePtr prev, next, anp_list;
1316
1317 prev = NULL;
1318 anp_list = *head;
1319 while(anp_list)
1320 {
1321 next = anp_list->next;
1322 if(anp_list->choice == OBJ_SEQANNOT)
1323 {
1324 if(prev == NULL)
1325 *head = next;
1326 else
1327 prev->next = next;
1328 anp_list->next = NULL;
1329 FreeAlignNode(anp_list);
1330 }
1331 else
1332 prev = anp_list;
1333 anp_list = next;
1334 }
1335
1336 return (*head);
1337 }
1338
1339
1340
1341 /***********************************************************************
1342 *
1343 * FreeFeatureList(list)
1344 * free a list of FeatNode
1345 *
1346 ***********************************************************************/
FreeFeatureList(ValNodePtr list)1347 NLM_EXTERN ValNodePtr FreeFeatureList (ValNodePtr list)
1348 {
1349 FeatNodePtr fnp;
1350 ValNodePtr next;
1351
1352 while (list != NULL)
1353 {
1354 next = list->next;
1355 fnp = list->data.ptrvalue;
1356 if (fnp != NULL)
1357 {
1358 ValNodeFreeData (fnp->interval);
1359 MemFree (fnp->label);
1360 MemFree(fnp->pos_label);
1361 if(fnp->supress_node != NULL) /*hidden features*/
1362 FreeFeatureList(fnp->supress_node);
1363 MemFree (fnp);
1364 }
1365 MemFree (list);
1366
1367 list = next;
1368 }
1369 return NULL;
1370 }
1371
1372 /*********************************************************************
1373 *
1374 * extract_node_list(head, itemType, entityID, feattype, subtype,
1375 * label_type)
1376 * extract a list of featnode from head which will have the
*	selected itemType, entityID, feattype, subtype, label_type.
1378 * set values to 0 if it is not considered in the selection
1379 *
1380 *********************************************************************/
do_collect(ValNodePtr vnp,Uint1 itemType,Uint2 entityID,Uint1 feattype,Uint1 label_type)1381 static Boolean do_collect(ValNodePtr vnp, Uint1 itemType, Uint2 entityID, Uint1 feattype, Uint1 label_type)
1382 {
1383 Boolean is_num; /*is the gene mark a number*/
1384 FeatNodePtr fnp;
1385
1386 if(vnp->choice != itemType)
1387 return FALSE;
1388
1389 fnp = (FeatNodePtr)(vnp->data.ptrvalue);
1390 if(fnp == NULL)
1391 return FALSE;
1392
1393 if(entityID !=0)
1394 if(fnp->entityID !=entityID)
1395 return FALSE;
1396
1397
1398 if(itemType == OBJ_SEQFEAT)
1399 {
1400 if((feattype == 0) || (fnp->feattype == feattype))
1401 {
1402 if(label_type == ALL_LABEL)
1403 return TRUE;
1404 is_num = IS_NUM_GENE(fnp->label);
1405 if(label_type == STR_LABEL)
1406 return (is_num == FALSE);
1407 if(label_type == NUM_LABEL)
1408 return (is_num == TRUE);
1409 }
1410 else
1411 return FALSE;
1412 }
1413
1414 return TRUE;
1415
1416 }
1417
1418
1419
extract_node_list(ValNodePtr PNTR head,Uint1 itemType,Uint2 entityID,Uint1 feattype,Uint1 label_type)1420 NLM_EXTERN ValNodePtr extract_node_list(ValNodePtr PNTR head, Uint1 itemType, Uint2 entityID, Uint1 feattype, Uint1 label_type)
1421 {
1422 ValNodePtr vnp, prev, list, next;
1423
1424
1425 list = NULL;
1426 prev = NULL;
1427 vnp = *head;
1428 while(vnp)
1429 {
1430 next = vnp->next;
1431 if(do_collect(vnp, itemType, entityID, feattype, label_type))
1432 {
1433 if(prev == NULL)
1434 *head = vnp->next;
1435 else
1436 prev->next = vnp->next;
1437 vnp->next = NULL;
1438 ValNodeLink(&list, vnp);
1439 }
1440 else
1441 prev = vnp;
1442 vnp = next;
1443 }
1444
1445 return list;
1446
1447 }
1448
extract_lollipop_feature(ValNodePtr PNTR head,Int4 scale,BoolPtr lolli_feature)1449 NLM_EXTERN ValNodePtr extract_lollipop_feature(ValNodePtr PNTR head, Int4 scale, BoolPtr lolli_feature)
1450 {
1451 ValNodePtr vnp, prev, list, next;
1452 FeatNodePtr fnp;
1453 Boolean extract = FALSE;
1454
1455
1456 list = NULL;
1457 prev = NULL;
1458 vnp = *head;
1459 while(vnp)
1460 {
1461 next = vnp->next;
1462 extract = FALSE;
1463 if(vnp->choice == OBJ_SEQFEAT)
1464 {
1465 fnp = vnp->data.ptrvalue;
1466 if((fnp->extremes.right - fnp->extremes.left +1) <= scale)
1467 extract = TRUE;
1468 else if(lolli_feature != NULL)
1469 extract = lolli_feature[fnp->feattype];
1470 }
1471 if(extract)
1472 {
1473 if(prev == NULL)
1474 *head = vnp->next;
1475 else
1476 prev->next = vnp->next;
1477 vnp->next = NULL;
1478 ValNodeLink(&list, vnp);
1479 }
1480 else
1481 prev = vnp;
1482 vnp = next;
1483 }
1484
1485 return list;
1486
1487 }
1488
1489 /*deside whether the alignment is of different molecules */
get_alignment_type(AnnotInfoPtr annot_info)1490 NLM_EXTERN Uint1 get_alignment_type(AnnotInfoPtr annot_info)
1491 {
1492 if(annot_info->blast_type == ALIGN_BLASTX)
1493 return ALIGN_DNA_TO_PROT;
1494 if(annot_info->blast_type == ALIGN_TBLASTN)
1495 return ALIGN_PROT_TO_DNA;
1496 if(annot_info->blast_type == ALIGN_PSITBLASTN)
1497 return ALIGN_PROT_TO_DNA;
1498 if(annot_info->blast_type == ALIGN_TBLASTX)
1499 return ALIGN_TDNA_TO_TDNA;
1500 return 0;
1501 }
1502
1503
1504 /*********************************************************************
1505 *
1506 * FreeAlignNode(list)
1507 * free a list of AlignNodePtr
1508 *
1509 *********************************************************************/
FreeAlignNode(ValNodePtr list)1510 NLM_EXTERN ValNodePtr FreeAlignNode(ValNodePtr list)
1511 {
1512 AlignNodePtr anp;
1513 AlignSegPtr asp, aspnext;
1514 ValNodePtr next;
1515 AlignBlockPtr abp, abpnext;
1516 AnnotInfoPtr annot_info;
1517
1518 while (list != NULL)
1519 {
1520 next = list->next;
1521 if(list->choice == OBJ_SEQANNOT)
1522 {
1523 annot_info = list->data.ptrvalue;
1524 MemFree(annot_info);
1525 }
1526 else
1527 {
1528 anp = list->data.ptrvalue;
1529 if (anp != NULL)
1530 {
1531 asp = anp->segs;
1532 while(asp !=NULL)
1533 {
1534 aspnext = asp->next;
1535 asp->next = NULL;
1536 if(asp->cnp != NULL)
1537 FreeFeatureList(asp->cnp);
1538 if(asp->mismatch)
1539 ValNodeFree(asp->mismatch);
1540 MemFree(asp);
1541 asp = aspnext;
1542 }
1543 abp = anp->blocks;
1544 while(abp != NULL)
1545 {
1546 abpnext = abp->next;
1547 MemFree(abp);
1548 abp = abpnext;
1549 }
1550 if(anp->pop_sap !=NULL)
1551 SeqAnnotFree(anp->pop_sap);
1552 SeqIdFree(anp->sip);
1553 MemFree (anp->label);
1554 MemFree (anp->clone_id);
1555 MemFree(anp);
1556 }
1557 }
1558 MemFree (list);
1559
1560 list = next;
1561 }
1562 return NULL;
1563 }
1564
1565
1566
1567
1568 /***********************************************************************
1569 *
1570 * CollectSegmentSeq(bsp, slp, seqID, offset, head)
1571 * collect the segments in Bioseq
1572 * bsp: Bioseq
1573 * slp: the location on bsp to be collected
1574 * seqID: the order of bsp in the current list
1575 * offset: the offset to the graphic
1576 * head: the head of the previous list
1577 * return the head of new list
1578 * if bsp is a segmented sequence, the corresponding segments are recorded
1579 * in inp. Otherwise there is only one inp for slp.
1580 *
1581 ************************************************************************/
1582
1583
1584 typedef struct collectheader{ /*for collecting data of a sequence display*/
1585 CollectSeqOptionPtr csop; /*option for the sequences+features*/
1586 ValNodePtr features; /*a list of FeatNode for storing the feature data*/
1587 ValNodePtr prev_feat; /*previous node, for speed it up */
1588 CollectAlignOptionPtr caop; /*option for the alignment*/
1589 ValNodePtr aligns; /*a list of AlignNode for storing alignment data*/
1590 ValNodePtr prev_align; /*the previous node for alignment*/
1591 SeqLocPtr slp; /*target Seq-loc*/
1592 SeqIdPtr maybe_mapid; /*a possible mapid*/
1593
1594 ObjMgrPtr omp; /*for save some space in the collection*/
1595 Char thislabel[101];
1596 Char ftype[101];
1597 Uint2 subtype;
1598 Int2 filter_level;
1599 GeneDataPtr gdata;
1600 Uint2 priority;
1601 Boolean take_all_annot; /*take everything in a Seq-annot*/
1602 Boolean load_align;
1603 Boolean skip_feature;
1604 Uint1 index;
1605 Char annotDB[21];
1606 Boolean is_lod_score;
1607 }CollectHeader, PNTR CollectHeaderPtr;
1608
1609
1610
/*
*	link_data_for_collect(head, prev, data, type)
*	Create a new ValNode holding data (choice = type) and attach it
*	after *prev, or make it the list head when *prev is NULL. *prev is
*	then moved to the new node, so repeated calls append in O(1).
*/
static void link_data_for_collect (ValNodePtr PNTR head, ValNodePtr PNTR prev, Pointer data, Uint1 type)
{
	ValNodePtr node;

	node = ValNodeNew(NULL);
	node->data.ptrvalue = data;
	node->choice = type;

	if(*prev != NULL)
		(*prev)->next = node;
	else
		*head = node;

	/*the new node is the attachment point for the next call*/
	*prev = node;
}
1626
/*
*	get_last_node(head)
*	Return the last node of the list, or NULL for an empty list.
*/
static ValNodePtr get_last_node (ValNodePtr head)
{
	ValNodePtr tail = NULL;

	for(; head != NULL; head = head->next)
		tail = head;
	return tail;
}
1636
CreateFeatNode(ValNodePtr PNTR f_head,ValNodePtr PNTR prev,Uint2 itemType,Uint4 itemID,Uint2 entityID,Uint2 feattype)1637 static FeatNodePtr CreateFeatNode (ValNodePtr PNTR f_head, ValNodePtr PNTR prev, Uint2 itemType, Uint4 itemID, Uint2 entityID, Uint2 feattype)
1638 {
1639 FeatNodePtr fnp;
1640
1641 fnp = MemNew (sizeof (FeatNode));
1642 fnp->itemID = itemID;
1643 fnp->entityID = entityID;
1644 fnp->feattype = (Uint1)feattype;
1645 link_data_for_collect(f_head, prev, (Pointer)fnp, (Uint1)(itemType));
1646 return fnp;
1647 }
1648
collect_feature_label(Uint1 format)1649 static Boolean collect_feature_label(Uint1 format)
1650 {
1651 return (format <=OM_LABEL_SUMMARY);
1652 }
1653
collect_sequence_label(Uint1 format)1654 static Boolean collect_sequence_label(Uint1 format)
1655 {
1656 return (format >= PRINTID_FASTA_SHORT && format <=PRINTID_REPORT);
1657 }
1658
1659 /*#####################################################################
1660 #
1661 # functions related to the collection of the features of alignment
1662 #
1663 #####################################################################*/
1664
1665
1666 /****************************************************************
1667 *
1668 * satcollfunc()
1669 * callback function for collecting features on Sequence
1670 * alignment. It recalculates the feature intervals based on
1671 * the intervals in the aligned segments
1672 *
1673 ****************************************************************/
1674 typedef struct alignfeat
1675 {
1676 ObjMgrPtr omp;
1677 AlignNodePtr anp;
1678 CollectSeqOptionPtr csop;
1679 Int2 filter_level;
1680 Boolean all_feature;
1681 }AlignFeat, PNTR AlignFeatPtr;
1682
is_powerblast_feature(SeqAnnotPtr annot)1683 static Boolean is_powerblast_feature(SeqAnnotPtr annot)
1684 {
1685 ValNodePtr desc;
1686
1687 if(annot->type != 1)
1688 return FALSE;
1689 for(desc = annot->desc; desc != NULL; desc = desc->next)
1690 {
1691 if(desc->choice == Annot_descr_name)
1692 {
1693 if(StringICmp(desc->data.ptrvalue, "powblast") == 0)
1694 return TRUE;
1695 if(StringICmp(desc->data.ptrvalue, "powerblast") == 0)
1696 return TRUE;
1697 /*powerBlast feature*/
1698 if(StringNCmp(desc->data.ptrvalue, "PB:", 3) == 0)
1699 return TRUE;
1700 }
1701 }
1702 return FALSE;
1703 }
1704
1705
satcollfunc(GatherContextPtr gcp)1706 static Boolean satcollfunc(GatherContextPtr gcp)
1707 {
1708 SeqFeatPtr sfp;
1709 AlignFeatPtr afp;
1710 CollectSeqOptionPtr csop;
1711
1712 AlignNodePtr anp;
1713 Uint2 feat_subtype; /*types defined by objfdef.h*/
1714 SeqLocPtr slp = NULL;
1715 Char label[101];
1716 ObjMgrTypePtr omtp;
1717 IvalNodePtr new;
1718 FeatNodePtr fnp;
1719 AlignSegPtr asp;
1720 Int4 current_pos;
1721 Uint1 strand;
1722 Int4 seglen;
1723 SeqLocPtr head;
1724 Int2 label_size;
1725 Int4 left, right, e_left, e_right;
1726 Int4 i_left, i_right;
1727 Int4 ins_len, gap_len;
1728 GatherRangePtr grp;
1729 Int2 i;
1730 ValNodePtr prev;
1731
1732 afp= (AlignFeatPtr)(gcp->userdata);
1733 if(afp == NULL || afp->csop == NULL)
1734 return FALSE;
1735
1736 if(gcp->thistype == OBJ_SEQANNOT)
1737 {
1738 afp->all_feature = is_powerblast_feature((SeqAnnotPtr)(gcp->thisitem));
1739 return TRUE;
1740 }
1741 if(gcp->thistype != OBJ_SEQFEAT)
1742 return TRUE;
1743
1744 if(afp->filter_level == gcp->seglevel+1)
1745 return TRUE;
1746 csop = afp->csop;
1747 label_size = MIN(100, csop->label_size);
1748 if(csop->features == NULL && afp->all_feature == FALSE)
1749 return FALSE;
1750
1751 omtp=ObjMgrTypeFind(afp->omp, OBJ_SEQFEAT, NULL, NULL);
1752 if(omtp == NULL)
1753 return TRUE;
1754
1755 feat_subtype = 0;
1756 if(omtp->subtypefunc !=NULL)
1757 feat_subtype = (*(omtp->subtypefunc)) (gcp->thisitem);
1758 if((afp->all_feature == FALSE) &&
1759 (csop->features[feat_subtype] == FALSE)) /*do not collect the current feature*/
1760 return TRUE;
1761
1762
1763 anp = afp->anp;
1764 current_pos = anp->seqpos;
1765 if(anp->seqpos < 0)
1766 strand = Seq_strand_minus;
1767 else
1768 strand = Seq_strand_plus;
1769 sfp = gcp->thisitem;
1770 label[0] = '\0';
1771 if(collect_feature_label(csop->flabel_format[feat_subtype]))
1772 if(omtp->labelfunc !=NULL)
1773 (*(omtp->labelfunc))(sfp, label, label_size, csop->flabel_format[feat_subtype]);
1774
1775 /*map to the location of aligned segs*/
1776 if(gcp->product) /*for protein sequence alignment*/
1777 head = sfp->product;
1778 else
1779 head = sfp->location;
1780 left = anp->extremes.left;
1781 ins_len = 0;
1782 gap_len = 0;
1783 e_left = gcp->extremes.left;
1784 e_right = gcp->extremes.right;
1785 for(asp = anp->segs; asp !=NULL; asp = asp->next)
1786 {
1787 if(asp->type != GAP_SEG)
1788 {
1789 prev = get_last_node (asp->cnp);
1790 if(asp->type == INS_SEG)
1791 {
1792 seglen = asp->gr.right;
1793 /*ins_len += seglen;*/
1794 }
1795 else
1796 seglen = asp->gr.right - asp->gr.left +1;
1797 right = left + seglen -1;
1798 if(!(left > e_right || right < e_left))
1799 {
1800 fnp = CreateFeatNode (&(asp->cnp), &prev, OBJ_SEQFEAT, gcp->itemID, gcp->entityID, feat_subtype);
1801 fnp->extremes.left = MAX(left, e_left) + gap_len;
1802 fnp->extremes.right = MIN(right, e_right) + gap_len;
1803 fnp->extremes.left -=ins_len;
1804 fnp->extremes.right -= ins_len;
1805 fnp->extremes.strand = gcp->extremes.strand;
1806 if(label[0] != '\0')
1807 fnp->label = StringSave(label);
1808 grp = gcp->rdp;
1809 for(i=0; (grp!=NULL) && i<gcp->num_interval; ++i)
1810 {
1811 i_left = grp->left;
1812 i_right = grp->right;
1813 if(!(left > i_right || right < i_left))
1814 {
1815 new = MemNew(sizeof(IvalNode));
1816 new->gr.left = MAX(left, i_left) - ins_len + gap_len;
1817 new->gr.right = MIN(right, i_right) - ins_len + gap_len;
1818 new->gr.strand = grp->strand;
1819 ValNodeAddPointer(&(fnp->interval), 0, new);
1820 }
1821 ++grp;
1822 }
1823 }
1824 left = right +1;
1825 if(asp->type == INS_SEG)
1826 ins_len += seglen;
1827 }
1828 else
1829 gap_len += (asp->gr.right - asp->gr.left +1);
1830 }
1831 return TRUE;
1832 }
1833
1834
1835 /******************************************************************
1836 *
1837 * CollectFeatureForAlignNode(slp, anp, csop)
1838 * collect feature for the alignment
1839 * slp: the target Seq-loc
1840 * anp: the AlignNode belong to the target Seq-loc
1841 * csop: the option for gathering the features
1842 *
1843 ******************************************************************/
CollectFeatureForAlignNode(SeqLocPtr slp,AlignNodePtr anp,CollectSeqOptionPtr csop)1844 NLM_EXTERN Boolean CollectFeatureForAlignNode(SeqLocPtr slp, AlignNodePtr anp, CollectSeqOptionPtr csop)
1845 {
1846 GatherScope gs;
1847 AlignFeat af;
1848 BioseqPtr bsp;
1849
1850 if(slp == NULL || anp == NULL || csop == NULL)
1851 return FALSE;
1852
1853 if(anp->seq_entityID == 0)
1854 return FALSE;
1855 bsp = BioseqLockById(SeqLocId(slp));
1856
1857
1858 MemSet((Pointer)&gs, 0, sizeof (GatherScope));
1859 gs.get_feats_location = TRUE;
1860 gs.get_feats_product =( bsp->mol == Seq_mol_aa);
1861 MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
1862
1863 gs.ignore[OBJ_SEQANNOT] = FALSE;
1864 gs.ignore[OBJ_SEQFEAT] = FALSE;
1865
1866 gs.nointervals = FALSE; /*need to recalculate the intervals*/
1867 /* gs.seglevels = 1;
1868 gs.seglevels = 1;
1869 gs.stop_on_annot = TRUE;*/
1870 gs.ignore_top = FALSE;
1871 gs.currlevel = 0;
1872 gs.offset = anp->extremes.left;
1873 gs.target = slp;
1874
1875 af.anp = anp;
1876 af.csop = csop;
1877 af.omp = ObjMgrGet();
1878 af.filter_level = 0;
1879
1880 GatherEntity(anp->seq_entityID, (Pointer)(&af), satcollfunc, &gs);
1881 BioseqUnlock(bsp);
1882 return TRUE;
1883 }
1884
1885
1886 /******************************************************************
1887 *
1888 * CollectFeatureForAlign(slp, anp, featureOrder, groupOrder)
1889 * collect feature for the alignment
1890 * slp: the target Seq-loc
1891 * anp: the AlignNode belong to the target Seq-loc
1892 * featureOrder: the order of features
1893 * groupOrder: the order of the groups
1894 * it takes the anp->seq_entityID and searches for the features
1895 *
1896 ******************************************************************/
1897
CollectAlignFeature(SeqLocPtr slp,AlignNodePtr anp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint1Ptr flabel_format)1898 static Boolean CollectAlignFeature(SeqLocPtr slp, AlignNodePtr anp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint1Ptr flabel_format)
1899 {
1900 CollectSeqOption cs_option;
1901 Boolean show_feature, collect = FALSE;
1902 Int2 i;
1903 ValNode vn;
1904
1905 if(featureOrder == NULL || groupOrder == NULL || slp == NULL || anp == NULL)
1906 return FALSE;
1907
1908 cs_option.nointerval = FALSE;
1909 cs_option.slabel_format = PRINTID_TEXTID_ACCESSION;
1910 cs_option.seglevels = 0;
1911 cs_option.label_size = 10;
1912 for( i =0; i<FEATDEF_ANY; ++i) /*for checking the features to load*/
1913 {
1914 show_feature = (featureOrder[i] != 0);
1915 cs_option.features[i] = show_feature;
1916 if(show_feature)
1917 collect = TRUE;
1918 }
1919 if(collect)
1920 {
1921 if(flabel_format == NULL)
1922 MemSet((Pointer)(cs_option.flabel_format), OM_LABEL_CONTENT, (size_t)FEATDEF_ANY*sizeof(Uint1));
1923 else
1924 MemCopy(&(cs_option.flabel_format), &flabel_format, (size_t)FEATDEF_ANY*sizeof(Uint1));
1925 CollectFeatureForAlignNode(slp, anp, &cs_option);
1926 vn.choice = OBJ_SEQALIGN;
1927 vn.data.ptrvalue = anp;
1928 vn.next = NULL;
1929 SortAlignmentFeature(&vn, featureOrder, groupOrder);
1930 return TRUE;
1931 }
1932 else
1933 return FALSE;
1934 }
1935
CollectFeatureForAlign(SeqLocPtr slp,AlignNodePtr anp,Uint1Ptr featureOrder,Uint1Ptr groupOrder)1936 NLM_EXTERN Boolean CollectFeatureForAlign(SeqLocPtr slp, AlignNodePtr anp, Uint1Ptr featureOrder, Uint1Ptr groupOrder)
1937 {
1938 return CollectAlignFeature(slp, anp, featureOrder, groupOrder, NULL);
1939 }
1940
1941 /******************************************************************
1942 *
1943 * SortAlignmentFeature(anp_node, featureOrder, groupOrder)
1944 * sort the list of FeatNode in aligned segment (asp->cnp) to the
1945 * proper order of featureOrder and groupOrder
1946 *
1947 *******************************************************************/
SortAlignmentFeature(ValNodePtr anp_node,Uint1Ptr featureOrder,Uint1Ptr groupOrder)1948 NLM_EXTERN void SortAlignmentFeature(ValNodePtr anp_node, Uint1Ptr featureOrder, Uint1Ptr groupOrder)
1949 {
1950 AlignNodePtr anp;
1951 AlignSegPtr asp;
1952
1953 while(anp_node)
1954 {
1955 if(anp_node->choice != OBJ_SEQANNOT)
1956 {
1957 anp = anp_node->data.ptrvalue;
1958 for(asp = anp->segs; asp !=NULL; asp = asp->next)
1959 if(asp->cnp !=NULL)
1960 asp->cnp = SortFeatNode(asp->cnp, featureOrder, groupOrder);
1961 }
1962 anp_node = anp_node->next;
1963 }
1964 }
1965
1966
make_current_seqport(SeqLocPtr masterloc,Int4 offset,Uint1 code)1967 static SeqPortPtr make_current_seqport(SeqLocPtr masterloc, Int4 offset, Uint1 code)
1968 {
1969 SeqLocPtr slp;
1970 Int4 start, stop;
1971 Uint1 strand;
1972 SeqPortPtr spp;
1973
1974
1975 start = SeqLocStart(masterloc);
1976 stop = SeqLocStop(masterloc);
1977 strand = SeqLocStrand(masterloc);
1978
1979 if(strand == Seq_strand_minus)
1980 stop -= offset;
1981 else
1982 start += offset;
1983 slp = SeqLocIntNew(start, stop, strand, SeqLocId(masterloc));
1984 spp = SeqPortNewByLoc(slp, code);
1985 SeqLocFree(slp);
1986
1987 return spp;
1988 }
1989
1990
/*
*	add_int_to_node(head, prev, val, choice)
*	Create a new ValNode holding the integer val (with the given
*	choice) and attach it after *prev, or make it the list head when
*	*prev is NULL. *prev is then moved to the new node.
*/
static void add_int_to_node (ValNodePtr PNTR head, ValNodePtr PNTR prev, Int4 val, Uint1 choice)
{
	ValNodePtr node;

	node = ValNodeNew(NULL);
	node->data.intvalue = val;
	node->choice = choice;

	if(*prev != NULL)
		(*prev)->next = node;
	else
		*head = node;

	/*the new node is the attachment point for the next call*/
	*prev = node;
}
2005
2006
CollectMismatchForAlign(AlignNodePtr anp,SeqLocPtr masterloc,BioseqPtr bsp,Int4 offset)2007 static Boolean CollectMismatchForAlign(AlignNodePtr anp, SeqLocPtr masterloc, BioseqPtr bsp, Int4 offset)
2008 {
2009
2010 AlignSegPtr asp;
2011 SeqPortPtr spp, mspp;
2012 Uint1 code;
2013 Uint1 res, mres;
2014 Int4 start = 0, stop = 0;
2015 Uint1 strand;
2016 Int4 current_pos;
2017 Int4 seglen, j;
2018 ValNodePtr prev;
2019
2020
2021
2022 if(anp->is_master || SeqIdForSameBioseq(SeqLocId(masterloc), anp->sip))
2023 return FALSE;
2024 if(bsp->mol == Seq_mol_aa)
2025 code = Seq_code_ncbieaa;
2026 else
2027 code = Seq_code_iupacna;
2028 /* mspp = SeqPortNewByLoc(masterloc, code); */
2029
2030 current_pos = anp->seqpos;
2031 if(anp->seqpos < 0)
2032 {
2033 strand = Seq_strand_minus;
2034 if(ABS(anp->seqpos) < bsp->length-1)
2035 anp->extremes.l_trunc = TRUE;
2036 }
2037 else
2038 {
2039 strand = Seq_strand_plus;
2040 if(anp->seqpos > 0)
2041 anp->extremes.l_trunc = TRUE;
2042 }
2043 for(asp = anp->segs; asp !=NULL; asp = asp->next)
2044 {
2045 if(asp->type == INS_SEG)
2046 seglen = asp->gr.right;
2047 else
2048 seglen = asp->gr.right - asp->gr.left + 1;
2049 switch(asp->type)
2050 {
2051 case GAP_SEG:
2052 break;
2053
2054 case INS_SEG:
2055 current_pos += seglen;
2056 break;
2057
2058 case REG_SEG:
2059 case DIAG_SEG:
2060 prev = get_last_node (asp->mismatch);
2061 if(strand == Seq_strand_minus)
2062 {
2063 stop = - current_pos;
2064 start = stop - (seglen-1);
2065 }
2066 else
2067 {
2068 start = current_pos;
2069 stop = start + (seglen -1);
2070 }
2071 spp = SeqPortNew(bsp, start, stop, strand, code);
2072
2073 mspp = make_current_seqport(masterloc, (asp->gr.left - offset), code);
2074 /* SeqPortSeek(mspp, (asp->gr.left - offset), SEEK_SET); */
2075
2076 for(j =0; j<seglen; ++j)
2077 {
2078 res = SeqPortGetResidue(spp);
2079 mres = SeqPortGetResidue(mspp);
2080 while(res == SEQPORT_EOS || res == SEQPORT_VIRT)
2081 res = SeqPortGetResidue(spp);
2082 while(mres == SEQPORT_EOS || mres == SEQPORT_VIRT)
2083 mres = SeqPortGetResidue(mspp);
2084 if(IS_ALPHA(res) && IS_ALPHA(mres))
2085 {
2086 if(res != mres)
2087 {
2088 if(bsp->mol != Seq_mol_aa && !StrChr("acgtACGT", res))
2089 add_int_to_node (&(asp->mismatch), &prev, (j+asp->gr.left), MISMATCH_AMB);
2090 else
2091 add_int_to_node (&(asp->mismatch), &prev, (j+asp->gr.left), MISMATCH_LINE);
2092 /* ValNodeAddInt(&(asp->mismatch), 0, (j+asp->gr.left)); */
2093 }
2094 }
2095 else if(res == SEQPORT_EOF || mres == SEQPORT_EOF)
2096 break;
2097
2098 }
2099 current_pos += seglen;
2100
2101 SeqPortFree(spp);
2102 SeqPortFree(mspp);
2103 break;
2104
2105 default:
2106 break;
2107 }
2108 /*current_pos += seglen;*/
2109 }
2110 /* SeqPortFree(mspp); */
2111 if(strand == Seq_strand_minus)
2112 {
2113 if(start > 0)
2114 anp->extremes.r_trunc = TRUE;
2115 }
2116 else
2117 {
2118 if(stop < bsp->length-1)
2119 anp->extremes.r_trunc = TRUE;
2120 }
2121
2122 return TRUE;
2123 }
2124
2125
make_one_block(SeqRangePtr srp,Int4 seq_start,Int4 seq_stop,Int4 left,Int4 right,Int2 order,AlignNodePtr anp)2126 static AlignBlockPtr make_one_block(SeqRangePtr srp, Int4 seq_start, Int4 seq_stop, Int4 left, Int4 right, Int2 order, AlignNodePtr anp)
2127 {
2128 AlignBlockPtr abp;
2129 Int4 off_left, off_right;
2130
2131 if(srp == NULL || anp == NULL)
2132 return NULL;
2133 if(srp->start > seq_stop || srp->stop < seq_start)
2134 return NULL;
2135
2136 if(srp->strand == Seq_strand_minus)
2137 {
2138 off_left = MAX(0, (srp->stop - seq_stop));
2139 off_right = MAX(0, (seq_start - srp->start));
2140 }
2141 else
2142 {
2143 off_left = MAX(0, (seq_start - srp->start));
2144 off_right = MAX(0, (srp->stop - seq_stop));
2145 }
2146
2147 abp = MemNew(sizeof(AlignBlock));
2148 abp->gr.left = left + off_left;
2149 abp->gr.right = right - off_right;
2150 abp->gr.strand = 0;
2151 if(abp->gr.left == anp->extremes.left&& anp->extremes.strand == Seq_strand_minus)
2152 abp->gr.strand = Seq_strand_minus;
2153 if(abp->gr.right == anp->extremes.right && anp->extremes.strand == Seq_strand_plus)
2154 abp->gr.strand = Seq_strand_plus;
2155 abp->order = order;
2156 return abp;
2157 }
2158
2159
/*
 * Append the block "new" to the end of the list starting at *head.
 * When the list is empty, "new" becomes the head.  Returns "new".
 */
static AlignBlockPtr link_align_blocks(AlignBlockPtr PNTR head, AlignBlockPtr new)
{
    AlignBlockPtr tail;

    tail = *head;
    if(tail == NULL)
    {
        *head = new;
        return new;
    }

    /* walk to the last block of the existing chain */
    for(; tail->next != NULL; tail = tail->next)
        continue;
    tail->next = new;
    return new;
}
2175
2176
make_blocks(AlignDataPtr adp,Int4 seq_start,Int4 seq_stop,Int2 order,AlignNodePtr anp)2177 static Boolean make_blocks(AlignDataPtr adp, Int4 seq_start, Int4 seq_stop, Int2 order, AlignNodePtr anp)
2178 {
2179 AlignBlockPtr abp = NULL;
2180 AlignRangePtr arp;
2181
2182
2183 if(adp == NULL || anp == NULL)
2184 return FALSE;
2185 if(adp->arp == NULL)
2186 {
2187 abp = make_one_block(&(adp->seqends), seq_start, seq_stop, anp->extremes.left, anp->extremes.right, order, anp);
2188 if(abp != NULL)
2189 link_align_blocks(&(anp->blocks), abp);
2190 }
2191 else
2192 {
2193 for(arp = adp->arp; arp != NULL; arp = arp->next)
2194 {
2195 if(arp->segtype == REG_SEG)
2196 {
2197 abp = make_one_block(&(arp->sr), seq_start, seq_stop, arp->gr.left, arp->gr.right, order, anp);
2198 if(abp != NULL)
2199 {
2200 link_align_blocks(&(anp->blocks), abp);
2201 break;
2202 }
2203 }
2204 }
2205 }
2206 return (abp != NULL);
2207 }
2208
2209
sequence_has_alignment(ValNodePtr align_id_list,SeqIdPtr sip)2210 static Boolean sequence_has_alignment(ValNodePtr align_id_list, SeqIdPtr sip)
2211 {
2212 Uint1 kludge_factor;
2213 Int4 gi;
2214
2215
2216 kludge_factor = (Uint1)get_kludge_factor(sip, &gi);
2217 if(gi == -1)
2218 return FALSE;
2219
2220 while(align_id_list)
2221 {
2222 if(align_id_list->choice == kludge_factor)
2223 {
2224 if(align_id_list->data.intvalue == gi)
2225 return TRUE;
2226 }
2227
2228 align_id_list = align_id_list->next;
2229 }
2230
2231 return FALSE;
2232 }
2233
2234
2235
add_sequence_alignment_info(ValNodePtr align_id_list,ValNodePtr anp_list)2236 static Boolean add_sequence_alignment_info(ValNodePtr align_id_list, ValNodePtr anp_list)
2237 {
2238
2239 AlignNodePtr anp;
2240
2241 if(align_id_list == NULL || anp_list == NULL)
2242 return FALSE;
2243
2244 while(anp_list)
2245 {
2246 if(anp_list->choice != OBJ_SEQANNOT)
2247 {
2248 anp = anp_list->data.ptrvalue;
2249 if(anp->seq_has_align == FALSE)
2250 anp->seq_has_align = sequence_has_alignment(align_id_list, anp->sip);
2251 }
2252 anp_list = anp_list->next;
2253 }
2254
2255 return TRUE;
2256 }
2257
2258
2259 typedef struct temp_bsp_data{
2260 BioseqPtr bsp;
2261 Uint4 itemID;
2262 Boolean found;
2263 }TempBsp, PNTR TempBspPtr;
2264
bspcountfunc(GatherContextPtr gcp)2265 static Boolean bspcountfunc(GatherContextPtr gcp)
2266 {
2267 TempBspPtr tbp;
2268 BioseqPtr bsp;
2269
2270 if(gcp == NULL)
2271 return FALSE;
2272 tbp = (TempBspPtr)(gcp->userdata);
2273 if(tbp == NULL || tbp->bsp == NULL)
2274 return FALSE;
2275 if(tbp->found)
2276 return FALSE;
2277 bsp = (BioseqPtr)(gcp->thisitem);
2278 if(tbp->bsp == bsp)
2279 {
2280 tbp->itemID= gcp->itemID;
2281 tbp->found = TRUE;
2282 return FALSE;
2283 }
2284 else
2285 return TRUE;
2286 }
2287
2288 /*****************************************************************
2289 *
2290 * given the bioseq and its entityID, figure out the
2291 * itemID for the Bioseq
2292 *
2293 *****************************************************************/
get_bioseq_itemID(BioseqPtr bsp,Uint2 entityID)2294 NLM_EXTERN Uint4 get_bioseq_itemID(BioseqPtr bsp, Uint2 entityID)
2295 {
2296 GatherScope gs;
2297 TempBsp tb;
2298
2299
2300 if(bsp == NULL || entityID == 0)
2301 return 0;
2302
2303 tb.bsp = bsp;
2304 tb.itemID= 0;
2305 tb.found = FALSE;
2306
2307 MemSet((Pointer)(&gs), 0, sizeof(GatherScope));
2308 MemSet((Pointer)(gs.ignore), (int)(TRUE), (size_t)OBJ_MAX * sizeof(Boolean));
2309 gs.ignore[OBJ_BIOSEQ] = FALSE;
2310 GatherEntity(entityID, &tb, bspcountfunc, &gs);
2311
2312 return tb.itemID;
2313 }
2314
2315
stop_collecting_alignment(ValNodePtr anp_list,Int4 max_num)2316 static Boolean stop_collecting_alignment(ValNodePtr anp_list, Int4 max_num)
2317 {
2318 Int2 i;
2319
2320 i = 0 ;
2321 while(anp_list)
2322 {
2323 ++i;
2324 if(i > max_num)
2325 {
2326 if(anp_list->next == NULL)
2327 return TRUE;
2328 }
2329 anp_list = anp_list->next;
2330 }
2331
2332 return FALSE;
2333 }
2334
/*
 * SeqEntryExplore callback that fills anp->clone_id (data is the
 * AlignNodePtr) from the descriptors of the visited Bioseq or Bioseq-set.
 *
 * Two places are searched, in descriptor order:
 *   - a source descriptor with a sub-source of subtype 3 (clone) that
 *     has a name: the name is copied and the search ends;
 *   - an org descriptor with a modifier string starting with "clone=":
 *     the text after the prefix is copied; a later match replaces an
 *     earlier one.
 *
 * Nothing is done when clone_id is already set on entry.  Any string
 * stored earlier during this call is freed before it is replaced, so
 * repeated matches do not leak.  index and indent are required by the
 * callback signature and are not used.
 */
static void FindCloneCallback(SeqEntryPtr sep, Pointer data, Int4 index, Int2 indent)
{
    AlignNodePtr anp;
    BioseqPtr bsp;
    BioseqSetPtr bssp;
    ValNodePtr descr;
    ValNodePtr curr;
    OrgRefPtr orp;
    ValNodePtr mod;
    CharPtr str;

    BioSourcePtr source;
    SubSourcePtr ssp;

    anp = (AlignNodePtr)data;
    if(anp == NULL || sep == NULL || sep->data.ptrvalue == NULL)
        return;
    if(anp->clone_id != NULL)
        return;

    if(sep->choice == 1)
    {
        bsp = (BioseqPtr)(sep->data.ptrvalue);
        descr = bsp->descr;
    }
    else
    {
        bssp = (BioseqSetPtr)(sep->data.ptrvalue);
        descr = bssp->descr;
    }

    for(curr = descr; curr != NULL; curr = curr->next)
    {
        if(curr->choice == Seq_descr_source)
        {
            source = curr->data.ptrvalue;
            if(source == NULL)
                continue;
            /* search for the clone sub-source */
            for(ssp = source->subtype; ssp != NULL; ssp = ssp->next)
            {
                if(ssp->subtype == 3 && ssp->name != NULL)
                {	/* 3 == clone */
                    /* drop a value stored by an earlier org descriptor */
                    if(anp->clone_id != NULL)
                        MemFree(anp->clone_id);
                    anp->clone_id = StringSave(ssp->name);
                    return;
                }
            }
        }
        else if(curr->choice == Seq_descr_org)
        {
            orp = curr->data.ptrvalue;
            if(orp)
            {
                for(mod = orp->mod; mod != NULL; mod = mod->next)
                {
                    str = mod->data.ptrvalue;
                    if(str != NULL && StringNCmp(str, "clone=", 6) == 0)
                    {
                        /* the last "clone=" modifier wins; free the previous copy */
                        if(anp->clone_id != NULL)
                            MemFree(anp->clone_id);
                        anp->clone_id = StringSave(str+6);
                    }
                }
            }
        }
    }
}
2397
LoadIndexLabelBlock(AlignNodePtr anp)2398 static Boolean LoadIndexLabelBlock(AlignNodePtr anp)
2399
2400 {
2401 AlignBlockPtr abp;
2402 SeqIdPtr sip;
2403 ObjectIdPtr oip;
2404 DbtagPtr db_tag;
2405
2406 sip = anp->sip;
2407 if(sip == NULL || sip->choice != SEQID_GENERAL)
2408 return FALSE;
2409
2410 db_tag = sip->data.ptrvalue;
2411 if(db_tag == NULL || db_tag->db == NULL)
2412 return FALSE;
2413 oip = db_tag->tag;
2414 if(oip== NULL || oip->id <= 0)
2415 return FALSE;
2416
2417
2418 abp = MemNew(sizeof(AlignBlock));
2419 MemCopy((Pointer)(&(abp->gr)), (Pointer)(&(anp->extremes)), sizeof(GatherRange));
2420 abp->order = (Uint2)oip->id;
2421 anp->blocks = abp;
2422
2423 anp->label = StringSave(db_tag->db);
2424 return TRUE;
2425 }
2426
2427
2428 /***********************************************************************
2429 *
2430 * coll_align_data(align, m_sip, adp, clone, featureOrder, show_mismatch,
2431 * itemID, entityID, anp_list)
2432 * convert all the alignment data stored in adp into the drawing
2433 * structure AlignNode
2434 *
2435 * align: the current Seq-align
2436 * m_sip: the master sequence, also the target sequence in gather
2437 * adp: the collected structure from gather
2438 * clone: for filtering out unwanted clone type. set to NULL for all
2439 * featureOrder: for features to be displayed together with alignment
2440 * show_mismatch: show the mismatched base-pairs
2441 * itemID: itemID for the current align
2442 * entityID: entityID for the Seq-entry of m_sip
2443 *
2444 * NOTE: if either show_mismatch or featureOrder is selected, it puts
2445 * the newly retrieved sequence for itemID and entityID
2446 *
2447 * anp_list: the list of AlignNodePtr to stored the coverted result
2448 *
2449 ************************************************************************/
2450
coll_align_data(SeqAlignPtr align,Uint1 index,AlignDataPtr adp,CollectAlignOptionPtr caop,Uint4 itemID,Int2 entityID,Int2 itemType,SeqLocPtr mloc,ValNodePtr PNTR anp_list,ValNodePtr PNTR prev)2451 static Boolean coll_align_data(SeqAlignPtr align, Uint1 index, AlignDataPtr adp, CollectAlignOptionPtr caop, Uint4 itemID, Int2 entityID, Int2 itemType, SeqLocPtr mloc, ValNodePtr PNTR anp_list, ValNodePtr PNTR prev)
2452 {
2453 Char label[41];
2454
2455 SeqLocPtr slp, extloc;
2456 SeqIdPtr sip;
2457 Boolean feat; /*collect any features?*/
2458 Boolean show_mismatch;
2459 Boolean is_master;
2460
2461 AlignRangePtr arp;
2462 AlignNodePtr anp;
2463 AlignSegPtr asp, pasp;
2464
2465 BioseqPtr bsp;
2466 Uint2 order;
2467 Int4 e_left = 0, e_right = 0;
2468 Boolean match_seg;
2469 Int2 label_size;
2470 Int4 offset = 0;
2471 SeqIdPtr best_id;
2472 SeqEntryPtr sep;
2473
2474 #ifdef NONO
2475 if(align->segtype == 5) /* Discontinuous aligment not collected */
2476 return TRUE;
2477 #endif
2478 label_size = MIN(caop->label_size, 100);
2479 feat = caop->show_feature;
2480 if(align->segtype == 3) /*for std-seg, no feature or mismatch*/
2481 {
2482 show_mismatch = FALSE;
2483 feat = FALSE;
2484 }
2485 else
2486 show_mismatch = caop->show_mismatch;
2487
2488 if(align->segtype == 2) /*for Dense-seg or Dense-diag only*/
2489 {
2490 if(caop->align_num != -1)
2491 {
2492 /* if(stop_collecting_alignment(*anp_list, caop->align_num)) */
2493 if(caop->curr_align_num > caop->align_num)
2494 {
2495 /* ErrPostEx (SEV_WARNING, 0, 0, "The top %ld alignments are displayed. The rest are truncated", caop->align_num);
2496 return FALSE; */
2497 feat = FALSE;
2498 show_mismatch = FALSE;
2499 }
2500 }
2501 }
2502
2503 while(adp)
2504 {
2505 ++(caop->curr_align_num);
2506 anp = MemNew(sizeof (AlignNode));
2507 anp->pop_sap = NULL;
2508 anp->itemID = itemID;
2509 anp->entityID = entityID;
2510 anp->seqOrder = adp->order;
2511 anp->chain = adp->chain;
2512 anp->seq_has_align = FALSE;
2513 anp->index = index;
2514 anp->keep_label = FALSE;
2515 MemCopy(&(anp->extremes), &(adp->extremes), sizeof(GatherRange));
2516
2517 if(adp->seqends.strand == Seq_strand_minus)
2518 anp->seqpos = -(adp->seqends.stop);
2519 else
2520 anp->seqpos = adp->seqends.start;
2521
2522 pasp = NULL;
2523 for(arp = adp->arp; arp !=NULL; arp = arp->next)
2524 {
2525 asp = MemNew(sizeof(AlignSeg));
2526 MemCopy(&(asp->gr), &(arp->gr), sizeof(GatherRange));
2527 asp->type = arp->segtype;
2528 if(asp->type == INS_SEG)
2529 asp->ins_pos = asp->gr.left;
2530 if(pasp == NULL)
2531 anp->segs = asp;
2532 else
2533 pasp->next = asp;
2534 pasp = asp;
2535 }
2536
2537 if(index == ALIGN_NON_INDEX && (feat|| show_mismatch))
2538 {
2539 bsp = BioseqLockById(adp->sip);
2540 if(bsp != NULL)
2541 {
2542 if(adp->sip->choice == SEQID_GI)
2543 {
2544 sep = SeqEntryFind(adp->sip);
2545 if(sep != NULL)
2546 SeqEntryExplore(sep, (Pointer)anp, FindCloneCallback);
2547 }
2548 if(bsp->hist && bsp->hist->assembly)
2549 anp->seq_has_align = TRUE;
2550 anp->seq_entityID = ObjMgrGetEntityIDForPointer((Pointer)bsp);
2551 anp->bsp_itemID = get_bioseq_itemID(bsp, anp->seq_entityID);
2552 best_id = SeqIdFindBest(bsp->id, SEQID_GI);
2553 if(best_id == NULL)
2554 best_id = bsp->id;
2555 anp->sip = SeqIdDup(best_id);
2556
2557 if(feat)
2558 {
2559 if(BioseqHasFeature(bsp))
2560 caop->csop->seglevels = 0;
2561 else
2562 caop->csop->seglevels = 1;
2563 slp = SeqLocIntNew(adp->seqends.start, adp->seqends.stop, adp->seqends.strand, best_id);
2564 CollectFeatureForAlignNode(slp, anp, caop->csop);
2565 SeqLocFree(slp);
2566 }
2567 if(show_mismatch && bsp->repr != Seq_repr_map)
2568 CollectMismatchForAlign(anp, mloc, bsp, offset+caop->graphic_offset);
2569
2570 BioseqUnlock(bsp);
2571 }
2572 /* else
2573 printf("fail to get sequence for %ld\n", adp->sip->data.intvalue); */
2574 }
2575 if(anp->sip == NULL)
2576 anp->sip = SeqIdDup(adp->sip);
2577
2578
2579
2580 /*collecting matching piece to show the content of a segmented sequence*/
2581 if(caop->segloc != NULL && index == ALIGN_NON_INDEX )
2582 {
2583 is_master = SeqIdForSameBioseq(adp->sip, SeqLocId(mloc));
2584 if(is_master)
2585 {
2586 e_left = 0;
2587 e_right = -1;
2588 }
2589 order = 0;
2590 for(extloc = caop->segloc; extloc != NULL; extloc = extloc->next)
2591 {
2592 ++order;
2593 match_seg = FALSE;
2594 if(is_master)
2595 {
2596 e_right += SeqLocLen(extloc);
2597 match_seg = TRUE;
2598 }
2599 else
2600 {
2601 sip = SeqLocId(extloc);
2602 match_seg = SeqIdForSameBioseq(sip, anp->sip);
2603
2604 }
2605 if(match_seg)
2606 {
2607 if(!is_master)
2608 {
2609 e_left = SeqLocStart(extloc);
2610 e_right = SeqLocStop(extloc);
2611 }
2612 match_seg = make_blocks(adp, e_left, e_right, order, anp);
2613 }
2614 if(is_master)
2615 e_left = e_right +1;
2616 if(match_seg)
2617 if(e_right > adp->seqends.stop)
2618 break;
2619 }
2620 }
2621
2622 /*store the index information in the blocks*/
2623 if(index != ALIGN_NON_INDEX)
2624 LoadIndexLabelBlock(anp);
2625 else if(label_size > 0)
2626 {
2627 if(MuskSeqIdWrite (anp->sip, label, label_size, caop->slabel_format, TRUE, TRUE)) {
2628 SeqIdPtr gilist = GetUseThisGi(align);
2629 if (gilist) {
2630 Char buf[1024];
2631 sprintf(buf, "%d", gilist->data.intvalue);
2632 anp->label = StringSave(buf);
2633 anp->keep_label = TRUE;
2634 gilist = SeqIdSetFree(gilist);
2635 } else {
2636 anp->label = StringSave(label);
2637 }
2638 }
2639 }
2640
2641 link_data_for_collect (anp_list, prev, (Pointer)anp, (Uint1)itemType);
2642 adp = adp->next;
2643 }
2644 return TRUE;
2645 }
2646
does_annot_match_target(SeqLocPtr target,SeqAnnotPtr annot)2647 static Boolean does_annot_match_target (SeqLocPtr target, SeqAnnotPtr annot)
2648 {
2649 SeqAlignPtr sap;
2650 SeqIdPtr sip;
2651 DenseDiagPtr ddp;
2652 DenseSegPtr dsp;
2653 StdSegPtr ssp;
2654 SeqIdPtr target_id;
2655 SeqLocPtr slp;
2656 Boolean result;
2657
2658 if(target == NULL || annot == NULL || annot->type != 2)
2659 return FALSE;
2660 target_id = SeqLocId(target);
2661 sap = annot->data;
2662
2663 if(sap == NULL)
2664 return FALSE;
2665
2666 switch(sap->segtype) {
2667 case 1:
2668 ddp = sap->segs;
2669 for(sip = ddp->id; sip != NULL; sip = sip->next)
2670 if(SeqIdForSameBioseq(sip, target_id))
2671 return TRUE;
2672 break;
2673 case 2:
2674 dsp = sap->segs;
2675 for(sip = dsp->ids; sip != NULL; sip = sip->next)
2676 if(SeqIdForSameBioseq(sip, target_id))
2677 return TRUE;
2678 break;
2679 case 3:
2680 ssp = sap->segs;
2681 for(slp = ssp->loc; slp != NULL; slp = slp->next)
2682 if(SeqIdForSameBioseq(SeqLocId(slp), target_id))
2683 return TRUE;
2684 break;
2685 case 5:
2686
2687 annot->data = (SeqAlignPtr) sap->segs;
2688 result = does_annot_match_target (target, annot);
2689 annot->data = sap;
2690 return result;
2691
2692 default:
2693 break;
2694 }
2695
2696 return FALSE;
2697 }
2698
collalignfunc(GatherContextPtr gcp)2699 static Boolean collalignfunc(GatherContextPtr gcp)
2700 {
2701 SeqAnnotPtr annot;
2702 CollectHeaderPtr chp;
2703 AnnotInfoPtr info;
2704 SeqAlignPtr align;
2705 Uint1 annot_type;
2706
2707
2708 chp= (CollectHeaderPtr)(gcp->userdata);
2709
2710 switch(gcp->thistype)
2711 {
2712 case OBJ_SEQANNOT:
2713 annot = (SeqAnnotPtr)(gcp->thisitem);
2714 if(annot->type == 2)
2715 {
2716 chp->caop->curr_align_num = 0;
2717 chp->load_align = TRUE;
2718 chp->index = 0;
2719 if(!chp->take_all_annot)
2720 {
2721 if(!is_annot_for_hist_alignment(annot))
2722 {
2723 chp->load_align = FALSE;
2724 return TRUE;
2725 }
2726 }
2727 info = MemNew(sizeof(AnnotInfo));
2728 info->annotDB[0] = '\0';
2729 info->displayOrder = get_align_annot_qual(annot, info->annotDB, 20, &annot_type);
2730 info->annot_type = annot_type;
2731 if(annot_type == ANNOT_BLAST)
2732 info->blast_type = info->displayOrder;
2733 /*load the index values*/
2734 if(info->annotDB[0] != '\0')
2735 {
2736 if(StringCmp(info->annotDB, "Sequencing Status") == 0)
2737 chp->index = ALIGN_SEQ_INDEX;
2738 else if(StringCmp(info->annotDB, "Mapping Status") == 0)
2739 chp->index = ALING_MAP_INDEX;
2740 }
2741
2742 /* Eric Green's un-aligned guys */
2743 if(annot_type == ANNOT_CONSIST)
2744 {
2745 info->consistent = info->displayOrder;
2746 if(info->consistent == ALIGN_UNKNOWN)
2747 { /*un-aligned guys, check if the Seq-loc matches */
2748 if(!does_annot_match_target (chp->slp, annot))
2749 info = MemFree(info);
2750 chp->load_align = FALSE;
2751 }
2752 }
2753 else if(annot_type == ANNOT_FISH)
2754 info->is_fish_align= TRUE;
2755 if(info != NULL)
2756 {
2757 info->entityID = gcp->entityID;
2758 info->itemID = gcp->itemID;
2759 link_data_for_collect (&(chp->aligns), &(chp->prev_align), (Pointer)info, (Uint1)(gcp->thistype));
2760 }
2761 }
2762 return TRUE;
2763
2764 case OBJ_SEQALIGN:
2765 align = (SeqAlignPtr)(gcp->thisitem);
2766 if(chp->load_align)
2767 return coll_align_data(align, chp->index, gcp->adp, chp->caop, gcp->itemID, gcp->entityID, gcp->thistype, chp->slp, &(chp->aligns), &(chp->prev_align));
2768 else
2769 return TRUE;
2770 case OBJ_SEQHIST_ALIGN:
2771 align = (SeqAlignPtr)(gcp->thisitem);
2772 return coll_align_data(align, chp->index, gcp->adp, chp->caop, gcp->itemID, gcp->entityID, gcp->thistype, chp->slp, &(chp->aligns), &(chp->prev_align));
2773 case OBJ_SEQHIST:
2774 chp->caop->curr_align_num = 0;
2775 return TRUE;
2776 default:
2777 return TRUE;
2778 }
2779 }
2780
2781
2782 /*********************************************************************
2783 *
2784 * CollectItemForAlignment(slp, entityID, left, caop)
2785 * return a list of AlignNode for the alignment in the target seqloc
2786 * slp: the target Seq-loc
2787 * entityID: the entity source for collection
2788 * left: the left offset on the graphic
2789 * caop: the option for alignment collection
2790 *
2791 **********************************************************************/
CollectItemForAlignment(SeqLocPtr slp,Uint2 entityID,Int4 left,CollectAlignOptionPtr caop,Boolean take_all_annot)2792 NLM_EXTERN ValNodePtr CollectItemForAlignment(SeqLocPtr slp, Uint2 entityID, Int4 left, CollectAlignOptionPtr caop, Boolean take_all_annot)
2793 {
2794 GatherScope gs;
2795 CollectHeader ch;
2796 BioseqPtr mbsp;
2797 SeqIdPtr sip;
2798 ValNodePtr align_id_list = NULL;
2799 SeqLocPtr curr, next;
2800
2801 if(slp == NULL || entityID == 0 || caop == NULL)
2802 return NULL;
2803
2804 sip = SeqLocId(slp);
2805 ch.aligns = NULL;
2806 ch.caop = caop;
2807 ch.take_all_annot = take_all_annot;
2808 ch.load_align = TRUE;
2809 ch.prev_feat = NULL;
2810 ch.prev_align = NULL;
2811 ch.index = 0;
2812 /*ch.slp = slp;*/
2813
2814
2815 MemSet((Pointer)&gs, 0, sizeof (GatherScope));
2816 MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
2817
2818 if(caop->only_history == FALSE)
2819 {
2820 gs.ignore[OBJ_SEQANNOT] = FALSE;
2821 gs.ignore[OBJ_SEQALIGN] = FALSE;
2822 }
2823 gs.ignore[OBJ_SEQHIST] = FALSE;
2824 gs.ignore[OBJ_SEQHIST_ALIGN] = FALSE;
2825
2826
2827 gs.nointervals = caop->nointerval;
2828 gs.seglevels = 0;
2829 gs.currlevel = 0;
2830 gs.split_packed_pnt = FALSE;
2831 gs.mapinsert = caop->map_insert;
2832
2833
2834 curr = slp;
2835 while(curr)
2836 {
2837 next = curr->next;
2838 curr->next = NULL;
2839 gs.offset = left;
2840 gs.target = curr;
2841 ch.slp = curr;
2842 caop->graphic_offset = left;
2843 GatherEntity(entityID, (Pointer)(&ch), collalignfunc, &gs);
2844 left += SeqLocLen(curr);
2845 curr->next = next;
2846 curr = next;
2847 }
2848
2849 if(ch.aligns != NULL)
2850 {
2851 mbsp = BioseqLockById(sip);
2852 align_id_list = get_seqids_with_alignment(mbsp);
2853 if(align_id_list != NULL)
2854 {
2855 add_sequence_alignment_info(align_id_list, ch.aligns);
2856 ValNodeFree(align_id_list);
2857 }
2858 BioseqUnlock(mbsp);
2859 }
2860
2861
2862 return ch.aligns;
2863 }
2864
2865
/*
 * Fold the AlignNode carried by new_node into the one carried by head:
 * the head's right extreme is replaced by the new node's, and the new
 * node's segments and blocks are appended to the head's lists.  When
 * blocks are appended, the strand mark of the head's previous last block
 * is cleared.  new_node is then released with FreeAlignNode.
 *
 * Nothing is done when either list node, or either AlignNode, is NULL.
 */
static void merge_master_head(ValNodePtr head, ValNodePtr new_node)
{
    AlignNodePtr master, extra;
    AlignSegPtr last_seg;
    AlignBlockPtr last_block;

    if(head == NULL || new_node == NULL)
        return;

    master = head->data.ptrvalue;
    extra = new_node->data.ptrvalue;
    if(master == NULL || extra == NULL)
        return;

    master->extremes.right = extra->extremes.right;

    /* hand the segments over to the master */
    if(master->segs == NULL)
        master->segs = extra->segs;
    else
    {
        for(last_seg = master->segs; last_seg->next != NULL; last_seg = last_seg->next)
            continue;
        last_seg->next = extra->segs;
    }
    extra->segs = NULL;

    /* hand the blocks over to the master */
    if(master->blocks == NULL)
        master->blocks = extra->blocks;
    else
    {
        for(last_block = master->blocks; last_block->next != NULL; last_block = last_block->next)
            continue;
        if(extra->blocks != NULL)
            last_block->gr.strand = 0;	/* no longer the end block */
        last_block->next = extra->blocks;
    }
    extra->blocks = NULL;

    FreeAlignNode(new_node);
}
2907
2908
2909 /*****************************************************************************
2910 *
2911 * cllect_master_align_node(m_loc, featureOrder, groupOrder)
2912 * in the master-slave alignment, a fake Seq-align is created for the
2913 * master sequence where the master is aligned to itself. The AlignNode
2914 * can be computed for this faked alignment. When this is done, the fake
2915 * Seq-align will be freed
2916 *
2917 * m_loc: the Seq-loc for the master sequence
2918 * featureOrder: the selected features
2919 *
2920 *******************************************************************************/
collect_master_align_node(CollectAlignOptionPtr caop,SeqLocPtr m_loc,Uint1 obj_type,Uint2 entityID)2921 NLM_EXTERN ValNodePtr collect_master_align_node(CollectAlignOptionPtr caop, SeqLocPtr m_loc, Uint1 obj_type, Uint2 entityID)
2922 {
2923 SeqAlignPtr align;
2924 DenseSegPtr dsp;
2925 SeqIdPtr m_sip;
2926 ValNodePtr anp_node, anp_head = NULL, curr;
2927 ValNodePtr prev = NULL;
2928 AlignNodePtr anp;
2929 AlignDataPtr adp;
2930 Int4 left =0;
2931 Boolean show_mismatch;
2932
2933 if(caop == NULL || m_loc == NULL)
2934 return NULL;
2935
2936 show_mismatch = caop->show_mismatch;
2937 caop->show_mismatch = FALSE;
2938 while(m_loc)
2939 {
2940 m_sip = SeqLocId(m_loc);
2941
2942 dsp = DenseSegNew();
2943 dsp->dim = 2;
2944 dsp->numseg =1;
2945 dsp->strands = MemNew((size_t)2*sizeof(Uint1));
2946 dsp->strands[0] = Seq_strand_plus;
2947 dsp->strands[1] = SeqLocStrand(m_loc);
2948 dsp->ids = SeqIdDup(m_sip);
2949 dsp->ids->next = SeqIdDup(m_sip);
2950 dsp->starts = MemNew((size_t)2*sizeof(Int4));
2951 dsp->starts[0] = SeqLocStart(m_loc);
2952 dsp->starts[1] = SeqLocStart(m_loc);
2953 dsp->lens = MemNew(sizeof(Int4));
2954 dsp->lens[0] = SeqLocLen(m_loc);
2955
2956 align = SeqAlignNew();
2957 align->type = 3;
2958 align->segtype = 2;
2959 align->dim = 2;
2960 align->segs = dsp;
2961
2962 anp_node = NULL;
2963 adp = gather_align_data(m_loc, align, left, TRUE, TRUE);
2964 if(adp !=NULL)
2965 {
2966 coll_align_data(align, 0, adp, caop, 0, entityID, obj_type, m_loc, &anp_node, &prev);
2967 FreeAlignData(adp);
2968 }
2969 if(anp_head == NULL)
2970 anp_head = anp_node;
2971 else
2972 merge_master_head(anp_head, anp_node);
2973 SeqAlignFree(align);
2974 left = SeqLocLen(m_loc);
2975 m_loc = m_loc->next;
2976 }
2977 for(curr = anp_head; curr != NULL; curr = curr->next)
2978 {
2979 anp = curr->data.ptrvalue;
2980 anp->use_seq_ids = TRUE; /*use the Seq-id as the itemID for graphic display*/
2981 anp->is_master = TRUE;
2982 }
2983 caop->show_mismatch = show_mismatch;
2984 return anp_head;
2985 }
2986
set_option_for_collect_align(CollectAlignOptionPtr caop,Int2 label_size,Uint1 style)2987 NLM_EXTERN Boolean set_option_for_collect_align(CollectAlignOptionPtr caop, Int2 label_size, Uint1 style)
2988 {
2989 if(caop == NULL)
2990 return FALSE;
2991
2992 MemSet((Pointer)caop, 0, sizeof(CollectAlignOption));
2993 if(style < COLLECT_HISTORY || style > COLLECT_FIXED)
2994 {
2995 Message(MSG_ERROR, "Illegal style for alignment display %d", (int)style);
2996 return FALSE;
2997 }
2998
2999 caop->nointerval = FALSE;
3000 caop->label_size= label_size;
3001 if(style == COLLECT_MD || style == COLLECT_FIXED)
3002 {
3003 caop->only_history = FALSE;
3004 caop->map_insert = FALSE;
3005 }
3006 else
3007 {
3008 caop->only_history = TRUE;
3009 caop->map_insert = TRUE;
3010 }
3011 caop->map_graphic = (style != COLLECT_FIXED);
3012 caop->show_mismatch = (style != COLLECT_HISTORY);
3013 caop->show_feature = FALSE;
3014 caop->slabel_format = PRINTID_TEXTID_ACCESSION;
3015 caop->segloc = NULL;
3016 caop->align_num = DEFAULT_ALIGN_NUM;
3017 caop->graphic_offset = 0;
3018 return TRUE;
3019 }
3020
alignment_are_blast_hits(BioseqPtr bsp)3021 static Boolean alignment_are_blast_hits(BioseqPtr bsp)
3022 {
3023 SeqAnnotPtr annot;
3024 Char label[101];
3025 Uint1 annot_type;
3026
3027 if(bsp == NULL || bsp->annot == NULL)
3028 return FALSE;
3029 for(annot = bsp->annot; annot != NULL; annot = annot->next)
3030 {
3031 if(annot->type == 2)
3032 {
3033 label[0] = '\0';
3034 get_align_annot_qual(annot, label, 20, &annot_type);
3035 if(label[0] != '\0' && StringNCmp(label, "BLAST", 5) == 0)
3036 return TRUE;
3037 }
3038 }
3039 return FALSE;
3040 }
3041
collect_anpnode_with_option(CollectAlignOptionPtr caop,SeqLocPtr m_loc,Uint2 entityID,Int4 style,Uint1 itemType,Uint1Ptr f_order,Uint1Ptr g_order,Boolean take_all_annot)3042 NLM_EXTERN ValNodePtr collect_anpnode_with_option(CollectAlignOptionPtr caop, SeqLocPtr m_loc, Uint2 entityID, Int4 style, Uint1 itemType, Uint1Ptr f_order, Uint1Ptr g_order, Boolean take_all_annot)
3043 {
3044 ValNodePtr anp_list = NULL, list;
3045 BioseqPtr mbsp;
3046 Uint1 featureOrder[FEATDEF_ANY];
3047 Uint1 groupOrder[FEATDEF_ANY];
3048 Int2 i;
3049 CollectSeqOptionPtr csop = NULL;
3050 ValNodePtr align_id_list = NULL;
3051 AlignNodePtr anp;
3052 Boolean show_feature;
3053 ValNodePtr prev = NULL;
3054
3055
3056
3057 if(caop == NULL || m_loc == NULL || entityID == 0)
3058 return NULL;
3059 if(style < COLLECT_HISTORY || style > COLLECT_MD)
3060 return NULL;
3061 mbsp = BioseqLockById(SeqLocId(m_loc));
3062 if(mbsp == NULL)
3063 return NULL;
3064
3065 if(mbsp->repr == Seq_repr_seg)
3066 caop->segloc = (SeqLocPtr)(mbsp->seq_ext);
3067 else
3068 caop->segloc = NULL;
3069
3070 show_feature = FALSE;
3071 if(style != COLLECT_HISTORY)
3072 {
3073 if(f_order != NULL && g_order != NULL)
3074 {
3075 MemCopy((Pointer)(featureOrder), (Pointer)f_order, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3076 MemCopy((Pointer)(groupOrder), (Pointer)g_order, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3077 }
3078 else /*use the default features*/
3079 {
3080 if(mbsp->mol == Seq_mol_aa)
3081 {
3082 MemSet((Pointer)(featureOrder), 1, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3083 MemSet((Pointer)(groupOrder), 1, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3084 featureOrder[FEATDEF_BAD] = 0;
3085 /* featureOrder[FEATDEF_ANY] = 0; */ /* out of bounds */
3086 featureOrder[FEATDEF_PUB] = 0;
3087 featureOrder[FEATDEF_source] = 0;
3088 featureOrder[FEATDEF_NUM] = 0;
3089 featureOrder[FEATDEF_BIOSRC] = 0;
3090 featureOrder[FEATDEF_ORG] = 0;
3091 featureOrder[FEATDEF_CDS] =0;
3092 featureOrder[FEATDEF_PROT] =0;
3093 }
3094 else
3095 {
3096 MemSet((Pointer)(featureOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3097 MemSet((Pointer)(groupOrder), 0, (size_t)(FEATDEF_ANY* sizeof(Uint1)));
3098 featureOrder[FEATDEF_Imp_CDS] = 1;
3099 groupOrder[FEATDEF_Imp_CDS] = 1;
3100 featureOrder[FEATDEF_CDS] = 1;
3101 groupOrder[FEATDEF_CDS] = 1;
3102 }
3103 }
3104
3105 csop = caop->csop;
3106 for(i =0; i<FEATDEF_ANY; ++i)
3107 {
3108 if(featureOrder[i] != 0)
3109 {
3110 csop->features[i] = TRUE;
3111 show_feature = TRUE;
3112 }
3113 else
3114 csop->features[i] = FALSE;
3115 }
3116 }
3117 else
3118 caop->show_mismatch = FALSE;
3119
3120
3121
3122 if(style == COLLECT_MP)
3123 {
3124 if(csop->features[FEATDEF_repeat_region] == FALSE ||
3125 csop->features[FEATDEF_repeat_unit] == FALSE)
3126 {
3127 if(mbsp->repr == Seq_repr_seg || mbsp->repr == Seq_repr_raw
3128 || mbsp->repr == Seq_repr_const)
3129 {
3130
3131 if(alignment_are_blast_hits(mbsp))
3132 {
3133 csop->features[FEATDEF_repeat_region] = TRUE;
3134 csop->features[FEATDEF_repeat_unit] = TRUE;
3135 csop->features[FEATDEF_repeat_region] = TRUE;
3136 csop->features[FEATDEF_repeat_unit] = TRUE;
3137 caop->show_feature = TRUE;
3138 }
3139 }
3140 }
3141 anp_list = collect_master_align_node(caop, m_loc, itemType, entityID);
3142 if(anp_list == NULL)
3143 {
3144 BioseqUnlock(mbsp);
3145 Message(MSG_ERROR, "Fail to make AlignNode for the master sequence");
3146 return NULL;
3147 }
3148 /* if(caop->map_graphic == FALSE)
3149 {
3150 csop->features[FEATDEF_repeat_region] = FALSE;
3151 csop->features[FEATDEF_repeat_unit] = FALSE;
3152 } */
3153
3154 }
3155 caop->show_feature = show_feature;
3156
3157
3158 list = CollectItemForAlignment(m_loc, entityID, 0, caop, take_all_annot);
3159 if(caop->no_sort == FALSE)
3160 list = SortAlignNode(list);
3161 ValNodeLink(&anp_list, list);
3162 if(style == COLLECT_MD)
3163 {
3164 for(list = anp_list; list != NULL; list = list->next)
3165 {
3166 if(list->choice != OBJ_SEQANNOT)
3167 {
3168 anp = list->data.ptrvalue;
3169 if(anp != NULL)
3170 anp->use_seq_ids = TRUE;
3171 }
3172 }
3173 }
3174
3175
3176 if(caop->show_feature)
3177 SortAlignmentFeature(anp_list, featureOrder, groupOrder);
3178 align_id_list = get_seqids_with_alignment(mbsp);
3179 if(align_id_list != NULL)
3180 {
3181 add_sequence_alignment_info(align_id_list, anp_list);
3182 ValNodeFree(align_id_list);
3183 }
3184 if(style == COLLECT_MP && caop->flat_insert)
3185 FlatAlignNode(anp_list);
3186
3187 BioseqUnlock(mbsp);
3188 return anp_list;
3189 }
3190
3191
3192
3193 /***************************************************************
3194 *
3195 * CollAlignFromSeqAnnot(annot, m_loc, featureOrder, groupOrder,
3196 * style,graphic)
3197 *
3198 * collect the AlignNode for Seq-aligns stored in Seq-annot
3199 * annot: the Seq-annot
3200 * m_loc: the target sequence
3201 * left: the offset of the leftmost position
3202 * featureOrder, groupOrde: the features selected to be displayed together
3203 * with alignment
3204 * style: the style of the display. Only valid for multiple-pairwise
3205 * and multiple dimension for now
3206 * graphic: if TRUE, it is designed to show the display on graphic,
3207 * so the mismatch data will be collected. Otherwise, it will not
3208 * collect mismatch data
3209 *
3210 ****************************************************************/
CollAlignFromSeqAnnot(SeqAnnotPtr annot,SeqLocPtr m_loc,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint1 style,Boolean graphic,Boolean sort,Boolean flat_insert)3211 NLM_EXTERN ValNodePtr CollAlignFromSeqAnnot(SeqAnnotPtr annot, SeqLocPtr m_loc, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint1 style, Boolean graphic, Boolean sort, Boolean flat_insert)
3212 {
3213 Uint2 entityID;
3214
3215 CollectAlignOption ca_option;
3216 CollectSeqOption cs_option;
3217 Int2 label_size = 32;
3218
3219 if(annot->type !=2) /*it is not an alignment*/
3220 return NULL;
3221
3222 entityID = ObjMgrRegister(OBJ_SEQANNOT, (Pointer)annot);
3223 if(entityID == 0)
3224 return NULL;
3225
3226
3227 if(style == COLLECT_MP || style == COLLECT_MD)
3228 {
3229 set_option_for_collect_align(&ca_option, label_size, style);
3230 cs_option.nointerval = FALSE;
3231 cs_option.slabel_format = PRINTID_TEXTID_ACCESSION;
3232 MemSet((Pointer)&(cs_option.flabel_format), OM_LABEL_CONTENT, (size_t)FEATDEF_ANY * sizeof(Uint1));
3233 cs_option.label_size = label_size;
3234 cs_option.seglevels = 0;
3235 ca_option.csop = &cs_option;
3236 ca_option.no_sort = 1- sort;
3237 if(style == COLLECT_MP && flat_insert)
3238 ca_option.flat_insert = TRUE;
3239 else
3240 ca_option.flat_insert = FALSE;
3241
3242 ca_option.only_history = FALSE;
3243 if(!graphic)
3244 {
3245 ca_option.show_mismatch = FALSE;
3246 ca_option.align_num = -1;
3247 }
3248 return collect_anpnode_with_option(&ca_option, m_loc, entityID, style, OBJ_SEQALIGN, featureOrder, groupOrder, TRUE);
3249 }
3250 else
3251 return NULL;
3252
3253
3254 }
3255
3256 /*###################################################################
3257 #
3258 # functions related to collect Seq-feat, Bioseq and Bioseq-seg
3259 #
3260 ###################################################################*/
3261
3262
get_mapmarker_info(UserObjectPtr uop,Uint4Ptr extra,Uint2Ptr bin_order)3263 static void get_mapmarker_info(UserObjectPtr uop, Uint4Ptr extra, Uint2Ptr bin_order)
3264 {
3265 ObjectIdPtr oip;
3266 Int4 val;
3267 Uint4 temp;
3268 UserFieldPtr ufp;
3269
3270 temp = *extra;
3271
3272 while(uop)
3273 {
3274 oip = uop->type;
3275 if(oip && oip->str != NULL)
3276 {
3277 if(StringCmp(oip->str, "MapMarkerInfo") == 0)
3278 {
3279 ufp = uop->data;
3280 while(ufp)
3281 {
3282 oip = ufp->label;
3283 if(StringCmp(oip->str, "Marker Type") == 0)
3284 {
3285 if(ufp->choice == 2)
3286 {
3287 val = ufp->data.intvalue;
3288 switch(val)
3289 {
3290 case FRAME_WORK:
3291 temp |= EXTRA_FRAME_WORK;
3292 break;
3293 case RECMIN:
3294 temp |= EXTRA_RECMIN;
3295 break;
3296 case LIKELY:
3297 temp |= EXTRA_LIKELY;
3298 break;
3299 case MDUP:
3300 temp |= EXTRA_MDUP;
3301 break;
3302 case DUP:
3303 temp |= EXTRA_DUP;
3304 break;
3305
3306 case CONTIG_STS:
3307 temp |= EXTRA_CONTIG_STS;
3308 break;
3309 default:
3310 break;
3311 }
3312 *extra = temp;
3313 }
3314 }
3315 if(StringCmp(oip->str, "Bin Order") == 0)
3316 {
3317 if(ufp->choice == 2)
3318 *bin_order = (Uint2)(ufp->data.intvalue);
3319 }
3320
3321 if(StringCmp(oip->str, "Marker Category") == 0)
3322 {
3323 if(ufp->choice == 2)
3324 {
3325 val = ufp->data.intvalue;
3326 switch(val)
3327 {
3328 case EG_YAC_END:
3329 temp |= EXTRA_YAC_END;
3330 break;
3331 case EG_RANDOME:
3332 temp |= EXTRA_RANDOM;
3333 break;
3334
3335 case EG_GENETIC:
3336 temp |= EXTRA_GENETIC;
3337 break;
3338
3339 case EG_GENE:
3340 temp |= EXTRA_GENE;
3341 break;
3342 case EG_EST:
3343 temp |= EXTRA_EST;
3344 break;
3345 case EG_MISC:
3346 temp |= EXTRA_MISC;
3347 break;
3348 default:
3349 break;
3350 }
3351 }
3352 }
3353 ufp = ufp->next;
3354 }
3355 }
3356 else if(StringCmp(oip->str, "Marker Category") == 0)
3357 {
3358 ufp = uop->data;
3359 while(ufp)
3360 {
3361 if(ufp->choice == 2)
3362 {
3363 val = ufp->data.intvalue;
3364 switch(val)
3365 {
3366 case EG_YAC_END:
3367 temp |= EXTRA_YAC_END;
3368 break;
3369 case EG_RANDOME:
3370 temp |= EXTRA_RANDOM;
3371 break;
3372
3373 case EG_GENETIC:
3374 temp |= EXTRA_GENETIC;
3375 break;
3376
3377 case EG_GENE:
3378 temp |= EXTRA_GENE;
3379 break;
3380 case EG_EST:
3381 temp |= EXTRA_EST;
3382 break;
3383 case EG_MISC:
3384 temp |= EXTRA_MISC;
3385 break;
3386 default:
3387 break;
3388 }
3389 }
3390 ufp = ufp->next;
3391 }
3392 }
3393 }
3394 uop = uop->next;
3395 }
3396 *extra = temp;
3397 }
3398
3399
3400 /*******************************************************************
3401 *
3402 * ck_seqfeat_extra: check if there is extra data, such as
3403 * Genbank accessions associated with a GeneRef or Medlines
3404 * associated with a Seq-feat
3405 *
3406 *******************************************************************/
ck_seqfeat_extra(SeqFeatPtr sfp)3407 NLM_EXTERN Uint4 ck_seqfeat_extra(SeqFeatPtr sfp)
3408 {
3409 GeneRefPtr grp;
3410 ValNodePtr db;
3411 DbtagPtr db_tag;
3412 ValNodePtr cit;
3413 ValNodePtr pub;
3414 Boolean has_gb = FALSE, has_med = FALSE;
3415 Uint4 extra_data = 0;
3416
3417
3418 if(sfp->data.choice == 1)
3419 {
3420 grp = sfp->data.value.ptrvalue;
3421
3422 for(db = grp->db; db!=NULL; db = db->next)
3423 {
3424 db_tag = db->data.ptrvalue;
3425 if(StringICmp(db_tag->db, "GenBank") == 0)
3426 {
3427 extra_data |= EXTRA_GENBANK;
3428 break;
3429 }
3430 }
3431 }
3432 for(cit = sfp->cit; !has_med && cit!=NULL; cit = cit->next)
3433 {
3434 if(cit->choice == 3)
3435 has_med = TRUE;
3436 if(cit->choice ==1)
3437 {
3438 pub = (ValNodePtr)(cit->data.ptrvalue);
3439 while(pub)
3440 {
3441 if(pub->choice == PUB_Muid)
3442 {
3443 has_med = TRUE;
3444 break;
3445 }
3446 pub = pub->next;
3447 }
3448 }
3449 }
3450
3451 if(has_med)
3452 extra_data |= EXTRA_MEDLINE;
3453 return extra_data;
3454 }
3455
3456
3457
3458 /******************************************************************
3459 *
3460 * get_bin_order(sfp)
3461 * get the 1000:1 bin data()
3462 *
3463 *******************************************************************/
get_bin_order(SeqFeatPtr sfp)3464 static Uint2 get_bin_order(SeqFeatPtr sfp)
3465 {
3466 GeneRefPtr grp;
3467 ValNodePtr db;
3468 DbtagPtr db_tag;
3469 ObjectIdPtr oip;
3470
3471 if(sfp->data.choice != 1)
3472 return 0;
3473 grp = sfp->data.value.ptrvalue;
3474 if(grp == NULL)
3475 return 0;
3476
3477 for(db = grp->db; db != NULL; db = db->next)
3478 {
3479 db_tag = db->data.ptrvalue;
3480 if(db_tag != NULL && StringCmp(db_tag->db, "1000:1 Bin") ==0)
3481 {
3482 oip = db_tag->tag;
3483 return (Uint2)(oip->id);
3484 }
3485 }
3486
3487 return 0;
3488 }
3489
3490
load_annot_name(SeqAnnotPtr annot,CharPtr annot_db)3491 static Boolean load_annot_name(SeqAnnotPtr annot, CharPtr annot_db)
3492 {
3493 ValNodePtr desc;
3494 CharPtr name, title;
3495 Int4 len;
3496
3497 annot_db[0] = '\0';
3498 if(annot == NULL)
3499 return FALSE;
3500 name = NULL;
3501 title = NULL;
3502 for(desc = annot->desc; desc != NULL; desc = desc->next)
3503 {
3504 if(desc->choice == Annot_descr_name)
3505 {
3506 if(name == NULL)
3507 name = (CharPtr)(desc->data.ptrvalue);
3508 }
3509 if(desc->choice == Annot_descr_title)
3510 {
3511 if(title == NULL)
3512 title = (CharPtr)(desc->data.ptrvalue);
3513 }
3514 }
3515
3516 if(name != NULL)
3517 StringNCpy_0(annot_db, name, 20);
3518 len = StringLen(annot_db);
3519 if(title != NULL && len < 19)
3520 {
3521 StringCat(annot_db, ":");
3522 ++len;
3523 StringNCpy_0(annot_db+len, title, 20-len);
3524 }
3525
3526 return (annot_db[0] != '\0');
3527 }
3528
3529
check_feature_for_landmark(CharPtr label,GeneDataPtr gdata,SeqFeatPtr sfp,GatherContextPtr gcp,Uint2 priority)3530 static Boolean check_feature_for_landmark(CharPtr label, GeneDataPtr gdata, SeqFeatPtr sfp, GatherContextPtr gcp, Uint2 priority)
3531 {
3532 Boolean found;
3533 GeneDataPtr c_gdp;
3534
3535 if(gdata == NULL || sfp == NULL)
3536 return FALSE;
3537 found = FALSE;
3538 c_gdp = NULL;
3539 if(label[0] != '\0')
3540 {
3541 for(c_gdp = gdata; c_gdp != NULL; c_gdp = c_gdp->next)
3542 {
3543 if(StringICmp(c_gdp->symbol, label) == 0)
3544 {
3545 found = TRUE;
3546 break;
3547 }
3548 }
3549 }
3550
3551 if(!found && sfp->data.choice == 1)
3552 {
3553 for(c_gdp = gdata; c_gdp != NULL; c_gdp = c_gdp->next)
3554 {
3555 if(check_landmark(sfp, c_gdp->symbol))
3556 {
3557 found = TRUE;
3558 break;
3559 }
3560 }
3561 }
3562
3563 if(!found)
3564 return FALSE;
3565
3566 if(c_gdp->priority == 0 || priority < c_gdp->priority)
3567 {
3568 c_gdp->entityID = gcp->entityID;
3569 c_gdp->itemID = gcp->itemID;
3570 c_gdp->itemType = gcp->thistype;
3571 c_gdp->priority = priority;
3572 }
3573
3574 StringCpy(label, c_gdp->symbol);
3575 return TRUE;
3576 }
3577
3578
3579
3580 /*******************************************************************
3581 *
3582 * collseqfunc( )
3583 * callback function for collecting sequence related data in
3584 * gather, such as segments, features
3585 *
3586 *******************************************************************/
collseqfunc(GatherContextPtr gcp)3587 static Boolean collseqfunc(GatherContextPtr gcp)
3588 {
3589 SeqFeatPtr sfp;
3590 SeqLocPtr slp;
3591 BioseqPtr bsp;
3592 CollectHeaderPtr chp;
3593 FeatNodePtr fnp;
3594 CollectSeqOptionPtr csop;
3595 ObjMgrTypePtr omtp;
3596
3597 UserObjectPtr uop;
3598 UserFieldPtr ufp;
3599 GatherRangePtr grp;
3600 IvalNodePtr inp;
3601 Uint1 band;
3602 Int2 i;
3603 Int2 label_size;
3604 ValNodePtr delta_node;
3605 Boolean is_gap;
3606 SeqLitPtr slitp;
3607
3608
3609 chp= (CollectHeaderPtr)(gcp->userdata);
3610 chp->subtype = 0;
3611 chp->thislabel[0] = '\0';
3612 chp->ftype [0] = '\0';
3613 csop = chp->csop;
3614 label_size = MIN(100, (Int2)(csop->label_size));
3615
3616
3617 switch (gcp->thistype)
3618 {
3619 case OBJ_SEQANNOT: /*for the cytogenetic map, skip certain
3620 Seq-annot*/
3621 chp->annotDB[0] = '\0';
3622 chp->is_lod_score = is_lod_score_annot((SeqAnnotPtr)(gcp->thisitem));
3623 load_annot_name((SeqAnnotPtr)(gcp->thisitem), chp->annotDB);
3624 if(csop->bsp_type == CYTO_MAP)
3625 {
3626 if(!annot_is_user_defined((SeqAnnotPtr)(gcp->thisitem)))
3627 chp->skip_feature = TRUE;
3628 else
3629 chp->skip_feature = FALSE;
3630 }
3631 break;
3632
3633 case OBJ_BIOSEQ_SEG:
3634 slp = (SeqLocPtr)(gcp->thisitem);
3635 if(is_map_segment(slp)) /*not very reliable*/
3636 return TRUE;
3637 if(chp->maybe_mapid != NULL)
3638 if(SeqIdMatch(chp->maybe_mapid, SeqLocId(slp)))
3639 return TRUE;
3640 fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp->entityID, 0);
3641 MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3642 if(slp->choice == SEQLOC_NULL || slp->choice == SEQLOC_EMPTY)
3643 fnp->follower = TRUE; /*used to present the empty Seq-loc*/
3644 else
3645 {
3646 if(MuskSeqIdWrite (SeqLocId(slp), chp->thislabel, label_size, csop->slabel_format, TRUE, TRUE))
3647 fnp->label = StringSave(chp->thislabel);
3648 }
3649 break;
3650 case OBJ_BIOSEQ_DELTA:
3651 delta_node = (ValNodePtr)(gcp->thisitem);
3652 is_gap = FALSE;
3653 chp->thislabel[0] = '\0';
3654 if(delta_node->choice ==1)
3655 {
3656 slp = delta_node->data.ptrvalue;
3657 if(slp->choice == SEQLOC_NULL || slp->choice == SEQLOC_EMPTY)
3658 is_gap = TRUE;
3659 else
3660 MuskSeqIdWrite (SeqLocId(slp), chp->thislabel, label_size, csop->slabel_format, TRUE, TRUE);
3661
3662 }
3663 else
3664 {
3665 slitp = delta_node->data.ptrvalue;
3666 if(slitp->length == 0 || slitp->seq_data == NULL)
3667 {
3668 is_gap = TRUE;
3669 if(slitp->length > 0)
3670 return TRUE;
3671 }
3672 }
3673
3674 fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp
3675 ->entityID, 0);
3676 MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3677 if(is_gap)
3678 fnp->follower = TRUE;
3679 else if(chp->thislabel[0] != '\0')
3680 fnp->label = StringSave(chp->thislabel);
3681 break;
3682
3683 case OBJ_BIOSEQ:
3684 fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp->entityID, 0);
3685 MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3686 bsp = (BioseqPtr) gcp->thisitem;
3687 if(MuskSeqIdWrite(bsp->id, chp->thislabel, label_size, csop->slabel_format, TRUE, FALSE))
3688 fnp->label = StringSave(chp->thislabel);
3689 break;
3690
3691 case OBJ_BIOSEQ_MAPFEAT:
3692 case OBJ_SEQFEAT:
3693 if(gcp->thistype == OBJ_SEQFEAT && chp->skip_feature)
3694 return TRUE;
3695 sfp = (SeqFeatPtr) gcp->thisitem;
3696 /* if(gcp->thistype == OBJ_SEQFEAT)
3697 {
3698 if(chp->filter_level == gcp->seglevel +1)
3699 return TRUE;
3700 } */
3701 omtp = ObjMgrTypeFind (chp->omp, OBJ_SEQFEAT, NULL, NULL);
3702 if(omtp == NULL)
3703 return TRUE;
3704 if (omtp->subtypefunc != NULL)
3705 chp->subtype = (*(omtp->subtypefunc)) (gcp->thisitem);
3706 else
3707 chp->subtype = 0;
3708 if(gcp->thistype == OBJ_SEQFEAT &&
3709 csop->features[chp->subtype] == 0) /*filter unwanted features*/
3710 return TRUE;
3711
3712 /*tolerate the unknown band*/
3713 /*
3714 if(sfp->data.choice == 14 && gcp->thistype == OBJ_BIOSEQ_MAPFEAT)
3715 {
3716 uop = sfp->data.value.ptrvalue;
3717 band = get_band_type(uop);
3718 if(band == 0)
3719 return TRUE;
3720 }
3721 */
3722 fnp = CreateFeatNode (&(chp->features), &(chp->prev_feat), gcp->thistype, gcp->itemID, gcp->entityID, chp->subtype);
3723
3724 /*special collection for the LOD scores*/
3725 if(chp->is_lod_score && gcp->thistype == OBJ_SEQFEAT) /*it is the LOD score data*/
3726 {
3727 fnp->extra_data = EXTRA_LOD_SCORE;
3728 fnp->bin_order = GetLODScoreBitValue(sfp);
3729 StringCpy(fnp->annotDB, chp->annotDB);
3730 MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3731 return TRUE;
3732 }
3733
3734 fnp->has_product = (sfp->product !=NULL);
3735 fnp->extra_data = ck_seqfeat_extra(sfp); /*extra data associated with a Gene-ref*/
3736 get_mapmarker_info(sfp->ext, &(fnp->extra_data), &(fnp->bin_order));
3737 if(fnp->bin_order == 0) /*just as a backup*/
3738 fnp->bin_order = get_bin_order(sfp);
3739 if(gcp->thistype == OBJ_SEQFEAT && chp->annotDB[0] != '\0')
3740 StringCpy(fnp->annotDB, chp->annotDB);
3741 else
3742 fnp->annotDB[0]= '\0';
3743 MemCopy(&(fnp->extremes), &(gcp->extremes), sizeof(GatherRange));
3744
3745 /*special collection for cytogenetic band*/
3746 if(sfp->data.choice == 14 && gcp->thistype == OBJ_BIOSEQ_MAPFEAT)
3747 {
3748 uop = sfp->data.value.ptrvalue;
3749 band = get_band_type(uop);
3750 fnp->band = band;
3751 fnp->label = StringSave(get_band_name(uop));
3752 if(band < BAND_POINT) /*for flybase*/
3753 {
3754 for(ufp = uop->data; ufp!=NULL; ufp=ufp->next)
3755 {
3756 if(is_label_match(ufp->label, "Subdivision"))
3757 fnp->pos_label = StringSave(ufp->data.ptrvalue);
3758 }
3759 }
3760 }
3761 else /*for non-cyto band*/
3762 {
3763 /* if((gcp->thistype == OBJ_BIOSEQ_MAPFEAT) || collect_feature_label(csop->flabel_format[chp->subtype])) */
3764 /* check the landmark genes*/
3765
3766 if(collect_feature_label(csop->flabel_format[chp->subtype]))
3767 {
3768 if(omtp->labelfunc !=NULL)
3769 (*(omtp->labelfunc)) (gcp->thisitem, chp->thislabel, label_size, OM_LABEL_CONTENT);
3770 fnp->landmark = check_feature_for_landmark(chp->thislabel, chp->gdata, sfp, gcp, chp->priority);
3771 if(fnp->landmark == FALSE && omtp->labelfunc != NULL &&
3772 csop->flabel_format[chp->subtype] != OM_LABEL_CONTENT)
3773 (*(omtp->labelfunc)) (gcp->thisitem, chp->thislabel, label_size, csop->flabel_format[chp->subtype]);
3774
3775 }
3776
3777 if(chp->thislabel[0] != '\0')
3778 fnp->label = StringSave(chp->thislabel);
3779 slp = sfp->location; /*collect the intervals*/
3780 if(slp->choice == SEQLOC_PACKED_PNT || csop->nointerval == FALSE)
3781 {
3782 grp = gcp->rdp;
3783 for(i=0; (grp!=NULL) && i<gcp->num_interval; ++i)
3784 {
3785 inp = MemNew(sizeof(IvalNode));
3786 MemCopy(&(inp->gr), grp, sizeof(GatherRange));
3787 ValNodeAddPointer(&(fnp->interval), (Uint1)(i+1), (Pointer)inp);
3788 ++grp;
3789 }
3790 }
3791 }
3792 break;
3793 default:
3794 break;
3795 }
3796
3797 return TRUE;
3798 }
3799
3800
ignore_feature(BoolPtr f_list)3801 static Boolean ignore_feature(BoolPtr f_list)
3802 {
3803 Int2 i;
3804
3805 if(f_list == NULL)
3806 return TRUE;
3807 for(i = 0; i<FEATDEF_ANY; ++i)
3808 if(f_list[i])
3809 return FALSE;
3810 return TRUE;
3811 }
3812
3813 /***********************************************************************
3814 *
3815 * CollectItemForSeqLoc(slp, entityID, left, is_aa, csop)
3816 * Collect sequences, features for a Seq-loc
3817 * slp: the target Seq-loc
3818 * entityID: the top level entityID for the current sequence
3819 * left: the left offset on the graph
3820 * is_aa: if TRUE, set get_feats_product flag to TRUE
3821 * csop: the collection option
3822 *
3823 *
3824 ***********************************************************************/
CollectItemForSeqLocEx(SeqLocPtr slp,Uint2 entityID,Int4 left,Boolean is_aa,SeqIdPtr maybe_mapid,CollectSeqOptionPtr csop,GeneDataPtr gdata,Uint2 priority,Boolean forceSeglevelsTo1)3825 NLM_EXTERN ValNodePtr CollectItemForSeqLocEx(SeqLocPtr slp, Uint2 entityID, Int4 left, Boolean is_aa, SeqIdPtr maybe_mapid, CollectSeqOptionPtr csop, GeneDataPtr gdata, Uint2 priority, Boolean forceSeglevelsTo1)
3826 {
3827 GatherScope gs;
3828 CollectHeader ch;
3829
3830 if(slp == NULL || entityID == 0 || csop == NULL)
3831 return NULL;
3832 ch.omp = ObjMgrGet(); /*set up the options*/
3833 ch.features = NULL;
3834 ch.csop = csop;
3835 ch.maybe_mapid = maybe_mapid;
3836 ch.filter_level = csop->filter_level;
3837 ch.gdata = gdata;
3838 ch.priority = priority;
3839 ch.skip_feature = FALSE;
3840 ch.prev_feat = NULL;
3841 ch.prev_align = NULL;
3842 ch.index = 0;
3843 ch.annotDB[0] = '\0';
3844 ch.is_lod_score = FALSE;
3845
3846
3847 MemSet((Pointer)&gs, 0, sizeof (GatherScope));
3848 gs.get_feats_location = TRUE;
3849 gs.get_feats_product = is_aa;
3850 MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
3851
3852 gs.ignore[OBJ_SEQENTRY] = FALSE;
3853 gs.ignore[OBJ_BIOSEQ_SEG] = FALSE;
3854 gs.ignore[OBJ_BIOSEQ] = FALSE;
3855 gs.ignore[OBJ_BIOSEQ_MAPFEAT] = FALSE;
3856 gs.ignore[OBJ_BIOSEQ_DELTA] = FALSE;
3857 if(!ignore_feature(csop->features))
3858 {
3859 gs.ignore[OBJ_SEQANNOT] = FALSE;
3860 gs.ignore[OBJ_SEQFEAT] = FALSE;
3861 }
3862
3863 gs.nointervals = csop->nointerval;
3864 gs.seglevels = csop->seglevels;
3865 /*gs.stop_on_annot = TRUE;*/
3866 if(gs.seglevels == 0)
3867 {
3868 gs.ignore_top = FALSE;
3869 gs.stop_on_annot = FALSE;
3870 /* gs.ignore_top = FALSE;
3871 gs.stop_on_annot = TRUE; */
3872 }
3873 else if (forceSeglevelsTo1)
3874 {
3875 gs.ignore_top = TRUE; /* JK */
3876 gs.stop_on_annot = FALSE; /* JK */
3877 }
3878 else
3879 {
3880 gs.ignore_top = FALSE;
3881 gs.stop_on_annot = TRUE;
3882 }
3883 gs.currlevel = 0;
3884 gs.split_packed_pnt = TRUE;
3885
3886
3887 for(; slp!= NULL; slp = slp->next)
3888 {
3889 gs.offset = left;
3890 gs.target = slp;
3891 GatherEntity(entityID, (Pointer)(&ch), collseqfunc, &gs);
3892 left += SeqLocLen(slp);
3893 }
3894 return ch.features;
3895 }
3896
CollectItemForSeqLoc(SeqLocPtr slp,Uint2 entityID,Int4 left,Boolean is_aa,SeqIdPtr maybe_mapid,CollectSeqOptionPtr csop,GeneDataPtr gdata,Uint2 priority)3897 NLM_EXTERN ValNodePtr CollectItemForSeqLoc(SeqLocPtr slp, Uint2 entityID, Int4 left, Boolean is_aa, SeqIdPtr maybe_mapid, CollectSeqOptionPtr csop, GeneDataPtr gdata, Uint2 priority)
3898 {
3899 return CollectItemForSeqLocEx(slp, entityID, left, is_aa, maybe_mapid, csop, gdata, priority, FALSE);
3900 }
3901
is_segmap_align_annot(SeqAnnotPtr annot)3902 static Uint1 is_segmap_align_annot(SeqAnnotPtr annot)
3903 {
3904 UserObjectPtr uop;
3905 ValNodePtr desc;
3906 ObjectIdPtr oip;
3907 UserFieldPtr ufp;
3908
3909 if(annot == NULL)
3910 return 0;
3911 if(annot->type != 2)
3912 return 0;
3913 if(is_annot_for_hist_alignment(annot))
3914 return 0;
3915
3916 desc =annot->desc;
3917 while(desc)
3918 {
3919 if(desc->choice == Annot_descr_user)
3920 {
3921 uop = desc->data.ptrvalue;
3922 if(uop->type)
3923 {
3924 oip = uop->type;
3925 if(StringCmp(oip->str, "SegMap STS Alignment") == 0)
3926 {
3927 ufp = uop->data;
3928 if(ufp && ufp->choice == 2)
3929 return (Uint1)(ufp->data.intvalue);
3930 }
3931 }
3932 }
3933 desc = desc->next;
3934 }
3935 return 0;
3936 }
3937
create_gr_data(SeqAlignPtr align,SeqLocPtr m_loc,Int4 m_left)3938 static GatherRangePtr create_gr_data(SeqAlignPtr align, SeqLocPtr m_loc, Int4 m_left)
3939 {
3940 Int2 i, num;
3941 SeqAlignPtr curr;
3942 GatherRangePtr grp;
3943
3944 for(num = 0, curr = align; curr!= NULL; curr = curr->next)
3945 ++num;
3946 if( num == 0)
3947 return NULL;
3948
3949 grp = MemNew((size_t)num * sizeof(GatherRange));
3950 for(i =0, curr = align; curr != NULL; curr = curr->next, ++i)
3951 {
3952 if(!SeqLocOffset (m_loc, curr->bounds, &(grp[i]), m_left))
3953 {
3954 grp[i].left = -1;
3955 grp[i].right = -1;
3956 }
3957 }
3958 return grp;
3959 }
3960
/*
 * Insert an integer node (choice = type, intvalue = pos) into a list kept
 * in ascending intvalue order.  The new node goes in front of the first
 * node whose value is strictly greater than pos, so equal values keep
 * their insertion order; it is appended when no such node exists.
 */
static void add_int_with_order(ValNodePtr PNTR head, Uint1 type, Int4 pos)
{
	ValNodePtr PNTR link;
	ValNodePtr node;

	if(*head == NULL)
	{
		ValNodeAddInt(head, type, pos);
		return;
	}

	node = ValNodeNew(NULL);
	node->choice = type;
	node->data.intvalue = pos;

	/* advance to the link that should point at the new node */
	link = head;
	while(*link != NULL && !(pos < (*link)->data.intvalue))
		link = &((*link)->next);

	node->next = *link;
	*link = node;
}
3994
3995
3996
/*
 * For every AlignNode in anp_list that is not on the same Bioseq as m_loc,
 * look through the alignments of halign that overlap the node's extremes.
 * Each Std-seg location on the node's sequence that SeqLocOffset can map
 * onto the node's interval is recorded, in ascending position, in the
 * mismatch list of the node's first AlignSeg with choice annot_type.
 * An AlignSeg covering the node's extremes is created when the node has
 * none.  Entries of anp_list with choice OBJ_SEQANNOT are skipped.
 */
static void load_open_close_sts_mark(SeqAlignPtr halign, ValNodePtr anp_list, Uint1 annot_type, SeqLocPtr m_loc, Int4 m_left)
{
	SeqAlignPtr align;
	AlignNodePtr anp;
	AlignSegPtr asp;
	SeqIdPtr sip;
	SeqLocPtr slp;
	GatherRange gr;
	GatherRangePtr grp;
	Int4 start, stop;
	Uint1 strand;
	SeqInt sint;
	SeqLoc sl;
	StdSegPtr ssp;
	Int4 e_left, e_right;
	Int4 i;	/* indexes grp, which has one entry per alignment in halign */

	/* position of every alignment on the master location, computed once */
	grp = create_gr_data(halign, m_loc, m_left);
	if(grp == NULL)
		return;

	for(; anp_list != NULL; anp_list = anp_list->next)
	{
		if(anp_list->choice == OBJ_SEQANNOT)
			continue;
		anp = anp_list->data.ptrvalue;
		if(anp == NULL)
			continue;
		sip = anp->sip;
		if(SeqIdForSameBioseq(sip, SeqLocId(m_loc)))
			continue;

		/* interval of the node on its own sequence; a negative seqpos
		 * encodes the minus strand */
		if(anp->seqpos < 0)
		{
			stop = ABS(anp->seqpos);
			start = stop - (anp->extremes.right - anp->extremes.left);
			strand = Seq_strand_minus;
		}
		else
		{
			start = anp->seqpos;
			stop = start + (anp->extremes.right - anp->extremes.left);
			strand = Seq_strand_plus;
		}

		/* stack-built Seq-loc wrapping that interval */
		sint.from = start;
		sint.to = stop;
		sint.strand = strand;
		sint.id = sip;
		sl.choice = SEQLOC_INT;
		sl.data.ptrvalue = &sint;
		sl.next = NULL;

		e_left = anp->extremes.left;
		e_right = anp->extremes.right;
		asp = anp->segs;

		for(align = halign, i = 0; align != NULL; align = align->next, ++i)
		{
			/* skip alignments that could not be mapped or do not overlap */
			if(grp[i].left == -1)
				continue;
			if(e_left > grp[i].right || e_right < grp[i].left)
				continue;

			for(ssp = align->segs; ssp != NULL; ssp = ssp->next)
			{
				/* find the location of this segment on the node's sequence */
				for(slp = ssp->loc; slp != NULL; slp = slp->next)
				{
					if(SeqIdMatch(SeqLocId(slp), sip))
						break;
				}
				if(slp == NULL)
					continue;
				if(!SeqLocOffset (&sl, slp, &gr, e_left))
					continue;

				if(asp == NULL)
				{
					/* node has no segment yet: create one spanning it */
					asp = MemNew(sizeof(AlignSeg));
					MemCopy(&(asp->gr), &(anp->extremes), sizeof(GatherRange));
					asp->type = REG_SEG;
					anp->segs = asp;
				}
				add_int_with_order(&(asp->mismatch), annot_type, gr.left);
			}
		}
	}

	MemFree(grp);
}
4099
4100
4101 typedef struct segmap_data{
4102 ValNodePtr anp_list;
4103 SeqLocPtr m_loc;
4104 Int4 left;
4105 }SegMapData, PNTR SegMapDataPtr;
4106
coll_segmap_func(GatherContextPtr gcp)4107 static Boolean coll_segmap_func(GatherContextPtr gcp)
4108 {
4109 ValNodePtr anp_list;
4110 SeqAnnotPtr annot;
4111 Uint1 annot_type;
4112 SeqAlignPtr align;
4113 SegMapDataPtr smdp;
4114
4115 smdp = (SegMapDataPtr)(gcp->userdata);
4116 if(smdp == NULL || smdp->anp_list == NULL || smdp->m_loc == NULL)
4117 return FALSE;
4118 anp_list = smdp->anp_list;
4119
4120 annot = (SeqAnnotPtr)(gcp->thisitem);
4121 if(annot == NULL || annot->type != 2)
4122 return TRUE;
4123 annot_type = is_segmap_align_annot(annot);
4124 if(annot_type == 0)
4125 return TRUE;
4126 align = annot->data;
4127 load_open_close_sts_mark(align, anp_list, annot_type, smdp->m_loc, smdp->left);
4128
4129 return TRUE;
4130 }
4131
4132
4133 /*******************************************************************
4134 *
4135 * void CollectSegMapSTSAlign( entityID, anp_list)
4136 * look for the sts alignment from segmap stored as Seq-annot in
4137 * in entityID. Add the alignment as the mismatch marker in the
4138 * AlignSeg of the anp_list
4139 *
4140 *******************************************************************/
CollectSegMapSTSAlign(Uint2 entityID,ValNodePtr anp_list,SeqLocPtr m_loc,Int4 m_left)4141 NLM_EXTERN void CollectSegMapSTSAlign( Uint2 entityID, ValNodePtr anp_list, SeqLocPtr m_loc, Int4 m_left)
4142 {
4143 GatherScope gs;
4144 SegMapData smd;
4145
4146 if(entityID == 0 || anp_list == NULL)
4147 return;
4148
4149 MemSet((Pointer)&gs, 0, sizeof (GatherScope));
4150 MemSet((Pointer)(gs.ignore), (int)TRUE, (size_t)(OBJ_MAX)*sizeof(Boolean));
4151 gs.ignore[OBJ_SEQANNOT] = FALSE;
4152
4153 smd.anp_list = anp_list;
4154 smd.m_loc = m_loc;
4155 smd.left = m_left;
4156 GatherEntity(entityID, (Pointer)(&smd), coll_segmap_func, &gs);
4157 }
4158
4159
4160 /*#####################################################################
4161 #
4162 # functions related to the layout for FeatNode
4163 #
4164 ######################################################################*/
4165
4166
4167
4168 /***********************************************************************
4169 * SortFeatNode(list)
4170 * HeapSort the FeatNode list to the ascending order of fnp->left
4171 * return the head of the sorted list
4172 *
4173 ************************************************************************/
4174 static Uint1Ptr featureSortOrder;
4175 static Uint1Ptr groupSortOrder;
4176
/*
 * Compare two interval lists pairwise.  At the first pair that differs,
 * the list whose range has the larger left (then the larger right) sorts
 * first (-1).  Pairs with a NULL payload are skipped.  If every compared
 * pair is equal, the list with intervals left over sorts first; equal
 * lists return 0.
 */
static int FeatNodeIntervalCompare (ValNodePtr vnp1, ValNodePtr vnp2)
{
	GatherRangePtr a, b;

	for(; vnp1 != NULL && vnp2 != NULL; vnp1 = vnp1->next, vnp2 = vnp2->next)
	{
		a = (GatherRangePtr) vnp1->data.ptrvalue;
		b = (GatherRangePtr) vnp2->data.ptrvalue;
		if(a == NULL || b == NULL)
			continue;

		if(a->left != b->left)
			return (a->left > b->left) ? -1 : 1;
		if(a->right != b->right)
			return (a->right > b->right) ? -1 : 1;
	}

	if(vnp1 != NULL)
		return -1;
	if(vnp2 != NULL)
		return 1;
	return 0;
}
4218
FeatNodeCompProc(VoidPtr ptr1,VoidPtr ptr2)4219 static int LIBCALLBACK FeatNodeCompProc (VoidPtr ptr1, VoidPtr ptr2)
4220 {
4221 FeatNodePtr fnp1;
4222 FeatNodePtr fnp2;
4223 ValNodePtr vnp1;
4224 ValNodePtr vnp2;
4225 GatherRange gr1, gr2;
4226 Uint1 group1, group2;
4227 Uint1 order1, order2;
4228 int rsult;
4229
4230 if (ptr1 != NULL && ptr2 != NULL) {
4231 vnp1 = *((ValNodePtr PNTR) ptr1);
4232 vnp2 = *((ValNodePtr PNTR) ptr2);
4233 if (vnp1 != NULL && vnp2 != NULL) {
4234 fnp1 = (FeatNodePtr) vnp1->data.ptrvalue;
4235 fnp2 = (FeatNodePtr) vnp2->data.ptrvalue;
4236 if (fnp1 != NULL && fnp2 != NULL) {
4237 gr1 = fnp1->extremes;
4238 gr2 = fnp2->extremes;
4239 /*
4240 if (gr1.left > gr2.left) {
4241 return 1;
4242 } else if (gr1.left < gr2.left) {
4243 return -1;
4244 } else if (gr1.right > gr2.right) {
4245 return 1;
4246 } else if (gr1.right < gr2.right) {
4247 return -1;
4248 } else */
4249 if ((rsult = FeatNodeIntervalCompare (fnp1->interval, fnp2->interval)) != 0) {
4250 return rsult;
4251 } else {
4252 if(featureSortOrder == NULL || groupSortOrder == NULL)
4253 return 0;
4254 else
4255 {
4256 group1 = groupSortOrder[fnp1->feattype];
4257 group2 = groupSortOrder[fnp2->feattype];
4258 if(group1 !=group2)
4259 return -1;
4260 order1 = featureSortOrder[fnp1->feattype];
4261 order2 = featureSortOrder[fnp2->feattype];
4262 if(order1 < order2)
4263 return -1;
4264 if(order1 > order2)
4265 return 1;
4266 return 0;
4267 }
4268 }
4269 } else {
4270 return 0;
4271 }
4272 } else {
4273 return 0;
4274 }
4275 } else {
4276 return 0;
4277 }
4278 }
4279
4280
4281
4282
4283 /***********************************************************************
4284 *
4285 * SortFeatNode(list)
4286 * sort a list of FeatNode to the ascending order of (extremes.left,
4287 * extremes.right)
4288 *
4289 **********************************************************************/
4290
SortFeatNode(ValNodePtr fnp_list,Uint1Ptr featureOrder,Uint1Ptr groupOrder)4291 NLM_EXTERN ValNodePtr SortFeatNode(ValNodePtr fnp_list, Uint1Ptr featureOrder, Uint1Ptr groupOrder)
4292 {
4293 featureSortOrder = featureOrder;
4294 groupSortOrder = groupOrder;
4295 return SortValNode(fnp_list, FeatNodeCompProc);
4296 }
4297
4298
AlignNodeCompProc(VoidPtr ptr1,VoidPtr ptr2)4299 static int LIBCALLBACK AlignNodeCompProc (VoidPtr ptr1, VoidPtr ptr2)
4300 {
4301 AlignNodePtr anp1, anp2;
4302 ValNodePtr vnp1;
4303 ValNodePtr vnp2;
4304 GatherRange gr1, gr2;
4305
4306 if (ptr1 != NULL && ptr2 != NULL) {
4307 vnp1 = *((ValNodePtr PNTR) ptr1);
4308 vnp2 = *((ValNodePtr PNTR) ptr2);
4309 if (vnp1 != NULL && vnp2 != NULL) {
4310 anp1 = (AlignNodePtr) vnp1->data.ptrvalue;
4311 anp2 = (AlignNodePtr) vnp2->data.ptrvalue;
4312 if (anp1 != NULL && anp2 != NULL) {
4313 gr1 = anp1->extremes;
4314 gr2 = anp2->extremes;
4315 /* len1 = anp1->extremes.right - anp1->extremes.left;
4316 len2 = anp2->extremes.right - anp2->extremes.left;
4317 if(len1 > len2)
4318 return -1;
4319 else if(len1 < len2)
4320 return 1; */
4321
4322 if (gr1.left > gr2.left) {
4323 return 1;
4324 } else if (gr1.left < gr2.left) {
4325 return -1;
4326 } else if (gr1.right < gr2.right) {
4327 return 1;
4328 } else if (gr1.right > gr2.right) {
4329 return -1;
4330 } else {
4331 return 0;
4332 }
4333 } else {
4334 return 0;
4335 }
4336 } else {
4337 return 0;
4338 }
4339 } else {
4340 return 0;
4341 }
4342 }
4343
4344 /*sort to make the display showing the alignment that are
4345 * constituent of the master sequence first
4346 */
/*
 * Stable partition of an AlignNode list: nodes whose AlignNode has a
 * non-NULL blocks field are moved to the front, the remaining nodes
 * follow, and both groups keep their relative order.
 * Returns the head of the rearranged list (NULL for an empty list).
 */
static ValNodePtr modify_align_node_block(ValNodePtr anp_list)
{
	ValNodePtr with_blocks = NULL;
	ValNodePtr PNTR link;
	ValNodePtr node;
	AlignNodePtr anp;

	if(anp_list == NULL)
		return NULL;

	/* link always addresses the pointer that leads to the current node,
	 * so unlinking needs no separate "previous" bookkeeping */
	link = &anp_list;
	while((node = *link) != NULL)
	{
		anp = node->data.ptrvalue;
		if(anp->blocks != NULL)
		{
			*link = node->next;
			node->next = NULL;
			ValNodeLink(&with_blocks, node);
		}
		else
		{
			link = &(node->next);
		}
	}

	if(with_blocks == NULL)
		return anp_list;

	ValNodeLink(&with_blocks, anp_list);
	return with_blocks;
}
4383
4384
4385
4386 /***********************************************************************
4387 *
4388 * SortAlignNode(anp_list)
4389 * sort a list of AlignNode to the ascending order of (extremes.left,
4390 * extremes.right)
4391 *
4392 **********************************************************************/
SortAlignNode(ValNodePtr anp_list)4393 NLM_EXTERN ValNodePtr SortAlignNode(ValNodePtr anp_list)
4394 {
4395 ValNodePtr list, curr, prev, last, next;
4396 ValNodePtr head;
4397
4398 if(anp_list == NULL)
4399 return NULL;
4400 list = anp_list;
4401 prev = NULL;
4402 head = NULL;
4403 while(list != NULL)
4404 {
4405 if(prev != NULL)
4406 prev->next = list;
4407 while(list && list->choice == OBJ_SEQANNOT)
4408 {
4409 if(head == NULL)
4410 head = list;
4411 prev = list;
4412 list = list->next;
4413 }
4414 if(list != NULL)
4415 {
4416 curr = list;
4417 last = NULL;
4418 while(curr && curr->choice != OBJ_SEQANNOT)
4419 {
4420 last = curr;
4421 curr = curr->next;
4422 }
4423 next = last->next;
4424 last->next = NULL;
4425 list = SortValNode(list, AlignNodeCompProc);
4426 list = modify_align_node_block(list);
4427 if(prev == NULL)
4428 head = list;
4429 else
4430 prev->next = list;
4431 while(list->next != NULL)
4432 list = list->next;
4433 prev = list;
4434 list = next;
4435 }
4436 }
4437
4438 return head;
4439 }
4440
4441
4442
4443 /*#######################################################################
4444 #
4445 # function related to Layout of AlignNode
4446 #
4447 ########################################################################*/
4448
4449
/***********************************************************************
*
*	find_insert_ypos(left, seglen, ins, l_bound, r_bound, p_pos, space,
*	num)
*	find the level for placing the insertions. Used in both the layout
*	for text and graphic
*	left: to store the left-most position calculated for an insertion
*	seglen: length of the insertion
*	ins: the position for insertions
*	l_bound: the leftmost position in the current line
*	r_bound: the rightmost position in the current line
*	p_pos: position for storing all the layout info
*	space: the spacing required between the position stored for a
*	level in p_pos and the position of a new insertion on that level
*	num: number of elements in p_pos
*	return the current level found for an insertion, or -1 if no
*	level can take it
*
***********************************************************************/
find_insert_ypos(Int4Ptr left,Int4 seglen,Int4 ins,Int4 l_bound,Int4 r_bound,Int4Ptr p_pos,Int4 space,Int2 num)4466 NLM_EXTERN Int2 find_insert_ypos(Int4Ptr left, Int4 seglen, Int4 ins, Int4 l_bound, Int4 r_bound, Int4Ptr p_pos, Int4 space, Int2 num)
4467 {
4468 Int2 i =0;
4469 Int4 start, stop;
4470
4471 --seglen;
4472 *left = MAX(l_bound, (ins-seglen));
4473 start = *left;
4474 for(i =0; i<num; ++i)
4475 {
4476 if(p_pos[i] == 0)
4477 {
4478 p_pos[i] = (*left + seglen);
4479 return i;
4480 }
4481
4482 if(ins > (p_pos[i]+space))
4483 {
4484 start = (*left);
4485 start +=MAX(0, (seglen - (ins - (p_pos[i]+space))));
4486 stop = start+seglen;
4487 if(stop <= r_bound)
4488 {
4489 *left = start;
4490 p_pos[i] = (*left) + seglen;
4491 return i;
4492 }
4493 }
4494
4495
4496 }
4497
4498 return -1;
4499 }
4500
4501
4502
/************************************************************************
*
*	convert_gdata_to_featnode(gdata, cyto_loc, offset)
*	gdata: the GeneDataPtr
*	cyto_loc: the current location on the cytogenetic map
*	offset: the offset of cyto_loc to the graphic viewer
*	for human cytogenetic map, the markers are not shown. But for
*	the markers that were queried, it will display the interval for
*	gene data
*
************************************************************************/
4514
convert_gdata_to_featnode(GeneDataPtr gdata,SeqLocPtr cyto_loc,Int4 offset)4515 NLM_EXTERN ValNodePtr convert_gdata_to_featnode (GeneDataPtr gdata, SeqLocPtr cyto_loc, Int4 offset)
4516 {
4517 ValNodePtr fnp_node;
4518 ValNodePtr prev;
4519 FeatNodePtr fnp;
4520 SeqLocPtr slp;
4521 SeqPntPtr spp;
4522 Boolean mod_fuzz; /*for the old style of FeatNode. To modify the
4523 fuzziness on a point*/
4524 IntFuzzPtr fuzz;
4525 GatherRange gr;
4526 SeqFeatPtr sfp;
4527
4528
4529 if(gdata == NULL || cyto_loc == NULL)
4530 return NULL;
4531 fnp_node = NULL;
4532 prev = NULL;
4533 while(gdata)
4534 {
4535 slp = NULL;
4536 sfp = gdata->sfp;
4537 if(sfp != NULL && sfp->location != NULL)
4538 {
4539 mod_fuzz = FALSE;
4540 if(sfp->location->choice == SEQLOC_PNT)
4541 {
4542 spp = sfp->location->data.ptrvalue;
4543 if(spp->fuzz != NULL)
4544 {
4545 fuzz = spp->fuzz;
4546 if(fuzz->choice == 2) /*range */
4547 {
4548 mod_fuzz = TRUE;
4549 slp = SeqLocIntNew(fuzz->b, fuzz->a, 0, spp->id);
4550 }
4551 }
4552 }
4553 if(!mod_fuzz)
4554 slp = sfp->location;
4555 if(SeqLocOffset(cyto_loc, slp, &gr, offset))
4556 {
4557 fnp = CreateFeatNode (&fnp_node, &prev, gdata->itemType, gdata->itemID, gdata->entityID, gdata->subtype);
4558 MemCopy(&(fnp->extremes), &gr, sizeof(GatherRange));
4559 fnp->label = StringSave(gdata->symbol);
4560 fnp->landmark = TRUE;
4561 if(gdata->sfp != NULL)
4562 {
4563 fnp->extra_data = ck_seqfeat_extra(gdata->sfp);
4564 get_mapmarker_info(gdata->sfp->ext, &(fnp->extra_data), &(fnp->bin_order));
4565 }
4566 }
4567
4568
4569 if(mod_fuzz)
4570 SeqLocFree(slp);
4571 }
4572 gdata = gdata->next;
4573 }
4574
4575 return fnp_node;
4576 }
4577
4578
4579 /*
4580 * for AlignNode that includes insertions, map the insertion
4581 * to gaps on the master sequence
4582 */
4583
4584 /*the data structure for storing the insertion information*/
4585 typedef struct insert_list {
4586 Int4 max_size;
4587 Int4 master_pos; /*position on the master sequence*/
4588 Boolean used; /* this position is acturally at an inserted segment*/
4589 Boolean after; /*does the insertion occurs after the master_pos*/
4590 struct insert_list PNTR next;
4591 }InsertList, PNTR InsertListPtr;
4592
4593
/*
 * load_insertion_list
 *   Records an insertion of insert_size at insert_pos in the list *head,
 *   which is kept ordered by ascending master_pos.
 *
 *   If an entry for insert_pos already exists only its max_size is raised
 *   (its "after" flag is left as first recorded).  Otherwise a new entry is
 *   allocated with MemNew() and linked in front of the first entry whose
 *   master_pos is larger.
 */
static void load_insertion_list(InsertListPtr PNTR head, Int4 insert_pos, Int4 insert_size, Boolean after)
{
    InsertListPtr PNTR link;    /* slot that will point at the new entry */
    InsertListPtr entry;

    for (link = head; *link != NULL; link = &((*link)->next))
    {
        entry = *link;
        if (entry->master_pos == insert_pos)
        {
            entry->max_size = MAX(entry->max_size, insert_size);
            return;
        }
        if (entry->master_pos > insert_pos)
            break;
    }

    entry = MemNew(sizeof(InsertList));
    entry->master_pos = insert_pos;
    entry->max_size = insert_size;
    entry->after = after;
    entry->next = *link;
    *link = entry;
}
4625
/*
 * add_offset_to_featnode
 *   Adds offset to the extremes of every FeatNode in the list and to the
 *   left/right of every interval hanging off each of them.  A NULL list is
 *   a no-op.
 */
static void add_offset_to_featnode(ValNodePtr fnp_node, Int4 offset)
{
    ValNodePtr node;
    ValNodePtr ival_node;
    FeatNodePtr feat;
    IvalNodePtr ival;

    for (node = fnp_node; node != NULL; node = node->next)
    {
        feat = node->data.ptrvalue;
        feat->extremes.left += offset;
        feat->extremes.right += offset;

        ival_node = feat->interval;
        while (ival_node != NULL)
        {
            ival = ival_node->data.ptrvalue;
            ival->gr.left += offset;
            ival->gr.right += offset;
            ival_node = ival_node->next;
        }
    }
}
4647
AddOffsetToAlignNode(AlignNodePtr anp,Int4 offset)4648 NLM_EXTERN void AddOffsetToAlignNode(AlignNodePtr anp, Int4 offset)
4649 {
4650 AlignSegPtr asp;
4651 AlignBlockPtr abp;
4652
4653 anp->extremes.left += offset;
4654 anp->extremes.right += offset;
4655 for(abp = anp->blocks; abp != NULL; abp = abp->next)
4656 {
4657 abp->gr.left += offset;
4658 abp->gr.right += offset;
4659 }
4660
4661 for(asp = anp->segs; asp != NULL; asp = asp->next)
4662 {
4663 if(asp->type == INS_SEG)
4664 {
4665 asp->ins_pos += offset;
4666 asp->gr.left += offset;
4667 }
4668 else
4669 {
4670 asp->gr.left += offset;
4671 asp->gr.right += offset;
4672 }
4673 if(asp->cnp != NULL)
4674 add_offset_to_featnode(asp->cnp, offset);
4675 }
4676 }
4677
/*
 * split_feature_interval
 *   Divides the interval list *p_interval at ins_pos.
 *
 *   Intervals ending at or before ins_pos stay in *p_interval and are
 *   shifted by offset.  The first interval that does not end at or before
 *   ins_pos stops the walk:
 *     - if it starts after ins_pos, the list is cut in front of it and it
 *       is returned, with everything behind it, unshifted;
 *     - if it straddles ins_pos, it is trimmed to end at ins_pos (shifted
 *       by offset) and a new IvalNode covering ins_pos + 1 .. old right end
 *       (unshifted, same strand) is returned as the head of a new list,
 *       followed by the remaining original intervals.
 *
 *   ins_size is accepted but not used.
 *   Returns NULL when every interval ends at or before ins_pos.
 */
static ValNodePtr split_feature_interval(ValNodePtr PNTR p_interval, Int4 offset,
        Int4 ins_pos, Int4 ins_size)
{
    ValNodePtr PNTR link;       /* slot pointing at the interval under test */
    ValNodePtr node;
    ValNodePtr rest;
    ValNodePtr right_list = NULL;
    IvalNodePtr ival;
    IvalNodePtr right_ival;

    link = p_interval;
    while ((node = *link) != NULL)
    {
        ival = node->data.ptrvalue;

        if (ival->gr.right <= ins_pos)
        {
            ival->gr.left += offset;
            ival->gr.right += offset;
            link = &(node->next);
            continue;
        }

        if (ival->gr.left > ins_pos)
        {
            *link = NULL;
            return node;
        }

        /* there is overlap: this interval straddles the insertion point */
        rest = node->next;

        right_ival = MemNew(sizeof(IvalNode));
        right_ival->gr.strand = ival->gr.strand;
        right_ival->gr.left = ins_pos + 1;
        right_ival->gr.right = ival->gr.right;

        ival->gr.left += offset;
        ival->gr.right = ins_pos + offset;
        node->next = NULL;

        ValNodeAddPointer(&right_list, 0, right_ival);
        ValNodeLink(&right_list, rest);
        return right_list;
    }

    return NULL;
}
4728
/*
 * add_insertion_to_featnode
 *   Divides the FeatNode list *pfnp_node at ins_pos and returns the part
 *   lying to the right of it.
 *
 *   For every node, in order:
 *     - extremes.right <= ins_pos: the node stays in *pfnp_node; its
 *       extremes and all of its intervals are shifted by offset;
 *     - extremes.left > ins_pos: the node is unlinked from *pfnp_node and
 *       appended, unshifted, to the returned list;
 *     - otherwise the feature straddles ins_pos: the original is trimmed to
 *       end at ins_pos (shifted by offset) and a copy covering
 *       ins_pos + 1 .. old right end is appended to the returned list, with
 *       its intervals taken from split_feature_interval().
 *
 *   The copy is made with MemCopy() and then given its own label and
 *   pos_label strings.
 *   NOTE(review): any other pointer members of FeatNode are shared between
 *   the original and the copy after MemCopy() - confirm this is safe for
 *   whatever frees these nodes.
 *
 *   Returns the right-hand list (NULL if nothing fell to the right).
 */
static ValNodePtr add_insertion_to_featnode(ValNodePtr PNTR pfnp_node, Int4 offset,
        Int4 ins_pos, Int4 ins_size)
{
    ValNodePtr PNTR link;       /* slot pointing at the node under test */
    ValNodePtr node;
    ValNodePtr ival_node;
    ValNodePtr right_list = NULL;
    FeatNodePtr feat;
    FeatNodePtr right_feat;
    IvalNodePtr ival;

    link = pfnp_node;
    while ((node = *link) != NULL)
    {
        feat = node->data.ptrvalue;

        if (feat->extremes.right <= ins_pos)
        {
            /* entirely left of the insertion */
            ival_node = feat->interval;
            while (ival_node != NULL)
            {
                ival = ival_node->data.ptrvalue;
                ival->gr.left += offset;
                ival->gr.right += offset;
                ival_node = ival_node->next;
            }
            feat->extremes.left += offset;
            feat->extremes.right += offset;
            link = &(node->next);
        }
        else if (feat->extremes.left > ins_pos)
        {
            /* entirely right of the insertion: hand the node over untouched */
            *link = node->next;
            node->next = NULL;
            ValNodeLink(&right_list, node);
        }
        else
        {
            /* resides across the insertion point, needs to split the featnode */
            right_feat = MemNew(sizeof(FeatNode));
            MemCopy((Pointer)right_feat, feat, sizeof(FeatNode));
            if (feat->label != NULL)
                right_feat->label = StringSave (feat->label);
            if (feat->pos_label != NULL)
                right_feat->pos_label = StringSave (feat->pos_label);
            if (feat->annotDB[0] != '\0')
                StringCpy(right_feat->annotDB, feat->annotDB);
            right_feat->extremes.left = ins_pos + 1;
            right_feat->extremes.right = feat->extremes.right;
            right_feat->extremes.strand = feat->extremes.strand;

            ValNodeAddPointer(&right_list, node->choice, right_feat);

            feat->extremes.left += offset;
            feat->extremes.right = ins_pos + offset;

            right_feat->interval = split_feature_interval(&(feat->interval), offset,
                    ins_pos, ins_size);

            link = &(node->next);
        }
    }

    return right_list;
}
4807
4808
find_insertion_size(InsertListPtr ilp,Int4Ptr ins_pos)4809 static Int4 find_insertion_size (InsertListPtr ilp, Int4Ptr ins_pos)
4810 {
4811 while(ilp)
4812 {
4813 if(ilp->master_pos == *ins_pos)
4814 {
4815 if(ilp->after == FALSE)
4816 -- (*ins_pos);
4817 return ilp->max_size;
4818 }
4819 ilp = ilp->next;
4820 }
4821
4822 return 0;
4823 }
/*
 * get_max_insert_size
 *   Hands out the next not-yet-used insertion whose master_pos lies in
 *   [from, to].
 *
 *   On a hit the entry is marked used, *insert_pos receives master_pos
 *   (minus one when the entry's "after" flag is FALSE, so it can become
 *   from - 1, e.g. -1 for a segment starting at 0) and max_size is returned.
 *   Returns -1 when nothing matches; the walk is abandoned early at the
 *   first used entry lying beyond "to".
 *
 *   NOTE(review): earlier revisions computed from + 1 / to + 1 for entries
 *   with after == FALSE but never compared against them; those dead locals
 *   are removed here and the range test is unchanged.  Confirm that testing
 *   master_pos against the unshifted [from, to] is what is intended.
 */
static Int4 get_max_insert_size (InsertListPtr ilp, Int4 from, Int4 to, Int4Ptr insert_pos)
{
    for (; ilp != NULL; ilp = ilp->next)
    {
        if (ilp->used != FALSE)
        {
            /* already consumed by an insertion or an earlier lookup */
            if (ilp->master_pos > to)
                return -1;
            continue;
        }

        if (ilp->master_pos < from || ilp->master_pos > to)
            continue;

        *insert_pos = ilp->master_pos;
        if (ilp->after == FALSE)
            --(*insert_pos);
        ilp->used = TRUE;
        return ilp->max_size;
    }

    return -1;
}
4855
/*
 * get_max_gap_size
 *   Sums max_size over every not-yet-used entry whose master_pos lies in
 *   [from, to] and marks each of them used.  The walk stops at the first
 *   entry whose master_pos is beyond "to".
 *
 *   Returns the accumulated size (0 when nothing matched).
 */
static Int4 get_max_gap_size(InsertListPtr ilp, Int4 from, Int4 to)
{
    Int4 total = 0;

    for (; ilp != NULL; ilp = ilp->next)
    {
        if (!ilp->used && ilp->master_pos >= from && ilp->master_pos <= to)
        {
            ilp->used = TRUE;
            total += ilp->max_size;
        }
        if (ilp->master_pos > to)
            break;
    }

    return total;
}
4877
4878
/*
 * add_offset_to_mismatch
 *   Shifts the integer positions stored in the list *mismatch by offset, up
 *   to and including ins_pos.  The first node whose value is greater than
 *   ins_pos ends the walk: the list is cut in front of it and that node,
 *   with everything behind it, is returned unshifted.
 *
 *   Passing ins_pos == -1 disables the cut, so every node is shifted.
 *   ins_size is accepted but not used.
 *
 *   Returns the cut-off remainder, or NULL when nothing was cut off.
 */
static ValNodePtr add_offset_to_mismatch(ValNodePtr PNTR mismatch, Int4 offset, Int4 ins_pos, Int4 ins_size)
{
    ValNodePtr PNTR link;       /* slot pointing at the node under test */
    ValNodePtr remainder = NULL;

    for (link = mismatch; *link != NULL; link = &((*link)->next))
    {
        if (ins_pos != -1 && (*link)->data.intvalue > ins_pos)
        {
            ValNodeLink(&remainder, *link);
            *link = NULL;
            return remainder;
        }
        (*link)->data.intvalue += offset;
    }

    return remainder;
}
4905
/*
 * reset_insertion_list
 *   Recomputes the "used" flag of every entry for one segment chain: an
 *   entry is used exactly when some segment reachable from asp has an
 *   ins_pos equal to the entry's master_pos.
 *
 *   NOTE(review): the comparison looks at ins_pos of every segment, not
 *   only of INS_SEG segments - confirm ins_pos cannot accidentally match on
 *   other segment types.
 */
static void reset_insertion_list(InsertListPtr ilp, AlignSegPtr asp)
{
    AlignSegPtr seg;

    for (; ilp != NULL; ilp = ilp->next)
    {
        seg = asp;
        while (seg != NULL && seg->ins_pos != ilp->master_pos)
            seg = seg->next;

        /* seg is non-NULL only if the search stopped on a match */
        ilp->used = (Boolean)(seg != NULL);
    }
}
4925
/*
 * refresh_insertion_list
 *   Clears the "used" flag on every entry of the list.
 */
static void refresh_insertion_list(InsertListPtr ilp)
{
    InsertListPtr entry;

    for (entry = ilp; entry != NULL; entry = entry->next)
        entry->used = FALSE;
}
4934
/*
 * get_offset_of_insertion
 *   Adds up max_size of the leading entries of the list, stopping at the
 *   first entry whose master_pos is at or beyond left.
 *
 *   Returns that sum (0 when the first entry already lies at or beyond
 *   left, or when the list is empty).
 */
static Int4 get_offset_of_insertion(InsertListPtr ilp, Int4 left)
{
    Int4 total = 0;

    for (; ilp != NULL && ilp->master_pos < left; ilp = ilp->next)
        total += ilp->max_size;

    return total;
}
4950
/*
 * modify_anp_with_insertion
 *   Re-coordinates one AlignNode against the insertion list ilp so that
 *   every recorded insertion gets columns of its own.
 *
 *   Segments (anp->segs) are walked in order with a running offset that
 *   starts at get_offset_of_insertion() for the node's left extreme:
 *     INS_SEG - when find_insertion_size() knows the position, the segment
 *               is turned into a REG_SEG starting one past ins_pos + offset
 *               (gr.right held the insertion size on entry).  If it is
 *               shorter than the recorded maximum, a GAP_SEG covering the
 *               difference is linked in behind it.
 *     GAP_SEG - shifted by offset and widened by get_max_gap_size().
 *     REG_SEG - split at every position handed out by
 *               get_max_insert_size(): the part up to the insertion is
 *               shifted, a GAP_SEG of the insertion size follows, and the
 *               rest (with its share of features and mismatches) becomes a
 *               new REG_SEG that is examined again.
 *   The node's right extreme is then moved by the final offset.
 *
 *   Blocks (anp->blocks) are only touched when the final offset is
 *   positive; each block is split and shifted the same way, with its offset
 *   recomputed from the insertion list.
 *
 *   anp must not be NULL.  The "used" flags in ilp are rewritten.
 */
static void modify_anp_with_insertion(AlignNodePtr anp, InsertListPtr ilp)
{
    Int4 offset;
    Int4 leftover;
    AlignSegPtr asp, next, new_asp, t_asp, prev;
    Int4 max_insert_size;
    Int4 insert_pos;
    AlignBlockPtr abp, new_abp, next_abp;
    ValNodePtr second_ms_list;

    asp = anp->segs;
    prev = NULL;
    reset_insertion_list(ilp, asp);
    offset = get_offset_of_insertion(ilp, anp->extremes.left);
    anp->extremes.left += offset;

    while (asp != NULL)
    {
        next = asp->next;
        if (asp->type == INS_SEG)
        {
            max_insert_size = find_insertion_size (ilp, &(asp->ins_pos));
            if (max_insert_size > 0)
            {
                /* gr.right is the size of the insertion */
                leftover = max_insert_size - asp->gr.right;

                /* convert the insertion into a REG_SEG, placed after ins_pos */
                asp->gr.left = asp->ins_pos + offset + 1;
                asp->gr.right += (asp->gr.left - 1);
                asp->type = REG_SEG;
                add_offset_to_featnode(asp->cnp, offset + 1);

                /* pad with a gap up to the widest insertion at this position */
                if (leftover > 0)
                {
                    new_asp = MemNew(sizeof(AlignSeg));
                    new_asp->type = GAP_SEG;
                    new_asp->gr.left = asp->gr.right + 1;
                    new_asp->gr.right = new_asp->gr.left + leftover - 1;
                    asp->next = new_asp;
                    new_asp->next = next;
                }
                offset += max_insert_size;
            }
            prev = asp;
        }
        else if (asp->type == GAP_SEG)
        {
            /* a gap: swallow every insertion that falls inside it */
            max_insert_size = get_max_gap_size(ilp, asp->gr.left, asp->gr.right);
            asp->gr.left += offset;
            asp->gr.right += max_insert_size + offset;
            offset += max_insert_size;
            prev = asp;
        }
        else if (asp->type == REG_SEG)
        {
            /* a diagonal: split it at each insertion it spans */
            while (asp != NULL && (max_insert_size =
                    get_max_insert_size (ilp, asp->gr.left,
                    asp->gr.right, &insert_pos)) > 0)
            {
                if (insert_pos == -1)
                {
                    /* insertion at the very beginning: put a gap in front */
                    new_asp = MemNew(sizeof(AlignSeg));
                    new_asp->type = GAP_SEG;
                    new_asp->gr.left = asp->gr.left;
                    new_asp->gr.right = asp->gr.left + max_insert_size - 1;
                    if (prev == NULL)
                        anp->segs = new_asp;
                    else
                        prev->next = new_asp;
                    prev = new_asp;
                    new_asp->next = asp;
                }
                else
                {
                    /* mismatches up to insert_pos are shifted, the rest split off */
                    if (asp->mismatch != NULL)
                        second_ms_list = add_offset_to_mismatch(&(asp->mismatch), offset,
                                insert_pos, max_insert_size);
                    else
                        second_ms_list = NULL;

                    leftover = asp->gr.right - insert_pos;
                    asp->gr.left += offset;
                    asp->gr.right = insert_pos + offset;

                    /* the gap standing in for the insertion */
                    new_asp = MemNew(sizeof(AlignSeg));
                    new_asp->type = GAP_SEG;
                    new_asp->gr.left = asp->gr.right + 1;
                    new_asp->gr.right = insert_pos + offset + max_insert_size;
                    new_asp->next = next;
                    t_asp = asp;        /* remember the segment being split */
                    asp->next = new_asp;
                    asp = new_asp;

                    if (leftover > 0)
                    {
                        /* the rest of the diagonal, still in old coordinates;
                           it is examined again by the enclosing loop */
                        new_asp = MemNew(sizeof(AlignSeg));
                        new_asp->type = REG_SEG;
                        new_asp->gr.left = insert_pos + 1;
                        new_asp->gr.right = insert_pos + leftover;
                        new_asp->cnp = add_insertion_to_featnode(&(t_asp->cnp), offset,
                                insert_pos, max_insert_size);
                        new_asp->mismatch = second_ms_list;
                        new_asp->next = next;
                        asp->next = new_asp;
                        asp = new_asp;
                        prev = new_asp;
                    }
                    else
                    {
                        /* The insertion sits at the right end of the segment.
                           Shift the features of the segment that was split:
                           asp now points at the new gap, which has none. */
                        add_offset_to_featnode(t_asp->cnp, offset);
                        prev = asp;
                        /* No break here: the offset below must still be
                           advanced so that later segments clear the gap.
                           asp == NULL ends the loop right afterwards. */
                        asp = NULL;
                    }
                }

                offset += max_insert_size;
            }   /* end of while */

            if (asp != NULL)
            {
                /* no (further) insertion inside: plain shift */
                asp->gr.left += offset;
                asp->gr.right += offset;
                if (asp->cnp)
                    add_offset_to_featnode(asp->cnp, offset);
                if (asp->mismatch != NULL)
                    add_offset_to_mismatch(&(asp->mismatch), offset, -1, -1);
                prev = asp;
            }
        }
        asp = next;
    }
    anp->extremes.right += offset;

    if (offset > 0 && anp->blocks != NULL)
    {
        refresh_insertion_list(ilp);
        abp = anp->blocks;
        while (abp)
        {
            next_abp = abp->next;
            abp->next = NULL;
            offset = get_offset_of_insertion(ilp, abp->gr.left);
            while (abp && (max_insert_size = get_max_insert_size (ilp,
                    abp->gr.left, abp->gr.right, &insert_pos)) > 0)
            {
                if (insert_pos == -1)
                {
                    /* insertion at the very beginning */
                    offset += max_insert_size;
                }
                else
                {
                    leftover = abp->gr.right - insert_pos;
                    if (leftover <= 0)
                        break;      /* reach the end */

                    new_abp = MemNew(sizeof(AlignBlock));
                    new_abp->gr.left = insert_pos + 1;
                    new_abp->gr.right = abp->gr.right;
                    new_abp->order = abp->order;
                    new_abp->next = next_abp;

                    abp->gr.left += offset;
                    abp->gr.right = insert_pos + offset;
                    abp->next = new_abp;
                    if (abp->gr.strand != Seq_strand_minus)
                    {
                        /* move the strand to the right-hand piece */
                        new_abp->gr.strand = abp->gr.strand;
                        abp->gr.strand = 0;
                    }
                    abp = new_abp;
                    offset += max_insert_size;
                }
            }
            if (abp != NULL)
            {
                abp->gr.left += offset;
                abp->gr.right += offset;
                abp->next = next_abp;
            }
            abp = next_abp;
        }
    }
}
5139
/*
 * free_insert_list
 *   Releases every entry of the list with MemFree().  A NULL list is a
 *   no-op.
 */
static void free_insert_list(InsertListPtr ilp)
{
    InsertListPtr doomed;

    while (ilp != NULL)
    {
        doomed = ilp;
        ilp = ilp->next;
        MemFree(doomed);
    }
}
5151
5152
FlatAlignNode(ValNodePtr anp_list)5153 NLM_EXTERN Boolean FlatAlignNode(ValNodePtr anp_list)
5154 {
5155 ValNodePtr curr;
5156 AlignNodePtr master_anp, anp;
5157 AnnotInfoPtr annot_info;
5158 Uint1 align_type;
5159 InsertListPtr ilp;
5160 AlignSegPtr asp;
5161 Int4 p_pos;
5162
5163
5164 master_anp = NULL;
5165 for(curr = anp_list; curr != NULL; curr = curr->next)
5166 {
5167 if(curr->choice == OBJ_SEQANNOT)
5168 {
5169 annot_info = curr->data.ptrvalue;
5170 align_type = get_alignment_type (annot_info);
5171 if(align_type == ALIGN_DNA_TO_PROT ||
5172 align_type == ALIGN_PROT_TO_DNA || align_type == ALIGN_TDNA_TO_TDNA)
5173 return FALSE;
5174 }
5175 else
5176 {
5177 anp = curr->data.ptrvalue;
5178 if(anp->is_master)
5179 master_anp = anp;
5180 }
5181 }
5182 if(master_anp == NULL)
5183 return FALSE;
5184
5185 /*load all the insertions in the alignments*/
5186 ilp = NULL;
5187 for(curr = anp_list; curr != NULL; curr = curr->next)
5188 {
5189 if(curr->choice != OBJ_SEQANNOT)
5190 {
5191 anp = curr->data.ptrvalue;
5192 if(anp != master_anp)
5193 {
5194 p_pos = -1;
5195 for(asp = anp->segs; asp != NULL; asp = asp->next)
5196 {
5197 if(asp->type == INS_SEG)
5198 load_insertion_list(&ilp, asp->ins_pos, asp->gr.right, (Boolean)(p_pos == asp->ins_pos));
5199 else if(asp->type != GAP_SEG)
5200 p_pos = asp->gr.right;
5201 }
5202 }
5203 }
5204 }
5205
5206 if(ilp == NULL)
5207 return FALSE;
5208
5209 /*do the real flatting*/
5210 for(curr = anp_list; curr != NULL; curr = curr->next)
5211 {
5212 if(curr->choice != OBJ_SEQANNOT)
5213 {
5214 anp = curr->data.ptrvalue;
5215 modify_anp_with_insertion(anp, ilp);
5216 }
5217 }
5218
5219
5220 free_insert_list(ilp);
5221 return TRUE;
5222 }
5223
5224
/*
*	Check whether a Seq-annot marks its YACs as bad.
*	Anything on the NHGRI map that is recorded inconsistent will
*	be considered inconsistent. For the Whitehead map, the
*	inconsistent+ambiguous is inconsistent. Inconsistent alone
*	is not considered inconsistent
*/
is_ambiguous_annot(AnnotInfoPtr info,Uint1 db)5232 static Boolean is_ambiguous_annot(AnnotInfoPtr info, Uint1 db)
5233 {
5234 if(info == NULL)
5235 return FALSE;
5236 if(info->annot_type == ANNOT_CONSIST)
5237 {
5238 if(info->consistent == ALIGN_CONSISTENT)
5239 return FALSE;
5240 else
5241 {
5242 if(info->consistent == ALIGN_INCONSISTENT)
5243 {
5244 if(db == YAC_NHGRI)
5245 return TRUE;
5246 else if(db == YAC_MIT)
5247 { /*inconsistent and ambiguous are different*/
5248 if(StringCmp(info->annotDB, "Ambiguous") == 0)
5249
5250 return TRUE;
5251 else
5252 return FALSE;
5253 }
5254 }
5255 }
5256 }
5257
5258 return FALSE;
5259 }
5260
5261 /*
5262 * delete any of the whitehead yacs that only contains
5263 * ambiguous STS hits
5264 */
delete_alignnode(AlignNodePtr anp,Uint1 db)5265 static Boolean delete_alignnode (AlignNodePtr anp, Uint1 db)
5266 {
5267 AlignSegPtr asp;
5268 ValNodePtr curr;
5269 Boolean has_sts_hits;
5270
5271 has_sts_hits = FALSE;
5272 for(asp = anp->segs; asp != NULL; asp = asp->next)
5273 {
5274 if(asp->mismatch != NULL)
5275 {
5276 for(curr = asp->mismatch; curr != NULL; curr = curr->next)
5277 {
5278 if(curr->choice == MISMATCH_CLOSE) /*unambiguous hits*/
5279 return FALSE;
5280 if(curr->choice == MISMATCH_SQUARE)
5281 {
5282 if(db == YAC_NHGRI)
5283 return FALSE;
5284 }
5285 }
5286 has_sts_hits = TRUE;
5287 }
5288 }
5289
5290 return (has_sts_hits == FALSE);
5291 }
5292
5293
5294 /*
5295 * Delete all the bad YACs from the list
5296 * anything on the NHGRI map that is recorded inconsistent will
5297 * be considered inconsistent. For the Whitehead map, the
5298 * inconsistent+ambiguous is inconsistent. Inconsistent alone
5299 * is not considered inconsistent
5300 */
CleanUpAmbiguousYAC(ValNodePtr PNTR anp_node,Uint1 db,SeqIdPtr chr_id)5301 NLM_EXTERN void CleanUpAmbiguousYAC (ValNodePtr PNTR anp_node, Uint1 db, SeqIdPtr chr_id)
5302 {
5303 AnnotInfoPtr info;
5304 AlignNodePtr anp;
5305 ValNodePtr curr, prev, next;
5306 Boolean del_annot;
5307 Boolean del;
5308
5309 prev = NULL;
5310 del_annot = FALSE;
5311
5312 curr = *anp_node;
5313 while(curr)
5314 {
5315 next = curr->next;
5316 del = FALSE;
5317 if(curr->choice == OBJ_SEQANNOT)
5318 {
5319 info = curr->data.ptrvalue;
5320 del_annot = is_ambiguous_annot(info, db);
5321 }
5322 else if(!del_annot || chr_id != NULL)
5323 {
5324 anp = curr->data.ptrvalue;
5325 if(chr_id != NULL)
5326 {
5327 if(anp->sip != NULL)
5328 {
5329 if(SeqIdMatch(chr_id, anp->sip))
5330 del = TRUE;
5331 }
5332 }
5333 if(!del)
5334 del = delete_alignnode (anp, db);
5335 }
5336 if(del_annot)
5337 del = TRUE;
5338
5339 if(del)
5340 {
5341 if(prev == NULL)
5342 *anp_node = next;
5343 else
5344 prev->next = next;
5345 curr->next = NULL;
5346 FreeAlignNode(curr);
5347 }
5348 else
5349 prev = curr;
5350 curr = next;
5351 }
5352 }
5353
5354
5355
5356 /*****************************************************************
5357 *
5358 * check if the AlignNode only contains Seq-annot or it
5359 * has real sequence alignment.
5360 * the empty Seq-annot may be the unaligned contigs in
5361 * Eric Green's map
5362 *
5363 ******************************************************************/
alignode_has_alignments(ValNodePtr aligns)5364 NLM_EXTERN Boolean alignode_has_alignments(ValNodePtr aligns)
5365 {
5366 while(aligns)
5367 {
5368 if(aligns->choice != OBJ_SEQANNOT)
5369 return TRUE;
5370 aligns = aligns->next;
5371 }
5372
5373 return FALSE;
5374 }
5375
5376
5377
5378
5379
5380
5381
5382
5383