/* $Id: txalign.c,v 6.101 2016/09/02 15:01:22 ucko Exp $
***************************************************************************
*                                                                         *
*                             COPYRIGHT NOTICE                            *
*                                                                         *
* This software/database is categorized as "United States Government      *
* Work" under the terms of the United States Copyright Act.  It was       *
* produced as part of the author's official duties as a Government        *
* employee and thus can not be copyrighted.  This software/database is    *
* freely available to the public for use without a copyright notice.      *
* Restrictions can not be placed on its present or future use.            *
*                                                                         *
* Although all reasonable efforts have been taken to ensure the accuracy  *
* and reliability of the software and data, the National Library of       *
* Medicine (NLM) and the U.S. Government do not and can not warrant the   *
* performance or results that may be obtained by using this software,     *
* data, or derivative works thereof.  The NLM and the U.S. Government     *
* disclaim any and all warranties, expressed or implied, as to the        *
* performance, merchantability or fitness for any particular purpose or   *
* use.                                                                    *
*                                                                         *
* In any work or product derived from this material, proper attribution   *
* of the author(s) as the source of the software or data would be         *
* appreciated.                                                            *
*                                                                         *
* ===========================================================================
*
* File Name:  txalign.c
*
* $Revision: 6.101 $
*
* File Description:  Formatting of text alignment for the BLAST output
*
* Modifications:
* --------------------------------------------------------------------------
*
* ==========================================================================
*/
39 
40 #define NLM_GENERATED_CODE_PROTO
41 #include <txalign.h>
42 #include <codon.h>
43 #include <ncbimisc.h>
44 #include <salpacc.h>
45 #include <salpstat.h>
46 #include <fdlKludge.h>
47 #include <blastdef.h>
48 #include <algo/blast/composition_adjustment/composition_constants.h>
49 
/* Size and limit constants. */
#define BUFFER_LENGTH           2048
#define MIN_INS_SPACE           50
#define MAX_GI_NUM              10
#define MAX_DB_NUM              10
#define LENGTH_TO_SHOW_DOWNLOAD 10000

/* Base URLs of the NCBI web resources. */
#define TXALIGN_HREF    "http://www.ncbi.nlm.nih.gov"
#define NEW_ENTREZ_HREF "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
#define WBLAST2_HREF    "http://www.ncbi.nlm.nih.gov/blast/bl2seq/wblast2.cgi"
61 
62 /* Used in make_dumpgnl_links, set in getreq.cpp or getreqcmd.cpp */
63 const char *RID_glb;
64 const char *CDD_RID_glb;
65 /* Used in make_dumpgnl_links, set in format.cpp */
66 const char *Entrez_Query_Term ;
67 int query_number_glb;
68 
69 /*Indicate if db contains sequence with gi*/
70 Boolean DbHasGi=FALSE;
71 
72 int (*tx_fprintf)(FILE*, const char *, ...) = fprintf;
73 #define fprintf tx_fprintf
74 
75 /*
76 	Used by the functions that format the one-line descriptions.
77 */
78 typedef struct _txdfline_struct {
79         struct _txdfline_struct *next;
80         SeqAlignPtr seqalign;
81         SeqIdPtr id;
82         Char *buffer_id;
83         Char *title;
84         Nlm_FloatHi bit_score;
85         Nlm_FloatHi evalue;
86         Int4 score;
87         Int4 number;
88 	Boolean is_na;
89 	Boolean found_score;
90 	Boolean isnew;		/* used to print mark "new.gif" near defline */
91 	Boolean waschecked;	/* used to print some another .gif near such defline */
92 	CharPtr segs_str;	/* Used to print segs for dumpgnl program. */
93 	size_t  segs_buflen,
94 		segs_used;
95 } TxDfLineStruct, *TxDfLineStructPtr;
96 
97 
98 
99 /*fill string with num spaces and null-end the string*/
makeEmptyString(CharPtr str,Int4 num)100 static void makeEmptyString(CharPtr str, Int4 num){
101   Int4 i;
102   if(str){
103     for (i=0; i<num; i++){
104       str[i]=' ';
105     }
106     str[i]='\0';
107   }
108 }
109 
110 
111 /*return initials of names not exceeding 15 elements delimited by space. Need to free memory afterwards*/
getNameInitials(CharPtr name)112 static CharPtr getNameInitials(CharPtr name){
113   CharPtr temp2, initials=NULL;
114   Int4 i, maxElements=15;
115   CharPtr nameCopy;
116 
117   if(name){
118     nameCopy=MemNew(StringLen(name)+sizeof(Char));
119     if(!nameCopy) {
120       printf("insufficient memory!\n");
121       exit (1);
122     }
123 
124     StringCpy(nameCopy, name);
125     temp2=StrTok(nameCopy, " ");
126     if(temp2){
127       initials=MemNew((maxElements+1)*sizeof(Char));
128       if(!initials) {
129 	printf("insufficient memory!\n");
130 	exit (1);
131       }
132       i=0;
133       initials[i]=*temp2;
134       i++;
135       while((temp2=StrTok(NULL, " "))&&i<maxElements){
136 	initials[i]=*temp2;
137 	i++;
138       }
139       initials[i]='\0';
140     }
141     MemFree(nameCopy);
142   }
143 
144   return initials;
145 }
146 
147 /*return true if the linkout type in bdfl is linkoutType, false otherwise*/
checkLinkoutType(BlastDefLinePtr bdfl,Uint1 linkoutType)148 NLM_EXTERN Boolean checkLinkoutType(BlastDefLinePtr bdfl, Uint1 linkoutType){
149   Boolean isThisType=FALSE;
150   ValNodePtr vnp;
151   Int4 intval;
152 
153   if(bdfl){
154     vnp=bdfl->links;
155     if(vnp){
156       intval=vnp->data.intvalue;
157       if(linkoutType&intval){
158 	isThisType=TRUE;
159       }
160     }
161   }
162   return isThisType;
163 }
164 
165 /* return bdlp containing the sip from a chain of bdlp. Return the first bdlp if sip is null*/
getBlastDefLineForSeqId(BlastDefLinePtr bdlp,SeqIdPtr sip)166 BlastDefLinePtr getBlastDefLineForSeqId(BlastDefLinePtr bdlp, SeqIdPtr sip){
167   BlastDefLinePtr temp;
168   Boolean found=FALSE;
169   temp=bdlp;
170 
171   while(temp){
172     if(temp->seqid){
173       if(SeqIdMatch(temp->seqid, sip)){
174 	found=TRUE;
175 	break;
176       }
177     }
178     temp=temp->next;
179   }
180   if(!found){
181     temp=bdlp;
182   }
183   return temp;
184 }
185 /*add linkout for defline. It adds the linkout for the first sip that has a linkout*/
addLinkoutForDefline(BioseqPtr bsp,SeqIdPtr sip,FILE * fp)186 static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
187     BlastDefLinePtr bdlp, bdlpTemp;
188     Boolean hasLinkout=FALSE;
189     BIG_ID gi, firstGi=GetGIForSeqId(sip);
190     Char molType[8]={""};
191 
192     if(bsp){
193       bdlp=FDGetDeflineAsnFromBioseq(bsp);
194       if(bdlp){
195 	if(ISA_aa(bsp->mol)){
196 	  sprintf(molType, "[pgi]");
197 	}
198 	else if(ISA_na(bsp->mol)){
199 	  sprintf(molType, "[ngi]");
200 	}
201 
202 	/*add space in front of linkout*/
203 	fprintf(fp, " ");
204 
205         bdlpTemp=bdlp;
206 	while(bdlpTemp){
207             if(checkLinkoutType(bdlpTemp, linkout_gene)){
208                 hasLinkout=TRUE;
209                 gi=GetGIForSeqId(bdlpTemp->seqid);
210                 fprintf(fp, URL_Gene, gi, ISA_aa(bsp->mol) ? "PUID" : "NUID");
211                 break;
212             }
213             bdlpTemp=bdlpTemp->next;
214         }
215 	bdlpTemp=bdlp;
216 	while(bdlpTemp){
217 	  if(checkLinkoutType(bdlpTemp, linkout_unigene)){
218 	    hasLinkout=TRUE;
219 	    gi=GetGIForSeqId(bdlpTemp->seqid);
220 	    fprintf(fp, URL_Unigene,  gi);
221 	    break;
222 	  }
223 	  bdlpTemp=bdlpTemp->next;
224 	}
225 	bdlpTemp=bdlp;
226         if (RID_glb) {
227            while(bdlpTemp){
228               if(checkLinkoutType(bdlpTemp, linkout_structure)){
229                  hasLinkout=TRUE;
230                  gi=GetGIForSeqId(bdlpTemp->seqid);
231                  fprintf(fp, URL_Structure, RID_glb, firstGi, gi, CDD_RID_glb, "onegroup", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
232                  break;
233               }
234               bdlpTemp=bdlpTemp->next;
235            }
236         }
237 	bdlpTemp=bdlp;
238 	while(bdlpTemp){
239 	  if(checkLinkoutType(bdlpTemp, linkout_geo)){
240 	    gi=GetGIForSeqId(bdlpTemp->seqid);
241 	    fprintf(fp, URL_Geo, gi);
242 	    break;
243 	  }
244 	  bdlpTemp=bdlpTemp->next;
245 	}
246 
247       }
248       BlastDefLineSetFree(bdlp);
249     }
250 }
251 
252 
253 /*print linkout for bsp.  If sip is not null, the linkout is for that sip (ie., the case for nonredundant blast db*/
addLinkoutForBioseq(BioseqPtr bsp,SeqIdPtr sip,SeqIdPtr firstSip,FILE * fp)254 static void addLinkoutForBioseq(BioseqPtr bsp, SeqIdPtr sip, SeqIdPtr firstSip, FILE* fp){
255     BlastDefLinePtr bdlp, actualBdlp;
256     Boolean hasLinkout=FALSE;
257     BIG_ID gi, firstGi;
258     Char molType[8]={""};
259 
260     if(bsp){
261       bdlp=FDGetDeflineAsnFromBioseq(bsp);
262       actualBdlp=getBlastDefLineForSeqId(bdlp, sip);
263       if(actualBdlp){
264 	firstGi=GetGIForSeqId(firstSip);
265 
266 	gi=GetGIForSeqId(bsp->id);
267 	if(ISA_aa(bsp->mol)){
268 	  sprintf(molType, "[pgi]");
269 	}
270 	else if(ISA_na(bsp->mol)){
271 	  sprintf(molType, "[ngi]");
272 	}
273 
274 	/*add space in front of linkout*/
275 	fprintf(fp, " ");
276 
277         if(checkLinkoutType(actualBdlp, linkout_gene)){
278             hasLinkout=TRUE;
279             fprintf(fp, URL_Gene, gi, ISA_aa(bsp->mol) ? "PUID" : "NUID");
280         }
281 
282 	if(checkLinkoutType(actualBdlp, linkout_unigene)){
283 	  hasLinkout=TRUE;
284 	  fprintf(fp, URL_Unigene,  gi);
285 	}
286 	if(RID_glb && checkLinkoutType(actualBdlp, linkout_structure)){
287 	  hasLinkout=TRUE;
288 	  fprintf(fp, URL_Structure, RID_glb, firstGi, gi, CDD_RID_glb, "onepair", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
289 	}
290 
291 	if(checkLinkoutType(actualBdlp, linkout_geo)){
292 	  fprintf(fp, URL_Geo, gi);
293 	}
294       }
295       BlastDefLineSetFree(bdlp);
296     }
297 }
298 
299 
get_num_empty_space(Boolean compress)300 static Int4 get_num_empty_space(Boolean compress)
301 {
302 	return (compress ? (8+5 +1) : B_SPACE+POS_SPACE+STRAND_SPACE +1);
303 }
304 
305 static Boolean ShowAlignNodeText2Ex(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix, SeqAlignPtr PNTR last_align);
306 static Boolean SeqAlignSegsStr(SeqAlignPtr salp, Int2 index, CharPtr *dst, size_t *size, size_t *used);
307 static CharPtr StringAppend(CharPtr *dst, size_t *size, CharPtr src, size_t *used);
308 
309 
310 static ValNodePtr ProcessTextInsertion PROTO((AlignNodePtr anp, Int4 m_left, Int4 m_right, BioseqPtr bsp, Int4 line_len, Int1 frame));
311 
/*
 * Split the leading group off *anp_list.
 *
 * The group is identified by the itemID and chain of the first node that is
 * not an OBJ_SEQANNOT node.  Walking from the head of the list, the group
 * ends in front of the first OBJ_SEQANNOT node or the first align node whose
 * itemID or chain differs.  The list is cut there: *anp_list is set to the
 * node that ended the group (NULL when the whole list was consumed) and the
 * original head is returned.  Returns NULL, leaving *anp_list untouched,
 * when the list holds no align node at all.
 */
static ValNodePtr ExtractCurrentAlignNode(ValNodePtr PNTR anp_list)
{
	ValNodePtr list_head, node, before = NULL;
	AlignNodePtr anp;
	Uint4 group_item;
	Uint2 group_chain;
	Boolean ends_group;

	/* find the first align node; it defines the group */
	for(node = *anp_list; node != NULL; node = node->next)
	{
		if(node->choice != OBJ_SEQANNOT)
			break;
	}
	if(node == NULL)
		return NULL;

	anp = (AlignNodePtr) node->data.ptrvalue;
	group_item = anp->itemID;
	group_chain = anp->chain;

	list_head = *anp_list;
	for(node = list_head; node != NULL; before = node, node = node->next)
	{
		if(node->choice == OBJ_SEQANNOT)
			ends_group = TRUE;
		else
		{
			anp = (AlignNodePtr) node->data.ptrvalue;
			ends_group = (Boolean)(anp->itemID != group_item || anp->chain != group_chain);
		}

		if(ends_group)
		{
			*anp_list = node;
			if(before != NULL)
				before->next = NULL;
			return list_head;
		}
	}

	*anp_list = NULL;
	return list_head;
}
357 
358 
/*
 * Overwrite the itemID of every align node in anp_list with itemID.
 * OBJ_SEQANNOT nodes are skipped.
 */
static void modify_kludge_itemID (ValNodePtr anp_list, Uint4 itemID)
{
	ValNodePtr node;

	for(node = anp_list; node != NULL; node = node->next)
	{
		if(node->choice == OBJ_SEQANNOT)
			continue;
		((AlignNodePtr) node->data.ptrvalue)->itemID = itemID;
	}
}
373 
374 /******************************************************************
375 *
376 *	LoadFollowerForSameId(anp_list)
377 *	if the same sequence appears multiple times in the anp_list,
378 *	it will be moved to the sequence that are the head of this
379 *	list. The field anp->follower is set as the order of the
380 *	repeats in this list
381 *
382 ******************************************************************/
LoadFollowerForSameId(ValNodePtr anp_list)383 static void LoadFollowerForSameId(ValNodePtr anp_list)
384 {
385 	ValNodePtr curr, n_curr;
386 	AlignNodePtr anp, n_anp;
387 
388 	curr = anp_list;
389 	while(curr)
390 	{
391 		if(curr->choice != OBJ_SEQANNOT)
392 		{
393 			anp = (AlignNodePtr) curr->data.ptrvalue;
394 			if(anp->is_master == FALSE && anp->follower == FALSE)
395 			{
396 				for(n_curr = curr->next; n_curr != NULL; n_curr = n_curr->next)
397 				{
398 					if(n_curr->choice != OBJ_SEQANNOT)
399 					{
400 						n_anp = (AlignNodePtr) n_curr->data.ptrvalue;
401 						if(n_anp->is_master == FALSE && n_anp->follower == FALSE)
402 						{
403 							if(SeqIdMatch(n_anp->sip, anp->sip))
404 								n_anp->follower = TRUE;
405 						}
406 					}
407 				}
408 			}
409 		}
410 		curr = curr->next;
411 	}
412 }
413 
414 
/*
 * Overwrite the masked intervals in the byte store bsp.
 * Every interval of every location in the maskloc chain is filled, from its
 * start to its stop position inclusive, with 'X' when mol is Seq_mol_aa and
 * with 'N' otherwise.
 */
static void MaskWithLowComplexity(ByteStorePtr bsp, SeqLocPtr maskloc, Uint1 mol)
{
	Uint1 mask_char;
	SeqLocPtr piece;
	Int4 pos, last;

	mask_char = (Uint1)((mol == Seq_mol_aa) ? 'X' : 'N');

	for(; maskloc != NULL; maskloc = maskloc->next)
	{
		piece = NULL;
		while((piece = SeqLocFindNext(maskloc, piece)) != NULL)
		{
			pos = SeqLocStart(piece);
			last = SeqLocStop(piece);
			BSSeek(bsp, pos, SEEK_SET);
			while(pos <= last)
			{
				BSPutByte(bsp, (Int2)mask_char);
				++pos;
			}
		}
	}
}
439 
create_byte_store_from_bsp(BioseqPtr bsp)440 static ByteStorePtr create_byte_store_from_bsp (BioseqPtr bsp)
441 {
442 	SeqPortPtr spp;
443 	Uint1 code;
444 	ByteStorePtr b_store;
445 	Uint1 residue;
446 
447 	if(bsp == NULL)
448 		return NULL;
449 	if(bsp->mol == Seq_mol_aa)
450 		code = Seq_code_iupacaa;
451 	else
452 		code = Seq_code_iupacna;
453 
454 	spp = SeqPortNew(bsp, 0, bsp->length-1, Seq_strand_plus, code);
455 	b_store = BSNew(bsp->length +1);
456 	BSSeek(b_store, 0, SEEK_SET);
457 	while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF)
458 		BSPutByte(b_store, (Int2)residue);
459 	SeqPortFree(spp);
460 	return b_store;
461 }
462 
/*
 * Build one masked byte store per entry of mask_list.
 *
 * The returned list has one node per node of mask_list, carrying the same
 * choice value.  For an entry with a mask location, the node's data pointer
 * is a byte store holding the sequence with that location masked (see
 * MaskWithLowComplexity); otherwise the data pointer is left unset.
 *
 * The unmasked sequence is loaded once, from the bioseq named by the first
 * mask location for which the bioseq can be locked.  Every entry gets its
 * own duplicate, except the last one, which takes over the unmasked store.
 */
ValNodePtr CreateMaskByteStore (ValNodePtr mask_list)
{
	ValNodePtr result = NULL;
	ValNodePtr entry;
	ByteStorePtr plain = NULL;	/* unmasked sequence, created on demand */
	ByteStorePtr masked;
	SeqLocPtr mask;
	SeqIdPtr sip;
	BioseqPtr bsp;
	Uint1 mol = 0;			/* only meaningful once plain has been created */

	for(; mask_list != NULL; mask_list = mask_list->next)
	{
		entry = ValNodeNew(result);
		entry->choice = mask_list->choice;
		if(result == NULL)
			result = entry;

		mask = (ValNodePtr) mask_list->data.ptrvalue;
		if(mask == NULL)
			continue;

		if(plain == NULL)
		{
			sip = SeqLocId(mask);
			bsp = (sip != NULL) ? BioseqLockById(sip) : NULL;
			if(bsp != NULL)
			{
				plain = create_byte_store_from_bsp (bsp);
				mol = bsp->mol;
				BioseqUnlock(bsp);
			}
		}
		if(plain == NULL)
			continue;

		if(mask_list->next == NULL)
		{
			/* last entry: hand over the unmasked store instead of copying it */
			masked = plain;
			plain = NULL;
		}
		else
			masked = BSDup(plain);

		MaskWithLowComplexity(masked, mask, mol);
		entry->data.ptrvalue = masked;
	}

	if(plain != NULL)
		BSFree(plain);
	return result;
}
518 
/*
 * Free every byte store referenced by the nodes of bs_list, then free the
 * list nodes themselves.
 */
static void FreeByteStoreList (ValNodePtr bs_list)
{
	ValNodePtr node = bs_list;
	ByteStorePtr store;

	while(node != NULL)
	{
		store = (ByteStorePtr) node->data.ptrvalue;
		if(store != NULL)
			BSFree(store);
		node = node->next;
	}

	ValNodeFree(bs_list);
}
532 
replace_bytestore_data(BioseqPtr bsp,ValNodePtr bs_list,Uint1 frame)533 NLM_EXTERN Boolean replace_bytestore_data (BioseqPtr bsp, ValNodePtr bs_list, Uint1 frame)
534 {
535 	ByteStorePtr b_store;
536 	Uint1 code;
537 
538 	if(bsp == NULL)
539 		return FALSE;
540 
541 	if(bsp->mol == Seq_mol_aa)
542 		code = Seq_code_iupacaa;
543 	else
544 		code = Seq_code_iupacna;
545 
546 	while(bs_list)
547 	{
548 		if(bs_list->choice == frame)
549 		{
550 			b_store = (ByteStorePtr) bs_list->data.ptrvalue;
551 			if(b_store != NULL)
552 			{
553 				bsp->repr = Seq_repr_raw;
554 				bsp->seq_data = (SeqDataPtr) b_store;
555 				bsp->seq_data_type = code;
556 				return TRUE;
557 			}
558 		}
559 		bs_list = bs_list->next;
560 	}
561 
562 	return FALSE;
563 }
564 
565 
566 /*can the current alignnode be printed for text view*/
PrintAlignForText(AnnotInfoPtr info,AlignNodePtr anp)567 NLM_EXTERN Boolean PrintAlignForText(AnnotInfoPtr info, AlignNodePtr anp)
568 {
569         if(anp == NULL || anp->segs == NULL)
570                 return FALSE;
571         if(anp->segs->type == STD_SEG)
572         {
573                 if(info == NULL)
574                         return FALSE;
575                 if(info->annot_type != ANNOT_BLAST)
576                         return FALSE;
577                 if(info->blast_type != ALIGN_BLASTX &&
578 		   info->blast_type != ALIGN_TBLASTN &&
579                    info->blast_type != ALIGN_PSITBLASTN &&
580 		   info->blast_type != ALIGN_TBLASTX)
581                         return FALSE;
582         }
583 
584         return TRUE;
585 }
586 
587 
588 /*
589 *	for tblastn and blastx, return the frame of the non-master
590 *	for tblastx, return the frame for the master sequence
591 */
592 
is_master_alignment(AlignNodePtr anp,BioseqPtr m_bsp)593 static Boolean is_master_alignment (AlignNodePtr anp, BioseqPtr m_bsp)
594 {
595 	return (anp->is_master || BioseqMatch(m_bsp, anp->sip));
596 }
597 
get_alignment_frame(ValNodePtr anp_list,BioseqPtr m_bsp)598 static Int1 get_alignment_frame(ValNodePtr anp_list, BioseqPtr m_bsp)
599 {
600 	Uint1 c_type = 0;
601 	AlignNodePtr anp;
602 	AnnotInfoPtr annot_info;
603 	ValNodePtr  curr;
604 	Boolean found;
605 
606 	if(anp_list == NULL)
607 		return -1;
608 
609 	annot_info = NULL;
610 	found = FALSE;
611 	for(curr = anp_list; curr != NULL; curr = curr->next)
612 	{
613 		if(curr->choice == OBJ_SEQANNOT)
614 		{
615 			annot_info = (AnnotInfoPtr) anp_list->data.ptrvalue;
616 			c_type = get_alignment_type(annot_info);
617 		}
618 		else
619 		{
620 			anp = (AlignNodePtr) curr->data.ptrvalue;
621 			if(!is_master_alignment(anp, m_bsp))
622 				found = (c_type != ALIGN_TDNA_TO_TDNA);
623 			else
624 				found = (c_type == ALIGN_TDNA_TO_TDNA);
625 			if(found)
626 			{
627 				if(!PrintAlignForText(annot_info, anp))
628 					return -1;
629 				if(c_type == ALIGN_NORMAL || c_type == ALIGN_PROT_TO_DNA)
630 					return 0;
631 				if(c_type == ALIGN_DNA_TO_PROT || c_type == ALIGN_TDNA_TO_TDNA)
632 					return anp->m_frame;
633 			}
634 		}
635 	}
636 
637 	return -1;
638 }
639 
640 /**********************************************************************************
641 *
642 *       Given a chain of annots (ValNodePtrs) they are all printed out, one pattern
643 *       at a time.
644 *
645 *	For a give annot all alignments from one database sequence are assumed to be grouped together.
646 *
647 *	The Alignments from one databases sequence are currently ranked by expect value.
648 *	It has been suggested that this be changed and should not be relied on indefinitely.
649 *
650 *************************************************************************************/
651 
652 NLM_EXTERN Boolean LIBCALL
ShowTextAlignFromAnnotExtra(BioseqPtr bsp,ValNodePtr vnp,SeqLocPtr seqloc,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)))653 ShowTextAlignFromAnnotExtra(BioseqPtr bsp, ValNodePtr vnp, SeqLocPtr seqloc,
654         Int4 line_len, FILE *fp,
655         Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix,
656         ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))
657 {
658         Int4 index=0;
659         SeqAnnotPtr seqannot;
660         SeqAnnotPtr annot;
661         SeqFeatPtr sfp;
662         SeqLocPtr next;
663 
664         seqannot = SeqAnnotNew();
665         seqannot->type = 2;
666         AddAlignInfoToSeqAnnot(seqannot, 2);
667 
668         while (vnp && seqloc)
669         {
670                 index++;
671                 seqannot->data = vnp->data.ptrvalue;
672                 if (bsp->annot && bsp->annot->type == 1 && bsp->annot->data)
673 		{
674 			sfp = bsp->annot->data;
675 			if (sfp->data.choice == SEQFEAT_REGION)
676 				sfp->location = NULL;
677                         bsp->annot = SeqAnnotFree(bsp->annot);
678 		}
679                 annot = bsp->annot = SeqAnnotNew();
680                 bsp->annot->type = 1;   /* ftable. */
681                 next = seqloc->next;
682                 sfp = SeqFeatNew();
683                 seqloc->next = NULL;
684                 sfp->location = seqloc;
685                 sfp->data.choice = SEQFEAT_REGION;
686                 sfp->data.value.ptrvalue = StringSave("pattern");
687                 annot->data = sfp;
688                 fprintf(fp, "\nSignificant alignments for pattern occurrence %ld at position %ld\n\n",
689                         (long) index, (long) (SeqLocStart(seqloc)+1));
690                 ShowTextAlignFromAnnot(seqannot, line_len, fp, featureOrder, groupOrder, option, matrix, mask_loc, fmt_score_func);
691                 seqloc->next = next;
692                 seqloc = seqloc->next;
693                 vnp = vnp->next;
694         }
695         seqannot->data = NULL;
696 	seqannot = SeqAnnotFree(seqannot);
697 
698         return TRUE;
699 }
700 static Boolean load_master_translate_frame PROTO((ValNodePtr anp_list, Int4 m_len, BioseqPtr m_bsp));
701 static AlignNodePtr get_master_align_node PROTO((ValNodePtr anp_list));
702 
703 
704 /*****************************************************************************
705 *
706 *	ShowTextAlignFromAnnot(annot, locus, line_len, fp, master, f_order)
707 *	display the alignment stored in a Seq-annot in a text file
708 *	annot: the Seq-annot pointer
709 *	locus: if TRUE, show the locus name as the sequence label, otherwise,
710 *		use the accession
711 *	line_len: the number of sequence char per line
712 *	fp: The file pointer to store the text output
713 *	master: if TRUE, show the result as a master-slave type multiple pair
714 *	wise alignment. if FALSE, display one alignment after the other
715 *	f_order: the user selected feature type and order to be shown together
716 *	with the alignment
717 *	is_html: print out the format as an HTML page?
718 *	return TRUE for success, FALSE for fail
719 *
720 *****************************************************************************/
721 /* This modification of the function to pass position-specific matrix */
ShowTextAlignFromAnnot3(SeqAnnotPtr hannot,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr www_blast_type,Int4Ptr PNTR posMatrix)722 NLM_EXTERN Boolean ShowTextAlignFromAnnot3(SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr www_blast_type, Int4Ptr PNTR posMatrix)
723 {
724     SeqAlignPtr align, h_align, n_align, prev;
725     SeqLocPtr m_loc;
726     Boolean retval, matrix_loaded=FALSE;
727     SeqIdPtr m_sip;
728     Int4 m_start, m_stop, t_start, t_stop;
729     ValNodePtr anp_node, curr_list;
730     ValNodePtr annot_head;
731     Uint1 style;
732     SeqAnnotPtr annot;
733     Boolean master;
734     Boolean flat_insert;
735     Uint1 m_strand;
736     Uint1 annot_type;
737     Char annotDB[101];
738     Uint2 order;
739     Uint1   blast_type;
740     Boolean load_matrix;
741     ValNodePtr bs_list;	/*store the ByteStores that were masked master seqences*/
742     BioseqPtr m_bsp;
743     Int1 frame;
744     Uint1 code;
745     Uint1 repr;
746     ByteStorePtr seq_data = NULL;
747 
748 
749     annot = hannot;
750     flat_insert = (Boolean)(option & TXALIGN_FLAT_INS);
751     /* flat_insert = TRUE; */
752     master = (Boolean)(option & TXALIGN_MASTER);
753     if(annot->type != 2)
754         return FALSE;
755     m_start = -1;
756     m_stop = -1;
757     m_sip = NULL;
758     annot = hannot;
759     while(annot) {
760         if(annot->type == 2) {
761 	    SeqIdPtr siptemp;
762             align = (SeqAlignPtr) annot->data;
763 	    siptemp=TxGetSubjectIdFromSeqAlign(align);
764 	    if(siptemp&&siptemp->choice==SEQID_GI){
765 	      DbHasGi=TRUE;
766 	    }
767 
768             if(m_sip == NULL)
769                 m_sip = make_master(align);
770             if(m_sip != NULL) {
771                 get_boundary(m_sip, &t_start, &t_stop, align);
772                 if(m_start == -1 || m_start > t_start)
773                     m_start = t_start;
774                 if(m_stop == -1 || m_stop < t_stop)
775                     m_stop = t_stop;
776             }
777         }
778         annot = annot->next;
779     }
780     if(m_sip == NULL || m_start == -1 || m_stop == -1)
781         return FALSE;
782 
783     if(master)
784         style = COLLECT_MP;
785     else
786         style = COLLECT_MD;
787     anp_node = NULL;
788     m_loc = SeqLocIntNew(m_start, m_stop, Seq_strand_plus, m_sip);
789     annot = hannot;
790     load_matrix = FALSE;	/*if there is any protein sequence, set the load_matrix to TRUE*/
791     while(annot) {
792         if(annot->type == 2) {
793             annotDB[0] = '\0';
794             blast_type = get_align_annot_qual(annot, annotDB, 100, &annot_type);
795 
796             if(blast_type == ALIGN_BLASTX
797                || blast_type == ALIGN_TBLASTN
798                || blast_type == ALIGN_PSITBLASTN
799                || blast_type == ALIGN_TBLASTX)
800 	      load_matrix = TRUE;
801             if(blast_type == ALIGN_TBLASTX ||
802                (blast_type == ALIGN_BLASTX && annot_type == ANNOT_BLAST
803                 && (option & TXALIGN_BLASTX_SPECIAL))) { /*!!!!!!!this messes up all the itemIDs and entityIDs !!!!!!*/
804                 align = (SeqAlignPtr) annot->data;
805                 prev = NULL;
806                 h_align = NULL;
807                 order = 0;
808                 while(align) {
809                     ++order;
810                     n_align = align->next;
811                     align->next = NULL;
812                     if(get_align_ends(align, m_sip, &m_start, &m_stop, &m_strand)) {
813                         /* sint = m_loc->data.ptrvalue;
814                            sint->strand = m_strand; */
815                         update_seq_loc(m_start, m_stop, m_strand, m_loc);
816                         style = COLLECT_MD;
817                         master = FALSE;
818 
819                         annot->data = align;
820                         curr_list  = CollAlignFromSeqAnnot(annot, m_loc,
821                                                            featureOrder, groupOrder, style, FALSE, master, flat_insert);
822                         if(curr_list != NULL) {
823                             modify_kludge_itemID (curr_list, order);
824                             ValNodeLink(&anp_node, curr_list);
825                         }
826                     }
827                     if(prev == NULL)
828                         h_align = align;
829                     else
830                         prev->next = align;
831                     prev = align;
832                     align = n_align;
833                 }
834                 annot->data = h_align;
835             } else {
836                 curr_list  = CollAlignFromSeqAnnot(annot, m_loc, featureOrder, groupOrder, style, FALSE, master, flat_insert);
837                 if(curr_list != NULL)
838                     ValNodeLink(&anp_node, curr_list);
839             }
840         }
841         annot = annot->next;
842     }
843     SeqLocFree(m_loc);
844     if(anp_node == NULL)
845         return FALSE;
846 
847     m_bsp = BioseqLockById(m_sip);
848     if(m_bsp == NULL) {
849         FreeAlignNode(anp_node);
850         return FALSE;
851     }
852     if (m_bsp->seq_data_type == Seq_code_gap) {
853         BioseqUnlock(m_bsp);
854         return FALSE;
855     }
856 
857     if(mask_loc != NULL)
858         bs_list = CreateMaskByteStore (mask_loc);
859     else
860         bs_list = NULL;
861 
862     repr = m_bsp->repr;
863     seq_data = (ByteStorePtr) m_bsp->seq_data;
864     code = m_bsp->seq_data_type;
865 
866     if(matrix == NULL && (option & TXALIGN_MATRIX_VAL || load_matrix)) {
867         matrix = load_default_matrix();
868         matrix_loaded = TRUE;
869     }
870 
871     if(fmt_score_func != NULL) {
872         free_buff();
873         init_buff_ex(MAX(80, line_len + 23 + 12));
874     }
875     if(master) {
876         frame = get_alignment_frame(anp_node, m_bsp);
877         if(frame != -1 && bs_list != NULL) {
878            load_master_translate_frame(anp_node, m_bsp->length, m_bsp);
879 
880             if(!replace_bytestore_data (m_bsp, bs_list, (Uint1)frame)) {
881                 m_bsp->repr = repr;
882                 m_bsp->seq_data = (SeqDataPtr) seq_data;
883                 m_bsp->seq_data_type = code;
884             }
885         }
886         retval = ShowAlignNodeText2(anp_node, -1, line_len, fp, -1, -1, option, matrix, fmt_score_func, db_name, www_blast_type, posMatrix);
887         FreeAlignNode(anp_node);
888     } else {
889 SeqAlignPtr last_align=NULL;
890 
891         annot_head = NULL;
892         if(fmt_score_func != NULL)
893             LoadFollowerForSameId(anp_node);
894         while(anp_node) {
895             if(anp_node->choice == OBJ_SEQANNOT) {
896                 if(annot_head != NULL) {
897                     annot_head->next = NULL;
898                     FreeAlignNode(annot_head);
899                 }
900                 annot_head = anp_node;
901                 anp_node = anp_node->next;
902             } else {
903                 curr_list = ExtractCurrentAlignNode(&anp_node);
904                 if(curr_list) {
905                     if(annot_head != NULL) {
906                         annot_head->next = curr_list;
907                         load_master_translate_frame(annot_head, m_bsp->length, m_bsp);
908                         frame = get_alignment_frame(annot_head, m_bsp);
909                     } else
910                         frame = 0;
911                     if(frame != -1 && bs_list != NULL) {
912                         if(!replace_bytestore_data (m_bsp, bs_list, (Uint1)frame)) {
913                             m_bsp->repr = repr;
914                             m_bsp->seq_data = (SeqDataPtr) seq_data;
915                             m_bsp->seq_data_type = code;
916                         }
917                     }
918 
919                     if(annot_head != NULL) {
920                         retval = ShowAlignNodeText2Ex(annot_head, -1, line_len, fp, -1, -1, option, matrix, fmt_score_func, db_name, www_blast_type, posMatrix, &last_align);
921                         annot_head->next = NULL;
922                     } else
923                         retval = ShowAlignNodeText2Ex(curr_list, -1, line_len, fp, -1, -1, option, matrix, fmt_score_func, db_name, www_blast_type, posMatrix, &last_align);
924                     if(retval == TRUE)
925                         fprintf(fp, "\n\n");
926                     FreeAlignNode(curr_list);
927                 }
928             }
929         }
930         if(annot_head != NULL) {
931             annot_head->next = NULL;
932             FreeAlignNode(annot_head);
933         }
934     }
935 
936     m_bsp->repr = repr;
937     m_bsp->seq_data = (SeqDataPtr) seq_data;
938     m_bsp->seq_data_type = code;
939 
940     if (matrix_loaded)
941         free_default_matrix(matrix);
942 
943     if(fmt_score_func != NULL)
944         free_buff();
945     if(bs_list != NULL)
946         FreeByteStoreList (bs_list);
947     BioseqUnlock(m_bsp);
948 
949     return retval;
950 }
951 
ShowTextAlignFromAnnot(SeqAnnotPtr hannot,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)))952 NLM_EXTERN Boolean ShowTextAlignFromAnnot(SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))
953 {
954     return ShowTextAlignFromAnnot2(hannot, line_len, fp, featureOrder, groupOrder, option, matrix, mask_loc, fmt_score_func, NULL, NULL);
955 
956 }
ShowTextAlignFromAnnot2(SeqAnnotPtr hannot,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr www_blast_type)957 NLM_EXTERN Boolean ShowTextAlignFromAnnot2(SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr www_blast_type)
958 {
959     return ShowTextAlignFromAnnot3(hannot, line_len, fp, featureOrder,
960                                    groupOrder, option, matrix, mask_loc,
961                                    fmt_score_func, db_name, www_blast_type,
962                                    NULL);
963 }
964 
965 /* Simple SeqAlign printing function:
966    Can be used while debugging.. options kept to a minimum
967    fp==NULL ==> stdout
968 */
SeqAlignPrint(SeqAlignPtr salp,FILE * fp)969 NLM_EXTERN void LIBCALL SeqAlignPrint(SeqAlignPtr salp,FILE* fp) {
970     SeqAnnotPtr sap;
971     Int4 align_options;
972     if(salp) {
973         if(salp->segtype==5) {
974             sap = SeqAnnotForSeqAlign((SeqAlignPtr)salp->segs);
975         } else{
976             sap = SeqAnnotForSeqAlign(salp);
977         }
978         if(sap) {
979             align_options =TXALIGN_COMPRESS+TXALIGN_END_NUM+TXALIGN_MASTER+TXALIGN_MISMATCH;
980             ShowTextAlignFromAnnot(sap, 70, fp==NULL ? stdout : fp, NULL, NULL, align_options, NULL, NULL, FormatScoreFunc);
981 
982             sap->data=NULL;
983             SeqAnnotFree(sap);
984         } else {
985             ErrPostEx(SEV_WARNING,0,0,"NULL SeqAnnot from Non-NULL SeqAlign\n");
986         }
987     }
988 }
989 
990 /*********************************************************************
991 *
992 *	functions used for producing the Web browser
993 *
994 *********************************************************************/
find_seqid_for_bioseq(GatherContextPtr gcp)995 static Boolean find_seqid_for_bioseq(GatherContextPtr gcp)
996 {
997 	BioseqPtr bsp;
998 	ValNodePtr vnp;
999 
1000 	if(gcp->thistype != OBJ_BIOSEQ)
1001 		return FALSE;
1002 	bsp = (BioseqPtr)(gcp->thisitem);
1003 	if(bsp == NULL)
1004 		return FALSE;
1005 	vnp  = (ValNodePtr)(gcp->userdata);
1006 	vnp->choice = bsp->mol;
1007 	vnp->data.ptrvalue = bsp->id;
1008 	return TRUE;
1009 }
1010 
find_seqid_for_seqfeat(GatherContextPtr gcp)1011 static Boolean find_seqid_for_seqfeat(GatherContextPtr gcp)
1012 {
1013 	SeqFeatPtr sfp;
1014 	ValNodePtr vnp;
1015 	BioseqPtr bsp;
1016 
1017 	if(gcp->thistype != OBJ_SEQFEAT)
1018 		return FALSE;
1019 	sfp = (SeqFeatPtr)(gcp->thisitem);
1020 	if(sfp == NULL || sfp->product == NULL)
1021 		return FALSE;
1022 	vnp  = (ValNodePtr)(gcp->userdata);
1023 	vnp->choice = Seq_mol_aa;
1024 	bsp = BioseqFindCore(SeqLocId(sfp->product));
1025 	if(bsp != NULL)
1026 		vnp->data.ptrvalue = bsp->id;
1027 	else
1028 		vnp->data.ptrvalue = SeqLocId(sfp->product);
1029 	return TRUE;
1030 }
1031 
add_html_label(TextAlignBufPtr tdp)1032 static Boolean add_html_label(TextAlignBufPtr tdp)
1033 {
1034 	if(tdp == NULL)
1035 		return FALSE;
1036 	if(tdp->label == NULL)
1037 		return FALSE;
1038 	if(tdp->seqEntityID == 0)
1039 		return FALSE;
1040 	/*only for the coding region features and Bioseqs */
1041 	return ((tdp->feattype == SEQFEAT_CDREGION) ||(tdp->feattype == 0
1042 		&& tdp->bsp_itemID != 0));
1043 	/* return (tdp->feattype == 0); */
1044 }
1045 
get_seqid_for_textbuf(TextAlignBufPtr tdp,CharPtr HTML_db,CharPtr HTML_dopt)1046 static SeqIdPtr get_seqid_for_textbuf(TextAlignBufPtr tdp, CharPtr HTML_db,
1047                                       CharPtr HTML_dopt)
1048 {
1049 	ValNode vn;
1050 	SeqIdPtr sip;
1051 
1052 
1053 	if(!add_html_label(tdp))
1054 		return 0;
1055 
1056 	vn.choice = 0;
1057 	vn.data.ptrvalue = NULL;
1058 	if(tdp->feattype == SEQFEAT_CDREGION)
1059 		GatherItem(tdp->seqEntityID, tdp->itemID, OBJ_SEQFEAT, (Pointer)(&vn), find_seqid_for_seqfeat);
1060 	else
1061 		GatherItem(tdp->seqEntityID, tdp->bsp_itemID, OBJ_BIOSEQ, (Pointer)(&vn), find_seqid_for_bioseq);
1062 	sip = (SeqIdPtr)(vn.data.ptrvalue);
1063 	if(sip != NULL) {
1064             if(vn.choice == Seq_mol_aa) {
1065                 StringCpy(HTML_dopt, "GenPept");
1066                 StringCpy(HTML_db,   "Protein");
1067             } else {
1068                 StringCpy(HTML_dopt, "GenBank");
1069                 StringCpy(HTML_db, "Nucleotide");
1070             }
1071 	}
1072 	return sip;
1073 
1074 }
1075 
1076 
1077 /******************************************************************
1078 *
1079 *	DrawTextToBuffer(tdp_list, m_buf)
1080 *	write the text into a buffer instead of a FILE
1081 *	return the buffer
1082 *
1083 ******************************************************************/
DrawTextToBuffer(ValNodePtr tdp_list,CharPtr PNTR m_buf,Boolean is_html,Int4 label_size,Int4 num_size,Boolean compress,Int4Ptr PNTR matrix,Int4 stop_val,Int4 line_len,Boolean show_strand,Boolean strip_semicolon,SeqIdPtr * already_linked,Uint4 options)1084 static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean is_html, Int4 label_size, Int4 num_size, Boolean compress, Int4Ptr PNTR matrix, Int4 stop_val, Int4 line_len, Boolean show_strand, Boolean strip_semicolon, SeqIdPtr *already_linked, Uint4 options)
1085 {
1086 	Boolean already_done;
1087 	TextAlignBufPtr tdp;
1088 	CharPtr docbuf = NULL;
1089 	Int2 i;
1090 	Int4 pos;
1091 	ValNodePtr curr;
1092 	Int4 max_len;	/*maximum length for each line*/
1093 	Int4 size;
1094 	CharPtr HTML_buffer;
1095 	CharPtr matrix_buf;
1096 	Char HTML_db[32], HTML_dopt[16];
1097 	Int4 html_len;
1098 	SeqIdPtr sip;
1099 	DbtagPtr db_tag;
1100 	ObjectIdPtr oip;
1101 	Boolean load;
1102 	Int4 num_empty, max_pos_val;
1103 	Uint1 res;
1104 	Char temp[21];
1105 	Boolean is_first;
1106 	SeqIdPtr seqid_var;
1107 	Int4 getSeqCheckboxLen=200;
1108 
1109 	if(tdp_list==NULL)
1110 		return NULL;
1111 	tdp = (TextAlignBufPtr) tdp_list->data.ptrvalue;
1112 	if(tdp->buf == NULL)
1113 		return NULL;
1114 	if(compress)
1115 	{
1116 		num_empty = label_size + 1 + num_size + 1;
1117 		if(show_strand)
1118 			num_empty += STRAND_SPACE;
1119 	}
1120 	else
1121 	{
1122 		label_size = B_SPACE;
1123 		num_size = POS_SPACE;
1124 		num_empty = B_SPACE + STRAND_SPACE + POS_SPACE + 2;
1125 	}
1126 	/* max_len = 150; */
1127 	max_len = line_len + num_empty + 20;
1128 	if(is_html) {
1129             Char buffer[1024];
1130             sprintf(buffer, "%s?cmd=Retrieve&db=&list_uids=&"
1131                     "dopt=GenPept", NEW_ENTREZ_HREF);
1132             html_len = StringLen(buffer);
1133             html_len += 1 + MAX_GI_NUM + 10 + 20 + MAX_DB_NUM;
1134             HTML_buffer = (CharPtr) MemNew((size_t)html_len * sizeof(Char));
1135 	}
1136 	size = 0;
1137 	max_pos_val = 12;
1138 	for(curr = tdp_list; curr !=NULL; curr = curr->next)
1139 	{
1140 	   tdp = (TextAlignBufPtr) curr->data.ptrvalue;
1141 	   if(tdp->exonCount > 0 && tdp->buf == NULL)	/*it is a codon*/
1142 		size += (3 * max_len);
1143 	   else
1144 	   {
1145 		if(is_html && add_html_label(tdp))
1146 			size += (max_len + html_len);
1147 		else
1148 			size += max_len;
1149 	   }
1150 	   if(tdp->matrix_val)
1151 		   size += max_len;
1152 	   if(options&TXALIGN_HTML&&(options&TXALIGN_MASTER)&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
1153 	     size+=getSeqCheckboxLen;
1154 	   }
1155 	}
1156 	if(size == 0)
1157 	{
1158 		if(is_html)
1159 			MemFree(HTML_buffer);
1160 		return NULL;
1161 	}
1162 	size += max_pos_val;
1163 
1164 	docbuf = (CharPtr) MemNew((size_t)(size) * sizeof(Char));
1165 	matrix_buf = (CharPtr) MemNew((size_t)max_len * sizeof (Char));
1166 
1167 	pos = 0;
1168 	is_first = TRUE;
1169 	while(tdp_list)
1170 	{
1171 	   tdp = (TextAlignBufPtr) tdp_list->data.ptrvalue;
1172 	   if(tdp->exonCount > 0 && tdp->buf == NULL)	/*it is a codon*/
1173 	   {
1174 		for(i =0; i<3; ++i)
1175 		{
1176 			if(i == tdp->frame)
1177 				pos+= print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1178 					tdp->strand, tdp->extra_space, FALSE, label_size, num_size, show_strand, strip_semicolon);
1179 			else
1180 				pos+= print_label_to_buffer_all_ex(docbuf+pos, NULL, -1,
1181 					0, tdp->extra_space, FALSE, label_size, num_size, show_strand, strip_semicolon);
1182 			sprintf(docbuf+pos, "%s\n", tdp->codon[i]);
1183 			pos += (StringLen(tdp->codon[i]) +1);
1184 		}
1185 	    }
1186 	    if(tdp->exonCount == 0 && tdp->buf !=NULL)
1187 	    {
1188 		if(tdp->matrix_val)	/*print the matrix of the alignment*/
1189 		{
1190 			MemSet(matrix_buf, ' ', (size_t)max_len* sizeof(Char));
1191 			matrix_buf[max_len-1] = '\0';
1192 			size = StringLen(tdp->buf);
1193 			if(matrix) /*protein alignment*/
1194 			{
1195 				for(i = 0; i<size; ++i)
1196 				{
1197 					res = tdp->buf[i];
1198 					if(tdp->matrix_val[i] > 0)
1199 					{
1200                                             if(tdp->matrix_val[i] == matrix[res][res] || tdp->matrix_val[i] == INT2_MAX)
1201                                                 matrix_buf[i+num_empty] = res;
1202                                             else
1203                                                 matrix_buf[i+num_empty] = '+';
1204 					}
1205 				}
1206 			}
1207 			else /*DNA alignment*/
1208 			{
1209 				for(i = 0; i<size; ++i)
1210 					if(tdp->matrix_val[i] != 0)
1211 						matrix_buf[i+num_empty] = (Uint1)(tdp->matrix_val[i]);
1212 			}
1213 			matrix_buf[i+num_empty] = '\0';
1214 			sprintf(docbuf+pos, "%s\n", matrix_buf);
1215 			pos += (StringLen(matrix_buf) +1);
1216 		}
1217 		load = FALSE;
1218 		if(is_html)
1219 		  {
1220 			sip = get_seqid_for_textbuf(tdp, HTML_db, HTML_dopt);
1221 			while(!load && sip)
1222 			  {
1223 				if(sip->choice == SEQID_GI && sip->data.intvalue != 0)
1224 				{
1225 					seqid_var = *already_linked;
1226 					already_done = FALSE;
1227 					while (seqid_var)
1228 					{
1229 						if (SeqIdMatch(sip, seqid_var) == TRUE)
1230 						{
1231 							already_done = TRUE;
1232 							break;
1233 						}
1234 						seqid_var = seqid_var->next;
1235 					}
1236 					if (already_done) {
1237        sprintf(HTML_buffer,
1238                "<a href=%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s %s>",
1239                NEW_ENTREZ_HREF, HTML_db, (long)sip->data.intvalue, HTML_dopt,
1240 	       options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
1241 					} else {
1242 
1243        sprintf(HTML_buffer, "<a name = %ld></a>"
1244                "<a href=%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s %s>",
1245                (long) sip->data.intvalue, NEW_ENTREZ_HREF, HTML_db,
1246                (long)sip->data.intvalue, HTML_dopt,
1247 	       options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
1248 
1249        ValNodeAddInt(already_linked, SEQID_GI, sip->data.intvalue);
1250 					}
1251 					/*check box for getting sequence*/
1252 					if(options&TXALIGN_HTML&&options&TXALIGN_MASTER&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
1253 					  Char checkboxBuf[200];
1254 					  snprintf(checkboxBuf, 200, "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%d\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%d', 'getSeqGi', this.checked)\">", sip->data.intvalue, query_number_glb);
1255 					  snprintf(docbuf+pos, size-pos, "%s", checkboxBuf);
1256 
1257 					  pos += StringLen(checkboxBuf);
1258 					}
1259 
1260 					html_len = StringLen(HTML_buffer);
1261 					snprintf(docbuf+pos, size-pos, "%s", HTML_buffer);
1262 					pos += html_len;
1263 
1264 					pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1265 						tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
1266 					load = TRUE;
1267 				}
1268 				else if(sip->choice == SEQID_GENERAL)
1269 				{
1270                                    db_tag = (DbtagPtr) sip->data.ptrvalue;
1271                                    if(db_tag->db) {
1272                                       oip = db_tag->tag;
1273                                       if(oip->id != 0) {
1274                                          if (StringCmp(db_tag->db, "THC") == 0) {
1275                                             sprintf(HTML_buffer, "<a name = THC%ld></a><a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id, (long) oip->id);
1276 
1277                                             html_len = StringLen(HTML_buffer);
1278                                             snprintf(docbuf+pos, size-pos, "%s",
1279                                                      HTML_buffer);
1280                                             pos += html_len;
1281                                             pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1282 					    		tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
1283                                             load = TRUE;
1284                                          } else if (!StringICmp(db_tag->db, "TI")) {
1285                                             sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id, (long) oip->id);
1286 
1287                                             html_len = StringLen(HTML_buffer);
1288                                             snprintf(docbuf+pos, size-pos, "%s",
1289                                                      HTML_buffer);
1290                                             pos += html_len;
1291                                             pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1292 					    		tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
1293                                             load = TRUE;
1294                                          }
1295                                       }
1296                                    }
1297                                 }
1298 				sip = sip->next;
1299 			}
1300 		}
1301 
1302 		if(!load){
1303 		  if(options&TXALIGN_HTML&&options&TXALIGN_MASTER&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
1304 		    Char checkboxBuf[200];
1305 		    snprintf(checkboxBuf, 200, "<input type=\"checkbox\" name=\"getSeqMaster\" value=\"\" onClick=\"uncheckable('getSeqAlignment%d', 'getSeqMaster')\">", query_number_glb);
1306 		    snprintf(docbuf+pos, size-pos, "%s", checkboxBuf);
1307 
1308 		    pos += StringLen(checkboxBuf);
1309 		  }
1310 		  pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos, tdp->strand, FALSE, FALSE, label_size, num_size, show_strand, strip_semicolon);
1311 		}
1312 		snprintf(docbuf+pos, size-pos, "%s", tdp->buf);
1313 		pos += StringLen(tdp->buf);
1314 		if(stop_val >=0 && is_first)
1315 		{
1316 			sprintf(temp, " %ld\n", (long) (stop_val+1));
1317 			sprintf(docbuf+pos, "%s", temp);
1318 			pos += StringLen(temp);
1319 		}
1320 		else
1321 		{
1322 			sprintf(docbuf+pos, "\n");
1323 			pos += 1;
1324 		}
1325 
1326 		if(m_buf != NULL && *m_buf == NULL)
1327 			*m_buf = StringSave(tdp->buf);
1328 
1329 	    }
1330 	    tdp_list = tdp_list->next;
1331 	    is_first = FALSE;
1332 	}
1333 
1334 	if(is_html)
1335 		MemFree(HTML_buffer);
1336 	MemFree(matrix_buf);
1337 	docbuf[pos] = '\0';
1338 	return docbuf;
1339 }
1340 
1341 
1342 
1343 /*************************************************************************
1344 *
1345 *	DrawTextList(tdp_list, fp)
1346 *	returns the tdbp->text of the first node. It is used as the master
1347 *	sequence to compare the mismatches
1348 *
1349 *************************************************************************/
1350 /* static CharPtr DrawTextList(ValNodePtr tdp_list, FILE *fp)
1351 {
1352 	TextAlignBufPtr tdp;
1353 	CharPtr m_buf = NULL;
1354 	Int2 i;
1355 
1356 	if(tdp_list==NULL)
1357 		return NULL;
1358 
1359 	while(tdp_list)
1360 	{
1361 	   tdp = tdp_list->data.ptrvalue;
1362 	   if(tdp->exonCount > 0 && tdp->buf == NULL)
1363 	   {
1364 		for(i =0; i<3; ++i)
1365 		{
1366 			if(i == tdp->frame)
1367 				print_label(fp, tdp->label, tdp->pos, tdp->strand, tdp->extra_space);
1368 			else
1369 				print_label(fp, NULL, -1, 0, tdp->extra_space);
1370 			fprintf(fp, "%s\n", tdp->codon[i]);
1371 		}
1372 	    }
1373 	    if(tdp->exonCount == 0 && tdp->buf !=NULL)
1374 	    {
1375 		print_label(fp, tdp->label, tdp->pos, tdp->strand, FALSE);
1376 		fprintf(fp, "%s\n", tdp->buf);
1377 		if(m_buf == NULL)
1378 			m_buf = tdp->buf;
1379 	    }
1380 	    tdp_list = tdp_list->next;
1381 	}
1382 
1383 	return m_buf;
1384 } */
1385 
make_scale_bar_str(CharPtr PNTR bar,CharPtr PNTR num_str,Int4 num_empty,Int4 line_len)1386 NLM_EXTERN Boolean make_scale_bar_str(CharPtr PNTR bar, CharPtr PNTR num_str,
1387 		Int4 num_empty, Int4 line_len)
1388 {
1389 	Int4 i, j;
1390 	CharPtr curr;
1391 	Char temp[100];
1392 	Int4 len;
1393 
1394 	if(bar == NULL || num_str == NULL)
1395 		return FALSE;
1396 	*bar = (CharPtr) MemNew((size_t)(line_len+num_empty+2) * sizeof(Char));
1397 	*num_str = (CharPtr) MemNew((size_t)(line_len+num_empty+2) * sizeof(Char));
1398 	make_empty(*bar, (Int2)(line_len+num_empty));
1399 	make_empty(*num_str, (Int2)(line_len+num_empty));
1400 	for(i =0; i<line_len; ++i)
1401 	{
1402 		curr = *bar;
1403 		if((i+1)%5 ==0)
1404 			curr[i+num_empty]= '|';
1405 		curr = *num_str;
1406 		if((i+1)%10==0)
1407 		{
1408 			sprintf(temp, "%ld", (long) (i+1));
1409 			len = StringLen(temp);
1410 			for(j = 0; j<len; ++j)
1411 				curr[i+num_empty-(len-1-j)] = temp[j];
1412 		}
1413 
1414 	}
1415 	return TRUE;
1416 }
1417 
get_master_align_node(ValNodePtr anp_list)1418 static AlignNodePtr get_master_align_node(ValNodePtr anp_list)
1419 {
1420 	AlignNodePtr anp, first_anp = NULL;
1421 
1422 	while(anp_list)
1423 	{
1424 		if(anp_list->choice != OBJ_SEQANNOT)
1425 		{
1426 			anp = (AlignNodePtr) anp_list->data.ptrvalue;
1427 			if(anp->is_master)
1428 				return anp;
1429 			else if(first_anp == NULL)
1430 				first_anp = anp;
1431 		}
1432 		anp_list = anp_list->next;
1433 	}
1434 	return first_anp;
1435 }
1436 
1437 
1438 /**********************************************************************
1439 *
1440 *	figure out the DNA-protein alignment, the frame of translation
1441 *	in the DNA sequence compared with the protein sequence
1442 *	m_len is used to figure out the reading frame of the minus
1443 *	strand translation
1444 *
1445 **********************************************************************/
load_master_translate_frame(ValNodePtr anp_list,Int4 m_len,BioseqPtr m_bsp)1446 static Boolean load_master_translate_frame(ValNodePtr anp_list, Int4 m_len, BioseqPtr m_bsp)
1447 {
1448 	AlignNodePtr anp;
1449 	AlignSegPtr asp;
1450 	Int4 g_left, g_right;
1451 	Int4 start_pos;
1452 	Uint1 strand;
1453 	Int4 val;
1454 	AnnotInfoPtr annot_info;
1455 	Uint1 align_type;
1456 	AlignNodePtr master_anp;
1457 	Int4 offset;
1458 	Boolean found;
1459 
1460 
1461 	master_anp = get_master_align_node(anp_list);
1462 	if(master_anp == NULL)
1463 		return FALSE;
1464 
1465 	while(anp_list)
1466 	{
1467 		align_type = 0;
1468 		annot_info = NULL;
1469 		while(anp_list != NULL)
1470 		{
1471 			if(anp_list->choice == OBJ_SEQANNOT)
1472 			{
1473 				annot_info = (AnnotInfoPtr) anp_list->data.ptrvalue;
1474 				align_type = get_alignment_type(annot_info);
1475 			}
1476 			else
1477 			{
1478 				if(annot_info == NULL)
1479 					break;
1480 				else
1481 				{
1482 					if(align_type != 0 || annot_info->annot_type == ANNOT_BLAST)
1483 						break;
1484 				}
1485 			}
1486 			anp_list = anp_list->next;
1487 		}
1488 
1489 		while(anp_list != NULL && anp_list->choice != OBJ_SEQANNOT)
1490 		{
1491 			if(align_type == ALIGN_DNA_TO_PROT || align_type == ALIGN_TDNA_TO_TDNA)
1492 			{
1493 				anp = (AlignNodePtr) anp_list->data.ptrvalue;
1494 				/*for tblastx, need to figure out the translation frame of the master,
1495 				   for blastx (reverted to tblastn) and tblastn, need to figure out
1496 				   the frame for the query
1497 				*/
1498 				if((align_type == ALIGN_TDNA_TO_TDNA && BioseqMatch(m_bsp, anp->sip))||
1499 					(!(anp->is_master) && !BioseqMatch(m_bsp, anp->sip)))
1500 				{
1501 					g_left = anp->extremes.left;
1502 					g_right = anp->extremes.right;
1503 					if(align_type == ALIGN_TDNA_TO_TDNA)
1504 					{
1505 						strand = anp->extremes.strand;
1506 						offset = 0;
1507 						found = TRUE;
1508 					}
1509 					else
1510 					{
1511 						strand = Seq_strand_plus;
1512 						if(anp->extremes.strand != master_anp->extremes.strand)
1513 						{
1514 							if(anp->extremes.strand == Seq_strand_minus ||
1515 								master_anp->extremes.strand == Seq_strand_minus)
1516 									strand = Seq_strand_minus;
1517 						}
1518 						if(anp->extremes.strand == Seq_strand_minus)
1519 							g_left = g_right;
1520 
1521 						offset = 0;
1522 						found = FALSE;
1523 						for(asp = master_anp->segs; asp != NULL; asp = asp->next)
1524 						{
1525 							if(asp->type != GAP_SEG)
1526 							{
1527 								if(asp->type == INS_SEG)
1528 								{
1529 									if(offset > 0)
1530 										offset += asp->gr.right;
1531 								}
1532 								else
1533 								{
1534 									if(asp->gr.right < g_left)
1535 										offset += (asp->gr.right - asp->gr.left + 1);
1536 									else
1537 									{
1538 										if(g_left >= asp->gr.left && g_left <= asp->gr.right)
1539 										{
1540 											offset += MAX(0, g_left - asp->gr.left);
1541 											found = TRUE;
1542 											break;
1543 										}
1544 									}
1545 								}
1546 							}
1547 						}
1548 					}
1549 
1550 					if(found)
1551 					{
1552 						start_pos = ABS(master_anp->seqpos + offset);
1553 						if(strand == Seq_strand_minus)
1554 						{
1555 							val = (m_len -1 - start_pos)%3L;
1556 						}
1557 						else
1558 						{
1559 							val = start_pos%3L;
1560 						}
1561 
1562 						switch(val)
1563 						{
1564 							case 0:
1565 								if(strand == Seq_strand_minus)
1566 									anp->m_frame = 4;
1567 								else
1568 									anp->m_frame = 1;
1569 								break;
1570 							case 1:
1571 								if(strand == Seq_strand_minus)
1572 									anp->m_frame = 5;
1573 								else
1574 									anp->m_frame = 2;
1575 								break;
1576 							case 2:
1577 								if(strand == Seq_strand_minus)
1578 									anp->m_frame = 6;
1579 								else
1580 									anp->m_frame = 3;
1581 								break;
1582 							default:
1583 								break;
1584 						}
1585 					}
1586 				}
1587 			}
1588 			anp_list = anp_list->next;
1589 		}
1590 	}
1591 	return TRUE;
1592 }
1593 
1594 
1595 /**********************************************************************
1596 *
1597 *	figure out in the current range (m_left, m_right), the total number
1598 *	of reading frames that the hit proteins have
1599 *	return the AlignNode for the master sequence (master is always the
1600 *	DNA sequence
1601 *
1602 ***********************************************************************/
get_current_master_frame(ValNodePtr list,Int4 m_left,Int4 m_right,Uint1Ptr all_frame)1603 static Boolean get_current_master_frame(ValNodePtr list, Int4 m_left, Int4 m_right, Uint1Ptr all_frame)
1604 {
1605 	ValNodePtr anp_list;
1606 	AlignNodePtr anp;
1607 	Int4 g_left, g_right;
1608 	Uint1 i;
1609 	Boolean retval;
1610 
1611 	MemSet((Pointer)all_frame, 0, (size_t)6 * sizeof(Uint1));
1612 	while(list)
1613 	{
1614 		anp_list = (ValNodePtr) list->data.ptrvalue;
1615 		anp = (AlignNodePtr) anp_list->data.ptrvalue;
1616 		if(!(anp->is_master))
1617 		{
1618 			g_left = anp->extremes.left;
1619 			g_right = anp->extremes.right;
1620 			if(!(g_left > m_right || g_right < m_left))
1621 			{
1622 				if(anp->m_frame != 0)
1623 				{
1624 					for(i = 0; i<6; ++i)
1625 					{
1626 						if(all_frame[i] == anp->m_frame)
1627 							break;
1628 						else if(all_frame[i] == 0)
1629 						{
1630 							all_frame[i] = anp->m_frame;
1631 							break;
1632 						}
1633 					}
1634 					retval = TRUE;
1635 				}
1636 			}
1637 		}
1638 
1639 		list = list->next;
1640 	}
1641 
1642 	return retval;
1643 }
1644 
make_fake_cds(BioseqPtr m_bsp,Int4 start,Int4 stop,Uint1 strand)1645 NLM_EXTERN SeqFeatPtr make_fake_cds(BioseqPtr m_bsp, Int4 start, Int4 stop, Uint1 strand)
1646 {
1647 	SeqFeatPtr sfp;
1648 	CdRegionPtr crp;
1649 	IntFuzzPtr ifp_from, ifp_to;
1650 	Uint1 g_code = 0;
1651 	SeqDescrPtr descr;
1652 	SeqIntPtr seq_int;
1653 	SeqLocPtr slp;
1654 	BioSourcePtr source;
1655 	OrgRefPtr org;
1656 	OrgNamePtr orgname;
1657 	ValNodePtr vnp;
1658 
1659 	descr = m_bsp->descr;
1660 	while(descr)
1661 	{
1662 		/*look into BioSource to get the genetic code*/
1663 		if(descr->choice == Seq_descr_source)
1664 		{
1665 			source = (BioSourcePtr) descr->data.ptrvalue;
1666 			if(source != NULL)
1667 			{
1668 				org = source->org;
1669 				if(org != NULL)
1670 				{
1671 					orgname = org->orgname;
1672 					if(orgname != NULL)
1673 					{
1674 						g_code = orgname->gcode;
1675 						break;
1676 					}
1677 				}
1678 			}
1679 		}
1680 		descr = descr->next;
1681 	}
1682 
1683 	crp = CdRegionNew();
1684 	if(g_code != 0)
1685 	{
1686 		vnp = ValNodeNew(NULL);
1687 		vnp->choice = 2;
1688 		vnp->data.intvalue = (Int4)g_code;
1689 		ValNodeAddPointer(&(crp->genetic_code), 254, (Pointer)vnp);
1690 	}
1691 
1692 	sfp = SeqFeatNew();
1693 	sfp->data.choice = 3;
1694 	sfp->data.value.ptrvalue = crp;
1695 	sfp->partial = TRUE;
1696 	sfp->product = NULL;
1697 	slp = SeqLocIntNew(start, stop, strand, m_bsp->id);
1698 	seq_int = (SeqIntPtr) slp->data.ptrvalue;
1699 	ifp_from = IntFuzzNew();
1700 	ifp_from->choice = 4;
1701 	ifp_from->a = 2;
1702 	seq_int->if_from = ifp_from;
1703 	ifp_to = IntFuzzNew();
1704 	ifp_to->choice = 4;
1705 	ifp_to->a = 1;
1706 	seq_int->if_to = ifp_to;
1707 	sfp->location = slp;
1708 
1709 
1710 	return sfp;
1711 }
1712 
1713 
translate_faked_cds(SeqFeatPtr fake_cds,Uint1 frame,Int4 c_start,Int4 c_stop,Int4 master_len,AlignNodePtr anp)1714 static CharPtr translate_faked_cds(SeqFeatPtr fake_cds, Uint1 frame, Int4 c_start, Int4 c_stop, Int4 master_len, AlignNodePtr anp)
1715 {
1716 	Uint1 c_frame;
1717 	SeqLocPtr slp, t_slp;
1718 	SeqIntPtr sint;
1719 	Uint1 strand;
1720 	CdRegionPtr crp;
1721 	CharPtr buf;
1722 	Int4 from, to;
1723 	Int4 n;
1724 	AlignSegPtr asp;
1725 	Int4 m_start, m_stop;
1726 	Int4 t_start, t_stop;
1727 	Int4 l_pos = 0;
1728 	Int4 offset;
1729 
1730 
1731 	buf = (CharPtr) MemNew((size_t)(c_stop - c_start+2) * sizeof(Char));
1732 	buf[0] = '\0';
1733 	offset = 0;
1734 	for(asp = anp->segs; asp != NULL; asp = asp->next)
1735 	{
1736 		if(!((asp->gr.left > c_stop) || ( asp->gr.right < c_start)))
1737 		{
1738 			t_start = MAX(asp->gr.left, c_start);
1739 			t_stop = MIN(asp->gr.right, c_stop);
1740 			m_start = ABS(anp->seqpos + t_start - offset - anp->extremes.left);
1741 			m_stop = ABS(anp->seqpos + t_stop - offset - anp->extremes.left);
1742 			if(asp->type == GAP_SEG)
1743 			{
1744 				MemSet(buf+l_pos, ' ', (size_t)(t_stop - t_start + 1) * sizeof (Char));
1745 				l_pos += t_stop - t_start + 1;
1746 				buf[l_pos] = '\0';
1747 			}
1748 			else if(asp->type != INS_SEG)
1749 			{
1750 				if(frame > 3)
1751 				{
1752 					strand = Seq_strand_minus;
1753 					n = MAX(0, (master_len-1 - m_stop)/3 -1);
1754 					from = m_start;
1755 					to = master_len -1 - n*3;
1756 					c_frame = frame -3;
1757 					from = MAX(0, from -3);
1758 				}
1759 				else
1760 				{
1761 					strand = Seq_strand_plus;
1762 					n = MAX(0, (m_start/3-1));
1763 					from = n * 3;
1764 					to = m_stop;
1765 					c_frame = frame;
1766 					to = MIN(master_len-1, to + 3);
1767 				}
1768 				slp = fake_cds->location;
1769 				sint = (SeqIntPtr) slp->data.ptrvalue;
1770 				sint->from = from;
1771 				sint->to = to;
1772 				sint->strand = strand;
1773 				crp = (CdRegionPtr) fake_cds->data.value.ptrvalue;
1774 				crp->frame = c_frame;
1775 
1776 				t_slp = SeqLocIntNew(m_start, m_stop, Seq_strand_plus, SeqLocId(slp));
1777 				print_protein_for_cds(fake_cds, buf+l_pos, t_slp, TRUE);
1778 				l_pos += t_stop - t_start + 1;
1779 				SeqLocFree(t_slp);
1780 			}
1781 		}
1782 		else if(asp->gr.left > c_stop)
1783 			break;
1784 		if(asp->type == GAP_SEG)
1785 			offset += (asp->gr.right - asp->gr.left +1 );
1786 	}
1787 	return buf;
1788 }
1789 
load_fake_protein_buf(CharPtr buf,Uint1 frame,AlignNodePtr master_anp)1790 static ValNodePtr load_fake_protein_buf(CharPtr buf, Uint1 frame, AlignNodePtr master_anp)
1791 {
1792 	Char temp[20];
1793 	ValNodePtr head = NULL;
1794 	TextAlignBufPtr tdp;
1795 
1796 	tdp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
1797 	tdp->pos = -1;
1798 	if(frame >3)
1799 	{
1800 		tdp->strand = Seq_strand_minus;
1801 		sprintf(temp, "frame=-%d", frame-3);
1802 	}
1803 	else
1804 	{
1805 		tdp->strand = Seq_strand_plus;
1806 		sprintf(temp, "frame=+%d", frame);
1807 	}
1808 	tdp->label = StringSave(temp);
1809 	tdp->buf = StringSave(buf);
1810 	tdp->itemID = 0;
1811 	tdp->feattype = 0;
1812 	tdp->subtype = 0;
1813 	tdp->entityID = master_anp->entityID;
1814 	tdp->seqEntityID = master_anp->seq_entityID;
1815 	tdp->bsp_itemID = master_anp->bsp_itemID;
1816 	ValNodeAddPointer(&head, 0, tdp);
1817 	return head;
1818 }
1819 
has_hit_in_region(Uint1Ptr all_frame)1820 static Boolean has_hit_in_region(Uint1Ptr all_frame)
1821 {
1822 	Uint1 i;
1823 
1824 	for(i = 0; i<6; ++i)
1825 		if(all_frame[i] != 0)
1826 			return TRUE;
1827 	return FALSE;
1828 }
1829 
1830 
check_bsp_id(ValNodePtr PNTR id_list,SeqIdPtr sip)1831 static Boolean check_bsp_id(ValNodePtr PNTR id_list, SeqIdPtr sip)
1832 {
1833 	ValNodePtr curr;
1834 
1835 	if(*id_list == NULL)
1836 	{
1837 		ValNodeAddPointer(id_list, 0, sip);
1838 		return FALSE;
1839 	}
1840 	curr = *id_list;
1841 	while(curr)
1842 	{
1843 		if(curr->data.ptrvalue == sip ||
1844 			SeqIdMatch((SeqIdPtr)(curr->data.ptrvalue), sip))
1845 			return TRUE;
1846 		curr = curr->next;
1847 	}
1848 
1849 
1850 	ValNodeAddPointer(id_list, 0, sip);
1851 	return FALSE;
1852 }
1853 
find_align_proc(GatherContextPtr gcp)1854 static Boolean find_align_proc(GatherContextPtr gcp)
1855 {
1856 	SeqAlignPtr PNTR p_align;
1857 
1858 	p_align = (SeqAlignPtr PNTR)(gcp->userdata);
1859 	*p_align = (SeqAlignPtr)gcp->thisitem;
1860 	return TRUE;
1861 }
1862 
1863 
/*functions that load the alignment summary (the number of identical and
positive residues and the number of gaps) into the structure passed as
an AlignSumPtr
*/
1867 
load_align_sum_for_DenseDiag(DenseDiagPtr ddp,AlignSumPtr asp)1868 static Boolean load_align_sum_for_DenseDiag(DenseDiagPtr ddp, AlignSumPtr asp)
1869 {
1870     SeqInt si;
1871     SeqLoc sl;
1872     Int4 i;
1873     Int2 m_order, t_order;	/*order of the master and the target sequence*/
1874     Uint1 m_res, t_res;
1875     SeqIdPtr sip;
1876     SeqPortPtr m_spp, t_spp;
1877     Int2 dim;
1878     SeqPortPtr spp;
1879 
1880     if(ddp == NULL || asp == NULL)
1881         return FALSE;
1882     m_order = -1;
1883     t_order = -1;
1884     dim = 0;
1885     for(i = 0, sip = ddp->id; sip != NULL; sip = sip->next, ++i) {
1886         if(SeqIdMatch(sip, asp->master_sip) && m_order == -1)
1887             m_order = i;
1888         else if(SeqIdMatch(sip, asp->target_sip) && t_order == -1)
1889             t_order = i;
1890         ++dim;
1891     }
1892 
1893     if(m_order == -1 || t_order == -1)
1894         return FALSE;
1895 
1896     asp->m_frame_set = FALSE;
1897     asp->t_frame_set = FALSE;
1898 
1899     for(i = 0; i<dim; ++i) {
1900         if(i == m_order || i == t_order) {
1901 
1902             if(i == m_order)
1903                 si.id = asp->master_sip;
1904             else
1905                 si.id = asp->target_sip;
1906             si.from = ddp->starts[i];
1907             si.to = si.from + ddp->len -1;
1908             if(ddp->strands != NULL)
1909                 si.strand = ddp->strands[i];
1910             else
1911                 si.strand = 0;
1912 
1913 
1914             if (asp->is_aa) {
1915                 asp->m_strand = Seq_strand_unknown;
1916                 asp->t_strand = Seq_strand_unknown;
1917             } else {
1918                 if(i == m_order) {
1919                     asp->m_strand = si.strand;
1920                 } else {
1921                     asp->t_strand = si.strand;
1922                 }
1923             }
1924 
1925             sl.choice = SEQLOC_INT;
1926             sl.data.ptrvalue = &si;
1927 
1928             spp = SeqPortNewByLoc(&sl, (asp->is_aa) ? Seq_code_ncbieaa : Seq_code_iupacna);
1929             if(i == m_order) {
1930                 asp->master_from = si.from;
1931                 asp->master_to = si.to;
1932                 m_spp = spp;
1933             } else {
1934                 asp->target_from = si.from;
1935                 asp->target_to = si.to;
1936                 t_spp = spp;
1937             }
1938         }
1939     }
1940 
1941     if(m_spp == NULL || t_spp == NULL) {
1942         if(m_spp == NULL)
1943             SeqPortFree(m_spp);
1944         if(t_spp != NULL)
1945             SeqPortFree(t_spp);
1946         return FALSE;
1947     }
1948 
1949     for(i = 0; i<ddp->len; ++i) {
1950         m_res = SeqPortGetResidue(m_spp);
1951         t_res = SeqPortGetResidue(t_spp);
1952         if(m_res == t_res)
1953            ++(asp->identical);
1954         else if ((asp->matrix != NULL && asp->is_aa) &&
1955                  (IS_residue(m_res) && IS_residue(t_res)) &&
1956                  (asp->matrix[m_res][t_res] >0))
1957            ++(asp->positive);
1958     }
1959     asp->totlen = ddp->len;
1960 
1961     SeqPortFree(m_spp);
1962     SeqPortFree(t_spp);
1963     return TRUE;
1964 }
1965 
1966 
load_align_sum_for_DenseSeg(DenseSegPtr dsp,AlignSumPtr asp)1967 static Boolean load_align_sum_for_DenseSeg(DenseSegPtr dsp, AlignSumPtr asp)
1968 {
1969     SeqInt msi, tsi;
1970     SeqIntPtr sint;
1971     SeqLoc sl;
1972     Int2 i, k;
1973     Int2 dim;
1974     Int2 m_order, t_order;	/*order of the master and the target sequence*/
1975     Int4 index;
1976     Int4 j, val, t_val;
1977     Uint1 m_res, t_res, stdaa_res;
1978     SeqIdPtr sip;
1979     SeqPortPtr m_spp, t_spp;
1980     SeqMapTablePtr smtp;
1981 
1982 
1983     if(dsp == NULL || asp == NULL)
1984         return FALSE;
1985 
1986     if(asp->posMatrix != NULL) {
1987         if((smtp = SeqMapTableFindObj(Seq_code_ncbistdaa,
1988                                       Seq_code_ncbieaa)) == NULL)
1989             return FALSE;
1990     }
1991 
1992     m_order = -1;
1993     t_order = -1;
1994     dim = 0;
1995     for(i = 0, sip = dsp->ids; sip != NULL; sip = sip->next, ++i) {
1996         if(SeqIdMatch(sip, asp->master_sip) && m_order == -1)
1997             m_order = i;
1998         else if(SeqIdMatch(sip, asp->target_sip) && t_order == -1)
1999             t_order = i;
2000         ++dim;
2001     }
2002 
2003     if(m_order == -1 || t_order == -1)
2004         return FALSE;
2005 
2006     msi.id = asp->master_sip;
2007     msi.from = -1;
2008     msi.to = -1;
2009     msi.strand = (dsp->strands == NULL) ? 0 : dsp->strands[m_order];
2010 
2011     tsi.id = asp->target_sip;
2012     tsi.from = -1;
2013     tsi.to = -1;
2014     tsi.strand = (dsp->strands == NULL) ? 0 : dsp->strands[t_order];
2015 
2016     for(i = 0; i<dsp->numseg; ++i) {
2017         for(k = 0; k<dim; ++k) {
2018             val = dsp->starts[i*dim + k];
2019             if(val != -1 && (k == m_order || k == t_order)) {
2020                 sint = (k == m_order) ? (&msi) : (&tsi);
2021                 if(sint->from == -1 || sint->from > val)
2022                     sint->from = val;
2023                 if(sint->to == -1 || sint->to < (val + dsp->lens[i] -1))
2024                     sint->to = val + dsp->lens[i] -1;
2025             }
2026         }
2027     }
2028 
2029     asp->master_from = msi.from;
2030     asp->master_to = msi.to;
2031     asp->target_from = tsi.from;
2032     asp->target_to = tsi.to;
2033 
2034     if (asp->is_aa) {
2035         asp->m_strand = Seq_strand_unknown;
2036         asp->t_strand = Seq_strand_unknown;
2037     } else {
2038         asp->m_strand = dsp->strands[m_order];
2039         asp->t_strand = dsp->strands[t_order];
2040     }
2041     asp->m_frame_set = FALSE;
2042     asp->t_frame_set = FALSE;
2043 
2044     sl.choice = SEQLOC_INT;
2045     sl.data.ptrvalue = &msi;
2046     m_spp = SeqPortNewByLoc(&sl, (asp->is_aa) ? Seq_code_ncbieaa : Seq_code_iupacna);
2047 
2048     sl.choice = SEQLOC_INT;
2049     sl.data.ptrvalue = &tsi;
2050     t_spp = SeqPortNewByLoc(&sl, (asp->is_aa) ? Seq_code_ncbieaa : Seq_code_iupacna);
2051 
2052     for(i = 0; i<dsp->numseg; ++i) {
2053         val = dsp->starts[i*dim + m_order];
2054         t_val = dsp->starts[i*dim + t_order];
2055         if(val == -1 || t_val == -1) {
2056             asp->gaps += dsp->lens[i];
2057             if(val != -1) {
2058                 index = dsp->lens[i];
2059                 while (index > 0) {
2060                     index--;
2061                     m_res = SeqPortGetResidue(m_spp);
2062                 }
2063             }
2064             if(t_val != -1) {
2065                 index = dsp->lens[i];
2066                 while (index > 0) {
2067                     index--;
2068                     t_res = SeqPortGetResidue(t_spp);
2069                 }
2070             }
2071         } else {
2072             for(j = 0; j<dsp->lens[i]; ++j) {
2073                 m_res = SeqPortGetResidue(m_spp);
2074                 t_res = SeqPortGetResidue(t_spp);
2075                 if(m_res == t_res)
2076                    ++(asp->identical);
2077                 else if ((asp->matrix != NULL && asp->is_aa) &&
2078                          (IS_residue(m_res) && IS_residue(t_res))) {
2079                    if(asp->posMatrix != NULL) {
2080                       stdaa_res = SeqMapTableConvert(smtp, t_res);
2081                       if(asp->posMatrix[val+j][stdaa_res] > 0)
2082                          ++(asp->positive);
2083                    } else {
2084                       if(asp->matrix[m_res][t_res] >0)
2085                          ++(asp->positive);
2086                    }
2087                 }
2088             }
2089         }
2090         asp->totlen += dsp->lens[i];
2091     }
2092     SeqPortFree(m_spp);
2093     SeqPortFree(t_spp);
2094     return TRUE;
2095 }
2096 
2097 /*
2098 	Obtains the genetic code from a BioseqPtr, assuming that a fetch function
2099 	has been enabled.
2100 */
2101 NLM_EXTERN CharPtr
GetGeneticCodeFromSeqId(SeqIdPtr sip)2102 GetGeneticCodeFromSeqId (SeqIdPtr sip)
2103 
2104 {
2105 	BioseqPtr bsp;
2106 	BioSourcePtr source;
2107 	CharPtr genetic_code=NULL;
2108 	GeneticCodePtr gcp;
2109 	Int4 gen_code_val=1;	/* std genetic code if nothing found. */
2110 	ValNodePtr vnp;
2111 
2112 
2113 	bsp = BioseqLockById(sip);
2114 
2115 	if (bsp)
2116 	{
2117 		vnp = BioseqGetSeqDescr(bsp, Seq_descr_source, NULL);
2118 		if (vnp)
2119 		{
2120 			source = (BioSourcePtr) vnp->data.ptrvalue;
2121 			gen_code_val = source->org->orgname->gcode;
2122 		}
2123 		BioseqUnlock(bsp);
2124 	}
2125 
2126 	gcp = GeneticCodeFind(gen_code_val, NULL);
2127 	for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
2128 	{
2129 		if (vnp->choice == 3)   /* ncbieaa */
2130 		{
2131 			genetic_code = (CharPtr)vnp->data.ptrvalue;
2132 			break;
2133 		}
2134 	}
2135 
2136 	return genetic_code;
2137 }
OOFTranslateDNAInAllFrames(Uint1Ptr dna,Int4 length,SeqIdPtr query_id)2138 NLM_EXTERN CharPtr OOFTranslateDNAInAllFrames(Uint1Ptr dna, Int4 length,
2139                                               SeqIdPtr query_id)
2140 {
2141     CharPtr dnap;
2142     CharPtr codes;
2143     Int4 i;
2144     Uint1 codon[3];
2145 
2146     if(dna == NULL || length == 0)
2147         return NULL;
2148 
2149     dnap = (CharPtr) MemNew(length+1);
2150     codes = GetGeneticCodeFromSeqId(query_id);
2151 
2152     dnap[0] = dnap[1] = dnap[2] = 0;
2153 
2154     for (i = 2; i < length; i++) {
2155         codon[0] = dna[i-2];
2156         codon[1] = dna[i-1];
2157         codon[2] = dna[i];
2158         dnap[i+1] = AAForCodon(codon, codes);
2159     }
2160     return dnap;
2161 }
2162 
2163 NLM_EXTERN Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes);
2164 
load_align_sum_for_StdSeg(StdSegPtr ssp,AlignSumPtr asp)2165 static Boolean load_align_sum_for_StdSeg(StdSegPtr ssp, AlignSumPtr asp)
2166 {
2167     Boolean master_is_translated=FALSE, both_translated=FALSE;
2168     Boolean target_is_translated = FALSE;
2169     BioseqPtr bsp;
2170     CharPtr genetic_code1, genetic_code2;
2171     SeqPortPtr spp1, spp2;
2172     Uint1 codon[4], residue1, residue2;
2173     Boolean ungapped_align = FALSE;
2174     StdSegPtr ssp_last;
2175 
2176     if(ssp == NULL || asp == NULL)
2177         return FALSE;
2178 
2179     if(asp->ooframe) {
2180        if (SeqLocStrand(ssp->loc) != Seq_strand_unknown) {
2181           master_is_translated = TRUE;
2182           target_is_translated = FALSE;
2183        } else {
2184           master_is_translated = FALSE;
2185           target_is_translated = TRUE;
2186        }
2187     } else {
2188         /* Check for valid sequence. */
2189         if (SeqLocLen(ssp->loc) == 3*SeqLocLen(ssp->loc->next))
2190             master_is_translated = TRUE;
2191         else if (3*SeqLocLen(ssp->loc) == SeqLocLen(ssp->loc->next))
2192             target_is_translated = TRUE;
2193         else if (SeqLocLen(ssp->loc) == SeqLocLen(ssp->loc->next))
2194             both_translated = TRUE;
2195         else
2196             return FALSE;
2197     }
2198 
2199     if (master_is_translated) {
2200         genetic_code1 = GetGeneticCodeFromSeqId(ssp->ids);
2201     } else if (both_translated) {
2202         genetic_code1 = GetGeneticCodeFromSeqId(ssp->ids);
2203         genetic_code2 = GetGeneticCodeFromSeqId(ssp->ids->next);
2204     } else {
2205         genetic_code1 = GetGeneticCodeFromSeqId(ssp->ids->next);
2206     }
2207 
2208     asp->m_frame_set = FALSE;
2209     asp->t_frame_set = FALSE;
2210 
2211     if (master_is_translated || both_translated) {
2212         asp->m_strand = SeqLocStrand(ssp->loc);
2213         asp->m_frame = SeqLocStart(ssp->loc);
2214         if (SeqLocStrand(ssp->loc) == Seq_strand_minus) {
2215             bsp = BioseqLockById(SeqLocId(ssp->loc));
2216             asp->m_frame += SeqLocLen(ssp->loc);
2217             asp->m_frame = -(1+(bsp->length - asp->m_frame)%3);
2218             asp->m_frame_set = TRUE;
2219             BioseqUnlock(bsp);
2220         } else {
2221             asp->m_frame = (1+(asp->m_frame)%3);
2222             asp->m_frame_set = TRUE;
2223         }
2224     }
2225 
2226     if (!master_is_translated || both_translated) {
2227         asp->t_strand = SeqLocStrand(ssp->loc->next);
2228         asp->t_frame = SeqLocStart(ssp->loc->next);
2229         if (SeqLocStrand(ssp->loc->next) == Seq_strand_minus) {
2230             if (bsp = BioseqLockById(SeqLocId(ssp->loc->next))) {
2231                 asp->t_frame += SeqLocLen(ssp->loc->next);
2232                 asp->t_frame = -(1+(bsp->length - asp->t_frame)%3);
2233                 asp->t_frame_set = TRUE;
2234                 BioseqUnlock(bsp);
2235             } else {
2236                 return FALSE;
2237             }
2238         } else {
2239             asp->t_frame = (1+(asp->t_frame)%3);
2240             asp->t_frame_set = TRUE;
2241         }
2242     }
2243 
2244     if (SeqLocStrand(ssp->loc) == Seq_strand_minus) {
2245         asp->master_from = SeqLocStop(ssp->loc);
2246     } else {
2247         asp->master_from = SeqLocStart(ssp->loc);
2248     }
2249 
2250     if (SeqLocStrand(ssp->loc->next) == Seq_strand_minus) {
2251         asp->target_from = SeqLocStop(ssp->loc->next);
2252     } else {
2253         asp->target_from = SeqLocStart(ssp->loc->next);
2254     }
2255 
2256 
2257     while (ssp) {
2258         if (ssp->loc->choice != SEQLOC_EMPTY && ssp->loc->next->choice != SEQLOC_EMPTY) {
2259             if (both_translated) {
2260                 spp1 = SeqPortNewByLoc(ssp->loc, Seq_code_ncbi4na);
2261                 spp2 = SeqPortNewByLoc(ssp->loc->next, Seq_code_ncbi4na);
2262                 while ((codon[0]=SeqPortGetResidue(spp2)) != SEQPORT_EOF) {
2263                     codon[1] = SeqPortGetResidue(spp2);
2264                     codon[2] = SeqPortGetResidue(spp2);
2265                     residue1 = AAForCodon(codon, genetic_code1);
2266                     codon[0] = SeqPortGetResidue(spp1);
2267                     codon[1] = SeqPortGetResidue(spp1);
2268                     codon[2] = SeqPortGetResidue(spp1);
2269                     residue2 = AAForCodon(codon, genetic_code2);
2270                     if (residue1 == residue2)
2271                        ++(asp->identical);
2272                     else if (asp->matrix != NULL &&
2273                               asp->matrix[residue1][residue2] >0)
2274                         ++(asp->positive);
2275                 }
2276             } else {
2277                 if (master_is_translated) {
2278                     spp1 = SeqPortNewByLoc(ssp->loc, Seq_code_ncbi4na);
2279                     spp2 = SeqPortNewByLoc(ssp->loc->next, Seq_code_ncbieaa);
2280                 } else {
2281                     spp2 = SeqPortNewByLoc(ssp->loc, Seq_code_ncbieaa);
2282                     spp1 = SeqPortNewByLoc(ssp->loc->next, Seq_code_ncbi4na);
2283                 }
2284 
2285                 while ((residue1=SeqPortGetResidue(spp2)) != SEQPORT_EOF) {
2286                     codon[0] = SeqPortGetResidue(spp1);
2287                     codon[1] = SeqPortGetResidue(spp1);
2288                     codon[2] = SeqPortGetResidue(spp1);
2289                     residue2 = AAForCodon(codon, genetic_code1);
2290 
2291                     if (residue1 == residue2)
2292                        ++(asp->identical);
2293                     else if (asp->matrix != NULL &&
2294                              asp->matrix[residue1][residue2] >0)
2295                        ++(asp->positive);
2296                 }
2297             }
2298             SeqPortFree(spp1);
2299             SeqPortFree(spp2);
2300             /* Check if this is an ungapped alignment;
2301                in this case do not go to next link */
2302             if (!asp->ooframe && ssp->next &&
2303                 ssp->next->loc->choice != SEQLOC_EMPTY &&
2304                 ssp->next->loc->next->choice != SEQLOC_EMPTY)
2305                 ungapped_align = TRUE;
2306         } else {	/* Count only gaps in the top (master) strand. */
2307             if (ssp->loc->choice == SEQLOC_EMPTY)
2308                 {
2309                     if (!master_is_translated || both_translated)
2310                         asp->gaps += SeqLocLen(ssp->loc->next)/3;
2311                     else
2312                         asp->gaps += SeqLocLen(ssp->loc->next);
2313                 }
2314         }
2315 
2316         if(asp->ooframe) {
2317            if (master_is_translated) {
2318               if(ssp->loc->next->choice != SEQLOC_EMPTY)
2319                  asp->totlen += SeqLocLen(ssp->loc->next);
2320               else
2321                  asp->totlen += SeqLocLen(ssp->loc)/3;
2322            } else {
2323               if(ssp->loc->choice != SEQLOC_EMPTY)
2324                  asp->totlen += SeqLocLen(ssp->loc);
2325               else
2326                  asp->totlen += SeqLocLen(ssp->loc->next)/3;
2327            }
2328         } else {
2329 
2330             if (ssp->loc->choice != SEQLOC_EMPTY) {
2331                 if (master_is_translated || both_translated)
2332                     asp->totlen += SeqLocLen(ssp->loc)/3;
2333                 else
2334                     asp->totlen += SeqLocLen(ssp->loc);
2335             } else {
2336                 if (target_is_translated || both_translated)
2337                     asp->totlen += SeqLocLen(ssp->loc->next)/3;
2338                 else
2339                     asp->totlen += SeqLocLen(ssp->loc->next);
2340             }
2341         }
2342 
2343         ssp_last = ssp;
2344 
2345         if (both_translated || ungapped_align)
2346             /* for tblastx perform only one StdSegPtr. */
2347             break;
2348 
2349         ssp = ssp->next;
2350     }
2351 
2352     if (SeqLocStrand(ssp_last->loc) == Seq_strand_minus) {
2353         asp->master_to = SeqLocStart(ssp_last->loc);
2354     } else {
2355         asp->master_to = SeqLocStop(ssp_last->loc);
2356     }
2357 
2358     if (SeqLocStrand(ssp_last->loc->next) == Seq_strand_minus) {
2359         asp->target_to = SeqLocStart(ssp_last->loc->next);
2360     } else {
2361         asp->target_to = SeqLocStop(ssp_last->loc->next);
2362     }
2363 
2364     return TRUE;
2365 }
2366 
2367 
2368 /*****************************************************************
2369 *
2370 *	find_score_in_align(align, chain, asp)
2371 *	align: the Seq-align point
2372 *	chain: for multiple segment Seq-aligns, such as DenseDiag and
2373 *	StdSeg, the order within the Seq-align
2374 *	asp:   the structure that records and stores the positive,
2375 *			identical residues
2376 *
2377 *****************************************************************/
find_score_in_align(SeqAlignPtr align,Uint2 chain,AlignSumPtr asp)2378 NLM_EXTERN ScorePtr find_score_in_align(SeqAlignPtr align, Uint2 chain,
2379                                         AlignSumPtr asp)
2380 {
2381     DenseDiagPtr ddp;
2382     DenseSegPtr dsp;
2383     StdSegPtr ssp;
2384     Uint2 order = 0;
2385     SeqAlignPtr sap;
2386     ScorePtr    sp;
2387 
2388     if(align == NULL)
2389         return NULL;
2390 
2391     if(asp != NULL) {
2392         asp->totlen = 0;
2393         asp->positive = 0;
2394         asp->identical = 0;
2395         asp->gaps = 0;
2396     }
2397     switch (align->segtype) {
2398     case 1: /*Dense-diag*/
2399         ddp = (DenseDiagPtr) align->segs;
2400         while(ddp) {
2401             ++order;
2402             if(order == chain) {
2403                 if(asp != NULL)
2404                     load_align_sum_for_DenseDiag(ddp, asp);
2405                 return ddp->scores;
2406             }
2407             ddp = ddp->next;
2408         }
2409         break;
2410     case 2:
2411         dsp = (DenseSegPtr) align->segs;
2412         if(asp != NULL)
2413             load_align_sum_for_DenseSeg(dsp, asp);
2414         if (dsp->scores)
2415             return dsp->scores;
2416         else
2417             return align->score;
2418     case 3:
2419         ssp = (StdSegPtr) align->segs;
2420         while(ssp) {
2421             ++order;
2422             if(order == chain) {
2423                 if(asp != NULL)
2424                     load_align_sum_for_StdSeg(ssp, asp);
2425                 if (ssp->scores)
2426                    return ssp->scores;
2427                 else
2428                    return align->score;
2429             }
2430             ssp = ssp->next;
2431         }
2432         break;
2433     case 5: /* Discontinuous alignment */
2434         sap = (SeqAlignPtr) align->segs;
2435 
2436         if((sp = find_score_in_align(sap, chain, asp)) == NULL)
2437             return align->score;
2438         else
2439             return sp;
2440     default:
2441         break;
2442     }
2443     return NULL;
2444 }
2445 
2446 /*try to decide if this fit the prototype of reversing the BLASTX
2447   result to make a TBLASTN output
2448 */
reverse_blastx_order(ValNodePtr anp_list)2449 static Boolean reverse_blastx_order (ValNodePtr anp_list)
2450 {
2451 	Int2 num = 0;
2452 	ValNodePtr c_list;
2453 	AnnotInfoPtr annot_info;
2454 	Uint1 align_type = 0;
2455 
2456 	for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2457 	{
2458 		if(c_list->choice == OBJ_SEQANNOT)
2459 		{
2460 			annot_info = (AnnotInfoPtr) c_list->data.ptrvalue;
2461 			align_type = get_alignment_type(annot_info);
2462 			if(align_type != ALIGN_DNA_TO_PROT)
2463 				return FALSE;
2464 		}
2465 		else
2466 		{
2467 			++num;
2468 			if(num > 2)	/*only for pairwise alignment */
2469 				return FALSE;
2470 		}
2471 	}
2472 
2473 	/* return (align_type == ALIGN_DNA_TO_PROT && num == 2); */
2474 	return (align_type == ALIGN_DNA_TO_PROT);
2475 }
2476 
2477 /*
2478 * change the alignnode of blastx to a pseudo tblastn and switch the
2479 * master sequence so that the blastx display will be the same as the
2480 * traditional blastx
2481 *
2482 */
/*
*	Rescale a range in place by a factor of 3.  With expand set, left is
*	multiplied by 3 and the length of the range is tripled; otherwise left
*	is divided by 3 and the length is divided by 3 (integer division in
*	both cases).  right is recomputed from the new left and new length.
*/
static void modify_gather_range (GatherRangePtr grp, Boolean expand)
{
	Int4 span = grp->right - grp->left + 1;

	if(!expand)
	{
		grp->left /= 3;
		grp->right = grp->left + span/3 - 1;
		return;
	}

	grp->left *= 3;
	grp->right = grp->left + span * 3 - 1;
}
2499 
change_blastx_master(ValNodePtr anp_list,AlignNodePtr PNTR master_anp)2500 static Boolean change_blastx_master(ValNodePtr anp_list, AlignNodePtr PNTR master_anp)
2501 {
2502 
2503 	ValNodePtr c_list;
2504 	AnnotInfoPtr annot_info = NULL;
2505 	AlignNodePtr anp, m_anp, t_anp;
2506 	AlignSegPtr asp;
2507 
2508 	anp = NULL;
2509 	m_anp = NULL;
2510 	annot_info = NULL;
2511 	for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2512 	{
2513 		if(c_list->choice == OBJ_SEQANNOT)
2514 		{
2515 			annot_info = (AnnotInfoPtr) c_list->data.ptrvalue;
2516 			if(annot_info != NULL &&
2517 				get_alignment_type(annot_info) != ALIGN_DNA_TO_PROT)
2518 				annot_info = NULL;
2519 		}
2520 		else
2521 		{
2522 			t_anp = (AlignNodePtr) c_list->data.ptrvalue;
2523 			if(t_anp->is_master || t_anp == *master_anp)
2524 				m_anp = t_anp;
2525 			else
2526 				anp = t_anp;
2527 		}
2528 	}
2529 
2530 	if(m_anp == NULL || anp == NULL || annot_info == NULL)
2531 		return FALSE;
2532 
2533 	/*shrink the interval */
2534 	for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2535 	{
2536 		if(c_list->choice != OBJ_SEQANNOT)
2537 		{
2538 			t_anp = (AlignNodePtr) c_list->data.ptrvalue;
2539 			modify_gather_range (&(t_anp->extremes), FALSE);
2540 			for(asp = t_anp->segs; asp != NULL; asp = asp->next)
2541 			{
2542 				if(asp->type != INS_SEG)
2543 					modify_gather_range (&(asp->gr), FALSE);
2544 			}
2545 		}
2546 	}
2547 
2548 	annot_info->blast_type = ALIGN_TBLASTN;
2549 	*master_anp = anp;
2550 	anp->is_master = TRUE;
2551 	m_anp->is_master = FALSE;
2552 	return TRUE;
2553 }
2554 
get_max_feature_label(AlignNodePtr anp)2555 static Int4 get_max_feature_label (AlignNodePtr anp)
2556 {
2557 	AlignSegPtr asp;
2558 	Int4 len = 0, f_len;
2559 	FeatNodePtr fnp;
2560 	ValNodePtr vnp;
2561 
2562 	for(asp = anp->segs; asp != NULL; asp = asp->next)
2563 	{
2564 		if(asp->type != GAP_SEG && asp->type != INS_SEG)
2565 		{
2566 			for(vnp = asp->cnp; vnp != NULL; vnp = vnp->next)
2567 			{
2568 				fnp = (FeatNodePtr) vnp->data.ptrvalue;
2569 				if(fnp !=NULL && fnp->label != NULL)
2570 				{
2571 					f_len = StringLen(fnp->label);
2572 					if(f_len > len)
2573 						len = f_len;
2574 				}
2575 			}
2576 		}
2577 	}
2578 
2579 	return len;
2580 }
2581 
2582 
2583 /*
2584 *
2585 *	look through the list of alignnode to figure out the maximum
2586 *	length required to print the coordinates of the sequence in
2587 *	alignment
2588 *
2589 */
get_max_coordinates_len(ValNodePtr anp_list,Int4Ptr max_label_size)2590 static Int4 get_max_coordinates_len (ValNodePtr anp_list, Int4Ptr max_label_size)
2591 {
2592 	AlignNodePtr anp;
2593 	AlignSegPtr asp;
2594 	Int4 max_num, seqpos;
2595 	Char buf[101];
2596 	Int4 flabel_len;
2597 
2598 	max_num = 0;
2599 	*max_label_size = 0;
2600 	while(anp_list)
2601 	{
2602 		if(anp_list->choice != OBJ_SEQANNOT)
2603 		{
2604 			anp = (AlignNodePtr) anp_list->data.ptrvalue;
2605 			if(anp->seqpos < 0)
2606 				max_num = MAX(ABS(anp->seqpos), max_num);
2607 			else
2608 			{
2609 				seqpos = anp->seqpos - 1;
2610 				for(asp = anp->segs; asp != NULL; asp = asp->next)
2611 				{
2612 					if(asp->type != GAP_SEG)
2613 					{
2614 						if(asp->type == INS_SEG)
2615 							seqpos += (asp->gr.right -1);
2616 						else
2617 							seqpos += (asp->gr.right - asp->gr.left + 1);
2618 					}
2619 				}
2620 				max_num = MAX(seqpos, max_num);
2621 			}
2622 			if(anp->label != NULL)
2623 				*max_label_size = MAX(*max_label_size, (Int4)StringLen(anp->label));
2624 			flabel_len = get_max_feature_label (anp);
2625 			if(flabel_len > (*max_label_size))
2626 				*max_label_size = flabel_len;
2627 		}
2628 		anp_list = anp_list->next;
2629 	}
2630 	buf[0] = '\0';
2631 	sprintf(buf, "%ld", (long) (max_num+1));
2632 	return StringLen(buf);
2633 }
2634 
2635 
2636 
2637 /*
2638 *	do the converse of change blastx_master
2639 *	revert it from tblastn to blastx
2640 */
revert_blastx_alignment(ValNodePtr anp_list,AlignNodePtr master_anp)2641 static Boolean revert_blastx_alignment (ValNodePtr anp_list, AlignNodePtr master_anp)
2642 {
2643 	ValNodePtr c_list;
2644 	AnnotInfoPtr annot_info;
2645 	AlignNodePtr t_anp;
2646 	AlignSegPtr asp;
2647 
2648 	for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2649 	{
2650 		if(c_list->choice == OBJ_SEQANNOT)
2651 		{
2652 			annot_info = (AnnotInfoPtr) c_list->data.ptrvalue;
2653 			if(annot_info != NULL &&
2654 				get_alignment_type(annot_info) == ALIGN_PROT_TO_DNA)
2655 				annot_info->blast_type = ALIGN_BLASTX;
2656 		}
2657 		else
2658 		{
2659 			t_anp = (AlignNodePtr) c_list->data.ptrvalue;
2660 			if(t_anp == master_anp)
2661 				t_anp->is_master = TRUE;
2662 			else
2663 				t_anp->is_master = FALSE;
2664 			/*expand to the original interval */
2665 			modify_gather_range (&(t_anp->extremes), TRUE);
2666 			for(asp = t_anp->segs; asp != NULL; asp = asp->next)
2667 			{
2668 				if(asp->type != INS_SEG)
2669 					modify_gather_range (&(asp->gr), TRUE);
2670 			}
2671 		}
2672 	}
2673 
2674 	return TRUE;
2675 }
2676 
/*
*	Print doc to fp with its first line moved to the end: everything
*	after the first newline is written first, then the first line followed
*	by a newline.  When doc contains no newline it is written unchanged.
*	The first newline of doc is overwritten temporarily and restored
*	before returning.
*/
static void reverse_print(FILE *fp, CharPtr doc)
{
	CharPtr eol = doc;

	/* find the end of the first line */
	while(*eol != '\n' && *eol != '\0')
		++eol;

	if(*eol == '\0')
	{
		fprintf(fp, "%s", doc);
		return;
	}

	fprintf(fp, "%s", eol + 1);
	*eol = '\0';
	fprintf(fp, "%s\n", doc);
	*eol = '\n';
}
2697 
/*
*	Build a new list whose nodes point (data.ptrvalue) to those nodes of
*	anp_list that hold a selected alignnode.
*
*	A Seq-annot entry switches selection on when its alignment type equals
*	align_type and off otherwise; before the first Seq-annot entry,
*	selection is on only if align_type is 0.  While selection is on,
*	master alignnodes are skipped, the first non-master alignnode after
*	each Seq-annot is tested with PrintAlignForText (a failure switches
*	selection off), and an alignnode is selected when its extremes overlap
*	the range left..right.
*
*	Returns the head of the new list, or NULL when nothing is selected.
*/
static ValNodePtr get_anp_list_for_aligntype(ValNodePtr anp_list, Uint1 align_type,
	Int4 left, Int4 right)
{
	Uint1 annot_type = 0;
	ValNodePtr head = NULL, tail = NULL;
	ValNodePtr item;
	ValNodePtr vnp;
	AlignNodePtr anp;
	AnnotInfoPtr annot_info = NULL;
	Boolean unchecked = TRUE;
	Boolean wanted = (align_type == 0);

	for(vnp = anp_list; vnp != NULL; vnp = vnp->next)
	{
		if(vnp->choice == OBJ_SEQANNOT)
		{
			annot_info = (AnnotInfoPtr) vnp->data.ptrvalue;
			annot_type = get_alignment_type(annot_info);
			wanted = (annot_type == align_type);
			unchecked = TRUE;
			continue;
		}
		if(!wanted)
			continue;

		anp = (AlignNodePtr) vnp->data.ptrvalue;
		if(anp->is_master)
			continue;

		/*check the first alignnode to see if it is legal*/
		if(unchecked)
		{
			if(!PrintAlignForText(annot_info, anp))
				wanted = FALSE;
			unchecked = FALSE;
		}
		if(!wanted)
			continue;

		/* keep only alignnodes that overlap the requested range */
		if(anp->extremes.left > right || anp->extremes.right < left)
			continue;

		item = ValNodeNew(NULL);
		item->data.ptrvalue = vnp;
		if(tail != NULL)
			tail->next = item;
		else
			head = item;
		tail = item;
	}

	return head;
}
2757 
modify_separation_bar(CharPtr buf,Int4 size,Int1 frame)2758 static Boolean modify_separation_bar(CharPtr buf, Int4 size, Int1 frame)
2759 {
2760 	Char temp[21];
2761 	Int4 len, start;
2762 	Int4 i;
2763 
2764 	sprintf(temp, "BLASTX: frame = %d", frame);
2765 	len = StringLen(temp);
2766 	start = (size - len)/2;
2767 	if(start < 0)
2768 		return FALSE;
2769 	else
2770 	{
2771 		for(i = 0; i<len; ++i)
2772 		{
2773 			buf[start+i] = temp[i];
2774 		}
2775 		return TRUE;
2776 	}
2777 }
2778 
2779 
2780 
2781 /*
2782 *
2783 *	for hardline old blast users who prefer to see the label as Sbjct/Query
2784 *
2785 */
convert_label_to_query_subject(ValNodePtr anp_list)2786 static void convert_label_to_query_subject (ValNodePtr anp_list)
2787 {
2788 	AlignNodePtr anp;
2789 	Boolean first = TRUE;
2790 
2791 	while(anp_list)
2792 	{
2793 		if(anp_list->choice != OBJ_SEQANNOT)
2794 		{
2795 			anp = (AlignNodePtr) anp_list->data.ptrvalue;
2796 			if(anp->is_master)
2797 			{
2798 				MemFree(anp->label);
2799 				anp->label = StringSave("Query:");
2800 				first = FALSE;
2801 			}
2802 			else
2803 			{
2804 				MemFree(anp->label);
2805 				if(first)
2806 					anp->label = StringSave("Query:");
2807 				else
2808 					anp->label = StringSave("Sbjct:");
2809 				first = FALSE;
2810 			}
2811 		}
2812 		anp_list = anp_list->next;
2813 	}
2814 }
2815 
2816 /*
2817 *	request from Detlef: convert the sequence label to gi
2818 *
2819 */
convert_label_to_gi(ValNodePtr anp_list)2820 static void convert_label_to_gi(ValNodePtr anp_list)
2821 {
2822 	AlignNodePtr anp;
2823 	Char temp[101];
2824 
2825 	while(anp_list)
2826 	{
2827 		if(anp_list->choice != OBJ_SEQANNOT)
2828 		{
2829 			anp = (AlignNodePtr) anp_list->data.ptrvalue;
2830 			if(anp && anp->sip && anp->sip->choice == SEQID_GI &&
2831 				!anp->keep_label)
2832 			{
2833 				sprintf(temp, "%ld", (long) anp->sip->data.intvalue);
2834 				if(anp->label != NULL)
2835 					MemFree(anp->label);
2836 				anp->label = StringSave(temp);
2837 			}
2838 		}
2839 		anp_list = anp_list->next;
2840 	}
2841 }
2842 
2843 /*
2844 *
2845 *	for the display of tblastx, only one Seq-annot is allowed at
2846 *	any given time
2847 *
2848 */
illegal_tblastx_anp(ValNodePtr anp_list,BoolPtr has_tblastx)2849 static Boolean illegal_tblastx_anp (ValNodePtr anp_list, BoolPtr has_tblastx)
2850 {
2851 	AnnotInfoPtr info;
2852 	Int2 info_num = 0;
2853 
2854 	*has_tblastx = FALSE;
2855 
2856 	while(anp_list)
2857 	{
2858 		if(anp_list->choice == OBJ_SEQANNOT)
2859 		{
2860 			info = (AnnotInfoPtr) anp_list->data.ptrvalue;
2861 			if(info->blast_type == ALIGN_TBLASTX)
2862 				*has_tblastx = TRUE;
2863 			++info_num;
2864 		}
2865 		anp_list = anp_list->next;
2866 	}
2867 
2868 	if(*has_tblastx && info_num > 1)
2869 		return TRUE;
2870 	else
2871 		return FALSE;
2872 }
2873 
/*
*	Scale a position: multiply pos by exp_val, or, if inverse is
*	TRUE, divide pos by exp_val (integer division).
*/
static Int4 expand_position(Int4 pos, Int4 exp_val, Boolean inverse)
{
	if(inverse)
		return pos / exp_val;

	return pos * exp_val;
}
2878 
2879 
/*
*	Rescale the coordinates of every AlignNode in the list by val
*	(divide if inverse is TRUE, multiply otherwise; see
*	expand_position). Nodes whose choice is OBJ_SEQANNOT are skipped.
*
*	extremes.left of each node is kept as is and serves as the fixed
*	origin: extremes.right, ins_pos and the gr.left/gr.right of
*	non-insertion segments are rescaled as offsets from it. For an
*	INS_SEG segment, gr.right is rescaled directly, without the origin.
*/
static void modify_tblastx_value (ValNodePtr anp_list, Int4 val, Boolean inverse)
{
	ValNodePtr vnp;
	AlignNodePtr node;
	AlignSegPtr seg;
	Int4 origin;

	for(vnp = anp_list; vnp != NULL; vnp = vnp->next)
	{
		if(vnp->choice == OBJ_SEQANNOT)
			continue;

		node = (AlignNodePtr) vnp->data.ptrvalue;
		origin = node->extremes.left;
		node->extremes.right = origin + expand_position(node->extremes.right - origin, val, inverse);

		for(seg = node->segs; seg != NULL; seg = seg->next)
		{
			if(seg->type != INS_SEG)
			{
				seg->gr.left = origin + expand_position(seg->gr.left - origin, val, inverse);
				seg->gr.right = origin + expand_position(seg->gr.right - origin, val, inverse);
			}
			else
			{
				seg->ins_pos = origin + expand_position(seg->ins_pos - origin, val, inverse);
				seg->gr.right = expand_position(seg->gr.right, val, inverse);
			}
		}
	}
}
2912 
2913 static
2914 SeqIdPtr
get_seq_id(SeqAlignPtr sap,Int2 index)2915 get_seq_id(SeqAlignPtr sap, Int2 index)
2916 {
2917 	SeqIdPtr sip = NULL;
2918 	if (sap->segtype == 1) {
2919 		DenseDiagPtr ddp = (DenseDiagPtr)sap->segs;
2920 		sip = ddp->id;
2921 	}
2922 	else if (sap->segtype == 2) {
2923 		DenseSegPtr dsp = (DenseSegPtr)sap->segs;
2924 		sip = dsp->ids;
2925 	}
2926 	for (; sip != NULL && --index >= 0; sip = sip->next);
2927 	return sip;
2928 }
2929 
2930 /***********************************************************************
2931 *
2932 *	ShowAlignNodeText(anp_list, num_node, line_len, locus,
2933 *	fp)
2934 *	convert the alignment data in the list of AlignNode into text written
2935 *	to a file
2936 *	anp_list: a list (ValNodePtr) of AlignNode processed from Seq-aligns
2937 *	num_node: the number of AlignNode to be processed currently. It can
2938 *	be used in the cases where only the top num_node in the anp_list is
2939 *	going to be processed. This can be useful to make vertically cashed
2940 *	buffer
2941 *	line_len: the length of sequence char per line
2942 *	locus: if TRUE, show the locus name
2943 *	fp: the file Pointer
2944 *	left: the leftmost position for display
2945 *	right: the rightmost position for display
2946 *	align_type:	the type of alignment. DNA-protein alignment?
2947 *
2948 *	return TRUE for success, FALSE for fail
2949 *
2950 ************************************************************************/
ShowAlignNodeText(ValNodePtr anp_list,Int2 num_node,Int4 line_len,FILE * fp,Int4 left,Int4 right,Uint4 option,Int4Ptr PNTR matrix,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)))2951 NLM_EXTERN Boolean ShowAlignNodeText(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))
2952 {
2953     return ShowAlignNodeText2Ex(anp_list, num_node, line_len, fp, left, right,
2954                               option, matrix, fmt_score_func, NULL, NULL, NULL, NULL);
2955 }
2956 
ShowAlignNodeText2(ValNodePtr anp_list,Int2 num_node,Int4 line_len,FILE * fp,Int4 left,Int4 right,Uint4 option,Int4Ptr PNTR matrix,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr blast_type,Int4Ptr PNTR posMatrix)2957 NLM_EXTERN Boolean ShowAlignNodeText2(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix)
2958 {
2959 	return ShowAlignNodeText2Ex(anp_list, num_node, line_len, fp, left, right, option, matrix, fmt_score_func, db_name, blast_type, posMatrix, NULL);
2960 }
2961 
2962 /**
2963  * transforms a string so that it becomes safe to be used as part of URL
2964  * the function converts characters with special meaning (such as
2965  * semicolon -- protocol separator) to escaped hexadecimal (%xx)
2966  */
2967 static
2968 CharPtr
MakeURLSafe(CharPtr src)2969 MakeURLSafe(CharPtr src)
2970 {
2971 	static Char HEXDIGS[] = "0123456789ABCDEF";
2972 	CharPtr buf;
2973 	size_t len;
2974 	CharPtr p;
2975 	Char c;
2976 
2977 	if (src == NULL) {
2978 		return NULL;
2979 	}
2980 	/* first pass to calculate required buffer size */
2981 	for (p = src, len = 0; (c = *(p++)) != '\0'; ) {
2982 		switch (c) {
2983 		default:
2984 			if (c < '0' || (c > '9' && c < 'A') ||
2985 					(c > 'Z' && c < 'a') || c > 'z') {
2986 				len += 3;
2987 				break;
2988 			}
2989 		case '-': case '_': case '.': case '!': case '~':
2990 		case '*': case '\'': case '(': case ')':
2991 			++len;
2992 		}
2993 	}
2994 	buf = (CharPtr)MemNew(len + 1);
2995 	/* second pass -- conversion */
2996 	for (p = buf; (c = *(src++)) != '\0'; ) {
2997 		switch (c) {
2998 		default:
2999 			if (c < '0' || (c > '9' && c < 'A') ||
3000 					(c > 'Z' && c < 'a') || c > 'z') {
3001 				*(p++) = '%';
3002 				*(p++) = HEXDIGS[(c >> 4) & 0xf];
3003 				*(p++) = HEXDIGS[c & 0xf];
3004 				break;
3005 			}
3006 		case '-': case '_': case '.': case '!': case '~':
3007 		case '*': case '\'': case '(': case ')':
3008 			*(p++) = c;
3009 		}
3010 	}
3011 	*p = '\0';
3012 	return buf;
3013 }
3014 
ShowAlignNodeText2Ex(ValNodePtr anp_list,Int2 num_node,Int4 line_len,FILE * fp,Int4 left,Int4 right,Uint4 option,Int4Ptr PNTR matrix,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr blast_type,Int4Ptr PNTR posMatrix,SeqAlignPtr PNTR last_align)3015 static Boolean ShowAlignNodeText2Ex(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix, SeqAlignPtr PNTR last_align)
3016 {
3017     CharPtr bar, sep_bar;
3018     CharPtr num_str;
3019     Int4 i, j;
3020     Int4 num;
3021 
3022     Int4 c_start, c_stop;
3023     CharPtr m_buf, cm_buf;		/*text for the master sequence*/
3024     BioseqPtr m_bsp;
3025     Int4 m_len;			/*length of the master sequence*/
3026 
3027     ValNodePtr list;	/*list of DrawText*/
3028     AlignNodePtr anp, master_anp;
3029     Int4Ptr p_stop;
3030     Boolean is_end, strip_semicolon=TRUE;
3031     CharPtr docbuf, master_docbuf;
3032     Uint1 all_frame[6];
3033     SeqFeatPtr fake_cds;
3034     Int1 frame;
3035     Uint1 align_type;
3036     ValNodePtr curr;
3037     ValNodePtr c_list, PNTR pc_list;
3038     ValNodePtr a_list;
3039     Boolean is_html;
3040     Int4Ptr PNTR t_matrix;
3041     Boolean compress;
3042     Int4 last_pos;
3043     Boolean load_last_pos;
3044     ValNodePtr id_list;
3045     BioseqPtr bsp;
3046     SeqAlignPtr align;
3047     ScorePtr sp;
3048     AlignStatOption aso;
3049     AlignSum as;
3050     AlignSumPtr asp;
3051     Boolean reverse_display;
3052     Boolean has_data;
3053     Boolean show_score;
3054     Int4 max_label_size;
3055     Int4 max_num_size;
3056     Int4 empty_space;
3057     Boolean show_strand;
3058     Boolean has_tblastx;
3059     SeqIdPtr already_linked=NULL;
3060 
3061 
3062     if(anp_list == NULL)
3063         return FALSE;
3064 
3065     /*for tblastx, only one Seq-annot at time*/
3066     if(illegal_tblastx_anp (anp_list, &has_tblastx))
3067         return FALSE;
3068 
3069     /*for alignment that is not a same-molecule, needs to have a master*/
3070     master_anp = get_master_align_node(anp_list);
3071     if(master_anp == NULL) {
3072         Message(MSG_ERROR, "Fail to the master AlignNode");
3073         return FALSE;
3074     }
3075     is_html = (Boolean)(option & TXALIGN_HTML);
3076     load_last_pos = (Boolean)(option & TXALIGN_END_NUM);
3077 
3078 
3079     /* for hard line old blast user only!!!!! */
3080     if(option & TXALIGN_SHOW_QS)
3081         convert_label_to_query_subject (anp_list);
3082     else if(option & TXALIGN_SHOW_GI)
3083         convert_label_to_gi(anp_list);
3084 
3085     compress = (Boolean)(option & TXALIGN_COMPRESS);
3086     if(compress)
3087         max_num_size = get_max_coordinates_len (anp_list, &max_label_size);
3088     else {
3089         max_num_size = POS_SPACE;
3090         max_label_size = B_SPACE;
3091     }
3092 
3093     /* for display of the traditional regular blastX output */
3094     reverse_display = FALSE;
3095     if(option & TXALIGN_BLASTX_SPECIAL) {
3096         reverse_display = reverse_blastx_order (anp_list);
3097         if(reverse_display)
3098             change_blastx_master(anp_list, &master_anp);
3099     }
3100 
3101     if(has_tblastx)
3102         modify_tblastx_value (anp_list, 3, TRUE);
3103     if(left == -1)
3104         left = master_anp->extremes.left;
3105     if(right == -1)
3106         right = master_anp->extremes.right;
3107     if(left > master_anp->extremes.right || right < master_anp->extremes.left) {
3108         if(reverse_display)
3109             revert_blastx_alignment (anp_list, master_anp);
3110         return FALSE;
3111     }
3112 
3113     /*check for the molecule type of not-normal DNA-protein alignment*/
3114     fake_cds = NULL;
3115     frame = 0;
3116     m_bsp = BioseqLockById(master_anp->sip);
3117     if(m_bsp == NULL) {
3118         if(reverse_display)
3119             revert_blastx_alignment (anp_list, master_anp);
3120         return FALSE;
3121     }
3122 
3123     if(m_bsp->mol!= Seq_mol_aa) { /*a nucleotide sequence*/
3124         m_len = m_bsp->length;
3125         fake_cds = make_fake_cds(m_bsp, 0, m_bsp->length-1, Seq_strand_plus);
3126         load_master_translate_frame(anp_list, m_len, m_bsp);
3127     }
3128 
3129 
3130     ObjMgrSetHold();
3131     left = MAX(left, master_anp->extremes.left);
3132     right = MIN(right, master_anp->extremes.right);
3133 
3134 
3135     for(curr=anp_list, i=0; curr!=NULL; curr= curr->next) {	/*initiate the position*/
3136 
3137         if(curr->choice != OBJ_SEQANNOT) {
3138             anp = (AlignNodePtr) curr->data.ptrvalue;
3139             anp->align_num = i;
3140             ++i;
3141         }
3142     }
3143     p_stop = (Int4Ptr) MemNew((size_t)(i) * sizeof(Int4));
3144     for(j=0; j<i; ++j)
3145         p_stop[j] = -1;
3146     if(compress) {
3147         empty_space = max_num_size + 1 + max_label_size + 1;
3148         if(option & TXALIGN_SHOW_STRAND) {
3149             empty_space += STRAND_SPACE;
3150             show_strand = TRUE;
3151         } else
3152             show_strand = FALSE;
3153     } else {
3154         empty_space = get_num_empty_space(compress);
3155         show_strand = TRUE;
3156     }
3157 
3158     make_scale_bar_str(&bar, &num_str, compress? empty_space : empty_space+1, line_len);
3159     num = line_len + empty_space;
3160     sep_bar = (CharPtr) MemGet((size_t)(num+1)*sizeof(Char), MGET_ERRPOST);
3161     MemSet((Pointer)sep_bar, '-',(size_t)num* sizeof(Char));
3162     sep_bar[num] = '\0';
3163 
3164     if(is_html)
3165         fprintf(fp, "<PRE>\n");
3166     c_start = left;
3167 
3168     /* If a tool_url is set, then we use this rather than Entrez. */
3169     if (blast_type && !(option & TXALIGN_NO_ENTREZ))
3170     {
3171     	Char tool_url[128];
3172 
3173 	*tool_url = NULLB;
3174         GetAppParam("NCBI", blast_type, "TOOL_URL", "", tool_url, sizeof(tool_url));
3175 	/* Do use Entrez if available if the tool is dumpgnl.cgi as it does not handle gi's. */
3176 	if (*tool_url != NULLB && StringStr(tool_url, "dumpgnl.cgi") == NULL)
3177 		option |= TXALIGN_NO_ENTREZ;
3178     }
3179 
3180     MemSet(&aso, '\0', sizeof(AlignStatOption));
3181 
3182     /*format the summary for the score */
3183     if(fmt_score_func != NULL) {
3184 	aso.txalign_options = option;
3185         aso.indent_len = (Int2)empty_space;
3186         aso.line_len = (Int2)(line_len + empty_space);
3187         aso.html_hot_link_relative = FALSE;
3188         if (option & TXALIGN_NO_ENTREZ)
3189             aso.no_entrez = TRUE;
3190         else
3191             aso.no_entrez = FALSE;
3192 
3193         if (option & TXALIGN_NO_DUMPGNL)
3194             aso.no_dumpgnl = TRUE;
3195         else
3196             aso.no_dumpgnl = FALSE;
3197 
3198         if (option & TXALIGN_HTML) {
3199             aso.html_hot_link = TRUE;
3200             if (option & TXALIGN_HTML_RELATIVE)
3201                 aso.html_hot_link_relative = TRUE;
3202         } else {
3203             aso.html_hot_link = FALSE;
3204         }
3205         if (option & TXALIGN_SHOW_GI)
3206             aso.show_gi = TRUE;
3207         else
3208             aso.show_gi = FALSE;
3209         aso.fp = fp;
3210         aso.buf = NULL;
3211         id_list = NULL;
3212         aso.segs = NULL;
3213         if (blast_type)
3214 	{
3215             aso.blast_type = StringSave(blast_type);
3216             StringUpper(aso.blast_type);
3217         }
3218 	else
3219 	{
3220 		aso.blast_type = NULL;
3221 	}
3222         for(curr = anp_list; curr != NULL; curr = curr->next) {
3223             if(curr->choice != OBJ_SEQANNOT) {
3224                 anp = (AlignNodePtr) curr->data.ptrvalue;
3225                 show_score = FALSE;
3226                 if((reverse_display && anp == master_anp) || (!reverse_display && anp != master_anp)) {
3227                     if(!check_bsp_id(&id_list, anp->sip))
3228                         show_score = TRUE;
3229                 }
3230                 if(show_score) {
3231                     /*the first time it sees the Bioseq*/
3232                     bsp = BioseqLockById(anp->sip);
3233                     align = NULL;
3234 		    /* Use gather for translated searches and ungapped blast. */
3235 		    if(!has_tblastx && last_align && *last_align && (*last_align)->segtype == 2 && (*last_align)->next)
3236 		    {
3237 			align = (*last_align)->next;
3238 		    }
3239 		    else
3240 		    {
3241                     	align = NULL;
3242                     	GatherItem(anp->entityID, anp->itemID, (Uint2)(curr->choice), (Pointer)(&align), find_align_proc);
3243 		    }
3244 		    if (last_align)
3245 		    	*last_align = align;
3246                     if(align != NULL) {
3247                         if(align->segtype == 1 || align->segtype == 2 || align->segtype == 3 || align->segtype == 5) {
3248                             as.matrix = matrix;
3249                             as.posMatrix = posMatrix;
3250                             as.master_sip = master_anp->sip;
3251                             as.target_sip = anp->sip;
3252                             as.is_aa = (m_bsp->mol == Seq_mol_aa);
3253                             as.ooframe = FALSE; /* Not supported */
3254                             as.m_frame_set = FALSE;
3255                             as.t_frame_set = FALSE;
3256                             asp = &as;
3257                         } else
3258                             asp = NULL;
3259                         sp = find_score_in_align(align, anp->chain, asp);
3260                         if(sp != NULL) {
3261                             aso.follower = anp->follower;
3262                             aso.bsp = bsp;
3263                             aso.sp = sp;
3264                             aso.db_name = db_name;
3265                             if(asp != NULL) {
3266                                 aso.gaps = asp->gaps;
3267                                 aso.positive = asp->positive;
3268                                 aso.identical = asp->identical;
3269                                 aso.align_len = asp->totlen;
3270 
3271                                 /* This information was added for links to
3272                                    specific alignment only, but may be used
3273                                    for something else */
3274 
3275                                 aso.master_from = asp->master_from;
3276                                 aso.master_to = asp->master_to;
3277                                 aso.target_from = asp->target_from;
3278                                 aso.target_to = asp->target_to;
3279 
3280 
3281                                 if (asp->m_frame_set) {
3282                                         aso.m_frame = asp->m_frame;
3283                                 } else {
3284                                     aso.m_frame = 255;
3285                                 }
3286 
3287                                 if (asp->t_frame_set) {
3288                                     aso.t_frame = asp->t_frame;
3289                                 } else {
3290                                     aso.t_frame = 255;
3291                                 }
3292 
3293                                 aso.m_strand = asp->m_strand;
3294                                 aso.t_strand = asp->t_strand;
3295                             } else {
3296                                 aso.align_len = 0;
3297                             }
3298 
3299                             aso.segs = NULL;
3300 			    if (aso.follower == FALSE)
3301                             {
3302                                 SeqAlignPtr sap;
3303                                 size_t size = 0;
3304                                 size_t used = 0;
3305 
3306                                 for (sap = align; sap != NULL; sap = sap->next) {
3307                                     if (SeqIdMatch(TxGetSubjectIdFromSeqAlign(align), TxGetSubjectIdFromSeqAlign(sap))) {
3308                                         if (aso.segs != NULL) {
3309                                             StringAppend(&aso.segs, &size, ",", &used);
3310                                         }
3311                                         SeqAlignSegsStr(sap, 1, &aso.segs, &size, &used);
3312                                     } else
3313                                         break;
3314                                 }
3315                                 if (aso.segs == NULL) {
3316                                     /**
3317                                      * Something is really wrong if we're here
3318                                      */
3319                                     aso.segs = StringSave("");
3320                                 }
3321                             }
3322                             fmt_score_func(&aso);
3323     /* Print seqids of other sequences in a cluster if clustering of hits
3324        has been done  */
3325     if (!aso.follower) {
3326        BioseqPtr bsp;
3327        SeqIdPtr sip, sip_head;
3328        Char buffer[BUFFER_LENGTH+1]/*, line[BUFFER_LENGTH+1]*/;
3329        Int4 buf_len, gi, index;
3330        CharPtr title;
3331        Char HTML_dopt[8], HTML_database[11];
3332        Char HTML_buffer[BUFFER_LENGTH+1];
3333 
3334        /* Cluster sequences ids are saved in align->master (kludge) */
3335        sip_head = align->master;
3336        if (sip_head) {
3337           for (sip=sip_head, index=0; sip; sip = sip->next, index++);
3338           fprintf(fp, " Other sequences in the cluster (%d total)\n", index);
3339           for (sip = sip_head; sip; sip = sip->next) {
3340              /* SeqIds of other sequences in a cluster are printed here */
3341              bsp = BioseqLockById(sip);
3342              SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
3343              buf_len = StrLen(buffer);
3344              title = (CharPtr) Malloc((num-buf_len+1)*sizeof(Char));
3345              sprintf(title, "%.*s", num-buf_len, BioseqGetTitle(bsp));
3346 
3347              if (ISA_na(bsp->mol)) {
3348                 StringCpy(HTML_dopt, "GenBank");
3349                 StringCpy(HTML_database, "Nucleotide");
3350              } else {
3351                 StringCpy(HTML_dopt, "GenPept");
3352                 StringCpy(HTML_database, "Protein");
3353              }
3354 
3355              if (bsp->id->choice == SEQID_GI) {
3356                 gi = bsp->id->data.intvalue;
3357                 sprintf(HTML_buffer,
3358              "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
3359              NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
3360 	     option & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
3361                 fprintf(fp, "    + %s%s </a> %s\n", HTML_buffer, buffer, title);
3362              } else {
3363                 fprintf(fp, "    + %s </a> %s\n", buffer, title);
3364              }
3365              BioseqUnlock(bsp);
3366           }
3367           fprintf(fp, "\n\n");
3368        }
3369     if (option & TXALIGN_BL2SEQ_LINK) {
3370        CharPtr id1, id2;
3371        Char buffer[BUFFER_LENGTH+1];
3372        BioseqPtr bsp;
3373 
3374        bsp = BioseqLockById(asp->master_sip);
3375 
3376        SeqIdWrite(SeqIdFindBest(bsp->id, SEQID_GI), buffer, PRINTID_FASTA_SHORT, BUFFER_LENGTH);
3377        id1 = MakeURLSafe(buffer);
3378        BioseqUnlock(bsp);
3379 
3380        SeqIdWrite(SeqIdFindBest(asp->target_sip, SEQID_GI), buffer, PRINTID_FASTA_SHORT, BUFFER_LENGTH);
3381        id2 = MakeURLSafe(buffer);
3382 
3383        fprintf(fp, "<A HREF=%s?PROGRAM=tblastx&WORD=3&RID=%s&ONE=%s&TWO=%s> Get TBLASTX alignments </A>\n",
3384                WBLAST2_HREF, RID_glb, id1, id2);
3385     }
3386 
3387     }
3388 
3389                             aso.segs = (CharPtr) MemFree(aso.segs);
3390                         }
3391                     }
3392                     if(bsp != NULL)
3393                         BioseqUnlock(bsp);
3394                 }
3395             }
3396         }
3397         aso.blast_type = (CharPtr) MemFree(aso.blast_type);
3398         ValNodeFree(id_list);
3399     }
3400 
3401     pc_list = (ValNodePtr *) MemNew((size_t)(ALIGN_MAX_TYPE +1) * sizeof(ValNodePtr));
3402     has_data = FALSE;
3403 
3404     for(i = 0; i<=ALIGN_MAX_TYPE; ++i) {
3405         pc_list[i]= get_anp_list_for_aligntype(anp_list, (Uint1)i, left, right);
3406         if(pc_list[i] != NULL)
3407             has_data = TRUE;
3408     }
3409 
3410     if(option & TXALIGN_SHOW_QS) {
3411         is_html = FALSE;
3412         strip_semicolon = FALSE;
3413     }
3414     master_docbuf = NULL;
3415     if(has_data) {
3416 	while(c_start <= right)	{ /*process line by line*/
3417 
3418             c_stop = MIN(right, (c_start+line_len -1));
3419             m_buf = NULL;
3420             is_end = FALSE;
3421             docbuf = NULL;
3422 
3423             /*process the master sequence*/
3424             if(has_tblastx)
3425                 list = ProcessTextAlignNode(master_anp, c_start, c_stop, &(p_stop[master_anp->align_num]), NULL, line_len, -1, option, matrix);
3426             else
3427                 list = ProcessTextAlignNode(master_anp, c_start, c_stop, &(p_stop[master_anp->align_num]), NULL, line_len, 0, option, NULL);
3428             if(list != NULL) {
3429                 if(option & TXALIGN_SHOW_RULER) {
3430                     fprintf(fp, "%s\n", num_str);	/*show scale*/
3431                     fprintf(fp, "%s\n", bar);
3432                 }
3433                 last_pos = load_last_pos ? p_stop[master_anp->align_num] : -1;
3434                 docbuf = DrawTextToBuffer(list, &m_buf, is_html, max_label_size, max_num_size, compress, matrix, last_pos, line_len, show_strand, strip_semicolon, &already_linked, option);
3435                 if(docbuf !=NULL) {
3436                     if(reverse_display)
3437                         master_docbuf = docbuf;
3438                     else {
3439                         fprintf(fp, "%s", docbuf);
3440                         MemFree(docbuf);
3441                     }
3442                 }
3443                 list = FreeTextAlignList(list);
3444             }
3445 
3446             for(align_type = 0; align_type <= ALIGN_MAX_TYPE; ++align_type) {
3447                 c_list = pc_list[align_type];
3448                 if(c_list != NULL) {
3449                     if(align_type == ALIGN_DNA_TO_PROT) {
3450                         /*process the hit protein sequence*/
3451                         if(get_current_master_frame(c_list, c_start, c_stop, all_frame)) {
3452                             list = NULL;
3453                             for(j = 0; j<6; ++j) {
3454                                 frame = all_frame[j];
3455                                 if(frame > 0) {
3456                                     /*translate the master sequence in the specified frame*/
3457                                     cm_buf = translate_faked_cds(fake_cds, frame, c_start, c_stop, m_len, master_anp);
3458                                     list = load_fake_protein_buf(cm_buf, frame, master_anp);
3459                                     docbuf = DrawTextToBuffer(list, NULL, is_html, max_label_size, max_num_size, compress, matrix, -1, line_len, show_strand, strip_semicolon, &already_linked, option);
3460                                     if(docbuf != NULL) {
3461                                         modify_separation_bar(sep_bar, num, frame);
3462                                         fprintf(fp, "%s\n", sep_bar);
3463                                         fprintf(fp, "%s", docbuf);
3464                                         MemFree(docbuf);
3465                                     }
3466                                     FreeTextAlignList(list);
3467 
3468                                     for(curr = c_list; curr != NULL; curr = curr->next) {
3469                                         a_list = (ValNodePtr) curr->data.ptrvalue;
3470                                         anp = (AlignNodePtr) a_list->data.ptrvalue;
3471                                         if(anp != master_anp) {
3472                                             list = ProcessTextAlignNode(anp, c_start, c_stop, &(p_stop[anp->align_num]), cm_buf, line_len, frame, option, matrix);
3473                                             if(list != NULL) {
3474                                                 docbuf = DrawTextToBuffer(list, NULL, is_html, max_label_size, max_num_size, compress, matrix, -1, line_len, show_strand, strip_semicolon, &already_linked, option);
3475                                                 if(docbuf != NULL) {
3476                                                     fprintf(fp, "%s", docbuf);
3477                                                     MemFree(docbuf);
3478                                                 }
3479                                                 FreeTextAlignList(list);
3480                                             }
3481                                         }
3482                                     }
3483                                     MemFree(cm_buf);
3484                                 } /*end of frame > 0 */
3485                             }
3486                         }
3487                     } else {
3488                         if(align_type == ALIGN_PROT_TO_DNA || align_type == ALIGN_TDNA_TO_TDNA)
3489                             frame = -1;
3490                         else
3491                             frame = 0;
3492                         if(frame == 0 && m_bsp->mol != Seq_mol_aa)
3493                             t_matrix = NULL;
3494                         else
3495                             t_matrix = matrix;
3496                         is_end = FALSE;
3497                         for(curr = c_list; curr !=NULL; curr = curr->next) {
3498                             a_list = (ValNodePtr) curr->data.ptrvalue;
3499                             anp = (AlignNodePtr) a_list->data.ptrvalue;
3500                             if(anp != master_anp) {
3501                                 /*generate the DrawText buffer*/
3502                                 if(align_type == ALIGN_NORMAL && m_bsp->mol != Seq_mol_aa)
3503                                     /*DNA alignment */
3504                                     list = ProcessTextAlignNode(anp, c_start, c_stop, &(p_stop[anp->align_num]), m_buf, line_len, frame, option, NULL);
3505                                 else
3506                                     list = ProcessTextAlignNode2(anp, c_start, c_stop, &(p_stop[anp->align_num]), m_buf, line_len, frame, option, t_matrix, posMatrix, master_anp->seqpos);
3507 
3508                                 last_pos = load_last_pos ? p_stop[anp->align_num] : -1;
3509                                 if(list != NULL) {
3510                                     /*DrawTextList(list, fp);*/
3511                                     docbuf = DrawTextToBuffer(list, NULL, is_html, max_label_size, max_num_size, compress, t_matrix, last_pos, line_len, show_strand, strip_semicolon, &already_linked, option);
3512                                     if(docbuf !=NULL) {
3513                                         if(reverse_display) {
3514                                             reverse_print(fp, docbuf);
3515                                             fprintf(fp, "%s", master_docbuf);
3516                                             MemFree(master_docbuf);
3517                                         } else
3518                                             fprintf(fp, "%s", docbuf);
3519                                         MemFree(docbuf);
3520                                     }
3521                                     list = FreeTextAlignList(list);
3522                                 }
3523                             }
3524                         }
3525                     }	/*end of else*/
3526                 }
3527             }
3528 
3529             if(m_buf != NULL)
3530                 MemFree(m_buf);
3531             if(c_stop < right)
3532                 fprintf(fp, "\n");
3533             c_start = c_stop+1;
3534 	}
3535     }
3536     for(i = 0; i<=ALIGN_MAX_TYPE; ++i) {
3537         if(pc_list[i] != NULL)
3538             ValNodeFree(pc_list[i]);
3539     }
3540     MemFree(pc_list);
3541 
3542     if(option & TXALIGN_HTML)
3543         fprintf(fp, "</PRE>\n");
3544 
3545     already_linked = ValNodeFree(already_linked);
3546 
3547     if(fake_cds != NULL)
3548         SeqFeatFree(fake_cds);
3549     BioseqUnlock(m_bsp);
3550     if(has_tblastx)
3551         modify_tblastx_value (anp_list, 3, FALSE);
3552     MemFree(num_str);
3553     MemFree(sep_bar);
3554     MemFree(bar);
3555     MemFree(p_stop);
3556     ObjMgrClearHold();
3557     if(reverse_display)
3558         revert_blastx_alignment (anp_list, master_anp);
3559     return has_data;
3560 }
3561 
3562 /*
3563 	Adds tdsp to the end of a chain of TxDfLineStructPtr's.
3564 	Returns the new TxDfLineStructPtr.
3565 */
3566 
3567 static TxDfLineStructPtr
TxDfLineStructAdd(TxDfLineStructPtr PNTR head,TxDfLineStructPtr tdsp)3568 TxDfLineStructAdd(TxDfLineStructPtr PNTR head, TxDfLineStructPtr tdsp)
3569 
3570 {
3571 	TxDfLineStructPtr var;
3572 
3573 	if (*head == NULL)
3574 	{
3575 		*head = tdsp;
3576 	}
3577 	else
3578 	{
3579 		var = *head;
3580 		while (var->next)
3581 		{
3582 			var = var->next;
3583 		}
3584 		var->next = tdsp;
3585 	}
3586 
3587 	return tdsp;
3588 }
3589 
3590 /*
3591   Filters the FASTA definition lines based on the SeqIdPtr's given
3592   as input. The gi_list is used to pull sequences out of the BioseqPtr,
3593   The buffer_id contains FASTA formatted ID, title contains the rest
3594   of the title.
3595 */
3596 
3597 NLM_EXTERN Boolean LIBCALL
FilterTheDefline(BioseqPtr bsp,SeqIdPtr gi_list_head,CharPtr buffer_id,Int4 buffer_id_length,CharPtr PNTR titlepp)3598 FilterTheDefline (BioseqPtr bsp, SeqIdPtr gi_list_head, CharPtr buffer_id, Int4 buffer_id_length, CharPtr PNTR titlepp)
3599 
3600 {
3601     Boolean first_time, found_gi, found_first_gi, not_done;
3602     CharPtr bsp_title, bsp_title_ptr, title, title_ptr;
3603     Char buffer[BUFFER_LENGTH], id_buf[255];
3604     Int4 index;
3605     SeqIdPtr gi_list, sip;
3606 
3607     if (bsp == NULL || gi_list_head == NULL)
3608         return FALSE;
3609 
3610     bsp_title = BioseqGetTitle(bsp);
3611     bsp_title_ptr = bsp_title;
3612     /* This is the longest it could be, this could be done more efficiently. */
3613     title = (CharPtr) MemNew((256+StringLen(bsp_title))*sizeof(Char));
3614     title_ptr = title;
3615     *titlepp = title;
3616 
3617     /*
3618       if (bsp_title_ptr == NULL)
3619       return FALSE;
3620     */
3621 
3622     first_time = TRUE;
3623     found_first_gi = TRUE;
3624     not_done = TRUE;
3625     while (not_done) {
3626         if (!first_time) {
3627             index=0;
3628             id_buf[0] = NULLB;
3629             if (bsp_title_ptr) {
3630                 while (*bsp_title_ptr != NULLB) {
3631                     if (*bsp_title_ptr == ' ') {
3632                         id_buf[index] = NULLB;
3633                         break;
3634                     }
3635                     id_buf[index] = *bsp_title_ptr;
3636                     bsp_title_ptr++;
3637                     index++;
3638                 }
3639             }
3640             if (id_buf[0] == NULLB)
3641                 break;
3642             sip = SeqIdParse(id_buf);
3643         } else {
3644             sip = bsp->id;
3645         }
3646 
3647         found_gi = FALSE;
3648         gi_list = gi_list_head;
3649         while (gi_list) {
3650             if(SeqIdIn(gi_list, sip) == TRUE) {
3651                 found_gi = TRUE;
3652                 break;
3653             }
3654             gi_list = gi_list->next;
3655         }
3656 
3657         if (found_gi) {
3658             if (!found_first_gi) {
3659                 *title_ptr = '>';
3660                 title_ptr++;
3661                 SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
3662                 StringCpy(title_ptr, buffer);
3663                 title_ptr += StringLen(buffer);
3664             } else {
3665                 SeqIdWrite(sip, buffer_id, PRINTID_FASTA_LONG, buffer_id_length);
3666                 found_first_gi = FALSE;
3667             }
3668 
3669             if (bsp_title_ptr) {
3670                 while (*bsp_title_ptr != '>' && *bsp_title_ptr != NULLB) {
3671                     *title_ptr = *bsp_title_ptr;
3672                     bsp_title_ptr++;
3673                     title_ptr++;
3674                 }
3675             }
3676         } else {
3677             if (bsp_title_ptr) {
3678                 while (*bsp_title_ptr != '>' && *bsp_title_ptr != NULLB)
3679                     bsp_title_ptr++;
3680             }
3681         }
3682 
3683         if (first_time) {
3684             first_time = FALSE;
3685         } else {
3686             sip = SeqIdSetFree(sip);
3687         }
3688 
3689         if (bsp_title_ptr) {
3690             if (*bsp_title_ptr == '>')
3691                 bsp_title_ptr++;
3692 
3693             if (*bsp_title_ptr == NULLB) {
3694                 *title_ptr = NULLB;
3695                 break;
3696             }
3697         }
3698     }
3699     return TRUE;
3700 }
3701 
3702 
3703 #define STATS_LENGTH 14
3704 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromAnnot(SeqAnnotPtr seqannot,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks)3705 PrintDefLinesFromAnnot(SeqAnnotPtr seqannot, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks)
3706 
3707 {
3708 	Boolean retval;
3709 
3710 	if (seqannot == NULL || seqannot->type != 2)
3711 	{
3712 		return FALSE;
3713 	}
3714 
3715 	retval = PrintDefLinesFromSeqAlign((SeqAlignPtr) seqannot->data, line_length, outfp, options, mode, marks);
3716 
3717 	return retval;
3718 }
3719 
3720 NLM_EXTERN Boolean LIBCALL
PrintDefLinesExtra(ValNodePtr vnp,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,SeqLocPtr seqloc)3721 PrintDefLinesExtra(ValNodePtr vnp, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks, SeqLocPtr seqloc)
3722 
3723 {
3724 	Boolean retval;
3725 	Char buffer[128];
3726 	Int2 titleIdAllocated;
3727 	Int4 index=0;
3728 	SeqAlignPtr seqalign;
3729 
3730 	if (vnp == NULL || seqloc == NULL)
3731 	{
3732 		return FALSE;
3733 	}
3734 
3735 	/* Disable printing of title. */
3736 	if (!(options & TXALIGN_DO_NOT_PRINT_TITLE))
3737 		options += TXALIGN_DO_NOT_PRINT_TITLE;
3738 
3739 
3740 	asn2ff_set_output(outfp, NULL);
3741 
3742 	ff_StartPrint(0, 0, (Int2)(line_length+2), NULL);
3743 
3744 	titleIdAllocated = line_length - STATS_LENGTH;
3745 
3746 	NewContLine();
3747 	TabToColumn((Int2)(titleIdAllocated+2));
3748 	ff_AddString("Score     E");
3749 	NewContLine();
3750 	TabToColumn((Int2)(titleIdAllocated+2));
3751 	if (options & TXALIGN_SHOW_NO_OF_SEGS) {
3752 	    ff_AddString("(bits)  Value  N");
3753 	} else {
3754 	   ff_AddString("(bits)  Value");
3755 	}
3756 	ff_EndPrint();
3757 
3758         while (vnp && seqloc)
3759         {
3760 		ff_StartPrint(0, 0, (Int2)(line_length+2), NULL);
3761 		index++;
3762                 seqalign = (SeqAlignPtr) vnp->data.ptrvalue;
3763 		sprintf(buffer, "\nSignificant matches for pattern occurrence %ld at position %ld\n\n",
3764                         (long) index, (long) (SeqLocStart(seqloc)+1));
3765                 ff_AddString(buffer);
3766 		ff_EndPrint();
3767 		retval = PrintDefLinesFromSeqAlign(seqalign, line_length, outfp, options, mode, marks);
3768 		vnp = vnp->next;
3769 		seqloc = seqloc->next;
3770 	}
3771 
3772 	return retval;
3773 }
3774 
3775 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignEx(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,Int4 number_of_descriptions)3776 PrintDefLinesFromSeqAlignEx(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
3777 		Int4 mode, Int2Ptr marks, Int4 number_of_descriptions)
3778 {
3779 	return PrintDefLinesFromSeqAlignEx2(seqalign, line_length, outfp, options,
3780 			mode, marks, number_of_descriptions, (CharPtr)NULL, (CharPtr)NULL);
3781 }
3782 
3783 
3784 static
3785 CharPtr
StringAppend(CharPtr * dst,size_t * size,CharPtr src,size_t * used)3786 StringAppend(CharPtr *dst, size_t *size, CharPtr src, size_t *used)
3787 {
3788 	size_t pos, len;
3789 
3790 	if (*dst == NULL) {
3791 		*size = 1;
3792 		pos = 0;
3793 	}
3794 	else {
3795 		pos = *used;
3796 	}
3797 	if (src == NULL) {
3798 		return *dst;
3799 	}
3800 	len = StringLen(src);
3801 	*used += len;
3802 	if (*dst == NULL || pos + len + 1 > *size) {
3803 		/**
3804 		 * extending destination buffer
3805 		 */
3806 		CharPtr old = *dst;
3807 		for (; pos + len + 1 > *size; *size *= 2);
3808 		*dst = (CharPtr)MemNew(*size);
3809 		**dst = '\0';
3810 		if (old != NULL) {
3811 			StringCpy(*dst, old);
3812 			MemFree(old);
3813 		}
3814 	}
3815 	StringCpy((*dst) + pos, src);
3816 	return *dst;
3817 }
3818 
3819 
3820 static
3821 Boolean
SeqAlignSegsStr(SeqAlignPtr sap,Int2 index,CharPtr * dst,size_t * size,size_t * used)3822 SeqAlignSegsStr(SeqAlignPtr sap, Int2 index, CharPtr *dst, size_t *size, size_t *used)
3823 {
3824 	Char buf[128];
3825 	Int4 start, stop;
3826 
3827 	start = SeqAlignStart(sap, 1);
3828 	stop = SeqAlignStop(sap, 1);
3829 
3830 	if (sap == NULL) {
3831 		return FALSE;
3832 	}
3833 
3834 	sprintf(buf, "%ld-%ld", (long)(start), (long)(stop));
3835 	StringAppend(dst, size, buf, used);
3836 
3837 	return TRUE;
3838 }
3839 
3840 /**
3841 	* links to incomplete genomes
3842 **/
3843 static void
make_dumpgnl_links(SeqIdPtr sip,CharPtr blast_type,CharPtr segs,CharPtr dbname,Boolean is_na,FILE * fp,CharPtr sip_buffer,Boolean isLinkOut)3844 make_dumpgnl_links(SeqIdPtr sip, CharPtr blast_type, CharPtr segs, CharPtr dbname, Boolean is_na, FILE *fp, CharPtr sip_buffer, Boolean isLinkOut)
3845 {
3846     BioseqPtr bsp;
3847     Boolean nodb_path = FALSE;
3848     Char gnl[256];
3849     CharPtr str, chptr, dbtmp;
3850     Uchar buf[32];
3851     Int4 i, j, length, gi;
3852     MD5Context context;
3853     Char passwd[128], tool_url[128], tmpbuff[256];
3854     SeqIdPtr bestid;
3855 
3856     /* We do need to make security protected link to BLAST gnl */
3857     if (StringStr(sip_buffer, "gnl|BL_ORD_ID") != NULL)
3858         return;
3859 
3860     *passwd = NULLB;
3861     *tool_url = NULLB;
3862 
3863     str = NULL;
3864 #ifdef OS_UNIX
3865     str = getenv("DUMPGNL_PASSWD");
3866 #endif
3867     if(str != NULL) {
3868         StringCpy(passwd, str);
3869     } else {
3870         GetAppParam("NCBI", blast_type, "PASSWD", "", passwd,
3871                     sizeof(passwd));
3872     }
3873 
3874     str = NULL;
3875 #ifdef OS_UNIX
3876     str = getenv("DUMPGNL_TOOL_URL");
3877 #endif
3878     if(str != NULL) {
3879         StringCpy(tool_url, str);
3880     } else {
3881         GetAppParam("NCBI", blast_type, "TOOL_URL", "", tool_url,
3882                     sizeof(tool_url));
3883     }
3884     /*only for linkout*/
3885     if(isLinkOut){
3886       StringCpy(tool_url, "/blast/dumpgnl.cgi");
3887     }
3888     /*no check for linkout*/
3889     if(!isLinkOut&&(*passwd == NULLB || *tool_url == NULLB))
3890         return;
3891 
3892     /* If we are using 'dumpgnl.cgi' (the default) do not strip off the path. */
3893     if (StrStr(tool_url, "dumpgnl.cgi")  == NULL)
3894 	nodb_path = TRUE;
3895 
3896     if(nodb_path) {
3897 
3898         length = StringLen(dbname);
3899         dbtmp = MemNew(sizeof(Char)*length + 2); /* aditional space and NULLB */
3900 
3901         for(i = 0; i < length; i++) {
3902 
3903             if(isspace(dbname[i]) || dbname[i] == ',') /* Rolling spaces */
3904                 continue;
3905 
3906             j = 0;
3907             while (!isspace(dbname[i]) && j < 256  && i < length) {
3908                 tmpbuff[j] = dbname[i];
3909                 j++; i++;
3910                 if(dbname[i] == ',')  /* Comma is valid delimiter */
3911                     break;
3912             }
3913             tmpbuff[j] = NULLB;
3914 
3915             if((chptr = strrchr(tmpbuff, '/')) != NULL) {
3916                 StringCat(dbtmp, chptr+1);
3917             } else {
3918                 StringCat(dbtmp, tmpbuff);
3919             }
3920 
3921             StringCat(dbtmp, " ");
3922         }
3923     } else {
3924         dbtmp = dbname;
3925     }
3926 
3927     if (sip->choice == SEQID_GI)
3928        gi = sip->data.intvalue;
3929     else
3930        gi = -1;
3931 
3932     bsp = BioseqLockById(sip);
3933     if (bsp)
3934 	sip = bsp->id;
3935 
3936     bestid = SeqIdFindBest(sip, SEQID_GENERAL);
3937     if (bestid && bestid->choice != SEQID_GENERAL)
3938     {
3939     	bestid = SeqIdFindBest(sip, SEQID_OTHER);
3940     	if (bestid && bestid->choice != SEQID_OTHER)
3941 	{
3942     		bestid = SeqIdFindBestAccession(sip);
3943 	}
3944     }
3945     /*
3946      * Need to protect start and stop positions
3947      * to avoid web users sending us hand-made URLs
3948      * to retrive full sequences
3949      */
3950     if (bestid && bestid->choice != SEQID_GI)
3951     {
3952     	MD5Init(&context);
3953     	length = StringLen(passwd);
3954 	MD5Update(&context, (UcharPtr)passwd, (Uint4)length);
3955 	SeqIdWrite(bestid, gnl, PRINTID_FASTA_SHORT, sizeof(gnl));
3956 	MD5Update(&context, (UcharPtr)gnl, (Uint4)StringLen(gnl));
3957 	MD5Update(&context, (UcharPtr)segs, (Uint4)StringLen(segs));
3958 	MD5Update(&context, (UcharPtr)passwd, (Uint4)length);
3959 	MD5Final(&context, (UcharPtr)buf);
3960     }
3961     else
3962     {
3963 	gnl[0] = NULLB;
3964     }
3965 
3966     bestid = SeqIdFindBest(sip, SEQID_GI);
3967     if (gi < 0 && bestid && bestid->choice == SEQID_GI)
3968     {
3969 	gi = bestid->data.intvalue;
3970     }
3971 
3972     str = MakeURLSafe(dbtmp == NULL ? "nr" : dbtmp);
3973     if (strchr(tool_url, '?') == NULL)
3974     {
3975     	fprintf(fp, "<a href=\"%s?db=%s&na=%d&", tool_url, str, is_na);
3976     }
3977     else
3978     {
3979     	fprintf(fp, "<a href=\"%s&db=%s&na=%d&", tool_url, str, is_na);
3980     }
3981     str = (CharPtr) MemFree(str);
3982     if (gnl[0] != NULLB)
3983     {
3984     	str = MakeURLSafe(gnl);
3985     	fprintf(fp, "gnl=%s&", str);
3986     	str = (CharPtr) MemFree(str);
3987     }
3988     if (gi != -1)
3989     {
3990     	fprintf(fp, "gi=%ld&", (long) gi);
3991     }
3992     if (RID_glb)
3993     {
3994     	fprintf(fp, "RID=%s&", RID_glb);
3995     }
3996 
3997     if (query_number_glb > 0)
3998        fprintf(fp, "QUERY_NUMBER=%ld&", query_number_glb);
3999 
4000     fprintf(fp,
4001             "segs=%s&seal=%02X%02X%02X%02X"
4002             "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X\">",
4003             segs,
4004             buf[0], buf[1], buf[2], buf[3],
4005             buf[4], buf[5], buf[6], buf[7],
4006             buf[8], buf[9], buf[10], buf[11],
4007             buf[12], buf[13], buf[14], buf[15]);
4008 
4009     BioseqUnlock(bsp);
4010     if(nodb_path)
4011         MemFree(dbtmp);
4012 
4013     return;
4014 }
4015 
4016 /* Filter the BlastdefLine structure based upon a gilist.
4017 Those that do match the gilist are returned, otherwise
4018 it's deleted.
4019 */
FilterAsn1DefLine(BlastDefLinePtr bdsp,SeqIdPtr gilist)4020 static BlastDefLinePtr  FilterAsn1DefLine(BlastDefLinePtr bdsp, SeqIdPtr gilist)
4021 {
4022     BlastDefLinePtr bdtmp1, bdtmp2, bdtmp_last, bdsp_retval=NULL;
4023     SeqIdPtr gi_list;
4024     Boolean found_gi;
4025 
4026     bdtmp1 = bdsp;
4027     while (bdtmp1)
4028     {
4029         gi_list = gilist;
4030         found_gi = FALSE;
4031 
4032         while (gi_list) {
4033             if(SeqIdIn(gi_list, bdtmp1->seqid) == TRUE) {
4034                 found_gi = TRUE;
4035                 break;
4036             }
4037             gi_list = gi_list->next;
4038         }
4039 
4040         if(found_gi) {
4041 
4042 		if (bdsp_retval == NULL)
4043 		{
4044 			bdsp_retval = bdtmp1;
4045 			bdtmp_last = bdtmp1;
4046 		}
4047 		else
4048 		{
4049 			bdtmp_last->next = bdtmp1;
4050 			bdtmp_last = bdtmp1;
4051 		}
4052 		bdtmp1 = bdtmp1->next;
4053 		bdtmp_last->next = NULL;
4054 	}
4055 	else
4056 	{
4057 		bdtmp2 = bdtmp1;
4058 		bdtmp1 = bdtmp1->next;
4059 		bdtmp2->next = NULL;
4060 		BlastDefLineFree(bdtmp2);
4061         }
4062     }
4063     return bdsp_retval;
4064 }
4065 
4066 /*
4067 	Formats the one-line description for the first part
4068 	of the BLAST report.  Note that the seqid for the
4069 	first sequence is formatted elsewhere.
4070 */
4071 
4072 
4073 static Boolean
Tx_PrintDefLine(BlastDefLinePtr bdsp,CharPtr buffer,Int4 length)4074 Tx_PrintDefLine(BlastDefLinePtr bdsp, CharPtr buffer, Int4 length)
4075 {
4076 	BlastDefLinePtr bdsp_tmp = NULL;
4077 	Char seqid_buf[BUFFER_LENGTH];
4078 	CharPtr ptr = NULL;
4079 	Int4 seqid_length = 0, total_length = 0;
4080 
4081 	if (bdsp == NULL)
4082 		return FALSE;
4083 
4084     /* Initialize arrays */
4085     MemSet(seqid_buf, '\0', BUFFER_LENGTH);
4086     MemSet(buffer, '\0', length);
4087 
4088 	StringNCpy(buffer, bdsp->title, length);
4089 	total_length = StringLen(buffer) + 1;
4090 
4091 	if (total_length >= length)
4092 		return TRUE;
4093 
4094 
4095 	bdsp_tmp = bdsp->next;
4096 	while (bdsp_tmp)
4097 	{
4098         ptr = buffer+total_length - 1;
4099 		SeqIdWrite(bdsp_tmp->seqid, seqid_buf, PRINTID_FASTA_LONG, BUFFER_LENGTH-1);
4100 		*ptr = ' ';
4101 		*(ptr+1) = '>';
4102 		StringNCpy(ptr+2, seqid_buf, length-total_length-2);
4103         total_length = StringLen(buffer) + 1;
4104 		if (total_length+2 >= length)
4105 			break;
4106 
4107 		ptr = buffer+total_length - 1;
4108 		*ptr = ' ';
4109 		StringNCpy(ptr+1, bdsp_tmp->title, length-total_length-1);
4110         total_length = StringLen(buffer) + 1;
4111 		if (total_length+3 >= length)
4112 			break;
4113 
4114 		bdsp_tmp = bdsp_tmp->next;
4115 	}
4116 
4117 	return TRUE;
4118 }
4119 
4120 #define KNOCK_OFF_ALLOWED
4121 NLM_EXTERN void LIBCALL
ScoreAndEvalueToBuffers(FloatHi bit_score,FloatHi evalue,CharPtr bit_score_buf,CharPtr PNTR evalue_buf,Uint1 format_options)4122 ScoreAndEvalueToBuffers(FloatHi bit_score, FloatHi evalue,
4123                         CharPtr bit_score_buf, CharPtr PNTR evalue_buf,
4124                         Uint1 format_options)
4125 {
4126 #ifdef OS_MAC
4127    if (evalue < 1.0e-180) {
4128       sprintf(*evalue_buf, "0.0");
4129    } else if (evalue < 1.0e-99) {
4130       sprintf(*evalue_buf, "%2.0Le", evalue);
4131       if (format_options & TX_KNOCK_OFF_ALLOWED)
4132          (*evalue_buf)++; /* Knock off digit. */
4133    } else if (evalue < 0.0009) {
4134       sprintf(*evalue_buf, "%3.0Le", evalue);
4135    } else if (evalue < 0.1) {
4136       sprintf(*evalue_buf, "%4.3Lf", evalue);
4137    } else if (evalue < 1.0) {
4138       sprintf(*evalue_buf, "%3.2Lf", evalue);
4139    } else if (evalue < 10.0) {
4140       sprintf(*evalue_buf, "%2.1Lf", evalue);
4141    } else {
4142       sprintf(*evalue_buf, "%5.0Lf", evalue);
4143    }
4144    if (bit_score > 9999)
4145       sprintf(bit_score_buf, "%4.3Le", bit_score);
4146    else if (bit_score > 99.9)
4147       sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
4148    else /* %4.1Lf is bad on 68K Mac, so cast to long */
4149       sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
4150 #else
4151    if (evalue < 1.0e-180) {
4152       sprintf(*evalue_buf, "0.0");
4153    } else if (evalue < 1.0e-99) {
4154       sprintf(*evalue_buf, "%2.0le", evalue);
4155       if (format_options & TX_KNOCK_OFF_ALLOWED)
4156          (*evalue_buf)++; /* Knock off digit. */
4157    } else if (evalue < 0.0009) {
4158       sprintf(*evalue_buf, "%3.0le", evalue);
4159    } else if (evalue < 0.1) {
4160       sprintf(*evalue_buf, "%4.3lf", evalue);
4161    } else if (evalue < 1.0) {
4162       sprintf(*evalue_buf, "%3.2lf", evalue);
4163    } else if (evalue < 10.0) {
4164       sprintf(*evalue_buf, "%2.1lf", evalue);
4165    } else {
4166       sprintf(*evalue_buf, "%5.0lf", evalue);
4167    }
4168    if (bit_score > 9999)
4169       sprintf(bit_score_buf, "%4.3le", bit_score);
4170    else if (bit_score > 99.9)
4171       sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
4172    else if (format_options & TX_INTEGER_BIT_SCORE)
4173       sprintf(bit_score_buf, "%4.0lf", bit_score);
4174    else
4175       sprintf(bit_score_buf, "%4.1lf", bit_score);
4176 #endif
4177 }
4178 
4179 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,Int4 number_of_descriptions,CharPtr db_name,CharPtr blast_type,CharPtr www_root_path)4180 PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
4181 		Int4 mode, Int2Ptr marks, Int4 number_of_descriptions,
4182 		CharPtr db_name, CharPtr blast_type, CharPtr www_root_path)
4183 {
4184     BioseqPtr bsp;
4185     Boolean found_next_one, found_gnl_id, same_id, found_score=FALSE, make_link=FALSE;
4186     Char buffer[BUFFER_LENGTH+1], buffer1[BUFFER_LENGTH+1], eval_buff[10], bit_score_buff[10];
4187     Char HTML_buffer[BUFFER_LENGTH+1], HTML_database[32], HTML_dopt[16], id_buffer[BUFFER_LENGTH+1];
4188     Char *ptr, *ptr_start, *eval_buff_ptr, *bit_score_buff_ptr;
4189     Int4 pos, title_length, title_allocated, titleIdAllocated;
4190     Nlm_FloatHi bit_score, evalue;
4191     Int4 gi = 0, number, score;
4192     SeqIdPtr bestid, gi_list, subject_id, sip_list=NULL, last_id;
4193     TxDfLineStructPtr txsp = NULL, txsp_head, txsp_var;
4194     Boolean retval = FALSE;
4195     Boolean firstnew = TRUE;
4196     Int4 countdescr = number_of_descriptions;
4197     Int4 numalign;
4198     DbtagPtr db_tag;
4199     ObjectIdPtr oip;
4200     Int2 ColumnDistance=2, extraSpace=0, extraSpace2=0, strLen=0, maxEvalWidth=5, maxNWidth=2;
4201     Char tempBuf[64], tempBuf2[64];
4202 
4203     if (outfp == NULL) {
4204         return FALSE;
4205     }
4206 
4207     if (seqalign == NULL || number_of_descriptions == 0) {	/* Two line returns so that the alignments or db report is not all bunched up. */
4208         NewContLine();
4209         NewContLine();
4210         return FALSE;
4211     }
4212 
4213 #ifdef OS_UNIX
4214     if (!www_root_path)
4215        www_root_path = getenv("WWW_ROOT_PATH");
4216 #endif
4217 
4218     if(!StringICmp(blast_type, "fruitfly")) {
4219         fprintf(outfp, "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0> - please follow this image for the map location of the sequence<P>\n");
4220     }
4221 
4222     asn2ff_set_output(outfp, NULL);
4223 
4224     ff_StartPrint(0, 0, (Int2)(line_length+2), NULL);
4225 
4226     titleIdAllocated = line_length - STATS_LENGTH;
4227 
4228     if (options & TXALIGN_SHOW_NO_OF_SEGS) {
4229         titleIdAllocated -= 4;
4230     }
4231 
4232     if (options & TXALIGN_CHECK_BOX) {
4233         titleIdAllocated += 2;
4234     }
4235 
4236     if(options & TXALIGN_NEW_GIF)
4237         titleIdAllocated += 3;
4238 
4239     /* <PRE> block should be already opened outside of this function,
4240        but open it here just in case */
4241     if (options & TXALIGN_HTML) {
4242        ff_AddString("<PRE>");
4243        NewContLine();
4244     }
4245 
4246     /*AAS*/
4247     if (!(options & TXALIGN_DO_NOT_PRINT_TITLE)) {
4248         if ((mode == FIRST_PASS) || (mode == NOT_FIRST_PASS_REPEATS)) {
4249 	  if(RID_glb && options&TXALIGN_SHOW_LINKOUT&&options&TXALIGN_HTML){
4250 	    if(PairwiseSeqAlignHasLinkout(seqalign, linkout_structure)){
4251 
4252 	      fprintf(outfp, URL_Structure_Overview,  RID_glb, 0, 0, CDD_RID_glb, "overview", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
4253 
4254 	    }
4255 	  }
4256             NewContLine();
4257 
4258             NewContLine();
4259             TabToColumn((Int2)(titleIdAllocated));
4260 
4261             ff_AddString("Score    E");
4262             NewContLine();
4263             ff_AddString("Sequences producing significant alignments:");
4264             TabToColumn((Int2)(titleIdAllocated));
4265             if (options & TXALIGN_SHOW_NO_OF_SEGS) {
4266                 ff_AddString("(bits) Value  N");
4267             } else {
4268                 ff_AddString("(bits) Value");
4269             }
4270             NewContLine();
4271         }
4272 
4273         if (mode == NOT_FIRST_PASS_REPEATS) {
4274             ff_AddString("Sequences used in model and found again:");
4275             NewContLine();
4276         }
4277         if (mode == NOT_FIRST_PASS_NEW) {
4278             ff_AddString("Sequences not found previously or not previously below threshold:");
4279             NewContLine();
4280         }
4281         ff_EndPrint();
4282     }
4283 
4284     numalign = 0;
4285     last_id = NULL;
4286     txsp_head = NULL;
4287     while (seqalign) {
4288         if ((mode == FIRST_PASS) ||
4289             ((mode == NOT_FIRST_PASS_REPEATS) && marks && marks[numalign] & SEQ_ALIGN_MARK_REPEAT) ||
4290             ((mode == NOT_FIRST_PASS_NEW) && marks && (!(marks[numalign] & SEQ_ALIGN_MARK_REPEAT)))) {
4291 
4292             subject_id = SeqIdDup(TxGetSubjectIdFromSeqAlign(seqalign));
4293             same_id = FALSE;
4294             if(last_id && SeqIdComp(subject_id, last_id) == SIC_YES) {
4295                 same_id = TRUE;
4296             }
4297 
4298             last_id = SeqIdFree(last_id);
4299             last_id = SeqIdDup(subject_id);
4300 
4301             found_score = GetScoreAndEvalue(seqalign, &score, &bit_score, &evalue, &number);
4302             /* if the ID has been seen before, check that proper values are saved. */
4303             if (same_id == TRUE) {
4304                 if (score > txsp->score)
4305                     txsp->score = score;
4306                 if (bit_score > txsp->bit_score)
4307                     txsp->bit_score = bit_score;
4308                 if (evalue < txsp->evalue)
4309                     txsp->evalue = evalue;
4310                 if (number < txsp->number)
4311                     txsp->number = number;
4312                 StringAppend(&txsp->segs_str, &txsp->segs_buflen, ",", &txsp->segs_used);
4313                 SeqAlignSegsStr(seqalign, 1, &txsp->segs_str, &txsp->segs_buflen, &txsp->segs_used);
4314                 subject_id = SeqIdFree(subject_id);
4315             } else {
4316                 bsp = BioseqLockById(subject_id);
4317                 txsp = (TxDfLineStructPtr) MemNew(sizeof(TxDfLineStruct));
4318                 txsp->segs_str = NULL;
4319                 txsp->segs_buflen = 0;
4320                 if(bsp != NULL) {
4321 		    BlastDefLinePtr bdsp;
4322 		    Char buffer_priv[BUFFER_LENGTH];
4323 
4324                 MemSet(buffer_priv, '\0', BUFFER_LENGTH);
4325         	    bdsp =  FDGetDeflineAsnFromBioseq(bsp);
4326                     gi_list = GetUseThisGi(seqalign);
4327                     if (gi_list) {
4328 			if (bdsp)
4329 			{
4330         			bdsp = FilterAsn1DefLine(bdsp, gi_list);
4331 				Tx_PrintDefLine(bdsp, buffer_priv, BUFFER_LENGTH);
4332 				txsp->title = StringSave(buffer_priv);
4333 				txsp->id = bdsp->seqid;
4334                        		SeqIdWrite(bdsp->seqid, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4335 				bdsp->seqid = NULL;
4336 				BlastDefLineSetFree(bdsp);
4337                         	subject_id = SeqIdFree(subject_id);
4338                         	gi_list = SeqIdSetFree(gi_list);
4339 			}
4340 			else
4341 			{
4342                         	FilterTheDefline(bsp, gi_list, buffer, BUFFER_LENGTH, &(txsp->title));
4343                         	gi_list = SeqIdSetFree(gi_list);
4344                         	subject_id = SeqIdFree(subject_id);
4345                         	txsp->id = SeqIdParse(buffer);
4346 			}
4347                     } else {
4348 			if (bdsp)
4349 			{
4350 				Tx_PrintDefLine(bdsp, buffer_priv, BUFFER_LENGTH);
4351 				txsp->title = StringSave(buffer_priv);
4352 				txsp->id = subject_id;
4353                        		SeqIdWrite(bdsp->seqid, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4354 				BlastDefLineSetFree(bdsp);
4355 			}
4356 			else
4357 			{
4358                        		SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4359                         	txsp->title = StringSave(BioseqGetTitle(bsp));
4360                         	txsp->id = subject_id;
4361 			}
4362                     }
4363                     txsp->is_na = (bsp->mol != Seq_mol_aa);
4364                 } else {
4365                     SeqIdWrite(subject_id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4366                     txsp->title = StringSave("Unknown");
4367                     txsp->is_na = FALSE;
4368                     txsp->id = subject_id;
4369                 }
4370                 txsp->seqalign = seqalign;
4371                 txsp->buffer_id = StringSave(buffer);
4372                 txsp->score = score;
4373                 txsp->bit_score = bit_score;
4374                 txsp->evalue = evalue;
4375                 txsp->number = number;
4376                 txsp->found_score = found_score;
4377                 SeqAlignSegsStr(seqalign, 1, &txsp->segs_str, &txsp->segs_buflen, &txsp->segs_used);
4378                 if (marks) {
4379                     /* seq is new if it was not Good on previous iteration */
4380                     txsp->isnew = (Boolean) !(marks[numalign] & SEQ_ALIGN_MARK_PREVGOOD);
4381                     txsp->waschecked = (Boolean) marks[numalign] & SEQ_ALIGN_MARK_PREVCHECKED;
4382                 } else {
4383                     txsp->isnew = FALSE;
4384                     txsp->waschecked = FALSE;
4385                 }
4386 
4387                 txsp = TxDfLineStructAdd(&txsp_head, txsp);
4388                 if(bsp != NULL)
4389                     BioseqUnlock(bsp);
4390                 retval = TRUE;
4391             }
4392         }
4393         seqalign = seqalign->next;
4394         numalign++;
4395     }
4396     last_id = SeqIdFree(last_id);
4397 
4398     if(retval == FALSE)
4399         return FALSE;
4400 
4401 
4402     /* Used for dumpgnl reports if GNL id's. (overwrite parameter!) */
4403     if (blast_type)
4404     {
4405         blast_type = StringSave(blast_type);
4406         StringUpper(blast_type);
4407     }
4408 
4409     /* If option TXALIGN_NO_ENTREZ set full database name will be stripped
4410        to the database fileneme */
4411 
4412     /* If a tool_url is set, then we use this rather than Entrez. */
4413     if (blast_type && !(options & TXALIGN_NO_ENTREZ))
4414     {
4415     	Char tool_url[128];
4416 
4417 	*tool_url = NULLB;
4418         GetAppParam("NCBI", blast_type, "TOOL_URL", "", tool_url, sizeof(tool_url));
4419 	/* Do use Entrez if available if the tool is dumpgnl.cgi as it does not handle gi's. */
4420 	if (*tool_url != NULLB  && StringStr(tool_url, "dumpgnl.cgi") == NULL)
4421 		options |= TXALIGN_NO_ENTREZ;
4422     }
4423 
4424     txsp = txsp_head;
4425     while (txsp && countdescr != 0) {
4426         found_next_one = FALSE;
4427         if (options & TXALIGN_HTML) {
4428 
4429             if (txsp->is_na) {
4430                 StringCpy(HTML_dopt, "GenBank");
4431                 StringCpy(HTML_database, "Nucleotide");
4432             } else {
4433                 StringCpy(HTML_dopt, "GenPept");
4434                 StringCpy(HTML_database, "Protein");
4435             }
4436             gi = 0;
4437             make_link = FALSE;
4438             bestid = SeqIdFindBest(txsp->id, SEQID_GI);
4439             if (bestid != NULL && bestid->choice == SEQID_GI && !(options & TXALIGN_NO_ENTREZ)) {
4440                 gi = bestid->data.intvalue;
4441                 if (options & TXALIGN_CHECK_BOX && options & TXALIGN_CHECK_BOX_CHECKED) {
4442    sprintf(HTML_buffer,
4443            "<INPUT TYPE=\"checkbox\" NAME=\"checked_GI\" "
4444            "VALUE=\"%d\" CHECKED> "
4445            "<a href=%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s %s>"
4446            "<INPUT TYPE=\"hidden\" NAME =\"good_GI\" VALUE = \"%d\">",
4447            gi, NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4448 	   options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "",
4449 	   gi);
4450                 } else if (options & TXALIGN_CHECK_BOX) {
4451    sprintf(HTML_buffer,
4452            "<INPUT TYPE=\"checkbox\" NAME=\"checked_GI\" VALUE=\"%d\"> "
4453            "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
4454            gi, NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4455 	   options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
4456 
4457                 } else {
4458    if(!StringICmp(blast_type, "fruitfly")) {
4459        sprintf(HTML_buffer,
4460                "<a href=\"http://www.ncbi.nlm.nih.gov\">"
4461                "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0></a>"
4462                "&nbsp;&nbsp;<a href=\"%s?"
4463                "cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
4464                NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4465 	       options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
4466    } else {
4467 
4468    sprintf(HTML_buffer,
4469            "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
4470            NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4471 	   options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
4472    }
4473                 }
4474 
4475                 if (options & TXALIGN_NEW_GIF && (countdescr == -1 || countdescr > 0)) {
4476                     if (txsp->isnew) {
4477                         if (firstnew) {
4478                             firstnew = FALSE;
4479                             fprintf(outfp, "<a name = Evalue></a>");
4480                         }
4481                         fprintf(outfp, "<br><IMG SRC=\"%s/blast/images/new.gif\" WIDTH=30 HEIGHT=15 ALT=\"New sequence mark\">", www_root_path == NULL? "" : www_root_path);
4482                     } else {
4483                         fprintf(outfp, "<br><IMG SRC=\"%s/blast/images/bg.gif\" WIDTH=30 HEIGHT=15 ALT=\" \">",  www_root_path == NULL? "" : www_root_path);
4484                     }
4485                     if (txsp->waschecked) {
4486                         fprintf(outfp, "<IMG SRC=\"%s/blast/images/checked.gif\" WIDTH=15 HEIGHT=15 ALT=\"Checked mark\">",  www_root_path == NULL? "" : www_root_path);
4487                     } else {
4488                         fprintf(outfp, "<IMG SRC=\"%s/blast/images/bg.gif\" WIDTH=15 HEIGHT=15 ALT=\" \">",  www_root_path == NULL? "" : www_root_path);
4489                     }
4490                 }
4491                 fprintf(outfp, "%s", HTML_buffer);
4492                 make_link = TRUE;
4493                 /* If not SEQID_GI */
4494             } else if (bestid != NULL && !(options & TXALIGN_NO_DUMPGNL) || (options & TXALIGN_NO_ENTREZ))
4495                 {
4496                     if (bestid->choice != SEQID_GENERAL && bestid->choice != SEQID_OTHER)
4497                         { /* HACK, HACK, use SEQID_GENERAL for Greg's page, even though GI is present. */
4498                             if (bsp = BioseqLockById(bestid)) {
4499 				bestid = SeqIdFindBest(bsp->id, SEQID_OTHER);
4500 				BioseqUnlock(bsp);
4501 			    }
4502                         }
4503                     if (bestid->choice == SEQID_GENERAL) {
4504                         db_tag = (DbtagPtr) bestid->data.ptrvalue;
4505                         if(db_tag->db && StringCmp(db_tag->db, "THC") == 0) {
4506                             oip = db_tag->tag;
4507                             if(oip->id != 0) {
4508                                 fprintf(outfp, "<a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id);
4509 
4510                             }
4511                         } else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) {
4512                            oip = db_tag->tag;
4513                            if(oip->id != 0) {
4514                               fprintf(outfp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id);
4515                            }
4516                         } else {
4517                             make_dumpgnl_links(txsp->id, blast_type, txsp->segs_str, db_name, txsp->is_na, outfp, txsp->buffer_id, FALSE);
4518                         }
4519                     } else
4520                         make_dumpgnl_links(txsp->id, blast_type, txsp->segs_str, db_name, txsp->is_na, outfp, txsp->buffer_id, FALSE);
4521                     make_link = TRUE;
4522                 }
4523         }
4524 
4525         sprintf(buffer, "%s", txsp->buffer_id);
4526         if (!(options & TXALIGN_SHOW_GI)) {
4527             if (StringNCmp(buffer, "gi|", 3) == 0) {
4528                 ptr = &buffer[3];
4529                 while (*ptr != NULLB && *ptr != ' ') { /* Is there another ID beside the GI? */
4530                     if (*ptr == '|') {
4531                         ptr++;
4532                         found_next_one = TRUE;
4533                         break;
4534                     }
4535                     ptr++;
4536                 }
4537             }
4538         }
4539         if (found_next_one == FALSE) {
4540             ptr = buffer;
4541             ptr_start = buffer;
4542         } else {
4543             ptr_start = ptr;
4544         }
4545 
4546         found_gnl_id = FALSE;
4547         /* Check for an ID of type general from BLAST */
4548 
4549         if (StringNCmp(buffer, "gnl|BL_ORD_ID", 13) == 0) {
4550             ptr = buffer;
4551             /* look for end of gnl ID. */
4552             while (*ptr != NULLB && *ptr != ' ')
4553                 ptr++;
4554             /* Clear out all spaces. */
4555             while (*ptr != NULLB && *ptr == ' ')
4556                 ptr++;
4557             ptr_start = ptr;
4558             found_gnl_id = TRUE;
4559             make_link = FALSE;
4560         }
4561 
4562         if (StringNCmp(ptr, "lcl|", 4) == 0) {
4563             ptr += 4;
4564         }
4565 
4566         pos = StringLen(ptr);
4567 
4568         if ((options & TXALIGN_HTML) && make_link) {
4569             StringCpy(ptr+pos, "</a> ");
4570             pos++;		/* One for the space after "</a>" */
4571             pos += 4;	/* for "</a>" */
4572             title_allocated = titleIdAllocated - pos;
4573         }
4574 
4575         title_allocated = titleIdAllocated - pos;
4576 
4577         if (pos >= titleIdAllocated) {
4578             pos = titleIdAllocated+1; /* no space to definition. */
4579             sprintf(ptr+pos-3, "...");
4580             *(ptr+pos) = ' ';
4581             pos++;
4582             *(ptr+pos) = NULLB; /* in case no scores are printed. */
4583         } else  {
4584             if (found_gnl_id == FALSE)
4585 	    {
4586                 *(ptr + pos) = ' ';
4587                 pos++;
4588             }
4589 	    else
4590 	    { /* give back space (used above) so things line up. */
4591 		title_allocated++;
4592 	    }
4593 
4594             title_length = StringLen(txsp->title);
4595             if (title_length > title_allocated) {
4596                 title_length = title_allocated;
4597                 title_length -= 3;	/* For "..." */
4598                 if (txsp->title) {
4599                     StringNCpy((ptr+pos), txsp->title, title_length);
4600                     pos += title_length;
4601                 }
4602                 sprintf((ptr+pos), "...");
4603                 pos += 3;
4604             } else {
4605                 if (txsp->title) {
4606                     StringNCpy((ptr+pos), txsp->title, title_length);
4607                     pos += title_length;
4608                 }
4609                 while (title_length < title_allocated) {
4610                     *(ptr + pos) = ' ';
4611                     title_length++;
4612                     pos++;
4613                 }
4614             }
4615             *(ptr + pos) = ' ';
4616             pos++;
4617 
4618             /* set to NULLB in case no scores have been found. */
4619             *(ptr + pos) = NULLB;
4620         }
4621 
4622         if (txsp->found_score) {
4623             evalue = txsp->evalue;
4624             bit_score = txsp->bit_score;
4625 
4626             eval_buff_ptr = eval_buff;
4627             ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
4628                &eval_buff_ptr,
4629                (TX_KNOCK_OFF_ALLOWED | TX_INTEGER_BIT_SCORE));
4630 
4631             if (options & TXALIGN_HTML) {
4632                 if (gi != 0)
4633                     sprintf(id_buffer, "%ld", (long) gi);
4634                 else {
4635 #ifdef OS_MAC
4636                     sprintf(id_buffer, "%s", txsp->buffer_id);
4637 #else
4638                     MuskSeqIdWrite(txsp->id, id_buffer, BUFFER_LENGTH,
4639                                    PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
4640 #endif
4641                 }
4642                 bit_score_buff_ptr = bit_score_buff;
4643                 if (*bit_score_buff_ptr == ' ') {
4644                     bit_score_buff_ptr++;
4645                     sprintf(buffer1, " <a href = #%s>%s</a>", id_buffer, bit_score_buff_ptr);
4646                 } else {
4647                     sprintf(buffer1, "<a href = #%s>%s</a>", id_buffer, bit_score_buff_ptr);
4648                 }
4649             } else {
4650                 sprintf(buffer1, "%s", bit_score_buff);
4651             }
4652 
4653 	    /*adjust N position*/
4654             strLen=StringLen(eval_buff_ptr);
4655 	    extraSpace=strLen<maxEvalWidth?(maxEvalWidth-strLen):0;
4656 	    makeEmptyString(tempBuf, extraSpace);
4657             if (options & TXALIGN_SHOW_NO_OF_SEGS) {
4658 	      sprintf(tempBuf2, "%ld", (long) txsp->number);
4659 	      strLen=StringLen(tempBuf2);
4660 	      extraSpace2=strLen<maxNWidth?(maxNWidth-strLen):0;
4661 	      makeEmptyString(tempBuf2, extraSpace2);
4662 	      sprintf((ptr+pos), " %s   %s  %s%ld%s", buffer1, eval_buff_ptr, tempBuf, (long) txsp->number,tempBuf2);
4663 
4664 	    }
4665             else{
4666 
4667 	      sprintf((ptr+pos), " %s   %s%s", buffer1, eval_buff_ptr, tempBuf);
4668 
4669 	    }
4670         }
4671 
4672 
4673         if (countdescr == -1 || countdescr > 0){
4674 
4675 	  fprintf(outfp, "%s", ptr);
4676 	  /*add link out*/
4677 
4678 	  if(options&TXALIGN_SHOW_LINKOUT&&options&TXALIGN_HTML){
4679 	    bsp=BioseqLockById(txsp->id);
4680 	    addLinkoutForDefline(bsp, txsp->id, outfp);
4681 	    BioseqUnlock(bsp);
4682 	  }
4683 	  fprintf(outfp, "\n");
4684 	}
4685         txsp = txsp->next;
4686         if (countdescr > 0)
4687             countdescr--;
4688     }
4689 
4690     if (options & TXALIGN_HTML) {
4691         ff_AddString("</PRE>");
4692         NewContLine();
4693     } else
4694        fprintf(outfp, "\n");
4695 
4696     /* blast_type (overwriting parameter) allocated before last while loop. */
4697     blast_type = (CharPtr) MemFree(blast_type);
4698 
4699     txsp = txsp_head;
4700     while (txsp) {
4701         txsp->title = (CharPtr) MemFree(txsp->title);
4702         txsp->buffer_id = (CharPtr) MemFree(txsp->buffer_id);
4703         txsp->id = SeqIdSetFree(txsp->id);
4704         txsp->segs_str = (CharPtr) MemFree(txsp->segs_str);
4705         txsp_var = txsp;
4706         txsp = txsp->next;
4707         MemFree(txsp_var);
4708     }
4709 
4710     return TRUE;
4711 }
4712 
4713 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,Int4 number_of_descriptions,CharPtr db_name,CharPtr blast_type)4714 PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
4715 		Int4 mode, Int2Ptr marks, Int4 number_of_descriptions,
4716 		CharPtr db_name, CharPtr blast_type)
4717 {
4718    return PrintDefLinesFromSeqAlignWithPath(seqalign, line_length, outfp,
4719              options, mode, marks, number_of_descriptions, db_name,
4720              blast_type, NULL);
4721 }
4722 
4723 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlign(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks)4724 PrintDefLinesFromSeqAlign(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks)
4725 {
4726     Boolean	retval;
4727 
4728     retval = PrintDefLinesFromSeqAlignEx(seqalign, line_length, outfp, options, mode, marks, -1);
4729 
4730     return retval;
4731 }
4732 
4733 /*
4734 	Converts a number into a frame.
4735 */
4736 static CharPtr
NumToFrame(Int2 frame,CharPtr buffer)4737 NumToFrame(Int2 frame, CharPtr buffer)
4738 {
4739 	if (buffer)
4740 	{
4741 		if (frame > 0)
4742 		{
4743 			sprintf(buffer, "+%d", frame);
4744 		}
4745 		else
4746 		{
4747 			sprintf(buffer, "%d", frame);
4748 		}
4749 	}
4750 
4751 	return buffer;
4752 }
4753 
4754 /* This function transfer SeqAlignPtr into AlignStatOptionPtr */
4755 
FormatScoreFromSeqAlignEx(SeqAlignPtr sap,Uint4 option,FILE * fp,Int4Ptr PNTR matrix,Boolean follower,Boolean ooframe)4756 NLM_EXTERN Boolean FormatScoreFromSeqAlignEx(SeqAlignPtr sap, Uint4 option, FILE *fp, Int4Ptr PNTR matrix, Boolean follower, Boolean ooframe)
4757 {
4758     AlignStatOptionPtr asop;
4759     Int4 empty_space, line_len;
4760     AlignSum as;
4761     SeqAlignPtr sap_tmp;
4762 
4763     asop = (AlignStatOptionPtr) MemNew(sizeof(AlignStatOption));
4764     MemSet(&as, 0, sizeof(AlignSum));
4765 
4766     empty_space = 12; line_len = 60; /* TO BE DETERMINED !!!! */
4767 
4768     asop->indent_len = (Int2) empty_space;
4769     asop->line_len = (Int2) (line_len + empty_space);
4770     asop->html_hot_link_relative = FALSE;
4771 
4772 
4773     if (option & TXALIGN_NO_ENTREZ)
4774         asop->no_entrez = TRUE;
4775     else
4776         asop->no_entrez = FALSE;
4777 
4778     if (option & TXALIGN_NO_DUMPGNL)
4779         asop->no_dumpgnl = TRUE;
4780     else
4781         asop->no_dumpgnl = FALSE;
4782 
4783     if (option & TXALIGN_HTML) {
4784         asop->html_hot_link = TRUE;
4785         if (option & TXALIGN_HTML_RELATIVE)
4786             asop->html_hot_link_relative = TRUE;
4787     } else {
4788         asop->html_hot_link = FALSE;
4789     }
4790     if (option & TXALIGN_SHOW_GI)
4791         asop->show_gi = TRUE;
4792     else
4793         asop->show_gi = FALSE;
4794 
4795     asop->fp = fp;
4796     asop->buf = NULL;
4797     asop->segs = NULL;
4798     as.matrix = matrix;
4799 
4800     as.master_sip = TxGetQueryIdFromSeqAlign(sap);
4801     as.target_sip = TxGetSubjectIdFromSeqAlign(sap);
4802 
4803     if((asop->bsp = BioseqLockById(as.target_sip)) == NULL) {
4804         Char tmp[128];
4805         SeqIdWrite(as.target_sip, tmp, PRINTID_FASTA_LONG, sizeof(tmp));
4806         ErrPostEx(SEV_ERROR, 0, 0, "Failure to get Bioseq for %s\n", tmp);
4807         return FALSE;
4808     }
4809 
4810     as.is_aa = (asop->bsp->mol == Seq_mol_aa);
4811     as.ooframe = ooframe;
4812 
4813     asop->sp = NULL;
4814     if(sap->segtype == SAS_DISC) {
4815 
4816         Int4 last_m_to = 0, last_t_to = 0;
4817         Int4 m_adj = 0, t_adj = 0;
4818         for(sap_tmp = (SeqAlignPtr)sap->segs; sap_tmp != NULL;
4819             sap_tmp = sap_tmp->next) {
4820 
4821             /* We cannot find score this way .. :-) this fuction just
4822                calculates number of positives,identities etc. */
4823 
4824             find_score_in_align(sap_tmp, 1, &as);
4825 
4826             asop->gaps += as.gaps;
4827             asop->positive += as.positive;
4828             asop->identical += as.identical;
4829             asop->align_len += as.totlen;
4830 
4831             /* Adjustment for unaligned regions not counted in the
4832                function above */
4833 
4834             if(last_m_to != 0) {
4835                 m_adj = as.master_from - last_m_to - 1;
4836             }
4837 
4838             asop->align_len += m_adj;
4839             asop->gaps += m_adj;
4840 
4841             last_m_to = as.master_to;
4842         }
4843         asop->sp = sap->score;
4844     } else {
4845         asop->sp = find_score_in_align(sap, 1, &as);
4846         asop->gaps = as.gaps;
4847         asop->positive = as.positive;
4848         asop->identical = as.identical;
4849         asop->align_len = as.totlen;
4850 
4851 
4852         /* This information was added for links to
4853            specific alignment only, but may be used
4854            for something else */
4855 
4856         asop->master_from = as.master_from;
4857         asop->master_to = as.master_to;
4858         asop->target_from = as.target_from;
4859         asop->target_to = as.target_to;
4860     }
4861 
4862     asop->db_name = NULL;
4863 
4864     if (as.m_frame_set) {
4865         asop->m_frame = as.m_frame;
4866     } else {
4867         asop->m_frame = 255;
4868     }
4869 
4870     if (as.t_frame_set) {
4871         asop->t_frame = as.t_frame;
4872     } else {
4873         asop->t_frame = 255;
4874     }
4875 
4876     asop->m_strand = as.m_strand;
4877     asop->t_strand = as.t_strand;
4878 
4879     /*    if(!ooframe) {
4880           asop->m_frame = 255;
4881           asop->t_frame = 255;
4882           } else {
4883           asop->m_frame = as.m_frame;
4884           asop->t_frame = as.t_frame;
4885           } */
4886 
4887     /* asop->m_strand = Seq_strand_unknown;
4888        asop->t_strand = Seq_strand_unknown; */
4889 
4890     asop->follower = follower;
4891 
4892     init_buff_ex(255);
4893     FormatScoreFunc(asop);
4894     free_buff();
4895 
4896     BioseqUnlock(asop->bsp);
4897 
4898     MemFree(asop);
4899 
4900     return TRUE;
4901 }
4902 
4903 /* Functions to read specific information about taxonomy names from
4904    Bioseq created from Blast database source */
4905 
4906 /* This function transfer SeqAlignPtr into AlignStatOptionPtr */
4907 
FormatScoreFromSeqAlign(SeqAlignPtr sap,Uint4 option,FILE * fp,Int4Ptr PNTR matrix,Boolean follower)4908 NLM_EXTERN Boolean FormatScoreFromSeqAlign
4909 (SeqAlignPtr sap, Uint4 option, FILE *fp,
4910  Int4Ptr PNTR matrix, Boolean follower)
4911 {
4912     return FormatScoreFromSeqAlignEx(sap, option, fp, matrix, follower, FALSE);
4913 }
4914 
/* Prints one subject definition line (sequence id, optional HTML decoration,
 * then the title text) to asop->fp.
 *
 *   asop     - formatting options and output stream.  Side effect: when HTML
 *              output is on and a non-empty TOOL_URL parameter that does not
 *              mention "dumpgnl.cgi" is found for asop->blast_type,
 *              asop->no_entrez is switched to TRUE.
 *   is_na    - selects the Entrez database/report names ("Nucleotide"/
 *              "GenBank" versus "Protein"/"GenPept") and is forwarded to
 *              make_dumpgnl_links.
 *   sip      - id(s) of the sequence being described.
 *   defline  - title text, printed through the ff_* formatter.
 *   taxid    - currently unused (the taxonomy link is disabled).
 *   firstSip - forwarded to addLinkoutForBioseq.
 *
 * Always returns NULL.
 */
static CharPtr FSFPrintOneDefline(AlignStatOptionPtr asop, Boolean is_na,
                                  SeqIdPtr sip, CharPtr defline, Int4 taxid, SeqIdPtr firstSip)
{
    Char HTML_database[32], HTML_dopt[16], id_buffer[BUFFER_LENGTH+1];
    Char buffer[BUFFER_LENGTH+1];
    SeqIdPtr bestid;
    Boolean make_link = FALSE, found_next_one, found_gnl_id;
    DbtagPtr db_tag;
    ObjectIdPtr oip;
    CharPtr ptr;
    Int4 gi;
    BioseqPtr bsp;

    /* Printing full label to the buffer */
    SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);

    if (asop->html_hot_link == TRUE)  {

        /* If a tool_url is set, then we use this rather than Entrez. */
        if (asop->no_entrez == FALSE)
        {
            Char tool_url[128];

            *tool_url = NULLB;
            GetAppParam("NCBI", asop->blast_type, "TOOL_URL", "", tool_url, sizeof(tool_url));
            /* Keep using Entrez when the tool is dumpgnl.cgi, as it does
               not handle gi's. */
            if (*tool_url != NULLB && StringStr(tool_url, "dumpgnl.cgi") == NULL)
                asop->no_entrez = TRUE;
        }

        if (is_na) {
            StringCpy(HTML_dopt, "GenBank");
            StringCpy(HTML_database, "Nucleotide");
        } else {
            StringCpy(HTML_dopt, "GenPept");
            StringCpy(HTML_database, "Protein");
        }

        bestid = SeqIdFindBest(sip, SEQID_GI);
        make_link = FALSE;
        gi = 0;
        if (bestid != NULL) {
            if (bestid->choice == SEQID_GI && asop->no_entrez == FALSE) {
                gi = bestid->data.intvalue;
                make_link = TRUE;
                sprintf(id_buffer, "%ld", (long) gi);
            } else {
                MuskSeqIdWrite(bestid, id_buffer, BUFFER_LENGTH, PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
            }

            /* Named anchor so other parts of the report can jump here. */
            fprintf(asop->fp, "<a name = %s></a>", id_buffer);
            if (make_link) {
                fprintf(asop->fp,
                        "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
                        NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
                        asop->txalign_options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");

            } else if (asop->no_dumpgnl == FALSE || asop->no_entrez == TRUE) {
                if (bestid->choice != SEQID_GENERAL && bestid->choice != SEQID_OTHER)
                    { /* HACK, HACK, use SEQID_OTHER for Greg's page, even though GI is present. */
                        bestid = SeqIdFindBest(sip, SEQID_OTHER);
                    }

                /* A link is assumed to be opened; the THC/TI branches clear
                   the flag again when they end up printing no <a href>, so
                   that no unmatched "</a>" is emitted further down. */
                make_link = TRUE;

                if (bestid->choice == SEQID_GENERAL) {
                    db_tag = (DbtagPtr) bestid->data.ptrvalue;
                    if(db_tag->db && StringCmp(db_tag->db, "THC") == 0) {
                        oip = db_tag->tag;
                        if(oip != NULL && oip->id != 0) {
                            fprintf(asop->fp, "<a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id);
                        } else {
                            make_link = FALSE;
                        }
                    } else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) {
                        oip = db_tag->tag;
                        if(oip != NULL && oip->id != 0) {
                            fprintf(asop->fp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id);
                        } else {
                            make_link = FALSE;
                        }
                    } else {
                        /* links to incomplete genomes */
                        make_dumpgnl_links(sip, asop->blast_type, asop->segs, asop->db_name, is_na, asop->fp, buffer, FALSE);
                    }
                } else {
                    make_dumpgnl_links(sip, asop->blast_type, asop->segs, asop->db_name, is_na, asop->fp, buffer, FALSE);
                }
            }
        }
    }

    /* Unless gi's are to be shown, skip a leading "gi|NNN|" when another id
       follows it in the label. */
    found_next_one = FALSE;
    if (asop->show_gi == FALSE) {
        if (StringNCmp(buffer, "gi|", 3) == 0) {
            ptr = &buffer[3];
            while (*ptr != NULLB && *ptr != ' ') {
                /* Is there another ID beside the GI? */
                if (*ptr == '|') {
                    ptr++;
                    found_next_one = TRUE;
                    break;
                }
                ptr++;
            }
        }
    }
    if (found_next_one == FALSE) /* If TRUE, then ptr set above. */
        ptr = buffer;

    /* Remove local ID's. */
    if (StringNCmp(ptr, "lcl|", 4) == 0) {
        ptr += 4;
    }

    found_gnl_id = TRUE;
    /* Check for an ID of type general from BLAST: such ids are not printed
       and never get a link. */
    if (StringNCmp(buffer, "gnl|BL_ORD_ID", 13) != 0) {
        fprintf(asop->fp, "%s", ptr);
        found_gnl_id = FALSE;
    } else {
        make_link = FALSE;
    }

    if (asop->html_hot_link == TRUE && make_link == TRUE) {
        fprintf(asop->fp, "</a> ");
    } else if (found_gnl_id == FALSE) {
        fprintf(asop->fp, " ");
    }

    /*add link out*/
    if(asop->txalign_options&TXALIGN_SHOW_LINKOUT&&asop->txalign_options&TXALIGN_HTML){
        bsp=BioseqLockById(sip);
        if(bsp){
            SeqIdPtr sipGi;
            Char fastaLongIdBuf[BUFFER_LENGTH];
            addLinkoutForBioseq(bsp, sip, firstSip, asop->fp);

            /* Offer a download link for long nucleotide subjects that
               carry a gi. */
            sipGi=SeqIdFindBest(bsp->id, SEQID_GI);
            if(sipGi&&bsp->length> LENGTH_TO_SHOW_DOWNLOAD&&ISA_na(asop->bsp->mol)){
                SeqIdWrite(bsp->id, fastaLongIdBuf, PRINTID_FASTA_LONG, BUFFER_LENGTH);
                make_dumpgnl_links(sipGi, asop->blast_type, asop->segs, asop->db_name, ISA_na(asop->bsp->mol), asop->fp, fastaLongIdBuf, TRUE);
                fprintf(asop->fp, "<img border=0 height=16 width=16 src=\"/blast/images/D.gif\" alt=\"Download subject sequence spanning the HSP\"></a>");
            }
            /*add one space before defline*/

            fprintf(asop->fp, " ");

            BioseqUnlock(bsp);
        }
    }

    /* The id above was written with fprintf rather than the ff_* print
       functions, so the formatter's line width is shortened (by 15) to
       compensate. */

    ff_StartPrint(0, asop->indent_len,
                  (Int2)(asop->line_len+asop->indent_len-15), NULL);
    ff_AddString(defline);
    ff_EndPrint();

    return NULL;
}
5087 
5088 
TX_PrintDeflinesWithAsn(BlastDefLinePtr PNTR bdsp,AlignStatOptionPtr asop)5089 static Boolean TX_PrintDeflinesWithAsn(BlastDefLinePtr PNTR bdsp,
5090                                        AlignStatOptionPtr asop)
5091 {
5092     Boolean  first = TRUE;
5093     BioseqPtr bsp;
5094     BlastDefLinePtr tbdsp;
5095     Int4 len, i;
5096     SeqIdPtr gilist;
5097     SeqIdPtr firstSip=NULL;
5098 
5099     if(bdsp == NULL || asop == NULL)
5100         return FALSE;
5101 
5102     bsp = asop->bsp;
5103 
5104     if((gilist = ScorePtrUseThisGi(asop->sp)) != NULL)
5105     {
5106         *bdsp = FilterAsn1DefLine(*bdsp, gilist);
5107         gilist = SeqIdSetFree(gilist);
5108     }
5109 
5110     for (tbdsp = *bdsp; tbdsp != NULL; tbdsp = tbdsp->next) {
5111 
5112         if(first) {
5113 	  SeqIdPtr bestid;
5114 	  bestid = SeqIdFindBest(tbdsp->seqid, SEQID_GI);
5115 	  firstSip=bestid;
5116 	  if(bestid->choice == SEQID_GI&&asop->html_hot_link&&(asop->txalign_options&TXALIGN_GET_SEQUENCE)){
5117 	    fprintf(asop->fp,  "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%ld\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%ld', 'getSeqGi', this.checked)\">", bestid->data.intvalue, query_number_glb);
5118 	  }
5119 
5120 	  fprintf(asop->fp, ">");
5121 	  first = FALSE;
5122         } else {
5123             fprintf(asop->fp, " ");
5124         }
5125 
5126         len = StringLen(tbdsp->title);
5127 
5128         /* Trimming tail white spaces if any */
5129         for(i = len; i > 0 && IS_WHITESP(tbdsp->title[i-1]); i++)
5130             tbdsp->title[i-1] = NULLB;
5131 
5132         FSFPrintOneDefline(asop, ISA_na(bsp->mol), tbdsp->seqid,
5133                            tbdsp->title, tbdsp->taxid, firstSip);
5134     }
5135     return TRUE;
5136 }
5137 #define TX_SEQID_BUF_SIZE 200
FormatScoreFunc(AlignStatOptionPtr asop)5138 NLM_EXTERN int LIBCALLBACK FormatScoreFunc(AlignStatOptionPtr asop)
5139 
5140 {
5141     BioseqPtr bsp;
5142     Boolean allocated, first;
5143     CharPtr defline, ptr, eval_buff_ptr, dline_buf, chptr, sptr;
5144     CharPtr new_defline;
5145     Char buf1[5], buf2[5];
5146     Char buffer[BUFFER_LENGTH+1], eval_buff[10], bit_score_buff[10];
5147     Char seqid_buf[TX_SEQID_BUF_SIZE+2];
5148     Char id_buffer[BUFFER_LENGTH+1];
5149     Nlm_FloatHi bit_score, evalue;
5150     Int4 percent_identical, percent_positive;
5151     Int4 number, score, gi, len, i;
5152     Int4 index; /* index for while loop over seqid. */
5153     ObjectIdPtr obid;
5154     SeqIdPtr gilist, sip, new_sip;
5155     ScorePtr	scrp, sp;
5156     Boolean splice_junction = FALSE;
5157     BlastDefLinePtr bdsp = NULL;
5158     CharPtr warning_msg = NULL;
5159     SeqIdPtr firstSip=NULL;
5160     Int2 comp_adjustment_method = eNoCompositionBasedStats;
5161 
5162 
5163     sp = asop->sp;
5164     bsp = asop->bsp;
5165 
5166     asn2ff_set_output(asop->fp, NULL);
5167 
5168     bit_score = 0.0;
5169     score = 0;
5170     evalue = 0.0;
5171     defline = NULL;
5172     *id_buffer = NULLB;
5173 
5174     if (bsp && asop->follower == FALSE) {
5175         /* Is the defline and sip allocated? */
5176 
5177         if((bdsp =  FDGetDeflineAsnFromBioseq(bsp)) != NULL) {
5178             TX_PrintDeflinesWithAsn(&bdsp, asop);
5179             bdsp = BlastDefLineSetFree(bdsp);
5180         } else {
5181             allocated = FALSE;
5182             gilist = ScorePtrUseThisGi(sp);
5183             if (gilist) {
5184                 FilterTheDefline(bsp, gilist, buffer, BUFFER_LENGTH, &(defline));
5185                 gilist = SeqIdSetFree(gilist);
5186                 sip = SeqIdParse(buffer);
5187                 allocated = TRUE;
5188             } else {
5189                 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
5190                 sip = SeqIdSetDup(bsp->id);
5191                 defline = StringSave(BioseqGetTitle(bsp));
5192             }
5193 
5194             /* Here we print all defline one by one */
5195 
5196             dline_buf = defline;
5197             chptr = defline;
5198             new_sip = NULL;
5199             new_defline = NULL;
5200             first = TRUE;
5201             while (TRUE) {
5202                 if((chptr = StringChr(chptr, '>')) != NULL) {
5203 
5204                     /* If ">" character exists in the defline - we have to check,
5205                        that this is start of new SeqId string */
5206 
5207                     for (index=0, ptr = chptr+1, sptr = seqid_buf;
5208                          *ptr != ' ' && *ptr != NULLB && index < TX_SEQID_BUF_SIZE;
5209                          index++, ptr++, sptr++) {
5210                         *sptr = *ptr;
5211                     }
5212                     *sptr = NULLB;
5213 
5214                     if((new_sip = SeqIdParse(seqid_buf)) == NULL) {
5215                         chptr++;
5216                         continue;
5217                     }
5218 
5219                     *chptr = NULLB;
5220                     if(*ptr == ' ')
5221                         new_defline = ptr + 1;
5222                     else
5223                         new_defline = NULL;
5224                 } else {
5225                     new_sip = NULL;
5226                     new_defline = NULL;
5227                 }
5228 
5229                 if(sip != NULL) {
5230 
5231                     if(first) {
5232 		      SeqIdPtr bestid;
5233 		      bestid = SeqIdFindBest(bsp->id, SEQID_GI);
5234 		      firstSip=bestid;
5235 		      if(bestid->choice == SEQID_GI&&asop->html_hot_link&&(asop->txalign_options&TXALIGN_GET_SEQUENCE)){
5236 			fprintf(asop->fp,  "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%ld\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%ld', 'getSeqGi', this.checked)\">", bestid->data.intvalue, query_number_glb);
5237 		      }
5238 		      fprintf(asop->fp, ">");
5239 		      first = FALSE;
5240                     } else {
5241                         fprintf(asop->fp, " ");
5242                     }
5243                     len = StringLen(defline);
5244 
5245                     /* Trimming tail white spaces if any */
5246                     for(i = len; i > 0 && IS_WHITESP(defline[i-1]); i++)
5247                         defline[i-1] = NULLB;
5248 
5249                     FSFPrintOneDefline(asop, ISA_na(bsp->mol), sip, defline,
5250                                        -1, firstSip);
5251                     sip =SeqIdSetFree(sip);
5252                 }
5253 
5254                 if(new_sip != NULL && new_defline != NULL) {
5255                     chptr = defline = new_defline;
5256                     sip = new_sip;
5257                 } else {
5258                     break;
5259                 }
5260             }
5261 
5262             dline_buf = (CharPtr) MemFree(dline_buf);
5263         }
5264 
5265         fprintf(asop->fp, "          Length = %ld\n", (long) BioseqGetLen(bsp));
5266     }
5267 
5268     if (asop->no_entrez == TRUE &&
5269         asop->html_hot_link == TRUE && bsp != NULL) {
5270 
5271         /* For Gregs and Human Genome stuff we will add links to every
5272            HSP */
5273         SeqIdPtr bestid;
5274 
5275         gi = 0;
5276         bestid = SeqIdFindBest(bsp->id, SEQID_GI);
5277 
5278         if (bestid != NULL) {
5279             if (bestid->choice == SEQID_GI) {
5280                 gi = bestid->data.intvalue;
5281                 sprintf(id_buffer, "%ld", (long) gi);
5282             }
5283         }
5284     }
5285 
5286     number=1;
5287     for (scrp=sp; scrp; scrp = scrp->next) {
5288         obid = scrp->id;
5289         if(obid != NULL) {
5290             if (StringICmp(obid->str, "score") == 0) {
5291                 score = scrp->value.intvalue;
5292                 continue;
5293             }
5294             else if (StringICmp(obid->str, "e_value") == 0 || StringICmp(obid->str, "sum_e") == 0) {
5295                 evalue = scrp->value.realvalue;
5296                 continue;
5297             } else if (StringICmp(obid->str, "sum_n") == 0) {
5298                 number = scrp->value.intvalue;
5299                 continue;
5300             } else if (StringICmp(obid->str, "bit_score") == 0) {
5301                 bit_score = scrp->value.realvalue;
5302                 continue;
5303             } else if (StringICmp(obid->str, "splice_junction") == 0) {
5304                splice_junction = TRUE;
5305             } else if (StringICmp(obid->str, "comp_adjustment_method") == 0) {
5306                comp_adjustment_method = scrp->value.intvalue;
5307 	    }   else if (StringICmp(obid->str, "warning") == 0) {
5308                warning_msg = Malloc(256);
5309                sprintf(warning_msg,
5310                        "WARNING: HSPs with e-values below %.2g have been skipped\n",
5311                        scrp->value.realvalue);
5312             }
5313         } else {
5314             if(scrp->choice == 1) {
5315                 score = scrp->value.intvalue;
5316                 continue;
5317             } else if(scrp->choice == 2) {
5318                 bit_score = scrp->value.realvalue;
5319                 continue;
5320             }
5321         }
5322     }
5323 
5324     ff_StartPrint(0, 0, (Int2)(asop->line_len+asop->indent_len), NULL);
5325 
5326     if (warning_msg) {
5327        ff_AddString(warning_msg);
5328        NewContLine();
5329     }
5330     ff_EndPrint();
5331     eval_buff_ptr = eval_buff;
5332 
5333     ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
5334                             &eval_buff_ptr, TX_KNOCK_OFF_ALLOWED);
5335 
5336     if(asop->html_hot_link == TRUE && *id_buffer != NULLB) {
5337 
5338         Int4 m_from, m_to, t_from, t_to;
5339 
5340         /* These are links for Human Genome viewer for every single
5341            alignment */
5342 
5343         if(asop->m_strand == Seq_strand_minus) {
5344             m_from = asop->master_to+1;
5345             m_to = asop->master_from+1;
5346         } else {
5347             m_from = asop->master_from+1;
5348             m_to = asop->master_to+1;
5349         }
5350 
5351         if(asop->t_strand == Seq_strand_minus) {
5352             t_from = asop->target_to+1;
5353             t_to = asop->target_from+1;
5354         } else {
5355             t_from = asop->target_from+1;
5356             t_to = asop->target_to+1;
5357         }
5358 
5359         sprintf(buffer, " <a name = %s_%ld></a>"
5360                 "<a name = %s_%ld_%d_%d_%d_%d>"
5361                 "</a>Score = %s bits (%ld), ",
5362                 id_buffer, (long) score, id_buffer, (long) score,
5363                 m_from, m_to, t_from, t_to,
5364                 bit_score_buff, (long) score);
5365     } else {
5366         sprintf(buffer, " Score = %s bits (%ld), ",
5367                 bit_score_buff, (long) score);
5368     }
5369 
5370     fprintf(asop->fp, "%s", buffer);
5371 
5372     if (number == 1)
5373         sprintf(buffer, "Expect = %s", eval_buff_ptr);
5374     else if (!splice_junction)
5375         sprintf(buffer, "Expect(%ld) = %s", (long) number, eval_buff_ptr);
5376     else
5377         sprintf(buffer, "Expect(%ld+) = %s", (long) number, eval_buff_ptr);
5378     fprintf(asop->fp, "%s", buffer);
5379     if (eNoCompositionBasedStats != comp_adjustment_method) {
5380       if (eCompositionBasedStats == comp_adjustment_method)
5381 	sprintf(buffer,",   Method: Composition-based stats.");
5382       if (eCompositionMatrixAdjust == comp_adjustment_method)
5383 	sprintf(buffer,",   Method: Compositional matrix adjust.");
5384       fprintf(asop->fp, "%s", buffer);
5385     }
5386     fprintf(asop->fp, "\n", buffer);
5387     ff_StartPrint(0, 0, (Int2)(asop->line_len+asop->indent_len), NULL);
5388     if (asop->align_len > 0) {
5389         asop->positive += asop->identical;
5390         percent_identical = (Int4) ((100*asop->identical + 0.5)/ (asop->align_len));
5391         percent_positive = (Int4) ((100*asop->positive + 0.5)/ (asop->align_len));
5392         /* Don't show positives for blastn, which has these set to 255. */
5393         if (asop->m_frame == 255 && asop->t_frame == 255 &&
5394             asop->m_strand != Seq_strand_unknown && asop->t_strand != Seq_strand_unknown)
5395             sprintf(buffer, " Identities = %ld/%ld (%ld%%)", (long) asop->identical, (long) asop->align_len, (long) percent_identical);
5396         else
5397             sprintf(buffer, " Identities = %ld/%ld (%ld%%), Positives = %ld/%ld (%ld%%)", (long) asop->identical, (long) asop->align_len, (long) percent_identical, (long) (asop->positive), (long) asop->align_len, (long) (percent_positive));
5398         ff_AddString(buffer);
5399         if (asop->gaps > 0) {
5400             sprintf(buffer, ", Gaps = %ld/%ld (%ld%%)", (long) asop->gaps,
5401                     (long) asop->align_len,
5402                     (long) (100*asop->gaps)/(asop->align_len));
5403             ff_AddString(buffer);
5404         }
5405         NewContLine();
5406 
5407         /* for testing. */
5408         if (asop->m_frame != 255 || asop->t_frame != 255) {
5409             if (asop->m_frame != 255 && asop->t_frame != 255) {
5410                 sprintf(buffer, " Frame = %s / %s", NumToFrame(asop->m_frame, buf1), NumToFrame(asop->t_frame, buf2));
5411             } else if (asop->m_frame != 255) {
5412                 sprintf(buffer, " Frame = %s", NumToFrame(asop->m_frame, buf2));
5413             }
5414             else if (asop->t_frame != 255) {
5415                 sprintf(buffer, " Frame = %s", NumToFrame(asop->t_frame, buf2));
5416             }
5417             ff_AddString(buffer);
5418             NewContLine();
5419         } else if (asop->m_strand != Seq_strand_unknown && asop->t_strand != Seq_strand_unknown) {
5420             if (asop->m_strand != asop->t_strand)
5421                 sprintf(buffer, " Strand = Plus / Minus");
5422             else
5423                 sprintf(buffer, " Strand = Plus / Plus");
5424             ff_AddString(buffer);
5425 
5426 
5427             NewContLine();
5428         }
5429         /* for testing. */
5430 
5431     }
5432     ff_EndPrint();
5433 
5434     return 0;
5435 }
5436 
5437 /*
5438 *
5439 *	determine the option for alignment based on the named tx_option
5440 *
5441 */
GetTxAlignOptionValue(Uint1 tx_option,BoolPtr hide_feature,BoolPtr print_score,BoolPtr split_display)5442 NLM_EXTERN Uint4 GetTxAlignOptionValue (Uint1 tx_option, BoolPtr hide_feature,
5443 	BoolPtr print_score, BoolPtr split_display)
5444 {
5445 	Uint4 option;
5446 
5447 	option = 0;
5448 	*print_score = FALSE;
5449 	*split_display = FALSE;
5450 	switch (tx_option)
5451 	{
5452 		/*multiple pairwise alignment */
5453 		case TEXT_MP:
5454 		case TEXT_MP_MISMATCH:
5455 			option |= TXALIGN_MASTER;
5456 			option |= TXALIGN_SHOW_RULER;
5457 			option |= TXALIGN_SHOW_STRAND;
5458 			if(tx_option == TEXT_MP_MISMATCH)
5459 				option |= TXALIGN_MISMATCH;
5460 			break;
5461 		/*FLAT multiple pairwise alignment*/
5462 		case TEXT_MPFLAT:
5463 		case TEXT_MPFLAT_MISMATCH:
5464 			option |= TXALIGN_MASTER;
5465 			option |= TXALIGN_FLAT_INS;
5466 			option |= TXALIGN_END_NUM;
5467 			option |= TXALIGN_COMPRESS;
5468 			if(tx_option == TEXT_MPFLAT_MISMATCH)
5469 				option |= TXALIGN_MISMATCH;
5470 			*split_display = TRUE;
5471 			break;
5472 		case TEXT_BLAST:
5473 			option |= TXALIGN_END_NUM;
5474 			option |= TXALIGN_BLASTX_SPECIAL;
5475 			option |= TXALIGN_MATRIX_VAL;
5476 			option |= TXALIGN_SHOW_QS;
5477 			*hide_feature = TRUE;
5478 			*print_score = TRUE;
5479 			*split_display = TRUE;
5480 			break;
5481 		default:
5482 			option |= TXALIGN_MASTER;
5483 			option |= TXALIGN_MISMATCH;
5484 			option |= TXALIGN_SHOW_RULER;
5485 			option |= TXALIGN_SHOW_STRAND;
5486 			break;
5487 	}
5488 	if(*hide_feature)
5489 		option |= TXALIGN_COMPRESS;
5490 	return option;
5491 }
5492 
5493 /** The following function assumes that neither of the locations in
5494  * the first link in StdSeg is empty.
5495  * @param sseg Alignment segments [in]
5496  * @param dna_strand The strand of the nucleotide sequence [out]
5497  * @return TRUE for tblastn, FALSE for blastx.
5498  */
OOFGetDNAStrand(StdSegPtr sseg,Int4Ptr dna_strand)5499 static Boolean OOFGetDNAStrand(StdSegPtr sseg, Int4Ptr dna_strand)
5500 {
5501     Uint1 strand;
5502     Boolean reverse;
5503 
5504     if ((strand = SeqLocStrand(sseg->loc)) != Seq_strand_unknown) {
5505        *dna_strand = (Int4) strand;
5506        reverse = FALSE;
5507     } else {
5508        *dna_strand = (Int4) SeqLocStrand(sseg->loc->next);
5509        reverse = TRUE;
5510     }
5511     return reverse;
5512 }
5513 
/* Derive the value stored as the DNA "line end" from the current DNA index.
 * On the minus strand the index is lowered by 1 unless it is below 1;
 * on any other strand it is lowered by 3 unless it is exactly 0.
 * (Callers in this file print the result plus 3.) */
static Int4 SetDNALineEnd(Int4 dna_index, Int4 dna_strand)
{
    if (dna_strand == Seq_strand_minus)
        return (dna_index < 1) ? dna_index : dna_index - 1;

    return (dna_index == 0) ? 0 : dna_index - 3;
}
5525 
/* Count the decimal digits of `number`.  Any value of 9 or less,
 * including zero and negative values, yields 1. */
static Int4 GetDigitsInINT(Int4 number)
{
    Int4 digits = 1;

    while (number > 9) {
        number /= 10;
        digits++;
    }

    return digits;
}
5535 
GetMaxFROMDigits(StdSegPtr sseg)5536 static Int4 GetMaxFROMDigits(StdSegPtr sseg)
5537 {
5538     StdSegPtr ssp, ssp_last;
5539     Int4 master_from, target_from, master_to, target_to;
5540     Int4 max_number, count;
5541 
5542     master_from = SeqLocStart(sseg->loc);
5543     target_from = SeqLocStart(sseg->loc->next);
5544 
5545     for(ssp_last = ssp = sseg; ssp != NULL; ssp = ssp->next)
5546         ssp_last = ssp;
5547 
5548     master_to = SeqLocStop(ssp_last->loc);
5549     target_to = SeqLocStop(ssp_last->loc->next);
5550 
5551     max_number = MAX(MAX(master_from, master_to),
5552                      MAX(target_from, target_to));
5553 
5554     count = GetDigitsInINT(max_number);
5555 
5556     return count;
5557 }
5558 
5559 #define WIDTH 60
OOFShowSingleAlignment(SeqAlignPtr sap,ValNodePtr mask,Int4Ptr PNTR matrix,FILE * fp)5560 static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
5561                                       Int4Ptr PNTR matrix, FILE *fp)
5562 {
5563     StdSegPtr sseg;
5564     SeqIntPtr seq_int1, seq_int2;
5565     SeqLocPtr slp1, slp2;
5566     SeqIdPtr sip1, sip2;
5567     SeqFeatPtr fake_cds;
5568     ByteStorePtr b_store = NULL;
5569     Char line1[128], line2[128], line3[128];
5570     Int4 line_index, length_dna, length_pro, length;
5571     Int4 dna_index, pro_index, dna_line_start, pro_line_start;
5572     Int4 dna_line_end, pro_line_end, dna_to, dna_from;
5573     BioseqPtr bsp;
5574     SeqPortPtr spp;
5575     Int4 i, lines, k, shift_info = 0;
5576     Char  c1, c2, c3;
5577     Int4 dna_strand, max_digits, num_pad;
5578     Boolean reverse = FALSE;
5579 
5580     if(sap == NULL || sap->segtype != 3) /* Should be StdSeg here! */
5581         return FALSE;
5582 
5583     line_index = 0;
5584     lines = 0;
5585     dna_index =0;
5586     pro_index = 0;
5587     pro_line_end = 0;
5588     dna_line_end = 0;
5589 
5590     reverse = OOFGetDNAStrand((StdSegPtr) sap->segs, &dna_strand);
5591 
5592     /* Needed for printing nice alignment with normal spacing */
5593     max_digits = GetMaxFROMDigits((StdSegPtr) sap->segs);
5594 
5595     for(sseg = (StdSegPtr) sap->segs; sseg != NULL; sseg= sseg->next) {
5596 
5597         /* Now starting new alignment region */
5598 
5599         length_dna = 0;
5600         length_pro = 0;
5601         b_store = NULL;
5602 
5603         if (reverse) {
5604            slp2 = sseg->loc;
5605            slp1 = sseg->loc->next;
5606            sip2 = sseg->ids;       /* Protein */
5607            sip1 = sseg->ids->next; /* DNA */
5608         } else {
5609            slp1 = sseg->loc;
5610            slp2 = sseg->loc->next;
5611            sip1 = sseg->ids;       /* DNA */
5612            sip2 = sseg->ids->next; /* Protein */
5613         }
5614 
5615         if(slp1->choice == SEQLOC_INT)
5616             seq_int1 = (SeqIntPtr) slp1->data.ptrvalue;
5617         else if (slp1->choice == SEQLOC_EMPTY)
5618             seq_int1 = NULL;
5619         else
5620             return FALSE;       /* Invalid SeqLoc */
5621 
5622 
5623         if(slp2->choice == SEQLOC_INT)
5624             seq_int2 = (SeqIntPtr) slp2->data.ptrvalue;
5625         else if (slp2->choice == SEQLOC_EMPTY)
5626             seq_int2 = NULL;
5627         else
5628             return FALSE;       /* Invalid SeqLoc */
5629 
5630         /* Ignore double gap */
5631         if(seq_int1 == NULL && seq_int2 == NULL)
5632             continue;
5633 
5634         /* printf("shift_info = %d\n", shift_info); */
5635 
5636         if(shift_info%3)
5637             dna_index -= (3 - shift_info); /* adjustment for frameshift */
5638 
5639         switch(shift_info) {
5640         case 1:
5641             line1[line_index] = '\\';
5642             line2[line_index] = ' ';
5643             line3[line_index] = ' ';
5644             line_index++;
5645 
5646             if(line_index == WIDTH) {
5647                 dna_line_end = SetDNALineEnd(dna_index, dna_strand);
5648                 pro_line_end = pro_index;
5649             }
5650         case 2:
5651             line1[line_index] = '\\';
5652             line2[line_index] = ' ';
5653             line3[line_index] = ' ';
5654             line_index++;
5655 
5656             if(line_index == WIDTH) {
5657                 dna_line_end = SetDNALineEnd(dna_index, dna_strand);
5658                 pro_line_end = pro_index;
5659             }
5660            break;
5661         case 5:
5662             line1[line_index] = '/';
5663             line2[line_index] = ' ';
5664             line3[line_index] = ' ';
5665             line_index++;
5666 
5667             if(line_index == WIDTH) {
5668                 dna_line_end = SetDNALineEnd(dna_index, dna_strand);
5669                 pro_line_end = pro_index;
5670             }
5671 
5672         case 4:
5673             line1[line_index] = '/';
5674             line2[line_index] = ' ';
5675             line3[line_index] = ' ';
5676             line_index++;
5677 
5678             if(line_index == WIDTH) {
5679                 dna_line_end = SetDNALineEnd(dna_index, dna_strand);
5680                 pro_line_end = pro_index;
5681             }
5682 
5683             break;
5684         case 0:
5685         default:
5686             break;
5687         }
5688 
5689         /* Looking if any frame shift is followed next */
5690         if(seq_int1 != NULL && seq_int2 != NULL) {
5691             shift_info = (seq_int1->to - seq_int1->from + 1) -
5692                 (seq_int2->to - seq_int2->from)*3;
5693         } else if(seq_int1 != NULL) {
5694             shift_info = (seq_int1->to - seq_int1->from + 1)%3 + 3;
5695         } else {
5696             shift_info = 0;
5697         }
5698 
5699         if(seq_int1 != NULL) {
5700 
5701             if(dna_strand != Seq_strand_minus)
5702                 dna_index = seq_int1->from;
5703             else
5704                 dna_index = seq_int1->to;
5705 
5706             length_dna = (seq_int1->to - seq_int1->from + 1)/3;
5707         }
5708 
5709         if(seq_int2 != NULL) {
5710             pro_index = seq_int2->from;
5711             length_pro = seq_int2->to - seq_int2->from + 1;
5712         }
5713 
5714         if(line_index == 0) {
5715             dna_line_start = dna_index + 1;
5716             pro_line_start = pro_index + 1;
5717         }
5718 
5719         if (dna_line_start == 0)
5720             dna_line_start = dna_index + 1;
5721 
5722         if(pro_line_start == 0)
5723             pro_line_start = pro_index + 1;
5724 
5725         if(seq_int1 != NULL) {
5726 
5727             /* if(length_dna == 0) insertion
5728                continue; */
5729 
5730             /* Byte store for DNA */
5731             bsp = BioseqLockById(sip1);
5732 
5733 
5734             dna_from = seq_int1->from;
5735             dna_to = seq_int1->to;
5736 
5737             if(0 < shift_info && shift_info < 3) {
5738                 if(dna_strand != Seq_strand_minus)
5739                     dna_to = seq_int1->to + 3 - shift_info;
5740                 else
5741                     dna_from = seq_int1->from - 3 + shift_info;
5742             }
5743 
5744             if(dna_from >= dna_to) {
5745                 BioseqUnlock(bsp);
5746                 continue;
5747             }
5748 
5749             fake_cds = make_fake_cds(bsp, dna_from, dna_to,
5750                                      seq_int1->strand);
5751             BioseqUnlock(bsp);
5752 
5753             b_store = ProteinFromCdRegionEx(fake_cds, TRUE, FALSE);
5754             SeqFeatFree(fake_cds);
5755 
5756             if(b_store == NULL) {
5757                 return FALSE;
5758             }
5759 
5760             BSSeek(b_store, 0, SEEK_SET);
5761 
5762             /* length_dna = BSLen(b_store); */
5763         }
5764 
5765         if(seq_int2 != NULL) {
5766             /* Seq port for protein */
5767             bsp = BioseqLockById(sip2);
5768             spp = SeqPortNew(bsp, seq_int2->from,
5769                              seq_int2->to, 0, Seq_code_ncbieaa);
5770             BioseqUnlock(bsp);
5771         } else {
5772             spp = NULL;
5773         }
5774 
5775         if(length_dna == 0) length_dna = length_pro;
5776         if(length_pro == 0) length_pro = length_dna;
5777 
5778         length = MAX(length_pro, length_dna);
5779         /* length = MIN(length_pro, length_dna); */
5780 
5781         /* printf("length = %d\n", length); */
5782         for(i = 0; i < length; i++) {
5783 
5784             if(seq_int1 != NULL) {
5785 
5786                 /* This line should be checked for correctness */
5787                 if((line1[line_index] = BSGetByte(b_store)) == (Char)EOF)
5788                     line1[line_index] = '?';
5789 
5790                 if(dna_strand != Seq_strand_minus)
5791                     dna_index += 3;
5792                 else
5793                     dna_index -= 3;
5794 
5795             } else {
5796                 line1[line_index] = '-';
5797             }
5798 
5799             if(seq_int2 != NULL) {
5800                 line2[line_index] = SeqPortGetResidue(spp);
5801                 pro_index++;
5802             } else {
5803                 line2[line_index] = '-';
5804             }
5805 
5806             if(line1[line_index] == line2[line_index])
5807                 line3[line_index] = line1[line_index];
5808             else if(matrix[line1[line_index]][line2[line_index]] > 0)
5809                 line3[line_index] = '+';
5810             else
5811                 line3[line_index] = ' ';
5812 
5813             line_index++;
5814 
5815             if(line_index == WIDTH) {
5816                 dna_line_end = SetDNALineEnd(dna_index, dna_strand);
5817                 pro_line_end = pro_index;
5818             }
5819 
5820             if(line_index > WIDTH) { /* Printout */
5821 
5822                 line1[line_index] = line2[line_index] = line3[line_index] = '\0';
5823 #ifdef SHOW_RULER
5824                 fprintf(fp, "%5d",
5825  WIDTH*lines++);
5826 
5827                 for (k = 10; k <= WIDTH; k+=10)
5828                     fprintf(fp, "    .    :");
5829                 if (k-5 < WIDTH) fprintf(fp, "    .");
5830 #endif
5831 
5832                 c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
5833                 line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
5834 
5835                 /* ------- Printout of the alignment ------------- */
5836 
5837                 if (reverse) {
5838                    fprintf(fp, "Query: %d", pro_line_start);
5839                    num_pad =
5840                       max_digits - GetDigitsInINT(pro_line_start) + 1;
5841 
5842                    for(k=0; k < num_pad; k++)
5843                       fprintf(fp, " ");
5844 
5845                    fprintf(fp, "%s %d\n", line2, pro_line_end);
5846 
5847                    num_pad = 8 + max_digits;
5848 
5849                    for(k=0; k < num_pad; k++)
5850                       fprintf(fp, " ");
5851 
5852                    fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start);
5853 
5854                    num_pad =
5855                       max_digits - GetDigitsInINT(dna_line_start) + 1;
5856 
5857                    for(k=0; k < num_pad; k++)
5858                       fprintf(fp, " ");
5859 
5860                    fprintf(fp, "%s %d\n\n", line1, dna_line_end+3);
5861 
5862                 } else {
5863                    fprintf(fp, "Query: %d", dna_line_start);
5864                    num_pad =
5865                       max_digits - GetDigitsInINT(dna_line_start) + 1;
5866 
5867                    for(k=0; k < num_pad; k++)
5868                       fprintf(fp, " ");
5869 
5870                    fprintf(fp, "%s %d\n", line1, dna_line_end+3);
5871 
5872                    num_pad = 8 + max_digits;
5873 
5874                    for(k=0; k < num_pad; k++)
5875                       fprintf(fp, " ");
5876 
5877                    fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);
5878 
5879                    num_pad =
5880                       max_digits - GetDigitsInINT(pro_line_start) + 1;
5881 
5882                    for(k=0; k < num_pad; k++)
5883                       fprintf(fp, " ");
5884 
5885                    fprintf(fp, "%s %d\n\n", line2, pro_line_end);
5886                 }
5887                 /* --------------------------------------------------- */
5888 
5889                 if(dna_line_end != 0) {
5890 
5891                     if(dna_strand != Seq_strand_minus)
5892                         dna_line_start = dna_line_end+4; /*takes 3 bases*/
5893                     else
5894                         dna_line_start = dna_line_end+2; /*takes 3 bases*/
5895                 }
5896                 if(pro_line_end != 0)
5897                     pro_line_start = pro_line_end+1;
5898 
5899                 line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
5900                 strcpy(line1, &line1[WIDTH]);
5901                 strcpy(line2, &line2[WIDTH]);
5902                 strcpy(line3, &line3[WIDTH]);
5903                 line_index = line_index - WIDTH;
5904             }
5905         }
5906 
5907         SeqPortFree(spp);       /* Protein SeqPort */
5908         BSFree(b_store);        /* DNA Byte store  */
5909     }
5910 
5911     /* Printing out remaining tail ... if any */
5912     line1[line_index] = line2[line_index] = line3[line_index] = '\0';
5913 
5914 #ifdef SHOW_RULER
5915     fprintf(fp, "%5d", WIDTH*lines);
5916 
5917     for (k = 10; k < line_index; k+=10)
5918         fprintf(fp, "    .    :");
5919 
5920     if (k-5 < line_index) fprintf(fp, "    .");
5921 #endif
5922 
5923     dna_line_end = SetDNALineEnd(dna_index, dna_strand);
5924     pro_line_end = pro_index;
5925 
5926 
5927     /* ------- Printout of the alignment remainder ------- */
5928     if (reverse) {
5929        fprintf(fp, "Query: %d", pro_line_start);
5930 
5931        num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;
5932 
5933        for(k=0; k < num_pad; k++)
5934           fprintf(fp, " ");
5935 
5936        fprintf(fp, "%s %d\n", line2, pro_line_end);
5937 
5938        num_pad = 8 + max_digits;
5939 
5940        for(k=0; k < num_pad; k++)
5941           fprintf(fp, " ");
5942 
5943        fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start);
5944 
5945        num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1;
5946 
5947        for(k=0; k < num_pad; k++)
5948           fprintf(fp, " ");
5949 
5950        fprintf(fp, "%s %d\n\n\n", line1, dna_line_end+3);
5951     } else {
5952        fprintf(fp, "Query: %d", dna_line_start);
5953 
5954        num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1;
5955 
5956        for(k=0; k < num_pad; k++)
5957           fprintf(fp, " ");
5958 
5959        fprintf(fp, "%s %d\n", line1, dna_line_end+3);
5960 
5961        num_pad = 8 + max_digits;
5962 
5963        for(k=0; k < num_pad; k++)
5964           fprintf(fp, " ");
5965 
5966        fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);
5967 
5968        num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;
5969 
5970        for(k=0; k < num_pad; k++)
5971           fprintf(fp, " ");
5972 
5973        fprintf(fp, "%s %d\n\n\n", line2, pro_line_end);
5974     }
5975     /* --------------------------------------------------- */
5976 
5977     /*    fprintf(fp, "\nQuery: %-5d %s %-5d\n             "
5978           "%s\nSbjct: %-5d %s %-5d\n\n",
5979           dna_line_start, line1, dna_line_end+3, line3,
5980           pro_line_start, line2, pro_line_end); */
5981 
5982     return TRUE;
5983 }
5984 
5985 /*******************************************************************************
5986 
5987   Function : OOFShowBlastAlignment();
5988 
5989   Purpose : function to display a BLAST output with Out-of-Frame
5990             information
5991 
5992   Parameters : 	sap; seqalign
5993                 mask; list of masked regions in the query
5994                 fp; output file;
5995                 tx_option; some display options
5996 
5997   Return value : FALSE if failure
5998 
5999 *******************************************************************************/
OOFShowBlastAlignment(SeqAlignPtr sap,ValNodePtr mask,FILE * fp,Uint4 tx_option,Int4Ptr PNTR matrix)6000 NLM_EXTERN Boolean OOFShowBlastAlignment(SeqAlignPtr sap, ValNodePtr mask,
6001                                          FILE *fp, Uint4 tx_option,
6002                                          Int4Ptr PNTR matrix)
6003 {
6004     SeqAlignPtr     sap4;
6005     SeqIdPtr        new_id = NULL, old_id = NULL;
6006     Uint4           i;
6007     Boolean         bRet, follower= FALSE, matrix_loaded = FALSE;
6008 
6009     if(sap == NULL || fp == NULL)
6010         return FALSE;
6011 
6012     bRet = TRUE;
6013 
6014     /* get the matrix */
6015 
6016     if(matrix == NULL) {
6017         if((matrix = load_default_matrix()) == NULL)
6018             return FALSE;
6019         matrix_loaded = TRUE;
6020     }
6021 
6022     for(sap4 = sap; sap4 != NULL; sap4 = sap4->next) {
6023 
6024         /* Attempt to print score for the alignment */
6025         new_id = TxGetSubjectIdFromSeqAlign(sap4);
6026         if(old_id != NULL) {
6027             if(SeqIdMatch(new_id, old_id))
6028                 follower = TRUE;
6029         }
6030 
6031         old_id = new_id;
6032         if(!FormatScoreFromSeqAlignEx(sap4, tx_option, fp, matrix,
6033                                       follower, TRUE)){
6034             bRet=FALSE;
6035             break;
6036         }
6037 
6038         follower = FALSE;
6039 
6040         /*display a SeqAlign*/
6041         if (!OOFShowSingleAlignment(sap4, mask, matrix, fp)) {
6042             bRet=FALSE;
6043             break;
6044         }
6045     }
6046 
6047     if (matrix_loaded){
6048         for(i = 0; i<TX_MATRIX_SIZE; ++i)
6049             MemFree(matrix[i]);
6050         MemFree(matrix);
6051     }
6052 
6053     return(bRet);
6054 
6055 }
6056 
OOFDisplayTraceBack1(Int4Ptr a,CharPtr dna,CharPtr pro,Int4 ld,Int4 lp,Int4 q_start,Int4 p_start)6057 NLM_EXTERN void OOFDisplayTraceBack1(Int4Ptr a, CharPtr dna,
6058                                      CharPtr pro, Int4 ld, Int4 lp,
6059                                      Int4 q_start, Int4 p_start)
6060 {
6061     int len = 0, i, j, x, y, lines, k;
6062     static char line1[100], line2[100], line3[100],
6063         tmp[10] = "         ", *st;
6064     char *dna1, c1, c2, c3;
6065 
6066     dna1 = Malloc(ld+2);
6067     MemCpy(dna1+1, dna, ld);
6068     dna1[0] = ' '; dna1[1] = ' ';
6069 
6070     line1[0] = line2[0] = line3[0] = '\0'; x= q_start; y = p_start;
6071     printf("dna=%d pro=%d\n", y, x);
6072 
6073     for (len = 0, j = 0, lines = 0; x < lp && y < ld; j++) {
6074         i = a[j];
6075         switch(i) {
6076         case 0:
6077             line1[len] = '-';
6078             line3[len] = ' ';
6079             line2[len++] = pro[x++];
6080             break;
6081         case 1:
6082         case 5:
6083             if (i == 1) line1[len]  = '\\';
6084             else line1[len] = '/';
6085             line2[len] = line3[len] = ' ';
6086             len++;
6087         case 2:
6088         case 4:
6089             if (i < 3) line1[len]  = '\\';
6090             else line1[len] = '/';
6091             line2[len] = line3[len] = ' ';
6092             len++;
6093         case 3:
6094             line1[len] = dna1[y+i-2]; y+= i;
6095             line2[len] = pro[x++];
6096             if (line1[len] == line2[len]) line3[len++] = '|';
6097             else line3[len++] = ' ';
6098             break;
6099         case 6:
6100             line1[len] = dna1[y+1]; y+= 3;
6101             line2[len] = '-';
6102             line3[len++] = ' ';
6103         }
6104         if (len >= WIDTH) {
6105             line1[len] = line2[len] = line3[len] = '\0';
6106             printf("\n%5d", WIDTH*lines++);
6107             for (k = 10; k <= WIDTH; k+=10)
6108                 printf("    .    :");
6109             if (k-5 < WIDTH) printf("    .");
6110             c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
6111             line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
6112             printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
6113             line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
6114             strcpy(line1, &line1[WIDTH]);
6115             strcpy(line2, &line2[WIDTH]);
6116             strcpy(line3, &line3[WIDTH]);
6117             len = len - WIDTH;
6118         }
6119     }
6120     printf("\n%5d", WIDTH*lines);
6121     line1[len] = line2[len] = line3[len] = '\0';
6122     for (k = 10; k < len; k+=10)
6123         printf("    .    :");
6124     if (k-5 < len) printf("    .");
6125     printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
6126 
6127     MemFree(dna1);
6128 
6129     return;
6130 }
OOFDisplayTraceBack2(Int4Ptr a,CharPtr dna,CharPtr pro,Int4 ld,Int4 lp,Int4 q_start,Int4 p_start)6131 NLM_EXTERN void OOFDisplayTraceBack2(Int4Ptr a, CharPtr dna, CharPtr pro,
6132                                      Int4 ld, Int4 lp,
6133                                      Int4 q_start, Int4 p_start)
6134 {
6135     int len = 0, i, j, x, y, lines, k;
6136     static char line1[100], line2[100], line3[100],
6137         tmp[10] = "         ", *st;
6138     char *dna1, c1, c2, c3;
6139 
6140     dna1 = Malloc(ld+2);
6141     printf("%d %d\n", q_start, p_start);  /* Why does this go to stdout?? */
6142 
6143     MemCpy(dna1+1, dna, ld);
6144     dna1[0] = ' '; dna1[1] = ' ';
6145 
6146     line1[0] = line2[0] = line3[0] = '\0';
6147     x= q_start;
6148     y = p_start;
6149 
6150     for (len = 0, j = 0, lines = 0; x < lp && y < ld; j++) {
6151         i = a[j];
6152         /*printf("%d %d %d\n", i, len, b->j);*/
6153         if (i > 0 && i < 6) {
6154             if (i == 1) {
6155                 tmp[0] = pro[x++];
6156                 len--;
6157                 y--;
6158                 i++;
6159             } else tmp[i-2] = pro[x++];
6160         }
6161         if (i == 6) {
6162             i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
6163             if (a[j+1] == 2) tmp[2] = ' ';
6164         }
6165         if (i > 0) {
6166             strncpy(&line1[len], &dna1[y], i); y+=i;
6167         } else {line1[len] = '-'; i = 1; tmp[0] = pro[x++];}
6168         strncpy(&line2[len], tmp, i);
6169         for (k = 0; k < i; k++) {
6170             if (tmp[k] != ' ' && tmp[k] != '-') {
6171                 if (k >= 2) tmp[k] = '\\';
6172                 else if (k == 1) tmp[k] = '|';
6173                 else tmp[k] = '/';
6174             } else tmp[k] = ' ';
6175         }
6176         if (i == 1) tmp[0] = ' ';
6177         strncpy(&line3[len], tmp, i);
6178         tmp[0] = tmp[1] =  tmp[2] = ' ';
6179         len += i;
6180         line1[len] = line2[len] =line3[len]  = '\0';
6181         if (len >= WIDTH) {
6182             printf("\n%5d", WIDTH*lines++);
6183             for (k = 10; k <= WIDTH; k+=10)
6184                 printf("    .    :");
6185             if (k-5 < WIDTH) printf("    .");
6186             c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
6187             line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
6188             printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
6189             line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
6190             strcpy(line1, &line1[WIDTH]);
6191             strcpy(line2, &line2[WIDTH]);
6192             strcpy(line3, &line3[WIDTH]);
6193             len = len - WIDTH;
6194         }
6195     }
6196     printf("\n%5d", WIDTH*lines);
6197     for (k = 10; k < len; k+=10)
6198         printf("    .    :");
6199     if (k-5 < len) printf("    .");
6200     printf("\n     %s\n     %s\n     %s\n", line1, line3, line2);
6201     dna1 = MemFree(dna1);
6202 }
6203 
/*
 * RDBTaxNamesFree
 *
 * Releases an RDBTaxNames structure: each of the three name strings
 * (sci_name, common_name, blast_name) that is non-NULL is freed, then the
 * structure itself.  A NULL argument is accepted and ignored.
 */
void RDBTaxNamesFree(RDBTaxNamesPtr tnames)
{
    if (tnames != NULL) {
        if (tnames->sci_name != NULL) {
            MemFree(tnames->sci_name);
        }
        if (tnames->common_name != NULL) {
            MemFree(tnames->common_name);
        }
        if (tnames->blast_name != NULL) {
            MemFree(tnames->blast_name);
        }
        MemFree(tnames);
    }
}
6219 
/*
 * RDBTaxNamesClone
 *
 * Builds a new RDBTaxNames from orig: tax_id is copied, the three name
 * strings are duplicated with StringSave(), and s_king is copied with
 * StringCpy().
 *
 * Returns the new structure, or NULL when orig is NULL or the structure
 * could not be allocated.  The result is presumably meant to be released
 * with RDBTaxNamesFree().
 */
RDBTaxNamesPtr RDBTaxNamesClone(RDBTaxNamesPtr orig)
{
    RDBTaxNamesPtr copy;

    if (orig == NULL)
        return NULL;

    copy = (RDBTaxNamesPtr) MemNew(sizeof(RDBTaxNames));
    if (copy == NULL)
        return NULL;

    copy->tax_id      = orig->tax_id;
    copy->sci_name    = StringSave(orig->sci_name);
    copy->common_name = StringSave(orig->common_name);
    copy->blast_name  = StringSave(orig->blast_name);
    StringCpy(copy->s_king, orig->s_king);

    return copy;
}
6236 
FDGetTaxNamesFromBioseq(BioseqPtr bsp,Int4 taxid)6237 RDBTaxNamesPtr FDGetTaxNamesFromBioseq(BioseqPtr bsp, Int4 taxid)
6238 {
6239     BlastDefLinePtr bdsp;
6240     SeqDescrPtr sdp;
6241     UserFieldPtr ufp;
6242     UserObjectPtr uop;
6243     ObjectIdPtr oidp;
6244     CharPtr PNTR cpp;
6245     RDBTaxNamesPtr  tnames = NULL;
6246 
6247     bdsp = NULL;
6248     for(sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
6249 
6250         if(sdp->choice == 14) {   /* User object */
6251 
6252             uop = sdp->data.ptrvalue;
6253             oidp = uop->type;
6254 
6255             if(!StringCmp(oidp->str, TAX_DATA_OBJ_LABEL)) {
6256                 for(ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6257                     oidp = (ObjectIdPtr) ufp->label;
6258                     if(oidp->id == taxid) {
6259                         cpp = ufp->data.ptrvalue;
6260                         tnames = (RDBTaxNamesPtr) MemNew(sizeof(RDBTaxNames));
6261                         tnames->tax_id = taxid;
6262                         tnames->sci_name =StringSave(cpp[SCI_NAME_POS]);
6263                         tnames->common_name = StringSave(cpp[COMMON_NAME_POS]);
6264                         tnames->blast_name = StringSave(cpp[BLAST_NAME_POS]);
6265 			StringCpy(tnames->s_king, cpp[S_KING_POS]);
6266                     }
6267                 }
6268             }
6269         }
6270     }
6271 
6272     /* If there is no this user object in the Bioseq returned pointer will
6273        be NULL */
6274 
6275     return tnames;
6276 }
6277 
FDGetDeflineAsnFromBioseq(BioseqPtr bsp)6278 BlastDefLinePtr FDGetDeflineAsnFromBioseq(BioseqPtr bsp)
6279 {
6280     BlastDefLinePtr bdsp;
6281     AsnIoMemPtr aimp;
6282     Int4 length;
6283     ByteStorePtr bstorep;
6284     ByteStorePtr PNTR bspp;
6285     SeqDescrPtr sdp;
6286     UserFieldPtr ufp;
6287     UserObjectPtr uop;
6288     ObjectIdPtr oidp;
6289     Uint1Ptr buffer;
6290 
6291     bdsp = NULL;
6292     for(sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
6293 
6294         if(sdp->choice == 14) {   /* User object */
6295 
6296             uop = sdp->data.ptrvalue;
6297             oidp = uop->type;
6298 
6299             if(!StringCmp(oidp->str, ASN_DEFLINE_OBJ_LABEL)) {
6300                 ufp = uop->data;
6301                 bspp = ufp->data.ptrvalue;
6302                 bstorep = bspp[0];
6303                 BSSeek(bstorep, 0, SEEK_SET);
6304                 length = BSLen(bstorep);
6305                 buffer = MemNew(length+1);
6306                 BSRead(bstorep, buffer, length);
6307                 aimp = AsnIoMemOpen("rb", buffer, length);
6308                 bdsp = (BlastDefLinePtr)
6309                     BlastDefLineSetAsnRead(aimp->aip, NULL);
6310                 AsnIoMemClose(aimp);
6311                 MemFree(buffer);
6312             }
6313         }
6314     }
6315 
6316     /* If there is no this user object in the Bioseq returned pointer will
6317        be NULL */
6318 
6319     return bdsp;
6320 }
6321 
PairwiseSeqAlignHasLinkout(SeqAlignPtr sap,Uint1 linkoutType)6322 Boolean PairwiseSeqAlignHasLinkout(SeqAlignPtr sap, Uint1 linkoutType){
6323   Boolean status=FALSE;
6324   SeqAlignPtr temp;
6325 
6326   temp=sap;
6327   while(temp&&!status){
6328     SeqIdPtr sip=TxGetSubjectIdFromSeqAlign(temp);
6329     BioseqPtr bsp=BioseqLockById(sip);
6330     BlastDefLinePtr bdlp, bdlptemp;
6331     if(bsp){
6332       bdlp=FDGetDeflineAsnFromBioseq(bsp);
6333       if(bdlp){
6334         bdlptemp=bdlp;
6335         while(bdlptemp&&!status){
6336           if(checkLinkoutType(bdlptemp, linkoutType)){
6337             status=TRUE;
6338           }
6339           bdlptemp=bdlptemp->next;
6340         }
6341 
6342         BlastDefLineSetFree(bdlp);
6343       }
6344       BioseqUnlockById(sip);
6345     }
6346     temp=temp->next;
6347 
6348   }
6349   return status;
6350 }
6351