1 /* $Id: txalign.c,v 6.101 2016/09/02 15:01:22 ucko Exp $
2 ***************************************************************************
3 * *
4 * COPYRIGHT NOTICE *
5 * *
6 * This software/database is categorized as "United States Government *
7 * Work" under the terms of the United States Copyright Act. It was *
8 * produced as part of the author's official duties as a Government *
9 * employee and thus can not be copyrighted. This software/database is *
10 * freely available to the public for use without a copyright notice. *
11 * Restrictions can not be placed on its present or future use. *
12 * *
13 * Although all reasonable efforts have been taken to ensure the accuracy *
14 * and reliability of the software and data, the National Library of *
15 * Medicine (NLM) and the U.S. Government do not and can not warrant the *
16 * performance or results that may be obtained by using this software, *
17 * data, or derivative works thereof. The NLM and the U.S. Government *
18 * disclaim any and all warranties, expressed or implied, as to the *
19 * performance, merchantability or fitness for any particular purpose or *
20 * use. *
21 * *
22 * In any work or product derived from this material, proper attribution *
23 * of the author(s) as the source of the software or data would be *
24 * appreciated. *
25 * *
26 * ===========================================================================
27 *
28 * File Name: txalign.c
29 *
30 * $Revision: 6.101 $
31 *
32 * File Description: Formating of text alignment for the BLAST output
33 *
34 * Modifications:
35 * --------------------------------------------------------------------------
36 *
37 * ==========================================================================
38 */
39
40 #define NLM_GENERATED_CODE_PROTO
41 #include <txalign.h>
42 #include <codon.h>
43 #include <ncbimisc.h>
44 #include <salpacc.h>
45 #include <salpstat.h>
46 #include <fdlKludge.h>
47 #include <blastdef.h>
48 #include <algo/blast/composition_adjustment/composition_constants.h>
49
50 #define BUFFER_LENGTH 2048
51 #define MIN_INS_SPACE 50
52 #define MAX_GI_NUM 10
53 #define MAX_DB_NUM 10
54 #define LENGTH_TO_SHOW_DOWNLOAD 10000
55
56 #define TXALIGN_HREF "http://www.ncbi.nlm.nih.gov"
57
58 #define NEW_ENTREZ_HREF "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
59
60 #define WBLAST2_HREF "http://www.ncbi.nlm.nih.gov/blast/bl2seq/wblast2.cgi"
61
62 /* Used in make_dumpgnl_links, set in getreq.cpp or getreqcmd.cpp */
63 const char *RID_glb;
64 const char *CDD_RID_glb;
65 /* Used in make_dumpgnl_links, set in format.cpp */
66 const char *Entrez_Query_Term ;
67 int query_number_glb;
68
69 /*Indicate if db contains sequence with gi*/
70 Boolean DbHasGi=FALSE;
71
72 int (*tx_fprintf)(FILE*, const char *, ...) = fprintf;
73 #define fprintf tx_fprintf
74
75 /*
76 Used by the functions that format the one-line descriptions.
77 */
78 typedef struct _txdfline_struct {
79 struct _txdfline_struct *next;
80 SeqAlignPtr seqalign;
81 SeqIdPtr id;
82 Char *buffer_id;
83 Char *title;
84 Nlm_FloatHi bit_score;
85 Nlm_FloatHi evalue;
86 Int4 score;
87 Int4 number;
88 Boolean is_na;
89 Boolean found_score;
90 Boolean isnew; /* used to print mark "new.gif" near defline */
91 Boolean waschecked; /* used to print some another .gif near such defline */
92 CharPtr segs_str; /* Used to print segs for dumpgnl program. */
93 size_t segs_buflen,
94 segs_used;
95 } TxDfLineStruct, *TxDfLineStructPtr;
96
97
98
99 /*fill string with num spaces and null-end the string*/
makeEmptyString(CharPtr str,Int4 num)100 static void makeEmptyString(CharPtr str, Int4 num){
101 Int4 i;
102 if(str){
103 for (i=0; i<num; i++){
104 str[i]=' ';
105 }
106 str[i]='\0';
107 }
108 }
109
110
111 /*return initials of names not exceeding 15 elements delimited by space. Need to free memory afterwards*/
getNameInitials(CharPtr name)112 static CharPtr getNameInitials(CharPtr name){
113 CharPtr temp2, initials=NULL;
114 Int4 i, maxElements=15;
115 CharPtr nameCopy;
116
117 if(name){
118 nameCopy=MemNew(StringLen(name)+sizeof(Char));
119 if(!nameCopy) {
120 printf("insufficient memory!\n");
121 exit (1);
122 }
123
124 StringCpy(nameCopy, name);
125 temp2=StrTok(nameCopy, " ");
126 if(temp2){
127 initials=MemNew((maxElements+1)*sizeof(Char));
128 if(!initials) {
129 printf("insufficient memory!\n");
130 exit (1);
131 }
132 i=0;
133 initials[i]=*temp2;
134 i++;
135 while((temp2=StrTok(NULL, " "))&&i<maxElements){
136 initials[i]=*temp2;
137 i++;
138 }
139 initials[i]='\0';
140 }
141 MemFree(nameCopy);
142 }
143
144 return initials;
145 }
146
147 /*return true if the linkout type in bdfl is linkoutType, false otherwise*/
checkLinkoutType(BlastDefLinePtr bdfl,Uint1 linkoutType)148 NLM_EXTERN Boolean checkLinkoutType(BlastDefLinePtr bdfl, Uint1 linkoutType){
149 Boolean isThisType=FALSE;
150 ValNodePtr vnp;
151 Int4 intval;
152
153 if(bdfl){
154 vnp=bdfl->links;
155 if(vnp){
156 intval=vnp->data.intvalue;
157 if(linkoutType&intval){
158 isThisType=TRUE;
159 }
160 }
161 }
162 return isThisType;
163 }
164
165 /* return bdlp containing the sip from a chain of bdlp. Return the first bdlp if sip is null*/
getBlastDefLineForSeqId(BlastDefLinePtr bdlp,SeqIdPtr sip)166 BlastDefLinePtr getBlastDefLineForSeqId(BlastDefLinePtr bdlp, SeqIdPtr sip){
167 BlastDefLinePtr temp;
168 Boolean found=FALSE;
169 temp=bdlp;
170
171 while(temp){
172 if(temp->seqid){
173 if(SeqIdMatch(temp->seqid, sip)){
174 found=TRUE;
175 break;
176 }
177 }
178 temp=temp->next;
179 }
180 if(!found){
181 temp=bdlp;
182 }
183 return temp;
184 }
185 /*add linkout for defline. It adds the linkout for the first sip that has a linkout*/
addLinkoutForDefline(BioseqPtr bsp,SeqIdPtr sip,FILE * fp)186 static void addLinkoutForDefline(BioseqPtr bsp, SeqIdPtr sip, FILE* fp){
187 BlastDefLinePtr bdlp, bdlpTemp;
188 Boolean hasLinkout=FALSE;
189 BIG_ID gi, firstGi=GetGIForSeqId(sip);
190 Char molType[8]={""};
191
192 if(bsp){
193 bdlp=FDGetDeflineAsnFromBioseq(bsp);
194 if(bdlp){
195 if(ISA_aa(bsp->mol)){
196 sprintf(molType, "[pgi]");
197 }
198 else if(ISA_na(bsp->mol)){
199 sprintf(molType, "[ngi]");
200 }
201
202 /*add space in front of linkout*/
203 fprintf(fp, " ");
204
205 bdlpTemp=bdlp;
206 while(bdlpTemp){
207 if(checkLinkoutType(bdlpTemp, linkout_gene)){
208 hasLinkout=TRUE;
209 gi=GetGIForSeqId(bdlpTemp->seqid);
210 fprintf(fp, URL_Gene, gi, ISA_aa(bsp->mol) ? "PUID" : "NUID");
211 break;
212 }
213 bdlpTemp=bdlpTemp->next;
214 }
215 bdlpTemp=bdlp;
216 while(bdlpTemp){
217 if(checkLinkoutType(bdlpTemp, linkout_unigene)){
218 hasLinkout=TRUE;
219 gi=GetGIForSeqId(bdlpTemp->seqid);
220 fprintf(fp, URL_Unigene, gi);
221 break;
222 }
223 bdlpTemp=bdlpTemp->next;
224 }
225 bdlpTemp=bdlp;
226 if (RID_glb) {
227 while(bdlpTemp){
228 if(checkLinkoutType(bdlpTemp, linkout_structure)){
229 hasLinkout=TRUE;
230 gi=GetGIForSeqId(bdlpTemp->seqid);
231 fprintf(fp, URL_Structure, RID_glb, firstGi, gi, CDD_RID_glb, "onegroup", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
232 break;
233 }
234 bdlpTemp=bdlpTemp->next;
235 }
236 }
237 bdlpTemp=bdlp;
238 while(bdlpTemp){
239 if(checkLinkoutType(bdlpTemp, linkout_geo)){
240 gi=GetGIForSeqId(bdlpTemp->seqid);
241 fprintf(fp, URL_Geo, gi);
242 break;
243 }
244 bdlpTemp=bdlpTemp->next;
245 }
246
247 }
248 BlastDefLineSetFree(bdlp);
249 }
250 }
251
252
253 /*print linkout for bsp. If sip is not null, the linkout is for that sip (ie., the case for nonredundant blast db*/
addLinkoutForBioseq(BioseqPtr bsp,SeqIdPtr sip,SeqIdPtr firstSip,FILE * fp)254 static void addLinkoutForBioseq(BioseqPtr bsp, SeqIdPtr sip, SeqIdPtr firstSip, FILE* fp){
255 BlastDefLinePtr bdlp, actualBdlp;
256 Boolean hasLinkout=FALSE;
257 BIG_ID gi, firstGi;
258 Char molType[8]={""};
259
260 if(bsp){
261 bdlp=FDGetDeflineAsnFromBioseq(bsp);
262 actualBdlp=getBlastDefLineForSeqId(bdlp, sip);
263 if(actualBdlp){
264 firstGi=GetGIForSeqId(firstSip);
265
266 gi=GetGIForSeqId(bsp->id);
267 if(ISA_aa(bsp->mol)){
268 sprintf(molType, "[pgi]");
269 }
270 else if(ISA_na(bsp->mol)){
271 sprintf(molType, "[ngi]");
272 }
273
274 /*add space in front of linkout*/
275 fprintf(fp, " ");
276
277 if(checkLinkoutType(actualBdlp, linkout_gene)){
278 hasLinkout=TRUE;
279 fprintf(fp, URL_Gene, gi, ISA_aa(bsp->mol) ? "PUID" : "NUID");
280 }
281
282 if(checkLinkoutType(actualBdlp, linkout_unigene)){
283 hasLinkout=TRUE;
284 fprintf(fp, URL_Unigene, gi);
285 }
286 if(RID_glb && checkLinkoutType(actualBdlp, linkout_structure)){
287 hasLinkout=TRUE;
288 fprintf(fp, URL_Structure, RID_glb, firstGi, gi, CDD_RID_glb, "onepair", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
289 }
290
291 if(checkLinkoutType(actualBdlp, linkout_geo)){
292 fprintf(fp, URL_Geo, gi);
293 }
294 }
295 BlastDefLineSetFree(bdlp);
296 }
297 }
298
299
get_num_empty_space(Boolean compress)300 static Int4 get_num_empty_space(Boolean compress)
301 {
302 return (compress ? (8+5 +1) : B_SPACE+POS_SPACE+STRAND_SPACE +1);
303 }
304
305 static Boolean ShowAlignNodeText2Ex(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix, SeqAlignPtr PNTR last_align);
306 static Boolean SeqAlignSegsStr(SeqAlignPtr salp, Int2 index, CharPtr *dst, size_t *size, size_t *used);
307 static CharPtr StringAppend(CharPtr *dst, size_t *size, CharPtr src, size_t *used);
308
309
310 static ValNodePtr ProcessTextInsertion PROTO((AlignNodePtr anp, Int4 m_left, Int4 m_right, BioseqPtr bsp, Int4 line_len, Int1 frame));
311
/*
 * Detach the leading group of alignment nodes from *anp_list.
 *
 * The group is identified by the itemID and chain of the first node that is
 * not an OBJ_SEQANNOT node.  Starting from the head of the list, nodes are
 * kept until an OBJ_SEQANNOT node or an alignment node with a different
 * itemID/chain is met.  The list is cut just before that node, *anp_list is
 * set to it (or to NULL when the end is reached), and the original head is
 * returned.  Returns NULL, leaving *anp_list untouched, when the list holds
 * no alignment node.
 */
static ValNodePtr ExtractCurrentAlignNode(ValNodePtr PNTR anp_list)
{
    ValNodePtr   first, node, before = NULL;
    AlignNodePtr anp;
    Uint4        groupItemID;
    Uint2        groupChain;
    Boolean      at_boundary;

    /* find the first alignment node; it defines the group */
    for (node = *anp_list; node != NULL && node->choice == OBJ_SEQANNOT; node = node->next)
        continue;
    if (node == NULL)
        return NULL;

    anp = (AlignNodePtr) node->data.ptrvalue;
    groupItemID = anp->itemID;
    groupChain = anp->chain;

    first = *anp_list;
    for (node = first; node != NULL; before = node, node = node->next) {
        if (node->choice == OBJ_SEQANNOT) {
            at_boundary = TRUE;
        } else {
            anp = (AlignNodePtr) node->data.ptrvalue;
            at_boundary = (Boolean)(anp->itemID != groupItemID || anp->chain != groupChain);
        }

        if (at_boundary) {
            *anp_list = node;
            if (before != NULL)
                before->next = NULL;
            return first;
        }
    }

    *anp_list = NULL;
    return first;
}
357
358
/*
 * Overwrite the itemID of every alignment node in anp_list with itemID.
 * Nodes whose choice is OBJ_SEQANNOT are left alone.
 */
static void modify_kludge_itemID (ValNodePtr anp_list, Uint4 itemID)
{
    ValNodePtr node;

    for (node = anp_list; node != NULL; node = node->next) {
        if (node->choice == OBJ_SEQANNOT)
            continue;
        ((AlignNodePtr) node->data.ptrvalue)->itemID = itemID;
    }
}
373
374 /******************************************************************
375 *
376 * LoadFollowerForSameId(anp_list)
377 * if the same sequence appears multiple times in the anp_list,
378 * it will be moved to the sequence that are the head of this
379 * list. The field anp->follower is set as the order of the
380 * repeats in this list
381 *
382 ******************************************************************/
LoadFollowerForSameId(ValNodePtr anp_list)383 static void LoadFollowerForSameId(ValNodePtr anp_list)
384 {
385 ValNodePtr curr, n_curr;
386 AlignNodePtr anp, n_anp;
387
388 curr = anp_list;
389 while(curr)
390 {
391 if(curr->choice != OBJ_SEQANNOT)
392 {
393 anp = (AlignNodePtr) curr->data.ptrvalue;
394 if(anp->is_master == FALSE && anp->follower == FALSE)
395 {
396 for(n_curr = curr->next; n_curr != NULL; n_curr = n_curr->next)
397 {
398 if(n_curr->choice != OBJ_SEQANNOT)
399 {
400 n_anp = (AlignNodePtr) n_curr->data.ptrvalue;
401 if(n_anp->is_master == FALSE && n_anp->follower == FALSE)
402 {
403 if(SeqIdMatch(n_anp->sip, anp->sip))
404 n_anp->follower = TRUE;
405 }
406 }
407 }
408 }
409 }
410 curr = curr->next;
411 }
412 }
413
414
/*
 * Overwrite the residues covered by the mask locations with a mask
 * character: 'X' when mol is Seq_mol_aa, 'N' otherwise.
 *
 *   bsp:     the byte store to write into (a ByteStore despite its name)
 *   maskloc: chain of locations; each is walked with SeqLocFindNext and the
 *            bytes from SeqLocStart to SeqLocStop inclusive are replaced
 *   mol:     molecule type used to select the mask character
 */
static void MaskWithLowComplexity(ByteStorePtr bsp, SeqLocPtr maskloc, Uint1 mol)
{
    const Uint1 mask_char = (mol == Seq_mol_aa) ? 'X' : 'N';
    SeqLocPtr   piece;
    Int4        pos, last;

    for (; maskloc != NULL; maskloc = maskloc->next) {
        for (piece = SeqLocFindNext(maskloc, NULL);
             piece != NULL;
             piece = SeqLocFindNext(maskloc, piece)) {
            pos = SeqLocStart(piece);
            last = SeqLocStop(piece);
            BSSeek(bsp, pos, SEEK_SET);
            while (pos <= last) {
                BSPutByte(bsp, (Int2)mask_char);
                ++pos;
            }
        }
    }
}
439
create_byte_store_from_bsp(BioseqPtr bsp)440 static ByteStorePtr create_byte_store_from_bsp (BioseqPtr bsp)
441 {
442 SeqPortPtr spp;
443 Uint1 code;
444 ByteStorePtr b_store;
445 Uint1 residue;
446
447 if(bsp == NULL)
448 return NULL;
449 if(bsp->mol == Seq_mol_aa)
450 code = Seq_code_iupacaa;
451 else
452 code = Seq_code_iupacna;
453
454 spp = SeqPortNew(bsp, 0, bsp->length-1, Seq_strand_plus, code);
455 b_store = BSNew(bsp->length +1);
456 BSSeek(b_store, 0, SEEK_SET);
457 while((residue = SeqPortGetResidue(spp)) != SEQPORT_EOF)
458 BSPutByte(b_store, (Int2)residue);
459 SeqPortFree(spp);
460 return b_store;
461 }
462
/*
 * Build a list parallel to mask_list.  Each output node copies the choice
 * of the corresponding input node; when the input node holds a location,
 * the output node receives a ByteStore holding the sequence with that
 * location masked (see MaskWithLowComplexity).
 *
 * The unmasked sequence is read once, from the Bioseq identified by the
 * first usable location, and duplicated for every entry except the last
 * one, which takes over the unmasked copy itself.
 *
 * Returns the head of the new list (NULL when mask_list is empty).
 */
ValNodePtr CreateMaskByteStore (ValNodePtr mask_list)
{
    ValNodePtr   result = NULL;
    ValNodePtr   entry, node;
    ByteStorePtr pristine = NULL;   /* unmasked sequence, shared source for copies */
    ByteStorePtr masked;
    BioseqPtr    bsp;
    SeqLocPtr    slp;
    SeqIdPtr     sip;
    Uint1        mol;               /* assigned whenever 'pristine' is created */

    for (entry = mask_list; entry != NULL; entry = entry->next) {
        node = ValNodeNew(result);
        node->choice = entry->choice;
        if (result == NULL)
            result = node;

        slp = (ValNodePtr) entry->data.ptrvalue;
        if (slp == NULL)
            continue;

        if (pristine == NULL) {
            sip = SeqLocId(slp);
            bsp = (sip != NULL) ? BioseqLockById(sip) : NULL;
            if (bsp != NULL) {
                pristine = create_byte_store_from_bsp (bsp);
                mol = bsp->mol;
                BioseqUnlock(bsp);
            }
        }
        if (pristine == NULL)
            continue;

        if (entry->next == NULL) {
            /* last entry: hand over the original instead of copying it */
            masked = pristine;
            pristine = NULL;
        } else {
            masked = BSDup(pristine);
        }
        MaskWithLowComplexity(masked, slp, mol);
        node->data.ptrvalue = masked;
    }

    if (pristine != NULL)
        BSFree(pristine);
    return result;
}
518
/*
 * Free every non-NULL ByteStore held in bs_list, then free the list nodes
 * themselves with ValNodeFree.
 */
static void FreeByteStoreList (ValNodePtr bs_list)
{
    ValNodePtr node = bs_list;

    while (node != NULL) {
        ByteStorePtr store = (ByteStorePtr) node->data.ptrvalue;

        if (store != NULL)
            BSFree(store);
        node = node->next;
    }
    ValNodeFree(bs_list);
}
532
replace_bytestore_data(BioseqPtr bsp,ValNodePtr bs_list,Uint1 frame)533 NLM_EXTERN Boolean replace_bytestore_data (BioseqPtr bsp, ValNodePtr bs_list, Uint1 frame)
534 {
535 ByteStorePtr b_store;
536 Uint1 code;
537
538 if(bsp == NULL)
539 return FALSE;
540
541 if(bsp->mol == Seq_mol_aa)
542 code = Seq_code_iupacaa;
543 else
544 code = Seq_code_iupacna;
545
546 while(bs_list)
547 {
548 if(bs_list->choice == frame)
549 {
550 b_store = (ByteStorePtr) bs_list->data.ptrvalue;
551 if(b_store != NULL)
552 {
553 bsp->repr = Seq_repr_raw;
554 bsp->seq_data = (SeqDataPtr) b_store;
555 bsp->seq_data_type = code;
556 return TRUE;
557 }
558 }
559 bs_list = bs_list->next;
560 }
561
562 return FALSE;
563 }
564
565
566 /*can the current alignnode be printed for text view*/
PrintAlignForText(AnnotInfoPtr info,AlignNodePtr anp)567 NLM_EXTERN Boolean PrintAlignForText(AnnotInfoPtr info, AlignNodePtr anp)
568 {
569 if(anp == NULL || anp->segs == NULL)
570 return FALSE;
571 if(anp->segs->type == STD_SEG)
572 {
573 if(info == NULL)
574 return FALSE;
575 if(info->annot_type != ANNOT_BLAST)
576 return FALSE;
577 if(info->blast_type != ALIGN_BLASTX &&
578 info->blast_type != ALIGN_TBLASTN &&
579 info->blast_type != ALIGN_PSITBLASTN &&
580 info->blast_type != ALIGN_TBLASTX)
581 return FALSE;
582 }
583
584 return TRUE;
585 }
586
587
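/*
 * Note: this description applies to get_alignment_frame(), which is
 * defined after is_master_alignment().
 * For tblastn and blastx, the frame of the non-master sequence is returned;
 * for tblastx, the frame of the master sequence is returned.
 */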
592
is_master_alignment(AlignNodePtr anp,BioseqPtr m_bsp)593 static Boolean is_master_alignment (AlignNodePtr anp, BioseqPtr m_bsp)
594 {
595 return (anp->is_master || BioseqMatch(m_bsp, anp->sip));
596 }
597
get_alignment_frame(ValNodePtr anp_list,BioseqPtr m_bsp)598 static Int1 get_alignment_frame(ValNodePtr anp_list, BioseqPtr m_bsp)
599 {
600 Uint1 c_type = 0;
601 AlignNodePtr anp;
602 AnnotInfoPtr annot_info;
603 ValNodePtr curr;
604 Boolean found;
605
606 if(anp_list == NULL)
607 return -1;
608
609 annot_info = NULL;
610 found = FALSE;
611 for(curr = anp_list; curr != NULL; curr = curr->next)
612 {
613 if(curr->choice == OBJ_SEQANNOT)
614 {
615 annot_info = (AnnotInfoPtr) anp_list->data.ptrvalue;
616 c_type = get_alignment_type(annot_info);
617 }
618 else
619 {
620 anp = (AlignNodePtr) curr->data.ptrvalue;
621 if(!is_master_alignment(anp, m_bsp))
622 found = (c_type != ALIGN_TDNA_TO_TDNA);
623 else
624 found = (c_type == ALIGN_TDNA_TO_TDNA);
625 if(found)
626 {
627 if(!PrintAlignForText(annot_info, anp))
628 return -1;
629 if(c_type == ALIGN_NORMAL || c_type == ALIGN_PROT_TO_DNA)
630 return 0;
631 if(c_type == ALIGN_DNA_TO_PROT || c_type == ALIGN_TDNA_TO_TDNA)
632 return anp->m_frame;
633 }
634 }
635 }
636
637 return -1;
638 }
639
640 /**********************************************************************************
641 *
642 * Given a chain of annots (ValNodePtrs) they are all printed out, one pattern
643 * at a time.
644 *
645 * For a give annot all alignments from one database sequence are assumed to be grouped together.
646 *
647 * The Alignments from one databases sequence are currently ranked by expect value.
648 * It has been suggested that this be changed and should not be relied on indefinitely.
649 *
650 *************************************************************************************/
651
652 NLM_EXTERN Boolean LIBCALL
ShowTextAlignFromAnnotExtra(BioseqPtr bsp,ValNodePtr vnp,SeqLocPtr seqloc,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)))653 ShowTextAlignFromAnnotExtra(BioseqPtr bsp, ValNodePtr vnp, SeqLocPtr seqloc,
654 Int4 line_len, FILE *fp,
655 Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix,
656 ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))
657 {
658 Int4 index=0;
659 SeqAnnotPtr seqannot;
660 SeqAnnotPtr annot;
661 SeqFeatPtr sfp;
662 SeqLocPtr next;
663
664 seqannot = SeqAnnotNew();
665 seqannot->type = 2;
666 AddAlignInfoToSeqAnnot(seqannot, 2);
667
668 while (vnp && seqloc)
669 {
670 index++;
671 seqannot->data = vnp->data.ptrvalue;
672 if (bsp->annot && bsp->annot->type == 1 && bsp->annot->data)
673 {
674 sfp = bsp->annot->data;
675 if (sfp->data.choice == SEQFEAT_REGION)
676 sfp->location = NULL;
677 bsp->annot = SeqAnnotFree(bsp->annot);
678 }
679 annot = bsp->annot = SeqAnnotNew();
680 bsp->annot->type = 1; /* ftable. */
681 next = seqloc->next;
682 sfp = SeqFeatNew();
683 seqloc->next = NULL;
684 sfp->location = seqloc;
685 sfp->data.choice = SEQFEAT_REGION;
686 sfp->data.value.ptrvalue = StringSave("pattern");
687 annot->data = sfp;
688 fprintf(fp, "\nSignificant alignments for pattern occurrence %ld at position %ld\n\n",
689 (long) index, (long) (SeqLocStart(seqloc)+1));
690 ShowTextAlignFromAnnot(seqannot, line_len, fp, featureOrder, groupOrder, option, matrix, mask_loc, fmt_score_func);
691 seqloc->next = next;
692 seqloc = seqloc->next;
693 vnp = vnp->next;
694 }
695 seqannot->data = NULL;
696 seqannot = SeqAnnotFree(seqannot);
697
698 return TRUE;
699 }
700 static Boolean load_master_translate_frame PROTO((ValNodePtr anp_list, Int4 m_len, BioseqPtr m_bsp));
701 static AlignNodePtr get_master_align_node PROTO((ValNodePtr anp_list));
702
703
704 /*****************************************************************************
705 *
706 * ShowTextAlignFromAnnot(annot, locus, line_len, fp, master, f_order)
707 * display the alignment stored in a Seq-annot in a text file
708 * annot: the Seq-annot pointer
709 * locus: if TRUE, show the locus name as the sequence label, otherwise,
710 * use the accession
711 * line_len: the number of sequence char per line
712 * fp: The file pointer to store the text output
713 * master: if TRUE, show the result as a master-slave type multiple pair
714 * wise alignment. if FALSE, display one alignment after the other
715 * f_order: the user selected feature type and order to be shown together
716 * with the alignment
717 * is_html: print out the format as an HTML page?
718 * return TRUE for success, FALSE for fail
719 *
720 *****************************************************************************/
721 /* This modification of the function to pass position-specific matrix */
ShowTextAlignFromAnnot3(SeqAnnotPtr hannot,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr www_blast_type,Int4Ptr PNTR posMatrix)722 NLM_EXTERN Boolean ShowTextAlignFromAnnot3(SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr www_blast_type, Int4Ptr PNTR posMatrix)
723 {
724 SeqAlignPtr align, h_align, n_align, prev;
725 SeqLocPtr m_loc;
726 Boolean retval, matrix_loaded=FALSE;
727 SeqIdPtr m_sip;
728 Int4 m_start, m_stop, t_start, t_stop;
729 ValNodePtr anp_node, curr_list;
730 ValNodePtr annot_head;
731 Uint1 style;
732 SeqAnnotPtr annot;
733 Boolean master;
734 Boolean flat_insert;
735 Uint1 m_strand;
736 Uint1 annot_type;
737 Char annotDB[101];
738 Uint2 order;
739 Uint1 blast_type;
740 Boolean load_matrix;
741 ValNodePtr bs_list; /*store the ByteStores that were masked master seqences*/
742 BioseqPtr m_bsp;
743 Int1 frame;
744 Uint1 code;
745 Uint1 repr;
746 ByteStorePtr seq_data = NULL;
747
748
749 annot = hannot;
750 flat_insert = (Boolean)(option & TXALIGN_FLAT_INS);
751 /* flat_insert = TRUE; */
752 master = (Boolean)(option & TXALIGN_MASTER);
753 if(annot->type != 2)
754 return FALSE;
755 m_start = -1;
756 m_stop = -1;
757 m_sip = NULL;
758 annot = hannot;
759 while(annot) {
760 if(annot->type == 2) {
761 SeqIdPtr siptemp;
762 align = (SeqAlignPtr) annot->data;
763 siptemp=TxGetSubjectIdFromSeqAlign(align);
764 if(siptemp&&siptemp->choice==SEQID_GI){
765 DbHasGi=TRUE;
766 }
767
768 if(m_sip == NULL)
769 m_sip = make_master(align);
770 if(m_sip != NULL) {
771 get_boundary(m_sip, &t_start, &t_stop, align);
772 if(m_start == -1 || m_start > t_start)
773 m_start = t_start;
774 if(m_stop == -1 || m_stop < t_stop)
775 m_stop = t_stop;
776 }
777 }
778 annot = annot->next;
779 }
780 if(m_sip == NULL || m_start == -1 || m_stop == -1)
781 return FALSE;
782
783 if(master)
784 style = COLLECT_MP;
785 else
786 style = COLLECT_MD;
787 anp_node = NULL;
788 m_loc = SeqLocIntNew(m_start, m_stop, Seq_strand_plus, m_sip);
789 annot = hannot;
790 load_matrix = FALSE; /*if there is any protein sequence, set the load_matrix to TRUE*/
791 while(annot) {
792 if(annot->type == 2) {
793 annotDB[0] = '\0';
794 blast_type = get_align_annot_qual(annot, annotDB, 100, &annot_type);
795
796 if(blast_type == ALIGN_BLASTX
797 || blast_type == ALIGN_TBLASTN
798 || blast_type == ALIGN_PSITBLASTN
799 || blast_type == ALIGN_TBLASTX)
800 load_matrix = TRUE;
801 if(blast_type == ALIGN_TBLASTX ||
802 (blast_type == ALIGN_BLASTX && annot_type == ANNOT_BLAST
803 && (option & TXALIGN_BLASTX_SPECIAL))) { /*!!!!!!!this messes up all the itemIDs and entityIDs !!!!!!*/
804 align = (SeqAlignPtr) annot->data;
805 prev = NULL;
806 h_align = NULL;
807 order = 0;
808 while(align) {
809 ++order;
810 n_align = align->next;
811 align->next = NULL;
812 if(get_align_ends(align, m_sip, &m_start, &m_stop, &m_strand)) {
813 /* sint = m_loc->data.ptrvalue;
814 sint->strand = m_strand; */
815 update_seq_loc(m_start, m_stop, m_strand, m_loc);
816 style = COLLECT_MD;
817 master = FALSE;
818
819 annot->data = align;
820 curr_list = CollAlignFromSeqAnnot(annot, m_loc,
821 featureOrder, groupOrder, style, FALSE, master, flat_insert);
822 if(curr_list != NULL) {
823 modify_kludge_itemID (curr_list, order);
824 ValNodeLink(&anp_node, curr_list);
825 }
826 }
827 if(prev == NULL)
828 h_align = align;
829 else
830 prev->next = align;
831 prev = align;
832 align = n_align;
833 }
834 annot->data = h_align;
835 } else {
836 curr_list = CollAlignFromSeqAnnot(annot, m_loc, featureOrder, groupOrder, style, FALSE, master, flat_insert);
837 if(curr_list != NULL)
838 ValNodeLink(&anp_node, curr_list);
839 }
840 }
841 annot = annot->next;
842 }
843 SeqLocFree(m_loc);
844 if(anp_node == NULL)
845 return FALSE;
846
847 m_bsp = BioseqLockById(m_sip);
848 if(m_bsp == NULL) {
849 FreeAlignNode(anp_node);
850 return FALSE;
851 }
852 if (m_bsp->seq_data_type == Seq_code_gap) {
853 BioseqUnlock(m_bsp);
854 return FALSE;
855 }
856
857 if(mask_loc != NULL)
858 bs_list = CreateMaskByteStore (mask_loc);
859 else
860 bs_list = NULL;
861
862 repr = m_bsp->repr;
863 seq_data = (ByteStorePtr) m_bsp->seq_data;
864 code = m_bsp->seq_data_type;
865
866 if(matrix == NULL && (option & TXALIGN_MATRIX_VAL || load_matrix)) {
867 matrix = load_default_matrix();
868 matrix_loaded = TRUE;
869 }
870
871 if(fmt_score_func != NULL) {
872 free_buff();
873 init_buff_ex(MAX(80, line_len + 23 + 12));
874 }
875 if(master) {
876 frame = get_alignment_frame(anp_node, m_bsp);
877 if(frame != -1 && bs_list != NULL) {
878 load_master_translate_frame(anp_node, m_bsp->length, m_bsp);
879
880 if(!replace_bytestore_data (m_bsp, bs_list, (Uint1)frame)) {
881 m_bsp->repr = repr;
882 m_bsp->seq_data = (SeqDataPtr) seq_data;
883 m_bsp->seq_data_type = code;
884 }
885 }
886 retval = ShowAlignNodeText2(anp_node, -1, line_len, fp, -1, -1, option, matrix, fmt_score_func, db_name, www_blast_type, posMatrix);
887 FreeAlignNode(anp_node);
888 } else {
889 SeqAlignPtr last_align=NULL;
890
891 annot_head = NULL;
892 if(fmt_score_func != NULL)
893 LoadFollowerForSameId(anp_node);
894 while(anp_node) {
895 if(anp_node->choice == OBJ_SEQANNOT) {
896 if(annot_head != NULL) {
897 annot_head->next = NULL;
898 FreeAlignNode(annot_head);
899 }
900 annot_head = anp_node;
901 anp_node = anp_node->next;
902 } else {
903 curr_list = ExtractCurrentAlignNode(&anp_node);
904 if(curr_list) {
905 if(annot_head != NULL) {
906 annot_head->next = curr_list;
907 load_master_translate_frame(annot_head, m_bsp->length, m_bsp);
908 frame = get_alignment_frame(annot_head, m_bsp);
909 } else
910 frame = 0;
911 if(frame != -1 && bs_list != NULL) {
912 if(!replace_bytestore_data (m_bsp, bs_list, (Uint1)frame)) {
913 m_bsp->repr = repr;
914 m_bsp->seq_data = (SeqDataPtr) seq_data;
915 m_bsp->seq_data_type = code;
916 }
917 }
918
919 if(annot_head != NULL) {
920 retval = ShowAlignNodeText2Ex(annot_head, -1, line_len, fp, -1, -1, option, matrix, fmt_score_func, db_name, www_blast_type, posMatrix, &last_align);
921 annot_head->next = NULL;
922 } else
923 retval = ShowAlignNodeText2Ex(curr_list, -1, line_len, fp, -1, -1, option, matrix, fmt_score_func, db_name, www_blast_type, posMatrix, &last_align);
924 if(retval == TRUE)
925 fprintf(fp, "\n\n");
926 FreeAlignNode(curr_list);
927 }
928 }
929 }
930 if(annot_head != NULL) {
931 annot_head->next = NULL;
932 FreeAlignNode(annot_head);
933 }
934 }
935
936 m_bsp->repr = repr;
937 m_bsp->seq_data = (SeqDataPtr) seq_data;
938 m_bsp->seq_data_type = code;
939
940 if (matrix_loaded)
941 free_default_matrix(matrix);
942
943 if(fmt_score_func != NULL)
944 free_buff();
945 if(bs_list != NULL)
946 FreeByteStoreList (bs_list);
947 BioseqUnlock(m_bsp);
948
949 return retval;
950 }
951
ShowTextAlignFromAnnot(SeqAnnotPtr hannot,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)))952 NLM_EXTERN Boolean ShowTextAlignFromAnnot(SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))
953 {
954 return ShowTextAlignFromAnnot2(hannot, line_len, fp, featureOrder, groupOrder, option, matrix, mask_loc, fmt_score_func, NULL, NULL);
955
956 }
ShowTextAlignFromAnnot2(SeqAnnotPtr hannot,Int4 line_len,FILE * fp,Uint1Ptr featureOrder,Uint1Ptr groupOrder,Uint4 option,Int4Ptr PNTR matrix,ValNodePtr mask_loc,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr www_blast_type)957 NLM_EXTERN Boolean ShowTextAlignFromAnnot2(SeqAnnotPtr hannot, Int4 line_len, FILE *fp, Uint1Ptr featureOrder, Uint1Ptr groupOrder, Uint4 option, Int4Ptr PNTR matrix, ValNodePtr mask_loc, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr www_blast_type)
958 {
959 return ShowTextAlignFromAnnot3(hannot, line_len, fp, featureOrder,
960 groupOrder, option, matrix, mask_loc,
961 fmt_score_func, db_name, www_blast_type,
962 NULL);
963 }
964
965 /* Simple SeqAlign printing function:
966 Can be used while debugging.. options kept to a minimum
967 fp==NULL ==> stdout
968 */
SeqAlignPrint(SeqAlignPtr salp,FILE * fp)969 NLM_EXTERN void LIBCALL SeqAlignPrint(SeqAlignPtr salp,FILE* fp) {
970 SeqAnnotPtr sap;
971 Int4 align_options;
972 if(salp) {
973 if(salp->segtype==5) {
974 sap = SeqAnnotForSeqAlign((SeqAlignPtr)salp->segs);
975 } else{
976 sap = SeqAnnotForSeqAlign(salp);
977 }
978 if(sap) {
979 align_options =TXALIGN_COMPRESS+TXALIGN_END_NUM+TXALIGN_MASTER+TXALIGN_MISMATCH;
980 ShowTextAlignFromAnnot(sap, 70, fp==NULL ? stdout : fp, NULL, NULL, align_options, NULL, NULL, FormatScoreFunc);
981
982 sap->data=NULL;
983 SeqAnnotFree(sap);
984 } else {
985 ErrPostEx(SEV_WARNING,0,0,"NULL SeqAnnot from Non-NULL SeqAlign\n");
986 }
987 }
988 }
989
990 /*********************************************************************
991 *
992 * functions used for producing the Web browser
993 *
994 *********************************************************************/
find_seqid_for_bioseq(GatherContextPtr gcp)995 static Boolean find_seqid_for_bioseq(GatherContextPtr gcp)
996 {
997 BioseqPtr bsp;
998 ValNodePtr vnp;
999
1000 if(gcp->thistype != OBJ_BIOSEQ)
1001 return FALSE;
1002 bsp = (BioseqPtr)(gcp->thisitem);
1003 if(bsp == NULL)
1004 return FALSE;
1005 vnp = (ValNodePtr)(gcp->userdata);
1006 vnp->choice = bsp->mol;
1007 vnp->data.ptrvalue = bsp->id;
1008 return TRUE;
1009 }
1010
find_seqid_for_seqfeat(GatherContextPtr gcp)1011 static Boolean find_seqid_for_seqfeat(GatherContextPtr gcp)
1012 {
1013 SeqFeatPtr sfp;
1014 ValNodePtr vnp;
1015 BioseqPtr bsp;
1016
1017 if(gcp->thistype != OBJ_SEQFEAT)
1018 return FALSE;
1019 sfp = (SeqFeatPtr)(gcp->thisitem);
1020 if(sfp == NULL || sfp->product == NULL)
1021 return FALSE;
1022 vnp = (ValNodePtr)(gcp->userdata);
1023 vnp->choice = Seq_mol_aa;
1024 bsp = BioseqFindCore(SeqLocId(sfp->product));
1025 if(bsp != NULL)
1026 vnp->data.ptrvalue = bsp->id;
1027 else
1028 vnp->data.ptrvalue = SeqLocId(sfp->product);
1029 return TRUE;
1030 }
1031
add_html_label(TextAlignBufPtr tdp)1032 static Boolean add_html_label(TextAlignBufPtr tdp)
1033 {
1034 if(tdp == NULL)
1035 return FALSE;
1036 if(tdp->label == NULL)
1037 return FALSE;
1038 if(tdp->seqEntityID == 0)
1039 return FALSE;
1040 /*only for the coding region features and Bioseqs */
1041 return ((tdp->feattype == SEQFEAT_CDREGION) ||(tdp->feattype == 0
1042 && tdp->bsp_itemID != 0));
1043 /* return (tdp->feattype == 0); */
1044 }
1045
get_seqid_for_textbuf(TextAlignBufPtr tdp,CharPtr HTML_db,CharPtr HTML_dopt)1046 static SeqIdPtr get_seqid_for_textbuf(TextAlignBufPtr tdp, CharPtr HTML_db,
1047 CharPtr HTML_dopt)
1048 {
1049 ValNode vn;
1050 SeqIdPtr sip;
1051
1052
1053 if(!add_html_label(tdp))
1054 return 0;
1055
1056 vn.choice = 0;
1057 vn.data.ptrvalue = NULL;
1058 if(tdp->feattype == SEQFEAT_CDREGION)
1059 GatherItem(tdp->seqEntityID, tdp->itemID, OBJ_SEQFEAT, (Pointer)(&vn), find_seqid_for_seqfeat);
1060 else
1061 GatherItem(tdp->seqEntityID, tdp->bsp_itemID, OBJ_BIOSEQ, (Pointer)(&vn), find_seqid_for_bioseq);
1062 sip = (SeqIdPtr)(vn.data.ptrvalue);
1063 if(sip != NULL) {
1064 if(vn.choice == Seq_mol_aa) {
1065 StringCpy(HTML_dopt, "GenPept");
1066 StringCpy(HTML_db, "Protein");
1067 } else {
1068 StringCpy(HTML_dopt, "GenBank");
1069 StringCpy(HTML_db, "Nucleotide");
1070 }
1071 }
1072 return sip;
1073
1074 }
1075
1076
1077 /******************************************************************
1078 *
1079 * DrawTextToBuffer(tdp_list, m_buf)
1080 * write the text into a buffer instead of a FILE
1081 * return the buffer
1082 *
1083 ******************************************************************/
DrawTextToBuffer(ValNodePtr tdp_list,CharPtr PNTR m_buf,Boolean is_html,Int4 label_size,Int4 num_size,Boolean compress,Int4Ptr PNTR matrix,Int4 stop_val,Int4 line_len,Boolean show_strand,Boolean strip_semicolon,SeqIdPtr * already_linked,Uint4 options)1084 static CharPtr DrawTextToBuffer(ValNodePtr tdp_list, CharPtr PNTR m_buf, Boolean is_html, Int4 label_size, Int4 num_size, Boolean compress, Int4Ptr PNTR matrix, Int4 stop_val, Int4 line_len, Boolean show_strand, Boolean strip_semicolon, SeqIdPtr *already_linked, Uint4 options)
1085 {
1086 Boolean already_done;
1087 TextAlignBufPtr tdp;
1088 CharPtr docbuf = NULL;
1089 Int2 i;
1090 Int4 pos;
1091 ValNodePtr curr;
1092 Int4 max_len; /*maximum length for each line*/
1093 Int4 size;
1094 CharPtr HTML_buffer;
1095 CharPtr matrix_buf;
1096 Char HTML_db[32], HTML_dopt[16];
1097 Int4 html_len;
1098 SeqIdPtr sip;
1099 DbtagPtr db_tag;
1100 ObjectIdPtr oip;
1101 Boolean load;
1102 Int4 num_empty, max_pos_val;
1103 Uint1 res;
1104 Char temp[21];
1105 Boolean is_first;
1106 SeqIdPtr seqid_var;
1107 Int4 getSeqCheckboxLen=200;
1108
1109 if(tdp_list==NULL)
1110 return NULL;
1111 tdp = (TextAlignBufPtr) tdp_list->data.ptrvalue;
1112 if(tdp->buf == NULL)
1113 return NULL;
1114 if(compress)
1115 {
1116 num_empty = label_size + 1 + num_size + 1;
1117 if(show_strand)
1118 num_empty += STRAND_SPACE;
1119 }
1120 else
1121 {
1122 label_size = B_SPACE;
1123 num_size = POS_SPACE;
1124 num_empty = B_SPACE + STRAND_SPACE + POS_SPACE + 2;
1125 }
1126 /* max_len = 150; */
1127 max_len = line_len + num_empty + 20;
1128 if(is_html) {
1129 Char buffer[1024];
1130 sprintf(buffer, "%s?cmd=Retrieve&db=&list_uids=&"
1131 "dopt=GenPept", NEW_ENTREZ_HREF);
1132 html_len = StringLen(buffer);
1133 html_len += 1 + MAX_GI_NUM + 10 + 20 + MAX_DB_NUM;
1134 HTML_buffer = (CharPtr) MemNew((size_t)html_len * sizeof(Char));
1135 }
1136 size = 0;
1137 max_pos_val = 12;
1138 for(curr = tdp_list; curr !=NULL; curr = curr->next)
1139 {
1140 tdp = (TextAlignBufPtr) curr->data.ptrvalue;
1141 if(tdp->exonCount > 0 && tdp->buf == NULL) /*it is a codon*/
1142 size += (3 * max_len);
1143 else
1144 {
1145 if(is_html && add_html_label(tdp))
1146 size += (max_len + html_len);
1147 else
1148 size += max_len;
1149 }
1150 if(tdp->matrix_val)
1151 size += max_len;
1152 if(options&TXALIGN_HTML&&(options&TXALIGN_MASTER)&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
1153 size+=getSeqCheckboxLen;
1154 }
1155 }
1156 if(size == 0)
1157 {
1158 if(is_html)
1159 MemFree(HTML_buffer);
1160 return NULL;
1161 }
1162 size += max_pos_val;
1163
1164 docbuf = (CharPtr) MemNew((size_t)(size) * sizeof(Char));
1165 matrix_buf = (CharPtr) MemNew((size_t)max_len * sizeof (Char));
1166
1167 pos = 0;
1168 is_first = TRUE;
1169 while(tdp_list)
1170 {
1171 tdp = (TextAlignBufPtr) tdp_list->data.ptrvalue;
1172 if(tdp->exonCount > 0 && tdp->buf == NULL) /*it is a codon*/
1173 {
1174 for(i =0; i<3; ++i)
1175 {
1176 if(i == tdp->frame)
1177 pos+= print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1178 tdp->strand, tdp->extra_space, FALSE, label_size, num_size, show_strand, strip_semicolon);
1179 else
1180 pos+= print_label_to_buffer_all_ex(docbuf+pos, NULL, -1,
1181 0, tdp->extra_space, FALSE, label_size, num_size, show_strand, strip_semicolon);
1182 sprintf(docbuf+pos, "%s\n", tdp->codon[i]);
1183 pos += (StringLen(tdp->codon[i]) +1);
1184 }
1185 }
1186 if(tdp->exonCount == 0 && tdp->buf !=NULL)
1187 {
1188 if(tdp->matrix_val) /*print the matrix of the alignment*/
1189 {
1190 MemSet(matrix_buf, ' ', (size_t)max_len* sizeof(Char));
1191 matrix_buf[max_len-1] = '\0';
1192 size = StringLen(tdp->buf);
1193 if(matrix) /*protein alignment*/
1194 {
1195 for(i = 0; i<size; ++i)
1196 {
1197 res = tdp->buf[i];
1198 if(tdp->matrix_val[i] > 0)
1199 {
1200 if(tdp->matrix_val[i] == matrix[res][res] || tdp->matrix_val[i] == INT2_MAX)
1201 matrix_buf[i+num_empty] = res;
1202 else
1203 matrix_buf[i+num_empty] = '+';
1204 }
1205 }
1206 }
1207 else /*DNA alignment*/
1208 {
1209 for(i = 0; i<size; ++i)
1210 if(tdp->matrix_val[i] != 0)
1211 matrix_buf[i+num_empty] = (Uint1)(tdp->matrix_val[i]);
1212 }
1213 matrix_buf[i+num_empty] = '\0';
1214 sprintf(docbuf+pos, "%s\n", matrix_buf);
1215 pos += (StringLen(matrix_buf) +1);
1216 }
1217 load = FALSE;
1218 if(is_html)
1219 {
1220 sip = get_seqid_for_textbuf(tdp, HTML_db, HTML_dopt);
1221 while(!load && sip)
1222 {
1223 if(sip->choice == SEQID_GI && sip->data.intvalue != 0)
1224 {
1225 seqid_var = *already_linked;
1226 already_done = FALSE;
1227 while (seqid_var)
1228 {
1229 if (SeqIdMatch(sip, seqid_var) == TRUE)
1230 {
1231 already_done = TRUE;
1232 break;
1233 }
1234 seqid_var = seqid_var->next;
1235 }
1236 if (already_done) {
1237 sprintf(HTML_buffer,
1238 "<a href=%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s %s>",
1239 NEW_ENTREZ_HREF, HTML_db, (long)sip->data.intvalue, HTML_dopt,
1240 options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
1241 } else {
1242
1243 sprintf(HTML_buffer, "<a name = %ld></a>"
1244 "<a href=%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s %s>",
1245 (long) sip->data.intvalue, NEW_ENTREZ_HREF, HTML_db,
1246 (long)sip->data.intvalue, HTML_dopt,
1247 options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
1248
1249 ValNodeAddInt(already_linked, SEQID_GI, sip->data.intvalue);
1250 }
1251 /*check box for getting sequence*/
1252 if(options&TXALIGN_HTML&&options&TXALIGN_MASTER&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
1253 Char checkboxBuf[200];
1254 snprintf(checkboxBuf, 200, "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%d\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%d', 'getSeqGi', this.checked)\">", sip->data.intvalue, query_number_glb);
1255 snprintf(docbuf+pos, size-pos, "%s", checkboxBuf);
1256
1257 pos += StringLen(checkboxBuf);
1258 }
1259
1260 html_len = StringLen(HTML_buffer);
1261 snprintf(docbuf+pos, size-pos, "%s", HTML_buffer);
1262 pos += html_len;
1263
1264 pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1265 tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
1266 load = TRUE;
1267 }
1268 else if(sip->choice == SEQID_GENERAL)
1269 {
1270 db_tag = (DbtagPtr) sip->data.ptrvalue;
1271 if(db_tag->db) {
1272 oip = db_tag->tag;
1273 if(oip->id != 0) {
1274 if (StringCmp(db_tag->db, "THC") == 0) {
1275 sprintf(HTML_buffer, "<a name = THC%ld></a><a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id, (long) oip->id);
1276
1277 html_len = StringLen(HTML_buffer);
1278 snprintf(docbuf+pos, size-pos, "%s",
1279 HTML_buffer);
1280 pos += html_len;
1281 pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1282 tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
1283 load = TRUE;
1284 } else if (!StringICmp(db_tag->db, "TI")) {
1285 sprintf(HTML_buffer, "<a name = TI%ld></a><a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id, (long) oip->id);
1286
1287 html_len = StringLen(HTML_buffer);
1288 snprintf(docbuf+pos, size-pos, "%s",
1289 HTML_buffer);
1290 pos += html_len;
1291 pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos,
1292 tdp->strand, FALSE, TRUE, label_size, num_size, show_strand, strip_semicolon);
1293 load = TRUE;
1294 }
1295 }
1296 }
1297 }
1298 sip = sip->next;
1299 }
1300 }
1301
1302 if(!load){
1303 if(options&TXALIGN_HTML&&options&TXALIGN_MASTER&&DbHasGi&&(options&TXALIGN_GET_SEQUENCE)){
1304 Char checkboxBuf[200];
1305 snprintf(checkboxBuf, 200, "<input type=\"checkbox\" name=\"getSeqMaster\" value=\"\" onClick=\"uncheckable('getSeqAlignment%d', 'getSeqMaster')\">", query_number_glb);
1306 snprintf(docbuf+pos, size-pos, "%s", checkboxBuf);
1307
1308 pos += StringLen(checkboxBuf);
1309 }
1310 pos += print_label_to_buffer_all_ex(docbuf+pos, tdp->label, tdp->pos, tdp->strand, FALSE, FALSE, label_size, num_size, show_strand, strip_semicolon);
1311 }
1312 snprintf(docbuf+pos, size-pos, "%s", tdp->buf);
1313 pos += StringLen(tdp->buf);
1314 if(stop_val >=0 && is_first)
1315 {
1316 sprintf(temp, " %ld\n", (long) (stop_val+1));
1317 sprintf(docbuf+pos, "%s", temp);
1318 pos += StringLen(temp);
1319 }
1320 else
1321 {
1322 sprintf(docbuf+pos, "\n");
1323 pos += 1;
1324 }
1325
1326 if(m_buf != NULL && *m_buf == NULL)
1327 *m_buf = StringSave(tdp->buf);
1328
1329 }
1330 tdp_list = tdp_list->next;
1331 is_first = FALSE;
1332 }
1333
1334 if(is_html)
1335 MemFree(HTML_buffer);
1336 MemFree(matrix_buf);
1337 docbuf[pos] = '\0';
1338 return docbuf;
1339 }
1340
1341
1342
1343 /*************************************************************************
1344 *
1345 * DrawTextList(tdp_list, fp)
1346 * returns the tdbp->text of the first node. It is used as the master
1347 * sequence to compare the mismatches
1348 *
1349 *************************************************************************/
1350 /* static CharPtr DrawTextList(ValNodePtr tdp_list, FILE *fp)
1351 {
1352 TextAlignBufPtr tdp;
1353 CharPtr m_buf = NULL;
1354 Int2 i;
1355
1356 if(tdp_list==NULL)
1357 return NULL;
1358
1359 while(tdp_list)
1360 {
1361 tdp = tdp_list->data.ptrvalue;
1362 if(tdp->exonCount > 0 && tdp->buf == NULL)
1363 {
1364 for(i =0; i<3; ++i)
1365 {
1366 if(i == tdp->frame)
1367 print_label(fp, tdp->label, tdp->pos, tdp->strand, tdp->extra_space);
1368 else
1369 print_label(fp, NULL, -1, 0, tdp->extra_space);
1370 fprintf(fp, "%s\n", tdp->codon[i]);
1371 }
1372 }
1373 if(tdp->exonCount == 0 && tdp->buf !=NULL)
1374 {
1375 print_label(fp, tdp->label, tdp->pos, tdp->strand, FALSE);
1376 fprintf(fp, "%s\n", tdp->buf);
1377 if(m_buf == NULL)
1378 m_buf = tdp->buf;
1379 }
1380 tdp_list = tdp_list->next;
1381 }
1382
1383 return m_buf;
1384 } */
1385
make_scale_bar_str(CharPtr PNTR bar,CharPtr PNTR num_str,Int4 num_empty,Int4 line_len)1386 NLM_EXTERN Boolean make_scale_bar_str(CharPtr PNTR bar, CharPtr PNTR num_str,
1387 Int4 num_empty, Int4 line_len)
1388 {
1389 Int4 i, j;
1390 CharPtr curr;
1391 Char temp[100];
1392 Int4 len;
1393
1394 if(bar == NULL || num_str == NULL)
1395 return FALSE;
1396 *bar = (CharPtr) MemNew((size_t)(line_len+num_empty+2) * sizeof(Char));
1397 *num_str = (CharPtr) MemNew((size_t)(line_len+num_empty+2) * sizeof(Char));
1398 make_empty(*bar, (Int2)(line_len+num_empty));
1399 make_empty(*num_str, (Int2)(line_len+num_empty));
1400 for(i =0; i<line_len; ++i)
1401 {
1402 curr = *bar;
1403 if((i+1)%5 ==0)
1404 curr[i+num_empty]= '|';
1405 curr = *num_str;
1406 if((i+1)%10==0)
1407 {
1408 sprintf(temp, "%ld", (long) (i+1));
1409 len = StringLen(temp);
1410 for(j = 0; j<len; ++j)
1411 curr[i+num_empty-(len-1-j)] = temp[j];
1412 }
1413
1414 }
1415 return TRUE;
1416 }
1417
get_master_align_node(ValNodePtr anp_list)1418 static AlignNodePtr get_master_align_node(ValNodePtr anp_list)
1419 {
1420 AlignNodePtr anp, first_anp = NULL;
1421
1422 while(anp_list)
1423 {
1424 if(anp_list->choice != OBJ_SEQANNOT)
1425 {
1426 anp = (AlignNodePtr) anp_list->data.ptrvalue;
1427 if(anp->is_master)
1428 return anp;
1429 else if(first_anp == NULL)
1430 first_anp = anp;
1431 }
1432 anp_list = anp_list->next;
1433 }
1434 return first_anp;
1435 }
1436
1437
1438 /**********************************************************************
1439 *
1440 * figure out the DNA-protein alignment, the frame of translation
1441 * in the DNA sequence compared with the protein sequence
1442 * m_len is used to figure out the reading frame of the minus
1443 * strand translation
1444 *
1445 **********************************************************************/
load_master_translate_frame(ValNodePtr anp_list,Int4 m_len,BioseqPtr m_bsp)1446 static Boolean load_master_translate_frame(ValNodePtr anp_list, Int4 m_len, BioseqPtr m_bsp)
1447 {
1448 AlignNodePtr anp;
1449 AlignSegPtr asp;
1450 Int4 g_left, g_right;
1451 Int4 start_pos;
1452 Uint1 strand;
1453 Int4 val;
1454 AnnotInfoPtr annot_info;
1455 Uint1 align_type;
1456 AlignNodePtr master_anp;
1457 Int4 offset;
1458 Boolean found;
1459
1460
1461 master_anp = get_master_align_node(anp_list);
1462 if(master_anp == NULL)
1463 return FALSE;
1464
1465 while(anp_list)
1466 {
1467 align_type = 0;
1468 annot_info = NULL;
1469 while(anp_list != NULL)
1470 {
1471 if(anp_list->choice == OBJ_SEQANNOT)
1472 {
1473 annot_info = (AnnotInfoPtr) anp_list->data.ptrvalue;
1474 align_type = get_alignment_type(annot_info);
1475 }
1476 else
1477 {
1478 if(annot_info == NULL)
1479 break;
1480 else
1481 {
1482 if(align_type != 0 || annot_info->annot_type == ANNOT_BLAST)
1483 break;
1484 }
1485 }
1486 anp_list = anp_list->next;
1487 }
1488
1489 while(anp_list != NULL && anp_list->choice != OBJ_SEQANNOT)
1490 {
1491 if(align_type == ALIGN_DNA_TO_PROT || align_type == ALIGN_TDNA_TO_TDNA)
1492 {
1493 anp = (AlignNodePtr) anp_list->data.ptrvalue;
1494 /*for tblastx, need to figure out the translation frame of the master,
1495 for blastx (reverted to tblastn) and tblastn, need to figure out
1496 the frame for the query
1497 */
1498 if((align_type == ALIGN_TDNA_TO_TDNA && BioseqMatch(m_bsp, anp->sip))||
1499 (!(anp->is_master) && !BioseqMatch(m_bsp, anp->sip)))
1500 {
1501 g_left = anp->extremes.left;
1502 g_right = anp->extremes.right;
1503 if(align_type == ALIGN_TDNA_TO_TDNA)
1504 {
1505 strand = anp->extremes.strand;
1506 offset = 0;
1507 found = TRUE;
1508 }
1509 else
1510 {
1511 strand = Seq_strand_plus;
1512 if(anp->extremes.strand != master_anp->extremes.strand)
1513 {
1514 if(anp->extremes.strand == Seq_strand_minus ||
1515 master_anp->extremes.strand == Seq_strand_minus)
1516 strand = Seq_strand_minus;
1517 }
1518 if(anp->extremes.strand == Seq_strand_minus)
1519 g_left = g_right;
1520
1521 offset = 0;
1522 found = FALSE;
1523 for(asp = master_anp->segs; asp != NULL; asp = asp->next)
1524 {
1525 if(asp->type != GAP_SEG)
1526 {
1527 if(asp->type == INS_SEG)
1528 {
1529 if(offset > 0)
1530 offset += asp->gr.right;
1531 }
1532 else
1533 {
1534 if(asp->gr.right < g_left)
1535 offset += (asp->gr.right - asp->gr.left + 1);
1536 else
1537 {
1538 if(g_left >= asp->gr.left && g_left <= asp->gr.right)
1539 {
1540 offset += MAX(0, g_left - asp->gr.left);
1541 found = TRUE;
1542 break;
1543 }
1544 }
1545 }
1546 }
1547 }
1548 }
1549
1550 if(found)
1551 {
1552 start_pos = ABS(master_anp->seqpos + offset);
1553 if(strand == Seq_strand_minus)
1554 {
1555 val = (m_len -1 - start_pos)%3L;
1556 }
1557 else
1558 {
1559 val = start_pos%3L;
1560 }
1561
1562 switch(val)
1563 {
1564 case 0:
1565 if(strand == Seq_strand_minus)
1566 anp->m_frame = 4;
1567 else
1568 anp->m_frame = 1;
1569 break;
1570 case 1:
1571 if(strand == Seq_strand_minus)
1572 anp->m_frame = 5;
1573 else
1574 anp->m_frame = 2;
1575 break;
1576 case 2:
1577 if(strand == Seq_strand_minus)
1578 anp->m_frame = 6;
1579 else
1580 anp->m_frame = 3;
1581 break;
1582 default:
1583 break;
1584 }
1585 }
1586 }
1587 }
1588 anp_list = anp_list->next;
1589 }
1590 }
1591 return TRUE;
1592 }
1593
1594
1595 /**********************************************************************
1596 *
1597 * figure out in the current range (m_left, m_right), the total number
1598 * of reading frames that the hit proteins have
1599 * return the AlignNode for the master sequence (master is always the
1600 * DNA sequence
1601 *
1602 ***********************************************************************/
get_current_master_frame(ValNodePtr list,Int4 m_left,Int4 m_right,Uint1Ptr all_frame)1603 static Boolean get_current_master_frame(ValNodePtr list, Int4 m_left, Int4 m_right, Uint1Ptr all_frame)
1604 {
1605 ValNodePtr anp_list;
1606 AlignNodePtr anp;
1607 Int4 g_left, g_right;
1608 Uint1 i;
1609 Boolean retval;
1610
1611 MemSet((Pointer)all_frame, 0, (size_t)6 * sizeof(Uint1));
1612 while(list)
1613 {
1614 anp_list = (ValNodePtr) list->data.ptrvalue;
1615 anp = (AlignNodePtr) anp_list->data.ptrvalue;
1616 if(!(anp->is_master))
1617 {
1618 g_left = anp->extremes.left;
1619 g_right = anp->extremes.right;
1620 if(!(g_left > m_right || g_right < m_left))
1621 {
1622 if(anp->m_frame != 0)
1623 {
1624 for(i = 0; i<6; ++i)
1625 {
1626 if(all_frame[i] == anp->m_frame)
1627 break;
1628 else if(all_frame[i] == 0)
1629 {
1630 all_frame[i] = anp->m_frame;
1631 break;
1632 }
1633 }
1634 retval = TRUE;
1635 }
1636 }
1637 }
1638
1639 list = list->next;
1640 }
1641
1642 return retval;
1643 }
1644
make_fake_cds(BioseqPtr m_bsp,Int4 start,Int4 stop,Uint1 strand)1645 NLM_EXTERN SeqFeatPtr make_fake_cds(BioseqPtr m_bsp, Int4 start, Int4 stop, Uint1 strand)
1646 {
1647 SeqFeatPtr sfp;
1648 CdRegionPtr crp;
1649 IntFuzzPtr ifp_from, ifp_to;
1650 Uint1 g_code = 0;
1651 SeqDescrPtr descr;
1652 SeqIntPtr seq_int;
1653 SeqLocPtr slp;
1654 BioSourcePtr source;
1655 OrgRefPtr org;
1656 OrgNamePtr orgname;
1657 ValNodePtr vnp;
1658
1659 descr = m_bsp->descr;
1660 while(descr)
1661 {
1662 /*look into BioSource to get the genetic code*/
1663 if(descr->choice == Seq_descr_source)
1664 {
1665 source = (BioSourcePtr) descr->data.ptrvalue;
1666 if(source != NULL)
1667 {
1668 org = source->org;
1669 if(org != NULL)
1670 {
1671 orgname = org->orgname;
1672 if(orgname != NULL)
1673 {
1674 g_code = orgname->gcode;
1675 break;
1676 }
1677 }
1678 }
1679 }
1680 descr = descr->next;
1681 }
1682
1683 crp = CdRegionNew();
1684 if(g_code != 0)
1685 {
1686 vnp = ValNodeNew(NULL);
1687 vnp->choice = 2;
1688 vnp->data.intvalue = (Int4)g_code;
1689 ValNodeAddPointer(&(crp->genetic_code), 254, (Pointer)vnp);
1690 }
1691
1692 sfp = SeqFeatNew();
1693 sfp->data.choice = 3;
1694 sfp->data.value.ptrvalue = crp;
1695 sfp->partial = TRUE;
1696 sfp->product = NULL;
1697 slp = SeqLocIntNew(start, stop, strand, m_bsp->id);
1698 seq_int = (SeqIntPtr) slp->data.ptrvalue;
1699 ifp_from = IntFuzzNew();
1700 ifp_from->choice = 4;
1701 ifp_from->a = 2;
1702 seq_int->if_from = ifp_from;
1703 ifp_to = IntFuzzNew();
1704 ifp_to->choice = 4;
1705 ifp_to->a = 1;
1706 seq_int->if_to = ifp_to;
1707 sfp->location = slp;
1708
1709
1710 return sfp;
1711 }
1712
1713
translate_faked_cds(SeqFeatPtr fake_cds,Uint1 frame,Int4 c_start,Int4 c_stop,Int4 master_len,AlignNodePtr anp)1714 static CharPtr translate_faked_cds(SeqFeatPtr fake_cds, Uint1 frame, Int4 c_start, Int4 c_stop, Int4 master_len, AlignNodePtr anp)
1715 {
1716 Uint1 c_frame;
1717 SeqLocPtr slp, t_slp;
1718 SeqIntPtr sint;
1719 Uint1 strand;
1720 CdRegionPtr crp;
1721 CharPtr buf;
1722 Int4 from, to;
1723 Int4 n;
1724 AlignSegPtr asp;
1725 Int4 m_start, m_stop;
1726 Int4 t_start, t_stop;
1727 Int4 l_pos = 0;
1728 Int4 offset;
1729
1730
1731 buf = (CharPtr) MemNew((size_t)(c_stop - c_start+2) * sizeof(Char));
1732 buf[0] = '\0';
1733 offset = 0;
1734 for(asp = anp->segs; asp != NULL; asp = asp->next)
1735 {
1736 if(!((asp->gr.left > c_stop) || ( asp->gr.right < c_start)))
1737 {
1738 t_start = MAX(asp->gr.left, c_start);
1739 t_stop = MIN(asp->gr.right, c_stop);
1740 m_start = ABS(anp->seqpos + t_start - offset - anp->extremes.left);
1741 m_stop = ABS(anp->seqpos + t_stop - offset - anp->extremes.left);
1742 if(asp->type == GAP_SEG)
1743 {
1744 MemSet(buf+l_pos, ' ', (size_t)(t_stop - t_start + 1) * sizeof (Char));
1745 l_pos += t_stop - t_start + 1;
1746 buf[l_pos] = '\0';
1747 }
1748 else if(asp->type != INS_SEG)
1749 {
1750 if(frame > 3)
1751 {
1752 strand = Seq_strand_minus;
1753 n = MAX(0, (master_len-1 - m_stop)/3 -1);
1754 from = m_start;
1755 to = master_len -1 - n*3;
1756 c_frame = frame -3;
1757 from = MAX(0, from -3);
1758 }
1759 else
1760 {
1761 strand = Seq_strand_plus;
1762 n = MAX(0, (m_start/3-1));
1763 from = n * 3;
1764 to = m_stop;
1765 c_frame = frame;
1766 to = MIN(master_len-1, to + 3);
1767 }
1768 slp = fake_cds->location;
1769 sint = (SeqIntPtr) slp->data.ptrvalue;
1770 sint->from = from;
1771 sint->to = to;
1772 sint->strand = strand;
1773 crp = (CdRegionPtr) fake_cds->data.value.ptrvalue;
1774 crp->frame = c_frame;
1775
1776 t_slp = SeqLocIntNew(m_start, m_stop, Seq_strand_plus, SeqLocId(slp));
1777 print_protein_for_cds(fake_cds, buf+l_pos, t_slp, TRUE);
1778 l_pos += t_stop - t_start + 1;
1779 SeqLocFree(t_slp);
1780 }
1781 }
1782 else if(asp->gr.left > c_stop)
1783 break;
1784 if(asp->type == GAP_SEG)
1785 offset += (asp->gr.right - asp->gr.left +1 );
1786 }
1787 return buf;
1788 }
1789
load_fake_protein_buf(CharPtr buf,Uint1 frame,AlignNodePtr master_anp)1790 static ValNodePtr load_fake_protein_buf(CharPtr buf, Uint1 frame, AlignNodePtr master_anp)
1791 {
1792 Char temp[20];
1793 ValNodePtr head = NULL;
1794 TextAlignBufPtr tdp;
1795
1796 tdp = (TextAlignBufPtr) MemNew(sizeof(TextAlignBuf));
1797 tdp->pos = -1;
1798 if(frame >3)
1799 {
1800 tdp->strand = Seq_strand_minus;
1801 sprintf(temp, "frame=-%d", frame-3);
1802 }
1803 else
1804 {
1805 tdp->strand = Seq_strand_plus;
1806 sprintf(temp, "frame=+%d", frame);
1807 }
1808 tdp->label = StringSave(temp);
1809 tdp->buf = StringSave(buf);
1810 tdp->itemID = 0;
1811 tdp->feattype = 0;
1812 tdp->subtype = 0;
1813 tdp->entityID = master_anp->entityID;
1814 tdp->seqEntityID = master_anp->seq_entityID;
1815 tdp->bsp_itemID = master_anp->bsp_itemID;
1816 ValNodeAddPointer(&head, 0, tdp);
1817 return head;
1818 }
1819
has_hit_in_region(Uint1Ptr all_frame)1820 static Boolean has_hit_in_region(Uint1Ptr all_frame)
1821 {
1822 Uint1 i;
1823
1824 for(i = 0; i<6; ++i)
1825 if(all_frame[i] != 0)
1826 return TRUE;
1827 return FALSE;
1828 }
1829
1830
check_bsp_id(ValNodePtr PNTR id_list,SeqIdPtr sip)1831 static Boolean check_bsp_id(ValNodePtr PNTR id_list, SeqIdPtr sip)
1832 {
1833 ValNodePtr curr;
1834
1835 if(*id_list == NULL)
1836 {
1837 ValNodeAddPointer(id_list, 0, sip);
1838 return FALSE;
1839 }
1840 curr = *id_list;
1841 while(curr)
1842 {
1843 if(curr->data.ptrvalue == sip ||
1844 SeqIdMatch((SeqIdPtr)(curr->data.ptrvalue), sip))
1845 return TRUE;
1846 curr = curr->next;
1847 }
1848
1849
1850 ValNodeAddPointer(id_list, 0, sip);
1851 return FALSE;
1852 }
1853
find_align_proc(GatherContextPtr gcp)1854 static Boolean find_align_proc(GatherContextPtr gcp)
1855 {
1856 SeqAlignPtr PNTR p_align;
1857
1858 p_align = (SeqAlignPtr PNTR)(gcp->userdata);
1859 *p_align = (SeqAlignPtr)gcp->thisitem;
1860 return TRUE;
1861 }
1862
1863
/*Functions that load the alignment summary (the number of identical and
  positive residues, the number of gaps, etc.) into the AlignSum structure.
*/
1867
load_align_sum_for_DenseDiag(DenseDiagPtr ddp,AlignSumPtr asp)1868 static Boolean load_align_sum_for_DenseDiag(DenseDiagPtr ddp, AlignSumPtr asp)
1869 {
1870 SeqInt si;
1871 SeqLoc sl;
1872 Int4 i;
1873 Int2 m_order, t_order; /*order of the master and the target sequence*/
1874 Uint1 m_res, t_res;
1875 SeqIdPtr sip;
1876 SeqPortPtr m_spp, t_spp;
1877 Int2 dim;
1878 SeqPortPtr spp;
1879
1880 if(ddp == NULL || asp == NULL)
1881 return FALSE;
1882 m_order = -1;
1883 t_order = -1;
1884 dim = 0;
1885 for(i = 0, sip = ddp->id; sip != NULL; sip = sip->next, ++i) {
1886 if(SeqIdMatch(sip, asp->master_sip) && m_order == -1)
1887 m_order = i;
1888 else if(SeqIdMatch(sip, asp->target_sip) && t_order == -1)
1889 t_order = i;
1890 ++dim;
1891 }
1892
1893 if(m_order == -1 || t_order == -1)
1894 return FALSE;
1895
1896 asp->m_frame_set = FALSE;
1897 asp->t_frame_set = FALSE;
1898
1899 for(i = 0; i<dim; ++i) {
1900 if(i == m_order || i == t_order) {
1901
1902 if(i == m_order)
1903 si.id = asp->master_sip;
1904 else
1905 si.id = asp->target_sip;
1906 si.from = ddp->starts[i];
1907 si.to = si.from + ddp->len -1;
1908 if(ddp->strands != NULL)
1909 si.strand = ddp->strands[i];
1910 else
1911 si.strand = 0;
1912
1913
1914 if (asp->is_aa) {
1915 asp->m_strand = Seq_strand_unknown;
1916 asp->t_strand = Seq_strand_unknown;
1917 } else {
1918 if(i == m_order) {
1919 asp->m_strand = si.strand;
1920 } else {
1921 asp->t_strand = si.strand;
1922 }
1923 }
1924
1925 sl.choice = SEQLOC_INT;
1926 sl.data.ptrvalue = &si;
1927
1928 spp = SeqPortNewByLoc(&sl, (asp->is_aa) ? Seq_code_ncbieaa : Seq_code_iupacna);
1929 if(i == m_order) {
1930 asp->master_from = si.from;
1931 asp->master_to = si.to;
1932 m_spp = spp;
1933 } else {
1934 asp->target_from = si.from;
1935 asp->target_to = si.to;
1936 t_spp = spp;
1937 }
1938 }
1939 }
1940
1941 if(m_spp == NULL || t_spp == NULL) {
1942 if(m_spp == NULL)
1943 SeqPortFree(m_spp);
1944 if(t_spp != NULL)
1945 SeqPortFree(t_spp);
1946 return FALSE;
1947 }
1948
1949 for(i = 0; i<ddp->len; ++i) {
1950 m_res = SeqPortGetResidue(m_spp);
1951 t_res = SeqPortGetResidue(t_spp);
1952 if(m_res == t_res)
1953 ++(asp->identical);
1954 else if ((asp->matrix != NULL && asp->is_aa) &&
1955 (IS_residue(m_res) && IS_residue(t_res)) &&
1956 (asp->matrix[m_res][t_res] >0))
1957 ++(asp->positive);
1958 }
1959 asp->totlen = ddp->len;
1960
1961 SeqPortFree(m_spp);
1962 SeqPortFree(t_spp);
1963 return TRUE;
1964 }
1965
1966
load_align_sum_for_DenseSeg(DenseSegPtr dsp,AlignSumPtr asp)1967 static Boolean load_align_sum_for_DenseSeg(DenseSegPtr dsp, AlignSumPtr asp)
1968 {
1969 SeqInt msi, tsi;
1970 SeqIntPtr sint;
1971 SeqLoc sl;
1972 Int2 i, k;
1973 Int2 dim;
1974 Int2 m_order, t_order; /*order of the master and the target sequence*/
1975 Int4 index;
1976 Int4 j, val, t_val;
1977 Uint1 m_res, t_res, stdaa_res;
1978 SeqIdPtr sip;
1979 SeqPortPtr m_spp, t_spp;
1980 SeqMapTablePtr smtp;
1981
1982
1983 if(dsp == NULL || asp == NULL)
1984 return FALSE;
1985
1986 if(asp->posMatrix != NULL) {
1987 if((smtp = SeqMapTableFindObj(Seq_code_ncbistdaa,
1988 Seq_code_ncbieaa)) == NULL)
1989 return FALSE;
1990 }
1991
1992 m_order = -1;
1993 t_order = -1;
1994 dim = 0;
1995 for(i = 0, sip = dsp->ids; sip != NULL; sip = sip->next, ++i) {
1996 if(SeqIdMatch(sip, asp->master_sip) && m_order == -1)
1997 m_order = i;
1998 else if(SeqIdMatch(sip, asp->target_sip) && t_order == -1)
1999 t_order = i;
2000 ++dim;
2001 }
2002
2003 if(m_order == -1 || t_order == -1)
2004 return FALSE;
2005
2006 msi.id = asp->master_sip;
2007 msi.from = -1;
2008 msi.to = -1;
2009 msi.strand = (dsp->strands == NULL) ? 0 : dsp->strands[m_order];
2010
2011 tsi.id = asp->target_sip;
2012 tsi.from = -1;
2013 tsi.to = -1;
2014 tsi.strand = (dsp->strands == NULL) ? 0 : dsp->strands[t_order];
2015
2016 for(i = 0; i<dsp->numseg; ++i) {
2017 for(k = 0; k<dim; ++k) {
2018 val = dsp->starts[i*dim + k];
2019 if(val != -1 && (k == m_order || k == t_order)) {
2020 sint = (k == m_order) ? (&msi) : (&tsi);
2021 if(sint->from == -1 || sint->from > val)
2022 sint->from = val;
2023 if(sint->to == -1 || sint->to < (val + dsp->lens[i] -1))
2024 sint->to = val + dsp->lens[i] -1;
2025 }
2026 }
2027 }
2028
2029 asp->master_from = msi.from;
2030 asp->master_to = msi.to;
2031 asp->target_from = tsi.from;
2032 asp->target_to = tsi.to;
2033
2034 if (asp->is_aa) {
2035 asp->m_strand = Seq_strand_unknown;
2036 asp->t_strand = Seq_strand_unknown;
2037 } else {
2038 asp->m_strand = dsp->strands[m_order];
2039 asp->t_strand = dsp->strands[t_order];
2040 }
2041 asp->m_frame_set = FALSE;
2042 asp->t_frame_set = FALSE;
2043
2044 sl.choice = SEQLOC_INT;
2045 sl.data.ptrvalue = &msi;
2046 m_spp = SeqPortNewByLoc(&sl, (asp->is_aa) ? Seq_code_ncbieaa : Seq_code_iupacna);
2047
2048 sl.choice = SEQLOC_INT;
2049 sl.data.ptrvalue = &tsi;
2050 t_spp = SeqPortNewByLoc(&sl, (asp->is_aa) ? Seq_code_ncbieaa : Seq_code_iupacna);
2051
2052 for(i = 0; i<dsp->numseg; ++i) {
2053 val = dsp->starts[i*dim + m_order];
2054 t_val = dsp->starts[i*dim + t_order];
2055 if(val == -1 || t_val == -1) {
2056 asp->gaps += dsp->lens[i];
2057 if(val != -1) {
2058 index = dsp->lens[i];
2059 while (index > 0) {
2060 index--;
2061 m_res = SeqPortGetResidue(m_spp);
2062 }
2063 }
2064 if(t_val != -1) {
2065 index = dsp->lens[i];
2066 while (index > 0) {
2067 index--;
2068 t_res = SeqPortGetResidue(t_spp);
2069 }
2070 }
2071 } else {
2072 for(j = 0; j<dsp->lens[i]; ++j) {
2073 m_res = SeqPortGetResidue(m_spp);
2074 t_res = SeqPortGetResidue(t_spp);
2075 if(m_res == t_res)
2076 ++(asp->identical);
2077 else if ((asp->matrix != NULL && asp->is_aa) &&
2078 (IS_residue(m_res) && IS_residue(t_res))) {
2079 if(asp->posMatrix != NULL) {
2080 stdaa_res = SeqMapTableConvert(smtp, t_res);
2081 if(asp->posMatrix[val+j][stdaa_res] > 0)
2082 ++(asp->positive);
2083 } else {
2084 if(asp->matrix[m_res][t_res] >0)
2085 ++(asp->positive);
2086 }
2087 }
2088 }
2089 }
2090 asp->totlen += dsp->lens[i];
2091 }
2092 SeqPortFree(m_spp);
2093 SeqPortFree(t_spp);
2094 return TRUE;
2095 }
2096
2097 /*
2098 Obtains the genetic code from a BioseqPtr, assuming that a fetch function
2099 has been enabled.
2100 */
2101 NLM_EXTERN CharPtr
GetGeneticCodeFromSeqId(SeqIdPtr sip)2102 GetGeneticCodeFromSeqId (SeqIdPtr sip)
2103
2104 {
2105 BioseqPtr bsp;
2106 BioSourcePtr source;
2107 CharPtr genetic_code=NULL;
2108 GeneticCodePtr gcp;
2109 Int4 gen_code_val=1; /* std genetic code if nothing found. */
2110 ValNodePtr vnp;
2111
2112
2113 bsp = BioseqLockById(sip);
2114
2115 if (bsp)
2116 {
2117 vnp = BioseqGetSeqDescr(bsp, Seq_descr_source, NULL);
2118 if (vnp)
2119 {
2120 source = (BioSourcePtr) vnp->data.ptrvalue;
2121 gen_code_val = source->org->orgname->gcode;
2122 }
2123 BioseqUnlock(bsp);
2124 }
2125
2126 gcp = GeneticCodeFind(gen_code_val, NULL);
2127 for (vnp = (ValNodePtr)gcp->data.ptrvalue; vnp != NULL; vnp = vnp->next)
2128 {
2129 if (vnp->choice == 3) /* ncbieaa */
2130 {
2131 genetic_code = (CharPtr)vnp->data.ptrvalue;
2132 break;
2133 }
2134 }
2135
2136 return genetic_code;
2137 }
OOFTranslateDNAInAllFrames(Uint1Ptr dna,Int4 length,SeqIdPtr query_id)2138 NLM_EXTERN CharPtr OOFTranslateDNAInAllFrames(Uint1Ptr dna, Int4 length,
2139 SeqIdPtr query_id)
2140 {
2141 CharPtr dnap;
2142 CharPtr codes;
2143 Int4 i;
2144 Uint1 codon[3];
2145
2146 if(dna == NULL || length == 0)
2147 return NULL;
2148
2149 dnap = (CharPtr) MemNew(length+1);
2150 codes = GetGeneticCodeFromSeqId(query_id);
2151
2152 dnap[0] = dnap[1] = dnap[2] = 0;
2153
2154 for (i = 2; i < length; i++) {
2155 codon[0] = dna[i-2];
2156 codon[1] = dna[i-1];
2157 codon[2] = dna[i];
2158 dnap[i+1] = AAForCodon(codon, codes);
2159 }
2160 return dnap;
2161 }
2162
2163 NLM_EXTERN Uint1 AAForCodon (Uint1Ptr codon, CharPtr codes);
2164
load_align_sum_for_StdSeg(StdSegPtr ssp,AlignSumPtr asp)2165 static Boolean load_align_sum_for_StdSeg(StdSegPtr ssp, AlignSumPtr asp)
2166 {
2167 Boolean master_is_translated=FALSE, both_translated=FALSE;
2168 Boolean target_is_translated = FALSE;
2169 BioseqPtr bsp;
2170 CharPtr genetic_code1, genetic_code2;
2171 SeqPortPtr spp1, spp2;
2172 Uint1 codon[4], residue1, residue2;
2173 Boolean ungapped_align = FALSE;
2174 StdSegPtr ssp_last;
2175
2176 if(ssp == NULL || asp == NULL)
2177 return FALSE;
2178
2179 if(asp->ooframe) {
2180 if (SeqLocStrand(ssp->loc) != Seq_strand_unknown) {
2181 master_is_translated = TRUE;
2182 target_is_translated = FALSE;
2183 } else {
2184 master_is_translated = FALSE;
2185 target_is_translated = TRUE;
2186 }
2187 } else {
2188 /* Check for valid sequence. */
2189 if (SeqLocLen(ssp->loc) == 3*SeqLocLen(ssp->loc->next))
2190 master_is_translated = TRUE;
2191 else if (3*SeqLocLen(ssp->loc) == SeqLocLen(ssp->loc->next))
2192 target_is_translated = TRUE;
2193 else if (SeqLocLen(ssp->loc) == SeqLocLen(ssp->loc->next))
2194 both_translated = TRUE;
2195 else
2196 return FALSE;
2197 }
2198
2199 if (master_is_translated) {
2200 genetic_code1 = GetGeneticCodeFromSeqId(ssp->ids);
2201 } else if (both_translated) {
2202 genetic_code1 = GetGeneticCodeFromSeqId(ssp->ids);
2203 genetic_code2 = GetGeneticCodeFromSeqId(ssp->ids->next);
2204 } else {
2205 genetic_code1 = GetGeneticCodeFromSeqId(ssp->ids->next);
2206 }
2207
2208 asp->m_frame_set = FALSE;
2209 asp->t_frame_set = FALSE;
2210
2211 if (master_is_translated || both_translated) {
2212 asp->m_strand = SeqLocStrand(ssp->loc);
2213 asp->m_frame = SeqLocStart(ssp->loc);
2214 if (SeqLocStrand(ssp->loc) == Seq_strand_minus) {
2215 bsp = BioseqLockById(SeqLocId(ssp->loc));
2216 asp->m_frame += SeqLocLen(ssp->loc);
2217 asp->m_frame = -(1+(bsp->length - asp->m_frame)%3);
2218 asp->m_frame_set = TRUE;
2219 BioseqUnlock(bsp);
2220 } else {
2221 asp->m_frame = (1+(asp->m_frame)%3);
2222 asp->m_frame_set = TRUE;
2223 }
2224 }
2225
2226 if (!master_is_translated || both_translated) {
2227 asp->t_strand = SeqLocStrand(ssp->loc->next);
2228 asp->t_frame = SeqLocStart(ssp->loc->next);
2229 if (SeqLocStrand(ssp->loc->next) == Seq_strand_minus) {
2230 if (bsp = BioseqLockById(SeqLocId(ssp->loc->next))) {
2231 asp->t_frame += SeqLocLen(ssp->loc->next);
2232 asp->t_frame = -(1+(bsp->length - asp->t_frame)%3);
2233 asp->t_frame_set = TRUE;
2234 BioseqUnlock(bsp);
2235 } else {
2236 return FALSE;
2237 }
2238 } else {
2239 asp->t_frame = (1+(asp->t_frame)%3);
2240 asp->t_frame_set = TRUE;
2241 }
2242 }
2243
2244 if (SeqLocStrand(ssp->loc) == Seq_strand_minus) {
2245 asp->master_from = SeqLocStop(ssp->loc);
2246 } else {
2247 asp->master_from = SeqLocStart(ssp->loc);
2248 }
2249
2250 if (SeqLocStrand(ssp->loc->next) == Seq_strand_minus) {
2251 asp->target_from = SeqLocStop(ssp->loc->next);
2252 } else {
2253 asp->target_from = SeqLocStart(ssp->loc->next);
2254 }
2255
2256
2257 while (ssp) {
2258 if (ssp->loc->choice != SEQLOC_EMPTY && ssp->loc->next->choice != SEQLOC_EMPTY) {
2259 if (both_translated) {
2260 spp1 = SeqPortNewByLoc(ssp->loc, Seq_code_ncbi4na);
2261 spp2 = SeqPortNewByLoc(ssp->loc->next, Seq_code_ncbi4na);
2262 while ((codon[0]=SeqPortGetResidue(spp2)) != SEQPORT_EOF) {
2263 codon[1] = SeqPortGetResidue(spp2);
2264 codon[2] = SeqPortGetResidue(spp2);
2265 residue1 = AAForCodon(codon, genetic_code1);
2266 codon[0] = SeqPortGetResidue(spp1);
2267 codon[1] = SeqPortGetResidue(spp1);
2268 codon[2] = SeqPortGetResidue(spp1);
2269 residue2 = AAForCodon(codon, genetic_code2);
2270 if (residue1 == residue2)
2271 ++(asp->identical);
2272 else if (asp->matrix != NULL &&
2273 asp->matrix[residue1][residue2] >0)
2274 ++(asp->positive);
2275 }
2276 } else {
2277 if (master_is_translated) {
2278 spp1 = SeqPortNewByLoc(ssp->loc, Seq_code_ncbi4na);
2279 spp2 = SeqPortNewByLoc(ssp->loc->next, Seq_code_ncbieaa);
2280 } else {
2281 spp2 = SeqPortNewByLoc(ssp->loc, Seq_code_ncbieaa);
2282 spp1 = SeqPortNewByLoc(ssp->loc->next, Seq_code_ncbi4na);
2283 }
2284
2285 while ((residue1=SeqPortGetResidue(spp2)) != SEQPORT_EOF) {
2286 codon[0] = SeqPortGetResidue(spp1);
2287 codon[1] = SeqPortGetResidue(spp1);
2288 codon[2] = SeqPortGetResidue(spp1);
2289 residue2 = AAForCodon(codon, genetic_code1);
2290
2291 if (residue1 == residue2)
2292 ++(asp->identical);
2293 else if (asp->matrix != NULL &&
2294 asp->matrix[residue1][residue2] >0)
2295 ++(asp->positive);
2296 }
2297 }
2298 SeqPortFree(spp1);
2299 SeqPortFree(spp2);
2300 /* Check if this is an ungapped alignment;
2301 in this case do not go to next link */
2302 if (!asp->ooframe && ssp->next &&
2303 ssp->next->loc->choice != SEQLOC_EMPTY &&
2304 ssp->next->loc->next->choice != SEQLOC_EMPTY)
2305 ungapped_align = TRUE;
2306 } else { /* Count only gaps in the top (master) strand. */
2307 if (ssp->loc->choice == SEQLOC_EMPTY)
2308 {
2309 if (!master_is_translated || both_translated)
2310 asp->gaps += SeqLocLen(ssp->loc->next)/3;
2311 else
2312 asp->gaps += SeqLocLen(ssp->loc->next);
2313 }
2314 }
2315
2316 if(asp->ooframe) {
2317 if (master_is_translated) {
2318 if(ssp->loc->next->choice != SEQLOC_EMPTY)
2319 asp->totlen += SeqLocLen(ssp->loc->next);
2320 else
2321 asp->totlen += SeqLocLen(ssp->loc)/3;
2322 } else {
2323 if(ssp->loc->choice != SEQLOC_EMPTY)
2324 asp->totlen += SeqLocLen(ssp->loc);
2325 else
2326 asp->totlen += SeqLocLen(ssp->loc->next)/3;
2327 }
2328 } else {
2329
2330 if (ssp->loc->choice != SEQLOC_EMPTY) {
2331 if (master_is_translated || both_translated)
2332 asp->totlen += SeqLocLen(ssp->loc)/3;
2333 else
2334 asp->totlen += SeqLocLen(ssp->loc);
2335 } else {
2336 if (target_is_translated || both_translated)
2337 asp->totlen += SeqLocLen(ssp->loc->next)/3;
2338 else
2339 asp->totlen += SeqLocLen(ssp->loc->next);
2340 }
2341 }
2342
2343 ssp_last = ssp;
2344
2345 if (both_translated || ungapped_align)
2346 /* for tblastx perform only one StdSegPtr. */
2347 break;
2348
2349 ssp = ssp->next;
2350 }
2351
2352 if (SeqLocStrand(ssp_last->loc) == Seq_strand_minus) {
2353 asp->master_to = SeqLocStart(ssp_last->loc);
2354 } else {
2355 asp->master_to = SeqLocStop(ssp_last->loc);
2356 }
2357
2358 if (SeqLocStrand(ssp_last->loc->next) == Seq_strand_minus) {
2359 asp->target_to = SeqLocStart(ssp_last->loc->next);
2360 } else {
2361 asp->target_to = SeqLocStop(ssp_last->loc->next);
2362 }
2363
2364 return TRUE;
2365 }
2366
2367
2368 /*****************************************************************
2369 *
2370 * find_score_in_align(align, chain, asp)
2371 * align: the Seq-align point
2372 * chain: for multiple segment Seq-aligns, such as DenseDiag and
2373 * StdSeg, the order within the Seq-align
2374 * asp: the structure that records and stores the positive,
2375 * identical residues
2376 *
2377 *****************************************************************/
find_score_in_align(SeqAlignPtr align,Uint2 chain,AlignSumPtr asp)2378 NLM_EXTERN ScorePtr find_score_in_align(SeqAlignPtr align, Uint2 chain,
2379 AlignSumPtr asp)
2380 {
2381 DenseDiagPtr ddp;
2382 DenseSegPtr dsp;
2383 StdSegPtr ssp;
2384 Uint2 order = 0;
2385 SeqAlignPtr sap;
2386 ScorePtr sp;
2387
2388 if(align == NULL)
2389 return NULL;
2390
2391 if(asp != NULL) {
2392 asp->totlen = 0;
2393 asp->positive = 0;
2394 asp->identical = 0;
2395 asp->gaps = 0;
2396 }
2397 switch (align->segtype) {
2398 case 1: /*Dense-diag*/
2399 ddp = (DenseDiagPtr) align->segs;
2400 while(ddp) {
2401 ++order;
2402 if(order == chain) {
2403 if(asp != NULL)
2404 load_align_sum_for_DenseDiag(ddp, asp);
2405 return ddp->scores;
2406 }
2407 ddp = ddp->next;
2408 }
2409 break;
2410 case 2:
2411 dsp = (DenseSegPtr) align->segs;
2412 if(asp != NULL)
2413 load_align_sum_for_DenseSeg(dsp, asp);
2414 if (dsp->scores)
2415 return dsp->scores;
2416 else
2417 return align->score;
2418 case 3:
2419 ssp = (StdSegPtr) align->segs;
2420 while(ssp) {
2421 ++order;
2422 if(order == chain) {
2423 if(asp != NULL)
2424 load_align_sum_for_StdSeg(ssp, asp);
2425 if (ssp->scores)
2426 return ssp->scores;
2427 else
2428 return align->score;
2429 }
2430 ssp = ssp->next;
2431 }
2432 break;
2433 case 5: /* Discontinuous alignment */
2434 sap = (SeqAlignPtr) align->segs;
2435
2436 if((sp = find_score_in_align(sap, chain, asp)) == NULL)
2437 return align->score;
2438 else
2439 return sp;
2440 default:
2441 break;
2442 }
2443 return NULL;
2444 }
2445
2446 /*try to decide if this fit the prototype of reversing the BLASTX
2447 result to make a TBLASTN output
2448 */
reverse_blastx_order(ValNodePtr anp_list)2449 static Boolean reverse_blastx_order (ValNodePtr anp_list)
2450 {
2451 Int2 num = 0;
2452 ValNodePtr c_list;
2453 AnnotInfoPtr annot_info;
2454 Uint1 align_type = 0;
2455
2456 for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2457 {
2458 if(c_list->choice == OBJ_SEQANNOT)
2459 {
2460 annot_info = (AnnotInfoPtr) c_list->data.ptrvalue;
2461 align_type = get_alignment_type(annot_info);
2462 if(align_type != ALIGN_DNA_TO_PROT)
2463 return FALSE;
2464 }
2465 else
2466 {
2467 ++num;
2468 if(num > 2) /*only for pairwise alignment */
2469 return FALSE;
2470 }
2471 }
2472
2473 /* return (align_type == ALIGN_DNA_TO_PROT && num == 2); */
2474 return (align_type == ALIGN_DNA_TO_PROT);
2475 }
2476
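/*
 * Note: this comment describes change_blastx_master(), which is defined
 * after the modify_gather_range() helper that follows.
 *
 * Change the alignnode of blastx to a pseudo tblastn and switch the
 * master sequence so that the blastx display will be the same as the
 * traditional blastx.
 *
 */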
/*
 * Rescale a gather range in place by a factor of three.
 *
 * expand == TRUE : left is multiplied by 3 and right is moved so that
 *                  the range covers three times its former length.
 * expand == FALSE: left is divided by 3 (integer division) and right is
 *                  moved so that the range covers a third of its former
 *                  length (integer division).
 */
static void modify_gather_range (GatherRangePtr grp, Boolean expand)
{
    Int4 span = grp->right - grp->left + 1;

    if (!expand) {
        grp->left /= 3;
        grp->right = grp->left + span/3 - 1;
        return;
    }

    grp->left *= 3;
    grp->right = grp->left + span * 3 - 1;
}
2499
change_blastx_master(ValNodePtr anp_list,AlignNodePtr PNTR master_anp)2500 static Boolean change_blastx_master(ValNodePtr anp_list, AlignNodePtr PNTR master_anp)
2501 {
2502
2503 ValNodePtr c_list;
2504 AnnotInfoPtr annot_info = NULL;
2505 AlignNodePtr anp, m_anp, t_anp;
2506 AlignSegPtr asp;
2507
2508 anp = NULL;
2509 m_anp = NULL;
2510 annot_info = NULL;
2511 for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2512 {
2513 if(c_list->choice == OBJ_SEQANNOT)
2514 {
2515 annot_info = (AnnotInfoPtr) c_list->data.ptrvalue;
2516 if(annot_info != NULL &&
2517 get_alignment_type(annot_info) != ALIGN_DNA_TO_PROT)
2518 annot_info = NULL;
2519 }
2520 else
2521 {
2522 t_anp = (AlignNodePtr) c_list->data.ptrvalue;
2523 if(t_anp->is_master || t_anp == *master_anp)
2524 m_anp = t_anp;
2525 else
2526 anp = t_anp;
2527 }
2528 }
2529
2530 if(m_anp == NULL || anp == NULL || annot_info == NULL)
2531 return FALSE;
2532
2533 /*shrink the interval */
2534 for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2535 {
2536 if(c_list->choice != OBJ_SEQANNOT)
2537 {
2538 t_anp = (AlignNodePtr) c_list->data.ptrvalue;
2539 modify_gather_range (&(t_anp->extremes), FALSE);
2540 for(asp = t_anp->segs; asp != NULL; asp = asp->next)
2541 {
2542 if(asp->type != INS_SEG)
2543 modify_gather_range (&(asp->gr), FALSE);
2544 }
2545 }
2546 }
2547
2548 annot_info->blast_type = ALIGN_TBLASTN;
2549 *master_anp = anp;
2550 anp->is_master = TRUE;
2551 m_anp->is_master = FALSE;
2552 return TRUE;
2553 }
2554
get_max_feature_label(AlignNodePtr anp)2555 static Int4 get_max_feature_label (AlignNodePtr anp)
2556 {
2557 AlignSegPtr asp;
2558 Int4 len = 0, f_len;
2559 FeatNodePtr fnp;
2560 ValNodePtr vnp;
2561
2562 for(asp = anp->segs; asp != NULL; asp = asp->next)
2563 {
2564 if(asp->type != GAP_SEG && asp->type != INS_SEG)
2565 {
2566 for(vnp = asp->cnp; vnp != NULL; vnp = vnp->next)
2567 {
2568 fnp = (FeatNodePtr) vnp->data.ptrvalue;
2569 if(fnp !=NULL && fnp->label != NULL)
2570 {
2571 f_len = StringLen(fnp->label);
2572 if(f_len > len)
2573 len = f_len;
2574 }
2575 }
2576 }
2577 }
2578
2579 return len;
2580 }
2581
2582
2583 /*
2584 *
2585 * look through the list of alignnode to figure out the maximum
2586 * length required to print the coordinates of the sequence in
2587 * alignment
2588 *
2589 */
get_max_coordinates_len(ValNodePtr anp_list,Int4Ptr max_label_size)2590 static Int4 get_max_coordinates_len (ValNodePtr anp_list, Int4Ptr max_label_size)
2591 {
2592 AlignNodePtr anp;
2593 AlignSegPtr asp;
2594 Int4 max_num, seqpos;
2595 Char buf[101];
2596 Int4 flabel_len;
2597
2598 max_num = 0;
2599 *max_label_size = 0;
2600 while(anp_list)
2601 {
2602 if(anp_list->choice != OBJ_SEQANNOT)
2603 {
2604 anp = (AlignNodePtr) anp_list->data.ptrvalue;
2605 if(anp->seqpos < 0)
2606 max_num = MAX(ABS(anp->seqpos), max_num);
2607 else
2608 {
2609 seqpos = anp->seqpos - 1;
2610 for(asp = anp->segs; asp != NULL; asp = asp->next)
2611 {
2612 if(asp->type != GAP_SEG)
2613 {
2614 if(asp->type == INS_SEG)
2615 seqpos += (asp->gr.right -1);
2616 else
2617 seqpos += (asp->gr.right - asp->gr.left + 1);
2618 }
2619 }
2620 max_num = MAX(seqpos, max_num);
2621 }
2622 if(anp->label != NULL)
2623 *max_label_size = MAX(*max_label_size, (Int4)StringLen(anp->label));
2624 flabel_len = get_max_feature_label (anp);
2625 if(flabel_len > (*max_label_size))
2626 *max_label_size = flabel_len;
2627 }
2628 anp_list = anp_list->next;
2629 }
2630 buf[0] = '\0';
2631 sprintf(buf, "%ld", (long) (max_num+1));
2632 return StringLen(buf);
2633 }
2634
2635
2636
2637 /*
2638 * do the converse of change blastx_master
2639 * revert it from tblastn to blastx
2640 */
revert_blastx_alignment(ValNodePtr anp_list,AlignNodePtr master_anp)2641 static Boolean revert_blastx_alignment (ValNodePtr anp_list, AlignNodePtr master_anp)
2642 {
2643 ValNodePtr c_list;
2644 AnnotInfoPtr annot_info;
2645 AlignNodePtr t_anp;
2646 AlignSegPtr asp;
2647
2648 for(c_list = anp_list; c_list != NULL; c_list = c_list->next)
2649 {
2650 if(c_list->choice == OBJ_SEQANNOT)
2651 {
2652 annot_info = (AnnotInfoPtr) c_list->data.ptrvalue;
2653 if(annot_info != NULL &&
2654 get_alignment_type(annot_info) == ALIGN_PROT_TO_DNA)
2655 annot_info->blast_type = ALIGN_BLASTX;
2656 }
2657 else
2658 {
2659 t_anp = (AlignNodePtr) c_list->data.ptrvalue;
2660 if(t_anp == master_anp)
2661 t_anp->is_master = TRUE;
2662 else
2663 t_anp->is_master = FALSE;
2664 /*expand to the original interval */
2665 modify_gather_range (&(t_anp->extremes), TRUE);
2666 for(asp = t_anp->segs; asp != NULL; asp = asp->next)
2667 {
2668 if(asp->type != INS_SEG)
2669 modify_gather_range (&(asp->gr), TRUE);
2670 }
2671 }
2672 }
2673
2674 return TRUE;
2675 }
2676
/*
 * Print doc to fp with its first line moved to the end: everything
 * after the first '\n' is written first, then the first line followed
 * by a newline.  When doc contains no '\n' it is written unchanged.
 *
 * The newline in doc is overwritten with '\0' while the first line is
 * printed and restored afterwards, so doc must be writable.
 */
static void reverse_print(FILE *fp, CharPtr doc)
{
    CharPtr eol = doc;

    while (*eol != '\n' && *eol != '\0')
        ++eol;

    if (*eol == '\0') {
        fprintf(fp, "%s", doc);
        return;
    }

    fprintf(fp, "%s", eol + 1);
    *eol = '\0';
    fprintf(fp, "%s\n", doc);
    *eol = '\n';
}
2697
/*
 * Build a new ValNode list that points at the nodes of anp_list which
 * hold a non-master AlignNode belonging to a Seq-annot of type
 * align_type and overlapping the interval [left, right].
 *
 * Nodes that precede the first Seq-annot node are considered only when
 * align_type is 0.  For the first non-master AlignNode after each
 * Seq-annot, PrintAlignForText() is consulted; if it returns FALSE all
 * nodes up to the next Seq-annot are skipped.
 *
 * Each node of the result stores the matching anp_list node (not the
 * AlignNode) in data.ptrvalue.  Returns NULL when nothing matches.
 */
static ValNodePtr get_anp_list_for_aligntype(ValNodePtr anp_list, Uint1 align_type,
                                             Int4 left, Int4 right)
{
    ValNodePtr head = NULL;
    ValNodePtr tail = NULL;
    ValNodePtr entry;
    ValNodePtr node;
    AnnotInfoPtr annot_info = NULL;
    AlignNodePtr anp;
    Boolean extract = (align_type == 0);
    Boolean first = TRUE;

    for (node = anp_list; node != NULL; node = node->next) {
        if (node->choice == OBJ_SEQANNOT) {
            annot_info = (AnnotInfoPtr) node->data.ptrvalue;
            extract = (get_alignment_type(annot_info) == align_type);
            first = TRUE;
            continue;
        }

        if (!extract)
            continue;

        anp = (AlignNodePtr) node->data.ptrvalue;
        if (anp->is_master)
            continue;

        /*check the first alignnode to see if it is legal*/
        if (first) {
            first = FALSE;
            if (!PrintAlignForText(annot_info, anp))
                extract = FALSE;
        }
        if (!extract)
            continue;

        /* Skip nodes that lie entirely outside [left, right]. */
        if (anp->extremes.left > right || anp->extremes.right < left)
            continue;

        entry = ValNodeNew(NULL);
        entry->data.ptrvalue = node;
        if (tail != NULL)
            tail->next = entry;
        else
            head = entry;
        tail = entry;
    }

    return head;
}
2757
modify_separation_bar(CharPtr buf,Int4 size,Int1 frame)2758 static Boolean modify_separation_bar(CharPtr buf, Int4 size, Int1 frame)
2759 {
2760 Char temp[21];
2761 Int4 len, start;
2762 Int4 i;
2763
2764 sprintf(temp, "BLASTX: frame = %d", frame);
2765 len = StringLen(temp);
2766 start = (size - len)/2;
2767 if(start < 0)
2768 return FALSE;
2769 else
2770 {
2771 for(i = 0; i<len; ++i)
2772 {
2773 buf[start+i] = temp[i];
2774 }
2775 return TRUE;
2776 }
2777 }
2778
2779
2780
2781 /*
2782 *
2783 * for hardline old blast users who prefer to see the label as Sbjct/Query
2784 *
2785 */
convert_label_to_query_subject(ValNodePtr anp_list)2786 static void convert_label_to_query_subject (ValNodePtr anp_list)
2787 {
2788 AlignNodePtr anp;
2789 Boolean first = TRUE;
2790
2791 while(anp_list)
2792 {
2793 if(anp_list->choice != OBJ_SEQANNOT)
2794 {
2795 anp = (AlignNodePtr) anp_list->data.ptrvalue;
2796 if(anp->is_master)
2797 {
2798 MemFree(anp->label);
2799 anp->label = StringSave("Query:");
2800 first = FALSE;
2801 }
2802 else
2803 {
2804 MemFree(anp->label);
2805 if(first)
2806 anp->label = StringSave("Query:");
2807 else
2808 anp->label = StringSave("Sbjct:");
2809 first = FALSE;
2810 }
2811 }
2812 anp_list = anp_list->next;
2813 }
2814 }
2815
2816 /*
2817 * request from Detlef: convert the sequence label to gi
2818 *
2819 */
convert_label_to_gi(ValNodePtr anp_list)2820 static void convert_label_to_gi(ValNodePtr anp_list)
2821 {
2822 AlignNodePtr anp;
2823 Char temp[101];
2824
2825 while(anp_list)
2826 {
2827 if(anp_list->choice != OBJ_SEQANNOT)
2828 {
2829 anp = (AlignNodePtr) anp_list->data.ptrvalue;
2830 if(anp && anp->sip && anp->sip->choice == SEQID_GI &&
2831 !anp->keep_label)
2832 {
2833 sprintf(temp, "%ld", (long) anp->sip->data.intvalue);
2834 if(anp->label != NULL)
2835 MemFree(anp->label);
2836 anp->label = StringSave(temp);
2837 }
2838 }
2839 anp_list = anp_list->next;
2840 }
2841 }
2842
2843 /*
2844 *
2845 * for the display of tblastx, only one Seq-annot is allowed at
2846 * any given time
2847 *
2848 */
illegal_tblastx_anp(ValNodePtr anp_list,BoolPtr has_tblastx)2849 static Boolean illegal_tblastx_anp (ValNodePtr anp_list, BoolPtr has_tblastx)
2850 {
2851 AnnotInfoPtr info;
2852 Int2 info_num = 0;
2853
2854 *has_tblastx = FALSE;
2855
2856 while(anp_list)
2857 {
2858 if(anp_list->choice == OBJ_SEQANNOT)
2859 {
2860 info = (AnnotInfoPtr) anp_list->data.ptrvalue;
2861 if(info->blast_type == ALIGN_TBLASTX)
2862 *has_tblastx = TRUE;
2863 ++info_num;
2864 }
2865 anp_list = anp_list->next;
2866 }
2867
2868 if(*has_tblastx && info_num > 1)
2869 return TRUE;
2870 else
2871 return FALSE;
2872 }
2873
/*
 * Scale pos by exp_val: multiply when inverse is FALSE, divide (integer
 * division) when inverse is TRUE.  exp_val must not be 0 when inverse
 * is TRUE.
 */
static Int4 expand_position(Int4 pos, Int4 exp_val, Boolean inverse)
{
    if (inverse)
        return pos / exp_val;

    return pos * exp_val;
}
2878
2879
/*
 * Rescale the coordinates of every AlignNode in the list with
 * expand_position(.., val, inverse).
 *
 * extremes.left of a node is not changed and serves as the origin: the
 * node's extremes.right, the ins_pos of insertion segments, and the
 * gr.left/gr.right of all other segments are rescaled as offsets from
 * it.  The gr.right of an insertion segment is rescaled as a plain
 * value, without the origin.
 */
static void modify_tblastx_value (ValNodePtr anp_list, Int4 val, Boolean inverse)
{
    ValNodePtr node;
    AlignNodePtr anp;
    AlignSegPtr seg;
    Int4 origin;

    for (node = anp_list; node != NULL; node = node->next) {
        if (node->choice == OBJ_SEQANNOT)
            continue;

        anp = (AlignNodePtr) node->data.ptrvalue;
        origin = anp->extremes.left;
        anp->extremes.right =
            origin + expand_position(anp->extremes.right - origin, val, inverse);

        for (seg = anp->segs; seg != NULL; seg = seg->next) {
            if (seg->type != INS_SEG) {
                seg->gr.left =
                    origin + expand_position(seg->gr.left - origin, val, inverse);
                seg->gr.right =
                    origin + expand_position(seg->gr.right - origin, val, inverse);
            } else {
                seg->ins_pos =
                    origin + expand_position(seg->ins_pos - origin, val, inverse);
                seg->gr.right = expand_position(seg->gr.right, val, inverse);
            }
        }
    }
}
2912
2913 static
2914 SeqIdPtr
get_seq_id(SeqAlignPtr sap,Int2 index)2915 get_seq_id(SeqAlignPtr sap, Int2 index)
2916 {
2917 SeqIdPtr sip = NULL;
2918 if (sap->segtype == 1) {
2919 DenseDiagPtr ddp = (DenseDiagPtr)sap->segs;
2920 sip = ddp->id;
2921 }
2922 else if (sap->segtype == 2) {
2923 DenseSegPtr dsp = (DenseSegPtr)sap->segs;
2924 sip = dsp->ids;
2925 }
2926 for (; sip != NULL && --index >= 0; sip = sip->next);
2927 return sip;
2928 }
2929
2930 /***********************************************************************
2931 *
2932 * ShowAlignNodeText(anp_list, num_node, line_len, locus,
2933 * fp)
2934 * convert the alignment data in the list of AlignNode into text written
2935 * to a file
2936 * anp_list: a list (ValNodePtr) of AlignNode processed from Seq-aligns
2937 * num_node: the number of AlignNode to be processed currently. It can
2938 * be used in the cases where only the top num_node in the anp_list is
2939 * going to be processed. This can be useful to make vertically cashed
2940 * buffer
2941 * line_len: the length of sequence char per line
2942 * locus: if TRUE, show the locus name
2943 * fp: the file Pointer
2944 * left: the leftmost position for display
2945 * right: the rightmost position for display
2946 * align_type: the type of alignment. DNA-protein alignment?
2947 *
2948 * return TRUE for success, FALSE for fail
2949 *
2950 ************************************************************************/
ShowAlignNodeText(ValNodePtr anp_list,Int2 num_node,Int4 line_len,FILE * fp,Int4 left,Int4 right,Uint4 option,Int4Ptr PNTR matrix,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)))2951 NLM_EXTERN Boolean ShowAlignNodeText(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)))
2952 {
2953 return ShowAlignNodeText2Ex(anp_list, num_node, line_len, fp, left, right,
2954 option, matrix, fmt_score_func, NULL, NULL, NULL, NULL);
2955 }
2956
ShowAlignNodeText2(ValNodePtr anp_list,Int2 num_node,Int4 line_len,FILE * fp,Int4 left,Int4 right,Uint4 option,Int4Ptr PNTR matrix,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr blast_type,Int4Ptr PNTR posMatrix)2957 NLM_EXTERN Boolean ShowAlignNodeText2(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix)
2958 {
2959 return ShowAlignNodeText2Ex(anp_list, num_node, line_len, fp, left, right, option, matrix, fmt_score_func, db_name, blast_type, posMatrix, NULL);
2960 }
2961
2962 /**
2963 * transforms a string so that it becomes safe to be used as part of URL
2964 * the function converts characters with special meaning (such as
2965 * semicolon -- protocol separator) to escaped hexadecimal (%xx)
2966 */
2967 static
2968 CharPtr
MakeURLSafe(CharPtr src)2969 MakeURLSafe(CharPtr src)
2970 {
2971 static Char HEXDIGS[] = "0123456789ABCDEF";
2972 CharPtr buf;
2973 size_t len;
2974 CharPtr p;
2975 Char c;
2976
2977 if (src == NULL) {
2978 return NULL;
2979 }
2980 /* first pass to calculate required buffer size */
2981 for (p = src, len = 0; (c = *(p++)) != '\0'; ) {
2982 switch (c) {
2983 default:
2984 if (c < '0' || (c > '9' && c < 'A') ||
2985 (c > 'Z' && c < 'a') || c > 'z') {
2986 len += 3;
2987 break;
2988 }
2989 case '-': case '_': case '.': case '!': case '~':
2990 case '*': case '\'': case '(': case ')':
2991 ++len;
2992 }
2993 }
2994 buf = (CharPtr)MemNew(len + 1);
2995 /* second pass -- conversion */
2996 for (p = buf; (c = *(src++)) != '\0'; ) {
2997 switch (c) {
2998 default:
2999 if (c < '0' || (c > '9' && c < 'A') ||
3000 (c > 'Z' && c < 'a') || c > 'z') {
3001 *(p++) = '%';
3002 *(p++) = HEXDIGS[(c >> 4) & 0xf];
3003 *(p++) = HEXDIGS[c & 0xf];
3004 break;
3005 }
3006 case '-': case '_': case '.': case '!': case '~':
3007 case '*': case '\'': case '(': case ')':
3008 *(p++) = c;
3009 }
3010 }
3011 *p = '\0';
3012 return buf;
3013 }
3014
ShowAlignNodeText2Ex(ValNodePtr anp_list,Int2 num_node,Int4 line_len,FILE * fp,Int4 left,Int4 right,Uint4 option,Int4Ptr PNTR matrix,int (LIBCALLBACK * fmt_score_func)PROTO ((AlignStatOptionPtr)),CharPtr db_name,CharPtr blast_type,Int4Ptr PNTR posMatrix,SeqAlignPtr PNTR last_align)3015 static Boolean ShowAlignNodeText2Ex(ValNodePtr anp_list, Int2 num_node, Int4 line_len, FILE *fp, Int4 left, Int4 right, Uint4 option, Int4Ptr PNTR matrix, int (LIBCALLBACK *fmt_score_func)PROTO((AlignStatOptionPtr)), CharPtr db_name, CharPtr blast_type, Int4Ptr PNTR posMatrix, SeqAlignPtr PNTR last_align)
3016 {
3017 CharPtr bar, sep_bar;
3018 CharPtr num_str;
3019 Int4 i, j;
3020 Int4 num;
3021
3022 Int4 c_start, c_stop;
3023 CharPtr m_buf, cm_buf; /*text for the master sequence*/
3024 BioseqPtr m_bsp;
3025 Int4 m_len; /*length of the master sequence*/
3026
3027 ValNodePtr list; /*list of DrawText*/
3028 AlignNodePtr anp, master_anp;
3029 Int4Ptr p_stop;
3030 Boolean is_end, strip_semicolon=TRUE;
3031 CharPtr docbuf, master_docbuf;
3032 Uint1 all_frame[6];
3033 SeqFeatPtr fake_cds;
3034 Int1 frame;
3035 Uint1 align_type;
3036 ValNodePtr curr;
3037 ValNodePtr c_list, PNTR pc_list;
3038 ValNodePtr a_list;
3039 Boolean is_html;
3040 Int4Ptr PNTR t_matrix;
3041 Boolean compress;
3042 Int4 last_pos;
3043 Boolean load_last_pos;
3044 ValNodePtr id_list;
3045 BioseqPtr bsp;
3046 SeqAlignPtr align;
3047 ScorePtr sp;
3048 AlignStatOption aso;
3049 AlignSum as;
3050 AlignSumPtr asp;
3051 Boolean reverse_display;
3052 Boolean has_data;
3053 Boolean show_score;
3054 Int4 max_label_size;
3055 Int4 max_num_size;
3056 Int4 empty_space;
3057 Boolean show_strand;
3058 Boolean has_tblastx;
3059 SeqIdPtr already_linked=NULL;
3060
3061
3062 if(anp_list == NULL)
3063 return FALSE;
3064
3065 /*for tblastx, only one Seq-annot at time*/
3066 if(illegal_tblastx_anp (anp_list, &has_tblastx))
3067 return FALSE;
3068
3069 /*for alignment that is not a same-molecule, needs to have a master*/
3070 master_anp = get_master_align_node(anp_list);
3071 if(master_anp == NULL) {
3072 Message(MSG_ERROR, "Fail to the master AlignNode");
3073 return FALSE;
3074 }
3075 is_html = (Boolean)(option & TXALIGN_HTML);
3076 load_last_pos = (Boolean)(option & TXALIGN_END_NUM);
3077
3078
3079 /* for hard line old blast user only!!!!! */
3080 if(option & TXALIGN_SHOW_QS)
3081 convert_label_to_query_subject (anp_list);
3082 else if(option & TXALIGN_SHOW_GI)
3083 convert_label_to_gi(anp_list);
3084
3085 compress = (Boolean)(option & TXALIGN_COMPRESS);
3086 if(compress)
3087 max_num_size = get_max_coordinates_len (anp_list, &max_label_size);
3088 else {
3089 max_num_size = POS_SPACE;
3090 max_label_size = B_SPACE;
3091 }
3092
3093 /* for display of the traditional regular blastX output */
3094 reverse_display = FALSE;
3095 if(option & TXALIGN_BLASTX_SPECIAL) {
3096 reverse_display = reverse_blastx_order (anp_list);
3097 if(reverse_display)
3098 change_blastx_master(anp_list, &master_anp);
3099 }
3100
3101 if(has_tblastx)
3102 modify_tblastx_value (anp_list, 3, TRUE);
3103 if(left == -1)
3104 left = master_anp->extremes.left;
3105 if(right == -1)
3106 right = master_anp->extremes.right;
3107 if(left > master_anp->extremes.right || right < master_anp->extremes.left) {
3108 if(reverse_display)
3109 revert_blastx_alignment (anp_list, master_anp);
3110 return FALSE;
3111 }
3112
3113 /*check for the molecule type of not-normal DNA-protein alignment*/
3114 fake_cds = NULL;
3115 frame = 0;
3116 m_bsp = BioseqLockById(master_anp->sip);
3117 if(m_bsp == NULL) {
3118 if(reverse_display)
3119 revert_blastx_alignment (anp_list, master_anp);
3120 return FALSE;
3121 }
3122
3123 if(m_bsp->mol!= Seq_mol_aa) { /*a nucleotide sequence*/
3124 m_len = m_bsp->length;
3125 fake_cds = make_fake_cds(m_bsp, 0, m_bsp->length-1, Seq_strand_plus);
3126 load_master_translate_frame(anp_list, m_len, m_bsp);
3127 }
3128
3129
3130 ObjMgrSetHold();
3131 left = MAX(left, master_anp->extremes.left);
3132 right = MIN(right, master_anp->extremes.right);
3133
3134
3135 for(curr=anp_list, i=0; curr!=NULL; curr= curr->next) { /*initiate the position*/
3136
3137 if(curr->choice != OBJ_SEQANNOT) {
3138 anp = (AlignNodePtr) curr->data.ptrvalue;
3139 anp->align_num = i;
3140 ++i;
3141 }
3142 }
3143 p_stop = (Int4Ptr) MemNew((size_t)(i) * sizeof(Int4));
3144 for(j=0; j<i; ++j)
3145 p_stop[j] = -1;
3146 if(compress) {
3147 empty_space = max_num_size + 1 + max_label_size + 1;
3148 if(option & TXALIGN_SHOW_STRAND) {
3149 empty_space += STRAND_SPACE;
3150 show_strand = TRUE;
3151 } else
3152 show_strand = FALSE;
3153 } else {
3154 empty_space = get_num_empty_space(compress);
3155 show_strand = TRUE;
3156 }
3157
3158 make_scale_bar_str(&bar, &num_str, compress? empty_space : empty_space+1, line_len);
3159 num = line_len + empty_space;
3160 sep_bar = (CharPtr) MemGet((size_t)(num+1)*sizeof(Char), MGET_ERRPOST);
3161 MemSet((Pointer)sep_bar, '-',(size_t)num* sizeof(Char));
3162 sep_bar[num] = '\0';
3163
3164 if(is_html)
3165 fprintf(fp, "<PRE>\n");
3166 c_start = left;
3167
3168 /* If a tool_url is set, then we use this rather than Entrez. */
3169 if (blast_type && !(option & TXALIGN_NO_ENTREZ))
3170 {
3171 Char tool_url[128];
3172
3173 *tool_url = NULLB;
3174 GetAppParam("NCBI", blast_type, "TOOL_URL", "", tool_url, sizeof(tool_url));
3175 /* Do use Entrez if available if the tool is dumpgnl.cgi as it does not handle gi's. */
3176 if (*tool_url != NULLB && StringStr(tool_url, "dumpgnl.cgi") == NULL)
3177 option |= TXALIGN_NO_ENTREZ;
3178 }
3179
3180 MemSet(&aso, '\0', sizeof(AlignStatOption));
3181
3182 /*format the summary for the score */
3183 if(fmt_score_func != NULL) {
3184 aso.txalign_options = option;
3185 aso.indent_len = (Int2)empty_space;
3186 aso.line_len = (Int2)(line_len + empty_space);
3187 aso.html_hot_link_relative = FALSE;
3188 if (option & TXALIGN_NO_ENTREZ)
3189 aso.no_entrez = TRUE;
3190 else
3191 aso.no_entrez = FALSE;
3192
3193 if (option & TXALIGN_NO_DUMPGNL)
3194 aso.no_dumpgnl = TRUE;
3195 else
3196 aso.no_dumpgnl = FALSE;
3197
3198 if (option & TXALIGN_HTML) {
3199 aso.html_hot_link = TRUE;
3200 if (option & TXALIGN_HTML_RELATIVE)
3201 aso.html_hot_link_relative = TRUE;
3202 } else {
3203 aso.html_hot_link = FALSE;
3204 }
3205 if (option & TXALIGN_SHOW_GI)
3206 aso.show_gi = TRUE;
3207 else
3208 aso.show_gi = FALSE;
3209 aso.fp = fp;
3210 aso.buf = NULL;
3211 id_list = NULL;
3212 aso.segs = NULL;
3213 if (blast_type)
3214 {
3215 aso.blast_type = StringSave(blast_type);
3216 StringUpper(aso.blast_type);
3217 }
3218 else
3219 {
3220 aso.blast_type = NULL;
3221 }
3222 for(curr = anp_list; curr != NULL; curr = curr->next) {
3223 if(curr->choice != OBJ_SEQANNOT) {
3224 anp = (AlignNodePtr) curr->data.ptrvalue;
3225 show_score = FALSE;
3226 if((reverse_display && anp == master_anp) || (!reverse_display && anp != master_anp)) {
3227 if(!check_bsp_id(&id_list, anp->sip))
3228 show_score = TRUE;
3229 }
3230 if(show_score) {
3231 /*the first time it sees the Bioseq*/
3232 bsp = BioseqLockById(anp->sip);
3233 align = NULL;
3234 /* Use gather for translated searches and ungapped blast. */
3235 if(!has_tblastx && last_align && *last_align && (*last_align)->segtype == 2 && (*last_align)->next)
3236 {
3237 align = (*last_align)->next;
3238 }
3239 else
3240 {
3241 align = NULL;
3242 GatherItem(anp->entityID, anp->itemID, (Uint2)(curr->choice), (Pointer)(&align), find_align_proc);
3243 }
3244 if (last_align)
3245 *last_align = align;
3246 if(align != NULL) {
3247 if(align->segtype == 1 || align->segtype == 2 || align->segtype == 3 || align->segtype == 5) {
3248 as.matrix = matrix;
3249 as.posMatrix = posMatrix;
3250 as.master_sip = master_anp->sip;
3251 as.target_sip = anp->sip;
3252 as.is_aa = (m_bsp->mol == Seq_mol_aa);
3253 as.ooframe = FALSE; /* Not supported */
3254 as.m_frame_set = FALSE;
3255 as.t_frame_set = FALSE;
3256 asp = &as;
3257 } else
3258 asp = NULL;
3259 sp = find_score_in_align(align, anp->chain, asp);
3260 if(sp != NULL) {
3261 aso.follower = anp->follower;
3262 aso.bsp = bsp;
3263 aso.sp = sp;
3264 aso.db_name = db_name;
3265 if(asp != NULL) {
3266 aso.gaps = asp->gaps;
3267 aso.positive = asp->positive;
3268 aso.identical = asp->identical;
3269 aso.align_len = asp->totlen;
3270
3271 /* This information was added for links to
3272 specific alignment only, but may be used
3273 for something else */
3274
3275 aso.master_from = asp->master_from;
3276 aso.master_to = asp->master_to;
3277 aso.target_from = asp->target_from;
3278 aso.target_to = asp->target_to;
3279
3280
3281 if (asp->m_frame_set) {
3282 aso.m_frame = asp->m_frame;
3283 } else {
3284 aso.m_frame = 255;
3285 }
3286
3287 if (asp->t_frame_set) {
3288 aso.t_frame = asp->t_frame;
3289 } else {
3290 aso.t_frame = 255;
3291 }
3292
3293 aso.m_strand = asp->m_strand;
3294 aso.t_strand = asp->t_strand;
3295 } else {
3296 aso.align_len = 0;
3297 }
3298
3299 aso.segs = NULL;
3300 if (aso.follower == FALSE)
3301 {
3302 SeqAlignPtr sap;
3303 size_t size = 0;
3304 size_t used = 0;
3305
3306 for (sap = align; sap != NULL; sap = sap->next) {
3307 if (SeqIdMatch(TxGetSubjectIdFromSeqAlign(align), TxGetSubjectIdFromSeqAlign(sap))) {
3308 if (aso.segs != NULL) {
3309 StringAppend(&aso.segs, &size, ",", &used);
3310 }
3311 SeqAlignSegsStr(sap, 1, &aso.segs, &size, &used);
3312 } else
3313 break;
3314 }
3315 if (aso.segs == NULL) {
3316 /**
3317 * Something is really wrong if we're here
3318 */
3319 aso.segs = StringSave("");
3320 }
3321 }
3322 fmt_score_func(&aso);
3323 /* Print seqids of other sequences in a cluster if clustering of hits
3324 has been done */
3325 if (!aso.follower) {
3326 BioseqPtr bsp;
3327 SeqIdPtr sip, sip_head;
3328 Char buffer[BUFFER_LENGTH+1]/*, line[BUFFER_LENGTH+1]*/;
3329 Int4 buf_len, gi, index;
3330 CharPtr title;
3331 Char HTML_dopt[8], HTML_database[11];
3332 Char HTML_buffer[BUFFER_LENGTH+1];
3333
3334 /* Cluster sequences ids are saved in align->master (kludge) */
3335 sip_head = align->master;
3336 if (sip_head) {
3337 for (sip=sip_head, index=0; sip; sip = sip->next, index++);
3338 fprintf(fp, " Other sequences in the cluster (%d total)\n", index);
3339 for (sip = sip_head; sip; sip = sip->next) {
3340 /* SeqIds of other sequences in a cluster are printed here */
3341 bsp = BioseqLockById(sip);
3342 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
3343 buf_len = StrLen(buffer);
3344 title = (CharPtr) Malloc((num-buf_len+1)*sizeof(Char));
3345 sprintf(title, "%.*s", num-buf_len, BioseqGetTitle(bsp));
3346
3347 if (ISA_na(bsp->mol)) {
3348 StringCpy(HTML_dopt, "GenBank");
3349 StringCpy(HTML_database, "Nucleotide");
3350 } else {
3351 StringCpy(HTML_dopt, "GenPept");
3352 StringCpy(HTML_database, "Protein");
3353 }
3354
3355 if (bsp->id->choice == SEQID_GI) {
3356 gi = bsp->id->data.intvalue;
3357 sprintf(HTML_buffer,
3358 "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
3359 NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
3360 option & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
3361 fprintf(fp, " + %s%s </a> %s\n", HTML_buffer, buffer, title);
3362 } else {
3363 fprintf(fp, " + %s </a> %s\n", buffer, title);
3364 }
3365 BioseqUnlock(bsp);
3366 }
3367 fprintf(fp, "\n\n");
3368 }
3369 if (option & TXALIGN_BL2SEQ_LINK) {
3370 CharPtr id1, id2;
3371 Char buffer[BUFFER_LENGTH+1];
3372 BioseqPtr bsp;
3373
3374 bsp = BioseqLockById(asp->master_sip);
3375
3376 SeqIdWrite(SeqIdFindBest(bsp->id, SEQID_GI), buffer, PRINTID_FASTA_SHORT, BUFFER_LENGTH);
3377 id1 = MakeURLSafe(buffer);
3378 BioseqUnlock(bsp);
3379
3380 SeqIdWrite(SeqIdFindBest(asp->target_sip, SEQID_GI), buffer, PRINTID_FASTA_SHORT, BUFFER_LENGTH);
3381 id2 = MakeURLSafe(buffer);
3382
3383 fprintf(fp, "<A HREF=%s?PROGRAM=tblastx&WORD=3&RID=%s&ONE=%s&TWO=%s> Get TBLASTX alignments </A>\n",
3384 WBLAST2_HREF, RID_glb, id1, id2);
3385 }
3386
3387 }
3388
3389 aso.segs = (CharPtr) MemFree(aso.segs);
3390 }
3391 }
3392 if(bsp != NULL)
3393 BioseqUnlock(bsp);
3394 }
3395 }
3396 }
3397 aso.blast_type = (CharPtr) MemFree(aso.blast_type);
3398 ValNodeFree(id_list);
3399 }
3400
3401 pc_list = (ValNodePtr *) MemNew((size_t)(ALIGN_MAX_TYPE +1) * sizeof(ValNodePtr));
3402 has_data = FALSE;
3403
3404 for(i = 0; i<=ALIGN_MAX_TYPE; ++i) {
3405 pc_list[i]= get_anp_list_for_aligntype(anp_list, (Uint1)i, left, right);
3406 if(pc_list[i] != NULL)
3407 has_data = TRUE;
3408 }
3409
3410 if(option & TXALIGN_SHOW_QS) {
3411 is_html = FALSE;
3412 strip_semicolon = FALSE;
3413 }
3414 master_docbuf = NULL;
3415 if(has_data) {
3416 while(c_start <= right) { /*process line by line*/
3417
3418 c_stop = MIN(right, (c_start+line_len -1));
3419 m_buf = NULL;
3420 is_end = FALSE;
3421 docbuf = NULL;
3422
3423 /*process the master sequence*/
3424 if(has_tblastx)
3425 list = ProcessTextAlignNode(master_anp, c_start, c_stop, &(p_stop[master_anp->align_num]), NULL, line_len, -1, option, matrix);
3426 else
3427 list = ProcessTextAlignNode(master_anp, c_start, c_stop, &(p_stop[master_anp->align_num]), NULL, line_len, 0, option, NULL);
3428 if(list != NULL) {
3429 if(option & TXALIGN_SHOW_RULER) {
3430 fprintf(fp, "%s\n", num_str); /*show scale*/
3431 fprintf(fp, "%s\n", bar);
3432 }
3433 last_pos = load_last_pos ? p_stop[master_anp->align_num] : -1;
3434 docbuf = DrawTextToBuffer(list, &m_buf, is_html, max_label_size, max_num_size, compress, matrix, last_pos, line_len, show_strand, strip_semicolon, &already_linked, option);
3435 if(docbuf !=NULL) {
3436 if(reverse_display)
3437 master_docbuf = docbuf;
3438 else {
3439 fprintf(fp, "%s", docbuf);
3440 MemFree(docbuf);
3441 }
3442 }
3443 list = FreeTextAlignList(list);
3444 }
3445
3446 for(align_type = 0; align_type <= ALIGN_MAX_TYPE; ++align_type) {
3447 c_list = pc_list[align_type];
3448 if(c_list != NULL) {
3449 if(align_type == ALIGN_DNA_TO_PROT) {
3450 /*process the hit protein sequence*/
3451 if(get_current_master_frame(c_list, c_start, c_stop, all_frame)) {
3452 list = NULL;
3453 for(j = 0; j<6; ++j) {
3454 frame = all_frame[j];
3455 if(frame > 0) {
3456 /*translate the master sequence in the specified frame*/
3457 cm_buf = translate_faked_cds(fake_cds, frame, c_start, c_stop, m_len, master_anp);
3458 list = load_fake_protein_buf(cm_buf, frame, master_anp);
3459 docbuf = DrawTextToBuffer(list, NULL, is_html, max_label_size, max_num_size, compress, matrix, -1, line_len, show_strand, strip_semicolon, &already_linked, option);
3460 if(docbuf != NULL) {
3461 modify_separation_bar(sep_bar, num, frame);
3462 fprintf(fp, "%s\n", sep_bar);
3463 fprintf(fp, "%s", docbuf);
3464 MemFree(docbuf);
3465 }
3466 FreeTextAlignList(list);
3467
3468 for(curr = c_list; curr != NULL; curr = curr->next) {
3469 a_list = (ValNodePtr) curr->data.ptrvalue;
3470 anp = (AlignNodePtr) a_list->data.ptrvalue;
3471 if(anp != master_anp) {
3472 list = ProcessTextAlignNode(anp, c_start, c_stop, &(p_stop[anp->align_num]), cm_buf, line_len, frame, option, matrix);
3473 if(list != NULL) {
3474 docbuf = DrawTextToBuffer(list, NULL, is_html, max_label_size, max_num_size, compress, matrix, -1, line_len, show_strand, strip_semicolon, &already_linked, option);
3475 if(docbuf != NULL) {
3476 fprintf(fp, "%s", docbuf);
3477 MemFree(docbuf);
3478 }
3479 FreeTextAlignList(list);
3480 }
3481 }
3482 }
3483 MemFree(cm_buf);
3484 } /*end of frame > 0 */
3485 }
3486 }
3487 } else {
3488 if(align_type == ALIGN_PROT_TO_DNA || align_type == ALIGN_TDNA_TO_TDNA)
3489 frame = -1;
3490 else
3491 frame = 0;
3492 if(frame == 0 && m_bsp->mol != Seq_mol_aa)
3493 t_matrix = NULL;
3494 else
3495 t_matrix = matrix;
3496 is_end = FALSE;
3497 for(curr = c_list; curr !=NULL; curr = curr->next) {
3498 a_list = (ValNodePtr) curr->data.ptrvalue;
3499 anp = (AlignNodePtr) a_list->data.ptrvalue;
3500 if(anp != master_anp) {
3501 /*generate the DrawText buffer*/
3502 if(align_type == ALIGN_NORMAL && m_bsp->mol != Seq_mol_aa)
3503 /*DNA alignment */
3504 list = ProcessTextAlignNode(anp, c_start, c_stop, &(p_stop[anp->align_num]), m_buf, line_len, frame, option, NULL);
3505 else
3506 list = ProcessTextAlignNode2(anp, c_start, c_stop, &(p_stop[anp->align_num]), m_buf, line_len, frame, option, t_matrix, posMatrix, master_anp->seqpos);
3507
3508 last_pos = load_last_pos ? p_stop[anp->align_num] : -1;
3509 if(list != NULL) {
3510 /*DrawTextList(list, fp);*/
3511 docbuf = DrawTextToBuffer(list, NULL, is_html, max_label_size, max_num_size, compress, t_matrix, last_pos, line_len, show_strand, strip_semicolon, &already_linked, option);
3512 if(docbuf !=NULL) {
3513 if(reverse_display) {
3514 reverse_print(fp, docbuf);
3515 fprintf(fp, "%s", master_docbuf);
3516 MemFree(master_docbuf);
3517 } else
3518 fprintf(fp, "%s", docbuf);
3519 MemFree(docbuf);
3520 }
3521 list = FreeTextAlignList(list);
3522 }
3523 }
3524 }
3525 } /*end of else*/
3526 }
3527 }
3528
3529 if(m_buf != NULL)
3530 MemFree(m_buf);
3531 if(c_stop < right)
3532 fprintf(fp, "\n");
3533 c_start = c_stop+1;
3534 }
3535 }
3536 for(i = 0; i<=ALIGN_MAX_TYPE; ++i) {
3537 if(pc_list[i] != NULL)
3538 ValNodeFree(pc_list[i]);
3539 }
3540 MemFree(pc_list);
3541
3542 if(option & TXALIGN_HTML)
3543 fprintf(fp, "</PRE>\n");
3544
3545 already_linked = ValNodeFree(already_linked);
3546
3547 if(fake_cds != NULL)
3548 SeqFeatFree(fake_cds);
3549 BioseqUnlock(m_bsp);
3550 if(has_tblastx)
3551 modify_tblastx_value (anp_list, 3, FALSE);
3552 MemFree(num_str);
3553 MemFree(sep_bar);
3554 MemFree(bar);
3555 MemFree(p_stop);
3556 ObjMgrClearHold();
3557 if(reverse_display)
3558 revert_blastx_alignment (anp_list, master_anp);
3559 return has_data;
3560 }
3561
3562 /*
3563 Adds tdsp to the end of a chain of TxDfLineStructPtr's.
3564 Returns the new TxDfLineStructPtr.
3565 */
3566
3567 static TxDfLineStructPtr
TxDfLineStructAdd(TxDfLineStructPtr PNTR head,TxDfLineStructPtr tdsp)3568 TxDfLineStructAdd(TxDfLineStructPtr PNTR head, TxDfLineStructPtr tdsp)
3569
3570 {
3571 TxDfLineStructPtr var;
3572
3573 if (*head == NULL)
3574 {
3575 *head = tdsp;
3576 }
3577 else
3578 {
3579 var = *head;
3580 while (var->next)
3581 {
3582 var = var->next;
3583 }
3584 var->next = tdsp;
3585 }
3586
3587 return tdsp;
3588 }
3589
3590 /*
3591 Filters the FASTA definition lines based on the SeqIdPtr's given
3592 as input. The gi_list is used to pull sequences out of the BioseqPtr,
3593 The buffer_id contains FASTA formatted ID, title contains the rest
3594 of the title.
3595 */
3596
3597 NLM_EXTERN Boolean LIBCALL
FilterTheDefline(BioseqPtr bsp,SeqIdPtr gi_list_head,CharPtr buffer_id,Int4 buffer_id_length,CharPtr PNTR titlepp)3598 FilterTheDefline (BioseqPtr bsp, SeqIdPtr gi_list_head, CharPtr buffer_id, Int4 buffer_id_length, CharPtr PNTR titlepp)
3599
3600 {
3601 Boolean first_time, found_gi, found_first_gi, not_done;
3602 CharPtr bsp_title, bsp_title_ptr, title, title_ptr;
3603 Char buffer[BUFFER_LENGTH], id_buf[255];
3604 Int4 index;
3605 SeqIdPtr gi_list, sip;
3606
3607 if (bsp == NULL || gi_list_head == NULL)
3608 return FALSE;
3609
3610 bsp_title = BioseqGetTitle(bsp);
3611 bsp_title_ptr = bsp_title;
3612 /* This is the longest it could be, this could be done more efficiently. */
3613 title = (CharPtr) MemNew((256+StringLen(bsp_title))*sizeof(Char));
3614 title_ptr = title;
3615 *titlepp = title;
3616
3617 /*
3618 if (bsp_title_ptr == NULL)
3619 return FALSE;
3620 */
3621
3622 first_time = TRUE;
3623 found_first_gi = TRUE;
3624 not_done = TRUE;
3625 while (not_done) {
3626 if (!first_time) {
3627 index=0;
3628 id_buf[0] = NULLB;
3629 if (bsp_title_ptr) {
3630 while (*bsp_title_ptr != NULLB) {
3631 if (*bsp_title_ptr == ' ') {
3632 id_buf[index] = NULLB;
3633 break;
3634 }
3635 id_buf[index] = *bsp_title_ptr;
3636 bsp_title_ptr++;
3637 index++;
3638 }
3639 }
3640 if (id_buf[0] == NULLB)
3641 break;
3642 sip = SeqIdParse(id_buf);
3643 } else {
3644 sip = bsp->id;
3645 }
3646
3647 found_gi = FALSE;
3648 gi_list = gi_list_head;
3649 while (gi_list) {
3650 if(SeqIdIn(gi_list, sip) == TRUE) {
3651 found_gi = TRUE;
3652 break;
3653 }
3654 gi_list = gi_list->next;
3655 }
3656
3657 if (found_gi) {
3658 if (!found_first_gi) {
3659 *title_ptr = '>';
3660 title_ptr++;
3661 SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
3662 StringCpy(title_ptr, buffer);
3663 title_ptr += StringLen(buffer);
3664 } else {
3665 SeqIdWrite(sip, buffer_id, PRINTID_FASTA_LONG, buffer_id_length);
3666 found_first_gi = FALSE;
3667 }
3668
3669 if (bsp_title_ptr) {
3670 while (*bsp_title_ptr != '>' && *bsp_title_ptr != NULLB) {
3671 *title_ptr = *bsp_title_ptr;
3672 bsp_title_ptr++;
3673 title_ptr++;
3674 }
3675 }
3676 } else {
3677 if (bsp_title_ptr) {
3678 while (*bsp_title_ptr != '>' && *bsp_title_ptr != NULLB)
3679 bsp_title_ptr++;
3680 }
3681 }
3682
3683 if (first_time) {
3684 first_time = FALSE;
3685 } else {
3686 sip = SeqIdSetFree(sip);
3687 }
3688
3689 if (bsp_title_ptr) {
3690 if (*bsp_title_ptr == '>')
3691 bsp_title_ptr++;
3692
3693 if (*bsp_title_ptr == NULLB) {
3694 *title_ptr = NULLB;
3695 break;
3696 }
3697 }
3698 }
3699 return TRUE;
3700 }
3701
3702
3703 #define STATS_LENGTH 14
3704 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromAnnot(SeqAnnotPtr seqannot,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks)3705 PrintDefLinesFromAnnot(SeqAnnotPtr seqannot, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks)
3706
3707 {
3708 Boolean retval;
3709
3710 if (seqannot == NULL || seqannot->type != 2)
3711 {
3712 return FALSE;
3713 }
3714
3715 retval = PrintDefLinesFromSeqAlign((SeqAlignPtr) seqannot->data, line_length, outfp, options, mode, marks);
3716
3717 return retval;
3718 }
3719
3720 NLM_EXTERN Boolean LIBCALL
PrintDefLinesExtra(ValNodePtr vnp,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,SeqLocPtr seqloc)3721 PrintDefLinesExtra(ValNodePtr vnp, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks, SeqLocPtr seqloc)
3722
3723 {
3724 Boolean retval;
3725 Char buffer[128];
3726 Int2 titleIdAllocated;
3727 Int4 index=0;
3728 SeqAlignPtr seqalign;
3729
3730 if (vnp == NULL || seqloc == NULL)
3731 {
3732 return FALSE;
3733 }
3734
3735 /* Disable printing of title. */
3736 if (!(options & TXALIGN_DO_NOT_PRINT_TITLE))
3737 options += TXALIGN_DO_NOT_PRINT_TITLE;
3738
3739
3740 asn2ff_set_output(outfp, NULL);
3741
3742 ff_StartPrint(0, 0, (Int2)(line_length+2), NULL);
3743
3744 titleIdAllocated = line_length - STATS_LENGTH;
3745
3746 NewContLine();
3747 TabToColumn((Int2)(titleIdAllocated+2));
3748 ff_AddString("Score E");
3749 NewContLine();
3750 TabToColumn((Int2)(titleIdAllocated+2));
3751 if (options & TXALIGN_SHOW_NO_OF_SEGS) {
3752 ff_AddString("(bits) Value N");
3753 } else {
3754 ff_AddString("(bits) Value");
3755 }
3756 ff_EndPrint();
3757
3758 while (vnp && seqloc)
3759 {
3760 ff_StartPrint(0, 0, (Int2)(line_length+2), NULL);
3761 index++;
3762 seqalign = (SeqAlignPtr) vnp->data.ptrvalue;
3763 sprintf(buffer, "\nSignificant matches for pattern occurrence %ld at position %ld\n\n",
3764 (long) index, (long) (SeqLocStart(seqloc)+1));
3765 ff_AddString(buffer);
3766 ff_EndPrint();
3767 retval = PrintDefLinesFromSeqAlign(seqalign, line_length, outfp, options, mode, marks);
3768 vnp = vnp->next;
3769 seqloc = seqloc->next;
3770 }
3771
3772 return retval;
3773 }
3774
3775 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignEx(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,Int4 number_of_descriptions)3776 PrintDefLinesFromSeqAlignEx(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
3777 Int4 mode, Int2Ptr marks, Int4 number_of_descriptions)
3778 {
3779 return PrintDefLinesFromSeqAlignEx2(seqalign, line_length, outfp, options,
3780 mode, marks, number_of_descriptions, (CharPtr)NULL, (CharPtr)NULL);
3781 }
3782
3783
3784 static
3785 CharPtr
StringAppend(CharPtr * dst,size_t * size,CharPtr src,size_t * used)3786 StringAppend(CharPtr *dst, size_t *size, CharPtr src, size_t *used)
3787 {
3788 size_t pos, len;
3789
3790 if (*dst == NULL) {
3791 *size = 1;
3792 pos = 0;
3793 }
3794 else {
3795 pos = *used;
3796 }
3797 if (src == NULL) {
3798 return *dst;
3799 }
3800 len = StringLen(src);
3801 *used += len;
3802 if (*dst == NULL || pos + len + 1 > *size) {
3803 /**
3804 * extending destination buffer
3805 */
3806 CharPtr old = *dst;
3807 for (; pos + len + 1 > *size; *size *= 2);
3808 *dst = (CharPtr)MemNew(*size);
3809 **dst = '\0';
3810 if (old != NULL) {
3811 StringCpy(*dst, old);
3812 MemFree(old);
3813 }
3814 }
3815 StringCpy((*dst) + pos, src);
3816 return *dst;
3817 }
3818
3819
3820 static
3821 Boolean
SeqAlignSegsStr(SeqAlignPtr sap,Int2 index,CharPtr * dst,size_t * size,size_t * used)3822 SeqAlignSegsStr(SeqAlignPtr sap, Int2 index, CharPtr *dst, size_t *size, size_t *used)
3823 {
3824 Char buf[128];
3825 Int4 start, stop;
3826
3827 start = SeqAlignStart(sap, 1);
3828 stop = SeqAlignStop(sap, 1);
3829
3830 if (sap == NULL) {
3831 return FALSE;
3832 }
3833
3834 sprintf(buf, "%ld-%ld", (long)(start), (long)(stop));
3835 StringAppend(dst, size, buf, used);
3836
3837 return TRUE;
3838 }
3839
3840 /**
3841 * links to incomplete genomes
3842 **/
3843 static void
make_dumpgnl_links(SeqIdPtr sip,CharPtr blast_type,CharPtr segs,CharPtr dbname,Boolean is_na,FILE * fp,CharPtr sip_buffer,Boolean isLinkOut)3844 make_dumpgnl_links(SeqIdPtr sip, CharPtr blast_type, CharPtr segs, CharPtr dbname, Boolean is_na, FILE *fp, CharPtr sip_buffer, Boolean isLinkOut)
3845 {
3846 BioseqPtr bsp;
3847 Boolean nodb_path = FALSE;
3848 Char gnl[256];
3849 CharPtr str, chptr, dbtmp;
3850 Uchar buf[32];
3851 Int4 i, j, length, gi;
3852 MD5Context context;
3853 Char passwd[128], tool_url[128], tmpbuff[256];
3854 SeqIdPtr bestid;
3855
3856 /* We do need to make security protected link to BLAST gnl */
3857 if (StringStr(sip_buffer, "gnl|BL_ORD_ID") != NULL)
3858 return;
3859
3860 *passwd = NULLB;
3861 *tool_url = NULLB;
3862
3863 str = NULL;
3864 #ifdef OS_UNIX
3865 str = getenv("DUMPGNL_PASSWD");
3866 #endif
3867 if(str != NULL) {
3868 StringCpy(passwd, str);
3869 } else {
3870 GetAppParam("NCBI", blast_type, "PASSWD", "", passwd,
3871 sizeof(passwd));
3872 }
3873
3874 str = NULL;
3875 #ifdef OS_UNIX
3876 str = getenv("DUMPGNL_TOOL_URL");
3877 #endif
3878 if(str != NULL) {
3879 StringCpy(tool_url, str);
3880 } else {
3881 GetAppParam("NCBI", blast_type, "TOOL_URL", "", tool_url,
3882 sizeof(tool_url));
3883 }
3884 /*only for linkout*/
3885 if(isLinkOut){
3886 StringCpy(tool_url, "/blast/dumpgnl.cgi");
3887 }
3888 /*no check for linkout*/
3889 if(!isLinkOut&&(*passwd == NULLB || *tool_url == NULLB))
3890 return;
3891
3892 /* If we are using 'dumpgnl.cgi' (the default) do not strip off the path. */
3893 if (StrStr(tool_url, "dumpgnl.cgi") == NULL)
3894 nodb_path = TRUE;
3895
3896 if(nodb_path) {
3897
3898 length = StringLen(dbname);
3899 dbtmp = MemNew(sizeof(Char)*length + 2); /* aditional space and NULLB */
3900
3901 for(i = 0; i < length; i++) {
3902
3903 if(isspace(dbname[i]) || dbname[i] == ',') /* Rolling spaces */
3904 continue;
3905
3906 j = 0;
3907 while (!isspace(dbname[i]) && j < 256 && i < length) {
3908 tmpbuff[j] = dbname[i];
3909 j++; i++;
3910 if(dbname[i] == ',') /* Comma is valid delimiter */
3911 break;
3912 }
3913 tmpbuff[j] = NULLB;
3914
3915 if((chptr = strrchr(tmpbuff, '/')) != NULL) {
3916 StringCat(dbtmp, chptr+1);
3917 } else {
3918 StringCat(dbtmp, tmpbuff);
3919 }
3920
3921 StringCat(dbtmp, " ");
3922 }
3923 } else {
3924 dbtmp = dbname;
3925 }
3926
3927 if (sip->choice == SEQID_GI)
3928 gi = sip->data.intvalue;
3929 else
3930 gi = -1;
3931
3932 bsp = BioseqLockById(sip);
3933 if (bsp)
3934 sip = bsp->id;
3935
3936 bestid = SeqIdFindBest(sip, SEQID_GENERAL);
3937 if (bestid && bestid->choice != SEQID_GENERAL)
3938 {
3939 bestid = SeqIdFindBest(sip, SEQID_OTHER);
3940 if (bestid && bestid->choice != SEQID_OTHER)
3941 {
3942 bestid = SeqIdFindBestAccession(sip);
3943 }
3944 }
3945 /*
3946 * Need to protect start and stop positions
3947 * to avoid web users sending us hand-made URLs
3948 * to retrive full sequences
3949 */
3950 if (bestid && bestid->choice != SEQID_GI)
3951 {
3952 MD5Init(&context);
3953 length = StringLen(passwd);
3954 MD5Update(&context, (UcharPtr)passwd, (Uint4)length);
3955 SeqIdWrite(bestid, gnl, PRINTID_FASTA_SHORT, sizeof(gnl));
3956 MD5Update(&context, (UcharPtr)gnl, (Uint4)StringLen(gnl));
3957 MD5Update(&context, (UcharPtr)segs, (Uint4)StringLen(segs));
3958 MD5Update(&context, (UcharPtr)passwd, (Uint4)length);
3959 MD5Final(&context, (UcharPtr)buf);
3960 }
3961 else
3962 {
3963 gnl[0] = NULLB;
3964 }
3965
3966 bestid = SeqIdFindBest(sip, SEQID_GI);
3967 if (gi < 0 && bestid && bestid->choice == SEQID_GI)
3968 {
3969 gi = bestid->data.intvalue;
3970 }
3971
3972 str = MakeURLSafe(dbtmp == NULL ? "nr" : dbtmp);
3973 if (strchr(tool_url, '?') == NULL)
3974 {
3975 fprintf(fp, "<a href=\"%s?db=%s&na=%d&", tool_url, str, is_na);
3976 }
3977 else
3978 {
3979 fprintf(fp, "<a href=\"%s&db=%s&na=%d&", tool_url, str, is_na);
3980 }
3981 str = (CharPtr) MemFree(str);
3982 if (gnl[0] != NULLB)
3983 {
3984 str = MakeURLSafe(gnl);
3985 fprintf(fp, "gnl=%s&", str);
3986 str = (CharPtr) MemFree(str);
3987 }
3988 if (gi != -1)
3989 {
3990 fprintf(fp, "gi=%ld&", (long) gi);
3991 }
3992 if (RID_glb)
3993 {
3994 fprintf(fp, "RID=%s&", RID_glb);
3995 }
3996
3997 if (query_number_glb > 0)
3998 fprintf(fp, "QUERY_NUMBER=%ld&", query_number_glb);
3999
4000 fprintf(fp,
4001 "segs=%s&seal=%02X%02X%02X%02X"
4002 "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X\">",
4003 segs,
4004 buf[0], buf[1], buf[2], buf[3],
4005 buf[4], buf[5], buf[6], buf[7],
4006 buf[8], buf[9], buf[10], buf[11],
4007 buf[12], buf[13], buf[14], buf[15]);
4008
4009 BioseqUnlock(bsp);
4010 if(nodb_path)
4011 MemFree(dbtmp);
4012
4013 return;
4014 }
4015
4016 /* Filter the BlastdefLine structure based upon a gilist.
4017 Those that do match the gilist are returned, otherwise
4018 it's deleted.
4019 */
FilterAsn1DefLine(BlastDefLinePtr bdsp,SeqIdPtr gilist)4020 static BlastDefLinePtr FilterAsn1DefLine(BlastDefLinePtr bdsp, SeqIdPtr gilist)
4021 {
4022 BlastDefLinePtr bdtmp1, bdtmp2, bdtmp_last, bdsp_retval=NULL;
4023 SeqIdPtr gi_list;
4024 Boolean found_gi;
4025
4026 bdtmp1 = bdsp;
4027 while (bdtmp1)
4028 {
4029 gi_list = gilist;
4030 found_gi = FALSE;
4031
4032 while (gi_list) {
4033 if(SeqIdIn(gi_list, bdtmp1->seqid) == TRUE) {
4034 found_gi = TRUE;
4035 break;
4036 }
4037 gi_list = gi_list->next;
4038 }
4039
4040 if(found_gi) {
4041
4042 if (bdsp_retval == NULL)
4043 {
4044 bdsp_retval = bdtmp1;
4045 bdtmp_last = bdtmp1;
4046 }
4047 else
4048 {
4049 bdtmp_last->next = bdtmp1;
4050 bdtmp_last = bdtmp1;
4051 }
4052 bdtmp1 = bdtmp1->next;
4053 bdtmp_last->next = NULL;
4054 }
4055 else
4056 {
4057 bdtmp2 = bdtmp1;
4058 bdtmp1 = bdtmp1->next;
4059 bdtmp2->next = NULL;
4060 BlastDefLineFree(bdtmp2);
4061 }
4062 }
4063 return bdsp_retval;
4064 }
4065
4066 /*
4067 Formats the one-line description for the first part
4068 of the BLAST report. Note that the seqid for the
4069 first sequence is formatted elsewhere.
4070 */
4071
4072
4073 static Boolean
Tx_PrintDefLine(BlastDefLinePtr bdsp,CharPtr buffer,Int4 length)4074 Tx_PrintDefLine(BlastDefLinePtr bdsp, CharPtr buffer, Int4 length)
4075 {
4076 BlastDefLinePtr bdsp_tmp = NULL;
4077 Char seqid_buf[BUFFER_LENGTH];
4078 CharPtr ptr = NULL;
4079 Int4 seqid_length = 0, total_length = 0;
4080
4081 if (bdsp == NULL)
4082 return FALSE;
4083
4084 /* Initialize arrays */
4085 MemSet(seqid_buf, '\0', BUFFER_LENGTH);
4086 MemSet(buffer, '\0', length);
4087
4088 StringNCpy(buffer, bdsp->title, length);
4089 total_length = StringLen(buffer) + 1;
4090
4091 if (total_length >= length)
4092 return TRUE;
4093
4094
4095 bdsp_tmp = bdsp->next;
4096 while (bdsp_tmp)
4097 {
4098 ptr = buffer+total_length - 1;
4099 SeqIdWrite(bdsp_tmp->seqid, seqid_buf, PRINTID_FASTA_LONG, BUFFER_LENGTH-1);
4100 *ptr = ' ';
4101 *(ptr+1) = '>';
4102 StringNCpy(ptr+2, seqid_buf, length-total_length-2);
4103 total_length = StringLen(buffer) + 1;
4104 if (total_length+2 >= length)
4105 break;
4106
4107 ptr = buffer+total_length - 1;
4108 *ptr = ' ';
4109 StringNCpy(ptr+1, bdsp_tmp->title, length-total_length-1);
4110 total_length = StringLen(buffer) + 1;
4111 if (total_length+3 >= length)
4112 break;
4113
4114 bdsp_tmp = bdsp_tmp->next;
4115 }
4116
4117 return TRUE;
4118 }
4119
4120 #define KNOCK_OFF_ALLOWED
4121 NLM_EXTERN void LIBCALL
ScoreAndEvalueToBuffers(FloatHi bit_score,FloatHi evalue,CharPtr bit_score_buf,CharPtr PNTR evalue_buf,Uint1 format_options)4122 ScoreAndEvalueToBuffers(FloatHi bit_score, FloatHi evalue,
4123 CharPtr bit_score_buf, CharPtr PNTR evalue_buf,
4124 Uint1 format_options)
4125 {
4126 #ifdef OS_MAC
4127 if (evalue < 1.0e-180) {
4128 sprintf(*evalue_buf, "0.0");
4129 } else if (evalue < 1.0e-99) {
4130 sprintf(*evalue_buf, "%2.0Le", evalue);
4131 if (format_options & TX_KNOCK_OFF_ALLOWED)
4132 (*evalue_buf)++; /* Knock off digit. */
4133 } else if (evalue < 0.0009) {
4134 sprintf(*evalue_buf, "%3.0Le", evalue);
4135 } else if (evalue < 0.1) {
4136 sprintf(*evalue_buf, "%4.3Lf", evalue);
4137 } else if (evalue < 1.0) {
4138 sprintf(*evalue_buf, "%3.2Lf", evalue);
4139 } else if (evalue < 10.0) {
4140 sprintf(*evalue_buf, "%2.1Lf", evalue);
4141 } else {
4142 sprintf(*evalue_buf, "%5.0Lf", evalue);
4143 }
4144 if (bit_score > 9999)
4145 sprintf(bit_score_buf, "%4.3Le", bit_score);
4146 else if (bit_score > 99.9)
4147 sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
4148 else /* %4.1Lf is bad on 68K Mac, so cast to long */
4149 sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
4150 #else
4151 if (evalue < 1.0e-180) {
4152 sprintf(*evalue_buf, "0.0");
4153 } else if (evalue < 1.0e-99) {
4154 sprintf(*evalue_buf, "%2.0le", evalue);
4155 if (format_options & TX_KNOCK_OFF_ALLOWED)
4156 (*evalue_buf)++; /* Knock off digit. */
4157 } else if (evalue < 0.0009) {
4158 sprintf(*evalue_buf, "%3.0le", evalue);
4159 } else if (evalue < 0.1) {
4160 sprintf(*evalue_buf, "%4.3lf", evalue);
4161 } else if (evalue < 1.0) {
4162 sprintf(*evalue_buf, "%3.2lf", evalue);
4163 } else if (evalue < 10.0) {
4164 sprintf(*evalue_buf, "%2.1lf", evalue);
4165 } else {
4166 sprintf(*evalue_buf, "%5.0lf", evalue);
4167 }
4168 if (bit_score > 9999)
4169 sprintf(bit_score_buf, "%4.3le", bit_score);
4170 else if (bit_score > 99.9)
4171 sprintf(bit_score_buf, "%4.0ld", (long)bit_score);
4172 else if (format_options & TX_INTEGER_BIT_SCORE)
4173 sprintf(bit_score_buf, "%4.0lf", bit_score);
4174 else
4175 sprintf(bit_score_buf, "%4.1lf", bit_score);
4176 #endif
4177 }
4178
4179 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,Int4 number_of_descriptions,CharPtr db_name,CharPtr blast_type,CharPtr www_root_path)4180 PrintDefLinesFromSeqAlignWithPath(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
4181 Int4 mode, Int2Ptr marks, Int4 number_of_descriptions,
4182 CharPtr db_name, CharPtr blast_type, CharPtr www_root_path)
4183 {
4184 BioseqPtr bsp;
4185 Boolean found_next_one, found_gnl_id, same_id, found_score=FALSE, make_link=FALSE;
4186 Char buffer[BUFFER_LENGTH+1], buffer1[BUFFER_LENGTH+1], eval_buff[10], bit_score_buff[10];
4187 Char HTML_buffer[BUFFER_LENGTH+1], HTML_database[32], HTML_dopt[16], id_buffer[BUFFER_LENGTH+1];
4188 Char *ptr, *ptr_start, *eval_buff_ptr, *bit_score_buff_ptr;
4189 Int4 pos, title_length, title_allocated, titleIdAllocated;
4190 Nlm_FloatHi bit_score, evalue;
4191 Int4 gi = 0, number, score;
4192 SeqIdPtr bestid, gi_list, subject_id, sip_list=NULL, last_id;
4193 TxDfLineStructPtr txsp = NULL, txsp_head, txsp_var;
4194 Boolean retval = FALSE;
4195 Boolean firstnew = TRUE;
4196 Int4 countdescr = number_of_descriptions;
4197 Int4 numalign;
4198 DbtagPtr db_tag;
4199 ObjectIdPtr oip;
4200 Int2 ColumnDistance=2, extraSpace=0, extraSpace2=0, strLen=0, maxEvalWidth=5, maxNWidth=2;
4201 Char tempBuf[64], tempBuf2[64];
4202
4203 if (outfp == NULL) {
4204 return FALSE;
4205 }
4206
4207 if (seqalign == NULL || number_of_descriptions == 0) { /* Two line returns so that the alignments or db report is not all bunched up. */
4208 NewContLine();
4209 NewContLine();
4210 return FALSE;
4211 }
4212
4213 #ifdef OS_UNIX
4214 if (!www_root_path)
4215 www_root_path = getenv("WWW_ROOT_PATH");
4216 #endif
4217
4218 if(!StringICmp(blast_type, "fruitfly")) {
4219 fprintf(outfp, "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0> - please follow this image for the map location of the sequence<P>\n");
4220 }
4221
4222 asn2ff_set_output(outfp, NULL);
4223
4224 ff_StartPrint(0, 0, (Int2)(line_length+2), NULL);
4225
4226 titleIdAllocated = line_length - STATS_LENGTH;
4227
4228 if (options & TXALIGN_SHOW_NO_OF_SEGS) {
4229 titleIdAllocated -= 4;
4230 }
4231
4232 if (options & TXALIGN_CHECK_BOX) {
4233 titleIdAllocated += 2;
4234 }
4235
4236 if(options & TXALIGN_NEW_GIF)
4237 titleIdAllocated += 3;
4238
4239 /* <PRE> block should be already opened outside of this function,
4240 but open it here just in case */
4241 if (options & TXALIGN_HTML) {
4242 ff_AddString("<PRE>");
4243 NewContLine();
4244 }
4245
4246 /*AAS*/
4247 if (!(options & TXALIGN_DO_NOT_PRINT_TITLE)) {
4248 if ((mode == FIRST_PASS) || (mode == NOT_FIRST_PASS_REPEATS)) {
4249 if(RID_glb && options&TXALIGN_SHOW_LINKOUT&&options&TXALIGN_HTML){
4250 if(PairwiseSeqAlignHasLinkout(seqalign, linkout_structure)){
4251
4252 fprintf(outfp, URL_Structure_Overview, RID_glb, 0, 0, CDD_RID_glb, "overview", StringCmp(Entrez_Query_Term, "") ? Entrez_Query_Term:"none");
4253
4254 }
4255 }
4256 NewContLine();
4257
4258 NewContLine();
4259 TabToColumn((Int2)(titleIdAllocated));
4260
4261 ff_AddString("Score E");
4262 NewContLine();
4263 ff_AddString("Sequences producing significant alignments:");
4264 TabToColumn((Int2)(titleIdAllocated));
4265 if (options & TXALIGN_SHOW_NO_OF_SEGS) {
4266 ff_AddString("(bits) Value N");
4267 } else {
4268 ff_AddString("(bits) Value");
4269 }
4270 NewContLine();
4271 }
4272
4273 if (mode == NOT_FIRST_PASS_REPEATS) {
4274 ff_AddString("Sequences used in model and found again:");
4275 NewContLine();
4276 }
4277 if (mode == NOT_FIRST_PASS_NEW) {
4278 ff_AddString("Sequences not found previously or not previously below threshold:");
4279 NewContLine();
4280 }
4281 ff_EndPrint();
4282 }
4283
4284 numalign = 0;
4285 last_id = NULL;
4286 txsp_head = NULL;
4287 while (seqalign) {
4288 if ((mode == FIRST_PASS) ||
4289 ((mode == NOT_FIRST_PASS_REPEATS) && marks && marks[numalign] & SEQ_ALIGN_MARK_REPEAT) ||
4290 ((mode == NOT_FIRST_PASS_NEW) && marks && (!(marks[numalign] & SEQ_ALIGN_MARK_REPEAT)))) {
4291
4292 subject_id = SeqIdDup(TxGetSubjectIdFromSeqAlign(seqalign));
4293 same_id = FALSE;
4294 if(last_id && SeqIdComp(subject_id, last_id) == SIC_YES) {
4295 same_id = TRUE;
4296 }
4297
4298 last_id = SeqIdFree(last_id);
4299 last_id = SeqIdDup(subject_id);
4300
4301 found_score = GetScoreAndEvalue(seqalign, &score, &bit_score, &evalue, &number);
4302 /* if the ID has been seen before, check that proper values are saved. */
4303 if (same_id == TRUE) {
4304 if (score > txsp->score)
4305 txsp->score = score;
4306 if (bit_score > txsp->bit_score)
4307 txsp->bit_score = bit_score;
4308 if (evalue < txsp->evalue)
4309 txsp->evalue = evalue;
4310 if (number < txsp->number)
4311 txsp->number = number;
4312 StringAppend(&txsp->segs_str, &txsp->segs_buflen, ",", &txsp->segs_used);
4313 SeqAlignSegsStr(seqalign, 1, &txsp->segs_str, &txsp->segs_buflen, &txsp->segs_used);
4314 subject_id = SeqIdFree(subject_id);
4315 } else {
4316 bsp = BioseqLockById(subject_id);
4317 txsp = (TxDfLineStructPtr) MemNew(sizeof(TxDfLineStruct));
4318 txsp->segs_str = NULL;
4319 txsp->segs_buflen = 0;
4320 if(bsp != NULL) {
4321 BlastDefLinePtr bdsp;
4322 Char buffer_priv[BUFFER_LENGTH];
4323
4324 MemSet(buffer_priv, '\0', BUFFER_LENGTH);
4325 bdsp = FDGetDeflineAsnFromBioseq(bsp);
4326 gi_list = GetUseThisGi(seqalign);
4327 if (gi_list) {
4328 if (bdsp)
4329 {
4330 bdsp = FilterAsn1DefLine(bdsp, gi_list);
4331 Tx_PrintDefLine(bdsp, buffer_priv, BUFFER_LENGTH);
4332 txsp->title = StringSave(buffer_priv);
4333 txsp->id = bdsp->seqid;
4334 SeqIdWrite(bdsp->seqid, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4335 bdsp->seqid = NULL;
4336 BlastDefLineSetFree(bdsp);
4337 subject_id = SeqIdFree(subject_id);
4338 gi_list = SeqIdSetFree(gi_list);
4339 }
4340 else
4341 {
4342 FilterTheDefline(bsp, gi_list, buffer, BUFFER_LENGTH, &(txsp->title));
4343 gi_list = SeqIdSetFree(gi_list);
4344 subject_id = SeqIdFree(subject_id);
4345 txsp->id = SeqIdParse(buffer);
4346 }
4347 } else {
4348 if (bdsp)
4349 {
4350 Tx_PrintDefLine(bdsp, buffer_priv, BUFFER_LENGTH);
4351 txsp->title = StringSave(buffer_priv);
4352 txsp->id = subject_id;
4353 SeqIdWrite(bdsp->seqid, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4354 BlastDefLineSetFree(bdsp);
4355 }
4356 else
4357 {
4358 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4359 txsp->title = StringSave(BioseqGetTitle(bsp));
4360 txsp->id = subject_id;
4361 }
4362 }
4363 txsp->is_na = (bsp->mol != Seq_mol_aa);
4364 } else {
4365 SeqIdWrite(subject_id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
4366 txsp->title = StringSave("Unknown");
4367 txsp->is_na = FALSE;
4368 txsp->id = subject_id;
4369 }
4370 txsp->seqalign = seqalign;
4371 txsp->buffer_id = StringSave(buffer);
4372 txsp->score = score;
4373 txsp->bit_score = bit_score;
4374 txsp->evalue = evalue;
4375 txsp->number = number;
4376 txsp->found_score = found_score;
4377 SeqAlignSegsStr(seqalign, 1, &txsp->segs_str, &txsp->segs_buflen, &txsp->segs_used);
4378 if (marks) {
4379 /* seq is new if it was not Good on previous iteration */
4380 txsp->isnew = (Boolean) !(marks[numalign] & SEQ_ALIGN_MARK_PREVGOOD);
4381 txsp->waschecked = (Boolean) marks[numalign] & SEQ_ALIGN_MARK_PREVCHECKED;
4382 } else {
4383 txsp->isnew = FALSE;
4384 txsp->waschecked = FALSE;
4385 }
4386
4387 txsp = TxDfLineStructAdd(&txsp_head, txsp);
4388 if(bsp != NULL)
4389 BioseqUnlock(bsp);
4390 retval = TRUE;
4391 }
4392 }
4393 seqalign = seqalign->next;
4394 numalign++;
4395 }
4396 last_id = SeqIdFree(last_id);
4397
4398 if(retval == FALSE)
4399 return FALSE;
4400
4401
4402 /* Used for dumpgnl reports if GNL id's. (overwrite parameter!) */
4403 if (blast_type)
4404 {
4405 blast_type = StringSave(blast_type);
4406 StringUpper(blast_type);
4407 }
4408
4409 /* If option TXALIGN_NO_ENTREZ set full database name will be stripped
4410 to the database fileneme */
4411
4412 /* If a tool_url is set, then we use this rather than Entrez. */
4413 if (blast_type && !(options & TXALIGN_NO_ENTREZ))
4414 {
4415 Char tool_url[128];
4416
4417 *tool_url = NULLB;
4418 GetAppParam("NCBI", blast_type, "TOOL_URL", "", tool_url, sizeof(tool_url));
4419 /* Do use Entrez if available if the tool is dumpgnl.cgi as it does not handle gi's. */
4420 if (*tool_url != NULLB && StringStr(tool_url, "dumpgnl.cgi") == NULL)
4421 options |= TXALIGN_NO_ENTREZ;
4422 }
4423
4424 txsp = txsp_head;
4425 while (txsp && countdescr != 0) {
4426 found_next_one = FALSE;
4427 if (options & TXALIGN_HTML) {
4428
4429 if (txsp->is_na) {
4430 StringCpy(HTML_dopt, "GenBank");
4431 StringCpy(HTML_database, "Nucleotide");
4432 } else {
4433 StringCpy(HTML_dopt, "GenPept");
4434 StringCpy(HTML_database, "Protein");
4435 }
4436 gi = 0;
4437 make_link = FALSE;
4438 bestid = SeqIdFindBest(txsp->id, SEQID_GI);
4439 if (bestid != NULL && bestid->choice == SEQID_GI && !(options & TXALIGN_NO_ENTREZ)) {
4440 gi = bestid->data.intvalue;
4441 if (options & TXALIGN_CHECK_BOX && options & TXALIGN_CHECK_BOX_CHECKED) {
4442 sprintf(HTML_buffer,
4443 "<INPUT TYPE=\"checkbox\" NAME=\"checked_GI\" "
4444 "VALUE=\"%d\" CHECKED> "
4445 "<a href=%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s %s>"
4446 "<INPUT TYPE=\"hidden\" NAME =\"good_GI\" VALUE = \"%d\">",
4447 gi, NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4448 options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "",
4449 gi);
4450 } else if (options & TXALIGN_CHECK_BOX) {
4451 sprintf(HTML_buffer,
4452 "<INPUT TYPE=\"checkbox\" NAME=\"checked_GI\" VALUE=\"%d\"> "
4453 "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
4454 gi, NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4455 options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
4456
4457 } else {
4458 if(!StringICmp(blast_type, "fruitfly")) {
4459 sprintf(HTML_buffer,
4460 "<a href=\"http://www.ncbi.nlm.nih.gov\">"
4461 "<IMG SRC=\"/BLAST/images/map_mark.gif\" BORDER=0></a>"
4462 " <a href=\"%s?"
4463 "cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
4464 NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4465 options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
4466 } else {
4467
4468 sprintf(HTML_buffer,
4469 "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
4470 NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
4471 options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");
4472 }
4473 }
4474
4475 if (options & TXALIGN_NEW_GIF && (countdescr == -1 || countdescr > 0)) {
4476 if (txsp->isnew) {
4477 if (firstnew) {
4478 firstnew = FALSE;
4479 fprintf(outfp, "<a name = Evalue></a>");
4480 }
4481 fprintf(outfp, "<br><IMG SRC=\"%s/blast/images/new.gif\" WIDTH=30 HEIGHT=15 ALT=\"New sequence mark\">", www_root_path == NULL? "" : www_root_path);
4482 } else {
4483 fprintf(outfp, "<br><IMG SRC=\"%s/blast/images/bg.gif\" WIDTH=30 HEIGHT=15 ALT=\" \">", www_root_path == NULL? "" : www_root_path);
4484 }
4485 if (txsp->waschecked) {
4486 fprintf(outfp, "<IMG SRC=\"%s/blast/images/checked.gif\" WIDTH=15 HEIGHT=15 ALT=\"Checked mark\">", www_root_path == NULL? "" : www_root_path);
4487 } else {
4488 fprintf(outfp, "<IMG SRC=\"%s/blast/images/bg.gif\" WIDTH=15 HEIGHT=15 ALT=\" \">", www_root_path == NULL? "" : www_root_path);
4489 }
4490 }
4491 fprintf(outfp, "%s", HTML_buffer);
4492 make_link = TRUE;
4493 /* If not SEQID_GI */
4494 } else if (bestid != NULL && !(options & TXALIGN_NO_DUMPGNL) || (options & TXALIGN_NO_ENTREZ))
4495 {
4496 if (bestid->choice != SEQID_GENERAL && bestid->choice != SEQID_OTHER)
4497 { /* HACK, HACK, use SEQID_GENERAL for Greg's page, even though GI is present. */
4498 if (bsp = BioseqLockById(bestid)) {
4499 bestid = SeqIdFindBest(bsp->id, SEQID_OTHER);
4500 BioseqUnlock(bsp);
4501 }
4502 }
4503 if (bestid->choice == SEQID_GENERAL) {
4504 db_tag = (DbtagPtr) bestid->data.ptrvalue;
4505 if(db_tag->db && StringCmp(db_tag->db, "THC") == 0) {
4506 oip = db_tag->tag;
4507 if(oip->id != 0) {
4508 fprintf(outfp, "<a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id);
4509
4510 }
4511 } else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) {
4512 oip = db_tag->tag;
4513 if(oip->id != 0) {
4514 fprintf(outfp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id);
4515 }
4516 } else {
4517 make_dumpgnl_links(txsp->id, blast_type, txsp->segs_str, db_name, txsp->is_na, outfp, txsp->buffer_id, FALSE);
4518 }
4519 } else
4520 make_dumpgnl_links(txsp->id, blast_type, txsp->segs_str, db_name, txsp->is_na, outfp, txsp->buffer_id, FALSE);
4521 make_link = TRUE;
4522 }
4523 }
4524
4525 sprintf(buffer, "%s", txsp->buffer_id);
4526 if (!(options & TXALIGN_SHOW_GI)) {
4527 if (StringNCmp(buffer, "gi|", 3) == 0) {
4528 ptr = &buffer[3];
4529 while (*ptr != NULLB && *ptr != ' ') { /* Is there another ID beside the GI? */
4530 if (*ptr == '|') {
4531 ptr++;
4532 found_next_one = TRUE;
4533 break;
4534 }
4535 ptr++;
4536 }
4537 }
4538 }
4539 if (found_next_one == FALSE) {
4540 ptr = buffer;
4541 ptr_start = buffer;
4542 } else {
4543 ptr_start = ptr;
4544 }
4545
4546 found_gnl_id = FALSE;
4547 /* Check for an ID of type general from BLAST */
4548
4549 if (StringNCmp(buffer, "gnl|BL_ORD_ID", 13) == 0) {
4550 ptr = buffer;
4551 /* look for end of gnl ID. */
4552 while (*ptr != NULLB && *ptr != ' ')
4553 ptr++;
4554 /* Clear out all spaces. */
4555 while (*ptr != NULLB && *ptr == ' ')
4556 ptr++;
4557 ptr_start = ptr;
4558 found_gnl_id = TRUE;
4559 make_link = FALSE;
4560 }
4561
4562 if (StringNCmp(ptr, "lcl|", 4) == 0) {
4563 ptr += 4;
4564 }
4565
4566 pos = StringLen(ptr);
4567
4568 if ((options & TXALIGN_HTML) && make_link) {
4569 StringCpy(ptr+pos, "</a> ");
4570 pos++; /* One for the space after "</a>" */
4571 pos += 4; /* for "</a>" */
4572 title_allocated = titleIdAllocated - pos;
4573 }
4574
4575 title_allocated = titleIdAllocated - pos;
4576
4577 if (pos >= titleIdAllocated) {
4578 pos = titleIdAllocated+1; /* no space to definition. */
4579 sprintf(ptr+pos-3, "...");
4580 *(ptr+pos) = ' ';
4581 pos++;
4582 *(ptr+pos) = NULLB; /* in case no scores are printed. */
4583 } else {
4584 if (found_gnl_id == FALSE)
4585 {
4586 *(ptr + pos) = ' ';
4587 pos++;
4588 }
4589 else
4590 { /* give back space (used above) so things line up. */
4591 title_allocated++;
4592 }
4593
4594 title_length = StringLen(txsp->title);
4595 if (title_length > title_allocated) {
4596 title_length = title_allocated;
4597 title_length -= 3; /* For "..." */
4598 if (txsp->title) {
4599 StringNCpy((ptr+pos), txsp->title, title_length);
4600 pos += title_length;
4601 }
4602 sprintf((ptr+pos), "...");
4603 pos += 3;
4604 } else {
4605 if (txsp->title) {
4606 StringNCpy((ptr+pos), txsp->title, title_length);
4607 pos += title_length;
4608 }
4609 while (title_length < title_allocated) {
4610 *(ptr + pos) = ' ';
4611 title_length++;
4612 pos++;
4613 }
4614 }
4615 *(ptr + pos) = ' ';
4616 pos++;
4617
4618 /* set to NULLB in case no scores have been found. */
4619 *(ptr + pos) = NULLB;
4620 }
4621
4622 if (txsp->found_score) {
4623 evalue = txsp->evalue;
4624 bit_score = txsp->bit_score;
4625
4626 eval_buff_ptr = eval_buff;
4627 ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
4628 &eval_buff_ptr,
4629 (TX_KNOCK_OFF_ALLOWED | TX_INTEGER_BIT_SCORE));
4630
4631 if (options & TXALIGN_HTML) {
4632 if (gi != 0)
4633 sprintf(id_buffer, "%ld", (long) gi);
4634 else {
4635 #ifdef OS_MAC
4636 sprintf(id_buffer, "%s", txsp->buffer_id);
4637 #else
4638 MuskSeqIdWrite(txsp->id, id_buffer, BUFFER_LENGTH,
4639 PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
4640 #endif
4641 }
4642 bit_score_buff_ptr = bit_score_buff;
4643 if (*bit_score_buff_ptr == ' ') {
4644 bit_score_buff_ptr++;
4645 sprintf(buffer1, " <a href = #%s>%s</a>", id_buffer, bit_score_buff_ptr);
4646 } else {
4647 sprintf(buffer1, "<a href = #%s>%s</a>", id_buffer, bit_score_buff_ptr);
4648 }
4649 } else {
4650 sprintf(buffer1, "%s", bit_score_buff);
4651 }
4652
4653 /*adjust N position*/
4654 strLen=StringLen(eval_buff_ptr);
4655 extraSpace=strLen<maxEvalWidth?(maxEvalWidth-strLen):0;
4656 makeEmptyString(tempBuf, extraSpace);
4657 if (options & TXALIGN_SHOW_NO_OF_SEGS) {
4658 sprintf(tempBuf2, "%ld", (long) txsp->number);
4659 strLen=StringLen(tempBuf2);
4660 extraSpace2=strLen<maxNWidth?(maxNWidth-strLen):0;
4661 makeEmptyString(tempBuf2, extraSpace2);
4662 sprintf((ptr+pos), " %s %s %s%ld%s", buffer1, eval_buff_ptr, tempBuf, (long) txsp->number,tempBuf2);
4663
4664 }
4665 else{
4666
4667 sprintf((ptr+pos), " %s %s%s", buffer1, eval_buff_ptr, tempBuf);
4668
4669 }
4670 }
4671
4672
4673 if (countdescr == -1 || countdescr > 0){
4674
4675 fprintf(outfp, "%s", ptr);
4676 /*add link out*/
4677
4678 if(options&TXALIGN_SHOW_LINKOUT&&options&TXALIGN_HTML){
4679 bsp=BioseqLockById(txsp->id);
4680 addLinkoutForDefline(bsp, txsp->id, outfp);
4681 BioseqUnlock(bsp);
4682 }
4683 fprintf(outfp, "\n");
4684 }
4685 txsp = txsp->next;
4686 if (countdescr > 0)
4687 countdescr--;
4688 }
4689
4690 if (options & TXALIGN_HTML) {
4691 ff_AddString("</PRE>");
4692 NewContLine();
4693 } else
4694 fprintf(outfp, "\n");
4695
4696 /* blast_type (overwriting parameter) allocated before last while loop. */
4697 blast_type = (CharPtr) MemFree(blast_type);
4698
4699 txsp = txsp_head;
4700 while (txsp) {
4701 txsp->title = (CharPtr) MemFree(txsp->title);
4702 txsp->buffer_id = (CharPtr) MemFree(txsp->buffer_id);
4703 txsp->id = SeqIdSetFree(txsp->id);
4704 txsp->segs_str = (CharPtr) MemFree(txsp->segs_str);
4705 txsp_var = txsp;
4706 txsp = txsp->next;
4707 MemFree(txsp_var);
4708 }
4709
4710 return TRUE;
4711 }
4712
4713 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks,Int4 number_of_descriptions,CharPtr db_name,CharPtr blast_type)4714 PrintDefLinesFromSeqAlignEx2(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options,
4715 Int4 mode, Int2Ptr marks, Int4 number_of_descriptions,
4716 CharPtr db_name, CharPtr blast_type)
4717 {
4718 return PrintDefLinesFromSeqAlignWithPath(seqalign, line_length, outfp,
4719 options, mode, marks, number_of_descriptions, db_name,
4720 blast_type, NULL);
4721 }
4722
4723 NLM_EXTERN Boolean LIBCALL
PrintDefLinesFromSeqAlign(SeqAlignPtr seqalign,Int4 line_length,FILE * outfp,Uint4 options,Int4 mode,Int2Ptr marks)4724 PrintDefLinesFromSeqAlign(SeqAlignPtr seqalign, Int4 line_length, FILE *outfp, Uint4 options, Int4 mode, Int2Ptr marks)
4725 {
4726 Boolean retval;
4727
4728 retval = PrintDefLinesFromSeqAlignEx(seqalign, line_length, outfp, options, mode, marks, -1);
4729
4730 return retval;
4731 }
4732
4733 /*
4734 Converts a number into a frame.
4735 */
4736 static CharPtr
NumToFrame(Int2 frame,CharPtr buffer)4737 NumToFrame(Int2 frame, CharPtr buffer)
4738 {
4739 if (buffer)
4740 {
4741 if (frame > 0)
4742 {
4743 sprintf(buffer, "+%d", frame);
4744 }
4745 else
4746 {
4747 sprintf(buffer, "%d", frame);
4748 }
4749 }
4750
4751 return buffer;
4752 }
4753
4754 /* This function transfer SeqAlignPtr into AlignStatOptionPtr */
4755
FormatScoreFromSeqAlignEx(SeqAlignPtr sap,Uint4 option,FILE * fp,Int4Ptr PNTR matrix,Boolean follower,Boolean ooframe)4756 NLM_EXTERN Boolean FormatScoreFromSeqAlignEx(SeqAlignPtr sap, Uint4 option, FILE *fp, Int4Ptr PNTR matrix, Boolean follower, Boolean ooframe)
4757 {
4758 AlignStatOptionPtr asop;
4759 Int4 empty_space, line_len;
4760 AlignSum as;
4761 SeqAlignPtr sap_tmp;
4762
4763 asop = (AlignStatOptionPtr) MemNew(sizeof(AlignStatOption));
4764 MemSet(&as, 0, sizeof(AlignSum));
4765
4766 empty_space = 12; line_len = 60; /* TO BE DETERMINED !!!! */
4767
4768 asop->indent_len = (Int2) empty_space;
4769 asop->line_len = (Int2) (line_len + empty_space);
4770 asop->html_hot_link_relative = FALSE;
4771
4772
4773 if (option & TXALIGN_NO_ENTREZ)
4774 asop->no_entrez = TRUE;
4775 else
4776 asop->no_entrez = FALSE;
4777
4778 if (option & TXALIGN_NO_DUMPGNL)
4779 asop->no_dumpgnl = TRUE;
4780 else
4781 asop->no_dumpgnl = FALSE;
4782
4783 if (option & TXALIGN_HTML) {
4784 asop->html_hot_link = TRUE;
4785 if (option & TXALIGN_HTML_RELATIVE)
4786 asop->html_hot_link_relative = TRUE;
4787 } else {
4788 asop->html_hot_link = FALSE;
4789 }
4790 if (option & TXALIGN_SHOW_GI)
4791 asop->show_gi = TRUE;
4792 else
4793 asop->show_gi = FALSE;
4794
4795 asop->fp = fp;
4796 asop->buf = NULL;
4797 asop->segs = NULL;
4798 as.matrix = matrix;
4799
4800 as.master_sip = TxGetQueryIdFromSeqAlign(sap);
4801 as.target_sip = TxGetSubjectIdFromSeqAlign(sap);
4802
4803 if((asop->bsp = BioseqLockById(as.target_sip)) == NULL) {
4804 Char tmp[128];
4805 SeqIdWrite(as.target_sip, tmp, PRINTID_FASTA_LONG, sizeof(tmp));
4806 ErrPostEx(SEV_ERROR, 0, 0, "Failure to get Bioseq for %s\n", tmp);
4807 return FALSE;
4808 }
4809
4810 as.is_aa = (asop->bsp->mol == Seq_mol_aa);
4811 as.ooframe = ooframe;
4812
4813 asop->sp = NULL;
4814 if(sap->segtype == SAS_DISC) {
4815
4816 Int4 last_m_to = 0, last_t_to = 0;
4817 Int4 m_adj = 0, t_adj = 0;
4818 for(sap_tmp = (SeqAlignPtr)sap->segs; sap_tmp != NULL;
4819 sap_tmp = sap_tmp->next) {
4820
4821 /* We cannot find score this way .. :-) this fuction just
4822 calculates number of positives,identities etc. */
4823
4824 find_score_in_align(sap_tmp, 1, &as);
4825
4826 asop->gaps += as.gaps;
4827 asop->positive += as.positive;
4828 asop->identical += as.identical;
4829 asop->align_len += as.totlen;
4830
4831 /* Adjustment for unaligned regions not counted in the
4832 function above */
4833
4834 if(last_m_to != 0) {
4835 m_adj = as.master_from - last_m_to - 1;
4836 }
4837
4838 asop->align_len += m_adj;
4839 asop->gaps += m_adj;
4840
4841 last_m_to = as.master_to;
4842 }
4843 asop->sp = sap->score;
4844 } else {
4845 asop->sp = find_score_in_align(sap, 1, &as);
4846 asop->gaps = as.gaps;
4847 asop->positive = as.positive;
4848 asop->identical = as.identical;
4849 asop->align_len = as.totlen;
4850
4851
4852 /* This information was added for links to
4853 specific alignment only, but may be used
4854 for something else */
4855
4856 asop->master_from = as.master_from;
4857 asop->master_to = as.master_to;
4858 asop->target_from = as.target_from;
4859 asop->target_to = as.target_to;
4860 }
4861
4862 asop->db_name = NULL;
4863
4864 if (as.m_frame_set) {
4865 asop->m_frame = as.m_frame;
4866 } else {
4867 asop->m_frame = 255;
4868 }
4869
4870 if (as.t_frame_set) {
4871 asop->t_frame = as.t_frame;
4872 } else {
4873 asop->t_frame = 255;
4874 }
4875
4876 asop->m_strand = as.m_strand;
4877 asop->t_strand = as.t_strand;
4878
4879 /* if(!ooframe) {
4880 asop->m_frame = 255;
4881 asop->t_frame = 255;
4882 } else {
4883 asop->m_frame = as.m_frame;
4884 asop->t_frame = as.t_frame;
4885 } */
4886
4887 /* asop->m_strand = Seq_strand_unknown;
4888 asop->t_strand = Seq_strand_unknown; */
4889
4890 asop->follower = follower;
4891
4892 init_buff_ex(255);
4893 FormatScoreFunc(asop);
4894 free_buff();
4895
4896 BioseqUnlock(asop->bsp);
4897
4898 MemFree(asop);
4899
4900 return TRUE;
4901 }
4902
4903 /* Functions to read specific information about taxonomy names from
4904 Bioseq created from Blast database source */
4905
4906 /* This function transfer SeqAlignPtr into AlignStatOptionPtr */
4907
FormatScoreFromSeqAlign(SeqAlignPtr sap,Uint4 option,FILE * fp,Int4Ptr PNTR matrix,Boolean follower)4908 NLM_EXTERN Boolean FormatScoreFromSeqAlign
4909 (SeqAlignPtr sap, Uint4 option, FILE *fp,
4910 Int4Ptr PNTR matrix, Boolean follower)
4911 {
4912 return FormatScoreFromSeqAlignEx(sap, option, fp, matrix, follower, FALSE);
4913 }
4914
/*
    Prints one definition line (sequence id followed by its title) to
    asop->fp, optionally decorated with HTML anchors and links.

    asop     - formatting options.  Fields read here: html_hot_link,
               no_entrez, no_dumpgnl, show_gi, blast_type, segs,
               db_name, txalign_options, bsp, indent_len, line_len, fp.
               Side effect: asop->no_entrez is set to TRUE when a
               TOOL_URL that does not mention "dumpgnl.cgi" is
               configured for asop->blast_type.
    is_na    - selects the Entrez database/report names
               ("Nucleotide"/"GenBank" versus "Protein"/"GenPept") and
               is forwarded to make_dumpgnl_links.
    sip      - id (chain) of the sequence being described.
    defline  - title text, printed through the ff_* print functions.
    taxid    - only referenced by the disabled (#if 0) taxonomy link.
    firstSip - forwarded to addLinkoutForBioseq.

    Returns NULL in all cases.
*/
static CharPtr FSFPrintOneDefline(AlignStatOptionPtr asop, Boolean is_na,
                                  SeqIdPtr sip, CharPtr defline, Int4 taxid, SeqIdPtr firstSip)
{
    Char HTML_database[32], HTML_dopt[16], id_buffer[BUFFER_LENGTH+1];
    Char buffer[BUFFER_LENGTH+1];
    SeqIdPtr bestid;
    Boolean make_link = FALSE, found_gnl_id;
    DbtagPtr db_tag;
    ObjectIdPtr oip;
    CharPtr ptr;
    Int4 gi = 0;
    BioseqPtr bsp;

    /* Printing full label to the buffer */
    SeqIdWrite(sip, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);

    if (asop->html_hot_link == TRUE) {
        /* if (ISA_na(bsp->seq_data_type)) */

        /* If a tool_url is set, then we use this rather than Entrez.
           The lookup is skipped when no blast_type is available. */
        if (asop->no_entrez == FALSE && asop->blast_type != NULL)
        {
            Char tool_url[128];

            *tool_url = NULLB;
            GetAppParam("NCBI", asop->blast_type, "TOOL_URL", "", tool_url, sizeof(tool_url));
            /* Do use Entrez if available if the tool is dumpgnl.cgi as it does not handle gi's. */
            if (*tool_url != NULLB && StringStr(tool_url, "dumpgnl.cgi") == NULL)
                asop->no_entrez = TRUE;
        }

        if (is_na) {
            StringCpy(HTML_dopt, "GenBank");
            StringCpy(HTML_database, "Nucleotide");
        } else {
            StringCpy(HTML_dopt, "GenPept");
            StringCpy(HTML_database, "Protein");
        }

        bestid = SeqIdFindBest(sip, SEQID_GI);
        make_link = FALSE;
        gi = 0;
        if (bestid != NULL) {
            if (bestid->choice == SEQID_GI && asop->no_entrez == FALSE) {
                gi = bestid->data.intvalue;
                make_link = TRUE;
                sprintf(id_buffer, "%ld", (long) gi);
            } else {
                MuskSeqIdWrite(bestid, id_buffer, BUFFER_LENGTH, PRINTID_TEXTID_ACCESSION, FALSE, FALSE);
            }

            /* Named anchor for this sequence (the gi or the accession
               text written to id_buffer above). */
            fprintf(asop->fp, "<a name = %s></a>", id_buffer);
            if (make_link) {
                fprintf(asop->fp,
                        "<a href=\"%s?cmd=Retrieve&db=%s&list_uids=%08ld&dopt=%s\" %s>",
                        NEW_ENTREZ_HREF, HTML_database, (long) gi, HTML_dopt,
                        asop->txalign_options & TXALIGN_TARGET_IN_LINKS ? "TARGET=\"EntrezView\"" : "");

            } else if (asop->no_dumpgnl == FALSE || asop->no_entrez == TRUE) {
                /* A link is opened on this path unless a THC/TI id turns
                   out to have no numeric tag; in that case no "<a href"
                   is printed and the closing "</a>" must be suppressed
                   as well. */
                make_link = TRUE;

                if (bestid->choice != SEQID_GENERAL && bestid->choice != SEQID_OTHER)
                {   /* HACK, HACK, use SEQID_OTHER for Greg's page, even though GI is present. */
                    /* bsp is already present. */
                    bestid = SeqIdFindBest(sip, SEQID_OTHER);
                }
                if (bestid != NULL && bestid->choice == SEQID_GENERAL) {
                    db_tag = (DbtagPtr) bestid->data.ptrvalue;
                    if (db_tag->db && StringCmp(db_tag->db, "THC") == 0) {
                        oip = db_tag->tag;
                        if (oip != NULL && oip->id != 0) {
                            fprintf(asop->fp, "<a href=\"http://www.tigr.org/docs/tigr-scripts/hgi_scripts/thc_report.spl?est=THC%ld&report_type=n\">", (long) oip->id);
                        } else {
                            make_link = FALSE;
                        }
                    } else if (db_tag->db && StringICmp(db_tag->db, "TI") == 0) {
                        oip = db_tag->tag;
                        if (oip != NULL && oip->id != 0) {
                            fprintf(asop->fp, "<a href=\"http://www.ncbi.nlm.nih.gov/Traces/trace.cgi?cmd=retrieve&dopt=fasta&val=%ld\">", (long) oip->id);
                        } else {
                            make_link = FALSE;
                        }
                    } else {
                        /** * links to incomplete genomes */
                        make_dumpgnl_links(sip, asop->blast_type, asop->segs, asop->db_name, is_na, asop->fp, buffer, FALSE);
                    }
                } else {
                    make_dumpgnl_links(sip, asop->blast_type, asop->segs, asop->db_name, is_na, asop->fp, buffer, FALSE);
                }
            }
        }
    }

    /* else {
       fprintf(asop->fp, ">");
       } */

    /* By default the whole id string is printed.  When gi's are not to
       be shown and the string starts with "gi|", printing starts after
       the next '|' instead - provided another id follows the gi. */
    ptr = buffer;
    if (asop->show_gi == FALSE && StringNCmp(buffer, "gi|", 3) == 0) {
        CharPtr scan;

        for (scan = &buffer[3]; *scan != NULLB && *scan != ' '; scan++) {
            /* Is there another ID beside the GI? */
            if (*scan == '|') {
                ptr = scan + 1;
                break;
            }
        }
    }

    /* Remove local ID's. */
    if (StringNCmp(ptr, "lcl|", 4) == 0) {
        ptr += 4;
    }

    found_gnl_id = TRUE;
    /* Check for an ID of type general from BLAST; such ids are neither
       printed nor linked. */
    if (StringNCmp(buffer, "gnl|BL_ORD_ID", 13) != 0) {
        fprintf(asop->fp, "%s", ptr);
        found_gnl_id = FALSE;
    } else {
        make_link = FALSE;
    }

    if (asop->html_hot_link == TRUE && make_link == TRUE) {
        fprintf(asop->fp, "</a> ");
    } else if (found_gnl_id == FALSE) {
        fprintf(asop->fp, " ");
    }

    /*add link out*/
    if ((asop->txalign_options & TXALIGN_SHOW_LINKOUT) && (asop->txalign_options & TXALIGN_HTML)) {
        bsp = BioseqLockById(sip);
        if (bsp) {
            SeqIdPtr sipGi;
            Char fastaLongIdBuf[BUFFER_LENGTH];
            addLinkoutForBioseq(bsp, sip, firstSip, asop->fp);

            /* Offer a download link for long nucleotide subjects that
               carry a gi. */
            sipGi = SeqIdFindBest(bsp->id, SEQID_GI);
            if (sipGi && bsp->length > LENGTH_TO_SHOW_DOWNLOAD && ISA_na(asop->bsp->mol)) {
                SeqIdWrite(bsp->id, fastaLongIdBuf, PRINTID_FASTA_LONG, BUFFER_LENGTH);
                make_dumpgnl_links(sipGi, asop->blast_type, asop->segs, asop->db_name, ISA_na(asop->bsp->mol), asop->fp, fastaLongIdBuf, TRUE);
                fprintf(asop->fp, "<img border=0 height=16 width=16 src=\"/blast/images/D.gif\" alt=\"Download subject sequence spanning the HSP\"></a>");
            }
            /*add one space before defline*/

            fprintf(asop->fp, " ");

            BioseqUnlock(bsp);
        }
    }
#if 0
    if(taxid >=0 && asop->html_hot_link == TRUE && make_link == TRUE) {
        fprintf(asop->fp,
                "<a href=\"http://www.ncbi.nlm.nih.gov/htbin-post"
                "/Taxonomy/wgetorg?id=%d\">"
                "<FONT color=\"red\">T</FONT></a> ", taxid);
    }
#endif

    /* The line length is shortened (by 15 here) because the ID printed
       above does not go through the ff print functions. */

    ff_StartPrint(0, asop->indent_len,
                  (Int2)(asop->line_len+asop->indent_len-15), NULL);
    ff_AddString(defline);
    ff_EndPrint();

    return NULL;
}
5087
5088
TX_PrintDeflinesWithAsn(BlastDefLinePtr PNTR bdsp,AlignStatOptionPtr asop)5089 static Boolean TX_PrintDeflinesWithAsn(BlastDefLinePtr PNTR bdsp,
5090 AlignStatOptionPtr asop)
5091 {
5092 Boolean first = TRUE;
5093 BioseqPtr bsp;
5094 BlastDefLinePtr tbdsp;
5095 Int4 len, i;
5096 SeqIdPtr gilist;
5097 SeqIdPtr firstSip=NULL;
5098
5099 if(bdsp == NULL || asop == NULL)
5100 return FALSE;
5101
5102 bsp = asop->bsp;
5103
5104 if((gilist = ScorePtrUseThisGi(asop->sp)) != NULL)
5105 {
5106 *bdsp = FilterAsn1DefLine(*bdsp, gilist);
5107 gilist = SeqIdSetFree(gilist);
5108 }
5109
5110 for (tbdsp = *bdsp; tbdsp != NULL; tbdsp = tbdsp->next) {
5111
5112 if(first) {
5113 SeqIdPtr bestid;
5114 bestid = SeqIdFindBest(tbdsp->seqid, SEQID_GI);
5115 firstSip=bestid;
5116 if(bestid->choice == SEQID_GI&&asop->html_hot_link&&(asop->txalign_options&TXALIGN_GET_SEQUENCE)){
5117 fprintf(asop->fp, "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%ld\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%ld', 'getSeqGi', this.checked)\">", bestid->data.intvalue, query_number_glb);
5118 }
5119
5120 fprintf(asop->fp, ">");
5121 first = FALSE;
5122 } else {
5123 fprintf(asop->fp, " ");
5124 }
5125
5126 len = StringLen(tbdsp->title);
5127
5128 /* Trimming tail white spaces if any */
5129 for(i = len; i > 0 && IS_WHITESP(tbdsp->title[i-1]); i++)
5130 tbdsp->title[i-1] = NULLB;
5131
5132 FSFPrintOneDefline(asop, ISA_na(bsp->mol), tbdsp->seqid,
5133 tbdsp->title, tbdsp->taxid, firstSip);
5134 }
5135 return TRUE;
5136 }
5137 #define TX_SEQID_BUF_SIZE 200
FormatScoreFunc(AlignStatOptionPtr asop)5138 NLM_EXTERN int LIBCALLBACK FormatScoreFunc(AlignStatOptionPtr asop)
5139
5140 {
5141 BioseqPtr bsp;
5142 Boolean allocated, first;
5143 CharPtr defline, ptr, eval_buff_ptr, dline_buf, chptr, sptr;
5144 CharPtr new_defline;
5145 Char buf1[5], buf2[5];
5146 Char buffer[BUFFER_LENGTH+1], eval_buff[10], bit_score_buff[10];
5147 Char seqid_buf[TX_SEQID_BUF_SIZE+2];
5148 Char id_buffer[BUFFER_LENGTH+1];
5149 Nlm_FloatHi bit_score, evalue;
5150 Int4 percent_identical, percent_positive;
5151 Int4 number, score, gi, len, i;
5152 Int4 index; /* index for while loop over seqid. */
5153 ObjectIdPtr obid;
5154 SeqIdPtr gilist, sip, new_sip;
5155 ScorePtr scrp, sp;
5156 Boolean splice_junction = FALSE;
5157 BlastDefLinePtr bdsp = NULL;
5158 CharPtr warning_msg = NULL;
5159 SeqIdPtr firstSip=NULL;
5160 Int2 comp_adjustment_method = eNoCompositionBasedStats;
5161
5162
5163 sp = asop->sp;
5164 bsp = asop->bsp;
5165
5166 asn2ff_set_output(asop->fp, NULL);
5167
5168 bit_score = 0.0;
5169 score = 0;
5170 evalue = 0.0;
5171 defline = NULL;
5172 *id_buffer = NULLB;
5173
5174 if (bsp && asop->follower == FALSE) {
5175 /* Is the defline and sip allocated? */
5176
5177 if((bdsp = FDGetDeflineAsnFromBioseq(bsp)) != NULL) {
5178 TX_PrintDeflinesWithAsn(&bdsp, asop);
5179 bdsp = BlastDefLineSetFree(bdsp);
5180 } else {
5181 allocated = FALSE;
5182 gilist = ScorePtrUseThisGi(sp);
5183 if (gilist) {
5184 FilterTheDefline(bsp, gilist, buffer, BUFFER_LENGTH, &(defline));
5185 gilist = SeqIdSetFree(gilist);
5186 sip = SeqIdParse(buffer);
5187 allocated = TRUE;
5188 } else {
5189 SeqIdWrite(bsp->id, buffer, PRINTID_FASTA_LONG, BUFFER_LENGTH);
5190 sip = SeqIdSetDup(bsp->id);
5191 defline = StringSave(BioseqGetTitle(bsp));
5192 }
5193
5194 /* Here we print all defline one by one */
5195
5196 dline_buf = defline;
5197 chptr = defline;
5198 new_sip = NULL;
5199 new_defline = NULL;
5200 first = TRUE;
5201 while (TRUE) {
5202 if((chptr = StringChr(chptr, '>')) != NULL) {
5203
5204 /* If ">" character exists in the defline - we have to check,
5205 that this is start of new SeqId string */
5206
5207 for (index=0, ptr = chptr+1, sptr = seqid_buf;
5208 *ptr != ' ' && *ptr != NULLB && index < TX_SEQID_BUF_SIZE;
5209 index++, ptr++, sptr++) {
5210 *sptr = *ptr;
5211 }
5212 *sptr = NULLB;
5213
5214 if((new_sip = SeqIdParse(seqid_buf)) == NULL) {
5215 chptr++;
5216 continue;
5217 }
5218
5219 *chptr = NULLB;
5220 if(*ptr == ' ')
5221 new_defline = ptr + 1;
5222 else
5223 new_defline = NULL;
5224 } else {
5225 new_sip = NULL;
5226 new_defline = NULL;
5227 }
5228
5229 if(sip != NULL) {
5230
5231 if(first) {
5232 SeqIdPtr bestid;
5233 bestid = SeqIdFindBest(bsp->id, SEQID_GI);
5234 firstSip=bestid;
5235 if(bestid->choice == SEQID_GI&&asop->html_hot_link&&(asop->txalign_options&TXALIGN_GET_SEQUENCE)){
5236 fprintf(asop->fp, "<input type=\"checkbox\" name=\"getSeqGi\" value=\"%ld\" onClick=\"synchronizeCheck(this.value, 'getSeqAlignment%ld', 'getSeqGi', this.checked)\">", bestid->data.intvalue, query_number_glb);
5237 }
5238 fprintf(asop->fp, ">");
5239 first = FALSE;
5240 } else {
5241 fprintf(asop->fp, " ");
5242 }
5243 len = StringLen(defline);
5244
5245 /* Trimming tail white spaces if any */
5246 for(i = len; i > 0 && IS_WHITESP(defline[i-1]); i++)
5247 defline[i-1] = NULLB;
5248
5249 FSFPrintOneDefline(asop, ISA_na(bsp->mol), sip, defline,
5250 -1, firstSip);
5251 sip =SeqIdSetFree(sip);
5252 }
5253
5254 if(new_sip != NULL && new_defline != NULL) {
5255 chptr = defline = new_defline;
5256 sip = new_sip;
5257 } else {
5258 break;
5259 }
5260 }
5261
5262 dline_buf = (CharPtr) MemFree(dline_buf);
5263 }
5264
5265 fprintf(asop->fp, " Length = %ld\n", (long) BioseqGetLen(bsp));
5266 }
5267
5268 if (asop->no_entrez == TRUE &&
5269 asop->html_hot_link == TRUE && bsp != NULL) {
5270
5271 /* For Gregs and Human Genome stuff we will add links to every
5272 HSP */
5273 SeqIdPtr bestid;
5274
5275 gi = 0;
5276 bestid = SeqIdFindBest(bsp->id, SEQID_GI);
5277
5278 if (bestid != NULL) {
5279 if (bestid->choice == SEQID_GI) {
5280 gi = bestid->data.intvalue;
5281 sprintf(id_buffer, "%ld", (long) gi);
5282 }
5283 }
5284 }
5285
5286 number=1;
5287 for (scrp=sp; scrp; scrp = scrp->next) {
5288 obid = scrp->id;
5289 if(obid != NULL) {
5290 if (StringICmp(obid->str, "score") == 0) {
5291 score = scrp->value.intvalue;
5292 continue;
5293 }
5294 else if (StringICmp(obid->str, "e_value") == 0 || StringICmp(obid->str, "sum_e") == 0) {
5295 evalue = scrp->value.realvalue;
5296 continue;
5297 } else if (StringICmp(obid->str, "sum_n") == 0) {
5298 number = scrp->value.intvalue;
5299 continue;
5300 } else if (StringICmp(obid->str, "bit_score") == 0) {
5301 bit_score = scrp->value.realvalue;
5302 continue;
5303 } else if (StringICmp(obid->str, "splice_junction") == 0) {
5304 splice_junction = TRUE;
5305 } else if (StringICmp(obid->str, "comp_adjustment_method") == 0) {
5306 comp_adjustment_method = scrp->value.intvalue;
5307 } else if (StringICmp(obid->str, "warning") == 0) {
5308 warning_msg = Malloc(256);
5309 sprintf(warning_msg,
5310 "WARNING: HSPs with e-values below %.2g have been skipped\n",
5311 scrp->value.realvalue);
5312 }
5313 } else {
5314 if(scrp->choice == 1) {
5315 score = scrp->value.intvalue;
5316 continue;
5317 } else if(scrp->choice == 2) {
5318 bit_score = scrp->value.realvalue;
5319 continue;
5320 }
5321 }
5322 }
5323
5324 ff_StartPrint(0, 0, (Int2)(asop->line_len+asop->indent_len), NULL);
5325
5326 if (warning_msg) {
5327 ff_AddString(warning_msg);
5328 NewContLine();
5329 }
5330 ff_EndPrint();
5331 eval_buff_ptr = eval_buff;
5332
5333 ScoreAndEvalueToBuffers(bit_score, evalue, bit_score_buff,
5334 &eval_buff_ptr, TX_KNOCK_OFF_ALLOWED);
5335
5336 if(asop->html_hot_link == TRUE && *id_buffer != NULLB) {
5337
5338 Int4 m_from, m_to, t_from, t_to;
5339
5340 /* These are links for Human Genome viewer for every single
5341 alignment */
5342
5343 if(asop->m_strand == Seq_strand_minus) {
5344 m_from = asop->master_to+1;
5345 m_to = asop->master_from+1;
5346 } else {
5347 m_from = asop->master_from+1;
5348 m_to = asop->master_to+1;
5349 }
5350
5351 if(asop->t_strand == Seq_strand_minus) {
5352 t_from = asop->target_to+1;
5353 t_to = asop->target_from+1;
5354 } else {
5355 t_from = asop->target_from+1;
5356 t_to = asop->target_to+1;
5357 }
5358
5359 sprintf(buffer, " <a name = %s_%ld></a>"
5360 "<a name = %s_%ld_%d_%d_%d_%d>"
5361 "</a>Score = %s bits (%ld), ",
5362 id_buffer, (long) score, id_buffer, (long) score,
5363 m_from, m_to, t_from, t_to,
5364 bit_score_buff, (long) score);
5365 } else {
5366 sprintf(buffer, " Score = %s bits (%ld), ",
5367 bit_score_buff, (long) score);
5368 }
5369
5370 fprintf(asop->fp, "%s", buffer);
5371
5372 if (number == 1)
5373 sprintf(buffer, "Expect = %s", eval_buff_ptr);
5374 else if (!splice_junction)
5375 sprintf(buffer, "Expect(%ld) = %s", (long) number, eval_buff_ptr);
5376 else
5377 sprintf(buffer, "Expect(%ld+) = %s", (long) number, eval_buff_ptr);
5378 fprintf(asop->fp, "%s", buffer);
5379 if (eNoCompositionBasedStats != comp_adjustment_method) {
5380 if (eCompositionBasedStats == comp_adjustment_method)
5381 sprintf(buffer,", Method: Composition-based stats.");
5382 if (eCompositionMatrixAdjust == comp_adjustment_method)
5383 sprintf(buffer,", Method: Compositional matrix adjust.");
5384 fprintf(asop->fp, "%s", buffer);
5385 }
5386 fprintf(asop->fp, "\n", buffer);
5387 ff_StartPrint(0, 0, (Int2)(asop->line_len+asop->indent_len), NULL);
5388 if (asop->align_len > 0) {
5389 asop->positive += asop->identical;
5390 percent_identical = (Int4) ((100*asop->identical + 0.5)/ (asop->align_len));
5391 percent_positive = (Int4) ((100*asop->positive + 0.5)/ (asop->align_len));
5392 /* Don't show positives for blastn, which has these set to 255. */
5393 if (asop->m_frame == 255 && asop->t_frame == 255 &&
5394 asop->m_strand != Seq_strand_unknown && asop->t_strand != Seq_strand_unknown)
5395 sprintf(buffer, " Identities = %ld/%ld (%ld%%)", (long) asop->identical, (long) asop->align_len, (long) percent_identical);
5396 else
5397 sprintf(buffer, " Identities = %ld/%ld (%ld%%), Positives = %ld/%ld (%ld%%)", (long) asop->identical, (long) asop->align_len, (long) percent_identical, (long) (asop->positive), (long) asop->align_len, (long) (percent_positive));
5398 ff_AddString(buffer);
5399 if (asop->gaps > 0) {
5400 sprintf(buffer, ", Gaps = %ld/%ld (%ld%%)", (long) asop->gaps,
5401 (long) asop->align_len,
5402 (long) (100*asop->gaps)/(asop->align_len));
5403 ff_AddString(buffer);
5404 }
5405 NewContLine();
5406
5407 /* for testing. */
5408 if (asop->m_frame != 255 || asop->t_frame != 255) {
5409 if (asop->m_frame != 255 && asop->t_frame != 255) {
5410 sprintf(buffer, " Frame = %s / %s", NumToFrame(asop->m_frame, buf1), NumToFrame(asop->t_frame, buf2));
5411 } else if (asop->m_frame != 255) {
5412 sprintf(buffer, " Frame = %s", NumToFrame(asop->m_frame, buf2));
5413 }
5414 else if (asop->t_frame != 255) {
5415 sprintf(buffer, " Frame = %s", NumToFrame(asop->t_frame, buf2));
5416 }
5417 ff_AddString(buffer);
5418 NewContLine();
5419 } else if (asop->m_strand != Seq_strand_unknown && asop->t_strand != Seq_strand_unknown) {
5420 if (asop->m_strand != asop->t_strand)
5421 sprintf(buffer, " Strand = Plus / Minus");
5422 else
5423 sprintf(buffer, " Strand = Plus / Plus");
5424 ff_AddString(buffer);
5425
5426
5427 NewContLine();
5428 }
5429 /* for testing. */
5430
5431 }
5432 ff_EndPrint();
5433
5434 return 0;
5435 }
5436
5437 /*
5438 *
5439 * determine the option for alignment based on the named tx_option
5440 *
5441 */
GetTxAlignOptionValue(Uint1 tx_option,BoolPtr hide_feature,BoolPtr print_score,BoolPtr split_display)5442 NLM_EXTERN Uint4 GetTxAlignOptionValue (Uint1 tx_option, BoolPtr hide_feature,
5443 BoolPtr print_score, BoolPtr split_display)
5444 {
5445 Uint4 option;
5446
5447 option = 0;
5448 *print_score = FALSE;
5449 *split_display = FALSE;
5450 switch (tx_option)
5451 {
5452 /*multiple pairwise alignment */
5453 case TEXT_MP:
5454 case TEXT_MP_MISMATCH:
5455 option |= TXALIGN_MASTER;
5456 option |= TXALIGN_SHOW_RULER;
5457 option |= TXALIGN_SHOW_STRAND;
5458 if(tx_option == TEXT_MP_MISMATCH)
5459 option |= TXALIGN_MISMATCH;
5460 break;
5461 /*FLAT multiple pairwise alignment*/
5462 case TEXT_MPFLAT:
5463 case TEXT_MPFLAT_MISMATCH:
5464 option |= TXALIGN_MASTER;
5465 option |= TXALIGN_FLAT_INS;
5466 option |= TXALIGN_END_NUM;
5467 option |= TXALIGN_COMPRESS;
5468 if(tx_option == TEXT_MPFLAT_MISMATCH)
5469 option |= TXALIGN_MISMATCH;
5470 *split_display = TRUE;
5471 break;
5472 case TEXT_BLAST:
5473 option |= TXALIGN_END_NUM;
5474 option |= TXALIGN_BLASTX_SPECIAL;
5475 option |= TXALIGN_MATRIX_VAL;
5476 option |= TXALIGN_SHOW_QS;
5477 *hide_feature = TRUE;
5478 *print_score = TRUE;
5479 *split_display = TRUE;
5480 break;
5481 default:
5482 option |= TXALIGN_MASTER;
5483 option |= TXALIGN_MISMATCH;
5484 option |= TXALIGN_SHOW_RULER;
5485 option |= TXALIGN_SHOW_STRAND;
5486 break;
5487 }
5488 if(*hide_feature)
5489 option |= TXALIGN_COMPRESS;
5490 return option;
5491 }
5492
5493 /** The following function assumes that neither of the locations in
5494 * the first link in StdSeg is empty.
5495 * @param sseg Alignment segments [in]
5496 * @param dna_strand The strand of the nucleotide sequence [out]
5497 * @return TRUE for tblastn, FALSE for blastx.
5498 */
OOFGetDNAStrand(StdSegPtr sseg,Int4Ptr dna_strand)5499 static Boolean OOFGetDNAStrand(StdSegPtr sseg, Int4Ptr dna_strand)
5500 {
5501 Uint1 strand;
5502 Boolean reverse;
5503
5504 if ((strand = SeqLocStrand(sseg->loc)) != Seq_strand_unknown) {
5505 *dna_strand = (Int4) strand;
5506 reverse = FALSE;
5507 } else {
5508 *dna_strand = (Int4) SeqLocStrand(sseg->loc->next);
5509 reverse = TRUE;
5510 }
5511 return reverse;
5512 }
5513
/* Derive the DNA coordinate stored as the end of an output line from the
 * current DNA index.
 *   minus strand : dna_index - 1, except that values below 1 are returned
 *                  unchanged
 *   other strands: dna_index - 3, except that 0 stays 0
 */
static Int4 SetDNALineEnd(Int4 dna_index, Int4 dna_strand)
{
    if (dna_strand == Seq_strand_minus) {
        return (dna_index >= 1) ? dna_index - 1 : dna_index;
    }

    return (dna_index != 0) ? dna_index - 3 : 0;
}
5525
/* Count the decimal digits of number.  Any value of 9 or less (including
 * zero and negative values) counts as one digit. */
static Int4 GetDigitsInINT(Int4 number)
{
    Int4 digits = 1;

    while (number > 9) {
        number /= 10;
        digits++;
    }

    return digits;
}
5535
GetMaxFROMDigits(StdSegPtr sseg)5536 static Int4 GetMaxFROMDigits(StdSegPtr sseg)
5537 {
5538 StdSegPtr ssp, ssp_last;
5539 Int4 master_from, target_from, master_to, target_to;
5540 Int4 max_number, count;
5541
5542 master_from = SeqLocStart(sseg->loc);
5543 target_from = SeqLocStart(sseg->loc->next);
5544
5545 for(ssp_last = ssp = sseg; ssp != NULL; ssp = ssp->next)
5546 ssp_last = ssp;
5547
5548 master_to = SeqLocStop(ssp_last->loc);
5549 target_to = SeqLocStop(ssp_last->loc->next);
5550
5551 max_number = MAX(MAX(master_from, master_to),
5552 MAX(target_from, target_to));
5553
5554 count = GetDigitsInINT(max_number);
5555
5556 return count;
5557 }
5558
5559 #define WIDTH 60
/*
 * OOFShowSingleAlignment
 *
 * Print one out-of-frame alignment (a SeqAlign whose segments are StdSegs)
 * to fp, WIDTH columns per output block.  Three text rows are built in
 * parallel:
 *   line1  translation of the DNA segment (or '-' where the DNA side is
 *          empty), plus '\' and '/' frame-shift markers
 *   line2  protein residues (or '-' where the protein side is empty)
 *   line3  middle row: the residue itself when both rows agree, '+' when
 *          the matrix entry for the pair is positive, ' ' otherwise
 * When OOFGetDNAStrand() reports TRUE the protein row is printed as
 * "Query" and the DNA row as "Sbjct"; otherwise the roles are swapped.
 *
 *   sap     alignment to print; must be non-NULL with segtype 3
 *   mask    not referenced by this function
 *   matrix  score matrix indexed by the two characters of a column
 *   fp      output stream
 *
 * Returns FALSE if sap is NULL or not a StdSeg alignment, if a location is
 * neither an interval nor empty, or if the DNA segment cannot be
 * translated; TRUE otherwise.
 */
static Boolean OOFShowSingleAlignment(SeqAlignPtr sap, ValNodePtr mask,
                                      Int4Ptr PNTR matrix, FILE *fp)
{
    StdSegPtr sseg;
    SeqIntPtr seq_int1, seq_int2;   /* DNA / protein interval of a segment */
    SeqLocPtr slp1, slp2;           /* DNA / protein location of a segment */
    SeqIdPtr  sip1, sip2;           /* DNA / protein sequence id */
    SeqFeatPtr fake_cds;
    ByteStorePtr b_store = NULL;    /* translation of the current DNA segment */
    Char line1[128], line2[128], line3[128];
    Int4 line_index, length_dna, length_pro, length;
    Int4 dna_index, pro_index, dna_line_start, pro_line_start;
    Int4 dna_line_end, pro_line_end, dna_to, dna_from;
    BioseqPtr bsp;
    SeqPortPtr spp;
    Int4 i, lines, k, shift_info = 0;
    Char c1, c2, c3;
    Int4 dna_strand, max_digits, num_pad;
    Boolean reverse = FALSE;

    if(sap == NULL || sap->segtype != 3) /* Should be StdSeg here! */
        return FALSE;

    line_index = 0;     /* number of columns currently buffered */
    lines = 0;          /* only used for the optional ruler */
    dna_index =0;
    pro_index = 0;
    pro_line_end = 0;
    dna_line_end = 0;

    /* reverse == TRUE: the first location is the protein, the second the DNA */
    reverse = OOFGetDNAStrand((StdSegPtr) sap->segs, &dna_strand);

    /* Needed for printing nice alignment with normal spacing */
    max_digits = GetMaxFROMDigits((StdSegPtr) sap->segs);

    for(sseg = (StdSegPtr) sap->segs; sseg != NULL; sseg= sseg->next) {

        /* Now starting new alignment region */

        length_dna = 0;
        length_pro = 0;
        b_store = NULL;

        /* slp1/sip1 always refer to the DNA, slp2/sip2 to the protein */
        if (reverse) {
            slp2 = sseg->loc;
            slp1 = sseg->loc->next;
            sip2 = sseg->ids;         /* Protein */
            sip1 = sseg->ids->next;   /* DNA */
        } else {
            slp1 = sseg->loc;
            slp2 = sseg->loc->next;
            sip1 = sseg->ids;         /* DNA */
            sip2 = sseg->ids->next;   /* Protein */
        }

        /* An empty location means a gap on that side (interval stays NULL) */
        if(slp1->choice == SEQLOC_INT)
            seq_int1 = (SeqIntPtr) slp1->data.ptrvalue;
        else if (slp1->choice == SEQLOC_EMPTY)
            seq_int1 = NULL;
        else
            return FALSE;       /* Invalid SeqLoc */


        if(slp2->choice == SEQLOC_INT)
            seq_int2 = (SeqIntPtr) slp2->data.ptrvalue;
        else if (slp2->choice == SEQLOC_EMPTY)
            seq_int2 = NULL;
        else
            return FALSE;       /* Invalid SeqLoc */

        /* Ignore double gap */
        if(seq_int1 == NULL && seq_int2 == NULL)
            continue;

        /* printf("shift_info = %d\n", shift_info); */

        /* shift_info still holds the value computed for the previous
           segment (0 on the first pass) */
        if(shift_info%3)
            dna_index -= (3 - shift_info);  /* adjustment for frameshift */

        /* Emit the frame-shift markers left over from the previous segment:
           shift_info 1 -> two '\', 2 -> one '\', 5 -> two '/', 4 -> one '/'.
           Cases 1 and 5 deliberately fall through into cases 2 and 4. */
        switch(shift_info) {
        case 1:
            line1[line_index] = '\\';
            line2[line_index] = ' ';
            line3[line_index] = ' ';
            line_index++;

            if(line_index == WIDTH) {
                dna_line_end = SetDNALineEnd(dna_index, dna_strand);
                pro_line_end = pro_index;
            }
            /* falls through */
        case 2:
            line1[line_index] = '\\';
            line2[line_index] = ' ';
            line3[line_index] = ' ';
            line_index++;

            if(line_index == WIDTH) {
                dna_line_end = SetDNALineEnd(dna_index, dna_strand);
                pro_line_end = pro_index;
            }
            break;
        case 5:
            line1[line_index] = '/';
            line2[line_index] = ' ';
            line3[line_index] = ' ';
            line_index++;

            if(line_index == WIDTH) {
                dna_line_end = SetDNALineEnd(dna_index, dna_strand);
                pro_line_end = pro_index;
            }
            /* falls through */

        case 4:
            line1[line_index] = '/';
            line2[line_index] = ' ';
            line3[line_index] = ' ';
            line_index++;

            if(line_index == WIDTH) {
                dna_line_end = SetDNALineEnd(dna_index, dna_strand);
                pro_line_end = pro_index;
            }

            break;
        case 0:
        default:
            break;
        }

        /* Looking if any frame shift is followed next */
        /* Both sides present: DNA length minus 3*(protein length - 1);
           an exactly in-frame segment therefore yields 3.
           DNA only: (DNA length mod 3) + 3.  Protein only: 0. */
        if(seq_int1 != NULL && seq_int2 != NULL) {
            shift_info = (seq_int1->to - seq_int1->from + 1) -
                (seq_int2->to - seq_int2->from)*3;
        } else if(seq_int1 != NULL) {
            shift_info = (seq_int1->to - seq_int1->from + 1)%3 + 3;
        } else {
            shift_info = 0;
        }

        if(seq_int1 != NULL) {

            /* The DNA position runs upwards from "from", or downwards
               from "to" on the minus strand */
            if(dna_strand != Seq_strand_minus)
                dna_index = seq_int1->from;
            else
                dna_index = seq_int1->to;

            length_dna = (seq_int1->to - seq_int1->from + 1)/3;
        }

        if(seq_int2 != NULL) {
            pro_index = seq_int2->from;
            length_pro = seq_int2->to - seq_int2->from + 1;
        }

        /* A fresh output line starts here: remember its 1-based starts */
        if(line_index == 0) {
            dna_line_start = dna_index + 1;
            pro_line_start = pro_index + 1;
        }

        if (dna_line_start == 0)
            dna_line_start = dna_index + 1;

        if(pro_line_start == 0)
            pro_line_start = pro_index + 1;

        if(seq_int1 != NULL) {

            /* if(length_dna == 0)  insertion
               continue; */

            /* Byte store for DNA */
            bsp = BioseqLockById(sip1);


            dna_from = seq_int1->from;
            dna_to = seq_int1->to;

            /* For shift_info 1 or 2 the translated range is extended by
               3 - shift_info bases at its far end */
            if(0 < shift_info && shift_info < 3) {
                if(dna_strand != Seq_strand_minus)
                    dna_to = seq_int1->to + 3 - shift_info;
                else
                    dna_from = seq_int1->from - 3 + shift_info;
            }

            /* Nothing to translate: skip the rest of this segment.  The
               shift_info computed above is still applied on the next pass. */
            if(dna_from >= dna_to) {
                BioseqUnlock(bsp);
                continue;
            }

            /* Translate the DNA range through a temporary coding-region
               feature */
            fake_cds = make_fake_cds(bsp, dna_from, dna_to,
                                     seq_int1->strand);
            BioseqUnlock(bsp);

            b_store = ProteinFromCdRegionEx(fake_cds, TRUE, FALSE);
            SeqFeatFree(fake_cds);

            if(b_store == NULL) {
                return FALSE;
            }

            BSSeek(b_store, 0, SEEK_SET);

            /* length_dna = BSLen(b_store); */
        }

        if(seq_int2 != NULL) {
            /* Seq port for protein */
            bsp = BioseqLockById(sip2);
            spp = SeqPortNew(bsp, seq_int2->from,
                             seq_int2->to, 0, Seq_code_ncbieaa);
            BioseqUnlock(bsp);
        } else {
            spp = NULL;
        }

        /* A zero length on one side takes the length of the other side */
        if(length_dna == 0) length_dna = length_pro;
        if(length_pro == 0) length_pro = length_dna;

        length = MAX(length_pro, length_dna);
        /* length = MIN(length_pro, length_dna); */

        /* printf("length = %d\n", length); */
        /* One alignment column per iteration */
        for(i = 0; i < length; i++) {

            if(seq_int1 != NULL) {

                /* This line should be checked for correctness */
                /* A byte equal to (Char)EOF from the translation is shown
                   as '?' */
                if((line1[line_index] = BSGetByte(b_store)) == (Char)EOF)
                    line1[line_index] = '?';

                /* one translated residue covers three bases */
                if(dna_strand != Seq_strand_minus)
                    dna_index += 3;
                else
                    dna_index -= 3;

            } else {
                line1[line_index] = '-';
            }

            if(seq_int2 != NULL) {
                line2[line_index] = SeqPortGetResidue(spp);
                pro_index++;
            } else {
                line2[line_index] = '-';
            }

            /* Middle row: identity, positive matrix score, or blank */
            if(line1[line_index] == line2[line_index])
                line3[line_index] = line1[line_index];
            else if(matrix[line1[line_index]][line2[line_index]] > 0)
                line3[line_index] = '+';
            else
                line3[line_index] = ' ';

            line_index++;

            /* The line is exactly full: record the end coordinates now */
            if(line_index == WIDTH) {
                dna_line_end = SetDNALineEnd(dna_index, dna_strand);
                pro_line_end = pro_index;
            }

            /* One column past WIDTH is buffered: print the first WIDTH
               columns and carry the remainder over to the next line */
            if(line_index > WIDTH) { /* Printout */

                line1[line_index] = line2[line_index] = line3[line_index] = '\0';
#ifdef SHOW_RULER
                fprintf(fp, "%5d",
                        WIDTH*lines++);

                for (k = 10; k <= WIDTH; k+=10)
                    fprintf(fp, " . :");
                if (k-5 < WIDTH) fprintf(fp, " .");
#endif

                /* Temporarily cut the rows at column WIDTH for printing;
                   the saved characters are restored below */
                c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
                line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';

                /* ------- Printout of the alignment ------------- */

                /* num_pad spaces keep the sequence columns aligned for
                   coordinates of differing width */
                if (reverse) {
                    fprintf(fp, "Query: %d", pro_line_start);
                    num_pad =
                        max_digits - GetDigitsInINT(pro_line_start) + 1;

                    for(k=0; k < num_pad; k++)
                        fprintf(fp, " ");

                    fprintf(fp, "%s %d\n", line2, pro_line_end);

                    num_pad = 8 + max_digits;

                    for(k=0; k < num_pad; k++)
                        fprintf(fp, " ");

                    fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start);

                    num_pad =
                        max_digits - GetDigitsInINT(dna_line_start) + 1;

                    for(k=0; k < num_pad; k++)
                        fprintf(fp, " ");

                    fprintf(fp, "%s %d\n\n", line1, dna_line_end+3);

                } else {
                    fprintf(fp, "Query: %d", dna_line_start);
                    num_pad =
                        max_digits - GetDigitsInINT(dna_line_start) + 1;

                    for(k=0; k < num_pad; k++)
                        fprintf(fp, " ");

                    fprintf(fp, "%s %d\n", line1, dna_line_end+3);

                    num_pad = 8 + max_digits;

                    for(k=0; k < num_pad; k++)
                        fprintf(fp, " ");

                    fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);

                    num_pad =
                        max_digits - GetDigitsInINT(pro_line_start) + 1;

                    for(k=0; k < num_pad; k++)
                        fprintf(fp, " ");

                    fprintf(fp, "%s %d\n\n", line2, pro_line_end);
                }
                /* --------------------------------------------------- */

                /* Start coordinates of the next line follow the ends of
                   the line just printed */
                if(dna_line_end != 0) {

                    if(dna_strand != Seq_strand_minus)
                        dna_line_start = dna_line_end+4; /*takes 3 bases*/
                    else
                        dna_line_start = dna_line_end+2; /*takes 3 bases*/
                }
                if(pro_line_end != 0)
                    pro_line_start = pro_line_end+1;

                /* Restore the cut characters and move the unprinted tail
                   to the front of each row */
                line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
                strcpy(line1, &line1[WIDTH]);
                strcpy(line2, &line2[WIDTH]);
                strcpy(line3, &line3[WIDTH]);
                line_index = line_index - WIDTH;
            }
        }

        SeqPortFree(spp);       /* Protein SeqPort */
        BSFree(b_store);        /* DNA Byte store */
    }

    /* Printing out remaining tail ... if any */
    line1[line_index] = line2[line_index] = line3[line_index] = '\0';

#ifdef SHOW_RULER
    fprintf(fp, "%5d", WIDTH*lines);

    for (k = 10; k < line_index; k+=10)
        fprintf(fp, " . :");

    if (k-5 < line_index) fprintf(fp, " .");
#endif

    dna_line_end = SetDNALineEnd(dna_index, dna_strand);
    pro_line_end = pro_index;


    /* ------- Printout of the alignment remainder ------- */
    /* Same layout as the in-loop printout, but terminated by an extra
       blank line */
    if (reverse) {
        fprintf(fp, "Query: %d", pro_line_start);

        num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;

        for(k=0; k < num_pad; k++)
            fprintf(fp, " ");

        fprintf(fp, "%s %d\n", line2, pro_line_end);

        num_pad = 8 + max_digits;

        for(k=0; k < num_pad; k++)
            fprintf(fp, " ");

        fprintf(fp, "%s\nSbjct: %d", line3, dna_line_start);

        num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1;

        for(k=0; k < num_pad; k++)
            fprintf(fp, " ");

        fprintf(fp, "%s %d\n\n\n", line1, dna_line_end+3);
    } else {
        fprintf(fp, "Query: %d", dna_line_start);

        num_pad = max_digits - GetDigitsInINT(dna_line_start) + 1;

        for(k=0; k < num_pad; k++)
            fprintf(fp, " ");

        fprintf(fp, "%s %d\n", line1, dna_line_end+3);

        num_pad = 8 + max_digits;

        for(k=0; k < num_pad; k++)
            fprintf(fp, " ");

        fprintf(fp, "%s\nSbjct: %d", line3, pro_line_start);

        num_pad = max_digits - GetDigitsInINT(pro_line_start) + 1;

        for(k=0; k < num_pad; k++)
            fprintf(fp, " ");

        fprintf(fp, "%s %d\n\n\n", line2, pro_line_end);
    }
    /* --------------------------------------------------- */

    /* fprintf(fp, "\nQuery: %-5d %s %-5d\n "
       "%s\nSbjct: %-5d %s %-5d\n\n",
       dna_line_start, line1, dna_line_end+3, line3,
       pro_line_start, line2, pro_line_end); */

    return TRUE;
}
5984
5985 /*******************************************************************************
5986
5987 Function : OOFShowBlastAlignment();
5988
5989 Purpose : function to display a BLAST output with Out-of-Frame
5990 information
5991
5992 Parameters : sap; seqalign
5993 mask; list of masked regions in the query
5994 fp; output file;
5995 tx_option; some display options
5996
5997 Return value : FALSE if failure
5998
5999 *******************************************************************************/
OOFShowBlastAlignment(SeqAlignPtr sap,ValNodePtr mask,FILE * fp,Uint4 tx_option,Int4Ptr PNTR matrix)6000 NLM_EXTERN Boolean OOFShowBlastAlignment(SeqAlignPtr sap, ValNodePtr mask,
6001 FILE *fp, Uint4 tx_option,
6002 Int4Ptr PNTR matrix)
6003 {
6004 SeqAlignPtr sap4;
6005 SeqIdPtr new_id = NULL, old_id = NULL;
6006 Uint4 i;
6007 Boolean bRet, follower= FALSE, matrix_loaded = FALSE;
6008
6009 if(sap == NULL || fp == NULL)
6010 return FALSE;
6011
6012 bRet = TRUE;
6013
6014 /* get the matrix */
6015
6016 if(matrix == NULL) {
6017 if((matrix = load_default_matrix()) == NULL)
6018 return FALSE;
6019 matrix_loaded = TRUE;
6020 }
6021
6022 for(sap4 = sap; sap4 != NULL; sap4 = sap4->next) {
6023
6024 /* Attempt to print score for the alignment */
6025 new_id = TxGetSubjectIdFromSeqAlign(sap4);
6026 if(old_id != NULL) {
6027 if(SeqIdMatch(new_id, old_id))
6028 follower = TRUE;
6029 }
6030
6031 old_id = new_id;
6032 if(!FormatScoreFromSeqAlignEx(sap4, tx_option, fp, matrix,
6033 follower, TRUE)){
6034 bRet=FALSE;
6035 break;
6036 }
6037
6038 follower = FALSE;
6039
6040 /*display a SeqAlign*/
6041 if (!OOFShowSingleAlignment(sap4, mask, matrix, fp)) {
6042 bRet=FALSE;
6043 break;
6044 }
6045 }
6046
6047 if (matrix_loaded){
6048 for(i = 0; i<TX_MATRIX_SIZE; ++i)
6049 MemFree(matrix[i]);
6050 MemFree(matrix);
6051 }
6052
6053 return(bRet);
6054
6055 }
6056
OOFDisplayTraceBack1(Int4Ptr a,CharPtr dna,CharPtr pro,Int4 ld,Int4 lp,Int4 q_start,Int4 p_start)6057 NLM_EXTERN void OOFDisplayTraceBack1(Int4Ptr a, CharPtr dna,
6058 CharPtr pro, Int4 ld, Int4 lp,
6059 Int4 q_start, Int4 p_start)
6060 {
6061 int len = 0, i, j, x, y, lines, k;
6062 static char line1[100], line2[100], line3[100],
6063 tmp[10] = " ", *st;
6064 char *dna1, c1, c2, c3;
6065
6066 dna1 = Malloc(ld+2);
6067 MemCpy(dna1+1, dna, ld);
6068 dna1[0] = ' '; dna1[1] = ' ';
6069
6070 line1[0] = line2[0] = line3[0] = '\0'; x= q_start; y = p_start;
6071 printf("dna=%d pro=%d\n", y, x);
6072
6073 for (len = 0, j = 0, lines = 0; x < lp && y < ld; j++) {
6074 i = a[j];
6075 switch(i) {
6076 case 0:
6077 line1[len] = '-';
6078 line3[len] = ' ';
6079 line2[len++] = pro[x++];
6080 break;
6081 case 1:
6082 case 5:
6083 if (i == 1) line1[len] = '\\';
6084 else line1[len] = '/';
6085 line2[len] = line3[len] = ' ';
6086 len++;
6087 case 2:
6088 case 4:
6089 if (i < 3) line1[len] = '\\';
6090 else line1[len] = '/';
6091 line2[len] = line3[len] = ' ';
6092 len++;
6093 case 3:
6094 line1[len] = dna1[y+i-2]; y+= i;
6095 line2[len] = pro[x++];
6096 if (line1[len] == line2[len]) line3[len++] = '|';
6097 else line3[len++] = ' ';
6098 break;
6099 case 6:
6100 line1[len] = dna1[y+1]; y+= 3;
6101 line2[len] = '-';
6102 line3[len++] = ' ';
6103 }
6104 if (len >= WIDTH) {
6105 line1[len] = line2[len] = line3[len] = '\0';
6106 printf("\n%5d", WIDTH*lines++);
6107 for (k = 10; k <= WIDTH; k+=10)
6108 printf(" . :");
6109 if (k-5 < WIDTH) printf(" .");
6110 c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
6111 line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
6112 printf("\n %s\n %s\n %s\n", line1, line3, line2);
6113 line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
6114 strcpy(line1, &line1[WIDTH]);
6115 strcpy(line2, &line2[WIDTH]);
6116 strcpy(line3, &line3[WIDTH]);
6117 len = len - WIDTH;
6118 }
6119 }
6120 printf("\n%5d", WIDTH*lines);
6121 line1[len] = line2[len] = line3[len] = '\0';
6122 for (k = 10; k < len; k+=10)
6123 printf(" . :");
6124 if (k-5 < len) printf(" .");
6125 printf("\n %s\n %s\n %s\n", line1, line3, line2);
6126
6127 MemFree(dna1);
6128
6129 return;
6130 }
/*
 * OOFDisplayTraceBack2
 *
 * Debug printout (to stdout) of an out-of-frame traceback at nucleotide
 * resolution, WIDTH columns per block.  Three rows are built:
 *   line1  DNA characters copied from the working copy of dna ('-' for
 *          code 0)
 *   line2  protein characters placed in one of the columns of the step
 *   line3  connector characters ('/', '|', '\') under the columns that
 *          hold a protein character
 *
 *   a        edit script, one code per step; codes 0..6 are interpreted
 *   dna      DNA characters; ld bytes are copied from it
 *   pro      protein characters
 *   ld, lp   the loop runs while y < ld and x < lp
 *   q_start  initial value of x (index into pro)
 *   p_start  initial value of y (index into the DNA copy)
 */
NLM_EXTERN void OOFDisplayTraceBack2(Int4Ptr a, CharPtr dna, CharPtr pro,
                                     Int4 ld, Int4 lp,
                                     Int4 q_start, Int4 p_start)
{
    int len = 0, i, j, x, y, lines, k;
    /* static buffers: contents persist between calls; st is not used */
    static char line1[100], line2[100], line3[100],
        tmp[10] = " ", *st;
    char *dna1, c1, c2, c3;

    /* NOTE(review): the result of Malloc is not checked before use */
    dna1 = Malloc(ld+2);
    printf("%d %d\n", q_start, p_start); /* Why does this go to stdout?? */

    /* Working copy shifted by one position; dna1[1] = ' ' then overwrites
       the first copied character */
    MemCpy(dna1+1, dna, ld);
    dna1[0] = ' '; dna1[1] = ' ';

    line1[0] = line2[0] = line3[0] = '\0';
    x= q_start;
    y = p_start;

    for (len = 0, j = 0, lines = 0; x < lp && y < ld; j++) {
        i = a[j];
        /*printf("%d %d %d\n", i, len, b->j);*/
        /* Codes 1..5 consume one protein character.  Code 1 steps one
           column and one DNA position back and is then handled as code 2;
           for the other codes the character goes to tmp[i-2]. */
        if (i > 0 && i < 6) {
            if (i == 1) {
                tmp[0] = pro[x++];
                len--;
                y--;
                i++;
            } else tmp[i-2] = pro[x++];
        }
        /* Code 6: three DNA characters against "---" on the protein row;
           the third dash becomes a blank when the next code is 2 */
        if (i == 6) {
            i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
            if (a[j+1] == 2) tmp[2] = ' ';
        }
        /* i > 0: copy i DNA characters.  Code 0: one '-' on the DNA row
           against one protein character. */
        if (i > 0) {
            strncpy(&line1[len], &dna1[y], i); y+=i;
        } else {line1[len] = '-'; i = 1; tmp[0] = pro[x++];}
        strncpy(&line2[len], tmp, i);
        /* Turn tmp into the connector row: a column holding a protein
           character becomes '/', '|' or '\' depending on its position;
           blanks and dashes become blanks */
        for (k = 0; k < i; k++) {
            if (tmp[k] != ' ' && tmp[k] != '-') {
                if (k >= 2) tmp[k] = '\\';
                else if (k == 1) tmp[k] = '|';
                else tmp[k] = '/';
            } else tmp[k] = ' ';
        }
        /* single-column steps get no connector */
        if (i == 1) tmp[0] = ' ';
        strncpy(&line3[len], tmp, i);
        /* reset the scratch columns for the next step */
        tmp[0] = tmp[1] = tmp[2] = ' ';
        len += i;
        line1[len] = line2[len] =line3[len] = '\0';
        if (len >= WIDTH) {
            /* print the first WIDTH columns and keep the rest for the
               next block */
            printf("\n%5d", WIDTH*lines++);
            for (k = 10; k <= WIDTH; k+=10)
                printf(" . :");
            if (k-5 < WIDTH) printf(" .");
            c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
            line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
            printf("\n %s\n %s\n %s\n", line1, line3, line2);
            line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
            strcpy(line1, &line1[WIDTH]);
            strcpy(line2, &line2[WIDTH]);
            strcpy(line3, &line3[WIDTH]);
            len = len - WIDTH;
        }
    }
    /* remaining partial block */
    printf("\n%5d", WIDTH*lines);
    for (k = 10; k < len; k+=10)
        printf(" . :");
    if (k-5 < len) printf(" .");
    printf("\n %s\n %s\n %s\n", line1, line3, line2);
    dna1 = MemFree(dna1);
}
6203
/* Release an RDBTaxNames structure together with its scientific, common
 * and BLAST name strings.  A NULL pointer is accepted and ignored. */
void RDBTaxNamesFree(RDBTaxNamesPtr tnames)
{
    if (tnames != NULL) {
        if (tnames->sci_name != NULL) {
            MemFree(tnames->sci_name);
        }
        if (tnames->common_name != NULL) {
            MemFree(tnames->common_name);
        }
        if (tnames->blast_name != NULL) {
            MemFree(tnames->blast_name);
        }
        MemFree(tnames);
    }
}
6219
/* Make a deep copy of an RDBTaxNames structure: the tax id and the
 * s_king text are copied, the three name strings are duplicated.
 * Returns NULL when orig is NULL or the new structure cannot be
 * allocated. */
RDBTaxNamesPtr RDBTaxNamesClone(RDBTaxNamesPtr orig)
{
    RDBTaxNamesPtr copy;

    if (orig == NULL) {
        return NULL;
    }

    copy = (RDBTaxNamesPtr) MemNew(sizeof(RDBTaxNames));
    if (copy == NULL) {
        return NULL;
    }

    copy->tax_id = orig->tax_id;
    copy->sci_name = StringSave(orig->sci_name);
    copy->common_name = StringSave(orig->common_name);
    copy->blast_name = StringSave(orig->blast_name);
    StringCpy(copy->s_king, orig->s_king);

    return copy;
}
6236
FDGetTaxNamesFromBioseq(BioseqPtr bsp,Int4 taxid)6237 RDBTaxNamesPtr FDGetTaxNamesFromBioseq(BioseqPtr bsp, Int4 taxid)
6238 {
6239 BlastDefLinePtr bdsp;
6240 SeqDescrPtr sdp;
6241 UserFieldPtr ufp;
6242 UserObjectPtr uop;
6243 ObjectIdPtr oidp;
6244 CharPtr PNTR cpp;
6245 RDBTaxNamesPtr tnames = NULL;
6246
6247 bdsp = NULL;
6248 for(sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
6249
6250 if(sdp->choice == 14) { /* User object */
6251
6252 uop = sdp->data.ptrvalue;
6253 oidp = uop->type;
6254
6255 if(!StringCmp(oidp->str, TAX_DATA_OBJ_LABEL)) {
6256 for(ufp = uop->data; ufp != NULL; ufp = ufp->next) {
6257 oidp = (ObjectIdPtr) ufp->label;
6258 if(oidp->id == taxid) {
6259 cpp = ufp->data.ptrvalue;
6260 tnames = (RDBTaxNamesPtr) MemNew(sizeof(RDBTaxNames));
6261 tnames->tax_id = taxid;
6262 tnames->sci_name =StringSave(cpp[SCI_NAME_POS]);
6263 tnames->common_name = StringSave(cpp[COMMON_NAME_POS]);
6264 tnames->blast_name = StringSave(cpp[BLAST_NAME_POS]);
6265 StringCpy(tnames->s_king, cpp[S_KING_POS]);
6266 }
6267 }
6268 }
6269 }
6270 }
6271
6272 /* If there is no this user object in the Bioseq returned pointer will
6273 be NULL */
6274
6275 return tnames;
6276 }
6277
FDGetDeflineAsnFromBioseq(BioseqPtr bsp)6278 BlastDefLinePtr FDGetDeflineAsnFromBioseq(BioseqPtr bsp)
6279 {
6280 BlastDefLinePtr bdsp;
6281 AsnIoMemPtr aimp;
6282 Int4 length;
6283 ByteStorePtr bstorep;
6284 ByteStorePtr PNTR bspp;
6285 SeqDescrPtr sdp;
6286 UserFieldPtr ufp;
6287 UserObjectPtr uop;
6288 ObjectIdPtr oidp;
6289 Uint1Ptr buffer;
6290
6291 bdsp = NULL;
6292 for(sdp = bsp->descr; sdp != NULL; sdp = sdp->next) {
6293
6294 if(sdp->choice == 14) { /* User object */
6295
6296 uop = sdp->data.ptrvalue;
6297 oidp = uop->type;
6298
6299 if(!StringCmp(oidp->str, ASN_DEFLINE_OBJ_LABEL)) {
6300 ufp = uop->data;
6301 bspp = ufp->data.ptrvalue;
6302 bstorep = bspp[0];
6303 BSSeek(bstorep, 0, SEEK_SET);
6304 length = BSLen(bstorep);
6305 buffer = MemNew(length+1);
6306 BSRead(bstorep, buffer, length);
6307 aimp = AsnIoMemOpen("rb", buffer, length);
6308 bdsp = (BlastDefLinePtr)
6309 BlastDefLineSetAsnRead(aimp->aip, NULL);
6310 AsnIoMemClose(aimp);
6311 MemFree(buffer);
6312 }
6313 }
6314 }
6315
6316 /* If there is no this user object in the Bioseq returned pointer will
6317 be NULL */
6318
6319 return bdsp;
6320 }
6321
PairwiseSeqAlignHasLinkout(SeqAlignPtr sap,Uint1 linkoutType)6322 Boolean PairwiseSeqAlignHasLinkout(SeqAlignPtr sap, Uint1 linkoutType){
6323 Boolean status=FALSE;
6324 SeqAlignPtr temp;
6325
6326 temp=sap;
6327 while(temp&&!status){
6328 SeqIdPtr sip=TxGetSubjectIdFromSeqAlign(temp);
6329 BioseqPtr bsp=BioseqLockById(sip);
6330 BlastDefLinePtr bdlp, bdlptemp;
6331 if(bsp){
6332 bdlp=FDGetDeflineAsnFromBioseq(bsp);
6333 if(bdlp){
6334 bdlptemp=bdlp;
6335 while(bdlptemp&&!status){
6336 if(checkLinkoutType(bdlptemp, linkoutType)){
6337 status=TRUE;
6338 }
6339 bdlptemp=bdlptemp->next;
6340 }
6341
6342 BlastDefLineSetFree(bdlp);
6343 }
6344 BioseqUnlockById(sip);
6345 }
6346 temp=temp->next;
6347
6348 }
6349 return status;
6350 }
6351