1 /* =========================================================================== 2 * 3 * PUBLIC DOMAIN NOTICE 4 * National Center for Biotechnology Information (NCBI) 5 * 6 * This software/database is a "United States Government Work" under the 7 * terms of the United States Copyright Act. It was written as part of 8 * the author's official duties as a United States Government employee and 9 * thus cannot be copyrighted. This software/database is freely available 10 * to the public for use. The National Library of Medicine and the U.S. 11 * Government do not place any restriction on its use or reproduction. 12 * We would, however, appreciate having the NCBI and the author cited in 13 * any work or product based on this material. 14 * 15 * Although all reasonable efforts have been taken to ensure the accuracy 16 * and reliability of the software and data, the NLM and the U.S. 17 * Government do not and cannot warrant the performance or results that 18 * may be obtained by using this software or data. The NLM and the U.S. 19 * Government disclaim all warranties, express or implied, including 20 * warranties of performance, merchantability or fitness for any particular 21 * purpose. 22 23 * =========================================================================== 24 * 25 * File Name: dotseq.h 26 * 27 * Author: Fasika Aklilu 28 * 29 * Version Creation Date: 8/9/01 30 * 31 * $Revision: 6.4 $ 32 * 33 * File Description: computes local alignments for dot matrix 34 * 35 * Modifications: 36 * -------------------------------------------------------------------------- 37 * Date Name Description of modification 38 * ------- ---------- ----------------------------------------------------- 39 40 $Revision: 6.4 $ 41 $Log: dotseq.h,v $ 42 Revision 6.4 2001/08/09 16:33:18 aklilu 43 added revision 44 45 Revision 6.3 2000/07/26 18:23:10 sicotte 46 added DOT_SPI_FindBestAlnByDotPlotEx, to return rejected alignments 47 48 49 */ 50 51 #ifndef _DOTSEQ_ 52 #define _DOTSEQ_ 53 54 #ifdef __cplusplus 55 extern "C" { 56 #endif 57 58 /**************************************************************************** 59 60 INCLUDE SECTION 61 ***************************************************************************/ 62 63 #include <tofasta.h> 64 #include <seqport.h> 65 #include <sequtil.h> 66 #include <sqnutils.h> 67 #include <blastpri.h> 68 #include <explore.h> 69 #include <seqmgr.h> 70 #include <lookup.h> 71 #include <jsavlt.h> 72 73 /**************************************************************************** 74 75 DEFINES SECTION 76 ***************************************************************************/ 77 78 79 #define UNDEFINED 25 /* amino acids undefined in BLOSUM62 */ 80 #define MAX_TRIM 200 81 82 83 84 85 /**************************************************************************** 86 87 DATA STRUCTURE SECTION 88 ***************************************************************************/ 89 90 91 typedef struct hs_diag { 92 Int4 q_start; /* left most value on the graph */ 93 Int4 s_start; /* left most value on the graph */ 94 Int4 length; 95 Int4 score; 96 Int4 rdmKey; 97 } DOTDiag, PNTR DOTDiagPtr; 98 99 100 101 /* coordinates of old diags -used in history binary tree */ 102 103 typedef struct hist { 104 Int4 diag_constant; 105 Int4 q_stop; 106 } DOTHist, PNTR DOTHistPtr; 107 108 109 110 /* main struct */ 111 112 typedef struct mainseqinfo { 113 Int4Ptr PNTR matrix; /* dna matrix */ 114 Int2 maxscore; /* highest matrix score */ 115 Int2 minscore;/* lowest matrix score */ 116 Boolean is_na; 117 Int4 qlen; /* length of query sequence */ 118 Int4 slen; /* length of subject sequence */ 119 Uint1 qstrand; /* strand of query */ 120 Uint1 sstrand; /* strand of subject */ 121 BioseqPtr qbsp; /* query bioseq */ 122 BioseqPtr sbsp; /* subject bioseq */ 123 SeqLocPtr qslp; /* query seqloc pointer */ 124 SeqLocPtr sslp; /* subject seqloc pointer */ 125 Int4 q_start; /* left position on query bioseq */ 126 Int4 q_stop; /* right position on query bioseq */ 127 Int4 s_start; /* left position on subject bioseq */ 128 Int4 s_stop;/* right position on subject bioseq */ 129 Uint1Ptr qseq; /* query sequence buffer */ 130 Uint1Ptr sseq; /* subject sequence buffer */ 131 CharPtr qname; /* query accession */ 132 CharPtr sname;/* subject accession */ 133 /* hash value */ 134 Int4 word_size; /* size of hash table index */ 135 /* binary tree data */ 136 Int4 cutoff_score; /* cutoff to store hits */ 137 Avl_TreePtr tree; /* binary tree for collecting hits */ 138 Boolean first_pass; 139 Int4Ptr score_array;/* array by score for threshold ramp */ 140 Int4 unique; /* binary tree variable */ 141 Int4 tree_limit; /* upper limit for size of binary tree */ 142 /* sorted diag data */ 143 DOTDiagPtr PNTR hitlist; /* dotseq output data -- array of hits */ 144 Int4 index; /* total number of stored hits in array */ 145 } DOTMainData, PNTR DOTMainDataPtr; 146 147 148 typedef struct dotdata { 149 Uint1 xstrand; 150 Uint1 ystrand; 151 Int4 xstart; 152 Int4 xstop; 153 Int4 ystart; 154 Int4 ystop; 155 Int4 index; 156 DOTDiagPtr PNTR hitlist; 157 } DOTData, PNTR DOTDataPtr; 158 159 /* information for history binary tree */ 160 161 typedef struct info{ 162 DOTMainDataPtr mip; 163 Uint1Ptr qseq; 164 Uint1Ptr sseq; 165 Int4 q_pos; 166 Int4 s_pos; 167 Int4 wordsize; 168 Avl_TreePtr tree; 169 Boolean first_pass; 170 } DOTInfo, PNTR DOTInfoPtr; 171 172 173 174 /**************************************************************************** 175 176 FUNCTION DECLARATIONS 177 ***************************************************************************/ 178 179 /* Function: Compute all matches between two sequences. Input: 2 bioseqptrs. 180 Returns: DOTMainDataPtr with hitlist structure with start/stops in bioseq coordinates 181 */ 182 DOTMainDataPtr DOT_CreateAndStore (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 q_start, Int4 q_stop, Int4 s_start, Int4 s_stop, Int4 word_size, Int4 tree_limit, Boolean initialize); 183 /* Function: Compute all matches between two sequences. Input: 2 seqlocptrs (can specify plus or minus strand in slp). 184 Returns: Filled DOTMainDataPtr with hitlist structure with start/stops in bioseq coordinates 185 */ 186 DOTMainDataPtr DOT_CreateAndStorebyLoc (SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 tree_limit); 187 Int2 DOT_BuildHitList(DOTMainDataPtr mip, Boolean do_sort, Boolean do_countscore); 188 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom); 189 Int2 DOT_FreeMainInfo(DOTMainDataPtr mip); 190 Int2 DOT_FreeMainInfoPtrEx (DOTMainDataPtr mip); 191 Int2 DOT_FreeHitsArray (DOTDiagPtr PNTR hitlist, Int4 index); 192 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom); 193 extern DOTMainDataPtr DOT_InitMainInfo (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 word_size, Int4 tree_limit, Int4 qstart, Int4 qstop, Int4 sstart, Int4 sstop); 194 SeqAlignPtr DOT_SPI_FindBestAlnByDotPlot(SeqLocPtr slp1, SeqLocPtr slp2, Int4 wordsize, Int4 num_hits); 195 extern Uint2 DOT_AttachSeqAnnotToSeqEntry (Uint2 entityID, SeqAnnotPtr sap, BioseqPtr bsp); 196 extern Int4Ptr PNTR DOT_DNAScoringMatrix(Int4 mismatch, Int4 reward,Int4 alsize); 197 198 #ifdef __cplusplus 199 } 200 #endif 201 202 #endif /* ndef _DOTSEQ_ */ 203