1 /* ===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *            National Center for Biotechnology Information (NCBI)
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government do not place any restriction on its use or reproduction.
12 *  We would, however, appreciate having the NCBI and the author cited in
13 *  any work or product based on this material.
14 *
15 *  Although all reasonable efforts have been taken to ensure the accuracy
16 *  and reliability of the software and data, the NLM and the U.S.
17 *  Government do not and cannot warrant the performance or results that
18 *  may be obtained by using this software or data. The NLM and the U.S.
19 *  Government disclaim all warranties, express or implied, including
20 *  warranties of performance, merchantability or fitness for any particular
21 *  purpose.
22 
23 * ===========================================================================
24 *
25 * File Name:  dotseq.h
26 *
27 * Author:  Fasika Aklilu
28 *
29 * Version Creation Date:   8/9/01
30 *
31 * $Revision: 6.4 $
32 *
33 * File Description: computes local alignments for dot matrix
34 *
35 * Modifications:
36 * --------------------------------------------------------------------------
37 * Date     Name        Description of modification
38 * -------  ----------  -----------------------------------------------------
39 
40 $Revision: 6.4 $
41 $Log: dotseq.h,v $
42 Revision 6.4  2001/08/09 16:33:18  aklilu
43 added revision
44 
45 Revision 6.3  2000/07/26 18:23:10  sicotte
46 added DOT_SPI_FindBestAlnByDotPlotEx, to return rejected alignments
47 
48 
49 */
50 
51 #ifndef _DOTSEQ_
52 #define _DOTSEQ_
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif
57 
58   /****************************************************************************
59 
60       INCLUDE SECTION
61    ***************************************************************************/
62 
63 #include <tofasta.h>
64 #include <seqport.h>
65 #include <sequtil.h>
66 #include <sqnutils.h>
67 #include <blastpri.h>
68 #include <explore.h>
69 #include <seqmgr.h>
70 #include <lookup.h>
71 #include <jsavlt.h>
72 
73  /****************************************************************************
74 
75       DEFINES SECTION
76  ***************************************************************************/
77 
78 
#define UNDEFINED  25     /* amino acids undefined in BLOSUM62 */
#define MAX_TRIM   200    /* NOTE(review): not referenced in this header; meaning and units are undocumented -- confirm against dotseq.c */
81 
82 
83 
84 
85  /****************************************************************************
86 
87       DATA STRUCTURE SECTION
88  ***************************************************************************/
89 
90 
91   typedef struct hs_diag {
92     Int4 q_start; /* left most value on the graph */
93     Int4 s_start; /* left most value on the graph */
94     Int4 length;
95     Int4 score;
96     Int4 rdmKey;
97   } DOTDiag, PNTR DOTDiagPtr;
98 
99 
100 
101   /* coordinates of old diags -used in history binary tree */
102 
103   typedef struct hist {
104     Int4    diag_constant;
105     Int4    q_stop;
106   } DOTHist, PNTR DOTHistPtr;
107 
108 
109 
110   /* main struct */
111 
112   typedef struct mainseqinfo {
113     Int4Ptr  PNTR matrix; /* dna matrix */
114     Int2     maxscore; /* highest matrix score */
115     Int2     minscore;/* lowest matrix score */
116     Boolean     is_na;
117     Int4        qlen; /* length of query sequence */
118     Int4        slen; /* length of subject sequence */
119     Uint1       qstrand; /* strand of query */
120     Uint1       sstrand;  /* strand of subject */
121     BioseqPtr   qbsp; /* query bioseq */
122     BioseqPtr   sbsp; /* subject bioseq */
123     SeqLocPtr   qslp; /* query seqloc pointer */
124     SeqLocPtr   sslp; /* subject seqloc pointer */
125     Int4        q_start;  /* left position on query bioseq */
126     Int4        q_stop;   /* right position on query bioseq */
127     Int4        s_start;  /* left position on subject bioseq */
128     Int4        s_stop;/* right position on subject bioseq */
129     Uint1Ptr    qseq; /* query sequence buffer */
130     Uint1Ptr    sseq; /* subject sequence buffer */
131     CharPtr     qname; /* query accession */
132     CharPtr     sname;/* subject accession */
133     /* hash value */
134     Int4        word_size; /* size of hash table index */
135     /* binary tree data */
136     Int4        cutoff_score; /* cutoff to store hits */
137     Avl_TreePtr tree; /* binary tree for collecting hits */
138     Boolean     first_pass;
139     Int4Ptr     score_array;/* array by score for threshold ramp */
140     Int4        unique; /* binary tree variable */
141     Int4        tree_limit; /* upper limit for size of binary tree */
142     /* sorted diag data */
143     DOTDiagPtr  PNTR  hitlist; /* dotseq output data -- array of hits */
144     Int4        index; /* total number of stored hits in array */
145   } DOTMainData, PNTR DOTMainDataPtr;
146 
147 
148   typedef struct dotdata {
149     Uint1 xstrand;
150     Uint1 ystrand;
151     Int4  xstart;
152     Int4  xstop;
153     Int4  ystart;
154     Int4  ystop;
155     Int4  index;
156     DOTDiagPtr    PNTR  hitlist;
157   } DOTData, PNTR DOTDataPtr;
158 
159   /* information for history binary tree */
160 
161   typedef struct info{
162     DOTMainDataPtr  mip;
163     Uint1Ptr    qseq;
164     Uint1Ptr    sseq;
165     Int4        q_pos;
166     Int4        s_pos;
167     Int4        wordsize;
168     Avl_TreePtr tree;
169     Boolean     first_pass;
170   } DOTInfo, PNTR  DOTInfoPtr;
171 
172 
173 
174  /****************************************************************************
175 
176       FUNCTION DECLARATIONS
177  ***************************************************************************/
178 
179   /* Function: Compute all matches between two sequences. Input: 2 bioseqptrs.
180      Returns: DOTMainDataPtr with hitlist structure with start/stops in bioseq coordinates
181    */
182 DOTMainDataPtr DOT_CreateAndStore (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 q_start, Int4 q_stop, Int4 s_start, Int4 s_stop, Int4 word_size, Int4 tree_limit, Boolean initialize);
183   /* Function: Compute all matches between two sequences. Input: 2 seqlocptrs (can specify plus or minus strand in slp).
184      Returns: Filled DOTMainDataPtr with hitlist structure with start/stops in bioseq coordinates
185   */
186 DOTMainDataPtr DOT_CreateAndStorebyLoc (SeqLocPtr slp1, SeqLocPtr slp2, Int4 word_size, Int4 tree_limit);
187 Int2 DOT_BuildHitList(DOTMainDataPtr mip, Boolean do_sort, Boolean do_countscore);
188 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom);
189 Int2 DOT_FreeMainInfo(DOTMainDataPtr mip);
190 Int2 DOT_FreeMainInfoPtrEx (DOTMainDataPtr mip);
191 Int2 DOT_FreeHitsArray (DOTDiagPtr PNTR hitlist, Int4 index);
192 Boolean DOT_GetSeqs (DOTMainDataPtr mip, Boolean is_zoom);
193 extern DOTMainDataPtr DOT_InitMainInfo (DOTMainDataPtr mip, BioseqPtr qbsp, BioseqPtr sbsp, Int4 word_size, Int4 tree_limit, Int4 qstart, Int4 qstop, Int4 sstart, Int4 sstop);
194 SeqAlignPtr DOT_SPI_FindBestAlnByDotPlot(SeqLocPtr slp1, SeqLocPtr slp2, Int4 wordsize, Int4 num_hits);
195 extern Uint2 DOT_AttachSeqAnnotToSeqEntry (Uint2 entityID, SeqAnnotPtr sap, BioseqPtr bsp);
196 extern Int4Ptr PNTR DOT_DNAScoringMatrix(Int4 mismatch, Int4 reward,Int4 alsize);
197 
198 #ifdef __cplusplus
199 }
200 #endif
201 
202 #endif /* ndef _DOTSEQ_ */
203