1 
2 /* $Id: param.h 1233 2013-10-08 18:26:31Z wrp $ */
3 /* $Revision: 1233 $  */
4 
5 #include <sys/types.h>
6 
7 #ifndef P_STRUCT
8 #define P_STRUCT
9 
/* MAXSQ sizes the alphabet tables in struct pstruct (sq, hsq, sqx, hsqx,
   c_nt), each of which is dimensioned [MAXSQ+1] */
#define MAXSQ 60
11 
12 /* Concurrent read version */
13 
/* fastr: one of the two alternative parameter sets kept in
   pstruct.param_u (as the `fa` member) */
struct fastr
{
  int ktup;
  int cgap;
  int pgap;
  int pamfact;
  int scfact;

  /* the next five values are slated for removal */
  int bestoff;
  int bestscale;
  int bkfact;
  int bktup;
  int bestmax;

  /* scaling values derived from statistics */
  int use_E_thresholds;
  double E_join;
  double E_band_opt;

  int altflag;
  int optflag;
  int iniflag;
  int optcut;
  int optcut_set;
  int optwid;
  int optwid_set;
};
37 
/* prostr: the other alternative parameter set kept in pstruct.param_u
   (as the `pr` member) */
struct prostr
{
    int gopen;
    int gextend;
    int width;
};
43 
44 /* must be identical in thr_bufs.h */
/* score_count_s: three per-slot counters plus a running total.
   Keep this layout in sync with the copy declared in thr_bufs.h. */
struct score_count_s
{
  long s_cnt[3];
  long tot_scores;
};
49 
50 struct pstruct		/* parameters */
51 {
52   int n0;	/* length of query sequence, used for statistics */
53   int gdelval;	/* value gap open (-10) */
54   int ggapval;	/* value for additional residues in gap (-2) */
55   int gshift;	/* frameshift for fastx, fasty */
56   int gsubs;	/* nt substitution in fasty */
57   int p_d_mat;	/* dna match penalty */
58   int p_d_mis;	/* dna mismatch penalty */
59   int p_d_set;	/* using match/mismatch */
60   int n1_low;
61   int n1_high;	/* sequence length limits */
62   int score_ix;	/* index to sorted score */
63   int show_ident;	/* flag - show identical lalign alignment */
64   int nseq;	/* number of different sequences (for lalign) */
65   int zsflag;	/* use scalebest() */
66   int zsflag2;	/* statistics for best shuffle */
67   int zsflag_f;	/* use scalebest() */
68   int zs_win;	/* window shuffle size */
69   int shuffle_dna3;	/* shuffle dna as codons */
70   int histint;		/* histogram interval */
71   unsigned char sq[MAXSQ+1];
72   int hsq[MAXSQ+1];
73   int nsq;		/* length of normal sq */
74   /* int pamh1[MAXSQ+1]; */	/* identical match score (diagonal scores) */
75   /* int *pamh2[MAXSQ+1]; */	/* ktup match score */
76   int ext_sq_set;	/* flag for using extended alphabet */
77   unsigned char sqx[MAXSQ+1];
78   int hsqx[MAXSQ+1];
79   int c_nt[MAXSQ+1];
80   int nsqx;	/* length of extended sq */
81   int nsq_e;	/* effective nsq */
82   int dnaseq;	/* -1 = not set (protein); 0 = protein; 1 = DNA; 2 = other, 3 RNA */
83   int nt_align;	/* DNA/RNA alignment = 1 */
84   int debug_lib;
85   int tr_type;	/* codon table */
86   int sw_flag;
87   char pamfile[MAX_FN];	/* pam file name */
88   char pamfile_save[MAX_FN];  /* original pam file */
89   char pam_name[MAX_FN];
90   char pgpfile[MAX_FN];
91   int pgpfile_type;
92   float pamscale;	/* ln(2)/3 or ln(2)/2 */
93   float ulambda;	/* ungapped lambda */
94   float entropy;	/* bits/position */
95   float tfract_id;	/* target fraction id */
96   int pam_pssm;
97   int pam_set;
98   int pam_variable;
99   int have_pam2;
100   int **pam2[2];	/* set of 2D scoring matrices; [0] lower-case 'x', [1] upper/lower case */
101   int **pam2p[2];
102   int pamoff;	/* offset for pam values */
103   int pam_l, pam_h, pam_xx, pam_xm;	/* lowest, highest pam value */
104   int pam_x_set;
105   int pam_x_id_sim;	/* =0 -> 'N,X' identical but not similar;
106 			   =1 -> 'N,X' identical+similar;
107 			   <0 -> 'N,X' not identical, not similar */
108   int pam_ms;		/* use a Mass Spec pam matrix */
109   void *fp_struct;	/* function specific parameters based on algorith/scoring matrix */
110   int LK_set;
111   double pLambda, pK, pH;	/* Karlin-Altscul parameters */
112   int maxlen;
113   int max_repeat;	/* used for repeat count in ssearch34/lalign */
114   int repeat_thresh;
115   char *other_info;
116   double e_cut;		/* cutoff for scores */
117   double e_cut_r; 	/* cutoff for multiple local alignments */
118   double zs_off;	/* z-score offset from sampling */
119   int do_rep;		/* enable multiple alignments */
120   int can_pre_align;	/* flag for have_ares & 0x1 pre-alignments */
121   long zdb_size; 	/* force database size */
122   int zdb_size_set;	/* flag for user -Z */
123   int pgm_id;
124   int pseudocts;
125   int shuff_node;
126   union {
127     struct fastr fa;
128     struct prostr pr;
129   } param_u;
130 };
131 
132 #include "rstruct.h"
133 
134 /* the seq_record has all the invariant data about a sequence -
135    sequence length, libstr, sequence itself, etc.
136    it does not have the results information
137    we can have 1, 2, or 6 (obsolete tfasta) results records for a sequence,
138    but there will still be only one sequence record.
139 */
140 
/* annot_str: the annotations attached to one sequence, in both the
   conventional (string) and the "rich" (array of annot_entry) forms */
struct annot_str
{
  /* conventional annotation */
  unsigned char *aa1_ann;		/* annotation string */

  /* "rich" annotations */
  int n_annot;				/* number of entries in annot_arr_p */
  int n_domains;			/* number of domains -- NOTE(review): the
					   original comment names a domain_arr_p
					   array that is not declared here */
  struct annot_entry *annot_arr_p;	/* array[n_annot] holding every annot_entry */
  struct annot_entry **s_annot_arr_p;	/* sorted version of the annotations */
};
150 
/* annot_entry keeps information on one "rich" annotation: position, type (label), value */
/* annot_entry: a single "rich" annotation record */
struct annot_entry
{
  long pos;
  long end;
  char label;		/* currently one of the -V symbols *#%!@, or 'V' for variant */
  unsigned char value;	/* must be an amino acid residue, binary encoded */
  char *comment;
  int target;		/* 0 for query / 1 for library */
};
160 
/* domfeat_data is a linked-list node that pairs an annot_entry with its positions, region score, and percent-identity counts */
/* domfeat_data: node of a singly linked list (through `next`) carrying
   position, score and identity counters for one annot_entry */
struct domfeat_data
{
  struct annot_entry *annot_entry_p;
  struct domfeat_data *next;
  long pos;		/* position of the annotation */
  long a_pos;		/* position of the annotation in the alignment */
  long end_pos;		/* end of the domain annotation */
  int score;		/* score of the current region */
  int n_ident;		/* identity count, for percent id */
  int n_alen;		/* alignment length, for percent id */
};
172 
173 /* seq_record has the data required to do a calculation */
/* seq_record: the per-sequence data needed to carry out a calculation */
struct seq_record
{
  unsigned char *aa1b;		/* buffer holding the sequence */
  struct annot_str *annot_p;
  int n1;
  long l_offset;		/* 0-based; q_offset/l_offset is set outside
				   getlib() based on chunks */
  long l_off;			/* 1-based; q_off/l_off comes from @C:123 */
  int index;			/* index in search */
#ifdef DEBUG
  long adler32_crc;		/* present only in DEBUG builds */
#endif
};
185 
186 /* mseq_record has meta data not required to calculate score or alignment */
187 struct mseq_record {
188   int *n1tot_p;
189 #ifdef USE_FSEEKO
190   off_t lseek;
191 #else
192   long lseek;
193 #endif
194   struct lmf_str *m_file_p;
195   int cont;
196   char libstr[MAX_UID];
197   char *bline;
198   int bline_max;
199   int annot_req_flag;
200   int index		/* index in search */;
201 #ifdef DEBUG
202   long adler32_crc;
203 #endif
204 };
205 
/* seqr_chain: one link of a chain; holds base pointers to seq_record and
   mseq_record storage, the link to the next element, and the count/size
   fields that go with them and with the aa1b byte buffer */
struct seqr_chain
{
  struct seq_record *seqr_base;
  struct mseq_record *mseqr_base;
  struct seqr_chain *next;

  int max_chain_seqs;
  int cur_seq_cnt;

  unsigned char *aa1b_base;
  int aa1b_size;
  int aa1b_next;

  int contiguous;
};
218 
219 struct getlib_str {
220   int lcont;		/* lcont save */
221   int ocont;		/* ocont save */
222   int eof;		/* done with this file */
223 #ifdef USE_FSEEKO
224   off_t lseek;
225 #else
226   long lseek;
227 #endif
228   long loffset;		/* loffset save */
229   char libstr[MAX_UID];	/* repository for libstr */
230   int n_libstr;		/* length of libstr */
231   unsigned char *aa1save;	/* overlapping sequence save */
232   struct lib_struct *lib_list_p;
233   int *n1tot_ptr, *n1tot_cur;
234   int n1tot_cnt;
235   int n1tot_v;
236   long tot_memK;	/* cummulative amount of memory allocated for aa1b;
237 			   used to limit memory use */
238   long max_memK;	/* allow separate memory limits for main,link
239 			   searches */
240   long lost_memK;	/* check for waste */
241   struct seqr_chain *start_seqr_chain;
242   struct seqr_chain *cur_seqr_chain;
243   int use_memory;
244 };
245 
246 #endif	/* P_STRUCT */
247 
248 #ifndef A_STRUCT
249 #include "aln_structs.h"
250 #endif
251