1 2 /* $Id: param.h 1233 2013-10-08 18:26:31Z wrp $ */ 3 /* $Revision: 1233 $ */ 4 5 #include <sys/types.h> 6 7 #ifndef P_STRUCT 8 #define P_STRUCT 9 10 #define MAXSQ 60 11 12 /* Concurrent read version */ 13 14 struct fastr { 15 int ktup; 16 int cgap; 17 int pgap; 18 int pamfact; 19 int scfact; 20 /* these values will soon be abandoned */ 21 int bestoff; 22 int bestscale; 23 int bkfact; 24 int bktup; 25 int bestmax; 26 /* statistics based scaling values */ 27 int use_E_thresholds; 28 double E_join, E_band_opt; 29 int altflag; 30 int optflag; 31 int iniflag; 32 int optcut; 33 int optcut_set; 34 int optwid; 35 int optwid_set; 36 }; 37 38 struct prostr { 39 int gopen; 40 int gextend; 41 int width; 42 }; 43 44 /* must be identical in thr_bufs.h */ 45 struct score_count_s { 46 long s_cnt[3]; 47 long tot_scores; 48 }; 49 50 struct pstruct /* parameters */ 51 { 52 int n0; /* length of query sequence, used for statistics */ 53 int gdelval; /* value gap open (-10) */ 54 int ggapval; /* value for additional residues in gap (-2) */ 55 int gshift; /* frameshift for fastx, fasty */ 56 int gsubs; /* nt substitution in fasty */ 57 int p_d_mat; /* dna match penalty */ 58 int p_d_mis; /* dna mismatch penalty */ 59 int p_d_set; /* using match/mismatch */ 60 int n1_low; 61 int n1_high; /* sequence length limits */ 62 int score_ix; /* index to sorted score */ 63 int show_ident; /* flag - show identical lalign alignment */ 64 int nseq; /* number of different sequences (for lalign) */ 65 int zsflag; /* use scalebest() */ 66 int zsflag2; /* statistics for best shuffle */ 67 int zsflag_f; /* use scalebest() */ 68 int zs_win; /* window shuffle size */ 69 int shuffle_dna3; /* shuffle dna as codons */ 70 int histint; /* histogram interval */ 71 unsigned char sq[MAXSQ+1]; 72 int hsq[MAXSQ+1]; 73 int nsq; /* length of normal sq */ 74 /* int pamh1[MAXSQ+1]; */ /* identical match score (diagonal scores) */ 75 /* int *pamh2[MAXSQ+1]; */ /* ktup match score */ 76 int ext_sq_set; /* flag for using extended alphabet */ 77 unsigned char sqx[MAXSQ+1]; 78 int hsqx[MAXSQ+1]; 79 int c_nt[MAXSQ+1]; 80 int nsqx; /* length of extended sq */ 81 int nsq_e; /* effective nsq */ 82 int dnaseq; /* -1 = not set (protein); 0 = protein; 1 = DNA; 2 = other, 3 RNA */ 83 int nt_align; /* DNA/RNA alignment = 1 */ 84 int debug_lib; 85 int tr_type; /* codon table */ 86 int sw_flag; 87 char pamfile[MAX_FN]; /* pam file name */ 88 char pamfile_save[MAX_FN]; /* original pam file */ 89 char pam_name[MAX_FN]; 90 char pgpfile[MAX_FN]; 91 int pgpfile_type; 92 float pamscale; /* ln(2)/3 or ln(2)/2 */ 93 float ulambda; /* ungapped lambda */ 94 float entropy; /* bits/position */ 95 float tfract_id; /* target fraction id */ 96 int pam_pssm; 97 int pam_set; 98 int pam_variable; 99 int have_pam2; 100 int **pam2[2]; /* set of 2D scoring matrices; [0] lower-case 'x', [1] upper/lower case */ 101 int **pam2p[2]; 102 int pamoff; /* offset for pam values */ 103 int pam_l, pam_h, pam_xx, pam_xm; /* lowest, highest pam value */ 104 int pam_x_set; 105 int pam_x_id_sim; /* =0 -> 'N,X' identical but not similar; 106 =1 -> 'N,X' identical+similar; 107 <0 -> 'N,X' not identical, not similar */ 108 int pam_ms; /* use a Mass Spec pam matrix */ 109 void *fp_struct; /* function specific parameters based on algorith/scoring matrix */ 110 int LK_set; 111 double pLambda, pK, pH; /* Karlin-Altscul parameters */ 112 int maxlen; 113 int max_repeat; /* used for repeat count in ssearch34/lalign */ 114 int repeat_thresh; 115 char *other_info; 116 double e_cut; /* cutoff for scores */ 117 double e_cut_r; /* cutoff for multiple local alignments */ 118 double zs_off; /* z-score offset from sampling */ 119 int do_rep; /* enable multiple alignments */ 120 int can_pre_align; /* flag for have_ares & 0x1 pre-alignments */ 121 long zdb_size; /* force database size */ 122 int zdb_size_set; /* flag for user -Z */ 123 int pgm_id; 124 int pseudocts; 125 int shuff_node; 126 union { 127 struct fastr fa; 128 struct prostr pr; 129 } param_u; 130 }; 131 132 #include "rstruct.h" 133 134 /* the seq_record has all the invariant data about a sequence - 135 sequence length, libstr, sequence itself, etc. 136 it does not have the results information 137 we can have 1, 2, or 6 (obsolete tfasta) results records for a sequence, 138 but there will still be only one sequence record. 139 */ 140 141 struct annot_str { 142 /* information for conventional annotations */ 143 unsigned char *aa1_ann; /* annotation string */ 144 /* information for "rich" annotations */ 145 int n_annot; /* length of ann_arr_str array */ 146 int n_domains; /* length of domain_arr_p array */ 147 struct annot_entry *annot_arr_p; /* array[n_annot] of annot_entry's for all annotations */ 148 struct annot_entry **s_annot_arr_p; /* sorted version of annots */ 149 }; 150 151 /* ann_str keeps information on "rich" annotations, position, type, value */ 152 struct annot_entry { 153 long pos; 154 long end; 155 char label; /* currently -V *#%!@ symbols, plus 'V' for variant */ 156 unsigned char value; /* must be amino acid residue, binary encoded */ 157 char *comment; 158 int target; /* 0 for query/ 1 for library */ 159 }; 160 161 /* domain_str keeps information on "rich" annotations, position, type, value */ 162 struct domfeat_data { 163 struct annot_entry *annot_entry_p; 164 struct domfeat_data *next; 165 long pos; /* annotation position */ 166 long a_pos; /* aligned annotation position */ 167 long end_pos; /* domain annotation end */ 168 int score; /* score of current region */ 169 int n_ident; /* count for percent id */ 170 int n_alen; /* align len for percent id */ 171 }; 172 173 /* seq_record has the data required to do a calculation */ 174 struct seq_record { 175 unsigned char *aa1b; /* sequence buffer */ 176 struct annot_str *annot_p; 177 int n1; 178 long l_offset; /* q_offset/l_offset set outside getlib() based on chunks; 0-based */ 179 long l_off; /* q_off/l_off comes from @C:123, and is 1-based */ 180 int index; /* index in search */ 181 #ifdef DEBUG 182 long adler32_crc; 183 #endif 184 }; 185 186 /* mseq_record has meta data not required to calculate score or alignment */ 187 struct mseq_record { 188 int *n1tot_p; 189 #ifdef USE_FSEEKO 190 off_t lseek; 191 #else 192 long lseek; 193 #endif 194 struct lmf_str *m_file_p; 195 int cont; 196 char libstr[MAX_UID]; 197 char *bline; 198 int bline_max; 199 int annot_req_flag; 200 int index /* index in search */; 201 #ifdef DEBUG 202 long adler32_crc; 203 #endif 204 }; 205 206 struct seqr_chain { 207 struct seq_record *seqr_base; 208 struct mseq_record *mseqr_base; 209 struct seqr_chain *next; 210 /* struct lib_seq_info *ldb_info; */ 211 int max_chain_seqs; 212 int cur_seq_cnt; 213 unsigned char *aa1b_base; 214 int aa1b_size; 215 int aa1b_next; 216 int contiguous; 217 }; 218 219 struct getlib_str { 220 int lcont; /* lcont save */ 221 int ocont; /* ocont save */ 222 int eof; /* done with this file */ 223 #ifdef USE_FSEEKO 224 off_t lseek; 225 #else 226 long lseek; 227 #endif 228 long loffset; /* loffset save */ 229 char libstr[MAX_UID]; /* repository for libstr */ 230 int n_libstr; /* length of libstr */ 231 unsigned char *aa1save; /* overlapping sequence save */ 232 struct lib_struct *lib_list_p; 233 int *n1tot_ptr, *n1tot_cur; 234 int n1tot_cnt; 235 int n1tot_v; 236 long tot_memK; /* cummulative amount of memory allocated for aa1b; 237 used to limit memory use */ 238 long max_memK; /* allow separate memory limits for main,link 239 searches */ 240 long lost_memK; /* check for waste */ 241 struct seqr_chain *start_seqr_chain; 242 struct seqr_chain *cur_seqr_chain; 243 int use_memory; 244 }; 245 246 #endif /* P_STRUCT */ 247 248 #ifndef A_STRUCT 249 #include "aln_structs.h" 250 #endif 251