1 /* $Id: dynprog.h 221582 2020-01-30 21:28:29Z twu $ */ 2 #ifndef DYNPROG_INCLUDED 3 #define DYNPROG_INCLUDED 4 #ifdef HAVE_CONFIG_H 5 #include <config.h> /* For HAVE_SSE2, HAVE_SSE4_1 */ 6 #endif 7 8 9 /* #define DEBUG_SIMD 1 */ 10 /* #define DEBUG_AVX2 1 */ 11 12 /* Used by dynprog_simd.c and dynprog.c when comparing using DEBUG_SIMD or DEBUG_AVX2 */ 13 /* #define ZERO_INITIAL_GAP_PENALTY 1 */ /* Needed for bam_indelfix */ 14 #define INFINITE_INITIAL_GAP_PENALTY 1 /* Prevents any gaps on row 0 or column 0 */ 15 16 /* Note: If we select neither ZERO_INITIAL_GAP_PENALTY or 17 INFINITE_INITIAL_GAP_PENALTY, we have standard behavior, which 18 allows open+extend on row 0 or column 0 */ 19 20 21 22 /* BEST_LOCAL is a local alignment, whereas QUERYEND_INDELS and 23 QUERYEND_NOGAPS are global. QUERYEND_GAP allows an intron at the 24 end */ 25 typedef enum {QUERYEND_GAP, QUERYEND_INDELS, QUERYEND_NOGAPS, BEST_LOCAL} Endalign_T; 26 typedef struct Dynprog_T *Dynprog_T; /* Needed before header files below */ 27 28 #include "bool.h" 29 #include "list.h" 30 #include "genomicpos.h" 31 #include "pairpool.h" 32 #include "chrnum.h" 33 #include "iit-read.h" 34 #include "splicetrie_build.h" /* For splicetype */ 35 #include "genome.h" 36 #include "mode.h" 37 38 #ifdef GSNAP 39 #include "compress.h" 40 #endif 41 42 43 #define FULLMATCH 3 44 #define HALFMATCH 1 45 #define AMBIGUOUS 3 /* Previously set to 0, but then strings of N's cause problems */ 46 47 /* These values were set to -5, -4, -3, but this led to chopped ends 48 in GMAP alignments, and failure to find chimeras */ 49 #define MISMATCH_HIGHQ -3 50 #define MISMATCH_MEDQ -2 51 #define MISMATCH_LOWQ -1 52 53 54 typedef enum {HIGHQ, MEDQ, LOWQ, ENDQ} Mismatchtype_T; 55 #define NMISMATCHTYPES 4 56 57 /* Note: HIGHQ, MEDQ, and LOWQ indicate parameters for high, medium, 58 and low sequence quality, respectively */ 59 #define DEFECT_HIGHQ 0.003 60 #define DEFECT_MEDQ 0.014 61 62 #define SINGLE_OPEN_HIGHQ -8 63 #define SINGLE_OPEN_MEDQ -7 64 #define SINGLE_OPEN_LOWQ -6 65 66 #define SINGLE_EXTEND_HIGHQ -3 67 #define SINGLE_EXTEND_MEDQ -2 68 #define SINGLE_EXTEND_LOWQ -1 69 70 #define PAIRED_OPEN_HIGHQ -8 71 #define PAIRED_OPEN_MEDQ -7 72 #define PAIRED_OPEN_LOWQ -6 73 74 #define PAIRED_EXTEND_HIGHQ -3 75 #define PAIRED_EXTEND_MEDQ -2 76 #define PAIRED_EXTEND_LOWQ -1 77 78 79 #define UNKNOWNJUMP -1000000 80 81 typedef char Score8_T; 82 typedef char Direction8_T; 83 84 typedef short Score16_T; 85 typedef short Direction16_T; 86 87 typedef short Pairdistance_T; 88 89 /* For standard dynamic programming. Use ints, so NEG_INFINITY_32 works. */ 90 typedef int Score32_T; 91 typedef int Direction32_T; 92 93 /* Genome is on the horizontal axis. Query sequence is on the vertical axis. Dynamic programming fills in matrices column by column */ 94 /* The following values are for directions_nogap. For directions_Egap, the choices are DIAG or not DIAG (meaning HORIZ). */ 95 /* For directions_Fgap, the choices are DIAG or not DIAG (meaning VERT) */ 96 #define VERT -2 /* or VERT == -3 in SIMD code. Don't check for dir == VERT. Check instead if dir == DIAG or dir == HORIZ */ 97 #define HORIZ -1 98 #define DIAG 0 /* Pre-dominant case. Directions_alloc clears to this value. */ 99 100 #define NEG_INFINITY_8 (-128) 101 #define POS_INFINITY_8 (127) 102 #define MAX_CHAR (127) 103 104 #define NEG_INFINITY_16 (-32768) 105 #define POS_INFINITY_16 (32767) 106 #define MAX_SHORT (32767) 107 108 /* We can allow -128 and -32768 for NEG_INFINITY in SIMD procedures, 109 because we are using saturation */ 110 #ifdef HAVE_AVX2 111 #define ALIGN_SIZE 32 112 #define ONE_CHAR 1 113 #define MID_CHAR_INSERT 16 114 #define LAST_CHAR_INSERT 0 115 #define SIMD_NCHARS 32 /* 32 8-bit chars in 256 bits */ 116 117 #define ONE_SHORT 2 118 #define MID_SHORT_INSERT 8 119 #define LAST_SHORT_INSERT 0 120 #define SIMD_NSHORTS 16 /* 16 16-bit shorts in 256 bits */ 121 122 #define LAST_CHAR_NONAVX2 15 /* For DEBUG_AVX2 */ 123 #define LAST_SHORT_NONAVX2 14 /* For DEBUG_AVX2. (8 - 1) * 2 */ 124 125 #ifdef DEBUG_AVX2 126 #define SIMD_NCHARS_NONAVX2 16 127 #define SIMD_NSHORTS_NONAVX2 8 128 #endif 129 130 #elif defined(HAVE_SSE4_1) || defined(HAVE_SSE2) 131 #define ALIGN_SIZE 16 132 #define ONE_CHAR 1 133 #define LAST_CHAR_SHIFT 15 134 #define SIMD_NCHARS 16 /* 16 8-bit chars in 128 bits */ 135 136 #define ONE_SHORT 2 137 #define LAST_SHORT_SHIFT 14 /* (8 - 1) * 2 */ 138 #define SIMD_NSHORTS 8 /* 8 16-bit shorts in 128 bits */ 139 140 #define LAST_CHAR_NONAVX2 15 /* For DEBUG_SIMD */ 141 #define LAST_SHORT_NONAVX2 14 /* For DEBUG_SIMD */ 142 143 #endif 144 145 /* Can allow -32768 in non-SIMD procedures, because we are using ints */ 146 #define NEG_INFINITY_32 (-32768) 147 #define NEG_INFINITY_INT (-32768) 148 149 150 /* This is still needed, because sequences passed into compute_scores 151 might be lower-case */ 152 #define PREUC 1 153 154 extern int use8p_size[NMISMATCHTYPES]; 155 extern Pairdistance_T **pairdistance_array[NMISMATCHTYPES]; 156 #ifndef HAVE_SSE4_1 157 extern Pairdistance_T **pairdistance_array_plus_128[NMISMATCHTYPES]; 158 #endif 159 extern bool **consistent_array[3]; 160 extern int *nt_to_int_array; 161 162 163 struct Space_single_T { 164 void **matrix_ptrs, *matrix_space; 165 void **directions_ptrs_0, *directions_space_0; 166 void **directions_ptrs_1, *directions_space_1; 167 void **directions_ptrs_2, *directions_space_2; 168 }; 169 170 struct Space_double_T { 171 void **upper_matrix_ptrs, *upper_matrix_space; 172 void **upper_directions_ptrs_0, *upper_directions_space_0; 173 void **upper_directions_ptrs_1, *upper_directions_space_1; 174 void **lower_matrix_ptrs, *lower_matrix_space; 175 void **lower_directions_ptrs_0, *lower_directions_space_0; 176 void **lower_directions_ptrs_1, *lower_directions_space_1; 177 }; 178 179 180 #define T Dynprog_T 181 struct T { 182 int max_rlength; 183 int max_glength; 184 185 #ifdef DEBUG12 186 struct Int3_T **matrix3_ptrs, *matrix3_space; 187 #endif 188 189 #if !defined(HAVE_SSE2) || defined(DEBUG_SIMD) 190 Score32_T **matrix_ptrs, *matrix_space; 191 Direction32_T **directions_ptrs_0, *directions_space_0; 192 Direction32_T **directions_ptrs_1, *directions_space_1; 193 Direction32_T **directions_ptrs_2, *directions_space_2; 194 #endif 195 #ifdef HAVE_SSE2 196 union { 197 struct Space_single_T one; 198 struct Space_double_T two; 199 } aligned; 200 #ifdef DEBUG_AVX2 201 union { 202 struct Space_single_T one; 203 struct Space_double_T two; 204 } aligned_std; 205 #endif 206 #endif 207 int nspaces; 208 }; 209 210 211 extern char * 212 Dynprog_endalign_string (Endalign_T endalign); 213 214 extern int 215 Dynprog_score (int matches, int mismatches, int qopens, int qindels, int topens, int tindels, 216 double defect_rate); 217 218 extern T 219 Dynprog_new (int maxlookback, int extraquerygap, int maxpeelback, 220 int extramaterial_end, int extramaterial_paired, 221 bool doublep); 222 extern void 223 Dynprog_free (T *old); 224 225 extern bool 226 Dynprog_consistent_p (int c, int g, int g_alt, int genestrand); 227 228 extern void 229 Dynprog_term (Mode_T mode); 230 extern void 231 Dynprog_init (Mode_T mode); 232 233 extern void 234 Dynprog_compute_bands (int *lband, int *uband, int rlength, int glength, int extraband, bool widebandp); 235 236 237 extern void 238 Dynprog_Matrix32_print (Score32_T **matrix, int rlength, int glength, char *rsequence, 239 char *gsequence, char *gsequencealt, 240 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh, 241 bool watsonp, bool revp, int lband, int uband); 242 243 extern void 244 Dynprog_Directions32_print (Direction32_T **directions_nogap, Direction32_T **directions_Egap, Direction32_T **directions_Fgap, 245 int rlength, int glength, char *rsequence, char *gsequence, char *gsequence_alt, 246 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh, 247 bool watsonp, bool revp, int lband, int uband); 248 249 extern Score32_T ** 250 Dynprog_standard (Direction32_T ***directions_nogap, Direction32_T ***directions_Egap, Direction32_T ***directions_Fgap, 251 T this, char *rsequence, char *gsequence, char *gsequence_alt, 252 int rlength, int glength, 253 int goffset, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, 254 Mismatchtype_T mismatchtype, Score32_T open, Score32_T extend, 255 int lband, int uband, bool jump_late_p, bool revp, int saturation, 256 bool upperp, bool lowerp); 257 258 extern List_T 259 Dynprog_traceback_std (List_T pairs, int *nmatches, int *nmismatches, int *nopens, int *nindels, 260 Direction32_T **directions_nogap, Direction32_T **directions_Egap, Direction32_T **directions_Fgap, 261 int r, int c, char *rsequence, char *rsequenceuc, char *gsequence, char *gsequence_alt, 262 int queryoffset, int genomeoffset, Pairpool_T pairpool, bool revp, 263 Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, int genestrand, 264 int dynprogindex); 265 266 267 #undef T 268 #endif 269