1 /* CM_HB_MX, ScanMatrix_t, and GammaHitMx_t implementations:
2 * dynamic programming matrices for CMs
3 *
4 * CM_HB_MX is based heavily on HMMER 3's p7_gmx.c module.
5 *
6 * Table of contents:
7 * 1. CM_HB_MX data structure functions,
8 * matrix of float scores for HMM banded CM alignment/search
9 * 2. CM_HB_SHADOW_MX data structure functions
10 * HMM banded shadow matrix for tracing back HMM banded CM parses
11 * 3. ScanMatrix_t data structure functions,
12 * auxiliary info and matrix of float and/or int scores for
13 * query dependent banded or non-banded CM DP search functions
14 * 4. GammaHitMx_t data structure functions,
15 * semi-HMM data structure for optimal resolution of overlapping
16 * hits for CM and CP9 DP search functions
17 *
18 * EPN, Fri Oct 26 05:04:34 2007
19 */
20
21 #include <stdio.h>
22 #include <stdlib.h>
23
24 #include <xmmintrin.h>
25 #include <emmintrin.h>
26
27 #include "easel.h"
28 #include "esl_vectorops.h"
29 #include "esl_sse.h"
30
31 #include "hmmer.h"
32
33 #include "infernal.h"
34
35 #include "impl_sse.h"
36
37 /*****************************************************************
38 * 4. GammaHitMx_t data structure functions,
39 * Semi HMM data structure for optimal resolution of overlapping
40 * hits for CM DP search functions.
41 *****************************************************************/
42
43 /* Function: CreateGammaHitMx()
44 * Date: EPN, Mon Nov 5 05:22:56 2007
45 *
46 * Purpose: Allocate and initialize a gamma semi-HMM for
47 * optimal hit resolution of a CM based scan.
48 * If(do_backward), L position is init'ed instead of
49 * 0th position, for Backward HMM scans.
50 *
51 * Returns: Newly allocated GammaHitMx_t object:
52 */
53 GammaHitMx_epu8 *
CreateGammaHitMx_epu8(int L,int i0,int be_greedy,int offset_zero,float cutoff,int do_backward)54 CreateGammaHitMx_epu8(int L, int i0, int be_greedy, int offset_zero, float cutoff, int do_backward)
55 {
56 int status;
57 GammaHitMx_epu8 *gamma;
58 ESL_ALLOC(gamma, sizeof(GammaHitMx_epu8));
59
60 gamma->L = L;
61 gamma->i0 = i0;
62 gamma->iamgreedy = be_greedy;
63 gamma->cutoff = cutoff;
64 /* allocate/initialize for CYK/Inside */
65 ESL_ALLOC(gamma->mx, sizeof(int) * (L+1));
66 ESL_ALLOC(gamma->gback, sizeof(int) * (L+1));
67 ESL_ALLOC(gamma->savesc, sizeof(float) * (L+1));
68
69 if(do_backward) {
70 gamma->mx[L] = offset_zero;
71 gamma->gback[L] = -1;
72 }
73 else {
74 gamma->mx[0] = offset_zero;
75 gamma->gback[0] = -1;
76 }
77 return gamma;
78
79 ERROR:
80 cm_Fail("memory allocation error in cm_CreateGammaHitMx().\n");
81 return NULL;
82 }
83
84 /* Function: FreeGammaHitMx()
85 * Date: EPN, Mon Nov 5 05:32:00 2007
86 *
87 * Purpose: Free a gamma semi-HMM.
88 *
89 * Returns: void;
90 */
91 void
FreeGammaHitMx_epu8(GammaHitMx_epu8 * gamma)92 FreeGammaHitMx_epu8(GammaHitMx_epu8 *gamma)
93 {
94 free(gamma->mx);
95 free(gamma->gback);
96 free(gamma->savesc);
97 free(gamma);
98
99 return;
100 }
101
102 /* Function: UpdateGammaHitMx()
103 * Date: EPN, Mon Nov 5 05:41:14 2007
104 *
105 * Purpose: Update a gamma semi-HMM for CM hits that end at gamma-relative position <j>.
106 *
107 * Args: cm - the model, used only for it's alphabet and null model
108 * errbuf - for reporting errors
109 * gamma - the gamma data structure
110 * j - offset j for gamma must be between 0 and gamma->L
111 * alpha_row - row of DP matrix to examine, we look at [dn..dx], NULL if we want to report
112 * this j is IMPOSSIBLE end point of a hit (only possible if using_hmm_bands == TRUE)
113 * dn - minimum d to look at
114 * dx - maximum d to look at
115 * using_hmm_bands - if TRUE, alpha_row is offset by dn, so we look at [0..dx-dn]
116 * bestr - [dn..dx] root state (0 or local entry) corresponding to hit stored in alpha_row
117 * hitlist - CM_TOPHITS hitlist to add to, only used in this function if gamma->iamgreedy
118 * W - window size, max size of a hit, only used if we're doing a NULL3 correction (act != NULL)
119 * act - [0..j..W-1][0..a..abc->K-1], alphabet count, count of residue a in dsq from 1..jp where j = jp%(W+1)
120 *
121 * Returns: eslOK on succes; eslEMEM on memory allocation error;
122 *
123 */
124 int
UpdateGammaHitMx_epu8(CM_CONSENSUS * ccm,char * errbuf,GammaHitMx_epu8 * gamma,int j,__m128i * alpha_row,CM_TOPHITS * hitlist,int W,int sW)125 UpdateGammaHitMx_epu8(CM_CONSENSUS *ccm, char *errbuf, GammaHitMx_epu8 *gamma, int j, __m128i *alpha_row,
126 CM_TOPHITS *hitlist, int W, int sW)
127 {
128 int i, d;
129 /*int bestd;*/
130 int ip, jp;
131 int do_report_hit;
132 uint8_t hit_sc, bestd_sc;
133 float fhit_sc;
134 int cumulative_sc;
135 CM_HIT *hit = NULL;
136
137 /* mode 1: non-greedy */
138 if(! gamma->iamgreedy || alpha_row == NULL) {
139 gamma->mx[j] = gamma->mx[j-1] + 0;
140 gamma->gback[j] = -1;
141 gamma->savesc[j] = -eslINFINITY; // IMPOSSIBLE;
142
143 if(alpha_row != NULL) {
144 for (d = 1; d <= W && d <= j; d++) {
145 i = j-d+1;
146 hit_sc = *(((uint8_t *) &alpha_row[d%sW])+d/sW);
147 fhit_sc = ((float) (hit_sc - ccm->base_b))/ccm->scale_b;
148 cumulative_sc = gamma->mx[i-1] + hit_sc - ccm->base_b;
149 if (cumulative_sc < 0) cumulative_sc = 0;
150 if (cumulative_sc >= gamma->mx[j]) { /* Break ties in favor of larger d */
151 do_report_hit = TRUE;
152 if(do_report_hit) {
153 gamma->mx[j] = cumulative_sc;
154 gamma->gback[j] = i + (gamma->i0-1);
155 gamma->savesc[j] = fhit_sc;
156 }
157 }
158 }
159 }
160 }
161 /* mode 2: greedy */
162 if(gamma->iamgreedy) {
163 /* Resolving overlaps greedily (RSEARCH style),
164 * At least one hit is sent back for each j here.
165 * However, some hits can already be removed for the greedy overlap
166 * resolution algorithm. Specifically, at the given j, any hit with a
167 * d of d1 is guaranteed to mask any hit of lesser score with a d > d1 */
168 /* First, report hit with d of dmin (min valid d) if >= cutoff */
169 d = 1;
170 hit_sc = *((uint8_t *) &alpha_row[d%sW] + d/sW);
171 fhit_sc = ((float) (hit_sc - ccm->base_b))/ccm->scale_b;
172 if (fhit_sc >= gamma->cutoff && NOT_IMPOSSIBLE(hit_sc)) {
173 do_report_hit = TRUE;
174 ip = j-d+gamma->i0;
175 jp = j-1+gamma->i0;
176 assert(ip >= gamma->i0);
177 assert(jp >= gamma->i0);
178 if(do_report_hit) {
179 cm_tophits_CreateNextHit(hitlist, &hit);
180 hit->start = ip;
181 hit->stop = jp;
182 hit->score = fhit_sc;
183 hit->root = 0;
184 hit->mode = TRMODE_J;
185 hit->hmmonly = FALSE;
186 }
187 }
188 /*bestd = 0;*/
189 bestd_sc = hit_sc;
190 /* Now, if current score is greater than maximum seen previous, report
191 * it if >= cutoff and set new max */
192 for (d = 2; d <= W; d++) {
193 hit_sc = *(((uint8_t *) &alpha_row[d%sW]) + d/sW);
194 fhit_sc = ((float) (hit_sc - ccm->base_b))/ccm->scale_b;
195 if (hit_sc > bestd_sc) {
196 if (fhit_sc >= gamma->cutoff && NOT_IMPOSSIBLE(hit_sc)) {
197 do_report_hit = TRUE;
198 ip = j-d+gamma->i0;
199 jp = j-1+gamma->i0;
200 assert(ip >= gamma->i0);
201 assert(jp >= gamma->i0);
202 if(do_report_hit) {
203 cm_tophits_CreateNextHit(hitlist, &hit);
204 hit->start = ip;
205 hit->stop = jp;
206 hit->score = fhit_sc;
207 hit->root = 0;
208 hit->mode = TRMODE_J;
209 hit->hmmonly = FALSE;
210 }
211 }
212 /*bestd = d;*/
213 bestd_sc = hit_sc;
214 }
215 }
216 }
217 return eslOK;
218 }
219
220
221 /* Function: TBackGammaHitMx()
222 * Date: EPN, Mon Nov 5 10:14:30 2007
223 *
224 * Purpose: Traceback with a gamma semi-HMM for CM/CP9 hits in the forward
225 * direction.
226 * gamma->iamgreedy should be FALSE.
227 *
228 * Returns: void; dies immediately upon an error.
229 */
230 void
TBackGammaHitMx_epu8(GammaHitMx_epu8 * gamma,CM_TOPHITS * hitlist,int i0,int j0)231 TBackGammaHitMx_epu8(GammaHitMx_epu8 *gamma, CM_TOPHITS *hitlist, int i0, int j0)
232 {
233 int j, jp_g;
234 CM_HIT *hit = NULL;
235
236 if(gamma->iamgreedy) cm_Fail("cm_TBackGammaHitMx(), gamma->iamgreedy is TRUE.\n");
237 if(hitlist == NULL) cm_Fail("cm_TBackGammaHitMx(), hitlist == NULL");
238 /* Recover all hits: an (i,j,sc) triple for each one.
239 */
240 j = j0;
241 while (j >= i0) {
242 jp_g = j-i0+1;
243 if (gamma->gback[jp_g] == -1) j--; /* no hit */
244 else { /* a hit, a palpable hit */
245 if(gamma->savesc[jp_g] >= gamma->cutoff) { /* report the hit */
246 cm_tophits_CreateNextHit(hitlist, &hit);
247 hit->start = gamma->gback[jp_g];
248 hit->stop = j;
249 hit->score = gamma->savesc[jp_g];
250 hit->root = 0;
251 hit->mode = TRMODE_J;
252 hit->hmmonly = FALSE;
253 }
254 j = gamma->gback[jp_g]-1;
255 }
256 }
257 return;
258 }
259
260