1 /* CM_HB_MX, ScanMatrix_t, and GammaHitMx_t implementations:
2  * dynamic programming matrices for CMs
3  *
4  * CM_HB_MX is based heavily on HMMER 3's p7_gmx.c module.
5  *
6  * Table of contents:
7  *   1. CM_HB_MX data structure functions,
8  *      matrix of float scores for HMM banded CM alignment/search
9  *   2. CM_HB_SHADOW_MX data structure functions
10  *      HMM banded shadow matrix for tracing back HMM banded CM parses
11  *   3. ScanMatrix_t data structure functions,
12  *      auxiliary info and matrix of float and/or int scores for
13  *      query dependent banded or non-banded CM DP search functions
14  *   4. GammaHitMx_t data structure functions,
15  *      semi-HMM data structure for optimal resolution of overlapping
16  *      hits for CM and CP9 DP search functions
17  *
18  * EPN, Fri Oct 26 05:04:34 2007
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 
24 #include <xmmintrin.h>
25 #include <emmintrin.h>
26 
27 #include "easel.h"
28 #include "esl_vectorops.h"
29 #include "esl_sse.h"
30 
31 #include "hmmer.h"
32 
33 #include "infernal.h"
34 
35 #include "impl_sse.h"
36 
37 /*****************************************************************
38  *   4. GammaHitMx_t data structure functions,
39  *      Semi HMM data structure for optimal resolution of overlapping
40  *      hits for CM DP search functions.
41  *****************************************************************/
42 
43 /* Function: CreateGammaHitMx()
44  * Date:     EPN, Mon Nov  5 05:22:56 2007
45  *
46  * Purpose:  Allocate and initialize a gamma semi-HMM for
47  *           optimal hit resolution of a CM based scan.
48  *           If(do_backward), L position is init'ed instead of
49  *           0th position, for Backward HMM scans.
50  *
51  * Returns:  Newly allocated GammaHitMx_t object:
52  */
53 GammaHitMx_epu8 *
CreateGammaHitMx_epu8(int L,int i0,int be_greedy,int offset_zero,float cutoff,int do_backward)54 CreateGammaHitMx_epu8(int L, int i0, int be_greedy, int offset_zero, float cutoff, int do_backward)
55 {
56   int status;
57   GammaHitMx_epu8 *gamma;
58   ESL_ALLOC(gamma, sizeof(GammaHitMx_epu8));
59 
60   gamma->L  = L;
61   gamma->i0 = i0;
62   gamma->iamgreedy = be_greedy;
63   gamma->cutoff    = cutoff;
64   /* allocate/initialize for CYK/Inside */
65   ESL_ALLOC(gamma->mx,     sizeof(int)     * (L+1));
66   ESL_ALLOC(gamma->gback,  sizeof(int)     * (L+1));
67   ESL_ALLOC(gamma->savesc, sizeof(float)   * (L+1));
68 
69   if(do_backward) {
70     gamma->mx[L]    = offset_zero;
71     gamma->gback[L] = -1;
72   }
73   else {
74     gamma->mx[0]    = offset_zero;
75     gamma->gback[0] = -1;
76   }
77   return gamma;
78 
79  ERROR:
80   cm_Fail("memory allocation error in cm_CreateGammaHitMx().\n");
81   return NULL;
82 }
83 
84 /* Function: FreeGammaHitMx()
85  * Date:     EPN, Mon Nov  5 05:32:00 2007
86  *
87  * Purpose:  Free a gamma semi-HMM.
88  *
89  * Returns:  void;
90  */
91 void
FreeGammaHitMx_epu8(GammaHitMx_epu8 * gamma)92 FreeGammaHitMx_epu8(GammaHitMx_epu8 *gamma)
93 {
94   free(gamma->mx);
95   free(gamma->gback);
96   free(gamma->savesc);
97   free(gamma);
98 
99   return;
100 }
101 
102 /* Function: UpdateGammaHitMx()
103  * Date:     EPN, Mon Nov  5 05:41:14 2007
104  *
105  * Purpose:  Update a gamma semi-HMM for CM hits that end at gamma-relative position <j>.
106  *
107  * Args:     cm        - the model, used only for it's alphabet and null model
108  *           errbuf    - for reporting errors
109  *           gamma     - the gamma data structure
110  *           j         - offset j for gamma must be between 0 and gamma->L
111  *           alpha_row - row of DP matrix to examine, we look at [dn..dx], NULL if we want to report
112  *                       this j is IMPOSSIBLE end point of a hit (only possible if using_hmm_bands == TRUE)
113  *           dn        - minimum d to look at
114  *           dx        - maximum d to look at
115  *           using_hmm_bands - if TRUE, alpha_row is offset by dn, so we look at [0..dx-dn]
116  *           bestr     - [dn..dx] root state (0 or local entry) corresponding to hit stored in alpha_row
117  *           hitlist   - CM_TOPHITS hitlist to add to, only used in this function if gamma->iamgreedy
118  *           W         - window size, max size of a hit, only used if we're doing a NULL3 correction (act != NULL)
119  *           act       - [0..j..W-1][0..a..abc->K-1], alphabet count, count of residue a in dsq from 1..jp where j = jp%(W+1)
120  *
121  * Returns:  eslOK on succes; eslEMEM on memory allocation error;
122  *
123  */
124 int
UpdateGammaHitMx_epu8(CM_CONSENSUS * ccm,char * errbuf,GammaHitMx_epu8 * gamma,int j,__m128i * alpha_row,CM_TOPHITS * hitlist,int W,int sW)125 UpdateGammaHitMx_epu8(CM_CONSENSUS *ccm, char *errbuf, GammaHitMx_epu8 *gamma, int j, __m128i *alpha_row,
126 		      CM_TOPHITS *hitlist, int W, int sW)
127 {
128   int i, d;
129   /*int bestd;*/
130   int ip, jp;
131   int do_report_hit;
132   uint8_t hit_sc, bestd_sc;
133   float  fhit_sc;
134   int cumulative_sc;
135   CM_HIT *hit = NULL;
136 
137   /* mode 1: non-greedy  */
138   if(! gamma->iamgreedy || alpha_row == NULL) {
139     gamma->mx[j]     = gamma->mx[j-1] + 0;
140     gamma->gback[j]  = -1;
141     gamma->savesc[j] = -eslINFINITY; // IMPOSSIBLE;
142 
143     if(alpha_row != NULL) {
144       for (d = 1; d <= W && d <= j; d++) {
145 	i = j-d+1;
146 	hit_sc = *(((uint8_t *) &alpha_row[d%sW])+d/sW);
147         fhit_sc = ((float) (hit_sc - ccm->base_b))/ccm->scale_b;
148 	cumulative_sc = gamma->mx[i-1] + hit_sc - ccm->base_b;
149         if (cumulative_sc < 0) cumulative_sc = 0;
150 	if (cumulative_sc >= gamma->mx[j]) { /* Break ties in favor of larger d */
151 	  do_report_hit = TRUE;
152 	  if(do_report_hit) {
153 	    gamma->mx[j]     = cumulative_sc;
154 	    gamma->gback[j]  = i + (gamma->i0-1);
155 	    gamma->savesc[j] = fhit_sc;
156 	  }
157 	}
158       }
159     }
160   }
161   /* mode 2: greedy */
162   if(gamma->iamgreedy) {
163     /* Resolving overlaps greedily (RSEARCH style),
164      * At least one hit is sent back for each j here.
165      * However, some hits can already be removed for the greedy overlap
166      * resolution algorithm.  Specifically, at the given j, any hit with a
167      * d of d1 is guaranteed to mask any hit of lesser score with a d > d1 */
168     /* First, report hit with d of dmin (min valid d) if >= cutoff */
169     d = 1;
170     hit_sc = *((uint8_t *) &alpha_row[d%sW] + d/sW);
171     fhit_sc = ((float) (hit_sc - ccm->base_b))/ccm->scale_b;
172     if (fhit_sc >= gamma->cutoff && NOT_IMPOSSIBLE(hit_sc)) {
173       do_report_hit = TRUE;
174       ip = j-d+gamma->i0;
175       jp = j-1+gamma->i0;
176       assert(ip >= gamma->i0);
177       assert(jp >= gamma->i0);
178       if(do_report_hit) {
179 	cm_tophits_CreateNextHit(hitlist, &hit);
180 	hit->start = ip;
181 	hit->stop  = jp;
182 	hit->score = fhit_sc;
183 	hit->root  = 0;
184 	hit->mode  = TRMODE_J;
185 	hit->hmmonly = FALSE;
186       }
187     }
188     /*bestd    = 0;*/
189     bestd_sc = hit_sc;
190     /* Now, if current score is greater than maximum seen previous, report
191      * it if >= cutoff and set new max */
192     for (d = 2; d <= W; d++) {
193       hit_sc = *(((uint8_t *) &alpha_row[d%sW]) + d/sW);
194       fhit_sc = ((float) (hit_sc - ccm->base_b))/ccm->scale_b;
195       if (hit_sc > bestd_sc) {
196 	if (fhit_sc >= gamma->cutoff && NOT_IMPOSSIBLE(hit_sc)) {
197 	  do_report_hit = TRUE;
198 	  ip = j-d+gamma->i0;
199 	  jp = j-1+gamma->i0;
200 	  assert(ip >= gamma->i0);
201 	  assert(jp >= gamma->i0);
202 	  if(do_report_hit) {
203 	    cm_tophits_CreateNextHit(hitlist, &hit);
204 	    hit->start = ip;
205 	    hit->stop  = jp;
206 	    hit->score = fhit_sc;
207 	    hit->root  = 0;
208 	    hit->mode  = TRMODE_J;
209 	    hit->hmmonly = FALSE;
210 	  }
211 	}
212 	/*bestd = d;*/
213         bestd_sc = hit_sc;
214       }
215     }
216   }
217   return eslOK;
218 }
219 
220 
221 /* Function: TBackGammaHitMx()
222  * Date:     EPN, Mon Nov  5 10:14:30 2007
223  *
224  * Purpose:  Traceback with a gamma semi-HMM for CM/CP9 hits in the forward
225  *           direction.
226  *           gamma->iamgreedy should be FALSE.
227  *
228  * Returns:  void; dies immediately upon an error.
229  */
230 void
TBackGammaHitMx_epu8(GammaHitMx_epu8 * gamma,CM_TOPHITS * hitlist,int i0,int j0)231 TBackGammaHitMx_epu8(GammaHitMx_epu8 *gamma, CM_TOPHITS *hitlist, int i0, int j0)
232 {
233   int j, jp_g;
234   CM_HIT *hit = NULL;
235 
236   if(gamma->iamgreedy) cm_Fail("cm_TBackGammaHitMx(), gamma->iamgreedy is TRUE.\n");
237   if(hitlist == NULL)  cm_Fail("cm_TBackGammaHitMx(), hitlist == NULL");
238   /* Recover all hits: an (i,j,sc) triple for each one.
239    */
240   j = j0;
241   while (j >= i0) {
242     jp_g = j-i0+1;
243     if (gamma->gback[jp_g] == -1) j--; /* no hit */
244     else {              /* a hit, a palpable hit */
245       if(gamma->savesc[jp_g] >= gamma->cutoff) { /* report the hit */
246 	cm_tophits_CreateNextHit(hitlist, &hit);
247 	hit->start = gamma->gback[jp_g];
248 	hit->stop  = j;
249 	hit->score = gamma->savesc[jp_g];
250 	hit->root  = 0;
251 	hit->mode  = TRMODE_J;
252 	hit->hmmonly = FALSE;
253       }
254       j = gamma->gback[jp_g]-1;
255     }
256   }
257   return;
258 }
259 
260