1 #ifdef _cplusplus
2 extern "C" {
3 #endif
4 #include "wise2xhmmer2.h"
5 
6 #ifndef HMMER2_EXTERN_DEFINED
7 #include "globals.h"
8 #endif
9 
10 /* Function:  bootstrap_HMMer2(void)
11  *
12  * Descrip:    You need to call this function before you
13  *             call any other to get the HMMer2 system working ;)
14  *
15  *
16  *
17  */
18 # line 45 "wise2xhmmer2.dy"
bootstrap_HMMer2(void)19 void bootstrap_HMMer2(void)
20 {
21   SetAlphabet(hmmAMINO);
22 }
23 
24 /* Function:  read_SeqAlign_HMMER(seqfile)
25  *
26  * Descrip:    This function read a single SeqAlign from
27  *             the filename using the HMMER libraries
28  *
29  *
30  * Arg:        seqfile [UNKN ] Undocumented argument [char *]
31  *
32  * Return [UNKN ]  Undocumented return value [SeqAlign *]
33  *
34  */
35 # line 54 "wise2xhmmer2.dy"
read_SeqAlign_HMMER(char * seqfile)36 SeqAlign * read_SeqAlign_HMMER(char * seqfile)
37 {
38   AINFO ainfo;
39   char ** data;
40   int format;
41   SeqAlign * out;
42   int i;
43   Sequence * temp;
44 
45   if (! SeqfileFormat(seqfile, &format, NULL) ) {
46     switch (squid_errno) {
47     case SQERR_NOFILE:
48       warn("Alignment file %s could not be opened for reading", seqfile);
49       return NULL;
50     case SQERR_FORMAT:
51     default:
52       warn("Failed to determine format of alignment file %s", seqfile);
53     }
54   }
55 
56   /* attempt to read it in now */
57 
58   if (! ReadAlignment(seqfile, format, &data, &ainfo)) {
59     warn("Failed to read aligned sequence file %s", seqfile);
60     return NULL;
61   }
62 
63   /* make SeqAlign */
64 
65   out = SeqAlign_alloc_len(ainfo.nseq);
66 
67   for (i = 0; i < ainfo.nseq; i++) {
68     temp = new_Sequence_from_strings(ainfo.sqinfo[i].name,data[i]);
69     add_SeqAlign(out,temp);
70   }
71 
72   return out;
73 }
74 
75 
76 
77 
78 
79 /* Function:  HMMer2_read_ThreeStateModel(filename)
80  *
81  * Descrip:    This function reads a single HMM from
82  *             filename for a ThreeStateModel
83  *
84  *
85  * Arg:        filename [UNKN ] Undocumented argument [char *]
86  *
87  * Return [UNKN ]  Undocumented return value [ThreeStateModel *]
88  *
89  */
90 # line 101 "wise2xhmmer2.dy"
HMMer2_read_ThreeStateModel(char * filename)91 ThreeStateModel * HMMer2_read_ThreeStateModel(char * filename)
92 {
93   struct plan7_s *hmm;
94   ThreeStateModel * out;
95   HMMFILE * hmmfp;
96 
97   if( filename == NULL ) {
98     warn("You have passed in a NULL filename into HMMer2_read_ThreeStateModel - unlikely to work!");
99     return NULL;
100   }
101 
102   hmmfp = HMMFileOpen(filename,"HMMERDB");
103 
104   if( hmmfp == NULL ) {
105     warn("Could not open %s for HMM reading",filename);
106     return NULL;
107   }
108 
109   if( HMMFileRead(hmmfp, &hmm) == 0 ) {
110     return NULL;
111   }
112 
113   if( hmm == NULL ) {
114     warn("Unable to read HMM file... ");
115     return NULL;
116   }
117 
118   out = ThreeStateModel_from_HMMer2(hmm);
119   FreePlan7(hmm);
120 
121   return out;
122 }
123 
124 /* Function:  HMMer2_ThreeStateDB(filename)
125  *
126  * Descrip:    This function makes a new ThreeStateDB
127  *             from a filename for the HMMer2 system.
128  *
129  *
130  * Arg:        filename [SOFT ] filename of the HMMer db [char *]
131  *
132  * Return [UNKN ]  Undocumented return value [ThreeStateDB *]
133  *
134  */
135 # line 140 "wise2xhmmer2.dy"
HMMer2_ThreeStateDB(char * filename)136 ThreeStateDB * HMMer2_ThreeStateDB(char * filename)
137 {
138   ThreeStateDB * out;
139 
140   out = ThreeStateDB_alloc();
141   out->dbtype    = TSMDB_GENERIC;
142   out->filename = stringalloc(filename);
143   out->reload_generic = reload_ThreeStateDB_HMMer2;
144   out->open_generic   = open_ThreeStateDB_HMMer2;
145   out->close_generic  = close_ThreeStateDB_HMMer2;
146   out->dataentry_add  = dataentry_ThreeStateDB_HMMer2;
147   out->open_index_generic = open_for_indexing_ThreeStateDB_HMMer2;
148   out->close_index_generic = close_for_indexing_ThreeStateDB_HMMer2;
149   out->index_generic = index_ThreeStateDB_HMMer2;
150 
151   return out;
152 }
153 
154 /* Function:  open_for_indexing_ThreeStateDB_HMMer2(tdb)
155  *
156  * Descrip:    This function is the generic wrapper for the
157  *             threestatedb open for indexing
158  *
159  *
160  * Arg:        tdb [UNKN ] Undocumented argument [ThreeStateDB *]
161  *
162  * Return [UNKN ]  Undocumented return value [boolean]
163  *
164  */
165 # line 163 "wise2xhmmer2.dy"
open_for_indexing_ThreeStateDB_HMMer2(ThreeStateDB * tdb)166 boolean open_for_indexing_ThreeStateDB_HMMer2(ThreeStateDB * tdb)
167 {
168   return TRUE; /* nowt to do ! */
169 }
170 
171 /* Function:  close_for_indexing_ThreeStateDB_HMMer2(tdb)
172  *
173  * Descrip:    This function is the generic wrapper for the
174  *             threestatedb open for indexing
175  *
176  *
177  * Arg:        tdb [UNKN ] Undocumented argument [ThreeStateDB *]
178  *
179  * Return [UNKN ]  Undocumented return value [boolean]
180  *
181  */
182 # line 173 "wise2xhmmer2.dy"
close_for_indexing_ThreeStateDB_HMMer2(ThreeStateDB * tdb)183 boolean close_for_indexing_ThreeStateDB_HMMer2(ThreeStateDB * tdb)
184 {
185   return TRUE; /* nowt to do ! */
186 }
187 
188 
189 #ifdef HMMER_2_1_2 /* new style indexing */
190 
index_ThreeStateDB_HMMer2(ThreeStateDB * tdb,DataEntry * de)191  ThreeStateModel * index_ThreeStateDB_HMMer2(ThreeStateDB * tdb,DataEntry *de)
192 {
193   struct plan7_s *hmm;
194   ThreeStateModel * out;
195   HMMFILE * hmmfp;
196 
197   hmmfp = HMMFileOpen(tdb->filename,"HMMERDB");
198   if( hmmfp == NULL ) {
199     warn("Could not open %s as HMMER file\n",tdb->filename);
200     return NULL;
201   }
202 
203   if( hmmfp->gsi == NULL ) {
204     warn("Could not retrieve sequences from HMMER database as it is not indexed!");
205     return NULL;
206   }
207 
208   if( ! HMMFilePositionByName(hmmfp,de->name) ) {
209     warn("Unable to find HMM %s in %s",de->name,tdb->filename);
210     return NULL;
211   }
212 
213   if( HMMFileRead(hmmfp, &hmm) == 0 ) {
214     return NULL;
215   }
216 
217   if( hmm == NULL ) {
218     warn("Unable to read HMM file... ");
219     return NULL;
220   }
221 
222   out = ThreeStateModel_from_HMMer2(hmm);
223   FreePlan7(hmm);
224   HMMFileClose(hmmfp);
225 
226   return out;
227 }
228 #else /* use old style indexing */
229 
230 /* Function:  index_ThreeStateDB_HMMer2(tdb,*de)
231  *
232  * Descrip:    This function is the generic wrapper for the
233  *             threestatedb get indexed
234  *
235  *
236  * Arg:        tdb [UNKN ] Undocumented argument [ThreeStateDB *]
237  * Arg:        *de [UNKN ] Undocumented argument [DataEntry]
238  *
239  * Return [UNKN ]  Undocumented return value [ThreeStateModel*]
240  *
241  */
242 # line 225 "wise2xhmmer2.dy"
index_ThreeStateDB_HMMer2(ThreeStateDB * tdb,DataEntry * de)243 ThreeStateModel*  index_ThreeStateDB_HMMer2(ThreeStateDB * tdb,DataEntry *de)
244 {
245   struct plan7_s *hmm;
246   ThreeStateModel * out;
247   HMMFILE * hmmfp;
248 
249 
250   hmmfp = HMMFileOpenFseek(tdb->filename,"HMMERDB",de->byte_position);
251 
252   if( hmmfp == NULL ) {
253     warn("Could not open %s to byte position %d",tdb->filename,de->data[0]);
254     return NULL;
255   }
256 
257   if( HMMFileRead(hmmfp, &hmm) == 0 ) {
258     return NULL;
259   }
260 
261   if( hmm == NULL ) {
262     warn("Unable to read HMM file... ");
263     return NULL;
264   }
265 
266   out = ThreeStateModel_from_HMMer2(hmm);
267   FreePlan7(hmm);
268   HMMFileClose(hmmfp);
269 
270   return out;
271 }
272 #endif /* HMMER_2_1_2 */
273 
274 
275 /* Function:  dataentry_ThreeStateDB_HMMer2(tdb,en)
276  *
277  * Descrip:    This function is the generic wrapper
278  *             for the threestatedb info for hmmer2
279  *
280  *
281  * Arg:        tdb [UNKN ] Undocumented argument [ThreeStateDB *]
282  * Arg:         en [UNKN ] Undocumented argument [DataEntry *]
283  *
284  * Return [UNKN ]  Undocumented return value [boolean]
285  *
286  */
287 # line 262 "wise2xhmmer2.dy"
dataentry_ThreeStateDB_HMMer2(ThreeStateDB * tdb,DataEntry * en)288 boolean dataentry_ThreeStateDB_HMMer2(ThreeStateDB * tdb,DataEntry * en)
289 {
290   HMMFILE * hmmfp;
291 
292   hmmfp = (HMMFILE *) (tdb->data);
293 
294 #ifdef HMMER_2_1_2
295   en->data[0] = 0;
296   /* we rely on the fact that someone has already set the name */
297 
298 #else
299   en->data[0] = HMMFtell(hmmfp);
300   en->byte_position = tdb->byte_position;
301 #endif
302 
303   return TRUE;
304 }
305 
306 
307 /* Function:  close_ThreeStateDB_HMMer2(tdb)
308  *
309  * Descrip:    This function is the generic wrapper
310  *             for the threestatedb close for hmmer2
311  *
312  *
313  * Arg:        tdb [UNKN ] Undocumented argument [ThreeStateDB *]
314  *
315  * Return [UNKN ]  Undocumented return value [boolean]
316  *
317  */
318 # line 286 "wise2xhmmer2.dy"
close_ThreeStateDB_HMMer2(ThreeStateDB * tdb)319 boolean close_ThreeStateDB_HMMer2(ThreeStateDB * tdb)
320 {
321   HMMFILE * hmmfp;
322 
323   hmmfp = (HMMFILE *) (tdb->data);
324 
325   HMMFileClose(hmmfp);
326   return TRUE;
327 }
328 
329 
330 /* Function:  reload_ThreeStateDB_HMMer2(tdb,return_status)
331  *
332  * Descrip:    This function is the generic wrapper
333  *             for the threestatedb relaod function for hmmer2
334  *
335  *
336  * Arg:                  tdb [UNKN ] Undocumented argument [ThreeStateDB *]
337  * Arg:        return_status [UNKN ] Undocumented argument [int *]
338  *
339  * Return [UNKN ]  Undocumented return value [ThreeStateModel *]
340  *
341  */
342 # line 302 "wise2xhmmer2.dy"
reload_ThreeStateDB_HMMer2(ThreeStateDB * tdb,int * return_status)343 ThreeStateModel * reload_ThreeStateDB_HMMer2(ThreeStateDB * tdb,int * return_status)
344 {
345   struct plan7_s *hmm;
346   ThreeStateModel * out;
347   HMMFILE * hmmfp;
348 
349   hmmfp = (HMMFILE *) (tdb->data);
350 
351 #ifndef HMMER_2_1_2
352   tdb->byte_position = HMMFtell(hmmfp);
353 #endif
354 
355   if( HMMFileRead(hmmfp, &hmm) == 0 ) {
356     /* end of file */
357     *return_status = DB_RETURN_END;
358     return NULL;
359   }
360 
361   if( hmm == NULL ) {
362     warn("Unable to read HMM file... ");
363     *return_status = DB_RETURN_ERROR;
364   }
365 
366   *return_status = DB_RETURN_OK;
367 
368   out = ThreeStateModel_from_HMMer2(hmm);
369   FreePlan7(hmm);
370 
371   return out;
372 }
373 
374 
375 
376 /* Function:  open_ThreeStateDB_HMMer2(tdb)
377  *
378  * Descrip:    This function is the generic wrapper
379  *             for the threestatedb open function
380  *
381  *
382  * Arg:        tdb [UNKN ] Undocumented argument [ThreeStateDB *]
383  *
384  * Return [UNKN ]  Undocumented return value [boolean]
385  *
386  */
387 # line 340 "wise2xhmmer2.dy"
open_ThreeStateDB_HMMer2(ThreeStateDB * tdb)388 boolean open_ThreeStateDB_HMMer2(ThreeStateDB * tdb)
389 {
390   if( tdb->data != NULL ) {
391     warn("Attempting to open an already open tdb. Ugh!");
392     return FALSE;
393   }
394 
395   if( (tdb->data = (void *)HMMFileOpen(tdb->filename,"HMMERDB")) == NULL ) {
396     warn("Could not open %s as a filename for hmmer2 directory",tdb->filename);
397     return FALSE;
398   }
399   tdb->byte_position = 0;
400 
401   return TRUE;
402 }
403 
404 
405 
406 
407 /* Function:  ThreeStateModel_from_HMMer2(p7)
408  *
409  * Descrip:    This function converts a HMMer2 HMM into
410  *             a Wise2 HMM (threestatemodel).
411  *
412  *
413  * Arg:        p7 [UNKN ] Undocumented argument [struct plan7_s *]
414  *
415  * Return [UNKN ]  Undocumented return value [ThreeStateModel *]
416  *
417  */
418 # line 363 "wise2xhmmer2.dy"
ThreeStateModel_from_HMMer2(struct plan7_s * p7)419 ThreeStateModel * ThreeStateModel_from_HMMer2(struct plan7_s * p7)
420 {
421   ThreeStateModel * out;
422   ThreeStateUnit  * unit;
423   int i,j;
424   char * runner;
425 
426   if( p7 == NULL ) {
427     warn("Can't make a ThreeStateModel from a NULL pointer... ");
428     return NULL;
429   }
430 
431   out = ThreeStateModel_alloc_len(p7->M);
432 
433   if( out == NULL )
434     return NULL; /* already complained*/
435 
436   out->name = stringalloc(p7->name);
437 
438 
439 #ifdef HMMER_2_1_2
440   if( p7->acc != NULL ) {
441     out->accession = stringalloc(p7->acc);
442   } else {
443     out->accession = stringalloc(p7->name);
444   }
445 #else
446   /* munge around accession number. Ugh! */
447 
448   if( p7->desc != NULL ) {
449     runner = strtok(p7->desc,spacestr);
450     if( runner != NULL && strstartcmp(runner,"PF") == 0 ) {
451       out->accession = stringalloc(runner);
452     } else {
453       out->accession = stringalloc(p7->name);
454     }
455   }
456 #endif
457 
458   for(i=0;i<p7->M;i++) {
459 
460     unit = blank_ThreeStateUnit();
461     add_ThreeStateModel(out,unit);
462 
463 
464     if( i != p7->M-1) {
465       unit->transition[TSM_MATCH2MATCH]= p7->t[i+1][TMM];
466       unit->transition[TSM_MATCH2DELETE]= p7->t[i+1][TMD];
467       unit->transition[TSM_MATCH2INSERT]= p7->t[i+1][TMI];
468       unit->transition[TSM_INSERT2MATCH]= p7->t[i+1][TIM];
469       unit->transition[TSM_INSERT2INSERT]= p7->t[i+1][TII];
470       unit->transition[TSM_DELETE2MATCH]= p7->t[i+1][TDM];
471       unit->transition[TSM_DELETE2DELETE]= p7->t[i+1][TDD];
472       unit->transition[TSM_START2MATCH] = p7->begin[i+1];
473       unit->transition[TSM_MATCH2END] =   p7->end[i+1];
474     }
475 
476 
477     if( i == p7->M-1) {
478       /* all probability onto end */
479 
480       unit->transition[TSM_MATCH2END] =  1.0;
481     }
482 
483     /* we have to map sean's probabilities over to ours */
484 
485     for(j=0;j<Alphabet_size;j++) {
486       unit->match_emission[Alphabet[j]-'A'] = p7->mat[i+1][j];
487     }
488 
489     if( i != p7->M-1) {
490       for(j=0;j<Alphabet_size;j++) {
491 	unit->insert_emission[Alphabet[j]-'A'] = p7->ins[i+1][j];
492       }
493     } else {
494       /* no insert position! - could leave as 0's? */
495       /* but this does upset some sensible error tracking code */
496       /* set it to 1/26 probs ;) */
497       for(j=0;j<Alphabet_size;j++) {
498 	unit->insert_emission[Alphabet[j]-'A'] = p7->ins[1][j];
499       }
500 
501     }
502   }
503 
504   out->rm = RandomModel_alloc();
505 
506   for(i=0;i<26;i++)
507     out->rm->aminoacid[i] = 1.0;
508 
509   for(j=0;j<Alphabet_size;j++) {
510     out->rm->aminoacid[Alphabet[j]-'A'] = p7->null[j];
511   }
512 
513   display_char_in_ThreeStateModel(out);
514 
515   return out;
516 }
517 
518 
519 
520 
521 
522 
523 
524 # line 499 "wise2xhmmer2.c"
525 
526 #ifdef _cplusplus
527 }
528 #endif
529