1 /* Formatting, transmitting, and printing single alignments to a
2  * profile.
3  *
4  * Contents:
5  *   1. The P7_ALIDISPLAY object.
6  *   2. The P7_ALIDISPLAY API.
7  *   3. Debugging/dev code.
8  *   4. Benchmark driver.
9  *   5. Unit tests.
10  *   6. Test driver.
11  *   7. Example.
12  */
13 #include "p7_config.h"
14 
15 #include <stdlib.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include <string.h>
20 
21 #include "easel.h"
22 #include "hmmer.h"
23 
24 // Define bit-vector constants used in the _Serialize() and _Deserialize routines
25 #define RFLINE_PRESENT (1 << 0)
26 #define MMLINE_PRESENT (1 << 1)
27 #define CSLINE_PRESENT (1 << 2)
28 #define PPLINE_PRESENT (1 << 3)
29 #define ASEQ_PRESENT (1 << 4)
30 #define NTSEQ_PRESENT (1 << 5)
31 
32 /*****************************************************************
33  * 1. The P7_ALIDISPLAY object
34  *****************************************************************/
35 
36 /* A P7_ALIDISPLAY structure originally had two possible states:
37  * "serialized", in which all of the variable-length fields of the structure
38  * were pointers into one block of allocated memory, which was pointed to by the
39  * structure's "mem" field, and
40  * "deserialized", in which each of the variable-length fields of the structure
41  * were separate regions of allocated memory.  The intent of this was that
42  * "serialized" structures would be faster to create, as they only require one
43  * memory allocation, and easier to send over sockets, but that "deserialized"
44  * structures would be easier to modify and/or reuse.  However, HMMER currently
45  * does not ever modify or reuse P7_ALIDISPLAY structures.
46  *
47  * P7_ALIDISPLAY structures are created in "serialized" mode. The only time they
48  * were ever converted into "deserialized" mode was in hmmpgmd2msa, and this was
49  * somewhat unnecessary.
50  *
51  * Now, we have added _Serialize() and _Deserialize() functions that have the more
52  * traditional behavior of converting P7_ALIDISPLAY structures to and from
53  * contiguous blocks of bytes for transmission over sockets.  With this change,
54  * P7_ALIDISPLAY structures will always be in "serialized" mode, and will never be
55  * converted to their "deserialized" mode.  (Yes, this terminology is confusing.)
56  *
57  * For H3, we will leave the existing code to handle the "serialized" and "deserialized"
58  * modes in place.  HMMER's output code is being heavily revised for H4, and we will
59  * revisit how these modes are handled in H4 once that portion of the code stabilizes.
60  */
61 
62 /* Function:  p7_alidisplay_Create()
63  * Synopsis:  Create an alignment display, from trace and oprofile.
64  *
65  * Purpose:   Creates and returns an alignment display for domain number
66  *            <which> in traceback <tr>, where the traceback
67  *            corresponds to an alignment of optimized profile <om> to digital sequence
68  *            <dsq>, and the unique name of that target
69  *            sequence <dsq> is <sqname>. The <which> index starts at 0.
70  *
71  *            It will be a little faster if the trace is indexed with
72  *            <p7_trace_Index()> first. The number of domains is then
73  *            in <tr->ndom>. If the caller wants to create alidisplays
74  *            for all of these, it would loop <which> from
75  *            <0..tr->ndom-1>.
76  *
77  *            However, even without an index, the routine will work fine.
78  *
79  * Args:      tr       - traceback
80  *            which    - domain number, 0..tr->ndom-1
81  *            om       - optimized profile (query)
82  *            sq       - digital sequence (target)
83  *            ntsq     - text sequence (original nucleotide target in the case of translated search)
84  *            ddef_app - optional posterior prob alignment line; only nhmmer sends a not-NULL value
85  *
86  * Returns:   <eslOK> on success.
87  *
88  * Throws:    <NULL> on allocation failure, or if something's internally corrupt
89  *            in the data.
90  */
91 P7_ALIDISPLAY *
p7_alidisplay_Create(const P7_TRACE * tr,int which,const P7_OPROFILE * om,const ESL_SQ * sq,const ESL_SQ * ntsq)92 p7_alidisplay_Create(const P7_TRACE *tr, int which, const P7_OPROFILE *om, const ESL_SQ *sq, const ESL_SQ *ntsq)
93 {
94   P7_ALIDISPLAY *ad       = NULL;
95   char          *Alphabet = om->abc->sym;
96   int            n, pos, z;
97   int            z1,z2;
98   int            k,x,i,s;
99   int            hmm_namelen, hmm_acclen, hmm_desclen;
100   int            sq_namelen,  sq_acclen,  sq_desclen;
101   int            status;
102   ESL_SQ         *ntorfseqtxt = NULL;
103 
104   /* First figure out which piece of the trace (from first match to last match)
105    * we're going to represent, and how big it is.
106    */
107   if (tr->ndom > 0) {		/* if we have an index, this is a little faster: */
108     for (z1 = tr->tfrom[which]; z1 < tr->N; z1++) if (tr->st[z1] == p7T_M) break;  /* find next M state      */
109     if (z1 == tr->N) return NULL;                                                  /* no M? corrupt trace    */
110     for (z2 = tr->tto[which];   z2 >= 0 ;   z2--) if (tr->st[z2] == p7T_M) break;  /* find prev M state      */
111     if (z2 == -1) return NULL;                                                     /* no M? corrupt trace    */
112   } else {			/* without an index, we can still do it fine:    */
113     for (z1 = 0; which >= 0 && z1 < tr->N; z1++) if (tr->st[z1] == p7T_B) which--; /* find the right B state */
114     if (z1 == tr->N) return NULL;                                                  /* no such domain <which> */
115     for (; z1 < tr->N; z1++) if (tr->st[z1] == p7T_M) break;                       /* find next M state      */
116     if (z1 == tr->N) return NULL;                                                  /* no M? corrupt trace    */
117     for (z2 = z1; z2 < tr->N; z2++) if (tr->st[z2] == p7T_E) break;                /* find the next E state  */
118     for (; z2 >= 0;    z2--) if (tr->st[z2] == p7T_M) break;                       /* find prev M state      */
119     if (z2 == -1) return NULL;                                                     /* no M? corrupt trace    */
120   }
121 
122   /* Now we know that z1..z2 in the trace will be represented in the
123    * alidisplay; that's z2-z1+1 positions. We need a \0 trailer on all
124    * our display lines, so allocate z2-z1+2. We know each position is
125    * M, D, or I, so there's a 1:1 correspondence of trace positions
126    * with alignment display positions.  We also know the display
127    * starts and ends with M states.
128    *
129    * So now let's allocate. The alidisplay is packed into a single
130    * memory space, so this appears to be intricate, but it's just
131    * bookkeeping.
132    */
133   n = (z2-z1+2) * 3;                     /* model, mline, aseq mandatory         */
134 
135   if (om->rf[0]  != 0)    n += z2-z1+2;  /* optional reference line              */
136   if (om->mm[0]  != 0)    n += z2-z1+2;  /* optional reference line              */
137   if (om->cs[0]  != 0)    n += z2-z1+2;  /* optional structure line              */
138   if (tr->pp     != NULL) n += z2-z1+2;  /* optional posterior prob line         */
139   hmm_namelen = strlen(om->name);                           n += hmm_namelen + 1;
140   hmm_acclen  = (om->acc  != NULL ? strlen(om->acc)  : 0);  n += hmm_acclen  + 1;
141   hmm_desclen = (om->desc != NULL ? strlen(om->desc) : 0);  n += hmm_desclen + 1;
142 
143   sq_namelen  = strlen(sq->name);                           n += sq_namelen  + 1;
144   sq_acclen   = strlen(sq->acc);                            n += sq_acclen   + 1; /* sq->acc is "\0" when unset */
145   sq_desclen  = strlen(sq->desc);                           n += sq_desclen  + 1; /* same for desc              */
146 
147   ESL_ALLOC(ad, sizeof(P7_ALIDISPLAY));
148   ad->mem = NULL;
149 
150   pos = 0;
151   ad->memsize = sizeof(char) * n;
152   ESL_ALLOC(ad->mem, ad->memsize);
153   if (om->rf[0]  != 0) { ad->rfline = ad->mem + pos; pos += z2-z1+2; } else { ad->rfline = NULL; }
154   //if (om->mm[0]  != 0) { ad->mmline = ad->mem + pos; pos += z2-z1+2; } else { ad->mmline = NULL; }
155   ad->mmline = NULL;
156   if (om->cs[0]  != 0) { ad->csline = ad->mem + pos; pos += z2-z1+2; } else { ad->csline = NULL; }
157   ad->model   = ad->mem + pos;  pos += z2-z1+2;
158   ad->mline   = ad->mem + pos;  pos += z2-z1+2;
159   ad->aseq    = ad->mem + pos;  pos += z2-z1+2;
160 
161   if (tr->pp != NULL)  { ad->ppline = ad->mem + pos;  pos += z2-z1+2;} else { ad->ppline = NULL; }
162   ad->hmmname = ad->mem + pos;  pos += hmm_namelen +1;
163   ad->hmmacc  = ad->mem + pos;  pos += hmm_acclen +1;
164   ad->hmmdesc = ad->mem + pos;  pos += hmm_desclen +1;
165   ad->sqname  = ad->mem + pos;  pos += sq_namelen +1;
166   ad->sqacc   = ad->mem + pos;  pos += sq_acclen +1;
167   ad->sqdesc  = ad->mem + pos;  pos += sq_desclen +1;
168 
169   strcpy(ad->hmmname, om->name);
170   if (om->acc  != NULL) strcpy(ad->hmmacc,  om->acc);  else ad->hmmacc[0]  = 0;
171   if (om->desc != NULL) strcpy(ad->hmmdesc, om->desc); else ad->hmmdesc[0] = 0;
172 
173   strcpy(ad->sqname,  sq->name);
174   strcpy(ad->sqacc,   sq->acc);
175   strcpy(ad->sqdesc,  sq->desc);
176 
177   /* Determine hit coords */
178   ad->hmmfrom = tr->k[z1];
179   ad->hmmto   = tr->k[z2];
180   ad->M       = om->M;
181 
182   ad->sqfrom  = tr->i[z1];
183   ad->sqto    = tr->i[z2];
184   ad->L       = sq->n;
185 
186   /* optional rf line */
187   if (ad->rfline != NULL) {
188     for (z = z1; z <= z2; z++) ad->rfline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->rf[tr->k[z]]);
189     ad->rfline[z-z1] = '\0';
190   }
191 
192   /* optional mm line */
193   if (ad->mmline != NULL) {
194     for (z = z1; z <= z2; z++) ad->mmline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->mm[tr->k[z]]);
195     ad->mmline[z-z1] = '\0';
196   }
197 
198   /* optional cs line */
199   if (ad->csline != NULL) {
200     for (z = z1; z <= z2; z++) ad->csline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->cs[tr->k[z]]);
201     ad->csline[z-z1] = '\0';
202   }
203 
204   /* optional pp line */
205   if (ad->ppline != NULL) {
206     for (z = z1; z <= z2; z++) ad->ppline[z-z1] = ( (tr->st[z] == p7T_D) ? '.' : p7_alidisplay_EncodePostProb(tr->pp[z]));
207     ad->ppline[z-z1] = '\0';
208   }
209 
210 
211   /* mandatory three alignment display lines: model, mline, aseq */
212   for (z = z1; z <= z2; z++)
213     {
214       k = tr->k[z];
215       i = tr->i[z];
216       x = sq->dsq[i];
217       s = tr->st[z];
218 
219       switch (s) {
220       case p7T_M:
221         ad->model[z-z1] = om->consensus[k];
222         if      (x == esl_abc_DigitizeSymbol(om->abc, om->consensus[k])) ad->mline[z-z1] = ad->model[z-z1];
223         else if (p7_oprofile_FGetEmission(om, k, x) > 1.0)               ad->mline[z-z1] = '+'; /* >1 not >0; om has odds ratios, not scores */
224         else                                                             ad->mline[z-z1] = ' ';
225         ad->aseq  [z-z1] = toupper(Alphabet[x]);
226         break;
227 
228       case p7T_I:
229         ad->model [z-z1] = '.';
230         ad->mline [z-z1] = ' ';
231         ad->aseq  [z-z1] = tolower(Alphabet[x]);
232         break;
233 
234       case p7T_D:
235         ad->model [z-z1] = om->consensus[k];
236         ad->mline [z-z1] = ' ';
237         ad->aseq  [z-z1] = '-';
238 
239         break;
240 
241       default: ESL_XEXCEPTION(eslEINVAL, "invalid state in trace: not M,D,I");
242       }
243     }
244   ad->model [z2-z1+1] = '\0';
245   ad->mline [z2-z1+1] = '\0';
246   ad->aseq  [z2-z1+1] = '\0';
247   ad->N = z2-z1+1;
248   ad->ntseq = NULL;  // Mark this NULL so it doesn't cause problems with serialization.
249   // nhmmer can reset it if it wants later
250   esl_sq_Destroy(ntorfseqtxt);
251 
252 
253   return ad;
254 
255  ERROR:
256   p7_alidisplay_Destroy(ad);
257   return NULL;
258 }
259 
260 /* Function: p7_alidisplay_Create_empty()
261  * Synopsis: Creates an empty P7_ALIDISPLAY object
262  *
263  * Purpose:  Creates an empty P7_ALIDISPLAY object, one that does not contain an alignment to
264  *           display but has been initialized to reasonable values (NULL for all strings, 0 for the amount
265  *           of memory allocated in its buffer, etc.).  This is mainly intended to be used to create a structure
266  *           that a serialized P7_ALIDISPLAY object can be deserialized into.
267  *
268  * Returns:  Pointer to the new <P7_ALIDISPLAY>
269  *
270  * Throws:   Returns NULL if unable to allocate memory
271  */
p7_alidisplay_Create_empty()272 extern P7_ALIDISPLAY *p7_alidisplay_Create_empty()
273 {
274   P7_ALIDISPLAY *new_obj;
275   int status;  // standard ESL_ALLOC error condition
276 
277   ESL_ALLOC(new_obj, sizeof(P7_ALIDISPLAY));
278 
279   // Init fields to sensible "empty" values
280   new_obj->rfline = NULL;
281   new_obj->mmline = NULL;
282   new_obj->csline = NULL;
283   new_obj->model = NULL;
284   new_obj->mline = NULL;
285   new_obj->aseq = NULL;
286   new_obj->ntseq = NULL;
287   new_obj->ppline = NULL;
288   new_obj->N = 0;
289 
290   new_obj->hmmname = NULL;
291   new_obj->hmmacc = NULL;
292   new_obj->hmmdesc = NULL;
293   new_obj->hmmfrom = 0;
294   new_obj->hmmto = 0;
295   new_obj->M = 0;
296 
297   new_obj->sqname = NULL;
298   new_obj->sqacc = NULL;
299   new_obj->sqdesc = NULL;
300   new_obj->sqfrom = 0;
301   new_obj->sqto = 0;
302   new_obj->L = 0;
303 
304   new_obj->memsize = 0;
305   new_obj->mem = NULL;
306 
307   return new_obj;
308 
309   ERROR: // only get here if the ESL_ALLOC fails
310     return NULL;
311 }
312 /* Function:  p7_alidisplay_Clone()
313  * Synopsis:  Make a duplicate of an ALIDISPLAY.
314  *
315  * Purpose:   Create a duplicate of alignment display <ad>.
316  *            Return a pointer to the duplicate. Caller
317  *            is responsible for freeing the new object.
318  *
319  * Returns:   pointer to new <P7_ALIDISPLAY>
320  *
321  * Throws:    <NULL> on allocation failure.
322  */
323 P7_ALIDISPLAY *
p7_alidisplay_Clone(const P7_ALIDISPLAY * ad)324 p7_alidisplay_Clone(const P7_ALIDISPLAY *ad)
325 {
326   P7_ALIDISPLAY *ad2 = NULL;
327   int status;
328 
329   ESL_ALLOC(ad2, sizeof(P7_ALIDISPLAY));
330   ad2->rfline  = ad2->mmline = ad2->csline = ad2->model   = ad2->mline  = ad2->aseq = ad2->ntseq = ad2->ppline = NULL;
331   ad2->hmmname = ad2->hmmacc = ad2->hmmdesc = NULL;
332   ad2->sqname  = ad2->sqacc  = ad2->sqdesc  = NULL;
333   ad2->mem     = NULL;
334   ad2->memsize = 0;
335 
336   if (ad->memsize) 		/* serialized */
337     {
338       ESL_ALLOC(ad2->mem, sizeof(char) * ad->memsize);
339       ad2->memsize = ad->memsize;
340       memcpy(ad2->mem, ad->mem, ad->memsize);
341 
342       ad2->rfline = (ad->rfline ? ad2->mem + (ad->rfline - ad->mem) : NULL );
343       ad2->mmline = (ad->mmline ? ad2->mem + (ad->mmline - ad->mem) : NULL );
344       ad2->csline = (ad->csline ? ad2->mem + (ad->csline - ad->mem) : NULL );
345       ad2->model  = ad2->mem + (ad->model  - ad->mem);
346       ad2->mline  = ad2->mem + (ad->mline  - ad->mem);
347       ad2->aseq   = ad2->mem + (ad->aseq   - ad->mem);
348       ad2->ntseq  = (ad->ntseq  ? ad2->mem + (ad->ntseq  - ad->mem) : NULL );
349       ad2->ppline = (ad->ppline ? ad2->mem + (ad->ppline - ad->mem) : NULL );
350       ad2->N      = ad->N;
351 
352       ad2->hmmname = ad2->mem + (ad->hmmname - ad->mem);
353       ad2->hmmacc  = ad2->mem + (ad->hmmacc  - ad->mem);
354       ad2->hmmdesc = ad2->mem + (ad->hmmdesc - ad->mem);
355       ad2->hmmfrom = ad->hmmfrom;
356       ad2->hmmto   = ad->hmmto;
357       ad2->M       = ad->M;
358 
359       ad2->sqname  = ad2->mem + (ad->sqname - ad->mem);
360       ad2->sqacc   = ad2->mem + (ad->sqacc  - ad->mem);
361       ad2->sqdesc  = ad2->mem + (ad->sqdesc - ad->mem);
362       ad2->sqfrom  = ad->sqfrom;
363       ad2->sqto    = ad->sqto;
364       ad2->L       = ad->L;
365     }
366   else				/* deserialized */
367     {
368       if ( esl_strdup(ad->rfline, -1, &(ad2->rfline)) != eslOK) goto ERROR;
369       if ( esl_strdup(ad->mmline, -1, &(ad2->mmline)) != eslOK) goto ERROR;
370       if ( esl_strdup(ad->csline, -1, &(ad2->csline)) != eslOK) goto ERROR;
371       if ( esl_strdup(ad->model,  -1, &(ad2->model))  != eslOK) goto ERROR;
372       if ( esl_strdup(ad->mline,  -1, &(ad2->mline))  != eslOK) goto ERROR;
373       if ( esl_strdup(ad->aseq,   -1, &(ad2->aseq))   != eslOK) goto ERROR;
374       if ( esl_strdup(ad->ntseq,  -1, &(ad2->ntseq))  != eslOK) goto ERROR;
375       if ( esl_strdup(ad->ppline, -1, &(ad2->ppline)) != eslOK) goto ERROR;
376       ad2->N = ad->N;
377 
378       if ( esl_strdup(ad->hmmname, -1, &(ad2->hmmname)) != eslOK) goto ERROR;
379       if ( esl_strdup(ad->hmmacc,  -1, &(ad2->hmmacc))  != eslOK) goto ERROR;
380       if ( esl_strdup(ad->hmmdesc, -1, &(ad2->hmmdesc)) != eslOK) goto ERROR;
381       ad2->hmmfrom = ad->hmmfrom;
382       ad2->hmmto   = ad->hmmto;
383       ad2->M       = ad->M;
384 
385       if ( esl_strdup(ad->sqname,  -1, &(ad2->sqname)) != eslOK) goto ERROR;
386       if ( esl_strdup(ad->sqacc,   -1, &(ad2->sqacc))  != eslOK) goto ERROR;
387       if ( esl_strdup(ad->sqdesc,  -1, &(ad2->sqdesc)) != eslOK) goto ERROR;
388       ad2->sqfrom  = ad->sqfrom;
389       ad2->sqto    = ad->sqto;
390       ad2->L       = ad->L;
391     }
392 
393   return ad2;
394 
395  ERROR:
396   if (ad2) p7_alidisplay_Destroy(ad2);
397   return NULL;
398 }
399 
400 
401 /* Function:  p7_alidisplay_Sizeof()
402  * Synopsis:  Returns the total size of a P7_ALIDISPLAY, in bytes.
403  *
404  * Purpose:   Return the total size of <P7_ALIDISPLAY> <ad>, in bytes.
405  *
406  *            Note that <ad->memsize = p7_alidisplay_Sizeof(ad) - sizeof(P7_ALIDISPLAY)>,
407  *            for a serialized object, because <ad->memsize> only refers to the sum
408  *            of the variable-length allocated fields.
409  *
410  * Args:      ad - P7_ALIDISPLAY to get the size of
411  *
412  * Returns:   size of <ad> in bytes
413  */
414 size_t
p7_alidisplay_Sizeof(const P7_ALIDISPLAY * ad)415 p7_alidisplay_Sizeof(const P7_ALIDISPLAY *ad)
416 {
417   size_t n = sizeof(P7_ALIDISPLAY);
418 
419   if (ad->rfline) n += ad->N+1; /* +1 for \0 */
420   if (ad->mmline) n += ad->N+1;
421   if (ad->csline) n += ad->N+1;
422   if (ad->ppline) n += ad->N+1;
423   n += 3 * (ad->N+1);	          /* model, mline, aseq */
424   if (ad->ntseq)  n += (3 * ad->N) + 1;	          /* ntseq */
425   n += 1 + strlen(ad->hmmname);
426   n += 1 + strlen(ad->hmmacc);	  /* optional acc, desc fields: when not present, just "" ("\0") */
427   n += 1 + strlen(ad->hmmdesc);
428   n += 1 + strlen(ad->sqname);
429   n += 1 + strlen(ad->sqacc);
430   n += 1 + strlen(ad->sqdesc);
431 
432   return n;
433 }
434 
435 
436 #define SER_BASE_SIZE ((5 * sizeof(int)) + (3 * sizeof(int64_t)) +1) // Total size of the fixed-length fields in a
437 // serialized P7_ALIDISPLAY
438 
439 /* Function:  p7_alidisplay_Serialize
440  * Synopsis:  Serializes a HMMD_SEARCH_STATS object into a stream of bytes
441  *.           that can be reliably transmitted over internet sockets
442  *
443  * Purpose:   Converts an architecture-dependent P7_SEARCH_STATS object into a contiguous stream
444  *            of bytes with each field of the data structure in network byte order for transmission
445  *            over sockets.  The serialized byte stream may be part of a larger allocated buffer.
446  *            If the provided buffer is NULL, allocates a new buffer large enough for the serialized object
447  *            If the provided buffer is not large enough to hold the serialized object and its existing data, re-allocates
448  *            a larger buffer
449  *
450  * Inputs:    obj: A pointer to the HMMD_SEARCH_STATS object to be serialized
451  *            buf: Handle to the buffer that the object should be serialized into.  If *buf is NULL,
452  *                 a new buffer will be allocated.  buf == NULL is not allowed.
453  *            n:   Offset (in bytes) from the start of the buffer to where the serialized object should start.
454  *            nalloc: size (in bytes) of the buffer passed in buf
455  *
456  *Returns:    On success: returns eslOK, sets *buf to the base of the buffer containing the object
457  *            if allocation or re-allocation was requried, sets *n to the offset from the start of the buffer
458  *            to the first position after the serialized object and sets *nalloc to the new size of the buffer
459  *            if allocation or re-allocation was required.
460  *
461  * Throws:    Returns eslEMEM if unable to allocate or re-allocate memory.  Returns eslEINVAL if obj == NULL, n == NULL, or buf == NULL
462  */
p7_alidisplay_Serialize(const P7_ALIDISPLAY * obj,uint8_t ** buf,uint32_t * n,uint32_t * nalloc)463 int p7_alidisplay_Serialize(const P7_ALIDISPLAY *obj, uint8_t **buf, uint32_t *n, uint32_t *nalloc){
464 
465   int status; // error variable used by ESL_ALLOC
466   uint32_t ser_size; // size of the structure when serialized
467   uint8_t *ptr; // current position within the buffer
468   uint32_t network_32bit; // hold 32-bit fields after conversion to network order
469   uint64_t network_64bit; // hold 64-bit fields after conversion to network order
470   uint8_t presence_flags = 0; // Bit-vector that records presence or absence of optional strings
471   uint32_t hmmname_length, hmmacc_length, hmmdesc_length, sqname_length, sqacc_length, sqdesc_length;
472 
473   // check to make sure we were passed a valid pointer
474   if(obj == NULL || buf == NULL || n == NULL){ // no object to serialize or nowhere to put a buffer pointer
475     return(eslEINVAL);
476   }
477 
478   // Pass 1: Compute size of the serialized data structure
479   /* 11 ints: 4 int fields in P7_ALIDISPLAY + lengths of 6 variable-length strings + total length of serialized structure
480      3 int64_t fields in P7_ALIDISPLAY
481      1 byte for presence/absence bit vector for rfline, mmline, csline, ppline, aseq, ntseq
482     */
483   ser_size = SER_BASE_SIZE;
484 
485   // This method will work regardless of whether the object is in "serialized" or "deserialized" format
486   // it's also blatantly stolen from the _Sizeof() routine.
487   // Note that we can't just call _Sizeof() to figure out how big the serialized data structure will be because
488   // the serialized structure has some fields that differ from the base P7_ALIDISPLAY structure
489 
490   if (obj->rfline){
491     presence_flags += RFLINE_PRESENT;
492     ser_size += obj->N+1; /* +1 for \0 */
493   }
494 
495   if (obj->mmline) {
496     presence_flags += MMLINE_PRESENT;
497     ser_size += obj->N+1;
498   }
499 
500   if (obj->csline){
501     presence_flags += CSLINE_PRESENT;
502     ser_size += obj->N+1;
503   }
504 
505   ser_size += 2 * (obj->N+1);           /* model, mline */
506 
507   if (obj->aseq){
508     presence_flags += ASEQ_PRESENT;
509     ser_size += obj->N+1;
510   }
511 
512   if (obj->ntseq){
513     presence_flags += NTSEQ_PRESENT;
514     ser_size += (3 * obj->N) + 1;           /* ntseq */
515   }
516 
517   if (obj->ppline){
518     presence_flags += PPLINE_PRESENT;
519     ser_size += obj->N+1;
520   }
521 
522   hmmname_length = strlen(obj->hmmname);
523   ser_size += 1 + hmmname_length;
524 
525   hmmacc_length = strlen(obj->hmmacc);
526   ser_size += 1 + hmmacc_length;    /* optional acc, desc fields: when not present, just "" ("\0") */
527 
528   hmmdesc_length = strlen(obj->hmmdesc);
529   ser_size += 1 + hmmdesc_length;
530 
531   sqname_length = strlen(obj->sqname);
532   ser_size += 1 + sqname_length;
533 
534   sqacc_length = strlen(obj->sqacc);
535   ser_size += 1 + sqacc_length;
536 
537   sqdesc_length = strlen(obj->sqdesc);
538   ser_size += 1 + sqdesc_length;
539 
540   // Now that we know how big the serialized data structure will be, determine if we have enough buffer space to hold it
541   if(*buf == NULL){ // have no buffer, so allocate one
542     ESL_ALLOC(*buf, ser_size);
543     *nalloc = ser_size;
544   }
545 
546   if((*n + ser_size) > *nalloc){ //have a buffer, but it's not big enough
547     ESL_REALLOC(*buf, (*n + ser_size));
548     *nalloc = *n + ser_size;
549   }
550 
551   // Pass 2: serialize the structure
552 
553   //First, the fixed-length field
554   ptr = *buf + *n; // point to the start of empty space in the buffer
555 
556   network_32bit = esl_hton32(ser_size); // first field in the serialized object is its size
557   memcpy(ptr, &network_32bit, sizeof(uint32_t)); // Write size of the serialized object into the buffer
558   ptr += sizeof(uint32_t);
559 
560   // Field 2: N
561   network_32bit = esl_hton32(obj->N);
562   memcpy(ptr, &network_32bit, sizeof(uint32_t));
563   ptr += sizeof(uint32_t);
564 
565   // Field 3: Hmmfrom field
566   network_32bit = esl_hton32(obj->hmmfrom);
567   memcpy(ptr, &network_32bit, sizeof(uint32_t));
568   ptr += sizeof(uint32_t);
569 
570   // Field 4: Hmmto field
571   network_32bit = esl_hton32(obj->hmmto);
572   memcpy(ptr, &network_32bit, sizeof(uint32_t));
573   ptr += sizeof(uint32_t);
574 
575   // Field 5: M field
576   network_32bit = esl_hton32(obj->M);
577   memcpy(ptr, &network_32bit, sizeof(uint32_t));
578   ptr += sizeof(uint32_t);
579 
580   // Field 6: Sqfrom
581   network_64bit = esl_hton64(obj->sqfrom);
582   memcpy(ptr, &network_64bit, sizeof(int64_t));
583   ptr += sizeof(int64_t);
584 
585   // Field 7: Sqto
586   network_64bit = esl_hton64(obj->sqto);
587   memcpy(ptr, &network_64bit, sizeof(int64_t));
588   ptr += sizeof(int64_t);
589 
590   // Field 8: L
591   network_64bit = esl_hton64(obj->L);
592   memcpy(ptr, &network_64bit, sizeof(int64_t));
593   ptr += sizeof(int64_t);
594 
595   // Field 9: presence_flags
596   memcpy(ptr, &presence_flags, sizeof(uint8_t));
597   ptr += sizeof(uint8_t);
598 
599   //Now, the strings, some of which are optional
600   // Note that many of these strings are fixed-length if they are present
601 
602   // Field 10: Rfline
603   if(presence_flags & RFLINE_PRESENT){
604     strcpy((char *) ptr, obj->rfline);
605     ptr += obj->N + 1;
606   }
607 
608   // Field 11: mmline
609   if(presence_flags & MMLINE_PRESENT){
610     strcpy((char *) ptr, obj->mmline);
611     ptr += obj->N + 1;
612   }
613 
614   // Field 12: csline
615   if(presence_flags & CSLINE_PRESENT){
616     strcpy((char *) ptr, obj->csline);
617     ptr += obj->N + 1;
618   }
619 
620   // Field 13: Model
621   strcpy((char *) ptr, obj->model);
622   ptr += obj->N + 1;
623 
624   // Field 14: Mline
625   strcpy((char *) ptr, obj->mline);
626   ptr += obj->N + 1;
627 
628   // Field 15: Aseq
629   if(presence_flags & ASEQ_PRESENT){
630     strcpy((char *) ptr, obj->aseq);
631     ptr += obj->N + 1;
632   }
633 
634   // Field 16: ntseq
635   if(presence_flags & NTSEQ_PRESENT){
636     strcpy((char *) ptr, obj->ntseq);
637     ptr += (3 * obj->N) + 1;
638   }
639 
640   // Field 17: PPline
641   if(presence_flags & PPLINE_PRESENT){
642     strcpy((char *) ptr, obj->ppline);
643     ptr += obj->N + 1;
644   }
645 
646   // Field 18: Hmmname
647   strcpy((char *) ptr, obj->hmmname);
648   ptr += hmmname_length + 1;
649 
650   // Field 19: Hmmacc
651   strcpy((char *) ptr, obj->hmmacc);
652   ptr += hmmacc_length + 1;
653 
654   // Field 20: Hmmdesc
655   strcpy((char *) ptr, obj->hmmdesc);
656   ptr += hmmdesc_length + 1;
657 
658   // Field 21: Sqname
659   strcpy((char *) ptr, obj->sqname);
660   ptr += sqname_length +1;
661 
662   // Field 22: Sqacc
663   strcpy((char *) ptr, obj->sqacc);
664   ptr += sqacc_length +1 ;
665 
666   // Field 23: Sqdesc
667   strcpy((char *) ptr, obj->sqdesc);
668   ptr += sqdesc_length +1 ;
669 
670   // sanity-check that we computed the length correctly
671   if(ptr != *buf + *n + ser_size){
672     printf("Serialized object length did not match computed length in p7_alidisplay_Serialize\n");
673     return eslEINVAL;
674   }
675 
676   *n = ptr - *buf; // update n to be just past the serialized object
677   return eslOK; // If we make it here, everything succeeded, so return a pass
678 
679   ERROR:
680     return eslEMEM;
681 }
682 
683 /* Function:  p7_alidisplay_Deserialize
684  * Synopsis:  Derializes a P7_ALIDISPLAY object from a stream of bytes in network order into
685  *            a valid data structure
686  *
687  * Purpose:   Deserializes a serialized P7_ALIDISPLAY object from
688  *.           buf starting at position position *pos.
689  *
690  * Inputs:    buf: the buffer that the object should be de-serialized from
691  *            pos: a pointer to the offset from the start of buf to the beginning of the object
692  *            ret_obj: a P7_ALIDISPLAY structure to deserialize the object into.  May not be NULL. May either be an
693  *            "empty" object created with p7_alidisplay_Create_empty, or a P7_ALIDISPLAY object containing valid data
694  *
695  * Returns:   On success: returns eslOK, deserializes the P7_ALIDISPLAY object into ret_object, and updates
696  *.           pos to point to the position after the end of the P7_ALIDISPLAY object.
697  *
698  * Throws:    Returns eslEINVAL if ret_obj == NULL, buf == NULL, or N == NULL.  Returnts eslEMEM if unable to increase
699  *            the buffer in ret_obj to match the size of the deserialized object.
700  */
p7_alidisplay_Deserialize(const uint8_t * buf,uint32_t * n,P7_ALIDISPLAY * ret_obj)701 extern int p7_alidisplay_Deserialize(const uint8_t *buf, uint32_t *n, P7_ALIDISPLAY *ret_obj){
702   int status;  // Standard Easel error code variable
703 
704   uint8_t *ptr;
705   char *mem_ptr;
706   uint64_t network_64bit; // holds 64-bit values in network order
707   uint32_t network_32bit; // holds 64-bit values in network order
708   uint32_t obj_size; // How much space does the variable-length portion of the serialized object take up?
709   uint8_t presence_flags; // bit-vector that tells us which strings are present in the object
710   int string_length; // used to hold the length of strings copied out of serialized object
711 
712   if ((buf == NULL) || (ret_obj == NULL) || (n == NULL)){ // check to make sure we've been passed valid objects
713       return(eslEINVAL);
714   }
715 
716   mem_ptr = NULL; // set this for safety
717   ptr = (uint8_t *) buf + *n; // Get pointer to start of the object
718   //First field: Size of the serialized object.  Copy out of buffer into scalar variable to deal with memory alignment, convert to
719   // host machine order
720   memcpy(&network_32bit, ptr, sizeof(uint32_t)); // Grab the bytes out of the buffer
721   obj_size = esl_ntoh32(network_32bit);
722   ptr += sizeof(uint32_t);
723 
724   if(ret_obj->memsize < (obj_size - SER_BASE_SIZE)){  // ret_obj doesn't have enough space for this P7_ALIDISPLAY
725     if(ret_obj->mem != NULL){
726       ESL_REALLOC(ret_obj->mem, (obj_size - SER_BASE_SIZE));
727     }
728     else{
729       ESL_ALLOC(ret_obj->mem, (obj_size - SER_BASE_SIZE));
730     }
731     ret_obj->memsize = obj_size - SER_BASE_SIZE;
732   }
733 
734   // Second field: N
735   memcpy(&network_32bit, ptr, sizeof(uint32_t)); // Grab the bytes out of the buffer
736   ret_obj->N = esl_ntoh32(network_32bit);
737   ptr += sizeof(uint32_t);
738 
739   // Third field: Hmmfrom
740   memcpy(&network_32bit, ptr, sizeof(uint32_t));
741   ret_obj->hmmfrom = esl_ntoh32(network_32bit);
742   ptr += sizeof(uint32_t);
743 
744   // Fourth field: Hmmto
745   memcpy(&network_32bit, ptr, sizeof(uint32_t));
746   ret_obj->hmmto = esl_ntoh32(network_32bit);
747   ptr += sizeof(uint32_t);
748 
749   // Fifth field: M
750   memcpy(&network_32bit, ptr, sizeof(uint32_t));
751   ret_obj->M = esl_ntoh32(network_32bit);
752   ptr += sizeof(uint32_t);
753 
754   // Sixth field: sqfrom
755   memcpy(&network_64bit, ptr, sizeof(uint64_t));
756   ret_obj->sqfrom = esl_ntoh64(network_64bit);
757   ptr += sizeof(uint64_t);
758 
759   // Seventh field: sqto
760   memcpy(&network_64bit, ptr, sizeof(uint64_t));
761   ret_obj->sqto = esl_ntoh64(network_64bit);
762   ptr += sizeof(uint64_t);
763 
764   // Eighth field: L
765   memcpy(&network_64bit, ptr, sizeof(uint64_t));
766   ret_obj->L = esl_ntoh64(network_64bit);
767   ptr += sizeof(uint64_t);
768 
769   // Ninth field: presence flags
770   presence_flags = *ptr; // no need for memcpy with one-byte field
771   ptr += sizeof(uint8_t);
772 
773   // Bulk copy the strings into the alidisplay's mem field
774   memcpy(ret_obj->mem, ptr, (obj_size - SER_BASE_SIZE));
775   mem_ptr = ret_obj->mem;
776   ptr += (obj_size - SER_BASE_SIZE);
777   // Tenth field: rfline, if present
778 
779   if(ptr != buf + *n + obj_size){
780     printf("Error: in p7_alidisplay_Deserialize, found object (ptr) to be of size %ld, expected %u.\n", (long int) (ptr - (buf + *n)), obj_size);
781     //return eslEINVAL;
782   }
783 
784   if(presence_flags & RFLINE_PRESENT){
785     ret_obj->rfline = mem_ptr;
786     string_length = strlen(ret_obj->rfline);
787     mem_ptr+= string_length +1; // + 1 to account for end-of-string character
788   }
789   else{ // not present
790     ret_obj->rfline = NULL;
791   }
792 
793   // Eleventh field: mmline, if present
794   if(presence_flags & MMLINE_PRESENT){
795     ret_obj->mmline = mem_ptr;
796     string_length = strlen(ret_obj->mmline);
797     mem_ptr+= string_length + 1;
798   }
799   else{ // not present
800     ret_obj->mmline = NULL;
801   }
802 
803   // Twelfth field: csline, if present
804   if(presence_flags & CSLINE_PRESENT){
805     ret_obj->csline = mem_ptr;
806     string_length = strlen(ret_obj->csline);
807     mem_ptr+= string_length + 1;
808   }
809   else{ // not present
810     ret_obj->csline = NULL;
811   }
812 
813 
814   // Thirteenth field: model
815   ret_obj->model = mem_ptr;
816   string_length = strlen(ret_obj->model);
817   mem_ptr+= string_length + 1;
818 
819  // Thirteenth field: mline
820   ret_obj->mline = mem_ptr;
821   string_length = strlen(ret_obj->mline);
822   mem_ptr+= string_length + 1;
823 
824   // Fourteenth field: aseq, if present
825   if(presence_flags & ASEQ_PRESENT){
826     ret_obj->aseq = mem_ptr;
827     string_length = strlen(ret_obj->aseq);
828     mem_ptr+= string_length + 1;
829   }
830   else{ // not present
831     ret_obj->aseq = NULL;
832   }
833 
834   // Fifteenth field: ntseq, if present
835   if(presence_flags & NTSEQ_PRESENT){
836     ret_obj->ntseq = mem_ptr;
837     string_length = strlen(ret_obj->ntseq);
838     mem_ptr+= string_length + 1;
839   }
840   else{ // not present
841     ret_obj->ntseq = NULL;
842   }
843 
844   // Sixteenth field: ppline, if present
845   if(presence_flags & PPLINE_PRESENT){
846     ret_obj->ppline = mem_ptr;
847     string_length = strlen(ret_obj->ppline);
848     mem_ptr+= string_length + 1;
849   }
850   else{ // not present
851     ret_obj->ppline = NULL;
852   }
853 
854   // Seventeenth field: hmmname
855   ret_obj->hmmname = mem_ptr;
856   string_length = strlen(ret_obj->hmmname);
857   mem_ptr+= string_length + 1;
858 
859   // Eighteenth field: hmmacc
860   ret_obj->hmmacc = mem_ptr;
861   string_length = strlen(ret_obj->hmmacc);
862   mem_ptr+= string_length + 1;
863 
864   // Nineteenth field: hmmdesc
865   ret_obj->hmmdesc = mem_ptr;
866   string_length = strlen(ret_obj->hmmdesc);
867   mem_ptr+= string_length + 1;
868 
869   // Twentyith field: sqname
870   ret_obj->sqname = mem_ptr;
871   string_length = strlen(ret_obj->sqname);
872   mem_ptr+= string_length + 1;
873 
874   // Twentyfirst field: sqacc
875   ret_obj->sqacc = mem_ptr;
876   string_length = strlen(ret_obj->sqacc);
877   mem_ptr+= string_length + 1;
878 
879   // Twentysecond field: sqdesc
880   ret_obj->sqdesc = mem_ptr;
881   string_length = strlen(ret_obj->sqdesc);
882   mem_ptr+= string_length +1;
883 
884   // Sanity-check that we got the length right
885   if(mem_ptr - ret_obj->mem != (obj_size - SER_BASE_SIZE)){
886     printf("Error: at end of p7_alidisplay_Deserialize, found strings to be of size %ld, expected %ld.\n", (long)(mem_ptr - ret_obj->mem), (long)(obj_size - SER_BASE_SIZE));
887     return eslEINVAL;
888   }
889   *n += obj_size;
890   return eslOK;  // as usual, if we get to the end of the routine without failing, return success
891 
892   ERROR: // only get here if we can't allocate memory
893     return eslEMEM;
894 }
895 
896 /* Function:  p7_alidisplay_Serialize()
897  * Synopsis:  Serialize a P7_ALIDISPLAY, using internal memory.
898  *
899  * Purpose:   Serialize the <P7_ALIDISPLAY> <ad>, internally converting
900  *            all its variable-length allocations to a single
901  *            contiguous memory allocation. Serialization aids
902  *            interprocess communication.
903  *
904  *            If <ad> is already serialized, do nothing.
905  *
906  * Args:      ad  - alidisplay to serialize
907  *
908  * Returns:   <eslOK> on success.
909  *
910  * Throws:    <eslEMEM> on allocation failure, and <ad> is restored to
911  *            its original (deserialized) state.
912  */
913 int
p7_alidisplay_Serialize_old(P7_ALIDISPLAY * ad)914 p7_alidisplay_Serialize_old(P7_ALIDISPLAY *ad)
915 {
916   int pos;
917   int n;
918   int status;
919 
920   if (ad->mem) return eslOK;	/* already serialized, so no-op */
921   ad->memsize = p7_alidisplay_Sizeof(ad) - sizeof(P7_ALIDISPLAY);
922   ESL_ALLOC(ad->mem, ad->memsize);
923 
924   /* allow no exceptions past this point, because API guarantees restore of original state upon error */
925 
926   pos = 0;
927   if (ad->rfline) { memcpy(ad->mem+pos, ad->rfline, ad->N+1); free(ad->rfline); ad->rfline = ad->mem+pos;  pos += ad->N+1; }
928   if (ad->mmline) { memcpy(ad->mem+pos, ad->mmline, ad->N+1); free(ad->mmline); ad->mmline = ad->mem+pos;  pos += ad->N+1; }
929   if (ad->csline) { memcpy(ad->mem+pos, ad->csline, ad->N+1); free(ad->csline); ad->csline = ad->mem+pos;  pos += ad->N+1; }
930   memcpy(ad->mem+pos, ad->model,  ad->N+1); free(ad->model); ad->model = ad->mem+pos; pos += ad->N+1;
931   memcpy(ad->mem+pos, ad->mline,  ad->N+1); free(ad->mline); ad->mline = ad->mem+pos; pos += ad->N+1;
932   memcpy(ad->mem+pos, ad->aseq,   ad->N+1); free(ad->aseq);  ad->aseq  = ad->mem+pos; pos += ad->N+1;
933   if (ad->ntseq)  { memcpy(ad->mem+pos, ad->ntseq, (3*ad->N)+1); free(ad->ntseq);  ad->ntseq  = ad->mem+pos; pos += (3*ad->N)+1; }
934   if (ad->ppline) { memcpy(ad->mem+pos, ad->ppline, ad->N+1); free(ad->ppline); ad->ppline = ad->mem+pos;  pos += ad->N+1; }
935   n = 1 + strlen(ad->hmmname);  memcpy(ad->mem + pos, ad->hmmname, n); free(ad->hmmname); ad->hmmname = ad->mem+pos; pos += n;
936   n = 1 + strlen(ad->hmmacc);   memcpy(ad->mem + pos, ad->hmmacc,  n); free(ad->hmmacc);  ad->hmmacc  = ad->mem+pos; pos += n;
937   n = 1 + strlen(ad->hmmdesc);  memcpy(ad->mem + pos, ad->hmmdesc, n); free(ad->hmmdesc); ad->hmmdesc = ad->mem+pos; pos += n;
938   n = 1 + strlen(ad->sqname);   memcpy(ad->mem + pos, ad->sqname,  n); free(ad->sqname);  ad->sqname  = ad->mem+pos; pos += n;
939   n = 1 + strlen(ad->sqacc);    memcpy(ad->mem + pos, ad->sqacc,   n); free(ad->sqacc);   ad->sqacc   = ad->mem+pos; pos += n;
940   n = 1 + strlen(ad->sqdesc);   memcpy(ad->mem + pos, ad->sqdesc,  n); free(ad->sqdesc);  ad->sqdesc  = ad->mem+pos; pos += n;
941 
942   return eslOK;
943 
944  ERROR:
945   if (ad->mem) { free(ad->mem); ad->mem = NULL; }
946   return status;
947 }
948 
949 /* Function:  p7_alidisplay_Deserialize()
950  * Synopsis:  Deserialize a P7_ALIDISPLAY, using internal memory.
951  *
952  * Purpose:   Deserialize the <P7_ALIDISPLAY> <ad>, converting its internal
953  *            allocations from a single contiguous memory chunk to individual
954  *            variable-length allocations. Deserialization facilitates
955  *            reallocation/editing of individual elements of the display.
956  *
957  *            If <ad> is already deserialized, do nothing.
958  *
959  * Args:      ad - alidisplay to serialize
960  *
961  * Returns:   <eslOK> on success
962  *
963  * Throws:    <eslEMEM> on allocation failure, and <ad> is restored to
964  *            its original (serialized) state.
965  */
966 int
p7_alidisplay_Deserialize_old(P7_ALIDISPLAY * ad)967 p7_alidisplay_Deserialize_old(P7_ALIDISPLAY *ad)
968 {
969   int pos;
970   int n;
971   int status;
972 
973   if (ad->mem == NULL) return eslOK; /* already deserialized, so no-op */
974 
975   pos = 0;
976   if (ad->rfline) { ESL_ALLOC(ad->rfline, sizeof(char) * ad->N+1); memcpy(ad->rfline, ad->mem+pos, ad->N+1); pos += ad->N+1; }
977   if (ad->mmline) { ESL_ALLOC(ad->mmline, sizeof(char) * ad->N+1); memcpy(ad->mmline, ad->mem+pos, ad->N+1); pos += ad->N+1; }
978   if (ad->csline) { ESL_ALLOC(ad->csline, sizeof(char) * ad->N+1); memcpy(ad->csline, ad->mem+pos, ad->N+1); pos += ad->N+1; }
979   ESL_ALLOC(ad->model, sizeof(char) * ad->N+1); memcpy(ad->model, ad->mem+pos, ad->N+1); pos += ad->N+1;
980   ESL_ALLOC(ad->mline, sizeof(char) * ad->N+1); memcpy(ad->mline, ad->mem+pos, ad->N+1); pos += ad->N+1;
981   ESL_ALLOC(ad->aseq,  sizeof(char) * ad->N+1); memcpy(ad->aseq,  ad->mem+pos, ad->N+1); pos += ad->N+1;
982   if (ad->ntseq)  { ESL_ALLOC(ad->ntseq,  sizeof(char) * (3*ad->N)+1); memcpy(ad->ntseq,  ad->mem+pos, (3*ad->N)+1); pos += (3*ad->N)+1; }
983   if (ad->ppline) { ESL_ALLOC(ad->ppline, sizeof(char) * ad->N+1); memcpy(ad->ppline, ad->mem+pos, ad->N+1); pos += ad->N+1; }
984   n = 1 + strlen(ad->mem+pos);  ESL_ALLOC(ad->hmmname,  sizeof(char) * n); memcpy(ad->hmmname,  ad->mem+pos, n); pos += n;
985   n = 1 + strlen(ad->mem+pos);  ESL_ALLOC(ad->hmmacc,   sizeof(char) * n); memcpy(ad->hmmacc,   ad->mem+pos, n); pos += n;
986   n = 1 + strlen(ad->mem+pos);  ESL_ALLOC(ad->hmmdesc,  sizeof(char) * n); memcpy(ad->hmmdesc,  ad->mem+pos, n); pos += n;
987   n = 1 + strlen(ad->mem+pos);  ESL_ALLOC(ad->sqname,   sizeof(char) * n); memcpy(ad->sqname,   ad->mem+pos, n); pos += n;
988   n = 1 + strlen(ad->mem+pos);  ESL_ALLOC(ad->sqacc,    sizeof(char) * n); memcpy(ad->sqacc,    ad->mem+pos, n); pos += n;
989   n = 1 + strlen(ad->mem+pos);  ESL_ALLOC(ad->sqdesc,   sizeof(char) * n); memcpy(ad->sqdesc,   ad->mem+pos, n); pos += n;
990 
991   free(ad->mem);
992   ad->mem     = NULL;
993   ad->memsize = 0;
994   return eslOK;
995 
996  ERROR:
997   /* restore serialized state, if an alloc fails. tedious, if not nontrivial. */
998   /* the pointers are non-NULL whether we just allocated them or if they're pointing into mem, so we have to check against mem+pos */
999   pos = 0;
1000   if (ad->rfline) { if (ad->rfline != ad->mem+pos) { free(ad->rfline); ad->rfline = ad->mem+pos; }  pos += ad->N+1; }
1001   if (ad->mmline) { if (ad->mmline != ad->mem+pos) { free(ad->mmline); ad->mmline = ad->mem+pos; }  pos += ad->N+1; }
1002   if (ad->csline) { if (ad->csline != ad->mem+pos) { free(ad->csline); ad->csline = ad->mem+pos; }  pos += ad->N+1; }
1003   if (ad->model != ad->mem+pos) { free(ad->model); ad->model = ad->mem+pos; }  pos += ad->N+1;
1004   if (ad->mline != ad->mem+pos) { free(ad->mline); ad->mline = ad->mem+pos; }  pos += ad->N+1;
1005   if (ad->aseq  != ad->mem+pos) { free(ad->aseq);  ad->aseq  = ad->mem+pos; }  pos += ad->N+1;
1006   if (ad->ntseq)  { if (ad->ntseq  != ad->mem+pos) { free(ad->ntseq);  ad->ntseq  = ad->mem+pos; }  pos += (3*ad->N)+1; }
1007   if (ad->ppline) { if (ad->ppline != ad->mem+pos) { free(ad->ppline); ad->ppline = ad->mem+pos; }  pos += ad->N+1; }
1008 
1009   n = 1 + strlen(ad->hmmname);  if (ad->hmmname != ad->mem+pos) { free(ad->hmmname); ad->hmmname = ad->mem+pos; }  pos += n;
1010   n = 1 + strlen(ad->hmmacc);   if (ad->hmmacc  != ad->mem+pos) { free(ad->hmmacc);  ad->hmmacc  = ad->mem+pos; }  pos += n;
1011   n = 1 + strlen(ad->hmmname);  if (ad->hmmdesc != ad->mem+pos) { free(ad->hmmdesc); ad->hmmdesc = ad->mem+pos; }  pos += n;
1012   n = 1 + strlen(ad->sqname);   if (ad->sqname  != ad->mem+pos) { free(ad->sqname);  ad->sqname = ad->mem+pos;  }  pos += n;
1013   n = 1 + strlen(ad->sqacc);    if (ad->sqacc   != ad->mem+pos) { free(ad->sqacc);   ad->sqacc  = ad->mem+pos;  }  pos += n;
1014   n = 1 + strlen(ad->sqname);   if (ad->sqdesc  != ad->mem+pos) { free(ad->sqdesc);  ad->sqdesc = ad->mem+pos;  }  pos += n;
1015   return status;
1016 }
1017 
1018 
1019 /* Function:  p7_alidisplay_Destroy()
1020  * Synopsis:  Frees a <P7_ALIDISPLAY>
1021  */
1022 void
p7_alidisplay_Destroy(P7_ALIDISPLAY * ad)1023 p7_alidisplay_Destroy(P7_ALIDISPLAY *ad)
1024 {
1025   if (ad == NULL) return;
1026   if (ad->mem)
1027     {	/* serialized form */
1028       free(ad->mem);
1029     }
1030   else
1031     {	/* deserialized form */
1032       if (ad->rfline)  free(ad->rfline);
1033       if (ad->mmline)  free(ad->mmline);
1034       if (ad->csline)  free(ad->csline);
1035       if (ad->model)   free(ad->model);
1036       if (ad->mline)   free(ad->mline);
1037       if (ad->aseq)    free(ad->aseq);
1038       if (ad->ntseq)   free(ad->ntseq);
1039       if (ad->ppline)  free(ad->ppline);
1040       if (ad->hmmname) free(ad->hmmname);
1041       if (ad->hmmacc)  free(ad->hmmacc);
1042       if (ad->hmmdesc) free(ad->hmmdesc);
1043       if (ad->sqname)  free(ad->sqname);
1044       if (ad->sqacc)   free(ad->sqacc);
1045       if (ad->sqdesc)  free(ad->sqdesc);
1046     }
1047   free(ad);
1048 }
1049 /*---------------- end, alidisplay object -----------------------*/
1050 
1051 
1052 
1053 /*****************************************************************
1054  * 2. The P7_ALIDISPLAY API
1055  *****************************************************************/
1056 
1057 static int
integer_textwidth(long n)1058 integer_textwidth(long n)
1059 {
1060   int w = (n < 0)? 1 : 0;
1061   while (n != 0) { n /= 10; w++; }
1062   return w;
1063 }
1064 
1065 
1066 /* Function:  p7_alidisplay_EncodePostProb()
1067  * Synopsis:  Convert a posterior probability to a char code.
1068  *
1069  * Purpose:   Convert the posterior probability <p> to
1070  *            a character code suitable for Stockholm format
1071  *            <#=GC PP_cons> and <#=GR seqname PP> annotation
1072  *            lines. HMMER uses the same codes in alignment
1073  *            output.
1074  *
1075  *            Characters <0-9*> are used; $0.0 \leq p < 0.05$
1076  *            is coded as 0, $0.05 \leq p < 0.15$ is coded as
1077  *            1, ... and so on ..., $0.85 \leq p < 0.95$ is
1078  *            coded as 9, and $0.95 \leq p \leq 1.0$ is coded
1079  *            as '*'.
1080  *
1081  * Returns:   the encoded character.
1082  */
1083 char
p7_alidisplay_EncodePostProb(float p)1084 p7_alidisplay_EncodePostProb(float p)
1085 {
1086   return (p + 0.05 >= 1.0) ? '*' :  (char) ((p + 0.05) * 10.0) + '0';
1087 }
1088 
1089 /* Function:  p7_alidisplay_DecodePostProb()
1090  * Synopsis:  Convert a char code post prob to an approx float.
1091  *
1092  * Purpose:   Convert posterior probability code <pc>, which
1093  *            is [0-9*], to an approximate floating point probability.
1094  *
1095  *            The result is crude, because <pc> has already discretized
1096  *            with loss of precision. We require that
1097  *            <p7_alidisplay_EncodePostProb(p7_alidisplay_DecodePostProb(pc)) == pc>,
1098  *            and that <pc=='0'> decodes to a nonzero probability just to
1099  *            avoid any possible absorbing-zero artifacts.
1100  *
1101  * Returns:   the decoded real-valued approximate probability.
1102  */
1103 float
p7_alidisplay_DecodePostProb(char pc)1104 p7_alidisplay_DecodePostProb(char pc)
1105 {
1106   if      (pc == '0') return 0.01;
1107   else if (pc == '*') return 1.0;
1108   else if (pc == '.') return 0.0;
1109   else                return ((float) (pc - '0') / 10.);
1110 }
1111 
1112 
1113 /* Function:  p7_alidisplay_Print()
1114  * Synopsis:  Human readable output of <P7_ALIDISPLAY>
1115  *
1116  * Purpose:   Prints alignment <ad> to stream <fp>.
1117  *
1118  *            Put at least <min_aliwidth> alignment characters per
1119  *            line; try to make lines no longer than <linewidth>
1120  *            characters, including name, coords, and spacing.  The
1121  *            width of lines may exceed <linewidth>, if that's what it
1122  *            takes to put a name, coords, and <min_aliwidth>
1123  *            characters of alignment on a line.
1124  *
1125  *            As a special case, if <linewidth> is negative or 0, then
1126  *            alignments are formatted in a single block of unlimited
1127  *            line length.
1128  *
1129  * Returns:   <eslOK> on success.
1130  *
1131  * Throws:    <eslEWRITE> on write error, such as filling the disk.
1132  */
1133 int
p7_alidisplay_Print(FILE * fp,P7_ALIDISPLAY * ad,int min_aliwidth,int linewidth,P7_PIPELINE * pli)1134 p7_alidisplay_Print(FILE *fp, P7_ALIDISPLAY *ad, int min_aliwidth, int linewidth, P7_PIPELINE *pli)
1135 {
1136    int status;
1137    if ((status = p7_nontranslated_alidisplay_Print(fp, ad, min_aliwidth, linewidth, pli->show_accessions)) != eslOK) return status;
1138 
1139 	return status;
1140 }
1141 
1142 /* Function:  p7_nontranslated_alidisplay_Print()
1143  * Synopsis:  Human readable output of <P7_ALIDISPLAY>
1144  *
1145  * Purpose:   Prints alignment <ad> to stream <fp>.
1146  *
1147  *            Put at least <min_aliwidth> alignment characters per
1148  *            line; try to make lines no longer than <linewidth>
1149  *            characters, including name, coords, and spacing.  The
1150  *            width of lines may exceed <linewidth>, if that's what it
1151  *            takes to put a name, coords, and <min_aliwidth>
1152  *            characters of alignment on a line.
1153  *
1154  *            As a special case, if <linewidth> is negative or 0, then
1155  *            alignments are formatted in a single block of unlimited
1156  *            line length.
1157  *
1158  * Returns:   <eslOK> on success.
1159  *
1160  * Throws:    <eslEWRITE> on write error, such as filling the disk.
1161  */
1162 int
p7_nontranslated_alidisplay_Print(FILE * fp,P7_ALIDISPLAY * ad,int min_aliwidth,int linewidth,int show_accessions)1163 p7_nontranslated_alidisplay_Print(FILE *fp, P7_ALIDISPLAY *ad, int min_aliwidth, int linewidth, int show_accessions)
1164 {
1165   char *buf          = NULL;
1166   char *show_hmmname = NULL;
1167   char *show_seqname = NULL;
1168   int   namewidth, coordwidth, aliwidth;
1169   int   pos;
1170   int   status;
1171   int   ni, nk;
1172   int   z;
1173   long  i1,i2;
1174   int   k1,k2;
1175 
1176   /* implement the --acc option for preferring accessions over names in output  */
1177   show_hmmname = (show_accessions && ad->hmmacc[0] != '\0') ? ad->hmmacc : ad->hmmname;
1178   show_seqname = (show_accessions && ad->sqacc[0]  != '\0') ? ad->sqacc  : ad->sqname;
1179 
1180   /* dynamically size the output lines */
1181   namewidth  = ESL_MAX(strlen(show_hmmname), strlen(show_seqname));
1182   coordwidth = ESL_MAX(ESL_MAX(integer_textwidth(ad->hmmfrom),
1183                               integer_textwidth(ad->hmmto)),
1184                       ESL_MAX(integer_textwidth(ad->sqfrom),
1185                               integer_textwidth(ad->sqto)));
1186 
1187   aliwidth   = (linewidth > 0) ? linewidth - namewidth - 2*coordwidth - 5 : ad->N;
1188   if (aliwidth < ad->N && aliwidth < min_aliwidth) aliwidth = min_aliwidth; /* at least, regardless of some silly linewidth setting */
1189   ESL_ALLOC(buf, sizeof(char) * (aliwidth+1));
1190   buf[aliwidth] = 0;
1191 
1192   /* Break the alignment into multiple blocks of width aliwidth for printing */
1193   i1 = ad->sqfrom;
1194   k1 = ad->hmmfrom;
1195   for (pos = 0; pos < ad->N; pos += aliwidth)
1196     {
1197       if (pos > 0) { if (fprintf(fp, "\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed"); } /* blank line betweeen blocks */
1198 
1199       ni = nk = 0;
1200       for (z = pos; z < pos + aliwidth && z < ad->N; z++) {
1201         if (ad->model[z] != '.') nk++; /* k advances except on insert states */
1202         if (ad->aseq[z]  != '-') ni++; /* i advances except on delete states */
1203       }
1204 
1205       k2 = k1+nk-1;
1206       if (ad->sqfrom < ad->sqto) i2 = i1+ni-1;
1207       else                       i2 = i1-ni+1; // revcomp hit for DNA
1208 
1209       if (ad->csline != NULL) { strncpy(buf, ad->csline+pos, aliwidth); if (fprintf(fp, "  %*s %s CS\n", namewidth+coordwidth+1, "", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed"); }
1210       if (ad->rfline != NULL) { strncpy(buf, ad->rfline+pos, aliwidth); if (fprintf(fp, "  %*s %s RF\n", namewidth+coordwidth+1, "", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed"); }
1211       if (ad->mmline != NULL) { strncpy(buf, ad->mmline+pos, aliwidth); if (fprintf(fp, "  %*s %s MM\n", namewidth+coordwidth+1, "", buf) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed"); }
1212 
1213       strncpy(buf, ad->model+pos, aliwidth); if (fprintf(fp, "  %*s %*d %s %-*d\n", namewidth,  show_hmmname, coordwidth, k1, buf, coordwidth, k2) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed");
1214       strncpy(buf, ad->mline+pos, aliwidth); if (fprintf(fp, "  %*s %s\n", namewidth+coordwidth+1, " ", buf)                                       < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed");
1215 
1216       if (ni > 0) { strncpy(buf, ad->aseq+pos, aliwidth); if (fprintf(fp, "  %*s %*ld %s %-*ld\n", namewidth, show_seqname, coordwidth, i1,  buf, coordwidth, i2)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed");  }
1217       else        { strncpy(buf, ad->aseq+pos, aliwidth); if (fprintf(fp, "  %*s %*s %s %*s\n",    namewidth, show_seqname, coordwidth, "-", buf, coordwidth, "-") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed");  }
1218 
1219       if (ad->ppline != NULL)  { strncpy(buf, ad->ppline+pos, aliwidth);  if (fprintf(fp, "  %*s %s PP\n", namewidth+coordwidth+1, "", buf)  < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "alignment display write failed");  }
1220 
1221       k1 += nk;
1222       if   (ad->sqfrom < ad->sqto)  i1 += ni;
1223       else                          i1 -= ni;  // revcomp hit for DNA
1224     }
1225   fflush(fp);
1226   free(buf);
1227   return eslOK;
1228 
1229  ERROR:
1230   if (buf) free(buf);
1231   return status;
1232 }
1233 
1234 /* Function:  p7_alidisplay_Backconvert()
1235  * Synopsis:  Convert an alidisplay to a faux trace and subsequence.
1236  *
1237  * Purpose:   Convert alignment display object <ad> to a faux subsequence
1238  *            and faux subsequence trace, returning them in <ret_sq> and
1239  *            <ret_tr>.
1240  *
1241  *            The subsequence <*ret_sq> is digital; ascii residues in
1242  *            <ad> are digitized using digital alphabet <abc>.
1243  *
1244  *            The subsequence and trace are suitable for passing as
1245  *            array elements to <p7_tracealign_Seqs>. This is the
1246  *            main purpose of backconversion. Results of a profile
1247  *            search are stored in a hit list as a processed
1248  *            <P7_ALIDISPLAY>, not as a <P7_TRACE> and <ESL_SQ>, to
1249  *            reduce space and to reduce communication overhead in
1250  *            parallelized search implementations. After reduction
1251  *            to a final hit list, a master may want to construct a
1252  *            multiple alignment of all the significant hits.
1253  *
1254  * Returns:   <eslOK> on success.
1255  *
1256  * Throws:    <eslEMEM> on allocation failures. <eslECORRUPT> on unexpected internal
1257  *            data corruption. On any exception, <*ret_sq> and <*ret_tr> are
1258  *            <NULL>.
1259  *
1260  * Xref:      SRE:J4/29.
1261  */
1262 int
p7_alidisplay_Backconvert(const P7_ALIDISPLAY * ad,const ESL_ALPHABET * abc,ESL_SQ ** ret_sq,P7_TRACE ** ret_tr)1263 p7_alidisplay_Backconvert(const P7_ALIDISPLAY *ad, const ESL_ALPHABET *abc, ESL_SQ **ret_sq, P7_TRACE **ret_tr)
1264 {
1265   ESL_SQ   *sq   = NULL;	/* RETURN: faux subsequence          */
1266   P7_TRACE *tr   = NULL;	/* RETURN: faux trace                */
1267   int       subL = 0;		/* subsequence length in the <ad>    */
1268   int       a, i, k;        	/* coords for <ad>, <sq->dsq>, model */
1269   char      s;   	        /* current state type: MDI           */
1270   int       status;
1271 
1272   /* Make a first pass over <ad> just to calculate subseq length */
1273   for (a = 0; a < ad->N; a++)
1274     if (! esl_abc_CIsGap(abc, ad->aseq[a])) subL++;
1275 
1276   /* Allocations */
1277   if ((sq = esl_sq_CreateDigital(abc)) == NULL)   { status = eslEMEM; goto ERROR; }
1278   if ((status = esl_sq_GrowTo(sq, subL)) != eslOK) goto ERROR;
1279 
1280   if ((tr = (ad->ppline == NULL) ?  p7_trace_Create() : p7_trace_CreateWithPP()) == NULL) { status = eslEMEM; goto ERROR; }
1281   if ((status = p7_trace_GrowTo(tr, subL+6)) != eslOK) goto ERROR;   /* +6 is for SNB/ECT */
1282 
1283   /* Construction of dsq, trace */
1284   sq->dsq[0] = eslDSQ_SENTINEL;
1285   if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_S, 0, 0) : p7_trace_AppendWithPP(tr, p7T_S, 0, 0, 0.0))) != eslOK) goto ERROR;
1286   if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_N, 0, 0) : p7_trace_AppendWithPP(tr, p7T_N, 0, 0, 0.0))) != eslOK) goto ERROR;
1287   if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_B, 0, 0) : p7_trace_AppendWithPP(tr, p7T_B, 0, 0, 0.0))) != eslOK) goto ERROR;
1288   k = ad->hmmfrom - 1;   // -1 so the first M causes k to advance to <hmmfrom>.
1289   i = 1;                 //    ... which assumes <ad> always starts with M; currently true, all alis are local alis.
1290   for (a = 0; a < ad->N; a++)
1291     {
1292       /* here, anything that could appear in the original input
1293        * sequence, as opposed to an alignment gap, needs to be
1294        * reconstructed.  So, do not test for IsResidue(), because that
1295        * will fail to reconstruct * chars. use ! IsGap() instead.
1296        * [xref iss#135]
1297        */
1298       if   (! esl_abc_CIsGap(abc, ad->model[a])) { k++; s = (! esl_abc_CIsGap(abc, ad->aseq[a]) ? p7T_M : p7T_D); }
1299       else s = p7T_I;
1300 
1301       if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, s, k, i) : p7_trace_AppendWithPP(tr, s, k, i, p7_alidisplay_DecodePostProb(ad->ppline[a])))) != eslOK) goto ERROR;
1302 
1303       switch (s) {
1304       case p7T_M: sq->dsq[i] = esl_abc_DigitizeSymbol(abc, ad->aseq[a]); i++; break;
1305       case p7T_I: sq->dsq[i] = esl_abc_DigitizeSymbol(abc, ad->aseq[a]); i++; break;
1306       case p7T_D:                                                             break;
1307       }
1308     }
1309   if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_E, 0, 0) : p7_trace_AppendWithPP(tr, p7T_E, 0, 0, 0.0))) != eslOK) goto ERROR;
1310   if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_C, 0, 0) : p7_trace_AppendWithPP(tr, p7T_C, 0, 0, 0.0))) != eslOK) goto ERROR;
1311   if ((status = ((ad->ppline == NULL) ? p7_trace_Append(tr, p7T_T, 0, 0) : p7_trace_AppendWithPP(tr, p7T_T, 0, 0, 0.0))) != eslOK) goto ERROR;
1312   sq->dsq[i] = eslDSQ_SENTINEL;
1313 
1314   /* some sanity checks */
1315   if (tr->N != ad->N + 6)  ESL_XEXCEPTION(eslECORRUPT, "backconverted trace ended up with unexpected size (%s/%s)",         ad->sqname, ad->hmmname);
1316   if (k     != ad->hmmto)  ESL_XEXCEPTION(eslECORRUPT, "backconverted trace didn't end at expected place on model (%s/%s)", ad->sqname, ad->hmmname);
1317   if (i     != subL+1)     ESL_XEXCEPTION(eslECORRUPT, "backconverted subseq didn't end at expected length (%s/%s)",        ad->sqname, ad->hmmname);
1318 
1319   /* Set up <sq> annotation as a subseq of a source sequence */
1320   if ((status = esl_sq_FormatName(sq, "%s/%" PRId64 "-%" PRId64 "", ad->sqname, ad->sqfrom, ad->sqto))                      != eslOK) goto ERROR;
1321   if ((status = esl_sq_FormatDesc(sq, "[subseq from] %s", ad->sqdesc[0] != '\0' ? ad->sqdesc : ad->sqname)) != eslOK) goto ERROR;
1322   if ((status = esl_sq_SetSource (sq, ad->sqname))                                                          != eslOK) goto ERROR;
1323   if (ad->sqacc[0]  != '\0') { if ((status = esl_sq_SetAccession  (sq, ad->sqacc)) != eslOK) goto ERROR; }
1324   sq->n     = subL;
1325   sq->start = ad->sqfrom;
1326   sq->end   = ad->sqto;
1327   sq->C     = 0;
1328   sq->W     = subL;
1329   sq->L     = ad->L;
1330 
1331   tr->M     = ad->M;
1332   tr->L     = ad->L;
1333 
1334   *ret_sq = sq;
1335   *ret_tr = tr;
1336   return eslOK;
1337 
1338  ERROR:
1339   if (sq != NULL) esl_sq_Destroy(sq);
1340   if (tr != NULL) p7_trace_Destroy(tr);
1341   *ret_sq = NULL;
1342   *ret_tr = NULL;
1343   return status;
1344 }
1345 /*------------------- end, alidisplay API -----------------------*/
1346 
1347 
1348 /*****************************************************************
1349  * 3. Debugging/dev code
1350  *****************************************************************/
1351 
1352 
1353 /* Function:  p7_alidisplay_Sample()
1354  * Synopsis:  Sample a random, ugly <P7_ALIDISPLAY> for test purposes
1355  * Incept:    SRE, Wed Feb 28 14:22:12 2018 [Caravan Palace, Dragons]
1356  *
1357  * Purpose:   Sample a random, dirty <P7_ALIDISPLAY> of length <N> for
1358  *            testing purposes, using random number generator <rng>.
1359  *            Return it through <ret_ad>. Caller frees.
1360  *
1361  *            P7_ALIDISPLAY is assumed to be a _local_ alignment.
1362  *            Must start with M, and end with M|D.
1363  *
1364  * Args:      rng    - random number generator
1365  *            N      - length of alignment
1366  *            ret_ad - RETURN: random sampled <P7_ALIDISPLAY>
1367  *
1368  * Returns:   <eslOK> on success, and <ret_ad> points to the new
1369  *            <P7_ALIDISPLAY>
1370  *
1371  * Throws:    <eslEMEM> on allocation error, and <ret_ad> is NULL.
1372  */
1373 int
p7_alidisplay_Sample(ESL_RANDOMNESS * rng,int N,P7_ALIDISPLAY ** ret_ad)1374 p7_alidisplay_Sample(ESL_RANDOMNESS *rng, int N, P7_ALIDISPLAY **ret_ad)
1375 {
1376   P7_ALIDISPLAY *ad            = NULL;
1377   char          *guidestring   = NULL;	/* string [0..N-1] composed of MDI */
1378   int            nM            = 0;
1379   int            nD            = 0;
1380   int            nI            = 0;
1381   enum p7t_statetype_e last_st;
1382   int            pos;
1383   int            status;
1384 
1385   ESL_ALLOC(guidestring, sizeof(char) * (N+1));
1386 
1387   guidestring[0] = 'M'; nM++; last_st = p7T_M; /* local alignments must start with M */
1388   for (pos = 1; pos < N-1; pos++)
1389     {
1390       switch (last_st)
1391 	{
1392 	case p7T_M:
1393 	  switch (esl_rnd_Roll(rng, 3))
1394 	    {
1395 	    case 0: guidestring[pos] = 'M'; nM++; last_st = p7T_M; break;
1396 	    case 1: guidestring[pos] = 'D'; nD++; last_st = p7T_D; break;
1397 	    case 2: guidestring[pos] = 'I'; nI++; last_st = p7T_I; break;
1398 	    }
1399 	  break;
1400 
1401 	case p7T_I:
1402 	  switch (esl_rnd_Roll(rng, 2))
1403 	    {
1404 	    case 0: guidestring[pos] = 'M'; nM++; last_st = p7T_M; break;
1405 	    case 1: guidestring[pos] = 'I'; nI++; last_st = p7T_I; break;
1406 	    }
1407 	  break;
1408 
1409 	case p7T_D:
1410 	  switch (esl_rnd_Roll(rng, 2))
1411 	    {
1412 	    case 0: guidestring[pos] = 'M'; nM++; last_st = p7T_M; break;
1413 	    case 1: guidestring[pos] = 'D'; nD++; last_st = p7T_D; break;
1414 	    }
1415 	  break;
1416 
1417 	default:
1418 	  break;
1419 	}
1420     }
1421   /* local alignments can end on M or D. (optimal local alignments can only end on M) */
1422   switch (last_st) {
1423   case p7T_I:
1424     guidestring[N-1] = 'M';  nM++;  break;
1425   default:
1426     switch (esl_rnd_Roll(rng, 2)) {
1427     case 0: guidestring[N-1] = 'M'; nM++; break;
1428     case 1: guidestring[N-1] = 'D'; nD++; break;
1429     }
1430     break;
1431   }
1432   guidestring[N] = '\0';
1433 
1434   ESL_ALLOC(ad, sizeof(P7_ALIDISPLAY));
1435   ad->rfline  = ad->mmline = ad->csline = ad->model   = ad->mline  = ad->aseq = ad->ntseq = ad->ppline = NULL;
1436   ad->hmmname = ad->hmmacc = ad->hmmdesc = NULL;
1437   ad->sqname  = ad->sqacc  = ad->sqdesc  = NULL;
1438   ad->mem     = NULL;
1439   ad->memsize = 0;
1440 
1441   /* Optional lines are added w/ 50% chance */
1442   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->rfline, sizeof(char) * (N+1));
1443   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->mmline, sizeof(char) * (N+1));
1444   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->csline, sizeof(char) * (N+1));
1445   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->ppline, sizeof(char) * (N+1));
1446   ESL_ALLOC(ad->model, sizeof(char) * (N+1));
1447   ESL_ALLOC(ad->mline, sizeof(char) * (N+1));
1448   ESL_ALLOC(ad->aseq,  sizeof(char) * (N+1));
1449   ad->N = N;
1450 
1451   esl_strdup("my_hmm", -1, &(ad->hmmname));
1452   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("PF000007",          -1, &(ad->hmmacc));  else esl_strdup("", -1, &(ad->hmmacc));
1453   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("(hmm description)", -1, &(ad->hmmdesc)); else esl_strdup("", -1, &(ad->hmmdesc));
1454 
1455   esl_strdup("my_seq", -1, &(ad->sqname));
1456   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("ABC000001.42",           -1, &(ad->sqacc));  else esl_strdup("", -1, &(ad->sqacc));
1457   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("(sequence description)", -1, &(ad->sqdesc)); else esl_strdup("", -1, &(ad->sqdesc));
1458 
1459   /* model, seq coords must look valid. */
1460   ad->hmmfrom = 100;
1461   ad->hmmto   = ad->hmmfrom + nM + nD - 1;
1462   ad->M       = ad->hmmto + esl_rnd_Roll(rng, 2);
1463 
1464   ad->sqfrom  = 1000;
1465   ad->sqto    = ad->sqfrom + nM + nI - 1;
1466   ad->L       = ad->sqto + esl_rnd_Roll(rng, 2);
1467 
1468   /* rfline is free-char "reference annotation" on consensus; H3 puts '.' for inserts. */
1469   if (ad->rfline) {
1470     for (pos = 0; pos < N; pos++)
1471       ad->rfline[pos] = (guidestring[pos] == 'I' ? '.' : 'x');
1472     ad->rfline[pos] = '\0';
1473   }
1474 
1475   /* mmline indicates which columns should be masked (assigned background distribution), '.' indicates no mask; H3 puts '.' for inserts. */
1476   if (ad->mmline) {
1477     for (pos = 0; pos < N; pos++)
1478       ad->mmline[pos] = (guidestring[pos] == 'I' ? '.' : '.');
1479     ad->mmline[pos] = '\0';
1480   }
1481 
1482   /* csline is optional. It has free-char "consensus structure annotation" on consensus positions. H3 puts '.' on inserts. */
1483   if (ad->csline) {
1484     for (pos = 0; pos < N; pos++)
1485       ad->csline[pos] = (guidestring[pos] == 'I' ? '.' : 'X');
1486     ad->csline[pos] = '\0';
1487   }
1488 
1489   /* the mandatory three-line alignment display:
1490    *
1491    *   guidestring:    MMMDI
1492    *   model:          XXXX.
1493    *   mline:          A+
1494    *   aseq:           AAA-a
1495    */
1496   for (pos = 0; pos < N; pos++)
1497     {
1498       switch (guidestring[pos]) {
1499       case 'M':
1500 	ad->model[pos] = 'X';
1501 	switch (esl_rnd_Roll(rng, 3)) {
1502 	case 0: ad->mline[pos] = 'A';
1503 	case 1: ad->mline[pos] = '+';
1504 	case 2: ad->mline[pos] = ' ';
1505 	}
1506 	if (ad->mline[pos] == ' ' && esl_rnd_Roll(rng, 50) == 0) ad->aseq[pos] = '*';  // dirty aligned sequence up with nasty * stop codons, about 1/(3*50) of the time.
1507 	else                                                     ad->aseq[pos] = 'A';  // ... they would only be aligned to ' ' on an mline.
1508 	break;                                                                         // ... they might appear aligned to a match or insert state (see iss#135)
1509 
1510       case 'D':
1511 	ad->model[pos] = 'X';
1512 	ad->mline[pos] = ' ';
1513 	ad->aseq[pos]  = '-';
1514 	break;
1515 
1516       case 'I':
1517 	ad->model[pos] = '.';
1518 	ad->mline[pos] = ' ';
1519 	ad->aseq[pos]  = 'a';
1520 	break;
1521       }
1522     }
1523   ad->model[pos] = '\0';
1524   ad->mline[pos] = '\0';
1525   ad->aseq[pos]  = '\0';
1526 
1527   /* ppline is optional */
1528   if (ad->ppline) {
1529     for (pos = 0; pos < N; pos++)
1530       ad->ppline[pos] = (guidestring[pos] == 'D' ? '.' : p7_alidisplay_EncodePostProb(esl_random(rng)));
1531     ad->ppline[pos] = '\0';
1532   }
1533 
1534   free(guidestring);
1535   *ret_ad = ad;
1536   return eslOK;
1537 
1538  ERROR:
1539   if (guidestring) free(guidestring);
1540   if (ad)          p7_alidisplay_Destroy(ad);
1541   *ret_ad = NULL;
1542   return status;
1543 }
1544 
1545 
1546 
1547 /* Function:  p7_alidisplay_Dump()
1548  * Synopsis:  Print contents of P7_ALIDISPLAY for inspection.
1549  *
1550  * Purpose:   Print contents of the <P7_ALIDISPLAY> <ad> to
1551  *            stream <fp> for inspection. Includes all elements
1552  *            of the structure, whether the object is allocated
1553  *            in serialized or deserialized form, and the total
1554  *            size of the object in bytes.
1555  *
1556  * Returns:   <eslOK>
1557  */
1558 int
p7_alidisplay_Dump(FILE * fp,const P7_ALIDISPLAY * ad)1559 p7_alidisplay_Dump(FILE *fp, const P7_ALIDISPLAY *ad)
1560 {
1561   fprintf(fp, "P7_ALIDISPLAY dump\n");
1562   fprintf(fp, "------------------\n");
1563 
1564   fprintf(fp, "rfline  = %s\n", ad->rfline ? ad->rfline : "[none]");
1565   fprintf(fp, "mmline  = %s\n", ad->mmline ? ad->mmline : "[none]");
1566   fprintf(fp, "csline  = %s\n", ad->csline ? ad->csline : "[none]");
1567   fprintf(fp, "model   = %s\n", ad->model);
1568   fprintf(fp, "mline   = %s\n", ad->mline);
1569   fprintf(fp, "aseq    = %s\n", ad->aseq);
1570   fprintf(fp, "N       = %d\n", ad->N);
1571   fprintf(fp, "\n");
1572 
1573   fprintf(fp, "hmmname = %s\n", ad->hmmname);
1574   fprintf(fp, "hmmacc  = %s\n", ad->hmmacc[0]  == '\0' ? "[none]" : ad->hmmacc);
1575   fprintf(fp, "hmmdesc = %s\n", ad->hmmdesc[0] == '\0' ? "[none]" : ad->hmmdesc);
1576   fprintf(fp, "hmmfrom = %d\n", ad->hmmfrom);
1577   fprintf(fp, "hmmto   = %d\n", ad->hmmto);
1578   fprintf(fp, "M       = %d\n", ad->M);
1579   fprintf(fp, "\n");
1580 
1581   fprintf(fp, "sqname  = %s\n",  ad->sqname);
1582   fprintf(fp, "sqacc   = %s\n",  ad->sqacc[0]  == '\0' ? "[none]" : ad->sqacc);
1583   fprintf(fp, "sqdesc  = %s\n",  ad->sqdesc[0] == '\0' ? "[none]" : ad->sqdesc);
1584   fprintf(fp, "sqfrom  = %" PRId64 "\n", ad->sqfrom);
1585   fprintf(fp, "sqto    = %" PRId64 "\n", ad->sqto);
1586   fprintf(fp, "L       = %" PRId64 "\n", ad->L);
1587   fprintf(fp, "\n");
1588 
1589   fprintf(fp, "size    = %d bytes\n",  (int) p7_alidisplay_Sizeof(ad));
1590   fprintf(fp, "%s\n", ad->mem ? "serialized" : "not serialized");
1591   return eslOK;
1592 }
1593 
1594 /* Function:  p7_alidisplay_Compare()
1595  * Synopsis:  Compare two <P7_ALIDISPLAY> objects for equality
1596  *
1597  * Purpose:   Compare alignment displays <ad1> and <ad2> for
1598  *            equality. Return <eslOK> if they have identical
1599  *            contents; <eslFAIL> if not.
1600  *
1601  *            Only contents matter, not serialization status;
1602  *            a serialized and deserialized version of the same
1603  *            alidisplay will compare identical.
1604  */
1605 int
p7_alidisplay_Compare(const P7_ALIDISPLAY * ad1,const P7_ALIDISPLAY * ad2)1606 p7_alidisplay_Compare(const P7_ALIDISPLAY *ad1, const P7_ALIDISPLAY *ad2)
1607 {
1608   if (ad1->mem && ad2->mem)	/* both objects serialized */
1609     {
1610       if (ad1->memsize != ad2->memsize)                  return eslFAIL;
1611       if (memcmp(ad1->mem, ad2->mem, ad1->memsize) != 0) return eslFAIL;
1612     }
1613 
1614   if (esl_strcmp(ad1->rfline,  ad2->rfline)  != eslOK) return eslFAIL;
1615   if (esl_strcmp(ad1->mmline,  ad2->mmline)  != eslOK) return eslFAIL;
1616   if (esl_strcmp(ad1->csline,  ad2->csline)  != eslOK) return eslFAIL;
1617   if (esl_strcmp(ad1->model,   ad2->model)   != eslOK) return eslFAIL;
1618   if (esl_strcmp(ad1->mline,   ad2->mline)   != eslOK) return eslFAIL;
1619   if (esl_strcmp(ad1->aseq,    ad2->aseq)    != eslOK) return eslFAIL;
1620   if (esl_strcmp(ad1->ntseq,   ad2->ntseq)   != eslOK) return eslFAIL;
1621   if (esl_strcmp(ad1->ppline,  ad2->ppline)  != eslOK) return eslFAIL;
1622   if (ad1->N != ad2->N)                                return eslFAIL;
1623 
1624   if (esl_strcmp(ad1->hmmname, ad2->hmmname) != eslOK) return eslFAIL;
1625   if (esl_strcmp(ad1->hmmacc,  ad2->hmmacc)  != eslOK) return eslFAIL;
1626   if (esl_strcmp(ad1->hmmdesc, ad2->hmmdesc) != eslOK) return eslFAIL;
1627   if (ad1->hmmfrom != ad2->hmmfrom)                    return eslFAIL;
1628   if (ad1->hmmto   != ad2->hmmto)                      return eslFAIL;
1629   if (ad1->M       != ad2->M)                          return eslFAIL;
1630 
1631   if (esl_strcmp(ad1->sqname,  ad2->sqname)  != eslOK) return eslFAIL;
1632   if (esl_strcmp(ad1->sqacc,   ad2->sqacc)   != eslOK) return eslFAIL;
1633   if (esl_strcmp(ad1->sqdesc,  ad2->sqdesc)  != eslOK) return eslFAIL;
1634   if (ad1->sqfrom != ad2->sqfrom)                      return eslFAIL;
1635   if (ad1->sqto   != ad2->sqto)                        return eslFAIL;
1636   if (ad1->M      != ad2->M)                           return eslFAIL;
1637 
1638   return eslOK;
1639 }
1640 
1641 
1642 /*-------------- end, debugging/dev code ------------------------*/
1643 
1644 
1645 
1646 /*****************************************************************
1647  * 4. Benchmark driver.
1648  *****************************************************************/
1649 #ifdef p7ALIDISPLAY_BENCHMARK
1650 /*
1651    gcc -o benchmark-alidisplay -std=gnu99 -g -Wall -O2 -I. -L. -I../easel -L../easel -Dp7ALIDISPLAY_BENCHMARK p7_alidisplay.c -lhmmer -leasel -lm
1652 
1653    ./benchmark-alidisplay <hmmfile>     runs benchmark
1654    ./benchmark-alidisplay -b <hmmfile>  gets baseline time to subtract: just random trace generation
1655  */
1656 #include "p7_config.h"
1657 
1658 #include "easel.h"
1659 #include "esl_alphabet.h"
1660 #include "esl_getopts.h"
1661 #include "esl_random.h"
1662 #include "esl_stopwatch.h"
1663 
1664 #include "hmmer.h"
1665 
1666 static ESL_OPTIONS options[] = {
1667   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
1668   { "-h",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
1669   { "-b",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "baseline timing",                                  0 },
1670   { "-p",        eslARG_NONE,   FALSE, NULL, NULL,  NULL,  NULL, NULL, "include fake PP line, just to see how it looks",   0 },
1671   { "-s",        eslARG_INT,     "42", NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
1672   { "-N",        eslARG_INT,  "50000", NULL, "n>0", NULL,  NULL, NULL, "number of traces to generate",                     0 },
1673    {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
1674 };
1675 static char usage[]  = "[-options] <hmmfile>";
1676 static char banner[] = "benchmark driver for P7_ALIDISPLAY";
1677 
1678 int
main(int argc,char ** argv)1679 main(int argc, char **argv)
1680 {
1681   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 1, argc, argv, banner, usage);
1682   char           *hmmfile = esl_opt_GetArg(go, 1);
1683   int             N       = esl_opt_GetInteger(go, "-N");
1684   ESL_STOPWATCH  *w       = esl_stopwatch_Create();
1685   ESL_RANDOMNESS *r       = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
1686   ESL_ALPHABET   *abc     = NULL;
1687   P7_HMMFILE     *hfp     = NULL;
1688   P7_HMM         *hmm     = NULL;
1689   P7_BG          *bg      = NULL;
1690   P7_PROFILE     *gm      = NULL;
1691   P7_OPROFILE    *om      = NULL;
1692   P7_TRACE       *tr      = NULL;
1693   ESL_SQ         *sq      = NULL;
1694   P7_ALIDISPLAY  *ad      = NULL;
1695   int             i,z;
1696 
1697   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
1698   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
1699   p7_hmmfile_Close(hfp);
1700 
1701 
1702   bg = p7_bg_Create(abc);
1703   p7_bg_SetLength(bg, 0);
1704   gm = p7_profile_Create(hmm->M, abc);
1705   p7_ProfileConfig(hmm, bg, gm, 0, p7_UNIGLOCAL); /* that sets N,C,J to generate nothing */
1706   om = p7_oprofile_Create(gm->M, abc);
1707   p7_oprofile_Convert(gm, om);
1708 
1709   if (esl_opt_GetBoolean(go, "-p")) tr = p7_trace_CreateWithPP();
1710   else                              tr = p7_trace_Create();
1711 
1712   sq = esl_sq_CreateDigital(abc);
1713 
1714   esl_stopwatch_Start(w);
1715   for (i = 0; i < N; i++)
1716     {
1717       p7_ProfileEmit(r, hmm, gm, bg, sq, tr);
1718       esl_sq_SetName(sq, "random");
1719 
1720       if (! esl_opt_GetBoolean(go, "-b"))
1721 	{
1722 	  if (esl_opt_GetBoolean(go, "-p"))
1723 	    for (z = 0; z < tr->N; z++)
1724 	      if (tr->i[z] > 0) tr->pp[z] = esl_random(r);
1725 
1726 	  ad = p7_alidisplay_Create(tr, 0, om, sq, NULL);
1727 	  p7_alidisplay_Print(stdout, ad, 40, 80, FALSE);
1728 	  p7_alidisplay_Destroy(ad);
1729 	}
1730       p7_trace_Reuse(tr);
1731       esl_sq_Reuse(sq);
1732     }
1733   esl_stopwatch_Stop(w);
1734   esl_stopwatch_Display(stdout, w, "# CPU time: ");
1735 
1736   esl_sq_Destroy(sq);
1737   p7_trace_Destroy(tr);
1738   p7_oprofile_Destroy(om);
1739   p7_profile_Destroy(gm);
1740   p7_bg_Destroy(bg);
1741   p7_hmm_Destroy(hmm);
1742   esl_alphabet_Destroy(abc);
1743   esl_randomness_Destroy(r);
1744   esl_stopwatch_Destroy(w);
1745   esl_getopts_Destroy(go);
1746   return 0;
1747 }
1748 #endif /*p7ALIDISPLAY_BENCHMARK*/
1749 /*--------------------- end, benchmark driver -------------------*/
1750 
1751 /****************************************************************
1752  * 5. Unit tests.
1753  ****************************************************************/
1754 #ifdef p7ALIDISPLAY_TESTDRIVE
1755 
1756 /*Testing function that generates a P7_ALIDISPLAY containing a nucleotide sequence string rather than an amino
1757   *sequence string.  This function should only be used for testing the serialization/deserializaton code.  No attempt
1758   *is made to make the nucleotide string be reasonable or even valid -- it's just a valid C string of the correct length
1759   * Like p7_alidisplay_Sample, which it is based on, it randomly selects which of the optional fields should be present in the
1760   * alidisplay */
1761 static int
alidisplay_SampleFake_ntseq(ESL_RANDOMNESS * rng,int N,P7_ALIDISPLAY ** ret_ad)1762 alidisplay_SampleFake_ntseq(ESL_RANDOMNESS *rng, int N, P7_ALIDISPLAY **ret_ad)
1763 {
1764   P7_ALIDISPLAY *ad            = NULL;
1765   char          *guidestring   = NULL;  /* string [0..N-1] composed of MDI */
1766   int            nM            = 0;
1767   int            nD            = 0;
1768   int            nI            = 0;
1769   enum p7t_statetype_e last_st;
1770   int            pos;
1771   int            status;
1772 
1773   ESL_ALLOC(guidestring, sizeof(char) * (N+1));
1774 
1775   guidestring[0] = 'M'; nM++; last_st = p7T_M; /* local alignments must start with M */
1776   for (pos = 1; pos < N-1; pos++)
1777     {
1778       switch (last_st)
1779   {
1780   case p7T_M:
1781     switch (esl_rnd_Roll(rng, 3))
1782       {
1783       case 0: guidestring[pos] = 'M'; nM++; last_st = p7T_M; break;
1784       case 1: guidestring[pos] = 'D'; nD++; last_st = p7T_D; break;
1785       case 2: guidestring[pos] = 'I'; nI++; last_st = p7T_I; break;
1786       }
1787     break;
1788 
1789   case p7T_I:
1790     switch (esl_rnd_Roll(rng, 2))
1791       {
1792       case 0: guidestring[pos] = 'M'; nM++; last_st = p7T_M; break;
1793       case 1: guidestring[pos] = 'I'; nI++; last_st = p7T_I; break;
1794       }
1795     break;
1796 
1797   case p7T_D:
1798     switch (esl_rnd_Roll(rng, 2))
1799       {
1800       case 0: guidestring[pos] = 'M'; nM++; last_st = p7T_M; break;
1801       case 1: guidestring[pos] = 'D'; nD++; last_st = p7T_D; break;
1802       }
1803     break;
1804 
1805   default:
1806     break;
1807   }
1808     }
1809   /* local alignments can end on M or D. (optimal local alignments can only end on M) */
1810   switch (last_st) {
1811   case p7T_I:
1812     guidestring[N-1] = 'M';  nM++;  break;
1813   default:
1814     switch (esl_rnd_Roll(rng, 2)) {
1815     case 0: guidestring[N-1] = 'M'; nM++; break;
1816     case 1: guidestring[N-1] = 'D'; nD++; break;
1817     }
1818     break;
1819   }
1820   guidestring[N] = '\0';
1821 
1822   ESL_ALLOC(ad, sizeof(P7_ALIDISPLAY));
1823   ad->rfline  = ad->mmline = ad->csline = ad->model   = ad->mline  = ad->aseq = ad->ntseq = ad->ppline = NULL;
1824   ad->hmmname = ad->hmmacc = ad->hmmdesc = NULL;
1825   ad->sqname  = ad->sqacc  = ad->sqdesc  = NULL;
1826   ad->mem     = NULL;
1827   ad->memsize = 0;
1828 
1829   /* Optional lines are added w/ 50% chance */
1830   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->rfline, sizeof(char) * (N+1));
1831   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->mmline, sizeof(char) * (N+1));
1832   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->csline, sizeof(char) * (N+1));
1833   if (esl_rnd_Roll(rng, 2) == 0)  ESL_ALLOC(ad->ppline, sizeof(char) * (N+1));
1834   ESL_ALLOC(ad->model, sizeof(char) * (N+1));
1835   ESL_ALLOC(ad->mline, sizeof(char) * (N+1));
1836   ESL_ALLOC(ad->ntseq,  sizeof(char) * ((3 *N)+1));
1837   ad->N = N;
1838 
1839   esl_strdup("my_hmm", -1, &(ad->hmmname));
1840   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("PF000007",          -1, &(ad->hmmacc));  else esl_strdup("", -1, &(ad->hmmacc));
1841   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("(hmm description)", -1, &(ad->hmmdesc)); else esl_strdup("", -1, &(ad->hmmdesc));
1842 
1843   esl_strdup("my_seq", -1, &(ad->sqname));
1844   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("ABC000001.42",           -1, &(ad->sqacc));  else esl_strdup("", -1, &(ad->sqacc));
1845   if (esl_rnd_Roll(rng, 2) == 0) esl_strdup("(sequence description)", -1, &(ad->sqdesc)); else esl_strdup("", -1, &(ad->sqdesc));
1846 
1847   /* model, seq coords must look valid. */
1848   ad->hmmfrom = 100;
1849   ad->hmmto   = ad->hmmfrom + nM + nD - 1;
1850   ad->M       = ad->hmmto + esl_rnd_Roll(rng, 2);
1851 
1852   ad->sqfrom  = 1000;
1853   ad->sqto    = ad->sqfrom + nM + nI - 1;
1854   ad->L       = ad->sqto + esl_rnd_Roll(rng, 2);
1855 
1856   /* rfline is free-char "reference annotation" on consensus; H3 puts '.' for inserts. */
1857   if (ad->rfline) {
1858     for (pos = 0; pos < N; pos++)
1859       ad->rfline[pos] = (guidestring[pos] == 'I' ? '.' : 'x');
1860     ad->rfline[pos] = '\0';
1861   }
1862 
1863   /* mmline indicates which columns should be masked (assigned background distribution), '.' indicates no mask; H3 puts '.' for inserts. */
1864   if (ad->mmline) {
1865     for (pos = 0; pos < N; pos++)
1866       ad->mmline[pos] = (guidestring[pos] == 'I' ? '.' : '.');
1867     ad->mmline[pos] = '\0';
1868   }
1869 
1870   /* csline is optional. It has free-char "consensus structure annotation" on consensus positions. H3 puts '.' on inserts. */
1871   if (ad->csline) {
1872     for (pos = 0; pos < N; pos++)
1873       ad->csline[pos] = (guidestring[pos] == 'I' ? '.' : 'X');
1874     ad->csline[pos] = '\0';
1875   }
1876 
1877   /* the mandatory three-line alignment display:
1878    *
1879    *   guidestring:    MMMDI
1880    *   model:          XXXX.
1881    *   mline:          A+
1882    *   ntseq:           AAA-a
1883    */
1884   for (pos = 0; pos < N; pos++)
1885     {
1886       switch (guidestring[pos]) {
1887       case 'M':
1888   ad->model[pos] = 'X';
1889   switch (esl_rnd_Roll(rng, 3)) {
1890   case 0: ad->mline[pos] = 'A';
1891   case 1: ad->mline[pos] = '+';
1892   case 2: ad->mline[pos] = ' ';
1893   }
1894   if (ad->mline[pos] == ' ' && esl_rnd_Roll(rng, 50) == 0) ad->ntseq[pos] = '*';  // dirty aligned sequence up with nasty * stop codons, about 1/(3*50) of the time.
1895   else                                                     ad->ntseq[pos] = 'A';  // ... they would only be aligned to ' ' on an mline.
1896   break;                                                                         // ... they might appear aligned to a match or insert state (see iss#135)
1897 
1898       case 'D':
1899   ad->model[pos] = 'X';
1900   ad->mline[pos] = ' ';
1901   ad->ntseq[pos]  = '-';
1902   break;
1903 
1904       case 'I':
1905   ad->model[pos] = '.';
1906   ad->mline[pos] = ' ';
1907   ad->ntseq[pos]  = 'a';
1908   break;
1909       }
1910     }
1911   ad->model[pos] = '\0';
1912   ad->mline[pos] = '\0';
1913 
1914   // pad out ntseq to the 3N length it needs
1915   for (pos = N; pos <  (3 * N); pos++){
1916     if(esl_rnd_Roll(rng, 50) == 0) ad->ntseq[pos] = '*';  // dirty aligned sequence up with nasty * stop codons, about 1/(3*50) of the time.
1917     else                                                     ad->ntseq[pos] = 'A';  // ... they would only be aligned to ' ' on an mline.
1918   }
1919   ad->ntseq[pos]  = '\0';
1920 
1921   /* ppline is optional */
1922   if (ad->ppline) {
1923     for (pos = 0; pos < N; pos++)
1924       ad->ppline[pos] = (guidestring[pos] == 'D' ? '.' : p7_alidisplay_EncodePostProb(esl_random(rng)));
1925     ad->ppline[pos] = '\0';
1926   }
1927 
1928   free(guidestring);
1929   *ret_ad = ad;
1930   return eslOK;
1931 
1932  ERROR:
1933   if (guidestring) free(guidestring);
1934   if (ad)          p7_alidisplay_Destroy(ad);
1935   *ret_ad = NULL;
1936   return status;
1937 }
1938 
1939 
1940 
1941 
1942 static void
utest_Serialize(ESL_RANDOMNESS * rng,int ntrials)1943 utest_Serialize(ESL_RANDOMNESS *rng, int ntrials)
1944 {
1945   char msg[]               = "utest_Serialize failed";
1946   P7_ALIDISPLAY **serial   = malloc(ntrials * sizeof(P7_ALIDISPLAY *));
1947   P7_ALIDISPLAY **deserial = malloc(ntrials * sizeof(P7_ALIDISPLAY *));
1948   uint8_t       **buf      = malloc(sizeof(uint8_t *));
1949   uint32_t        n        = 0;
1950   uint32_t        nalloc   = 0;
1951   int             alignment_length;
1952   int             i;
1953 
1954   *buf = NULL;
1955   for (i = 0; i < ntrials; i++)
1956     {
1957       // Create random alignment to serialize
1958       alignment_length = (esl_random_uint32(rng) %300) + 50;
1959       if (esl_rnd_Roll(rng, 2) == 0)
1960 	{ // 50% chance of alidisplay with an amino sequence, 50% chance of alidisplay with nucleotide seq.
1961 	  if (p7_alidisplay_Sample(rng, alignment_length, &(serial[i])) != eslOK) esl_fatal(msg);
1962 	}
1963       else
1964 	{
1965 	  if (alidisplay_SampleFake_ntseq(rng, alignment_length, &(serial[i])) != eslOK) esl_fatal(msg);
1966 	}
1967       if (p7_alidisplay_Serialize(serial[i], buf, &n, &nalloc) != eslOK) esl_fatal(msg);
1968     }
1969 
1970   n = 0; // reset to start of buffer
1971   for (i = 0; i < ntrials; i++)
1972     {
1973       if ((deserial[i] = p7_alidisplay_Create_empty())     == NULL)  esl_fatal(msg);
1974       if( p7_alidisplay_Deserialize(*buf, &n, deserial[i]) != eslOK) esl_fatal(msg);
1975     }
1976 
1977   // free the buffer here to make sure we've actually copied all the data out of it into the new structures
1978   free(*buf);
1979   free(buf);
1980   for (i = 0; i < ntrials; i++)
1981     if (p7_alidisplay_Compare(serial[i], deserial[i]) != eslOK) esl_fatal(msg); // deserialized structure didn't match serialized
1982   // haven't failed yet, so we've succeeded.  Clean up and exit
1983 
1984   for (i = 0; i < ntrials; i++) {
1985     p7_alidisplay_Destroy(serial[i]);
1986     p7_alidisplay_Destroy(deserial[i]);
1987   }
1988   free(serial);
1989   free(deserial);
1990   return;
1991 }
1992 
1993 // Test that the _Serialize() function generates the correct errors when passed invalid arguments
1994 static void
utest_serialize_error_conditions(ESL_RANDOMNESS * rng)1995 utest_serialize_error_conditions(ESL_RANDOMNESS *rng)
1996 {
1997   char msg[]            = "utest_serialize_error_conditions failed";
1998   P7_ALIDISPLAY *foo    = NULL;
1999   uint8_t      **buf    = malloc(sizeof(uint8_t *));
2000   uint32_t       n      = 0;
2001   uint32_t       nalloc = 0;
2002 
2003   // Create an alisplay to work with.  Don't really care about its contents -- other tests will verify
2004   // correct serialization and deserialization
2005   *buf = NULL; // set buf to valid value
2006   if ( p7_alidisplay_Sample(rng, 100, &foo)              != eslOK)     esl_fatal(msg);
2007   if ( p7_alidisplay_Serialize(foo, NULL, &n,   &nalloc) != eslEINVAL) esl_fatal(msg);   // Test 1: _Serialize returns error if passed NULL buffer
2008   if ( p7_alidisplay_Serialize(foo,  buf, NULL, &nalloc) != eslEINVAL) esl_fatal(msg);   // Test 2: error on NULL n ptr
2009   if ( p7_alidisplay_Serialize(NULL, buf, &n,   &nalloc) != eslEINVAL) esl_fatal(msg);   // Test 3: error on NULL object ptr
2010 
2011   if (buf) { free(*buf); free(buf); }
2012   p7_alidisplay_Destroy(foo);
2013   return;
2014 }
2015 
2016 static void
utest_deserialize_error_conditions(ESL_RANDOMNESS * rng)2017 utest_deserialize_error_conditions(ESL_RANDOMNESS *rng)
2018 {
2019   char            msg[]    = "utest_deserialize_error_conditions failed";
2020   P7_ALIDISPLAY  *sampled  = NULL; // sampled alidisplay that we'll serialze
2021   P7_ALIDISPLAY  *deserial = NULL; // alidisplay to hold the deserialized object
2022   uint8_t        *buf      = NULL;
2023   uint32_t        n = 0, nalloc = 0;
2024 
2025   if ((deserial = p7_alidisplay_Create_empty())            == NULL)  esl_fatal(msg);
2026   if ( p7_alidisplay_Sample(rng, 100, &sampled)            != eslOK) esl_fatal(msg);
2027   if ( p7_alidisplay_Serialize(sampled, &buf, &n, &nalloc) != eslOK) esl_fatal(msg);
2028 
2029 
2030   if ( p7_alidisplay_Deserialize(NULL, &n, deserial)  != eslEINVAL) esl_fatal(msg);   // Test 1: error on buf == NULL;
2031   if ( p7_alidisplay_Deserialize(buf, NULL, deserial) != eslEINVAL) esl_fatal(msg);   // Test 2: error on n == NULL
2032   if ( p7_alidisplay_Deserialize(buf, &n, NULL)       != eslEINVAL) esl_fatal(msg);   // Test 3: error on serialized object == NULL
2033 
2034   free(buf);
2035   p7_alidisplay_Destroy(deserial);
2036   p7_alidisplay_Destroy(sampled);
2037   return;
2038 }
2039 
2040 
2041 static void
utest_Backconvert(int be_verbose,ESL_RANDOMNESS * rng,ESL_ALPHABET * abc,int ntrials,int N)2042 utest_Backconvert(int be_verbose, ESL_RANDOMNESS *rng, ESL_ALPHABET *abc, int ntrials, int N)
2043 {
2044   char          msg[] = "utest_Backconvert failed";
2045   P7_ALIDISPLAY *ad   = NULL;
2046   ESL_SQ        *sq   = NULL;
2047   P7_TRACE      *tr   = NULL;
2048   int            trial;
2049 
2050   for (trial = 0; trial < ntrials; trial++)
2051     {
2052       if ( p7_alidisplay_Sample(rng, N, &ad)                     != eslOK) esl_fatal(msg);
2053       if ( p7_alidisplay_Serialize_old(ad)                           != eslOK) esl_fatal(msg);
2054       if (be_verbose && p7_alidisplay_Dump(stdout, ad)           != eslOK) esl_fatal(msg);
2055       if ( p7_alidisplay_Backconvert(ad, abc, &sq, &tr)          != eslOK) esl_fatal(msg);
2056       if (be_verbose && p7_trace_Dump(stdout, tr, NULL, sq->dsq) != eslOK) esl_fatal(msg);
2057       if ( p7_trace_Validate(tr, abc, sq->dsq, NULL)             != eslOK) esl_fatal(msg);
2058 
2059       p7_alidisplay_Destroy(ad);
2060       esl_sq_Destroy(sq);
2061       p7_trace_Destroy(tr);
2062     }
2063   return;
2064 }
2065 
2066 
2067 #endif /*p7ALIDISPLAY_TESTDRIVE*/
2068 /*------------------- end, unit tests ---------------------------*/
2069 
2070 /*****************************************************************
2071  * 6. Test driver.
2072  *****************************************************************/
2073 #ifdef p7ALIDISPLAY_TESTDRIVE
2074 
2075 #include <stdlib.h>
2076 #include <string.h>
2077 
2078 #include "easel.h"
2079 #include "esl_getopts.h"
2080 #include "esl_random.h"
2081 
2082 #include "hmmer.h"
2083 
2084 static ESL_OPTIONS options[] = {
2085    /* name  type         default  env   range togs  reqs  incomp  help                docgrp */
2086   {"-h",  eslARG_NONE,    FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage",                            0},
2087   {"-N",  eslARG_INT,      "10", NULL, NULL, NULL, NULL, NULL, "number of random-sampled alidisplays to test",   0},
2088   {"-L",  eslARG_INT,      "20", NULL, NULL, NULL, NULL, NULL, "length of random-sampled alidisplays to test",   0},
2089   {"-s",  eslARG_INT,       "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>",                  0},
2090   {"-v",  eslARG_NONE,    FALSE, NULL, NULL, NULL, NULL, NULL, "show verbose commentary/output",                 0},
2091   { 0,0,0,0,0,0,0,0,0,0},
2092 };
2093 static char usage[]  = "[-options]";
2094 static char banner[] = "test driver for p7_alidisplay.c";
2095 
2096 int
main(int argc,char ** argv)2097 main(int argc, char **argv)
2098 {
2099   ESL_GETOPTS    *go         = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
2100   ESL_RANDOMNESS *rng        = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
2101   ESL_ALPHABET   *abc        = esl_alphabet_Create(eslAMINO);
2102   int             N          = esl_opt_GetInteger(go, "-N");
2103   int             L          = esl_opt_GetInteger(go, "-L");
2104   int             be_verbose = esl_opt_GetBoolean(go, "-v");
2105 
2106   //utest_Serialize_old  (            rng,      N, L);
2107   utest_Serialize(rng, 100);
2108   utest_Backconvert(be_verbose, rng, abc, N, L);
2109   utest_serialize_error_conditions(rng);
2110   utest_deserialize_error_conditions(rng);
2111 
2112   esl_alphabet_Destroy(abc);
2113   esl_randomness_Destroy(rng);
2114   esl_getopts_Destroy(go);
2115   return 0;
2116 }
2117 #endif /*p7ALIDISPLAY_TESTDRIVE*/
2118 /*------------------- end, test driver --------------------------*/
2119 
2120 
2121 /*****************************************************************
2122  * 7. Example.
2123  *****************************************************************/
2124 /*
2125    gcc -o p7_alidisplay_example -std=gnu99 -g -Wall -I. -L. -I../easel -L../easel -Dp7ALIDISPLAY_EXAMPLE p7_alidisplay.c -lhmmer -leasel -lm
2126 */
2127 #ifdef p7ALIDISPLAY_EXAMPLE
2128 #include "p7_config.h"
2129 
2130 #include "easel.h"
2131 #include "esl_alphabet.h"
2132 #include "esl_getopts.h"
2133 #include "esl_sq.h"
2134 #include "esl_sqio.h"
2135 
2136 #include "hmmer.h"
2137 
2138 static ESL_OPTIONS options[] = {
2139   /* name           type         default   env  range   toggles   reqs   incomp                             help                                                  docgroup*/
2140   { "-h",           eslARG_NONE,   FALSE, NULL, NULL,      NULL,  NULL,  NULL,                          "show brief help on version and usage",                         0 },
2141  {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
2142 };
2143 static char usage[]  = "[-options] <hmmfile> <seqfile>";
2144 static char banner[] = "example driver for P7_ALIDISPLAY";
2145 
2146 int
main(int argc,char ** argv)2147 main(int argc, char **argv)
2148 {
2149   ESL_GETOPTS    *go      = p7_CreateDefaultApp(options, 2, argc, argv, banner, usage);
2150   char           *hmmfile = esl_opt_GetArg(go, 1);
2151   char           *seqfile = esl_opt_GetArg(go, 2);
2152   ESL_ALPHABET   *abc     = NULL;
2153   P7_HMMFILE     *hfp     = NULL;
2154   P7_HMM         *hmm     = NULL;
2155   P7_BG          *bg      = NULL;
2156   P7_PROFILE     *gm      = NULL;
2157   P7_OPROFILE    *om      = NULL;
2158   P7_TRACE       *tr1     = NULL;
2159   P7_TRACE       *tr2     = NULL;
2160   ESL_SQFILE     *sqfp    = NULL;
2161   ESL_SQ         *sq      = NULL;
2162   ESL_SQ         *sq2     = NULL;
2163   P7_ALIDISPLAY  *ad      = NULL;
2164   P7_PIPELINE    *pli     = NULL;
2165   P7_TOPHITS     *hitlist = NULL;
2166 
2167   p7_FLogsumInit();
2168 
2169   /* Read a single HMM from a file */
2170   if (p7_hmmfile_OpenE(hmmfile, NULL, &hfp, NULL) != eslOK) p7_Fail("Failed to open HMM file %s", hmmfile);
2171   if (p7_hmmfile_Read(hfp, &abc, &hmm)            != eslOK) p7_Fail("Failed to read HMM");
2172   p7_hmmfile_Close(hfp);
2173 
2174   /* Read a single sequence from a file */
2175   if (esl_sqfile_Open(seqfile, eslSQFILE_UNKNOWN, NULL, &sqfp) != eslOK) p7_Fail("Failed to open sequence file %s", seqfile);
2176   sq = esl_sq_CreateDigital(abc);
2177   if (esl_sqio_Read(sqfp, sq) != eslOK) p7_Fail("Failed to read sequence");
2178 
2179   /* Configure a profile from the HMM */
2180   bg = p7_bg_Create(abc);
2181   p7_bg_SetLength(bg, 0);
2182   gm = p7_profile_Create(hmm->M, abc);
2183   p7_ProfileConfig(hmm, bg, gm, 0, p7_UNILOCAL);
2184   om = p7_oprofile_Create(gm->M, abc);
2185   p7_oprofile_Convert(gm, om);
2186 
2187   /* Create a pipeline and a top hits list */
2188   pli     = p7_pipeline_Create(NULL/*=default*/, hmm->M, 400, FALSE, p7_SEARCH_SEQS);
2189   hitlist = p7_tophits_Create();
2190 
2191   /* Run the pipeline */
2192   p7_pli_NewSeq(pli, sq);
2193   p7_bg_SetLength(bg, sq->n);
2194   p7_oprofile_ReconfigLength(om, sq->n);
2195   p7_Pipeline(pli, om, bg, sq, NULL, hitlist);
2196 
2197   if (hitlist->N == 0) { p7_Fail("target sequence doesn't hit"); }
2198 
2199   if (p7_alidisplay_Backconvert(hitlist->hit[0]->dcl[0].ad, abc, &sq2, &tr2) != eslOK) p7_Fail("backconvert failed");
2200 
2201   p7_trace_Dump(stdout, tr2, gm, sq2->dsq);
2202 
2203   p7_tophits_Destroy(hitlist);
2204   p7_alidisplay_Destroy(ad);
2205   esl_sq_Destroy(sq);
2206   esl_sq_Destroy(sq2);
2207   p7_trace_Destroy(tr2);
2208   p7_trace_Destroy(tr1);
2209   p7_oprofile_Destroy(om);
2210   p7_profile_Destroy(gm);
2211   p7_bg_Destroy(bg);
2212   p7_hmm_Destroy(hmm);
2213   esl_alphabet_Destroy(abc);
2214   esl_getopts_Destroy(go);
2215   return 0;
2216 }
2217 #endif /*p7ALIDISPLAY_EXAMPLE*/
2218 
2219 
2220