1 /* $Id: compacc2.c 1280 2014-08-21 00:47:55Z wrp $ */
2 
3 /* copyright (c) 1996, 1997, 1998, 1999, 2014 by William R. Pearson and
4    The Rector & Visitors of the University of Virginia */
5 
6 /* Licensed under the Apache License, Version 2.0 (the "License");
7    you may not use this file except in compliance with the License.
8    You may obtain a copy of the License at
9 
10    http://www.apache.org/licenses/LICENSE-2.0
11 
12    Unless required by applicable law or agreed to in writing,
13    software distributed under this License is distributed on an "AS
14    IS" BASIS, WITHOUT WRRANTIES OR CONDITIONS OF ANY KIND, either
15    express or implied.  See the License for the specific language
16    governing permissions and limitations under the License.
17 */
18 
19 /* Concurrent read version */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #if defined(UNIX)
24 #include <unistd.h>
25 #endif
26 #if defined(UNIX) || defined(WIN32)
27 #include <sys/types.h>
28 #endif
29 
30 #include <limits.h>
31 #include <ctype.h>
32 #include <float.h>
33 
34 #include <string.h>
35 #include <time.h>
36 #include <math.h>
37 
38 #include "defs.h"
39 #include "param.h"
40 #include "structs.h"
41 
42 #include "mm_file.h"
43 #include "best_stats.h"
44 
45 #define XTERNAL
46 #include "uascii.h"
47 #include "upam.h"
48 #undef XTERNAL
49 
50 #ifdef DEBUG
51 extern char ext_qtitle[];
52 #endif
53 
54 extern void abort ();
55 
56 #include "drop_func.h"	/* get init_work() */
57 /* drop_func.h includes dyn_string.h */
58 
59 void revcomp(unsigned char *seq, int n, int *c_nt);
60 extern void qshuffle(unsigned char *aa0, int n0, int nm0, void *);
61 #ifdef DEBUG
62 unsigned long adler32(unsigned long, const unsigned char *, unsigned int);
63 #endif
64 
65 void
66 s_annot_to_aa1a(long offset, int n1, struct annot_str *annot_p, unsigned char *ann_arr, char *tmp_line);
67 
68 extern void add_annot_def(struct mngmsg *m_msp, char *line, int qa_flag);
69 int add_annot_char(unsigned char *ann_arr, char ctmp_label);
70 
71 int get_annot(char *sname, struct mngmsg *, char *bline, long offset, int n1,
72 	      struct annot_str **annot_p,int target, int debug);
73 int
74 get_annot_list(char *sname, struct mngmsg *m_msp, struct beststr **bestp_arr,
75 	       int nbest,int target, int debug);
76 void
77 print_sum(FILE *fd, struct db_str *qtt, struct db_str *ntt, int in_mem, long mem_use);
78 int
79 check_seq_range(unsigned char *aa1b, int n1, int nsq, char *str);
80 /* print timing information */
81 extern void ptime (FILE *, long);
82 
83 /* this function consolidates code in comp_lib4.c for non-threaded, and in
84    work_thr2.c (threads) and work_comp2.c (worker nodes)
85 */
86 
87 void
init_aa0(unsigned char ** aa0,int n0,int nm0,unsigned char ** aa0s,unsigned char ** aa1s,int qframe,int qshuffle_flg,int max_tot,struct pstruct * ppst,void ** f_str,void ** qf_str,void * my_rand_state)88 init_aa0(unsigned char **aa0, int n0, int nm0,
89 	 unsigned char **aa0s, unsigned char **aa1s,
90 	 int qframe, int qshuffle_flg, int max_tot,
91 	 struct pstruct *ppst, void **f_str, void **qf_str,
92 	 void *my_rand_state) {
93   int id;
94 
95   /* note that aa[5,4,3,2] are never used, but are provided so that frame
96      can range from 0 .. 5; likewise for f_str[5..2] */
97 
98   aa0[5] = aa0[4] = aa0[3] = aa0[2] = aa0[1] = aa0[0];
99 
100   /* zero out for SSE2/ALTIVEC -- make sure this is ALWAYS done */
101   for (id=0; id < SEQ_PAD; id++) aa0[0][n0+id] = '\0';
102 
103   init_work (aa0[0], n0, ppst, &f_str[0]);
104   f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
105 
106   if (qframe == 2) {
107     if ((aa0[1]=(unsigned char *)calloc((size_t)n0+2+SEQ_PAD,sizeof(unsigned char)))==NULL) {
108       fprintf(stderr,"*** error [%s:%d] - cannot allocate aa01[%d]\n", __FILE__, __LINE__, n0);
109     }
110     *aa0[1]='\0';
111     aa0[1]++;
112     memcpy(aa0[1],aa0[0],n0+1);
113     /* for ALTIVEC/SSE2, must pad with 16 NULL's */
114     for (id=0; id<SEQ_PAD; id++) {aa0[1][n0+id]=0;}
115     revcomp(aa0[1],n0,ppst->c_nt);
116     init_work (aa0[1], n0, ppst, &f_str[1]);
117   }
118 
119   if (qshuffle_flg) {
120     if ((*aa0s=(unsigned char *)calloc(n0+2+SEQ_PAD,sizeof(char)))==NULL) {
121       fprintf(stderr,"*** error [%s:%d] - cannot allocate aa0s[%d]\n",__FILE__, __LINE__, n0+2);
122       exit(1);
123     }
124     **aa0s='\0';
125     (*aa0s)++;
126     memcpy(*aa0s,aa0[0],n0);
127     qshuffle(*aa0s,n0,nm0, my_rand_state);
128     /* for SSE2/ALTIVEC, must pad with 16 NULL's */
129     for (id=0; id<SEQ_PAD; id++) {(*aa0s)[n0+id]=0;}
130     init_work (*aa0s, n0, ppst, qf_str);
131   }
132 
133   /* always allocate shuffle space */
134   if((*aa1s=calloc(max_tot+1,sizeof(char))) == NULL) {
135     fprintf(stderr,"*** error [%s:%d] - unable to allocate shuffled library sequence [%d]\n", __FILE__, __LINE__, max_tot);
136     exit(1);
137   }
138   else {
139     **aa1s=0;
140     (*aa1s)++;
141   }
142 }
143 
144 /* because it is used to pre-allocate space, maxn has various
145    constraints.  For "simple" comparisons, it is simply the length of
146    the longest library sequence.  But for translated comparisons, it
147    must be 3 or 6X the length of the query sequence.
148 
149    In addition, however, it can be reduced to make certain that
150    sequences are read in smaller chunks.  And, maxn affect how large
151    overlaps must be when sequences are read in chunks.
152 */
153 
154 int
reset_maxn(struct mngmsg * m_msp,int over_len,int maxn)155 reset_maxn(struct mngmsg *m_msp, int over_len, int maxn) {
156 
157   /* reduce maxn if requested */
158   if (m_msp->ldb_info.maxn > 0 && m_msp->ldb_info.maxn < maxn) maxn = m_msp->ldb_info.maxn;
159 
160   if (m_msp->qdnaseq==m_msp->ldb_info.ldnaseq || m_msp->qdnaseq==SEQT_DNA ||
161       m_msp->qdnaseq == SEQT_RNA) {/* !TFAST - either FASTA or FASTX */
162 
163     if (m_msp->n0 > m_msp->max_tot - m_msp->ldb_info.dupn) {
164       fprintf(stderr,"*** error [%s:%d] -  query sequence is too long %d > %d - %d %s\n",
165 	      __FILE__, __LINE__,
166 	      m_msp->n0,
167 	      m_msp->max_tot, m_msp->ldb_info.dupn,
168 	      m_msp->sqnam);
169       exit(1);
170     }
171 
172     m_msp->ldb_info.l_overlap = over_len;
173     m_msp->ldb_info.maxt3 = maxn-m_msp->ldb_info.l_overlap;
174   }
175   else {	/* is TFAST */
176     if (m_msp->n0 > MAXTST) {
177       fprintf(stderr,"*** error [%s:%d] -  query sequence is too long %d %s\n",
178 	      __FILE__, __LINE__, m_msp->n0,m_msp->sqnam);
179       exit(1);
180     }
181 
182     if (m_msp->n0*3 > maxn ) {	/* n0*3 for the three frames - this
183 				   will only happen if maxn has been
184 				   set low manually */
185 
186       if (m_msp->n0*4+2 < m_msp->max_tot) { /* m_msg0*3 + m_msg0 */
187 	fprintf(stderr,
188 		"*** error [%s:%d] - query sequence too long for library segment: %d - resetting to %d\n",
189 		__FILE__, __LINE__,
190 		maxn,m_msp->n0*3);
191 	maxn = m_msp->ldb_info.maxn = m_msp->n0*3;
192       }
193       else {
194 	fprintf(stderr,"*** error [%s:%d] -  query sequence too long for translated search: %d * 4 > %d %s\n",
195 		__FILE__, __LINE__, m_msp->n0,maxn, m_msp->sqnam);
196 	exit(1);
197       }
198     }
199 
200     /* set up some constants for overlaps */
201     m_msp->ldb_info.l_overlap = 3*over_len;
202     m_msp->ldb_info.maxt3 = maxn-m_msp->ldb_info.l_overlap-3;
203     m_msp->ldb_info.maxt3 -= m_msp->ldb_info.maxt3%3;
204     m_msp->ldb_info.maxt3++;
205 
206     maxn = maxn - 3; maxn -= maxn%3; maxn++;
207   }
208   return maxn;
209 }
210 
211 
212 int
scanseq(unsigned char * seq,int n,char * str)213 scanseq(unsigned char *seq, int n, char *str) {
214   int tot,i;
215   char aaray[128];		/* this must be set > nsq */
216 
217   for (i=0; i<128; i++)  aaray[i]=0;
218   for (i=0; i < (int)strlen(str); i++) aaray[qascii[str[i]]]=1;
219   for (i=tot=0; i<n; i++) tot += aaray[seq[i]];
220   return tot;
221 }
222 
223 /* subs_env takes a string, possibly with ${ENV}, and looks up all the
224    potential environment variables and substitutes them into the
225    string */
226 
subs_env(char * dest,char * src,int dest_size)227 void subs_env(char *dest, char *src, int dest_size) {
228   char *last_src, *bp, *bp1;
229 
230   last_src = src;
231 
232   if ((bp = strchr(src,'$'))==NULL) {
233     strncpy(dest, src, dest_size);
234     dest[dest_size-1] = '\0';
235   }
236   else {
237     *dest = '\0';
238     while (strlen(dest) < dest_size-1 && bp != NULL ) {
239       /* copy stuff before ${*/
240       *bp = '\0';
241       strncpy(dest, last_src, dest_size);
242       *bp = '$';
243 
244       /* copy ENV */
245       if (*(bp+1) != '{') {
246 	strncat(dest, "$", dest_size - strlen(dest) -1);
247 	dest[dest_size-1] = '\0';
248 	bp += 1;
249       }
250       else {	/* have  ${ENV} - put it in */
251 	if ((bp1 = strchr(bp+2,'}'))==NULL) {
252 	  fprintf(stderr, "*** error [%s:%d] - Unterminated ENV: %s\n",
253 		  __FILE__, __LINE__, src);
254 	  break;
255 	}
256 	else {
257 	  *bp1 = '\0';
258 	  if (getenv(bp+2)!=NULL) {
259 	    strncat(dest, getenv(bp+2), dest_size - strlen(dest) - 1);
260 	    dest[dest_size-1] = '\0';
261 	    *bp1 = '}';
262 	  }
263 	  bp = bp1+1;	/* bump bp even if getenv == NULL */
264 	}
265       }
266       last_src = bp;
267 
268       /* now get the next ${ENV} if present */
269       bp = strchr(last_src,'$');
270     }
271     /* now copy the last stuff */
272     strncat(dest, last_src, dest_size - strlen(dest) - 1);
273     dest[dest_size-1]='\0';
274   }
275 }
276 
277 
278 void
selectbest(struct beststr ** bptr,int k,int n)279 selectbest(struct beststr **bptr, int k, int n)	/* k is rank in array */
280 {
281   int v, i, j, l, r;
282   struct beststr *tmptr;
283 
284   l=0; r=n-1;
285 
286   while ( r > l ) {
287     v = bptr[r]->rst.score[0];
288     i = l-1;
289     j = r;
290     do {
291       while (bptr[++i]->rst.score[0] > v) ;
292       while (bptr[--j]->rst.score[0] < v) ;
293       tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
294     } while (j > i);
295     bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
296     if (i>=k) r = i-1;
297     if (i<=k) l = i+1;
298   }
299 }
300 
301 void
selectbestz(struct beststr ** bptr,int k,int n)302 selectbestz(struct beststr **bptr, int k, int n)	/* k is rank in array */
303 {
304   int i, j, l, r;
305   struct beststr *tmptr;
306   double v;
307 
308   l=0; r=n-1;
309 
310   while ( r > l ) {
311     v = bptr[r]->zscore;
312     i = l-1;
313     j = r;
314     do {
315       while (bptr[++i]->zscore > v) ;
316       while (bptr[--j]->zscore < v) ;
317       tmptr = bptr[i]; bptr[i]=bptr[j]; bptr[j]=tmptr;
318     } while (j > i);
319     bptr[j]=bptr[i]; bptr[i]=bptr[r]; bptr[r]=tmptr;
320     if (i>=k) r = i-1;
321     if (i<=k) l = i+1;
322   }
323 }
324 
325 /* improved shellsort with high-performance increments */
326 /*
327 shellsort(itemType a[], int l, int r)
328 { int i, j, k, h; itemType v;
329  int incs[16] = { 1391376, 463792, 198768, 86961, 33936,
330 		  13776, 4592, 1968, 861, 336,
331 		  112, 48, 21, 7, 3, 1 };
332  for ( k = 0; k < 16; k++)
333    for (h = incs[k], i = l+h; i <= r; i++) {
334        v = a[i]; j = i;
335        while (j > h && a[j-h] > v) {
336          a[j] = a[j-h]; j -= h;
337        }
338        a[j] = v;
339      }
340 }
341 */
342 
343 /* ?improved? version of sortbestz using optimal increments and fewer
344    exchanges */
sortbestz(struct beststr ** bptr,int nbest)345 void sortbestz(struct beststr **bptr, int nbest)
346 {
347   int gap, i, j, k;
348   struct beststr *tmp;
349   double v;
350   int incs[14] = { 198768, 86961, 33936,
351 		   13776, 4592, 1968, 861, 336,
352 		   112, 48, 21, 7, 3, 1 };
353 
354   for ( k = 0; k < 14; k++) {
355     gap = incs[k];
356     for (i=gap; i < nbest; i++) {
357       tmp = bptr[i];
358       j = i;
359       v = bptr[i]->zscore;
360       while ( j >= gap && bptr[j-gap]->zscore < v) {
361 	bptr[j] = bptr[j - gap];
362 	j -= gap;
363       }
364       bptr[j] = tmp;
365     }
366   }
367 }
368 
369 
370 /* sort based on sequence index */
sortbesti(struct beststr ** bptr,int nbest)371 void sortbesti(struct beststr **bptr, int nbest)
372 {
373   int gap, i, j, k;
374   struct beststr *tmp;
375   double v;
376   int incs[12] = { 33936, 13776, 4592, 1968, 861, 336,
377 		   112, 48, 21, 7, 3, 1 };
378 
379   for ( k = 0; k < 12; k++) {
380     gap = incs[k];
381     for (i=gap; i < nbest; i++) {
382       tmp = bptr[i];
383       j = i;
384       v = bptr[i]->seq->index;
385       while ( j >= gap && bptr[j-gap]->seq->index < v) {
386 	bptr[j] = bptr[j - gap];
387 	j -= gap;
388       }
389       bptr[j] = tmp;
390     }
391   }
392 }
393 
394 void
sortbeste(struct beststr ** bptr,int nbest)395 sortbeste(struct beststr **bptr, int nbest)
396 {
397   int gap, i, j, k;
398   struct beststr *tmp;
399   double v;
400   int incs[14] = { 198768, 86961, 33936,
401 		   13776, 4592, 1968, 861, 336,
402 		   112, 48, 21, 7, 3, 1 };
403 
404   for ( k = 0; k < 14; k++) {
405     gap = incs[k];
406     for (i=gap; i < nbest; i++) {
407       j = i;
408       tmp = bptr[i];
409       v = tmp->rst.escore;
410       while ( j >= gap && bptr[j-gap]->rst.escore > v) {
411 	bptr[j] = bptr[j - gap];
412 	j -= gap;
413       }
414       bptr[j] = tmp;
415     }
416   }
417 
418   /* sometimes there are many high scores with E()==0.0, sort
419      those by z() score */
420 
421   j = 0;
422   while (j < nbest && bptr[j]->rst.escore <= 2.0*DBL_MIN ) {j++;}
423   if (j > 1) sortbestz(bptr,j);
424 }
425 
426 extern char *prog_func;
427 extern char *verstr, *iprompt0, *refstr, *mp_verstr;
428 extern long tstart, tscan, tprev, tdone;	/* Timing */
429 #ifdef COMP_MLIB
430 extern long ttscan, ttdisp;
431 #endif
432 extern time_t tdstart, tddone;
433 
434 /* ****************************************************************
435    print command line arguments (argv_line)
436    possibly HTML header
437    !BLAST
438      please cite
439      version
440    BLAST
441      Reference version
442 **************************************************************** */
443 void
print_header1(FILE * fd,const char * argv_line,const struct mngmsg * m_msp,const struct pstruct * ppst)444 print_header1(FILE *fd, const char *argv_line,
445 	      const struct mngmsg *m_msp, const struct pstruct *ppst) {
446   int i;
447 
448 #ifdef PGM_DOC
449   if (!(m_msp->markx & (MX_M8OUT+MX_MBLAST2))) fprintf(fd, "#%s\n",argv_line);
450 #endif
451 
452   if (m_msp->markx & MX_M11OUT) {
453     fprintf(fd, "#:lav\n\nd {\n   \"%s\"\n}\n",argv_line+1);
454   }
455 
456   if (m_msp->markx & MX_HTML) {
457 #ifdef HTML_HEAD
458     fprintf(fd,"<html>\n<head>\n<title>%s Results</title>\n</head>\n<body>\n",prog_func);
459 #endif
460     fprintf(fd,"<pre>\n");
461   }
462 
463   if (m_msp->std_output) {
464     fprintf(fd,"%s\n",iprompt0);
465     if (refstr != NULL && refstr[0] != '\0') {
466       fprintf(fd," version %s%s\nPlease cite:\n %s\n",verstr,mp_verstr,refstr);
467     }
468     else {
469       fprintf(fd," version %s%s\n",verstr,mp_verstr);
470     }
471   }
472 
473   if (m_msp->markx & MX_MBLAST2) {
474     if (refstr != NULL && refstr[0] != '\0') {
475       fprintf(fd,"%s %s%s\n\nReference: %s\n\n", prog_func, verstr, mp_verstr, refstr);
476     }
477     else {
478       fprintf(fd,"%s %s%s\n\n", prog_func, verstr, mp_verstr);
479     }
480   }
481 
482   fflush(fd);
483 }
484 
485 /* ****************************************************************
486    MX_HTML: <pre>
487    Query:
488      1>>>accession description # aa
489    Annotation:
490    Library:
491 **************************************************************** */
492 void
print_header2(FILE * fd,int qlib,char * info_qlabel,unsigned char ** aa0,const struct mngmsg * m_msp,const struct pstruct * ppst,const char * info_lib_range_p)493 print_header2(FILE *fd, int qlib, char *info_qlabel, unsigned char **aa0,
494 	      const struct mngmsg *m_msp, const struct pstruct *ppst,
495 	      const char * info_lib_range_p) {
496   int j;
497   char tmp_str[MAX_STR];
498   double db_tt;
499 
500   /* if (m_msp->markx & MX_HTML) fputs("<pre>\n",fd); */
501 
502   if (m_msp->std_output) {
503     if (qlib==1) {
504       fprintf(fd,"Query: %s\n", m_msp->tname);
505     }
506 
507     if (m_msp->qdnaseq == SEQT_DNA || m_msp->qdnaseq == SEQT_RNA) {
508       strncpy(tmp_str,(m_msp->qframe==1)? " (forward-only)" : "\0",sizeof(tmp_str));
509       tmp_str[sizeof(tmp_str)-1]='\0';
510     }
511     else tmp_str[0]='\0';
512 
513     fprintf(fd,"%3d>>>%s%s\n", qlib,
514 	   m_msp->qtitle,
515 	   (m_msp->revcomp ? " (reverse complement)" : tmp_str));
516 
517     /* check for annotation */
518     if (m_msp->ann_flg && m_msp->aa0a != NULL) {
519       fprintf(fd,"Annotation: ");
520       for (j=0; j<m_msp->n0; j++) {
521 	if (m_msp->aa0a[j] && m_msp->ann_arr[m_msp->aa0a[j]] != ' ' ) {
522 	  fprintf(fd,"|%ld:%c%c",
523 		  j+m_msp->q_off,m_msp->ann_arr[m_msp->aa0a[j]],ppst->sq[aa0[0][j]]);
524 	}
525       }
526     fprintf(fd,"\n");
527     }
528 
529     fprintf(fd,"Library: %s%s\n", m_msp->ltitle,info_lib_range_p);
530 
531     if (m_msp->db.carry==0) {
532       fprintf(fd, "  %7ld residues in %5ld sequences\n", m_msp->db.length, m_msp->db.entries);
533     }
534     else {
535       db_tt = (double)m_msp->db.carry*(double)LONG_MAX + (double)m_msp->db.length;
536       fprintf(fd, "  %.0f residues in %5ld library sequences\n", db_tt, m_msp->db.entries);
537     }
538 
539   }
540   else {
541     if ((m_msp->markx & (MX_M8OUT + MX_M8COMMENT)) == (MX_M8OUT+MX_M8COMMENT)) {
542       fprintf(fd,"# %s %s%s\n",prog_func,verstr,mp_verstr);
543       fprintf(fd,"# Query: %s\n",m_msp->qtitle);
544       fprintf(fd,"# Database: %s\n",m_msp->ltitle);
545     }
546   }
547   if (m_msp->markx & MX_HTML) fputs("</pre>\n",fd);
548   fflush(fd);
549 }
550 
551 /* **************************************************************** */
552 /*   before showbest                                                */
553 /* **************************************************************** */
print_header3(FILE * fd,int qlib,struct mngmsg * m_msp,struct pstruct * ppst)554 void print_header3(FILE *fd, int qlib, struct mngmsg *m_msp, struct pstruct *ppst) {
555 
556     if (m_msp->markx & MX_MBLAST2) {
557       if (qlib == 1) {
558 	fprintf(fd, "\nDatabase: %s\n     %12ld sequences; %ld total letters\n\n\n",
559 		m_msp->ltitle, m_msp->db.entries, m_msp->db.length);
560       }
561       fprintf(fd, "\nQuery= %s\nLength=%d\n", m_msp->qtitle, m_msp->n0);
562     }
563 }
564 
565 
566 /* **************************************************************** */
567 /* alignment tranistion                                             */
568 /* **************************************************************** */
print_header4(FILE * fd,char * info_qlabel,char * argv_line,char * info_gstring3,char * info_hstring_p[2],struct mngmsg * m_msp,struct pstruct * ppst)569 void print_header4(FILE *fd, char *info_qlabel, char *argv_line, char *info_gstring3, char *info_hstring_p[2],
570 		   struct mngmsg *m_msp, struct pstruct *ppst) {
571 
572 	if (m_msp->std_output && (m_msp->markx & (MX_AMAP+ MX_HTML + MX_M9SUMM)) && !(m_msp->markx & MX_M10FORM)) {
573 	  fprintf(fd,"\n>>>%s%s, %d %s vs %s library\n",
574 		  info_qlabel,(m_msp->revcomp ? "_rev":"\0"), m_msp->n0,
575 		  m_msp->sqnam,m_msp->lname);
576 	}
577 
578 	if (m_msp->markx & MX_M10FORM) {
579 	  fprintf(fd,"\n>>>%s%s, %d %s vs %s library\n",
580 		  info_qlabel,(m_msp->revcomp ? "-":"\0"), m_msp->n0, m_msp->sqnam,
581 		  m_msp->lname);
582 	  fprintf(fd,"; pg_name: %s\n",m_msp->pgm_name);
583 	  fprintf(fd,"; pg_ver: %s%s\n",verstr,mp_verstr);
584 	  fprintf(fd,"; pg_argv: %s",argv_line);
585 	  fputs(info_gstring3,fd);
586 	  fputs(info_hstring_p[0],fd);
587 	  fputs(info_hstring_p[1],fd);
588 	}
589 }
590 
print_header4a(FILE * outfd,struct mngmsg * m_msp)591 void print_header4a(FILE *outfd, struct mngmsg *m_msp) {
592   if (!(m_msp->markx & MX_M8OUT) && (m_msp->markx & (MX_M10FORM+MX_M9SUMM)) && m_msp->show_code != SHOW_CODE_ID) {
593     fprintf(outfd,">>><<<\n");
594   }
595 }
596 
print_header5(FILE * fd,int qlib,struct db_str * qtt,struct mngmsg * m_msp,struct pstruct * ppst,int in_mem,long tot_memK)597 void print_header5(FILE *fd, int qlib, struct db_str *qtt,
598 		   struct mngmsg *m_msp, struct pstruct *ppst,
599 		   int in_mem, long tot_memK) {
600 
601   /* for MX_MBLAST2, show some statistics results */
602   if (m_msp->markx & MX_MBLAST2) {
603     fprintf(fd,"\n\nLambda      K     H\n");
604     fprintf(fd," %6.3f  %6.3f  %6.3f\n\n",ppst->pLambda,ppst->pK,ppst->pH);
605     fprintf(fd,"\nGapped\nLambda\n");
606     fprintf(fd," %6.3f  %6.3f  %6.3f\n",ppst->pLambda,ppst->pK,ppst->pH);
607     fprintf(fd,"\nEffective search space used: %ld\n\n",m_msp->db.entries);
608   }
609 
610   if (m_msp->markx & MX_M8COMMENT) {
611     fprintf(fd, "# %s processed %d queries\n",prog_func,qlib);
612   }
613 
614   if ( !((m_msp->markx & MX_M8OUT) || (m_msp->markx & MX_HTML))
615        && (m_msp->markx & (MX_M10FORM+MX_M9SUMM))) {
616     fprintf(fd,">>>///\n");
617   }
618 
619   if ( m_msp->markx & MX_HTML) fputs("<pre>",fd);
620   if (m_msp->std_output) {
621     print_sum(fd, qtt, &m_msp->db, in_mem, tot_memK);}
622   if ( m_msp->markx & MX_HTML) fputs("</pre>\n",fd);
623 #ifdef HTML_HEAD
624   if (m_msp->markx & MX_HTML) fprintf(fd,"</body>\n</html>\n");
625 #endif
626 
627   if (m_msp->markx & MX_MBLAST2) {
628       fprintf(fd,"\n  Database: %s\n",m_msp->ltitle);
629       fprintf(fd,"  Number of letters in database: %ld\n",m_msp->db.length);
630       fprintf(fd,"  Number of sequences in database: %ld\n",m_msp->db.entries);
631       fprintf(fd,"\n\n\nMatrix: %s\n",ppst->pam_name);
632       fprintf(fd,"Gap Penalties: Existence: %d, Extension: %d\n",ppst->gdelval, ppst->ggapval);
633   }
634 }
635 
636 void
print_annot_header(FILE * fd,struct mngmsg * m_msp)637 print_annot_header(FILE *fd, struct mngmsg *m_msp) {
638   int i;
639 
640   if (m_msp->ann_arr_def[1]) {
641     if (m_msp->markx & MX_HTML) {fprintf(fd,"<pre>");}
642     fprintf(fd, "Annotation symbols:\n");
643     for (i=1; m_msp->ann_arr[i]; i++) {
644       if (m_msp->ann_arr_def[i]) {
645 	fprintf(fd, " %c : %s\n",m_msp->ann_arr[i], m_msp->ann_arr_def[i]);
646       }
647     }
648     if (m_msp->markx & MX_HTML) {fputs("</pre><hr />\n",fd);}
649   }
650 }
651 
652 extern int fa_max_workers;
653 
654 void
print_sum(FILE * fd,struct db_str * qtt,struct db_str * ntt,int in_mem,long tot_memK)655 print_sum(FILE *fd, struct db_str *qtt, struct db_str *ntt, int in_mem, long tot_memK)
656 {
657   double db_tt;
658   char tstr1[26], tstr2[26];
659   char memstr[256];
660 
661   strncpy(tstr1,ctime(&tdstart),sizeof(tstr1));
662   strncpy(tstr2,ctime(&tddone),sizeof(tstr1));
663   tstr1[24]=tstr2[24]='\0';
664 
665   /* Print timing to output file as well */
666 
667   fprintf(fd, "\n%ld residues in %ld query   sequences\n", qtt->length, qtt->entries);
668   if (ntt->carry == 0)
669     fprintf(fd, "%ld residues in %ld library sequences\n", ntt->length, ntt->entries);
670   else {
671     db_tt = (double)ntt->carry*(double)LONG_MAX + (double)ntt->length;
672     fprintf(fd, "%.0f residues in %ld library sequences\n", db_tt, ntt->entries);
673   }
674 
675   memstr[0]='\0';
676   if (tot_memK && in_mem != 0) {
677     sprintf(memstr," in memory [%ldG]",(tot_memK >> 20));
678   }
679 
680 #if !defined(COMP_THR) && !defined(PCOMPLIB)
681   fprintf(fd," Scomplib [%s%s]\n start: %s done: %s\n",verstr,mp_verstr,tstr1,tstr2);
682 #endif
683 #if defined(COMP_THR)
684   fprintf(fd," Tcomplib [%s%s] (%d proc%s)\n start: %s done: %s\n", verstr, mp_verstr,
685 	  fa_max_workers, memstr, tstr1,tstr2);
686 #endif
687 #if defined(PCOMPLIB)
688   fprintf(fd," Pcomplib [%s%s] (%d proc%s)\n start: %s done: %s\n", verstr, mp_verstr,
689 	  fa_max_workers, memstr, tstr1,tstr2);
690 #endif
691 #ifndef COMP_MLIB
692   fprintf(fd," Scan time: ");
693   ptime(fd, tscan - tprev);
694   fprintf (fd," Display time: ");
695   ptime (fd, tdone - tscan);
696 #else
697   fprintf(fd," Total Scan time: ");
698   ptime(fd, ttscan);
699   fprintf (fd," Total Display time: ");
700   ptime (fd, ttdisp);
701 #endif
702   fprintf (fd,"\n");
703   fprintf (fd, "\nFunction used was %s [%s%s]\n", prog_func,verstr,mp_verstr);
704 }
705 
706 extern double zs_to_Ec(double zs, long entries);
707 extern double zs_to_bit(double zs, int n0, int n1);
708 extern double zs_to_p(double zs);
709 
710 #include "aln_structs.h"
711 
712 void
prhist(FILE * fd,const struct mngmsg * m_msp,struct pstruct * ppst,struct hist_str hist,int nstats,int sstats,struct db_str ntt,char * stat_info2,char * lib_range,char ** info_gstring2,char ** info_hstring,long tscan)713 prhist(FILE *fd, const struct mngmsg *m_msp,
714        struct pstruct *ppst,
715        struct hist_str hist,
716        int nstats, int sstats,
717        struct db_str ntt,
718        char *stat_info2,
719        char *lib_range,
720        char **info_gstring2,
721        char **info_hstring,
722        long tscan)
723 {
724   int i,j,hl,hll, el, ell, ev;
725   char hline[80], pch, *bp;
726   int mh1, mht;
727   int maxval, maxvalt, dotsiz, ddotsiz,doinset;
728   double cur_e, prev_e, f_int;
729   double max_dev, x_tmp;
730   double db_tt;
731   int n_chi_sq, cum_hl=0, max_i=0, max_dev_i;
732   double zs10_off;
733 
734 
735   if (m_msp->markx & MX_HTML) fputs("<pre>\n",fd);
736   else {fprintf(fd,"\n");}
737 
738   if (ppst->zsflag_f < 0) {
739     if (!m_msp->nohist) {
740       fprintf(fd, "  %7ld residues in %5ld sequences", ntt.length,ntt.entries);
741       fprintf(fd, "%s\n",lib_range);
742     }
743     fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);
744     return;
745   }
746 
747   if (nstats > 20) {
748     zs10_off = ppst->zs_off * 10.0;
749 
750     max_dev = 0.0;
751     mh1 = hist.maxh-1;			/* max value for histogram */
752     mht = (3*hist.maxh-3)/4 - 1;	/* x-coordinate for expansion */
753     n_chi_sq = 0;
754 
755     if (!m_msp->nohist && mh1 > 0) {
756       for (i=0,maxval=0,maxvalt=0; i<hist.maxh; i++) {
757 	if (hist.hist_a[i] > maxval) maxval = hist.hist_a[i];
758 	if (i >= mht &&  hist.hist_a[i]>maxvalt) maxvalt = hist.hist_a[i];
759       }
760       cum_hl = -hist.hist_a[0];
761       dotsiz = (maxval-1)/60+1;
762       ddotsiz = (maxvalt-1)/50+1;
763       doinset = (ddotsiz < dotsiz && dotsiz > 2);
764 
765       if (ppst->zsflag_f>=0)
766 	fprintf(fd,"       opt      E()\n");
767       else
768 	fprintf(fd,"     opt\n");
769 
770       prev_e =  zs_to_Ec((double)(hist.min_hist-hist.histint/2)-zs10_off,hist.entries);
771       for (i=0; i<=mh1; i++) {
772 	pch = (i==mh1) ? '>' : ' ';
773 	pch = (i==0) ? '<' : pch;
774 	hll = hl = hist.hist_a[i];
775 	if (ppst->zsflag_f>=0) {
776 	  cum_hl += hl;
777 	  f_int = (double)(i*hist.histint+hist.min_hist)+(double)hist.histint/2.0;
778 	  cur_e = zs_to_Ec(f_int-zs10_off,hist.entries);
779 	  ev = el = ell = (int)(cur_e - prev_e + 0.5);
780 	  if (hl > 0  && i > 5 && i < (90-hist.min_hist)/hist.histint) {
781 	    x_tmp  = fabs(cum_hl - cur_e);
782 	    if ( x_tmp > max_dev) {
783 	      max_dev = x_tmp;
784 	      max_i = i;
785 	    }
786 	    n_chi_sq++;
787 	  }
788 	  if ((el=(el+dotsiz-1)/dotsiz) > 60) el = 60;
789 	  if ((ell=(ell+ddotsiz-1)/ddotsiz) > 40) ell = 40;
790 	  fprintf(fd,"%c%3d %5d %5d:",
791 		  pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
792 		  mh1*hist.histint+hist.min_hist,hl,ev);
793 	}
794 	else fprintf(fd,"%c%3d %5d :",
795 		     pch,(i<mh1)?(i)*hist.histint+hist.min_hist :
796 		     mh1*hist.histint+hist.min_hist,hl);
797 
798 	if ((hl=(hl+dotsiz-1)/dotsiz) > 60) hl = 60;
799 	if ((hll=(hll+ddotsiz-1)/ddotsiz) > 40) hll = 40;
800 	for (j=0; j<hl; j++) hline[j]='=';
801 	if (ppst->zsflag_f>=0) {
802 	  if (el <= hl ) {
803 	    if (el > 0) hline[el-1]='*';
804 	    hline[hl]='\0';
805 	  }
806 	  else {
807 	    for (j = hl; j < el; j++) hline[j]=' ';
808 	    hline[el-1]='*';
809 	    hline[hl=el]='\0';
810 	  }
811 	}
812 	else hline[hl] = 0;
813 	if (i==1) {
814 	  for (j=hl; j<10; j++) hline[j]=' ';
815 	  sprintf(&hline[10]," one = represents %d library sequences",dotsiz);
816 	}
817 	if (doinset && i == mht-2) {
818 	  for (j = hl; j < 10; j++) hline[j]=' ';
819 	  sprintf(&hline[10]," inset = represents %d library sequences",ddotsiz);
820 	}
821 	if (i >= mht&& doinset ) {
822 	  for (j = hl; j < 10; j++) hline[j]=' ';
823 	  hline[10]=':';
824 	  for (j = 11; j<11+hll; j++) hline[j]='=';
825 	  hline[11+hll]='\0';
826 	  if (ppst->zsflag_f>=0) {
827 	    if (ell <= hll) hline[10+ell]='*';
828 	    else {
829 	      for (j = 11+hll; j < 10+ell; j++) hline[j]=' ';
830 	      hline[10+ell] = '*';
831 	      hline[11+ell] = '\0';
832 	    }
833 	  }
834 	}
835 
836 	fprintf(fd,"%s\n",hline);
837 	prev_e = cur_e;
838       }
839     }
840     max_dev_i = max_i*hist.histint+hist.min_hist;
841   }
842   else {
843     max_dev = 0.0;
844     n_chi_sq = 0;
845     max_i = 0;
846     max_dev_i = 0;
847   }
848 
849   if (ppst->zsflag_f >=0 ) {
850     if (!m_msp->nohist) {
851       if (ntt.carry==0) {
852 	fprintf(fd, "  %7ld residues in %5ld sequences", ntt.length, ntt.entries);
853       }
854       else {
855 	db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
856 	fprintf(fd, "  %.0f residues in %5ld library sequences", db_tt, ntt.entries);
857       }
858       fprintf(fd, "%s\n",lib_range);
859     }
860     fprintf(fd,"Statistics: %s\n",hist.stat_info);
861     if (stat_info2) {
862       fprintf(fd," Statistics E2: %s\n",stat_info2);
863     }
864 
865 #ifdef SAMP_STATS
866     fprintf(fd," statistics sampled from %ld (%d) to %ld sequences\n",
867 	    (hist.entries > nstats ? nstats : hist.entries),sstats, hist.entries);
868 #else
869     fprintf(fd," statistics extrapolated from %ld to %ld sequences\n",
870 	    (hist.entries > nstats ? nstats : hist.entries),hist.entries);
871 #endif
872 
873     if (!m_msp->nohist && cum_hl > 0) {
874       fprintf(fd," Kolmogorov-Smirnov  statistic: %6.4f (N=%d) at %3d\n",
875 	      max_dev/(float)cum_hl, n_chi_sq,max_dev_i);
876     }
877     if (m_msp->markx & MX_M10FORM) {
878       while ((bp=strchr(hist.stat_info,'\n'))!=NULL) *bp=' ';
879       if (cum_hl <= 0) cum_hl = -1;
880       sprintf(info_hstring[0],"; mp_extrap: %d %ld\n; mp_stats: %s\n; mp_KS: %6.4f (N=%d) at %3d\n",
881 	      MAX_STATS,hist.entries,hist.stat_info,max_dev/(float)cum_hl,
882 	      n_chi_sq,max_dev_i);
883     }
884   }
885 
886   if (m_msp->markx & MX_M10FORM) {
887     if ((bp = strchr(info_gstring2[1],'\n'))!=NULL) *bp = ' ';
888     sprintf(info_hstring[1],"; mp_Algorithm: %s\n; mp_Parameters: %s\n",info_gstring2[0],info_gstring2[1]);
889     if (bp != NULL ) *bp = '\n';
890   }
891 
892   if (ppst->other_info != NULL) {
893     fputs(ppst->other_info, fd);
894   }
895 
896   fprintf(fd,"Algorithm: %s\nParameters: %s\n",info_gstring2[0],info_gstring2[1]);
897 
898   fprintf (fd," Scan time: ");
899   ptime(fd,tscan);
900   fprintf(fd,"\n");
901   if (!m_msp->annot1_sname[0] &&  m_msp->markx & MX_HTML) {
902     fputs("</pre>\n<hr />\n",fd);
903   }
904 
905   fflush(fd);
906 }
907 
908 extern char prog_name[], *verstr;
909 
910 #ifdef PCOMPLIB
911 #include "mpi.h"
912 #endif
913 
s_abort(char * p,char * p1)914 void s_abort (char *p,  char *p1)
915 {
916   int i;
917 
918   fprintf (stderr, "\n***[%s] %s%s***\n", prog_name, p, p1);
919 #ifdef PCOMPLIB
920   MPI_Abort(MPI_COMM_WORLD,1);
921   MPI_Finalize();
922 #endif
923   exit (1);
924 }
925 
w_abort(char * p,char * p1)926 void w_abort (char *p, char *p1)
927 {
928   fprintf (stderr, "\n***[%s] %s%s***\n\n", prog_name, p, p1);
929   exit (1);
930 }
931 
932 extern struct a_res_str *
933 build_ares_code(unsigned char *aa0, int n0,
934 		unsigned char *aa1, struct seq_record *seq,
935 		int frame, int *have_ares, int repeat_thresh,
936 		const struct mngmsg *m_msp, struct pstruct *ppst,
937 		void *f_str
938 		);
939 
940 extern struct lmf_str *
941 re_openlib(struct lmf_str *, int outtty);
942 
943 #define MAX_BLINE 2048
944 #define RANLIB (m_fptr->ranlib)
945 
946 extern int
947 re_getlib(unsigned char *, struct annot_str **,
948 	  int, int, int, int, int, long *, long *,
949 	  struct lmf_str *m_fptr);
950 
951 /*
952    pre_load_best loads a set of sequences using re_getlib
953 
954    it should be used for getting sequences for shuffling, and for showbest() if m_msg->quiet
955 
956    it both opens the m_file_p buffer, gets the bline[] descriptions,
957    and reads the actual sequences.  In reading the sequences, it
958    should first allocate one large buffer so that individual buffers do not need to be freed.
959 */
960 
961 void
pre_load_best(unsigned char * aa1save,int maxn,struct beststr ** bbp_arr,int nbest,struct mngmsg * m_msp,int debug)962 pre_load_best(unsigned char *aa1save, int maxn,
963 	      struct beststr **bbp_arr, int nbest,
964 	      struct mngmsg *m_msp, int debug)
965 {
966   int i, n1, bl_len, tmp_bline_len, l_llen;
967   int seq_buf_len;
968   char bline[MAX_BLINE];
969   unsigned char  *seq_buf_p;
970   char *bline_buf_p;
971 
972   struct beststr *bbp;
973   struct lmf_str *m_fptr;
974 
975   /*
976      calculate how much room we need for sequences and blines
977   */
978 
979   if (m_msp->pre_load_done) return;
980 
981   seq_buf_len = 1;
982   for (i=0; i<nbest; i++) {
983     /* we are not (currently) allocating more than n1+1, because alignment is not vectorized,
984        if it were vectorized, we would need n+16
985     */
986 #ifdef DEBUG
987     if (bbp_arr[i]->n1 != bbp_arr[i]->seq->n1) {
988       fprintf(stderr,"*** error [%s:%d] - n1 (%d) != seq->n1 (%d)\n",
989 	      __FILE__, __LINE__, bbp_arr[i]->n1, bbp_arr[i]->seq->n1);
990     }
991 #endif
992 
993     if (bbp_arr[i]->seq->aa1b == NULL) {
994       seq_buf_len += bbp_arr[i]->seq->n1 + 1;
995     }
996   }
997 
998   /* have required sequence space (seq_buf_len), allocate it */
999 
1000   if ((m_msp->aa1save_buf_b=(unsigned char *)calloc(seq_buf_len, sizeof(char)))==NULL) {
1001     fprintf(stderr, "*** error [%s:%d] - cannot allocate space[%d] for sequence encoding\n",
1002 	    __FILE__, __LINE__, seq_buf_len);
1003     exit(1);
1004   }
1005   else {
1006     seq_buf_p = m_msp->aa1save_buf_b+1;		/* ensure there is an initial '\0' */
1007   }
1008 
1009   /* adjust description line length */
1010   l_llen = m_msp->aln.llen;
1011   if ((m_msp->markx & MX_M9SUMM) && m_msp->show_code != SHOW_CODE_ID) {
1012     l_llen += 40;
1013     if (l_llen > 200) l_llen=200;
1014   }
1015 
1016   tmp_bline_len = sizeof(bline)-1;
1017   if (!(m_msp->markx & MX_M10FORM) && !m_msp->long_info) {tmp_bline_len = l_llen-5;}
1018 
1019   /* allocate more bline than we need for simplicity */
1020   if ((bline_buf_p=m_msp->bline_buf_b=(char *)calloc(nbest*tmp_bline_len, sizeof(char)))==NULL) {
1021     fprintf(stderr, "*** error [%s:%d] - cannot allocate space[%d] for bline descriptions\n",
1022 	    __FILE__, __LINE__,  nbest*tmp_bline_len);
1023     exit(1);
1024   }
1025 
1026   for (i=0; i<nbest; i++) {
1027     bbp = bbp_arr[i];
1028 
1029     if ((m_fptr=re_openlib(bbp->mseq->m_file_p,!m_msp->quiet))==NULL) {
1030       fprintf(stderr,"*** error [%s:%d] - cannot re-open %s\n",
1031 	      __FILE__, __LINE__, bbp->mseq->m_file_p->lb_name);
1032       exit(1);
1033     }
1034     RANLIB(bline,tmp_bline_len,bbp->mseq->lseek,bbp->mseq->libstr,m_fptr);
1035     bl_len = strlen(bline);
1036     bbp->mseq->bline = bline_buf_p;
1037     bbp->mseq->bline_max = m_msp->aln.llen;
1038     strncpy(bbp->mseq->bline, bline, bl_len);
1039     bline_buf_p += bl_len+1;
1040 
1041     /* make sure we get annotation if present, and sequence if necessary */
1042     if (bbp->seq->aa1b==NULL || (m_msp->ann_flg==1 && bbp->seq->annot_p==NULL)) {
1043       n1 = re_getlib(aa1save, (m_msp->ann_flg==1) ? &(bbp->seq->annot_p) : NULL,
1044 		     maxn,m_msp->ldb_info.maxt3, m_msp->ldb_info.l_overlap,
1045 		     bbp->mseq->cont,m_msp->ldb_info.term_code,
1046 		     &bbp->seq->l_offset,&bbp->seq->l_off,bbp->mseq->m_file_p);
1047       if (n1 != bbp->seq->n1) {
1048 	fprintf(stderr,"*** error [%s:%d] - n1[%d/%d] != n1[%d] from re_getlib() at %s [maxn:%d/maxt3:%d]\n",
1049 		__FILE__, __LINE__,
1050 		bbp->n1, bbp->seq->n1, n1, bbp->mseq->libstr, maxn, m_msp->ldb_info.maxt3);
1051       }
1052 
1053 #ifdef DEBUG
1054       if (adler32(1L,aa1save,n1)!=bbp->adler32_crc) {
1055 	fprintf(stderr,"*** error [%s:%d] - adler32_crc from re_getlib() at %d(%d):  %s\n",
1056 		__FILE__, __LINE__,
1057 		bbp->mseq->index,bbp->n1, bline);
1058       }
1059 #endif
1060 
1061       /* if we don't have the sequence in the aa1b buffer, copy it from re_getlib */
1062       if (bbp->seq->aa1b == NULL)  {
1063 	bbp->seq->aa1b = seq_buf_p;
1064 	memcpy(bbp->seq->aa1b, aa1save, bbp->seq->n1+1);
1065 	seq_buf_p += bbp->seq->n1+1;
1066       }
1067     }
1068   }
1069 
1070   /* here, we are getting query annots after all the bptr[]s have been processed */
1071   /* moved to comp_lib9.c */
1072   /*
1073   if (m_msp->annot0_sname[0]) {
1074     if (get_annot(m_msp->annot0_sname, m_msp, m_msp->qtitle, m_msp->q_offset+m_msp->q_off-1,m_msp->n0, &m_msp->annot_p, 0, debug) < 0) {
1075       fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations\n",__FILE__, __LINE__, m_msp->annot0_sname);
1076       m_msp->annot0_sname[0] = '\0';
1077     }
1078     if (m_msp->annot_p && m_msp->annot_p->n_annot > 0) {
1079       m_msp->aa0a = m_msp->annot_p->aa1_ann;
1080     }
1081     if (!m_msp->ann_arr[0]) {m_msp->ann_arr[0] = ' '; m_msp->ann_arr[1] = '\0';}
1082   }
1083   */
1084 
1085   /* if we have an variant annotation script, execute it and capture the output */
1086   /* must do after bline is set */
1087   if (m_msp->annot1_sname[0]) {
1088     if (get_annot_list(m_msp->annot1_sname, m_msp, bbp_arr, nbest, 1, debug)< 0) {
1089       fprintf(stderr,"*** error [%s:%d] - %s did not produce annotations for %s\n",__FILE__, __LINE__, m_msp->annot1_sname,m_msp->qtitle);
1090       m_msp->annot1_sname[0] = '\0';
1091     };
1092     if (!m_msp->ann_arr[0]) {m_msp->ann_arr[0] = ' '; m_msp->ann_arr[1] = '\0';}
1093   }
1094 
1095   m_msp->pre_load_done = 1;
1096 }
1097 
1098 /*  merge_ares_chains()
1099 
1100     seeks to merge two ares chains, producing a single chain that is
1101     sorted by sw_score.
1102 
1103     Strategy -- choose the chain with the highest score, and go down
1104     it until the head of the other chain has higher score, then link
1105     the other chain to the main chain, breaking the first, and
1106     continue the process.
1107 
1108     The two pointers, max_next and alt_next, keep track of the best
1109     and the alternate chain
1110  */
1111 
1112 
1113 #undef SHOW_MERGE_CHAIN
1114 
1115 struct a_res_str *
merge_ares_chains(struct a_res_str * cur_ares,struct a_res_str * tmp_ares,int score_ix,const char * msg)1116 merge_ares_chains(struct a_res_str *cur_ares,
1117 		  struct a_res_str *tmp_ares,
1118 		  int score_ix,
1119 		  const char *msg)
1120 {
1121   struct a_res_str *max_next, *max_ares, *alt_ares, *prev_next;
1122 
1123   if (!tmp_ares) return cur_ares;
1124 
1125 #ifdef SHOW_MERGE_CHAIN
1126   fprintf(stderr,"cur_ares->");
1127   for (max_next = cur_ares; max_next; max_next = max_next->next) {
1128     fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
1129   }
1130 
1131   fprintf(stderr,"||\n");
1132   fprintf(stderr,"tmp_ares->");
1133   for (max_next = tmp_ares; max_next; max_next = max_next->next) {
1134     fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
1135   }
1136   fprintf(stderr,"||\n");
1137 #endif
1138 
1139   /* start with the maximum score */
1140 
1141   if (cur_ares->rst.score[score_ix] >= tmp_ares->rst.score[score_ix]) {
1142     max_ares = max_next = prev_next = cur_ares;
1143     alt_ares = tmp_ares;
1144   }
1145   else {
1146     max_ares = max_next = prev_next = tmp_ares;
1147     alt_ares = cur_ares;
1148   }
1149 
1150   while (max_next && alt_ares) {
1151     /* this is guaranteed true for the first iteration */
1152     while (max_next && max_next->rst.score[score_ix] >= alt_ares->rst.score[score_ix]) {
1153       prev_next = max_next;
1154       max_next = max_next->next;
1155     }
1156     if (max_next==NULL) break;
1157     else {	/* max_next->rst.score[score_ix] no longer greater, switch
1158 		   pointers */
1159       prev_next->next = alt_ares;
1160       alt_ares = max_next;
1161       max_next = prev_next->next;
1162     }
1163   }
1164 
1165   /* we quit whenever max_next or alt_ares == NULL; if
1166      (max_next==NULL), then continue adding the rest of alt_ares */
1167 
1168   if (max_next==NULL) {
1169     prev_next->next = alt_ares;
1170   }
1171 
1172 
1173 #ifdef SHOW_MERGE_CHAIN
1174   fprintf(stderr,"[%s] merge_ares->",msg);
1175   for (max_next = max_ares; max_next; max_next = max_next->next) {
1176     fprintf(stderr,"%d->",max_next->rst.score[score_ix]);
1177   }
1178   fprintf(stderr,"||\n\n");
1179 #endif
1180 
1181   return max_ares;
1182 }
1183 
1184 /* copies from from to to shuffling */
1185 
1186 extern int my_nrand(int, void *);
1187 
1188 void
shuffle(unsigned char * from,unsigned char * to,int n,void * rand_state)1189 shuffle(unsigned char *from, unsigned char *to, int n, void *rand_state)
1190 {
1191   int i,j; unsigned char tmp;
1192 
1193   if (from != to) memcpy((void *)to,(void *)from,n);
1194 
1195   for (i=n; i>0; i--) {
1196     j = my_nrand(i, rand_state);
1197     tmp = to[j];
1198     to[j] = to[i-1];
1199     to[i-1] = tmp;
1200   }
1201   to[n] = 0;
1202 }
1203 
1204 /* shuffles DNA sequences as codons */
1205 void
shuffle3(unsigned char * from,unsigned char * to,int n,void * rand_state)1206 shuffle3(unsigned char *from, unsigned char *to, int n, void *rand_state)
1207 {
1208   int i, j, i3,j3; unsigned char tmp;
1209   int n3;
1210 
1211   if (from != to) memcpy((void *)to,(void *)from,n);
1212 
1213   n3 = n/3;
1214 
1215   for (i=n3; i>0; i--) {
1216     j = my_nrand(i, rand_state);
1217     i3 = i*3;
1218     j3 = j*3;
1219     tmp = to[j3];
1220     to[j3] = to[i3-1];
1221     to[i3-1] = tmp;
1222     tmp = to[j3+1];
1223     to[j3+1] = to[i3];
1224     to[i3] = tmp;
1225     tmp = to[j3+2];
1226     to[j3+2] = to[i3+1];
1227     to[i3+1] = tmp;
1228   }
1229   to[n] = 0;
1230 }
1231 
1232 /* "shuffles" by reversing the sequence */
1233 void
rshuffle(unsigned char * from,unsigned char * to,int n)1234 rshuffle(unsigned char *from, unsigned char *to, int n)
1235 {
1236   unsigned char *ptr = from + n;
1237 
1238   while (n-- > 0) {
1239     *to++ = *ptr--;
1240   }
1241   *to = '\0';
1242 }
1243 
1244 static int ieven = 0;
1245 /* copies from from to from shuffling, ieven changed for threads */
1246 void
wshuffle(unsigned char * from,unsigned char * to,int n,int wsiz,void * rand_state)1247 wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, void *rand_state)
1248 {
1249   int i,j, k, mm;
1250   unsigned char tmp, *top;
1251 
1252   memcpy((void *)to,(void *)from,n);
1253 
1254   mm = n%wsiz;
1255 
1256   if (ieven) {
1257     for (k=0; k<(n-wsiz); k += wsiz) {
1258       top = &to[k];
1259       for (i=wsiz; i>0; i--) {
1260 	j = my_nrand(i, rand_state);
1261 	tmp = top[j];
1262 	top[j] = top[i-1];
1263 	top[i-1] = tmp;
1264       }
1265     }
1266     top = &to[n-mm];
1267     for (i=mm; i>0; i--) {
1268       j = my_nrand(i, rand_state);
1269       tmp = top[j];
1270       top[j] = top[i-1];
1271       top[i-1] = tmp;
1272     }
1273     ieven = 0;
1274   }
1275   else {
1276     for (k=n; k>=wsiz; k -= wsiz) {
1277       top = &to[k-wsiz];
1278       for (i=wsiz; i>0; i--) {
1279 	j = my_nrand(i, rand_state);
1280 	tmp = top[j];
1281 	top[j] = top[i-1];
1282 	top[i-1] = tmp;
1283       }
1284     }
1285     top = &to[0];
1286     for (i=mm; i>0; i--) {
1287       j = my_nrand(i, rand_state);
1288       tmp = top[j];
1289       top[j] = top[i-1];
1290       top[i-1] = tmp;
1291     }
1292     ieven = 1;
1293   }
1294   to[n] = 0;
1295 }
1296 
1297 int
sfn_cmp(int * q,int * s)1298 sfn_cmp(int *q, int *s)
1299 {
1300   if (*q == *s) return *q;
1301   while (*q && *s) {
1302     if (*q == *s) return *q;
1303     else if (*q < *s) q++;
1304     else if (*q > *s) s++;
1305   }
1306   return 0;
1307 }
1308 
1309 #ifndef MPI_SRC
1310 void
revcomp(unsigned char * seq,int n,int * c_nt)1311 revcomp(unsigned char *seq, int n, int *c_nt)
1312 {
1313   unsigned char tmp;
1314   int i, ni;
1315 
1316   for (i=0, ni = n-1; i< n/2; i++,ni--) {
1317     tmp = c_nt[seq[i]];
1318     seq[i] = c_nt[seq[ni]];
1319     seq[ni] = tmp;
1320   }
1321   if ((n%2)==1) {
1322     i = n/2;
1323     seq[i] = c_nt[seq[i]];
1324   }
1325   seq[n]=0;
1326 }
1327 #endif
1328 
1329 /* check to see whether this score (or a shuff score) should
1330    be included in statistics */
samp_stats_idx(int * pre_nstats,int nstats,void * rand_state)1331 int samp_stats_idx (int *pre_nstats, int nstats, void *rand_state) {
1332   int jstats = -1;
1333 
1334   /* this code works when every score can be used for statistics
1335      estimates, but fails for fasta/[t]fast[xy] where only a fraction
1336      of scores are used */
1337 
1338   if (*pre_nstats < MAX_STATS) {
1339     jstats = (*pre_nstats)++;
1340   }
1341 
1342   /* here, the problem is that while we may have pre_nstats
1343      possible samplings, in some cases (-M subsets, fasta,
1344      [t]fast[xy] we don't have MAX_STATS samples yet.  Until we
1345      have MAX_STATS, we want more.  But the stats_idx strategy
1346      means that there may be additional samples in the buffers
1347      that are not reflected in nstats.
1348   */
1349 
1350   else {
1351 #ifdef SAMP_STATS_LESS
1352     /* now we have MAX_STATS samples
1353        we want to sample 1/2 of 60K - 120K, 1/3 of 120K - 180K, etc */
1354     /* check every 15K to see if we have gone past the next threshold */
1355 
1356     /* pre_nstats cannot be incremented before the % to ensure
1357        that stats_inc is incremented exactly at 60000, 120000, etc.
1358        use ">=" in case increment comes later
1359        tests suggest the first 60K are sampled about 25% more
1360        than the rest
1361     */
1362     if (nstats < MAX_STATS) {
1363       jstats = MAX_STATS - my_nrand(MAX_STATS - nstats, rand_state)-1;
1364     }
1365     else if (((*pre_nstats)++ % (MAX_STATS/4)) == 0 &&
1366 	     *pre_nstats >= stats_inc * MAX_STATS) {
1367       stats_inc = (*pre_nstats / MAX_STATS) + 1;
1368     }
1369     if ((*pre_nstats % stats_inc) == 0) {
1370       jstats = my_nrand(MAX_STATS, rand_state);
1371     }
1372 #else
1373     /* this sampling strategy calls my_nrand() for every
1374        sequence > 60K, but provides a very uniform sampling */
1375     jstats = my_nrand(++(*pre_nstats), rand_state);
1376     if (jstats >= MAX_STATS) { jstats = -1;}
1377 #endif
1378   }
1379   return jstats;
1380 }
1381 
1382 /* **************************************************************** */
1383 /* build_link_data -- produces fasta file from m_msp->
1384    (1) generate a temporary file name
1385    (2) write out accessions \t expects to the temporary file
1386    (3) run script against temporary file, producing fasta_file_expansion_file
1387    (4) return fasta expansion filename for standard fasta openlib().
1388 
1389    returns: the expansion library file name
1390             **link_link_file_p is the name of the file with the data
1391               that will be removed.
1392 */
1393 /* **************************************************************** */
1394 char *
build_link_data(char ** link_lib_file_p,struct mngmsg * m_msp,struct beststr ** bestp_arr,int debug)1395 build_link_data(char **link_lib_file_p,
1396 		struct mngmsg *m_msp, struct beststr **bestp_arr,
1397 		int debug) {
1398   int i, status;
1399   char tmp_line[MAX_SSTR];
1400   char link_acc_file[MAX_STR];
1401   int link_acc_fd;
1402   char *link_lib_file;
1403   char *link_lib_str;
1404   char link_script[MAX_LSTR];
1405   int link_lib_type;
1406   char *bp, *link_bp;
1407   FILE *link_fd=NULL;		/* file for link accessions */
1408 
1409 #ifndef UNIX
1410   return NULL;
1411 #else
1412   /* get two tmpfiles, one for accessions, one for library */
1413   link_acc_file[0] = '\0';
1414 
1415   if ((link_lib_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
1416     fprintf(stderr,"*** error [%s:%d] - [build_link_data] Cannot allocate link_lib_file",
1417 	    __FILE__, __LINE__);
1418   }
1419   link_lib_file[0] = '\0';
1420 
1421   if ((bp=getenv("TMP_DIR"))!=NULL) {
1422     strncpy(link_acc_file,bp,sizeof(link_acc_file));
1423     link_acc_file[sizeof(link_acc_file)-1] = '\0';
1424     SAFE_STRNCAT(link_acc_file,"/",sizeof(link_acc_file));
1425   }
1426 
1427   SAFE_STRNCAT(link_acc_file,"link_acc_XXXXXX",sizeof(link_acc_file));
1428   link_acc_fd = mkstemp(link_acc_file);
1429   strncpy(link_lib_file,link_acc_file,MAX_STR);
1430   link_acc_file[sizeof(link_acc_file)-1] = '\0';
1431   SAFE_STRNCAT(link_lib_file,".lib",MAX_STR);
1432 
1433   /* write out accessions to link_acc_file */
1434   if ((link_fd =fdopen(link_acc_fd,"w"))==NULL) {
1435     fprintf(stderr,"*** error [%s:%d] - Cannot open link_acc_file: %s\n",
1436 	    __FILE__, __LINE__, link_acc_file);
1437     goto no_links;
1438   }
1439 
1440   for (i=0; i<m_msp->nskip + m_msp->nshow; i++) {
1441     if ((bp=strchr(bestp_arr[i]->mseq->bline,' '))!=NULL) {
1442       *bp = '\0';
1443     }
1444     fprintf(link_fd,"%s\t%.3g\n",bestp_arr[i]->mseq->bline,bestp_arr[i]->rst.escore);
1445     if (bp != NULL) *bp=' ';
1446   }
1447   fclose(link_fd);
1448 
1449   /* build link_script link_acc_file > link_lib_file */
1450   /* check for indirect */
1451   link_bp = &m_msp->link_lname[0];
1452   if (*link_bp == '!') {
1453     link_bp++;
1454   }
1455   if (*link_bp == '@') {
1456     link_bp++;
1457   }
1458 
1459   /* remove library type */
1460   if ((bp=strchr(link_bp,' '))!=NULL) {
1461     *bp = '\0';
1462     sscanf(bp+1,"%d",&link_lib_type);
1463   }
1464   else {
1465     link_lib_type = 0;
1466   }
1467 
1468   strncpy(link_script,link_bp,sizeof(link_script));
1469   link_script[sizeof(link_script)-1] = '\0';
1470   SAFE_STRNCAT(link_script," ",sizeof(link_script));
1471   SAFE_STRNCAT(link_script,link_acc_file,sizeof(link_script));
1472   SAFE_STRNCAT(link_script," >",sizeof(link_script));
1473   SAFE_STRNCAT(link_script,link_lib_file,sizeof(link_script));
1474 
1475   /* un-edit m_msp->link_lname */
1476   if (bp != NULL) *bp = ' ';
1477 
1478   /* run link_script link_acc_file > link_lib_file */
1479   status = system(link_script);
1480   if (!debug) {
1481 #ifdef UNIX
1482     unlink(link_acc_file);
1483 #else
1484     _unlink(link_acc_file);
1485 #endif
1486   }
1487 
1488   if (status == NO_FILE_EXIT) {	/* my specific return for no links */
1489     goto no_links;
1490   }
1491 
1492   if (status < 0 || status == 127) {
1493     fprintf(stderr,"*** error [%s:%d] - script: %s failed\n",
1494 	    __FILE__, __LINE__,link_script);
1495     goto no_links;
1496   }
1497 
1498   if ((link_fd=fopen(link_lib_file,"r"))==NULL) {
1499     goto no_links;
1500   }
1501   else fclose(link_fd);
1502 
1503   if ((link_lib_str=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
1504     fprintf(stderr,"*** error [%s:%d] - [build_link_data] Cannot allocate link_lib_str",
1505 	    __FILE__, __LINE__);
1506   }
1507 
1508   /* build the file string (possibly @link_lib_file libtype) */
1509   link_lib_str[0]='\0';
1510   if (m_msp->link_lname[0] == '@') {
1511     SAFE_STRNCAT(link_lib_str,"@",MAX_STR);
1512   }
1513   SAFE_STRNCAT(link_lib_str,link_lib_file,MAX_STR);
1514   if (link_lib_type > 0) {
1515     sprintf(tmp_line," %d",link_lib_type);
1516     SAFE_STRNCAT(link_lib_str,tmp_line,MAX_STR);
1517   }
1518 
1519   *link_lib_file_p = link_lib_file;
1520   return link_lib_str;
1521 
1522  no_links:
1523   free(link_lib_file);
1524   *link_lib_file_p = NULL;
1525   return NULL;
1526 #endif
1527 }
1528 
1529 /* **************************************************************** */
1530 /* build_lib_db -- produces fasta file from script
1531    (1) generate a temporary file name lib_db_file
1532    (2) run script producing data in lib_db_file
1533 
1534    returns: the expansion library file name
1535             **db_str_file_p is the name of the file with the data
1536               that will be removed.
1537 */
1538 /* **************************************************************** */
1539 char *
build_lib_db(char * script_file)1540 build_lib_db(char *script_file) {
1541   int i, status;
1542   char tmp_line[MAX_SSTR];
1543   char *lib_db_file, *lib_db_str;
1544   char lib_db_script[MAX_LSTR];
1545   int lib_db_indirect;
1546   int lib_db_type;
1547   int lib_db_str_len;
1548   char *bp, *lib_bp;
1549 
1550   if ((lib_db_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
1551     fprintf(stderr,"*** error [%s:%d] - [build_lib_db] Cannot allocate lib_db_file",
1552 	    __FILE__, __LINE__);
1553     goto no_lib;
1554   }
1555 
1556   if ((bp=getenv("TMP_DIR"))!=NULL) {
1557     strncpy(lib_db_file,bp,MAX_STR);
1558     lib_db_file[sizeof(lib_db_file)-1] = '\0';
1559     SAFE_STRNCAT(lib_db_file,"/",sizeof(lib_db_file));
1560   }
1561 
1562   SAFE_STRNCAT(lib_db_file,"lib_db_XXXXXX",MAX_STR);
1563   mktemp(lib_db_file);
1564   lib_db_str_len = strlen(lib_db_file)+1;
1565 
1566   /* check for indirect */
1567   lib_bp = script_file;
1568   if (*lib_bp == '@') {
1569     lib_bp++;
1570     lib_db_str_len++;
1571   }
1572   /* remove library type */
1573   if ((bp=strchr(lib_bp,' '))!=NULL) {
1574     *bp = '\0';
1575     sscanf(bp+1,"%d",&lib_db_type);
1576     lib_db_str_len += (strlen(bp+1)+1);
1577   }
1578   else {
1579     lib_db_type = 0;
1580   }
1581 
1582   strncpy(lib_db_script,lib_bp,sizeof(lib_db_script));
1583   lib_db_script[sizeof(lib_db_script)-1] = '\0';
1584   SAFE_STRNCAT(lib_db_script," >",sizeof(lib_db_script));
1585   SAFE_STRNCAT(lib_db_script,lib_db_file,sizeof(lib_db_script));
1586 
1587   if (bp != NULL) *bp = ' ';
1588 
1589   /* run lib_db_script link_acc_file > lib_db_file */
1590   status = system(lib_db_script);
1591 
1592   if (status == NO_FILE_EXIT) {	/* my specific return for no links */
1593     goto no_lib;
1594   }
1595 
1596   if (status < 0 || status == 127) {
1597     fprintf(stderr,"*** error [%s:%d] - [build_lib_db] script: %s failed\n",
1598 	    __FILE__, __LINE__, lib_db_script);
1599     goto no_lib;
1600   }
1601 
1602   /* build the file string (possibly @lib_db_str libtype) */
1603   if ((lib_db_str=calloc(lib_db_str_len+1,sizeof(char)))==NULL) {
1604     fprintf(stderr,"*** error [%s:%d] - [build_lib_db] cannot allocate lib_db_str[%d]\n",
1605 	    __FILE__, __LINE__, lib_db_str_len+1);
1606     goto no_lib;
1607   }
1608   lib_db_str[0]='\0';
1609   if (*script_file == '@') {
1610     SAFE_STRNCAT(lib_db_str,"@",MAX_STR);
1611   }
1612   SAFE_STRNCAT(lib_db_str,lib_db_file,MAX_STR);
1613   if (lib_db_type > 0) {
1614     sprintf(tmp_line," %d",lib_db_type);
1615     SAFE_STRNCAT(lib_db_str,tmp_line,MAX_STR);
1616   }
1617 
1618   return lib_db_str;
1619 
1620  no_lib:
1621   return NULL;
1622 }
1623 
1624 /* used to temporarily allocate annotation array in next_annot_entry()*/
1625 struct annot_mstr {
1626   int max_annot;
1627   struct annot_entry *tmp_arr_p;
1628 };
1629 
1630 /* init_tmp_annot_mstr(size) intializes the structure used to track annots  */
1631 int
init_tmp_annot(struct annot_mstr * this,int size)1632 init_tmp_annot(struct annot_mstr *this, int size) {
1633   struct annot_entry *tmp_ann_astr;
1634 
1635   /* only reset if array is NULL */
1636   if (this->tmp_arr_p == NULL || this->max_annot <= 0) {
1637     this->max_annot = 32;
1638     if ((this->tmp_arr_p=(struct annot_entry *)calloc(this->max_annot, sizeof(struct annot_entry)))==NULL) {
1639       fprintf(stderr,"*** error [%s:%d] - cannot allocate annot_entry[%d]\n",
1640 		__FILE__,__LINE__,this->max_annot);
1641 	return 0;
1642     }
1643   }
1644   return 1;
1645 }
1646 
1647 int
update_tmp_annot(struct annot_mstr * this)1648 update_tmp_annot(struct annot_mstr *this) {
1649 
1650   this->max_annot += (this->max_annot/2);
1651   if ((this->tmp_arr_p= (struct annot_entry *)realloc(this->tmp_arr_p, this->max_annot*sizeof(struct annot_entry)))==NULL) {
1652     fprintf(stderr,"[*** error [%s:%d] - cannot reallocate tmp_ann_astr[%d]\n",
1653 	    __FILE__, __LINE__, this->max_annot);
1654     return 0;
1655   }
1656   return 1;
1657 }
1658 
1659 struct annot_str *
1660 next_annot_entry(FILE *annot_fd, char *tmp_line, int n_tmp_line,
1661 		 struct annot_str *annot_p,
1662 		 struct annot_mstr *mtmp_annot_p,
1663 		 struct mngmsg *m_msp, int target);
1664 
1665 /* **************************************************************** */
1666 /* get_annot_list -- produces fasta file from sname
1667    if sname[0]=='<', read the file directly, goto (4)
1668    if sname[0]=='!', run a script
1669    (1) generate a temporary file name
1670    (2) write out list of blines
1671    (3) run m_msp->annot1_sname[] script against temporary file, producing table of annotations
1672 
1673    (4) read in the annotations and merge them into beststr
1674    (5) return number of annotations
1675 */
1676 /* **************************************************************** */
1677 
1678 int
get_annot_list(char * sname,struct mngmsg * m_msp,struct beststr ** bestp_arr,int nbest,int target,int debug)1679 get_annot_list(char *sname, struct mngmsg *m_msp, struct beststr **bestp_arr, int nbest,
1680 	       int target, int debug) {
1681   int i, status;
1682   long l_offset;
1683   char tmp_line[MAX_STR];
1684   char annot_bline_file[MAX_STR];
1685   int annot_bline_fd;
1686   char *annot_descr_file;
1687   char annot_script[MAX_LSTR];
1688   struct annot_str *annot_p;
1689   char *bp;
1690   int annot_seq_cnt;
1691   FILE *annot_fd=NULL;		/* file for annot accessions */
1692   struct annot_mstr mtmp_annot;
1693 
1694 #ifndef UNIX
1695   return 0;
1696 #else
1697 
1698   if (sname[0] == '!') {
1699 
1700     /* get two tmpfiles, one for bline, one for returned annotations
1701        (but it would make more sense to use popen() to get the
1702        annotations back
1703     */
1704 
1705     annot_bline_file[0] = '\0';
1706 
1707     if ((annot_descr_file=(char *)calloc(MAX_STR,sizeof(char)))==NULL) {
1708       fprintf(stderr,"*** error [%s:%d] - [get_annot_list] Cannot allocate annot_file",
1709 	      __FILE__, __LINE__);
1710     }
1711     annot_descr_file[0] = '\0';
1712 
1713     if ((bp=getenv("TMP_DIR"))!=NULL) {
1714       strncpy(annot_bline_file,bp,sizeof(annot_bline_file));
1715       annot_bline_file[sizeof(annot_bline_file)-1] = '\0';
1716       SAFE_STRNCAT(annot_bline_file,"/",sizeof(annot_bline_file));
1717     }
1718 
1719     SAFE_STRNCAT(annot_bline_file,"annot_bline_XXXXXX",sizeof(annot_bline_file));
1720     annot_bline_fd = mkstemp(annot_bline_file);
1721     strncpy(annot_descr_file,annot_bline_file,MAX_STR);
1722     annot_bline_file[sizeof(annot_bline_file)-1] = '\0';
1723     SAFE_STRNCAT(annot_descr_file,".annot",MAX_STR);
1724 
1725     /* write out accessions to annot_bline_file */
1726     if ((annot_fd =fdopen(annot_bline_fd,"w"))==NULL) {
1727       fprintf(stderr,"*** error [%s:%d] - Cannot open annot_bline_file: %s\n",__FILE__, __LINE__, annot_bline_file);
1728       goto no_annots;
1729     }
1730 
1731     for (i=0; i<nbest; i++) {
1732       if (bestp_arr[i]->mseq->annot_req_flag) {	continue; }
1733       if ((strlen(bestp_arr[i]->mseq->bline) > DESCR_OFFSET) &&
1734 	  (bp=strchr(bestp_arr[i]->mseq->bline+DESCR_OFFSET,' '))!=NULL) {*bp = '\0';}
1735       else {bp = NULL;}
1736       /* provide sequence length with offset, but only if offset is positive */
1737       l_offset = bestp_arr[i]->seq->l_offset+bestp_arr[i]->seq->l_off -1;
1738       if (l_offset < 0) { l_offset = 0;}
1739       fprintf(annot_fd,"%s\t%ld\n",bestp_arr[i]->mseq->bline,
1740 	      l_offset + bestp_arr[i]->seq->n1);
1741       if (bp != NULL) *bp=' ';
1742       bestp_arr[i]->mseq->annot_req_flag = 1;
1743     }
1744     fclose(annot_fd);
1745 
1746     subs_env(annot_script, sname+1, sizeof(annot_script));
1747     annot_script[sizeof(annot_script)-1] = '\0';
1748     SAFE_STRNCAT(annot_script," ",sizeof(annot_script));
1749     SAFE_STRNCAT(annot_script,annot_bline_file,sizeof(annot_script));
1750     SAFE_STRNCAT(annot_script," >",sizeof(annot_script));
1751     SAFE_STRNCAT(annot_script,annot_descr_file,sizeof(annot_script));
1752 
1753     /* run annot_script annot_bline_file > annot_descr_file */
1754     status = system(annot_script);
1755     if (!debug) {
1756 #ifdef UNIX
1757       unlink(annot_bline_file);
1758 #else
1759       _unlink(annot_bline_file);
1760 #endif
1761     }
1762 
1763     if (status == NO_FILE_EXIT) {	/* my specific return for no annots */
1764       goto no_annots;
1765     }
1766 
1767     if (status < 0 || status == 127) {
1768       fprintf(stderr,"*** error [%s:%d] - script: %s failed\n",
1769 	      __FILE__, __LINE__, annot_script);
1770       goto no_annots;
1771     }
1772   }
1773   else if (sname[0] == '<') {
1774     annot_descr_file = sname+1;
1775   }
1776   else {
1777     fprintf(stderr,"*** error [%s:%d] - %s not script (!) or file (<)\n",__FILE__, __LINE__, sname);
1778     goto no_annots;
1779   }
1780 
1781   /* read annotation file */
1782 
1783   if ((annot_fd=fopen(annot_descr_file,"r"))==NULL) {
1784     goto no_annots;
1785   }
1786 
1787   /* be sure to ask for annotation once */
1788   for (i=0; i<nbest; i++) {
1789     bestp_arr[i]->mseq->annot_req_flag = 1;
1790   }
1791   /* we have some annotations */
1792   /* the annotation script MUST return the annotations ordered as in annot_descr_file,
1793      in "fasta" form:
1794 
1795      >bline_descr
1796      pos<tab>label<tab>value?<tab>comment (which is not read in this version)
1797      1 *
1798      11 V N
1799   */
1800 
1801   /* now read the annotation/variant file */
1802 
1803   /* read #comments, =annot_defs at beginning of file */
1804   tmp_line[0] = '#';
1805   while (tmp_line[0] == '#' || tmp_line[0] == '=') {
1806     if (tmp_line[0] == '=') add_annot_def(m_msp, tmp_line+1,1);
1807     if (fgets(tmp_line, sizeof(tmp_line), annot_fd)==NULL) {
1808       fprintf(stderr,"*** error [%s:%d] - premature annotation file end (%s)\n",
1809 	      __FILE__,__LINE__, annot_descr_file);
1810       goto no_annots;
1811     }
1812   }
1813 
1814   /* set mtmp_annot to be initialized */
1815   mtmp_annot.tmp_arr_p = NULL;
1816   mtmp_annot.max_annot = 0;
1817 
1818   annot_seq_cnt = 0;
1819 
1820   /* now read in the annotations, but only the first time if asked multiple times */
1821   for (i=0; i<nbest; i++) {
1822     if (!bestp_arr[i]->mseq->annot_req_flag) {
1823       continue;
1824     }
1825     bestp_arr[i]->mseq->annot_req_flag = 0;
1826 
1827     if ((bp=strchr(tmp_line,'\n'))!=NULL) *bp = '\0';
1828     if ((bp=strchr(tmp_line,'\t'))!=NULL) *bp = '\0';
1829     if (tmp_line[0] != '>' || strncmp(&tmp_line[1], bestp_arr[i]->mseq->bline, strlen(&tmp_line[1])) != 0) {
1830       fprintf(stderr,"*** error [%s:%d] - %s description mismatch (%s:%s)\n",
1831 	      __FILE__,__LINE__,annot_descr_file, tmp_line, bestp_arr[i]->mseq->bline);
1832       goto no_annots;
1833     }
1834 
1835     annot_p = next_annot_entry(annot_fd, tmp_line, sizeof(tmp_line), bestp_arr[i]->seq->annot_p, &mtmp_annot, m_msp, target);
1836 
1837     if (annot_p) {
1838       bestp_arr[i]->seq->annot_p = annot_p;
1839       s_annot_to_aa1a(bestp_arr[i]->seq->l_offset + bestp_arr[i]->seq->l_off - 1,
1840 		      bestp_arr[i]->seq->n1, annot_p,m_msp->ann_arr, bestp_arr[i]->mseq->libstr);
1841       annot_seq_cnt++;
1842       mtmp_annot.tmp_arr_p = NULL;
1843     }
1844     else {
1845       if (bestp_arr[i]->seq->annot_p) {
1846 	bestp_arr[i]->seq->annot_p->n_annot = 0;
1847       }
1848     }
1849   }
1850 
1851   if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
1852 
1853   fclose(annot_fd);
1854   if (sname[0]=='!') {
1855     if (!debug) {
1856 #ifdef UNIX
1857       unlink(annot_descr_file);
1858 #else
1859       _unlink(annot_descr_file);
1860 #endif
1861     }
1862     free(annot_descr_file);
1863   }
1864   return annot_seq_cnt;
1865 
1866  no_annots:
1867   for (i=0; i<nbest; i++) {
1868     if (bestp_arr[i]->seq->annot_p) {
1869       if (bestp_arr[i]->seq->annot_p->n_annot > 0) {
1870 	bestp_arr[i]->seq->annot_p->n_annot = 0;
1871       }
1872     }
1873   }
1874   if (sname[0] == '!') free(annot_descr_file);
1875   return -1;
1876 #endif
1877 }
1878 
sort_annots(struct annot_entry ** s_annot,int n_annot)1879 void sort_annots(struct annot_entry **s_annot, int n_annot)
1880 {
1881   int gap, i, j, k;
1882   struct annot_entry *tmp;
1883   int v;
1884   int incs[6] = { 112, 48, 21, 7, 3, 1 };
1885 
1886   for ( k = 0; k < 6; k++) {
1887     gap = incs[k];
1888     for (i=gap; i < n_annot; i++) {
1889       tmp = s_annot[i];
1890       j = i;
1891       v = s_annot[i]->pos;
1892       while ( j >= gap && s_annot[j-gap]->pos > v) {
1893 	s_annot[j] = s_annot[j - gap];
1894 	j -= gap;
1895       }
1896       s_annot[j] = tmp;
1897     }
1898   }
1899 }
1900 
1901 struct annot_str *
next_annot_entry(FILE * annot_fd,char * tmp_line,int n_tmp_line,struct annot_str * annot_p,struct annot_mstr * mtmp_annot_p,struct mngmsg * m_msp,int target)1902 next_annot_entry(FILE *annot_fd, char *tmp_line, int n_tmp_line, struct annot_str *annot_p,
1903 		 struct annot_mstr *mtmp_annot_p, struct mngmsg *m_msp, int target) {
1904 
1905   char ctmp_label, ctmp_value, tmp_comment[MAX_STR], annot_acc[MAX_STR];
1906   char *bp;
1907   int f_pos, f_end;
1908   int i_ann, l_doms, r_doms;
1909   int n_annot = 0;
1910 
1911   struct annot_entry *tmp_ann_entry_arr, **s_tmp_ann_entry_arr;
1912 
1913   SAFE_STRNCPY(annot_acc, tmp_line, sizeof(annot_acc));
1914 
1915   if (init_tmp_annot(mtmp_annot_p, 32)==0) return NULL;
1916 
1917   tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
1918 
1919   /* read through each annotation in file */
1920   while (fgets(tmp_line, n_tmp_line, annot_fd)!=NULL ) {
1921     if (tmp_line[0] == '>') goto next_bline;	/* start of new annotation */
1922     if (tmp_line[0] == '#') continue;		/* ignore comments */
1923     if (tmp_line[0] == '=') {	/* symbol definition */
1924       add_annot_def(m_msp, tmp_line+1,1);
1925       continue;
1926     }
1927 
1928     if (n_annot >= mtmp_annot_p->max_annot - 1) {
1929       /* try to expand annotation array */
1930       if (update_tmp_annot(mtmp_annot_p)==0) {
1931 	return NULL;
1932       }
1933       tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
1934     }
1935 
1936     /* sscanf cannot give strings with blanks */
1937     /* sscanf(tmp_line,"%d %c %c %s", &f_pos, &ctmp_label, &ctmp_value, tmp_comment); */
1938     tmp_comment[0] = '\0';
1939     if ((bp=strchr(tmp_line,'\r')) || (bp=strchr(tmp_line,'\n'))) *bp='\0';	/* clean up EOL */
1940     if ((bp=strchr(tmp_line,'\t'))!=NULL) {	/* fields MUST be tab delimited */
1941       f_pos=atoi(tmp_line) - 1;	/* get first field -- f_pos, converted to 0-offset  */
1942       ctmp_label = bp[1];	/* get second field -- ctmp_label */
1943       if ((bp=strchr(bp+1,'\t'))!=NULL) {	/* next field could be f_end or ctmp_value */
1944 	if (ctmp_label == '-') { f_end = atoi(bp+1) -1; ctmp_value = '\0';}
1945 	else {ctmp_value = bp[1]; f_end = f_pos;} 	/* have variant, not coordinate */
1946 	if ((bp=strchr(bp+1,'\t'))!=NULL) {		/* if last <tab>, get comment */
1947 	  strncpy(tmp_comment,bp+1,sizeof(tmp_comment));
1948 	}
1949       }
1950     }
1951     else {	/* no tab */
1952       continue;
1953     }
1954 
1955     tmp_ann_entry_arr[n_annot].pos = f_pos;
1956     tmp_ann_entry_arr[n_annot].end = f_end;
1957     tmp_ann_entry_arr[n_annot].label=ctmp_label;
1958     tmp_ann_entry_arr[n_annot].value=ctmp_value;
1959     tmp_ann_entry_arr[n_annot].comment = NULL;
1960     tmp_ann_entry_arr[n_annot].target = target;
1961     tmp_ann_entry_arr[n_annot].start = NULL;
1962 
1963     if (tmp_comment[0]) {
1964       if ((tmp_ann_entry_arr[n_annot].comment=(char *)calloc(strlen(tmp_comment)+1,sizeof(char)))!=NULL) {
1965 	strncpy(tmp_ann_entry_arr[n_annot].comment,tmp_comment,strlen(tmp_comment));
1966       }
1967     }
1968     if (ctmp_label== 'V') {
1969       /* map the .value from ascii to encoded residue */
1970       /* this must be lascii, not qascii, because the script
1971 	 describes the library, not the query, and for FASTX/TFASTX,
1972 	 those are different */
1973       tmp_ann_entry_arr[n_annot].value = lascii[tmp_ann_entry_arr[n_annot].value];
1974     }
1975     else if (ctmp_label == '[') {
1976       i_ann = add_annot_char(m_msp->ann_arr, ctmp_label);
1977       if (i_ann > 0) {
1978 	qascii[ctmp_label] = NANN + i_ann;
1979 	m_msp->ann_arr_def[i_ann] = NULL;
1980       }
1981     }
1982     else if (ctmp_label == ']') {
1983       i_ann = add_annot_char(m_msp->ann_arr, ctmp_label);
1984       if (i_ann > 0) {
1985 	qascii[ctmp_label] = NANN + i_ann;
1986 	m_msp->ann_arr_def[i_ann] = NULL;
1987       }
1988     }
1989     else if (ctmp_label == '-') {	/* if ctmp_label == '-', have f_end, which must be added with ']' */
1990       /* make sure start/stop characters are in annotation alphabet */
1991       i_ann = add_annot_char(m_msp->ann_arr, '[');
1992       if (i_ann > 0) {
1993 	qascii['['] = NANN + i_ann;
1994 	m_msp->ann_arr_def[i_ann] = NULL;
1995       }
1996 
1997       tmp_ann_entry_arr[n_annot].label='[';
1998       n_annot++;
1999 
2000       if (n_annot >= mtmp_annot_p->max_annot - 1) {
2001 	if (update_tmp_annot(mtmp_annot_p)==0) {
2002 	  return NULL;
2003 	}
2004 	tmp_ann_entry_arr = mtmp_annot_p->tmp_arr_p;
2005       }
2006 
2007       /* only required for start - stop annotations; requires sort
2008 	 later (which is not currently used */
2009       tmp_ann_entry_arr[n_annot].pos = f_end;
2010       tmp_ann_entry_arr[n_annot].end = f_end;
2011       tmp_ann_entry_arr[n_annot].label=']';
2012       tmp_ann_entry_arr[n_annot].value=ctmp_value;
2013       tmp_ann_entry_arr[n_annot].comment = NULL;
2014       tmp_ann_entry_arr[n_annot].target = target;
2015       tmp_ann_entry_arr[n_annot].start = &tmp_ann_entry_arr[n_annot-1];
2016 
2017       i_ann = add_annot_char(m_msp->ann_arr, ']');
2018       if (i_ann > 0) {
2019 	qascii[']'] = NANN + i_ann;
2020 	m_msp->ann_arr_def[i_ann] = NULL;
2021       }
2022     }
2023     else if ((i_ann = add_annot_char(m_msp->ann_arr, ctmp_label)) > 0) {
2024       m_msp->ann_arr_def[i_ann] = NULL;
2025       qascii[ctmp_label] = NANN + i_ann;
2026     }
2027     n_annot++;
2028   }
2029 
2030  next_bline:
2031   if (n_annot) {  /* if we have annotations, save them and set tmp_ann_entry_arr = NULL */
2032     tmp_ann_entry_arr = (struct annot_entry *)realloc(tmp_ann_entry_arr, (n_annot+1)*sizeof(struct annot_entry));
2033 
2034     if ((s_tmp_ann_entry_arr = (struct annot_entry **)calloc((n_annot+1),sizeof(struct annot_entry *)))==NULL) {
2035       fprintf(stderr,"*** error [%s:%d] - cannot alloc s_tmp_ann_entry_arr[%d]",
2036 	      __FILE__,__LINE__, n_annot+1);
2037       exit(1);
2038     }
2039 
2040     /* pair every domain start/stop */
2041     /* (1) count number of domains/check for consistency */
2042     l_doms = r_doms = 0;
2043     for (i_ann=0; i_ann < n_annot; i_ann++) {
2044       if (tmp_ann_entry_arr[i_ann].label == '[') l_doms++;
2045       if (tmp_ann_entry_arr[i_ann].label == ']') r_doms++;
2046     }
2047     if (l_doms != r_doms) {
2048       fprintf(stderr,"*** error [%s:%d] - unpaired domains: %s %d != %d\n",
2049 	      __FILE__,__LINE__, annot_acc, l_doms, r_doms);
2050 #ifdef DEBUG
2051       for (i_ann=0; i_ann < n_annot; i_ann++) {
2052 	if (tmp_ann_entry_arr[i_ann].label == '[')
2053 	  fprintf(stderr, "%ld %c %s\n",tmp_ann_entry_arr[i_ann].pos,tmp_ann_entry_arr[i_ann].label,tmp_ann_entry_arr[i_ann].comment);
2054 	if (tmp_ann_entry_arr[i_ann].label == ']')
2055 	  fprintf(stderr, "%ld %c\n",tmp_ann_entry_arr[i_ann].pos,tmp_ann_entry_arr[i_ann].label);
2056       }
2057 #endif
2058     }
2059     else if (l_doms > 0) {
2060       if ((tmp_domain_entry_arr = (struct annot_entry *)calloc((l_doms+1),sizeof(struct annot_entry)))==NULL) {
2061 	fprintf(stderr,"*** error [%s:%d] - cannot alloc s_tmp_ann_entry_arr[%d]",
2062 		__FILE__,__LINE__, l_doms+1);
2063       }
2064       else {
2065 	l_doms = 0;
2066 	for (i_ann=0; i_ann < n_annot+1; i_ann++) {
2067 	  if (tmp_ann_entry_arr[i_ann].label == '[') {
2068 	    tmp_domain_entry_arr[l_doms].pos = tmp_ann_entry_arr[i_ann].pos;
2069 	    tmp_domain_entry_arr[l_doms].label = '-';
2070 	    tmp_domain_entry_arr[l_doms].comment = tmp_ann_entry_arr[i_ann].comment;
2071 	  }
2072 	  else if (tmp_ann_entry_arr[i_ann].label == ']') {
2073 	    tmp_domain_entry_arr[l_doms].end = tmp_ann_entry_arr[i_ann].pos;
2074 	    l_doms++;
2075 	  }
2076 	}
2077       }
2078     }
2079 
2080     for (i_ann=0; i_ann < n_annot+1; i_ann++) {
2081       s_tmp_ann_entry_arr[i_ann] = &tmp_ann_entry_arr[i_ann];
2082     }
2083 
2084     sort_annots(s_tmp_ann_entry_arr,n_annot);
2085 
2086     /* now allocate an annot_p if necessary, and link tmp_ann_entry_arr to it */
2087     if (annot_p || (annot_p = calloc(1,sizeof(struct annot_str)))!=NULL) {
2088       annot_p->annot_arr_p = tmp_ann_entry_arr;
2089       annot_p->s_annot_arr_p = s_tmp_ann_entry_arr;
2090       annot_p->n_annot = n_annot;
2091       annot_p->n_domains = l_doms;
2092       /* set to NULL to re-initialize */
2093     }
2094   }
2095   else {
2096     annot_p = NULL;
2097   }
2098   return annot_p;
2099 }
2100 
2101 
2102 /* **************************************************************** */
2103 /* add_annot_char(ann_arr, ctmp_label) --
2104 
2105    (1) add annotation character to ann_arr if not present
2106    (2) return i_ann if added
2107 */
2108 /* **************************************************************** */
2109 
2110 int
add_annot_char(unsigned char * ann_arr,char ctmp_label)2111 add_annot_char(unsigned char *ann_arr, char ctmp_label) {
2112   int i_ann;
2113 
2114   if (ann_arr[0] == '\0') {
2115     ann_arr[0] = ' '; ann_arr[1] = '\0';
2116   }
2117 
2118   /* check to see if already there? */
2119   if (strchr((char *)ann_arr,ctmp_label)==NULL) {
2120     /* check for room for another character */
2121     if (strlen((char *)ann_arr) >= MAX_FN) {
2122       fprintf(stderr,"*** error [%s:%d] -  too many annotation characters: len(%s) + %c > %d\n",
2123 	      __FILE__, __LINE__, ann_arr, ctmp_label, MAX_FN-1);
2124       return 0;
2125     }
2126     else {
2127       ann_arr[i_ann=strlen((char *)ann_arr)] = ctmp_label;      /* add the character */
2128       ann_arr[i_ann+1] = '\0';	      /* guarantee null termination */
2129       return i_ann;
2130     }
2131   }
2132   else {
2133     return 0;
2134   }
2135 }
2136 
2137 /* **************************************************************** */
2138 /* get_annot -- produces fasta file from m_msp->sname script
2139    (modified 20-Sept-2012 to not use intermediate file)
2140 
2141    # (1) generate a temporary file name
2142    # (2) write out one bline (or portion that include accession)
2143    # (3) run sname[] script against temporary file, producing table of annotations
2144    (1) run script bline_id
2145    (4) read in the annotations and put them in struct annot_entry;
2146    (5) modify *annot_p to point to structure
2147    (6) return number of annotations
2148 */
2149 /* **************************************************************** */
2150 
2151 int
get_annot(char * sname,struct mngmsg * m_msp,char * bline,long offset,int n1,struct annot_str ** annot_p,int target,int debug)2152 get_annot(char *sname, struct mngmsg *m_msp, char *bline, long offset, int n1, struct annot_str **annot_p,
2153 	  int target, int debug) {
2154 
2155   char tmp_line[MAX_STR];
2156   FILE *annot_data_fd;
2157   char bline_descr[MAX_STR];
2158   char annot_data_file[MAX_LSTR];
2159   char annot_script[MAX_LSTR];
2160   long q_offset;
2161 
2162   char *bp;
2163   FILE *annot_fd=NULL;		/* file for annot accessions */
2164   struct annot_mstr mtmp_annot;
2165 
2166 #ifndef UNIX
2167   return 0;
2168 #else
2169 
2170   if (sname[0] == '!') {
2171     /* popen implementation */
2172 
2173     annot_data_file[0] = '\0';
2174 
2175     if (bline[0] == '>') {
2176       SAFE_STRNCPY(bline_descr, bline+1,sizeof(bline_descr));
2177     }
2178     else {
2179       SAFE_STRNCPY(bline_descr, bline,sizeof(bline_descr));
2180     }
2181     if ((strlen(bline_descr) > DESCR_OFFSET) &&
2182 	(bp=strchr(bline_descr+DESCR_OFFSET,' '))!=NULL) {*bp = '\0';}
2183     else {bp = NULL;}
2184 
2185     q_offset = m_msp->q_offset + m_msp->q_off - 1;
2186     if (q_offset < 0) { q_offset = 0;}
2187     sprintf(annot_script,"%s \"%s\" %ld",sname+1, bline_descr,q_offset+m_msp->n0);
2188     annot_script[sizeof(annot_script)-1] = '\0';
2189 
2190     annot_fd = popen(annot_script,"r");
2191   }
2192   else if (sname[0] == '<') {
2193     SAFE_STRNCPY(annot_data_file,sname+1,sizeof(annot_data_file));
2194     annot_fd=fopen(annot_data_file,"r");
2195   }
2196   else {
2197     fprintf(stderr,"*** error [%s:%d] - %s not script (!) or file (<)\n",__FILE__, __LINE__, sname);
2198     goto no_annots;
2199   }
2200 
2201   if (!annot_fd) {
2202     goto no_annots;
2203   }
2204   else {	/* read the annotations into the array */
2205 
2206     /* read #comments, =annot_defs at beginning of file */
2207     tmp_line[0] = '#';
2208     while (tmp_line[0] == '#' || tmp_line[0] == '=') {
2209       if (tmp_line[0] == '=') add_annot_def(m_msp, tmp_line+1,1);
2210       if (fgets(tmp_line, sizeof(tmp_line), annot_fd)==NULL) {
2211 	fprintf(stderr,"*** error [%s:%d] - premature annotation file end (%s)\n",
2212 		__FILE__,__LINE__, annot_data_file);
2213 	goto no_annots;
2214       }
2215     }
2216 
2217     /* set mtmp_annot to be initialized */
2218     mtmp_annot.tmp_arr_p = NULL;
2219     mtmp_annot.max_annot = 0;
2220 
2221     /* strlen(&tmp_line[1])-1 to remove '>' and beginning and '\n' at end */
2222     if (tmp_line[0] != '>') {
2223       fprintf(stderr,"*** error [%s:%d] - no %s description: [%s]\n",
2224 	      __FILE__,__LINE__,annot_data_file, tmp_line);
2225       goto no_annots;
2226     }
2227 
2228     *annot_p = next_annot_entry(annot_fd, tmp_line, sizeof(tmp_line), *annot_p, &mtmp_annot, m_msp, target);
2229 
2230     if (sname[0] == '!') {
2231       pclose(annot_fd);
2232     }
2233     else {
2234       fclose(annot_fd);
2235     }
2236 
2237     /* now allocate an annot_p if necessary, and link tmp_ann_entry_arr to it */
2238     if (*annot_p) {
2239       s_annot_to_aa1a(offset, n1, (*annot_p),m_msp->ann_arr,"get_annot");
2240       return (*annot_p)->n_annot;
2241     }
2242     else {
2243       if (mtmp_annot.tmp_arr_p) free(mtmp_annot.tmp_arr_p);
2244       return 0;
2245     }
2246   }
2247 
2248  no_annots:
2249   return -1;
2250 #endif
2251 }
2252 
2253 /* s_annot_to_aa1a -- takes an annot_entry[] and converts it to an *aa1_ann
2254  */
2255 void
s_annot_to_aa1a(long offset,int n1,struct annot_str * annot_p,unsigned char * ann_arr,char * tmp_line)2256 s_annot_to_aa1a(long offset, int n1, struct annot_str *annot_p, unsigned char *ann_arr, char *tmp_line) {
2257   unsigned char *aa1a_tmp;
2258   int i, ic, n_annot;
2259   struct annot_entry *this_annot;
2260   char *bp;
2261 
2262   if ((aa1a_tmp = (unsigned char *)calloc(n1+2,sizeof(char)))==NULL) {
2263     fprintf(stderr,"*** error [%s:%d] - cannot allocate aa1a_ann[%d] array\n",
2264 	    __FILE__, __LINE__, n1);
2265     return;
2266   }
2267 
2268   if (offset < 0) offset++;
2269 
2270   for (i=0; i < annot_p->n_annot; i++) {
2271     this_annot = &annot_p->annot_arr_p[i];
2272     /* skip VAR labels */
2273     if (this_annot->label == 'V') { continue; }
2274     if (this_annot->label == '-') {
2275       aa1a_tmp[this_annot->pos]=qascii['['] - NANN;
2276       aa1a_tmp[this_annot->end]=qascii[']'] - NANN;
2277       continue;
2278     }
2279     if (strchr((char *)ann_arr, this_annot->label)==NULL) {continue;}
2280     if (this_annot->pos - offset < n1) {
2281       if (this_annot->pos >= offset) {	/* not an error, but annotation must be in range */
2282 	aa1a_tmp[this_annot->pos - offset]=qascii[this_annot->label] - NANN;
2283       }
2284     }
2285     else {
2286       fprintf(stderr, "*** error [%s:%d] - this_annot->pos:[%ld - %ld] out of range: %d : %s\n",
2287 	      __FILE__, __LINE__, this_annot->pos,offset, n1, tmp_line);
2288     }
2289   }
2290   annot_p->aa1_ann = aa1a_tmp;
2291 }
2292 
2293 /* save_best captures much of the complexity of saving the best scores
2294    and appropriately sampling the scores for statistical analysis. It
2295    does the following:
2296 
2297    (1) update s_info counts for functions like fasta/x/y that don't
2298        optimize every score
2299 
2300    (2) for every result in the buffer:
2301        (a) decide if it should be used for statistical sampling
2302        (b) if the number of samples > MAX_STATS, then run
2303            process_hist() and update all the zscores
2304        (c) reset everything for next sequence
2305 
2306    (3) must ensure that -BIGNUM are never in best[]
2307 
2308 */
2309 
2310 #include "thr_buf_structs.h"
2311 #ifndef PCOMPLIB
2312 #define RESULTS_BUF reader_buf
2313 #define XTERNAL
2314 #include "thr_bufs2.h"
2315 #else
2316 #define RESULTS_BUF worker_buf
2317 #include "pcomp_bufs.h"
2318 #endif
2319 
2320 extern char *prog_func;		/* function label */
2321 extern int fa_max_workers;
2322 extern struct buf_head *lib_buf2_list;
2323 #ifdef DEBUG
2324 void check_rbuf(struct buf_head *cur_buf);
2325 #endif
2326 extern void get_rbuf(struct buf_head **lib_buf, int max_work_buf);
2327 extern void put_rbuf(struct buf_head *lib_buf, int max_work_buf);
2328 extern void wait_rbuf(int max_work_buf);
2329 extern void rbuf_done(int nthreads);
2330 extern void put_rbuf_done(int nthreads, struct buf_head *lib_buf,
2331 			  int max_work_buf);
2332 extern int
2333 process_hist(struct stat_str *sptr, int nstats,
2334 	     const struct mngmsg *m_msg,
2335 	     struct pstruct *ppst,
2336 	     struct hist_str *hist, void **pstat_void, struct score_count_s *s_info, int do_hist);
2337 
2338 extern void addhistz(double, struct hist_str *); /* scaleswn.c */
2339 void selectbestz(struct beststr **, int, int );
2340 extern double find_z(int score, double escore, int length, double comp, void *);
2341 extern double zs_to_E(double zs,int n1, int dnaseq, long entries, struct db_str db);
2342 extern struct beststr **bestp_arr;	/* array of pointers */
2343 extern int nbest;
2344 extern int nstats, nqstats, nrstats, pre_nstats, kstats, shuff_tot, sstats;
2345 extern double zbestcut;	/* cut off for best z-score */
2346 extern int bestfull;	/* index for selectbest() */
2347 extern int stats_done;	/* flag for z-value processing */
2348 extern void *rand_state;
2349 extern struct stat_str *stats; /* array of scores for statistics from real
2350 			   (or shuffled) sequences*/
2351 extern struct stat_str *qstats;	/* array of scores for shuffled query stats */
2352 extern struct stat_str *rstats;	/* array of scores from shuffled library */
2353 
2354 /* in the current version (fasta_35_01) save_best is used by both
2355    threaded and unthreaded versions */
2356 
2357 #define COPY_RST_P(d,s) 		\
2358 { d->rst.score[0] = s->rst.score[0];	\
2359   d->rst.score[1] = s->rst.score[1];	\
2360   d->rst.score[2] = s->rst.score[2];	\
2361   d->rst.valid_stat = s->rst.valid_stat; \
2362   d->rst.comp = s->rst.comp;		\
2363   d->rst.H = s->rst.H;			\
2364   d->rst.escore = s->rst.escore;	\
2365   d->rst.segnum = s->rst.segnum;	\
2366   d->rst.seglen = s->rst.seglen;	\
2367 }
2368 
2369 void
save_best(struct buf_head * lib_bhead_p,const struct mngmsg * m_msp,struct pstruct * ppst,struct db_str * ldb,FILE * fdata,struct hist_str * histp,void ** pstat_voidp,struct score_count_s * s_info)2370 save_best(struct buf_head *lib_bhead_p,
2371 	  const struct mngmsg *m_msp, struct pstruct *ppst,
2372 	  struct db_str *ldb, FILE *fdata,
2373 	  struct hist_str *histp, void **pstat_voidp,
2374 	  struct score_count_s *s_info)
2375 {
2376   double zscore;
2377   int i_score;
2378   struct beststr *bbp;
2379   struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
2380   struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
2381   int i, t_best, t_rbest, t_qrbest, tm_best, t_n1, sc_ix;
2382   int t_valid_stat, tr_valid_stat, use_shuff, zsflag_save;
2383   double e_score, tm_escore, t_rescore, t_qrescore;
2384   int buf2_cnt;
2385 
2386   if (!lib_bhead_p->hdr.have_results) return;
2387   if ((buf2_cnt=lib_bhead_p->hdr.buf2_cnt) <= 0) return;
2388   lib_buf2_dp = lib_bhead_p->buf2_data;
2389   lib_buf2_rp = lib_bhead_p->buf2_res;
2390 
2391   shuff_tot += lib_bhead_p->hdr.shuff_cnt;
2392   s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
2393   s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
2394   s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
2395   s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;;
2396 
2397   sc_ix = ppst->score_ix;
2398 
2399   t_best = t_rbest = t_qrbest = -BIGNUM;
2400   tm_escore = t_rescore = t_qrescore = FLT_MAX;
2401   t_valid_stat = tr_valid_stat = 0;
2402   if (ppst->zsflag >= 10 && ppst->zsflag < 20) { use_shuff = 1;}
2403   else { use_shuff = 0;}
2404 
2405 #ifdef DEBUG
2406   if (fdata) {
2407     fprintf(fdata,">save_best: %d\n",buf2_cnt);
2408   }
2409 #endif
2410 
2411   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2412   while (buf2_cnt--) { /* count down the number of results */
2413     rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
2414     rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
2415 
2416     /* perhaps should use explicit flag to indicate no score */
2417     if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
2418 
2419     /* i_score: current raw sorting score */
2420     i_score = rbuf_rp->rst.score[sc_ix];
2421     /* e_score, current escore */
2422     e_score = rbuf_rp->rst.escore;
2423 
2424     /* this should be done in the thread, and a sorted set of indexes
2425        should be produced by the thread, so we just go down the list
2426        to the zscore threshold */
2427     zscore = (double)i_score;
2428     if (stats_done) {
2429       zscore=find_z(i_score, e_score, rbuf_dp->seq->n1,(double)rbuf_rp->rst.comp,
2430 			  *pstat_voidp);
2431     }
2432 
2433     /* we have complex logic to decide:
2434        (a) for multiframe results, which is the best
2435        (b) information about valid stats
2436        we should simply return a stats array where all this is figured
2437        out in the thread.
2438      */
2439     t_n1 = rbuf_dp->seq->n1;
2440     if (i_score > t_best) tm_best = t_best = i_score;
2441     if (e_score < tm_escore) tm_escore = e_score;
2442     if (rbuf_rp->rst.valid_stat > t_valid_stat) {
2443       t_valid_stat = 1;
2444     }
2445 
2446     /* this stuff happens only for fasts/fastm/fastf
2447        again, the t_qrbest stuff should be done in the thread
2448        rather than check for every hit, run through the loop
2449        only if necessary.
2450      */
2451     if (m_msp->qshuffle) {
2452       if (rbuf_rp->qr_score > t_qrbest)
2453 	t_qrbest = rbuf_rp->qr_score;
2454       if (rbuf_rp->qr_escore < t_qrescore)
2455 	t_qrescore = rbuf_rp->qr_escore;
2456 
2457       if (rbuf_dp->frame == m_msp->nitt1 && t_qrbest > 0 && nqstats < m_msp->shuff_max) {
2458 	qstats[nqstats].n1 = rbuf_dp->seq->n1;	/* save the best score */
2459 	qstats[nqstats].comp =  rbuf_rp->rst.comp;
2460 	qstats[nqstats].H = rbuf_rp->rst.H;
2461 	qstats[nqstats].escore = t_qrescore;
2462 	qstats[nqstats++].score = t_qrbest;
2463 	t_qrbest = -BIGNUM;	/* reset t_qrbest, t_qrescore */
2464 	t_qrescore = FLT_MAX;
2465       }
2466     }	/* m_msp->qshuffle */
2467 
2468     if (use_shuff) {
2469       /* this check is required because some sequences scheduled to be
2470 	 used for statistics may not in fact be returning a score (if
2471 	 they are outside the -M range, for example.
2472        */
2473       if (rbuf_rp->r_rst.score[0] == -BIGNUM) { tr_valid_stat = 0; }
2474       if (rbuf_rp->r_rst.valid_stat > tr_valid_stat) {
2475 	tr_valid_stat = 1;
2476       }
2477       if (rbuf_rp->r_rst.score[sc_ix] > t_rbest) {
2478 	t_rbest = rbuf_rp->r_rst.score[sc_ix];
2479 	t_rescore = rbuf_rp->r_rst.escore;
2480       }
2481     }
2482 
2483     /* need to look for frame 0 if TFASTA, then save stats at frame 6 */
2484     if (fdata) {
2485       fprintf(fdata,
2486 	      "%-12s %6d %d %.5f %.5f %4d %4d %4d %2d %2d %4d %4d %4d %2d %2d %5d %8lld\n",
2487 	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1,rbuf_dp->frame,rbuf_rp->rst.comp,rbuf_rp->rst.H,
2488 	      rbuf_rp->rst.score[0],rbuf_rp->rst.score[1],rbuf_rp->rst.score[2],
2489 	      t_valid_stat, rbuf_rp->rst.alg_info,
2490 	      (rbuf_rp->r_rst.score[0]<0 ? -1 : rbuf_rp->r_rst.score[0]),
2491 	      (rbuf_rp->r_rst.score[1]<0 ? -1 : rbuf_rp->r_rst.score[1]),
2492 	      (rbuf_rp->r_rst.score[2]<0 ? -1 : rbuf_rp->r_rst.score[2]),
2493 	      tr_valid_stat, rbuf_rp->r_rst.alg_info,
2494 	      rbuf_dp->stats_idx, rbuf_dp->mseq->lseek);
2495     }
2496 
2497     /* statistics done for best score of set */
2498 
2499     if (rbuf_dp->frame == m_msp->nitt1) {
2500       ldb->entries++;
2501       ldb->length += t_n1;
2502       if (ldb->length > LONG_MAX) {
2503 	ldb->length -= LONG_MAX; ldb->carry++;
2504       }
2505     }
2506 
2507     if (rbuf_dp->frame == m_msp->nitt1 && ppst->zsflag >= 0) {
2508       /* if this sample should be used for statistics */
2509       if (use_shuff) t_valid_stat = tr_valid_stat;
2510       if (t_valid_stat) {
2511 	/* we've got our initial MAX_STATS values */
2512 	if (nstats >= MAX_STATS) {
2513 	  if (!stats_done) {
2514 	    zsflag_save = ppst->zsflag;
2515 	    if (ppst->zsflag > 20) {
2516 	      ppst->zsflag -= 20;
2517 	    }
2518 	    ppst->zsflag_f = process_hist(stats,nstats,m_msp, ppst,
2519 					  histp, pstat_voidp,s_info, 0);
2520 	    ppst->zsflag = zsflag_save;
2521 	    kstats = nstats;
2522 	    if (ppst->zsflag >= 0) {	/* this is redundant, but rare */
2523 	      stats_done = 1;
2524 	      for (i=0; i< nstats; i++) {
2525 		bestp_arr[i]->zscore =
2526 		  find_z(bestp_arr[i]->rst.score[ppst->score_ix],
2527 			 bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
2528 			 bestp_arr[i]->rst.comp, *pstat_voidp);
2529 	      }
2530 	    }
2531 	  }
2532 	}
2533 	else {
2534 	  /* this logic allows stats_idx to be over-ruled for searches
2535 	     where every query does not generate a score */
2536 	  rbuf_dp->stats_idx = nstats;
2537 	  nstats++;
2538 	}
2539       }
2540 
2541       if (rbuf_dp->stats_idx >= 0 && t_valid_stat) {
2542 	if (rbuf_dp->stats_idx >= MAX_STATS || nstats > MAX_STATS) {
2543 	  fprintf(stderr, "*** error [%s:%d] - nstats index [%d] out of range [%d,%d]\n",
2544 		  __FILE__, __LINE__,
2545 		  rbuf_dp->stats_idx, nstats,MAX_STATS);
2546 	}
2547 	else {  /* stats_idx is in range */
2548 	  sstats++;
2549 	  stats[rbuf_dp->stats_idx].n1 = t_n1;
2550 	  stats[rbuf_dp->stats_idx].comp = rbuf_rp->rst.comp;
2551 	  stats[rbuf_dp->stats_idx].H = rbuf_rp->rst.H;
2552 	  if (use_shuff) { /* use shuffled score */
2553 	    stats[rbuf_dp->stats_idx].escore  = t_rescore;
2554 	    stats[rbuf_dp->stats_idx].score = t_rbest;
2555 	  }
2556 	  else { /* real score, not shuffled */
2557 	    stats[rbuf_dp->stats_idx].escore  = tm_escore;
2558 	    stats[rbuf_dp->stats_idx].score = tm_best;
2559 	  }
2560 	} /* end stats_idx in range */
2561       }	/* end have valid stats_idx */
2562 
2563       if (t_valid_stat && stats_done && histp) {
2564 	addhistz(find_z(t_best, tm_escore, rbuf_dp->seq->n1, (double) rbuf_rp->rst.comp,
2565 			    *pstat_voidp), histp);
2566       }
2567       /* reset best scores */
2568       t_best = t_rbest = -BIGNUM;
2569       tm_escore = t_rescore = FLT_MAX;
2570       t_valid_stat = tr_valid_stat = 0;
2571     }
2572 
2573     /*
2574     if (rbuf_rp->rst.score[ppst->score_ix] > 200) {
2575       fprintf(stderr, "high score[%d]: %s %d: %d\n", rbuf_dp->seq->index,
2576 	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1, rbuf_rp->rst.score[ppst->score_ix]);
2577     }
2578     */
2579 
2580     if (zscore > zbestcut) {
2581       if (nbest >= MAX_BEST) {
2582 	bestfull = nbest-MAX_BEST/4;
2583 	selectbestz(bestp_arr,bestfull-1,nbest);
2584 	zbestcut = bestp_arr[bestfull-1]->zscore;
2585 	nbest = bestfull;
2586       }
2587       bbp = bestp_arr[nbest++];
2588 
2589       COPY_RST_P(bbp, rbuf_rp);
2590 
2591       bbp->seq = rbuf_dp->seq;
2592       bbp->mseq = rbuf_dp->mseq;
2593       bbp->n1 = rbuf_dp->seq->n1;
2594 #ifdef DEBUG
2595       bbp->adler32_crc = rbuf_dp->seq->adler32_crc;
2596 #endif
2597       /* rbuf_dp->best_save is set after a rbuf_dp is entered into best_str */
2598       if (rbuf_dp->best_save) {
2599 	/* a previous rbuf_dp->seq is in best_str at best_save */
2600 	if (rbuf_dp->best_save->seq == rbuf_dp->seq) {
2601 	  /* the best_save->seq matches the rbuf_dp->seq */
2602 	  bbp->bbp_link = rbuf_dp->best_save;
2603 	  /* bbp_link tells where this ->seq can be found */
2604 	}
2605 	else {
2606 	  bbp->bbp_link = NULL;
2607 	}
2608       }
2609       rbuf_dp->best_save = bbp;
2610       lib_bhead_p->hdr.have_best_save = 1;
2611       bbp->zscore = zscore;
2612       bbp->frame = rbuf_dp->frame;
2613     }
2614   }
2615 }
2616 
2617 void
save_best2(struct buf_head * lib_bhead_p,const struct mngmsg * m_msp,struct pstruct * ppst,struct db_str * ldb,FILE * fdata,struct hist_str * histp,void ** pstat_voidp,struct score_count_s * s_info)2618 save_best2(struct buf_head *lib_bhead_p,
2619 	   const struct mngmsg *m_msp, struct pstruct *ppst,
2620 	   struct db_str *ldb, FILE *fdata,
2621 	   struct hist_str *histp, void **pstat_voidp,
2622 	   struct score_count_s *s_info)
2623 {
2624   double zscore;
2625   int i_score;
2626   struct beststr *bbp;
2627   struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
2628   struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
2629   int i, sc_ix;
2630   int t_valid_stat, use_shuff, zsflag_save;
2631   double e_score;
2632   int buf2_cnt;
2633 
2634   if (!lib_bhead_p->hdr.have_results) return;
2635   if ((buf2_cnt = lib_bhead_p->hdr.buf2_cnt) <= 0) return;
2636 
2637   /*
2638 #ifdef DEBUG
2639   fprintf(stderr," save_best2: lib_bhead_p->buf2_data[0]->mseq->index/lseek: %d,%lld\n",
2640 	  lib_bhead_p->buf2_data[0].mseq->index,lib_bhead_p->buf2_data[0].mseq->lseek);
2641 #endif
2642   */
2643   if (ppst->zsflag >= 10 && ppst->zsflag < 20) { use_shuff = 1;}
2644   else {use_shuff = 0;}
2645 
2646   shuff_tot += lib_bhead_p->hdr.shuff_cnt;
2647   s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
2648   s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
2649   s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
2650   s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;;
2651   sc_ix = ppst->score_ix;
2652 
2653   /* save the raw data if requested */
2654   if (fdata) {
2655 #ifdef DEBUG
2656     fprintf(fdata,">save_best: %d\n",buf2_cnt);
2657 #endif
2658     lib_buf2_dp = lib_bhead_p->buf2_data;
2659     lib_buf2_rp = lib_bhead_p->buf2_res;
2660     buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2661     while (buf2_cnt--) { /* count down the number of results */
2662 
2663       rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
2664       rbuf_dp = lib_buf2_dp++;	/* step through the data buffer */
2665 
2666       /* perhaps should use explicit flag to indicate no score */
2667       if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
2668 
2669       fprintf(fdata,
2670 	      "%-12s %6d %d %.5f %.5f %4d %4d %4d %2d %2d %4d %4d %4d %2d %2d %5d %8lld\n",
2671 	      rbuf_dp->mseq->libstr, rbuf_dp->seq->n1,rbuf_dp->frame,rbuf_rp->rst.comp,rbuf_rp->rst.H,
2672 	      rbuf_rp->rst.score[0],rbuf_rp->rst.score[1],rbuf_rp->rst.score[2],
2673 	      rbuf_rp->is_valid_stat, rbuf_rp->rst.alg_info,
2674 	      (rbuf_rp->r_rst.score[0]<0 ? -1 : rbuf_rp->r_rst.score[0]),
2675 	      (rbuf_rp->r_rst.score[1]<0 ? -1 : rbuf_rp->r_rst.score[1]),
2676 	      (rbuf_rp->r_rst.score[2]<0 ? -1 : rbuf_rp->r_rst.score[2]),
2677 	      rbuf_rp->is_valid_stat, rbuf_rp->r_rst.alg_info,
2678 	      rbuf_dp->stats_idx, rbuf_dp->mseq->lseek);
2679     }
2680   }
2681 
2682   /* save the high-scoring data */
2683   lib_buf2_rp = lib_bhead_p->buf2_res;
2684   lib_buf2_dp = lib_bhead_p->buf2_data;
2685   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2686   while (buf2_cnt--) {
2687     rbuf_rp = lib_buf2_rp++;
2688     rbuf_dp = lib_buf2_dp++;
2689 
2690     /* perhaps should use explicit flag to indicate no score */
2691     if (rbuf_rp->rst.score[0] == -BIGNUM) continue;
2692 
2693     /* i_score: current raw sorting score */
2694     i_score = rbuf_rp->rst.score[sc_ix];
2695     /* e_score, current escore */
2696     e_score = rbuf_rp->rst.escore;
2697     /* this should be done in the thread, and a sorted set of indexes
2698        should be produced by the thread, so we just go down the list
2699        to the zscore threshold */
2700     zscore = (double)i_score;
2701     if (stats_done) {
2702       zscore=find_z(i_score, e_score, rbuf_dp->seq->n1,(double)rbuf_rp->rst.comp,
2703 			  *pstat_voidp);
2704     }
2705 
2706     if (rbuf_dp->frame == m_msp->nitt1) {
2707       ldb->entries++;
2708       ldb->length += rbuf_dp->seq->n1;
2709       if (ldb->length > LONG_MAX) {
2710 	ldb->length -= LONG_MAX; ldb->carry++;
2711       }
2712     }
2713 
2714     if (zscore > zbestcut) {
2715       if (nbest >= MAX_BEST) {
2716 	bestfull = nbest-MAX_BEST/4;
2717 	selectbestz(bestp_arr,bestfull-1,nbest);
2718 	zbestcut = bestp_arr[bestfull-1]->zscore;
2719 	nbest = bestfull;
2720       }
2721       bbp = bestp_arr[nbest++];
2722 
2723       COPY_RST_P(bbp, rbuf_rp);
2724 
2725       bbp->seq = rbuf_dp->seq;
2726       bbp->mseq = rbuf_dp->mseq;
2727       bbp->n1 = rbuf_dp->seq->n1;
2728 #ifdef DEBUG
2729       bbp->adler32_crc = rbuf_dp->seq->adler32_crc;
2730 #endif
2731       /* rbuf_dp->best_save is set after a rbuf_dp is entered into best_str */
2732       if (rbuf_dp->best_save) {
2733 	/* a previous rbuf_dp->seq is in best_str at best_save */
2734 	if (rbuf_dp->best_save->seq == rbuf_dp->seq) {
2735 	  /* the best_save->seq matches the rbuf_dp->seq */
2736 	  bbp->bbp_link = rbuf_dp->best_save;
2737 	  /* bbp_link tells where this ->seq can be found */
2738 	}
2739 	else {
2740 	  bbp->bbp_link = NULL;
2741 	}
2742       }
2743       rbuf_dp->best_save = bbp;
2744       lib_bhead_p->hdr.have_best_save = 1;
2745       bbp->zscore = zscore;
2746       bbp->frame = rbuf_dp->frame;
2747     }
2748   }
2749 
2750   /* process results for statistics */
2751   lib_buf2_dp = lib_bhead_p->buf2_data;
2752   lib_buf2_rp = lib_bhead_p->buf2_res;
2753   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2754   while (buf2_cnt--) { /* count down the number of results */
2755     rbuf_dp = lib_buf2_dp++;	/* step through the results buffer */
2756     rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
2757 
2758     if (!rbuf_rp->is_valid_stat) { continue;}
2759 
2760 
2761     if (use_shuff) {
2762       i_score = rbuf_rp->r_rst.score[sc_ix];
2763     e_score = rbuf_rp->r_rst.escore;
2764     }
2765     else {
2766       i_score = rbuf_rp->rst.score[sc_ix];
2767       e_score = rbuf_rp->rst.escore;
2768     }
2769 
2770     if (rbuf_dp->stats_idx >= MAX_STATS || nstats > MAX_STATS) {
2771       fprintf(stderr, "*** error [%s:%d] - nstats index [%d] out of range [%d,%d]\n",
2772 	      __FILE__, __LINE__,
2773 	      rbuf_dp->stats_idx, nstats,MAX_STATS);
2774       continue;
2775     }
2776 
2777     if (nstats < MAX_STATS) {
2778       /* this logic allows stats_idx to be over-ruled for searches
2779 	 where every query does not generate a score */
2780       rbuf_dp->stats_idx = nstats;
2781       nstats++;
2782     }
2783 
2784     if (stats_done && histp) {
2785       addhistz(find_z(i_score, e_score, rbuf_dp->seq->n1, (double) rbuf_rp->rst.comp,
2786 		      *pstat_voidp), histp);
2787     }
2788 
2789     if (rbuf_dp->stats_idx < 0) {
2790       continue;
2791     }
2792 
2793     sstats++;
2794     stats[rbuf_dp->stats_idx].n1 = rbuf_dp->seq->n1;
2795     stats[rbuf_dp->stats_idx].comp = rbuf_rp->rst.comp;
2796     stats[rbuf_dp->stats_idx].H = rbuf_rp->rst.H;
2797     stats[rbuf_dp->stats_idx].escore  = e_score;
2798     stats[rbuf_dp->stats_idx].score = i_score;
2799   }
2800 
2801 
2802   /* fill the qstats[] array if m_msp->qshuffle */
2803   if (m_msp->qshuffle && nqstats < m_msp->shuff_max) {
2804     lib_buf2_dp = lib_bhead_p->buf2_data;
2805     lib_buf2_rp = lib_bhead_p->buf2_res;
2806     buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2807     while (buf2_cnt--) {
2808       rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
2809       rbuf_dp = lib_buf2_dp++;
2810 
2811       if (rbuf_rp->is_valid_stat && rbuf_rp->qr_score > 0
2812 	  && nqstats < m_msp->shuff_max) {
2813 	qstats[nqstats].n1 = rbuf_dp->seq->n1;	/* save the best score */
2814 	qstats[nqstats].comp =  rbuf_rp->rst.comp;
2815 	qstats[nqstats].H = rbuf_rp->rst.H;
2816 	qstats[nqstats].escore = rbuf_rp->qr_escore;
2817 	qstats[nqstats++].score = rbuf_rp->qr_score;
2818       }
2819     }	/* m_msp->qshuffle */
2820   }
2821 
2822   /* check if we have enough data to do stats */
2823   if (!stats_done && nstats >= MAX_STATS) {
2824     zsflag_save = ppst->zsflag;
2825     if (ppst->zsflag > 20) {
2826       ppst->zsflag -= 20;
2827     }
2828     ppst->zsflag_f = process_hist(stats,nstats,m_msp, ppst,
2829 				  histp, pstat_voidp,s_info, 0);
2830     ppst->zsflag = zsflag_save;
2831     kstats = nstats;
2832     stats_done = 1;
2833     for (i=0; i< nstats; i++) {
2834       bestp_arr[i]->zscore =
2835 	find_z(bestp_arr[i]->rst.score[ppst->score_ix],
2836 	       bestp_arr[i]->rst.escore, bestp_arr[i]->seq->n1,
2837 	       bestp_arr[i]->rst.comp, *pstat_voidp);
2838     }
2839   }
2840 
2841 }
2842 
2843 void
save_shuf(struct buf_head * lib_bhead_p,int nitt1,int shuff_max,int sc_ix,struct score_count_s * s_info)2844 save_shuf(struct buf_head *lib_bhead_p, int nitt1, int shuff_max, int sc_ix,
2845 	  struct score_count_s *s_info)
2846 {
2847   struct buf2_data_s *rbuf_dp, *lib_buf2_dp;
2848   struct buf2_res_s *rbuf_rp, *lib_buf2_rp;
2849   int t_valid_stat;
2850   int t_rbest;
2851   double t_rescore;
2852   int buf2_cnt, jstats;
2853   static int kstats=0;
2854 
2855 
2856   lib_buf2_dp = lib_bhead_p->buf2_data;
2857   lib_buf2_rp = lib_bhead_p->buf2_res;
2858   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2859 
2860   s_info->s_cnt[0] += lib_bhead_p->s_cnt_info.s_cnt[0];
2861   s_info->s_cnt[1] += lib_bhead_p->s_cnt_info.s_cnt[1];
2862   s_info->s_cnt[2] += lib_bhead_p->s_cnt_info.s_cnt[2];
2863 
2864   s_info->tot_scores += lib_bhead_p->s_cnt_info.tot_scores;
2865   /* this is done because we are not using r_rst->valid_stat to limit selection of scores */
2866   /*   s_info->s_cnt[sc_ix] = s_info->tot_scores; */
2867 
2868   t_rbest = -BIGNUM;
2869   t_valid_stat = 0;
2870 
2871   while (buf2_cnt--) { /* count down the number of results */
2872     rbuf_dp = lib_buf2_dp++;	/* step through the results buffer */
2873     rbuf_rp = lib_buf2_rp++;	/* step through the results buffer */
2874 
2875     /* perhaps should use explicit flag to indicate no score */
2876     if (rbuf_rp->r_rst.score[0] == -BIGNUM) continue;
2877 
2878     if (rbuf_rp->r_rst.score[sc_ix] > t_rbest) {
2879       t_rbest = rbuf_rp->r_rst.score[sc_ix];
2880       t_rescore = rbuf_rp->r_rst.escore;
2881     }
2882 
2883     if (rbuf_rp->r_rst.valid_stat > t_valid_stat) {
2884       t_valid_stat = 1;
2885     }
2886 
2887     /* statistics done for best score of set */
2888     /* currently no check for rst->valid_stat, which causes
2889        over-estimates of shuffles */
2890 
2891     if (rbuf_dp->frame == nitt1) {
2892       if (t_valid_stat) {
2893 	if (nrstats < shuff_max ) { kstats = jstats = nrstats++; }
2894 	else {	/* randomly replace */
2895 	  jstats = my_nrand(++kstats,rand_state);
2896 	  if (jstats >= shuff_max) goto done;
2897 	}
2898 
2899 	rstats[jstats].n1 = rbuf_dp->seq->n1;
2900 	rstats[jstats].comp = rbuf_rp->r_rst.comp;
2901 	rstats[jstats].H = rbuf_rp->r_rst.H;
2902 	rstats[jstats].escore  = t_rescore;
2903 	rstats[jstats].score = t_rbest;
2904       done:
2905 	t_rbest = -BIGNUM;
2906       }
2907     }
2908   }
2909 }
2910 
2911 int
save_align(struct buf_head * lib_bhead_p,struct beststr ** bestp_arr)2912 save_align(struct buf_head *lib_bhead_p, struct beststr **bestp_arr)
2913 {
2914   struct buf2_ares_s *rbuf_ap, *lib_buf2_ap;
2915   int buf2_cnt;
2916 
2917   if (!lib_bhead_p->hdr.have_results || lib_bhead_p->hdr.buf2_cnt <= 0) return 0;
2918 
2919   lib_buf2_ap = lib_bhead_p->buf2_ares;
2920   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2921 
2922   while (buf2_cnt-- > 0) { /* count down the number of results */
2923     rbuf_ap = lib_buf2_ap++;	/* step through the results buffer */
2924     if (bestp_arr[rbuf_ap->best_idx]->a_res == NULL) {
2925       bestp_arr[rbuf_ap->best_idx]->have_ares = rbuf_ap->have_ares;
2926       bestp_arr[rbuf_ap->best_idx]->a_res = rbuf_ap->a_res;
2927     }
2928 #ifdef DEBUG
2929     else {
2930       fprintf(stderr,"*** error [%s:%d] - attempt to re-save a_res for [%d]: %s\n",
2931 	      __FILE__, __LINE__, rbuf_ap->best_idx, bestp_arr[rbuf_ap->best_idx]->mseq->bline);
2932     }
2933 #endif
2934   }
2935 
2936   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2937   lib_bhead_p->hdr.have_results = 0;
2938   lib_bhead_p->hdr.buf2_cnt = 0;
2939   return buf2_cnt;
2940 }
2941 
2942 /* buf_do_work fills in the lib_bhead_p->buf2_res[] array with the
2943    do_work() results,
2944 
2945    inputs:  **aa0, n0 (query)
2946             lib_bhead_p->buf2_data lib_bhead_p->hdr.buf2_cnt library sequences
2947 	    max_frame (used to set statistics info)
2948 	    ppst,
2949 	    void *f_struct prepared by init_work()
2950 
2951    results: lib_bhead_p->buf2_res[]
2952 
2953             included in buf2_res[] is use_stat, which captures the
2954             logic required to decide whether a value should be saved
2955             in the stats[] buffer.  This complexity mostly arises
2956             because there can be more scores than sequences, but there
2957             can only on statistics score per sequence (the best score).
2958 */
2959 void
buf_do_work(unsigned char ** aa0,int n0,struct buf_head * lib_bhead_p,int max_frame,struct pstruct * ppst,void ** f_str)2960 buf_do_work(unsigned char **aa0,  int n0,
2961 	    struct buf_head *lib_bhead_p,
2962 	    int max_frame,
2963 	    struct pstruct *ppst, void **f_str) {
2964 
2965   int buf2_cnt;
2966   unsigned long atmp;
2967   struct buf2_data_s *lib_buf2_dp;
2968   struct buf2_res_s *lib_buf2_rp, *t_best_rp;
2969   int t_best, sc_ix;
2970   double t_escore;
2971 
2972   sc_ix = ppst->score_ix;
2973 
2974   lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
2975     lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
2976 
2977   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
2978   lib_buf2_dp = lib_bhead_p->buf2_data;
2979   lib_buf2_rp = lib_bhead_p->buf2_res;
2980 
2981   t_best_rp = NULL;
2982   t_best = -BIGNUM;
2983   t_escore = 1000.0;
2984 
2985   while (buf2_cnt-- > 0) {
2986 
2987     lib_buf2_rp->rst.score[0] =
2988       lib_buf2_rp->rst.score[1] =
2989       lib_buf2_rp->rst.score[2] = -BIGNUM;
2990 
2991     lib_buf2_rp->is_valid_stat = 0;
2992 
2993     if (lib_buf2_dp->seq->n1 < ppst->n1_low ||
2994 	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
2995       /* tells save_best() there is no stats score here -- not
2996 	 necessary as -BIGNUM indicates no score */
2997       lib_buf2_dp->stats_idx = -1;
2998       goto next_seq;
2999     }
3000 
3001 #ifdef DEBUG
3002     if (check_seq_range(lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
3003 			ppst->nsqx, "buf_do_work()")) {
3004       fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_work] range error at: %d/%d (n1:%d)\n",
3005 	      __FILE__, __LINE__,
3006 	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
3007 	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
3008       goto next_seq;
3009     };
3010 
3011     /* also check for adler32_crc match */
3012     if (lib_buf2_dp->seq->adler32_crc != (atmp=adler32(1L,lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1))) {
3013       fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_work] CRC error [%lu!=%lu] at: %d/%d (n1:%d/l_offset:%ld)\n",
3014 	      __FILE__, __LINE__,
3015 	      prog_func,lib_buf2_dp->seq->adler32_crc, atmp,
3016 	      lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
3017 	      lib_bhead_p->hdr.buf2_cnt,lib_buf2_dp->seq->n1,
3018 	      lib_buf2_dp->seq->l_offset);
3019       goto next_seq;
3020     }
3021 #endif
3022 
3023     do_work (aa0[lib_buf2_dp->frame], n0,
3024 	     lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
3025 	     lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], 0, 0,
3026 	     &(lib_buf2_rp->rst), &(lib_bhead_p->s_cnt_info));
3027 
3028     if (lib_buf2_rp->rst.valid_stat) {
3029       if (lib_buf2_rp->rst.escore < t_escore) {
3030 	t_escore = lib_buf2_rp->rst.escore;
3031 	t_best_rp = lib_buf2_rp;
3032       }
3033       if (lib_buf2_rp->rst.score[sc_ix] > t_best) {
3034 	t_best = lib_buf2_rp->rst.score[sc_ix];
3035 	t_best_rp = lib_buf2_rp;
3036       }
3037     }
3038 
3039     if (lib_buf2_dp->frame == max_frame) {
3040       if (t_best_rp!=NULL) {
3041 	t_best_rp->is_valid_stat = 1;
3042 	t_best_rp = NULL;
3043       }
3044       t_best = -BIGNUM;
3045       t_escore = 1000.0;
3046     }
3047 
3048   next_seq:
3049     lib_buf2_dp++;
3050     lib_buf2_rp++;
3051   }
3052 
3053   /* place to produce z_scores */
3054   /* place to produce sorted array */
3055 
3056   lib_bhead_p->hdr.have_results = 1;
3057 }
3058 
3059 void
buf_do_align(unsigned char ** aa0,int n0,struct buf_head * lib_bhead_p,struct pstruct * ppst,const struct mngmsg * m_msp,void ** f_str)3060 buf_do_align(unsigned char **aa0,  int n0,
3061 	     struct buf_head *lib_bhead_p,
3062 	     struct pstruct *ppst, const struct mngmsg *m_msp,
3063 	     void **f_str) {
3064 
3065   int buf2_cnt, i, nsq;
3066   struct buf2_data_s *lib_buf2_dp;
3067   struct buf2_res_s *lib_buf2_rp;
3068   struct buf2_ares_s *lib_buf2_ap;
3069   struct rstruct rst;
3070 
3071   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
3072   lib_buf2_dp = lib_bhead_p->buf2_data;
3073   lib_buf2_rp = lib_bhead_p->buf2_res;
3074   lib_buf2_ap = lib_bhead_p->buf2_ares;
3075 
3076   while (buf2_cnt-- > 0) {
3077     if ( m_msp->stages > 1) {
3078       /* this is not typically done unless m_msp->stages > 1 */
3079       do_opt (aa0[lib_buf2_dp->frame], n0, lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
3080 	      lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], &rst);
3081       lib_buf2_rp->rst.score[2]=rst.score[2];
3082     }
3083 
3084 #ifdef DEBUG
3085     if (lib_buf2_dp->seq->aa1b == NULL) {
3086       fprintf(stderr,"*** error [%s:%d] - [buf_do_align] null aa1b\n",__FILE__, __LINE__);
3087       lib_buf2_ap->a_res = NULL;
3088       break;
3089     }
3090     if (check_seq_range(lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
3091 			ppst->nsqx, "buf_do_align()")) {
3092       fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_align] range error at: %d/%d (n1:%d)\n",
3093 	      __FILE__, __LINE__,
3094 	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
3095 	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
3096     };
3097 
3098     /* also check for adler32_crc match */
3099     if (lib_buf2_dp->seq->adler32_crc != adler32(1L,lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1)) {
3100       fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_align] CRC error at: %d/%d (n1:%d)\n",
3101 	      __FILE__, __LINE__,
3102 	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
3103 	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
3104     }
3105 #endif
3106 
3107     lib_buf2_ap->a_res = build_ares_code(aa0[lib_buf2_dp->frame], m_msp->n0,
3108 					 lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq,
3109 					 lib_buf2_dp->frame, &lib_buf2_ap->have_ares,
3110 					 lib_buf2_dp->repeat_thresh, m_msp, ppst, f_str[lib_buf2_dp->frame] );
3111 
3112     lib_buf2_dp++;
3113     lib_buf2_ap++;
3114     lib_buf2_rp++;
3115   }
3116   lib_bhead_p->hdr.have_results = 1;
3117 }
3118 
3119 void
buf_qshuf_work(unsigned char * aa0s,int n0,struct buf_head * lib_bhead_p,int max_frame,struct pstruct * ppst,void * qf_str,int ix_score)3120 buf_qshuf_work(unsigned char *aa0s,  int n0,
3121 	       struct buf_head *lib_bhead_p,
3122 	       int max_frame,
3123 	       struct pstruct *ppst, void *qf_str,
3124 	       int ix_score)
3125 {
3126   int buf2_cnt;
3127   struct buf2_data_s *lib_buf2_dp;
3128   struct buf2_res_s *lib_buf2_rp, *tq_best_rp;
3129   struct rstruct rrst;
3130   struct score_count_s q_scnt_info;
3131   int tq_best;
3132   double tq_escore;
3133 
3134   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
3135   lib_buf2_dp = lib_bhead_p->buf2_data;
3136   lib_buf2_rp = lib_bhead_p->buf2_res;
3137 
3138   tq_best_rp = NULL;
3139   tq_best = -BIGNUM;
3140   tq_escore = 1000.0;
3141 
3142   while (buf2_cnt-- > 0) {
3143     rrst.score[0] = rrst.score[1] = rrst.score[2] = -BIGNUM;
3144     rrst.valid_stat = 0;
3145 
3146     if (lib_buf2_dp->seq->n1 < ppst->n1_low ||
3147 	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
3148       lib_buf2_dp++;
3149       lib_buf2_rp++;
3150       tq_best_rp = NULL;
3151       tq_best = -BIGNUM;
3152       tq_escore = 1000.0;
3153       continue;
3154     }
3155 
3156     do_work (aa0s, n0,
3157 	     lib_buf2_dp->seq->aa1b, lib_buf2_dp->seq->n1,
3158 	     lib_buf2_dp->frame, ppst, qf_str, 1, 0,
3159 	     &rrst, &q_scnt_info);
3160 
3161     /* buf_qshuf_work() is always called after buf_do_work(), which
3162        sets rp->is_valid_stat */
3163     if (lib_buf2_rp->is_valid_stat) {
3164       tq_best_rp = lib_buf2_rp;
3165     }
3166 
3167     if (rrst.escore < tq_escore) {
3168       tq_escore = rrst.escore;
3169     }
3170     if (rrst.score[ix_score] > tq_best) {
3171       tq_best = rrst.score[ix_score];
3172     }
3173 
3174     if (lib_buf2_dp->frame == max_frame) {
3175       if (tq_best_rp!=NULL) {
3176 	tq_best_rp->qr_score = tq_best;
3177 	tq_best_rp->qr_escore = tq_escore;
3178 	tq_best_rp = NULL;
3179       }
3180 #ifdef DEBUG
3181       else {
3182 	fprintf(stderr,"*** error [%s:%d] - tq_best_rp NULL at: %ld\n",
3183 		__FILE__, __LINE__, lib_buf2_rp - lib_bhead_p->buf2_res);
3184       }
3185 #endif
3186       tq_best = -BIGNUM;
3187       tq_escore = 1000.0;
3188     }
3189     lib_buf2_dp++;
3190     lib_buf2_rp++;
3191   }
3192 }
3193 
3194 void
buf_shuf_work(unsigned char ** aa0,int n0,unsigned char * aa1s,struct buf_head * lib_bhead_p,int max_frame,struct pstruct * ppst,void ** f_str,int ix_score,void * rand_state)3195 buf_shuf_work(unsigned char **aa0,  int n0, unsigned char *aa1s, struct buf_head *lib_bhead_p,
3196 	      int max_frame, struct pstruct *ppst, void **f_str,
3197 	      int ix_score, void *rand_state)
3198 {
3199   int buf2_cnt;
3200   int shuff_cnt;
3201   struct buf2_data_s *lib_buf2_dp;
3202   struct buf2_res_s *lib_buf2_rp, *tr_best_rp;
3203   int tr_best, sc_ix;
3204   double tr_escore;
3205 
3206   sc_ix = ppst->score_ix;
3207 
3208   lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
3209     lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
3210 
3211   shuff_cnt = 0;
3212   buf2_cnt = lib_bhead_p->hdr.buf2_cnt;
3213   lib_buf2_dp = lib_bhead_p->buf2_data;
3214   lib_buf2_rp = lib_bhead_p->buf2_res;
3215 
3216   tr_best_rp = NULL;
3217   tr_best = -BIGNUM;
3218   tr_escore = 1000.0;
3219 
3220   while (buf2_cnt-- > 0) {
3221     lib_buf2_rp->r_rst.score[0] = lib_buf2_rp->r_rst.score[1] =
3222       lib_buf2_rp->r_rst.score[2] = -BIGNUM;
3223     lib_buf2_rp->r_rst.valid_stat = lib_buf2_rp->is_valid_stat = 0;
3224 
3225     if ((lib_buf2_dp->stats_idx < 0) || lib_buf2_dp->seq->n1 < ppst->n1_low ||
3226 	lib_buf2_dp->seq->n1 > ppst->n1_high ) {
3227       lib_buf2_dp++;
3228       lib_buf2_rp++;
3229       tr_best_rp = NULL;
3230       tr_best = -BIGNUM;
3231       tr_escore = 1000.0;
3232       continue;
3233     }
3234 
3235     shuff_cnt++;
3236     if (ppst->zs_win > 0) {
3237       wshuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1,ppst->zs_win, rand_state);
3238     }
3239     else {
3240       if (ppst->shuffle_dna3) {shuffle3(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1, rand_state);}
3241       else {shuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1, rand_state);}
3242     }
3243 
3244     /* rshuffle(lib_buf2_dp->seq->aa1b,aa1s,lib_buf2_dp->seq->n1); */
3245 
3246 #ifdef DEBUG
3247     if (check_seq_range(aa1s, lib_buf2_dp->seq->n1,
3248 			ppst->nsqx, "buf_do_align()")) {
3249       fprintf(stderr, "*** error [%s:%d] - [%s/buf_do_shuff] range error at: %d/%d (n1:%d)\n",
3250 	      __FILE__, __LINE__,
3251 	      prog_func,lib_bhead_p->hdr.buf2_cnt - (buf2_cnt+1),
3252 	      lib_bhead_p->hdr.buf2_cnt, lib_buf2_dp->seq->n1);
3253     };
3254 #endif
3255 
3256     do_work (aa0[lib_buf2_dp->frame], n0,
3257 	     aa1s, lib_buf2_dp->seq->n1,
3258 	     lib_buf2_dp->frame, ppst, f_str[lib_buf2_dp->frame], 0, 1,
3259 	     &lib_buf2_rp->r_rst, &(lib_bhead_p->s_cnt_info));
3260 
3261     if (lib_buf2_rp->r_rst.valid_stat) {
3262       if (lib_buf2_rp->r_rst.escore < tr_escore) {
3263 	tr_escore = lib_buf2_rp->r_rst.escore;
3264 	tr_best_rp = lib_buf2_rp;
3265       }
3266       if (lib_buf2_rp->r_rst.score[sc_ix] > tr_best) {
3267 	tr_best = lib_buf2_rp->r_rst.score[sc_ix];
3268 	tr_best_rp = lib_buf2_rp;
3269       }
3270     }
3271 
3272     if (lib_buf2_dp->frame == max_frame) {
3273       if (tr_best_rp!=NULL) {
3274 	tr_best_rp->is_valid_stat = 1;
3275 	tr_best_rp = NULL;
3276       }
3277       tr_best = -BIGNUM;
3278       tr_escore = 1000.0;
3279     }
3280 
3281     lib_buf2_dp++;
3282     lib_buf2_rp++;
3283   }
3284   lib_bhead_p->hdr.shuff_cnt = shuff_cnt;
3285   lib_bhead_p->hdr.have_results = 1;
3286 }
3287 
3288 /* buf_shuf_seq is designed to:
3289    (1) take a list of sequences (specified by bptr[])
3290    (2) collect them from the database if they are not already available
3291    (3) send them to the threads or shuffle them directly and calculate scores
3292 */
3293 
3294 void
buf_shuf_seq(unsigned char ** aa0,int n0,unsigned char ** aa1shuff_b,unsigned char * aa1save,int maxn,struct beststr ** bestp_arr,int nbest,struct pstruct * ppst,struct mngmsg * m_msp,struct mng_thr * m_bufi_p,void ** f_str,struct score_count_s * s_info)3295 buf_shuf_seq(unsigned char **aa0, int n0,
3296 	     unsigned char **aa1shuff_b, unsigned char *aa1save, int maxn,
3297 	     struct beststr **bestp_arr, int nbest,
3298 	     struct pstruct *ppst, struct mngmsg *m_msp,
3299 	     struct mng_thr *m_bufi_p
3300 #if !defined(COMP_THR) && !defined(PCOMPLIB)
3301 	     , void **f_str
3302 #endif
3303 	     , struct score_count_s *s_info)
3304 {
3305   unsigned char *aa1shuff;
3306   struct beststr *bbp, **tmp_bestp;
3307   char l_bline[MAX_SSTR];
3308   int n1lib_req, shuff_mult;
3309   long loffset, l_off;
3310   int n1, itt;
3311   int max_do_cnt, ndiff, prev_index;
3312   int istats;
3313   int i, j;
3314 
3315   /* these variables track buffers of library sequences */
3316   int cur_buf_size, max_buf_size;
3317   struct buf_head *lib_bhead_p;
3318   struct buf2_data_s *lib_buf2_dp;
3319   struct buf2_res_s *lib_buf2_rp;
3320 
3321 /* (1) get the sequences into a buffer - the sequence information is
3322    currently in the bestp_arr - find out how many we have, and how
3323    many we will need - the number to shuffle */
3324 
3325 /* figure out how much space we need, first checking whether we have
3326    dups */
3327   if ((tmp_bestp = (struct beststr **)calloc(nbest, sizeof(struct beststr *)))==NULL) {
3328     fprintf(stderr,"*** error [%s:%d] - %s/buf_shuf_seq() *** cannot allocate tmp_bestp[%d]\n",
3329 	    __FILE__, __LINE__, prog_name, nbest);
3330     exit(1);
3331   }
3332   for (i = 0; i < nbest; i++) {
3333     tmp_bestp[i] = bestp_arr[i];
3334   }
3335 
3336   /* sort tmp_bestp[] by sequence index, so duplicates are adjacent */
3337   sortbesti(tmp_bestp, nbest);
3338 
3339   /* count number of different sequence indices, get required space
3340      without dups */
3341   prev_index = -1;
3342   n1lib_req = ndiff = 0;
3343   for (i = 0; i < nbest; i++) {
3344     if (tmp_bestp[i]->seq->index > prev_index) {
3345       prev_index = tmp_bestp[i]->seq->index;
3346       n1lib_req += tmp_bestp[i]->n1+ 2;
3347       ndiff++;
3348     }
3349   }
3350 
3351 #if !defined(COMP_THR) && !defined(PCOMPLIB)
3352       if (n1lib_req >= maxn) { /* we need new space, aa1shuff is too small */
3353 	if ((*aa1shuff_b = aa1shuff =
3354 	     (unsigned char *)realloc(*aa1shuff_b, n1lib_req*sizeof(char)))==NULL) {
3355 	  fprintf(stderr,"*** error [%s:%d] - cannot realloc aa1shuff[%d]\n",
3356 		  __FILE__, __LINE__, n1lib_req);
3357 	  exit(1);
3358 	}
3359       }
3360       else { aa1shuff = *aa1shuff_b;}
3361       *aa1shuff = '\0';
3362       aa1shuff++;
3363 
3364 #else
3365       if (n1lib_req < 2) {
3366 	fprintf(stderr,"*** error [%s:%d] - [%s/buf_shuf_seq] no residues to shuffle: %d (%d)\n",
3367 		__FILE__, __LINE__,
3368 		prog_func,n1lib_req,ndiff);
3369 	exit(1);
3370       }
3371 
3372       if ((*aa1shuff_b = aa1shuff =
3373 	   (unsigned char *)calloc(n1lib_req,sizeof(char)))==NULL) {
3374 	fprintf(stderr,"*** error [%s:%d] - cannot calloc aa1shuff[%d]\n",
3375 		__FILE__, __LINE__, n1lib_req);
3376 	exit(1);
3377       }
3378       *aa1shuff = '\0';
3379       aa1shuff++;
3380 #endif
3381 
3382       shuff_mult = (m_msp->shuff_max+1)/ndiff;
3383       istats = 0;
3384 
3385       /* setup lib_bhead buffers for shuffle comparisons */
3386 #if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded/parallel */
3387       /* max_do_cnt can be smaller than max_buf2_cnt, but not larger */
3388       max_do_cnt = min(m_bufi_p->max_buf2_res,
3389 		       m_msp->shuff_max / (2 * fa_max_workers));
3390       /* we don't have a left over one, so we need one */
3391       get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
3392 #else	/* not threaded */
3393       max_do_cnt = m_bufi_p->max_buf2_res;
3394       lib_bhead_p = lib_buf2_list;  /* equivalent to un-threaded get_rbuf() */
3395 #endif
3396       max_buf_size = n1lib_req;
3397       cur_buf_size = 0;
3398       lib_bhead_p->hdr.buf2_cnt = 0;
3399       lib_bhead_p->hdr.have_results = 0;
3400       lib_bhead_p->hdr.stop_work = 0;
3401       lib_bhead_p->hdr.buf2_type=BUF2_DOSHUF;
3402       lib_bhead_p->hdr.seq_record_continuous = 0;
3403       lib_buf2_dp = lib_bhead_p->buf2_data;
3404       lib_buf2_rp = lib_bhead_p->buf2_res;
3405 
3406       /* read sequences into shuffle buffer */
3407 
3408       for (i = 0; i < ndiff; i++) {
3409 	bbp = tmp_bestp[i];
3410 	if (bbp->seq->aa1b == NULL) {
3411 	  /* get the sequence */
3412 	  (bbp->mseq->m_file_p->ranlib)(l_bline, sizeof(l_bline),
3413 				       bbp->mseq->lseek,bbp->mseq->libstr,bbp->mseq->m_file_p);
3414 	  n1 = re_getlib(aa1save,NULL, maxn,m_msp->ldb_info.maxt3,
3415 			 m_msp->ldb_info.l_overlap,bbp->mseq->cont,m_msp->ldb_info.term_code,
3416 			 &loffset,&l_off,bbp->mseq->m_file_p);
3417 
3418 	  /* fprintf(stderr, " %d gets %d %d\n",i,tmp_bestp[i]->seq->n1,n1); */
3419 
3420 	  memcpy(aa1shuff, aa1save, n1+1);
3421 	  bbp->seq->aa1b = aa1shuff;
3422 	  aa1shuff += n1 + 1;
3423 	}
3424 
3425 	/* lib_buf2_dp is used up by scores, the sequence is not sent multiple times */
3426 	cur_buf_size += bbp->seq->n1+1;
3427 	for (j = 0; j < shuff_mult; j++ ) {
3428 	  for (itt = m_msp->revcomp; itt <= m_msp->nitt1; itt++) {
3429 #ifdef PCOMPLIB
3430 	    lib_buf2_dp->seq_dup = 0;	/* mark first ->seq as original, not duplicate */
3431 #endif
3432 	    lib_buf2_dp->seq = bbp->seq;
3433 	    /* this invalidates lib_buf2_p->seq */
3434 	    lib_buf2_dp->stats_idx = istats++;
3435 	    lib_buf2_dp->frame = itt;
3436 	    lib_buf2_dp++;		/* point to next buf2 */
3437 	    lib_buf2_rp++;		/* point to next buf2 */
3438 	    lib_bhead_p->hdr.buf2_cnt++;
3439 
3440 	    if (lib_bhead_p->hdr.buf2_cnt >= max_do_cnt ||
3441 		cur_buf_size >= max_buf_size) {
3442 /* (2) send sequences for shuffling */
3443 #if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded - fill and empty buffers */
3444 	      /* provide empty buffer to workers */
3445 	      lib_bhead_p->hdr.aa1b_used = cur_buf_size;
3446 	      lib_bhead_p->hdr.have_data = 1;
3447 	      lib_bhead_p->hdr.seq_record_continuous = 0;
3448 	      put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
3449 	      get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
3450 #else		/* non-thread - just do the searches */
3451 	      if (lib_bhead_p->hdr.buf2_type & BUF2_DOSHUF) {
3452 		buf_shuf_work(aa0,m_msp->n0, aa1save, lib_bhead_p,
3453 			      m_msp->nitt1, ppst, f_str, ppst->score_ix, rand_state);
3454 	      }
3455 #endif
3456 /* (3) save results in the rstats structure */
3457 	      if (lib_bhead_p->hdr.buf2_cnt > 0 && lib_bhead_p->hdr.have_results) {
3458 		save_shuf(lib_bhead_p,m_msp->nitt1,m_msp->shuff_max,ppst->score_ix,s_info);
3459 	      }
3460 
3461 	      lib_bhead_p->s_cnt_info.s_cnt[0] = lib_bhead_p->s_cnt_info.s_cnt[1] =
3462 		lib_bhead_p->s_cnt_info.s_cnt[2] = lib_bhead_p->s_cnt_info.tot_scores = 0;
3463 
3464 	      lib_bhead_p->hdr.buf2_cnt = 0;
3465 	      cur_buf_size = 0;
3466 	      lib_bhead_p->hdr.have_results = 0;
3467 	      lib_bhead_p->hdr.buf2_type=BUF2_DOSHUF;
3468 	      lib_bhead_p->hdr.seq_record_continuous = 0; /* seq_records are coming from bestptr in any order */
3469 	      lib_bhead_p->hdr.stop_work = 0;
3470 	      lib_buf2_dp = lib_bhead_p->buf2_data;
3471 	    }
3472 	  } /* for (itt .. */
3473 	}
3474       }			/* done with tmp_bestp[] */
3475       free(tmp_bestp);
3476 
3477 #if defined(COMP_THR) || defined(PCOMPLIB)	/* if COMP_THR/PCOMPLIB - fill and empty buffers */
3478       /* check last buffers for any results */
3479       lib_bhead_p->hdr.seq_record_continuous = 0;
3480       put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
3481 
3482       /* wait for the threads to finish */
3483 
3484       wait_rbuf(m_bufi_p->max_work_buf);
3485       /*
3486       fprintf(stderr, " num_reader[%d]-empty[%d]: %d\tnrstats: %d\n",
3487 	      num_reader_bufs,empty_reader_bufs,
3488 	      num_reader_bufs-empty_reader_bufs, nrstats);
3489       */
3490 
3491       for (i=0; i < num_reader_bufs; i++) {
3492 	if (RESULTS_BUF[i]->hdr.buf2_cnt > 0 && RESULTS_BUF[i]->hdr.have_results) {
3493 	  save_shuf(RESULTS_BUF[i],m_msp->nitt1, m_msp->shuff_max, ppst->score_ix, s_info);
3494 	  RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
3495 	}
3496       }
3497 #else	/* just do the searches */
3498       /* aa1save is used for shuffles, not aa1shuf, because aa1shuf
3499 	 has library sequences */
3500       buf_shuf_work(aa0,m_msp->n0, aa1save, lib_bhead_p,
3501 		    m_msp->nitt1, ppst, f_str, ppst->score_ix, rand_state);
3502 
3503       save_shuf(lib_bhead_p,m_msp->nitt1,m_msp->shuff_max, ppst->score_ix, s_info);
3504       lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
3505 #endif
3506 }
3507 
3508 /* buf_align_seq is structurally almost identical to buf_shuf_seq,
3509    except that the appropriate sequences are pre-loaded into bbp->seq
3510    (and ->bline), and it gets bbp->a_res, rather than scores */
3511 
3512 void
buf_align_seq(unsigned char ** aa0,int n0,struct beststr ** bestp_arr,int nbest,struct pstruct * ppst,struct mngmsg * m_msp,struct mng_thr * m_bufi_p,void ** f_str)3513 buf_align_seq(unsigned char **aa0, int n0,
3514 	      struct beststr **bestp_arr, int nbest,
3515 	      struct pstruct *ppst, struct mngmsg *m_msp,
3516 	      struct mng_thr *m_bufi_p
3517 #if !defined(COMP_THR) && !defined(PCOMPLIB)
3518 	      , void **f_str
3519 #endif
3520 	     )
3521 {
3522   struct beststr *bbp;
3523   int max_align_cnt;
3524   int i, n_pre_align;
3525   int cur_buf_size, max_buf_size;
3526   struct buf_head *lib_bhead_p;
3527   struct buf2_data_s *lib_buf2_dp;
3528   struct buf2_ares_s *lib_buf2_ap;
3529 
3530   /* setup lib_bhead buffers for alignments */
3531 #if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded */
3532   /* max_do_cnt can be smaller than max_buf2_res, but not larger */
3533 #ifdef COMP_THR
3534   max_align_cnt = min(m_bufi_p->max_buf2_res,
3535 		      nbest / (4 * fa_max_workers));
3536 #else
3537   max_align_cnt = min(m_bufi_p->max_buf2_res, nbest / fa_max_workers);
3538 #endif
3539   if (max_align_cnt < 1) max_align_cnt = 1;
3540 
3541   get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
3542 #else	/* not threaded */
3543   max_align_cnt = m_bufi_p->max_buf2_res;
3544   lib_bhead_p = lib_buf2_list;  /* equivalent to un-threaded get_rbuf() */
3545 #endif
3546 
3547   max_buf_size = lib_bhead_p->hdr.aa1b_size;
3548   lib_bhead_p->hdr.buf2_cnt = 0;
3549   lib_bhead_p->hdr.have_results = 0;
3550   lib_bhead_p->hdr.stop_work = 0;
3551   lib_bhead_p->hdr.buf2_type=BUF2_DOALIGN;
3552   lib_buf2_dp = lib_bhead_p->buf2_data;
3553   lib_buf2_ap = lib_bhead_p->buf2_ares;
3554 
3555   /* read sequences into align buffer */
3556 
3557   n_pre_align = 0;
3558   cur_buf_size = 0;
3559   for (i = 0; i < nbest; i++) {
3560     bbp = bestp_arr[i];
3561 
3562     /* this invalidates lib_buf2_p->seq */
3563     lib_buf2_dp->seq = bbp->seq;
3564     cur_buf_size += bbp->seq->n1+1;
3565     lib_buf2_dp->frame = bbp->frame;
3566     lib_buf2_dp->repeat_thresh = bbp->repeat_thresh;
3567 #ifdef PCOMPLIB
3568     lib_buf2_dp->seq_dup = 0;
3569 #endif
3570     lib_buf2_ap->have_ares = 0;
3571     lib_buf2_ap->a_res = NULL;
3572     lib_buf2_ap->best_idx = i;
3573     lib_buf2_dp++;		/* point to next buf2_data */
3574     lib_buf2_ap++;		/* point to next buf2_ares */
3575     lib_bhead_p->hdr.buf2_cnt++;
3576 
3577     if (lib_bhead_p->hdr.buf2_cnt >= max_align_cnt ||
3578 	cur_buf_size >= max_buf_size - m_msp->ldb_info.maxn) {
3579 /* (2) send sequences for alignment */
3580 #if defined(COMP_THR) || defined(PCOMPLIB)	/* threaded - fill and empty buffers */
3581       /* provide empty buffer to workers */
3582       lib_bhead_p->hdr.seqr_cnt = lib_bhead_p->hdr.buf2_cnt;	/* for alignments, they are the same */
3583       lib_bhead_p->hdr.have_data = 1;
3584       lib_bhead_p->hdr.aa1b_used = cur_buf_size;
3585       lib_bhead_p->hdr.seq_record_continuous = 0;
3586       put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
3587       get_rbuf(&lib_bhead_p,m_bufi_p->max_work_buf);
3588 #else		/* non-thread - just do the searches */
3589       buf_do_align(aa0, m_msp->n0, lib_bhead_p, ppst, m_msp, f_str);
3590 #endif
3591 
3592 /* (3) save alignments */
3593       if (lib_bhead_p->hdr.buf2_cnt > 0 && lib_bhead_p->hdr.have_results) {
3594 	n_pre_align += save_align(lib_bhead_p,bestp_arr);
3595       }
3596 
3597       cur_buf_size = 0;
3598       max_buf_size = lib_bhead_p->hdr.aa1b_size;
3599       lib_bhead_p->hdr.buf2_cnt = 0;
3600       lib_bhead_p->hdr.have_results = 0;
3601       lib_bhead_p->hdr.buf2_type=BUF2_DOALIGN;
3602       lib_bhead_p->hdr.stop_work = 0;
3603       lib_buf2_dp = lib_bhead_p->buf2_data;
3604       lib_buf2_ap = lib_bhead_p->buf2_ares;
3605     }
3606   }			/* done with bestp_arr[] */
3607 
3608 #if defined(COMP_THR) || defined(PCOMPLIB)	/* if COMP_THR - fill and empty buffers */
3609   /* check last buffers for any results */
3610   lib_bhead_p->hdr.seqr_cnt = lib_bhead_p->hdr.buf2_cnt;	/* for alignments, they are the same */
3611   lib_bhead_p->hdr.have_data = 1;
3612   lib_bhead_p->hdr.aa1b_used = cur_buf_size;
3613   lib_bhead_p->hdr.seq_record_continuous = 0;
3614   put_rbuf(lib_bhead_p,m_bufi_p->max_work_buf);
3615 
3616   /* wait for the threads to finish */
3617 
3618   wait_rbuf(m_bufi_p->max_work_buf);
3619 
3620   for (i=0; i < num_reader_bufs; i++) {
3621     if (RESULTS_BUF[i]->hdr.buf2_cnt > 0 && RESULTS_BUF[i]->hdr.have_results) {
3622       n_pre_align += save_align(RESULTS_BUF[i],bestp_arr);
3623       RESULTS_BUF[i]->hdr.buf2_cnt = RESULTS_BUF[i]->hdr.have_results = 0;
3624     }
3625   }
3626 #else	/* just do the searches */
3627   buf_do_align(aa0, m_msp->n0, lib_bhead_p, ppst, m_msp, f_str);
3628   n_pre_align += save_align(lib_bhead_p,bestp_arr);
3629   lib_bhead_p->hdr.buf2_cnt = lib_bhead_p->hdr.have_results = 0;
3630 #endif
3631 
3632   m_msp->align_done = 1;
3633 
3634   if (n_pre_align != nbest) {
3635     fprintf(stderr,"*** error [%s:%d] -  n_pre_align:%d != nbest: %d\n",
3636 	    __FILE__, __LINE__, n_pre_align, nbest);
3637   }
3638   for (i=0; i < nbest; i++) {
3639     if (bestp_arr[i]->a_res == NULL) {
3640       fprintf(stderr, "*** error [%s:%d] - have NULL a_res: %d\n",
3641 	      __FILE__, __LINE__, i);
3642     }
3643   }
3644 }
3645 
3646 int
check_seq_range(unsigned char * aa1b,int n1,int nsq,char * str)3647 check_seq_range(unsigned char *aa1b, int n1, int nsq, char *str) {
3648   int i, range_error;
3649   unsigned char *aa1p;
3650 
3651   range_error = 0;
3652   for (aa1p = aa1b, i=0; i < n1; i++, aa1p++) {
3653     if (*aa1p > nsq) {
3654       range_error = 1;
3655       /*      fprintf(stderr, "%s seq %d (%c) out of range at %d\n",
3656 	      str, *aa1p, *aa1p,i);
3657       */
3658     }
3659   }
3660   return range_error;
3661 }
3662 
3663 struct stack_str {
3664   void **stack;
3665   int size;
3666   int inc;
3667   int top;
3668 };
3669 
init_stack(int size,int inc)3670 struct stack_str *init_stack(int size, int inc) {
3671   struct stack_str *stack;
3672 
3673   if ((stack=(struct stack_str *)calloc(1,sizeof(struct stack_str)))==NULL) {
3674     fprintf(stderr,"*** error [%s:%d] - cannot allocate stack\n",
3675 	    __FILE__, __LINE__);
3676     return NULL;
3677   }
3678 
3679   if ((stack->stack=(void *)calloc(size,sizeof(void *)))==NULL) {
3680     fprintf(stderr,"*** error [%s:%d] - cannot allocate stack->stack[%d]\n",
3681 	    __FILE__, __LINE__,size);
3682     free(stack);
3683     return NULL;
3684   }
3685 
3686   stack->size = size;
3687   stack->inc = inc;
3688   stack->top = 0;
3689   return stack;
3690 }
3691 
push_stack(struct stack_str * stack,void * value)3692 void push_stack(struct stack_str *stack, void *value) {
3693 
3694   if (!stack) return;
3695   if (stack->top >= stack->size) {
3696     stack->size += stack->inc;
3697     if ((stack->stack = (void *)realloc(stack->stack, stack->size*sizeof(void *)))==NULL) {
3698       fprintf(stderr,"*** error [%s:%d] - cannot re-allocate stack to [%d]\n",
3699 	      __FILE__, __LINE__, stack->size);
3700       return;
3701     }
3702   }
3703   stack->stack[stack->top++] = value;
3704 }
3705 
pop_stack(struct stack_str * stack)3706 void * pop_stack(struct stack_str *stack) {
3707   if (stack == NULL) {
3708 #ifdef DEBUG
3709     fprintf(stderr," *** error [%s:%d] - pop_stack NULL stack\n",__FILE__, __LINE__);
3710 #endif
3711     return NULL;
3712   }
3713 
3714   if (stack->top-- > 0) {
3715     return stack->stack[stack->top];
3716   }
3717   else {
3718     stack->top = 0;
3719     return NULL;
3720   }
3721 }
3722 
free_stack(struct stack_str * stack)3723 void * free_stack(struct stack_str *stack) {
3724   if (stack==NULL) return NULL;
3725   if (stack->stack != NULL) free(stack->stack);
3726   free(stack);
3727   return NULL;
3728 }
3729 
3730 struct dyn_string_str *
init_dyn_string(int size,int inc)3731 init_dyn_string(int size, int inc) {
3732   struct dyn_string_str *dyn_string;
3733 
3734   if ((dyn_string=(struct dyn_string_str *)calloc(1,sizeof(struct dyn_string_str)))==NULL) {
3735     fprintf(stderr,"*** error [%s:%d] - cannot allocate dyn_string\n",
3736 	    __FILE__, __LINE__);
3737     return NULL;
3738   }
3739 
3740   if ((dyn_string->string=(void *)calloc(size,sizeof(void *)))==NULL) {
3741     fprintf(stderr,"*** error [%s:%d] - cannot allocate dyn_string->string[%d]\n",
3742 	    __FILE__, __LINE__,size);
3743     free(dyn_string);
3744     return NULL;
3745   }
3746 
3747   dyn_string->c_size = 0;
3748   dyn_string->inc = inc;
3749   dyn_string->mx_size = size;
3750   return dyn_string;
3751 }
3752 
3753 void
dyn_strcat(struct dyn_string_str * dyn_string,char * value)3754 dyn_strcat(struct dyn_string_str *dyn_string, char *value) {
3755   size_t add_len;
3756 
3757   add_len = strlen(value);
3758 
3759   if (!dyn_string) return;
3760   if (add_len + dyn_string->c_size + 1 >= dyn_string->mx_size) {
3761     while (dyn_string->inc < add_len) { dyn_string->inc *= 2; }
3762     dyn_string->mx_size += dyn_string->inc;
3763     if ((dyn_string->string = (void *)realloc(dyn_string->string, dyn_string->mx_size))==NULL) {
3764       fprintf(stderr,"*** error [%s:%d] - cannot re-allocate dyn_string to [%d]\n",
3765 	      __FILE__, __LINE__, dyn_string->mx_size);
3766       dyn_string->mx_size = 0;
3767       return;
3768     }
3769   }
3770   SAFE_STRNCAT(dyn_string->string,value,dyn_string->mx_size);
3771   dyn_string->c_size += add_len;
3772 }
3773 
dyn_strcpy(struct dyn_string_str * dyn_string,char * value)3774 void dyn_strcpy(struct dyn_string_str *dyn_string, char *value) {
3775   size_t add_len;
3776 
3777   add_len = strlen(value);
3778 
3779   if (!dyn_string) return;
3780   if (add_len + 1>= dyn_string->mx_size) {
3781     while (dyn_string->inc < add_len) { dyn_string->inc *= 2; }
3782     dyn_string->mx_size += dyn_string->inc;
3783     if ((dyn_string->string = (void *)realloc(dyn_string->string, dyn_string->mx_size))==NULL) {
3784       fprintf(stderr,"*** error [%s:%d] - cannot re-allocate dyn_string to [%d]\n",
3785 	      __FILE__, __LINE__, dyn_string->mx_size);
3786       dyn_string->mx_size = 0;
3787       return;
3788     }
3789   }
3790   SAFE_STRNCPY(dyn_string->string,value,dyn_string->mx_size);
3791 }
3792 
free_dyn_string(struct dyn_string_str * dyn_string)3793 void free_dyn_string(struct dyn_string_str *dyn_string) {
3794   if (dyn_string==NULL) return;
3795   if (dyn_string->string != NULL) free(dyn_string->string);
3796   free(dyn_string);
3797 }
3798 
3799 #include "a_mark.h"
3800 
3801 /* *itmp has the current alignment score, if *annot_arr[i_annot].label='V',
3802      this can be increased (total increase in *v_delta)
3803    *pam2aa0v[.value] gives possibly better pam score for variant
3804    *ip is position in annotated sequence (&i0 for annot0_p)
3805    *ia is position in aligned sequence (&i1 for annot0_p)
3806    sp1 is the array for the (possibly modified) displayed sequence
3807    sp1a is the array for the associated annotation
3808    sq maps encoded residues to displayed characters
3809    i_annot -- current annotation index in annot0_p->annot_arr_p[i_annot]
3810    annot_arr = annot0/1_p->annot_arr_p
3811    annot_stack = save current annotation
3812    *have_push_features = set for annotations pushed in stack (not 'V')
3813    *v_delta = change in score from variant at this position
3814    **region_p = set for '[' region start
3815    init_score -- used to initialize tmp_region_p->score.
3816 
3817 */
3818 
3819 int
next_annot_match(int * itmp,int * pam2aa0v,long ip,long ia,char * sp1,char * sp1a,const unsigned char * sq,int i_annot,int n_annot,struct annot_entry ** annot_arr,char ** ann_comment,void * annot_stack,int * have_push_features,int * v_delta,struct annot_entry ** region_p,struct annot_entry * tmp_region_p,int init_score)3820 next_annot_match(int *itmp, int *pam2aa0v, long ip, long ia, char *sp1, char *sp1a, const unsigned char *sq,
3821 		 int i_annot, int n_annot, struct annot_entry **annot_arr, char **ann_comment,
3822 		 void *annot_stack, int *have_push_features, int *v_delta,
3823 		 struct annot_entry **region_p, struct annot_entry *tmp_region_p,
3824 		 int init_score)  {
3825   int v_tmp;
3826 
3827   if (ann_comment) *ann_comment = NULL;
3828 
3829   /* count through the annotations at this position (long ip) */
3830 
3831   while (i_annot < n_annot && ip == annot_arr[i_annot]->pos) {
3832     if (annot_arr[i_annot]->label == 'V') { /* label == 'V' */
3833       v_tmp = pam2aa0v[annot_arr[i_annot]->value];
3834       if (v_tmp > *itmp) {
3835 	*v_delta += (v_tmp- *itmp);
3836 	*itmp = v_tmp;
3837 	*sp1 = sq[annot_arr[i_annot]->value];
3838 	if (sp1a) *sp1a = 'V';
3839 	if (ann_comment) *ann_comment = annot_arr[i_annot]->comment;
3840       }
3841     }
3842     else if (annot_arr[i_annot]->label == '[') {
3843       /* region_p needs to point to a more sophisticated data
3844 	 structure that keeps track of all the current regions being
3845 	 updated
3846 
3847 	 to start, region_p could include a linked list and a pointer to
3848 	 the current left-most region, which would be used for ']'
3849 	 detection
3850 
3851 	 for efficiency, update the ->score only when a new
3852 	 (overlapping) region is started or stopped
3853 
3854 	 same for n_indent, n_aln
3855       */
3856 
3857       if (region_p) {
3858 	memcpy(tmp_region_p, annot_arr[i_annot],sizeof(struct annot_entry));
3859         tmp_region_p->a_pos = ia;
3860 	tmp_region_p->score = init_score;
3861 	tmp_region_p->n_ident = tmp_region_p->n_aln = 0;
3862 	*region_p = tmp_region_p;
3863       }
3864     }
3865     else if (annot_arr[i_annot]->label == ']') {
3866       if (have_push_features) *have_push_features = 1;
3867       push_stack(annot_stack, annot_arr[i_annot]);
3868     }
3869     else if (annot_stack) {
3870       if (have_push_features) *have_push_features = 1;
3871       push_stack(annot_stack, annot_arr[i_annot]);
3872     }
3873     i_annot++;
3874   }  /* everything at this alignment position is checked */
3875   return i_annot;
3876 }
3877 
3878 /* returns M_NEG, M_ZERO, M_POS, M_IDENT, M_DEL (a_mark.h)
3879    updates *aln->nsim, npos, nident, nmismatch
3880 
3881 */
align_type(int score,char sp0,char sp1,int nt_align,struct a_struct * aln,int pam_x_id_sim)3882 int align_type(int score, char sp0, char sp1, int nt_align, struct a_struct *aln, int pam_x_id_sim) {
3883   int spa_val;
3884 
3885   if (score<0) {
3886     spa_val = M_NEG;
3887   }
3888   else if (score == 0) {
3889     spa_val = M_ZERO;
3890     if (aln) aln->nsim++;
3891   }
3892   else {
3893     spa_val = M_POS;
3894     if (aln) {aln->nsim++; aln->npos++;}
3895   }
3896 
3897   /* correct for score < 0 with 'N:N'/'X:X' */
3898   if (pam_x_id_sim > 0) {	/* > 0 -> identical, similar */
3899     if ((nt_align && toupper(sp0)=='N' && toupper(sp1)=='N') ||
3900 	(!nt_align && toupper(sp0)=='X' && toupper(sp1)=='X')) {
3901       spa_val = M_POS;
3902       if (aln) {
3903 	aln->nsim++;
3904       }
3905     }
3906   }
3907 
3908   if (aln) aln->nmismatch++;
3909   if (toupper(sp0) == toupper(sp1)) {
3910     spa_val = M_IDENT;
3911     if (aln) {
3912       aln->nident++;
3913       aln->nmismatch--;
3914     }
3915   }
3916   else if (nt_align) {
3917     if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
3918 	(toupper(sp0)=='U' && toupper(sp1)=='T')) {
3919       spa_val = M_IDENT;
3920       if (aln) {
3921 	aln->nident++;
3922 	aln->nmismatch--;
3923       }
3924     }
3925     /* add to gap count for 'N' matches ?? */
3926     else if (aln && toupper(sp0) == 'N') aln->ngap_q++;
3927     else if (aln && toupper(sp1) == 'N') aln->ngap_l++;
3928   }
3929 
3930   /* correct nident, nmismatch for N:N / X:X */
3931   if (pam_x_id_sim < 0) {	/* > 0 -> identical, similar */
3932     if ((nt_align && toupper(sp0)=='N' && toupper(sp1)=='N') ||
3933 	(!nt_align && toupper(sp0)=='X' && toupper(sp1)=='X')) {
3934       if (aln) {
3935 	aln->nident--;
3936 	aln->nmismatch++;
3937       }
3938     }
3939   }
3940 
3941   return spa_val;
3942 }
3943 
3944 /* seq_pos works with comment_var()/display_push_features()/do_url1() where
3945    i_offset = nn for reversed sequences
3946    off = 0 for 0 based offsets, 1 for 1-based offsets
3947  */
3948 int
seq_pos(int pos,int rev,int off)3949 seq_pos(int pos, int rev, int off) {
3950 
3951   if (rev) {
3952     return -pos-1 + off;
3953   }
3954   else {
3955     return pos;
3956   }
3957 }
3958 
3959 /* target = 0 (aa0), 1 (aa1)
3960 
3961    d_type = display_type (annot_fmt in cal_cons.c):
3962             1 (long text),   d1_fmt = " Variant: %d%c%c%d%c : %c%d%c";
3963             2 (-m 9c code)   sprintf(tmp_str, "|%c%c:%ld%c%c%ld%c",
3964 
3965    i0_pos/i1_pos have already been converted to reverse coordinate if necessary
3966 */
comment_var(long i0_pos,char sp0,long i1_pos,char sp1,char o_sp1,char sim_char,const char * ann_comment,struct dyn_string_str * annot_var_dyn,int target,int d_type)3967 void comment_var (long i0_pos, char sp0, long i1_pos, char sp1, char o_sp1,
3968 		  char sim_char, const char *ann_comment,
3969 		  struct dyn_string_str *annot_var_dyn, int target, int d_type)
3970 {
3971   char tmp_str[MAX_LSTR], tc, ann_ch0, ann_ch1;
3972   char *d1_fmt;
3973 
3974   if (d_type == 1) {
3975     if (target ==1) {
3976       d1_fmt = " Variant: %d%c%c%d%c : %c%d%c";
3977       sprintf(tmp_str,d1_fmt,
3978 	      i0_pos+1, sp0, sim_char, i1_pos+1,sp1, o_sp1,i1_pos+1,sp1);
3979     }
3980     else {
3981       d1_fmt = " qVariant: %d%c%c%d%c : %c%d%c";
3982       sprintf(tmp_str,d1_fmt,
3983 	      i0_pos+1, sp0, sim_char, i1_pos+1,sp1, o_sp1,i1_pos+1,sp0);
3984     }
3985 
3986     /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
3987     dyn_strcat(annot_var_dyn, tmp_str);
3988 
3989     if (ann_comment) {
3990       sprintf(tmp_str," : %s",ann_comment);
3991       /* SAFE_STRNCAT(annot_var_s,tmp_str,n_annot_var_s); */
3992       dyn_strcat(annot_var_dyn, tmp_str);
3993     }
3994 
3995     /* SAFE_STRNCAT(annot_var_s,"\n",n_annot_var_s); */
3996     dyn_strcat(annot_var_dyn, "\n");
3997   }
3998   else if (d_type == 2) {
3999     if (target == 1) {
4000       ann_ch0 = 'X';
4001       ann_ch1 = 'V';
4002     }
4003     else {
4004       ann_ch0 = 'V';
4005       ann_ch1 = 'X';
4006     }
4007 
4008     sprintf(tmp_str, "|%c%c:%ld%c%c%ld%c",
4009 	    ann_ch0,ann_ch1,
4010 	    i0_pos+1,sp0, sim_char,i1_pos+1,sp1);
4011     /* SAFE_STRNCAT(annot_var_s, tmp_str, n_annot_var_s); */
4012     dyn_strcat(annot_var_dyn, tmp_str);
4013   }
4014 }
4015 
4016 void
display_push_features(void * annot_stack,struct dyn_string_str * annot_var_dyn,long i0_pos,char sp0,long i1_pos,char sp1,char sym,struct annot_entry ** region0_p,struct annot_entry ** region1_p,int tot_score,double comp,int n0,int n1,void * pstat_void,int d_type)4017 display_push_features(void *annot_stack, struct dyn_string_str *annot_var_dyn,
4018 		      long i0_pos, char sp0, long i1_pos, char sp1, char sym,
4019 		      struct annot_entry **region0_p,
4020 		      struct annot_entry **region1_p,
4021 		      int tot_score, double comp, int n0, int n1,
4022 		      void *pstat_void, int d_type) {
4023   struct annot_entry *this_annot_p;
4024   double lbits, total_bits, zscore, lprob, lpercid;
4025   char *ann_comment, *bp;
4026   struct annot_entry *region_p;
4027   char tmp_lstr[MAX_LSTR], ctarget, tmp_sstr[MAX_SSTR];
4028   int q_min, q_max, l_min, l_max;
4029   char *dt1_fmt, *dt2_fmt;
4030 
4031   zscore = find_z(tot_score, 1.0, n1, comp, pstat_void);
4032   total_bits = zs_to_bit(zscore, n0, n1);
4033 
4034   while ((this_annot_p = (struct annot_entry *)pop_stack(annot_stack))!=NULL) {
4035 
4036     if (this_annot_p->label == ']') {
4037       if (this_annot_p->target == 1) {
4038 	region_p = *region1_p;
4039 	if (!region_p) {
4040 	  fprintf(stderr,"*** error [%s:%d] *** -- target==1 but region1_p is null\n",__FILE__, __LINE__);
4041 #ifdef DEBUG
4042 	  fprintf(stderr,"*** qtitle: %s\n",ext_qtitle);
4043 #endif
4044 	  continue;
4045 	}
4046 	q_min = region_p->a_pos+1;
4047 	l_min = region_p->pos+1;
4048 	dt2_fmt = "|XR:%d-%d:%d-%d:s=%d;b=%.1f;I=%.3f;Q=%.1f";
4049       }
4050       else {
4051 	region_p = *region0_p;
4052 	if (!region_p) {
4053 	  fprintf(stderr,"*** error [%s:%d] *** -- target==0 but region0_p is null\n",__FILE__, __LINE__);
4054 #ifdef DEBUG
4055 	  fprintf(stderr,"*** qtitle: %s\n",ext_qtitle);
4056 #endif
4057 	  continue;
4058 	}
4059 	q_min = region_p->pos+1;
4060 	l_min = region_p->a_pos+1;
4061 	dt2_fmt = "|RX:%d-%d:%d-%d:s=%d;b=%.1f;I=%.3f;Q=%.1f";
4062       }
4063 
4064       if (region_p->score < 0) {
4065 	lbits = 0.0;
4066 	lprob = 1.0;
4067       }
4068       else {
4069 	lbits = total_bits * (double)region_p->score/tot_score;
4070 	zscore = find_z(region_p->score, 1.0, n1, comp, pstat_void);
4071 	lprob = zs_to_p(zscore);
4072       }
4073 
4074       if (lprob > 0.99) lprob = 0.0;
4075       else if (lprob < 1e-300) lprob = 3000.0;
4076       else lprob = -10.0*log(lprob)/log(10.0);
4077 
4078       if (region_p->n_aln > 0) {
4079 	lpercid = ((double)region_p->n_ident)/(double)region_p->n_aln;
4080       }
4081       else lpercid = -1.0;
4082 
4083       if (d_type == 1) {
4084 	if (this_annot_p->target == 0) {dt1_fmt = " qRegion: %d-%d:%d-%d : score=%d; bits=%.1f; Id=%.3f; Q=%.1f :  %s\n";}
4085 	else {dt1_fmt = " Region: %d-%d:%d-%d : score=%d; bits=%.1f; Id=%.3f; Q=%.1f :  %s\n";}
4086 	sprintf(tmp_lstr, dt1_fmt, q_min, i0_pos+1,
4087 		l_min, i1_pos+1, region_p->score, lbits, lpercid, lprob,
4088 		(region_p->comment) ? region_p->comment : '\0');
4089 
4090       }
4091       else if (d_type == 2) {
4092 	sprintf(tmp_lstr,dt2_fmt,
4093 		q_min, i0_pos+1,
4094 		l_min, i1_pos+1, region_p->score, lbits,lpercid, lprob);
4095 
4096 	if (region_p->comment) {
4097 	  SAFE_STRNCPY(tmp_sstr,region_p->comment,sizeof(tmp_sstr));
4098 	  if ((bp=strchr(tmp_sstr,' '))!=NULL) { *bp = '\0';}
4099 	  SAFE_STRNCAT(tmp_lstr,";C=",sizeof(tmp_lstr));
4100 	  SAFE_STRNCAT(tmp_lstr,tmp_sstr,sizeof(tmp_lstr));
4101 	}
4102       }
4103       /* SAFE_STRNCAT(annot_var_s,tmp_lstr,n_annot_var_s); */
4104       dyn_strcat(annot_var_dyn, tmp_lstr);
4105       region_p->score = 0;
4106       region_p = NULL;
4107     }
4108     else if ((ann_comment = this_annot_p->comment)) {
4109       if (d_type == 1 ) {
4110 	if (this_annot_p->target == 0) {dt1_fmt = " qSite:%c : %d%c%c%d%c : %s\n";}
4111 	else {dt1_fmt = " Site:%c : %d%c%c%d%c : %s\n";}
4112 	sprintf(tmp_lstr,dt1_fmt, this_annot_p->label,i0_pos+1, sp0,
4113 		sym, i1_pos+1, sp1, ann_comment);
4114 	/* SAFE_STRNCAT(annot_var_s,tmp_lstr,n_annot_var_s); */
4115 	dyn_strcat(annot_var_dyn, tmp_lstr);
4116       }
4117     }
4118   }
4119 }
4120