1 /*	doinit.c	general and function-specific initializations */
2 
3 /* $Id: doinit.c 1267 2014-07-29 13:50:40Z wrp $ */
4 
5 /* copyright (c) 1996, 1997, 1998, 2014 by William R. Pearson and the
6    Rector & Visitors of the University of Virginia */
7 
8 /* Licensed under the Apache License, Version 2.0 (the "License");
9    you may not use this file except in compliance with the License.
10    You may obtain a copy of the License at
11 
12    http://www.apache.org/licenses/LICENSE-2.0
13 
14    Unless required by applicable law or agreed to in writing,
15    software distributed under this License is distributed on an "AS
16    IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
17    express or implied.  See the License for the specific language
18    governing permissions and limitations under the License.
19 */
20 
21 /* this file performs general initializations of search parameters
22 
23    In addition, it calls several functions in init??.c that provide
24    program-specific initializations:
25 
26    f_initenv()	- called from initenv()
27    f_getopt()	- called from initenv() during a getopt() scan
28    f_getarg()	- called from initenv() after the getopt() scan
29 
30 */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #if defined(UNIX) || defined(_MACH)
37 #include <unistd.h>
38 #endif
39 #ifndef PCOMPLIB
40 #ifdef IRIX
41 #include <sys/sysmp.h>
42 #endif
43 #else
44 #include "msg.h"	/* need for FIRSTNODE */
45 #ifdef MPI_SRC
46 #include "mpi.h"
47 #endif
48 #endif
49 
50 #include "defs.h"
51 #include "param.h"
52 #include "upam.h"	/* required for 'U' option change of nascii */
53 
54 #include "structs.h"
55 
56 #define XTERNAL
57 #include "uascii.h"
58 #undef XTERNAL
59 
60 #ifdef UNIX
61 #include <getopt.h>
62 #else
63 extern int optind;		/* used by getopt() */
64 extern char *optarg;
65 #endif
66 
67 char prog_name[MAX_FN];
68 
69 extern void f_initenv(struct mngmsg *, struct pstruct *, unsigned char **);
70 extern void f_lastenv(struct mngmsg *, struct pstruct *);
71 extern void f_getopt(char, char *, struct mngmsg *, struct pstruct *);
72 extern void f_getarg(int, char **, int, struct mngmsg *, struct pstruct *);
73 extern void show_help(char *, int pgm_id);
74 extern void show_all_help(char *pgm_name, int pgm_id);
75 void g_init_opts(struct mngmsg *, struct pstruct *);
76 void subs_env(char *dest, char *src, int dest_size);
77 
78 void add_ascii_ann(int *qascii, unsigned char *ann_arr);
79 static int set_markx(int markx, int val, char c);
80 static void pre_parse_markx(char *opt_arg, struct mngmsg *m_msp);
81 static void parse_markx(char *opt_arg, struct markx_str *this_markx);
82 static void get_annot_def_file(struct mngmsg *m_msp, char *fa_annot_env);
83 void markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *this_markx);
84 void m_msp_to_markx(struct markx_str *this_markx, struct mngmsg *m_msp);
85 
86 int optcnt;
87 int fa_max_workers=MAX_WORKERS;
88 #ifdef PCOMPLIB
89 int worker_1=0;
90 int worker_n=0;
91 #endif
92 
93 extern struct opt_def_str f_options[];
94 
95 void set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
96 		       int i_param1, int i_param2,
97 		       double d_param1, double d_param2, char *s_param);
98 
99 /* ****************************************************************
100    The option/-help system has been substantially restructured to
101    allow more consistent -h/-help messages.
102 
103    There are now two global arrays, opt_def_str g_options (global
104    options, parsed in doinit.c), and opt_def_str f_options
105    (function-specific options, parsed in initfa.c)
106 
107    struct opt_def_str {
108      char opt_char;	# getopt single character option letter
109      int has_arg;	# does the option take an argument?
110      char *opt_str;	# getopt_long (future) long option name
111      char *opt_descr_s;	# short description of option
112      char *opt_descr_l; # long description of option (if NULL, use opt_descr_s)
113      int opt_rank;	# rank of option (not used)
114      int fmt_type;	# fmt type (for defaults): 1,2 ints, 3,4 doubles, 5 string (s_param)
115      int i_param1;	# int default1
116      int i_param2;
117      double d_param1;	# double default1
118      double d_param2;
119    };
120 
121    the g_opt_string and f_opt_string's parsed by getopt() are built
122    from these structures, guaranteeing that the options and help
123    messages are kept in sync.
124 
125    long option descriptions (opt_descr_l) are saved in static arrays
126    (e.g. m_opt_descr[] in doinit.c, z_opt_descr[], s_opt_descr[] in
127    initfa.c)
128 
129    The default option values, which are displayed from i_param[1,2],
130    d_param[1,2], are set by g_init_opts() and f_init_opts() using
131    set_opt_disp_defs().  g_init_opts()/f_init_opts() should be called
132    as late as possible in the program.
133 
134    **************************************************************** */
135 
/* long description of the -m option (opt_descr_l for 'm' in
   g_options[]); one string literal per displayed line */
static char m_opt_descr[] =
  "Output/alignment format;\n"
  "      0 - standard \":. \" alignment; 1 - \" xX\"; 2 - \".MS..\"; 3 - separate >fasta entries;\n"
  "      4 - \"---\" alignment map; 5 - 0+4; 6 - <html>;\n"
  "      8 - BLAST tabular; 8C commented BLAST tabular;\n"
  "      B - BLAST Query/Sbjct alignments; BB - complete BLAST output;\n"
  "      9 - FASTA tabular; 9c - FASTA tabular encoded; 9C FASTA tabular CIGAR encoded;\n"
  "     10 - parseable key:value; 11 - lav for LALIGN;\n"
  "      A - aligned residue score\n"
  "      F - 'F0,6,9c out_file' - alternate output formats to files;";
137 
138 struct opt_def_str g_options[] = {
139   {'C', 1, "aname_length", "length of the query/sbjct name in alignments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
140   {'D', 0, "debug", "enable debugging output", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
141   {'e', 1, "expand", "expand_script to extend hits", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
142   {'F', 1, "evalue_min", "min E()-value displayed", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
143 #if defined(PCOMPLIB) || !defined(SHOW_HIST)
144   {'H', 0, "histogram", "show histogram", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
145 #else
146   {'H', 0, "nohist", "no histogram", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
147 #endif
148   {'i', 0, "revcomp", "search with reverse-complement", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
149 #ifdef SHOW_HELP
150   {'I', 0, "interact", "interactive mode", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
151 #endif
152   {'l', 1, "fastlibs", "FASTLIBS abbreviation file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
153   {'L', 0, "long_info", "long library descriptions", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
154   {'m', 1, "outfmt", "output format", &m_opt_descr[0], 0, 0, 0, 0, 0.0, 0.0, NULL},
155   {'N', 1, "lib_length", "max library length before overlapping", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
156   {'o', 1, "offsets", "offset coordinates of query/subject", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
157   {'O', 1, "out", "write results to file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
158 #ifndef SHOW_HELP
159   {'q', 0, "quiet", "quiet -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
160   {'Q', 0, "\0", "quiet -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
161 #else
162   {'q', 0, "quiet", "quiet [default] -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
163   {'Q', 0, "\0", "quiet [default] -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
164 #endif
165   {'R', 1, "results_file", "raw score file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
166   {'T', 1, "threads", "max threads/workers", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
167   {'v', 1, "shuffle_window", "shuffle window size", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
168   {'V', 1, "annotation", "annotation characters in query/library for aligments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
169   {'w', 1, "aln_width", "width of alignment display", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
170   {'Z', 1, "db_size", "database size for E()-value", "[library entries] database size for E()-value", 0, 0, 0, 0, 0.0, 0.0, NULL},
171   {'\0', 0, "", "", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL}
172 };
173 
174 /* set default option values for help */
g_init_opts(struct mngmsg * m_msp,struct pstruct * ppst)175 void g_init_opts(struct mngmsg *m_msp, struct pstruct *ppst) {
176   set_opt_disp_defs('C', g_options, 1, m_msp->nmlen, 0, 0.0, 0.0, NULL);
177   set_opt_disp_defs('F', g_options, 3, 0, 0, m_msp->e_low, 0.0, NULL);
178   set_opt_disp_defs('m', g_options, 1, m_msp->markx, 0, 0.0, 0.0, NULL);
179   set_opt_disp_defs('o', g_options, 2, (int)m_msp->sq0off, (int)m_msp->sq1off, 0.0, 0.0,NULL);
180   set_opt_disp_defs('T', g_options, 1, fa_max_workers, 0, 0.0, 0.0, NULL);
181   set_opt_disp_defs('v', g_options, 1, ppst->zs_win, 0, 0.0, 0.0, NULL);
182   set_opt_disp_defs('w', g_options, 1, m_msp->aln.llen, 0, 0.0, 0.0, NULL);
183 }
184 
185 void
186 build_optstr(char *opt_str, int opt_len, struct opt_def_str *opt_defs);
187 
188 static int long_info_set=0;
189 static int llen_set = 0;
190 static int markx_set = 0;
191 
192 /* initenv ()  initializes the environment */
initenv(int argc,char ** argv,struct mngmsg * m_msp,struct pstruct * ppst,unsigned char ** aa0)193 void initenv (int argc, char **argv, struct mngmsg *m_msp,
194 		 struct pstruct *ppst, unsigned char **aa0)
195 {
196   char *cptr, *bp, *bp1;
197   int  copt;
198 
199    /* options for all search functions */
200    /* char   *g_optstr = "b:BC:d:DE:F:HiK:l:Lm:N:O:QqR:T:v:V:w:W:X:Z:"; */
201 
202    char g_optstring[MAX_STR];
203    char f_optstring[MAX_STR];
204    char optstring[MAX_STR];
205 
206    /* help functions exit(); try first */
207    if (argc == 1) {
208      show_help(m_msp->pgm_name, ppst->pgm_id);
209    }
210    if (strcmp(argv[1],"-help")==0 || strcmp(argv[1],"--help")==0) {
211      show_all_help(m_msp->pgm_name, ppst->pgm_id);
212    }
213 
214    build_optstr(g_optstring, sizeof(f_optstring), g_options);
215    build_optstr(f_optstring, sizeof(f_optstring), f_options);
216 
217 /*  these initializations will be used by all functions */
218 
219    /* prog_name[] is only used for error messages */
220    strncpy(prog_name,argv[0],sizeof(prog_name));
221    prog_name[sizeof(prog_name)-1]='\0';
222 
223 #ifdef PCOMPLIB
224 #ifdef MPI_SRC
225   MPI_Comm_size(MPI_COMM_WORLD,&fa_max_workers);
226   if (fa_max_workers <= 1) {
227     fprintf(stderr," nnodes = %d; no workers available\n",fa_max_workers);
228     exit(1);
229   }
230   else {
231     fa_max_workers -= FIRSTNODE;
232     fprintf(stderr," have %d workers\n",fa_max_workers);
233   }
234 #endif
235 #else
236 #if defined(IRIX)
237    fa_max_workers = sysmp(MP_NPROCS);
238 #else
239 #if defined(UNIX) || defined(HAVE_SYSCONF)
240    fa_max_workers = sysconf(_SC_NPROCESSORS_CONF);
241 #endif	/* UNIX || SYSCONF */
242 #endif  /* !IRIX */
243 #endif  /* !PCOMPLIB */
244 
245    m_msp->ltitle[0] = '\0';
246 
247    if ((cptr=getenv("FASTLIBS"))!=NULL) {
248      strncpy(m_msp->flstr,cptr,MAX_FN);
249      m_msp->flstr[MAX_FN-1] = '\0';
250    }
251    else m_msp->flstr[0]='\0';
252 
253    m_msp->std_output = 1;
254    m_msp->hist.hist_a = NULL;
255    m_msp->outfile[0] = '\0';
256    m_msp->outfd = NULL;
257    m_msp->ldb_info.ldnaseq = SEQT_PROT;	/* library is protein */
258    m_msp->n1_low = ppst->n1_low = 0;
259    m_msp->n1_high = ppst->n1_high = BIGNUM;
260    m_msp->ql_start = 1;	/* start with first query sequence */
261    m_msp->ql_stop = BIGNUM;	/* end with the last query sequence */
262    m_msp->aa1save_buf_b = NULL;
263    m_msp->bline_buf_b = NULL;
264 
265    m_msp->pamd1 = MAXSQ;
266    m_msp->pamd2 = MAXSQ;
267 
268    m_msp->ldb_info.term_code = 0;
269 
270    ppst->tr_type = 0;
271    ppst->debug_lib = 0;
272    m_msp->nshow = 20;
273    ppst->max_repeat = 50;
274    m_msp->nohist = 1;
275 #if defined(PCOMPLIB)
276    m_msp->mshow = 20;
277 #else
278 #ifdef SHOW_HIST
279    m_msp->nohist = 0;
280 #endif
281    m_msp->mshow = 50;
282 #endif
283    m_msp->do_showbest = 1;
284    m_msp->ashow = -1;
285    m_msp->ashow_set = 0;
286    m_msp->nmlen = DEF_NMLEN;
287    m_msp->z_bits = 1;
288    m_msp->tot_ident = 0;
289    m_msp->mshow_set = 0;
290    m_msp->mshow_min = 0;
291    m_msp->aln.llen = 60;
292    m_msp->aln.llcntx = 30;
293    m_msp->aln.llcntx_set = 0;
294    m_msp->e_low = 0.0;
295    m_msp->e_cut_set = 0;
296    m_msp->revcomp = 0;
297    m_msp->long_info = 0;
298    m_msp->ldb_info.maxn = 0;
299    m_msp->ldb_info.dupn = SEQDUP;
300    m_msp->dfile[0] = '\0';
301    m_msp->tname[0] = '\0';
302    m_msp->lname[0] = '\0';
303    m_msp->link_lname[0] = '\0';
304    m_msp->show_code = 0;
305    m_msp->tot_show_code = 0;
306    m_msp->aln.showall = 0;
307    m_msp->markx = 0;
308    m_msp->tot_markx = 0;
309    m_msp->markx_list = NULL;
310    m_msp->align_done = 0;
311    m_msp->sq0off = m_msp->sq1off = 1;
312    strncpy(m_msp->sqnam,"aa",4);
313    strncpy(m_msp->sqtype,"protein",10);
314 
315    /* annotation info */
316    m_msp->ann_flg = 0;
317    memset(m_msp->ann_arr,'\0',MAX_FN);
318    m_msp->ann_arr_def[0] = NULL;
319    m_msp->ann_arr_def[1] = NULL;
320    m_msp->annot0_sname[0]='\0';
321    m_msp->annot1_sname[0]='\0';
322    m_msp->annot_p = NULL;
323    m_msp->aa0a = NULL;
324 
325    ppst->LK_set = 0;
326    ppst->e_cut = m_msp->e_cut = 10.0;
327    ppst->e_cut_r = ppst->e_cut / 10.0;
328    ppst->do_rep = 1;
329    ppst->zs_win = 0;
330    ppst->show_ident = 0;
331 
332    ppst->zdb_size = -1;
333    ppst->zdb_size_set = 0;
334    ppst->dnaseq = SEQT_PROT;	/* default is protein */
335    ppst->nt_align = 0;
336 
337    ppst->other_info = NULL;
338 
339    g_init_opts(m_msp, ppst);
340 
341    f_initenv (m_msp, ppst, aa0);
342 
343    SAFE_STRNCPY (optstring, g_optstring, sizeof (optstring));
344    SAFE_STRNCAT (optstring, f_optstring, sizeof (optstring));
345 
346    while ((copt = getopt (argc, argv, optstring)) != EOF)
347    {
348       if (strchr (g_optstring, copt) != NULL)
349       {
350 	switch (copt) {  /* switches for all options */
351 	case 'C':
352 	  sscanf(optarg,"%d",&m_msp->nmlen);
353 	  if (m_msp->nmlen > MAX_UID-1) m_msp->nmlen = MAX_UID-1;
354 	  break;
355 	case 'D': ppst->debug_lib = 1;
356 	  break;
357 	case 'e':
358 	  strncpy(m_msp->link_lname, optarg, MAX_LSTR);
359 	  break;
360 	case 'F':
361 	  sscanf(optarg,"%lg",&m_msp->e_low);
362 	  m_msp->e_cut_set = 1;
363 	  break;
364 #if defined(PCOMPLIB) || !defined(SHOW_HIST)
365 	case 'H':
366 	  m_msp->nohist = 0; break;
367 #else
368 	case 'H':
369 	  m_msp->nohist = 1; break;
370 #endif
371 	case 'i':
372 	  m_msp->revcomp = 1; break;
373 	case 'I':
374 	  m_msp->quiet = 0; break;
375 	case 'l':
376 	  strncpy(m_msp->flstr,optarg,MAX_FN);
377 	  m_msp->flstr[MAX_FN-1]='\0';
378 	  break;
379 	case 'L':
380 	  m_msp->long_info = 1;
381 	  long_info_set = 1;
382 	  break;
383 	case 'm':
384 	  pre_parse_markx(optarg, m_msp);
385 	  markx_set = 1;
386 	  break;
387 	case 'N':
388 	  sscanf(optarg,"%d",&m_msp->ldb_info.maxn);
389 	  break;
390 	case 'o':
391 	  sscanf (optarg,"%ld %ld",&m_msp->sq0off,&m_msp->sq1off); break;
392 	case 'O':
393 	  strncpy(m_msp->outfile,optarg,MAX_FN);
394 	  m_msp->outfile[MAX_FN-1]='\0';
395 	  break;
396 	case 'q':
397 	case 'Q':
398 	  m_msp->quiet = 1;
399 	  break;
400 	case 'R':
401 	  strncpy (m_msp->dfile, optarg, MAX_FN);
402 	  m_msp->dfile[MAX_FN-1]='\0';
403 	  break;
404 	case 'T':
405 #ifdef PCOMPLIB
406 	  if (strchr(optarg,'-') != NULL) {
407 	    sscanf(optarg,"%d-%d",&worker_1,&worker_n);
408 	    if (worker_1 > worker_n) {
409 	      worker_1 = worker_n = 0;
410 	    }
411 	  }
412 	  else
413 #endif
414 	    sscanf (optarg, "%d", &fa_max_workers);
415 	  if (fa_max_workers < 0) fa_max_workers=1;
416 	  break;
417 	case 'v':
418 	  sscanf (optarg,"%d",&ppst->zs_win);
419 	  break;
420 	case 'V':
421 	  if (optarg[0] == '=') {
422 	    get_annot_def_file(m_msp, optarg+1);
423 	  }
424 	  else if ((cptr = getenv("FA_ANNOT_DEF"))) {
425 	    get_annot_def_file(m_msp, cptr);
426 	  }
427 	  else if (optarg[0] == 'q' && (optarg[1]=='!' || optarg[1]=='<')) {
428 	    strncpy(m_msp->annot0_sname,optarg+1,MAX_LSTR);
429 	    m_msp->ann_flg = 2;
430 	  }
431 	  else if (optarg[0]=='!' || optarg[0]=='<') {
432 	    strncpy(m_msp->annot1_sname,optarg,MAX_LSTR);
433 	    m_msp->ann_flg = 2;
434 	  }
435 	  else {
436 	      strncpy((char *)m_msp->ann_arr+1,optarg,MAX_FN-2);
437 	      m_msp->ann_arr[0]='\0';
438 	      m_msp->ann_arr[MAX_FN-2]='\0';
439 	      m_msp->ann_arr_n = strlen((char *)m_msp->ann_arr+1);
440 	      if (m_msp->ann_flg ==0) m_msp->ann_flg = 1;
441 	  }
442 
443 	  if (strlen((char *)m_msp->ann_arr) > 0) {
444 	    add_ascii_ann(qascii, m_msp->ann_arr);
445 	  }
446 
447 	  break;
448 /*
449 	case 'V':
450 	  fprintf(stderr," -V option not currently supported in parallel\n");
451 	  break;
452 */
453 	case 'w':
454 	  sscanf (optarg,"%d",&m_msp->aln.llen);
455 	  if (m_msp->aln.llen < 10) m_msp->aln.llen = 10;
456 	  if (m_msp->aln.llen > 200) m_msp->aln.llen = 200;
457 	  if (!m_msp->aln.llcntx_set) m_msp->aln.llcntx = m_msp->aln.llen/2;
458 	  llen_set = 1;
459 	  break;
460 	case 'Z':
461 	  sscanf(optarg,"%ld",&ppst->zdb_size);
462 	  ppst->zdb_size_set = 1;
463 	  break;
464 	}
465       }
466       else if (strchr (f_optstring, copt))
467 	 f_getopt (copt, optarg, m_msp, ppst);
468    }
469    optind--;
470 
471    if (!markx_set || !(m_msp->markx & (MX_ATYPE+MX_ANNOT_COORD+MX_ANNOT_MID))) {
472      m_msp->markx = set_markx(m_msp->markx, 0, '\0');
473    }
474 
475    /* done with options, check for initializations in initfa.c
476       (set sascii alphabet) */
477    f_lastenv (m_msp, ppst);
478 
479    if (argc - optind < 3) return;
480    m_msp->tnamesize = sizeof (m_msp->tname);
481    if (argc - optind > 1) {strncpy (m_msp->tname, argv[optind + 1],MAX_FN);}
482    if (argc - optind > 2) {strncpy(m_msp->lname, argv[optind + 2],MAX_LSTR);}
483    f_getarg (argc, argv, optind, m_msp, ppst);
484 }
485 
486 /* ann_scan scans an aa0 query sequence if -V ann_chars, and returns
487    an edited query sequence and allocates aa0a[n_n0+2] space for the
488    annotation */
489 
490 int
ann_scan(unsigned char * aa0,int n0,unsigned char ** aa0a_p,int seqtype)491 ann_scan(unsigned char *aa0, int n0, unsigned char **aa0a_p, int seqtype)
492 {
493   unsigned char *aa0p, *aa0d, *aa0ad;
494   int n_n0;
495 
496   /* count how many "real" residues */
497 
498   if (seqtype==SEQT_UNK) {
499     /* with SEQT_UNK, annotation characters are all < @,
500        while sequence chars are all > @ */
501     for (n_n0=0, aa0p = aa0; aa0p < aa0+n0; aa0p++) {
502       if (*aa0p > '@' || *aa0p == ESS ) n_n0++;		/* ESS captures ',' in sequence */
503     }
504   }
505   else {
506     /* if the sequence type is known, then annotation chars are > NANN */
507     for (n_n0=0, aa0p = aa0; aa0p < aa0+n0; aa0p++) {
508       if (*aa0p < NANN ) n_n0++;
509     }
510   }
511 
512   if (n_n0 == n0) {
513     *aa0a_p = NULL;
514     return n_n0;
515   }
516 
517   aa0d = aa0;
518   /* n_n0 has the real sequence length */
519   if ((*aa0a_p = calloc(n_n0+2, sizeof(char)))==NULL) {
520     fprintf(stderr," cannot allocate annotation sequence: %d\n",n_n0);
521 
522     /* this section is for failure, simply copy the correct sequence
523        and ignore the annotations */
524     if (seqtype==SEQT_UNK) {
525       for (aa0p = aa0; aa0p < aa0+n0; aa0p++) {
526 	if (*aa0p > '@' || *aa0p == ESS) {*aa0d++ = *aa0p;}
527       }
528     }
529     else {
530       for (aa0p = aa0; aa0p < aa0+n0; aa0p++) {
531 	if (*aa0p < NANN) {*aa0d++ = *aa0p;}
532       }
533     }
534     *aa0d = '\0';
535     return n_n0;
536   }
537 
538   /* have aa0a_p annotation array allocated */
539   aa0ad = *aa0a_p;
540   if (seqtype==SEQT_UNK) {
541     for (aa0p = aa0; aa0p<aa0+n0; aa0p++) {
542       if (*aa0p > '@' || *aa0p == ESS) {*aa0d++ = *aa0p; *aa0ad++='\0';}
543       else if (aa0ad > *aa0a_p) { aa0ad[-1] = *aa0p - NANN;}
544     }
545   }
546   else {
547     for (aa0p = aa0; aa0p<aa0+n0; aa0p++) {
548       if (*aa0p < NANN) {*aa0d++ = *aa0p; *aa0ad++='\0';}
549       else if (aa0ad > *aa0a_p) { aa0ad[-1] = *aa0p - NANN;}
550     }
551   }
552   *aa0ad = *aa0d = '\0';
553   return n_n0;
554 }
555 
556 /* renamed from ann_ascii() Feb, 2008 to allow ann_ascii[] */
557 void
add_ascii_ann(int * qascii,unsigned char * ann_arr)558 add_ascii_ann(int *qascii, unsigned char *ann_arr)
559 {
560   unsigned char *ann_p;
561   int ann_ix = NANN+1;
562 
563   if (ann_arr[0] == '\0' && ann_arr[1]=='\0') {
564     ann_arr[0] = ' ';
565     ann_arr[0] = '\0';
566     return;
567   }
568 
569   ann_arr[0] = ' ';
570 
571   if (strchr((char *)ann_arr+1,'*')) {qascii['*'] = NA;}
572 
573   for (ann_p = ann_arr+1; *ann_p; ann_p++) {
574     if (qascii[*ann_p] == NA) { qascii[*ann_p] = ann_ix++;}
575   }
576 }
577 
578 /* parse annotation description line */
add_annot_def(struct mngmsg * m_msp,char * line,int qa_flag)579 void add_annot_def(struct mngmsg *m_msp, char *line, int qa_flag) {
580   char *bp;
581   int i_ann;
582 
583   if ((bp=strchr(line,'\r')) !=NULL || (bp=strchr(line,'\n')) != NULL) {
584     *bp = '\0';
585   }
586 
587   if (m_msp->ann_arr[0]=='\0') {
588     m_msp->ann_arr[0] = ' ';
589     m_msp->ann_arr[1] = '\0';
590   }
591 
592   /* set the character */
593   i_ann = strlen((char *)m_msp->ann_arr);
594   if ((bp = strchr((char *)m_msp->ann_arr,line[0]))!=NULL) {
595     i_ann = (unsigned char *)bp - m_msp->ann_arr;
596   }
597   else {
598     m_msp->ann_arr[i_ann] = line[0];
599     m_msp->ann_arr[i_ann+1] = '\0';	/* required for strchr(ann_arr) to work */
600     if (qa_flag) qascii[line[0]] = NANN + i_ann;
601   }
602 
603   if ((bp=strchr(line,':'))!=NULL) {
604     /* allocate space for definitions */
605     if ((m_msp->ann_arr_def[i_ann]=(char *)calloc(strlen(bp+1)+1,sizeof(char)))!=NULL) {
606       /* read in the definitions and associate with symbol */
607       strncpy(m_msp->ann_arr_def[i_ann], bp+1,strlen(bp+1));
608     }
609   }
610   else {
611     m_msp->ann_arr_def[i_ann] = NULL;
612   }
613 
614 
615 }
616 
617 /* read definitions of annotation symbols from a file */
618 static void
get_annot_def_file(struct mngmsg * m_msp,char * fa_annot_env)619 get_annot_def_file(struct mngmsg *m_msp, char *fa_annot_env) {
620   FILE *def_fp;
621   char *bp, *bpf, line[MAX_STR];
622   char tmp_annot_env[MAX_STR];
623 
624   if ((bpf=strchr(fa_annot_env,' '))!=NULL) *bpf = '\0';
625 
626   subs_env(tmp_annot_env, fa_annot_env, sizeof(tmp_annot_env));
627   /* check that the file exists */
628   if ((def_fp = fopen(tmp_annot_env,"r"))==NULL) {
629     fprintf(stderr,"*** error *** annotation definition file: %s not found\n",
630 	    tmp_annot_env);
631     if (bpf) *bpf=' ';
632     return;
633   }
634 
635   /* read a line */
636   while (fgets(line, sizeof(line), def_fp)!=NULL) {
637     add_annot_def(m_msp, line, 0);
638   }
639   fclose(def_fp);
640   if (bpf) *bpf=' ';
641 
642   if (strlen((char *)m_msp->ann_arr)>1) m_msp->ann_flg = 1;
643 }
644 
645 int
set_markx(int markx,int val,char c)646 set_markx(int markx, int val, char c) {
647 
648   if (val < 3) {
649     if (c=='M') {
650       markx |= MX_ANNOT_MID;
651       markx &= (~MX_ANNOT_COORD);
652     }
653     else if (c=='B') {
654       markx |= MX_ANNOT_COORD;
655       markx |= MX_ANNOT_MID;
656     }
657     else {
658       markx |= MX_ANNOT_COORD;
659     }
660     if (c=='H') {
661       markx |= MX_HTML;
662     }
663     return markx | (MX_ATYPE & val);
664   }
665   else if (val == 3) {
666     markx |= (MX_ATYPE + MX_ASEP);
667   }
668   else if (val == 4) {
669     markx |= (MX_ATYPE + MX_AMAP);
670   }
671   else if (val == 5) {
672     markx |= MX_AMAP;
673   }
674   else if (val == 6 || c=='H') {
675     markx |= (MX_HTML) ;
676     if (c=='M') {
677       markx |= MX_ANNOT_MID;
678       markx &= (~MX_ANNOT_COORD);
679     }
680     else if (c=='B') {
681       markx |= MX_ANNOT_COORD;
682       markx |= MX_ANNOT_MID;
683     }
684     else {
685       markx |= MX_ANNOT_COORD;
686     }
687   }
688   else if (val == 8) {
689     markx |= MX_M9SUMM+MX_M8OUT;
690   }
691   else if (val == 9) {
692     markx |= MX_M9SUMM;
693   }
694   else if (val == 10) {
695     markx |= MX_M10FORM;
696   }
697   else if (val == 11) {
698     markx |= MX_M11OUT;
699   }
700 
701   return markx;
702 }
703 
704 void
pre_parse_markx(char * opt_arg,struct mngmsg * m_msp)705 pre_parse_markx(char *opt_arg, struct mngmsg *m_msp) {
706   char *bp, *last_bp;
707   struct markx_str *tmp_markx, *cur_markx, *last_markx;
708 
709   if (opt_arg[0] != 'F' && m_msp->markx_list != NULL) {
710     tmp_markx = m_msp->markx_list;
711   }
712   else {
713     if ((tmp_markx = (struct markx_str *)calloc(1,sizeof(struct markx_str)))==NULL) {
714       fprintf(stderr,"[error] Cannot allocate markx_list\n");
715       return;
716     }
717 
718     /* initialize markx to m_msg defaults -- we do not use m_msp
719        directly, because it might have been changed by an earlier -m
720        out_fmt */
721 
722     tmp_markx->nohist = 1;
723     if (m_msp->ashow_set) {tmp_markx->ashow = m_msp->ashow;}
724     else {tmp_markx->ashow = -1;}
725 
726     tmp_markx->show_code = 0;
727     if (long_info_set) tmp_markx->long_info = 1;
728     else tmp_markx->long_info = 0;
729     if (llen_set) {
730       tmp_markx->aln_llen = m_msp->aln.llen;
731       tmp_markx->aln_llcntx = m_msp->aln.llcntx;
732       tmp_markx->aln_llcntx_set = m_msp->aln.llcntx_set;
733     }
734     else {
735       tmp_markx->aln_llen = 60;
736       if (m_msp->aln.llcntx_set) {
737 	tmp_markx->aln_llcntx = m_msp->aln.llcntx;
738 	tmp_markx->aln_llcntx_set = m_msp->aln.llcntx_set;
739       }
740       else {
741 	tmp_markx->aln_llcntx = 30;
742 	tmp_markx->aln_llcntx_set = 0;
743       }
744     }
745     tmp_markx->std_output = 1;
746   }
747 
748   /* first check for -m "F file" format */
749   if (optarg[0] == 'F') {
750     if ((bp=strchr(optarg+1,' '))==NULL) {
751       fprintf(stderr,"-m F missing file name: %s\n",optarg);
752       return;
753     }
754     /* allocate space for file name */
755     if ((tmp_markx->out_file = calloc(strlen(bp+1)+1,sizeof(char)))==NULL) {
756       fprintf(stderr,"[error] Cannot allocate markx->out_file\n");
757       return;
758     }
759     strncpy(tmp_markx->out_file, bp+1, strlen(bp+1));
760     *bp = '\0';
761 
762     last_bp = optarg+1;
763   }
764   else {
765     last_bp = optarg;
766   }
767 
768   if (opt_arg[0] != 'F') {
769     m_msp_to_markx(tmp_markx, m_msp);
770   }
771 
772   while ((bp=strchr(last_bp,','))!=NULL) {
773     *bp = '\0';
774     parse_markx(last_bp, tmp_markx);
775     *bp = ',';
776     last_bp = bp+1;
777   }
778 
779   if (*last_bp) parse_markx(last_bp, tmp_markx);
780 
781   if (m_msp->markx_list!=NULL) {
782     if (opt_arg[0] == 'F') {
783       /* if file name, add this to the end of the list */
784       last_markx = m_msp->markx_list;
785       for (cur_markx=m_msp->markx_list->next; cur_markx; cur_markx = cur_markx->next) {
786 	last_markx = cur_markx;
787       }
788       last_markx->next = tmp_markx;
789     }
790     else if (tmp_markx != m_msp->markx_list) {
791       /* if no file name, then make this the first in the list,
792 	 unless it is already there */
793       cur_markx = m_msp->markx_list;
794       m_msp->markx_list = tmp_markx;
795       tmp_markx->next = cur_markx;
796     }
797   }
798   else {
799     m_msp->markx_list = tmp_markx;
800   }
801 
802   m_msp->tot_markx |= tmp_markx->markx;
803   m_msp->tot_show_code |= tmp_markx->show_code;
804 
805   /* if no -m F, save options into m_msp */
806   if (optarg[0] != 'F') {
807     markx_to_m_msp(m_msp, tmp_markx);
808   }
809 
810   return;
811 }
812 
813 void
parse_markx(char * optarg,struct markx_str * this)814 parse_markx(char *optarg, struct markx_str *this) {
815   int itmp;
816   char ctmp, ctmp2;
817 
818   itmp = 0;
819   ctmp = ctmp2 = '\0';
820 
821   if (optarg[0] == 'B') {	/* BLAST alignment output */
822     this->markx = MX_MBLAST;
823     this->aln_llcntx = 0;
824     this->aln_llcntx_set = 1;
825     this->long_info=1;
826     this->ashow = -1;
827     if (optarg[1] == 'B') {	/* complete BLAST output */
828       this->markx += MX_MBLAST2;
829       this->nohist = 1;
830       this->aln_llen = 65;
831       this->std_output = 0;
832       return;
833     }
834     else if (optarg[1] == '8') {
835       sscanf(optarg,"%d%c%c",&itmp,&ctmp,&ctmp2);
836     }
837     else {return;}		/* done with BLAST aligment output */
838   }
839   else if (optarg[0] == 'A') {
840     this->markx += MX_RES_ALIGN_SCORE;
841     this->aln_llcntx = 0;
842     this->aln_llcntx_set = 1;
843     return;
844   }
845   else {
846     sscanf(optarg,"%d%c%c",&itmp,&ctmp,&ctmp2);
847   }
848   if (itmp==9) {
849     if (ctmp=='c') {this->show_code = SHOW_CODE_ALIGN;}
850     else if (ctmp=='d') {this->show_code = SHOW_CODE_ALIGN + SHOW_CODE_EXT;}
851     else if (ctmp=='C') {this->show_code = SHOW_CODE_CIGAR;}
852     else if (ctmp=='D') {this->show_code = SHOW_CODE_CIGAR + SHOW_CODE_EXT;}
853     else if (ctmp=='i') {this->show_code = SHOW_CODE_ID;}
854   }
855   if (itmp > 6 && itmp != 11 && itmp != 10 && itmp != 9 && itmp != 8) itmp = 0;
856   this->markx = set_markx(this->markx,itmp,ctmp);
857   if (itmp == 11 ) { this->std_output = 0;}
858   if (itmp == 8) {
859     this->std_output = 0;
860     this->ashow = 0;
861     if (ctmp=='C') { this->markx += MX_M8COMMENT;}
862     if (ctmp2 == 'c') { this->show_code = SHOW_CODE_ALIGN;}
863     else if (ctmp2 == 'd') {this->show_code = SHOW_CODE_ALIGN + SHOW_CODE_EXT;}
864     else if (ctmp2 == 'C') {this->show_code = SHOW_CODE_CIGAR;}
865     else if (ctmp2 == 'D') {this->show_code = SHOW_CODE_CIGAR + SHOW_CODE_EXT;}
866   }
867 }
868 
869 /* transfer markx values for m_msp to m_msp */
870 void
markx_to_m_msp(struct mngmsg * m_msp,struct markx_str * this)871 markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *this) {
872 
873   m_msp->markx = this->markx;
874   m_msp->nohist = this->nohist;
875   m_msp->ashow = this->ashow;
876   m_msp->show_code = this->show_code;
877   m_msp->long_info = this->long_info;
878   m_msp->aln.llen = this->aln_llen;
879   m_msp->aln.llcntx = this->aln_llcntx;
880   m_msp->aln.llcntx_set = this->aln_llcntx_set;
881   m_msp->std_output = this->std_output;
882 }
883 
884 /* save current m_msp values used with markx */
885 void
m_msp_to_markx(struct markx_str * this,struct mngmsg * m_msp)886 m_msp_to_markx(struct markx_str *this, struct mngmsg *m_msp) {
887 
888   this->markx = m_msp->markx ;
889   this->nohist = m_msp->nohist ;
890   this->ashow = m_msp->ashow ;
891   this->show_code = m_msp->show_code ;
892   this->long_info = m_msp->long_info ;
893   this->aln_llen = m_msp->aln.llen ;
894   this->aln_llcntx = m_msp->aln.llcntx ;
895   this->aln_llcntx_set = m_msp->aln.llcntx_set ;
896   this->std_output = m_msp->std_output ;
897 }
898 
899 /* put options from option table [struct opt_def_str *opt_defs] into
900    char *opt_str for getopt() */
901 
902 void
build_optstr(char * opt_str,int max_len,struct opt_def_str * opt_defs)903 build_optstr(char *opt_str, int max_len, struct opt_def_str *opt_defs) {
904   int i, opt_len = 0;
905   char *opt_pos;
906 
907   opt_pos = opt_str;
908   for (i=0; opt_defs[i].opt_char != '\0'; i++) {
909     if (opt_len + 2 > max_len) {
910       fprintf(stderr," *** error -- options too long %d >= %d\n", opt_len, max_len);
911       break;
912     }
913     *opt_pos++ = opt_defs[i].opt_char;
914     opt_len++;
915     if (opt_defs[i].has_arg) {
916       *opt_pos++ = ':';
917       opt_len++;
918     }
919   }
920   *opt_pos = '\0';
921 }
922 
923 /* set_opt_disp_defs associates parameter addresses with options */
924 void
set_opt_disp_defs(char opt_char,struct opt_def_str * options,int type,int i_param1,int i_param2,double d_param1,double d_param2,char * s_param)925 set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
926 		  int i_param1, int i_param2,
927 		  double d_param1, double d_param2,
928 		  char *s_param) {
929   struct opt_def_str *this_opt;
930 
931   this_opt = options;
932   while (this_opt->opt_char != '\0') {
933     if (this_opt->opt_char == opt_char) {
934       this_opt->fmt_type = type;
935       switch (type) {
936       case 1:
937 	this_opt->i_param1 = i_param1;
938 	break;
939       case 2:
940 	this_opt->i_param1 = i_param1;
941 	this_opt->i_param2 = i_param2;
942 	break;
943       case 3:
944 	this_opt->d_param1 = d_param1;
945 	break;
946       case 4:
947 	this_opt->d_param1 = d_param1;
948 	this_opt->d_param2 = d_param2;
949 	break;
950       case 5:
951 	if (s_param != NULL) {
952 	  this_opt->s_param = (char *)calloc(strlen(s_param)+1,sizeof(char));
953 	  strncpy(this_opt->s_param,s_param,strlen(s_param));
954 	}
955 	else this_opt->s_param = NULL;
956 	break;
957       }
958     }
959     this_opt++;
960   }
961 }
962