1 /* doinit.c general and function-specific initializations */
2
3 /* $Id: doinit.c 1267 2014-07-29 13:50:40Z wrp $ */
4
5 /* copyright (c) 1996, 1997, 1998, 2014 by William R. Pearson and the
6 Rector & Visitors of the University of Virginia */
7
8 /* Licensed under the Apache License, Version 2.0 (the "License");
9 you may not use this file except in compliance with the License.
10 You may obtain a copy of the License at
11
12 http://www.apache.org/licenses/LICENSE-2.0
13
14 Unless required by applicable law or agreed to in writing,
15 software distributed under this License is distributed on an "AS
16 IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
17 express or implied. See the License for the specific language
18 governing permissions and limitations under the License.
19 */
20
21 /* this file performs general initializations of search parameters
22
23 In addition, it calls several functions in init??.c that provide
24 program-specific initializations:
25
26 f_initenv() - called from initenv()
27 f_getopt() - called from initenv() during a getopt() scan
28 f_getarg() - called from initenv() after the getopt() scan
29
30 */
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #if defined(UNIX) || defined(_MACH)
37 #include <unistd.h>
38 #endif
39 #ifndef PCOMPLIB
40 #ifdef IRIX
41 #include <sys/sysmp.h>
42 #endif
43 #else
44 #include "msg.h" /* need for FIRSTNODE */
45 #ifdef MPI_SRC
46 #include "mpi.h"
47 #endif
48 #endif
49
50 #include "defs.h"
51 #include "param.h"
52 #include "upam.h" /* required for 'U' option change of nascii */
53
54 #include "structs.h"
55
56 #define XTERNAL
57 #include "uascii.h"
58 #undef XTERNAL
59
60 #ifdef UNIX
61 #include <getopt.h>
62 #else
63 extern int optind; /* used by getopt() */
64 extern char *optarg;
65 #endif
66
/* File-scope state and forward declarations used by the option parser. */

/* copy of argv[0], filled in by initenv(); only used for error messages */
67 char prog_name[MAX_FN];
68
/* program-specific hooks; they are not defined in this file (the file
   header says they live in init??.c) */
69 extern void f_initenv(struct mngmsg *, struct pstruct *, unsigned char **);
70 extern void f_lastenv(struct mngmsg *, struct pstruct *);
71 extern void f_getopt(char, char *, struct mngmsg *, struct pstruct *);
72 extern void f_getarg(int, char **, int, struct mngmsg *, struct pstruct *);
/* help output; initenv() expects these to exit() rather than return */
73 extern void show_help(char *, int pgm_id);
74 extern void show_all_help(char *pgm_name, int pgm_id);
75 void g_init_opts(struct mngmsg *, struct pstruct *);
/* not defined in this file; get_annot_def_file() calls it as
   subs_env(dest, src, sizeof(dest)) before opening the file named by dest */
76 void subs_env(char *dest, char *src, int dest_size);
77
/* helpers defined later in this file */
78 void add_ascii_ann(int *qascii, unsigned char *ann_arr);
79 static int set_markx(int markx, int val, char c);
80 static void pre_parse_markx(char *opt_arg, struct mngmsg *m_msp);
81 static void parse_markx(char *opt_arg, struct markx_str *this_markx);
82 static void get_annot_def_file(struct mngmsg *m_msp, char *fa_annot_env);
83 void markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *this_markx);
84 void m_msp_to_markx(struct markx_str *this_markx, struct mngmsg *m_msp);
85
/* NOTE(review): optcnt is not referenced anywhere in the visible part of
   this file -- confirm whether other files still use it */
86 int optcnt;
/* worker/thread limit; starts at MAX_WORKERS, initenv() replaces it with
   the MPI node count or the processor count where available, and -T
   overrides it */
87 int fa_max_workers=MAX_WORKERS;
88 #ifdef PCOMPLIB
/* worker range given as "-T first-last" (both reset to 0 if first > last) */
89 int worker_1=0;
90 int worker_n=0;
91 #endif
92
/* function-specific option table, defined outside this file */
93 extern struct opt_def_str f_options[];
94
/* stores the default value(s) displayed by the help output for opt_char */
95 void set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
96 int i_param1, int i_param2,
97 double d_param1, double d_param2, char *s_param);
98
99 /* ****************************************************************
100 The option/-help system has been substantially restructured to
101 allow more consistent -h/-help messages.
102
103 There are now two global arrays, opt_def_str g_options (global
104 options, parsed in doinit.c), and opt_def_str f_options
105 (function-specific options, parsed in initfa.c)
106
107 struct opt_def_str {
108 char opt_char; # getopt single character option letter
109 int has_arg; # does the option take an argument?
110 char *opt_str; # getopt_long (future) long option name
111 char *opt_descr_s; # short description of option
112 char *opt_descr_l; # long description of option (if NULL, use opt_descr_s)
113 int opt_rank; # rank of option (not used)
114 int fmt_type; # fmt type (for defaults): 1,2 ints, 3,4 doubles
115 int i_param1; # int default1
116 int i_param2;
117 double d_param1; # double default1
118 double d_param2;
119 };
120
121 the g_opt_string and f_opt_string's parsed by getopt() are built
122 from these structures, guaranteeing that the options and help
123 messages are kept in sync.
124
125 long option descriptions (opt_descr_l) are saved in static arrays
126 (e.g. m_opt_descr[] in doinit.c; z_opt_descr[] and s_opt_descr[] in
127 initfa.c).
128
129 The default option values, which are displayed from i_param[1,2],
130 d_param[1,2], are set by g_init_opts() and f_init_opts() using
131 set_opt_disp_defs(). g_init_opts()/f_init_opts() should be called
132 as late as possible in the program.
133
134 **************************************************************** */
135
136 static char m_opt_descr[] ="Output/alignment format;\n 0 - standard \":. \" alignment; 1 - \" xX\"; 2 - \".MS..\"; 3 - separate >fasta entries;\n 4 - \"---\" alignment map; 5 - 0+4; 6 - <html>;\n 8 - BLAST tabular; 8C commented BLAST tabular;\n B - BLAST Query/Sbjct alignments; BB - complete BLAST output;\n 9 - FASTA tabular; 9c - FASTA tabular encoded; 9C FASTA tabular CIGAR encoded;\n 10 - parseable key:value; 11 - lav for LALIGN;\n A - aligned residue score\n F - 'F0,6,9c out_file' - alternate output formats to files;";
137
138 struct opt_def_str g_options[] = {
139 {'C', 1, "aname_length", "length of the query/sbjct name in alignments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
140 {'D', 0, "debug", "enable debugging output", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
141 {'e', 1, "expand", "expand_script to extend hits", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
142 {'F', 1, "evalue_min", "min E()-value displayed", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
143 #if defined(PCOMPLIB) || !defined(SHOW_HIST)
144 {'H', 0, "histogram", "show histogram", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
145 #else
146 {'H', 0, "nohist", "no histogram", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
147 #endif
148 {'i', 0, "revcomp", "search with reverse-complement", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
149 #ifdef SHOW_HELP
150 {'I', 0, "interact", "interactive mode", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
151 #endif
152 {'l', 1, "fastlibs", "FASTLIBS abbreviation file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
153 {'L', 0, "long_info", "long library descriptions", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
154 {'m', 1, "outfmt", "output format", &m_opt_descr[0], 0, 0, 0, 0, 0.0, 0.0, NULL},
155 {'N', 1, "lib_length", "max library length before overlapping", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
156 {'o', 1, "offsets", "offset coordinates of query/subject", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
157 {'O', 1, "out", "write results to file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
158 #ifndef SHOW_HELP
159 {'q', 0, "quiet", "quiet -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
160 {'Q', 0, "\0", "quiet -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
161 #else
162 {'q', 0, "quiet", "quiet [default] -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
163 {'Q', 0, "\0", "quiet [default] -- do not prompt", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
164 #endif
165 {'R', 1, "results_file", "raw score file", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
166 {'T', 1, "threads", "max threads/workers", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
167 {'v', 1, "shuffle_window", "shuffle window size", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
168 {'V', 1, "annotation", "annotation characters in query/library for aligments", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
169 {'w', 1, "aln_width", "width of alignment display", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL},
170 {'Z', 1, "db_size", "database size for E()-value", "[library entries] database size for E()-value", 0, 0, 0, 0, 0.0, 0.0, NULL},
171 {'\0', 0, "", "", NULL, 0, 0, 0, 0, 0.0, 0.0, NULL}
172 };
173
174 /* set default option values for help */
g_init_opts(struct mngmsg * m_msp,struct pstruct * ppst)175 void g_init_opts(struct mngmsg *m_msp, struct pstruct *ppst) {
176 set_opt_disp_defs('C', g_options, 1, m_msp->nmlen, 0, 0.0, 0.0, NULL);
177 set_opt_disp_defs('F', g_options, 3, 0, 0, m_msp->e_low, 0.0, NULL);
178 set_opt_disp_defs('m', g_options, 1, m_msp->markx, 0, 0.0, 0.0, NULL);
179 set_opt_disp_defs('o', g_options, 2, (int)m_msp->sq0off, (int)m_msp->sq1off, 0.0, 0.0,NULL);
180 set_opt_disp_defs('T', g_options, 1, fa_max_workers, 0, 0.0, 0.0, NULL);
181 set_opt_disp_defs('v', g_options, 1, ppst->zs_win, 0, 0.0, 0.0, NULL);
182 set_opt_disp_defs('w', g_options, 1, m_msp->aln.llen, 0, 0.0, 0.0, NULL);
183 }
184
/* forward declaration; build_optstr() is defined near the end of this file */
185 void
186 build_optstr(char *opt_str, int opt_len, struct opt_def_str *opt_defs);
187
/* Flags recording which options appeared on the command line:
   long_info_set - set by -L in initenv(), read by pre_parse_markx()
   llen_set      - set by -w in initenv(), read by pre_parse_markx()
   markx_set     - set by -m in initenv(), tested there once parsing is done */
188 static int long_info_set=0;
189 static int llen_set = 0;
190 static int markx_set = 0;
191
192 /* initenv () initializes the environment */
initenv(int argc,char ** argv,struct mngmsg * m_msp,struct pstruct * ppst,unsigned char ** aa0)193 void initenv (int argc, char **argv, struct mngmsg *m_msp,
194 struct pstruct *ppst, unsigned char **aa0)
195 {
196 char *cptr, *bp, *bp1;
197 int copt;
198
199 /* options for all search functions */
200 /* char *g_optstr = "b:BC:d:DE:F:HiK:l:Lm:N:O:QqR:T:v:V:w:W:X:Z:"; */
201
202 char g_optstring[MAX_STR];
203 char f_optstring[MAX_STR];
204 char optstring[MAX_STR];
205
206 /* help functions exit(); try first */
207 if (argc == 1) {
208 show_help(m_msp->pgm_name, ppst->pgm_id);
209 }
210 if (strcmp(argv[1],"-help")==0 || strcmp(argv[1],"--help")==0) {
211 show_all_help(m_msp->pgm_name, ppst->pgm_id);
212 }
213
214 build_optstr(g_optstring, sizeof(f_optstring), g_options);
215 build_optstr(f_optstring, sizeof(f_optstring), f_options);
216
217 /* these initializations will be used by all functions */
218
219 /* prog_name[] is only used for error messages */
220 strncpy(prog_name,argv[0],sizeof(prog_name));
221 prog_name[sizeof(prog_name)-1]='\0';
222
223 #ifdef PCOMPLIB
224 #ifdef MPI_SRC
225 MPI_Comm_size(MPI_COMM_WORLD,&fa_max_workers);
226 if (fa_max_workers <= 1) {
227 fprintf(stderr," nnodes = %d; no workers available\n",fa_max_workers);
228 exit(1);
229 }
230 else {
231 fa_max_workers -= FIRSTNODE;
232 fprintf(stderr," have %d workers\n",fa_max_workers);
233 }
234 #endif
235 #else
236 #if defined(IRIX)
237 fa_max_workers = sysmp(MP_NPROCS);
238 #else
239 #if defined(UNIX) || defined(HAVE_SYSCONF)
240 fa_max_workers = sysconf(_SC_NPROCESSORS_CONF);
241 #endif /* UNIX || SYSCONF */
242 #endif /* !IRIX */
243 #endif /* !PCOMPLIB */
244
245 m_msp->ltitle[0] = '\0';
246
247 if ((cptr=getenv("FASTLIBS"))!=NULL) {
248 strncpy(m_msp->flstr,cptr,MAX_FN);
249 m_msp->flstr[MAX_FN-1] = '\0';
250 }
251 else m_msp->flstr[0]='\0';
252
253 m_msp->std_output = 1;
254 m_msp->hist.hist_a = NULL;
255 m_msp->outfile[0] = '\0';
256 m_msp->outfd = NULL;
257 m_msp->ldb_info.ldnaseq = SEQT_PROT; /* library is protein */
258 m_msp->n1_low = ppst->n1_low = 0;
259 m_msp->n1_high = ppst->n1_high = BIGNUM;
260 m_msp->ql_start = 1; /* start with first query sequence */
261 m_msp->ql_stop = BIGNUM; /* end with the last query sequence */
262 m_msp->aa1save_buf_b = NULL;
263 m_msp->bline_buf_b = NULL;
264
265 m_msp->pamd1 = MAXSQ;
266 m_msp->pamd2 = MAXSQ;
267
268 m_msp->ldb_info.term_code = 0;
269
270 ppst->tr_type = 0;
271 ppst->debug_lib = 0;
272 m_msp->nshow = 20;
273 ppst->max_repeat = 50;
274 m_msp->nohist = 1;
275 #if defined(PCOMPLIB)
276 m_msp->mshow = 20;
277 #else
278 #ifdef SHOW_HIST
279 m_msp->nohist = 0;
280 #endif
281 m_msp->mshow = 50;
282 #endif
283 m_msp->do_showbest = 1;
284 m_msp->ashow = -1;
285 m_msp->ashow_set = 0;
286 m_msp->nmlen = DEF_NMLEN;
287 m_msp->z_bits = 1;
288 m_msp->tot_ident = 0;
289 m_msp->mshow_set = 0;
290 m_msp->mshow_min = 0;
291 m_msp->aln.llen = 60;
292 m_msp->aln.llcntx = 30;
293 m_msp->aln.llcntx_set = 0;
294 m_msp->e_low = 0.0;
295 m_msp->e_cut_set = 0;
296 m_msp->revcomp = 0;
297 m_msp->long_info = 0;
298 m_msp->ldb_info.maxn = 0;
299 m_msp->ldb_info.dupn = SEQDUP;
300 m_msp->dfile[0] = '\0';
301 m_msp->tname[0] = '\0';
302 m_msp->lname[0] = '\0';
303 m_msp->link_lname[0] = '\0';
304 m_msp->show_code = 0;
305 m_msp->tot_show_code = 0;
306 m_msp->aln.showall = 0;
307 m_msp->markx = 0;
308 m_msp->tot_markx = 0;
309 m_msp->markx_list = NULL;
310 m_msp->align_done = 0;
311 m_msp->sq0off = m_msp->sq1off = 1;
312 strncpy(m_msp->sqnam,"aa",4);
313 strncpy(m_msp->sqtype,"protein",10);
314
315 /* annotation info */
316 m_msp->ann_flg = 0;
317 memset(m_msp->ann_arr,'\0',MAX_FN);
318 m_msp->ann_arr_def[0] = NULL;
319 m_msp->ann_arr_def[1] = NULL;
320 m_msp->annot0_sname[0]='\0';
321 m_msp->annot1_sname[0]='\0';
322 m_msp->annot_p = NULL;
323 m_msp->aa0a = NULL;
324
325 ppst->LK_set = 0;
326 ppst->e_cut = m_msp->e_cut = 10.0;
327 ppst->e_cut_r = ppst->e_cut / 10.0;
328 ppst->do_rep = 1;
329 ppst->zs_win = 0;
330 ppst->show_ident = 0;
331
332 ppst->zdb_size = -1;
333 ppst->zdb_size_set = 0;
334 ppst->dnaseq = SEQT_PROT; /* default is protein */
335 ppst->nt_align = 0;
336
337 ppst->other_info = NULL;
338
339 g_init_opts(m_msp, ppst);
340
341 f_initenv (m_msp, ppst, aa0);
342
343 SAFE_STRNCPY (optstring, g_optstring, sizeof (optstring));
344 SAFE_STRNCAT (optstring, f_optstring, sizeof (optstring));
345
346 while ((copt = getopt (argc, argv, optstring)) != EOF)
347 {
348 if (strchr (g_optstring, copt) != NULL)
349 {
350 switch (copt) { /* switches for all options */
351 case 'C':
352 sscanf(optarg,"%d",&m_msp->nmlen);
353 if (m_msp->nmlen > MAX_UID-1) m_msp->nmlen = MAX_UID-1;
354 break;
355 case 'D': ppst->debug_lib = 1;
356 break;
357 case 'e':
358 strncpy(m_msp->link_lname, optarg, MAX_LSTR);
359 break;
360 case 'F':
361 sscanf(optarg,"%lg",&m_msp->e_low);
362 m_msp->e_cut_set = 1;
363 break;
364 #if defined(PCOMPLIB) || !defined(SHOW_HIST)
365 case 'H':
366 m_msp->nohist = 0; break;
367 #else
368 case 'H':
369 m_msp->nohist = 1; break;
370 #endif
371 case 'i':
372 m_msp->revcomp = 1; break;
373 case 'I':
374 m_msp->quiet = 0; break;
375 case 'l':
376 strncpy(m_msp->flstr,optarg,MAX_FN);
377 m_msp->flstr[MAX_FN-1]='\0';
378 break;
379 case 'L':
380 m_msp->long_info = 1;
381 long_info_set = 1;
382 break;
383 case 'm':
384 pre_parse_markx(optarg, m_msp);
385 markx_set = 1;
386 break;
387 case 'N':
388 sscanf(optarg,"%d",&m_msp->ldb_info.maxn);
389 break;
390 case 'o':
391 sscanf (optarg,"%ld %ld",&m_msp->sq0off,&m_msp->sq1off); break;
392 case 'O':
393 strncpy(m_msp->outfile,optarg,MAX_FN);
394 m_msp->outfile[MAX_FN-1]='\0';
395 break;
396 case 'q':
397 case 'Q':
398 m_msp->quiet = 1;
399 break;
400 case 'R':
401 strncpy (m_msp->dfile, optarg, MAX_FN);
402 m_msp->dfile[MAX_FN-1]='\0';
403 break;
404 case 'T':
405 #ifdef PCOMPLIB
406 if (strchr(optarg,'-') != NULL) {
407 sscanf(optarg,"%d-%d",&worker_1,&worker_n);
408 if (worker_1 > worker_n) {
409 worker_1 = worker_n = 0;
410 }
411 }
412 else
413 #endif
414 sscanf (optarg, "%d", &fa_max_workers);
415 if (fa_max_workers < 0) fa_max_workers=1;
416 break;
417 case 'v':
418 sscanf (optarg,"%d",&ppst->zs_win);
419 break;
420 case 'V':
421 if (optarg[0] == '=') {
422 get_annot_def_file(m_msp, optarg+1);
423 }
424 else if ((cptr = getenv("FA_ANNOT_DEF"))) {
425 get_annot_def_file(m_msp, cptr);
426 }
427 else if (optarg[0] == 'q' && (optarg[1]=='!' || optarg[1]=='<')) {
428 strncpy(m_msp->annot0_sname,optarg+1,MAX_LSTR);
429 m_msp->ann_flg = 2;
430 }
431 else if (optarg[0]=='!' || optarg[0]=='<') {
432 strncpy(m_msp->annot1_sname,optarg,MAX_LSTR);
433 m_msp->ann_flg = 2;
434 }
435 else {
436 strncpy((char *)m_msp->ann_arr+1,optarg,MAX_FN-2);
437 m_msp->ann_arr[0]='\0';
438 m_msp->ann_arr[MAX_FN-2]='\0';
439 m_msp->ann_arr_n = strlen((char *)m_msp->ann_arr+1);
440 if (m_msp->ann_flg ==0) m_msp->ann_flg = 1;
441 }
442
443 if (strlen((char *)m_msp->ann_arr) > 0) {
444 add_ascii_ann(qascii, m_msp->ann_arr);
445 }
446
447 break;
448 /*
449 case 'V':
450 fprintf(stderr," -V option not currently supported in parallel\n");
451 break;
452 */
453 case 'w':
454 sscanf (optarg,"%d",&m_msp->aln.llen);
455 if (m_msp->aln.llen < 10) m_msp->aln.llen = 10;
456 if (m_msp->aln.llen > 200) m_msp->aln.llen = 200;
457 if (!m_msp->aln.llcntx_set) m_msp->aln.llcntx = m_msp->aln.llen/2;
458 llen_set = 1;
459 break;
460 case 'Z':
461 sscanf(optarg,"%ld",&ppst->zdb_size);
462 ppst->zdb_size_set = 1;
463 break;
464 }
465 }
466 else if (strchr (f_optstring, copt))
467 f_getopt (copt, optarg, m_msp, ppst);
468 }
469 optind--;
470
471 if (!markx_set || !(m_msp->markx & (MX_ATYPE+MX_ANNOT_COORD+MX_ANNOT_MID))) {
472 m_msp->markx = set_markx(m_msp->markx, 0, '\0');
473 }
474
475 /* done with options, check for initializations in initfa.c
476 (set sascii alphabet) */
477 f_lastenv (m_msp, ppst);
478
479 if (argc - optind < 3) return;
480 m_msp->tnamesize = sizeof (m_msp->tname);
481 if (argc - optind > 1) {strncpy (m_msp->tname, argv[optind + 1],MAX_FN);}
482 if (argc - optind > 2) {strncpy(m_msp->lname, argv[optind + 2],MAX_LSTR);}
483 f_getarg (argc, argv, optind, m_msp, ppst);
484 }
485
486 /* ann_scan scans an aa0 query sequence if -V ann_chars, and returns
487 an edited query sequence and allocates aa0a[n_n0+2] space for the
488 annotation */
489
490 int
ann_scan(unsigned char * aa0,int n0,unsigned char ** aa0a_p,int seqtype)491 ann_scan(unsigned char *aa0, int n0, unsigned char **aa0a_p, int seqtype)
492 {
493 unsigned char *aa0p, *aa0d, *aa0ad;
494 int n_n0;
495
496 /* count how many "real" residues */
497
498 if (seqtype==SEQT_UNK) {
499 /* with SEQT_UNK, annotation characters are all < @,
500 while sequence chars are all > @ */
501 for (n_n0=0, aa0p = aa0; aa0p < aa0+n0; aa0p++) {
502 if (*aa0p > '@' || *aa0p == ESS ) n_n0++; /* ESS captures ',' in sequence */
503 }
504 }
505 else {
506 /* if the sequence type is known, then annotation chars are > NANN */
507 for (n_n0=0, aa0p = aa0; aa0p < aa0+n0; aa0p++) {
508 if (*aa0p < NANN ) n_n0++;
509 }
510 }
511
512 if (n_n0 == n0) {
513 *aa0a_p = NULL;
514 return n_n0;
515 }
516
517 aa0d = aa0;
518 /* n_n0 has the real sequence length */
519 if ((*aa0a_p = calloc(n_n0+2, sizeof(char)))==NULL) {
520 fprintf(stderr," cannot allocate annotation sequence: %d\n",n_n0);
521
522 /* this section is for failure, simply copy the correct sequence
523 and ignore the annotations */
524 if (seqtype==SEQT_UNK) {
525 for (aa0p = aa0; aa0p < aa0+n0; aa0p++) {
526 if (*aa0p > '@' || *aa0p == ESS) {*aa0d++ = *aa0p;}
527 }
528 }
529 else {
530 for (aa0p = aa0; aa0p < aa0+n0; aa0p++) {
531 if (*aa0p < NANN) {*aa0d++ = *aa0p;}
532 }
533 }
534 *aa0d = '\0';
535 return n_n0;
536 }
537
538 /* have aa0a_p annotation array allocated */
539 aa0ad = *aa0a_p;
540 if (seqtype==SEQT_UNK) {
541 for (aa0p = aa0; aa0p<aa0+n0; aa0p++) {
542 if (*aa0p > '@' || *aa0p == ESS) {*aa0d++ = *aa0p; *aa0ad++='\0';}
543 else if (aa0ad > *aa0a_p) { aa0ad[-1] = *aa0p - NANN;}
544 }
545 }
546 else {
547 for (aa0p = aa0; aa0p<aa0+n0; aa0p++) {
548 if (*aa0p < NANN) {*aa0d++ = *aa0p; *aa0ad++='\0';}
549 else if (aa0ad > *aa0a_p) { aa0ad[-1] = *aa0p - NANN;}
550 }
551 }
552 *aa0ad = *aa0d = '\0';
553 return n_n0;
554 }
555
556 /* renamed from ann_ascii() Feb, 2008 to allow ann_ascii[] */
557 void
add_ascii_ann(int * qascii,unsigned char * ann_arr)558 add_ascii_ann(int *qascii, unsigned char *ann_arr)
559 {
560 unsigned char *ann_p;
561 int ann_ix = NANN+1;
562
563 if (ann_arr[0] == '\0' && ann_arr[1]=='\0') {
564 ann_arr[0] = ' ';
565 ann_arr[0] = '\0';
566 return;
567 }
568
569 ann_arr[0] = ' ';
570
571 if (strchr((char *)ann_arr+1,'*')) {qascii['*'] = NA;}
572
573 for (ann_p = ann_arr+1; *ann_p; ann_p++) {
574 if (qascii[*ann_p] == NA) { qascii[*ann_p] = ann_ix++;}
575 }
576 }
577
578 /* parse annotation description line */
add_annot_def(struct mngmsg * m_msp,char * line,int qa_flag)579 void add_annot_def(struct mngmsg *m_msp, char *line, int qa_flag) {
580 char *bp;
581 int i_ann;
582
583 if ((bp=strchr(line,'\r')) !=NULL || (bp=strchr(line,'\n')) != NULL) {
584 *bp = '\0';
585 }
586
587 if (m_msp->ann_arr[0]=='\0') {
588 m_msp->ann_arr[0] = ' ';
589 m_msp->ann_arr[1] = '\0';
590 }
591
592 /* set the character */
593 i_ann = strlen((char *)m_msp->ann_arr);
594 if ((bp = strchr((char *)m_msp->ann_arr,line[0]))!=NULL) {
595 i_ann = (unsigned char *)bp - m_msp->ann_arr;
596 }
597 else {
598 m_msp->ann_arr[i_ann] = line[0];
599 m_msp->ann_arr[i_ann+1] = '\0'; /* required for strchr(ann_arr) to work */
600 if (qa_flag) qascii[line[0]] = NANN + i_ann;
601 }
602
603 if ((bp=strchr(line,':'))!=NULL) {
604 /* allocate space for definitions */
605 if ((m_msp->ann_arr_def[i_ann]=(char *)calloc(strlen(bp+1)+1,sizeof(char)))!=NULL) {
606 /* read in the definitions and associate with symbol */
607 strncpy(m_msp->ann_arr_def[i_ann], bp+1,strlen(bp+1));
608 }
609 }
610 else {
611 m_msp->ann_arr_def[i_ann] = NULL;
612 }
613
614
615 }
616
617 /* read definitions of annotation symbols from a file */
618 static void
get_annot_def_file(struct mngmsg * m_msp,char * fa_annot_env)619 get_annot_def_file(struct mngmsg *m_msp, char *fa_annot_env) {
620 FILE *def_fp;
621 char *bp, *bpf, line[MAX_STR];
622 char tmp_annot_env[MAX_STR];
623
624 if ((bpf=strchr(fa_annot_env,' '))!=NULL) *bpf = '\0';
625
626 subs_env(tmp_annot_env, fa_annot_env, sizeof(tmp_annot_env));
627 /* check that the file exists */
628 if ((def_fp = fopen(tmp_annot_env,"r"))==NULL) {
629 fprintf(stderr,"*** error *** annotation definition file: %s not found\n",
630 tmp_annot_env);
631 if (bpf) *bpf=' ';
632 return;
633 }
634
635 /* read a line */
636 while (fgets(line, sizeof(line), def_fp)!=NULL) {
637 add_annot_def(m_msp, line, 0);
638 }
639 fclose(def_fp);
640 if (bpf) *bpf=' ';
641
642 if (strlen((char *)m_msp->ann_arr)>1) m_msp->ann_flg = 1;
643 }
644
645 int
set_markx(int markx,int val,char c)646 set_markx(int markx, int val, char c) {
647
648 if (val < 3) {
649 if (c=='M') {
650 markx |= MX_ANNOT_MID;
651 markx &= (~MX_ANNOT_COORD);
652 }
653 else if (c=='B') {
654 markx |= MX_ANNOT_COORD;
655 markx |= MX_ANNOT_MID;
656 }
657 else {
658 markx |= MX_ANNOT_COORD;
659 }
660 if (c=='H') {
661 markx |= MX_HTML;
662 }
663 return markx | (MX_ATYPE & val);
664 }
665 else if (val == 3) {
666 markx |= (MX_ATYPE + MX_ASEP);
667 }
668 else if (val == 4) {
669 markx |= (MX_ATYPE + MX_AMAP);
670 }
671 else if (val == 5) {
672 markx |= MX_AMAP;
673 }
674 else if (val == 6 || c=='H') {
675 markx |= (MX_HTML) ;
676 if (c=='M') {
677 markx |= MX_ANNOT_MID;
678 markx &= (~MX_ANNOT_COORD);
679 }
680 else if (c=='B') {
681 markx |= MX_ANNOT_COORD;
682 markx |= MX_ANNOT_MID;
683 }
684 else {
685 markx |= MX_ANNOT_COORD;
686 }
687 }
688 else if (val == 8) {
689 markx |= MX_M9SUMM+MX_M8OUT;
690 }
691 else if (val == 9) {
692 markx |= MX_M9SUMM;
693 }
694 else if (val == 10) {
695 markx |= MX_M10FORM;
696 }
697 else if (val == 11) {
698 markx |= MX_M11OUT;
699 }
700
701 return markx;
702 }
703
704 void
pre_parse_markx(char * opt_arg,struct mngmsg * m_msp)705 pre_parse_markx(char *opt_arg, struct mngmsg *m_msp) {
706 char *bp, *last_bp;
707 struct markx_str *tmp_markx, *cur_markx, *last_markx;
708
709 if (opt_arg[0] != 'F' && m_msp->markx_list != NULL) {
710 tmp_markx = m_msp->markx_list;
711 }
712 else {
713 if ((tmp_markx = (struct markx_str *)calloc(1,sizeof(struct markx_str)))==NULL) {
714 fprintf(stderr,"[error] Cannot allocate markx_list\n");
715 return;
716 }
717
718 /* initialize markx to m_msg defaults -- we do not use m_msp
719 directly, because it might have been changed by an earlier -m
720 out_fmt */
721
722 tmp_markx->nohist = 1;
723 if (m_msp->ashow_set) {tmp_markx->ashow = m_msp->ashow;}
724 else {tmp_markx->ashow = -1;}
725
726 tmp_markx->show_code = 0;
727 if (long_info_set) tmp_markx->long_info = 1;
728 else tmp_markx->long_info = 0;
729 if (llen_set) {
730 tmp_markx->aln_llen = m_msp->aln.llen;
731 tmp_markx->aln_llcntx = m_msp->aln.llcntx;
732 tmp_markx->aln_llcntx_set = m_msp->aln.llcntx_set;
733 }
734 else {
735 tmp_markx->aln_llen = 60;
736 if (m_msp->aln.llcntx_set) {
737 tmp_markx->aln_llcntx = m_msp->aln.llcntx;
738 tmp_markx->aln_llcntx_set = m_msp->aln.llcntx_set;
739 }
740 else {
741 tmp_markx->aln_llcntx = 30;
742 tmp_markx->aln_llcntx_set = 0;
743 }
744 }
745 tmp_markx->std_output = 1;
746 }
747
748 /* first check for -m "F file" format */
749 if (optarg[0] == 'F') {
750 if ((bp=strchr(optarg+1,' '))==NULL) {
751 fprintf(stderr,"-m F missing file name: %s\n",optarg);
752 return;
753 }
754 /* allocate space for file name */
755 if ((tmp_markx->out_file = calloc(strlen(bp+1)+1,sizeof(char)))==NULL) {
756 fprintf(stderr,"[error] Cannot allocate markx->out_file\n");
757 return;
758 }
759 strncpy(tmp_markx->out_file, bp+1, strlen(bp+1));
760 *bp = '\0';
761
762 last_bp = optarg+1;
763 }
764 else {
765 last_bp = optarg;
766 }
767
768 if (opt_arg[0] != 'F') {
769 m_msp_to_markx(tmp_markx, m_msp);
770 }
771
772 while ((bp=strchr(last_bp,','))!=NULL) {
773 *bp = '\0';
774 parse_markx(last_bp, tmp_markx);
775 *bp = ',';
776 last_bp = bp+1;
777 }
778
779 if (*last_bp) parse_markx(last_bp, tmp_markx);
780
781 if (m_msp->markx_list!=NULL) {
782 if (opt_arg[0] == 'F') {
783 /* if file name, add this to the end of the list */
784 last_markx = m_msp->markx_list;
785 for (cur_markx=m_msp->markx_list->next; cur_markx; cur_markx = cur_markx->next) {
786 last_markx = cur_markx;
787 }
788 last_markx->next = tmp_markx;
789 }
790 else if (tmp_markx != m_msp->markx_list) {
791 /* if no file name, then make this the first in the list,
792 unless it is already there */
793 cur_markx = m_msp->markx_list;
794 m_msp->markx_list = tmp_markx;
795 tmp_markx->next = cur_markx;
796 }
797 }
798 else {
799 m_msp->markx_list = tmp_markx;
800 }
801
802 m_msp->tot_markx |= tmp_markx->markx;
803 m_msp->tot_show_code |= tmp_markx->show_code;
804
805 /* if no -m F, save options into m_msp */
806 if (optarg[0] != 'F') {
807 markx_to_m_msp(m_msp, tmp_markx);
808 }
809
810 return;
811 }
812
813 void
parse_markx(char * optarg,struct markx_str * this)814 parse_markx(char *optarg, struct markx_str *this) {
815 int itmp;
816 char ctmp, ctmp2;
817
818 itmp = 0;
819 ctmp = ctmp2 = '\0';
820
821 if (optarg[0] == 'B') { /* BLAST alignment output */
822 this->markx = MX_MBLAST;
823 this->aln_llcntx = 0;
824 this->aln_llcntx_set = 1;
825 this->long_info=1;
826 this->ashow = -1;
827 if (optarg[1] == 'B') { /* complete BLAST output */
828 this->markx += MX_MBLAST2;
829 this->nohist = 1;
830 this->aln_llen = 65;
831 this->std_output = 0;
832 return;
833 }
834 else if (optarg[1] == '8') {
835 sscanf(optarg,"%d%c%c",&itmp,&ctmp,&ctmp2);
836 }
837 else {return;} /* done with BLAST aligment output */
838 }
839 else if (optarg[0] == 'A') {
840 this->markx += MX_RES_ALIGN_SCORE;
841 this->aln_llcntx = 0;
842 this->aln_llcntx_set = 1;
843 return;
844 }
845 else {
846 sscanf(optarg,"%d%c%c",&itmp,&ctmp,&ctmp2);
847 }
848 if (itmp==9) {
849 if (ctmp=='c') {this->show_code = SHOW_CODE_ALIGN;}
850 else if (ctmp=='d') {this->show_code = SHOW_CODE_ALIGN + SHOW_CODE_EXT;}
851 else if (ctmp=='C') {this->show_code = SHOW_CODE_CIGAR;}
852 else if (ctmp=='D') {this->show_code = SHOW_CODE_CIGAR + SHOW_CODE_EXT;}
853 else if (ctmp=='i') {this->show_code = SHOW_CODE_ID;}
854 }
855 if (itmp > 6 && itmp != 11 && itmp != 10 && itmp != 9 && itmp != 8) itmp = 0;
856 this->markx = set_markx(this->markx,itmp,ctmp);
857 if (itmp == 11 ) { this->std_output = 0;}
858 if (itmp == 8) {
859 this->std_output = 0;
860 this->ashow = 0;
861 if (ctmp=='C') { this->markx += MX_M8COMMENT;}
862 if (ctmp2 == 'c') { this->show_code = SHOW_CODE_ALIGN;}
863 else if (ctmp2 == 'd') {this->show_code = SHOW_CODE_ALIGN + SHOW_CODE_EXT;}
864 else if (ctmp2 == 'C') {this->show_code = SHOW_CODE_CIGAR;}
865 else if (ctmp2 == 'D') {this->show_code = SHOW_CODE_CIGAR + SHOW_CODE_EXT;}
866 }
867 }
868
869 /* transfer markx values for m_msp to m_msp */
870 void
markx_to_m_msp(struct mngmsg * m_msp,struct markx_str * this)871 markx_to_m_msp(struct mngmsg *m_msp, struct markx_str *this) {
872
873 m_msp->markx = this->markx;
874 m_msp->nohist = this->nohist;
875 m_msp->ashow = this->ashow;
876 m_msp->show_code = this->show_code;
877 m_msp->long_info = this->long_info;
878 m_msp->aln.llen = this->aln_llen;
879 m_msp->aln.llcntx = this->aln_llcntx;
880 m_msp->aln.llcntx_set = this->aln_llcntx_set;
881 m_msp->std_output = this->std_output;
882 }
883
884 /* save current m_msp values used with markx */
885 void
m_msp_to_markx(struct markx_str * this,struct mngmsg * m_msp)886 m_msp_to_markx(struct markx_str *this, struct mngmsg *m_msp) {
887
888 this->markx = m_msp->markx ;
889 this->nohist = m_msp->nohist ;
890 this->ashow = m_msp->ashow ;
891 this->show_code = m_msp->show_code ;
892 this->long_info = m_msp->long_info ;
893 this->aln_llen = m_msp->aln.llen ;
894 this->aln_llcntx = m_msp->aln.llcntx ;
895 this->aln_llcntx_set = m_msp->aln.llcntx_set ;
896 this->std_output = m_msp->std_output ;
897 }
898
899 /* put options from option table [struct opt_def_str *opt_defs] into
900 char *opt_str for getopt() */
901
902 void
build_optstr(char * opt_str,int max_len,struct opt_def_str * opt_defs)903 build_optstr(char *opt_str, int max_len, struct opt_def_str *opt_defs) {
904 int i, opt_len = 0;
905 char *opt_pos;
906
907 opt_pos = opt_str;
908 for (i=0; opt_defs[i].opt_char != '\0'; i++) {
909 if (opt_len + 2 > max_len) {
910 fprintf(stderr," *** error -- options too long %d >= %d\n", opt_len, max_len);
911 break;
912 }
913 *opt_pos++ = opt_defs[i].opt_char;
914 opt_len++;
915 if (opt_defs[i].has_arg) {
916 *opt_pos++ = ':';
917 opt_len++;
918 }
919 }
920 *opt_pos = '\0';
921 }
922
923 /* set_opt_disp_defs associates parameter addresses with options */
924 void
set_opt_disp_defs(char opt_char,struct opt_def_str * options,int type,int i_param1,int i_param2,double d_param1,double d_param2,char * s_param)925 set_opt_disp_defs(char opt_char, struct opt_def_str *options, int type,
926 int i_param1, int i_param2,
927 double d_param1, double d_param2,
928 char *s_param) {
929 struct opt_def_str *this_opt;
930
931 this_opt = options;
932 while (this_opt->opt_char != '\0') {
933 if (this_opt->opt_char == opt_char) {
934 this_opt->fmt_type = type;
935 switch (type) {
936 case 1:
937 this_opt->i_param1 = i_param1;
938 break;
939 case 2:
940 this_opt->i_param1 = i_param1;
941 this_opt->i_param2 = i_param2;
942 break;
943 case 3:
944 this_opt->d_param1 = d_param1;
945 break;
946 case 4:
947 this_opt->d_param1 = d_param1;
948 this_opt->d_param2 = d_param2;
949 break;
950 case 5:
951 if (s_param != NULL) {
952 this_opt->s_param = (char *)calloc(strlen(s_param)+1,sizeof(char));
953 strncpy(this_opt->s_param,s_param,strlen(s_param));
954 }
955 else this_opt->s_param = NULL;
956 break;
957 }
958 }
959 this_opt++;
960 }
961 }
962