1 static char const rcsid[] = "$Id: blastpgp.c,v 6.140 2008/03/31 13:35:18 madden Exp $";
2 
3 /* $Id: blastpgp.c,v 6.140 2008/03/31 13:35:18 madden Exp $ */
4 /**************************************************************************
5 *                                                                         *
6 *                             COPYRIGHT NOTICE                            *
7 *                                                                         *
8 * This software/database is categorized as "United States Government      *
9 * Work" under the terms of the United States Copyright Act.  It was       *
10 * produced as part of the author's official duties as a Government        *
11 * employee and thus can not be copyrighted.  This software/database is    *
12 * freely available to the public for use without a copyright notice.      *
13 * Restrictions can not be placed on its present or future use.            *
14 *                                                                         *
15 * Although all reasonable efforts have been taken to ensure the accuracy  *
16 * and reliability of the software and data, the National Library of       *
17 * Medicine (NLM) and the U.S. Government do not and can not warrant the   *
18 * performance or results that may be obtained by using this software,     *
19 * data, or derivative works thereof.  The NLM and the U.S. Government     *
20 * disclaim any and all warranties, expressed or implied, as to the        *
21 * performance, merchantability or fitness for any particular purpose or   *
22 * use.                                                                    *
23 *                                                                         *
24 * In any work or product derived from this material, proper attribution   *
25 * of the author(s) as the source of the software or data would be         *
26 * appreciated.                                                            *
27 *                                                                         *
28 **************************************************************************
29  * $Revision: 6.140 $
30  * $Log: blastpgp.c,v $
31  * Revision 6.140  2008/03/31 13:35:18  madden
32  * Change semantics of -c option, so that a new method for effective observations is used always and a new entropy-based method for column-specific PSI-BLAST pseudocounts is used by default. If default is used (-c 0), then all constants are defined in posit.c; if only the new method of effective observations is used, then the value of -c should be set by the user at approximately 30.  (Changes
33  * submitted by Alejandro Schaffer).
34  *
35  * Revision 6.139  2008/01/02 20:16:11  madden
36  * XML output respects -v and -b option, JIRA SB-30
37  *
38  * Revision 6.138  2008/01/02 14:02:06  madden
39  * Make composition-based score adjustments the default for blastp and tblastn
40  *
41  * Revision 6.137  2007/03/14 17:55:27  madden
42  *   - #include string.h to get a prototype for strcasecmp In
43  *   - In tick_callback, suppress unused parameter warnings in tick_callback
44  *     by casting to void.
45  *   - In PGPFormatMainOutput, initialize local variables pruneSeed and
46  *     prune to NULL.
47  *   - In PGPReadBlastOptions, print a warning if a PSI-BLAST output
48  *     checkpoint filename is specified, but does not have a file
49  *     extension appropriate for its type.
50  *   [from Mike Gertz]
51  *
52  * Revision 6.136  2006/07/17 17:32:54  coulouri
53  * from mike gertz: fix a memory leak in PGPFormatFooter
54  *
55  * Revision 6.135  2006/05/03 14:40:49  madden
56  * Changed usage of -t flag to also include specification of whether
57  * unified p-values should be used to calculate the significance of
58  * alignments when composition-based statistics is used. (from Mike Gertz)
59  *
60  * Revision 6.134  2006/01/24 18:33:51  papadopo
61  * from Mike Gertz: Use enumerated values, rather than #define'd constants, to specify the composition adjustment method
62  *
63  * Revision 6.133  2005/08/31 20:34:02  coulouri
64  *     In PGPReadBlastOptions:
65  *       - set the value of options->kappa_expect_value.
66  *       - changed the wording of the warnings that
67  *         composition-based statistics mode > 1 are experimental.
68  *       - set mode for composition-based statistics to 1 if restarting
69  *         from a checkpoint.
70  *      In Main:
71  *       - Remove outdated code for setting options->hitlist_size and
72  *         options->expect_value
73  *       - Set the value of kappa_expect_value for composition-based
74  *         statistics, modes > 1.
75  *       - Correctly set the evalue for preliminary alignments when more
76  *         than one query is in the file.
77  *
78  * Revision 6.132  2005/07/28 16:16:46  coulouri
79  * remove dead code
80  *
81  * Revision 6.131  2005/06/30 15:10:03  coulouri
82  * remove -g option, use enum for argument handling
83  *
84  * Revision 6.130  2005/05/18 17:35:49  papadopo
85  * add warnings if new composition-based statistics options are selected
86  *
87  * Revision 6.129  2005/05/17 17:51:20  papadopo
88  * make the -t argument a string and handle values of 'T' or 'F' manually (required for backward compatibility)
89  *
90  * Revision 6.128  2005/05/16 17:41:00  papadopo
91  * From Alejandro Schaffer: Added support for compositional adjustment
92  * of matrices via enhanced -t flag, which now is integer to allow 4 options.
93  *
94  * Revision 6.127  2005/04/04 14:57:47  papadopo
95  * remove requirement for a fasta format query file if restarting from scoremat
96  *
97  * Revision 6.126  2004/10/12 15:14:39  papadopo
98  * add gap open and extend penalties to [BC]posComputation calls
99  *
100  * Revision 6.125  2004/07/19 17:16:19  papadopo
101  * add capability to perform input and output of residue frequencies in scoremat form
102  *
103  * Revision 6.124  2004/06/30 12:33:30  madden
104  * Add include for blfmtutl.h
105  *
106  * Revision 6.123  2004/06/25 20:58:49  dondosha
107  * Ungapped option not supported for multi-iterational search, but OK otherwise
108  *
109  * Revision 6.122  2004/06/24 21:48:16  dondosha
110  * Made ungapped search option deprecated
111  *
112  * Revision 6.121  2004/06/24 19:02:23  dondosha
113  * Exit with error status if PGPReadBlastOptions returns NULL
114  *
115  * Revision 6.120  2004/04/29 19:56:00  dondosha
116  * Mask filtered locations in query sequence lines in XML output
117  *
118  * Revision 6.119  2004/03/31 17:59:32  dondosha
119  * Fix for XML output for multiple queries: append full XML outputs in one file
120  *
121  * Revision 6.118  2003/09/26 16:01:34  madden
122  * Change threshold to 0.002
123  *
124  * Revision 6.117  2003/05/30 17:31:09  coulouri
125  * add rcsid
126  *
127  * Revision 6.116  2003/05/13 16:02:42  coulouri
128  * make ErrPostEx(SEV_FATAL, ...) exit with nonzero status
129  *
130  * Revision 6.115  2003/03/20 14:47:16  madden
131  * StringSave on asn1_mode
132  *
133  * Revision 6.114  2003/03/20 13:44:23  madden
134  * Fix -m 10/11 output to make them SeqAnnots
135  *
136  * Revision 6.113  2002/12/31 22:47:16  boemker
137  * Added support for printing output as ASN (text, with -m 10, or binary, with
138  * -m 11).
139  *
140  * Revision 6.112  2002/11/12 16:04:24  dondosha
141  * Do not print the no hits found message for tabular output
142  *
143  * Revision 6.111  2002/09/23 21:00:00  madden
144  * Fix to correctly output SeqAlign for every query and iteration
145  *
146  * Revision 6.110  2002/09/19 13:25:00  camacho
147  * Fix incorrect index into myargs array
148  *
149  * Revision 6.109  2002/09/18 20:34:30  camacho
150  * Restored -P option
151  *
152  * Revision 6.108  2002/08/09 19:41:25  camacho
153  * 1) Added blast version number to command-line options
154  * 2) Added explanations for some default parameters
155  *
156  * Revision 6.107  2002/06/19 22:50:17  dondosha
157  * Added all queries information for tabular output with multiple queries
158  *
159  * Revision 6.106  2002/04/29 19:55:25  madden
160  * Use ARG_FLOAT for db length
161  *
162  * Revision 6.105  2002/03/13 22:48:06  dondosha
163  * Avoid freeing masking locations after each iteration with XML output
164  *
165  * Revision 6.104  2001/10/12 14:55:41  dondosha
166  * Changed description of the -t option
167  *
168  * Revision 6.103  2001/08/29 19:06:37  madden
169  * added variable posComputationcalled in Main, added parameter posComputationCalled to PGPrintPosMatrix
170  *
171  * Revision 6.102  2001/08/28 17:34:35  madden
172  * Add -m 9 as tabular output with comments
173  *
174  * Revision 6.101  2001/07/30 16:27:42  dondosha
175  * Do not call PGPOutTextMessages with XML output option
176  *
177  * Revision 6.100  2001/07/25 19:40:15  dondosha
178  * Multiply hitlist_size by 2 when going to next query if when tweak_parameters set
179  *
180  * Revision 6.99  2001/07/24 18:16:55  madden
181  * Set error_return when freeing
182  *
183  * Revision 6.98  2001/07/09 19:37:31  kans
184  * return 0 instead of NULL to fix Mac compiler error
185  *
186  * Revision 6.97  2001/07/03 20:50:33  madden
187  * Commented out call to PrintTabularOutputHeader
188  *
189  * Revision 6.96  2001/06/22 13:48:39  dondosha
190  * Declare variable vnp
191  *
192  * Revision 6.95  2001/06/21 21:56:37  dondosha
193  * Fixed memory leak: destroy all error returns; uncommented ObjMgrFreeCache
194  *
195  * Revision 6.94  2001/06/15 21:19:28  dondosha
196  * Added tabular output option -m 8
197  *
198  * Revision 6.93  2001/06/11 20:31:19  shavirin
199  * Added possibility to make Batch searches in iteractive mode.
200  *
201  * Revision 6.92  2001/04/13 20:46:01  madden
202  * Changed default e-value threshold for inclusion in multipass model from 0.002 to 0.005, Changed default constant in pseudocounts for multipass version from 7 to 9
203  *
204  * Revision 6.91  2001/04/10 19:20:52  madden
205  * Unsuppress some options suppressed for the release
206  *
207  * Revision 6.90  2001/03/30 22:02:04  madden
208  * Suppress options not yet ready for prime-time
209  *
210  * Revision 6.89  2000/11/30 17:10:13  shavirin
211  * Initialized to NULL xml pointer.
212  *
213  * Revision 6.88  2000/11/28 20:48:49  shavirin
214  * Adopted for usage with many-iterations XML.
215  *
216  * Revision 6.87  2000/11/22 21:57:08  shavirin
217  * Added passing of iteration number into XML output.
218  *
219  * Revision 6.86  2000/11/22 21:41:52  shavirin
220  * Removed problem with mutiple iteration XML output.
221  *
222  * Revision 6.85  2000/11/22 17:34:32  madden
223  * Change NULL to 0 for an intvalue
224  *
225  * Revision 6.84  2000/11/22 17:28:31  madden
226  * Enable decline-to-align
227  *
228  * Revision 6.83  2000/10/27 21:26:50  shavirin
229  * Produce valid XML output if no hits found.
230  *
231  * Revision 6.82  2000/10/27 19:14:41  madden
232  * Change description of -b option
233  *
234  * Revision 6.81  2000/10/24 13:51:33  shavirin
235  * Removed unused parameter from the function BXMLPrintOutput().
236  *
237  * Revision 6.80  2000/10/23 19:58:22  dondosha
238  * Open and close AsnIo outside of call(s) to BXMLPrintOutput
239  *
240  * Revision 6.79  2000/10/17 19:39:20  shavirin
241  * Fixed compilation problems on Mac.
242  *
243  * Revision 6.78  2000/10/13 20:58:01  madden
244  * Add YES_TO_DECLINE_TO_ALIGN define and disable same
245  *
246  * Revision 6.77  2000/09/28 15:48:30  dondosha
247  * Open <PRE> block in PGPFormatFooter
248  *
249  * Revision 6.76  2000/09/27 19:31:44  dondosha
250  * Use original square substitution matrix to pass to txalign on all iterations
251  *
252  * Revision 6.75  2000/09/21 17:54:46  dondosha
253  * Do not pass matrix to txalign in case of query-anchored formatting
254  *
255  * Revision 6.74  2000/09/13 18:34:35  dondosha
256  * Create BLAST_Matrix from ScoreBlk before converting it to txalign-style matrix
257  *
258  * Revision 6.73  2000/09/12 21:51:38  dondosha
259  * Pass the correct scoring matrix to ShowTextAlignFromAnnot
260  *
261  * Revision 6.72  2000/08/28 21:55:01  shavirin
262  * Added option (m = 7) to print XML output.
263  *
264  * Revision 6.71  2000/08/22 20:18:21  shavirin
265  * Added support for HTML output if decline-to-align parameter is set.
266  *
267  * Revision 6.70  2000/08/18 21:31:12  madden
268  * no longer need to raise E-value threshold for composition-based statistics
269  *
270  * Revision 6.69  2000/08/08 21:47:55  shavirin
271  * Added parameter decline_align and possibility to print then
272  * discontinuous alignment.
273  *
274  * Revision 6.68  2000/07/26 19:04:14  shavirin
275  * Set overriding default threshold only if input value != 0.
276  *
277  * Revision 6.67  2000/07/25 18:14:06  shavirin
278  * WARNING: This is no-turning-back changed related to S&W Blast from
279  * Alejandro Schaffer
280  *
281  * Revision 6.66  2000/07/21 20:46:40  madden
282  * Threshold set to default if arg is zero
283  *
284  * Revision 6.65  2000/06/27 15:25:19  madden
285  * Changed master-slave to query-anchored
286  *
287  * Revision 6.64  2000/03/29 17:32:55  madden
288  * Commented out yet another call to ObjMgrFreeCache
289  *
290  * Revision 6.63  2000/03/27 21:32:00  shavirin
291  * Use function ShowTextAlignFromAnnot2 instead of ShowTextAlignFromAnnot3
292  *
293  * Revision 6.62  2000/03/23 15:30:22  shavirin
294  * Removed call to ObjMgrFreeCache()
295  *
296  * Revision 6.61  2000/03/07 21:59:07  shavirin
297  * Now will use PSSM Matrix to show positives in PSI Blast
298  *
299  * Revision 6.60  2000/03/02 21:06:09  shavirin
300  * Added -U option, that allows to consider low characters in FASTA files
301  * as filtered regions (for blastn, blastp and tblastn).
302  *
303  * Revision 6.58  2000/01/07 22:01:04  shavirin
304  * Fixed problem with printing alignment.
305  *
306  * Revision 6.57  1999/12/21 21:34:05  shavirin
307  * Fixed some memory leaks.
308  *
309  * Revision 6.56  1999/11/08 19:12:41  shavirin
310  * Fixed minor SGI warning.
311  *
312  * Revision 6.55  1999/10/21 20:27:52  shavirin
313  * Fixed bug resulted in failue to print out seqannot. (-O)
314  *
315  * Revision 6.53  1999/10/14 15:52:51  shavirin
316  * Added possibility to make search by list of gis. Fixed some bugs.
317  *
318  * Revision 6.52  1999/10/05 17:41:08  shavirin
319  * Corrected in accordance to blast.c changes.
320  *
321  * Revision 6.51  1999/09/24 16:06:15  shavirin
322  * Matrix is set to NULL if no matrix calculation produced.
323  *
324  * Revision 6.50  1999/09/22 17:53:25  shavirin
325  * Now functions will collect messages in ValNodePtr before printing out.
326  *
327  * Revision 6.49  1999/09/21 16:01:46  shavirin
328  * Rearanged all file. Main function was disintegrated into few small
329  * functions.
330  *
331  * Revision 6.48  1999/08/27 18:17:42  shavirin
332  * Added new parameter to command line - decline_align.
333  *
334  * Revision 6.47  1999/08/26 14:58:06  madden
335  * Use float for db length
336  *
337  * Revision 6.46  1999/08/04 13:26:49  madden
338  * Added -B option
339  *
340  * Revision 6.45  1999/03/31 16:58:04  madden
341  * Removed static FindProt and FindNuc
342  *
343  * Revision 6.44  1999/03/24 18:16:33  madden
344  * zero-out groupOrder and featureOrder
345  *
346  * Revision 6.43  1999/03/21 19:43:23  madden
347  * Added -Q option to store ASCII version of last position-specific matrix in a file
348  *
349  * Revision 6.42  1999/03/04 14:17:20  egorov
350  * Added new parameter to BlastMaskTheResidues() function for correct masking
351  * when query is seqloc.  The paramter is not used in this file and is 0.
352  *
353  * Revision 6.41  1999/02/18 21:13:11  madden
354  * Check for non-pattern search before call to BlastPruneSapStructDestruct
355  *
356  * Revision 6.40  1999/02/10 21:12:27  madden
357  * Added HTML and GI list option, fixed filtering
358  *
359  * Revision 6.39  1999/01/22 17:24:51  madden
360  * added line breaks for alignment views
361  *
362  * Revision 6.38  1998/12/29 20:03:14  kans
363  * calls UseLocalAsnloadDataAndErrMsg at startup
364  *
365  * Revision 6.37  1998/12/17 21:54:39  madden
366  * Added call to BlastPruneHitsFromSeqAlign for other than first round
367  *
368  * Revision 6.36  1998/12/16 14:13:36  madden
369  * Fix for more than one pattern in query
370  *
371  * Revision 6.35  1998/11/19 14:04:34  madden
372  * Changed message level to SEV_WARNING
373  *
374  * Revision 6.34  1998/11/16 16:29:41  madden
375  * Added ErrSetMessageLevel(SEV_INFO) and fixed call to PrintKAParametersExtra
376  *
377  * Revision 6.33  1998/09/28 12:33:02  madden
378  * Used BlastErrorPrint
379  *
380  * Revision 6.31  1998/09/17 19:54:31  madden
381  * Removed fillCandLambda
382  *
383  * Revision 6.29  1998/09/10 22:38:24  madden
384  * Moved convertSeqAlignListToValNodeList and convertValNodeListToSeqAlignList
385  *
386  * Revision 6.28  1998/09/09 21:20:02  madden
387  * AS fixed warnings, removed stderr fprintf, added call to PrintKAParametersExtra
388  *
389  * Revision 6.27  1998/09/09 16:10:49  madden
390  * PHI-BLAST changes
391  *
392  * Revision 6.26  1998/07/17 15:41:36  madden
393  * Added effective search space flag
394  *
395  * Revision 6.24  1998/06/14 19:44:46  madden
396  * Checkpointing fix
397  *
398  * Revision 6.23  1998/06/12 21:32:27  madden
399  * Removed extra FnPtr cast
400  *
401  * Revision 6.22  1998/06/10 13:33:39  madden
402  * change -h from 0.01 to 0.001
403  *
404  * Revision 6.21  1998/06/05 21:48:43  madden
405  * Added -K and -L options
406  *
407  * Revision 6.20  1998/05/18 18:01:05  madden
408  * Changed args to allow filter options to be changed
409  *
410  * Revision 6.19  1998/05/01 18:31:03  egorov
411  * Add new parametes to BLASTOptionSetGapParam()
412  *
413  * Revision 6.18  1998/04/30 14:32:33  madden
414  * init_buff_ex arg changed to 90 for reference
415  *
416  * Revision 6.17  1998/04/29 14:29:31  madden
417  * Made reference line longer
418  *
419  * Revision 6.16  1998/04/01 22:49:13  madden
420  * Print No hits found message
421  *
422  * Revision 6.15  1998/02/25 20:50:50  madden
423  * Added arg for db length
424  *
425  * Revision 6.14  1998/02/24 22:48:36  madden
426  * Removed options for culling
427  *
428  * Revision 6.13  1998/01/02 14:33:37  madden
429  * Added comma
430  *
431  * Revision 6.12  1997/12/31 17:48:56  madden
432  * Added wordsize option
433  *
434  * Revision 6.11  1997/12/23 21:09:44  madden
435  * Added -K and -L for range-dependent blast
436  *
437  * Revision 6.10  1997/12/23 20:57:44  madden
438  * Changes for checkpointing
439  *
440  * Revision 6.9  1997/11/19 14:26:46  madden
441  * Removed extra break statement
442  *
443  * Revision 6.8  1997/11/18 22:24:24  madden
444  * Added call to BLASTOptionSetGapParams
445  *
446  * Revision 6.7  1997/11/08 22:04:43  madden
447  * Called BlastOtherReturnsPrepare earlier to before posMatrix is deleted
448  *
449  * Revision 6.6  1997/10/27 22:26:49  madden
450  * Added call to ObjMgrFreeCache(0)
451  *
452  * Revision 6.5  1997/10/23 20:26:15  madden
453  * Use of init_buff_ex rather than init_buff
454  *
455  * Revision 6.4  1997/10/22 21:56:49  madden
456  * Changed default values
457  *
458  * Revision 6.3  1997/10/07 21:33:34  madden
459  * Added BLUNT option
460  *
461  * Revision 6.2  1997/09/18 22:25:02  madden
462  * b and v options now work
463  *
464  * Revision 6.1  1997/09/16 16:40:50  madden
465  * Dbinfo printing changed for multiple db searches
466  *
467  * Revision 6.0  1997/08/25 18:19:19  madden
468  * Revision changed to 6.0
469  *
470  * Revision 1.24  1997/08/24 19:38:23  madden
471  * Used function BlastOtherReturnsPrepare
472  *
473  * Revision 1.23  1997/08/14 21:48:57  madden
474  * Added descriptions and alignments options
475  *
476  * Revision 1.22  1997/07/29 19:33:05  madden
477  * Added TXALIGN_SHOW_QS flag
478  *
479  * Revision 1.21  1997/07/28 18:36:45  madden
480  * Replaced printf with ErrPostEx and fprintf
481  *
482  * Revision 1.20  1997/07/28 16:59:21  madden
483  * Added include for simutil.h
484  *
485  * Revision 1.19  1997/07/28 16:50:51  madden
486  * Changed call to ShowTextAlignFromAnnot
487  *
488  * Revision 1.18  1997/07/22 19:18:40  madden
489  * printing version, etc.
490  *
491  * Revision 1.17  1997/06/25 14:06:21  madden
492  * minor changes to check convergence
493  *
494  * Revision 1.16  1997/06/23 20:51:29  madden
495  * Added matrix option
496  *
497  * Revision 1.15  1997/06/20 19:30:00  madden
498  * Added align_type info, support for SeqAligns
499  *
500  * Revision 1.14  1997/05/23 20:54:48  madden
501  * Added -Z option for final gapped alignment
502  *
503  * Revision 1.13  1997/05/07 15:06:01  madden
504  * replacement of search by compactSearch
505  *
506  * Revision 1.12  1997/04/21  14:09:27  madden
507  * Added four new master-slave alignment types.
508  *
509  * Revision 1.11  1997/04/11  19:03:47  madden
510  * Changes to ignore query ID and show master-slave alignments.
511  *
512  * Revision 1.10  1997/04/10  19:28:28  madden
513  * COMMAND_LINE replaced by ALL_ROUNDS.
514  *
515  * Revision 1.9  1997/04/10  13:27:32  madden
516  * Added COMMAND_LINE define, option for multiple alignments.
517  *
518  * Revision 1.8  1997/04/07  21:44:50  madden
519  * Removed unused variable stats.
520  *
521  * Revision 1.7  1997/04/04  21:13:33  madden
522  * Used function BioseqBlastEngineCore, remove PrivateScoreFunc.
523  *
524  * Revision 1.6  1997/04/04  16:38:04  madden
525  * Changed filtering option, ObjMgrHold.
526  *
527  * Revision 1.5  1997/03/21  20:30:02  madden
528  * Expect value arg made a float.
529  *
530  * Revision 1.4  1997/03/13  21:18:51  madden
531  * Changed default extension value to 1 from 2.
532  *
533  * Revision 1.3  1997/02/19  21:43:04  madden
534  * Extensive changes for psi-blast.  blastp runs now occur multiple times.
535  *
536  * Revision 1.2  1997/02/12  15:16:58  madden
537  * Changed from blast2 to new formatting.
538  *
539  * Revision 1.1  1997/01/16  21:35:42  madden
540  * Initial revision
541  *
542  * Revision 1.1  1997/01/16  21:34:23  madden
543  * Initial revision
544  *
545 */
546 #include <string.h>
547 
548 #include <ncbi.h>
549 #include <objseq.h>
550 #include <objsset.h>
551 #include <sequtil.h>
552 #include <seqport.h>
553 #include <tofasta.h>
554 #include <blast.h>
555 #include <blastpri.h>
556 #include <txalign.h>
557 #include <simutil.h>
558 #include <gapxdrop.h>
559 #include <posit.h>
560 #include <seed.h>
561 #include <sqnutils.h>
562 #include <xmlblast.h>
563 #include <ddvcreate.h>
564 #include <blfmtutl.h>
565 #include <objscoremat.h>
566 #include <algo/blast/composition_adjustment/composition_constants.h>
567 
568 /* Used by the callback function. */
569 FILE *global_fp=NULL;
570 /*
571   Callback to print out ticks, in UNIX only due to file systems
572   portability issues.
573   */
574 
575 static int LIBCALLBACK
tick_callback(Int4 sequence_number,Int4 number_of_positive_hits)576 tick_callback(Int4 sequence_number, Int4 number_of_positive_hits)
577 {
578     (void) sequence_number;
579     (void) number_of_positive_hits;
580 #ifdef OS_UNIX
581 
582     fprintf(global_fp, "%s", ".");
583     fflush(global_fp);
584 #endif
585     return 0;
586 }
587 
588 #define EVALUE_EXPAND 10
589 
590 #define YES_TO_DECLINE_TO_ALIGN
591 
592 #define NUMARG (sizeof(myargs)/sizeof(myargs[0]))
593 
594 typedef enum {
595 ARG_DATABASE = 0,
596 ARG_QUERY,
597 ARG_WINDOW_SIZE,
598 ARG_THRESHOLD,
599 ARG_EVALUE,
600 ARG_FORMAT,
601 ARG_OUT,
602 ARG_XDROP_UNGAPPED,
603 ARG_MULTIPLEHITS,
604 ARG_FILTER,
605 ARG_GAPOPEN,
606 ARG_GAPEXT,
607 ARG_XDROP_GAPPED,
608 ARG_GAP_TRIGGER,
609 ARG_REQUIRED_START,
610 ARG_REQUIRED_END,
611 ARG_THREADS,
612 ARG_SHOW_GIS,
613 ARG_EVALUE_INCLUSION_THRESHOLD,
614 ARG_PSEUDOCOUNT_CONSTANT,
615 ARG_MAX_PASSES,
616 ARG_BELIEVEQUERY,
617 ARG_XDROP_FINAL,
618 ARG_SEQALIGN_FILE,
619 ARG_MATRIX,
620 ARG_DESCRIPTIONS,
621 ARG_ALIGNMENTS,
622 ARG_CHECKPOINT_OUTPUT,
623 ARG_CHECKPOINT_INPUT,
624 ARG_WORDSIZE,
625 ARG_DBSIZE,
626 ARG_BEST_HITS,
627 ARG_SMITHWATERMAN,
628 ARG_SEARCHSP,
629 ARG_PHI_PROGRAM,
630 ARG_PHI_HITFILE,
631 ARG_HTML,
632 ARG_MATRIX_OUT,
633 ARG_ALIGNMENT_IN,
634 ARG_GILIST,
635 ARG_LCASE,
636 ARG_COMP_BASED_STATS,
637 ARG_SCOREMAT_INPUT,
638 ARG_SCOREMAT_OUTPUT,
639 ARG_COST_DECLINE_ALIGN
640 } BlastpgpArguments;
641 
642 static Args myargs[] = {
643     { "Database",                                                     /* ARG_DATABASE */
644       "nr", NULL, NULL, FALSE, 'd', ARG_STRING, 0.0, 0, NULL},
645     { "Query File (not needed if restarting from scoremat)",          /* ARG_QUERY */
646       "stdin", NULL, NULL, TRUE, 'i', ARG_FILE_IN, 0.0, 0, NULL},
647     { "Multiple Hits window size",                                    /* ARG_WINDOW_SIZE */
648       "40", NULL, NULL, FALSE, 'A', ARG_INT, 0.0, 0, NULL},
649     { "Threshold for extending hits",                                 /* ARG_THRESHOLD */
650       "11", NULL, NULL, FALSE, 'f', ARG_INT, 0.0, 0, NULL},
651     { "Expectation value (E)",                                        /* ARG_EVALUE */
652       "10.0", NULL, NULL, FALSE, 'e', ARG_FLOAT, 0.0, 0, NULL},
653     { "alignment view options:\n0 = pairwise,\n1 = query-anchored showing identities,\n2 = query-anchored no identities,\n3 = flat query-anchored, show identities,\n4 = flat query-anchored, no identities,\n5 = query-anchored no identities and blunt ends,\n6 = flat query-anchored, no identities and blunt ends,\n7 = XML Blast output,\n8 = Tabular output, \n9 = Tabular output with comments\n10 = ASN, text\n11 = ASN, binary",                                 /* ARG_FORMAT */
654       "0", NULL, NULL, FALSE, 'm', ARG_INT, 0.0, 0, NULL},
655     { "Output File for Alignment",                                    /* ARG_OUT */
656       "stdout", NULL, NULL, TRUE, 'o', ARG_FILE_OUT, 0.0, 0, NULL},
657     { "Dropoff (X) for blast extensions in bits (default if zero)",   /* ARG_XDROP_UNGAPPED */
658       "7.0", NULL, NULL, FALSE, 'y', ARG_FLOAT, 0.0, 0, NULL},
659     { "0 for multiple hit, 1 for single hit",                         /* ARG_MULTIPLEHITS */
660        "0",  NULL, NULL, FALSE, 'P', ARG_INT, 0.0, 0, NULL},
661     { "Filter query sequence with SEG",                               /* ARG_FILTER */
662       "F", NULL, NULL, FALSE, 'F', ARG_STRING, 0.0, 0, NULL},
663     { "Cost to open a gap",                                           /* ARG_GAPOPEN */
664       "11", NULL, NULL, FALSE, 'G', ARG_INT, 0.0, 0, NULL},
665     { "Cost to extend a gap",                                         /* ARG_GAPEXT */
666       "1", NULL, NULL, FALSE, 'E', ARG_INT, 0.0, 0, NULL},
667     { "X dropoff value for gapped alignment (in bits)",               /* ARG_XDROP_GAPPED */
668       "15", NULL, NULL, FALSE, 'X', ARG_INT, 0.0, 0, NULL},
669     { "Number of bits to trigger gapping",                            /* ARG_GAP_TRIGGER */
670       "22.0", NULL, NULL, FALSE, 'N', ARG_FLOAT, 0.0, 0, NULL},
671     { "Start of required region in query",                            /* ARG_REQUIRED_START */
672       "1", NULL, NULL, FALSE, 'S', ARG_INT, 0.0, 0, NULL},
673     { "End of required region in query (-1 indicates end of query)",  /* ARG_REQUIRED_END */
674       "-1", NULL, NULL, FALSE, 'H', ARG_INT, 0.0, 0, NULL},
675     { "Number of processors to use",                                  /* ARG_THREADS */
676       "1", NULL, NULL, FALSE, 'a', ARG_INT, 0.0, 0, NULL},
677     { "Show GI's in deflines",                                        /* ARG_SHOW_GIS */
678       "F", NULL, NULL, FALSE, 'I', ARG_BOOLEAN, 0.0, 0, NULL},
679     { "e-value threshold for inclusion in multipass model",           /* ARG_EVALUE_INCLUSION_THRESHOLD */
680       "0.002", NULL, NULL, FALSE, 'h', ARG_FLOAT, 0.0, 0, NULL},
681     { "Constant in pseudocounts for multipass version; 0 uses entropy method; otherwise a value near 30 is recommended",               /* ARG_PSEUDOCOUNT_CONSTANT */
682       "0", NULL, NULL, FALSE, 'c', ARG_INT, 0.0, 0, NULL},
683     { "Maximum number of passes to use in  multipass version",        /* ARG_MAX_PASSES */
684       "1", NULL, NULL, FALSE, 'j', ARG_INT, 0.0, 0, NULL},
685     { "Believe the query defline",                                    /* ARG_BELIEVEQUERY */
686       "F", NULL, NULL, FALSE, 'J', ARG_BOOLEAN, 0.0, 0, NULL},
687     { "X dropoff value for final gapped alignment (in bits)",         /* ARG_XDROP_FINAL */
688       "25", NULL, NULL, FALSE, 'Z', ARG_INT, 0.0, 0, NULL},
689     { "SeqAlign file ('Believe the query defline' must be TRUE)",     /* ARG_SEQALIGN_FILE */
690       NULL, NULL, NULL, TRUE, 'O', ARG_FILE_OUT, 0.0, 0, NULL},
691     { "Matrix",                                                       /* ARG_MATRIX */
692       "BLOSUM62", NULL, NULL, FALSE, 'M', ARG_STRING, 0.0, 0, NULL},
693     { "Number of database sequences to show one-line descriptions for (V)", /* ARG_DESCRIPTIONS */
694       "500", NULL, NULL, FALSE, 'v', ARG_INT, 0.0, 0, NULL},
695     { "Number of database sequence to show alignments for (B)",       /* ARG_ALIGNMENTS */
696       "250", NULL, NULL, FALSE, 'b', ARG_INT, 0.0, 0, NULL},
697     { "Output File for PSI-BLAST Checkpointing",                      /* ARG_CHECKPOINT_OUTPUT */
698       NULL, NULL, NULL, TRUE, 'C', ARG_FILE_OUT, 0.0, 0, NULL},
699     { "Input File for PSI-BLAST Restart",                             /* ARG_CHECKPOINT_INPUT */
700       NULL, NULL, NULL, TRUE, 'R', ARG_FILE_IN, 0.0, 0, NULL},
701     { "Word size",                                                    /* ARG_WORDSIZE */
702       "3", NULL, NULL, FALSE, 'W', ARG_INT, 0.0, 0, NULL},
703     { "Effective length of the database (use zero for the real size)", /* ARG_DBSIZE */
704       "0", NULL, NULL, FALSE, 'z', ARG_FLOAT, 0.0, 0, NULL},
705     { "Number of best hits from a region to keep",                     /* ARG_BEST_HITS */
706       "0", NULL, NULL, FALSE, 'K', ARG_INT, 0.0, 0, NULL},
707     { "Compute locally optimal Smith-Waterman alignments",             /* ARG_SMITHWATERMAN */
708       "F", NULL, NULL, FALSE, 's', ARG_BOOLEAN, 0.0, 0, NULL},
709     { "Effective length of the search space (use zero for the real size)", /* ARG_SEARCHSP */
710       "0", NULL, NULL, FALSE, 'Y', ARG_FLOAT, 0.0, 0, NULL},
711     { "program option for PHI-BLAST",                                  /* ARG_PHI_PROGRAM */
712       "blastpgp", NULL, NULL, FALSE, 'p', ARG_STRING, 0.0, 0, NULL},
713     { "Hit File for PHI-BLAST",                                        /* ARG_PHI_HITFILE */
714       "hit_file", NULL, NULL, FALSE, 'k', ARG_FILE_IN, 0.0, 0, NULL},
715     { "Produce HTML output",                                           /* ARG_HTML */
716       "F", NULL, NULL, FALSE, 'T', ARG_BOOLEAN, 0.0, 0, NULL},
717     { "Output File for PSI-BLAST Matrix in ASCII",                     /* ARG_MATRIX_OUT */
718       NULL, NULL, NULL, TRUE, 'Q', ARG_FILE_OUT, 0.0, 0, NULL},
719     { "Input Alignment File for PSI-BLAST Restart",                    /* ARG_ALIGNMENT_IN */
720       NULL, NULL, NULL, TRUE, 'B', ARG_FILE_IN, 0.0, 0, NULL},
721     { "Restrict search of database to list of GI's",                   /* ARG_GILIST */
722       NULL, NULL, NULL, TRUE, 'l', ARG_STRING, 0.0, 0, NULL},
723     {"Use lower case filtering of FASTA sequence",                     /* ARG_LCASE */
724      "F", NULL,NULL,TRUE,'U',ARG_BOOLEAN, 0.0,0,NULL},
725     { "Use composition based score adjustment\n"                             /* ARG_COMP_BASED_STATS */
726       "As first character:\n"
727       "0 or F or f: no composition-based statistics\n"
728       "2 or T or t: Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence properties in round 1\n"
729       "1: Composition-based statistics as in NAR  29:2994--3005, 2001\n"
730       "3: Composition-based score adjustment as in Bioinformatics 21:902-911, "
731       "2005, unconditionally in round 1\n"
732       "As second character, if first character is equivalent to 1, 2, or 3:\n"
733       "U or u: unified p-value combining alignment p-value and "
734       "compositional p-value in round 1 only\n",
735       "2", NULL, NULL, FALSE, 't', ARG_STRING, 0.0, 0, NULL},
736     { "ASN.1 Scoremat input of checkpoint data:\n"
737       "0: no scoremat input\n"
738       "1: Restart is from ASCII scoremat checkpoint file,\n"
739       "2: Restart is from binary scoremat checkpoint file",            /* ARG_SCOREMAT_INPUT */
740       "0", NULL, NULL, TRUE, 'q', ARG_INT, 0.0, 0, NULL},
741     { "ASN.1 Scoremat output of checkpoint data:\n"
742       "0: no scoremat output\n"
743       "1: Output is ASCII scoremat checkpoint file (requires -J),\n"
744       "2: Output is binary scoremat checkpoint file (requires -J)",    /* ARG_SCOREMAT_OUTPUT */
745       "0", NULL, NULL, TRUE, 'u', ARG_INT, 0.0, 0, NULL},
746 #ifdef YES_TO_DECLINE_TO_ALIGN
747     { "Cost to decline alignment (disabled when 0)",                   /* ARG_COST_DECLINE_ALIGN */
748       "0", NULL, NULL, FALSE, 'L', ARG_INT, 0.0, 0, NULL},
749 #endif
750 };
751 
752 typedef struct _pgp_blast_options {
753     BLAST_OptionsBlkPtr options;
754     CharPtr blast_database;
755     BioseqPtr query_bsp, fake_bsp;
756     Int4 number_of_descriptions, number_of_alignments;
757     FILE *infp, *outfp;
758     AsnIoPtr aip_out;
759     Boolean html;
760     Boolean believe_query;
761     Uint4 align_options, print_options;
762     /* PHI-PSI Blast variables */
763     Uint1 featureOrder[FEATDEF_ANY];
764     Uint1 groupOrder[FEATDEF_ANY];
765     Int4 program_flag;
766     CharPtr patfile;
767     FILE *patfp;
768     seedSearchItems *seedSearch;
769     Boolean is_xml_output;
770     Boolean is_asn1_output;
771     char* asn1_mode;  /* "w" or "wb" */
772 } PGPBlastOptions, PNTR PGPBlastOptionsPtr;
773 
PGPGetPrintOptions(Boolean gapped,Uint4Ptr align_options_out,Uint4Ptr print_options_out)774 void PGPGetPrintOptions(Boolean gapped, Uint4Ptr align_options_out,
775                         Uint4Ptr print_options_out)
776 {
777     Uint4 print_options, align_options;
778 
779     print_options = 0;
780     if (gapped == FALSE)
781         print_options += TXALIGN_SHOW_NO_OF_SEGS;
782 
783     align_options = 0;
784     align_options += TXALIGN_COMPRESS;
785     align_options += TXALIGN_END_NUM;
786 
787     if (myargs[ARG_SHOW_GIS].intvalue) {
788         align_options += TXALIGN_SHOW_GI;
789         print_options += TXALIGN_SHOW_GI;
790     }
791 
792     if (myargs[ARG_HTML].intvalue) {
793         align_options += TXALIGN_HTML;
794         print_options += TXALIGN_HTML;
795     }
796 
797     if (myargs[ARG_FORMAT].intvalue != 0) {
798         align_options += TXALIGN_MASTER;
799         if (myargs[ARG_FORMAT].intvalue == 1 || myargs[ARG_FORMAT].intvalue == 3)
800             align_options += TXALIGN_MISMATCH;
801         if (myargs[ARG_FORMAT].intvalue == 3 || myargs[ARG_FORMAT].intvalue == 4 || myargs[ARG_FORMAT].intvalue == 6)
802             align_options += TXALIGN_FLAT_INS;
803         if (myargs[ARG_FORMAT].intvalue == 5 || myargs[ARG_FORMAT].intvalue == 6)
804             align_options += TXALIGN_BLUNT_END;
805     } else {
806         align_options += TXALIGN_MATRIX_VAL;
807         align_options += TXALIGN_SHOW_QS;
808     }
809 
810     *align_options_out = align_options;
811     *print_options_out = print_options;
812 
813     return;
814 }
PGPFreeBlastOptions(PGPBlastOptionsPtr bop)815 void PGPFreeBlastOptions(PGPBlastOptionsPtr bop)
816 {
817 
818     bop->options = BLASTOptionDelete(bop->options);
819 
820     FileClose(bop->infp);
821     FileClose(bop->outfp);
822     bop->aip_out = AsnIoClose(bop->aip_out);
823 
824     MemFree(bop->blast_database);
825     MemFree(bop->patfile);
826     MemFree(bop->seedSearch);
827 
828     MemFree(bop);
829 
830     return;
831 }
832 
PGPReadBlastOptions(void)833 PGPBlastOptionsPtr PGPReadBlastOptions(void)
834 {
835     PGPBlastOptionsPtr bop;
836     BLAST_OptionsBlkPtr options;
837     SeqEntryPtr sep;
838     Boolean is_dna;
839     Int4 align_view;
840 
841     align_view = myargs[ARG_FORMAT].intvalue;
842 
843     bop = MemNew(sizeof(PGPBlastOptions));
844 
845     bop->blast_database   = StringSave(myargs[ARG_DATABASE].strvalue);
846 
847     if (align_view != 7 && align_view != 10 && align_view != 11 && myargs[ARG_OUT].strvalue != NULL) {
848         if ((bop->outfp = FileOpen(myargs[ARG_OUT].strvalue, "w")) == NULL) {
849             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output "
850                       "file %s\n", myargs[ARG_OUT].strvalue);
851             return NULL;
852         }
853     }
854 
855     bop->number_of_descriptions = myargs[ARG_DESCRIPTIONS].intvalue;
856     bop->number_of_alignments = myargs[ARG_ALIGNMENTS].intvalue;
857 
858     if (myargs[ARG_BELIEVEQUERY].intvalue != 0)
859         bop->believe_query = TRUE;
860 
861     if (myargs[ARG_SCOREMAT_INPUT].intvalue != NO_SCOREMAT_IO &&
862         myargs[ARG_SCOREMAT_INPUT].intvalue != ASCII_SCOREMAT &&
863         myargs[ARG_SCOREMAT_INPUT].intvalue != BINARY_SCOREMAT) {
864         ErrPostEx(SEV_FATAL, 1, 0,"Invalid choice for scoremat input\n");
865         return NULL;
866     }
867     if (myargs[ARG_SCOREMAT_OUTPUT].intvalue != NO_SCOREMAT_IO &&
868         myargs[ARG_SCOREMAT_OUTPUT].intvalue != ASCII_SCOREMAT &&
869         myargs[ARG_SCOREMAT_OUTPUT].intvalue != BINARY_SCOREMAT) {
870         ErrPostEx(SEV_FATAL, 1, 0,"Invalid choice for scoremat output\n");
871         return NULL;
872     }
873     if (myargs[ARG_CHECKPOINT_OUTPUT].strvalue != NULL) {
874         const char * description = NULL;
875         const char * recommended_extension = NULL;
876         const char * actual_extension = NULL;
877 
878         switch (myargs[ARG_SCOREMAT_OUTPUT].intvalue) {
879         case NO_SCOREMAT_IO:
880             description = "standard";
881             recommended_extension = ".chk";
882             break;
883         case ASCII_SCOREMAT:
884             description = "ascii ASN.1";
885             recommended_extension = ".asnt";
886             break;
887         case BINARY_SCOREMAT:
888             description = "binary ASN.1";
889             recommended_extension = ".asn";
890             break;
891         }
892         actual_extension =
893             strrchr(myargs[ARG_CHECKPOINT_OUTPUT].strvalue, '.');
894         if (NULL == actual_extension) { /* No extension */
895             actual_extension = "";
896         }
897         if (0 != strcasecmp(recommended_extension, actual_extension)) {
898             ErrPostEx(SEV_WARNING, 1, 0, "a %s checkpoint file "
899                       "should have extension \"%s\".\n", description,
900                       recommended_extension);
901         }
902     }
903     if (myargs[ARG_SCOREMAT_OUTPUT].intvalue != NO_SCOREMAT_IO) {
904         if (bop->believe_query == FALSE) {
905             ErrPostEx(SEV_FATAL, 1, 0,
906                       "-J option must be TRUE for scoremat output");
907             return NULL;
908         }
909     }
910 
911     if (myargs[ARG_SEQALIGN_FILE].strvalue != NULL) {
912 
913         if (bop->believe_query == FALSE) {
914             ErrPostEx(SEV_FATAL, 1, 0,
915                       "-J option must be TRUE for ASN.1 output");
916             return NULL;
917         }
918 
919         if ((bop->aip_out = AsnIoOpen (myargs[ARG_SEQALIGN_FILE].strvalue,"w")) == NULL) {
920             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output "
921                       "file %s\n", myargs[ARG_SEQALIGN_FILE].strvalue);
922             return NULL;
923         }
924     }
925     else if (align_view == 10 || align_view == 11)
926     {
927         const char* mode = (align_view == 10) ? "w" : "wb";
928         if (bop->believe_query == FALSE) {
929             ErrPostEx(SEV_FATAL, 1, 0,
930                       "-J option must be TRUE for ASN.1 output");
931             return NULL;
932         }
933         if ((bop->aip_out = AsnIoOpen(myargs[ARG_OUT].strvalue, (char*) mode)) == NULL) {
934                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open output file %s\n", myargs[ARG_OUT].strvalue);
935                 return NULL;
936         }
937 	bop->is_asn1_output = TRUE;
938         if(align_view == 10)
939             bop->asn1_mode = StringSave("w");
940         else
941             bop->asn1_mode = StringSave("wb");
942     }
943 
944     options = BLASTOptionNew("blastp", TRUE);
945     bop->options = options;
946 
947     /* read the query sequence */
948 
949     if (myargs[ARG_SCOREMAT_INPUT].intvalue == NO_SCOREMAT_IO) {
950         if ((bop->infp = FileOpen(myargs[ARG_QUERY].strvalue, "r")) == NULL) {
951             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open input file %s\n",
952                       myargs[ARG_QUERY].strvalue);
953             return NULL;
954         }
955         if(myargs[ARG_LCASE].intvalue) {
956             if((sep = FastaToSeqEntryForDb (bop->infp, FALSE, NULL, bop->believe_query, NULL, NULL, &options->query_lcase_mask)) == NULL) {
957                 ErrPostEx(SEV_FATAL, 1, 0, "Unable to read input FASTA file\n");
958                 return NULL;
959             }
960         } else {
961             if((sep = FastaToSeqEntryEx(bop->infp, FALSE, NULL, bop->believe_query)) == NULL) {
962                 ErrPostEx(SEV_FATAL, 1, 0, "Unable to read input FASTA file\n");
963                 return NULL;
964             }
965         }
966 
967         SeqEntryExplore(sep, &bop->query_bsp, FindProt);
968         if (bop->query_bsp == NULL) {
969             ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
970             return NULL;
971         }
972     }
973     else {                      /* recover the query sequence from scoremat */
974         AsnIoPtr scorematfile;
975         PssmWithParametersPtr scoremat = NULL;
976 
977         if (myargs[ARG_CHECKPOINT_INPUT].strvalue == NULL) {
978             ErrPostEx(SEV_FATAL, 1, 0, "No restart file specified\n");
979             return NULL;
980         }
981 
982         if (myargs[ARG_SCOREMAT_INPUT].intvalue == ASCII_SCOREMAT)
983             scorematfile = AsnIoOpen(myargs[ARG_CHECKPOINT_INPUT].strvalue, "r");
984         else  /* binary scoremat */
985             scorematfile = AsnIoOpen(myargs[ARG_CHECKPOINT_INPUT].strvalue, "rb");
986 
987         if (scorematfile == NULL) {
988             ErrPostEx(SEV_FATAL, 1, 0, "Unable to open scoremat file\n");
989             return NULL;
990         }
991 
992         scoremat = PssmWithParametersAsnRead(scorematfile, NULL);
993         AsnIoClose(scorematfile);
994         if (scoremat == NULL) {
995             ErrPostEx(SEV_FATAL, 1, 0, "Could not read scoremat "
996                                        "for query sequence\n");
997             return NULL;
998         }
999 
1000         if (scoremat->pssm == NULL ||
1001             scoremat->pssm->query == NULL ||
1002             scoremat->pssm->query->data.ptrvalue == NULL) {
1003             ErrPostEx(SEV_FATAL, 1, 0, "Cannot read query sequence "
1004                                        "from scoremat\n");
1005             return NULL;
1006         }
1007 
1008         /* use the bioseq from 'scoremat'; remove from that
1009            structure so the bioseq is not freed along with the
1010            rest of the scoremat. Note that only the first
1011            bioseq in a bioseq-set is used */
1012 
1013         bop->query_bsp = (BioseqPtr)(scoremat->pssm->query->data.ptrvalue);
1014         scoremat->pssm->query = ValNodeFree(scoremat->pssm->query);
1015         PssmWithParametersFree(scoremat);
1016     }
1017 
1018     /* without a bioseq we cannot continue */
1019     if (bop->query_bsp == NULL) {
1020         ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
1021         return NULL;
1022     }
1023 
1024     /* Set default gap params for matrix. */
1025     BLASTOptionSetGapParams(options, myargs[ARG_MATRIX].strvalue, 0, 0);
1026 
1027     if(myargs[ARG_FORMAT].intvalue == 7) {
1028         bop->is_xml_output = TRUE;
1029     } else if (myargs[ARG_FORMAT].intvalue != 8 && myargs[ARG_FORMAT].intvalue != 9) {
1030         PGPGetPrintOptions(options->gapped_calculation, &bop->align_options,
1031                            &bop->print_options);
1032     }
1033 
1034     /* decrement by one to agree with program values. */
1035     options->required_start = myargs[ARG_REQUIRED_START].intvalue - 1;
1036     options->required_end = myargs[ARG_REQUIRED_END].intvalue;
1037     if (options->required_end != -1) {
1038         options->required_end--;
1039     }
1040 
1041     options->window_size = myargs[ARG_WINDOW_SIZE].intvalue;
1042 
1043     if(myargs[ARG_THRESHOLD].intvalue)
1044         options->threshold_second = (Int4) myargs[ARG_THRESHOLD].intvalue;
1045 
1046     options->dropoff_2nd_pass  = myargs[ARG_XDROP_UNGAPPED].floatvalue;
1047     options->expect_value  = (Nlm_FloatHi) myargs[ARG_EVALUE].floatvalue;
1048     options->kappa_expect_value = options->expect_value;
1049     options->hitlist_size = MAX(bop->number_of_descriptions,
1050                                 bop->number_of_alignments);
1051 
1052     if (myargs[ARG_MULTIPLEHITS].intvalue == 1) {
1053        options->two_pass_method  = FALSE;
1054        options->multiple_hits_only  = FALSE;
1055     } else {
1056        /* all other inputs, including the default 0 use 2-hit method */
1057        options->two_pass_method  = FALSE;
1058        options->multiple_hits_only  = TRUE;
1059     }
1060     options->gap_open = myargs[ARG_GAPOPEN].intvalue;
1061     options->gap_extend = myargs[ARG_GAPEXT].intvalue;
1062 
1063 #ifdef YES_TO_DECLINE_TO_ALIGN
1064     if(myargs[ARG_COST_DECLINE_ALIGN].intvalue != 0) {
1065        options->discontinuous = TRUE;
1066        options->decline_align = myargs[ARG_COST_DECLINE_ALIGN].intvalue;
1067     } else {
1068        options->discontinuous = FALSE;
1069        options->decline_align = INT2_MAX;
1070     }
1071 #endif
1072 
1073     options->gap_x_dropoff = myargs[ARG_XDROP_GAPPED].intvalue;
1074     options->gap_x_dropoff_final = myargs[ARG_XDROP_FINAL].intvalue;
1075     options->gap_trigger = myargs[ARG_GAP_TRIGGER].floatvalue;
1076 
1077     if (StringICmp(myargs[ARG_FILTER].strvalue, "T") == 0) {
1078         options->filter_string = StringSave("S");
1079     } else {
1080         options->filter_string = StringSave(myargs[9].strvalue);
1081     }
1082 
1083     options->number_of_cpus = (Int2) myargs[ARG_THREADS].intvalue;
1084 
1085 
1086     options->isPatternSearch = FALSE;
1087 
1088     if (0 != (StringCmp("blastpgp",myargs[ARG_PHI_PROGRAM].strvalue))) {
1089         options->isPatternSearch = TRUE;
1090         options->discontinuous = FALSE;
1091         options->decline_align = INT2_MAX;
1092         bop->program_flag = convertProgramToFlag(myargs[ARG_PHI_PROGRAM].strvalue,
1093                                                  &is_dna);
1094     }
1095 
1096     if (options->isPatternSearch) {
1097         bop->patfile = StringSave(myargs[ARG_PHI_HITFILE].strvalue);
1098         if ((bop->patfp = FileOpen(bop->patfile, "r")) == NULL) {
1099             ErrPostEx(SEV_FATAL, 1, 0, "blast: Unable to open pattern "
1100                       "file %s\n", bop->patfile);
1101             return NULL;
1102         }
1103 
1104         bop->seedSearch = (seedSearchItems *)
1105             ckalloc(sizeof(seedSearchItems));
1106     }
1107 
1108     if(options->isPatternSearch)
1109         fillCandLambda(bop->seedSearch, myargs[ARG_MATRIX].strvalue, options);
1110 
1111     options->ethresh = (Nlm_FloatHi) myargs[ARG_EVALUE_INCLUSION_THRESHOLD].floatvalue;
1112     options->pseudoCountConst = myargs[ARG_PSEUDOCOUNT_CONSTANT].intvalue;
1113     options->maxNumPasses = myargs[ARG_MAX_PASSES].intvalue;
1114     /*zero out e-value threshold if it will not be used*/
1115     if (options->maxNumPasses == 1)
1116         options->ethresh = 0.0;
1117     if (myargs[ARG_WORDSIZE].intvalue)
1118         options->wordsize = myargs[ARG_WORDSIZE].intvalue;
1119     if (myargs[ARG_DBSIZE].floatvalue)
1120         options->db_length = (Int8) myargs[ARG_DBSIZE].floatvalue;
1121 
1122     options->hsp_range_max  = myargs[ARG_BEST_HITS].intvalue;
1123     if (options->hsp_range_max != 0)
1124         options->perform_culling = TRUE;
1125 
1126     options->block_width  = 20; /* Default value - previously '-L' parameter */
1127 
1128     if (myargs[ARG_SEARCHSP].floatvalue)
1129         options->searchsp_eff = (Nlm_FloatHi) myargs[ARG_SEARCHSP].floatvalue;
1130 
1131     switch (myargs[ARG_COMP_BASED_STATS].strvalue[0]) {
1132     case 'F':
1133     case 'f':
1134     case '0':
1135         options->tweak_parameters = eNoCompositionBasedStats;
1136         break;
1137     case 'T':
1138     case 't':
1139     case '2':
1140         options->tweak_parameters = eCompositionMatrixAdjust;
1141         break;
1142     case '1':
1143         options->tweak_parameters = eCompositionBasedStats;
1144         break;
1145     case '3':
1146         ErrPostEx(SEV_WARNING, 1, 0, "the -t 3 argument "
1147                   "is currently experimental\n");
1148         options->tweak_parameters = eCompoForceFullMatrixAdjust;
1149         break;
1150     default:
1151         ErrPostEx(SEV_FATAL, 1, 0, "invalid argument for composition-"
1152                   "based statistics; see -t options\n");
1153         break;
1154     }
1155     options->unified_p = 0;
1156     if (options->tweak_parameters > eNoCompositionBasedStats) {
1157         switch (myargs[ARG_COMP_BASED_STATS].strvalue[1]) {
1158         case 'U':
1159         case 'u':
1160             options->unified_p = 1;
1161             ErrPostEx(SEV_WARNING, 1, 0, "unified p-values "
1162                       "are currently experimental\n");
1163             break;
1164         case '\0':
1165             break;
1166         default:
1167             ErrPostEx(SEV_WARNING, 1, 0, "unrecognized second character"
1168                       "in value of -t, ignoring it\n");
1169             break;
1170         }
1171     }
1172     if ((options->tweak_parameters > 1) &&
1173         ((NULL != myargs[ARG_CHECKPOINT_INPUT].strvalue) ||
1174          (NULL != myargs[ARG_ALIGNMENT_IN].strvalue))) {
1175         ErrPostEx(SEV_WARNING, 1, 0,
1176                   "-t larger than 1 not supported when restarting "
1177                   "from a checkpoint; setting -t to 1\n");
1178         options->tweak_parameters = 1;
1179     }
1180     options->smith_waterman = (Boolean) myargs[ARG_SMITHWATERMAN].intvalue;
1181 
1182     /* Seting list of gis to restrict search */
1183     if (myargs[ARG_GILIST].strvalue) {
1184         options->gifile = StringSave(myargs[ARG_GILIST].strvalue);
1185     }
1186 
1187     options = BLASTOptionValidate(options, "blastp");
1188 
1189     if (options == NULL)
1190         return NULL;
1191 
1192     if(bop->believe_query) {
1193         bop->fake_bsp = bop->query_bsp;
1194     } else {
1195         bop->fake_bsp = BlastMakeFakeBioseq(bop->query_bsp, NULL);
1196 
1197         BLASTUpdateSeqIdInSeqInt(bop->options->query_lcase_mask,
1198                                  bop->fake_bsp->id);
1199     }
1200 
1201     return bop;
1202 }
1203 
PGPReadNextQuerySequence(PGPBlastOptionsPtr bop)1204 Boolean PGPReadNextQuerySequence(PGPBlastOptionsPtr bop)
1205 {
1206     SeqEntryPtr sep;
1207 
1208     /* Cleaning up stuff from previous query run */
1209 
1210     bop->options->query_lcase_mask = SeqLocSetFree(bop->options->query_lcase_mask);
1211 
1212     if (bop->believe_query == TRUE) { /* Not corect - to be fixed */
1213         BioseqFree(bop->query_bsp);
1214     } else {
1215         BioseqFree(bop->fake_bsp);
1216     }
1217 
1218     /* scoremats contain only one sequence */
1219     if(myargs[ARG_CHECKPOINT_INPUT].intvalue != NO_SCOREMAT_IO) {
1220         return FALSE;
1221     }
1222 
1223     if(myargs[ARG_LCASE].intvalue) {
1224         if((sep = FastaToSeqEntryForDb (bop->infp, FALSE, NULL, bop->believe_query, NULL, NULL, &bop->options->query_lcase_mask)) == NULL) {
1225             return FALSE;
1226         }
1227     } else {
1228         if((sep = FastaToSeqEntryEx(bop->infp, FALSE, NULL,
1229                                     bop->believe_query)) == NULL) {
1230             return FALSE;
1231         }
1232     }
1233 
1234     bop->query_bsp = NULL;
1235     SeqEntryExplore(sep, &bop->query_bsp, FindProt);
1236 
1237     if (bop->query_bsp == NULL) {
1238         ErrPostEx(SEV_FATAL, 1, 0, "Unable to obtain bioseq\n");
1239         return 0;
1240     }
1241 
1242     if(bop->believe_query) {
1243         bop->fake_bsp = bop->query_bsp;
1244     } else {
1245         bop->fake_bsp = BlastMakeFakeBioseq(bop->query_bsp, NULL);
1246 
1247         BLASTUpdateSeqIdInSeqInt(bop->options->query_lcase_mask,
1248                                  bop->fake_bsp->id);
1249     }
1250 
1251     return TRUE;
1252 }
1253 
PGPFormatHeader(PGPBlastOptionsPtr bop)1254 Boolean PGPFormatHeader(PGPBlastOptionsPtr bop)
1255 {
1256     Boolean html = myargs[ARG_HTML].intvalue;
1257 
1258     if (bop->outfp == NULL)
1259 	return FALSE;
1260 
1261     if (html)
1262         fprintf(bop->outfp, "<PRE>\n");
1263 
1264     init_buff_ex(90);
1265     BlastPrintVersionInfo("blastp", html, bop->outfp);
1266     fprintf(bop->outfp, "\n");
1267     BlastPrintReference(html, 90, bop->outfp);
1268     fprintf(bop->outfp, "\n");
1269     if (bop->options->tweak_parameters > 1) {
1270       CAdjustmentPrintReference(html, 90, bop->outfp);
1271       fprintf(bop->outfp, "\n");
1272     }
1273     if ((1== bop->options->tweak_parameters) ||
1274 	((1 < bop->options->tweak_parameters) && (bop->options->maxNumPasses > 1))) {
1275       CBStatisticsPrintReference(html, 90, (1 == bop->options->tweak_parameters),
1276 				 (bop->options->maxNumPasses > 1),  bop->outfp);
1277       fprintf(bop->outfp, "\n");
1278     }
1279     AcknowledgeBlastQuery(bop->query_bsp, 70,
1280                           bop->outfp, bop->believe_query, html);
1281     PrintDbInformation(bop->blast_database, TRUE, 70, bop->outfp, html);
1282     free_buff();
1283 
1284     return TRUE;
1285 }
PGPFormatFooter(PGPBlastOptionsPtr bop,BlastSearchBlkPtr search)1286 Boolean  PGPFormatFooter(PGPBlastOptionsPtr bop, BlastSearchBlkPtr search)
1287 {
1288     ValNodePtr vnp;
1289     BLAST_KarlinBlkPtr ka_params=NULL, ka_params_gap=NULL;
1290     TxDfDbInfoPtr dbinfo=NULL, dbinfo_head;
1291     CharPtr params_buffer=NULL;
1292     ValNodePtr other_returns;
1293     Boolean html = myargs[ARG_HTML].intvalue;
1294 
1295     if (bop->outfp == NULL)
1296 	return FALSE;
1297 
1298     if (html)
1299         fprintf(bop->outfp, "<PRE>\n");
1300 
1301     other_returns = BlastOtherReturnsPrepare(search);
1302     for (vnp=other_returns; vnp; vnp = vnp->next) {
1303         switch (vnp->choice) {
1304         case TXDBINFO:
1305             dbinfo = vnp->data.ptrvalue;
1306             break;
1307         case TXKABLK_NOGAP:
1308             ka_params = vnp->data.ptrvalue;
1309             break;
1310         case TXKABLK_GAP:
1311             ka_params_gap = vnp->data.ptrvalue;
1312             break;
1313         case TXPARAMETERS:
1314             params_buffer = vnp->data.ptrvalue;
1315             break;
1316         default:
1317             break;
1318         }
1319     }
1320     init_buff_ex(85);
1321     dbinfo_head = dbinfo;
1322     while (dbinfo) {
1323         PrintDbReport(dbinfo, 70, bop->outfp);
1324         dbinfo = dbinfo->next;
1325     }
1326     if (ka_params && !bop->options->isPatternSearch) {
1327         PrintKAParameters(ka_params->Lambda, ka_params->K,
1328                           ka_params->H, 70, bop->outfp, FALSE);
1329     }
1330     if (ka_params_gap) {
1331         if (bop->options->isPatternSearch)
1332             PrintKAParametersExtra(ka_params_gap->Lambda,
1333                                    ka_params_gap->K, ka_params_gap->H,
1334                                    bop->seedSearch->paramC, 70,
1335                                    bop->outfp, FALSE);
1336         else
1337             PrintKAParameters(ka_params_gap->Lambda, ka_params_gap->K,
1338                               ka_params_gap->H, 70, bop->outfp,
1339                               FALSE);
1340     }
1341     PrintTildeSepLines(params_buffer, 70, bop->outfp);
1342 
1343     free_buff();
1344     BlastOtherReturnsFree(other_returns);
1345 
1346     return TRUE;
1347 }
1348 
PGPrintPosMatrix(CharPtr filename,posSearchItems * posSearch,compactSearchItems * compactSearch,Boolean posComputationCalled)1349 Boolean PGPrintPosMatrix(CharPtr filename, posSearchItems *posSearch,
1350                          compactSearchItems *compactSearch,
1351                          Boolean posComputationCalled)
1352 {
1353     FILE *fp;
1354 
1355     if ((fp = FileOpen(filename, "w")) == NULL) {
1356         ErrPostEx(SEV_FATAL, 1, 0, "Unable to open matrix output file %s\n",
1357                   filename);
1358         return FALSE;
1359     }
1360 
1361     /* a diagnostic, partly an option with -Q. */
1362     outputPosMatrix(posSearch, compactSearch, fp, posComputationCalled);
1363     FileClose(fp);
1364 
1365     return TRUE;
1366 }
1367 
PGPSeedSearch(PGPBlastOptionsPtr bop,BlastSearchBlkPtr search,posSearchItems * posSearch,SeqLocPtr PNTR seqloc_duplicate,SeqAlignPtr PNTR PNTR lastSeqAligns,Int4Ptr numLastSeqAligns)1368 SeqAlignPtr PGPSeedSearch(PGPBlastOptionsPtr bop, BlastSearchBlkPtr search,
1369                           posSearchItems *posSearch,
1370                           SeqLocPtr PNTR seqloc_duplicate,
1371                           SeqAlignPtr PNTR PNTR lastSeqAligns,
1372                           Int4Ptr numLastSeqAligns)
1373 {
1374     Uint1Ptr query = NULL; /*query sequence read in*/
1375     Uint1Ptr unfilter_query = NULL; /*needed if seg will filter query*/
1376     SeqLocPtr seg_slp;  /*pointer to structure for seg filtering*/
1377     Int4 i, queryLength;   /*length of query sequence*/
1378     SeqAlignPtr head;
1379     SeqAnnotPtr annot;
1380     SeqFeatPtr sfp;
1381     SeqLocPtr seqloc, next;
1382     ValNodePtr seedReturn; /*return value from seedEngineCore, which
1383                              is a list of lists of SeqAligns, one
1384                              list of SeqAligns per pattern occurrence*/
1385 
1386     ValNodePtr vnp, info_vnp;  /* Output text messages from seedEngineCore() */
1387 
1388     query = BlastGetSequenceFromBioseq(bop->fake_bsp, &queryLength);
1389     seg_slp = BlastBioseqFilter(bop->fake_bsp,
1390                                 bop->options->filter_string);
1391     if (seg_slp) {
1392         unfilter_query = MemNew((queryLength + 1) * sizeof(Uint1));
1393         for (i = 0; i < queryLength; i++)
1394             unfilter_query[i] = query[i];
1395         BlastMaskTheResidues(query,queryLength,21,seg_slp,FALSE, 0);
1396     }
1397 
1398     search->gap_align = GapAlignBlkNew(1,1);
1399     search->gap_align->gap_open = bop->options->gap_open;
1400     search->gap_align->gap_extend = bop->options->gap_extend;
1401 
1402 
1403     search->gap_align->decline_align = INT2_MAX;
1404 
1405 #ifdef YES_TO_DECLINE_TO_ALIGN
1406     if(myargs[ARG_COST_DECLINE_ALIGN].intvalue != 0) {
1407         search->gap_align->decline_align = myargs[ARG_COST_DECLINE_ALIGN].intvalue;
1408     } else {
1409         search->gap_align->decline_align = INT2_MAX;
1410     }
1411 #endif
1412 
1413     /* search->gap_align->decline_align = (-(BLAST_SCORE_MIN)); */
1414     /* search->gap_align->decline_align = myargs[ARG_LCASE].intvalue; */
1415 
1416     search->gap_align->x_parameter = bop->options->gap_x_dropoff
1417         *NCBIMATH_LN2/bop->seedSearch->paramLambda;
1418     search->gap_align->matrix = search->sbp->matrix;
1419     initProbs(bop->seedSearch);
1420     init_order(search->gap_align->matrix, bop->program_flag,
1421                FALSE, bop->seedSearch);
1422 
1423     for(i = 0; i < queryLength; i++)
1424         query[i] = bop->seedSearch->order[query[i]];
1425 
1426     if (unfilter_query) {
1427         for(i = 0; i < queryLength; i++)
1428             unfilter_query[i] = bop->seedSearch->order[unfilter_query[i]];
1429     }
1430 
1431     seqloc = NULL;
1432     seedReturn = seedEngineCore(search, bop->options, query,
1433                                 unfilter_query,
1434                                 bop->blast_database, bop->patfile,
1435                                 bop->program_flag,
1436                                 bop->patfp, FALSE, FALSE,
1437                                 bop->seedSearch, bop->options->ethresh,
1438                                 myargs[ARG_SEARCHSP].floatvalue, posSearch,
1439                                 &seqloc, TRUE, &info_vnp);
1440     if (!bop->is_xml_output)
1441        PGPOutTextMessages(info_vnp, bop->outfp);
1442     ValNodeFreeData(info_vnp);
1443 
1444     if (search->error_return) {
1445        BlastErrorPrint(search->error_return);
1446        for (vnp = search->error_return; vnp; vnp = vnp->next) {
1447           BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1448        }
1449        search->error_return = ValNodeFree(search->error_return);
1450     }
1451     *seqloc_duplicate = seqloc;
1452     head = convertValNodeListToSeqAlignList(seedReturn, lastSeqAligns,
1453                                             numLastSeqAligns);
1454 
1455     bop->featureOrder[FEATDEF_REGION] = 1;
1456     bop->groupOrder[FEATDEF_REGION] = 1;
1457     annot = bop->fake_bsp->annot = SeqAnnotNew();
1458     bop->fake_bsp->annot->type = 1;	/* ftable. */
1459     while (seqloc) {
1460         next = seqloc->next;
1461         sfp = SeqFeatNew();
1462         sfp->location = seqloc;
1463         sfp->data.choice = SEQFEAT_REGION;
1464         sfp->data.value.ptrvalue = StringSave("pattern");
1465         annot->data = sfp;
1466         if (next) {
1467             annot->next = SeqAnnotNew();
1468             annot = annot->next;
1469             annot->type = 1;
1470         }
1471         seqloc = next;
1472     }
1473 
1474     if (query != NULL)
1475         MemFree(query);
1476     if (unfilter_query != NULL)
1477         unfilter_query = MemFree(unfilter_query);
1478 
1479     return head;
1480 }
1481 
PGPFormatMainOutput(SeqAlignPtr head,PGPBlastOptionsPtr bop,BlastSearchBlkPtr search,Int4 thisPassNum,SeqAlignPtr PNTR lastSeqAligns,Int4 numLastSeqAligns,SeqLocPtr seed_seqloc,Int2Ptr posRepeatSequences)1482 void PGPFormatMainOutput(SeqAlignPtr head, PGPBlastOptionsPtr bop,
1483                          BlastSearchBlkPtr search, Int4 thisPassNum,
1484                          SeqAlignPtr PNTR lastSeqAligns,
1485                          Int4 numLastSeqAligns, SeqLocPtr seed_seqloc,
1486                          Int2Ptr posRepeatSequences)
1487 {
1488     SeqAnnotPtr seqannot;
1489     ValNodePtr pruneSeed = NULL, seedReturn;
1490     BlastPruneSapStructPtr prune = NULL;
1491     BLAST_MatrixPtr matrix;
1492     Int4Ptr PNTR txmatrix;
1493     BioseqPtr query_bsp;
1494 
1495     if(head == NULL && myargs[ARG_FORMAT].intvalue < 7) {
1496         fprintf(bop->outfp, "\n\n ***** No hits found ******\n\n");
1497         return;
1498     }
1499 
1500     if (myargs[ARG_FORMAT].intvalue == 8 || myargs[ARG_FORMAT].intvalue == 9) {
1501        query_bsp = (bop->believe_query) ? bop->query_bsp : bop->fake_bsp;
1502 	if (myargs[ARG_FORMAT].intvalue == 9)
1503        		PrintTabularOutputHeader(bop->blast_database, query_bsp, NULL,
1504                                 "blastp", thisPassNum, bop->believe_query,
1505                                 bop->outfp);
1506        BlastPrintTabulatedResults(head, query_bsp, NULL,
1507                                   bop->number_of_alignments,
1508                                   "blastp", FALSE,
1509                                   bop->believe_query, 0, 0, bop->outfp, FALSE);
1510        return;
1511     }
1512 
1513     seqannot = SeqAnnotNew();
1514     seqannot->type = 2;
1515     AddAlignInfoToSeqAnnot(seqannot, 2);
1516     seqannot->data = head;
1517 
1518     if (search->pbp->maxNumPasses != 1 && myargs[ARG_FORMAT].intvalue < 7) {
1519         fprintf(bop->outfp, "\nResults from round %d\n",
1520                 thisPassNum);
1521     }
1522 
1523     ObjMgrSetHold();
1524     init_buff_ex(85);
1525 
1526     /* ------- Printing deflines for the BLAST output ------- */
1527 
1528     if (thisPassNum == 1) {
1529         search->positionBased = FALSE;
1530         if (!bop->options->isPatternSearch) {
1531             prune = BlastPruneHitsFromSeqAlign(head,
1532                              bop->number_of_descriptions, NULL);
1533             PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1534                                       bop->print_options, FIRST_PASS, NULL);
1535         } else {
1536             seedReturn = convertSeqAlignListToValNodeList(head,lastSeqAligns,
1537                                                           numLastSeqAligns);
1538 
1539             pruneSeed = SeedPruneHitsFromSeedReturn(seedReturn,
1540                                       bop->number_of_descriptions);
1541             PrintDefLinesExtra(pruneSeed, 80, bop->outfp, bop->print_options,
1542                                FIRST_PASS, NULL, seed_seqloc);
1543         }
1544     } else {
1545         prune = BlastPruneHitsFromSeqAlign(head,
1546                               bop->number_of_descriptions, NULL);
1547         if (ALL_ROUNDS) {
1548             PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1549                                       bop->print_options,
1550                                       NOT_FIRST_PASS_REPEATS,
1551                                       posRepeatSequences);
1552             PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1553                                       bop->print_options, NOT_FIRST_PASS_NEW,
1554                                       posRepeatSequences);
1555         } else
1556             PrintDefLinesFromSeqAlign(prune->sap, 80, bop->outfp,
1557                                       bop->print_options, FIRST_PASS, NULL);
1558     } /*thisPassNum == 1 */
1559 
1560     /* ------- --------------------------------------- ------- */
1561 
1562     if (ALL_ROUNDS && search->posConverged && myargs[ARG_FORMAT].intvalue < 7) {
1563         fprintf(bop->outfp, "\nCONVERGED!\n");
1564     }
1565 
1566     free_buff();
1567 
1568     matrix = NULL;
1569     txmatrix = NULL;
1570     if (search->sbp->matrix) {
1571        matrix = BLAST_MatrixFill(search->sbp, FALSE);
1572        txmatrix = BlastMatrixToTxMatrix(matrix);
1573     }
1574 
1575     if (!(bop->options->isPatternSearch)) {
1576         prune = BlastPruneHitsFromSeqAlign(head, bop->number_of_alignments,
1577                                            prune);
1578         seqannot->data = prune->sap;
1579 
1580         if(bop->options->discontinuous) {
1581             if(!DDV_DisplayBlastPairList(prune->sap, search->mask,
1582                                          bop->outfp, FALSE,
1583                                          bop->align_options,
1584                                          bop->align_options & TXALIGN_HTML ? 6 : 1)) {
1585                 fprintf(stdout,
1586                         "\n\n!!!\n   "
1587                         "    --------  Failure to print alignment...  --------"
1588                         "\n!!!\n\n");
1589                 fflush(stdout);
1590             }
1591         } else {   /* Old type formating */
1592             if (myargs[ARG_FORMAT].intvalue != 0) {
1593                 ShowTextAlignFromAnnot2(seqannot, 60, bop->outfp,
1594                                         bop->featureOrder, bop->groupOrder,
1595                                         bop->align_options, NULL,
1596                                         search->mask, NULL,
1597                                         NULL, NULL);
1598             } else {
1599                 ShowTextAlignFromAnnot2(seqannot, 60, bop->outfp,
1600                                         bop->featureOrder, bop->groupOrder,
1601                                         bop->align_options, txmatrix,
1602                                         search->mask, FormatScoreFunc,
1603                                         NULL, NULL);
1604             }
1605         }
1606 
1607         /* seqannot->data = head; */
1608 
1609     } else {
1610         if (bop->number_of_alignments < bop->number_of_descriptions) {
1611             pruneSeed = SeedPruneHitsFromSeedReturn(pruneSeed,
1612                                                     bop->number_of_alignments);
1613         }
1614 
1615 
1616         if (myargs[ARG_FORMAT].intvalue != 0) {
1617             ShowTextAlignFromAnnotExtra(bop->query_bsp, pruneSeed,
1618                                         seed_seqloc, 60, bop->outfp,
1619                                         bop->featureOrder, bop->groupOrder,
1620                                         bop->align_options, NULL,
1621                                         search->mask, NULL);
1622         } else {
1623             ShowTextAlignFromAnnotExtra(bop->query_bsp, pruneSeed,
1624                                         seed_seqloc, 60, bop->outfp,
1625                                         bop->featureOrder, bop->groupOrder,
1626                                         bop->align_options, txmatrix,
1627                                         search->mask, FormatScoreFunc);
1628         }
1629     }
1630 
1631     if (!(bop->options->isPatternSearch)) {
1632         prune = BlastPruneSapStructDestruct(prune);
1633     }
1634 
1635     search->positionBased = TRUE;
1636     ObjMgrClearHold();
1637     ObjMgrFreeCache(0);
1638 
1639     matrix = BLAST_MatrixDestruct(matrix);
1640     if (txmatrix)
1641        txmatrix = TxMatrixDestruct(txmatrix);
1642 
1643     seqannot->data = NULL;
1644     seqannot = SeqAnnotFree(seqannot);
1645 
1646     return;
1647 }
1648 
PGPSeqAlignOut(PGPBlastOptionsPtr bop,SeqAlignPtr head)1649 void PGPSeqAlignOut(PGPBlastOptionsPtr bop, SeqAlignPtr head)
1650 {
1651     SeqAnnotPtr seqannot;
1652 
1653     if (!bop->aip_out || !head)
1654         return;
1655 
1656     seqannot = SeqAnnotNew();
1657     seqannot->type = 2;
1658     AddAlignInfoToSeqAnnot(seqannot, 2);
1659     seqannot->data = head;
1660     SeqAnnotAsnWrite((SeqAnnotPtr) seqannot, bop->aip_out, NULL);
1661     AsnIoReset(bop->aip_out);
1662 /*
1663     bop->aip_out = AsnIoClose(bop->aip_out);
1664 */
1665     seqannot->data = NULL;
1666     seqannot = SeqAnnotFree(seqannot);
1667 }
1668 
Main(void)1669 Int2 Main (void)
1670 
1671 {
1672     PGPBlastOptionsPtr bop;
1673     BlastSearchBlkPtr search;
1674     SeqAlignPtr  head = NULL;
1675     SeqLocPtr seed_seqloc = NULL;
1676     Char buf[256] = { '\0' };
1677 
1678     /* used for psi-blast */
1679     Int4 thisPassNum;
1680     posSearchItems *posSearch;
1681     compactSearchItems *compactSearch;
1682     Boolean  recoverCheckpoint = FALSE;
1683     Boolean  alreadyRecovered = FALSE;
1684     Boolean  freqCheckpoint = FALSE;
1685     Boolean  alignCheckpoint = FALSE;
1686     Boolean  posComputationCalled = FALSE;
1687     Boolean  checkReturn = FALSE;
1688     Boolean  next_query = FALSE;
1689     Boolean tabular_output;
1690     AsnIoPtr aip = NULL;
1691 
1692     SeqAlignPtr PNTR lastSeqAligns = NULL;
1693                                 /*keeps track of the last SeqAlign in
1694                                   each list of seedReturn so that the
1695                                   2-level list can be converted to a 1-level
1696                                   list and then back to 2-level*/
1697     Int4 numLastSeqAligns = 0;
1698 
1699     PSIXmlPtr psixp = NULL;
1700     ValNodePtr vnp;
1701 
1702     /* ----- End of definitions ----- */
1703 
1704     StringCpy(buf, "blastpgp ");
1705     StringNCat(buf, BlastGetVersionNumber(), sizeof(buf)-StringLen(buf)-1);
1706     if (! GetArgs (buf, NUMARG, myargs))
1707         return (1);
1708     ErrSetMessageLevel(SEV_WARNING);
1709 
1710     UseLocalAsnloadDataAndErrMsg ();
1711 
1712     if (! SeqEntryLoad())
1713         return 1;
1714 
1715     if ((bop = PGPReadBlastOptions()) == NULL)
1716        return 1;
1717 
1718     if(bop->is_xml_output) {
1719        if((aip = AsnIoOpen(myargs[ARG_OUT].strvalue, "wx")) == NULL)
1720           return 1;
1721     }
1722 
1723     /* Here we will start main do/while loop over many query entries in
1724        the input file. If there is an option for checkpoint recover or
1725        Output File for PSI-BLAST Checkpointing all but first entries in
1726        the input file will be discarded */
1727 
1728     do {
1729         search = BLASTSetUpSearchWithReadDb(bop->fake_bsp, "blastp",
1730                                             bop->query_bsp->length,
1731                                             bop->blast_database,
1732                                             bop->options, NULL);
1733 
1734         if (search == NULL)
1735             return 1;
1736 
1737 
1738         if(bop->is_xml_output) {
1739            psixp = PSIXmlInit(aip, "blastp", bop->blast_database,
1740                               bop->options,
1741                               bop->fake_bsp, 0);
1742         }
1743 
1744         /*AAS*/
1745         if ((NULL != myargs[ARG_CHECKPOINT_INPUT].strvalue) || (NULL != myargs[ARG_ALIGNMENT_IN].strvalue)) {
1746             recoverCheckpoint = TRUE;
1747             if (NULL != myargs[ARG_CHECKPOINT_INPUT].strvalue) {
1748                 freqCheckpoint = TRUE;
1749                 alignCheckpoint = FALSE;
1750             } else {
1751                 freqCheckpoint = FALSE;
1752                 alignCheckpoint = TRUE;
1753             }
1754         }
1755 
1756         if (recoverCheckpoint)
1757             search->positionBased = TRUE;
1758         else
1759             search->positionBased = FALSE;
1760 
1761         global_fp = bop->outfp;
1762         tabular_output = (myargs[ARG_FORMAT].intvalue == 8 || myargs[ARG_FORMAT].intvalue == 9);
1763 
1764         if(!bop->is_xml_output && !tabular_output) {
1765             PGPFormatHeader(bop);
1766         }
1767 
1768         posSearch = NULL;
1769         thisPassNum = 0;
1770         compactSearch = NULL;
1771         search->posConverged = FALSE;
1772         global_fp = bop->outfp;
1773         search->error_return = NULL;
1774         /*AAS*/
1775         if (recoverCheckpoint) {
1776             posSearch = (posSearchItems *) MemNew(1 * sizeof(posSearchItems));
1777             compactSearch = compactSearchNew(compactSearch);
1778             copySearchItems(compactSearch, search, bop->options->matrix);
1779             posInitializeInformation(posSearch,search);
1780             /*AAS*/
1781             if (freqCheckpoint) {
1782                 checkReturn = posReadCheckpoint(posSearch, compactSearch, myargs[ARG_CHECKPOINT_INPUT].strvalue, myargs[ARG_SCOREMAT_INPUT].intvalue, &(search->error_return));
1783                 search->sbp->posMatrix = posSearch->posMatrix;
1784                 if (NULL == search->sbp->posFreqs)
1785                     search->sbp->posFreqs =  allocatePosFreqs(compactSearch->qlength, compactSearch->alphabetSize);
1786                 copyPosFreqs(posSearch->posFreqs,search->sbp->posFreqs, compactSearch->qlength, compactSearch->alphabetSize);
1787             } else {
1788                 search->sbp->posMatrix = BposComputation(posSearch, search, compactSearch, myargs[ARG_ALIGNMENT_IN].strvalue, myargs[ARG_CHECKPOINT_OUTPUT].strvalue, myargs[ARG_SCOREMAT_OUTPUT].intvalue, bop->query_bsp, myargs[ARG_GAPOPEN].intvalue, myargs[ARG_GAPEXT].intvalue, &(search->error_return));
1789 		posComputationCalled = TRUE;
1790                 if (NULL == search->sbp->posMatrix)
1791                     checkReturn = FALSE;
1792                 else
1793                     checkReturn = TRUE;
1794             }
1795 
1796 
1797             if (search->error_return) {
1798                BlastErrorPrint(search->error_return);
1799                for (vnp = search->error_return; vnp; vnp = vnp->next) {
1800                   BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1801                }
1802                search->error_return = ValNodeFree(search->error_return);
1803             }
1804             if (!checkReturn) {
1805                 ErrPostEx(SEV_FATAL, 1, 0, "blast: Error recovering from checkpoint");
1806                 return 1;
1807             }
1808 
1809             /* Print out Pos matrix if necessary */
1810             if (myargs[ARG_MATRIX_OUT].strvalue != NULL)
1811                 PGPrintPosMatrix(myargs[ARG_MATRIX_OUT].strvalue, posSearch, compactSearch, posComputationCalled);
1812         }
1813 
1814         do {  /*AAS*/
1815             thisPassNum++;
1816             if (thisPassNum > 1)
1817                 bop->options->isPatternSearch = FALSE;
1818 
1819             if(head != NULL)
1820                 head = SeqAlignSetFree(head);
1821 
1822 #ifdef OS_UNIX
1823             if(!bop->is_xml_output && !tabular_output && !bop->is_asn1_output) {
1824                 search->thr_info->tick_callback =  tick_callback;
1825                 fprintf(global_fp, "%s", "Searching");
1826                 fflush(global_fp);
1827             }
1828 #endif
1829             if (1 == thisPassNum && (!recoverCheckpoint)) {
1830 
1831                 posSearch = (posSearchItems *)
1832                     MemNew(1 * sizeof(posSearchItems));
1833             }
1834 
1835             /* ----- Here is real BLAST search done ------- */
1836 
1837             if (bop->options->isPatternSearch &&
1838                 (1 == thisPassNum && (!recoverCheckpoint))) {
1839                 head = PGPSeedSearch(bop, search, posSearch,
1840                                      &seed_seqloc,
1841                                      &lastSeqAligns, &numLastSeqAligns);
1842             } else {
1843                 if ((bop->options->tweak_parameters > 1) &&
1844                     (1 == thisPassNum && (!recoverCheckpoint))) {
1845                     /* round 1 and not recovering from checkpoint;
1846                      * relax the cutoff evalue so that we don't loose
1847                      * too many hits for which compositional adjustment
1848                      * improves the evalue. */
1849                     bop->options->expect_value =
1850                         bop->options->kappa_expect_value * EVALUE_EXPAND;
1851                     search->pbp->cutoff_e = bop->options->expect_value;
1852                 } else {
1853                     /* For pass > 1, set all expect_values equal */
1854                     search->pbp->cutoff_e =
1855                         bop->options->expect_value =
1856                         bop->options->kappa_expect_value;
1857                 }
1858                 if ((1 == thisPassNum) && (!recoverCheckpoint))
1859                     head = BioseqBlastEngineCore(search, bop->options, NULL);
1860                 else
1861                     head = BioseqBlastEngineCore(search, bop->options,
1862                                                  search->sbp->posMatrix);
1863             }
1864             /* ---------------------------------------------- */
1865 
1866             if (recoverCheckpoint) {
1867                 compactSearchDestruct(compactSearch);
1868                 recoverCheckpoint = FALSE;
1869                 alreadyRecovered = TRUE;
1870             }
1871 
1872             if (thisPassNum == 1) {
1873                 compactSearch = compactSearchNew(compactSearch);
1874             } else {
1875                 MemFree(compactSearch->standardProb);
1876             }
1877 
1878             copySearchItems(compactSearch, search, bop->options->matrix);
1879 
1880 
1881             /* The next two calls (after the "if") are diagnostics
1882                for Stephen. Don't perform this if only one pass will
1883                be made (i.e., standard BLAST) */
1884 
1885             if (ALL_ROUNDS && 1 != search->pbp->maxNumPasses) {
1886                 if ((1 == thisPassNum)  && (!alreadyRecovered))
1887                     posInitializeInformation(posSearch, search);
1888                 posPrintInformation(posSearch, search, thisPassNum);
1889             }
1890 
1891 #ifdef OS_UNIX
1892             if(!bop->is_xml_output && !tabular_output && !bop->is_asn1_output) {
1893                 fprintf(global_fp, "%s", "done\n\n");
1894             }
1895 #endif
1896 
1897             /* AAS */
1898             if (thisPassNum == 1) {
1899                 ReadDBBioseqFetchEnable ("blastpgp", bop->blast_database,
1900                                          FALSE, TRUE);
1901             } else {
1902 
1903                 /* Have things converged? */
1904                 if (ALL_ROUNDS && search->pbp->maxNumPasses != 1) {
1905                     posConvergenceTest(posSearch, search, head, thisPassNum);
1906                 }
1907             }
1908 
1909             /*AAS*/
1910             search->positionBased = TRUE;
1911 
1912             /* Here is all BLAST formating of the main output done */
1913 
1914             if(bop->is_xml_output) {
1915                 ValNodePtr other_returns;
1916                 IterationPtr iterp;
1917                 ValNodePtr vnp = search->mask;
1918                 /* Avoid linking masking locations into other_returns,
1919                    lest they will be freed */
1920                 search->mask = NULL;
1921                 other_returns = BlastOtherReturnsPrepare(search);
1922                 search->mask = vnp;
1923 
1924                 if (head == NULL) {
1925                     iterp = BXMLBuildOneIteration(head, other_returns,
1926                                bop->options->is_ooframe,
1927                                !bop->options->gapped_calculation, thisPassNum,
1928                                "No hits found", search->mask);
1929                 } else {
1930                     BlastPruneSapStructPtr prune =
1931                         BlastPruneHitsFromSeqAlign(head, bop->number_of_alignments, NULL);
1932                     iterp = BXMLBuildOneIteration(prune->sap, other_returns,
1933                                bop->options->is_ooframe,
1934                                !bop->options->gapped_calculation, thisPassNum,
1935                                (search->posConverged ? "CONVERGED" : NULL),
1936                                search->mask);
1937                     prune = BlastPruneSapStructDestruct(prune);
1938                 }
1939 
1940                 IterationAsnWrite(iterp, psixp->aip, psixp->atp);
1941                 AsnIoFlush(psixp->aip);
1942 
1943                 IterationFree(iterp);
1944                 BlastOtherReturnsFree(other_returns);
1945             } else if(!bop->is_asn1_output){
1946                 PGPFormatMainOutput(head, bop, search, thisPassNum,
1947                                     lastSeqAligns, numLastSeqAligns,
1948                                     seed_seqloc, posSearch->posRepeatSequences);
1949             }
1950 
1951             if (alreadyRecovered) {
1952                 posCheckpointFreeMemory(posSearch, compactSearch->qlength);
1953                 alreadyRecovered = FALSE;
1954             }
1955 
1956             if (ALL_ROUNDS && thisPassNum > 1) {
1957                 posCleanup(posSearch, compactSearch);
1958             }
1959 
1960             if (!search->posConverged && (search->pbp->maxNumPasses == 0 ||
1961                                           (thisPassNum < search->pbp->maxNumPasses))) {
1962                 if (ALL_ROUNDS) {
1963                     search->sbp->posMatrix =
1964                         CposComputation(posSearch, search, compactSearch,
1965                                         head, myargs[ARG_CHECKPOINT_OUTPUT].strvalue,
1966                                         (bop->options->isPatternSearch &&
1967                                          (1== thisPassNum)),
1968                                         myargs[ARG_SCOREMAT_OUTPUT].intvalue,
1969                                         bop->query_bsp,
1970                                         myargs[ARG_GAPOPEN].intvalue,
1971                                         myargs[ARG_GAPEXT].intvalue,
1972                                         &(search->error_return), 1.0); /*AAS*/
1973 		    posComputationCalled = TRUE;
1974                     if (search->error_return) {
1975                        BlastErrorPrint(search->error_return);
1976                        for (vnp = search->error_return; vnp; vnp = vnp->next) {
1977                           BlastDestroyErrorMessage((BlastErrorMsgPtr)vnp->data.ptrvalue);
1978                        }
1979                        search->error_return = ValNodeFree(search->error_return);
1980                     }
1981                 } else {
1982                     search->sbp->posMatrix =
1983                         WposComputation(compactSearch, head, search->sbp->posFreqs);
1984 		    posComputationCalled = TRUE;
1985                 }
1986 #if 0
1987                 /* DEBUG Printing of the matrix */
1988                 {{
1989                     Int4 i, j;
1990                     FILE *fd;
1991 
1992                     fd = FileOpen("omatrix.out", "w");
1993                     for(i = 0; i < bop->query_bsp->length; i++) {
1994                         for(j = 0; j < 26; j++) {
1995                             fprintf(fd, "%d ", search->sbp->posMatrix[i][j]);
1996                         }
1997                         fprintf(fd, "\n");
1998                     }
1999                     FileClose(fd);
2000                 }}
2001 #endif
2002             } else {
2003                 search->sbp->posMatrix = NULL;
2004             }
2005 
2006             if (ALL_ROUNDS && thisPassNum > 1) {
2007                 MemFree(posSearch->posRepeatSequences);
2008             }
2009 
2010             if (!search->posConverged && (0 == search->pbp->maxNumPasses ||
2011                                           thisPassNum < search->pbp->maxNumPasses)) {
2012 
2013                 /* Print out pos matrix if necessary */
2014                 if (ALL_ROUNDS && (myargs[ARG_MATRIX_OUT].strvalue != NULL))
2015                     PGPrintPosMatrix(myargs[ARG_MATRIX_OUT].strvalue, posSearch,
2016                                      compactSearch, posComputationCalled);
2017             }
2018 
2019         	if ((bop->is_asn1_output || bop->aip_out != NULL) && head != NULL)
2020             		PGPSeqAlignOut(bop, head);
2021 
2022         } while (( 0 == search->pbp->maxNumPasses || thisPassNum < (search->pbp->maxNumPasses)) && (! (search->posConverged)));
2023 
2024         head = SeqAlignSetFree(head);
2025 
2026         /* Here we will print out footer of BLAST output */
2027 
2028         /*need to temporarily adjust cutoff_e for printing*/
2029         if(!bop->is_xml_output && !tabular_output) {
2030             PGPFormatFooter(bop, search);
2031         }
2032 
2033         /* PGPOneQueryCleanup */
2034 
2035         if (bop->options->isPatternSearch) {
2036             bop->seedSearch = MemFree(bop->seedSearch);
2037             FileClose(bop->patfp);
2038         }
2039 
2040         if (ALL_ROUNDS) {
2041             posFreeInformation(posSearch);
2042             MemFree(posSearch);
2043         }
2044         compactSearchDestruct(compactSearch);
2045         search = BlastSearchBlkDestruct(search);
2046 
2047         /* If these options are set we are not going to proceed with another
2048            queryes in the input file if any */
2049 
2050         if(recoverCheckpoint || myargs[ARG_MATRIX_OUT].strvalue != NULL)
2051             break;
2052 
2053         next_query = FALSE;
2054         next_query = PGPReadNextQuerySequence(bop);
2055 
2056         if (psixp) {
2057            PSIXmlReset(psixp);
2058         }
2059     } while (next_query);       /* End of main do {} while (); loop */
2060 
2061     ReadDBBioseqFetchDisable();
2062 
2063 
2064     if(psixp != NULL) {
2065         PSIXmlClose(psixp);
2066     }
2067 
2068     PGPFreeBlastOptions(bop);
2069 
2070     return 0;
2071 }
2072 
2073 
2074 /* Nothing below this line is executable code */
2075 
2076 #ifdef PRINT_ONLY_ALIGNMENT
2077         {{
2078             AsnIoPtr aip;
2079 
2080             if (seqannot)
2081                 seqannot = SeqAnnotFree(seqannot);
2082 
2083             seqannot = SeqAnnotNew();
2084             seqannot->type = 2;
2085             AddAlignInfoToSeqAnnot(seqannot, 2);
2086             seqannot->data = head;
2087             aip = AsnIoOpen("stdout", "w");
2088             SeqAnnotAsnWrite(seqannot, aip, NULL);
2089             AsnIoReset(aip);
2090             AsnIoClose(aip);
2091 
2092             seqannot->data = NULL;
2093             seqannot = SeqAnnotFree(seqannot);
2094 
2095             return 0;
2096         }}
2097 #endif
2098